# Libraries ---------------------------------------------------------------
library(tidyverse)
## ── Attaching packages ──────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.1.0 ✔ purrr 0.2.5
## ✔ tibble 2.0.1 ✔ dplyr 0.7.6
## ✔ tidyr 0.8.1 ✔ stringr 1.3.1
## ✔ readr 1.1.1 ✔ forcats 0.3.0
## Warning: package 'tibble' was built under R version 3.5.2
## ── Conflicts ─────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(tidytext)
library(sentimentr)
library(readr)
library(tokenizers)
library(lubridate)
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
## date
library(ggplot2)
library(wordcloud)
## Loading required package: RColorBrewer
library(wordcloud2)
library(RColorBrewer)
library(formattable)
# Color Vectors -----------------------------------------------------------
cbbPalette <- c("#000000", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7")
a11y25 <- c("#fc9272", "#fb6a4a", "#ef3b2c", "#cb181d", "#67000d", "#7fcdbb", "#41b6c4", "#1d91c0", "#225ea8", "#081d58", "#fa9fb5", "#f768a1", "#dd3497", "#ae017e", "#7a0177", "#a1d99b", "#74c476", "#41ab5d", "#238b45", "#00441b", "#bcbddc", "#9e9ac8", "#807dba", "#6a51a3", "#54278f", "#3f007d")
a11yRed <- c("#fc9272", "#fb6a4a", "#ef3b2c", "#cb181d", "#67000d")
a11yBlue <- c("#7fcdbb", "#41b6c4", "#1d91c0", "#225ea8", "#081d58")
a11yPink <- c("#fa9fb5", "#f768a1", "#dd3497", "#ae017e", "#7a0177")
a11yGreen <- c("#a1d99b", "#74c476", "#41ab5d", "#238b45", "#00441b")
a11yPurple <- c("#bcbddc", "#9e9ac8", "#807dba", "#6a51a3", "#54278f")
CBBlues <- c('#08306b', '#08519c', '#2171b5', '#3888BC', '#4399CB')
# Import Dataset ----------------------------------------------------------
all_tweets <- read_csv("~/HLB Drive/R/Final Scripts/all_tweets_final.csv",
col_types = cols(hashtag_norm = col_character(),
tweet_id = col_character())) %>%
arrange(linenumber) %>%
as_tibble()
# Clean and Tokenize All Tweets -----------------------------------------------------------
remove_symbol <- "&|<|>|=|~|≠|[+]|[\"]" # punctuation and symbols to strip from tokens
all_tweets_token <- all_tweets %>%
arrange(hashtag_norm) %>%
ungroup() %>%
unnest_tokens(word, tweet_text_org, token = "tweets", strip_url = TRUE) %>%
mutate(word = str_remove_all(word, remove_symbol)) %>%
filter(!word %in% stop_words$word,
!word %in% str_remove_all(stop_words$word, "'")) %>%
filter(!str_detect(word, "^(?i)RT|@|#"), # remove retweets, user screen names, hashtags
!str_detect(word, "hwc111"), # remove un-hashtaged "hwc111" (the class hashtag)
!str_detect(word, "\\d"), # remove digits
!str_detect(word, "\\s")) %>% # remove white space
select(linenumber, date, time, tweet_id, tweet_type, user_screen_name, hashtag_norm, word) %>%
arrange(linenumber)
# Methodology: Data Analysis ----------------------------------------------
## Word Cloud: All Tweets
top_100_words <- all_tweets_token %>%
group_by(word) %>%
count(word) %>%
arrange(desc(n)) %>%
head(100)
top_100_words %>%
wordcloud2(color = a11y25, shape = 'circle', ellipticity = 0.75, rotateRatio = 0)
# Affect & Historical Empathy: Identifying Affect in Students' Tweets ----------------------------------
## Clean, Tokenize, Gather All Tweets
sentimentr_all_clean <- all_tweets %>% # No stopwords removed; some are valence shifters
arrange(hashtag_norm) %>%
ungroup() %>%
unnest_tokens(word, tweet_text_org, token = "tweets", strip_url = TRUE) %>%
mutate(word = str_remove_all(word, remove_symbol)) %>%
filter(!str_detect(word, "^(?i)RT|@|#"), # remove retweets, user screen names, hashtags
!str_detect(word, "hwc111"), # remove un-hashtaged "hwc111" (the class hashtag)
!str_detect(word, "\\d"), # remove digits
!str_detect(word, "\\s")) %>% # remove white space
select(linenumber, date, time, tweet_id, tweet_type, user_screen_name, hashtag_norm, word) %>%
arrange(linenumber)
sentimentr_all_gather <- sentimentr_all_clean %>%
group_by(linenumber, tweet_type, date, time, tweet_id, user_screen_name, hashtag_norm) %>%
summarise(tweet_text = paste(word, collapse = " "))
## Get sentimentr scores
sentimentr_all <- sentimentr_all_gather %>%
get_sentences() %>%
sentiment()
## Count remaining tweets
tally(sentimentr_all) ## Returns 11351; tweets containing only hashtags, user screen names, and/or URLs removed in cleaning step
## n
## 1 11351
## Count Positive Sentimentr
sentiment_positive_all <- sentimentr_all %>%
filter(sentiment > 0)
tally(sentiment_positive_all) ## 6782
## n
## 1 6782
6782/11351 ## 59.74804%
## [1] 0.5974804
## Count Negative Sentimentr
sentiment_negative_all <- sentimentr_all %>%
filter(sentiment < 0)
tally(sentiment_negative_all) ## 2926
## n
## 1 2926
2926/11351 ## 25.77746%
## [1] 0.2577746
## Count 0
sentiment_zero_all <- sentimentr_all %>%
filter(sentiment == 0)
tally(sentiment_zero_all) ## 1643
## n
## 1 1643
1643/11351 ## 14.4745%
## [1] 0.144745
## All Tweets Average Sentiment by Hashtag
count_all_hashtag <- sentimentr_all %>%
group_by(hashtag_norm) %>%
count(hashtag_norm) %>%
summarise(total = n)
avg_sentiment_hashtag_all <- sentimentr_all %>%
group_by(hashtag_norm) %>%
summarise(avg_sentiment = average_mean(sentiment)) %>%
filter(avg_sentiment != 0) %>%
arrange(desc(avg_sentiment)) %>%
ungroup() %>%
inner_join(count_all_hashtag)
## Joining, by = "hashtag_norm"
avg_sentiment_hashtag_all %>%
filter(!is.na(hashtag_norm),
!str_detect(hashtag_norm, "02|05|12|15|20|25")) %>%
mutate(hashtag_norm = reorder(hashtag_norm, avg_sentiment)) %>%
ggplot(aes(hashtag_norm, avg_sentiment, fill = avg_sentiment)) +
geom_col(show.legend = FALSE, color = "black") +
geom_text(aes(label = total),
color = "white",
size = 3,
position = position_stack(vjust = 0.5)) +
labs(title = "Sentimentr: All Tweets Average Sentiment by Hashtag",
caption = "Note: Hashtags 02, 05, 12, 15, 20, 25 not shown.",
x = "Hashtag",
y = "Average Sentiment")

## Average Sentiment by Hashtag & Tweet Type
count_all_hashtag_type <- sentimentr_all %>%
group_by(hashtag_norm, tweet_type) %>%
count(tweet_type) %>%
summarise(total = n)
avg_sentiment_all_hashtag_type <- sentimentr_all %>%
group_by(hashtag_norm, tweet_type) %>%
summarise(avg_sentiment = average_mean(sentiment)) %>%
filter(avg_sentiment != 0) %>%
arrange(desc(avg_sentiment)) %>%
ungroup() %>%
inner_join(count_all_hashtag_type)
## Joining, by = c("hashtag_norm", "tweet_type")
avg_sentiment_all_hashtag_type %>%
filter(!str_detect(tweet_type, "Fishbowl|Participlan"),
!str_detect(hashtag_norm, "05|12|15|20|25")) %>%
ggplot(aes(hashtag_norm, avg_sentiment, fill = avg_sentiment)) +
geom_col(show.legend = FALSE, color = "black") +
geom_text(aes(label = total),
color = "white",
size = 2,
position = position_stack(vjust = 0.5)) +
facet_wrap(~tweet_type, scales = "free_y", ncol = 1) +
labs(title = "All Tweets: Comparing PST and Exit Tweet Average Sentiments",
caption = "Note: Class 03 did not have PST. For Class 10 the PST average sentiment was zero.",
x = "Hashtag",
y = "Average Sentiment")

## Min/Max/Range by Tweet Type - calculated in Google Sheets from joined all_tweets.csv & sentimentr_scores.csv
## PST & Exit Average Sentiment Comparison - calculated in Google Sheets from joined all_tweets.csv & sentiment_scores
## PST & Exit Compared (Class 22 & 08)
token_sentiment_all <- sentimentr_all %>%
arrange(hashtag_norm) %>%
ungroup() %>%
unnest_tokens(word, tweet_text, token = "words") %>%
filter(!word %in% stop_words$word,
!word %in% str_remove_all(stop_words$word, "'")) %>%
select(linenumber, date, time, tweet_id, tweet_type, user_screen_name, hashtag_norm, word, sentiment) %>%
arrange(linenumber)
pst_08 <- token_sentiment_all %>%
filter(hashtag_norm == 8,
tweet_type == "PST",
!str_detect(word, "source|significant|todays")) %>%
group_by(word) %>%
count(word) %>%
summarise(pst_total = n) %>%
arrange(desc(pst_total))
exit_08 <- token_sentiment_all %>%
filter(hashtag_norm == 8,
tweet_type == "Exit",
!str_detect(word, "source|significant|todays")) %>%
group_by(word) %>%
count(word) %>%
summarise(exit_total = n) %>%
arrange(desc(exit_total))
pst_22 <- token_sentiment_all %>%
filter(hashtag_norm == 22,
tweet_type == "PST",
!str_detect(word, "source|significant|todays")) %>%
group_by(word) %>%
count(word) %>%
summarise(pst_total = n) %>%
arrange(desc(pst_total))
exit_22 <- token_sentiment_all %>%
filter(hashtag_norm == 22,
tweet_type == "Exit",
!str_detect(word, "source|significant|todays")) %>%
group_by(word) %>%
count(word) %>%
summarise(exit_total = n) %>%
arrange(desc(exit_total))
# Affect & Historical Empathy: Affect as Evidence of Care -----------------
## Create Women Subset
women_terms <- c("(?i)wom.", "(?i)^she$", "(?i)^her$", "(?i)wif.", "(?i)wiv.", "(?i)mothe.", "(?i)daughte.", "(?i)Hatshepsut", "(?i)Shamhat", "(?i)Ninsun", "(?i)harlo.", "(?i)prostitut.", "(?i)Lysistrata", "(?i)Cleonice", "(?i)Lampito", "(?i)Myrrhine", "(?i)Tomyris", "(?i)Sabin.", "(?i)Khadij.", "(?i)female.") # all terms related to women from the course are included in the women subset
women_match <- str_c(women_terms, collapse = "|")
women_subset <- all_tweets %>%
filter(str_detect(tweet_text_org, women_match)) %>% #match women terms to content of tweet text
mutate(linenumberW = row_number()) # add linenumber distinct to women subset
## Create Sentimentr Women Subset
sentimentr_women_clean <- women_subset %>%
arrange(hashtag_norm) %>%
ungroup() %>%
unnest_tokens(word, tweet_text_org, token = "tweets", strip_url = TRUE) %>%
mutate(word = str_remove_all(word, remove_symbol)) %>%
filter(!str_detect(word, "^(?i)RT|@|#"), # remove retweets, user screen names, hashtags
!str_detect(word, "hwc111"), # remove un-hashtaged "hwc111" (the class hashtag)
!str_detect(word, "\\d"), # remove digits
!str_detect(word, "\\s")) %>% # remove white space
select(linenumber, linenumberW, date, time, tweet_id, tweet_type, user_screen_name, hashtag_norm, word) %>%
arrange(linenumber)
sentimentr_women_gather <- sentimentr_women_clean %>%
group_by(linenumber, linenumberW, tweet_type, date, time, tweet_id, user_screen_name, hashtag_norm) %>%
summarise(tweet_text = paste(word, collapse = " "))
sentimentr_women <- sentimentr_women_gather %>%
get_sentences() %>%
sentiment() # runs sentimentr analysis on women tweets
## Average Sentiment Women Subset
sentimentr_women %>%
summarise(avg_sent = average_mean(sentiment))
## avg_sent
## 1 0.1027464
## All Tweets Word Cloud (Blue)
all_word_count <- all_tweets_token %>%
count(word) %>%
arrange(desc(n))
wordcloud_all_tweets <- all_tweets_token %>%
count(word) %>%
filter(word != "significant") %>% # removed "significant" because it appeared in the prompt for the exit tweets
top_n(100) %>%
arrange(desc(n))
## Selecting by n
wordcloud2(wordcloud_all_tweets, size = 0.7, color=rep_len(CBBlues, nrow(wordcloud_all_tweets)))
## Positive Sentimentr Women
sentiment_positive_women <- sentimentr_women %>%
filter(sentiment > 0) # filter for tweets with sentiment value >0
token_sentiment_positive_women <- sentiment_positive_women %>%
arrange(hashtag_norm) %>%
ungroup() %>%
unnest_tokens(word, tweet_text, token = "words") %>%
filter(!word %in% stop_words$word,
!word %in% str_remove_all(stop_words$word, "'")) %>%
select(linenumber, linenumberW, date, time, tweet_id, tweet_type, user_screen_name, hashtag_norm, word, sentiment) %>%
arrange(linenumberW) # tokenize words in tweet text of positive tweets
## Negative Sentimentr Women
sentiment_negative_women <- sentimentr_women %>%
filter(sentiment < 0) # filter for tweets with sentimentr value <0
token_sentiment_negative_women <- sentiment_negative_women %>%
arrange(hashtag_norm) %>%
ungroup() %>%
unnest_tokens(word, tweet_text, token = "words") %>%
filter(!word %in% stop_words$word,
!word %in% str_remove_all(stop_words$word, "'")) %>%
select(linenumber, linenumberW, date, time, tweet_id, tweet_type, user_screen_name, hashtag_norm, word, sentiment) %>%
arrange(linenumberW) # tokenize words in tweet text of negative tweets
## Word Frequency Dataset
base_url <- "https://programminghistorian.org/assets/basic-text-processing-in-r"
wf <- read_csv(sprintf("%s/%s", base_url, "word_frequency.csv"))
## Parsed with column specification:
## cols(
## language = col_character(),
## word = col_character(),
## frequency = col_double()
## )
## Positive Sentiment Low Frequency
positive_sentiment_wf_women <- token_sentiment_positive_women %>%
group_by(word) %>%
count(word) %>%
summarise(total = n) %>%
arrange(desc(total)) %>%
inner_join(wf) %>%
select(word, total, frequency) %>%
arrange(desc(frequency))
## Joining, by = "word"
positive_low_frequency_women <- positive_sentiment_wf_women %>%
filter(frequency < 0.005, total >= 3) %>%
filter(!str_detect(word, "tho|rly|ppl|abt|tt|cuz|sth|cos")) %>%
arrange(desc(total))
positive_low_frequency_women %>%
with(wordcloud(word, total, scale = c(2, 0.5), max.words = 200))

## Negative Sentiment Low Frequency
negative_sentiment_wf_women <- token_sentiment_negative_women %>%
group_by(word) %>%
count(word) %>%
summarise(total = n) %>%
arrange(desc(total)) %>%
inner_join(wf) %>% # combine negative sentiment tweets with word frequency dataset
select(word, total, frequency) %>%
arrange(desc(frequency))
## Joining, by = "word"
negative_low_frequency_women <- negative_sentiment_wf_women %>%
filter(frequency < 0.005, total >= 3) %>%
filter(!str_detect(word, "tho|rly|ppl|abt")) %>% # removed abbreviated words from set
arrange(desc(total))
negative_low_frequency_women %>%
with(wordcloud(word, total, scale = c(2, 0.5), max.words = 200))

## Find Unique Terms in Positive, Negative, and Positive/Negative Subsets
positive_subset <- positive_low_frequency_women %>%
mutate(sentiment = "positive") %>% # add sentiment column labeling words in this subset as "positive"
select(word, total, sentiment)
negative_subset <- negative_low_frequency_women %>%
mutate(sentiment2 = "negative") %>%
mutate(total2 = total) %>% # add sentiment column labeling words in this subset as "negative"
select(word, total2, sentiment2)
combined_pos_neg_subsets <- full_join(positive_subset, negative_subset) # combine negative and positive set
## Joining, by = "word"
## Positive Word Cloud
positive_only_women <- combined_pos_neg_subsets %>%
filter(sentiment == "positive",
is.na(sentiment2)) %>%
with(wordcloud(word, total, scale = c(2.5, 0.5))) # word cloud containing words that appear *only* in tweets with positive sentimentr values

negative_only_women <- combined_pos_neg_subsets %>%
filter(sentiment2 == "negative",
is.na(sentiment),
word != "ye") %>%
select(word, total2, sentiment2) %>%
with(wordcloud(word, total2, scale = c(2.5, 0.5))) # word cloud containing words that appear *only* in tweets with negative sentimentr values

pos_neg_women <- combined_pos_neg_subsets %>%
filter(!is.na(sentiment),
!is.na(sentiment2)) %>%
group_by(word) %>%
mutate(combined_total = sum(total, total2))
pos_neg_women %>%
with(wordcloud(word, combined_total, scale = c(2.5, 0.5))) # word cloud containing words that appear in tweets with positive *and* negative sentimentr values

# Affect & Historical Empathy: Care & Understanding -----------------------
## Calculations for "asserted" and "qualified" tweets completed in Google Sheets
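## The "asserted"/"qualified" coding itself was done by hand. As a rough,
## purely illustrative heuristic (keyword matching on hedging language, not the
## hand coding used for the analysis above), a comparable split could be sketched as:
hedge_terms <- "(?i)maybe|perhaps|might|probably|seems|i think|i guess"
all_tweets %>%
  mutate(claim_type = if_else(str_detect(tweet_text_org, hedge_terms),
                              "qualified", "asserted")) %>% # heuristic label only
  count(claim_type)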
# The Attention Economy & Historical Significance: Tastemakers & Communities of Participation -------------------------
## Create Exit Tweet Subset
exit_tweets <- all_tweets %>%
filter(tweet_type == "Exit")
## Find Exit Tweet Word Frequencies
exit_tweet_count <- all_tweets_token %>%
filter(tweet_type == "Exit",
!str_detect(word, "significant|class|exit|todays")) %>%
group_by(word) %>%
count() %>%
summarise(total = n) %>%
arrange(desc(total))
## Combine with Word Frequency Dataset
base_url <- "https://programminghistorian.org/assets/basic-text-processing-in-r"
wf <- read_csv(sprintf("%s/%s", base_url, "word_frequency.csv"))
## Parsed with column specification:
## cols(
## language = col_character(),
## word = col_character(),
## frequency = col_double()
## )
exit_tweet_relative <- exit_tweet_count %>%
inner_join(wf) %>%
filter(frequency <= 0.002,
total >= 5) %>%
arrange(desc(total)) # find words with low frequency (English), high frequency (Exit Tweets)
## Joining, by = "word"
exit_tweet_relative %>%
wordcloud2(color = a11y25, shape = 'circle', ellipticity = 0.5, hoverFunction = NULL)
## Count Tweets Containing Low-Frequency Exit Tweet Words
low_frequency_exit <- exit_tweets %>%
filter(str_detect(tweet_text_org, "(?i)gods|(?i)hatshepsut|(?i)germanic|(?i)romans|(?i)herodotus|(?i)gilgamesh|(?i)ibn|(?i)mongols|(?i)confucius|(?i)christianity|(?i)tribes|(?i)learnt|(?i)persians|(?i)deities|(?i)beliefs|(?i)buddhism|(?i)dharma|(?i)suffering|(?i)germans|(?i)buddha|(?i)abraham|(?i)islam|(?i)quran|(?i)ren|(?i)greeks|(?i)epicurus|(?i)kinda|(?i)egyptians|(?i)religions|(?i)teachings|(?i)ppl|(?i)yahweh|(?i)enkidu|(?i)pliny|(?i)sacrifice|(?i)bias|(?i)caesar|(?i)epictetus|(?i)cuz|(?i)medieval|(?i)equality|(?i)punishment|(?i)biased|(?i)brutus|(?i)fate|(?i)happiness|(?i)lysistrata|(?i)significance|(?i)wives|(?i)desires|(?i)haha|(?i)hinduism|(?i)khan|(?i)chaos|(?i)intriguing|(?i)isaac|(?i)perspectives|(?i)portrayed|(?i)abt|(?i)imp|(?i)judaism|(?i)julius|(?i)reminds|(?i)slaves|(?i)christians|(?i)daoism|(?i)divine|(?i)ethical|(?i)karma|(?i)loyalty|(?i)persian|(?i)sig|(?i)takeaway|(?i)tribe|(?i)tweet|(?i)vulnerable|(?i)zoroastrianism|(?i)civilised|(?i)enlightenment|(?i)fascinating|(?i)rly|(?i)temples|(?i)transient|(?i)amun|(?i)arjuna|(?i)barbarians|(?i)barbaric|(?i)cultures|(?i)mali|(?i)mesopotamia|(?i)muhammad|(?i)puppet|(?i)ruling|(?i)tolerance|(?i)uruk|(?i)civilisations|(?i)clement|(?i)egyptian|(?i)jews|(?i)krishna|(?i)legalism|(?i)marco|(?i)mecca|(?i)polo|(?i)sanskrit|(?i)similarities|(?i)societies|(?i)tacitus|(?i)travels|(?i)weaknesses|(?i)abram|(?i)aristophanes|(?i)artifacts|(?i)caste|(?i)chose|(?i)civilizations|(?i)confused|(?i)cos|(?i)devotion|(?i)discipline|(?i)hv|(?i)influenced|(?i)interpretations|(?i)loyal|(?i)reflects|(?i)ruler|(?i)stood|(?i)strengths|(?i)surprising|(?i)tt|(?i)abrahams|(?i)civilized|(?i)constantly|(?i)creativity|(?i)crucial|(?i)cyrus|(?i)dao|(?i)enlightening|(?i)epic|(?i)equally|(?i)followers|(?i)greatness|(?i)honestly|(?i)humour|(?i)impressive|(?i)males|(?i)marry|(?i)monastic|(?i)norms|(?i)peoples|(?i)punishments|(?i)sexuality|(?i)surprised|(?i)teaches|(?i)tho|(?i)tht|(?i)treaty|(?i)weird|(?i)accepting|(?i)amazed|(?i)convey|(?i)depicted|(?i)emotions|(?i)esp|(?i)goodness|(?i)harsh|(?i)husbands|(?i)impermanent|(?i)innate|(?i)mistakes|(?i)morals|(?i)orthodox|(?i)perception|(?i)promotes|(?i)sacred|(?i)slave|(?i)slavery|(?i)statues|(?i)ted|(?i)tolerant|(?i)admirable|(?i)affects|(?i)afterlife|(?i)amos|(?i)attain|(?i)augustus|(?i)biases|(?i)civ|(?i)clements|(?i)confusing|(?i)conquer|(?i)deeds|(?i)dhammapada|(?i)dynasty|(?i)earthly|(?i)egalitarian|(?i)emphasized|(?i)engaging|(?i)females|(?i)fulfill|(?i)fulfilling|(?i)genghis|(?i)greatly|(?i)inequality|(?i)innately|(?i)maat|(?i)mansa|(?i)mithras|(?i)monks|(?i)muslim|(?i)optimistic|(?i)orphans|(?i)peaceful|(?i)physically|(?i)prominent|(?i)puppets|(?i)reminded|(?i)scary|(?i)sculptures|(?i)selfless|(?i)shaped|(?i)subjective|(?i)travelled|(?i)valued|(?i)weakness|(?i)admire|(?i)alot|(?i)athens|(?i)bali|(?i)blessed|(?i)blindly|(?i)brutal|(?i)citizenship|(?i)civilization|(?i)compassion|(?i)confucianism|(?i)conquered|(?i)courage|(?i)covenant|(?i)dedication|(?i)demigod|(?i)drunk|(?i)dukkha|(?i)essence|(?i)euphemisms|(?i)focuses|(?i)heir|(?i)hindu|(?i)humanity|(?i)humble|(?i)injustice|(?i)interpret|(?i)interpreted|(?i)justified|(?i)justify|(?i)literally|(?i)lying|(?i)meanings|(?i)mindset|(?i)mongol|(?i)musa|(?i)neutral|(?i)nuns|(?i)paranoid|(?i)perceptions|(?i)pericles|(?i)persia|(?i)piety|(?i)pleasures|(?i)portrayal|(?i)possess|(?i)possessed|(?i)prof|(?i)propaganda|(?i)qin|(?i)readings|(?i)realised|(?i)regarded|(?i)reign|(?i)respectful|(?i)rituals|(?i)spoke|(?i)stereotypes|(?i)sth|(?i)survive|(?i)tao|(?i)traditions|(?i)treating|(?i)wanna|(?i)widespread"))
low_frequency_exit %>%
tally() # 2430
## # A tibble: 1 x 1
## n
## <int>
## 1 2430
top_50_low_frequency <- exit_tweets %>%
filter(str_detect(tweet_text_org, "(?i)gods|(?i)hatshepsut|(?i)germanic|(?i)romans|(?i)herodotus|(?i)gilgamesh|(?i)ibn|(?i)mongols|(?i)confucius|(?i)christianity|(?i)tribes|(?i)learnt|(?i)persians|(?i)deities|(?i)beliefs|(?i)buddhism|(?i)dharma|(?i)suffering|(?i)germans|(?i)buddha|(?i)abraham|(?i)islam|(?i)quran|(?i)ren|(?i)greeks|(?i)epicurus|(?i)kinda|(?i)egyptians|(?i)religions|(?i)teachings|(?i)ppl|(?i)yahweh|(?i)enkidu|(?i)pliny|(?i)sacrifice|(?i)bias|(?i)caesar|(?i)epictetus|(?i)cuz|(?i)medieval|(?i)equality|(?i)punishment|(?i)biased|(?i)brutus|(?i)fate|(?i)happiness|(?i)lysistrata|(?i)significance|(?i)wives|(?i)desires"))
top_50_low_frequency %>%
tally() # 1342
## # A tibble: 1 x 1
## n
## <int>
## 1 1342
## Class 04 Exit Tweets Analysis
count_04_exit <- exit_tweets %>%
filter(hashtag_norm == "4") %>%
tally() ## 177
count_dqs <- exit_tweets %>%
filter(hashtag_norm == "4",
str_detect(tweet_text_org, "deities|god.|women|men|dream.|marker.|civilization")) %>%
tally() ## 87
count_dreams <- exit_tweets %>%
filter(hashtag_norm == "4",
str_detect(tweet_text_org, "(?i)dream.")) %>%
tally() ## 48
## Peer & Prof Influence
peer_influence <- exit_tweets %>%
filter(str_detect(tweet_text_org, "(?i)classmate.|(?i)peer.|(?i)student.|(?i)someone.|(?i)fellow.|(?i)everyone.")) %>%
tally() ## 68
prof_influence <- exit_tweets %>%
filter(str_detect(tweet_text_org, "(?i)prof.|(?i)heather.|(?i)bennett.|(?i)helloworldciv.")) %>%
tally() ## 36
# GIFs from a History Class: Constraints -----------------------------------------------
## Count GIFs with/without media calculated in Google Sheets
## Total number & avg interactions by media type calculated in Google Sheets
## Create GIF Subset
gif_subset <- all_tweets %>%
filter(media_type == "GIF",
!str_detect(tweet_text_org, "^RT"))
gif_subset_clean <- gif_subset %>%
arrange(hashtag_norm) %>%
ungroup() %>%
unnest_tokens(word, tweet_text_org, token = "tweets", strip_url = TRUE) %>%
mutate(word = str_remove_all(word, remove_symbol)) %>%
filter(!str_detect(word, "^(?i)RT|@|#"), # remove retweets, user screen names, hashtags
!str_detect(word, "hwc111"), # remove un-hashtaged "hwc111" (the class hashtag)
!str_detect(word, "\\d"), # remove digits
!str_detect(word, "\\s")) %>% # remove white space
arrange(linenumber)
gif_subset_gather <- gif_subset_clean %>%
group_by(linenumber, tweet_type, date, time, tweet_id, user_screen_name, hashtag_norm) %>% # linenumberW dropped: it exists only in the women subset
summarise(tweet_text = paste(word, collapse = " "))
## GIF Subset Sentimentr
gif_sentimentr <- gif_subset_gather %>%
get_sentences() %>%
sentiment()
## GIF Embodiment Coded + Sentimentr
gif_embod_sent <- read_csv("gif_embodiment_sentiment.csv",
col_types = cols(tweet_id = col_character()))
count_gif_embod_sent <- gif_embod_sent %>%
group_by(embodiment) %>%
count() %>%
summarise(total = n)
avg_gif_embod_sent <- gif_embod_sent %>%
group_by(embodiment) %>%
summarise(avg_sent = average_mean(sentiment)) %>%
inner_join(count_gif_embod_sent) %>%
arrange(desc(avg_sent))
## Joining, by = "embodiment"
gif_embod_sent %>%
summarise(avg_sent = average_mean(sentiment)) #0.0993
## # A tibble: 1 x 1
## avg_sent
## <dbl>
## 1 0.0993