# Libraries ---------------------------------------------------------------

library(tidyverse)
## ── Attaching packages ──────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.1.0     ✔ purrr   0.2.5
## ✔ tibble  2.0.1     ✔ dplyr   0.7.6
## ✔ tidyr   0.8.1     ✔ stringr 1.3.1
## ✔ readr   1.1.1     ✔ forcats 0.3.0
## Warning: package 'tibble' was built under R version 3.5.2
## ── Conflicts ─────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(tidytext)
library(sentimentr)
library(readr)
library(tokenizers)
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
## 
##     date
library(ggplot2)
library(wordcloud)
## Loading required package: RColorBrewer
library(wordcloud2)
library(RColorBrewer)
library(formattable)

# Color Vectors -----------------------------------------------------------

cbbPalette <- c("#000000", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7")

a11y25 <- c("#fc9272", "#fb6a4a", "#ef3b2c", "#cb181d", "#67000d", "#7fcdbb", "#41b6c4", "#1d91c0", "#225ea8", "#081d58", "#fa9fb5", "#f768a1", "#dd3497", "#ae017e", "#7a0177", "#a1d99b", "#74c476", "#41ab5d", "#238b45", "#00441b", "#bcbddc", "#9e9ac8", "#807dba", "#6a51a3", "#54278f", "#3f007d")

a11yRed <- c("#fc9272", "#fb6a4a", "#ef3b2c", "#cb181d", "#67000d")

a11yBlue <- c("#7fcdbb", "#41b6c4", "#1d91c0", "#225ea8", "#081d58")

a11yPink <- c("#fa9fb5", "#f768a1", "#dd3497", "#ae017e", "#7a0177")

a11yGreen <- c("#a1d99b", "#74c476", "#41ab5d", "#238b45", "#00441b")

a11yPurple <- c("#bcbddc", "#9e9ac8", "#807dba", "#6a51a3", "#54278f")

CBBlues <- c('#08306b', '#08519c', '#2171b5', '#3888BC', '#4399CB')

# Import Dataset ----------------------------------------------------------

all_tweets <- read_csv("~/HLB Drive/R/Final Scripts/all_tweets_final.csv", 
                             col_types = cols(hashtag_norm = col_character(), 
                                              tweet_id = col_character())) %>% 
  arrange(linenumber) %>% 
  as_tibble()

# Clean and Tokenize All Tweets -----------------------------------------------------------

remove_symbol <- "&amp;|&lt;|&gt;|=|>|<|~|≠|[+]|[\"]"

all_tweets_token <- all_tweets %>% 
  arrange(hashtag_norm) %>% 
  ungroup() %>% 
  unnest_tokens(word, tweet_text_org, token = "tweets", strip_url = TRUE) %>%
  mutate(word = str_remove_all(word, remove_symbol)) %>% 
  filter(!word %in% stop_words$word,
         !word %in% str_remove_all(stop_words$word, "'")) %>% 
  filter(!str_detect(word, "^(?i)RT|@|#"), # remove retweets, user screen names, hashtags
         !str_detect(word, "hwc111"), # remove un-hashtaged "hwc111" (the class hashtag)
         !str_detect(word, "\\d"), # remove digits
         !str_detect(word, "\\s")) %>% # remove white space
  select(linenumber, date, time, tweet_id, tweet_type, user_screen_name, hashtag_norm, word) %>% 
  arrange(linenumber)

# Methodology: Data Analysis ----------------------------------------------

## Word Cloud: All Tweets

top_100_words <- all_tweets_token %>% 
  group_by(word) %>%
  count(word) %>% 
  arrange(desc(n)) %>% 
  head(100) 
  
top_100_words %>% 
  wordcloud2(color = a11y25, shape = 'circle', ellipticity = 0.75, rotateRatio = 0)

# Affect & Historical Empathy: Identifying Affect in Students' Tweets ----------------------------------

## Clean, Tokenize, Gather All Tweets

sentimentr_all_clean <- all_tweets %>% # No stopwords removed; some are valence shifters
  arrange(hashtag_norm) %>% 
  ungroup() %>% 
  unnest_tokens(word, tweet_text_org, token = "tweets", strip_url = TRUE) %>%
  mutate(word = str_remove_all(word, remove_symbol)) %>% 
  filter(!str_detect(word, "^(?i)RT|@|#"), # remove retweets, user screen names, hashtags
         !str_detect(word, "hwc111"), # remove un-hashtaged "hwc111" (the class hashtag)
         !str_detect(word, "\\d"), # remove digits
         !str_detect(word, "\\s")) %>% # remove white space
  select(linenumber, date, time, tweet_id, tweet_type, user_screen_name, hashtag_norm, word) %>% 
  arrange(linenumber)

sentimentr_all_gather <- sentimentr_all_clean %>% 
  group_by(linenumber, tweet_type, date, time, tweet_id, user_screen_name, hashtag_norm) %>% 
  summarise(tweet_text = paste(word, collapse = " "))

## Get sentimentr scores

sentimentr_all <- sentimentr_all_gather %>% 
  get_sentences() %>% 
  sentiment()

## Count remaining tweets

tally(sentimentr_all) ## Returns 11351; tweets containing only hashtags, user screen names, and/or URLs removed in cleaning step
##       n
## 1 11351
## Count Positive Sentimentr

sentiment_positive_all <- sentimentr_all %>% 
  filter(sentiment > 0)

tally(sentiment_positive_all) ## 6782
##      n
## 1 6782
6782/11351 ## 59.74804%
## [1] 0.5974804
## Count Negative Sentimentr

sentiment_negative_all <- sentimentr_all %>% 
  filter(sentiment < 0)

tally(sentiment_negative_all) ## 2926
##      n
## 1 2926
2926/11351 ## 25.77746%
## [1] 0.2577746
## Count 0

sentiment_zero_all <- sentimentr_all %>% 
  filter(sentiment == 0)

tally(sentiment_zero_all) ## 1643
##      n
## 1 1643
1643/11351 ## 14.4745%
## [1] 0.144745
## All Tweets Average Sentiment by Hashtag

count_all_hashtag <- sentimentr_all %>% 
  group_by(hashtag_norm) %>%
  count(hashtag_norm) %>% 
  summarise(total = n)

avg_sentiment_hashtag_all <- sentimentr_all %>% 
  group_by(hashtag_norm) %>%
  summarise(avg_sentiment = average_mean(sentiment)) %>% 
  filter(avg_sentiment != 0) %>% 
  arrange(desc(avg_sentiment)) %>% 
  ungroup() %>% 
  inner_join(count_all_hashtag)
## Joining, by = "hashtag_norm"
avg_sentiment_hashtag_all %>% 
  filter(!is.na(hashtag_norm),
         !str_detect(hashtag_norm, "02|05|12|15|20|25")) %>% 
  mutate(hashtag_norm = reorder(hashtag_norm, avg_sentiment)) %>% 
  ggplot(aes(hashtag_norm, avg_sentiment, fill = avg_sentiment)) +
  geom_col(show.legend = FALSE, color = "black") +
  geom_text(aes(label = total),
            color = "white",
            size = 3,
            position = position_stack(vjust = 0.5)) +
  labs(title = "Sentimentr: All Tweets Average Sentiment by Hashtag",
       caption = "Note: Hashtags 02, 05, 12, 15, 20, 25 not shown.",
       x = "Hashtag",
       y = "Average Sentiment")

## Average Sentiment by Hashtag & Tweet Type

count_all_hashtag_type <- sentimentr_all %>% 
  group_by(hashtag_norm, tweet_type) %>%
  count(tweet_type) %>% 
  summarise(total = n)

avg_sentiment_all_hashtag_type <- sentimentr_all %>% 
  group_by(hashtag_norm, tweet_type) %>%
  summarise(avg_sentiment = average_mean(sentiment)) %>% 
  filter(avg_sentiment != 0) %>% 
  arrange(desc(avg_sentiment)) %>%
  ungroup() %>% 
  inner_join(count_all_hashtag_type)
## Joining, by = c("hashtag_norm", "tweet_type")
avg_sentiment_all_hashtag_type %>%
  filter(!str_detect(tweet_type, "Fishbowl|Participlan"),
         !str_detect(hashtag_norm, "05|12|15|20|25")) %>% 
  ggplot(aes(hashtag_norm, avg_sentiment, fill = avg_sentiment)) +
  geom_col(show.legend = FALSE, color = "black") +
  geom_text(aes(label = total),
            color = "white",
            size = 2,
            position = position_stack(vjust = 0.5)) +
  facet_wrap(~tweet_type, scales = "free_y", ncol = 1) +
  labs(title = "All Tweets: Comparing PST and Exit Tweet Average Sentiments",
       caption = "Note: Class 03 did not have PST. For Class 10 the PST average sentiment was zero.",
       x = "Hashtag",
       y = "Average Sentiment")

## Min/Max/Range by Tweet Type - calculated in Google Sheets from joined all_tweets.csv & sentimentr_scores.csv

## PST & Exit Average Sentiment Comparison - calculated in Google Sheets from joined all_tweets.csv & sentiment_scores
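
## The two spreadsheet summaries noted above could be approximated in R roughly as
## follows (a minimal sketch using the sentimentr_all object created earlier; the
## figures reported in the text come from the Google Sheets calculations):

sentimentr_all %>% 
  group_by(tweet_type) %>% 
  summarise(min_sentiment = min(sentiment),
            max_sentiment = max(sentiment),
            range_sentiment = max_sentiment - min_sentiment, # min/max/range by tweet type
            avg_sentiment = average_mean(sentiment)) # PST vs Exit average comparison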

## PST & Exit Compared (Class 22 & 08)

token_sentiment_all <- sentimentr_all %>% 
  arrange(hashtag_norm) %>% 
  ungroup() %>% 
  unnest_tokens(word, tweet_text, token = "words") %>%
  filter(!word %in% stop_words$word,
         !word %in% str_remove_all(stop_words$word, "'")) %>% 
  select(linenumber, date, time, tweet_id, tweet_type, user_screen_name, hashtag_norm, word, sentiment) %>% 
  arrange(linenumber)

pst_08 <- token_sentiment_all %>% 
  filter(hashtag_norm == 8,
         tweet_type == "PST",
         !str_detect(word, "source|significant|todays")) %>% 
  group_by(word) %>% 
  count(word) %>% 
  summarise(pst_total = n) %>% 
  arrange(desc(pst_total))

exit_08 <- token_sentiment_all %>% 
  filter(hashtag_norm == 8,
         tweet_type == "Exit",
         !str_detect(word, "source|significant|todays")) %>% 
  group_by(word) %>% 
  count(word) %>% 
  summarise(exit_total = n) %>% 
  arrange(desc(exit_total))

pst_22 <- token_sentiment_all %>% 
  filter(hashtag_norm == 22,
         tweet_type == "PST",
         !str_detect(word, "source|significant|todays")) %>% 
  group_by(word) %>% 
  count(word) %>% 
  summarise(pst_total = n) %>% 
  arrange(desc(pst_total))

exit_22 <- token_sentiment_all %>% 
  filter(hashtag_norm == 22,
         tweet_type == "Exit",
         !str_detect(word, "source|significant|todays")) %>% 
  group_by(word) %>% 
  count(word) %>% 
  summarise(exit_total = n) %>% 
  arrange(desc(exit_total))

# Affect & Historical Empathy: Affect as Evidence of Care -----------------

## Create Women Subset

women_terms <- c("(?i)wom.", "(?i)^she$", "(?i)^her$", "(?i)wif.", "(?i)wiv.", "(?i)mothe.", "(?i)daughte.", "(?i)Hatshepsut", "(?i)Shamhat", "(?i)Ninsun", "(?i)harlo.", "(?i)prostitut.", "(?i)Lysistrata", "(?i)Cleonice", "(?i)Lampito", "(?i)Myrrhine", "(?i)Tomyris", "(?i)Sabin.", "(?i)Khadij.", "(?i)female.") # all terms related to women from the course are included in the women subset

women_match <- str_c(women_terms, collapse = "|") 

women_subset <- all_tweets %>% 
  filter(str_detect(tweet_text_org, women_match)) %>% #match women terms to content of tweet text
  mutate(linenumberW = row_number()) # add linenumber distinct to women subset

## Create Sentimentr Women Subset

sentimentr_women_clean <- women_subset %>% 
  arrange(hashtag_norm) %>% 
  ungroup() %>% 
  unnest_tokens(word, tweet_text_org, token = "tweets", strip_url = TRUE) %>%
  mutate(word = str_remove_all(word, remove_symbol)) %>% 
  filter(!str_detect(word, "^(?i)RT|@|#"), # remove retweets, user screen names, hashtags
         !str_detect(word, "hwc111"), # remove un-hashtaged "hwc111" (the class hashtag)
         !str_detect(word, "\\d"), # remove digits
         !str_detect(word, "\\s")) %>% # remove white space
  select(linenumber, linenumberW, date, time, tweet_id, tweet_type, user_screen_name, hashtag_norm, word) %>% 
  arrange(linenumber)

sentimentr_women_gather <- sentimentr_women_clean %>% 
  group_by(linenumber, linenumberW, tweet_type, date, time, tweet_id, user_screen_name, hashtag_norm) %>% 
  summarise(tweet_text = paste(word, collapse = " "))

sentimentr_women <- sentimentr_women_gather %>% 
  get_sentences() %>% 
  sentiment() # runs sentimentr analysis on women tweets

## Average Sentiment Women Subset

sentimentr_women %>% 
  summarise(avg_sent = average_mean(sentiment))
##    avg_sent
## 1 0.1027464
## All Tweets Word Cloud (Blue)

all_word_count <- all_tweets_token %>% 
  count(word) %>% 
  arrange(desc(n))

wordcloud_all_tweets <- all_tweets_token %>% 
  count(word) %>%
  filter(word != "significant") %>% # removed "significant" because it appeared in the prompt for the exit tweets
  top_n(100) %>% 
  arrange(desc(n))
## Selecting by n
wordcloud2(wordcloud_all_tweets, size = 0.7, color=rep_len(CBBlues, nrow(wordcloud_all_tweets)))
## Positive Sentimentr Women

sentiment_positive_women <- sentimentr_women %>% 
  filter(sentiment > 0) # filter for tweets with sentiment value >0

token_sentiment_positive_women <- sentiment_positive_women %>% 
  arrange(hashtag_norm) %>% 
  ungroup() %>% 
  unnest_tokens(word, tweet_text, token = "words") %>%
  filter(!word %in% stop_words$word,
         !word %in% str_remove_all(stop_words$word, "'")) %>% 
  select(linenumber, linenumberW, date, time, tweet_id, tweet_type, user_screen_name, hashtag_norm, word, sentiment) %>% 
  arrange(linenumberW) # tokenize words in tweet text of positive tweets

## Negative Sentimentr Women

sentiment_negative_women <- sentimentr_women %>% 
  filter(sentiment < 0) # filter for tweets with sentimentr value <0

token_sentiment_negative_women <- sentiment_negative_women %>% 
  arrange(hashtag_norm) %>% 
  ungroup() %>% 
  unnest_tokens(word, tweet_text, token = "words") %>%
  filter(!word %in% stop_words$word,
         !word %in% str_remove_all(stop_words$word, "'")) %>% 
  select(linenumber, linenumberW, date, time, tweet_id, tweet_type, user_screen_name, hashtag_norm, word, sentiment) %>% 
  arrange(linenumberW) # tokenize words in tweet text of negative tweets

## Word Frequency Dataset

base_url <- "https://programminghistorian.org/assets/basic-text-processing-in-r"
wf <- read_csv(sprintf("%s/%s", base_url, "word_frequency.csv"))
## Parsed with column specification:
## cols(
##   language = col_character(),
##   word = col_character(),
##   frequency = col_double()
## )
## Positive Sentiment Low Frequency

positive_sentiment_wf_women <- token_sentiment_positive_women %>% 
  group_by(word) %>% 
  count(word) %>% 
  summarise(total = n) %>% 
  arrange(desc(total)) %>% 
  inner_join(wf) %>% 
  select(word, total, frequency) %>% 
  arrange(desc(frequency))
## Joining, by = "word"
positive_low_frequency_women <- positive_sentiment_wf_women %>% 
  filter(frequency < 0.005, total >= 3) %>% 
  filter(!str_detect(word, "tho|rly|ppl|abt|tt|cuz|sth|cos")) %>% 
  arrange(desc(total))

positive_low_frequency_women %>%
  with(wordcloud(word, total, scale = c(2, 0.5), max.words = 200))

## Negative Sentiment Low Frequency

negative_sentiment_wf_women <- token_sentiment_negative_women %>% 
  group_by(word) %>% 
  count(word) %>% 
  summarise(total = n) %>% 
  arrange(desc(total)) %>% 
  inner_join(wf) %>% # combine negative sentiment tweets with word frequency dataset
  select(word, total, frequency) %>% 
  arrange(desc(frequency))
## Joining, by = "word"
negative_low_frequency_women <- negative_sentiment_wf_women %>% 
  filter(frequency < 0.005, total >= 3) %>% 
  filter(!str_detect(word, "tho|rly|ppl|abt")) %>% # removed abbreviated words from set
  arrange(desc(total))

negative_low_frequency_women %>% 
  with(wordcloud(word, total, scale = c(2, 0.5), max.words = 200))

## Find Unique Terms in Positive, Negative, and Positive/Negative Subsets

positive_subset <- positive_low_frequency_women %>% 
  mutate(sentiment = "positive") %>% # add sentiment column labeling words in this subset as "positive"
  select(word, total, sentiment)

negative_subset <- negative_low_frequency_women %>% 
  mutate(sentiment2 = "negative") %>% # add sentiment column labeling words in this subset as "negative"
  mutate(total2 = total) %>% # duplicate the count column so positive and negative totals stay distinct after the join
  select(word, total2, sentiment2)

combined_pos_neg_subsets <- full_join(positive_subset, negative_subset) # combine negative and positive set
## Joining, by = "word"
## Positive Word Cloud

positive_only_women <- combined_pos_neg_subsets %>% 
  filter(sentiment == "positive",
         is.na(sentiment2)) %>% 
  with(wordcloud(word, total, scale = c(2.5, 0.5))) # word cloud containing words that appear *only* in tweets with positive sentimentr values

negative_only_women <- combined_pos_neg_subsets %>% 
  filter(sentiment2 == "negative",
         is.na(sentiment),
         word != "ye") %>% 
  select(word, total2, sentiment2) %>% 
  with(wordcloud(word, total2, scale = c(2.5, 0.5))) # word cloud containing words that appear *only* in tweets with negative sentimentr values

pos_neg_women <- combined_pos_neg_subsets %>% 
  filter(!is.na(sentiment),
         !is.na(sentiment2)) %>% 
  group_by(word) %>% 
  mutate(combined_total = sum(total, total2))

pos_neg_women %>% 
  with(wordcloud(word, combined_total, scale = c(2.5, 0.5))) # word cloud containing words that appear in tweets with positive *and* negative sentimentr values

# Affect & Historical Empathy: Care & Understanding -----------------------

## Calculations for "asserted" and "qualified" tweets completed in Google Sheets

# The Attention Economy & Historical Significance: Tastemakers & Communities of Participation -------------------------

## Create Exit Tweet Subset

exit_tweets <- all_tweets %>% 
  filter(tweet_type == "Exit")

## Find Exit Tweet Word Frequencies

exit_tweet_count <- all_tweets_token %>% 
  filter(tweet_type == "Exit",
         !str_detect(word, "significant|class|exit|todays")) %>% 
  group_by(word) %>% 
  count() %>% 
  summarise(total = n) %>% 
  arrange(desc(total))

## Combine with Word Frequency Dataset

base_url <- "https://programminghistorian.org/assets/basic-text-processing-in-r"
wf <- read_csv(sprintf("%s/%s", base_url, "word_frequency.csv"))
## Parsed with column specification:
## cols(
##   language = col_character(),
##   word = col_character(),
##   frequency = col_double()
## )
exit_tweet_relative <- exit_tweet_count %>% 
  inner_join(wf) %>% 
  filter(frequency <= 0.002,
         total >= 5) %>% 
  arrange(desc(total)) # find words with low frequency (English), high frequency (Exit Tweets)
## Joining, by = "word"
exit_tweet_relative %>% 
  wordcloud2(color = a11y25, shape = 'circle', ellipticity = 0.5, hoverFunction = NULL)
## Count Tweets Containing Low-Frequency Exit Tweet Words

low_frequency_exit <- exit_tweets %>% 
  filter(str_detect(tweet_text_org, "(?i)gods|(?i)hatshepsut|(?i)germanic|(?i)romans|(?i)herodotus|(?i)gilgamesh|(?i)ibn|(?i)mongols|(?i)confucius|(?i)christianity|(?i)tribes|(?i)learnt|(?i)persians|(?i)deities|(?i)beliefs|(?i)buddhism|(?i)dharma|(?i)suffering|(?i)germans|(?i)buddha|(?i)abraham|(?i)islam|(?i)quran|(?i)ren|(?i)greeks|(?i)epicurus|(?i)kinda|(?i)egyptians|(?i)religions|(?i)teachings|(?i)ppl|(?i)yahweh|(?i)enkidu|(?i)pliny|(?i)sacrifice|(?i)bias|(?i)caesar|(?i)epictetus|(?i)cuz|(?i)medieval|(?i)equality|(?i)punishment|(?i)biased|(?i)brutus|(?i)fate|(?i)happiness|(?i)lysistrata|(?i)significance|(?i)wives|(?i)desires|(?i)haha|(?i)hinduism|(?i)khan|(?i)chaos|(?i)intriguing|(?i)isaac|(?i)perspectives|(?i)portrayed|(?i)abt|(?i)imp|(?i)judaism|(?i)julius|(?i)reminds|(?i)slaves|(?i)christians|(?i)daoism|(?i)divine|(?i)ethical|(?i)karma|(?i)loyalty|(?i)persian|(?i)sig|(?i)takeaway|(?i)tribe|(?i)tweet|(?i)vulnerable|(?i)zoroastrianism|(?i)civilised|(?i)enlightenment|(?i)fascinating|(?i)rly|(?i)temples|(?i)transient|(?i)amun|(?i)arjuna|(?i)barbarians|(?i)barbaric|(?i)cultures|(?i)mali|(?i)mesopotamia|(?i)muhammad|(?i)puppet|(?i)ruling|(?i)tolerance|(?i)uruk|(?i)civilisations|(?i)clement|(?i)egyptian|(?i)jews|(?i)krishna|(?i)legalism|(?i)marco|(?i)mecca|(?i)polo|(?i)sanskrit|(?i)similarities|(?i)societies|(?i)tacitus|(?i)travels|(?i)weaknesses|(?i)abram|(?i)aristophanes|(?i)artifacts|(?i)caste|(?i)chose|(?i)civilizations|(?i)confused|(?i)cos|(?i)devotion|(?i)discipline|(?i)hv|(?i)influenced|(?i)interpretations|(?i)loyal|(?i)reflects|(?i)ruler|(?i)stood|(?i)strengths|(?i)surprising|(?i)tt|(?i)abrahams|(?i)civilized|(?i)constantly|(?i)creativity|(?i)crucial|(?i)cyrus|(?i)dao|(?i)enlightening|(?i)epic|(?i)equally|(?i)followers|(?i)greatness|(?i)honestly|(?i)humour|(?i)impressive|(?i)males|(?i)marry|(?i)monastic|(?i)norms|(?i)peoples|(?i)punishments|(?i)sexuality|(?i)surprised|(?i)teaches|(?i)tho|(?i)tht|(?i)treaty|(?i)weird|(?i)accepting|(?i)amazed|(?i)convey|(?i)depicted|(?i)emotions|(?i)esp|(?i)goodness|(?i)harsh|(?i)husbands|(?i)impermanent|(?i)innate|(?i)mistakes|(?i)morals|(?i)orthodox|(?i)perception|(?i)promotes|(?i)sacred|(?i)slave|(?i)slavery|(?i)statues|(?i)ted|(?i)tolerant|(?i)admirable|(?i)affects|(?i)afterlife|(?i)amos|(?i)attain|(?i)augustus|(?i)biases|(?i)civ|(?i)clements|(?i)confusing|(?i)conquer|(?i)deeds|(?i)dhammapada|(?i)dynasty|(?i)earthly|(?i)egalitarian|(?i)emphasized|(?i)engaging|(?i)females|(?i)fulfill|(?i)fulfilling|(?i)genghis|(?i)greatly|(?i)inequality|(?i)innately|(?i)maat|(?i)mansa|(?i)mithras|(?i)monks|(?i)muslim|(?i)optimistic|(?i)orphans|(?i)peaceful|(?i)physically|(?i)prominent|(?i)puppets|(?i)reminded|(?i)scary|(?i)sculptures|(?i)selfless|(?i)shaped|(?i)subjective|(?i)travelled|(?i)valued|(?i)weakness|(?i)admire|(?i)alot|(?i)athens|(?i)bali|(?i)blessed|(?i)blindly|(?i)brutal|(?i)citizenship|(?i)civilization|(?i)compassion|(?i)confucianism|(?i)conquered|(?i)courage|(?i)covenant|(?i)dedication|(?i)demigod|(?i)drunk|(?i)dukkha|(?i)essence|(?i)euphemisms|(?i)focuses|(?i)heir|(?i)hindu|(?i)humanity|(?i)humble|(?i)injustice|(?i)interpret|(?i)interpreted|(?i)justified|(?i)justify|(?i)literally|(?i)lying|(?i)meanings|(?i)mindset|(?i)mongol|(?i)musa|(?i)neutral|(?i)nuns|(?i)paranoid|(?i)perceptions|(?i)pericles|(?i)persia|(?i)piety|(?i)pleasures|(?i)portrayal|(?i)possess|(?i)possessed|(?i)prof|(?i)propaganda|(?i)qin|(?i)readings|(?i)realised|(?i)regarded|(?i)reign|(?i)respectful|(?i)rituals|(?i)spoke|(?i)stereotypes|(?i)sth|(?i)survive|(?i)tao|(?i)traditions|(?i)trea
ting|(?i)wanna|(?i)widespread"))

low_frequency_exit %>% 
  tally() # 2430
## # A tibble: 1 x 1
##       n
##   <int>
## 1  2430
top_50_low_frequency <- exit_tweets %>% 
  filter(str_detect(tweet_text_org, "(?i)gods|(?i)hatshepsut|(?i)germanic|(?i)romans|(?i)herodotus|(?i)gilgamesh|(?i)ibn|(?i)mongols|(?i)confucius|(?i)christianity|(?i)tribes|(?i)learnt|(?i)persians|(?i)deities|(?i)beliefs|(?i)buddhism|(?i)dharma|(?i)suffering|(?i)germans|(?i)buddha|(?i)abraham|(?i)islam|(?i)quran|(?i)ren|(?i)greeks|(?i)epicurus|(?i)kinda|(?i)egyptians|(?i)religions|(?i)teachings|(?i)ppl|(?i)yahweh|(?i)enkidu|(?i)pliny|(?i)sacrifice|(?i)bias|(?i)caesar|(?i)epictetus|(?i)cuz|(?i)medieval|(?i)equality|(?i)punishment|(?i)biased|(?i)brutus|(?i)fate|(?i)happiness|(?i)lysistrata|(?i)significance|(?i)wives|(?i)desires"))

top_50_low_frequency %>% 
  tally() # 1342
## # A tibble: 1 x 1
##       n
##   <int>
## 1  1342
## Class 04 Exit Tweets Analysis

count_04_exit <- exit_tweets %>% 
  filter(hashtag_norm == "4") %>% 
  tally() ## 177

count_dqs <- exit_tweets %>% 
  filter(hashtag_norm == "4",
         str_detect(tweet_text_org, "deities|god.|women|men|dream.|marker.|civilization")) %>% 
  tally() ## 87

count_dreams <- exit_tweets %>% 
  filter(hashtag_norm == "4",
         str_detect(tweet_text_org, "(?i)dream.")) %>% 
  tally() ## 48

## Peer & Prof Influence

peer_influence <- exit_tweets %>% 
  filter(str_detect(tweet_text_org, "(?i)classmate.|(?i)peer.|(?i)student.|(?i)someone.|(?i)fellow.|(?i)everyone.")) %>% 
  tally() ## 68

prof_influence <- exit_tweets %>% 
  filter(str_detect(tweet_text_org, "(?i)prof.|(?i)heather.|(?i)bennett.|(?i)helloworldciv.")) %>% 
  tally() ## 36

# GIFs from a History Class: Constraints -----------------------------------------------

## Count GIFs with/without media calculated in Google Sheets

## Total number & avg interactions by media type calculated in Google Sheets
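
## A rough in-R equivalent of the two spreadsheet summaries above (a sketch only).
## The interaction column names favorite_count and retweet_count are assumptions;
## substitute whatever engagement columns exist in all_tweets_final.csv.

all_tweets %>% 
  group_by(media_type) %>% # media_type separates GIF tweets from other/no media
  summarise(total = n(),
            avg_favorites = mean(favorite_count, na.rm = TRUE), # assumed column name
            avg_retweets = mean(retweet_count, na.rm = TRUE)) # assumed column name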

## Create GIF Subset

gif_subset <- all_tweets %>% 
  filter(media_type == "GIF",
         !str_detect(tweet_text_org, "^RT"))

gif_subset_clean <- gif_subset %>% 
  arrange(hashtag_norm) %>% 
  ungroup() %>% 
  unnest_tokens(word, tweet_text_org, token = "tweets", strip_url = TRUE) %>%
  mutate(word = str_remove_all(word, remove_symbol)) %>% 
  filter(!str_detect(word, "^(?i)RT|@|#"), # remove retweets, user screen names, hashtags
         !str_detect(word, "hwc111"), # remove un-hashtaged "hwc111" (the class hashtag)
         !str_detect(word, "\\d"), # remove digits
         !str_detect(word, "\\s")) %>% # remove white space
  arrange(linenumber)

gif_subset_gather <- gif_subset_clean %>% 
  group_by(linenumber, tweet_type, date, time, tweet_id, user_screen_name, hashtag_norm) %>% # linenumberW dropped; that column exists only in the women subset
  summarise(tweet_text = paste(word, collapse = " "))

## GIF Subset Sentimentr

gif_sentimentr <- gif_subset_gather %>% 
  get_sentences() %>% 
  sentiment()

## GIF Embodiment Coded + Sentimentr

gif_embod_sent <- read_csv("gif_embodiment_sentiment.csv", 
                           col_types = cols(tweet_id = col_character()))

count_gif_embod_sent <- gif_embod_sent %>% 
  group_by(embodiment) %>% 
  count() %>% 
  summarise(total = n) 

avg_gif_embod_sent <- gif_embod_sent %>% 
  group_by(embodiment) %>% 
  summarise(avg_sent = average_mean(sentiment)) %>% 
  inner_join(count_gif_embod_sent) %>% 
  arrange(desc(avg_sent))
## Joining, by = "embodiment"
gif_embod_sent %>% 
  summarise(avg_sent = average_mean(sentiment)) #0.0993
## # A tibble: 1 x 1
##   avg_sent
##      <dbl>
## 1   0.0993