TyT2019W39 - Not a Real Graph

By Johanie Fournier, agr. in rstats tidyverse tidytuesday

September 25, 2019

Get the data

# Download the TidyTuesday 2019-09-24 school diversity data set from GitHub.
school_diversity <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-09-24/school_diversity.csv"
)
## Rows: 27944 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (7): LEAID, LEA_NAME, ST, d_Locale_Txt, SCHOOL_YEAR, diverse, int_group
## dbl (8): AIAN, Asian, Black, Hispanic, White, Multi, Total, variance
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Explore the data

# Quick per-column overview: length/class for the character columns,
# quartiles and NA counts for the numeric ones.
summary(school_diversity)
##     LEAID             LEA_NAME              ST            d_Locale_Txt      
##  Length:27944       Length:27944       Length:27944       Length:27944      
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##  SCHOOL_YEAR             AIAN              Asian              Black         
##  Length:27944       Min.   :  0.0000   Min.   : 0.00000   Min.   :  0.0000  
##  Class :character   1st Qu.:  0.0000   1st Qu.: 0.05818   1st Qu.:  0.2023  
##  Mode  :character   Median :  0.1566   Median : 0.53124   Median :  0.9231  
##                     Mean   :  2.7879   Mean   : 1.76956   Mean   :  6.5515  
##                     3rd Qu.:  0.6144   3rd Qu.: 1.44514   3rd Qu.:  3.9649  
##                     Max.   :100.0000   Max.   :74.90586   Max.   :100.0000  
##                                                                             
##     Hispanic            White            Multi            Total        
##  Min.   :  0.0000   Min.   :  0.00   Min.   : 0.000   Min.   :      1  
##  1st Qu.:  0.5505   1st Qu.: 65.24   1st Qu.: 1.070   1st Qu.:    366  
##  Median :  2.7094   Median : 88.60   Median : 2.378   Median :   1044  
##  Mean   : 10.4520   Mean   : 76.93   Mean   : 3.202   Mean   :   3228  
##  3rd Qu.: 10.1256   3rd Qu.: 96.45   3rd Qu.: 4.174   3rd Qu.:   2720  
##  Max.   :100.0000   Max.   :100.00   Max.   :85.308   Max.   :1020747  
##                                      NA's   :14760                     
##    diverse             variance      int_group        
##  Length:27944       Min.   :0.000   Length:27944      
##  Class :character   1st Qu.:0.017   Class :character  
##  Mode  :character   Median :0.046   Mode  :character  
##                     Mean   :0.078                     
##                     3rd Qu.:0.107                     
##                     Max.   :0.601                     
##                     NA's   :24923

Prepare the data

# Reduce the district-level data to the most frequent diversity class per
# state and school year. top_n() keeps ties, so a state can appear more than
# once for a given year.
# `code` in left_join() is a lookup table that is not defined in this chunk;
# presumably it maps `Abbreviation` to the full `State` name -- TODO confirm.
school <- school_diversity %>%
  left_join(code, by = c("ST" = "Abbreviation")) %>%
  mutate(
    # Numeric position of each diversity class on the y axis.
    code = case_when(
      diverse == "Diverse" ~ 1,
      diverse == "Undiverse" ~ 2,
      diverse == "Extremely undiverse" ~ 3
    ),
    # Numeric position of each school year on the x axis.
    annee = case_when(
      SCHOOL_YEAR == "1994-1995" ~ 1,
      SCHOOL_YEAR == "2016-2017" ~ 2
    )
  ) %>%
  select("State", "annee", "code") %>%
  # Number of districts per state, year and diversity class.
  dplyr::count(State, annee, code, name = "freq") %>%
  group_by(State, annee) %>%
  top_n(1, freq) %>%
  ungroup()

 # Corner coordinates of the boxes drawn with geom_rect() at x = 1 and x = 2,
 # one pair of boxes per diversity class (y = 1, 2 and 3).
 rect_1 <- data.frame(
   xmin = c(0.95, 1.95),
   xmax = c(1.05, 2.05),
   ymin = rep(0.75, 2),
   ymax = rep(1.25, 2)
 )

 rect_2 <- data.frame(
   xmin = c(0.95, 1.95),
   xmax = c(1.05, 2.05),
   ymin = rep(1.75, 2),
   ymax = rep(2.25, 2)
 )

 rect_3 <- data.frame(
   xmin = c(0.95, 1.95),
   xmax = c(1.05, 2.05),
   ymin = rep(2.75, 2),
   ymax = rep(3.25, 2)
 )
 
 # Number of rows of `school` per diversity class for 1994-1995 (annee == 1).
 school_1995 <- school %>%
   filter(annee == 1) %>%
   dplyr::count(code, name = "freq")

 # Same tally for 2016-2017 (annee == 2).
 school_2017 <- school %>%
   filter(annee == 2) %>%
   dplyr::count(code, name = "freq")

Visualize the data

# Plot: one line per state, linking its dominant diversity class in
# 1994-1995 (x = 1) to the one in 2016-2017 (x = 2).
gg <- ggplot(school, aes(x = annee, y = code, group = State, color = State))
# Vertical-only jitter so that states sharing the same classes do not draw
# exactly on top of each other. The jitter is unseeded, so the exact line
# positions differ between renders.
gg <- gg + geom_line(
  position = position_jitter(width = 0, height = 0.1),
  size = 1, color = "#2E2E2E", alpha = 0.5
)
# Coloured boxes behind the class labels; they use their own data frames, so
# the plot-level aesthetics must not be inherited.
gg <- gg + geom_rect(
  data = rect_1,
  aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax),
  color = "#B494CC", fill = "#B494CC", inherit.aes = FALSE
)
gg <- gg + geom_rect(
  data = rect_2,
  aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax),
  color = "#99C9DE", fill = "#99C9DE", inherit.aes = FALSE
)
gg <- gg + geom_rect(
  data = rect_3,
  aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax),
  color = "#D1EAE5", fill = "#D1EAE5", inherit.aes = FALSE
)
# Adjust the axes: the x range extends to 3 to leave room on the right for
# the descriptive paragraphs.
gg <- gg + scale_x_continuous(
  breaks = seq(1, 2, 1), limits = c(0.8, 3), expand = c(0, 0),
  labels = c("1994-1995", "2016-2017")
)
# Annotate: class names inside the boxes, rotated 90 degrees on the left
# (x = 1) and 270 degrees on the right (x = 2). A single vectorised
# annotate() call replaces six near-identical ones.
gg <- gg + annotate(
  geom = "text",
  x = rep(c(1, 2), times = 3),
  y = rep(c(1, 2, 3), each = 2),
  label = rep(c("Diverse", "Undiverse", "Extremely\nundiverse"), each = 2),
  angle = rep(c(90, 270), times = 3),
  color = "#2E2E2E", size = 4, hjust = 0.5, vjust = 0.5, fontface = "bold"
)
# Modify the theme: strip every background, grid line, axis line and tick.
gg <- gg + theme(
  panel.border = element_blank(),
  panel.background = element_blank(),
  plot.background = element_blank(),
  panel.grid.major.x = element_blank(),
  panel.grid.major.y = element_blank(),
  panel.grid.minor = element_blank(),
  axis.line.x = element_blank(),
  axis.line.y = element_blank(),
  axis.ticks = element_blank()
)

# Add the titles.
gg <- gg + labs(
  title = "Diversity's Evolution in US Schools\n",
  subtitle = " ",
  x = " ",
  y = " ",
  caption = "\nSOURCE:  Washington Post  |  DESIGN: Johanie Fournier, agr."
)
gg <- gg + theme(
  plot.title = element_text(
    size = 37, hjust = 0, vjust = 0.5, family = "Tw Cen MT", color = "#2E2E2E"
  ),
  plot.subtitle = element_blank(),
  plot.caption = element_text(
    size = 8, hjust = 1, vjust = 0.5, family = "Tw Cen MT", color = "#8B8B8B"
  ),
  axis.title.y = element_blank(),
  axis.title.x = element_blank(),
  axis.text.x = element_text(
    size = 12, hjust = 0.5, vjust = 0.5, family = "Tw Cen MT",
    color = "#2E2E2E"
  ),
  axis.text.y = element_blank()
)
# Descriptive paragraphs to the right of each class. annotate() is used
# instead of geom_text(aes(...)) because a geom_text layer inherits the
# `school` data and would draw each paragraph once per row, overplotting the
# same text many times.
gg <- gg + annotate(
  geom = "text", x = 2.1, y = 3,
  label = paste0(
    "Represents schools where more than 90% of students are\n",
    "of a different race. In 1995, 62% of the American states\n",
    "had schools that were extremely undiverse. This percentage\n",
    "fell to 42% in 2017."
  ),
  size = 4.5, family = "Tw Cen MT", color = "#8B8B8B", hjust = 0
)
gg <- gg + annotate(
  geom = "text", x = 2.1, y = 2,
  label = paste0(
    "Represents schools of which between 75% and 90% of\n",
    "students are of a different race. In 1995, 4% of the\n",
    "American states had schools that were undiverse. This\n",
    "percentage rose to 16% in 2017."
  ),
  size = 4.5, family = "Tw Cen MT", color = "#8B8B8B", hjust = 0
)
gg <- gg + annotate(
  geom = "text", x = 2.1, y = 1,
  label = paste0(
    "Represents schools where less than 75% of students are\n",
    "of a different race. In 1995, 34% of the American states\n",
    "had schools that were diverse. This percentage rose to\n",
    "44% in 2017."
  ),
  size = 4.5, family = "Tw Cen MT", color = "#8B8B8B", hjust = 0
)
Posted on:
September 25, 2019
Length:
4 minute read, 802 words
Categories:
rstats tidyverse tidytuesday
Tags:
rstats tidyverse tidytuesday
See Also:
Predicting MO with H2O Models from IRDA data
IRDA soil data
This is the beginning of a cheat sheet!