Il n’y a pas de conclusion…

CONTEXTE

Les données du #TidyTuesday de cette semaine proviennent du Wall Street Journal.



OBJECTIFS

  1. Trouver une manière de présenter les données



IMPORTER

# Packages used unqualified by the rest of this analysis:
#   dplyr     - %>%, glimpse(), filter(), count(), select()
#   tidyr     - spread()
#   ggplot2   - all plotting functions
#   patchwork - combining plots with `+` and plot_layout()
library(dplyr)
library(tidyr)
library(ggplot2)
library(patchwork)

# Download the TidyTuesday release dated 2020-02-25 and keep its `measles`
# table (requires network access).
tuesdata <- tidytuesdayR::tt_load("2020-02-25")
measles <- tuesdata$measles



EXPLORER

# Compact overview of the table: one line per column with its type and
# first values.
dplyr::glimpse(measles)
## Observations: 66,113  
## Variables: 16  
## $ index    <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 10, 11, 12, 13, 14, 15, …  
## $ state    <chr> "Arizona", "Arizona", "Arizona", "Arizona", "Arizona", …  
## $ year     <chr> "2018-19", "2018-19", "2018-19", "2018-19", "2018-19", …  
## $ name     <chr> "A J Mitchell Elementary", "Academy Del Sol", "Academy …  
## $ type     <chr> "Public", "Charter", "Charter", "Charter", "Charter", "…  
## $ city     <chr> "Nogales", "Tucson", "Tucson", "Phoenix", "Phoenix", "P…  
## $ county   <chr> "Santa Cruz", "Pima", "Pima", "Maricopa", "Maricopa", "…  
## $ district <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…  
## $ enroll   <dbl> 51, 22, 85, 60, 43, 36, 24, 22, 26, 78, 78, 35, 54, 54,…  
## $ mmr      <dbl> 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, …  
## $ overall  <dbl> -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,…  
## $ xrel     <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…  
## $ xmed     <dbl> NA, NA, NA, NA, 2.33, NA, NA, NA, NA, NA, NA, 2.86, NA,…  
## $ xper     <dbl> NA, NA, NA, NA, 2.33, NA, 4.17, NA, NA, NA, NA, NA, NA,…  
## $ lat      <dbl> 31.34782, 32.22192, 32.13049, 33.48545, 33.49562, 33.43…  
## $ lng      <dbl> -110.9380, -110.8961, -111.1170, -112.1306, -112.2247, …
# Per-column summary statistics (quartiles for numeric columns, NA counts).
measles %>%
  summary()
##      index         state               year               name            
##  Min.   :   1   Length:66113       Length:66113       Length:66113        
##  1st Qu.: 429   Class :character   Class :character   Class :character    
##  Median : 997   Mode  :character   Mode  :character   Mode  :character    
##  Mean   :1608                                                             
##  3rd Qu.:2133                                                             
##  Max.   :8066                                                             
##                                                                           
##      type               city              county          district        
##  Length:66113       Length:66113       Length:66113       Mode:logical    
##  Class :character   Class :character   Class :character   NA's:66113      
##  Mode  :character   Mode  :character   Mode  :character                   
##                                                                           
##                                                                           
##                                                                           
##                                                                           
##      enroll            mmr            overall         xrel          
##  Min.   :   0.0   Min.   : -1.00   Min.   : -1.00   Mode:logical    
##  1st Qu.:  46.0   1st Qu.: -1.00   1st Qu.: -1.00   TRUE:109        
##  Median :  80.0   Median : 95.00   Median : 87.00   NA's:66004      
##  Mean   : 131.9   Mean   : 63.17   Mean   : 54.09                   
##  3rd Qu.: 129.0   3rd Qu.: 98.00   3rd Qu.: 96.10                   
##  Max.   :6222.0   Max.   :100.00   Max.   :100.00                   
##  NA's   :16260                                                      
##       xmed             xper             lat             lng           
##  Min.   :  0.04   Min.   :  0.17   Min.   :24.55   Min.   :-124.50    
##  1st Qu.:  1.00   1st Qu.:  2.84   1st Qu.:35.69   1st Qu.:-117.63    
##  Median :  2.00   Median :  5.00   Median :40.21   Median : -89.97    
##  Mean   :  2.91   Mean   :  6.78   Mean   :39.15   Mean   : -96.28    
##  3rd Qu.:  3.53   3rd Qu.:  7.55   3rd Qu.:42.18   3rd Qu.: -81.75    
##  Max.   :100.00   Max.   :169.23   Max.   :49.00   Max.   :  80.21    
##  NA's   :45122    NA's   :57560    NA's   :1549    NA's   :1549
# Number of rows per value of `year`, rendered as a table.
knitr::kable(count(measles, year))
year n
2017 1939
2017-18 10418
2018-19 48075
null 5681
# Number of rows per value of `type`, rendered as a table.
knitr::kable(count(measles, type))
type n
BOCES 47
Charter 276
Kindergarten 1488
Nonpublic 173
Private 6815
Public 20692
NA 36622
# Number of rows with a reported MMR rate (mmr >= 0) for each year/type
# combination, shown as a wide table: one row per year, one column per type.
# count(year, type) replaces the former count(mmr) -> select(-mmr) ->
# group_by() -> summarise(sum(n)) round trip, which produced the same totals
# and left the result grouped.
# spread() is kept so the table keeps the same column order as before.
measles %>%
  filter(mmr >= 0) %>%
  count(year, type) %>%
  spread(type, n) %>%
  knitr::kable()
year BOCES Charter Kindergarten Nonpublic Private Public NA
2017 NA NA NA NA NA NA 1939
2017-18 47 NA 1486 18 2232 3075 2848
2018-19 NA 217 NA NA 2365 16687 11265
null NA NA NA NA NA NA 1978
# Number of rows with a reported MMR rate (mmr >= 0) for each state/year
# combination, shown as a wide table: one row per state, one column per year.
# count(state, year) replaces the former count(mmr) -> select(-mmr) ->
# group_by() -> summarise(sum(n)) round trip, which produced the same totals
# and left the result grouped.
# spread() is kept so the table keeps the same column order as before.
measles %>%
  filter(mmr >= 0) %>%
  count(state, year) %>%
  spread(year, n) %>%
  knitr::kable()
state 2017 2017-18 2018-19 null
Arizona NA NA 1171 NA
Arkansas NA NA 567 NA
California NA NA 14225 NA
Colorado NA 1507 NA NA
Connecticut NA 589 NA NA
Illinois NA NA 7686 NA
Maine NA NA 357 NA
Massachusetts NA NA 954 NA
Minnesota NA 1593 NA NA
Missouri NA NA 700 NA
Montana NA 522 NA NA
New York NA 4159 NA NA
North Dakota NA 360 NA NA
Ohio NA NA 2919 NA
Oregon NA NA 806 NA
Pennsylvania 1939 NA NA NA
South Dakota NA 373 NA NA
Texas NA NA 811 NA
Utah NA 603 NA NA
Vermont NA NA 338 NA
Washington NA NA NA 1978
# Distribution of the school-level MMR (measles) vaccination rate.
# Rows with mmr < 0 are dropped in both plots; the summary output shows -1 as
# the minimum of `mmr`, presumably a "not reported" code (confirm against the
# data dictionary).

# Horizontal boxplot. The constant x value yields a single box, and the
# (empty) category axis text and ticks are hidden.
plt1 <- measles %>%
  filter(mmr >= 0) %>%
  ggplot(aes(x = " ", y = mmr)) +
  geom_boxplot(fill = "#FFFFFF", color = "black") +
  coord_flip() +
  theme_classic() +
  xlab("") +
  ylab("taux de vaccination rougeole") +
  theme(
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank()
  )

# Relative-frequency histogram with one bin per percentage point.
# after_stat() replaces the deprecated `..count..` notation.
plt2 <- measles %>%
  filter(mmr >= 0) %>%
  ggplot() +
  geom_histogram(
    aes(x = mmr, y = after_stat(count / sum(count))),
    position = "identity",
    binwidth = 1,
    fill = "#FFFFFF",
    color = "black"
  ) +
  ylab("Fréquence Relative") +
  xlab("") +
  theme_classic() +
  theme(
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  )

# Stack the histogram (two thirds of the height) above the boxplot.
plt2 + plt1 + plot_layout(nrow = 2, heights = c(2, 1))

# Distribution of the school-level overall vaccination rate.
# NOTE(review): `overall > 0` also drops rows reporting exactly 0, whereas the
# MMR plots and the data-preparation step keep them with `>= 0` -- confirm
# which is intended.

# Horizontal boxplot. The constant x value yields a single box, and the
# (empty) category axis text and ticks are hidden.
# The axis label now names the overall rate; it previously repeated the
# measles label from the MMR plot.
plt1 <- measles %>%
  filter(overall > 0) %>%
  ggplot(aes(x = " ", y = overall)) +
  geom_boxplot(fill = "#FFFFFF", color = "black") +
  coord_flip() +
  theme_classic() +
  xlab("") +
  ylab("taux de vaccination global") +
  theme(
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank()
  )

# Relative-frequency histogram with one bin per percentage point.
# after_stat() replaces the deprecated `..count..` notation.
plt2 <- measles %>%
  filter(overall > 0) %>%
  ggplot() +
  geom_histogram(
    aes(x = overall, y = after_stat(count / sum(count))),
    position = "identity",
    binwidth = 1,
    fill = "#FFFFFF",
    color = "black"
  ) +
  ylab("Fréquence Relative") +
  xlab("") +
  theme_classic() +
  theme(
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  )

# Stack the histogram (two thirds of the height) above the boxplot.
plt2 + plt1 + plot_layout(nrow = 2, heights = c(2, 1))

 

PRÉPARER

# Plotting data: drop exact duplicate rows, keep rows where both rates are
# non-negative (-1 is presumably the "not reported" code -- confirm), then
# retain only the two rate columns.
data <- measles %>%
  unique() %>%
  filter(mmr >= 0 & overall >= 0) %>%
  select(mmr, overall)



VISUALISER

# Plot: scatter of the MMR rate (y) against the overall rate (x).
gg <- ggplot(data = data, aes(x = overall, y = mmr)) +
  geom_point(size = 1.5, alpha = 1)





Comment est-ce que le taux de vaccination contre la rougeole peut être au-dessus du taux de vaccination global pour ces écoles des États-Unis ?

Seulement si le taux de vaccination global représente la proportion d’étudiants qui ont reçu tous les vaccins requis par leur calendrier de vaccination.



Alors, tu veux en savoir plus sur ma démarche? Abonne-toi à mon infolettre pour savoir quand le prochain épisode de podcast sera disponible. J’y expliquerai toute la réflexion et les concepts de visualisation de données qui ont mené à la création de cette viz.

Laisser un commentaire

Ce site utilise Akismet pour réduire les indésirables. En savoir plus sur comment les données de vos commentaires sont utilisées.