CONTEXTE
Les données du #TidyTuesday de cette semaine proviennent du Wall Street Journal.
OBJECTIFS
- Trouver une manière de présenter les données
IMPORTER
# Load the TidyTuesday release dated 2020-02-25 and keep its `measles` table,
# which every later step of the analysis works on.
tuesdata <- tidytuesdayR::tt_load("2020-02-25")
measles  <- tuesdata$measles
EXPLORER
# Column-wise overview of the measles table: names, types and first values.
measles %>%
  glimpse()
## Observations: 66,113
## Variables: 16
## $ index <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 10, 11, 12, 13, 14, 15, …
## $ state <chr> "Arizona", "Arizona", "Arizona", "Arizona", "Arizona", …
## $ year <chr> "2018-19", "2018-19", "2018-19", "2018-19", "2018-19", …
## $ name <chr> "A J Mitchell Elementary", "Academy Del Sol", "Academy …
## $ type <chr> "Public", "Charter", "Charter", "Charter", "Charter", "…
## $ city <chr> "Nogales", "Tucson", "Tucson", "Phoenix", "Phoenix", "P…
## $ county <chr> "Santa Cruz", "Pima", "Pima", "Maricopa", "Maricopa", "…
## $ district <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ enroll <dbl> 51, 22, 85, 60, 43, 36, 24, 22, 26, 78, 78, 35, 54, 54,…
## $ mmr <dbl> 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, …
## $ overall <dbl> -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,…
## $ xrel <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ xmed <dbl> NA, NA, NA, NA, 2.33, NA, NA, NA, NA, NA, NA, 2.86, NA,…
## $ xper <dbl> NA, NA, NA, NA, 2.33, NA, 4.17, NA, NA, NA, NA, NA, NA,…
## $ lat <dbl> 31.34782, 32.22192, 32.13049, 33.48545, 33.49562, 33.43…
## $ lng <dbl> -110.9380, -110.8961, -111.1170, -112.1306, -112.2247, …
# Per-column summary statistics of the measles table.
measles %>%
  summary()
## index state year name ## Min. : 1 Length:66113 Length:66113 Length:66113
## 1st Qu.: 429 Class :character Class :character Class :character
## Median : 997 Mode :character Mode :character Mode :character
## Mean :1608
## 3rd Qu.:2133
## Max. :8066
##
## type city county district
## Length:66113 Length:66113 Length:66113 Mode:logical
## Class :character Class :character Class :character NA's:66113
## Mode :character Mode :character Mode :character
##
##
##
##
## enroll mmr overall xrel
## Min. : 0.0 Min. : -1.00 Min. : -1.00 Mode:logical
## 1st Qu.: 46.0 1st Qu.: -1.00 1st Qu.: -1.00 TRUE:109
## Median : 80.0 Median : 95.00 Median : 87.00 NA's:66004
## Mean : 131.9 Mean : 63.17 Mean : 54.09
## 3rd Qu.: 129.0 3rd Qu.: 98.00 3rd Qu.: 96.10
## Max. :6222.0 Max. :100.00 Max. :100.00
## NA's :16260
## xmed xper lat lng
## Min. : 0.04 Min. : 0.17 Min. :24.55 Min. :-124.50
## 1st Qu.: 1.00 1st Qu.: 2.84 1st Qu.:35.69 1st Qu.:-117.63
## Median : 2.00 Median : 5.00 Median :40.21 Median : -89.97
## Mean : 2.91 Mean : 6.78 Mean :39.15 Mean : -96.28
## 3rd Qu.: 3.53 3rd Qu.: 7.55 3rd Qu.:42.18 3rd Qu.: -81.75
## Max. :100.00 Max. :169.23 Max. :49.00 Max. : 80.21
## NA's :45122 NA's :57560 NA's :1549 NA's :1549
# Number of rows for each value of `year`, rendered as a table.
knitr::kable(
  count(measles, year)
)
year | n |
---|---|
2017 | 1939 |
2017-18 | 10418 |
2018-19 | 48075 |
null | 5681 |
# Number of rows for each value of `type` (school type), rendered as a table.
knitr::kable(
  count(measles, type)
)
type | n |
---|---|
BOCES | 47 |
Charter | 276 |
Kindergarten | 1488 |
Nonpublic | 173 |
Private | 6815 |
Public | 20692 |
NA | 36622 |
# Number of rows with a non-negative `mmr`, by year (rows) and school type
# (columns). Negative `mmr` values (the summary shows a minimum of -1) are
# presumably a placeholder for "rate not reported" -- confirm against the
# data dictionary.
# count() gives the number of rows per (year, type) pair directly, so no
# intermediate per-mmr count followed by a sum is needed.
# spread() is kept on purpose: it orders the type columns alphabetically and
# keeps the header of the NA-type column as currently rendered.
measles %>%
  filter(mmr >= 0) %>%
  count(year, type) %>%
  spread(type, n) %>%
  knitr::kable()
year | BOCES | Charter | Kindergarten | Nonpublic | Private | Public | |
---|---|---|---|---|---|---|---|
2017 | NA | NA | NA | NA | NA | NA | 1939 |
2017-18 | 47 | NA | 1486 | 18 | 2232 | 3075 | 2848 |
2018-19 | NA | 217 | NA | NA | 2365 | 16687 | 11265 |
null | NA | NA | NA | NA | NA | NA | 1978 |
# Number of rows with a non-negative `mmr`, by state (rows) and year
# (columns). Negative `mmr` values (the summary shows a minimum of -1) are
# presumably a placeholder for "rate not reported" -- confirm against the
# data dictionary.
# count() gives the number of rows per (state, year) pair directly, so no
# intermediate per-mmr count followed by a sum is needed.
# spread() is kept on purpose: it orders the year columns alphabetically,
# which matches the rendered table.
measles %>%
  filter(mmr >= 0) %>%
  count(state, year) %>%
  spread(year, n) %>%
  knitr::kable()
state | 2017 | 2017-18 | 2018-19 | null |
---|---|---|---|---|
Arizona | NA | NA | 1171 | NA |
Arkansas | NA | NA | 567 | NA |
California | NA | NA | 14225 | NA |
Colorado | NA | 1507 | NA | NA |
Connecticut | NA | 589 | NA | NA |
Illinois | NA | NA | 7686 | NA |
Maine | NA | NA | 357 | NA |
Massachusetts | NA | NA | 954 | NA |
Minnesota | NA | 1593 | NA | NA |
Missouri | NA | NA | 700 | NA |
Montana | NA | 522 | NA | NA |
New York | NA | 4159 | NA | NA |
North Dakota | NA | 360 | NA | NA |
Ohio | NA | NA | 2919 | NA |
Oregon | NA | NA | 806 | NA |
Pennsylvania | 1939 | NA | NA | NA |
South Dakota | NA | 373 | NA | NA |
Texas | NA | NA | 811 | NA |
Utah | NA | 603 | NA | NA |
Vermont | NA | NA | 338 | NA |
Washington | NA | NA | NA | 1978 |
# Distribution of the MMR vaccination rate: a relative-frequency histogram
# stacked above a horizontal boxplot. Rows with a negative `mmr` are dropped
# before plotting.

# Bottom panel: horizontal boxplot. The y-axis text and ticks are hidden
# because the single dummy category (" ") carries no information.
plt1 <- measles %>%
  filter(mmr >= 0) %>%
  ggplot(aes(x = " ", y = mmr)) +
  geom_boxplot(fill = "#FFFFFF", color = "black") +
  coord_flip() +
  theme_classic() +
  labs(x = "", y = "taux de vaccination rougeole") +
  theme(
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank()
  )

# Top panel: histogram with 1-unit bins, scaled so bar heights sum to 1.
# The x-axis text and ticks are hidden here.
plt2 <- measles %>%
  filter(mmr >= 0) %>%
  ggplot() +
  geom_histogram(
    aes(x = mmr, y = (..count..) / sum(..count..)),
    position = "identity",
    binwidth = 1,
    fill = "#FFFFFF",
    color = "black"
  ) +
  labs(x = "", y = "Fréquence Relative") +
  theme_classic() +
  theme(
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  )

# Stack both panels; the histogram gets twice the height of the boxplot.
plt2 + plt1 + plot_layout(nrow = 2, heights = c(2, 1))
# Distribution of the overall vaccination rate: a relative-frequency
# histogram stacked above a horizontal boxplot.
# Rows are kept when `overall >= 0`, the same rule used for `mmr` and for
# `overall` elsewhere in this analysis, so a genuine 0 % rate is not
# discarded. Negative values (the summary shows a minimum of -1) are
# presumably a placeholder for "rate not reported" -- confirm against the
# data dictionary.

# Bottom panel: horizontal boxplot. The axis label names the overall rate,
# which is the variable actually plotted here.
plt1 <- measles %>%
  filter(overall >= 0) %>%
  ggplot(aes(x = " ", y = overall)) +
  geom_boxplot(fill = "#FFFFFF", color = "black") +
  coord_flip() +
  theme_classic() +
  xlab("") +
  ylab("taux de vaccination global") +
  theme(
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank()
  )

# Top panel: histogram with 1-unit bins, scaled so bar heights sum to 1.
# The x-axis text and ticks are hidden here.
plt2 <- measles %>%
  filter(overall >= 0) %>%
  ggplot() +
  geom_histogram(
    aes(x = overall, y = (..count..) / sum(..count..)),
    position = "identity",
    binwidth = 1,
    fill = "#FFFFFF",
    color = "black"
  ) +
  ylab("Fréquence Relative") +
  xlab("") +
  theme_classic() +
  theme(
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  )

# Stack both panels; the histogram gets twice the height of the boxplot.
plt2 + plt1 + plot_layout(nrow = 2, heights = c(2, 1))
PRÉPARER
# Plotting data: the `mmr` and `overall` columns of the de-duplicated rows
# where both rates are non-negative.
# Exact duplicate rows share every column value, so filtering before
# de-duplicating keeps the same rows in the same order.
data <- measles %>%
  filter(mmr >= 0, overall >= 0) %>%
  distinct() %>%
  select(mmr, overall)
VISUALISER
# Plot
# Scatter plot of the MMR rate (y) against the overall rate (x), one point
# per row of `data`.
gg <- ggplot(data = data, aes(x = overall, y = mmr)) +
  geom_point(size = 1.5, alpha = 1)

# Assigning the plot to `gg` draws nothing; print it so the figure appears.
gg
Comment le taux de vaccination contre la rougeole peut-il être au-dessus du taux de vaccination global pour ces écoles des États-Unis ?
Seulement si le taux de vaccination global implique que ce sont les étudiants qui ont reçu tous les vaccins requis par leur calendrier de vaccination.
Alors, tu veux en savoir plus sur ma démarche ? Abonne-toi à mon infolettre pour savoir quand le prochain épisode de podcast sera disponible. J’y expliquerai toute la réflexion et les concepts de visualisation de données qui ont mené à la création de cette viz.