TyT2019W29 - R4DS
By Johanie Fournier, agr. in rstats tidyverse tidytuesday
July 24, 2019
Get the data
r4ds_members <- readr::read_csv("https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-07-16/r4ds_members.csv")
## Rows: 678 Columns: 21
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (20): total_membership, full_members, guests, daily_active_members, dai...
## date (1): date
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Explore the data
summary(r4ds_members)
## date total_membership full_members guests
## Min. :2017-08-27 Min. : 1.0 Min. : 1.0 Min. :0
## 1st Qu.:2018-02-12 1st Qu.: 978.2 1st Qu.: 978.2 1st Qu.:0
## Median :2018-07-31 Median :1605.0 Median :1605.0 Median :0
## Mean :2018-07-31 Mean :1567.8 Mean :1567.8 Mean :0
## 3rd Qu.:2019-01-16 3rd Qu.:2142.8 3rd Qu.:2142.8 3rd Qu.:0
## Max. :2019-07-05 Max. :3029.0 Max. :3029.0 Max. :0
## daily_active_members daily_members_posting_messages weekly_active_members
## Min. : 1.00 Min. : 0.00 Min. : 1.0
## 1st Qu.: 63.00 1st Qu.: 6.00 1st Qu.:206.0
## Median : 88.00 Median : 11.00 Median :239.0
## Mean : 91.39 Mean : 13.24 Mean :249.7
## 3rd Qu.:110.00 3rd Qu.: 16.00 3rd Qu.:307.8
## Max. :258.00 Max. :111.00 Max. :525.0
## weekly_members_posting_messages messages_in_public_channels
## Min. : 1.00 Min. : 0.00
## 1st Qu.: 35.00 1st Qu.: 9.25
## Median : 48.00 Median : 19.00
## Mean : 52.16 Mean : 28.46
## 3rd Qu.: 59.00 3rd Qu.: 35.00
## Max. :278.00 Max. :326.00
## messages_in_private_channels messages_in_shared_channels messages_in_d_ms
## Min. : 0.000 Min. :0 Min. : 0.00
## 1st Qu.: 0.000 1st Qu.:0 1st Qu.: 1.00
## Median : 0.000 Median :0 Median : 4.00
## Mean : 1.718 Mean :0 Mean : 13.05
## 3rd Qu.: 0.000 3rd Qu.:0 3rd Qu.: 12.00
## Max. :75.000 Max. :0 Max. :227.00
## percent_of_messages_public_channels percent_of_messages_private_channels
## Min. :0.0000 Min. :0.0000
## 1st Qu.:0.5840 1st Qu.:0.0000
## Median :0.8000 Median :0.0000
## Mean :0.7248 Mean :0.0305
## 3rd Qu.:0.9444 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000
## percent_of_messages_d_ms percent_of_views_public_channels
## Min. :0.0000 Min. :0.2726
## 1st Qu.:0.0345 1st Qu.:0.9115
## Median :0.1595 Median :0.9519
## Mean :0.2270 Mean :0.9285
## 3rd Qu.:0.3478 3rd Qu.:0.9744
## Max. :1.0000 Max. :1.0000
## percent_of_views_private_channels percent_of_views_d_ms name
## Min. :0.000000 Min. :0.00000 Min. :0
## 1st Qu.:0.000000 1st Qu.:0.02235 1st Qu.:0
## Median :0.000000 Median :0.04170 Median :0
## Mean :0.009773 Mean :0.06176 Mean :0
## 3rd Qu.:0.006450 3rd Qu.:0.07433 3rd Qu.:0
## Max. :0.267400 Max. :0.72170 Max. :0
## public_channels_single_workspace messages_posted
## Min. :10.0 Min. : 35
## 1st Qu.:15.0 1st Qu.:20543
## Median :19.0 Median :33828
## Mean :17.8 Mean :32936
## 3rd Qu.:21.0 3rd Qu.:40104
## Max. :27.0 Max. :59627
Prepare the data
r4ds<-r4ds_members %>%
select('date','total_membership','messages_posted') %>%
mutate(quarter=quarter(date, with_year = TRUE)) %>%
group_by(quarter) %>%
summarise(tot_m_1000=sum(total_membership/1000)) %>%
filter(!quarter %in% 2019.3)
r4ds_point<-r4ds %>%
filter(quarter %in% c(2017.3, 2019.2))
r4ds_active<-r4ds_members %>%
select('date','total_membership','daily_active_members') %>%
mutate(quarter=quarter(date, with_year = TRUE)) %>%
group_by(quarter) %>%
summarise(active_1000=sum(daily_active_members/1000)) %>%
filter(!quarter %in% 2019.3)
r4ds_point_active<-r4ds_active %>%
filter(quarter %in% c(2017.3, 2019.2))
r4ds<-r4ds_members %>%
mutate(daily_message=messages_posted-shift(messages_posted)) %>%
filter(daily_message>0 & daily_message<5000) %>%
mutate(activity=(daily_message/daily_active_members)) %>%
mutate(quarter=quarter(date, with_year = TRUE)) %>%
group_by(quarter) %>%
summarise(active=sum(activity)) %>%
filter(!quarter %in% 2019.3)
r4ds_point_activity<-r4ds %>%
filter(quarter %in% c(2017.3, 2019.2))
Visualize the data
#Graphique
gg1<-ggplot(data=r4ds, aes(x = quarter, y=tot_m_1000))
gg1<-gg1 + geom_step(linetype=5, color="#A9A9A9", size=2.5)
gg1<-gg1 + geom_step(data=r4ds_active, aes(x = quarter, y=active_1000),linetype=5, color="#A9A9A9", size=2.5)
gg1<-gg1 + geom_rect(data=r4ds,
mapping=aes(xmin=2018.1,xmax=2018.4,ymin=0,ymax=Inf),
fill='#01A7C2',alpha=0.05)
gg1<-gg1 + geom_point(data=r4ds_point,
mapping=(aes(x=quarter,y=tot_m_1000)),
color="#A9A9A9", size=5)
gg1<-gg1 + geom_point(data=r4ds_point_active,
mapping=(aes(x=quarter,y=active_1000)),
color="#A9A9A9", size=5)
#ajuster les axes
gg1<-gg1 + scale_x_yearqtr(breaks = seq(from = min(r4ds$quarter), to = max(r4ds$quarter), by = 0.25),
format = "%Y-%q")
gg1<-gg1 + scale_y_continuous(breaks=seq(0,300,50), limits = c(0, 300))
#modifier la légende
gg1<-gg1 + theme(legend.position="none")
#modifier le thème
gg1<-gg1 +theme(panel.border = element_blank(),
panel.background = element_blank(),
plot.background = element_blank(),
panel.grid.major.y= element_blank(),
panel.grid.major.x= element_blank(),
panel.grid.minor = element_blank(),
axis.line.x = element_line(color="#A9A9A9"),
axis.line.y = element_line(color="#A9A9A9"),
axis.ticks= element_blank())
#ajouter les titres
gg1<-gg1 + labs(title="",
subtitle=" ",
y="Members (x1000)",
x=" ")
gg1<-gg1 + theme(plot.title = element_text(hjust=0,size=15, color="#A9A9A9", face="bold"),
plot.subtitle = element_text(hjust=0,size=12, color="#A9A9A9"),
axis.title.y = element_text(hjust=1,size=12, color="#A9A9A9", angle=90),
axis.title.x = element_blank(),
axis.text.y = element_text(hjust=0.5, size=10, color="#A9A9A9"),
axis.text.x = element_text(hjust=0.5, size=10, color="#A9A9A9"))
#ajouter les étiquettes
gg1<-gg1 + annotate(geom="text", x=2019.2,y=270, label="Total", color="#A9A9A9", size=5, hjust=1,vjust=0, fontface="bold")
gg1<-gg1 + annotate(geom="text", x=2019.2,y=18, label="Active", color="#A9A9A9", size=5, hjust=1
,vjust=0, fontface="bold")
gg2<-ggplot(data=r4ds, aes(x = quarter, y=active))
gg2<-gg2 + geom_step(linetype=5, color="#A9A9A9", size=2.5)
gg2<-gg2 + geom_rect(data=r4ds,
mapping=aes(xmin=2018.1,xmax=2018.4,ymin=0,ymax=Inf),
fill='#01A7C2',alpha=0.05)
gg2<-gg2 + geom_point(data=r4ds_point_activity,
mapping=(aes(x=quarter,y=active)),
color="#A9A9A9", size=5)
#ajuster les axes
gg2<-gg2 + scale_x_yearqtr(breaks = seq(from = min(r4ds$quarter), to = max(r4ds$quarter), by = 0.25),
format = "%Y-%q")
gg2<-gg2 + scale_y_continuous(breaks=seq(0,100,25), limits = c(0, 100))
#modifier la légende
gg2<-gg2 + theme(legend.position="none")
#modifier le thème
gg2<-gg2 +theme(panel.border = element_blank(),
panel.background = element_blank(),
plot.background = element_blank(),
panel.grid.major.y= element_blank(),
panel.grid.major.x= element_blank(),
panel.grid.minor = element_blank(),
axis.line.x = element_line(color="#A9A9A9"),
axis.line.y = element_line(color="#A9A9A9"),
axis.ticks= element_blank())
#ajouter les titres
gg2<-gg2 + labs(title="",
subtitle=" ",
y="Daily messages/acive member",
x=" ")
gg2<-gg2 + theme(plot.title = element_text(hjust=0,size=15, color="#A9A9A9", face="bold"),
plot.subtitle = element_text(hjust=0,size=12, color="#A9A9A9"),
axis.title.y = element_text(hjust=1,size=12, color="#A9A9A9", angle=90),
axis.title.x = element_blank(),
axis.text.y = element_text(hjust=0.5, size=10, color="#A9A9A9"),
axis.text.x = element_text(hjust=0.5, size=10, color="#A9A9A9"))
- Posted on:
- July 24, 2019
- Length:
- 4 minute read, 789 words
- Categories:
- rstats tidyverse tidytuesday
- Tags:
- rstats tidyverse tidytuesday