Cette semaine, le #Tidytuesday nous fournit une base de données de Spotify.
CONTEXTE
Les données de cette semaine proviennent de spotifyr et un article sur le sujet peut être trouvé ici
OBJECTIFS
- Visualiser les indicateurs d’énergie et de dansabilité de tempo et de bonheur pour chaque genre sur un même axe.
IMPORTER
spotify_songs <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-01-21/spotify_songs.csv')
FALSE Parsed with column specification:
FALSE cols(
FALSE .default = col_double(),
FALSE track_id = col_character(),
FALSE track_name = col_character(),
FALSE track_artist = col_character(),
FALSE track_album_id = col_character(),
FALSE track_album_name = col_character(),
FALSE track_album_release_date = col_character(),
FALSE playlist_name = col_character(),
FALSE playlist_id = col_character(),
FALSE playlist_genre = col_character(),
FALSE playlist_subgenre = col_character()
FALSE )
FALSE See spec(...) for full column specifications.
EXPLORER
summary(spotify_songs)
## track_id track_name track_artist track_popularity
## Length:32833 Length:32833 Length:32833 Min. : 0.00
## Class :character Class :character Class :character 1st Qu.: 24.00
## Mode :character Mode :character Mode :character Median : 45.00
## Mean : 42.48
## 3rd Qu.: 62.00
## Max. :100.00
## track_album_id track_album_name track_album_release_date
## Length:32833 Length:32833 Length:32833
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
## playlist_name playlist_id playlist_genre
## Length:32833 Length:32833 Length:32833
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
## playlist_subgenre danceability energy key
## Length:32833 Min. :0.0000 Min. :0.000175 Min. : 0.000
## Class :character 1st Qu.:0.5630 1st Qu.:0.581000 1st Qu.: 2.000
## Mode :character Median :0.6720 Median :0.721000 Median : 6.000
## Mean :0.6548 Mean :0.698619 Mean : 5.374
## 3rd Qu.:0.7610 3rd Qu.:0.840000 3rd Qu.: 9.000
## Max. :0.9830 Max. :1.000000 Max. :11.000
## loudness mode speechiness acousticness
## Min. :-46.448 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.: -8.171 1st Qu.:0.0000 1st Qu.:0.0410 1st Qu.:0.0151
## Median : -6.166 Median :1.0000 Median :0.0625 Median :0.0804
## Mean : -6.720 Mean :0.5657 Mean :0.1071 Mean :0.1753
## 3rd Qu.: -4.645 3rd Qu.:1.0000 3rd Qu.:0.1320 3rd Qu.:0.2550
## Max. : 1.275 Max. :1.0000 Max. :0.9180 Max. :0.9940
## instrumentalness liveness valence tempo
## Min. :0.0000000 Min. :0.0000 Min. :0.0000 Min. : 0.00
## 1st Qu.:0.0000000 1st Qu.:0.0927 1st Qu.:0.3310 1st Qu.: 99.96
## Median :0.0000161 Median :0.1270 Median :0.5120 Median :121.98
## Mean :0.0847472 Mean :0.1902 Mean :0.5106 Mean :120.88
## 3rd Qu.:0.0048300 3rd Qu.:0.2480 3rd Qu.:0.6930 3rd Qu.:133.92
## Max. :0.9940000 Max. :0.9960 Max. :0.9910 Max. :239.44
## duration_ms
## Min. : 4000
## 1st Qu.:187819
## Median :216000
## Mean :225800
## 3rd Qu.:253585
## Max. :517810
glimpse(spotify_songs)
## Observations: 32,833
## Variables: 23
## $ track_id <chr> "6f807x0ima9a1j3VPbc7VN", "0r7CVbZTWZgb…
## $ track_name <chr> "I Don't Care (with Justin Bieber) - Lo…
## $ track_artist <chr> "Ed Sheeran", "Maroon 5", "Zara Larsson…
## $ track_popularity <dbl> 66, 67, 70, 60, 69, 67, 62, 69, 68, 67,…
## $ track_album_id <chr> "2oCs0DGTsRO98Gh5ZSl2Cx", "63rPSO264uRj…
## $ track_album_name <chr> "I Don't Care (with Justin Bieber) [Lou…
## $ track_album_release_date <chr> "2019-06-14", "2019-12-13", "2019-07-05…
## $ playlist_name <chr> "Pop Remix", "Pop Remix", "Pop Remix", …
## $ playlist_id <chr> "37i9dQZF1DXcZDD7cfEKhW", "37i9dQZF1DXc…
## $ playlist_genre <chr> "pop", "pop", "pop", "pop", "pop", "pop…
## $ playlist_subgenre <chr> "dance pop", "dance pop", "dance pop", …
## $ danceability <dbl> 0.748, 0.726, 0.675, 0.718, 0.650, 0.67…
## $ energy <dbl> 0.916, 0.815, 0.931, 0.930, 0.833, 0.91…
## $ key <dbl> 6, 11, 1, 7, 1, 8, 5, 4, 8, 2, 6, 8, 1,…
## $ loudness <dbl> -2.634, -4.969, -3.432, -3.778, -4.672,…
## $ mode <dbl> 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, …
## $ speechiness <dbl> 0.0583, 0.0373, 0.0742, 0.1020, 0.0359,…
## $ acousticness <dbl> 0.10200, 0.07240, 0.07940, 0.02870, 0.0…
## $ instrumentalness <dbl> 0.00e+00, 4.21e-03, 2.33e-05, 9.43e-06,…
## $ liveness <dbl> 0.0653, 0.3570, 0.1100, 0.2040, 0.0833,…
## $ valence <dbl> 0.518, 0.693, 0.613, 0.277, 0.725, 0.58…
## $ tempo <dbl> 122.036, 99.972, 124.008, 121.956, 123.…
## $ duration_ms <dbl> 194754, 162600, 176616, 169093, 189052,…
PRÉPARER
# convertir le tempo en un indicateur qui varie entre 0 et 1
tempo<-data.frame(tempo=c(0,99.96,120.88,133.92,239.44),tempo_0_1=c(0,0.25,0.50,0.75,1))
formula <- y ~ x
gg<- ggplot(data=tempo, aes(x=tempo, y=tempo_0_1))
gg<- gg + geom_point()
gg<- gg + geom_smooth(method='lm', se=FALSE, formula=formula)
gg<- gg + stat_poly_eq(aes(label = paste(..eq.label.., ..adj.rr.label.., sep = "~~~~")),
formula = formula, parse = TRUE)
gg<- gg + theme_classic()
gg<- gg + scale_y_continuous(limits=c(0,1))
gg
data<-spotify_songs %>%
select(playlist_genre, danceability, energy,valence,tempo) %>%
mutate(tempo_0_1=0.00438*tempo-0.0208) %>%
select(-tempo) %>%
gather(type, valeur,-playlist_genre) %>%
group_by(playlist_genre, type) %>%
summarise_if(is.numeric, median, na.rm = TRUE) %>%
ungroup()
VISUALISER
#Graphique
gg<- ggplot(data=data,aes(x = valeur, y=playlist_genre))
gg <- gg + geom_segment( aes(x=0.3, xend=0.9,y=playlist_genre, yend=playlist_genre),
color="#838A90", alpha=0.6, size=4)
gg <- gg + geom_segment( aes(x=0.3, xend=0.9,y=playlist_genre, yend=playlist_genre),
color="#ffffff", alpha=1, size=0.5
)
gg <- gg + geom_point(aes(fill=type, group=playlist_genre), color="#FFFFFF", size=20, pch=21, alpha=0.5)
gg <- gg + geom_point(aes(fill=type, group=playlist_genre), color="#FFFFFF", size=12, pch=21)
gg <- gg + scale_fill_brewer(palette = "Set1",labels=c("Dansabilité","Énergie", "Tempo", "Bonheur"))
#ajouter les étiquettes de points
gg<-gg + geom_text(data=data, aes(x=valeur, y=playlist_genre, label=(round(data$valeur,1)), group=NULL), color="#ffffff", size=4.5, vjust=0.5, hjust=0.5, fontface="bold")
#pour l'axe des y à l'intérieur du graphique
gg<-gg + annotate(geom="text",x=0.3, y=1.3, label="Edm", color="#ffffff", size=5.5, vjust=0.5, hjust=0, fontface="bold")
gg<-gg + annotate(geom="text",x=0.3, y=2.3, label="Latin", color="#ffffff", size=5.5, vjust=0.5, hjust=0, fontface="bold")
gg<-gg + annotate(geom="text",x=0.3, y=3.3, label="Pop", color="#ffffff", size=5.5, vjust=0.5, hjust=0, fontface="bold")
gg<-gg + annotate(geom="text",x=0.3, y=4.3, label="R&B", color="#ffffff", size=5.5, vjust=0.5, hjust=0, fontface="bold")
gg<-gg + annotate(geom="text",x=0.3, y=5.3, label="Rap", color="#ffffff", size=5.5, vjust=0.5, hjust=0, fontface="bold")
gg<-gg + annotate(geom="text",x=0.3, y=6.3, label="Rock", color="#ffffff", size=5.5, vjust=0.5, hjust=0, fontface="bold")
#ajuster l'axe des x
gg <- gg + scale_x_continuous(expand = c(0,0))
#modifier le thème
gg <- gg + theme(plot.background = element_rect(fill = "#171717"),
panel.background = element_rect(fill = "#171717"),
panel.grid.major.y= element_blank(),
panel.grid.major.x= element_blank(),
panel.grid.minor = element_blank(),
axis.line.x = element_blank(),
axis.line.y =element_blank(),
axis.ticks.x = element_blank(),
axis.ticks.y = element_blank())
gg <- gg + theme(legend.position="top",
legend.title = element_blank(),
legend.background = element_blank(),
legend.key=element_blank(),
legend.spacing.x = unit(0.2, 'cm'),
legend.text= element_text(hjust =0.5,size= 8, colour = "#FFFFFF"))
gg <- gg + guides(fill = guide_legend(nrow = 1, reverse = TRUE))
#ajouter les titres
gg<-gg + labs(title="Quelle genre de musique préfères-tu?",
subtitle = "\nSpotify classe les différents genre de musique en fonction de leurs caractéristiques pour aider\nles utilisateurs à faire leur choix.\n",
x="Indice",
y=" ",
caption="\nSOURCE: Spotifyr | DESIGN: Johanie Fournier, agr.")
gg<-gg + theme( plot.title = element_text(size=30, hjust=0,vjust=0.5, color="#FFFFFF", family="Tw Cen MT", face="bold"),
plot.subtitle = element_text(size=15, hjust=0,vjust=0.5, color="#FFFFFF", family="Tw Cen MT"),
plot.caption = element_text(size=10, hjust=1,vjust=0.5, color="#FFFFFF", family="Tw Cen MT"),
axis.title.x = element_text(size=
15, hjust=0.05,vjust=0.5, color="#FFFFFF", family="Tw Cen MT", angle =0),
axis.title.y = element_blank(),
axis.text.y = element_blank(),
axis.text.x = element_blank())
Voici ce que ça donne:
Alors, tu veux en savoir plus sur ma démarche? Abonne-toi à mon infolettre pour savoir quand est-ce que le prochain épisode de podcast sera disponible. J’y expliquerai toute la réflexion et les concepts de data visualisation qui ont menés à la création de cette viz.
Pingback: ADV32_TRANSCRIPT - Comment ne pas mettre 2 axes sur un même graphique | Johanie Fournier, agr.