TyT2019W32 - Bob Ross Painting

By Johanie Fournier, agr. in rstats tidyverse tidytuesday

August 7, 2019

Get the data

bob_ross <- readr::read_csv("https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-08-06/bob-ross.csv")
## Rows: 403 Columns: 69
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (2): EPISODE, TITLE
## dbl (67): APPLE_FRAME, AURORA_BOREALIS, BARN, BEACH, BOAT, BRIDGE, BUILDING,...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Explore the data

summary(bob_ross)
##    EPISODE             TITLE            APPLE_FRAME       AURORA_BOREALIS   
##  Length:403         Length:403         Min.   :0.000000   Min.   :0.000000  
##  Class :character   Class :character   1st Qu.:0.000000   1st Qu.:0.000000  
##  Mode  :character   Mode  :character   Median :0.000000   Median :0.000000  
##                                        Mean   :0.002481   Mean   :0.004963  
##                                        3rd Qu.:0.000000   3rd Qu.:0.000000  
##                                        Max.   :1.000000   Max.   :1.000000  
##       BARN             BEACH            BOAT              BRIDGE       
##  Min.   :0.00000   Min.   :0.000   Min.   :0.000000   Min.   :0.00000  
##  1st Qu.:0.00000   1st Qu.:0.000   1st Qu.:0.000000   1st Qu.:0.00000  
##  Median :0.00000   Median :0.000   Median :0.000000   Median :0.00000  
##  Mean   :0.04218   Mean   :0.067   Mean   :0.004963   Mean   :0.01737  
##  3rd Qu.:0.00000   3rd Qu.:0.000   3rd Qu.:0.000000   3rd Qu.:0.00000  
##  Max.   :1.00000   Max.   :1.000   Max.   :1.000000   Max.   :1.00000  
##     BUILDING            BUSHES           CABIN            CACTUS        
##  Min.   :0.000000   Min.   :0.0000   Min.   :0.0000   Min.   :0.000000  
##  1st Qu.:0.000000   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.000000  
##  Median :0.000000   Median :0.0000   Median :0.0000   Median :0.000000  
##  Mean   :0.002481   Mean   :0.2978   Mean   :0.1712   Mean   :0.009926  
##  3rd Qu.:0.000000   3rd Qu.:1.0000   3rd Qu.:0.0000   3rd Qu.:0.000000  
##  Max.   :1.000000   Max.   :1.0000   Max.   :1.0000   Max.   :1.000000  
##   CIRCLE_FRAME          CIRRUS            CLIFF             CLOUDS      
##  Min.   :0.000000   Min.   :0.00000   Min.   :0.00000   Min.   :0.0000  
##  1st Qu.:0.000000   1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.0000  
##  Median :0.000000   Median :0.00000   Median :0.00000   Median :0.0000  
##  Mean   :0.004963   Mean   :0.06948   Mean   :0.01985   Mean   :0.4442  
##  3rd Qu.:0.000000   3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:1.0000  
##  Max.   :1.000000   Max.   :1.00000   Max.   :1.00000   Max.   :1.0000  
##     CONIFER          CUMULUS         DECIDUOUS       DIANE_ANDRE      
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.000000  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.000000  
##  Median :1.0000   Median :0.0000   Median :1.0000   Median :0.000000  
##  Mean   :0.5261   Mean   :0.2134   Mean   :0.5633   Mean   :0.002481  
##  3rd Qu.:1.0000   3rd Qu.:0.0000   3rd Qu.:1.0000   3rd Qu.:0.000000  
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.0000   Max.   :1.000000  
##       DOCK          DOUBLE_OVAL_FRAME       FARM              FENCE        
##  Min.   :0.000000   Min.   :0.000000   Min.   :0.000000   Min.   :0.00000  
##  1st Qu.:0.000000   1st Qu.:0.000000   1st Qu.:0.000000   1st Qu.:0.00000  
##  Median :0.000000   Median :0.000000   Median :0.000000   Median :0.00000  
##  Mean   :0.002481   Mean   :0.002481   Mean   :0.002481   Mean   :0.05955  
##  3rd Qu.:0.000000   3rd Qu.:0.000000   3rd Qu.:0.000000   3rd Qu.:0.00000  
##  Max.   :1.000000   Max.   :1.000000   Max.   :1.000000   Max.   :1.00000  
##       FIRE          FLORIDA_FRAME         FLOWERS             FOG         
##  Min.   :0.000000   Min.   :0.000000   Min.   :0.00000   Min.   :0.00000  
##  1st Qu.:0.000000   1st Qu.:0.000000   1st Qu.:0.00000   1st Qu.:0.00000  
##  Median :0.000000   Median :0.000000   Median :0.00000   Median :0.00000  
##  Mean   :0.002481   Mean   :0.002481   Mean   :0.02978   Mean   :0.05707  
##  3rd Qu.:0.000000   3rd Qu.:0.000000   3rd Qu.:0.00000   3rd Qu.:0.00000  
##  Max.   :1.000000   Max.   :1.000000   Max.   :1.00000   Max.   :1.00000  
##      FRAMED           GRASS            GUEST         HALF_CIRCLE_FRAME 
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.00000   Min.   :0.000000  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.00000   1st Qu.:0.000000  
##  Median :0.0000   Median :0.0000   Median :0.00000   Median :0.000000  
##  Mean   :0.1315   Mean   :0.3524   Mean   :0.05459   Mean   :0.002481  
##  3rd Qu.:0.0000   3rd Qu.:1.0000   3rd Qu.:0.00000   3rd Qu.:0.000000  
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.00000   Max.   :1.000000  
##  HALF_OVAL_FRAME        HILLS              LAKE            LAKES  
##  Min.   :0.000000   Min.   :0.00000   Min.   :0.0000   Min.   :0  
##  1st Qu.:0.000000   1st Qu.:0.00000   1st Qu.:0.0000   1st Qu.:0  
##  Median :0.000000   Median :0.00000   Median :0.0000   Median :0  
##  Mean   :0.002481   Mean   :0.04467   Mean   :0.3548   Mean   :0  
##  3rd Qu.:0.000000   3rd Qu.:0.00000   3rd Qu.:1.0000   3rd Qu.:0  
##  Max.   :1.000000   Max.   :1.00000   Max.   :1.0000   Max.   :0  
##    LIGHTHOUSE            MILL               MOON             MOUNTAIN    
##  Min.   :0.000000   Min.   :0.000000   Min.   :0.000000   Min.   :0.000  
##  1st Qu.:0.000000   1st Qu.:0.000000   1st Qu.:0.000000   1st Qu.:0.000  
##  Median :0.000000   Median :0.000000   Median :0.000000   Median :0.000  
##  Mean   :0.002481   Mean   :0.004963   Mean   :0.007444   Mean   :0.397  
##  3rd Qu.:0.000000   3rd Qu.:0.000000   3rd Qu.:0.000000   3rd Qu.:1.000  
##  Max.   :1.000000   Max.   :1.000000   Max.   :1.000000   Max.   :1.000  
##    MOUNTAINS          NIGHT            OCEAN           OVAL_FRAME     
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.00000   Min.   :0.00000  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.00000   1st Qu.:0.00000  
##  Median :0.0000   Median :0.0000   Median :0.00000   Median :0.00000  
##  Mean   :0.2457   Mean   :0.0273   Mean   :0.08933   Mean   :0.09429  
##  3rd Qu.:0.0000   3rd Qu.:0.0000   3rd Qu.:0.00000   3rd Qu.:0.00000  
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.00000   Max.   :1.00000  
##    PALM_TREES           PATH            PERSON            PORTRAIT       
##  Min.   :0.00000   Min.   :0.0000   Min.   :0.000000   Min.   :0.000000  
##  1st Qu.:0.00000   1st Qu.:0.0000   1st Qu.:0.000000   1st Qu.:0.000000  
##  Median :0.00000   Median :0.0000   Median :0.000000   Median :0.000000  
##  Mean   :0.02233   Mean   :0.1216   Mean   :0.002481   Mean   :0.007444  
##  3rd Qu.:0.00000   3rd Qu.:0.0000   3rd Qu.:0.000000   3rd Qu.:0.000000  
##  Max.   :1.00000   Max.   :1.0000   Max.   :1.000000   Max.   :1.000000  
##  RECTANGLE_3D_FRAME RECTANGULAR_FRAME      RIVER            ROCKS       
##  Min.   :0.000000   Min.   :0.000000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.000000   1st Qu.:0.000000   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :0.000000   Median :0.000000   Median :0.0000   Median :0.0000  
##  Mean   :0.002481   Mean   :0.002481   Mean   :0.3127   Mean   :0.1911  
##  3rd Qu.:0.000000   3rd Qu.:0.000000   3rd Qu.:1.0000   3rd Qu.:0.0000  
##  Max.   :1.000000   Max.   :1.000000   Max.   :1.0000   Max.   :1.0000  
##  SEASHELL_FRAME          SNOW        SNOWY_MOUNTAIN    SPLIT_FRAME      
##  Min.   :0.000000   Min.   :0.0000   Min.   :0.0000   Min.   :0.000000  
##  1st Qu.:0.000000   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.000000  
##  Median :0.000000   Median :0.0000   Median :0.0000   Median :0.000000  
##  Mean   :0.002481   Mean   :0.1861   Mean   :0.2705   Mean   :0.002481  
##  3rd Qu.:0.000000   3rd Qu.:0.0000   3rd Qu.:1.0000   3rd Qu.:0.000000  
##  Max.   :1.000000   Max.   :1.0000   Max.   :1.0000   Max.   :1.000000  
##    STEVE_ROSS       STRUCTURE           SUN            TOMB_FRAME      
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.00000   Min.   :0.000000  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.00000   1st Qu.:0.000000  
##  Median :0.0000   Median :0.0000   Median :0.00000   Median :0.000000  
##  Mean   :0.0273   Mean   :0.2109   Mean   :0.09926   Mean   :0.002481  
##  3rd Qu.:0.0000   3rd Qu.:0.0000   3rd Qu.:0.00000   3rd Qu.:0.000000  
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.00000   Max.   :1.000000  
##       TREE            TREES         TRIPLE_FRAME        WATERFALL      
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.000000   Min.   :0.00000  
##  1st Qu.:1.0000   1st Qu.:1.0000   1st Qu.:0.000000   1st Qu.:0.00000  
##  Median :1.0000   Median :1.0000   Median :0.000000   Median :0.00000  
##  Mean   :0.8958   Mean   :0.8362   Mean   :0.002481   Mean   :0.09677  
##  3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:0.000000   3rd Qu.:0.00000  
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.000000   Max.   :1.00000  
##      WAVES            WINDMILL         WINDOW_FRAME          WINTER      
##  Min.   :0.00000   Min.   :0.000000   Min.   :0.000000   Min.   :0.0000  
##  1st Qu.:0.00000   1st Qu.:0.000000   1st Qu.:0.000000   1st Qu.:0.0000  
##  Median :0.00000   Median :0.000000   Median :0.000000   Median :0.0000  
##  Mean   :0.08437   Mean   :0.002481   Mean   :0.002481   Mean   :0.1712  
##  3rd Qu.:0.00000   3rd Qu.:0.000000   3rd Qu.:0.000000   3rd Qu.:0.0000  
##  Max.   :1.00000   Max.   :1.000000   Max.   :1.000000   Max.   :1.0000  
##   WOOD_FRAMED      
##  Min.   :0.000000  
##  1st Qu.:0.000000  
##  Median :0.000000  
##  Mean   :0.002481  
##  3rd Qu.:0.000000  
##  Max.   :1.000000

Prepare the data

data<-bob_ross %>% 
  janitor::clean_names() %>% 
  separate(episode, into = c("season", "episode"), sep = "E") %>% 
  mutate(season = str_extract(season, "[:digit:]+")) %>% 
  mutate_at(vars(season, episode), as.integer) %>% 
    select(-episode, -title) %>%
  gather(-season, key = "element", value = "count") %>%
  mutate(element = case_when(.$element %in% c("lake", "lakes") ~ "lake",
                             TRUE ~ as.character(.$element))) %>% 
  mutate(element = case_when(.$element %in% c("mountain", "mountains") ~ "mountain",
                             TRUE ~ as.character(.$element))) %>% 
  mutate(element = case_when(.$element %in% c("tree", "trees", "conifer", "palm_trees", "deciduous") ~ "tree",
                             TRUE ~ as.character(.$element))) %>% 
  mutate(element = case_when(.$element %in% c("person", "portrait") ~ "portrait",
                             TRUE ~ as.character(.$element))) %>% 
  mutate(element = case_when(.$element %in% c("cloud", "cumulus") ~ "cloud",
                             TRUE ~ as.character(.$element))) %>% 
  group_by(season, element) %>% 
  summarise(count=sum(count))

#Quels sont les 3 éléments les plus importants dans les peintures:
sommaire<-data %>% 
  group_by(element) %>% 
  summarise(count=sum(count))

#Arbres: 1146
#montagnes:259
#nuages: 179

#modifier la base de données pour visualisation:
freq<-data %>% 
  mutate(element=ifelse(!element %in% c("tree", "mountain", "clouds"), "divers", element))

cat<-freq %>% 
  inspect_cat()

cat$levels$element

data_cat<-freq %>% 
  filter(element %in% c("tree", "mountain", "clouds"))

data_divers<-freq %>% 
  filter(!element %in% c("tree", "mountain", "clouds"))

Visualize the data

#Graphique
gg<-ggplot(data_cat, aes(x=season, y=count, group=element, color=element))
gg <- gg + geom_point(data=data_divers,size=4, alpha=0.3, color="#AFB7BB")
gg <- gg + geom_point(data=data_cat, size=4, alpha=0.7)
gg <- gg + scale_color_manual(values=c("#266DD3", "#F6AE2D","#33673B"))
gg <- gg + geom_smooth(data=data_cat,se=FALSE, size=2)
#retirer la légende
gg <- gg + theme(legend.position = "none")
#ajuster les axes 
gg <- gg + scale_y_continuous(breaks=seq(00,50,10), limits = c(0, 50))
gg <- gg + scale_x_continuous(breaks=seq(1,31,5), limits = c(1, 33.5))
#modifier le thème
gg <- gg +  theme(panel.border = element_blank(),
                    panel.background = element_blank(),
                    plot.background = element_blank(),
                    panel.grid.major.y= element_line(size=0.2,linetype="dotted", color="#6D7C83"),
                    panel.grid.major.x= element_blank(),
                    panel.grid.minor = element_blank(),
                    axis.line.x = element_line(color="#6D7C83"),
                    axis.line.y = element_line(color="#6D7C83"),
                    axis.ticks.y = element_blank(),
                    axis.ticks.x = element_blank())
#ajouter les étiquettes de données
gg<-gg + annotate(geom="text", x=31,y=40, label="Tree", color="#33673B", size=4, hjust=0,vjust=0, fontface="bold")
gg<-gg + annotate(geom="text", x=31,y=5, label="Cloud", color="#266DD3", size=4, hjust=0,vjust=0, fontface="bold")
gg<-gg + annotate(geom="text", x=31,y=11,  label="Mountain", color="#F6AE2D", size=4, hjust=0,vjust=0, fontface="bold")
#ajouter les titres
gg<-gg + labs(title=" ",
              subtitle="",
              y="Number", 
              x="Season")
gg<-gg + theme(plot.title    = element_blank(),
                 plot.subtitle = element_blank(),
                 axis.title.y  = element_text(hjust=1, vjust=0, size=12, color="#6D7C83", face="bold"),
                 axis.title.x  = element_text(hjust=0, vjust=0, size=12, color="#6D7C83", face="bold"),
                 axis.text.y   = element_text(hjust=0.5, vjust=0, size=12, color="#6D7C83", face="bold"), 
                 axis.text.x   = element_text(hjust=0.5, vjust=0, size=12, color="#6D7C83", face="bold"))
Posted on:
August 7, 2019
Length:
7 minute read, 1414 words
Categories:
rstats tidyverse tidytuesday
Tags:
rstats tidyverse tidytuesday
See Also:
Predicting MO with H2O Models from IRDA data
IRDA soil data
This is the begining of a cheat sheet!