Chapter 8 Reporting: hierarchical chart

Scenario. Upon reviewing the growth report you created, Lucy asked for a report that shows composition of Product Categories and Sub Categories based on certain filters, including Year, Country, Customer Gender, and Age Group.

Specifically, Lucy wants to see the report visualized using a hierarchical chart.

Load necessary libraries

library(tidyverse)
library(treemapify)
library(RColorBrewer)
library(knitr)
library(scales)

Load data for Lab7

my_data <- readRDS("./data/processing/data4week3.rds")

8.1 Explore the sales composition of bikes category for each age group. Which age group does the composition (rank of sales) differ than the rest?

ans_1 <- my_data %>%
    filter(`Product Category` == "Bikes") %>%
    group_by(`Age Group`, `Sub Category`) %>%
    summarise(Total = sum(Revenue)) %>%
    arrange(desc(Total)) %>% 
    ungroup() 

ans_1 %>%
  mutate_at(vars(Total), funs(scales::comma)) %>% 
  kable()
Age Group Sub Category Total
Adults Road Bikes 15,723,543
Young Adults Road Bikes 12,439,924
Adults Mountain Bikes 10,534,548
Young Adults Mountain Bikes 7,861,417
Youth Road Bikes 5,140,111
Adults Touring Bikes 4,078,076
Youth Mountain Bikes 2,661,311
Young Adults Touring Bikes 2,614,215
Youth Touring Bikes 593,930
Seniors Mountain Bikes 66,250
Seniors Road Bikes 59,483
Seniors Touring Bikes 9,326

Show answer on chart:

ggplot(ans_1, aes(area = Total, fill = `Sub Category`, group = `Age Group`, label = `Age Group` )) +
    geom_treemap(colour = "white", size = 2) +
    geom_treemap_text(colour = "black", place = "centre", grow = T, min.size = 3) +
    scale_fill_brewer(palette = "Set2", labels = paste(ans_1$`Sub Category`, '\t')) +
    theme(legend.position = "top",
          legend.title = element_blank(),
          legend.text = element_text(size = 14, face = "bold"))

Answer: Seniors (right upper corner)


Now explore the sales composition of Bikes category for each Age Group for the Male customers.

ans_2 <- my_data %>%
    filter(`Product Category` == "Bikes", `Customer Gender` == "M") %>%
    group_by(`Age Group`, `Sub Category`) %>%
    summarise(Total = sum(Revenue)) %>%
    arrange(desc(Total)) %>% 
    ungroup()

ans_2 %>% 
  mutate_at(vars(Total), funs(scales::comma)) %>% 
  kable()
Age Group Sub Category Total
Adults Road Bikes 7,565,634
Young Adults Road Bikes 6,393,719
Adults Mountain Bikes 5,260,117
Young Adults Mountain Bikes 3,660,311
Youth Road Bikes 2,787,992
Adults Touring Bikes 2,028,869
Youth Mountain Bikes 1,406,305
Young Adults Touring Bikes 1,276,009
Youth Touring Bikes 330,496
Seniors Road Bikes 44,154
Seniors Mountain Bikes 41,447
Seniors Touring Bikes 4,563

8.2 Now explore the sales composition of bikes category for each age group, for the Male customers. Which age group does the composition differ than the rest?

ggplot(ans_2, aes(area = Total, fill = `Sub Category`, group = `Sub Category`, label = `Age Group` )) +
    geom_treemap(colour = "white", size = 2) +
    geom_treemap_text(colour = "black", place = "centre", grow = T, min.size = 3) +
    scale_fill_brewer(palette = "Set2", labels = paste(ans_2$`Sub Category`, '\t')) +
    theme(legend.position = "top",
          legend.title = element_blank(),
          legend.text = element_text(size = 14, face = "bold"))

Answer 2: The composition are the same across Age Group for Male customers.


8.3 Clear all filters. Now, filter for the year 2016 and Germany. Rank the sales from the highest to lowest for the clothing category

ans_3 <- my_data %>%
    filter(Year == 2016, Country == "Germany", `Product Category` == "Clothing") %>%
    group_by(`Sub Category`) %>%
    summarise(Total = sum(Revenue)) %>%
    arrange(desc(Total)) %>% 
    ungroup()

ans_3 %>% 
  mutate_at(vars(Total), funs(scales::comma)) %>% 
  kable()
Sub Category Total
Jerseys 121,186
Vests 25,190
Caps 20,441
Gloves 17,337
Shorts 6,066
Socks 3,646

Show answer on chart:

ggplot(ans_3, aes(x = reorder(`Sub Category`, -Total), y = Total, fill = `Sub Category`)) +
    geom_bar(stat = "identity", width = 0.5, show.legend = FALSE) +
    labs(x = "Sub Category") +
    scale_y_log10(breaks = c(1, 1e2, 1e3, 1e4, 2e4, 1e5),
                  expand = c(0.02, 0.0), labels = scales::dollar) +
    theme_classic() +
    theme(axis.title = element_text(size = 12, face = "bold"),
          axis.text = element_text(size = 11, colour = "black"))


8.4 Rank the sales from the highest to lowest for the clothing category. Keep the filter settings and add filter by male customers.

ans_4 <- my_data %>%
    filter(Year == 2016, Country == "Germany",
           `Product Category` == "Clothing", `Customer Gender` == "M") %>%
    group_by(`Sub Category`) %>%
    summarise(Total = sum(Revenue)) %>%
    arrange(desc(Total)) %>% 
    ungroup()

ans_4 %>% 
  mutate_at(vars(Total), funs(scales::comma)) %>% 
  kable()
Sub Category Total
Jerseys 64,969
Vests 13,528
Gloves 11,511
Caps 9,776
Shorts 4,568
Socks 2,304

Show answer on chart:

ggplot(ans_4, aes(x = reorder(`Sub Category`, -Total), y = Total, fill = `Sub Category`)) +
    geom_bar(stat = "identity", width = 0.5, show.legend = FALSE) +
    labs(x = "Sub Category") +
    scale_y_log10(breaks = c(1, 10, 100, 1e3, 5e3, 15e3, 5e4),
                  expand = c(0.02, 0.0), labels = scales::dollar) +
    theme_classic() +
    theme(axis.title = element_text(size = 12, face = "bold"),
          axis.text = element_text(size = 11, colour = "black"))


8.5 Rank the sales from the highest to lowest for the clothing category. Keep the filter settings and add filter by youth age group

ans_5 <- my_data %>%
    filter(Year == 2016, Country == "Germany", `Product Category` == "Clothing",
           `Customer Gender` == "M", `Age Group` == "Youth") %>%
    group_by(`Sub Category`) %>%
    summarise(Total = sum(Revenue)) %>%
    arrange(desc(Total)) %>% 
    ungroup()

ans_5 %>% 
  mutate_at(vars(Total), funs(scales::comma)) %>% 
  kable()
Sub Category Total
Jerseys 5,723
Gloves 3,208
Caps 1,119
Vests 536
Socks 219
Shorts 57

Show answer on chart:

ggplot(ans_5, aes(x = reorder(`Sub Category`, -Total), y = Total, fill = `Sub Category`)) +
    geom_bar(stat = "identity", width = 0.5, show.legend = FALSE) +
    labs(x = "Sub Category") +
    scale_y_log10(breaks = c(1, 10, 100, 1000, 5000),
                  expand = c(0.02, 0.0), labels = scales::dollar) +
    theme_classic() +
    theme(axis.title = element_text(size = 12, face = "bold"),
          axis.text = element_text(size = 11, colour = "black")
          )