Descriptive Analytics-Part 6: Interactive dashboard ( 2/2) solutions

Below are the solutions to these exercises on interactive dashboarding.

If you feel you need the full script, you can find it here.

Learn more about Shiny in the online course R Shiny Interactive Web Apps – Next Level Data Visualization. In this course you will learn how to create advanced Shiny web apps; embed video, pdfs and images; add focus and zooming tools; and many other functionalities (30 lectures, 3hrs.).

###############
#             #
# Exercise 1  #
#             #
###############

# Exercise 1: page definition that so far carries only the header panel.
ui <- fluidPage(
  pageWithSidebar(
    headerPanel("Visualization")
  )
)
## Error in eval(expr, envir, enclos): could not find function "fluidPage"
###############
#             #
# Exercise 2  #
#             #
###############

# Exercise 2: header panel plus an (still empty) sidebar panel.
ui <- fluidPage(
  pageWithSidebar(
    headerPanel("Visualization"),
    sidebarPanel()
  )
)
## Error in eval(expr, envir, enclos): could not find function "fluidPage"
###############
#             #
# Exercise 3  #
#             #
###############

# Exercise 3: the sidebar gets two drop-downs. "delays" selects one of the
# delay columns and "var" selects one of the categorical columns.
ui <- fluidPage(
  pageWithSidebar(
    headerPanel("Visualization"),
    sidebarPanel(
      selectInput(
        "delays",
        h3("Select type of delay"),
        list(
          "Carrier" = "CarrierDelay",
          "Weather" = "WeatherDelay",
          "NAS" = "NASDelay",
          "Security" = "SecurityDelay",
          "LateAircraft" = "LateAircraftDelay"
        ),
        selected = "CarrierDelay"
      ),
      selectInput(
        "var",
        h3("Select categorical variable"),
        list(
          "Destination" = "Dest",
          "Origin" = "Origin",
          "Carrier" = "UniqueCarrier",
          "Airplane" = "TailNum",
          "CancellationCode" = "CancellationCode"
        ),
        selected = "Dest"
      )
    )
  )
)
## Error in eval(expr, envir, enclos): could not find function "fluidPage"
###############
#             #
# Exercise 4  #
#             #
###############

# Exercise 4: adds the "plot_cont" radio buttons that choose the chart type
# for the delay variable (1 = histogram, 2 = scatter plot, 3 = violin plot;
# histogram is the default).
ui <- fluidPage(
  pageWithSidebar(
    headerPanel("Visualization"),
    sidebarPanel(
      selectInput(
        "delays",
        h3("Select type of delay"),
        list(
          "Carrier" = "CarrierDelay",
          "Weather" = "WeatherDelay",
          "NAS" = "NASDelay",
          "Security" = "SecurityDelay",
          "LateAircraft" = "LateAircraftDelay"
        ),
        selected = "CarrierDelay"
      ),
      selectInput(
        "var",
        h3("Select categorical variable"),
        list(
          "Destination" = "Dest",
          "Origin" = "Origin",
          "Carrier" = "UniqueCarrier",
          "Airplane" = "TailNum",
          "CancellationCode" = "CancellationCode"
        ),
        selected = "Dest"
      ),
      radioButtons(
        "plot_cont",
        h3("Select plot"),
        # Label spelling corrected (was "Histotgram"); the value 1 is unchanged.
        list(
          "Histogram" = 1,
          "Scatterplot" = 2,
          "ViolinPlot" = 3
        ),
        selected = 1
      )
    )
  )
)
## Error in eval(expr, envir, enclos): could not find function "fluidPage"
###############
#             #
# Exercise 5  #
#             #
###############

# Exercise 5: adds the "plot_cat" radio buttons that choose the chart type
# for the categorical variable (1 = bar plot, 2 = pie chart, 3 = rose wind;
# bar plot is the default).
ui <- fluidPage(
  pageWithSidebar(
    headerPanel("Visualization"),
    sidebarPanel(
      selectInput(
        "delays",
        h3("Select type of delay"),
        list(
          "Carrier" = "CarrierDelay",
          "Weather" = "WeatherDelay",
          "NAS" = "NASDelay",
          "Security" = "SecurityDelay",
          "LateAircraft" = "LateAircraftDelay"
        ),
        selected = "CarrierDelay"
      ),
      selectInput(
        "var",
        h3("Select categorical variable"),
        list(
          "Destination" = "Dest",
          "Origin" = "Origin",
          "Carrier" = "UniqueCarrier",
          "Airplane" = "TailNum",
          "CancellationCode" = "CancellationCode"
        ),
        selected = "Dest"
      ),
      radioButtons(
        "plot_cont",
        h3("Select plot"),
        # Label spelling corrected (was "Histotgram"); the value 1 is unchanged.
        list(
          "Histogram" = 1,
          "Scatterplot" = 2,
          "ViolinPlot" = 3
        ),
        selected = 1
      ),
      radioButtons(
        "plot_cat",
        h3("Select plot"),
        list(
          "Barplot" = 1,
          "Pie Chart" = 2,
          "Rose wind" = 3
        ),
        selected = 1
      )
    )
  )
)
## Error in eval(expr, envir, enclos): could not find function "fluidPage"
###############
#             #
# Exercise 6  #
#             #
###############

# Exercise 6: adds the main panel, holding a tab set with no tabs yet.
ui <- fluidPage(
  pageWithSidebar(
    headerPanel("Visualization"),
    sidebarPanel(
      selectInput(
        "delays",
        h3("Select type of delay"),
        list(
          "Carrier" = "CarrierDelay",
          "Weather" = "WeatherDelay",
          "NAS" = "NASDelay",
          "Security" = "SecurityDelay",
          "LateAircraft" = "LateAircraftDelay"
        ),
        selected = "CarrierDelay"
      ),
      selectInput(
        "var",
        h3("Select categorical variable"),
        list(
          "Destination" = "Dest",
          "Origin" = "Origin",
          "Carrier" = "UniqueCarrier",
          "Airplane" = "TailNum",
          "CancellationCode" = "CancellationCode"
        ),
        selected = "Dest"
      ),
      radioButtons(
        "plot_cont",
        h3("Select plot"),
        # Label spelling corrected (was "Histotgram"); the value 1 is unchanged.
        list(
          "Histogram" = 1,
          "Scatterplot" = 2,
          "ViolinPlot" = 3
        ),
        selected = 1
      ),
      radioButtons(
        "plot_cat",
        h3("Select plot"),
        list(
          "Barplot" = 1,
          "Pie Chart" = 2,
          "Rose wind" = 3
        ),
        selected = 1
      )
    ),
    mainPanel(tabsetPanel())
  )
)
## Error in eval(expr, envir, enclos): could not find function "fluidPage"
###############
#             #
# Exercise 7  #
#             #
###############

# Exercise 7: complete user interface. The sidebar holds the two drop-downs
# and the two radio-button groups; the main panel has a "Delays" tab showing
# plot output "cont" and a "Categorical" tab showing plot output "cat".
ui <- fluidPage(
  pageWithSidebar(
    headerPanel("Visualization"),
    sidebarPanel(
      selectInput(
        "delays",
        h3("Select type of delay"),
        list(
          "Carrier" = "CarrierDelay",
          "Weather" = "WeatherDelay",
          "NAS" = "NASDelay",
          "Security" = "SecurityDelay",
          "LateAircraft" = "LateAircraftDelay"
        ),
        selected = "CarrierDelay"
      ),
      selectInput(
        "var",
        h3("Select categorical variable"),
        list(
          "Destination" = "Dest",
          "Origin" = "Origin",
          "Carrier" = "UniqueCarrier",
          "Airplane" = "TailNum",
          "CancellationCode" = "CancellationCode"
        ),
        selected = "Dest"
      ),
      radioButtons(
        "plot_cont",
        h3("Select plot"),
        # Label spelling corrected (was "Histotgram"); the value 1 is unchanged.
        list(
          "Histogram" = 1,
          "Scatterplot" = 2,
          "ViolinPlot" = 3
        ),
        selected = 1
      ),
      radioButtons(
        "plot_cat",
        h3("Select plot"),
        list(
          "Barplot" = 1,
          "Pie Chart" = 2,
          "Rose wind" = 3
        ),
        selected = 1
      )
    ),
    mainPanel(
      tabsetPanel(
        tabPanel("Delays", plotOutput("cont")),
        tabPanel("Categorical", plotOutput("cat"))
      )
    )
  )
)
## Error in eval(expr, envir, enclos): could not find function "fluidPage"
###############
#             #
# Exercise 8  #
#             #
###############

# Server logic for the "Delays" tab.
#
# Renders output$cont as one of three ggplot2 charts of the delay column named
# by input$delays. input$plot_cont picks the chart: 1 = histogram,
# 2 = scatter plot against Full_Date, anything else = violin plot per
# DayOfWeek. Relies on a data frame called `flights` defined outside this
# function.
server <- function(input, output) {
  # One renderPlot() reads both inputs, so it re-runs whenever either changes;
  # no observe() wrapper is needed to swap renderers.
  output$cont <- renderPlot({
    if (input$plot_cont == 1) {
      ggplot(flights, aes(flights[[input$delays]])) +
        geom_histogram(
          breaks = seq(0, 100, by = 2),
          col = "red",
          aes(fill = ..count..)
        ) +
        scale_fill_gradient("Count", low = "green", high = "red") +
        labs(
          # paste() returns the title string. cat() only prints to the console
          # and returns NULL, which left the plot without a title.
          title = paste("Histogram for", input$delays, "time"),
          x = input$delays,
          y = "# of flights"
        )
    } else if (input$plot_cont == 2) {
      ggplot(
        flights,
        aes(
          x = Full_Date,
          y = flights[[input$delays]],
          color = UniqueCarrier,
          alpha = 1 / 3
        )
      ) +
        geom_point() +
        theme_bw(base_family = "Times") +
        theme(
          axis.text.x = element_blank(),
          axis.ticks.x = element_blank()
        )
    } else {
      ggplot(flights, aes(factor(DayOfWeek), flights[[input$delays]])) +
        geom_violin(aes(fill = factor(DayOfWeek)), trim = FALSE) +
        guides(fill = FALSE) +
        scale_y_continuous(limits = c(0, 25)) +
        labs(y = input$delays, x = "Day of Week")
    }
  })
}
###############
#             #
# Exercise 9  #
#             #
###############

# Server logic for both tabs.
#
# output$cont: chart of the delay column named by input$delays, chosen by
#   input$plot_cont (1 = histogram, 2 = scatter plot against Full_Date,
#   anything else = violin plot per DayOfWeek).
# output$cat: chart of the categorical column named by input$var, chosen by
#   input$plot_cat (1 = bar plot, 2 = pie chart, anything else = rose plot
#   over DayOfWeek).
# Relies on a data frame called `flights` defined outside this function.
server <- function(input, output) {
  # Each renderPlot() reads the inputs it needs, so it re-runs when they
  # change; no observe() wrapper is needed to swap renderers.
  output$cont <- renderPlot({
    if (input$plot_cont == 1) {
      ggplot(flights, aes(flights[[input$delays]])) +
        geom_histogram(
          breaks = seq(0, 100, by = 2),
          col = "red",
          aes(fill = ..count..)
        ) +
        scale_fill_gradient("Count", low = "green", high = "red") +
        labs(
          # paste() returns the title string. cat() only prints to the console
          # and returns NULL, which left the plot without a title.
          title = paste("Histogram for", input$delays, "time"),
          x = input$delays,
          y = "# of flights"
        )
    } else if (input$plot_cont == 2) {
      ggplot(
        flights,
        aes(
          x = Full_Date,
          y = flights[[input$delays]],
          color = UniqueCarrier,
          alpha = 1 / 3
        )
      ) +
        geom_point() +
        theme_bw(base_family = "Times") +
        theme(
          axis.text.x = element_blank(),
          axis.ticks.x = element_blank()
        )
    } else {
      ggplot(flights, aes(factor(DayOfWeek), flights[[input$delays]])) +
        geom_violin(aes(fill = factor(DayOfWeek)), trim = FALSE) +
        guides(fill = FALSE) +
        scale_y_continuous(limits = c(0, 25)) +
        labs(y = input$delays, x = "Day of Week")
    }
  })

  output$cat <- renderPlot({
    if (input$plot_cat == 1) {
      ggplot(flights) +
        aes(as.factor(flights[[input$var]])) +
        labs(
          # Same cat() -> paste() fix as for the histogram title.
          title = paste("Bar plot for", input$var),
          x = input$var,
          y = "# of flights"
        ) +
        theme(axis.text.x = element_text(angle = 90)) +
        geom_bar()
    } else if (input$plot_cat == 2) {
      ggplot(
        flights,
        aes(x = factor(1), fill = as.factor(flights[[input$var]]))
      ) +
        geom_bar(width = 1) +
        coord_polar(theta = "y")
    } else {
      # Fill by the values of the selected column. Mapping fill to input$var
      # itself gave every bar the same constant string instead.
      ggplot(
        flights,
        aes(x = DayOfWeek, fill = as.factor(flights[[input$var]]))
      ) +
        geom_bar(width = 1) +
        coord_polar()
    }
  })
}

###############
#             #
# Exercise 10 #
#             #
###############

# Exercise 10: launch the app from the `ui` and `server` objects built in the
# previous exercises.
shinyApp(ui = ui, server = server)



Descriptive Analytics-Part 6: Interactive dashboard ( 2/2)

Descriptive Analytics is the examination of data or content, usually manually performed, to answer the question “What happened?”. As this series of exercises comes to an end, the last part is going to be the development of a data product. Not everybody is able to code in R, so it is useful to be able to make GUIs in order to share your work with non-technical people. This part may be a little challenging, since it requires some basic knowledge of the shiny package. The outcome of this set of exercises will be almost like this web app (some variables are missing because I had to reduce the size of the data set).

In order to be able to solve this set of exercises you should have solved part 0, part 1, part 2, part 3, and part 4 of this series, and you should also have run this script, which contains some more data cleaning. In case you haven’t, run this script on your machine; it contains the lines of code we used to modify our data set. This is the tenth set of exercises in a series that aims to provide a descriptive analytics solution to the ‘2008’ data set from here. This data set, which contains the arrival and departure information for all domestic flights in the US from 2008, has become the “iris” data set for Big Data. The goal of descriptive analytics is to inform the user about what is going on in the data set. Before proceeding, it might be helpful to look over the help pages for fluidPage, pageWithSidebar, headerPanel, sidebarPanel, selectInput, mainPanel, tabPanel, observe, verbatimTextOutput, renderPrint, shinyApp.

For this set of exercises you will need to install and load the package shiny.

install.packages('shiny')
library(shiny)

I have also changed the values of the DaysOfWeek variable, if you wish to do that as well the code for that is :
install.packages('lubridate')
library(lubridate)
# Recode DayOfWeek as the labelled weekday of the flight date.
# NOTE(review): the date column is called Full_Date elsewhere in this series;
# confirm that Full1_Date is the intended column name.
flights$DayOfWeek <- wday(as.Date(flights$Full1_Date,'%m/%d/%Y'), label=TRUE)

Because the app requires some time to run, I have also removed the rows with missing values from the data set just to save some time.

# Keep only the rows where WeatherDelay, and then ArrDelay, is not missing.
flights <- flights[!is.na(flights[["WeatherDelay"]]), ]
flights <- flights[!is.na(flights[["ArrDelay"]]), ]

Answers to the exercises are available here.

If you obtained a different (correct) answer than those listed on the solutions page, please feel free to post your answer as a comment on that page. Moreover, it would be really nice of you to share the links to the apps you have developed. It would be a great contribution to the community.

Learn more about Shiny in the online course R Shiny Interactive Web Apps – Next Level Data Visualization. In this course you will learn how to create advanced Shiny web apps; embed video, pdfs and images; add focus and zooming tools; and many other functionalities (30 lectures, 3hrs.).

Exercise 1

Create the user interface and set as the header of the web app : “Descriptive Analysis”

Exercise 2

Create a side panel.

Exercise 3

Create two select list input controls. The former will contain the variables: CarrierDelay, WeatherDelay, NASDelay, SecurityDelay, LateAircraftDelay. The latter will contain the variables: Dest, Origin, UniqueCarrier, TailNum, CancellationCode.

Exercise 4

Create a set of radio buttons used to select a plot from a list ( Histogram, Scatter plot, Violin plot),and set as default plot the Histogram.

Exercise 5

Create a set of radio buttons used to select a plot from a list ( bar plot, pie chart, rose wind),and set as default plot the bar plot.

Exercise 6

Create a main panel.

Exercise 7

Create in the main panel two tabs named “Delays” and “Categorical” that will contain the plots of the exercises 4 and 5 respectively.

Exercise 8

Now that we are done with the user interface, create the server side of the app. Create the output of the first tab, which will be the plots from exercise 4 applied to the first set of variables from exercise 3 (notice that they are all continuous variables). Bear in mind that in the scatter plot the x-axis should be Full_Date, and in the violin plot the x-axis should be DayOfWeek, as in the previous set of exercises. (Please check out the first tab of the app to make things clearer.)

Exercise 9

Create the output of the second tab, which will be the plots from exercise 5 applied to the second set of variables from exercise 3. Use the knowledge you applied (or acquired) in the previous exercises for the plots, and make them as interesting as you can. (Please check out the second tab of the app to make things clearer.)

Exercise 10

Launch the app.




Descriptive Analytics-Part 6: Interactive dashboard ( 1/2)

Descriptive Analytics is the examination of data or content, usually manually performed, to answer the question “What happened?”. As this series of exercises comes to an end, the last part is going to be the development of a data product. Not everybody is able to code in R, so it is useful to be able to make GUIs in order to share your work with non-technical people. This part may be a little challenging, since it requires some basic knowledge of the shiny package. The outcome of this set of exercises will be almost like this web app (some variables are missing because I had to reduce the size of the data set).

In order to be able to solve this set of exercises you should have solved part 0, part 1, part 2, part 3, and part 4 of this series, and you should also have run this script, which contains some more data cleaning. In case you haven’t, run this script on your machine; it contains the lines of code we used to modify our data set. This is the ninth set of exercises in a series that aims to provide a descriptive analytics solution to the ‘2008’ data set from here. This data set, which contains the arrival and departure information for all domestic flights in the US from 2008, has become the “iris” data set for Big Data. The goal of descriptive analytics is to inform the user about what is going on in the data set. Before proceeding, it might be helpful to look over the help pages for fluidPage, pageWithSidebar, headerPanel, sidebarPanel, selectInput, mainPanel, tabPanel, verbatimTextOutput, renderPrint, shinyApp.

For this set of exercises you will need to install and load the package shiny.

install.packages('shiny')
library(shiny)

I have also changed the values of the DaysOfWeek variable, if you wish to do that as well the code for that is :
install.packages('lubridate')
library(lubridate)
# Recode DayOfWeek as the labelled weekday of the flight date.
# NOTE(review): the date column is called Full_Date elsewhere in this series;
# confirm that Full1_Date is the intended column name.
flights$DayOfWeek <- wday(as.Date(flights$Full1_Date,'%m/%d/%Y'), label=TRUE)

Because the app requires some time to run, I have also removed the rows with missing values from the data set just to save some time.

# Keep only the rows where WeatherDelay, and then ArrDelay, is not missing.
flights <- flights[!is.na(flights[["WeatherDelay"]]), ]
flights <- flights[!is.na(flights[["ArrDelay"]]), ]

Answers to the exercises are available here.

If you obtained a different (correct) answer than those listed on the solutions page, please feel free to post your answer as a comment on that page.

Learn more about Shiny in the online course R Shiny Interactive Web Apps – Next Level Data Visualization. In this course you will learn how to create advanced Shiny web apps; embed video, pdfs and images; add focus and zooming tools; and many other functionalities (30 lectures, 3hrs.).

Exercise 1

Create the user interface and set as the header of the web app : “Descriptive Analysis”

Exercise 2

Create a side panel.

Exercise 3

Create a select list input control that contains the functions: summary, str, head, tail, names.

Exercise 4

Create a select list input control that contains the functions : mean, median, max, min, range, sd.

Exercise 5

Create a select list input control that contains the variables : ActualElapsedTime, CRSElapsedTime, AirTime, ArrDelay, DepDelay, TaxiIn, TaxiOut.

Exercise 6

Create a main panel.

Exercise 7

Create in the main panel two tabs named “Content” and “Measures” that will contain the output of the functions of exercise 3 and exercise 4 respectively.

Exercise 8

Now that we are done with the user interface, create the server side of the app and the output that is supposed to print the functions of the exercise 3. (please check out the first tab of the app, to make things more clear).

Exercise 9

Create the output of the second tab, combining the functions of exercise 4 and the variables from the exercise 5.(please check out the second tab of the app, to make things more clear).

Exercise 10

Launch the app.




Descriptive Analytics-Part 6: Interactive dashboard ( 1/2) Solutions

Below are the solutions to these exercises on interactive dashboarding.
In case, you feel like you need the full script, you can find it here.

Learn more about Shiny in the online course R Shiny Interactive Web Apps – Next Level Data Visualization. In this course you will learn how to create advanced Shiny web apps; embed video, pdfs and images; add focus and zooming tools; and many other functionalities (30 lectures, 3hrs.).

###############
#             #
# Exercise 1  #
#             #
###############

# Exercise 1: page definition that so far carries only the header panel.
ui <- fluidPage(
  pageWithSidebar(
    headerPanel("Descriptive Analysis")
  )
)

###############
#             #
# Exercise 2  #
#             #
###############

# Exercise 2: header panel plus an (still empty) sidebar panel.
ui <- fluidPage(
  pageWithSidebar(
    headerPanel("Descriptive Analysis"),
    sidebarPanel()
  )
)

###############
#             #
# Exercise 3  #
#             #
###############

# Exercise 3: the sidebar gets the "fun" drop-down, whose numeric codes 1-5
# stand for the inspection functions listed in the labels.
ui <- fluidPage(
  pageWithSidebar(
    headerPanel("Descriptive Analysis"),
    sidebarPanel(
      selectInput(
        "fun",
        h3("Select functions"),
        list(
          "Summary" = 1,
          "Structure" = 2,
          "Head" = 3,
          "Tail" = 4,
          "Names" = 5
        ),
        selected = 1
      )
    )
  )
)

###############
#             #
# Exercise 4  #
#             #
###############

# Exercise 4: adds the "me" drop-down, whose numeric codes 1-6 stand for the
# summary measures listed in the labels.
ui <- fluidPage(
  pageWithSidebar(
    headerPanel("Descriptive Analysis"),
    sidebarPanel(
      # Drop-down from exercise 3.
      selectInput(
        "fun",
        h3("Select functions"),
        list(
          "Summary" = 1,
          "Structure" = 2,
          "Head" = 3,
          "Tail" = 4,
          "Names" = 5
        ),
        selected = 1
      ),
      selectInput(
        "me",
        h3("Select measure"),
        list(
          "Mean" = 1,
          "Median" = 2,
          "Max" = 3,
          "Min" = 4,
          "Range" = 5,
          "Standard  Deviation" = 6
        ),
        selected = 1
      )
    )
  )
)

###############
#             #
# Exercise 5  #
#             #
###############

# Exercise 5: adds the "var" drop-down that selects the numeric column the
# chosen measure is computed on.
ui <- fluidPage(
  pageWithSidebar(
    headerPanel("Descriptive Analysis"),
    sidebarPanel(
      selectInput(
        "fun",
        h3("Select functions"),
        list(
          "Summary" = 1,
          "Structure" = 2,
          "Head" = 3,
          "Tail" = 4,
          "Names" = 5
        ),
        selected = 1
      ),
      selectInput(
        "me",
        h3("Select measure"),
        # A stray `ui` token after the last choice made this list a syntax
        # error; it has been removed.
        list(
          "Mean" = 1,
          "Median" = 2,
          "Max" = 3,
          "Min" = 4,
          "Range" = 5,
          "Standard  Deviation" = 6
        ),
        selected = 1
      ),
      selectInput(
        "var",
        h3("Select variable"),
        list(
          "ActualElapsedTime" = "ActualElapsedTime",
          "CRSElapsedTime" = "CRSElapsedTime",
          "AirTime" = "AirTime",
          "ArrDelay" = "ArrDelay",
          "DepDelay" = "DepDelay",
          "TaxiIn" = "TaxiIn",
          "TaxiOut" = "TaxiOut"
        ),
        selected = "ActualElapsedTime"
      )
    )
  )
)

###############
#             #
# Exercise 6  #
#             #
###############

# Exercise 6: adds the main panel, holding a tab set with no tabs yet.
ui <- fluidPage(
  pageWithSidebar(
    headerPanel("Descriptive Analysis"),
    sidebarPanel(
      selectInput(
        "fun",
        h3("Select functions"),
        list(
          "Summary" = 1,
          "Structure" = 2,
          "Head" = 3,
          "Tail" = 4,
          "Names" = 5
        ),
        selected = 1
      ),
      selectInput(
        "me",
        h3("Select measure"),
        # A stray `ui` token after the last choice made this list a syntax
        # error; it has been removed.
        list(
          "Mean" = 1,
          "Median" = 2,
          "Max" = 3,
          "Min" = 4,
          "Range" = 5,
          "Standard  Deviation" = 6
        ),
        selected = 1
      ),
      selectInput(
        "var",
        h3("Select variable"),
        list(
          "ActualElapsedTime" = "ActualElapsedTime",
          "CRSElapsedTime" = "CRSElapsedTime",
          "AirTime" = "AirTime",
          "ArrDelay" = "ArrDelay",
          "DepDelay" = "DepDelay",
          "TaxiIn" = "TaxiIn",
          "TaxiOut" = "TaxiOut"
        ),
        selected = "ActualElapsedTime"
      )
    ),
    mainPanel(tabsetPanel())
  )
)

###############
#             #
# Exercise 7  #
#             #
###############

# Exercise 7: complete user interface. The sidebar holds the three drop-downs;
# the main panel has a "Content" tab showing text output "cont" and a
# "Measures" tab showing text output "meas".
ui <- fluidPage(
  pageWithSidebar(
    headerPanel("Descriptive Analysis"),
    sidebarPanel(
      selectInput(
        "fun",
        h3("Select functions"),
        list(
          "Summary" = 1,
          "Structure" = 2,
          "Head" = 3,
          "Tail" = 4,
          "Names" = 5
        ),
        selected = 1
      ),
      selectInput(
        "me",
        h3("Select measure"),
        # A stray `ui` token after the last choice made this list a syntax
        # error; it has been removed.
        list(
          "Mean" = 1,
          "Median" = 2,
          "Max" = 3,
          "Min" = 4,
          "Range" = 5,
          "Standard  Deviation" = 6
        ),
        selected = 1
      ),
      selectInput(
        "var",
        h3("Select variable"),
        list(
          "ActualElapsedTime" = "ActualElapsedTime",
          "CRSElapsedTime" = "CRSElapsedTime",
          "AirTime" = "AirTime",
          "ArrDelay" = "ArrDelay",
          "DepDelay" = "DepDelay",
          "TaxiIn" = "TaxiIn",
          "TaxiOut" = "TaxiOut"
        ),
        selected = "ActualElapsedTime"
      )
    ),
    mainPanel(
      tabsetPanel(
        tabPanel("Content", verbatimTextOutput("cont")),
        tabPanel("Measures", verbatimTextOutput("meas"))
      )
    )
  )
)

###############
#             #
# Exercise 8  #
#             #
###############

# Server logic for the "Content" tab.
#
# Prints to output$cont the result of the inspection function selected in
# input$fun (1 = summary, 2 = str, 3 = head, 4 = tail, anything else = names),
# applied to a data frame called `flights` defined outside this function.
server <- function(input, output) {
  output$cont <- renderPrint({
    # Select-input values are compared as strings; the unnamed last arm is the
    # fallback, matching the original final `else`.
    switch(as.character(input$fun),
      "1" = print(summary(flights)),
      # str() prints its report itself and returns NULL, so wrapping it in
      # print() appended a stray "NULL" line to the output.
      "2" = str(flights),
      "3" = print(head(flights)),
      "4" = print(tail(flights)),
      print(names(flights))
    )
  })
}

###############
#             #
# Exercise 9  #
#             #
###############

# Server logic for both tabs.
#
# output$cont: prints the result of the inspection function selected in
#   input$fun (1 = summary, 2 = str, 3 = head, 4 = tail, anything else =
#   names) applied to `flights`.
# output$meas: prints the measure selected in input$me (1 = mean, 2 = median,
#   3 = max, 4 = min, 5 = range, anything else = sd) of the column named by
#   input$var, ignoring missing values.
# Relies on a data frame called `flights` defined outside this function.
server <- function(input, output) {
  output$cont <- renderPrint({
    # Select-input values are compared as strings; the unnamed last arm is the
    # fallback, matching the original final `else`.
    switch(as.character(input$fun),
      "1" = print(summary(flights)),
      # str() prints its report itself and returns NULL, so wrapping it in
      # print() appended a stray "NULL" line to the output.
      "2" = str(flights),
      "3" = print(head(flights)),
      "4" = print(tail(flights)),
      print(names(flights))
    )
  })

  output$meas <- renderPrint({
    # Pick the summary function once, then apply it to the selected column.
    measure <- switch(as.character(input$me),
      "1" = mean,
      "2" = median,
      "3" = max,
      "4" = min,
      "5" = range,
      sd
    )
    print(measure(flights[[input$var]], na.rm = TRUE))
  })
}

###############
#             #
# Exercise 10 #
#             #
###############

# Exercise 10: launch the app from the `ui` and `server` objects built in the
# previous exercises.
shinyApp(ui = ui, server = server)



Descriptive Analytics-Part 5: Data Visualisation (Spatial data) Solutions

Below are the solutions to these exercises on data visualisation on spatial data.

####################
#                  #
#    Exercise 1    #
#                  #
####################

# Exercise 1: fetch the base map of the United States (Stamen "toner-lite"
# tiles at zoom level 4, not cropped).
map <- get_map(
  location = "united states",
  zoom = 4,
  source = "stamen",
  maptype = "toner-lite",
  crop = FALSE
)

####################
#                  #
#    Exercise 2    #
#                  #
####################

# Exercise 2: draw the map fetched in exercise 1.
ggmap(map)
plot of chunk unnamed-chunk-1
####################
#                  #
#    Exercise 3    #
#                  #
####################

# Exercise 3: one row per flight, annotated with per-destination totals.
# `flights` is the number of rows for that Dest and `arr_delay` is the summed
# ArrDelay (missing values ignored). ungroup() drops the grouping afterwards
# so it does not carry over into later operations on `destinations`.
destinations <- flights %>%
  group_by(Dest) %>%
  mutate(flights = n(), arr_delay = sum(ArrDelay, na.rm = TRUE)) %>%
  ungroup() %>%
  select(Dest, Origin, d_long, d_lat, flights, arr_delay, DayOfWeek)

# Base map layer reused by the following exercises.
m <- ggmap(map, extent = "device", legend = "topleft")
####################
#                  #
#    Exercise 4    #
#                  #
####################

# Exercise 4: plot each destination at its longitude/latitude.
m +
  geom_point(
    data = destinations,
    mapping = aes(x = d_long, y = d_lat)
  )
plot of chunk unnamed-chunk-1
####################
#                  #
#    Exercise 5    #
#                  #
####################

# Exercise 5: point size follows the number of flights per destination.
m +
  geom_point(
    data = destinations,
    mapping = aes(x = d_long, y = d_lat, size = flights),
    alpha = 0.5,
    color = "darkred"
  )
plot of chunk unnamed-chunk-1
####################
#                  #
#    Exercise 6    #
#                  #
####################

# Exercise 6: point colour follows the number of flights per destination, on
# a green-to-red gradient.
m +
  geom_point(
    data = destinations,
    mapping = aes(x = d_long, y = d_lat, colour = flights),
    alpha = 0.5
  ) +
  scale_colour_gradient(low = "green", high = "red")
plot of chunk unnamed-chunk-1
####################
#                  #
#    Exercise 7    #
#                  #
####################

# Exercise 7: size follows the number of flights, colour follows the total
# arrival delay per destination.
m +
  geom_point(
    data = destinations,
    mapping = aes(
      x = d_long,
      y = d_lat,
      size = flights,
      colour = arr_delay
    ),
    alpha = 0.5
  ) +
  scale_colour_gradient(low = "green", high = "red")
plot of chunk unnamed-chunk-1
####################
#                  #
#    Exercise 8    #
#                  #
####################


# Exercise 8: size follows the number of flights, colour follows the total
# arrival delay divided by the number of flights per destination.
m +
  geom_point(
    data = destinations,
    mapping = aes(
      x = d_long,
      y = d_lat,
      size = flights,
      colour = arr_delay / flights
    ),
    alpha = 0.5
  ) +
  scale_colour_gradient(low = "green", high = "red")
plot of chunk unnamed-chunk-1
####################
#                  #
#    Exercise 9    #
#                  #
####################

# Exercise 9: rebuild `m` with `destinations` as the base layer so the plot
# can be split into one panel per DayOfWeek.
m <- ggmap(
  map,
  extent = "device",
  base_layer = ggplot(destinations, aes(x = d_long, y = d_lat))
) +
  geom_point(size = 1)
m + facet_wrap(~DayOfWeek)
plot of chunk unnamed-chunk-1
####################
#                  #
#    Exercise 10   #
#                  #
####################
# Exercise 10: one panel per DayOfWeek, with size following the number of
# flights and colour following the arrival delay per flight.
m <- ggmap(
  map,
  extent = "device",
  base_layer = ggplot(
    destinations,
    aes(x = d_long, y = d_lat, size = flights)
  )
) +
  geom_point(aes(colour = arr_delay / flights), alpha = 0.5) +
  scale_colour_gradient(low = "green", high = "red")
m + facet_wrap(~DayOfWeek)
plot of chunk unnamed-chunk-1



Descriptive Analytics-Part 5: Data Visualisation (Spatial data)

Descriptive Analytics is the examination of data or content, usually manually performed, to answer the question “What happened?”.

In order to be able to solve this set of exercises you should have solved part 0, part 1, part 2, part 3, and part 4 of this series, and you should also have run this script, which contains some more data cleaning. In case you haven’t, run this script on your machine; it contains the lines of code we used to modify our data set. This is the eighth set of exercises in a series that aims to provide a descriptive analytics solution to the ‘2008’ data set from here. This data set, which contains the arrival and departure information for all domestic flights in the US from 2008, has become the “iris” data set for Big Data. In order to solve this set of exercises, you have to download this data set, which provides the coordinates of each airport. Please find the script used to create a merged data set here. I don’t expect you to do the pre-processing yourself, since it is beyond the scope of this set, but I highly encourage you to give it a try; if you did it in a better or more efficient way than I did, please post your solution in the comment section (it will be highly appreciated). Moreover, we will remove the rows with missing values (various delays), because the methods that we will use are computationally expensive, so having a big data set is just a waste of time. The goal of descriptive analytics is to inform the user about what is going on in the data set. A great way to do that quickly and effectively is by performing data visualisation. Data visualisation is also a form of art: it has to be simple, comprehensible and full of information. In this set of exercises we will explore different ways of visualising spatial data using the famous ggmap package. Before proceeding, it might be helpful to look over the help pages for get_map, ggmap, facet_wrap.

For this set of exercises you will need to install and load the packages ggplot2, dplyr, and ggmap.

install.packages('ggplot2')
library(ggplot2)
install.packages('dplyr')
library(dplyr)
install.packages('ggmap')
library(ggmap)

I have also changed the values of the DaysOfWeek variable, if you wish to do that as well the code for that is :
install.packages('lubridate')
library(lubridate)
# Recode DayOfWeek as the labelled weekday of the flight date.
# NOTE(review): the date column is called Full_Date elsewhere in this series;
# confirm that Full1_Date is the intended column name.
flights$DayOfWeek <- wday(as.Date(flights$Full1_Date,'%m/%d/%Y'), label=TRUE)

Answers to the exercises are available here.

If you obtained a different (correct) answer than those listed on the solutions page, please feel free to post your answer as a comment on that page.

Exercise 1

Query the map of United States using the get_map function.
It is recommended to experiment with the various types of maps and select the one that you think is the best. (I have used the toner-lite from Stamen Maps.)

Exercise 2

Print the map that you have selected.

Exercise 3

Modify the printed map so that it prints a bigger image (hint: the extent argument) and assign it to an object named m.

Exercise 4

Plot the destination airports of the flights on the map.

Exercise 5

Plot the destination airports of the flights on the map, the size of the points should be based on the number of flights that arrived to the destination airports.

Exercise 6

Plot the destination airports of the flights on the map, the colour of the points should be based on the number of flights that arrived to the destination airport. Make it a bit prettier, use the scale_colour_gradient and set the lows and the highs of your preferences.

Exercise 7

Plot the destination airports of the flights on the map, the colour of the points should be based on the number of flights that arrived to the destination airport and the size of the points should be based on the total delay of arrival of the flights that arrived at the destination airport.
Something is not right here, right?

Exercise 8

Plot the destination airports of the flights on the map, the colour of the points should be based on the number of flights that arrived to the destination airport and the size of the points should be based on the total delay of arrival divided by the number of flights per destination.

Exercise 9

Plot the destination airports for every day of the week (hint: facet_wrap).

Exercise 10
Plot the destination airports of the flights on the map for every day of the week; the colour of the points should be based on the number of flights that arrived at the destination airport, and the size of the points should be based on the total arrival delay of the flights that arrived at the destination airport.
(This may be a bit more challenging. If you can’t solve it, go to the solutions and try to understand the reason I did what I did; if you have any questions, please post them in the comment section.)




Descriptive Analytics-Part 5: Data Visualisation (Categorical variables)

Descriptive Analytics is the examination of data or content, usually manually performed, to answer the question “What happened?”.

In order to be able to solve this set of exercises you should have solved part 0, part 1, part 2, part 3, and part 4 of this series, and you should also have run this script, which contains some more data cleaning. In case you haven’t, run this script on your machine; it contains the lines of code we used to modify our data set. This is the sixth set of exercises in a series that aims to provide a descriptive analytics solution to the ‘2008’ data set from here. This data set, which contains the arrival and departure information for all domestic flights in the US from 2008, has become the “iris” data set for Big Data. The goal of descriptive analytics is to inform the user about what is going on in the data set. A great way to do that quickly and effectively is by performing data visualisation. Data visualisation is also a form of art: it has to be simple, comprehensible and full of information. In this set of exercises we will explore different ways of visualising categorical variables using the famous ggplot2 package. Before proceeding, it might be helpful to look over the help pages for ggplot, geom_bar, facet_wrap, facet_grid, coord_polar, geom_raster, and scale_fill_distiller.

For this set of exercises you will need to install and load the packages ggplot2, dplyr, and RColorBrewer.

install.packages('ggplot2')
library(ggplot2)
install.packages('dplyr')
library(dplyr)
install.packages('RColorBrewer')
library(RColorBrewer)

I have also changed the values of the DayOfWeek variable; if you wish to do that as well, the code is:
install.packages('lubridate')
library(lubridate)
# Recode DayOfWeek as a labelled weekday derived from the flight date,
# which is parsed as month/day/year.
# NOTE(review): the solution snippets refer to Full_Date, not Full1_Date --
# confirm which column name the cleaned data set actually uses.
flights$DayOfWeek <- wday(
  as.Date(flights$Full1_Date, format = "%m/%d/%Y"),
  label = TRUE
)

Answers to the exercises are available here.

If you obtained a different (correct) answer than those listed on the solutions page, please feel free to post your answer as a comment on that page.

Exercise 1
Construct a barplot which illustrates the number of flights per carrier.

Exercise 2
Make a barplot which illustrates the number of flights per carrier and each bar also contains information regarding the number of cancellations per carrier.

Exercise 3
Make a barplot which illustrates the number of flights per carrier but also for every carrier to have two bars that show the number of flights that were cancelled and the ones that departed.

Exercise 4
Make a barplot that shows the proportion of cancelled flights per carrier.

Exercise 5
Make seven barplots which illustrate the number of flights per carrier and each bar also contains information regarding the number of cancellations per carrier for every day of the week. hint:facet

Exercise 6
Make one barplot which illustrates the number of flights per carrier and each bar also contains information regarding the number of cancellations per carrier for every day of the week.

Exercise 7
Create a pie chart that illustrates the number of flights per carrier

Exercise 8
Create a wind rose that illustrates the number of flights per carrier for every day of the week.

Exercise 9
Make a heat map that illustrates the number of flights per carrier for every day of the week.

Exercise 10
With the same data as the heat map from the previous exercise, also provide some information regarding the cancellation ratio (2 digits recommended) and customise the heat map so that the higher values are more distinctive.




Descriptive Analytics-Part 5: Data Visualisation (Categorical variables) Solutions

Below are the solutions to these exercises on data visualization.

###############
#             #
# Exercise 1  #
#             #
###############

# Bar chart of the number of flights per carrier: geom_bar() counts the rows
# for each UniqueCarrier value.
ggplot(flights, aes(x = UniqueCarrier)) +
  geom_bar()
plot of chunk unnamed-chunk-1
###############
#             #
# Exercise 2  #
#             #
###############

# Store Cancelled as character so that it is used as a discrete fill
# variable, then stack the flight counts per carrier by cancellation status.
flights$Cancelled <- as.character(flights$Cancelled)
ggplot(flights, aes(x = UniqueCarrier, fill = Cancelled)) +
  geom_bar()
plot of chunk unnamed-chunk-1
###############
#             #
# Exercise 3  #
#             #
###############

# Side-by-side bars: within each carrier, one bar per Cancelled value.
ggplot(flights, aes(x = UniqueCarrier, fill = Cancelled)) +
  geom_bar(position = "dodge")
plot of chunk unnamed-chunk-1
###############
#             #
# Exercise 4  #
#             #
###############

# position = "fill" rescales every carrier's bar to height 1, so the fill
# segments show proportions instead of raw counts.
ggplot(flights, aes(x = UniqueCarrier, fill = Cancelled)) +
  geom_bar(position = "fill")
plot of chunk unnamed-chunk-1
###############
#             #
# Exercise 5  #
#             #
###############

# One stacked bar chart of flights per carrier for each day of the week.
# The fill legend is hidden with "none"; guides(fill = FALSE) is deprecated
# in current ggplot2 releases.
ggplot(flights, aes(x = UniqueCarrier, fill = Cancelled)) +
  geom_bar() +
  facet_wrap(~ DayOfWeek) +
  guides(fill = "none")
plot of chunk unnamed-chunk-1
###############
#             #
# Exercise 6  #
#             #
###############

# A single row of panels, one per day of the week, with carriers told apart
# by fill colour. The x-axis text and ticks are blanked.
ggplot(flights, aes(x = UniqueCarrier, fill = UniqueCarrier)) +
  geom_bar() +
  facet_grid(. ~ DayOfWeek) +
  theme(
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  )
plot of chunk unnamed-chunk-1
###############
#             #
# Exercise 7  #
#             #
###############

# A pie chart is a single stacked bar (constant x) drawn in polar
# coordinates, with the stacked counts mapped to the angle.
pie <- ggplot(
  flights,
  aes(x = factor(1), fill = factor(UniqueCarrier))
) +
  geom_bar(width = 1)
pie + coord_polar(theta = "y")
plot of chunk unnamed-chunk-1
###############
#             #
# Exercise 8  #
#             #
###############

# Wind rose: stacked bars per day of the week (split by carrier) drawn in
# polar coordinates; width = 1 leaves no gap between neighbouring days.
wind <- ggplot(flights, aes(x = DayOfWeek, fill = UniqueCarrier))
wind +
  geom_bar(width = 1) +
  coord_polar()
plot of chunk unnamed-chunk-1
###############
#             #
# Exercise 9  #
#             #
###############

# Number of flights for every carrier / day-of-week combination.
data <- flights %>%
  group_by(UniqueCarrier, DayOfWeek) %>%
  summarise(count = n())

# Heat map: carriers on the x-axis, days on the y-axis, tiles filled by count.
ggplot(data, aes(x = UniqueCarrier, y = DayOfWeek, fill = count)) +
  geom_raster()
plot of chunk unnamed-chunk-1
###############
#             #
# Exercise 10 #
#             #
###############

# Per carrier and day of the week: the number of flights and the mean of
# Cancelled after numeric conversion.
# NOTE(review): this is a cancellation ratio only if Cancelled holds 0/1
# values -- confirm against the cleaned data set.
data <- flights %>%
  group_by(UniqueCarrier, DayOfWeek) %>%
  summarise(
    count = n(),
    average_cancelled = mean(as.numeric(Cancelled))
  )

# Always print two decimals: round() followed by as.character() drops
# trailing zeros (0.10 becomes "0.1", 0 becomes "0").
data$label <- sprintf("%.2f", data$average_cancelled)

# Heat map of flight counts with the cancellation ratio printed on each tile.
ggplot(
  data,
  aes(x = UniqueCarrier, y = DayOfWeek, fill = count, label = label)
) +
  geom_raster() +
  geom_text(colour = "white", size = 2) +
  scale_fill_distiller(name = "# of flights", type = "div", palette = 9) +
  theme_bw()

plot of chunk unnamed-chunk-1



Descriptive Analytics-Part 5: Data Visualisation (Continuous variables)

Descriptive Analytics is the examination of data or content, usually manually performed, to answer the question “What happened?”.

In order to be able to solve this set of exercises you should have solved part 0, part 1, part 2, part 3, and part 4 of this series, and you should also have run this script, which contains some more data cleaning. In case you haven’t, run this script on your machine; it contains the lines of code we used to modify our data set. This is the sixth set of exercises in a series that aims to provide a descriptive analytics solution to the ‘2008’ data set from here. This data set, which contains the arrival and departure information for all domestic flights in the US from 2008, has become the “iris” data set for Big Data. The goal of descriptive analytics is to inform the user about what is going on in the data set. A great way to do that quickly and effectively is by performing data visualisation. Data visualisation is also a form of art: it has to be simple, comprehensible and full of information. In this set of exercises we will explore different ways of visualising continuous variables using the famous ggplot2 package. Before proceeding, it might be helpful to look over the help pages for ggplot, geom_histogram, scale_fill_gradient, geom_point, geom_line, geom_boxplot, coord_flip, and geom_violin.

For this set of exercises you will need to install and load the packages ggplot2 and dplyr.

install.packages('ggplot2')
library(ggplot2)
install.packages('dplyr')
library(dplyr)

Answers to the exercises are available here.

If you obtained a different (correct) answer than those listed on the solutions page, please feel free to post your answer as a comment on that page.

Exercise 1
Develop a histogram which illustrates the TaxiIn variable.

Exercise 2
Let’s make things a bit fancier: illustrate the histogram of the TaxiIn variable with a range from 0 to 50 and breaks every 2; the bars with the highest counts should be filled with red and those with the lowest counts with green. Finally, add a title.

Exercise 3
Make a scatter plot of ArrDelay in respect to Full_Date while illustrating each carrier with a different colour.

Exercise 4
Create a new data frame called mean_delay which holds the mean of ArrDelay (Mean_ArrDelay) for each carrier for every day.
Now make a scatter plot of Mean_ArrDelay with respect to Full_Date while illustrating each carrier with a different colour.

Exercise 5
Make the previous plot a bit more appealing by changing the alpha parameter of the data points and the theme of the plot, and by adding names to the x-axis and y-axis.

Exercise 6
With the same variables, plot a line chart.
Hint: set the group parameter in order to proceed.

Exercise 7
Create a box plot which illustrates the mean of daily ArrDelay for every day of the week.

Exercise 8
Modify the box plot by setting a colour and a size for the outliers, also make every day of the week to be illustrated with a different colour. Also, if you wish and your screen is not big enough, remove the legend.

Exercise 9
While a box plot is a great way to demonstrate distributions, a violin plot is an even better one. Plot a violin plot with the same data.

Exercise 10
Modify the violin plot, use different colour for every day of the week, remove the trim and the legends.




Descriptive Analytics-Part 5: Data Visualisation ( Continuous variables) Solutions

Below are the solutions to these exercises on data exploration.

####################
#                  #
#    Exercise 1    #
#                  #
####################

# Histogram of TaxiIn using geom_histogram()'s default binning.
ggplot(data = flights, mapping = aes(x = TaxiIn)) +
  geom_histogram()
plot of chunk unnamed-chunk-1
####################
#                  #
#    Exercise 2    #
#                  #
####################

# Histogram of TaxiIn over 0-50 in bins of width 2. Bars are outlined in red
# and filled by their count on a green (low) to red (high) gradient.
# The title now spells the variable as it is named in the data (TaxiIn).
ggplot(flights, aes(x = TaxiIn)) +
  geom_histogram(
    aes(fill = ..count..),
    breaks = seq(0, 50, by = 2),
    colour = "red"
  ) +
  scale_fill_gradient("Count", low = "green", high = "red") +
  labs(title = "Histogram for TaxiIn time")
plot of chunk unnamed-chunk-1
####################
#                  #
#    Exercise 3    #
#                  #
####################

# Arrival delay of every flight against its date, coloured by carrier.
ggplot(flights, aes(x = Full_Date, y = ArrDelay, colour = UniqueCarrier)) +
  geom_point()
## Warning: Removed 1 rows containing missing values (geom_point).
plot of chunk unnamed-chunk-1
####################
#                  #
#    Exercise 4    #
#                  #
####################

# Average arrival delay per date and carrier. mean() is called without
# na.rm, so a group containing a missing ArrDelay gets an NA mean, which
# geom_point() then leaves out with a warning.
mean_delay <- flights %>%
  group_by(Full_Date, UniqueCarrier) %>%
  summarise(Mean_ArrDelay = mean(ArrDelay))

# Base plot stored in p, then drawn as a scatter plot.
p <- ggplot(
  mean_delay,
  aes(x = Full_Date, y = Mean_ArrDelay, colour = UniqueCarrier)
)
p + geom_point()
## Warning: Removed 1 rows containing missing values (geom_point).
plot of chunk unnamed-chunk-1
####################
#                  #
#    Exercise 5    #
#                  #
####################

# Same scatter plot, restyled. alpha is a fixed setting, so it is passed to
# geom_point() rather than aes(): inside aes() it is treated as a mapped
# variable, which adds a spurious legend and does not apply 1/3 as the
# actual opacity.
ggplot(
  mean_delay,
  aes(x = Full_Date, y = Mean_ArrDelay, colour = UniqueCarrier)
) +
  geom_point(alpha = 1 / 3) +
  theme_bw(base_family = "Times") +
  theme(
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  ) +
  # Axis names, as requested by the exercise.
  labs(x = "Date", y = "Mean arrival delay")
## Warning: Removed 1 rows containing missing values (geom_point).
plot of chunk unnamed-chunk-1
####################
#                  #
#    Exercise 6    #
#                  #
####################

# Line chart with one line per carrier. Grouping by UniqueCarrier keeps each
# carrier's series separate; group = 1 would join the points of all carriers
# into a single path.
p <- ggplot(
  mean_delay,
  aes(
    x = Full_Date,
    y = Mean_ArrDelay,
    colour = UniqueCarrier,
    group = UniqueCarrier
  )
)
p + geom_line(alpha = 0.5)
plot of chunk unnamed-chunk-1
####################
#                  #
#    Exercise 7    #
#                  #
####################

# Mean arrival delay per date, keeping the day of the week alongside it.
# mean() is called without na.rm, so a date with a missing ArrDelay gets an
# NA mean.
mean_delay <- flights %>%
  group_by(Full_Date, DayOfWeek) %>%
  summarise(Mean_ArrDelay = mean(ArrDelay))
p <- ggplot(mean_delay, aes(x = factor(DayOfWeek), y = Mean_ArrDelay))

# Horizontal box plot: one box of daily means per day of the week.
p +
  geom_boxplot() +
  coord_flip()
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
plot of chunk unnamed-chunk-1
####################
#                  #
#    Exercise 8    #
#                  #
####################


# Box plot with larger dark-green outliers and one fill colour per day of the
# week. The fill legend is hidden with "none"; guides(fill = FALSE) is
# deprecated in current ggplot2 releases.
p +
  coord_flip() +
  geom_boxplot(
    aes(fill = DayOfWeek),
    outlier.colour = "darkgreen",
    outlier.size = 3
  ) +
  guides(fill = "none")
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
plot of chunk unnamed-chunk-1
####################
#                  #
#    Exercise 9    #
#                  #
####################

# Violin plot of the daily mean arrival delay per day of the week, reusing
# the base plot p built for the box plot.
p +
  geom_violin()
## Warning: Removed 1 rows containing non-finite values (stat_ydensity).
plot of chunk unnamed-chunk-1
####################
#                  #
#    Exercise 10   #
#                  #
####################
# Untrimmed violins, one fill colour per day of the week. The fill legend is
# hidden with "none"; guides(fill = FALSE) is deprecated in current ggplot2
# releases.
p +
  geom_violin(aes(fill = factor(DayOfWeek)), trim = FALSE) +
  guides(fill = "none")
## Warning: Removed 1 rows containing non-finite values (stat_ydensity).
plot of chunk unnamed-chunk-1