Data Science for Operational Excellence (Part-4)

Below are the solutions to these exercises on Linear Programming.

####################
#                  #
#    Exercise 1    #
#                  #
####################
library(lpSolve)

# Reproducible random problem data.
set.seed(1234)

# Problem dimensions: 4 supply points, 3 demand points, 2 commodities.
offers <- 4
demands <- 3
commodities <- 2

# Objective coefficients: one random cost per (offer, demand, commodity)
# variable.  replace = FALSE keeps the draws distinct; the original's
# wrapping c() was a no-op and T/F literals are spelled out.
f.obj <- sample(0:1000, offers * demands * commodities, replace = FALSE)

####################
#                  #
#    Exercise 2    #
#                  #
####################
# One ">=" demand constraint per (demand point, commodity) pair: the total
# shipped into demand point y of commodity x must meet its requirement.
constraints <- demands * commodities

# Constraint matrix built element by element.  Decision variables are
# ordered (offer i, demand j, commodity k) with i varying fastest, matching
# the order f.obj was generated in.  Preallocate a numeric vector (the
# original used vector(length = ...), a logical vector, and relied on
# implicit FALSE -> 0 coercion when assigning 1s).
vectorCon <- numeric(constraints * offers * demands * commodities)
place <- 1
for (x in 1:commodities) {
  for (y in 1:demands) {
    for (k in 1:commodities) {
      for (j in 1:demands) {
        for (i in 1:offers) {
          # Variable (i, j, k) enters the row for (y, x) only when it
          # ships commodity x into demand point y.
          if (x == k && y == j) {
            vectorCon[place] <- 1
          }
          place <- place + 1
        }
      }
    }
  }
}
f.con1 <- matrix(vectorCon, nrow = constraints, byrow = TRUE)
# lpSolve treats ">" as ">=".
f.dir1 <- rep(">", constraints)
f.rhs1 <- sample(100:500, constraints, replace = FALSE)

####################
#                  #
#    Exercise 3    #
#                  #
####################
# One "<=" supply constraint per (offer point, commodity) pair: the total
# shipped out of offer point y of commodity x cannot exceed availability.
constraints <- offers * commodities

# Same incidence-construction pattern as f.con1, keyed on the offer index
# instead of the demand index.  numeric() preallocation avoids the
# logical-vector coercion trick of the original.
vectorCon <- numeric(constraints * offers * demands * commodities)
place <- 1
for (x in 1:commodities) {
  for (y in 1:offers) {
    for (k in 1:commodities) {
      for (j in 1:demands) {
        for (i in 1:offers) {
          if (x == k && y == i) {
            vectorCon[place] <- 1
          }
          place <- place + 1
        }
      }
    }
  }
}
f.con2 <- matrix(vectorCon, nrow = constraints, byrow = TRUE)
# lpSolve treats "<" as "<=".
f.dir2 <- rep("<", constraints)
f.rhs2 <- sample(200:700, constraints, replace = FALSE) # offer

####################
#                  #
#    Exercise 4    #
#                  #
####################
# Stack the demand (>=) and supply (<=) constraint sets into one LP system.
f.con <- rbind(f.con1, f.con2)
f.dir <- c(f.dir1, f.dir2)
f.rhs <- c(f.rhs1, f.rhs2)

####################
#                  #
#    Exercise 5    #
#                  #
####################
# Solve the minimum-cost multi-commodity transportation problem.
sol <- lp("min", f.obj, f.con, f.dir, f.rhs)

# Reshape the flat solution into a suppliers x (demand, commodity) table;
# byrow = FALSE matches the variable ordering used to build the constraints.
solMatrix <- matrix(sol$solution, ncol = demands * commodities, byrow = FALSE)
rownames(solMatrix) <- paste("Supplier ", 1:offers)

# Preallocate the column-label vector.  (The original wrote
# length(demands*commodities), which is always 1 -- the length of a scalar --
# and only worked through accidental vector growth.)
realColnames <- character(demands * commodities)
place <- 1
for (k in 1:commodities) {
  for (j in 1:demands) {
    realColnames[place] <- paste("Rest.-Prod. (", j, ",", k, ")")
    place <- place + 1
  }
}
colnames(solMatrix) <- realColnames
solMatrix
##             Rest.-Prod. ( 1 , 1 ) Rest.-Prod. ( 2 , 1 )
## Supplier  1                   187                     0
## Supplier  2                     0                     0
## Supplier  3                     0                   352
## Supplier  4                     0                    72
##             Rest.-Prod. ( 3 , 1 ) Rest.-Prod. ( 1 , 2 )
## Supplier  1                     0                   290
## Supplier  2                   309                     0
## Supplier  3                     0                   174
## Supplier  4                     0                     0
##             Rest.-Prod. ( 2 , 2 ) Rest.-Prod. ( 3 , 2 )
## Supplier  1                     0                     0
## Supplier  2                    96                     0
## Supplier  3                   125                     0
## Supplier  4                   209                   118
####################
#                  #
#    Exercise 6    #
#                  #
####################
# One ">=" minimum-flow constraint per (offer, demand) route, summed over
# commodities: every supplier-to-restaurant route must carry at least the
# sampled minimum.
constraints <- offers * demands
vectorCon <- numeric(constraints * offers * demands * commodities)
place <- 1
for (x in 1:demands) {
  for (y in 1:offers) {
    for (k in 1:commodities) {
      for (j in 1:demands) {
        for (i in 1:offers) {
          # Variable (i, j, k) enters the row for route (y, x) for every
          # commodity k (no commodity condition here).
          if (x == j && y == i) {
            vectorCon[place] <- 1
          }
          place <- place + 1
        }
      }
    }
  }
}
f.con3 <- matrix(vectorCon, nrow = constraints, byrow = TRUE)
f.dir3 <- rep(">", constraints)
f.rhs3 <- sample(50:70, constraints, replace = FALSE) # capacityMin

####################
#                  #
#    Exercise 7    #
#                  #
####################
# Re-solve with the minimum-route-capacity constraints added.
f.con <- rbind(f.con1, f.con2, f.con3)
f.dir <- c(f.dir1, f.dir2, f.dir3)
f.rhs <- c(f.rhs1, f.rhs2, f.rhs3)

sol <- lp("min", f.obj, f.con, f.dir, f.rhs)
solMatrix <- matrix(sol$solution, ncol = demands * commodities, byrow = FALSE)
rownames(solMatrix) <- paste("Supplier ", 1:offers)

# Proper preallocation: the original length(demands*commodities) is 1.
realColnames <- character(demands * commodities)
place <- 1
for (k in 1:commodities) {
  for (j in 1:demands) {
    realColnames[place] <- paste("Rest.-Prod. (", j, ",", k, ")")
    place <- place + 1
  }
}
colnames(solMatrix) <- realColnames
solMatrix
##             Rest.-Prod. ( 1 , 1 ) Rest.-Prod. ( 2 , 1 )
## Supplier  1                   121                     0
## Supplier  2                    66                     0
## Supplier  3                     0                   352
## Supplier  4                     0                    72
##             Rest.-Prod. ( 3 , 1 ) Rest.-Prod. ( 1 , 2 )
## Supplier  1                    58                   235
## Supplier  2                   251                     0
## Supplier  3                     0                   168
## Supplier  4                     0                    61
##             Rest.-Prod. ( 2 , 2 ) Rest.-Prod. ( 3 , 2 )
## Supplier  1                    55                     0
## Supplier  2                    96                     0
## Supplier  3                    79                    52
## Supplier  4                   200                    66
####################
#                  #
#    Exercise 8    #
#                  #
####################
# One "<=" maximum-flow constraint per (offer, demand) route, summed over
# commodities.  Same incidence pattern as f.con3; only the direction and
# right-hand side differ.
constraints <- offers * demands
vectorCon <- numeric(constraints * offers * demands * commodities)
place <- 1
for (x in 1:demands) {
  for (y in 1:offers) {
    for (k in 1:commodities) {
      for (j in 1:demands) {
        for (i in 1:offers) {
          if (x == j && y == i) {
            vectorCon[place] <- 1
          }
          place <- place + 1
        }
      }
    }
  }
}
f.con4 <- matrix(vectorCon, nrow = constraints, byrow = TRUE)
f.dir4 <- rep("<", constraints)
f.rhs4 <- sample(100:500, constraints, replace = FALSE) # capacityMax

####################
#                  #
#    Exercise 9    #
#                  #
####################
# Combine all four constraint sets: demand, supply, route minimum, route maximum.
f.con <- rbind(f.con1, f.con2, f.con3, f.con4)
f.dir <- c(f.dir1, f.dir2, f.dir3, f.dir4)
f.rhs <- c(f.rhs1, f.rhs2, f.rhs3, f.rhs4)

####################
#                  #
#    Exercise 10   #
#                  #
####################
# Final solve with all constraint sets; reshape and label as before.
sol <- lp("min", f.obj, f.con, f.dir, f.rhs)
solMatrix <- matrix(sol$solution, ncol = demands * commodities, byrow = FALSE)
rownames(solMatrix) <- paste("Supplier ", 1:offers)

# Proper preallocation: the original length(demands*commodities) is 1.
realColnames <- character(demands * commodities)
place <- 1
for (k in 1:commodities) {
  for (j in 1:demands) {
    realColnames[place] <- paste("Rest.-Prod. (", j, ",", k, ")")
    place <- place + 1
  }
}
colnames(solMatrix) <- realColnames
solMatrix
##             Rest.-Prod. ( 1 , 1 ) Rest.-Prod. ( 2 , 1 )
## Supplier  1                    51                     0
## Supplier  2                   131                     0
## Supplier  3                     5                   295
## Supplier  4                     0                   129
##             Rest.-Prod. ( 3 , 1 ) Rest.-Prod. ( 1 , 2 )
## Supplier  1                    56                    78
## Supplier  2                   201                     0
## Supplier  3                    52                   299
## Supplier  4                     0                    87
##             Rest.-Prod. ( 2 , 2 ) Rest.-Prod. ( 3 , 2 )
## Supplier  1                    55                     2
## Supplier  2                   251                     0
## Supplier  3                     0                     0
## Supplier  4                   124                   116



Shiny Application Layouts Solutions (Part-2)

Below are the solutions to these exercises on Shiny Application Layouts.

####################
#                  #
#    Exercise 1    #
#                  #
####################

# Install shiny only if it is missing, then attach it.  Unconditionally
# calling install.packages() in a script re-downloads the package on
# every run and requires a CRAN mirror/network.
if (!requireNamespace("shiny", quietly = TRUE)) {
  install.packages("shiny")
}
library(shiny)

####################
#                  #
#    Exercise 2    #
#                  #
####################

#ui.R
library(shiny)
# NOTE(review): `diamonds` comes from ggplot2, which is not attached in
# this excerpt -- confirm ggplot2 is loaded in the app environment.
dataset <- diamonds

# Minimal fluid page: a browser-tab title plus a level-4 heading.
shinyUI(fluidPage(
  title = "Diamonds Explorer",
h4("Diamonds Analyzer")

))
#server.R
library(shiny)
# Empty server: the UI is static, so no reactive logic is needed yet.
function(input, output) {}

####################
#                  #
#    Exercise 3    #
#                  #
####################

#ui.R
library(shiny)
# NOTE(review): `diamonds` requires ggplot2 to be attached -- confirm.
dataset <- diamonds

shinyUI(fluidPage(
  title = "Diamonds Explorer",
h4("Diamonds Analyzer"),
  # One row with two empty 4-unit columns (Bootstrap 12-unit grid).
  fluidRow(column(4),
           column(4))

))
#server.R
library(shiny)
function(input, output) {}

####################
#                  #
#    Exercise 4    #
#                  #
####################

#ui.R
library(shiny)

shinyUI(fluidPage(
  title = "Diamonds Explorer",
h4("Diamonds Analyzer"),


  # First column: a select input 'x' listing the diamonds column names.
  # Second 4-unit column is left empty for now.
  fluidRow(column(4,
                  selectInput('x', 'Variable X', names(diamonds))),
           column(4))



))
#server.R
library(shiny)
function(input, output) {
}

####################
#                  #
#    Exercise 5    #
#                  #
####################

#ui.R
library(shiny)

shinyUI(fluidPage(
  title = "Diamonds Explorer",
  h4("Diamonds Analyzer"),

  # Two selects: 'x' defaults to the first column name, 'y' is given an
  # explicit default of the second column name (4th selectInput argument).
  fluidRow(column(4,
                  selectInput('x', 'Variable X', names(diamonds))),
           column(4,
                  selectInput('y', 'Variable Y', names(diamonds), names(diamonds)[[2]])))



))
#server.R
library(shiny)
function(input, output) {
}

####################
#                  #
#    Exercise 6    #
#                  #
####################

#ui.R
library(shiny)
shinyUI(fluidPage(
  title = "Diamonds Explorer",
  h4("Diamonds Analyzer"),

  fluidRow(column(4,
                  selectInput('x', 'Variable X', names(diamonds))),
           column(4,
                  # offset = 1 shifts this column one grid unit to the right.
                  offset = 1,
                  selectInput('y', 'Variable Y', names(diamonds), names(diamonds)[[2]])))



))
#server.R
library(shiny)
function(input, output) {
}

####################
#                  #
#    Exercise 7    #
#                  #
####################

#ui.R
library(shiny)

shinyUI(fluidPage(
  title = "Diamonds Explorer",
h4("Diamonds Analyzer"),
  # Placeholder for the server-rendered plot named 'plot'; hr() draws a
  # horizontal rule between the plot and the input controls.
  plotOutput('plot'),

  hr(),

  fluidRow(column(4,
                  selectInput('x', 'Variable X', names(diamonds))),
           column(4,
                  offset = 1,
                  selectInput('y', 'Variable Y', names(diamonds), names(diamonds)[[2]])))



))
#server.R
library(shiny)
# Server still empty: output$plot is not yet rendered.
function(input, output) {
}

####################
#                  #
#    Exercise 8    #
#                  #
####################

#ui.R
library(shiny)


shinyUI(fluidPage(
  title = "Diamonds Analyzer",
  h4("Diamonds Analyzer"),

  plotOutput('plot'),

  hr(),

  fluidRow(column(4,
                  selectInput('x', 'Variable X', names(diamonds))),
           column(4,
                  offset = 1,
                  selectInput('y', 'Variable Y', names(diamonds), names(diamonds)[[2]])))



))
#server.R
library(shiny)
function(input, output) {
  # Reactive two-column subset of diamonds driven by the 'x'/'y' selects;
  # re-evaluates whenever either input changes.
  selectedData <- reactive({
    diamonds[, c(input$x, input$y)]
  })

}

####################
#                  #
#    Exercise 9    #
#                  #
####################

#ui.R
library(shiny)


shinyUI(fluidPage(
  title = "Diamonds Analyzer",
  h4("Diamonds Analyzer"),

  plotOutput('plot'),

  hr(),

  fluidRow(column(4,
                  selectInput('x', 'Variable X', names(diamonds))),
           column(4,
                  offset = 1,
                  selectInput('y', 'Variable Y', names(diamonds), names(diamonds)[[2]])))



))
#server.R
library(shiny)
function(input, output) {
  selectedData <- reactive({
    diamonds[, c(input$x, input$y)]
  })
  # Render the selected x/y columns as a base scatter plot into 'plot'.
  output$plot <- renderPlot({
    plot(selectedData())
  })
}

####################
#                  #
#    Exercise 10   #
#                  #
####################

#ui.R
library(shiny)


shinyUI(fluidPage(
  title = "Diamonds Analyzer",
  h4("Diamonds Analyzer"),

  plotOutput('plot'),

  hr(),

  fluidRow(column(4,
                  selectInput('x', 'Variable X', names(diamonds))),
           column(4,
                  offset = 1,
                  selectInput('y', 'Variable Y', names(diamonds), names(diamonds)[[2]])),
           # Third select 'x1' reuses the label "Variable X"; the server
           # below never reads input$x1, so it is display-only here.
           column(4,
                  selectInput('x1', 'Variable X', names(diamonds))))



))
#server.R
library(shiny)
function(input, output) {
  selectedData <- reactive({
    diamonds[, c(input$x, input$y)]
  })
  output$plot <- renderPlot({
    plot(selectedData())
  })
}



Forecasting for small business Solutions (Part-3)

Below are the solutions to these exercises on time series.

####################
#                  #
#    Exercise 1    #
#                  #
####################
# Column 1 of EuStockMarkets (the DAX index): plot the raw level series,
# then its lag-1 difference, which removes the trend.
# The stray "plot of chunk ..." knitr figure captions were not comments
# and made this script unparsable; they are commented out.
data(EuStockMarkets)
plot(EuStockMarkets[, 1])
# plot of chunk time_series_exercises_part3
white.noise.EU <- diff(EuStockMarkets[, 1], lag = 1)
plot(white.noise.EU)
# plot of chunk time_series_exercises_part3
####################
#                  #
#    Exercise 2    #
#                  #
####################
# Lag-1 difference of the log series (approximate daily returns), which
# also stabilises the level-dependent variance of the raw differences.
white.noise.log.EU<-diff(log(EuStockMarkets[,1]),lag=1)
plot(white.noise.log.EU)

####################
#                  #
#    Exercise 3    #
#                  #
####################
library("tseries")
# plot of chunk time_series_exercises_part3
# Augmented Dickey-Fuller test with 1 lag; alternative hypothesis is that
# the log-difference series is stationary.
adf.test(white.noise.log.EU, k = 1, alternative = "stationary")
## Warning in adf.test(white.noise.log.EU, k = 1, alternative = "stationary"):
## p-value smaller than printed p-value
## 
## 	Augmented Dickey-Fuller Test
## 
## data:  white.noise.log.EU
## Dickey-Fuller = -31.343, Lag order = 1, p-value = 0.01
## alternative hypothesis: stationary
####################
#                  #
#    Exercise 4    #
#                  #
####################
# Monthly Mauna Loa CO2 concentrations; the lowess smoother (red line)
# highlights the long-run upward trend.
data(co2)
plot(co2)
lines(lowess(co2), col = "red")
# plot of chunk time_series_exercises_part3
####################
#                  #
#    Exercise 5    #
#                  #
####################
# Lag-1 difference removes the trend; the ACF of the differenced series
# still shows the remaining seasonal pattern.
diff.co2 <- diff(co2, lag = 1)
plot(diff.co2)
# plot of chunk time_series_exercises_part3
acf(diff.co2)
# plot of chunk time_series_exercises_part3
####################
#                  #
#    Exercise 6    #
#                  #
####################
# Re-plot the ACF with lags in months: for a monthly ts, acf() reports
# lags in fractions of a year, so multiply by 12.
acf.diff.co2 <- acf(diff.co2, plot = FALSE)
acf.diff.co2$lag <- acf.diff.co2$lag * 12
plot(acf.diff.co2, xlab = "Lag in months")
# plot of chunk time_series_exercises_part3
####################
#                  #
#    Exercise 7    #
#                  #
####################
library("forecast")
# TBATS fits an exponential-smoothing state-space model and detects
# seasonal periods automatically; it finds a 12-month season here.
tbats.model <- tbats(diff.co2)
tbats.model$seasonal.periods
## [1] 12
####################
#                  #
#    Exercise 8    #
#                  #
####################
# Remove the trend (lag-1 difference) and then the 12-month seasonality
# (lag-12 difference); the result should look stationary, and its ACF
# confirms how much autocorrelation remains.
unseason.co2 <- diff(co2, lag = 1)
unseason.co2 <- diff(unseason.co2, lag = 12)
plot(unseason.co2)
# plot of chunk time_series_exercises_part3
acf(unseason.co2)
# plot of chunk time_series_exercises_part3
####################
#                  #
#    Exercise 9    #
#                  #
####################
# ADF test: rejects the unit root (p = 0.01), consistent with stationarity.
adf.test(unseason.co2, alternative="stationary")
## Warning in adf.test(unseason.co2, alternative = "stationary"): p-value
## smaller than printed p-value
## 
## 	Augmented Dickey-Fuller Test
## 
## data:  unseason.co2
## Dickey-Fuller = -8.9846, Lag order = 7, p-value = 0.01
## alternative hypothesis: stationary
# KPSS test: fails to reject level stationarity (p > 0.1), agreeing with ADF.
kpss.test(unseason.co2, null="Level")
## Warning in kpss.test(unseason.co2, null = "Level"): p-value greater than
## printed p-value
## 
## 	KPSS Test for Level Stationarity
## 
## data:  unseason.co2
## KPSS Level = 0.010653, Truncation lag parameter = 4, p-value = 0.1
# Ljung-Box: significant autocorrelation remains in the first 5 lags.
Box.test(unseason.co2, lag=5, type="Ljung-Box")
## 
## 	Box-Ljung test
## 
## data:  unseason.co2
## X-squared = 52.343, df = 5, p-value = 4.588e-10
####################
#                  #
#    Exercise 10   #
#                  #
####################
# Classical additive decomposition of the CO2 series into seasonal,
# trend, and random components, plotted separately.
component.co2 <- decompose(co2, "additive")
plot(as.ts(component.co2$seasonal))
# plot of chunk time_series_exercises_part3
plot(as.ts(component.co2$trend))
# plot of chunk time_series_exercises_part3
plot(as.ts(component.co2$random))
# plot of chunk time_series_exercises_part3



Data Science for Operational Excellence (Part-3)

Below are the solutions to these exercises on Linear Programming.

####################
#                  #
#    Exercise 1    #
#                  #
####################
library(ggmap)
library(fields)
library(lpSolve)
library(leaflet)
library(dplyr)
library(magrittr)

# Brazilian soya origins, transhipment hubs, and export ports.
# NOTE(review): "Alto Arraguaia" is presumably the city "Alto Araguaia"
# (single r) -- the misspelling may make geocode() resolve the wrong place.
soyaCities <- c("Sapezal","Sorriso", "Nova Mutum", "Diamantino", "Cascavel")
transhipment <- c("Alto Arraguaia", "Cascavel")
ports <- c("Santos", "Paranagua")
# Combined list; "Cascavel" appears in both soyaCities and transhipment.
allCitiesAux <- c(soyaCities, transhipment, ports)

####################
#                  #
#    Exercise 2    #
#                  #
####################
# Geocode every city.  Preallocate numeric vectors of the correct length:
# the original assigned length(allCitiesAux) -- a single scalar -- and
# relied on accidental vector growth through indexed assignment.
vectorSizelat <- numeric(length(allCitiesAux))
vectorSizelng <- numeric(length(allCitiesAux))
for (i in seq_along(allCitiesAux)) {
  position <- geocode(allCitiesAux[i])  # network call: one lon/lat per city
  vectorSizelat[i] <- position$lat
  vectorSizelng[i] <- position$lon
}

####################
#                  #
#    Exercise 3    #
#                  #
####################
# Assemble a City/lat/lng table.  cbind() on mixed types yields a
# character matrix, which is why lat/lng must be converted back to
# numeric after the data.frame conversion.
allCities <- cbind(allCitiesAux, vectorSizelat, vectorSizelng)
colnames(allCities) <- c("City", "lat", "lng")
allCities <- as.data.frame(allCities)
# NOTE(review): as.tbl() is deprecated in current dplyr; as_tibble() is
# the modern equivalent.
allCities <- as.tbl(allCities) %>%
            mutate(lat = as.numeric(as.character(lat)), lng = as.numeric(as.character(lng)))

####################
#                  #
#    Exercise 4    #
#                  #
####################
# Distance matrix over the (lat, lng) columns via fields::rdist.
# NOTE(review): this is straight-line distance in degrees, not road or
# great-circle distance -- acceptable only as a relative cost proxy.
distMatrix <- rdist(allCities[,2:3])
colnames(distMatrix) <- allCitiesAux
rownames(distMatrix) <- allCitiesAux
# Keep rows 1-5 (soya cities) and columns 8-9 (ports).
cost.mat <- distMatrix[-c(6:9),8:9] #rows must be offer points and columns demand points

####################
#                  #
#    Exercise 5    #
#                  #
####################
# Random supply per soya city (equality rows) and minimum demand per port.
set.seed(1234)
row.signs <- rep("=", length(soyaCities))
row.rhs <- sample(50:300, length(soyaCities), replace = FALSE) # offer
col.signs <- rep(">", length(ports))
col.rhs <- sample(300:600, length(ports), replace = FALSE) # demand

####################
#                  #
#    Exercise 6    #
#                  #
####################
# Solve the transportation problem and label the resulting flow matrix
# with the city/port names from the cost matrix.
sol <- lp.transport (cost.mat, "min", row.signs, row.rhs, col.signs, col.rhs)
dimnames(sol$solution) <- dimnames(cost.mat)
sol$solution
##            Santos Paranagua
## Sapezal         0        78
## Sorriso       205         0
## Nova Mutum    201         0
## Diamantino     86       118
## Cascavel        0       262
####################
#                  #
#    Exercise 7    #
#                  #
####################
# Build a two-row data.frame (origin and destination coordinates) for
# each nonzero flow, to draw straight lines on the map.
mydf2 <- list()
for (i in 1:dim(sol$solution)[1]) {
  for (j in 1:dim(sol$solution)[2]) {
    if(sol$solution[i,j]!=0){
      # NOTE(review): the list is indexed by i alone, so a supplier that
      # ships to BOTH ports keeps only its last destination (j overwrites),
      # and a supplier with no nonzero flow leaves mydf2[[i]] NULL.
      # Confirm this is acceptable before relying on the map output.
      mydf2[[i]] <- data.frame(group = c(rownames(sol$solution)[i], colnames(sol$solution)[j]),
                          lat = c(as.numeric(filter(allCities, City==rownames(sol$solution)[i])[1,2]),
                                  as.numeric(filter(allCities, City==colnames(sol$solution)[j])[1,2])),
                          long = c(as.numeric(filter(allCities, City==rownames(sol$solution)[i])[1,3]),
                                   as.numeric(filter(allCities, City==colnames(sol$solution)[j])[1,3]))
                          )
    }
  }
}

####################
#                  #
#    Exercise 8    #
#                  #
####################
# Leaflet map: one marker per city plus a straight polyline per route.
map <- leaflet(allCities) %>% addTiles() %>% addMarkers(popup= ~City)
for(i in 1:dim(sol$solution)[1]){
  # NOTE(review): errors if mydf2[[i]] is NULL (supplier with no flow).
  map <- addPolylines(map, data = mydf2[[i]], lng = ~long, lat = ~lat, group = ~group)
}
map
####################
#                  #
#    Exercise 9    #
#                  #
####################
# Fetch a road route (ggmap::route) for each nonzero flow and accumulate
# the per-leg summed cost.  The original preallocated vectorCost with
# vector(length = ...) and then *appended* with c(), leaving a block of
# meaningless leading FALSE entries; start from NULL and grow instead.
vectorCost <- NULL
route_dfs <- list()
for (i in 1:dim(sol$solution)[1]) {
  for (j in 1:dim(sol$solution)[2]) {
    if (sol$solution[i, j] != 0) {
      from <- rownames(sol$solution)[i]
      to <- colnames(sol$solution)[j]
      route_df <- route(from, to, structure = "route")
      route_df <- route_df[complete.cases(route_df), ]
      # NOTE(review): indexed by i, so a supplier serving both ports keeps
      # only its last route (same overwrite issue as mydf2 in Exercise 7).
      route_dfs[[i]] <- route_df
      vectorCost <- c(vectorCost, lapply(route_df, sum)[2])
    }
  }
}

####################
#                  #
#    Exercise 10   #
#                  #
####################
# Map the actual road routes returned by route() instead of straight lines.
map2 <- leaflet(allCities) %>% addTiles() %>% addMarkers(popup= ~City)
for(i in 1:dim(sol$solution)[1]){
  # NOTE(review): route_dfs[[i]] may be NULL for a supplier with no flow.
  map2 <- addPolylines(map2, data = route_dfs[[i]], lng = ~lon, lat = ~lat)
}
map2



Experimental Design Solutions

Below are the solutions to these exercises on experimental design.

####################
#                  #
#    Exercise 1    #
#                  #
####################

# Load the experiment data and code the grouping variables as factors.
# (The original used right assignment `as.factor(...) -> data$group`,
# which works but reads backwards; rewritten as standard left assignment.)
data <- read.csv("experimental-design.csv")
data$group <- as.factor(data$group)
data$age <- as.factor(data$age)

# Overall distributions of initial and final mass.
summary(data$initial_mass)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   53.50   62.18   68.90   67.70   72.27   86.00
summary(data$final_mass)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   50.60   60.08   64.45   65.44   72.00   81.30

# Normality checks on the pooled samples (both p > 0.05: no evidence
# against normality).
shapiro.test(data$initial_mass)
## 
## 	Shapiro-Wilk normality test
## 
## data:  data$initial_mass
## W = 0.98306, p-value = 0.5053
shapiro.test(data$final_mass)
## 
## 	Shapiro-Wilk normality test
## 
## data:  data$final_mass
## W = 0.97517, p-value = 0.2073

# Per-group summaries and normality checks.
sapply(split(data$initial_mass, data$group), summary)
##             1     2     3
## Min.    53.50 53.50 54.50
## 1st Qu. 63.40 62.00 62.18
## Median  68.20 69.65 68.25
## Mean    67.24 68.26 67.58
## 3rd Qu. 70.00 73.00 72.72
## Max.    86.00 82.00 81.00
sapply(split(data$final_mass, data$group), summary)
##             1     2     3
## Min.    50.60 52.00 51.00
## 1st Qu. 60.08 60.25 61.00
## Median  62.95 70.50 65.00
## Mean    62.43 68.64 65.25
## 3rd Qu. 64.62 75.00 72.00
## Max.    81.30 81.00 77.00
sapply(split(data$initial_mass, data$group), shapiro.test)
##           1                             2                            
## statistic 0.9561618                     0.9735745                    
## p.value   0.415793                      0.7920937                    
## method    "Shapiro-Wilk normality test" "Shapiro-Wilk normality test"
## data.name "X[[i]]"                      "X[[i]]"                     
##           3                            
## statistic 0.9777148                    
## p.value   0.8768215                    
## method    "Shapiro-Wilk normality test"
## data.name "X[[i]]"
sapply(split(data$final_mass, data$group), shapiro.test)
##           1                             2                            
## statistic 0.9447748                     0.9231407                    
## p.value   0.2479696                     0.08832153                   
## method    "Shapiro-Wilk normality test" "Shapiro-Wilk normality test"
## data.name "X[[i]]"                      "X[[i]]"                     
##           3                            
## statistic 0.9453135                    
## p.value   0.2543017                    
## method    "Shapiro-Wilk normality test"
## data.name "X[[i]]"
####################
#                  #
#    Exercise 2    #
#                  #
####################


# Paired t-test per group (initial vs final mass), reporting
# eta^2 = t^2 / (t^2 + df) as the effect size.
invisible(sapply(split(data, data$group), function(x)
    {
      # NOTE(review): `->` assigns into `t`, masking base::t inside this
      # function; sprintf's %d applied to the factor unique(x$group)
      # presumably prints its underlying integer code -- confirm the codes
      # match the group labels.
      t.test(x$initial_mass, x$final_mass, paired=TRUE) -> t
      cat(sprintf("Group %d\r\nstatistic=%.3f\r\ndf=%d\r\np=%.3f\r\neta^2=%.3f\r\n\r\n",
              unique(x$group), t$statistic, t$parameter, t$p.value,
              t$statistic^2/(t$statistic^2+t$parameter)))

    }))
## Group 1

## statistic=7.474

## df=21

## p=0.000

## eta^2=0.727

## 

## Group 2

## statistic=-0.687

## df=21

## p=0.500

## eta^2=0.022

## 

## Group 3

## statistic=4.372

## df=21

## p=0.000

## eta^2=0.477

## 

####################
#                  #
#    Exercise 3    #
#                  #
####################

library("car")
# Levene's test (mean-centered) for homogeneity of variance across groups;
# p = 0.30, so the equal-variance assumption for ANOVA holds.
leveneTest(data$final_mass, data$group, center=mean)
## Levene's Test for Homogeneity of Variance (center = mean)
##       Df F value Pr(>F)
## group  2   1.232 0.2986
##       63
####################
#                  #
#    Exercise 4    #
#                  #
####################

# One-way ANOVA of final mass by group, plus eta^2 effect size.
# (The original wrote print(summary(...) -> f), assigning with `->`
# inside the print() call -- rewritten as separate assignment and print.)
f <- summary(aov(final_mass ~ group, data))
print(f)
##             Df Sum Sq Mean Sq F value Pr(>F)  
## group        2    425  212.64   3.626 0.0323 *
## Residuals   63   3694   58.64                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# eta^2 = SS_between / (SS_between + SS_residual)
ss <- f[[1]]$'Sum Sq'
paste("eta squared=", round(ss[1] / (ss[1]+ss[2]), 3))
## [1] "eta squared= 0.103"
####################
#                  #
#    Exercise 5    #
#                  #
####################

# Fit the ANOVA keeping the model object (not just its summary) so that
# TukeyHSD() can be applied to it.
summary(f <- aov(final_mass ~ group, data))
##             Df Sum Sq Mean Sq F value Pr(>F)  
## group        2    425  212.64   3.626 0.0323 *
## Residuals   63   3694   58.64                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Tukey's HSD pairwise comparisons between the three groups.
TukeyHSD(f, "group")
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = final_mass ~ group, data = data)
## 
## $group
##          diff        lwr       upr     p adj
## 2-1  6.209091  0.6669343 11.751248 0.0245055
## 3-1  2.818182 -2.7239748  8.360338 0.4455950
## 3-2 -3.390909 -8.9330657  2.151248 0.3128050
# significant difference appears between 1st and 2nd group (p<0.05)

####################
#                  #
#    Exercise 6    #
#                  #
####################

# Helmert contrasts are needed for meaningful Type III sums of squares.
# NOTE(review): this options() change is global and is never restored.
options(contrasts = c("contr.helmert", "contr.poly"))
# Two-way ANOVA (age x group) with interaction, Type III tests via car::Anova.
m.lm <- lm(final_mass ~ age + group + age*group, data=data)
print(m.anova <- Anova(m.lm, type=3))
## Anova Table (Type III tests)
## 
## Response: final_mass
##             Sum Sq Df   F value    Pr(>F)    
## (Intercept) 267773  1 8536.0541 < 2.2e-16 ***
## age           1388  2   22.1282 7.725e-08 ***
## group          186  2    2.9678  0.059415 .  
## age:group      564  4    4.4981  0.003152 ** 
## Residuals     1788 57                        
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Partial eta^2 for age, group, and the interaction:
# SS_effect / (SS_effect + SS_residual).
m.anova[[1]][2:4] / (m.anova[[1]][2:4]+m.anova[[1]][5])
## [1] 0.43707242 0.09431139 0.23992294
####################
#                  #
#    Exercise 7    #
#                  #
####################

# Same two-way model fitted with aov(), then Tukey HSD on the age factor.
m.aov <- aov(final_mass ~ age + group +  age*group, data)
TukeyHSD(x=m.aov, "age")
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = final_mass ~ age + group + age * group, data = data)
## 
## $age
##                       diff        lwr       upr     p adj
## old-middle-age     6.69775   2.382683 11.012817 0.0012494
## young-middle-age  -5.75200  -9.564155 -1.939845 0.0017300
## young-old        -12.44975 -16.764817 -8.134683 0.0000000
# there is significant difference between all groups

####################
#                  #
#    Exercise 8    #
#                  #
####################

library(lattice)
# Per-group scatterplots of initial vs final mass, each with its own
# least-squares line, to check the linearity assumption for ANCOVA.
# (The stray caption line below was not a comment; it is commented out.)
xyplot(initial_mass ~ final_mass | group, data = data, panel=function(x, y, ...)
  {
  panel.xyplot(x, y, ...)
  panel.lmline(x, y, ...)
})
# Linearity between initial and final mass
####################
#                  #
#    Exercise 9    #
#                  #
####################

# ANCOVA via nested-model comparison: does group add explanatory power
# for final mass after controlling for initial mass?
model.1 = lm(final_mass~initial_mass, data=data)
model.2 = lm(final_mass~initial_mass+group, data=data)
anova(model.1, model.2)
## Analysis of Variance Table
## 
## Model 1: final_mass ~ initial_mass
## Model 2: final_mass ~ initial_mass + group
##   Res.Df    RSS Df Sum of Sq      F    Pr(>F)    
## 1     64 746.24                                  
## 2     62 442.63  2    303.62 21.264 9.286e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# there is the difference between groups while controlling initial measurement (p < 0.05)

####################
#                  #
#    Exercise 10   #
#                  #
####################

# Partial eta^2 for each ANCOVA term via heplots::etasq.
model.3 = lm(final_mass~initial_mass+group, data=data)
library(heplots)
etasq(model.3, anova=TRUE)
## Anova Table (Type II tests)
## 
## Response: final_mass
##              Partial eta^2 Sum Sq Df F value    Pr(>F)    
## initial_mass       0.88019 3251.8  1 455.495 < 2.2e-16 ***
## group              0.40686  303.6  2  21.264 9.286e-08 ***
## Residuals                   442.6 62                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# independent variable (group) explains 41%, covariate (initial_mass) explains 88%



Data science for Doctors: Cluster Analysis Exercises Solutions

Below are the solutions to these exercises on cluster analysis.

In order to run the multiplot function, run this script first.

####################
#                  #
#    Exercise 1    #
#                  #
####################

# Baseline: mass vs age scatter colored by the true class label
# (green = low, red = high).  The stray "plot of chunk" caption line was
# not a comment and broke parsing; it is commented out.
p1 <- ggplot(data, aes(mass, age, color = class)) + geom_point()+ scale_colour_gradient(low="green", high="red")
p1
# plot of chunk unnamed-chunk-2
####################
#                  #
#    Exercise 2    #
#                  #
####################

# Pairwise (Euclidean, dist's default) distance matrix over all columns.
d <- dist(as.matrix(data))

####################
#                  #
#    Exercise 3    #
#                  #
####################

# Single-linkage hierarchical clustering, cut into 2 clusters.
single_link <- hclust(d,method="single")
clust_sing <- cutree(single_link, k = 2)

####################
#                  #
#    Exercise 4    #
#                  #
####################

# Complete-linkage hierarchical clustering, cut into 2 clusters.
complete_link <- hclust(d,method="complete")
clust_complete <- cutree(complete_link, k = 2)

####################
#                  #
#    Exercise 5    #
#                  #
####################

# Dendrograms for both linkages with the 2-cluster cut outlined.
plot(complete_link)
stats::rect.hclust(complete_link, 2)
# plot of chunk unnamed-chunk-2
plot(single_link)
stats::rect.hclust(single_link, 2)
# plot of chunk unnamed-chunk-2
####################
#                  #
#    Exercise 6    #
#                  #
####################

# Color the scatter by each hierarchical clustering and show it next to
# the true-class plot p1 for visual comparison.
p2 <-ggplot(data, aes(mass, age, color = clust_complete)) + geom_point()+ scale_color_gradient(low="green", high="red")

p3 <- ggplot(data, aes(mass, age, color = clust_sing)) + geom_point()+ scale_color_gradient(low="green", high="red")

multiplot(p1,p2, cols = 2)
# plot of chunk unnamed-chunk-2
multiplot(p1,p3, cols = 2)
# plot of chunk unnamed-chunk-2
####################
#                  #
#    Exercise 7    #
#                  #
####################


# k-means with k = 2.
# NOTE(review): data_mat is not defined in this excerpt -- presumably a
# numeric-matrix version of `data`; confirm it is created upstream.
kmean_cluster <- kmeans(data_mat, 2, nstart = 20) # initializes the centroids 20 times and selects the one with the lowest within cluster variation

####################
#                  #
#    Exercise 8    #
#                  #
####################

# Shift cluster ids from {1, 2} to {0, 1} so the color scale lines up
# with the class coloring of p1, then compare side by side.
kmean_cluster$cluster <- kmean_cluster$cluster - 1 # You are not required to do it, but I did that for scaling purposes.

p4 <- ggplot(data, aes(mass, age, color = kmean_cluster$cluster)) + geom_point()+ scale_color_gradient(low="green", high="red")

multiplot(p1,p4, cols = 2)
# plot of chunk unnamed-chunk-2
####################
#                  #
#    Exercise 9    #
#                  #
####################

# k-medians with 2 centers and 20 restarts.
# NOTE(review): kGmedian presumably comes from the Gmedian package, which
# is not loaded in this excerpt -- confirm.
kmedian_cluster <-kGmedian(data_mat, ncenters=2, nstart = 20)

####################
#                  #
#    Exercise 10   #
#                  #
####################
# Rescale ids to {0, 1} and compare k-medians clusters with true classes.
kmedian_cluster$cluster <- kmedian_cluster$cluster - 1
p5 <- ggplot(data, aes(mass, age, color = kmedian_cluster$cluster)) + geom_point()+ scale_color_gradient(low="green", high="red")

multiplot(p1,p5, cols = 2)
# plot of chunk unnamed-chunk-2



User Defined Functions in R Solutions (Part-1)

Below are the solutions to these exercises on user-defined functions in R (Part 1).

####################
#                  #
#    Exercise 1    #
#                  #
####################
# Print the square of `number`.
#
# The original looped i from 1 to number, overwriting s with i^2 each
# pass and printing only the final value -- i.e. it printed number^2 for
# number >= 1 and raised "object 's' not found" for number < 1 (the loop
# body never ran, so s was never created).  Computing the square directly
# prints the same result for number >= 1 and works for any numeric input.
sqr <- function(number) {
  print(number^2)
}

sqr(10)
## [1] 100
####################
#                  #
#    Exercise 2    #
#                  #
####################
# Raise `num` to the power `r` (default 3) by repeated multiplication and
# print the result.  A for-loop over seq_len(r) replaces the original
# while-loop with a manual counter; for r = 0 the loop body never runs
# and the initial product of 1 is printed, exactly as before.
raisefn <- function(num, r = 3) {
  result <- 1
  for (step in seq_len(r)) {
    result <- result * num
  }
  print(result)
}


raisefn(4)
## [1] 64
raisefn(4,5)
## [1] 1024
####################
#                  #
#    Exercise 3    #
#                  #
####################
# Print (and invisibly return) the class of the argument.
Class.Finder<-function(arg){
  print(class(arg))
}

value1<-c(1,2,3,4,5)

# Fixed: the original called Class.Finder(value), which references an
# object that is never defined in this script -- the vector created above
# is named value1.
Class.Finder(value1)
## [1] "numeric"
####################
#                  #
#    Exercise 4    #
#                  #
####################
# Print both input matrices under labels, then print (and invisibly
# return) their elementwise sum.  A small local helper replaces the
# original's repeated print-label/print-value pairs; the printed labels,
# values, and their order are identical.
SumMatr <- function(M1, M2) {
  show_labeled <- function(label, m) {
    print(label)
    print(m)
  }
  show_labeled("The Matrix1", M1)
  show_labeled("The Matrix2", M2)
  show_labeled("Sum of Matrices", M1 + M2)
}


Mat1 <- matrix(3:14, nrow = 4, ncol = 3, byrow = TRUE)
Mat2 <- matrix(6:17, nrow = 4, ncol = 3, byrow = TRUE)
SumMatr(Mat1, Mat2)
## [1] "The Matrix1"
##      [,1] [,2] [,3]
## [1,]    3    4    5
## [2,]    6    7    8
## [3,]    9   10   11
## [4,]   12   13   14
## [1] "The Matrix2"
##      [,1] [,2] [,3]
## [1,]    6    7    8
## [2,]    9   10   11
## [3,]   12   13   14
## [4,]   15   16   17
## [1] "Sum of Matrices"
##      [,1] [,2] [,3]
## [1,]    9   11   13
## [2,]   15   17   19
## [3,]   21   23   25
## [4,]   27   29   31
####################
#                  #
#    Exercise 5    #
#                  #
####################

# Prompt for a name on standard input and return it as a character string.
userread <- function() {
  name <- readline(prompt = "Enter the Name: ")
  as.character(name)
}

print(userread())
## [1] "Ryan"
####################
#                  #
#    Exercise 6    #
#                  #
####################

# Read an arbitrary number of values from standard input (scan() stops
# at an empty line) and return them as a vector.
userread1 <- function() {
  scan()
}

print(userread1())
## [1] 1 2 3 4
####################
#                  #
#    Exercise 7    #
#                  #
####################

# Read values from standard input and shape them into a 4-column matrix,
# filled row by row. scan() stops at an empty line; a value count that is
# not a multiple of 4 triggers a recycling warning from matrix().
userread2 <- function() {
  print("Enter values for 4 columns(There should be multiples for 4 values)")
  matrix(scan(), ncol = 4, byrow = TRUE)
}

print(userread2())
## [1] "Enter values for 4 columns(There should be multiples for 4 values)"
## Warning in matrix(scan(), ncol = 4, byrow = TRUE): data length [3] is not a
## sub-multiple or multiple of the number of columns [4]
##      [,1] [,2] [,3] [,4]
## [1,]    1    2    3    1
####################
#                  #
#    Exercise 8    #
#                  #
####################

# Draw a bar chart of marks per student.
#
# x - character vector of bar labels (student names)
# y - numeric vector of bar heights (marks)
# Returns (invisibly) the bar midpoints, as barplot() does.
graphplot <- function(x, y) {
  # Bug fix: the original call ended with a stray trailing comma, which
  # passes an empty argument into barplot().
  barplot(y, names.arg = x, xlab = "Students", ylab = "Marks",
          col = "yellow", border = "blue", main = "Student v/s Marks")
}

yval <- c(80, 90, 50, 66, 70, 88, 90, 96, 78, 100)
xval <- c("student1", "student2", "student3", "student4", "student5",
          "student6", "student7", "student8", "student9", "student10")
graphplot(xval, yval)
####################
#                  #
#    Exercise 9    #
#                  #
####################
# Print selected details from an employee data frame:
#   - the full record of employee 1,
#   - the full record of employee 5,
#   - the Name and Designation columns of every employee.
# Returns (invisibly) the Name/Designation subset, via the last print().
Emp <- function(EmpData) {
  print("Employee 1 Details")
  print(EmpData[1, ])
  print("Employee 5 Details")
  # Bug fix: the original printed row 2 under the "Employee 5" heading.
  print(EmpData[5, ])
  # Typo fix in the heading ("Deignation" -> "Designation").
  print("Name & Designation of Employees")
  print(EmpData[c(1, 4)])
}
Employees <- data.frame(
  Name = c("ALAN S", "RYAN S", "SERAH S", "CHRISTY S", "THOMAS MARTIN"),
  Gender = c("Male", "Male", "Female", "Female", "Male"),
  Age = c(23, 22, 25, 26, 32),
  Designation = c("Clerk", "Manager", "Exective", "CEO", "CTO"),
  SSN = c("134-34-2345", "349-44-789", "556-34-443", "898-98-987", "679-67-676")
)
Emp(Employees)
## [1] "Employee 1 Details"
##     Name Gender Age Designation         SSN
## 1 ALAN S   Male  23       Clerk 134-34-2345
## [1] "Employee 5 Details"
##     Name Gender Age Designation        SSN
## 2 RYAN S   Male  22     Manager 349-44-789
## [1] "Name & Deignation of Employees"
##            Name Designation
## 1        ALAN S       Clerk
## 2        RYAN S     Manager
## 3       SERAH S    Exective
## 4     CHRISTY S         CEO
## 5 THOMAS MARTIN         CTO
####################
#                  #
#    Exercise 10   #
#                  #
####################


# Build the employee table locally and return only the Name, Age and
# Designation columns.
Emp <- function() {
  staff <- data.frame(
    Name = c("ALAN S", "RYAN S", "SERAH S", "CHRISTY S", "THOMAS MARTIN"),
    Gender = c("Male", "Male", "Female", "Female", "Male"),
    Age = c(23, 22, 25, 26, 32),
    Designation = c("Clerk", "Manager", "Exective", "CEO", "CTO"),
    SSN = c("134-34-2345", "349-44-789", "556-34-443", "898-98-987", "679-67-676")
  )
  staff[c("Name", "Age", "Designation")]
}

print(Emp())
##            Name Age Designation
## 1        ALAN S  23       Clerk
## 2        RYAN S  22     Manager
## 3       SERAH S  25    Exective
## 4     CHRISTY S  26         CEO
## 5 THOMAS MARTIN  32         CTO



Shiny Application Layouts solutions (Part-1)

Below are the solutions to these on Shiny Application Layouts.

####################
#                  #
#    Exercise 1    #
#                  #
####################

# Install shiny only when it is not already available, then attach it.
# Fix: the original ran install.packages("shiny") unconditionally on
# every execution of the script.
if (!requireNamespace("shiny", quietly = TRUE)) {
  install.packages("shiny")
}
library(shiny)

####################
#                  #
#    Exercise 2    #
#                  #
####################

#ui.R
library(shiny)

# Minimal page skeleton: a header plus empty sidebar and main areas.
app_ui <- pageWithSidebar(
  headerPanel("APP 1"),
  sidebarPanel(),
  mainPanel()
)
shinyUI(app_ui)
#server.R
# No server logic yet.
shinyServer(function(input, output) {})


####################
#                  #
#    Exercise 3    #
#                  #
####################
#ui.R
library(shiny)

# Label -> value pairs for the distribution selector.
dist_choices <- list(
  "Normal" = "norm",
  "Uniform" = "unif",
  "Log-normal" = "lnorm",
  "Exponential" = "exp"
)

shinyUI(pageWithSidebar(
  headerPanel("APP 1"),
  sidebarPanel(
    radioButtons("dist", "Distributions:", dist_choices)
  ),
  mainPanel()
))
#server.R
shinyServer(function(input, output) {})


####################
#                  #
#    Exercise 4    #
#                  #
####################

#ui.R
library(shiny)

# Label -> value pairs for the distribution selector.
dist_choices <- list(
  "Normal" = "norm",
  "Uniform" = "unif",
  "Log-normal" = "lnorm",
  "Exponential" = "exp"
)

shinyUI(pageWithSidebar(
  headerPanel("APP 1"),
  sidebarPanel(
    radioButtons("dist", "Distributions:", dist_choices),
    # Sample-size slider: 1 to 1000 observations, starting at 500.
    sliderInput("n", "Number of observations:",
                min = 1, max = 1000, value = 500)
  ),
  mainPanel()
))
#server.R
shinyServer(function(input, output) {})


####################
#                  #
#    Exercise 5    #
#                  #
####################

#ui.R
library(shiny)

# Label -> value pairs for the distribution selector.
dist_choices <- list(
  "Normal" = "norm",
  "Uniform" = "unif",
  "Log-normal" = "lnorm",
  "Exponential" = "exp"
)

shinyUI(pageWithSidebar(
  headerPanel("APP 1"),
  sidebarPanel(
    radioButtons("dist", "Distributions:", dist_choices),
    sliderInput("n", "Number of observations:",
                min = 1, max = 1000, value = 500)
  ),
  mainPanel(
    tabsetPanel()  # empty tab container for now
  )
))
#server.R
shinyServer(function(input, output) {})


####################
#                  #
#    Exercise 6    #
#                  #
####################

#ui.R
library(shiny)

# Label -> value pairs for the distribution selector.
dist_choices <- list(
  "Normal" = "norm",
  "Uniform" = "unif",
  "Log-normal" = "lnorm",
  "Exponential" = "exp"
)

shinyUI(pageWithSidebar(
  headerPanel("APP 1"),
  sidebarPanel(
    radioButtons("dist", "Distributions:", dist_choices),
    sliderInput("n", "Number of observations:",
                min = 1, max = 1000, value = 500)
  ),
  mainPanel(
    # Three tabs; the outputs are wired up in later exercises.
    tabsetPanel(
      tabPanel("Plot", plotOutput("plot")),
      tabPanel("Summary", verbatimTextOutput("summary")),
      tabPanel("Table", tableOutput("table"))
    )
  )
))
#server.R
shinyServer(function(input, output) {})


####################
#                  #
#    Exercise 7    #
#                  #
####################

#ui.R
library(shiny)

# Label -> value pairs for the distribution selector.
dist_choices <- list(
  "Normal" = "norm",
  "Uniform" = "unif",
  "Log-normal" = "lnorm",
  "Exponential" = "exp"
)

shinyUI(pageWithSidebar(
  headerPanel("APP 1"),
  sidebarPanel(
    radioButtons("dist", "Distributions:", dist_choices),
    sliderInput("n", "Number of observations:",
                min = 1, max = 1000, value = 500)
  ),
  mainPanel(
    tabsetPanel(
      tabPanel("Plot", plotOutput("plot")),
      tabPanel("Summary", verbatimTextOutput("summary")),
      tabPanel("Table", tableOutput("table"))
    )
  )
))
#server.R
shinyServer(function(input, output) {
  # Reactive vector of input$n draws from the selected distribution.
  data <- reactive({
    rng <- switch(input$dist,
                  norm = rnorm,
                  unif = runif,
                  lnorm = rlnorm,
                  exp = rexp,
                  rnorm)  # fall back to rnorm for unrecognised values
    rng(input$n)
  })
})

####################
#                  #
#    Exercise 8    #
#                  #
####################

#ui.R
library(shiny)

# Label -> value pairs for the distribution selector.
dist_choices <- list(
  "Normal" = "norm",
  "Uniform" = "unif",
  "Log-normal" = "lnorm",
  "Exponential" = "exp"
)

shinyUI(pageWithSidebar(
  headerPanel("APP 1"),
  sidebarPanel(
    radioButtons("dist", "Distributions:", dist_choices),
    sliderInput("n", "Number of observations:",
                min = 1, max = 1000, value = 500)
  ),
  mainPanel(
    tabsetPanel(
      tabPanel("Plot", plotOutput("plot")),
      tabPanel("Summary", verbatimTextOutput("summary")),
      tabPanel("Table", tableOutput("table"))
    )
  )
))
#server.R
shinyServer(function(input, output) {
  # Reactive vector of input$n draws from the selected distribution.
  data <- reactive({
    rng <- switch(input$dist,
                  norm = rnorm,
                  unif = runif,
                  lnorm = rlnorm,
                  exp = rexp,
                  rnorm)  # fall back to rnorm for unrecognised values
    rng(input$n)
  })
  # Histogram of the current sample for the "Plot" tab.
  output$plot <- renderPlot({
    hist(data())
  })
})


####################
#                  #
#    Exercise 9    #
#                  #
####################

#ui.R
library(shiny)

# Label -> value pairs for the distribution selector.
dist_choices <- list(
  "Normal" = "norm",
  "Uniform" = "unif",
  "Log-normal" = "lnorm",
  "Exponential" = "exp"
)

shinyUI(pageWithSidebar(
  headerPanel("APP 1"),
  sidebarPanel(
    radioButtons("dist", "Distributions:", dist_choices),
    sliderInput("n", "Number of observations:",
                min = 1, max = 1000, value = 500)
  ),
  mainPanel(
    tabsetPanel(
      tabPanel("Plot", plotOutput("plot")),
      tabPanel("Summary", verbatimTextOutput("summary")),
      tabPanel("Table", tableOutput("table"))
    )
  )
))
#server.R
shinyServer(function(input, output) {
  # Reactive vector of input$n draws from the selected distribution.
  data <- reactive({
    rng <- switch(input$dist,
                  norm = rnorm,
                  unif = runif,
                  lnorm = rlnorm,
                  exp = rexp,
                  rnorm)  # fall back to rnorm for unrecognised values
    rng(input$n)
  })
  # Histogram of the current sample for the "Plot" tab.
  output$plot <- renderPlot({
    hist(data())
  })
  # Numeric summary of the current sample for the "Summary" tab.
  output$summary <- renderPrint({
    summary(data())
  })
})

####################
#                  #
#    Exercise 10   #
#                  #
####################

#ui.R
library(shiny)

# Label -> value pairs for the distribution selector.
dist_choices <- list(
  "Normal" = "norm",
  "Uniform" = "unif",
  "Log-normal" = "lnorm",
  "Exponential" = "exp"
)

shinyUI(pageWithSidebar(
  headerPanel("APP 1"),
  sidebarPanel(
    radioButtons("dist", "Distributions:", dist_choices),
    sliderInput("n", "Number of observations:",
                min = 1, max = 1000, value = 500)
  ),
  mainPanel(
    tabsetPanel(
      tabPanel("Plot", plotOutput("plot")),
      tabPanel("Summary", verbatimTextOutput("summary")),
      tabPanel("Table", tableOutput("table"))
    )
  )
))
#server.R
shinyServer(function(input, output) {
  # Reactive vector of input$n draws from the selected distribution.
  data <- reactive({
    rng <- switch(input$dist,
                  norm = rnorm,
                  unif = runif,
                  lnorm = rlnorm,
                  exp = rexp,
                  rnorm)  # fall back to rnorm for unrecognised values
    rng(input$n)
  })
  # Histogram of the current sample for the "Plot" tab.
  output$plot <- renderPlot({
    hist(data())
  })
  # Numeric summary of the current sample for the "Summary" tab.
  output$summary <- renderPrint({
    summary(data())
  })
  # One-column table of the raw sample for the "Table" tab.
  output$table <- renderTable({
    data.frame(x = data())
  })
})



Forecasting: Exponential Smoothing Exercises (Part-3) Solutions

Below are the solutions to these exercises on forecasting with exponential smoothing.

####################
#                  #
#    Exercise 1    #
#                  #
####################
# Read the monthly unemployment figures and convert them to a time series
# starting in January 2012 with 12 observations per year.
df <- read.csv("unemployment.csv")
unempl <- ts(df, start = c(2012, 1), frequency = 12)
plot(unempl)
# plot of chunk forecasting-part-3 (figure appears here in the rendered document)
####################
#                  #
#    Exercise 2    #
#                  #
####################
# Fix: use library() instead of require() — require() only warns when the
# package is missing, so the calls below would fail with a confusing error.
library(forecast)

# Simple exponential smoothing, forecasting 12 months ahead.
fcast_ses <- ses(unempl, h = 12)
plot(fcast_ses)
# plot of chunk forecasting-part-3
####################
#                  #
#    Exercise 3    #
#                  #
####################
# Fix: library() instead of require() (require() only warns on failure).
library(forecast)

# ETS model with all components selected automatically, then a
# 12-month-ahead forecast.
fit_ets_default <- ets(unempl)
fcast_ets_default <- forecast(fit_ets_default, h = 12)
plot(fcast_ets_default)
# plot of chunk forecasting-part-3
####################
#                  #
#    Exercise 4    #
#                  #
####################
# Fix: library() instead of require() (require() only warns on failure).
library(forecast)
# Print the fitted model; the first line of the summary names the
# selected ETS specification (see the transcript below).
summary(fit_ets_default)
## ETS(M,A,M) 
## 
## Call:
##  ets(y = unempl) 
## 
##   Smoothing parameters:
##     alpha = 0.5717 
##     beta  = 1e-04 
##     gamma = 1e-04 
## 
##   Initial states:
##     l = 8.4353 
##     b = -0.0564 
##     s=0.9567 0.9453 0.957 0.9669 1.0245 1.0591
##            1.0365 0.9642 0.9403 1.0224 1.0542 1.0729
## 
##   sigma:  0.021
## 
##      AIC     AICc      BIC 
## 37.38299 50.98299 73.81628 
## 
## Training set error measures:
##                        ME      RMSE      MAE        MPE     MAPE      MASE
## Training set -0.009791689 0.1271141 0.102154 -0.1209349 1.687472 0.1322298
##                    ACF1
## Training set 0.08649403
# The first line in the printed summary is "ETS(M,A,M)"
# It implies that (1) the model has error, trend, and seasonal components,
# (2) error and seasonal components are multiplicative, 
# trend component is additive.

####################
#                  #
#    Exercise 5    #
#                  #
####################
# Fix: library() instead of require() (require() only warns on failure).
library(forecast)

# ETS model with a damped trend component.
fit_ets_damped_trend <- ets(unempl, damped = TRUE)
fcast_ets_damped_trend <- forecast(fit_ets_damped_trend, h = 12)
plot(fcast_ets_damped_trend)
# plot of chunk forecasting-part-3
####################
#                  #
#    Exercise 6    #
#                  #
####################
# Fix: library() instead of require() (require() only warns on failure).
library(forecast)

# ETS model without a trend component: "ZNZ" = automatic error, No trend,
# automatic seasonality.
fit_ets_no_trend <- ets(unempl, model = "ZNZ")
fcast_ets_no_trend <- forecast(fit_ets_no_trend, h = 12)
plot(fcast_ets_no_trend)
# plot of chunk forecasting-part-3
####################
#                  #
#    Exercise 7    #
#                  #
####################
# Fix: library() instead of require() (require() only warns on failure).
library(forecast)

# BATS model with a damped trend, then a 12-month-ahead forecast.
fit_bats <- bats(unempl, use.damped.trend = TRUE)
fcast_bats <- forecast(fit_bats, h = 12)
plot(fcast_bats)
# plot of chunk forecasting-part-3
####################
#                  #
#    Exercise 8    #
#                  #
####################
# Fix: library() instead of require() (require() only warns on failure).
library(forecast)

# Training-set accuracy measures for the BATS forecast.
acc_bats <- accuracy(fcast_bats)
print(acc_bats)
##                       ME      RMSE       MAE        MPE     MAPE      MASE
## Training set -0.02284822 0.1503005 0.1073459 -0.2853875 1.707524 0.1389502
##                   ACF1
## Training set 0.1717618
# Extract the mean absolute error from the accuracy table.
mae_bats <- acc_bats[1, "MAE"]

####################
#                  #
#    Exercise 9    #
#                  #
####################
# Fix: library() instead of require() (require() only warns on failure).
library(forecast)

# Select, among several forecasting model functions, the forecast whose
# training-set mean absolute error (MAE) is smallest.
#
# series                - time series to model.
# forecasting_functions - list/vector of model-fitting functions
#                         (e.g. ets, bats) each taking the series.
# Returns the 12-step-ahead forecast object with the lowest training MAE.
best_forecast <- function(series, forecasting_functions) {
  smallest_mae <- Inf
  # Fix: the original local variable was also named `best_forecast`,
  # shadowing the function itself; renamed for clarity.
  best_fcast <- NULL

  for (f in forecasting_functions) {
    fit <- f(series)
    fcast <- forecast(fit, h = 12)
    mae <- accuracy(fcast)[1, "MAE"]

    if (mae < smallest_mae) {
      smallest_mae <- mae
      best_fcast <- fcast
    }
  }

  return(best_fcast)
}

# run the function with the unemployment time series and a list of forecasting models
forecasting_functions <- c(ets, bats, auto.arima)
fcast_best <- best_forecast(unempl, forecasting_functions)
plot(fcast_best)
# plot of chunk forecasting-part-3
####################
#                  #
#    Exercise 10   #
#                  #
####################

# Same selection logic as the previous exercise, plus a line that prints
# each model's name and MAE as it is evaluated.
# Fix: the local variable `best_forecast` shadowed the function's own
# name; renamed to `best_fcast`.
best_forecast <- function(series, forecasting_functions) {
  smallest_mae <- Inf
  best_fcast <- NULL

  for (f in forecasting_functions) {
    fit <- f(series)
    fcast <- forecast(fit, h = 12)
    mae <- accuracy(fcast)[1, "MAE"]
    cat(sprintf("%-35s %f\n", fcast$method, mae))  # a line that prints mae's

    if (mae < smallest_mae) {
      smallest_mae <- mae
      best_fcast <- fcast
    }
  }

  return(best_fcast)
}

forecasting_functions <- c(ets, bats, auto.arima)
fcast_best <- best_forecast(unempl, forecasting_functions)
## ETS(M,A,M)                          0.102154
## BATS(0.005, {0,0}, 1, {12})         0.108308
## ARIMA(2,1,2)(1,0,0)[12] with drift  0.144354



Data science for Doctors: Inferential Statistics Exercises(Part-5) Solutions

Below are the solutions to these exercises on inferential statistics.

####################
#                  #
#    Exercise 1    #
#                  #
####################

# Histogram of blood pressure; data["pres"] is a one-column data frame,
# so unlist() flattens it to the numeric vector hist() expects.
hist(unlist(data["pres"]))
plot of chunk unnamed-chunk-2
#OR

# The same histogram drawn with ggplot2.
ggplot(data, aes(unlist(data["pres"]))) + geom_histogram()
plot of chunk unnamed-chunk-2
####################
#                  #
#    Exercise 2    #
#                  #
####################

# Normal Q-Q plot of blood pressure with the reference line added;
# systematic deviation from the line suggests non-normality.
qqnorm(unlist(data["pres"]))
qqline(unlist(data["pres"]))
plot of chunk unnamed-chunk-2
####################
#                  #
#    Exercise 3    #
#                  #
####################

shapiro.test(unlist(data["pres"]))
## 
## 	Shapiro-Wilk normality test
## 
## data:  unlist(data["pres"])
## W = 0.81892, p-value < 2.2e-16
####################
#                  #
#    Exercise 4    #
#                  #
####################


ad.test(unlist(data["pres"]))
## 
## 	Anderson-Darling normality test
## 
## data:  unlist(data["pres"])
## A = 33.901, p-value < 2.2e-16
####################
#                  #
#    Exercise 5    #
#                  #
####################

# Row-wise Shapiro-Wilk tests: one p-value per observation (each row's
# set of measurements), then the percentage of rows whose p-value does
# not reject normality at the 5% level.
row_pvals <- apply(data, 1, function(row) shapiro.test(row)$p.value)
mean(row_pvals > 0.05) * 100
## [1] 32.68229
####################
#                  #
#    Exercise 6    #
#                  #
####################

# Box plot of blood pressure using base graphics.
boxplot(data["pres"])
plot of chunk unnamed-chunk-2
# OR 

# qplot version.
# NOTE(review): data["pres"] is a one-column data frame rather than a
# vector here — confirm it renders the same as the ggplot version below.
qplot(y=data["pres"], x= 1, geom = "boxplot")
plot of chunk unnamed-chunk-2
#OR

# ggplot2 version, using the pres column directly.
ggplot(data, aes(x="pres", y=pres))+geom_boxplot()
plot of chunk unnamed-chunk-2
####################
#                  #
#    Exercise 7    #
#                  #
####################

grubbs.test(unlist(data$pres))
## 
## 	Grubbs test for one outlier
## 
## data:  unlist(data$pres)
## G = 3.57030, U = 0.98336, p-value = 0.1299
## alternative hypothesis: lowest value 0 is an outlier
####################
#                  #
#    Exercise 8    #
#                  #
####################

grubbs.test(unlist(data["pres"]),two.sided = TRUE)
## 
## 	Grubbs test for one outlier
## 
## data:  unlist(data["pres"])
## G.pres8 = 3.57030, U = 0.98336, p-value = 0.2598
## alternative hypothesis: lowest value 0 is an outlier
####################
#                  #
#    Exercise 9    #
#                  #
####################

# Draw 14 random mass values from the data and 14 values from N(28, 4),
# then compare the two samples with a paired Wilcoxon signed rank test.
# NOTE(review): no set.seed() here, so the sample and the test result
# change on every run — the transcript below is one realisation.
mass_1 <- data$mass[sample(length(data$mass), 14)]
mass_2 <- rnorm(14,28,4)

wilcox.test(mass_1,mass_2,paired = TRUE)
## 
## 	Wilcoxon signed rank test
## 
## data:  mass_1 and mass_2
## V = 71, p-value = 0.2676
## alternative hypothesis: true location shift is not equal to 0
####################
#                  #
#    Exercise 10   #
#                  #
####################

wilcox.test(data$pres ~ data$class)
## 
## 	Wilcoxon rank sum test with continuity correction
## 
## data:  data$pres by data$class
## W = 55414, p-value = 7.559e-05
## alternative hypothesis: true location shift is not equal to 0