Data science for Doctors: Inferential Statistics Solutions (part-2)

Below are the solutions to these exercises on inferential statistics.

####################
#                  #
#    Exercise 1    #
#                  #
####################

binom.test(5 ,30, mean(data$class),alternative = "two.sided")
## 
## 	Exact binomial test
## 
## data:  5 and 30
## number of successes = 5, number of trials = 30, p-value = 0.03587
## alternative hypothesis: true probability of success is not equal to 0.3489583
## 95 percent confidence interval:
##  0.0564217 0.3472117
## sample estimates:
## probability of success 
##              0.1666667
####################
#                  #
#    Exercise 2    #
#                  #
####################

binom.test(c(5, 25), mean(data$class) ,alternative = "two.sided")
## 
## 	Exact binomial test
## 
## data:  c(5, 25)
## number of successes = 5, number of trials = 30, p-value =
## 0.0003249
## alternative hypothesis: true probability of success is not equal to 0.5
## 95 percent confidence interval:
##  0.0564217 0.3472117
## sample estimates:
## probability of success 
##              0.1666667
####################
#                  #
#    Exercise 3    #
#                  #
####################

binom.test(5, 30, mean(data$class), alternative="less")
## 
## 	Exact binomial test
## 
## data:  5 and 30
## number of successes = 5, number of trials = 30, p-value = 0.0239
## alternative hypothesis: true probability of success is less than 0.3489583
## 95 percent confidence interval:
##  0.0000000 0.3189712
## sample estimates:
## probability of success 
##              0.1666667
#OR 
pbinom(5, 30, mean(data$class))
## [1] 0.0238959
# We reject our null hypothesis

####################
#                  #
#    Exercise 4    #
#                  #
####################

binom.test(5,30, mean(data$class), conf.level=0.99,alternative="less")
## 
## 	Exact binomial test
## 
## data:  5 and 30
## number of successes = 5, number of trials = 30, p-value = 0.0239
## alternative hypothesis: true probability of success is less than 0.3489583
## 99 percent confidence interval:
##  0.0000000 0.3808047
## sample estimates:
## probability of success 
##              0.1666667
# we can't reject our null hypothesis

####################
#                  #
#    Exercise 5    #
#                  #
####################

binom.test(2, 30, mean(data$class), conf.level=0.999,alternative="less")
## 
## 	Exact binomial test
## 
## data:  2 and 30
## number of successes = 2, number of trials = 30, p-value =
## 0.0003637
## alternative hypothesis: true probability of success is less than 0.3489583
## 99.9 percent confidence interval:
##  0.0000000 0.3214435
## sample estimates:
## probability of success 
##             0.06666667
# We reject our null hypothesis

####################
#                  #
#    Exercise 6    #
#                  #
####################

z <- 1.96
low <- mean(data$mass) - z*sd(data$mass)/sqrt(30)
high <- mean(data$mass) + z*sd(data$mass)/sqrt(30)
low;high
## [1] 29.17127
## [1] 34.81389
####################
#                  #
#    Exercise 7    #
#                  #
####################

z <- (29 - mean(data$mass))/(sd(data$mass)/sqrt(30))

####################
#                  #
#    Exercise 8    #
#                  #
####################

2*pnorm(-abs(z),0,1) #Reject the null hypothesis
## [1] 0.03761903
####################
#                  #
#    Exercise 9    #
#                  #
####################

library(TeachingDemos)
z.test(29,mu=mean(data$mass),sd=sd(data$mass)/sqrt(30), alternative = "two.sided", conf.level = 0.95)
## 
## 	One Sample z-test
## 
## data:  29
## z = -2.079, n = 1.0000, Std. Dev. = 1.4394, Std. Dev. of the
## sample mean = 1.4394, p-value = 0.03762
## alternative hypothesis: true mean is not equal to 31.99258
## 95 percent confidence interval:
##  26.17874 31.82126
## sample estimates:
## mean of 29 
##         29
####################
#                  #
#    Exercise 10   #
#                  #
####################

z.test(29,mu=mean(data$mass),sd=sd(data$mass)/sqrt(30), alternative = "less", conf.level = 0.99)
## 
## 	One Sample z-test
## 
## data:  29
## z = -2.079, n = 1.0000, Std. Dev. = 1.4394, Std. Dev. of the
## sample mean = 1.4394, p-value = 0.01881
## alternative hypothesis: true mean is less than 31.99258
## 99 percent confidence interval:
##      -Inf 32.34865
## sample estimates:
## mean of 29 
##         29



One way MANOVA in R solutions

These are solutions to exercises found here

####################
#                  #
#    Exercise 1    #
#                  #
####################
# Read in SPSS data
library(foreign)
koro.data = read.spss("C:/Users/INVESTS/Downloads/koro.sav",to.data.frame = TRUE)
View(koro.data)
attach(koro.data)
####################
#                  #
#    Exercise 2    #
#                  #
####################
#Check number of observations in each group
table(therapy)
## therapy
## Abreaction Behavioral Cognitive  Control    
##         10         10         10         10
####################
#                  #
#    Exercise 3    #
#                  #
####################
#Create the variables that hold the change in indices
koro.data$si.diff = si_post - si_pre
koro.data$sf.diff = sf_post - sf_pre
koro.data$oa.diff = oa_post - oa_pre
####################
#                  #
#    Exercise 4    #
#                  #
####################
#Summarize the change variables
library(pastecs)
indices = koro.data[,c(3,10,11,12)]
stat.desc(indices)
##          therapy    si.diff     sf.diff    oa.diff
## nbr.val       NA  40.000000  40.0000000  40.000000
## nbr.null      NA   1.000000   0.0000000   1.000000
## nbr.na        NA   0.000000   0.0000000   0.000000
## min           NA  -7.000000 -16.0000000 -13.000000
## max           NA  26.000000  38.0000000  20.000000
## range         NA  33.000000  54.0000000  33.000000
## sum           NA 328.000000 484.0000000 236.000000
## median        NA   9.500000  13.5000000   6.000000
## mean          NA   8.200000  12.1000000   5.900000
## SE.mean       NA   1.392839   1.7646093   1.189592
## CI.mean       NA   2.817282   3.5692593   2.406176
## var           NA  77.600000 124.5538462  56.605128
## std.dev       NA   8.809086  11.1603694   7.523638
## coef.var      NA   1.074279   0.9223446   1.275193
####################
#                  #
#    Exercise 5    #
#                  #
####################
#Get descriptive statistics for each therapy
library(psych)
describeBy(indices[-1],therapy)
## group: Abreaction
##         vars  n mean    sd median trimmed   mad min max range  skew
## si.diff    1 10  7.8  7.60    9.5    8.62  7.41  -7  16    23 -0.69
## sf.diff    2 10 19.1 12.38   18.5   18.75 17.79   3  38    35  0.17
## oa.diff    3 10 10.1  7.16   11.5   10.12  8.15   0  20    20 -0.18
##         kurtosis   se
## si.diff    -1.00 2.40
## sf.diff    -1.58 3.91
## oa.diff    -1.63 2.26
## -------------------------------------------------------- 
## group: Behavioral
##         vars  n mean   sd median trimmed  mad min max range  skew kurtosis
## si.diff    1 10 12.1 6.45   14.0   13.00 5.93  -1  18    19 -0.78    -0.83
## sf.diff    2 10 16.0 6.11   17.5   16.88 5.19   3  22    19 -0.88    -0.55
## oa.diff    3 10  6.3 7.67    7.5    6.38 4.45  -8  20    28 -0.17    -0.60
##           se
## si.diff 2.04
## sf.diff 1.93
## oa.diff 2.43
## -------------------------------------------------------- 
## group: Cognitive 
##         vars  n mean    sd median trimmed   mad min max range  skew
## si.diff    1 10 10.1 10.94   14.0   10.25 11.12  -7  26    33 -0.24
## sf.diff    2 10 12.3  8.60   14.5   12.88  7.41  -3  23    26 -0.52
## oa.diff    3 10  2.9  6.35    4.5    2.75  3.71  -7  14    21 -0.04
##         kurtosis   se
## si.diff    -1.53 3.46
## sf.diff    -1.30 2.72
## oa.diff    -1.16 2.01
## -------------------------------------------------------- 
## group: Control   
##         vars  n mean   sd median trimmed  mad min max range  skew kurtosis
## si.diff    1 10  2.8 7.98    1.5    1.12 3.71  -5  24    29  1.73     2.09
## sf.diff    2 10  1.0 8.18    2.0    1.38 5.93 -16  15    31 -0.41    -0.20
## oa.diff    3 10  4.3 7.89    5.0    5.38 7.41 -13  13    26 -0.77    -0.33
##           se
## si.diff 2.52
## sf.diff 2.59
## oa.diff 2.49
####################
#                  #
#    Exercise 6    #
#                  #
####################
#Obtain the correlation matrix
library(Hmisc)
rcorr(as.matrix(indices[-1]),type = "pearson")
##         si.diff sf.diff oa.diff
## si.diff    1.00    0.56    0.41
## sf.diff    0.56    1.00    0.41
## oa.diff    0.41    0.41    1.00
## 
## n= 40 
## 
## 
## P
##         si.diff sf.diff oa.diff
## si.diff         0.0002  0.0079 
## sf.diff 0.0002          0.0089 
## oa.diff 0.0079  0.0089
#Our variables are moderately correlated. When variables are highly correlated some need to be dropped
####################
#                  #
#    Exercise 7    #
#                  #
####################
#Check for univariate and multivariate outliers
library(ggplot2)
#Check univariate outliers
ggplot(indices,aes(x=therapy,y=si.diff)) + geom_boxplot()

ggplot(indices,aes(x=therapy,y=oa.diff)) + geom_boxplot()

ggplot(indices,aes(x=therapy,y=sf.diff)) + geom_boxplot()

#Box plots show some observations are outliers
#Check multivariate outliers
library(mvoutlier)
aq.plot(indices[-1])
## Projection to the first and second robust principal components.
## Proportion of total variation (explained variance): 0.7961158

## $outliers
##  [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [12] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [23] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [34] FALSE FALSE FALSE FALSE FALSE FALSE FALSE
#No observations were identified as multivariate outliers
####################
#                  #
#    Exercise 8    #
#                  #
####################
#Check for  homogeneity of variance
bartlett.test(sf.diff~therapy, data = indices)
## 
##  Bartlett test of homogeneity of variances
## 
## data:  sf.diff by therapy
## Bartlett's K-squared = 4.3844, df = 3, p-value = 0.2228
bartlett.test(si.diff~therapy,data = indices)
## 
##  Bartlett test of homogeneity of variances
## 
## data:  si.diff by therapy
## Bartlett's K-squared = 2.6569, df = 3, p-value = 0.4476
bartlett.test(oa.diff~therapy,data = indices)
## 
##  Bartlett test of homogeneity of variances
## 
## data:  oa.diff by therapy
## Bartlett's K-squared = 0.46566, df = 3, p-value = 0.9264
#There was no evidence of departure from homogeneity of variance
####################
#                  #
#    Exercise 9    #
#                  #
####################
#Run MANOVA with outliers
manova.analysis = manova(as.matrix(indices[-1])~therapy)
summary(manova.analysis)
##           Df  Pillai approx F num Df den Df   Pr(>F)   
## therapy    3 0.63123   3.1978      9    108 0.001812 **
## Residuals 36                                           
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(manova.analysis,test = "Wilks")
##           Df   Wilks approx F num Df den Df   Pr(>F)   
## therapy    3 0.46438   3.4126      9 82.898 0.001301 **
## Residuals 36                                           
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Both Pillai and Wilk's lambda showed significance
####################
#                  #
#    Exercise 10   #
#                  #
####################
#We did not find violation of any of assumptions required for MANOVA analysis
#However we identified some observations that were outliers
#In part 2 of MANOVA exercises we will drop the outliers and repeat the analysis
#In part 2 we will explain what to do when we have a significant result



Data Structures Solutions

Below are the solutions to these exercises on data structures.

####################
#                  #
#    Exercise 1    #
#                  #
####################

v <- sample(-100:100, 10, replace=TRUE)

####################
#                  #
#    Exercise 2    #
#                  #
####################

a <- array(seq(from = 26, length.out = 25, by = 2), c(5, 5))
s <- LETTERS[match("C", LETTERS):(match("C", LETTERS)+19)]

####################
#                  #
#    Exercise 3    #
#                  #
####################

l <- list(a = v, b = a, c = s)
length(l)
## [1] 3
str(l)
## List of 3
##  $ a: int [1:10] -83 72 -44 71 -54 -17 -40 -76 22 58
##  $ b: num [1:5, 1:5] 26 28 30 32 34 36 38 40 42 44 ...
##  $ c: chr [1:20] "C" "D" "E" "F" ...
length(unlist(l))
## [1] 55
####################
#                  #
#    Exercise 4    #
#                  #
####################

l[[3]]
##  [1] "C" "D" "E" "F" "G" "H" "I" "J" "K" "L" "M" "N" "O" "P" "Q" "R" "S"
## [18] "T" "U" "V"
l[[3]][sample(1:length(l[[3]]), 1)]
## [1] "O"
class(unlist(l))
## [1] "character"
x <- array(l)
class(x[1])
## [1] "list"
####################
#                  #
#    Exercise 5    #
#                  #
####################

l$c <- NULL
class(unlist(l))
## [1] "numeric"
####################
#                  #
#    Exercise 6    #
#                  #
####################

setdiff(l$a, l$b)
## [1] -83 -44  71 -54 -17 -40 -76  22
intersect(l$a, l$b)
## [1] 72 58
33 %in% union(l$a, l$b)
## [1] FALSE
####################
#                  #
#    Exercise 7    #
#                  #
####################

m <- matrix(data = round(runif(5*5, 0.99, 100.00), 2), nrow = 5)

####################
#                  #
#    Exercise 8    #
#                  #
####################

class(as.vector(m))
## [1] "numeric"
class(as.list(m))
## [1] "list"
class(as.data.frame(m))
## [1] "data.frame"
class(as.array(m))
## [1] "matrix"
####################
#                  #
#    Exercise 9    #
#                  #
####################

as.matrix(aperm(l$b))
##      [,1] [,2] [,3] [,4] [,5]
## [1,]   26   28   30   32   34
## [2,]   36   38   40   42   44
## [3,]   46   48   50   52   54
## [4,]   56   58   60   62   64
## [5,]   66   68   70   72   74
####################
#                  #
#    Exercise 10   #
#                  #
####################

sort(union(as.vector(m), unlist(l)))
##  [1] -83.00 -76.00 -54.00 -44.00 -40.00 -17.00   8.02   9.58  10.41  10.46
## [11]  10.51  16.28  20.85  22.00  22.33  25.58  25.66  26.00  27.96  28.00
## [21]  28.07  30.00  32.00  34.00  36.00  37.02  38.00  38.36  40.00  42.00
## [31]  44.00  45.22  46.00  48.00  50.00  52.00  53.18  54.00  56.00  58.00
## [41]  60.00  62.00  64.00  66.00  67.11  68.00  70.00  71.00  72.00  73.88
## [51]  74.00  74.64  83.52  89.62  89.72  91.35  99.19  99.45



Unit testing in R using testthat library – Solutions

Below are the solutions to these exercises on unit testing in R.

####################
#                  #
#    Exercise 1    #
#                  #
####################

install.packages("testthat")
library(testthat) # loading the package


####################
#                  #
#    Exercise 2    #
#                  #
####################

#Passes
expect_that(5*2, equals(10))

####################
#                  #
#    Exercise 3    #
#                  #
####################

#Passes
expect_that(5*2, equals(10+ 1e-7))
#fails
expect_that(5*2, equals(10+ 1e-6))

####################
#                  #
#    Exercise 4    #
#                  #
####################

expect_that(2*2, is_identical_to(4+ 1e-8))

####################
#                  #
#    Exercise 5    #
#                  #
####################

m <- function(x,y){return(x*y)}
expect_that(m("2","3"), throws_error("non-numeric argument to binary operator"))


####################
#                  #
#    Exercise 6    #
#                  #
####################

test_that("Testing multiplication function",{
  expect_that(m(2,3), equals(6))
  expect_that(m(2,c(2,3)), equals(c(4,6)))
  expect_that(m(2,"3"), throws_error("non-numeric argument to binary operator"))
})

####################
#                  #
#    Exercise 7    #
#                  #
####################

is_greater_10 <- function() {
  function(x){
    expect(x > 10, "Not greater than 10")
  }
}

####################
#                  #
#    Exercise 8    #
#                  #
####################

expect_that(9, is_greater_10())

####################
#                  #
#    Exercise 9    #
#                  #
####################

test_file(<Path to the file with test script>)

####################
#                  #
#    Exercise 10   #
#                  #
####################

test_dir(<Path to the directory with test files>)



Multiple Regression (Part 3) Diagnostics – Solutions

Below are the solutions to these exercises on Multiple Regression (part 3).

data(state)
state77 <- as.data.frame(state.x77)
names(state77)[4] <- "Life.Exp"
names(state77)[6] <- "HS.Grad"


####################
#                  #
#    Exercise 1    #
#                  #
####################
#a.
library(car)
m1 <- lm(Life.Exp ~ HS.Grad+Murder, data=state77)
avPlots(m1)
plot of chunk mlr-exercises3
#Note that the slope of the line is positive in the HS.Grad plot, and negative in the Murder plot, as expected. 

#b.
avPlots(m1,id.method=list("mahal"),id.n=2)
plot of chunk mlr-exercises3
####################
#                  #
#    Exercise 2    #
#                  #
####################
#a.
with(state77,avPlot(lm(Life.Exp ~ HS.Grad+Murder+Illiteracy),variable=Illiteracy))
plot of chunk mlr-exercises3
#Note that the slope is positive, contrary to what is expected

#b.
avPlots(lm(Life.Exp ~ .,data=state77), terms= ~ Population+Area)
plot of chunk mlr-exercises3
####################
#                  #
#    Exercise 3    #
#                  #
####################
crPlots(lm(Life.Exp ~ HS.Grad+Murder+Income+Area,data=state77))
plot of chunk mlr-exercises3
#We see that there seems to be a problem with linearity for Income and Area (which could be due to the outlier in the lower right corner in both plots).


####################
#                  #
#    Exercise 4    #
#                  #
####################
ceresPlots(lm(Life.Exp ~ HS.Grad+Murder+Income+Area,data=state77))
plot of chunk mlr-exercises3
#Here, there is not much difference with the plots in Exercise 3 (although, in general, CERES plots are "less prone to leakage of nonlinearity among the predictors.")


####################
#                  #
#    Exercise 5    #
#                  #
####################
vif(lm(Life.Exp ~ .,data=state77))
## Population     Income Illiteracy     Murder    HS.Grad      Frost 
##   1.499915   1.992680   4.403151   2.616472   3.134887   2.358206 
##       Area 
##   1.789764
#Some authors advocate that a vif>2.5 is a cause for concern, while others mention vif>4 or vif>10. According to these criteria, Illiteracy, Murder, and HS.Grad are the most problematic (in the presence of all the other predictors).


####################
#                  #
#    Exercise 6    #
#                  #
####################
library(lmtest)
bptest(m1)
## 
## 	studentized Breusch-Pagan test
## 
## data:  m1
## BP = 2.9728, df = 2, p-value = 0.2262
#There is no evidence of heteroscedasticity (of the type that depends on a linear combination of the predictors).


####################
#                  #
#    Exercise 7    #
#                  #
####################
ncvTest(m1)
## Non-constant Variance Score Test 
## Variance formula: ~ fitted.values 
## Chisquare = 0.01065067    Df = 1     p = 0.9178026
#Note that the results are different to Exercise 6 because bptest (by default) uses studentized residuals (which is preferred for robustness) and assumes the error variance depends on a linear combination of the predictors, whereas ncvTest (by default) uses regular residuals and assumes the error variance depends on the fitted values. 
#ncvTest(m1) is equivalent to bptest(m1,varformula= ~ m1$fitted,studentize=F,data=state77)


####################
#                  #
#    Exercise 8    #
#                  #
####################
bptest(m1,varformula= ~ I(HS.Grad^2)+I(Murder^2)+HS.Grad*Murder,data=state77)
## 
## 	studentized Breusch-Pagan test
## 
## data:  m1
## BP = 6.7384, df = 5, p-value = 0.2408
####################
#                  #
#    Exercise 9    #
#                  #
####################
#a. 
ks.test(m1$residuals,"pnorm")
## 
## 	One-sample Kolmogorov-Smirnov test
## 
## data:  m1$residuals
## D = 0.15546, p-value = 0.1603
## alternative hypothesis: two-sided
#There is no evidence that the residuals are not Normal. 

#b.
shapiro.test(m1$residuals)
## 
## 	Shapiro-Wilk normality test
## 
## data:  m1$residuals
## W = 0.96961, p-value = 0.2231
#Again, there is no evidence of nonnormality.


####################
#                  #
#    Exercise 10   #
#                  #
####################
durbinWatsonTest(m1)
##  lag Autocorrelation D-W Statistic p-value
##    1      0.04919151        1.8495   0.582
##  Alternative hypothesis: rho != 0
#There is no evidence of lag-1 autocorrelation in the residuals.



Spatial analysis with ggmap Solutions (part-1)

Below are the solutions to these exercises on Spatial Analysis with R.

####################
#                  #
#    Exercise 1    #
#                  #
####################
library(ggmap)
uk <- get_map(location="United Kingdom", zoom=5, maptype='terrain', source='google', color='color')
ggmap(uk)
plot of chunk unnamed-chunk-1
####################
#                  #
#    Exercise 2    #
#                  #
####################
uk <- get_map(location="United Kingdom", zoom=5, maptype='toner', source='stamen', color='color')
ggmap(uk)
plot of chunk unnamed-chunk-1
####################
#                  #
#    Exercise 3    #
#                  #
####################

clubs<-c("Arsenal FC", "Manchester City FC", "Manchester United FC",
         "Liverpool FC", "Chelsea FC", "Tottenham Hotspur FC")
coord<-geocode(clubs)
clubs<-cbind(clubs,coord)

uk <- get_map(location="United Kingdom", zoom=5, maptype='terrain', source='google', color='color')
ggmap(uk) +
  geom_point(
    aes(x = lon, y = lat),
    data = clubs, colour = "red", size = 3
  )
plot of chunk unnamed-chunk-1
####################
#                  #
#    Exercise 4    #
#                  #
####################
revgeocode(c(-0.119543, 51.50332), output="address")
## [1] "30 The Queen's Walk, Lambeth, London SE1 8XX, UK"
####################
#                  #
#    Exercise 5    #
#                  #
####################
london <- get_map(location="london", zoom=10, maptype='terrain', source='google', color='color')
ggmap(london)
plot of chunk unnamed-chunk-1
####################
#                  #
#    Exercise 6    #
#                  #
####################
clubs<-c("Arsenal FC", "Tottenham Hotspur FC", "Chelsea FC",
         "West Ham FC", "Crystal Palace FC")
coord<-geocode(clubs)
clubs<-cbind(clubs,coord)

london <- get_map(location="london", zoom=10, maptype='terrain', source='google', color='color')
ggmap(london) +
  geom_point(
    aes(x = lon, y = lat, shape = factor(clubs), colour=factor(clubs)),
    data = clubs, size = 3
  )
plot of chunk unnamed-chunk-1
####################
#                  #
#    Exercise 7    #
#                  #
####################
distance<-mapdist("Emirates Stadium, London", "Wembley, London", mode="driving", output="simple")
distance$km
## [1] 19.526
distance$minutes
## [1] 41.41667
####################
#                  #
#    Exercise 8    #
#                  #
####################
coords<-geocode(c("Emirates Stadium, London","Wembley, London"))
calc_zoom(lon, lat, coords)
## [1] 16
####################
#                  #
#    Exercise 9    #
#                  #
####################
map<-get_map("Wembley", zoom = 12, maptype='roadmap', source='google', color='color')
ggmap(map)
plot of chunk unnamed-chunk-1
####################
#                  #
#    Exercise 10   #
#                  #
####################
Emirates2Wembley<- route("Emirates Stadium, London", "Wembley, London")
map<-get_map("Wembley", zoom = 12, maptype='roadmap', source='google', color='color')
ggmap(map)+
  geom_segment(
    aes(x = startLon, y = startLat, xend = endLon, yend = endLat),
    colour ="red", size = 2, data = Emirates2Wembley
  )
## Warning: Removed 9 rows containing missing values (geom_segment).
plot of chunk unnamed-chunk-1



Data Science for Doctors – Part 4 : Inferential Statistics (1/5) Solutions

Below are the solutions to these exercises on data display.

####################
#                  #
#    Exercise 1    #
#                  #
####################

iter <- 10000
means <- rep(NA, iter)

for (i in 1:iter){
  sam_50 <- sample(data$mass, 50)
  means[i] <- mean(sam_50)
}

hist(means)
plot of chunk unnamed-chunk-1
hist(data$mass)
plot of chunk unnamed-chunk-1
####################
#                  #
#    Exercise 2    #
#                  #
####################

mean(data$mass)
## [1] 31.99258
sd(data$mass)/sqrt(50)
## [1] 1.114989
#OR 
mean(means)
## [1] 31.98233
sd(means)
## [1] 1.081871
####################
#                  #
#    Exercise 3    #
#                  #
####################

library(moments)
skewness(means)
## [1] -0.0333564
#  slight positive skewness, which means that it is slightly light tailed
kurtosis(means)
## [1] 3.064367
# The kurtosis is close to the expected value 3.

####################
#                  #
#    Exercise 4    #
#                  #
####################

z = (30.5-mean(data$mass))/(sd(data$mass)/sqrt(50))
z
## [1] -1.338649
####################
#                  #
#    Exercise 5    #
#                  #
####################

pnorm(z)
## [1] 0.09034253
####################
#                  #
#    Exercise 6    #
#                  #
####################

z = (31-mean(data$mass))/(sd(data$mass)/sqrt(150))

####################
#                  #
#    Exercise 7    #
#                  #
####################

pnorm(z)
## [1] 0.06154952
####################
#                  #
#    Exercise 8    #
#                  #
####################

z*sd(data$mass)/sqrt(150)
## [1] -0.9925781
####################
#                  #
#    Exercise 9    #
#                  #
####################

z = 1.96
low <- 31 - z*sd(data$mass)/sqrt(250)
high <- 31 + z*sd(data$mass)/sqrt(250)
low;high
## [1] 30.02267
## [1] 31.97733
####################
#                  #
#    Exercise 10   #
#                  #
####################

z = 2.33
low <- 31 - z*sd(data$mass)/sqrt(250)
high <- 31 + z*sd(data$mass)/sqrt(250)
low;high
## [1] 29.83817
## [1] 32.16183
z = 2.58
low <- 31 - z*sd(data$mass)/sqrt(250)
high <- 31 + z*sd(data$mass)/sqrt(250)
low;high
## [1] 29.71351
## [1] 32.28649



Building Shiny app solutions part 7

Below are the solutions to these exercises on Building Shiny App.

Learn more about Shiny in the online course R Shiny Interactive Web Apps – Next Level Data Visualization. In this course you will learn how to create advanced Shiny web apps; embed video, pdfs and images; add focus and zooming tools; and many other functionalities (30 lectures, 3hrs.).

####################
#                  #
#    Exercise 1    #
#                  #
####################

#ui.R
library(shiny)
shinyUI(fluidPage(
  titlePanel("Shiny App"),

  sidebarLayout(
    sidebarPanel(h2("Menu"),
                 br(),
                 fluidRow(
                   column(6,
                 h4("Help Text"),
                 helpText("Just for help"))),
                 br(),
                 fluidRow(
                 column(6,
                        numericInput("numer",
                                     label = h4("Numeric Input"),
                                     value = 10))),
                 fluidRow(
                   column(6,
                 h4("Single Checkbox"),
                 checkboxInput("checkbox", label = "Choice A", value = TRUE))),
                 fluidRow(
                   column(6,
                 dateInput("date",
                           label = h4("Date input"),
                           value = "2016-12-01")),
                 column(6
                        )),
                 fluidRow(
                   column(6,
                 dateRangeInput("dates", label = h4("Date Range"))),
                 column(6,
                        textInput("text", label = h4("Text Input"),
                                  value = "Some Text"))),
                 fileInput("file", label = h4("File Input"))),
    mainPanel(h1("Main"),
              img(src = "petal.jpg", height = 150, width = 200),
              br(),
              br(),
              p("This famous (Fisher's or Anderson's) ", a("iris",href="http://stat.ethz.ch/R-manual/R-devel/library/datasets/html/iris.html"), "data set gives the measurements in centimeters of the variables sepal length and width and petal length and width, respectively, for 50 flowers from each of 3 species of iris. The species are ",strong( "Iris setosa,"),strong( "versicolor"), "and", strong("virginica.")),
              br(),
              h2("Analysis"),
              tabsetPanel(type="tabs",tabPanel("Data Table",dataTableOutput("Table")),
                                      tabPanel("Summary",dataTableOutput("Table2")),
                                      tabPanel("K means",
                                               plotOutput("plot1",click = "mouse"),
                                               verbatimTextOutput("coord"),
                                               uiOutput("All")))

    )
  )
  ))
#server.R
shinyServer(function(input, output) {
  output$Table <- renderDataTable(
    iris,options = list(
      lengthMenu = list(c(10, 20, 30,-1),c('10','20','30','ALL')),
      pageLength = 10))
  sumiris<-as.data.frame.array(summary(iris))
  output$Table2 <- renderDataTable(sumiris)
  output$plot1 <- renderPlot({
    plot(iris$Petal.Length,iris$Petal.Width,main = "K-MEANS",xlab="Petal Length",ylab = "Petal Width",
         cex.main = 2,   font.main= 4, col.main= "blue")
  }, width = "auto",height = "auto")
  output$coord <- renderText({
    paste0("x=", input$mouse$x, "\ny=", input$mouse$y)
  })
  output$All <- renderUI({
    tagList(
      fluidRow(
        column(4,
      sliderInput("slider1", label = h4("Clusters"),
                  min = 3, max = 10, value = 3)),
        column(4,
      checkboxGroupInput("checkGroup",
                         label = h4("Variable X"),
                         choices = list("Choice 1" = 1,
                                        "Choice 2" = 2, "Choice 3" = 3),
                         selected = 2)),
      column(4,
             selectInput("select", label = h4("Variable Y"),
                         choices = list("Choice 1" = 1, "Choice 2" = 2
                         ), selected = 1)))


    )
  })
})

####################
#                  #
#    Exercise 2    #
#                  #
####################

#ui.R
library(shiny)
shinyUI(fluidPage(
  titlePanel("Shiny App"),

  sidebarLayout(
    sidebarPanel(h2("Menu"),
                 br(),
                 fluidRow(
                   column(6,
                 h4("Help Text"),
                 helpText("Just for help"))),
                 br(),
                 fluidRow(
                 column(6,
                        numericInput("numer",
                                     label = h4("Numeric Input"),
                                     value = 10))),
                 fluidRow(
                   column(6,
                 h4("Single Checkbox"),
                 checkboxInput("checkbox", label = "Choice A", value = TRUE))),
                 fluidRow(
                   column(6,
                 dateInput("date",
                           label = h4("Date input"),
                           value = "2016-12-01")),
                 column(6
                        )),
                 fluidRow(
                   column(6,
                 dateRangeInput("dates", label = h4("Date Range"))),
                 column(6,
                        textInput("text", label = h4("Text Input"),
                                  value = "Some Text"))),
                 fileInput("file", label = h4("File Input"))),
    mainPanel(h1("Main"),
              img(src = "petal.jpg", height = 150, width = 200),
              br(),
              br(),
              p("This famous (Fisher's or Anderson's) ", a("iris",href="http://stat.ethz.ch/R-manual/R-devel/library/datasets/html/iris.html"), "data set gives the measurements in centimeters of the variables sepal length and width and petal length and width, respectively, for 50 flowers from each of 3 species of iris. The species are ",strong( "Iris setosa,"),strong( "versicolor"), "and", strong("virginica.")),
              br(),
              h2("Analysis"),
              tabsetPanel(type="tabs",tabPanel("Data Table",dataTableOutput("Table")),
                                      tabPanel("Summary",dataTableOutput("Table2")),
                                      tabPanel("K means",
                                               plotOutput("plot1",click = "mouse"),
                                               verbatimTextOutput("coord"),
                                               uiOutput("All")))

    )
  )
  ))
#server.R
shinyServer(function(input, output) {
  output$Table <- renderDataTable(
    iris,options = list(
      lengthMenu = list(c(10, 20, 30,-1),c('10','20','30','ALL')),
      pageLength = 10))
  sumiris<-as.data.frame.array(summary(iris))
  output$Table2 <- renderDataTable(sumiris)
  output$plot1 <- renderPlot({
    plot(iris$Petal.Length,iris$Petal.Width,main = "K-MEANS",xlab="Petal Length",ylab = "Petal Width",
         cex.main = 2,   font.main= 4, col.main= "blue")
  }, width = "auto",height = "auto")
  output$coord <- renderText({
    paste0("x=", input$mouse$x, "\ny=", input$mouse$y)
  })
  output$All <- renderUI({
    tagList(
      fluidRow(
        column(4,
      sliderInput("slider1", label = h4("Clusters"),
                  min = 3, max = 10, value = 3)),
        column(4,
      checkboxGroupInput("checkGroup",
                         label = h4("Variable X"),
                         choices = list("Choice 1" = 1,
                                        "Choice 2" = 2, "Choice 3" = 3),
                         selected = 2)),
      column(4,
             selectInput("select", label = h4("Variable Y"),
                         choices = list("Choice 1" = 1, "Choice 2" = 2
                         ), selected = 1)))


    )
  })
  Data <- reactive({
  })
})

####################
#                  #
#    Exercise 3    #
#                  #
####################

#ui.R
library(shiny)
shinyUI(fluidPage(
  titlePanel("Shiny App"),

  sidebarLayout(
    sidebarPanel(h2("Menu"),
                 br(),
                 fluidRow(
                   column(6,
                 h4("Help Text"),
                 helpText("Just for help"))),
                 br(),
                 fluidRow(
                 column(6,
                        numericInput("numer",
                                     label = h4("Numeric Input"),
                                     value = 10))),
                 fluidRow(
                   column(6,
                 h4("Single Checkbox"),
                 checkboxInput("checkbox", label = "Choice A", value = TRUE))),
                 fluidRow(
                   column(6,
                 dateInput("date",
                           label = h4("Date input"),
                           value = "2016-12-01")),
                 column(6
                        )),
                 fluidRow(
                   column(6,
                 dateRangeInput("dates", label = h4("Date Range"))),
                 column(6,
                        textInput("text", label = h4("Text Input"),
                                  value = "Some Text"))),
                 fileInput("file", label = h4("File Input"))),
    mainPanel(h1("Main"),
              img(src = "petal.jpg", height = 150, width = 200),
              br(),
              br(),
              p("This famous (Fisher's or Anderson's) ", a("iris",href="http://stat.ethz.ch/R-manual/R-devel/library/datasets/html/iris.html"), "data set gives the measurements in centimeters of the variables sepal length and width and petal length and width, respectively, for 50 flowers from each of 3 species of iris. The species are ",strong( "Iris setosa,"),strong( "versicolor"), "and", strong("virginica.")),
              br(),
              h2("Analysis"),
              tabsetPanel(type="tabs",tabPanel("Data Table",dataTableOutput("Table")),
                                      tabPanel("Summary",dataTableOutput("Table2")),
                                      tabPanel("K means",
                                               plotOutput("plot1",click = "mouse"),
                                               verbatimTextOutput("coord"),
                                               uiOutput("All")))

    )
  )
  ))
#server.R
shinyServer(function(input, output) {
  output$Table <- renderDataTable(
    iris,options = list(
      lengthMenu = list(c(10, 20, 30,-1),c('10','20','30','ALL')),
      pageLength = 10))
  sumiris<-as.data.frame.array(summary(iris))
  output$Table2 <- renderDataTable(sumiris)
  output$plot1 <- renderPlot({
    plot(iris$Petal.Length,iris$Petal.Width,main = "K-MEANS",xlab="Petal Length",ylab = "Petal Width",
         cex.main = 2,   font.main= 4, col.main= "blue")
  }, width = "auto",height = "auto")
  output$coord <- renderText({
    paste0("x=", input$mouse$x, "\ny=", input$mouse$y)
  })
  output$All <- renderUI({
    tagList(
      fluidRow(
        column(4,
      sliderInput("slider1", label = h4("Clusters"),
                  min = 3, max = 10, value = 3)),
        column(4,
      checkboxGroupInput("checkGroup",
                         label = h4("Variable X"),
                         choices = list("Choice 1" = 1,
                                        "Choice 2" = 2, "Choice 3" = 3),
                         selected = 2)),
      column(4,
             selectInput("select", label = h4("Variable Y"),
                         names(iris)
                         )))


    )
  })
  Data <- reactive({iris[, c(input$select)]
  })
})

####################
#                  #
#    Exercise 4    #
#                  #
####################

#ui.R
library(shiny)
shinyUI(fluidPage(
  titlePanel("Shiny App"),

  sidebarLayout(
    sidebarPanel(h2("Menu"),
                 br(),
                 fluidRow(
                   column(6,
                 h4("Help Text"),
                 helpText("Just for help"))),
                 br(),
                 fluidRow(
                 column(6,
                        numericInput("numer",
                                     label = h4("Numeric Input"),
                                     value = 10))),
                 fluidRow(
                   column(6,
                 h4("Single Checkbox"),
                 checkboxInput("checkbox", label = "Choice A", value = TRUE))),
                 fluidRow(
                   column(6,
                 dateInput("date",
                           label = h4("Date input"),
                           value = "2016-12-01")),
                 column(6
                        )),
                 fluidRow(
                   column(6,
                 dateRangeInput("dates", label = h4("Date Range"))),
                 column(6,
                        textInput("text", label = h4("Text Input"),
                                  value = "Some Text"))),
                 fileInput("file", label = h4("File Input"))),
    mainPanel(h1("Main"),
              img(src = "petal.jpg", height = 150, width = 200),
              br(),
              br(),
              p("This famous (Fisher's or Anderson's) ", a("iris",href="http://stat.ethz.ch/R-manual/R-devel/library/datasets/html/iris.html"), "data set gives the measurements in centimeters of the variables sepal length and width and petal length and width, respectively, for 50 flowers from each of 3 species of iris. The species are ",strong( "Iris setosa,"),strong( "versicolor"), "and", strong("virginica.")),
              br(),
              h2("Analysis"),
              tabsetPanel(type="tabs",tabPanel("Data Table",dataTableOutput("Table")),
                                      tabPanel("Summary",dataTableOutput("Table2")),
                                      tabPanel("K means",
                                               plotOutput("plot1",click = "mouse"),
                                               verbatimTextOutput("coord"),
                                               uiOutput("All")))

    )
  )
  ))
#server.R
shinyServer(function(input, output) {
  output$Table <- renderDataTable(
    iris,options = list(
      lengthMenu = list(c(10, 20, 30,-1),c('10','20','30','ALL')),
      pageLength = 10))
  sumiris<-as.data.frame.array(summary(iris))
  output$Table2 <- renderDataTable(sumiris)
  output$plot1 <- renderPlot({
    plot(iris$Petal.Length,iris$Petal.Width,main = "K-MEANS",xlab="Petal Length",ylab = "Petal Width",
         cex.main = 2,   font.main= 4, col.main= "blue")
  }, width = "auto",height = "auto")
  output$coord <- renderText({
    paste0("x=", input$mouse$x, "\ny=", input$mouse$y)
  })
  output$All <- renderUI({
    tagList(
      fluidRow(
        column(4,
      sliderInput("slider1", label = h4("Clusters"),
                  min = 3, max = 10, value = 3)),
        column(4,
      checkboxGroupInput("checkGroup",
                         label = h4("Variable X"),names(iris)
                         )),
      column(4,
             selectInput("select", label = h4("Variable Y"),
                         names(iris)
                         )))


    )
  })
  Data <- reactive({iris[, c(input$select,input$checkGroup)]
  })
})

####################
#                  #
#    Exercise 5    #
#                  #
####################

#ui.R
library(shiny)
shinyUI(fluidPage(
  titlePanel("Shiny App"),

  sidebarLayout(
    sidebarPanel(h2("Menu"),
                 br(),
                 fluidRow(
                   column(6,
                 h4("Help Text"),
                 helpText("Just for help"))),
                 br(),
                 fluidRow(
                 column(6,
                        numericInput("numer",
                                     label = h4("Numeric Input"),
                                     value = 10))),
                 fluidRow(
                   column(6,
                 h4("Single Checkbox"),
                 checkboxInput("checkbox", label = "Choice A", value = TRUE))),
                 fluidRow(
                   column(6,
                 dateInput("date",
                           label = h4("Date input"),
                           value = "2016-12-01")),
                 column(6
                        )),
                 fluidRow(
                   column(6,
                 dateRangeInput("dates", label = h4("Date Range"))),
                 column(6,
                        textInput("text", label = h4("Text Input"),
                                  value = "Some Text"))),
                 fileInput("file", label = h4("File Input"))),
    mainPanel(h1("Main"),
              img(src = "petal.jpg", height = 150, width = 200),
              br(),
              br(),
              p("This famous (Fisher's or Anderson's) ", a("iris",href="http://stat.ethz.ch/R-manual/R-devel/library/datasets/html/iris.html"), "data set gives the measurements in centimeters of the variables sepal length and width and petal length and width, respectively, for 50 flowers from each of 3 species of iris. The species are ",strong( "Iris setosa,"),strong( "versicolor"), "and", strong("virginica.")),
              br(),
              h2("Analysis"),
              tabsetPanel(type="tabs",tabPanel("Data Table",dataTableOutput("Table")),
                                      tabPanel("Summary",dataTableOutput("Table2")),
                                      tabPanel("K means",
                                               plotOutput("plot1",click = "mouse"),
                                               verbatimTextOutput("coord"),
                                               uiOutput("All")))

    )
  )
  ))
#server.R
shinyServer(function(input, output) {
  output$Table <- renderDataTable(
    iris,options = list(
      lengthMenu = list(c(10, 20, 30,-1),c('10','20','30','ALL')),
      pageLength = 10))
  sumiris<-as.data.frame.array(summary(iris))
  output$Table2 <- renderDataTable(sumiris)
  output$plot1 <- renderPlot({
    plot(iris$Petal.Length,iris$Petal.Width,main = "K-MEANS",xlab="Petal Length",ylab = "Petal Width",
         cex.main = 2,   font.main= 4, col.main= "blue")
  }, width = "auto",height = "auto")
  output$coord <- renderText({
    paste0("x=", input$mouse$x, "\ny=", input$mouse$y)
  })
  output$All <- renderUI({
    tagList(
      fluidRow(
        column(4,
      sliderInput("slider1", label = h4("Clusters"),
                  min = 3, max = 10, value = 3)),
        column(4,
      checkboxGroupInput("checkGroup",
                         label = h4("Variable X"),names(iris),
                         selected=names(iris)[[2]]
                         )),
      column(4,
             selectInput("select", label = h4("Variable Y"),
                         names(iris),selected=names(iris)[[2]]
                         )))


    )
  })
  Data <- reactive({iris[, c(input$select,input$checkGroup)]
  })
})

####################
#                  #
#    Exercise 6    #
#                  #
####################

#ui.R
library(shiny)
shinyUI(fluidPage(
  titlePanel("Shiny App"),

  sidebarLayout(
    sidebarPanel(h2("Menu"),
                 br(),
                 fluidRow(
                   column(6,
                 h4("Help Text"),
                 helpText("Just for help"))),
                 br(),
                 fluidRow(
                 column(6,
                        numericInput("numer",
                                     label = h4("Numeric Input"),
                                     value = 10))),
                 fluidRow(
                   column(6,
                 h4("Single Checkbox"),
                 checkboxInput("checkbox", label = "Choice A", value = TRUE))),
                 fluidRow(
                   column(6,
                 dateInput("date",
                           label = h4("Date input"),
                           value = "2016-12-01")),
                 column(6
                        )),
                 fluidRow(
                   column(6,
                 dateRangeInput("dates", label = h4("Date Range"))),
                 column(6,
                        textInput("text", label = h4("Text Input"),
                                  value = "Some Text"))),
                 fileInput("file", label = h4("File Input"))),
    mainPanel(h1("Main"),
              img(src = "petal.jpg", height = 150, width = 200),
              br(),
              br(),
              p("This famous (Fisher's or Anderson's) ", a("iris",href="http://stat.ethz.ch/R-manual/R-devel/library/datasets/html/iris.html"), "data set gives the measurements in centimeters of the variables sepal length and width and petal length and width, respectively, for 50 flowers from each of 3 species of iris. The species are ",strong( "Iris setosa,"),strong( "versicolor"), "and", strong("virginica.")),
              br(),
              h2("Analysis"),
              tabsetPanel(type="tabs",tabPanel("Data Table",dataTableOutput("Table")),
                                      tabPanel("Summary",dataTableOutput("Table2")),
                                      tabPanel("K means",
                                               plotOutput("plot1",click = "mouse"),
                                               verbatimTextOutput("coord"),
                                               uiOutput("All")))

    )
  )
  ))
#server.R
shinyServer(function(input, output) {
  output$Table <- renderDataTable(
    iris,options = list(
      lengthMenu = list(c(10, 20, 30,-1),c('10','20','30','ALL')),
      pageLength = 10))
  sumiris<-as.data.frame.array(summary(iris))
  output$Table2 <- renderDataTable(sumiris)
  output$plot1 <- renderPlot({
    plot(iris$Petal.Length,iris$Petal.Width,main = "K-MEANS",xlab="Petal Length",ylab = "Petal Width",
         cex.main = 2,   font.main= 4, col.main= "blue")
  }, width = "auto",height = "auto")
  output$coord <- renderText({
    paste0("x=", input$mouse$x, "\ny=", input$mouse$y)
  })
  output$All <- renderUI({
    tagList(
      fluidRow(
        column(4,
      sliderInput("slider1", label = h4("Clusters"),
                  min = 3, max = 9, value = 3)),
        column(4,
      checkboxGroupInput("checkGroup",
                         label = h4("Variable X"),names(iris),
                         selected=names(iris)[[2]]
                         )),
      column(4,
             selectInput("select", label = h4("Variable Y"),
                         names(iris),selected=names(iris)[[2]]
                         )))

    )
  })
  Data <- reactive({iris[, c(input$select,input$checkGroup)]
  })
  Clusters <- reactive({
    kmeans(Data())
  })
})

####################
#                  #
#    Exercise 7    #
#                  #
####################

#ui.R
library(shiny)
shinyUI(fluidPage(
  titlePanel("Shiny App"),

  sidebarLayout(
    sidebarPanel(h2("Menu"),
                 br(),
                 fluidRow(
                   column(6,
                 h4("Help Text"),
                 helpText("Just for help"))),
                 br(),
                 fluidRow(
                 column(6,
                        numericInput("numer",
                                     label = h4("Numeric Input"),
                                     value = 10))),
                 fluidRow(
                   column(6,
                 h4("Single Checkbox"),
                 checkboxInput("checkbox", label = "Choice A", value = TRUE))),
                 fluidRow(
                   column(6,
                 dateInput("date",
                           label = h4("Date input"),
                           value = "2016-12-01")),
                 column(6
                        )),
                 fluidRow(
                   column(6,
                 dateRangeInput("dates", label = h4("Date Range"))),
                 column(6,
                        textInput("text", label = h4("Text Input"),
                                  value = "Some Text"))),
                 fileInput("file", label = h4("File Input"))),
    mainPanel(h1("Main"),
              img(src = "petal.jpg", height = 150, width = 200),
              br(),
              br(),
              p("This famous (Fisher's or Anderson's) ", a("iris",href="http://stat.ethz.ch/R-manual/R-devel/library/datasets/html/iris.html"), "data set gives the measurements in centimeters of the variables sepal length and width and petal length and width, respectively, for 50 flowers from each of 3 species of iris. The species are ",strong( "Iris setosa,"),strong( "versicolor"), "and", strong("virginica.")),
              br(),
              h2("Analysis"),
              tabsetPanel(type="tabs",tabPanel("Data Table",dataTableOutput("Table")),
                                      tabPanel("Summary",dataTableOutput("Table2")),
                                      tabPanel("K means",
                                               plotOutput("plot1",click = "mouse"),
                                               verbatimTextOutput("coord"),
                                               uiOutput("All")))

    )
  )
  ))
#server.R
shinyServer(function(input, output) {
  output$Table <- renderDataTable(
    iris,options = list(
      lengthMenu = list(c(10, 20, 30,-1),c('10','20','30','ALL')),
      pageLength = 10))
  sumiris<-as.data.frame.array(summary(iris))
  output$Table2 <- renderDataTable(sumiris)
  output$plot1 <- renderPlot({
    plot(Data(),main = "K-MEANS",
         cex.main = 2,   font.main= 4, col.main= "blue")
  }, width = "auto",height = "auto")
  output$coord <- renderText({
    paste0("x=", input$mouse$x, "\ny=", input$mouse$y)
  })
  output$All <- renderUI({
    tagList(
      fluidRow(
        column(4,
      sliderInput("slider1", label = h4("Clusters"),
                  min = 3, max = 10, value = 3)),
        column(4,
      checkboxGroupInput("checkGroup",
                         label = h4("Variable X"),names(iris),
                         selected=names(iris)[[2]]
                         )),
      column(4,
             selectInput("select", label = h4("Variable Y"),
                         names(iris),selected=names(iris)[[2]]
                         )))


    )
  })
  Data <- reactive({iris[, c(input$select,input$checkGroup)]
  })
  Clusters <- reactive({
    kmeans(Data(),input$slider1)
  })
})

####################
#                  #
#    Exercise 8    #
#                  #
####################

#ui.R
library(shiny)
shinyUI(fluidPage(
  titlePanel("Shiny App"),

  sidebarLayout(
    sidebarPanel(h2("Menu"),
                 br(),
                 fluidRow(
                   column(6,
                 h4("Help Text"),
                 helpText("Just for help"))),
                 br(),
                 fluidRow(
                 column(6,
                        numericInput("numer",
                                     label = h4("Numeric Input"),
                                     value = 10))),
                 fluidRow(
                   column(6,
                 h4("Single Checkbox"),
                 checkboxInput("checkbox", label = "Choice A", value = TRUE))),
                 fluidRow(
                   column(6,
                 dateInput("date",
                           label = h4("Date input"),
                           value = "2016-12-01")),
                 column(6
                        )),
                 fluidRow(
                   column(6,
                 dateRangeInput("dates", label = h4("Date Range"))),
                 column(6,
                        textInput("text", label = h4("Text Input"),
                                  value = "Some Text"))),
                 fileInput("file", label = h4("File Input"))),
    mainPanel(h1("Main"),
              img(src = "petal.jpg", height = 150, width = 200),
              br(),
              br(),
              p("This famous (Fisher's or Anderson's) ", a("iris",href="http://stat.ethz.ch/R-manual/R-devel/library/datasets/html/iris.html"), "data set gives the measurements in centimeters of the variables sepal length and width and petal length and width, respectively, for 50 flowers from each of 3 species of iris. The species are ",strong( "Iris setosa,"),strong( "versicolor"), "and", strong("virginica.")),
              br(),
              h2("Analysis"),
              tabsetPanel(type="tabs",tabPanel("Data Table",dataTableOutput("Table")),
                                      tabPanel("Summary",dataTableOutput("Table2")),
                                      tabPanel("K means",
                                               plotOutput("plot1",click = "mouse"),
                                               verbatimTextOutput("coord"),
                                               uiOutput("All")))

    )
  )
  ))
#server.R
shinyServer(function(input, output) {
  output$Table <- renderDataTable(
    iris,options = list(
      lengthMenu = list(c(10, 20, 30,-1),c('10','20','30','ALL')),
      pageLength = 10))
  sumiris<-as.data.frame.array(summary(iris))
  output$Table2 <- renderDataTable(sumiris)
  output$plot1 <- renderPlot({
    palette(c("#E41A1C", "#377EB8", "#4DAF4A", "#984EA3",
              "#FF7F00", "#FFFF33", "#A65628", "#F781BF", "#999999"))
    plot(Data(),main = "K-MEANS",

         cex.main = 2,   font.main= 4, col.main= "blue")
  }, width = "auto",height = "auto")
  output$coord <- renderText({
    paste0("x=", input$mouse$x, "\ny=", input$mouse$y)
  })
  output$All <- renderUI({
    tagList(
      fluidRow(
        column(4,
               sliderInput("slider1", label = h4("Clusters"),
                           min = 1, max = 9, value = 4)),
        column(4,
               checkboxGroupInput("checkGroup",
                                  label = h4("Variable X"),names(iris),
                                  selected=names(iris)[[2]]
               )),
        column(4,
               selectInput("select", label = h4("Variable Y"),
                           names(iris),selected=names(iris)[[2]]
               )))


    )
  })
  Data <- reactive({iris[, c(input$select,input$checkGroup)]
  })
  Clusters <- reactive({
    kmeans(Data(),input$slider1)
  })
})


####################
#                  #
#    Exercise 9    #
#                  #
####################

#ui.R
library(shiny)
shinyUI(fluidPage(
  titlePanel("Shiny App"),

  sidebarLayout(
    sidebarPanel(h2("Menu"),
                 br(),
                 fluidRow(
                   column(6,
                 h4("Help Text"),
                 helpText("Just for help"))),
                 br(),
                 fluidRow(
                 column(6,
                        numericInput("numer",
                                     label = h4("Numeric Input"),
                                     value = 10))),
                 fluidRow(
                   column(6,
                 h4("Single Checkbox"),
                 checkboxInput("checkbox", label = "Choice A", value = TRUE))),
                 fluidRow(
                   column(6,
                 dateInput("date",
                           label = h4("Date input"),
                           value = "2016-12-01")),
                 column(6
                        )),
                 fluidRow(
                   column(6,
                 dateRangeInput("dates", label = h4("Date Range"))),
                 column(6,
                        textInput("text", label = h4("Text Input"),
                                  value = "Some Text"))),
                 fileInput("file", label = h4("File Input"))),
    mainPanel(h1("Main"),
              img(src = "petal.jpg", height = 150, width = 200),
              br(),
              br(),
              p("This famous (Fisher's or Anderson's) ", a("iris",href="http://stat.ethz.ch/R-manual/R-devel/library/datasets/html/iris.html"), "data set gives the measurements in centimeters of the variables sepal length and width and petal length and width, respectively, for 50 flowers from each of 3 species of iris. The species are ",strong( "Iris setosa,"),strong( "versicolor"), "and", strong("virginica.")),
              br(),
              h2("Analysis"),
              tabsetPanel(type="tabs",tabPanel("Data Table",dataTableOutput("Table")),
                                      tabPanel("Summary",dataTableOutput("Table2")),
                                      tabPanel("K means",
                                               plotOutput("plot1",click = "mouse"),
                                               verbatimTextOutput("coord"),
                                               uiOutput("All")))

    )
  )
  ))
#server.R
shinyServer(function(input, output) {
  output$Table <- renderDataTable(
    iris,options = list(
      lengthMenu = list(c(10, 20, 30,-1),c('10','20','30','ALL')),
      pageLength = 10))
  sumiris<-as.data.frame.array(summary(iris))
  output$Table2 <- renderDataTable(sumiris)
  output$plot1 <- renderPlot({
    palette(c("#E41A1C", "#377EB8", "#4DAF4A", "#984EA3",
              "#FF7F00", "#FFFF33", "#A65628", "#F781BF", "#999999"))
    plot(Data(),main = "K-MEANS",
         col = Clusters()$cluster,
         cex.main = 2,   font.main= 4, col.main= "blue")
  }, width = "auto",height = "auto")
  output$coord <- renderText({
    paste0("x=", input$mouse$x, "\ny=", input$mouse$y)
  })
  output$All <- renderUI({
    tagList(
      fluidRow(
        column(4,
               sliderInput("slider1", label = h4("Clusters"),
                           min = 1, max = 9, value = 4)),
        column(4,
               checkboxGroupInput("checkGroup",
                                  label = h4("Variable X"),names(iris),
                                  selected=names(iris)[[2]]
               )),
        column(4,
               selectInput("select", label = h4("Variable Y"),
                           names(iris),selected=names(iris)[[2]]
               )))


    )
  })
  Data <- reactive({iris[, c(input$select,input$checkGroup)]
  })
  Clusters <- reactive({
    kmeans(Data(),input$slider1)
  })
})

####################
#                  #
#    Exercise 10   #
#                  #
####################

#ui.R
library(shiny)
shinyUI(fluidPage(
  titlePanel("Shiny App"),

  sidebarLayout(
    sidebarPanel(h2("Menu"),
                 br(),
                 fluidRow(
                   column(6,
                 h4("Help Text"),
                 helpText("Just for help"))),
                 br(),
                 fluidRow(
                 column(6,
                        numericInput("numer",
                                     label = h4("Numeric Input"),
                                     value = 10))),
                 fluidRow(
                   column(6,
                 h4("Single Checkbox"),
                 checkboxInput("checkbox", label = "Choice A", value = TRUE))),
                 fluidRow(
                   column(6,
                 dateInput("date",
                           label = h4("Date input"),
                           value = "2016-12-01")),
                 column(6
                        )),
                 fluidRow(
                   column(6,
                 dateRangeInput("dates", label = h4("Date Range"))),
                 column(6,
                        textInput("text", label = h4("Text Input"),
                                  value = "Some Text"))),
                 fileInput("file", label = h4("File Input"))),
    mainPanel(h1("Main"),
              img(src = "petal.jpg", height = 150, width = 200),
              br(),
              br(),
              p("This famous (Fisher's or Anderson's) ", a("iris",href="http://stat.ethz.ch/R-manual/R-devel/library/datasets/html/iris.html"), "data set gives the measurements in centimeters of the variables sepal length and width and petal length and width, respectively, for 50 flowers from each of 3 species of iris. The species are ",strong( "Iris setosa,"),strong( "versicolor"), "and", strong("virginica.")),
              br(),
              h2("Analysis"),
              tabsetPanel(type="tabs",tabPanel("Data Table",dataTableOutput("Table")),
                                      tabPanel("Summary",dataTableOutput("Table2")),
                                      tabPanel("K means",
                                               plotOutput("plot1",click = "mouse"),
                                               verbatimTextOutput("coord"),
                                               uiOutput("All")))

    )
  )
  ))
#server.R
shinyServer(function(input, output) {
  output$Table <- renderDataTable(
    iris,options = list(
      lengthMenu = list(c(10, 20, 30,-1),c('10','20','30','ALL')),
      pageLength = 10))
  sumiris<-as.data.frame.array(summary(iris))
  output$Table2 <- renderDataTable(sumiris)
  output$plot1 <- renderPlot({
    palette(c("#E41A1C", "#377EB8", "#4DAF4A", "#984EA3",
              "#FF7F00", "#FFFF33", "#A65628", "#F781BF", "#999999"))
    plot(Data(),main = "K-MEANS",
         col = Clusters()$cluster,
         pch = 20, cex = 3,
         cex.main = 2,   font.main= 4, col.main= "blue")
  }, width = "auto",height = "auto")
  output$coord <- renderText({
    paste0("x=", input$mouse$x, "\ny=", input$mouse$y)
  })
  output$All <- renderUI({
    tagList(
      fluidRow(
        column(4,
               sliderInput("slider1", label = h4("Clusters"),
                           min = 1, max = 9, value = 4)),
        column(4,
               checkboxGroupInput("checkGroup",
                                  label = h4("Variable X"),names(iris),
                                  selected=names(iris)[[2]]
               )),
        column(4,
               selectInput("select", label = h4("Variable Y"),
                           names(iris),selected=names(iris)[[2]]
               )))


    )
  })
  Data <- reactive({iris[, c(input$select,input$checkGroup)]
  })
  Clusters <- reactive({
    kmeans(Data(),input$slider1)
  })
})



Data Hacking with RDSTK solution 3

Below are the solutions to these exercises on the RDSTK package

###############
#             #
# Exercise 1  #
#             #
###############
list=c("97.77.104.22","104.199.228.65","50.93.204.169","107.189.46.5","104.154.142.10","104.131.255.12","209.212.253.44","70.248.28.23","52.119.20.75","192.169.168.15","47.88.31.75  80","107.178.4.109","152.160.35.171","104.236.54.196","50.93.197.102","159.203.117.1","206.125.41.132","50.93.201.28","8.21.67.248  31","104.28.16.199")

###############
#             #
# Exercise 2  #
#             #
###############

df=data.frame(list)
df[,1]=as.character(df[,1])
data=lapply(df[,1],ip2coordinates)
df=do.call(rbind.data.frame,data)
df
##        ip.address dma_code latitude country_code3 area_code longitude
## 1    97.77.104.22      641  29.4717           USA       210  -98.5140
## 2   50.93.204.169      539  28.0499           USA       813  -82.3625
## 3  209.212.253.44      515  39.0705           USA       513  -84.2803
## 4    70.248.28.23      641  29.3171           USA       210  -98.5555
## 5    52.119.20.75      504  39.5645           USA       302  -75.5970
## 6  152.160.35.171      505  42.4634           USA       248  -83.4646
## 7   50.93.197.102      539  28.0499           USA       813  -82.3625
## 8   159.203.117.1      505  42.6644           USA       248  -83.2303
## 9  206.125.41.132      803  34.0530           USA       213 -118.2642
## 10   50.93.201.28      539  28.0499           USA       813  -82.3625
##     country_name postal_code region     locality country_code
## 1  United States                 TX  San Antonio           US
## 2  United States       33637     FL        Tampa           US
## 3  United States       45245     OH   Cincinnati           US
## 4  United States       78224     TX  San Antonio           US
## 5  United States       19893     DE   Wilmington           US
## 6  United States       48375     MI         Novi           US
## 7  United States       33637     FL        Tampa           US
## 8  United States       48326     MI Auburn Hills           US
## 9  United States       90017     CA  Los Angeles           US
## 10 United States       33637     FL        Tampa           US
###############
#             #
# Exercise 3  #
#             #
###############

stat_maker=function(s2){
   s1="statistics"
   s3="value"
   s2=as.character(s2)
   for (i in 1:nrow(df)) {
     df$pop[i] <<-coordinates2statistics(df[i,3],df[i,6],s2)[paste(s1,s2,s3, sep = ".")]
     assign("test2",50,envir = .GlobalEnv)


   }
 }

 stat_maker("population_density")


###############
#             #
# Exercise 4  #
#             #
###############

 stat_maker=function(s2){
   s1="statistics"
   s3="value"
   s2=as.character(s2)
 for (i in 1:nrow(df)) {
   a[i]=coordinates2statistics(df[i,3],df[i,6],s2)[paste(s1,s2,s3, sep = ".")]
   a=unlist(a)
   assign(paste(s2),a,envir = .GlobalEnv)


 }
 }


###############
#             #
# Exercise 5  #
#             #
###############

stat_maker("elevation")
elevation
##  [1] 214  16 278 189   1 266  16 278 116  16
###############
#             #
# Exercise 6  #
#             #
###############

stat_maker("population_density")

###############
#             #
# Exercise 7  #
#             #
###############
 stat_maker=function(s2){
   s1="statistics"
   s3="value"
   s2=as.character(s2)
   for (i in 1:nrow(df)) {
     df$pop2[i] <<-coordinates2statistics(df[i,4],df[i,7],s2)[paste(s1,s2,s3, sep = ".")]



   }
 }

 stat_maker("population_density")


###############
#             #
# Exercise 8  #
#             #
###############

 stat_maker=function(s2){
   s1="statistics"
   s3="value"
   s2=as.character(s2)
   for (i in 1:nrow(df)) {
     a[i]=coordinates2statistics(df[i,3],df[i,6],s2)[paste(s1,"us_population",s3, sep = ".")]
     a=unlist(a)
     assign(paste(s2),a,envir = .GlobalEnv)
   }
 }


###############
#             #
# Exercise 9  #
#             #
###############
stat_maker("us_population")
stat_maker("us_population_poverty")
stat_maker("us_population_asian")
stat_maker("us_population_bachelors_degree")
stat_maker("us_population_black_or_african_american")
stat_maker("us_population_black_or_african_american_not_hispanic ")
stat_maker("us_population_eighteen_to_twenty_four_years_old")
stat_maker("us_population_five_to_seventeen_years_old")
stat_maker("us_population_foreign_born")
stat_maker("us_population_hispanic_or_latino")

###############
#             #
# Exercise 10  #
#             #
###############


df=cbind(df,elevation,us_population,us_population_asian,us_population_bachelors_degree,us_population_black_or_african_american,us_population_eighteen_to_twenty_four_years_old,us_population_five_to_seventeen_years_old,us_population_foreign_born,us_population_hispanic_or_latino,us_population_poverty)

df
##        ip.address dma_code latitude country_code3 area_code longitude
## 1    97.77.104.22      641  29.4717           USA       210  -98.5140
## 2   50.93.204.169      539  28.0499           USA       813  -82.3625
## 3  209.212.253.44      515  39.0705           USA       513  -84.2803
## 4    70.248.28.23      641  29.3171           USA       210  -98.5555
## 5    52.119.20.75      504  39.5645           USA       302  -75.5970
## 6  152.160.35.171      505  42.4634           USA       248  -83.4646
## 7   50.93.197.102      539  28.0499           USA       813  -82.3625
## 8   159.203.117.1      505  42.6644           USA       248  -83.2303
## 9  206.125.41.132      803  34.0530           USA       213 -118.2642
## 10   50.93.201.28      539  28.0499           USA       813  -82.3625
##     country_name postal_code region     locality country_code   pop
## 1  United States                 TX  San Antonio           US  3194
## 2  United States       33637     FL        Tampa           US   671
## 3  United States       45245     OH   Cincinnati           US   498
## 4  United States       78224     TX  San Antonio           US   355
## 5  United States       19893     DE   Wilmington           US   188
## 6  United States       48375     MI         Novi           US   610
## 7  United States       33637     FL        Tampa           US   671
## 8  United States       48326     MI Auburn Hills           US   580
## 9  United States       90017     CA  Los Angeles           US 13701
## 10 United States       33637     FL        Tampa           US   671
##                                  pop2 elevation us_population
## 1  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0       214          1768
## 2  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0        16           360
## 3  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0       278           547
## 4  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0       189           593
## 5  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0         1           143
## 6  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0       266           793
## 7  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0        16           360
## 8  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0       278             0
## 9  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0       116          5230
## 10 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0        16           360
##    us_population_asian us_population_bachelors_degree
## 1                 1768                           1768
## 2                  360                            360
## 3                  547                            547
## 4                  593                            593
## 5                  143                            143
## 6                  793                            793
## 7                  360                            360
## 8                    0                              0
## 9                 5230                           5230
## 10                 360                            360
##    us_population_black_or_african_american
## 1                                     1768
## 2                                      360
## 3                                      547
## 4                                      593
## 5                                      143
## 6                                      793
## 7                                      360
## 8                                        0
## 9                                     5230
## 10                                     360
##    us_population_eighteen_to_twenty_four_years_old
## 1                                             1768
## 2                                              360
## 3                                              547
## 4                                              593
## 5                                              143
## 6                                              793
## 7                                              360
## 8                                                0
## 9                                             5230
## 10                                             360
##    us_population_five_to_seventeen_years_old us_population_foreign_born
## 1                                       1768                       1768
## 2                                        360                        360
## 3                                        547                        547
## 4                                        593                        593
## 5                                        143                        143
## 6                                        793                        793
## 7                                        360                        360
## 8                                          0                          0
## 9                                       5230                       5230
## 10                                       360                        360
##    us_population_hispanic_or_latino us_population_poverty
## 1                              1768                  1768
## 2                               360                   360
## 3                               547                   547
## 4                               593                   593
## 5                               143                   143
## 6                               793                   793
## 7                               360                   360
## 8                                 0                     0
## 9                              5230                  5230
## 10                              360                   360



Data Science for Doctors – Part 3 : Distributions Solutions

Below are the solutions to these exercises on data display.

####################
#                  #
#    Exercise 1    #
#                  #
####################

n <- 100
p <- 0.3

#a
dbinom(34, n, p)
## [1] 0.05788395
sum(dbinom(34:n, n, p))
## [1] 0.2207422
pbinom(34, n, p)
## [1] 0.8371417
#b
sum(dbinom(30:60, n, p))
## [1] 0.5376603
#c
qbinom(0.025,n,p)
## [1] 21
qbinom(0.975,n,p)
## [1] 39
####################
#                  #
#    Exercise 2    #
#                  #
####################

m <- 3
s <- 1
#a

pnorm(2,m,s)
## [1] 0.1586553
pnorm(4,m,s) - pnorm(2,m,s)
## [1] 0.6826895
#b
qnorm(0.025,m,s)
## [1] 1.040036
qnorm(0.975,m,s)
## [1] 4.959964
qnorm(0.5,m,s)
## [1] 3
####################
#                  #
#    Exercise 3    #
#                  #
####################

df <- 8
#a
pt(1,df)
## [1] 0.8267032
1-pt(2,df)
## [1] 0.04025812
pt(1,df)-pt(-1,df)
## [1] 0.6534065
#b  
qt(0.025,df)
## [1] -2.306004
qt(0.5,df)
## [1] 0
1-qt(0.075,df)
## [1] 2.592221
####################
#                  #
#    Exercise 4    #
#                  #
####################

df <- 5
#a
pchisq(2,df)
## [1] 0.150855
1-pchisq(4,df)
## [1] 0.549416
# OR
pchisq(4,df,lower.tail = FALSE)
## [1] 0.549416
pchisq(6,df)-pchisq(4,df)
## [1] 0.243197
#b
qchisq(0.025, df, lower.tail=TRUE)
## [1] 0.8312116
qchisq(0.5, df, lower.tail=TRUE)
## [1] 4.35146
qchisq(0.075, df, lower.tail=FALSE)
## [1] 10.00831
####################
#                  #
#    Exercise 5    #
#                  #
####################

df_1 <- 6
df_2 <- 3

pf(2, df_1, df_2)
## [1] 0.6958948
1 - pf(3, df_1, df_2)
## [1] 0.1977977
pf(4, df_1, df_2) - pf(1, df_1, df_2)
## [1] 0.4039858
qf(0.025,df_1, df_2)
## [1] 0.1515427
qf(0.975,df_1, df_2)
## [1] 14.73472
####################
#                  #
#    Exercise 6    #
#                  #
####################

data <- data.frame(case = factor(rep(c("A","B","C"), each=100)),
                  gen = c(rbinom(100, 20, 0.3), rbinom(100, 20, 0.5),
                             rbinom(100, 20, 0.7)))

ggplot(data, aes(x=gen, fill=case)) + geom_density(alpha=.3)
plot of chunk unnamed-chunk-1
####################
#                  #
#    Exercise 7    #
#                  #
####################


data <- data.frame(case = factor(rep(c("A","B","C"), each=100)),
                   gen = c(rnorm(100, 0, 1), rnorm(100, 0, 3),
                           rnorm(100, 0, 7)))

ggplot(data, aes(x=gen, fill=case)) + geom_density(alpha=.3)
plot of chunk unnamed-chunk-1
####################
#                  #
#    Exercise 8    #
#                  #
####################

data <- data.frame(case = factor(rep(c("A","B","C"), each = 100)),
                   gen = c(rt(100, 5), rt(100, 10),
                           rt(100, 25)))


ggplot(data, aes(x=gen, fill=case)) + geom_density(alpha=.3)
plot of chunk unnamed-chunk-1
#Notice the variance, which decreases as the degrees of freedom increase 


####################
#                  #
#    Exercise 9    #
#                  #
####################

data <- data.frame(case = factor(rep(c("A","B","C"), each = 100)),
                   gen = c(rchisq(100, 5), rchisq(100, 10),
                           rchisq(100, 25)))

ggplot(data, aes(x=gen, fill=case)) + geom_density(alpha=.3)
plot of chunk unnamed-chunk-1
#Observe that the graphs change from heavily skew to the right into more bell-shaped.


####################
#                  #
#    Exercise 10   #
#                  #
####################

data <- data.frame(case = factor(rep(c("A","B","C"), each = 100)),
                    gen = c(rf(100, 3, 9), rf(100, 9, 3),
                            rf(100,15, 15)))

ggplot(data, aes(x=gen, fill=case)) + geom_density(alpha=.3)+xlim(0, 10)
plot of chunk unnamed-chunk-1