Assignment 8

(1) Regression

##################################################
# FUNCTION: fitLinear
# fits a simple linear regression of y on x
# inputs: numeric vectors of predictor (x) and response (y)
# outputs: named numeric vector holding the slope, its standard error,
#          t-value, and p-value, plus the model's adjusted r-squared value
#-------------------------------------------------
fitLinear <- function(x = runif(50), y = runif(50)) {
  # summarise the fit once and reuse it for every statistic
  modSummary <- summary(lm(y ~ x))
  # row 2 of the coefficient table is the slope term (row 1 is the intercept)
  slopeRow <- modSummary$coefficients[2, ]
  # [[ ]] drops the column names so the output carries only the names given here
  myOut <- c(slope = slopeRow[[1]],
             standardError = slopeRow[[2]],
             tValue = slopeRow[[3]],
             pValue = slopeRow[[4]],
             # spell out the element name instead of relying on `$` partial matching
             adjustedRsquared = modSummary$adj.r.squared)
  return(myOut)
}
##################################################

# call fitLinear() with its default (random uniform) inputs
fitLinear()

##            slope    standardError           tValue           pValue 
##       0.08420489       0.12983914       0.64853242       0.51973140 
## adjustedRsquared 
##      -0.01196610

# build a tiny fake data set: five random Time/Distance pairs
myData <- data.frame(Time = runif(5), Distance = runif(5))

# fit the regression to the fake data
fitLinear(x = myData[["Time"]], y = myData[["Distance"]])

##            slope    standardError           tValue           pValue 
##        0.5778984        0.7558698        0.7645476        0.5001775 
## adjustedRsquared 
##       -0.1159055

##################################################
# FUNCTION: fitLinearPlot
# fits a simple linear regression and creates a scatterplot
# inputs: numeric vectors of predictor (x) and response (y)
# outputs: scatterplot with the fitted regression line; returns NULL invisibly
#-------------------------------------------------
fitLinearPlot <- function(x = runif(50), y = runif(50)) {
  # fit first so a failed fit stops the function before anything is drawn
  myMod <- lm(y ~ x)
  plot(x = x, y = y, pch = 21, bg = "lightgreen", cex = 1.5)
  # abline() is its own call, made after plot() has set up the axes for the
  # data; it is not an argument of plot()
  abline(myMod)
  # plotting is the purpose of this function, so nothing is printed on return
  invisible(NULL)
}
##################################################

# draw the regression plot for the default inputs, then for the tiny fake data set
fitLinearPlot()

fitLinearPlot(x = myData[["Time"]], y = myData[["Distance"]])

(2) ANOVA

##################################################
# FUNCTION: anova
# performs an analysis of variance of y across the groups in x
# inputs: a vector of categorical data (x) and a vector of continuous data (y)
# outputs: the ANOVA summary table (degrees of freedom, sums of squares,
#          mean squares, F-value, and p-value)
# NOTE(review): once defined, this name masks stats::anova() in the workspace
#-------------------------------------------------
anova <- function(x=as.factor(rep(c("Treatment1","Treatment2","Treatment3"),each=3)),y=runif(9)) {
  # the columns keep the names x and y, so the table row for the factor is labelled "x"
  groupedData <- data.frame(x, y)
  summary(aov(y ~ x, data = groupedData))
}
##################################################

# call anova() with its default inputs
anova()

##             Df Sum Sq Mean Sq F value Pr(>F)  
## x            2 0.6383  0.3191   5.069 0.0514 .
## Residuals    6 0.3777  0.0630                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# build a tiny fake data set: two groups of five random observations
xVar <- factor(rep(c("High", "Low"), each = 5))
yVar <- runif(10)
dataFrame <- data.frame(xVar, yVar)

# run the ANOVA on the fake data
anova(x = dataFrame[["xVar"]], y = dataFrame[["yVar"]])

##             Df Sum Sq Mean Sq F value Pr(>F)
## x            1 0.0010 0.00103   0.016  0.903
## Residuals    8 0.5251 0.06564

##################################################
# FUNCTION: anovaGraph
# draws a boxplot of the continuous data (y) for each level of the categorical data (x)
# inputs: a vector of categorical data (x) and a vector of continuous data (y)
# outputs: boxplot; returns the list of statistics that boxplot() produces
#-------------------------------------------------
anovaGraph <- function(x=as.factor(rep(c("Treatment1","Treatment2","Treatment3"),each=3)),y=runif(9)) {
  dataFrame <- data.frame(x, y)
  # no model is fitted here: the boxplot is drawn straight from the data, so a
  # factor with a single level can still be plotted
  myOut <- boxplot(y ~ x,
                   data = dataFrame,
                   col = c("deepskyblue4", "deepskyblue3", "deepskyblue2"))
  return(myOut)
}
##################################################

# draw the boxplot for the default inputs, then for the tiny fake data set
anovaGraph()

## $stats
##            [,1]      [,2]      [,3]
## [1,] 0.05113409 0.1896468 0.2380656
## [2,] 0.28524662 0.4657184 0.3238146
## [3,] 0.51935914 0.7417901 0.4095636
## [4,] 0.75940156 0.8248703 0.5871741
## [5,] 0.99944398 0.9079505 0.7647846
## 
## $n
## [1] 3 3 3
## 
## $conf
##            [,1]      [,2]      [,3]
## [1,] 0.08682864 0.4141669 0.1693236
## [2,] 0.95188965 1.0694132 0.6498037
## 
## $out
## numeric(0)
## 
## $group
## numeric(0)
## 
## $names
## [1] "Treatment1" "Treatment2" "Treatment3"

anovaGraph(x = dataFrame[["xVar"]], y = dataFrame[["yVar"]])

## $stats
##           [,1]      [,2]
## [1,] 0.1476132 0.2890005
## [2,] 0.3996780 0.4875010
## [3,] 0.7060163 0.6198650
## [4,] 0.7894212 0.7450086
## [5,] 0.8286079 0.8315315
## 
## $n
## [1] 5 5
## 
## $conf
##           [,1]      [,2]
## [1,] 0.4306247 0.4379107
## [2,] 0.9814079 0.8018193
## 
## $out
## numeric(0)
## 
## $group
## numeric(0)
## 
## $names
## [1] "High" "Low"

(3) Contingency Table Analysis

##################################################
# FUNCTION: contTable
# performs a contingency table analysis (chi-squared test) on a two-row table
# inputs: two vectors of count data (x and y), one per table row
# outputs: prints the test result (chi-squared value, degrees of freedom,
#          p-value) and returns the test object invisibly
#-------------------------------------------------
contTable <- function(x=sample(1:100,3),y=sample(1:100,3)) {
  # keep the name `dataMatrix`: chisq.test() reports it as the data name
  dataMatrix <- rbind(x, y)
  # print() hands its argument back invisibly, so the result is shown exactly once
  print(chisq.test(dataMatrix))
}
##################################################

# call contTable() with its default inputs
contTable()

## 
##  Pearson's Chi-squared test
## 
## data:  dataMatrix
## X-squared = 33.704, df = 2, p-value = 4.801e-08

# build a tiny fake data set: two rows of four random counts
# (despite its name, dataFrame is a matrix here, with rows named vec1 and vec2)
vec1 <- sample(1:100, 4)
vec2 <- sample(1:100, 4)
dataFrame <- rbind(vec1, vec2)

# run the chi-squared test on the fake data
contTable(x = dataFrame["vec1", ], y = dataFrame["vec2", ])

## 
##  Pearson's Chi-squared test
## 
## data:  dataMatrix
## X-squared = 57.065, df = 3, p-value = 2.489e-12

##################################################
# FUNCTION: contTableGraph
# creates a mosaic plot of a two-row contingency table
# inputs: two vectors of count data (x and y), one per table row
# outputs: mosaic plot; returns NULL
#-------------------------------------------------
contTableGraph <- function(x=sample(1:100,3),y=sample(1:100,3)) {
  # keep the name `dataMatrix`: mosaicplot() uses the argument's name as the
  # default plot title
  dataMatrix <- rbind(x, y)
  # no chi-squared test is run here: its result is not needed for the plot and
  # it can emit approximation warnings for small counts
  myOut <- mosaicplot(x = dataMatrix,
                      # `color` is the argument's full name
                      color = c("blue", "green", "purple"),
                      shade = FALSE)
  return(myOut)
}
##################################################

# draw the mosaic plot for the default inputs, then for the tiny fake data set
contTableGraph()

## NULL

contTableGraph(x = dataFrame["vec1", ], y = dataFrame["vec2", ])

## NULL

(4) Logistic Regression

##################################################
# FUNCTION: logReg
# performs a logistic regression analysis of y on x
# inputs: a vector of continuous data (x) and a vector of 0/1 outcome data (y)
# outputs: coefficient matrix with one row each for the intercept and the slope;
#          columns are the estimate, standard error, z-value, and p-value
#-------------------------------------------------
logReg <- function(x = rgamma(n=25,shape=5,scale=5), y = rbinom(n=25,size=1,prob=0.5)) {
  dataFrame <- data.frame(x, y)
  myMod <- glm(y ~ x,
               data = dataFrame,
               family = binomial(link = "logit"))
  return(summary(myMod)$coefficients)
}
##################################################

# call logReg() with its default inputs
logReg()

##               Estimate Std. Error   z value   Pr(>|z|)
## (Intercept) -3.6033535 1.82523243 -1.974189 0.04836027
## x            0.1795657 0.07954023  2.257545 0.02397403

# build a tiny fake data set: eight x values with a 0/1 response
xVec <- c(2, 3, 8, 20, 4, 1, 3, 5)
yVec <- c(0, 1, 1, 0, 0, 1, 0, 1)
dataFrame <- data.frame(xVec, yVec)

# fit the logistic regression to the fake data
logReg(x = dataFrame[["xVec"]], y = dataFrame[["yVec"]])

##               Estimate Std. Error    z value  Pr(>|z|)
## (Intercept)  0.5702479  1.0559008  0.5400583 0.5891569
## x           -0.1039407  0.1511526 -0.6876543 0.4916705

##################################################
# FUNCTION: logRegGraph
# performs a logistic regression analysis and creates a scatterplot depicting the results
# inputs: a vector of continuous data (x) and a vector of 0/1 outcome data (y)
# outputs: scatterplot of the data with the fitted probability curve; returns NULL
#-------------------------------------------------
logRegGraph <- function(x = rgamma(n=25,shape=5,scale=5), y = rbinom(n=25,size=1,prob=0.5)) {
  dataFrame <- data.frame(x, y)
  myMod <- glm(y ~ x,
               data = dataFrame,
               family = binomial(link = "logit"))
  myOut <- plot(x = dataFrame$x, y = dataFrame$y, pch = 21, bg = "green", cex = 2)
  # overlay the fitted probabilities across the observed range of x so the
  # plot shows the model as well as the raw points
  xGrid <- seq(min(dataFrame$x, na.rm = TRUE),
               max(dataFrame$x, na.rm = TRUE),
               length.out = 100)
  lines(xGrid, predict(myMod, newdata = data.frame(x = xGrid), type = "response"))
  return(myOut)
}
##################################################

# draw the logistic-regression plot for the default inputs, then for the tiny fake data set
logRegGraph()

## NULL

logRegGraph(x = dataFrame[["xVec"]], y = dataFrame[["yVec"]])

## NULL