2. Use your data from HW4 to input into your functions (where applicable) and produce output and plots
# Read in the data from Homework 4
z <- read.table("Data.csv", header = TRUE, sep=",", stringsAsFactors=FALSE)
# Effect of warming treatment on Shannon diversity
x <- data.frame(z$WarmingTreatment,z$DivShann)
anova(x)
## Df Sum Sq Mean Sq F value Pr(>F)
## x 1 0.83 0.8276 1.486 0.225
## Residuals 118 65.71 0.5568
## $stats
## [,1] [,2]
## [1,] 0.7577747 0.6171423
## [2,] 2.1728058 2.0442667
## [3,] 2.8217602 2.6659332
## [4,] 3.2198981 3.1863547
## [5,] 4.1736013 4.4157717
##
## $n
## [1] 60 60
##
## $conf
## [,1] [,2]
## [1,] 2.608177 2.432973
## [2,] 3.035343 2.898893
##
## $out
## [1] 0.2678693
##
## $group
## [1] 2
##
## $names
## [1] "control" "warmed"
# Effect of warming treatment on richness
y <- data.frame(z$WarmingTreatment,z$Richness)
anova(y)
## Df Sum Sq Mean Sq F value Pr(>F)
## x 1 552 552 0.097 0.757
## Residuals 58 330361 5696
## 60 observations deleted due to missingness
## $stats
## [,1] [,2]
## [1,] 10.0 6
## [2,] 37.0 35
## [3,] 89.5 102
## [4,] 152.0 160
## [5,] 296.0 256
## attr(,"class")
## control
## "integer"
##
## $n
## [1] 30 30
##
## $conf
## [,1] [,2]
## [1,] 56.32627 65.9416
## [2,] 122.67373 138.0584
##
## $out
## numeric(0)
##
## $group
## numeric(0)
##
## $names
## [1] "control" "warmed"
# Effect of warming treatment on evenness
a <- data.frame(z$WarmingTreatment,z$Evenness)
anova(a)
## Df Sum Sq Mean Sq F value Pr(>F)
## x 1 0.0733 0.07334 3.815 0.0556 .
## Residuals 58 1.1149 0.01922
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 60 observations deleted due to missingness
## $stats
## [,1] [,2]
## [1,] 0.4467246 0.3090829
## [2,] 0.6190977 0.5025169
## [3,] 0.7214350 0.6102760
## [4,] 0.7725752 0.7655159
## [5,] 0.9093725 0.9024945
##
## $n
## [1] 30 30
##
## $conf
## [,1] [,2]
## [1,] 0.6771618 0.5344094
## [2,] 0.7657083 0.6861425
##
## $out
## numeric(0)
##
## $group
## numeric(0)
##
## $names
## [1] "control" "warmed"
3. Make a random data set that mimics your existing data (test mean and variance) and use the functions
# Read in original data
z <- read.table("Data.csv", header = TRUE, sep=",", stringsAsFactors=FALSE)
# I chose to focus on one of my response variables (Shannon diversity, or "DivShann"). To begin, I examined the distribution of my data, and calculated the mean and standard deviation:
hist(z$DivShann)
## [1] 2.644833
## [1] 0.7477299
# Given the distribution of my data, to create a random data set with structure similar to my original data, I decided to use the 'rsnorm' function to simulate a skewed normal distribution. I used the sample size, mean, and standard deviation from my actual data set as inputs in the 'rsnorm' function. I then tested out different values for the skewness parameter (xi) and used histograms to compare each simulated random data set to my own data until the distributions matched relatively closely.
library(fGarch)
## Loading required package: timeDate
## Loading required package: timeSeries
## Loading required package: fBasics
##
## Rmetrics Package fBasics
## Analysing Markets and calculating Basic Statistics
## Copyright (C) 2005-2014 Rmetrics Association Zurich
## Educational Software for Financial Engineering and Computational Science
## Rmetrics is free software and comes with ABSOLUTELY NO WARRANTY.
## https://www.rmetrics.org --- Mail to: info@rmetrics.org
par(mfrow = c(2,1))
# Random data with xi = -5 compared to my data
hist(rsnorm(120, mean = 2.644833, sd = 0.7477299, xi = -5))
hist(z$DivShann)
# Random data with xi = -3 compared to my data
hist(rsnorm(120, mean = 2.644833, sd = 0.7477299, xi = -3))
hist(z$DivShann)
# Random data with xi = -4 compared to my data (closest match)
hist(rsnorm(120, mean = 2.644833, sd = 0.7477299, xi = -4))
hist(z$DivShann)
# Create random data using xi = -4
y <- rsnorm(120, mean = 2.644833, sd = 0.7477299, xi = -4)
# Test the effect of warming treatment on the response using the random data
randomData <- data.frame(z$WarmingTreatment,y)
anova(randomData)
## Df Sum Sq Mean Sq F value Pr(>F)
## x 1 0.00 0.0024 0.003 0.954
## Residuals 118 82.64 0.7004
par(mfrow = c(1,1))
anovaGraph(randomData)
## $stats
## [,1] [,2]
## [1,] 0.8384443 0.5892447
## [2,] 2.0482677 2.1199834
## [3,] 2.8254233 2.8694451
## [4,] 3.2616891 3.1531174
## [5,] 3.6319215 3.6139021
##
## $n
## [1] 60 60
##
## $conf
## [,1] [,2]
## [1,] 2.577913 2.658709
## [2,] 3.072934 3.080181
##
## $out
## [1] -0.792271
##
## $group
## [1] 1
##
## $names
## [1] "control" "warmed"
4. Manipulate your random data set to mimic a hypothesis you created (i.e. I expect a positive relationship between the number of cats someone owns and that person’s IQ) and use the functions
# Hypothesis: Shannon diversity is higher in warmed plots relative to control plots.
print(z$WarmingTreatment) # view data
## [1] "control" "warmed" "control" "warmed" "control" "warmed" "control"
## [8] "warmed" "control" "warmed" "control" "warmed" "warmed" "control"
## [15] "warmed" "control" "control" "warmed" "control" "warmed" "control"
## [22] "warmed" "control" "warmed" "control" "warmed" "control" "warmed"
## [29] "warmed" "control" "warmed" "control" "warmed" "control" "warmed"
## [36] "control" "warmed" "control" "control" "warmed" "control" "warmed"
## [43] "control" "warmed" "control" "warmed" "control" "warmed" "control"
## [50] "warmed" "warmed" "control" "warmed" "control" "warmed" "control"
## [57] "warmed" "control" "warmed" "control" "control" "control" "warmed"
## [64] "control" "warmed" "control" "warmed" "warmed" "control" "warmed"
## [71] "control" "warmed" "control" "warmed" "control" "warmed" "control"
## [78] "warmed" "control" "warmed" "control" "warmed" "warmed" "control"
## [85] "warmed" "control" "warmed" "control" "warmed" "control" "warmed"
## [92] "control" "control" "warmed" "control" "warmed" "control" "warmed"
## [99] "control" "warmed" "control" "warmed" "control" "warmed" "warmed"
## [106] "control" "warmed" "control" "warmed" "control" "warmed" "control"
## [113] "warmed" "control" "control" "warmed" "control" "warmed" "control"
## [120] "warmed"
z2 <- z[order(z$WarmingTreatment),] # sort data by warming treatment
print(z2$WarmingTreatment) # view sorted data
## [1] "control" "control" "control" "control" "control" "control" "control"
## [8] "control" "control" "control" "control" "control" "control" "control"
## [15] "control" "control" "control" "control" "control" "control" "control"
## [22] "control" "control" "control" "control" "control" "control" "control"
## [29] "control" "control" "control" "control" "control" "control" "control"
## [36] "control" "control" "control" "control" "control" "control" "control"
## [43] "control" "control" "control" "control" "control" "control" "control"
## [50] "control" "control" "control" "control" "control" "control" "control"
## [57] "control" "control" "control" "control" "warmed" "warmed" "warmed"
## [64] "warmed" "warmed" "warmed" "warmed" "warmed" "warmed" "warmed"
## [71] "warmed" "warmed" "warmed" "warmed" "warmed" "warmed" "warmed"
## [78] "warmed" "warmed" "warmed" "warmed" "warmed" "warmed" "warmed"
## [85] "warmed" "warmed" "warmed" "warmed" "warmed" "warmed" "warmed"
## [92] "warmed" "warmed" "warmed" "warmed" "warmed" "warmed" "warmed"
## [99] "warmed" "warmed" "warmed" "warmed" "warmed" "warmed" "warmed"
## [106] "warmed" "warmed" "warmed" "warmed" "warmed" "warmed" "warmed"
## [113] "warmed" "warmed" "warmed" "warmed" "warmed" "warmed" "warmed"
## [120] "warmed"
# Simulate normally distributed random data for each warming treatment, and combine the two vectors of data
warmed <- rnorm(60, mean = 4, sd = 0.7)
control <- rnorm(60, mean = 2, sd = 0.7)
combined <- c(warmed,control)
# Test the effect of warming treatment on the simulated Shannon diversity data
randomDataBling <- data.frame(z2$WarmingTreatment,combined)
anova(randomDataBling)
## Df Sum Sq Mean Sq F value Pr(>F)
## x 1 122.94 122.9 206.1 <2e-16 ***
## Residuals 118 70.37 0.6
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
par(mfrow = c(1,1))
anovaGraph(randomDataBling)
## $stats
## [,1] [,2]
## [1,] 2.446143 0.3080192
## [2,] 3.376627 1.3304953
## [3,] 3.952301 1.9033361
## [4,] 4.425954 2.4290995
## [5,] 5.268086 3.4315183
##
## $n
## [1] 60 60
##
## $conf
## [,1] [,2]
## [1,] 3.738262 1.679246
## [2,] 4.166340 2.127426
##
## $out
## [1] 1.734383
##
## $group
## [1] 1
##
## $names
## [1] "control" "warmed"
Use logistic regression to see how Shannon diversity of a sample might predict whether it is from a warmed or control plot
Logistic regression functions
##################################################
# FUNCTION: logReg
# performs a logistic regression analysis
# inputs: a vector of continuous data (x) and a vector of categorical data (y)
# outputs: slope, standard error, z-value, and p-value for the logistic regression
#-------------------------------------------------
logReg <- function(x = rgamma(n=25,shape=5,scale=5), y = rbinom(n=25,size=1,p=0.5)) {
dataFrame <- data.frame(x,y)
myMod <- glm(y ~ x,
data=dataFrame,
family=binomial(link="logit"))
return(summary(myMod)$coefficients)
}
##################################################
# FUNCTION: logRegGraph
# performs a logistic regression analysis and creates a scatterplot depicting the results
# inputs: a vector of continuous data (x) and a vector of categorical data (y)
# outputs: scatterplot
#-------------------------------------------------
logRegGraph <- function(x = rgamma(n=25,shape=5,scale=5), y = rbinom(n=25,size=1,p=0.5)) {
dataFrame <- data.frame(x,y)
myMod <- glm(y ~ x,
data=dataFrame,
family=binomial(link="logit"))
myOut <- plot(x=dataFrame$x, y=dataFrame$y,pch=21,bg="green",cex=2)
return(myOut)
}
Using logistic regression functions on the three data sets
# Actual data
z <- read.table("Data.csv", header = TRUE, sep=",", stringsAsFactors=FALSE)
logReg(x=z$DivShann, y=z$WarmingTreatment)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.8042558 0.6887884 1.167638 0.2429526
## x -0.3038786 0.2504912 -1.213131 0.2250797
logRegGraph(x=z$DivShann, y=z$WarmingTreatment)
## NULL
# Random data with same structure as original data
x <- rsnorm(120, mean = 2.644833, sd = 0.7477299, xi = -4)
randomData <- data.frame(z$WarmingTreatment,x)
logReg(x=randomData$x, y=randomData$z.WarmingTreatment)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.2670118 0.6320632 2.004565 0.04500957
## x -0.5139291 0.2442263 -2.104315 0.03535096
logRegGraph(x=randomData$x, y=randomData$z.WarmingTreatment)
## NULL
# Random data related to hypothesis
warmed <- rnorm(60, mean = 4, sd = 0.7)
control <- rnorm(60, mean = 2, sd = 0.7)
combined <- c(warmed,control)
randomDataBling <- data.frame(z2$WarmingTreatment,combined)
logReg(x=randomDataBling$combined, y=randomDataBling$z2.WarmingTreatment)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 10.425074 1.971131 5.288880 1.230677e-07
## x -3.388557 0.620611 -5.460034 4.760447e-08
logRegGraph(x=randomDataBling$combined, y=randomDataBling$z2.WarmingTreatment)
## NULL