R

# Gerry's useful code, updated July 4, 2019
# gcarter1640@gmail.com

library(tidyverse)
library(boot)

# resample function to replace the sample function so that it works with n=1
resample <- function(x, ...) x[sample.int(length(x), ...)]

# get mean function for bootstrapping (below)
mean.w <- function(x,w) sum(x*w)

# get 95% quantiles for a vector
ci95 <- function(x) {
  numNA <- sum(is.na(x))
  x <- as.vector(na.omit(x))
  mean <- mean(x)
  low <- as.numeric(quantile(x, 0.025))
  high <- as.numeric(quantile(x, 0.975))
  c(low=low,mean=mean,high=high, N=round(length(x)))
}

# permute within blocks 

# method 1 (using sapply)
data$new.y <- as.vector(sapply(by(data$y, data$block, resample),identity))

# method 2 (using tidyverse and resample function)
data %>% 
  group_by(group) %>% 
  mutate(value2 = value[resample(row_number())]) 

# method 3 (using tidyverse and sample function)
data %>% 
  group_by(class) %>%
  mutate(value = if(n() == 1) value else sample(value, n()))

# method 2 example
# create data
data <- 
  data.frame(bat= letters[1:12]) %>% 
  mutate(group = rep(c("A","B","C"), each=4)) %>%  
  mutate(value= 1:12)
data
# permute within groups
data %>% 
  group_by(group) %>% 
  mutate(new.value = value[sample(row_number())]) 

# recode using the match function
# find the corresponding group for each bat (using bat list) and add it to my sharing event data
obs$group <- individuals$group[match(obs$donor,individuals$bat)]

# subset and sort common nodes in two networks (matrices)
# i.e. get the common individuals from two different networks to compare them
common_matrices <- function(m1 = m1, m2 = m2){
  # create subset of m1 for nodes that are in m2, and vice versa
  subset.m1 <- subset(m1, rownames(m1) %in% rownames(m2), select=c(colnames(m1) %in% colnames (m2)))
  subset.m2 <- subset(m2, rownames(m2) %in% rownames(subset.m1), select=c(colnames(m2) %in% colnames (subset.m1)))
  # put them in same order
  subset.m1 <- subset.m1[order(rownames(subset.m1)),order(colnames(subset.m1))]
  subset.m2 <- subset.m2[order(rownames(subset.m2)),order(colnames(subset.m2))]
  list(subset.m1,subset.m2)  
}


# functions for bootstrapping 95% confidence intervals ------------------------

# bootstrap the mean of a vector
boot_ci <- function(x, perms=5000) {
  mean.w=function(x,w) sum(x*w)
  numNA <- sum(is.na(x))
  x <- as.vector(na.omit(x))
  mean <- mean(x)
  boot <- boot.ci(boot(data=x, statistic=mean.w, R=perms, stype="w", parallel = "multicore", ncpus = 4), type="basic")
  low <- boot$basic[1,4]
  high <- boot$basic[1,5]
  c(low=low,mean=mean,high=high, N=round(length(x)))
}

# bootstrap the mean within groups of a dataframe
boot_ci2 <- function(d=d, y=d$y, x=d$x, perms=5000){
  df <- data.frame(effect=unique(x))
  df$low <- NA
  df$mean <- NA
  df$high <- NA
  df$n.obs <- NA
  for (i in 1:nrow(df)) {
    ys <- y[which(x==df$effect[i])]
    if (length(ys)>1){
      b <- boot_ci(y[which(x==df$effect[i])], perms=perms) 
      df$low[i] <- b[1]
      df$mean[i] <- b[2]
      df$high[i] <- b[3]
      df$n.obs[i] <- b[4]
    }else{
      df$low[i] <- NA
      df$mean[i] <- ys
      df$high[i] <- NA
      df$n.obs[i] <- 1
    }
  }
  df
}

# example
df <- 
  data.frame(v1= rnorm(100),  v2= rnorm(100), bat= rep(c("A", "B"), 50)) %>% 
  mutate(v2= ifelse(bat=="A", v2+1, v2))
head(df)

# bootstrap mean of x
boot_ci(df$v1)
boot_ci2(df, df$v1, df$bat)
boot_ci2(df, df$v2, df$bat)

# plot results from above
boot_ci2(df, df$v1, df$bat) %>% 
  ggplot(aes(x=effect, y=mean, color=effect))+
  geom_point(size=3)+
  geom_errorbar(aes(ymin=low, ymax=high, width=.1), size=1)+
  geom_hline(yintercept = 0)+
  ylab("response")+
  xlab("group")

boot_ci2(df, df$v2, df$bat) %>% 
  ggplot(aes(x=effect, y=mean, color=effect))+
  geom_point(size=3)+
  geom_errorbar(aes(ymin=low, ymax=high, width=.1), size=1)+
  geom_hline(yintercept = 0)+
  ylab("response")+
  xlab("group")

# bootstrap to get mean and 95% CI of slope
boot_slope <- function(x, y, perms=5000) {
  numNAx <- sum(is.na(x)) 
  numNAy <- sum(is.na(y))
  df <- data.frame(y=y, x=x)
  s <- function(formula, data, indices) {
    d <- data[indices,] # allows boot to select sample 
    fit <- lm(formula, data=d)
    return(coef(fit)) 
  } 
  results <- boot(data=df, statistic=s,  R=perms, formula=y~x, parallel = "multicore", ncpus = 4)
  b <- boot.ci(results, type="basic", index=2)
  mean <- as.vector(s(y~x, df)[2])
  low <- b$basic[4]
  high <- b$basic[5]
  c(low=low,mean=mean,high=high, N=round(length(x)), x.missing= numNAx, y.missing= numNAy)
}


# bootstrap to get mean and 95% CI of slope within groups
boot_slope2 <- function(df=df, y=df$y, x=df$x, groups= df$z, perms=5000){
  df <- data.frame(effect=unique(groups))
  df$low <- NA
  df$mean <- NA
  df$high <- NA
  df$n.obs <- NA
  for (i in 1:nrow(df)) {
    b <- boot_slope(x[which(groups==df$effect[i])], y[which(groups==df$effect[i])])
    df$low[i] <- b[1]
    df$mean[i] <- b[2]
    df$high[i] <- b[3]
    df$n.obs[i] <- b[4]
    df$na.x[i] <- b[5]
    df$na.y[i] <- b[6]
    df$perms <- perms
  }
  df
}


# bootstrap to get mean and 95% CI of a correlation
boot_cor <- function(x, y, perms=5000) {
  numNAx <- sum(is.na(x)) 
  numNAy <- sum(is.na(y))
  x <- as.vector(na.omit(x))
  y <- as.vector(na.omit(y))
  mean <- cor(x,y)
  boot <- boot.ci(boot(data= c(x,y),statistic=cor, R=perms, parallel = "multicore", ncpus = 4), type="basic")
  low <- boot$basic[1,4]
  high <- boot$basic[1,5]
  c(low=low,mean=mean,high=high, N=round(length(x)), x.missing= numNAx, y.missing= numNAy, perms=perms)
}

# bootstrap 95%CI of mean correlation within groups
boot_cor2 <- function(df=df, y=df$y, x=df$x, groups= df$z, perms=5000){
  df <- data.frame(effect=unique(groups))
  df$low <- NA
  df$mean <- NA
  df$high <- NA
  df$n.obs <- NA
  for (i in 1:nrow(df)) {
    b <- boot_cor(x[which(groups==df$effect[i])], y[which(groups==df$effect[i])], perms=perms)
    df$low[i] <- b[1]
    df$mean[i] <- b[2]
    df$high[i] <- b[3]
    df$n.obs[i] <- b[4]
    df$na.x[i] <- b[5]
    df$na.y[i] <- b[6]
  }
  df
}

  

# function to convert a list of dyadic interactions to a network ------
a_b_edgelist_to_matrix <- function(el=el, symbol="_", directed= T, make.NA.zero=T){
  a <- str_split(as.data.frame(el)[,1],symbol, simplify = TRUE)[,1]
  r <- str_split(as.data.frame(el)[,1],symbol, simplify = TRUE)[,2]
  y <- as.data.frame(el)[,2]
  e <- data.frame(a,r,y, stringsAsFactors = F)
  require(igraph)
  if (make.NA.zero){
    g <- graph_from_data_frame(e, directed=T)
    m <- get.adjacency(g, attr='y', sparse=FALSE)
    m
  }else{
    e$y <- e$y+1 # temporarily add one to distinguish between 0 rate and no observation (NA)
    g <- graph_from_data_frame(e, directed=directed)
    m <- get.adjacency(g, attr='y', sparse=FALSE)
    m[m==0] <- NA # relabel missing values as NA
    m <- m-1 # subtract one to adjust values back
    m
    }
  }
  

# create network-----
library(igraph)
g <- graph.data.frame(rates)

# create sociomatrix-----
sociomatrix <- get.adjacency(g, attr='weight', sparse=FALSE)

# create network plot----
plot(g,layout <- layout.fruchterman.reingold,edge.width=E(g)$weight/2)

# plot permutation test results----
hist_perm <- function(exp=exp, obs=obs, perms=perms){
  ggplot()+
    geom_histogram(aes(x=exp), color="black",fill="light blue")+
    xlim(min= min(c(exp,obs)), max= max(c(exp,obs)))+
    geom_vline(aes(xintercept=obs), color="red", size=1)+
    xlab("expected values from null model")+
    ggtitle(paste("expected greater than observed: p", ifelse(mean(exp>=obs)==0,paste("<",1/perms), paste("=",signif(mean(exp>=obs),digits=2))),", permutations=",perms, sep=""))
}

# permuted paired t-test function------
ppt.test <- function(x){
  perms=5000
  n <- length(x)
  obs <- t.test(x)$statistic
  exp <- rep(NA, perms)
  for (i in 1:perms) {
    exp[i] <- t.test(x * sample(c(-1, 1), n, replace = TRUE))$statistic
  }
  p <- mean(abs(obs)<abs(exp))
  out=ifelse(p==0,"p<0.0002",p)
  out
}

set.seed(123) # generate same random numbers (change this number to get different random samples)
a <- sample(1:100, size=50) # sample random numbers 1 to 100
b <- sample(20:120, size=50) # sample random numbers 20 to 120
x <- a-b
t.test(x) # parametric test with t-value (can have high type 1 or type 2 error)
ppt.test(x) # permutation test with t-value 
wilcox.test(x) # rank-based test (lacks power, type )



# save output with timestamp-----
timestamp <- substr(gsub(x=gsub(":","_",Sys.time()), pattern=" ", replace="_"), start=1, stop=16)
save.image(file= paste("results_", timestamp, ".Rdata", sep=""))
write.csv(out, file=paste("results_", timestamp, ".csv", sep=""))


# power analysis through resampling
# create fake data
set.seed(1)
d <- 
  data.frame(x= sample(c(1:100), size=120, replace=T)) %>% 
  mutate(y= rnorm(120, mean=10, sd=5)) %>% 
  mutate(group = rep(c("treatment","control"), each=60)) %>% 
  mutate(y= ifelse(group=="treatment", y+2, y)) 

d %>% 
  ggplot(aes(x=x, y=y,color=group))+
  geom_point(size=2)

obs <- summary(lm(y~x+group, data=d))$coefficient[3,1]
obs    

# choose sample sizes
nreps <- 100
jumps <-  round(nrow(d)/20)
size <- seq(jumps,nrow(d),jumps)
effect_size <- matrix(NA, nrow=length(size),ncol=nreps)

# bootstrap
for (j in 1:length(size)){
  for (i in 1:nreps){
    sample.d <- 
      sample_n(d, size=size[j], replace= T)
    effect_size[j,i] <- tryCatch(summary(lm(y~x+group, data=sample.d))$coefficient[3,1], error=function(err) NA)
    
  }
}

# get CIs
mean_effects <-
  apply(effect_size, 1, ci95) %>%
  t() %>%
  as.data.frame() %>%
  mutate(size = size)

# plot
mean_effects %>%
  ggplot(aes(x = size, y = mean)) +
  geom_ribbon(
    aes(x = size, ymin = low, ymax = high),
    fill = "light grey",
    color = "black",
    alpha = 0.7
  ) +
  geom_point(size = 2) +
  geom_line() +
  geom_hline(yintercept = 0)+
  ylab("effect of treatment") +
  xlab("sampling effort") +
  ggtitle(
    "effect of sampling effort on observed effect",
    subtitle = " sample size is number of observations")

# MY CODE SNIPPETS (FOR RSTUDIO)
https://support.rstudio.com/hc/en-us/articles/204463668-Code-Snippets

# generating expected values for permutation test
snippet exp
	exp <- rep(NA, perms)

# for loop for permutation test
snippet for
	for (i in 1:perms){
	${0}
	}

# ggplot
snippet ggp
	ggplot(aes(x=${1}, y=${2}))+
		geom_point(${3})
# ggplot histogram
snippet histo
	ggplot(aes(x=${1})+
		geom_histogram(${2})
	
# store next result in data frame (row n, columns 1 and 2)
snippet oo
	n+1; out[n+1,1] <- "${0}"
	out[n+1,2] <-