The datasets needed to reproduce this analysis are deposited on Bertinetti, César et al. 2023. Data from: Repeated Divergence in Opsin Gene Expression Mirrors Photic Habitat Changes in Rapidly Evolving Crater Lake Cichlid Fishes [Dataset]. Dryad. https://doi.org/10.5061/dryad.j3tx95xgk

This code is associated with Repeated Divergence in Opsin Gene Expression Mirrors Photic Habitat Changes in Rapidly Evolving Crater Lake Cichlid Fishes, The American Naturalist https://doi.org/10.1086/729420

Following R version and packages were used to run this code:

R.version$platform
## [1] "aarch64-apple-darwin20"
R.version$version.string
## [1] "R version 4.2.3 (2023-03-15)"
installed.packages()[names(sessionInfo()$otherPkgs), "Version"]
##    sensemakr        vegan      lattice      permute       partR2    ggfortify 
##      "0.1.4"      "2.6-4"     "0.21-8"      "0.9-7"      "0.9.1"     "0.4.16" 
##    agricolae       pander         lme4       Matrix          rsq       pracma 
##      "1.3-5"      "0.6.5"     "1.1-34"    "1.5-4.1"        "2.5"      "2.4.2" 
##    gridExtra      ggplot2          car      carData          zoo         MESS 
##        "2.3"      "3.4.2"      "3.1-2"      "3.0-5"     "1.8-12"      "0.5.9" 
## RColorBrewer      plotrix        tidyr        dplyr  matrixStats      stringr 
##      "1.1-3"      "3.8-2"      "1.3.0"      "1.1.2"      "1.0.0"      "1.5.0" 
##        readr 
##      "2.1.4"

Irradiance raw measurements

The “RawIrradiance.zip” contains the raw irradiance measurements for each depth and sensor orientation placed within the folder of the location (lake) where they were collected. First, all the names of the files within one folder (lake) can be loaded using:

D_Lake<-list.files("~/Documents/StayTuned/Scripts/LAKE")
Sp_Lake <-data.frame()


Then, all the irradiance measurements for one location are merged adjusting by integration time for downstream analysis. Please ensure the files are located within your working directory.

setwd("~/Documents/StayTuned/Scripts/LAKE/")
for (i in 1:length(D_Lake)){    # going through the list of file names
  D_Lake_name<-D_Lake[i] # choose the name in the list of file names
  print(i)  # I want to know where it is 
  
  Depth <- paste(substr(D_Lake_name, 1,1), str_sub(D_Lake_name, -40, -38) , sep="") # Extracts Depth
 
  Down_Lake_data<-read.table(D_Lake_name, header=F,sep="\t",dec=".",
                             skip = 337, nrows = 1032, stringsAsFactors = F) 
  lines<- readLines(D_Lake_name, n=17)
  int.time <- as.numeric((substr(lines[7],  25,35)))
  Down_Lake_data[,2] <- Down_Lake_data[,2] / int.time # Adjusting for integration time of measurement
  Down_Lake_data[,3] <- Depth
  #filling the df
  Sp_Lake <- rbind(Sp_Lake,Down_Lake_data)
  Sp_Lake [i,3] <- Depth
}
colnames(Sp_Lake)<-c("Wavelength","Photons", "Depth")


Account for different spelling in the files names. Depth consists of a letter for light orientation (either d=downwelling, s=sidewelling or u=upwelling) and a number for meters below water surface (su)

Sp_Lake$Depth <- gsub("DG" , "d", Sp_Lake$Depth)
Sp_Lake$Depth <- gsub("D" , "d", Sp_Lake$Depth)
Sp_Lake$Depth <- gsub("dg" , "d", Sp_Lake$Depth)
Sp_Lake$Depth <- gsub("UG" , "u", Sp_Lake$Depth)
Sp_Lake$Depth <- gsub("U" , "u", Sp_Lake$Depth)
Sp_Lake$Depth <- gsub("ug" , "u", Sp_Lake$Depth)
Sp_Lake$Depth <- gsub("SG" , "s", Sp_Lake$Depth)
Sp_Lake$Depth <- gsub("S" , "s", Sp_Lake$Depth)
Sp_Lake$Depth <- gsub("sg" , "s", Sp_Lake$Depth)
Sp_Lake$Depth <- gsub("fac" , "_su", Sp_Lake$Depth)


Finally, the irradiance spectra need to be converted from mW/cm²/nm to photons/s/cm²/nm based on Johnsen, S. (2012). The Optics of Life: A Biologist’s Guide to Light in Nature. Princeton: Princeton University Press. https://doi.org/10.1515/9781400840663, Chapter 2, page 9-12

  Sp_Lake$Photons <- Sp_Lake$Wavelength * (Sp_Lake$Photons/10^6) * 5.05*10^15


Then create the dataframe with the median irradiance values for each wavelength (rows) at each depth & orientation (columns):

Sp_Lake <- subset(Sp_Lake, Wavelength >= 340 & Wavelength <= 700) # Here the visible part of the spectrum is selected
m_sp_lake <- aggregate(Photons ~ Wavelength + Depth, data = Sp_Lake, FUN=median) # median from measurements of same depth & orientation as reference to reduce noise
m_sp_lake <- do.call(data.frame, m_sp_lake) 
m_sp_lake$Depth<- factor(m_sp_lake$Depth,levels=c("d_su","d_0", "d_1","d_2","d_3","s_0", "s_1","s_2","s_3"))
# Depending on lake more depths and orientation area avaible. These can be inspected with beforehand with levels()
lake <- spread(m_sp_lake,  key = "Depth", value = "Photons") # Now each column represents the mean photons at each depth & orienation
head(lake)


Further rolling mean is applied for smoothing the spectra. The functions for estimating λP50 were used in Rennison et al. (2016). Rapid adaptive evolution of colour vision in the threespine stickleback radiation. Proc. Royal Soc. B, 283 (1830), s. 20160242. doi:10.1098/rspb.2016.0242 and were kindly provided by the authors.

window.w <- 15  # the size to the left/right of vocal point over which mean is taken
for (i in 2:ncol(lake)) {
  irr.t.roll.mean <- data.frame(cbind(rollmean(lake$Wavelength, window.w), rollmean(lake[,i], window.w)))
  colnames(irr.t.roll.mean) <- c("wl", "nI")
  fitted.spline <- splinefun(irr.t.roll.mean$wl, irr.t.roll.mean$nI)
  new.intensity <- fitted.spline(lake$Wavelength)
  lake[i] <- new.intensity
}


Now the photic parameters can be extracted from the median reference spectra:

lake[lake[,] < 0] <- 0 # Some negaitve values arise from spectrometer inaccuracy
P50_lake <-data.frame()
for (i in 2:ncol(lake)){
  x <- lake$Wavelength # Due to the rolling mean, the range is made a bit smaller here to reduce noise in the extremes of the visible spectrum
  y <- lake[,i]
  dx <- diff(x)
  mx <- (x[-1] + x[-length(x)])/2
  my <- (y[-1] + y[-length(y)])/2
  lambda.50 <- uniroot(approxfun(mx, cumsum(my * dx) - sum(my * dx)/2), range(mx))$root
  lambda.25 <- uniroot(approxfun(mx, cumsum(my * dx) - sum(my * dx)*0.25), range(mx))$root
  lambda.75 <- uniroot(approxfun(mx, cumsum(my * dx) - sum(my * dx)*0.75), range(mx))$root
  spwidth <- lambda.75 - lambda.25
  depth <- colnames(lake[i])
  if (startsWith(colnames(lake[i]), "d") == T){  # Downwelling compared to d_0 and Sidewelling compared to s_0
  lux<- (trapz(x,y) *100) / (trapz(x,lake$d_0))  # Total amount of photons relative to 15cm below surface "luminosity"
  auc <- trapz(x,y)
  }else{
  lux<- (trapz(x,y) *100) / (trapz(x,lake$s_0))
  auc <- trapz(x,y)
}
  P50_lake[i-1,c(1:7)] <- cbind(lambda.50,lambda.25,lambda.75,spwidth,depth,lux,auc) # generate dataframe
}
colnames(P50_lake) <- c("P50","P25","P75","Band", "d", "lux", "auc") #Naming the variables
P50_lake$loc <- "lake" # Use the location's name to identify measurements
head(P50_lake)


Spectral downwelling attenuation coefficients are calculated from the absolute values at a given depth compared to the surface following Kirk, J. (1994). Light and Photosynthesis in Aquatic Ecosystems (2nd ed.). Cambridge: Cambridge University Press. doi:10.1017/CBO9780511623370, Chapter 1, pg 11-15.

lake$Kd <- 1 * log (lake$d_1 / lake$d_su)
lake$Kd <- lake$Kd / min(lake$Kd) # Normalized spectral attenaution coefficients


The absolute irradiance measurements are divided by the maximal value to obtain normalized irradiance:

for (i in 2:(ncol(lake)-1)){             #last column is Kd from previous code chunk
  lake[,i] <- lake[,i] / max(lake[,i])
}


Finally a correlation-based Principals Component Analysis (PCA) is used to generate a composite axis that accounts for most of the variation among photic environments:

to50 <- read.table("PhoticParameters.csv", stringsAsFactors=F, header = T, sep=",", dec = ".") [-1]
d1 <- subset(to50, d == 'd_1') # Extracts photic parameters calculated using downwelling irradiance at one meter depth
s1 <- subset(to50, d == 's_1')# Extracts photic parameters calculated using sidewelling irradiance at one meter depth
to50 <-cbind(d1,s1)
rownames(to50) <- to50$loc  # Add lakes names as rownames for matrix later one
to50 <- to50[,-c(4:5,7,11:14)]
# USING P25 and P75 instead of Interval "Width" makes more sense and gives better results!
colnames(to50) <-c("P50_d","P25_d","P75_d","lux_1","P50_s","P25_s","P75_s") #[,-c(6:11)] 

t50 <- scale(to50, scale = T) # Scaled variables (mean=0, variance=1)
Idata.dist.t <- vegdist(as.matrix(t50), method = "euclidean")
Idata.pca.t <- cmdscale(Idata.dist.t,eig=T )
Idata.pca.t # GOF = 0.98
## $points
##                 [,1]          [,2]
## Apoyo     -4.1099176  0.0839598116
## Xiloa     -1.7677757 -0.4963150676
## Apoyeque  -1.8813743  0.0003730403
## AsManagua -1.6728802  0.1219201034
## AsLeon    -0.8228473  0.2966310858
## Masaya    -0.1394389 -0.7100516844
## Tiscapa    2.6019994  1.1103833604
## Isletas    1.7871344  0.0016969679
## LkManagua  4.3917467 -0.8694270253
## SanJuan    1.6133535  0.4608294079
## 
## $eig
##  [1] 5.890567e+01 3.061627e+00 5.796832e-01 3.636577e-01 6.562180e-02
##  [6] 1.929325e-02 4.447284e-03 1.251224e-15 9.750631e-16 6.566937e-16
## 
## $x
## NULL
## 
## $ac
## [1] 0
## 
## $GOF
## [1] 0.9836079 0.9836079
Idata.pca.t$eig[1]/sum(Idata.pca.t$eig)  # 0.93% of aviation explained by PCA1
## [1] 0.9350106
to50<- merge(to50,Idata.pca.t$points[,1], by.x="row.names", by.y="row.names") # adds position of lakes on PCA to the table for downstream analysis 

Opsin Gene Expression and Spectral Sensitivity Curves

First, create the absorbance spectra templates based on Govardovskii et al. (2000). In search of the visual pigment template. Vis Neurosci, 17 (4), s. 509–528. doi:10.1017/s0952523800174036. These are then adjusted by the peak in absorbance of the respective opsin protein according to Torres-Dowdall et al. (2017). Rapid and Parallel Adaptive Evolution of the Visual System of Neotropical Midas Cichlid Fishes. Mol Biol Evol, 34 (10), s. 2469–2485. doi:10.1093/molbev/msx143

wl <- lake$Wavelength[22:972]# selecting 350 to 750 nm

#Define lambda max for each pigments A1 #A1 <- c(360,431,450,476,509,559)
#??max <- 559  #LWS - R
#??max <- 509  #RH2a-beta - Gb
#??max <- 476  #RH2b - BG
#??max <- 450  #Sws2a - Bl
#??max <- 431  #SWs2b - V
#??max <- 360  #SWS1 - UV

#Define lambda max for each pigments A2 #A2 <- c(360,440,466,500,555,610)
#??max <- 610  #LWS - R
#??max <- 555  #RH2a-beta - Gb
#??max <- 500  #RH2b - BG
#??max <- 466  #Sws2a - Bl
#??max <- 440  #SWs2b - V
#??max <- 360  #SWS1 - UV

### #ratio A1 and A2 opsins
ratio.A2_A1 <- c(1,0) # adjust the chromophore used for analysis, here assume A2
if(sum(ratio.A2_A1) != 1) print("WARNING: A1-A2 ratio is not 1!!!")
# functie to calculate lambda max
# "SWS1" "SWS2b" "S "RH2"  "LWS"
opsin.max.range <- matrix(NA, nrow = 6, ncol = 2)
rownames(opsin.max.range) <- c("SWS1","SWS2B","SWS2A","RH2b","RH2a","LWS")
colnames(opsin.max.range) <- c("A1", "A2")
opsin.max.range[1,] <- c(360, 360)
opsin.max.range[2,] <- c(431, 440)
opsin.max.range[3,] <- c(450, 466)
opsin.max.range[4,] <- c(476, 500)
opsin.max.range[5,] <- c(509, 555)
opsin.max.range[6,] <- c(559, 610)

f.calc.lamba.max <- function(A1.proportion){
  l.max.temp <- rep(NA,6)
  for(l.m in 1:6) l.max.temp[l.m] <- opsin.max.range[l.m, 1] + ((opsin.max.range[l.m, 2] - opsin.max.range[l.m, 1]) * A1.proportion)
  return(l.max.temp)
}

peaks <- f.calc.lamba.max(ratio.A2_A1[1])

#LWS #Govardovskii's template
lambdamax <- peaks[6]
R_lambdamax <- lambdamax/wl
A <- 69.7; B <- 28; b <- 0.922; C <- (-14.9); c <- 1.104;  D <- 0.674; a_lambdamax <- 0.8795 + 0.0459 * exp(-(lambdamax-300)^2/11940)
S_alpha <- 1/(exp(A*(a_lambdamax-R_lambdamax))+exp(B*(b-R_lambdamax))+exp(C*(c-R_lambdamax))+D)
lambdamlambda <- 189 + 0.315*lambdamax; be <- -40.5 + 0.195*lambdamax
S_beta <- 0.26*exp(-((wl-lambdamlambda)/be)^2)

R <- S_alpha+S_beta # For LWS 

#rh2ab #Govardovskii's template
lambdamax <- peaks[5]
R_lambdamax <- lambdamax/wl
A <- 69.7; B <- 28; b <- 0.922; C <- (-14.9); c <- 1.104;  D <- 0.674; a_lambdamax <- 0.8795 + 0.0459 * exp(-(lambdamax-300)^2/11940)
S_alpha <- 1/(exp(A*(a_lambdamax-R_lambdamax))+exp(B*(b-R_lambdamax))+exp(C*(c-R_lambdamax))+D)
lambdamlambda <- 189 + 0.315*lambdamax; be <- -40.5 + 0.195*lambdamax
S_beta <- 0.26*exp(-((wl-lambdamlambda)/be)^2)

Gb <- S_alpha+S_beta

#rh2b #Govardovskii's template
lambdamax <- peaks[4]
R_lambdamax <- lambdamax/wl
A <- 69.7; B <- 28; b <- 0.922; C <- (-14.9); c <- 1.104;  D <- 0.674; a_lambdamax <- 0.8795 + 0.0459 * exp(-(lambdamax-300)^2/11940)
S_alpha <- 1/(exp(A*(a_lambdamax-R_lambdamax))+exp(B*(b-R_lambdamax))+exp(C*(c-R_lambdamax))+D)
lambdamlambda <- 189 + 0.315*lambdamax; be <- -40.5 + 0.195*lambdamax
S_beta <- 0.26*exp(-((wl-lambdamlambda)/be)^2)

BG <- S_alpha+S_beta


#sws2a #Govardovskii's template
lambdamax <- peaks[3]
R_lambdamax <- lambdamax/wl
A <- 69.7; B <- 28; b <- 0.922; C <- (-14.9); c <- 1.104;  D <- 0.674; a_lambdamax <- 0.8795 + 0.0459 * exp(-(lambdamax-300)^2/11940)
S_alpha <- 1/(exp(A*(a_lambdamax-R_lambdamax))+exp(B*(b-R_lambdamax))+exp(C*(c-R_lambdamax))+D)
lambdamlambda <- 189 + 0.315*lambdamax; be <- -40.5 + 0.195*lambdamax
S_beta <- 0.26*exp(-((wl-lambdamlambda)/be)^2)

Bl <- S_alpha+S_beta

#sws2b #Govardovskii's template
lambdamax <- peaks[2]
R_lambdamax <- lambdamax/wl
A <- 69.7; B <- 28; b <- 0.922; C <- (-14.9); c <- 1.104;  D <- 0.674; a_lambdamax <- 0.8795 + 0.0459 * exp(-(lambdamax-300)^2/11940)
S_alpha <- 1/(exp(A*(a_lambdamax-R_lambdamax))+exp(B*(b-R_lambdamax))+exp(C*(c-R_lambdamax))+D)
lambdamlambda <- 189 + 0.315*lambdamax; be <- -40.5 + 0.195*lambdamax
S_beta <- 0.26*exp(-((wl-lambdamlambda)/be)^2)
V <- S_alpha+S_beta

#sws1 #Govardovskii's template
lambdamax <- peaks[1]
R_lambdamax <- lambdamax/wl
A <- 69.7; B <- 28; b <- 0.922; C <- (-14.9); c <- 1.104;  D <- 0.674; a_lambdamax <- 0.8795 + 0.0459 * exp(-(lambdamax-300)^2/11940)
S_alpha <- 1/(exp(A*(a_lambdamax-R_lambdamax))+exp(B*(b-R_lambdamax))+exp(C*(c-R_lambdamax))+D)
lambdamlambda <- 189 + 0.315*lambdamax; be <- -40.5 + 0.195*lambdamax
S_beta <- 0.26*exp(-((wl-lambdamlambda)/be)^2)
UV <- S_alpha+S_beta


Then load the opsin proportional opsin gene expression:

opsin <- read.table("ProportionalExpression-wild.csv", header=T, sep =",", dec=".",stringsAsFactors = F)
prop_opsin <- gather(opsin, key="p_opsin", value="value", dplyr::starts_with('P_'))[,-c(11:12)]

prop_opsin$Location <- as.factor(prop_opsin$Location)
prop_opsin$Location <- factor(prop_opsin$Location,levels=c("SanJuan","Isletas","Apoyo", "LkManagua","Tiscapa","Masaya","AsLeon","Apoyeque", "Xiloa", "AsManagua")) 
prop_opsin$p_opsin <- factor(prop_opsin$p_opsin,levels=c("P_sws1", "P_sws2b", "P_sws2a", "P_rh2b", "P_rh2a", "P_lws"))


Load individual sensitivities into empty data frame for each wavelength within visiblie spectrum (row) and individiual spectral sensitivity (column) for each location

lake_sens <-data.frame()
# 825 rows (350-700nm)
for (i in 1:8){ # The indexes allow to select the individuals from each location
  lake_sens[1:length(wl),i]  <-  R * opsin$P_lws[i] + Gb* opsin$P_rh2a[i] + BG* opsin$P_rh2b[i] + Bl * opsin$P_sws2a[i] + V* opsin$P_sws2b[i]+ UV * opsin$P_sws1[i]
  names(lake_sens)[i] <- paste("lake_",opsin$Probe[i], sep="")
}


Obtain median sensitivity curve for each population:

momdis = function(x) {
  c(m = median(x), s = sd(x), iqr = IQR(x), n = length(x), std.error(x))
}
population <- aggregate(value ~ p_opsin + Location, data = prop_opsin, momdis)
population <- do.call(data.frame, population)

Lake_m <- subset(population, Location =="Apoyo") # Here Crater Lake Apoyo as an example

# Mean Curve
eR <- R * Lake_m[6,3]   # LWS
eGb <- Gb* Lake_m[5,3]  # RH2a
eBG <- BG* Lake_m[4,3]  # RH2B
eBl <- Bl * Lake_m[3,3] # SWS2A
eV <- V* Lake_m[2,3]    # SWS2B
eUV<-UV * Lake_m[1,3]   # SWS1

I_lake <- eUV + eR+eGb+eBG+eBl+eV


Then the change in sensitivity of ancestral (median) - derived populations (individual) is calculated:

sens_ind <- read.csv("SensitivityCurves-Individuals-A2.csv") [0:827,-1] # Either A1, A2 or 50% ratio, here A2 (350-700nm)
sens <- read.csv("Sens_Pop-A2.csv") [0:827,-1]                          # Either A1, A2 or 50% ratio, here A2 (350-700nm)

for (i in 2:9){
  sens_ind[i] <- (sens$I_lknic_med - sens_ind[i]) # Here example using Great Lake Nicaragua - Crater Lake Apoyo
}

Correlated Changes in Photic Conditions and Spectral Sensitivity Curves


Analogous as above, the change in spectra is calculated for ancestral - derived populations pairs (here normalized d_1 = downwelling irradiance at one meter depth):

Lake_SpectralShift <- lake_source$d_1 - lake_derived$d_1 # This has to be replaced accordingly!
SpectralChange <- lake$d_1 - lake$s_1 # This is just an example comparing spectral change from down- vs sidewelling normalized irradiance


Then the changes in photic conditions and sensitivity among ancestral-derived populations are correlated and p-values adjusted:

sens_ind <- read.csv("ChangeSensitivityA2.csv") [0:827,-1] # This file contains change in sensitivity in pairs of ancestral-derived popualtions (350-700nm)
b <-   read.csv("ChangeDownIrradiance-ChangeSensitivityA2.csv") [0:827,-1] # This file contains change in irradiance in pairs of ancestral-derived popualtions (350-700nm)
Coef_Ind <-data.frame()

for (i in 2:9){
  Test <- cor.test(b$Apoyo_SS, sens_ind[,i])
  p <- Test$p.value
  coef <- as.numeric(Test$estimate)
  Sample <- colnames(sens_ind)[i]
  IndCoef<- as.data.frame(cbind(Sample, coef, p))
  Coef_Ind <- rbind(Coef_Ind, IndCoef)
}
for (i in 1:nrow(Coef_Ind)){
  Coef_Ind$p_adj[i] <- p.adjust(Coef_Ind$p[i], "fdr", n=nrow(Coef_Ind)) # Adjust p-values with Hochberg-Benjamini false discovery rate method
}
head(Coef_Ind)

Random Mixed-Effects Models

These models used PSI (Predicted Sensitivity Index) as response variable and Photic (x-positions along PCA1 from photic conditions) and Rearing rearing conditions (lab-reared vs wild-caught) as predictor variables with Location as random intercept

m3<- read.csv("Linear Mixed-Effect Model Dataset.csv", stringsAsFactors = T) [,-1]
m3$Rearing <- factor(m3$Rearing, levels=c("wild", "lab"))
# 
m3.lmer <- lmer(PSI_A2 ~ Photic*Rearing + (1|Location), data=m3)     # Model includes interaction
m3.lmer.add <- lmer(PSI_A2 ~ Photic+Rearing + (1|Location), data=m3) # Only additive model
#Anova(m3.lmer, type = 2,test.statistic = "F")
#Anova(m3.lmer, type = 2,test.statistic = "F")
pander(Anova(m3.lmer, type = 3,test.statistic = "F"))
Analysis of Deviance Table (Type III Wald F tests with Kenward-Roger df)
  F Df Df.res Pr(>F)
(Intercept) 229151 1 11.94 5.999e-27
Photic 66.87 1 12.16 2.748e-06
Rearing 65.27 1 132.4 3.571e-13
Photic:Rearing 3.569 1 131.1 0.06107
rsq(m3.lmer,adj=T,type=c('v'))
## $model
## [1] 0.7006618
## 
## $fixed
## [1] 0.6526284
## 
## $random
## [1] 0.04803334
pander(Anova(m3.lmer.add, type = 2,test.statistic = "F"))
Analysis of Deviance Table (Type II Wald F tests with Kenward-Roger df)
  F Df Df.res Pr(>F)
Photic 66.83 1 7.99 3.764e-05
Rearing 63.94 1 133.4 5.413e-13
rsq(m3.lmer.add,adj=T,type=c('v'))
## $model
## [1] 0.6952007
## 
## $fixed
## [1] 0.6476031
## 
## $random
## [1] 0.04759758


Now we can calculate the amount of variation explained by each predictor based on Stoffel. partR2: partitioning R2 in generalized linear mixed models 9:e11414. https://doi.org/10.7717/peerj.11414.

part1 <- partR2(m3.lmer, partvars = c("Photic:Rearing"), nboot=100)
part2 <- partR2(m3.lmer.add, partvars = c("Photic", "Rearing"), nboot=100) 
# Final analysis used nboot=1000 iterations
R3 <- mergeR2(part1, part2)
R3$R2


It is also wise to perform sensitivity analysis to detect the influence of confounders on treatment and outcome. Making Sense of Sensitivity: Extending Omitted Variable Bias, Journal of the Royal Statistical Society Series B: Statistical Methodology, Volume 82, Issue 1, February 2020. https://doi.org/10.1111/rssb.12348.

lm3 <- lm(PSI_A2 ~ Photic*Rearing, data=m3)
photic.sensitivity <- sensemakr(model = lm3, 
                                treatment = "Photic",
                                benchmark_covariates='Rearinglab',
                                kd =1:2,
                                ky =1:2, 
                                q = 1,
                                alpha = 0.05, 
                                reduce = TRUE)

Outcome: PSI_A2
Treatment Est. S.E. t-value \(R^2_{Y \sim D |{\bf X}}\) \(RV_{q = 1}\) \(RV_{q = 1, \alpha = 0.05}\)
Photic 4.031 0.35 11.529 49.4% 61.4% 54.9%
Note: df = 136; Bound ( 1x Rearinglab ): \(R^2_{Y\sim Z| {\bf X}, D}\) = 46.8%, \(R^2_{D\sim Z| {\bf X} }\) = 0%


summary(photic.sensitivity)
## Sensitivity Analysis to Unobserved Confounding
## 
## Model Formula: PSI_A2 ~ Photic * Rearing
## 
## Null hypothesis: q = 1 and reduce = TRUE 
## -- This means we are considering biases that reduce the absolute value of the current estimate.
## -- The null hypothesis deemed problematic is H0:tau = 0 
## 
## Unadjusted Estimates of 'Photic': 
##   Coef. estimate: 4.0311 
##   Standard Error: 0.3496 
##   t-value (H0:tau = 0): 11.5291 
## 
## Sensitivity Statistics:
##   Partial R2 of treatment with outcome: 0.4943 
##   Robustness Value, q = 1: 0.6141 
##   Robustness Value, q = 1, alpha = 0.05: 0.5494 
## 
## Verbal interpretation of sensitivity statistics:
## 
## -- Partial R2 of the treatment with the outcome: an extreme confounder (orthogonal to the covariates) that explains 100% of the residual variance of the outcome, would need to explain at least 49.43% of the residual variance of the treatment to fully account for the observed estimated effect.
## 
## -- Robustness Value, q = 1: unobserved confounders (orthogonal to the covariates) that explain more than 61.41% of the residual variance of both the treatment and the outcome are strong enough to bring the point estimate to 0 (a bias of 100% of the original estimate). Conversely, unobserved confounders that do not explain more than 61.41% of the residual variance of both the treatment and the outcome are not strong enough to bring the point estimate to 0.
## 
## -- Robustness Value, q = 1, alpha = 0.05: unobserved confounders (orthogonal to the covariates) that explain more than 54.94% of the residual variance of both the treatment and the outcome are strong enough to bring the estimate to a range where it is no longer 'statistically different' from 0 (a bias of 100% of the original estimate), at the significance level of alpha = 0.05. Conversely, unobserved confounders that do not explain more than 54.94% of the residual variance of both the treatment and the outcome are not strong enough to bring the estimate to a range where it is no longer 'statistically different' from 0, at the significance level of alpha = 0.05.
## 
## Bounds on omitted variable bias:
## 
## --The table below shows the maximum strength of unobserved confounders with association with the treatment and the outcome bounded by a multiple of the observed explanatory power of the chosen benchmark covariate(s).
## 
##    Bound Label R2dz.x R2yz.dx Treatment Adjusted Estimate Adjusted Se
##  1x Rearinglab  5e-04  0.4682    Photic            3.9713      0.2560
##  2x Rearinglab  9e-04  0.9363    Photic            3.9115      0.0886
##  Adjusted T Adjusted Lower CI Adjusted Upper CI
##     15.5137            3.4651            4.4775
##     44.1573            3.7363            4.0866