The datasets needed to reproduce this analysis are deposited on Bertinetti et al. 2024. Data from: Visual pigment chromophore usage in Nicaraguan Midas cichlids: Phenotypic plasticity and genetic assimilation of cyp27c1 expression [Dataset] https://doi.org/10.5281/zenodo.10850332

Following R version and packages were used to run this code:

R.version$platform
## [1] "x86_64-apple-darwin20"
R.version$version.string
## [1] "R version 4.4.0 (2024-04-24)"
installed.packages()[names(sessionInfo()$otherPkgs), "Version"]
##       ggtext         ragg   colorspace       ggdist       ggpubr        vegan 
##      "0.1.2"      "1.3.0"      "2.1-0"      "3.3.2"      "0.6.0"      "2.6-4" 
##      lattice      permute    ggfortify       pander     devtools      usethis 
##     "0.22-6"      "0.9-7"     "0.4.17"      "0.6.5"      "2.4.5"      "2.2.3" 
##   effectsize       partR2          rsq         lme4       Matrix  performance 
##      "0.8.7"      "0.9.2"        "2.6"   "1.1-35.3"      "1.7-0"     "0.11.0" 
##      sjstats       visreg       pracma    gridExtra      ggplot2         nlme 
##     "0.18.2"      "2.7.0"      "2.4.4"        "2.3"      "3.5.1"    "3.1-164" 
##    agricolae     corrplot        Hmisc          car      carData          zoo 
##      "1.3-7"       "0.92"      "5.1-2"      "3.1-2"      "3.0-5"     "1.8-12" 
##         MESS RColorBrewer      plotrix        tidyr        dplyr  matrixStats 
##     "0.5.12"      "1.1-3"      "3.8-4"      "1.3.1"      "1.1.4"      "1.3.0" 
##      stringr        readr 
##      "1.5.1"      "2.1.5"
## HERE USE setwd() to set your working directory where the data files will be located

Cyp27c1 Gene Expression

#### USEFUL FUNCTION
momdis = function(x) {
  c(m = mean(x), med = median(x), s = sd(x), iqr = IQR(x), n = length(x), se = std.error(x))
}

cyp27_adults <- read.table("Gene Expression - Wild and Lab Individuals.csv", header=T, sep =",", dec=".",stringsAsFactors = F) ### READ DATA FOR ADULTS
 
cyp27_adults$Location <- factor(cyp27_adults$Location,levels=c("SanJuan","Isletas","Apoyo", "LkManagua","Tiscapa","Masaya","AsLeon","Apoyeque", "Xiloa", "AsManagua")) # ALTERNATIVE PLOT ORDER FOR OPSIN EXPRESSION

Irradiance raw measurements


Performing correlation-based Principals Component Analysis (PCA) to generate a composite axis that accounts for most of the variation among photic environments. The raw data is available at Bertinetti et al. 2023. Data from: Repeated Divergence in Opsin Gene Expression Mirrors Photic Habitat Changes in Rapidly Evolving Crater Lake Cichlid Fishes [Dataset]. Dryad. https://doi.org/10.5061/dryad.j3tx95xgk

to50 <- read.table("Photic Parameters.csv", stringsAsFactors=F, header = T, sep=",", dec = ".") [-1]
d1 <- subset(to50, d == 'd_1') # Extracts photic parameters calculated using downwelling irradiance at one meter depth
s1 <- subset(to50, d == 's_1')# Extracts photic parameters calculated using sidewelling irradiance at one meter depth
to50 <-cbind(d1,s1)
rownames(to50) <- to50$loc  # Add lakes names as rownames for matrix later one
to50 <- to50[,-c(4:5,7:8,12:16)]
# USING P25 and P75 instead of Interval "Width" makes more sense and gives better results!
colnames(to50) <-c("P50_d","P25_d","P75_d","lux_1","P50_s","P25_s","P75_s") #[,-c(6:11)] 


pca1 <- prcomp(to50, scale. = T,center=T) ## doing PCA
summary(pca1) # 0.935% variation PC1
## Importance of components:
##                          PC1    PC2    PC3     PC4     PC5     PC6     PC7
## Standard deviation     2.558 0.5833 0.2538 0.20101 0.08539 0.04630 0.02223
## Proportion of Variance 0.935 0.0486 0.0092 0.00577 0.00104 0.00031 0.00007
## Cumulative Proportion  0.935 0.9836 0.9928 0.99858 0.99962 0.99993 1.00000
pca1$rotation ## check values
##              PC1          PC2         PC3        PC4         PC5         PC6
## P50_d  0.3884146 -0.080822307  0.20405789  0.3984942 -0.37023902 -0.03459849
## P25_d  0.3849762  0.006651055  0.60002497  0.3461262  0.50056690  0.16342188
## P75_d  0.3844217 -0.136312462 -0.50919993  0.4759206 -0.20096420 -0.30033874
## lux_1 -0.3284813 -0.927233825  0.09490456  0.1306572  0.07514075 -0.02184485
## P50_s  0.3859833 -0.230669994 -0.18558499 -0.2668902 -0.21235482  0.79734953
## P25_s  0.3836522 -0.196155004  0.37021534 -0.5794844 -0.30613204 -0.45781105
## P75_s  0.3861027 -0.152952389 -0.39816803 -0.2660728  0.65384379 -0.18989062
##              PC7
## P50_d -0.7098526
## P25_d  0.3076391
## P75_d  0.4661158
## lux_1 -0.0116613
## P50_s  0.1061847
## P25_s  0.1953583
## P75_s -0.3669163
pca1$x[,1] # points of samples on x-coordinate (Dim 1) to use for linear model
##      Apoyo      Xiloa   Apoyeque  AsManagua     AsLeon     Masaya    Tiscapa 
## -4.1099176 -1.7677757 -1.8813743 -1.6728802 -0.8228473 -0.1394389  2.6019994 
##    Isletas  LkManagua    SanJuan 
##  1.7871344  4.3917467  1.6133535
cyp_adults <-merge(as.data.frame(pca1$x[,1]),cyp27_adults, by.x="row.names",by.y="Location") ## merge to gene expression dataframe
colnames(cyp_adults)[1:2] <-c("Location","Photic")
cyp_adults$logR_cyp27c <- log2(cyp_adults$R_cyp27c) # improve distribution logarithmic scale
a <- cyp_adults[complete.cases(cyp_adults), ] # partialR2 does not like NA's

w <- subset(a, rearing == "wild")

m1.lmer <- lmer(logR_cyp27c ~ Photic + (1|Location), data=w) ## mixed-effect model with random intercept
pander(Anova(m1.lmer, type = 2,test.statistic = "F"))
Analysis of Deviance Table (Type II Wald F tests with Kenward-Roger df)
  F Df Df.res Pr(>F)
Photic 7.585 1 8.044 0.02477
capture.output(Anova(m1.lmer, type=3,test.statistic = "F"), file="log27~P50-random,~1|Location-ONLYWILD.txt")
summary(m1.lmer)
## Linear mixed model fit by REML ['lmerMod']
## Formula: logR_cyp27c ~ Photic + (1 | Location)
##    Data: w
## 
## REML criterion at convergence: 302.8
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.9242 -0.7128  0.1956  0.6237  2.6691 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Location (Intercept) 1.911    1.383   
##  Residual             2.209    1.486   
## Number of obs: 78, groups:  Location, 10
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  -1.7981     0.4685  -3.837
## Photic        0.5324     0.1933   2.754
## 
## Correlation of Fixed Effects:
##        (Intr)
## Photic -0.004
rsq(m1.lmer,adj=T,type=c('v')) #model 0.51 , fixed 0.32 (marginal)
## $model
## [1] 0.6088237
## 
## $fixed
## [1] 0.3070833
## 
## $random
## [1] 0.3017404
l <- subset(a, rearing == "lab")
### POPULATION DIFF FOR LAB (ANOVA WITH POSTHOCS)
lcyp27<- lm(logR_cyp27c ~ Location, data = l)
Anova(lcyp27, type='2')                                 #<0.001***
(m2 <- HSD.test(lcyp27, trt = c('Location'), unbalanced=T))
## $statistics
##    MSerror Df      Mean      CV
##   1.406482 49 -3.515286 -33.737
## 
## $parameters
##    test   name.t ntr StudentizedRange alpha
##   Tukey Location   9         4.587972  0.05
## 
## $means
##           logR_cyp27c       std r        se       Min        Max       Q25
## Apoyeque    -2.407985 1.0171211 6 0.4841629 -3.529913 -1.1800019 -3.213655
## Apoyo       -6.204881 2.0438332 5 0.5303739 -8.446038 -3.6496243 -7.952377
## AsLeon      -3.547626 0.1866717 6 0.4841629 -3.861089 -3.3685457 -3.633871
## AsManagua   -1.652835 0.8670742 6 0.4841629 -2.625428 -0.2070244 -2.264246
## Isletas     -1.923736 1.7610787 8 0.4192974 -3.395378  1.9903505 -3.068703
## LkManagua   -1.997903 1.0588193 8 0.4192974 -2.976979 -0.1822931 -2.702620
## Masaya      -1.614507 0.7621970 5 0.5303739 -2.560767 -0.4318901 -1.759217
## Tiscapa     -2.681419 1.2106888 6 0.4841629 -4.726166 -1.5327632 -3.109341
## Xiloa       -8.959667 0.7457053 8 0.4192974 -9.635348 -7.7808874 -9.635348
##                 Q50       Q75
## Apoyeque  -2.557807 -1.532033
## Apoyo     -6.221614 -4.754753
## AsLeon    -3.489097 -3.419121
## AsManagua -1.632946 -1.416227
## Isletas   -2.325379 -1.915405
## LkManagua -2.593093 -1.133979
## Masaya    -1.708530 -1.612129
## Tiscapa   -2.544291 -1.718556
## Xiloa     -9.029485 -8.465720
## 
## $comparison
## NULL
## 
## $groups
##           logR_cyp27c groups
## Masaya      -1.614507      a
## AsManagua   -1.652835      a
## Isletas     -1.923736      a
## LkManagua   -1.997903      a
## Apoyeque    -2.407985      a
## Tiscapa     -2.681419      a
## AsLeon      -3.547626      a
## Apoyo       -6.204881      b
## Xiloa       -8.959667      c
## 
## attr(,"class")
## [1] "group"
plot(m1.lmer)

cyp27_juve <- read.table("Gene Expression - Light Experiments.csv", header=T, sep =",", dec=".",stringsAsFactors = T)
cyp27_juve$logRcyp27 <- log2(cyp27_juve$R_cyp27c1) ### NON LOG SCALE IS HIHGLY SKEWED
treat_col <- c("#3288BD","#9E0142")

## ARRANGE DATA
cyp27_juve<- cyp27_juve %>%
  unite("combi", Spec, Treat, sep="", remove=F)
cyp27_juve$combi <- as.factor(cyp27_juve$combi)
cyp27_juve$combi<- factor(cyp27_juve$combi,levels=c("ASTRL","ASTWL","SAGRL","SAGWL","CITRL", "CITWL")) 

## ANOVA
acyp<- aov(logRcyp27 ~ Spec * Treat, data = cyp27_juve)
pander(Anova(acyp, type='3'))                             
Anova Table (Type III tests)
  Sum Sq Df F value Pr(>F)
(Intercept) 1051 1 557.3 5.951e-64
Spec 19.85 2 5.261 0.005817
Treat 6.567 1 3.481 0.06334
Spec:Treat 26.96 2 7.144 0.0009726
Residuals 443.4 235 NA NA
### Tukey HSD post hoc
acyp27 <- aov(logRcyp27 ~ combi, data = cyp27_juve)
(m4 <- HSD.test(acyp27, trt = c('combi'), unbalanced=T))   
## $statistics
##    MSerror  Df      Mean        CV
##   1.886692 235 -6.041902 -22.73405
## 
## $parameters
##    test name.t ntr StudentizedRange alpha
##   Tukey  combi   6         4.063582  0.05
## 
## $means
##       logRcyp27       std  r        se        Min       Max       Q25       Q50
## ASTRL -5.192185 0.5906257 39 0.2199471  -7.020926 -4.287712 -5.496583 -5.132894
## ASTWL -5.765375 0.9923818 41 0.2145155  -8.587273 -4.532825 -6.333516 -5.680382
## CITRL -5.931758 1.6605490 41 0.2145155  -8.480357 -2.625044 -7.673003 -5.930160
## CITWL -7.290927 1.5935560 41 0.2145155  -9.828281 -3.803897 -8.532825 -7.333516
## SAGRL -6.154514 1.9554877 39 0.2199471 -12.287712 -3.754383 -6.500534 -5.629501
## SAGWL -5.876667 0.9273730 40 0.2171804  -8.243318 -4.115285 -6.290743 -5.710303
##             Q75
## ASTRL -4.723594
## ASTWL -4.840629
## CITRL -4.729292
## CITWL -6.345198
## SAGRL -5.083174
## SAGWL -5.333655
## 
## $comparison
## NULL
## 
## $groups
##       logRcyp27 groups
## ASTRL -5.192185      a
## ASTWL -5.765375     ab
## SAGWL -5.876667     ab
## CITRL -5.931758     ab
## SAGRL -6.154514      b
## CITWL -7.290927      c
## 
## attr(,"class")
## [1] "group"