The datasets needed to reproduce this analysis are deposited on Bertinetti et al. 2024. Data from: Robust sensory traits across light habitats: Visual signals but not receptors vary in centrarchids inhabiting distinct photic environments [Dataset]. Zenodo. https://doi.org/10.5281/zenodo.13910039

Following packages are required:

R.version$platform
## [1] "aarch64-apple-darwin20"
R.version$version.string
## [1] "R version 4.4.0 (2024-04-24)"
installed.packages()[names(sessionInfo()$otherPkgs), "Version"]
##  BiocManager       gplots        edgeR       Glimma        limma        vegan 
##    "1.30.25"    "3.1.3.1"      "4.2.1"     "2.14.0"     "3.60.4"      "2.6-8" 
##      lattice      permute       partR2    ggfortify    agricolae         lme4 
##     "0.22-6"      "0.9-7"      "0.9.2"     "0.4.17"      "1.3-7"   "1.1-35.5" 
##       Matrix          rsq       pracma    gridExtra      ggplot2          car 
##      "1.7-0"        "2.6"      "2.4.4"        "2.3"      "3.5.1"      "3.1-2" 
##      carData          zoo         MESS RColorBrewer      plotrix        tidyr 
##      "3.0-5"     "1.8-12"     "0.5.12"      "1.1-3"      "3.8-4"      "1.3.1" 
##        dplyr  matrixStats      stringr        readr 
##      "1.1.4"      "1.4.0"      "1.5.1"      "2.1.5"

Irradiance raw measurements

The “LightData.zip” contains the raw irradiance measurements, the normalized irradiance and the photic factors

absir  <-readRDS("LightData/Absolute_Irradiance.Rds") # read measurements
photic <- read.table("LightData/Photic factors.csv", stringsAsFactors=T, header = T, sep=",", dec = ".") [-1] 
# contains photic variables


### PLOT ONE EXAMPLE OF ABSOLUTE IRRADIANCE
pig <- c("#56B4E9","#009E73","#D55E00")
plot( absir$BIGARBORVITAE$d_1m[-c(1:37)] ~ absir$BIGARBORVITAE$wl[-c(1:37)], type = "l",axes=F, col = "#56B4E9", lwd=4,xlim=c(350,750),ylim=c(0,9.435e+14), yaxt="n",xaxt="n",ylab="",xlab="")
#lines(absir$BAVBLOOM$d_1m[-c(1:37)]  ~ absir$BAVBLOOM$wl[-c(1:37)], col=pig[2], lwd=4)
lines(absir$CRANBERRY$d_1m[-c(1:37)]  ~ absir$CRANBERRY$wl[-c(1:37)], col=pig[3], lwd=4)
axis(side=1, at = c(seq(350,750, by=25)),font=2,lwd=3, labels = c(seq(350,750, by=25)), las = 1, cex = 1, col.axis = 1, cex.axis=1.5)
axis(side=2, at= c(seq(0,9.435e+14 , by=1.5725e+14)), las = 2, lwd=3, font=2, line=-2, cex.axis=1.2)
text(700,8.87e+14, "BIG ARBOR VITAE", col=pig[1],font=2)
text(680,3.1e+14, "CRANBERRY", col=pig[3],font=2)
mtext("ABS-IRRAD", font=2)

Morphological Traits


The data associated with morphological traits is found in “Morphology.csv”

morph <- read.table("Morphology.csv", header=T, sep =",", dec=".",stringsAsFactors = T)
morp <- aov(rel_eye ~ SPEC, data= morph)
(m2 <- LSD.test(morp, trt = c('SPEC'), p.adj = "fdr"))                
## $statistics
##        MSerror Df      Mean       CV
##   5.172069e-05 43 0.0726456 9.899718
## 
## $parameters
##         test p.ajusted name.t ntr alpha
##   Fisher-LSD       fdr   SPEC   6  0.05
## 
## $means
##       rel_eye         std  r          se        LCL        UCL        Min
## BL 0.07684131 0.007291179 12 0.002076068 0.07265452 0.08102810 0.05843956
## CR 0.06936959 0.008559780  5 0.003216230 0.06288344 0.07585574 0.06111288
## LB 0.04958091 0.004093451  7 0.002718211 0.04409912 0.05506270 0.04437961
## PK 0.07417629 0.007164541 14 0.001922065 0.07030008 0.07805250 0.06143421
## RB 0.08967307 0.008493313  9 0.002397237 0.08483858 0.09450756 0.07983530
## SB 0.04904939 0.001099388  2 0.005085307 0.03879389 0.05930489 0.04827200
##           Max        Q25        Q50        Q75
## BL 0.08480040 0.07372169 0.07793701 0.08177309
## CR 0.08304055 0.06332002 0.06890885 0.07046565
## LB 0.05531000 0.04643345 0.04968547 0.05241220
## PK 0.08597273 0.06865083 0.07442161 0.07840304
## RB 0.10633060 0.08461965 0.08820102 0.09563796
## SB 0.04982677 0.04866070 0.04904939 0.04943808
## 
## $comparison
## NULL
## 
## $groups
##       rel_eye groups
## RB 0.08967307      a
## BL 0.07684131      b
## PK 0.07417629      b
## CR 0.06936959      b
## LB 0.04958091      c
## SB 0.04904939      c
## 
## attr(,"class")
## [1] "group"
plot(m2, variation = 'SE', lwd = 2, las = 1, horiz = T, xlab = 'combi')

Differential Gene Expression


“Read_Counts_Table.csv” contains the table of raw read counts for each individual generated using STAR - Dobin et al. (2013). STAR: ultrafast universal RNA-seq aligner. Bioinformatics (Oxford, England), 29(1), 15–21 https://doi.org/10.1093/bioinformatics/bts635

counts <- read.table("Read_Counts_Table.csv", header=T, sep =",", dec=".",stringsAsFactors = T, check.names = F, row.names = 1) 
head(counts) # columns = sample_id , rows = gene_id
## ALSO ATTACH METADATA ASSOCAIAED WITH EACH INDIVIDUAL
sample.info <- read.table("SampleInfo.csv", header=T, sep =",", dec=".",stringsAsFactors = T) [,-1]


Then generate DGElist, normalize and filter by expression:

data <- DGEList(counts, gene=rownames(counts), group=sample.info$Group,lib.size = colSums(counts), remove.zeros = 0)
## data pre-processing
# convert  gene counts into counts per million reads
cpm <- cpm(data)
# transform counts per million to log2 counts per million
lcpm <- cpm(data , log = TRUE)
# set filtering criteria
keep.exprs2 <- filterByExpr(data , group = data$group , min.prop = .33 , min.count = 100)    # keep genes expressed at least at 100 cpm in one third of all samples in one group
# filter the data based on previously set criteria
dataF2 <- data[keep.exprs2,,keep.lib.sizes = FALSE]
# dim(dataF2)   # 7947 genes kept with more stringent filtering
## normalization of gene expression distributions
dataF2 <- calcNormFactors(dataF2 , method = "TMM")
lcpmF2 <- cpm(dataF2 , log = TRUE)      
col <- brewer.pal(ncol(data), "Paired")
boxplot(lcpmF2 , las = 2 ,col=col, main = "B. Normalized")


Proceed to differential gene expression analysis

#create a design matrix
group <- dataF2$samples$group
design <- model.matrix(~0+group)
# change header
colnames(design) <- gsub("group" , "" , colnames(design))
# specify what contrasts to use for differential gene expression analysis
contr.matrix <- makeContrasts( BLG = BLBAV - BLCY ,
                               PK = PKBAV - PKCY ,
                               RB = RBBAV - RBCY ,
                               CR = CRBAV - CRCY,
                               LB = LBBAV - LBCY,
                               levels = colnames(design))
contr.matrix
##        Contrasts
## Levels  BLG PK RB CR LB
##   BLBAV   1  0  0  0  0
##   BLCY   -1  0  0  0  0
##   CRBAV   0  0  0  1  0
##   CRCY    0  0  0 -1  0
##   LBBAV   0  0  0  0  1
##   LBCY    0  0  0  0 -1
##   PKBAV   0  1  0  0  0
##   PKCY    0 -1  0  0  0
##   RBBAV   0  0  1  0  0
##   RBCY    0  0 -1  0  0
##   SBBAV   0  0  0  0  0
##   SBCY    0  0  0  0  0
# transform counts to log2 counts per million, estimate mean-variance relationship and compute appropriate weights
v <- voom(dataF2 , design , plot = TRUE)

# fitting linear models
vfit <- lmFit(v , design)      # specify data and group design used for linear models
vfit <- contrasts.fit(vfit , contrasts = contr.matrix)    # specify the contrasts to be used
efit <- eBayes(vfit)
plotSA(efit)     # residual standard deviation vs average log2 expression; no drop in standard deviation is good!

# examine numbers of differentially expressed genes
DEG <- decideTests(efit)
summary(DEG)
##         BLG   PK   RB   CR   LB
## Down      0    1    0    0  145
## NotSig 7947 7939 7947 7947 7741
## Up        0    7    0    0   61
vennDiagram(DEG[,1:5] , circle.col = c("red" , "blue" , "green", "orange","violet"))

Visual Opsin Gene and cyp27c1 Expression


“Visual_Genes.csv” contains the normalized counts (TMM) from the previous section, the proportional expression of each cone opsin and the predicted sensitivity index based on Carleton et al. (2016). Proximate and ultimate causes of variable visual sensitivities: Insights from cichlid fish radiations. Genesis (New York, N.Y. : 2000), 54(6), 299–325. https://doi.org/10.1002/dvg.22940

opsin <- read.table("Visual_Genes.csv", header=T, sep =",", dec=".",stringsAsFactors = T) [,-1] 

### THEN RUN STATISTICAL TST FOR EACH GENE REPLACING RESPECTIVELY (Here= "SWS2)

#########
#########       SWS2
#########

asws2 <- aov(rank(P_SWS2) ~ SPEC + SITE, data = opsin)
par(mfrow=c(2,2))
plot(asws2)

Anova(asws2, type='2') 
TukeyHSD(asws2)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = rank(P_SWS2) ~ SPEC + SITE, data = opsin)
## 
## $SPEC
##              diff         lwr         upr     p adj
## CR-BL  15.8166667   1.7929864  29.8403469 0.0190672
## LB-BL -13.3690476 -25.8990261  -0.8390692 0.0305146
## PK-BL  11.2628205   0.7160166  21.8096244 0.0303019
## RB-BL -13.8611111 -25.4785644  -2.2436578 0.0112897
## SB-BL  -5.5833333 -25.7053527  14.5386861 0.9603044
## LB-CR -29.1857143 -44.6122983 -13.7591303 0.0000190
## PK-CR  -4.5538462 -18.4179799   9.3102876 0.9211716
## RB-CR -29.6777778 -44.3728230 -14.9827325 0.0000055
## SB-CR -21.4000000 -43.4425679   0.6425679 0.0614749
## PK-LB  24.6318681  12.2807161  36.9830202 0.0000071
## RB-LB  -0.4920635 -13.7691530  12.7850260 0.9999975
## SB-LB   7.7857143 -13.3380058  28.9094343 0.8779017
## RB-PK -25.1239316 -36.5482836 -13.6995796 0.0000010
## SB-PK -16.8461538 -36.8573073   3.1649996 0.1431511
## SB-RB   8.2777778 -12.3177657  28.8733212 0.8337160
## 
## $SITE
##                   diff       lwr      upr     p adj
## CRANBERRY-BAV -1.84889 -7.006084 3.308305 0.4731651
asws2b<- aov(rank(P_SWS2) ~ SPEC, data = opsin) ## FOR NORMAL DISTRIBUTION
#(m2 <- HSD.test(asws2b, trt = c('combi'), unbalanced=T))      
(m1 <- LSD.test(asws2b, trt = c('SPEC'), p.adj = "fdr")) 
## $statistics
##    MSerror Df Mean       CV
##   76.85222 42 24.5 35.78179
## 
## $parameters
##         test p.ajusted name.t ntr alpha
##   Fisher-LSD       fdr   SPEC   6  0.05
## 
## $means
##    rank(P_SWS2)      std  r       se       LCL      UCL  Min Max   Q25  Q50
## BL     24.58333 9.307459 12 2.530682 19.476210 29.69046 11.0  43 19.25 25.5
## CR     40.40000 6.877500  5 3.920516 32.488079 48.31192 29.0  47 40.00 42.0
## LB     11.21429 7.521398  7 3.313441  4.527492 17.90108  4.5  23  4.50 10.0
## PK     35.84615 9.099873 13 2.431401 30.939389 40.75292 16.0  48 34.00 37.0
## RB     10.72222 9.686044  9 2.922180  4.825024 16.61942  4.5  33  4.50  4.5
## SB     19.00000 1.414214  2 6.198880  6.490154 31.50985 18.0  20 18.50 19.0
##      Q75
## BL 30.25
## CR 44.00
## LB 16.00
## PK 41.00
## RB 15.00
## SB 19.50
## 
## $comparison
## NULL
## 
## $groups
##    rank(P_SWS2) groups
## CR     40.40000      a
## PK     35.84615      a
## BL     24.58333      b
## SB     19.00000     bc
## LB     11.21429      c
## RB     10.72222      c
## 
## attr(,"class")
## [1] "group"
par(mfrow=c(1,1))
plot(m1, variation = 'SE', lwd = 2, las = 1, horiz = F, xlab = 'sws2')

Body Reflectance


Read the normalized individual reflectance measurements for each individual found in “Body_Reflectance.csv”. Perform PCA, extract values along axis of major variation (PC1) and centroid distances for each species in each environment.

reflectance <- read.table("Body_Reflectance.csv", header=T, sep =",", dec=".",stringsAsFactors = T, row.names = 1)

## GENERATE PCS
pca1 = prcomp(reflectance[,-c(1:3)], scale. = F,center=F) # KEEP ONLY NUMERIC VALUES
s <- summary(pca1) # 0.78% variation + 10%
pca1$x[,1] # points of samples on x-coordinate (Dim 1) to use for linear model
## NDFC00117.BL.BV NDFC00118.BL.BV NDFC00119.PK.BV NDFC00120.BL.BV NDFC00121.PK.BV 
##       -226.7617       -342.8464       -616.2570       -598.6792       -516.1868 
## NDFC00123.LB.BV NDFC00124.BL.BV NDFC00128.BL.BV NDFC00129.RB.BV NDFC00130.RB.BV 
##      -1464.8595       -730.4720       -286.5806       -252.5308       -247.3381 
## NDFC00131.BL.BV NDFC00132.LB.BV NDFC00133.PK.BV NDFC00135.RB.BV NDFC00137.PK.BV 
##       -476.1326      -1198.3716       -981.1855       -219.4763       -848.8488 
## NDFC00138.BL.BV NDFC00139.PK.BV NDFC00141.PK.CY NDFC00142.PK.CY NDFC00143.PK.CY 
##       -674.3307       -749.3149       -554.7776       -647.5275       -758.9739 
## NDFC00145.BL.CY NDFC00146.PK.CY NDFC00148.BL.CY NDFC00149.BL.CY NDFC00150.BL.CY 
##       -641.9252       -604.1344      -1226.9368       -814.4939       -711.4287 
## NDFC00151.BL.CY NDFC00152.BL.CY NDFC00153.BL.CY NDFC00154.BL.CY NDFC00155.BL.CY 
##       -553.2209       -665.8861       -837.8850      -1065.0151       -749.4142 
## NDFC00157.RB.CY NDFC00160.PK.CY NDFC00169.RB.CY NDFC00187.PK.BV NDFC00188.BL.BV 
##       -439.6606       -547.8949       -552.5637       -655.7684       -424.6005 
## NDFC00189.LB.BV NDFC00191.PK.BV NDFC00193.RB.BV NDFC00195.RB.BV NDFC00197.BL.BV 
##       -999.7880       -677.2799       -452.0982       -403.7918       -319.2490 
## NDFC00198.LB.BV NDFC00199.PK.BV NDFC00200.RB.BV NDFC00201.PK.BV NDFC00202.BC.BV 
##      -1804.6781       -719.7529      -1088.4266       -623.0049       -672.3206 
## NDFC00203.BC.CY NDFC00204.RB.CY NDFC00205.PK.CY NDFC00206.RB.CY NDFC00207.SB.CY 
##       -576.0799       -442.8936       -412.4722       -369.7745      -1484.8858 
## NDFC00209.BC.CY NDFC00211.RB.CY NDFC00212.RB.CY NDFC00213.RB.CY NDFC00214.PK.CY 
##       -610.4444       -588.8737       -342.6823       -451.6047       -696.9742 
## NDFC00215.LB.CY NDFC00216.LB.CY NDFC00217.LB.CY NDFC00218.BC.CY NDFC00262.SB.BV 
##       -541.9053       -600.9686       -762.6622      -1979.5888      -1139.4591 
## NDFC00269.LB.BV NDFC00273.PK.BV NDFC00274.RB.BV NDFC00276.BC.BV NDFC00277.BC.BV 
##      -1327.9533       -744.7101       -573.9296       -701.8349      -1037.7740 
## NDFC00278.PK.BV NDFC00279.BC.BV 
##      -1025.8202      -2087.0136
### CREATE CONVEX HULLS (NOT ALL GROUPS NEEDED)
tab <- matrix(c(pca1$x[,1], pca1$x[,2]), ncol=2)
blbv <- tab[str_which(reflectance[,1],"BL.BV"),]
blcy <- tab[str_which(reflectance[,1],"BL.CY"),]
pkbl <- tab[str_which(reflectance[,1],"PK.BV"),]
pkcy <- tab[str_which(reflectance[,1],"PK.CY"),]
rbbv <- tab[str_which(reflectance[,1],"RB.BV"),]
rbcy <- tab[str_which(reflectance[,1],"RB.CY"),]
lbbv <- tab[str_which(reflectance[,1],"LB.BV"),]
lbcy <- tab[str_which(reflectance[,1],"LB.CY"),]

ch1 <- chull(blbv)
ch2 <- chull(blcy)
ch3 <- chull(pkbl)
ch4 <- chull(pkcy)
ch5 <- chull(rbbv)
ch6 <- chull(rbcy)
ch7 <- chull(lbbv)
ch8 <- chull(lbcy)

##### PLOT SCATTERPLOT + CONVEX HULLS
# Plot
par(mfrow=c(1,1),mar=c(4.5, 4.5, 1, 1),oma=c(1,1,1,0))
plot(pca1$x[,1], pca1$x[,2], xlim=c(-2100, -200), ylim=c(-750, 600),axes=F, xlab=paste("PCA 1 (",
     round(s$importance[2]*100, 1), "%)", sep = ""), ylab=paste("PCA 2 (", round(s$importance[5]*100, 1), "%)", sep = ""), 
     pch=21, col=as.numeric(as.factor(reflectance$lake)), bg=as.numeric(as.factor(reflectance$lake)), cex=2.5, cex.axis=1.5, cex.lab=1.5, las=1,
     panel.first= {
       # Add convex hulls
       polygon(blbv[ch1, ], border="blue", col=alpha("blue", 0.25), lwd=2)
       polygon(blcy[ch2, ], border="red", col=alpha("red", 0.25), lwd=2)
       polygon(pkbl[ch3, ], border="lightblue", col=alpha("lightblue", 0.25), lwd=2)
       polygon(pkcy[ch4, ], border="darkred", col=alpha("darkred", 0.25), lwd=2)
       polygon(rbbv[ch5, ], border="cyan", col=alpha("cyan", 0.25), lwd=2)
       polygon(rbcy[ch6, ], border="magenta", col=alpha("magenta", 0.25), lwd=2)
       polygon(lbbv[ch7, ], border="turquoise", col=alpha("turquoise", 0.25), lwd=2)
       polygon(lbcy[ch8, ], border="orange", col=alpha("orange", 0.25), lwd=2)
       # Add grid lines
       abline(v=0, lty=2, col="grey50",lwd=0.5) 
       abline(h=0, lty=2, col="grey50",lwd=0.5)
     })
axis(side=1, at= c(seq(-2100,-200, by=100)), labels=c(seq(-2100,-200, by=100)), cex.axis=1, font=2, las = 1, lwd=2, line=0)
axis(side=2, at= c(seq(-800,600, by=100)), labels=c(seq(-800,600, by=100)), cex.axis=1, font=4, las = 2, lwd=2, line=0)
text(pca1$x[,1], pca1$x[,2],labels=substr(rownames(reflectance), 7,12),
     col=as.numeric(as.factor(substr(rownames(reflectance), 14,16))) ,offset=0.5, pos=4, cex = 0.5)


Extract centroid distances:

ordimat <- scale(reflectance[,-c(1:3)], scale = F, center = F) # remove column wiht names
Idata.dist.t <- vegdist(ordimat, method = "euclidean")

centroid_dist <- betadisper(Idata.dist.t, as.factor(reflectance$spec_pop),type='centroid')

test_distances <- cbind(reflectance[,c(1:3)],centroid_dist$distances)
colnames(test_distances)[4] <- "dist"

## remove smallmouth bass (M. dolomieu) since only have one individual for each site
test_distances <- test_distances %>% filter(!str_detect(spec, 'SB')) 
test_distances$dist <- log(test_distances$dist) # log-transformed

acol <- aov(dist ~ spec * lake, data= test_distances)
Anova(acol, type='3')