The datasets needed to reproduce this analysis are deposited on Bertinetti et al. 2024. Data from: Robust sensory traits across light habitats: Visual signals but not receptors vary in centrarchids inhabiting distinct photic environments [Dataset]. Zenodo. https://doi.org/10.5281/zenodo.13910039
Following packages are required:
R.version$platform
## [1] "aarch64-apple-darwin20"
R.version$version.string
## [1] "R version 4.4.0 (2024-04-24)"
installed.packages()[names(sessionInfo()$otherPkgs), "Version"]
## BiocManager gplots edgeR Glimma limma vegan
## "1.30.25" "3.1.3.1" "4.2.1" "2.14.0" "3.60.4" "2.6-8"
## lattice permute partR2 ggfortify agricolae lme4
## "0.22-6" "0.9-7" "0.9.2" "0.4.17" "1.3-7" "1.1-35.5"
## Matrix rsq pracma gridExtra ggplot2 car
## "1.7-0" "2.6" "2.4.4" "2.3" "3.5.1" "3.1-2"
## carData zoo MESS RColorBrewer plotrix tidyr
## "3.0-5" "1.8-12" "0.5.12" "1.1-3" "3.8-4" "1.3.1"
## dplyr matrixStats stringr readr
## "1.1.4" "1.4.0" "1.5.1" "2.1.5"
The “LightData.zip” contains the raw irradiance measurements, the normalized irradiance and the photic factors
absir <-readRDS("LightData/Absolute_Irradiance.Rds") # read measurements
photic <- read.table("LightData/Photic factors.csv", stringsAsFactors=T, header = T, sep=",", dec = ".") [-1]
# contains photic variables
### PLOT ONE EXAMPLE OF ABSOLUTE IRRADIANCE
pig <- c("#56B4E9","#009E73","#D55E00")
plot( absir$BIGARBORVITAE$d_1m[-c(1:37)] ~ absir$BIGARBORVITAE$wl[-c(1:37)], type = "l",axes=F, col = "#56B4E9", lwd=4,xlim=c(350,750),ylim=c(0,9.435e+14), yaxt="n",xaxt="n",ylab="",xlab="")
#lines(absir$BAVBLOOM$d_1m[-c(1:37)] ~ absir$BAVBLOOM$wl[-c(1:37)], col=pig[2], lwd=4)
lines(absir$CRANBERRY$d_1m[-c(1:37)] ~ absir$CRANBERRY$wl[-c(1:37)], col=pig[3], lwd=4)
axis(side=1, at = c(seq(350,750, by=25)),font=2,lwd=3, labels = c(seq(350,750, by=25)), las = 1, cex = 1, col.axis = 1, cex.axis=1.5)
axis(side=2, at= c(seq(0,9.435e+14 , by=1.5725e+14)), las = 2, lwd=3, font=2, line=-2, cex.axis=1.2)
text(700,8.87e+14, "BIG ARBOR VITAE", col=pig[1],font=2)
text(680,3.1e+14, "CRANBERRY", col=pig[3],font=2)
mtext("ABS-IRRAD", font=2)
The data associated with morphological traits is found in
“Morphology.csv”
morph <- read.table("Morphology.csv", header=T, sep =",", dec=".",stringsAsFactors = T)
morp <- aov(rel_eye ~ SPEC, data= morph)
(m2 <- LSD.test(morp, trt = c('SPEC'), p.adj = "fdr"))
## $statistics
## MSerror Df Mean CV
## 5.172069e-05 43 0.0726456 9.899718
##
## $parameters
## test p.ajusted name.t ntr alpha
## Fisher-LSD fdr SPEC 6 0.05
##
## $means
## rel_eye std r se LCL UCL Min
## BL 0.07684131 0.007291179 12 0.002076068 0.07265452 0.08102810 0.05843956
## CR 0.06936959 0.008559780 5 0.003216230 0.06288344 0.07585574 0.06111288
## LB 0.04958091 0.004093451 7 0.002718211 0.04409912 0.05506270 0.04437961
## PK 0.07417629 0.007164541 14 0.001922065 0.07030008 0.07805250 0.06143421
## RB 0.08967307 0.008493313 9 0.002397237 0.08483858 0.09450756 0.07983530
## SB 0.04904939 0.001099388 2 0.005085307 0.03879389 0.05930489 0.04827200
## Max Q25 Q50 Q75
## BL 0.08480040 0.07372169 0.07793701 0.08177309
## CR 0.08304055 0.06332002 0.06890885 0.07046565
## LB 0.05531000 0.04643345 0.04968547 0.05241220
## PK 0.08597273 0.06865083 0.07442161 0.07840304
## RB 0.10633060 0.08461965 0.08820102 0.09563796
## SB 0.04982677 0.04866070 0.04904939 0.04943808
##
## $comparison
## NULL
##
## $groups
## rel_eye groups
## RB 0.08967307 a
## BL 0.07684131 b
## PK 0.07417629 b
## CR 0.06936959 b
## LB 0.04958091 c
## SB 0.04904939 c
##
## attr(,"class")
## [1] "group"
plot(m2, variation = 'SE', lwd = 2, las = 1, horiz = T, xlab = 'combi')
“Read_Counts_Table.csv” contains the table of raw read counts
for each individual generated using STAR - Dobin et al. (2013). STAR:
ultrafast universal RNA-seq aligner. Bioinformatics (Oxford, England),
29(1), 15–21 https://doi.org/10.1093/bioinformatics/bts635
counts <- read.table("Read_Counts_Table.csv", header=T, sep =",", dec=".",stringsAsFactors = T, check.names = F, row.names = 1)
head(counts) # columns = sample_id , rows = gene_id
## ALSO ATTACH METADATA ASSOCAIAED WITH EACH INDIVIDUAL
sample.info <- read.table("SampleInfo.csv", header=T, sep =",", dec=".",stringsAsFactors = T) [,-1]
Then generate DGElist, normalize and filter by expression:
data <- DGEList(counts, gene=rownames(counts), group=sample.info$Group,lib.size = colSums(counts), remove.zeros = 0)
## data pre-processing
# convert gene counts into counts per million reads
cpm <- cpm(data)
# transform counts per million to log2 counts per million
lcpm <- cpm(data , log = TRUE)
# set filtering criteria
keep.exprs2 <- filterByExpr(data , group = data$group , min.prop = .33 , min.count = 100) # keep genes expressed at least at 100 cpm in one third of all samples in one group
# filter the data based on previously set criteria
dataF2 <- data[keep.exprs2,,keep.lib.sizes = FALSE]
# dim(dataF2) # 7947 genes kept with more stringent filtering
## normalization of gene expression distributions
dataF2 <- calcNormFactors(dataF2 , method = "TMM")
lcpmF2 <- cpm(dataF2 , log = TRUE)
col <- brewer.pal(ncol(data), "Paired")
boxplot(lcpmF2 , las = 2 ,col=col, main = "B. Normalized")
Proceed to differential gene expression analysis
#create a design matrix
group <- dataF2$samples$group
design <- model.matrix(~0+group)
# change header
colnames(design) <- gsub("group" , "" , colnames(design))
# specify what contrasts to use for differential gene expression analysis
contr.matrix <- makeContrasts( BLG = BLBAV - BLCY ,
PK = PKBAV - PKCY ,
RB = RBBAV - RBCY ,
CR = CRBAV - CRCY,
LB = LBBAV - LBCY,
levels = colnames(design))
contr.matrix
## Contrasts
## Levels BLG PK RB CR LB
## BLBAV 1 0 0 0 0
## BLCY -1 0 0 0 0
## CRBAV 0 0 0 1 0
## CRCY 0 0 0 -1 0
## LBBAV 0 0 0 0 1
## LBCY 0 0 0 0 -1
## PKBAV 0 1 0 0 0
## PKCY 0 -1 0 0 0
## RBBAV 0 0 1 0 0
## RBCY 0 0 -1 0 0
## SBBAV 0 0 0 0 0
## SBCY 0 0 0 0 0
# transform counts to log2 counts per million, estimate mean-variance relationship and compute appropriate weights
v <- voom(dataF2 , design , plot = TRUE)
# fitting linear models
vfit <- lmFit(v , design) # specify data and group design used for linear models
vfit <- contrasts.fit(vfit , contrasts = contr.matrix) # specify the contrasts to be used
efit <- eBayes(vfit)
plotSA(efit) # residual standard deviation vs average log2 expression; no drop in standard deviation is good!
# examine numbers of differentially expressed genes
DEG <- decideTests(efit)
summary(DEG)
## BLG PK RB CR LB
## Down 0 1 0 0 145
## NotSig 7947 7939 7947 7947 7741
## Up 0 7 0 0 61
vennDiagram(DEG[,1:5] , circle.col = c("red" , "blue" , "green", "orange","violet"))
“Visual_Genes.csv” contains the normalized counts (TMM) from the
previous section, the proportional expression of each cone opsin and the
predicted sensitivity index based on Carleton et al. (2016). Proximate
and ultimate causes of variable visual sensitivities: Insights from
cichlid fish radiations. Genesis (New York, N.Y. : 2000), 54(6),
299–325. https://doi.org/10.1002/dvg.22940
opsin <- read.table("Visual_Genes.csv", header=T, sep =",", dec=".",stringsAsFactors = T) [,-1]
### THEN RUN STATISTICAL TST FOR EACH GENE REPLACING RESPECTIVELY (Here= "SWS2)
#########
######### SWS2
#########
asws2 <- aov(rank(P_SWS2) ~ SPEC + SITE, data = opsin)
par(mfrow=c(2,2))
plot(asws2)
Anova(asws2, type='2')
TukeyHSD(asws2)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = rank(P_SWS2) ~ SPEC + SITE, data = opsin)
##
## $SPEC
## diff lwr upr p adj
## CR-BL 15.8166667 1.7929864 29.8403469 0.0190672
## LB-BL -13.3690476 -25.8990261 -0.8390692 0.0305146
## PK-BL 11.2628205 0.7160166 21.8096244 0.0303019
## RB-BL -13.8611111 -25.4785644 -2.2436578 0.0112897
## SB-BL -5.5833333 -25.7053527 14.5386861 0.9603044
## LB-CR -29.1857143 -44.6122983 -13.7591303 0.0000190
## PK-CR -4.5538462 -18.4179799 9.3102876 0.9211716
## RB-CR -29.6777778 -44.3728230 -14.9827325 0.0000055
## SB-CR -21.4000000 -43.4425679 0.6425679 0.0614749
## PK-LB 24.6318681 12.2807161 36.9830202 0.0000071
## RB-LB -0.4920635 -13.7691530 12.7850260 0.9999975
## SB-LB 7.7857143 -13.3380058 28.9094343 0.8779017
## RB-PK -25.1239316 -36.5482836 -13.6995796 0.0000010
## SB-PK -16.8461538 -36.8573073 3.1649996 0.1431511
## SB-RB 8.2777778 -12.3177657 28.8733212 0.8337160
##
## $SITE
## diff lwr upr p adj
## CRANBERRY-BAV -1.84889 -7.006084 3.308305 0.4731651
asws2b<- aov(rank(P_SWS2) ~ SPEC, data = opsin) ## FOR NORMAL DISTRIBUTION
#(m2 <- HSD.test(asws2b, trt = c('combi'), unbalanced=T))
(m1 <- LSD.test(asws2b, trt = c('SPEC'), p.adj = "fdr"))
## $statistics
## MSerror Df Mean CV
## 76.85222 42 24.5 35.78179
##
## $parameters
## test p.ajusted name.t ntr alpha
## Fisher-LSD fdr SPEC 6 0.05
##
## $means
## rank(P_SWS2) std r se LCL UCL Min Max Q25 Q50
## BL 24.58333 9.307459 12 2.530682 19.476210 29.69046 11.0 43 19.25 25.5
## CR 40.40000 6.877500 5 3.920516 32.488079 48.31192 29.0 47 40.00 42.0
## LB 11.21429 7.521398 7 3.313441 4.527492 17.90108 4.5 23 4.50 10.0
## PK 35.84615 9.099873 13 2.431401 30.939389 40.75292 16.0 48 34.00 37.0
## RB 10.72222 9.686044 9 2.922180 4.825024 16.61942 4.5 33 4.50 4.5
## SB 19.00000 1.414214 2 6.198880 6.490154 31.50985 18.0 20 18.50 19.0
## Q75
## BL 30.25
## CR 44.00
## LB 16.00
## PK 41.00
## RB 15.00
## SB 19.50
##
## $comparison
## NULL
##
## $groups
## rank(P_SWS2) groups
## CR 40.40000 a
## PK 35.84615 a
## BL 24.58333 b
## SB 19.00000 bc
## LB 11.21429 c
## RB 10.72222 c
##
## attr(,"class")
## [1] "group"
par(mfrow=c(1,1))
plot(m1, variation = 'SE', lwd = 2, las = 1, horiz = F, xlab = 'sws2')
Read the normalized individual reflectance measurements for each
individual found in “Body_Reflectance.csv”. Perform PCA, extract values
along axis of major variation (PC1) and centroid distances for each
species in each environment.
reflectance <- read.table("Body_Reflectance.csv", header=T, sep =",", dec=".",stringsAsFactors = T, row.names = 1)
## GENERATE PCS
pca1 = prcomp(reflectance[,-c(1:3)], scale. = F,center=F) # KEEP ONLY NUMERIC VALUES
s <- summary(pca1) # 0.78% variation + 10%
pca1$x[,1] # points of samples on x-coordinate (Dim 1) to use for linear model
## NDFC00117.BL.BV NDFC00118.BL.BV NDFC00119.PK.BV NDFC00120.BL.BV NDFC00121.PK.BV
## -226.7617 -342.8464 -616.2570 -598.6792 -516.1868
## NDFC00123.LB.BV NDFC00124.BL.BV NDFC00128.BL.BV NDFC00129.RB.BV NDFC00130.RB.BV
## -1464.8595 -730.4720 -286.5806 -252.5308 -247.3381
## NDFC00131.BL.BV NDFC00132.LB.BV NDFC00133.PK.BV NDFC00135.RB.BV NDFC00137.PK.BV
## -476.1326 -1198.3716 -981.1855 -219.4763 -848.8488
## NDFC00138.BL.BV NDFC00139.PK.BV NDFC00141.PK.CY NDFC00142.PK.CY NDFC00143.PK.CY
## -674.3307 -749.3149 -554.7776 -647.5275 -758.9739
## NDFC00145.BL.CY NDFC00146.PK.CY NDFC00148.BL.CY NDFC00149.BL.CY NDFC00150.BL.CY
## -641.9252 -604.1344 -1226.9368 -814.4939 -711.4287
## NDFC00151.BL.CY NDFC00152.BL.CY NDFC00153.BL.CY NDFC00154.BL.CY NDFC00155.BL.CY
## -553.2209 -665.8861 -837.8850 -1065.0151 -749.4142
## NDFC00157.RB.CY NDFC00160.PK.CY NDFC00169.RB.CY NDFC00187.PK.BV NDFC00188.BL.BV
## -439.6606 -547.8949 -552.5637 -655.7684 -424.6005
## NDFC00189.LB.BV NDFC00191.PK.BV NDFC00193.RB.BV NDFC00195.RB.BV NDFC00197.BL.BV
## -999.7880 -677.2799 -452.0982 -403.7918 -319.2490
## NDFC00198.LB.BV NDFC00199.PK.BV NDFC00200.RB.BV NDFC00201.PK.BV NDFC00202.BC.BV
## -1804.6781 -719.7529 -1088.4266 -623.0049 -672.3206
## NDFC00203.BC.CY NDFC00204.RB.CY NDFC00205.PK.CY NDFC00206.RB.CY NDFC00207.SB.CY
## -576.0799 -442.8936 -412.4722 -369.7745 -1484.8858
## NDFC00209.BC.CY NDFC00211.RB.CY NDFC00212.RB.CY NDFC00213.RB.CY NDFC00214.PK.CY
## -610.4444 -588.8737 -342.6823 -451.6047 -696.9742
## NDFC00215.LB.CY NDFC00216.LB.CY NDFC00217.LB.CY NDFC00218.BC.CY NDFC00262.SB.BV
## -541.9053 -600.9686 -762.6622 -1979.5888 -1139.4591
## NDFC00269.LB.BV NDFC00273.PK.BV NDFC00274.RB.BV NDFC00276.BC.BV NDFC00277.BC.BV
## -1327.9533 -744.7101 -573.9296 -701.8349 -1037.7740
## NDFC00278.PK.BV NDFC00279.BC.BV
## -1025.8202 -2087.0136
### CREATE CONVEX HULLS (NOT ALL GROUPS NEEDED)
tab <- matrix(c(pca1$x[,1], pca1$x[,2]), ncol=2)
blbv <- tab[str_which(reflectance[,1],"BL.BV"),]
blcy <- tab[str_which(reflectance[,1],"BL.CY"),]
pkbl <- tab[str_which(reflectance[,1],"PK.BV"),]
pkcy <- tab[str_which(reflectance[,1],"PK.CY"),]
rbbv <- tab[str_which(reflectance[,1],"RB.BV"),]
rbcy <- tab[str_which(reflectance[,1],"RB.CY"),]
lbbv <- tab[str_which(reflectance[,1],"LB.BV"),]
lbcy <- tab[str_which(reflectance[,1],"LB.CY"),]
ch1 <- chull(blbv)
ch2 <- chull(blcy)
ch3 <- chull(pkbl)
ch4 <- chull(pkcy)
ch5 <- chull(rbbv)
ch6 <- chull(rbcy)
ch7 <- chull(lbbv)
ch8 <- chull(lbcy)
##### PLOT SCATTERPLOT + CONVEX HULLS
# Plot
par(mfrow=c(1,1),mar=c(4.5, 4.5, 1, 1),oma=c(1,1,1,0))
plot(pca1$x[,1], pca1$x[,2], xlim=c(-2100, -200), ylim=c(-750, 600),axes=F, xlab=paste("PCA 1 (",
round(s$importance[2]*100, 1), "%)", sep = ""), ylab=paste("PCA 2 (", round(s$importance[5]*100, 1), "%)", sep = ""),
pch=21, col=as.numeric(as.factor(reflectance$lake)), bg=as.numeric(as.factor(reflectance$lake)), cex=2.5, cex.axis=1.5, cex.lab=1.5, las=1,
panel.first= {
# Add convex hulls
polygon(blbv[ch1, ], border="blue", col=alpha("blue", 0.25), lwd=2)
polygon(blcy[ch2, ], border="red", col=alpha("red", 0.25), lwd=2)
polygon(pkbl[ch3, ], border="lightblue", col=alpha("lightblue", 0.25), lwd=2)
polygon(pkcy[ch4, ], border="darkred", col=alpha("darkred", 0.25), lwd=2)
polygon(rbbv[ch5, ], border="cyan", col=alpha("cyan", 0.25), lwd=2)
polygon(rbcy[ch6, ], border="magenta", col=alpha("magenta", 0.25), lwd=2)
polygon(lbbv[ch7, ], border="turquoise", col=alpha("turquoise", 0.25), lwd=2)
polygon(lbcy[ch8, ], border="orange", col=alpha("orange", 0.25), lwd=2)
# Add grid lines
abline(v=0, lty=2, col="grey50",lwd=0.5)
abline(h=0, lty=2, col="grey50",lwd=0.5)
})
axis(side=1, at= c(seq(-2100,-200, by=100)), labels=c(seq(-2100,-200, by=100)), cex.axis=1, font=2, las = 1, lwd=2, line=0)
axis(side=2, at= c(seq(-800,600, by=100)), labels=c(seq(-800,600, by=100)), cex.axis=1, font=4, las = 2, lwd=2, line=0)
text(pca1$x[,1], pca1$x[,2],labels=substr(rownames(reflectance), 7,12),
col=as.numeric(as.factor(substr(rownames(reflectance), 14,16))) ,offset=0.5, pos=4, cex = 0.5)
Extract centroid distances:
ordimat <- scale(reflectance[,-c(1:3)], scale = F, center = F) # remove column wiht names
Idata.dist.t <- vegdist(ordimat, method = "euclidean")
centroid_dist <- betadisper(Idata.dist.t, as.factor(reflectance$spec_pop),type='centroid')
test_distances <- cbind(reflectance[,c(1:3)],centroid_dist$distances)
colnames(test_distances)[4] <- "dist"
## remove smallmouth bass (M. dolomieu) since only have one individual for each site
test_distances <- test_distances %>% filter(!str_detect(spec, 'SB'))
test_distances$dist <- log(test_distances$dist) # log-transformed
acol <- aov(dist ~ spec * lake, data= test_distances)
Anova(acol, type='3')