The datasets needed to reproduce this analysis are deposited on Bertinetti et al. 2024. Data from: Visual pigment chromophore usage in Nicaraguan Midas cichlids: Phenotypic plasticity and genetic assimilation of cyp27c1 expression [Dataset] https://doi.org/10.5281/zenodo.10850332
Following R version and packages were used to run this code:
R.version$platform
## [1] "x86_64-apple-darwin20"
R.version$version.string
## [1] "R version 4.4.0 (2024-04-24)"
installed.packages()[names(sessionInfo()$otherPkgs), "Version"]
## ggtext ragg colorspace ggdist ggpubr vegan
## "0.1.2" "1.3.0" "2.1-0" "3.3.2" "0.6.0" "2.6-4"
## lattice permute ggfortify pander devtools usethis
## "0.22-6" "0.9-7" "0.4.17" "0.6.5" "2.4.5" "2.2.3"
## effectsize partR2 rsq lme4 Matrix performance
## "0.8.7" "0.9.2" "2.6" "1.1-35.3" "1.7-0" "0.11.0"
## sjstats visreg pracma gridExtra ggplot2 nlme
## "0.18.2" "2.7.0" "2.4.4" "2.3" "3.5.1" "3.1-164"
## agricolae corrplot Hmisc car carData zoo
## "1.3-7" "0.92" "5.1-2" "3.1-2" "3.0-5" "1.8-12"
## MESS RColorBrewer plotrix tidyr dplyr matrixStats
## "0.5.12" "1.1-3" "3.8-4" "1.3.1" "1.1.4" "1.3.0"
## stringr readr
## "1.5.1" "2.1.5"
## HERE USE setwd() to set your working directory where the data files will be located
#### USEFUL FUNCTION
momdis = function(x) {
c(m = mean(x), med = median(x), s = sd(x), iqr = IQR(x), n = length(x), se = std.error(x))
}
cyp27_adults <- read.table("Gene Expression - Wild and Lab Individuals.csv", header=T, sep =",", dec=".",stringsAsFactors = F) ### READ DATA FOR ADULTS
cyp27_adults$Location <- factor(cyp27_adults$Location,levels=c("SanJuan","Isletas","Apoyo", "LkManagua","Tiscapa","Masaya","AsLeon","Apoyeque", "Xiloa", "AsManagua")) # ALTERNATIVE PLOT ORDER FOR OPSIN EXPRESSION
Performing correlation-based Principals Component Analysis (PCA)
to generate a composite axis that accounts for most of the variation
among photic environments. The raw data is available at Bertinetti et
al. 2023. Data from: Repeated Divergence in Opsin Gene Expression
Mirrors Photic Habitat Changes in Rapidly Evolving Crater Lake Cichlid
Fishes [Dataset]. Dryad. https://doi.org/10.5061/dryad.j3tx95xgk
to50 <- read.table("Photic Parameters.csv", stringsAsFactors=F, header = T, sep=",", dec = ".") [-1]
d1 <- subset(to50, d == 'd_1') # Extracts photic parameters calculated using downwelling irradiance at one meter depth
s1 <- subset(to50, d == 's_1')# Extracts photic parameters calculated using sidewelling irradiance at one meter depth
to50 <-cbind(d1,s1)
rownames(to50) <- to50$loc # Add lakes names as rownames for matrix later one
to50 <- to50[,-c(4:5,7:8,12:16)]
# USING P25 and P75 instead of Interval "Width" makes more sense and gives better results!
colnames(to50) <-c("P50_d","P25_d","P75_d","lux_1","P50_s","P25_s","P75_s") #[,-c(6:11)]
pca1 <- prcomp(to50, scale. = T,center=T) ## doing PCA
summary(pca1) # 0.935% variation PC1
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 2.558 0.5833 0.2538 0.20101 0.08539 0.04630 0.02223
## Proportion of Variance 0.935 0.0486 0.0092 0.00577 0.00104 0.00031 0.00007
## Cumulative Proportion 0.935 0.9836 0.9928 0.99858 0.99962 0.99993 1.00000
pca1$rotation ## check values
## PC1 PC2 PC3 PC4 PC5 PC6
## P50_d 0.3884146 -0.080822307 0.20405789 0.3984942 -0.37023902 -0.03459849
## P25_d 0.3849762 0.006651055 0.60002497 0.3461262 0.50056690 0.16342188
## P75_d 0.3844217 -0.136312462 -0.50919993 0.4759206 -0.20096420 -0.30033874
## lux_1 -0.3284813 -0.927233825 0.09490456 0.1306572 0.07514075 -0.02184485
## P50_s 0.3859833 -0.230669994 -0.18558499 -0.2668902 -0.21235482 0.79734953
## P25_s 0.3836522 -0.196155004 0.37021534 -0.5794844 -0.30613204 -0.45781105
## P75_s 0.3861027 -0.152952389 -0.39816803 -0.2660728 0.65384379 -0.18989062
## PC7
## P50_d -0.7098526
## P25_d 0.3076391
## P75_d 0.4661158
## lux_1 -0.0116613
## P50_s 0.1061847
## P25_s 0.1953583
## P75_s -0.3669163
pca1$x[,1] # points of samples on x-coordinate (Dim 1) to use for linear model
## Apoyo Xiloa Apoyeque AsManagua AsLeon Masaya Tiscapa
## -4.1099176 -1.7677757 -1.8813743 -1.6728802 -0.8228473 -0.1394389 2.6019994
## Isletas LkManagua SanJuan
## 1.7871344 4.3917467 1.6133535
cyp_adults <-merge(as.data.frame(pca1$x[,1]),cyp27_adults, by.x="row.names",by.y="Location") ## merge to gene expression dataframe
colnames(cyp_adults)[1:2] <-c("Location","Photic")
cyp_adults$logR_cyp27c <- log2(cyp_adults$R_cyp27c) # improve distribution logarithmic scale
a <- cyp_adults[complete.cases(cyp_adults), ] # partialR2 does not like NA's
w <- subset(a, rearing == "wild")
m1.lmer <- lmer(logR_cyp27c ~ Photic + (1|Location), data=w) ## mixed-effect model with random intercept
pander(Anova(m1.lmer, type = 2,test.statistic = "F"))
| F | Df | Df.res | Pr(>F) | |
|---|---|---|---|---|
| Photic | 7.585 | 1 | 8.044 | 0.02477 |
capture.output(Anova(m1.lmer, type=3,test.statistic = "F"), file="log27~P50-random,~1|Location-ONLYWILD.txt")
summary(m1.lmer)
## Linear mixed model fit by REML ['lmerMod']
## Formula: logR_cyp27c ~ Photic + (1 | Location)
## Data: w
##
## REML criterion at convergence: 302.8
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.9242 -0.7128 0.1956 0.6237 2.6691
##
## Random effects:
## Groups Name Variance Std.Dev.
## Location (Intercept) 1.911 1.383
## Residual 2.209 1.486
## Number of obs: 78, groups: Location, 10
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) -1.7981 0.4685 -3.837
## Photic 0.5324 0.1933 2.754
##
## Correlation of Fixed Effects:
## (Intr)
## Photic -0.004
rsq(m1.lmer,adj=T,type=c('v')) #model 0.51 , fixed 0.32 (marginal)
## $model
## [1] 0.6088237
##
## $fixed
## [1] 0.3070833
##
## $random
## [1] 0.3017404
l <- subset(a, rearing == "lab")
### POPULATION DIFF FOR LAB (ANOVA WITH POSTHOCS)
lcyp27<- lm(logR_cyp27c ~ Location, data = l)
Anova(lcyp27, type='2') #<0.001***
(m2 <- HSD.test(lcyp27, trt = c('Location'), unbalanced=T))
## $statistics
## MSerror Df Mean CV
## 1.406482 49 -3.515286 -33.737
##
## $parameters
## test name.t ntr StudentizedRange alpha
## Tukey Location 9 4.587972 0.05
##
## $means
## logR_cyp27c std r se Min Max Q25
## Apoyeque -2.407985 1.0171211 6 0.4841629 -3.529913 -1.1800019 -3.213655
## Apoyo -6.204881 2.0438332 5 0.5303739 -8.446038 -3.6496243 -7.952377
## AsLeon -3.547626 0.1866717 6 0.4841629 -3.861089 -3.3685457 -3.633871
## AsManagua -1.652835 0.8670742 6 0.4841629 -2.625428 -0.2070244 -2.264246
## Isletas -1.923736 1.7610787 8 0.4192974 -3.395378 1.9903505 -3.068703
## LkManagua -1.997903 1.0588193 8 0.4192974 -2.976979 -0.1822931 -2.702620
## Masaya -1.614507 0.7621970 5 0.5303739 -2.560767 -0.4318901 -1.759217
## Tiscapa -2.681419 1.2106888 6 0.4841629 -4.726166 -1.5327632 -3.109341
## Xiloa -8.959667 0.7457053 8 0.4192974 -9.635348 -7.7808874 -9.635348
## Q50 Q75
## Apoyeque -2.557807 -1.532033
## Apoyo -6.221614 -4.754753
## AsLeon -3.489097 -3.419121
## AsManagua -1.632946 -1.416227
## Isletas -2.325379 -1.915405
## LkManagua -2.593093 -1.133979
## Masaya -1.708530 -1.612129
## Tiscapa -2.544291 -1.718556
## Xiloa -9.029485 -8.465720
##
## $comparison
## NULL
##
## $groups
## logR_cyp27c groups
## Masaya -1.614507 a
## AsManagua -1.652835 a
## Isletas -1.923736 a
## LkManagua -1.997903 a
## Apoyeque -2.407985 a
## Tiscapa -2.681419 a
## AsLeon -3.547626 a
## Apoyo -6.204881 b
## Xiloa -8.959667 c
##
## attr(,"class")
## [1] "group"
plot(m1.lmer)
cyp27_juve <- read.table("Gene Expression - Light Experiments.csv", header=T, sep =",", dec=".",stringsAsFactors = T)
cyp27_juve$logRcyp27 <- log2(cyp27_juve$R_cyp27c1) ### NON LOG SCALE IS HIHGLY SKEWED
treat_col <- c("#3288BD","#9E0142")
## ARRANGE DATA
cyp27_juve<- cyp27_juve %>%
unite("combi", Spec, Treat, sep="", remove=F)
cyp27_juve$combi <- as.factor(cyp27_juve$combi)
cyp27_juve$combi<- factor(cyp27_juve$combi,levels=c("ASTRL","ASTWL","SAGRL","SAGWL","CITRL", "CITWL"))
## ANOVA
acyp<- aov(logRcyp27 ~ Spec * Treat, data = cyp27_juve)
pander(Anova(acyp, type='3'))
| Sum Sq | Df | F value | Pr(>F) | |
|---|---|---|---|---|
| (Intercept) | 1051 | 1 | 557.3 | 5.951e-64 |
| Spec | 19.85 | 2 | 5.261 | 0.005817 |
| Treat | 6.567 | 1 | 3.481 | 0.06334 |
| Spec:Treat | 26.96 | 2 | 7.144 | 0.0009726 |
| Residuals | 443.4 | 235 | NA | NA |
### Tukey HSD post hoc
acyp27 <- aov(logRcyp27 ~ combi, data = cyp27_juve)
(m4 <- HSD.test(acyp27, trt = c('combi'), unbalanced=T))
## $statistics
## MSerror Df Mean CV
## 1.886692 235 -6.041902 -22.73405
##
## $parameters
## test name.t ntr StudentizedRange alpha
## Tukey combi 6 4.063582 0.05
##
## $means
## logRcyp27 std r se Min Max Q25 Q50
## ASTRL -5.192185 0.5906257 39 0.2199471 -7.020926 -4.287712 -5.496583 -5.132894
## ASTWL -5.765375 0.9923818 41 0.2145155 -8.587273 -4.532825 -6.333516 -5.680382
## CITRL -5.931758 1.6605490 41 0.2145155 -8.480357 -2.625044 -7.673003 -5.930160
## CITWL -7.290927 1.5935560 41 0.2145155 -9.828281 -3.803897 -8.532825 -7.333516
## SAGRL -6.154514 1.9554877 39 0.2199471 -12.287712 -3.754383 -6.500534 -5.629501
## SAGWL -5.876667 0.9273730 40 0.2171804 -8.243318 -4.115285 -6.290743 -5.710303
## Q75
## ASTRL -4.723594
## ASTWL -4.840629
## CITRL -4.729292
## CITWL -6.345198
## SAGRL -5.083174
## SAGWL -5.333655
##
## $comparison
## NULL
##
## $groups
## logRcyp27 groups
## ASTRL -5.192185 a
## ASTWL -5.765375 ab
## SAGWL -5.876667 ab
## CITRL -5.931758 ab
## SAGRL -6.154514 b
## CITWL -7.290927 c
##
## attr(,"class")
## [1] "group"