# packages
require(lmerTest)
require(tidyverse)
5 Evolutionary potential of seed sources
5.1 Data
<- read.csv("./data/tnc_spring_2022.csv", header=TRUE)
data <- read.csv("./data/plot_coordinates.csv", header=TRUE)
plot_cor
# add coordinates
<- merge(data,plot_cor[,c("Replication","POINT_X","POINT_Y")]) data
5.1.1 Calculations
phenotypic variance (Vp) = variance of phenotypic trait observed
- Vp = var(model residuals x model intercept)
Additive genetic variance (Va) = heritability of the trait x phenotypic variance (Vp)
- Va = H2 x Vp
Phenotypic variance (\(V_{P}\)) = variance of phenotypic trait observed
- \(V_{P}\) = var(model residuals x model intercept)
Additive genetic variance (Va) = heritability of the trait x phenotypic variance (Vp)
\[V_{A} = h^{2}.{V_{P}}\] Where,
\(V_{A}\) is additie genetic variance,
\(h^{2}\) is narrow sense heritability,
\(V_{P}\) is phenotypic varianceCoefficent of Evolvability (\(CV_{A}\)) = the evolutionary potential of a trait to evolve
\[CV_{A} = \frac{\sqrt{V_{A}}}{\overline{X}}\]
Where,
\(e_{\mu}\) is evolvability,
\(V_{A}\) is additie genetic variance,
\(\overline{X}\) is phenotypic mean
We however use broad sense heritability (\(H^{2} = V_{G}/V_{P}\)) in our calculation, thus we take \(V_{G}\) for calculating evolvability instead of \(V_{A}\).
\[CV_{G} = \frac{\sqrt{V_{G}}}{\overline{X}}\]
5.2 Heritability
5.2.1 MCMCglmm model
## note
# gw - growth
# B - lmer family blups (means)
require(lmerTest)
require(dplyr)
require(tidyr)
require(MCMCglmm)
# set priors
<- list(R=list(V=1, nu=0.002), # R - prior on residual variance
Prior G=list(G1=list(V=1, nu=0.002), # G prior for random variance # G1 = for first random effect, here its Source
G2=list(V=1, nu=0.002))) # G2 = for second random effect, here its Replication
# Common garden data
# height
<- read.csv("./data/MCMCglmm/Growth_2020.csv", stringsAsFactors = T)
growth_2020 $mBed <- paste0(growth_2020$Garden,"_",growth_2020$Bed)
growth_2020
# Meta data
<- read.table("./data/RS_Exome_metadata.txt", sep="\t",header=T)
Meta # remove family YRB_01
<- Meta[Meta$Family!="YRB_01",]
Meta
<- as.data.frame(Meta$Family);colnames(meta)[1] <- "Family"
meta nrow(meta)
[1] 339
# height 2019
<- growth_2020 %>% filter(Region=="Edge")
growth_edge length(unique(growth_edge$Family)) # no. of familes considered
[1] 110
length(unique(growth_edge$Population)) # no. of populations considered
[1] 23
# per garden site
# MD
<- growth_2020 %>% filter(Garden=="Maryland")
growth_edge_MD colnames(growth_edge_MD)[19] <- "Height"
# One year height growth
<- MCMCglmm(Height ~ 1,
OneYear_height_mod_edge_MD random= ~ Family + Population,
family="gaussian",
data=growth_edge_MD,
prior=Prior, pr=TRUE, burnin=10000 , nitt=100000000 , thin=1000)
# saveRDS(OneYear_height_mod_edge_MD,"/home/Anoob/mydata/Anoob/TNC/OUTPUT/MCMCglmm/OneYear_height_mod_edge_MD_2019")
5.2.2 Estimate heritability based on MCMCglmm output
## A) One year height
# One year height
<- readRDS("./OUTPUT/MCMCglmm/OneYear_height_mod_edge_MD_2019")
OneYear_height_mod_edge_MD # heritability
<- (2*(OneYear_height_mod_edge_MD$VCV[,"Family"]) + OneYear_height_mod_edge_MD$VCV[,"Population"])/rowSums(OneYear_height_mod_edge_MD[["VCV"]])
OneYear_height_mod_edge_MD_H2
posterior.mode(OneYear_height_mod_edge_MD_H2)
5.3 Part I: Bootstrapping \(CV_{G}\)
# function to bootstrap the CVg estiamtes per region
<- function(data,n,model_test,maxiter,H2_val){
Evol_boot
for (i in 1:maxiter) {
#Creating a resampled dataset from the sample data
= data[sample(1:nrow(data), size=n, replace = TRUE),]
subset_data
#Running the regression on these data
<- lmer(model_test, data = subset_data)
model
# pooled
$pooledVp <- predict(model)
subset_data
# model predictions
<- subset_data
VP_temp_tot <- VP_temp_tot$pooledVp
VP_temp_tot
# remove outliers
<- quantile(VP_temp_tot, probs=c(.25, .75), na.rm = FALSE)
quartiles <- IQR(VP_temp_tot)
IQR
<- quartiles[1] - 1.5*IQR
Lower <- quartiles[2] + 1.5*IQR
Upper
# outliers removed
<- subset(VP_temp_tot, VP_temp_tot > Lower & VP_temp_tot < Upper)
VP_temp_tot
# pooled results
<- data.frame(Run=i,
final_df Source = "Pooled",
Vp = var(VP_temp_tot),
mean = mean(VP_temp_tot),
H2 = H2_val,
Va = var(VP_temp_tot)*H2_val,
CVa = sqrt(var(VP_temp_tot)*H2_val)/mean(VP_temp_tot))
# Append to dataframe
<- rbind(output, final_df)
output
}return(output)
}
# input vals
<- 0.396 # heritability for height
H2_val <- Height ~ 1 + Source + (1|Replication)
model_test <- NULL
output
<- data %>% filter(Region=="Maryland")
MD_dat <- data %>% filter(Region=="West_Virginia")
WV_dat <- data %>% filter(Region=="Virginia")
VA_dat
# boot strap the CVg for the regions
set.seed(123) # set seed
<- Evol_boot(data=MD_dat,model_test=model_test,
MD_boot_CVG n=200,maxiter=1000,H2_val=H2_val)
<- Evol_boot(data=WV_dat,model_test=model_test,
WV_boot_CVG n=200,maxiter=1000,H2_val=H2_val)
<- Evol_boot(data=VA_dat,model_test=model_test,
VA_boot_CVG n=200,maxiter=1000,H2_val=H2_val)
5.4 Part II: Data curation
Genetic diversity and Genetic load
<- readRDS("./data/Genetic_diversity_and_Genetic_load/GDGL_list")
GDGL_list <- GDGL_list$Maryland_GDGL
MD_GDGL <- GDGL_list$West_Virginia_GDGL
WV_GDGL <- GDGL_list$Virginia_GDGL
VA_GDGL
<- MD_GDGL[-1,]
MD_GDGL <- WV_GDGL[-1,]
WV_GDGL <- VA_GDGL[-1,] VA_GDGL
GD:GL per source
<- read.csv("./data/Genetic_diversity_and_Genetic_load/maryland_GDGL_per_source.csv");md_gdgl <- md_gdgl[-1,];colnames(md_gdgl)[1] <- "Source"
md_gdgl <- read.csv("./data/Genetic_diversity_and_Genetic_load/west_virginia_GDGL_per_source.csv");wv_gdgl <- wv_gdgl[-1,];colnames(wv_gdgl)[1] <- "Source"
wv_gdgl <- read.csv("./data/Genetic_diversity_and_Genetic_load/virginia_GDGL_per_source.csv");va_gdgl <- va_gdgl[-1,];colnames(va_gdgl)[1] <- "Source" va_gdgl
# Mean CVg
<- MD_boot_CVG %>%
CVg_pop_MD mutate(mean_CVg = mean(CVa)) %>%
mutate(SD = sd(CVa)) %>%
ungroup()
$Region <- "Maryland"
CVg_pop_MD$Source <- "Pooled"
CVg_pop_MD$Vp <- NA
CVg_pop_MD$mean <- NA
CVg_pop_MD$H2 <- NA
CVg_pop_MD$Va <- NA
CVg_pop_MD<- unique(CVg_pop_MD[,c("Region","Source","Vp","mean","H2", "Va","mean_CVg","SD")]) CVg_pop_MD
# Mean CVg
<- WV_boot_CVG %>%
CVg_pop_WV mutate(mean_CVg = mean(CVa)) %>%
mutate(SD = sd(CVa)) %>%
ungroup()
$Region <- "West_Virginia"
CVg_pop_WV$Source <- "Pooled"
CVg_pop_WV$Vp <- NA
CVg_pop_WV$mean <- NA
CVg_pop_WV$H2 <- NA
CVg_pop_WV$Va <- NA
CVg_pop_WV<- unique(CVg_pop_WV[,c("Region","Source","Vp","mean","H2", "Va","mean_CVg","SD")]) CVg_pop_WV
# Mean CVg
<- VA_boot_CVG %>%
CVg_pop_VA mutate(mean_CVg = mean(CVa)) %>%
mutate(SD = sd(CVa)) %>%
ungroup()
$Region <- "Virginia"
CVg_pop_VA$Source <- "Pooled"
CVg_pop_VA$Vp <- NA
CVg_pop_VA$mean <- NA
CVg_pop_VA$H2 <- NA
CVg_pop_VA$Va <- NA
CVg_pop_VA<- unique(CVg_pop_VA[,c("Region","Source","Vp","mean","H2", "Va","mean_CVg","SD")])
CVg_pop_VA
<- rbind(CVg_pop_MD,CVg_pop_WV,CVg_pop_VA)
CVg_pooled colnames(CVg_pooled)[7] <- "CVg"
<- read.csv("./data/CVa_TNC_H2")
CVg_table <- CVg_table[CVg_table$Source!="Pooled",]
CVg_table colnames(CVg_table)[7] <- "CVg"
$SD <- NA
CVg_table
<- rbind(CVg_table,CVg_pooled) CVg_comb
# merge data
<- merge(CVg_comb %>% filter(Region=="Maryland"),md_gdgl,all.x=T)
CVg_pop_MD $Source=="Pooled",9] <- round(MD_GDGL["XCS_XDS_XPK",]$Hexp,5)
CVg_pop_MD[CVg_pop_MD$Source=="Pooled",10] <- round(MD_GDGL["XCS_XDS_XPK",]$GenLoad,5)
CVg_pop_MD[CVg_pop_MD$GDGL <- CVg_pop_MD$Hexp/CVg_pop_MD$GenLoad
CVg_pop_MD
<- merge(CVg_comb %>% filter(Region=="West_Virginia"),wv_gdgl,all.x=T)
CVg_pop_WV $Source=="Pooled",9] <- round(WV_GDGL["XCS_XDS_XPK_XSK",]$Hexp,5)
CVg_pop_WV[CVg_pop_WV$Source=="Pooled",10] <- round(WV_GDGL["XCS_XDS_XPK_XSK",]$GenLoad,5)
CVg_pop_WV[CVg_pop_WV$GDGL <- CVg_pop_WV$Hexp/CVg_pop_WV$GenLoad
CVg_pop_WV
<- merge(CVg_comb %>% filter(Region=="Virginia"),va_gdgl,all.x=T)
CVg_pop_VA $Source=="Pooled",9] <- round(VA_GDGL["BFA_KOS_XDS_XPK",]$Hexp,5)
CVg_pop_VA[CVg_pop_VA$Source=="Pooled",10] <- round(VA_GDGL["BFA_KOS_XDS_XPK",]$GenLoad,5)
CVg_pop_VA[CVg_pop_VA$GDGL <- CVg_pop_VA$Hexp/CVg_pop_VA$GenLoad
CVg_pop_VA
<- rbind(CVg_pop_MD,CVg_pop_WV,CVg_pop_VA)
CVg_gdgl <- CVg_gdgl %>% mutate(across(where(is.numeric), ~round(., 3)))
CVg_gdgl
$Region <- as.factor(CVg_gdgl$Region)
CVg_gdgl$Source <- as.factor(CVg_gdgl$Source)
CVg_gdgl
$Region <- factor(CVg_gdgl$Region, levels = c("Maryland","West_Virginia","Virginia"))
CVg_gdgl
$Source <- factor(CVg_gdgl$Source, levels = c("BFA","KOS","XCS","XDS","XPK","XSK","Pooled"))
CVg_gdgl
::datatable(CVg_gdgl, rownames = T, options = list(pageLength = 14, dom = 'tip'),
DTfilter = 'top')
5.5 Part III: Visualization
# dim 800w 780h jpg 7.8h 8.0w pdf
<- ggplot(CVg_gdgl, aes(x= GDGL, y = CVg, label = Source)) +
CVg_gdgl_plot geom_point(aes(color = Source,
size = 4, alpha = 0.8)) +
scale_color_manual(values = c("#FFD92F","#FC8D59","#D73027","#1B9E77","#1F78B4","#E78AC3","black"))+
geom_errorbar(aes(ymin=CVg-SD,ymax=CVg+SD, color=Source)) +
# theme
ylab(bquote(Evolvability~(CV[G]))) +
xlab("Genetic diversity:Genetic load") +
theme_bw(base_size = 11, base_family = "Times") +
theme(axis.text=element_text(size=14),
axis.title=element_text(size=18),
panel.background = element_blank(),
legend.background = element_blank(),
panel.grid = element_blank(),
plot.background = element_blank(),
legend.title = element_text(size=15),
legend.text=element_text(size=15),
strip.text = element_text(size=30))
+ guides(alpha = "none", size = "none") + facet_grid(Region~.) CVg_gdgl_plot