Section 17 Conserved Paralog Substitution
This part of code collects the output of 06-ComplexesCoExpressionMSData, it collect all together and binds creating a dataset with all the stoichiometric co-expression for different subunits.
17.1 Load Parameters and Library
In this first part of the script as usually library and functions are called, plus information on how to retrieve the different files that contains the stoichiometric values for different subunits.
#Change Directory
setwd("../ComplexScript/")
#Paralogs Palette
ParalogsColor <- c("#e6d5b8","#0278ae")
######################################
# PARAMETERS
######################################
library(VennDiagram)
library(RColorBrewer)
library(ComplexHeatmap)
library(circlize)
library(spatialR)
library(dplyr)
library(cowplot)
library(ggpubr)
library(rstatix)
library(ggrepel)
source("../ComplexScript/complexes_function.R")First we read the output files generated by the complexes co-expression analysis, the output of the 07-ParalogsSubtitution.Rmd. That indicates paralog pairs inside MS neuronal differentiation data. While the results of the co-expression of subunits are stored inside the InputDir.stability
#Get the switch File, compare all the ParalogSwitch File in the Out/ directory
InputDir = "../out/paralogs/"
InputDir.stability = "../out/complex_coexpr/"
Files = list.files(InputDir)
CoexprFiles <- list.files(InputDir.stability)We define the different conditions of the comparison in order to grep them in an efficient way and avoid taking different files.
#All subunits
Comps <- c("Neur.Stem","NPC.iPS","Neu.IPS","Neu.NPC",
"DIV3.DIV0","DIV10.DIV0","DIV10.DIV3","DIV5.DIV1","DIV14.DIV1","DIV14.DIV5")We then collect the info from different species, in a dataframe called Dataset. The SwitchFile variables contains the filename of the different files that stores the comparison of paralog pairs that are part of protein complexes during neuronal differentiation.
Species = c("drerio","rnorvegicus","hsapiens","mmusculus","drerio")
Dataset = c("Drerio","Frese","Djuric","MouseTMT","DrerioRNASeq")
Dataset = data.frame(Dataset = Dataset,Species=Species)
rownames(Dataset) = Dataset$Dataset
SwitchFile = Files[grep("PutativeSwitch",Files)]
Stability = CoexprFiles[grep("subunits_stability",CoexprFiles)]We now select only file names that matches our comparison that we want to consider. This in order to avoid any different files to be considered in our analysis.
SwitchFile <- SwitchFile[grep(paste(Comps,collapse = '|'),SwitchFile)]
Stability <- Stability[grep(paste(Comps,collapse = '|'),Stability)]Here we read all the files from the output of 06-ComplexesCoExpressionMSData and we collect them inside the ParalogsList variable. This contains the protein-coexpression values for each subunits in the different complexes, comparing paralog pairs. While StabilityList just contains the coexpression values for single subunits across all datasets.
#Change Directory
setwd("../ComplexScript/")
#********************************************
#********************************************
#RUN THIS PART AFTER SETTING PARAMETERS
#ParalogsList
#Load Files
ParalogsList = lapply(SwitchFile,function(x){read.csv(paste(InputDir,x,sep=""),stringsAsFactors=FALSE)})
names(ParalogsList) = SwitchFile
#StabilityList
StabilityList = lapply(Stability,function(x){read.csv(paste(InputDir.stability,x,sep=""),stringsAsFactors=FALSE)})
names(StabilityList) = StabilityWe first bind together the information coming from the complexes co-expression analysis. We extract the information coming from the species and conditions, and finally we also order them as factor, based on the Comps variable, for future plotting purpose.
AllSubunits <- do.call(rbind.data.frame,StabilityList)
AllSubunits$species <- unlist(lapply(strsplit(rownames(AllSubunits),"_"),function(x)x[1]))
AllSubunits$species <- Dataset[AllSubunits$species,"Species"]
AllSubunits$cond <- factor(AllSubunits$cond,levels = Comps)17.2 Paralogs Subunits variability across datasets
We can store the information coming from the subunits co-expression analysis from the different datasets.
hsap <- ggplot(data=AllSubunits[AllSubunits$species=="hsapiens",]) + geom_density(aes(x=pvalue)) + facet_wrap(~cond) + theme_bw()
mmus <- ggplot(data=AllSubunits[AllSubunits$species=="mmusculus",]) + geom_density(aes(x=pvalue)) + facet_wrap(~cond)+ theme_bw()
rnor <- ggplot(data=AllSubunits[AllSubunits$species=="rnorvegicus",]) + geom_density(aes(x=pvalue)) + facet_wrap(~cond) + theme_bw()
drer <- ggplot(data=AllSubunits[AllSubunits$species=="drerio",]) + geom_density(aes(x=pvalue)) + facet_wrap(~cond) + theme_bw()17.3 Paralogs Subunits variability across datasets ( Boxplot)
Here we can then plot the stoichiometric stability of each subunits, (the pvalue) column. Low pvalues denote high co-expression, and therefore high pvalues denotes high stoichiometric variability.
#Add test information
AllSubunits.test <- AllSubunits %>% group_by(species,cond) %>%
wilcox_test(pvalue ~ has.Paralogs) %>%
add_significance("p") %>% add_xy_position(x = "cond") %>%
group_by(species) %>% mutate(x.pos = 1:n()) %>% ungroup()
pdf("../out/plots/p09Cons.pdf",width = 6.5,height = 3.4)
p09Cons <- ggplot(data=AllSubunits) +
geom_boxplot(aes(x=cond,y=pvalue,fill=has.Paralogs),alpha=0.4) +
facet_wrap(~species,ncol=2,scales = "free") +
stat_pvalue_manual(AllSubunits.test,x = 'x.pos',tip.length = 0.5,position = position_dodge(0.8),coord.flip = T) +
theme_bw() + scale_y_continuous(expand = expansion(mult = c(0.05, 0.1))) + coord_flip() +
scale_fill_manual(values = ParalogsColor) + xlab("") +
ylab('stoichiometric stability (p.value)') + theme(legend.position = 'top')
p09Cons
dev.off()## png
## 2
17.4 save workspace
We then save the workspace for future analysis.
save.image("../Markdown/workspaces/08-ConservedSubstitution_CompleteWorkSpace.RData")17.5 load previous workspace
load("../Markdown/workspaces/05-MSProteomicsData_ParalogsCompleteWorkspace.RData")17.7 Calculate Paralog Pairs ratios
Here we calculate paralog ratios inside the different dataset using the CalculateParalogRatios function. This function takes in input a dataset with protein intensity and a ParalogSet object. It will first calculate all possible paralog pairs present in the data, then it will compute for each paralog pairs, ratios intended as Log2(Paralog1) - Log2(Paralog2) = Log(Paralog1/Paralog2). Then the ratios from each condition are tested for differences with a differential expression analysis done with limma. In this way we can define if the ratio between a specific paralog pairs increase or decrease across conditions. Conditions to extract are declared in with the exprsparameter.
17.7.1 Human
We do this from the human dataset. We use eggNOGPair function to map each gene pair to an eggNOG pair. The function eggNOGPair takes in input a dataframe, and with the pair parameters we indicates the column name where the paralog pairs are present. To remove any redundancy in the dataset we also restrinct just to unique rows in all the matrix. We also remove pairs that was not possible to map to an eggNOG.
Hsapiens.Ratios <- CalculateParalogsRatios(Paralogs.hsapiens,Djuric,
idcol = "Gene.Symbol",
cond_col = c("iPS37","NPC37","Neu37"),
exprs = "iPS37,NPC37,Neu37,NPC37-iPS37,Neu37-iPS37",
use = "pair")## Warning: Partial NA coefficients for 2770 probe(s)
## Warning: Zero sample variances detected, have been offset away from zero
#Results matrix
Hsapiens.Ratios.m <- Hsapiens.Ratios$ratios
Hsapiens.Ratios.m$species <- "hsapiens"
#Add Hsapiens reference pairs
Pairs.hsap <- strsplit(Hsapiens.Ratios.m$pairs,split = " / ")
Pairs.hsap <- do.call(rbind.data.frame,Pairs.hsap);colnames(Pairs.hsap) <-c("p.x","p.y")
Hsapiens.Ratios.m <- cbind.data.frame(Pairs.hsap,Hsapiens.Ratios.m)
#Add eggNOG annotation
Hsapiens.Ratios.m <- eggNOGPair(Hsapiens.Ratios.m,pairs = c("p.x","p.y"),
eggNOG = eggNOG.Hsap) %>% unique()
#Remove Paralog Pairs with NA
Hsapiens.Ratios.m <-Hsapiens.Ratios.m[-grep("NA",Hsapiens.Ratios.m$eggNOG.pair),]17.7.2 Zebrafish
We do this from the zebrafish dataset. We use eggNOGPair function to map each gene pair to an eggNOG pair. The function eggNOGPair takes in input a dataframe, and with the pair parameters we indicates the column name where the paralog pairs are present. To remove any redundancy in the dataset we also restrinct just to unique rows in all the matrix. We also remove pairs that was not possible to map to an eggNOG.
Drerio.Ratios <- CalculateParalogsRatios(Paralogs.drerio,Drerio,
idcol = "genename",
cond_col = c("iBAQ.L","iBAQ.H"),
exprs = c("iBAQ.L,iBAQ.H,iBAQ.H-iBAQ.L"),
use = "pair") ## Warning: Partial NA coefficients for 56 probe(s)
## Warning: Zero sample variances detected, have been offset away from zero
#Results matrix
Drerio.Ratios.m <- Drerio.Ratios$ratios
Drerio.Ratios.m$species <- "drerio"
#Add Hsapiens reference pairs
Pairs.drerio <- strsplit(Drerio.Ratios.m$pairs,split = " / ")
Pairs.drerio <- do.call(rbind.data.frame,Pairs.drerio);colnames(Pairs.drerio) <-c("p.x","p.y")
Drerio.Ratios.m <- cbind.data.frame(Drerio.Ratios.m,Pairs.drerio)
#Add eggNOG annotation
Drerio.Ratios.m <- eggNOGPair(Drerio.Ratios.m,pairs = c("p.x","p.y"),
eggNOG = eggNOG.Drer) %>% unique()
Drerio.Ratios.m <-Drerio.Ratios.m[-grep("NA",Drerio.Ratios.m$eggNOG.pair),]17.7.3 Mouse
We do this from the mouse dataset. We use eggNOGPair function to map each gene pair to an eggNOG pair. The function eggNOGPair takes in input a dataframe, and with the pair parameters we indicates the column name where the paralog pairs are present. To remove any redundancy in the dataset we also restrinct just to unique rows in all the matrix. We also remove pairs that was not possible to map to an eggNOG.
Mouse.Ratios <- CalculateParalogsRatios(Paralogs.mouse,Mouse,
idcol = "SYMBOL",
cond_col = c("DIV0","DIV3","DIV10"),
exprs = c("DIV0,DIV3,DIV10,DIV3-DIV0,DIV10-DIV0"),
use = "pair")## Warning: Zero sample variances detected, have been offset away from zero
#Results matrix
Mouse.Ratios.m <- Mouse.Ratios$ratios
Mouse.Ratios.m$species <- "mmusculus"
#Add Hsapiens reference pairs
Pairs <- strsplit(Mouse.Ratios.m$pairs,split = " / ")
Pairs <- do.call(rbind.data.frame,Pairs);colnames(Pairs) <-c("p.x","p.y")
#Ratios
Mouse.Ratios.m <- cbind.data.frame(Mouse.Ratios.m,Pairs)
#Add eggNOG annotation
Mouse.Ratios.m <- eggNOGPair(Mouse.Ratios.m,pairs = c("p.x","p.y"),
eggNOG = eggNOG.Mmus) %>% unique()
Mouse.Ratios.m <-Mouse.Ratios.m[-grep("NA",Mouse.Ratios.m$eggNOG.pair),]17.7.4 Rat
We do this from the mouse rat. We use eggNOGPair function to map each gene pair to an eggNOG pair. The function eggNOGPair takes in input a dataframe, and with the pair parameters we indicates the column name where the paralog pairs are present. To remove any redundancy in the dataset we also restrinct just to unique rows in all the matrix. We also remove pairs that was not possible to map to an eggNOG.
Frese.Ratios <- CalculateParalogsRatios(Paralogs.rnorvegi,Frese,
idcol = "SYMBOL",
cond_col = c("DIV01","DIV5","DIV14"),
exprs = c("DIV01,DIV5,DIV14,DIV5-DIV01,DIV14-DIV01"),
use = "pair")## Warning: Partial NA coefficients for 24 probe(s)
## Warning: Zero sample variances detected, have been offset away from zero
#Results matrix
Frese.Ratios.m <- Frese.Ratios$ratios
Frese.Ratios.m$species <- "rnorvegicus"
#Add Hsapiens reference pairs
Pairs <- strsplit(Frese.Ratios.m$pairs,split = " / ")
Pairs <- do.call(rbind.data.frame,Pairs);colnames(Pairs) <-c("p.x","p.y")
#Ratios
Frese.Ratios.m <- cbind.data.frame(Frese.Ratios.m,Pairs)
#Add eggNOG annotation
Frese.Ratios.m <- eggNOGPair(Frese.Ratios.m,pairs = c("p.x","p.y"),
eggNOG = eggNOG.Rnor) %>% unique()
Frese.Ratios.m <-Frese.Ratios.m[-grep("NA",Frese.Ratios.m$eggNOG.pair),]17.9 Show Some Substitutions
We use the function PlotParalogsRatios to plot the ratios of different paralog pairs across datasets.
17.9.1 COPII
#Make a functions out of it
P <- c("SEC23A","SEC23B")
p1.sec <- PlotParalogsRatios(P,Mouse.Ratios)
p2.sec <- PlotParalogsRatios(P,Hsapiens.Ratios)
p3.sec <- PlotParalogsRatios(P,Drerio.Ratios)
p4.sec <- PlotParalogsRatios(P,Frese.Ratios)
sec23 <- gridExtra::grid.arrange(p1.sec$final,p2.sec$final,p3.sec$final,p4.sec$final,nrow=1)pdf("../out/figures/Fig3/Sec23ab_ratios.pdf",width = 9.8,height = 4.7)
grid.draw(sec23)
dev.off()## png
## 2
17.9.2 VPS26A / VPS26B
#Make a functions out of it
P <- c("VPS26B","VPS26A")
p1 <- PlotParalogsRatios(P,Mouse.Ratios)
p2 <- PlotParalogsRatios(P,Hsapiens.Ratios)
p3 <- PlotParalogsRatios(P,Drerio.Ratios)
p4 <- PlotParalogsRatios(P,Frese.Ratios)
vps <- gridExtra::grid.arrange(p1$final,p2$final,p3$final,p4$final,nrow=1)17.9.3 RAB14 / RAB8A
#Make a functions out of it
P <- c("RAB14","RAB8A")
p1 <- PlotParalogsRatios(P,Mouse.Ratios)
p2 <- PlotParalogsRatios(P,Hsapiens.Ratios)
p3 <- PlotParalogsRatios(P,Drerio.Ratios)
p4 <- PlotParalogsRatios(P,Frese.Ratios)
rabs <- gridExtra::grid.arrange(p1$final,p2$final,p3$final,p4$final,nrow=1)
grid::grid.draw(rabs)17.9.4 DYNC1LI1 / DYNC1LI2
#Make a functions out of it
P <- c("DYNC1LI1","DYNC1LI2")
p1 <- PlotParalogsRatios(P,Mouse.Ratios)
p2 <- PlotParalogsRatios(P,Hsapiens.Ratios)
p3 <- PlotParalogsRatios(P,Drerio.Ratios)
p4 <- PlotParalogsRatios(P,Frese.Ratios)
dynein <- gridExtra::grid.arrange(p1$final,p2$final,p3$final,p4$final,nrow=1)
grid::grid.draw(dynein)17.9.5 TARDBP / SF3B4
#Make a functions out of it
P <- c("TARDBP","SF3B4")
p1 <- PlotParalogsRatios(P,Mouse.Ratios)
p2 <- PlotParalogsRatios(P,Hsapiens.Ratios)
p3 <- PlotParalogsRatios(P,Drerio.Ratios)
p4 <- PlotParalogsRatios(P,Frese.Ratios)
tardbp <- gridExtra::grid.arrange(p1$final,p2$final,p3$final,p4$final,nrow=1)
grid::grid.draw(tardbp)17.10 Save workspace
We save now the complete workspace of the analysis.
save.image("../Markdown/workspaces/09-SharedSubstitution_CompleteWorkspace.RData")17.11 Fig2
We assemble the Figures, using the cowplot library.
load("../Markdown/workspaces/05-MSProteomicsData_ParalogsCompleteWorkspace.RData")
load("../Markdown/workspaces/08-ConservedSubstitution_CompleteWorkSpace.RData")
load("../Markdown/workspaces/09-SharedSubstitution_CompleteWorkspace.RData")Starting from the figure2.
#Modify facetting
p09Cons.long <- p09Cons + facet_wrap(~species,nrow=1,scales = "free")
#Arrange plots
top.block <- plot_grid(p01MSData,p09Cons,ncol=2,labels = c("A","B"))
top.block <- plot_grid(p02MSData,p01MSData,ncol = 2,labels = c('A','B'))## Warning: Removed 1703 rows containing non-finite values (stat_boxplot).
btt.block <- plot_grid(p09Cons.long,labels='C')
#Align
plots <- align_plots(p01MSData,p09Cons.long,align = 'hv',axis = 'lr')
#Fig2
Fig2 <- plot_grid(top.block,plots[[2]],nrow=2,rel_heights = c(1,0.55),
labels = c('','C'))pdf(paste("../out/figures/Fig2/Fig2Draft_",Sys.Date(),".pdf",sep = ''),
width = 10,height = 5.4)
Fig2
dev.off()## png
## 2
Fig217.12 Fig3
And Figure 3
heatmap = grid.grabExpr(ComplexHeatmap::draw(heatmap))
heatmap.sub = grid.grabExpr(ComplexHeatmap::draw(heatmap.sub))
Fig3 <- plot_grid(heatmap,heatmap.sub,sec23,nrow = 3,rel_heights = c(0.5,0.8,0.8),
labels = c("A","","C"))pdf(paste("../out/figures/Fig3/","Fig3_",Sys.Date(),".pdf",sep = ""),width = 11.8,height = 7.6)
Fig3
dev.off()## png
## 2
Fig317.13 Supp Fig 5
fname <- paste('../out/figures/FigSupp5/SuppFig5_Updated_',Sys.Date(),'.pdf',
sep = '')
pdf(fname,width = 7.10,height = 15)
FigSupp5 <- plot_grid(vps,dynein,rabs,nrow = 4,labels = c('A','B','C'))
FigSupp5
dev.off()## png
## 2
17.13.1 Fig Supp 8,9,10, 11
FigSupp8 <- pca.ratio
FigSupp9 <- vps
FigSupp10 <- rabs
FigSupp11 <- dyneinpdf(paste("../out/figures/FigSupp8/","FigSupp8_",Sys.Date(),".pdf",sep = ""),width = 6.51,height = 5.3)
grid.draw(FigSupp8)
dev.off()## png
## 2
pdf(paste("../out/figures/FigSupp9/","FigSupp9",Sys.Date(),".pdf",sep = ""),width = 9.8,height = 4.7)
grid.draw(FigSupp9)
dev.off()## png
## 2
pdf(paste("../out/figures/FigSupp10/","FigSupp10",Sys.Date(),".pdf",sep = ""),width = 9.8,height = 4.7)
grid.draw(FigSupp10)
dev.off()## png
## 2
pdf(paste("../out/figures/FigSupp11/","FigSupp11",Sys.Date(),".pdf",sep = ""),width = 9.8,height = 4.7)
grid.draw(FigSupp11)
dev.off()## png
## 2
17.14 Write Tables
17.14.1 Table S6
l <- list('Subunits Co-Expression' = AllSubunits)
#Write table
openxlsx::write.xlsx(l,'../out/tables/Table_S6.xlsx')17.14.2 Table S7
l <- list('AllParalogs'=All.Sub.comp,
'Shared Paralogs' = Reliable.Sub)
#Write table
openxlsx::write.xlsx(l,'../out/tables/Table_S7.xlsx')