Section 17 Conserved Paralog Substitution
This part of code collects the output of 06-ComplexesCoExpressionMSData
, it collect all together and binds creating a dataset with all the stoichiometric co-expression for different subunits.
17.1 Load Parameters and Library
In this first part of the script as usually library and functions are called, plus information on how to retrieve the different files that contains the stoichiometric values for different subunits.
#Change Directory
setwd("../ComplexScript/")
#Paralogs Palette
<- c("#e6d5b8","#0278ae")
ParalogsColor
######################################
# PARAMETERS
######################################
library(VennDiagram)
library(RColorBrewer)
library(ComplexHeatmap)
library(circlize)
library(spatialR)
library(dplyr)
library(cowplot)
library(ggpubr)
library(rstatix)
library(ggrepel)
source("../ComplexScript/complexes_function.R")
First we read the output files generated by the complexes co-expression analysis, the output of the 07-ParalogsSubtitution.Rmd
. That indicates paralog pairs inside MS neuronal differentiation data. While the results of the co-expression of subunits are stored inside the InputDir.stability
#Get the switch File, compare all the ParalogSwitch File in the Out/ directory
= "../out/paralogs/"
InputDir = "../out/complex_coexpr/"
InputDir.stability
= list.files(InputDir)
Files <- list.files(InputDir.stability) CoexprFiles
We define the different conditions of the comparison in order to grep them in an efficient way and avoid taking different files.
#All subunits
<- c("Neur.Stem","NPC.iPS","Neu.IPS","Neu.NPC",
Comps "DIV3.DIV0","DIV10.DIV0","DIV10.DIV3","DIV5.DIV1","DIV14.DIV1","DIV14.DIV5")
We then collect the info from different species, in a dataframe called Dataset
. The SwitchFile
variables contains the filename of the different files that stores the comparison of paralog pairs that are part of protein complexes during neuronal differentiation.
= c("drerio","rnorvegicus","hsapiens","mmusculus","drerio")
Species = c("Drerio","Frese","Djuric","MouseTMT","DrerioRNASeq")
Dataset = data.frame(Dataset = Dataset,Species=Species)
Dataset rownames(Dataset) = Dataset$Dataset
= Files[grep("PutativeSwitch",Files)]
SwitchFile = CoexprFiles[grep("subunits_stability",CoexprFiles)] Stability
We now select only file names that matches our comparison that we want to consider. This in order to avoid any different files to be considered in our analysis.
<- SwitchFile[grep(paste(Comps,collapse = '|'),SwitchFile)]
SwitchFile <- Stability[grep(paste(Comps,collapse = '|'),Stability)] Stability
Here we read all the files from the output of 06-ComplexesCoExpressionMSData
and we collect them inside the ParalogsList
variable. This contains the protein-coexpression values for each subunits in the different complexes, comparing paralog pairs. While StabilityList
just contains the coexpression values for single subunits across all datasets.
#Change Directory
setwd("../ComplexScript/")
#********************************************
#********************************************
#RUN THIS PART AFTER SETTING PARAMETERS
#ParalogsList
#Load Files
= lapply(SwitchFile,function(x){read.csv(paste(InputDir,x,sep=""),stringsAsFactors=FALSE)})
ParalogsList names(ParalogsList) = SwitchFile
#StabilityList
= lapply(Stability,function(x){read.csv(paste(InputDir.stability,x,sep=""),stringsAsFactors=FALSE)})
StabilityList names(StabilityList) = Stability
We first bind together the information coming from the complexes co-expression analysis. We extract the information coming from the species and conditions, and finally we also order them as factor, based on the Comps
variable, for future plotting purpose.
<- do.call(rbind.data.frame,StabilityList)
AllSubunits $species <- unlist(lapply(strsplit(rownames(AllSubunits),"_"),function(x)x[1]))
AllSubunits$species <- Dataset[AllSubunits$species,"Species"]
AllSubunits$cond <- factor(AllSubunits$cond,levels = Comps) AllSubunits
17.2 Paralogs Subunits variability across datasets
We can store the information coming from the subunits co-expression analysis from the different datasets.
<- ggplot(data=AllSubunits[AllSubunits$species=="hsapiens",]) + geom_density(aes(x=pvalue)) + facet_wrap(~cond) + theme_bw()
hsap <- ggplot(data=AllSubunits[AllSubunits$species=="mmusculus",]) + geom_density(aes(x=pvalue)) + facet_wrap(~cond)+ theme_bw()
mmus <- ggplot(data=AllSubunits[AllSubunits$species=="rnorvegicus",]) + geom_density(aes(x=pvalue)) + facet_wrap(~cond) + theme_bw()
rnor <- ggplot(data=AllSubunits[AllSubunits$species=="drerio",]) + geom_density(aes(x=pvalue)) + facet_wrap(~cond) + theme_bw() drer
17.3 Paralogs Subunits variability across datasets ( Boxplot)
Here we can then plot the stoichiometric stability of each subunits, (the pvalue
) column. Low pvalues denote high co-expression, and therefore high pvalues denotes high stoichiometric variability.
#Add test information
<- AllSubunits %>% group_by(species,cond) %>%
AllSubunits.test wilcox_test(pvalue ~ has.Paralogs) %>%
add_significance("p") %>% add_xy_position(x = "cond") %>%
group_by(species) %>% mutate(x.pos = 1:n()) %>% ungroup()
pdf("../out/plots/p09Cons.pdf",width = 6.5,height = 3.4)
<- ggplot(data=AllSubunits) +
p09Cons geom_boxplot(aes(x=cond,y=pvalue,fill=has.Paralogs),alpha=0.4) +
facet_wrap(~species,ncol=2,scales = "free") +
stat_pvalue_manual(AllSubunits.test,x = 'x.pos',tip.length = 0.5,position = position_dodge(0.8),coord.flip = T) +
theme_bw() + scale_y_continuous(expand = expansion(mult = c(0.05, 0.1))) + coord_flip() +
scale_fill_manual(values = ParalogsColor) + xlab("") +
ylab('stoichiometric stability (p.value)') + theme(legend.position = 'top')
p09Consdev.off()
## png
## 2
17.4 save workspace
We then save the workspace for future analysis.
save.image("../Markdown/workspaces/08-ConservedSubstitution_CompleteWorkSpace.RData")
17.5 load previous workspace
load("../Markdown/workspaces/05-MSProteomicsData_ParalogsCompleteWorkspace.RData")
17.7 Calculate Paralog Pairs ratios
Here we calculate paralog ratios inside the different dataset using the CalculateParalogRatios
function. This function takes in input a dataset with protein intensity and a ParalogSet
object. It will first calculate all possible paralog pairs present in the data, then it will compute for each paralog pairs, ratios intended as Log2(Paralog1) - Log2(Paralog2) = Log(Paralog1/Paralog2). Then the ratios from each condition are tested for differences with a differential expression analysis done with limma
. In this way we can define if the ratio between a specific paralog pairs increase or decrease across conditions. Conditions to extract are declared in with the exprs
parameter.
17.7.1 Human
We do this from the human dataset. We use eggNOGPair
function to map each gene pair to an eggNOG pair. The function eggNOGPair
takes in input a dataframe, and with the pair
parameters we indicates the column name where the paralog pairs are present. To remove any redundancy in the dataset we also restrinct just to unique rows in all the matrix. We also remove pairs that was not possible to map to an eggNOG.
<- CalculateParalogsRatios(Paralogs.hsapiens,Djuric,
Hsapiens.Ratios idcol = "Gene.Symbol",
cond_col = c("iPS37","NPC37","Neu37"),
exprs = "iPS37,NPC37,Neu37,NPC37-iPS37,Neu37-iPS37",
use = "pair")
## Warning: Partial NA coefficients for 2770 probe(s)
## Warning: Zero sample variances detected, have been offset away from zero
#Results matrix
<- Hsapiens.Ratios$ratios
Hsapiens.Ratios.m $species <- "hsapiens"
Hsapiens.Ratios.m
#Add Hsapiens reference pairs
<- strsplit(Hsapiens.Ratios.m$pairs,split = " / ")
Pairs.hsap <- do.call(rbind.data.frame,Pairs.hsap);colnames(Pairs.hsap) <-c("p.x","p.y")
Pairs.hsap <- cbind.data.frame(Pairs.hsap,Hsapiens.Ratios.m)
Hsapiens.Ratios.m
#Add eggNOG annotation
<- eggNOGPair(Hsapiens.Ratios.m,pairs = c("p.x","p.y"),
Hsapiens.Ratios.m eggNOG = eggNOG.Hsap) %>% unique()
#Remove Paralog Pairs with NA
<-Hsapiens.Ratios.m[-grep("NA",Hsapiens.Ratios.m$eggNOG.pair),] Hsapiens.Ratios.m
17.7.2 Zebrafish
We do this from the zebrafish dataset. We use eggNOGPair
function to map each gene pair to an eggNOG pair. The function eggNOGPair
takes in input a dataframe, and with the pair
parameters we indicates the column name where the paralog pairs are present. To remove any redundancy in the dataset we also restrinct just to unique rows in all the matrix. We also remove pairs that was not possible to map to an eggNOG.
<- CalculateParalogsRatios(Paralogs.drerio,Drerio,
Drerio.Ratios idcol = "genename",
cond_col = c("iBAQ.L","iBAQ.H"),
exprs = c("iBAQ.L,iBAQ.H,iBAQ.H-iBAQ.L"),
use = "pair")
## Warning: Partial NA coefficients for 56 probe(s)
## Warning: Zero sample variances detected, have been offset away from zero
#Results matrix
<- Drerio.Ratios$ratios
Drerio.Ratios.m $species <- "drerio"
Drerio.Ratios.m
#Add Hsapiens reference pairs
<- strsplit(Drerio.Ratios.m$pairs,split = " / ")
Pairs.drerio <- do.call(rbind.data.frame,Pairs.drerio);colnames(Pairs.drerio) <-c("p.x","p.y")
Pairs.drerio <- cbind.data.frame(Drerio.Ratios.m,Pairs.drerio)
Drerio.Ratios.m
#Add eggNOG annotation
<- eggNOGPair(Drerio.Ratios.m,pairs = c("p.x","p.y"),
Drerio.Ratios.m eggNOG = eggNOG.Drer) %>% unique()
<-Drerio.Ratios.m[-grep("NA",Drerio.Ratios.m$eggNOG.pair),] Drerio.Ratios.m
17.7.3 Mouse
We do this from the mouse dataset. We use eggNOGPair
function to map each gene pair to an eggNOG pair. The function eggNOGPair
takes in input a dataframe, and with the pair
parameters we indicates the column name where the paralog pairs are present. To remove any redundancy in the dataset we also restrinct just to unique rows in all the matrix. We also remove pairs that was not possible to map to an eggNOG.
<- CalculateParalogsRatios(Paralogs.mouse,Mouse,
Mouse.Ratios idcol = "SYMBOL",
cond_col = c("DIV0","DIV3","DIV10"),
exprs = c("DIV0,DIV3,DIV10,DIV3-DIV0,DIV10-DIV0"),
use = "pair")
## Warning: Zero sample variances detected, have been offset away from zero
#Results matrix
<- Mouse.Ratios$ratios
Mouse.Ratios.m $species <- "mmusculus"
Mouse.Ratios.m
#Add Hsapiens reference pairs
<- strsplit(Mouse.Ratios.m$pairs,split = " / ")
Pairs <- do.call(rbind.data.frame,Pairs);colnames(Pairs) <-c("p.x","p.y")
Pairs #Ratios
<- cbind.data.frame(Mouse.Ratios.m,Pairs)
Mouse.Ratios.m
#Add eggNOG annotation
<- eggNOGPair(Mouse.Ratios.m,pairs = c("p.x","p.y"),
Mouse.Ratios.m eggNOG = eggNOG.Mmus) %>% unique()
<-Mouse.Ratios.m[-grep("NA",Mouse.Ratios.m$eggNOG.pair),] Mouse.Ratios.m
17.7.4 Rat
We do this from the mouse rat. We use eggNOGPair
function to map each gene pair to an eggNOG pair. The function eggNOGPair
takes in input a dataframe, and with the pair
parameters we indicates the column name where the paralog pairs are present. To remove any redundancy in the dataset we also restrinct just to unique rows in all the matrix. We also remove pairs that was not possible to map to an eggNOG.
<- CalculateParalogsRatios(Paralogs.rnorvegi,Frese,
Frese.Ratios idcol = "SYMBOL",
cond_col = c("DIV01","DIV5","DIV14"),
exprs = c("DIV01,DIV5,DIV14,DIV5-DIV01,DIV14-DIV01"),
use = "pair")
## Warning: Partial NA coefficients for 24 probe(s)
## Warning: Zero sample variances detected, have been offset away from zero
#Results matrix
<- Frese.Ratios$ratios
Frese.Ratios.m $species <- "rnorvegicus"
Frese.Ratios.m
#Add Hsapiens reference pairs
<- strsplit(Frese.Ratios.m$pairs,split = " / ")
Pairs <- do.call(rbind.data.frame,Pairs);colnames(Pairs) <-c("p.x","p.y")
Pairs
#Ratios
<- cbind.data.frame(Frese.Ratios.m,Pairs)
Frese.Ratios.m
#Add eggNOG annotation
<- eggNOGPair(Frese.Ratios.m,pairs = c("p.x","p.y"),
Frese.Ratios.m eggNOG = eggNOG.Rnor) %>% unique()
<-Frese.Ratios.m[-grep("NA",Frese.Ratios.m$eggNOG.pair),] Frese.Ratios.m
17.9 Show Some Substitutions
We use the function PlotParalogsRatios
to plot the ratios of different paralog pairs across datasets.
17.9.1 COPII
#Make a functions out of it
<- c("SEC23A","SEC23B")
P <- PlotParalogsRatios(P,Mouse.Ratios)
p1.sec <- PlotParalogsRatios(P,Hsapiens.Ratios)
p2.sec <- PlotParalogsRatios(P,Drerio.Ratios)
p3.sec <- PlotParalogsRatios(P,Frese.Ratios)
p4.sec
<- gridExtra::grid.arrange(p1.sec$final,p2.sec$final,p3.sec$final,p4.sec$final,nrow=1) sec23
pdf("../out/figures/Fig3/Sec23ab_ratios.pdf",width = 9.8,height = 4.7)
grid.draw(sec23)
dev.off()
## png
## 2
17.9.2 VPS26A / VPS26B
#Make a functions out of it
<- c("VPS26B","VPS26A")
P
<- PlotParalogsRatios(P,Mouse.Ratios)
p1 <- PlotParalogsRatios(P,Hsapiens.Ratios)
p2 <- PlotParalogsRatios(P,Drerio.Ratios)
p3 <- PlotParalogsRatios(P,Frese.Ratios)
p4
<- gridExtra::grid.arrange(p1$final,p2$final,p3$final,p4$final,nrow=1) vps
17.9.3 RAB14 / RAB8A
#Make a functions out of it
<- c("RAB14","RAB8A")
P
<- PlotParalogsRatios(P,Mouse.Ratios)
p1 <- PlotParalogsRatios(P,Hsapiens.Ratios)
p2 <- PlotParalogsRatios(P,Drerio.Ratios)
p3 <- PlotParalogsRatios(P,Frese.Ratios)
p4
<- gridExtra::grid.arrange(p1$final,p2$final,p3$final,p4$final,nrow=1)
rabs ::grid.draw(rabs) grid
17.9.4 DYNC1LI1 / DYNC1LI2
#Make a functions out of it
<- c("DYNC1LI1","DYNC1LI2")
P
<- PlotParalogsRatios(P,Mouse.Ratios)
p1 <- PlotParalogsRatios(P,Hsapiens.Ratios)
p2 <- PlotParalogsRatios(P,Drerio.Ratios)
p3 <- PlotParalogsRatios(P,Frese.Ratios)
p4
<- gridExtra::grid.arrange(p1$final,p2$final,p3$final,p4$final,nrow=1)
dynein ::grid.draw(dynein) grid
17.9.5 TARDBP / SF3B4
#Make a functions out of it
<- c("TARDBP","SF3B4")
P
<- PlotParalogsRatios(P,Mouse.Ratios)
p1 <- PlotParalogsRatios(P,Hsapiens.Ratios)
p2 <- PlotParalogsRatios(P,Drerio.Ratios)
p3 <- PlotParalogsRatios(P,Frese.Ratios)
p4
<- gridExtra::grid.arrange(p1$final,p2$final,p3$final,p4$final,nrow=1)
tardbp ::grid.draw(tardbp) grid
17.10 Save workspace
We save now the complete workspace of the analysis.
save.image("../Markdown/workspaces/09-SharedSubstitution_CompleteWorkspace.RData")
17.11 Fig2
We assemble the Figures, using the cowplot
library.
load("../Markdown/workspaces/05-MSProteomicsData_ParalogsCompleteWorkspace.RData")
load("../Markdown/workspaces/08-ConservedSubstitution_CompleteWorkSpace.RData")
load("../Markdown/workspaces/09-SharedSubstitution_CompleteWorkspace.RData")
Starting from the figure2.
#Modify facetting
<- p09Cons + facet_wrap(~species,nrow=1,scales = "free")
p09Cons.long
#Arrange plots
<- plot_grid(p01MSData,p09Cons,ncol=2,labels = c("A","B"))
top.block <- plot_grid(p02MSData,p01MSData,ncol = 2,labels = c('A','B')) top.block
## Warning: Removed 1703 rows containing non-finite values (stat_boxplot).
<- plot_grid(p09Cons.long,labels='C')
btt.block
#Align
<- align_plots(p01MSData,p09Cons.long,align = 'hv',axis = 'lr')
plots
#Fig2
<- plot_grid(top.block,plots[[2]],nrow=2,rel_heights = c(1,0.55),
Fig2 labels = c('','C'))
pdf(paste("../out/figures/Fig2/Fig2Draft_",Sys.Date(),".pdf",sep = ''),
width = 10,height = 5.4)
Fig2dev.off()
## png
## 2
Fig2
17.12 Fig3
And Figure 3
= grid.grabExpr(ComplexHeatmap::draw(heatmap))
heatmap = grid.grabExpr(ComplexHeatmap::draw(heatmap.sub))
heatmap.sub
<- plot_grid(heatmap,heatmap.sub,sec23,nrow = 3,rel_heights = c(0.5,0.8,0.8),
Fig3 labels = c("A","","C"))
pdf(paste("../out/figures/Fig3/","Fig3_",Sys.Date(),".pdf",sep = ""),width = 11.8,height = 7.6)
Fig3dev.off()
## png
## 2
Fig3
17.13 Supp Fig 5
<- paste('../out/figures/FigSupp5/SuppFig5_Updated_',Sys.Date(),'.pdf',
fname sep = '')
pdf(fname,width = 7.10,height = 15)
<- plot_grid(vps,dynein,rabs,nrow = 4,labels = c('A','B','C'))
FigSupp5
FigSupp5dev.off()
## png
## 2
17.13.1 Fig Supp 8,9,10, 11
<- pca.ratio
FigSupp8 <- vps
FigSupp9 <- rabs
FigSupp10 <- dynein FigSupp11
pdf(paste("../out/figures/FigSupp8/","FigSupp8_",Sys.Date(),".pdf",sep = ""),width = 6.51,height = 5.3)
grid.draw(FigSupp8)
dev.off()
## png
## 2
pdf(paste("../out/figures/FigSupp9/","FigSupp9",Sys.Date(),".pdf",sep = ""),width = 9.8,height = 4.7)
grid.draw(FigSupp9)
dev.off()
## png
## 2
pdf(paste("../out/figures/FigSupp10/","FigSupp10",Sys.Date(),".pdf",sep = ""),width = 9.8,height = 4.7)
grid.draw(FigSupp10)
dev.off()
## png
## 2
pdf(paste("../out/figures/FigSupp11/","FigSupp11",Sys.Date(),".pdf",sep = ""),width = 9.8,height = 4.7)
grid.draw(FigSupp11)
dev.off()
## png
## 2
17.14 Write Tables
17.14.1 Table S6
<- list('Subunits Co-Expression' = AllSubunits)
l #Write table
::write.xlsx(l,'../out/tables/Table_S6.xlsx') openxlsx
17.14.2 Table S7
<- list('AllParalogs'=All.Sub.comp,
l 'Shared Paralogs' = Reliable.Sub)
#Write table
::write.xlsx(l,'../out/tables/Table_S7.xlsx') openxlsx