Section 18 SEC23a/b Knockdown
In this part of the code, we analyze the MS data coming from the Sec23a and Sec23b knock-down in FAC sorted in vitro differentiated neurons. We first as usual loads all libraries and functions that we need.
18.1 Load Libraries and packages
library(dplyr)
library(ggplot2)
library(ggrepel)
library(ggpubr)
library(rstatix)
library(spatialR)
library(tidyr)
library(factoextra)
library(reshape2)
library(gridExtra)
library(limma)
library(proteomicstools)
library(pheatmap)
library(tidyverse)
library(MASS)
source("../ComplexScript/complexes_function.R")18.2 MS-Data overview
We load the candidates table cname that contains the differential expression
analysis coming out from the Spectronaut software. We also passing the report
file rname that contains the protein quantification in all the samples.
#Load data
cname <- "../Data/Dataset/SEC23AbKD/FACS_data/Sec23ab/20210329_DDF_siSec23ab_Mouse_Neurons_allSamples_candidates_TOP3_RunWiseImp.xls"
rname <- "../Data/Dataset/SEC23AbKD/FACS_data/Sec23ab/20210705_164714_20210329_DDF_siSec23ab_Mouse_Neurons_allSamples_Report_TOP3_RunWiseImp.xls"We can also use the function ProcessSNReport to process the report table,
containing the protein quantification data. ProcessSNReport returns a list
with the different information, data from the report. We can extract the protein
matrix with the $matrix.
#Load candidates
candidates <- read.delim(cname,sep = "\t",header = T)
#Load Report
Report <- proteomicstools::ProcessSNReport(rname)
#Report Matrix
Report.m <- Report$matrix
#Substitute Nan with NA
Report.m.i <- apply(Report.m,2,function(x)(is.na(x))|x==0)
Report.m[Report.m.i] <- NAWe use CreateConditionMatrix to annotate the report data, with the different
condition, creating the new column cond that indicates the different condition
and batch that indicates the different batch of the MS processing and run of
the samples.
#Create Condition matrix
Cond.Matrix <- CreateConditionMatrix(Report,grepOn = "R.FileName",
list(cond=c("Sec23a","Sec23b","Non"),
batch=c("Ori030","Ori038")))
#Save the complete matrix
Cond.Matrix <- Cond.Matrix[complete.cases(Cond.Matrix),]18.3 Knockdown-efficiency
First we plot the knock-down efficiency, plotting the protein intesity for SEC23A and SEC23B
#Sec Colors
sec.colors <- c("lightgrey","#92cbee","#efb1d0")We can just plot the quantification values for the two paralogs in a barplot across the different conditions.
Interesting <- c("Sec23a","Sec23b")
s2 <- Report$original[Report$original$PG.Genes %in% Interesting,]
s2 <- s2[s2$R.Condition %in% c("siNon","siSec23a","siSec23b"),]
pdf("../out/plots/p01Sec.pdf",width = 6,height = 3.3)
p01Sec <- ggplot(s2, aes(PG.Genes, PG.Quantity, fill = PG.Genes)) +
stat_summary(geom = "bar", fun.y = mean, position = "dodge",
color="black",alpha=0.5) +
stat_summary(geom = "errorbar", fun.data = mean_se,
position = "dodge",width=0.4) +
facet_grid(~R.Condition) +
geom_jitter(width=0.1) +
theme_bw() +
scale_fill_brewer(palette = "Set2") +
theme(axis.text.x = element_text(angle = 60,hjust=1),
legend.position = "none") +
ylab("Protein Quantity") + xlab("")## Warning: `fun.y` is deprecated. Use `fun` instead.
p01Sec
dev.off()## png
## 2
18.4 Paralog Load
Now we can calculate the total paralog amount in the different replicate and
condition. We create a dataframe s2 that counts the protein intensity of
SEC23A and SEC23B inside each replicates. And we run a paired wilcox test to
check differences in total paralog abundance within conditions.
s2 <- s2 %>% dplyr::group_by(R.Condition,R.Replicate) %>%
dplyr::mutate(loads=sum(log2(PG.Quantity)),) %>%
as.data.frame()
#Add test information
Datasets.test <- s2 %>%
wilcox_test(loads ~ R.Condition,paired = T) %>%
add_significance("p") %>% add_xy_position(x = "R.Condition") %>%
as.data.frame()
BarData <- s2 %>% dplyr::group_by(R.Condition) %>%
dplyr::summarise(median = median(loads)) %>%
as.data.frame()
s2.t <- s2
s2 <- s2[!duplicated(s2[,c(1,3)]),]We plot the results in the variable p.loads
p.loads <- ggplot(s2, aes(x=R.Condition)) +
geom_bar(data=BarData,aes(x=R.Condition,y=median,fill=R.Condition),stat='identity',alpha=0.4) +
geom_jitter(aes(y=loads),width = 0.1) +
theme_bw() + scale_fill_manual(values=sec.colors) + xlab("") +
stat_pvalue_manual(Datasets.test, label = "p.signif",
bracket.nudge.y=5,tip.length=0.5,
step.increase = 1) +
ylab("Paired Replicates\nLog2(SEC23A) + Log2(SEC23B)") +
theme(legend.position = "none")
p.loads#Sec Colors
sec.colors <- c("lightgrey","#92cbee","#efb1d0")18.5 Differential expression analysis
Using the information contained in the candidates table, we can extract differentially expressed proteins in the different conditions.
18.5.0.1 siSec23a Response
siSec23a.siNon <- candidates %>%
filter(Comparison..group1.group2. == "siSec23a / siNon")18.5.0.2 siSec23b Response
siSec23b.siNon <- candidates %>%
filter(Comparison..group1.group2. == "siSec23b / siNon")18.5.0.3 siSec23b/siSec23a Response
siSec23b.siSec23a <- candidates %>%
filter(Comparison..group1.group2. == "siSec23b / siSec23a")18.6 GOEnrich siSec23a
We can run a GOEnrichment analysis subsetting on significant comparison
Qvalue<0.05. Between siSec23b / siSec23a differentially expressed genes. We
use the gene enrichment test using Kolmogorov-smirnov test. Since by default the
function sort gene names by the fold change (from negative to positive) in this
case for considering genes enrichment more in siSec23a we can just give as a
vector the Log2 fold-change values.
sig.subset <- siSec23b.siSec23a
geneList <- sig.subset$AVG.Log2.Ratio
names(geneList) <- sig.subset$Genes
#GOEnrich
GOEnrich.siSec23a <- spatialR::GOEnrichmentKS(geneList,species = 'Mm',ontology = 'BP')18.7 GOEnrich siSec23b
We can run a GOEnrichment analysis subsetting on significant comparison. For
Sec23a. We use the gene enrichment test using Kolmogorov-smirnov test. Since by
default the function sort gene names by the fold change (from negative to
positive) in this case for considering genes enrichment more in siSec23b we need
to multiply the fold change by -1
sig.subset <- siSec23b.siSec23a
geneList <- sig.subset$AVG.Log2.Ratio*(-1)
names(geneList) <- sig.subset$Genes
#GOEnrich
GOEnrich.siSec23b <- spatialR::GOEnrichmentKS(geneList,species = 'Mm',ontology = 'BP')18.7.1 Cumulative GOplot
GORankedPlot is used then to create a plot for the enrichment in the two
conditions. GORankedPlot takes in input the table coming out from the two
GOEnrichment, and plot the different terms as a ranked scatterplot. See
?GORankedPlot.
GO.dist <- proteomicstools::GORankedPlot(list('siSec23a'=GOEnrich.siSec23a$table,
'siSec23b'=GOEnrich.siSec23b$table))GO.dist18.8 Extract Gene
LIST_siSec23a <- spatialR::ShowGenesGOTerms(GOEnrich.siSec23a,siSec23a.siNon$Genes)
LIST_siSec23b <- spatialR::ShowGenesGOTerms(GOEnrich.siSec23b,siSec23b.siNon$Genes)siSec23b.siNon$funct <- 'other'
siSec23b.siNon$funct[siSec23b.siNon$Genes %in% LIST_siSec23b$`synaptic signaling`] = 'synap'
siSec23b.siSec23a$funct[siSec23b.siSec23a$Genes %in% LIST_siSec23b$`synaptic signaling`] = 'synapt'18.9 Annotate with GOTerms
We can then annotate with GOTerms the differential expression table.
siSec23b.siSec23a <- spatialR::Annotate(siSec23b.siSec23a,
organism = 'Mm',
idcol = 'Genes',annot = 'GO')18.10 Correlation of the response
siSec23 <- merge(siSec23a.siNon,siSec23b.siNon,by="ProteinGroups")ScatterBetweenCandidates helps plotting candidates results against each
others.
#Scatter Plot
p02Sec <- ScatterBetweenCandidates(candidates.x = siSec23a.siNon,
candidates.y = siSec23b.siNon,
x = 'siSec23a / siNon',
y = 'siSec23b / siNon')We show the correlation test for the response to the two paralogs knockdowns.
ctest <- cor.test(siSec23$AVG.Log2.Ratio.x,siSec23$AVG.Log2.Ratio.y)
ctest$p.value## [1] 0
18.11 Cumulative Distribution siSec23b/siSec23a
In this part of the script we merge the information coming from our Mouse TMT10plex neuronal differentiation data with the data coming out from the siSec23 paralogs knockdown. Using the Mouse TMT10 plex data we can create a neuronal differentiation signature selecting genes that are up and down regulated, and see the distribution of these genes in the Sec23 paralogs knockdowns.
18.11.1 Load Mouse data
First we load again the data coming from Mouse TMT10plex neuronal
differentiation. Mouse.Candidates represent the values from the differential
expression analysis of these tables.
Mouse.Candidates <- read.delim("../Data/Dataset/270519_MouseNeuron_TMT10_contrast.txt",sep="\t",header = T)
Mouse.Intensities <- read.delim("../Data/Dataset/Intensities/17062019_MouseNeuron_TMT10_Log2Intensity.txt",sep="\t",header = T)We define here the upregulated and downregulated signature. We define gene
strongly upregulated during neurogenesis, in reference to DAY3/DAY0. Since our
Sec23 paralog knockdowns were also harvested at DAY3. Upregulated genes are the
one that have a logFC.D3.D0>=1 and adjusted P.value <= 0.05while
downregulated one have logFC.D3.D0<= -1 and adjusted P.value <= 0.05.We
collect them in a variable called hits
#Hits
Upregulated.Neuro <- Mouse.Candidates[Mouse.Candidates$logFC.D3.D0>=1 & Mouse.Candidates$adj.P.Val.D3.D0<=0.05,"Protein.ID"]
Downregulated.Neuro <- Mouse.Candidates[Mouse.Candidates$logFC.D3.D0<= -1 & Mouse.Candidates$adj.P.Val.D3.D0<=0.5,"Protein.ID"]
#Hits
hits <- list(Upregulated=Upregulated.Neuro,Downregulated=Downregulated.Neuro)To show the differential enrichment in protein that are upregulated or
downregulated during neuronal differentiation we can mark them on a ranked plot
coming from the differential expression analysis (siSec23b/siSec23a). Running
then a Kolmogorov-Smirnov test on the two distribution. For this purpose we use
the CumulativeDistribution
Cum.dropreps <- CumulativeDistribution(siSec23b.siSec23a,
ids = "ProteinGroups",
fc = "AVG.Log2.Ratio",hits = hits)
#Pvalue siSec23b
pval.b <- format(Cum.dropreps$ks.x$p.value,scientific = T,digits = 2)
pval.b <- paste('KS test \np.value:',pval.b)
pval.a <- format(Cum.dropreps$ks.y$p.value,scientific = T,digits = 2)
pval.a <- paste('KS test \np.value:',pval.a)
#Cumulative distribution
p03.Sec <- Cum.dropreps$plot + xlab('Ranked siSec23b/siSec23a Log2 Fold Change') +
annotate('text',x=1000,y=0.50,label = pval.b,size=2.5) +
annotate('text',x=4000,y=0.25,label = pval.a,size=2.5) +
scale_color_manual(values = c("red","grey","blue")) +
labs(color="During Neurogenesis") + ylab('Cumulative distribution') +
theme(
legend.key.size = unit(0.2, "cm"),
legend.position = c(.05, .95),
legend.justification = c("left", "top"),
legend.box.just = "left",
legend.margin = margin(6, 6, 6, 6)
)18.12 Some Hits - siSec23b Promoted
We can then use PlotProtQuantification to highlight some specific proteins.
That are involved in pro-neurogenic state.
P <- c('Syt1','Shc3','Smn1')
p04.Sec <- proteomicstools::PlotProtQuantification(Report$original,
candidates,P,test = 'pval') +
scale_fill_manual(values=sec.colors)
print(p04.Sec)18.13 Some Hits - siSec23a promoted
We can then use PlotProtQuantification to highlight some specific proteins.
That are promoted by Sec23a, (so in response to Sec23b knockdown)
P <- c('Pou3f3','Cks2')
p05.Sec <- proteomicstools::PlotProtQuantification(Report$original,candidates,P,
test = 'pval') +
scale_fill_manual(values=sec.colors)
print(p05.Sec)18.14 Fig4
We finally assemble the 4th figure.
library(cowplot)
#empty plot
blank <- ggplot() + theme_void()
pls <- align_plots(p01Sec,p02Sec,GO.dist,axis = 'b',align = 'h')
#UpperBlock
Fig4.up <- plot_grid(blank,pls[[1]],pls[[3]],nrow = 1,
labels = c('A','B','C'))
middle.row <- align_plots(p03.Sec,p04.Sec,p05.Sec,align = "h",axis = "bt")
Fig4.bottom <- plot_grid(middle.row[[2]],middle.row[[3]],middle.row[[1]],nrow = 1,
rel_widths = c(1,0.75,1),
labels = c('D','E',"F"))
#col.2[[2]]
Fig4.bottom2 <- plot_grid(p05.Sec,blank,blank,nrow = 1)
#Align Column
col.1 <- align_plots(pls[[1]],p04.Sec,axis = 'l',align = 'v')
Fig4 <- plot_grid(Fig4.up,Fig4.bottom,nrow = 2,rel_heights = c(1,1,1))And save it.
pdf(paste('../out/figures/Fig4/Fig4_',Sys.Date(),'.pdf',sep = ''),height = 6.5,width = 10.3)
Fig4
dev.off()## png
## 2
Fig418.15 Save Workspace
We also save the workspace in case we need to fast recover those informations.
save.image('../Markdown/workspaces/10-NeuronsSec23_KD_01.RData')18.16 Fig.Supp 6
We can start assemble Fig. Supp 6
blank <- ggplot() + theme_void()
FigSupp6 <- plot_grid(p.loads,p02Sec,nrow = 1,labels = c("A","B"))pdf(paste("../out/figures/FigSupp6/","FigSupp6_Updated",Sys.Date(),".pdf",sep = ""),width = 7,height = 3.38)
FigSupp6
dev.off()## png
## 2
18.17 Save Final Workspace
save.image('../Markdown/workspaces/10-NeuronsSec23_KD_Final.RData')18.18 Write Tables
tab1 <- GOEnrich.siSec23a$table;tab1$cond <- 'siSec23a/siSec23b'
tab2 <- GOEnrich.siSec23b$table;tab2$cond <- 'siSec23b/siSec23a'
tab <- rbind.data.frame(tab1,tab2)
l <- list('Sec23-KDs-Prot.Quant' = Report$original,
'Sec23-KDs-Diff.Quant' = candidates,
'Cumulative Dist.' = Cum.dropreps$data[,c(1,3,4,10,23,24)],
'GOEnrichment(GSEA)' = tab)
#Write table
openxlsx::write.xlsx(l,'../out/tables/Table_S8.xlsx')