Section 18 SEC23a/b Knockdown
In this part of the code, we analyze the MS data coming from the Sec23a and Sec23b knock-down in FAC sorted in vitro differentiated neurons. We first as usual loads all libraries and functions that we need.
18.1 Load Libraries and packages
library(dplyr)
library(ggplot2)
library(ggrepel)
library(ggpubr)
library(rstatix)
library(spatialR)
library(tidyr)
library(factoextra)
library(reshape2)
library(gridExtra)
library(limma)
library(proteomicstools)
library(pheatmap)
library(tidyverse)
library(MASS)
source("../ComplexScript/complexes_function.R")
18.2 MS-Data overview
We load the candidates table cname
that contains the differential expression
analysis coming out from the Spectronaut software. We also passing the report
file rname
that contains the protein quantification in all the samples.
#Load data
<- "../Data/Dataset/SEC23AbKD/FACS_data/Sec23ab/20210329_DDF_siSec23ab_Mouse_Neurons_allSamples_candidates_TOP3_RunWiseImp.xls"
cname <- "../Data/Dataset/SEC23AbKD/FACS_data/Sec23ab/20210705_164714_20210329_DDF_siSec23ab_Mouse_Neurons_allSamples_Report_TOP3_RunWiseImp.xls" rname
We can also use the function ProcessSNReport
to process the report table,
containing the protein quantification data. ProcessSNReport
returns a list
with the different information, data from the report. We can extract the protein
matrix with the $matrix
.
#Load candidates
<- read.delim(cname,sep = "\t",header = T)
candidates #Load Report
<- proteomicstools::ProcessSNReport(rname)
Report #Report Matrix
<- Report$matrix
Report.m
#Substitute Nan with NA
<- apply(Report.m,2,function(x)(is.na(x))|x==0)
Report.m.i <- NA Report.m[Report.m.i]
We use CreateConditionMatrix
to annotate the report data, with the different
condition, creating the new column cond
that indicates the different condition
and batch
that indicates the different batch of the MS processing and run of
the samples.
#Create Condition matrix
<- CreateConditionMatrix(Report,grepOn = "R.FileName",
Cond.Matrix list(cond=c("Sec23a","Sec23b","Non"),
batch=c("Ori030","Ori038")))
#Save the complete matrix
<- Cond.Matrix[complete.cases(Cond.Matrix),] Cond.Matrix
18.3 Knockdown-efficiency
First we plot the knock-down efficiency, plotting the protein intesity for SEC23A and SEC23B
#Sec Colors
<- c("lightgrey","#92cbee","#efb1d0") sec.colors
We can just plot the quantification values for the two paralogs in a barplot across the different conditions.
<- c("Sec23a","Sec23b")
Interesting
<- Report$original[Report$original$PG.Genes %in% Interesting,]
s2 <- s2[s2$R.Condition %in% c("siNon","siSec23a","siSec23b"),]
s2
pdf("../out/plots/p01Sec.pdf",width = 6,height = 3.3)
<- ggplot(s2, aes(PG.Genes, PG.Quantity, fill = PG.Genes)) +
p01Sec stat_summary(geom = "bar", fun.y = mean, position = "dodge",
color="black",alpha=0.5) +
stat_summary(geom = "errorbar", fun.data = mean_se,
position = "dodge",width=0.4) +
facet_grid(~R.Condition) +
geom_jitter(width=0.1) +
theme_bw() +
scale_fill_brewer(palette = "Set2") +
theme(axis.text.x = element_text(angle = 60,hjust=1),
legend.position = "none") +
ylab("Protein Quantity") + xlab("")
## Warning: `fun.y` is deprecated. Use `fun` instead.
p01Secdev.off()
## png
## 2
18.4 Paralog Load
Now we can calculate the total paralog amount in the different replicate and
condition. We create a dataframe s2
that counts the protein intensity of
SEC23A and SEC23B inside each replicates. And we run a paired wilcox test to
check differences in total paralog abundance within conditions.
<- s2 %>% dplyr::group_by(R.Condition,R.Replicate) %>%
s2 ::mutate(loads=sum(log2(PG.Quantity)),) %>%
dplyras.data.frame()
#Add test information
<- s2 %>%
Datasets.test wilcox_test(loads ~ R.Condition,paired = T) %>%
add_significance("p") %>% add_xy_position(x = "R.Condition") %>%
as.data.frame()
<- s2 %>% dplyr::group_by(R.Condition) %>%
BarData ::summarise(median = median(loads)) %>%
dplyras.data.frame()
<- s2
s2.t <- s2[!duplicated(s2[,c(1,3)]),] s2
We plot the results in the variable p.loads
<- ggplot(s2, aes(x=R.Condition)) +
p.loads geom_bar(data=BarData,aes(x=R.Condition,y=median,fill=R.Condition),stat='identity',alpha=0.4) +
geom_jitter(aes(y=loads),width = 0.1) +
theme_bw() + scale_fill_manual(values=sec.colors) + xlab("") +
stat_pvalue_manual(Datasets.test, label = "p.signif",
bracket.nudge.y=5,tip.length=0.5,
step.increase = 1) +
ylab("Paired Replicates\nLog2(SEC23A) + Log2(SEC23B)") +
theme(legend.position = "none")
p.loads
#Sec Colors
<- c("lightgrey","#92cbee","#efb1d0") sec.colors
18.5 Differential expression analysis
Using the information contained in the candidates table, we can extract differentially expressed proteins in the different conditions.
18.5.0.1 siSec23a Response
<- candidates %>%
siSec23a.siNon filter(Comparison..group1.group2. == "siSec23a / siNon")
18.5.0.2 siSec23b Response
<- candidates %>%
siSec23b.siNon filter(Comparison..group1.group2. == "siSec23b / siNon")
18.5.0.3 siSec23b/siSec23a Response
<- candidates %>%
siSec23b.siSec23a filter(Comparison..group1.group2. == "siSec23b / siSec23a")
18.6 GOEnrich siSec23a
We can run a GOEnrichment analysis subsetting on significant comparison
Qvalue<0.05
. Between siSec23b / siSec23a differentially expressed genes. We
use the gene enrichment test using Kolmogorov-smirnov test. Since by default the
function sort gene names by the fold change (from negative to positive) in this
case for considering genes enrichment more in siSec23a we can just give as a
vector the Log2 fold-change values.
<- siSec23b.siSec23a
sig.subset <- sig.subset$AVG.Log2.Ratio
geneList names(geneList) <- sig.subset$Genes
#GOEnrich
<- spatialR::GOEnrichmentKS(geneList,species = 'Mm',ontology = 'BP') GOEnrich.siSec23a
18.7 GOEnrich siSec23b
We can run a GOEnrichment analysis subsetting on significant comparison. For
Sec23a. We use the gene enrichment test using Kolmogorov-smirnov test. Since by
default the function sort gene names by the fold change (from negative to
positive) in this case for considering genes enrichment more in siSec23b we need
to multiply the fold change by -1
<- siSec23b.siSec23a
sig.subset <- sig.subset$AVG.Log2.Ratio*(-1)
geneList names(geneList) <- sig.subset$Genes
#GOEnrich
<- spatialR::GOEnrichmentKS(geneList,species = 'Mm',ontology = 'BP') GOEnrich.siSec23b
18.7.1 Cumulative GOplot
GORankedPlot
is used then to create a plot for the enrichment in the two
conditions. GORankedPlot
takes in input the table coming out from the two
GOEnrichment, and plot the different terms as a ranked scatterplot. See
?GORankedPlot
.
<- proteomicstools::GORankedPlot(list('siSec23a'=GOEnrich.siSec23a$table,
GO.dist 'siSec23b'=GOEnrich.siSec23b$table))
GO.dist
18.8 Extract Gene
<- spatialR::ShowGenesGOTerms(GOEnrich.siSec23a,siSec23a.siNon$Genes)
LIST_siSec23a <- spatialR::ShowGenesGOTerms(GOEnrich.siSec23b,siSec23b.siNon$Genes) LIST_siSec23b
$funct <- 'other'
siSec23b.siNon$funct[siSec23b.siNon$Genes %in% LIST_siSec23b$`synaptic signaling`] = 'synap'
siSec23b.siNon$funct[siSec23b.siSec23a$Genes %in% LIST_siSec23b$`synaptic signaling`] = 'synapt' siSec23b.siSec23a
18.9 Annotate with GOTerms
We can then annotate with GOTerms the differential expression table.
<- spatialR::Annotate(siSec23b.siSec23a,
siSec23b.siSec23a organism = 'Mm',
idcol = 'Genes',annot = 'GO')
18.10 Correlation of the response
<- merge(siSec23a.siNon,siSec23b.siNon,by="ProteinGroups") siSec23
ScatterBetweenCandidates
helps plotting candidates results against each
others.
#Scatter Plot
<- ScatterBetweenCandidates(candidates.x = siSec23a.siNon,
p02Sec candidates.y = siSec23b.siNon,
x = 'siSec23a / siNon',
y = 'siSec23b / siNon')
We show the correlation test for the response to the two paralogs knockdowns.
<- cor.test(siSec23$AVG.Log2.Ratio.x,siSec23$AVG.Log2.Ratio.y)
ctest $p.value ctest
## [1] 0
18.11 Cumulative Distribution siSec23b/siSec23a
In this part of the script we merge the information coming from our Mouse TMT10plex neuronal differentiation data with the data coming out from the siSec23 paralogs knockdown. Using the Mouse TMT10 plex data we can create a neuronal differentiation signature selecting genes that are up and down regulated, and see the distribution of these genes in the Sec23 paralogs knockdowns.
18.11.1 Load Mouse data
First we load again the data coming from Mouse TMT10plex neuronal
differentiation. Mouse.Candidates
represent the values from the differential
expression analysis of these tables.
<- read.delim("../Data/Dataset/270519_MouseNeuron_TMT10_contrast.txt",sep="\t",header = T)
Mouse.Candidates <- read.delim("../Data/Dataset/Intensities/17062019_MouseNeuron_TMT10_Log2Intensity.txt",sep="\t",header = T) Mouse.Intensities
We define here the upregulated and downregulated signature. We define gene
strongly upregulated during neurogenesis, in reference to DAY3/DAY0. Since our
Sec23 paralog knockdowns were also harvested at DAY3. Upregulated genes are the
one that have a logFC.D3.D0>=1
and adjusted P.value <= 0.05
while
downregulated one have logFC.D3.D0<= -1
and adjusted P.value <= 0.05.
We
collect them in a variable called hits
#Hits
<- Mouse.Candidates[Mouse.Candidates$logFC.D3.D0>=1 & Mouse.Candidates$adj.P.Val.D3.D0<=0.05,"Protein.ID"]
Upregulated.Neuro <- Mouse.Candidates[Mouse.Candidates$logFC.D3.D0<= -1 & Mouse.Candidates$adj.P.Val.D3.D0<=0.5,"Protein.ID"]
Downregulated.Neuro #Hits
<- list(Upregulated=Upregulated.Neuro,Downregulated=Downregulated.Neuro) hits
To show the differential enrichment in protein that are upregulated or
downregulated during neuronal differentiation we can mark them on a ranked plot
coming from the differential expression analysis (siSec23b/siSec23a). Running
then a Kolmogorov-Smirnov test on the two distribution. For this purpose we use
the CumulativeDistribution
<- CumulativeDistribution(siSec23b.siSec23a,
Cum.dropreps ids = "ProteinGroups",
fc = "AVG.Log2.Ratio",hits = hits)
#Pvalue siSec23b
<- format(Cum.dropreps$ks.x$p.value,scientific = T,digits = 2)
pval.b <- paste('KS test \np.value:',pval.b)
pval.b
<- format(Cum.dropreps$ks.y$p.value,scientific = T,digits = 2)
pval.a <- paste('KS test \np.value:',pval.a)
pval.a
#Cumulative distribution
<- Cum.dropreps$plot + xlab('Ranked siSec23b/siSec23a Log2 Fold Change') +
p03.Sec annotate('text',x=1000,y=0.50,label = pval.b,size=2.5) +
annotate('text',x=4000,y=0.25,label = pval.a,size=2.5) +
scale_color_manual(values = c("red","grey","blue")) +
labs(color="During Neurogenesis") + ylab('Cumulative distribution') +
theme(
legend.key.size = unit(0.2, "cm"),
legend.position = c(.05, .95),
legend.justification = c("left", "top"),
legend.box.just = "left",
legend.margin = margin(6, 6, 6, 6)
)
18.12 Some Hits - siSec23b Promoted
We can then use PlotProtQuantification
to highlight some specific proteins.
That are involved in pro-neurogenic state.
<- c('Syt1','Shc3','Smn1')
P <- proteomicstools::PlotProtQuantification(Report$original,
p04.Sec test = 'pval') +
candidates,P,scale_fill_manual(values=sec.colors)
print(p04.Sec)
18.13 Some Hits - siSec23a promoted
We can then use PlotProtQuantification
to highlight some specific proteins.
That are promoted by Sec23a, (so in response to Sec23b knockdown)
<- c('Pou3f3','Cks2')
P <- proteomicstools::PlotProtQuantification(Report$original,candidates,P,
p05.Sec test = 'pval') +
scale_fill_manual(values=sec.colors)
print(p05.Sec)
18.14 Fig4
We finally assemble the 4th figure.
library(cowplot)
#empty plot
<- ggplot() + theme_void()
blank
<- align_plots(p01Sec,p02Sec,GO.dist,axis = 'b',align = 'h')
pls
#UpperBlock
<- plot_grid(blank,pls[[1]],pls[[3]],nrow = 1,
Fig4.up labels = c('A','B','C'))
<- align_plots(p03.Sec,p04.Sec,p05.Sec,align = "h",axis = "bt")
middle.row
<- plot_grid(middle.row[[2]],middle.row[[3]],middle.row[[1]],nrow = 1,
Fig4.bottom rel_widths = c(1,0.75,1),
labels = c('D','E',"F"))
#col.2[[2]]
<- plot_grid(p05.Sec,blank,blank,nrow = 1)
Fig4.bottom2
#Align Column
.1 <- align_plots(pls[[1]],p04.Sec,axis = 'l',align = 'v')
col
<- plot_grid(Fig4.up,Fig4.bottom,nrow = 2,rel_heights = c(1,1,1)) Fig4
And save it.
pdf(paste('../out/figures/Fig4/Fig4_',Sys.Date(),'.pdf',sep = ''),height = 6.5,width = 10.3)
Fig4dev.off()
## png
## 2
Fig4
18.15 Save Workspace
We also save the workspace in case we need to fast recover those informations.
save.image('../Markdown/workspaces/10-NeuronsSec23_KD_01.RData')
18.16 Fig.Supp 6
We can start assemble Fig. Supp 6
<- ggplot() + theme_void()
blank <- plot_grid(p.loads,p02Sec,nrow = 1,labels = c("A","B")) FigSupp6
pdf(paste("../out/figures/FigSupp6/","FigSupp6_Updated",Sys.Date(),".pdf",sep = ""),width = 7,height = 3.38)
FigSupp6dev.off()
## png
## 2
18.17 Save Final Workspace
save.image('../Markdown/workspaces/10-NeuronsSec23_KD_Final.RData')
18.18 Write Tables
<- GOEnrich.siSec23a$table;tab1$cond <- 'siSec23a/siSec23b'
tab1 <- GOEnrich.siSec23b$table;tab2$cond <- 'siSec23b/siSec23a'
tab2 <- rbind.data.frame(tab1,tab2)
tab
<- list('Sec23-KDs-Prot.Quant' = Report$original,
l 'Sec23-KDs-Diff.Quant' = candidates,
'Cumulative Dist.' = Cum.dropreps$data[,c(1,3,4,10,23,24)],
'GOEnrichment(GSEA)' = tab)
#Write table
::write.xlsx(l,'../out/tables/Table_S8.xlsx') openxlsx