Section 18 SEC23a/b Knockdown

In this part of the code, we analyze the MS data coming from the Sec23a and Sec23b knock-down in FAC sorted in vitro differentiated neurons. We first as usual loads all libraries and functions that we need.

18.1 Load Libraries and packages

library(dplyr)
library(ggplot2)
library(ggrepel)
library(ggpubr)
library(rstatix)
library(spatialR)
library(tidyr)
library(factoextra)
library(reshape2)
library(gridExtra)
library(limma)
library(proteomicstools)
library(pheatmap)
library(tidyverse)
library(MASS)

source("../ComplexScript/complexes_function.R")

18.2 MS-Data overview

We load the candidates table cname that contains the differential expression analysis coming out from the Spectronaut software. We also passing the report file rname that contains the protein quantification in all the samples.

#Load data
cname <- "../Data/Dataset/SEC23AbKD/FACS_data/Sec23ab/20210329_DDF_siSec23ab_Mouse_Neurons_allSamples_candidates_TOP3_RunWiseImp.xls"
rname <- "../Data/Dataset/SEC23AbKD/FACS_data/Sec23ab/20210705_164714_20210329_DDF_siSec23ab_Mouse_Neurons_allSamples_Report_TOP3_RunWiseImp.xls"

We can also use the function ProcessSNReport to process the report table, containing the protein quantification data. ProcessSNReport returns a list with the different information, data from the report. We can extract the protein matrix with the $matrix.

#Load candidates
candidates <- read.delim(cname,sep = "\t",header = T)
#Load Report
Report <- proteomicstools::ProcessSNReport(rname)
#Report Matrix
Report.m <- Report$matrix

#Substitute Nan with NA
Report.m.i <- apply(Report.m,2,function(x)(is.na(x))|x==0)
Report.m[Report.m.i] <- NA

We use CreateConditionMatrix to annotate the report data, with the different condition, creating the new column cond that indicates the different condition and batch that indicates the different batch of the MS processing and run of the samples.

#Create Condition matrix
Cond.Matrix <- CreateConditionMatrix(Report,grepOn = "R.FileName",
                                     list(cond=c("Sec23a","Sec23b","Non"),
                                          batch=c("Ori030","Ori038")))
#Save the complete matrix
Cond.Matrix <- Cond.Matrix[complete.cases(Cond.Matrix),]

18.3 Knockdown-efficiency

First we plot the knock-down efficiency, plotting the protein intesity for SEC23A and SEC23B

#Sec Colors
sec.colors <- c("lightgrey","#92cbee","#efb1d0")

We can just plot the quantification values for the two paralogs in a barplot across the different conditions.

Interesting <- c("Sec23a","Sec23b")

s2 <- Report$original[Report$original$PG.Genes %in% Interesting,]
s2 <- s2[s2$R.Condition %in% c("siNon","siSec23a","siSec23b"),]

pdf("../out/plots/p01Sec.pdf",width = 6,height = 3.3)
p01Sec <- ggplot(s2, aes(PG.Genes, PG.Quantity, fill = PG.Genes)) +
          stat_summary(geom = "bar", fun.y = mean, position = "dodge",
                       color="black",alpha=0.5) +
          stat_summary(geom = "errorbar", fun.data = mean_se, 
                       position = "dodge",width=0.4) + 
          facet_grid(~R.Condition) + 
          geom_jitter(width=0.1) + 
          theme_bw() + 
          scale_fill_brewer(palette = "Set2") + 
          theme(axis.text.x = element_text(angle = 60,hjust=1),
                legend.position = "none") +
          ylab("Protein Quantity") + xlab("")

## Warning: `fun.y` is deprecated. Use `fun` instead.

p01Sec
dev.off()

## png 
##   2

18.4 Paralog Load

Now we can calculate the total paralog amount in the different replicate and condition. We create a dataframe s2 that counts the protein intensity of SEC23A and SEC23B inside each replicates. And we run a paired wilcox test to check differences in total paralog abundance within conditions.

s2 <- s2 %>% dplyr::group_by(R.Condition,R.Replicate) %>%
             dplyr::mutate(loads=sum(log2(PG.Quantity)),) %>%
             as.data.frame()

#Add test information
Datasets.test <- s2 %>% 
                 wilcox_test(loads ~ R.Condition,paired = T) %>%
                 add_significance("p") %>% add_xy_position(x = "R.Condition") %>%
                 as.data.frame()

BarData <- s2 %>% dplyr::group_by(R.Condition) %>%
           dplyr::summarise(median = median(loads)) %>%
           as.data.frame()

s2.t <- s2
s2 <- s2[!duplicated(s2[,c(1,3)]),]

We plot the results in the variable p.loads

p.loads <- ggplot(s2, aes(x=R.Condition)) +
           geom_bar(data=BarData,aes(x=R.Condition,y=median,fill=R.Condition),stat='identity',alpha=0.4) + 
           geom_jitter(aes(y=loads),width = 0.1) + 
           theme_bw() + scale_fill_manual(values=sec.colors) + xlab("") +
           stat_pvalue_manual(Datasets.test, label = "p.signif",
                              bracket.nudge.y=5,tip.length=0.5,
                              step.increase = 1) + 
           ylab("Paired Replicates\nLog2(SEC23A) + Log2(SEC23B)") +
           theme(legend.position = "none")
           

p.loads

#Sec Colors
sec.colors <- c("lightgrey","#92cbee","#efb1d0")

18.5 Differential expression analysis

Using the information contained in the candidates table, we can extract differentially expressed proteins in the different conditions.

18.5.0.1 siSec23a Response

siSec23a.siNon <- candidates %>% 
                  filter(Comparison..group1.group2. == "siSec23a / siNon")

18.5.0.2 siSec23b Response

siSec23b.siNon <- candidates %>% 
                  filter(Comparison..group1.group2. == "siSec23b / siNon")

18.5.0.3 siSec23b/siSec23a Response

siSec23b.siSec23a <- candidates %>% 
                     filter(Comparison..group1.group2. == "siSec23b / siSec23a")

18.6 GOEnrich siSec23a

We can run a GOEnrichment analysis subsetting on significant comparison Qvalue<0.05. Between siSec23b / siSec23a differentially expressed genes. We use the gene enrichment test using Kolmogorov-smirnov test. Since by default the function sort gene names by the fold change (from negative to positive) in this case for considering genes enrichment more in siSec23a we can just give as a vector the Log2 fold-change values.

sig.subset <- siSec23b.siSec23a 
geneList <- sig.subset$AVG.Log2.Ratio
names(geneList) <- sig.subset$Genes

#GOEnrich
GOEnrich.siSec23a <- spatialR::GOEnrichmentKS(geneList,species = 'Mm',ontology = 'BP')

18.7 GOEnrich siSec23b

We can run a GOEnrichment analysis subsetting on significant comparison. For Sec23a. We use the gene enrichment test using Kolmogorov-smirnov test. Since by default the function sort gene names by the fold change (from negative to positive) in this case for considering genes enrichment more in siSec23b we need to multiply the fold change by -1

sig.subset <- siSec23b.siSec23a 
geneList <- sig.subset$AVG.Log2.Ratio*(-1)
names(geneList) <- sig.subset$Genes

#GOEnrich
GOEnrich.siSec23b <- spatialR::GOEnrichmentKS(geneList,species = 'Mm',ontology = 'BP')

18.7.1 Cumulative GOplot

GORankedPlot is used then to create a plot for the enrichment in the two conditions. GORankedPlot takes in input the table coming out from the two GOEnrichment, and plot the different terms as a ranked scatterplot. See ?GORankedPlot.

GO.dist <- proteomicstools::GORankedPlot(list('siSec23a'=GOEnrich.siSec23a$table,
                                              'siSec23b'=GOEnrich.siSec23b$table))

GO.dist

18.8 Extract Gene

LIST_siSec23a <- spatialR::ShowGenesGOTerms(GOEnrich.siSec23a,siSec23a.siNon$Genes)
LIST_siSec23b <- spatialR::ShowGenesGOTerms(GOEnrich.siSec23b,siSec23b.siNon$Genes)

siSec23b.siNon$funct <- 'other'
siSec23b.siNon$funct[siSec23b.siNon$Genes %in% LIST_siSec23b$`synaptic signaling`] = 'synap'
siSec23b.siSec23a$funct[siSec23b.siSec23a$Genes %in% LIST_siSec23b$`synaptic signaling`] = 'synapt'

18.9 Annotate with GOTerms

We can then annotate with GOTerms the differential expression table.

siSec23b.siSec23a <- spatialR::Annotate(siSec23b.siSec23a,
                                        organism = 'Mm',
                                        idcol = 'Genes',annot = 'GO')

18.10 Correlation of the response

siSec23 <- merge(siSec23a.siNon,siSec23b.siNon,by="ProteinGroups")

ScatterBetweenCandidates helps plotting candidates results against each others.

#Scatter Plot
p02Sec <- ScatterBetweenCandidates(candidates.x = siSec23a.siNon,
                                   candidates.y = siSec23b.siNon,
                                   x = 'siSec23a / siNon',
                                   y = 'siSec23b / siNon')

We show the correlation test for the response to the two paralogs knockdowns.

ctest <- cor.test(siSec23$AVG.Log2.Ratio.x,siSec23$AVG.Log2.Ratio.y)
ctest$p.value

## [1] 0

18.11 Cumulative Distribution siSec23b/siSec23a

In this part of the script we merge the information coming from our Mouse TMT10plex neuronal differentiation data with the data coming out from the siSec23 paralogs knockdown. Using the Mouse TMT10 plex data we can create a neuronal differentiation signature selecting genes that are up and down regulated, and see the distribution of these genes in the Sec23 paralogs knockdowns.

18.11.1 Load Mouse data

First we load again the data coming from Mouse TMT10plex neuronal differentiation. Mouse.Candidates represent the values from the differential expression analysis of these tables.

Mouse.Candidates <- read.delim("../Data/Dataset/270519_MouseNeuron_TMT10_contrast.txt",sep="\t",header = T)
Mouse.Intensities <- read.delim("../Data/Dataset/Intensities/17062019_MouseNeuron_TMT10_Log2Intensity.txt",sep="\t",header = T)

We define here the upregulated and downregulated signature. We define gene strongly upregulated during neurogenesis, in reference to DAY3/DAY0. Since our Sec23 paralog knockdowns were also harvested at DAY3. Upregulated genes are the one that have a logFC.D3.D0>=1 and adjusted P.value <= 0.05while downregulated one have logFC.D3.D0<= -1 and adjusted P.value <= 0.05.We collect them in a variable called hits

#Hits
Upregulated.Neuro <- Mouse.Candidates[Mouse.Candidates$logFC.D3.D0>=1 & Mouse.Candidates$adj.P.Val.D3.D0<=0.05,"Protein.ID"]
Downregulated.Neuro <- Mouse.Candidates[Mouse.Candidates$logFC.D3.D0<= -1 & Mouse.Candidates$adj.P.Val.D3.D0<=0.5,"Protein.ID"]
#Hits
hits <- list(Upregulated=Upregulated.Neuro,Downregulated=Downregulated.Neuro)

To show the differential enrichment in protein that are upregulated or downregulated during neuronal differentiation we can mark them on a ranked plot coming from the differential expression analysis (siSec23b/siSec23a). Running then a Kolmogorov-Smirnov test on the two distribution. For this purpose we use the CumulativeDistribution

Cum.dropreps <- CumulativeDistribution(siSec23b.siSec23a,
                                       ids = "ProteinGroups",
                                       fc = "AVG.Log2.Ratio",hits = hits)

#Pvalue siSec23b
pval.b <- format(Cum.dropreps$ks.x$p.value,scientific = T,digits = 2)
pval.b <- paste('KS test \np.value:',pval.b)

pval.a <- format(Cum.dropreps$ks.y$p.value,scientific = T,digits = 2)
pval.a <- paste('KS test \np.value:',pval.a)

#Cumulative distribution
p03.Sec <- Cum.dropreps$plot + xlab('Ranked siSec23b/siSec23a Log2 Fold Change') + 
           annotate('text',x=1000,y=0.50,label = pval.b,size=2.5) + 
           annotate('text',x=4000,y=0.25,label = pval.a,size=2.5) + 
           scale_color_manual(values = c("red","grey","blue")) + 
           labs(color="During Neurogenesis") + ylab('Cumulative distribution') + 
           theme(
                legend.key.size = unit(0.2, "cm"),
                legend.position = c(.05, .95),
                legend.justification = c("left", "top"),
                legend.box.just = "left",
                legend.margin = margin(6, 6, 6, 6)
                )

18.12 Some Hits - siSec23b Promoted

We can then use PlotProtQuantification to highlight some specific proteins. That are involved in pro-neurogenic state.

P <- c('Syt1','Shc3','Smn1')
p04.Sec <- proteomicstools::PlotProtQuantification(Report$original,
                                                   candidates,P,test = 'pval') + 
           scale_fill_manual(values=sec.colors)

print(p04.Sec)

18.13 Some Hits - siSec23a promoted

We can then use PlotProtQuantification to highlight some specific proteins. That are promoted by Sec23a, (so in response to Sec23b knockdown)

P <- c('Pou3f3','Cks2')
p05.Sec <- proteomicstools::PlotProtQuantification(Report$original,candidates,P,
                                                   test = 'pval') + 
           scale_fill_manual(values=sec.colors)

print(p05.Sec)

18.14 Fig4

We finally assemble the 4th figure.

library(cowplot)
#empty plot
blank <- ggplot() + theme_void()

pls <- align_plots(p01Sec,p02Sec,GO.dist,axis = 'b',align = 'h')

#UpperBlock
Fig4.up <- plot_grid(blank,pls[[1]],pls[[3]],nrow = 1,
                     labels = c('A','B','C'))


middle.row <- align_plots(p03.Sec,p04.Sec,p05.Sec,align = "h",axis = "bt")

Fig4.bottom <- plot_grid(middle.row[[2]],middle.row[[3]],middle.row[[1]],nrow = 1,
                         rel_widths = c(1,0.75,1),
                         labels = c('D','E',"F"))

#col.2[[2]]
Fig4.bottom2 <- plot_grid(p05.Sec,blank,blank,nrow = 1)

#Align Column
col.1 <- align_plots(pls[[1]],p04.Sec,axis = 'l',align = 'v')

Fig4 <- plot_grid(Fig4.up,Fig4.bottom,nrow = 2,rel_heights = c(1,1,1))

And save it.

pdf(paste('../out/figures/Fig4/Fig4_',Sys.Date(),'.pdf',sep = ''),height = 6.5,width = 10.3)
Fig4
dev.off()

## png 
##   2

Fig4

18.15 Save Workspace

We also save the workspace in case we need to fast recover those informations.

save.image('../Markdown/workspaces/10-NeuronsSec23_KD_01.RData')

18.16 Fig.Supp 6

We can start assemble Fig. Supp 6

blank <- ggplot() + theme_void()
FigSupp6 <- plot_grid(p.loads,p02Sec,nrow = 1,labels = c("A","B"))

pdf(paste("../out/figures/FigSupp6/","FigSupp6_Updated",Sys.Date(),".pdf",sep = ""),width = 7,height = 3.38)
FigSupp6
dev.off()

## png 
##   2

18.17 Save Final Workspace

save.image('../Markdown/workspaces/10-NeuronsSec23_KD_Final.RData')

18.18 Write Tables

tab1 <- GOEnrich.siSec23a$table;tab1$cond <- 'siSec23a/siSec23b'
tab2 <- GOEnrich.siSec23b$table;tab2$cond <- 'siSec23b/siSec23a'
tab <- rbind.data.frame(tab1,tab2)

l <- list('Sec23-KDs-Prot.Quant' = Report$original,
          'Sec23-KDs-Diff.Quant' = candidates,
          'Cumulative Dist.' = Cum.dropreps$data[,c(1,3,4,10,23,24)],
          'GOEnrichment(GSEA)' = tab)

#Write table
openxlsx::write.xlsx(l,'../out/tables/Table_S8.xlsx')