Section 12 Mass Spectrometry Datasets Overview
12.1 Load Libraries
library(factoextra)
library(dplyr)
library(spatialR)
source('../ComplexScript/complexes_function.R')
12.2 Load Protein Intensity data
We start by loading the protein intensity data from the different datasets, and then organize them in an analysis-friendly way.
# Paths to the protein intensity tables, one variable per dataset.
Drerio <- "../Data/Dataset/processed/ZebrafishNeurogProcessed.txt"
Djuric <- "../Data/Dataset/processed/Djuric_et_al_2017_processed.csv"
Mouse <- "../Data/Dataset/DataSetProcessing/27052019_MouseNeuron_TMT10_normalized_protein_matrix.csv"
Frese <- "../Data/Dataset/processed/Frese_et_al_2017_processed.csv"
And we load them into different dataframes.
# Read every dataset from its path. Each variable is overwritten: it holds a
# file path before the call and the loaded data frame after it.
Drerio <- read.delim(Drerio, sep = "\t", header = TRUE)
Djuric <- read.delim(Djuric, sep = ",", header = TRUE)
#Remove last column not a sample from neurons
# NOTE(review): no column is dropped at this point; confirm which statement
# the comment above was meant to describe.
Mouse <- read.delim(Mouse, sep = ",", header = TRUE)
Frese <- read.delim(Frese, sep = ",", header = TRUE)
For the mouse dataset we rename the columns so that they indicate the sample each one comes from, and we annotate the proteins with gene names.
# Rename the mouse columns: the ID column "X", the DIV0/DIV3/DIV10 sample
# columns (suffix .1-.3) and the "Pool" column.
colnames(Mouse) <- c(
  "X",
  "DIV0.1", "DIV0.2", "DIV0.3",
  "DIV3.1", "DIV3.2", "DIV3.3",
  "DIV10.1", "DIV10.2", "DIV10.3",
  "Pool"
)
#Annotate with gene name
Mouse <- spatialR::Annotate(Mouse, organism = "Mm", "X", idsep = ";",
                            annot = "SYMBOL")
And try to organize them in a standard way by selecting only meaningful columns.
#select interesting columns for different datasets
# Columns 2 and 134 plus every column whose name matches the light ("L") or
# heavy ("H") iBAQ mix pattern.
col.drerio <- c(
  2, 134,
  grep("iBAQ.L.Mix", colnames(Drerio)),
  grep("iBAQ.H.Mix", colnames(Drerio))
)
Now we subset the datasets to the selected columns and log2-transform the zebrafish data.
#Subset datasets
Drerio <- Drerio[, col.drerio]
# Zeros become NA before the log transform (log2(0) would give -Inf).
Drerio[Drerio == 0] <- NA

#Convert in Log
# The first two columns are left untouched; all other columns are
# log2-transformed.
Drerio[, -c(1:2)] <- log2(Drerio[, -c(1:2)])
# Keep column 1 and columns 3 to 12 of the Djuric table.
Djuric <- Djuric[, c(1, 3:12)]
12.3 Samples PCA
For every species, we load and prepare the data for the Principal Component Analysis. We remove non-numeric columns and transpose the data. We then run a PCA with the `prcomp` function and plot the results using the `fviz_pca_ind` function from the factoextra package.
12.3.1 Mouse
First we run this for the mouse data. We remove the last column, which indicates the gene name, and add the gene name annotation to the matrix as row names. We then transpose the matrix in order to have proteins as features and the different conditions as samples, and annotate each sample with a condition column. Finally, we run a PCA with the `prcomp` function and visualize it using the factoextra function `fviz_pca_ind`.
#Drop GeneName Col and add rownames
PCA.Mouse.data <- Mouse[, -ncol(Mouse)]
rownames(PCA.Mouse.data) <- Mouse$X
# Drop the ID column and transpose: one row per sample, one column per protein.
PCA.Mouse.data <- t(PCA.Mouse.data[, -1]) %>% as.data.frame()
# Condition label = sample name without its ".<replicate>" suffix.
PCA.Mouse.data$condition <- gsub("\\.\\d+", "", rownames(PCA.Mouse.data))
#PCA
# The last column (condition) is excluded from the PCA input.
PCA.Mouse <- prcomp(PCA.Mouse.data[, -ncol(PCA.Mouse.data)])

pca.1 <- fviz_pca_ind(PCA.Mouse,
                      habillage = PCA.Mouse.data$condition,
                      addEllipses = TRUE,
                      geom = "point") +
  ggtitle("Mouse TMT 10") +
  theme(legend.position = "bottom",
        plot.title = element_text(hjust = 0.5, size = 10))
12.3.2 Human Djuric et al 2017
Human data from Djuric et al., 2017. We remove the column that indicates the gene name and add the gene name annotation to the matrix as row names. We then transpose the matrix in order to have proteins as features and the different conditions as samples, and annotate each sample with a condition column. Finally, we run a PCA with the `prcomp` function and visualize it using the factoextra function `fviz_pca_ind`.
# Drop the second column and keep only rows without missing values.
PCA.Human.data <- Djuric[, -2]
PCA.Human.data <- PCA.Human.data[complete.cases(PCA.Human.data), ]
rownames(PCA.Human.data) <- PCA.Human.data$Majority.protein.Uniprot.IDs
# Drop the ID column and transpose: one row per sample, one column per protein.
PCA.Human.data <- t(PCA.Human.data[, -1]) %>% as.data.frame()
# Condition label = sample name with the "<digits>_<digit>" part removed.
PCA.Human.data$condition <- gsub("\\d+_\\d", "", rownames(PCA.Human.data))
#PCA
# The last column (condition) is excluded from the PCA input.
PCA.Human <- prcomp(PCA.Human.data[, -ncol(PCA.Human.data)])

pca.2 <- fviz_pca_ind(PCA.Human,
                      habillage = PCA.Human.data$condition,
                      addEllipses = TRUE,
                      geom = "point") +
  ggtitle("Djuric et al., 2017") +
  theme(legend.position = "bottom",
        plot.title = element_text(hjust = 0.5, size = 10))
12.3.3 Zebrafish Neuron/Stem
Zebrafish dimethyl-labelling data. We remove the column that indicates the gene name and add the gene name annotation to the matrix as row names. We then transpose the matrix in order to have proteins as features and the different conditions as samples, and annotate each sample with a condition column. Finally, we run a PCA with the `prcomp` function and visualize it using the factoextra function `fviz_pca_ind`.
# Drop the second column and use the protein IDs as row names.
PCA.Drerio.data <- Drerio[, -2]
rownames(PCA.Drerio.data) <- PCA.Drerio.data$Majority.protein.IDs
# Keep only rows without missing values and drop the ID column.
PCA.Drerio.data <- PCA.Drerio.data[complete.cases(PCA.Drerio.data), -1]
# Transpose: one row per sample, one column per protein.
PCA.Drerio.data <- t(PCA.Drerio.data) %>% as.data.frame()
# Sample names containing ".L." are labelled "Stem", those with ".H." "Neu".
PCA.Drerio.data$condition <- gsub("\\w+\\.L\\.\\w+\\d", "Stem",
                                  rownames(PCA.Drerio.data))
PCA.Drerio.data$condition <- gsub("\\w+\\.H\\.\\w+\\d", "Neu",
                                  PCA.Drerio.data$condition)

#PCA Drerio
# The last column (condition) is excluded from the PCA input.
PCA.Drerio <- prcomp(PCA.Drerio.data[, -ncol(PCA.Drerio.data)])

pca.3 <- fviz_pca_ind(PCA.Drerio,
                      habillage = PCA.Drerio.data$condition,
                      addEllipses = TRUE,
                      geom = "point") +
  ggtitle("Danio rerio Neuron/Stem") +
  theme(legend.position = "bottom",
        plot.title = element_text(hjust = 0.5, size = 10))
12.3.4 Rat - Frese et al 2017
And rat data from Frese et al., 2017. We select the columns that hold the protein quantification values and add the gene name annotation to the matrix as row names. We then transpose the matrix in order to have proteins as features and the different conditions as samples, and annotate each sample with a condition column. Finally, we run a PCA with the `prcomp` function and visualize it using the factoextra function `fviz_pca_ind`.
# Column 11 comes first, followed by columns 2 to 7.
PCA.frese.data <- Frese[, c(11, 2:7)]
# make.names(unique = TRUE) turns the SYMBOL values into valid, unique
# row names.
rownames(PCA.frese.data) <- make.names(PCA.frese.data$SYMBOL, unique = TRUE)
# Keep only rows without missing values and drop the first (SYMBOL) column.
PCA.frese.data <- PCA.frese.data[complete.cases(PCA.frese.data), -1]
# Transpose: one row per sample, one column per protein.
PCA.frese.data <- t(PCA.frese.data) %>% as.data.frame()
# Condition label = sample name without its "rep<digit>" suffix.
PCA.frese.data$condition <- gsub(".rep\\d", "", rownames(PCA.frese.data))

#PCA Frese
# The last column (condition) is excluded from the PCA input.
PCA.frese <- prcomp(PCA.frese.data[, -ncol(PCA.frese.data)])

pca.4 <- fviz_pca_ind(PCA.frese,
                      habillage = PCA.frese.data$condition,
                      addEllipses = TRUE,
                      geom = "point") +
  ggtitle("Frese et al., 2017") +
  theme(legend.position = "bottom",
        plot.title = element_text(hjust = 0.5, size = 10))
12.4 GOEnrichment on Data
We first load the `DataInfo.txt` file, which contains all the information about the different datasets.
#Load data info for each dataset < ----
# Tab-separated table with a header row; strings are kept as character.
DataInfo <- read.table("../Data/Dataset/DataInfo.txt", sep = "\t",
                       header = TRUE, stringsAsFactors = FALSE)
We can show the content of the dataset.
# Preview the first rows of the dataset description table.
head(DataInfo)
## filename
## 1 ../Data/Dataset/270519_MouseNeuron_TMT10_contrast_updatedNames.txt
## 2 ../Data/Dataset/270519_MouseNeuron_TMT10_contrast_updatedNames.txt
## 3 ../Data/Dataset/270519_MouseNeuron_TMT10_contrast_updatedNames.txt
## 4 ../Data/Dataset/processed/Frese_et_al_2017_processed.csv
## 5 ../Data/Dataset/processed/Frese_et_al_2017_processed.csv
## 6 ../Data/Dataset/processed/Djuric_et_al_2017_processed.csv
## Id.col fold.change fdr.col condition.col organism
## 1 Gene.name logFC.DIV10.DIV3 adj.P.Val.DIV10.DIV3 condition3 mmusculus
## 2 Gene.name logFC.DIV3.DIV0 adj.P.Val.DIV3.DIV0 condition1 mmusculus
## 3 Gene.name logFC.DIV10.DIV0 adj.P.Val.DIV10.DIV0 condition2 mmusculus
## 4 SYMBOL Log.DIV5.DIV1 Log.DIV5.DIV1.pvalue condition1 rnorvegicus
## 5 SYMBOL Log.DIV14.DIV1 Log.DIV14.DIV1.pvalue condition2 rnorvegicus
## 6 Gene.Symbol logFC.NPC.iPS adj.P.Val.NPC.iPS condition1 hsapiens
## out.label sep complex.name ID.type species condition
## 1 Out/MouseTMT \\t mouseGeneNames SYMBOL Mm DIV10.DIV3
## 2 Out/MouseTMT \\t mouseGeneNames SYMBOL Mm DIV3.DIV0
## 3 Out/MouseTMT \\t mouseGeneNames SYMBOL Mm DIV10.DIV0
## 4 Out/Frese , ratGeneNames SYMBOL Rn DIV5.DIV1
## 5 Out/Frese , ratGeneNames SYMBOL Rn DIV14.DIV1
## 6 Out/Djuric , humanGeneNames SYMBOL Hs NPC.iPS
## paralogs.file
## 1 ../Data/Paralogs/mmusculus_SYMBOL_paralogs_v102.txt
## 2 ../Data/Paralogs/mmusculus_SYMBOL_paralogs_v102.txt
## 3 ../Data/Paralogs/mmusculus_SYMBOL_paralogs_v102.txt
## 4 ../Data/Paralogs/rnorvegicus_SYMBOL_paralogs_v102.txt
## 5 ../Data/Paralogs/rnorvegicus_SYMBOL_paralogs_v102.txt
## 6 ../Data/Paralogs/hsapiens_SYMBOL_paralogs_v102.txt
Here we run a GO enrichment on all the data. We take the information about the different datasets from the `DataInfo.txt` file, and for each dataset we run a GO enrichment analysis between the upregulated genes (Log2FC >= 0.58) and all the rest of the distribution.
# One slot per row of DataInfo, named after that row's `condition` value.
GOALL <- vector(mode = "list", length = nrow(DataInfo))
names(GOALL) <- DataInfo$condition
#Run Plots for all the datasets.
# seq_len() makes the loop a no-op when DataInfo has no rows.
for (N in seq_len(nrow(DataInfo))) {
  #Get data information from DataInfo file
  filename <- DataInfo[N, "filename"]
  Id.col <- DataInfo[N, "Id.col"]
  fold.change <- DataInfo[N, "fold.change"]
  fdr.col <- DataInfo[N, "fdr.col"]
  condition <- DataInfo[N, "condition"]
  organism <- DataInfo[N, "organism"]
  species <- DataInfo[N, "species"]
  sep <- DataInfo[N, "sep"]
  # DataInfo may hold the tab separator as a backslash followed by "t";
  # convert it to a real tab character.
  if (sep == "\\t") {
    sep <- "\t"
  }

  #Read Data
  Data <- read.delim(filename, sep = sep, header = TRUE)

  #Take genes
  # Fold changes named by gene ID. The ID case depends on the species:
  # title case by default, unchanged for "Hs", lower case for "Dr".
  geneList <- Data[, fold.change]
  names(geneList) <- stringr::str_to_title(Data[, Id.col])
  if (species == "Hs") names(geneList) <- (Data[, Id.col])
  if (species == "Dr") names(geneList) <- tolower(Data[, Id.col])

  #GOEnrichment
  # First argument: all gene names; second: those with fold change >= 0.58.
  GO <- spatialR::GOEnrichment(names(geneList),
                               names(geneList)[geneList >= 0.58],
                               species = species, ontology = "BP",
                               topnode = 200)
  #Remove not quantified GOTerms in GOEnrichment
  GO$table <- GO$table[!is.na(GO$table$classic), ]
  GOALL[[condition]] <- GO
}
Summarize GO Terms
# Summarize every enrichment result; list names are inherited from GOALL.
GO.Summ <- lapply(GOALL, function(x) spatialR::SummarizeGO(x))
## Warning: `show_guide` has been deprecated. Please use `show.legend` instead.
## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.
## Warning: `show_guide` has been deprecated. Please use `show.legend` instead.
## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.
## Warning: `show_guide` has been deprecated. Please use `show.legend` instead.
## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.
## Warning: `show_guide` has been deprecated. Please use `show.legend` instead.
## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.
## Warning: `show_guide` has been deprecated. Please use `show.legend` instead.
## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.
## Warning: `show_guide` has been deprecated. Please use `show.legend` instead.
## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.
## Warning: `show_guide` has been deprecated. Please use `show.legend` instead.
## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.
## Warning: `show_guide` has been deprecated. Please use `show.legend` instead.
## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.
## Warning: `show_guide` has been deprecated. Please use `show.legend` instead.
## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.
Add titles to Plots
# Title each summary plot with its condition name, replacing "." with "/".
# Iterating over indices gives access to both the element and its name.
GOSumm.plot <- lapply(seq_along(GO.Summ), function(x) {
  GO.Summ[[x]]$plot +
    ggtitle(gsub("\\.", "/", names(GO.Summ)[x]))
})
#Center and scale the title
GOSumm.plot <- lapply(GOSumm.plot, function(x) {
  x + theme(plot.title = element_text(hjust = 0.5, size = 10))
})
# lapply() over indices drops the names, so restore them from GO.Summ.
names(GOSumm.plot) <- names(GO.Summ)
12.5 Supplementary Figure 4
We now produce the output for the different figures.
12.5.1 Align Plots
Using the cowplot
library we align the figures coming from the PCA analysis and the GOEnrichment.
library(cowplot)
#Align plot
# For each dataset: align the PCA plot with its GO summary plot, then stack
# the two in a single column with the PCA slightly taller.
mouse.pl <- align_plots(pca.1, GOSumm.plot$DIV10.DIV0, align = "hv", axis = "lr")
## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.
mouse.block <- plot_grid(mouse.pl[[1]], mouse.pl[[2]], ncol = 1,
                         rel_heights = c(1.1, 1))

human.pl <- align_plots(pca.2, GOSumm.plot$Neu.IPS, align = "hv", axis = "lr")
## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.
human.block <- plot_grid(human.pl[[1]], human.pl[[2]], ncol = 1,
                         rel_heights = c(1.1, 1))

drerio.pl <- align_plots(pca.3, GOSumm.plot$Neur.Stem, align = "hv", axis = "lr")
## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.
drerio.block <- plot_grid(drerio.pl[[1]], drerio.pl[[2]], ncol = 1,
                          rel_heights = c(1.1, 1))

frese.pl <- align_plots(pca.4, GOSumm.plot$DIV14.DIV1, align = "hv", axis = "lr")
## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.
frese.block <- plot_grid(frese.pl[[1]], frese.pl[[2]], ncol = 1,
                         rel_heights = c(1.1, 1))
12.5.2 Assemble Figure
And assemble it using the plot_grid
function.
# Arrange the four per-dataset blocks on two rows, labelled A to D in the
# order zebrafish, human, mouse, rat.
FigSupp4 <- plot_grid(drerio.block, human.block,
                      mouse.block, frese.block,
                      nrow = 2, labels = c("A", "B", "C", "D"))
# Write the figure to a PDF whose name carries the current date.
pdf(paste0("../out/figures/FigSupp4/FigSupp4_", Sys.Date(), ".pdf"),
    width = 9, height = 12.3)
# print() explicitly so the figure is drawn on the PDF device even when the
# script is run through source(), which does not auto-print top-level values.
print(FigSupp4)
dev.off()
## png
## 2
FigSupp4