Section 12 Mass Spectrometry Datasets Overview
12.1 Load Libraries
library(factoextra)
library(dplyr)
library(spatialR)
source('../ComplexScript/complexes_function.R')12.2 Load Protein Intensity data
We start by loading the protein intensity data from the different datasets and organizing them in a consistent, analysis-friendly format.
# Relative paths to the protein intensity table of each dataset.
# Each variable first holds a file path; the same names are reused further
# down for the data frames read from these files.
Drerio <- "../Data/Dataset/processed/ZebrafishNeurogProcessed.txt"
Djuric <- "../Data/Dataset/processed/Djuric_et_al_2017_processed.csv"
Mouse <- "../Data/Dataset/DataSetProcessing/27052019_MouseNeuron_TMT10_normalized_protein_matrix.csv"
Frese <- "../Data/Dataset/processed/Frese_et_al_2017_processed.csv"And we load them into different dataframes.
# Read every intensity table, overwriting the path stored in each variable
# with the parsed data frame. The zebrafish file is tab-separated, the other
# three are comma-separated; all have a header row.
Drerio <- read.delim(Drerio, sep = "\t", header = TRUE)
Djuric <- read.delim(Djuric, sep = ",", header = TRUE)
# NOTE(review): the comment originally placed here said that the last column
# (not a neuronal sample) is removed, but no column is dropped at this point.
# Presumably this refers to the "Pool" channel - confirm whether it should be
# excluded before the PCA.
Mouse <- read.delim(Mouse, sep = ",", header = TRUE)
Frese <- read.delim(Frese, sep = ",", header = TRUE)
For mouse we add the indications coming from the different samples, and annotate with gene name.
# Rename the mouse columns: the first column becomes "X", followed by three
# numbered samples each for DIV0, DIV3 and DIV10, and a final "Pool" column.
colnames(Mouse) <- c("X","DIV0.1","DIV0.2","DIV0.3","DIV3.1",
"DIV3.2","DIV3.3","DIV10.1","DIV10.2",
"DIV10.3","Pool")
#Annotate with gene name
# Called with the "X" column as identifiers, ";" as identifier separator and
# SYMBOL annotation for organism "Mm". Presumably this appends a gene-name
# column at the end of the table (the PCA section drops the last column as
# gene name) - verify against the spatialR documentation.
Mouse <- spatialR::Annotate(Mouse,organism = "Mm","X",idsep = ";",annot = "SYMBOL")And try to organize them in a standard way by selecting only meaningful columns.
#select interesting columns for different datasets
# Zebrafish: keep columns 2 and 134 plus every column whose name matches
# "iBAQ.L.Mix" or "iBAQ.H.Mix". The patterns are regular expressions, so each
# "." matches any single character.
# NOTE(review): 2 and 134 are positional and depend on the exact file layout;
# presumably they are the protein-ID and gene-name columns - confirm.
col.drerio <- c(2,134,grep("iBAQ.L.Mix",colnames(Drerio)),
grep("iBAQ.H.Mix",colnames(Drerio)))Now we subset for selected columns and convert in log the data from Zebrafish.
#Subset datasets
Drerio <- Drerio[,col.drerio]
# Replace zeros by NA before the log transform (log2(0) would be -Inf).
Drerio[Drerio==0] <- NA
#Convert in Log
# Only columns 3 onwards are transformed; the first two columns are left as is.
Drerio[,-c(1:2)] <- log2(Drerio[,-c(1:2)])
# Djuric: keep column 1 and columns 3 to 12.
Djuric <- Djuric[,c(1,3:12)]12.3 Samples PCA
For every species, we prepare the previously loaded data for the Principal Component Analysis (PCA): we remove the non-numeric columns and transpose the data. We then run the PCA with the prcomp function and plot the results using the fviz_pca_ind function from the factoextra package.
12.3.1 Mouse
First we run this for the Mouse data, we remove the last column that indicates the gene name, we then add to the matrix the genename annotation as rownames, we transpose the matrix in order to have proteins as features and the different conditions as samples. We also annotate with a condition column. And run a PCA with the prcomp function. We finally visualize it using the factoextra function fviz_pca_ind.
# Mouse PCA.
# Drop the last column (gene name) and use the "X" identifier column as row
# names.
PCA.Mouse.data <- Mouse[, -ncol(Mouse)]
rownames(PCA.Mouse.data) <- Mouse$X
# Remove the identifier column and transpose: rows become samples, columns
# become proteins.
PCA.Mouse.data <- t(PCA.Mouse.data[, -1]) %>% as.data.frame()
# Condition label = sample name without its ".<number>" suffix.
PCA.Mouse.data$condition <- gsub("\\.\\d+", "", rownames(PCA.Mouse.data))
# PCA on every column except the trailing condition label.
PCA.Mouse <- prcomp(PCA.Mouse.data[, -ncol(PCA.Mouse.data)])
# Individuals plot coloured by condition, with one ellipse per condition.
pca.1 <- fviz_pca_ind(PCA.Mouse,
                      habillage = PCA.Mouse.data$condition,
                      addEllipses = TRUE,
                      geom = "point") +
  ggtitle("Mouse TMT 10") +
  theme(legend.position = "bottom",
        plot.title = element_text(hjust = 0.5, size = 10))
12.3.2 Human Djuric et al 2017
Human data from Djuric 2017. We remove the column that indicates the gene name, we then add to the matrix the genename annotation as rownames, we transpose the matrix in order to have proteins as features and the different conditions as samples. We also annotate with a condition column. And run a PCA with the prcomp function. We finally visualize it using the factoextra function fviz_pca_ind.
# Human PCA (Djuric et al., 2017).
# Drop the second column, then keep only rows without missing values.
PCA.Human.data <- Djuric[, -2]
PCA.Human.data <- PCA.Human.data[complete.cases(PCA.Human.data), ]
rownames(PCA.Human.data) <- PCA.Human.data$Majority.protein.Uniprot.IDs
# Remove the identifier column and transpose: rows become samples, columns
# become proteins.
PCA.Human.data <- t(PCA.Human.data[, -1]) %>% as.data.frame()
# Condition label = sample name with the "<digits>_<digit>" part removed.
PCA.Human.data$condition <- gsub("\\d+_\\d", "", rownames(PCA.Human.data))
# PCA on every column except the trailing condition label.
PCA.Human <- prcomp(PCA.Human.data[, -ncol(PCA.Human.data)])
# Individuals plot coloured by condition, with one ellipse per condition.
pca.2 <- fviz_pca_ind(PCA.Human,
                      habillage = PCA.Human.data$condition,
                      addEllipses = TRUE,
                      geom = "point") +
  ggtitle("Djuric et al., 2017") +
  theme(legend.position = "bottom",
        plot.title = element_text(hjust = 0.5, size = 10))
12.3.3 Zebrafish Neuron/Stem
Zebrafish Dimethylabelling data. We remove the column that indicates the gene name, we then add to the matrix the genename annotation as rownames, we transpose the matrix in order to have proteins as features and the different conditions as samples. We also annotate with a condition column. And run a PCA with the prcomp function. We finally visualize it using the factoextra function fviz_pca_ind.
# Zebrafish PCA (neuron vs stem).
# Drop the second column and use the protein IDs as row names.
PCA.Drerio.data <- Drerio[, -2]
rownames(PCA.Drerio.data) <- PCA.Drerio.data$Majority.protein.IDs
# Keep only rows without missing values and remove the identifier column.
PCA.Drerio.data <- PCA.Drerio.data[complete.cases(PCA.Drerio.data), -1]
# Transpose: rows become samples, columns become proteins.
PCA.Drerio.data <- t(PCA.Drerio.data) %>% as.data.frame()
# Condition label: ".L." sample names are relabelled "Stem", ".H." sample
# names are relabelled "Neu".
PCA.Drerio.data$condition <- gsub("\\w+\\.L\\.\\w+\\d", "Stem",
                                  rownames(PCA.Drerio.data))
PCA.Drerio.data$condition <- gsub("\\w+\\.H\\.\\w+\\d", "Neu",
                                  PCA.Drerio.data$condition)
# PCA on every column except the trailing condition label.
PCA.Drerio <- prcomp(PCA.Drerio.data[, -ncol(PCA.Drerio.data)])
# Individuals plot coloured by condition, with one ellipse per condition.
pca.3 <- fviz_pca_ind(PCA.Drerio,
                      habillage = PCA.Drerio.data$condition,
                      addEllipses = TRUE,
                      geom = "point") +
  ggtitle("Danio rerio Neuron/Stem") +
  theme(legend.position = "bottom",
        plot.title = element_text(hjust = 0.5, size = 10))
12.3.4 Rat - Frese et al 2017
And rat data from Frese et al., 2017. We select the column that indicates protein quantification values, we then add to the matrix the genename annotation as rownames, we transpose the matrix in order to have proteins as features and the different conditions as samples. We also annotate with a condition column. And run a PCA with the prcomp function. We finally visualize it using the factoextra function fviz_pca_ind.
# Rat PCA (Frese et al., 2017).
# Keep column 11 (used below as SYMBOL) followed by columns 2 to 7.
PCA.frese.data <- Frese[, c(11, 2:7)]
# Gene symbols are made syntactically valid and unique so they can serve as
# row names.
rownames(PCA.frese.data) <- make.names(PCA.frese.data$SYMBOL, unique = TRUE)
# Keep only rows without missing values and remove the symbol column.
PCA.frese.data <- PCA.frese.data[complete.cases(PCA.frese.data), -1]
# Transpose: rows become samples, columns become proteins.
PCA.frese.data <- t(PCA.frese.data) %>% as.data.frame()
# Condition label = sample name without its "<any char>rep<digit>" part.
PCA.frese.data$condition <- gsub(".rep\\d", "", rownames(PCA.frese.data))
# PCA Frese: every column except the trailing condition label.
PCA.frese <- prcomp(PCA.frese.data[, -ncol(PCA.frese.data)])
# Individuals plot coloured by condition, with one ellipse per condition.
pca.4 <- fviz_pca_ind(PCA.frese,
                      habillage = PCA.frese.data$condition,
                      addEllipses = TRUE,
                      geom = "point") +
  ggtitle("Frese et al., 2017") +
  theme(legend.position = "bottom",
        plot.title = element_text(hjust = 0.5, size = 10))
12.4 GOEnrichment on Data
We first load the DataInfo.txt file that contains all the information coming from the different datasets.
# Load data info for each dataset ----
# Tab-separated table with a header row; text columns are kept as character
# vectors rather than factors because they are used below as file names,
# column names and list names.
DataInfo <- read.table("../Data/Dataset/DataInfo.txt", sep = "\t",
                       header = TRUE, stringsAsFactors = FALSE)
We can show the content of the dataset.
head(DataInfo)## filename
## 1 ../Data/Dataset/270519_MouseNeuron_TMT10_contrast_updatedNames.txt
## 2 ../Data/Dataset/270519_MouseNeuron_TMT10_contrast_updatedNames.txt
## 3 ../Data/Dataset/270519_MouseNeuron_TMT10_contrast_updatedNames.txt
## 4 ../Data/Dataset/processed/Frese_et_al_2017_processed.csv
## 5 ../Data/Dataset/processed/Frese_et_al_2017_processed.csv
## 6 ../Data/Dataset/processed/Djuric_et_al_2017_processed.csv
## Id.col fold.change fdr.col condition.col organism
## 1 Gene.name logFC.DIV10.DIV3 adj.P.Val.DIV10.DIV3 condition3 mmusculus
## 2 Gene.name logFC.DIV3.DIV0 adj.P.Val.DIV3.DIV0 condition1 mmusculus
## 3 Gene.name logFC.DIV10.DIV0 adj.P.Val.DIV10.DIV0 condition2 mmusculus
## 4 SYMBOL Log.DIV5.DIV1 Log.DIV5.DIV1.pvalue condition1 rnorvegicus
## 5 SYMBOL Log.DIV14.DIV1 Log.DIV14.DIV1.pvalue condition2 rnorvegicus
## 6 Gene.Symbol logFC.NPC.iPS adj.P.Val.NPC.iPS condition1 hsapiens
## out.label sep complex.name ID.type species condition
## 1 Out/MouseTMT \\t mouseGeneNames SYMBOL Mm DIV10.DIV3
## 2 Out/MouseTMT \\t mouseGeneNames SYMBOL Mm DIV3.DIV0
## 3 Out/MouseTMT \\t mouseGeneNames SYMBOL Mm DIV10.DIV0
## 4 Out/Frese , ratGeneNames SYMBOL Rn DIV5.DIV1
## 5 Out/Frese , ratGeneNames SYMBOL Rn DIV14.DIV1
## 6 Out/Djuric , humanGeneNames SYMBOL Hs NPC.iPS
## paralogs.file
## 1 ../Data/Paralogs/mmusculus_SYMBOL_paralogs_v102.txt
## 2 ../Data/Paralogs/mmusculus_SYMBOL_paralogs_v102.txt
## 3 ../Data/Paralogs/mmusculus_SYMBOL_paralogs_v102.txt
## 4 ../Data/Paralogs/rnorvegicus_SYMBOL_paralogs_v102.txt
## 5 ../Data/Paralogs/rnorvegicus_SYMBOL_paralogs_v102.txt
## 6 ../Data/Paralogs/hsapiens_SYMBOL_paralogs_v102.txt
Here we run a GOEnrichment on all the data. We take the information about the different datasets from the DataInfo.txt file, and for each dataset we run a GOEnrichment analysis between the upregulated genes (Log2FC >= 0.58) and all the rest of the distribution.
# One GO enrichment result per DataInfo row, named by its condition.
GOALL <- vector(mode = "list", length = nrow(DataInfo))
names(GOALL) <- DataInfo$condition
# Run the enrichment for all the datasets.
# seq_len() keeps the loop empty when DataInfo has no rows.
for (N in seq_len(nrow(DataInfo))) {
  # Get the dataset information from the DataInfo file
  filename <- DataInfo[N, "filename"]
  Id.col <- DataInfo[N, "Id.col"]
  fold.change <- DataInfo[N, "fold.change"]
  fdr.col <- DataInfo[N, "fdr.col"]
  condition <- DataInfo[N, "condition"]
  organism <- DataInfo[N, "organism"]
  species <- DataInfo[N, "species"]
  # The separator is stored as the two characters "\t" in the table; turn it
  # into a real tab.
  sep <- DataInfo[N, "sep"]
  if (sep == "\\t") {
    sep <- "\t"
  }
  # Read data
  Data <- read.delim(filename, sep = sep, header = TRUE)
  # Fold changes named by gene identifier. Identifier case depends on the
  # species: unchanged for "Hs", lower case for "Dr", title case otherwise.
  geneList <- Data[, fold.change]
  gene.ids <- Data[, Id.col]
  names(geneList) <- if (species == "Hs") {
    gene.ids
  } else if (species == "Dr") {
    tolower(gene.ids)
  } else {
    stringr::str_to_title(gene.ids)
  }
  # Upregulated genes: fold change of at least 0.58. which() drops genes with
  # a missing fold change; plain logical indexing would return NA names.
  upregulated <- names(geneList)[which(geneList >= 0.58)]
  # GOEnrichment
  GO <- spatialR::GOEnrichment(names(geneList),
                               upregulated,
                               species = species, ontology = "BP",
                               topnode = 200)
  # Remove not quantified GO terms in GOEnrichment
  GO$table <- GO$table[!is.na(GO$table$classic), ]
  # Store by position: same slot as the condition name when names are
  # unique, and no overwriting if a condition label is ever repeated.
  GOALL[[N]] <- GO
}
Summarize GO Terms
# Apply spatialR::SummarizeGO to every enrichment result in GOALL; the $plot
# element of each summary is used further down to build the figure panels.
GO.Summ <- lapply(GOALL,function(x)spatialR::SummarizeGO(x))## Warning: `show_guide` has been deprecated. Please use `show.legend` instead.
## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.
## Warning: `show_guide` has been deprecated. Please use `show.legend` instead.
## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.
## Warning: `show_guide` has been deprecated. Please use `show.legend` instead.
## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.
## Warning: `show_guide` has been deprecated. Please use `show.legend` instead.
## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.
## Warning: `show_guide` has been deprecated. Please use `show.legend` instead.
## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.
## Warning: `show_guide` has been deprecated. Please use `show.legend` instead.
## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.
## Warning: `show_guide` has been deprecated. Please use `show.legend` instead.
## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.
## Warning: `show_guide` has been deprecated. Please use `show.legend` instead.
## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.
## Warning: `show_guide` has been deprecated. Please use `show.legend` instead.
## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.
Add titles to Plots
# Take the plot of every GO summary and title it with its condition name,
# written with "/" instead of "." (e.g. "A.B" becomes "A/B").
GOSumm.plot <- lapply(seq_along(GO.Summ), function(idx) {
  contrast.title <- gsub("\\.", "/", names(GO.Summ)[idx])
  GO.Summ[[idx]]$plot + ggtitle(contrast.title)
})
# Center and scale the title
centered.title <- theme(plot.title = element_text(hjust = 0.5, size = 10))
GOSumm.plot <- lapply(GOSumm.plot, function(panel) panel + centered.title)
# Carry over the condition names so panels can be picked by name
names(GOSumm.plot) <- names(GO.Summ)
12.5 Supplementary Figure 4
We now produce the output for the different figures.
12.5.1 Align Plots
Using the cowplot library we align the figures coming from the PCA analysis and the GOEnrichment.
# For each dataset: align its PCA plot with one GO summary plot, then stack
# the two in a single column with the PCA panel slightly taller (1.1 vs 1).
# NOTE(review): the GO panels are picked with `$`, which partially matches
# list names. "Neu.IPS" and "Neur.Stem" are not among the DataInfo rows shown
# by head() above - confirm these conditions exist in the full table.
library(cowplot)
#Align plot
mouse.pl <- align_plots(pca.1,GOSumm.plot$DIV10.DIV0,align = "hv",axis = "lr")## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.
mouse.block <- plot_grid(mouse.pl[[1]],mouse.pl[[2]],ncol = 1,rel_heights = c(1.1,1))
human.pl <- align_plots(pca.2,GOSumm.plot$Neu.IPS,align = "hv",axis = "lr")## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.
human.block <- plot_grid(human.pl[[1]],human.pl[[2]],ncol = 1,rel_heights = c(1.1,1))
drerio.pl <- align_plots(pca.3,GOSumm.plot$Neur.Stem,align = "hv",axis = "lr")## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.
drerio.block <- plot_grid(drerio.pl[[1]],drerio.pl[[2]],ncol = 1,rel_heights = c(1.1,1))
frese.pl <- align_plots(pca.4,GOSumm.plot$DIV14.DIV1,align = "hv",axis = "lr")## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.
frese.block <- plot_grid(frese.pl[[1]],frese.pl[[2]],ncol = 1,rel_heights = c(1.1,1))12.5.2 Assemble Figure
And assemble it using the plot_grid function.
# Arrange the four dataset blocks in a two-row grid labelled A to D.
FigSupp4 <- plot_grid(drerio.block, human.block,
                      mouse.block, frese.block,
                      nrow = 2,
                      labels = c("A", "B", "C", "D"))
# Save the figure as a dated PDF. The plot is printed explicitly so the file
# is also filled when this code is run through source(), where top-level
# values are not auto-printed.
pdf(paste0("../out/figures/FigSupp4/FigSupp4_", Sys.Date(), ".pdf"),
    width = 9, height = 12.3)
print(FigSupp4)
dev.off()
## png
## 2
# Display the figure in the document
FigSupp4