Section 12 Mass Spectrometry Datasets Overview

12.1 Load Libraries

library(factoextra)
library(dplyr)
library(spatialR)

source('../ComplexScript/complexes_function.R')

12.2 Load Protein Intensity data

We start by loading the protein intensity data from the different datasets and organizing them in an analysis-friendly way.

# Locations of the per-dataset protein intensity tables, keyed by dataset.
# Each dataset name first holds its file path as a plain string.
dataset.paths <- c(
  Drerio = "../Data/Dataset/processed/ZebrafishNeurogProcessed.txt",
  Djuric = "../Data/Dataset/processed/Djuric_et_al_2017_processed.csv",
  Mouse = "../Data/Dataset/DataSetProcessing/27052019_MouseNeuron_TMT10_normalized_protein_matrix.csv",
  Frese = "../Data/Dataset/processed/Frese_et_al_2017_processed.csv"
)
Drerio <- dataset.paths[["Drerio"]]
Djuric <- dataset.paths[["Djuric"]]
Mouse <- dataset.paths[["Mouse"]]
Frese <- dataset.paths[["Frese"]]

We then load each file into its own data frame.

# Read every intensity table into its own data frame. Each variable holds the
# file path on the right-hand side and is overwritten by the loaded table.
# Zebrafish is tab-separated; the other three files are comma-separated.
Drerio <- read.delim(Drerio, sep = "\t", header = TRUE)
Djuric <- read.delim(Djuric, sep = ",", header = TRUE)
# NOTE(review): the comment that used to sit here announced the removal of a
# last column that is not a neuron sample, but no statement in this chunk
# drops any column - confirm whether one still has to be excluded.
Mouse <- read.delim(Mouse, sep = ",", header = TRUE)
Frese <- read.delim(Frese, sep = ",", header = TRUE)

For the mouse dataset, we rename the columns after the corresponding samples and annotate each entry with its gene name.

# Column labels for the mouse matrix: the identifier column "X", the DIV0,
# DIV3 and DIV10 samples (suffixes .1 to .3) and the trailing "Pool" column.
mouse.labels <- c(
  "X",
  "DIV0.1", "DIV0.2", "DIV0.3",
  "DIV3.1", "DIV3.2", "DIV3.3",
  "DIV10.1", "DIV10.2", "DIV10.3",
  "Pool"
)
colnames(Mouse) <- mouse.labels

# Annotate with gene name (SYMBOL), using column "X" as the identifier column
# and ";" as the separator between multiple identifiers.
Mouse <- spatialR::Annotate(Mouse, "X",
                            organism = "Mm",
                            idsep = ";",
                            annot = "SYMBOL")

We then organize the datasets in a standard way by selecting only the meaningful columns.

# Columns of interest for the zebrafish table: the fixed positions 2 and 134,
# then every column whose name matches "iBAQ.L.Mix", then every column whose
# name matches "iBAQ.H.Mix".
drerio.names <- colnames(Drerio)
ibaq.l.cols <- grep("iBAQ.L.Mix", drerio.names)
ibaq.h.cols <- grep("iBAQ.H.Mix", drerio.names)
col.drerio <- c(2, 134, ibaq.l.cols, ibaq.h.cols)

Now we subset the selected columns and log2-transform the zebrafish data.

# Djuric: keep column 1 and columns 3 to 12.
Djuric <- Djuric[, c(1, 3:12)]

# Zebrafish: keep the selected columns and turn exact zeros into NA before the
# log transform (log2(0) would give -Inf).
Drerio <- Drerio[, col.drerio]
Drerio[Drerio == 0] <- NA

# log2-transform every column except the first two.
drerio.values <- Drerio[, -(1:2)]
Drerio[, -(1:2)] <- log2(drerio.values)

12.3 Samples PCA

For every species, we load and prepare the data for the Principal Component Analysis: we remove the non-numeric columns and transpose the data. We then run a PCA with the prcomp function and plot the results using the fviz_pca_ind function from the factoextra package.

12.3.1 Mouse

First we run this for the mouse data. We remove the last column, which holds the gene name, set the identifiers from column X as row names, and transpose the matrix so that proteins are the features and the different conditions are the samples. We also add a condition column. We then run a PCA with the prcomp function and visualize it using the factoextra function fviz_pca_ind.

# Drop the last column of Mouse (the gene-name annotation) and use the X
# identifiers as row names.
PCA.Mouse.data <- Mouse[, -ncol(Mouse)]
rownames(PCA.Mouse.data) <- Mouse$X
# Remove the identifier column and transpose: rows become samples, columns
# become the entries named by X.
PCA.Mouse.data <- t(PCA.Mouse.data[, -1]) %>% as.data.frame()
# Condition label = sample name with every ".<digits>" part removed.
PCA.Mouse.data$condition <- gsub("\\.\\d+", "", rownames(PCA.Mouse.data))
# PCA on everything except the trailing condition column.
PCA.Mouse <- prcomp(PCA.Mouse.data[, -ncol(PCA.Mouse.data)])

# Individuals plot coloured by condition, with ellipses per group.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
pca.1 <- fviz_pca_ind(PCA.Mouse,
                      habillage = PCA.Mouse.data$condition,
                      addEllipses = TRUE,
                      geom = "point") +
  ggtitle("Mouse TMT 10") +
  theme(legend.position = "bottom",
        plot.title = element_text(hjust = 0.5, size = 10))

12.3.2 Human Djuric et al 2017

Human data from Djuric et al., 2017. We remove the second column, keep only the rows without missing values, set the Uniprot protein identifiers as row names, and transpose the matrix so that proteins are the features and the different conditions are the samples. We also add a condition column. We then run a PCA with the prcomp function and visualize it using the factoextra function fviz_pca_ind.

# Drop the second column of Djuric and keep only rows without missing values.
PCA.Human.data <- Djuric[, -2]
PCA.Human.data <- PCA.Human.data[complete.cases(PCA.Human.data), ]
# Use the Uniprot identifiers as row names, then drop that column and
# transpose so that rows are samples.
rownames(PCA.Human.data) <- PCA.Human.data$Majority.protein.Uniprot.IDs
PCA.Human.data <- t(PCA.Human.data[, -1]) %>% as.data.frame()
# Condition label = sample name with the "<digits>_<digit>" part removed.
PCA.Human.data$condition <- gsub("\\d+_\\d", "", rownames(PCA.Human.data))
# PCA on everything except the trailing condition column.
PCA.Human <- prcomp(PCA.Human.data[, -ncol(PCA.Human.data)])

# Individuals plot coloured by condition, with ellipses per group.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
pca.2 <- fviz_pca_ind(PCA.Human,
                      habillage = PCA.Human.data$condition,
                      addEllipses = TRUE,
                      geom = "point") +
  ggtitle("Djuric et al., 2017") +
  theme(legend.position = "bottom",
        plot.title = element_text(hjust = 0.5, size = 10))

12.3.3 Zebrafish Neuron/Stem

Zebrafish dimethyl-labelling data. We remove the second column, set the protein identifiers as row names, keep only the rows without missing values, and transpose the matrix so that proteins are the features and the different conditions are the samples. We also add a condition column. We then run a PCA with the prcomp function and visualize it using the factoextra function fviz_pca_ind.

# Drop the second column of Drerio and use the Majority.protein.IDs values as
# row names.
PCA.Drerio.data <- Drerio[, -2]
rownames(PCA.Drerio.data) <- PCA.Drerio.data$Majority.protein.IDs
# Keep only rows without missing values and drop the identifier column.
PCA.Drerio.data <- PCA.Drerio.data[complete.cases(PCA.Drerio.data), -1]
# Transpose so that rows are samples.
PCA.Drerio.data <- t(PCA.Drerio.data) %>% as.data.frame()
# Sample names containing ".L." are labelled "Stem", those containing ".H."
# are labelled "Neu".
PCA.Drerio.data$condition <- gsub("\\w+\\.L\\.\\w+\\d", "Stem", rownames(PCA.Drerio.data))
PCA.Drerio.data$condition <- gsub("\\w+\\.H\\.\\w+\\d", "Neu", PCA.Drerio.data$condition)

#PCA Drerio (all columns except the trailing condition column)
PCA.Drerio <- prcomp(PCA.Drerio.data[, -ncol(PCA.Drerio.data)])

# Individuals plot coloured by condition, with ellipses per group.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
pca.3 <- fviz_pca_ind(PCA.Drerio,
                      habillage = PCA.Drerio.data$condition,
                      addEllipses = TRUE,
                      geom = "point") +
  ggtitle("Danio rerio Neuron/Stem") +
  theme(legend.position = "bottom",
        plot.title = element_text(hjust = 0.5, size = 10))

12.3.4 Rat - Frese et al 2017

Rat data from Frese et al., 2017. We select the columns holding the protein quantification values, set the gene names as row names, keep only the rows without missing values, and transpose the matrix so that proteins are the features and the different conditions are the samples. We also add a condition column. We then run a PCA with the prcomp function and visualize it using the factoextra function fviz_pca_ind.

# Take column 11 followed by columns 2 to 7 of Frese; the SYMBOL column is
# used for the row names (made syntactically valid and unique).
PCA.frese.data <- Frese[, c(11, 2:7)]
rownames(PCA.frese.data) <- make.names(PCA.frese.data$SYMBOL, unique = TRUE)
# Keep only rows without missing values and drop the first column.
PCA.frese.data <- PCA.frese.data[complete.cases(PCA.frese.data), -1]
# Transpose so that rows are samples.
PCA.frese.data <- t(PCA.frese.data) %>% as.data.frame()
# Condition label = sample name with the "<any char>rep<digit>" part removed.
PCA.frese.data$condition <- gsub(".rep\\d", "", rownames(PCA.frese.data))

#PCA Frese (all columns except the trailing condition column)
PCA.frese <- prcomp(PCA.frese.data[, -ncol(PCA.frese.data)])

# Individuals plot coloured by condition, with ellipses per group.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
pca.4 <- fviz_pca_ind(PCA.frese,
                      habillage = PCA.frese.data$condition,
                      addEllipses = TRUE,
                      geom = "point") +
  ggtitle("Frese et al., 2017") +
  theme(legend.position = "bottom",
        plot.title = element_text(hjust = 0.5, size = 10))

12.4 GOEnrichment on Data

We first load the DataInfo.txt file that contains all the information coming from the different datasets.

#Load data info for each dataset < ----
# Tab-separated table with a header row; text columns are kept as character
# (not factors) so they can be used directly as file paths, column names and
# list names. TRUE/FALSE are spelled out because T and F can be reassigned.
DataInfo <- read.table("../Data/Dataset/DataInfo.txt",
                       sep = "\t",
                       header = TRUE,
                       stringsAsFactors = FALSE)

We can show the content of the dataset.

# Print the first rows of DataInfo; the "##" lines that follow are the
# rendered console output.
head(DataInfo)
##                                                             filename
## 1 ../Data/Dataset/270519_MouseNeuron_TMT10_contrast_updatedNames.txt
## 2 ../Data/Dataset/270519_MouseNeuron_TMT10_contrast_updatedNames.txt
## 3 ../Data/Dataset/270519_MouseNeuron_TMT10_contrast_updatedNames.txt
## 4           ../Data/Dataset/processed/Frese_et_al_2017_processed.csv
## 5           ../Data/Dataset/processed/Frese_et_al_2017_processed.csv
## 6          ../Data/Dataset/processed/Djuric_et_al_2017_processed.csv
##        Id.col      fold.change               fdr.col condition.col    organism
## 1   Gene.name logFC.DIV10.DIV3  adj.P.Val.DIV10.DIV3    condition3   mmusculus
## 2   Gene.name  logFC.DIV3.DIV0   adj.P.Val.DIV3.DIV0    condition1   mmusculus
## 3   Gene.name logFC.DIV10.DIV0  adj.P.Val.DIV10.DIV0    condition2   mmusculus
## 4      SYMBOL    Log.DIV5.DIV1  Log.DIV5.DIV1.pvalue    condition1 rnorvegicus
## 5      SYMBOL   Log.DIV14.DIV1 Log.DIV14.DIV1.pvalue    condition2 rnorvegicus
## 6 Gene.Symbol    logFC.NPC.iPS     adj.P.Val.NPC.iPS    condition1    hsapiens
##      out.label sep   complex.name ID.type species  condition
## 1 Out/MouseTMT \\t mouseGeneNames  SYMBOL      Mm DIV10.DIV3
## 2 Out/MouseTMT \\t mouseGeneNames  SYMBOL      Mm  DIV3.DIV0
## 3 Out/MouseTMT \\t mouseGeneNames  SYMBOL      Mm DIV10.DIV0
## 4    Out/Frese   ,   ratGeneNames  SYMBOL      Rn  DIV5.DIV1
## 5    Out/Frese   ,   ratGeneNames  SYMBOL      Rn DIV14.DIV1
## 6   Out/Djuric   , humanGeneNames  SYMBOL      Hs    NPC.iPS
##                                           paralogs.file
## 1   ../Data/Paralogs/mmusculus_SYMBOL_paralogs_v102.txt
## 2   ../Data/Paralogs/mmusculus_SYMBOL_paralogs_v102.txt
## 3   ../Data/Paralogs/mmusculus_SYMBOL_paralogs_v102.txt
## 4 ../Data/Paralogs/rnorvegicus_SYMBOL_paralogs_v102.txt
## 5 ../Data/Paralogs/rnorvegicus_SYMBOL_paralogs_v102.txt
## 6    ../Data/Paralogs/hsapiens_SYMBOL_paralogs_v102.txt

Here we run a GO enrichment on all the data. We take the information about the different datasets from the DataInfo.txt file, and for each dataset we run a GO enrichment analysis between the upregulated genes (Log2FC >= 0.58) and all the rest of the distribution.

# One result slot per DataInfo row, labelled with that row's condition.
GOALL <- vector(mode = "list", length = nrow(DataInfo))
names(GOALL) <- DataInfo$condition

#Run the GO enrichment for all the datasets.
# seq_len() keeps the loop from running when DataInfo has zero rows
# (1:nrow(DataInfo) would iterate over c(1, 0)).
for (N in seq_len(nrow(DataInfo)))
{
  #Get data information from DataInfo file
  filename <- DataInfo[N, "filename"]
  Id.col <- DataInfo[N, "Id.col"]
  fold.change <- DataInfo[N, "fold.change"]
  fdr.col <- DataInfo[N, "fdr.col"]
  condition <- DataInfo[N, "condition"]
  organism <- DataInfo[N, "organism"]
  species <- DataInfo[N, "species"]
  sep <- DataInfo[N, "sep"]
  # DataInfo stores the tab separator as the two characters backslash + t;
  # turn it into a real tab character.
  if (sep == "\\t") {
    sep <- "\t"
  }

  #Read Data
  Data <- read.delim(filename, sep = sep, header = TRUE)

  #Take genes: fold changes named by gene identifier
  geneList <- Data[, fold.change]
  # Identifier case depends on the species: unchanged for "Hs", lower case for
  # "Dr", title case for every other species.
  if (species == "Hs") {
    names(geneList) <- Data[, Id.col]
  } else if (species == "Dr") {
    names(geneList) <- tolower(Data[, Id.col])
  } else {
    names(geneList) <- stringr::str_to_title(Data[, Id.col])
  }

  # Genes with fold change >= 0.58. which() drops NA fold changes; plain
  # logical indexing would return an NA name for each of them.
  upregulated <- names(geneList)[which(geneList >= 0.58)]

  #GOEnrichment: all gene names first, the upregulated subset second
  GO <- spatialR::GOEnrichment(names(geneList),
                               upregulated,
                               species = species, ontology = "BP",
                               topnode = 200)

  #Remove not quantified GOTerms in GOEnrichment
  GO$table <- GO$table[!is.na(GO$table$classic), ]
  # Store by position: the slot names were set from DataInfo$condition in the
  # same order, and positional assignment stays correct even if two rows share
  # a condition label.
  GOALL[[N]] <- GO
}

Summarize GO Terms

# Summarize the GO result of every condition; names are carried over from GOALL.
GO.Summ <- lapply(GOALL, spatialR::SummarizeGO)
## Warning: `show_guide` has been deprecated. Please use `show.legend` instead.
## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.
## Warning: `show_guide` has been deprecated. Please use `show.legend` instead.

## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.

## Warning: `show_guide` has been deprecated. Please use `show.legend` instead.

## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.

## Warning: `show_guide` has been deprecated. Please use `show.legend` instead.

## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.

## Warning: `show_guide` has been deprecated. Please use `show.legend` instead.

## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.

## Warning: `show_guide` has been deprecated. Please use `show.legend` instead.

## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.

## Warning: `show_guide` has been deprecated. Please use `show.legend` instead.

## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.

## Warning: `show_guide` has been deprecated. Please use `show.legend` instead.

## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.

## Warning: `show_guide` has been deprecated. Please use `show.legend` instead.

## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.

Add titles to Plots

# Build the titled plot for the idx-th summary: the title is the condition
# name with "." shown as "/", centred and set to size 10.
add.title <- function(idx) {
  plot.label <- gsub("\\.", "/", names(GO.Summ)[idx])
  titled <- GO.Summ[[idx]]$plot + ggtitle(plot.label)
  titled + theme(plot.title = element_text(hjust = 0.5, size = 10))
}

GOSumm.plot <- lapply(seq_along(GO.Summ), add.title)
# Index-based lapply() loses the names, so restore the condition names.
names(GOSumm.plot) <- names(GO.Summ)

12.5 Supplementary Figure 4

We now produce the output for the different figures.

12.5.1 Align Plots

Using the cowplot library we align the figures coming from the PCA analysis and the GOEnrichment.

library(cowplot)

#Align plot
# align.pair: align a PCA plot with its GO summary plot ("hv", left/right axes).
align.pair <- function(pca.plot, go.plot) {
  align_plots(pca.plot, go.plot, align = "hv", axis = "lr")
}
# stack.pair: stack the two aligned plots in one column, the top one slightly
# taller (relative heights 1.1 : 1).
stack.pair <- function(aligned) {
  plot_grid(aligned[[1]], aligned[[2]], ncol = 1, rel_heights = c(1.1, 1))
}

# NOTE(review): `$` returns NULL when no condition name matches. Only the
# first six DataInfo rows are visible in the preview, so confirm that
# conditions named Neu.IPS and Neur.Stem exist with exactly this spelling.
mouse.pl <- align.pair(pca.1, GOSumm.plot$DIV10.DIV0)
## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.
mouse.block <- stack.pair(mouse.pl)

human.pl <- align.pair(pca.2, GOSumm.plot$Neu.IPS)
## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.
human.block <- stack.pair(human.pl)

drerio.pl <- align.pair(pca.3, GOSumm.plot$Neur.Stem)
## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.
drerio.block <- stack.pair(drerio.pl)

frese.pl <- align.pair(pca.4, GOSumm.plot$DIV14.DIV1)
## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.
frese.block <- stack.pair(frese.pl)

12.5.2 Assemble Figure

And assemble it using the plot_grid function.

# Two-row grid of the four species blocks, labelled A to D in the order
# zebrafish, human, mouse, rat.
FigSupp4 <- plot_grid(drerio.block, human.block,
                      mouse.block, frese.block, nrow = 2,
                      labels = c("A", "B", "C", "D"))

# Dated output file; create the target folder if it is missing, because pdf()
# fails when the directory does not exist.
fig.file <- paste0("../out/figures/FigSupp4/FigSupp4_", Sys.Date(), ".pdf")
dir.create(dirname(fig.file), recursive = TRUE, showWarnings = FALSE)

pdf(fig.file, width = 9, height = 12.3)
# Explicit print(): auto-printing only happens at top level, so a bare
# `FigSupp4` would leave the PDF empty when this code runs through source()
# or inside a function.
print(FigSupp4)
dev.off()
## png 
##   2
# Also draw the figure on the current device.
print(FigSupp4)