####################################################
### Deconvolution TCGA-LGG with 'microglia' data ###
####################################################
library(reshape2)
library(ggplot2)
library(SPOTlight)
library(SingleCellExperiment)
library(SpatialExperiment)
library(scater)
library(scran)
library(Seurat)
library(dplyr)
library(DESeq2)
library(tidyr)
library(harmony)
library(cowplot)
## 1- SINCLE CELL DATA ##
# single cell RNA counts
microglia_combined_to_use <- readRDS("/Users/an/Documents/JC_projects/microglia/microglia_combined_to_use.rds")
# marker genes from Imma
microglia_primary <- readRDS("/Users/an/Documents/JC_projects/microglia/microglia_primary.rds")
mgs <- microglia_primary
# Recompute HVG excluding Ribosomal and mitochondrial
mtx <- microglia_combined_to_use@assays$RNA@counts
# remove ribosomal and mitochondrial genes
mtx <- mtx[stringr::str_detect(rownames(mtx), "^RP[L|S]|^MT-", negate = TRUE), ]
sc <- CreateSeuratObject(counts = mtx, meta.data = microglia_combined_to_use@meta.data)
sc <- Seurat::NormalizeData(sc, verbose = FALSE) %>%
Seurat::FindVariableFeatures(selection.method = "vst", nfeatures = 3000)
# hvg top 3000
hvg <- VariableFeatures(sc)
### Subset SC object so all the cells from the same cell type to come from the same batch
# subset most representative sample(s)
microglia_combined_to_use@meta.data$cell_id <- rownames(microglia_combined_to_use@meta.data)
ns <- with(microglia_combined_to_use@meta.data, table(cell_id, labelling))
ns <- as.matrix(unclass(ns))
m <- 100 # Max cells per cell type
# Extract cell barcodes
meta <- microglia_combined_to_use@meta.data
id <- lapply(colnames(ns), function(nm) {
x <- ns[, nm]
# Initialize variables
n <- 0 # N of cells
s <- c() # Gem IDs
b <- c() # Cell barcodes
while (n < m & length(x) > 0) {
# select gem id with the most cells
i <- names(sort(x, decreasing = TRUE))[[1]]
# Add gem id to vector s
s <- c(s, i)
# Add number of cells per cell type to n
n <- n + x[[i]]
# Remove gem id from x to move on to the next
x <- x[names(x) != i]
# extract barcode
barcode <- rownames(meta[meta[, "cell_id"] == i &
meta[, "labelling"] == nm, ])
# make sure it adds up to m
# print(barcode)
if ((length(b) + length(barcode)) > m) {
barcode <- sample(x = barcode, size = m - length(b), replace = FALSE)
}
b <- c(b, barcode) # Cell barcodes
}
return(b)
})
sc_sub <- microglia_combined_to_use[, unlist(id)]
table(sc_sub@meta.data$labelling)
gc()
## 2- TCGA-LGG DATA COLLECTION AND SELECTION ##
# # TCGA RNA-seq data download:
# library(TCGAbiolinks)
#
# query <- GDCquery(
# project = "TCGA-LGG",
# data.category = "Gene expression",
# data.type = "Gene expression quantification",
# platform = "Illumina HiSeq",
# file.type = "normalized_results",
# experimental.strategy = "RNA-Seq",
# legacy = TRUE
# )
# GDCdownload(
# query = query,
# method = "api",
# files.per.chunk = 10
# )
# data <- GDCprepare(query = query)
# data_LGG <- data
# save(data_LGG,file="/Users/an/Documents/JC_projects/microglia/TCGA_RNA_data_LGG.RData")
# Data downloaded from TCGA: (with the script hided before)
load("/Users/an/Documents/JC_projects/microglia/TCGA_RNA_data_LGG.RData")
# Selection of the same list of patients Imma had:
# tables TCGA patients (from Imma)
astro_proportion <- read.csv("/Users/an/Documents/JC_projects/tcga_survival/astro_proportion.csv",header=TRUE)
oligo_proportion <- read.csv("/Users/an/Documents/JC_projects/tcga_survival/oligo_proportion.csv",header=TRUE)
# list of patients
astro_patients <- astro_proportion$ID.y
oligo_patients <- oligo_proportion$ID.y
all_patients <- c(astro_patients,oligo_patients) # to select the same patients
# subset expression data
data_LGG_subset <- data_LGG[,which(data_LGG@colData$patient %in% all_patients)]
# For some patients there are more than one RNA sample:
duplicated_patients <- unique(data_LGG_subset@colData$patient[which(duplicated(data_LGG_subset@colData$patient))]) # 12 patients
metadata_duplis <- as.data.frame(data_LGG_subset@colData[which(data_LGG_subset@colData$patient %in% duplicated_patients), 1:59])
for(patient in duplicated_patients){
print(patient)
print(metadata_duplis[which(metadata_duplis$patient == patient),])
print("")
print("----------------------------------------")
print("")
print("")
}
# Select the "Primary solid Tumor"
selected_barcodes <- c("TCGA-DU-6397-01A-11R-1708-07",
"TCGA-DU-5872-01A-11R-1708-07",
"TCGA-TQ-A7RV-01A-21R-A34F-07",
"TCGA-DU-7304-01A-12R-2090-07",
"TCGA-FG-A4MT-01A-11R-A26U-07",
"TCGA-TQ-A7RK-01A-11R-A33Z-07",
"TCGA-DU-5870-01A-11R-1708-07",
"TCGA-DU-6407-01A-13R-1708-07",
"TCGA-FG-5965-01B-11R-1896-07",
"TCGA-TQ-A8XE-01A-11R-A36H-07",
"TCGA-DH-A669-01A-12R-A31N-07",
"TCGA-TM-A7CF-01A-11R-A32Q-07")
# TCGA RNAseq data subset unique samples, 418 in total:
data_LGG_subset_unique <- data_LGG_subset[,-which(data_LGG_subset@colData$patient %in% duplicated_patients & ! data_LGG_subset@colData$barcode %in% selected_barcodes)]
counts_expr_lgg <- as.data.frame(assay(data_LGG_subset_unique))
counts_expr_lgg_matrix <- as.matrix(counts_expr_lgg)
## 3- RUN SPOTLIGHT ##
res <- SPOTlight(
x = sc_sub,
y = counts_expr_lgg_matrix,
groups = as.character(sc_sub$labelling),
mgs = mgs,
hvg = hvg,
weight_id = "avg_log2FC",
group_id = "cluster",
gene_id = "gene")
save(res, file=paste0("/Users/an/Documents/JC_projects/microglia/res_spotlight_tcga_LGG_mgsImma_hvgComputed.RData"))
## 4- PLOT TOPIC PROFILES ##
pdf(file=paste0("/Users/an/Documents/JC_projects/microglia/plot_TopicProfiles_tcga_LGG_mgsImma_hvgComputed.pdf"),heigh=8,width=8)
plotTopicProfiles(
x = res$NMF,
y = sc_sub$labelling,
facet = FALSE,
min_prop = 0.01,
ncol = 1) +
theme(aspect.ratio = 1)
dev.off()
## 5- PLOT HEATMAP WITH THE PROPORTIONS OF THE 10 CELL CATEGORIES, ANNOTATED IF THE SAMPLE IS ASTRO OR OLIGO ##
library(grid)
library(ComplexHeatmap)
input_data <- as.matrix(res$mat)
#annotation astro and oligo
out_mat <- res$mat
patients <- rownames(out_mat)
out_df <- as.data.frame(out_mat)
out_df$patient_id <- patients
out_df$group <- rep(NA,nrow(out_df))
out_df[which(out_df$patient_id %in% astro_patients),"group"] <- "ASTRO"
out_df[which(out_df$patient_id %in% oligo_patients),"group"] <- "OLIGO"
as.data.frame(table(out_df$group))
# Var1 Freq
# 1 ASTRO 249
# 2 OLIGO 169
row_ha = rowAnnotation(group = out_df$group)
heatmap <- Heatmap(as.matrix(res$mat),
row_names_gp = gpar(fontsize = 2),
column_names_gp = gpar(fontsize = 8),
cluster_rows = TRUE,cluster_columns=TRUE,
right_annotation = row_ha) #top_annotation = column_ha
ht = draw(heatmap)
pdf(paste0("/Users/an/Documents/JC_projects/microglia/heatmap_deconv_tcga_LGG_mgsImma_hvgComputed.pdf"),heigh=15,width=8)
ht
dev.off()
# Heatmap only OLIGO:
data_oligo <- res$mat[which(rownames(res$mat) %in% oligo_patients),]
heatmap_res <- Heatmap(as.matrix(data_oligo),
row_names_gp = gpar(fontsize = 4),
column_names_gp = gpar(fontsize = 10),
cluster_rows = TRUE,#cluster_columns = FALSE,
heatmap_legend_param = list(title = "proportion",legend_height = unit(4, "cm"),
title_position = "lefttop-rot")
)
ht = draw(heatmap_res)
pdf(paste0("/Users/an/Documents/JC_projects/microglia/heatmap_deconv_tcga_LGG_mgsImma_hvgComputed_only_OLIGO.pdf"),heigh=15,width=8)
ht
dev.off()
# Heatmap only ASTRO:
data_astro <- res$mat[which(rownames(res$mat) %in% astro_patients),]
heatmap_res <- Heatmap(as.matrix(data_astro),
row_names_gp = gpar(fontsize = 4),
column_names_gp = gpar(fontsize = 10),
cluster_rows = TRUE,#cluster_columns = FALSE,
heatmap_legend_param = list(title = "proportion",legend_height = unit(4, "cm"),
title_position = "lefttop-rot")
)
ht = draw(heatmap_res)
pdf(paste0("/Users/an/Documents/JC_projects/microglia/heatmap_deconv_tcga_LGG_mgsImma_hvgComputed_only_ASTRO.pdf"),heigh=15,width=8)
ht
dev.off()
## 6- BOXPLOTS PROPORTION PER CELL CATEGORY ASTRO VS OLIGO ##
library(ggpubr)
out_deconv <- res$mat
out_deconv_melted <- melt(out_deconv)
colnames(out_deconv_melted) <- c("patient_id","cluster","value")
out_deconv_melted$group <- rep(NA,nrow(out_deconv_melted))
out_deconv_melted[which(out_deconv_melted$patient_id %in% astro_patients),"group"] <- "ASTRO"
out_deconv_melted[which(out_deconv_melted$patient_id %in% oligo_patients),"group"] <- "OLIGO"
clusters <- as.character(unique(out_deconv_melted$cluster))
for(cluster in clusters){
print(cluster)
subset <- out_deconv_melted[which(out_deconv_melted$cluster==cluster),]
cellname <- gsub(" ","_",cluster)
cellname <- gsub("-","_",cellname)
cellname <- gsub("+","",cellname,fixed=TRUE)
plot <- ggplot(subset, aes(y = value,x=group, fill=group)) +
geom_violin() +
#geom_violin(color=col_cells[which(names(col_cells) == cluster)],trim=FALSE) +
scale_fill_manual(values=c("ASTRO"="lightsalmon","OLIGO"="mediumaquamarine")) +
geom_boxplot(color="black",width=0.1) +
ylab(paste0("Proportion of ",cluster," cluster")) +
xlab("") +
#ggtitle(paste0(cluster)) +
stat_compare_means(size=6,label.x = 1.3) +
#ylim(0,100) +
theme(axis.text.x = element_text(size=20, hjust=0.5,vjust=0.5,face="bold"),
axis.text.y = element_text(size=18, hjust=1, vjust = 0.5),
axis.title.x = element_text(size=22, face= "bold"),
axis.title.y = element_text(size=22, face= "bold"),
legend.title = element_text(size = 12, face="bold"),
strip.text.x = element_text(size=18, color="black",face="bold"),
plot.title = element_text(hjust = 0.5, face="bold",size=25),
legend.text = element_text(size = 12), legend.position = "none")
ggsave(plot, file=paste0("/Users/an/Documents/JC_projects/microglia/boxplots/comparison_deconv_astro_oligo_",cellname,"_violin.pdf"), width=15,height=20, units = "cm",limitsize = FALSE)
plot <- ggplot(subset, aes(y = value,x=group, fill=group)) +
#geom_violin() +
#geom_violin(color=col_cells[which(names(col_cells) == cluster)],trim=FALSE) +
scale_fill_manual(values=c("ASTRO"="lightsalmon","OLIGO"="mediumaquamarine")) +
geom_boxplot(color="black",width=0.5) +
ylab(paste0("Proportion of ",cluster)) +
xlab("") +
#ggtitle(paste0(cluster)) +
stat_compare_means(size=6,label.x = 1.3) +
#ylim(0,100) +
theme(axis.text.x = element_text(size=20, hjust=0.5,vjust=0.5,face="bold"),
axis.text.y = element_text(size=18, hjust=1, vjust = 0.5),
axis.title.x = element_text(size=22, face= "bold"),
axis.title.y = element_text(size=22, face= "bold"),
legend.title = element_text(size = 12, face="bold"),
strip.text.x = element_text(size=18, color="black",face="bold"),
plot.title = element_text(hjust = 0.5, face="bold",size=25),
legend.text = element_text(size = 12), legend.position = "none")
ggsave(plot, file=paste0("/Users/an/Documents/JC_projects/microglia/boxplots/comparison_deconv_astro_oligo_",cellname,"_boxplot.pdf"), width=20,height=20, units = "cm",limitsize = FALSE)
}