# Enrique Blanco Carmona# e.blancocarmona@kitz-heidelberg.de# PhD Student – Clinical Bioinformatics# Division of Pediatric Neurooncology (B062)# DKFZ-KiTZ | Germany##Input:# Peak files output of CellRanger# Code heavily based on Signac's vignette: https://satijalab.org/signac/articles/merging.html# Generate the following objects.# Vector containig all the sample IDs.atac_samples<-c("sample_a","sample_b","sample_c")# List containing as names the IDs in atac_samples and as values the full path to the peaks file.peak.list<-list("sample_a"="full_path_to_peaks_file","sample_b"="full_path_to_peaks_file","sample_c"="full_path_to_peaks_file")# Generate a list with the peak objects.peak.out<-list()for(s.nameinatac_samples){db<-generate_database(sprintf("/icgc/dkfzlsdf/analysis/OE0145_projects/idh_gliomas/scripts/main/06_scana/config_files/%s_config.yml", s.name))peaks<-read.table(file =peak.list[[s.name]], col.names =c("chr", "start", "end"))peak.out[[s.name]]<-peaks}# Convert peaks into GRanges (Adapt for the number of samples you have).gr.1<-GenomicRanges::makeGRangesFromDataFrame(peak.out$`sample_a`)gr.2<-GenomicRanges::makeGRangesFromDataFrame(peak.out$`sample_b`)gr.3<-GenomicRanges::makeGRangesFromDataFrame(peak.out$`sample_c`)# Put them back to a vector.reduce.vector<-c(gr.1, gr.2, gr.3, gr.4, gr.5, gr.6, gr.7, gr.8, gr.9, gr.10, gr.11)# Create an unified set of peaks to quantify in each dataset.combined.peaks<-GenomicRanges::reduce(x =reduce.vector)# Generate a list for the fragment files location.fragment.paths<-list("sample_a"="full_path_to_fragments_file","sample_b"="full_path_to_fragments_file","sample_c"="full_path_to_fragments_file")# Generate a list for the metadata files location.metadata.paths<-list("sample_a"="full_path_to_metadata_file","sample_b"="full_path_to_metadata_file","sample_c"="full_path_to_metadata_file")# Generate a list to store the fragment objects.fragment.list<-list()# Will contain the framgent objects.md.list<-list()# Will contain the metadata objects.for(s.nameinatac_samples){md<-read.table(file =metadata.paths[[s.name]], stringsAsFactors =FALSE, sep =",", header =TRUE, row.names =1,)[-1, ]# Filter low count cells.md<-md[md$passed_filters>500, ]md.list[[s.name]]<-md# Create fragment object.frag.md<-Signac::CreateFragmentObject(path =fragment.paths[[s.name]], cells =rownames(md))fragment.list[[s.name]]<-frag.md}# Generate count matrices and store them in a list.count.list<-list()for(s.nameinatac_samples){ct<-Signac::FeatureMatrix(fragments =fragment.list[[s.name]], features =combined.peaks, cells =rownames(md.list[[s.name]]))count.list[[s.name]]<-ct}# Generate a list that contains the signac objects.sample.list<-list()# Generate an annotation object to include to the Signac object. GenomeInfoDb::seqlevelsStyle(EnsDb.Hsapiens.v86)<-"UCSC"annotation<-GetGRangesFromEnsDb(EnsDb.Hsapiens.v86)Signac::genome(annotation)<-"hg38"for(s.nameinsamples.use){s.assay<-Signac::CreateChromatinAssay(count.list[[s.name]], fragments =fragment.list[[s.name]], genome ="hg38", annotation =annotation)s<-Seurat::CreateSeuratObject(s.assay, assay ="ATAC", meta.data =md.list[[s.name]], project =s.name)sample.list[[s.name]]<-s}# Generate a merged sample.# To merge, you need to select a "reference" object upon which you merge the others. ref_id<-"sample_a"sample<-merge(x =sample.list[[ref_id]], y =sample.list[!(names(sample.list)%in%ref_id)], add.cell.ids =c(ref_id, names(sample.list)[!(names(sample.list)%in%ref_id)]))# Reassign the annotation just in case. Signac::Annotation(sample)<-annotation# Define QC cutoffs.reference_organism<-"h.sapiens"# QC cutoff: lower peak region fragments.lower_peak_region_fragments_cutoff=3000# QC cutoff: higher peak region fragments.higher_peak_region_fragments_cutoff=20000# QC cutoff: percentage of reads in peaks (higher than).percentage_reads_in_peaks_cutoff=15# QC cutoff: blacklist ratio (lower than).blacklist_ratio_cutoff=0.05# QC cutoff: nucleosome signal cutoff (lower than).nucleosome_signal_cutoff=10# QC cutoff: TSS enrichment score cutoff (higher than).TSS_enrichment_score_cutoff=2# Calculate the chromosome binding pattern. Stored as "nucleosome_signal" column. # Computes it for chr1.sample<-Signac::NucleosomeSignal(object =sample)# Compute TSS enrichment.sample<-Signac::TSSEnrichment(sample, fast =FALSE)# Compute the percentage of reads in peaks.sample$percentage_reads_in_peaks<-sample$peak_region_fragments/sample$passed_filters*100# Compute ratio of reads in blacklist sites.sample$blacklist_ratio<-sample$blacklist_region_fragments/sample$peak_region_fragments# Classify in different nucleosome groups.sample$nucleosome_group<-ifelse(sample$nucleosome_signal>4, 'NS > 4', 'NS < 4')# Assign cells to high or low TSS enrichment.sample$high_tss<-ifelse(sample$TSS.enrichment>2, "High", "Low")# Compute the QC logical vectors.peak_lower_cutoff<-sample$peak_region_fragments>lower_peak_region_fragments_cutoffpeak_higher_cutoff<-sample$peak_region_fragments<higher_peak_region_fragments_cutoffpercentage_reads_in_peaks_cutoff<-sample$percentage_reads_in_peaks>percentage_reads_in_peaks_cutoffblacklist_ratio_cutoff<-sample$blacklist_ratio<blacklist_ratio_cutoffnucleosome_signal_cutoff<-sample$nucleosome_signal<nucleosome_signal_cutofftss_cutoff<-sample$TSS.enrichment>TSS_enrichment_score_cutoff# Combine them.mask<-peak_lower_cutoff&peak_higher_cutoff&percentage_reads_in_peaks_cutoff&blacklist_ratio_cutoff&nucleosome_signal_cutoff&tss_cutoff# Subset the sample. sample<-sample[, which(mask)]# Normalize.sample<-Signac::RunTFIDF(sample)# Find top peaks (in our case, we use q0 to select all of them).sample<-Signac::FindTopFeatures(sample, min.cutoff ="q0")# Run SVD.sample<-Signac::RunSVD(object =sample, assay ="ATAC", reduction.key ="LSI_", reduction.name ="lsi")# Check the LSI components.Signac::DepthCor(sample)# If the component 1 is at -1, exclude it.first_comp<-2# Set to 1 if component 1 is not at -1.# Compute UMAP.sample<-Seurat::RunUMAP(object =sample, reduction ="lsi", dims =first_comp:30)# Find neighbors.sample<-Seurat::FindNeighbors(object =sample, reduction ="lsi", dims =first_comp:30)# Find clusters.sample<-Seurat::FindClusters(object =sample, verbose =FALSE, algorithm =1)# Compute activity matrix.gene.activities<-Signac::GeneActivity(object =sample)# Add the activity matrix to the Seurat object as a new assay, and normalize it.sample[["RNA"]]<-Seurat::CreateAssayObject(counts =gene.activities)# Normalize new assay data - treating the Activity assay as if it was a scRNAseq experiment.sample<-Seurat::SCTransform(object =sample, assay ="RNA", new.assay.name ="SCT")# Transfer labels from true scRNAseq object to the activity assay from scATACseq object. # This assumes you have a corresponding scRNAseq experiment.# Load snRNAseq dataset.sample.rna<-readRDS("full_path_to_scRNAseq_object")# Rename gene activity assay to ACTIVITY.sample<-SeuratObject::RenameAssays(sample, SCT ="ACTIVITY")# Find anchors between datasets.transfer.anchors<-Seurat::FindTransferAnchors(reference =sample.rna, # Seurat object to use as reference. query =sample, # Seurat object to use as querey. features =Seurat::VariableFeatures(object =sample.rna), # Features to use for Dimensional Reduction. reference.assay ="SCT", # Assay to use as reference. query.assay ="ACTIVITY", # Assay to use as query. reduction ="cca")# Dimensional reduction to use when finding anchors.# Transfer cluster labels from snRNAseq dataset.celltype.predictions<-Seurat::TransferData(anchorset =transfer.anchors, refdata =sample.rna$cluster_names, # Data to transfer (cluster names). weight.reduction =sample[["lsi"]], dims =1:15)# Dimensional reduction to use.# Add the predictions as metadata.sample<-Seurat::AddMetaData(sample, metadata =celltype.predictions)# Predictions stored in "predicted.id" metadata column.# Generate an integrated object removing sample-specific effects..Seurat::DefaultAssay(sample)<-"ATAC"Seurat::Idents(sample)<-sample$predicted.id# Set the new identities.# Run Harmony. sample.integrated<-harmony::RunHarmony(object =sample, group.by.vars ='orig.ident', reduction ='lsi', assay.use ='ATAC', project.dim =FALSE)# Recompute UMAP.sample.integrated<-Seurat::RunUMAP(sample.integrated, reduction ="harmony", dims =first_comp:30)##Output:#OE0145-IDH_integrated_astrocytoma_peaks_activity_chromvar#OE0145-IDH_integrated_oligodendroglioma_peaks_activity_chromvar#QC analysis - supplementary figures