suppressMessages(library(ComplexHeatmap))
suppressMessages(library(RColorBrewer))
suppressMessages(library(tidyverse))
suppressMessages(library(dplyr))
suppressMessages(library(ggplot2))
suppressMessages(library(cowplot))
# ---- TF activity heatmap from differential footprinting statistics ----
# Reads the per-motif statistics table, derives one activity value per cell
# type, keeps the most variable well-supported motifs, z-scores each motif
# across cell types and draws a clustered heatmap.

# Parameters (edit here rather than in the body of the script)
stats_file <- "./DiffFootprinting/All_statistics.txt"
cell_types <- c("CLP", "CMP", "GMP", "HSC", "LMPP", "MEP", "MPP", "pDC")
min_sites <- 1000  # keep motifs with strictly more than this many sites (Num)
n_top <- 50        # number of most variable motifs shown in the heatmap

# read.table() already returns a data.frame
df <- read.table(stats_file, header = TRUE)

# Fail early, with a readable message, if the table lacks an expected column
required_cols <- c(
  "Motif", "Num",
  paste0("Protection_Score_", cell_types),
  paste0("TC_", cell_types)
)
missing_cols <- setdiff(required_cols, colnames(df))
if (length(missing_cols) > 0) {
  stop(
    "Missing column(s) in ", stats_file, ": ",
    paste(missing_cols, collapse = ", "),
    call. = FALSE
  )
}

# Activity of each cell type = Protection_Score_<cell> + TC_<cell>
for (cell in cell_types) {
  df[[cell]] <- df[[paste0("Protection_Score_", cell)]] +
    df[[paste0("TC_", cell)]]
}

# For some TFs the JASPAR database includes several motif variants (their
# names contain "var"); drop them, then use the motif name as the row name
df <- df[!grepl("var", df$Motif), , drop = FALSE]
rownames(df) <- df$Motif
df$Motif <- NULL

# Only keep motifs with more than `min_sites` binding sites, and only the
# per-cell-type activity columns. which() drops rows whose Num is NA.
df <- df[which(df$Num > min_sites), cell_types, drop = FALSE]

# Select the `n_top` most variable motifs (ties at the cut-off are all kept).
# Rows whose standard deviation is NA or zero are excluded: they would turn
# into NaN after row-wise scaling and make the hierarchical clustering fail.
# Base subsetting is used so that row names (motif labels) and the original
# row order are preserved.
row_sd <- apply(as.matrix(df), 1, sd)
variable_rows <- which(is.finite(row_sd) & row_sd > 0)
sd_rank <- rank(-row_sd[variable_rows], ties.method = "min")
df <- df[variable_rows[sd_rank <= n_top], , drop = FALSE]

if (nrow(df) == 0) {
  stop("No motif left to plot after filtering.", call. = FALSE)
}

# Z-score each motif across cell types; scale() works column-wise, hence the
# double transpose. `df` is a numeric matrix from here on.
df <- t(scale(t(df)))

# Figure size for notebook (IRkernel/repr) rendering
options(repr.plot.width = 6, repr.plot.height = 8)

p <- Heatmap(as.matrix(df),
  name = "TF Activity",
  cluster_columns = TRUE,
  cluster_rows = TRUE,
  show_row_names = TRUE,
  rect_gp = gpar(col = "black", lwd = 0.5)
)
p