
Visualization Guide
Chen Yang
2025-06-27
Source:vignettes/06-visualization-guide.Rmd
06-visualization-guide.Rmd

Visualization Guide
This guide provides detailed instructions for visualizing mLLMCelltype results. Creating effective visualizations is crucial for interpreting cell type annotations and communicating uncertainty metrics in your single-cell RNA sequencing analysis.
Basic Visualization Concepts
mLLMCelltype provides three key types of information that can be visualized:
- Cell Type Annotations: The final cell type labels assigned to each cluster
- Consensus Proportion: A measure of agreement among models (0-1)
- Shannon Entropy: A measure of uncertainty in the annotations (lower is better)
These can be visualized separately or combined into informative multi-panel figures.
Integrating with Seurat
Adding mLLMCelltype Results to Seurat Objects
Before visualization, you need to add the mLLMCelltype results to your Seurat object:
library(Seurat)
library(mLLMCelltype)
library(ggplot2)
library(patchwork)
# Assuming you have a Seurat object named 'seurat_obj' and consensus results
# First, check the structure of consensus_results
# Print the structure of consensus_results to understand its format
# str(consensus_results)
# print(names(consensus_results))
# print(names(consensus_results$final_annotations))
# Get cluster IDs from Seurat object
cluster_ids <- unique(as.character(Idents(seurat_obj)))
cat("Cluster IDs in Seurat object:", paste(cluster_ids, collapse=", "), "\n")
# Convert final_annotations to a vector if it's a list
final_annotations <- unlist(consensus_results$final_annotations)
cat("Final annotations:", paste(names(final_annotations), collapse=", "), "\n")
# Create a mapping between cluster IDs and annotations
annotation_map <- vector("character", length(cluster_ids))
names(annotation_map) <- cluster_ids
# First, try direct mapping where cluster ID matches the name in final_annotations
for (i in seq_along(cluster_ids)) {
cluster_id <- cluster_ids[i]
# Check if this cluster ID exists in final_annotations names
if (cluster_id %in% names(final_annotations)) {
annotation_map[cluster_id] <- final_annotations[cluster_id]
}
}
# For any remaining unmapped clusters, try to infer the mapping
unmapped <- is.na(annotation_map) | annotation_map == ""
if (any(unmapped)) {
unmapped_ids <- names(annotation_map)[unmapped]
cat("Unmapped cluster IDs:", paste(unmapped_ids, collapse=", "), "\n")
# If we have the same number of unmapped clusters as remaining annotations,
# we can try to assign them in order
remaining_annotations <- final_annotations[!names(final_annotations) %in% names(annotation_map)[!unmapped]]
if (length(remaining_annotations) == length(unmapped_ids)) {
for (i in seq_along(unmapped_ids)) {
annotation_map[unmapped_ids[i]] <- remaining_annotations[i]
}
} else {
# If we can't map directly, assign "Unknown" to unmapped clusters
for (id in unmapped_ids) {
annotation_map[id] <- paste("Unknown", id)
}
}
}
cat("Final annotation map:\n")
print(annotation_map)
# Add consensus annotations to Seurat object
seurat_obj$cell_type_consensus <- plyr::mapvalues(
x = as.character(Idents(seurat_obj)),
from = names(annotation_map),
to = as.character(annotation_map)
)
# Verify the annotations were added correctly
table(seurat_obj$cell_type_consensus, as.character(Idents(seurat_obj)))
# Extract consensus metrics from the consensus results
# Check if consensus_results has the expected structure
if (!is.null(consensus_results$initial_results) &&
!is.null(consensus_results$initial_results$consensus_results)) {
# Get consensus metrics
consensus_metrics <- lapply(
names(consensus_results$initial_results$consensus_results),
function(cluster_id) {
metrics <- consensus_results$initial_results$consensus_results[[cluster_id]]
return(list(
cluster = cluster_id,
consensus_proportion = metrics$consensus_proportion,
entropy = metrics$entropy
))
}
)
# Convert to data frame for easier handling
metrics_df <- do.call(rbind, lapply(consensus_metrics, data.frame))
# Create a mapping for consensus proportion
proportion_map <- metrics_df$consensus_proportion
names(proportion_map) <- metrics_df$cluster
# Print the proportion map for debugging
cat("Consensus proportion map:\n")
print(proportion_map)
# Create a mapping for entropy
entropy_map <- metrics_df$entropy
names(entropy_map) <- metrics_df$cluster
# Print the entropy map for debugging
cat("Entropy map:\n")
print(entropy_map)
# Add consensus proportion to Seurat object
# First check if all cluster IDs are in the proportion map
missing_clusters <- setdiff(cluster_ids, names(proportion_map))
if (length(missing_clusters) > 0) {
cat("Warning: Some clusters are missing from proportion map:",
paste(missing_clusters, collapse=", "), "\n")
# Add default values for missing clusters
for (cluster in missing_clusters) {
proportion_map[cluster] <- 1.0 # Default to perfect consensus
}
}
seurat_obj$consensus_proportion <- as.numeric(plyr::mapvalues(
x = as.character(Idents(seurat_obj)),
from = names(proportion_map),
to = as.character(proportion_map)
))
# Add entropy to Seurat object
# First check if all cluster IDs are in the entropy map
missing_clusters <- setdiff(cluster_ids, names(entropy_map))
if (length(missing_clusters) > 0) {
cat("Warning: Some clusters are missing from entropy map:",
paste(missing_clusters, collapse=", "), "\n")
# Add default values for missing clusters
for (cluster in missing_clusters) {
entropy_map[cluster] <- 0.0 # Default to no uncertainty
}
}
seurat_obj$entropy <- as.numeric(plyr::mapvalues(
x = as.character(Idents(seurat_obj)),
from = names(entropy_map),
to = as.character(entropy_map)
))
# Verify metrics were added correctly
summary(seurat_obj$consensus_proportion)
summary(seurat_obj$entropy)
} else {
# If the expected structure is not found, create dummy metrics
warning("Could not find consensus metrics in the results. Creating dummy metrics for visualization purposes.")
# Create dummy metrics
seurat_obj$consensus_proportion <- 1.0 # Perfect consensus
seurat_obj$entropy <- 0.0 # No uncertainty
}
Basic Visualization with Seurat
Cell Type Annotations
# Basic cell type visualization
p1 <- DimPlot(seurat_obj,
group.by = "cell_type_consensus",
label = TRUE,
repel = TRUE) +
ggtitle("Cell Type Annotations") +
theme(plot.title = element_text(hjust = 0.5))
p1
Consensus Proportion
# Visualize consensus proportion
p2 <- FeaturePlot(seurat_obj,
features = "consensus_proportion",
cols = c("yellow", "green", "blue"),
min.cutoff = 0.3,
max.cutoff = 1) +
ggtitle("Consensus Proportion") +
theme(plot.title = element_text(hjust = 0.5))
p2
Shannon Entropy
# Visualize Shannon entropy
p3 <- FeaturePlot(seurat_obj,
features = "entropy", # Note: The column name is 'entropy', not 'shannon_entropy'
cols = c("red", "orange", "yellow"),
min.cutoff = 0,
max.cutoff = 1.5) +
scale_color_gradient(low = "red", high = "yellow") +
ggtitle("Shannon Entropy") +
theme(plot.title = element_text(hjust = 0.5))
p3
Publication-Ready Visualizations with SCpubr
For publication-quality visualizations, we recommend using the SCpubr package, which provides enhanced aesthetics for single-cell visualizations. Below are several advanced visualization techniques that can significantly improve the quality and interpretability of your results.
Note: For a gallery of all visualization examples, please see the Visualization Gallery.
Installing and Loading Required Packages
# Install SCpubr if not already installed
if (!requireNamespace("SCpubr", quietly = TRUE)) {
remotes::install_github("enblacar/SCpubr")
}
# Install other required packages
if (!requireNamespace("viridis", quietly = TRUE)) {
install.packages("viridis")
}
if (!requireNamespace("patchwork", quietly = TRUE)) {
install.packages("patchwork")
}
if (!requireNamespace("ggExtra", quietly = TRUE)) {
install.packages("ggExtra")
}
# Load required packages
library(SCpubr)
library(viridis) # For color palettes
library(patchwork) # For combining plots
library(ggExtra) # For marginal distributions
Standardizing Cell Type Names
Before visualization, it’s important to standardize cell type names to ensure consistency:
# Standardize cell type names
# Merge singular and plural forms
seurat_obj$cell_type_consensus <- gsub("B cell$", "B cells", seurat_obj$cell_type_consensus)
seurat_obj$cell_type_consensus <- gsub("T cell$", "T cells", seurat_obj$cell_type_consensus)
seurat_obj$cell_type_consensus <- gsub("Mast cell", "Mast Cell", seurat_obj$cell_type_consensus)
seurat_obj$cell_type_consensus <- gsub("Plasmacytoid Dendritic Cell$", "Plasmacytoid Dendritic Cells", seurat_obj$cell_type_consensus)
seurat_obj$cell_type_consensus <- gsub("Natural Killer \\(NK\\) Cell", "Natural Killer Cell", seurat_obj$cell_type_consensus)
seurat_obj$cell_type_consensus <- gsub("NK Cell", "Natural Killer Cell", seurat_obj$cell_type_consensus)
Creating a Custom Color Palette
Using a colorblind-friendly palette enhances the accessibility of your visualizations:
# Create a custom color palette that enhances cluster separation
# Using a colorblind-friendly palette from viridis
n_cell_types <- length(unique(seurat_obj$cell_type_consensus))
custom_colors <- viridis::viridis(n_cell_types, option = "turbo")
names(custom_colors) <- unique(seurat_obj$cell_type_consensus)
Enhanced Cell Type UMAP Visualization
This enhanced visualization includes cell borders, optimized label placement, and improved aesthetics:
# 1. Enhanced UMAP visualization with cell borders and optimized styling
p1 <- SCpubr::do_DimPlot(sample = seurat_obj,
group.by = "cell_type_consensus",
label = TRUE,
label.box = TRUE,
label.size = 5,
repel = TRUE,
legend.position = "right",
pt.size = 1.2,
plot_cell_borders = TRUE,
border.size = 1.5,
border.color = "black",
colors.use = custom_colors,
font.size = 14,
plot.title = "Cell Types") +
theme(plot.title = element_text(hjust = 0.5, face = "bold", margin = margin(b = 1, t = 5)))
p1
Example Output:

This visualization clearly shows distinct cell type clusters with optimized labels and borders, making it easy to identify different cell populations.
UMAP with Density Contours
Adding density contours helps to better visualize cluster boundaries:
# 2. UMAP with density contours to better show clustering patterns
p2 <- SCpubr::do_DimPlot(sample = seurat_obj,
group.by = "cell_type_consensus",
label = TRUE,
label.box = TRUE,
repel = TRUE,
legend.position = "right",
pt.size = 1.0,
plot_density_contour = TRUE,
contour.color = "black",
# Place contours below labels for better visibility
contour.position = "bottom",
colors.use = custom_colors,
font.size = 14,
plot.title = "Cell Type Clusters with Density Contours") +
theme(plot.title = element_text(hjust = 0.5, face = "bold", margin = margin(b = 1, t = 5)))
p2
Example Output:

Density contours highlight the concentration of cells within each cluster, providing additional information about cluster density and boundaries.
Highlighting Controversial Clusters
Visualizing controversial clusters (those with low consensus or high entropy) can provide valuable insights:
# Create a controversial clusters column (low consensus or high entropy)
seurat_obj$controversial <- ifelse(seurat_obj$consensus_proportion < 0.6 | seurat_obj$entropy > 0.5,
"Controversial", "Consensus")
# 3. Controversial clusters visualization
p3 <- SCpubr::do_DimPlot(sample = seurat_obj,
group.by = "controversial",
label = FALSE,
legend.position = "right",
pt.size = 1.2,
colors.use = c("Controversial" = "#E41A1C", "Consensus" = "#4DAF4A"),
font.size = 14,
plot.title = "Controversial vs. Consensus Clusters") +
theme(plot.title = element_text(hjust = 0.5, face = "bold", margin = margin(b = 1, t = 5)))
p3
Example Output:

This visualization highlights clusters with low consensus or high entropy (red), which may require further investigation or validation.
UMAP with Marginal Distributions
Adding marginal distributions can help visualize the overall distribution of cells:
# 4. UMAP with marginal distributions
# Using basic ggplot2 to create UMAP plot, then adding marginal distributions
p4_base <- ggplot(data.frame(UMAP_1 = Embeddings(seurat_obj, "umap")[,1],
UMAP_2 = Embeddings(seurat_obj, "umap")[,2],
cell_type = seurat_obj$cell_type_consensus)) +
geom_point(aes(x = UMAP_1, y = UMAP_2, color = cell_type), size = 1) +
scale_color_manual(values = custom_colors) +
theme_classic() +
labs(title = "Cell Types with Marginal Distributions") +
theme(plot.title = element_text(hjust = 0.5, face = "bold"),
legend.position = "right")
# Add marginal distributions, controlling the size
p4 <- ggExtra::ggMarginal(p4_base, type = "density",
groupColour = TRUE,
groupFill = TRUE,
size = 8) # Control the size of marginal plots
p4
Example Output:

Marginal distributions show the density of cells along each axis, providing additional context about the overall distribution of cells in the UMAP space.
Enhanced Consensus Proportion Visualization
Improved visualization of consensus proportion with density contours:
# 5. Enhanced consensus proportion visualization
p5 <- SCpubr::do_FeaturePlot(sample = seurat_obj,
features = "consensus_proportion",
order = TRUE,
pt.size = 1.2,
enforce_symmetry = FALSE,
legend.title = "Consensus\nProportion",
plot.title = "Annotation Confidence",
sequential.palette = "YlGnBu",
sequential.direction = 1,
min.cutoff = min(seurat_obj$consensus_proportion),
max.cutoff = max(seurat_obj$consensus_proportion),
na.value = "lightgrey",
plot_density_contour = TRUE,
contour.color = "black",
contour.position = "bottom",
font.size = 14) +
theme(plot.title = element_text(hjust = 0.5, face = "bold", margin = margin(b = 1, t = 5)))
p5
Example Output:

This visualization shows the consensus proportion for each cell, with higher values (blue) indicating greater agreement among models about the cell type annotation.
Enhanced Entropy Visualization
Improved visualization of entropy with density contours:
# 6. Enhanced entropy visualization
p6 <- SCpubr::do_FeaturePlot(sample = seurat_obj,
features = "entropy",
order = TRUE,
pt.size = 1.2,
enforce_symmetry = FALSE,
legend.title = "Shannon\nEntropy",
plot.title = "Annotation Uncertainty",
sequential.palette = "OrRd",
sequential.direction = -1,
min.cutoff = min(seurat_obj$entropy),
max.cutoff = max(seurat_obj$entropy),
na.value = "lightgrey",
plot_density_contour = TRUE,
contour.color = "black",
contour.position = "bottom",
font.size = 14) +
theme(plot.title = element_text(hjust = 0.5, face = "bold", margin = margin(b = 1, t = 5)))
p6
Example Output:

This visualization shows the entropy for each cell, with higher values (yellow) indicating greater uncertainty in the cell type annotation. Lower entropy (red) suggests more confident annotations.
Cell Types with Consensus Proportion
Visualizing cell types with consensus proportion provides a combined view:
# 7. Group by cell type but show consensus proportion with group.by.cell_borders
p7 <- SCpubr::do_FeaturePlot(sample = seurat_obj,
features = "consensus_proportion",
group.by = "cell_type_consensus",
group.by.cell_borders = TRUE,
group.by.cell_borders.alpha = 0.2,
group.by.show.dots = TRUE,
group.by.dot.size = 8,
group.by.colors.use = custom_colors,
order = TRUE,
pt.size = 1.0,
sequential.palette = "YlGnBu",
sequential.direction = 1,
legend.position = "right",
legend.title = "Consensus\nProportion",
plot.title = "Cell Types with Consensus Proportion") +
theme(plot.title = element_text(hjust = 0.5, face = "bold"),
legend.key.size = unit(0.5, "cm"),
legend.text = element_text(size = 8))
p7
Example Output:

This visualization combines cell type information with consensus proportion, showing both the cell type clusters and the confidence in each annotation. Cell type boundaries are shown with colored outlines, while the color intensity represents the consensus proportion.
Uncertainty Metrics Violin Plots
Violin plots can help visualize the distribution of uncertainty metrics by cell type:
# 8. Violin plots of uncertainty metrics by cell type
p8a <- SCpubr::do_ViolinPlot(sample = seurat_obj,
features = "consensus_proportion",
group.by = "cell_type_consensus",
flip = TRUE,
colors.use = custom_colors,
legend.position = "bottom",
legend.title = "Cell Type",
plot.title = "Consensus Proportion by Cell Type") +
theme(legend.text = element_text(size = 8),
legend.key.size = unit(0.5, "cm"))
p8b <- SCpubr::do_ViolinPlot(sample = seurat_obj,
features = "entropy",
group.by = "cell_type_consensus",
flip = TRUE,
colors.use = custom_colors,
legend.position = "bottom",
legend.title = "Cell Type",
plot.title = "Entropy by Cell Type") +
theme(legend.text = element_text(size = 8),
legend.key.size = unit(0.5, "cm"))
# Combine violin plots
p8 <- p8a / p8b + plot_layout(heights = c(1, 1))
p8
Example Output:

Violin plots show the distribution of consensus proportion and entropy values for each cell type, allowing for easy comparison of annotation confidence across different cell populations.
Combined Dashboard
Creating a dashboard with multiple visualizations provides a comprehensive view:
# 9. Combined visualization dashboard
# Create a simplified dashboard with the main visualizations
simplified_dashboard <- cowplot::plot_grid(
p1, p5, p6,
ncol = 3,
labels = c("A", "B", "C"),
label_size = 18,
rel_widths = c(1.5, 1, 1) # Give cell type plot more space
)
simplified_dashboard
Example Output:

This dashboard combines multiple visualizations into a single figure, providing a comprehensive view of cell type annotations and uncertainty metrics. The panels are labeled for easy reference in publications.
High-Resolution Publication Figure
Creating a high-resolution figure for publication:
# 10. High-resolution figure for publication
p1_pub <- SCpubr::do_DimPlot(sample = seurat_obj,
group.by = "cell_type_consensus",
label = TRUE,
label.box = TRUE,
label.size = 5,
repel = TRUE,
legend.position = "right",
pt.size = 1.0,
plot_cell_borders = TRUE,
border.size = 1.0,
colors.use = custom_colors,
font.size = 14,
plot.title = "Cell Types") +
theme(plot.title = element_text(hjust = 0.5, face = "bold", margin = margin(b = 1, t = 5)),
legend.text = element_text(size = 10),
legend.key.size = unit(0.5, "cm"))
p1_pub
Example Output:

This high-resolution figure is specifically designed for publication, with optimized dimensions, font sizes, and visual elements. The increased height provides more space for clear visualization of cell type clusters and labels.
Saving Visualizations
# Create a directory to save all visualizations
dir.create("mLLMCelltype_visualizations", showWarnings = FALSE)
# Save individual plots
ggsave("mLLMCelltype_visualizations/1_enhanced_cell_type_umap.pdf", plot = p1, width = 10, height = 8)
ggsave("mLLMCelltype_visualizations/2_density_contour_umap.pdf", plot = p2, width = 10, height = 8)
ggsave("mLLMCelltype_visualizations/3_controversial_clusters.pdf", plot = p3, width = 10, height = 8)
ggsave("mLLMCelltype_visualizations/5_enhanced_consensus_proportion.pdf", plot = p5, width = 10, height = 8)
ggsave("mLLMCelltype_visualizations/6_enhanced_entropy.pdf", plot = p6, width = 10, height = 8)
ggsave("mLLMCelltype_visualizations/7_cell_type_with_consensus.pdf", plot = p7, width = 12, height = 10)
ggsave("mLLMCelltype_visualizations/8_uncertainty_violins.pdf", plot = p8, width = 16, height = 14)
ggsave("mLLMCelltype_visualizations/9_combined_dashboard.pdf", plot = simplified_dashboard, width = 24, height = 12)
ggsave("mLLMCelltype_visualizations/10_publication_figure.pdf", plot = p1_pub, width = 18, height = 16)
# Save circlize plot (requires additional packages)
# Note: Circlize plots are saved directly in the plot_circlize function using pdf()
# If you want to save it manually, you can use:
pdf("mLLMCelltype_visualizations/11_circlize_plot.pdf", width = 10, height = 10)
plot_circlize(
circ_data,
do.label = TRUE,
pt.size = 0.5,
col.use = NULL,
bg.color = '#F5F5F5',
kde2d.n = 200,
repel = TRUE,
label.cex = 0.8,
color_scheme = "set1"
)
add_track(circ_data, group = "controversial", colors = controversy_colors, track_num = 2)
add_track_continuous(circ_data, feature_value = "consensus_proportion", track_num = 3,
point_size = 0.5, gradient_color = c("blue", "yellow", "red"))
dev.off()
Marker Gene Visualization
While mLLMCelltype focuses on cell type annotation, you can create marker gene visualizations using standard Seurat functions. These visualizations help validate the consensus annotations by showing marker gene expression patterns across annotated cell types.
Prerequisites for Marker Gene Visualization
Before creating marker gene visualizations, ensure you have the required packages:
# Install required packages if not already installed
if (!requireNamespace("pheatmap", quietly = TRUE)) {
install.packages("pheatmap")
}
if (!requireNamespace("viridis", quietly = TRUE)) {
install.packages("viridis")
}
if (!requireNamespace("dplyr", quietly = TRUE)) {
install.packages("dplyr")
}
if (!requireNamespace("ComplexHeatmap", quietly = TRUE)) {
BiocManager::install("ComplexHeatmap")
}
if (!requireNamespace("circlize", quietly = TRUE)) {
install.packages("circlize")
}
# Load required packages
library(pheatmap)
library(viridis)
library(dplyr)
library(ComplexHeatmap) # For advanced heatmaps
library(circlize) # For color functions
Finding Marker Genes
First, identify marker genes for each cluster using Seurat:
# Find marker genes for each cluster
pbmc_markers <- FindAllMarkers(
seurat_obj,
only.pos = TRUE,
min.pct = 0.25,
logfc.threshold = 0.25,
verbose = FALSE
)
# Get top markers for visualization
top_markers <- pbmc_markers %>%
group_by(cluster) %>%
top_n(n = 5, wt = avg_log2FC) %>%
pull(gene) %>%
unique()
# Display top markers preview
top_markers_preview <- pbmc_markers %>%
group_by(cluster) %>%
top_n(3, avg_log2FC) %>%
arrange(cluster, desc(avg_log2FC))
print(top_markers_preview)
print(paste("Selected", length(top_markers), "marker genes for visualization"))
Dotplot Visualization (Marker Genes vs Cell Types)
The dotplot is a popular visualization that shows marker gene expression across cell types with both expression level (color) and expression percentage (dot size):
# Create marker gene dotplot using Seurat's DotPlot function
# First, prepare the top marker genes for each cluster
top_markers <- pbmc_markers %>%
group_by(cluster) %>%
top_n(n = 5, wt = avg_log2FC) %>%
pull(gene) %>%
unique()
# Set cell identities to the consensus cell types for proper grouping
Idents(seurat_obj) <- seurat_obj$cell_type_consensus
# Create the basic dotplot
p_dotplot <- DotPlot(
seurat_obj,
features = top_markers,
dot.scale = 8,
col.min = 0,
col.max = 2.5,
dot.min = 0.1
) +
scale_color_gradient2(
low = "blue",
mid = "white",
high = "red",
midpoint = 0,
name = "Average\nExpression"
) +
guides(
size = guide_legend(
title = "Percent\nExpressed",
override.aes = list(color = "black")
)
) +
theme(
axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
axis.text.y = element_text(size = 10),
axis.title = element_text(size = 12),
plot.title = element_text(hjust = 0.5, size = 14, face = "bold"),
legend.title = element_text(size = 10),
legend.text = element_text(size = 9)
) +
labs(
title = "Marker Gene Expression Dotplot",
x = "Marker Genes",
y = "Cell Types"
) +
coord_flip() # Flip to have genes on Y-axis, cell types on X-axis
# Display the dotplot
print(p_dotplot)
# Save the dotplot
ggsave(
"marker_genes_dotplot.png",
plot = p_dotplot,
width = 12,
height = 10,
dpi = 300,
bg = "white"
)
Example Output:

This dotplot visualization shows:
- Y-axis: Marker genes (top genes for each cell type)
- X-axis: Cell types (from consensus annotation)
- Dot size: Percentage of cells expressing the gene
- Color intensity: Average expression level (scaled)
Enhanced Heatmap (Marker Genes vs Cell Types)
Create a classic heatmap showing marker gene expression across cell types:
# Create enhanced heatmap with marker genes vs cell types
library(ComplexHeatmap)
library(circlize)
# Prepare expression matrix for top marker genes
top_markers_per_type <- pbmc_markers %>%
group_by(cluster) %>%
top_n(n = 4, wt = avg_log2FC) %>%
arrange(cluster, desc(avg_log2FC))
# Get unique top markers
selected_genes <- unique(top_markers_per_type$gene)
# Calculate average expression per cell type
Idents(seurat_obj) <- seurat_obj$cell_type_consensus
avg_expression <- AverageExpression(
seurat_obj,
features = selected_genes,
group.by = "cell_type_consensus"
)$RNA
# Scale the expression data (row-wise scaling)
scaled_expression <- t(scale(t(avg_expression)))
# Create annotation for cell types showing consensus metrics
cell_type_names <- colnames(scaled_expression)
consensus_annotation <- data.frame(
CellType = cell_type_names,
row.names = cell_type_names
)
# Get consensus proportion for each cell type (take average per cell type)
consensus_props <- sapply(cell_type_names, function(ct) {
cells_of_type <- which(seurat_obj$cell_type_consensus == ct)
if(length(cells_of_type) > 0) {
mean(seurat_obj$consensus_proportion[cells_of_type], na.rm = TRUE)
} else {
NA
}
})
consensus_annotation$ConsensusProp <- consensus_props
# Create color schemes
col_fun_expr = colorRamp2(
c(-2, 0, 2),
c("blue", "white", "red")
)
col_fun_consensus = colorRamp2(
c(0.5, 1),
c("yellow", "darkgreen")
)
# Create the heatmap with ComplexHeatmap
p_heatmap <- Heatmap(
scaled_expression,
name = "Scaled\nExpression",
col = col_fun_expr,
# Row (gene) configuration
row_title = "Marker Genes",
row_title_side = "left",
row_names_gp = gpar(fontsize = 10),
row_dend_side = "right",
cluster_rows = TRUE,
# Column (cell type) configuration
column_title = "Cell Types",
column_names_gp = gpar(fontsize = 10),
column_names_rot = 45,
cluster_columns = FALSE,
# Add consensus proportion annotation
top_annotation = HeatmapAnnotation(
ConsensusProp = consensus_annotation$ConsensusProp,
col = list(ConsensusProp = col_fun_consensus),
annotation_name_gp = gpar(fontsize = 10),
simple_anno_size = unit(0.5, "cm")
),
# Heatmap body configuration
rect_gp = gpar(col = "white", lwd = 0.5),
cell_fun = function(j, i, x, y, width, height, fill) {
# Add text showing expression values for high expression
if(abs(scaled_expression[i, j]) > 1.5) {
grid.text(sprintf("%.1f", scaled_expression[i, j]),
x, y, gp = gpar(fontsize = 8, col = "white"))
}
},
# Legend configuration
heatmap_legend_param = list(
title_gp = gpar(fontsize = 12),
labels_gp = gpar(fontsize = 10),
legend_height = unit(4, "cm")
),
# Size configuration
width = unit(0.8 * length(cell_type_names), "cm"),
height = unit(0.4 * length(selected_genes), "cm")
)
# Save the heatmap
png("marker_genes_heatmap_complex.png",
width = 1400, height = 1000, res = 300, bg = "white")
draw(p_heatmap,
column_title = "Marker Gene Expression Across Cell Types",
column_title_gp = gpar(fontsize = 14, fontface = "bold"))
dev.off()
# Also create with base R/pheatmap for compatibility
library(pheatmap)
# Create annotation data frame for pheatmap
annotation_col <- data.frame(
ConsensusProp = consensus_props,
row.names = cell_type_names
)
# Create the pheatmap version
png("marker_genes_heatmap_pheatmap.png",
width = 1200, height = 1000, res = 300, bg = "white")
pheatmap(
scaled_expression,
scale = "none", # Already scaled
clustering_distance_rows = "euclidean",
clustering_method = "complete",
cluster_cols = FALSE,
cluster_rows = TRUE,
# Colors
color = colorRampPalette(c("blue", "white", "red"))(100),
breaks = seq(-2.5, 2.5, length.out = 101),
# Annotations
annotation_col = annotation_col,
annotation_colors = list(
ConsensusProp = colorRampPalette(c("yellow", "darkgreen"))(100)
),
# Labels and titles
main = "Marker Gene Expression Heatmap",
fontsize = 10,
fontsize_row = 9,
fontsize_col = 9,
# Display numbers for high expression
display_numbers = matrix(
ifelse(abs(scaled_expression) > 1.5,
sprintf("%.1f", scaled_expression), ""),
nrow = nrow(scaled_expression)
),
number_color = "white",
fontsize_number = 8,
# Cell size
cellwidth = 25,
cellheight = 15,
# Borders
border_color = "white"
)
dev.off()
Example Output:

This enhanced heatmap visualization shows:
- Rows: Marker genes (hierarchically clustered by expression pattern)
- Columns: Cell types (from consensus annotation)
-
Color scale: Scaled expression values (navy = low,
red = high)
- Top annotation: Consensus proportion for each cell type
- Clean layout: No cluttered text, focus on expression patterns
Alternative: Simplified Dotplot
For a quick analysis with predefined key markers, you can create a simplified version by selecting specific genes of interest and using a simpler color scheme. This approach is faster when you already know which markers to focus on.
Marker Gene Visualization Best Practices
- Gene Selection: Select top 3-5 marker genes per cell type for clarity; too many genes can make plots cluttered
-
Color Palettes:
- Use
viridis
or sequential color palettes for dotplots - Use blue-white-red diverging palettes for heatmaps (intuitive for expression)
- Consider colorblind-friendly palettes
- Use
-
Scaling: Row-wise scaling is recommended for
heatmaps when using
pheatmap
orComplexHeatmap
-
Dotplots: Use Seurat’s
DotPlot
function with appropriate dot size scaling and clear color gradients - File Formats: Save as PNG for presentations, PDF for publications
- Resolution: Use 300 DPI for publication-quality figures
Troubleshooting Marker Gene Visualizations
Common issues and solutions:
# Issue: Empty or missing consensus results
if (is.null(consensus_results$final_annotations)) {
stop("Consensus results are missing. Please run consensus annotation first.")
}
# Issue: Marker genes data frame format
if (!"avg_log2FC" %in% colnames(pbmc_markers)) {
stop("Marker genes data frame must contain 'avg_log2FC' column from FindAllMarkers()")
}
# Issue: Seurat object compatibility
if (!"RNA" %in% names(seurat_obj@assays)) {
stop("Seurat object must contain 'RNA' assay for expression data")
}
# Issue: Cluster ID mismatch
cluster_ids_seurat <- unique(as.character(Idents(seurat_obj)))
cluster_ids_consensus <- names(consensus_results$final_annotations)
if (!all(cluster_ids_consensus %in% cluster_ids_seurat)) {
warning("Some cluster IDs in consensus results don't match Seurat object")
}
Additional Advanced Visualization Techniques
Circlize Plot Visualization
Circlize plots provide a unique circular representation of your data, combining cell type information with metadata in concentric rings:
# Install required packages if not already installed
if (!requireNamespace("circlize", quietly = TRUE)) {
install.packages("circlize")
}
if (!requireNamespace("plot1cell", quietly = TRUE)) {
remotes::install_github("HaojiaWu/plot1cell", upgrade = "never", dependencies = FALSE)
}
# Load required packages
library(circlize)
library(plot1cell)
# Prepare circlize data
# This function transforms UMAP coordinates to a circular layout
circ_data <- prepare_circlize_data(seurat_obj, scale = 0.8)
# Generate color mapping for cell types
celltypes <- levels(seurat_obj)
cell_colors <- viridis::viridis(length(celltypes), option = "turbo")
names(cell_colors) <- celltypes
# Define colors for controversial clusters
controversy_colors <- c("Controversial" = "#E41A1C", "Consensus" = "#4DAF4A")
# Create the main circlize plot
plot_circlize(
circ_data,
do.label = TRUE,
pt.size = 0.5, # Increase point size for better visibility
col.use = NULL, # Use automatically generated color scheme
bg.color = '#F5F5F5',
kde2d.n = 200,
repel = TRUE,
label.cex = 0.8,
color_scheme = "set1" # Use RColorBrewer's Set1 scheme
)
# Add a track for controversial clusters
add_track(
circ_data,
group = "controversial",
colors = controversy_colors,
track_num = 2
)
# Add a track for consensus proportion
add_track_continuous(
circ_data,
feature_value = "consensus_proportion",
track_num = 3,
point_size = 0.5,
gradient_color = c("blue", "yellow", "red")
)
Example Output:

This visualization presents cell types in a circular layout with additional tracks showing metadata. The outer ring shows cell types, the middle ring indicates controversial vs. consensus clusters, and the inner ring displays consensus proportion values. This compact representation is particularly useful for visualizing multiple aspects of your data simultaneously.
Model Agreement Visualization
You can visualize how different models agree or disagree with the consensus:
# Assuming you have individual model results in the Seurat object
# Create a function to calculate agreement with consensus
calculate_agreement <- function(seurat_obj, model_column, consensus_column) {
seurat_obj$agreement <- ifelse(
seurat_obj[[model_column]] == seurat_obj[[consensus_column]],
"Agrees with consensus",
"Disagrees with consensus"
)
return(seurat_obj)
}
# Apply to each model
models <- c("claude_3_7_sonnet", "gpt_4o", "gemini_1_5_pro")
plot_list <- list()
for (model in models) {
column_name <- paste0("cell_type_", model)
temp_obj <- calculate_agreement(seurat_obj, column_name, "cell_type_consensus")
# Create plot with SCpubr
p_temp <- SCpubr::do_DimPlot(temp_obj,
group.by = "agreement",
colors.use = c("Agrees with consensus" = "darkgreen",
"Disagrees with consensus" = "darkred"),
pt.size = 0.5)
# Add title using ggplot2
plot_list[[model]] <- p_temp +
ggtitle(paste0(model, " Agreement")) +
theme(plot.title = element_text(hjust = 0.5))
}
# Combine all agreement plots
wrap_plots(plot_list, ncol = 2)
Heatmap of Model Predictions
Create a heatmap to visualize all model predictions for each cluster:
library(pheatmap)
# Create a matrix of model predictions for each cluster
create_prediction_matrix <- function(consensus_results, models) {
n_clusters <- length(consensus_results$final_annotations)
prediction_matrix <- matrix(NA, nrow = n_clusters, ncol = length(models))
rownames(prediction_matrix) <- paste0("Cluster_", 0:(n_clusters-1))
colnames(prediction_matrix) <- models
# Extract individual model predictions from initial_results
initial_predictions <- consensus_results$initial_results$individual_predictions
for (i in 1:length(models)) {
model <- models[i]
if (model %in% names(initial_predictions)) {
predictions <- initial_predictions[[model]]
# Convert predictions to a vector if it's a list
if (is.list(predictions)) {
pred_vector <- unlist(predictions)
prediction_matrix[, i] <- pred_vector
} else {
prediction_matrix[, i] <- predictions
}
}
}
return(prediction_matrix)
}
# Convert cell type names to numeric codes for visualization
encode_cell_types <- function(prediction_matrix) {
unique_types <- unique(as.vector(prediction_matrix))
# Remove NA values before creating mapping
unique_types <- unique_types[!is.na(unique_types)]
type_mapping <- setNames(1:length(unique_types), unique_types)
encoded_matrix <- matrix(NA, nrow = nrow(prediction_matrix), ncol = ncol(prediction_matrix))
rownames(encoded_matrix) <- rownames(prediction_matrix)
colnames(encoded_matrix) <- colnames(prediction_matrix)
for (i in 1:nrow(prediction_matrix)) {
for (j in 1:ncol(prediction_matrix)) {
if (!is.na(prediction_matrix[i, j])) {
encoded_matrix[i, j] <- type_mapping[prediction_matrix[i, j]]
}
}
}
return(list(matrix = encoded_matrix, mapping = type_mapping))
}
# Create and plot the heatmap
models <- c("claude-3-7-sonnet-20250219", "gpt-4o", "gemini-1.5-pro")
prediction_matrix <- create_prediction_matrix(consensus_results, models)
encoded_data <- encode_cell_types(prediction_matrix)
# Extract metrics for annotation row
metrics_df <- do.call(rbind, lapply(consensus_metrics, data.frame))
# Add consensus and uncertainty metrics as annotations
annotation_row <- data.frame(
Consensus = consensus_results$final_annotations,
Proportion = metrics_df$consensus_proportion,
Entropy = metrics_df$entropy
)
rownames(annotation_row) <- rownames(prediction_matrix)
# Create color scales
n_types <- length(encoded_data$mapping)
cell_type_colors <- colorRampPalette(rainbow(n_types))(n_types)
names(cell_type_colors) <- 1:n_types
proportion_colors <- colorRampPalette(c("yellow", "green", "blue"))(100)
entropy_colors <- colorRampPalette(c("red", "orange", "yellow"))(100)
annotation_colors <- list(
Proportion = proportion_colors,
Entropy = entropy_colors
)
# Plot heatmap
pheatmap(
encoded_data$matrix,
cluster_rows = FALSE,
cluster_cols = FALSE,
annotation_row = annotation_row,
annotation_colors = annotation_colors,
main = "Model Predictions by Cluster",
fontsize = 10,
cellwidth = 30,
cellheight = 30,
legend = TRUE,
display_numbers = TRUE,
number_format = function(x) names(encoded_data$mapping)[encoded_data$mapping == x]
)
Visualizing Discussion Logs
The discussion logs contain valuable information about the reasoning process. Here’s how to visualize key aspects:
library(tidytext)
library(wordcloud)
library(stringr)
# Extract discussion text for a specific cluster
extract_discussion_text <- function(consensus_results, cluster_id) {
discussion_logs <- consensus_results$discussion_logs
if (is.null(discussion_logs) || !cluster_id %in% names(discussion_logs)) {
return(NULL)
}
# Extract text from discussion logs
discussion_log <- discussion_logs[[cluster_id]]
# Combine all text from all rounds
all_text <- ""
if (!is.null(discussion_log$rounds)) {
for (round in discussion_log$rounds) {
if (!is.null(round$responses)) {
for (model_name in names(round$responses)) {
response <- round$responses[[model_name]]
if (is.character(response)) {
all_text <- paste(all_text, response, sep = " ")
}
}
}
}
}
return(all_text)
}
# Create a word cloud from discussion text
create_discussion_wordcloud <- function(discussion_text, title = "Discussion Word Cloud", output_file = NULL) {
# Clean and tokenize text
words <- discussion_text %>%
str_remove_all("[[:punct:]]") %>%
str_remove_all("[[:digit:]]") %>%
str_to_lower() %>%
str_split("\\s+") %>%
unlist()
# Remove stop words
stop_words <- c("the", "and", "a", "to", "of", "is", "in", "that", "this", "it", "as", "for", "with", "be", "are", "on", "by", "an", "or", "at", "but", "not", "from", "have", "has", "was", "were")
words <- words[!words %in% stop_words]
# Count word frequencies
word_freq <- table(words)
word_freq <- sort(word_freq, decreasing = TRUE)
# Create word cloud
if (!is.null(output_file)) {
# If output file is specified, save to file
png(output_file, width = 800, height = 600, res = 100)
par(mar = c(0, 0, 2, 0))
wordcloud(names(word_freq), freq = word_freq, min.freq = 2, max.words = 100,
random.order = FALSE, colors = brewer.pal(8, "Dark2"), main = title)
title(main = title)
dev.off()
} else {
# Otherwise display in the current device
par(mar = c(0, 0, 2, 0))
wordcloud(names(word_freq), freq = word_freq, min.freq = 2, max.words = 100,
random.order = FALSE, colors = brewer.pal(8, "Dark2"), main = title)
title(main = title)
}
}
# Example usage
cluster_id <- "0" # Replace with the cluster you want to visualize
discussion_text <- extract_discussion_text(consensus_results, cluster_id)
if (!is.null(discussion_text) && nchar(discussion_text) > 0) {
# Save to file
output_file <- paste0("wordcloud_cluster_", cluster_id, ".png")
create_discussion_wordcloud(
discussion_text,
title = paste("Cluster", cluster_id, "Discussion Keywords"),
output_file = output_file
)
cat("Word cloud saved to:", output_file, "\n")
} else {
cat("No discussion text found for cluster", cluster_id, "\n")
}
Additional Saving Options
For more flexibility in saving your visualizations:
# Save a single plot with high resolution
ggsave("cell_type_annotations.png", plot = p1, width = 10, height = 8, dpi = 600)
# Save the combined plot with custom dimensions
ggsave("mLLMCelltype_results.png", plot = simplified_dashboard, width = 20, height = 10, dpi = 300)
# Save as PDF for vector graphics with compression
ggsave("mLLMCelltype_results.pdf", plot = simplified_dashboard, width = 20, height = 10, device = cairo_pdf)
Visualization Best Practices
Color Selection
- Cell Type Annotations: Use distinct colors for different cell types
- Consensus Proportion: Use a gradient from yellow (low) to blue (high)
- Shannon Entropy: Use a gradient from red (low uncertainty) to yellow (high uncertainty)
Example: Complete Visualization Workflow
Here’s a complete example workflow for creating publication-ready visualizations:
library(Seurat)
library(mLLMCelltype)
library(ggplot2)
library(patchwork)
library(SCpubr)
# Load data and results
# Assuming you have a Seurat object and consensus results
# Add results to Seurat object
seurat_obj$cell_type_consensus <- plyr::mapvalues(
x = as.character(Idents(seurat_obj)),
from = as.character(0:(length(consensus_results$final_annotations)-1)),
to = consensus_results$final_annotations
)
# Extract consensus metrics from the consensus results
consensus_metrics <- lapply(names(consensus_results$initial_results$consensus_results), function(cluster_id) {
metrics <- consensus_results$initial_results$consensus_results[[cluster_id]]
return(list(
cluster = cluster_id,
consensus_proportion = metrics$consensus_proportion,
entropy = metrics$entropy
))
})
# Convert to data frame for easier handling
metrics_df <- do.call(rbind, lapply(consensus_metrics, data.frame))
# Add consensus proportion to Seurat object
seurat_obj$consensus_proportion <- as.numeric(plyr::mapvalues(
x = as.character(Idents(seurat_obj)),
from = metrics_df$cluster,
to = metrics_df$consensus_proportion
))
# Add entropy to Seurat object
seurat_obj$entropy <- as.numeric(plyr::mapvalues(
x = as.character(Idents(seurat_obj)),
from = metrics_df$cluster,
to = metrics_df$entropy
))
# Create publication-ready visualizations
# Cell type annotations with SCpubr
p1 <- SCpubr::do_DimPlot(seurat_obj,
group.by = "cell_type_consensus",
label = TRUE,
repel = TRUE,
legend.position = "right",
font.size = 14,
pt.size = 0.5)
# Add title using ggplot2
p1 <- p1 + ggtitle("Cell Type Annotations") +
theme(plot.title = element_text(hjust = 0.5))
# Consensus proportion with SCpubr
p2 <- SCpubr::do_FeaturePlot(seurat_obj,
features = "consensus_proportion",
order = TRUE,
pt.size = 0.5,
colors.use = c("yellow", "green", "blue"),
legend.position = "right",
font.size = 14)
# Add title using ggplot2
p2 <- p2 + ggtitle("Consensus Proportion") +
theme(plot.title = element_text(hjust = 0.5))
# Shannon entropy with SCpubr
p3 <- SCpubr::do_FeaturePlot(seurat_obj,
features = "entropy", # Note: The column name is 'entropy', not 'shannon_entropy'
order = TRUE,
pt.size = 0.5,
colors.use = c("red", "orange", "yellow"),
legend.position = "right",
font.size = 14)
# Add title using ggplot2
p3 <- p3 + ggtitle("Shannon Entropy") +
theme(plot.title = element_text(hjust = 0.5))
# Combine plots
combined_plot <- p1 | p2 | p3
# Add annotation
combined_plot <- combined_plot +
plot_annotation(
title = "mLLMCelltype Results",
subtitle = "Cell type annotations with uncertainty metrics",
caption = "Created with mLLMCelltype and SCpubr",
theme = theme(
plot.title = element_text(size = 16, hjust = 0.5, face = "bold"),
plot.subtitle = element_text(size = 12, hjust = 0.5),
plot.caption = element_text(size = 10, hjust = 1)
)
)
# Save the visualization
ggsave("mLLMCelltype_visualization.png",
plot = combined_plot,
width = 15,
height = 8,
dpi = 300)
# Also save as PDF for vector graphics
ggsave("mLLMCelltype_visualization.pdf",
plot = combined_plot,
width = 15,
height = 8)
Troubleshooting Common Issues
SCpubr Parameter Compatibility
If you encounter errors with SCpubr functions:
- The
title
parameter may not be supported in your version of SCpubr - Use standard ggplot2 functions to add titles instead:
p <- p + ggtitle("My Title") + theme(plot.title = element_text(hjust = 0.5))
- Check the SCpubr documentation for your installed version
Handling NA Values
When working with model predictions:
- Always check for and handle NA values in matrices
- Use
unique_types <- unique_types[!is.na(unique_types)]
to remove NA values - Add conditional checks like
if (!is.na(prediction_matrix[i, j]))
in loops
Next Steps
Now that you know how to create effective visualizations for mLLMCelltype results, you can explore:
- FAQ: Find answers to common questions
- Advanced Features: Explore hierarchical annotation and other advanced features
- Contributing Guide: Learn how to contribute to the project
- Version History: Review the development history of mLLMCelltype