Performance Tips
Optimize ChatSpatial performance for faster analysis and better resource utilization.
Hardware Recommendations
Minimum Requirements
- CPU: 4 cores, 2.5GHz
- RAM: 8GB
- Storage: 100GB SSD
- Network: Stable internet connection
Recommended Setup
- CPU: 8+ cores, 3.0GHz+ (Intel i7/AMD Ryzen 7)
- RAM: 32GB+ DDR4
- Storage: 500GB+ NVMe SSD
- GPU: NVIDIA RTX 3060+ (for deep learning methods)
- Network: High-speed internet for large data downloads
High-Performance Setup
- CPU: 16+ cores, 3.5GHz+ (Intel i9/AMD Ryzen 9)
- RAM: 64GB+ DDR4/DDR5
- Storage: 1TB+ NVMe SSD (PCIe 4.0)
- GPU: NVIDIA RTX 4080+ or A100
- Network: Gigabit ethernet
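If you are unsure which tier your machine falls into, a quick check with psutil summarizes the available resources. This is a minimal sketch; the summarize_hardware helper and its thresholds are illustrative and simply mirror the lists above.
# Quick hardware summary (illustrative helper, not part of ChatSpatial)
import multiprocessing
import psutil

def summarize_hardware():
    """Print CPU, RAM, and free disk space for a quick capability check."""
    cores = multiprocessing.cpu_count()
    ram_gb = psutil.virtual_memory().total / 1024**3
    free_disk_gb = psutil.disk_usage('/').free / 1024**3
    print(f"CPU cores: {cores}")
    print(f"RAM: {ram_gb:.1f} GB")
    print(f"Free disk: {free_disk_gb:.1f} GB")
    # Thresholds mirror the 'Recommended Setup' list above
    if cores >= 8 and ram_gb >= 32:
        print("Meets the recommended CPU/RAM setup")
    else:
        print("Consider subsampling or chunked processing on this machine")

summarize_hardware()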
Memory Optimization
Data Loading Optimization
# Use sparse matrices for count data
import scanpy as sc
import scipy.sparse as sp

adata.X = sp.csr_matrix(adata.X)

# Load only necessary data (backed mode keeps the matrix on disk)
adata = sc.read_h5ad('data.h5ad', backed='r')  # Read-only mode

# Subsample large datasets
adata = adata[::10, :].copy()  # Keep every 10th spot
adata = adata[:, adata.var.highly_variable].copy()  # Highly variable genes only
Memory-Efficient Processing
# Process spots in chunks to bound peak memory usage
import gc

def process_in_chunks(adata, chunk_size=1000):
    n_chunks = (adata.n_obs + chunk_size - 1) // chunk_size
    results = []
    for i in range(n_chunks):
        start = i * chunk_size
        end = min((i + 1) * chunk_size, adata.n_obs)
        chunk = adata[start:end, :].copy()
        # Process chunk (analyze_chunk is your per-chunk analysis function)
        result = analyze_chunk(chunk)
        results.append(result)
        # Free memory before the next chunk
        del chunk
        gc.collect()
    return combine_results(results)
Memory Monitoring
import gc
import psutil

def monitor_memory():
    """Monitor memory usage."""
    process = psutil.Process()
    memory_info = process.memory_info()
    print(f"RSS: {memory_info.rss / 1024**3:.2f} GB")
    print(f"VMS: {memory_info.vms / 1024**3:.2f} GB")
    print(f"Available: {psutil.virtual_memory().available / 1024**3:.2f} GB")

# Use throughout analysis
monitor_memory()
# ... run analysis ...
gc.collect()  # Force garbage collection
monitor_memory()
CPU Optimization
Parallel Processing
# Set thread counts before importing numerical libraries (they read these at import time)
import os
os.environ['NUMBA_NUM_THREADS'] = '8'
os.environ['OMP_NUM_THREADS'] = '8'

# Preprocessing is automatically optimized.
# Note: parallelism is controlled at the system level via NUMBA_NUM_THREADS / OMP_NUM_THREADS.
from chatspatial.tools import preprocess_data

result = preprocess_data(
    data_id="sample"
    # Parallelization handled internally via environment variables
)
Efficient Algorithms
# Use faster algorithms for large datasets
from chatspatial.tools import identify_spatial_domains

# Fast clustering for exploration
result = identify_spatial_domains(
    data_id="sample",
    method="leiden",  # Faster than the spagcn method
    resolution=1.0
    # Leiden is fast by design; no iteration limiting needed
)

# For large datasets, subsample during preprocessing first,
# then run spatial domain identification:
# preprocess_data(data_id="sample", subsample_spots=5000)
result = identify_spatial_domains(
    data_id="sample",
    method="spagcn"
    # Note: subsample data during preprocessing, not here
)
Threading Configuration
# Optimize threading
import threadpoolctl

# Limit threads for specific operations
with threadpoolctl.threadpool_limits(limits=4, user_api='blas'):
    # CPU-intensive operation
    result = analyze_spatial_data(data_id="sample")
GPU Acceleration
CUDA Setup
# Install CUDA-enabled PyTorch
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
# Verify CUDA
python -c "import torch; print(torch.cuda.is_available())"
GPU-Accelerated Methods
# Enable GPU for deep learning methods (deconvolution)
from chatspatial.tools import deconvolve_data

result = deconvolve_data(
    data_id="sample",
    method="cell2location",
    cell_type_key="cell_type",      # Required parameter
    reference_data_id="reference",  # Required parameter
    use_gpu=True
    # Note: GPU device selection is handled automatically by scvi-tools
)

# GPU-accelerated spatial analysis (GraphST method)
result = identify_spatial_domains(
    data_id="sample",
    method="graphst",      # GraphST supports GPU
    graphst_use_gpu=True   # Enable GPU for GraphST
    # Note: STAGATE does not have GPU support; use GraphST instead
)
GPU Memory Management
import torch

# Clear GPU cache
torch.cuda.empty_cache()

# Monitor GPU memory
def gpu_memory_usage():
    if torch.cuda.is_available():
        allocated = torch.cuda.memory_allocated() / 1024**3
        cached = torch.cuda.memory_reserved() / 1024**3
        print(f"GPU Memory - Allocated: {allocated:.2f} GB, Cached: {cached:.2f} GB")

# Use mixed precision for memory efficiency
from torch.cuda.amp import autocast

with autocast():
    # GPU operations with reduced memory usage
    result = gpu_intensive_analysis()
Storage Optimization
SSD Configuration
# Check storage type
lsblk -d -o name,rota
# 0 = SSD, 1 = HDD
# Optimize SSD (Linux)
sudo fstrim -av # TRIM unused blocks
File Format Optimization
# Use compressed HDF5
adata.write('data.h5ad', compression='gzip', compression_opts=9)

# Use Zarr for large datasets
adata.write_zarr('data.zarr', chunks=(1000, 1000))

# Inspect chunk sizes (works when X is stored as a dense dataset)
import h5py

with h5py.File('data.h5ad', 'r') as f:
    print(f"Chunk size: {f['X'].chunks}")
Data Caching
# Enable caching
import os
os.environ['CHATSPATIAL_CACHE_DIR'] = '/fast/cache/directory'
# Use memory mapping
adata = sc.read_h5ad('data.h5ad', backed='r')
# Data stays on disk, loaded as needed
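Beyond ChatSpatial's cache directory, expensive intermediate steps can be cached to disk with general-purpose tools such as joblib. The sketch below assumes a slow, deterministic function; expensive_step is a hypothetical placeholder, not a ChatSpatial API.
# Minimal disk-caching sketch with joblib (not a ChatSpatial API)
import os
from joblib import Memory

cache_dir = os.environ.get('CHATSPATIAL_CACHE_DIR', '/tmp/chatspatial_cache')
memory = Memory(cache_dir, verbose=0)

@memory.cache
def expensive_step(data_path, n_neighbors=15):
    # expensive_step is a hypothetical placeholder for a slow, deterministic step
    # ... load data and run the slow computation ...
    return {"data_path": data_path, "n_neighbors": n_neighbors}

# First call computes and stores the result; repeat calls load it from disk
result = expensive_step('data.h5ad', n_neighbors=15)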
Network Optimization
Data Transfer
# Compress data for transfer
import gzip
import pickle

# Compress results
with gzip.open('results.pkl.gz', 'wb') as f:
    pickle.dump(results, f)

# Use efficient serialization
import joblib

joblib.dump(results, 'results.joblib', compress=3)
Batch Processing
# Process multiple samples efficiently
samples = ['sample1', 'sample2', 'sample3']

# Batch load
for sample in samples:
    load_data(f"data/{sample}.h5ad", sample)

# Batch process
results = {}
for sample in samples:
    results[sample] = preprocess_data(data_id=sample)
Analysis-Specific Optimizations
Spatial Domain Identification
# Fast methods for exploration
quick_result = identify_spatial_domains(
    data_id="sample",
    method="leiden",  # Fastest
    resolution=1.0
)

# High-quality methods for final analysis
final_result = identify_spatial_domains(
    data_id="sample",
    method="spagcn",     # More accurate
    n_domains=7,         # Number of spatial domains
    refine_domains=True  # Enable domain refinement
)
Cell Communication Analysis
# Optimize for large datasets
result = analyze_cell_communication(
    data_id="sample",
    method="liana",
    cell_type_column="cell_type",   # Required parameter
    perform_spatial_analysis=True,  # Spatial bivariate analysis
    liana_n_perms=100               # Reduce permutations for speed
    # Note: for speed, reduce n_perms or subsample data during preprocessing
)
Visualization Optimization
# Reduce image resolution for exploration
plot = visualize_data(
    data_id="sample",
    plot_type="spatial",
    dpi=150,     # Lower DPI
    size=(8, 6)  # Smaller size
)

# High resolution for publication
final_plot = visualize_data(
    data_id="sample",
    plot_type="spatial",
    dpi=300,       # High DPI
    size=(12, 10),
    format="svg"   # Vector format
)
Monitoring and Profiling
Performance Monitoring
import time
import psutil

class PerformanceMonitor:
    def __init__(self):
        self.start_time = time.time()
        self.start_memory = psutil.virtual_memory().used

    def checkpoint(self, name):
        current_time = time.time()
        current_memory = psutil.virtual_memory().used
        elapsed = current_time - self.start_time
        memory_diff = (current_memory - self.start_memory) / 1024**3
        print(f"{name}: {elapsed:.2f}s, Memory: {memory_diff:+.2f} GB")

# Usage
monitor = PerformanceMonitor()
preprocess_data(data_id="sample")
monitor.checkpoint("Preprocessing")
identify_spatial_domains(data_id="sample")
monitor.checkpoint("Spatial domains")
Profiling Code
# Profile with cProfile
import cProfile
import pstats
profiler = cProfile.Profile()
profiler.enable()
# Your analysis code here
result = analyze_spatial_data(data_id="sample")
profiler.disable()
stats = pstats.Stats(profiler)
stats.sort_stats('cumulative').print_stats(10)
Memory Profiling
# Install memory_profiler first:
# pip install memory-profiler
from memory_profiler import profile

@profile
def memory_intensive_function():
    # Your analysis code here
    result = analyze_spatial_data(data_id="sample")
    return result

# Run with: python -m memory_profiler script.py
Troubleshooting Performance Issues
Common Bottlenecks
- Memory Issues
  - Reduce dataset size
  - Use sparse matrices
  - Process in chunks
- CPU Bottlenecks
  - Increase parallelization
  - Use faster algorithms
  - Optimize threading
- I/O Bottlenecks
  - Use SSD storage
  - Optimize file formats
  - Enable caching
- Network Issues
  - Use local data
  - Compress transfers
  - Batch operations
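To tell which bottleneck you are actually hitting, check live resource utilization first. The snippet below is a rough sketch using psutil; the thresholds are illustrative, not ChatSpatial defaults.
# Rough bottleneck check with psutil; thresholds are illustrative only
import psutil

def diagnose_bottleneck(sample_seconds=5):
    cpu = psutil.cpu_percent(interval=sample_seconds)  # Average CPU use over the window
    mem = psutil.virtual_memory().percent              # RAM utilization
    swap = psutil.swap_memory().percent                # Swapping indicates memory pressure
    print(f"CPU: {cpu:.0f}%  RAM: {mem:.0f}%  Swap: {swap:.0f}%")
    if mem > 90 or swap > 10:
        print("Likely memory-bound: use sparse matrices, chunking, or subsampling")
    elif cpu > 90:
        print("Likely CPU-bound: increase parallelization or switch to a faster method")
    else:
        print("CPU and RAM look fine: check disk and network I/O")

diagnose_bottleneck()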
Performance Tuning Checklist
- Use appropriate hardware
- Optimize memory usage
- Enable parallel processing
- Use GPU acceleration when available
- Choose efficient algorithms
- Monitor resource usage
- Profile bottlenecks
- Cache intermediate results
Benchmarking
Performance Benchmarks
import time

def benchmark_methods():
    """Benchmark runtimes of different spatial domain methods."""
    methods = ['leiden', 'spagcn', 'stagate']
    results = {}
    for method in methods:
        start_time = time.time()
        result = identify_spatial_domains(
            data_id="sample",
            method=method
        )
        elapsed = time.time() - start_time
        results[method] = elapsed
        print(f"{method}: {elapsed:.2f}s")
    return results
Dataset Size Guidelines
| Dataset Size | Recommended Method | Expected Time | Memory Usage |
|---|---|---|---|
| < 1K spots | Any method | < 1 min | < 2GB |
| 1K-10K spots | SpaGCN, STAGATE | 1-10 min | 2-8GB |
| 10K-50K spots | Leiden, subsampling | 10-30 min | 8-16GB |
| > 50K spots | Chunked processing | 30+ min | 16+ GB |
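The table's guidance can also be applied programmatically. The helper below is a hypothetical sketch whose thresholds simply restate the table; choose_strategy is not part of the ChatSpatial API.
# Hypothetical helper mirroring the table above; not a ChatSpatial function
def choose_strategy(n_spots):
    """Suggest an analysis strategy based on dataset size (number of spots)."""
    if n_spots < 1_000:
        return "any method"
    elif n_spots < 10_000:
        return "SpaGCN or STAGATE"
    elif n_spots < 50_000:
        return "Leiden, or subsample during preprocessing"
    else:
        return "chunked processing (subsample for exploration)"

print(choose_strategy(adata.n_obs))  # assumes an AnnData object is already loaded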
Next Steps
After optimizing performance:
- Monitor Usage: Track resource consumption
- Benchmark Methods: Compare different approaches
- Scale Analysis: Handle larger datasets
- Automate Workflows: Create efficient pipelines
See Configuration Guide for system setup!