Skip to content

cluster_plots

Short Description

sm.pl.cluster_plots: This versatile function streamlines the visualization process by generating UMAP plots, heatmaps of the expression matrix, and lists of ranked marker genes for each user-defined group, typically following clustering analysis via sm.tl.cluster. It offers a comprehensive overview of clustering results, facilitating the exploration of spatial patterns, molecular profiles, and key markers distinguishing each cluster.

Function

cluster_plots(adata, group_by, subsample=100000, palette='viridis', use_raw=False, size=None, output_dir=None)

Parameters:

Name Type Description Default
adata AnnData

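The annotated data matrix. A string path to a saved AnnData file is also accepted; the file is then loaded and its file name (without extension) is used as a prefix for the saved plot files.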

required
group_by str

The column name in adata.obs that contains the clustering labels to visualize.

required
subsample int

The number of cells to randomly subsample from the dataset for visualization to enhance performance. Default is 100000. If set to None, no subsampling is performed.

100000
palette str

The name of a matplotlib colormap to use for coloring clusters. Default is 'viridis'.

'viridis'
use_raw bool

If True, uses the .raw attribute of adata for extracting expression data for the matrix plot. Default is False.

False
size int

The size of the points in the UMAP plot. Default is None, in which case scanpy chooses the point size automatically based on the number of cells.

None
output_dir str

The directory where the plots should be saved. If not specified, plots are shown but not saved.

None

Returns:

Name Type Description
plots matplotlib

The function does not return a value but generates and optionally saves the specified plots.

Example
1
2
3
4
5
6
7
8
# Generate cluster plots using default settings
sm.pl.cluster_plots(adata, group_by='leiden')

# Generate cluster plots with a custom palette and subsampling
sm.pl.cluster_plots(adata, group_by='leiden', palette='plasma', subsample=50000)

# Generate cluster plots without subsampling, using raw data, and save them to a directory
sm.pl.cluster_plots(adata, group_by='leiden', subsample=None, use_raw=True, output_dir='./cluster_plots')
Source code in scimap/plotting/cluster_plots.py
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
def cluster_plots (adata, 
                   group_by, 
                   subsample=100000, 
                   palette ='viridis', 
                   use_raw=False,
                   size=None, output_dir=None):
    """
    Generate a UMAP plot, an expression matrix plot and ranked marker plots
    for the groups stored in `adata.obs[group_by]`.

    Each of the three plots is produced on a best-effort basis: if one of them
    fails, a message and the exception are printed and the remaining plots are
    still attempted.

Parameters:
        adata (anndata.AnnData or str or pathlib.Path):  
            The annotated data matrix, or the path to a saved AnnData (`.h5ad`) file. 
            When a path is given, the file is loaded and its stem is used as a prefix 
            for the names of the saved plots.

        group_by (str):  
            The column name in `adata.obs` that contains the clustering labels to visualize.

        subsample (int, optional):  
            The number of cells to randomly subsample from the dataset for visualization to enhance performance. 
            Default is 100000. If set to None, no subsampling is performed. 
            Subsampling works on a copy, so the object passed by the caller keeps all of its cells.

        palette (str, optional):  
            The name of a matplotlib colormap to use for coloring clusters. Default is 'viridis'.

        use_raw (bool, optional):  
            If True, uses the `.raw` attribute of `adata` for extracting expression data for the matrix plot. 
            Default is False.

        size (int, optional):  
            The size of the points in the UMAP plot. Default is None, which is passed 
            through to `sc.pl.umap` so that scanpy chooses the point size itself.

        output_dir (str, optional):  
            The directory where the plots should be saved (created if missing). 
            If not specified, plots are shown but not saved.

Returns:
        plots (matplotlib):  
            The function does not return a value but generates and optionally saves the specified plots.

Example:
    ```python

    # Generate cluster plots using default settings
    sm.pl.cluster_plots(adata, group_by='leiden')

    # Generate cluster plots with a custom palette and subsampling
    sm.pl.cluster_plots(adata, group_by='leiden', palette='plasma', subsample=50000)

    # Generate cluster plots without subsampling, using raw data, and save them to a directory
    sm.pl.cluster_plots(adata, group_by='leiden', subsample=None, use_raw=True, output_dir='./cluster_plots')

    ```
    """

    # Load the data; when a path is given its stem prefixes the output file names
    if isinstance(adata, (str, pathlib.Path)):
        imid = pathlib.Path(adata).stem
        # read_h5ad is the non-deprecated spelling of ad.read
        adata = ad.read_h5ad(adata)
    else:
        imid = ""

    # Prepare the output directory once, up front, so that every plot can be
    # saved even when an earlier plotting step fails.
    if output_dir is not None:
        output_dir = pathlib.Path(output_dir)
        try:
            output_dir.mkdir(exist_ok=True, parents=True)
        except OSError as exc:
            print('Output directory could not be created')
            print (exc)

    # Subset data if needed. copy=True keeps the caller's object intact instead
    # of permanently dropping cells from it.
    if subsample is not None and adata.shape[0] > subsample:
        adata = sc.pp.subsample(adata, n_obs=subsample, copy=True)

    # UMAP
    try:
        sc.pp.neighbors(adata) # Computing the neighborhood graph
        sc.tl.umap(adata)
        fig = sc.pl.umap(adata, color=group_by, palette=palette, size=size,
                         return_fig=True, show=False) # View the clustering
        fig.tight_layout()
        if output_dir is not None:
            fig.savefig(output_dir / f"{imid}_umap.pdf")

    except Exception as exc:
        print('UMAP could not be generated')
        print (exc)

    # Matrix plot
    try:
        mat_fig = sc.pl.matrixplot(adata, var_names=adata.var.index, groupby=group_by, use_raw=use_raw,
                                   cmap='RdBu_r', dendrogram=True, title=group_by,
                                   return_fig=True
                                   )
        if output_dir is not None:
            mat_fig.savefig(output_dir / f"{imid}_matrixplot.pdf")
        else:
            # With return_fig=True nothing is drawn until the plot object is
            # asked to render, so show it explicitly when it is not saved.
            mat_fig.show()

    except Exception as exc:
        print('Heatmap could not be generated')
        print (exc)

    # Marker expression per group
    try:
        sc.tl.rank_genes_groups(adata, group_by, method='t-test')

        # plot at most 20 markers per group, fewer if the dataset has fewer
        n_genes = min(20, len(adata.var.index))

        if output_dir is not None:
            sc.pl.rank_genes_groups(adata, sharey=False, n_genes=n_genes, fontsize=12, show=False)
            plt.suptitle(group_by, fontsize=20)
            plt.savefig(output_dir / f"{imid}_ranked_markers_per_cluster.pdf")
        else:
            sc.pl.rank_genes_groups(adata, sharey=False, n_genes=n_genes, fontsize=12)

    except Exception as exc:
        print('Finding differential markers per group cannot be completed')
        print (exc)