Skip to content

cluster_plots

Short Description

sm.pl.cluster_plots: A quick meta function that outputs a UMAP plot, a heatmap of the expression matrix and the ranked markers for each group provided by the user (generally run after using the sm.tl.cluster function).

Function

cluster_plots(adata, group_by, subsample=100000, palette='viridis', use_raw=False, size=None, output_dir=None)

Parameters:

Name Type Description Default
adata

AnnData object loaded into memory or path to AnnData object.

required
group_by string

Name of the categorical column that contains the clustering results.

required
subsample int

Subsample number of observations.

100000
palette string

Colors to use for plotting categorical annotation groups.

'viridis'
size float

Point size of UMAP plot.

None
use_raw bool

Use .raw attribute of adata for coloring the matrixplot expression matrix.

False
output_dir string

Path to output directory.

None

Returns:

Name Type Description
plots

UMAP, matrixplot and ranked markers per group.

1
    sm.pl.cluster_plots (adata, group_by='spatial_kmeans')
Source code in scimap/plotting/_cluster_plots.py
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
def cluster_plots (adata, group_by, subsample=100000, palette='viridis',
                   use_raw=False,
                   size=None, output_dir=None):
    """
Generate a UMAP, an expression matrixplot and ranked markers for each group
of a clustering result. Each of the three plots is attempted independently;
if one fails, a message and the exception are printed and the function moves
on to the next plot.

Parameters:
    adata : AnnData object loaded into memory or path to AnnData object.

    group_by (string):  
        Name of the categorical column that contains the clustering results.

    subsample (int):  
        Maximum number of observations to keep. If the data has more
        observations than this, it is randomly subsampled to this number.
        Pass `None` to disable subsampling.

    palette (string):  
        Colors to use for plotting categorical annotation groups.

    size (float):  
        Point size of UMAP plot.

    use_raw (bool):   
        Use `.raw` attribute of adata for coloring the matrixplot expression matrix.

    output_dir (string):  
        Path to output directory. It is created if it does not exist. When
        given, the plots are saved as `{name}_umap.pdf`,
        `{name}_matrixplot.pdf` and `{name}_ranked_markers_per_cluster.pdf`,
        where `{name}` is the file stem of `adata` when a path was passed
        and an empty string otherwise.

Returns:
    plots :   
        UMAP, matrixplot and ranked markers per group.

Example:
```python
    sm.pl.cluster_plots (adata, group_by='spatial_kmeans')
```
    """

    # Load the data. Both plain strings and pathlib paths are treated as a
    # location on disk; anything else is assumed to be an AnnData object.
    if isinstance(adata, (str, pathlib.PurePath)):
        imid = pathlib.Path(adata).stem
        # `ad.read` is a deprecated alias of `ad.read_h5ad`.
        adata = ad.read_h5ad(adata)
    else:
        imid = ""

    # Subset data if needed.
    # NOTE(review): the subsampling (and the neighbors/UMAP/ranking steps
    # below) operate on the object that was passed in, so an in-memory
    # AnnData supplied by the caller is modified -- confirm callers expect it.
    if subsample is not None and adata.shape[0] > subsample:
        sc.pp.subsample(adata, n_obs=subsample)

    # Create the output directory up front so that the matrixplot and the
    # ranked-marker plot can still be saved when the UMAP step fails early.
    if output_dir is not None:
        output_dir = pathlib.Path(output_dir)
        try:
            output_dir.mkdir(exist_ok=True, parents=True)
        except OSError as exc:
            # Stay best-effort like the plotting steps below.
            print('Output directory could not be created')
            print(exc)

    # UMAP
    try:
        sc.pp.neighbors(adata)  # Computing the neighborhood graph
        sc.tl.umap(adata)
        fig = sc.pl.umap(adata, color=group_by, palette=palette, size=size,
                         return_fig=True, show=False)  # View the clustering
        fig.tight_layout()
        if output_dir is not None:
            fig.savefig(output_dir / f"{imid}_umap.pdf")

    except Exception as exc:
        print('UMAP could not be generated')
        print(exc)

    # Matrix plot
    try:
        mat_fig = sc.pl.matrixplot(adata, var_names=adata.var.index,
                                   groupby=group_by, use_raw=use_raw,
                                   cmap='RdBu_r', dendrogram=True,
                                   title=group_by,
                                   return_fig=True)
        if output_dir is not None:
            mat_fig.savefig(output_dir / f"{imid}_matrixplot.pdf")
        else:
            # With return_fig=True the plot object is only returned, not
            # drawn, so render it explicitly when it is not being saved.
            mat_fig.show()

    except Exception as exc:
        print('Heatmap could not be generated')
        print(exc)

    # Marker expression per group
    try:
        sc.tl.rank_genes_groups(adata, group_by, method='t-test')

        # Show at most 20 markers per group, fewer if the dataset has fewer.
        n_genes = min(20, len(adata.var.index))

        if output_dir is not None:
            sc.pl.rank_genes_groups(adata, sharey=False, n_genes=n_genes,
                                    fontsize=12, show=False)
            plt.suptitle(group_by, fontsize=20)
            plt.savefig(output_dir / f"{imid}_ranked_markers_per_cluster.pdf")
        else:
            sc.pl.rank_genes_groups(adata, sharey=False, n_genes=n_genes,
                                    fontsize=12)

    except Exception as exc:
        print('Finding differential markers per group cannot be completed')
        print(exc)