Skip to content

stacked_barplot

Short Description

sm.pl.stacked_barplot: This function creates stacked bar plots to visualize the distribution and proportions of categories within a specified categorical column across different groups or samples in an AnnData object. It supports both matplotlib for generating static plots and Plotly for interactive, browser-based visualizations. The flexibility to choose between plotting libraries caters to diverse analysis needs, from detailed publication-ready figures to dynamic exploration of complex datasets, enhancing the interpretability of spatial and phenotypic compositions.

Function

stacked_barplot(adata, x_axis='imageid', y_axis='phenotype', subset_xaxis=None, subset_yaxis=None, order_xaxis=None, order_yaxis=None, method='percent', plot_tool='matplotlib', matplotlib_cmap=None, matplotlib_bbox_to_anchor=(1, 1.02), matplotlib_legend_loc=2, fileName='stacked_barplot.pdf', saveDir=None, return_data=False, **kwargs)

Parameters:

Name Type Description Default
adata AnnData

The annotated data matrix.

required
x_axis str

Column in adata.obs to be used as x-axis categories.

'imageid'
y_axis str

Column in adata.obs representing categories to stack.

'phenotype'
subset_xaxis list

Subsets categories in x_axis before plotting.

None
subset_yaxis list

Subsets categories in y_axis before plotting.

None
order_xaxis list

Specifies custom ordering for x-axis categories.

None
order_yaxis list

Specifies custom ordering for y-axis categories.

None
method str

Plotting method; 'percent' for percentage proportions, 'absolute' for actual counts.

'percent'
plot_tool str

Choice of plotting library; 'matplotlib' for static plots, 'plotly' for interactive plots.

'matplotlib'
matplotlib_cmap str

Matplotlib colormap for coloring the bars.

None
matplotlib_bbox_to_anchor tuple

Adjusts the legend's bounding box location in matplotlib plots.

(1, 1.02)
matplotlib_legend_loc int

Sets the legend location in matplotlib plots.

2
return_data bool

If True, returns the DataFrame used for plotting after the plot has been displayed or saved; the plot is still generated.

False
fileName str

Name of the file to save the plot. Relevant only if saveDir is not None.

'stacked_barplot.pdf'
saveDir str

Directory to save the generated plot (created if it does not exist). Only used when plot_tool is 'matplotlib'. If None, the plot is not saved and is displayed instead.

None
**kwargs

Additional arguments passed to the plotting function (matplotlib or plotly).

{}

Returns:

Name Type Description
Plot matplotlib

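The stacked bar plot is always displayed (or, with matplotlib and saveDir set, saved to file). If return_data is True, the DataFrame containing the data used for plotting is additionally returned; otherwise nothing is returned.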

Example
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
# Default stacked bar plot showing percentage composition
sm.pl.stacked_barplot(adata, x_axis='sample_id', y_axis='cell_type', method='percent')

# Stacked bar plot using absolute counts with matplotlib customization
sm.pl.stacked_barplot(adata, x_axis='region', y_axis='phenotype', method='absolute', plot_tool='matplotlib',
                matplotlib_cmap='tab20', figsize=(12, 6), edgecolor='white')

# Interactive stacked bar plot using Plotly with subset and custom order
sm.pl.stacked_barplot(adata, x_axis='condition', y_axis='cell_state', subset_xaxis=['Control', 'Treated'],
                order_yaxis=['State1', 'State2', 'State3'], method='percent', plot_tool='plotly',
                color_discrete_map={'State1': '#1f77b4', 'State2': '#ff7f0e', 'State3': '#2ca02c'})

# Retrieve data used for plotting
data_df = sm.pl.stacked_barplot(adata, x_axis='batch', y_axis='cell_type', return_data=True)
Source code in scimap/plotting/stacked_barplot.py
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
def stacked_barplot(
    adata,
    x_axis='imageid',
    y_axis='phenotype',
    subset_xaxis=None,
    subset_yaxis=None,
    order_xaxis=None,
    order_yaxis=None,
    method='percent',
    plot_tool='matplotlib',
    matplotlib_cmap=None,
    matplotlib_bbox_to_anchor=(1, 1.02),
    matplotlib_legend_loc=2,
    fileName='stacked_barplot.pdf',
    saveDir=None,
    return_data=False,
    **kwargs,
):
    """
    Draw a stacked bar plot of the `y_axis` categories found within each
    `x_axis` group of `adata.obs`.

    Parameters:
            adata (anndata.AnnData):
                The annotated data matrix.

            x_axis (str):
                Column in `adata.obs` to be used as x-axis categories.

            y_axis (str):
                Column in `adata.obs` representing categories to stack.

            subset_xaxis (list, optional):
                Subsets categories in x_axis before plotting. A single string is
                treated as one category. Values are compared as strings.

            subset_yaxis (list, optional):
                Subsets categories in y_axis before plotting. A single string is
                treated as one category. Values are compared as strings.

            order_xaxis (list, optional):
                Specifies custom ordering for x-axis categories. Must list exactly
                the categories present (after subsetting). Values are compared
                as strings.

            order_yaxis (list, optional):
                Specifies custom ordering for y-axis categories. Must list exactly
                the categories present (after subsetting). Values are compared
                as strings.

            method (str, optional):
                Plotting method; 'percent' for proportions (each x-axis group is
                divided by its total, so values are fractions between 0 and 1),
                'absolute' for actual counts.

            plot_tool (str, optional):
                Choice of plotting library; 'matplotlib' for static plots, 'plotly' for interactive plots.

            matplotlib_cmap (str, optional):
                Matplotlib colormap for coloring the bars. If None, a colormap is
                picked from the number of y-axis categories.

            matplotlib_bbox_to_anchor (tuple, optional):
                Adjusts the legend's bounding box location in matplotlib plots.

            matplotlib_legend_loc (int, optional):
                Sets the legend location in matplotlib plots.

            return_data (bool, optional):
                If True, returns the DataFrame used for plotting. The plot is
                still displayed (or saved) before the data is returned.

            fileName (str, optional):
                Name of the file to save the plot. Relevant only if `saveDir` is not None.

            saveDir (str, optional):
                Directory to save the generated plot (created if missing). Only
                used when `plot_tool` is 'matplotlib'. If None, the plot is not
                saved and is shown instead.

            **kwargs:
                Additional arguments passed to the plotting function (matplotlib or plotly).
                For matplotlib, `width` defaults to 0.9 when not supplied.

    Returns:
        data (pandas.DataFrame or None):
            If `return_data` is True, the pivoted table used for plotting
            (rows are `x_axis` categories, columns are `y_axis` categories).
            Otherwise nothing is returned.

    Raises:
        ValueError:
            If `method` is not 'percent' or 'absolute', or if `plot_tool` is
            not 'matplotlib' or 'plotly'.

    Example:
        ```python

        # Default stacked bar plot showing percentage composition
        sm.pl.stacked_barplot(adata, x_axis='sample_id', y_axis='cell_type', method='percent')

        # Stacked bar plot using absolute counts with matplotlib customization
        sm.pl.stacked_barplot(adata, x_axis='region', y_axis='phenotype', method='absolute', plot_tool='matplotlib',
                        matplotlib_cmap='tab20', figsize=(12, 6), edgecolor='white')

        # Interactive stacked bar plot using Plotly with subset and custom order
        sm.pl.stacked_barplot(adata, x_axis='condition', y_axis='cell_state', subset_xaxis=['Control', 'Treated'],
                        order_yaxis=['State1', 'State2', 'State3'], method='percent', plot_tool='plotly',
                        color_discrete_map={'State1': '#1f77b4', 'State2': '#ff7f0e', 'State3': '#2ca02c'})

        # Retrieve data used for plotting
        data_df = sm.pl.stacked_barplot(adata, x_axis='batch', y_axis='cell_type', return_data=True)

        ```
    """

    def _as_str_list(values):
        # The obs columns are cast to str below, so user-supplied categories
        # must be normalised the same way or they would never match
        # (e.g. integer image ids). A bare string counts as one category.
        if isinstance(values, str):
            return [values]
        return [str(value) for value in values]

    # create the dataframe with details
    data = pd.DataFrame(adata.obs)[[x_axis, y_axis]].astype(str)

    # subset the data if needed
    if subset_xaxis is not None:
        data = data[data[x_axis].isin(_as_str_list(subset_xaxis))]
    if subset_yaxis is not None:
        data = data[data[y_axis].isin(_as_str_list(subset_yaxis))]

    # Method: absolute counts or per-group proportions
    if method not in ('percent', 'absolute'):
        raise ValueError('method should be either percent or absolute')

    # x_axis categories as rows, y_axis categories as columns, missing pairs as 0
    counts = data.groupby([x_axis, y_axis]).size().unstack().fillna(0)
    if method == 'percent':
        # normalise every x_axis group by its own total
        counts = counts.div(counts.sum(axis=1), axis=0)

    # long format with one row per (x_axis, y_axis) pair and a 'count' column
    rg = counts.stack().to_frame('count').reset_index()

    # re-order the x or y axis if requested by user
    sort_keys = []
    if order_xaxis is not None:
        rg[x_axis] = (
            rg[x_axis].astype('category').cat.reorder_categories(_as_str_list(order_xaxis))
        )
        sort_keys.append(x_axis)
    if order_yaxis is not None:
        rg[y_axis] = (
            rg[y_axis].astype('category').cat.reorder_categories(_as_str_list(order_yaxis))
        )
        sort_keys.append(y_axis)
    if sort_keys:
        # sorting by [x_axis, y_axis] when both orders are given, else by the one given
        rg = rg.sort_values(sort_keys)

    pivot_df = rg.pivot(index=x_axis, columns=y_axis, values='count')

    # Plotting tool
    if plot_tool == 'matplotlib':

        if matplotlib_cmap is None:
            # pick a palette large enough for the number of stacked categories
            n_categories = len(rg[y_axis].unique())
            if n_categories <= 9:
                matplotlib_cmap = "Set1"
            elif n_categories <= 20:
                matplotlib_cmap = plt.cm.tab20  # tab20
            else:
                matplotlib_cmap = plt.cm.gist_ncar

        # Take `width` out of kwargs so a user-supplied value is honoured and
        # is not passed twice to the plotting call.
        width = kwargs.pop('width', 0.9)

        # Actual plotting
        ax = pivot_df.plot.bar(
            stacked=True, cmap=matplotlib_cmap, width=width, **kwargs
        )
        fig = ax.get_figure()  # Get the Figure object to save

        # reverse the legend so its order matches the visual stacking order
        handles, labels = ax.get_legend_handles_labels()
        ax.legend(
            handles[::-1],
            labels[::-1],
            bbox_to_anchor=matplotlib_bbox_to_anchor,
            loc=matplotlib_legend_loc,
        )

        # Save the figure if saveDir is provided, otherwise show it
        if saveDir:
            os.makedirs(saveDir, exist_ok=True)
            full_path = os.path.join(saveDir, fileName)
            fig.savefig(full_path, dpi=300)
            plt.close(fig)  # Close the figure properly
            print(f"Saved plot to {full_path}")
        else:
            plt.show()

    elif plot_tool == 'plotly':

        fig = px.bar(rg, x=x_axis, y="count", color=y_axis, **kwargs)
        fig.update_layout(
            {'plot_bgcolor': 'rgba(0, 0, 0, 0)', 'paper_bgcolor': 'rgba(0, 0, 0, 0)'},
            xaxis=dict(tickmode='linear'),  # type = 'category'
        )
        fig.show()

    else:

        raise ValueError('plot_tool should be either matplotlib or plotly')

    # Return data
    if return_data is True:
        return pivot_df