20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
def classify(adata, pos=None, neg=None, classify_label='passed_classify',
             phenotype=None, subclassify_phenotype=None, threshold=0.5,
             collapse_failed=True, label="classify"):
    """
    Label cells by marker positivity/negativity and store the result in `adata.obs`.

    A cell passes when every marker in `pos` has expression `>= threshold`
    and every marker in `neg` has expression `< threshold`.

    Parameters:
        adata : AnnData object
        pos : list, optional
            Pass a list of markers that should be expressed in the resultant cells.
            A single marker may be passed as a string.
        neg : list, optional
            Pass a list of markers that should not be expressed in the resultant cells.
            A single marker may be passed as a string.
        classify_label : string, optional
            Provide a name for the classified cells.
        phenotype : string, optional
            Name of the `adata.obs` column containing the phenotype information.
            Required when `subclassify_phenotype` is given or when
            `collapse_failed` is False.
        subclassify_phenotype : list, optional
            If only a subset of phenotypes needs to be classified, pass the names
            of those phenotypes as a list (or a single name as a string).
        threshold : float, optional
            Values at or above the threshold count as positive, values below it
            as negative. If the data was scaled using the `sm.pp.rescale`
            function, 0.5 is the classification threshold.
        collapse_failed : bool, optional
            If truthy, the cells that were not classified based on the given
            criteria are binned into a single category named 'failed_classify'.
            Otherwise the label of those cells is borrowed from the `phenotype`
            column.
        label : string, optional
            Key for the returned data, stored in `adata.obs`.

    Returns:
        adata : AnnData
            The same object, with `adata.obs[label]` added or overwritten.

    Raises:
        ValueError
            If `phenotype` is needed (see above) but was not provided.
        TypeError
            If no cell satisfies the classification criteria.

    Example:
        ```python
        # Classify all cells with both pos and neg markers
        # (Identify cytotoxic T-cells)
        adata = sm.hl.classify(adata, pos=['CD3D','CD8A'], neg=['ASMA'])

        # Classify specific sub-types of cells
        adata = sm.hl.classify(adata, pos=['CD3D','FOXP3'], neg=['ASMA'],
                               subclassify_phenotype=['T cells','Regulatory T cells'],
                               phenotype='phenotype')

        # Classify specific sub-types of cells and borrow labels
        # from another column
        adata = sm.hl.classify(adata, pos=['CD3D'], neg=['ASMA'],
                               subclassify_phenotype=['T cells'], collapse_failed=False,
                               phenotype='phenotype')
        ```
    """
    # Normalise the marker arguments: a bare string is one marker, None is "no filter".
    if isinstance(pos, str):
        pos = [pos]
    if isinstance(neg, str):
        neg = [neg]
    pos = [] if pos is None else list(pos)
    neg = [] if neg is None else list(neg)
    if isinstance(subclassify_phenotype, str):
        subclassify_phenotype = [subclassify_phenotype]

    # Fail early with a clear message instead of an opaque KeyError on obs[None].
    if phenotype is None and (subclassify_phenotype is not None or not collapse_failed):
        raise ValueError("Please pass a column name to the PHENOTYPE argument")

    # Expression matrix as cells x markers. Objects exposing `toarray` (e.g. SciPy
    # sparse matrices) are densified first because pd.DataFrame expects array-like data.
    matrix = adata.X.toarray() if hasattr(adata.X, "toarray") else adata.X
    data = pd.DataFrame(matrix, index=adata.obs.index, columns=adata.var.index)

    # Restrict to the requested phenotypes. A positional boolean mask is used so
    # that the selection does not depend on index alignment.
    if subclassify_phenotype is not None:
        in_scope = adata.obs[phenotype].isin(subclassify_phenotype)
        data = data.loc[in_scope.to_numpy()]

    # Build one pass/fail mask over the remaining cells.
    keep = np.ones(len(data), dtype=bool)
    if pos:
        keep &= (data[pos] >= threshold).all(axis=1).to_numpy()
    if neg:
        keep &= (data[neg] < threshold).all(axis=1).to_numpy()
    data = data.loc[keep]

    if data.empty:
        raise TypeError("No cells were found to satisfy your `classify` criteria")
    classify_idx = data.index

    if collapse_failed:
        # Every cell starts as failed; the passing ones are overwritten below.
        # Building the column directly avoids any dependence on existing obs columns.
        result = pd.Series('failed_classify', index=adata.obs.index, dtype=object)
    else:
        # Start from a copy of the phenotype column so the original stays untouched.
        result = adata.obs[phenotype].copy()
        # A categorical column must know the new label before it can be assigned.
        if (isinstance(result.dtype, pd.CategoricalDtype)
                and classify_label not in result.cat.categories):
            result = result.cat.add_categories([classify_label])
    result.loc[classify_idx] = classify_label

    # Add to AnnData
    adata.obs[label] = result
    return adata
|