23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232 | def spatial_interaction (adata,x_coordinate='X_centroid',y_coordinate='Y_centroid',
z_coordinate=None,
phenotype='phenotype',
method='radius', radius=30, knn=10,
permutation=1000,
imageid='imageid',subset=None,
pval_method='zscore',
label='spatial_interaction'):
"""
Parameters:
adata : AnnData object
x_coordinate : float, required
Column name containing the x-coordinates values.
y_coordinate : float, required
Column name containing the y-coordinates values.
z_coordinate : float, optional
Column name containing the z-coordinates values.
phenotype : string, required
Column name of the column containing the phenotype information.
It could also be any categorical assignment given to single cells.
method : string, optional
Two options are available: a) 'radius', b) 'knn'.
a) radius - Identifies the neighbours within a given radius for every cell.
b) knn - Identifies the K nearest neigbours for every cell.
radius : int, optional
The radius used to define a local neighbhourhood.
knn : int, optional
Number of cells considered for defining the local neighbhourhood.
permutation : int, optional
The number of permutations to be performed for calculating the P-Value.
imageid : string, optional
Column name of the column containing the image id.
subset : string, optional
imageid of a single image to be subsetted for analyis.
pval_method : string, optional
Two options are available: a) 'histocat', b) 'zscore'.
a) P-values are calculated by subtracting the permuted mean from the observed mean
divided by the number of permutations as described in the histoCAT manuscript (Denis et.al, Nature Methods 2017)
b) zscores are calculated from the mean and standard deviation and further p-values are
derived by fitting the observed values to a normal distribution. The default is 'histocat'.
label : string, optional
Key for the returned data, stored in `adata.obs`. The default is 'spatial_interaction'.
Returns:
adata : AnnData object
Updated AnnData object with the results stored in `adata.obs['spatial_aggregate']`.
Example:
```python
# Using the radius method to identify local neighbours and histocat to compute P-values
adata = sm.tl.spatial_interaction(adata, method='radius', radius=30, pval_method='histocat',
imageid='imageid',x_coordinate='X',y_coordinate='Y')
# Using the KNN method to identify local neighbours and zscore to compute P-values
adata = sm.tl.spatial_interaction(adata, method='knn', radius=30,pval_method='zscore',
imageid='ImageId',x_coordinate='X_position',y_coordinate='Y_position')
# Interaction analysis on 3D data
adata = sm.tl.spatial_interaction(adata, method='radius', radius=60, pval_method='zscore',
imageid='ImageId',x_coordinate='X_position',
y_coordinate='Y_position', z_coordinate='Z_position')
```
"""
def spatial_interaction_internal (adata_subset,x_coordinate,y_coordinate,
z_coordinate,
phenotype,
method, radius, knn,
permutation,
imageid,subset,
pval_method):
print("Processing Image: " + str(adata_subset.obs[imageid].unique()))
# Create a dataFrame with the necessary inforamtion
if z_coordinate is not None:
print("Including Z -axis")
data = pd.DataFrame({'x': adata_subset.obs[x_coordinate], 'y': adata_subset.obs[y_coordinate], 'z': adata_subset.obs[z_coordinate], 'phenotype': adata_subset.obs[phenotype]})
else:
data = pd.DataFrame({'x': adata_subset.obs[x_coordinate], 'y': adata_subset.obs[y_coordinate], 'phenotype': adata_subset.obs[phenotype]})
# Identify neighbourhoods based on the method used
# a) KNN method
if method == 'knn':
print("Identifying the " + str(knn) + " nearest neighbours for every cell")
if z_coordinate is not None:
tree = BallTree(data[['x','y','z']], leaf_size= 2)
ind = tree.query(data[['x','y','z']], k=knn, return_distance= False)
else:
tree = BallTree(data[['x','y']], leaf_size= 2)
ind = tree.query(data[['x','y']], k=knn, return_distance= False)
neighbours = pd.DataFrame(ind.tolist(), index = data.index) # neighbour DF
neighbours.drop(0, axis=1, inplace=True) # Remove self neighbour
# b) Local radius method
if method == 'radius':
print("Identifying neighbours within " + str(radius) + " pixels of every cell")
if z_coordinate is not None:
kdt = BallTree(data[['x','y','z']], metric='euclidean')
ind = kdt.query_radius(data[['x','y','z']], r=radius, return_distance=False)
else:
kdt = BallTree(data[['x','y']], metric='euclidean')
ind = kdt.query_radius(data[['x','y']], r=radius, return_distance=False)
for i in range(0, len(ind)): ind[i] = np.delete(ind[i], np.argwhere(ind[i] == i))#remove self
neighbours = pd.DataFrame(ind.tolist(), index = data.index) # neighbour DF
# Map Phenotypes to Neighbours
# Loop through (all functionized methods were very slow)
phenomap = dict(zip(list(range(len(ind))), data['phenotype'])) # Used for mapping
print("Mapping phenotype to neighbors")
for i in neighbours.columns:
neighbours[i] = neighbours[i].dropna().map(phenomap, na_action='ignore')
# Drop NA
neighbours = neighbours.dropna(how='all')
# Collapse all the neighbours into a single column
n = pd.DataFrame(neighbours.stack(), columns = ["neighbour_phenotype"])
n.index = n.index.get_level_values(0) # Drop the multi index
# Merge with real phenotype
n = n.merge(data['phenotype'], how='inner', left_index=True, right_index=True)
# Permutation
print('Performing '+ str(permutation) + ' permutations')
def permutation_pval (data):
data = data.assign(neighbour_phenotype=np.random.permutation(data['neighbour_phenotype']))
#data['neighbour_phenotype'] = np.random.permutation(data['neighbour_phenotype'])
data_freq = data.groupby(['phenotype','neighbour_phenotype']).size().unstack()
data_freq = data_freq.fillna(0).stack().values
return data_freq
# Apply function
final_scores = Parallel(n_jobs=-1)(delayed(permutation_pval)(data=n) for i in range(permutation))
perm = pd.DataFrame(final_scores).T
# Consolidate the permutation results
print('Consolidating the permutation results')
# Calculate P value
# real
n_freq = n.groupby(['phenotype','neighbour_phenotype']).size().unstack().fillna(0).stack()
# permutation
mean = perm.mean(axis=1)
std = perm.std(axis=1)
# P-value calculation
if pval_method == 'histocat':
# real value - prem value / no of perm
p_values = abs(n_freq.values - mean) / (permutation+1)
p_values = p_values[~np.isnan(p_values)].values
if pval_method == 'zscore':
z_scores = (n_freq.values - mean) / std
z_scores[np.isnan(z_scores)] = 0
p_values = scipy.stats.norm.sf(abs(z_scores))*2
p_values = p_values[~np.isnan(p_values)]
# Compute Direction of interaction (interaction or avoidance)
direction = ((n_freq.values - mean) / abs(n_freq.values - mean)).fillna(1)
# Normalize based on total cell count
k = n.groupby(['phenotype','neighbour_phenotype']).size().unstack().fillna(0)
# add neighbour phenotype that are not present to make k a square matrix
columns_to_add = dict.fromkeys(np.setdiff1d(k.index,k.columns), 0)
k = k.assign(**columns_to_add)
total_cell_count = data['phenotype'].value_counts()
total_cell_count = total_cell_count[k.columns].values # keep only cell types that are present in the column of k
# total_cell_count = total_cell_count.reindex(k.columns).values # replaced by above
k_max = k.div(total_cell_count, axis = 0)
k_max = k_max.div(k_max.max(axis=1), axis=0).stack()
# DataFrame with the neighbour frequency and P values
count = (k_max.values * direction).values # adding directionallity to interaction
neighbours = pd.DataFrame({'count': count,'p_val': p_values}, index = k_max.index)
#neighbours.loc[neighbours[neighbours['p_val'] > p_val].index,'count'] = np.NaN
#del neighbours['p_val']
neighbours.columns = [adata_subset.obs[imageid].unique()[0], 'pvalue_' + str(adata_subset.obs[imageid].unique()[0])]
neighbours = neighbours.reset_index()
#neighbours = neighbours['count'].unstack()
# return
return neighbours
# subset a particular subset of cells if the user wants else break the adata into list of anndata objects
if subset is not None:
adata_list = [adata[adata.obs[imageid] == subset]]
else:
adata_list = [adata[adata.obs[imageid] == i] for i in adata.obs[imageid].unique()]
# Apply function to all images and create a master dataframe
# Create lamda function
r_spatial_interaction_internal = lambda x: spatial_interaction_internal (adata_subset=x, x_coordinate=x_coordinate, y_coordinate=y_coordinate,
z_coordinate=z_coordinate, phenotype=phenotype, method=method, radius=radius, knn=knn, permutation=permutation, imageid=imageid,subset=subset,pval_method=pval_method)
all_data = list(map(r_spatial_interaction_internal, adata_list)) # Apply function
# Merge all the results into a single dataframe
df_merged = reduce(lambda left,right: pd.merge(left,right,on=['phenotype', 'neighbour_phenotype'], how='outer'), all_data)
# Add to anndata
adata.uns[label] = df_merged
# return
return adata
|