6 def main(thisID
) -> None:
8 nfoDict
= SamplesDict
[thisID
]
9 print("[i]Start.", file=sys
.stderr
)
10 for platform
in PlatformTuple
:
11 nfoDict
['platformK'] = platform
12 nfoDict
['platformV'] = nfoDict
['platforms'][platform
]
13 nfoDict
['suffixOutV'] = nfoDict
['suffixOut'][platform
]
14 h5Path
= f
"{nfoDict['sid']}_{platform}.h5ad"
15 print(f
"[i]Reading {h5Path}", file=sys
.stderr
)
16 adata
= ad
.read_h5ad(h5Path
)
17 #adata.layers["raw"] = adata.X.copy()
18 #adata.layers["prnorm"] = adata.X.copy()
19 #sc.experimental.pp.normalize_pearson_residuals(adata,layer='prnorm')
20 #sc.pp.normalize_total(adata,target_sum=1e6,key_added='CPMnormFactor')
21 #adata.layers["norm"] = adata.X.copy()
22 scDat
[platform
] = adata
23 print(f
"[i]Read {thisID}.{platform}: {adata.raw.shape} -> {adata.shape}", file=sys
.stderr
)
25 rawList
=[scDat
[v
].raw
.to_adata() for v
in PlatformTuple
]
26 adata
=ad
.concat(rawList
, label
='Platform', keys
=PlatformTuple
, index_unique
='-')
27 adata
.var
['mt'] = adata
.var_names
.str.startswith('MT-') | adata
.var_names
.str.startswith('mt-')
28 sc
.pp
.calculate_qc_metrics(adata
, qc_vars
=['mt'], percent_top
=None, log1p
=True, inplace
=True)
29 adata
.obs
['sqrt_inv_total_counts'] = 1 / np
.sqrt(adata
.obs
['total_counts'])
30 p995
= np
.percentile(adata
.obs
['sqrt_inv_total_counts'].values
, 99.5)
32 print(f
"[i] {nfoDict['sub']} sqrt_inv_total_counts: {p995} ({round(c995,3)})", file=sys
.stderr
)
34 ax
=sc
.pl
.violin(adata
,['sqrt_inv_total_counts'],jitter
=0.4, stripplot
=True,show
=False)
35 ax
.set_title(f
"sqrt_inv_total_counts Violin - {nfoDict['sub']} @ {round(p995,9)}({round(c995,3)})")
36 ax
.axhline(y
=p995
, color
='red', linestyle
='dotted', label
=f
'p995={p995}')
37 plt
.savefig(f
"2C_umiEstd_{nfoDict['sid']}_{round(c995)}.pdf", metadata
={'Title': 'sqrt_inv_total_counts Violin', 'Subject': f
"{nfoDict['sub']} Data @ {round(c995)}", 'Author': 'HU Xuesong'})
38 adata
.raw
= adata
.copy()
39 sc
.pp
.filter_cells(adata
, min_counts
=round(c995
)) # sqrt_inv_total_counts < p995 按照样品均值的标准差考虑。写作round(c995)。
40 sc
.pp
.filter_genes(adata
, min_cells
=1) # adata.var[adata.var['n_cells']<2].sort_values(by='sqrt_inv_total_counts') 有800个,就不过滤了。
41 print(f
"[i]Filtered: {adata.raw.shape} -> {adata.shape}", file=sys
.stderr
)
43 #adata.layers["prnorm"] = adata.X.copy()
44 #sc.experimental.pp.normalize_pearson_residuals(adata,layer='prnorm')
45 sc
.pp
.normalize_total(adata
,target_sum
=1e6
, key_added
='CPMnormFactor')
46 adata
.layers
["norm"] = adata
.X
.copy()
50 sc
.pp
.neighbors(adata
)
51 sc
.tl
.umap(adata
,random_state
=369)
52 sc
.tl
.draw_graph(adata
,random_state
=369)
53 sc
.tl
.tsne(adata
,random_state
=369)
54 sc
.tl
.leiden(adata
,random_state
=0)
56 fig1, ax1 = plt.subplots()
58 ax1.set_title("Axis 1 title")
59 ax1.set_xlabel("X-label for axis 1")
61 print("[i]Begin fig E. 2Ca", file=sys
.stderr
)
63 ax
=sc
.pl
.pca(adata
, color
='Platform', show
=False, title
=f
"PCA - {nfoDict['sub']}", annotate_var_explained
=True)
64 plt
.savefig(f
"2C_PCA_{nfoDict['sid']}.pdf", bbox_extra_artists
=(ax
.get_legend(),), metadata
={'Title': 'PCA', 'Subject': f
"{nfoDict['sub']} Data", 'Author': 'HU Xuesong'})
66 ax
=sc
.pl
.umap(adata
,color
='Platform', show
=False, title
=f
"UMAP - {nfoDict['sub']}")
67 plt
.savefig(f
"2C_UMAP_{nfoDict['sid']}.pdf", bbox_extra_artists
=(ax
.get_legend(),), metadata
={'Title': 'UMAP', 'Subject': f
"{nfoDict['sub']} Data", 'Author': 'HU Xuesong'})
69 ax
=sc
.pl
.tsne(adata
, color
='Platform', show
=False, title
=f
"t-SNE - {nfoDict['sub']}")
70 plt
.savefig(f
"2C_tSNE_{nfoDict['sid']}.pdf", bbox_extra_artists
=(ax
.get_legend(),), metadata
={'Title': 't-SNE', 'Subject': f
"{nfoDict['sub']} Data", 'Author': 'HU Xuesong'})
72 ax
=sc
.pl
.draw_graph(adata
, color
='Platform', show
=False, title
=f
"ForceAtlas2 - {nfoDict['sub']}")
73 plt
.savefig(f
"2C_ForceAtlas2_{nfoDict['sid']}.pdf", bbox_extra_artists
=(ax
.get_legend(),), metadata
={'Title': 'ForceAtlas2', 'Subject': f
"{nfoDict['sub']} Data", 'Author': 'HU Xuesong'})
75 fig
, ax
= plt
.subplots()
76 fig
.patch
.set(alpha
=0)
78 palette
= ['#CC0000','#00CC00','#CCCC00']
79 sc
.pl
.pca(adata
, color
='Platform', show
=False, title
=f
"PCA - {nfoDict['sub']}", ax
=ax
, palette
=palette
, annotate_var_explained
=True)
80 arts
=ax
.findobj(matplotlib
.collections
.PathCollection
)
82 mplcairo
.operator_t
.ADD
.patch_artist(art
)
83 newlabels
= adata
.obs
['Platform'].unique().tolist() + ['Both']
84 legend_elements
= [plt
.scatter([],[],linewidths
=0, marker
='o', label
=label
, color
=color
) for label
, color
in zip(newlabels
, palette
)]
85 ax
.legend(handles
=legend_elements
,bbox_to_anchor
=(1.05, 0.5), loc
='center left', borderaxespad
=0.2)
86 plt
.savefig(f
"2C_mPCA_{nfoDict['sid']}.pdf", bbox_extra_artists
=(ax
.get_legend(),), metadata
={'Title': 'PCA', 'Subject': f
"{nfoDict['sub']} Data", 'Author': 'HU Xuesong'})
88 fig
, ax
= plt
.subplots()
89 fig
.patch
.set(alpha
=0)
91 palette
= ['#CC0000','#00CC00','#CCCC00']
92 sc
.pl
.tsne(adata
, color
='Platform', show
=False, title
=f
"PCA - {nfoDict['sub']}", ax
=ax
, palette
=palette
)
93 arts
=ax
.findobj(matplotlib
.collections
.PathCollection
)
95 mplcairo
.operator_t
.ADD
.patch_artist(art
)
96 newlabels
= adata
.obs
['Platform'].unique().tolist() + ['Both']
97 legend_elements
= [plt
.scatter([],[],linewidths
=0, marker
='o', label
=label
, color
=color
) for label
, color
in zip(newlabels
, palette
)]
98 ax
.legend(handles
=legend_elements
,bbox_to_anchor
=(1.05, 0.5), loc
='center left', borderaxespad
=0.2)
99 plt
.savefig(f
"2C_mtSNE_{nfoDict['sid']}.pdf", bbox_extra_artists
=(ax
.get_legend(),), metadata
={'Title': 't-SNE', 'Subject': f
"{nfoDict['sub']} Data", 'Author': 'HU Xuesong'})
100 print("[i]Begin fig E. 2Cb", file=sys
.stderr
)
101 fig
, axes
= plt
.subplots(1, 2, figsize
=(12, 6))
102 plt
.subplots_adjust(wspace
=0.1)
103 sc
.pl
.umap(adata
[adata
.obs
['Platform']=='Illumina'], color
='leiden', ax
=axes
[0], title
=f
'UMAP - Illumina')
104 sc
.pl
.umap(adata
[adata
.obs
['Platform']=='Salus'], color
='leiden', ax
=axes
[1], title
=f
'UMAP - Salus')
105 axes
[0].legend().set_visible(False)
106 fig
.suptitle(f
"Clusters by Leiden - {nfoDict['sub']}")
107 fig
.savefig(f
"2C_leidenUMAP_{nfoDict['sid']}.pdf", metadata
={'Title': 'Cluster UMAP', 'Subject': f
"{nfoDict['sub']} Data", 'Author': 'HU Xuesong'})
108 plt
.figure(figsize
=(6,4))
109 plt
.title(f
"Cluster Size Histogram - {nfoDict['sub']}")
110 axB
= sns
.histplot(adata
.obs
,x
='leiden',hue
='Platform',multiple
="dodge",shrink
=.66)
111 axB
.set_xlabel('leiden Cluster NO.')
112 axB
.set_ylabel('Cluster Size')
113 plt
.savefig(f
"2C_leidenHist_{nfoDict['sid']}.pdf", metadata
={'Title': 'Cluster Size Histogram', 'Subject': f
"{nfoDict['sub']} Data", 'Author': 'HU Xuesong'})
114 print("[i]Begin fig E. 2D", file=sys
.stderr
)
116 adata
.obs
['cell.cluster'] = adata
.obs
['leiden'].astype(str)
117 adata
.obs
['study_id'] = adata
.obs
['Platform'].astype(str)
118 pymn
.variableGenes(adata
, study_col
='Platform')
119 pymn
.MetaNeighborUS(adata
, study_col
='study_id', ct_col
='cell.cluster', fast_version
=True)
120 plt
.figure(figsize
=(6,4))
121 plt
.title(f
"MetaNeighborUS - {nfoDict['sub']}")
122 pymn
.plotMetaNeighborUS(adata
, figsize
=(10, 10), cmap
='coolwarm')
123 plt
.savefig(f
"2D_MetaNeighborUS_{nfoDict['sid']}.pdf", metadata
={'Title': 'MetaNeighborUS', 'Subject': f
"{nfoDict['sub']} Data", 'Author': 'HU Xuesong'})
124 pymn
.topHits(adata
, threshold
=0)
125 mndf
= adata
.uns
['MetaNeighborUS_topHits']
126 mndf
['ClusterID'] = mndf
['Study_ID|Celltype_1'].str.split('|').str[1].astype(int)
127 pndf
=mndf
[mndf
['Match_type']=='Reciprocal_top_hit']
128 plt
.figure(figsize
=(6,4))
129 plt
.title(f
"Mean_AUROC Between Platforms - {nfoDict['sub']}")
130 axC
= sns
.barplot(pndf
,x
='ClusterID',y
='Mean_AUROC')
131 axC
.set_xlabel('leiden Cluster NO.')
132 plt
.savefig(f
"2D_AUROC_{nfoDict['sid']}.pdf", metadata
={'Title': 'AUROC', 'Subject': f
"{nfoDict['sub']} Data", 'Author': 'HU Xuesong'})
135 if __name__
== "__main__":
136 if len(sys
.argv
) > 1:
138 if thisID
not in SamplesDict
:
139 print(f
"[x]sid can only be {SamplesDict.keys()}", file=sys
.stderr
)
143 print(sys
.argv
, file=sys
.stderr
)
144 print(f
"[i]{thisID}")
149 ./fig1.py human; ./fig2.py human ; ./fig1.py mbrain; ./fig2.py mbrain ; ./fig1.py mkidney; ./fig2.py mkidney
150 time (./fig1.py human; ./fig1.py mbrain ; ./fig1.py mkidney ) | tee plot.log
151 time (./fig2.py human; ./fig2.py mbrain ; ./fig2.py mkidney )