modified: fig2.py
[GalaxyCodeBases.git] / python / salus / cmplatform / fig2.py
blob5c132714cef248d829a2f608a39e99ad0cce5d06
1 #!/usr/bin/env python3
3 from fig1 import *
4 #print(SamplesDict)
6 def main(thisID) -> None:
7 scDat = {}
8 nfoDict = SamplesDict[thisID]
9 print("[i]Start.", file=sys.stderr)
10 for platform in PlatformTuple:
11 nfoDict['platformK'] = platform
12 nfoDict['platformV'] = nfoDict['platforms'][platform]
13 nfoDict['suffixOutV'] = nfoDict['suffixOut'][platform]
14 h5Path = f"{nfoDict['sid']}_{platform}.h5ad"
15 print(f"[i]Reading {h5Path}", file=sys.stderr)
16 adata = ad.read_h5ad(h5Path)
17 #adata.layers["raw"] = adata.X.copy()
18 #adata.layers["prnorm"] = adata.X.copy()
19 #sc.experimental.pp.normalize_pearson_residuals(adata,layer='prnorm')
20 #sc.pp.normalize_total(adata,target_sum=1e6,key_added='CPMnormFactor')
21 #adata.layers["norm"] = adata.X.copy()
22 scDat[platform] = adata
23 print(f"[i]Read {thisID}.{platform}: {adata.raw.shape} -> {adata.shape}", file=sys.stderr)
24 #print(scDat)
25 rawList=[scDat[v].raw.to_adata() for v in PlatformTuple]
26 adata=ad.concat(rawList, label='Platform', keys=PlatformTuple, index_unique='-')
27 adata.var['mt'] = adata.var_names.str.startswith('MT-') | adata.var_names.str.startswith('mt-')
28 sc.pp.calculate_qc_metrics(adata, qc_vars=['mt'], percent_top=None, log1p=True, inplace=True)
29 adata.obs['sqrt_inv_total_counts'] = 1 / np.sqrt(adata.obs['total_counts'])
30 p995 = np.percentile(adata.obs['sqrt_inv_total_counts'].values, 99.5)
31 c995 = 1/(p995*p995)
32 print(f"[i] {nfoDict['sub']} sqrt_inv_total_counts: {p995} ({round(c995,3)})", file=sys.stderr)
33 plt.figure(1)
34 ax=sc.pl.violin(adata,['sqrt_inv_total_counts'],jitter=0.4, stripplot=True,show=False)
35 ax.set_title(f"sqrt_inv_total_counts Violin - {nfoDict['sub']} @ {round(p995,9)}({round(c995,3)})")
36 ax.axhline(y=p995, color='red', linestyle='dotted', label=f'p995={p995}')
37 plt.savefig(f"2C_umiEstd_{nfoDict['sid']}_{round(c995)}.pdf", metadata={'Title': 'sqrt_inv_total_counts Violin', 'Subject': f"{nfoDict['sub']} Data @ {round(c995)}", 'Author': 'HU Xuesong'})
38 adata.raw = adata.copy()
39 sc.pp.filter_cells(adata, min_counts=round(c995)) # sqrt_inv_total_counts < p995 按照样品均值的标准差考虑。写作round(c995)。
40 sc.pp.filter_genes(adata, min_cells=1) # adata.var[adata.var['n_cells']<2].sort_values(by='sqrt_inv_total_counts') 有800个,就不过滤了。
41 print(f"[i]Filtered: {adata.raw.shape} -> {adata.shape}", file=sys.stderr)
43 #adata.layers["prnorm"] = adata.X.copy()
44 #sc.experimental.pp.normalize_pearson_residuals(adata,layer='prnorm')
45 sc.pp.normalize_total(adata,target_sum=1e6, key_added='CPMnormFactor')
46 adata.layers["norm"] = adata.X.copy()
47 sc.pp.log1p(adata)
49 sc.pp.pca(adata)
50 sc.pp.neighbors(adata)
51 sc.tl.umap(adata,random_state=369)
52 sc.tl.draw_graph(adata,random_state=369)
53 sc.tl.tsne(adata,random_state=369)
54 sc.tl.leiden(adata,random_state=0)
55 '''
56 fig1, ax1 = plt.subplots()
57 ax1.plot(x, y)
58 ax1.set_title("Axis 1 title")
59 ax1.set_xlabel("X-label for axis 1")
60 '''
61 print("[i]Begin fig E. 2Ca", file=sys.stderr)
62 plt.figure(2)
63 ax=sc.pl.pca(adata, color='Platform', show=False, title=f"PCA - {nfoDict['sub']}", annotate_var_explained=True)
64 plt.savefig(f"2C_PCA_{nfoDict['sid']}.pdf", bbox_extra_artists=(ax.get_legend(),), metadata={'Title': 'PCA', 'Subject': f"{nfoDict['sub']} Data", 'Author': 'HU Xuesong'})
65 plt.figure(3)
66 ax=sc.pl.umap(adata,color='Platform', show=False, title=f"UMAP - {nfoDict['sub']}")
67 plt.savefig(f"2C_UMAP_{nfoDict['sid']}.pdf", bbox_extra_artists=(ax.get_legend(),), metadata={'Title': 'UMAP', 'Subject': f"{nfoDict['sub']} Data", 'Author': 'HU Xuesong'})
68 plt.figure(4)
69 ax=sc.pl.tsne(adata, color='Platform', show=False, title=f"t-SNE - {nfoDict['sub']}")
70 plt.savefig(f"2C_tSNE_{nfoDict['sid']}.pdf", bbox_extra_artists=(ax.get_legend(),), metadata={'Title': 't-SNE', 'Subject': f"{nfoDict['sub']} Data", 'Author': 'HU Xuesong'})
71 plt.figure(5)
72 ax=sc.pl.draw_graph(adata, color='Platform', show=False, title=f"ForceAtlas2 - {nfoDict['sub']}")
73 plt.savefig(f"2C_ForceAtlas2_{nfoDict['sid']}.pdf", bbox_extra_artists=(ax.get_legend(),), metadata={'Title': 'ForceAtlas2', 'Subject': f"{nfoDict['sub']} Data", 'Author': 'HU Xuesong'})
75 fig, ax = plt.subplots()
76 fig.patch.set(alpha=0)
77 ax.patch.set(alpha=0)
78 palette = ['#CC0000','#00CC00','#CCCC00']
79 sc.pl.pca(adata, color='Platform', show=False, title=f"PCA - {nfoDict['sub']}", ax=ax, palette=palette, annotate_var_explained=True)
80 arts=ax.findobj(matplotlib.collections.PathCollection)
81 for art in arts:
82 mplcairo.operator_t.ADD.patch_artist(art)
83 newlabels = adata.obs['Platform'].unique().tolist() + ['Both']
84 legend_elements = [plt.scatter([],[],linewidths=0, marker='o', label=label, color=color) for label, color in zip(newlabels, palette)]
85 ax.legend(handles=legend_elements,bbox_to_anchor=(1.05, 0.5), loc='center left', borderaxespad=0.2)
86 plt.savefig(f"2C_mPCA_{nfoDict['sid']}.pdf", bbox_extra_artists=(ax.get_legend(),), metadata={'Title': 'PCA', 'Subject': f"{nfoDict['sub']} Data", 'Author': 'HU Xuesong'})
88 fig, ax = plt.subplots()
89 fig.patch.set(alpha=0)
90 ax.patch.set(alpha=0)
91 palette = ['#CC0000','#00CC00','#CCCC00']
92 sc.pl.tsne(adata, color='Platform', show=False, title=f"PCA - {nfoDict['sub']}", ax=ax, palette=palette)
93 arts=ax.findobj(matplotlib.collections.PathCollection)
94 for art in arts:
95 mplcairo.operator_t.ADD.patch_artist(art)
96 newlabels = adata.obs['Platform'].unique().tolist() + ['Both']
97 legend_elements = [plt.scatter([],[],linewidths=0, marker='o', label=label, color=color) for label, color in zip(newlabels, palette)]
98 ax.legend(handles=legend_elements,bbox_to_anchor=(1.05, 0.5), loc='center left', borderaxespad=0.2)
99 plt.savefig(f"2C_mtSNE_{nfoDict['sid']}.pdf", bbox_extra_artists=(ax.get_legend(),), metadata={'Title': 't-SNE', 'Subject': f"{nfoDict['sub']} Data", 'Author': 'HU Xuesong'})
100 print("[i]Begin fig E. 2Cb", file=sys.stderr)
101 fig, axes = plt.subplots(1, 2, figsize=(12, 6))
102 plt.subplots_adjust(wspace=0.1)
103 sc.pl.umap(adata[adata.obs['Platform']=='Illumina'], color='leiden', ax=axes[0], title=f'UMAP - Illumina')
104 sc.pl.umap(adata[adata.obs['Platform']=='Salus'], color='leiden', ax=axes[1], title=f'UMAP - Salus')
105 axes[0].legend().set_visible(False)
106 fig.suptitle(f"Clusters by Leiden - {nfoDict['sub']}")
107 fig.savefig(f"2C_leidenUMAP_{nfoDict['sid']}.pdf", metadata={'Title': 'Cluster UMAP', 'Subject': f"{nfoDict['sub']} Data", 'Author': 'HU Xuesong'})
108 plt.figure(figsize=(6,4))
109 plt.title(f"Cluster Size Histogram - {nfoDict['sub']}")
110 axB = sns.histplot(adata.obs,x='leiden',hue='Platform',multiple="dodge",shrink=.66)
111 axB.set_xlabel('leiden Cluster NO.')
112 axB.set_ylabel('Cluster Size')
113 plt.savefig(f"2C_leidenHist_{nfoDict['sid']}.pdf", metadata={'Title': 'Cluster Size Histogram', 'Subject': f"{nfoDict['sub']} Data", 'Author': 'HU Xuesong'})
114 print("[i]Begin fig E. 2D", file=sys.stderr)
115 import pymn
116 adata.obs['cell.cluster'] = adata.obs['leiden'].astype(str)
117 adata.obs['study_id'] = adata.obs['Platform'].astype(str)
118 pymn.variableGenes(adata, study_col='Platform')
119 pymn.MetaNeighborUS(adata, study_col='study_id', ct_col='cell.cluster', fast_version=True)
120 plt.figure(figsize=(6,4))
121 plt.title(f"MetaNeighborUS - {nfoDict['sub']}")
122 pymn.plotMetaNeighborUS(adata, figsize=(10, 10), cmap='coolwarm')
123 plt.savefig(f"2D_MetaNeighborUS_{nfoDict['sid']}.pdf", metadata={'Title': 'MetaNeighborUS', 'Subject': f"{nfoDict['sub']} Data", 'Author': 'HU Xuesong'})
124 pymn.topHits(adata, threshold=0)
125 mndf = adata.uns['MetaNeighborUS_topHits']
126 mndf['ClusterID'] = mndf['Study_ID|Celltype_1'].str.split('|').str[1].astype(int)
127 pndf=mndf[mndf['Match_type']=='Reciprocal_top_hit']
128 plt.figure(figsize=(6,4))
129 plt.title(f"Mean_AUROC Between Platforms - {nfoDict['sub']}")
130 axC = sns.barplot(pndf,x='ClusterID',y='Mean_AUROC')
131 axC.set_xlabel('leiden Cluster NO.')
132 plt.savefig(f"2D_AUROC_{nfoDict['sid']}.pdf", metadata={'Title': 'AUROC', 'Subject': f"{nfoDict['sub']} Data", 'Author': 'HU Xuesong'})
133 #adata = None
135 if __name__ == "__main__":
136 if len(sys.argv) > 1:
137 thisID = sys.argv[1]
138 if thisID not in SamplesDict:
139 print(f"[x]sid can only be {SamplesDict.keys()}", file=sys.stderr)
140 exit(1)
141 else:
142 thisID = 'mbrain'
143 print(sys.argv, file=sys.stderr)
144 print(f"[i]{thisID}")
145 sys.stdout.flush()
146 main(thisID)
149 ./fig1.py human; ./fig2.py human ; ./fig1.py mbrain; ./fig2.py mbrain ; ./fig1.py mkidney; ./fig2.py mkidney
150 time (./fig1.py human; ./fig1.py mbrain ; ./fig1.py mkidney ) | tee plot.log
151 time (./fig2.py human; ./fig2.py mbrain ; ./fig2.py mkidney )