modified: fig1.py
[GalaxyCodeBases.git] / python / salus / cmplatform / fig2.py
blob30de5c106e283981adc5594a5e50e93ddb1dbaa5
1 #!/usr/bin/env python3
3 from fig1 import *
4 #print(SamplesDict)
6 def main(thisID) -> None:
7 scDat = {}
8 nfoDict = SamplesDict[thisID]
9 print("[i]Start.", file=sys.stderr)
10 for platform in PlatformTuple:
11 nfoDict['platformK'] = platform
12 nfoDict['platformV'] = nfoDict['platforms'][platform]
13 nfoDict['suffixOutV'] = nfoDict['suffixOut'][platform]
14 h5Path = f"{nfoDict['sid']}_{platform}.h5ad"
15 print(f"[i]Reading {h5Path}", file=sys.stderr)
16 adata = ad.read_h5ad(h5Path)
17 #adata.layers["raw"] = adata.X.copy()
18 #adata.layers["prnorm"] = adata.X.copy()
19 #sc.experimental.pp.normalize_pearson_residuals(adata,layer='prnorm')
20 #sc.pp.normalize_total(adata,target_sum=1e6,key_added='CPMnormFactor')
21 #adata.layers["norm"] = adata.X.copy()
22 scDat[platform] = adata
23 print(f"[i]Read {thisID}.{platform}: {adata.raw.shape} -> {adata.shape}", file=sys.stderr)
24 #print(scDat)
25 rawList=[scDat[v].raw.to_adata() for v in PlatformTuple]
26 adata=ad.concat(rawList, label='Platform', keys=PlatformTuple, index_unique='-')
27 adata.var['mt'] = adata.var_names.str.startswith('MT-') | adata.var_names.str.startswith('mt-')
28 sc.pp.calculate_qc_metrics(adata, qc_vars=['mt'], percent_top=None, log1p=True, inplace=True)
29 adata.obs['sqrt_inv_total_counts'] = 1 / np.sqrt(adata.obs['total_counts'])
30 adata.raw = adata.copy()
31 sc.pp.filter_cells(adata, min_counts=2000) # sqrt_inv_total_counts < 0.02236 按照样品均值的标准差考虑。
32 sc.pp.filter_genes(adata, min_cells=1) # adata.var[adata.var['n_cells']<2].sort_values(by='sqrt_inv_total_counts') 有800个,就不过滤了。
33 print(f"[i]Filtered: {adata.raw.shape} -> {adata.shape}", file=sys.stderr)
35 #adata.layers["raw"] = adata.X.copy()
36 adata.layers["prnorm"] = adata.X.copy()
37 sc.experimental.pp.normalize_pearson_residuals(adata,layer='prnorm')
38 sc.pp.normalize_total(adata,target_sum=1e6, key_added='CPMnormFactor')
39 adata.layers["norm"] = adata.X.copy()
40 sc.pp.log1p(adata)
42 sc.pp.pca(adata)
43 sc.pp.neighbors(adata)
44 sc.tl.umap(adata,random_state=369)
45 sc.tl.draw_graph(adata,random_state=369)
46 sc.tl.tsne(adata,random_state=369)
47 sc.tl.leiden(adata,random_state=0)
48 '''
49 fig1, ax1 = plt.subplots()
50 ax1.plot(x, y)
51 ax1.set_title("Axis 1 title")
52 ax1.set_xlabel("X-label for axis 1")
53 '''
54 print("[i]Begin fig E. 2Ca", file=sys.stderr)
55 plt.figure(1)
56 ax=sc.pl.pca(adata, color='Platform', show=False, title=f"PCA - {nfoDict['sub']}", annotate_var_explained=True)
57 plt.savefig(f"2C_PCA_{nfoDict['sid']}.pdf", bbox_extra_artists=(ax.get_legend(),), metadata={'Title': 'PCA', 'Subject': f"{nfoDict['sub']} Data", 'Author': 'HU Xuesong'})
58 plt.figure(2)
59 ax=sc.pl.umap(adata,color='Platform', show=False, title=f"UMAP - {nfoDict['sub']}")
60 plt.savefig(f"2C_UMAP_{nfoDict['sid']}.pdf", bbox_extra_artists=(ax.get_legend(),), metadata={'Title': 'UMAP', 'Subject': f"{nfoDict['sub']} Data", 'Author': 'HU Xuesong'})
61 plt.figure(3)
62 ax=sc.pl.tsne(adata, color='Platform', show=False, title=f"t-SNE - {nfoDict['sub']}")
63 plt.savefig(f"2C_tSNE_{nfoDict['sid']}.pdf", bbox_extra_artists=(ax.get_legend(),), metadata={'Title': 't-SNE', 'Subject': f"{nfoDict['sub']} Data", 'Author': 'HU Xuesong'})
64 plt.figure(4)
65 ax=sc.pl.draw_graph(adata, color='Platform', show=False, title=f"ForceAtlas2 - {nfoDict['sub']}")
66 plt.savefig(f"2C_ForceAtlas2_{nfoDict['sid']}.pdf", bbox_extra_artists=(ax.get_legend(),), metadata={'Title': 'ForceAtlas2', 'Subject': f"{nfoDict['sub']} Data", 'Author': 'HU Xuesong'})
68 fig, ax = plt.subplots()
69 fig.patch.set(alpha=0)
70 ax.patch.set(alpha=0)
71 palette = ['#CC0000','#00CC00','#CCCC00']
72 sc.pl.pca(adata, color='Platform', show=False, title=f"PCA - {nfoDict['sub']}", ax=ax, palette=palette, annotate_var_explained=True)
73 arts=ax.findobj(matplotlib.collections.PathCollection)
74 for art in arts:
75 mplcairo.operator_t.ADD.patch_artist(art)
76 newlabels = adata.obs['Platform'].unique().tolist() + ['Both']
77 legend_elements = [plt.scatter([],[],s=0, marker='o', label=label, color=color) for label, color in zip(newlabels, palette)]
78 ax.legend(handles=legend_elements, loc = 'right margin')
79 plt.savefig(f"2C_mPCA_{nfoDict['sid']}.pdf", bbox_extra_artists=(ax.get_legend(),), metadata={'Title': 'PCA', 'Subject': f"{nfoDict['sub']} Data", 'Author': 'HU Xuesong'})
81 fig, ax = plt.subplots()
82 fig.patch.set(alpha=0)
83 ax.patch.set(alpha=0)
84 palette = ['#CC0000','#00CC00','#CCCC00']
85 sc.pl.tsne(adata, color='Platform', show=False, title=f"PCA - {nfoDict['sub']}", ax=ax, palette=palette)
86 arts=ax.findobj(matplotlib.collections.PathCollection)
87 for art in arts:
88 mplcairo.operator_t.ADD.patch_artist(art)
89 newlabels = adata.obs['Platform'].unique().tolist() + ['Both']
90 legend_elements = [plt.scatter([],[],s=0, marker='o', label=label, color=color) for label, color in zip(newlabels, palette)]
91 ax.legend(handles=legend_elements, loc = 'right margin')
92 plt.savefig(f"2C_mtSNE_{nfoDict['sid']}.pdf", bbox_extra_artists=(ax.get_legend(),), metadata={'Title': 't-SNE', 'Subject': f"{nfoDict['sub']} Data", 'Author': 'HU Xuesong'})
95 adata = None
96 print("[i]Begin fig E. 2Cb", file=sys.stderr)
97 for platform in PlatformTuple:
98 adata = scDat[platform]
100 if __name__ == "__main__":
101 if len(sys.argv) > 1:
102 thisID = sys.argv[1]
103 if thisID not in SamplesDict:
104 print(f"[x]sid can only be {SamplesDict.keys()}", file=sys.stderr)
105 exit(1)
106 else:
107 thisID = 'mbrain'
108 print(sys.argv, file=sys.stderr)
109 print(f"[i]{thisID}")
110 sys.stdout.flush()
111 main(thisID)
114 ./fig1.py human; ./fig2.py human ; ./fig1.py mbrain; ./fig2.py mbrain ; ./fig1.py mkidney; ./fig2.py mkidney
115 time (./fig1.py human; ./fig1.py mbrain ; ./fig1.py mkidney ) | tee plot.log
116 time (./fig2.py human; ./fig2.py mbrain ; ./fig2.py mkidney )