modified: fig1.py
[GalaxyCodeBases.git] / python / salus / cmplatform / fig1.py
blobd171236d8225b219ae827918facc4b985decb076
1 #!/usr/bin/env python3
3 import sys
4 import os
5 from typing import NamedTuple
7 PlatformTuple = ('Illumina', 'Salus')
8 SamplesDict = {
9 'mbrain': {
10 'sid' : 'mbrain',
11 'sub' : 'Mouse Brain Sptial',
12 'type': 'visium',
13 'prefix' : '/share/result/spatial/data/BoAo_sp',
14 'suffixOut': dict.fromkeys(PlatformTuple,"outs"),
15 'suffixMtx': 'filtered_feature_bc_matrix',
16 'platforms': {PlatformTuple[0]:'illumina', PlatformTuple[1]: 'salus'},
17 'pattern': ('prefix', 'platformV', 'sid', 'suffixOutV', 'suffixMtx')
19 'mkidney': {
20 'sid' : 'mkidney',
21 'sub' : 'Mouse Kindey Sptial',
22 'type': 'visium',
23 'prefix' : '/share/result/spatial/data/BoAo_sp',
24 'suffixOut': dict.fromkeys(PlatformTuple,"outs"),
25 'suffixMtx': 'filtered_feature_bc_matrix',
26 'platforms': {PlatformTuple[0]:'illumina', PlatformTuple[1]: 'salus'},
27 'pattern': ('prefix', 'platformV', 'sid', 'suffixOutV', 'suffixMtx')
29 'human': {
30 'sid' : 'human',
31 'sub' : 'Human Single Cell',
32 'type': 'mobivision',
33 'prefix' : '/share/result/spatial/data/MoZhuo_sc/FX20230913',
34 'suffixOut': {PlatformTuple[0]: 'out/R22045213-220914-LYY-S11-R03-220914-LYY-S11-R03_combined_outs',
35 PlatformTuple[1]: 'out_subset/20221124-LYY-S09-R03_AGGCAGAA_fastq_outs'},
36 'suffixMtx': 'filtered_cell_gene_matrix',
37 'platforms': {PlatformTuple[0]:'illumina', PlatformTuple[1]: 'sailu'},
38 'pattern': ('prefix', 'platformV', 'suffixOutV', 'suffixMtx')
42 def checkModules() -> None:
43 import importlib.metadata
44 from packaging import version
45 pkgname = "squidpy"
46 min_ver = "1.2.3"
47 got_ver = importlib.metadata.version(pkgname)
48 if version.parse(got_ver) < version.parse(min_ver):
49 raise importlib.VersionConflict(f"{pkgname}>={min_ver} is needed, but found {pkgname}=={got_ver}")
51 def main() -> None:
52 import matplotlib; matplotlib.use("module://mplcairo.base")
53 from matplotlib import pyplot as plt
54 import mplcairo
56 import numpy as np
57 import pandas as pd
58 import anndata as ad
59 import scanpy as sc
60 import squidpy as sq
61 import seaborn as sns
63 class scDatItem(NamedTuple):
64 name: str
65 bgRaw: tuple[int,int]
66 bgFlt: tuple[int,int]
67 annDat: ad.AnnData
69 def __repr__(self) -> str:
70 return f'<scDatItem:{self.name}, Raw_BC*Gene={self.bgRaw[0]}x{self.bgRaw[1]}, NonZero_BC*Gene={self.bgFlt[0]}x{self.bgFlt[1]}, ann={self.annDat.n_obs}x{self.annDat.n_vars}>'
72 scDat = []
73 nfoDict = SamplesDict[thisID]
74 for platform in PlatformTuple:
75 nfoDict['platformV'] = nfoDict['platforms'][platform]
76 nfoDict['suffixOutV'] = nfoDict['suffixOut'][platform]
77 mtxPath = os.path.join( *[nfoDict[v] for v in nfoDict['pattern']] )
78 #print(mtxPath)
79 adata=sc.read_10x_mtx(mtxPath, var_names='gene_symbols', make_unique=True, gex_only=True)
80 adata.var_names_make_unique() # this is necessary if using `var_names='gene_symbols'` in `sc.read_10x_mtx`
81 nnRaw = (adata.n_obs,adata.n_vars)
82 adata.var['mt'] = adata.var_names.str.startswith('MT-') | adata.var_names.str.startswith('mt-')
83 sc.pp.calculate_qc_metrics(adata, qc_vars=['mt'], percent_top=None, log1p=True, inplace=True)
84 sc.pp.filter_cells(adata, min_genes=1)
85 sc.pp.filter_genes(adata, min_cells=1)
86 nnFlt = (adata.n_obs,adata.n_vars)
87 scDat.append(scDatItem(platform,nnRaw,nnFlt,adata))
89 print("\n".join(map(str,scDat)))
91 with pd.option_context("mode.copy_on_write", True):
92 obsmbi = scDat[0].annDat.obs[['n_genes_by_counts', 'total_counts']].copy(deep=False)
93 obsmbs = scDat[1].annDat.obs[['n_genes_by_counts', 'total_counts']].copy(deep=False)
94 p1df = pd.concat([obsmbi.assign(Platform=scDat[0].name), obsmbs.assign(Platform=scDat[1].name)], ignore_index=True).replace([np.inf, -np.inf, 0], np.nan).dropna()
95 p2df = obsmbi.join(obsmbs,lsuffix='_'+scDat[0].name,rsuffix='_'+scDat[1].name,how='inner').replace([np.inf, -np.inf, 0], np.nan).dropna()
97 custom_params = {"axes.spines.right": False, "axes.spines.top": False}
98 sns.set_theme(style="ticks", rc=custom_params, font="STIX Two Text")
99 figA=sns.JointGrid(data=p1df, x="total_counts", y="n_genes_by_counts", hue='Platform', dropna=True)
100 #figA.plot(sns.scatterplot, sns.histplot, alpha=.7, edgecolor=".2", linewidth=.5)
101 figA.plot_joint(sns.scatterplot, s=12.7, alpha=.6)
102 figA.plot_marginals(sns.histplot, kde=True, alpha=.618)
103 figA.figure.suptitle(f"Gene to UMI plot - {nfoDict['sub']}")
104 figA.set_axis_labels(xlabel='UMIs per Barcode', ylabel='Genes per Barcode')
105 figA.savefig(f"1D_{nfoDict['sid']}.pdf", transparent=True, dpi=300, metadata={'Title': 'Gene to UMI plot', 'Subject': f"{nfoDict['sub']} Data", 'Author': 'HU Xuesong'})
107 figB=sns.JointGrid(data=p2df, x="total_counts_Illumina", y="total_counts_Salus", dropna=True)
108 figB.plot_joint(sns.scatterplot, s=12.7, alpha=.6)
109 figB.plot_marginals(sns.histplot, kde=True, alpha=.618)
110 figB.figure.suptitle(f"UMI per Barcode Counts Comparing - {nfoDict['sub']}")
111 figB.set_axis_labels(xlabel='UMI Counts from Illumina', ylabel='UMI Counts from Salus')
112 figB.savefig(f"1E_{nfoDict['sid']}.pdf", transparent=True, dpi=300, metadata={'Title': 'UMI per Barcode Counts Comparing', 'Subject': f"{nfoDict['sub']} Data", 'Author': 'HU Xuesong'})
114 if __name__ == "__main__":
115 if len(sys.argv) > 1:
116 thisID = sys.argv[1]
117 if thisID not in SamplesDict:
118 print(f"[x]sid can only be {SamplesDict.keys()}", file=sys.stderr)
119 exit(1)
120 else:
121 thisID = 'mbrain'
122 print(sys.argv, file=sys.stderr)
123 print(f"[i]{thisID}")
124 sys.stdout.flush()
125 checkModules()
126 main()
129 x1 = np.random.randn(1000)
130 y1 = np.random.randn(1000)
131 x2 = np.random.randn(1000) * 5
132 y2 = np.random.randn(1000)
133 fig, ax = plt.subplots()
134 # The figure and axes background must be made transparent.
135 fig.patch.set(alpha=0)
136 ax.patch.set(alpha=0)
137 pc1 = ax.scatter(x1, y1, c='b', edgecolors='none')
138 pc2 = ax.scatter(x2, y2, c='r', edgecolors='none')
139 mplcairo.operator_t.ADD.patch_artist(pc2) # Use additive blending.
140 plt.show()