modified: splanegrid.py
[GalaxyCodeBases.git] / python / salus / pygrid / splanegrid.py
blob5e3cdb1828c47ed5dfd33f532cd1bbe376d95722
1 #!/usr/bin/env python3
3 import sys
4 import os
5 import argparse
6 import pathlib
7 import gzip
8 import graphblas as gb
9 import dinopy
10 from speedict import Rdict
12 import pprint
# Module-level pretty-printer (4-space indent) kept for ad-hoc debugging dumps.
pp = pprint.PrettyPrinter(indent=4)
# Manual garbage collection is currently disabled:
# import gc
# gc.collect()
def eprint(*args, **kwargs) -> None:
    """Print to standard error and flush immediately.

    Takes the same positional and keyword arguments as the built-in
    ``print``.  ``file`` defaults to ``sys.stderr`` and ``flush`` to
    ``True``; both are plain defaults, so a caller may override either one
    without hitting a "multiple values for keyword argument" ``TypeError``.
    """
    # setdefault keeps any value the caller supplied explicitly.
    kwargs.setdefault('file', sys.stderr)
    kwargs.setdefault('flush', True)
    print(*args, **kwargs)
def init_argparse() -> argparse.ArgumentParser:
    """Build the command-line parser for the gridding tool.

    Options defined here:
      * ``-b/--bin``            required integer bin size.
      * ``-i/--spatial``        spatial file path (default ``spatial.txt``).
      * ``-r/--scseq-path`` or ``-f/--scseq-files``  exactly one is required:
        a directory, or three explicit files.
      * ``-s/--split-zones``    0, 4 or 5 (default 0).
      * ``-o/--output-path``    output directory (default ``./gridded/``).
      * ``-z/--gzip`` / ``--no-gzip``  toggle gzipped output (default on).
      * ``-n/--dryrun/--dry-run``  boolean flag.
      * ``-v/--version``        print the version and exit.

    Returns:
        The configured ``argparse.ArgumentParser``.
    """
    cli = argparse.ArgumentParser(
        description='merge scSeq data with spBarcode coordinates that gridded by given binning size ',
        epilog='Contact: <huxs@salus-bio.com>',
    )
    cli.add_argument(
        '-b', '--bin',
        type=int,
        required=True,
        help='grid binning pixels',
    )
    cli.add_argument(
        '-i', '--spatial',
        type=pathlib.Path,
        default='spatial.txt',
        metavar='txt',
        help='For spatial.txt[.gz]',
    )

    # The scSeq input is given either as a directory or as three files.
    sc_source = cli.add_mutually_exclusive_group(required=True)
    sc_source.add_argument(
        '-r', '--scseq-path',
        type=pathlib.Path,
        dest='scSeqPath',
    )
    sc_source.add_argument(
        '-f', '--scseq-files',
        type=pathlib.Path,
        nargs=3,
        action='extend',
        metavar='<f>',
        dest='scSeqFiles',
        help='matrix.mtx[.gz] barcodes.tsv[.gz] features.tsv[.gz]',
    )

    cli.add_argument(
        '-s', '--split-zones',
        dest='zones',
        type=int,
        choices=[0, 4, 5],
        default=0,
        help='split to 4 or 5 zones, default 0=off',
    )
    cli.add_argument(
        '-o', '--output-path',
        type=pathlib.Path,
        default='./gridded/',
        dest='outpath',
    )
    cli.add_argument(
        '-z', '--gzip',
        action=argparse.BooleanOptionalAction,
        default=True,
        help='Output gzipped files, default on',
        dest='gzip',
    )
    cli.add_argument(
        '-n', '--dryrun', '--dry-run',
        action='store_true',
        dest='dryrun',
    )

    version_text = f"{cli.prog} version 1.0.0"
    cli.add_argument("-v", "--version", action="version", version=version_text)
    return cli
def checkFile(PathList, suffixStrs):
    """Return the first existing ``path + suffix`` candidate, or ``None``.

    Candidates are tried path by path, and for each path suffix by suffix,
    so an earlier path with any suffix wins over a later path.

    Args:
        PathList: iterable of ``pathlib.Path`` objects to probe.
        suffixStrs: iterable of strings appended verbatim to each path
            (for example ``('', '.gz')``).

    Returns:
        The first candidate ``pathlib.Path`` that exists on disk, or
        ``None`` when no candidate exists.
    """
    candidates = (
        pathlib.Path(base.as_posix() + ext)
        for base in PathList
        for ext in suffixStrs
    )
    # Lazy generator: probing stops at the first hit.
    return next((found for found in candidates if found.exists()), None)
def main() -> None:
    """Validate the inputs and planned outputs of a gridding run.

    Steps performed:
      1. Parse the command line (print the help and exit 0 when no argument
         was given).
      2. Resolve the spatial file and the matrix / barcodes / features
         files, accepting each either as given or with a ``.gz`` suffix.
      3. Report the resolved inputs on stderr and exit with status 1 when
         one of them is missing.
      4. Derive the output file names inside the output directory and
         report them on stderr.
      5. Unless ``--dry-run`` was requested, create the output directory.

    The gridding itself is not implemented here yet, so the function always
    ends the process through ``sys.exit`` (raising ``SystemExit``).
    """
    parser = init_argparse()
    if len(sys.argv) == 1:
        # Bare invocation: show usage rather than an argparse error.
        parser.print_help()
        sys.exit(0)
    args = parser.parse_args()
    eprint('[!]GridBin=[', args.bin, '], SplitZone:[', args.zones,
           ']. OutPath:[', args.outpath, ']', sep='')

    scFileNameTuple = ('matrix.mtx', 'barcodes.tsv', 'features.tsv', 'genes.tsv')
    spFileNameList = ['spatial.txt', *scFileNameTuple[:3]]
    if args.scSeqPath is None:
        # Explicit files from -f: matrix, barcodes, features (in that order).
        scSeqFiles = tuple(args.scSeqFiles)
    else:
        # Directory from -r: look for the conventional file names inside it.
        scSeqFiles = tuple(args.scSeqPath.joinpath(x) for x in scFileNameTuple)
    FileDotExts = ('', '.gz')

    spNameTuple = ('spatial', 'matrix', 'barcodes', 'features')
    # Output names mirror the standard input names, gzipped on request.
    spStandardNameDict = {
        key: (f'{fn}.gz' if args.gzip else fn)
        for key, fn in zip(spNameTuple, spFileNameList)
    }

    InFileDict = {
        'spatial': checkFile([args.spatial], FileDotExts),
        'matrix': checkFile([scSeqFiles[0]], FileDotExts),
        'barcodes': checkFile([scSeqFiles[1]], FileDotExts),
        # In directory mode the slice also contains genes.tsv, which is
        # tried after features.tsv.
        'features': checkFile(scSeqFiles[2:], FileDotExts),
    }
    eprint('[!]Confirmed Input Files:[',
           ', '.join([str(x) if x else '<Missing>' for x in InFileDict.values()]),
           '].', sep='')
    for fname in spNameTuple:
        if InFileDict[fname] is None:
            eprint('[x]The', fname, 'file is missing !\n')
            sys.exit(1)

    OutFileDict = {
        fname: args.outpath.joinpath(spStandardNameDict[fname])
        for fname in spNameTuple
    }
    eprint('[!]Output Files:[',
           ', '.join([x.as_posix() for x in OutFileDict.values()]),
           '].', sep='')
    if args.dryrun:
        # A dry run only reports; it must not touch the file system.
        sys.exit(0)
    args.outpath.mkdir(parents=True, exist_ok=True)

    # TODO: gridding is not implemented yet; the original draft sketched
    # loading the matrix with gb.io.mmread(...) at this point.
    sys.exit(0)
if __name__ == "__main__":
    # Script entry point: initialise graphblas with the "suitesparse"
    # backend and blocking=True before main() runs.
    gb.init("suitesparse", blocking=True)
    main() # time ./splanegrid.py ...