10 from speedict
import Rdict
# Module-wide pretty-printer for ad-hoc debugging dumps; nested levels are
# indented by four spaces.
pp = pprint.PrettyPrinter(
    indent=4,
)
def eprint(*args, **kwargs) -> None:
    """Print to standard error, flushing immediately.

    Accepts the same positional and keyword arguments as the built-in
    ``print``. ``file`` defaults to ``sys.stderr`` and ``flush`` defaults to
    ``True``; either may be overridden by the caller.
    """
    # setdefault (rather than passing file=/flush= alongside **kwargs) avoids a
    # "multiple values for keyword argument" TypeError when the caller supplies
    # one of them explicitly.
    kwargs.setdefault('file', sys.stderr)
    kwargs.setdefault('flush', True)
    print(*args, **kwargs)
def init_argparse() -> argparse.ArgumentParser:
    """Build the command-line parser for the gridding script.

    Options defined:
        -b/--bin               required int, grid binning size in pixels.
        -i/--spatial           path of the spatial file (default ``spatial.txt``).
        -r/--scseq-path        directory with the scSeq files, stored as
                               ``scSeqPath``; mutually exclusive with ``-f``
                               (exactly one of the two is required).
        -f/--scseq-files       three paths (matrix, barcodes, features),
                               stored as the list ``scSeqFiles``.
        -s/--split-zones       0 (off, default), 4 or 5; stored as ``zones``.
        -o/--output-path       output directory, stored as ``outpath``.
        -z/--gzip, --no-gzip   gzip the output files (default on).
        -n/--dryrun/--dry-run  boolean flag stored as ``dryrun``.
        -v/--version           print the program version and exit.

    String defaults are converted by argparse through ``type``, so
    ``spatial`` and ``outpath`` are ``pathlib.Path`` objects even when the
    option is not given.

    Returns:
        The configured ``argparse.ArgumentParser``.
    """
    parser = argparse.ArgumentParser(
        description='merge scSeq data with spBarcode coordinates that gridded by given binning size ',
        epilog='Contact: <huxs@salus-bio.com>',
    )
    parser.add_argument(
        '-b', '--bin', type=int, required=True,
        help='grid binning pixels',
    )
    parser.add_argument(
        '-i', '--spatial', type=pathlib.Path, default='spatial.txt',
        metavar='txt', help='For spatial.txt[.gz]',
    )

    # The scSeq input is given either as one directory or as three files.
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument(
        '-r', '--scseq-path', type=pathlib.Path, dest='scSeqPath',
        help='Directory holding matrix.mtx, barcodes.tsv and features.tsv[.gz]',
    )
    group.add_argument(
        '-f', '--scseq-files', type=pathlib.Path, nargs=3, action='extend',
        metavar='<f>', dest='scSeqFiles',
        help='matrix.mtx[.gz] barcodes.tsv[.gz] features.tsv[.gz]',
    )

    parser.add_argument(
        '-s', '--split-zones', dest='zones', type=int, choices=[0, 4, 5],
        default=0, help='split to 4 or 5 zones, default 0=off',
    )
    parser.add_argument(
        '-o', '--output-path', type=pathlib.Path, default='./gridded/',
        dest='outpath', help='Output directory, default ./gridded/',
    )
    parser.add_argument(
        '-z', '--gzip', action=argparse.BooleanOptionalAction, default=True,
        help='Output gzipped files, default on', dest='gzip',
    )
    parser.add_argument(
        '-n', '--dryrun', '--dry-run', action='store_true', dest='dryrun',
        help='Only check and report input/output files, then exit',
    )
    parser.add_argument(
        '-v', '--version', action='version',
        version=f'{parser.prog} version 1.0.0',
    )
    return parser
# checkFile(PathList, suffixStrs): for every path in PathList, and for every
# suffix in suffixStrs, build a candidate path by appending the suffix to the
# path's POSIX string form (onePath.as_posix()), so each element of PathList
# must offer an as_posix() method. Candidates are formed in PathList order
# first, suffix order second.
# NOTE(review): callers store the result and later compare it with None or
# test its truthiness, so this presumably yields the first acceptable
# candidate, or None when no candidate qualifies - confirm against the
# function's return statements.
41 def checkFile(PathList
, suffixStrs
):
42 for onePath
in PathList
:
43 for oneExt
in suffixStrs
:
44 thisPath
= pathlib
.Path(''.join((onePath
.as_posix(),oneExt
)))
# Build the command-line parser via init_argparse().
51 parser
= init_argparse()
# An invocation with no arguments at all (sys.argv holds a single element)
# is special-cased before parsing.
# NOTE(review): presumably shows the help text and exits - confirm.
52 if len(sys
.argv
) == 1:
55 args
= parser
.parse_args()
# Echo the effective bin size, zone split and output path to stderr.
57 eprint('[!]GridBin=[',args
.bin
,'], SplitZone:[',args
.zones
,']. OutPath:[',args
.outpath
,']',sep
='');
# Standard scSeq input names; 'genes.tsv' follows 'features.tsv' and is
# handed to checkFile together with it for the 'features' slot (scSeqFiles[2:]).
58 scFileNameTuple
= ('matrix.mtx', 'barcodes.tsv', 'features.tsv', 'genes.tsv')
# Base names of the output files: spatial.txt plus the first three scSeq names.
59 spFileNameList
= ['spatial.txt']; spFileNameList
.extend(scFileNameTuple
[0:3])
60 #pp.pprint(spFileNameList)
# Resolve the scSeq inputs: the explicit file list when scSeqPath is None,
# or scSeqPath joined with each standard file name.
# NOTE(review): `== None` works here, but `is None` is the idiomatic test.
61 if args
.scSeqPath
== None:
62 #args.scSeqFiles.append( args.scSeqFiles[2].with_stem('genes') )
63 scSeqFiles
= tuple( args
.scSeqFiles
)
65 scSeqFiles
= tuple( args
.scSeqPath
.joinpath(x
) for x
in scFileNameTuple
)
# Suffixes passed to checkFile: the name as given, then with '.gz' appended.
66 FileDotExts
= ('', '.gz')
67 #pp.pprint(scSeqFiles)
# Logical names of the four data files, in the same order as spFileNameList.
68 spNameTuple
= ('spatial', 'matrix', 'barcodes', 'features')
# Map each logical name to its output file name, with '.gz' appended when
# args.gzip is set.
69 spStandardNameDict
= dict(zip(spNameTuple
,[ '.'.join((fn
,'gz')) if args
.gzip
else fn
for fn
in spFileNameList
]))
70 #pp.pprint(spStandardNameDict)
# Look up each input through checkFile; the results are checked against None below.
72 InFileDict
['spatial'] = checkFile([args
.spatial
], FileDotExts
)
73 InFileDict
['matrix'] = checkFile([scSeqFiles
[0]], FileDotExts
)
74 InFileDict
['barcodes'] = checkFile([scSeqFiles
[1]], FileDotExts
)
75 InFileDict
['features'] = checkFile(scSeqFiles
[2:], FileDotExts
)
# Report the lookup results on stderr; falsy entries are shown as '<Missing>'.
77 eprint('[!]Confirmed Input Files:[',', '.join([ str(x
) if x
else '<Missing>' for x
in InFileDict
.values() ]),'].',sep
='')
# Emit one message per logical input whose lookup produced None.
# NOTE(review): presumably followed by a non-zero exit - confirm.
78 for fname
in spNameTuple
:
79 if InFileDict
[fname
]==None:
80 eprint('[x]The',fname
,'file is missing !\n')
# Derive every output path by joining args.outpath with the standard name.
83 for fname
in spNameTuple
:
84 OutFileDict
[fname
] = args
.outpath
.joinpath(spStandardNameDict
[fname
])
85 #pp.pprint(OutFileDict)
# Create the output directory (with parents); an existing one is not an error.
86 args
.outpath
.mkdir(parents
=True, exist_ok
=True)
87 eprint('[!]Output Files:[',', '.join([ x
.as_posix() for x
in OutFileDict
.values()]),'].',sep
='')
# Dry run: stop here, after the output directory has been created and the
# planned files reported.
# NOTE(review): `exit` is the helper injected by the `site` module;
# sys.exit(0) is the robust spelling inside a script.
88 if args
.dryrun
: exit(0);
90 #outMtx = ''.join((outPrefix,'.mtx'))
91 #matrixData = gb.io.mmread(matrixFile)
if __name__ == "__main__":
    # Script entry point: gb.init(...) is called once, before main() runs.
    gb.init("suitesparse", blocking=True)
    main()  # time ./splanegrid.py ...