12 pp
= pprint
.PrettyPrinter(indent
=4)
14 def eprint(*args
, **kwargs
) -> None:
15 print(*args
, **kwargs
, file=sys
.stderr
, flush
=True)
17 def init_argparse() -> argparse
.ArgumentParser
:
18 parser
= argparse
.ArgumentParser(
19 description
='intersection of FastQ Read 2 with spatial barcodes, and dump matching Read 1 with spatial coordinates',
20 epilog
='Contact: <huxs@salus-bio.com>')
21 #parser.add_argument('-s', '--spatial', type=pathlib.Path, default='spatial.txt', metavar='file', help='For spatial.txt[.gz]')
22 parser
.add_argument('-1', '--read1', type=pathlib
.Path
, default
='Unmapped.out.mate1', metavar
='file', help='For Unmapped.out.mate1[.gz]')
23 #parser.add_argument('-2', '--read2', type=pathlib.Path, default='Unmapped.out.mate2', metavar='file', help='For Unmapped.out.mate2[.gz]')
24 parser
.add_argument('-p', '--read2-paf', type=pathlib
.Path
, default
='Unmapped.mate2.paf', metavar
='file', help='For Unmapped.mate2.paf')
25 #parser.add_argument('-m', '--max-mismatch', dest='mismatch', type=int, default=1, help='max allowed mismatch, default=1')
26 parser
.add_argument('-o', '--output', type=pathlib
.Path
, default
='Unmapped.fq.gz', dest
='outfile')
27 #parser.add_argument('-z', '--gzip', action=argparse.BooleanOptionalAction, default=True, help='Output gzipped files, default on', dest='gzip')
28 #parser.add_argument('-n', '--dryrun', '--dry-run', action='store_true', dest='dryrun')
30 "-v", "--version", action
="version",
31 version
=f
"{parser.prog} version 1.0.0"
35 def fileOpener(filename
):
36 f
= open(filename
,'rb')
38 if (f
.read(2) == b
'\x1f\x8b'):
40 fh
= gzip
.GzipFile(fileobj
=f
, mode
='rb')
43 #fht = io.TextIOWrapper(fh, encoding='utf-8', line_buffering=True)
47 parser
= init_argparse()
48 if len(sys
.argv
) == 1:
52 (01) perl -lane 'print ">",join("_",@F),"\\n$F[0]"' spatial.txt | minimap2 -k 15 -d spatial.miniref - 2>spatial.miniref.log
53 (2a) seqtk trimfq -L 30 Unmapped.out.mate2 | minimap2 -x sr spatial.miniref - -k15 -w10 -N1 -t8 -QL2c --eqx --cs --sr --end-bonus 200 --for-only -A4 -B0 -o Unmapped.mate2.paf 2>Unmapped.mate2.paf.log
54 (2b) seqtk trimfq -L 30 Unmapped.out.mate2 | minimap2 -x sr spatial.miniref - -k15 -w10 -N1 -t8 -QL2c --eqx --cs --sr --end-bonus 200 --for-only -A4 -B0 2>Unmapped.mate2.paf.log | gzip -1c >Unmapped.mate2.paf.gz
57 args
= parser
.parse_args()
59 eprint('[!]Read1:[',args
.read1
,'], Read2.PAF:[',args
.read2_paf
,']. OutFile:[',args
.outfile
,']',sep
='');
62 with gzip
.open(args
.outfile
, mode
='wt', compresslevel
=1) as fh
:
63 with
fileOpener(args
.read2_paf
) as fp2
:
64 with pafpy
.PafFile(fp2
) as paf
:
66 if record
.is_primary():
68 for name
,seq
,qual
in pyfastx
.Fastq(args
.read1
.as_posix(), build_index
=False):
69 #print('|'.join((name, seq, qual)))
70 if record
.qname
== name
:
72 str(found
), str(skipped
),
73 str(record
.tags
['cg']),str(record
.tags
['cs']),
81 if __name__
== "__main__":
82 main() # time ./spffq.py -1 n4457360.Unmapped.out.mate1.gz -p n175410.Unmapped.mate2.paf.gz
85 [1]+ Running perl -lane 'print ">",join("_",@F),"\n$F[0]"' spatial.txt | minimap2 -k 15 -d spatial.miniref - 2> spatial.miniref.log &
86 [2]+ Running seqtk trimfq -L 30 Unmapped.out.mate2 | minimap2 -x sr spatial.miniref - -k15 -w10 -N1 -t8 -QL2c --eqx --cs --sr --end-bonus 200 --for-only -A4 -B0 -o Unmapped2.paf 2> Unmapped2.err &