From 687e2b5773ade2c93ecb6e50a096ac761238628b Mon Sep 17 00:00:00 2001 From: Yuuki Galaxy Date: Fri, 30 Jun 2023 17:26:23 +0800 Subject: [PATCH] modified: alignkmer.py --- python/etc/alignkmer.py | 114 ++++++++++++++++++++++++++++-------------------- 1 file changed, 67 insertions(+), 47 deletions(-) rewrite python/etc/alignkmer.py (98%) diff --git a/python/etc/alignkmer.py b/python/etc/alignkmer.py dissimilarity index 98% index 65b24aedb..cd1d70f0c 100755 --- a/python/etc/alignkmer.py +++ b/python/etc/alignkmer.py @@ -1,47 +1,67 @@ -#!/usr/bin/env python3 - -from skbio.alignment import StripedSmithWaterman -from skbio import DNA - -Adapters = { - 'adapter1': 'CAAGCAGAAGACGGCATACGAGATTCTTTCCCTACACGACGCTCTTCCGATCT', - 'adapter2': 'CCCGTTCGCAACATGTCTGGCGTCATATCTTGTGACTACAGCACCCTCGACTCTCGCAGACTTTCACCAGTCCATGAT', - 'adapter3': 'AGATCGGAAGAGCACACGTCTGAACTCCAGTCACGTGTAGATCTCGGTGGTCGCCGTATCATT' -} - -Seq = 'ACATAGTCTGGCGTCATTCTTGTGTACA' -Query = StripedSmithWaterman(Seq) - -rcSeq = str(DNA(Seq).reverse_complement()) -rcQuery = StripedSmithWaterman(rcSeq) - -Alignments = {} -Scores = {} -for tid in Adapters.keys(): - target_sequence = Adapters[tid] - Alignment = Query(target_sequence) - rcAlignment = rcQuery(target_sequence) - strand = '*' - if rcAlignment == None or Alignment.optimal_alignment_score > rcAlignment.optimal_alignment_score: - resAli = Alignment - strand = '+' - else: - resAli = rcAlignment - strand = '-' - Alignments[tid] = (resAli,strand) - Scores[tid] = resAli.optimal_alignment_score -maxScores = [key for key, value in Scores.items() if value == max(Scores.values())] - -for tid in maxScores: - ali = Alignments[tid] - prnStr = ' '.join(( - tid, ali[1], ali[0].cigar, - str(ali[0].optimal_alignment_score), - str(ali[0].suboptimal_alignment_score) - )) - print(prnStr) - prnStr = ' -> '.join(( - ali[0].aligned_query_sequence,ali[0].aligned_target_sequence - )) - print(prnStr) - print() +#!/usr/bin/env python3 + +import sys + +def buildQuery(): + from skbio import DNA + from skbio.alignment import StripedSmithWaterman + Adapters = { + 'adapter1': 'CAAGCAGAAGACGGCATACGAGATTCTTTCCCTACACGACGCTCTTCCGATCT', + 'adapter2': 'CCCGTTCGCAACATGTCTGGCGTCATATCTTGTGACTACAGCACCCTCGACTCTCGCAGACTTTCACCAGTCCATGAT', + 'adapter3': 'AGATCGGAAGAGCACACGTCTGAACTCCAGTCACGTGTAGATCTCGGTGGTCGCCGTATCATT' + } + Querys={} + for qid in Adapters.keys(): + Seq = Adapters[qid] + rcSeq = str(DNA(Seq).reverse_complement()) + Query = StripedSmithWaterman(Seq) + rcQuery = StripedSmithWaterman(rcSeq) + Querys[qid] = (Query,rcQuery) + return Querys + +def aliOneSeq(QuerySSWs,targetSeq): + Alignments = {} + Scores = {} + for qid in QuerySSWs.keys(): + (Query,rcQuery) = QuerySSWs[qid] + Alignment = Query(targetSeq) + rcAlignment = rcQuery(targetSeq) + strand = '*' + if rcAlignment == None or Alignment.optimal_alignment_score > rcAlignment.optimal_alignment_score: + resAli = Alignment + strand = '+' + else: + resAli = rcAlignment + strand = '-' + Alignments[qid] = (resAli,strand) + Scores[qid] = resAli.optimal_alignment_score + maxScores = [key for key, value in Scores.items() if value == max(Scores.values())] + maxAlignments = {k: Alignments[k] for k in maxScores} + return maxAlignments + +def main(): + if len(sys.argv) < 3 : + print('Usage:',sys.argv[0],' ',file=sys.stderr,flush=True); + exit(0); + + QuerySSWs = buildQuery() + targetSeq = 'ACATAGTCTGGCGTCATTCTTGTGTACA' + maxAlignments = aliOneSeq(QuerySSWs,targetSeq) + + for tid in maxAlignments.keys(): + ali = maxAlignments[tid] + prnStr = ' '.join(( + tid, ali[1], ali[0].cigar, + str(ali[0].optimal_alignment_score), + str(ali[0].suboptimal_alignment_score) + )) + print(prnStr) + prnStr = ' -> '.join(( + ali[0].aligned_target_sequence, + ali[0].aligned_query_sequence + )) + print(prnStr) + print() + +if __name__ == "__main__": + main() -- 2.11.4.GIT