From 3261e4e7d9b2b2899a5e07c6c51b5dc43af495e0 Mon Sep 17 00:00:00 2001 From: Petr Baudis Date: Tue, 8 Feb 2011 09:14:52 +0100 Subject: [PATCH] Add comparator based on Needleman-Wunsch score --- cmp_nw/cmp.sh | 31 +++++++++++++++++++++++++++++++ cmp_nw/needleman-wunsch.py | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+) create mode 100755 cmp_nw/cmp.sh create mode 100755 cmp_nw/needleman-wunsch.py diff --git a/cmp_nw/cmp.sh b/cmp_nw/cmp.sh new file mode 100755 index 0000000..ecfce71 --- /dev/null +++ b/cmp_nw/cmp.sh @@ -0,0 +1,31 @@ +#!/bin/sh +# The diff-based DNA file comparator. + +filea="$1" +numa="$2" +fileb="$3" +numb="$4" +scratch="$5/cmp_diff" + +echo "$filea - $fileb" >&2 + +mkdir -p "$scratch" +[ -e "$scratch/$numa.dna" ] || cp "$filea" "$scratch/$numa.dna" +[ -e "$scratch/$numb.dna" ] || cp "$fileb" "$scratch/$numb.dna" +srcdir="$(pwd)" +cd "$scratch" + +if [ $numa -eq $numb ]; then + echo 0 + exit +fi + +(echo -n "1 - "; + "$srcdir/cmp_nw/needleman-wunsch.py" "$(cat "$numa.dna")" "$(cat "$numb.dna")" | tr -d '\n'; + echo " / ($(cat "$numa.dna" "$numb.dna" | wc -c) - 2)") | bc -l + +#cat "$numa.dna" "$numb.dna" >both.dna +#"$srcdir/cmp_nw/nw.pl" "$srcdir/cmp_nw/sm" both.dna | +# perl -le '$a=<>; chomp $a; $b=<>; chomp $b; @a = split(//, $a); @b = split(//, $b); $c = 0; +# for (0..length($a)) { $a[$_] eq $b[$_] or $c++; } +# print $c / length($a);' diff --git a/cmp_nw/needleman-wunsch.py b/cmp_nw/needleman-wunsch.py new file mode 100755 index 0000000..40b1fff --- /dev/null +++ b/cmp_nw/needleman-wunsch.py @@ -0,0 +1,34 @@ +#!/usr/bin/python + +from numpy import * +import sys + +def NeedlemanWunsch(seqA, seqB, match, mismatch, gap): + a = len(seqA) + b = len(seqB) + F = zeros((a+1,b+1)) + for i in range(0, a): + F[i,0] = gap * i + for j in range(0, b): + F[0,j] = gap * j + + for i in range(1, a+1): + for j in range(1, b+1): + F[i,j] = -10000 + + if (seqA[i-1] == seqB[j-1]): + chscore = match + else: + chscore = mismatch + if (F[i-1,j-1] + chscore > F[i,j]): + F[i,j] = F[i-1,j-1] + chscore + + if (F[i-1,j] + gap > F[i,j]): + F[i,j] = F[i-1,j] + gap + if (F[i,j-1] + gap > F[i,j]): + F[i,j] = F[i,j-1] + gap + # print i, j, F[i,j] + # print "** ", a, b, F[a,b] + return F[a,b] + +print NeedlemanWunsch(sys.argv[1],sys.argv[2],1,0,-1) -- 2.11.4.GIT