tag fourth (and hopefully last) alpha
[bioperl-live.git] / branch-1-6 / Bio / SearchIO / Writer / BSMLResultWriter.pm
blob6b9781dc455d3bb37001f47b513aa19baf6cfc10
1 # $Id$
3 # BioPerl module for Bio::SearchIO::Writer::BSMLResultWriter
5 # Please direct questions and support issues to <bioperl-l@bioperl.org>
7 # Cared for by Jason Stajich <jason-at-bioperl-dot-org>
9 # Copyright Jason Stajich
11 # You may distribute this module under the same terms as perl itself
13 # POD documentation - main docs before the code
15 =head1 NAME
17 Bio::SearchIO::Writer::BSMLResultWriter - BSML output writer
19 =head1 SYNOPSIS
21 use Bio::SearchIO;
22 my $in = Bio::SearchIO->new(-file => 'result.blast',
23 -format => 'blast');
24 my $out = Bio::SearchIO->new(-output_format => 'BSMLResultWriter',
25 -file => ">result.bsml");
26 while( my $r = $in->next_result ) {
27 $out->write_result($r);
28 }
30 =head1 DESCRIPTION
32 This is a writer to produce BSML for a search result.
34 =head1 FEEDBACK
36 =head2 Mailing Lists
38 User feedback is an integral part of the evolution of this and other
39 Bioperl modules. Send your comments and suggestions preferably to
40 the Bioperl mailing list. Your participation is much appreciated.
42 bioperl-l@bioperl.org - General discussion
43 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
45 =head2 Support
47 Please direct usage questions or support issues to the mailing list:
49 I<bioperl-l@bioperl.org>
51 rather than to the module maintainer directly. Many experienced and
52 responsive experts will be able to look at the problem and quickly
53 address it. Please include a thorough description of the problem
54 with code and data examples if at all possible.
56 =head2 Reporting Bugs
58 Report bugs to the Bioperl bug tracking system to help us keep track
59 of the bugs and their resolution. Bug reports can be submitted via
60 the web:
62 http://bugzilla.open-bio.org/
64 =head1 AUTHOR - Jason Stajich
66 Email jason-at-bioperl-dot-org
68 =head1 APPENDIX
70 The rest of the documentation details each of the object methods.
71 Internal methods are usually preceded with a _
73 =cut
76 # Let the code begin...
79 package Bio::SearchIO::Writer::BSMLResultWriter;
80 use strict;
82 use XML::Writer;
83 use IO::String;
85 use base qw(Bio::Root::Root Bio::SearchIO::SearchWriterI);
88 =head2 new
90 Title : new
91 Usage : my $obj = Bio::SearchIO::Writer::BSMLResultWriter->new();
92 Function: Builds a new Bio::SearchIO::Writer::BSMLResultWriter object
93 Returns : an instance of Bio::SearchIO::Writer::BSMLResultWriter
94 Args :
97 =cut
# Constructor. Every argument is passed through unchanged to the parent
# class constructor, and the object it builds is returned as-is.
sub new {
    my $class = shift;
    my $self  = $class->SUPER::new(@_);
    return $self;
}
106 =head2 to_string
108 Purpose : Produces a BSML (XML) document for a Search::Result::ResultI
109 : object and returns it as a string. The document describes the
110 : query sequence, one feature and one pairwise alignment per HSP,
111 : and the analysis parameters and statistics.
112 Usage : print $writer->to_string( $result_obj, @args );
113 Argument : $result_obj = A Bio::Search::Result::ResultI object
114 : @args = any additional arguments used by your implementation.
115 Returns : String containing data for each search Result or any of its
116 : sub-objects (Hits and HSPs).
117 Throws : n/a
119 =cut
121 # this implementation is largely adapted from the Incogen XSLT stylesheet
122 # to convert NCBI BLAST XML to BSML
# Render one search result as a complete BSML (XML) document.
#
# Arguments : $result - the search result object; it must provide the
#                       accessors called below (algorithm, hits,
#                       query_name, query_length, ...)
#             $num    - accepted for interface compatibility; not used here
# Returns   : the BSML document as a string
#
# The document has three parts:
#   * the query Sequence, annotated with one Feature per HSP
#   * a Sequence-search-table with one Seq-pair-alignment per HSP
#   * a Research/Analysis section with program, database, parameter and
#     statistic attributes
#
# A result without hits (or whose first hit has no HSPs) is still written;
# in that case the 'molecule' attribute of the query Sequence is omitted
# because there is no HSP to derive it from.
sub to_string {
    my ($self, $result, $num) = @_;

    my $str    = IO::String->new();
    my $writer = XML::Writer->new(
        OUTPUT      => $str,
        DATA_INDENT => 1,
        DATA_MODE   => 1,
    );

    # Every <Attribute> element is a simple name/content pair.
    my $attribute = sub {
        my ($name, $content) = @_;
        $writer->emptyTag('Attribute',
                          'name'    => $name,
                          'content' => $content);
    };

    $writer->xmlDecl('UTF-8');
    $writer->doctype('Bsml', '-//EBI//Labbook, Inc. BSML DTD//EN',
                     'http://www.labbook.com/dtd/bsml3_1.dtd');
    $writer->startTag('Bsml');
    $writer->startTag('Definitions');
    $writer->startTag('Sequences');

    my $reporttype = $result->algorithm;

    # Peek at the first HSP through the list accessors rather than
    # next_hit/next_hsp: the list accessors do not advance the caller's
    # iterators, and an empty list simply leaves these undefined instead
    # of causing a method call on undef.
    my ($first_hit) = $result->hits;
    my ($first_hsp) = $first_hit ? $first_hit->hsps : ();

    my @query_attrs = (
        'length' => $result->query_length,
        'title'  => $result->query_name . " " . $result->query_description,
    );
    if ($first_hsp) {
        # A query strand of 0 is treated as protein, anything else as
        # nucleotide.
        push @query_attrs,
            'molecule' => ( $first_hsp->query->strand == 0 ? 'aa' : 'nt' );
    }
    push @query_attrs,
        'representation' => 'virtual',
        'id'             => $result->query_name;
    $writer->startTag('Sequence', @query_attrs);

    # Here we're annotating the Query sequence with hits
    # hence the Feature-table
    $writer->startTag('Feature-tables');
    $writer->startTag('Feature-table',
                      'title' => "$reporttype Result",
                      'class' => $reporttype);
    my ($hitnum, $hspnum) = (1, 1);
    foreach my $hit ( $result->hits ) {
        $hspnum = 1;
        foreach my $hsp ( $hit->hsps ) {
            $writer->startTag('Feature',
                              'class'      => $reporttype,
                              'value-type' => 'alignment',
                              'title'      => $hit->name . " " . $hit->description,
                              );
            $writer->emptyTag('Interval-loc',
                              'startpos' => $hsp->query->start,
                              'endpos'   => $hsp->query->end);
            $writer->emptyTag('Qualifier',
                              'value-type' => 'score',
                              'value'      => $hsp->score,
                              );
            $writer->emptyTag('Qualifier',
                              'value-type' => 'target-start',
                              'value'      => $hsp->hit->start,
                              );
            $writer->emptyTag('Qualifier',
                              'value-type' => 'target-end',
                              'value'      => $hsp->hit->end,
                              );
            # The href targets the id of the matching Seq-pair-alignment
            # written in the Tables section below.
            $writer->emptyTag('Link',
                              'title' => 'alignment',
                              'href'  => sprintf("#SPA%d.%d", $hitnum, $hspnum)
                              );
            if ( $hsp->hit->strand < 0 ) {
                $writer->emptyTag('Qualifier',
                                  'value-type' => 'target-on-complement',
                                  'value'      => 1,
                                  );
            }
            $hspnum++;
            $writer->endTag('Feature');
        }
        $hitnum++;
    }
    $writer->endTag('Feature-table');
    $writer->endTag('Feature-tables');
    $writer->endTag('Sequence');
    $writer->endTag('Sequences');

    $writer->startTag('Tables');
    $writer->startTag('Sequence-search-table',
                      'search-type'  => $reporttype,
                      'query-length' => $result->query_length);
    $hitnum = $hspnum = 1;
    foreach my $hit ( $result->hits ) {
        $hspnum = 1;
        foreach my $hsp ( $hit->hsps ) {
            # NOTE(review): the ref*/comp* attributes are not used
            # consistently here (refseq/refcaption describe the hit while
            # refstart/refend/reflength describe the query). Left as-is
            # so the output stays unchanged -- confirm against the BSML
            # DTD before altering.
            $writer->startTag('Seq-pair-alignment',
                              'id'          => sprintf("SPA%d.%d", $hitnum, $hspnum),
                              'method'      => join(' ', $result->algorithm),
                              'compxref'    => sprintf("%s:%s",
                                                       '', $result->query_name),
                              'refxref'     => sprintf("%s:%s",
                                                       $result->database_name,
                                                       $hit->name),
                              'refseq'      => $hit->name,
                              'title'       => $result->query_name,
                              'compseq'     => $result->query_name,
                              'compcaption' => $result->query_name . ' ' .
                                               $result->query_description,
                              'refcaption'  => $hit->name . " " .
                                               $hit->description,
                              'totalscore'  => $hsp->score,
                              'refstart'    => $hsp->query->start,
                              'refend'      => $hsp->query->end,
                              'compstart'   => $hsp->hit->start,
                              'compend'     => $hsp->hit->end,
                              'complength'  => $hit->length,
                              'reflength'   => $result->query_length);

            $attribute->('hit-num',       $hitnum);
            $attribute->('hit-id',        $hit->name);
            $attribute->('hsp-num',       $hspnum);
            $attribute->('hsp-bit-score', $hsp->bits);
            $attribute->('hsp-evalue',    $hsp->evalue);
            $attribute->('pattern-from',  0);
            $attribute->('pattern-to',    0);
            $attribute->('query-frame',   $hsp->query->frame);
            $attribute->('hit-frame',
                         $hsp->hit->frame * $hsp->hit->strand);
            $attribute->('percent_identity',
                         sprintf("%.2f", $hsp->percent_identity));
            $attribute->('percent_similarity',
                         sprintf("%.2f", $hsp->frac_conserved('total') * 100));

            # Counts are derived from the fractions times the total
            # alignment length.
            my $cons  = $hsp->frac_conserved('total') * $hsp->length('total');
            my $ident = $hsp->frac_identical('total') * $hsp->length('total');

            $attribute->('identity',  $ident);
            $attribute->('positive',  $cons);
            $attribute->('gaps',      $hsp->gaps('total'));
            $attribute->('align-len', $hsp->length('total'));
            $attribute->('density',   0);
            $attribute->('hit-len',   $hit->length);

            $writer->emptyTag('Seq-pair-run',
                              'runlength'      => $hsp->hit->length,
                              'comprunlength'  => $hsp->hsp_length,
                              'complength'     => $hsp->hit->length,
                              'compcomplement' => $hsp->hit->strand < 0 ? 1 : 0,
                              'refcomplement'  => $hsp->query->strand < 0 ? 1 : 0,
                              'refdata'        => $hsp->query_string,
                              'compdata'       => $hsp->hit_string,
                              'alignment'      => $hsp->homology_string,
                              );
            $hspnum++;
            $writer->endTag('Seq-pair-alignment');
        }
        $hitnum++;
    }
    $writer->endTag('Sequence-search-table');
    $writer->endTag('Tables');

    $writer->startTag('Research');
    $writer->startTag('Analyses');
    $writer->startTag('Analysis');
    $attribute->('program',   $reporttype);
    $attribute->('version',
                 join(' ', $reporttype, $result->algorithm_version));
    $attribute->('reference', $result->algorithm_reference);
    $attribute->('db',        $result->database_name);
    $attribute->('db-size',   $result->database_entries);
    $attribute->('db-length', $result->database_letters);
    # $writer->emptyTag('Attribute',
    #                   'name'    => 'iter-num',
    #                   'content' => $result->iteration_num);
    foreach my $attr ( $result->available_parameters ) {
        $attribute->($attr, $result->get_parameter($attr));
    }
    foreach my $attr ( $result->available_statistics ) {
        $attribute->($attr, $result->get_statistic($attr));
    }
    $writer->endTag('Analysis');
    $writer->endTag('Analyses');
    $writer->endTag('Research');

    $writer->endTag('Definitions');
    $writer->endTag('Bsml');
    $writer->end();
    return ${ $str->string_ref };
}