10 -requires_modules => [qw(XML::LibXML XML::LibXML::Reader XML::Writer)]
# Check that Bio::PrimarySeq can be loaded.
15 use_ok('Bio::PrimarySeq');
17 my $verbose = test_debug();    # debug level; values > 0 enable the diag() dumps of written files below
21 # XML library version checks: skip the Reader-based tests when perl or libxml2 is too old
22 if ( 1000 * $] < 5008 ) {    # $] is the numeric perl version, so this is true for perl older than 5.8
23 skip( "Reader interface only supported in Perl >= 5.8", 96 );    # 96 is the number of tests reported as skipped
25 elsif ( XML::LibXML::LIBXML_VERSION() <= 20620 ) {    # 20620 is libxml2 2.6.20, as named in the skip message
26 skip( "Reader not supported for libxml2 <= 2.6.20", 96 );
30 diag( "libxml version: ", XML::LibXML::LIBXML_VERSION() );    # record the libxml2 version in the test output
33 # check that the seqxml SeqIO module is installed and loads without error
34 use_ok('Bio::SeqIO::seqxml');
# Read the reference seqXML test file and verify the document metadata and its first entry.
38 my $seq_stream = Bio::SeqIO->new(
39 -file => test_input_file("seqxml.xml"),
# Document-level attributes reported by the reader.
47 is( $seq_stream->seqXMLversion, '0.3', 'seqXML version' );
48 is( $seq_stream->source, 'Ensembl', 'source' );
49 is( $seq_stream->sourceVersion, '56', 'source version' );
51 # now get and check the sequence entry itself
52 my $seq_obj = $seq_stream->next_seq;
53 isa_ok( $seq_obj, 'Bio::Seq' );
54 is( $seq_obj->display_id, 'ENST00000308775', 'display id' );
55 is( $seq_obj->primary_id, 'ENST00000308775', 'primary id' );
56 is( $seq_obj->desc, 'dystroglycan 1', 'description' );
57 is( $seq_obj->seq, 'AAGGC----UGAUGUC.....ACAU', 'sequence' );    # '-' and '.' characters are expected to be kept verbatim
58 is( $seq_obj->length, 25, 'length' );    # 25 counts the '-' and '.' characters as well
# The entry-level source is asserted only when a 'source' annotation is present.
60 my ($source) = $seq_obj->get_Annotations('source');
61 if ($source) { is($source->value, 'Ensembl', 'entry source'); }
# Species information attached to the entry.
64 isa_ok( $seq_obj->species, 'Bio::Species', 'species' );
65 is( $seq_obj->species->node_name, 'Homo sapiens', 'species name' );
66 is( $seq_obj->species->ncbi_taxid, '9606', 'NCBI tax id' );
# Database cross-references: only the first 'dblink' annotation is inspected.
69 my @dblinks = $seq_obj->get_Annotations('dblink');
70 my $dblink = shift @dblinks;
71 isa_ok( $dblink, 'Bio::Annotation::DBLink' );
72 is( $dblink->database, 'RefSeq', 'dblink source' );
73 is( $dblink->primary_id, 'NM_004393', 'dblink ID' );
# Every annotation other than 'dblink' and 'source' must be a SimpleValue.
76 my @annotations = $seq_obj->get_Annotations();
77 foreach my $annot_obj (@annotations) {
78 next if ( $annot_obj->tagname eq 'dblink' );    # already checked above
79 next if ( $annot_obj->tagname eq 'source' );    # already checked above
80 isa_ok( $annot_obj, 'Bio::Annotation::SimpleValue' );
81 if ( $annot_obj->tagname eq 'has_splice_variants' ) {
82 is( $annot_obj->value, undef, 'boolean property' );    # this property is expected to carry no value
84 elsif ( $annot_obj->tagname eq 'prediction_method' ) {
85 is( $annot_obj->value, 'manual curation', 'property with value' );
# Create a seqXML writer on a fresh output file, check its attributes, then write the entry read above.
90 my $outfile = test_output_file();
92 my $seq_writer = Bio::SeqIO->new(
97 -sourceVersion => '56',
98 -seqXMLversion => '0.3',
102 $seq_writer->flush; # flush so the output reaches the file before its size is tested
103 ok( -s $outfile, 'outfile is created' );    # -s is true only if the file exists with non-zero size
# The writer must report the attributes it was given.
106 is( $seq_writer->seqXMLversion, '0.3', 'seqXML version' );
107 is( $seq_writer->source, 'Ensembl', 'source' );
108 is( $seq_writer->sourceVersion, '56', 'source version' );
109 is( $seq_writer->schemaLocation, 'http://www.seqxml.org/0.3/seqxml.xsd', 'schemaLocation' );
111 # write one sequence entry to file
112 $seq_writer->write_seq($seq_obj);
# In debug mode, dump the written file into the test output.
114 if ( $verbose > 0 ) {
115 diag("writing first seqXML outfile");
116 diag(`cat $outfile`);    # NOTE(review): shells out to `cat`; presumably not portable to platforms without it - confirm
119 # verify written data by round-tripping it: read the file back and repeat the checks made on the original entry
121 my $new_in = Bio::SeqIO->new(
126 my $new_seqobj = $new_in->next_seq;
127 isa_ok( $new_seqobj, 'Bio::Seq' );
128 is( $new_seqobj->display_id, 'ENST00000308775', 'display id' );
129 is( $new_seqobj->primary_id, 'ENST00000308775', 'primary id' );
130 is( $new_seqobj->desc, 'dystroglycan 1', 'description' );
131 is( $new_seqobj->seq, 'AAGGC----UGAUGUC.....ACAU', 'sequence' );
132 is( $new_seqobj->length, 25, 'length' );
# As for the original entry, the source is asserted only when a 'source' annotation is present.
134 my ($new_source) = $new_seqobj->get_Annotations('source');
135 if ($new_source) { is($new_source->value, 'Ensembl', 'entry source'); }
# Species information must survive the round trip.
139 isa_ok( $new_seqobj->species, 'Bio::Species', 'species' );
140 is( $new_seqobj->species->node_name, 'Homo sapiens', 'species name' );
141 is( $new_seqobj->species->ncbi_taxid, '9606', 'NCBI tax id' );
# Database cross-references: only the first 'dblink' annotation is inspected.
144 my @dblinks = $new_seqobj->get_Annotations('dblink');
145 my $dblink = shift @dblinks;
146 isa_ok( $dblink, 'Bio::Annotation::DBLink' );
147 is( $dblink->database, 'RefSeq', 'dblink source' );
148 is( $dblink->primary_id, 'NM_004393', 'dblink ID' );
# Every annotation other than 'dblink' and 'source' must be a SimpleValue.
151 my @annotations = $new_seqobj->get_Annotations();
152 foreach my $annot_obj (@annotations) {
153 next if ( $annot_obj->tagname eq 'dblink' );    # already checked above
154 next if ( $annot_obj->tagname eq 'source' );    # already checked above
155 isa_ok( $annot_obj, 'Bio::Annotation::SimpleValue' );
156 if ( $annot_obj->tagname eq 'has_splice_variants' ) {
157 is( $annot_obj->value, undef, 'boolean property' );    # this property is expected to carry no value
159 elsif ( $annot_obj->tagname eq 'prediction_method' ) {
163 'property with value'
169 # write data from Seq objects created from a fasta file
171 # force Bio::Seq objects to be created,
172 # because SeqIO::fasta creates Bio::PrimarySeq objects by default
174 my $factory = Bio::Seq::SeqFactory->new(-type => 'Bio::Seq');
176 my $seq_stream = Bio::SeqIO->new(
177 -file => test_input_file("test.fasta"),
179 -seqfactory => $factory,    # makes the reader build its sequences with the factory above
182 my $outfile = test_output_file();
183 my $writer = Bio::SeqIO->new(
184 -file => ">$outfile",    # the leading '>' opens the file for writing
188 ok( -s $outfile, 'outfile is created' );    # checked before any sequence is written: the file must already be non-empty
# Copy every sequence from the fasta stream to the writer.
190 while ( my $seq_obj = $seq_stream->next_seq ) {
191 $writer->write_seq($seq_obj);
# In debug mode, dump the written file into the test output.
194 if ( $verbose > 0 ) {
195 diag(`cat $outfile`);    # NOTE(review): shells out to `cat`; presumably not portable to platforms without it - confirm
198 # now read the newly written seqXML file back in
199 my $in = Bio::SeqIO->new(
205 is( $in->seqXMLversion, '0.3', 'seqXML version' );
206 is( $in->source, undef, 'source' );    # source is expected to be undefined for this file
207 is( $in->sourceVersion, undef, 'source version' );    # sourceVersion is expected to be undefined as well
209 # check first sequence entry
210 my $seqxml_obj = $in->next_seq;
211 is( $seqxml_obj->display_id, 'roa1_drome', 'display id' );
212 is( $seqxml_obj->primary_id, 'roa1_drome', 'primary id' );    # expected to equal the display id
213 is( $seqxml_obj->desc, 'Rea guano receptor type III >> 0.1',
217 'MVNSNQNQNGNSNGHDDDFPQDSITEPEHMRKLFIGGLDYRTTDENLKAHEKWGNIVDVVVMKDPRTKRSRGFGFITYSHSSMIDEAQKSRPHKIDGRVEPKRAVPRQDIDSPNAGATVKKLFVGALKDDHDEQSIRDYFQHFGNIVDNIVIDKETGKKRGFAFVEFDDYDPVDKVVLQKQHQLNGKMVDVKKALPKNDQQGGGGGRGGPGGRAGGNRGNMGGGNYGNQNGGGNWNNGGNNWGNNRGNDNWGNNSFGGGGGGGGGYGGGNNSWGNNNPWDNGNGGGNFGGGGNNWNGGNDFGGYQQNYGGGPQRGGGNFNNNRMQPYQGGGGFKAGGGNQGNYGNNQGFNNGGNNRRY',
# Length of the first entry read back from the converted file.
220 is( $seqxml_obj->length, 358, 'length' );
222 # check second sequence entry
223 my $seqxml_obj2 = $in->next_seq;
224 is( $seqxml_obj2->display_id, 'roa2_drome', 'display id' );
225 is( $seqxml_obj2->primary_id, 'roa2_drome', 'primary id' );    # expected to equal the display id
226 is( $seqxml_obj2->desc, 'Rea guano ligand', 'description' );
229 'MVNSNQNQNGNSNGHDDDFPQDSITEPEHMRKLFIGGLDYRTTDENLKAHEKWGNIVDVVVMKDPTSTSTSTSTSTSTSTSTMIDEAQKSRPHKIDGRVEPKRAVPRQDIDSPNAGATVKKLFVGALKDDHDEQSIRDYFQHLLLLLLLDLLLLDLLLLDLLLFVEFDDYDPVDKVVLQKQHQLNGKMVDVKKALPKNDQQGGGGGRGGPGGRAGGNRGNMGGGNYGNQNGGGNWNNGGNNWGNNRGNDNWGNNSFGGGGGGGGGYGGGNNSWGNNNPWDNGNGGGNFGGGGNNWNGGNDFGGYQQNYGGGPQRGGGNFNNNRMQPYQGGGGFKAGGGNQGNYGNNQGFNNGGNNRRY',
232 is( $seqxml_obj2->length, 358, 'length' );