maint: restructure to use Dist::Zilla
[bioperl-live.git] / lib / Bio / DB / GFF / Aggregator / clone.pm
blobedc6550f5f104da5bcc240049081523356756f41
1 =head1 NAME
3 Bio::DB::GFF::Aggregator::clone -- Clone aggregator
5 =head1 SYNOPSIS
7 use Bio::DB::GFF;
9 # Open the sequence database
10 my $db = Bio::DB::GFF->new( -adaptor => 'dbi:mysql',
11 -dsn => 'dbi:mysql:elegans42',
12 -aggregator => ['transcript','clone'],
15 ----------------------------------------------------------------------------
16 Aggregator method: clone
17 Main method: -none-
18 Sub methods: Clone_left_end Clone_right_end region:Genomic_canonical
19 ----------------------------------------------------------------------------
21 =head1 DESCRIPTION
23 Bio::DB::GFF::Aggregator::clone is one of the default aggregators, and
24 was written to be compatible with the C elegans GFF files. It
25 aggregates raw "Clone_left_end", "Clone_right_end", and
26 "region:Genomic_canonical" features into composite features of type
27 "clone".
29 =cut
31 package Bio::DB::GFF::Aggregator::clone;
33 use strict;
36 use base qw(Bio::DB::GFF::Aggregator);
38 =head2 aggregate
40 Title : aggregate
41 Usage : $features = $a->aggregate($features,$factory)
42 Function: aggregate a feature list into composite features
43 Returns : an array reference containing modified features
44 Args : see L<Bio::DB::GFF::Aggregator>
45 Status : Public
47 The WormBase GFF model is unusual in that clones aren't identified as
48 a single feature with start and stop positions, but as two features, a
49 "left end" and a "right end". One or both of these features may be
50 absent. In order to accommodate this, the aggregator will return undef
51 for the start and/or stop if one or both of the ends are missing.
53 =cut
57 # we look for Clone_left_end, Clone_right_end and Genomic_canonical features and combine them into a pseudotype clone
# Collapse clone parts (Clone_left_end, Clone_right_end and a region/Sequence
# feature whose source is Genomic_canonical) into one composite feature per
# group, rewriting @$features in place.
#
# Args    : $features - array ref of feature objects; its contents are replaced
#           $factory  - handed to match_sub() and passthru_sub()
# Returns : empty/undef immediately if match_sub() yields nothing; otherwise the
#           result of the final assignment to @$features.
sub aggregate {
  my $self     = shift;
  my $features = shift;
  my $factory  = shift;

  my $matchsub = $self->match_sub($factory) or return;
  my $passthru = $self->passthru_sub($factory);
  my $method   = $self->get_method;

  my (%clones,%types,@result);
  for my $feature (@$features) {
    my $group = $feature->group;

    if ($group && $matchsub->($feature)) {

      # The alternation has to be grouped: an ungrouped /^region|Sequence$/
      # parses as (^region)|(Sequence$) and would also accept methods such as
      # "regionX" or "XSequence".
      if ($feature->method =~ /^(?:region|Sequence)$/ && $feature->source eq 'Genomic_canonical') {
        $clones{$group}{canonical} = $feature;
      } elsif ($feature->method eq 'Clone_left_end') {
        $clones{$group}{left} = $feature;
      } elsif ($feature->method eq 'Clone_right_end') {
        $clones{$group}{right} = $feature;
      }

      # matched parts are only kept in the output when the passthru filter says so
      push @result,$feature if $passthru && $passthru->($feature);
    } else {
      # ungrouped or non-matching features are passed along unchanged
      push @result,$feature;
    }
  }

  for my $clone (keys %clones) {
    # clone ends without a canonical feature produce no composite
    my $canonical = $clones{$clone}{canonical} or next;

    # the genomic_canonical doesn't tell us where the clone starts and stops
    # so don't assume it
    my $duplicate = $canonical->clone;   # make a duplicate of the feature

    # munge the method and source fields; one Typename object is shared per
    # method/source pair
    my $source = $duplicate->source;
    my $type   = $types{$method,$source} ||= Bio::DB::GFF::Typename->new($method,$source);
    $duplicate->type($type);

    # on a non-positive strand the roles of the 'start' and 'stop' fields swap
    my ($start,$stop) = $duplicate->strand > 0 ? ('start','stop') : ('stop','start');
    @{$duplicate}{$start,$stop} = (undef,undef);

    # a coordinate stays undef when the corresponding clone end is missing
    $duplicate->{$start} = $clones{$clone}{left}{$start}  if exists $clones{$clone}{left};
    $duplicate->{$stop}  = $clones{$clone}{right}{$stop}  if exists $clones{$clone}{right};
    $duplicate->method($self->method);
    push @result,$duplicate;
  }

  @$features = @result;
}
108 =head2 method
110 Title : method
111 Usage : $aggregator->method
112 Function: return the method for the composite object
113 Returns : the string "clone"
114 Args : none
115 Status : Public
117 =cut
# Name given to the composite features this aggregator builds.
sub method {
  return 'clone';
}
121 =head2 part_names
123 Title : part_names
124 Usage : $aggregator->part_names
125 Function: return the methods for the sub-parts
126 Returns : the list ("Clone_left_end", "Clone_right_end", "region:Genomic_canonical", "Sequence:Genomic_canonical")
127 Args : none
128 Status : Public
130 =cut
# Feature types ("method" or "method:source") that are folded into a clone.
# The invocant is accepted but not consulted.
sub part_names {
  my ($self) = @_;
  return (
    'Clone_left_end',
    'Clone_right_end',
    'region:Genomic_canonical',
    'Sequence:Genomic_canonical',
  );
}
139 __END__
141 =head1 BUGS
143 None reported.
146 =head1 SEE ALSO
148 L<Bio::DB::GFF>, L<Bio::DB::GFF::Aggregator>
150 =head1 AUTHOR
152 Lincoln Stein E<lt>lstein@cshl.orgE<gt>.
154 Copyright (c) 2001 Cold Spring Harbor Laboratory.
156 This library is free software; you can redistribute it and/or modify
157 it under the same terms as Perl itself.
159 =cut