Apply carsonhh's patch
[bioperl-live.git] / scripts / Bio-DB-GFF / bp_generate_histogram.pl
blobac3976990e9d796ecc7513dfe6590a19a88e9d90
1 #!/usr/bin/perl
3 use strict;
4 use warnings;
5 use lib '.','./blib','../../blib/lib';
6 use Bio::DB::GFF;
7 use Getopt::Long;
9 my $usage = <<USAGE;
10 Usage: $0 [options] feature_type1 feature_type2...
12 Dump out a GFF-formatted histogram of the density of the indicated set
13 of feature types.
15 Options:
16 --dsn <dsn> Data source (default dbi:mysql:test)
17 --adaptor <adaptor> Schema adaptor (default dbi::mysqlopt)
18 --user <user> Username for mysql authentication
19 --pass <password> Password for mysql authentication
20 --bin <bp> Bin size in base pairs.
21 --aggregator <list> Comma-separated list of aggregators
22 --sort Sort the resulting list by type and bin
23 --merge Merge features with same method but different sources
24 USAGE
27 my ($DSN,$ADAPTOR,$AGG,$USER,$PASSWORD,$BINSIZE,$SORT,$MERGE);
28 GetOptions ('dsn:s' => \$DSN,
29 'adaptor:s' => \$ADAPTOR,
30 'user:s' => \$USER,
31 'password:s' => \$PASSWORD,
32 'aggregators:s' => \$AGG,
33 'bin:i' => \$BINSIZE,
34 'sort' => \$SORT,
35 'merge' => \$MERGE,
36 ) or die $usage;
38 my @types = @ARGV or die $usage;
40 # some local defaults
41 $DSN ||= 'dbi:mysql:test';
42 $ADAPTOR ||= 'dbi::mysqlopt';
43 $BINSIZE ||= 1_000_000; # 1 megabase bins
45 my @options;
46 push @options,(-user=>$USER) if defined $USER;
47 push @options,(-pass=>$PASSWORD) if defined $PASSWORD;
48 push @options,(-aggregator=>[split /\s+/,$AGG]) if defined $AGG;
50 my $db = Bio::DB::GFF->new(-adaptor=>$ADAPTOR,-dsn => $DSN,@options)
51 or die "Can't open database: ",Bio::DB::GFF->error,"\n";
53 my @features = $db->features(-binsize=>$BINSIZE,-types=>\@types);
55 if ($MERGE) {
56 my %MERGE;
57 for my $f (@features) {
58 my $name = $f->name;
59 my $class = $name->class;
60 $name =~ s/^(.+:.+):.+$/$1/;
61 $f->group(Bio::DB::GFF::Featname->new($class,$name));
62 my $source = $f->source;
63 $source =~ s/:.+$//;
64 $f->source($source);
65 if (my $already_there = $MERGE{$f->source,$f->abs_ref,$f->abs_start}) {
66 $already_there->score($already_there->score + $f->score);
67 } else {
68 $MERGE{$f->source,$f->abs_ref,$f->abs_start} = $f;
71 @features = values %MERGE;
74 # sort features by type, ref and start if requested
75 if ($SORT) {
76 @features = sort {
77 $a->type cmp $b->type
78 || $a->abs_ref cmp $b->abs_ref
79 || $a->start <=> $b->start
81 @features;
84 for my $f (@features) {
85 print $f->gff_string,"\n";
89 __END__
91 =head1 NAME
93 bp_generate_histogram.pl -- Generate a histogram of Bio::DB::GFF features
95 =head1 SYNOPSIS
97 bp_generate_histogram.pl -d gadfly variation gene:curated
99 =head1 DESCRIPTION
101 Use this utility to generate feature density histograms from
102 Bio::DB::GFF databases. The result is a GFF data file that is
103 suitable for loading with load_gff.pl.
105 =head2 OPTIONS
107 The following options are recognized:
109 Option Description
110 ------ -----------
112 --dsn <dsn> Data source (default dbi:mysql:test)
113 --adaptor <adaptor> Schema adaptor (default dbi::mysqlopt)
114 --user <user> Username for mysql authentication
115 --pass <password> Password for mysql authentication
116 --aggregator <list> Comma-separated list of aggregators
118 =head1 BUGS
120 Please report them.
122 =head1 SEE ALSO
124 L<Bio::DB::GFF>
126 =head1 AUTHOR
128 Lincoln Stein E<lt>lstein@cshl.orgE<gt>
130 Copyright (c) 2001 Cold Spring Harbor Laboratory
132 This library is free software; you can redistribute it and/or modify
133 it under the same terms as Perl itself. See DISCLAIMER.txt for
134 disclaimers of warranty.
136 =cut