lib/Bio/DB/SeqFeature/Store/berkeleydb3.pm

   1 package Bio::DB::SeqFeature::Store::berkeleydb3;
   2
   3 # $Id: berkeleydb3.pm 15987 2009-08-18 21:08:55Z lstein $
   4 # faster implementation of berkeleydb
   5
   6 =head1 NAME
   7
   8 Bio::DB::SeqFeature::Store::berkeleydb3 -- Storage and retrieval of sequence
   9 annotation data in Berkeleydb files
  10
  11 =head1 SYNOPSIS
  12
  13   # Create a feature database from scratch
  14   $db     = Bio::DB::SeqFeature::Store->new( -adaptor => 'berkeleydb',
  15                                              -dsn     => '/var/databases/fly4.3',
  16                                              -create  => 1);
  17
  18   # get a feature from somewhere
  19   my $feature = Bio::SeqFeature::Generic->new(...);
  20
  21   # store it
  22   $db->store($feature) or die "Couldn't store!";
  23
  24 =head1 DESCRIPTION
  25
  26 This is a faster version of the berkeleydb storage adaptor for
  27 Bio::DB::SeqFeature::Store. It is used automatically when you create a
  28 new database with the original berkeleydb adaptor. When opening a
  29 database created under the original adaptor, the old code is used for
  30 backward compatibility.
  31
  32 Please see L<Bio::DB::SeqFeature::Store::berkeleydb> for full usage
  33 instructions.
  34
  35 =head1 BUGS
  36
  37 This is an early version, so there are certainly some bugs. Please
  38 use the BioPerl bug tracking system to report bugs.
  39
  40 =head1 SEE ALSO
  41
  42 L<bioperl>,
  43 L<Bio::DB::SeqFeature>,
  44 L<Bio::DB::SeqFeature::Store>,
  45 L<Bio::DB::SeqFeature::GFF3Loader>,
  46 L<Bio::DB::SeqFeature::Segment>,
  47 L<Bio::DB::SeqFeature::Store::memory>,
  48 L<Bio::DB::SeqFeature::Store::DBI::mysql>,
  49
  50 =head1 AUTHOR
  51
  52 Lincoln Stein E<lt>lincoln.stein@gmail.comE<gt>.
  53
  54 Copyright (c) 2009 Ontario Institute for Cancer Research
  55
  56 This library is free software; you can redistribute it and/or modify
  57 it under the same terms as Perl itself.
  58
  59 =cut
  60
  61 use strict;
  62 use base 'Bio::DB::SeqFeature::Store::berkeleydb';
  63 use DB_File;
  64 use Fcntl qw(O_RDWR O_CREAT :flock);
  65 use Bio::DB::GFF::Util::Rearrange 'rearrange';
  66
  67 # can't have more sequence ids than this
  68 use constant MAX_SEQUENCES => 1_000_000_000;
  69
  70 # used to construct the bin key
  71 use constant C1  => 500_000_000; # limits chromosome length to 500 megabases
  72 use constant C2  => 1000*C1;     # at most 1000 chromosomes
  73
  74 use constant BINSIZE       => 10_000;
  75 use constant MININT        => -999_999_999_999;
  76 use constant MAXINT        => 999_999_999_999;
  77 use constant SUMMARY_BIN_SIZE => 1000;
  78
  79 sub version { return 3.0 }
  80
  81 sub open_index_dbs {
  82     my $self = shift;
  83     my ($flags,$create) = @_;
  84
  85     # Create the main index databases; these are DB_BTREE implementations with duplicates allowed.
  86     $DB_BTREE->{flags}  = R_DUP;
  87
  88     my $string_cmp          = DB_File::BTREEINFO->new;
  89     $string_cmp->{flags}    = R_DUP;
  90     $string_cmp->{compare}  = sub { lc $_[0] cmp lc $_[1] };
  91
  92     my $numeric_cmp         = DB_File::BTREEINFO->new;
  93     $numeric_cmp->{flags}   = R_DUP;
  94     $numeric_cmp->{compare} = sub { $_[0] <=> $_[1] };
  95
  96     for my $idx ($self->_index_files) {
  97         my $path    = $self->_qualify("$idx.idx");
  98         my %db;
  99         my $dbtype  = $idx eq 'locations' ? $numeric_cmp
 100                      :$idx eq 'summary'   ? $numeric_cmp
 101                      :$idx eq 'types'     ? $numeric_cmp
 102                      :$idx eq 'seqids'    ? $DB_HASH
 103                      :$idx eq 'typeids'   ? $DB_HASH
 104                      :$string_cmp;
 105
 106         tie(%db,'DB_File',$path,$flags,0666,$dbtype)
 107             or $self->throw("Couldn't tie $path: $!");
 108         %db = () if $create;
 109         $self->index_db($idx=>\%db);
 110     }
 111
 112 }
 113
 114 sub seqid_db  { shift->index_db('seqids')    }
 115 sub typeid_db { shift->index_db('typeids') }
 116
 117 sub _delete_databases {
 118     my $self = shift;
 119     $self->SUPER::_delete_databases;
 120 }
 121
 122 # given a seqid (name), return its denormalized numeric representation
 123 sub seqid_id {
 124     my $self   = shift;
 125     my $seqid  = shift;
 126     my $db     = $self->seqid_db;
 127     return $db->{lc $seqid};
 128 }
 129
 130 sub add_seqid {
 131     my $self  = shift;
 132     my $seqid = shift;
 133
 134     my $db    = $self->seqid_db;
 135     my $key   = lc $seqid;
 136     $db->{$key} = ++$db->{'.nextid'} unless exists $db->{$key};
 137     die "Maximum number of sequence ids exceeded. This module can handle up to ",
 138         MAX_SEQUENCES," unique ids" if $db->{$key} > MAX_SEQUENCES;
 139     return $db->{$key};
 140 }
 141
 142 # given a seqid (name), return its denormalized numeric representation
 143 sub type_id {
 144     my $self   = shift;
 145     my $typeid  = shift;
 146     my $db     = $self->typeid_db;
 147     return $db->{$typeid};
 148 }
 149
 150 sub add_typeid {
 151     my $self  = shift;
 152     my $typeid = shift;
 153
 154     my $db      = $self->typeid_db;
 155     my $key     = lc $typeid;
 156     $db->{$key} = ++$db->{'.nextid'} unless exists $db->{$key};
 157     return $db->{$key};
 158 }
 159
 160 sub _seq_ids {
 161     my $self = shift;
 162     if (my $fa = $self->{fasta_db}) {
 163         if (my @s = eval {$fa->ids}) {
 164             return @s;
 165         }
 166     }
 167     my $l = $self->seqid_db or return;
 168     return grep {!/^\./} keys %$l;
 169 }
 170
 171 sub _index_files {
 172     return (shift->SUPER::_index_files,'seqids','typeids','summary');
 173 }
 174
 175 sub _update_indexes {
 176     my $self = shift;
 177     my $obj  = shift;
 178     defined (my $id   = $obj->primary_id) or return;
 179     $self->SUPER::_update_indexes($obj);
 180     $self->_update_seqid_index($obj,$id);
 181 }
 182
 183 sub _update_seqid_index {
 184     my $self = shift;
 185     my ($obj,$id,$delete) = @_;
 186     my $seq_name = $obj->seq_id;
 187     $self->add_seqid(lc $seq_name);
 188 }
 189
 190 sub _update_type_index {
 191     my $self = shift;
 192     my ($obj,$id,$delete) = @_;
 193     my $db = $self->index_db('types')
 194         or $self->throw("Couldn't find 'types' index file");
 195
 196     my $key    = $self->_obj_to_type($obj);
 197     my $typeid = $self->add_typeid($key);
 198     $self->update_or_delete($delete,$db,$typeid,$id);
 199 }
 200
 201 sub _obj_to_type {
 202     my $self = shift;
 203     my $obj  = shift;
 204     my $tag         = $obj->primary_tag;
 205     my $source_tag  = $obj->source_tag || '';
 206     return unless defined $tag;
 207
 208     $tag           .= ":$source_tag";
 209     return lc $tag;
 210 }
 211
 212 sub types {
 213     my $self = shift;
 214     eval "require Bio::DB::GFF::Typename"
 215         unless Bio::DB::GFF::Typename->can('new');
 216     my $db   = $self->typeid_db;
 217     return grep {!/^\./} map {Bio::DB::GFF::Typename->new($_)} keys %$db;
 218 }
 219
 220 sub _id2type {
 221     my $self = shift;
 222     my $wanted_id   = shift;
 223
 224     my $db = $self->typeid_db;
 225     while (my($key,$id) = each %$db) {
 226         next if $key =~ /^\./;
 227         return $key if $id == $wanted_id;
 228     }
 229     return;
 230 }
 231
 232 # return a hash of typeids that match a human-readable type
 233 sub _matching_types {
 234     my $self  = shift;
 235     my $types = shift;
 236     my @types = ref $types eq 'ARRAY' ?  @$types : $types;
 237     my $db   = $self->typeid_db;
 238
 239     my %result;
 240     my @all_types;
 241
 242     for my $type (@types) {
 243         my ($primary_tag,$source_tag);
 244         if (ref $type && $type->isa('Bio::DB::GFF::Typename')) {
 245             $primary_tag = $type->method;
 246             $source_tag  = $type->source;
 247         } else {
 248             ($primary_tag,$source_tag) = split ':',$type,2;
 249         }
 250         if (defined $source_tag) {
 251             my $id = $db->{lc "$primary_tag:$source_tag"};
 252             $result{$id}++ if defined $id;
 253         } else {
 254             @all_types  = $self->types unless @all_types;
 255             $result{$db->{$_}}++ foreach grep {/^$primary_tag:/} @all_types;
 256         }
 257     }
 258     return \%result;
 259 }
 260
 261 sub _update_location_index {
 262     my $self = shift;
 263     my ($obj,$id,$delete) = @_;
 264
 265     my $db = $self->index_db('locations')
 266         or $self->throw("Couldn't find 'locations' index file");
 267
 268     my $seq_id      = $obj->seq_id || '';
 269     my $start       = $obj->start  || '';
 270     my $end         = $obj->end    || '';
 271     my $strand      = $obj->strand;
 272     my $bin_min     = int $start/BINSIZE;
 273     my $bin_max     = int $end/BINSIZE;
 274
 275     my $typeid      = $self->add_typeid($self->_obj_to_type($obj));
 276     my $seq_no      = $self->add_seqid($seq_id);
 277
 278     for (my $bin = $bin_min; $bin <= $bin_max; $bin++ ) {
 279         my $key = $seq_no * MAX_SEQUENCES + $bin;
 280         $self->update_or_delete($delete,$db,$key,pack("i5",$id,$start,$end,$strand,$typeid));
 281     }
 282
 283 }
 284
 285 sub _features {
 286     my $self = shift;
 287     my ($seq_id,$start,$end,$strand,
 288         $name,$class,$allow_aliases,
 289         $types,
 290         $attributes,
 291         $range_type,
 292         $iterator
 293        ) = rearrange([['SEQID','SEQ_ID','REF'],'START',['STOP','END'],'STRAND',
 294                       'NAME','CLASS','ALIASES',
 295                       ['TYPES','TYPE','PRIMARY_TAG'],
 296                       ['ATTRIBUTES','ATTRIBUTE'],
 297                       'RANGE_TYPE',
 298                       'ITERATOR',
 299                      ],@_);
 300
 301     my (@from,@where,@args,@group);
 302     $range_type ||= 'overlaps';
 303
 304     my @result;
 305     unless (defined $name or defined $seq_id or defined $types or defined $attributes) {
 306         my $is_indexed = $self->index_db('is_indexed');
 307         @result = $is_indexed ? grep {$is_indexed->{$_}} keys %{$self->db}
 308                               : grep { !/^\./ }keys %{$self->db};
 309     }
 310
 311     my %found = ();
 312     my $result = 1;
 313
 314     if (defined($name)) {
 315         # hacky backward compatibility workaround
 316         undef $class if $class && $class eq 'Sequence';
 317         $name     = "$class:$name" if defined $class && length $class > 0;
 318         $result &&= $self->filter_by_name($name,$allow_aliases,\%found);
 319     }
 320
 321     if (defined $seq_id) { # location with or without types
 322         my $typelist = defined $types ? $self->_matching_types($types) : undef;
 323         $result &&= $self->filter_by_type_and_location(
 324             $seq_id, $start, $end, $strand, $range_type, $typelist, \%found
 325         );
 326     }
 327
 328     elsif (defined $types) { # types without location
 329         $result &&= $self->filter_by_type($types,\%found);
 330     }
 331
 332     if (defined $attributes) {
 333       $result &&= $self->filter_by_attribute($attributes,\%found);
 334     }
 335
 336     push @result,keys %found if $result;
 337     return $iterator ? Bio::DB::SeqFeature::Store::berkeleydb::Iterator->new($self,\@result)
 338                      : map {$self->fetch($_)} @result;
 339 }
 340
 341 sub filter_by_type {
 342     my $self = shift;
 343     my ($types,$filter) = @_;
 344     my @types = ref $types eq 'ARRAY' ?  @$types : $types;
 345
 346     my $index = $self->index_db('types');
 347     my $db    = tied(%$index);
 348
 349     my @results;
 350
 351     for my $type (@types) {
 352         my ($primary_tag,$source_tag);
 353         if (ref $type && $type->isa('Bio::DB::GFF::Typename')) {
 354             $primary_tag = $type->method;
 355             $source_tag  = $type->source;
 356         } else {
 357             ($primary_tag,$source_tag) = split ':',$type,2;
 358         }
 359         $source_tag ||= '';
 360         $primary_tag  = quotemeta($primary_tag);
 361         $source_tag    = quotemeta($source_tag);
 362         my $match = length $source_tag ? "^$primary_tag:$source_tag\$" : "^$primary_tag:";
 363         my $key      = lc "$primary_tag:$source_tag";
 364         my $value;
 365
 366         # If filter is already provided, then it is usually faster to
 367         # fetch each object.
 368         if (%$filter) {
 369             for my $id (keys %$filter) {
 370                 my $obj = $self->_fetch($id) or next;
 371                 push @results,$id if $obj->type =~ /$match/i;
 372             }
 373
 374         }
 375
 376         else {
 377             my $types   = $self->typeid_db;
 378             my @typeids = map {$types->{$_}} grep {/$match/} keys %$types;
 379             for my $t (@typeids) {
 380                 my $k = $t;
 381                 for (my $status = $db->seq($k,$value,R_CURSOR);
 382                     $status == 0 && $k == $t;
 383                     $status = $db->seq($k,$value,R_NEXT)) {
 384                     next if %$filter && !$filter->{$value};  # don't even bother
 385                     push @results,$value;
 386                 }
 387             }
 388         }
 389     }
 390     $self->update_filter($filter,\@results);
 391 }
 392
 393 sub filter_by_type_and_location {
 394     my $self = shift;
 395     my ($seq_id,$start,$end,$strand,$range_type,$typelist,$filter) = @_;
 396     $strand ||= 0;
 397
 398     my $index    = $self->index_db('locations');
 399     my $db       = tied(%$index);
 400
 401     my $binstart = defined $start ? int $start/BINSIZE : 0;
 402     my $binend   = defined $end   ? int $end/BINSIZE   : MAX_SEQUENCES-1;
 403
 404     my %seenit;
 405     my @results;
 406
 407     $start = MININT  if !defined $start;
 408     $end   = MAXINT  if !defined $end;
 409
 410     my $seq_no = $self->seqid_id($seq_id);
 411     return unless defined $seq_no;
 412
 413     if ($range_type eq 'overlaps' or $range_type eq 'contains') {
 414         my $keystart = $seq_no * MAX_SEQUENCES + $binstart;
 415         my $keystop  = $seq_no * MAX_SEQUENCES + $binend;
 416         my $value;
 417
 418         for (my $status = $db->seq($keystart,$value,R_CURSOR);
 419              $status == 0 && $keystart <= $keystop;
 420              $status = $db->seq($keystart,$value,R_NEXT)) {
 421             my ($id,$fstart,$fend,$fstrand,$ftype) = unpack("i5",$value);
 422             next if $seenit{$id}++;
 423             next if $strand   && $fstrand != $strand;
 424             next if $typelist && !$typelist->{$ftype};
 425             if ($range_type eq 'overlaps') {
 426                 next unless $fend >= $start && $fstart <= $end;
 427             }
 428             elsif ($range_type eq 'contains') {
 429                 next unless $fstart >= $start && $fend <= $end;
 430             }
 431             next if %$filter && !$filter->{$id};  # don't bother
 432             push @results,$id;
 433         }
 434     }
 435
 436     # for contained in, we look for features originating and terminating outside the specified range
 437     # this is incredibly inefficient, but fortunately the query is rare (?)
 438     elsif ($range_type eq 'contained_in') {
 439         my $keystart = $seq_no * MAX_SEQUENCES;
 440         my $keystop  = $seq_no * MAX_SEQUENCES + $binstart;
 441         my $value;
 442
 443         # do the left part of the range
 444         for (my $status = $db->seq($keystart,$value,R_CURSOR);
 445              $status == 0 && $keystart <= $keystop;
 446              $status = $db->seq($keystart,$value,R_NEXT)) {
 447             my ($id,$fstart,$fend,$fstrand,$ftype) = unpack("i5",$value);
 448             next if $seenit{$id}++;
 449             next if $strand && $fstrand != $strand;
 450             next if $typelist && !$typelist->{$ftype};
 451             next unless $fstart <= $start && $fend >= $end;
 452             next if %$filter && !$filter->{$id};  # don't bother
 453             push @results,$id;
 454         }
 455
 456         # do the right part of the range
 457         $keystart = $seq_no*MAX_SEQUENCES+$binend;
 458         for (my $status = $db->seq($keystart,$value,R_CURSOR);
 459              $status == 0;
 460              $status = $db->seq($keystart,$value,R_NEXT)) {
 461             my ($id,$fstart,$fend,$fstrand,$ftype) = unpack("i5",$value);
 462             next if $seenit{$id}++;
 463             next if $strand && $fstrand != $strand;
 464             next unless $fstart <= $start && $fend >= $end;
 465             next if $typelist && !$typelist->{$ftype};
 466             next if %$filter && !$filter->{$id};  # don't bother
 467             push @results,$id;
 468         }
 469
 470     }
 471
 472     $self->update_filter($filter,\@results);
 473 }
 474
 475 sub build_summary_statistics {
 476     my $self = shift;
 477
 478     my $insert = $self->index_db('summary');
 479     %$insert   = ();
 480
 481     my $current_bin = -1;
 482     my (%residuals,$last_bin);
 483
 484     my $le = -t \*STDERR ? "\r" : "\n";
 485
 486     print STDERR "\n";
 487
 488     # iterate through all the indexed features
 489     my $sbs      = SUMMARY_BIN_SIZE;
 490
 491     # Sadly we have to do this in two steps. In the first step, we sort
 492     # features by typeid,seqid,start. In the second step, we read through
 493     # this sorted list. To avoid running out of memory, we use a db_file
 494     # temporary database
 495     my $fh   = File::Temp->new() or $self->throw("Could not create temporary file for sorting: $!");
 496     my $name = $fh->filename;
 497     my %sort;
 498     my $num_cmp_tree         = DB_File::BTREEINFO->new;
 499     $num_cmp_tree->{compare} = sub { $_[0] <=> $_[1] };
 500     $num_cmp_tree->{flags}   = R_DUP;
 501     my $s = tie %sort, 'DB_File', $name, O_CREAT|O_RDWR, 0666, $num_cmp_tree
 502         or $self->throw("Could not create Berkeley DB in temporary file '$name': $!");
 503
 504     my $index    = $self->index_db('locations');
 505     my $db       = tied(%$index);
 506     my $keystart = 0;
 507     my ($value,$count);
 508     my %seenit;
 509
 510     for (my $status = $db->seq($keystart,$value,R_CURSOR);
 511          $status == 0;
 512          $status  = $db->seq($keystart,$value,R_NEXT)) {
 513         my ($id,$start,$end,$strand,$typeid) = unpack('i5',$value);
 514         next if $seenit{$id}++;
 515
 516         print STDERR $count," features sorted$le" if ++$count % 1000 == 0;
 517         my $seqid = int($keystart / MAX_SEQUENCES);
 518         my $key   = $self->_encode_summary_key($typeid,$seqid,$start-1);
 519         $sort{$key}=$end;
 520     }
 521     print STDERR "COUNT = $count\n";
 522
 523     my ($current_type,$current_seqid,$end);
 524     my $cum_count = 0;
 525
 526     $keystart = 0;
 527     $count    = 0;
 528
 529     # the second step allows us to iterate through this
 530     for (my $status = $s->seq($keystart,$end,R_CURSOR);
 531          $status == 0;
 532          $status  = $s->seq($keystart,$end,R_NEXT)) {
 533
 534         print STDERR $count," features processed$le" if ++$count % 1000 == 0;
 535         my ($typeid,$seqid,$start) = $self->_decode_summary_key($keystart);
 536
 537         my $bin   = int($start/$sbs);
 538
 539         # because the input is sorted by start, no more features will contribute to the
 540         # current bin so we can dispose of it
 541         if ($bin != $current_bin) {
 542             if ($seqid != $current_seqid or $typeid != $current_type) {
 543                 # load all bins left over
 544                 $self->_load_bins($insert,\%residuals,\$cum_count,$current_type,$current_seqid);
 545                 %residuals = () ;
 546                 $cum_count = 0;
 547             } else {
 548                 # load all up to current one
 549                 $self->_load_bins($insert,\%residuals,\$cum_count,$current_type,$current_seqid,$current_bin);
 550             }
 551         }
 552
 553         $last_bin = $current_bin;
 554         ($current_seqid,$current_type,$current_bin) = ($seqid,$typeid,$bin);
 555
 556         # summarize across entire spanned region
 557         my $last_bin = int(($end-1)/$sbs);
 558         for (my $b=$bin;$b<=$last_bin;$b++) {
 559             $residuals{$b}++;
 560         }
 561     }
 562
 563     # handle tail case
 564     # load all bins left over
 565     $self->_load_bins($insert,\%residuals,\$cum_count,$current_type,$current_seqid);
 566
 567     undef %sort;
 568     undef $fh;
 569 }
 570
 571 sub _load_bins {
 572     my $self = shift;
 573     my ($insert,$residuals,$cum_count,$typeid,$seqid,$stop_after) = @_;
 574     for my $b (sort {$a<=>$b} keys %$residuals) {
 575         last if defined $stop_after and $b > $stop_after;
 576         $$cum_count += $residuals->{$b};
 577         my $key         = $self->_encode_summary_key($typeid,$seqid,$b);
 578         $insert->{$key} = $$cum_count;
 579         delete $residuals->{$b}; # no longer needed
 580     }
 581 }
 582
 583 sub coverage_array {
 584     my $self = shift;
 585     my ($seq_name,$start,$end,$types,$bins) =
 586         rearrange([['SEQID','SEQ_ID','REF'],'START',['STOP','END'],
 587                    ['TYPES','TYPE','PRIMARY_TAG'],'BINS'],@_);
 588
 589     $bins  ||= 1000;
 590     $start ||= 1;
 591     unless ($end) {
 592         my $segment = $self->segment($seq_name) or $self->throw("unknown seq_id $seq_name");
 593         $end        = $segment->end;
 594     }
 595
 596     my $binsize = ($end-$start+1)/$bins;
 597     my $seqid   = $self->seqid_id($seq_name) || 0;
 598
 599     return [] unless $seqid;
 600
 601     # where each bin starts
 602     my @his_bin_array = map {$start + $binsize * $_}       (0..$bins);
 603     my @sum_bin_array = map {int(($_-1)/SUMMARY_BIN_SIZE)} @his_bin_array;
 604
 605     my $interval_stats_idx = $self->index_db('summary');
 606     my $db                 = tied(%$interval_stats_idx);
 607     my $t                  = $self->_matching_types($types);
 608
 609     my (%bins,$report_tag);
 610     for my $typeid (sort keys %$t) {
 611         $report_tag ||= $typeid;
 612
 613         for (my $i=0;$i<@sum_bin_array;$i++) {
 614             my $cum_count;
 615             my $bin = $sum_bin_array[$i];
 616             my $key = $self->_encode_summary_key($typeid,$seqid,$bin);
 617             my $status = $db->seq($key,$cum_count,R_CURSOR);
 618             next unless $status == 0;
 619             push @{$bins{$typeid}},[$bin,$cum_count];
 620         }
 621     }
 622
 623     my @merged_bins;
 624     my $firstbin = int(($start-1)/$binsize);
 625     for my $type (keys %bins) {
 626         my $arry       = $bins{$type};
 627         my $last_count = $arry->[0][1]-1;
 628         my $last_bin   = -1;
 629         my $i          = 0;
 630         my $delta;
 631         for my $b (@$arry) {
 632             my ($bin,$count) = @$b;
 633             $delta              = $count - $last_count if $bin > $last_bin;
 634             $merged_bins[$i++]  = $delta;
 635             $last_count         = $count;
 636             $last_bin           = $bin;
 637         }
 638     }
 639     my $returned_type = $self->_id2type($report_tag);
 640     return wantarray ? (\@merged_bins,$returned_type) : \@merged_bins;
 641 }
 642
 643
 644 sub _encode_summary_key {
 645     my $self                 = shift;
 646     my ($typeid,$seqid,$bin) = @_;
 647     $self->throw('Cannot index chromosomes larger than '.C1*SUMMARY_BIN_SIZE/1e6.' megabases')
 648         if $bin > C1;
 649     return ($typeid-1)*C2 + ($seqid-1)*C1 + $bin;
 650 }
 651
 652 sub _decode_summary_key {
 653     my $self   = shift;
 654     my $key    = shift;
 655     my $typeid   = int($key/C2);
 656     my $residual =     $key%C2;
 657     my $seqid    = int($residual/C1);
 658     my $bin      =     $residual%C1;
 659     return ($typeid+1,$seqid+1,$bin);
 660 }
 661
 662
 663 1;
 664