From deb9aa80747003db75d8efc558c7a3c66a406e36 Mon Sep 17 00:00:00 2001 From: Evan Carroll Date: Tue, 17 Jun 2008 15:57:58 -0500 Subject: [PATCH] added the ability to supply a first row shorter than the set of data --- lib/DataExtract/FixedWidth.pm | 23 +++++++++++------- t/11-Heuristic-woHeader-wCols-wUndefHeaderCol.t | 31 +++++++++++++++++++++++++ 2 files changed, 45 insertions(+), 9 deletions(-) create mode 100644 t/11-Heuristic-woHeader-wCols-wUndefHeaderCol.t diff --git a/lib/DataExtract/FixedWidth.pm b/lib/DataExtract/FixedWidth.pm index 76a2da5..e06d3a7 100644 --- a/lib/DataExtract/FixedWidth.pm +++ b/lib/DataExtract/FixedWidth.pm @@ -88,16 +88,21 @@ has 'skip_header_data' => ( sub _heuristic_trigger { my ( $self, $data ) = @_; - + chomp @$data; + my $maxLength = 0; + for ( @$data ) { + $maxLength = length if length > $maxLength + } + $self->header_row( $data->[0] ) unless $self->has_header_row ; - + { my @unpack; - my $mask = ' ' x length $data->[0]; + my $mask = ' ' x $maxLength; $mask |= $_ for @$data; push @unpack, length($1) @@ -149,7 +154,7 @@ sub _build_colchar_map { my $self = shift; my $ccm = {}; - ## If we can generate from heurisitic data and don't have a header_row + ## If we can generate from heurisitic data and don't have a header_row if ( $self->has_header_row && !defined $self->header_row @@ -165,7 +170,7 @@ sub _build_colchar_map { ## Generate from header_row else { croak 'Can not render the map of columns to start-chars without the header_row' - unless defined $self->has_header_row + unless defined $self->has_header_row ; foreach my $col ( $self->cols ) { @@ -329,13 +334,13 @@ DataExtract::FixedWidth - The one stop shop for parsing static column width text ## We assume the columns have no spaces in the header. my $de = DataExtract::FixedWidth->new({ header_row => $header_row }); - + ## We explicitly tell what column names to pick out of the header. 
my $de = DataExtract::FixedWidth->new({ header_row => $header_row cols => [qw/COL1NAME COL2NAME COL3NAME/, 'COL WITH SPACE IN NAME'] }); - + ## We supply data to heuristic and assume ## * first row is the header (to avoid this assumption ## set the header_row to undef. ) @@ -368,9 +373,9 @@ DataExtract::FixedWidth - The one stop shop for parsing static column width text unpack_string => $template , header_row => $header_row }); - + $de->parse( $data_row ); - + $de->parse_hash( $data_row ); =head1 DESCRIPTION diff --git a/t/11-Heuristic-woHeader-wCols-wUndefHeaderCol.t b/t/11-Heuristic-woHeader-wCols-wUndefHeaderCol.t new file mode 100644 index 0000000..8e756ef --- /dev/null +++ b/t/11-Heuristic-woHeader-wCols-wUndefHeaderCol.t @@ -0,0 +1,31 @@ +#!/usr/bin/env perl +use strict; +use warnings; +use feature ':5.10'; + +## +## In version .06 this test failed +## I got the longer a19a9a14a13a5A* instead of a19a9a14a13A* +## This was because the length for the heuristic was set statically to the +## first row rather than the longest row +## + +use DataExtract::FixedWidth; +use IO::File; + +use Test::More tests => 1; + +use File::Spec; +my $file = File::Spec->catfile( 't', 'data', 'larochenew.TXT' ); +my $fh = IO::File->new( $file ); +my @lines = grep /\w/, $fh->getlines; + +my $defw = DataExtract::FixedWidth->new({ + heuristic => \@lines + , cols => [ qw/vin stock color price miles/ ] + , header_row => undef +}); + +is ( $defw->unpack_string, 'a19a9a14a13A*', 'Heuristic not affected by being short' ); + +1; -- 2.11.4.GIT