3 # This script can be used as a base to parse unreliable CSV streams
4 # Modify to your own needs
6 # (m)'08 [23 Apr 2008] Copyright H.M.Brand 2008-2013
13 my $csv = Text::CSV_XS->new ({ binary => 1,
17 my $csa = Text::CSV_XS->new ({ binary => 1,
18 allow_loose_quotes => 1,
23 my $file = @ARGV ? shift : "test.csv";
24 open my $fh, "<", $file or die "$file: $!\n";
26 my %err_eol = map { $_ => 1 } 2010, 2027, 2031, 2032;
28 print STDERR "Reading $file with Text::CSV_XS $Text::CSV_XS::VERSION\n";
30 my $row = $csv->getline ($fh);
32 unless ($row) { # Parsing failed
34 # Could be end of file
37 # Diagnose and show what was wrong
38 my @diag = $csv->error_diag;
39 print STDERR "$file line $./$diag[2] - $diag[0] - $diag[1]\n";
40 my $ep = $diag[2] - 1; # diag[2] is 1-based
41 my $ein = $csv->error_input; # The line scanned so far
43 substr $err, $ep + 1, 0, "*"; # Bad character marked between **
44 substr $err, $ep, 0, "*";
45 ($err = substr $err, $ep - 5, 12) =~ s/ +$//;
46 print STDERR " |$err|\n";
48 REPARSE: { # Now retry with allowed options
49 if ($csa->parse ($ein)) {
50 print STDERR "Accepted in allow mode ...\n";
51 $row = [ $csa->fields ];
54 my @diag = $csa->error_diag;
55 if (exists $err_eol{$diag[0]}) { # \r or \n inside field
56 print STDERR " Extending line with next chunk\n";
61 print STDERR " Also could not parse it in allow mode\n";
62 print STDERR " $./$diag[2] - $diag[0] - $diag[1]\n";
63 print STDERR " Line skipped\n";
69 # Data was fine, print data properly quoted
70 $csv->print (*STDOUT, $row);