Add dependencies that not require a module
[Text-CSV_XS.git] / examples / parser-xs.pl
1 #!/pro/bin/perl
2
3 # This script can be used as a base to parse unreliable CSV streams
4 # Modify to your own needs
5 #
6 # (m)'08 [23 Apr 2008] Copyright H.M.Brand 2008-2014
7
8 use strict;
9 use warnings;
10
11 use Text::CSV_XS;
12
13 my $csv = Text::CSV_XS->new ({ binary => 1,
14 blank_is_undef => 1,
15 eol => $/,
16 });
17 my $csa = Text::CSV_XS->new ({ binary => 1,
18 allow_loose_quotes => 1,
19 blank_is_undef => 1,
20 escape_char => undef,
21 });
22
23 my $file = @ARGV ? shift : "test.csv";
24 open my $fh, "<", $file or die "$file: $!\n";
25
26 my %err_eol = map { $_ => 1 } 2010, 2027, 2031, 2032;
27
28 print STDERR "Reading $file with Text::CSV_XS $Text::CSV_XS::VERSION\n";
29 while (1) {
30 my $row = $csv->getline ($fh);
31
32 unless ($row) { # Parsing failed
33
34 # Could be end of file
35 $csv->eof and last;
36
37 # Diagnose and show what was wrong
38 my @diag = $csv->error_diag;
39 print STDERR "$file line $./$diag[2] - $diag[0] - $diag[1]\n";
40 my $ep = $diag[2] - 1; # diag[2] is 1-based
41 my $ein = $csv->error_input; # The line scanned so far
42 my $err = $ein . " ";
43 substr $err, $ep + 1, 0, "*"; # Bad character marked between **
44 substr $err, $ep, 0, "*";
45 ($err = substr $err, $ep - 5, 12) =~ s/ +$//;
46 print STDERR " |$err|\n";
47
48 REPARSE: { # Now retry with allowed options
49 if ($csa->parse ($ein)) {
50 print STDERR "Accepted in allow mode ...\n";
51 $row = [ $csa->fields ];
52 }
53 else { # Still fails
54 my @diag = $csa->error_diag;
55 if (exists $err_eol{$diag[0]}) { # \r or \n inside field
56 print STDERR " Extending line with next chunk\n";
57 $ein .= scalar <$fh>;
58 goto REPARSE;
59 }
60
61 print STDERR " Also could not parse it in allow mode\n";
62 print STDERR " $./$diag[2] - $diag[0] - $diag[1]\n";
63 print STDERR " Line skipped\n";
64 next;
65 }
66 }
67 }
68
69 # Data was fine, print data properly quoted
70 $csv->print (*STDOUT, $row);
71 }