Bug 21334: Add bibliographic content type definitions
[koha.git] / Koha / Edifact.pm
blob38756270b27cb9d77e952ab2187b521d69fbdb4b
1 package Koha::Edifact;
3 # Copyright 2014,2015 PTFS-Europe Ltd
5 # This file is part of Koha.
7 # Koha is free software; you can redistribute it and/or modify it
8 # under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; either version 3 of the License, or
10 # (at your option) any later version.
12 # Koha is distributed in the hope that it will be useful, but
13 # WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with Koha; if not, see <http://www.gnu.org/licenses>.
20 use strict;
21 use warnings;
22 use File::Slurp;
23 use Carp;
24 use Encode qw( from_to );
25 use Koha::Edifact::Segment;
26 use Koha::Edifact::Message;
28 my $separator = {
29 component => q{\:},
30 data => q{\+},
31 decimal => q{.},
32 release => q{\?},
33 reserved => q{ },
34 segment => q{\'},
37 sub new {
38 my ( $class, $param_hashref ) = @_;
39 my $transmission;
40 my $self = ();
42 if ( $param_hashref->{filename} ) {
43 if ( $param_hashref->{transmission} ) {
44 carp
45 "Cannot instantiate $class : both filename and transmission passed";
46 return;
48 $transmission = read_file( $param_hashref->{filename} );
50 else {
51 $transmission = $param_hashref->{transmission};
53 $self->{transmission} = _init($transmission);
55 bless $self, $class;
56 return $self;
59 sub interchange_header {
60 my ( $self, $field ) = @_;
62 my %element = (
63 sender => 1,
64 recipient => 2,
65 datetime => 3,
66 interchange_control_reference => 4,
67 application_reference => 6,
69 if ( !exists $element{$field} ) {
70 carp "No interchange header field $field available";
71 return;
73 my $data = $self->{transmission}->[0]->elem( $element{$field} );
74 return $data;
77 sub interchange_trailer {
78 my ( $self, $field ) = @_;
79 my $trailer = $self->{transmission}->[-1];
80 if ( $field eq 'interchange_control_count' ) {
81 return $trailer->elem(0);
83 elsif ( $field eq 'interchange_control_reference' ) {
84 return $trailer->elem(1);
86 carp "Trailer field $field not recognized";
87 return;
90 sub new_data_iterator {
91 my $self = shift;
92 my $offset = 0;
93 while ( $self->{transmission}->[$offset]->tag() ne 'UNH' ) {
94 ++$offset;
95 if ( $offset == @{ $self->{transmission} } ) {
96 carp 'Cannot find message start';
97 return;
100 $self->{data_iterator} = $offset;
101 return 1;
104 sub next_segment {
105 my $self = shift;
106 if ( defined $self->{data_iterator} ) {
107 my $seg = $self->{transmission}->[ $self->{data_iterator} ];
108 if ( $seg->tag eq 'UNH' ) {
110 $self->{msg_type} = $seg->elem( 1, 0 );
112 elsif ( $seg->tag eq 'LIN' ) {
113 $self->{msg_type} = 'detail';
116 if ( $seg->tag ne 'UNZ' ) {
117 $self->{data_iterator}++;
119 else {
120 $self->{data_iterator} = undef;
122 return $seg;
124 return;
127 # for debugging return whole transmission
128 sub get_transmission {
129 my $self = shift;
131 return $self->{transmission};
134 sub message_type {
135 my $self = shift;
136 return $self->{msg_type};
139 sub _init {
140 my $msg = shift;
141 if ( !$msg ) {
142 return;
144 if ( $msg =~ s/^UNA(.{6})// ) {
145 if ( service_string_advice($1) ) {
146 return segmentize($msg);
149 return;
151 else {
152 my $s = substr $msg, 10;
153 croak "File does not start with a Service string advice :$s";
157 # return an array of Message objects
158 sub message_array {
159 my $self = shift;
161 # return an array of array_refs 1 ref to a message
162 my $msg_arr = [];
163 my $msg = [];
164 my $in_msg = 0;
165 foreach my $seg ( @{ $self->{transmission} } ) {
166 if ( $seg->tag eq 'UNH' ) {
167 $in_msg = 1;
168 push @{$msg}, $seg;
170 elsif ( $seg->tag eq 'UNT' ) {
171 $in_msg = 0;
172 if ( @{$msg} ) {
173 push @{$msg_arr}, Koha::Edifact::Message->new($msg);
174 $msg = [];
177 elsif ($in_msg) {
178 push @{$msg}, $seg;
181 return $msg_arr;
185 # internal parsing routines used in _init
187 sub service_string_advice {
188 my $ssa = shift;
190 # At present this just validates that the ssa
191 # is standard Edifact
192 # TBD reset the seps if non standard
193 if ( $ssa ne q{:+.? '} ) {
194 carp " Non standard Service String Advice [$ssa]";
195 return;
198 # else use default separators
199 return 1;
202 sub segmentize {
203 my $raw = shift;
205 # In practice edifact uses latin-1 but check
206 # Transport now converts to utf-8 on ingest
207 # Do not convert here
208 #my $char_set = 'iso-8859-1';
209 #if ( $raw =~ m/^UNB[+]UNO(.)/ ) {
210 # $char_set = msgcharset($1);
212 #from_to( $raw, $char_set, 'utf8' );
214 my $re = qr{
215 (?> # dont backtrack into this group
216 [?]. # either the escape character
217 # followed by any other character
218 | # or
219 [^'?] # a character that is neither escape
220 # nor split
223 my @segmented;
224 while ( $raw =~ /($re)/g ) {
225 push @segmented, Koha::Edifact::Segment->new( { seg_string => $1 } );
227 return \@segmented;
230 sub msgcharset {
231 my $code = shift;
232 if ( $code =~ m/^[^ABCDEF]$/ ) {
233 $code = 'default';
235 my %encoding_map = (
236 A => 'ascii',
237 B => 'ascii',
238 C => 'iso-8859-1',
239 D => 'iso-8859-1',
240 E => 'iso-8859-1',
241 F => 'iso-8859-1',
242 default => 'iso-8859-1',
244 return $encoding_map{$code};
248 __END__
250 =head1 NAME
252 Edifact - Edifact message handler
254 =head1 DESCRIPTION
256 Koha module for parsing Edifact messages
258 =head1 SUBROUTINES
260 =head2 new
262 my $e = Koha::Edifact->new( { filename => 'myfilename' } );
264 my $e = Koha::Edifact->new( { transmission => $msg_variable } );
266 instantiate the Edifact parser, requires either to be passed an in-memory
267 edifact message as transmission or a filename which it will read on creation
269 =head2 interchange_header
271 will return the data in the header field designated by the parameter
272 specified. Valid parameters are: 'sender', 'recipient', 'datetime',
273 'interchange_control_reference', and 'application_reference'
275 =head2 interchange_trailer
277 called either with the string 'interchange_control_count' or
278 'interchange_control_reference' will return the corresponding field from
279 the interchange trailer
281 =head2 new_data_iterator
283 Sets the object's data_iterator to point to the UNH segment
285 =head2 next_segment
287 Returns the next segment pointed to by the data_iterator. Increments the
288 data_iterator member or destroys it if segment UNZ has been reached
290 =head2 get_transmission
292 This method is useful in debugg:ing. Call on an Edifact object
293 it returns the object's transmission member
295 =head2 message_type
297 return the object's message type
299 =head2 message_array
301 return an array of Message objects contained in the Edifact transmission
303 =head1 Internal Methods
305 =head2 _init
307 Called by the constructor to do the parsing of the transmission
309 =head2 service_string_advice
311 Examines the Service String Advice returns 1 if the default separartors are in use
312 undef otherwise
314 =head2 segmentize
316 takes a raw Edifact message and returns a reference to an array of
317 its segments
319 =head2 msgcharset
321 Return the character set the message was encoded in. The default is iso-8859-1
323 We preserve this info but will have converted to utf-8 on ingest
325 =head1 AUTHOR
327 Colin Campbell <colin.campbell@ptfs-europe.com>
330 =head1 COPYRIGHT
332 Copyright 2014,2015, PTFS-Europe Ltd
333 This program is free software, You may redistribute it under
334 under the terms of the GNU General Public License
337 =cut