Bug 7119 - tidy t/00-load.t
[koha.git] / C4 / Charset.pm
blobc294b361331489a29eb771456d415fea95b8f7a0
1 package C4::Charset;
3 # Copyright (C) 2008 LibLime
5 # This file is part of Koha.
7 # Koha is free software; you can redistribute it and/or modify it under the
8 # terms of the GNU General Public License as published by the Free Software
9 # Foundation; either version 2 of the License, or (at your option) any later
10 # version.
12 # Koha is distributed in the hope that it will be useful, but WITHOUT ANY
13 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
14 # A PARTICULAR PURPOSE. See the GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License along
17 # with Koha; if not, write to the Free Software Foundation, Inc.,
18 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
20 use strict;
21 use warnings;
23 use MARC::Charset qw/marc8_to_utf8/;
24 use Text::Iconv;
25 use C4::Debug;
26 use Unicode::Normalize;
28 use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
30 BEGIN {
31 # set the version for version checking
32 $VERSION = 3.07.00.049;
33 require Exporter;
34 @ISA = qw(Exporter);
35 @EXPORT = qw(
36 NormalizeString
37 IsStringUTF8ish
38 MarcToUTF8Record
39 SetUTF8Flag
40 SetMarcUnicodeFlag
41 StripNonXmlChars
42 nsb_clean
46 =head1 NAME
48 C4::Charset - utilities for handling character set conversions.
50 =head1 SYNOPSIS
52 use C4::Charset;
54 =head1 DESCRIPTION
56 This module contains routines for dealing with character set
57 conversions, particularly for MARC records.
59 A variety of character encodings are in use by various MARC
60 standards, and even more character encodings are used by
61 non-standard MARC records. The various MARC formats generally
62 do not do a good job of advertising a given record's character
63 encoding, and even when a record does advertise its encoding,
64 e.g., via the Leader/09, experience has shown that one cannot
65 trust it.
67 Ultimately, all MARC records are stored in Koha in UTF-8 and
68 must be converted from whatever the source character encoding is.
69 The goal of this module is to ensure that these conversions
70 take place accurately. When a character conversion cannot take
71 place, or at least not accurately, the module was provide
72 enough information to allow user-facing code to inform the user
73 on how to deal with the situation.
75 =cut
77 =head1 FUNCTIONS
79 =head2 IsStringUTF8ish
81 my $is_utf8 = IsStringUTF8ish($str);
83 Determines if C<$str> is valid UTF-8. This can mean
84 one of two things:
86 =over
88 =item *
90 The Perl UTF-8 flag is set and the string contains valid UTF-8.
92 =item *
94 The Perl UTF-8 flag is B<not> set, but the octets contain
95 valid UTF-8.
97 =back
99 The function is named C<IsStringUTF8ish> instead of C<IsStringUTF8>
100 because in one could be presented with a MARC blob that is
101 not actually in UTF-8 but whose sequence of octets appears to be
102 valid UTF-8. The rest of the MARC character conversion functions
103 will assume that this situation occur does not very often.
105 =cut
107 sub IsStringUTF8ish {
108 my $str = shift;
110 return 1 if utf8::is_utf8($str);
111 return utf8::decode($str);
114 =head2 SetUTF8Flag
116 my $marc_record = SetUTF8Flag($marc_record, $nfd);
118 This function sets the PERL UTF8 flag for data.
119 It is required when using new_from_usmarc
120 since MARC::File::USMARC does not handle PERL UTF8 setting.
121 When editing unicode marc records fields and subfields, you
122 would end up in double encoding without using this function.
124 If $nfd is set, string normalization will use NFD instead of NFC
126 FIXME
127 In my opinion, this function belongs to MARC::Record and not
128 to this package.
129 But since it handles charset, and MARC::Record, it finds its way in that package
131 =cut
133 sub SetUTF8Flag{
134 my ($record, $nfd)=@_;
135 return unless ($record && $record->fields());
136 foreach my $field ($record->fields()){
137 if ($field->tag()>=10){
138 my @subfields;
139 foreach my $subfield ($field->subfields()){
140 push @subfields,($$subfield[0],NormalizeString($$subfield[1],$nfd));
142 my $newfield=MARC::Field->new(
143 $field->tag(),
144 $field->indicator(1),
145 $field->indicator(2),
146 @subfields
148 $field->replace_with($newfield);
153 =head2 NormalizeString
155 my $normalized_string=NormalizeString($string,$nfd,$transform);
157 Given a string
158 nfd : If you want to set NFD and not NFC
159 transform : If you expect all the signs to be removed
161 Sets the PERL UTF8 Flag on your initial data if need be
162 and applies cleaning if required
164 Returns a utf8 NFC normalized string
166 Sample code :
167 my $string=NormalizeString ("l'ornithoptère");
168 #results into ornithoptère in NFC form and sets UTF8 Flag
170 =cut
173 sub NormalizeString{
174 my ($string,$nfd,$transform)=@_;
175 utf8::decode($string) unless (utf8::is_utf8($string));
176 if ($nfd){
177 $string= NFD($string);
179 else {
180 $string=NFC($string);
182 if ($transform){
183 $string=~s/\<|\>|\^|\;|\.|\?|,|\-|\(|\)|\[|\]|\{|\}|\$|\%|\!|\*|\:|\\|\/|\&|\"|\'/ /g;
184 #removing one letter words "d'" "l'" was changed into "d " "l "
185 $string=~s/\b\S\b//g;
186 $string=~s/\s+$//g;
188 return $string;
191 =head2 MarcToUTF8Record
193 ($marc_record, $converted_from, $errors_arrayref) = MarcToUTF8Record($marc_blob,
194 $marc_flavour, [, $source_encoding]);
196 Given a MARC blob or a C<MARC::Record>, the MARC flavour, and an
197 optional source encoding, return a C<MARC::Record> that is
198 converted to UTF-8.
200 The returned C<$marc_record> is guaranteed to be in valid UTF-8, but
201 is not guaranteed to have been converted correctly. Specifically,
202 if C<$converted_from> is 'failed', the MARC record returned failed
203 character conversion and had each of its non-ASCII octets changed
204 to the Unicode replacement character.
206 If the source encoding was not specified, this routine will
207 try to guess it; the character encoding used for a successful
208 conversion is returned in C<$converted_from>.
210 =cut
212 sub MarcToUTF8Record {
213 my $marc = shift;
214 my $marc_flavour = shift;
215 my $source_encoding = shift;
216 my $marc_record;
217 my $marc_blob_is_utf8 = 0;
218 if (ref($marc) eq 'MARC::Record') {
219 my $marc_blob = $marc->as_usmarc();
220 $marc_blob_is_utf8 = IsStringUTF8ish($marc_blob);
221 $marc_record = $marc;
222 } else {
223 # dealing with a MARC blob
225 # remove any ersatz whitespace from the beginning and
226 # end of the MARC blob -- these can creep into MARC
227 # files produced by several sources -- caller really
228 # should be doing this, however
229 $marc =~ s/^\s+//;
230 $marc =~ s/\s+$//;
231 $marc_blob_is_utf8 = IsStringUTF8ish($marc);
232 eval {
233 $marc_record = MARC::Record->new_from_usmarc($marc);
235 if ($@) {
236 # if we fail the first time, one likely problem
237 # is that we have a MARC21 record that says that it's
238 # UTF-8 (Leader/09 = 'a') but contains non-UTF-8 characters.
239 # We'll try parsing it again.
240 substr($marc, 9, 1) = ' ';
241 eval {
242 $marc_record = MARC::Record->new_from_usmarc($marc);
244 if ($@) {
245 # it's hopeless; return an empty MARC::Record
246 return MARC::Record->new(), 'failed', ['could not parse MARC blob'];
251 # If we do not know the source encoding, try some guesses
252 # as follows:
253 # 1. Record is UTF-8 already.
254 # 2. If MARC flavor is MARC21 or NORMARC, then
255 # a. record is MARC-8
256 # b. record is ISO-8859-1
257 # 3. If MARC flavor is UNIMARC, then
258 if (not defined $source_encoding) {
259 if ($marc_blob_is_utf8) {
260 # note that for MARC21/NORMARC we are not bothering to check
261 # if the Leader/09 is set to 'a' or not -- because
262 # of problems with various ILSs (including Koha in the
263 # past, alas), this just is not trustworthy.
264 SetMarcUnicodeFlag($marc_record, $marc_flavour);
265 return $marc_record, 'UTF-8', [];
266 } else {
267 if ($marc_flavour eq 'MARC21' || $marc_flavour eq 'NORMARC') {
268 return _default_marc21_charconv_to_utf8($marc_record, $marc_flavour);
269 } elsif ($marc_flavour =~/UNIMARC/) {
270 return _default_unimarc_charconv_to_utf8($marc_record, $marc_flavour);
271 } else {
272 return _default_marc21_charconv_to_utf8($marc_record, $marc_flavour);
275 } else {
276 # caller knows the character encoding
277 my $original_marc_record = $marc_record->clone();
278 my @errors;
279 if ($source_encoding =~ /utf-?8/i) {
280 if ($marc_blob_is_utf8) {
281 SetMarcUnicodeFlag($marc_record, $marc_flavour);
282 return $marc_record, 'UTF-8', [];
283 } else {
284 push @errors, 'specified UTF-8 => UTF-8 conversion, but record is not in UTF-8';
286 } elsif ($source_encoding =~ /marc-?8/i) {
287 @errors = _marc_marc8_to_utf8($marc_record, $marc_flavour);
288 } elsif ($source_encoding =~ /5426/) {
289 @errors = _marc_iso5426_to_utf8($marc_record, $marc_flavour);
290 } else {
291 # assume any other character encoding is for Text::Iconv
292 @errors = _marc_to_utf8_via_text_iconv($marc_record, $marc_flavour, $source_encoding);
295 if (@errors) {
296 _marc_to_utf8_replacement_char($original_marc_record, $marc_flavour);
297 return $original_marc_record, 'failed', \@errors;
298 } else {
299 return $marc_record, $source_encoding, [];
305 =head2 SetMarcUnicodeFlag
307 SetMarcUnicodeFlag($marc_record, $marc_flavour);
309 Set both the internal MARC::Record encoding flag
310 and the appropriate Leader/09 (MARC21) or
311 100/26-29 (UNIMARC) to indicate that the record
312 is in UTF-8. Note that this does B<not> do
313 any actual character conversion.
315 =cut
317 sub SetMarcUnicodeFlag {
318 my $marc_record = shift;
319 my $marc_flavour = shift; # || C4::Context->preference("marcflavour");
321 $marc_record->encoding('UTF-8');
322 if ($marc_flavour eq 'MARC21' || $marc_flavour eq 'NORMARC') {
323 my $leader = $marc_record->leader();
324 substr($leader, 9, 1) = 'a';
325 $marc_record->leader($leader);
326 } elsif ($marc_flavour =~/UNIMARC/) {
327 my $string;
328 my ($subflength,$encodingposition)=($marc_flavour=~/AUTH/?(21,9):(36,22));
329 $string=$marc_record->subfield( 100, "a" );
330 if (defined $string && length($string)==$subflength) {
331 $string = substr $string, 0,$subflength if (length($string)>$subflength);
333 else {
334 $string = POSIX::strftime( "%Y%m%d", localtime );
335 $string =~ s/\-//g;
336 $string = sprintf( "%-*s", $subflength, $string );
338 substr( $string, $encodingposition, 8, "frey50 " );
339 if ( $marc_record->subfield( 100, "a" ) ) {
340 $marc_record->field('100')->update(a=>$string);
342 else {
343 $marc_record->insert_grouped_field(
344 MARC::Field->new( 100, '', '', "a" => $string ) );
346 $debug && warn "encodage: ", substr( $marc_record->subfield(100, 'a'), $encodingposition, 8 );
347 } else {
348 warn "Unrecognized marcflavour: $marc_flavour";
352 =head2 StripNonXmlChars
354 my $new_str = StripNonXmlChars($old_str);
356 Given a string, return a copy with the
357 characters that are illegal in XML
358 removed.
360 This function exists to work around a problem
361 that can occur with badly-encoded MARC records.
362 Specifically, if a UTF-8 MARC record also
363 has excape (\x1b) characters, MARC::File::XML
364 will let the escape characters pass through
365 when as_xml() or as_xml_record() is called. The
366 problem is that the escape character is not
367 legal in well-formed XML documents, so when
368 MARC::File::XML attempts to parse such a record,
369 the XML parser will fail.
371 Stripping such characters will allow a
372 MARC::Record->new_from_xml()
373 to work, at the possible risk of some data loss.
375 =cut
377 sub StripNonXmlChars {
378 my $str = shift;
379 if (!defined($str) || $str eq ""){
380 return "";
382 $str =~ s/[^\x09\x0A\x0D\x{0020}-\x{D7FF}\x{E000}-\x{FFFD}\x{10000}-\x{10FFFF}]//g;
383 return $str;
388 =head2 nsb_clean
390 =over 4
392 nsb_clean($string);
394 =back
396 Removes Non Sorting Block characters
398 =cut
399 sub nsb_clean {
400 my $NSB = '\x88' ; # NSB : begin Non Sorting Block
401 my $NSE = '\x89' ; # NSE : Non Sorting Block end
402 my $NSB2 = '\x98' ; # NSB : begin Non Sorting Block
403 my $NSE2 = '\x9C' ; # NSE : Non Sorting Block end
404 my $C2 = '\xC2' ; # What is this char ? It is sometimes left by the regexp after removing NSB / NSE
406 # handles non sorting blocks
407 my ($string) = @_ ;
408 $_ = $string ;
409 s/$NSB//g ;
410 s/$NSE//g ;
411 s/$NSB2//g ;
412 s/$NSE2//g ;
413 s/$C2//g ;
414 $string = $_ ;
416 return($string) ;
420 =head1 INTERNAL FUNCTIONS
422 =head2 _default_marc21_charconv_to_utf8
424 my ($new_marc_record, $guessed_charset) = _default_marc21_charconv_to_utf8($marc_record);
426 Converts a C<MARC::Record> of unknown character set to UTF-8,
427 first by trying a MARC-8 to UTF-8 conversion, then ISO-8859-1
428 to UTF-8, then a default conversion that replaces each non-ASCII
429 character with the replacement character.
431 The C<$guessed_charset> return value contains the character set
432 that resulted in a conversion to valid UTF-8; note that
433 if the MARC-8 and ISO-8859-1 conversions failed, the value of
434 this is 'failed'.
436 =cut
438 sub _default_marc21_charconv_to_utf8 {
439 my $marc_record = shift;
440 my $marc_flavour = shift;
442 my $trial_marc8 = $marc_record->clone();
443 my @all_errors = ();
444 my @errors = _marc_marc8_to_utf8($trial_marc8, $marc_flavour);
445 unless (@errors) {
446 return $trial_marc8, 'MARC-8', [];
448 push @all_errors, @errors;
450 my $trial_8859_1 = $marc_record->clone();
451 @errors = _marc_to_utf8_via_text_iconv($trial_8859_1, $marc_flavour, 'iso-8859-1');
452 unless (@errors) {
453 return $trial_8859_1, 'iso-8859-1', []; # note -- we could return \@all_errors
454 # instead if we wanted to report details
455 # of the failed attempt at MARC-8 => UTF-8
457 push @all_errors, @errors;
459 my $default_converted = $marc_record->clone();
460 _marc_to_utf8_replacement_char($default_converted, $marc_flavour);
461 return $default_converted, 'failed', \@all_errors;
464 =head2 _default_unimarc_charconv_to_utf8
466 my ($new_marc_record, $guessed_charset) = _default_unimarc_charconv_to_utf8($marc_record);
468 Converts a C<MARC::Record> of unknown character set to UTF-8,
469 first by trying a ISO-5426 to UTF-8 conversion, then ISO-8859-1
470 to UTF-8, then a default conversion that replaces each non-ASCII
471 character with the replacement character.
473 The C<$guessed_charset> return value contains the character set
474 that resulted in a conversion to valid UTF-8; note that
475 if the MARC-8 and ISO-8859-1 conversions failed, the value of
476 this is 'failed'.
478 =cut
480 sub _default_unimarc_charconv_to_utf8 {
481 my $marc_record = shift;
482 my $marc_flavour = shift;
484 my $trial_marc8 = $marc_record->clone();
485 my @all_errors = ();
486 my @errors = _marc_iso5426_to_utf8($trial_marc8, $marc_flavour);
487 unless (@errors) {
488 return $trial_marc8, 'iso-5426';
490 push @all_errors, @errors;
492 my $trial_8859_1 = $marc_record->clone();
493 @errors = _marc_to_utf8_via_text_iconv($trial_8859_1, $marc_flavour, 'iso-8859-1');
494 unless (@errors) {
495 return $trial_8859_1, 'iso-8859-1';
497 push @all_errors, @errors;
499 my $default_converted = $marc_record->clone();
500 _marc_to_utf8_replacement_char($default_converted, $marc_flavour);
501 return $default_converted, 'failed', \@all_errors;
504 =head2 _marc_marc8_to_utf8
506 my @errors = _marc_marc8_to_utf8($marc_record, $marc_flavour, $source_encoding);
508 Convert a C<MARC::Record> to UTF-8 in-place from MARC-8.
509 If the conversion fails for some reason, an
510 appropriate messages will be placed in the returned
511 C<@errors> array.
513 =cut
515 sub _marc_marc8_to_utf8 {
516 my $marc_record = shift;
517 my $marc_flavour = shift;
519 my $prev_ignore = MARC::Charset->ignore_errors();
520 MARC::Charset->ignore_errors(1);
522 # trap warnings raised by MARC::Charset
523 my @errors = ();
524 local $SIG{__WARN__} = sub {
525 my $msg = $_[0];
526 if ($msg =~ /MARC.Charset/) {
527 # FIXME - purpose of this regexp is to strip out the
528 # line reference to MARC/Charset.pm, but as it
529 # exists probably won't work quite on Windows --
530 # some sort of minimal-bunch back-tracking RE
531 # would be helpful here
532 $msg =~ s/at [\/].*?.MARC.Charset\.pm line \d+\.\n$//;
533 push @errors, $msg;
534 } else {
535 # if warning doesn't come from MARC::Charset, just
536 # pass it on
537 warn $msg;
541 foreach my $field ($marc_record->fields()) {
542 if ($field->is_control_field()) {
543 ; # do nothing -- control fields should not contain non-ASCII characters
544 } else {
545 my @converted_subfields;
546 foreach my $subfield ($field->subfields()) {
547 my $utf8sf = MARC::Charset::marc8_to_utf8($subfield->[1]);
548 unless (IsStringUTF8ish($utf8sf)) {
549 # Because of a bug in MARC::Charset 0.98, if the string
550 # has (a) one or more diacritics that (b) are only in character positions
551 # 128 to 255 inclusive, the resulting converted string is not in
552 # UTF-8, but the legacy 8-bit encoding (e.g., ISO-8859-1). If that
553 # occurs, upgrade the string in place. Moral of the story seems to be
554 # that pack("U", ...) is better than chr(...) if you need to guarantee
555 # that the resulting string is UTF-8.
556 utf8::upgrade($utf8sf);
558 push @converted_subfields, $subfield->[0], $utf8sf;
561 $field->replace_with(MARC::Field->new(
562 $field->tag(), $field->indicator(1), $field->indicator(2),
563 @converted_subfields)
568 MARC::Charset->ignore_errors($prev_ignore);
570 SetMarcUnicodeFlag($marc_record, $marc_flavour);
572 return @errors;
575 =head2 _marc_iso5426_to_utf8
577 my @errors = _marc_iso5426_to_utf8($marc_record, $marc_flavour, $source_encoding);
579 Convert a C<MARC::Record> to UTF-8 in-place from ISO-5426.
580 If the conversion fails for some reason, an
581 appropriate messages will be placed in the returned
582 C<@errors> array.
584 FIXME - is ISO-5426 equivalent enough to MARC-8
585 that C<MARC::Charset> can be used instead?
587 =cut
589 sub _marc_iso5426_to_utf8 {
590 my $marc_record = shift;
591 my $marc_flavour = shift;
593 my @errors = ();
595 foreach my $field ($marc_record->fields()) {
596 if ($field->is_control_field()) {
597 ; # do nothing -- control fields should not contain non-ASCII characters
598 } else {
599 my @converted_subfields;
600 foreach my $subfield ($field->subfields()) {
601 my $utf8sf = char_decode5426($subfield->[1]);
602 push @converted_subfields, $subfield->[0], $utf8sf;
605 $field->replace_with(MARC::Field->new(
606 $field->tag(), $field->indicator(1), $field->indicator(2),
607 @converted_subfields)
612 SetMarcUnicodeFlag($marc_record, $marc_flavour);
614 return @errors;
617 =head2 _marc_to_utf8_via_text_iconv
619 my @errors = _marc_to_utf8_via_text_iconv($marc_record, $marc_flavour, $source_encoding);
621 Convert a C<MARC::Record> to UTF-8 in-place using the
622 C<Text::Iconv> CPAN module. Any source encoding accepted
623 by the user's iconv installation should work. If
624 the source encoding is not recognized on the user's
625 server or the conversion fails for some reason,
626 appropriate messages will be placed in the returned
627 C<@errors> array.
629 =cut
631 sub _marc_to_utf8_via_text_iconv {
632 my $marc_record = shift;
633 my $marc_flavour = shift;
634 my $source_encoding = shift;
636 my @errors = ();
637 my $decoder;
638 eval { $decoder = Text::Iconv->new($source_encoding, 'utf8'); };
639 if ($@) {
640 push @errors, "Could not initialze $source_encoding => utf8 converter: $@";
641 return @errors;
644 my $prev_raise_error = Text::Iconv->raise_error();
645 Text::Iconv->raise_error(1);
647 foreach my $field ($marc_record->fields()) {
648 if ($field->is_control_field()) {
649 ; # do nothing -- control fields should not contain non-ASCII characters
650 } else {
651 my @converted_subfields;
652 foreach my $subfield ($field->subfields()) {
653 my $converted_value;
654 my $conversion_ok = 1;
655 eval { $converted_value = $decoder->convert($subfield->[1]); };
656 if ($@) {
657 $conversion_ok = 0;
658 push @errors, $@;
659 } elsif (not defined $converted_value) {
660 $conversion_ok = 0;
661 push @errors, "Text::Iconv conversion failed - retval is " . $decoder->retval();
664 if ($conversion_ok) {
665 push @converted_subfields, $subfield->[0], $converted_value;
666 } else {
667 $converted_value = $subfield->[1];
668 $converted_value =~ s/[\200-\377]/\xef\xbf\xbd/g;
669 push @converted_subfields, $subfield->[0], $converted_value;
673 $field->replace_with(MARC::Field->new(
674 $field->tag(), $field->indicator(1), $field->indicator(2),
675 @converted_subfields)
680 SetMarcUnicodeFlag($marc_record, $marc_flavour);
681 Text::Iconv->raise_error($prev_raise_error);
683 return @errors;
686 =head2 _marc_to_utf8_replacement_char
688 _marc_to_utf8_replacement_char($marc_record, $marc_flavour);
690 Convert a C<MARC::Record> to UTF-8 in-place, adopting the
691 unsatisfactory method of replacing all non-ASCII (e.g.,
692 where the eight bit is set) octet with the Unicode
693 replacement character. This is meant as a last-ditch
694 method, and would be best used as part of a UI that
695 lets a cataloguer pick various character conversions
696 until he or she finds the right one.
698 =cut
700 sub _marc_to_utf8_replacement_char {
701 my $marc_record = shift;
702 my $marc_flavour = shift;
704 foreach my $field ($marc_record->fields()) {
705 if ($field->is_control_field()) {
706 ; # do nothing -- control fields should not contain non-ASCII characters
707 } else {
708 my @converted_subfields;
709 foreach my $subfield ($field->subfields()) {
710 my $value = $subfield->[1];
711 $value =~ s/[\200-\377]/\xef\xbf\xbd/g;
712 push @converted_subfields, $subfield->[0], $value;
715 $field->replace_with(MARC::Field->new(
716 $field->tag(), $field->indicator(1), $field->indicator(2),
717 @converted_subfields)
722 SetMarcUnicodeFlag($marc_record, $marc_flavour);
725 =head2 char_decode5426
727 my $utf8string = char_decode5426($iso_5426_string);
729 Converts a string from ISO-5426 to UTF-8.
731 =cut
734 my %chars;
735 $chars{0xb0}=0x0101;#3/0ayn[ain]
736 $chars{0xb1}=0x0623;#3/1alif/hamzah[alefwithhamzaabove]
737 #$chars{0xb2}=0x00e0;#'à';
738 $chars{0xb2}=0x00e0;#3/2leftlowsinglequotationmark
739 #$chars{0xb3}=0x00e7;#'ç';
740 $chars{0xb3}=0x00e7;#3/2leftlowsinglequotationmark
741 # $chars{0xb4}='è';
742 $chars{0xb4}=0x00e8;
743 $chars{0xbd}=0x02b9;
744 $chars{0xbe}=0x02ba;
745 # $chars{0xb5}='é';
746 $chars{0xb5}=0x00e9;
747 $chars{0x97}=0x003c;#3/2leftlowsinglequotationmark
748 $chars{0x98}=0x003e;#3/2leftlowsinglequotationmark
749 $chars{0xfa}=0x0153; #oe
750 $chars{0xea}=0x0152; #oe
751 $chars{0x81d1}=0x00b0;
753 ####
754 ## combined characters iso5426
756 $chars{0xc041}=0x1ea2; # capital a with hook above
757 $chars{0xc045}=0x1eba; # capital e with hook above
758 $chars{0xc049}=0x1ec8; # capital i with hook above
759 $chars{0xc04f}=0x1ece; # capital o with hook above
760 $chars{0xc055}=0x1ee6; # capital u with hook above
761 $chars{0xc059}=0x1ef6; # capital y with hook above
762 $chars{0xc061}=0x1ea3; # small a with hook above
763 $chars{0xc065}=0x1ebb; # small e with hook above
764 $chars{0xc069}=0x1ec9; # small i with hook above
765 $chars{0xc06f}=0x1ecf; # small o with hook above
766 $chars{0xc075}=0x1ee7; # small u with hook above
767 $chars{0xc079}=0x1ef7; # small y with hook above
769 # 4/1 grave accent
770 $chars{0xc141}=0x00c0; # capital a with grave accent
771 $chars{0xc145}=0x00c8; # capital e with grave accent
772 $chars{0xc149}=0x00cc; # capital i with grave accent
773 $chars{0xc14f}=0x00d2; # capital o with grave accent
774 $chars{0xc155}=0x00d9; # capital u with grave accent
775 $chars{0xc157}=0x1e80; # capital w with grave
776 $chars{0xc159}=0x1ef2; # capital y with grave
777 $chars{0xc161}=0x00e0; # small a with grave accent
778 $chars{0xc165}=0x00e8; # small e with grave accent
779 $chars{0xc169}=0x00ec; # small i with grave accent
780 $chars{0xc16f}=0x00f2; # small o with grave accent
781 $chars{0xc175}=0x00f9; # small u with grave accent
782 $chars{0xc177}=0x1e81; # small w with grave
783 $chars{0xc179}=0x1ef3; # small y with grave
784 # 4/2 acute accent
785 $chars{0xc241}=0x00c1; # capital a with acute accent
786 $chars{0xc243}=0x0106; # capital c with acute accent
787 $chars{0xc245}=0x00c9; # capital e with acute accent
788 $chars{0xc247}=0x01f4; # capital g with acute
789 $chars{0xc249}=0x00cd; # capital i with acute accent
790 $chars{0xc24b}=0x1e30; # capital k with acute
791 $chars{0xc24c}=0x0139; # capital l with acute accent
792 $chars{0xc24d}=0x1e3e; # capital m with acute
793 $chars{0xc24e}=0x0143; # capital n with acute accent
794 $chars{0xc24f}=0x00d3; # capital o with acute accent
795 $chars{0xc250}=0x1e54; # capital p with acute
796 $chars{0xc252}=0x0154; # capital r with acute accent
797 $chars{0xc253}=0x015a; # capital s with acute accent
798 $chars{0xc255}=0x00da; # capital u with acute accent
799 $chars{0xc257}=0x1e82; # capital w with acute
800 $chars{0xc259}=0x00dd; # capital y with acute accent
801 $chars{0xc25a}=0x0179; # capital z with acute accent
802 $chars{0xc261}=0x00e1; # small a with acute accent
803 $chars{0xc263}=0x0107; # small c with acute accent
804 $chars{0xc265}=0x00e9; # small e with acute accent
805 $chars{0xc267}=0x01f5; # small g with acute
806 $chars{0xc269}=0x00ed; # small i with acute accent
807 $chars{0xc26b}=0x1e31; # small k with acute
808 $chars{0xc26c}=0x013a; # small l with acute accent
809 $chars{0xc26d}=0x1e3f; # small m with acute
810 $chars{0xc26e}=0x0144; # small n with acute accent
811 $chars{0xc26f}=0x00f3; # small o with acute accent
812 $chars{0xc270}=0x1e55; # small p with acute
813 $chars{0xc272}=0x0155; # small r with acute accent
814 $chars{0xc273}=0x015b; # small s with acute accent
815 $chars{0xc275}=0x00fa; # small u with acute accent
816 $chars{0xc277}=0x1e83; # small w with acute
817 $chars{0xc279}=0x00fd; # small y with acute accent
818 $chars{0xc27a}=0x017a; # small z with acute accent
819 $chars{0xc2e1}=0x01fc; # capital ae with acute
820 $chars{0xc2f1}=0x01fd; # small ae with acute
821 # 4/3 circumflex accent
822 $chars{0xc341}=0x00c2; # capital a with circumflex accent
823 $chars{0xc343}=0x0108; # capital c with circumflex
824 $chars{0xc345}=0x00ca; # capital e with circumflex accent
825 $chars{0xc347}=0x011c; # capital g with circumflex
826 $chars{0xc348}=0x0124; # capital h with circumflex
827 $chars{0xc349}=0x00ce; # capital i with circumflex accent
828 $chars{0xc34a}=0x0134; # capital j with circumflex
829 $chars{0xc34f}=0x00d4; # capital o with circumflex accent
830 $chars{0xc353}=0x015c; # capital s with circumflex
831 $chars{0xc355}=0x00db; # capital u with circumflex
832 $chars{0xc357}=0x0174; # capital w with circumflex
833 $chars{0xc359}=0x0176; # capital y with circumflex
834 $chars{0xc35a}=0x1e90; # capital z with circumflex
835 $chars{0xc361}=0x00e2; # small a with circumflex accent
836 $chars{0xc363}=0x0109; # small c with circumflex
837 $chars{0xc365}=0x00ea; # small e with circumflex accent
838 $chars{0xc367}=0x011d; # small g with circumflex
839 $chars{0xc368}=0x0125; # small h with circumflex
840 $chars{0xc369}=0x00ee; # small i with circumflex accent
841 $chars{0xc36a}=0x0135; # small j with circumflex
842 $chars{0xc36e}=0x00f1; # small n with tilde
843 $chars{0xc36f}=0x00f4; # small o with circumflex accent
844 $chars{0xc373}=0x015d; # small s with circumflex
845 $chars{0xc375}=0x00fb; # small u with circumflex
846 $chars{0xc377}=0x0175; # small w with circumflex
847 $chars{0xc379}=0x0177; # small y with circumflex
848 $chars{0xc37a}=0x1e91; # small z with circumflex
849 # 4/4 tilde
850 $chars{0xc441}=0x00c3; # capital a with tilde
851 $chars{0xc445}=0x1ebc; # capital e with tilde
852 $chars{0xc449}=0x0128; # capital i with tilde
853 $chars{0xc44e}=0x00d1; # capital n with tilde
854 $chars{0xc44f}=0x00d5; # capital o with tilde
855 $chars{0xc455}=0x0168; # capital u with tilde
856 $chars{0xc456}=0x1e7c; # capital v with tilde
857 $chars{0xc459}=0x1ef8; # capital y with tilde
858 $chars{0xc461}=0x00e3; # small a with tilde
859 $chars{0xc465}=0x1ebd; # small e with tilde
860 $chars{0xc469}=0x0129; # small i with tilde
861 $chars{0xc46e}=0x00f1; # small n with tilde
862 $chars{0xc46f}=0x00f5; # small o with tilde
863 $chars{0xc475}=0x0169; # small u with tilde
864 $chars{0xc476}=0x1e7d; # small v with tilde
865 $chars{0xc479}=0x1ef9; # small y with tilde
866 # 4/5 macron
867 $chars{0xc541}=0x0100; # capital a with macron
868 $chars{0xc545}=0x0112; # capital e with macron
869 $chars{0xc547}=0x1e20; # capital g with macron
870 $chars{0xc549}=0x012a; # capital i with macron
871 $chars{0xc54f}=0x014c; # capital o with macron
872 $chars{0xc555}=0x016a; # capital u with macron
873 $chars{0xc561}=0x0101; # small a with macron
874 $chars{0xc565}=0x0113; # small e with macron
875 $chars{0xc567}=0x1e21; # small g with macron
876 $chars{0xc569}=0x012b; # small i with macron
877 $chars{0xc56f}=0x014d; # small o with macron
878 $chars{0xc575}=0x016b; # small u with macron
879 $chars{0xc572}=0x0159; # small r with macron
880 $chars{0xc5e1}=0x01e2; # capital ae with macron
881 $chars{0xc5f1}=0x01e3; # small ae with macron
882 # 4/6 breve
883 $chars{0xc641}=0x0102; # capital a with breve
884 $chars{0xc645}=0x0114; # capital e with breve
885 $chars{0xc647}=0x011e; # capital g with breve
886 $chars{0xc649}=0x012c; # capital i with breve
887 $chars{0xc64f}=0x014e; # capital o with breve
888 $chars{0xc655}=0x016c; # capital u with breve
889 $chars{0xc661}=0x0103; # small a with breve
890 $chars{0xc665}=0x0115; # small e with breve
891 $chars{0xc667}=0x011f; # small g with breve
892 $chars{0xc669}=0x012d; # small i with breve
893 $chars{0xc66f}=0x014f; # small o with breve
894 $chars{0xc675}=0x016d; # small u with breve
895 # 4/7 dot above
896 $chars{0xc7b0}=0x01e1; # Ain with dot above
897 $chars{0xc742}=0x1e02; # capital b with dot above
898 $chars{0xc743}=0x010a; # capital c with dot above
899 $chars{0xc744}=0x1e0a; # capital d with dot above
900 $chars{0xc745}=0x0116; # capital e with dot above
901 $chars{0xc746}=0x1e1e; # capital f with dot above
902 $chars{0xc747}=0x0120; # capital g with dot above
903 $chars{0xc748}=0x1e22; # capital h with dot above
904 $chars{0xc749}=0x0130; # capital i with dot above
905 $chars{0xc74d}=0x1e40; # capital m with dot above
906 $chars{0xc74e}=0x1e44; # capital n with dot above
907 $chars{0xc750}=0x1e56; # capital p with dot above
908 $chars{0xc752}=0x1e58; # capital r with dot above
909 $chars{0xc753}=0x1e60; # capital s with dot above
910 $chars{0xc754}=0x1e6a; # capital t with dot above
911 $chars{0xc757}=0x1e86; # capital w with dot above
912 $chars{0xc758}=0x1e8a; # capital x with dot above
913 $chars{0xc759}=0x1e8e; # capital y with dot above
914 $chars{0xc75a}=0x017b; # capital z with dot above
915 $chars{0xc761}=0x0227; # small b with dot above
916 $chars{0xc762}=0x1e03; # small b with dot above
917 $chars{0xc763}=0x010b; # small c with dot above
918 $chars{0xc764}=0x1e0b; # small d with dot above
919 $chars{0xc765}=0x0117; # small e with dot above
920 $chars{0xc766}=0x1e1f; # small f with dot above
921 $chars{0xc767}=0x0121; # small g with dot above
922 $chars{0xc768}=0x1e23; # small h with dot above
923 $chars{0xc76d}=0x1e41; # small m with dot above
924 $chars{0xc76e}=0x1e45; # small n with dot above
925 $chars{0xc770}=0x1e57; # small p with dot above
926 $chars{0xc772}=0x1e59; # small r with dot above
927 $chars{0xc773}=0x1e61; # small s with dot above
928 $chars{0xc774}=0x1e6b; # small t with dot above
929 $chars{0xc777}=0x1e87; # small w with dot above
930 $chars{0xc778}=0x1e8b; # small x with dot above
931 $chars{0xc779}=0x1e8f; # small y with dot above
932 $chars{0xc77a}=0x017c; # small z with dot above
933 # 4/8 trema, diaresis
934 $chars{0xc820}=0x00a8; # diaeresis
935 $chars{0xc841}=0x00c4; # capital a with diaeresis
936 $chars{0xc845}=0x00cb; # capital e with diaeresis
937 $chars{0xc848}=0x1e26; # capital h with diaeresis
938 $chars{0xc849}=0x00cf; # capital i with diaeresis
939 $chars{0xc84f}=0x00d6; # capital o with diaeresis
940 $chars{0xc855}=0x00dc; # capital u with diaeresis
941 $chars{0xc857}=0x1e84; # capital w with diaeresis
942 $chars{0xc858}=0x1e8c; # capital x with diaeresis
943 $chars{0xc859}=0x0178; # capital y with diaeresis
944 $chars{0xc861}=0x00e4; # small a with diaeresis
945 $chars{0xc865}=0x00eb; # small e with diaeresis
946 $chars{0xc868}=0x1e27; # small h with diaeresis
947 $chars{0xc869}=0x00ef; # small i with diaeresis
948 $chars{0xc86f}=0x00f6; # small o with diaeresis
949 $chars{0xc874}=0x1e97; # small t with diaeresis
950 $chars{0xc875}=0x00fc; # small u with diaeresis
951 $chars{0xc877}=0x1e85; # small w with diaeresis
952 $chars{0xc878}=0x1e8d; # small x with diaeresis
953 $chars{0xc879}=0x00ff; # small y with diaeresis
954 # 4/9 umlaut
955 $chars{0xc920}=0x00a8; # [diaeresis]
956 $chars{0xc961}=0x00e4; # a with umlaut
957 $chars{0xc965}=0x00eb; # e with umlaut
958 $chars{0xc969}=0x00ef; # i with umlaut
959 $chars{0xc96f}=0x00f6; # o with umlaut
960 $chars{0xc975}=0x00fc; # u with umlaut
961 # 4/10 circle above
962 $chars{0xca41}=0x00c5; # capital a with ring above
963 $chars{0xcaad}=0x016e; # capital u with ring above
964 $chars{0xca61}=0x00e5; # small a with ring above
965 $chars{0xca75}=0x016f; # small u with ring above
966 $chars{0xca77}=0x1e98; # small w with ring above
967 $chars{0xca79}=0x1e99; # small y with ring above
968 # 4/11 high comma off centre
969 # 4/12 inverted high comma centred
970 # 4/13 double acute accent
971 $chars{0xcd4f}=0x0150; # capital o with double acute
972 $chars{0xcd55}=0x0170; # capital u with double acute
973 $chars{0xcd6f}=0x0151; # small o with double acute
974 $chars{0xcd75}=0x0171; # small u with double acute
975 # 4/14 horn
976 $chars{0xce54}=0x01a0; # latin capital letter o with horn
977 $chars{0xce55}=0x01af; # latin capital letter u with horn
978 $chars{0xce74}=0x01a1; # latin small letter o with horn
979 $chars{0xce75}=0x01b0; # latin small letter u with horn
980 # 4/15 caron (hacek
981 $chars{0xcf41}=0x01cd; # capital a with caron
982 $chars{0xcf43}=0x010c; # capital c with caron
983 $chars{0xcf44}=0x010e; # capital d with caron
984 $chars{0xcf45}=0x011a; # capital e with caron
985 $chars{0xcf47}=0x01e6; # capital g with caron
986 $chars{0xcf49}=0x01cf; # capital i with caron
987 $chars{0xcf4b}=0x01e8; # capital k with caron
988 $chars{0xcf4c}=0x013d; # capital l with caron
989 $chars{0xcf4e}=0x0147; # capital n with caron
990 $chars{0xcf4f}=0x01d1; # capital o with caron
991 $chars{0xcf52}=0x0158; # capital r with caron
992 $chars{0xcf53}=0x0160; # capital s with caron
993 $chars{0xcf54}=0x0164; # capital t with caron
994 $chars{0xcf55}=0x01d3; # capital u with caron
995 $chars{0xcf5a}=0x017d; # capital z with caron
996 $chars{0xcf61}=0x01ce; # small a with caron
997 $chars{0xcf63}=0x010d; # small c with caron
998 $chars{0xcf64}=0x010f; # small d with caron
999 $chars{0xcf65}=0x011b; # small e with caron
1000 $chars{0xcf67}=0x01e7; # small g with caron
1001 $chars{0xcf69}=0x01d0; # small i with caron
1002 $chars{0xcf6a}=0x01f0; # small j with caron
1003 $chars{0xcf6b}=0x01e9; # small k with caron
1004 $chars{0xcf6c}=0x013e; # small l with caron
1005 $chars{0xcf6e}=0x0148; # small n with caron
1006 $chars{0xcf6f}=0x01d2; # small o with caron
1007 $chars{0xcf72}=0x0159; # small r with caron
1008 $chars{0xcf73}=0x0161; # small s with caron
1009 $chars{0xcf74}=0x0165; # small t with caron
1010 $chars{0xcf75}=0x01d4; # small u with caron
1011 $chars{0xcf7a}=0x017e; # small z with caron
1012 # 5/0 cedilla
1013 $chars{0xd020}=0x00b8; # cedilla
1014 $chars{0xd043}=0x00c7; # capital c with cedilla
1015 $chars{0xd044}=0x1e10; # capital d with cedilla
1016 $chars{0xd047}=0x0122; # capital g with cedilla
1017 $chars{0xd048}=0x1e28; # capital h with cedilla
1018 $chars{0xd04b}=0x0136; # capital k with cedilla
1019 $chars{0xd04c}=0x013b; # capital l with cedilla
1020 $chars{0xd04e}=0x0145; # capital n with cedilla
1021 $chars{0xd052}=0x0156; # capital r with cedilla
1022 $chars{0xd053}=0x015e; # capital s with cedilla
1023 $chars{0xd054}=0x0162; # capital t with cedilla
1024 $chars{0xd063}=0x00e7; # small c with cedilla
1025 $chars{0xd064}=0x1e11; # small d with cedilla
1026 $chars{0xd065}=0x0119; # small e with cedilla
1027 $chars{0xd067}=0x0123; # small g with cedilla
1028 $chars{0xd068}=0x1e29; # small h with cedilla
1029 $chars{0xd06b}=0x0137; # small k with cedilla
1030 $chars{0xd06c}=0x013c; # small l with cedilla
1031 $chars{0xd06e}=0x0146; # small n with cedilla
1032 $chars{0xd072}=0x0157; # small r with cedilla
1033 $chars{0xd073}=0x015f; # small s with cedilla
1034 $chars{0xd074}=0x0163; # small t with cedilla
1035 # 5/1 rude
1036 # 5/2 hook to left
1037 # 5/3 ogonek (hook to right
1038 $chars{0xd320}=0x02db; # ogonek
1039 $chars{0xd341}=0x0104; # capital a with ogonek
1040 $chars{0xd345}=0x0118; # capital e with ogonek
1041 $chars{0xd349}=0x012e; # capital i with ogonek
1042 $chars{0xd34f}=0x01ea; # capital o with ogonek
1043 $chars{0xd355}=0x0172; # capital u with ogonek
1044 $chars{0xd361}=0x0105; # small a with ogonek
1045 $chars{0xd365}=0x0119; # small e with ogonek
1046 $chars{0xd369}=0x012f; # small i with ogonek
1047 $chars{0xd36f}=0x01eb; # small o with ogonek
1048 $chars{0xd375}=0x0173; # small u with ogonek
1049 # 5/4 circle below
1050 $chars{0xd441}=0x1e00; # capital a with ring below
1051 $chars{0xd461}=0x1e01; # small a with ring below
1052 # 5/5 half circle below
1053 $chars{0xf948}=0x1e2a; # capital h with breve below
1054 $chars{0xf968}=0x1e2b; # small h with breve below
1055 # 5/6 dot below
1056 $chars{0xd641}=0x1ea0; # capital a with dot below
1057 $chars{0xd642}=0x1e04; # capital b with dot below
1058 $chars{0xd644}=0x1e0c; # capital d with dot below
1059 $chars{0xd645}=0x1eb8; # capital e with dot below
1060 $chars{0xd648}=0x1e24; # capital h with dot below
1061 $chars{0xd649}=0x1eca; # capital i with dot below
1062 $chars{0xd64b}=0x1e32; # capital k with dot below
1063 $chars{0xd64c}=0x1e36; # capital l with dot below
1064 $chars{0xd64d}=0x1e42; # capital m with dot below
1065 $chars{0xd64e}=0x1e46; # capital n with dot below
1066 $chars{0xd64f}=0x1ecc; # capital o with dot below
1067 $chars{0xd652}=0x1e5a; # capital r with dot below
1068 $chars{0xd653}=0x1e62; # capital s with dot below
1069 $chars{0xd654}=0x1e6c; # capital t with dot below
1070 $chars{0xd655}=0x1ee4; # capital u with dot below
1071 $chars{0xd656}=0x1e7e; # capital v with dot below
1072 $chars{0xd657}=0x1e88; # capital w with dot below
1073 $chars{0xd659}=0x1ef4; # capital y with dot below
1074 $chars{0xd65a}=0x1e92; # capital z with dot below
1075 $chars{0xd661}=0x1ea1; # small a with dot below
1076 $chars{0xd662}=0x1e05; # small b with dot below
1077 $chars{0xd664}=0x1e0d; # small d with dot below
1078 $chars{0xd665}=0x1eb9; # small e with dot below
1079 $chars{0xd668}=0x1e25; # small h with dot below
1080 $chars{0xd669}=0x1ecb; # small i with dot below
1081 $chars{0xd66b}=0x1e33; # small k with dot below
1082 $chars{0xd66c}=0x1e37; # small l with dot below
1083 $chars{0xd66d}=0x1e43; # small m with dot below
1084 $chars{0xd66e}=0x1e47; # small n with dot below
1085 $chars{0xd66f}=0x1ecd; # small o with dot below
1086 $chars{0xd672}=0x1e5b; # small r with dot below
1087 $chars{0xd673}=0x1e63; # small s with dot below
1088 $chars{0xd674}=0x1e6d; # small t with dot below
1089 $chars{0xd675}=0x1ee5; # small u with dot below
1090 $chars{0xd676}=0x1e7f; # small v with dot below
1091 $chars{0xd677}=0x1e89; # small w with dot below
1092 $chars{0xd679}=0x1ef5; # small y with dot below
1093 $chars{0xd67a}=0x1e93; # small z with dot below
1094 # 5/7 double dot below
1095 $chars{0xd755}=0x1e72; # capital u with diaeresis below
1096 $chars{0xd775}=0x1e73; # small u with diaeresis below
1097 # 5/8 underline
1098 $chars{0xd820}=0x005f; # underline
1099 # 5/9 double underline
1100 $chars{0xd920}=0x2017; # double underline
1101 # 5/10 small low vertical bar
1102 $chars{0xda20}=0x02cc; #
1103 # 5/11 circumflex below
1104 # 5/12 (this position shall not be used)
1105 # 5/13 left half of ligature sign and of double tilde
1106 # 5/14 right half of ligature sign
1107 # 5/15 right half of double tilde
1108 # map {printf "%x :%x\n",$_,$chars{$_};}keys %chars;
1110 sub char_decode5426 {
1111 my ( $string) = @_;
1112 my $result;
1114 my @data = unpack("C*", $string);
1115 my @characters;
1116 my $length=scalar(@data);
1117 for (my $i = 0; $i < scalar(@data); $i++) {
1118 my $char= $data[$i];
1119 if ($char >= 0x00 && $char <= 0x7F){
1120 #IsAscii
1122 push @characters,$char unless ($char<0x02 ||$char== 0x0F);
1123 }elsif (($char >= 0xC0 && $char <= 0xDF)) {
1124 #Combined Char
1125 my $convchar ;
1126 if ($chars{$char*256+$data[$i+1]}) {
1127 $convchar= $chars{$char * 256 + $data[$i+1]};
1128 $i++;
1129 # printf "char %x $char, char to convert %x , converted %x\n",$char,$char * 256 + $data[$i - 1],$convchar;
1130 } elsif ($chars{$char}) {
1131 $convchar= $chars{$char};
1132 # printf "0xC char %x, converted %x\n",$char,$chars{$char};
1133 }else {
1134 $convchar=$char;
1136 push @characters,$convchar;
1137 } else {
1138 my $convchar;
1139 if ($chars{$char}) {
1140 $convchar= $chars{$char};
1141 # printf "char %x, converted %x\n",$char,$chars{$char};
1142 }else {
1143 # printf "char %x $char\n",$char;
1144 $convchar=$char;
1146 push @characters,$convchar;
1149 $result=pack "U*",@characters;
1150 # $result=~s/\x01//;
1151 # $result=~s/\x00//;
1152 $result=~s/\x0f//;
1153 $result=~s/\x1b.//;
1154 $result=~s/\x0e//;
1155 $result=~s/\x1b\x5b//;
1156 # map{printf "%x",$_} @characters;
1157 # printf "\n";
1158 return $result;
1164 =head1 AUTHOR
1166 Koha Development Team <http://koha-community.org/>
1168 Galen Charlton <galen.charlton@liblime.com>
1170 =cut