C4/Charset.pm

   1 package C4::Charset;
   2
   3 # Copyright (C) 2008 LibLime
   4 #
   5 # This file is part of Koha.
   6 #
   7 # Koha is free software; you can redistribute it and/or modify it under the
   8 # terms of the GNU General Public License as published by the Free Software
   9 # Foundation; either version 2 of the License, or (at your option) any later
  10 # version.
  11 #
  12 # Koha is distributed in the hope that it will be useful, but WITHOUT ANY
  13 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
  14 # A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License along with
  17 # Koha; if not, write to the Free Software Foundation, Inc., 59 Temple Place,
  18 # Suite 330, Boston, MA  02111-1307 USA
  19
  20 use strict;
  21 use warnings;
  22
  23 use MARC::Charset qw/marc8_to_utf8/;
  24 use Text::Iconv;
  25 use C4::Debug;
  26
  27 use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
  28
  29 BEGIN {
  30     # set the version for version checking
  31     $VERSION = 3.01;
  32     require Exporter;
  33     @ISA    = qw(Exporter);
  34     @EXPORT = qw(
  35         IsStringUTF8ish
  36         MarcToUTF8Record
  37         SetMarcUnicodeFlag
  38         StripNonXmlChars
  39     );
  40 }
  41
  42 =head1 NAME
  43
  44 C4::Charset - utilities for handling character set conversions.
  45
  46 =head1 SYNOPSIS
  47
  48 use C4::Charset;
  49
  50 =head1 DESCRIPTION
  51
  52 This module contains routines for dealing with character set
  53 conversions, particularly for MARC records.
  54
  55 A variety of character encodings are in use by various MARC
  56 standards, and even more character encodings are used by
  57 non-standard MARC records.  The various MARC formats generally
  58 do not do a good job of advertising a given record's character
  59 encoding, and even when a record does advertise its encoding,
  60 e.g., via the Leader/09, experience has shown that one cannot
  61 trust it.
  62
  63 Ultimately, all MARC records are stored in Koha in UTF-8 and
  64 must be converted from whatever the source character encoding is.
  65 The goal of this module is to ensure that these conversions
  66 take place accurately.  When a character conversion cannot take
  67 place, or at least not accurately, the module was provide
  68 enough information to allow user-facing code to inform the user
  69 on how to deal with the situation.
  70
  71 =cut
  72
  73 =head1 FUNCTIONS
  74
  75 =head2 IsStringUTF8ish
  76
  77 =over 4
  78
  79 my $is_utf8 = IsStringUTF8ish($str);
  80
  81 =back
  82
  83 Determines if C<$str> is valid UTF-8.  This can mean
  84 one of two things:
  85
  86 =over 2
  87
  88 =item *
  89
  90 The Perl UTF-8 flag is set and the string contains valid UTF-8.
  91
  92 =item *
  93
  94 The Perl UTF-8 flag is B<not> set, but the octets contain
  95 valid UTF-8.
  96
  97 =back
  98
  99 The function is named C<IsStringUTF8ish> instead of C<IsStringUTF8>
 100 because in one could be presented with a MARC blob that is
 101 not actually in UTF-8 but whose sequence of octets appears to be
 102 valid UTF-8.  The rest of the MARC character conversion functions
 103 will assume that this situation occur does not very often.
 104
 105 =cut
 106
 107 sub IsStringUTF8ish {
 108     my $str = shift;
 109
 110     return 1 if utf8::is_utf8($str);
 111     return utf8::decode($str);
 112 }
 113
 114 =head2 MarcToUTF8Record
 115
 116 =over 4
 117
 118 ($marc_record, $converted_from, $errors_arrayref) = MarcToUTF8Record($marc_blob, $marc_flavour, [, $source_encoding]);
 119
 120 =back
 121
 122 Given a MARC blob or a C<MARC::Record>, the MARC flavour, and an
 123 optional source encoding, return a C<MARC::Record> that is
 124 converted to UTF-8.
 125
 126 The returned C<$marc_record> is guaranteed to be in valid UTF-8, but
 127 is not guaranteed to have been converted correctly.  Specifically,
 128 if C<$converted_from> is 'failed', the MARC record returned failed
 129 character conversion and had each of its non-ASCII octets changed
 130 to the Unicode replacement character.
 131
 132 If the source encoding was not specified, this routine will
 133 try to guess it; the character encoding used for a successful
 134 conversion is returned in C<$converted_from>.
 135
 136 =cut
 137
 138 sub MarcToUTF8Record {
 139     my $marc = shift;
 140     my $marc_flavour = shift;
 141     my $source_encoding = shift;
 142
 143     my $marc_record;
 144     my $marc_blob_is_utf8 = 0;
 145     if (ref($marc) eq 'MARC::Record') {
 146         my $marc_blob = $marc->as_usmarc();
 147         $marc_blob_is_utf8 = IsStringUTF8ish($marc_blob);
 148         $marc_record = $marc;
 149     } else {
 150         # dealing with a MARC blob
 151
 152         # remove any ersatz whitespace from the beginning and
 153         # end of the MARC blob -- these can creep into MARC
 154         # files produced by several sources -- caller really
 155         # should be doing this, however
 156         $marc =~ s/^\s+//;
 157         $marc =~ s/\s+$//;
 158         $marc_blob_is_utf8 = IsStringUTF8ish($marc);
 159         eval {
 160             $marc_record = MARC::Record->new_from_usmarc($marc);
 161         };
 162         if ($@) {
 163             # if we fail the first time, one likely problem
 164             # is that we have a MARC21 record that says that it's
 165             # UTF-8 (Leader/09 = 'a') but contains non-UTF-8 characters.
 166             # We'll try parsing it again.
 167             substr($marc, 9, 1) = ' ';
 168             eval {
 169                 $marc_record = MARC::Record->new_from_usmarc($marc);
 170             };
 171             if ($@) {
 172                 # it's hopeless; return an empty MARC::Record
 173                 return MARC::Record->new(), 'failed', ['could not parse MARC blob'];
 174             }
 175         }
 176     }
 177
 178     # If we do not know the source encoding, try some guesses
 179     # as follows:
 180     #   1. Record is UTF-8 already.
 181     #   2. If MARC flavor is MARC21, then
 182     #      a. record is MARC-8
 183     #      b. record is ISO-8859-1
 184     #   3. If MARC flavor is UNIMARC, then
 185     if (not defined $source_encoding) {
 186         if ($marc_blob_is_utf8) {
 187             # note that for MARC21 we are not bothering to check
 188             # if the Leader/09 is set to 'a' or not -- because
 189             # of problems with various ILSs (including Koha in the
 190             # past, alas), this just is not trustworthy.
 191             SetMarcUnicodeFlag($marc_record, $marc_flavour);
 192             return $marc_record, 'UTF-8', [];
 193         } else {
 194             if ($marc_flavour eq 'MARC21') {
 195                 return _default_marc21_charconv_to_utf8($marc_record, $marc_flavour);
 196             } elsif ($marc_flavour =~/UNIMARC/) {
 197                 return _default_unimarc_charconv_to_utf8($marc_record, $marc_flavour);
 198             } else {
 199                 return _default_marc21_charconv_to_utf8($marc_record, $marc_flavour);
 200             }
 201         }
 202     } else {
 203         # caller knows the character encoding
 204         my $original_marc_record = $marc_record->clone();
 205         my @errors;
 206         if ($source_encoding =~ /utf-?8/i) {
 207             if ($marc_blob_is_utf8) {
 208                 SetMarcUnicodeFlag($marc_record, $marc_flavour);
 209                 return $marc_record, 'UTF-8', [];
 210             } else {
 211                 push @errors, 'specified UTF-8 => UTF-8 conversion, but record is not in UTF-8';
 212             }
 213         } elsif ($source_encoding =~ /marc-?8/i) {
 214             @errors = _marc_marc8_to_utf8($marc_record, $marc_flavour);
 215         } elsif ($source_encoding =~ /5426/) {
 216             @errors = _marc_iso5426_to_utf8($marc_record, $marc_flavour);
 217         } else {
 218             # assume any other character encoding is for Text::Iconv
 219             @errors = _marc_to_utf8_via_text_iconv($marc_record, $marc_flavour, 'iso-8859-1');
 220         }
 221
 222         if (@errors) {
 223             _marc_to_utf8_replacement_char($original_marc_record, $marc_flavour);
 224             return $original_marc_record, 'failed', \@errors;
 225         } else {
 226             return $marc_record, $source_encoding, [];
 227         }
 228     }
 229
 230 }
 231
 232 =head2 SetMarcUnicodeFlag
 233
 234 =over 4
 235
 236 SetMarcUnicodeFlag($marc_record, $marc_flavour);
 237
 238 =back
 239
 240 Set both the internal MARC::Record encoding flag
 241 and the appropriate Leader/09 (MARC21) or
 242 100/26-29 (UNIMARC) to indicate that the record
 243 is in UTF-8.  Note that this does B<not> do
 244 any actual character conversion.
 245
 246 =cut
 247
 248 sub SetMarcUnicodeFlag {
 249     my $marc_record = shift;
 250     my $marc_flavour = shift; # || C4::Context->preference("marcflavour");
 251
 252     $marc_record->encoding('UTF-8');
 253     if ($marc_flavour eq 'MARC21') {
 254         my $leader = $marc_record->leader();
 255         substr($leader, 9, 1) = 'a';
 256         $marc_record->leader($leader);
 257     } elsif ($marc_flavour =~/UNIMARC/) {
 258         my $string;
 259                 my ($subflength,$encodingposition)=($marc_flavour=~/AUTH/?(21,8):(36,22));
 260                 $string=$marc_record->subfield( 100, "a" );
 261         if (length($string)==$subflength) {
 262                         $string = substr $string, 0,$subflength if (length($string)>$subflength);
 263         }
 264         else {
 265             $string = POSIX::strftime( "%Y%m%d", localtime );
 266             $string =~ s/\-//g;
 267             $string = sprintf( "%-*s", $subflength, $string );
 268         }
 269         substr( $string, $encodingposition, 8, "frey50  " );
 270         if ( $marc_record->subfield( 100, "a" ) ) {
 271                         $marc_record->field('100')->update(a=>$string);
 272                 }
 273                 else {
 274             $marc_record->insert_grouped_field(
 275                 MARC::Field->new( 100, '', '', "a" => $string ) );
 276         }
 277                 $debug && warn "encodage: ", substr( $marc_record->subfield(100, 'a'), $encodingposition, 8 );
 278     } else {
 279         warn "Unrecognized marcflavour: $marc_flavour";
 280     }
 281 }
 282
 283 =head2 StripNonXmlChars
 284
 285 =over 4
 286
 287 my $new_str = StripNonXmlChars($old_str);
 288
 289 =back
 290
 291 Given a string, return a copy with the
 292 characters that are illegal in XML
 293 removed.
 294
 295 This function exists to work around a problem
 296 that can occur with badly-encoded MARC records.
 297 Specifically, if a UTF-8 MARC record also
 298 has excape (\x1b) characters, MARC::File::XML
 299 will let the escape characters pass through
 300 when as_xml() or as_xml_record() is called.  The
 301 problem is that the escape character is not
 302 legal in well-formed XML documents, so when
 303 MARC::File::XML attempts to parse such a record,
 304 the XML parser will fail.
 305
 306 Stripping such characters will allow a
 307 MARC::Record->new_from_xml()
 308 to work, at the possible risk of some data loss.
 309
 310 =cut
 311
 312 sub StripNonXmlChars {
 313     my $str = shift;
 314     if (!defined($str) || $str eq ""){
 315         return "";
 316     }
 317     $str =~ s/[^\x09\x0A\x0D\x{0020}-\x{D7FF}\x{E000}-\x{FFFD}\x{10000}-\x{10FFFF}]//g;
 318     return $str;
 319 }
 320
 321 =head1 INTERNAL FUNCTIONS
 322
 323 =head2 _default_marc21_charconv_to_utf8
 324
 325 =over 4
 326
 327 my ($new_marc_record, $guessed_charset) = _default_marc21_charconv_to_utf8($marc_record);
 328
 329 =back
 330
 331 Converts a C<MARC::Record> of unknown character set to UTF-8,
 332 first by trying a MARC-8 to UTF-8 conversion, then ISO-8859-1
 333 to UTF-8, then a default conversion that replaces each non-ASCII
 334 character with the replacement character.
 335
 336 The C<$guessed_charset> return value contains the character set
 337 that resulted in a conversion to valid UTF-8; note that
 338 if the MARC-8 and ISO-8859-1 conversions failed, the value of
 339 this is 'failed'.
 340
 341 =cut
 342
 343 sub _default_marc21_charconv_to_utf8 {
 344     my $marc_record = shift;
 345     my $marc_flavour = shift;
 346
 347     my $trial_marc8 = $marc_record->clone();
 348     my @all_errors = ();
 349     my @errors = _marc_marc8_to_utf8($trial_marc8, $marc_flavour);
 350     unless (@errors) {
 351         return $trial_marc8, 'MARC-8', [];
 352     }
 353     push @all_errors, @errors;
 354
 355     my $trial_8859_1 = $marc_record->clone();
 356     @errors = _marc_to_utf8_via_text_iconv($trial_8859_1, $marc_flavour, 'iso-8859-1');
 357     unless (@errors) {
 358         return $trial_8859_1, 'iso-8859-1', []; # note -- we could return \@all_errors
 359                                                 # instead if we wanted to report details
 360                                                 # of the failed attempt at MARC-8 => UTF-8
 361     }
 362     push @all_errors, @errors;
 363
 364     my $default_converted = $marc_record->clone();
 365     _marc_to_utf8_replacement_char($default_converted, $marc_flavour);
 366     return $default_converted, 'failed', \@all_errors;
 367 }
 368
 369 =head2 _default_unimarc_charconv_to_utf8
 370
 371 =over 4
 372
 373 my ($new_marc_record, $guessed_charset) = _default_unimarc_charconv_to_utf8($marc_record);
 374
 375 =back
 376
 377 Converts a C<MARC::Record> of unknown character set to UTF-8,
 378 first by trying a ISO-5426 to UTF-8 conversion, then ISO-8859-1
 379 to UTF-8, then a default conversion that replaces each non-ASCII
 380 character with the replacement character.
 381
 382 The C<$guessed_charset> return value contains the character set
 383 that resulted in a conversion to valid UTF-8; note that
 384 if the MARC-8 and ISO-8859-1 conversions failed, the value of
 385 this is 'failed'.
 386
 387 =cut
 388
 389 sub _default_unimarc_charconv_to_utf8 {
 390     my $marc_record = shift;
 391     my $marc_flavour = shift;
 392
 393     my $trial_marc8 = $marc_record->clone();
 394     my @all_errors = ();
 395     my @errors = _marc_iso5426_to_utf8($trial_marc8, $marc_flavour);
 396     unless (@errors) {
 397         return $trial_marc8, 'iso-5426';
 398     }
 399     push @all_errors, @errors;
 400
 401     my $trial_8859_1 = $marc_record->clone();
 402     @errors = _marc_to_utf8_via_text_iconv($trial_8859_1, $marc_flavour, 'iso-8859-1');
 403     unless (@errors) {
 404         return $trial_8859_1, 'iso-8859-1';
 405     }
 406     push @all_errors, @errors;
 407
 408     my $default_converted = $marc_record->clone();
 409     _marc_to_utf8_replacement_char($default_converted, $marc_flavour);
 410     return $default_converted, 'failed', \@all_errors;
 411 }
 412
 413 =head2 _marc_marc8_to_utf8
 414
 415 =over 4
 416
 417 my @errors = _marc_marc8_to_utf8($marc_record, $marc_flavour, $source_encoding);
 418
 419 =back
 420
 421 Convert a C<MARC::Record> to UTF-8 in-place from MARC-8.
 422 If the conversion fails for some reason, an
 423 appropriate messages will be placed in the returned
 424 C<@errors> array.
 425
 426 =cut
 427
 428 sub _marc_marc8_to_utf8 {
 429     my $marc_record = shift;
 430     my $marc_flavour = shift;
 431
 432     my $prev_ignore = MARC::Charset->ignore_errors();
 433     MARC::Charset->ignore_errors(1);
 434
 435     # trap warnings raised by MARC::Charset
 436     my @errors = ();
 437     local $SIG{__WARN__} = sub {
 438         my $msg = $_[0];
 439         if ($msg =~ /MARC.Charset/) {
 440             # FIXME - purpose of this regexp is to strip out the
 441             # line reference to MARC/Charset.pm, but as it
 442             # exists probably won't work quite on Windows --
 443             # some sort of minimal-bunch back-tracking RE
 444             # would be helpful here
 445             $msg =~ s/at [\/].*?.MARC.Charset\.pm line \d+\.\n$//;
 446             push @errors, $msg;
 447         } else {
 448             # if warning doesn't come from MARC::Charset, just
 449             # pass it on
 450             warn $msg;
 451         }
 452     };
 453
 454     foreach my $field ($marc_record->fields()) {
 455         if ($field->is_control_field()) {
 456             ; # do nothing -- control fields should not contain non-ASCII characters
 457         } else {
 458             my @converted_subfields;
 459             foreach my $subfield ($field->subfields()) {
 460                 my $utf8sf = MARC::Charset::marc8_to_utf8($subfield->[1]);
 461                 unless (IsStringUTF8ish($utf8sf)) {
 462                     # Because of a bug in MARC::Charset 0.98, if the string
 463                     # has (a) one or more diacritics that (b) are only in character positions
 464                     # 128 to 255 inclusive, the resulting converted string is not in
 465                     # UTF-8, but the legacy 8-bit encoding (e.g., ISO-8859-1).  If that
 466                     # occurs, upgrade the string in place.  Moral of the story seems to be
 467                     # that pack("U", ...) is better than chr(...) if you need to guarantee
 468                     # that the resulting string is UTF-8.
 469                     utf8::upgrade($utf8sf);
 470                 }
 471                 push @converted_subfields, $subfield->[0], $utf8sf;
 472             }
 473
 474             $field->replace_with(MARC::Field->new(
 475                 $field->tag(), $field->indicator(1), $field->indicator(2),
 476                 @converted_subfields)
 477             );
 478         }
 479     }
 480
 481     MARC::Charset->ignore_errors($prev_ignore);
 482
 483     SetMarcUnicodeFlag($marc_record, $marc_flavour);
 484
 485     return @errors;
 486 }
 487
 488 =head2 _marc_iso5426_to_utf8
 489
 490 =over 4
 491
 492 my @errors = _marc_iso5426_to_utf8($marc_record, $marc_flavour, $source_encoding);
 493
 494 =back
 495
 496 Convert a C<MARC::Record> to UTF-8 in-place from ISO-5426.
 497 If the conversion fails for some reason, an
 498 appropriate messages will be placed in the returned
 499 C<@errors> array.
 500
 501 FIXME - is ISO-5426 equivalent enough to MARC-8
 502 that C<MARC::Charset> can be used instead?
 503
 504 =cut
 505
 506 sub _marc_iso5426_to_utf8 {
 507     my $marc_record = shift;
 508     my $marc_flavour = shift;
 509
 510     my @errors = ();
 511
 512     foreach my $field ($marc_record->fields()) {
 513         if ($field->is_control_field()) {
 514             ; # do nothing -- control fields should not contain non-ASCII characters
 515         } else {
 516             my @converted_subfields;
 517             foreach my $subfield ($field->subfields()) {
 518                 my $utf8sf = char_decode5426($subfield->[1]);
 519                 push @converted_subfields, $subfield->[0], $utf8sf;
 520             }
 521
 522             $field->replace_with(MARC::Field->new(
 523                 $field->tag(), $field->indicator(1), $field->indicator(2),
 524                 @converted_subfields)
 525             );
 526         }
 527     }
 528
 529     SetMarcUnicodeFlag($marc_record, $marc_flavour);
 530
 531     return @errors;
 532 }
 533
 534 =head2 _marc_to_utf8_via_text_iconv
 535
 536 =over 4
 537
 538 my @errors = _marc_to_utf8_via_text_iconv($marc_record, $marc_flavour, $source_encoding);
 539
 540 =back
 541
 542 Convert a C<MARC::Record> to UTF-8 in-place using the
 543 C<Text::Iconv> CPAN module.  Any source encoding accepted
 544 by the user's iconv installation should work.  If
 545 the source encoding is not recognized on the user's
 546 server or the conversion fails for some reason,
 547 appropriate messages will be placed in the returned
 548 C<@errors> array.
 549
 550 =cut
 551
 552 sub _marc_to_utf8_via_text_iconv {
 553     my $marc_record = shift;
 554     my $marc_flavour = shift;
 555     my $source_encoding = shift;
 556
 557     my @errors = ();
 558     my $decoder;
 559     eval { $decoder = Text::Iconv->new($source_encoding, 'utf8'); };
 560     if ($@) {
 561         push @errors, "Could not initialze $source_encoding => utf8 converter: $@";
 562         return @errors;
 563     }
 564
 565     my $prev_raise_error = Text::Iconv->raise_error();
 566     Text::Iconv->raise_error(1);
 567
 568     foreach my $field ($marc_record->fields()) {
 569         if ($field->is_control_field()) {
 570             ; # do nothing -- control fields should not contain non-ASCII characters
 571         } else {
 572             my @converted_subfields;
 573             foreach my $subfield ($field->subfields()) {
 574                 my $converted_value;
 575                 my $conversion_ok = 1;
 576                 eval { $converted_value = $decoder->convert($subfield->[1]); };
 577                 if ($@) {
 578                     $conversion_ok = 0;
 579                     push @errors, $@;
 580                 } elsif (not defined $converted_value) {
 581                     $conversion_ok = 0;
 582                     push @errors, "Text::Iconv conversion failed - retval is " . $decoder->retval();
 583                 }
 584
 585                 if ($conversion_ok) {
 586                     push @converted_subfields, $subfield->[0], $converted_value;
 587                 } else {
 588                     $converted_value = $subfield->[1];
 589                     $converted_value =~ s/[\200-\377]/\xef\xbf\xbd/g;
 590                     push @converted_subfields, $subfield->[0], $converted_value;
 591                 }
 592             }
 593
 594             $field->replace_with(MARC::Field->new(
 595                 $field->tag(), $field->indicator(1), $field->indicator(2),
 596                 @converted_subfields)
 597             );
 598         }
 599     }
 600
 601     SetMarcUnicodeFlag($marc_record, $marc_flavour);
 602     Text::Iconv->raise_error($prev_raise_error);
 603
 604     return @errors;
 605 }
 606
 607 =head2 _marc_to_utf8_replacement_char
 608
 609 =over 4
 610
 611 _marc_to_utf8_replacement_char($marc_record, $marc_flavour);
 612
 613 =back
 614
 615 Convert a C<MARC::Record> to UTF-8 in-place, adopting the
 616 unsatisfactory method of replacing all non-ASCII (e.g.,
 617 where the eight bit is set) octet with the Unicode
 618 replacement character.  This is meant as a last-ditch
 619 method, and would be best used as part of a UI that
 620 lets a cataloguer pick various character conversions
 621 until he or she finds the right one.
 622
 623 =cut
 624
 625 sub _marc_to_utf8_replacement_char {
 626     my $marc_record = shift;
 627     my $marc_flavour = shift;
 628
 629     foreach my $field ($marc_record->fields()) {
 630         if ($field->is_control_field()) {
 631             ; # do nothing -- control fields should not contain non-ASCII characters
 632         } else {
 633             my @converted_subfields;
 634             foreach my $subfield ($field->subfields()) {
 635                 my $value = $subfield->[1];
 636                 $value =~ s/[\200-\377]/\xef\xbf\xbd/g;
 637                 push @converted_subfields, $subfield->[0], $value;
 638             }
 639
 640             $field->replace_with(MARC::Field->new(
 641                 $field->tag(), $field->indicator(1), $field->indicator(2),
 642                 @converted_subfields)
 643             );
 644         }
 645     }
 646
 647     SetMarcUnicodeFlag($marc_record, $marc_flavour);
 648 }
 649
 650 =head2 char_decode5426
 651
 652 =over 4
 653
 654 my $utf8string = char_decode5426($iso_5426_string);
 655
 656 =back
 657
 658 Converts a string from ISO-5426 to UTF-8.
 659
 660 =cut
 661
 662
 663 my %chars;
 664 $chars{0xb0}=0x0101;#3/0ayn[ain]
 665 $chars{0xb1}=0x0623;#3/1alif/hamzah[alefwithhamzaabove]
 666 #$chars{0xb2}=0x00e0;#'à';
 667 $chars{0xb2}=0x00e0;#3/2leftlowsinglequotationmark
 668 #$chars{0xb3}=0x00e7;#'ç';
 669 $chars{0xb3}=0x00e7;#3/2leftlowsinglequotationmark
 670 # $chars{0xb4}='è';
 671 $chars{0xb4}=0x00e8;
 672 # $chars{0xb5}='é';
 673 $chars{0xb5}=0x00e9;
 674 $chars{0x97}=0x003c;#3/2leftlowsinglequotationmark
 675 $chars{0x98}=0x003e;#3/2leftlowsinglequotationmark
 676 $chars{0xfa}=0x0153;#oe
 677 $chars{0x81d1}=0x00b0;
 678
 679 ####
 680 ## combined characters iso5426
 681
 682 $chars{0xc041}=0x1ea2; # capital a with hook above
 683 $chars{0xc045}=0x1eba; # capital e with hook above
 684 $chars{0xc049}=0x1ec8; # capital i with hook above
 685 $chars{0xc04f}=0x1ece; # capital o with hook above
 686 $chars{0xc055}=0x1ee6; # capital u with hook above
 687 $chars{0xc059}=0x1ef6; # capital y with hook above
 688 $chars{0xc061}=0x1ea3; # small a with hook above
 689 $chars{0xc065}=0x1ebb; # small e with hook above
 690 $chars{0xc069}=0x1ec9; # small i with hook above
 691 $chars{0xc06f}=0x1ecf; # small o with hook above
 692 $chars{0xc075}=0x1ee7; # small u with hook above
 693 $chars{0xc079}=0x1ef7; # small y with hook above
 694
 695         # 4/1 grave accent
 696 $chars{0xc141}=0x00c0; # capital a with grave accent
 697 $chars{0xc145}=0x00c8; # capital e with grave accent
 698 $chars{0xc149}=0x00cc; # capital i with grave accent
 699 $chars{0xc14f}=0x00d2; # capital o with grave accent
 700 $chars{0xc155}=0x00d9; # capital u with grave accent
 701 $chars{0xc157}=0x1e80; # capital w with grave
 702 $chars{0xc159}=0x1ef2; # capital y with grave
 703 $chars{0xc161}=0x00e0; # small a with grave accent
 704 $chars{0xc165}=0x00e8; # small e with grave accent
 705 $chars{0xc169}=0x00ec; # small i with grave accent
 706 $chars{0xc16f}=0x00f2; # small o with grave accent
 707 $chars{0xc175}=0x00f9; # small u with grave accent
 708 $chars{0xc177}=0x1e81; # small w with grave
 709 $chars{0xc179}=0x1ef3; # small y with grave
 710         # 4/2 acute accent
 711 $chars{0xc241}=0x00c1; # capital a with acute accent
 712 $chars{0xc243}=0x0106; # capital c with acute accent
 713 $chars{0xc245}=0x00c9; # capital e with acute accent
 714 $chars{0xc247}=0x01f4; # capital g with acute
 715 $chars{0xc249}=0x00cd; # capital i with acute accent
 716 $chars{0xc24b}=0x1e30; # capital k with acute
 717 $chars{0xc24c}=0x0139; # capital l with acute accent
 718 $chars{0xc24d}=0x1e3e; # capital m with acute
 719 $chars{0xc24e}=0x0143; # capital n with acute accent
 720 $chars{0xc24f}=0x00d3; # capital o with acute accent
 721 $chars{0xc250}=0x1e54; # capital p with acute
 722 $chars{0xc252}=0x0154; # capital r with acute accent
 723 $chars{0xc253}=0x015a; # capital s with acute accent
 724 $chars{0xc255}=0x00da; # capital u with acute accent
 725 $chars{0xc257}=0x1e82; # capital w with acute
 726 $chars{0xc259}=0x00dd; # capital y with acute accent
 727 $chars{0xc25a}=0x0179; # capital z with acute accent
 728 $chars{0xc261}=0x00e1; # small a with acute accent
 729 $chars{0xc263}=0x0107; # small c with acute accent
 730 $chars{0xc265}=0x00e9; # small e with acute accent
 731 $chars{0xc267}=0x01f5; # small g with acute
 732 $chars{0xc269}=0x00ed; # small i with acute accent
 733 $chars{0xc26b}=0x1e31; # small k with acute
 734 $chars{0xc26c}=0x013a; # small l with acute accent
 735 $chars{0xc26d}=0x1e3f; # small m with acute
 736 $chars{0xc26e}=0x0144; # small n with acute accent
 737 $chars{0xc26f}=0x00f3; # small o with acute accent
 738 $chars{0xc270}=0x1e55; # small p with acute
 739 $chars{0xc272}=0x0155; # small r with acute accent
 740 $chars{0xc273}=0x015b; # small s with acute accent
 741 $chars{0xc275}=0x00fa; # small u with acute accent
 742 $chars{0xc277}=0x1e83; # small w with acute
 743 $chars{0xc279}=0x00fd; # small y with acute accent
 744 $chars{0xc27a}=0x017a; # small z with acute accent
 745 $chars{0xc2e1}=0x01fc; # capital ae with acute
 746 $chars{0xc2f1}=0x01fd; # small ae with acute
 747        # 4/3 circumflex accent
 748 $chars{0xc341}=0x00c2; # capital a with circumflex accent
 749 $chars{0xc343}=0x0108; # capital c with circumflex
 750 $chars{0xc345}=0x00ca; # capital e with circumflex accent
 751 $chars{0xc347}=0x011c; # capital g with circumflex
 752 $chars{0xc348}=0x0124; # capital h with circumflex
 753 $chars{0xc349}=0x00ce; # capital i with circumflex accent
 754 $chars{0xc34a}=0x0134; # capital j with circumflex
 755 $chars{0xc34f}=0x00d4; # capital o with circumflex accent
 756 $chars{0xc353}=0x015c; # capital s with circumflex
 757 $chars{0xc355}=0x00db; # capital u with circumflex
 758 $chars{0xc357}=0x0174; # capital w with circumflex
 759 $chars{0xc359}=0x0176; # capital y with circumflex
 760 $chars{0xc35a}=0x1e90; # capital z with circumflex
 761 $chars{0xc361}=0x00e2; # small a with circumflex accent
 762 $chars{0xc363}=0x0109; # small c with circumflex
 763 $chars{0xc365}=0x00ea; # small e with circumflex accent
 764 $chars{0xc367}=0x011d; # small g with circumflex
 765 $chars{0xc368}=0x0125; # small h with circumflex
 766 $chars{0xc369}=0x00ee; # small i with circumflex accent
 767 $chars{0xc36a}=0x0135; # small j with circumflex
 768 $chars{0xc36e}=0x00f1; # small n with tilde
 769 $chars{0xc36f}=0x00f4; # small o with circumflex accent
 770 $chars{0xc373}=0x015d; # small s with circumflex
 771 $chars{0xc375}=0x00fb; # small u with circumflex
 772 $chars{0xc377}=0x0175; # small w with circumflex
 773 $chars{0xc379}=0x0177; # small y with circumflex
 774 $chars{0xc37a}=0x1e91; # small z with circumflex
 775         # 4/4 tilde
 776 $chars{0xc441}=0x00c3; # capital a with tilde
 777 $chars{0xc445}=0x1ebc; # capital e with tilde
 778 $chars{0xc449}=0x0128; # capital i with tilde
 779 $chars{0xc44e}=0x00d1; # capital n with tilde
 780 $chars{0xc44f}=0x00d5; # capital o with tilde
 781 $chars{0xc455}=0x0168; # capital u with tilde
 782 $chars{0xc456}=0x1e7c; # capital v with tilde
 783 $chars{0xc459}=0x1ef8; # capital y with tilde
 784 $chars{0xc461}=0x00e3; # small a with tilde
 785 $chars{0xc465}=0x1ebd; # small e with tilde
 786 $chars{0xc469}=0x0129; # small i with tilde
 787 $chars{0xc46e}=0x00f1; # small n with tilde
 788 $chars{0xc46f}=0x00f5; # small o with tilde
 789 $chars{0xc475}=0x0169; # small u with tilde
 790 $chars{0xc476}=0x1e7d; # small v with tilde
 791 $chars{0xc479}=0x1ef9; # small y with tilde
 792     # 4/5 macron
 793 $chars{0xc541}=0x0100; # capital a with macron
 794 $chars{0xc545}=0x0112; # capital e with macron
 795 $chars{0xc547}=0x1e20; # capital g with macron
 796 $chars{0xc549}=0x012a; # capital i with macron
 797 $chars{0xc54f}=0x014c; # capital o with macron
 798 $chars{0xc555}=0x016a; # capital u with macron
 799 $chars{0xc561}=0x0101; # small a with macron
 800 $chars{0xc565}=0x0113; # small e with macron
 801 $chars{0xc567}=0x1e21; # small g with macron
 802 $chars{0xc569}=0x012b; # small i with macron
 803 $chars{0xc56f}=0x014d; # small o with macron
 804 $chars{0xc575}=0x016b; # small u with macron
 805 $chars{0xc572}=0x0159; # small r with macron
 806 $chars{0xc5e1}=0x01e2; # capital ae with macron
 807 $chars{0xc5f1}=0x01e3; # small ae with macron
 808         # 4/6 breve
 809 $chars{0xc641}=0x0102; # capital a with breve
 810 $chars{0xc645}=0x0114; # capital e with breve
 811 $chars{0xc647}=0x011e; # capital g with breve
 812 $chars{0xc649}=0x012c; # capital i with breve
 813 $chars{0xc64f}=0x014e; # capital o with breve
 814 $chars{0xc655}=0x016c; # capital u with breve
 815 $chars{0xc661}=0x0103; # small a with breve
 816 $chars{0xc665}=0x0115; # small e with breve
 817 $chars{0xc667}=0x011f; # small g with breve
 818 $chars{0xc669}=0x012d; # small i with breve
 819 $chars{0xc66f}=0x014f; # small o with breve
 820 $chars{0xc675}=0x016d; # small u with breve
 821         # 4/7 dot above
 822 $chars{0xc7b0}=0x01e1; # Ain with dot above
 823 $chars{0xc742}=0x1e02; # capital b with dot above
 824 $chars{0xc743}=0x010a; # capital c with dot above
 825 $chars{0xc744}=0x1e0a; # capital d with dot above
 826 $chars{0xc745}=0x0116; # capital e with dot above
 827 $chars{0xc746}=0x1e1e; # capital f with dot above
 828 $chars{0xc747}=0x0120; # capital g with dot above
 829 $chars{0xc748}=0x1e22; # capital h with dot above
 830 $chars{0xc749}=0x0130; # capital i with dot above
 831 $chars{0xc74d}=0x1e40; # capital m with dot above
 832 $chars{0xc74e}=0x1e44; # capital n with dot above
 833 $chars{0xc750}=0x1e56; # capital p with dot above
 834 $chars{0xc752}=0x1e58; # capital r with dot above
 835 $chars{0xc753}=0x1e60; # capital s with dot above
 836 $chars{0xc754}=0x1e6a; # capital t with dot above
 837 $chars{0xc757}=0x1e86; # capital w with dot above
 838 $chars{0xc758}=0x1e8a; # capital x with dot above
 839 $chars{0xc759}=0x1e8e; # capital y with dot above
 840 $chars{0xc75a}=0x017b; # capital z with dot above
 841 $chars{0xc761}=0x0227; # small b with dot above
 842 $chars{0xc762}=0x1e03; # small b with dot above
 843 $chars{0xc763}=0x010b; # small c with dot above
 844 $chars{0xc764}=0x1e0b; # small d with dot above
 845 $chars{0xc765}=0x0117; # small e with dot above
 846 $chars{0xc766}=0x1e1f; # small f with dot above
 847 $chars{0xc767}=0x0121; # small g with dot above
 848 $chars{0xc768}=0x1e23; # small h with dot above
 849 $chars{0xc76d}=0x1e41; # small m with dot above
 850 $chars{0xc76e}=0x1e45; # small n with dot above
 851 $chars{0xc770}=0x1e57; # small p with dot above
 852 $chars{0xc772}=0x1e59; # small r with dot above
 853 $chars{0xc773}=0x1e61; # small s with dot above
 854 $chars{0xc774}=0x1e6b; # small t with dot above
 855 $chars{0xc777}=0x1e87; # small w with dot above
 856 $chars{0xc778}=0x1e8b; # small x with dot above
 857 $chars{0xc779}=0x1e8f; # small y with dot above
 858 $chars{0xc77a}=0x017c; # small z with dot above
 859         # 4/8 trema, diaresis
 860 $chars{0xc820}=0x00a8; # diaeresis
 861 $chars{0xc841}=0x00c4; # capital a with diaeresis
 862 $chars{0xc845}=0x00cb; # capital e with diaeresis
 863 $chars{0xc848}=0x1e26; # capital h with diaeresis
 864 $chars{0xc849}=0x00cf; # capital i with diaeresis
 865 $chars{0xc84f}=0x00d6; # capital o with diaeresis
 866 $chars{0xc855}=0x00dc; # capital u with diaeresis
 867 $chars{0xc857}=0x1e84; # capital w with diaeresis
 868 $chars{0xc858}=0x1e8c; # capital x with diaeresis
 869 $chars{0xc859}=0x0178; # capital y with diaeresis
 870 $chars{0xc861}=0x00e4; # small a with diaeresis
 871 $chars{0xc865}=0x00eb; # small e with diaeresis
 872 $chars{0xc868}=0x1e27; # small h with diaeresis
 873 $chars{0xc869}=0x00ef; # small i with diaeresis
 874 $chars{0xc86f}=0x00f6; # small o with diaeresis
 875 $chars{0xc874}=0x1e97; # small t with diaeresis
 876 $chars{0xc875}=0x00fc; # small u with diaeresis
 877 $chars{0xc877}=0x1e85; # small w with diaeresis
 878 $chars{0xc878}=0x1e8d; # small x with diaeresis
 879 $chars{0xc879}=0x00ff; # small y with diaeresis
 880         # 4/9 umlaut
 881 $chars{0xc920}=0x00a8; # [diaeresis]
 882 $chars{0xc961}=0x00e4; # a with umlaut
 883 $chars{0xc965}=0x00eb; # e with umlaut
 884 $chars{0xc969}=0x00ef; # i with umlaut
 885 $chars{0xc96f}=0x00f6; # o with umlaut
 886 $chars{0xc975}=0x00fc; # u with umlaut
 887         # 4/10 circle above
 888 $chars{0xca41}=0x00c5; # capital a with ring above
 889 $chars{0xcaad}=0x016e; # capital u with ring above
 890 $chars{0xca61}=0x00e5; # small a with ring above
 891 $chars{0xca75}=0x016f; # small u with ring above
 892 $chars{0xca77}=0x1e98; # small w with ring above
 893 $chars{0xca79}=0x1e99; # small y with ring above
 894         # 4/11 high comma off centre
 895         # 4/12 inverted high comma centred
 896         # 4/13 double acute accent
 897 $chars{0xcd4f}=0x0150; # capital o with double acute
 898 $chars{0xcd55}=0x0170; # capital u with double acute
 899 $chars{0xcd6f}=0x0151; # small o with double acute
 900 $chars{0xcd75}=0x0171; # small u with double acute
 901         # 4/14 horn
 902 $chars{0xce54}=0x01a0; # latin capital letter o with horn
 903 $chars{0xce55}=0x01af; # latin capital letter u with horn
 904 $chars{0xce74}=0x01a1; # latin small letter o with horn
 905 $chars{0xce75}=0x01b0; # latin small letter u with horn
 906         # 4/15 caron (hacek
 907 $chars{0xcf41}=0x01cd; # capital a with caron
 908 $chars{0xcf43}=0x010c; # capital c with caron
 909 $chars{0xcf44}=0x010e; # capital d with caron
 910 $chars{0xcf45}=0x011a; # capital e with caron
 911 $chars{0xcf47}=0x01e6; # capital g with caron
 912 $chars{0xcf49}=0x01cf; # capital i with caron
 913 $chars{0xcf4b}=0x01e8; # capital k with caron
 914 $chars{0xcf4c}=0x013d; # capital l with caron
 915 $chars{0xcf4e}=0x0147; # capital n with caron
 916 $chars{0xcf4f}=0x01d1; # capital o with caron
 917 $chars{0xcf52}=0x0158; # capital r with caron
 918 $chars{0xcf53}=0x0160; # capital s with caron
 919 $chars{0xcf54}=0x0164; # capital t with caron
 920 $chars{0xcf55}=0x01d3; # capital u with caron
 921 $chars{0xcf5a}=0x017d; # capital z with caron
 922 $chars{0xcf61}=0x01ce; # small a with caron
 923 $chars{0xcf63}=0x010d; # small c with caron
 924 $chars{0xcf64}=0x010f; # small d with caron
 925 $chars{0xcf65}=0x011b; # small e with caron
 926 $chars{0xcf67}=0x01e7; # small g with caron
 927 $chars{0xcf69}=0x01d0; # small i with caron
 928 $chars{0xcf6a}=0x01f0; # small j with caron
 929 $chars{0xcf6b}=0x01e9; # small k with caron
 930 $chars{0xcf6c}=0x013e; # small l with caron
 931 $chars{0xcf6e}=0x0148; # small n with caron
 932 $chars{0xcf6f}=0x01d2; # small o with caron
 933 $chars{0xcf72}=0x0159; # small r with caron
 934 $chars{0xcf73}=0x0161; # small s with caron
 935 $chars{0xcf74}=0x0165; # small t with caron
 936 $chars{0xcf75}=0x01d4; # small u with caron
 937 $chars{0xcf7a}=0x017e; # small z with caron
 938         # 5/0 cedilla
 939 $chars{0xd020}=0x00b8; # cedilla
 940 $chars{0xd043}=0x00c7; # capital c with cedilla
 941 $chars{0xd044}=0x1e10; # capital d with cedilla
 942 $chars{0xd047}=0x0122; # capital g with cedilla
 943 $chars{0xd048}=0x1e28; # capital h with cedilla
 944 $chars{0xd04b}=0x0136; # capital k with cedilla
 945 $chars{0xd04c}=0x013b; # capital l with cedilla
 946 $chars{0xd04e}=0x0145; # capital n with cedilla
 947 $chars{0xd052}=0x0156; # capital r with cedilla
 948 $chars{0xd053}=0x015e; # capital s with cedilla
 949 $chars{0xd054}=0x0162; # capital t with cedilla
 950 $chars{0xd063}=0x00e7; # small c with cedilla
 951 $chars{0xd064}=0x1e11; # small d with cedilla
 952 $chars{0xd065}=0x0119; # small e with cedilla
 953 $chars{0xd067}=0x0123; # small g with cedilla
 954 $chars{0xd068}=0x1e29; # small h with cedilla
 955 $chars{0xd06b}=0x0137; # small k with cedilla
 956 $chars{0xd06c}=0x013c; # small l with cedilla
 957 $chars{0xd06e}=0x0146; # small n with cedilla
 958 $chars{0xd072}=0x0157; # small r with cedilla
 959 $chars{0xd073}=0x015f; # small s with cedilla
 960 $chars{0xd074}=0x0163; # small t with cedilla
 961         # 5/1 rude
 962         # 5/2 hook to left
 963         # 5/3 ogonek (hook to right
 964 $chars{0xd320}=0x02db; # ogonek
 965 $chars{0xd341}=0x0104; # capital a with ogonek
 966 $chars{0xd345}=0x0118; # capital e with ogonek
 967 $chars{0xd349}=0x012e; # capital i with ogonek
 968 $chars{0xd34f}=0x01ea; # capital o with ogonek
 969 $chars{0xd355}=0x0172; # capital u with ogonek
 970 $chars{0xd361}=0x0105; # small a with ogonek
 971 $chars{0xd365}=0x0119; # small e with ogonek
 972 $chars{0xd369}=0x012f; # small i with ogonek
 973 $chars{0xd36f}=0x01eb; # small o with ogonek
 974 $chars{0xd375}=0x0173; # small u with ogonek
 975         # 5/4 circle below
 976 $chars{0xd441}=0x1e00; # capital a with ring below
 977 $chars{0xd461}=0x1e01; # small a with ring below
 978         # 5/5 half circle below
 979 $chars{0xf948}=0x1e2a; # capital h with breve below
 980 $chars{0xf968}=0x1e2b; # small h with breve below
 981         # 5/6 dot below
 982 $chars{0xd641}=0x1ea0; # capital a with dot below
 983 $chars{0xd642}=0x1e04; # capital b with dot below
 984 $chars{0xd644}=0x1e0c; # capital d with dot below
 985 $chars{0xd645}=0x1eb8; # capital e with dot below
 986 $chars{0xd648}=0x1e24; # capital h with dot below
 987 $chars{0xd649}=0x1eca; # capital i with dot below
 988 $chars{0xd64b}=0x1e32; # capital k with dot below
 989 $chars{0xd64c}=0x1e36; # capital l with dot below
 990 $chars{0xd64d}=0x1e42; # capital m with dot below
 991 $chars{0xd64e}=0x1e46; # capital n with dot below
 992 $chars{0xd64f}=0x1ecc; # capital o with dot below
 993 $chars{0xd652}=0x1e5a; # capital r with dot below
 994 $chars{0xd653}=0x1e62; # capital s with dot below
 995 $chars{0xd654}=0x1e6c; # capital t with dot below
 996 $chars{0xd655}=0x1ee4; # capital u with dot below
 997 $chars{0xd656}=0x1e7e; # capital v with dot below
 998 $chars{0xd657}=0x1e88; # capital w with dot below
 999 $chars{0xd659}=0x1ef4; # capital y with dot below
1000 $chars{0xd65a}=0x1e92; # capital z with dot below
1001 $chars{0xd661}=0x1ea1; # small a with dot below
1002 $chars{0xd662}=0x1e05; # small b with dot below
1003 $chars{0xd664}=0x1e0d; # small d with dot below
1004 $chars{0xd665}=0x1eb9; # small e with dot below
1005 $chars{0xd668}=0x1e25; # small h with dot below
1006 $chars{0xd669}=0x1ecb; # small i with dot below
1007 $chars{0xd66b}=0x1e33; # small k with dot below
1008 $chars{0xd66c}=0x1e37; # small l with dot below
1009 $chars{0xd66d}=0x1e43; # small m with dot below
1010 $chars{0xd66e}=0x1e47; # small n with dot below
1011 $chars{0xd66f}=0x1ecd; # small o with dot below
1012 $chars{0xd672}=0x1e5b; # small r with dot below
1013 $chars{0xd673}=0x1e63; # small s with dot below
1014 $chars{0xd674}=0x1e6d; # small t with dot below
1015 $chars{0xd675}=0x1ee5; # small u with dot below
1016 $chars{0xd676}=0x1e7f; # small v with dot below
1017 $chars{0xd677}=0x1e89; # small w with dot below
1018 $chars{0xd679}=0x1ef5; # small y with dot below
1019 $chars{0xd67a}=0x1e93; # small z with dot below
1020         # 5/7 double dot below
1021 $chars{0xd755}=0x1e72; # capital u with diaeresis below
1022 $chars{0xd775}=0x1e73; # small u with diaeresis below
1023         # 5/8 underline
1024 $chars{0xd820}=0x005f; # underline
1025         # 5/9 double underline
1026 $chars{0xd920}=0x2017; # double underline
1027         # 5/10 small low vertical bar
1028 $chars{0xda20}=0x02cc; #
1029         # 5/11 circumflex below
1030         # 5/12 (this position shall not be used)
1031         # 5/13 left half of ligature sign and of double tilde
1032         # 5/14 right half of ligature sign
1033         # 5/15 right half of double tilde
1034 #     map {printf "%x :%x\n",$_,$chars{$_};}keys %chars;
1035
1036 sub char_decode5426 {
1037     my ( $string) = @_;
1038     my $result;
1039
1040     my @data = unpack("C*", $string);
1041     my @characters;
1042     my $length=scalar(@data);
1043     for (my $i = 0; $i < scalar(@data); $i++) {
1044       my $char= $data[$i];
1045       if ($char >= 0x00 && $char <= 0x7F){
1046         #IsAscii
1047
1048           push @characters,$char unless ($char<0x02 ||$char== 0x0F);
1049       }elsif (($char >= 0xC0 && $char <= 0xDF)) {
1050         #Combined Char
1051         my $convchar ;
1052         if ($chars{$char*256+$data[$i+1]}) {
1053           $convchar= $chars{$char * 256 + $data[$i+1]};
1054           $i++;
1055 #           printf "char %x $char, char to convert %x , converted %x\n",$char,$char * 256 + $data[$i - 1],$convchar;
1056         } elsif ($chars{$char})  {
1057           $convchar= $chars{$char};
1058 #           printf "0xC char %x, converted %x\n",$char,$chars{$char};
1059         }else {
1060           $convchar=$char;
1061         }
1062         push @characters,$convchar;
1063       } else {
1064         my $convchar;
1065         if ($chars{$char})  {
1066           $convchar= $chars{$char};
1067 #            printf "char %x,  converted %x\n",$char,$chars{$char};
1068         }else {
1069 #            printf "char %x $char\n",$char;
1070           $convchar=$char;
1071         }
1072         push @characters,$convchar;
1073       }
1074     }
1075     $result=pack "U*",@characters;
1076 #     $result=~s/\x01//;
1077 #     $result=~s/\x00//;
1078      $result=~s/\x0f//;
1079      $result=~s/\x1b.//;
1080      $result=~s/\x0e//;
1081      $result=~s/\x1b\x5b//;
1082 #   map{printf "%x",$_} @characters;
1083 #   printf "\n";
1084   return $result;
1085 }
1086
1087 1;
1088
1089
1090 =head1 AUTHOR
1091
1092 Koha Development Team <info@koha.org>
1093
1094 Galen Charlton <galen.charlton@liblime.com>
1095
1096 =cut