Bug 9788: Improvements when calling GetReservesFromItemnumber
[koha.git] / C4 / Charset.pm
blob2968b5ee1a4d7653d2e85b644be5ed96b18b2538
1 package C4::Charset;
3 # Copyright (C) 2008 LibLime
5 # This file is part of Koha.
7 # Koha is free software; you can redistribute it and/or modify it under the
8 # terms of the GNU General Public License as published by the Free Software
9 # Foundation; either version 2 of the License, or (at your option) any later
10 # version.
12 # Koha is distributed in the hope that it will be useful, but WITHOUT ANY
13 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
14 # A PARTICULAR PURPOSE. See the GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License along
17 # with Koha; if not, write to the Free Software Foundation, Inc.,
18 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
20 use strict;
21 use warnings;
23 use MARC::Charset qw/marc8_to_utf8/;
24 use Text::Iconv;
25 use C4::Debug;
26 use Unicode::Normalize;
28 use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
30 BEGIN {
31 # set the version for version checking
32 $VERSION = 3.07.00.049;
33 require Exporter;
34 @ISA = qw(Exporter);
35 @EXPORT = qw(
36 NormalizeString
37 IsStringUTF8ish
38 MarcToUTF8Record
39 SetUTF8Flag
40 SetMarcUnicodeFlag
41 StripNonXmlChars
42 nsb_clean
46 =head1 NAME
48 C4::Charset - utilities for handling character set conversions.
50 =head1 SYNOPSIS
52 use C4::Charset;
54 =head1 DESCRIPTION
56 This module contains routines for dealing with character set
57 conversions, particularly for MARC records.
59 A variety of character encodings are in use by various MARC
60 standards, and even more character encodings are used by
61 non-standard MARC records. The various MARC formats generally
62 do not do a good job of advertising a given record's character
63 encoding, and even when a record does advertise its encoding,
64 e.g., via the Leader/09, experience has shown that one cannot
65 trust it.
67 Ultimately, all MARC records are stored in Koha in UTF-8 and
68 must be converted from whatever the source character encoding is.
69 The goal of this module is to ensure that these conversions
70 take place accurately. When a character conversion cannot take
71 place, or at least not accurately, the module was provide
72 enough information to allow user-facing code to inform the user
73 on how to deal with the situation.
75 =cut
77 =head1 FUNCTIONS
79 =head2 IsStringUTF8ish
81 my $is_utf8 = IsStringUTF8ish($str);
83 Determines if C<$str> is valid UTF-8. This can mean
84 one of two things:
86 =over
88 =item *
90 The Perl UTF-8 flag is set and the string contains valid UTF-8.
92 =item *
94 The Perl UTF-8 flag is B<not> set, but the octets contain
95 valid UTF-8.
97 =back
99 The function is named C<IsStringUTF8ish> instead of C<IsStringUTF8>
100 because in one could be presented with a MARC blob that is
101 not actually in UTF-8 but whose sequence of octets appears to be
102 valid UTF-8. The rest of the MARC character conversion functions
103 will assume that this situation occur does not very often.
105 =cut
107 sub IsStringUTF8ish {
108 my $str = shift;
110 return 1 if utf8::is_utf8($str);
111 return utf8::decode($str);
114 =head2 SetUTF8Flag
116 my $marc_record = SetUTF8Flag($marc_record, $nfd);
118 This function sets the PERL UTF8 flag for data.
119 It is required when using new_from_usmarc
120 since MARC::File::USMARC does not handle PERL UTF8 setting.
121 When editing unicode marc records fields and subfields, you
122 would end up in double encoding without using this function.
124 If $nfd is set, string normalization will use NFD instead of NFC
126 FIXME
127 In my opinion, this function belongs to MARC::Record and not
128 to this package.
129 But since it handles charset, and MARC::Record, it finds its way in that package
131 =cut
133 sub SetUTF8Flag{
134 my ($record, $nfd)=@_;
135 return unless ($record && $record->fields());
136 foreach my $field ($record->fields()){
137 if ($field->tag()>=10){
138 my @subfields;
139 foreach my $subfield ($field->subfields()){
140 push @subfields,($$subfield[0],NormalizeString($$subfield[1],$nfd));
142 eval {
143 my $newfield=MARC::Field->new(
144 $field->tag(),
145 $field->indicator(1),
146 $field->indicator(2),
147 @subfields
149 $field->replace_with($newfield);
151 warn "ERROR occurred in SetUTF8Flag $@" if $@;
156 =head2 NormalizeString
158 my $normalized_string=NormalizeString($string,$nfd,$transform);
160 Given a string
161 nfd : If you want to set NFD and not NFC
162 transform : If you expect all the signs to be removed
164 Sets the PERL UTF8 Flag on your initial data if need be
165 and applies cleaning if required
167 Returns a utf8 NFC normalized string
169 Sample code :
170 my $string=NormalizeString ("l'ornithoptère");
171 #results into ornithoptère in NFC form and sets UTF8 Flag
173 =cut
176 sub NormalizeString{
177 my ($string,$nfd,$transform)=@_;
178 utf8::decode($string) unless (utf8::is_utf8($string));
179 if ($nfd){
180 $string= NFD($string);
182 else {
183 $string=NFC($string);
185 if ($transform){
186 $string=~s/\<|\>|\^|\;|\.|\?|,|\-|\(|\)|\[|\]|\{|\}|\$|\%|\!|\*|\:|\\|\/|\&|\"|\'/ /g;
187 #removing one letter words "d'" "l'" was changed into "d " "l "
188 $string=~s/\b\S\b//g;
189 $string=~s/\s+$//g;
191 return $string;
194 =head2 MarcToUTF8Record
196 ($marc_record, $converted_from, $errors_arrayref) = MarcToUTF8Record($marc_blob,
197 $marc_flavour, [, $source_encoding]);
199 Given a MARC blob or a C<MARC::Record>, the MARC flavour, and an
200 optional source encoding, return a C<MARC::Record> that is
201 converted to UTF-8.
203 The returned C<$marc_record> is guaranteed to be in valid UTF-8, but
204 is not guaranteed to have been converted correctly. Specifically,
205 if C<$converted_from> is 'failed', the MARC record returned failed
206 character conversion and had each of its non-ASCII octets changed
207 to the Unicode replacement character.
209 If the source encoding was not specified, this routine will
210 try to guess it; the character encoding used for a successful
211 conversion is returned in C<$converted_from>.
213 =cut
215 sub MarcToUTF8Record {
216 my $marc = shift;
217 my $marc_flavour = shift;
218 my $source_encoding = shift;
219 my $marc_record;
220 my $marc_blob_is_utf8 = 0;
221 if (ref($marc) eq 'MARC::Record') {
222 my $marc_blob = $marc->as_usmarc();
223 $marc_blob_is_utf8 = IsStringUTF8ish($marc_blob);
224 $marc_record = $marc;
225 } else {
226 # dealing with a MARC blob
228 # remove any ersatz whitespace from the beginning and
229 # end of the MARC blob -- these can creep into MARC
230 # files produced by several sources -- caller really
231 # should be doing this, however
232 $marc =~ s/^\s+//;
233 $marc =~ s/\s+$//;
234 $marc_blob_is_utf8 = IsStringUTF8ish($marc);
235 eval {
236 $marc_record = MARC::Record->new_from_usmarc($marc);
238 if ($@) {
239 # if we fail the first time, one likely problem
240 # is that we have a MARC21 record that says that it's
241 # UTF-8 (Leader/09 = 'a') but contains non-UTF-8 characters.
242 # We'll try parsing it again.
243 substr($marc, 9, 1) = ' ';
244 eval {
245 $marc_record = MARC::Record->new_from_usmarc($marc);
247 if ($@) {
248 # it's hopeless; return an empty MARC::Record
249 return MARC::Record->new(), 'failed', ['could not parse MARC blob'];
254 # If we do not know the source encoding, try some guesses
255 # as follows:
256 # 1. Record is UTF-8 already.
257 # 2. If MARC flavor is MARC21 or NORMARC, then
258 # a. record is MARC-8
259 # b. record is ISO-8859-1
260 # 3. If MARC flavor is UNIMARC, then
261 if (not defined $source_encoding) {
262 if ($marc_blob_is_utf8) {
263 # note that for MARC21/NORMARC we are not bothering to check
264 # if the Leader/09 is set to 'a' or not -- because
265 # of problems with various ILSs (including Koha in the
266 # past, alas), this just is not trustworthy.
267 SetMarcUnicodeFlag($marc_record, $marc_flavour);
268 return $marc_record, 'UTF-8', [];
269 } else {
270 if ($marc_flavour eq 'MARC21' || $marc_flavour eq 'NORMARC') {
271 return _default_marc21_charconv_to_utf8($marc_record, $marc_flavour);
272 } elsif ($marc_flavour =~/UNIMARC/) {
273 return _default_unimarc_charconv_to_utf8($marc_record, $marc_flavour);
274 } else {
275 return _default_marc21_charconv_to_utf8($marc_record, $marc_flavour);
278 } else {
279 # caller knows the character encoding
280 my $original_marc_record = $marc_record->clone();
281 my @errors;
282 if ($source_encoding =~ /utf-?8/i) {
283 if ($marc_blob_is_utf8) {
284 SetMarcUnicodeFlag($marc_record, $marc_flavour);
285 return $marc_record, 'UTF-8', [];
286 } else {
287 push @errors, 'specified UTF-8 => UTF-8 conversion, but record is not in UTF-8';
289 } elsif ($source_encoding =~ /marc-?8/i) {
290 @errors = _marc_marc8_to_utf8($marc_record, $marc_flavour);
291 } elsif ($source_encoding =~ /5426/) {
292 @errors = _marc_iso5426_to_utf8($marc_record, $marc_flavour);
293 } else {
294 # assume any other character encoding is for Text::Iconv
295 @errors = _marc_to_utf8_via_text_iconv($marc_record, $marc_flavour, $source_encoding);
298 if (@errors) {
299 _marc_to_utf8_replacement_char($original_marc_record, $marc_flavour);
300 return $original_marc_record, 'failed', \@errors;
301 } else {
302 return $marc_record, $source_encoding, [];
308 =head2 SetMarcUnicodeFlag
310 SetMarcUnicodeFlag($marc_record, $marc_flavour);
312 Set both the internal MARC::Record encoding flag
313 and the appropriate Leader/09 (MARC21) or
314 100/26-29 (UNIMARC) to indicate that the record
315 is in UTF-8. Note that this does B<not> do
316 any actual character conversion.
318 =cut
320 sub SetMarcUnicodeFlag {
321 my $marc_record = shift;
322 my $marc_flavour = shift; # || C4::Context->preference("marcflavour");
324 $marc_record->encoding('UTF-8');
325 if ($marc_flavour eq 'MARC21' || $marc_flavour eq 'NORMARC') {
326 my $leader = $marc_record->leader();
327 substr($leader, 9, 1) = 'a';
328 $marc_record->leader($leader);
329 } elsif ($marc_flavour =~/UNIMARC/) {
330 my $defaultlanguage = C4::Context->preference("UNIMARCField100Language");
331 $defaultlanguage = "fre" if (!$defaultlanguage || length($defaultlanguage) != 3);
332 my $string;
333 my ($subflength,$encodingposition)=($marc_flavour=~/AUTH/?(21,12):(36,25));
334 $string=$marc_record->subfield( 100, "a" );
335 if (defined $string && length($string)==$subflength) {
336 $string = substr $string, 0,$subflength if (length($string)>$subflength);
338 else {
339 $string = POSIX::strftime( "%Y%m%d", localtime );
340 $string =~ s/\-//g;
341 $string = sprintf( "%-*s", $subflength, $string );
342 substr ( $string, ($encodingposition - 3), 3, $defaultlanguage);
344 substr( $string, $encodingposition, 3, "y50" );
345 if ( $marc_record->subfield( 100, "a" ) ) {
346 $marc_record->field('100')->update(a=>$string);
348 else {
349 $marc_record->insert_grouped_field(
350 MARC::Field->new( 100, '', '', "a" => $string ) );
352 $debug && warn "encodage: ", substr( $marc_record->subfield(100, 'a'), $encodingposition, 3 );
353 } else {
354 warn "Unrecognized marcflavour: $marc_flavour";
358 =head2 StripNonXmlChars
360 my $new_str = StripNonXmlChars($old_str);
362 Given a string, return a copy with the
363 characters that are illegal in XML
364 removed.
366 This function exists to work around a problem
367 that can occur with badly-encoded MARC records.
368 Specifically, if a UTF-8 MARC record also
369 has excape (\x1b) characters, MARC::File::XML
370 will let the escape characters pass through
371 when as_xml() or as_xml_record() is called. The
372 problem is that the escape character is not
373 legal in well-formed XML documents, so when
374 MARC::File::XML attempts to parse such a record,
375 the XML parser will fail.
377 Stripping such characters will allow a
378 MARC::Record->new_from_xml()
379 to work, at the possible risk of some data loss.
381 =cut
383 sub StripNonXmlChars {
384 my $str = shift;
385 if (!defined($str) || $str eq ""){
386 return "";
388 $str =~ s/[^\x09\x0A\x0D\x{0020}-\x{D7FF}\x{E000}-\x{FFFD}\x{10000}-\x{10FFFF}]//g;
389 return $str;
394 =head2 nsb_clean
396 =over 4
398 nsb_clean($string);
400 =back
402 Removes Non Sorting Block characters
404 =cut
405 sub nsb_clean {
406 my $NSB = '\x88' ; # NSB : begin Non Sorting Block
407 my $NSE = '\x89' ; # NSE : Non Sorting Block end
408 my $NSB2 = '\x98' ; # NSB : begin Non Sorting Block
409 my $NSE2 = '\x9C' ; # NSE : Non Sorting Block end
410 my $C2 = '\xC2' ; # What is this char ? It is sometimes left by the regexp after removing NSB / NSE
412 # handles non sorting blocks
413 my ($string) = @_ ;
414 $_ = $string ;
415 s/$NSB//g ;
416 s/$NSE//g ;
417 s/$NSB2//g ;
418 s/$NSE2//g ;
419 s/$C2//g ;
420 $string = $_ ;
422 return($string) ;
426 =head1 INTERNAL FUNCTIONS
428 =head2 _default_marc21_charconv_to_utf8
430 my ($new_marc_record, $guessed_charset) = _default_marc21_charconv_to_utf8($marc_record);
432 Converts a C<MARC::Record> of unknown character set to UTF-8,
433 first by trying a MARC-8 to UTF-8 conversion, then ISO-8859-1
434 to UTF-8, then a default conversion that replaces each non-ASCII
435 character with the replacement character.
437 The C<$guessed_charset> return value contains the character set
438 that resulted in a conversion to valid UTF-8; note that
439 if the MARC-8 and ISO-8859-1 conversions failed, the value of
440 this is 'failed'.
442 =cut
444 sub _default_marc21_charconv_to_utf8 {
445 my $marc_record = shift;
446 my $marc_flavour = shift;
448 my $trial_marc8 = $marc_record->clone();
449 my @all_errors = ();
450 my @errors = _marc_marc8_to_utf8($trial_marc8, $marc_flavour);
451 unless (@errors) {
452 return $trial_marc8, 'MARC-8', [];
454 push @all_errors, @errors;
456 my $trial_8859_1 = $marc_record->clone();
457 @errors = _marc_to_utf8_via_text_iconv($trial_8859_1, $marc_flavour, 'iso-8859-1');
458 unless (@errors) {
459 return $trial_8859_1, 'iso-8859-1', []; # note -- we could return \@all_errors
460 # instead if we wanted to report details
461 # of the failed attempt at MARC-8 => UTF-8
463 push @all_errors, @errors;
465 my $default_converted = $marc_record->clone();
466 _marc_to_utf8_replacement_char($default_converted, $marc_flavour);
467 return $default_converted, 'failed', \@all_errors;
470 =head2 _default_unimarc_charconv_to_utf8
472 my ($new_marc_record, $guessed_charset) = _default_unimarc_charconv_to_utf8($marc_record);
474 Converts a C<MARC::Record> of unknown character set to UTF-8,
475 first by trying a ISO-5426 to UTF-8 conversion, then ISO-8859-1
476 to UTF-8, then a default conversion that replaces each non-ASCII
477 character with the replacement character.
479 The C<$guessed_charset> return value contains the character set
480 that resulted in a conversion to valid UTF-8; note that
481 if the MARC-8 and ISO-8859-1 conversions failed, the value of
482 this is 'failed'.
484 =cut
486 sub _default_unimarc_charconv_to_utf8 {
487 my $marc_record = shift;
488 my $marc_flavour = shift;
490 my $trial_marc8 = $marc_record->clone();
491 my @all_errors = ();
492 my @errors = _marc_iso5426_to_utf8($trial_marc8, $marc_flavour);
493 unless (@errors) {
494 return $trial_marc8, 'iso-5426';
496 push @all_errors, @errors;
498 my $trial_8859_1 = $marc_record->clone();
499 @errors = _marc_to_utf8_via_text_iconv($trial_8859_1, $marc_flavour, 'iso-8859-1');
500 unless (@errors) {
501 return $trial_8859_1, 'iso-8859-1';
503 push @all_errors, @errors;
505 my $default_converted = $marc_record->clone();
506 _marc_to_utf8_replacement_char($default_converted, $marc_flavour);
507 return $default_converted, 'failed', \@all_errors;
510 =head2 _marc_marc8_to_utf8
512 my @errors = _marc_marc8_to_utf8($marc_record, $marc_flavour, $source_encoding);
514 Convert a C<MARC::Record> to UTF-8 in-place from MARC-8.
515 If the conversion fails for some reason, an
516 appropriate messages will be placed in the returned
517 C<@errors> array.
519 =cut
521 sub _marc_marc8_to_utf8 {
522 my $marc_record = shift;
523 my $marc_flavour = shift;
525 my $prev_ignore = MARC::Charset->ignore_errors();
526 MARC::Charset->ignore_errors(1);
528 # trap warnings raised by MARC::Charset
529 my @errors = ();
530 local $SIG{__WARN__} = sub {
531 my $msg = $_[0];
532 if ($msg =~ /MARC.Charset/) {
533 # FIXME - purpose of this regexp is to strip out the
534 # line reference to MARC/Charset.pm, but as it
535 # exists probably won't work quite on Windows --
536 # some sort of minimal-bunch back-tracking RE
537 # would be helpful here
538 $msg =~ s/at [\/].*?.MARC.Charset\.pm line \d+\.\n$//;
539 push @errors, $msg;
540 } else {
541 # if warning doesn't come from MARC::Charset, just
542 # pass it on
543 warn $msg;
547 foreach my $field ($marc_record->fields()) {
548 if ($field->is_control_field()) {
549 ; # do nothing -- control fields should not contain non-ASCII characters
550 } else {
551 my @converted_subfields;
552 foreach my $subfield ($field->subfields()) {
553 my $utf8sf = MARC::Charset::marc8_to_utf8($subfield->[1]);
554 unless (IsStringUTF8ish($utf8sf)) {
555 # Because of a bug in MARC::Charset 0.98, if the string
556 # has (a) one or more diacritics that (b) are only in character positions
557 # 128 to 255 inclusive, the resulting converted string is not in
558 # UTF-8, but the legacy 8-bit encoding (e.g., ISO-8859-1). If that
559 # occurs, upgrade the string in place. Moral of the story seems to be
560 # that pack("U", ...) is better than chr(...) if you need to guarantee
561 # that the resulting string is UTF-8.
562 utf8::upgrade($utf8sf);
564 push @converted_subfields, $subfield->[0], $utf8sf;
567 $field->replace_with(MARC::Field->new(
568 $field->tag(), $field->indicator(1), $field->indicator(2),
569 @converted_subfields)
574 MARC::Charset->ignore_errors($prev_ignore);
576 SetMarcUnicodeFlag($marc_record, $marc_flavour);
578 return @errors;
581 =head2 _marc_iso5426_to_utf8
583 my @errors = _marc_iso5426_to_utf8($marc_record, $marc_flavour, $source_encoding);
585 Convert a C<MARC::Record> to UTF-8 in-place from ISO-5426.
586 If the conversion fails for some reason, an
587 appropriate messages will be placed in the returned
588 C<@errors> array.
590 FIXME - is ISO-5426 equivalent enough to MARC-8
591 that C<MARC::Charset> can be used instead?
593 =cut
595 sub _marc_iso5426_to_utf8 {
596 my $marc_record = shift;
597 my $marc_flavour = shift;
599 my @errors = ();
601 foreach my $field ($marc_record->fields()) {
602 if ($field->is_control_field()) {
603 ; # do nothing -- control fields should not contain non-ASCII characters
604 } else {
605 my @converted_subfields;
606 foreach my $subfield ($field->subfields()) {
607 my $utf8sf = char_decode5426($subfield->[1]);
608 push @converted_subfields, $subfield->[0], $utf8sf;
611 $field->replace_with(MARC::Field->new(
612 $field->tag(), $field->indicator(1), $field->indicator(2),
613 @converted_subfields)
618 SetMarcUnicodeFlag($marc_record, $marc_flavour);
620 return @errors;
623 =head2 _marc_to_utf8_via_text_iconv
625 my @errors = _marc_to_utf8_via_text_iconv($marc_record, $marc_flavour, $source_encoding);
627 Convert a C<MARC::Record> to UTF-8 in-place using the
628 C<Text::Iconv> CPAN module. Any source encoding accepted
629 by the user's iconv installation should work. If
630 the source encoding is not recognized on the user's
631 server or the conversion fails for some reason,
632 appropriate messages will be placed in the returned
633 C<@errors> array.
635 =cut
637 sub _marc_to_utf8_via_text_iconv {
638 my $marc_record = shift;
639 my $marc_flavour = shift;
640 my $source_encoding = shift;
642 my @errors = ();
643 my $decoder;
644 eval { $decoder = Text::Iconv->new($source_encoding, 'utf8'); };
645 if ($@) {
646 push @errors, "Could not initialze $source_encoding => utf8 converter: $@";
647 return @errors;
650 my $prev_raise_error = Text::Iconv->raise_error();
651 Text::Iconv->raise_error(1);
653 foreach my $field ($marc_record->fields()) {
654 if ($field->is_control_field()) {
655 ; # do nothing -- control fields should not contain non-ASCII characters
656 } else {
657 my @converted_subfields;
658 foreach my $subfield ($field->subfields()) {
659 my $converted_value;
660 my $conversion_ok = 1;
661 eval { $converted_value = $decoder->convert($subfield->[1]); };
662 if ($@) {
663 $conversion_ok = 0;
664 push @errors, $@;
665 } elsif (not defined $converted_value) {
666 $conversion_ok = 0;
667 push @errors, "Text::Iconv conversion failed - retval is " . $decoder->retval();
670 if ($conversion_ok) {
671 push @converted_subfields, $subfield->[0], $converted_value;
672 } else {
673 $converted_value = $subfield->[1];
674 $converted_value =~ s/[\200-\377]/\xef\xbf\xbd/g;
675 push @converted_subfields, $subfield->[0], $converted_value;
679 $field->replace_with(MARC::Field->new(
680 $field->tag(), $field->indicator(1), $field->indicator(2),
681 @converted_subfields)
686 SetMarcUnicodeFlag($marc_record, $marc_flavour);
687 Text::Iconv->raise_error($prev_raise_error);
689 return @errors;
692 =head2 _marc_to_utf8_replacement_char
694 _marc_to_utf8_replacement_char($marc_record, $marc_flavour);
696 Convert a C<MARC::Record> to UTF-8 in-place, adopting the
697 unsatisfactory method of replacing all non-ASCII (e.g.,
698 where the eight bit is set) octet with the Unicode
699 replacement character. This is meant as a last-ditch
700 method, and would be best used as part of a UI that
701 lets a cataloguer pick various character conversions
702 until he or she finds the right one.
704 =cut
706 sub _marc_to_utf8_replacement_char {
707 my $marc_record = shift;
708 my $marc_flavour = shift;
710 foreach my $field ($marc_record->fields()) {
711 if ($field->is_control_field()) {
712 ; # do nothing -- control fields should not contain non-ASCII characters
713 } else {
714 my @converted_subfields;
715 foreach my $subfield ($field->subfields()) {
716 my $value = $subfield->[1];
717 $value =~ s/[\200-\377]/\xef\xbf\xbd/g;
718 push @converted_subfields, $subfield->[0], $value;
721 $field->replace_with(MARC::Field->new(
722 $field->tag(), $field->indicator(1), $field->indicator(2),
723 @converted_subfields)
728 SetMarcUnicodeFlag($marc_record, $marc_flavour);
731 =head2 char_decode5426
733 my $utf8string = char_decode5426($iso_5426_string);
735 Converts a string from ISO-5426 to UTF-8.
737 =cut
740 my %chars;
741 $chars{0xb0}=0x0101;#3/0ayn[ain]
742 $chars{0xb1}=0x0623;#3/1alif/hamzah[alefwithhamzaabove]
743 #$chars{0xb2}=0x00e0;#'à';
744 $chars{0xb2}=0x00e0;#3/2leftlowsinglequotationmark
745 #$chars{0xb3}=0x00e7;#'ç';
746 $chars{0xb3}=0x00e7;#3/2leftlowsinglequotationmark
747 # $chars{0xb4}='è';
748 $chars{0xb4}=0x00e8;
749 $chars{0xbd}=0x02b9;
750 $chars{0xbe}=0x02ba;
751 # $chars{0xb5}='é';
752 $chars{0xb5}=0x00e9;
753 $chars{0x97}=0x003c;#3/2leftlowsinglequotationmark
754 $chars{0x98}=0x003e;#3/2leftlowsinglequotationmark
755 $chars{0xfa}=0x0153; #oe
756 $chars{0xea}=0x0152; #oe
757 $chars{0x81d1}=0x00b0;
759 ####
760 ## combined characters iso5426
762 $chars{0xc041}=0x1ea2; # capital a with hook above
763 $chars{0xc045}=0x1eba; # capital e with hook above
764 $chars{0xc049}=0x1ec8; # capital i with hook above
765 $chars{0xc04f}=0x1ece; # capital o with hook above
766 $chars{0xc055}=0x1ee6; # capital u with hook above
767 $chars{0xc059}=0x1ef6; # capital y with hook above
768 $chars{0xc061}=0x1ea3; # small a with hook above
769 $chars{0xc065}=0x1ebb; # small e with hook above
770 $chars{0xc069}=0x1ec9; # small i with hook above
771 $chars{0xc06f}=0x1ecf; # small o with hook above
772 $chars{0xc075}=0x1ee7; # small u with hook above
773 $chars{0xc079}=0x1ef7; # small y with hook above
775 # 4/1 grave accent
776 $chars{0xc141}=0x00c0; # capital a with grave accent
777 $chars{0xc145}=0x00c8; # capital e with grave accent
778 $chars{0xc149}=0x00cc; # capital i with grave accent
779 $chars{0xc14f}=0x00d2; # capital o with grave accent
780 $chars{0xc155}=0x00d9; # capital u with grave accent
781 $chars{0xc157}=0x1e80; # capital w with grave
782 $chars{0xc159}=0x1ef2; # capital y with grave
783 $chars{0xc161}=0x00e0; # small a with grave accent
784 $chars{0xc165}=0x00e8; # small e with grave accent
785 $chars{0xc169}=0x00ec; # small i with grave accent
786 $chars{0xc16f}=0x00f2; # small o with grave accent
787 $chars{0xc175}=0x00f9; # small u with grave accent
788 $chars{0xc177}=0x1e81; # small w with grave
789 $chars{0xc179}=0x1ef3; # small y with grave
790 # 4/2 acute accent
791 $chars{0xc241}=0x00c1; # capital a with acute accent
792 $chars{0xc243}=0x0106; # capital c with acute accent
793 $chars{0xc245}=0x00c9; # capital e with acute accent
794 $chars{0xc247}=0x01f4; # capital g with acute
795 $chars{0xc249}=0x00cd; # capital i with acute accent
796 $chars{0xc24b}=0x1e30; # capital k with acute
797 $chars{0xc24c}=0x0139; # capital l with acute accent
798 $chars{0xc24d}=0x1e3e; # capital m with acute
799 $chars{0xc24e}=0x0143; # capital n with acute accent
800 $chars{0xc24f}=0x00d3; # capital o with acute accent
801 $chars{0xc250}=0x1e54; # capital p with acute
802 $chars{0xc252}=0x0154; # capital r with acute accent
803 $chars{0xc253}=0x015a; # capital s with acute accent
804 $chars{0xc255}=0x00da; # capital u with acute accent
805 $chars{0xc257}=0x1e82; # capital w with acute
806 $chars{0xc259}=0x00dd; # capital y with acute accent
807 $chars{0xc25a}=0x0179; # capital z with acute accent
808 $chars{0xc261}=0x00e1; # small a with acute accent
809 $chars{0xc263}=0x0107; # small c with acute accent
810 $chars{0xc265}=0x00e9; # small e with acute accent
811 $chars{0xc267}=0x01f5; # small g with acute
812 $chars{0xc269}=0x00ed; # small i with acute accent
813 $chars{0xc26b}=0x1e31; # small k with acute
814 $chars{0xc26c}=0x013a; # small l with acute accent
815 $chars{0xc26d}=0x1e3f; # small m with acute
816 $chars{0xc26e}=0x0144; # small n with acute accent
817 $chars{0xc26f}=0x00f3; # small o with acute accent
818 $chars{0xc270}=0x1e55; # small p with acute
819 $chars{0xc272}=0x0155; # small r with acute accent
820 $chars{0xc273}=0x015b; # small s with acute accent
821 $chars{0xc275}=0x00fa; # small u with acute accent
822 $chars{0xc277}=0x1e83; # small w with acute
823 $chars{0xc279}=0x00fd; # small y with acute accent
824 $chars{0xc27a}=0x017a; # small z with acute accent
825 $chars{0xc2e1}=0x01fc; # capital ae with acute
826 $chars{0xc2f1}=0x01fd; # small ae with acute
827 # 4/3 circumflex accent
828 $chars{0xc341}=0x00c2; # capital a with circumflex accent
829 $chars{0xc343}=0x0108; # capital c with circumflex
830 $chars{0xc345}=0x00ca; # capital e with circumflex accent
831 $chars{0xc347}=0x011c; # capital g with circumflex
832 $chars{0xc348}=0x0124; # capital h with circumflex
833 $chars{0xc349}=0x00ce; # capital i with circumflex accent
834 $chars{0xc34a}=0x0134; # capital j with circumflex
835 $chars{0xc34f}=0x00d4; # capital o with circumflex accent
836 $chars{0xc353}=0x015c; # capital s with circumflex
837 $chars{0xc355}=0x00db; # capital u with circumflex
838 $chars{0xc357}=0x0174; # capital w with circumflex
839 $chars{0xc359}=0x0176; # capital y with circumflex
840 $chars{0xc35a}=0x1e90; # capital z with circumflex
841 $chars{0xc361}=0x00e2; # small a with circumflex accent
842 $chars{0xc363}=0x0109; # small c with circumflex
843 $chars{0xc365}=0x00ea; # small e with circumflex accent
844 $chars{0xc367}=0x011d; # small g with circumflex
845 $chars{0xc368}=0x0125; # small h with circumflex
846 $chars{0xc369}=0x00ee; # small i with circumflex accent
847 $chars{0xc36a}=0x0135; # small j with circumflex
848 $chars{0xc36e}=0x00f1; # small n with tilde
849 $chars{0xc36f}=0x00f4; # small o with circumflex accent
850 $chars{0xc373}=0x015d; # small s with circumflex
851 $chars{0xc375}=0x00fb; # small u with circumflex
852 $chars{0xc377}=0x0175; # small w with circumflex
853 $chars{0xc379}=0x0177; # small y with circumflex
854 $chars{0xc37a}=0x1e91; # small z with circumflex
855 # 4/4 tilde
856 $chars{0xc441}=0x00c3; # capital a with tilde
857 $chars{0xc445}=0x1ebc; # capital e with tilde
858 $chars{0xc449}=0x0128; # capital i with tilde
859 $chars{0xc44e}=0x00d1; # capital n with tilde
860 $chars{0xc44f}=0x00d5; # capital o with tilde
861 $chars{0xc455}=0x0168; # capital u with tilde
862 $chars{0xc456}=0x1e7c; # capital v with tilde
863 $chars{0xc459}=0x1ef8; # capital y with tilde
864 $chars{0xc461}=0x00e3; # small a with tilde
865 $chars{0xc465}=0x1ebd; # small e with tilde
866 $chars{0xc469}=0x0129; # small i with tilde
867 $chars{0xc46e}=0x00f1; # small n with tilde
868 $chars{0xc46f}=0x00f5; # small o with tilde
869 $chars{0xc475}=0x0169; # small u with tilde
870 $chars{0xc476}=0x1e7d; # small v with tilde
871 $chars{0xc479}=0x1ef9; # small y with tilde
872 # 4/5 macron
873 $chars{0xc541}=0x0100; # capital a with macron
874 $chars{0xc545}=0x0112; # capital e with macron
875 $chars{0xc547}=0x1e20; # capital g with macron
876 $chars{0xc549}=0x012a; # capital i with macron
877 $chars{0xc54f}=0x014c; # capital o with macron
878 $chars{0xc555}=0x016a; # capital u with macron
879 $chars{0xc561}=0x0101; # small a with macron
880 $chars{0xc565}=0x0113; # small e with macron
881 $chars{0xc567}=0x1e21; # small g with macron
882 $chars{0xc569}=0x012b; # small i with macron
883 $chars{0xc56f}=0x014d; # small o with macron
884 $chars{0xc575}=0x016b; # small u with macron
885 $chars{0xc572}=0x0159; # small r with macron
886 $chars{0xc5e1}=0x01e2; # capital ae with macron
887 $chars{0xc5f1}=0x01e3; # small ae with macron
888 # 4/6 breve
889 $chars{0xc641}=0x0102; # capital a with breve
890 $chars{0xc645}=0x0114; # capital e with breve
891 $chars{0xc647}=0x011e; # capital g with breve
892 $chars{0xc649}=0x012c; # capital i with breve
893 $chars{0xc64f}=0x014e; # capital o with breve
894 $chars{0xc655}=0x016c; # capital u with breve
895 $chars{0xc661}=0x0103; # small a with breve
896 $chars{0xc665}=0x0115; # small e with breve
897 $chars{0xc667}=0x011f; # small g with breve
898 $chars{0xc669}=0x012d; # small i with breve
899 $chars{0xc66f}=0x014f; # small o with breve
900 $chars{0xc675}=0x016d; # small u with breve
901 # 4/7 dot above
902 $chars{0xc7b0}=0x01e1; # Ain with dot above
903 $chars{0xc742}=0x1e02; # capital b with dot above
904 $chars{0xc743}=0x010a; # capital c with dot above
905 $chars{0xc744}=0x1e0a; # capital d with dot above
906 $chars{0xc745}=0x0116; # capital e with dot above
907 $chars{0xc746}=0x1e1e; # capital f with dot above
908 $chars{0xc747}=0x0120; # capital g with dot above
909 $chars{0xc748}=0x1e22; # capital h with dot above
910 $chars{0xc749}=0x0130; # capital i with dot above
911 $chars{0xc74d}=0x1e40; # capital m with dot above
912 $chars{0xc74e}=0x1e44; # capital n with dot above
913 $chars{0xc750}=0x1e56; # capital p with dot above
914 $chars{0xc752}=0x1e58; # capital r with dot above
915 $chars{0xc753}=0x1e60; # capital s with dot above
916 $chars{0xc754}=0x1e6a; # capital t with dot above
917 $chars{0xc757}=0x1e86; # capital w with dot above
918 $chars{0xc758}=0x1e8a; # capital x with dot above
919 $chars{0xc759}=0x1e8e; # capital y with dot above
920 $chars{0xc75a}=0x017b; # capital z with dot above
921 $chars{0xc761}=0x0227; # small b with dot above
922 $chars{0xc762}=0x1e03; # small b with dot above
923 $chars{0xc763}=0x010b; # small c with dot above
924 $chars{0xc764}=0x1e0b; # small d with dot above
925 $chars{0xc765}=0x0117; # small e with dot above
926 $chars{0xc766}=0x1e1f; # small f with dot above
927 $chars{0xc767}=0x0121; # small g with dot above
928 $chars{0xc768}=0x1e23; # small h with dot above
929 $chars{0xc76d}=0x1e41; # small m with dot above
930 $chars{0xc76e}=0x1e45; # small n with dot above
931 $chars{0xc770}=0x1e57; # small p with dot above
932 $chars{0xc772}=0x1e59; # small r with dot above
933 $chars{0xc773}=0x1e61; # small s with dot above
934 $chars{0xc774}=0x1e6b; # small t with dot above
935 $chars{0xc777}=0x1e87; # small w with dot above
936 $chars{0xc778}=0x1e8b; # small x with dot above
937 $chars{0xc779}=0x1e8f; # small y with dot above
938 $chars{0xc77a}=0x017c; # small z with dot above
939 # 4/8 trema, diaresis
940 $chars{0xc820}=0x00a8; # diaeresis
941 $chars{0xc841}=0x00c4; # capital a with diaeresis
942 $chars{0xc845}=0x00cb; # capital e with diaeresis
943 $chars{0xc848}=0x1e26; # capital h with diaeresis
944 $chars{0xc849}=0x00cf; # capital i with diaeresis
945 $chars{0xc84f}=0x00d6; # capital o with diaeresis
946 $chars{0xc855}=0x00dc; # capital u with diaeresis
947 $chars{0xc857}=0x1e84; # capital w with diaeresis
948 $chars{0xc858}=0x1e8c; # capital x with diaeresis
949 $chars{0xc859}=0x0178; # capital y with diaeresis
950 $chars{0xc861}=0x00e4; # small a with diaeresis
951 $chars{0xc865}=0x00eb; # small e with diaeresis
952 $chars{0xc868}=0x1e27; # small h with diaeresis
953 $chars{0xc869}=0x00ef; # small i with diaeresis
954 $chars{0xc86f}=0x00f6; # small o with diaeresis
955 $chars{0xc874}=0x1e97; # small t with diaeresis
956 $chars{0xc875}=0x00fc; # small u with diaeresis
957 $chars{0xc877}=0x1e85; # small w with diaeresis
958 $chars{0xc878}=0x1e8d; # small x with diaeresis
959 $chars{0xc879}=0x00ff; # small y with diaeresis
960 # 4/9 umlaut
961 $chars{0xc920}=0x00a8; # [diaeresis]
962 $chars{0xc961}=0x00e4; # a with umlaut
963 $chars{0xc965}=0x00eb; # e with umlaut
964 $chars{0xc969}=0x00ef; # i with umlaut
965 $chars{0xc96f}=0x00f6; # o with umlaut
966 $chars{0xc975}=0x00fc; # u with umlaut
967 # 4/10 circle above
968 $chars{0xca41}=0x00c5; # capital a with ring above
969 $chars{0xcaad}=0x016e; # capital u with ring above
970 $chars{0xca61}=0x00e5; # small a with ring above
971 $chars{0xca75}=0x016f; # small u with ring above
972 $chars{0xca77}=0x1e98; # small w with ring above
973 $chars{0xca79}=0x1e99; # small y with ring above
974 # 4/11 high comma off centre
975 # 4/12 inverted high comma centred
976 # 4/13 double acute accent
977 $chars{0xcd4f}=0x0150; # capital o with double acute
978 $chars{0xcd55}=0x0170; # capital u with double acute
979 $chars{0xcd6f}=0x0151; # small o with double acute
980 $chars{0xcd75}=0x0171; # small u with double acute
981 # 4/14 horn
982 $chars{0xce54}=0x01a0; # latin capital letter o with horn
983 $chars{0xce55}=0x01af; # latin capital letter u with horn
984 $chars{0xce74}=0x01a1; # latin small letter o with horn
985 $chars{0xce75}=0x01b0; # latin small letter u with horn
986 # 4/15 caron (hacek
987 $chars{0xcf41}=0x01cd; # capital a with caron
988 $chars{0xcf43}=0x010c; # capital c with caron
989 $chars{0xcf44}=0x010e; # capital d with caron
990 $chars{0xcf45}=0x011a; # capital e with caron
991 $chars{0xcf47}=0x01e6; # capital g with caron
992 $chars{0xcf49}=0x01cf; # capital i with caron
993 $chars{0xcf4b}=0x01e8; # capital k with caron
994 $chars{0xcf4c}=0x013d; # capital l with caron
995 $chars{0xcf4e}=0x0147; # capital n with caron
996 $chars{0xcf4f}=0x01d1; # capital o with caron
997 $chars{0xcf52}=0x0158; # capital r with caron
998 $chars{0xcf53}=0x0160; # capital s with caron
999 $chars{0xcf54}=0x0164; # capital t with caron
1000 $chars{0xcf55}=0x01d3; # capital u with caron
1001 $chars{0xcf5a}=0x017d; # capital z with caron
1002 $chars{0xcf61}=0x01ce; # small a with caron
1003 $chars{0xcf63}=0x010d; # small c with caron
1004 $chars{0xcf64}=0x010f; # small d with caron
1005 $chars{0xcf65}=0x011b; # small e with caron
1006 $chars{0xcf67}=0x01e7; # small g with caron
1007 $chars{0xcf69}=0x01d0; # small i with caron
1008 $chars{0xcf6a}=0x01f0; # small j with caron
1009 $chars{0xcf6b}=0x01e9; # small k with caron
1010 $chars{0xcf6c}=0x013e; # small l with caron
1011 $chars{0xcf6e}=0x0148; # small n with caron
1012 $chars{0xcf6f}=0x01d2; # small o with caron
1013 $chars{0xcf72}=0x0159; # small r with caron
1014 $chars{0xcf73}=0x0161; # small s with caron
1015 $chars{0xcf74}=0x0165; # small t with caron
1016 $chars{0xcf75}=0x01d4; # small u with caron
1017 $chars{0xcf7a}=0x017e; # small z with caron
1018 # 5/0 cedilla
1019 $chars{0xd020}=0x00b8; # cedilla
1020 $chars{0xd043}=0x00c7; # capital c with cedilla
1021 $chars{0xd044}=0x1e10; # capital d with cedilla
1022 $chars{0xd047}=0x0122; # capital g with cedilla
1023 $chars{0xd048}=0x1e28; # capital h with cedilla
1024 $chars{0xd04b}=0x0136; # capital k with cedilla
1025 $chars{0xd04c}=0x013b; # capital l with cedilla
1026 $chars{0xd04e}=0x0145; # capital n with cedilla
1027 $chars{0xd052}=0x0156; # capital r with cedilla
1028 $chars{0xd053}=0x015e; # capital s with cedilla
1029 $chars{0xd054}=0x0162; # capital t with cedilla
1030 $chars{0xd063}=0x00e7; # small c with cedilla
1031 $chars{0xd064}=0x1e11; # small d with cedilla
1032 $chars{0xd065}=0x0119; # small e with cedilla
1033 $chars{0xd067}=0x0123; # small g with cedilla
1034 $chars{0xd068}=0x1e29; # small h with cedilla
1035 $chars{0xd06b}=0x0137; # small k with cedilla
1036 $chars{0xd06c}=0x013c; # small l with cedilla
1037 $chars{0xd06e}=0x0146; # small n with cedilla
1038 $chars{0xd072}=0x0157; # small r with cedilla
1039 $chars{0xd073}=0x015f; # small s with cedilla
1040 $chars{0xd074}=0x0163; # small t with cedilla
1041 # 5/1 rude
1042 # 5/2 hook to left
1043 # 5/3 ogonek (hook to right
1044 $chars{0xd320}=0x02db; # ogonek
1045 $chars{0xd341}=0x0104; # capital a with ogonek
1046 $chars{0xd345}=0x0118; # capital e with ogonek
1047 $chars{0xd349}=0x012e; # capital i with ogonek
1048 $chars{0xd34f}=0x01ea; # capital o with ogonek
1049 $chars{0xd355}=0x0172; # capital u with ogonek
1050 $chars{0xd361}=0x0105; # small a with ogonek
1051 $chars{0xd365}=0x0119; # small e with ogonek
1052 $chars{0xd369}=0x012f; # small i with ogonek
1053 $chars{0xd36f}=0x01eb; # small o with ogonek
1054 $chars{0xd375}=0x0173; # small u with ogonek
1055 # 5/4 circle below
1056 $chars{0xd441}=0x1e00; # capital a with ring below
1057 $chars{0xd461}=0x1e01; # small a with ring below
1058 # 5/5 half circle below
1059 $chars{0xf948}=0x1e2a; # capital h with breve below
1060 $chars{0xf968}=0x1e2b; # small h with breve below
1061 # 5/6 dot below
1062 $chars{0xd641}=0x1ea0; # capital a with dot below
1063 $chars{0xd642}=0x1e04; # capital b with dot below
1064 $chars{0xd644}=0x1e0c; # capital d with dot below
1065 $chars{0xd645}=0x1eb8; # capital e with dot below
1066 $chars{0xd648}=0x1e24; # capital h with dot below
1067 $chars{0xd649}=0x1eca; # capital i with dot below
1068 $chars{0xd64b}=0x1e32; # capital k with dot below
1069 $chars{0xd64c}=0x1e36; # capital l with dot below
1070 $chars{0xd64d}=0x1e42; # capital m with dot below
1071 $chars{0xd64e}=0x1e46; # capital n with dot below
1072 $chars{0xd64f}=0x1ecc; # capital o with dot below
1073 $chars{0xd652}=0x1e5a; # capital r with dot below
1074 $chars{0xd653}=0x1e62; # capital s with dot below
1075 $chars{0xd654}=0x1e6c; # capital t with dot below
1076 $chars{0xd655}=0x1ee4; # capital u with dot below
1077 $chars{0xd656}=0x1e7e; # capital v with dot below
1078 $chars{0xd657}=0x1e88; # capital w with dot below
1079 $chars{0xd659}=0x1ef4; # capital y with dot below
1080 $chars{0xd65a}=0x1e92; # capital z with dot below
1081 $chars{0xd661}=0x1ea1; # small a with dot below
1082 $chars{0xd662}=0x1e05; # small b with dot below
1083 $chars{0xd664}=0x1e0d; # small d with dot below
1084 $chars{0xd665}=0x1eb9; # small e with dot below
1085 $chars{0xd668}=0x1e25; # small h with dot below
1086 $chars{0xd669}=0x1ecb; # small i with dot below
1087 $chars{0xd66b}=0x1e33; # small k with dot below
1088 $chars{0xd66c}=0x1e37; # small l with dot below
1089 $chars{0xd66d}=0x1e43; # small m with dot below
1090 $chars{0xd66e}=0x1e47; # small n with dot below
1091 $chars{0xd66f}=0x1ecd; # small o with dot below
1092 $chars{0xd672}=0x1e5b; # small r with dot below
1093 $chars{0xd673}=0x1e63; # small s with dot below
1094 $chars{0xd674}=0x1e6d; # small t with dot below
1095 $chars{0xd675}=0x1ee5; # small u with dot below
1096 $chars{0xd676}=0x1e7f; # small v with dot below
1097 $chars{0xd677}=0x1e89; # small w with dot below
1098 $chars{0xd679}=0x1ef5; # small y with dot below
1099 $chars{0xd67a}=0x1e93; # small z with dot below
1100 # 5/7 double dot below
1101 $chars{0xd755}=0x1e72; # capital u with diaeresis below
1102 $chars{0xd775}=0x1e73; # small u with diaeresis below
1103 # 5/8 underline
1104 $chars{0xd820}=0x005f; # underline
1105 # 5/9 double underline
1106 $chars{0xd920}=0x2017; # double underline
1107 # 5/10 small low vertical bar
1108 $chars{0xda20}=0x02cc; #
1109 # 5/11 circumflex below
1110 # 5/12 (this position shall not be used)
1111 # 5/13 left half of ligature sign and of double tilde
1112 # 5/14 right half of ligature sign
1113 # 5/15 right half of double tilde
1114 # map {printf "%x :%x\n",$_,$chars{$_};}keys %chars;
1116 sub char_decode5426 {
1117 my ( $string) = @_;
1118 my $result;
1120 my @data = unpack("C*", $string);
1121 my @characters;
1122 my $length=scalar(@data);
1123 for (my $i = 0; $i < scalar(@data); $i++) {
1124 my $char= $data[$i];
1125 if ($char >= 0x00 && $char <= 0x7F){
1126 #IsAscii
1128 push @characters,$char unless ($char<0x02 ||$char== 0x0F);
1129 }elsif (($char >= 0xC0 && $char <= 0xDF)) {
1130 #Combined Char
1131 my $convchar ;
1132 if ($chars{$char*256+$data[$i+1]}) {
1133 $convchar= $chars{$char * 256 + $data[$i+1]};
1134 $i++;
1135 # printf "char %x $char, char to convert %x , converted %x\n",$char,$char * 256 + $data[$i - 1],$convchar;
1136 } elsif ($chars{$char}) {
1137 $convchar= $chars{$char};
1138 # printf "0xC char %x, converted %x\n",$char,$chars{$char};
1139 }else {
1140 $convchar=$char;
1142 push @characters,$convchar;
1143 } else {
1144 my $convchar;
1145 if ($chars{$char}) {
1146 $convchar= $chars{$char};
1147 # printf "char %x, converted %x\n",$char,$chars{$char};
1148 }else {
1149 # printf "char %x $char\n",$char;
1150 $convchar=$char;
1152 push @characters,$convchar;
1155 $result=pack "U*",@characters;
1156 # $result=~s/\x01//;
1157 # $result=~s/\x00//;
1158 $result=~s/\x0f//;
1159 $result=~s/\x1b.//;
1160 $result=~s/\x0e//;
1161 $result=~s/\x1b\x5b//;
1162 # map{printf "%x",$_} @characters;
1163 # printf "\n";
1164 return $result;
1170 =head1 AUTHOR
1172 Koha Development Team <http://koha-community.org/>
1174 Galen Charlton <galen.charlton@liblime.com>
1176 =cut