Bug 12164: (followup) MoveOrders description corrected
[koha.git] / C4 / Charset.pm
blob2950a71a8bd4ccd3b9078af061c6b9a34381c8af
1 package C4::Charset;
3 # Copyright (C) 2008 LibLime
5 # This file is part of Koha.
7 # Koha is free software; you can redistribute it and/or modify it under the
8 # terms of the GNU General Public License as published by the Free Software
9 # Foundation; either version 2 of the License, or (at your option) any later
10 # version.
12 # Koha is distributed in the hope that it will be useful, but WITHOUT ANY
13 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
14 # A PARTICULAR PURPOSE. See the GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License along
17 # with Koha; if not, write to the Free Software Foundation, Inc.,
18 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
20 use strict;
21 use warnings;
23 use MARC::Charset qw/marc8_to_utf8/;
24 use Text::Iconv;
25 use C4::Context;
26 use C4::Debug;
27 use Unicode::Normalize;
29 use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
BEGIN {
    # Exporter provides the import() method that installs the names
    # listed in @EXPORT into the namespace of whoever uses this module.
    require Exporter;
    @ISA = qw(Exporter);

    # Module version, consulted when a caller asks for a minimum version.
    $VERSION = 3.07.00.049;

    # Functions exported by default.
    @EXPORT = qw(
        NormalizeString IsStringUTF8ish MarcToUTF8Record
        SetUTF8Flag SetMarcUnicodeFlag
        StripNonXmlChars nsb_clean
    );
}
47 =encoding UTF-8
49 =head1 NAME
51 C4::Charset - utilities for handling character set conversions.
53 =head1 SYNOPSIS
55 use C4::Charset;
57 =head1 DESCRIPTION
59 This module contains routines for dealing with character set
60 conversions, particularly for MARC records.
62 A variety of character encodings are in use by various MARC
63 standards, and even more character encodings are used by
64 non-standard MARC records. The various MARC formats generally
65 do not do a good job of advertising a given record's character
66 encoding, and even when a record does advertise its encoding,
67 e.g., via the Leader/09, experience has shown that one cannot
68 trust it.
Ultimately, all MARC records are stored in Koha in UTF-8 and
must be converted from whatever the source character encoding is.
The goal of this module is to ensure that these conversions
take place accurately. When a character conversion cannot take
place, or at least not accurately, the module will provide
enough information to allow user-facing code to inform the user
on how to deal with the situation.
78 =cut
80 =head1 FUNCTIONS
82 =head2 IsStringUTF8ish
84 my $is_utf8 = IsStringUTF8ish($str);
86 Determines if C<$str> is valid UTF-8. This can mean
87 one of two things:
89 =over
91 =item *
93 The Perl UTF-8 flag is set and the string contains valid UTF-8.
95 =item *
97 The Perl UTF-8 flag is B<not> set, but the octets contain
98 valid UTF-8.
100 =back
The function is named C<IsStringUTF8ish> instead of C<IsStringUTF8>
because one could be presented with a MARC blob that is
not actually in UTF-8 but whose sequence of octets appears to be
valid UTF-8. The rest of the MARC character conversion functions
will assume that this situation does not occur very often.
108 =cut
sub IsStringUTF8ish {
    my ($candidate) = @_;

    # A string that already carries Perl's UTF-8 flag is accepted as is.
    # Otherwise utf8::decode() reports whether the raw octets form valid
    # UTF-8; it decodes in place, but only this sub's private copy is
    # touched, never the caller's variable.
    return utf8::is_utf8($candidate)
        ? 1
        : utf8::decode($candidate);
}
117 =head2 SetUTF8Flag
119 my $marc_record = SetUTF8Flag($marc_record, $nfd);
121 This function sets the PERL UTF8 flag for data.
122 It is required when using new_from_usmarc
123 since MARC::File::USMARC does not handle PERL UTF8 setting.
124 When editing unicode marc records fields and subfields, you
125 would end up in double encoding without using this function.
127 If $nfd is set, string normalization will use NFD instead of NFC
129 FIXME
130 In my opinion, this function belongs to MARC::Record and not
131 to this package.
132 But since it handles charset, and MARC::Record, it finds its way in that package
134 =cut
sub SetUTF8Flag{
    # Rebuild every field whose tag is numerically >= 10 with subfield
    # values passed through NormalizeString(), which sets the Perl UTF-8
    # flag and applies NFC (or NFD when $nfd is true) normalization.
    #
    # Parameters:
    #   $record - record object providing fields(); modified in place
    #   $nfd    - true to normalize to NFD instead of NFC
    #
    # Returns the (modified) record, as the documented usage
    #   my $marc_record = SetUTF8Flag($marc_record, $nfd);
    # expects.  Returns nothing when $record is false or has no fields.
    my ($record, $nfd)=@_;
    return unless ($record && $record->fields());

    foreach my $field ($record->fields()){
        # Lower tags are left untouched (presumably control fields,
        # which carry no subfields -- confirm against MARC::Field).
        next unless $field->tag()>=10;

        my @subfields;
        foreach my $subfield ($field->subfields()){
            my ($code, $value) = @$subfield;
            push @subfields, $code, NormalizeString($value, $nfd);
        }

        # Building or swapping the field may die (e.g. a data field
        # without subfields); report it and carry on with the next field.
        eval {
            my $newfield=MARC::Field->new(
                $field->tag(),
                $field->indicator(1),
                $field->indicator(2),
                @subfields
            );
            $field->replace_with($newfield);
        };
        warn "ERROR occurred in SetUTF8Flag $@" if $@;
    }

    return $record;
}
159 =head2 NormalizeString
161 my $normalized_string=NormalizeString($string,$nfd,$transform);
163 Given a string
164 nfd : If you want to set NFD and not NFC
165 transform : If you expect all the signs to be removed
167 Sets the PERL UTF8 Flag on your initial data if need be
168 and applies cleaning if required
170 Returns a utf8 NFC normalized string
172 Sample code :
173 my $string=NormalizeString ("l'ornithoptère");
174 #results into ornithoptère in NFC form and sets UTF8 Flag
176 =cut
sub NormalizeString{
    # Decode $string to Perl characters if it is not flagged as UTF-8 yet,
    # normalize it (NFD when $nfd is true, NFC otherwise) and, when
    # $transform is true, additionally blank out punctuation, drop
    # one-character words and strip trailing whitespace.
    my ($string,$nfd,$transform)=@_;

    utf8::decode($string) if !utf8::is_utf8($string);
    $string = $nfd ? NFD($string) : NFC($string);

    # Without $transform the normalized string is the final result.
    return $string unless $transform;

    # Every listed punctuation sign becomes a single space ...
    $string=~s/\<|\>|\^|\;|\.|\?|,|\-|\(|\)|\[|\]|\{|\}|\$|\%|\!|\*|\:|\\|\/|\&|\"|\'/ /g;
    # ... which turns elisions such as "d'" / "l'" into the one-letter
    # words "d " / "l "; those are removed here.
    $string=~s/\b\S\b//g;
    $string=~s/\s+$//g;

    return $string;
}
197 =head2 MarcToUTF8Record
199 ($marc_record, $converted_from, $errors_arrayref) = MarcToUTF8Record($marc_blob,
200 $marc_flavour, [, $source_encoding]);
202 Given a MARC blob or a C<MARC::Record>, the MARC flavour, and an
203 optional source encoding, return a C<MARC::Record> that is
204 converted to UTF-8.
206 The returned C<$marc_record> is guaranteed to be in valid UTF-8, but
207 is not guaranteed to have been converted correctly. Specifically,
208 if C<$converted_from> is 'failed', the MARC record returned failed
209 character conversion and had each of its non-ASCII octets changed
210 to the Unicode replacement character.
212 If the source encoding was not specified, this routine will
213 try to guess it; the character encoding used for a successful
214 conversion is returned in C<$converted_from>.
216 =cut
sub MarcToUTF8Record {
    # Turn a MARC blob or MARC::Record into a UTF-8 MARC::Record.
    # Returns the list ($marc_record, $converted_from, $errors_arrayref);
    # $converted_from is 'failed' when no conversion succeeded.
    my ($marc, $marc_flavour, $source_encoding) = @_;

    my ($marc_record, $looks_like_utf8);

    if (ref($marc) eq 'MARC::Record') {
        # Already parsed: serialise it only to inspect the octets.
        $marc_record     = $marc;
        $looks_like_utf8 = IsStringUTF8ish($marc->as_usmarc());
    }
    else {
        # Raw MARC blob.  Stray whitespace at either end can creep into
        # files produced by several sources; the caller really should
        # strip it, but do so here anyway.
        $marc =~ s/^\s+//;
        $marc =~ s/\s+$//;
        $looks_like_utf8 = IsStringUTF8ish($marc);

        eval { $marc_record = MARC::Record->new_from_usmarc($marc); };
        if ($@) {
            # A likely cause is a MARC21 record that claims to be UTF-8
            # (Leader/09 = 'a') but holds non-UTF-8 characters: blank
            # that leader position and parse once more.
            substr($marc, 9, 1) = ' ';
            eval { $marc_record = MARC::Record->new_from_usmarc($marc); };

            # Hopeless: hand back an empty record.
            return (MARC::Record->new(), 'failed', ['could not parse MARC blob'])
                if $@;
        }
    }

    # Unknown source encoding -- guess, in this order:
    #   1. the record is UTF-8 already;
    #   2. MARC21 / NORMARC (and any unrecognised flavour):
    #      MARC-8, then ISO-8859-1;
    #   3. UNIMARC: ISO-5426, then ISO-8859-1.
    unless (defined $source_encoding) {
        if ($looks_like_utf8) {
            # Leader/09 is deliberately not consulted for MARC21/NORMARC:
            # various ILSs (including Koha in the past) set it wrongly,
            # so it cannot be trusted.
            SetMarcUnicodeFlag($marc_record, $marc_flavour);
            return ($marc_record, 'UTF-8', []);
        }

        my $marc21_like = ($marc_flavour eq 'MARC21' || $marc_flavour eq 'NORMARC');
        return _default_unimarc_charconv_to_utf8($marc_record, $marc_flavour)
            if !$marc21_like && $marc_flavour =~/UNIMARC/;
        return _default_marc21_charconv_to_utf8($marc_record, $marc_flavour);
    }

    # The caller named the encoding.  Keep a pristine copy so a failed
    # conversion can fall back to replacement characters.
    my $pristine_record = $marc_record->clone();
    my @errors;

    if ($source_encoding =~ /utf-?8/i) {
        if ($looks_like_utf8) {
            SetMarcUnicodeFlag($marc_record, $marc_flavour);
            return ($marc_record, 'UTF-8', []);
        }
        push @errors, 'specified UTF-8 => UTF-8 conversion, but record is not in UTF-8';
    }
    elsif ($source_encoding =~ /marc-?8/i) {
        @errors = _marc_marc8_to_utf8($marc_record, $marc_flavour);
    }
    elsif ($source_encoding =~ /5426/) {
        @errors = _marc_iso5426_to_utf8($marc_record, $marc_flavour);
    }
    else {
        # Any other encoding name is handed to Text::Iconv.
        @errors = _marc_to_utf8_via_text_iconv($marc_record, $marc_flavour, $source_encoding);
    }

    return ($marc_record, $source_encoding, []) unless @errors;

    _marc_to_utf8_replacement_char($pristine_record, $marc_flavour);
    return ($pristine_record, 'failed', \@errors);
}
311 =head2 SetMarcUnicodeFlag
313 SetMarcUnicodeFlag($marc_record, $marc_flavour);
315 Set both the internal MARC::Record encoding flag
316 and the appropriate Leader/09 (MARC21) or
317 100/26-29 (UNIMARC) to indicate that the record
318 is in UTF-8. Note that this does B<not> do
319 any actual character conversion.
321 =cut
sub SetMarcUnicodeFlag {
    # Mark $marc_record as UTF-8 without converting any data:
    #   * always sets the MARC::Record encoding to 'UTF-8';
    #   * MARC21 / NORMARC: writes 'a' into Leader position 9;
    #   * UNIMARC*: writes "y50" at the encoding offset of 100$a,
    #     creating field 100 / subfield a when needed;
    #   * any other flavour: only warns.
    #
    # Parameters:
    #   $marc_record  - record to flag (modified in place)
    #   $marc_flavour - 'MARC21', 'NORMARC' or a string containing 'UNIMARC'
    #                   (one that also contains 'AUTH' selects the shorter
    #                   100$a layout)
    my $marc_record = shift;
    my $marc_flavour = shift; # || C4::Context->preference("marcflavour");

    $marc_record->encoding('UTF-8');
    if ($marc_flavour eq 'MARC21' || $marc_flavour eq 'NORMARC') {
        my $leader = $marc_record->leader();
        substr($leader, 9, 1) = 'a';
        $marc_record->leader($leader);
    } elsif ($marc_flavour =~/UNIMARC/) {
        my $defaultlanguage = C4::Context->preference("UNIMARCField100Language");
        $defaultlanguage = "fre" if (!$defaultlanguage || length($defaultlanguage) != 3);

        # Expected length of 100$a and offset of the encoding code within it.
        my ($subflength,$encodingposition)=($marc_flavour=~/AUTH/?(21,12):(36,25));

        my $field100 = $marc_record->field('100');
        my $string   = $marc_record->subfield( 100, "a" );

        # A missing or wrongly sized 100$a is rebuilt from scratch: today's
        # date, space-padded to the expected length, with the default
        # language code in the three characters before the encoding offset.
        unless (defined $string && length($string)==$subflength) {
            # Load POSIX here instead of relying on another module
            # having pulled it in already.
            require POSIX;
            my $today = POSIX::strftime( "%Y%m%d", localtime );
            $string = sprintf( "%-*s", $subflength, $today );
            substr ( $string, ($encodingposition - 3), 3, $defaultlanguage);
        }
        substr( $string, $encodingposition, 3, "y50" );

        # Decide on the presence of the field, not on the truthiness of
        # its $a: update() appends a missing subfield, so an existing 100
        # with an absent or empty $a no longer gets a duplicate 100 field.
        if ($field100) {
            $field100->update(a=>$string);
        }
        else {
            $marc_record->insert_grouped_field(
                MARC::Field->new( 100, '', '', "a" => $string ) );
        }
        $debug && warn "encodage: ", substr( $marc_record->subfield(100, 'a'), $encodingposition, 3 );
    } else {
        warn "Unrecognized marcflavour: $marc_flavour";
    }
}
361 =head2 StripNonXmlChars
363 my $new_str = StripNonXmlChars($old_str);
365 Given a string, return a copy with the
366 characters that are illegal in XML
367 removed.
This function exists to work around a problem
that can occur with badly-encoded MARC records.
Specifically, if a UTF-8 MARC record also
has escape (\x1b) characters, MARC::File::XML
will let the escape characters pass through
when as_xml() or as_xml_record() is called. The
problem is that the escape character is not
legal in well-formed XML documents, so when
MARC::File::XML attempts to parse such a record,
the XML parser will fail.
380 Stripping such characters will allow a
381 MARC::Record->new_from_xml()
382 to work, at the possible risk of some data loss.
384 =cut
sub StripNonXmlChars {
    my ($str) = @_;

    # Undefined and empty input both come back as the empty string.
    return "" unless defined($str) && $str ne "";

    # Delete every character outside the listed ranges: tab, LF, CR,
    # U+0020..U+D7FF, U+E000..U+FFFD and U+10000..U+10FFFF.
    $str =~ s/[^\x09\x0A\x0D\x{0020}-\x{D7FF}\x{E000}-\x{FFFD}\x{10000}-\x{10FFFF}]//g;

    return $str;
}
397 =head2 nsb_clean
399 =over 4
401 nsb_clean($string);
403 =back
405 Removes Non Sorting Block characters
407 =cut
sub nsb_clean {
    # Return a copy of $string with the Non Sorting Block markers removed.
    #
    # Characters deleted:
    #   \x88 / \x89 - NSB begin / NSE end
    #   \x98 / \x9C - alternative NSB begin / NSE end
    #   \xC2        - sometimes left behind once a marker has been
    #                 removed (NOTE(review): looks like the lead octet of
    #                 the markers' two-octet UTF-8 form -- confirm).
    #                 Every \xC2 is removed, not only those that
    #                 preceded a marker.
    #
    # Works on a private copy: neither the caller's argument nor the
    # global $_ is modified (the previous implementation assigned to $_
    # and so overwrote whatever the caller had aliased to it).
    my ($string) = @_;

    $string =~ s/[\x88\x89\x98\x9C\xC2]//g;

    return $string;
}
429 =head1 INTERNAL FUNCTIONS
431 =head2 _default_marc21_charconv_to_utf8
433 my ($new_marc_record, $guessed_charset) = _default_marc21_charconv_to_utf8($marc_record);
435 Converts a C<MARC::Record> of unknown character set to UTF-8,
436 first by trying a MARC-8 to UTF-8 conversion, then ISO-8859-1
437 to UTF-8, then a default conversion that replaces each non-ASCII
438 character with the replacement character.
440 The C<$guessed_charset> return value contains the character set
441 that resulted in a conversion to valid UTF-8; note that
442 if the MARC-8 and ISO-8859-1 conversions failed, the value of
443 this is 'failed'.
445 =cut
sub _default_marc21_charconv_to_utf8 {
    # Guess the encoding of $marc_record by trial conversion, each
    # attempt working on its own clone so the input stays untouched.
    # Returns (record, charset name or 'failed', errors arrayref).
    my ($marc_record, $marc_flavour) = @_;

    my @failures;

    # First guess: MARC-8.
    my $as_marc8 = $marc_record->clone();
    my @attempt_errors = _marc_marc8_to_utf8($as_marc8, $marc_flavour);
    return ($as_marc8, 'MARC-8', []) if !@attempt_errors;
    push @failures, @attempt_errors;

    # Second guess: ISO-8859-1.  \@failures could be returned here
    # instead of [] if details of the failed MARC-8 attempt were wanted.
    my $as_latin1 = $marc_record->clone();
    @attempt_errors = _marc_to_utf8_via_text_iconv($as_latin1, $marc_flavour, 'iso-8859-1');
    return ($as_latin1, 'iso-8859-1', []) if !@attempt_errors;
    push @failures, @attempt_errors;

    # Last resort: non-ASCII octets become the replacement character.
    my $fallback = $marc_record->clone();
    _marc_to_utf8_replacement_char($fallback, $marc_flavour);
    return ($fallback, 'failed', \@failures);
}
473 =head2 _default_unimarc_charconv_to_utf8
475 my ($new_marc_record, $guessed_charset) = _default_unimarc_charconv_to_utf8($marc_record);
477 Converts a C<MARC::Record> of unknown character set to UTF-8,
478 first by trying a ISO-5426 to UTF-8 conversion, then ISO-8859-1
479 to UTF-8, then a default conversion that replaces each non-ASCII
480 character with the replacement character.
The C<$guessed_charset> return value contains the character set
that resulted in a conversion to valid UTF-8; note that
if the ISO-5426 and ISO-8859-1 conversions failed, the value of
this is 'failed'.
487 =cut
sub _default_unimarc_charconv_to_utf8 {
    # Guess the encoding of a UNIMARC record by trial conversion, each
    # attempt working on its own clone so the input stays untouched.
    #
    # Returns the list (record, charset, errors arrayref):
    #   * 'iso-5426'   when the ISO-5426 conversion reported no errors;
    #   * 'iso-8859-1' when the Text::Iconv ISO-8859-1 conversion did;
    #   * 'failed'     otherwise, with non-ASCII octets replaced and the
    #                  errors of both attempts in the arrayref.
    # The arrayref is always present (empty on success), matching
    # _default_marc21_charconv_to_utf8 and the three-value contract
    # documented for MarcToUTF8Record, so callers may dereference it
    # unconditionally.
    my $marc_record = shift;
    my $marc_flavour = shift;

    my @all_errors = ();

    my $trial_5426 = $marc_record->clone();
    my @errors = _marc_iso5426_to_utf8($trial_5426, $marc_flavour);
    unless (@errors) {
        return $trial_5426, 'iso-5426', [];
    }
    push @all_errors, @errors;

    my $trial_8859_1 = $marc_record->clone();
    @errors = _marc_to_utf8_via_text_iconv($trial_8859_1, $marc_flavour, 'iso-8859-1');
    unless (@errors) {
        return $trial_8859_1, 'iso-8859-1', [];
    }
    push @all_errors, @errors;

    my $default_converted = $marc_record->clone();
    _marc_to_utf8_replacement_char($default_converted, $marc_flavour);
    return $default_converted, 'failed', \@all_errors;
}
513 =head2 _marc_marc8_to_utf8
515 my @errors = _marc_marc8_to_utf8($marc_record, $marc_flavour, $source_encoding);
Convert a C<MARC::Record> to UTF-8 in-place from MARC-8.
If the conversion fails for some reason,
appropriate messages will be placed in the returned
C<@errors> array.
522 =cut
sub _marc_marc8_to_utf8 {
    # Convert every non-control field of $marc_record from MARC-8 to
    # UTF-8 in place and flag the record as UTF-8.  Returns the list of
    # warnings MARC::Charset raised during the conversion.
    my ($marc_record, $marc_flavour) = @_;

    # Run MARC::Charset with errors ignored; the previous setting is
    # put back once all fields are done.
    my $saved_ignore_errors = MARC::Charset->ignore_errors();
    MARC::Charset->ignore_errors(1);

    # Collect the warnings raised by MARC::Charset.
    my @errors;
    local $SIG{__WARN__} = sub {
        my ($msg) = @_;

        # Warnings from anywhere else are simply passed on.
        unless ($msg =~ /MARC.Charset/) {
            warn $msg;
            return;
        }

        # FIXME - this is meant to strip the trailing line reference to
        # MARC/Charset.pm, but as written it probably won't quite work
        # on Windows; a minimal-match back-tracking RE would help here.
        $msg =~ s/at [\/].*?.MARC.Charset\.pm line \d+\.\n$//;
        push @errors, $msg;
    };

    for my $field ($marc_record->fields()) {
        # Control fields should not contain non-ASCII characters.
        next if $field->is_control_field();

        my @converted_subfields;
        for my $subfield ($field->subfields()) {
            my $utf8sf = MARC::Charset::marc8_to_utf8($subfield->[1]);

            # Because of a bug in MARC::Charset 0.98, a string that has
            # one or more diacritics lying only in character positions
            # 128..255 comes back in the legacy 8-bit encoding (e.g.
            # ISO-8859-1) rather than UTF-8; upgrade it in place then.
            # Moral: pack("U", ...) beats chr(...) when the result must
            # be guaranteed UTF-8.
            utf8::upgrade($utf8sf) unless IsStringUTF8ish($utf8sf);

            push @converted_subfields, $subfield->[0], $utf8sf;
        }

        $field->replace_with(
            MARC::Field->new(
                $field->tag(), $field->indicator(1), $field->indicator(2),
                @converted_subfields
            )
        );
    }

    MARC::Charset->ignore_errors($saved_ignore_errors);

    SetMarcUnicodeFlag($marc_record, $marc_flavour);

    return @errors;
}
584 =head2 _marc_iso5426_to_utf8
586 my @errors = _marc_iso5426_to_utf8($marc_record, $marc_flavour, $source_encoding);
Convert a C<MARC::Record> to UTF-8 in-place from ISO-5426.
If the conversion fails for some reason,
appropriate messages will be placed in the returned
C<@errors> array.
593 FIXME - is ISO-5426 equivalent enough to MARC-8
594 that C<MARC::Charset> can be used instead?
596 =cut
sub _marc_iso5426_to_utf8 {
    # Convert every non-control field of $marc_record from ISO-5426 to
    # UTF-8 in place via char_decode5426() and flag the record as UTF-8.
    # Nothing is ever pushed onto the returned error list here, so it
    # is always empty.
    my ($marc_record, $marc_flavour) = @_;

    my @errors;

    for my $field ($marc_record->fields()) {
        # Control fields should not contain non-ASCII characters.
        next if $field->is_control_field();

        my @converted_subfields;
        for my $subfield ($field->subfields()) {
            push @converted_subfields,
                $subfield->[0], char_decode5426($subfield->[1]);
        }

        $field->replace_with(
            MARC::Field->new(
                $field->tag(), $field->indicator(1), $field->indicator(2),
                @converted_subfields
            )
        );
    }

    SetMarcUnicodeFlag($marc_record, $marc_flavour);

    return @errors;
}
626 =head2 _marc_to_utf8_via_text_iconv
628 my @errors = _marc_to_utf8_via_text_iconv($marc_record, $marc_flavour, $source_encoding);
630 Convert a C<MARC::Record> to UTF-8 in-place using the
631 C<Text::Iconv> CPAN module. Any source encoding accepted
632 by the user's iconv installation should work. If
633 the source encoding is not recognized on the user's
634 server or the conversion fails for some reason,
635 appropriate messages will be placed in the returned
636 C<@errors> array.
638 =cut
sub _marc_to_utf8_via_text_iconv {
    # Convert every non-control field of $marc_record from
    # $source_encoding to UTF-8 in place with Text::Iconv and flag the
    # record as UTF-8.  Returns the list of error messages; when the
    # converter cannot be created the record is left untouched.
    my ($marc_record, $marc_flavour, $source_encoding) = @_;

    my @errors;

    my $decoder = eval { Text::Iconv->new($source_encoding, 'utf8') };
    if ($@) {
        push @errors, "Could not initialze $source_encoding => utf8 converter: $@";
        return @errors;
    }

    # Make Text::Iconv raise conversion errors for the duration of the
    # loop; the previous setting is restored afterwards.
    my $saved_raise_error = Text::Iconv->raise_error();
    Text::Iconv->raise_error(1);

    for my $field ($marc_record->fields()) {
        # Control fields should not contain non-ASCII characters.
        next if $field->is_control_field();

        my @converted_subfields;
        for my $subfield ($field->subfields()) {
            my $converted_value = eval { $decoder->convert($subfield->[1]) };

            # A conversion has failed when it died or returned undef.
            my $failure;
            if ($@) {
                $failure = $@;
            }
            elsif (not defined $converted_value) {
                $failure = "Text::Iconv conversion failed - retval is " . $decoder->retval();
            }

            if (defined $failure) {
                push @errors, $failure;

                # Fall back to the original value with every octet in
                # 0x80..0xFF replaced by the octets EF BF BD (U+FFFD).
                $converted_value = $subfield->[1];
                $converted_value =~ s/[\200-\377]/\xef\xbf\xbd/g;
            }

            push @converted_subfields, $subfield->[0], $converted_value;
        }

        $field->replace_with(
            MARC::Field->new(
                $field->tag(), $field->indicator(1), $field->indicator(2),
                @converted_subfields
            )
        );
    }

    SetMarcUnicodeFlag($marc_record, $marc_flavour);
    Text::Iconv->raise_error($saved_raise_error);

    return @errors;
}
695 =head2 _marc_to_utf8_replacement_char
697 _marc_to_utf8_replacement_char($marc_record, $marc_flavour);
Convert a C<MARC::Record> to UTF-8 in-place, adopting the
unsatisfactory method of replacing each non-ASCII octet (i.e.,
one where the eighth bit is set) with the Unicode
replacement character. This is meant as a last-ditch
method, and would be best used as part of a UI that
lets a cataloguer pick various character conversions
until he or she finds the right one.
707 =cut
sub _marc_to_utf8_replacement_char {
    # Last-ditch conversion: in every non-control field, replace each
    # octet in 0x80..0xFF by the octets EF BF BD (UTF-8 for U+FFFD),
    # then flag the record as UTF-8.  The record is modified in place.
    my ($marc_record, $marc_flavour) = @_;

    for my $field ($marc_record->fields()) {
        # Control fields should not contain non-ASCII characters.
        next if $field->is_control_field();

        my @converted_subfields;
        for my $subfield ($field->subfields()) {
            my ($code, $value) = @{$subfield};
            $value =~ s/[\200-\377]/\xef\xbf\xbd/g;
            push @converted_subfields, $code, $value;
        }

        $field->replace_with(
            MARC::Field->new(
                $field->tag(), $field->indicator(1), $field->indicator(2),
                @converted_subfields
            )
        );
    }

    return SetMarcUnicodeFlag($marc_record, $marc_flavour);
}
734 =head2 char_decode5426
736 my $utf8string = char_decode5426($iso_5426_string);
738 Converts a string from ISO-5426 to UTF-8.
740 =cut
743 my %chars;
744 $chars{0xb0}=0x0101;#3/0ayn[ain]
745 $chars{0xb1}=0x0623;#3/1alif/hamzah[alefwithhamzaabove]
746 #$chars{0xb2}=0x00e0;#'à';
747 $chars{0xb2}=0x00e0;#3/2leftlowsinglequotationmark
748 #$chars{0xb3}=0x00e7;#'ç';
749 $chars{0xb3}=0x00e7;#3/2leftlowsinglequotationmark
750 # $chars{0xb4}='è';
751 $chars{0xb4}=0x00e8;
752 $chars{0xbd}=0x02b9;
753 $chars{0xbe}=0x02ba;
754 # $chars{0xb5}='é';
755 $chars{0xb5}=0x00e9;
756 $chars{0x97}=0x003c;#3/2leftlowsinglequotationmark
757 $chars{0x98}=0x003e;#3/2leftlowsinglequotationmark
758 $chars{0xfa}=0x0153; #oe
759 $chars{0xea}=0x0152; #oe
760 $chars{0x81d1}=0x00b0;
762 ####
763 ## combined characters iso5426
765 $chars{0xc041}=0x1ea2; # capital a with hook above
766 $chars{0xc045}=0x1eba; # capital e with hook above
767 $chars{0xc049}=0x1ec8; # capital i with hook above
768 $chars{0xc04f}=0x1ece; # capital o with hook above
769 $chars{0xc055}=0x1ee6; # capital u with hook above
770 $chars{0xc059}=0x1ef6; # capital y with hook above
771 $chars{0xc061}=0x1ea3; # small a with hook above
772 $chars{0xc065}=0x1ebb; # small e with hook above
773 $chars{0xc069}=0x1ec9; # small i with hook above
774 $chars{0xc06f}=0x1ecf; # small o with hook above
775 $chars{0xc075}=0x1ee7; # small u with hook above
776 $chars{0xc079}=0x1ef7; # small y with hook above
778 # 4/1 grave accent
779 $chars{0xc141}=0x00c0; # capital a with grave accent
780 $chars{0xc145}=0x00c8; # capital e with grave accent
781 $chars{0xc149}=0x00cc; # capital i with grave accent
782 $chars{0xc14f}=0x00d2; # capital o with grave accent
783 $chars{0xc155}=0x00d9; # capital u with grave accent
784 $chars{0xc157}=0x1e80; # capital w with grave
785 $chars{0xc159}=0x1ef2; # capital y with grave
786 $chars{0xc161}=0x00e0; # small a with grave accent
787 $chars{0xc165}=0x00e8; # small e with grave accent
788 $chars{0xc169}=0x00ec; # small i with grave accent
789 $chars{0xc16f}=0x00f2; # small o with grave accent
790 $chars{0xc175}=0x00f9; # small u with grave accent
791 $chars{0xc177}=0x1e81; # small w with grave
792 $chars{0xc179}=0x1ef3; # small y with grave
793 # 4/2 acute accent
794 $chars{0xc241}=0x00c1; # capital a with acute accent
795 $chars{0xc243}=0x0106; # capital c with acute accent
796 $chars{0xc245}=0x00c9; # capital e with acute accent
797 $chars{0xc247}=0x01f4; # capital g with acute
798 $chars{0xc249}=0x00cd; # capital i with acute accent
799 $chars{0xc24b}=0x1e30; # capital k with acute
800 $chars{0xc24c}=0x0139; # capital l with acute accent
801 $chars{0xc24d}=0x1e3e; # capital m with acute
802 $chars{0xc24e}=0x0143; # capital n with acute accent
803 $chars{0xc24f}=0x00d3; # capital o with acute accent
804 $chars{0xc250}=0x1e54; # capital p with acute
805 $chars{0xc252}=0x0154; # capital r with acute accent
806 $chars{0xc253}=0x015a; # capital s with acute accent
807 $chars{0xc255}=0x00da; # capital u with acute accent
808 $chars{0xc257}=0x1e82; # capital w with acute
809 $chars{0xc259}=0x00dd; # capital y with acute accent
810 $chars{0xc25a}=0x0179; # capital z with acute accent
811 $chars{0xc261}=0x00e1; # small a with acute accent
812 $chars{0xc263}=0x0107; # small c with acute accent
813 $chars{0xc265}=0x00e9; # small e with acute accent
814 $chars{0xc267}=0x01f5; # small g with acute
815 $chars{0xc269}=0x00ed; # small i with acute accent
816 $chars{0xc26b}=0x1e31; # small k with acute
817 $chars{0xc26c}=0x013a; # small l with acute accent
818 $chars{0xc26d}=0x1e3f; # small m with acute
819 $chars{0xc26e}=0x0144; # small n with acute accent
820 $chars{0xc26f}=0x00f3; # small o with acute accent
821 $chars{0xc270}=0x1e55; # small p with acute
822 $chars{0xc272}=0x0155; # small r with acute accent
823 $chars{0xc273}=0x015b; # small s with acute accent
824 $chars{0xc275}=0x00fa; # small u with acute accent
825 $chars{0xc277}=0x1e83; # small w with acute
826 $chars{0xc279}=0x00fd; # small y with acute accent
827 $chars{0xc27a}=0x017a; # small z with acute accent
828 $chars{0xc2e1}=0x01fc; # capital ae with acute
829 $chars{0xc2f1}=0x01fd; # small ae with acute
830 # 4/3 circumflex accent
831 $chars{0xc341}=0x00c2; # capital a with circumflex accent
832 $chars{0xc343}=0x0108; # capital c with circumflex
833 $chars{0xc345}=0x00ca; # capital e with circumflex accent
834 $chars{0xc347}=0x011c; # capital g with circumflex
835 $chars{0xc348}=0x0124; # capital h with circumflex
836 $chars{0xc349}=0x00ce; # capital i with circumflex accent
837 $chars{0xc34a}=0x0134; # capital j with circumflex
838 $chars{0xc34f}=0x00d4; # capital o with circumflex accent
839 $chars{0xc353}=0x015c; # capital s with circumflex
840 $chars{0xc355}=0x00db; # capital u with circumflex
841 $chars{0xc357}=0x0174; # capital w with circumflex
842 $chars{0xc359}=0x0176; # capital y with circumflex
843 $chars{0xc35a}=0x1e90; # capital z with circumflex
844 $chars{0xc361}=0x00e2; # small a with circumflex accent
845 $chars{0xc363}=0x0109; # small c with circumflex
846 $chars{0xc365}=0x00ea; # small e with circumflex accent
847 $chars{0xc367}=0x011d; # small g with circumflex
848 $chars{0xc368}=0x0125; # small h with circumflex
849 $chars{0xc369}=0x00ee; # small i with circumflex accent
850 $chars{0xc36a}=0x0135; # small j with circumflex
851 $chars{0xc36e}=0x00f1; # small n with tilde
852 $chars{0xc36f}=0x00f4; # small o with circumflex accent
853 $chars{0xc373}=0x015d; # small s with circumflex
854 $chars{0xc375}=0x00fb; # small u with circumflex
855 $chars{0xc377}=0x0175; # small w with circumflex
856 $chars{0xc379}=0x0177; # small y with circumflex
857 $chars{0xc37a}=0x1e91; # small z with circumflex
858 # 4/4 tilde
859 $chars{0xc441}=0x00c3; # capital a with tilde
860 $chars{0xc445}=0x1ebc; # capital e with tilde
861 $chars{0xc449}=0x0128; # capital i with tilde
862 $chars{0xc44e}=0x00d1; # capital n with tilde
863 $chars{0xc44f}=0x00d5; # capital o with tilde
864 $chars{0xc455}=0x0168; # capital u with tilde
865 $chars{0xc456}=0x1e7c; # capital v with tilde
866 $chars{0xc459}=0x1ef8; # capital y with tilde
867 $chars{0xc461}=0x00e3; # small a with tilde
868 $chars{0xc465}=0x1ebd; # small e with tilde
869 $chars{0xc469}=0x0129; # small i with tilde
870 $chars{0xc46e}=0x00f1; # small n with tilde
871 $chars{0xc46f}=0x00f5; # small o with tilde
872 $chars{0xc475}=0x0169; # small u with tilde
873 $chars{0xc476}=0x1e7d; # small v with tilde
874 $chars{0xc479}=0x1ef9; # small y with tilde
875 # 4/5 macron
876 $chars{0xc541}=0x0100; # capital a with macron
877 $chars{0xc545}=0x0112; # capital e with macron
878 $chars{0xc547}=0x1e20; # capital g with macron
879 $chars{0xc549}=0x012a; # capital i with macron
880 $chars{0xc54f}=0x014c; # capital o with macron
881 $chars{0xc555}=0x016a; # capital u with macron
882 $chars{0xc561}=0x0101; # small a with macron
883 $chars{0xc565}=0x0113; # small e with macron
884 $chars{0xc567}=0x1e21; # small g with macron
885 $chars{0xc569}=0x012b; # small i with macron
886 $chars{0xc56f}=0x014d; # small o with macron
887 $chars{0xc575}=0x016b; # small u with macron
888 $chars{0xc572}=0x0159; # small r with macron
889 $chars{0xc5e1}=0x01e2; # capital ae with macron
890 $chars{0xc5f1}=0x01e3; # small ae with macron
891 # 4/6 breve
892 $chars{0xc641}=0x0102; # capital a with breve
893 $chars{0xc645}=0x0114; # capital e with breve
894 $chars{0xc647}=0x011e; # capital g with breve
895 $chars{0xc649}=0x012c; # capital i with breve
896 $chars{0xc64f}=0x014e; # capital o with breve
897 $chars{0xc655}=0x016c; # capital u with breve
898 $chars{0xc661}=0x0103; # small a with breve
899 $chars{0xc665}=0x0115; # small e with breve
# Continuation of the ISO 5426 -> Unicode lookup table used by char_decode5426.
# Two-byte keys are (combining diacritic byte * 256 + base letter byte); the
# value is the Unicode code point of the precomposed character.
$chars{0xc667}=0x011f; # small g with breve
$chars{0xc669}=0x012d; # small i with breve
$chars{0xc66f}=0x014f; # small o with breve
$chars{0xc675}=0x016d; # small u with breve
# 4/7 dot above
# NOTE(review): U+01E1 is LATIN SMALL LETTER A WITH DOT ABOVE AND MACRON, which
# does not obviously match "Ain with dot above" -- mapping kept as-is, confirm.
$chars{0xc7b0}=0x01e1; # Ain with dot above
$chars{0xc742}=0x1e02; # capital b with dot above
$chars{0xc743}=0x010a; # capital c with dot above
$chars{0xc744}=0x1e0a; # capital d with dot above
$chars{0xc745}=0x0116; # capital e with dot above
$chars{0xc746}=0x1e1e; # capital f with dot above
$chars{0xc747}=0x0120; # capital g with dot above
$chars{0xc748}=0x1e22; # capital h with dot above
$chars{0xc749}=0x0130; # capital i with dot above
$chars{0xc74d}=0x1e40; # capital m with dot above
$chars{0xc74e}=0x1e44; # capital n with dot above
$chars{0xc750}=0x1e56; # capital p with dot above
$chars{0xc752}=0x1e58; # capital r with dot above
$chars{0xc753}=0x1e60; # capital s with dot above
$chars{0xc754}=0x1e6a; # capital t with dot above
$chars{0xc757}=0x1e86; # capital w with dot above
$chars{0xc758}=0x1e8a; # capital x with dot above
$chars{0xc759}=0x1e8e; # capital y with dot above
$chars{0xc75a}=0x017b; # capital z with dot above
$chars{0xc761}=0x0227; # small a with dot above
$chars{0xc762}=0x1e03; # small b with dot above
$chars{0xc763}=0x010b; # small c with dot above
$chars{0xc764}=0x1e0b; # small d with dot above
$chars{0xc765}=0x0117; # small e with dot above
$chars{0xc766}=0x1e1f; # small f with dot above
$chars{0xc767}=0x0121; # small g with dot above
$chars{0xc768}=0x1e23; # small h with dot above
$chars{0xc76d}=0x1e41; # small m with dot above
$chars{0xc76e}=0x1e45; # small n with dot above
$chars{0xc770}=0x1e57; # small p with dot above
$chars{0xc772}=0x1e59; # small r with dot above
$chars{0xc773}=0x1e61; # small s with dot above
$chars{0xc774}=0x1e6b; # small t with dot above
$chars{0xc777}=0x1e87; # small w with dot above
$chars{0xc778}=0x1e8b; # small x with dot above
$chars{0xc779}=0x1e8f; # small y with dot above
$chars{0xc77a}=0x017c; # small z with dot above
# 4/8 trema, diaeresis
$chars{0xc820}=0x00a8; # diaeresis
$chars{0xc841}=0x00c4; # capital a with diaeresis
$chars{0xc845}=0x00cb; # capital e with diaeresis
$chars{0xc848}=0x1e26; # capital h with diaeresis
$chars{0xc849}=0x00cf; # capital i with diaeresis
$chars{0xc84f}=0x00d6; # capital o with diaeresis
$chars{0xc855}=0x00dc; # capital u with diaeresis
$chars{0xc857}=0x1e84; # capital w with diaeresis
$chars{0xc858}=0x1e8c; # capital x with diaeresis
$chars{0xc859}=0x0178; # capital y with diaeresis
$chars{0xc861}=0x00e4; # small a with diaeresis
$chars{0xc865}=0x00eb; # small e with diaeresis
$chars{0xc868}=0x1e27; # small h with diaeresis
$chars{0xc869}=0x00ef; # small i with diaeresis
$chars{0xc86f}=0x00f6; # small o with diaeresis
$chars{0xc874}=0x1e97; # small t with diaeresis
$chars{0xc875}=0x00fc; # small u with diaeresis
$chars{0xc877}=0x1e85; # small w with diaeresis
$chars{0xc878}=0x1e8d; # small x with diaeresis
$chars{0xc879}=0x00ff; # small y with diaeresis
# 4/9 umlaut
$chars{0xc920}=0x00a8; # [diaeresis]
$chars{0xc961}=0x00e4; # a with umlaut
$chars{0xc965}=0x00eb; # e with umlaut
$chars{0xc969}=0x00ef; # i with umlaut
$chars{0xc96f}=0x00f6; # o with umlaut
$chars{0xc975}=0x00fc; # u with umlaut
# 4/10 circle above
$chars{0xca41}=0x00c5; # capital a with ring above
$chars{0xca55}=0x016e; # capital u with ring above (key was 0xcaad; 'U' is 0x55)
$chars{0xca61}=0x00e5; # small a with ring above
$chars{0xca75}=0x016f; # small u with ring above
$chars{0xca77}=0x1e98; # small w with ring above
$chars{0xca79}=0x1e99; # small y with ring above
# 4/11 high comma off centre
# 4/12 inverted high comma centred
# 4/13 double acute accent
$chars{0xcd4f}=0x0150; # capital o with double acute
$chars{0xcd55}=0x0170; # capital u with double acute
$chars{0xcd6f}=0x0151; # small o with double acute
$chars{0xcd75}=0x0171; # small u with double acute
# 4/14 horn
$chars{0xce4f}=0x01a0; # latin capital letter o with horn (key was 0xce54, i.e. 'T')
$chars{0xce55}=0x01af; # latin capital letter u with horn
$chars{0xce6f}=0x01a1; # latin small letter o with horn (key was 0xce74, i.e. 't')
$chars{0xce75}=0x01b0; # latin small letter u with horn
# 4/15 caron (hacek)
$chars{0xcf41}=0x01cd; # capital a with caron
$chars{0xcf43}=0x010c; # capital c with caron
$chars{0xcf44}=0x010e; # capital d with caron
$chars{0xcf45}=0x011a; # capital e with caron
$chars{0xcf47}=0x01e6; # capital g with caron
$chars{0xcf49}=0x01cf; # capital i with caron
$chars{0xcf4b}=0x01e8; # capital k with caron
$chars{0xcf4c}=0x013d; # capital l with caron
$chars{0xcf4e}=0x0147; # capital n with caron
$chars{0xcf4f}=0x01d1; # capital o with caron
$chars{0xcf52}=0x0158; # capital r with caron
$chars{0xcf53}=0x0160; # capital s with caron
$chars{0xcf54}=0x0164; # capital t with caron
$chars{0xcf55}=0x01d3; # capital u with caron
$chars{0xcf5a}=0x017d; # capital z with caron
$chars{0xcf61}=0x01ce; # small a with caron
$chars{0xcf63}=0x010d; # small c with caron
$chars{0xcf64}=0x010f; # small d with caron
$chars{0xcf65}=0x011b; # small e with caron
$chars{0xcf67}=0x01e7; # small g with caron
$chars{0xcf69}=0x01d0; # small i with caron
$chars{0xcf6a}=0x01f0; # small j with caron
$chars{0xcf6b}=0x01e9; # small k with caron
$chars{0xcf6c}=0x013e; # small l with caron
$chars{0xcf6e}=0x0148; # small n with caron
$chars{0xcf6f}=0x01d2; # small o with caron
$chars{0xcf72}=0x0159; # small r with caron
$chars{0xcf73}=0x0161; # small s with caron
$chars{0xcf74}=0x0165; # small t with caron
$chars{0xcf75}=0x01d4; # small u with caron
$chars{0xcf7a}=0x017e; # small z with caron
# 5/0 cedilla
$chars{0xd020}=0x00b8; # cedilla
$chars{0xd043}=0x00c7; # capital c with cedilla
$chars{0xd044}=0x1e10; # capital d with cedilla
$chars{0xd047}=0x0122; # capital g with cedilla
$chars{0xd048}=0x1e28; # capital h with cedilla
$chars{0xd04b}=0x0136; # capital k with cedilla
$chars{0xd04c}=0x013b; # capital l with cedilla
$chars{0xd04e}=0x0145; # capital n with cedilla
$chars{0xd052}=0x0156; # capital r with cedilla
$chars{0xd053}=0x015e; # capital s with cedilla
$chars{0xd054}=0x0162; # capital t with cedilla
$chars{0xd063}=0x00e7; # small c with cedilla
$chars{0xd064}=0x1e11; # small d with cedilla
$chars{0xd065}=0x0229; # small e with cedilla (was 0x0119, which is e with ogonek)
$chars{0xd067}=0x0123; # small g with cedilla
$chars{0xd068}=0x1e29; # small h with cedilla
$chars{0xd06b}=0x0137; # small k with cedilla
$chars{0xd06c}=0x013c; # small l with cedilla
$chars{0xd06e}=0x0146; # small n with cedilla
$chars{0xd072}=0x0157; # small r with cedilla
$chars{0xd073}=0x015f; # small s with cedilla
$chars{0xd074}=0x0163; # small t with cedilla
# 5/1 rude
# 5/2 hook to left
# 5/3 ogonek (hook to right)
$chars{0xd320}=0x02db; # ogonek
$chars{0xd341}=0x0104; # capital a with ogonek
$chars{0xd345}=0x0118; # capital e with ogonek
$chars{0xd349}=0x012e; # capital i with ogonek
$chars{0xd34f}=0x01ea; # capital o with ogonek
$chars{0xd355}=0x0172; # capital u with ogonek
$chars{0xd361}=0x0105; # small a with ogonek
$chars{0xd365}=0x0119; # small e with ogonek
$chars{0xd369}=0x012f; # small i with ogonek
$chars{0xd36f}=0x01eb; # small o with ogonek
$chars{0xd375}=0x0173; # small u with ogonek
# 5/4 circle below
$chars{0xd441}=0x1e00; # capital a with ring below
$chars{0xd461}=0x1e01; # small a with ring below
# 5/5 half circle below
# Keys were 0xf948/0xf968; the lead byte for position 5/5 is 0xd5, and the
# decoder only looks up two-byte keys whose lead byte is in 0xC0..0xDF.
$chars{0xd548}=0x1e2a; # capital h with breve below
$chars{0xd568}=0x1e2b; # small h with breve below
# 5/6 dot below
$chars{0xd641}=0x1ea0; # capital a with dot below
$chars{0xd642}=0x1e04; # capital b with dot below
$chars{0xd644}=0x1e0c; # capital d with dot below
$chars{0xd645}=0x1eb8; # capital e with dot below
$chars{0xd648}=0x1e24; # capital h with dot below
$chars{0xd649}=0x1eca; # capital i with dot below
$chars{0xd64b}=0x1e32; # capital k with dot below
$chars{0xd64c}=0x1e36; # capital l with dot below
$chars{0xd64d}=0x1e42; # capital m with dot below
$chars{0xd64e}=0x1e46; # capital n with dot below
$chars{0xd64f}=0x1ecc; # capital o with dot below
$chars{0xd652}=0x1e5a; # capital r with dot below
$chars{0xd653}=0x1e62; # capital s with dot below
$chars{0xd654}=0x1e6c; # capital t with dot below
$chars{0xd655}=0x1ee4; # capital u with dot below
$chars{0xd656}=0x1e7e; # capital v with dot below
$chars{0xd657}=0x1e88; # capital w with dot below
$chars{0xd659}=0x1ef4; # capital y with dot below
$chars{0xd65a}=0x1e92; # capital z with dot below
$chars{0xd661}=0x1ea1; # small a with dot below
$chars{0xd662}=0x1e05; # small b with dot below
$chars{0xd664}=0x1e0d; # small d with dot below
$chars{0xd665}=0x1eb9; # small e with dot below
$chars{0xd668}=0x1e25; # small h with dot below
$chars{0xd669}=0x1ecb; # small i with dot below
$chars{0xd66b}=0x1e33; # small k with dot below
$chars{0xd66c}=0x1e37; # small l with dot below
$chars{0xd66d}=0x1e43; # small m with dot below
$chars{0xd66e}=0x1e47; # small n with dot below
$chars{0xd66f}=0x1ecd; # small o with dot below
$chars{0xd672}=0x1e5b; # small r with dot below
$chars{0xd673}=0x1e63; # small s with dot below
$chars{0xd674}=0x1e6d; # small t with dot below
$chars{0xd675}=0x1ee5; # small u with dot below
$chars{0xd676}=0x1e7f; # small v with dot below
$chars{0xd677}=0x1e89; # small w with dot below
$chars{0xd679}=0x1ef5; # small y with dot below
$chars{0xd67a}=0x1e93; # small z with dot below
# 5/7 double dot below
$chars{0xd755}=0x1e72; # capital u with diaeresis below
$chars{0xd775}=0x1e73; # small u with diaeresis below
# 5/8 underline
$chars{0xd820}=0x005f; # underline
# 5/9 double underline
$chars{0xd920}=0x2017; # double underline
# 5/10 small low vertical bar
$chars{0xda20}=0x02cc; # low vertical line
# 5/11 circumflex below
# 5/12 (this position shall not be used)
# 5/13 left half of ligature sign and of double tilde
# 5/14 right half of ligature sign
# 5/15 right half of double tilde
# Debugging aid: dump the whole table.
# map {printf "%x :%x\n",$_,$chars{$_};}keys %chars;
# char_decode5426($string)
#
# Converts a byte string to a Perl character string using the %chars lookup
# table defined above in this file.
#
#   $string - the input; it is unpacked as unsigned bytes ("C*").
#
# Returns the string built with pack("U*") from the converted code points.
#
# Conversion rules, per input byte:
#   * 0x00-0x7F: copied unchanged, except 0x00, 0x01 and 0x0F which are dropped.
#   * 0xC0-0xDF: treated as a combining lead byte. If (lead * 256 + next byte)
#     is in %chars, both bytes are replaced by that single code point.
#     Otherwise the lead byte alone is looked up, and is copied unchanged if
#     it has no entry either.
#   * anything else: looked up alone in %chars, copied unchanged if absent.
sub char_decode5426 {
    my ( $string) = @_;

    my @data = unpack("C*", $string);
    my $count = scalar(@data);
    my @characters;
    for (my $i = 0; $i < $count; $i++) {
        my $char = $data[$i];
        if ($char <= 0x7F) {
            # ASCII range: keep, minus the control bytes 0x00, 0x01 and 0x0F.
            push @characters, $char unless ($char < 0x02 || $char == 0x0F);
        } elsif ($char >= 0xC0 && $char <= 0xDF) {
            # Combining lead byte. Only build the two-byte key when a following
            # byte really exists; previously a lead byte at the very end of the
            # input read past the array and did arithmetic on undef.
            my $pair;
            $pair = $char * 256 + $data[$i + 1] if $i + 1 < $count;

            my $convchar;
            if (defined $pair && $chars{$pair}) {
                $convchar = $chars{$pair};
                $i++;    # the base letter has been consumed as well
            } elsif ($chars{$char}) {
                $convchar = $chars{$char};
            } else {
                $convchar = $char;
            }
            push @characters, $convchar;
        } else {
            # Remaining bytes: single-byte lookup with pass-through fallback.
            push @characters, $chars{$char} ? $chars{$char} : $char;
        }
    }

    my $result = pack "U*", @characters;

    # Strip leftover control/escape sequences.
    # NOTE(review): none of these substitutions is global, so each removes only
    # its first match; this is kept exactly as before -- confirm whether /g
    # was intended.
    $result =~ s/\x0f//;
    $result =~ s/\x1b.//;
    $result =~ s/\x0e//;
    $result =~ s/\x1b\x5b//;
    return $result;
}
1173 =head1 AUTHOR
1175 Koha Development Team <http://koha-community.org/>
1177 Galen Charlton <galen.charlton@liblime.com>
1179 =cut