From dc283cbb6b66a9238a33fe66fc8ba15c57fd7a43 Mon Sep 17 00:00:00 2001 From: tromey Date: Mon, 17 Apr 2006 21:41:47 +0000 Subject: [PATCH] PR libgcj/27171: * testsuite/libjava.lang/pr27171.java: New file. * testsuite/libjava.lang/pr27171.out: New file. * gnu/gcj/convert/Output_UTF8.java (havePendingBytes): Return true if we've seen a high surrogate. (write): Handle high surrogates at the end of the stream. Properly emit isolated low surrogates. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@113013 138bc75d-0d04-0410-961f-82ee72b054a4 --- libjava/ChangeLog | 10 +++++++++ libjava/gnu/gcj/convert/Output_UTF8.java | 34 ++++++++++++++++++++++------- libjava/testsuite/libjava.lang/pr27171.java | 19 ++++++++++++++++ libjava/testsuite/libjava.lang/pr27171.out | 2 ++ 4 files changed, 57 insertions(+), 8 deletions(-) create mode 100644 libjava/testsuite/libjava.lang/pr27171.java create mode 100644 libjava/testsuite/libjava.lang/pr27171.out diff --git a/libjava/ChangeLog b/libjava/ChangeLog index fbbe9e2c20f..8437c1a1697 100644 --- a/libjava/ChangeLog +++ b/libjava/ChangeLog @@ -1,3 +1,13 @@ +2006-04-17 Tom Tromey + + PR libgcj/27171: + * testsuite/libjava.lang/pr27171.java: New file. + * testsuite/libjava.lang/pr27171.out: New file. + * gnu/gcj/convert/Output_UTF8.java (havePendingBytes): Return + true if we've seen a high surrogate. + (write): Handle high surrogates at the end of the stream. + Properly emit isolated low surrogates. + 2006-04-17 Andreas Tobler * testsuite/libjava.lang/stringconst2.java: Print a stack trace in case diff --git a/libjava/gnu/gcj/convert/Output_UTF8.java b/libjava/gnu/gcj/convert/Output_UTF8.java index f54e7ebcfc2..e550a7f0ab6 100644 --- a/libjava/gnu/gcj/convert/Output_UTF8.java +++ b/libjava/gnu/gcj/convert/Output_UTF8.java @@ -1,4 +1,4 @@ -/* Copyright (C) 1999, 2000, 2003 Free Software Foundation +/* Copyright (C) 1999, 2000, 2003, 2006 Free Software Foundation This file is part of libgcj. @@ -36,7 +36,7 @@ public class Output_UTF8 extends UnicodeToBytes int avail = buf.length - count; for (;;) { - if (avail == 0 || (inlength == 0 && bytes_todo == 0)) + if (avail == 0 || (inlength == 0 && bytes_todo == 0 && hi_part == 0)) break; // The algorithm is made more complicated because we want to write // at least one byte in the output buffer, if there is room for @@ -61,17 +61,25 @@ public class Output_UTF8 extends UnicodeToBytes continue; } + // Handle a high surrogate at the end of the input stream. + if (inlength == 0 && hi_part != 0) + { + buf[count++] = (byte) (0xE0 | (hi_part >> 12)); + value = hi_part; + hi_part = 0; + avail--; + bytes_todo = 2; + continue; + } + char ch = inbuffer[inpos++]; inlength--; - if ((hi_part != 0 && (ch <= 0xDBFF || ch > 0xDFFF)) - || (hi_part == 0 && ch >= 0xDC00 && ch <= 0xDFFF)) + if (hi_part != 0 && (ch <= 0xDBFF || ch > 0xDFFF)) { // If the previous character was a high surrogate, and we // don't now have a low surrogate, we print the high - // surrogate as an isolated character. If this character - // is a low surrogate and we didn't previously see a high - // surrogate, we do the same thing. + // surrogate as an isolated character. --inpos; ++inlength; buf[count++] = (byte) (0xE0 | (hi_part >> 12)); @@ -80,6 +88,16 @@ public class Output_UTF8 extends UnicodeToBytes avail--; bytes_todo = 2; } + else if (hi_part == 0 && ch >= 0xDC00 && ch <= 0xDFFF) + { + // If this character is a low surrogate and we didn't + // previously see a high surrogate, we do the same thing + // as above. + buf[count++] = (byte) (0xE0 | (ch >> 12)); + value = ch; + avail--; + bytes_todo = 2; + } else if (ch < 128 && (ch != 0 || standardUTF8)) { avail--; @@ -122,7 +140,7 @@ public class Output_UTF8 extends UnicodeToBytes public boolean havePendingBytes() { - return bytes_todo > 0; + return bytes_todo > 0 || hi_part != 0; } } diff --git a/libjava/testsuite/libjava.lang/pr27171.java b/libjava/testsuite/libjava.lang/pr27171.java new file mode 100644 index 00000000000..a07fea73647 --- /dev/null +++ b/libjava/testsuite/libjava.lang/pr27171.java @@ -0,0 +1,19 @@ +public class pr27171 { + + public static void main(String[] args) throws Throwable { + // Isolated low surrogate. + char x = 56478; // 0xdc9e + String xs = new String(new char[] { x }); + // Note that we fix a result for our implementation; but + // the JDK does something else. + System.out.println(xs.getBytes("UTF-8").length); + + // isolated high surrogate -- at end of input stream + char y = 0xdaee; + String ys = new String(new char[] { y }); + // Note that we fix a result for our implementation; but + // the JDK does something else. + System.out.println(ys.getBytes("UTF-8").length); + } +} + diff --git a/libjava/testsuite/libjava.lang/pr27171.out b/libjava/testsuite/libjava.lang/pr27171.out new file mode 100644 index 00000000000..a5c8806279f --- /dev/null +++ b/libjava/testsuite/libjava.lang/pr27171.out @@ -0,0 +1,2 @@ +3 +3 -- 2.11.4.GIT