Index: /Users/quelle/Documents/workspace/jruby_1_0_1/src/org/jruby/util/Pack.java =================================================================== --- /Users/quelle/Documents/workspace/jruby_1_0_1/src/org/jruby/util/Pack.java (revision 4620) +++ /Users/quelle/Documents/workspace/jruby_1_0_1/src/org/jruby/util/Pack.java (working copy) @@ -35,6 +35,7 @@ package org.jruby.util; import java.math.BigInteger; +import java.nio.BufferUnderflowException; import java.nio.ByteBuffer; import java.nio.ByteOrder; import java.nio.CharBuffer; @@ -855,12 +856,40 @@ break; case 'U' : { + ByteBuffer buffer; if (occurrences == IS_STAR || occurrences > encode.remaining()) { - occurrences = encode.remaining(); + byte[] toUnpack = new byte[encode.remaining()]; + encode.get(toUnpack); + buffer = ByteBuffer.wrap(toUnpack); + } else { + byte[] toUnpack = new byte[occurrences*4]; + int pos = 0; + try { + while (--occurrences >= 0 && encode.hasRemaining()) { + byte b = encode.get(); + if (b >= (byte)0x00 && b <= (byte)0x7F) { + toUnpack[pos++] = b; + } else if (b >= (byte)0xC2 && b <= (byte)0xDF) { + toUnpack[pos++] = b; + toUnpack[pos++] = encode.get(); + } else if (b >= (byte)0xE0 && b <= (byte)0xEF) { + toUnpack[pos++] = b; + encode.get(toUnpack, pos, 2); + pos += 2; + } else if (b >= (byte)0xF0 && b <= (byte)0xF4) { + toUnpack[pos++] = b; + encode.get(toUnpack, pos, 3); + pos += 3; + } else { + throw runtime.newArgumentError("malformed UTF-8 character"); + } + } + } catch (BufferUnderflowException e) { + throw runtime.newArgumentError("malformed UTF-8 character"); + } + buffer = ByteBuffer.wrap(toUnpack, 0, pos); } //get the correct substring - byte[] toUnpack = new byte[occurrences]; - encode.get(toUnpack); CharBuffer lUtf8 = null; try { Charset utf8 = Charset.forName("UTF-8"); @@ -867,7 +896,6 @@ CharsetDecoder utf8Decoder = utf8.newDecoder(); utf8Decoder.onMalformedInput(CodingErrorAction.REPORT); utf8Decoder.onUnmappableCharacter(CodingErrorAction.REPORT); - ByteBuffer buffer = ByteBuffer.wrap(toUnpack); lUtf8 = utf8Decoder.decode(buffer); } catch (CharacterCodingException cce) { @@ -874,7 +902,7 @@ // invalid incoming bytes; fail to encode. throw runtime.newArgumentError("malformed UTF-8 character"); } - while (occurrences-- > 0 && lUtf8.hasRemaining()) { + while (lUtf8.hasRemaining()) { long lCurChar = lUtf8.get(); result.append(runtime.newFixnum(lCurChar)); }