diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index b2cbd5e8..f252e4e0 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -53,7 +53,7 @@ jobs: fail-fast: false matrix: os: [ubuntu] - ruby: ['jruby-9.3', 'jruby-9.4', 'truffleruby'] + ruby: ['jruby-9.4', 'truffleruby'] runs-on: ${{ matrix.os }}-latest steps: - uses: actions/checkout@v4 diff --git a/ChangeLog b/ChangeLog index 86bdab8b..b34897f1 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,5 @@ +* Fix an integer overflow when parsing maps. + 2026-06-09 1.8.2 * Fix `Buffer#clear` to properly reset memory chunks before adding them back to the pool. diff --git a/ext/java/org/msgpack/jruby/Decoder.java b/ext/java/org/msgpack/jruby/Decoder.java index 9ea35b49..44357dc8 100644 --- a/ext/java/org/msgpack/jruby/Decoder.java +++ b/ext/java/org/msgpack/jruby/Decoder.java @@ -85,6 +85,14 @@ public Decoder(Ruby runtime, Unpacker unpacker, byte[] bytes, int offset, int le feed(bytes, offset, length); } + private int getUnsignedInt() { + int size = buffer.getInt(); + if (size < 0) { + throw runtime.newRaiseException(underflowErrorClass, "Size too large (limited to 2**31 for the Java version)"); + } + return size; + } + public void feed(byte[] bytes) { feed(bytes, 0, bytes.length); } @@ -200,7 +208,7 @@ public IRubyObject read_array_header() { } else if (b == ARY16) { return runtime.newFixnum(buffer.getShort() & 0xffff); } else if (b == ARY32) { - return runtime.newFixnum(buffer.getInt()); + return runtime.newFixnum(getUnsignedInt()); } throw runtime.newRaiseException(unexpectedTypeErrorClass, "unexpected type"); } catch (RaiseException re) { @@ -221,7 +229,7 @@ public IRubyObject read_map_header() { } else if (b == MAP16) { return runtime.newFixnum(buffer.getShort() & 0xffff); } else if (b == MAP32) { - return runtime.newFixnum(buffer.getInt()); + return runtime.newFixnum(getUnsignedInt()); } throw runtime.newRaiseException(unexpectedTypeErrorClass, "unexpected type"); } catch
(RaiseException re) { @@ -258,10 +266,10 @@ private IRubyObject consumeNext() { case TRUE: return runtime.getTrue(); case BIN8: return consumeString(buffer.get() & 0xff, binaryEncoding); case BIN16: return consumeString(buffer.getShort() & 0xffff, binaryEncoding); - case BIN32: return consumeString(buffer.getInt(), binaryEncoding); + case BIN32: return consumeString(getUnsignedInt(), binaryEncoding); case VAREXT8: return consumeExtension(buffer.get() & 0xff); case VAREXT16: return consumeExtension(buffer.getShort() & 0xffff); - case VAREXT32: return consumeExtension(buffer.getInt()); + case VAREXT32: return consumeExtension(getUnsignedInt()); case FLOAT32: return runtime.newFloat(buffer.getFloat()); case FLOAT64: return runtime.newFloat(buffer.getDouble()); case UINT8: return runtime.newFixnum(buffer.get() & 0xffL); @@ -283,11 +291,11 @@ private IRubyObject consumeNext() { case FIXEXT16: return consumeExtension(16); case STR8: return consumeString(buffer.get() & 0xff, utf8Encoding); case STR16: return consumeString(buffer.getShort() & 0xffff, utf8Encoding); - case STR32: return consumeString(buffer.getInt(), utf8Encoding); + case STR32: return consumeString(getUnsignedInt(), utf8Encoding); case ARY16: return consumeArray(buffer.getShort() & 0xffff); - case ARY32: return consumeArray(buffer.getInt()); + case ARY32: return consumeArray(getUnsignedInt()); case MAP16: return consumeHash(buffer.getShort() & 0xffff); - case MAP32: return consumeHash(buffer.getInt()); + case MAP32: return consumeHash(getUnsignedInt()); default: break outer; } case 0xe: diff --git a/ext/msgpack/unpacker.c b/ext/msgpack/unpacker.c index ad4d6630..71c85beb 100644 --- a/ext/msgpack/unpacker.c +++ b/ext/msgpack/unpacker.c @@ -456,13 +456,13 @@ static int read_primitive(msgpack_unpacker_t* uk) return object_complete(uk, INT2NUM((int8_t)b)); SWITCH_RANGE(b, 0xa0, 0xbf) // FixRaw / fixstr - int count = b & 0x1f; + size_t count = b & 0x1f; /* read_raw_body_begin sets uk->reading_raw */
uk->reading_raw_remaining = count; return read_raw_body_begin(uk, RAW_TYPE_STRING); SWITCH_RANGE(b, 0x90, 0x9f) // FixArray - int count = b & 0x0f; + size_t count = b & 0x0f; if(count == 0) { return object_complete(uk, rb_ary_new()); } @@ -638,7 +638,7 @@ static int read_primitive(msgpack_unpacker_t* uk) case 0xd9: // raw 8 / str 8 { READ_CAST_BLOCK_OR_RETURN_EOF(cb, uk, 1); - uint8_t count = cb.u8; + size_t count = cb.u8; /* read_raw_body_begin sets uk->reading_raw */ uk->reading_raw_remaining = count; return read_raw_body_begin(uk, RAW_TYPE_STRING); @@ -647,7 +647,7 @@ static int read_primitive(msgpack_unpacker_t* uk) case 0xda: // raw 16 / str 16 { READ_CAST_BLOCK_OR_RETURN_EOF(cb, uk, 2); - uint16_t count = _msgpack_be16(cb.u16); + size_t count = _msgpack_be16(cb.u16); /* read_raw_body_begin sets uk->reading_raw */ uk->reading_raw_remaining = count; return read_raw_body_begin(uk, RAW_TYPE_STRING); @@ -656,7 +656,7 @@ static int read_primitive(msgpack_unpacker_t* uk) case 0xdb: // raw 32 / str 32 { READ_CAST_BLOCK_OR_RETURN_EOF(cb, uk, 4); - uint32_t count = _msgpack_be32(cb.u32); + size_t count = _msgpack_be32(cb.u32); /* read_raw_body_begin sets uk->reading_raw */ uk->reading_raw_remaining = count; return read_raw_body_begin(uk, RAW_TYPE_STRING); @@ -665,7 +665,7 @@ static int read_primitive(msgpack_unpacker_t* uk) case 0xc4: // bin 8 { READ_CAST_BLOCK_OR_RETURN_EOF(cb, uk, 1); - uint8_t count = cb.u8; + size_t count = cb.u8; /* read_raw_body_begin sets uk->reading_raw */ uk->reading_raw_remaining = count; return read_raw_body_begin(uk, RAW_TYPE_BINARY); @@ -674,7 +674,7 @@ static int read_primitive(msgpack_unpacker_t* uk) case 0xc5: // bin 16 { READ_CAST_BLOCK_OR_RETURN_EOF(cb, uk, 2); - uint16_t count = _msgpack_be16(cb.u16); + size_t count = _msgpack_be16(cb.u16); /* read_raw_body_begin sets uk->reading_raw */ uk->reading_raw_remaining = count; return read_raw_body_begin(uk, RAW_TYPE_BINARY); @@ -683,7 +683,7 @@ static int read_primitive(msgpack_unpacker_t*
uk) case 0xc6: // bin 32 { READ_CAST_BLOCK_OR_RETURN_EOF(cb, uk, 4); - uint32_t count = _msgpack_be32(cb.u32); + size_t count = _msgpack_be32(cb.u32); /* read_raw_body_begin sets uk->reading_raw */ uk->reading_raw_remaining = count; return read_raw_body_begin(uk, RAW_TYPE_BINARY); @@ -692,7 +692,7 @@ static int read_primitive(msgpack_unpacker_t* uk) case 0xdc: // array 16 { READ_CAST_BLOCK_OR_RETURN_EOF(cb, uk, 2); - uint16_t count = _msgpack_be16(cb.u16); + size_t count = _msgpack_be16(cb.u16); if(count == 0) { return object_complete(uk, rb_ary_new()); } @@ -702,7 +702,7 @@ static int read_primitive(msgpack_unpacker_t* uk) case 0xdd: // array 32 { READ_CAST_BLOCK_OR_RETURN_EOF(cb, uk, 4); - uint32_t count = _msgpack_be32(cb.u32); + size_t count = _msgpack_be32(cb.u32); if(count == 0) { return object_complete(uk, rb_ary_new()); } @@ -712,7 +712,7 @@ static int read_primitive(msgpack_unpacker_t* uk) case 0xde: // map 16 { READ_CAST_BLOCK_OR_RETURN_EOF(cb, uk, 2); - uint16_t count = _msgpack_be16(cb.u16); + size_t count = _msgpack_be16(cb.u16); if(count == 0) { return object_complete(uk, rb_hash_new()); } @@ -722,7 +722,7 @@ static int read_primitive(msgpack_unpacker_t* uk) case 0xdf: // map 32 { READ_CAST_BLOCK_OR_RETURN_EOF(cb, uk, 4); - uint32_t count = _msgpack_be32(cb.u32); + size_t count = _msgpack_be32(cb.u32); if(count == 0) { return object_complete(uk, rb_hash_new()); } diff --git a/spec/unpack_spec.rb b/spec/unpack_spec.rb index 73aab5e1..22b9e89f 100644 --- a/spec/unpack_spec.rb +++ b/spec/unpack_spec.rb @@ -7,6 +7,10 @@ end describe MessagePack do + let(:eof_error) do + IS_JRUBY ?
MessagePack::UnderflowError : EOFError + end + it 'MessagePack.unpack symbolize_keys' do symbolized_hash = {:a => 'b', :c => 'd'} MessagePack.load(MessagePack.pack(symbolized_hash), :symbolize_keys => true).should == symbolized_hash @@ -54,4 +58,35 @@ MessagePack.unpack([0xc6, 0x00, 0x00, 0x00, 0x01].pack('C*') + 'a').should == "a" MessagePack.unpack([0xc6, 0x00, 0x00, 0x00, 0x02].pack('C*') + 'aa').should == "aa" end + + it "msgpack fixmap type" do + MessagePack.unpack([0x81, 0xa1, 0x61, 0x01].pack('C*')).should == {"a" => 1} + expect { + MessagePack.unpack([0x82, 0xa1, 0x61, 0x01].pack('C*')) + }.to raise_error(eof_error) + end + + it "msgpack map 16 type" do + MessagePack.unpack([0xde, 0x00, 0x01, 0xa1, 0x61, 0x1].pack('C*')).should == {"a" => 1} + + expect { + MessagePack.unpack([0xde, 0x00, 0x02, 0xa1, 0x61, 0x1].pack('C*')) + }.to raise_error(eof_error) + + expect { + MessagePack.unpack([0xde, 0x80, 0x01, 0xa1, 0x61, 0x1].pack('C*')) + }.to raise_error(eof_error) + end + + it "msgpack map 32 type" do + MessagePack.unpack([0xdf, 0x00, 0x00, 0x00, 0x01, 0xa1, 0x61, 0x1].pack('C*')).should == {"a" => 1} + + expect { + MessagePack.unpack([0xdf, 0x00, 0x00, 0x00, 0x02, 0xa1, 0x61, 0x1].pack('C*')) + }.to raise_error(eof_error) + + expect { + MessagePack.unpack([0xdf, 0x80, 0x00, 0x00, 0x01, 0xa1, 0x61, 0x1].pack('C*')) + }.to raise_error(eof_error) + end end