Skip to content

Commit 833910c

Browse files
Claude Bot and claude committed
fix(FormData): prevent crash on very large input
When `FormData.from()` was called with a very large ArrayBuffer (exceeding WebKit's `String::MaxLength` of INT32_MAX), it crashed with an assertion failure in WebKit's StringImpl.

This fixes the issue by adding length checks to the C++ `toString` and related functions (helpers.h), which now check against both Bun's synthetic allocation limit and WebKit's `String::MaxLength`. For UTF-8 tagged strings, we use simdutf to calculate the actual UTF-16 length only when the byte length exceeds the limit, since the UTF-16 length (in code units) never exceeds the UTF-8 byte length.

Changes:
- Add length checks to all UTF-8 code paths in helpers.h:
  - toString(ZigString)
  - toString(ZigString, StringPointer)
  - toStringCopy(ZigString)
  - toStringCopy(ZigString, StringPointer)
  - appendToBuilder(ZigString, StringBuilder)
- Add WTF::String::MaxLength check to non-UTF-8 paths

Now `FormData.from(new Uint32Array(913148244))` returns an empty FormData instead of crashing with an assertion failure.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <[email protected]>
1 parent ddcec61 commit 833910c

File tree

2 files changed

+68
-4
lines changed

2 files changed

+68
-4
lines changed

src/bun.js/bindings/helpers.h

Lines changed: 47 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
#include "root.h"
44
#include "wtf/text/ASCIILiteral.h"
5+
#include "wtf/SIMDUTF.h"
56

67
#include <JavaScriptCore/Error.h>
78
#include <JavaScriptCore/Exception.h>
@@ -79,12 +80,22 @@ static const WTF::String toString(ZigString str)
7980
}
8081
if (isTaggedUTF8Ptr(str.ptr)) [[unlikely]] {
8182
ASSERT_WITH_MESSAGE(!isTaggedExternalPtr(str.ptr), "UTF8 and external ptr are mutually exclusive. The external will never be freed.");
83+
// Check if the resulting UTF-16 string could possibly exceed the maximum length.
84+
// UTF-16 length is at most equal to UTF-8 byte length, so only check when byte length exceeds limit.
85+
size_t maxLength = std::min(Bun__stringSyntheticAllocationLimit, static_cast<size_t>(WTF::String::MaxLength));
86+
if (str.len > maxLength) [[unlikely]] {
87+
// UTF-8 byte length != UTF-16 length, so use simdutf to calculate the actual UTF-16 length.
88+
size_t utf16Length = simdutf::utf16_length_from_utf8(reinterpret_cast<const char*>(untag(str.ptr)), str.len);
89+
if (utf16Length > maxLength) {
90+
return {};
91+
}
92+
}
8293
return WTF::String::fromUTF8ReplacingInvalidSequences(std::span { untag(str.ptr), str.len });
8394
}
8495

8596
if (isTaggedExternalPtr(str.ptr)) [[unlikely]] {
8697
// This will fail if the string is too long. Let's make it explicit instead of an ASSERT.
87-
if (str.len > Bun__stringSyntheticAllocationLimit) [[unlikely]] {
98+
if (str.len > Bun__stringSyntheticAllocationLimit || str.len > WTF::String::MaxLength) [[unlikely]] {
8899
free_global_string(nullptr, reinterpret_cast<void*>(const_cast<unsigned char*>(untag(str.ptr))), static_cast<unsigned>(str.len));
89100
return {};
90101
}
@@ -95,7 +106,7 @@ static const WTF::String toString(ZigString str)
95106
}
96107

97108
// This will fail if the string is too long. Let's make it explicit instead of an ASSERT.
98-
if (str.len > Bun__stringSyntheticAllocationLimit) [[unlikely]] {
109+
if (str.len > Bun__stringSyntheticAllocationLimit || str.len > WTF::String::MaxLength) [[unlikely]] {
99110
return {};
100111
}
101112

@@ -121,11 +132,19 @@ static const WTF::String toString(ZigString str, StringPointer ptr)
121132
return WTF::String();
122133
}
123134
if (isTaggedUTF8Ptr(str.ptr)) [[unlikely]] {
135+
// Check if the resulting UTF-16 string could possibly exceed the maximum length.
136+
size_t maxLength = std::min(Bun__stringSyntheticAllocationLimit, static_cast<size_t>(WTF::String::MaxLength));
137+
if (ptr.len > maxLength) [[unlikely]] {
138+
size_t utf16Length = simdutf::utf16_length_from_utf8(reinterpret_cast<const char*>(&untag(str.ptr)[ptr.off]), ptr.len);
139+
if (utf16Length > maxLength) {
140+
return {};
141+
}
142+
}
124143
return WTF::String::fromUTF8ReplacingInvalidSequences(std::span { &untag(str.ptr)[ptr.off], ptr.len });
125144
}
126145

127146
// This will fail if the string is too long. Let's make it explicit instead of an ASSERT.
128-
if (str.len > Bun__stringSyntheticAllocationLimit) [[unlikely]] {
147+
if (ptr.len > Bun__stringSyntheticAllocationLimit || ptr.len > WTF::String::MaxLength) [[unlikely]] {
129148
return {};
130149
}
131150

@@ -141,11 +160,19 @@ static const WTF::String toStringCopy(ZigString str, StringPointer ptr)
141160
return WTF::String();
142161
}
143162
if (isTaggedUTF8Ptr(str.ptr)) [[unlikely]] {
163+
// Check if the resulting UTF-16 string could possibly exceed the maximum length.
164+
size_t maxLength = std::min(Bun__stringSyntheticAllocationLimit, static_cast<size_t>(WTF::String::MaxLength));
165+
if (ptr.len > maxLength) [[unlikely]] {
166+
size_t utf16Length = simdutf::utf16_length_from_utf8(reinterpret_cast<const char*>(&untag(str.ptr)[ptr.off]), ptr.len);
167+
if (utf16Length > maxLength) {
168+
return {};
169+
}
170+
}
144171
return WTF::String::fromUTF8ReplacingInvalidSequences(std::span { &untag(str.ptr)[ptr.off], ptr.len });
145172
}
146173

147174
// This will fail if the string is too long. Let's make it explicit instead of an ASSERT.
148-
if (str.len > Bun__stringSyntheticAllocationLimit) [[unlikely]] {
175+
if (ptr.len > Bun__stringSyntheticAllocationLimit || ptr.len > WTF::String::MaxLength) [[unlikely]] {
149176
return {};
150177
}
151178

@@ -161,6 +188,14 @@ static const WTF::String toStringCopy(ZigString str)
161188
return WTF::String();
162189
}
163190
if (isTaggedUTF8Ptr(str.ptr)) [[unlikely]] {
191+
// Check if the resulting UTF-16 string could possibly exceed the maximum length.
192+
size_t maxLength = std::min(Bun__stringSyntheticAllocationLimit, static_cast<size_t>(WTF::String::MaxLength));
193+
if (str.len > maxLength) [[unlikely]] {
194+
size_t utf16Length = simdutf::utf16_length_from_utf8(reinterpret_cast<const char*>(untag(str.ptr)), str.len);
195+
if (utf16Length > maxLength) {
196+
return {};
197+
}
198+
}
164199
return WTF::String::fromUTF8ReplacingInvalidSequences(std::span { untag(str.ptr), str.len });
165200
}
166201

@@ -188,6 +223,14 @@ static void appendToBuilder(ZigString str, WTF::StringBuilder& builder)
188223
return;
189224
}
190225
if (isTaggedUTF8Ptr(str.ptr)) [[unlikely]] {
226+
// Check if the resulting UTF-16 string could possibly exceed the maximum length.
227+
size_t maxLength = std::min(Bun__stringSyntheticAllocationLimit, static_cast<size_t>(WTF::String::MaxLength));
228+
if (str.len > maxLength) [[unlikely]] {
229+
size_t utf16Length = simdutf::utf16_length_from_utf8(reinterpret_cast<const char*>(untag(str.ptr)), str.len);
230+
if (utf16Length > maxLength) {
231+
return;
232+
}
233+
}
191234
WTF::String converted = WTF::String::fromUTF8ReplacingInvalidSequences(std::span { untag(str.ptr), str.len });
192235
builder.append(converted);
193236
return;

test/js/web/html/FormData.test.ts

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -277,6 +277,27 @@ describe("FormData", () => {
277277
expect(fd.toJSON()).toEqual({ "1": "1" });
278278
});
279279

280+
test("FormData.from does not crash on very large input", () => {
281+
// This test verifies that FormData.from doesn't crash with an assertion failure
282+
// when given input larger than WebKit's String::MaxLength (INT32_MAX ~= 2GB).
283+
// We use a smaller test case with the synthetic limit to avoid actually allocating 2GB+.
284+
const { setSyntheticAllocationLimitForTesting } = require("bun:internal-for-testing");
285+
// Set a small limit so we can test the boundary without allocating gigabytes
286+
const originalLimit = setSyntheticAllocationLimitForTesting(1024 * 1024); // 1MB limit
287+
try {
288+
// Create a buffer larger than the limit
289+
const largeBuffer = new Uint8Array(2 * 1024 * 1024); // 2MB
290+
// @ts-expect-error - FormData.from is a Bun extension
291+
const result = FormData.from(largeBuffer);
292+
// Should return an empty FormData instead of crashing
293+
expect(result).toBeInstanceOf(FormData);
294+
// Verify the FormData is empty (string was too long to parse)
295+
expect(Array.from(result.entries())).toHaveLength(0);
296+
} finally {
297+
setSyntheticAllocationLimitForTesting(originalLimit);
298+
}
299+
});
300+
280301
it("should throw on bad boundary", async () => {
281302
const response = new Response('foo\r\nContent-Disposition: form-data; name="foo"\r\n\r\nbar\r\n', {
282303
headers: {

0 commit comments

Comments
 (0)