Skip to content

Commit 43c46b1

Browse files
robobunClaude BotclaudeJarred-Sumner
authored
fix(FormData): throw error instead of assertion failure on very large input (#25006)
## Summary

- Fix crash in `FormData.from()` when called with very large ArrayBuffer input
- Add length check in C++ `toString` function against both Bun's synthetic limit and WebKit's `String::MaxLength`
- For UTF-8 tagged strings, use simdutf to calculate actual UTF-16 length only when byte length exceeds the limit

## Root Cause

When `FormData.from()` was called with a very large ArrayBuffer (e.g., `new Uint32Array(913148244)` = ~3.6GB), the code would crash with:

```
ASSERTION FAILED: data.size() <= MaxLength
vendor/WebKit/Source/WTF/wtf/text/StringImpl.h(886)
```

The `toString()` function in `helpers.h` was only checking against `Bun__stringSyntheticAllocationLimit` (which defaults to ~4GB), but not against WebKit's `String::MaxLength` (INT32_MAX, ~2GB). When the input exceeded `String::MaxLength`, `createWithoutCopying()` would fail with an assertion.

## Changes

1. **helpers.h**: Added `|| str.len > WTF::String::MaxLength` checks to all three code paths in `toString()`:
   - UTF-8 tagged pointer path (with simdutf length calculation only when needed)
   - External pointer path
   - Non-copying creation path
2. **url.zig**: Reverted the incorrect Zig-side check (UTF-8 byte length != UTF-16 code unit length); the Zig side now only propagates the JS exception raised by the C++ binding when `createFromURLQuery` returns `.zero`

## Test plan

- [x] Added test that verifies FormData.from with oversized input doesn't crash
- [x] Verified original crash case now throws a "Cannot create a string longer than ..." error instead of crashing:

```js
const v3 = new Uint32Array(913148244);
FormData.from(v3); // No longer crashes; throws instead
```

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-authored-by: Claude Bot <[email protected]>
Co-authored-by: Claude <[email protected]>
Co-authored-by: Jarred Sumner <[email protected]>
1 parent a0c5f3d commit 43c46b1

File tree

4 files changed

+86
-7
lines changed

4 files changed

+86
-7
lines changed

src/bun.js/bindings/bindings.cpp

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5677,7 +5677,15 @@ CPP_DECL JSC::EncodedJSValue WebCore__DOMFormData__createFromURLQuery(JSC::JSGlo
56775677
{
56785678
Zig::GlobalObject* globalObject = static_cast<Zig::GlobalObject*>(arg0);
56795679
// don't need to copy the string because it internally does.
5680-
auto formData = DOMFormData::create(globalObject->scriptExecutionContext(), toString(*arg1));
5680+
auto str = toString(*arg1);
5681+
// toString() in helpers.h returns an empty string when the input exceeds
5682+
// String::MaxLength or Bun's synthetic allocation limit. This is the only
5683+
// condition under which toString() returns empty for non-empty input.
5684+
if (str.isEmpty() && arg1->len > 0) {
5685+
auto scope = DECLARE_THROW_SCOPE(globalObject->vm());
5686+
return Bun::ERR::STRING_TOO_LONG(scope, globalObject);
5687+
}
5688+
auto formData = DOMFormData::create(globalObject->scriptExecutionContext(), WTFMove(str));
56815689
return JSValue::encode(toJSNewlyCreated(arg0, globalObject, WTFMove(formData)));
56825690
}
56835691

src/bun.js/bindings/helpers.h

Lines changed: 49 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
#include "root.h"
44
#include "wtf/text/ASCIILiteral.h"
5+
#include "wtf/SIMDUTF.h"
56

67
#include <JavaScriptCore/Error.h>
78
#include <JavaScriptCore/Exception.h>
@@ -79,12 +80,24 @@ static const WTF::String toString(ZigString str)
7980
}
8081
if (isTaggedUTF8Ptr(str.ptr)) [[unlikely]] {
8182
ASSERT_WITH_MESSAGE(!isTaggedExternalPtr(str.ptr), "UTF8 and external ptr are mutually exclusive. The external will never be freed.");
83+
// Check if the resulting UTF-16 string could possibly exceed the maximum length.
84+
// For valid UTF-8, the number of UTF-16 code units is <= the number of UTF-8 bytes
85+
// (ASCII is 1:1; other code points use multiple UTF-8 bytes per UTF-16 code unit).
86+
// We only need to compute the actual UTF-16 length when the byte length exceeds the limit.
87+
size_t maxLength = std::min(Bun__stringSyntheticAllocationLimit, static_cast<size_t>(WTF::String::MaxLength));
88+
if (str.len > maxLength) [[unlikely]] {
89+
// UTF-8 byte length != UTF-16 length, so use simdutf to calculate the actual UTF-16 length.
90+
size_t utf16Length = simdutf::utf16_length_from_utf8(reinterpret_cast<const char*>(untag(str.ptr)), str.len);
91+
if (utf16Length > maxLength) {
92+
return {};
93+
}
94+
}
8295
return WTF::String::fromUTF8ReplacingInvalidSequences(std::span { untag(str.ptr), str.len });
8396
}
8497

8598
if (isTaggedExternalPtr(str.ptr)) [[unlikely]] {
8699
// This will fail if the string is too long. Let's make it explicit instead of an ASSERT.
87-
if (str.len > Bun__stringSyntheticAllocationLimit) [[unlikely]] {
100+
if (str.len > Bun__stringSyntheticAllocationLimit || str.len > WTF::String::MaxLength) [[unlikely]] {
88101
free_global_string(nullptr, reinterpret_cast<void*>(const_cast<unsigned char*>(untag(str.ptr))), static_cast<unsigned>(str.len));
89102
return {};
90103
}
@@ -95,7 +108,7 @@ static const WTF::String toString(ZigString str)
95108
}
96109

97110
// This will fail if the string is too long. Let's make it explicit instead of an ASSERT.
98-
if (str.len > Bun__stringSyntheticAllocationLimit) [[unlikely]] {
111+
if (str.len > Bun__stringSyntheticAllocationLimit || str.len > WTF::String::MaxLength) [[unlikely]] {
99112
return {};
100113
}
101114

@@ -121,11 +134,19 @@ static const WTF::String toString(ZigString str, StringPointer ptr)
121134
return WTF::String();
122135
}
123136
if (isTaggedUTF8Ptr(str.ptr)) [[unlikely]] {
137+
// Check if the resulting UTF-16 string could possibly exceed the maximum length.
138+
size_t maxLength = std::min(Bun__stringSyntheticAllocationLimit, static_cast<size_t>(WTF::String::MaxLength));
139+
if (ptr.len > maxLength) [[unlikely]] {
140+
size_t utf16Length = simdutf::utf16_length_from_utf8(reinterpret_cast<const char*>(&untag(str.ptr)[ptr.off]), ptr.len);
141+
if (utf16Length > maxLength) {
142+
return {};
143+
}
144+
}
124145
return WTF::String::fromUTF8ReplacingInvalidSequences(std::span { &untag(str.ptr)[ptr.off], ptr.len });
125146
}
126147

127148
// This will fail if the string is too long. Let's make it explicit instead of an ASSERT.
128-
if (str.len > Bun__stringSyntheticAllocationLimit) [[unlikely]] {
149+
if (ptr.len > Bun__stringSyntheticAllocationLimit || ptr.len > WTF::String::MaxLength) [[unlikely]] {
129150
return {};
130151
}
131152

@@ -141,11 +162,19 @@ static const WTF::String toStringCopy(ZigString str, StringPointer ptr)
141162
return WTF::String();
142163
}
143164
if (isTaggedUTF8Ptr(str.ptr)) [[unlikely]] {
165+
// Check if the resulting UTF-16 string could possibly exceed the maximum length.
166+
size_t maxLength = std::min(Bun__stringSyntheticAllocationLimit, static_cast<size_t>(WTF::String::MaxLength));
167+
if (ptr.len > maxLength) [[unlikely]] {
168+
size_t utf16Length = simdutf::utf16_length_from_utf8(reinterpret_cast<const char*>(&untag(str.ptr)[ptr.off]), ptr.len);
169+
if (utf16Length > maxLength) {
170+
return {};
171+
}
172+
}
144173
return WTF::String::fromUTF8ReplacingInvalidSequences(std::span { &untag(str.ptr)[ptr.off], ptr.len });
145174
}
146175

147176
// This will fail if the string is too long. Let's make it explicit instead of an ASSERT.
148-
if (str.len > Bun__stringSyntheticAllocationLimit) [[unlikely]] {
177+
if (ptr.len > Bun__stringSyntheticAllocationLimit || ptr.len > WTF::String::MaxLength) [[unlikely]] {
149178
return {};
150179
}
151180

@@ -161,6 +190,14 @@ static const WTF::String toStringCopy(ZigString str)
161190
return WTF::String();
162191
}
163192
if (isTaggedUTF8Ptr(str.ptr)) [[unlikely]] {
193+
// Check if the resulting UTF-16 string could possibly exceed the maximum length.
194+
size_t maxLength = std::min(Bun__stringSyntheticAllocationLimit, static_cast<size_t>(WTF::String::MaxLength));
195+
if (str.len > maxLength) [[unlikely]] {
196+
size_t utf16Length = simdutf::utf16_length_from_utf8(reinterpret_cast<const char*>(untag(str.ptr)), str.len);
197+
if (utf16Length > maxLength) {
198+
return {};
199+
}
200+
}
164201
return WTF::String::fromUTF8ReplacingInvalidSequences(std::span { untag(str.ptr), str.len });
165202
}
166203

@@ -188,6 +225,14 @@ static void appendToBuilder(ZigString str, WTF::StringBuilder& builder)
188225
return;
189226
}
190227
if (isTaggedUTF8Ptr(str.ptr)) [[unlikely]] {
228+
// Check if the resulting UTF-16 string could possibly exceed the maximum length.
229+
size_t maxLength = std::min(Bun__stringSyntheticAllocationLimit, static_cast<size_t>(WTF::String::MaxLength));
230+
if (str.len > maxLength) [[unlikely]] {
231+
size_t utf16Length = simdutf::utf16_length_from_utf8(reinterpret_cast<const char*>(untag(str.ptr)), str.len);
232+
if (utf16Length > maxLength) {
233+
return;
234+
}
235+
}
191236
WTF::String converted = WTF::String::fromUTF8ReplacingInvalidSequences(std::span { untag(str.ptr), str.len });
192237
builder.append(converted);
193238
return;

src/url.zig

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -984,7 +984,12 @@ pub const FormData = struct {
984984
switch (encoding) {
985985
.URLEncoded => {
986986
var str = jsc.ZigString.fromUTF8(strings.withoutUTF8BOM(input));
987-
return jsc.DOMFormData.createFromURLQuery(globalThis, &str);
987+
const result = jsc.DOMFormData.createFromURLQuery(globalThis, &str);
988+
// Check if an exception was thrown (e.g., string too long)
989+
if (result == .zero) {
990+
return error.JSError;
991+
}
992+
return result;
988993
},
989994
.Multipart => |boundary| return toJSFromMultipartData(globalThis, input, boundary),
990995
}
@@ -1041,7 +1046,11 @@ pub const FormData = struct {
10411046
return globalThis.throwInvalidArguments("input must be a string or ArrayBufferView", .{});
10421047
}
10431048

1044-
return FormData.toJS(globalThis, input, encoding) catch |err| return globalThis.throwError(err, "while parsing FormData");
1049+
return FormData.toJS(globalThis, input, encoding) catch |err| {
1050+
if (err == error.JSError) return error.JSError;
1051+
if (err == error.JSTerminated) return error.JSTerminated;
1052+
return globalThis.throwError(err, "while parsing FormData");
1053+
};
10451054
}
10461055

10471056
comptime {

test/js/web/html/FormData.test.ts

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -277,6 +277,23 @@ describe("FormData", () => {
277277
expect(fd.toJSON()).toEqual({ "1": "1" });
278278
});
279279

280+
test("FormData.from throws on very large input instead of crashing", () => {
281+
// This test verifies that FormData.from throws an exception instead of crashing
282+
// when given input larger than WebKit's String::MaxLength (INT32_MAX ~= 2GB).
283+
// We use a smaller test case with the synthetic limit to avoid actually allocating 2GB+.
284+
const { setSyntheticAllocationLimitForTesting } = require("bun:internal-for-testing");
285+
// Set a small limit so we can test the boundary without allocating gigabytes
286+
const originalLimit = setSyntheticAllocationLimitForTesting(1024 * 1024); // 1MB limit
287+
try {
288+
// Create a buffer larger than the limit
289+
const largeBuffer = new Uint8Array(2 * 1024 * 1024); // 2MB
290+
// @ts-expect-error - FormData.from is a Bun extension
291+
expect(() => FormData.from(largeBuffer)).toThrow("Cannot create a string longer than");
292+
} finally {
293+
setSyntheticAllocationLimitForTesting(originalLimit);
294+
}
295+
});
296+
280297
it("should throw on bad boundary", async () => {
281298
const response = new Response('foo\r\nContent-Disposition: form-data; name="foo"\r\n\r\nbar\r\n', {
282299
headers: {

0 commit comments

Comments
 (0)