Skip to content

Commit 914cd22

Browse files
Claude Bot and claude committed
fix(FormData): throw exception on very large input instead of crashing
When `FormData.from()` is called with a very large ArrayBuffer (exceeding WebKit's `String::MaxLength` of INT32_MAX), it would crash with an assertion failure in WebKit's StringImpl.

This fixes the issue by:

1. Adding length checks in the C++ `toString` and related functions (helpers.h) to check against both Bun's synthetic limit and WebKit's `String::MaxLength`. For UTF-8 tagged strings, we use simdutf to calculate the actual UTF-16 length only when the byte length exceeds the limit.
2. Throwing an ERR_STRING_TOO_LONG exception from `createFromURLQuery` when the string is too long, instead of silently returning an empty FormData.

Changes:

- Add length checks to all UTF-8 code paths in helpers.h:
  - toString(ZigString)
  - toString(ZigString, StringPointer)
  - toStringCopy(ZigString)
  - toStringCopy(ZigString, StringPointer)
  - appendToBuilder(ZigString, StringBuilder)
- Add WTF::String::MaxLength check to non-UTF-8 paths
- Throw ERR_STRING_TOO_LONG in createFromURLQuery when string is too long
- Properly propagate JSError from toJS to fromMultipartData

Now `FormData.from(new Uint32Array(913148244))` throws:

    error: Cannot create a string longer than 2147483647 characters
    code: "ERR_STRING_TOO_LONG"

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <[email protected]>
1 parent ddcec61 commit 914cd22

File tree

4 files changed

+86
-7
lines changed

4 files changed

+86
-7
lines changed

src/bun.js/bindings/bindings.cpp

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5673,7 +5673,15 @@ CPP_DECL JSC::EncodedJSValue WebCore__DOMFormData__createFromURLQuery(JSC::JSGlo
56735673
{
56745674
Zig::GlobalObject* globalObject = static_cast<Zig::GlobalObject*>(arg0);
56755675
// don't need to copy the string because it internally does.
5676-
auto formData = DOMFormData::create(globalObject->scriptExecutionContext(), toString(*arg1));
5676+
auto str = toString(*arg1);
5677+
// toString() in helpers.h returns an empty string when the input exceeds
5678+
// String::MaxLength or Bun's synthetic allocation limit. This is the only
5679+
// condition under which toString() returns empty for non-empty input.
5680+
if (str.isEmpty() && arg1->len > 0) {
5681+
auto scope = DECLARE_THROW_SCOPE(globalObject->vm());
5682+
return Bun::ERR::STRING_TOO_LONG(scope, globalObject);
5683+
}
5684+
auto formData = DOMFormData::create(globalObject->scriptExecutionContext(), WTFMove(str));
56775685
return JSValue::encode(toJSNewlyCreated(arg0, globalObject, WTFMove(formData)));
56785686
}
56795687

src/bun.js/bindings/helpers.h

Lines changed: 49 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
#include "root.h"
44
#include "wtf/text/ASCIILiteral.h"
5+
#include "wtf/SIMDUTF.h"
56

67
#include <JavaScriptCore/Error.h>
78
#include <JavaScriptCore/Exception.h>
@@ -79,12 +80,24 @@ static const WTF::String toString(ZigString str)
7980
}
8081
if (isTaggedUTF8Ptr(str.ptr)) [[unlikely]] {
8182
ASSERT_WITH_MESSAGE(!isTaggedExternalPtr(str.ptr), "UTF8 and external ptr are mutually exclusive. The external will never be freed.");
83+
// Check if the resulting UTF-16 string could possibly exceed the maximum length.
84+
// For valid UTF-8, the number of UTF-16 code units is <= the number of UTF-8 bytes
85+
// (ASCII is 1:1; other code points use multiple UTF-8 bytes per UTF-16 code unit).
86+
// We only need to compute the actual UTF-16 length when the byte length exceeds the limit.
87+
size_t maxLength = std::min(Bun__stringSyntheticAllocationLimit, static_cast<size_t>(WTF::String::MaxLength));
88+
if (str.len > maxLength) [[unlikely]] {
89+
// UTF-8 byte length != UTF-16 length, so use simdutf to calculate the actual UTF-16 length.
90+
size_t utf16Length = simdutf::utf16_length_from_utf8(reinterpret_cast<const char*>(untag(str.ptr)), str.len);
91+
if (utf16Length > maxLength) {
92+
return {};
93+
}
94+
}
8295
return WTF::String::fromUTF8ReplacingInvalidSequences(std::span { untag(str.ptr), str.len });
8396
}
8497

8598
if (isTaggedExternalPtr(str.ptr)) [[unlikely]] {
8699
// This will fail if the string is too long. Let's make it explicit instead of an ASSERT.
87-
if (str.len > Bun__stringSyntheticAllocationLimit) [[unlikely]] {
100+
if (str.len > Bun__stringSyntheticAllocationLimit || str.len > WTF::String::MaxLength) [[unlikely]] {
88101
free_global_string(nullptr, reinterpret_cast<void*>(const_cast<unsigned char*>(untag(str.ptr))), static_cast<unsigned>(str.len));
89102
return {};
90103
}
@@ -95,7 +108,7 @@ static const WTF::String toString(ZigString str)
95108
}
96109

97110
// This will fail if the string is too long. Let's make it explicit instead of an ASSERT.
98-
if (str.len > Bun__stringSyntheticAllocationLimit) [[unlikely]] {
111+
if (str.len > Bun__stringSyntheticAllocationLimit || str.len > WTF::String::MaxLength) [[unlikely]] {
99112
return {};
100113
}
101114

@@ -121,11 +134,19 @@ static const WTF::String toString(ZigString str, StringPointer ptr)
121134
return WTF::String();
122135
}
123136
if (isTaggedUTF8Ptr(str.ptr)) [[unlikely]] {
137+
// Check if the resulting UTF-16 string could possibly exceed the maximum length.
138+
size_t maxLength = std::min(Bun__stringSyntheticAllocationLimit, static_cast<size_t>(WTF::String::MaxLength));
139+
if (ptr.len > maxLength) [[unlikely]] {
140+
size_t utf16Length = simdutf::utf16_length_from_utf8(reinterpret_cast<const char*>(&untag(str.ptr)[ptr.off]), ptr.len);
141+
if (utf16Length > maxLength) {
142+
return {};
143+
}
144+
}
124145
return WTF::String::fromUTF8ReplacingInvalidSequences(std::span { &untag(str.ptr)[ptr.off], ptr.len });
125146
}
126147

127148
// This will fail if the string is too long. Let's make it explicit instead of an ASSERT.
128-
if (str.len > Bun__stringSyntheticAllocationLimit) [[unlikely]] {
149+
if (ptr.len > Bun__stringSyntheticAllocationLimit || ptr.len > WTF::String::MaxLength) [[unlikely]] {
129150
return {};
130151
}
131152

@@ -141,11 +162,19 @@ static const WTF::String toStringCopy(ZigString str, StringPointer ptr)
141162
return WTF::String();
142163
}
143164
if (isTaggedUTF8Ptr(str.ptr)) [[unlikely]] {
165+
// Check if the resulting UTF-16 string could possibly exceed the maximum length.
166+
size_t maxLength = std::min(Bun__stringSyntheticAllocationLimit, static_cast<size_t>(WTF::String::MaxLength));
167+
if (ptr.len > maxLength) [[unlikely]] {
168+
size_t utf16Length = simdutf::utf16_length_from_utf8(reinterpret_cast<const char*>(&untag(str.ptr)[ptr.off]), ptr.len);
169+
if (utf16Length > maxLength) {
170+
return {};
171+
}
172+
}
144173
return WTF::String::fromUTF8ReplacingInvalidSequences(std::span { &untag(str.ptr)[ptr.off], ptr.len });
145174
}
146175

147176
// This will fail if the string is too long. Let's make it explicit instead of an ASSERT.
148-
if (str.len > Bun__stringSyntheticAllocationLimit) [[unlikely]] {
177+
if (ptr.len > Bun__stringSyntheticAllocationLimit || ptr.len > WTF::String::MaxLength) [[unlikely]] {
149178
return {};
150179
}
151180

@@ -161,6 +190,14 @@ static const WTF::String toStringCopy(ZigString str)
161190
return WTF::String();
162191
}
163192
if (isTaggedUTF8Ptr(str.ptr)) [[unlikely]] {
193+
// Check if the resulting UTF-16 string could possibly exceed the maximum length.
194+
size_t maxLength = std::min(Bun__stringSyntheticAllocationLimit, static_cast<size_t>(WTF::String::MaxLength));
195+
if (str.len > maxLength) [[unlikely]] {
196+
size_t utf16Length = simdutf::utf16_length_from_utf8(reinterpret_cast<const char*>(untag(str.ptr)), str.len);
197+
if (utf16Length > maxLength) {
198+
return {};
199+
}
200+
}
164201
return WTF::String::fromUTF8ReplacingInvalidSequences(std::span { untag(str.ptr), str.len });
165202
}
166203

@@ -188,6 +225,14 @@ static void appendToBuilder(ZigString str, WTF::StringBuilder& builder)
188225
return;
189226
}
190227
if (isTaggedUTF8Ptr(str.ptr)) [[unlikely]] {
228+
// Check if the resulting UTF-16 string could possibly exceed the maximum length.
229+
size_t maxLength = std::min(Bun__stringSyntheticAllocationLimit, static_cast<size_t>(WTF::String::MaxLength));
230+
if (str.len > maxLength) [[unlikely]] {
231+
size_t utf16Length = simdutf::utf16_length_from_utf8(reinterpret_cast<const char*>(untag(str.ptr)), str.len);
232+
if (utf16Length > maxLength) {
233+
return;
234+
}
235+
}
191236
WTF::String converted = WTF::String::fromUTF8ReplacingInvalidSequences(std::span { untag(str.ptr), str.len });
192237
builder.append(converted);
193238
return;

src/url.zig

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -984,7 +984,12 @@ pub const FormData = struct {
984984
switch (encoding) {
985985
.URLEncoded => {
986986
var str = jsc.ZigString.fromUTF8(strings.withoutUTF8BOM(input));
987-
return jsc.DOMFormData.createFromURLQuery(globalThis, &str);
987+
const result = jsc.DOMFormData.createFromURLQuery(globalThis, &str);
988+
// Check if an exception was thrown (e.g., string too long)
989+
if (result == .zero) {
990+
return error.JSError;
991+
}
992+
return result;
988993
},
989994
.Multipart => |boundary| return toJSFromMultipartData(globalThis, input, boundary),
990995
}
@@ -1041,7 +1046,11 @@ pub const FormData = struct {
10411046
return globalThis.throwInvalidArguments("input must be a string or ArrayBufferView", .{});
10421047
}
10431048

1044-
return FormData.toJS(globalThis, input, encoding) catch |err| return globalThis.throwError(err, "while parsing FormData");
1049+
return FormData.toJS(globalThis, input, encoding) catch |err| {
1050+
// If it's JSError, an exception is already thrown - just propagate it
1051+
if (err == error.JSError) return error.JSError;
1052+
return globalThis.throwError(err, "while parsing FormData");
1053+
};
10451054
}
10461055

10471056
comptime {

test/js/web/html/FormData.test.ts

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -277,6 +277,23 @@ describe("FormData", () => {
277277
expect(fd.toJSON()).toEqual({ "1": "1" });
278278
});
279279

280+
test("FormData.from throws on very large input instead of crashing", () => {
281+
// This test verifies that FormData.from throws an exception instead of crashing
282+
// when given input larger than WebKit's String::MaxLength (INT32_MAX ~= 2GB).
283+
// We use a smaller test case with the synthetic limit to avoid actually allocating 2GB+.
284+
const { setSyntheticAllocationLimitForTesting } = require("bun:internal-for-testing");
285+
// Set a small limit so we can test the boundary without allocating gigabytes
286+
const originalLimit = setSyntheticAllocationLimitForTesting(1024 * 1024); // 1MB limit
287+
try {
288+
// Create a buffer larger than the limit
289+
const largeBuffer = new Uint8Array(2 * 1024 * 1024); // 2MB
290+
// @ts-expect-error - FormData.from is a Bun extension
291+
expect(() => FormData.from(largeBuffer)).toThrow("Cannot create a string longer than");
292+
} finally {
293+
setSyntheticAllocationLimitForTesting(originalLimit);
294+
}
295+
});
296+
280297
it("should throw on bad boundary", async () => {
281298
const response = new Response('foo\r\nContent-Disposition: form-data; name="foo"\r\n\r\nbar\r\n', {
282299
headers: {

0 commit comments

Comments
 (0)