Skip to content

Commit 599fe5f

Browse files
committed
correctly summarize ws fixes
1 parent 54548b7 commit 599fe5f

File tree

5 files changed

+732
-12
lines changed

5 files changed

+732
-12
lines changed

CMakeLists.txt

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,4 +104,14 @@ add_test(NAME fixdiff7
104104
-P ${CMAKE_CURRENT_SOURCE_DIR}/tests/runtest.cmake
105105
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}/tests/7)
106106

107+
add_test(NAME fixdiff8
108+
COMMAND ${CMAKE_COMMAND}
109+
-DCMD=$<TARGET_FILE:${PROJECT_NAME}>
110+
-DSRC=deaddrop.js
111+
-DPATCH=gemini.patch
112+
-DEXPSHA=0549dec027cd6e589d998c4fc423beaab919a10a563e4f3e770c952dc3b0c55c
113+
-DEXPSHA_WIN=2e6b9b12ae0128c9edfc109744b9c67848712b0521c322a45104895aa4cbc3b1
114+
-P ${CMAKE_CURRENT_SOURCE_DIR}/tests/runtest.cmake
115+
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}/tests/8)
116+
107117

README.md

Lines changed: 177 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,17 +33,194 @@ headers with accurate line counts on stdout.
3333
It silently repairs:
3434

3535
1. New + lines containing only whitespace are rewritten to be empty lines
36+
37+
Example:
38+
39+
```
40+
diff --git a/deaddrop.js b/deaddrop.js
41+
index 8f804f0..7913254 100644
42+
--- a/deaddrop.js
43+
+++ b/deaddrop.js
44+
@@ -165,13 +165,21 @@
45+
ts = d.getFullYear() + '-' + pad(d.getMonth() + 1) + '-' +
46+
pad(d.getDate()) + '_' + pad(d.getHours()) + '-' +
47+
pad(d.getMinutes()) + '-' + pad(d.getSeconds()),
48+
+<tab>
49+
formData = new FormData(), blob;
50+
...
51+
```
52+
53+
The added line is rewritten to be an empty blank line.
54+
3655
2. Diff stanzas that do not contain any +/- lines are removed
56+
57+
Example:
58+
59+
```
60+
--- a/deaddrop.js
61+
+++ b/deaddrop.js
62+
@@ -3,6 +3,11 @@
63+
var server_max_size = 0, username = "", ws;
64+
65+
function san(s)
66+
{
67+
```
68+
3769
3. Original lines in diff that differ from real line in file only by
3870
whitespace are rewritten to contain the correct whitespace
71+
72+
Example: file contains `<tab><tab>abc`
73+
74+
```
75+
--- a/deaddrop.js
76+
+++ b/deaddrop.js
77+
@@ -3,6 +3,11 @@
78+
<space><space>abc
79+
...
80+
```
81+
82+
The output patch is rewritten to match what is already in the file at that
83+
line for whitespace, so the output patch contains `<tab><tab>abc`
84+
3985
4. All stanza header line offsets and counts are recomputed from the actual
4086
match in the original source and counting before and after lines in the diff,
4187
the incoming @@ line is completely ignored and rewritten with actual info
88+
89+
Example: incoming patch stanza headers can be nonsense
90+
91+
```
92+
--- a/deaddrop.js
93+
+++ b/deaddrop.js
94+
@@ -123,16 +345,5 @@
95+
<space><space>abc
96+
...
97+
```
98+
99+
The correct headers will be rewritten in place of the wrong ones.
100+
42101
5. Extra lead-in context lines in a stanza are removed until only 3 remain
102+
103+
```
104+
--- a/deaddrop.js
105+
+++ b/deaddrop.js
106+
@@ -3,6 +3,11 @@
107+
"<tr><th>User</th><th>IP Address</th>" +
108+
"<th>Platform</th><th>Client</th></tr>";
109+
110+
for (n = 0; n < j.connected_users.length; n++) {
111+
var u = j.connected_users[n];
112+
s_users += "<tr><td>" + san(u.user) +
113+
"</td><td>" + san(u.ip) +
114+
"</td><td>" + san(u.platform) +
115+
"</td><td>" + san(u.browser) +
116+
"</td></tr>";
117+
}
118+
s_users += "</table>";
119+
t_users.innerHTML = s_users;
120+
}
121+
+ };
122+
+
123+
+ ws.onclose = function() {
124+
...
125+
```
126+
127+
This will be rewritten to reduce the lead-in to the normal 3
128+
129+
```
130+
--- a/deaddrop.js
131+
+++ b/deaddrop.js
132+
@@ -14,6 +14,11 @@
133+
s_users += "</table>";
134+
t_users.innerHTML = s_users;
135+
}
136+
+ };
137+
+
138+
+ ws.onclose = function() {
139+
...
140+
```
141+
43142
6. Excessive lead-out-context is removed, missing lead-out context is added.
44143
Diffs adding to EOF with missing or wrong context caused by
45144
LLM losing blank lines at the original EOF are rewritten by checking
46145
the original source file for extra lines and adding them as needed.
146+
147+
Example 1: excessive lead-out removed
148+
149+
```
150+
...
151+
s_users += "</table>";
152+
t_users.innerHTML = s_users;
153+
}
154+
+ };
155+
+
156+
+ ws.onclose = function() {
157+
var u = j.connected_users[n];
158+
s_users += "<tr><td>" + san(u.user) +
159+
"</td><td>" + san(u.ip) +
160+
"</td><td>" + san(u.platform) +
161+
"</td><td>" + san(u.browser) +
162+
"</td></tr>";
163+
```
164+
165+
This will be trimmed to
166+
167+
```
168+
...
169+
s_users += "</table>";
170+
t_users.innerHTML = s_users;
171+
}
172+
+ };
173+
+
174+
+ ws.onclose = function() {
175+
var u = j.connected_users[n];
176+
s_users += "<tr><td>" + san(u.user) +
177+
"</td><td>" + san(u.ip) +
178+
```
179+
180+
Sometimes at EOT, the LLM does not properly know what is in the file; this leads
181+
to missing lead-out context.
182+
183+
Example 2: missing EOT context
184+
185+
Actual file ending
186+
187+
```
188+
...
189+
A
190+
B
191+
<cr>
192+
<cr>
193+
```
194+
195+
patch:
196+
197+
```
198+
--- a/deaddrop.js
199+
+++ b/deaddrop.js
200+
@@ -14,6 +14,11 @@
201+
A
202+
B
203+
+C
204+
+D
205+
...
206+
```
207+
208+
fixdiff will realize the situation and fix the stanza by fetching the extra
209+
lines from the original file and adding them as context at the end.
210+
211+
```
212+
--- a/deaddrop.js
213+
+++ b/deaddrop.js
214+
@@ -14,6 +14,11 @@
215+
A
216+
B
217+
+C
218+
+D
219+
<cr>
220+
<cr>
221+
...
222+
```
223+
47224
7. Unexpected blank lines in a stanza (without space, + or -) are either ignored
48225
if happening at the end of the stanza, or rewritten to be context by adding
49226
a space at the beginning, if the normal diff resumes.

fixdiff.c

Lines changed: 28 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,9 @@ typedef struct {
116116

117117
int pending_empty_lines;
118118

119+
int whitespace_corrected[64];
120+
int count_whitespace_corrected;
121+
119122
char ongoing;
120123
char skip_this_one;
121124
char lead_in_active;
@@ -380,7 +383,7 @@ fixdiff_find_original(dp_t *pdp, int *line_start)
380383
elog("Unable to skip temp lines\n");
381384
return 1;
382385
}
383-
elog("Stanza %d: removing extra lead-in\n", pdp->stanzas);
386+
elog(" stanza %d: removing extra lead-in\n", pdp->stanzas);
384387
pdp->lead_in--;
385388
pdp->lead_in_corrected++;
386389
pdp->pre--;
@@ -399,13 +402,16 @@ fixdiff_find_original(dp_t *pdp, int *line_start)
399402
return 1;
400403
}
401404

405+
pdp->count_whitespace_corrected = 0;
406+
402407
/*
403408
* Outer loop walks through each line in source.
404409
* Inner loop tries to match starting from that line
405410
*/
406411

407412
while (!hit) {
408413
line_ending_t let, les;
414+
int n;
409415

410416
init_lbuf(&lb, "src_comp");
411417
lb.fd = open(pdp->pf, OFLAGS(O_RDONLY));
@@ -437,7 +443,8 @@ fixdiff_find_original(dp_t *pdp, int *line_start)
437443
break;
438444

439445
if (!ls) {
440-
elog("Failed to match, best chunk %d lines started at %s:%d (tabs shown below as >)\n",
446+
elog("**** Failed to match, best chunk %d lines started at %s:%d "
447+
"(tabs shown below as >)\n",
441448
lmc, pdp->pf, lg_lis);
442449
elog("last match: patch = '%s"
443450
"', source = '%s'\n", b1, b2);
@@ -484,7 +491,12 @@ fixdiff_find_original(dp_t *pdp, int *line_start)
484491
if ((p1 < p1_end) != (p2 < p2_end))
485492
goto record_breakage;
486493

487-
elog("(fixable whitespace-only difference at stanza line %d)\n", lb_temp.li);
494+
for (n = 0; n < pdp->count_whitespace_corrected; n++)
495+
if (pdp->whitespace_corrected[n] == lb_temp.li)
496+
break;
497+
if (n == pdp->count_whitespace_corrected &&
498+
pdp->count_whitespace_corrected < sizeof(pdp->whitespace_corrected) / sizeof(int))
499+
pdp->whitespace_corrected[pdp->count_whitespace_corrected++] = lb_temp.li;
488500

489501
/*
490502
* We have to take care about picking up windows _TEXT
@@ -592,8 +604,7 @@ fixdiff_find_original(dp_t *pdp, int *line_start)
592604
}
593605

594606
if (lea != LE_ZERO)
595-
if (write(lb_temp.fd, "\n", TO_POSLEN(1)) !=
596-
(ssize_t)1) {
607+
if (write(lb_temp.fd, "\n", TO_POSLEN(1)) != (ssize_t)1) {
597608
close(lb_ef.fd);
598609
pdp->reason = "failed to write extra "
599610
"stanza trailer to temp file";
@@ -608,12 +619,16 @@ fixdiff_find_original(dp_t *pdp, int *line_start)
608619
}
609620

610621
if (a)
611-
elog("Stanza %d: detected patch at EOF: "
622+
elog(" stanza %d: detected patch at EOF: "
612623
"added %d context at end\n",
613624
pdp->stanzas, a);
614625

615626
close(lb_ef.fd);
616627
}
628+
629+
if (pdp->count_whitespace_corrected)
630+
elog(" stanza %d: fixed %d lines with whitespace-only fuzz\n",
631+
pdp->stanzas, pdp->count_whitespace_corrected);
617632
}
618633

619634
out:
@@ -642,7 +657,7 @@ fixdiff_stanza_end(dp_t *pdp)
642657
}
643658

644659
if (dp.pending_empty_lines)
645-
elog(" Dropped %d unexpected empty lines\n", dp.pending_empty_lines);
660+
elog(" stanza %d: Dropped %d unexpected empty lines\n", pdp->stanzas, dp.pending_empty_lines);
646661

647662
if (fixdiff_find_original(pdp, &orig)) {
648663
elog("Unable to find original stanza in source\n");
@@ -885,7 +900,8 @@ main(int argc, char *argv[])
885900
(in[0] == ' ' || in[0] == '-' || in[0] == '+')) {
886901
char ctx[3];
887902

888-
elog(" Treating %d unexpected newline(s) as context\n", dp.pending_empty_lines);
903+
elog(" stanza %d: Treating %d unexpected newline(s) as context\n",
904+
dp.stanzas, dp.pending_empty_lines);
889905

890906
ctx[0] = ' ';
891907
ctx[1] = '\n';
@@ -959,15 +975,15 @@ main(int argc, char *argv[])
959975
in[2] = in[l1 + 1];
960976
in[3] = '\0';
961977
l = 3;
962-
elog(" Reducing %u char whitespace-only line to CRLF\n",
963-
(unsigned int)l1);
978+
elog(" stanza %d: Reducing %u char whitespace-only "
979+
"line to CRLF\n", dp.stanzas, (unsigned int)l1);
964980
} else
965981
if (l1 > 1 && in[l1] == 0x0a && (l - l1) == 1) {
966982
in[1] = in[l1];
967983
in[2] = '\0';
968984
l = 2;
969-
elog(" Reducing %d char whitespace-only line to LF\n",
970-
(unsigned int)l1);
985+
elog(" stanza %d: Reducing %d char whitespace-only"
986+
" line to LF\n", dp.stanzas, (unsigned int)l1);
971987
}
972988

973989
dp.post++;

0 commit comments

Comments
 (0)