@@ -84,11 +84,21 @@ typedef struct {
8484 int li ;
8585} lbuf_t ;
8686
87+ typedef struct rewriter { /* one queued replacement for a stanza line that differed from the source only in whitespace */
88+ struct rewriter * next ; /* next entry in the singly-linked list; new entries are pushed at rewriter_head */
89+ size_t len ; /* byte count handed to write() when text is emitted in place of the stanza line */
90+ int line ; /* lb_temp.li value of the stanza line this entry replaces */
91+ char * text ; /* replacement bytes: the patch line's first character followed by the source line */
92+ } rewriter_t ;
93+ /* text is overcommitted: it points just past the struct, into the same allocation */
94+
8795typedef struct {
8896 off_t flo ;
8997
9098 const char * reason ;
9199
100+ rewriter_t * rewriter_head ;
101+
92102 dss_t d ;
93103 int pre ;
94104 int post ;
@@ -102,6 +112,8 @@ typedef struct {
102112
103113 int fd_temp ;
104114
115+ int li_out ;
116+
105117 char ongoing ;
106118 char skip_this_one ;
107119 char lead_in_active ;
@@ -313,10 +325,26 @@ fixdiff_stanza_start(dp_t *pdp, char *sh, size_t len)
313325 return 0 ;
314326}
315327
/*
 * Bounded string copy for diagnostics: copies the NUL-terminated string `in`
 * into `dest`, truncating to fit, and substitutes '>' for every tab so that
 * tab-vs-space differences are visible when the result is logged.
 *
 * dest: destination buffer, at least `len` bytes
 * in:   NUL-terminated source string
 * len:  size of `dest` in bytes, including room for the terminating NUL
 *
 * `dest` is always NUL-terminated when `len` is nonzero.  When `len` is zero
 * there is no room for even the terminator, so `dest` is left untouched
 * (computing `len - 1` there would wrap around to SIZE_MAX).
 */
static void
stain_copy(char *dest, const char *in, size_t len)
{
	size_t n = 0;

	if (!len)
		return;

	/* single pass: copy and stain together, stopping one short of len */
	while (n < len - 1 && in[n]) {
		dest[n] = in[n] == '\t' ? '>' : in[n];
		n++;
	}

	dest[n] = '\0';
}
343+
316344static int
317345fixdiff_find_original (dp_t * pdp , int * line_start )
318346{
319- char in_src [4096 ], in_temp [4096 ], b1 [256 ], b2 [256 ], hit = 0 ;
347+ char in_src [4096 ], in_temp [4096 ], b1 [256 ], b2 [256 ], f1 [ 256 ], f2 [ 256 ], hit = 0 ;
320348 int ret = 1 , mc = 0 , lmc = 0 , lis = 0 , lg_lis = 0 ;
321349 lbuf_t lb_temp , lb_src , lb ;
322350 size_t lt , ls ;
@@ -329,6 +357,8 @@ fixdiff_find_original(dp_t *pdp, int *line_start)
329357 lb_src .fd = lb .fd = -1 ;
330358 b1 [0 ] = '\0' ;
331359 b2 [0 ] = '\0' ;
360+ f1 [0 ] = '\0' ;
361+ f2 [0 ] = '\0' ;
332362
333363 init_lbuf (& lb_temp , "temp" );
334364 lb_temp .fd = open (pdp -> temp , OFLAGS (O_RDWR ));
@@ -402,26 +432,105 @@ fixdiff_find_original(dp_t *pdp, int *line_start)
402432 break ;
403433
404434 if (!ls ) {
405- elog ("failed to match, best chunk %d lines at %s:%d\n" ,
435+ elog ("failed to match, best chunk %d lines at %s:%d (tabs shown below as >) \n" ,
406436 lmc , pdp -> pf , lg_lis );
407- elog ("patch: '%s', source '%s'\n" , b1 , b2 );
437+ elog ("last match: patch = '%s"
438+ "', source = '%s'\n" , b1 , b2 );
439+ elog ("divergence: patch = '%s"
440+ "', source = '%s'\n" , f1 , f2 );
408441 mc = 0 ;
409442 break ;
410443 }
411444
412445 if (fixdiff_strcmp (in_temp + 1 , lt - 1 , & let , in_src , ls , & les )) {
413- if (mc > pdp -> pre + pdp -> post )
414- elog ("match failed after %d: '%s' / '%s'" , mc , in_temp + 1 , in_src );
446+ /*
447+ * It's not a match.
448+ *
449+ * It's still possible we only differ by whitespace.
450+ * Does it match if we treat any whitespace as a single
451+ * whitespace match token?
452+ */
453+
454+ char * p1 = in_temp + 1 , * p1_end = p1 + lt - 1 - (int )let ,
455+ * p2 = in_src , * p2_end = p2 + ls - (int )les ;
456+
457+ while (p1 < p1_end && p2 < p2_end ) {
458+ char wst1 = 0 , wst2 = 0 ;
459+
460+ while (* p1 == ' ' || * p1 == '\t' && p1 < p1_end ) {
461+ p1 ++ ;
462+ wst1 = 1 ;
463+ }
464+ while (* p2 == ' ' || * p2 == '\t' && p2 < p2_end ) {
465+ p2 ++ ;
466+ wst2 = 1 ;
467+ }
468+
469+ if (wst1 != wst2 )
470+ goto record_breakage ;
471+
472+ if (* p1 != * p2 )
473+ goto record_breakage ;
474+
475+ p1 ++ ;
476+ p2 ++ ;
477+ }
478+
479+ if ((p1 < p1_end ) != (p2 < p2_end ))
480+ goto record_breakage ;
481+
482+ elog ("(fixable whitespace-only difference at stanza line %d)\n" , lb_temp .li );
483+
484+ /*
485+ * We have to take care about picking up windows _TEXT
486+ * CRLF, eliminating that if present and only putting
487+ * the LF, so rewritten lines are indistinguishable
488+ */
489+
490+ {
491+ size_t rlen = ls + 1 - les + 1 ;
492+ rewriter_t * rwt = malloc (sizeof (* rwt ) + rlen + 1 );
493+
494+ if (!rwt ) {
495+ elog ("OOM\n" );
496+ return -1 ;
497+ }
498+ rwt -> next = pdp -> rewriter_head ;
499+ pdp -> rewriter_head = rwt ;
500+ rwt -> line = lb_temp .li ;
501+ rwt -> text = (char * )& rwt [1 ];
502+ rwt -> text [0 ] = * in_temp ;
503+ rwt -> len = rlen ;
504+ memcpy (rwt -> text + 1 , in_src , ls );
505+ rwt -> text [1 + ls ] = '\n' ;
506+ }
507+ goto allow_match_ws ;
508+
509+ record_breakage :
510+ if (mc + 1 > lmc ) {
511+ stain_copy (f1 , in_temp + 1 , sizeof (f1 ));
512+ stain_copy (f2 , in_src , sizeof (f2 ));
513+ }
415514 mc = 0 ;
515+ {
516+ rewriter_t * rwt = pdp -> rewriter_head , * rwt1 ;
517+
518+ while (rwt ) {
519+ rwt1 = rwt -> next ;
520+ free (rwt );
521+ rwt = rwt1 ;
522+ }
523+
524+ pdp -> rewriter_head = NULL ;
525+ }
416526 break ;
417527 }
418528
529+ allow_match_ws :
419530 mc ++ ;
420531 if (mc > lmc ) {
421- strncpy (b1 , in_temp + 1 , sizeof (b1 ) - 1 );
422- b1 [sizeof (b1 ) - 1 ] = '\0' ;
423- strncpy (b2 , in_src + 1 , sizeof (b2 ) - 1 );
424- b2 [sizeof (b2 ) - 1 ] = '\0' ;
532+ stain_copy (b1 , in_temp + 1 , sizeof (b1 ));
533+ stain_copy (b2 , in_src , sizeof (b2 ));
425534 lmc ++ ;
426535 lg_lis = lis ;
427536 }
@@ -512,8 +621,9 @@ fixdiff_find_original(dp_t *pdp, int *line_start)
512621static int
513622fixdiff_stanza_end (dp_t * pdp )
514623{
624+ int orig , nope = 0 ;
625+ lbuf_t lb_temp ;
515626 char buf [256 ];
516- int orig ;
517627
518628 if (!pdp -> ongoing )
519629 return 0 ;
@@ -554,21 +664,64 @@ fixdiff_stanza_end(dp_t *pdp)
554664
555665 /* dump the temp side-buffer into stdout */
556666
557- lseek (pdp -> fd_temp , pdp -> flo , SEEK_SET );
667+ init_lbuf (& lb_temp , "lb_temp" );
668+ lb_temp .fd = open (pdp -> temp , OFLAGS (O_RDONLY ));
669+ lseek (lb_temp .fd , pdp -> flo , SEEK_SET );
670+
558671 while (1 ) {
559- ssize_t l = read (pdp -> fd_temp , buf , sizeof (buf ));
672+ char buf [4096 ];
673+ ssize_t l = fixdiff_get_line (& lb_temp , buf , sizeof (buf ));
674+ rewriter_t * rwt = pdp -> rewriter_head ;
675+
560676 if (!l )
561677 break ;
562678
563- if (write (1 , buf , TO_POSLEN (l )) != (ssize_t )l ) {
564- pdp -> reason = "failed to write to stdout" ;
565- return 1 ;
679+ // elog("dumping %d (len %d)\n", (int)pdp->li_out, (int)l);
680+
681+ while (rwt ) {
682+ // elog("%d %d\n", rwt->line, pdp->li_out);
683+ if (rwt -> line == lb_temp .li /*pdp->li_out*/ ) /* we need to rewrite this line */
684+ break ;
685+
686+ rwt = rwt -> next ;
687+ }
688+
689+ if (rwt ) {
690+ // elog("rewriting '%.*s' to '%.*s'\n", (int)l, buf, (int)rwt->len, rwt->text);
691+ if (write (1 , rwt -> text , TO_POSLEN (rwt -> len )) != (ssize_t )rwt -> len ) {
692+ pdp -> reason = "failed to write to stdout" ;
693+ nope = 1 ;
694+ break ;
695+ }
696+ } else {
697+ if (write (1 , buf , TO_POSLEN (l )) != (ssize_t )l ) {
698+ pdp -> reason = "failed to write to stdout" ;
699+ nope = 1 ;
700+ break ;
701+ }
702+ }
703+
704+ pdp -> li_out ++ ;
705+ }
706+
707+ {
708+ rewriter_t * rwt = pdp -> rewriter_head , * rwt1 ;
709+
710+ while (rwt ) {
711+ rwt1 = rwt -> next ;
712+ free (rwt );
713+ rwt = rwt1 ;
566714 }
715+
716+ pdp -> rewriter_head = NULL ;
567717 }
568718
569- close (pdp -> fd_temp );
719+ close (lb_temp . fd );
570720 pdp -> fd_temp = -1 ;
571721
722+ if (nope )
723+ return 1 ;
724+
572725 /* track the effect stanza changes are having on line offsets */
573726 pdp -> delta += pdp -> post - pdp -> pre ;
574727
@@ -611,6 +764,7 @@ main(int argc, char *argv[])
611764 dp .d = DSS_WAIT_MMM ;
612765 dp .lb .fd = 0 ; /* stdin */
613766 dp .fd_temp = -1 ;
767+ dp .li_out = 1 ;
614768
615769 while (1 ) {
616770 size_t l = fixdiff_get_line (& dp .lb , in , sizeof (in ));
0 commit comments