

@@ 38,9 +38,9 @@ typedef struct s_xdpsplit { 



* Basically considers a "box" (off1, off2, lim1, lim2) and scan from both 



* the forward diagonal starting from (off1, off2) and the backward diagonal 



* starting from (lim1, lim2). If the K values on the same diagonal cross, it



* returns the furthest point of reach. We might end up having to expensive 



* cases using this algorithm is full, so a little bit of heuristic is needed 



* to cut the search and to return a suboptimal point. 



* returns the furthest point of reach. We might encounter expensive edge cases 



* using this algorithm, so a little bit of heuristic is needed to cut the 



* search and to return a suboptimal point. 



*/ 



static long xdl_split(unsigned long const *ha1, long off1, long lim1, 



unsigned long const *ha2, long off2, long lim2, 


@@ 63,11 +63,13 @@ static long xdl_split(unsigned long const *ha1, long off1, long lim1, 



int got_snake = 0; 







/* 



* We need to extent the diagonal "domain" by one. If the next 



* We need to extend the diagonal "domain" by one. If the next 



* values exit the box boundaries we need to change it in the



* opposite direction because (max - min) must be a power of two.



* opposite direction because (max - min) must be a power of



* two. 



* 



* Also we initialize the external K value to -1 so that we can



* avoid extra conditions check inside the core loop. 



* avoid extra conditions in the check inside the core loop. 



*/ 



if (fmin > dmin) 



kvdf[fmin  1] = 1; 


@@ 98,11 +100,13 @@ static long xdl_split(unsigned long const *ha1, long off1, long lim1, 



} 







/* 



* We need to extent the diagonal "domain" by one. If the next 



* We need to extend the diagonal "domain" by one. If the next 



* values exit the box boundaries we need to change it in the



* opposite direction because (max - min) must be a power of two.



* opposite direction because (max - min) must be a power of



* two. 



* 



* Also we initialize the external K value to 1 so that we can 



* avoid extra conditions check inside the core loop. 



* avoid extra conditions in the check inside the core loop. 



*/ 



if (bmin > dmin) 



kvdb[bmin  1] = XDL_LINE_MAX; 


@@ 138,7 +142,7 @@ static long xdl_split(unsigned long const *ha1, long off1, long lim1, 



/* 



* If the edit cost is above the heuristic trigger and if 



* we got a good snake, we sample current diagonals to see 



* if some of the, have reached an "interesting" path. Our 



* if some of them have reached an "interesting" path. Our 



* measure is a function of the distance from the diagonal 



* corner (i1 + i2) penalized with the distance from the 



* mid diagonal itself. If this value is above the current 


@@ 196,8 +200,9 @@ static long xdl_split(unsigned long const *ha1, long off1, long lim1, 



} 







/* 



* Enough is enough. We spent too much time here and now we collect 



* the furthest reaching path using the (i1 + i2) measure. 



* Enough is enough. We spent too much time here and now we 



* collect the furthest reaching path using the (i1 + i2) 



* measure. 



*/ 



if (ec >= xenv>mxcost) { 



long fbest, fbest1, bbest, bbest1; 


@@ 244,9 +249,9 @@ static long xdl_split(unsigned long const *ha1, long off1, long lim1, 











/* 



* Rule: "Divide et Impera". Recursively split the box in sub-boxes by calling



* the box splitting function. Note that the real job (marking changed lines) 



* is done in the two boundary reaching checks. 



* Rule: "Divide et Impera" (divide & conquer). Recursively split the box in 



* sub-boxes by calling the box splitting function. Note that the real job



* (marking changed lines) is done in the two boundary reaching checks. 



*/ 



int xdl_recs_cmp(diffdata_t *dd1, long off1, long lim1, 



diffdata_t *dd2, long off2, long lim2, 


@@ 323,7 +328,9 @@ int xdl_do_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp, 



} 







/* 



* Allocate and setup K vectors to be used by the differential algorithm. 



* Allocate and setup K vectors to be used by the differential 



* algorithm. 



* 



* One is to store the forward path and one to store the backward path. 



*/ 



ndiags = xe>xdf1.nreff + xe>xdf2.nreff + 3; 


@@ 394,8 +401,8 @@ static int recs_match(xrecord_t *rec1, xrecord_t *rec2, long flags) 



/* 



* If a line is indented more than this, get_indent() just returns this value. 



* This avoids having to do absurd amounts of work for data that are not 



* human-readable text, and also ensures that the output of get_indent fits within



* an int. 



* human-readable text, and also ensures that the output of get_indent fits



* within an int. 



*/ 



#define MAX_INDENT 200 






@@ 429,9 +436,9 @@ static int get_indent(xrecord_t *rec) 



} 







/* 



* If more than this number of consecutive blank rows are found, just return this 



* value. This avoids requiring O(N^2) work for pathological cases, and also 



* ensures that the output of score_split fits in an int. 



* If more than this number of consecutive blank rows are found, just return 



* this value. This avoids requiring O(N^2) work for pathological cases, and 



* also ensures that the output of score_split fits in an int. 



*/ 



#define MAX_BLANKS 20 






@@ 443,8 +450,8 @@ struct split_measurement { 



int end_of_file; 







/* 



* How much is the line immediately following the split indented (or -1 if



* the line is blank): 



* How much is the line immediately following the split indented (or -1



* if the line is blank): 



*/ 



int indent; 






@@ 454,8 +461,8 @@ struct split_measurement { 



int pre_blank; 







/* 



* How much is the nearest non-blank line above the split indented (or -1



* if there is no such line)? 



* How much is the nearest non-blank line above the split indented (or



* -1 if there is no such line)?



*/ 



int pre_indent; 






@@ 581,13 +588,13 @@ static void measure_split(const xdfile_t *xdf, long split, 







/* 



* Compute a badness score for the hypothetical split whose measurements are 



* stored in m. The weight factors were determined empirically using the tools and 



* corpus described in 



* stored in m. The weight factors were determined empirically using the tools 



* and corpus described in 



* 



* https://github.com/mhagger/diff-slider-tools



* 



* Also see that project if you want to improve the weights based on, for example, 



* a larger or more diverse corpus. 



* Also see that project if you want to improve the weights based on, for 



* example, a larger or more diverse corpus. 



*/ 



static void score_add_split(const struct split_measurement *m, struct split_score *s) 



{ 


@@ 809,13 +816,16 @@ int xdl_change_compact(xdfile_t *xdf, xdfile_t *xdfo, long flags) { 



group_init(xdfo, &go); 







while (1) { 



/* If the group is empty in the to-be-compacted file, skip it: */



/* 



* If the group is empty in the to-be-compacted file, skip it:



*/ 



if (g.end == g.start) 



goto next; 







/* 



* Now shift the change up and then down as far as possible in 



* each direction. If it bumps into any other changes, merge them. 



* each direction. If it bumps into any other changes, merge 



* them. 



*/ 



do { 



groupsize = g.end  g.start; 


@@ 858,17 +868,17 @@ int xdl_change_compact(xdfile_t *xdf, xdfile_t *xdfo, long flags) { 



* If the group can be shifted, then we can possibly use this 



* freedom to produce a more intuitive diff. 



* 



* The group is currently shifted as far down as possible, so the 



* heuristics below only have to handle upwards shifts. 



* The group is currently shifted as far down as possible, so 



* the heuristics below only have to handle upwards shifts. 



*/ 







if (g.end == earliest_end) { 



/* no shifting was possible */ 



} else if (end_matching_other != 1) { 



/* 



* Move the possibly merged group of changes back to line 



* up with the last group of changes from the other file 



* that it can align with. 



* Move the possibly merged group of changes back to 



* line up with the last group of changes from the 



* other file that it can align with. 



*/ 



while (go.end == go.start) { 



if (group_slide_up(xdf, &g, flags)) 


@@ 879,14 +889,15 @@ int xdl_change_compact(xdfile_t *xdf, xdfile_t *xdfo, long flags) { 



} else if (flags & XDF_INDENT_HEURISTIC) { 



/* 



* Indent heuristic: a group of pure add/delete lines 



* implies two splits, one between the end of the "before" 



* context and the start of the group, and another between 



* the end of the group and the beginning of the "after" 



* context. Some splits are aesthetically better and some 



* are worse. We compute a badness "score" for each split, 



* and add the scores for the two splits to define a 



* "score" for each position that the group can be shifted 



* to. Then we pick the shift with the lowest score. 



* implies two splits, one between the end of the 



* "before" context and the start of the group, and 



* another between the end of the group and the 



* beginning of the "after" context. Some splits are 



* aesthetically better and some are worse. We compute 



* a badness "score" for each split, and add the scores 



* for the two splits to define a "score" for each 



* position that the group can be shifted to. Then we 



* pick the shift with the lowest score. 



*/ 



long shift, best_shift = 1; 



struct split_score best_score; 


