git/linear-assignment.c

/*
 * Based on: Jonker, R., & Volgenant, A. (1987). <i>A shortest augmenting path
 * algorithm for dense and sparse linear assignment problems</i>. Computing,
 * 38(4), 325-340.
 */
#include "git-compat-util.h"
#include "linear-assignment.h"

/*
 * Cost-matrix entry for the pair (`column`, `row`). The expansion refers to
 * `cost` and `column_count` by name, so both must be in scope wherever the
 * macro is used.
 */
#define COST(column, row) cost[(column) + column_count * (row)]

/*
 * Pair up columns and rows of a cost matrix, looking for the cheapest overall
 * pairing.
 *
 * The parameter `cost` is the cost matrix: the cost to assign column j to row
 * i is `cost[j + column_count * i]`.
 *
 * The results are written to the caller-provided arrays `column2row`
 * (`column_count` entries) and `row2column` (`row_count` entries):
 * `column2row[j]` is the row paired with column j, and `row2column[i]` is the
 * column paired with row i. Both arrays start out filled with -1, so an entry
 * that never receives a partner keeps that value.
 *
 * Special cases:
 *  - With fewer than two columns no search is performed; both arrays are
 *    simply filled with zeros.
 *  - With no rows at all, every column is reported as unassigned (-1).
 *
 * NOTE(review): the behaviour for matrices with more rows than columns (or
 * vice versa) beyond the early exit after the reduction transfer has not been
 * verified here.
 */
void compute_assignment(int column_count, int row_count, int *cost,
			int *column2row, int *row2column)
{
	int *v, *d;
	int *free_row, free_count = 0, saved_free_count, *pred, *col;
	int i, j, phase;

	if (column_count < 2) {
		memset(column2row, 0, sizeof(int) * column_count);
		memset(row2column, 0, sizeof(int) * row_count);
		return;
	}

	memset(column2row, -1, sizeof(int) * column_count);
	if (row_count < 1)
		return; /* COST(j, 0) below would read past an empty matrix */
	memset(row2column, -1, sizeof(int) * row_count);
	ALLOC_ARRAY(v, column_count);

	/*
	 * column reduction: v[j] becomes the smallest cost in column j, and
	 * the (first) row holding that cost is tentatively paired with j.
	 */
	for (j = column_count - 1; j >= 0; j--) {
		int i1 = 0;

		for (i = 1; i < row_count; i++)
			if (COST(j, i1) > COST(j, i))
				i1 = i;
		v[j] = COST(j, i1);
		if (row2column[i1] == -1) {
			/* row i1 unassigned */
			row2column[i1] = j;
			column2row[j] = i1;
		} else {
			/*
			 * Row i1 is already taken. Encode its column as
			 * -2 - column to flag "claimed more than once"; the
			 * reduction transfer below decodes it again.
			 */
			if (row2column[i1] >= 0)
				row2column[i1] = -2 - row2column[i1];
			column2row[j] = -1;
		}
	}

	/*
	 * reduction transfer: collect the unassigned rows, undo the marker
	 * encoding, and for rows claimed by exactly one column lower that
	 * column's v by the row's smallest reduced cost among the others.
	 */
	ALLOC_ARRAY(free_row, row_count);
	for (i = 0; i < row_count; i++) {
		int j1 = row2column[i];
		if (j1 == -1)
			free_row[free_count++] = i;
		else if (j1 < -1)
			row2column[i] = -2 - j1;
		else {
			/* `!j1` is a column index (0 or 1) different from j1 */
			int min = COST(!j1, i) - v[!j1];
			for (j = 1; j < column_count; j++)
				if (j != j1 && min > COST(j, i) - v[j])
					min = COST(j, i) - v[j];
			v[j1] -= min;
		}
	}

	/* Nothing left to do if only the surplus rows remained unassigned. */
	if (free_count ==
	    (column_count < row_count ? row_count - column_count : 0)) {
		free(v);
		free(free_row);
		return;
	}

	/* augmenting row reduction */
	for (phase = 0; phase < 2; phase++) {
		int k = 0;

		saved_free_count = free_count;
		free_count = 0;
		while (k < saved_free_count) {
			int u1, u2;
			int j1 = 0, j2, i0;

			/*
			 * Find the smallest (u1, at column j1) and second
			 * smallest (u2, at column j2) reduced cost in row i.
			 */
			i = free_row[k++];
			u1 = COST(j1, i) - v[j1];
			j2 = -1;
			u2 = INT_MAX;
			for (j = 1; j < column_count; j++) {
				int c = COST(j, i) - v[j];
				if (u2 > c) {
					if (u1 < c) {
						u2 = c;
						j2 = j;
					} else {
						u2 = u1;
						u1 = c;
						j2 = j1;
						j1 = j;
					}
				}
			}
			if (j2 < 0) {
				j2 = j1;
				u2 = u1;
			}

			i0 = column2row[j1];
			if (u1 < u2)
				v[j1] -= u2 - u1;
			else if (i0 >= 0) {
				/* tie, and j1 is taken: j2 may still be free */
				j1 = j2;
				i0 = column2row[j1];
			}

			if (i0 >= 0) {
				/*
				 * Row i0 loses its column: retry it right away
				 * if v changed, otherwise defer it to the next
				 * pass.
				 */
				if (u1 < u2)
					free_row[--k] = i0;
				else
					free_row[free_count++] = i0;
			}
			row2column[i] = j1;
			column2row[j1] = i;
		}
	}

	/* augmentation */
	saved_free_count = free_count;
	ALLOC_ARRAY(d, column_count);
	ALLOC_ARRAY(pred, column_count);
	ALLOC_ARRAY(col, column_count);
	for (free_count = 0; free_count < saved_free_count; free_count++) {
		int i1 = free_row[free_count], low = 0, up = 0, last, k;
		int min, c, u1;

		/*
		 * d[j]: cheapest known reduced cost of reaching column j from
		 * row i1; pred[j]: the row from which j is reached; col[]: the
		 * columns, partitioned by `low` and `up` into already scanned
		 * [0, low), at the current minimum [low, up), and the rest.
		 */
		for (j = 0; j < column_count; j++) {
			d[j] = COST(j, i1) - v[j];
			pred[j] = i1;
			col[j] = j;
		}

		j = -1;
		do {
			/* gather all remaining columns with minimal d */
			last = low;
			min = d[col[up++]];
			for (k = up; k < column_count; k++) {
				j = col[k];
				c = d[j];
				if (c <= min) {
					if (c < min) {
						up = low;
						min = c;
					}
					col[k] = col[up];
					col[up++] = j;
				}
			}
			for (k = low; k < up; k++) {
				if (column2row[col[k]] == -1) {
					/*
					 * The path ends at this unassigned
					 * column. `j` still refers to the
					 * last column looked at by the loop
					 * above, so it must be set here for
					 * the path to be rebuilt from the
					 * right end.
					 */
					j = col[k];
					goto update;
				}
			}

			/* scan a row */
			do {
				int j1 = col[low++];

				i = column2row[j1];
				u1 = COST(j1, i) - v[j1] - min;
				for (k = up; k < column_count; k++) {
					j = col[k];
					c = COST(j, i) - v[j] - u1;
					if (c < d[j]) {
						d[j] = c;
						pred[j] = i;
						if (c == min) {
							if (column2row[j] == -1)
								goto update;
							col[k] = col[up];
							col[up++] = j;
						}
					}
				}
			} while (low != up);
		} while (low == up);

update:
		/* update v for the columns scanned before the final minimum */
		for (k = 0; k < last; k++) {
			int j1 = col[k];
			v[j1] += d[j1] - min;
		}

		/*
		 * augmentation: walk back along pred[] from column j, handing
		 * each row's previous column on to its predecessor, until the
		 * starting row i1 is reached.
		 */
		do {
			if (j < 0)
				BUG("negative j: %d", j);
			i = pred[j];
			column2row[j] = i;
			SWAP(j, row2column[i]);
		} while (i1 != i);
	}

	free(col);
	free(pred);
	free(d);
	free(v);
	free(free_row);
}
linear-assignment: a function to solve least-cost assignment problems The problem solved by the code introduced in this commit goes like this: given two sets of items, and a cost matrix which says how much it "costs" to assign any given item of the first set to any given item of the second, assign all items (except when the sets have different size) in the cheapest way. We use the Jonker-Volgenant algorithm to solve the assignment problem to answer questions such as: given two different versions of a topic branch (or iterations of a patch series), what is the best pairing of commits/patches between the different versions? Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> Signed-off-by: Junio C Hamano <gitster@pobox.com> 2018-08-13 19:33:00 +08:00			`/*`
			`* Based on: Jonker, R., & Volgenant, A. (1987). <i>A shortest augmenting path`
			`* algorithm for dense and sparse linear assignment problems</i>. Computing,`
			`* 38(4), 325-340.`
			`*/`
treewide: remove unnecessary cache.h includes in source files We had several C files include cache.h unnecessarily. Replace those with an include of "git-compat-util.h" instead. Much like the previous commit, these have all been verified via both ensuring that gcc -E $SOURCE_FILE \| grep '"cache.h"' found no hits and that make DEVELOPER=1 ${OBJECT_FILE_FOR_SOURCE_FILE} successfully compiles without warnings. Signed-off-by: Elijah Newren <newren@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> 2023-02-24 08:09:23 +08:00			`#include "git-compat-util.h"`
linear-assignment: a function to solve least-cost assignment problems The problem solved by the code introduced in this commit goes like this: given two sets of items, and a cost matrix which says how much it "costs" to assign any given item of the first set to any given item of the second, assign all items (except when the sets have different size) in the cheapest way. We use the Jonker-Volgenant algorithm to solve the assignment problem to answer questions such as: given two different versions of a topic branch (or iterations of a patch series), what is the best pairing of commits/patches between the different versions? Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> Signed-off-by: Junio C Hamano <gitster@pobox.com> 2018-08-13 19:33:00 +08:00			`#include "linear-assignment.h"`

			`#define COST(column, row) cost[(column) + column_count * (row)]`

			`/*`
			* The parameter `cost` is the cost matrix: the cost to assign column j to row
			* i is `cost[j + column_count * i]`.
			`*/`
			`void compute_assignment(int column_count, int row_count, int *cost,`
			`int *column2row, int *row2column)`
			`{`
			`int *v, *d;`
			`int *free_row, free_count = 0, saved_free_count, *pred, *col;`
			`int i, j, phase;`

linear-assignment: fix potential out of bounds memory access Currently the 'compute_assignment()' function may read memory out of bounds, even if used correctly. Namely this happens when we only have one column. In that case we try to calculate the initial minimum cost using '!j1' as column in the reduction transfer code. That in turn causes us to try and get the cost from column 1 in the cost matrix, which does not exist, and thus results in an out of bounds memory read. In the original paper [1], the example code initializes that minimum cost to "infinite". We could emulate something similar by setting the minimum cost to INT_MAX, which would result in the same minimum cost as the current algorithm, as we'd always go into the if condition at least once, except when we only have one column, and column_count thus equals 1. If column_count does equal 1, the condition in the loop would always be false, and we'd end up with a minimum of INT_MAX, which may lead to integer overflows later in the algorithm. For a column count of 1, we however do not even really need to go through the whole algorithm. A column count of 1 means that there's no possible assignments, and we can just zero out the column2row and row2column arrays, and return early from the function, while keeping the reduction transfer part of the function the same as it is currently. Another solution would be to just not call the 'compute_assignment()' function from the range diff code in this case, however it's better to make the compute_assignment function more robust, so future callers don't run into this potential problem. Note that the test only fails under valgrind on Linux, but the same command has been reported to segfault on Mac OS. [1]: Jonker, R., & Volgenant, A. (1987). A shortest augmenting path algorithm for dense and sparse linear assignment problems. Computing, 38(4), 325–340. 
Reported-by: ryenus <ryenus@gmail.com> Helped-by: Derrick Stolee <stolee@gmail.com> Signed-off-by: Thomas Gummerer <t.gummerer@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> 2018-09-14 06:38:34 +08:00			`if (column_count < 2) {`
			`memset(column2row, 0, sizeof(int) * column_count);`
			`memset(row2column, 0, sizeof(int) * row_count);`
			`return;`
			`}`

linear-assignment: a function to solve least-cost assignment problems The problem solved by the code introduced in this commit goes like this: given two sets of items, and a cost matrix which says how much it "costs" to assign any given item of the first set to any given item of the second, assign all items (except when the sets have different size) in the cheapest way. We use the Jonker-Volgenant algorithm to solve the assignment problem to answer questions such as: given two different versions of a topic branch (or iterations of a patch series), what is the best pairing of commits/patches between the different versions? Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> Signed-off-by: Junio C Hamano <gitster@pobox.com> 2018-08-13 19:33:00 +08:00			`memset(column2row, -1, sizeof(int) * column_count);`
			`memset(row2column, -1, sizeof(int) * row_count);`
			`ALLOC_ARRAY(v, column_count);`

			`/* column reduction */`
			`for (j = column_count - 1; j >= 0; j--) {`
			`int i1 = 0;`

			`for (i = 1; i < row_count; i++)`
			`if (COST(j, i1) > COST(j, i))`
			`i1 = i;`
			`v[j] = COST(j, i1);`
			`if (row2column[i1] == -1) {`
			`/* row i1 unassigned */`
			`row2column[i1] = j;`
			`column2row[j] = i1;`
			`} else {`
			`if (row2column[i1] >= 0)`
			`row2column[i1] = -2 - row2column[i1];`
			`column2row[j] = -1;`
			`}`
			`}`

			`/* reduction transfer */`
			`ALLOC_ARRAY(free_row, row_count);`
			`for (i = 0; i < row_count; i++) {`
			`int j1 = row2column[i];`
			`if (j1 == -1)`
			`free_row[free_count++] = i;`
			`else if (j1 < -1)`
			`row2column[i] = -2 - j1;`
			`else {`
			`int min = COST(!j1, i) - v[!j1];`
			`for (j = 1; j < column_count; j++)`
			`if (j != j1 && min > COST(j, i) - v[j])`
			`min = COST(j, i) - v[j];`
			`v[j1] -= min;`
			`}`
			`}`

			`if (free_count ==`
			`(column_count < row_count ? row_count - column_count : 0)) {`
			`free(v);`
			`free(free_row);`
			`return;`
			`}`

			`/* augmenting row reduction */`
			`for (phase = 0; phase < 2; phase++) {`
			`int k = 0;`

			`saved_free_count = free_count;`
			`free_count = 0;`
			`while (k < saved_free_count) {`
			`int u1, u2;`
			`int j1 = 0, j2, i0;`

			`i = free_row[k++];`
			`u1 = COST(j1, i) - v[j1];`
			`j2 = -1;`
			`u2 = INT_MAX;`
			`for (j = 1; j < column_count; j++) {`
			`int c = COST(j, i) - v[j];`
			`if (u2 > c) {`
			`if (u1 < c) {`
			`u2 = c;`
			`j2 = j;`
			`} else {`
			`u2 = u1;`
			`u1 = c;`
			`j2 = j1;`
			`j1 = j;`
			`}`
			`}`
			`}`
			`if (j2 < 0) {`
			`j2 = j1;`
			`u2 = u1;`
			`}`

			`i0 = column2row[j1];`
			`if (u1 < u2)`
			`v[j1] -= u2 - u1;`
			`else if (i0 >= 0) {`
			`j1 = j2;`
			`i0 = column2row[j1];`
			`}`

			`if (i0 >= 0) {`
			`if (u1 < u2)`
			`free_row[--k] = i0;`
			`else`
			`free_row[free_count++] = i0;`
			`}`
			`row2column[i] = j1;`
			`column2row[j1] = i;`
			`}`
			`}`

			`/* augmentation */`
			`saved_free_count = free_count;`
			`ALLOC_ARRAY(d, column_count);`
			`ALLOC_ARRAY(pred, column_count);`
			`ALLOC_ARRAY(col, column_count);`
			`for (free_count = 0; free_count < saved_free_count; free_count++) {`
			`int i1 = free_row[free_count], low = 0, up = 0, last, k;`
			`int min, c, u1;`

			`for (j = 0; j < column_count; j++) {`
			`d[j] = COST(j, i1) - v[j];`
			`pred[j] = i1;`
			`col[j] = j;`
			`}`

			`j = -1;`
			`do {`
			`last = low;`
			`min = d[col[up++]];`
			`for (k = up; k < column_count; k++) {`
			`j = col[k];`
			`c = d[j];`
			`if (c <= min) {`
			`if (c < min) {`
			`up = low;`
			`min = c;`
			`}`
			`col[k] = col[up];`
			`col[up++] = j;`
			`}`
			`}`
			`for (k = low; k < up; k++)`
			`if (column2row[col[k]] == -1)`
			`goto update;`

			`/* scan a row */`
			`do {`
			`int j1 = col[low++];`

			`i = column2row[j1];`
			`u1 = COST(j1, i) - v[j1] - min;`
			`for (k = up; k < column_count; k++) {`
			`j = col[k];`
			`c = COST(j, i) - v[j] - u1;`
			`if (c < d[j]) {`
			`d[j] = c;`
			`pred[j] = i;`
			`if (c == min) {`
			`if (column2row[j] == -1)`
			`goto update;`
			`col[k] = col[up];`
			`col[up++] = j;`
			`}`
			`}`
			`}`
			`} while (low != up);`
			`} while (low == up);`

			`update:`
			`/* updating of the column pieces */`
			`for (k = 0; k < last; k++) {`
			`int j1 = col[k];`
			`v[j1] += d[j1] - min;`
			`}`

			`/* augmentation */`
			`do {`
			`if (j < 0)`
			`BUG("negative j: %d", j);`
			`i = pred[j];`
			`column2row[j] = i;`
			`SWAP(j, row2column[i]);`
			`} while (i1 != i);`
			`}`

			`free(col);`
			`free(pred);`
			`free(d);`
			`free(v);`
			`free(free_row);`
			`}`