git/builtin/checkout--worker.c

#include "builtin.h"
#include "config.h"
#include "entry.h"
#include "parallel-checkout.h"
#include "parse-options.h"
#include "pkt-line.h"

/*
 * Decode one item received from the main process into `pc_item`.
 *
 * `buffer` holds `len` bytes: a struct pc_item_fixed_portion followed, back
 * to back, by the working tree encoding (working_tree_encoding_len bytes,
 * possibly none) and the entry name (name_len bytes).
 *
 * `pc_item` is fully overwritten. The cache entry and the encoding string
 * stored in it are allocated here and are meant to be released with
 * release_pc_item_data(). Any mismatch between `len` and the lengths
 * announced in the fixed portion is reported with BUG().
 */
static void packet_to_pc_item(const char *buffer, int len,
			      struct parallel_checkout_item *pc_item)
{
	const size_t fixed_size = sizeof(struct pc_item_fixed_portion);
	const struct pc_item_fixed_portion *fixed_portion;
	const char *variant;
	char *encoding;
	size_t variant_len;

	/*
	 * Reject negative lengths explicitly: comparing a negative int
	 * directly against a size_t would convert it to a huge unsigned
	 * value and let it slip past the "too short" check.
	 */
	if (len < 0 || (size_t)len < fixed_size)
		BUG("checkout worker received too short item (got %dB, exp %dB)",
		    len, (int)fixed_size);

	/*
	 * NOTE(review): the buffer is reinterpreted in place, which assumes
	 * it is suitably aligned for struct pc_item_fixed_portion -- confirm
	 * against the buffer the caller passes in.
	 */
	fixed_portion = (const struct pc_item_fixed_portion *)buffer;
	variant_len = (size_t)len - fixed_size;

	/*
	 * The two announced lengths must add up to exactly what follows the
	 * fixed portion. This is written as a bound check plus a subtraction
	 * so that the sum of two bogus lengths cannot wrap around and
	 * accidentally match.
	 */
	if (fixed_portion->working_tree_encoding_len > variant_len ||
	    fixed_portion->name_len !=
		variant_len - fixed_portion->working_tree_encoding_len)
		BUG("checkout worker received corrupted item");

	variant = buffer + fixed_size;

	/*
	 * Note: the main process uses zero length to communicate that the
	 * encoding is NULL. There is no use case that requires sending an
	 * actual empty string, since convert_attrs() never sets
	 * ca.working_tree_encoding to "".
	 */
	if (fixed_portion->working_tree_encoding_len) {
		encoding = xmemdupz(variant,
				    fixed_portion->working_tree_encoding_len);
		/* The entry name comes right after the encoding. */
		variant += fixed_portion->working_tree_encoding_len;
	} else {
		encoding = NULL;
	}

	memset(pc_item, 0, sizeof(*pc_item));

	/* Rebuild the cache entry from the transmitted fields. */
	pc_item->ce = make_empty_transient_cache_entry(fixed_portion->name_len, NULL);
	pc_item->ce->ce_namelen = fixed_portion->name_len;
	pc_item->ce->ce_mode = fixed_portion->ce_mode;
	memcpy(pc_item->ce->name, variant, pc_item->ce->ce_namelen);
	oidcpy(&pc_item->ce->oid, &fixed_portion->oid);

	/* Item id and conversion attributes. */
	pc_item->id = fixed_portion->id;
	pc_item->ca.crlf_action = fixed_portion->crlf_action;
	pc_item->ca.ident = fixed_portion->ident;
	pc_item->ca.working_tree_encoding = encoding;
}

static void report_result(struct parallel_checkout_item *pc_item)
{
	struct pc_item_result res = { 0 };
	size_t size;

	res.id = pc_item->id;
	res.status = pc_item->status;

	if (pc_item->status == PC_ITEM_WRITTEN) {
		res.st = pc_item->st;
		size = sizeof(res);
	} else {
		size = PC_ITEM_RESULT_BASE_SIZE;
	}

	packet_write(1, (const char *)&res, size);
}

/*
 * Release what the worker allocated for an item: its cache entry and its
 * working tree encoding string. The pc_item struct itself is left alone.
 */
static void release_pc_item_data(struct parallel_checkout_item *pc_item)
{
	/* The field is const-qualified; drop the qualifier to free it. */
	char *encoding = (char *)pc_item->ca.working_tree_encoding;

	free(encoding);
	discard_cache_entry(pc_item->ce);
}

/*
 * Main body of the worker.
 *
 * First collect every item sent on fd 0, stopping when packet_read()
 * returns 0. Only then process the items in the order received: write each
 * one, report its result on fd 1, and free its data. A flush packet on fd 1
 * marks the end of the results.
 */
static void worker_loop(struct checkout *state)
{
	struct parallel_checkout_item *items = NULL;
	size_t nr = 0, alloc = 0;
	size_t pos;
	int len;

	for (;;) {
		len = packet_read(0, NULL, NULL, packet_buffer,
				  sizeof(packet_buffer), 0);

		if (!len)
			break;
		if (len < 0)
			BUG("packet_read() returned negative value");

		/* Make room for one more item, then decode into that slot. */
		ALLOC_GROW(items, nr + 1, alloc);
		packet_to_pc_item(packet_buffer, len, &items[nr]);
		nr++;
	}

	for (pos = 0; pos < nr; pos++) {
		write_pc_item(&items[pos], state);
		report_result(&items[pos]);
		release_pc_item_data(&items[pos]);
	}

	packet_flush(1);

	free(items);
}

/*
 * NULL-terminated list of usage lines for this command; it is handed to
 * usage_with_options() and parse_options() in cmd_checkout__worker().
 */
static const char * const checkout_worker_usage[] = {
	N_("git checkout--worker [<options>]"),
	NULL
};

int cmd_checkout__worker(int argc, const char **argv, const char *prefix)
{
	struct checkout state = CHECKOUT_INIT;
	struct option checkout_worker_options[] = {
		OPT_STRING(0, "prefix", &state.base_dir, N_("string"),
			N_("when creating files, prepend <string>")),
		OPT_END()
	};

	if (argc == 2 && !strcmp(argv[1], "-h"))
		usage_with_options(checkout_worker_usage,
				   checkout_worker_options);

	git_config(git_default_config, NULL);
	argc = parse_options(argc, argv, prefix, checkout_worker_options,
			     checkout_worker_usage, 0);
	if (argc > 0)
		usage_with_options(checkout_worker_usage, checkout_worker_options);

	if (state.base_dir)
		state.base_dir_len = strlen(state.base_dir);

	/*
	 * Setting this on a worker won't actually update the index. We just
	 * need to tell the checkout machinery to lstat() the written entries,
	 * so that we can send this data back to the main process.
	 */
	state.refresh_cache = 1;

	worker_loop(&state);
	return 0;
}
parallel-checkout: make it truly parallel Use multiple worker processes to distribute the queued entries and call write_pc_item() in parallel for them. The items are distributed uniformly in contiguous chunks. This minimizes the chances of two workers writing to the same directory simultaneously, which could affect performance due to lock contention in the kernel. Work stealing (or any other format of re-distribution) is not implemented yet. The protocol between the main process and the workers is quite simple. They exchange binary messages packed in pkt-line format, and use PKT-FLUSH to mark the end of input (from both sides). The main process starts the communication by sending N pkt-lines, each corresponding to an item that needs to be written. These packets contain all the necessary information to load, smudge, and write the blob associated with each item. Then it waits for the worker to send back N pkt-lines containing the results for each item. The resulting packet must contain: the identification number of the item that it refers to, the status of the operation, and the lstat() data gathered after writing the file (iff the operation was successful). For now, checkout always uses a hardcoded value of 2 workers, only to demonstrate that the parallel checkout framework correctly divides and writes the queued entries. The next patch will add user configurations and define a more reasonable default, based on tests with the said settings. Co-authored-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com> Co-authored-by: Jeff Hostetler <jeffhost@microsoft.com> Signed-off-by: Matheus Tavares <matheus.bernardino@usp.br> Signed-off-by: Junio C Hamano <gitster@pobox.com> 2021-04-19 08:14:54 +08:00			`#include "builtin.h"`
			`#include "config.h"`
			`#include "entry.h"`
			`#include "parallel-checkout.h"`
			`#include "parse-options.h"`
			`#include "pkt-line.h"`

			`static void packet_to_pc_item(const char *buffer, int len,`
			`struct parallel_checkout_item *pc_item)`
			`{`
			`const struct pc_item_fixed_portion *fixed_portion;`
			`const char *variant;`
			`char *encoding;`

			`if (len < sizeof(struct pc_item_fixed_portion))`
			`BUG("checkout worker received too short item (got %dB, exp %dB)",`
			`len, (int)sizeof(struct pc_item_fixed_portion));`

			`fixed_portion = (struct pc_item_fixed_portion *)buffer;`

			`if (len - sizeof(struct pc_item_fixed_portion) !=`
			`fixed_portion->name_len + fixed_portion->working_tree_encoding_len)`
			`BUG("checkout worker received corrupted item");`

			`variant = buffer + sizeof(struct pc_item_fixed_portion);`

			`/*`
			`* Note: the main process uses zero length to communicate that the`
			`* encoding is NULL. There is no use case that requires sending an`
			`* actual empty string, since convert_attrs() never sets`
			`* ca.working_tree_encoding to "".`
			`*/`
			`if (fixed_portion->working_tree_encoding_len) {`
			`encoding = xmemdupz(variant,`
			`fixed_portion->working_tree_encoding_len);`
			`variant += fixed_portion->working_tree_encoding_len;`
			`} else {`
			`encoding = NULL;`
			`}`

			`memset(pc_item, 0, sizeof(*pc_item));`
make_transient_cache_entry(): optionally alloc from mem_pool Allow make_transient_cache_entry() to optionally receive a mem_pool struct in which it should allocate the entry. This will be used in the following patch, to store some transient entries which should persist until parallel checkout finishes. Signed-off-by: Matheus Tavares <matheus.bernardino@usp.br> Signed-off-by: Junio C Hamano <gitster@pobox.com> 2021-05-05 00:27:28 +08:00			`pc_item->ce = make_empty_transient_cache_entry(fixed_portion->name_len, NULL);`
parallel-checkout: make it truly parallel Use multiple worker processes to distribute the queued entries and call write_pc_item() in parallel for them. The items are distributed uniformly in contiguous chunks. This minimizes the chances of two workers writing to the same directory simultaneously, which could affect performance due to lock contention in the kernel. Work stealing (or any other format of re-distribution) is not implemented yet. The protocol between the main process and the workers is quite simple. They exchange binary messages packed in pkt-line format, and use PKT-FLUSH to mark the end of input (from both sides). The main process starts the communication by sending N pkt-lines, each corresponding to an item that needs to be written. These packets contain all the necessary information to load, smudge, and write the blob associated with each item. Then it waits for the worker to send back N pkt-lines containing the results for each item. The resulting packet must contain: the identification number of the item that it refers to, the status of the operation, and the lstat() data gathered after writing the file (iff the operation was successful). For now, checkout always uses a hardcoded value of 2 workers, only to demonstrate that the parallel checkout framework correctly divides and writes the queued entries. The next patch will add user configurations and define a more reasonable default, based on tests with the said settings. Co-authored-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com> Co-authored-by: Jeff Hostetler <jeffhost@microsoft.com> Signed-off-by: Matheus Tavares <matheus.bernardino@usp.br> Signed-off-by: Junio C Hamano <gitster@pobox.com> 2021-04-19 08:14:54 +08:00			`pc_item->ce->ce_namelen = fixed_portion->name_len;`
			`pc_item->ce->ce_mode = fixed_portion->ce_mode;`
			`memcpy(pc_item->ce->name, variant, pc_item->ce->ce_namelen);`
			`oidcpy(&pc_item->ce->oid, &fixed_portion->oid);`

			`pc_item->id = fixed_portion->id;`
			`pc_item->ca.crlf_action = fixed_portion->crlf_action;`
			`pc_item->ca.ident = fixed_portion->ident;`
			`pc_item->ca.working_tree_encoding = encoding;`
			`}`

			`static void report_result(struct parallel_checkout_item *pc_item)`
			`{`
builtin/checkout--worker: zero-initialise struct to avoid MSAN complaints report_result() sends a struct to the parent process, but that struct would contain uninitialised padding bytes. Running this code under MSAN rightly triggers a warning - but we don't particularly care about this warning because we control the receiving code, and we therefore know that those padding bytes won't be read on the receiving end. We could simply suppress this warning under MSAN with the approporiate ifdef'd attributes, but a less intrusive solution is to 0-initialise the struct, which guarantees that the padding will also be initialised. Interestingly, in the error-case branch, we only try to copy the first two members of pc_item_result, by copying only PC_ITEM_RESULT_BASE_SIZE bytes. However PC_ITEM_RESULT_BASE_SIZE is defined as 'offsetof(the_last_member)', which means that we're copying padding bytes after the end of the second last member. We could avoid doing this by redefining PC_ITEM_RESULT_BASE_SIZE as 'offsetof(second_last_member) + sizeof(second_last_member)', but there's no huge benefit to doing so (and this patch silences the MSAN warning in this scenario either way). 
MSAN output from t2080 (partially interleaved due to the parallel work :) ): Uninitialized bytes in __interceptor_write at offset 12 inside [0x7fff37d83408, 160) ==23279==WARNING: MemorySanitizer: use-of-uninitialized-value Uninitialized bytes in __interceptor_write at offset 12 inside [0x7ffdb8a07ec8, 160) ==23280==WARNING: MemorySanitizer: use-of-uninitialized-value #0 0xd5ac28 in xwrite /home/ahunt/git/git/wrapper.c:256:8 #1 0xd5b327 in write_in_full /home/ahunt/git/git/wrapper.c:311:21 #2 0xb0a8c4 in do_packet_write /home/ahunt/git/git/pkt-line.c:221:6 #3 0xb0a5fd in packet_write /home/ahunt/git/git/pkt-line.c:242:6 #4 0x4f7441 in report_result /home/ahunt/git/git/builtin/checkout--worker.c:69:2 #5 0x4f6be6 in worker_loop /home/ahunt/git/git/builtin/checkout--worker.c:100:3 #6 0x4f68d3 in cmd_checkout__worker /home/ahunt/git/git/builtin/checkout--worker.c:143:2 #7 0x4a1e76 in run_builtin /home/ahunt/git/git/git.c:461:11 #8 0x49e1e7 in handle_builtin /home/ahunt/git/git/git.c:714:3 #9 0x4a0c08 in run_argv /home/ahunt/git/git/git.c:781:4 #10 0x49d5a8 in cmd_main /home/ahunt/git/git/git.c:912:19 #11 0x7974da in main /home/ahunt/git/git/common-main.c:52:11 #12 0x7f8778114349 in __libc_start_main (/lib64/libc.so.6+0x24349) #13 0x421bd9 in _start /home/abuild/rpmbuild/BUILD/glibc-2.26/csu/../sysdeps/x86_64/start.S:120 Uninitialized value was created by an allocation of 'res' in the stack frame of function 'report_result' #0 0x4f72c0 in report_result /home/ahunt/git/git/builtin/checkout--worker.c:55 SUMMARY: MemorySanitizer: use-of-uninitialized-value /home/ahunt/git/git/wrapper.c:256:8 in xwrite Exiting #0 0xd5ac28 in xwrite /home/ahunt/git/git/wrapper.c:256:8 #1 0xd5b327 in write_in_full /home/ahunt/git/git/wrapper.c:311:21 #2 0xb0a8c4 in do_packet_write /home/ahunt/git/git/pkt-line.c:221:6 #3 0xb0a5fd in packet_write /home/ahunt/git/git/pkt-line.c:242:6 #4 0x4f7441 in report_result /home/ahunt/git/git/builtin/checkout--worker.c:69:2 #5 0x4f6be6 in worker_loop 
/home/ahunt/git/git/builtin/checkout--worker.c:100:3 #6 0x4f68d3 in cmd_checkout__worker /home/ahunt/git/git/builtin/checkout--worker.c:143:2 #7 0x4a1e76 in run_builtin /home/ahunt/git/git/git.c:461:11 #8 0x49e1e7 in handle_builtin /home/ahunt/git/git/git.c:714:3 #9 0x4a0c08 in run_argv /home/ahunt/git/git/git.c:781:4 #10 0x49d5a8 in cmd_main /home/ahunt/git/git/git.c:912:19 #11 0x7974da in main /home/ahunt/git/git/common-main.c:52:11 #12 0x7f2749a0e349 in __libc_start_main (/lib64/libc.so.6+0x24349) #13 0x421bd9 in _start /home/abuild/rpmbuild/BUILD/glibc-2.26/csu/../sysdeps/x86_64/start.S:120 Uninitialized value was created by an allocation of 'res' in the stack frame of function 'report_result' #0 0x4f72c0 in report_result /home/ahunt/git/git/builtin/checkout--worker.c:55 SUMMARY: MemorySanitizer: use-of-uninitialized-value /home/ahunt/git/git/wrapper.c:256:8 in xwrite Signed-off-by: Andrzej Hunt <andrzej@ahunt.org> Signed-off-by: Junio C Hamano <gitster@pobox.com> 2021-06-14 23:51:16 +08:00			`struct pc_item_result res = { 0 };`
parallel-checkout: make it truly parallel Use multiple worker processes to distribute the queued entries and call write_pc_item() in parallel for them. The items are distributed uniformly in contiguous chunks. This minimizes the chances of two workers writing to the same directory simultaneously, which could affect performance due to lock contention in the kernel. Work stealing (or any other format of re-distribution) is not implemented yet. The protocol between the main process and the workers is quite simple. They exchange binary messages packed in pkt-line format, and use PKT-FLUSH to mark the end of input (from both sides). The main process starts the communication by sending N pkt-lines, each corresponding to an item that needs to be written. These packets contain all the necessary information to load, smudge, and write the blob associated with each item. Then it waits for the worker to send back N pkt-lines containing the results for each item. The resulting packet must contain: the identification number of the item that it refers to, the status of the operation, and the lstat() data gathered after writing the file (iff the operation was successful). For now, checkout always uses a hardcoded value of 2 workers, only to demonstrate that the parallel checkout framework correctly divides and writes the queued entries. The next patch will add user configurations and define a more reasonable default, based on tests with the said settings. Co-authored-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com> Co-authored-by: Jeff Hostetler <jeffhost@microsoft.com> Signed-off-by: Matheus Tavares <matheus.bernardino@usp.br> Signed-off-by: Junio C Hamano <gitster@pobox.com> 2021-04-19 08:14:54 +08:00			`size_t size;`

			`res.id = pc_item->id;`
			`res.status = pc_item->status;`

			`if (pc_item->status == PC_ITEM_WRITTEN) {`
			`res.st = pc_item->st;`
			`size = sizeof(res);`
			`} else {`
			`size = PC_ITEM_RESULT_BASE_SIZE;`
			`}`

			`packet_write(1, (const char *)&res, size);`
			`}`

			`/* Free the worker-side malloced data, but not pc_item itself. */`
			`static void release_pc_item_data(struct parallel_checkout_item *pc_item)`
			`{`
			`free((char *)pc_item->ca.working_tree_encoding);`
			`discard_cache_entry(pc_item->ce);`
			`}`

			`static void worker_loop(struct checkout *state)`
			`{`
			`struct parallel_checkout_item *items = NULL;`
			`size_t i, nr = 0, alloc = 0;`

			`while (1) {`
			`int len = packet_read(0, NULL, NULL, packet_buffer,`
			`sizeof(packet_buffer), 0);`

			`if (len < 0)`
			`BUG("packet_read() returned negative value");`
			`else if (!len)`
			`break;`

			`ALLOC_GROW(items, nr + 1, alloc);`
			`packet_to_pc_item(packet_buffer, len, &items[nr++]);`
			`}`

			`for (i = 0; i < nr; i++) {`
			`struct parallel_checkout_item *pc_item = &items[i];`
			`write_pc_item(pc_item, state);`
			`report_result(pc_item);`
			`release_pc_item_data(pc_item);`
			`}`

			`packet_flush(1);`

			`free(items);`
			`}`

			`static const char * const checkout_worker_usage[] = {`
			`N_("git checkout--worker [<options>]"),`
			`NULL`
			`};`

			`int cmd_checkout__worker(int argc, const char **argv, const char *prefix)`
			`{`
			`struct checkout state = CHECKOUT_INIT;`
			`struct option checkout_worker_options[] = {`
			`OPT_STRING(0, "prefix", &state.base_dir, N_("string"),`
			`N_("when creating files, prepend <string>")),`
			`OPT_END()`
			`};`

			`if (argc == 2 && !strcmp(argv[1], "-h"))`
			`usage_with_options(checkout_worker_usage,`
			`checkout_worker_options);`

			`git_config(git_default_config, NULL);`
			`argc = parse_options(argc, argv, prefix, checkout_worker_options,`
			`checkout_worker_usage, 0);`
			`if (argc > 0)`
			`usage_with_options(checkout_worker_usage, checkout_worker_options);`

			`if (state.base_dir)`
			`state.base_dir_len = strlen(state.base_dir);`

			`/*`
			`* Setting this on a worker won't actually update the index. We just`
			`* need to tell the checkout machinery to lstat() the written entries,`
			`* so that we can send this data back to the main process.`
			`*/`
			`state.refresh_cache = 1;`

			`worker_loop(&state);`
			`return 0;`
			`}`