core.fsync: introduce granular fsync control infrastructure

This commit introduces the infrastructure for the core.fsync
configuration knob. The repository components we want to sync
are identified by flags so that we can turn on or off syncing
for specific components.

If core.fsyncObjectFiles is set and the core.fsync configuration
also includes FSYNC_COMPONENT_LOOSE_OBJECT, we will fsync any
loose objects. This picks the strictest data integrity behavior
if core.fsync and core.fsyncObjectFiles are set to conflicting values.

This change introduces the currently unused fsync_component
helper, which will be used by a later patch that adds fsyncing to
the refs backend.

Actual configuration and documentation of the fsync components
list are in other patches in the series to separate review of
the underlying mechanism from the policy of how it's configured.

Helped-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Neeraj Singh <neerajsi@microsoft.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
Neeraj Singh 2022-03-10 22:43:21 +00:00 committed by Junio C Hamano
parent abf38abec2
commit 020406eaa5
15 changed files with 97 additions and 33 deletions

View File

@ -865,7 +865,7 @@ static void end_packfile(void)
struct tag *t;
close_pack_windows(pack_data);
finalize_hashfile(pack_file, cur_pack_oid.hash, 0);
finalize_hashfile(pack_file, cur_pack_oid.hash, FSYNC_COMPONENT_PACK, 0);
fixup_pack_header_footer(pack_data->pack_fd, pack_data->hash,
pack_data->pack_name, object_count,
cur_pack_oid.hash, pack_size);

View File

@ -1290,7 +1290,7 @@ static void conclude_pack(int fix_thin_pack, const char *curr_pack, unsigned cha
nr_objects - nr_objects_initial);
stop_progress_msg(&progress, msg.buf);
strbuf_release(&msg);
finalize_hashfile(f, tail_hash, 0);
finalize_hashfile(f, tail_hash, FSYNC_COMPONENT_PACK, 0);
hashcpy(read_hash, pack_hash);
fixup_pack_header_footer(output_fd, pack_hash,
curr_pack, nr_objects,
@ -1512,7 +1512,7 @@ static void final(const char *final_pack_name, const char *curr_pack_name,
if (!from_stdin) {
close(input_fd);
} else {
fsync_or_die(output_fd, curr_pack_name);
fsync_component_or_die(FSYNC_COMPONENT_PACK, output_fd, curr_pack_name);
err = close(output_fd);
if (err)
die_errno(_("error while closing pack file"));

View File

@ -1199,16 +1199,26 @@ static void write_pack_file(void)
display_progress(progress_state, written);
}
/*
* Did we write the wrong # entries in the header?
* If so, rewrite it like in fast-import
*/
if (pack_to_stdout) {
finalize_hashfile(f, hash, CSUM_HASH_IN_STREAM | CSUM_CLOSE);
/*
* We never fsync when writing to stdout since we may
* not be writing to an actual pack file. For instance,
* the upload-pack code passes a pipe here. Calling
* fsync on a pipe results in unnecessary
* synchronization with the reader on some platforms.
*/
finalize_hashfile(f, hash, FSYNC_COMPONENT_NONE,
CSUM_HASH_IN_STREAM | CSUM_CLOSE);
} else if (nr_written == nr_remaining) {
finalize_hashfile(f, hash, CSUM_HASH_IN_STREAM | CSUM_FSYNC | CSUM_CLOSE);
finalize_hashfile(f, hash, FSYNC_COMPONENT_PACK,
CSUM_HASH_IN_STREAM | CSUM_FSYNC | CSUM_CLOSE);
} else {
int fd = finalize_hashfile(f, hash, 0);
/*
* If we wrote the wrong number of entries in the
* header, rewrite it like in fast-import.
*/
int fd = finalize_hashfile(f, hash, FSYNC_COMPONENT_PACK, 0);
fixup_pack_header_footer(fd, hash, pack_tmp_name,
nr_written, hash, offset);
close(fd);

View File

@ -53,9 +53,10 @@ static void finish_bulk_checkin(struct bulk_checkin_state *state)
unlink(state->pack_tmp_name);
goto clear_exit;
} else if (state->nr_written == 1) {
finalize_hashfile(state->f, hash, CSUM_HASH_IN_STREAM | CSUM_FSYNC | CSUM_CLOSE);
finalize_hashfile(state->f, hash, FSYNC_COMPONENT_PACK,
CSUM_HASH_IN_STREAM | CSUM_FSYNC | CSUM_CLOSE);
} else {
int fd = finalize_hashfile(state->f, hash, 0);
int fd = finalize_hashfile(state->f, hash, FSYNC_COMPONENT_PACK, 0);
fixup_pack_header_footer(fd, hash, state->pack_tmp_name,
state->nr_written, hash,
state->offset);

23
cache.h
View File

@ -993,6 +993,27 @@ void reset_shared_repository(void);
extern int read_replace_refs;
extern char *git_replace_ref_base;
/*
* These values are used to help identify parts of a repository to fsync.
* FSYNC_COMPONENT_NONE identifies data that will not be a persistent part of the
* repository and so shouldn't be fsynced.
*/
enum fsync_component {
FSYNC_COMPONENT_NONE,
FSYNC_COMPONENT_LOOSE_OBJECT = 1 << 0,
FSYNC_COMPONENT_PACK = 1 << 1,
FSYNC_COMPONENT_PACK_METADATA = 1 << 2,
FSYNC_COMPONENT_COMMIT_GRAPH = 1 << 3,
};
#define FSYNC_COMPONENTS_DEFAULT (FSYNC_COMPONENT_PACK | \
FSYNC_COMPONENT_PACK_METADATA | \
FSYNC_COMPONENT_COMMIT_GRAPH)
/*
* A bitmask indicating which components of the repo should be fsynced.
*/
extern enum fsync_component fsync_components;
extern int fsync_object_files;
extern int use_fsync;
@ -1707,6 +1728,8 @@ int copy_file_with_time(const char *dst, const char *src, int mode);
void write_or_die(int fd, const void *buf, size_t count);
void fsync_or_die(int fd, const char *);
int fsync_component(enum fsync_component component, int fd);
void fsync_component_or_die(enum fsync_component component, int fd, const char *msg);
ssize_t read_in_full(int fd, void *buf, size_t count);
ssize_t write_in_full(int fd, const void *buf, size_t count);

View File

@ -1942,7 +1942,8 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx)
}
close_commit_graph(ctx->r->objects);
finalize_hashfile(f, file_hash, CSUM_HASH_IN_STREAM | CSUM_FSYNC);
finalize_hashfile(f, file_hash, FSYNC_COMPONENT_COMMIT_GRAPH,
CSUM_HASH_IN_STREAM | CSUM_FSYNC);
free_chunkfile(cf);
if (ctx->split) {

View File

@ -58,7 +58,8 @@ static void free_hashfile(struct hashfile *f)
free(f);
}
int finalize_hashfile(struct hashfile *f, unsigned char *result, unsigned int flags)
int finalize_hashfile(struct hashfile *f, unsigned char *result,
enum fsync_component component, unsigned int flags)
{
int fd;
@ -69,7 +70,7 @@ int finalize_hashfile(struct hashfile *f, unsigned char *result, unsigned int fl
if (flags & CSUM_HASH_IN_STREAM)
flush(f, f->buffer, the_hash_algo->rawsz);
if (flags & CSUM_FSYNC)
fsync_or_die(f->fd, f->name);
fsync_component_or_die(component, f->fd, f->name);
if (flags & CSUM_CLOSE) {
if (close(f->fd))
die_errno("%s: sha1 file error on close", f->name);

View File

@ -1,6 +1,7 @@
#ifndef CSUM_FILE_H
#define CSUM_FILE_H
#include "cache.h"
#include "hash.h"
struct progress;
@ -38,7 +39,7 @@ int hashfile_truncate(struct hashfile *, struct hashfile_checkpoint *);
struct hashfile *hashfd(int fd, const char *name);
struct hashfile *hashfd_check(const char *name);
struct hashfile *hashfd_throughput(int fd, const char *name, struct progress *tp);
int finalize_hashfile(struct hashfile *, unsigned char *, unsigned int);
int finalize_hashfile(struct hashfile *, unsigned char *, enum fsync_component, unsigned int);
void hashwrite(struct hashfile *, const void *, unsigned int);
void hashflush(struct hashfile *f);
void crc32_begin(struct hashfile *);

View File

@ -45,6 +45,7 @@ int pack_compression_level = Z_DEFAULT_COMPRESSION;
int fsync_object_files;
int use_fsync = -1;
enum fsync_method fsync_method = FSYNC_METHOD_DEFAULT;
enum fsync_component fsync_components = FSYNC_COMPONENTS_DEFAULT;
size_t packed_git_window_size = DEFAULT_PACKED_GIT_WINDOW_SIZE;
size_t packed_git_limit = DEFAULT_PACKED_GIT_LIMIT;
size_t delta_base_cache_limit = 96 * 1024 * 1024;

3
midx.c
View File

@ -1438,7 +1438,8 @@ static int write_midx_internal(const char *object_dir,
write_midx_header(f, get_num_chunks(cf), ctx.nr - dropped_packs);
write_chunkfile(cf, &ctx);
finalize_hashfile(f, midx_hash, CSUM_FSYNC | CSUM_HASH_IN_STREAM);
finalize_hashfile(f, midx_hash, FSYNC_COMPONENT_PACK_METADATA,
CSUM_FSYNC | CSUM_HASH_IN_STREAM);
free_chunkfile(cf);
if (flags & MIDX_WRITE_REV_INDEX &&

View File

@ -1849,11 +1849,16 @@ int hash_object_file(const struct git_hash_algo *algo, const void *buf,
/* Finalize a file on disk, and close it. */
static void close_loose_object(int fd)
{
if (!the_repository->objects->odb->will_destroy) {
if (fsync_object_files)
fsync_or_die(fd, "loose object file");
}
if (the_repository->objects->odb->will_destroy)
goto out;
if (fsync_object_files > 0)
fsync_or_die(fd, "loose object file");
else
fsync_component_or_die(FSYNC_COMPONENT_LOOSE_OBJECT, fd,
"loose object file");
out:
if (close(fd) != 0)
die_errno(_("error when closing loose object file"));
}

View File

@ -719,7 +719,8 @@ void bitmap_writer_finish(struct pack_idx_entry **index,
if (options & BITMAP_OPT_HASH_CACHE)
write_hash_cache(f, index, index_nr);
finalize_hashfile(f, NULL, CSUM_HASH_IN_STREAM | CSUM_FSYNC | CSUM_CLOSE);
finalize_hashfile(f, NULL, FSYNC_COMPONENT_PACK_METADATA,
CSUM_HASH_IN_STREAM | CSUM_FSYNC | CSUM_CLOSE);
if (adjust_shared_perm(tmp_file.buf))
die_errno("unable to make temporary bitmap file readable");

View File

@ -159,9 +159,9 @@ const char *write_idx_file(const char *index_name, struct pack_idx_entry **objec
}
hashwrite(f, sha1, the_hash_algo->rawsz);
finalize_hashfile(f, NULL, CSUM_HASH_IN_STREAM | CSUM_CLOSE |
((opts->flags & WRITE_IDX_VERIFY)
? 0 : CSUM_FSYNC));
finalize_hashfile(f, NULL, FSYNC_COMPONENT_PACK_METADATA,
CSUM_HASH_IN_STREAM | CSUM_CLOSE |
((opts->flags & WRITE_IDX_VERIFY) ? 0 : CSUM_FSYNC));
return index_name;
}
@ -281,8 +281,9 @@ const char *write_rev_file_order(const char *rev_name,
if (rev_name && adjust_shared_perm(rev_name) < 0)
die(_("failed to make %s readable"), rev_name);
finalize_hashfile(f, NULL, CSUM_HASH_IN_STREAM | CSUM_CLOSE |
((flags & WRITE_IDX_VERIFY) ? 0 : CSUM_FSYNC));
finalize_hashfile(f, NULL, FSYNC_COMPONENT_PACK_METADATA,
CSUM_HASH_IN_STREAM | CSUM_CLOSE |
((flags & WRITE_IDX_VERIFY) ? 0 : CSUM_FSYNC));
return rev_name;
}
@ -390,7 +391,7 @@ void fixup_pack_header_footer(int pack_fd,
the_hash_algo->final_fn(partial_pack_hash, &old_hash_ctx);
the_hash_algo->final_fn(new_pack_hash, &new_hash_ctx);
write_or_die(pack_fd, new_pack_hash, the_hash_algo->rawsz);
fsync_or_die(pack_fd, pack_name);
fsync_component_or_die(FSYNC_COMPONENT_PACK, pack_fd, pack_name);
}
char *index_pack_lockfile(int ip_out, int *is_well_formed)

View File

@ -3089,7 +3089,7 @@ static int do_write_index(struct index_state *istate, struct tempfile *tempfile,
return -1;
}
finalize_hashfile(f, istate->oid.hash, CSUM_HASH_IN_STREAM);
finalize_hashfile(f, istate->oid.hash, FSYNC_COMPONENT_NONE, CSUM_HASH_IN_STREAM);
if (close_tempfile_gently(tempfile)) {
error(_("could not close '%s'"), get_tempfile_path(tempfile));
return -1;

View File

@ -56,21 +56,39 @@ void fprintf_or_die(FILE *f, const char *fmt, ...)
}
}
void fsync_or_die(int fd, const char *msg)
static int maybe_fsync(int fd)
{
if (use_fsync < 0)
use_fsync = git_env_bool("GIT_TEST_FSYNC", 1);
if (!use_fsync)
return;
return 0;
if (fsync_method == FSYNC_METHOD_WRITEOUT_ONLY &&
git_fsync(fd, FSYNC_WRITEOUT_ONLY) >= 0)
return;
return 0;
if (git_fsync(fd, FSYNC_HARDWARE_FLUSH) < 0)
return git_fsync(fd, FSYNC_HARDWARE_FLUSH);
}
void fsync_or_die(int fd, const char *msg)
{
if (maybe_fsync(fd) < 0)
die_errno("fsync error on '%s'", msg);
}
int fsync_component(enum fsync_component component, int fd)
{
if (fsync_components & component)
return maybe_fsync(fd);
return 0;
}
void fsync_component_or_die(enum fsync_component component, int fd, const char *msg)
{
if (fsync_components & component)
fsync_or_die(fd, msg);
}
void write_or_die(int fd, const void *buf, size_t count)
{
if (write_in_full(fd, buf, count) < 0) {