2015-11-09 21:34:01 +08:00
|
|
|
#include "../cache.h"
|
2017-06-15 02:07:36 +08:00
|
|
|
#include "../config.h"
|
2015-11-09 21:34:01 +08:00
|
|
|
#include "../refs.h"
|
|
|
|
#include "refs-internal.h"
|
2017-04-16 14:41:31 +08:00
|
|
|
#include "ref-cache.h"
|
2017-06-23 15:01:37 +08:00
|
|
|
#include "packed-backend.h"
|
refs: introduce an iterator interface
Currently, the API for iterating over references is via a family of
for_each_ref()-type functions that invoke a callback function for each
selected reference. All of these eventually call do_for_each_ref(),
which knows how to do one thing: iterate in parallel through two
ref_caches, one for loose and one for packed refs, giving loose
references precedence over packed refs. This is rather complicated code,
and is quite specialized to the files backend. It also requires callers
to encapsulate their work into a callback function, which often means
that they have to define and use a "cb_data" struct to manage their
context.
The current design is already bursting at the seams, and will become
even more awkward in the upcoming world of multiple reference storage
backends:
* Per-worktree vs. shared references are currently handled via a kludge
in git_path() rather than iterating over each part of the reference
namespace separately and merging the results. This kludge will cease
to work when we have multiple reference storage backends.
* The current scheme is inflexible. What if we sometimes want to bypass
the ref_cache, or use it only for packed or only for loose refs? What
if we want to store symbolic refs in one type of storage backend and
non-symbolic ones in another?
In the future, each reference backend will need to define its own way of
iterating over references. The crux of the problem with the current
design is that it is impossible to compose for_each_ref()-style
iterations, because the flow of control is owned by the for_each_ref()
function. There is nothing that a caller can do but iterate through all
references in a single burst, so there is no way for it to interleave
references from multiple backends and present the result to the rest of
the world as a single compound backend.
This commit introduces a new iteration primitive for references: a
ref_iterator. A ref_iterator is a polymorphic object that a reference
storage backend can be asked to instantiate. There are three functions
that can be applied to a ref_iterator:
* ref_iterator_advance(): move to the next reference in the iteration
* ref_iterator_abort(): end the iteration before it is exhausted
* ref_iterator_peel(): peel the reference currently being looked at
Iterating using a ref_iterator leaves the flow of control in the hands
of the caller, which means that ref_iterators from multiple
sources (e.g., loose and packed refs) can be composed and presented to
the world as a single compound ref_iterator.
It also means that the backend code for implementing reference iteration
will sometimes be more complicated. For example, the
cache_ref_iterator (which iterates over a ref_cache) can't use the C
stack to recurse; instead, it must manage its own stack internally as
explicit data structures. There is also a lot of boilerplate connected
with object-oriented programming in C.
Eventually, end-user callers will be able to be written in a more
natural way—managing their own flow of control rather than having to
work via callbacks. Since there will only be a few reference backends
but there are many consumers of this API, this is a good tradeoff.
More importantly, we gain composability, and especially the possibility
of writing interchangeable parts that can work with any ref_iterator.
For example, merge_ref_iterator implements a generic way of merging the
contents of any two ref_iterators. It is used to merge loose + packed
refs as part of the implementation of the files_ref_iterator. But it
will also be possible to use it to merge other pairs of reference
sources (e.g., per-worktree vs. shared refs).
Another example is prefix_ref_iterator, which can be used to trim a
prefix off the front of reference names before presenting them to the
caller (e.g., "refs/heads/master" -> "master").
In this patch, we introduce the iterator abstraction and many utilities,
and implement a reference iterator for the files ref storage backend.
(I've written several other obvious utilities, for example a generic way
to filter references being iterated over. These will probably be useful
in the future. But they are not needed for this patch series, so I am
not including them at this time.)
In a moment we will rewrite do_for_each_ref() to work via reference
iterators (allowing some special-purpose code to be discarded), and do
something similar for reflogs. In future patch series, we will expose
the ref_iterator abstraction in the public refs API so that callers can
use it directly.
Implementation note: I tried abstracting this a layer further to allow
generic iterators (over arbitrary types of objects) and generic
utilities like a generic merge_iterator. But the implementation in C was
very cumbersome, involving (in my opinion) too much boilerplate and too
much unsafe casting, some of which would have had to be done on the
caller side. However, I did put a few iterator-related constants in a
top-level header file, iterator.h, as they will be useful in a moment to
implement iteration over directory trees and possibly other types of
iterators in the future.
Signed-off-by: Ramsay Jones <ramsay@ramsayjones.plus.com>
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-06-18 12:15:15 +08:00
|
|
|
#include "../iterator.h"
|
2016-06-18 12:15:19 +08:00
|
|
|
#include "../dir-iterator.h"
|
2015-11-09 21:34:01 +08:00
|
|
|
#include "../lockfile.h"
|
|
|
|
#include "../object.h"
|
|
|
|
#include "../dir.h"
|
2018-03-31 02:35:12 +08:00
|
|
|
#include "../chdir-notify.h"
|
2018-10-21 16:08:54 +08:00
|
|
|
#include "worktree.h"
|
2015-11-09 21:34:01 +08:00
|
|
|
|
2017-11-05 16:42:05 +08:00
|
|
|
/*
 * This backend uses the following flags in `ref_update::flags` for
 * internal bookkeeping purposes. Their numerical values must not
 * conflict with REF_NO_DEREF, REF_FORCE_CREATE_REFLOG, REF_HAVE_NEW,
 * or REF_HAVE_OLD, which are also stored in `ref_update::flags`.
 *
 * NOTE(review): bit 7 is not assigned in this part of the file;
 * presumably it is taken by a flag defined elsewhere -- confirm
 * before reusing it.
 */

/*
 * Used as a flag in ref_update::flags when a loose ref is being
 * pruned. This flag must only be used when REF_NO_DEREF is set.
 */
#define REF_IS_PRUNING (1 << 4)

/*
 * Flag passed to lock_ref_sha1_basic() telling it to tolerate broken
 * refs (i.e., because the reference is about to be deleted anyway).
 */
#define REF_DELETING (1 << 5)

/*
 * Used as a flag in ref_update::flags when the lockfile needs to be
 * committed.
 */
#define REF_NEEDS_COMMIT (1 << 6)

/*
 * Used as a flag in ref_update::flags when the ref_update was via an
 * update to HEAD.
 */
#define REF_UPDATE_VIA_HEAD (1 << 8)

/*
 * Used as a flag in ref_update::flags when a reference has been
 * deleted and the ref's parent directories may need cleanup.
 */
#define REF_DELETED_RMDIR (1 << 9)
|
2017-11-05 16:42:05 +08:00
|
|
|
|
2015-11-09 21:34:01 +08:00
|
|
|
struct ref_lock {
|
|
|
|
char *ref_name;
|
2017-09-05 20:15:15 +08:00
|
|
|
struct lock_file lk;
|
2015-11-09 21:34:01 +08:00
|
|
|
struct object_id old_oid;
|
|
|
|
};
|
|
|
|
|
2016-09-05 00:08:11 +08:00
|
|
|
struct files_ref_store {
|
|
|
|
struct ref_store base;
|
2017-03-26 10:42:32 +08:00
|
|
|
unsigned int store_flags;
|
2017-02-10 19:16:16 +08:00
|
|
|
|
2017-03-26 10:42:24 +08:00
|
|
|
char *gitcommondir;
|
2017-03-26 10:42:18 +08:00
|
|
|
|
2017-04-16 14:41:32 +08:00
|
|
|
struct ref_cache *loose;
|
2017-05-22 22:17:40 +08:00
|
|
|
|
2017-06-23 15:01:38 +08:00
|
|
|
struct ref_store *packed_ref_store;
|
2016-09-05 00:08:11 +08:00
|
|
|
};
|
2015-11-09 21:34:01 +08:00
|
|
|
|
refs: rename struct ref_cache to files_ref_store
The greater goal of this patch series is to develop the concept of a
reference store, which is a place that references, their values, and
their reflogs are stored, and to virtualize the reference interface so
that different types of ref_stores can be implemented. We will then, for
example, use ref_store instances to access submodule references and
worktree references.
Currently, we keep a ref_cache for each submodule that has had its
references iterated over. It is a far cry from a ref_store, but they are
stored the way we will want to store ref_stores, and ref_stores will
eventually have to hold the reference caches. So let's treat ref_caches
as embryo ref_stores, and build them out from there.
As the first step, simply rename `ref_cache` to `files_ref_store`, and
rename some functions and attributes correspondingly.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-09-05 00:08:09 +08:00
|
|
|
static void clear_loose_ref_cache(struct files_ref_store *refs)
|
2015-11-09 21:34:01 +08:00
|
|
|
{
|
|
|
|
if (refs->loose) {
|
2017-04-16 14:41:32 +08:00
|
|
|
free_ref_cache(refs->loose);
|
2015-11-09 21:34:01 +08:00
|
|
|
refs->loose = NULL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-01-23 06:29:30 +08:00
|
|
|
/*
|
|
|
|
* Create a new submodule ref cache and add it to the internal
|
|
|
|
* set of caches.
|
|
|
|
*/
|
2021-10-09 05:08:14 +08:00
|
|
|
static struct ref_store *files_ref_store_create(struct repository *repo,
|
|
|
|
const char *gitdir,
|
2017-03-26 10:42:32 +08:00
|
|
|
unsigned int flags)
|
2015-11-09 21:34:01 +08:00
|
|
|
{
|
2016-09-05 00:08:11 +08:00
|
|
|
struct files_ref_store *refs = xcalloc(1, sizeof(*refs));
|
|
|
|
struct ref_store *ref_store = (struct ref_store *)refs;
|
2017-03-26 10:42:24 +08:00
|
|
|
struct strbuf sb = STRBUF_INIT;
|
2015-11-09 21:34:01 +08:00
|
|
|
|
2021-12-23 02:11:54 +08:00
|
|
|
base_ref_store_init(ref_store, repo, gitdir, &refs_be_files);
|
2017-03-26 10:42:32 +08:00
|
|
|
refs->store_flags = flags;
|
2017-03-26 10:42:24 +08:00
|
|
|
get_common_dir_noenv(&sb, gitdir);
|
|
|
|
refs->gitcommondir = strbuf_detach(&sb, NULL);
|
2021-12-23 02:11:52 +08:00
|
|
|
refs->packed_ref_store =
|
|
|
|
packed_ref_store_create(repo, refs->gitcommondir, flags);
|
2015-11-09 21:34:01 +08:00
|
|
|
|
2020-08-19 22:27:57 +08:00
|
|
|
chdir_notify_reparent("files-backend $GIT_DIR", &refs->base.gitdir);
|
2018-03-31 02:35:12 +08:00
|
|
|
chdir_notify_reparent("files-backend $GIT_COMMONDIR",
|
|
|
|
&refs->gitcommondir);
|
|
|
|
|
2016-09-05 00:08:11 +08:00
|
|
|
return ref_store;
|
2016-01-23 06:29:30 +08:00
|
|
|
}
|
2015-11-09 21:34:01 +08:00
|
|
|
|
2017-02-10 19:16:16 +08:00
|
|
|
/*
|
2017-03-26 10:42:32 +08:00
|
|
|
* Die if refs is not the main ref store. caller is used in any
|
|
|
|
* necessary error messages.
|
2017-02-10 19:16:16 +08:00
|
|
|
*/
|
|
|
|
static void files_assert_main_repository(struct files_ref_store *refs,
|
|
|
|
const char *caller)
|
|
|
|
{
|
2017-03-26 10:42:32 +08:00
|
|
|
if (refs->store_flags & REF_STORE_MAIN)
|
|
|
|
return;
|
|
|
|
|
2018-05-02 17:38:39 +08:00
|
|
|
BUG("operation %s only allowed for main ref store", caller);
|
2017-02-10 19:16:16 +08:00
|
|
|
}
|
|
|
|
|
2016-01-23 06:29:30 +08:00
|
|
|
/*
|
2016-09-05 00:08:11 +08:00
|
|
|
* Downcast ref_store to files_ref_store. Die if ref_store is not a
|
2017-03-26 10:42:32 +08:00
|
|
|
* files_ref_store. required_flags is compared with ref_store's
|
|
|
|
* store_flags to ensure the ref_store has all required capabilities.
|
|
|
|
* "caller" is used in any necessary error messages.
|
2016-01-23 06:29:30 +08:00
|
|
|
*/
|
2017-03-26 10:42:32 +08:00
|
|
|
static struct files_ref_store *files_downcast(struct ref_store *ref_store,
|
|
|
|
unsigned int required_flags,
|
|
|
|
const char *caller)
|
2016-01-23 06:29:30 +08:00
|
|
|
{
|
2017-02-10 19:16:16 +08:00
|
|
|
struct files_ref_store *refs;
|
|
|
|
|
2016-09-05 00:08:11 +08:00
|
|
|
if (ref_store->be != &refs_be_files)
|
2018-05-02 17:38:39 +08:00
|
|
|
BUG("ref_store is type \"%s\" not \"files\" in %s",
|
2016-09-05 00:08:11 +08:00
|
|
|
ref_store->be->name, caller);
|
2016-06-18 12:15:12 +08:00
|
|
|
|
2017-02-10 19:16:16 +08:00
|
|
|
refs = (struct files_ref_store *)ref_store;
|
|
|
|
|
2017-03-26 10:42:32 +08:00
|
|
|
if ((refs->store_flags & required_flags) != required_flags)
|
2018-05-02 17:38:39 +08:00
|
|
|
BUG("operation %s requires abilities 0x%x, but only have 0x%x",
|
2017-03-26 10:42:32 +08:00
|
|
|
caller, required_flags, refs->store_flags);
|
2016-06-18 12:15:12 +08:00
|
|
|
|
2017-02-10 19:16:16 +08:00
|
|
|
return refs;
|
2015-11-09 21:34:01 +08:00
|
|
|
}
|
|
|
|
|
2017-03-26 10:42:22 +08:00
|
|
|
static void files_reflog_path(struct files_ref_store *refs,
|
|
|
|
struct strbuf *sb,
|
|
|
|
const char *refname)
|
|
|
|
{
|
2022-09-20 00:34:50 +08:00
|
|
|
const char *bare_refname;
|
|
|
|
const char *wtname;
|
|
|
|
int wtname_len;
|
|
|
|
enum ref_worktree_type wt_type = parse_worktree_ref(
|
|
|
|
refname, &wtname, &wtname_len, &bare_refname);
|
|
|
|
|
|
|
|
switch (wt_type) {
|
|
|
|
case REF_WORKTREE_CURRENT:
|
2020-08-19 22:27:57 +08:00
|
|
|
strbuf_addf(sb, "%s/logs/%s", refs->base.gitdir, refname);
|
2017-03-26 10:42:24 +08:00
|
|
|
break;
|
2022-09-20 00:34:50 +08:00
|
|
|
case REF_WORKTREE_SHARED:
|
|
|
|
case REF_WORKTREE_MAIN:
|
|
|
|
strbuf_addf(sb, "%s/logs/%s", refs->gitcommondir, bare_refname);
|
2018-11-25 12:58:16 +08:00
|
|
|
break;
|
2022-09-20 00:34:50 +08:00
|
|
|
case REF_WORKTREE_OTHER:
|
|
|
|
strbuf_addf(sb, "%s/worktrees/%.*s/logs/%s", refs->gitcommondir,
|
|
|
|
wtname_len, wtname, bare_refname);
|
2017-03-26 10:42:24 +08:00
|
|
|
break;
|
|
|
|
default:
|
2022-09-20 00:34:50 +08:00
|
|
|
BUG("unknown ref type %d of ref %s", wt_type, refname);
|
2017-03-26 10:42:24 +08:00
|
|
|
}
|
2017-03-26 10:42:22 +08:00
|
|
|
}
|
|
|
|
|
2017-03-26 10:42:23 +08:00
|
|
|
static void files_ref_path(struct files_ref_store *refs,
|
|
|
|
struct strbuf *sb,
|
|
|
|
const char *refname)
|
|
|
|
{
|
2022-09-20 00:34:50 +08:00
|
|
|
const char *bare_refname;
|
|
|
|
const char *wtname;
|
|
|
|
int wtname_len;
|
|
|
|
enum ref_worktree_type wt_type = parse_worktree_ref(
|
|
|
|
refname, &wtname, &wtname_len, &bare_refname);
|
|
|
|
switch (wt_type) {
|
|
|
|
case REF_WORKTREE_CURRENT:
|
2020-08-19 22:27:57 +08:00
|
|
|
strbuf_addf(sb, "%s/%s", refs->base.gitdir, refname);
|
2017-03-26 10:42:24 +08:00
|
|
|
break;
|
2022-09-20 00:34:50 +08:00
|
|
|
case REF_WORKTREE_OTHER:
|
|
|
|
strbuf_addf(sb, "%s/worktrees/%.*s/%s", refs->gitcommondir,
|
|
|
|
wtname_len, wtname, bare_refname);
|
|
|
|
break;
|
|
|
|
case REF_WORKTREE_SHARED:
|
|
|
|
case REF_WORKTREE_MAIN:
|
|
|
|
strbuf_addf(sb, "%s/%s", refs->gitcommondir, bare_refname);
|
2017-03-26 10:42:24 +08:00
|
|
|
break;
|
|
|
|
default:
|
2022-09-20 00:34:50 +08:00
|
|
|
BUG("unknown ref type %d of ref %s", wt_type, refname);
|
2017-03-26 10:42:24 +08:00
|
|
|
}
|
2017-03-26 10:42:23 +08:00
|
|
|
}
|
|
|
|
|
2019-03-07 20:29:15 +08:00
|
|
|
/*
|
2019-03-07 20:29:17 +08:00
|
|
|
* Manually add refs/bisect, refs/rewritten and refs/worktree, which, being
|
2019-03-07 20:29:15 +08:00
|
|
|
* per-worktree, might not appear in the directory listing for
|
|
|
|
* refs/ in the main repo.
|
|
|
|
*/
|
|
|
|
static void add_per_worktree_entries_to_dir(struct ref_dir *dir, const char *dirname)
|
|
|
|
{
|
2019-03-07 20:29:17 +08:00
|
|
|
const char *prefixes[] = { "refs/bisect/", "refs/worktree/", "refs/rewritten/" };
|
2019-03-07 20:29:16 +08:00
|
|
|
int ip;
|
2019-03-07 20:29:15 +08:00
|
|
|
|
|
|
|
if (strcmp(dirname, "refs/"))
|
|
|
|
return;
|
|
|
|
|
2019-03-07 20:29:16 +08:00
|
|
|
for (ip = 0; ip < ARRAY_SIZE(prefixes); ip++) {
|
|
|
|
const char *prefix = prefixes[ip];
|
|
|
|
int prefix_len = strlen(prefix);
|
|
|
|
struct ref_entry *child_entry;
|
|
|
|
int pos;
|
2019-03-07 20:29:15 +08:00
|
|
|
|
2019-03-07 20:29:16 +08:00
|
|
|
pos = search_ref_dir(dir, prefix, prefix_len);
|
|
|
|
if (pos >= 0)
|
|
|
|
continue;
|
2021-09-28 21:02:24 +08:00
|
|
|
child_entry = create_dir_entry(dir->cache, prefix, prefix_len);
|
2019-03-07 20:29:15 +08:00
|
|
|
add_entry_to_dir(dir, child_entry);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-11-09 21:34:01 +08:00
|
|
|
/*
|
|
|
|
* Read the loose references from the namespace dirname into dir
|
|
|
|
* (without recursing). dirname must end with '/'. dir must be the
|
|
|
|
* directory entry corresponding to dirname.
|
|
|
|
*/
|
2017-04-16 14:41:34 +08:00
|
|
|
static void loose_fill_ref_dir(struct ref_store *ref_store,
|
|
|
|
struct ref_dir *dir, const char *dirname)
|
2015-11-09 21:34:01 +08:00
|
|
|
{
|
2017-04-16 14:41:34 +08:00
|
|
|
struct files_ref_store *refs =
|
|
|
|
files_downcast(ref_store, REF_STORE_READ, "fill_ref_dir");
|
2015-11-09 21:34:01 +08:00
|
|
|
DIR *d;
|
|
|
|
struct dirent *de;
|
|
|
|
int dirnamelen = strlen(dirname);
|
|
|
|
struct strbuf refname;
|
|
|
|
struct strbuf path = STRBUF_INIT;
|
|
|
|
size_t path_baselen;
|
|
|
|
|
2017-03-26 10:42:23 +08:00
|
|
|
files_ref_path(refs, &path, dirname);
|
2015-11-09 21:34:01 +08:00
|
|
|
path_baselen = path.len;
|
|
|
|
|
|
|
|
d = opendir(path.buf);
|
|
|
|
if (!d) {
|
|
|
|
strbuf_release(&path);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
strbuf_init(&refname, dirnamelen + 257);
|
|
|
|
strbuf_add(&refname, dirname, dirnamelen);
|
|
|
|
|
|
|
|
while ((de = readdir(d)) != NULL) {
|
2017-05-07 06:10:24 +08:00
|
|
|
struct object_id oid;
|
2015-11-09 21:34:01 +08:00
|
|
|
struct stat st;
|
|
|
|
int flag;
|
|
|
|
|
|
|
|
if (de->d_name[0] == '.')
|
|
|
|
continue;
|
|
|
|
if (ends_with(de->d_name, ".lock"))
|
|
|
|
continue;
|
|
|
|
strbuf_addstr(&refname, de->d_name);
|
|
|
|
strbuf_addstr(&path, de->d_name);
|
|
|
|
if (stat(path.buf, &st) < 0) {
|
|
|
|
; /* silently ignore */
|
|
|
|
} else if (S_ISDIR(st.st_mode)) {
|
|
|
|
strbuf_addch(&refname, '/');
|
|
|
|
add_entry_to_dir(dir,
|
2017-04-16 14:41:33 +08:00
|
|
|
create_dir_entry(dir->cache, refname.buf,
|
2021-09-28 21:02:24 +08:00
|
|
|
refname.len));
|
2015-11-09 21:34:01 +08:00
|
|
|
} else {
|
2017-03-26 10:42:34 +08:00
|
|
|
if (!refs_resolve_ref_unsafe(&refs->base,
|
2017-02-10 04:53:52 +08:00
|
|
|
refname.buf,
|
|
|
|
RESOLVE_REF_READING,
|
2022-01-26 22:37:01 +08:00
|
|
|
&oid, &flag)) {
|
2017-05-07 06:10:24 +08:00
|
|
|
oidclr(&oid);
|
2015-11-09 21:34:01 +08:00
|
|
|
flag |= REF_ISBROKEN;
|
2017-05-07 06:10:24 +08:00
|
|
|
} else if (is_null_oid(&oid)) {
|
2015-11-09 21:34:01 +08:00
|
|
|
/*
|
|
|
|
* It is so astronomically unlikely
|
2017-11-05 16:42:09 +08:00
|
|
|
* that null_oid is the OID of an
|
2015-11-09 21:34:01 +08:00
|
|
|
* actual object that we consider its
|
|
|
|
* appearance in a loose reference
|
|
|
|
* file to be repo corruption
|
|
|
|
* (probably due to a software bug).
|
|
|
|
*/
|
|
|
|
flag |= REF_ISBROKEN;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (check_refname_format(refname.buf,
|
|
|
|
REFNAME_ALLOW_ONELEVEL)) {
|
|
|
|
if (!refname_is_safe(refname.buf))
|
|
|
|
die("loose refname is dangerous: %s", refname.buf);
|
2017-05-07 06:10:24 +08:00
|
|
|
oidclr(&oid);
|
2015-11-09 21:34:01 +08:00
|
|
|
flag |= REF_BAD_NAME | REF_ISBROKEN;
|
|
|
|
}
|
|
|
|
add_entry_to_dir(dir,
|
2017-05-22 22:17:53 +08:00
|
|
|
create_ref_entry(refname.buf, &oid, flag));
|
2015-11-09 21:34:01 +08:00
|
|
|
}
|
|
|
|
strbuf_setlen(&refname, dirnamelen);
|
|
|
|
strbuf_setlen(&path, path_baselen);
|
|
|
|
}
|
|
|
|
strbuf_release(&refname);
|
|
|
|
strbuf_release(&path);
|
|
|
|
closedir(d);
|
2017-04-16 14:41:35 +08:00
|
|
|
|
2019-03-07 20:29:15 +08:00
|
|
|
add_per_worktree_entries_to_dir(dir, dirname);
|
2015-11-09 21:34:01 +08:00
|
|
|
}
|
|
|
|
|
2017-04-16 14:41:38 +08:00
|
|
|
static struct ref_cache *get_loose_ref_cache(struct files_ref_store *refs)
|
2015-11-09 21:34:01 +08:00
|
|
|
{
|
|
|
|
if (!refs->loose) {
|
|
|
|
/*
|
|
|
|
* Mark the top-level directory complete because we
|
|
|
|
* are about to read the only subdirectory that can
|
|
|
|
* hold references:
|
|
|
|
*/
|
2017-04-16 14:41:34 +08:00
|
|
|
refs->loose = create_ref_cache(&refs->base, loose_fill_ref_dir);
|
2017-04-16 14:41:32 +08:00
|
|
|
|
|
|
|
/* We're going to fill the top level ourselves: */
|
|
|
|
refs->loose->root->flag &= ~REF_INCOMPLETE;
|
|
|
|
|
2015-11-09 21:34:01 +08:00
|
|
|
/*
|
2017-04-16 14:41:32 +08:00
|
|
|
* Add an incomplete entry for "refs/" (to be filled
|
|
|
|
* lazily):
|
2015-11-09 21:34:01 +08:00
|
|
|
*/
|
2017-04-16 14:41:32 +08:00
|
|
|
add_entry_to_dir(get_ref_dir(refs->loose->root),
|
2021-09-28 21:02:24 +08:00
|
|
|
create_dir_entry(refs->loose, "refs/", 5));
|
2015-11-09 21:34:01 +08:00
|
|
|
}
|
2017-04-16 14:41:38 +08:00
|
|
|
return refs->loose;
|
2015-11-09 21:34:01 +08:00
|
|
|
}
|
|
|
|
|
2022-03-01 17:33:54 +08:00
|
|
|
static int read_ref_internal(struct ref_store *ref_store, const char *refname,
|
|
|
|
struct object_id *oid, struct strbuf *referent,
|
|
|
|
unsigned int *type, int *failure_errno, int skip_packed_refs)
|
2015-11-09 21:34:01 +08:00
|
|
|
{
|
2016-09-05 00:08:14 +08:00
|
|
|
struct files_ref_store *refs =
|
2017-03-26 10:42:32 +08:00
|
|
|
files_downcast(ref_store, REF_STORE_READ, "read_raw_ref");
|
2016-04-08 03:03:02 +08:00
|
|
|
struct strbuf sb_contents = STRBUF_INIT;
|
|
|
|
struct strbuf sb_path = STRBUF_INIT;
|
2016-04-08 03:03:01 +08:00
|
|
|
const char *path;
|
|
|
|
const char *buf;
|
|
|
|
struct stat st;
|
|
|
|
int fd;
|
2016-04-08 03:03:02 +08:00
|
|
|
int ret = -1;
|
2016-10-07 00:48:42 +08:00
|
|
|
int remaining_retries = 3;
|
2021-10-16 17:39:10 +08:00
|
|
|
int myerr = 0;
|
2015-11-09 21:34:01 +08:00
|
|
|
|
2016-04-22 07:11:17 +08:00
|
|
|
*type = 0;
|
2016-04-08 03:03:02 +08:00
|
|
|
strbuf_reset(&sb_path);
|
2016-09-05 00:08:20 +08:00
|
|
|
|
2017-03-26 10:42:23 +08:00
|
|
|
files_ref_path(refs, &sb_path, refname);
|
2016-09-05 00:08:20 +08:00
|
|
|
|
2016-04-08 03:03:02 +08:00
|
|
|
path = sb_path.buf;
|
2015-11-09 21:34:01 +08:00
|
|
|
|
2016-04-08 03:03:01 +08:00
|
|
|
stat_ref:
|
|
|
|
/*
|
|
|
|
* We might have to loop back here to avoid a race
|
|
|
|
* condition: first we lstat() the file, then we try
|
|
|
|
* to read it as a link or as a file. But if somebody
|
|
|
|
* changes the type of the file (file <-> directory
|
|
|
|
* <-> symlink) between the lstat() and reading, then
|
|
|
|
* we don't want to report that as an error but rather
|
|
|
|
* try again starting with the lstat().
|
2016-10-07 00:48:42 +08:00
|
|
|
*
|
|
|
|
* We'll keep a count of the retries, though, just to avoid
|
|
|
|
* any confusing situation sending us into an infinite loop.
|
2016-04-08 03:03:01 +08:00
|
|
|
*/
|
2015-11-09 21:34:01 +08:00
|
|
|
|
2016-10-07 00:48:42 +08:00
|
|
|
if (remaining_retries-- <= 0)
|
|
|
|
goto out;
|
|
|
|
|
2016-04-08 03:03:01 +08:00
|
|
|
if (lstat(path, &st) < 0) {
|
2021-10-16 17:39:09 +08:00
|
|
|
int ignore_errno;
|
2021-10-16 17:39:10 +08:00
|
|
|
myerr = errno;
|
2022-03-01 17:33:54 +08:00
|
|
|
if (myerr != ENOENT || skip_packed_refs)
|
2016-04-08 03:03:02 +08:00
|
|
|
goto out;
|
2021-10-16 17:39:09 +08:00
|
|
|
if (refs_read_raw_ref(refs->packed_ref_store, refname, oid,
|
|
|
|
referent, type, &ignore_errno)) {
|
2021-10-16 17:39:10 +08:00
|
|
|
myerr = ENOENT;
|
2016-04-08 03:03:02 +08:00
|
|
|
goto out;
|
2015-11-09 21:34:01 +08:00
|
|
|
}
|
2016-04-08 03:03:02 +08:00
|
|
|
ret = 0;
|
|
|
|
goto out;
|
2015-11-09 21:34:01 +08:00
|
|
|
}
|
|
|
|
|
2016-04-08 03:03:01 +08:00
|
|
|
/* Follow "normalized" - ie "refs/.." symlinks by hand */
|
|
|
|
if (S_ISLNK(st.st_mode)) {
|
2016-04-08 03:03:02 +08:00
|
|
|
strbuf_reset(&sb_contents);
|
pass st.st_size as hint for strbuf_readlink()
When we initially added the strbuf_readlink() function in
b11b7e13f4 (Add generic 'strbuf_readlink()' helper function,
2008-12-17), the point was that we generally have a _guess_
as to the correct size based on the stat information, but we
can't necessarily trust it.
Over the years, a few callers have grown up that simply pass
in 0, even though they have the stat information. Let's have
them pass in their hint for consistency (and in theory
efficiency, since it may avoid an extra resize/syscall loop,
but neither location is probably performance critical).
Note that st.st_size is actually an off_t, so in theory we
need xsize_t() here. But none of the other callsites use it,
and since this is just a hint, it doesn't matter either way
(if we wrap we'll simply start with a too-small hint and
then eventually complain when we cannot allocate the
memory).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-07-24 18:51:39 +08:00
|
|
|
if (strbuf_readlink(&sb_contents, path, st.st_size) < 0) {
|
2021-10-16 17:39:10 +08:00
|
|
|
myerr = errno;
|
|
|
|
if (myerr == ENOENT || myerr == EINVAL)
|
2015-11-09 21:34:01 +08:00
|
|
|
/* inconsistent with lstat; retry */
|
|
|
|
goto stat_ref;
|
|
|
|
else
|
2016-04-08 03:03:02 +08:00
|
|
|
goto out;
|
2015-11-09 21:34:01 +08:00
|
|
|
}
|
2016-04-08 03:03:02 +08:00
|
|
|
if (starts_with(sb_contents.buf, "refs/") &&
|
|
|
|
!check_refname_format(sb_contents.buf, 0)) {
|
2016-04-22 07:11:17 +08:00
|
|
|
strbuf_swap(&sb_contents, referent);
|
2016-04-26 09:06:23 +08:00
|
|
|
*type |= REF_ISSYMREF;
|
2016-04-08 03:03:02 +08:00
|
|
|
ret = 0;
|
|
|
|
goto out;
|
2015-11-09 21:34:01 +08:00
|
|
|
}
|
files_read_raw_ref: avoid infinite loop on broken symlinks
Our ref resolution first runs lstat() on any path we try to
look up, because we want to treat symlinks specially (by
resolving them manually and considering them symrefs). But
if the results of `readlink` do _not_ look like a ref, we
fall through to treating it like a normal file, and just
read the contents of the linked path.
Since fcb7c76 (resolve_ref_unsafe(): close race condition
reading loose refs, 2013-06-19), that "normal file" code
path will stat() the file and if we see ENOENT, will jump
back to the lstat(), thinking we've seen inconsistent
results between the two calls. But for a symbolic ref, this
isn't a race: the lstat() found the symlink, and the stat()
is looking at the path it points to. We end up in an
infinite loop calling lstat() and stat().
We can fix this by avoiding the retry-on-inconsistent jump
when we know that we found a symlink. While we're at it,
let's add a comment explaining why the symlink case gets to
this code in the first place; without that, it is not
obvious that the correct solution isn't to avoid the stat()
code path entirely.
Signed-off-by: Jeff King <peff@peff.net>
Reviewed-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-10-07 03:41:08 +08:00
|
|
|
/*
|
|
|
|
* It doesn't look like a refname; fall through to just
|
|
|
|
* treating it like a non-symlink, and reading whatever it
|
|
|
|
* points to.
|
|
|
|
*/
|
2016-04-08 03:03:01 +08:00
|
|
|
}
|
2015-11-09 21:34:01 +08:00
|
|
|
|
2016-04-08 03:03:01 +08:00
|
|
|
/* Is it a directory? */
|
|
|
|
if (S_ISDIR(st.st_mode)) {
|
2021-10-16 17:39:09 +08:00
|
|
|
int ignore_errno;
|
2016-05-05 20:09:41 +08:00
|
|
|
/*
|
|
|
|
* Even though there is a directory where the loose
|
|
|
|
* ref is supposed to be, there could still be a
|
|
|
|
* packed ref:
|
|
|
|
*/
|
2022-03-01 17:33:54 +08:00
|
|
|
if (skip_packed_refs ||
|
|
|
|
refs_read_raw_ref(refs->packed_ref_store, refname, oid,
|
2021-10-16 17:39:09 +08:00
|
|
|
referent, type, &ignore_errno)) {
|
2021-10-16 17:39:10 +08:00
|
|
|
myerr = EISDIR;
|
2016-05-05 20:09:41 +08:00
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
ret = 0;
|
2016-04-08 03:03:02 +08:00
|
|
|
goto out;
|
2016-04-08 03:03:01 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Anything else, just open it and try to use it as
|
|
|
|
* a ref
|
|
|
|
*/
|
|
|
|
fd = open(path, O_RDONLY);
|
|
|
|
if (fd < 0) {
|
2021-10-16 17:39:10 +08:00
|
|
|
myerr = errno;
|
|
|
|
if (myerr == ENOENT && !S_ISLNK(st.st_mode))
|
2016-04-08 03:03:01 +08:00
|
|
|
/* inconsistent with lstat; retry */
|
|
|
|
goto stat_ref;
|
|
|
|
else
|
2016-04-08 03:03:02 +08:00
|
|
|
goto out;
|
2016-04-08 03:03:01 +08:00
|
|
|
}
|
2016-04-08 03:03:02 +08:00
|
|
|
strbuf_reset(&sb_contents);
|
|
|
|
if (strbuf_read(&sb_contents, fd, 256) < 0) {
|
2021-10-16 17:39:10 +08:00
|
|
|
myerr = errno;
|
2016-04-08 03:03:01 +08:00
|
|
|
close(fd);
|
2016-04-08 03:03:02 +08:00
|
|
|
goto out;
|
2016-04-08 03:03:01 +08:00
|
|
|
}
|
|
|
|
close(fd);
|
2016-04-08 03:03:02 +08:00
|
|
|
strbuf_rtrim(&sb_contents);
|
|
|
|
buf = sb_contents.buf;
|
2020-08-19 22:27:55 +08:00
|
|
|
|
2021-10-16 17:39:10 +08:00
|
|
|
ret = parse_loose_ref_contents(buf, oid, referent, type, &myerr);
|
2020-08-19 22:27:55 +08:00
|
|
|
|
|
|
|
out:
|
2021-10-16 17:39:10 +08:00
|
|
|
if (ret && !myerr)
|
|
|
|
BUG("returning non-zero %d, should have set myerr!", ret);
|
|
|
|
*failure_errno = myerr;
|
|
|
|
|
2020-08-19 22:27:55 +08:00
|
|
|
strbuf_release(&sb_path);
|
|
|
|
strbuf_release(&sb_contents);
|
refs API: use "failure_errno", not "errno"
Fix a logic error in refs_resolve_ref_unsafe() introduced in a recent
series of mine to abstract the refs API away from errno. See
96f6623ada0 (Merge branch 'ab/refs-errno-cleanup', 2021-11-29)for that
series.
In that series introduction of "failure_errno" to
refs_resolve_ref_unsafe came in ef18119dec8 (refs API: add a version
of refs_resolve_ref_unsafe() with "errno", 2021-10-16). There we'd set
"errno = 0" immediately before refs_read_raw_ref(), and then set
"failure_errno" to "errno" if errno was non-zero afterwards.
Then in the next commit 8b72fea7e91 (refs API: make
refs_read_raw_ref() not set errno, 2021-10-16) we started expecting
"refs_read_raw_ref()" to set "failure_errno". It would do that if
refs_read_raw_ref() failed, but it wouldn't be the same errno.
So we might set the "errno" here to any arbitrary bad value, and end
up e.g. returning NULL when we meant to return the refname from
refs_resolve_ref_unsafe(), or the other way around. Instrumenting this
code will reveal cases where refs_read_raw_ref() will fail, and
"errno" and "failure_errno" will be set to different values.
In practice I haven't found a case where this scary bug changed
anything in practice. The reason for that is that we'll not care about
the actual value of "errno" here per-se, but only whether:
1. We have an errno
2. If it's one of ENOENT, EISDIR or ENOTDIR. See the adjacent code
added in a1c1d8170db (refs_resolve_ref_unsafe: handle d/f
conflicts for writes, 2017-10-06)
I.e. if we clobber "failure_errno" with "errno", but it happened to be
one of those three, and we'll clobber it with another one of the three
we were OK.
Perhaps there are cases where the difference ended up mattering, but I
haven't found them. Instrumenting the test suite to fail if "errno"
and "failure_errno" are different shows a lot of failures, checking if
they're different *and* one is but not the other is outside that list
of three "errno" values yields no failures.
But let's fix the obvious bug. We should just stop paying attention to
"errno" in refs_resolve_ref_unsafe(). In addition let's change the
partial resetting of "errno" in files_read_raw_ref() to happen just
before the "return", to ensure that any such bug will be more easily
spotted in the future.
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-01-12 20:36:46 +08:00
|
|
|
errno = 0;
|
2020-08-19 22:27:55 +08:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2022-03-01 17:33:54 +08:00
|
|
|
/*
 * Raw-ref reader for the files backend: hand every argument straight
 * through to read_ref_internal() and report whatever it returns.
 *
 * The trailing 0 is the final read_ref_internal() argument; the
 * symbolic-ref reader in this file passes 1 instead.
 * NOTE(review): the meaning of that flag is not visible from this
 * function -- confirm against read_ref_internal().
 */
static int files_read_raw_ref(struct ref_store *ref_store, const char *refname,
			      struct object_id *oid, struct strbuf *referent,
			      unsigned int *type, int *failure_errno)
{
	int status = read_ref_internal(ref_store, refname, oid, referent,
				       type, failure_errno, 0);

	return status;
}
|
|
|
|
|
|
|
|
static int files_read_symbolic_ref(struct ref_store *ref_store, const char *refname,
|
|
|
|
struct strbuf *referent)
|
|
|
|
{
|
|
|
|
struct object_id oid;
|
|
|
|
int failure_errno, ret;
|
|
|
|
unsigned int type;
|
|
|
|
|
|
|
|
ret = read_ref_internal(ref_store, refname, &oid, referent, &type, &failure_errno, 1);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
|
|
|
|
return !(type & REF_ISSYMREF);
|
|
|
|
}
|
|
|
|
|
2020-08-19 22:27:55 +08:00
|
|
|
int parse_loose_ref_contents(const char *buf, struct object_id *oid,
|
2021-10-16 17:39:10 +08:00
|
|
|
struct strbuf *referent, unsigned int *type,
|
|
|
|
int *failure_errno)
|
2020-08-19 22:27:55 +08:00
|
|
|
{
|
|
|
|
const char *p;
|
2020-01-31 03:35:46 +08:00
|
|
|
if (skip_prefix(buf, "ref:", &buf)) {
|
2015-11-09 21:34:01 +08:00
|
|
|
while (isspace(*buf))
|
|
|
|
buf++;
|
2016-04-08 03:03:01 +08:00
|
|
|
|
2016-04-22 07:11:17 +08:00
|
|
|
strbuf_reset(referent);
|
|
|
|
strbuf_addstr(referent, buf);
|
2016-04-26 09:06:23 +08:00
|
|
|
*type |= REF_ISSYMREF;
|
2020-08-19 22:27:55 +08:00
|
|
|
return 0;
|
2015-11-09 21:34:01 +08:00
|
|
|
}
|
|
|
|
|
2016-04-08 03:03:01 +08:00
|
|
|
/*
|
2020-08-19 22:27:55 +08:00
|
|
|
* FETCH_HEAD has additional data after the sha.
|
2016-04-08 03:03:01 +08:00
|
|
|
*/
|
2017-10-16 06:07:11 +08:00
|
|
|
if (parse_oid_hex(buf, oid, &p) ||
|
|
|
|
(*p != '\0' && !isspace(*p))) {
|
2016-04-26 09:06:23 +08:00
|
|
|
*type |= REF_ISBROKEN;
|
2021-10-16 17:39:10 +08:00
|
|
|
*failure_errno = EINVAL;
|
2020-08-19 22:27:55 +08:00
|
|
|
return -1;
|
2016-04-08 03:03:01 +08:00
|
|
|
}
|
2020-08-19 22:27:55 +08:00
|
|
|
return 0;
|
2015-11-09 21:34:01 +08:00
|
|
|
}
|
|
|
|
|
2016-04-24 14:11:37 +08:00
|
|
|
static void unlock_ref(struct ref_lock *lock)
|
|
|
|
{
|
2017-09-05 20:15:15 +08:00
|
|
|
rollback_lock_file(&lock->lk);
|
2016-04-24 14:11:37 +08:00
|
|
|
free(lock->ref_name);
|
|
|
|
free(lock);
|
|
|
|
}
|
|
|
|
|
refs: resolve symbolic refs first
Before committing ref updates, split symbolic ref updates into two
parts: an update to the underlying ref, and a log-only update to the
symbolic ref. This ensures that both references are locked correctly
during the transaction, including while their reflogs are updated.
Similarly, if the reference pointed to by HEAD is modified directly, add
a separate log-only update to HEAD, rather than leaving the job of
updating HEAD's reflog to commit_ref_update(). This change ensures that
HEAD is locked correctly while its reflog is being modified, as well as
being cheaper (HEAD only needs to be resolved once).
This makes use of a new function, lock_raw_ref(), which is analogous to
read_raw_ref(), but acquires a lock on the reference before reading it.
This change still has two problems:
* There are redundant read_ref_full() reference lookups.
* It is still possible to get incorrect reflogs for symbolic references
if there is a concurrent update by another process, since the old_oid
of a symref is determined before the lock on the pointed-to ref is
held.
Both problems will soon be fixed.
Signed-off-by: David Turner <dturner@twopensource.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
WIP
2016-04-25 21:56:07 +08:00
|
|
|
/*
|
|
|
|
* Lock refname, without following symrefs, and set *lock_p to point
|
|
|
|
* at a newly-allocated lock object. Fill in lock->old_oid, referent,
|
|
|
|
* and type similarly to read_raw_ref().
|
|
|
|
*
|
|
|
|
* The caller must verify that refname is a "safe" reference name (in
|
|
|
|
* the sense of refname_is_safe()) before calling this function.
|
|
|
|
*
|
|
|
|
* If the reference doesn't already exist, verify that refname doesn't
|
|
|
|
* have a D/F conflict with any existing references. extras and skip
|
2017-04-16 14:41:27 +08:00
|
|
|
* are passed to refs_verify_refname_available() for this check.
|
refs: resolve symbolic refs first
Before committing ref updates, split symbolic ref updates into two
parts: an update to the underlying ref, and a log-only update to the
symbolic ref. This ensures that both references are locked correctly
during the transaction, including while their reflogs are updated.
Similarly, if the reference pointed to by HEAD is modified directly, add
a separate log-only update to HEAD, rather than leaving the job of
updating HEAD's reflog to commit_ref_update(). This change ensures that
HEAD is locked correctly while its reflog is being modified, as well as
being cheaper (HEAD only needs to be resolved once).
This makes use of a new function, lock_raw_ref(), which is analogous to
read_raw_ref(), but acquires a lock on the reference before reading it.
This change still has two problems:
* There are redundant read_ref_full() reference lookups.
* It is still possible to get incorrect reflogs for symbolic references
if there is a concurrent update by another process, since the old_oid
of a symref is determined before the lock on the pointed-to ref is
held.
Both problems will soon be fixed.
Signed-off-by: David Turner <dturner@twopensource.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
WIP
2016-04-25 21:56:07 +08:00
|
|
|
*
|
|
|
|
* If mustexist is not set and the reference is not found or is
|
2017-11-05 16:42:09 +08:00
|
|
|
* broken, lock the reference anyway but clear old_oid.
|
refs: resolve symbolic refs first
Before committing ref updates, split symbolic ref updates into two
parts: an update to the underlying ref, and a log-only update to the
symbolic ref. This ensures that both references are locked correctly
during the transaction, including while their reflogs are updated.
Similarly, if the reference pointed to by HEAD is modified directly, add
a separate log-only update to HEAD, rather than leaving the job of
updating HEAD's reflog to commit_ref_update(). This change ensures that
HEAD is locked correctly while its reflog is being modified, as well as
being cheaper (HEAD only needs to be resolved once).
This makes use of a new function, lock_raw_ref(), which is analogous to
read_raw_ref(), but acquires a lock on the reference before reading it.
This change still has two problems:
* There are redundant read_ref_full() reference lookups.
* It is still possible to get incorrect reflogs for symbolic references
if there is a concurrent update by another process, since the old_oid
of a symref is determined before the lock on the pointed-to ref is
held.
Both problems will soon be fixed.
Signed-off-by: David Turner <dturner@twopensource.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
WIP
2016-04-25 21:56:07 +08:00
|
|
|
*
|
|
|
|
* Return 0 on success. On failure, write an error message to err and
|
|
|
|
* return TRANSACTION_NAME_CONFLICT or TRANSACTION_GENERIC_ERROR.
|
|
|
|
*
|
|
|
|
* Implementation note: This function is basically
|
|
|
|
*
|
|
|
|
* lock reference
|
|
|
|
* read_raw_ref()
|
|
|
|
*
|
|
|
|
* but it includes a lot more code to
|
|
|
|
* - Deal with possible races with other processes
|
2017-04-16 14:41:27 +08:00
|
|
|
* - Avoid calling refs_verify_refname_available() when it can be
|
refs: resolve symbolic refs first
Before committing ref updates, split symbolic ref updates into two
parts: an update to the underlying ref, and a log-only update to the
symbolic ref. This ensures that both references are locked correctly
during the transaction, including while their reflogs are updated.
Similarly, if the reference pointed to by HEAD is modified directly, add
a separate log-only update to HEAD, rather than leaving the job of
updating HEAD's reflog to commit_ref_update(). This change ensures that
HEAD is locked correctly while its reflog is being modified, as well as
being cheaper (HEAD only needs to be resolved once).
This makes use of a new function, lock_raw_ref(), which is analogous to
read_raw_ref(), but acquires a lock on the reference before reading it.
This change still has two problems:
* There are redundant read_ref_full() reference lookups.
* It is still possible to get incorrect reflogs for symbolic references
if there is a concurrent update by another process, since the old_oid
of a symref is determined before the lock on the pointed-to ref is
held.
Both problems will soon be fixed.
Signed-off-by: David Turner <dturner@twopensource.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
WIP
2016-04-25 21:56:07 +08:00
|
|
|
* avoided, namely if we were successfully able to read the ref
|
|
|
|
* - Generate informative error messages in the case of failure
|
|
|
|
*/
|
2016-09-05 00:08:31 +08:00
|
|
|
static int lock_raw_ref(struct files_ref_store *refs,
|
|
|
|
const char *refname, int mustexist,
|
refs: resolve symbolic refs first
Before committing ref updates, split symbolic ref updates into two
parts: an update to the underlying ref, and a log-only update to the
symbolic ref. This ensures that both references are locked correctly
during the transaction, including while their reflogs are updated.
Similarly, if the reference pointed to by HEAD is modified directly, add
a separate log-only update to HEAD, rather than leaving the job of
updating HEAD's reflog to commit_ref_update(). This change ensures that
HEAD is locked correctly while its reflog is being modified, as well as
being cheaper (HEAD only needs to be resolved once).
This makes use of a new function, lock_raw_ref(), which is analogous to
read_raw_ref(), but acquires a lock on the reference before reading it.
This change still has two problems:
* There are redundant read_ref_full() reference lookups.
* It is still possible to get incorrect reflogs for symbolic references
if there is a concurrent update by another process, since the old_oid
of a symref is determined before the lock on the pointed-to ref is
held.
Both problems will soon be fixed.
Signed-off-by: David Turner <dturner@twopensource.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
WIP
2016-04-25 21:56:07 +08:00
|
|
|
const struct string_list *extras,
|
|
|
|
struct ref_lock **lock_p,
|
|
|
|
struct strbuf *referent,
|
|
|
|
unsigned int *type,
|
|
|
|
struct strbuf *err)
|
|
|
|
{
|
|
|
|
struct ref_lock *lock;
|
|
|
|
struct strbuf ref_file = STRBUF_INIT;
|
|
|
|
int attempts_remaining = 3;
|
|
|
|
int ret = TRANSACTION_GENERIC_ERROR;
|
2021-08-23 19:52:40 +08:00
|
|
|
int failure_errno;
|
refs: resolve symbolic refs first
Before committing ref updates, split symbolic ref updates into two
parts: an update to the underlying ref, and a log-only update to the
symbolic ref. This ensures that both references are locked correctly
during the transaction, including while their reflogs are updated.
Similarly, if the reference pointed to by HEAD is modified directly, add
a separate log-only update to HEAD, rather than leaving the job of
updating HEAD's reflog to commit_ref_update(). This change ensures that
HEAD is locked correctly while its reflog is being modified, as well as
being cheaper (HEAD only needs to be resolved once).
This makes use of a new function, lock_raw_ref(), which is analogous to
read_raw_ref(), but acquires a lock on the reference before reading it.
This change still has two problems:
* There are redundant read_ref_full() reference lookups.
* It is still possible to get incorrect reflogs for symbolic references
if there is a concurrent update by another process, since the old_oid
of a symref is determined before the lock on the pointed-to ref is
held.
Both problems will soon be fixed.
Signed-off-by: David Turner <dturner@twopensource.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
WIP
2016-04-25 21:56:07 +08:00
|
|
|
|
|
|
|
assert(err);
|
2017-02-10 19:16:16 +08:00
|
|
|
files_assert_main_repository(refs, "lock_raw_ref");
|
2016-09-05 00:08:31 +08:00
|
|
|
|
refs: resolve symbolic refs first
Before committing ref updates, split symbolic ref updates into two
parts: an update to the underlying ref, and a log-only update to the
symbolic ref. This ensures that both references are locked correctly
during the transaction, including while their reflogs are updated.
Similarly, if the reference pointed to by HEAD is modified directly, add
a separate log-only update to HEAD, rather than leaving the job of
updating HEAD's reflog to commit_ref_update(). This change ensures that
HEAD is locked correctly while its reflog is being modified, as well as
being cheaper (HEAD only needs to be resolved once).
This makes use of a new function, lock_raw_ref(), which is analogous to
read_raw_ref(), but acquires a lock on the reference before reading it.
This change still has two problems:
* There are redundant read_ref_full() reference lookups.
* It is still possible to get incorrect reflogs for symbolic references
if there is a concurrent update by another process, since the old_oid
of a symref is determined before the lock on the pointed-to ref is
held.
Both problems will soon be fixed.
Signed-off-by: David Turner <dturner@twopensource.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
WIP
2016-04-25 21:56:07 +08:00
|
|
|
*type = 0;
|
|
|
|
|
|
|
|
/* First lock the file so it can't change out from under us. */
|
|
|
|
|
2021-03-14 00:17:22 +08:00
|
|
|
*lock_p = CALLOC_ARRAY(lock, 1);
|
refs: resolve symbolic refs first
Before committing ref updates, split symbolic ref updates into two
parts: an update to the underlying ref, and a log-only update to the
symbolic ref. This ensures that both references are locked correctly
during the transaction, including while their reflogs are updated.
Similarly, if the reference pointed to by HEAD is modified directly, add
a separate log-only update to HEAD, rather than leaving the job of
updating HEAD's reflog to commit_ref_update(). This change ensures that
HEAD is locked correctly while its reflog is being modified, as well as
being cheaper (HEAD only needs to be resolved once).
This makes use of a new function, lock_raw_ref(), which is analogous to
read_raw_ref(), but acquires a lock on the reference before reading it.
This change still has two problems:
* There are redundant read_ref_full() reference lookups.
* It is still possible to get incorrect reflogs for symbolic references
if there is a concurrent update by another process, since the old_oid
of a symref is determined before the lock on the pointed-to ref is
held.
Both problems will soon be fixed.
Signed-off-by: David Turner <dturner@twopensource.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
WIP
2016-04-25 21:56:07 +08:00
|
|
|
|
|
|
|
lock->ref_name = xstrdup(refname);
|
2017-03-26 10:42:23 +08:00
|
|
|
files_ref_path(refs, &ref_file, refname);
|
refs: resolve symbolic refs first
Before committing ref updates, split symbolic ref updates into two
parts: an update to the underlying ref, and a log-only update to the
symbolic ref. This ensures that both references are locked correctly
during the transaction, including while their reflogs are updated.
Similarly, if the reference pointed to by HEAD is modified directly, add
a separate log-only update to HEAD, rather than leaving the job of
updating HEAD's reflog to commit_ref_update(). This change ensures that
HEAD is locked correctly while its reflog is being modified, as well as
being cheaper (HEAD only needs to be resolved once).
This makes use of a new function, lock_raw_ref(), which is analogous to
read_raw_ref(), but acquires a lock on the reference before reading it.
This change still has two problems:
* There are redundant read_ref_full() reference lookups.
* It is still possible to get incorrect reflogs for symbolic references
if there is a concurrent update by another process, since the old_oid
of a symref is determined before the lock on the pointed-to ref is
held.
Both problems will soon be fixed.
Signed-off-by: David Turner <dturner@twopensource.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
WIP
2016-04-25 21:56:07 +08:00
|
|
|
|
|
|
|
retry:
|
|
|
|
switch (safe_create_leading_directories(ref_file.buf)) {
|
|
|
|
case SCLD_OK:
|
|
|
|
break; /* success */
|
|
|
|
case SCLD_EXISTS:
|
|
|
|
/*
|
|
|
|
* Suppose refname is "refs/foo/bar". We just failed
|
|
|
|
* to create the containing directory, "refs/foo",
|
|
|
|
* because there was a non-directory in the way. This
|
|
|
|
* indicates a D/F conflict, probably because of
|
|
|
|
* another reference such as "refs/foo". There is no
|
|
|
|
* reason to expect this error to be transitory.
|
|
|
|
*/
|
2017-03-26 10:42:34 +08:00
|
|
|
if (refs_verify_refname_available(&refs->base, refname,
|
2021-08-23 19:36:06 +08:00
|
|
|
extras, NULL, err)) {
|
refs: resolve symbolic refs first
Before committing ref updates, split symbolic ref updates into two
parts: an update to the underlying ref, and a log-only update to the
symbolic ref. This ensures that both references are locked correctly
during the transaction, including while their reflogs are updated.
Similarly, if the reference pointed to by HEAD is modified directly, add
a separate log-only update to HEAD, rather than leaving the job of
updating HEAD's reflog to commit_ref_update(). This change ensures that
HEAD is locked correctly while its reflog is being modified, as well as
being cheaper (HEAD only needs to be resolved once).
This makes use of a new function, lock_raw_ref(), which is analogous to
read_raw_ref(), but acquires a lock on the reference before reading it.
This change still has two problems:
* There are redundant read_ref_full() reference lookups.
* It is still possible to get incorrect reflogs for symbolic references
if there is a concurrent update by another process, since the old_oid
of a symref is determined before the lock on the pointed-to ref is
held.
Both problems will soon be fixed.
Signed-off-by: David Turner <dturner@twopensource.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
WIP
2016-04-25 21:56:07 +08:00
|
|
|
if (mustexist) {
|
|
|
|
/*
|
|
|
|
* To the user the relevant error is
|
|
|
|
* that the "mustexist" reference is
|
|
|
|
* missing:
|
|
|
|
*/
|
|
|
|
strbuf_reset(err);
|
|
|
|
strbuf_addf(err, "unable to resolve reference '%s'",
|
|
|
|
refname);
|
|
|
|
} else {
|
|
|
|
/*
|
|
|
|
* The error message set by
|
2017-04-16 14:41:27 +08:00
|
|
|
* refs_verify_refname_available() is
|
|
|
|
* OK.
|
refs: resolve symbolic refs first
Before committing ref updates, split symbolic ref updates into two
parts: an update to the underlying ref, and a log-only update to the
symbolic ref. This ensures that both references are locked correctly
during the transaction, including while their reflogs are updated.
Similarly, if the reference pointed to by HEAD is modified directly, add
a separate log-only update to HEAD, rather than leaving the job of
updating HEAD's reflog to commit_ref_update(). This change ensures that
HEAD is locked correctly while its reflog is being modified, as well as
being cheaper (HEAD only needs to be resolved once).
This makes use of a new function, lock_raw_ref(), which is analogous to
read_raw_ref(), but acquires a lock on the reference before reading it.
This change still has two problems:
* There are redundant read_ref_full() reference lookups.
* It is still possible to get incorrect reflogs for symbolic references
if there is a concurrent update by another process, since the old_oid
of a symref is determined before the lock on the pointed-to ref is
held.
Both problems will soon be fixed.
Signed-off-by: David Turner <dturner@twopensource.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
WIP
2016-04-25 21:56:07 +08:00
|
|
|
*/
|
|
|
|
ret = TRANSACTION_NAME_CONFLICT;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
/*
|
|
|
|
* The file that is in the way isn't a loose
|
|
|
|
* reference. Report it as a low-level
|
|
|
|
* failure.
|
|
|
|
*/
|
|
|
|
strbuf_addf(err, "unable to create lock file %s.lock; "
|
|
|
|
"non-directory in the way",
|
|
|
|
ref_file.buf);
|
|
|
|
}
|
|
|
|
goto error_return;
|
|
|
|
case SCLD_VANISHED:
|
|
|
|
/* Maybe another process was tidying up. Try again. */
|
|
|
|
if (--attempts_remaining > 0)
|
|
|
|
goto retry;
|
|
|
|
/* fall through */
|
|
|
|
default:
|
|
|
|
strbuf_addf(err, "unable to create directory for %s",
|
|
|
|
ref_file.buf);
|
|
|
|
goto error_return;
|
|
|
|
}
|
|
|
|
|
2017-08-21 19:51:34 +08:00
|
|
|
if (hold_lock_file_for_update_timeout(
|
2017-09-05 20:15:15 +08:00
|
|
|
&lock->lk, ref_file.buf, LOCK_NO_DEREF,
|
2017-08-21 19:51:34 +08:00
|
|
|
get_files_ref_lock_timeout_ms()) < 0) {
|
2021-08-23 19:52:40 +08:00
|
|
|
int myerr = errno;
|
|
|
|
errno = 0;
|
|
|
|
if (myerr == ENOENT && --attempts_remaining > 0) {
|
refs: resolve symbolic refs first
Before committing ref updates, split symbolic ref updates into two
parts: an update to the underlying ref, and a log-only update to the
symbolic ref. This ensures that both references are locked correctly
during the transaction, including while their reflogs are updated.
Similarly, if the reference pointed to by HEAD is modified directly, add
a separate log-only update to HEAD, rather than leaving the job of
updating HEAD's reflog to commit_ref_update(). This change ensures that
HEAD is locked correctly while its reflog is being modified, as well as
being cheaper (HEAD only needs to be resolved once).
This makes use of a new function, lock_raw_ref(), which is analogous to
read_raw_ref(), but acquires a lock on the reference before reading it.
This change still has two problems:
* There are redundant read_ref_full() reference lookups.
* It is still possible to get incorrect reflogs for symbolic references
if there is a concurrent update by another process, since the old_oid
of a symref is determined before the lock on the pointed-to ref is
held.
Both problems will soon be fixed.
Signed-off-by: David Turner <dturner@twopensource.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
WIP
2016-04-25 21:56:07 +08:00
|
|
|
/*
|
|
|
|
* Maybe somebody just deleted one of the
|
|
|
|
* directories leading to ref_file. Try
|
|
|
|
* again:
|
|
|
|
*/
|
|
|
|
goto retry;
|
|
|
|
} else {
|
2021-08-23 19:52:40 +08:00
|
|
|
unable_to_lock_message(ref_file.buf, myerr, err);
|
refs: resolve symbolic refs first
Before committing ref updates, split symbolic ref updates into two
parts: an update to the underlying ref, and a log-only update to the
symbolic ref. This ensures that both references are locked correctly
during the transaction, including while their reflogs are updated.
Similarly, if the reference pointed to by HEAD is modified directly, add
a separate log-only update to HEAD, rather than leaving the job of
updating HEAD's reflog to commit_ref_update(). This change ensures that
HEAD is locked correctly while its reflog is being modified, as well as
being cheaper (HEAD only needs to be resolved once).
This makes use of a new function, lock_raw_ref(), which is analogous to
read_raw_ref(), but acquires a lock on the reference before reading it.
This change still has two problems:
* There are redundant read_ref_full() reference lookups.
* It is still possible to get incorrect reflogs for symbolic references
if there is a concurrent update by another process, since the old_oid
of a symref is determined before the lock on the pointed-to ref is
held.
Both problems will soon be fixed.
Signed-off-by: David Turner <dturner@twopensource.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
WIP
2016-04-25 21:56:07 +08:00
|
|
|
goto error_return;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Now we hold the lock and can read the reference without
|
|
|
|
* fear that its value will change.
|
|
|
|
*/
|
|
|
|
|
2021-08-23 19:52:40 +08:00
|
|
|
if (files_read_raw_ref(&refs->base, refname, &lock->old_oid, referent,
|
|
|
|
type, &failure_errno)) {
|
|
|
|
if (failure_errno == ENOENT) {
|
refs: resolve symbolic refs first
Before committing ref updates, split symbolic ref updates into two
parts: an update to the underlying ref, and a log-only update to the
symbolic ref. This ensures that both references are locked correctly
during the transaction, including while their reflogs are updated.
Similarly, if the reference pointed to by HEAD is modified directly, add
a separate log-only update to HEAD, rather than leaving the job of
updating HEAD's reflog to commit_ref_update(). This change ensures that
HEAD is locked correctly while its reflog is being modified, as well as
being cheaper (HEAD only needs to be resolved once).
This makes use of a new function, lock_raw_ref(), which is analogous to
read_raw_ref(), but acquires a lock on the reference before reading it.
This change still has two problems:
* There are redundant read_ref_full() reference lookups.
* It is still possible to get incorrect reflogs for symbolic references
if there is a concurrent update by another process, since the old_oid
of a symref is determined before the lock on the pointed-to ref is
held.
Both problems will soon be fixed.
Signed-off-by: David Turner <dturner@twopensource.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
WIP
2016-04-25 21:56:07 +08:00
|
|
|
if (mustexist) {
|
|
|
|
/* Garden variety missing reference. */
|
|
|
|
strbuf_addf(err, "unable to resolve reference '%s'",
|
|
|
|
refname);
|
|
|
|
goto error_return;
|
|
|
|
} else {
|
|
|
|
/*
|
|
|
|
* Reference is missing, but that's OK. We
|
|
|
|
* know that there is not a conflict with
|
|
|
|
* another loose reference because
|
|
|
|
* (supposing that we are trying to lock
|
|
|
|
* reference "refs/foo/bar"):
|
|
|
|
*
|
|
|
|
* - We were successfully able to create
|
|
|
|
* the lockfile refs/foo/bar.lock, so we
|
|
|
|
* know there cannot be a loose reference
|
|
|
|
* named "refs/foo".
|
|
|
|
*
|
|
|
|
* - We got ENOENT and not EISDIR, so we
|
|
|
|
* know that there cannot be a loose
|
|
|
|
* reference named "refs/foo/bar/baz".
|
|
|
|
*/
|
|
|
|
}
|
2021-08-23 19:52:40 +08:00
|
|
|
} else if (failure_errno == EISDIR) {
|
refs: resolve symbolic refs first
Before committing ref updates, split symbolic ref updates into two
parts: an update to the underlying ref, and a log-only update to the
symbolic ref. This ensures that both references are locked correctly
during the transaction, including while their reflogs are updated.
Similarly, if the reference pointed to by HEAD is modified directly, add
a separate log-only update to HEAD, rather than leaving the job of
updating HEAD's reflog to commit_ref_update(). This change ensures that
HEAD is locked correctly while its reflog is being modified, as well as
being cheaper (HEAD only needs to be resolved once).
This makes use of a new function, lock_raw_ref(), which is analogous to
read_raw_ref(), but acquires a lock on the reference before reading it.
This change still has two problems:
* There are redundant read_ref_full() reference lookups.
* It is still possible to get incorrect reflogs for symbolic references
if there is a concurrent update by another process, since the old_oid
of a symref is determined before the lock on the pointed-to ref is
held.
Both problems will soon be fixed.
Signed-off-by: David Turner <dturner@twopensource.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
WIP
2016-04-25 21:56:07 +08:00
|
|
|
/*
|
|
|
|
* There is a directory in the way. It might have
|
|
|
|
* contained references that have been deleted. If
|
|
|
|
* we don't require that the reference already
|
|
|
|
* exists, try to remove the directory so that it
|
|
|
|
* doesn't cause trouble when we want to rename the
|
|
|
|
* lockfile into place later.
|
|
|
|
*/
|
|
|
|
if (mustexist) {
|
|
|
|
/* Garden variety missing reference. */
|
|
|
|
strbuf_addf(err, "unable to resolve reference '%s'",
|
|
|
|
refname);
|
|
|
|
goto error_return;
|
|
|
|
} else if (remove_dir_recursively(&ref_file,
|
|
|
|
REMOVE_DIR_EMPTY_ONLY)) {
|
2017-04-16 14:41:26 +08:00
|
|
|
if (refs_verify_refname_available(
|
|
|
|
&refs->base, refname,
|
2021-08-23 19:36:06 +08:00
|
|
|
extras, NULL, err)) {
|
refs: resolve symbolic refs first
Before committing ref updates, split symbolic ref updates into two
parts: an update to the underlying ref, and a log-only update to the
symbolic ref. This ensures that both references are locked correctly
during the transaction, including while their reflogs are updated.
Similarly, if the reference pointed to by HEAD is modified directly, add
a separate log-only update to HEAD, rather than leaving the job of
updating HEAD's reflog to commit_ref_update(). This change ensures that
HEAD is locked correctly while its reflog is being modified, as well as
being cheaper (HEAD only needs to be resolved once).
This makes use of a new function, lock_raw_ref(), which is analogous to
read_raw_ref(), but acquires a lock on the reference before reading it.
This change still has two problems:
* There are redundant read_ref_full() reference lookups.
* It is still possible to get incorrect reflogs for symbolic references
if there is a concurrent update by another process, since the old_oid
of a symref is determined before the lock on the pointed-to ref is
held.
Both problems will soon be fixed.
Signed-off-by: David Turner <dturner@twopensource.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
WIP
2016-04-25 21:56:07 +08:00
|
|
|
/*
|
|
|
|
* The error message set by
|
|
|
|
* refs_verify_refname_available() is OK.
|
|
|
|
*/
|
|
|
|
ret = TRANSACTION_NAME_CONFLICT;
|
|
|
|
goto error_return;
|
|
|
|
} else {
|
|
|
|
/*
|
|
|
|
* We can't delete the directory,
|
|
|
|
* but we also don't know of any
|
|
|
|
* references that it should
|
|
|
|
* contain.
|
|
|
|
*/
|
|
|
|
strbuf_addf(err, "there is a non-empty directory '%s' "
|
|
|
|
"blocking reference '%s'",
|
|
|
|
ref_file.buf, refname);
|
|
|
|
goto error_return;
|
|
|
|
}
|
|
|
|
}
|
2021-08-23 19:52:40 +08:00
|
|
|
} else if (failure_errno == EINVAL && (*type & REF_ISBROKEN)) {
|
refs: resolve symbolic refs first
Before committing ref updates, split symbolic ref updates into two
parts: an update to the underlying ref, and a log-only update to the
symbolic ref. This ensures that both references are locked correctly
during the transaction, including while their reflogs are updated.
Similarly, if the reference pointed to by HEAD is modified directly, add
a separate log-only update to HEAD, rather than leaving the job of
updating HEAD's reflog to commit_ref_update(). This change ensures that
HEAD is locked correctly while its reflog is being modified, as well as
being cheaper (HEAD only needs to be resolved once).
This makes use of a new function, lock_raw_ref(), which is analogous to
read_raw_ref(), but acquires a lock on the reference before reading it.
This change still has two problems:
* There are redundant read_ref_full() reference lookups.
* It is still possible to get incorrect reflogs for symbolic references
if there is a concurrent update by another process, since the old_oid
of a symref is determined before the lock on the pointed-to ref is
held.
Both problems will soon be fixed.
Signed-off-by: David Turner <dturner@twopensource.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
WIP
2016-04-25 21:56:07 +08:00
|
|
|
strbuf_addf(err, "unable to resolve reference '%s': "
|
|
|
|
"reference broken", refname);
|
|
|
|
goto error_return;
|
|
|
|
} else {
|
|
|
|
strbuf_addf(err, "unable to resolve reference '%s': %s",
|
2021-08-23 19:52:40 +08:00
|
|
|
refname, strerror(failure_errno));
|
refs: resolve symbolic refs first
Before committing ref updates, split symbolic ref updates into two
parts: an update to the underlying ref, and a log-only update to the
symbolic ref. This ensures that both references are locked correctly
during the transaction, including while their reflogs are updated.
Similarly, if the reference pointed to by HEAD is modified directly, add
a separate log-only update to HEAD, rather than leaving the job of
updating HEAD's reflog to commit_ref_update(). This change ensures that
HEAD is locked correctly while its reflog is being modified, as well as
being cheaper (HEAD only needs to be resolved once).
This makes use of a new function, lock_raw_ref(), which is analogous to
read_raw_ref(), but acquires a lock on the reference before reading it.
This change still has two problems:
* There are redundant read_ref_full() reference lookups.
* It is still possible to get incorrect reflogs for symbolic references
if there is a concurrent update by another process, since the old_oid
of a symref is determined before the lock on the pointed-to ref is
held.
Both problems will soon be fixed.
Signed-off-by: David Turner <dturner@twopensource.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
WIP
2016-04-25 21:56:07 +08:00
|
|
|
goto error_return;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If the ref did not exist and we are creating it,
|
files-backend: cheapen refname_available check when locking refs
When locking references in preparation for updating them, we need to
check that none of the newly added references D/F conflict with
existing references (e.g., we don't allow `refs/foo` to be added if
`refs/foo/bar` already exists, or vice versa).
Prior to 524a9fdb51 (refs_verify_refname_available(): use function in
more places, 2017-04-16), conflicts with existing loose references
were checked by looking directly in the filesystem, and then conflicts
with existing packed references were checked by running
`verify_refname_available_dir()` against the packed-refs cache.
But that commit changed the final check to call
`refs_verify_refname_available()` against the *whole* files ref-store,
including both loose and packed references, with the following
comment:
> This means that those callsites now check for conflicts with all
> references rather than just packed refs, but the performance cost
> shouldn't be significant (and will be regained later).
That comment turned out to be too sanguine. User s@kazlauskas.me
reported that fetches involving a very large number of references in
neighboring directories were slowed down by that change.
The problem is that when fetching, each reference is updated
individually, within its own reference transaction. This is done
because some reference updates might succeed even though others fail.
But every time a reference update transaction is finished,
`clear_loose_ref_cache()` is called. So when it is time to update the
next reference, part of the loose ref cache has to be repopulated for
the `refs_verify_refname_available()` call. If the references are all
in neighboring directories, then the cost of repopulating the
reference cache increases with the number of references, resulting in
O(N²) effort.
The comment above also claims that the performance cost "will be
regained later". The idea was that once the packed-refs were finished
being split out into a separate ref-store, we could limit the
`refs_verify_refname_available()` call to the packed references again.
That is what we do now.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-08-17 23:12:50 +08:00
|
|
|
* make sure there is no existing packed ref that
|
|
|
|
* conflicts with refname:
|
refs: resolve symbolic refs first
Before committing ref updates, split symbolic ref updates into two
parts: an update to the underlying ref, and a log-only update to the
symbolic ref. This ensures that both references are locked correctly
during the transaction, including while their reflogs are updated.
Similarly, if the reference pointed to by HEAD is modified directly, add
a separate log-only update to HEAD, rather than leaving the job of
updating HEAD's reflog to commit_ref_update(). This change ensures that
HEAD is locked correctly while its reflog is being modified, as well as
being cheaper (HEAD only needs to be resolved once).
This makes use of a new function, lock_raw_ref(), which is analogous to
read_raw_ref(), but acquires a lock on the reference before reading it.
This change still has two problems:
* There are redundant read_ref_full() reference lookups.
* It is still possible to get incorrect reflogs for symbolic references
if there is a concurrent update by another process, since the old_oid
of a symref is determined before the lock on the pointed-to ref is
held.
Both problems will soon be fixed.
Signed-off-by: David Turner <dturner@twopensource.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
WIP
2016-04-25 21:56:07 +08:00
|
|
|
*/
|
2017-04-16 14:41:27 +08:00
|
|
|
if (refs_verify_refname_available(
|
files-backend: cheapen refname_available check when locking refs
When locking references in preparation for updating them, we need to
check that none of the newly added references D/F conflict with
existing references (e.g., we don't allow `refs/foo` to be added if
`refs/foo/bar` already exists, or vice versa).
Prior to 524a9fdb51 (refs_verify_refname_available(): use function in
more places, 2017-04-16), conflicts with existing loose references
were checked by looking directly in the filesystem, and then conflicts
with existing packed references were checked by running
`verify_refname_available_dir()` against the packed-refs cache.
But that commit changed the final check to call
`refs_verify_refname_available()` against the *whole* files ref-store,
including both loose and packed references, with the following
comment:
> This means that those callsites now check for conflicts with all
> references rather than just packed refs, but the performance cost
> shouldn't be significant (and will be regained later).
That comment turned out to be too sanguine. User s@kazlauskas.me
reported that fetches involving a very large number of references in
neighboring directories were slowed down by that change.
The problem is that when fetching, each reference is updated
individually, within its own reference transaction. This is done
because some reference updates might succeed even though others fail.
But every time a reference update transaction is finished,
`clear_loose_ref_cache()` is called. So when it is time to update the
next reference, part of the loose ref cache has to be repopulated for
the `refs_verify_refname_available()` call. If the references are all
in neighboring directories, then the cost of repopulating the
reference cache increases with the number of references, resulting in
O(N²) effort.
The comment above also claims that the performance cost "will be
regained later". The idea was that once the packed-refs were finished
being split out into a separate ref-store, we could limit the
`refs_verify_refname_available()` call to the packed references again.
That is what we do now.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-08-17 23:12:50 +08:00
|
|
|
refs->packed_ref_store, refname,
|
2021-08-23 19:36:06 +08:00
|
|
|
extras, NULL, err))
|
refs: resolve symbolic refs first
Before committing ref updates, split symbolic ref updates into two
parts: an update to the underlying ref, and a log-only update to the
symbolic ref. This ensures that both references are locked correctly
during the transaction, including while their reflogs are updated.
Similarly, if the reference pointed to by HEAD is modified directly, add
a separate log-only update to HEAD, rather than leaving the job of
updating HEAD's reflog to commit_ref_update(). This change ensures that
HEAD is locked correctly while its reflog is being modified, as well as
being cheaper (HEAD only needs to be resolved once).
This makes use of a new function, lock_raw_ref(), which is analogous to
read_raw_ref(), but acquires a lock on the reference before reading it.
This change still has two problems:
* There are redundant read_ref_full() reference lookups.
* It is still possible to get incorrect reflogs for symbolic references
if there is a concurrent update by another process, since the old_oid
of a symref is determined before the lock on the pointed-to ref is
held.
Both problems will soon be fixed.
Signed-off-by: David Turner <dturner@twopensource.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
WIP
2016-04-25 21:56:07 +08:00
|
|
|
goto error_return;
|
|
|
|
}
|
|
|
|
|
|
|
|
ret = 0;
|
|
|
|
goto out;
|
|
|
|
|
|
|
|
error_return:
|
|
|
|
unlock_ref(lock);
|
|
|
|
*lock_p = NULL;
|
|
|
|
|
|
|
|
out:
|
|
|
|
strbuf_release(&ref_file);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
refs: introduce an iterator interface
Currently, the API for iterating over references is via a family of
for_each_ref()-type functions that invoke a callback function for each
selected reference. All of these eventually call do_for_each_ref(),
which knows how to do one thing: iterate in parallel through two
ref_caches, one for loose and one for packed refs, giving loose
references precedence over packed refs. This is rather complicated code,
and is quite specialized to the files backend. It also requires callers
to encapsulate their work into a callback function, which often means
that they have to define and use a "cb_data" struct to manage their
context.
The current design is already bursting at the seams, and will become
even more awkward in the upcoming world of multiple reference storage
backends:
* Per-worktree vs. shared references are currently handled via a kludge
in git_path() rather than iterating over each part of the reference
namespace separately and merging the results. This kludge will cease
to work when we have multiple reference storage backends.
* The current scheme is inflexible. What if we sometimes want to bypass
the ref_cache, or use it only for packed or only for loose refs? What
if we want to store symbolic refs in one type of storage backend and
non-symbolic ones in another?
In the future, each reference backend will need to define its own way of
iterating over references. The crux of the problem with the current
design is that it is impossible to compose for_each_ref()-style
iterations, because the flow of control is owned by the for_each_ref()
function. There is nothing that a caller can do but iterate through all
references in a single burst, so there is no way for it to interleave
references from multiple backends and present the result to the rest of
the world as a single compound backend.
This commit introduces a new iteration primitive for references: a
ref_iterator. A ref_iterator is a polymorphic object that a reference
storage backend can be asked to instantiate. There are three functions
that can be applied to a ref_iterator:
* ref_iterator_advance(): move to the next reference in the iteration
* ref_iterator_abort(): end the iteration before it is exhausted
* ref_iterator_peel(): peel the reference currently being looked at
Iterating using a ref_iterator leaves the flow of control in the hands
of the caller, which means that ref_iterators from multiple
sources (e.g., loose and packed refs) can be composed and presented to
the world as a single compound ref_iterator.
It also means that the backend code for implementing reference iteration
will sometimes be more complicated. For example, the
cache_ref_iterator (which iterates over a ref_cache) can't use the C
stack to recurse; instead, it must manage its own stack internally as
explicit data structures. There is also a lot of boilerplate connected
with object-oriented programming in C.
Eventually, end-user callers will be able to be written in a more
natural way—managing their own flow of control rather than having to
work via callbacks. Since there will only be a few reference backends
but there are many consumers of this API, this is a good tradeoff.
More importantly, we gain composability, and especially the possibility
of writing interchangeable parts that can work with any ref_iterator.
For example, merge_ref_iterator implements a generic way of merging the
contents of any two ref_iterators. It is used to merge loose + packed
refs as part of the implementation of the files_ref_iterator. But it
will also be possible to use it to merge other pairs of reference
sources (e.g., per-worktree vs. shared refs).
Another example is prefix_ref_iterator, which can be used to trim a
prefix off the front of reference names before presenting them to the
caller (e.g., "refs/heads/master" -> "master").
In this patch, we introduce the iterator abstraction and many utilities,
and implement a reference iterator for the files ref storage backend.
(I've written several other obvious utilities, for example a generic way
to filter references being iterated over. These will probably be useful
in the future. But they are not needed for this patch series, so I am
not including them at this time.)
In a moment we will rewrite do_for_each_ref() to work via reference
iterators (allowing some special-purpose code to be discarded), and do
something similar for reflogs. In future patch series, we will expose
the ref_iterator abstraction in the public refs API so that callers can
use it directly.
Implementation note: I tried abstracting this a layer further to allow
generic iterators (over arbitrary types of objects) and generic
utilities like a generic merge_iterator. But the implementation in C was
very cumbersome, involving (in my opinion) too much boilerplate and too
much unsafe casting, some of which would have had to be done on the
caller side. However, I did put a few iterator-related constants in a
top-level header file, iterator.h, as they will be useful in a moment to
implement iteration over directory trees and possibly other types of
iterators in the future.
Signed-off-by: Ramsay Jones <ramsay@ramsayjones.plus.com>
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-06-18 12:15:15 +08:00
|
|
|
/*
 * Iterator state used by the files ref backend.
 *
 * base:  the generic iterator header. It has to stay the first member,
 *        because files_ref_iterator_advance() casts the
 *        `struct ref_iterator *` it receives directly to
 *        `struct files_ref_iterator *`.
 * iter0: the underlying iterator that files_ref_iterator_advance()
 *        advances and reads `refname` from. NOTE(review): presumably the
 *        merged loose + packed iterator; confirm where it is constructed.
 * repo:  not read by the code visible in this part of the file.
 *        NOTE(review): confirm its use at the construction/advance sites.
 * flags: DO_FOR_EACH_* bits. With DO_FOR_EACH_PER_WORKTREE_ONLY set,
 *        files_ref_iterator_advance() skips every ref that
 *        parse_worktree_ref() does not report as REF_WORKTREE_CURRENT.
 */
struct files_ref_iterator {
|
|
|
|
struct ref_iterator base;
|
|
|
|
|
|
|
|
struct ref_iterator *iter0;
|
2021-10-09 05:08:15 +08:00
|
|
|
struct repository *repo;
|
refs: introduce an iterator interface
Currently, the API for iterating over references is via a family of
for_each_ref()-type functions that invoke a callback function for each
selected reference. All of these eventually call do_for_each_ref(),
which knows how to do one thing: iterate in parallel through two
ref_caches, one for loose and one for packed refs, giving loose
references precedence over packed refs. This is rather complicated code,
and is quite specialized to the files backend. It also requires callers
to encapsulate their work into a callback function, which often means
that they have to define and use a "cb_data" struct to manage their
context.
The current design is already bursting at the seams, and will become
even more awkward in the upcoming world of multiple reference storage
backends:
* Per-worktree vs. shared references are currently handled via a kludge
in git_path() rather than iterating over each part of the reference
namespace separately and merging the results. This kludge will cease
to work when we have multiple reference storage backends.
* The current scheme is inflexible. What if we sometimes want to bypass
the ref_cache, or use it only for packed or only for loose refs? What
if we want to store symbolic refs in one type of storage backend and
non-symbolic ones in another?
In the future, each reference backend will need to define its own way of
iterating over references. The crux of the problem with the current
design is that it is impossible to compose for_each_ref()-style
iterations, because the flow of control is owned by the for_each_ref()
function. There is nothing that a caller can do but iterate through all
references in a single burst, so there is no way for it to interleave
references from multiple backends and present the result to the rest of
the world as a single compound backend.
This commit introduces a new iteration primitive for references: a
ref_iterator. A ref_iterator is a polymorphic object that a reference
storage backend can be asked to instantiate. There are three functions
that can be applied to a ref_iterator:
* ref_iterator_advance(): move to the next reference in the iteration
* ref_iterator_abort(): end the iteration before it is exhausted
* ref_iterator_peel(): peel the reference currently being looked at
Iterating using a ref_iterator leaves the flow of control in the hands
of the caller, which means that ref_iterators from multiple
sources (e.g., loose and packed refs) can be composed and presented to
the world as a single compound ref_iterator.
It also means that the backend code for implementing reference iteration
will sometimes be more complicated. For example, the
cache_ref_iterator (which iterates over a ref_cache) can't use the C
stack to recurse; instead, it must manage its own stack internally as
explicit data structures. There is also a lot of boilerplate connected
with object-oriented programming in C.
Eventually, end-user callers will be able to be written in a more
natural way—managing their own flow of control rather than having to
work via callbacks. Since there will only be a few reference backends
but there are many consumers of this API, this is a good tradeoff.
More importantly, we gain composability, and especially the possibility
of writing interchangeable parts that can work with any ref_iterator.
For example, merge_ref_iterator implements a generic way of merging the
contents of any two ref_iterators. It is used to merge loose + packed
refs as part of the implementation of the files_ref_iterator. But it
will also be possible to use it to merge other pairs of reference
sources (e.g., per-worktree vs. shared refs).
Another example is prefix_ref_iterator, which can be used to trim a
prefix off the front of reference names before presenting them to the
caller (e.g., "refs/heads/master" -> "master").
In this patch, we introduce the iterator abstraction and many utilities,
and implement a reference iterator for the files ref storage backend.
(I've written several other obvious utilities, for example a generic way
to filter references being iterated over. These will probably be useful
in the future. But they are not needed for this patch series, so I am
not including them at this time.)
In a moment we will rewrite do_for_each_ref() to work via reference
iterators (allowing some special-purpose code to be discarded), and do
something similar for reflogs. In future patch series, we will expose
the ref_iterator abstraction in the public refs API so that callers can
use it directly.
Implementation note: I tried abstracting this a layer further to allow
generic iterators (over arbitrary types of objects) and generic
utilities like a generic merge_iterator. But the implementation in C was
very cumbersome, involving (in my opinion) too much boilerplate and too
much unsafe casting, some of which would have had to be done on the
caller side. However, I did put a few iterator-related constants in a
top-level header file, iterator.h, as they will be useful in a moment to
implement iteration over directory trees and possibly other types of
iterators in the future.
Signed-off-by: Ramsay Jones <ramsay@ramsayjones.plus.com>
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-06-18 12:15:15 +08:00
|
|
|
unsigned int flags;
|
|
|
|
};
|
2015-11-09 21:34:01 +08:00
|
|
|
|
refs: introduce an iterator interface
Currently, the API for iterating over references is via a family of
for_each_ref()-type functions that invoke a callback function for each
selected reference. All of these eventually call do_for_each_ref(),
which knows how to do one thing: iterate in parallel through two
ref_caches, one for loose and one for packed refs, giving loose
references precedence over packed refs. This is rather complicated code,
and is quite specialized to the files backend. It also requires callers
to encapsulate their work into a callback function, which often means
that they have to define and use a "cb_data" struct to manage their
context.
The current design is already bursting at the seams, and will become
even more awkward in the upcoming world of multiple reference storage
backends:
* Per-worktree vs. shared references are currently handled via a kludge
in git_path() rather than iterating over each part of the reference
namespace separately and merging the results. This kludge will cease
to work when we have multiple reference storage backends.
* The current scheme is inflexible. What if we sometimes want to bypass
the ref_cache, or use it only for packed or only for loose refs? What
if we want to store symbolic refs in one type of storage backend and
non-symbolic ones in another?
In the future, each reference backend will need to define its own way of
iterating over references. The crux of the problem with the current
design is that it is impossible to compose for_each_ref()-style
iterations, because the flow of control is owned by the for_each_ref()
function. There is nothing that a caller can do but iterate through all
references in a single burst, so there is no way for it to interleave
references from multiple backends and present the result to the rest of
the world as a single compound backend.
This commit introduces a new iteration primitive for references: a
ref_iterator. A ref_iterator is a polymorphic object that a reference
storage backend can be asked to instantiate. There are three functions
that can be applied to a ref_iterator:
* ref_iterator_advance(): move to the next reference in the iteration
* ref_iterator_abort(): end the iteration before it is exhausted
* ref_iterator_peel(): peel the reference currently being looked at
Iterating using a ref_iterator leaves the flow of control in the hands
of the caller, which means that ref_iterators from multiple
sources (e.g., loose and packed refs) can be composed and presented to
the world as a single compound ref_iterator.
It also means that the backend code for implementing reference iteration
will sometimes be more complicated. For example, the
cache_ref_iterator (which iterates over a ref_cache) can't use the C
stack to recurse; instead, it must manage its own stack internally as
explicit data structures. There is also a lot of boilerplate connected
with object-oriented programming in C.
Eventually, end-user callers will be able to be written in a more
natural way—managing their own flow of control rather than having to
work via callbacks. Since there will only be a few reference backends
but there are many consumers of this API, this is a good tradeoff.
More importantly, we gain composability, and especially the possibility
of writing interchangeable parts that can work with any ref_iterator.
For example, merge_ref_iterator implements a generic way of merging the
contents of any two ref_iterators. It is used to merge loose + packed
refs as part of the implementation of the files_ref_iterator. But it
will also be possible to use it to merge other pairs of reference
sources (e.g., per-worktree vs. shared refs).
Another example is prefix_ref_iterator, which can be used to trim a
prefix off the front of reference names before presenting them to the
caller (e.g., "refs/heads/master" -> "master").
In this patch, we introduce the iterator abstraction and many utilities,
and implement a reference iterator for the files ref storage backend.
(I've written several other obvious utilities, for example a generic way
to filter references being iterated over. These will probably be useful
in the future. But they are not needed for this patch series, so I am
not including them at this time.)
In a moment we will rewrite do_for_each_ref() to work via reference
iterators (allowing some special-purpose code to be discarded), and do
something similar for reflogs. In future patch series, we will expose
the ref_iterator abstraction in the public refs API so that callers can
use it directly.
Implementation note: I tried abstracting this a layer further to allow
generic iterators (over arbitrary types of objects) and generic
utilities like a generic merge_iterator. But the implementation in C was
very cumbersome, involving (in my opinion) too much boilerplate and too
much unsafe casting, some of which would have had to be done on the
caller side. However, I did put a few iterator-related constants in a
top-level header file, iterator.h, as they will be useful in a moment to
implement iteration over directory trees and possibly other types of
iterators in the future.
Signed-off-by: Ramsay Jones <ramsay@ramsayjones.plus.com>
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-06-18 12:15:15 +08:00
|
|
|
static int files_ref_iterator_advance(struct ref_iterator *ref_iterator)
|
|
|
|
{
|
|
|
|
struct files_ref_iterator *iter =
|
|
|
|
(struct files_ref_iterator *)ref_iterator;
|
|
|
|
int ok;
|
2015-11-09 21:34:01 +08:00
|
|
|
|
refs: introduce an iterator interface
Currently, the API for iterating over references is via a family of
for_each_ref()-type functions that invoke a callback function for each
selected reference. All of these eventually call do_for_each_ref(),
which knows how to do one thing: iterate in parallel through two
ref_caches, one for loose and one for packed refs, giving loose
references precedence over packed refs. This is rather complicated code,
and is quite specialized to the files backend. It also requires callers
to encapsulate their work into a callback function, which often means
that they have to define and use a "cb_data" struct to manage their
context.
The current design is already bursting at the seams, and will become
even more awkward in the upcoming world of multiple reference storage
backends:
* Per-worktree vs. shared references are currently handled via a kludge
in git_path() rather than iterating over each part of the reference
namespace separately and merging the results. This kludge will cease
to work when we have multiple reference storage backends.
* The current scheme is inflexible. What if we sometimes want to bypass
the ref_cache, or use it only for packed or only for loose refs? What
if we want to store symbolic refs in one type of storage backend and
non-symbolic ones in another?
In the future, each reference backend will need to define its own way of
iterating over references. The crux of the problem with the current
design is that it is impossible to compose for_each_ref()-style
iterations, because the flow of control is owned by the for_each_ref()
function. There is nothing that a caller can do but iterate through all
references in a single burst, so there is no way for it to interleave
references from multiple backends and present the result to the rest of
the world as a single compound backend.
This commit introduces a new iteration primitive for references: a
ref_iterator. A ref_iterator is a polymorphic object that a reference
storage backend can be asked to instantiate. There are three functions
that can be applied to a ref_iterator:
* ref_iterator_advance(): move to the next reference in the iteration
* ref_iterator_abort(): end the iteration before it is exhausted
* ref_iterator_peel(): peel the reference currently being looked at
Iterating using a ref_iterator leaves the flow of control in the hands
of the caller, which means that ref_iterators from multiple
sources (e.g., loose and packed refs) can be composed and presented to
the world as a single compound ref_iterator.
It also means that the backend code for implementing reference iteration
will sometimes be more complicated. For example, the
cache_ref_iterator (which iterates over a ref_cache) can't use the C
stack to recurse; instead, it must manage its own stack internally as
explicit data structures. There is also a lot of boilerplate connected
with object-oriented programming in C.
Eventually, end-user callers will be able to be written in a more
natural way—managing their own flow of control rather than having to
work via callbacks. Since there will only be a few reference backends
but there are many consumers of this API, this is a good tradeoff.
More importantly, we gain composability, and especially the possibility
of writing interchangeable parts that can work with any ref_iterator.
For example, merge_ref_iterator implements a generic way of merging the
contents of any two ref_iterators. It is used to merge loose + packed
refs as part of the implementation of the files_ref_iterator. But it
will also be possible to use it to merge other pairs of reference
sources (e.g., per-worktree vs. shared refs).
Another example is prefix_ref_iterator, which can be used to trim a
prefix off the front of reference names before presenting them to the
caller (e.g., "refs/heads/master" -> "master").
In this patch, we introduce the iterator abstraction and many utilities,
and implement a reference iterator for the files ref storage backend.
(I've written several other obvious utilities, for example a generic way
to filter references being iterated over. These will probably be useful
in the future. But they are not needed for this patch series, so I am
not including them at this time.)
In a moment we will rewrite do_for_each_ref() to work via reference
iterators (allowing some special-purpose code to be discarded), and do
something similar for reflogs. In future patch series, we will expose
the ref_iterator abstraction in the public refs API so that callers can
use it directly.
Implementation note: I tried abstracting this a layer further to allow
generic iterators (over arbitrary types of objects) and generic
utilities like a generic merge_iterator. But the implementation in C was
very cumbersome, involving (in my opinion) too much boilerplate and too
much unsafe casting, some of which would have had to be done on the
caller side. However, I did put a few iterator-related constants in a
top-level header file, iterator.h, as they will be useful in a moment to
implement iteration over directory trees and possibly other types of
iterators in the future.
Signed-off-by: Ramsay Jones <ramsay@ramsayjones.plus.com>
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-06-18 12:15:15 +08:00
|
|
|
while ((ok = ref_iterator_advance(iter->iter0)) == ITER_OK) {
|
2016-09-05 00:08:44 +08:00
|
|
|
if (iter->flags & DO_FOR_EACH_PER_WORKTREE_ONLY &&
|
2022-09-20 00:34:50 +08:00
|
|
|
parse_worktree_ref(iter->iter0->refname, NULL, NULL,
|
|
|
|
NULL) != REF_WORKTREE_CURRENT)
|
2016-09-05 00:08:44 +08:00
|
|
|
continue;
|
|
|
|
|
refs: add DO_FOR_EACH_OMIT_DANGLING_SYMREFS flag
When the DO_FOR_EACH_INCLUDE_BROKEN flag is used, we include not only
actual corrupt refs (illegal names, missing objects), but also symrefs
that point to nothing. The latter is not really a corruption, but just
something that may happen normally. For example, the symref at
refs/remotes/origin/HEAD may point to a tracking branch which is later
deleted. (The local HEAD may also be unborn, of course, but we do not
access it through ref iteration).
Most callers of for_each_ref(), etc., do not care. They don't pass
INCLUDE_BROKEN, so don't see it at all. But for those which do pass it,
this somewhat-normal state causes extra warnings (e.g., from
for-each-ref) or even aborts operations (destructive repacks with
GIT_REF_PARANOIA set).
This patch just introduces the flag and the mechanism; there are no
callers yet (and hence no tests). Two things to note on the
implementation:
- we actually skip any symref that does not resolve to a ref. This
includes ones which point to an invalidly-named ref. You could argue
this is a more serious breakage than simple dangling. But the
overall effect is the same (we could not follow the symref), as well
as the impact on things like REF_PARANOIA (either way, a symref we
can't follow won't impact reachability, because we'll see the ref
itself during iteration). The underlying resolution function doesn't
distinguish these two cases (they both get REF_ISBROKEN).
- we change the iterator in refs/files-backend.c where we check
INCLUDE_BROKEN. There's a matching spot in refs/packed-backend.c,
but we don't need to do anything there. The packed backend does
not support symrefs at all.
The resulting set of flags might be a bit easier to follow if we broke
this down into "INCLUDE_CORRUPT_REFS" and "INCLUDE_DANGLING_SYMREFS".
But there are a few reasons not to do so:
- adding a new OMIT_DANGLING_SYMREFS flag lets us leave existing
callers intact, without changing their behavior (and some of them
really do want to see the dangling symrefs; e.g., t5505 has a test
which expects us to report when a symref becomes dangling)
- they're not actually independent. You cannot say "include dangling
symrefs" without also including refs whose objects are not
reachable, because dangling symrefs by definition do not have an
object. We could tweak the implementation to distinguish this, but
in practice nobody wants to ask for that. Adding the OMIT flag keeps
the implementation simple and makes sure we don't regress the
current behavior.
Signed-off-by: Jeff King <peff@peff.net>
Reviewed-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-09-25 02:41:32 +08:00
|
|
|
if ((iter->flags & DO_FOR_EACH_OMIT_DANGLING_SYMREFS) &&
|
|
|
|
(iter->iter0->flags & REF_ISSYMREF) &&
|
|
|
|
(iter->iter0->flags & REF_ISBROKEN))
|
|
|
|
continue;
|
|
|
|
|
refs: introduce an iterator interface
Currently, the API for iterating over references is via a family of
for_each_ref()-type functions that invoke a callback function for each
selected reference. All of these eventually call do_for_each_ref(),
which knows how to do one thing: iterate in parallel through two
ref_caches, one for loose and one for packed refs, giving loose
references precedence over packed refs. This is rather complicated code,
and is quite specialized to the files backend. It also requires callers
to encapsulate their work into a callback function, which often means
that they have to define and use a "cb_data" struct to manage their
context.
The current design is already bursting at the seams, and will become
even more awkward in the upcoming world of multiple reference storage
backends:
* Per-worktree vs. shared references are currently handled via a kludge
in git_path() rather than iterating over each part of the reference
namespace separately and merging the results. This kludge will cease
to work when we have multiple reference storage backends.
* The current scheme is inflexible. What if we sometimes want to bypass
the ref_cache, or use it only for packed or only for loose refs? What
if we want to store symbolic refs in one type of storage backend and
non-symbolic ones in another?
In the future, each reference backend will need to define its own way of
iterating over references. The crux of the problem with the current
design is that it is impossible to compose for_each_ref()-style
iterations, because the flow of control is owned by the for_each_ref()
function. There is nothing that a caller can do but iterate through all
references in a single burst, so there is no way for it to interleave
references from multiple backends and present the result to the rest of
the world as a single compound backend.
This commit introduces a new iteration primitive for references: a
ref_iterator. A ref_iterator is a polymorphic object that a reference
storage backend can be asked to instantiate. There are three functions
that can be applied to a ref_iterator:
* ref_iterator_advance(): move to the next reference in the iteration
* ref_iterator_abort(): end the iteration before it is exhausted
* ref_iterator_peel(): peel the reference currently being looked at
Iterating using a ref_iterator leaves the flow of control in the hands
of the caller, which means that ref_iterators from multiple
sources (e.g., loose and packed refs) can be composed and presented to
the world as a single compound ref_iterator.
It also means that the backend code for implementing reference iteration
will sometimes be more complicated. For example, the
cache_ref_iterator (which iterates over a ref_cache) can't use the C
stack to recurse; instead, it must manage its own stack internally as
explicit data structures. There is also a lot of boilerplate connected
with object-oriented programming in C.
Eventually, end-user callers will be able to be written in a more
natural way—managing their own flow of control rather than having to
work via callbacks. Since there will only be a few reference backends
but there are many consumers of this API, this is a good tradeoff.
More importantly, we gain composability, and especially the possibility
of writing interchangeable parts that can work with any ref_iterator.
For example, merge_ref_iterator implements a generic way of merging the
contents of any two ref_iterators. It is used to merge loose + packed
refs as part of the implementation of the files_ref_iterator. But it
will also be possible to use it to merge other pairs of reference
sources (e.g., per-worktree vs. shared refs).
Another example is prefix_ref_iterator, which can be used to trim a
prefix off the front of reference names before presenting them to the
caller (e.g., "refs/heads/master" -> "master").
In this patch, we introduce the iterator abstraction and many utilities,
and implement a reference iterator for the files ref storage backend.
(I've written several other obvious utilities, for example a generic way
to filter references being iterated over. These will probably be useful
in the future. But they are not needed for this patch series, so I am
not including them at this time.)
In a moment we will rewrite do_for_each_ref() to work via reference
iterators (allowing some special-purpose code to be discarded), and do
something similar for reflogs. In future patch series, we will expose
the ref_iterator abstraction in the public refs API so that callers can
use it directly.
Implementation note: I tried abstracting this a layer further to allow
generic iterators (over arbitrary types of objects) and generic
utilities like a generic merge_iterator. But the implementation in C was
very cumbersome, involving (in my opinion) too much boilerplate and too
much unsafe casting, some of which would have had to be done on the
caller side. However, I did put a few iterator-related constants in a
top-level header file, iterator.h, as they will be useful in a moment to
implement iteration over directory trees and possibly other types of
iterators in the future.
Signed-off-by: Ramsay Jones <ramsay@ramsayjones.plus.com>
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-06-18 12:15:15 +08:00
|
|
|
if (!(iter->flags & DO_FOR_EACH_INCLUDE_BROKEN) &&
|
|
|
|
!ref_resolves_to_object(iter->iter0->refname,
|
2021-10-09 05:08:15 +08:00
|
|
|
iter->repo,
|
refs: introduce an iterator interface
Currently, the API for iterating over references is via a family of
for_each_ref()-type functions that invoke a callback function for each
selected reference. All of these eventually call do_for_each_ref(),
which knows how to do one thing: iterate in parallel through two
ref_caches, one for loose and one for packed refs, giving loose
references precedence over packed refs. This is rather complicated code,
and is quite specialized to the files backend. It also requires callers
to encapsulate their work into a callback function, which often means
that they have to define and use a "cb_data" struct to manage their
context.
The current design is already bursting at the seams, and will become
even more awkward in the upcoming world of multiple reference storage
backends:
* Per-worktree vs. shared references are currently handled via a kludge
in git_path() rather than iterating over each part of the reference
namespace separately and merging the results. This kludge will cease
to work when we have multiple reference storage backends.
* The current scheme is inflexible. What if we sometimes want to bypass
the ref_cache, or use it only for packed or only for loose refs? What
if we want to store symbolic refs in one type of storage backend and
non-symbolic ones in another?
In the future, each reference backend will need to define its own way of
iterating over references. The crux of the problem with the current
design is that it is impossible to compose for_each_ref()-style
iterations, because the flow of control is owned by the for_each_ref()
function. There is nothing that a caller can do but iterate through all
references in a single burst, so there is no way for it to interleave
references from multiple backends and present the result to the rest of
the world as a single compound backend.
This commit introduces a new iteration primitive for references: a
ref_iterator. A ref_iterator is a polymorphic object that a reference
storage backend can be asked to instantiate. There are three functions
that can be applied to a ref_iterator:
* ref_iterator_advance(): move to the next reference in the iteration
* ref_iterator_abort(): end the iteration before it is exhausted
* ref_iterator_peel(): peel the reference currently being looked at
Iterating using a ref_iterator leaves the flow of control in the hands
of the caller, which means that ref_iterators from multiple
sources (e.g., loose and packed refs) can be composed and presented to
the world as a single compound ref_iterator.
It also means that the backend code for implementing reference iteration
will sometimes be more complicated. For example, the
cache_ref_iterator (which iterates over a ref_cache) can't use the C
stack to recurse; instead, it must manage its own stack internally as
explicit data structures. There is also a lot of boilerplate connected
with object-oriented programming in C.
Eventually, end-user callers will be able to be written in a more
natural way—managing their own flow of control rather than having to
work via callbacks. Since there will only be a few reference backends
but there are many consumers of this API, this is a good tradeoff.
More importantly, we gain composability, and especially the possibility
of writing interchangeable parts that can work with any ref_iterator.
For example, merge_ref_iterator implements a generic way of merging the
contents of any two ref_iterators. It is used to merge loose + packed
refs as part of the implementation of the files_ref_iterator. But it
will also be possible to use it to merge other pairs of reference
sources (e.g., per-worktree vs. shared refs).
Another example is prefix_ref_iterator, which can be used to trim a
prefix off the front of reference names before presenting them to the
caller (e.g., "refs/heads/master" -> "master").
In this patch, we introduce the iterator abstraction and many utilities,
and implement a reference iterator for the files ref storage backend.
(I've written several other obvious utilities, for example a generic way
to filter references being iterated over. These will probably be useful
in the future. But they are not needed for this patch series, so I am
not including them at this time.)
In a moment we will rewrite do_for_each_ref() to work via reference
iterators (allowing some special-purpose code to be discarded), and do
something similar for reflogs. In future patch series, we will expose
the ref_iterator abstraction in the public refs API so that callers can
use it directly.
Implementation note: I tried abstracting this a layer further to allow
generic iterators (over arbitrary types of objects) and generic
utilities like a generic merge_iterator. But the implementation in C was
very cumbersome, involving (in my opinion) too much boilerplate and too
much unsafe casting, some of which would have had to be done on the
caller side. However, I did put a few iterator-related constants in a
top-level header file, iterator.h, as they will be useful in a moment to
implement iteration over directory trees and possibly other types of
iterators in the future.
Signed-off-by: Ramsay Jones <ramsay@ramsayjones.plus.com>
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-06-18 12:15:15 +08:00
|
|
|
iter->iter0->oid,
|
|
|
|
iter->iter0->flags))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
iter->base.refname = iter->iter0->refname;
|
|
|
|
iter->base.oid = iter->iter0->oid;
|
|
|
|
iter->base.flags = iter->iter0->flags;
|
|
|
|
return ITER_OK;
|
2015-11-09 21:34:01 +08:00
|
|
|
}
|
|
|
|
|
refs: introduce an iterator interface
Currently, the API for iterating over references is via a family of
for_each_ref()-type functions that invoke a callback function for each
selected reference. All of these eventually call do_for_each_ref(),
which knows how to do one thing: iterate in parallel through two
ref_caches, one for loose and one for packed refs, giving loose
references precedence over packed refs. This is rather complicated code,
and is quite specialized to the files backend. It also requires callers
to encapsulate their work into a callback function, which often means
that they have to define and use a "cb_data" struct to manage their
context.
The current design is already bursting at the seams, and will become
even more awkward in the upcoming world of multiple reference storage
backends:
* Per-worktree vs. shared references are currently handled via a kludge
in git_path() rather than iterating over each part of the reference
namespace separately and merging the results. This kludge will cease
to work when we have multiple reference storage backends.
* The current scheme is inflexible. What if we sometimes want to bypass
the ref_cache, or use it only for packed or only for loose refs? What
if we want to store symbolic refs in one type of storage backend and
non-symbolic ones in another?
In the future, each reference backend will need to define its own way of
iterating over references. The crux of the problem with the current
design is that it is impossible to compose for_each_ref()-style
iterations, because the flow of control is owned by the for_each_ref()
function. There is nothing that a caller can do but iterate through all
references in a single burst, so there is no way for it to interleave
references from multiple backends and present the result to the rest of
the world as a single compound backend.
This commit introduces a new iteration primitive for references: a
ref_iterator. A ref_iterator is a polymorphic object that a reference
storage backend can be asked to instantiate. There are three functions
that can be applied to a ref_iterator:
* ref_iterator_advance(): move to the next reference in the iteration
* ref_iterator_abort(): end the iteration before it is exhausted
* ref_iterator_peel(): peel the reference currently being looked at
Iterating using a ref_iterator leaves the flow of control in the hands
of the caller, which means that ref_iterators from multiple
sources (e.g., loose and packed refs) can be composed and presented to
the world as a single compound ref_iterator.
It also means that the backend code for implementing reference iteration
will sometimes be more complicated. For example, the
cache_ref_iterator (which iterates over a ref_cache) can't use the C
stack to recurse; instead, it must manage its own stack internally as
explicit data structures. There is also a lot of boilerplate connected
with object-oriented programming in C.
Eventually, end-user callers will be able to be written in a more
natural way—managing their own flow of control rather than having to
work via callbacks. Since there will only be a few reference backends
but there are many consumers of this API, this is a good tradeoff.
More importantly, we gain composability, and especially the possibility
of writing interchangeable parts that can work with any ref_iterator.
For example, merge_ref_iterator implements a generic way of merging the
contents of any two ref_iterators. It is used to merge loose + packed
refs as part of the implementation of the files_ref_iterator. But it
will also be possible to use it to merge other pairs of reference
sources (e.g., per-worktree vs. shared refs).
Another example is prefix_ref_iterator, which can be used to trim a
prefix off the front of reference names before presenting them to the
caller (e.g., "refs/heads/master" -> "master").
In this patch, we introduce the iterator abstraction and many utilities,
and implement a reference iterator for the files ref storage backend.
(I've written several other obvious utilities, for example a generic way
to filter references being iterated over. These will probably be useful
in the future. But they are not needed for this patch series, so I am
not including them at this time.)
In a moment we will rewrite do_for_each_ref() to work via reference
iterators (allowing some special-purpose code to be discarded), and do
something similar for reflogs. In future patch series, we will expose
the ref_iterator abstraction in the public refs API so that callers can
use it directly.
Implementation note: I tried abstracting this a layer further to allow
generic iterators (over arbitrary types of objects) and generic
utilities like a generic merge_iterator. But the implementation in C was
very cumbersome, involving (in my opinion) too much boilerplate and too
much unsafe casting, some of which would have had to be done on the
caller side. However, I did put a few iterator-related constants in a
top-level header file, iterator.h, as they will be useful in a moment to
implement iteration over directory trees and possibly other types of
iterators in the future.
Signed-off-by: Ramsay Jones <ramsay@ramsayjones.plus.com>
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-06-18 12:15:15 +08:00
|
|
|
iter->iter0 = NULL;
|
|
|
|
if (ref_iterator_abort(ref_iterator) != ITER_DONE)
|
|
|
|
ok = ITER_ERROR;
|
|
|
|
|
|
|
|
return ok;
|
2015-11-09 21:34:01 +08:00
|
|
|
}
|
|
|
|
|
refs: introduce an iterator interface
Currently, the API for iterating over references is via a family of
for_each_ref()-type functions that invoke a callback function for each
selected reference. All of these eventually call do_for_each_ref(),
which knows how to do one thing: iterate in parallel through two
ref_caches, one for loose and one for packed refs, giving loose
references precedence over packed refs. This is rather complicated code,
and is quite specialized to the files backend. It also requires callers
to encapsulate their work into a callback function, which often means
that they have to define and use a "cb_data" struct to manage their
context.
The current design is already bursting at the seams, and will become
even more awkward in the upcoming world of multiple reference storage
backends:
* Per-worktree vs. shared references are currently handled via a kludge
in git_path() rather than iterating over each part of the reference
namespace separately and merging the results. This kludge will cease
to work when we have multiple reference storage backends.
* The current scheme is inflexible. What if we sometimes want to bypass
the ref_cache, or use it only for packed or only for loose refs? What
if we want to store symbolic refs in one type of storage backend and
non-symbolic ones in another?
In the future, each reference backend will need to define its own way of
iterating over references. The crux of the problem with the current
design is that it is impossible to compose for_each_ref()-style
iterations, because the flow of control is owned by the for_each_ref()
function. There is nothing that a caller can do but iterate through all
references in a single burst, so there is no way for it to interleave
references from multiple backends and present the result to the rest of
the world as a single compound backend.
This commit introduces a new iteration primitive for references: a
ref_iterator. A ref_iterator is a polymorphic object that a reference
storage backend can be asked to instantiate. There are three functions
that can be applied to a ref_iterator:
* ref_iterator_advance(): move to the next reference in the iteration
* ref_iterator_abort(): end the iteration before it is exhausted
* ref_iterator_peel(): peel the reference currently being looked at
Iterating using a ref_iterator leaves the flow of control in the hands
of the caller, which means that ref_iterators from multiple
sources (e.g., loose and packed refs) can be composed and presented to
the world as a single compound ref_iterator.
It also means that the backend code for implementing reference iteration
will sometimes be more complicated. For example, the
cache_ref_iterator (which iterates over a ref_cache) can't use the C
stack to recurse; instead, it must manage its own stack internally as
explicit data structures. There is also a lot of boilerplate connected
with object-oriented programming in C.
Eventually, end-user callers will be able to be written in a more
natural way—managing their own flow of control rather than having to
work via callbacks. Since there will only be a few reference backends
but there are many consumers of this API, this is a good tradeoff.
More importantly, we gain composability, and especially the possibility
of writing interchangeable parts that can work with any ref_iterator.
For example, merge_ref_iterator implements a generic way of merging the
contents of any two ref_iterators. It is used to merge loose + packed
refs as part of the implementation of the files_ref_iterator. But it
will also be possible to use it to merge other pairs of reference
sources (e.g., per-worktree vs. shared refs).
Another example is prefix_ref_iterator, which can be used to trim a
prefix off the front of reference names before presenting them to the
caller (e.g., "refs/heads/master" -> "master").
In this patch, we introduce the iterator abstraction and many utilities,
and implement a reference iterator for the files ref storage backend.
(I've written several other obvious utilities, for example a generic way
to filter references being iterated over. These will probably be useful
in the future. But they are not needed for this patch series, so I am
not including them at this time.)
In a moment we will rewrite do_for_each_ref() to work via reference
iterators (allowing some special-purpose code to be discarded), and do
something similar for reflogs. In future patch series, we will expose
the ref_iterator abstraction in the public refs API so that callers can
use it directly.
Implementation note: I tried abstracting this a layer further to allow
generic iterators (over arbitrary types of objects) and generic
utilities like a generic merge_iterator. But the implementation in C was
very cumbersome, involving (in my opinion) too much boilerplate and too
much unsafe casting, some of which would have had to be done on the
caller side. However, I did put a few iterator-related constants in a
top-level header file, iterator.h, as they will be useful in a moment to
implement iteration over directory trees and possibly other types of
iterators in the future.
Signed-off-by: Ramsay Jones <ramsay@ramsayjones.plus.com>
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-06-18 12:15:15 +08:00
|
|
|
static int files_ref_iterator_peel(struct ref_iterator *ref_iterator,
|
|
|
|
struct object_id *peeled)
|
2015-11-09 21:34:01 +08:00
|
|
|
{
|
refs: introduce an iterator interface
Currently, the API for iterating over references is via a family of
for_each_ref()-type functions that invoke a callback function for each
selected reference. All of these eventually call do_for_each_ref(),
which knows how to do one thing: iterate in parallel through two
ref_caches, one for loose and one for packed refs, giving loose
references precedence over packed refs. This is rather complicated code,
and is quite specialized to the files backend. It also requires callers
to encapsulate their work into a callback function, which often means
that they have to define and use a "cb_data" struct to manage their
context.
The current design is already bursting at the seams, and will become
even more awkward in the upcoming world of multiple reference storage
backends:
* Per-worktree vs. shared references are currently handled via a kludge
in git_path() rather than iterating over each part of the reference
namespace separately and merging the results. This kludge will cease
to work when we have multiple reference storage backends.
* The current scheme is inflexible. What if we sometimes want to bypass
the ref_cache, or use it only for packed or only for loose refs? What
if we want to store symbolic refs in one type of storage backend and
non-symbolic ones in another?
In the future, each reference backend will need to define its own way of
iterating over references. The crux of the problem with the current
design is that it is impossible to compose for_each_ref()-style
iterations, because the flow of control is owned by the for_each_ref()
function. There is nothing that a caller can do but iterate through all
references in a single burst, so there is no way for it to interleave
references from multiple backends and present the result to the rest of
the world as a single compound backend.
This commit introduces a new iteration primitive for references: a
ref_iterator. A ref_iterator is a polymorphic object that a reference
storage backend can be asked to instantiate. There are three functions
that can be applied to a ref_iterator:
* ref_iterator_advance(): move to the next reference in the iteration
* ref_iterator_abort(): end the iteration before it is exhausted
* ref_iterator_peel(): peel the reference currently being looked at
Iterating using a ref_iterator leaves the flow of control in the hands
of the caller, which means that ref_iterators from multiple
sources (e.g., loose and packed refs) can be composed and presented to
the world as a single compound ref_iterator.
It also means that the backend code for implementing reference iteration
will sometimes be more complicated. For example, the
cache_ref_iterator (which iterates over a ref_cache) can't use the C
stack to recurse; instead, it must manage its own stack internally as
explicit data structures. There is also a lot of boilerplate connected
with object-oriented programming in C.
Eventually, end-user callers will be able to be written in a more
natural way—managing their own flow of control rather than having to
work via callbacks. Since there will only be a few reference backends
but there are many consumers of this API, this is a good tradeoff.
More importantly, we gain composability, and especially the possibility
of writing interchangeable parts that can work with any ref_iterator.
For example, merge_ref_iterator implements a generic way of merging the
contents of any two ref_iterators. It is used to merge loose + packed
refs as part of the implementation of the files_ref_iterator. But it
will also be possible to use it to merge other pairs of reference
sources (e.g., per-worktree vs. shared refs).
Another example is prefix_ref_iterator, which can be used to trim a
prefix off the front of reference names before presenting them to the
caller (e.g., "refs/heads/master" -> "master").
In this patch, we introduce the iterator abstraction and many utilities,
and implement a reference iterator for the files ref storage backend.
(I've written several other obvious utilities, for example a generic way
to filter references being iterated over. These will probably be useful
in the future. But they are not needed for this patch series, so I am
not including them at this time.)
In a moment we will rewrite do_for_each_ref() to work via reference
iterators (allowing some special-purpose code to be discarded), and do
something similar for reflogs. In future patch series, we will expose
the ref_iterator abstraction in the public refs API so that callers can
use it directly.
Implementation note: I tried abstracting this a layer further to allow
generic iterators (over arbitrary types of objects) and generic
utilities like a generic merge_iterator. But the implementation in C was
very cumbersome, involving (in my opinion) too much boilerplate and too
much unsafe casting, some of which would have had to be done on the
caller side. However, I did put a few iterator-related constants in a
top-level header file, iterator.h, as they will be useful in a moment to
implement iteration over directory trees and possibly other types of
iterators in the future.
Signed-off-by: Ramsay Jones <ramsay@ramsayjones.plus.com>
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-06-18 12:15:15 +08:00
|
|
|
struct files_ref_iterator *iter =
|
|
|
|
(struct files_ref_iterator *)ref_iterator;
|
2016-04-08 03:02:49 +08:00
|
|
|
|
refs: introduce an iterator interface
Currently, the API for iterating over references is via a family of
for_each_ref()-type functions that invoke a callback function for each
selected reference. All of these eventually call do_for_each_ref(),
which knows how to do one thing: iterate in parallel through two
ref_caches, one for loose and one for packed refs, giving loose
references precedence over packed refs. This is rather complicated code,
and is quite specialized to the files backend. It also requires callers
to encapsulate their work into a callback function, which often means
that they have to define and use a "cb_data" struct to manage their
context.
The current design is already bursting at the seams, and will become
even more awkward in the upcoming world of multiple reference storage
backends:
* Per-worktree vs. shared references are currently handled via a kludge
in git_path() rather than iterating over each part of the reference
namespace separately and merging the results. This kludge will cease
to work when we have multiple reference storage backends.
* The current scheme is inflexible. What if we sometimes want to bypass
the ref_cache, or use it only for packed or only for loose refs? What
if we want to store symbolic refs in one type of storage backend and
non-symbolic ones in another?
In the future, each reference backend will need to define its own way of
iterating over references. The crux of the problem with the current
design is that it is impossible to compose for_each_ref()-style
iterations, because the flow of control is owned by the for_each_ref()
function. There is nothing that a caller can do but iterate through all
references in a single burst, so there is no way for it to interleave
references from multiple backends and present the result to the rest of
the world as a single compound backend.
This commit introduces a new iteration primitive for references: a
ref_iterator. A ref_iterator is a polymorphic object that a reference
storage backend can be asked to instantiate. There are three functions
that can be applied to a ref_iterator:
* ref_iterator_advance(): move to the next reference in the iteration
* ref_iterator_abort(): end the iteration before it is exhausted
* ref_iterator_peel(): peel the reference currently being looked at
Iterating using a ref_iterator leaves the flow of control in the hands
of the caller, which means that ref_iterators from multiple
sources (e.g., loose and packed refs) can be composed and presented to
the world as a single compound ref_iterator.
It also means that the backend code for implementing reference iteration
will sometimes be more complicated. For example, the
cache_ref_iterator (which iterates over a ref_cache) can't use the C
stack to recurse; instead, it must manage its own stack internally as
explicit data structures. There is also a lot of boilerplate connected
with object-oriented programming in C.
Eventually, end-user callers will be able to be written in a more
natural way—managing their own flow of control rather than having to
work via callbacks. Since there will only be a few reference backends
but there are many consumers of this API, this is a good tradeoff.
More importantly, we gain composability, and especially the possibility
of writing interchangeable parts that can work with any ref_iterator.
For example, merge_ref_iterator implements a generic way of merging the
contents of any two ref_iterators. It is used to merge loose + packed
refs as part of the implementation of the files_ref_iterator. But it
will also be possible to use it to merge other pairs of reference
sources (e.g., per-worktree vs. shared refs).
Another example is prefix_ref_iterator, which can be used to trim a
prefix off the front of reference names before presenting them to the
caller (e.g., "refs/heads/master" -> "master").
In this patch, we introduce the iterator abstraction and many utilities,
and implement a reference iterator for the files ref storage backend.
(I've written several other obvious utilities, for example a generic way
to filter references being iterated over. These will probably be useful
in the future. But they are not needed for this patch series, so I am
not including them at this time.)
In a moment we will rewrite do_for_each_ref() to work via reference
iterators (allowing some special-purpose code to be discarded), and do
something similar for reflogs. In future patch series, we will expose
the ref_iterator abstraction in the public refs API so that callers can
use it directly.
Implementation note: I tried abstracting this a layer further to allow
generic iterators (over arbitrary types of objects) and generic
utilities like a generic merge_iterator. But the implementation in C was
very cumbersome, involving (in my opinion) too much boilerplate and too
much unsafe casting, some of which would have had to be done on the
caller side. However, I did put a few iterator-related constants in a
top-level header file, iterator.h, as they will be useful in a moment to
implement iteration over directory trees and possibly other types of
iterators in the future.
Signed-off-by: Ramsay Jones <ramsay@ramsayjones.plus.com>
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-06-18 12:15:15 +08:00
|
|
|
return ref_iterator_peel(iter->iter0, peeled);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int files_ref_iterator_abort(struct ref_iterator *ref_iterator)
|
|
|
|
{
|
|
|
|
struct files_ref_iterator *iter =
|
|
|
|
(struct files_ref_iterator *)ref_iterator;
|
|
|
|
int ok = ITER_DONE;
|
|
|
|
|
|
|
|
if (iter->iter0)
|
|
|
|
ok = ref_iterator_abort(iter->iter0);
|
|
|
|
|
|
|
|
base_ref_iterator_free(ref_iterator);
|
|
|
|
return ok;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Method table for files_ref_iterator: binds the advance, peel and abort
 * slots of a ref_iterator_vtable to the files_ref_iterator_* callbacks.
 */
static struct ref_iterator_vtable files_ref_iterator_vtable = {
|
2022-03-18 01:27:16 +08:00
|
|
|
.advance = files_ref_iterator_advance,
|
|
|
|
.peel = files_ref_iterator_peel,
|
|
|
|
.abort = files_ref_iterator_abort,
|
refs: introduce an iterator interface
Currently, the API for iterating over references is via a family of
for_each_ref()-type functions that invoke a callback function for each
selected reference. All of these eventually call do_for_each_ref(),
which knows how to do one thing: iterate in parallel through two
ref_caches, one for loose and one for packed refs, giving loose
references precedence over packed refs. This is rather complicated code,
and is quite specialized to the files backend. It also requires callers
to encapsulate their work into a callback function, which often means
that they have to define and use a "cb_data" struct to manage their
context.
The current design is already bursting at the seams, and will become
even more awkward in the upcoming world of multiple reference storage
backends:
* Per-worktree vs. shared references are currently handled via a kludge
in git_path() rather than iterating over each part of the reference
namespace separately and merging the results. This kludge will cease
to work when we have multiple reference storage backends.
* The current scheme is inflexible. What if we sometimes want to bypass
the ref_cache, or use it only for packed or only for loose refs? What
if we want to store symbolic refs in one type of storage backend and
non-symbolic ones in another?
In the future, each reference backend will need to define its own way of
iterating over references. The crux of the problem with the current
design is that it is impossible to compose for_each_ref()-style
iterations, because the flow of control is owned by the for_each_ref()
function. There is nothing that a caller can do but iterate through all
references in a single burst, so there is no way for it to interleave
references from multiple backends and present the result to the rest of
the world as a single compound backend.
This commit introduces a new iteration primitive for references: a
ref_iterator. A ref_iterator is a polymorphic object that a reference
storage backend can be asked to instantiate. There are three functions
that can be applied to a ref_iterator:
* ref_iterator_advance(): move to the next reference in the iteration
* ref_iterator_abort(): end the iteration before it is exhausted
* ref_iterator_peel(): peel the reference currently being looked at
Iterating using a ref_iterator leaves the flow of control in the hands
of the caller, which means that ref_iterators from multiple
sources (e.g., loose and packed refs) can be composed and presented to
the world as a single compound ref_iterator.
It also means that the backend code for implementing reference iteration
will sometimes be more complicated. For example, the
cache_ref_iterator (which iterates over a ref_cache) can't use the C
stack to recurse; instead, it must manage its own stack internally as
explicit data structures. There is also a lot of boilerplate connected
with object-oriented programming in C.
Eventually, end-user callers will be able to be written in a more
natural way—managing their own flow of control rather than having to
work via callbacks. Since there will only be a few reference backends
but there are many consumers of this API, this is a good tradeoff.
More importantly, we gain composability, and especially the possibility
of writing interchangeable parts that can work with any ref_iterator.
For example, merge_ref_iterator implements a generic way of merging the
contents of any two ref_iterators. It is used to merge loose + packed
refs as part of the implementation of the files_ref_iterator. But it
will also be possible to use it to merge other pairs of reference
sources (e.g., per-worktree vs. shared refs).
Another example is prefix_ref_iterator, which can be used to trim a
prefix off the front of reference names before presenting them to the
caller (e.g., "refs/heads/master" -> "master").
In this patch, we introduce the iterator abstraction and many utilities,
and implement a reference iterator for the files ref storage backend.
(I've written several other obvious utilities, for example a generic way
to filter references being iterated over. These will probably be useful
in the future. But they are not needed for this patch series, so I am
not including them at this time.)
In a moment we will rewrite do_for_each_ref() to work via reference
iterators (allowing some special-purpose code to be discarded), and do
something similar for reflogs. In future patch series, we will expose
the ref_iterator abstraction in the public refs API so that callers can
use it directly.
Implementation note: I tried abstracting this a layer further to allow
generic iterators (over arbitrary types of objects) and generic
utilities like a generic merge_iterator. But the implementation in C was
very cumbersome, involving (in my opinion) too much boilerplate and too
much unsafe casting, some of which would have had to be done on the
caller side. However, I did put a few iterator-related constants in a
top-level header file, iterator.h, as they will be useful in a moment to
implement iteration over directory trees and possibly other types of
iterators in the future.
Signed-off-by: Ramsay Jones <ramsay@ramsayjones.plus.com>
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-06-18 12:15:15 +08:00
|
|
|
};
|
|
|
|
|
2016-09-05 00:08:37 +08:00
|
|
|
static struct ref_iterator *files_ref_iterator_begin(
|
2016-09-05 00:08:36 +08:00
|
|
|
struct ref_store *ref_store,
|
refs: introduce an iterator interface
Currently, the API for iterating over references is via a family of
for_each_ref()-type functions that invoke a callback function for each
selected reference. All of these eventually call do_for_each_ref(),
which knows how to do one thing: iterate in parallel through two
ref_caches, one for loose and one for packed refs, giving loose
references precedence over packed refs. This is rather complicated code,
and is quite specialized to the files backend. It also requires callers
to encapsulate their work into a callback function, which often means
that they have to define and use a "cb_data" struct to manage their
context.
The current design is already bursting at the seams, and will become
even more awkward in the upcoming world of multiple reference storage
backends:
* Per-worktree vs. shared references are currently handled via a kludge
in git_path() rather than iterating over each part of the reference
namespace separately and merging the results. This kludge will cease
to work when we have multiple reference storage backends.
* The current scheme is inflexible. What if we sometimes want to bypass
the ref_cache, or use it only for packed or only for loose refs? What
if we want to store symbolic refs in one type of storage backend and
non-symbolic ones in another?
In the future, each reference backend will need to define its own way of
iterating over references. The crux of the problem with the current
design is that it is impossible to compose for_each_ref()-style
iterations, because the flow of control is owned by the for_each_ref()
function. There is nothing that a caller can do but iterate through all
references in a single burst, so there is no way for it to interleave
references from multiple backends and present the result to the rest of
the world as a single compound backend.
This commit introduces a new iteration primitive for references: a
ref_iterator. A ref_iterator is a polymorphic object that a reference
storage backend can be asked to instantiate. There are three functions
that can be applied to a ref_iterator:
* ref_iterator_advance(): move to the next reference in the iteration
* ref_iterator_abort(): end the iteration before it is exhausted
* ref_iterator_peel(): peel the reference currently being looked at
Iterating using a ref_iterator leaves the flow of control in the hands
of the caller, which means that ref_iterators from multiple
sources (e.g., loose and packed refs) can be composed and presented to
the world as a single compound ref_iterator.
It also means that the backend code for implementing reference iteration
will sometimes be more complicated. For example, the
cache_ref_iterator (which iterates over a ref_cache) can't use the C
stack to recurse; instead, it must manage its own stack internally as
explicit data structures. There is also a lot of boilerplate connected
with object-oriented programming in C.
Eventually, end-user callers will be able to be written in a more
natural way—managing their own flow of control rather than having to
work via callbacks. Since there will only be a few reference backends
but there are many consumers of this API, this is a good tradeoff.
More importantly, we gain composability, and especially the possibility
of writing interchangeable parts that can work with any ref_iterator.
For example, merge_ref_iterator implements a generic way of merging the
contents of any two ref_iterators. It is used to merge loose + packed
refs as part of the implementation of the files_ref_iterator. But it
will also be possible to use it to merge other pairs of reference
sources (e.g., per-worktree vs. shared refs).
Another example is prefix_ref_iterator, which can be used to trim a
prefix off the front of reference names before presenting them to the
caller (e.g., "refs/heads/master" -> "master").
In this patch, we introduce the iterator abstraction and many utilities,
and implement a reference iterator for the files ref storage backend.
(I've written several other obvious utilities, for example a generic way
to filter references being iterated over. These will probably be useful
in the future. But they are not needed for this patch series, so I am
not including them at this time.)
In a moment we will rewrite do_for_each_ref() to work via reference
iterators (allowing some special-purpose code to be discarded), and do
something similar for reflogs. In future patch series, we will expose
the ref_iterator abstraction in the public refs API so that callers can
use it directly.
Implementation note: I tried abstracting this a layer further to allow
generic iterators (over arbitrary types of objects) and generic
utilities like a generic merge_iterator. But the implementation in C was
very cumbersome, involving (in my opinion) too much boilerplate and too
much unsafe casting, some of which would have had to be done on the
caller side. However, I did put a few iterator-related constants in a
top-level header file, iterator.h, as they will be useful in a moment to
implement iteration over directory trees and possibly other types of
iterators in the future.
Signed-off-by: Ramsay Jones <ramsay@ramsayjones.plus.com>
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-06-18 12:15:15 +08:00
|
|
|
const char *prefix, unsigned int flags)
|
|
|
|
{
|
2017-03-26 10:42:32 +08:00
|
|
|
struct files_ref_store *refs;
|
2017-09-14 01:15:55 +08:00
|
|
|
struct ref_iterator *loose_iter, *packed_iter, *overlay_iter;
|
refs: introduce an iterator interface
Currently, the API for iterating over references is via a family of
for_each_ref()-type functions that invoke a callback function for each
selected reference. All of these eventually call do_for_each_ref(),
which knows how to do one thing: iterate in parallel through two
ref_caches, one for loose and one for packed refs, giving loose
references precedence over packed refs. This is rather complicated code,
and is quite specialized to the files backend. It also requires callers
to encapsulate their work into a callback function, which often means
that they have to define and use a "cb_data" struct to manage their
context.
The current design is already bursting at the seams, and will become
even more awkward in the upcoming world of multiple reference storage
backends:
* Per-worktree vs. shared references are currently handled via a kludge
in git_path() rather than iterating over each part of the reference
namespace separately and merging the results. This kludge will cease
to work when we have multiple reference storage backends.
* The current scheme is inflexible. What if we sometimes want to bypass
the ref_cache, or use it only for packed or only for loose refs? What
if we want to store symbolic refs in one type of storage backend and
non-symbolic ones in another?
In the future, each reference backend will need to define its own way of
iterating over references. The crux of the problem with the current
design is that it is impossible to compose for_each_ref()-style
iterations, because the flow of control is owned by the for_each_ref()
function. There is nothing that a caller can do but iterate through all
references in a single burst, so there is no way for it to interleave
references from multiple backends and present the result to the rest of
the world as a single compound backend.
This commit introduces a new iteration primitive for references: a
ref_iterator. A ref_iterator is a polymorphic object that a reference
storage backend can be asked to instantiate. There are three functions
that can be applied to a ref_iterator:
* ref_iterator_advance(): move to the next reference in the iteration
* ref_iterator_abort(): end the iteration before it is exhausted
* ref_iterator_peel(): peel the reference currently being looked at
Iterating using a ref_iterator leaves the flow of control in the hands
of the caller, which means that ref_iterators from multiple
sources (e.g., loose and packed refs) can be composed and presented to
the world as a single compound ref_iterator.
It also means that the backend code for implementing reference iteration
will sometimes be more complicated. For example, the
cache_ref_iterator (which iterates over a ref_cache) can't use the C
stack to recurse; instead, it must manage its own stack internally as
explicit data structures. There is also a lot of boilerplate connected
with object-oriented programming in C.
Eventually, end-user callers will be able to be written in a more
natural way—managing their own flow of control rather than having to
work via callbacks. Since there will only be a few reference backends
but there are many consumers of this API, this is a good tradeoff.
More importantly, we gain composability, and especially the possibility
of writing interchangeable parts that can work with any ref_iterator.
For example, merge_ref_iterator implements a generic way of merging the
contents of any two ref_iterators. It is used to merge loose + packed
refs as part of the implementation of the files_ref_iterator. But it
will also be possible to use it to merge other pairs of reference
sources (e.g., per-worktree vs. shared refs).
Another example is prefix_ref_iterator, which can be used to trim a
prefix off the front of reference names before presenting them to the
caller (e.g., "refs/heads/master" -> "master").
In this patch, we introduce the iterator abstraction and many utilities,
and implement a reference iterator for the files ref storage backend.
(I've written several other obvious utilities, for example a generic way
to filter references being iterated over. These will probably be useful
in the future. But they are not needed for this patch series, so I am
not including them at this time.)
In a moment we will rewrite do_for_each_ref() to work via reference
iterators (allowing some special-purpose code to be discarded), and do
something similar for reflogs. In future patch series, we will expose
the ref_iterator abstraction in the public refs API so that callers can
use it directly.
Implementation note: I tried abstracting this a layer further to allow
generic iterators (over arbitrary types of objects) and generic
utilities like a generic merge_iterator. But the implementation in C was
very cumbersome, involving (in my opinion) too much boilerplate and too
much unsafe casting, some of which would have had to be done on the
caller side. However, I did put a few iterator-related constants in a
top-level header file, iterator.h, as they will be useful in a moment to
implement iteration over directory trees and possibly other types of
iterators in the future.
Signed-off-by: Ramsay Jones <ramsay@ramsayjones.plus.com>
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-06-18 12:15:15 +08:00
|
|
|
struct files_ref_iterator *iter;
|
|
|
|
struct ref_iterator *ref_iterator;
|
2017-05-22 22:17:52 +08:00
|
|
|
unsigned int required_flags = REF_STORE_READ;
|
refs: introduce an iterator interface
Currently, the API for iterating over references is via a family of
for_each_ref()-type functions that invoke a callback function for each
selected reference. All of these eventually call do_for_each_ref(),
which knows how to do one thing: iterate in parallel through two
ref_caches, one for loose and one for packed refs, giving loose
references precedence over packed refs. This is rather complicated code,
and is quite specialized to the files backend. It also requires callers
to encapsulate their work into a callback function, which often means
that they have to define and use a "cb_data" struct to manage their
context.
The current design is already bursting at the seams, and will become
even more awkward in the upcoming world of multiple reference storage
backends:
* Per-worktree vs. shared references are currently handled via a kludge
in git_path() rather than iterating over each part of the reference
namespace separately and merging the results. This kludge will cease
to work when we have multiple reference storage backends.
* The current scheme is inflexible. What if we sometimes want to bypass
the ref_cache, or use it only for packed or only for loose refs? What
if we want to store symbolic refs in one type of storage backend and
non-symbolic ones in another?
In the future, each reference backend will need to define its own way of
iterating over references. The crux of the problem with the current
design is that it is impossible to compose for_each_ref()-style
iterations, because the flow of control is owned by the for_each_ref()
function. There is nothing that a caller can do but iterate through all
references in a single burst, so there is no way for it to interleave
references from multiple backends and present the result to the rest of
the world as a single compound backend.
This commit introduces a new iteration primitive for references: a
ref_iterator. A ref_iterator is a polymorphic object that a reference
storage backend can be asked to instantiate. There are three functions
that can be applied to a ref_iterator:
* ref_iterator_advance(): move to the next reference in the iteration
* ref_iterator_abort(): end the iteration before it is exhausted
* ref_iterator_peel(): peel the reference currently being looked at
Iterating using a ref_iterator leaves the flow of control in the hands
of the caller, which means that ref_iterators from multiple
sources (e.g., loose and packed refs) can be composed and presented to
the world as a single compound ref_iterator.
It also means that the backend code for implementing reference iteration
will sometimes be more complicated. For example, the
cache_ref_iterator (which iterates over a ref_cache) can't use the C
stack to recurse; instead, it must manage its own stack internally as
explicit data structures. There is also a lot of boilerplate connected
with object-oriented programming in C.
Eventually, end-user callers will be able to be written in a more
natural way—managing their own flow of control rather than having to
work via callbacks. Since there will only be a few reference backends
but there are many consumers of this API, this is a good tradeoff.
More importantly, we gain composability, and especially the possibility
of writing interchangeable parts that can work with any ref_iterator.
For example, merge_ref_iterator implements a generic way of merging the
contents of any two ref_iterators. It is used to merge loose + packed
refs as part of the implementation of the files_ref_iterator. But it
will also be possible to use it to merge other pairs of reference
sources (e.g., per-worktree vs. shared refs).
Another example is prefix_ref_iterator, which can be used to trim a
prefix off the front of reference names before presenting them to the
caller (e.g., "refs/heads/master" -> "master").
In this patch, we introduce the iterator abstraction and many utilities,
and implement a reference iterator for the files ref storage backend.
(I've written several other obvious utilities, for example a generic way
to filter references being iterated over. These will probably be useful
in the future. But they are not needed for this patch series, so I am
not including them at this time.)
In a moment we will rewrite do_for_each_ref() to work via reference
iterators (allowing some special-purpose code to be discarded), and do
something similar for reflogs. In future patch series, we will expose
the ref_iterator abstraction in the public refs API so that callers can
use it directly.
Implementation note: I tried abstracting this a layer further to allow
generic iterators (over arbitrary types of objects) and generic
utilities like a generic merge_iterator. But the implementation in C was
very cumbersome, involving (in my opinion) too much boilerplate and too
much unsafe casting, some of which would have had to be done on the
caller side. However, I did put a few iterator-related constants in a
top-level header file, iterator.h, as they will be useful in a moment to
implement iteration over directory trees and possibly other types of
iterators in the future.
Signed-off-by: Ramsay Jones <ramsay@ramsayjones.plus.com>
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-06-18 12:15:15 +08:00
|
|
|
|
2017-05-22 22:17:52 +08:00
|
|
|
if (!(flags & DO_FOR_EACH_INCLUDE_BROKEN))
|
|
|
|
required_flags |= REF_STORE_ODB;
|
refs: introduce an iterator interface
Currently, the API for iterating over references is via a family of
for_each_ref()-type functions that invoke a callback function for each
selected reference. All of these eventually call do_for_each_ref(),
which knows how to do one thing: iterate in parallel through two
ref_caches, one for loose and one for packed refs, giving loose
references precedence over packed refs. This is rather complicated code,
and is quite specialized to the files backend. It also requires callers
to encapsulate their work into a callback function, which often means
that they have to define and use a "cb_data" struct to manage their
context.
The current design is already bursting at the seams, and will become
even more awkward in the upcoming world of multiple reference storage
backends:
* Per-worktree vs. shared references are currently handled via a kludge
in git_path() rather than iterating over each part of the reference
namespace separately and merging the results. This kludge will cease
to work when we have multiple reference storage backends.
* The current scheme is inflexible. What if we sometimes want to bypass
the ref_cache, or use it only for packed or only for loose refs? What
if we want to store symbolic refs in one type of storage backend and
non-symbolic ones in another?
In the future, each reference backend will need to define its own way of
iterating over references. The crux of the problem with the current
design is that it is impossible to compose for_each_ref()-style
iterations, because the flow of control is owned by the for_each_ref()
function. There is nothing that a caller can do but iterate through all
references in a single burst, so there is no way for it to interleave
references from multiple backends and present the result to the rest of
the world as a single compound backend.
This commit introduces a new iteration primitive for references: a
ref_iterator. A ref_iterator is a polymorphic object that a reference
storage backend can be asked to instantiate. There are three functions
that can be applied to a ref_iterator:
* ref_iterator_advance(): move to the next reference in the iteration
* ref_iterator_abort(): end the iteration before it is exhausted
* ref_iterator_peel(): peel the reference currently being looked at
Iterating using a ref_iterator leaves the flow of control in the hands
of the caller, which means that ref_iterators from multiple
sources (e.g., loose and packed refs) can be composed and presented to
the world as a single compound ref_iterator.
It also means that the backend code for implementing reference iteration
will sometimes be more complicated. For example, the
cache_ref_iterator (which iterates over a ref_cache) can't use the C
stack to recurse; instead, it must manage its own stack internally as
explicit data structures. There is also a lot of boilerplate connected
with object-oriented programming in C.
Eventually, end-user callers will be able to be written in a more
natural way—managing their own flow of control rather than having to
work via callbacks. Since there will only be a few reference backends
but there are many consumers of this API, this is a good tradeoff.
More importantly, we gain composability, and especially the possibility
of writing interchangeable parts that can work with any ref_iterator.
For example, merge_ref_iterator implements a generic way of merging the
contents of any two ref_iterators. It is used to merge loose + packed
refs as part of the implementation of the files_ref_iterator. But it
will also be possible to use it to merge other pairs of reference
sources (e.g., per-worktree vs. shared refs).
Another example is prefix_ref_iterator, which can be used to trim a
prefix off the front of reference names before presenting them to the
caller (e.g., "refs/heads/master" -> "master").
In this patch, we introduce the iterator abstraction and many utilities,
and implement a reference iterator for the files ref storage backend.
(I've written several other obvious utilities, for example a generic way
to filter references being iterated over. These will probably be useful
in the future. But they are not needed for this patch series, so I am
not including them at this time.)
In a moment we will rewrite do_for_each_ref() to work via reference
iterators (allowing some special-purpose code to be discarded), and do
something similar for reflogs. In future patch series, we will expose
the ref_iterator abstraction in the public refs API so that callers can
use it directly.
Implementation note: I tried abstracting this a layer further to allow
generic iterators (over arbitrary types of objects) and generic
utilities like a generic merge_iterator. But the implementation in C was
very cumbersome, involving (in my opinion) too much boilerplate and too
much unsafe casting, some of which would have had to be done on the
caller side. However, I did put a few iterator-related constants in a
top-level header file, iterator.h, as they will be useful in a moment to
implement iteration over directory trees and possibly other types of
iterators in the future.
Signed-off-by: Ramsay Jones <ramsay@ramsayjones.plus.com>
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-06-18 12:15:15 +08:00
|
|
|
|
2017-05-22 22:17:52 +08:00
|
|
|
refs = files_downcast(ref_store, required_flags, "ref_iterator_begin");
|
2017-03-26 10:42:32 +08:00
|
|
|
|
refs: introduce an iterator interface
Currently, the API for iterating over references is via a family of
for_each_ref()-type functions that invoke a callback function for each
selected reference. All of these eventually call do_for_each_ref(),
which knows how to do one thing: iterate in parallel through two
ref_caches, one for loose and one for packed refs, giving loose
references precedence over packed refs. This is rather complicated code,
and is quite specialized to the files backend. It also requires callers
to encapsulate their work into a callback function, which often means
that they have to define and use a "cb_data" struct to manage their
context.
The current design is already bursting at the seams, and will become
even more awkward in the upcoming world of multiple reference storage
backends:
* Per-worktree vs. shared references are currently handled via a kludge
in git_path() rather than iterating over each part of the reference
namespace separately and merging the results. This kludge will cease
to work when we have multiple reference storage backends.
* The current scheme is inflexible. What if we sometimes want to bypass
the ref_cache, or use it only for packed or only for loose refs? What
if we want to store symbolic refs in one type of storage backend and
non-symbolic ones in another?
In the future, each reference backend will need to define its own way of
iterating over references. The crux of the problem with the current
design is that it is impossible to compose for_each_ref()-style
iterations, because the flow of control is owned by the for_each_ref()
function. There is nothing that a caller can do but iterate through all
references in a single burst, so there is no way for it to interleave
references from multiple backends and present the result to the rest of
the world as a single compound backend.
This commit introduces a new iteration primitive for references: a
ref_iterator. A ref_iterator is a polymorphic object that a reference
storage backend can be asked to instantiate. There are three functions
that can be applied to a ref_iterator:
* ref_iterator_advance(): move to the next reference in the iteration
* ref_iterator_abort(): end the iteration before it is exhausted
* ref_iterator_peel(): peel the reference currently being looked at
Iterating using a ref_iterator leaves the flow of control in the hands
of the caller, which means that ref_iterators from multiple
sources (e.g., loose and packed refs) can be composed and presented to
the world as a single compound ref_iterator.
It also means that the backend code for implementing reference iteration
will sometimes be more complicated. For example, the
cache_ref_iterator (which iterates over a ref_cache) can't use the C
stack to recurse; instead, it must manage its own stack internally as
explicit data structures. There is also a lot of boilerplate connected
with object-oriented programming in C.
Eventually, end-user callers will be able to be written in a more
natural way—managing their own flow of control rather than having to
work via callbacks. Since there will only be a few reference backends
but there are many consumers of this API, this is a good tradeoff.
More importantly, we gain composability, and especially the possibility
of writing interchangeable parts that can work with any ref_iterator.
For example, merge_ref_iterator implements a generic way of merging the
contents of any two ref_iterators. It is used to merge loose + packed
refs as part of the implementation of the files_ref_iterator. But it
will also be possible to use it to merge other pairs of reference
sources (e.g., per-worktree vs. shared refs).
Another example is prefix_ref_iterator, which can be used to trim a
prefix off the front of reference names before presenting them to the
caller (e.g., "refs/heads/master" -> "master").
In this patch, we introduce the iterator abstraction and many utilities,
and implement a reference iterator for the files ref storage backend.
(I've written several other obvious utilities, for example a generic way
to filter references being iterated over. These will probably be useful
in the future. But they are not needed for this patch series, so I am
not including them at this time.)
In a moment we will rewrite do_for_each_ref() to work via reference
iterators (allowing some special-purpose code to be discarded), and do
something similar for reflogs. In future patch series, we will expose
the ref_iterator abstraction in the public refs API so that callers can
use it directly.
Implementation note: I tried abstracting this a layer further to allow
generic iterators (over arbitrary types of objects) and generic
utilities like a generic merge_iterator. But the implementation in C was
very cumbersome, involving (in my opinion) too much boilerplate and too
much unsafe casting, some of which would have had to be done on the
caller side. However, I did put a few iterator-related constants in a
top-level header file, iterator.h, as they will be useful in a moment to
implement iteration over directory trees and possibly other types of
iterators in the future.
Signed-off-by: Ramsay Jones <ramsay@ramsayjones.plus.com>
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-06-18 12:15:15 +08:00
|
|
|
/*
|
|
|
|
* We must make sure that all loose refs are read before
|
|
|
|
* accessing the packed-refs file; this avoids a race
|
|
|
|
* condition if loose refs are migrated to the packed-refs
|
|
|
|
* file by a simultaneous process, but our in-memory view is
|
|
|
|
* from before the migration. We ensure this as follows:
|
2017-04-16 14:41:39 +08:00
|
|
|
* First, we start the loose refs iteration with its
|
|
|
|
* `prime_ref` argument set to true. This causes the loose
|
|
|
|
* references in the subtree to be pre-read into the cache.
|
|
|
|
* (If they've already been read, that's OK; we only need to
|
|
|
|
* guarantee that they're read before the packed refs, not
|
|
|
|
* *how much* before.) After that, we call
|
2017-06-23 15:01:35 +08:00
|
|
|
* packed_ref_iterator_begin(), which internally checks
|
|
|
|
* whether the packed-ref cache is up to date with what is on
|
|
|
|
* disk, and re-reads it if not.
|
refs: introduce an iterator interface
Currently, the API for iterating over references is via a family of
for_each_ref()-type functions that invoke a callback function for each
selected reference. All of these eventually call do_for_each_ref(),
which knows how to do one thing: iterate in parallel through two
ref_caches, one for loose and one for packed refs, giving loose
references precedence over packed refs. This is rather complicated code,
and is quite specialized to the files backend. It also requires callers
to encapsulate their work into a callback function, which often means
that they have to define and use a "cb_data" struct to manage their
context.
The current design is already bursting at the seams, and will become
even more awkward in the upcoming world of multiple reference storage
backends:
* Per-worktree vs. shared references are currently handled via a kludge
in git_path() rather than iterating over each part of the reference
namespace separately and merging the results. This kludge will cease
to work when we have multiple reference storage backends.
* The current scheme is inflexible. What if we sometimes want to bypass
the ref_cache, or use it only for packed or only for loose refs? What
if we want to store symbolic refs in one type of storage backend and
non-symbolic ones in another?
In the future, each reference backend will need to define its own way of
iterating over references. The crux of the problem with the current
design is that it is impossible to compose for_each_ref()-style
iterations, because the flow of control is owned by the for_each_ref()
function. There is nothing that a caller can do but iterate through all
references in a single burst, so there is no way for it to interleave
references from multiple backends and present the result to the rest of
the world as a single compound backend.
This commit introduces a new iteration primitive for references: a
ref_iterator. A ref_iterator is a polymorphic object that a reference
storage backend can be asked to instantiate. There are three functions
that can be applied to a ref_iterator:
* ref_iterator_advance(): move to the next reference in the iteration
* ref_iterator_abort(): end the iteration before it is exhausted
* ref_iterator_peel(): peel the reference currently being looked at
Iterating using a ref_iterator leaves the flow of control in the hands
of the caller, which means that ref_iterators from multiple
sources (e.g., loose and packed refs) can be composed and presented to
the world as a single compound ref_iterator.
It also means that the backend code for implementing reference iteration
will sometimes be more complicated. For example, the
cache_ref_iterator (which iterates over a ref_cache) can't use the C
stack to recurse; instead, it must manage its own stack internally as
explicit data structures. There is also a lot of boilerplate connected
with object-oriented programming in C.
Eventually, end-user callers will be able to be written in a more
natural way—managing their own flow of control rather than having to
work via callbacks. Since there will only be a few reference backends
but there are many consumers of this API, this is a good tradeoff.
More importantly, we gain composability, and especially the possibility
of writing interchangeable parts that can work with any ref_iterator.
For example, merge_ref_iterator implements a generic way of merging the
contents of any two ref_iterators. It is used to merge loose + packed
refs as part of the implementation of the files_ref_iterator. But it
will also be possible to use it to merge other pairs of reference
sources (e.g., per-worktree vs. shared refs).
Another example is prefix_ref_iterator, which can be used to trim a
prefix off the front of reference names before presenting them to the
caller (e.g., "refs/heads/master" -> "master").
In this patch, we introduce the iterator abstraction and many utilities,
and implement a reference iterator for the files ref storage backend.
(I've written several other obvious utilities, for example a generic way
to filter references being iterated over. These will probably be useful
in the future. But they are not needed for this patch series, so I am
not including them at this time.)
In a moment we will rewrite do_for_each_ref() to work via reference
iterators (allowing some special-purpose code to be discarded), and do
something similar for reflogs. In future patch series, we will expose
the ref_iterator abstraction in the public refs API so that callers can
use it directly.
Implementation note: I tried abstracting this a layer further to allow
generic iterators (over arbitrary types of objects) and generic
utilities like a generic merge_iterator. But the implementation in C was
very cumbersome, involving (in my opinion) too much boilerplate and too
much unsafe casting, some of which would have had to be done on the
caller side. However, I did put a few iterator-related constants in a
top-level header file, iterator.h, as they will be useful in a moment to
implement iteration over directory trees and possibly other types of
iterators in the future.
Signed-off-by: Ramsay Jones <ramsay@ramsayjones.plus.com>
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-06-18 12:15:15 +08:00
|
|
|
*/
|
|
|
|
|
2017-04-16 14:41:39 +08:00
|
|
|
loose_iter = cache_ref_iterator_begin(get_loose_ref_cache(refs),
|
2021-10-09 05:08:16 +08:00
|
|
|
prefix, ref_store->repo, 1);
|
refs: introduce an iterator interface
Currently, the API for iterating over references is via a family of
for_each_ref()-type functions that invoke a callback function for each
selected reference. All of these eventually call do_for_each_ref(),
which knows how to do one thing: iterate in parallel through two
ref_caches, one for loose and one for packed refs, giving loose
references precedence over packed refs. This is rather complicated code,
and is quite specialized to the files backend. It also requires callers
to encapsulate their work into a callback function, which often means
that they have to define and use a "cb_data" struct to manage their
context.
The current design is already bursting at the seams, and will become
even more awkward in the upcoming world of multiple reference storage
backends:
* Per-worktree vs. shared references are currently handled via a kludge
in git_path() rather than iterating over each part of the reference
namespace separately and merging the results. This kludge will cease
to work when we have multiple reference storage backends.
* The current scheme is inflexible. What if we sometimes want to bypass
the ref_cache, or use it only for packed or only for loose refs? What
if we want to store symbolic refs in one type of storage backend and
non-symbolic ones in another?
In the future, each reference backend will need to define its own way of
iterating over references. The crux of the problem with the current
design is that it is impossible to compose for_each_ref()-style
iterations, because the flow of control is owned by the for_each_ref()
function. There is nothing that a caller can do but iterate through all
references in a single burst, so there is no way for it to interleave
references from multiple backends and present the result to the rest of
the world as a single compound backend.
This commit introduces a new iteration primitive for references: a
ref_iterator. A ref_iterator is a polymorphic object that a reference
storage backend can be asked to instantiate. There are three functions
that can be applied to a ref_iterator:
* ref_iterator_advance(): move to the next reference in the iteration
* ref_iterator_abort(): end the iteration before it is exhausted
* ref_iterator_peel(): peel the reference currently being looked at
Iterating using a ref_iterator leaves the flow of control in the hands
of the caller, which means that ref_iterators from multiple
sources (e.g., loose and packed refs) can be composed and presented to
the world as a single compound ref_iterator.
It also means that the backend code for implementing reference iteration
will sometimes be more complicated. For example, the
cache_ref_iterator (which iterates over a ref_cache) can't use the C
stack to recurse; instead, it must manage its own stack internally as
explicit data structures. There is also a lot of boilerplate connected
with object-oriented programming in C.
Eventually, end-user callers will be able to be written in a more
natural way—managing their own flow of control rather than having to
work via callbacks. Since there will only be a few reference backends
but there are many consumers of this API, this is a good tradeoff.
More importantly, we gain composability, and especially the possibility
of writing interchangeable parts that can work with any ref_iterator.
For example, merge_ref_iterator implements a generic way of merging the
contents of any two ref_iterators. It is used to merge loose + packed
refs as part of the implementation of the files_ref_iterator. But it
will also be possible to use it to merge other pairs of reference
sources (e.g., per-worktree vs. shared refs).
Another example is prefix_ref_iterator, which can be used to trim a
prefix off the front of reference names before presenting them to the
caller (e.g., "refs/heads/master" -> "master").
In this patch, we introduce the iterator abstraction and many utilities,
and implement a reference iterator for the files ref storage backend.
(I've written several other obvious utilities, for example a generic way
to filter references being iterated over. These will probably be useful
in the future. But they are not needed for this patch series, so I am
not including them at this time.)
In a moment we will rewrite do_for_each_ref() to work via reference
iterators (allowing some special-purpose code to be discarded), and do
something similar for reflogs. In future patch series, we will expose
the ref_iterator abstraction in the public refs API so that callers can
use it directly.
Implementation note: I tried abstracting this a layer further to allow
generic iterators (over arbitrary types of objects) and generic
utilities like a generic merge_iterator. But the implementation in C was
very cumbersome, involving (in my opinion) too much boilerplate and too
much unsafe casting, some of which would have had to be done on the
caller side. However, I did put a few iterator-related constants in a
top-level header file, iterator.h, as they will be useful in a moment to
implement iteration over directory trees and possibly other types of
iterators in the future.
Signed-off-by: Ramsay Jones <ramsay@ramsayjones.plus.com>
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-06-18 12:15:15 +08:00
|
|
|
|
2017-06-23 15:01:35 +08:00
|
|
|
/*
|
|
|
|
* The packed-refs file might contain broken references, for
|
|
|
|
* example an old version of a reference that points at an
|
|
|
|
* object that has since been garbage-collected. This is OK as
|
|
|
|
* long as there is a corresponding loose reference that
|
|
|
|
* overrides it, and we don't want to emit an error message in
|
|
|
|
* this case. So ask the packed_ref_store for all of its
|
|
|
|
* references, and (if needed) do our own check for broken
|
|
|
|
* ones in files_ref_iterator_advance(), after we have merged
|
|
|
|
* the packed and loose references.
|
|
|
|
*/
|
2017-06-23 15:01:38 +08:00
|
|
|
packed_iter = refs_ref_iterator_begin(
|
|
|
|
refs->packed_ref_store, prefix, 0,
|
2017-06-23 15:01:35 +08:00
|
|
|
DO_FOR_EACH_INCLUDE_BROKEN);
|
refs: introduce an iterator interface
Currently, the API for iterating over references is via a family of
for_each_ref()-type functions that invoke a callback function for each
selected reference. All of these eventually call do_for_each_ref(),
which knows how to do one thing: iterate in parallel through two
ref_caches, one for loose and one for packed refs, giving loose
references precedence over packed refs. This is rather complicated code,
and is quite specialized to the files backend. It also requires callers
to encapsulate their work into a callback function, which often means
that they have to define and use a "cb_data" struct to manage their
context.
The current design is already bursting at the seams, and will become
even more awkward in the upcoming world of multiple reference storage
backends:
* Per-worktree vs. shared references are currently handled via a kludge
in git_path() rather than iterating over each part of the reference
namespace separately and merging the results. This kludge will cease
to work when we have multiple reference storage backends.
* The current scheme is inflexible. What if we sometimes want to bypass
the ref_cache, or use it only for packed or only for loose refs? What
if we want to store symbolic refs in one type of storage backend and
non-symbolic ones in another?
In the future, each reference backend will need to define its own way of
iterating over references. The crux of the problem with the current
design is that it is impossible to compose for_each_ref()-style
iterations, because the flow of control is owned by the for_each_ref()
function. There is nothing that a caller can do but iterate through all
references in a single burst, so there is no way for it to interleave
references from multiple backends and present the result to the rest of
the world as a single compound backend.
This commit introduces a new iteration primitive for references: a
ref_iterator. A ref_iterator is a polymorphic object that a reference
storage backend can be asked to instantiate. There are three functions
that can be applied to a ref_iterator:
* ref_iterator_advance(): move to the next reference in the iteration
* ref_iterator_abort(): end the iteration before it is exhausted
* ref_iterator_peel(): peel the reference currently being looked at
Iterating using a ref_iterator leaves the flow of control in the hands
of the caller, which means that ref_iterators from multiple
sources (e.g., loose and packed refs) can be composed and presented to
the world as a single compound ref_iterator.
It also means that the backend code for implementing reference iteration
will sometimes be more complicated. For example, the
cache_ref_iterator (which iterates over a ref_cache) can't use the C
stack to recurse; instead, it must manage its own stack internally as
explicit data structures. There is also a lot of boilerplate connected
with object-oriented programming in C.
Eventually, end-user callers will be able to be written in a more
natural way—managing their own flow of control rather than having to
work via callbacks. Since there will only be a few reference backends
but there are many consumers of this API, this is a good tradeoff.
More importantly, we gain composability, and especially the possibility
of writing interchangeable parts that can work with any ref_iterator.
For example, merge_ref_iterator implements a generic way of merging the
contents of any two ref_iterators. It is used to merge loose + packed
refs as part of the implementation of the files_ref_iterator. But it
will also be possible to use it to merge other pairs of reference
sources (e.g., per-worktree vs. shared refs).
Another example is prefix_ref_iterator, which can be used to trim a
prefix off the front of reference names before presenting them to the
caller (e.g., "refs/heads/master" -> "master").
In this patch, we introduce the iterator abstraction and many utilities,
and implement a reference iterator for the files ref storage backend.
(I've written several other obvious utilities, for example a generic way
to filter references being iterated over. These will probably be useful
in the future. But they are not needed for this patch series, so I am
not including them at this time.)
In a moment we will rewrite do_for_each_ref() to work via reference
iterators (allowing some special-purpose code to be discarded), and do
something similar for reflogs. In future patch series, we will expose
the ref_iterator abstraction in the public refs API so that callers can
use it directly.
Implementation note: I tried abstracting this a layer further to allow
generic iterators (over arbitrary types of objects) and generic
utilities like a generic merge_iterator. But the implementation in C was
very cumbersome, involving (in my opinion) too much boilerplate and too
much unsafe casting, some of which would have had to be done on the
caller side. However, I did put a few iterator-related constants in a
top-level header file, iterator.h, as they will be useful in a moment to
implement iteration over directory trees and possibly other types of
iterators in the future.
Signed-off-by: Ramsay Jones <ramsay@ramsayjones.plus.com>
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-06-18 12:15:15 +08:00
|
|
|
|
2017-09-14 01:15:55 +08:00
|
|
|
overlay_iter = overlay_ref_iterator_begin(loose_iter, packed_iter);
|
|
|
|
|
2021-03-14 00:17:22 +08:00
|
|
|
CALLOC_ARRAY(iter, 1);
|
2017-09-14 01:15:55 +08:00
|
|
|
ref_iterator = &iter->base;
|
|
|
|
base_ref_iterator_init(ref_iterator, &files_ref_iterator_vtable,
|
|
|
|
overlay_iter->ordered);
|
|
|
|
iter->iter0 = overlay_iter;
|
2021-10-09 05:08:15 +08:00
|
|
|
iter->repo = ref_store->repo;
|
refs: introduce an iterator interface
Currently, the API for iterating over references is via a family of
for_each_ref()-type functions that invoke a callback function for each
selected reference. All of these eventually call do_for_each_ref(),
which knows how to do one thing: iterate in parallel through two
ref_caches, one for loose and one for packed refs, giving loose
references precedence over packed refs. This is rather complicated code,
and is quite specialized to the files backend. It also requires callers
to encapsulate their work into a callback function, which often means
that they have to define and use a "cb_data" struct to manage their
context.
The current design is already bursting at the seams, and will become
even more awkward in the upcoming world of multiple reference storage
backends:
* Per-worktree vs. shared references are currently handled via a kludge
in git_path() rather than iterating over each part of the reference
namespace separately and merging the results. This kludge will cease
to work when we have multiple reference storage backends.
* The current scheme is inflexible. What if we sometimes want to bypass
the ref_cache, or use it only for packed or only for loose refs? What
if we want to store symbolic refs in one type of storage backend and
non-symbolic ones in another?
In the future, each reference backend will need to define its own way of
iterating over references. The crux of the problem with the current
design is that it is impossible to compose for_each_ref()-style
iterations, because the flow of control is owned by the for_each_ref()
function. There is nothing that a caller can do but iterate through all
references in a single burst, so there is no way for it to interleave
references from multiple backends and present the result to the rest of
the world as a single compound backend.
This commit introduces a new iteration primitive for references: a
ref_iterator. A ref_iterator is a polymorphic object that a reference
storage backend can be asked to instantiate. There are three functions
that can be applied to a ref_iterator:
* ref_iterator_advance(): move to the next reference in the iteration
* ref_iterator_abort(): end the iteration before it is exhausted
* ref_iterator_peel(): peel the reference currently being looked at
Iterating using a ref_iterator leaves the flow of control in the hands
of the caller, which means that ref_iterators from multiple
sources (e.g., loose and packed refs) can be composed and presented to
the world as a single compound ref_iterator.
It also means that the backend code for implementing reference iteration
will sometimes be more complicated. For example, the
cache_ref_iterator (which iterates over a ref_cache) can't use the C
stack to recurse; instead, it must manage its own stack internally as
explicit data structures. There is also a lot of boilerplate connected
with object-oriented programming in C.
Eventually, end-user callers will be able to be written in a more
natural way—managing their own flow of control rather than having to
work via callbacks. Since there will only be a few reference backends
but there are many consumers of this API, this is a good tradeoff.
More importantly, we gain composability, and especially the possibility
of writing interchangeable parts that can work with any ref_iterator.
For example, merge_ref_iterator implements a generic way of merging the
contents of any two ref_iterators. It is used to merge loose + packed
refs as part of the implementation of the files_ref_iterator. But it
will also be possible to use it to merge other pairs of reference
sources (e.g., per-worktree vs. shared refs).
Another example is prefix_ref_iterator, which can be used to trim a
prefix off the front of reference names before presenting them to the
caller (e.g., "refs/heads/master" -> "master").
In this patch, we introduce the iterator abstraction and many utilities,
and implement a reference iterator for the files ref storage backend.
(I've written several other obvious utilities, for example a generic way
to filter references being iterated over. These will probably be useful
in the future. But they are not needed for this patch series, so I am
not including them at this time.)
In a moment we will rewrite do_for_each_ref() to work via reference
iterators (allowing some special-purpose code to be discarded), and do
something similar for reflogs. In future patch series, we will expose
the ref_iterator abstraction in the public refs API so that callers can
use it directly.
Implementation note: I tried abstracting this a layer further to allow
generic iterators (over arbitrary types of objects) and generic
utilities like a generic merge_iterator. But the implementation in C was
very cumbersome, involving (in my opinion) too much boilerplate and too
much unsafe casting, some of which would have had to be done on the
caller side. However, I did put a few iterator-related constants in a
top-level header file, iterator.h, as they will be useful in a moment to
implement iteration over directory trees and possibly other types of
iterators in the future.
Signed-off-by: Ramsay Jones <ramsay@ramsayjones.plus.com>
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-06-18 12:15:15 +08:00
|
|
|
iter->flags = flags;
|
|
|
|
|
|
|
|
return ref_iterator;
|
2015-11-09 21:34:01 +08:00
|
|
|
}
|
|
|
|
|
2021-08-23 19:52:37 +08:00
|
|
|
/*
|
|
|
|
* Callback function for raceproof_create_file(). This function is
|
|
|
|
* expected to do something that makes dirname(path) permanent despite
|
|
|
|
* the fact that other processes might be cleaning up empty
|
|
|
|
* directories at the same time. Usually it will create a file named
|
|
|
|
* path, but alternatively it could create another file in that
|
|
|
|
* directory, or even chdir() into that directory. The function should
|
|
|
|
* return 0 if the action was completed successfully. On error, it
|
|
|
|
* should return a nonzero result and set errno.
|
|
|
|
* raceproof_create_file() treats two errno values specially:
|
|
|
|
*
|
|
|
|
* - ENOENT -- dirname(path) does not exist. In this case,
|
|
|
|
* raceproof_create_file() tries creating dirname(path)
|
|
|
|
* (and any parent directories, if necessary) and calls
|
|
|
|
* the function again.
|
|
|
|
*
|
|
|
|
* - EISDIR -- the file already exists and is a directory. In this
|
|
|
|
* case, raceproof_create_file() removes the directory if
|
|
|
|
* it is empty (and recursively any empty directories that
|
|
|
|
* it contains) and calls the function again.
|
|
|
|
*
|
|
|
|
* Any other errno causes raceproof_create_file() to fail with the
|
|
|
|
* callback's return value and errno.
|
|
|
|
*
|
|
|
|
* Obviously, this function should be OK with being called again if it
|
|
|
|
* fails with ENOENT or EISDIR. In other scenarios it will not be
|
|
|
|
* called again.
|
|
|
|
*/
|
|
|
|
/*
 * Signature summary: `path` is the file the callback should act on and
 * `cb` is the opaque pointer that raceproof_create_file() was given,
 * passed through to the callback unchanged. Returns 0 on success, or a
 * nonzero value with errno set on failure.
 */
typedef int create_file_fn(const char *path, void *cb);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Create a file in dirname(path) by calling fn, creating leading
|
|
|
|
* directories if necessary. Retry a few times in case we are racing
|
|
|
|
* with another process that is trying to clean up the directory that
|
|
|
|
* contains path. See the documentation for create_file_fn for more
|
|
|
|
* details.
|
|
|
|
*
|
|
|
|
* Return the value and set the errno that resulted from the most
|
|
|
|
* recent call of fn. fn is always called at least once, and will be
|
|
|
|
* called more than once if it fails with errno set to ENOENT or EISDIR.
|
|
|
|
*/
|
|
|
|
static int raceproof_create_file(const char *path, create_file_fn fn, void *cb)
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
* The number of times we will try to remove empty directories
|
|
|
|
* in the way of path. This is only 1 because if another
|
|
|
|
* process is racily creating directories that conflict with
|
|
|
|
* us, we don't want to fight against them.
|
|
|
|
*/
|
|
|
|
int remove_directories_remaining = 1;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The number of times that we will try to create the
|
|
|
|
* directories containing path. We are willing to attempt this
|
|
|
|
* more than once, because another process could be trying to
|
|
|
|
* clean up empty directories at the same time as we are
|
|
|
|
* trying to create them.
|
|
|
|
*/
|
|
|
|
int create_directories_remaining = 3;
|
|
|
|
|
|
|
|
/* A scratch copy of path, filled lazily if we need it: */
|
|
|
|
struct strbuf path_copy = STRBUF_INIT;
|
|
|
|
|
|
|
|
int ret, save_errno;
|
|
|
|
|
|
|
|
/* Sanity check: */
|
|
|
|
assert(*path);
|
|
|
|
|
|
|
|
retry_fn:
|
|
|
|
ret = fn(path, cb);
|
|
|
|
save_errno = errno;
|
|
|
|
if (!ret)
|
|
|
|
goto out;
|
|
|
|
|
|
|
|
if (errno == EISDIR && remove_directories_remaining-- > 0) {
|
|
|
|
/*
|
|
|
|
* A directory is in the way. Maybe it is empty; try
|
|
|
|
* to remove it:
|
|
|
|
*/
|
|
|
|
if (!path_copy.len)
|
|
|
|
strbuf_addstr(&path_copy, path);
|
|
|
|
|
|
|
|
if (!remove_dir_recursively(&path_copy, REMOVE_DIR_EMPTY_ONLY))
|
|
|
|
goto retry_fn;
|
|
|
|
} else if (errno == ENOENT && create_directories_remaining-- > 0) {
|
|
|
|
/*
|
|
|
|
* Maybe the containing directory didn't exist, or
|
|
|
|
* maybe it was just deleted by a process that is
|
|
|
|
* racing with us to clean up empty directories. Try
|
|
|
|
* to create it:
|
|
|
|
*/
|
|
|
|
enum scld_error scld_result;
|
|
|
|
|
|
|
|
if (!path_copy.len)
|
|
|
|
strbuf_addstr(&path_copy, path);
|
|
|
|
|
|
|
|
do {
|
|
|
|
scld_result = safe_create_leading_directories(path_copy.buf);
|
|
|
|
if (scld_result == SCLD_OK)
|
|
|
|
goto retry_fn;
|
|
|
|
} while (scld_result == SCLD_VANISHED && create_directories_remaining-- > 0);
|
|
|
|
}
|
|
|
|
|
|
|
|
out:
|
|
|
|
strbuf_release(&path_copy);
|
|
|
|
errno = save_errno;
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2015-11-09 21:34:01 +08:00
|
|
|
static int remove_empty_directories(struct strbuf *path)
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
* we want to create a file but there is a directory there;
|
|
|
|
* if that is an empty directory (or a directory that contains
|
|
|
|
* only empty directories), remove them.
|
|
|
|
*/
|
|
|
|
return remove_dir_recursively(path, REMOVE_DIR_EMPTY_ONLY);
|
|
|
|
}
|
|
|
|
|
2017-01-07 00:22:28 +08:00
|
|
|
static int create_reflock(const char *path, void *cb)
|
|
|
|
{
|
|
|
|
struct lock_file *lk = cb;
|
|
|
|
|
2017-08-21 19:51:34 +08:00
|
|
|
return hold_lock_file_for_update_timeout(
|
|
|
|
lk, path, LOCK_NO_DEREF,
|
|
|
|
get_files_ref_lock_timeout_ms()) < 0 ? -1 : 0;
|
2017-01-07 00:22:28 +08:00
|
|
|
}
|
|
|
|
|
2015-11-09 21:34:01 +08:00
|
|
|
/*
|
|
|
|
* Locks a ref returning the lock on success and NULL on failure.
|
|
|
|
*/
|
2017-10-16 06:07:12 +08:00
|
|
|
static struct ref_lock *lock_ref_oid_basic(struct files_ref_store *refs,
|
refs/files: remove "name exist?" check in lock_ref_oid_basic()
In lock_ref_oid_basic() we'll happily lock a reference that doesn't
exist yet. That's normal, and is how references are initially born,
but we don't need to retain checks here in lock_ref_oid_basic() about
the state of the ref, when what we're checking is either checked
already, or something we're about to discover by trying to lock the
ref with raceproof_create_file().
The one exception is the caller in files_reflog_expire(), who passes
us a "type" to find out if the reference is a symref or not. We can
move the that logic over to that caller, which can now defer its
discovery of whether or not the ref is a symref until it's needed. In
the preceding commit an exhaustive regression test was added for that
case in a new test in "t1417-reflog-updateref.sh".
The improved diagnostics here were added in
5b2d8d6f218 (lock_ref_sha1_basic(): improve diagnostics for ref D/F
conflicts, 2015-05-11), and then much of the surrounding code went
away recently in my 245fbba46d6 (refs/files: remove unused "errno ==
EISDIR" code, 2021-08-23).
The refs_resolve_ref_unsafe() code being removed here looks like it
should be tasked with doing that, but it's actually redundant to other
code.
The reason for that is as noted in 245fbba46d6 this once widely used
function now only has a handful of callers left, which all handle this
case themselves.
To the extent that we're racy between their check and ours removing
this check actually improves the situation, as we'll be doing fewer
things between the not-under-lock initial check and acquiring the
lock.
Why this is OK for all the remaining callers of lock_ref_oid_basic()
is noted below. There are only two of those callers:
* "git branch -[cm] <oldbranch> <newbranch>":
In files_copy_or_rename_ref() we'll call this when we copy or rename
refs via rename_ref() and copy_ref(). but only after we've checked
if the refname exists already via its own call to
refs_resolve_ref_unsafe() and refs_rename_ref_available().
As the updated comment to the latter here notes neither of those are
actually needed. If we delete not only this code but also
refs_rename_ref_available() we'll do just fine, we'll just emit a
less friendly error message if e.g. "git branch -m A B/C" would have
a D/F conflict with a "B" file.
Actually we'd probably die before that in case reflogs for the
branch existed, i.e. when the try to rename() or copy_file() the
relevant reflog, since if we've got a D/F conflict with a branch
name we'll probably also have the same with its reflogs (but not
necessarily, we might have reflogs, but it might not).
As some #leftoverbits that code seems buggy to me, i.e. the reflog
"protocol" should be to get a lock on the main ref, and then perform
ref and/or reflog operations. That code dates back to
c976d415e53 (git-branch: add options and tests for branch renaming,
2006-11-28) and probably pre-dated the solidifying of that
convention. But in any case, that edge case is not our bug or
problem right now.
* "git reflog expire <ref>":
In files_reflog_expire() we'll call this without previous ref
existence checking in files-backend.c, but that code is in turn
called by code that's just finished checking if the refname whose
reflog we're expiring exists.
See ae35e16cd43 (reflog expire: don't lock reflogs using previously
seen OID, 2021-08-23) for the current state of that code, and
5e6f003ca8a (reflog_expire(): ignore --updateref for symbolic
references, 2015-03-03) for the code we'd break if we only did a
"update = !!ref" here, which is covered by the aforementioned
regression test in "t1417-reflog-updateref.sh".
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-10-16 17:39:13 +08:00
|
|
|
const char *refname,
|
2017-10-16 06:07:12 +08:00
|
|
|
struct strbuf *err)
|
2015-11-09 21:34:01 +08:00
|
|
|
{
|
|
|
|
struct strbuf ref_file = STRBUF_INIT;
|
|
|
|
struct ref_lock *lock;
|
|
|
|
|
2017-10-16 06:07:12 +08:00
|
|
|
files_assert_main_repository(refs, "lock_ref_oid_basic");
|
2015-11-09 21:34:01 +08:00
|
|
|
assert(err);
|
|
|
|
|
2021-03-14 00:17:22 +08:00
|
|
|
CALLOC_ARRAY(lock, 1);
|
2015-11-09 21:34:01 +08:00
|
|
|
|
2017-03-26 10:42:23 +08:00
|
|
|
files_ref_path(refs, &ref_file, refname);
|
lock_ref_sha1_basic: handle REF_NODEREF with invalid refs
We sometimes call lock_ref_sha1_basic with REF_NODEREF
to operate directly on a symbolic ref. This is used, for
example, to move to a detached HEAD, or when updating
the contents of HEAD via checkout or symbolic-ref.
However, the first step of the function is to resolve the
refname to get the "old" sha1, and we do so without telling
resolve_ref_unsafe() that we are only interested in the
symref. As a result, we may detect a problem there not with
the symref itself, but with something it points to.
The real-world example I found (and what is used in the test
suite) is a HEAD pointing to a ref that cannot exist,
because it would cause a directory/file conflict with other
existing refs. This situation is somewhat broken, of
course, as trying to _commit_ on that HEAD would fail. But
it's not explicitly forbidden, and we should be able to move
away from it. However, neither "git checkout" nor "git
symbolic-ref" can do so. We try to take the lock on HEAD,
which is pointing to a non-existent ref. We bail from
resolve_ref_unsafe() with errno set to EISDIR, and the lock
code thinks we are attempting to create a d/f conflict.
Of course we're not. The problem is that the lock code has
no idea what level we were at when we got EISDIR, so trying
to diagnose or remove empty directories for HEAD is not
useful.
To make things even more complicated, we only get EISDIR in
the loose-ref case. If the refs are packed, the resolution
may "succeed", giving us the pointed-to ref in "refname",
but a null oid. Later, we say "ah, the null oid means we are
creating; let's make sure there is room for it", but
mistakenly check against the _resolved_ refname, not the
original.
We can fix this by making two tweaks:
1. Call resolve_ref_unsafe() with RESOLVE_REF_NO_RECURSE
when REF_NODEREF is set. This means any errors
we get will be from the orig_refname, and we can act
accordingly.
We already do this in the REF_DELETING case, but we
should do it for update, too.
2. If we do get a "refname" return from
resolve_ref_unsafe(), even with RESOLVE_REF_NO_RECURSE
it may be the name of the ref pointed-to by a symref.
We already normalize this back to orig_refname before
taking the lockfile, but we need to do so before the
null_oid check.
While we're rearranging the REF_NODEREF handling, we can
also bump the initialization of lflags to the top of the
function, where we are setting up other flags. This saves us
from having yet another conditional block on REF_NODEREF
just to set it later.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-01-13 05:45:09 +08:00
|
|
|
|
2015-11-09 21:34:01 +08:00
|
|
|
/*
|
|
|
|
* If the ref did not exist and we are creating it, make sure
|
|
|
|
* there is no existing packed ref whose name begins with our
|
|
|
|
* refname, nor a packed ref whose name is a proper prefix of
|
|
|
|
* our refname.
|
|
|
|
*/
|
|
|
|
if (is_null_oid(&lock->old_oid) &&
|
files-backend: cheapen refname_available check when locking refs
When locking references in preparation for updating them, we need to
check that none of the newly added references D/F conflict with
existing references (e.g., we don't allow `refs/foo` to be added if
`refs/foo/bar` already exists, or vice versa).
Prior to 524a9fdb51 (refs_verify_refname_available(): use function in
more places, 2017-04-16), conflicts with existing loose references
were checked by looking directly in the filesystem, and then conflicts
with existing packed references were checked by running
`verify_refname_available_dir()` against the packed-refs cache.
But that commit changed the final check to call
`refs_verify_refname_available()` against the *whole* files ref-store,
including both loose and packed references, with the following
comment:
> This means that those callsites now check for conflicts with all
> references rather than just packed refs, but the performance cost
> shouldn't be significant (and will be regained later).
That comment turned out to be too sanguine. User s@kazlauskas.me
reported that fetches involving a very large number of references in
neighboring directories were slowed down by that change.
The problem is that when fetching, each reference is updated
individually, within its own reference transaction. This is done
because some reference updates might succeed even though others fail.
But every time a reference update transaction is finished,
`clear_loose_ref_cache()` is called. So when it is time to update the
next reference, part of the loose ref cache has to be repopulated for
the `refs_verify_refname_available()` call. If the references are all
in neighboring directories, then the cost of repopulating the
reference cache increases with the number of references, resulting in
O(N²) effort.
The comment above also claims that the performance cost "will be
regained later". The idea was that once the packed-refs were finished
being split out into a separate ref-store, we could limit the
`refs_verify_refname_available()` call to the packed references again.
That is what we do now.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-08-17 23:12:50 +08:00
|
|
|
refs_verify_refname_available(refs->packed_ref_store, refname,
|
2021-08-23 19:52:39 +08:00
|
|
|
NULL, NULL, err))
|
2015-11-09 21:34:01 +08:00
|
|
|
goto error_return;
|
|
|
|
|
|
|
|
lock->ref_name = xstrdup(refname);
|
|
|
|
|
2017-09-05 20:15:15 +08:00
|
|
|
if (raceproof_create_file(ref_file.buf, create_reflock, &lock->lk)) {
|
2017-01-07 00:22:28 +08:00
|
|
|
unable_to_lock_message(ref_file.buf, errno, err);
|
2015-11-09 21:34:01 +08:00
|
|
|
goto error_return;
|
|
|
|
}
|
|
|
|
|
2021-10-16 17:39:27 +08:00
|
|
|
if (!refs_resolve_ref_unsafe(&refs->base, lock->ref_name, 0,
|
2022-01-26 22:37:01 +08:00
|
|
|
&lock->old_oid, NULL))
|
2021-08-23 19:36:12 +08:00
|
|
|
oidclr(&lock->old_oid);
|
2015-11-09 21:34:01 +08:00
|
|
|
goto out;
|
|
|
|
|
|
|
|
error_return:
|
|
|
|
unlock_ref(lock);
|
|
|
|
lock = NULL;
|
|
|
|
|
|
|
|
out:
|
|
|
|
strbuf_release(&ref_file);
|
|
|
|
return lock;
|
|
|
|
}
|
|
|
|
|
|
|
|
struct ref_to_prune {
|
|
|
|
struct ref_to_prune *next;
|
2017-10-16 06:06:49 +08:00
|
|
|
struct object_id oid;
|
2015-11-09 21:34:01 +08:00
|
|
|
char name[FLEX_ARRAY];
|
|
|
|
};
|
|
|
|
|
2017-01-07 00:22:42 +08:00
|
|
|
/* Bit flags selecting which parent directories to clean up. */
enum {
	REMOVE_EMPTY_PARENTS_REF = 1 << 0,	/* the reference's directories */
	REMOVE_EMPTY_PARENTS_REFLOG = 1 << 1	/* the reflog's directories */
};
|
|
|
|
|
2015-11-09 21:34:01 +08:00
|
|
|
/*
|
2017-01-07 00:22:42 +08:00
|
|
|
* Remove empty parent directories associated with the specified
|
|
|
|
* reference and/or its reflog, but spare [logs/]refs/ and immediate
|
|
|
|
* subdirs. flags is a combination of REMOVE_EMPTY_PARENTS_REF and/or
|
|
|
|
* REMOVE_EMPTY_PARENTS_REFLOG.
|
2015-11-09 21:34:01 +08:00
|
|
|
*/
|
2017-03-26 10:42:22 +08:00
|
|
|
static void try_remove_empty_parents(struct files_ref_store *refs,
|
|
|
|
const char *refname,
|
|
|
|
unsigned int flags)
|
2015-11-09 21:34:01 +08:00
|
|
|
{
|
2017-01-07 00:22:41 +08:00
|
|
|
struct strbuf buf = STRBUF_INIT;
|
2017-03-26 10:42:20 +08:00
|
|
|
struct strbuf sb = STRBUF_INIT;
|
2015-11-09 21:34:01 +08:00
|
|
|
char *p, *q;
|
|
|
|
int i;
|
2017-01-07 00:22:41 +08:00
|
|
|
|
|
|
|
strbuf_addstr(&buf, refname);
|
|
|
|
p = buf.buf;
|
2015-11-09 21:34:01 +08:00
|
|
|
for (i = 0; i < 2; i++) { /* refs/{heads,tags,...}/ */
|
|
|
|
while (*p && *p != '/')
|
|
|
|
p++;
|
|
|
|
/* tolerate duplicate slashes; see check_refname_format() */
|
|
|
|
while (*p == '/')
|
|
|
|
p++;
|
|
|
|
}
|
2017-01-07 00:22:41 +08:00
|
|
|
q = buf.buf + buf.len;
|
2017-01-07 00:22:42 +08:00
|
|
|
while (flags & (REMOVE_EMPTY_PARENTS_REF | REMOVE_EMPTY_PARENTS_REFLOG)) {
|
2015-11-09 21:34:01 +08:00
|
|
|
while (q > p && *q != '/')
|
|
|
|
q--;
|
|
|
|
while (q > p && *(q-1) == '/')
|
|
|
|
q--;
|
|
|
|
if (q == p)
|
|
|
|
break;
|
2017-01-07 00:22:41 +08:00
|
|
|
strbuf_setlen(&buf, q - buf.buf);
|
2017-03-26 10:42:20 +08:00
|
|
|
|
|
|
|
strbuf_reset(&sb);
|
2017-03-26 10:42:23 +08:00
|
|
|
files_ref_path(refs, &sb, buf.buf);
|
2017-03-26 10:42:20 +08:00
|
|
|
if ((flags & REMOVE_EMPTY_PARENTS_REF) && rmdir(sb.buf))
|
2017-01-07 00:22:42 +08:00
|
|
|
flags &= ~REMOVE_EMPTY_PARENTS_REF;
|
2017-03-26 10:42:20 +08:00
|
|
|
|
|
|
|
strbuf_reset(&sb);
|
2017-03-26 10:42:22 +08:00
|
|
|
files_reflog_path(refs, &sb, buf.buf);
|
2017-03-26 10:42:20 +08:00
|
|
|
if ((flags & REMOVE_EMPTY_PARENTS_REFLOG) && rmdir(sb.buf))
|
2017-01-07 00:22:42 +08:00
|
|
|
flags &= ~REMOVE_EMPTY_PARENTS_REFLOG;
|
2015-11-09 21:34:01 +08:00
|
|
|
}
|
2017-01-07 00:22:41 +08:00
|
|
|
strbuf_release(&buf);
|
2017-03-26 10:42:20 +08:00
|
|
|
strbuf_release(&sb);
|
2015-11-09 21:34:01 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/* make sure nobody touched the ref, and unlink */
|
2017-03-26 10:42:36 +08:00
|
|
|
static void prune_ref(struct files_ref_store *refs, struct ref_to_prune *r)
|
2015-11-09 21:34:01 +08:00
|
|
|
{
|
|
|
|
struct ref_transaction *transaction;
|
|
|
|
struct strbuf err = STRBUF_INIT;
|
2017-11-05 16:42:02 +08:00
|
|
|
int ret = -1;
|
2015-11-09 21:34:01 +08:00
|
|
|
|
|
|
|
if (check_refname_format(r->name, 0))
|
|
|
|
return;
|
|
|
|
|
2022-04-14 06:51:33 +08:00
|
|
|
transaction = ref_store_transaction_begin(&refs->base, &err);
|
2017-11-05 16:42:02 +08:00
|
|
|
if (!transaction)
|
|
|
|
goto cleanup;
|
|
|
|
ref_transaction_add_update(
|
|
|
|
transaction, r->name,
|
2017-11-05 16:42:07 +08:00
|
|
|
REF_NO_DEREF | REF_HAVE_NEW | REF_HAVE_OLD | REF_IS_PRUNING,
|
2021-04-26 09:02:56 +08:00
|
|
|
null_oid(), &r->oid, NULL);
|
2017-11-05 16:42:02 +08:00
|
|
|
if (ref_transaction_commit(transaction, &err))
|
|
|
|
goto cleanup;
|
|
|
|
|
|
|
|
ret = 0;
|
|
|
|
|
|
|
|
cleanup:
|
|
|
|
if (ret)
|
2015-11-09 21:34:01 +08:00
|
|
|
error("%s", err.buf);
|
|
|
|
strbuf_release(&err);
|
2017-11-05 16:42:02 +08:00
|
|
|
ref_transaction_free(transaction);
|
|
|
|
return;
|
2015-11-09 21:34:01 +08:00
|
|
|
}
|
|
|
|
|
2017-09-08 21:51:48 +08:00
|
|
|
/*
|
|
|
|
* Prune the loose versions of the references in the linked list
|
|
|
|
* `*refs_to_prune`, freeing the entries in the list as we go.
|
|
|
|
*/
|
|
|
|
static void prune_refs(struct files_ref_store *refs, struct ref_to_prune **refs_to_prune)
|
2015-11-09 21:34:01 +08:00
|
|
|
{
|
2017-09-08 21:51:48 +08:00
|
|
|
while (*refs_to_prune) {
|
|
|
|
struct ref_to_prune *r = *refs_to_prune;
|
|
|
|
*refs_to_prune = r->next;
|
2017-03-26 10:42:36 +08:00
|
|
|
prune_ref(refs, r);
|
2017-09-08 21:51:48 +08:00
|
|
|
free(r);
|
2015-11-09 21:34:01 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-05-22 22:17:48 +08:00
|
|
|
/*
|
|
|
|
* Return true if the specified reference should be packed.
|
|
|
|
*/
|
|
|
|
static int should_pack_ref(const char *refname,
|
|
|
|
const struct object_id *oid, unsigned int ref_flags,
|
|
|
|
unsigned int pack_flags)
|
|
|
|
{
|
|
|
|
/* Do not pack per-worktree refs: */
|
2022-09-20 00:34:50 +08:00
|
|
|
if (parse_worktree_ref(refname, NULL, NULL, NULL) !=
|
|
|
|
REF_WORKTREE_SHARED)
|
2017-05-22 22:17:48 +08:00
|
|
|
return 0;
|
|
|
|
|
|
|
|
/* Do not pack non-tags unless PACK_REFS_ALL is set: */
|
|
|
|
if (!(pack_flags & PACK_REFS_ALL) && !starts_with(refname, "refs/tags/"))
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
/* Do not pack symbolic refs: */
|
|
|
|
if (ref_flags & REF_ISSYMREF)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
/* Do not pack broken refs: */
|
2021-10-09 05:08:15 +08:00
|
|
|
if (!ref_resolves_to_object(refname, the_repository, oid, ref_flags))
|
2017-05-22 22:17:48 +08:00
|
|
|
return 0;
|
|
|
|
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
2016-09-05 00:08:27 +08:00
|
|
|
static int files_pack_refs(struct ref_store *ref_store, unsigned int flags)
|
2015-11-09 21:34:01 +08:00
|
|
|
{
|
2016-09-05 00:08:11 +08:00
|
|
|
struct files_ref_store *refs =
|
2017-03-26 10:42:32 +08:00
|
|
|
files_downcast(ref_store, REF_STORE_WRITE | REF_STORE_ODB,
|
|
|
|
"pack_refs");
|
2017-04-16 14:41:41 +08:00
|
|
|
struct ref_iterator *iter;
|
|
|
|
int ok;
|
|
|
|
struct ref_to_prune *refs_to_prune = NULL;
|
2017-06-23 15:01:39 +08:00
|
|
|
struct strbuf err = STRBUF_INIT;
|
2017-09-08 21:51:47 +08:00
|
|
|
struct ref_transaction *transaction;
|
|
|
|
|
2022-04-14 06:51:33 +08:00
|
|
|
transaction = ref_store_transaction_begin(refs->packed_ref_store, &err);
|
2017-09-08 21:51:47 +08:00
|
|
|
if (!transaction)
|
|
|
|
return -1;
|
2015-11-09 21:34:01 +08:00
|
|
|
|
2017-06-23 15:01:42 +08:00
|
|
|
packed_refs_lock(refs->packed_ref_store, LOCK_DIE_ON_ERROR, &err);
|
2017-04-16 14:41:41 +08:00
|
|
|
|
2021-10-09 05:08:16 +08:00
|
|
|
iter = cache_ref_iterator_begin(get_loose_ref_cache(refs), NULL,
|
|
|
|
the_repository, 0);
|
2017-04-16 14:41:41 +08:00
|
|
|
while ((ok = ref_iterator_advance(iter)) == ITER_OK) {
|
|
|
|
/*
|
|
|
|
* If the loose reference can be packed, add an entry
|
|
|
|
* in the packed ref cache. If the reference should be
|
|
|
|
* pruned, also add it to refs_to_prune.
|
|
|
|
*/
|
2017-05-22 22:17:48 +08:00
|
|
|
if (!should_pack_ref(iter->refname, iter->oid, iter->flags,
|
|
|
|
flags))
|
2017-04-16 14:41:41 +08:00
|
|
|
continue;
|
|
|
|
|
|
|
|
/*
|
2017-09-08 21:51:47 +08:00
|
|
|
* Add a reference creation for this reference to the
|
|
|
|
* packed-refs transaction:
|
2017-04-16 14:41:41 +08:00
|
|
|
*/
|
2017-09-08 21:51:47 +08:00
|
|
|
if (ref_transaction_update(transaction, iter->refname,
|
2017-10-16 06:06:53 +08:00
|
|
|
iter->oid, NULL,
|
2017-11-05 16:42:06 +08:00
|
|
|
REF_NO_DEREF, NULL, &err))
|
2017-09-08 21:51:47 +08:00
|
|
|
die("failure preparing to create packed reference %s: %s",
|
|
|
|
iter->refname, err.buf);
|
2017-04-16 14:41:41 +08:00
|
|
|
|
|
|
|
/* Schedule the loose reference for pruning if requested. */
|
|
|
|
if ((flags & PACK_REFS_PRUNE)) {
|
|
|
|
struct ref_to_prune *n;
|
|
|
|
FLEX_ALLOC_STR(n, name, iter->refname);
|
2017-10-16 06:06:49 +08:00
|
|
|
oidcpy(&n->oid, iter->oid);
|
2017-04-16 14:41:41 +08:00
|
|
|
n->next = refs_to_prune;
|
|
|
|
refs_to_prune = n;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (ok != ITER_DONE)
|
|
|
|
die("error while iterating over references");
|
2015-11-09 21:34:01 +08:00
|
|
|
|
2017-09-08 21:51:47 +08:00
|
|
|
if (ref_transaction_commit(transaction, &err))
|
|
|
|
die("unable to write new packed-refs: %s", err.buf);
|
|
|
|
|
|
|
|
ref_transaction_free(transaction);
|
|
|
|
|
2017-06-23 15:01:45 +08:00
|
|
|
packed_refs_unlock(refs->packed_ref_store);
|
2015-11-09 21:34:01 +08:00
|
|
|
|
2017-09-08 21:51:48 +08:00
|
|
|
prune_refs(refs, &refs_to_prune);
|
2017-06-23 15:01:39 +08:00
|
|
|
strbuf_release(&err);
|
2015-11-09 21:34:01 +08:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2017-05-22 22:17:38 +08:00
|
|
|
static int files_delete_refs(struct ref_store *ref_store, const char *msg,
|
2016-09-05 00:08:40 +08:00
|
|
|
struct string_list *refnames, unsigned int flags)
|
2015-11-09 21:34:01 +08:00
|
|
|
{
|
2016-09-05 00:08:30 +08:00
|
|
|
struct files_ref_store *refs =
|
2017-03-26 10:42:32 +08:00
|
|
|
files_downcast(ref_store, REF_STORE_WRITE, "delete_refs");
|
2015-11-09 21:34:01 +08:00
|
|
|
struct strbuf err = STRBUF_INIT;
|
|
|
|
int i, result = 0;
|
|
|
|
|
|
|
|
if (!refnames->nr)
|
|
|
|
return 0;
|
|
|
|
|
2017-07-02 02:31:06 +08:00
|
|
|
if (packed_refs_lock(refs->packed_ref_store, 0, &err))
|
|
|
|
goto error;
|
2015-11-09 21:34:01 +08:00
|
|
|
|
2022-04-14 06:51:33 +08:00
|
|
|
if (refs_delete_refs(refs->packed_ref_store, msg, refnames, flags)) {
|
|
|
|
packed_refs_unlock(refs->packed_ref_store);
|
2017-07-02 02:31:06 +08:00
|
|
|
goto error;
|
2022-04-14 06:51:33 +08:00
|
|
|
}
|
2015-11-09 21:34:01 +08:00
|
|
|
|
2017-07-02 02:31:06 +08:00
|
|
|
packed_refs_unlock(refs->packed_ref_store);
|
|
|
|
|
2015-11-09 21:34:01 +08:00
|
|
|
for (i = 0; i < refnames->nr; i++) {
|
|
|
|
const char *refname = refnames->items[i].string;
|
|
|
|
|
2017-05-22 22:17:38 +08:00
|
|
|
if (refs_delete_ref(&refs->base, msg, refname, NULL, flags))
|
2015-11-09 21:34:01 +08:00
|
|
|
result |= error(_("could not remove reference %s"), refname);
|
|
|
|
}
|
|
|
|
|
|
|
|
strbuf_release(&err);
|
|
|
|
return result;
|
2017-07-02 02:31:06 +08:00
|
|
|
|
|
|
|
error:
|
|
|
|
/*
|
|
|
|
* If we failed to rewrite the packed-refs file, then it is
|
|
|
|
* unsafe to try to remove loose refs, because doing so might
|
|
|
|
* expose an obsolete packed value for a reference that might
|
|
|
|
* even point at an object that has been garbage collected.
|
|
|
|
*/
|
|
|
|
if (refnames->nr == 1)
|
|
|
|
error(_("could not delete reference %s: %s"),
|
|
|
|
refnames->items[0].string, err.buf);
|
|
|
|
else
|
|
|
|
error(_("could not delete references: %s"), err.buf);
|
|
|
|
|
|
|
|
strbuf_release(&err);
|
|
|
|
return -1;
|
2015-11-09 21:34:01 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* People using contrib's git-new-workdir have .git/logs/refs ->
|
|
|
|
* /some/other/path/.git/logs/refs, and that may live on another device.
|
|
|
|
*
|
|
|
|
* IOW, to avoid cross device rename errors, the temporary renamed log must
|
|
|
|
* live in logs/refs.
|
|
|
|
*/
|
2017-03-26 10:42:21 +08:00
|
|
|
#define TMP_RENAMED_LOG "refs/.tmp-renamed-log"
|
2015-11-09 21:34:01 +08:00
|
|
|
|
2017-03-26 10:42:20 +08:00
|
|
|
/* Callback data passed to rename_tmp_log_callback(). */
struct rename_cb {
	/* Path of the temporary log file that is to be renamed. */
	const char *tmp_renamed_log;
	/* errno left by rename() on failure, before any remapping. */
	int true_errno;
};
|
|
|
|
|
|
|
|
static int rename_tmp_log_callback(const char *path, void *cb_data)
|
2015-11-09 21:34:01 +08:00
|
|
|
{
|
2017-03-26 10:42:20 +08:00
|
|
|
struct rename_cb *cb = cb_data;
|
2015-11-09 21:34:01 +08:00
|
|
|
|
2017-03-26 10:42:20 +08:00
|
|
|
if (rename(cb->tmp_renamed_log, path)) {
|
2017-01-07 00:22:29 +08:00
|
|
|
/*
|
|
|
|
* rename(a, b) when b is an existing directory ought
|
|
|
|
* to result in ISDIR, but Solaris 5.8 gives ENOTDIR.
|
|
|
|
* Sheesh. Record the true errno for error reporting,
|
|
|
|
* but report EISDIR to raceproof_create_file() so
|
|
|
|
* that it knows to retry.
|
|
|
|
*/
|
2017-03-26 10:42:20 +08:00
|
|
|
cb->true_errno = errno;
|
2017-01-07 00:22:29 +08:00
|
|
|
if (errno == ENOTDIR)
|
|
|
|
errno = EISDIR;
|
|
|
|
return -1;
|
|
|
|
} else {
|
|
|
|
return 0;
|
2015-11-09 21:34:01 +08:00
|
|
|
}
|
2017-01-07 00:22:29 +08:00
|
|
|
}
|
2015-11-09 21:34:01 +08:00
|
|
|
|
2017-03-26 10:42:22 +08:00
|
|
|
static int rename_tmp_log(struct files_ref_store *refs, const char *newrefname)
|
2017-01-07 00:22:29 +08:00
|
|
|
{
|
2017-03-26 10:42:20 +08:00
|
|
|
struct strbuf path = STRBUF_INIT;
|
|
|
|
struct strbuf tmp = STRBUF_INIT;
|
|
|
|
struct rename_cb cb;
|
|
|
|
int ret;
|
2017-01-07 00:22:29 +08:00
|
|
|
|
2017-03-26 10:42:22 +08:00
|
|
|
files_reflog_path(refs, &path, newrefname);
|
|
|
|
files_reflog_path(refs, &tmp, TMP_RENAMED_LOG);
|
2017-03-26 10:42:20 +08:00
|
|
|
cb.tmp_renamed_log = tmp.buf;
|
|
|
|
ret = raceproof_create_file(path.buf, rename_tmp_log_callback, &cb);
|
2017-01-07 00:22:29 +08:00
|
|
|
if (ret) {
|
|
|
|
if (errno == EISDIR)
|
2017-03-26 10:42:20 +08:00
|
|
|
error("directory not empty: %s", path.buf);
|
2017-01-07 00:22:29 +08:00
|
|
|
else
|
2017-01-07 00:22:30 +08:00
|
|
|
error("unable to move logfile %s to %s: %s",
|
2017-03-26 10:42:20 +08:00
|
|
|
tmp.buf, path.buf,
|
|
|
|
strerror(cb.true_errno));
|
2015-11-09 21:34:01 +08:00
|
|
|
}
|
2017-01-07 00:22:29 +08:00
|
|
|
|
2017-03-26 10:42:20 +08:00
|
|
|
strbuf_release(&path);
|
|
|
|
strbuf_release(&tmp);
|
2015-11-09 21:34:01 +08:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int write_ref_to_lockfile(struct ref_lock *lock,
|
2021-12-07 21:38:17 +08:00
|
|
|
const struct object_id *oid,
|
|
|
|
int skip_oid_verification, struct strbuf *err);
|
2016-09-05 00:08:32 +08:00
|
|
|
static int commit_ref_update(struct files_ref_store *refs,
|
|
|
|
struct ref_lock *lock,
|
2017-05-07 06:10:24 +08:00
|
|
|
const struct object_id *oid, const char *logmsg,
|
2016-04-22 20:38:56 +08:00
|
|
|
struct strbuf *err);
|
2015-11-09 21:34:01 +08:00
|
|
|
|
2021-10-16 17:39:11 +08:00
|
|
|
/*
|
refs/files: remove "name exist?" check in lock_ref_oid_basic()
In lock_ref_oid_basic() we'll happily lock a reference that doesn't
exist yet. That's normal, and is how references are initially born,
but we don't need to retain checks here in lock_ref_oid_basic() about
the state of the ref, when what we're checking is either checked
already, or something we're about to discover by trying to lock the
ref with raceproof_create_file().
The one exception is the caller in files_reflog_expire(), who passes
us a "type" to find out if the reference is a symref or not. We can
move that logic over to that caller, which can now defer its
discovery of whether or not the ref is a symref until it's needed. In
the preceding commit an exhaustive regression test was added for that
case in a new test in "t1417-reflog-updateref.sh".
The improved diagnostics here were added in
5b2d8d6f218 (lock_ref_sha1_basic(): improve diagnostics for ref D/F
conflicts, 2015-05-11), and then much of the surrounding code went
away recently in my 245fbba46d6 (refs/files: remove unused "errno ==
EISDIR" code, 2021-08-23).
The refs_resolve_ref_unsafe() code being removed here looks like it
should be tasked with doing that, but it's actually redundant to other
code.
The reason for that is as noted in 245fbba46d6 this once widely used
function now only has a handful of callers left, which all handle this
case themselves.
To the extent that we're racy between their check and ours removing
this check actually improves the situation, as we'll be doing fewer
things between the not-under-lock initial check and acquiring the
lock.
Why this is OK for all the remaining callers of lock_ref_oid_basic()
is noted below. There are only two of those callers:
* "git branch -[cm] <oldbranch> <newbranch>":
In files_copy_or_rename_ref() we'll call this when we copy or rename
refs via rename_ref() and copy_ref(). but only after we've checked
if the refname exists already via its own call to
refs_resolve_ref_unsafe() and refs_rename_ref_available().
As the updated comment to the latter here notes neither of those are
actually needed. If we delete not only this code but also
refs_rename_ref_available() we'll do just fine, we'll just emit a
less friendly error message if e.g. "git branch -m A B/C" would have
a D/F conflict with a "B" file.
Actually we'd probably die before that in case reflogs for the
branch existed, i.e. when we try to rename() or copy_file() the
relevant reflog, since if we've got a D/F conflict with a branch
name we'll probably also have the same with its reflogs (but not
necessarily, we might have reflogs, but it might not).
As some #leftoverbits that code seems buggy to me, i.e. the reflog
"protocol" should be to get a lock on the main ref, and then perform
ref and/or reflog operations. That code dates back to
c976d415e53 (git-branch: add options and tests for branch renaming,
2006-11-28) and probably pre-dated the solidifying of that
convention. But in any case, that edge case is not our bug or
problem right now.
* "git reflog expire <ref>":
In files_reflog_expire() we'll call this without previous ref
existence checking in files-backend.c, but that code is in turn
called by code that's just finished checking if the refname whose
reflog we're expiring exists.
See ae35e16cd43 (reflog expire: don't lock reflogs using previously
seen OID, 2021-08-23) for the current state of that code, and
5e6f003ca8a (reflog_expire(): ignore --updateref for symbolic
references, 2015-03-03) for the code we'd break if we only did a
"update = !!ref" here, which is covered by the aforementioned
regression test in "t1417-reflog-updateref.sh".
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-10-16 17:39:13 +08:00
|
|
|
* Emit a better error message than lockfile.c's
|
|
|
|
* unable_to_lock_message() would in case there is a D/F conflict with
|
|
|
|
* another existing reference. If there would be a conflict, emit an error
|
2021-10-16 17:39:11 +08:00
|
|
|
* message and return false; otherwise, return true.
|
|
|
|
*
|
|
|
|
* Note that this function is not safe against all races with other
|
refs/files: remove "name exist?" check in lock_ref_oid_basic()
In lock_ref_oid_basic() we'll happily lock a reference that doesn't
exist yet. That's normal, and is how references are initially born,
but we don't need to retain checks here in lock_ref_oid_basic() about
the state of the ref, when what we're checking is either checked
already, or something we're about to discover by trying to lock the
ref with raceproof_create_file().
The one exception is the caller in files_reflog_expire(), who passes
us a "type" to find out if the reference is a symref or not. We can
move that logic over to that caller, which can now defer its
discovery of whether or not the ref is a symref until it's needed. In
the preceding commit an exhaustive regression test was added for that
case in a new test in "t1417-reflog-updateref.sh".
The improved diagnostics here were added in
5b2d8d6f218 (lock_ref_sha1_basic(): improve diagnostics for ref D/F
conflicts, 2015-05-11), and then much of the surrounding code went
away recently in my 245fbba46d6 (refs/files: remove unused "errno ==
EISDIR" code, 2021-08-23).
The refs_resolve_ref_unsafe() code being removed here looks like it
should be tasked with doing that, but it's actually redundant to other
code.
The reason for that is as noted in 245fbba46d6 this once widely used
function now only has a handful of callers left, which all handle this
case themselves.
To the extent that we're racy between their check and ours removing
this check actually improves the situation, as we'll be doing fewer
things between the not-under-lock initial check and acquiring the
lock.
Why this is OK for all the remaining callers of lock_ref_oid_basic()
is noted below. There are only two of those callers:
* "git branch -[cm] <oldbranch> <newbranch>":
In files_copy_or_rename_ref() we'll call this when we copy or rename
refs via rename_ref() and copy_ref(), but only after we've checked
if the refname exists already via its own call to
refs_resolve_ref_unsafe() and refs_rename_ref_available().
As the updated comment to the latter here notes neither of those are
actually needed. If we delete not only this code but also
refs_rename_ref_available() we'll do just fine, we'll just emit a
less friendly error message if e.g. "git branch -m A B/C" would have
a D/F conflict with a "B" file.
Actually we'd probably die before that in case reflogs for the
branch existed, i.e. when we try to rename() or copy_file() the
relevant reflog, since if we've got a D/F conflict with a branch
name we'll probably also have the same with its reflogs (but not
necessarily, we might have reflogs, but it might not).
As some #leftoverbits that code seems buggy to me, i.e. the reflog
"protocol" should be to get a lock on the main ref, and then perform
ref and/or reflog operations. That code dates back to
c976d415e53 (git-branch: add options and tests for branch renaming,
2006-11-28) and probably pre-dated the solidifying of that
convention. But in any case, that edge case is not our bug or
problem right now.
* "git reflog expire <ref>":
In files_reflog_expire() we'll call this without previous ref
existence checking in files-backend.c, but that code is in turn
called by code that's just finished checking if the refname whose
reflog we're expiring exists.
See ae35e16cd43 (reflog expire: don't lock reflogs using previously
seen OID, 2021-08-23) for the current state of that code, and
5e6f003ca8a (reflog_expire(): ignore --updateref for symbolic
references, 2015-03-03) for the code we'd break if we only did a
"update = !!ref" here, which is covered by the aforementioned
regression test in "t1417-reflog-updateref.sh".
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-10-16 17:39:13 +08:00
|
|
|
* processes, and that's not its job. We'll emit a more verbose error on D/F
|
|
|
|
* conflicts if we get past it into lock_ref_oid_basic().
|
2021-10-16 17:39:11 +08:00
|
|
|
*/
|
|
|
|
static int refs_rename_ref_available(struct ref_store *refs,
|
|
|
|
const char *old_refname,
|
|
|
|
const char *new_refname)
|
|
|
|
{
|
|
|
|
struct string_list skip = STRING_LIST_INIT_NODUP;
|
|
|
|
struct strbuf err = STRBUF_INIT;
|
|
|
|
int ok;
|
|
|
|
|
|
|
|
string_list_insert(&skip, old_refname);
|
|
|
|
ok = !refs_verify_refname_available(refs, new_refname,
|
|
|
|
NULL, &skip, &err);
|
|
|
|
if (!ok)
|
|
|
|
error("%s", err.buf);
|
|
|
|
|
|
|
|
string_list_clear(&skip, 0);
|
|
|
|
strbuf_release(&err);
|
|
|
|
return ok;
|
|
|
|
}
|
|
|
|
|
branch: add a --copy (-c) option to go with --move (-m)
Add the ability to --copy a branch and its reflog and configuration,
this uses the same underlying machinery as the --move (-m) option
except the reflog and configuration is copied instead of being moved.
This is useful for e.g. copying a topic branch to a new version,
e.g. work to work-2 after submitting the work topic to the list, while
preserving all the tracking info and other configuration that goes
with the branch, and unlike --move keeping the other already-submitted
branch around for reference.
Like --move, when the source branch is the currently checked out
branch the HEAD is moved to the destination branch. In the case of
--move we don't really have a choice (other than remaining on a
detached HEAD) and in order to keep the functionality consistent, we
are doing it in a similar way for --copy too.
The most common usage of this feature is expected to be moving to a
new topic branch which is a copy of the current one, in that case
moving to the target branch is what the user wants, and doesn't
unexpectedly behave differently than --move would.
One outstanding caveat of this implementation is that:
git checkout maint &&
git checkout master &&
git branch -c topic &&
git checkout -
Will check out 'maint' instead of 'master'. This is because the @{-N}
feature (or its -1 shorthand "-") relies on HEAD reflogs created by
the checkout command, so in this case we'll checkout maint instead of
master, as the user might expect. What to do about that is left to a
future change.
Helped-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Sahil Dua <sahildua2305@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-06-19 05:19:16 +08:00
|
|
|
static int files_copy_or_rename_ref(struct ref_store *ref_store,
|
2016-09-05 00:08:42 +08:00
|
|
|
const char *oldrefname, const char *newrefname,
|
branch: add a --copy (-c) option to go with --move (-m)
Add the ability to --copy a branch and its reflog and configuration,
this uses the same underlying machinery as the --move (-m) option
except the reflog and configuration is copied instead of being moved.
This is useful for e.g. copying a topic branch to a new version,
e.g. work to work-2 after submitting the work topic to the list, while
preserving all the tracking info and other configuration that goes
with the branch, and unlike --move keeping the other already-submitted
branch around for reference.
Like --move, when the source branch is the currently checked out
branch the HEAD is moved to the destination branch. In the case of
--move we don't really have a choice (other than remaining on a
detached HEAD) and in order to keep the functionality consistent, we
are doing it in similar way for --copy too.
The most common usage of this feature is expected to be moving to a
new topic branch which is a copy of the current one, in that case
moving to the target branch is what the user wants, and doesn't
unexpectedly behave differently than --move would.
One outstanding caveat of this implementation is that:
git checkout maint &&
git checkout master &&
git branch -c topic &&
git checkout -
Will check out 'maint' instead of 'master'. This is because the @{-N}
feature (or its -1 shorthand "-") relies on HEAD reflogs created by
the checkout command, so in this case we'll checkout maint instead of
master, as the user might expect. What to do about that is left to a
future change.
Helped-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Sahil Dua <sahildua2305@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-06-19 05:19:16 +08:00
|
|
|
const char *logmsg, int copy)
|
2015-11-09 21:34:01 +08:00
|
|
|
{
|
2016-09-05 00:08:42 +08:00
|
|
|
struct files_ref_store *refs =
|
2017-03-26 10:42:32 +08:00
|
|
|
files_downcast(ref_store, REF_STORE_WRITE, "rename_ref");
|
2019-12-12 02:46:19 +08:00
|
|
|
struct object_id orig_oid;
|
2015-11-09 21:34:01 +08:00
|
|
|
int flag = 0, logmoved = 0;
|
|
|
|
struct ref_lock *lock;
|
|
|
|
struct stat loginfo;
|
2017-03-26 10:42:20 +08:00
|
|
|
struct strbuf sb_oldref = STRBUF_INIT;
|
|
|
|
struct strbuf sb_newref = STRBUF_INIT;
|
|
|
|
struct strbuf tmp_renamed_log = STRBUF_INIT;
|
|
|
|
int log, ret;
|
2015-11-09 21:34:01 +08:00
|
|
|
struct strbuf err = STRBUF_INIT;
|
|
|
|
|
2017-03-26 10:42:22 +08:00
|
|
|
files_reflog_path(refs, &sb_oldref, oldrefname);
|
|
|
|
files_reflog_path(refs, &sb_newref, newrefname);
|
|
|
|
files_reflog_path(refs, &tmp_renamed_log, TMP_RENAMED_LOG);
|
2017-03-26 10:42:20 +08:00
|
|
|
|
|
|
|
log = !lstat(sb_oldref.buf, &loginfo);
|
2017-03-26 10:42:19 +08:00
|
|
|
if (log && S_ISLNK(loginfo.st_mode)) {
|
|
|
|
ret = error("reflog for %s is a symlink", oldrefname);
|
|
|
|
goto out;
|
|
|
|
}
|
2015-11-09 21:34:01 +08:00
|
|
|
|
2017-03-26 10:42:36 +08:00
|
|
|
if (!refs_resolve_ref_unsafe(&refs->base, oldrefname,
|
|
|
|
RESOLVE_REF_READING | RESOLVE_REF_NO_RECURSE,
|
2022-01-26 22:37:01 +08:00
|
|
|
&orig_oid, &flag)) {
|
2017-03-26 10:42:19 +08:00
|
|
|
ret = error("refname %s not found", oldrefname);
|
|
|
|
goto out;
|
|
|
|
}
|
2016-04-22 05:42:19 +08:00
|
|
|
|
2017-03-26 10:42:19 +08:00
|
|
|
if (flag & REF_ISSYMREF) {
|
branch: add a --copy (-c) option to go with --move (-m)
Add the ability to --copy a branch and its reflog and configuration,
this uses the same underlying machinery as the --move (-m) option
except the reflog and configuration is copied instead of being moved.
This is useful for e.g. copying a topic branch to a new version,
e.g. work to work-2 after submitting the work topic to the list, while
preserving all the tracking info and other configuration that goes
with the branch, and unlike --move keeping the other already-submitted
branch around for reference.
Like --move, when the source branch is the currently checked out
branch the HEAD is moved to the destination branch. In the case of
--move we don't really have a choice (other than remaining on a
detached HEAD) and in order to keep the functionality consistent, we
are doing it in similar way for --copy too.
The most common usage of this feature is expected to be moving to a
new topic branch which is a copy of the current one, in that case
moving to the target branch is what the user wants, and doesn't
unexpectedly behave differently than --move would.
One outstanding caveat of this implementation is that:
git checkout maint &&
git checkout master &&
git branch -c topic &&
git checkout -
Will check out 'maint' instead of 'master'. This is because the @{-N}
feature (or its -1 shorthand "-") relies on HEAD reflogs created by
the checkout command, so in this case we'll checkout maint instead of
master, as the user might expect. What to do about that is left to a
future change.
Helped-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Sahil Dua <sahildua2305@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-06-19 05:19:16 +08:00
|
|
|
if (copy)
|
|
|
|
ret = error("refname %s is a symbolic ref, copying it is not supported",
|
|
|
|
oldrefname);
|
|
|
|
else
|
|
|
|
ret = error("refname %s is a symbolic ref, renaming it is not supported",
|
|
|
|
oldrefname);
|
2017-03-26 10:42:19 +08:00
|
|
|
goto out;
|
|
|
|
}
|
2017-03-26 10:42:34 +08:00
|
|
|
if (!refs_rename_ref_available(&refs->base, oldrefname, newrefname)) {
|
2017-03-26 10:42:19 +08:00
|
|
|
ret = 1;
|
|
|
|
goto out;
|
|
|
|
}
|
2015-11-09 21:34:01 +08:00
|
|
|
|
branch: add a --copy (-c) option to go with --move (-m)
Add the ability to --copy a branch and its reflog and configuration,
this uses the same underlying machinery as the --move (-m) option
except the reflog and configuration is copied instead of being moved.
This is useful for e.g. copying a topic branch to a new version,
e.g. work to work-2 after submitting the work topic to the list, while
preserving all the tracking info and other configuration that goes
with the branch, and unlike --move keeping the other already-submitted
branch around for reference.
Like --move, when the source branch is the currently checked out
branch the HEAD is moved to the destination branch. In the case of
--move we don't really have a choice (other than remaining on a
detached HEAD) and in order to keep the functionality consistent, we
are doing it in similar way for --copy too.
The most common usage of this feature is expected to be moving to a
new topic branch which is a copy of the current one, in that case
moving to the target branch is what the user wants, and doesn't
unexpectedly behave differently than --move would.
One outstanding caveat of this implementation is that:
git checkout maint &&
git checkout master &&
git branch -c topic &&
git checkout -
Will check out 'maint' instead of 'master'. This is because the @{-N}
feature (or its -1 shorthand "-") relies on HEAD reflogs created by
the checkout command, so in this case we'll checkout maint instead of
master, as the user might expect. What to do about that is left to a
future change.
Helped-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Sahil Dua <sahildua2305@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-06-19 05:19:16 +08:00
|
|
|
if (!copy && log && rename(sb_oldref.buf, tmp_renamed_log.buf)) {
|
2017-03-26 10:42:21 +08:00
|
|
|
ret = error("unable to move logfile logs/%s to logs/"TMP_RENAMED_LOG": %s",
|
2017-03-26 10:42:19 +08:00
|
|
|
oldrefname, strerror(errno));
|
|
|
|
goto out;
|
|
|
|
}
|
2015-11-09 21:34:01 +08:00
|
|
|
|
branch: add a --copy (-c) option to go with --move (-m)
Add the ability to --copy a branch and its reflog and configuration,
this uses the same underlying machinery as the --move (-m) option
except the reflog and configuration is copied instead of being moved.
This is useful for e.g. copying a topic branch to a new version,
e.g. work to work-2 after submitting the work topic to the list, while
preserving all the tracking info and other configuration that goes
with the branch, and unlike --move keeping the other already-submitted
branch around for reference.
Like --move, when the source branch is the currently checked out
branch the HEAD is moved to the destination branch. In the case of
--move we don't really have a choice (other than remaining on a
detached HEAD) and in order to keep the functionality consistent, we
are doing it in similar way for --copy too.
The most common usage of this feature is expected to be moving to a
new topic branch which is a copy of the current one, in that case
moving to the target branch is what the user wants, and doesn't
unexpectedly behave differently than --move would.
One outstanding caveat of this implementation is that:
git checkout maint &&
git checkout master &&
git branch -c topic &&
git checkout -
Will check out 'maint' instead of 'master'. This is because the @{-N}
feature (or its -1 shorthand "-") relies on HEAD reflogs created by
the checkout command, so in this case we'll checkout maint instead of
master, as the user might expect. What to do about that is left to a
future change.
Helped-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Sahil Dua <sahildua2305@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-06-19 05:19:16 +08:00
|
|
|
if (copy && log && copy_file(tmp_renamed_log.buf, sb_oldref.buf, 0644)) {
|
|
|
|
ret = error("unable to copy logfile logs/%s to logs/"TMP_RENAMED_LOG": %s",
|
|
|
|
oldrefname, strerror(errno));
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!copy && refs_delete_ref(&refs->base, logmsg, oldrefname,
|
2017-11-05 16:42:06 +08:00
|
|
|
&orig_oid, REF_NO_DEREF)) {
|
2015-11-09 21:34:01 +08:00
|
|
|
error("unable to delete old %s", oldrefname);
|
|
|
|
goto rollback;
|
|
|
|
}
|
|
|
|
|
2016-02-25 06:58:51 +08:00
|
|
|
/*
|
2017-05-07 06:10:24 +08:00
|
|
|
* Since we are doing a shallow lookup, oid is not the
|
|
|
|
* correct value to pass to delete_ref as old_oid. But that
|
|
|
|
* doesn't matter, because an old_oid check wouldn't add to
|
2016-02-25 06:58:51 +08:00
|
|
|
* the safety anyway; we want to delete the reference whatever
|
|
|
|
* its current value.
|
|
|
|
*/
|
2021-10-16 17:39:27 +08:00
|
|
|
if (!copy && refs_resolve_ref_unsafe(&refs->base, newrefname,
|
2021-10-16 17:39:14 +08:00
|
|
|
RESOLVE_REF_READING | RESOLVE_REF_NO_RECURSE,
|
2022-01-26 22:37:01 +08:00
|
|
|
NULL, NULL) &&
|
2017-03-26 10:42:36 +08:00
|
|
|
refs_delete_ref(&refs->base, NULL, newrefname,
|
2017-11-05 16:42:06 +08:00
|
|
|
NULL, REF_NO_DEREF)) {
|
2017-01-07 00:22:21 +08:00
|
|
|
if (errno == EISDIR) {
|
2015-11-09 21:34:01 +08:00
|
|
|
struct strbuf path = STRBUF_INIT;
|
|
|
|
int result;
|
|
|
|
|
2017-03-26 10:42:23 +08:00
|
|
|
files_ref_path(refs, &path, newrefname);
|
2015-11-09 21:34:01 +08:00
|
|
|
result = remove_empty_directories(&path);
|
|
|
|
strbuf_release(&path);
|
|
|
|
|
|
|
|
if (result) {
|
|
|
|
error("Directory not empty: %s", newrefname);
|
|
|
|
goto rollback;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
error("unable to delete existing %s", newrefname);
|
|
|
|
goto rollback;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-03-26 10:42:22 +08:00
|
|
|
if (log && rename_tmp_log(refs, newrefname))
|
2015-11-09 21:34:01 +08:00
|
|
|
goto rollback;
|
|
|
|
|
|
|
|
logmoved = log;
|
|
|
|
|
refs/files: remove "name exist?" check in lock_ref_oid_basic()
In lock_ref_oid_basic() we'll happily lock a reference that doesn't
exist yet. That's normal, and is how references are initially born,
but we don't need to retain checks here in lock_ref_oid_basic() about
the state of the ref, when what we're checking is either checked
already, or something we're about to discover by trying to lock the
ref with raceproof_create_file().
The one exception is the caller in files_reflog_expire(), who passes
us a "type" to find out if the reference is a symref or not. We can
move the that logic over to that caller, which can now defer its
discovery of whether or not the ref is a symref until it's needed. In
the preceding commit an exhaustive regression test was added for that
case in a new test in "t1417-reflog-updateref.sh".
The improved diagnostics here were added in
5b2d8d6f218 (lock_ref_sha1_basic(): improve diagnostics for ref D/F
conflicts, 2015-05-11), and then much of the surrounding code went
away recently in my 245fbba46d6 (refs/files: remove unused "errno ==
EISDIR" code, 2021-08-23).
The refs_resolve_ref_unsafe() code being removed here looks like it
should be tasked with doing that, but it's actually redundant to other
code.
The reason for that is as noted in 245fbba46d6 this once widely used
function now only has a handful of callers left, which all handle this
case themselves.
To the extent that we're racy between their check and ours removing
this check actually improves the situation, as we'll be doing fewer
things between the not-under-lock initial check and acquiring the
lock.
Why this is OK for all the remaining callers of lock_ref_oid_basic()
is noted below. There are only two of those callers:
* "git branch -[cm] <oldbranch> <newbranch>":
In files_copy_or_rename_ref() we'll call this when we copy or rename
refs via rename_ref() and copy_ref(). but only after we've checked
if the refname exists already via its own call to
refs_resolve_ref_unsafe() and refs_rename_ref_available().
As the updated comment to the latter here notes neither of those are
actually needed. If we delete not only this code but also
refs_rename_ref_available() we'll do just fine, we'll just emit a
less friendly error message if e.g. "git branch -m A B/C" would have
a D/F conflict with a "B" file.
Actually we'd probably die before that in case reflogs for the
branch existed, i.e. when the try to rename() or copy_file() the
relevant reflog, since if we've got a D/F conflict with a branch
name we'll probably also have the same with its reflogs (but not
necessarily, we might have reflogs, but it might not).
As some #leftoverbits that code seems buggy to me, i.e. the reflog
"protocol" should be to get a lock on the main ref, and then perform
ref and/or reflog operations. That code dates back to
c976d415e53 (git-branch: add options and tests for branch renaming,
2006-11-28) and probably pre-dated the solidifying of that
convention. But in any case, that edge case is not our bug or
problem right now.
* "git reflog expire <ref>":
In files_reflog_expire() we'll call this without previous ref
existence checking in files-backend.c, but that code is in turn
called by code that's just finished checking if the refname whose
reflog we're expiring exists.
See ae35e16cd43 (reflog expire: don't lock reflogs using previously
seen OID, 2021-08-23) for the current state of that code, and
5e6f003ca8a (reflog_expire(): ignore --updateref for symbolic
references, 2015-03-03) for the code we'd break if we only did a
"update = !!ref" here, which is covered by the aforementioned
regression test in "t1417-reflog-updateref.sh".
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-10-16 17:39:13 +08:00
|
|
|
lock = lock_ref_oid_basic(refs, newrefname, &err);
|
2015-11-09 21:34:01 +08:00
|
|
|
if (!lock) {
|
branch: add a --copy (-c) option to go with --move (-m)
Add the ability to --copy a branch and its reflog and configuration,
this uses the same underlying machinery as the --move (-m) option
except the reflog and configuration is copied instead of being moved.
This is useful for e.g. copying a topic branch to a new version,
e.g. work to work-2 after submitting the work topic to the list, while
preserving all the tracking info and other configuration that goes
with the branch, and unlike --move keeping the other already-submitted
branch around for reference.
Like --move, when the source branch is the currently checked out
branch the HEAD is moved to the destination branch. In the case of
--move we don't really have a choice (other than remaining on a
detached HEAD) and in order to keep the functionality consistent, we
are doing it in similar way for --copy too.
The most common usage of this feature is expected to be moving to a
new topic branch which is a copy of the current one, in that case
moving to the target branch is what the user wants, and doesn't
unexpectedly behave differently than --move would.
One outstanding caveat of this implementation is that:
git checkout maint &&
git checkout master &&
git branch -c topic &&
git checkout -
Will check out 'maint' instead of 'master'. This is because the @{-N}
feature (or its -1 shorthand "-") relies on HEAD reflogs created by
the checkout command, so in this case we'll checkout maint instead of
master, as the user might expect. What to do about that is left to a
future change.
Helped-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Sahil Dua <sahildua2305@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-06-19 05:19:16 +08:00
|
|
|
if (copy)
|
|
|
|
error("unable to copy '%s' to '%s': %s", oldrefname, newrefname, err.buf);
|
|
|
|
else
|
|
|
|
error("unable to rename '%s' to '%s': %s", oldrefname, newrefname, err.buf);
|
2015-11-09 21:34:01 +08:00
|
|
|
strbuf_release(&err);
|
|
|
|
goto rollback;
|
|
|
|
}
|
2017-05-07 06:10:24 +08:00
|
|
|
oidcpy(&lock->old_oid, &orig_oid);
|
2015-11-09 21:34:01 +08:00
|
|
|
|
2021-12-07 21:38:17 +08:00
|
|
|
if (write_ref_to_lockfile(lock, &orig_oid, 0, &err) ||
|
2017-05-07 06:10:24 +08:00
|
|
|
commit_ref_update(refs, lock, &orig_oid, logmsg, &err)) {
|
2015-11-09 21:34:01 +08:00
|
|
|
error("unable to write current sha1 into %s: %s", newrefname, err.buf);
|
|
|
|
strbuf_release(&err);
|
|
|
|
goto rollback;
|
|
|
|
}
|
|
|
|
|
2017-03-26 10:42:19 +08:00
|
|
|
ret = 0;
|
|
|
|
goto out;
|
2015-11-09 21:34:01 +08:00
|
|
|
|
|
|
|
rollback:
|
refs/files: remove "name exist?" check in lock_ref_oid_basic()
In lock_ref_oid_basic() we'll happily lock a reference that doesn't
exist yet. That's normal, and is how references are initially born,
but we don't need to retain checks here in lock_ref_oid_basic() about
the state of the ref, when what we're checking is either checked
already, or something we're about to discover by trying to lock the
ref with raceproof_create_file().
The one exception is the caller in files_reflog_expire(), who passes
us a "type" to find out if the reference is a symref or not. We can
move the that logic over to that caller, which can now defer its
discovery of whether or not the ref is a symref until it's needed. In
the preceding commit an exhaustive regression test was added for that
case in a new test in "t1417-reflog-updateref.sh".
The improved diagnostics here were added in
5b2d8d6f218 (lock_ref_sha1_basic(): improve diagnostics for ref D/F
conflicts, 2015-05-11), and then much of the surrounding code went
away recently in my 245fbba46d6 (refs/files: remove unused "errno ==
EISDIR" code, 2021-08-23).
The refs_resolve_ref_unsafe() code being removed here looks like it
should be tasked with doing that, but it's actually redundant to other
code.
The reason for that is as noted in 245fbba46d6 this once widely used
function now only has a handful of callers left, which all handle this
case themselves.
To the extent that we're racy between their check and ours removing
this check actually improves the situation, as we'll be doing fewer
things between the not-under-lock initial check and acquiring the
lock.
Why this is OK for all the remaining callers of lock_ref_oid_basic()
is noted below. There are only two of those callers:
* "git branch -[cm] <oldbranch> <newbranch>":
In files_copy_or_rename_ref() we'll call this when we copy or rename
refs via rename_ref() and copy_ref(). but only after we've checked
if the refname exists already via its own call to
refs_resolve_ref_unsafe() and refs_rename_ref_available().
As the updated comment to the latter here notes neither of those are
actually needed. If we delete not only this code but also
refs_rename_ref_available() we'll do just fine, we'll just emit a
less friendly error message if e.g. "git branch -m A B/C" would have
a D/F conflict with a "B" file.
Actually we'd probably die before that in case reflogs for the
branch existed, i.e. when the try to rename() or copy_file() the
relevant reflog, since if we've got a D/F conflict with a branch
name we'll probably also have the same with its reflogs (but not
necessarily, we might have reflogs, but it might not).
As some #leftoverbits that code seems buggy to me, i.e. the reflog
"protocol" should be to get a lock on the main ref, and then perform
ref and/or reflog operations. That code dates back to
c976d415e53 (git-branch: add options and tests for branch renaming,
2006-11-28) and probably pre-dated the solidifying of that
convention. But in any case, that edge case is not our bug or
problem right now.
* "git reflog expire <ref>":
In files_reflog_expire() we'll call this without previous ref
existence checking in files-backend.c, but that code is in turn
called by code that's just finished checking if the refname whose
reflog we're expiring exists.
See ae35e16cd43 (reflog expire: don't lock reflogs using previously
seen OID, 2021-08-23) for the current state of that code, and
5e6f003ca8a (reflog_expire(): ignore --updateref for symbolic
references, 2015-03-03) for the code we'd break if we only did a
"update = !!ref" here, which is covered by the aforementioned
regression test in "t1417-reflog-updateref.sh".
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-10-16 17:39:13 +08:00
|
|
|
lock = lock_ref_oid_basic(refs, oldrefname, &err);
|
2015-11-09 21:34:01 +08:00
|
|
|
if (!lock) {
|
|
|
|
error("unable to lock %s for rollback: %s", oldrefname, err.buf);
|
|
|
|
strbuf_release(&err);
|
|
|
|
goto rollbacklog;
|
|
|
|
}
|
|
|
|
|
|
|
|
flag = log_all_ref_updates;
|
2017-01-27 18:09:47 +08:00
|
|
|
log_all_ref_updates = LOG_REFS_NONE;
|
2021-12-07 21:38:17 +08:00
|
|
|
if (write_ref_to_lockfile(lock, &orig_oid, 0, &err) ||
|
2017-05-07 06:10:24 +08:00
|
|
|
commit_ref_update(refs, lock, &orig_oid, NULL, &err)) {
|
2015-11-09 21:34:01 +08:00
|
|
|
error("unable to write current sha1 into %s: %s", oldrefname, err.buf);
|
|
|
|
strbuf_release(&err);
|
|
|
|
}
|
|
|
|
log_all_ref_updates = flag;
|
|
|
|
|
|
|
|
rollbacklog:
|
2017-03-26 10:42:20 +08:00
|
|
|
if (logmoved && rename(sb_newref.buf, sb_oldref.buf))
|
2015-11-09 21:34:01 +08:00
|
|
|
error("unable to restore logfile %s from %s: %s",
|
|
|
|
oldrefname, newrefname, strerror(errno));
|
|
|
|
if (!logmoved && log &&
|
2017-03-26 10:42:20 +08:00
|
|
|
rename(tmp_renamed_log.buf, sb_oldref.buf))
|
2017-03-26 10:42:21 +08:00
|
|
|
error("unable to restore logfile %s from logs/"TMP_RENAMED_LOG": %s",
|
2015-11-09 21:34:01 +08:00
|
|
|
oldrefname, strerror(errno));
|
2017-03-26 10:42:19 +08:00
|
|
|
ret = 1;
|
|
|
|
out:
|
2017-03-26 10:42:20 +08:00
|
|
|
strbuf_release(&sb_newref);
|
|
|
|
strbuf_release(&sb_oldref);
|
|
|
|
strbuf_release(&tmp_renamed_log);
|
|
|
|
|
2017-03-26 10:42:19 +08:00
|
|
|
return ret;
|
2015-11-09 21:34:01 +08:00
|
|
|
}
|
|
|
|
|
branch: add a --copy (-c) option to go with --move (-m)
Add the ability to --copy a branch and its reflog and configuration,
this uses the same underlying machinery as the --move (-m) option
except the reflog and configuration is copied instead of being moved.
This is useful for e.g. copying a topic branch to a new version,
e.g. work to work-2 after submitting the work topic to the list, while
preserving all the tracking info and other configuration that goes
with the branch, and unlike --move keeping the other already-submitted
branch around for reference.
Like --move, when the source branch is the currently checked out
branch the HEAD is moved to the destination branch. In the case of
--move we don't really have a choice (other than remaining on a
detached HEAD) and in order to keep the functionality consistent, we
are doing it in a similar way for --copy too.
The most common usage of this feature is expected to be moving to a
new topic branch which is a copy of the current one, in that case
moving to the target branch is what the user wants, and doesn't
unexpectedly behave differently than --move would.
One outstanding caveat of this implementation is that:
git checkout maint &&
git checkout master &&
git branch -c topic &&
git checkout -
Will check out 'maint' instead of 'master'. This is because the @{-N}
feature (or its -1 shorthand "-") relies on HEAD reflogs created by
the checkout command, so in this case we'll checkout maint instead of
master, as the user might expect. What to do about that is left to a
future change.
Helped-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Sahil Dua <sahildua2305@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-06-19 05:19:16 +08:00
|
|
|
/*
 * Rename oldrefname to newrefname by running the shared copy/rename
 * routine with copying switched off. Returns that routine's result.
 */
static int files_rename_ref(struct ref_store *ref_store,
			    const char *oldrefname, const char *newrefname,
			    const char *logmsg)
{
	const int copy = 0;

	return files_copy_or_rename_ref(ref_store, oldrefname, newrefname,
					logmsg, copy);
}
|
|
|
|
|
|
|
|
/*
 * Copy oldrefname to newrefname by running the shared copy/rename
 * routine with copying switched on. Returns that routine's result.
 */
static int files_copy_ref(struct ref_store *ref_store,
			  const char *oldrefname, const char *newrefname,
			  const char *logmsg)
{
	const int copy = 1;

	return files_copy_or_rename_ref(ref_store, oldrefname, newrefname,
					logmsg, copy);
}
|
|
|
|
|
2017-09-05 20:14:33 +08:00
|
|
|
static int close_ref_gently(struct ref_lock *lock)
|
2015-11-09 21:34:01 +08:00
|
|
|
{
|
2017-09-05 20:15:15 +08:00
|
|
|
if (close_lock_file_gently(&lock->lk))
|
2015-11-09 21:34:01 +08:00
|
|
|
return -1;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int commit_ref(struct ref_lock *lock)
|
|
|
|
{
|
2017-09-05 20:15:15 +08:00
|
|
|
char *path = get_locked_file_path(&lock->lk);
|
commit_ref(): if there is an empty dir in the way, delete it
Part of the bug revealed in the last commit is that resolve_ref_unsafe()
incorrectly returns EISDIR if it finds a directory in the place where it
is looking for a loose reference, even if the corresponding packed
reference exists. lock_ref_sha1_basic() notices the bogus EISDIR, and
use it as an indication that it should call remove_empty_directories()
and call resolve_ref_unsafe() again.
But resolve_ref_unsafe() shouldn't report EISDIR in this case. If we
would simply make that change, then remove_empty_directories() wouldn't
get called anymore, and the empty directory would get in the way when
commit_ref() calls commit_lock_file() to rename the lockfile into place.
So instead of relying on lock_ref_sha1_basic() to delete empty
directories, teach commit_ref(), just before calling commit_lock_file(),
to check whether a directory is in the way, and if so, try to delete it.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
2016-05-05 21:33:03 +08:00
|
|
|
struct stat st;
|
|
|
|
|
|
|
|
if (!lstat(path, &st) && S_ISDIR(st.st_mode)) {
|
|
|
|
/*
|
|
|
|
* There is a directory at the path we want to rename
|
|
|
|
* the lockfile to. Hopefully it is empty; try to
|
|
|
|
* delete it.
|
|
|
|
*/
|
|
|
|
size_t len = strlen(path);
|
|
|
|
struct strbuf sb_path = STRBUF_INIT;
|
|
|
|
|
|
|
|
strbuf_attach(&sb_path, path, len, len);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If this fails, commit_lock_file() will also fail
|
|
|
|
* and will report the problem.
|
|
|
|
*/
|
|
|
|
remove_empty_directories(&sb_path);
|
|
|
|
strbuf_release(&sb_path);
|
|
|
|
} else {
|
|
|
|
free(path);
|
|
|
|
}
|
|
|
|
|
2017-09-05 20:15:15 +08:00
|
|
|
if (commit_lock_file(&lock->lk))
|
2015-11-09 21:34:01 +08:00
|
|
|
return -1;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2017-01-07 00:22:33 +08:00
|
|
|
/*
 * Callback (passed to raceproof_create_file() by log_ref_setup()):
 * open `path` for appending, creating it with mode 0666 if it does
 * not exist. `cb` must point to an int, which receives the resulting
 * file descriptor (negative if open() failed). Returns 0 on success
 * and -1 on failure.
 */
static int open_or_create_logfile(const char *path, void *cb)
{
	int *fd_out = cb;
	int opened = open(path, O_APPEND | O_WRONLY | O_CREAT, 0666);

	*fd_out = opened;
	if (opened < 0)
		return -1;
	return 0;
}
|
|
|
|
|
2015-11-09 21:34:01 +08:00
|
|
|
/*
|
2017-01-07 00:22:36 +08:00
|
|
|
* Create a reflog for a ref. If force_create = 0, only create the
|
|
|
|
* reflog for certain refs (those for which should_autocreate_reflog
|
|
|
|
* returns non-zero). Otherwise, create it regardless of the reference
|
|
|
|
* name. If the logfile already existed or was created, return 0 and
|
|
|
|
* set *logfd to the file descriptor opened for appending to the file.
|
|
|
|
* If no logfile exists and we decided not to create one, return 0 and
|
|
|
|
* set *logfd to -1. On failure, fill in *err, set *logfd to -1, and
|
|
|
|
* return -1.
|
2015-11-09 21:34:01 +08:00
|
|
|
*/
|
2017-03-26 10:42:22 +08:00
|
|
|
static int log_ref_setup(struct files_ref_store *refs,
|
|
|
|
const char *refname, int force_create,
|
2017-01-07 00:22:36 +08:00
|
|
|
int *logfd, struct strbuf *err)
|
2015-11-09 21:34:01 +08:00
|
|
|
{
|
2017-03-26 10:42:22 +08:00
|
|
|
struct strbuf logfile_sb = STRBUF_INIT;
|
|
|
|
char *logfile;
|
|
|
|
|
|
|
|
files_reflog_path(refs, &logfile_sb, refname);
|
|
|
|
logfile = strbuf_detach(&logfile_sb, NULL);
|
2015-11-09 21:34:01 +08:00
|
|
|
|
|
|
|
if (force_create || should_autocreate_reflog(refname)) {
|
2017-01-07 00:22:36 +08:00
|
|
|
if (raceproof_create_file(logfile, open_or_create_logfile, logfd)) {
|
2017-01-07 00:22:33 +08:00
|
|
|
if (errno == ENOENT)
|
|
|
|
strbuf_addf(err, "unable to create directory for '%s': "
|
2017-01-07 00:22:36 +08:00
|
|
|
"%s", logfile, strerror(errno));
|
2017-01-07 00:22:33 +08:00
|
|
|
else if (errno == EISDIR)
|
|
|
|
strbuf_addf(err, "there are still logs under '%s'",
|
2017-01-07 00:22:36 +08:00
|
|
|
logfile);
|
2017-01-07 00:22:33 +08:00
|
|
|
else
|
2017-01-07 00:22:32 +08:00
|
|
|
strbuf_addf(err, "unable to append to '%s': %s",
|
2017-01-07 00:22:36 +08:00
|
|
|
logfile, strerror(errno));
|
2015-11-09 21:34:01 +08:00
|
|
|
|
2017-01-07 00:22:36 +08:00
|
|
|
goto error;
|
2015-11-09 21:34:01 +08:00
|
|
|
}
|
2017-01-07 00:22:32 +08:00
|
|
|
} else {
|
2021-09-10 05:45:51 +08:00
|
|
|
*logfd = open(logfile, O_APPEND | O_WRONLY);
|
2017-01-07 00:22:34 +08:00
|
|
|
if (*logfd < 0) {
|
2017-01-07 00:22:32 +08:00
|
|
|
if (errno == ENOENT || errno == EISDIR) {
|
|
|
|
/*
|
|
|
|
* The logfile doesn't already exist,
|
|
|
|
* but that is not an error; it only
|
|
|
|
* means that we won't write log
|
|
|
|
* entries to it.
|
|
|
|
*/
|
|
|
|
;
|
|
|
|
} else {
|
|
|
|
strbuf_addf(err, "unable to append to '%s': %s",
|
2017-01-07 00:22:36 +08:00
|
|
|
logfile, strerror(errno));
|
|
|
|
goto error;
|
2017-01-07 00:22:32 +08:00
|
|
|
}
|
2015-11-09 21:34:01 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-01-07 00:22:34 +08:00
|
|
|
if (*logfd >= 0)
|
2017-01-07 00:22:36 +08:00
|
|
|
adjust_shared_perm(logfile);
|
2017-01-07 00:22:32 +08:00
|
|
|
|
2017-01-07 00:22:36 +08:00
|
|
|
free(logfile);
|
2015-11-09 21:34:01 +08:00
|
|
|
return 0;
|
|
|
|
|
2017-01-07 00:22:36 +08:00
|
|
|
error:
|
|
|
|
free(logfile);
|
|
|
|
return -1;
|
2015-11-09 21:34:01 +08:00
|
|
|
}
|
|
|
|
|
2021-11-22 22:19:08 +08:00
|
|
|
static int files_create_reflog(struct ref_store *ref_store, const char *refname,
|
2016-09-05 00:08:38 +08:00
|
|
|
struct strbuf *err)
|
2015-11-09 21:34:01 +08:00
|
|
|
{
|
2017-03-26 10:42:22 +08:00
|
|
|
struct files_ref_store *refs =
|
2017-03-26 10:42:32 +08:00
|
|
|
files_downcast(ref_store, REF_STORE_WRITE, "create_reflog");
|
2017-01-07 00:22:34 +08:00
|
|
|
int fd;
|
2015-11-09 21:34:01 +08:00
|
|
|
|
2021-11-22 22:19:08 +08:00
|
|
|
if (log_ref_setup(refs, refname, 1, &fd, err))
|
2017-01-07 00:22:36 +08:00
|
|
|
return -1;
|
|
|
|
|
2017-01-07 00:22:34 +08:00
|
|
|
if (fd >= 0)
|
|
|
|
close(fd);
|
2017-01-07 00:22:36 +08:00
|
|
|
|
|
|
|
return 0;
|
2015-11-09 21:34:01 +08:00
|
|
|
}
|
|
|
|
|
2017-05-07 06:10:24 +08:00
|
|
|
static int log_ref_write_fd(int fd, const struct object_id *old_oid,
|
|
|
|
const struct object_id *new_oid,
|
2015-11-09 21:34:01 +08:00
|
|
|
const char *committer, const char *msg)
|
|
|
|
{
|
2018-07-11 05:08:22 +08:00
|
|
|
struct strbuf sb = STRBUF_INIT;
|
|
|
|
int ret = 0;
|
|
|
|
|
|
|
|
strbuf_addf(&sb, "%s %s %s", oid_to_hex(old_oid), oid_to_hex(new_oid), committer);
|
2020-07-31 19:36:10 +08:00
|
|
|
if (msg && *msg) {
|
|
|
|
strbuf_addch(&sb, '\t');
|
reflog: cleanse messages in the refs.c layer
Regarding reflog messages:
- We expect that a reflog message consists of a single line. The
file format used by the files backend may add a LF after the
message as a delimiter, and output by commands like "git log -g"
may complete such an incomplete line by adding a LF at the end,
but philosophically, the terminating LF is not a part of the
message.
- We however allow callers of refs API to supply a random sequence
of NUL terminated bytes. We cleanse caller-supplied message by
squashing a run of whitespaces into a SP, and by trimming trailing
whitespace, before storing the message. This is how we tolerate,
instead of erring out, a message with LF in it (be it at the end,
in the middle, or both).
Currently, the cleansing of the reflog message is done by the files
backend, before the log is written out. This is sufficient with the
current code, as that is the only backend that writes reflogs. But
new backends can be added that write reflogs, and we'd want the
resulting log message we would read out of "log -g" the same no
matter what backend is used, and moving the code to do so to the
generic layer is a way to do so.
An added benefit is that the "cleansing" function could be updated
later, independent from individual backends, to e.g. allow
multi-line log messages if we wanted to, and when that happens, it
would help a lot to ensure we covered all bases if the cleansing
function (which would be updated) is called from the generic layer.
Side note: I am not interested in supporting multi-line reflog
messages right at the moment (nobody is asking for it), but I
envision that instead of the "squash a run of whitespaces into a SP
and rtrim" cleansing, we can %urlencode problematic bytes in the
message *AND* append a SP at the end, when a new version of Git that
supports multi-line and/or verbatim reflog messages writes a reflog
record. The reading side can detect the presense of SP at the end
(which should have been rtrimmed out if it were written by existing
versions of Git) as a signal that decoding %urlencode recovers the
original reflog message.
Signed-off-by: Han-Wen Nienhuys <hanwen@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-07-11 01:19:53 +08:00
|
|
|
strbuf_addstr(&sb, msg);
|
2020-07-31 19:36:10 +08:00
|
|
|
}
|
2018-07-11 05:08:22 +08:00
|
|
|
strbuf_addch(&sb, '\n');
|
|
|
|
if (write_in_full(fd, sb.buf, sb.len) < 0)
|
|
|
|
ret = -1;
|
|
|
|
strbuf_release(&sb);
|
|
|
|
return ret;
|
2015-11-09 21:34:01 +08:00
|
|
|
}
|
|
|
|
|
2017-03-26 10:42:22 +08:00
|
|
|
static int files_log_ref_write(struct files_ref_store *refs,
|
2017-05-07 06:10:24 +08:00
|
|
|
const char *refname, const struct object_id *old_oid,
|
|
|
|
const struct object_id *new_oid, const char *msg,
|
2017-03-26 10:42:15 +08:00
|
|
|
int flags, struct strbuf *err)
|
2015-11-09 21:34:01 +08:00
|
|
|
{
|
2017-01-07 00:22:34 +08:00
|
|
|
int logfd, result;
|
2015-11-09 21:34:01 +08:00
|
|
|
|
2017-01-27 18:09:47 +08:00
|
|
|
if (log_all_ref_updates == LOG_REFS_UNSET)
|
|
|
|
log_all_ref_updates = is_bare_repository() ? LOG_REFS_NONE : LOG_REFS_NORMAL;
|
2015-11-09 21:34:01 +08:00
|
|
|
|
2017-03-26 10:42:22 +08:00
|
|
|
result = log_ref_setup(refs, refname,
|
|
|
|
flags & REF_FORCE_CREATE_REFLOG,
|
2017-01-07 00:22:36 +08:00
|
|
|
&logfd, err);
|
2015-11-09 21:34:01 +08:00
|
|
|
|
|
|
|
if (result)
|
|
|
|
return result;
|
|
|
|
|
|
|
|
if (logfd < 0)
|
|
|
|
return 0;
|
2017-05-07 06:10:24 +08:00
|
|
|
result = log_ref_write_fd(logfd, old_oid, new_oid,
|
2015-11-09 21:34:01 +08:00
|
|
|
git_committer_info(0), msg);
|
|
|
|
if (result) {
|
2017-03-26 10:42:20 +08:00
|
|
|
struct strbuf sb = STRBUF_INIT;
|
2017-01-07 00:22:35 +08:00
|
|
|
int save_errno = errno;
|
|
|
|
|
2017-03-26 10:42:22 +08:00
|
|
|
files_reflog_path(refs, &sb, refname);
|
2017-01-07 00:22:35 +08:00
|
|
|
strbuf_addf(err, "unable to append to '%s': %s",
|
2017-03-26 10:42:20 +08:00
|
|
|
sb.buf, strerror(save_errno));
|
|
|
|
strbuf_release(&sb);
|
2015-11-09 21:34:01 +08:00
|
|
|
close(logfd);
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
if (close(logfd)) {
|
2017-03-26 10:42:20 +08:00
|
|
|
struct strbuf sb = STRBUF_INIT;
|
2017-01-07 00:22:35 +08:00
|
|
|
int save_errno = errno;
|
|
|
|
|
2017-03-26 10:42:22 +08:00
|
|
|
files_reflog_path(refs, &sb, refname);
|
2017-01-07 00:22:35 +08:00
|
|
|
strbuf_addf(err, "unable to append to '%s': %s",
|
2017-03-26 10:42:20 +08:00
|
|
|
sb.buf, strerror(save_errno));
|
|
|
|
strbuf_release(&sb);
|
2015-11-09 21:34:01 +08:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2017-11-05 16:42:09 +08:00
|
|
|
* Write oid into the open lockfile, then close the lockfile. On
|
|
|
|
* errors, rollback the lockfile, fill in *err and return -1.
|
2015-11-09 21:34:01 +08:00
|
|
|
*/
|
|
|
|
static int write_ref_to_lockfile(struct ref_lock *lock,
|
2021-12-07 21:38:17 +08:00
|
|
|
const struct object_id *oid,
|
|
|
|
int skip_oid_verification, struct strbuf *err)
|
2015-11-09 21:34:01 +08:00
|
|
|
{
|
|
|
|
static char term = '\n';
|
|
|
|
struct object *o;
|
|
|
|
int fd;
|
|
|
|
|
2021-12-07 21:38:17 +08:00
|
|
|
if (!skip_oid_verification) {
|
|
|
|
o = parse_object(the_repository, oid);
|
|
|
|
if (!o) {
|
|
|
|
strbuf_addf(
|
|
|
|
err,
|
|
|
|
"trying to write ref '%s' with nonexistent object %s",
|
|
|
|
lock->ref_name, oid_to_hex(oid));
|
|
|
|
unlock_ref(lock);
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
if (o->type != OBJ_COMMIT && is_branch(lock->ref_name)) {
|
|
|
|
strbuf_addf(
|
|
|
|
err,
|
|
|
|
"trying to write non-commit object %s to branch '%s'",
|
|
|
|
oid_to_hex(oid), lock->ref_name);
|
|
|
|
unlock_ref(lock);
|
|
|
|
return -1;
|
|
|
|
}
|
2015-11-09 21:34:01 +08:00
|
|
|
}
|
2017-09-05 20:15:15 +08:00
|
|
|
fd = get_lock_file_fd(&lock->lk);
|
2018-07-16 09:27:59 +08:00
|
|
|
if (write_in_full(fd, oid_to_hex(oid), the_hash_algo->hexsz) < 0 ||
|
avoid "write_in_full(fd, buf, len) != len" pattern
The return value of write_in_full() is either "-1", or the
requested number of bytes[1]. If we make a partial write
before seeing an error, we still return -1, not a partial
value. This goes back to f6aa66cb95 (write_in_full: really
write in full or return error on disk full., 2007-01-11).
So checking anything except "was the return value negative"
is pointless. And there are a couple of reasons not to do
so:
1. It can do a funny signed/unsigned comparison. If your
"len" is signed (e.g., a size_t) then the compiler will
promote the "-1" to its unsigned variant.
This works out for "!= len" (unless you really were
trying to write the maximum size_t bytes), but is a
bug if you check "< len" (an example of which was fixed
recently in config.c).
We should avoid promoting the mental model that you
need to check the length at all, so that new sites are
not tempted to copy us.
2. Checking for a negative value is shorter to type,
especially when the length is an expression.
3. Linus says so. In d34cf19b89 (Clean up write_in_full()
users, 2007-01-11), right after the write_in_full()
semantics were changed, he wrote:
I really wish every "write_in_full()" user would just
check against "<0" now, but this fixes the nasty and
stupid ones.
Appeals to authority aside, this makes it clear that
writing it this way does not have an intentional
benefit. It's a historical curiosity that we never
bothered to clean up (and which was undoubtedly
cargo-culted into new sites).
So let's convert these obviously-correct cases (this
includes write_str_in_full(), which is just a wrapper for
write_in_full()).
[1] A careful reader may notice there is one way that
write_in_full() can return a different value. If we ask
write() to write N bytes and get a return value that is
_larger_ than N, we could return a larger total. But
besides the fact that this would imply a totally broken
version of write(), it would already invoke undefined
behavior. Our internal remaining counter is an unsigned
size_t, which means that subtracting too many byte will
wrap it around to a very large number. So we'll instantly
begin reading off the end of the buffer, trying to write
gigabytes (or petabytes) of data.
Signed-off-by: Jeff King <peff@peff.net>
Reviewed-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-09-14 01:16:03 +08:00
|
|
|
write_in_full(fd, &term, 1) < 0 ||
|
2022-03-11 17:58:59 +08:00
|
|
|
fsync_component(FSYNC_COMPONENT_REFERENCE, get_lock_file_fd(&lock->lk)) < 0 ||
|
2017-09-05 20:14:33 +08:00
|
|
|
close_ref_gently(lock) < 0) {
|
2015-11-09 21:34:01 +08:00
|
|
|
strbuf_addf(err,
|
2017-09-05 20:15:15 +08:00
|
|
|
"couldn't write '%s'", get_lock_file_path(&lock->lk));
|
2015-11-09 21:34:01 +08:00
|
|
|
unlock_ref(lock);
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Commit a change to a loose reference that has already been written
|
|
|
|
* to the loose reference lockfile. Also update the reflogs if
|
|
|
|
* necessary, using the specified lockmsg (which can be NULL).
|
|
|
|
*/
|
2016-09-05 00:08:32 +08:00
|
|
|
static int commit_ref_update(struct files_ref_store *refs,
|
|
|
|
struct ref_lock *lock,
|
2017-05-07 06:10:24 +08:00
|
|
|
const struct object_id *oid, const char *logmsg,
|
2016-04-22 20:38:56 +08:00
|
|
|
struct strbuf *err)
|
2015-11-09 21:34:01 +08:00
|
|
|
{
|
2017-02-10 19:16:16 +08:00
|
|
|
files_assert_main_repository(refs, "commit_ref_update");
|
2016-09-05 00:08:11 +08:00
|
|
|
|
|
|
|
clear_loose_ref_cache(refs);
|
2017-03-26 10:42:22 +08:00
|
|
|
if (files_log_ref_write(refs, lock->ref_name,
|
2017-05-07 06:10:24 +08:00
|
|
|
&lock->old_oid, oid,
|
2017-01-07 00:22:31 +08:00
|
|
|
logmsg, 0, err)) {
|
2015-11-09 21:34:01 +08:00
|
|
|
char *old_msg = strbuf_detach(err, NULL);
|
2016-04-27 21:21:36 +08:00
|
|
|
strbuf_addf(err, "cannot update the ref '%s': %s",
|
2015-11-09 21:34:01 +08:00
|
|
|
lock->ref_name, old_msg);
|
|
|
|
free(old_msg);
|
|
|
|
unlock_ref(lock);
|
|
|
|
return -1;
|
|
|
|
}
|
2016-04-22 21:25:25 +08:00
|
|
|
|
|
|
|
if (strcmp(lock->ref_name, "HEAD") != 0) {
|
2015-11-09 21:34:01 +08:00
|
|
|
/*
|
|
|
|
* Special hack: If a branch is updated directly and HEAD
|
|
|
|
* points to it (may happen on the remote side of a push
|
|
|
|
* for example) then logically the HEAD reflog should be
|
|
|
|
* updated too.
|
|
|
|
* A generic solution implies reverse symref information,
|
|
|
|
* but finding all symrefs pointing to the given branch
|
|
|
|
* would be rather costly for this rare event (the direct
|
|
|
|
* update of a branch) to be worth it. So let's cheat and
|
|
|
|
* check with HEAD only which should cover 99% of all usage
|
|
|
|
* scenarios (even 100% of the default ones).
|
|
|
|
*/
|
|
|
|
int head_flag;
|
|
|
|
const char *head_ref;
|
2016-04-22 21:25:25 +08:00
|
|
|
|
2017-03-26 10:42:36 +08:00
|
|
|
head_ref = refs_resolve_ref_unsafe(&refs->base, "HEAD",
|
|
|
|
RESOLVE_REF_READING,
|
2022-01-26 22:37:01 +08:00
|
|
|
NULL, &head_flag);
|
2015-11-09 21:34:01 +08:00
|
|
|
if (head_ref && (head_flag & REF_ISSYMREF) &&
|
|
|
|
!strcmp(head_ref, lock->ref_name)) {
|
|
|
|
struct strbuf log_err = STRBUF_INIT;
|
2017-03-26 10:42:22 +08:00
|
|
|
if (files_log_ref_write(refs, "HEAD",
|
2017-05-07 06:10:24 +08:00
|
|
|
&lock->old_oid, oid,
|
2017-03-26 10:42:22 +08:00
|
|
|
logmsg, 0, &log_err)) {
|
2015-11-09 21:34:01 +08:00
|
|
|
error("%s", log_err.buf);
|
|
|
|
strbuf_release(&log_err);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2016-04-22 21:25:25 +08:00
|
|
|
|
2015-11-09 21:34:01 +08:00
|
|
|
if (commit_ref(lock)) {
|
2016-04-27 21:21:36 +08:00
|
|
|
strbuf_addf(err, "couldn't set '%s'", lock->ref_name);
|
2015-11-09 21:34:01 +08:00
|
|
|
unlock_ref(lock);
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
unlock_ref(lock);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
create_symref: use existing ref-lock code
The create_symref() function predates the existence of
"struct lock_file", let alone the more recent "struct
ref_lock". Instead, it just does its own manual dot-locking.
Besides being more code, this has a few downsides:
- if git is interrupted while holding the lock, we don't
clean up the lockfile
- we don't do the usual directory/filename conflict check.
So you can sometimes create a symref "refs/heads/foo/bar",
even if "refs/heads/foo" exists (namely, if the refs are
packed and we do not hit the d/f conflict in the
filesystem).
This patch refactors create_symref() to use the "struct
ref_lock" interface, which handles both of these things.
There are a few bonus cleanups that come along with it:
- we leaked ref_path in some error cases
- the symref contents were stored in a fixed-size buffer,
putting an artificial (albeit large) limitation on the
length of the refname. We now write through fprintf, and
handle refnames of any size.
- we called adjust_shared_perm only after the file was
renamed into place, creating a potential race with
readers in a shared repository. The lockfile code now
handles this when creating the lockfile, making it
atomic.
- the legacy prefer_symlink_refs path did not do any
locking at all. Admittedly, it is not atomic from a
reader's perspective (as it unlinks and re-creates the
symlink to overwrite), but at least it cannot conflict
with other writers now.
- the result of this patch is hopefully more readable. It
eliminates three goto labels. Two were for error checking
that is now simplified, and the third was to reach shared
code that has been pulled into its own function.
Signed-off-by: Jeff King <peff@peff.net>
Reviewed-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-12-29 13:57:01 +08:00
|
|
|
static int create_ref_symlink(struct ref_lock *lock, const char *target)
|
2015-11-09 21:34:01 +08:00
|
|
|
{
|
create_symref: use existing ref-lock code
The create_symref() function predates the existence of
"struct lock_file", let alone the more recent "struct
ref_lock". Instead, it just does its own manual dot-locking.
Besides being more code, this has a few downsides:
- if git is interrupted while holding the lock, we don't
clean up the lockfile
- we don't do the usual directory/filename conflict check.
So you can sometimes create a symref "refs/heads/foo/bar",
even if "refs/heads/foo" exists (namely, if the refs are
packed and we do not hit the d/f conflict in the
filesystem).
This patch refactors create_symref() to use the "struct
ref_lock" interface, which handles both of these things.
There are a few bonus cleanups that come along with it:
- we leaked ref_path in some error cases
- the symref contents were stored in a fixed-size buffer,
putting an artificial (albeit large) limitation on the
length of the refname. We now write through fprintf, and
handle refnames of any size.
- we called adjust_shared_perm only after the file was
renamed into place, creating a potential race with
readers in a shared repository. The lockfile code now
handles this when creating the lockfile, making it
atomic.
- the legacy prefer_symlink_refs path did not do any
locking at all. Admittedly, it is not atomic from a
reader's perspective (as it unlinks and re-creates the
symlink to overwrite), but at least it cannot conflict
with other writers now.
- the result of this patch is hopefully more readable. It
eliminates three goto labels. Two were for error checking
that is now simplified, and the third was to reach shared
code that has been pulled into its own function.
Signed-off-by: Jeff King <peff@peff.net>
Reviewed-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-12-29 13:57:01 +08:00
|
|
|
int ret = -1;
|
2015-11-09 21:34:01 +08:00
|
|
|
#ifndef NO_SYMLINK_HEAD
|
2017-09-05 20:15:15 +08:00
|
|
|
char *ref_path = get_locked_file_path(&lock->lk);
|
create_symref: use existing ref-lock code
The create_symref() function predates the existence of
"struct lock_file", let alone the more recent "struct
ref_lock". Instead, it just does its own manual dot-locking.
Besides being more code, this has a few downsides:
- if git is interrupted while holding the lock, we don't
clean up the lockfile
- we don't do the usual directory/filename conflict check.
So you can sometimes create a symref "refs/heads/foo/bar",
even if "refs/heads/foo" exists (namely, if the refs are
packed and we do not hit the d/f conflict in the
filesystem).
This patch refactors create_symref() to use the "struct
ref_lock" interface, which handles both of these things.
There are a few bonus cleanups that come along with it:
- we leaked ref_path in some error cases
- the symref contents were stored in a fixed-size buffer,
putting an artificial (albeit large) limitation on the
length of the refname. We now write through fprintf, and
handle refnames of any size.
- we called adjust_shared_perm only after the file was
renamed into place, creating a potential race with
readers in a shared repository. The lockfile code now
handles this when creating the lockfile, making it
atomic.
- the legacy prefer_symlink_refs path did not do any
locking at all. Admittedly, it is not atomic from a
reader's perspective (as it unlinks and re-creates the
symlink to overwrite), but at least it cannot conflict
with other writers now.
- the result of this patch is hopefully more readable. It
eliminates three goto labels. Two were for error checking
that is now simplified, and the third was to reach shared
code that has been pulled into its own function.
Signed-off-by: Jeff King <peff@peff.net>
Reviewed-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-12-29 13:57:01 +08:00
|
|
|
unlink(ref_path);
|
|
|
|
ret = symlink(target, ref_path);
|
|
|
|
free(ref_path);
|
|
|
|
|
|
|
|
if (ret)
|
2015-11-09 21:34:01 +08:00
|
|
|
fprintf(stderr, "no symlink - falling back to symbolic ref\n");
|
|
|
|
#endif
|
create_symref: use existing ref-lock code
The create_symref() function predates the existence of
"struct lock_file", let alone the more recent "struct
ref_lock". Instead, it just does its own manual dot-locking.
Besides being more code, this has a few downsides:
- if git is interrupted while holding the lock, we don't
clean up the lockfile
- we don't do the usual directory/filename conflict check.
So you can sometimes create a symref "refs/heads/foo/bar",
even if "refs/heads/foo" exists (namely, if the refs are
packed and we do not hit the d/f conflict in the
filesystem).
This patch refactors create_symref() to use the "struct
ref_lock" interface, which handles both of these things.
There are a few bonus cleanups that come along with it:
- we leaked ref_path in some error cases
- the symref contents were stored in a fixed-size buffer,
putting an artificial (albeit large) limitation on the
length of the refname. We now write through fprintf, and
handle refnames of any size.
- we called adjust_shared_perm only after the file was
renamed into place, creating a potential race with
readers in a shared repository. The lockfile code now
handles this when creating the lockfile, making it
atomic.
- the legacy prefer_symlink_refs path did not do any
locking at all. Admittedly, it is not atomic from a
reader's perspective (as it unlinks and re-creates the
symlink to overwrite), but at least it cannot conflict
with other writers now.
- the result of this patch is hopefully more readable. It
eliminates three goto labels. Two were for error checking
that is now simplified, and the third was to reach shared
code that has been pulled into its own function.
Signed-off-by: Jeff King <peff@peff.net>
Reviewed-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-12-29 13:57:01 +08:00
|
|
|
return ret;
|
|
|
|
}
|
2015-11-09 21:34:01 +08:00
|
|
|
|
2017-03-26 10:42:22 +08:00
|
|
|
static void update_symref_reflog(struct files_ref_store *refs,
|
|
|
|
struct ref_lock *lock, const char *refname,
|
create_symref: use existing ref-lock code
The create_symref() function predates the existence of
"struct lock_file", let alone the more recent "struct
ref_lock". Instead, it just does its own manual dot-locking.
Besides being more code, this has a few downsides:
- if git is interrupted while holding the lock, we don't
clean up the lockfile
- we don't do the usual directory/filename conflict check.
So you can sometimes create a symref "refs/heads/foo/bar",
even if "refs/heads/foo" exists (namely, if the refs are
packed and we do not hit the d/f conflict in the
filesystem).
This patch refactors create_symref() to use the "struct
ref_lock" interface, which handles both of these things.
There are a few bonus cleanups that come along with it:
- we leaked ref_path in some error cases
- the symref contents were stored in a fixed-size buffer,
putting an artificial (albeit large) limitation on the
length of the refname. We now write through fprintf, and
handle refnames of any size.
- we called adjust_shared_perm only after the file was
renamed into place, creating a potential race with
readers in a shared repository. The lockfile code now
handles this when creating the lockfile, making it
atomic.
- the legacy prefer_symlink_refs path did not do any
locking at all. Admittedly, it is not atomic from a
reader's perspective (as it unlinks and re-creates the
symlink to overwrite), but at least it cannot conflict
with other writers now.
- the result of this patch is hopefully more readable. It
eliminates three goto labels. Two were for error checking
that is now simplified, and the third was to reach shared
code that has been pulled into its own function.
Signed-off-by: Jeff King <peff@peff.net>
Reviewed-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-12-29 13:57:01 +08:00
|
|
|
const char *target, const char *logmsg)
|
|
|
|
{
|
|
|
|
struct strbuf err = STRBUF_INIT;
|
2017-05-07 06:10:24 +08:00
|
|
|
struct object_id new_oid;
|
2021-10-16 17:39:14 +08:00
|
|
|
|
2017-03-26 10:42:36 +08:00
|
|
|
if (logmsg &&
|
2021-10-16 17:39:27 +08:00
|
|
|
refs_resolve_ref_unsafe(&refs->base, target,
|
2022-01-26 22:37:01 +08:00
|
|
|
RESOLVE_REF_READING, &new_oid, NULL) &&
|
2017-05-07 06:10:24 +08:00
|
|
|
files_log_ref_write(refs, refname, &lock->old_oid,
|
|
|
|
&new_oid, logmsg, 0, &err)) {
|
2015-11-09 21:34:01 +08:00
|
|
|
error("%s", err.buf);
|
|
|
|
strbuf_release(&err);
|
|
|
|
}
|
create_symref: use existing ref-lock code
The create_symref() function predates the existence of
"struct lock_file", let alone the more recent "struct
ref_lock". Instead, it just does its own manual dot-locking.
Besides being more code, this has a few downsides:
- if git is interrupted while holding the lock, we don't
clean up the lockfile
- we don't do the usual directory/filename conflict check.
So you can sometimes create a symref "refs/heads/foo/bar",
even if "refs/heads/foo" exists (namely, if the refs are
packed and we do not hit the d/f conflict in the
filesystem).
This patch refactors create_symref() to use the "struct
ref_lock" interface, which handles both of these things.
There are a few bonus cleanups that come along with it:
- we leaked ref_path in some error cases
- the symref contents were stored in a fixed-size buffer,
putting an artificial (albeit large) limitation on the
length of the refname. We now write through fprintf, and
handle refnames of any size.
- we called adjust_shared_perm only after the file was
renamed into place, creating a potential race with
readers in a shared repository. The lockfile code now
handles this when creating the lockfile, making it
atomic.
- the legacy prefer_symlink_refs path did not do any
locking at all. Admittedly, it is not atomic from a
reader's perspective (as it unlinks and re-creates the
symlink to overwrite), but at least it cannot conflict
with other writers now.
- the result of this patch is hopefully more readable. It
eliminates three goto labels. Two were for error checking
that is now simplified, and the third was to reach shared
code that has been pulled into its own function.
Signed-off-by: Jeff King <peff@peff.net>
Reviewed-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-12-29 13:57:01 +08:00
|
|
|
}
|
2015-11-09 21:34:01 +08:00
|
|
|
|
2017-03-26 10:42:22 +08:00
|
|
|
static int create_symref_locked(struct files_ref_store *refs,
|
|
|
|
struct ref_lock *lock, const char *refname,
|
create_symref: use existing ref-lock code
The create_symref() function predates the existence of
"struct lock_file", let alone the more recent "struct
ref_lock". Instead, it just does its own manual dot-locking.
Besides being more code, this has a few downsides:
- if git is interrupted while holding the lock, we don't
clean up the lockfile
- we don't do the usual directory/filename conflict check.
So you can sometimes create a symref "refs/heads/foo/bar",
even if "refs/heads/foo" exists (namely, if the refs are
packed and we do not hit the d/f conflict in the
filesystem).
This patch refactors create_symref() to use the "struct
ref_lock" interface, which handles both of these things.
There are a few bonus cleanups that come along with it:
- we leaked ref_path in some error cases
- the symref contents were stored in a fixed-size buffer,
putting an artificial (albeit large) limitation on the
length of the refname. We now write through fprintf, and
handle refnames of any size.
- we called adjust_shared_perm only after the file was
renamed into place, creating a potential race with
readers in a shared repository. The lockfile code now
handles this when creating the lockfile, making it
atomic.
- the legacy prefer_symlink_refs path did not do any
locking at all. Admittedly, it is not atomic from a
reader's perspective (as it unlinks and re-creates the
symlink to overwrite), but at least it cannot conflict
with other writers now.
- the result of this patch is hopefully more readable. It
eliminates three goto labels. Two were for error checking
that is now simplified, and the third was to reach shared
code that has been pulled into its own function.
Signed-off-by: Jeff King <peff@peff.net>
Reviewed-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-12-29 13:57:01 +08:00
|
|
|
const char *target, const char *logmsg)
|
|
|
|
{
|
|
|
|
if (prefer_symlink_refs && !create_ref_symlink(lock, target)) {
|
2017-03-26 10:42:22 +08:00
|
|
|
update_symref_reflog(refs, lock, refname, target, logmsg);
|
create_symref: use existing ref-lock code
The create_symref() function predates the existence of
"struct lock_file", let alone the more recent "struct
ref_lock". Instead, it just does its own manual dot-locking.
Besides being more code, this has a few downsides:
- if git is interrupted while holding the lock, we don't
clean up the lockfile
- we don't do the usual directory/filename conflict check.
So you can sometimes create a symref "refs/heads/foo/bar",
even if "refs/heads/foo" exists (namely, if the refs are
packed and we do not hit the d/f conflict in the
filesystem).
This patch refactors create_symref() to use the "struct
ref_lock" interface, which handles both of these things.
There are a few bonus cleanups that come along with it:
- we leaked ref_path in some error cases
- the symref contents were stored in a fixed-size buffer,
putting an artificial (albeit large) limitation on the
length of the refname. We now write through fprintf, and
handle refnames of any size.
- we called adjust_shared_perm only after the file was
renamed into place, creating a potential race with
readers in a shared repository. The lockfile code now
handles this when creating the lockfile, making it
atomic.
- the legacy prefer_symlink_refs path did not do any
locking at all. Admittedly, it is not atomic from a
reader's perspective (as it unlinks and re-creates the
symlink to overwrite), but at least it cannot conflict
with other writers now.
- the result of this patch is hopefully more readable. It
eliminates three goto labels. Two were for error checking
that is now simplified, and the third was to reach shared
code that has been pulled into its own function.
Signed-off-by: Jeff King <peff@peff.net>
Reviewed-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-12-29 13:57:01 +08:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2017-09-05 20:15:15 +08:00
|
|
|
if (!fdopen_lock_file(&lock->lk, "w"))
|
create_symref: use existing ref-lock code
The create_symref() function predates the existence of
"struct lock_file", let alone the more recent "struct
ref_lock". Instead, it just does its own manual dot-locking.
Besides being more code, this has a few downsides:
- if git is interrupted while holding the lock, we don't
clean up the lockfile
- we don't do the usual directory/filename conflict check.
So you can sometimes create a symref "refs/heads/foo/bar",
even if "refs/heads/foo" exists (namely, if the refs are
packed and we do not hit the d/f conflict in the
filesystem).
This patch refactors create_symref() to use the "struct
ref_lock" interface, which handles both of these things.
There are a few bonus cleanups that come along with it:
- we leaked ref_path in some error cases
- the symref contents were stored in a fixed-size buffer,
putting an artificial (albeit large) limitation on the
length of the refname. We now write through fprintf, and
handle refnames of any size.
- we called adjust_shared_perm only after the file was
renamed into place, creating a potential race with
readers in a shared repository. The lockfile code now
handles this when creating the lockfile, making it
atomic.
- the legacy prefer_symlink_refs path did not do any
locking at all. Admittedly, it is not atomic from a
reader's perspective (as it unlinks and re-creates the
symlink to overwrite), but at least it cannot conflict
with other writers now.
- the result of this patch is hopefully more readable. It
eliminates three goto labels. Two were for error checking
that is now simplified, and the third was to reach shared
code that has been pulled into its own function.
Signed-off-by: Jeff King <peff@peff.net>
Reviewed-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-12-29 13:57:01 +08:00
|
|
|
return error("unable to fdopen %s: %s",
|
2021-01-06 03:23:49 +08:00
|
|
|
get_lock_file_path(&lock->lk), strerror(errno));
|
create_symref: use existing ref-lock code
The create_symref() function predates the existence of
"struct lock_file", let alone the more recent "struct
ref_lock". Instead, it just does its own manual dot-locking.
Besides being more code, this has a few downsides:
- if git is interrupted while holding the lock, we don't
clean up the lockfile
- we don't do the usual directory/filename conflict check.
So you can sometimes create a symref "refs/heads/foo/bar",
even if "refs/heads/foo" exists (namely, if the refs are
packed and we do not hit the d/f conflict in the
filesystem).
This patch refactors create_symref() to use the "struct
ref_lock" interface, which handles both of these things.
There are a few bonus cleanups that come along with it:
- we leaked ref_path in some error cases
- the symref contents were stored in a fixed-size buffer,
putting an artificial (albeit large) limitation on the
length of the refname. We now write through fprintf, and
handle refnames of any size.
- we called adjust_shared_perm only after the file was
renamed into place, creating a potential race with
readers in a shared repository. The lockfile code now
handles this when creating the lockfile, making it
atomic.
- the legacy prefer_symlink_refs path did not do any
locking at all. Admittedly, it is not atomic from a
reader's perspective (as it unlinks and re-creates the
symlink to overwrite), but at least it cannot conflict
with other writers now.
- the result of this patch is hopefully more readable. It
eliminates three goto labels. Two were for error checking
that is now simplified, and the third was to reach shared
code that has been pulled into its own function.
Signed-off-by: Jeff King <peff@peff.net>
Reviewed-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-12-29 13:57:01 +08:00
|
|
|
|
2017-03-26 10:42:22 +08:00
|
|
|
update_symref_reflog(refs, lock, refname, target, logmsg);
|
2015-12-29 13:57:25 +08:00
|
|
|
|
create_symref: use existing ref-lock code
The create_symref() function predates the existence of
"struct lock_file", let alone the more recent "struct
ref_lock". Instead, it just does its own manual dot-locking.
Besides being more code, this has a few downsides:
- if git is interrupted while holding the lock, we don't
clean up the lockfile
- we don't do the usual directory/filename conflict check.
So you can sometimes create a symref "refs/heads/foo/bar",
even if "refs/heads/foo" exists (namely, if the refs are
packed and we do not hit the d/f conflict in the
filesystem).
This patch refactors create_symref() to use the "struct
ref_lock" interface, which handles both of these things.
There are a few bonus cleanups that come along with it:
- we leaked ref_path in some error cases
- the symref contents were stored in a fixed-size buffer,
putting an artificial (albeit large) limitation on the
length of the refname. We now write through fprintf, and
handle refnames of any size.
- we called adjust_shared_perm only after the file was
renamed into place, creating a potential race with
readers in a shared repository. The lockfile code now
handles this when creating the lockfile, making it
atomic.
- the legacy prefer_symlink_refs path did not do any
locking at all. Admittedly, it is not atomic from a
reader's perspective (as it unlinks and re-creates the
symlink to overwrite), but at least it cannot conflict
with other writers now.
- the result of this patch is hopefully more readable. It
eliminates three goto labels. Two were for error checking
that is now simplified, and the third was to reach shared
code that has been pulled into its own function.
Signed-off-by: Jeff King <peff@peff.net>
Reviewed-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-12-29 13:57:01 +08:00
|
|
|
/* no error check; commit_ref will check ferror */
|
2021-01-06 03:23:49 +08:00
|
|
|
fprintf(get_lock_file_fp(&lock->lk), "ref: %s\n", target);
|
create_symref: use existing ref-lock code
The create_symref() function predates the existence of
"struct lock_file", let alone the more recent "struct
ref_lock". Instead, it just does its own manual dot-locking.
Besides being more code, this has a few downsides:
- if git is interrupted while holding the lock, we don't
clean up the lockfile
- we don't do the usual directory/filename conflict check.
So you can sometimes create a symref "refs/heads/foo/bar",
even if "refs/heads/foo" exists (namely, if the refs are
packed and we do not hit the d/f conflict in the
filesystem).
This patch refactors create_symref() to use the "struct
ref_lock" interface, which handles both of these things.
There are a few bonus cleanups that come along with it:
- we leaked ref_path in some error cases
- the symref contents were stored in a fixed-size buffer,
putting an artificial (albeit large) limitation on the
length of the refname. We now write through fprintf, and
handle refnames of any size.
- we called adjust_shared_perm only after the file was
renamed into place, creating a potential race with
readers in a shared repository. The lockfile code now
handles this when creating the lockfile, making it
atomic.
- the legacy prefer_symlink_refs path did not do any
locking at all. Admittedly, it is not atomic from a
reader's perspective (as it unlinks and re-creates the
symlink to overwrite), but at least it cannot conflict
with other writers now.
- the result of this patch is hopefully more readable. It
eliminates three goto labels. Two were for error checking
that is now simplified, and the third was to reach shared
code that has been pulled into its own function.
Signed-off-by: Jeff King <peff@peff.net>
Reviewed-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-12-29 13:57:01 +08:00
|
|
|
if (commit_ref(lock) < 0)
|
|
|
|
return error("unable to write symref for %s: %s", refname,
|
|
|
|
strerror(errno));
|
2015-11-09 21:34:01 +08:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2016-09-05 00:08:28 +08:00
|
|
|
static int files_create_symref(struct ref_store *ref_store,
|
|
|
|
const char *refname, const char *target,
|
|
|
|
const char *logmsg)
|
create_symref: use existing ref-lock code
The create_symref() function predates the existence of
"struct lock_file", let alone the more recent "struct
ref_lock". Instead, it just does its own manual dot-locking.
Besides being more code, this has a few downsides:
- if git is interrupted while holding the lock, we don't
clean up the lockfile
- we don't do the usual directory/filename conflict check.
So you can sometimes create a symref "refs/heads/foo/bar",
even if "refs/heads/foo" exists (namely, if the refs are
packed and we do not hit the d/f conflict in the
filesystem).
This patch refactors create_symref() to use the "struct
ref_lock" interface, which handles both of these things.
There are a few bonus cleanups that come along with it:
- we leaked ref_path in some error cases
- the symref contents were stored in a fixed-size buffer,
putting an artificial (albeit large) limitation on the
length of the refname. We now write through fprintf, and
handle refnames of any size.
- we called adjust_shared_perm only after the file was
renamed into place, creating a potential race with
readers in a shared repository. The lockfile code now
handles this when creating the lockfile, making it
atomic.
- the legacy prefer_symlink_refs path did not do any
locking at all. Admittedly, it is not atomic from a
reader's perspective (as it unlinks and re-creates the
symlink to overwrite), but at least it cannot conflict
with other writers now.
- the result of this patch is hopefully more readable. It
eliminates three goto labels. Two were for error checking
that is now simplified, and the third was to reach shared
code that has been pulled into its own function.
Signed-off-by: Jeff King <peff@peff.net>
Reviewed-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-12-29 13:57:01 +08:00
|
|
|
{
|
2016-09-05 00:08:34 +08:00
|
|
|
struct files_ref_store *refs =
|
2017-03-26 10:42:32 +08:00
|
|
|
files_downcast(ref_store, REF_STORE_WRITE, "create_symref");
|
create_symref: use existing ref-lock code
The create_symref() function predates the existence of
"struct lock_file", let alone the more recent "struct
ref_lock". Instead, it just does its own manual dot-locking.
Besides being more code, this has a few downsides:
- if git is interrupted while holding the lock, we don't
clean up the lockfile
- we don't do the usual directory/filename conflict check.
So you can sometimes create a symref "refs/heads/foo/bar",
even if "refs/heads/foo" exists (namely, if the refs are
packed and we do not hit the d/f conflict in the
filesystem).
This patch refactors create_symref() to use the "struct
ref_lock" interface, which handles both of these things.
There are a few bonus cleanups that come along with it:
- we leaked ref_path in some error cases
- the symref contents were stored in a fixed-size buffer,
putting an artificial (albeit large) limitation on the
length of the refname. We now write through fprintf, and
handle refnames of any size.
- we called adjust_shared_perm only after the file was
renamed into place, creating a potential race with
readers in a shared repository. The lockfile code now
handles this when creating the lockfile, making it
atomic.
- the legacy prefer_symlink_refs path did not do any
locking at all. Admittedly, it is not atomic from a
reader's perspective (as it unlinks and re-creates the
symlink to overwrite), but at least it cannot conflict
with other writers now.
- the result of this patch is hopefully more readable. It
eliminates three goto labels. Two were for error checking
that is now simplified, and the third was to reach shared
code that has been pulled into its own function.
Signed-off-by: Jeff King <peff@peff.net>
Reviewed-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-12-29 13:57:01 +08:00
|
|
|
struct strbuf err = STRBUF_INIT;
|
|
|
|
struct ref_lock *lock;
|
|
|
|
int ret;
|
|
|
|
|
refs/files: remove "name exist?" check in lock_ref_oid_basic()
In lock_ref_oid_basic() we'll happily lock a reference that doesn't
exist yet. That's normal, and is how references are initially born,
but we don't need to retain checks here in lock_ref_oid_basic() about
the state of the ref, when what we're checking is either checked
already, or something we're about to discover by trying to lock the
ref with raceproof_create_file().
The one exception is the caller in files_reflog_expire(), who passes
us a "type" to find out if the reference is a symref or not. We can
move the that logic over to that caller, which can now defer its
discovery of whether or not the ref is a symref until it's needed. In
the preceding commit an exhaustive regression test was added for that
case in a new test in "t1417-reflog-updateref.sh".
The improved diagnostics here were added in
5b2d8d6f218 (lock_ref_sha1_basic(): improve diagnostics for ref D/F
conflicts, 2015-05-11), and then much of the surrounding code went
away recently in my 245fbba46d6 (refs/files: remove unused "errno ==
EISDIR" code, 2021-08-23).
The refs_resolve_ref_unsafe() code being removed here looks like it
should be tasked with doing that, but it's actually redundant to other
code.
The reason for that is as noted in 245fbba46d6 this once widely used
function now only has a handful of callers left, which all handle this
case themselves.
To the extent that we're racy between their check and ours removing
this check actually improves the situation, as we'll be doing fewer
things between the not-under-lock initial check and acquiring the
lock.
Why this is OK for all the remaining callers of lock_ref_oid_basic()
is noted below. There are only two of those callers:
* "git branch -[cm] <oldbranch> <newbranch>":
In files_copy_or_rename_ref() we'll call this when we copy or rename
refs via rename_ref() and copy_ref(). but only after we've checked
if the refname exists already via its own call to
refs_resolve_ref_unsafe() and refs_rename_ref_available().
As the updated comment to the latter here notes neither of those are
actually needed. If we delete not only this code but also
refs_rename_ref_available() we'll do just fine, we'll just emit a
less friendly error message if e.g. "git branch -m A B/C" would have
a D/F conflict with a "B" file.
Actually we'd probably die before that in case reflogs for the
branch existed, i.e. when the try to rename() or copy_file() the
relevant reflog, since if we've got a D/F conflict with a branch
name we'll probably also have the same with its reflogs (but not
necessarily, we might have reflogs, but it might not).
As some #leftoverbits that code seems buggy to me, i.e. the reflog
"protocol" should be to get a lock on the main ref, and then perform
ref and/or reflog operations. That code dates back to
c976d415e53 (git-branch: add options and tests for branch renaming,
2006-11-28) and probably pre-dated the solidifying of that
convention. But in any case, that edge case is not our bug or
problem right now.
* "git reflog expire <ref>":
In files_reflog_expire() we'll call this without previous ref
existence checking in files-backend.c, but that code is in turn
called by code that's just finished checking if the refname whose
reflog we're expiring exists.
See ae35e16cd43 (reflog expire: don't lock reflogs using previously
seen OID, 2021-08-23) for the current state of that code, and
5e6f003ca8a (reflog_expire(): ignore --updateref for symbolic
references, 2015-03-03) for the code we'd break if we only did a
"update = !!ref" here, which is covered by the aforementioned
regression test in "t1417-reflog-updateref.sh".
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-10-16 17:39:13 +08:00
|
|
|
lock = lock_ref_oid_basic(refs, refname, &err);
|
create_symref: use existing ref-lock code
The create_symref() function predates the existence of
"struct lock_file", let alone the more recent "struct
ref_lock". Instead, it just does its own manual dot-locking.
Besides being more code, this has a few downsides:
- if git is interrupted while holding the lock, we don't
clean up the lockfile
- we don't do the usual directory/filename conflict check.
So you can sometimes create a symref "refs/heads/foo/bar",
even if "refs/heads/foo" exists (namely, if the refs are
packed and we do not hit the d/f conflict in the
filesystem).
This patch refactors create_symref() to use the "struct
ref_lock" interface, which handles both of these things.
There are a few bonus cleanups that come along with it:
- we leaked ref_path in some error cases
- the symref contents were stored in a fixed-size buffer,
putting an artificial (albeit large) limitation on the
length of the refname. We now write through fprintf, and
handle refnames of any size.
- we called adjust_shared_perm only after the file was
renamed into place, creating a potential race with
readers in a shared repository. The lockfile code now
handles this when creating the lockfile, making it
atomic.
- the legacy prefer_symlink_refs path did not do any
locking at all. Admittedly, it is not atomic from a
reader's perspective (as it unlinks and re-creates the
symlink to overwrite), but at least it cannot conflict
with other writers now.
- the result of this patch is hopefully more readable. It
eliminates three goto labels. Two were for error checking
that is now simplified, and the third was to reach shared
code that has been pulled into its own function.
Signed-off-by: Jeff King <peff@peff.net>
Reviewed-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-12-29 13:57:01 +08:00
|
|
|
if (!lock) {
|
|
|
|
error("%s", err.buf);
|
|
|
|
strbuf_release(&err);
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2017-03-26 10:42:22 +08:00
|
|
|
ret = create_symref_locked(refs, lock, refname, target, logmsg);
|
create_symref: use existing ref-lock code
The create_symref() function predates the existence of
"struct lock_file", let alone the more recent "struct
ref_lock". Instead, it just does its own manual dot-locking.
Besides being more code, this has a few downsides:
- if git is interrupted while holding the lock, we don't
clean up the lockfile
- we don't do the usual directory/filename conflict check.
So you can sometimes create a symref "refs/heads/foo/bar",
even if "refs/heads/foo" exists (namely, if the refs are
packed and we do not hit the d/f conflict in the
filesystem).
This patch refactors create_symref() to use the "struct
ref_lock" interface, which handles both of these things.
There are a few bonus cleanups that come along with it:
- we leaked ref_path in some error cases
- the symref contents were stored in a fixed-size buffer,
putting an artificial (albeit large) limitation on the
length of the refname. We now write through fprintf, and
handle refnames of any size.
- we called adjust_shared_perm only after the file was
renamed into place, creating a potential race with
readers in a shared repository. The lockfile code now
handles this when creating the lockfile, making it
atomic.
- the legacy prefer_symlink_refs path did not do any
locking at all. Admittedly, it is not atomic from a
reader's perspective (as it unlinks and re-creates the
symlink to overwrite), but at least it cannot conflict
with other writers now.
- the result of this patch is hopefully more readable. It
eliminates three goto labels. Two were for error checking
that is now simplified, and the third was to reach shared
code that has been pulled into its own function.
Signed-off-by: Jeff King <peff@peff.net>
Reviewed-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-12-29 13:57:01 +08:00
|
|
|
unlock_ref(lock);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2016-09-05 00:08:38 +08:00
|
|
|
static int files_reflog_exists(struct ref_store *ref_store,
|
|
|
|
const char *refname)
|
2015-11-09 21:34:01 +08:00
|
|
|
{
|
2017-03-26 10:42:22 +08:00
|
|
|
struct files_ref_store *refs =
|
2017-03-26 10:42:32 +08:00
|
|
|
files_downcast(ref_store, REF_STORE_READ, "reflog_exists");
|
2017-03-26 10:42:20 +08:00
|
|
|
struct strbuf sb = STRBUF_INIT;
|
2015-11-09 21:34:01 +08:00
|
|
|
struct stat st;
|
2017-03-26 10:42:20 +08:00
|
|
|
int ret;
|
2015-11-09 21:34:01 +08:00
|
|
|
|
2017-03-26 10:42:22 +08:00
|
|
|
files_reflog_path(refs, &sb, refname);
|
2017-03-26 10:42:20 +08:00
|
|
|
ret = !lstat(sb.buf, &st) && S_ISREG(st.st_mode);
|
|
|
|
strbuf_release(&sb);
|
|
|
|
return ret;
|
2015-11-09 21:34:01 +08:00
|
|
|
}
|
|
|
|
|
2016-09-05 00:08:38 +08:00
|
|
|
static int files_delete_reflog(struct ref_store *ref_store,
|
|
|
|
const char *refname)
|
2015-11-09 21:34:01 +08:00
|
|
|
{
|
2017-03-26 10:42:22 +08:00
|
|
|
struct files_ref_store *refs =
|
2017-03-26 10:42:32 +08:00
|
|
|
files_downcast(ref_store, REF_STORE_WRITE, "delete_reflog");
|
2017-03-26 10:42:20 +08:00
|
|
|
struct strbuf sb = STRBUF_INIT;
|
|
|
|
int ret;
|
|
|
|
|
2017-03-26 10:42:22 +08:00
|
|
|
files_reflog_path(refs, &sb, refname);
|
2017-03-26 10:42:20 +08:00
|
|
|
ret = remove_path(sb.buf);
|
|
|
|
strbuf_release(&sb);
|
|
|
|
return ret;
|
2015-11-09 21:34:01 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Parse one reflog line held in "sb" and hand its fields to "fn".
 *
 * Expected layout:
 *
 *	old SP new SP name <email> SP time SP tz [TAB msg] LF
 *
 * Returns 0 without calling "fn" when the line does not match that layout
 * (this includes a timestamp that parses to zero); otherwise returns
 * whatever "fn" returns. The buffer is modified in place: the byte after
 * the closing '>' is overwritten with NUL so that the identity string
 * passed to "fn" is terminated.
 */
static int show_one_reflog_ent(struct strbuf *sb, each_reflog_ent_fn fn, void *cb_data)
{
	struct object_id old_oid, new_oid;
	char *ident_end, *rest;
	timestamp_t when;
	int tz;
	const char *cursor = sb->buf;

	/* A complete entry is non-empty and LF-terminated. */
	if (!sb->len || sb->buf[sb->len - 1] != '\n')
		return 0; /* corrupt? */

	/* Old and new object ids, each followed by a single space. */
	if (parse_oid_hex(cursor, &old_oid, &cursor) || *cursor++ != ' ')
		return 0; /* corrupt? */
	if (parse_oid_hex(cursor, &new_oid, &cursor) || *cursor++ != ' ')
		return 0; /* corrupt? */

	/* "name <email>" ends at the first '>' and is followed by a space. */
	ident_end = strchr(cursor, '>');
	if (!ident_end || ident_end[1] != ' ')
		return 0; /* corrupt? */

	when = parse_timestamp(ident_end + 2, &rest, 10);
	if (!when)
		return 0; /* corrupt? */

	/* " +hhmm" or " -hhmm" */
	if (!rest || rest[0] != ' ' ||
	    (rest[1] != '+' && rest[1] != '-'))
		return 0; /* corrupt? */
	if (!isdigit(rest[2]) || !isdigit(rest[3]) ||
	    !isdigit(rest[4]) || !isdigit(rest[5]))
		return 0; /* corrupt? */

	ident_end[1] = '\0';
	tz = strtol(rest + 1, NULL, 10);

	/* Skip " +hhmm", plus the TAB that introduces the message if present. */
	rest += (rest[6] == '\t') ? 7 : 6;

	return fn(&old_oid, &new_oid, cursor, when, tz, rest, cb_data);
}
|
|
|
|
|
|
|
|
/*
 * Walk backwards from "scan" (exclusive) towards "bob", the beginning of
 * the buffer, looking for a newline.
 *
 * Returns a pointer to the LF that ends the previous line, or "bob" itself
 * if the start of the buffer is reached first. The caller has to inspect
 * the returned byte to tell the two cases apart.
 */
static char *find_beginning_of_line(char *bob, char *scan)
{
	while (scan > bob) {
		scan--;
		if (*scan == '\n')
			break;
	}
	return scan;
}
|
|
|
|
|
2016-09-05 00:08:38 +08:00
|
|
|
static int files_for_each_reflog_ent_reverse(struct ref_store *ref_store,
|
|
|
|
const char *refname,
|
|
|
|
each_reflog_ent_fn fn,
|
|
|
|
void *cb_data)
|
2015-11-09 21:34:01 +08:00
|
|
|
{
|
2017-03-26 10:42:22 +08:00
|
|
|
struct files_ref_store *refs =
|
2017-03-26 10:42:32 +08:00
|
|
|
files_downcast(ref_store, REF_STORE_READ,
|
|
|
|
"for_each_reflog_ent_reverse");
|
2015-11-09 21:34:01 +08:00
|
|
|
struct strbuf sb = STRBUF_INIT;
|
|
|
|
FILE *logfp;
|
|
|
|
long pos;
|
|
|
|
int ret = 0, at_tail = 1;
|
|
|
|
|
2017-03-26 10:42:22 +08:00
|
|
|
files_reflog_path(refs, &sb, refname);
|
2017-03-26 10:42:20 +08:00
|
|
|
logfp = fopen(sb.buf, "r");
|
|
|
|
strbuf_release(&sb);
|
2015-11-09 21:34:01 +08:00
|
|
|
if (!logfp)
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
/* Jump to the end */
|
|
|
|
if (fseek(logfp, 0, SEEK_END) < 0)
|
2017-04-17 00:55:46 +08:00
|
|
|
ret = error("cannot seek back reflog for %s: %s",
|
|
|
|
refname, strerror(errno));
|
2015-11-09 21:34:01 +08:00
|
|
|
pos = ftell(logfp);
|
|
|
|
while (!ret && 0 < pos) {
|
|
|
|
int cnt;
|
|
|
|
size_t nread;
|
|
|
|
char buf[BUFSIZ];
|
|
|
|
char *endp, *scanp;
|
|
|
|
|
|
|
|
/* Fill next block from the end */
|
|
|
|
cnt = (sizeof(buf) < pos) ? sizeof(buf) : pos;
|
2017-04-17 00:55:46 +08:00
|
|
|
if (fseek(logfp, pos - cnt, SEEK_SET)) {
|
|
|
|
ret = error("cannot seek back reflog for %s: %s",
|
|
|
|
refname, strerror(errno));
|
|
|
|
break;
|
|
|
|
}
|
2015-11-09 21:34:01 +08:00
|
|
|
nread = fread(buf, cnt, 1, logfp);
|
2017-04-17 00:55:46 +08:00
|
|
|
if (nread != 1) {
|
|
|
|
ret = error("cannot read %d bytes from reflog for %s: %s",
|
|
|
|
cnt, refname, strerror(errno));
|
|
|
|
break;
|
|
|
|
}
|
2015-11-09 21:34:01 +08:00
|
|
|
pos -= cnt;
|
|
|
|
|
|
|
|
scanp = endp = buf + cnt;
|
|
|
|
if (at_tail && scanp[-1] == '\n')
|
|
|
|
/* Looking at the final LF at the end of the file */
|
|
|
|
scanp--;
|
|
|
|
at_tail = 0;
|
|
|
|
|
|
|
|
while (buf < scanp) {
|
|
|
|
/*
|
|
|
|
* terminating LF of the previous line, or the beginning
|
|
|
|
* of the buffer.
|
|
|
|
*/
|
|
|
|
char *bp;
|
|
|
|
|
|
|
|
bp = find_beginning_of_line(buf, scanp);
|
|
|
|
|
|
|
|
if (*bp == '\n') {
|
|
|
|
/*
|
|
|
|
* The newline is the end of the previous line,
|
|
|
|
* so we know we have complete line starting
|
|
|
|
* at (bp + 1). Prefix it onto any prior data
|
|
|
|
* we collected for the line and process it.
|
|
|
|
*/
|
|
|
|
strbuf_splice(&sb, 0, 0, bp + 1, endp - (bp + 1));
|
|
|
|
scanp = bp;
|
|
|
|
endp = bp + 1;
|
|
|
|
ret = show_one_reflog_ent(&sb, fn, cb_data);
|
|
|
|
strbuf_reset(&sb);
|
|
|
|
if (ret)
|
|
|
|
break;
|
|
|
|
} else if (!pos) {
|
|
|
|
/*
|
|
|
|
* We are at the start of the buffer, and the
|
|
|
|
* start of the file; there is no previous
|
|
|
|
* line, and we have everything for this one.
|
|
|
|
* Process it, and we can end the loop.
|
|
|
|
*/
|
|
|
|
strbuf_splice(&sb, 0, 0, buf, endp - buf);
|
|
|
|
ret = show_one_reflog_ent(&sb, fn, cb_data);
|
|
|
|
strbuf_reset(&sb);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (bp == buf) {
|
|
|
|
/*
|
|
|
|
* We are at the start of the buffer, and there
|
|
|
|
* is more file to read backwards. Which means
|
|
|
|
* we are in the middle of a line. Note that we
|
|
|
|
* may get here even if *bp was a newline; that
|
|
|
|
* just means we are at the exact end of the
|
|
|
|
* previous line, rather than some spot in the
|
|
|
|
* middle.
|
|
|
|
*
|
|
|
|
* Save away what we have to be combined with
|
|
|
|
* the data from the next read.
|
|
|
|
*/
|
|
|
|
strbuf_splice(&sb, 0, 0, buf, endp - buf);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
if (!ret && sb.len)
|
2018-05-02 17:38:39 +08:00
|
|
|
BUG("reverse reflog parser had leftover data");
|
2015-11-09 21:34:01 +08:00
|
|
|
|
|
|
|
fclose(logfp);
|
|
|
|
strbuf_release(&sb);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2016-09-05 00:08:38 +08:00
|
|
|
static int files_for_each_reflog_ent(struct ref_store *ref_store,
|
|
|
|
const char *refname,
|
|
|
|
each_reflog_ent_fn fn, void *cb_data)
|
2015-11-09 21:34:01 +08:00
|
|
|
{
|
2017-03-26 10:42:22 +08:00
|
|
|
struct files_ref_store *refs =
|
2017-03-26 10:42:32 +08:00
|
|
|
files_downcast(ref_store, REF_STORE_READ,
|
|
|
|
"for_each_reflog_ent");
|
2015-11-09 21:34:01 +08:00
|
|
|
FILE *logfp;
|
|
|
|
struct strbuf sb = STRBUF_INIT;
|
|
|
|
int ret = 0;
|
|
|
|
|
2017-03-26 10:42:22 +08:00
|
|
|
files_reflog_path(refs, &sb, refname);
|
2017-03-26 10:42:20 +08:00
|
|
|
logfp = fopen(sb.buf, "r");
|
|
|
|
strbuf_release(&sb);
|
2015-11-09 21:34:01 +08:00
|
|
|
if (!logfp)
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
while (!ret && !strbuf_getwholeline(&sb, logfp, '\n'))
|
|
|
|
ret = show_one_reflog_ent(&sb, fn, cb_data);
|
|
|
|
fclose(logfp);
|
|
|
|
strbuf_release(&sb);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2016-06-18 12:15:19 +08:00
|
|
|
struct files_reflog_iterator {
|
|
|
|
struct ref_iterator base;
|
2015-11-09 21:34:01 +08:00
|
|
|
|
2017-03-26 10:42:36 +08:00
|
|
|
struct ref_store *ref_store;
|
2016-06-18 12:15:19 +08:00
|
|
|
struct dir_iterator *dir_iterator;
|
|
|
|
struct object_id oid;
|
|
|
|
};
|
2015-11-09 21:34:01 +08:00
|
|
|
|
2016-06-18 12:15:19 +08:00
|
|
|
static int files_reflog_iterator_advance(struct ref_iterator *ref_iterator)
|
|
|
|
{
|
|
|
|
struct files_reflog_iterator *iter =
|
|
|
|
(struct files_reflog_iterator *)ref_iterator;
|
|
|
|
struct dir_iterator *diter = iter->dir_iterator;
|
|
|
|
int ok;
|
|
|
|
|
|
|
|
while ((ok = dir_iterator_advance(diter)) == ITER_OK) {
|
|
|
|
int flags;
|
|
|
|
|
|
|
|
if (!S_ISREG(diter->st.st_mode))
|
2015-11-09 21:34:01 +08:00
|
|
|
continue;
|
2016-06-18 12:15:19 +08:00
|
|
|
if (diter->basename[0] == '.')
|
|
|
|
continue;
|
|
|
|
if (ends_with(diter->basename, ".lock"))
|
2015-11-09 21:34:01 +08:00
|
|
|
continue;
|
|
|
|
|
2021-10-16 17:39:27 +08:00
|
|
|
if (!refs_resolve_ref_unsafe(iter->ref_store,
|
2021-10-16 17:39:14 +08:00
|
|
|
diter->relative_path, 0,
|
2022-01-26 22:37:01 +08:00
|
|
|
&iter->oid, &flags)) {
|
2016-06-18 12:15:19 +08:00
|
|
|
error("bad ref for %s", diter->path.buf);
|
|
|
|
continue;
|
2015-11-09 21:34:01 +08:00
|
|
|
}
|
2016-06-18 12:15:19 +08:00
|
|
|
|
|
|
|
iter->base.refname = diter->relative_path;
|
|
|
|
iter->base.oid = &iter->oid;
|
|
|
|
iter->base.flags = flags;
|
|
|
|
return ITER_OK;
|
2015-11-09 21:34:01 +08:00
|
|
|
}
|
2016-06-18 12:15:19 +08:00
|
|
|
|
|
|
|
iter->dir_iterator = NULL;
|
|
|
|
if (ref_iterator_abort(ref_iterator) == ITER_ERROR)
|
|
|
|
ok = ITER_ERROR;
|
|
|
|
return ok;
|
|
|
|
}
|
|
|
|
|
2022-08-26 01:09:48 +08:00
|
|
|
/*
 * Peeling is not supported by the reflog iterator; calling this is
 * reported as a bug via BUG().
 */
static int files_reflog_iterator_peel(struct ref_iterator *ref_iterator UNUSED,
				      struct object_id *peeled UNUSED)
{
	BUG("ref_iterator_peel() called for reflog_iterator");
}
|
|
|
|
|
|
|
|
static int files_reflog_iterator_abort(struct ref_iterator *ref_iterator)
|
|
|
|
{
|
|
|
|
struct files_reflog_iterator *iter =
|
|
|
|
(struct files_reflog_iterator *)ref_iterator;
|
|
|
|
int ok = ITER_DONE;
|
|
|
|
|
|
|
|
if (iter->dir_iterator)
|
|
|
|
ok = dir_iterator_abort(iter->dir_iterator);
|
|
|
|
|
|
|
|
base_ref_iterator_free(ref_iterator);
|
|
|
|
return ok;
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct ref_iterator_vtable files_reflog_iterator_vtable = {
|
2022-03-18 01:27:16 +08:00
|
|
|
.advance = files_reflog_iterator_advance,
|
|
|
|
.peel = files_reflog_iterator_peel,
|
|
|
|
.abort = files_reflog_iterator_abort,
|
2016-06-18 12:15:19 +08:00
|
|
|
};
|
|
|
|
|
2017-08-23 20:37:00 +08:00
|
|
|
static struct ref_iterator *reflog_iterator_begin(struct ref_store *ref_store,
|
|
|
|
const char *gitdir)
|
2016-06-18 12:15:19 +08:00
|
|
|
{
|
2019-07-11 07:58:59 +08:00
|
|
|
struct dir_iterator *diter;
|
|
|
|
struct files_reflog_iterator *iter;
|
|
|
|
struct ref_iterator *ref_iterator;
|
2017-03-26 10:42:20 +08:00
|
|
|
struct strbuf sb = STRBUF_INIT;
|
2016-06-18 12:15:19 +08:00
|
|
|
|
2017-08-23 20:37:00 +08:00
|
|
|
strbuf_addf(&sb, "%s/logs", gitdir);
|
2019-07-11 07:58:59 +08:00
|
|
|
|
2019-07-11 07:59:00 +08:00
|
|
|
diter = dir_iterator_begin(sb.buf, 0);
|
2019-08-07 19:15:14 +08:00
|
|
|
if (!diter) {
|
|
|
|
strbuf_release(&sb);
|
2019-07-11 07:58:59 +08:00
|
|
|
return empty_ref_iterator_begin();
|
2019-08-07 19:15:14 +08:00
|
|
|
}
|
2019-07-11 07:58:59 +08:00
|
|
|
|
2021-03-14 00:17:22 +08:00
|
|
|
CALLOC_ARRAY(iter, 1);
|
2019-07-11 07:58:59 +08:00
|
|
|
ref_iterator = &iter->base;
|
|
|
|
|
|
|
|
base_ref_iterator_init(ref_iterator, &files_reflog_iterator_vtable, 0);
|
|
|
|
iter->dir_iterator = diter;
|
2017-03-26 10:42:36 +08:00
|
|
|
iter->ref_store = ref_store;
|
2017-03-26 10:42:20 +08:00
|
|
|
strbuf_release(&sb);
|
2017-08-23 20:37:00 +08:00
|
|
|
|
2016-06-18 12:15:19 +08:00
|
|
|
return ref_iterator;
|
2015-11-09 21:34:01 +08:00
|
|
|
}
|
|
|
|
|
2017-08-23 20:37:00 +08:00
|
|
|
static enum iterator_selection reflog_iterator_select(
|
|
|
|
struct ref_iterator *iter_worktree,
|
|
|
|
struct ref_iterator *iter_common,
|
2022-08-26 01:09:48 +08:00
|
|
|
void *cb_data UNUSED)
|
2017-08-23 20:37:00 +08:00
|
|
|
{
|
|
|
|
if (iter_worktree) {
|
|
|
|
/*
|
|
|
|
* We're a bit loose here. We probably should ignore
|
|
|
|
* common refs if they are accidentally added as
|
|
|
|
* per-worktree refs.
|
|
|
|
*/
|
|
|
|
return ITER_SELECT_0;
|
|
|
|
} else if (iter_common) {
|
2022-09-20 00:34:50 +08:00
|
|
|
if (parse_worktree_ref(iter_common->refname, NULL, NULL,
|
|
|
|
NULL) == REF_WORKTREE_SHARED)
|
2017-08-23 20:37:00 +08:00
|
|
|
return ITER_SELECT_1;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The main ref store may contain main worktree's
|
|
|
|
* per-worktree refs, which should be ignored
|
|
|
|
*/
|
|
|
|
return ITER_SKIP_1;
|
|
|
|
} else
|
|
|
|
return ITER_DONE;
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct ref_iterator *files_reflog_iterator_begin(struct ref_store *ref_store)
|
|
|
|
{
|
|
|
|
struct files_ref_store *refs =
|
|
|
|
files_downcast(ref_store, REF_STORE_READ,
|
|
|
|
"reflog_iterator_begin");
|
|
|
|
|
2020-08-19 22:27:57 +08:00
|
|
|
if (!strcmp(refs->base.gitdir, refs->gitcommondir)) {
|
2017-08-23 20:37:00 +08:00
|
|
|
return reflog_iterator_begin(ref_store, refs->gitcommondir);
|
|
|
|
} else {
|
|
|
|
return merge_ref_iterator_begin(
|
2020-08-19 22:27:57 +08:00
|
|
|
0, reflog_iterator_begin(ref_store, refs->base.gitdir),
|
2017-08-23 20:37:00 +08:00
|
|
|
reflog_iterator_begin(ref_store, refs->gitcommondir),
|
|
|
|
reflog_iterator_select, refs);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-04-24 14:58:41 +08:00
|
|
|
/*
|
refs: resolve symbolic refs first
Before committing ref updates, split symbolic ref updates into two
parts: an update to the underlying ref, and a log-only update to the
symbolic ref. This ensures that both references are locked correctly
during the transaction, including while their reflogs are updated.
Similarly, if the reference pointed to by HEAD is modified directly, add
a separate log-only update to HEAD, rather than leaving the job of
updating HEAD's reflog to commit_ref_update(). This change ensures that
HEAD is locked correctly while its reflog is being modified, as well as
being cheaper (HEAD only needs to be resolved once).
This makes use of a new function, lock_raw_ref(), which is analogous to
read_raw_ref(), but acquires a lock on the reference before reading it.
This change still has two problems:
* There are redundant read_ref_full() reference lookups.
* It is still possible to get incorrect reflogs for symbolic references
if there is a concurrent update by another process, since the old_oid
of a symref is determined before the lock on the pointed-to ref is
held.
Both problems will soon be fixed.
Signed-off-by: David Turner <dturner@twopensource.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
WIP
2016-04-25 21:56:07 +08:00
|
|
|
* If update is a direct update of head_ref (the reference pointed to
|
|
|
|
* by HEAD), then add an extra REF_LOG_ONLY update for HEAD.
|
|
|
|
*/
|
|
|
|
static int split_head_update(struct ref_update *update,
|
|
|
|
struct ref_transaction *transaction,
|
|
|
|
const char *head_ref,
|
|
|
|
struct string_list *affected_refnames,
|
|
|
|
struct strbuf *err)
|
|
|
|
{
|
|
|
|
struct string_list_item *item;
|
|
|
|
struct ref_update *new_update;
|
|
|
|
|
|
|
|
if ((update->flags & REF_LOG_ONLY) ||
|
2017-11-05 16:42:07 +08:00
|
|
|
(update->flags & REF_IS_PRUNING) ||
|
refs: resolve symbolic refs first
Before committing ref updates, split symbolic ref updates into two
parts: an update to the underlying ref, and a log-only update to the
symbolic ref. This ensures that both references are locked correctly
during the transaction, including while their reflogs are updated.
Similarly, if the reference pointed to by HEAD is modified directly, add
a separate log-only update to HEAD, rather than leaving the job of
updating HEAD's reflog to commit_ref_update(). This change ensures that
HEAD is locked correctly while its reflog is being modified, as well as
being cheaper (HEAD only needs to be resolved once).
This makes use of a new function, lock_raw_ref(), which is analogous to
read_raw_ref(), but acquires a lock on the reference before reading it.
This change still has two problems:
* There are redundant read_ref_full() reference lookups.
* It is still possible to get incorrect reflogs for symbolic references
if there is a concurrent update by another process, since the old_oid
of a symref is determined before the lock on the pointed-to ref is
held.
Both problems will soon be fixed.
Signed-off-by: David Turner <dturner@twopensource.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
WIP
2016-04-25 21:56:07 +08:00
|
|
|
(update->flags & REF_UPDATE_VIA_HEAD))
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
if (strcmp(update->refname, head_ref))
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* First make sure that HEAD is not already in the
|
2017-09-09 14:57:18 +08:00
|
|
|
* transaction. This check is O(lg N) in the transaction
|
refs: resolve symbolic refs first
Before committing ref updates, split symbolic ref updates into two
parts: an update to the underlying ref, and a log-only update to the
symbolic ref. This ensures that both references are locked correctly
during the transaction, including while their reflogs are updated.
Similarly, if the reference pointed to by HEAD is modified directly, add
a separate log-only update to HEAD, rather than leaving the job of
updating HEAD's reflog to commit_ref_update(). This change ensures that
HEAD is locked correctly while its reflog is being modified, as well as
being cheaper (HEAD only needs to be resolved once).
This makes use of a new function, lock_raw_ref(), which is analogous to
read_raw_ref(), but acquires a lock on the reference before reading it.
This change still has two problems:
* There are redundant read_ref_full() reference lookups.
* It is still possible to get incorrect reflogs for symbolic references
if there is a concurrent update by another process, since the old_oid
of a symref is determined before the lock on the pointed-to ref is
held.
Both problems will soon be fixed.
Signed-off-by: David Turner <dturner@twopensource.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
WIP
2016-04-25 21:56:07 +08:00
|
|
|
* size, but it happens at most once per transaction.
|
|
|
|
*/
|
2017-09-09 14:57:18 +08:00
|
|
|
if (string_list_has_string(affected_refnames, "HEAD")) {
|
refs: resolve symbolic refs first
Before committing ref updates, split symbolic ref updates into two
parts: an update to the underlying ref, and a log-only update to the
symbolic ref. This ensures that both references are locked correctly
during the transaction, including while their reflogs are updated.
Similarly, if the reference pointed to by HEAD is modified directly, add
a separate log-only update to HEAD, rather than leaving the job of
updating HEAD's reflog to commit_ref_update(). This change ensures that
HEAD is locked correctly while its reflog is being modified, as well as
being cheaper (HEAD only needs to be resolved once).
This makes use of a new function, lock_raw_ref(), which is analogous to
read_raw_ref(), but acquires a lock on the reference before reading it.
This change still has two problems:
* There are redundant read_ref_full() reference lookups.
* It is still possible to get incorrect reflogs for symbolic references
if there is a concurrent update by another process, since the old_oid
of a symref is determined before the lock on the pointed-to ref is
held.
Both problems will soon be fixed.
Signed-off-by: David Turner <dturner@twopensource.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
WIP
2016-04-25 21:56:07 +08:00
|
|
|
/* An entry already existed */
|
|
|
|
strbuf_addf(err,
|
|
|
|
"multiple updates for 'HEAD' (including one "
|
|
|
|
"via its referent '%s') are not allowed",
|
|
|
|
update->refname);
|
|
|
|
return TRANSACTION_NAME_CONFLICT;
|
|
|
|
}
|
|
|
|
|
|
|
|
new_update = ref_transaction_add_update(
|
|
|
|
transaction, "HEAD",
|
2017-11-05 16:42:06 +08:00
|
|
|
update->flags | REF_LOG_ONLY | REF_NO_DEREF,
|
2017-10-16 06:06:53 +08:00
|
|
|
&update->new_oid, &update->old_oid,
|
refs: resolve symbolic refs first
Before committing ref updates, split symbolic ref updates into two
parts: an update to the underlying ref, and a log-only update to the
symbolic ref. This ensures that both references are locked correctly
during the transaction, including while their reflogs are updated.
Similarly, if the reference pointed to by HEAD is modified directly, add
a separate log-only update to HEAD, rather than leaving the job of
updating HEAD's reflog to commit_ref_update(). This change ensures that
HEAD is locked correctly while its reflog is being modified, as well as
being cheaper (HEAD only needs to be resolved once).
This makes use of a new function, lock_raw_ref(), which is analogous to
read_raw_ref(), but acquires a lock on the reference before reading it.
This change still has two problems:
* There are redundant read_ref_full() reference lookups.
* It is still possible to get incorrect reflogs for symbolic references
if there is a concurrent update by another process, since the old_oid
of a symref is determined before the lock on the pointed-to ref is
held.
Both problems will soon be fixed.
Signed-off-by: David Turner <dturner@twopensource.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
WIP
2016-04-25 21:56:07 +08:00
|
|
|
update->msg);
|
|
|
|
|
2017-09-09 14:57:18 +08:00
|
|
|
/*
|
|
|
|
* Add "HEAD". This insertion is O(N) in the transaction
|
|
|
|
* size, but it happens at most once per transaction.
|
|
|
|
* Add new_update->refname instead of a literal "HEAD".
|
|
|
|
*/
|
|
|
|
if (strcmp(new_update->refname, "HEAD"))
|
|
|
|
BUG("%s unexpectedly not 'HEAD'", new_update->refname);
|
|
|
|
item = string_list_insert(affected_refnames, new_update->refname);
|
refs: resolve symbolic refs first
Before committing ref updates, split symbolic ref updates into two
parts: an update to the underlying ref, and a log-only update to the
symbolic ref. This ensures that both references are locked correctly
during the transaction, including while their reflogs are updated.
Similarly, if the reference pointed to by HEAD is modified directly, add
a separate log-only update to HEAD, rather than leaving the job of
updating HEAD's reflog to commit_ref_update(). This change ensures that
HEAD is locked correctly while its reflog is being modified, as well as
being cheaper (HEAD only needs to be resolved once).
This makes use of a new function, lock_raw_ref(), which is analogous to
read_raw_ref(), but acquires a lock on the reference before reading it.
This change still has two problems:
* There are redundant read_ref_full() reference lookups.
* It is still possible to get incorrect reflogs for symbolic references
if there is a concurrent update by another process, since the old_oid
of a symref is determined before the lock on the pointed-to ref is
held.
Both problems will soon be fixed.
Signed-off-by: David Turner <dturner@twopensource.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
WIP
2016-04-25 21:56:07 +08:00
|
|
|
item->util = new_update;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* update is for a symref that points at referent and doesn't have
|
2017-11-05 16:42:06 +08:00
|
|
|
* REF_NO_DEREF set. Split it into two updates:
|
|
|
|
* - The original update, but with REF_LOG_ONLY and REF_NO_DEREF set
|
refs: resolve symbolic refs first
Before committing ref updates, split symbolic ref updates into two
parts: an update to the underlying ref, and a log-only update to the
symbolic ref. This ensures that both references are locked correctly
during the transaction, including while their reflogs are updated.
Similarly, if the reference pointed to by HEAD is modified directly, add
a separate log-only update to HEAD, rather than leaving the job of
updating HEAD's reflog to commit_ref_update(). This change ensures that
HEAD is locked correctly while its reflog is being modified, as well as
being cheaper (HEAD only needs to be resolved once).
This makes use of a new function, lock_raw_ref(), which is analogous to
read_raw_ref(), but acquires a lock on the reference before reading it.
This change still has two problems:
* There are redundant read_ref_full() reference lookups.
* It is still possible to get incorrect reflogs for symbolic references
if there is a concurrent update by another process, since the old_oid
of a symref is determined before the lock on the pointed-to ref is
held.
Both problems will soon be fixed.
Signed-off-by: David Turner <dturner@twopensource.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
WIP
2016-04-25 21:56:07 +08:00
|
|
|
* - A new, separate update for the referent reference
|
|
|
|
* Note that the new update will itself be subject to splitting when
|
|
|
|
* the iteration gets to it.
|
|
|
|
*/
|
2019-02-14 13:50:46 +08:00
|
|
|
static int split_symref_update(struct ref_update *update,
|
refs: resolve symbolic refs first
Before committing ref updates, split symbolic ref updates into two
parts: an update to the underlying ref, and a log-only update to the
symbolic ref. This ensures that both references are locked correctly
during the transaction, including while their reflogs are updated.
Similarly, if the reference pointed to by HEAD is modified directly, add
a separate log-only update to HEAD, rather than leaving the job of
updating HEAD's reflog to commit_ref_update(). This change ensures that
HEAD is locked correctly while its reflog is being modified, as well as
being cheaper (HEAD only needs to be resolved once).
This makes use of a new function, lock_raw_ref(), which is analogous to
read_raw_ref(), but acquires a lock on the reference before reading it.
This change still has two problems:
* There are redundant read_ref_full() reference lookups.
* It is still possible to get incorrect reflogs for symbolic references
if there is a concurrent update by another process, since the old_oid
of a symref is determined before the lock on the pointed-to ref is
held.
Both problems will soon be fixed.
Signed-off-by: David Turner <dturner@twopensource.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
WIP
2016-04-25 21:56:07 +08:00
|
|
|
const char *referent,
|
|
|
|
struct ref_transaction *transaction,
|
|
|
|
struct string_list *affected_refnames,
|
|
|
|
struct strbuf *err)
|
|
|
|
{
|
|
|
|
struct string_list_item *item;
|
|
|
|
struct ref_update *new_update;
|
|
|
|
unsigned int new_flags;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* First make sure that referent is not already in the
|
refs/files-backend: add longer-scoped copy of string to list
split_symref_update() receives a string-pointer `referent` and adds it
to the list of `affected_refnames`. The list simply holds on to the
pointers it is given, it does not copy the strings and it does not ever
free them. The `referent` string in split_symref_update() belongs to a
string buffer in the caller. After we return, the string will be leaked.
In the next patch, we want to properly release the string buffer in the
caller, but we can't safely do so until we've made sure that
`affected_refnames` will not be holding on to a pointer to the string.
We could configure the list to handle its own resources, but it would
mean some alloc/free-churning. The list is already handling other
strings (through other code paths) which we do not need to worry about,
and we'd be memory-churning those strings too, completely unnecessary.
Observe that split_symref_update() creates a `new_update`-object through
ref_transaction_add_update(), after which `new_update->refname` is a
copy of `referent`. The difference is, this copy will be freed, and it
will be freed *after* `affected_refnames` has been cleared.
Rearrange the handling of `referent`, so that we don't add it directly
to `affected_refnames`. Instead, first just check whether `referent`
exists in the string list, and later add `new_update->refname`.
Helped-by: Michael Haggerty <mhagger@alum.mit.edu>
Reviewed-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Martin Ågren <martin.agren@gmail.com>
Reviewed-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-09-09 14:57:15 +08:00
|
|
|
* transaction. This check is O(lg N) in the transaction
|
refs: resolve symbolic refs first
Before committing ref updates, split symbolic ref updates into two
parts: an update to the underlying ref, and a log-only update to the
symbolic ref. This ensures that both references are locked correctly
during the transaction, including while their reflogs are updated.
Similarly, if the reference pointed to by HEAD is modified directly, add
a separate log-only update to HEAD, rather than leaving the job of
updating HEAD's reflog to commit_ref_update(). This change ensures that
HEAD is locked correctly while its reflog is being modified, as well as
being cheaper (HEAD only needs to be resolved once).
This makes use of a new function, lock_raw_ref(), which is analogous to
read_raw_ref(), but acquires a lock on the reference before reading it.
This change still has two problems:
* There are redundant read_ref_full() reference lookups.
* It is still possible to get incorrect reflogs for symbolic references
if there is a concurrent update by another process, since the old_oid
of a symref is determined before the lock on the pointed-to ref is
held.
Both problems will soon be fixed.
Signed-off-by: David Turner <dturner@twopensource.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
WIP
2016-04-25 21:56:07 +08:00
|
|
|
* size, but it happens at most once per symref in a
|
|
|
|
* transaction.
|
|
|
|
*/
|
refs/files-backend: add longer-scoped copy of string to list
split_symref_update() receives a string-pointer `referent` and adds it
to the list of `affected_refnames`. The list simply holds on to the
pointers it is given, it does not copy the strings and it does not ever
free them. The `referent` string in split_symref_update() belongs to a
string buffer in the caller. After we return, the string will be leaked.
In the next patch, we want to properly release the string buffer in the
caller, but we can't safely do so until we've made sure that
`affected_refnames` will not be holding on to a pointer to the string.
We could configure the list to handle its own resources, but it would
mean some alloc/free-churning. The list is already handling other
strings (through other code paths) which we do not need to worry about,
and we'd be memory-churning those strings too, completely unnecessary.
Observe that split_symref_update() creates a `new_update`-object through
ref_transaction_add_update(), after which `new_update->refname` is a
copy of `referent`. The difference is, this copy will be freed, and it
will be freed *after* `affected_refnames` has been cleared.
Rearrange the handling of `referent`, so that we don't add it directly
to `affected_refnames`. Instead, first just check whether `referent`
exists in the string list, and later add `new_update->refname`.
Helped-by: Michael Haggerty <mhagger@alum.mit.edu>
Reviewed-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Martin Ågren <martin.agren@gmail.com>
Reviewed-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-09-09 14:57:15 +08:00
|
|
|
if (string_list_has_string(affected_refnames, referent)) {
|
|
|
|
/* An entry already exists */
|
refs: resolve symbolic refs first
Before committing ref updates, split symbolic ref updates into two
parts: an update to the underlying ref, and a log-only update to the
symbolic ref. This ensures that both references are locked correctly
during the transaction, including while their reflogs are updated.
Similarly, if the reference pointed to by HEAD is modified directly, add
a separate log-only update to HEAD, rather than leaving the job of
updating HEAD's reflog to commit_ref_update(). This change ensures that
HEAD is locked correctly while its reflog is being modified, as well as
being cheaper (HEAD only needs to be resolved once).
This makes use of a new function, lock_raw_ref(), which is analogous to
read_raw_ref(), but acquires a lock on the reference before reading it.
This change still has two problems:
* There are redundant read_ref_full() reference lookups.
* It is still possible to get incorrect reflogs for symbolic references
if there is a concurrent update by another process, since the old_oid
of a symref is determined before the lock on the pointed-to ref is
held.
Both problems will soon be fixed.
Signed-off-by: David Turner <dturner@twopensource.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
WIP
2016-04-25 21:56:07 +08:00
|
|
|
strbuf_addf(err,
|
|
|
|
"multiple updates for '%s' (including one "
|
|
|
|
"via symref '%s') are not allowed",
|
|
|
|
referent, update->refname);
|
|
|
|
return TRANSACTION_NAME_CONFLICT;
|
|
|
|
}
|
|
|
|
|
|
|
|
new_flags = update->flags;
|
|
|
|
if (!strcmp(update->refname, "HEAD")) {
|
|
|
|
/*
|
|
|
|
* Record that the new update came via HEAD, so that
|
|
|
|
* when we process it, split_head_update() doesn't try
|
|
|
|
* to add another reflog update for HEAD. Note that
|
|
|
|
* this bit will be propagated if the new_update
|
|
|
|
* itself needs to be split.
|
|
|
|
*/
|
|
|
|
new_flags |= REF_UPDATE_VIA_HEAD;
|
|
|
|
}
|
|
|
|
|
|
|
|
new_update = ref_transaction_add_update(
|
|
|
|
transaction, referent, new_flags,
|
2017-10-16 06:06:53 +08:00
|
|
|
&update->new_oid, &update->old_oid,
|
refs: resolve symbolic refs first
Before committing ref updates, split symbolic ref updates into two
parts: an update to the underlying ref, and a log-only update to the
symbolic ref. This ensures that both references are locked correctly
during the transaction, including while their reflogs are updated.
Similarly, if the reference pointed to by HEAD is modified directly, add
a separate log-only update to HEAD, rather than leaving the job of
updating HEAD's reflog to commit_ref_update(). This change ensures that
HEAD is locked correctly while its reflog is being modified, as well as
being cheaper (HEAD only needs to be resolved once).
This makes use of a new function, lock_raw_ref(), which is analogous to
read_raw_ref(), but acquires a lock on the reference before reading it.
This change still has two problems:
* There are redundant read_ref_full() reference lookups.
* It is still possible to get incorrect reflogs for symbolic references
if there is a concurrent update by another process, since the old_oid
of a symref is determined before the lock on the pointed-to ref is
held.
Both problems will soon be fixed.
Signed-off-by: David Turner <dturner@twopensource.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
WIP
2016-04-25 21:56:07 +08:00
|
|
|
update->msg);
|
|
|
|
|
2016-04-25 23:48:32 +08:00
|
|
|
new_update->parent_update = update;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Change the symbolic ref update to log only. Also, it
|
2017-11-05 16:42:09 +08:00
|
|
|
* doesn't need to check its old OID value, as that will be
|
2016-04-25 23:48:32 +08:00
|
|
|
* done when new_update is processed.
|
|
|
|
*/
|
2017-11-05 16:42:06 +08:00
|
|
|
update->flags |= REF_LOG_ONLY | REF_NO_DEREF;
|
2016-04-25 23:48:32 +08:00
|
|
|
update->flags &= ~REF_HAVE_OLD;
|
refs: resolve symbolic refs first
Before committing ref updates, split symbolic ref updates into two
parts: an update to the underlying ref, and a log-only update to the
symbolic ref. This ensures that both references are locked correctly
during the transaction, including while their reflogs are updated.
Similarly, if the reference pointed to by HEAD is modified directly, add
a separate log-only update to HEAD, rather than leaving the job of
updating HEAD's reflog to commit_ref_update(). This change ensures that
HEAD is locked correctly while its reflog is being modified, as well as
being cheaper (HEAD only needs to be resolved once).
This makes use of a new function, lock_raw_ref(), which is analogous to
read_raw_ref(), but acquires a lock on the reference before reading it.
This change still has two problems:
* There are redundant read_ref_full() reference lookups.
* It is still possible to get incorrect reflogs for symbolic references
if there is a concurrent update by another process, since the old_oid
of a symref is determined before the lock on the pointed-to ref is
held.
Both problems will soon be fixed.
Signed-off-by: David Turner <dturner@twopensource.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
WIP
2016-04-25 21:56:07 +08:00
|
|
|
|
refs/files-backend: add longer-scoped copy of string to list
split_symref_update() receives a string-pointer `referent` and adds it
to the list of `affected_refnames`. The list simply holds on to the
pointers it is given, it does not copy the strings and it does not ever
free them. The `referent` string in split_symref_update() belongs to a
string buffer in the caller. After we return, the string will be leaked.
In the next patch, we want to properly release the string buffer in the
caller, but we can't safely do so until we've made sure that
`affected_refnames` will not be holding on to a pointer to the string.
We could configure the list to handle its own resources, but it would
mean some alloc/free-churning. The list is already handling other
strings (through other code paths) which we do not need to worry about,
and we'd be memory-churning those strings too, completely unnecessary.
Observe that split_symref_update() creates a `new_update`-object through
ref_transaction_add_update(), after which `new_update->refname` is a
copy of `referent`. The difference is, this copy will be freed, and it
will be freed *after* `affected_refnames` has been cleared.
Rearrange the handling of `referent`, so that we don't add it directly
to `affected_refnames`. Instead, first just check whether `referent`
exists in the string list, and later add `new_update->refname`.
Helped-by: Michael Haggerty <mhagger@alum.mit.edu>
Reviewed-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Martin Ågren <martin.agren@gmail.com>
Reviewed-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-09-09 14:57:15 +08:00
|
|
|
/*
|
|
|
|
* Add the referent. This insertion is O(N) in the transaction
|
|
|
|
* size, but it happens at most once per symref in a
|
|
|
|
* transaction. Make sure to add new_update->refname, which will
|
|
|
|
* be valid as long as affected_refnames is in use, and NOT
|
|
|
|
* referent, which might soon be freed by our caller.
|
|
|
|
*/
|
|
|
|
item = string_list_insert(affected_refnames, new_update->refname);
|
|
|
|
if (item->util)
|
|
|
|
BUG("%s unexpectedly found in affected_refnames",
|
|
|
|
new_update->refname);
|
refs: resolve symbolic refs first
Before committing ref updates, split symbolic ref updates into two
parts: an update to the underlying ref, and a log-only update to the
symbolic ref. This ensures that both references are locked correctly
during the transaction, including while their reflogs are updated.
Similarly, if the reference pointed to by HEAD is modified directly, add
a separate log-only update to HEAD, rather than leaving the job of
updating HEAD's reflog to commit_ref_update(). This change ensures that
HEAD is locked correctly while its reflog is being modified, as well as
being cheaper (HEAD only needs to be resolved once).
This makes use of a new function, lock_raw_ref(), which is analogous to
read_raw_ref(), but acquires a lock on the reference before reading it.
This change still has two problems:
* There are redundant read_ref_full() reference lookups.
* It is still possible to get incorrect reflogs for symbolic references
if there is a concurrent update by another process, since the old_oid
of a symref is determined before the lock on the pointed-to ref is
held.
Both problems will soon be fixed.
Signed-off-by: David Turner <dturner@twopensource.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
WIP
2016-04-25 21:56:07 +08:00
|
|
|
item->util = new_update;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2016-04-25 23:48:32 +08:00
|
|
|
/*
|
|
|
|
* Return the refname under which update was originally requested.
|
|
|
|
*/
|
|
|
|
static const char *original_update_refname(struct ref_update *update)
|
|
|
|
{
|
|
|
|
while (update->parent_update)
|
|
|
|
update = update->parent_update;
|
|
|
|
|
|
|
|
return update->refname;
|
|
|
|
}
|
|
|
|
|
2016-06-07 15:29:23 +08:00
|
|
|
/*
|
|
|
|
* Check whether the REF_HAVE_OLD and old_oid values stored in update
|
|
|
|
* are consistent with oid, which is the reference's current value. If
|
|
|
|
* everything is OK, return 0; otherwise, write an error message to
|
|
|
|
* err and return -1.
|
|
|
|
*/
|
|
|
|
static int check_old_oid(struct ref_update *update, struct object_id *oid,
|
|
|
|
struct strbuf *err)
|
|
|
|
{
|
|
|
|
if (!(update->flags & REF_HAVE_OLD) ||
|
convert "oidcmp() == 0" to oideq()
Using the more restrictive oideq() should, in the long run,
give the compiler more opportunities to optimize these
callsites. For now, this conversion should be a complete
noop with respect to the generated code.
The result is also perhaps a little more readable, as it
avoids the "zero is equal" idiom. Since it's so prevalent in
C, I think seasoned programmers tend not to even notice it
anymore, but it can sometimes make for awkward double
negations (e.g., we can drop a few !!oidcmp() instances
here).
This patch was generated almost entirely by the included
coccinelle patch. This mechanical conversion should be
completely safe, because we check explicitly for cases where
oidcmp() is compared to 0, which is what oideq() is doing
under the hood. Note that we don't have to catch "!oidcmp()"
separately; coccinelle's standard isomorphisms make sure the
two are treated equivalently.
I say "almost" because I did hand-edit the coccinelle output
to fix up a few style violations (it mostly keeps the
original formatting, but sometimes unwraps long lines).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-08-29 05:22:40 +08:00
|
|
|
oideq(oid, &update->old_oid))
|
2016-06-07 15:29:23 +08:00
|
|
|
return 0;
|
|
|
|
|
2017-05-07 06:10:23 +08:00
|
|
|
if (is_null_oid(&update->old_oid))
|
2016-06-07 15:29:23 +08:00
|
|
|
strbuf_addf(err, "cannot lock ref '%s': "
|
|
|
|
"reference already exists",
|
|
|
|
original_update_refname(update));
|
|
|
|
else if (is_null_oid(oid))
|
|
|
|
strbuf_addf(err, "cannot lock ref '%s': "
|
|
|
|
"reference is missing but expected %s",
|
|
|
|
original_update_refname(update),
|
2017-05-07 06:10:23 +08:00
|
|
|
oid_to_hex(&update->old_oid));
|
2016-06-07 15:29:23 +08:00
|
|
|
else
|
|
|
|
strbuf_addf(err, "cannot lock ref '%s': "
|
|
|
|
"is at %s but expected %s",
|
|
|
|
original_update_refname(update),
|
|
|
|
oid_to_hex(oid),
|
2017-05-07 06:10:23 +08:00
|
|
|
oid_to_hex(&update->old_oid));
|
2016-06-07 15:29:23 +08:00
|
|
|
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
refs: resolve symbolic refs first
Before committing ref updates, split symbolic ref updates into two
parts: an update to the underlying ref, and a log-only update to the
symbolic ref. This ensures that both references are locked correctly
during the transaction, including while their reflogs are updated.
Similarly, if the reference pointed to by HEAD is modified directly, add
a separate log-only update to HEAD, rather than leaving the job of
updating HEAD's reflog to commit_ref_update(). This change ensures that
HEAD is locked correctly while its reflog is being modified, as well as
being cheaper (HEAD only needs to be resolved once).
This makes use of a new function, lock_raw_ref(), which is analogous to
read_raw_ref(), but acquires a lock on the reference before reading it.
This change still has two problems:
* There are redundant read_ref_full() reference lookups.
* It is still possible to get incorrect reflogs for symbolic references
if there is a concurrent update by another process, since the old_oid
of a symref is determined before the lock on the pointed-to ref is
held.
Both problems will soon be fixed.
Signed-off-by: David Turner <dturner@twopensource.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
WIP
2016-04-25 21:56:07 +08:00
|
|
|
/*
|
|
|
|
* Prepare for carrying out update:
|
|
|
|
* - Lock the reference referred to by update.
|
|
|
|
* - Read the reference under lock.
|
2017-11-05 16:42:09 +08:00
|
|
|
* - Check that its old OID value (if specified) is correct, and in
|
refs: resolve symbolic refs first
Before committing ref updates, split symbolic ref updates into two
parts: an update to the underlying ref, and a log-only update to the
symbolic ref. This ensures that both references are locked correctly
during the transaction, including while their reflogs are updated.
Similarly, if the reference pointed to by HEAD is modified directly, add
a separate log-only update to HEAD, rather than leaving the job of
updating HEAD's reflog to commit_ref_update(). This change ensures that
HEAD is locked correctly while its reflog is being modified, as well as
being cheaper (HEAD only needs to be resolved once).
This makes use of a new function, lock_raw_ref(), which is analogous to
read_raw_ref(), but acquires a lock on the reference before reading it.
This change still has two problems:
* There are redundant read_ref_full() reference lookups.
* It is still possible to get incorrect reflogs for symbolic references
if there is a concurrent update by another process, since the old_oid
of a symref is determined before the lock on the pointed-to ref is
held.
Both problems will soon be fixed.
Signed-off-by: David Turner <dturner@twopensource.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
WIP
2016-04-25 21:56:07 +08:00
|
|
|
* any case record it in update->lock->old_oid for later use when
|
|
|
|
* writing the reflog.
|
2017-11-05 16:42:06 +08:00
|
|
|
* - If it is a symref update without REF_NO_DEREF, split it up into a
|
refs: resolve symbolic refs first
Before committing ref updates, split symbolic ref updates into two
parts: an update to the underlying ref, and a log-only update to the
symbolic ref. This ensures that both references are locked correctly
during the transaction, including while their reflogs are updated.
Similarly, if the reference pointed to by HEAD is modified directly, add
a separate log-only update to HEAD, rather than leaving the job of
updating HEAD's reflog to commit_ref_update(). This change ensures that
HEAD is locked correctly while its reflog is being modified, as well as
being cheaper (HEAD only needs to be resolved once).
This makes use of a new function, lock_raw_ref(), which is analogous to
read_raw_ref(), but acquires a lock on the reference before reading it.
This change still has two problems:
* There are redundant read_ref_full() reference lookups.
* It is still possible to get incorrect reflogs for symbolic references
if there is a concurrent update by another process, since the old_oid
of a symref is determined before the lock on the pointed-to ref is
held.
Both problems will soon be fixed.
Signed-off-by: David Turner <dturner@twopensource.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
WIP
2016-04-25 21:56:07 +08:00
|
|
|
* REF_LOG_ONLY update of the symref and add a separate update for
|
|
|
|
* the referent to transaction.
|
|
|
|
* - If it is an update of head_ref, add a corresponding REF_LOG_ONLY
|
|
|
|
* update of HEAD.
|
2016-04-24 14:58:41 +08:00
|
|
|
*/
|
2016-09-05 00:08:33 +08:00
|
|
|
static int lock_ref_for_update(struct files_ref_store *refs,
|
|
|
|
struct ref_update *update,
|
2016-04-24 14:58:41 +08:00
|
|
|
struct ref_transaction *transaction,
|
refs: resolve symbolic refs first
Before committing ref updates, split symbolic ref updates into two
parts: an update to the underlying ref, and a log-only update to the
symbolic ref. This ensures that both references are locked correctly
during the transaction, including while their reflogs are updated.
Similarly, if the reference pointed to by HEAD is modified directly, add
a separate log-only update to HEAD, rather than leaving the job of
updating HEAD's reflog to commit_ref_update(). This change ensures that
HEAD is locked correctly while its reflog is being modified, as well as
being cheaper (HEAD only needs to be resolved once).
This makes use of a new function, lock_raw_ref(), which is analogous to
read_raw_ref(), but acquires a lock on the reference before reading it.
This change still has two problems:
* There are redundant read_ref_full() reference lookups.
* It is still possible to get incorrect reflogs for symbolic references
if there is a concurrent update by another process, since the old_oid
of a symref is determined before the lock on the pointed-to ref is
held.
Both problems will soon be fixed.
Signed-off-by: David Turner <dturner@twopensource.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
WIP
2016-04-25 21:56:07 +08:00
|
|
|
const char *head_ref,
|
2016-04-24 14:58:41 +08:00
|
|
|
struct string_list *affected_refnames,
|
|
|
|
struct strbuf *err)
|
|
|
|
{
|
refs: resolve symbolic refs first
Before committing ref updates, split symbolic ref updates into two
parts: an update to the underlying ref, and a log-only update to the
symbolic ref. This ensures that both references are locked correctly
during the transaction, including while their reflogs are updated.
Similarly, if the reference pointed to by HEAD is modified directly, add
a separate log-only update to HEAD, rather than leaving the job of
updating HEAD's reflog to commit_ref_update(). This change ensures that
HEAD is locked correctly while its reflog is being modified, as well as
being cheaper (HEAD only needs to be resolved once).
This makes use of a new function, lock_raw_ref(), which is analogous to
read_raw_ref(), but acquires a lock on the reference before reading it.
This change still has two problems:
* There are redundant read_ref_full() reference lookups.
* It is still possible to get incorrect reflogs for symbolic references
if there is a concurrent update by another process, since the old_oid
of a symref is determined before the lock on the pointed-to ref is
held.
Both problems will soon be fixed.
Signed-off-by: David Turner <dturner@twopensource.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
WIP
2016-04-25 21:56:07 +08:00
|
|
|
struct strbuf referent = STRBUF_INIT;
|
|
|
|
int mustexist = (update->flags & REF_HAVE_OLD) &&
|
2017-05-07 06:10:23 +08:00
|
|
|
!is_null_oid(&update->old_oid);
|
2017-09-09 14:57:16 +08:00
|
|
|
int ret = 0;
|
refs: resolve symbolic refs first
Before committing ref updates, split symbolic ref updates into two
parts: an update to the underlying ref, and a log-only update to the
symbolic ref. This ensures that both references are locked correctly
during the transaction, including while their reflogs are updated.
Similarly, if the reference pointed to by HEAD is modified directly, add
a separate log-only update to HEAD, rather than leaving the job of
updating HEAD's reflog to commit_ref_update(). This change ensures that
HEAD is locked correctly while its reflog is being modified, as well as
being cheaper (HEAD only needs to be resolved once).
This makes use of a new function, lock_raw_ref(), which is analogous to
read_raw_ref(), but acquires a lock on the reference before reading it.
This change still has two problems:
* There are redundant read_ref_full() reference lookups.
* It is still possible to get incorrect reflogs for symbolic references
if there is a concurrent update by another process, since the old_oid
of a symref is determined before the lock on the pointed-to ref is
held.
Both problems will soon be fixed.
Signed-off-by: David Turner <dturner@twopensource.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
WIP
2016-04-25 21:56:07 +08:00
|
|
|
struct ref_lock *lock;
|
2016-04-24 14:58:41 +08:00
|
|
|
|
2017-02-10 19:16:16 +08:00
|
|
|
files_assert_main_repository(refs, "lock_ref_for_update");
|
2016-09-05 00:08:33 +08:00
|
|
|
|
2017-05-07 06:10:23 +08:00
|
|
|
if ((update->flags & REF_HAVE_NEW) && is_null_oid(&update->new_oid))
|
2016-04-24 14:58:41 +08:00
|
|
|
update->flags |= REF_DELETING;
|
refs: resolve symbolic refs first
Before committing ref updates, split symbolic ref updates into two
parts: an update to the underlying ref, and a log-only update to the
symbolic ref. This ensures that both references are locked correctly
during the transaction, including while their reflogs are updated.
Similarly, if the reference pointed to by HEAD is modified directly, add
a separate log-only update to HEAD, rather than leaving the job of
updating HEAD's reflog to commit_ref_update(). This change ensures that
HEAD is locked correctly while its reflog is being modified, as well as
being cheaper (HEAD only needs to be resolved once).
This makes use of a new function, lock_raw_ref(), which is analogous to
read_raw_ref(), but acquires a lock on the reference before reading it.
This change still has two problems:
* There are redundant read_ref_full() reference lookups.
* It is still possible to get incorrect reflogs for symbolic references
if there is a concurrent update by another process, since the old_oid
of a symref is determined before the lock on the pointed-to ref is
held.
Both problems will soon be fixed.
Signed-off-by: David Turner <dturner@twopensource.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
WIP
2016-04-25 21:56:07 +08:00
|
|
|
|
|
|
|
if (head_ref) {
|
|
|
|
ret = split_head_update(update, transaction, head_ref,
|
|
|
|
affected_refnames, err);
|
|
|
|
if (ret)
|
2017-09-09 14:57:16 +08:00
|
|
|
goto out;
|
refs: resolve symbolic refs first
Before committing ref updates, split symbolic ref updates into two
parts: an update to the underlying ref, and a log-only update to the
symbolic ref. This ensures that both references are locked correctly
during the transaction, including while their reflogs are updated.
Similarly, if the reference pointed to by HEAD is modified directly, add
a separate log-only update to HEAD, rather than leaving the job of
updating HEAD's reflog to commit_ref_update(). This change ensures that
HEAD is locked correctly while its reflog is being modified, as well as
being cheaper (HEAD only needs to be resolved once).
This makes use of a new function, lock_raw_ref(), which is analogous to
read_raw_ref(), but acquires a lock on the reference before reading it.
This change still has two problems:
* There are redundant read_ref_full() reference lookups.
* It is still possible to get incorrect reflogs for symbolic references
if there is a concurrent update by another process, since the old_oid
of a symref is determined before the lock on the pointed-to ref is
held.
Both problems will soon be fixed.
Signed-off-by: David Turner <dturner@twopensource.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
WIP
2016-04-25 21:56:07 +08:00
|
|
|
}
|
|
|
|
|
2016-09-05 00:08:31 +08:00
|
|
|
ret = lock_raw_ref(refs, update->refname, mustexist,
|
2021-08-23 19:36:06 +08:00
|
|
|
affected_refnames,
|
2016-09-05 00:08:43 +08:00
|
|
|
&lock, &referent,
|
refs: resolve symbolic refs first
Before committing ref updates, split symbolic ref updates into two
parts: an update to the underlying ref, and a log-only update to the
symbolic ref. This ensures that both references are locked correctly
during the transaction, including while their reflogs are updated.
Similarly, if the reference pointed to by HEAD is modified directly, add
a separate log-only update to HEAD, rather than leaving the job of
updating HEAD's reflog to commit_ref_update(). This change ensures that
HEAD is locked correctly while its reflog is being modified, as well as
being cheaper (HEAD only needs to be resolved once).
This makes use of a new function, lock_raw_ref(), which is analogous to
read_raw_ref(), but acquires a lock on the reference before reading it.
This change still has two problems:
* There are redundant read_ref_full() reference lookups.
* It is still possible to get incorrect reflogs for symbolic references
if there is a concurrent update by another process, since the old_oid
of a symref is determined before the lock on the pointed-to ref is
held.
Both problems will soon be fixed.
Signed-off-by: David Turner <dturner@twopensource.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
WIP
2016-04-25 21:56:07 +08:00
|
|
|
&update->type, err);
|
|
|
|
if (ret) {
|
2016-04-24 14:58:41 +08:00
|
|
|
char *reason;
|
|
|
|
|
|
|
|
reason = strbuf_detach(err, NULL);
|
|
|
|
strbuf_addf(err, "cannot lock ref '%s': %s",
|
2016-06-07 15:29:23 +08:00
|
|
|
original_update_refname(update), reason);
|
2016-04-24 14:58:41 +08:00
|
|
|
free(reason);
|
2017-09-09 14:57:16 +08:00
|
|
|
goto out;
|
2016-04-24 14:58:41 +08:00
|
|
|
}
|
refs: resolve symbolic refs first
Before committing ref updates, split symbolic ref updates into two
parts: an update to the underlying ref, and a log-only update to the
symbolic ref. This ensures that both references are locked correctly
during the transaction, including while their reflogs are updated.
Similarly, if the reference pointed to by HEAD is modified directly, add
a separate log-only update to HEAD, rather than leaving the job of
updating HEAD's reflog to commit_ref_update(). This change ensures that
HEAD is locked correctly while its reflog is being modified, as well as
being cheaper (HEAD only needs to be resolved once).
This makes use of a new function, lock_raw_ref(), which is analogous to
read_raw_ref(), but acquires a lock on the reference before reading it.
This change still has two problems:
* There are redundant read_ref_full() reference lookups.
* It is still possible to get incorrect reflogs for symbolic references
if there is a concurrent update by another process, since the old_oid
of a symref is determined before the lock on the pointed-to ref is
held.
Both problems will soon be fixed.
Signed-off-by: David Turner <dturner@twopensource.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
WIP
2016-04-25 21:56:07 +08:00
|
|
|
|
2016-09-05 00:08:43 +08:00
|
|
|
update->backend_data = lock;
|
refs: resolve symbolic refs first
Before committing ref updates, split symbolic ref updates into two
parts: an update to the underlying ref, and a log-only update to the
symbolic ref. This ensures that both references are locked correctly
during the transaction, including while their reflogs are updated.
Similarly, if the reference pointed to by HEAD is modified directly, add
a separate log-only update to HEAD, rather than leaving the job of
updating HEAD's reflog to commit_ref_update(). This change ensures that
HEAD is locked correctly while its reflog is being modified, as well as
being cheaper (HEAD only needs to be resolved once).
This makes use of a new function, lock_raw_ref(), which is analogous to
read_raw_ref(), but acquires a lock on the reference before reading it.
This change still has two problems:
* There are redundant read_ref_full() reference lookups.
* It is still possible to get incorrect reflogs for symbolic references
if there is a concurrent update by another process, since the old_oid
of a symref is determined before the lock on the pointed-to ref is
held.
Both problems will soon be fixed.
Signed-off-by: David Turner <dturner@twopensource.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
WIP
2016-04-25 21:56:07 +08:00
|
|
|
|
2016-04-25 23:38:35 +08:00
|
|
|
if (update->type & REF_ISSYMREF) {
|
2017-11-05 16:42:06 +08:00
|
|
|
if (update->flags & REF_NO_DEREF) {
|
2016-04-25 23:48:32 +08:00
|
|
|
/*
|
|
|
|
* We won't be reading the referent as part of
|
|
|
|
* the transaction, so we have to read it here
|
2017-11-05 16:42:09 +08:00
|
|
|
* to record and possibly check old_oid:
|
2016-04-25 23:48:32 +08:00
|
|
|
*/
|
2021-10-16 17:39:27 +08:00
|
|
|
if (!refs_resolve_ref_unsafe(&refs->base,
|
2021-10-16 17:39:14 +08:00
|
|
|
referent.buf, 0,
|
2022-01-26 22:37:01 +08:00
|
|
|
&lock->old_oid, NULL)) {
|
2016-04-25 23:48:32 +08:00
|
|
|
if (update->flags & REF_HAVE_OLD) {
|
|
|
|
strbuf_addf(err, "cannot lock ref '%s': "
|
2016-06-07 15:29:23 +08:00
|
|
|
"error reading reference",
|
|
|
|
original_update_refname(update));
|
2017-09-09 14:57:17 +08:00
|
|
|
ret = TRANSACTION_GENERIC_ERROR;
|
2017-09-09 14:57:16 +08:00
|
|
|
goto out;
|
2016-04-25 23:48:32 +08:00
|
|
|
}
|
2016-06-07 15:29:23 +08:00
|
|
|
} else if (check_old_oid(update, &lock->old_oid, err)) {
|
2017-09-09 14:57:16 +08:00
|
|
|
ret = TRANSACTION_GENERIC_ERROR;
|
|
|
|
goto out;
|
2016-04-25 23:38:35 +08:00
|
|
|
}
|
2016-04-25 23:48:32 +08:00
|
|
|
} else {
|
|
|
|
/*
|
|
|
|
* Create a new update for the reference this
|
|
|
|
* symref is pointing at. Also, we will record
|
2017-11-05 16:42:09 +08:00
|
|
|
* and verify old_oid for this update as part
|
2016-04-25 23:48:32 +08:00
|
|
|
* of processing the split-off update, so we
|
|
|
|
* don't have to do it here.
|
|
|
|
*/
|
2019-02-14 13:50:46 +08:00
|
|
|
ret = split_symref_update(update,
|
2016-09-05 00:08:35 +08:00
|
|
|
referent.buf, transaction,
|
refs: resolve symbolic refs first
Before committing ref updates, split symbolic ref updates into two
parts: an update to the underlying ref, and a log-only update to the
symbolic ref. This ensures that both references are locked correctly
during the transaction, including while their reflogs are updated.
Similarly, if the reference pointed to by HEAD is modified directly, add
a separate log-only update to HEAD, rather than leaving the job of
updating HEAD's reflog to commit_ref_update(). This change ensures that
HEAD is locked correctly while its reflog is being modified, as well as
being cheaper (HEAD only needs to be resolved once).
This makes use of a new function, lock_raw_ref(), which is analogous to
read_raw_ref(), but acquires a lock on the reference before reading it.
This change still has two problems:
* There are redundant read_ref_full() reference lookups.
* It is still possible to get incorrect reflogs for symbolic references
if there is a concurrent update by another process, since the old_oid
of a symref is determined before the lock on the pointed-to ref is
held.
Both problems will soon be fixed.
Signed-off-by: David Turner <dturner@twopensource.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
WIP
2016-04-25 21:56:07 +08:00
|
|
|
affected_refnames, err);
|
|
|
|
if (ret)
|
2017-09-09 14:57:16 +08:00
|
|
|
goto out;
|
refs: resolve symbolic refs first
Before committing ref updates, split symbolic ref updates into two
parts: an update to the underlying ref, and a log-only update to the
symbolic ref. This ensures that both references are locked correctly
during the transaction, including while their reflogs are updated.
Similarly, if the reference pointed to by HEAD is modified directly, add
a separate log-only update to HEAD, rather than leaving the job of
updating HEAD's reflog to commit_ref_update(). This change ensures that
HEAD is locked correctly while its reflog is being modified, as well as
being cheaper (HEAD only needs to be resolved once).
This makes use of a new function, lock_raw_ref(), which is analogous to
read_raw_ref(), but acquires a lock on the reference before reading it.
This change still has two problems:
* There are redundant read_ref_full() reference lookups.
* It is still possible to get incorrect reflogs for symbolic references
if there is a concurrent update by another process, since the old_oid
of a symref is determined before the lock on the pointed-to ref is
held.
Both problems will soon be fixed.
Signed-off-by: David Turner <dturner@twopensource.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
WIP
2016-04-25 21:56:07 +08:00
|
|
|
}
|
2016-04-25 23:48:32 +08:00
|
|
|
} else {
|
|
|
|
struct ref_update *parent_update;
|
2016-04-25 23:38:35 +08:00
|
|
|
|
2017-09-09 14:57:16 +08:00
|
|
|
if (check_old_oid(update, &lock->old_oid, err)) {
|
|
|
|
ret = TRANSACTION_GENERIC_ERROR;
|
|
|
|
goto out;
|
|
|
|
}
|
2016-06-07 15:29:23 +08:00
|
|
|
|
2016-04-25 23:48:32 +08:00
|
|
|
/*
|
|
|
|
* If this update is happening indirectly because of a
|
2017-11-05 16:42:09 +08:00
|
|
|
* symref update, record the old OID in the parent
|
2016-04-25 23:48:32 +08:00
|
|
|
* update:
|
|
|
|
*/
|
|
|
|
for (parent_update = update->parent_update;
|
|
|
|
parent_update;
|
|
|
|
parent_update = parent_update->parent_update) {
|
2016-09-05 00:08:43 +08:00
|
|
|
struct ref_lock *parent_lock = parent_update->backend_data;
|
|
|
|
oidcpy(&parent_lock->old_oid, &lock->old_oid);
|
2016-04-25 23:48:32 +08:00
|
|
|
}
|
refs: resolve symbolic refs first
Before committing ref updates, split symbolic ref updates into two
parts: an update to the underlying ref, and a log-only update to the
symbolic ref. This ensures that both references are locked correctly
during the transaction, including while their reflogs are updated.
Similarly, if the reference pointed to by HEAD is modified directly, add
a separate log-only update to HEAD, rather than leaving the job of
updating HEAD's reflog to commit_ref_update(). This change ensures that
HEAD is locked correctly while its reflog is being modified, as well as
being cheaper (HEAD only needs to be resolved once).
This makes use of a new function, lock_raw_ref(), which is analogous to
read_raw_ref(), but acquires a lock on the reference before reading it.
This change still has two problems:
* There are redundant read_ref_full() reference lookups.
* It is still possible to get incorrect reflogs for symbolic references
if there is a concurrent update by another process, since the old_oid
of a symref is determined before the lock on the pointed-to ref is
held.
Both problems will soon be fixed.
Signed-off-by: David Turner <dturner@twopensource.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
WIP
2016-04-25 21:56:07 +08:00
|
|
|
}
|
|
|
|
|
2016-04-24 14:58:41 +08:00
|
|
|
if ((update->flags & REF_HAVE_NEW) &&
|
|
|
|
!(update->flags & REF_DELETING) &&
|
|
|
|
!(update->flags & REF_LOG_ONLY)) {
|
refs: resolve symbolic refs first
Before committing ref updates, split symbolic ref updates into two
parts: an update to the underlying ref, and a log-only update to the
symbolic ref. This ensures that both references are locked correctly
during the transaction, including while their reflogs are updated.
Similarly, if the reference pointed to by HEAD is modified directly, add
a separate log-only update to HEAD, rather than leaving the job of
updating HEAD's reflog to commit_ref_update(). This change ensures that
HEAD is locked correctly while its reflog is being modified, as well as
being cheaper (HEAD only needs to be resolved once).
This makes use of a new function, lock_raw_ref(), which is analogous to
read_raw_ref(), but acquires a lock on the reference before reading it.
This change still has two problems:
* There are redundant read_ref_full() reference lookups.
* It is still possible to get incorrect reflogs for symbolic references
if there is a concurrent update by another process, since the old_oid
of a symref is determined before the lock on the pointed-to ref is
held.
Both problems will soon be fixed.
Signed-off-by: David Turner <dturner@twopensource.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
WIP
2016-04-25 21:56:07 +08:00
|
|
|
if (!(update->type & REF_ISSYMREF) &&
|
convert "oidcmp() == 0" to oideq()
Using the more restrictive oideq() should, in the long run,
give the compiler more opportunities to optimize these
callsites. For now, this conversion should be a complete
noop with respect to the generated code.
The result is also perhaps a little more readable, as it
avoids the "zero is equal" idiom. Since it's so prevalent in
C, I think seasoned programmers tend not to even notice it
anymore, but it can sometimes make for awkward double
negations (e.g., we can drop a few !!oidcmp() instances
here).
This patch was generated almost entirely by the included
coccinelle patch. This mechanical conversion should be
completely safe, because we check explicitly for cases where
oidcmp() is compared to 0, which is what oideq() is doing
under the hood. Note that we don't have to catch "!oidcmp()"
separately; coccinelle's standard isomorphisms make sure the
two are treated equivalently.
I say "almost" because I did hand-edit the coccinelle output
to fix up a few style violations (it mostly keeps the
original formatting, but sometimes unwraps long lines).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-08-29 05:22:40 +08:00
|
|
|
oideq(&lock->old_oid, &update->new_oid)) {
|
2016-04-24 14:58:41 +08:00
|
|
|
/*
|
|
|
|
* The reference already has the desired
|
|
|
|
* value, so we don't need to write it.
|
|
|
|
*/
|
2021-12-07 21:38:17 +08:00
|
|
|
} else if (write_ref_to_lockfile(
|
|
|
|
lock, &update->new_oid,
|
|
|
|
update->flags & REF_SKIP_OID_VERIFICATION,
|
|
|
|
err)) {
|
2016-04-24 14:58:41 +08:00
|
|
|
char *write_err = strbuf_detach(err, NULL);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The lock was freed upon failure of
|
|
|
|
* write_ref_to_lockfile():
|
|
|
|
*/
|
2016-09-05 00:08:43 +08:00
|
|
|
update->backend_data = NULL;
|
2016-04-24 14:58:41 +08:00
|
|
|
strbuf_addf(err,
|
2016-06-07 15:29:23 +08:00
|
|
|
"cannot update ref '%s': %s",
|
2016-04-24 14:58:41 +08:00
|
|
|
update->refname, write_err);
|
|
|
|
free(write_err);
|
2017-09-09 14:57:16 +08:00
|
|
|
ret = TRANSACTION_GENERIC_ERROR;
|
|
|
|
goto out;
|
2016-04-24 14:58:41 +08:00
|
|
|
} else {
|
|
|
|
update->flags |= REF_NEEDS_COMMIT;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (!(update->flags & REF_NEEDS_COMMIT)) {
|
|
|
|
/*
|
|
|
|
* We didn't call write_ref_to_lockfile(), so
|
|
|
|
* the lockfile is still open. Close it to
|
|
|
|
* free up the file descriptor:
|
|
|
|
*/
|
2017-09-05 20:14:33 +08:00
|
|
|
if (close_ref_gently(lock)) {
|
2016-04-24 14:58:41 +08:00
|
|
|
strbuf_addf(err, "couldn't close '%s.lock'",
|
|
|
|
update->refname);
|
2017-09-09 14:57:16 +08:00
|
|
|
ret = TRANSACTION_GENERIC_ERROR;
|
|
|
|
goto out;
|
2016-04-24 14:58:41 +08:00
|
|
|
}
|
|
|
|
}
|
2017-09-09 14:57:16 +08:00
|
|
|
|
|
|
|
out:
|
|
|
|
strbuf_release(&referent);
|
|
|
|
return ret;
|
2016-04-24 14:58:41 +08:00
|
|
|
}
|
|
|
|
|
files_ref_store: use a transaction to update packed refs
When processing a `files_ref_store` transaction, it is sometimes
necessary to delete some references from the "packed-refs" file. Do
that using a reference transaction conducted against the
`packed_ref_store`.
This change further decouples `files_ref_store` from
`packed_ref_store`. It also fixes multiple problems, including the two
revealed by test cases added in the previous commit.
First, the old code didn't obtain the `packed-refs` lock until
`files_transaction_finish()`. This means that a failure to acquire the
`packed-refs` lock (e.g., due to contention with another process)
wasn't detected until it was too late (problems like this are supposed
to be detected in the "prepare" phase). The new code acquires the
`packed-refs` lock in `files_transaction_prepare()`, the same stage of
the processing when the loose reference locks are being acquired,
removing another reason why the "prepare" phase might succeed and the
"finish" phase might nevertheless fail.
Second, the old code deleted the loose version of a reference before
deleting any packed version of the same reference. This left a moment
when another process might think that the packed version of the
reference is current, which is incorrect. (Even worse, the packed
version of the reference can be arbitrarily old, and might even point
at an object that has since been garbage-collected.)
Third, if a reference deletion fails to acquire the `packed-refs` lock
altogether, then the old code might leave the repository in the
incorrect state (possibly corrupt) described in the previous
paragraph.
Now we activate the new "packed-refs" file (sans any references that
are being deleted) *before* deleting the corresponding loose
references. But we hold the "packed-refs" lock until after the loose
references have been finalized, thus preventing a simultaneous
"pack-refs" process from packing the loose version of the reference in
the time gap, which would otherwise defeat our attempt to delete it.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-09-08 21:51:51 +08:00
|
|
|
/*
 * Per-transaction state that the files backend hangs off
 * `ref_transaction::backend_data` while a transaction is in flight.
 */
struct files_transaction_backend_data {
	/*
	 * Nested transaction against the packed ref store, or NULL if
	 * none has been started. Cleanup aborts it when it is non-NULL.
	 */
	struct ref_transaction *packed_transaction;

	/*
	 * Non-zero while this transaction holds the "packed-refs" lock;
	 * cleanup releases the lock when this is set.
	 */
	int packed_refs_locked;
};
|
|
|
|
|
2017-05-22 22:17:42 +08:00
|
|
|
/*
|
|
|
|
* Unlock any references in `transaction` that are still locked, and
|
|
|
|
* mark the transaction closed.
|
|
|
|
*/
|
files_ref_store: use a transaction to update packed refs
When processing a `files_ref_store` transaction, it is sometimes
necessary to delete some references from the "packed-refs" file. Do
that using a reference transaction conducted against the
`packed_ref_store`.
This change further decouples `files_ref_store` from
`packed_ref_store`. It also fixes multiple problems, including the two
revealed by test cases added in the previous commit.
First, the old code didn't obtain the `packed-refs` lock until
`files_transaction_finish()`. This means that a failure to acquire the
`packed-refs` lock (e.g., due to contention with another process)
wasn't detected until it was too late (problems like this are supposed
to be detected in the "prepare" phase). The new code acquires the
`packed-refs` lock in `files_transaction_prepare()`, the same stage of
the processing when the loose reference locks are being acquired,
removing another reason why the "prepare" phase might succeed and the
"finish" phase might nevertheless fail.
Second, the old code deleted the loose version of a reference before
deleting any packed version of the same reference. This left a moment
when another process might think that the packed version of the
reference is current, which is incorrect. (Even worse, the packed
version of the reference can be arbitrarily old, and might even point
at an object that has since been garbage-collected.)
Third, if a reference deletion fails to acquire the `packed-refs` lock
altogether, then the old code might leave the repository in the
incorrect state (possibly corrupt) described in the previous
paragraph.
Now we activate the new "packed-refs" file (sans any references that
are being deleted) *before* deleting the corresponding loose
references. But we hold the "packed-refs" lock until after the loose
references have been finalized, thus preventing a simultaneous
"pack-refs" process from packing the loose version of the reference in
the time gap, which would otherwise defeat our attempt to delete it.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-09-08 21:51:51 +08:00
|
|
|
static void files_transaction_cleanup(struct files_ref_store *refs,
|
|
|
|
struct ref_transaction *transaction)
|
2017-05-22 22:17:42 +08:00
|
|
|
{
|
|
|
|
size_t i;
|
files_ref_store: use a transaction to update packed refs
When processing a `files_ref_store` transaction, it is sometimes
necessary to delete some references from the "packed-refs" file. Do
that using a reference transaction conducted against the
`packed_ref_store`.
This change further decouples `files_ref_store` from
`packed_ref_store`. It also fixes multiple problems, including the two
revealed by test cases added in the previous commit.
First, the old code didn't obtain the `packed-refs` lock until
`files_transaction_finish()`. This means that a failure to acquire the
`packed-refs` lock (e.g., due to contention with another process)
wasn't detected until it was too late (problems like this are supposed
to be detected in the "prepare" phase). The new code acquires the
`packed-refs` lock in `files_transaction_prepare()`, the same stage of
the processing when the loose reference locks are being acquired,
removing another reason why the "prepare" phase might succeed and the
"finish" phase might nevertheless fail.
Second, the old code deleted the loose version of a reference before
deleting any packed version of the same reference. This left a moment
when another process might think that the packed version of the
reference is current, which is incorrect. (Even worse, the packed
version of the reference can be arbitrarily old, and might even point
at an object that has since been garbage-collected.)
Third, if a reference deletion fails to acquire the `packed-refs` lock
altogether, then the old code might leave the repository in the
incorrect state (possibly corrupt) described in the previous
paragraph.
Now we activate the new "packed-refs" file (sans any references that
are being deleted) *before* deleting the corresponding loose
references. But we hold the "packed-refs" lock until after the loose
references have been finalized, thus preventing a simultaneous
"pack-refs" process from packing the loose version of the reference in
the time gap, which would otherwise defeat our attempt to delete it.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-09-08 21:51:51 +08:00
|
|
|
struct files_transaction_backend_data *backend_data =
|
|
|
|
transaction->backend_data;
|
|
|
|
struct strbuf err = STRBUF_INIT;
|
2017-05-22 22:17:42 +08:00
|
|
|
|
|
|
|
for (i = 0; i < transaction->nr; i++) {
|
|
|
|
struct ref_update *update = transaction->updates[i];
|
|
|
|
struct ref_lock *lock = update->backend_data;
|
|
|
|
|
|
|
|
if (lock) {
|
|
|
|
unlock_ref(lock);
|
|
|
|
update->backend_data = NULL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-03-30 21:46:13 +08:00
|
|
|
if (backend_data) {
|
|
|
|
if (backend_data->packed_transaction &&
|
|
|
|
ref_transaction_abort(backend_data->packed_transaction, &err)) {
|
|
|
|
error("error aborting transaction: %s", err.buf);
|
|
|
|
strbuf_release(&err);
|
|
|
|
}
|
files_ref_store: use a transaction to update packed refs
When processing a `files_ref_store` transaction, it is sometimes
necessary to delete some references from the "packed-refs" file. Do
that using a reference transaction conducted against the
`packed_ref_store`.
This change further decouples `files_ref_store` from
`packed_ref_store`. It also fixes multiple problems, including the two
revealed by test cases added in the previous commit.
First, the old code didn't obtain the `packed-refs` lock until
`files_transaction_finish()`. This means that a failure to acquire the
`packed-refs` lock (e.g., due to contention with another process)
wasn't detected until it was too late (problems like this are supposed
to be detected in the "prepare" phase). The new code acquires the
`packed-refs` lock in `files_transaction_prepare()`, the same stage of
the processing when the loose reference locks are being acquired,
removing another reason why the "prepare" phase might succeed and the
"finish" phase might nevertheless fail.
Second, the old code deleted the loose version of a reference before
deleting any packed version of the same reference. This left a moment
when another process might think that the packed version of the
reference is current, which is incorrect. (Even worse, the packed
version of the reference can be arbitrarily old, and might even point
at an object that has since been garbage-collected.)
Third, if a reference deletion fails to acquire the `packed-refs` lock
altogether, then the old code might leave the repository in the
incorrect state (possibly corrupt) described in the previous
paragraph.
Now we activate the new "packed-refs" file (sans any references that
are being deleted) *before* deleting the corresponding loose
references. But we hold the "packed-refs" lock until after the loose
references have been finalized, thus preventing a simultaneous
"pack-refs" process from packing the loose version of the reference in
the time gap, which would otherwise defeat our attempt to delete it.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-09-08 21:51:51 +08:00
|
|
|
|
2020-03-30 21:46:13 +08:00
|
|
|
if (backend_data->packed_refs_locked)
|
|
|
|
packed_refs_unlock(refs->packed_ref_store);
|
files_ref_store: use a transaction to update packed refs
When processing a `files_ref_store` transaction, it is sometimes
necessary to delete some references from the "packed-refs" file. Do
that using a reference transaction conducted against the
`packed_ref_store`.
This change further decouples `files_ref_store` from
`packed_ref_store`. It also fixes multiple problems, including the two
revealed by test cases added in the previous commit.
First, the old code didn't obtain the `packed-refs` lock until
`files_transaction_finish()`. This means that a failure to acquire the
`packed-refs` lock (e.g., due to contention with another process)
wasn't detected until it was too late (problems like this are supposed
to be detected in the "prepare" phase). The new code acquires the
`packed-refs` lock in `files_transaction_prepare()`, the same stage of
the processing when the loose reference locks are being acquired,
removing another reason why the "prepare" phase might succeed and the
"finish" phase might nevertheless fail.
Second, the old code deleted the loose version of a reference before
deleting any packed version of the same reference. This left a moment
when another process might think that the packed version of the
reference is current, which is incorrect. (Even worse, the packed
version of the reference can be arbitrarily old, and might even point
at an object that has since been garbage-collected.)
Third, if a reference deletion fails to acquire the `packed-refs` lock
altogether, then the old code might leave the repository in the
incorrect state (possibly corrupt) described in the previous
paragraph.
Now we activate the new "packed-refs" file (sans any references that
are being deleted) *before* deleting the corresponding loose
references. But we hold the "packed-refs" lock until after the loose
references have been finalized, thus preventing a simultaneous
"pack-refs" process from packing the loose version of the reference in
the time gap, which would otherwise defeat our attempt to delete it.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-09-08 21:51:51 +08:00
|
|
|
|
2020-03-30 21:46:13 +08:00
|
|
|
free(backend_data);
|
|
|
|
}
|
files_ref_store: use a transaction to update packed refs
When processing a `files_ref_store` transaction, it is sometimes
necessary to delete some references from the "packed-refs" file. Do
that using a reference transaction conducted against the
`packed_ref_store`.
This change further decouples `files_ref_store` from
`packed_ref_store`. It also fixes multiple problems, including the two
revealed by test cases added in the previous commit.
First, the old code didn't obtain the `packed-refs` lock until
`files_transaction_finish()`. This means that a failure to acquire the
`packed-refs` lock (e.g., due to contention with another process)
wasn't detected until it was too late (problems like this are supposed
to be detected in the "prepare" phase). The new code acquires the
`packed-refs` lock in `files_transaction_prepare()`, the same stage of
the processing when the loose reference locks are being acquired,
removing another reason why the "prepare" phase might succeed and the
"finish" phase might nevertheless fail.
Second, the old code deleted the loose version of a reference before
deleting any packed version of the same reference. This left a moment
when another process might think that the packed version of the
reference is current, which is incorrect. (Even worse, the packed
version of the reference can be arbitrarily old, and might even point
at an object that has since been garbage-collected.)
Third, if a reference deletion fails to acquire the `packed-refs` lock
altogether, then the old code might leave the repository in the
incorrect state (possibly corrupt) described in the previous
paragraph.
Now we activate the new "packed-refs" file (sans any references that
are being deleted) *before* deleting the corresponding loose
references. But we hold the "packed-refs" lock until after the loose
references have been finalized, thus preventing a simultaneous
"pack-refs" process from packing the loose version of the reference in
the time gap, which would otherwise defeat our attempt to delete it.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-09-08 21:51:51 +08:00
|
|
|
|
2017-05-22 22:17:42 +08:00
|
|
|
transaction->state = REF_TRANSACTION_CLOSED;
|
|
|
|
}
|
|
|
|
|
ref_transaction_prepare(): new optional step for reference updates
In the future, compound reference stores will sometimes need to modify
references in two different reference stores at the same time, meaning
that a single logical reference transaction might have to be
implemented as two internal sub-transactions. They won't want to call
`ref_transaction_commit()` for the two sub-transactions one after the
other, because that wouldn't be atomic (the first commit could succeed
and the second one fail). Instead, they will want to prepare both
sub-transactions (i.e., obtain any necessary locks and do any
pre-checks), and only if both prepare steps succeed, then commit both
sub-transactions.
Start preparing for that day by adding a new, optional
`ref_transaction_prepare()` step to the reference transaction
sequence, which obtains the locks and does any prechecks, reporting
any errors that occur. Also add a `ref_transaction_abort()` function
that can be used to abort a sub-transaction even if it has already
been prepared.
That is on the side of the public-facing API. On the side of the
`ref_store` VTABLE, get rid of `transaction_commit` and instead add
methods `transaction_prepare`, `transaction_finish`, and
`transaction_abort`. A `ref_transaction_commit()` now basically calls
methods `transaction_prepare` then `transaction_finish`.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-05-22 22:17:44 +08:00
|
|
|
static int files_transaction_prepare(struct ref_store *ref_store,
|
|
|
|
struct ref_transaction *transaction,
|
|
|
|
struct strbuf *err)
|
2015-11-09 21:34:01 +08:00
|
|
|
{
|
2016-09-05 00:08:11 +08:00
|
|
|
struct files_ref_store *refs =
|
2017-03-26 10:42:32 +08:00
|
|
|
files_downcast(ref_store, REF_STORE_WRITE,
|
ref_transaction_prepare(): new optional step for reference updates
In the future, compound reference stores will sometimes need to modify
references in two different reference stores at the same time, meaning
that a single logical reference transaction might have to be
implemented as two internal sub-transactions. They won't want to call
`ref_transaction_commit()` for the two sub-transactions one after the
other, because that wouldn't be atomic (the first commit could succeed
and the second one fail). Instead, they will want to prepare both
sub-transactions (i.e., obtain any necessary locks and do any
pre-checks), and only if both prepare steps succeed, then commit both
sub-transactions.
Start preparing for that day by adding a new, optional
`ref_transaction_prepare()` step to the reference transaction
sequence, which obtains the locks and does any prechecks, reporting
any errors that occur. Also add a `ref_transaction_abort()` function
that can be used to abort a sub-transaction even if it has already
been prepared.
That is on the side of the public-facing API. On the side of the
`ref_store` VTABLE, get rid of `transaction_commit` and instead add
methods `transaction_prepare`, `transaction_finish`, and
`transaction_abort`. A `ref_transaction_commit()` now basically calls
methods `transaction_prepare` then `transaction_finish`.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-05-22 22:17:44 +08:00
|
|
|
"ref_transaction_prepare");
|
2017-05-22 22:17:37 +08:00
|
|
|
size_t i;
|
|
|
|
int ret = 0;
|
2015-11-09 21:34:01 +08:00
|
|
|
struct string_list affected_refnames = STRING_LIST_INIT_NODUP;
|
refs: resolve symbolic refs first
Before committing ref updates, split symbolic ref updates into two
parts: an update to the underlying ref, and a log-only update to the
symbolic ref. This ensures that both references are locked correctly
during the transaction, including while their reflogs are updated.
Similarly, if the reference pointed to by HEAD is modified directly, add
a separate log-only update to HEAD, rather than leaving the job of
updating HEAD's reflog to commit_ref_update(). This change ensures that
HEAD is locked correctly while its reflog is being modified, as well as
being cheaper (HEAD only needs to be resolved once).
This makes use of a new function, lock_raw_ref(), which is analogous to
read_raw_ref(), but acquires a lock on the reference before reading it.
This change still has two problems:
* There are redundant read_ref_full() reference lookups.
* It is still possible to get incorrect reflogs for symbolic references
if there is a concurrent update by another process, since the old_oid
of a symref is determined before the lock on the pointed-to ref is
held.
Both problems will soon be fixed.
Signed-off-by: David Turner <dturner@twopensource.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
WIP
2016-04-25 21:56:07 +08:00
|
|
|
char *head_ref = NULL;
|
|
|
|
int head_type;
|
files_ref_store: use a transaction to update packed refs
When processing a `files_ref_store` transaction, it is sometimes
necessary to delete some references from the "packed-refs" file. Do
that using a reference transaction conducted against the
`packed_ref_store`.
This change further decouples `files_ref_store` from
`packed_ref_store`. It also fixes multiple problems, including the two
revealed by test cases added in the previous commit.
First, the old code didn't obtain the `packed-refs` lock until
`files_transaction_finish()`. This means that a failure to acquire the
`packed-refs` lock (e.g., due to contention with another process)
wasn't detected until it was too late (problems like this are supposed
to be detected in the "prepare" phase). The new code acquires the
`packed-refs` lock in `files_transaction_prepare()`, the same stage of
the processing when the loose reference locks are being acquired,
removing another reason why the "prepare" phase might succeed and the
"finish" phase might nevertheless fail.
Second, the old code deleted the loose version of a reference before
deleting any packed version of the same reference. This left a moment
when another process might think that the packed version of the
reference is current, which is incorrect. (Even worse, the packed
version of the reference can be arbitrarily old, and might even point
at an object that has since been garbage-collected.)
Third, if a reference deletion fails to acquire the `packed-refs` lock
altogether, then the old code might leave the repository in the
incorrect state (possibly corrupt) described in the previous
paragraph.
Now we activate the new "packed-refs" file (sans any references that
are being deleted) *before* deleting the corresponding loose
references. But we hold the "packed-refs" lock until after the loose
references have been finalized, thus preventing a simultaneous
"pack-refs" process from packing the loose version of the reference in
the time gap, which would otherwise defeat our attempt to delete it.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-09-08 21:51:51 +08:00
|
|
|
struct files_transaction_backend_data *backend_data;
|
|
|
|
struct ref_transaction *packed_transaction = NULL;
|
2015-11-09 21:34:01 +08:00
|
|
|
|
|
|
|
assert(err);
|
|
|
|
|
2017-05-22 22:17:42 +08:00
|
|
|
if (!transaction->nr)
|
|
|
|
goto cleanup;
|
2015-11-09 21:34:01 +08:00
|
|
|
|
2021-03-14 00:17:22 +08:00
|
|
|
CALLOC_ARRAY(backend_data, 1);
|
files_ref_store: use a transaction to update packed refs
When processing a `files_ref_store` transaction, it is sometimes
necessary to delete some references from the "packed-refs" file. Do
that using a reference transaction conducted against the
`packed_ref_store`.
This change further decouples `files_ref_store` from
`packed_ref_store`. It also fixes multiple problems, including the two
revealed by test cases added in the previous commit.
First, the old code didn't obtain the `packed-refs` lock until
`files_transaction_finish()`. This means that a failure to acquire the
`packed-refs` lock (e.g., due to contention with another process)
wasn't detected until it was too late (problems like this are supposed
to be detected in the "prepare" phase). The new code acquires the
`packed-refs` lock in `files_transaction_prepare()`, the same stage of
the processing when the loose reference locks are being acquired,
removing another reason why the "prepare" phase might succeed and the
"finish" phase might nevertheless fail.
Second, the old code deleted the loose version of a reference before
deleting any packed version of the same reference. This left a moment
when another process might think that the packed version of the
reference is current, which is incorrect. (Even worse, the packed
version of the reference can be arbitrarily old, and might even point
at an object that has since been garbage-collected.)
Third, if a reference deletion fails to acquire the `packed-refs` lock
altogether, then the old code might leave the repository in the
incorrect state (possibly corrupt) described in the previous
paragraph.
Now we activate the new "packed-refs" file (sans any references that
are being deleted) *before* deleting the corresponding loose
references. But we hold the "packed-refs" lock until after the loose
references have been finalized, thus preventing a simultaneous
"pack-refs" process from packing the loose version of the reference in
the time gap, which would otherwise defeat our attempt to delete it.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-09-08 21:51:51 +08:00
|
|
|
transaction->backend_data = backend_data;
|
|
|
|
|
refs: resolve symbolic refs first
Before committing ref updates, split symbolic ref updates into two
parts: an update to the underlying ref, and a log-only update to the
symbolic ref. This ensures that both references are locked correctly
during the transaction, including while their reflogs are updated.
Similarly, if the reference pointed to by HEAD is modified directly, add
a separate log-only update to HEAD, rather than leaving the job of
updating HEAD's reflog to commit_ref_update(). This change ensures that
HEAD is locked correctly while its reflog is being modified, as well as
being cheaper (HEAD only needs to be resolved once).
This makes use of a new function, lock_raw_ref(), which is analogous to
read_raw_ref(), but acquires a lock on the reference before reading it.
This change still has two problems:
* There are redundant read_ref_full() reference lookups.
* It is still possible to get incorrect reflogs for symbolic references
if there is a concurrent update by another process, since the old_oid
of a symref is determined before the lock on the pointed-to ref is
held.
Both problems will soon be fixed.
Signed-off-by: David Turner <dturner@twopensource.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
WIP
2016-04-25 21:56:07 +08:00
|
|
|
/*
|
|
|
|
* Fail if a refname appears more than once in the
|
|
|
|
* transaction. (If we end up splitting up any updates using
|
|
|
|
* split_symref_update() or split_head_update(), those
|
|
|
|
* functions will check that the new updates don't have the
|
2017-11-05 16:42:04 +08:00
|
|
|
* same refname as any existing ones.) Also fail if any of the
|
2017-11-05 16:42:07 +08:00
|
|
|
* updates use REF_IS_PRUNING without REF_NO_DEREF.
|
refs: resolve symbolic refs first
Before committing ref updates, split symbolic ref updates into two
parts: an update to the underlying ref, and a log-only update to the
symbolic ref. This ensures that both references are locked correctly
during the transaction, including while their reflogs are updated.
Similarly, if the reference pointed to by HEAD is modified directly, add
a separate log-only update to HEAD, rather than leaving the job of
updating HEAD's reflog to commit_ref_update(). This change ensures that
HEAD is locked correctly while its reflog is being modified, as well as
being cheaper (HEAD only needs to be resolved once).
This makes use of a new function, lock_raw_ref(), which is analogous to
read_raw_ref(), but acquires a lock on the reference before reading it.
This change still has two problems:
* There are redundant read_ref_full() reference lookups.
* It is still possible to get incorrect reflogs for symbolic references
if there is a concurrent update by another process, since the old_oid
of a symref is determined before the lock on the pointed-to ref is
held.
Both problems will soon be fixed.
Signed-off-by: David Turner <dturner@twopensource.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
WIP
2016-04-25 21:56:07 +08:00
|
|
|
*/
|
|
|
|
for (i = 0; i < transaction->nr; i++) {
|
|
|
|
struct ref_update *update = transaction->updates[i];
|
|
|
|
struct string_list_item *item =
|
|
|
|
string_list_append(&affected_refnames, update->refname);
|
|
|
|
|
2017-11-05 16:42:07 +08:00
|
|
|
if ((update->flags & REF_IS_PRUNING) &&
|
2017-11-05 16:42:06 +08:00
|
|
|
!(update->flags & REF_NO_DEREF))
|
2017-11-05 16:42:07 +08:00
|
|
|
BUG("REF_IS_PRUNING set without REF_NO_DEREF");
|
2017-11-05 16:42:04 +08:00
|
|
|
|
refs: resolve symbolic refs first
Before committing ref updates, split symbolic ref updates into two
parts: an update to the underlying ref, and a log-only update to the
symbolic ref. This ensures that both references are locked correctly
during the transaction, including while their reflogs are updated.
Similarly, if the reference pointed to by HEAD is modified directly, add
a separate log-only update to HEAD, rather than leaving the job of
updating HEAD's reflog to commit_ref_update(). This change ensures that
HEAD is locked correctly while its reflog is being modified, as well as
being cheaper (HEAD only needs to be resolved once).
This makes use of a new function, lock_raw_ref(), which is analogous to
read_raw_ref(), but acquires a lock on the reference before reading it.
This change still has two problems:
* There are redundant read_ref_full() reference lookups.
* It is still possible to get incorrect reflogs for symbolic references
if there is a concurrent update by another process, since the old_oid
of a symref is determined before the lock on the pointed-to ref is
held.
Both problems will soon be fixed.
Signed-off-by: David Turner <dturner@twopensource.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
WIP
2016-04-25 21:56:07 +08:00
|
|
|
/*
|
|
|
|
* We store a pointer to update in item->util, but at
|
|
|
|
* the moment we never use the value of this field
|
|
|
|
* except to check whether it is non-NULL.
|
|
|
|
*/
|
|
|
|
item->util = update;
|
|
|
|
}
|
2015-11-09 21:34:01 +08:00
|
|
|
string_list_sort(&affected_refnames);
|
|
|
|
if (ref_update_reject_duplicates(&affected_refnames, err)) {
|
|
|
|
ret = TRANSACTION_GENERIC_ERROR;
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
|
refs: resolve symbolic refs first
Before committing ref updates, split symbolic ref updates into two
parts: an update to the underlying ref, and a log-only update to the
symbolic ref. This ensures that both references are locked correctly
during the transaction, including while their reflogs are updated.
Similarly, if the reference pointed to by HEAD is modified directly, add
a separate log-only update to HEAD, rather than leaving the job of
updating HEAD's reflog to commit_ref_update(). This change ensures that
HEAD is locked correctly while its reflog is being modified, as well as
being cheaper (HEAD only needs to be resolved once).
This makes use of a new function, lock_raw_ref(), which is analogous to
read_raw_ref(), but acquires a lock on the reference before reading it.
This change still has two problems:
* There are redundant read_ref_full() reference lookups.
* It is still possible to get incorrect reflogs for symbolic references
if there is a concurrent update by another process, since the old_oid
of a symref is determined before the lock on the pointed-to ref is
held.
Both problems will soon be fixed.
Signed-off-by: David Turner <dturner@twopensource.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
WIP
2016-04-25 21:56:07 +08:00
|
|
|
/*
|
|
|
|
* Special hack: If a branch is updated directly and HEAD
|
|
|
|
* points to it (may happen on the remote side of a push
|
|
|
|
* for example) then logically the HEAD reflog should be
|
|
|
|
* updated too.
|
|
|
|
*
|
|
|
|
* A generic solution would require reverse symref lookups,
|
|
|
|
* but finding all symrefs pointing to a given branch would be
|
|
|
|
* rather costly for this rare event (the direct update of a
|
|
|
|
* branch) to be worth it. So let's cheat and check with HEAD
|
|
|
|
* only, which should cover 99% of all usage scenarios (even
|
|
|
|
* 100% of the default ones).
|
|
|
|
*
|
|
|
|
* So if HEAD is a symbolic reference, then record the name of
|
|
|
|
* the reference that it points to. If we see an update of
|
|
|
|
* head_ref within the transaction, then split_head_update()
|
|
|
|
* arranges for the reflog of HEAD to be updated, too.
|
|
|
|
*/
|
2017-03-26 10:42:36 +08:00
|
|
|
head_ref = refs_resolve_refdup(ref_store, "HEAD",
|
|
|
|
RESOLVE_REF_NO_RECURSE,
|
2017-10-01 15:28:50 +08:00
|
|
|
NULL, &head_type);
|
refs: resolve symbolic refs first
Before committing ref updates, split symbolic ref updates into two
parts: an update to the underlying ref, and a log-only update to the
symbolic ref. This ensures that both references are locked correctly
during the transaction, including while their reflogs are updated.
Similarly, if the reference pointed to by HEAD is modified directly, add
a separate log-only update to HEAD, rather than leaving the job of
updating HEAD's reflog to commit_ref_update(). This change ensures that
HEAD is locked correctly while its reflog is being modified, as well as
being cheaper (HEAD only needs to be resolved once).
This makes use of a new function, lock_raw_ref(), which is analogous to
read_raw_ref(), but acquires a lock on the reference before reading it.
This change still has two problems:
* There are redundant read_ref_full() reference lookups.
* It is still possible to get incorrect reflogs for symbolic references
if there is a concurrent update by another process, since the old_oid
of a symref is determined before the lock on the pointed-to ref is
held.
Both problems will soon be fixed.
Signed-off-by: David Turner <dturner@twopensource.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
WIP
2016-04-25 21:56:07 +08:00
|
|
|
|
|
|
|
if (head_ref && !(head_type & REF_ISSYMREF)) {
|
2017-06-16 07:15:46 +08:00
|
|
|
FREE_AND_NULL(head_ref);
|
refs: resolve symbolic refs first
Before committing ref updates, split symbolic ref updates into two
parts: an update to the underlying ref, and a log-only update to the
symbolic ref. This ensures that both references are locked correctly
during the transaction, including while their reflogs are updated.
Similarly, if the reference pointed to by HEAD is modified directly, add
a separate log-only update to HEAD, rather than leaving the job of
updating HEAD's reflog to commit_ref_update(). This change ensures that
HEAD is locked correctly while its reflog is being modified, as well as
being cheaper (HEAD only needs to be resolved once).
This makes use of a new function, lock_raw_ref(), which is analogous to
read_raw_ref(), but acquires a lock on the reference before reading it.
This change still has two problems:
* There are redundant read_ref_full() reference lookups.
* It is still possible to get incorrect reflogs for symbolic references
if there is a concurrent update by another process, since the old_oid
of a symref is determined before the lock on the pointed-to ref is
held.
Both problems will soon be fixed.
Signed-off-by: David Turner <dturner@twopensource.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
WIP
2016-04-25 21:56:07 +08:00
|
|
|
}
|
|
|
|
|
2015-11-09 21:34:01 +08:00
|
|
|
/*
|
|
|
|
* Acquire all locks, verify old values if provided, check
|
|
|
|
* that new values are valid, and write new values to the
|
|
|
|
* lockfiles, ready to be activated. Only keep one lockfile
|
|
|
|
* open at a time to avoid running out of file descriptors.
|
ref_transaction_prepare(): new optional step for reference updates
In the future, compound reference stores will sometimes need to modify
references in two different reference stores at the same time, meaning
that a single logical reference transaction might have to be
implemented as two internal sub-transactions. They won't want to call
`ref_transaction_commit()` for the two sub-transactions one after the
other, because that wouldn't be atomic (the first commit could succeed
and the second one fail). Instead, they will want to prepare both
sub-transactions (i.e., obtain any necessary locks and do any
pre-checks), and only if both prepare steps succeed, then commit both
sub-transactions.
Start preparing for that day by adding a new, optional
`ref_transaction_prepare()` step to the reference transaction
sequence, which obtains the locks and does any prechecks, reporting
any errors that occur. Also add a `ref_transaction_abort()` function
that can be used to abort a sub-transaction even if it has already
been prepared.
That is on the side of the public-facing API. On the side of the
`ref_store` VTABLE, get rid of `transaction_commit` and instead add
methods `transaction_prepare`, `transaction_finish`, and
`transaction_abort`. A `ref_transaction_commit()` now basically calls
methods `transaction_prepare` then `transaction_finish`.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-05-22 22:17:44 +08:00
|
|
|
* Note that lock_ref_for_update() might append more updates
|
|
|
|
* to the transaction.
|
2015-11-09 21:34:01 +08:00
|
|
|
*/
|
2016-04-22 06:02:50 +08:00
|
|
|
for (i = 0; i < transaction->nr; i++) {
|
|
|
|
struct ref_update *update = transaction->updates[i];
|
2015-11-09 21:34:01 +08:00
|
|
|
|
2016-09-05 00:08:33 +08:00
|
|
|
ret = lock_ref_for_update(refs, update, transaction,
|
|
|
|
head_ref, &affected_refnames, err);
|
2016-04-24 14:58:41 +08:00
|
|
|
if (ret)
|
files_transaction_prepare(): fix handling of ref lock failure
Since dc39e09942 (files_ref_store: use a transaction to update packed
refs, 2017-09-08), failure to lock a reference has been handled
incorrectly by `files_transaction_prepare()`. If
`lock_ref_for_update()` fails in the lock-acquisition loop of that
function, it sets `ret` then breaks out of that loop. Prior to
dc39e09942, that was OK, because the only thing following the loop was
the cleanup code. But dc39e09942 added another blurb of code between
the loop and the cleanup. That blurb sometimes resets `ret` to zero,
making the cleanup code think that the locking was successful.
Specifically, whenever
* One or more reference deletions have been processed successfully in
the lock-acquisition loop. (Processing the first such reference
causes a packed-ref transaction to be initialized.)
* Then `lock_ref_for_update()` fails for a subsequent reference. Such
a failure can happen for a number of reasons, such as the old SHA-1
not being correct, lock contention, etc. This causes a `break` out
of the lock-acquisition loop.
* The `packed-refs` lock is acquired successfully and
`ref_transaction_prepare()` succeeds for the packed-ref transaction.
This has the effect of resetting `ret` back to 0, and making the
cleanup code think that lock acquisition was successful.
In that case, any reference updates that were processed prior to
breaking out of the loop would be carried out (loose and packed), but
the reference that couldn't be locked and any subsequent references
would silently be ignored.
This can easily cause data loss if, for example, the user was trying
to push a new name for an existing branch while deleting the old name.
After the push, the branch could be left unreachable, and could even
subsequently be garbage-collected.
This problem was noticed in the context of deleting one reference and
creating another in a single transaction, when the two references D/F
conflict with each other, like
git update-ref --stdin <<EOF
delete refs/foo
create refs/foo/bar HEAD
EOF
This triggers the above bug because the deletion is processed
successfully for `refs/foo`, then the D/F conflict causes
`lock_ref_for_update()` to fail when `refs/foo/bar` is processed. In
this case the transaction *should* fail, but instead it causes
`refs/foo` to be deleted without creating `refs/foo/bar`. This could
easily result in data loss.
The fix is simple: instead of just breaking out of the loop, jump
directly to the cleanup code. This fixes some tests in t1404 that were
added in the previous commit.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-10-24 23:16:25 +08:00
|
|
|
goto cleanup;
|
files_ref_store: use a transaction to update packed refs
When processing a `files_ref_store` transaction, it is sometimes
necessary to delete some references from the "packed-refs" file. Do
that using a reference transaction conducted against the
`packed_ref_store`.
This change further decouples `files_ref_store` from
`packed_ref_store`. It also fixes multiple problems, including the two
revealed by test cases added in the previous commit.
First, the old code didn't obtain the `packed-refs` lock until
`files_transaction_finish()`. This means that a failure to acquire the
`packed-refs` lock (e.g., due to contention with another process)
wasn't detected until it was too late (problems like this are supposed
to be detected in the "prepare" phase). The new code acquires the
`packed-refs` lock in `files_transaction_prepare()`, the same stage of
the processing when the loose reference locks are being acquired,
removing another reason why the "prepare" phase might succeed and the
"finish" phase might nevertheless fail.
Second, the old code deleted the loose version of a reference before
deleting any packed version of the same reference. This left a moment
when another process might think that the packed version of the
reference is current, which is incorrect. (Even worse, the packed
version of the reference can be arbitrarily old, and might even point
at an object that has since been garbage-collected.)
Third, if a reference deletion fails to acquire the `packed-refs` lock
altogether, then the old code might leave the repository in the
incorrect state (possibly corrupt) described in the previous
paragraph.
Now we activate the new "packed-refs" file (sans any references that
are being deleted) *before* deleting the corresponding loose
references. But we hold the "packed-refs" lock until after the loose
references have been finalized, thus preventing a simultaneous
"pack-refs" process from packing the loose version of the reference in
the time gap, which would otherwise defeat our attempt to delete it.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-09-08 21:51:51 +08:00
|
|
|
|
|
|
|
if (update->flags & REF_DELETING &&
|
|
|
|
!(update->flags & REF_LOG_ONLY) &&
|
2017-11-05 16:42:07 +08:00
|
|
|
!(update->flags & REF_IS_PRUNING)) {
|
files_ref_store: use a transaction to update packed refs
When processing a `files_ref_store` transaction, it is sometimes
necessary to delete some references from the "packed-refs" file. Do
that using a reference transaction conducted against the
`packed_ref_store`.
This change further decouples `files_ref_store` from
`packed_ref_store`. It also fixes multiple problems, including the two
revealed by test cases added in the previous commit.
First, the old code didn't obtain the `packed-refs` lock until
`files_transaction_finish()`. This means that a failure to acquire the
`packed-refs` lock (e.g., due to contention with another process)
wasn't detected until it was too late (problems like this are supposed
to be detected in the "prepare" phase). The new code acquires the
`packed-refs` lock in `files_transaction_prepare()`, the same stage of
the processing when the loose reference locks are being acquired,
removing another reason why the "prepare" phase might succeed and the
"finish" phase might nevertheless fail.
Second, the old code deleted the loose version of a reference before
deleting any packed version of the same reference. This left a moment
when another process might think that the packed version of the
reference is current, which is incorrect. (Even worse, the packed
version of the reference can be arbitrarily old, and might even point
at an object that has since been garbage-collected.)
Third, if a reference deletion fails to acquire the `packed-refs` lock
altogether, then the old code might leave the repository in the
incorrect state (possibly corrupt) described in the previous
paragraph.
Now we activate the new "packed-refs" file (sans any references that
are being deleted) *before* deleting the corresponding loose
references. But we hold the "packed-refs" lock until after the loose
references have been finalized, thus preventing a simultaneous
"pack-refs" process from packing the loose version of the reference in
the time gap, which would otherwise defeat our attempt to delete it.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-09-08 21:51:51 +08:00
|
|
|
/*
|
|
|
|
* This reference has to be deleted from
|
|
|
|
* packed-refs if it exists there.
|
|
|
|
*/
|
|
|
|
if (!packed_transaction) {
|
|
|
|
packed_transaction = ref_store_transaction_begin(
|
2022-04-14 06:51:33 +08:00
|
|
|
refs->packed_ref_store, err);
|
files_ref_store: use a transaction to update packed refs
When processing a `files_ref_store` transaction, it is sometimes
necessary to delete some references from the "packed-refs" file. Do
that using a reference transaction conducted against the
`packed_ref_store`.
This change further decouples `files_ref_store` from
`packed_ref_store`. It also fixes multiple problems, including the two
revealed by test cases added in the previous commit.
First, the old code didn't obtain the `packed-refs` lock until
`files_transaction_finish()`. This means that a failure to acquire the
`packed-refs` lock (e.g., due to contention with another process)
wasn't detected until it was too late (problems like this are supposed
to be detected in the "prepare" phase). The new code acquires the
`packed-refs` lock in `files_transaction_prepare()`, the same stage of
the processing when the loose reference locks are being acquired,
removing another reason why the "prepare" phase might succeed and the
"finish" phase might nevertheless fail.
Second, the old code deleted the loose version of a reference before
deleting any packed version of the same reference. This left a moment
when another process might think that the packed version of the
reference is current, which is incorrect. (Even worse, the packed
version of the reference can be arbitrarily old, and might even point
at an object that has since been garbage-collected.)
Third, if a reference deletion fails to acquire the `packed-refs` lock
altogether, then the old code might leave the repository in the
incorrect state (possibly corrupt) described in the previous
paragraph.
Now we activate the new "packed-refs" file (sans any references that
are being deleted) *before* deleting the corresponding loose
references. But we hold the "packed-refs" lock until after the loose
references have been finalized, thus preventing a simultaneous
"pack-refs" process from packing the loose version of the reference in
the time gap, which would otherwise defeat our attempt to delete it.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-09-08 21:51:51 +08:00
|
|
|
if (!packed_transaction) {
|
|
|
|
ret = TRANSACTION_GENERIC_ERROR;
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
|
|
|
|
backend_data->packed_transaction =
|
|
|
|
packed_transaction;
|
|
|
|
}
|
|
|
|
|
|
|
|
ref_transaction_add_update(
|
|
|
|
packed_transaction, update->refname,
|
2017-11-05 16:42:06 +08:00
|
|
|
REF_HAVE_NEW | REF_NO_DEREF,
|
2017-11-05 16:42:01 +08:00
|
|
|
&update->new_oid, NULL,
|
files_ref_store: use a transaction to update packed refs
When processing a `files_ref_store` transaction, it is sometimes
necessary to delete some references from the "packed-refs" file. Do
that using a reference transaction conducted against the
`packed_ref_store`.
This change further decouples `files_ref_store` from
`packed_ref_store`. It also fixes multiple problems, including the two
revealed by test cases added in the previous commit.
First, the old code didn't obtain the `packed-refs` lock until
`files_transaction_finish()`. This means that a failure to acquire the
`packed-refs` lock (e.g., due to contention with another process)
wasn't detected until it was too late (problems like this are supposed
to be detected in the "prepare" phase). The new code acquires the
`packed-refs` lock in `files_transaction_prepare()`, the same stage of
the processing when the loose reference locks are being acquired,
removing another reason why the "prepare" phase might succeed and the
"finish" phase might nevertheless fail.
Second, the old code deleted the loose version of a reference before
deleting any packed version of the same reference. This left a moment
when another process might think that the packed version of the
reference is current, which is incorrect. (Even worse, the packed
version of the reference can be arbitrarily old, and might even point
at an object that has since been garbage-collected.)
Third, if a reference deletion fails to acquire the `packed-refs` lock
altogether, then the old code might leave the repository in the
incorrect state (possibly corrupt) described in the previous
paragraph.
Now we activate the new "packed-refs" file (sans any references that
are being deleted) *before* deleting the corresponding loose
references. But we hold the "packed-refs" lock until after the loose
references have been finalized, thus preventing a simultaneous
"pack-refs" process from packing the loose version of the reference in
the time gap, which would otherwise defeat our attempt to delete it.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-09-08 21:51:51 +08:00
|
|
|
NULL);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (packed_transaction) {
|
|
|
|
if (packed_refs_lock(refs->packed_ref_store, 0, err)) {
|
|
|
|
ret = TRANSACTION_GENERIC_ERROR;
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
backend_data->packed_refs_locked = 1;
|
files-backend: don't rewrite the `packed-refs` file unnecessarily
Even when we are deleting references, we needn't overwrite the
`packed-refs` file if the references that we are deleting only exist
as loose references. Implement this optimization as follows:
* Add a function `is_packed_transaction_needed()`, which checks
whether a given packed-refs transaction actually needs to be carried
out (i.e., it returns false if the transaction obviously wouldn't
have any effect). This function must be called while holding the
`packed-refs` lock to avoid races.
* Change `files_transaction_prepare()` to check whether the
packed-refs transaction is actually needed. If not, squelch it, but
continue holding the `packed-refs` lock until the end of the
transaction to avoid races.
This fixes a mild regression caused by dc39e09942 (files_ref_store:
use a transaction to update packed refs, 2017-09-08). Before that
commit, unnecessary rewrites of `packed-refs` were suppressed by
`repack_without_refs()`. But the transaction-based writing introduced
by that commit didn't perform that optimization.
Note that the pre-dc39e09942 code still had to *read* the whole
`packed-refs` file to determine that the rewrite could be skipped, so
the performance for the cases that the write could be elided was
`O(N)` in the number of packed references both before and after
dc39e09942. But after that commit the constant factor increased.
This commit reimplements the optimization of eliding unnecessary
`packed-refs` rewrites. That, plus the fact that since
cfa2e29c34 (packed_ref_store: get rid of the `ref_cache` entirely,
2017-03-17) we don't necessarily have to read the whole `packed-refs`
file at all, means that deletes of one or a few loose references can
now be done with `O(n lg N)` effort, where `n` is the number of loose
references being deleted and `N` is the total number of packed
references.
This commit fixes two tests in t1409.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-10-28 17:16:02 +08:00
|
|
|
|
|
|
|
if (is_packed_transaction_needed(refs->packed_ref_store,
|
|
|
|
packed_transaction)) {
|
|
|
|
ret = ref_transaction_prepare(packed_transaction, err);
|
refs/files-backend: handle packed transaction prepare failure
In files_transaction_prepare(), if we have to delete some refs, we use a
subordinate packed_transaction to do so. It's rare for that
sub-transaction's prepare step to fail, since we hold the packed-refs
lock. But if it does, we trigger a BUG() due to these steps:
- we've attached the packed transaction to the files transaction as
backend_data->packed_transaction
- when the prepare step fails, the packed transaction cleans itself
up, putting itself into the CLOSED state
- the error value from preparing the packed transaction lets us know
in files_transaction_prepare() that we should also clean up and
return an error. We call files_transaction_cleanup(), which tries to
abort backend_data->packed_transaction. Since it's already CLOSED,
that triggers an assertion in ref_transaction_abort().
We can fix that by disconnecting the packed transaction from the outer
files transaction, and then free-ing (not aborting!) it ourselves.
A few other options/alternatives I considered:
- we could just make it a noop to abort a CLOSED transaction. But that
seems less safe, since clearly this code expects (and enforces) a
particular set of state transitions.
- we could have files_transaction_cleanup() selectively call abort()
vs free() based on the state of the packed transaction.
That's basically a more restricted version of the above, but also
potentially unsafe.
- instead of disconnecting backend_data->packed_transaction on error,
we could wait to install it until we successfully prepare. That
might make the flow a little simpler, but it introduces a hassle.
Earlier parts of files_transaction_prepare() that encounter an error
will jump to the cleanup label, and expect that cleaning up the
outer transaction will clean up the packed transaction, too. We'd
have to adjust those sites to clean up the packed transaction.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-03-21 17:28:44 +08:00
|
|
|
/*
|
|
|
|
* A failure during the prepare step will abort
|
|
|
|
* itself, but not free. Do that now, and disconnect
|
|
|
|
* from the files_transaction so it does not try to
|
|
|
|
* abort us when we hit the cleanup code below.
|
|
|
|
*/
|
|
|
|
if (ret) {
|
|
|
|
ref_transaction_free(packed_transaction);
|
|
|
|
backend_data->packed_transaction = NULL;
|
|
|
|
}
|
files-backend: don't rewrite the `packed-refs` file unnecessarily
Even when we are deleting references, we needn't overwrite the
`packed-refs` file if the references that we are deleting only exist
as loose references. Implement this optimization as follows:
* Add a function `is_packed_transaction_needed()`, which checks
whether a given packed-refs transaction actually needs to be carried
out (i.e., it returns false if the transaction obviously wouldn't
have any effect). This function must be called while holding the
`packed-refs` lock to avoid races.
* Change `files_transaction_prepare()` to check whether the
packed-refs transaction is actually needed. If not, squelch it, but
continue holding the `packed-refs` lock until the end of the
transaction to avoid races.
This fixes a mild regression caused by dc39e09942 (files_ref_store:
use a transaction to update packed refs, 2017-09-08). Before that
commit, unnecessary rewrites of `packed-refs` were suppressed by
`repack_without_refs()`. But the transaction-based writing introduced
by that commit didn't perform that optimization.
Note that the pre-dc39e09942 code still had to *read* the whole
`packed-refs` file to determine that the rewrite could be skipped, so
the performance for the cases that the write could be elided was
`O(N)` in the number of packed references both before and after
dc39e09942. But after that commit the constant factor increased.
This commit reimplements the optimization of eliding unnecessary
`packed-refs` rewrites. That, plus the fact that since
cfa2e29c34 (packed_ref_store: get rid of the `ref_cache` entirely,
2017-03-17) we don't necessarily have to read the whole `packed-refs`
file at all, means that deletes of one or a few loose references can
now be done with `O(n lg N)` effort, where `n` is the number of loose
references being deleted and `N` is the total number of packed
references.
This commit fixes two tests in t1409.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-10-28 17:16:02 +08:00
|
|
|
} else {
|
|
|
|
/*
|
|
|
|
* We can skip rewriting the `packed-refs`
|
|
|
|
* file. But we do need to leave it locked, so
|
|
|
|
* that somebody else doesn't pack a reference
|
|
|
|
* that we are trying to delete.
|
refs/files-backend: don't look at an aborted transaction
When deleting refs, we hold packed-refs.lock and prepare a packed
transaction to drop the refs from the packed-refs file. If it turns out
that we don't need to rewrite the packed refs (e.g., because none of the
deletions were present in the file), then we abort the transaction.
If that abort succeeds, then the transaction struct will have been
freed, and we set our local pointer to NULL so we don't look at it
again.
However, if it fails, then the struct will _still_ have been freed
(because ref_transaction_abort() always frees). But we don't clean up
the pointer, and will jump to our cleanup code, which will try to abort
it again, causing a use-after-free.
It's actually impossible for this to trigger in practice, since
packed_transaction_abort() will never return anything but success. But
let's fix it anyway, since that's more than we should assume about the
packed-refs code (after all, we are already bothering to check for an
error result which cannot be triggered).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-03-21 17:28:54 +08:00
|
|
|
*
|
|
|
|
* We need to disconnect our transaction from
|
|
|
|
* backend_data, since the abort (whether successful or
|
|
|
|
* not) will free it.
|
files-backend: don't rewrite the `packed-refs` file unnecessarily
Even when we are deleting references, we needn't overwrite the
`packed-refs` file if the references that we are deleting only exist
as loose references. Implement this optimization as follows:
* Add a function `is_packed_transaction_needed()`, which checks
whether a given packed-refs transaction actually needs to be carried
out (i.e., it returns false if the transaction obviously wouldn't
have any effect). This function must be called while holding the
`packed-refs` lock to avoid races.
* Change `files_transaction_prepare()` to check whether the
packed-refs transaction is actually needed. If not, squelch it, but
continue holding the `packed-refs` lock until the end of the
transaction to avoid races.
This fixes a mild regression caused by dc39e09942 (files_ref_store:
use a transaction to update packed refs, 2017-09-08). Before that
commit, unnecessary rewrites of `packed-refs` were suppressed by
`repack_without_refs()`. But the transaction-based writing introduced
by that commit didn't perform that optimization.
Note that the pre-dc39e09942 code still had to *read* the whole
`packed-refs` file to determine that the rewrite could be skipped, so
the performance for the cases that the write could be elided was
`O(N)` in the number of packed references both before and after
dc39e09942. But after that commit the constant factor increased.
This commit reimplements the optimization of eliding unnecessary
`packed-refs` rewrites. That, plus the fact that since
cfa2e29c34 (packed_ref_store: get rid of the `ref_cache` entirely,
2017-03-17) we don't necessarily have to read the whole `packed-refs`
file at all, means that deletes of one or a few loose references can
now be done with `O(n lg N)` effort, where `n` is the number of loose
references being deleted and `N` is the total number of packed
references.
This commit fixes two tests in t1409.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-10-28 17:16:02 +08:00
|
|
|
*/
|
refs/files-backend: don't look at an aborted transaction
When deleting refs, we hold packed-refs.lock and prepare a packed
transaction to drop the refs from the packed-refs file. If it turns out
that we don't need to rewrite the packed refs (e.g., because none of the
deletions were present in the file), then we abort the transaction.
If that abort succeeds, then the transaction struct will have been
freed, and we set our local pointer to NULL so we don't look at it
again.
However, if it fails, then the struct will _still_ have been freed
(because ref_transaction_abort() always frees). But we don't clean up
the pointer, and will jump to our cleanup code, which will try to abort
it again, causing a use-after-free.
It's actually impossible for this to trigger in practice, since
packed_transaction_abort() will never return anything but success. But
let's fix it anyway, since that's more than we should assume about the
packed-refs code (after all, we are already bothering to check for an
error result which cannot be triggered).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-03-21 17:28:54 +08:00
|
|
|
backend_data->packed_transaction = NULL;
|
files-backend: don't rewrite the `packed-refs` file unnecessarily
Even when we are deleting references, we needn't overwrite the
`packed-refs` file if the references that we are deleting only exist
as loose references. Implement this optimization as follows:
* Add a function `is_packed_transaction_needed()`, which checks
whether a given packed-refs transaction actually needs to be carried
out (i.e., it returns false if the transaction obviously wouldn't
have any effect). This function must be called while holding the
`packed-refs` lock to avoid races.
* Change `files_transaction_prepare()` to check whether the
packed-refs transaction is actually needed. If not, squelch it, but
continue holding the `packed-refs` lock until the end of the
transaction to avoid races.
This fixes a mild regression caused by dc39e09942 (files_ref_store:
use a transaction to update packed refs, 2017-09-08). Before that
commit, unnecessary rewrites of `packed-refs` were suppressed by
`repack_without_refs()`. But the transaction-based writing introduced
by that commit didn't perform that optimization.
Note that the pre-dc39e09942 code still had to *read* the whole
`packed-refs` file to determine that the rewrite could be skipped, so
the performance for the cases that the write could be elided was
`O(N)` in the number of packed references both before and after
dc39e09942. But after that commit the constant factor increased.
This commit reimplements the optimization of eliding unnecessary
`packed-refs` rewrites. That, plus the fact that since
cfa2e29c34 (packed_ref_store: get rid of the `ref_cache` entirely,
2017-03-17) we don't necessarily have to read the whole `packed-refs`
file at all, means that deletes of one or a few loose references can
now be done with `O(n lg N)` effort, where `n` is the number of loose
references being deleted and `N` is the total number of packed
references.
This commit fixes two tests in t1409.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-10-28 17:16:02 +08:00
|
|
|
if (ref_transaction_abort(packed_transaction, err)) {
|
|
|
|
ret = TRANSACTION_GENERIC_ERROR;
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
}
|
ref_transaction_prepare(): new optional step for reference updates
In the future, compound reference stores will sometimes need to modify
references in two different reference stores at the same time, meaning
that a single logical reference transaction might have to be
implemented as two internal sub-transactions. They won't want to call
`ref_transaction_commit()` for the two sub-transactions one after the
other, because that wouldn't be atomic (the first commit could succeed
and the second one fail). Instead, they will want to prepare both
sub-transactions (i.e., obtain any necessary locks and do any
pre-checks), and only if both prepare steps succeed, then commit both
sub-transactions.
Start preparing for that day by adding a new, optional
`ref_transaction_prepare()` step to the reference transaction
sequence, which obtains the locks and does any prechecks, reporting
any errors that occur. Also add a `ref_transaction_abort()` function
that can be used to abort a sub-transaction even if it has already
been prepared.
That is on the side of the public-facing API. On the side of the
`ref_store` VTABLE, get rid of `transaction_commit` and instead add
methods `transaction_prepare`, `transaction_finish`, and
`transaction_abort`. A `ref_transaction_commit()` now basically calls
methods `transaction_prepare` then `transaction_finish`.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-05-22 22:17:44 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
cleanup:
|
|
|
|
free(head_ref);
|
|
|
|
string_list_clear(&affected_refnames, 0);
|
|
|
|
|
|
|
|
if (ret)
|
files_ref_store: use a transaction to update packed refs
When processing a `files_ref_store` transaction, it is sometimes
necessary to delete some references from the "packed-refs" file. Do
that using a reference transaction conducted against the
`packed_ref_store`.
This change further decouples `files_ref_store` from
`packed_ref_store`. It also fixes multiple problems, including the two
revealed by test cases added in the previous commit.
First, the old code didn't obtain the `packed-refs` lock until
`files_transaction_finish()`. This means that a failure to acquire the
`packed-refs` lock (e.g., due to contention with another process)
wasn't detected until it was too late (problems like this are supposed
to be detected in the "prepare" phase). The new code acquires the
`packed-refs` lock in `files_transaction_prepare()`, the same stage of
the processing when the loose reference locks are being acquired,
removing another reason why the "prepare" phase might succeed and the
"finish" phase might nevertheless fail.
Second, the old code deleted the loose version of a reference before
deleting any packed version of the same reference. This left a moment
when another process might think that the packed version of the
reference is current, which is incorrect. (Even worse, the packed
version of the reference can be arbitrarily old, and might even point
at an object that has since been garbage-collected.)
Third, if a reference deletion fails to acquire the `packed-refs` lock
altogether, then the old code might leave the repository in the
incorrect state (possibly corrupt) described in the previous
paragraph.
Now we activate the new "packed-refs" file (sans any references that
are being deleted) *before* deleting the corresponding loose
references. But we hold the "packed-refs" lock until after the loose
references have been finalized, thus preventing a simultaneous
"pack-refs" process from packing the loose version of the reference in
the time gap, which would otherwise defeat our attempt to delete it.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-09-08 21:51:51 +08:00
|
|
|
files_transaction_cleanup(refs, transaction);
|
ref_transaction_prepare(): new optional step for reference updates
In the future, compound reference stores will sometimes need to modify
references in two different reference stores at the same time, meaning
that a single logical reference transaction might have to be
implemented as two internal sub-transactions. They won't want to call
`ref_transaction_commit()` for the two sub-transactions one after the
other, because that wouldn't be atomic (the first commit could succeed
and the second one fail). Instead, they will want to prepare both
sub-transactions (i.e., obtain any necessary locks and do any
pre-checks), and only if both prepare steps succeed, then commit both
sub-transactions.
Start preparing for that day by adding a new, optional
`ref_transaction_prepare()` step to the reference transaction
sequence, which obtains the locks and does any prechecks, reporting
any errors that occur. Also add a `ref_transaction_abort()` function
that can be used to abort a sub-transaction even if it has already
been prepared.
That is on the side of the public-facing API. On the side of the
`ref_store` VTABLE, get rid of `transaction_commit` and instead add
methods `transaction_prepare`, `transaction_finish`, and
`transaction_abort`. A `ref_transaction_commit()` now basically calls
methods `transaction_prepare` then `transaction_finish`.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-05-22 22:17:44 +08:00
|
|
|
else
|
|
|
|
transaction->state = REF_TRANSACTION_PREPARED;
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int files_transaction_finish(struct ref_store *ref_store,
|
|
|
|
struct ref_transaction *transaction,
|
|
|
|
struct strbuf *err)
|
|
|
|
{
|
|
|
|
struct files_ref_store *refs =
|
|
|
|
files_downcast(ref_store, 0, "ref_transaction_finish");
|
|
|
|
size_t i;
|
|
|
|
int ret = 0;
|
|
|
|
struct strbuf sb = STRBUF_INIT;
|
files_ref_store: use a transaction to update packed refs
When processing a `files_ref_store` transaction, it is sometimes
necessary to delete some references from the "packed-refs" file. Do
that using a reference transaction conducted against the
`packed_ref_store`.
This change further decouples `files_ref_store` from
`packed_ref_store`. It also fixes multiple problems, including the two
revealed by test cases added in the previous commit.
First, the old code didn't obtain the `packed-refs` lock until
`files_transaction_finish()`. This means that a failure to acquire the
`packed-refs` lock (e.g., due to contention with another process)
wasn't detected until it was too late (problems like this are supposed
to be detected in the "prepare" phase). The new code acquires the
`packed-refs` lock in `files_transaction_prepare()`, the same stage of
the processing when the loose reference locks are being acquired,
removing another reason why the "prepare" phase might succeed and the
"finish" phase might nevertheless fail.
Second, the old code deleted the loose version of a reference before
deleting any packed version of the same reference. This left a moment
when another process might think that the packed version of the
reference is current, which is incorrect. (Even worse, the packed
version of the reference can be arbitrarily old, and might even point
at an object that has since been garbage-collected.)
Third, if a reference deletion fails to acquire the `packed-refs` lock
altogether, then the old code might leave the repository in the
incorrect state (possibly corrupt) described in the previous
paragraph.
Now we activate the new "packed-refs" file (sans any references that
are being deleted) *before* deleting the corresponding loose
references. But we hold the "packed-refs" lock until after the loose
references have been finalized, thus preventing a simultaneous
"pack-refs" process from packing the loose version of the reference in
the time gap, which would otherwise defeat our attempt to delete it.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-09-08 21:51:51 +08:00
|
|
|
struct files_transaction_backend_data *backend_data;
|
|
|
|
struct ref_transaction *packed_transaction;
|
|
|
|
|
ref_transaction_prepare(): new optional step for reference updates
In the future, compound reference stores will sometimes need to modify
references in two different reference stores at the same time, meaning
that a single logical reference transaction might have to be
implemented as two internal sub-transactions. They won't want to call
`ref_transaction_commit()` for the two sub-transactions one after the
other, because that wouldn't be atomic (the first commit could succeed
and the second one fail). Instead, they will want to prepare both
sub-transactions (i.e., obtain any necessary locks and do any
pre-checks), and only if both prepare steps succeed, then commit both
sub-transactions.
Start preparing for that day by adding a new, optional
`ref_transaction_prepare()` step to the reference transaction
sequence, which obtains the locks and does any prechecks, reporting
any errors that occur. Also add a `ref_transaction_abort()` function
that can be used to abort a sub-transaction even if it has already
been prepared.
That is on the side of the public-facing API. On the side of the
`ref_store` VTABLE, get rid of `transaction_commit` and instead add
methods `transaction_prepare`, `transaction_finish`, and
`transaction_abort`. A `ref_transaction_commit()` now basically calls
methods `transaction_prepare` then `transaction_finish`.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-05-22 22:17:44 +08:00
|
|
|
|
|
|
|
assert(err);
|
|
|
|
|
|
|
|
if (!transaction->nr) {
|
|
|
|
transaction->state = REF_TRANSACTION_CLOSED;
|
|
|
|
return 0;
|
2015-11-09 21:34:01 +08:00
|
|
|
}
|
|
|
|
|
files_ref_store: use a transaction to update packed refs
When processing a `files_ref_store` transaction, it is sometimes
necessary to delete some references from the "packed-refs" file. Do
that using a reference transaction conducted against the
`packed_ref_store`.
This change further decouples `files_ref_store` from
`packed_ref_store`. It also fixes multiple problems, including the two
revealed by test cases added in the previous commit.
First, the old code didn't obtain the `packed-refs` lock until
`files_transaction_finish()`. This means that a failure to acquire the
`packed-refs` lock (e.g., due to contention with another process)
wasn't detected until it was too late (problems like this are supposed
to be detected in the "prepare" phase). The new code acquires the
`packed-refs` lock in `files_transaction_prepare()`, the same stage of
the processing when the loose reference locks are being acquired,
removing another reason why the "prepare" phase might succeed and the
"finish" phase might nevertheless fail.
Second, the old code deleted the loose version of a reference before
deleting any packed version of the same reference. This left a moment
when another process might think that the packed version of the
reference is current, which is incorrect. (Even worse, the packed
version of the reference can be arbitrarily old, and might even point
at an object that has since been garbage-collected.)
Third, if a reference deletion fails to acquire the `packed-refs` lock
altogether, then the old code might leave the repository in the
incorrect state (possibly corrupt) described in the previous
paragraph.
Now we activate the new "packed-refs" file (sans any references that
are being deleted) *before* deleting the corresponding loose
references. But we hold the "packed-refs" lock until after the loose
references have been finalized, thus preventing a simultaneous
"pack-refs" process from packing the loose version of the reference in
the time gap, which would otherwise defeat our attempt to delete it.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-09-08 21:51:51 +08:00
|
|
|
backend_data = transaction->backend_data;
|
|
|
|
packed_transaction = backend_data->packed_transaction;
|
|
|
|
|
2015-11-09 21:34:01 +08:00
|
|
|
/* Perform updates first so live commits remain referenced */
|
2016-04-22 06:02:50 +08:00
|
|
|
for (i = 0; i < transaction->nr; i++) {
|
|
|
|
struct ref_update *update = transaction->updates[i];
|
2016-09-05 00:08:43 +08:00
|
|
|
struct ref_lock *lock = update->backend_data;
|
2015-11-09 21:34:01 +08:00
|
|
|
|
2016-02-25 06:58:50 +08:00
|
|
|
if (update->flags & REF_NEEDS_COMMIT ||
|
|
|
|
update->flags & REF_LOG_ONLY) {
|
2017-03-26 10:42:22 +08:00
|
|
|
if (files_log_ref_write(refs,
|
|
|
|
lock->ref_name,
|
2017-05-07 06:10:24 +08:00
|
|
|
&lock->old_oid,
|
|
|
|
&update->new_oid,
|
2017-01-07 00:22:31 +08:00
|
|
|
update->msg, update->flags,
|
|
|
|
err)) {
|
refs: resolve symbolic refs first
Before committing ref updates, split symbolic ref updates into two
parts: an update to the underlying ref, and a log-only update to the
symbolic ref. This ensures that both references are locked correctly
during the transaction, including while their reflogs are updated.
Similarly, if the reference pointed to by HEAD is modified directly, add
a separate log-only update to HEAD, rather than leaving the job of
updating HEAD's reflog to commit_ref_update(). This change ensures that
HEAD is locked correctly while its reflog is being modified, as well as
being cheaper (HEAD only needs to be resolved once).
This makes use of a new function, lock_raw_ref(), which is analogous to
read_raw_ref(), but acquires a lock on the reference before reading it.
This change still has two problems:
* There are redundant read_ref_full() reference lookups.
* It is still possible to get incorrect reflogs for symbolic references
if there is a concurrent update by another process, since the old_oid
of a symref is determined before the lock on the pointed-to ref is
held.
Both problems will soon be fixed.
Signed-off-by: David Turner <dturner@twopensource.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
WIP
2016-04-25 21:56:07 +08:00
|
|
|
char *old_msg = strbuf_detach(err, NULL);
|
|
|
|
|
|
|
|
strbuf_addf(err, "cannot update the ref '%s': %s",
|
|
|
|
lock->ref_name, old_msg);
|
|
|
|
free(old_msg);
|
|
|
|
unlock_ref(lock);
|
2016-09-05 00:08:43 +08:00
|
|
|
update->backend_data = NULL;
|
2015-11-09 21:34:01 +08:00
|
|
|
ret = TRANSACTION_GENERIC_ERROR;
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (update->flags & REF_NEEDS_COMMIT) {
|
2016-09-05 00:08:11 +08:00
|
|
|
clear_loose_ref_cache(refs);
|
refs: resolve symbolic refs first
Before committing ref updates, split symbolic ref updates into two
parts: an update to the underlying ref, and a log-only update to the
symbolic ref. This ensures that both references are locked correctly
during the transaction, including while their reflogs are updated.
Similarly, if the reference pointed to by HEAD is modified directly, add
a separate log-only update to HEAD, rather than leaving the job of
updating HEAD's reflog to commit_ref_update(). This change ensures that
HEAD is locked correctly while its reflog is being modified, as well as
being cheaper (HEAD only needs to be resolved once).
This makes use of a new function, lock_raw_ref(), which is analogous to
read_raw_ref(), but acquires a lock on the reference before reading it.
This change still has two problems:
* There are redundant read_ref_full() reference lookups.
* It is still possible to get incorrect reflogs for symbolic references
if there is a concurrent update by another process, since the old_oid
of a symref is determined before the lock on the pointed-to ref is
held.
Both problems will soon be fixed.
Signed-off-by: David Turner <dturner@twopensource.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
WIP
2016-04-25 21:56:07 +08:00
|
|
|
if (commit_ref(lock)) {
|
|
|
|
strbuf_addf(err, "couldn't set '%s'", lock->ref_name);
|
|
|
|
unlock_ref(lock);
|
2016-09-05 00:08:43 +08:00
|
|
|
update->backend_data = NULL;
|
2015-11-09 21:34:01 +08:00
|
|
|
ret = TRANSACTION_GENERIC_ERROR;
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
files_ref_store: use a transaction to update packed refs
When processing a `files_ref_store` transaction, it is sometimes
necessary to delete some references from the "packed-refs" file. Do
that using a reference transaction conducted against the
`packed_ref_store`.
This change further decouples `files_ref_store` from
`packed_ref_store`. It also fixes multiple problems, including the two
revealed by test cases added in the previous commit.
First, the old code didn't obtain the `packed-refs` lock until
`files_transaction_finish()`. This means that a failure to acquire the
`packed-refs` lock (e.g., due to contention with another process)
wasn't detected until it was too late (problems like this are supposed
to be detected in the "prepare" phase). The new code acquires the
`packed-refs` lock in `files_transaction_prepare()`, the same stage of
the processing when the loose reference locks are being acquired,
removing another reason why the "prepare" phase might succeed and the
"finish" phase might nevertheless fail.
Second, the old code deleted the loose version of a reference before
deleting any packed version of the same reference. This left a moment
when another process might think that the packed version of the
reference is current, which is incorrect. (Even worse, the packed
version of the reference can be arbitrarily old, and might even point
at an object that has since been garbage-collected.)
Third, if a reference deletion fails to acquire the `packed-refs` lock
altogether, then the old code might leave the repository in the
incorrect state (possibly corrupt) described in the previous
paragraph.
Now we activate the new "packed-refs" file (sans any references that
are being deleted) *before* deleting the corresponding loose
references. But we hold the "packed-refs" lock until after the loose
references have been finalized, thus preventing a simultaneous
"pack-refs" process from packing the loose version of the reference in
the time gap, which would otherwise defeat our attempt to delete it.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-09-08 21:51:51 +08:00
|
|
|
|
2017-09-08 21:51:53 +08:00
|
|
|
/*
|
|
|
|
* Now that updates are safely completed, we can perform
|
|
|
|
* deletes. First delete the reflogs of any references that
|
|
|
|
* will be deleted, since (in the unexpected event of an
|
|
|
|
* error) leaving a reference without a reflog is less bad
|
|
|
|
* than leaving a reflog without a reference (the latter is a
|
|
|
|
* mildly invalid repository state):
|
|
|
|
*/
|
|
|
|
for (i = 0; i < transaction->nr; i++) {
|
|
|
|
struct ref_update *update = transaction->updates[i];
|
|
|
|
if (update->flags & REF_DELETING &&
|
|
|
|
!(update->flags & REF_LOG_ONLY) &&
|
2017-11-05 16:42:07 +08:00
|
|
|
!(update->flags & REF_IS_PRUNING)) {
|
2017-09-08 21:51:53 +08:00
|
|
|
strbuf_reset(&sb);
|
|
|
|
files_reflog_path(refs, &sb, update->refname);
|
|
|
|
if (!unlink_or_warn(sb.buf))
|
|
|
|
try_remove_empty_parents(refs, update->refname,
|
|
|
|
REMOVE_EMPTY_PARENTS_REFLOG);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
files_ref_store: use a transaction to update packed refs
When processing a `files_ref_store` transaction, it is sometimes
necessary to delete some references from the "packed-refs" file. Do
that using a reference transaction conducted against the
`packed_ref_store`.
This change further decouples `files_ref_store` from
`packed_ref_store`. It also fixes multiple problems, including the two
revealed by test cases added in the previous commit.
First, the old code didn't obtain the `packed-refs` lock until
`files_transaction_finish()`. This means that a failure to acquire the
`packed-refs` lock (e.g., due to contention with another process)
wasn't detected until it was too late (problems like this are supposed
to be detected in the "prepare" phase). The new code acquires the
`packed-refs` lock in `files_transaction_prepare()`, the same stage of
the processing when the loose reference locks are being acquired,
removing another reason why the "prepare" phase might succeed and the
"finish" phase might nevertheless fail.
Second, the old code deleted the loose version of a reference before
deleting any packed version of the same reference. This left a moment
when another process might think that the packed version of the
reference is current, which is incorrect. (Even worse, the packed
version of the reference can be arbitrarily old, and might even point
at an object that has since been garbage-collected.)
Third, if a reference deletion fails to acquire the `packed-refs` lock
altogether, then the old code might leave the repository in the
incorrect state (possibly corrupt) described in the previous
paragraph.
Now we activate the new "packed-refs" file (sans any references that
are being deleted) *before* deleting the corresponding loose
references. But we hold the "packed-refs" lock until after the loose
references have been finalized, thus preventing a simultaneous
"pack-refs" process from packing the loose version of the reference in
the time gap, which would otherwise defeat our attempt to delete it.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-09-08 21:51:51 +08:00
|
|
|
/*
|
|
|
|
* Perform deletes now that updates are safely completed.
|
|
|
|
*
|
|
|
|
* First delete any packed versions of the references, while
|
|
|
|
* retaining the packed-refs lock:
|
|
|
|
*/
|
|
|
|
if (packed_transaction) {
|
|
|
|
ret = ref_transaction_commit(packed_transaction, err);
|
|
|
|
ref_transaction_free(packed_transaction);
|
|
|
|
packed_transaction = NULL;
|
|
|
|
backend_data->packed_transaction = NULL;
|
|
|
|
if (ret)
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Now delete the loose versions of the references: */
|
2016-04-22 06:02:50 +08:00
|
|
|
for (i = 0; i < transaction->nr; i++) {
|
|
|
|
struct ref_update *update = transaction->updates[i];
|
2016-09-05 00:08:43 +08:00
|
|
|
struct ref_lock *lock = update->backend_data;
|
2015-11-09 21:34:01 +08:00
|
|
|
|
2016-02-25 06:58:50 +08:00
|
|
|
if (update->flags & REF_DELETING &&
|
|
|
|
!(update->flags & REF_LOG_ONLY)) {
|
2021-05-08 13:00:43 +08:00
|
|
|
update->flags |= REF_DELETED_RMDIR;
|
2017-01-07 00:22:39 +08:00
|
|
|
if (!(update->type & REF_ISPACKED) ||
|
|
|
|
update->type & REF_ISSYMREF) {
|
|
|
|
/* It is a loose reference. */
|
2017-03-26 10:42:20 +08:00
|
|
|
strbuf_reset(&sb);
|
2017-03-26 10:42:23 +08:00
|
|
|
files_ref_path(refs, &sb, lock->ref_name);
|
2017-03-26 10:42:20 +08:00
|
|
|
if (unlink_or_msg(sb.buf, err)) {
|
2017-01-07 00:22:39 +08:00
|
|
|
ret = TRANSACTION_GENERIC_ERROR;
|
|
|
|
goto cleanup;
|
|
|
|
}
|
2015-11-09 21:34:01 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-09-05 00:08:11 +08:00
|
|
|
clear_loose_ref_cache(refs);
|
2015-11-09 21:34:01 +08:00
|
|
|
|
|
|
|
cleanup:
|
files_ref_store: use a transaction to update packed refs
When processing a `files_ref_store` transaction, it is sometimes
necessary to delete some references from the "packed-refs" file. Do
that using a reference transaction conducted against the
`packed_ref_store`.
This change further decouples `files_ref_store` from
`packed_ref_store`. It also fixes multiple problems, including the two
revealed by test cases added in the previous commit.
First, the old code didn't obtain the `packed-refs` lock until
`files_transaction_finish()`. This means that a failure to acquire the
`packed-refs` lock (e.g., due to contention with another process)
wasn't detected until it was too late (problems like this are supposed
to be detected in the "prepare" phase). The new code acquires the
`packed-refs` lock in `files_transaction_prepare()`, the same stage of
the processing when the loose reference locks are being acquired,
removing another reason why the "prepare" phase might succeed and the
"finish" phase might nevertheless fail.
Second, the old code deleted the loose version of a reference before
deleting any packed version of the same reference. This left a moment
when another process might think that the packed version of the
reference is current, which is incorrect. (Even worse, the packed
version of the reference can be arbitrarily old, and might even point
at an object that has since been garbage-collected.)
Third, if a reference deletion fails to acquire the `packed-refs` lock
altogether, then the old code might leave the repository in the
incorrect state (possibly corrupt) described in the previous
paragraph.
Now we activate the new "packed-refs" file (sans any references that
are being deleted) *before* deleting the corresponding loose
references. But we hold the "packed-refs" lock until after the loose
references have been finalized, thus preventing a simultaneous
"pack-refs" process from packing the loose version of the reference in
the time gap, which would otherwise defeat our attempt to delete it.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-09-08 21:51:51 +08:00
|
|
|
files_transaction_cleanup(refs, transaction);
|
2015-11-09 21:34:01 +08:00
|
|
|
|
2017-01-07 00:22:43 +08:00
|
|
|
for (i = 0; i < transaction->nr; i++) {
|
|
|
|
struct ref_update *update = transaction->updates[i];
|
|
|
|
|
2021-05-08 13:00:43 +08:00
|
|
|
if (update->flags & REF_DELETED_RMDIR) {
|
2017-01-07 00:22:43 +08:00
|
|
|
/*
|
2021-05-08 13:00:43 +08:00
|
|
|
* The reference was deleted. Delete any
|
2017-01-07 00:22:43 +08:00
|
|
|
* empty parent directories. (Note that this
|
|
|
|
* can only work because we have already
|
|
|
|
* removed the lockfile.)
|
|
|
|
*/
|
2017-03-26 10:42:22 +08:00
|
|
|
try_remove_empty_parents(refs, update->refname,
|
2017-01-07 00:22:43 +08:00
|
|
|
REMOVE_EMPTY_PARENTS_REF);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
ref_transaction_prepare(): new optional step for reference updates
In the future, compound reference stores will sometimes need to modify
references in two different reference stores at the same time, meaning
that a single logical reference transaction might have to be
implemented as two internal sub-transactions. They won't want to call
`ref_transaction_commit()` for the two sub-transactions one after the
other, because that wouldn't be atomic (the first commit could succeed
and the second one fail). Instead, they will want to prepare both
sub-transactions (i.e., obtain any necessary locks and do any
pre-checks), and only if both prepare steps succeed, then commit both
sub-transactions.
Start preparing for that day by adding a new, optional
`ref_transaction_prepare()` step to the reference transaction
sequence, which obtains the locks and does any prechecks, reporting
any errors that occur. Also add a `ref_transaction_abort()` function
that can be used to abort a sub-transaction even if it has already
been prepared.
That is on the side of the public-facing API. On the side of the
`ref_store` VTABLE, get rid of `transaction_commit` and instead add
methods `transaction_prepare`, `transaction_finish`, and
`transaction_abort`. A `ref_transaction_commit()` now basically calls
methods `transaction_prepare` then `transaction_finish`.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-05-22 22:17:44 +08:00
|
|
|
strbuf_release(&sb);
|
2015-11-09 21:34:01 +08:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
ref_transaction_prepare(): new optional step for reference updates
In the future, compound reference stores will sometimes need to modify
references in two different reference stores at the same time, meaning
that a single logical reference transaction might have to be
implemented as two internal sub-transactions. They won't want to call
`ref_transaction_commit()` for the two sub-transactions one after the
other, because that wouldn't be atomic (the first commit could succeed
and the second one fail). Instead, they will want to prepare both
sub-transactions (i.e., obtain any necessary locks and do any
pre-checks), and only if both prepare steps succeed, then commit both
sub-transactions.
Start preparing for that day by adding a new, optional
`ref_transaction_prepare()` step to the reference transaction
sequence, which obtains the locks and does any prechecks, reporting
any errors that occur. Also add a `ref_transaction_abort()` function
that can be used to abort a sub-transaction even if it has already
been prepared.
That is on the side of the public-facing API. On the side of the
`ref_store` VTABLE, get rid of `transaction_commit` and instead add
methods `transaction_prepare`, `transaction_finish`, and
`transaction_abort`. A `ref_transaction_commit()` now basically calls
methods `transaction_prepare` then `transaction_finish`.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-05-22 22:17:44 +08:00
|
|
|
static int files_transaction_abort(struct ref_store *ref_store,
|
|
|
|
struct ref_transaction *transaction,
|
2022-08-26 01:09:48 +08:00
|
|
|
struct strbuf *err UNUSED)
|
ref_transaction_prepare(): new optional step for reference updates
In the future, compound reference stores will sometimes need to modify
references in two different reference stores at the same time, meaning
that a single logical reference transaction might have to be
implemented as two internal sub-transactions. They won't want to call
`ref_transaction_commit()` for the two sub-transactions one after the
other, because that wouldn't be atomic (the first commit could succeed
and the second one fail). Instead, they will want to prepare both
sub-transactions (i.e., obtain any necessary locks and do any
pre-checks), and only if both prepare steps succeed, then commit both
sub-transactions.
Start preparing for that day by adding a new, optional
`ref_transaction_prepare()` step to the reference transaction
sequence, which obtains the locks and does any prechecks, reporting
any errors that occur. Also add a `ref_transaction_abort()` function
that can be used to abort a sub-transaction even if it has already
been prepared.
That is on the side of the public-facing API. On the side of the
`ref_store` VTABLE, get rid of `transaction_commit` and instead add
methods `transaction_prepare`, `transaction_finish`, and
`transaction_abort`. A `ref_transaction_commit()` now basically calls
methods `transaction_prepare` then `transaction_finish`.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-05-22 22:17:44 +08:00
|
|
|
{
|
files_ref_store: use a transaction to update packed refs
When processing a `files_ref_store` transaction, it is sometimes
necessary to delete some references from the "packed-refs" file. Do
that using a reference transaction conducted against the
`packed_ref_store`.
This change further decouples `files_ref_store` from
`packed_ref_store`. It also fixes multiple problems, including the two
revealed by test cases added in the previous commit.
First, the old code didn't obtain the `packed-refs` lock until
`files_transaction_finish()`. This means that a failure to acquire the
`packed-refs` lock (e.g., due to contention with another process)
wasn't detected until it was too late (problems like this are supposed
to be detected in the "prepare" phase). The new code acquires the
`packed-refs` lock in `files_transaction_prepare()`, the same stage of
the processing when the loose reference locks are being acquired,
removing another reason why the "prepare" phase might succeed and the
"finish" phase might nevertheless fail.
Second, the old code deleted the loose version of a reference before
deleting any packed version of the same reference. This left a moment
when another process might think that the packed version of the
reference is current, which is incorrect. (Even worse, the packed
version of the reference can be arbitrarily old, and might even point
at an object that has since been garbage-collected.)
Third, if a reference deletion fails to acquire the `packed-refs` lock
altogether, then the old code might leave the repository in the
incorrect state (possibly corrupt) described in the previous
paragraph.
Now we activate the new "packed-refs" file (sans any references that
are being deleted) *before* deleting the corresponding loose
references. But we hold the "packed-refs" lock until after the loose
references have been finalized, thus preventing a simultaneous
"pack-refs" process from packing the loose version of the reference in
the time gap, which would otherwise defeat our attempt to delete it.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-09-08 21:51:51 +08:00
|
|
|
struct files_ref_store *refs =
|
|
|
|
files_downcast(ref_store, 0, "ref_transaction_abort");
|
|
|
|
|
|
|
|
files_transaction_cleanup(refs, transaction);
|
ref_transaction_prepare(): new optional step for reference updates
In the future, compound reference stores will sometimes need to modify
references in two different reference stores at the same time, meaning
that a single logical reference transaction might have to be
implemented as two internal sub-transactions. They won't want to call
`ref_transaction_commit()` for the two sub-transactions one after the
other, because that wouldn't be atomic (the first commit could succeed
and the second one fail). Instead, they will want to prepare both
sub-transactions (i.e., obtain any necessary locks and do any
pre-checks), and only if both prepare steps succeed, then commit both
sub-transactions.
Start preparing for that day by adding a new, optional
`ref_transaction_prepare()` step to the reference transaction
sequence, which obtains the locks and does any prechecks, reporting
any errors that occur. Also add a `ref_transaction_abort()` function
that can be used to abort a sub-transaction even if it has already
been prepared.
That is on the side of the public-facing API. On the side of the
`ref_store` VTABLE, get rid of `transaction_commit` and instead add
methods `transaction_prepare`, `transaction_finish`, and
`transaction_abort`. A `ref_transaction_commit()` now basically calls
methods `transaction_prepare` then `transaction_finish`.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-05-22 22:17:44 +08:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2015-11-09 21:34:01 +08:00
|
|
|
static int ref_present(const char *refname,
|
2022-08-26 01:09:48 +08:00
|
|
|
const struct object_id *oid UNUSED,
|
|
|
|
int flags UNUSED,
|
2022-08-19 18:08:32 +08:00
|
|
|
void *cb_data)
|
2015-11-09 21:34:01 +08:00
|
|
|
{
|
|
|
|
struct string_list *affected_refnames = cb_data;
|
|
|
|
|
|
|
|
return string_list_has_string(affected_refnames, refname);
|
|
|
|
}
|
|
|
|
|
2016-09-05 00:08:39 +08:00
|
|
|
static int files_initial_transaction_commit(struct ref_store *ref_store,
|
|
|
|
struct ref_transaction *transaction,
|
|
|
|
struct strbuf *err)
|
2015-11-09 21:34:01 +08:00
|
|
|
{
|
2016-09-05 00:08:12 +08:00
|
|
|
struct files_ref_store *refs =
|
2017-03-26 10:42:32 +08:00
|
|
|
files_downcast(ref_store, REF_STORE_WRITE,
|
|
|
|
"initial_ref_transaction_commit");
|
2017-05-22 22:17:37 +08:00
|
|
|
size_t i;
|
|
|
|
int ret = 0;
|
2015-11-09 21:34:01 +08:00
|
|
|
struct string_list affected_refnames = STRING_LIST_INIT_NODUP;
|
2017-09-08 21:51:49 +08:00
|
|
|
struct ref_transaction *packed_transaction = NULL;
|
2015-11-09 21:34:01 +08:00
|
|
|
|
|
|
|
assert(err);
|
|
|
|
|
|
|
|
if (transaction->state != REF_TRANSACTION_OPEN)
|
2018-05-02 17:38:39 +08:00
|
|
|
BUG("commit called for transaction that is not open");
|
2015-11-09 21:34:01 +08:00
|
|
|
|
|
|
|
/* Fail if a refname appears more than once in the transaction: */
|
2016-04-22 06:02:50 +08:00
|
|
|
for (i = 0; i < transaction->nr; i++)
|
|
|
|
string_list_append(&affected_refnames,
|
|
|
|
transaction->updates[i]->refname);
|
2015-11-09 21:34:01 +08:00
|
|
|
string_list_sort(&affected_refnames);
|
|
|
|
if (ref_update_reject_duplicates(&affected_refnames, err)) {
|
|
|
|
ret = TRANSACTION_GENERIC_ERROR;
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* It's really undefined to call this function in an active
|
|
|
|
* repository or when there are existing references: we are
|
|
|
|
* only locking and changing packed-refs, so (1) any
|
|
|
|
* simultaneous processes might try to change a reference at
|
|
|
|
* the same time we do, and (2) any existing loose versions of
|
|
|
|
* the references that we are setting would have precedence
|
|
|
|
* over our values. But some remote helpers create the remote
|
|
|
|
* "HEAD" and "master" branches before calling this function,
|
|
|
|
* so here we really only check that none of the references
|
|
|
|
* that we are creating already exists.
|
|
|
|
*/
|
2017-03-26 10:42:36 +08:00
|
|
|
if (refs_for_each_rawref(&refs->base, ref_present,
|
|
|
|
&affected_refnames))
|
2018-05-02 17:38:39 +08:00
|
|
|
BUG("initial ref transaction called with existing refs");
|
2015-11-09 21:34:01 +08:00
|
|
|
|
2022-04-14 06:51:33 +08:00
|
|
|
packed_transaction = ref_store_transaction_begin(refs->packed_ref_store, err);
|
2017-09-08 21:51:49 +08:00
|
|
|
if (!packed_transaction) {
|
|
|
|
ret = TRANSACTION_GENERIC_ERROR;
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
|
2016-04-22 06:02:50 +08:00
|
|
|
for (i = 0; i < transaction->nr; i++) {
|
|
|
|
struct ref_update *update = transaction->updates[i];
|
2015-11-09 21:34:01 +08:00
|
|
|
|
|
|
|
if ((update->flags & REF_HAVE_OLD) &&
|
2017-05-07 06:10:23 +08:00
|
|
|
!is_null_oid(&update->old_oid))
|
2018-05-02 17:38:39 +08:00
|
|
|
BUG("initial ref transaction with old_sha1 set");
|
2017-03-26 10:42:34 +08:00
|
|
|
if (refs_verify_refname_available(&refs->base, update->refname,
|
|
|
|
&affected_refnames, NULL,
|
|
|
|
err)) {
|
2015-11-09 21:34:01 +08:00
|
|
|
ret = TRANSACTION_NAME_CONFLICT;
|
|
|
|
goto cleanup;
|
|
|
|
}
|
2017-09-08 21:51:49 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Add a reference creation for this reference to the
|
|
|
|
* packed-refs transaction:
|
|
|
|
*/
|
|
|
|
ref_transaction_add_update(packed_transaction, update->refname,
|
|
|
|
update->flags & ~REF_HAVE_OLD,
|
2017-10-16 06:06:53 +08:00
|
|
|
&update->new_oid, &update->old_oid,
|
2017-09-08 21:51:49 +08:00
|
|
|
NULL);
|
2015-11-09 21:34:01 +08:00
|
|
|
}
|
|
|
|
|
2017-06-23 15:01:42 +08:00
|
|
|
if (packed_refs_lock(refs->packed_ref_store, 0, err)) {
|
2015-11-09 21:34:01 +08:00
|
|
|
ret = TRANSACTION_GENERIC_ERROR;
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
|
2017-09-08 21:51:49 +08:00
|
|
|
if (initial_ref_transaction_commit(packed_transaction, err)) {
|
2015-11-09 21:34:01 +08:00
|
|
|
ret = TRANSACTION_GENERIC_ERROR;
|
|
|
|
}
|
|
|
|
|
2018-01-18 21:38:41 +08:00
|
|
|
packed_refs_unlock(refs->packed_ref_store);
|
2015-11-09 21:34:01 +08:00
|
|
|
cleanup:
|
2017-09-08 21:51:49 +08:00
|
|
|
if (packed_transaction)
|
|
|
|
ref_transaction_free(packed_transaction);
|
2015-11-09 21:34:01 +08:00
|
|
|
transaction->state = REF_TRANSACTION_CLOSED;
|
|
|
|
string_list_clear(&affected_refnames, 0);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
struct expire_reflog_cb {
|
|
|
|
reflog_expiry_should_prune_fn *should_prune_fn;
|
|
|
|
void *policy_cb;
|
|
|
|
FILE *newlog;
|
2017-02-22 07:47:32 +08:00
|
|
|
struct object_id last_kept_oid;
|
2021-12-22 12:06:48 +08:00
|
|
|
unsigned int rewrite:1,
|
|
|
|
dry_run:1;
|
2015-11-09 21:34:01 +08:00
|
|
|
};
|
|
|
|
|
2017-02-22 07:47:32 +08:00
|
|
|
/*
 * Per-entry reflog callback used while expiring a reflog.
 *
 * Asks the policy callback whether the entry should be pruned. An
 * entry that survives is appended to cb->newlog and its new-OID is
 * remembered in cb->last_kept_oid. In rewrite mode the old-OID that is
 * shown to the policy callback and written out is the new-OID of the
 * previously kept entry rather than the one recorded in the entry.
 *
 * Always returns 0.
 */
static int expire_reflog_ent(struct object_id *ooid, struct object_id *noid,
			     const char *email, timestamp_t timestamp, int tz,
			     const char *message, void *cb_data)
{
	struct expire_reflog_cb *state = cb_data;
	struct object_id *prev_oid = state->rewrite ? &state->last_kept_oid
						    : ooid;
	int prune = state->should_prune_fn(prev_oid, noid, email, timestamp,
					   tz, message, state->policy_cb);

	/* Pruned entries and --dry-run runs leave the new log untouched. */
	if (!prune && !state->dry_run) {
		fprintf(state->newlog, "%s %s %s %"PRItime" %+05d\t%s",
			oid_to_hex(prev_oid), oid_to_hex(noid), email,
			timestamp, tz, message);
		oidcpy(&state->last_kept_oid, noid);
	}

	return 0;
}
|
|
|
|
|
2016-09-05 00:08:38 +08:00
|
|
|
static int files_reflog_expire(struct ref_store *ref_store,
|
2021-08-23 19:36:11 +08:00
|
|
|
const char *refname,
|
2021-12-22 12:06:48 +08:00
|
|
|
unsigned int expire_flags,
|
2016-09-05 00:08:38 +08:00
|
|
|
reflog_expiry_prepare_fn prepare_fn,
|
|
|
|
reflog_expiry_should_prune_fn should_prune_fn,
|
|
|
|
reflog_expiry_cleanup_fn cleanup_fn,
|
|
|
|
void *policy_cb_data)
|
2015-11-09 21:34:01 +08:00
|
|
|
{
|
2016-09-05 00:08:34 +08:00
|
|
|
struct files_ref_store *refs =
|
2017-03-26 10:42:32 +08:00
|
|
|
files_downcast(ref_store, REF_STORE_WRITE, "reflog_expire");
|
2018-05-10 04:55:38 +08:00
|
|
|
struct lock_file reflog_lock = LOCK_INIT;
|
2015-11-09 21:34:01 +08:00
|
|
|
struct expire_reflog_cb cb;
|
|
|
|
struct ref_lock *lock;
|
2017-03-26 10:42:22 +08:00
|
|
|
struct strbuf log_file_sb = STRBUF_INIT;
|
2015-11-09 21:34:01 +08:00
|
|
|
char *log_file;
|
|
|
|
int status = 0;
|
|
|
|
struct strbuf err = STRBUF_INIT;
|
reflog expire: don't lock reflogs using previously seen OID
During reflog expiry, the cmd_reflog_expire() function first iterates
over all reflogs in logs/*, and then one-by-one acquires the lock for
each one and expires it. This behavior has been with us since this
command was implemented in 4264dc15e1 ("git reflog expire",
2006-12-19).
Change this to stop calling lock_ref_oid_basic() with the OID we saw
when we looped over the logs, instead have it pass the OID it managed
to lock.
This mostly mitigates a race condition where e.g. "git gc" will fail
in a concurrently updated repository because the branch moved since
"git reflog expire --all" was started. I.e. with:
error: cannot lock ref '<refname>': ref '<refname>' is at <OID-A> but expected <OID-B>
This behavior of passing in an "oid" was needed for an edge-case that
I've untangled in this and preceding commits though, namely that we
needed this OID because we'd:
1. Lookup the reflog name/OID via dwim_log()
2. With that OID, lock the reflog
3. Later in builtin/reflog.c we use the OID we looked up as input to
lookup_commit_reference_gently(), assured that it's equal to the
OID we got from dwim_log().
We can be sure that this change is safe to make because between
dwim_log (step #1) and lock_ref_oid_basic (step #2) there was no other
logic relevant to the OID or expiry run in the cmd_reflog_expire()
caller.
We can thus treat that code as a black box, before and after this
change it would get an OID that's been locked, the only difference is
that now we mostly won't be failing to get the lock due to the TOCTOU
race[0]. That failure was purely an implementation detail in how the
"current OID" was looked up, it was divorced from the locking
mechanism.
What do we mean with "mostly"? It mostly mitigates it because we'll
still run into cases where the ref is locked and being updated as we
want to expire it, and other git processes wanting to update the refs
will in turn race with us as we expire the reflog.
That remaining race can in turn be mitigated with the
core.filesRefLockTimeout setting, see 4ff0f01cb7 ("refs: retry
acquiring reference locks for 100ms", 2017-08-21). In practice if that
value is high enough we'll probably never have ref updates or reflog
expiry failing, since the clients involved will retry for far longer
than the time any of those operations could take.
See [1] for an initial report of how this impacted "git gc" and a
large discussion about this change in early 2019. In particular patch
looked good to Michael Haggerty, see his[2]. That message seems to not
have made it to the ML archive, its content is quoted in full in my
[3].
I'm leaving behind now-unused code in the refs API etc. that takes the
now-NULL "unused_oid" argument, and other code that can be simplified now
that we never have an OID in that context, that'll be cleaned up in
subsequent commits, but for now let's narrowly focus on fixing the
"git gc" issue. As the modified assert() shows we always pass a NULL
oid to reflog_expire() now.
Unfortunately this sort of probabilistic contention is hard to turn
into a test. I've tested this by running the following three subshells
in concurrent terminals:
(
rm -rf /tmp/git &&
git init /tmp/git &&
while true
do
head -c 10 /dev/urandom | hexdump >/tmp/git/out &&
git -C /tmp/git add out &&
git -C /tmp/git commit -m"out"
done
)
(
rm -rf /tmp/git-clone &&
git clone file:///tmp/git /tmp/git-clone &&
while git -C /tmp/git-clone pull
do
date
done
)
(
while git -C /tmp/git-clone reflog expire --all
do
date
done
)
Before this change the "reflog expire" would fail really quickly with
the "but expected" error noted above.
After this change both the "pull" and "reflog expire" will run for a
while, but eventually fail because I get unlucky with
core.filesRefLockTimeout (the "reflog expire" is in a really tight
loop). As noted above that can in turn be mitigated with higher values
of core.filesRefLockTimeout than the 100ms default.
As noted in the commentary added in the preceding commit there's also
the case of branches being racily deleted, that can be tested by
adding this to the above:
(
while git -C /tmp/git-clone branch topic master &&
git -C /tmp/git-clone branch -D topic
do
date
done
)
With core.filesRefLockTimeout set to 10 seconds (it can probably be a
lot lower) I managed to run all four of these concurrently for about
an hour, and accumulated ~125k commits, auto-gc's and all, and didn't
have a single failure. The loops visibly stall while waiting for the
lock, but that's expected and desired behavior.
0. https://en.wikipedia.org/wiki/Time-of-check_to_time-of-use
1. https://lore.kernel.org/git/87tvg7brlm.fsf@evledraar.gmail.com/
2. http://lore.kernel.org/git/b870a17d-2103-41b8-3cbc-7389d5fff33a@alum.mit.edu
3. https://lore.kernel.org/git/87pnqkco8v.fsf@evledraar.gmail.com/
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-08-23 19:36:10 +08:00
|
|
|
const struct object_id *oid;
|
2015-11-09 21:34:01 +08:00
|
|
|
|
|
|
|
memset(&cb, 0, sizeof(cb));
|
2021-12-22 12:06:48 +08:00
|
|
|
cb.rewrite = !!(expire_flags & EXPIRE_REFLOGS_REWRITE);
|
|
|
|
cb.dry_run = !!(expire_flags & EXPIRE_REFLOGS_DRY_RUN);
|
2015-11-09 21:34:01 +08:00
|
|
|
cb.policy_cb = policy_cb_data;
|
|
|
|
cb.should_prune_fn = should_prune_fn;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The reflog file is locked by holding the lock on the
|
|
|
|
* reference itself, plus we might need to update the
|
|
|
|
* reference if --updateref was specified:
|
|
|
|
*/
|
refs/files: remove "name exist?" check in lock_ref_oid_basic()
In lock_ref_oid_basic() we'll happily lock a reference that doesn't
exist yet. That's normal, and is how references are initially born,
but we don't need to retain checks here in lock_ref_oid_basic() about
the state of the ref, when what we're checking is either checked
already, or something we're about to discover by trying to lock the
ref with raceproof_create_file().
The one exception is the caller in files_reflog_expire(), who passes
us a "type" to find out if the reference is a symref or not. We can
move that logic over to that caller, which can now defer its
discovery of whether or not the ref is a symref until it's needed. In
the preceding commit an exhaustive regression test was added for that
case in a new test in "t1417-reflog-updateref.sh".
The improved diagnostics here were added in
5b2d8d6f218 (lock_ref_sha1_basic(): improve diagnostics for ref D/F
conflicts, 2015-05-11), and then much of the surrounding code went
away recently in my 245fbba46d6 (refs/files: remove unused "errno ==
EISDIR" code, 2021-08-23).
The refs_resolve_ref_unsafe() code being removed here looks like it
should be tasked with doing that, but it's actually redundant to other
code.
The reason for that is as noted in 245fbba46d6 this once widely used
function now only has a handful of callers left, which all handle this
case themselves.
To the extent that we're racy between their check and ours removing
this check actually improves the situation, as we'll be doing fewer
things between the not-under-lock initial check and acquiring the
lock.
Why this is OK for all the remaining callers of lock_ref_oid_basic()
is noted below. There are only two of those callers:
* "git branch -[cm] <oldbranch> <newbranch>":
In files_copy_or_rename_ref() we'll call this when we copy or rename
refs via rename_ref() and copy_ref(), but only after we've checked
if the refname exists already via its own call to
refs_resolve_ref_unsafe() and refs_rename_ref_available().
As the updated comment to the latter here notes neither of those are
actually needed. If we delete not only this code but also
refs_rename_ref_available() we'll do just fine, we'll just emit a
less friendly error message if e.g. "git branch -m A B/C" would have
a D/F conflict with a "B" file.
Actually we'd probably die before that in case reflogs for the
branch existed, i.e. when we try to rename() or copy_file() the
relevant reflog, since if we've got a D/F conflict with a branch
name we'll probably also have the same with its reflogs (but not
necessarily, we might have reflogs, but it might not).
As some #leftoverbits that code seems buggy to me, i.e. the reflog
"protocol" should be to get a lock on the main ref, and then perform
ref and/or reflog operations. That code dates back to
c976d415e53 (git-branch: add options and tests for branch renaming,
2006-11-28) and probably pre-dated the solidifying of that
convention. But in any case, that edge case is not our bug or
problem right now.
* "git reflog expire <ref>":
In files_reflog_expire() we'll call this without previous ref
existence checking in files-backend.c, but that code is in turn
called by code that's just finished checking if the refname whose
reflog we're expiring exists.
See ae35e16cd43 (reflog expire: don't lock reflogs using previously
seen OID, 2021-08-23) for the current state of that code, and
5e6f003ca8a (reflog_expire(): ignore --updateref for symbolic
references, 2015-03-03) for the code we'd break if we only did a
"update = !!ref" here, which is covered by the aforementioned
regression test in "t1417-reflog-updateref.sh".
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-10-16 17:39:13 +08:00
|
|
|
lock = lock_ref_oid_basic(refs, refname, &err);
|
2015-11-09 21:34:01 +08:00
|
|
|
if (!lock) {
|
|
|
|
error("cannot lock ref '%s': %s", refname, err.buf);
|
|
|
|
strbuf_release(&err);
|
|
|
|
return -1;
|
|
|
|
}
|
reflog expire: don't lock reflogs using previously seen OID
During reflog expiry, the cmd_reflog_expire() function first iterates
over all reflogs in logs/*, and then one-by-one acquires the lock for
each one and expires it. This behavior has been with us since this
command was implemented in 4264dc15e1 ("git reflog expire",
2006-12-19).
Change this to stop calling lock_ref_oid_basic() with the OID we saw
when we looped over the logs, instead have it pass the OID it managed
to lock.
This mostly mitigates a race condition where e.g. "git gc" will fail
in a concurrently updated repository because the branch moved since
"git reflog expire --all" was started. I.e. with:
error: cannot lock ref '<refname>': ref '<refname>' is at <OID-A> but expected <OID-B>
This behavior of passing in an "oid" was needed for an edge-case that
I've untangled in this and preceding commits though, namely that we
needed this OID because we'd:
1. Lookup the reflog name/OID via dwim_log()
2. With that OID, lock the reflog
3. Later in builtin/reflog.c we use the OID we looked up as input to
lookup_commit_reference_gently(), assured that it's equal to the
OID we got from dwim_log().
We can be sure that this change is safe to make because between
dwim_log (step #1) and lock_ref_oid_basic (step #2) there was no other
logic relevant to the OID or expiry run in the cmd_reflog_expire()
caller.
We can thus treat that code as a black box, before and after this
change it would get an OID that's been locked, the only difference is
that now we mostly won't be failing to get the lock due to the TOCTOU
race[0]. That failure was purely an implementation detail in how the
"current OID" was looked up, it was divorced from the locking
mechanism.
What do we mean with "mostly"? It mostly mitigates it because we'll
still run into cases where the ref is locked and being updated as we
want to expire it, and other git processes wanting to update the refs
will in turn race with us as we expire the reflog.
That remaining race can in turn be mitigated with the
core.filesRefLockTimeout setting, see 4ff0f01cb7 ("refs: retry
acquiring reference locks for 100ms", 2017-08-21). In practice if that
value is high enough we'll probably never have ref updates or reflog
expiry failing, since the clients involved will retry for far longer
than the time any of those operations could take.
See [1] for an initial report of how this impacted "git gc" and a
large discussion about this change in early 2019. In particular patch
looked good to Michael Haggerty, see his[2]. That message seems to not
have made it to the ML archive, its content is quoted in full in my
[3].
I'm leaving behind now-unused code in the refs API etc. that takes the
now-NULL "unused_oid" argument, and other code that can be simplified now
that we never have an OID in that context, that'll be cleaned up in
subsequent commits, but for now let's narrowly focus on fixing the
"git gc" issue. As the modified assert() shows we always pass a NULL
oid to reflog_expire() now.
Unfortunately this sort of probabilistic contention is hard to turn
into a test. I've tested this by running the following three subshells
in concurrent terminals:
(
rm -rf /tmp/git &&
git init /tmp/git &&
while true
do
head -c 10 /dev/urandom | hexdump >/tmp/git/out &&
git -C /tmp/git add out &&
git -C /tmp/git commit -m"out"
done
)
(
rm -rf /tmp/git-clone &&
git clone file:///tmp/git /tmp/git-clone &&
while git -C /tmp/git-clone pull
do
date
done
)
(
while git -C /tmp/git-clone reflog expire --all
do
date
done
)
Before this change the "reflog expire" would fail really quickly with
the "but expected" error noted above.
After this change both the "pull" and "reflog expire" will run for a
while, but eventually fail because I get unlucky with
core.filesRefLockTimeout (the "reflog expire" is in a really tight
loop). As noted above that can in turn be mitigated with higher values
of core.filesRefLockTimeout than the 100ms default.
As noted in the commentary added in the preceding commit there's also
the case of branches being racily deleted, that can be tested by
adding this to the above:
(
while git -C /tmp/git-clone branch topic master &&
git -C /tmp/git-clone branch -D topic
do
date
done
)
With core.filesRefLockTimeout set to 10 seconds (it can probably be a
lot lower) I managed to run all four of these concurrently for about
an hour, and accumulated ~125k commits, auto-gc's and all, and didn't
have a single failure. The loops visibly stall while waiting for the
lock, but that's expected and desired behavior.
0. https://en.wikipedia.org/wiki/Time-of-check_to_time-of-use
1. https://lore.kernel.org/git/87tvg7brlm.fsf@evledraar.gmail.com/
2. http://lore.kernel.org/git/b870a17d-2103-41b8-3cbc-7389d5fff33a@alum.mit.edu
3. https://lore.kernel.org/git/87pnqkco8v.fsf@evledraar.gmail.com/
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-08-23 19:36:10 +08:00
|
|
|
oid = &lock->old_oid;
|
2021-08-23 19:36:09 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* When refs are deleted, their reflog is deleted before the
|
|
|
|
* ref itself is deleted. This is because there is no separate
|
|
|
|
* lock for reflog; instead we take a lock on the ref with
|
|
|
|
* lock_ref_oid_basic().
|
|
|
|
*
|
|
|
|
* If a race happens and the reflog doesn't exist after we've
|
|
|
|
* acquired the lock that's OK. We've got nothing more to do;
|
|
|
|
* We were asked to delete the reflog, but someone else
|
|
|
|
* deleted it! The caller doesn't care that we deleted it,
|
|
|
|
* just that it is deleted. So we can return successfully.
|
|
|
|
*/
|
2017-03-26 10:42:36 +08:00
|
|
|
if (!refs_reflog_exists(ref_store, refname)) {
|
2015-11-09 21:34:01 +08:00
|
|
|
unlock_ref(lock);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2017-03-26 10:42:22 +08:00
|
|
|
files_reflog_path(refs, &log_file_sb, refname);
|
|
|
|
log_file = strbuf_detach(&log_file_sb, NULL);
|
2021-12-22 12:06:48 +08:00
|
|
|
if (!cb.dry_run) {
|
2015-11-09 21:34:01 +08:00
|
|
|
/*
|
|
|
|
* Even though holding $GIT_DIR/logs/$reflog.lock has
|
|
|
|
* no locking implications, we use the lock_file
|
|
|
|
* machinery here anyway because it does a lot of the
|
|
|
|
* work we need, including cleaning up if the program
|
|
|
|
* exits unexpectedly.
|
|
|
|
*/
|
|
|
|
if (hold_lock_file_for_update(&reflog_lock, log_file, 0) < 0) {
|
|
|
|
struct strbuf err = STRBUF_INIT;
|
|
|
|
unable_to_lock_message(log_file, errno, &err);
|
|
|
|
error("%s", err.buf);
|
|
|
|
strbuf_release(&err);
|
|
|
|
goto failure;
|
|
|
|
}
|
|
|
|
cb.newlog = fdopen_lock_file(&reflog_lock, "w");
|
|
|
|
if (!cb.newlog) {
|
|
|
|
error("cannot fdopen %s (%s)",
|
|
|
|
get_lock_file_path(&reflog_lock), strerror(errno));
|
|
|
|
goto failure;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-10-16 06:07:04 +08:00
|
|
|
(*prepare_fn)(refname, oid, cb.policy_cb);
|
2017-03-26 10:42:36 +08:00
|
|
|
refs_for_each_reflog_ent(ref_store, refname, expire_reflog_ent, &cb);
|
2015-11-09 21:34:01 +08:00
|
|
|
(*cleanup_fn)(cb.policy_cb);
|
|
|
|
|
2021-12-22 12:06:48 +08:00
|
|
|
if (!cb.dry_run) {
|
2015-11-09 21:34:01 +08:00
|
|
|
/*
|
|
|
|
* It doesn't make sense to adjust a reference pointed
|
|
|
|
* to by a symbolic ref based on expiring entries in
|
|
|
|
* the symbolic reference's reflog. Nor can we update
|
|
|
|
* a reference if there are no remaining reflog
|
|
|
|
* entries.
|
|
|
|
*/
|
refs/files: remove "name exist?" check in lock_ref_oid_basic()
In lock_ref_oid_basic() we'll happily lock a reference that doesn't
exist yet. That's normal, and is how references are initially born,
but we don't need to retain checks here in lock_ref_oid_basic() about
the state of the ref, when what we're checking is either checked
already, or something we're about to discover by trying to lock the
ref with raceproof_create_file().
The one exception is the caller in files_reflog_expire(), who passes
us a "type" to find out if the reference is a symref or not. We can
move that logic over to that caller, which can now defer its
discovery of whether or not the ref is a symref until it's needed. In
the preceding commit an exhaustive regression test was added for that
case in a new test in "t1417-reflog-updateref.sh".
The improved diagnostics here were added in
5b2d8d6f218 (lock_ref_sha1_basic(): improve diagnostics for ref D/F
conflicts, 2015-05-11), and then much of the surrounding code went
away recently in my 245fbba46d6 (refs/files: remove unused "errno ==
EISDIR" code, 2021-08-23).
The refs_resolve_ref_unsafe() code being removed here looks like it
should be tasked with doing that, but it's actually redundant to other
code.
The reason for that is as noted in 245fbba46d6 this once widely used
function now only has a handful of callers left, which all handle this
case themselves.
To the extent that we're racy between their check and ours removing
this check actually improves the situation, as we'll be doing fewer
things between the not-under-lock initial check and acquiring the
lock.
Why this is OK for all the remaining callers of lock_ref_oid_basic()
is noted below. There are only two of those callers:
* "git branch -[cm] <oldbranch> <newbranch>":
In files_copy_or_rename_ref() we'll call this when we copy or rename
refs via rename_ref() and copy_ref(). but only after we've checked
if the refname exists already via its own call to
refs_resolve_ref_unsafe() and refs_rename_ref_available().
As the updated comment to the latter here notes neither of those are
actually needed. If we delete not only this code but also
refs_rename_ref_available() we'll do just fine, we'll just emit a
less friendly error message if e.g. "git branch -m A B/C" would have
a D/F conflict with a "B" file.
Actually we'd probably die before that in case reflogs for the
branch existed, i.e. when the try to rename() or copy_file() the
relevant reflog, since if we've got a D/F conflict with a branch
name we'll probably also have the same with its reflogs (but not
necessarily, we might have reflogs, but it might not).
As some #leftoverbits that code seems buggy to me, i.e. the reflog
"protocol" should be to get a lock on the main ref, and then perform
ref and/or reflog operations. That code dates back to
c976d415e53 (git-branch: add options and tests for branch renaming,
2006-11-28) and probably pre-dated the solidifying of that
convention. But in any case, that edge case is not our bug or
problem right now.
* "git reflog expire <ref>":
In files_reflog_expire() we'll call this without previous ref
existence checking in files-backend.c, but that code is in turn
called by code that's just finished checking if the refname whose
reflog we're expiring exists.
See ae35e16cd43 (reflog expire: don't lock reflogs using previously
seen OID, 2021-08-23) for the current state of that code, and
5e6f003ca8a (reflog_expire(): ignore --updateref for symbolic
references, 2015-03-03) for the code we'd break if we only did a
"update = !!ref" here, which is covered by the aforementioned
regression test in "t1417-reflog-updateref.sh".
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-10-16 17:39:13 +08:00
|
|
|
int update = 0;
|
|
|
|
|
2021-12-22 12:06:48 +08:00
|
|
|
if ((expire_flags & EXPIRE_REFLOGS_UPDATE_REF) &&
|
refs/files: remove "name exist?" check in lock_ref_oid_basic()
In lock_ref_oid_basic() we'll happily lock a reference that doesn't
exist yet. That's normal, and is how references are initially born,
but we don't need to retain checks here in lock_ref_oid_basic() about
the state of the ref, when what we're checking is either checked
already, or something we're about to discover by trying to lock the
ref with raceproof_create_file().
The one exception is the caller in files_reflog_expire(), who passes
us a "type" to find out if the reference is a symref or not. We can
move that logic over to that caller, which can now defer its
discovery of whether or not the ref is a symref until it's needed. In
the preceding commit an exhaustive regression test was added for that
case in a new test in "t1417-reflog-updateref.sh".
The improved diagnostics here were added in
5b2d8d6f218 (lock_ref_sha1_basic(): improve diagnostics for ref D/F
conflicts, 2015-05-11), and then much of the surrounding code went
away recently in my 245fbba46d6 (refs/files: remove unused "errno ==
EISDIR" code, 2021-08-23).
The refs_resolve_ref_unsafe() code being removed here looks like it
should be tasked with doing that, but it's actually redundant to other
code.
The reason for that is as noted in 245fbba46d6 this once widely used
function now only has a handful of callers left, which all handle this
case themselves.
To the extent that we're racy between their check and ours removing
this check actually improves the situation, as we'll be doing fewer
things between the not-under-lock initial check and acquiring the
lock.
Why this is OK for all the remaining callers of lock_ref_oid_basic()
is noted below. There are only two of those callers:
* "git branch -[cm] <oldbranch> <newbranch>":
In files_copy_or_rename_ref() we'll call this when we copy or rename
refs via rename_ref() and copy_ref(). but only after we've checked
if the refname exists already via its own call to
refs_resolve_ref_unsafe() and refs_rename_ref_available().
As the updated comment to the latter here notes neither of those are
actually needed. If we delete not only this code but also
refs_rename_ref_available() we'll do just fine, we'll just emit a
less friendly error message if e.g. "git branch -m A B/C" would have
a D/F conflict with a "B" file.
Actually we'd probably die before that in case reflogs for the
branch existed, i.e. when the try to rename() or copy_file() the
relevant reflog, since if we've got a D/F conflict with a branch
name we'll probably also have the same with its reflogs (but not
necessarily, we might have reflogs, but it might not).
As some #leftoverbits that code seems buggy to me, i.e. the reflog
"protocol" should be to get a lock on the main ref, and then perform
ref and/or reflog operations. That code dates back to
c976d415e53 (git-branch: add options and tests for branch renaming,
2006-11-28) and probably pre-dated the solidifying of that
convention. But in any case, that edge case is not our bug or
problem right now.
* "git reflog expire <ref>":
In files_reflog_expire() we'll call this without previous ref
existence checking in files-backend.c, but that code is in turn
called by code that's just finished checking if the refname whose
reflog we're expiring exists.
See ae35e16cd43 (reflog expire: don't lock reflogs using previously
seen OID, 2021-08-23) for the current state of that code, and
5e6f003ca8a (reflog_expire(): ignore --updateref for symbolic
references, 2015-03-03) for the code we'd break if we only did a
"update = !!ref" here, which is covered by the aforementioned
regression test in "t1417-reflog-updateref.sh".
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-10-16 17:39:13 +08:00
|
|
|
!is_null_oid(&cb.last_kept_oid)) {
|
|
|
|
int type;
|
|
|
|
const char *ref;
|
|
|
|
|
2021-10-16 17:39:27 +08:00
|
|
|
ref = refs_resolve_ref_unsafe(&refs->base, refname,
|
refs/files: remove "name exist?" check in lock_ref_oid_basic()
In lock_ref_oid_basic() we'll happily lock a reference that doesn't
exist yet. That's normal, and is how references are initially born,
but we don't need to retain checks here in lock_ref_oid_basic() about
the state of the ref, when what we're checking is either checked
already, or something we're about to discover by trying to lock the
ref with raceproof_create_file().
The one exception is the caller in files_reflog_expire(), who passes
us a "type" to find out if the reference is a symref or not. We can
move that logic over to that caller, which can now defer its
discovery of whether or not the ref is a symref until it's needed. In
the preceding commit an exhaustive regression test was added for that
case in a new test in "t1417-reflog-updateref.sh".
The improved diagnostics here were added in
5b2d8d6f218 (lock_ref_sha1_basic(): improve diagnostics for ref D/F
conflicts, 2015-05-11), and then much of the surrounding code went
away recently in my 245fbba46d6 (refs/files: remove unused "errno ==
EISDIR" code, 2021-08-23).
The refs_resolve_ref_unsafe() code being removed here looks like it
should be tasked with doing that, but it's actually redundant to other
code.
The reason for that is as noted in 245fbba46d6 this once widely used
function now only has a handful of callers left, which all handle this
case themselves.
To the extent that we're racy between their check and ours removing
this check actually improves the situation, as we'll be doing fewer
things between the not-under-lock initial check and acquiring the
lock.
Why this is OK for all the remaining callers of lock_ref_oid_basic()
is noted below. There are only two of those callers:
* "git branch -[cm] <oldbranch> <newbranch>":
In files_copy_or_rename_ref() we'll call this when we copy or rename
refs via rename_ref() and copy_ref(), but only after we've checked
if the refname exists already via its own call to
refs_resolve_ref_unsafe() and refs_rename_ref_available().
As the updated comment to the latter here notes neither of those are
actually needed. If we delete not only this code but also
refs_rename_ref_available() we'll do just fine, we'll just emit a
less friendly error message if e.g. "git branch -m A B/C" would have
a D/F conflict with a "B" file.
Actually we'd probably die before that in case reflogs for the
branch existed, i.e. when we try to rename() or copy_file() the
relevant reflog, since if we've got a D/F conflict with a branch
name we'll probably also have the same with its reflogs (but not
necessarily, we might have reflogs, but it might not).
As some #leftoverbits that code seems buggy to me, i.e. the reflog
"protocol" should be to get a lock on the main ref, and then perform
ref and/or reflog operations. That code dates back to
c976d415e53 (git-branch: add options and tests for branch renaming,
2006-11-28) and probably pre-dated the solidifying of that
convention. But in any case, that edge case is not our bug or
problem right now.
* "git reflog expire <ref>":
In files_reflog_expire() we'll call this without previous ref
existence checking in files-backend.c, but that code is in turn
called by code that's just finished checking if the refname whose
reflog we're expiring exists.
See ae35e16cd43 (reflog expire: don't lock reflogs using previously
seen OID, 2021-08-23) for the current state of that code, and
5e6f003ca8a (reflog_expire(): ignore --updateref for symbolic
references, 2015-03-03) for the code we'd break if we only did a
"update = !!ref" here, which is covered by the aforementioned
regression test in "t1417-reflog-updateref.sh".
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-10-16 17:39:13 +08:00
|
|
|
RESOLVE_REF_NO_RECURSE,
|
2022-01-26 22:37:01 +08:00
|
|
|
NULL, &type);
|
refs/files: remove "name exist?" check in lock_ref_oid_basic()
In lock_ref_oid_basic() we'll happily lock a reference that doesn't
exist yet. That's normal, and is how references are initially born,
but we don't need to retain checks here in lock_ref_oid_basic() about
the state of the ref, when what we're checking is either checked
already, or something we're about to discover by trying to lock the
ref with raceproof_create_file().
The one exception is the caller in files_reflog_expire(), who passes
us a "type" to find out if the reference is a symref or not. We can
move that logic over to that caller, which can now defer its
discovery of whether or not the ref is a symref until it's needed. In
the preceding commit an exhaustive regression test was added for that
case in a new test in "t1417-reflog-updateref.sh".
The improved diagnostics here were added in
5b2d8d6f218 (lock_ref_sha1_basic(): improve diagnostics for ref D/F
conflicts, 2015-05-11), and then much of the surrounding code went
away recently in my 245fbba46d6 (refs/files: remove unused "errno ==
EISDIR" code, 2021-08-23).
The refs_resolve_ref_unsafe() code being removed here looks like it
should be tasked with doing that, but it's actually redundant to other
code.
The reason for that is as noted in 245fbba46d6 this once widely used
function now only has a handful of callers left, which all handle this
case themselves.
To the extent that we're racy between their check and ours removing
this check actually improves the situation, as we'll be doing fewer
things between the not-under-lock initial check and acquiring the
lock.
Why this is OK for all the remaining callers of lock_ref_oid_basic()
is noted below. There are only two of those callers:
* "git branch -[cm] <oldbranch> <newbranch>":
In files_copy_or_rename_ref() we'll call this when we copy or rename
refs via rename_ref() and copy_ref(), but only after we've checked
if the refname exists already via its own call to
refs_resolve_ref_unsafe() and refs_rename_ref_available().
As the updated comment to the latter here notes neither of those are
actually needed. If we delete not only this code but also
refs_rename_ref_available() we'll do just fine, we'll just emit a
less friendly error message if e.g. "git branch -m A B/C" would have
a D/F conflict with a "B" file.
Actually we'd probably die before that in case reflogs for the
branch existed, i.e. when we try to rename() or copy_file() the
relevant reflog, since if we've got a D/F conflict with a branch
name we'll probably also have the same with its reflogs (but not
necessarily, we might have reflogs, but it might not).
As some #leftoverbits that code seems buggy to me, i.e. the reflog
"protocol" should be to get a lock on the main ref, and then perform
ref and/or reflog operations. That code dates back to
c976d415e53 (git-branch: add options and tests for branch renaming,
2006-11-28) and probably pre-dated the solidifying of that
convention. But in any case, that edge case is not our bug or
problem right now.
* "git reflog expire <ref>":
In files_reflog_expire() we'll call this without previous ref
existence checking in files-backend.c, but that code is in turn
called by code that's just finished checking if the refname whose
reflog we're expiring exists.
See ae35e16cd43 (reflog expire: don't lock reflogs using previously
seen OID, 2021-08-23) for the current state of that code, and
5e6f003ca8a (reflog_expire(): ignore --updateref for symbolic
references, 2015-03-03) for the code we'd break if we only did a
"update = !!ref" here, which is covered by the aforementioned
regression test in "t1417-reflog-updateref.sh".
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-10-16 17:39:13 +08:00
|
|
|
update = !!(ref && !(type & REF_ISSYMREF));
|
|
|
|
}
|
2015-11-09 21:34:01 +08:00
|
|
|
|
2017-09-05 20:14:33 +08:00
|
|
|
if (close_lock_file_gently(&reflog_lock)) {
|
2015-11-09 21:34:01 +08:00
|
|
|
status |= error("couldn't write %s: %s", log_file,
|
|
|
|
strerror(errno));
|
2017-09-05 20:14:33 +08:00
|
|
|
rollback_lock_file(&reflog_lock);
|
2015-11-09 21:34:01 +08:00
|
|
|
} else if (update &&
|
2017-09-05 20:15:15 +08:00
|
|
|
(write_in_full(get_lock_file_fd(&lock->lk),
|
2018-07-16 09:27:59 +08:00
|
|
|
oid_to_hex(&cb.last_kept_oid), the_hash_algo->hexsz) < 0 ||
|
files-backend: prefer "0" for write_in_full() error check
Commit 06f46f237a (avoid "write_in_full(fd, buf, len) !=
len" pattern, 2017-09-13) converted this callsite from:
write_in_full(...) != 1
to
write_in_full(...) < 0
But during the conflict resolution in c50424a6f0 (Merge
branch 'jk/write-in-full-fix', 2017-09-25), this morphed
into
write_in_full(...) < 1
This behaves as we want, but we prefer to avoid modeling the
"less than length" error-check which can be subtly buggy, as
shown in efacf609c8 (config: avoid "write_in_full(fd, buf,
len) < len" pattern, 2017-09-13).
Signed-off-by: Jeff King <peff@peff.net>
Reviewed-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-09-26 04:27:17 +08:00
|
|
|
write_str_in_full(get_lock_file_fd(&lock->lk), "\n") < 0 ||
|
2017-09-05 20:14:33 +08:00
|
|
|
close_ref_gently(lock) < 0)) {
|
2015-11-09 21:34:01 +08:00
|
|
|
status |= error("couldn't write %s",
|
2017-09-05 20:15:15 +08:00
|
|
|
get_lock_file_path(&lock->lk));
|
2015-11-09 21:34:01 +08:00
|
|
|
rollback_lock_file(&reflog_lock);
|
|
|
|
} else if (commit_lock_file(&reflog_lock)) {
|
2015-12-12 02:40:54 +08:00
|
|
|
status |= error("unable to write reflog '%s' (%s)",
|
2015-11-09 21:34:01 +08:00
|
|
|
log_file, strerror(errno));
|
|
|
|
} else if (update && commit_ref(lock)) {
|
|
|
|
status |= error("couldn't set %s", lock->ref_name);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
free(log_file);
|
|
|
|
unlock_ref(lock);
|
|
|
|
return status;
|
|
|
|
|
|
|
|
failure:
|
|
|
|
rollback_lock_file(&reflog_lock);
|
|
|
|
free(log_file);
|
|
|
|
unlock_ref(lock);
|
|
|
|
return -1;
|
|
|
|
}
|
2016-09-05 00:08:10 +08:00
|
|
|
|
2022-08-26 01:09:48 +08:00
|
|
|
/*
 * "init_db" hook of the files ref backend.
 *
 * Creates the initial reference directories, .git/refs/{heads,tags},
 * by resolving each name through files_ref_path() and handing the
 * resulting path to safe_create_dir().
 *
 * ref_store: the store to initialise; it is downcast with
 *            REF_STORE_WRITE, so it must be a writable files store.
 * err:       unused by this backend.
 *
 * Always returns 0.
 */
static int files_init_db(struct ref_store *ref_store, struct strbuf *err UNUSED)
{
	/* NULL-terminated list of directories to create, in order. */
	static const char *const initial_dirs[] = {
		"refs/heads",
		"refs/tags",
		NULL
	};
	struct files_ref_store *refs =
		files_downcast(ref_store, REF_STORE_WRITE, "init_db");
	struct strbuf path = STRBUF_INIT;
	const char *const *dir;

	for (dir = initial_dirs; *dir; dir++) {
		/* Reuse one buffer for every directory. */
		strbuf_reset(&path);
		files_ref_path(refs, &path, *dir);
		safe_create_dir(path.buf, 1);
	}

	strbuf_release(&path);
	return 0;
}
|
|
|
|
|
2016-09-05 00:08:10 +08:00
|
|
|
struct ref_storage_be refs_be_files = {
|
2022-03-18 01:27:15 +08:00
|
|
|
.next = NULL,
|
|
|
|
.name = "files",
|
|
|
|
.init = files_ref_store_create,
|
|
|
|
.init_db = files_init_db,
|
|
|
|
.transaction_prepare = files_transaction_prepare,
|
|
|
|
.transaction_finish = files_transaction_finish,
|
|
|
|
.transaction_abort = files_transaction_abort,
|
|
|
|
.initial_transaction_commit = files_initial_transaction_commit,
|
|
|
|
|
|
|
|
.pack_refs = files_pack_refs,
|
|
|
|
.create_symref = files_create_symref,
|
|
|
|
.delete_refs = files_delete_refs,
|
|
|
|
.rename_ref = files_rename_ref,
|
|
|
|
.copy_ref = files_copy_ref,
|
|
|
|
|
|
|
|
.iterator_begin = files_ref_iterator_begin,
|
|
|
|
.read_raw_ref = files_read_raw_ref,
|
|
|
|
.read_symbolic_ref = files_read_symbolic_ref,
|
|
|
|
|
|
|
|
.reflog_iterator_begin = files_reflog_iterator_begin,
|
|
|
|
.for_each_reflog_ent = files_for_each_reflog_ent,
|
|
|
|
.for_each_reflog_ent_reverse = files_for_each_reflog_ent_reverse,
|
|
|
|
.reflog_exists = files_reflog_exists,
|
|
|
|
.create_reflog = files_create_reflog,
|
|
|
|
.delete_reflog = files_delete_reflog,
|
|
|
|
.reflog_expire = files_reflog_expire
|
2016-09-05 00:08:10 +08:00
|
|
|
};
|