2015-11-10 19:42:36 +08:00
|
|
|
#ifndef REFS_REFS_INTERNAL_H
|
|
|
|
#define REFS_REFS_INTERNAL_H
|
|
|
|
|
2018-09-19 08:12:47 +08:00
|
|
|
#include "cache.h"
|
|
|
|
#include "refs.h"
|
2018-07-10 05:36:12 +08:00
|
|
|
#include "iterator.h"
|
2018-07-10 03:25:33 +08:00
|
|
|
|
2018-09-19 08:12:47 +08:00
|
|
|
struct ref_transaction;
|
|
|
|
|
2015-11-10 19:42:36 +08:00
|
|
|
/*
|
|
|
|
* Data structures and functions for the internal use of the refs
|
|
|
|
* module. Code outside of the refs module should use only the public
|
|
|
|
* functions defined in "refs.h", and should *not* include this file.
|
|
|
|
*/
|
|
|
|
|
|
|
|
/*
|
2017-11-05 16:42:05 +08:00
|
|
|
* The following flags can appear in `ref_update::flags`. Their
|
2017-11-05 16:42:06 +08:00
|
|
|
* numerical values must not conflict with those of REF_NO_DEREF and
|
2017-11-05 16:42:05 +08:00
|
|
|
* REF_FORCE_CREATE_REFLOG, which are also stored in
|
|
|
|
* `ref_update::flags`.
|
2015-11-10 19:42:36 +08:00
|
|
|
*/
|
|
|
|
|
|
|
|
/*
 * Flag for `ref_update::flags`: the reference should be updated to
 * new_oid.
 */
#define REF_HAVE_NEW (1 << 2)
|
2015-11-10 19:42:36 +08:00
|
|
|
|
|
|
|
/*
 * Flag for `ref_update::flags`: the current reference's value should
 * be checked to make sure that it agrees with old_oid.
 */
#define REF_HAVE_OLD (1 << 3)
|
2017-01-07 00:22:43 +08:00
|
|
|
|
2020-08-28 23:25:33 +08:00
|
|
|
/*
 * Used as a flag in ref_update::flags when we want to log a ref
 * update but not actually perform it.  This is used when a symbolic
 * ref update is split up.
 */
#define REF_LOG_ONLY (1 << 7)
|
|
|
|
|
2017-08-21 19:51:34 +08:00
|
|
|
/*
 * Return the length of time to retry acquiring a loose reference lock
 * before giving up, in milliseconds.
 */
long get_files_ref_lock_timeout_ms(void);
|
|
|
|
|
2015-11-10 19:42:36 +08:00
|
|
|
/*
 * Return true iff refname is minimally safe. "Safe" here means that
 * deleting a loose reference by this name will not do any damage, for
 * example by causing a file that is not a reference to be deleted.
 * This function does not check that the reference name is legal; for
 * that, use check_refname_format().
 *
 * A refname that starts with "refs/" is considered safe iff it
 * doesn't contain any "." or ".." components or consecutive '/'
 * characters, end with '/', or (on Windows) contain any '\'
 * characters. Names that do not start with "refs/" are considered
 * safe iff they consist entirely of upper case characters and '_'
 * (like "HEAD" and "MERGE_HEAD" but not "config" or "FOO/BAR").
 */
int refname_is_safe(const char *refname);
|
|
|
|
|
2017-06-23 15:01:37 +08:00
|
|
|
/*
 * Helper function: return true if refname, which has the specified
 * oid and flags, can be resolved to an object in the database. If the
 * referred-to object does not exist, emit a warning and return false.
 */
int ref_resolves_to_object(const char *refname,
			   struct repository *repo,
			   const struct object_id *oid,
			   unsigned int flags);
|
|
|
|
|
2015-11-10 19:42:36 +08:00
|
|
|
/* Result codes for peeling an object or reference entry. */
enum peel_status {
	/* object was peeled successfully: */
	PEEL_PEELED = 0,

	/*
	 * object cannot be peeled because the named object (or an
	 * object referred to by a tag in the peel chain) does not
	 * exist.
	 */
	PEEL_INVALID = -1,

	/* object cannot be peeled because it is not a tag: */
	PEEL_NON_TAG = -2,

	/* ref_entry contains no peeled value because it is a symref: */
	PEEL_IS_SYMREF = -3,

	/*
	 * ref_entry cannot be peeled because it is broken (i.e., the
	 * symbolic reference cannot even be resolved to an object
	 * name):
	 */
	PEEL_BROKEN = -4
};
|
|
|
|
|
|
|
|
/*
 * Peel the named object; i.e., if the object is a tag, resolve the
 * tag recursively until a non-tag is found. If successful, store the
 * result to oid and return PEEL_PEELED. If the object is not a tag
 * or is not valid, return PEEL_NON_TAG or PEEL_INVALID, respectively,
 * and leave oid unchanged.
 */
enum peel_status peel_object(const struct object_id *name, struct object_id *oid);
|
2015-11-10 19:42:36 +08:00
|
|
|
|
|
|
|
/**
|
2017-11-05 16:42:09 +08:00
|
|
|
* Information needed for a single ref update. Set new_oid to the new
|
|
|
|
* value or to null_oid to delete the ref. To check the old value
|
|
|
|
* while the ref is locked, set (flags & REF_HAVE_OLD) and set old_oid
|
|
|
|
* to the old value, or to null_oid to ensure the ref does not exist
|
|
|
|
* before update.
|
2015-11-10 19:42:36 +08:00
|
|
|
*/
|
|
|
|
struct ref_update {
|
|
|
|
/*
|
2017-11-05 16:42:05 +08:00
|
|
|
* If (flags & REF_HAVE_NEW), set the reference to this value
|
|
|
|
* (or delete it, if `new_oid` is `null_oid`).
|
2015-11-10 19:42:36 +08:00
|
|
|
*/
|
2017-05-07 06:10:23 +08:00
|
|
|
struct object_id new_oid;
|
2016-04-25 23:48:32 +08:00
|
|
|
|
2015-11-10 19:42:36 +08:00
|
|
|
/*
|
|
|
|
* If (flags & REF_HAVE_OLD), check that the reference
|
2017-11-05 16:42:05 +08:00
|
|
|
* previously had this value (or didn't previously exist, if
|
|
|
|
* `old_oid` is `null_oid`).
|
2015-11-10 19:42:36 +08:00
|
|
|
*/
|
2017-05-07 06:10:23 +08:00
|
|
|
struct object_id old_oid;
|
2016-04-25 23:48:32 +08:00
|
|
|
|
2015-11-10 19:42:36 +08:00
|
|
|
/*
|
2017-11-05 16:42:06 +08:00
|
|
|
* One or more of REF_NO_DEREF, REF_FORCE_CREATE_REFLOG,
|
2017-11-05 16:42:05 +08:00
|
|
|
* REF_HAVE_NEW, REF_HAVE_OLD, or backend-specific flags.
|
2015-11-10 19:42:36 +08:00
|
|
|
*/
|
|
|
|
unsigned int flags;
|
2016-04-25 23:48:32 +08:00
|
|
|
|
2016-09-05 00:08:43 +08:00
|
|
|
void *backend_data;
|
refs: resolve symbolic refs first
Before committing ref updates, split symbolic ref updates into two
parts: an update to the underlying ref, and a log-only update to the
symbolic ref. This ensures that both references are locked correctly
during the transaction, including while their reflogs are updated.
Similarly, if the reference pointed to by HEAD is modified directly, add
a separate log-only update to HEAD, rather than leaving the job of
updating HEAD's reflog to commit_ref_update(). This change ensures that
HEAD is locked correctly while its reflog is being modified, as well as
being cheaper (HEAD only needs to be resolved once).
This makes use of a new function, lock_raw_ref(), which is analogous to
read_raw_ref(), but acquires a lock on the reference before reading it.
This change still has two problems:
* There are redundant read_ref_full() reference lookups.
* It is still possible to get incorrect reflogs for symbolic references
if there is a concurrent update by another process, since the old_oid
of a symref is determined before the lock on the pointed-to ref is
held.
Both problems will soon be fixed.
Signed-off-by: David Turner <dturner@twopensource.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
WIP
2016-04-25 21:56:07 +08:00
|
|
|
unsigned int type;
|
2015-11-10 19:42:36 +08:00
|
|
|
char *msg;
|
2016-04-25 23:48:32 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* If this ref_update was split off of a symref update via
|
|
|
|
* split_symref_update(), then this member points at that
|
|
|
|
* update. This is used for two purposes:
|
|
|
|
* 1. When reporting errors, we report the refname under which
|
|
|
|
* the update was originally requested.
|
|
|
|
* 2. When we read the old value of this reference, we
|
|
|
|
* propagate it back to its parent update for recording in
|
|
|
|
* the latter's reflog.
|
|
|
|
*/
|
|
|
|
struct ref_update *parent_update;
|
|
|
|
|
2015-11-10 19:42:36 +08:00
|
|
|
const char refname[FLEX_ARRAY];
|
|
|
|
};
|
|
|
|
|
2021-10-16 17:39:09 +08:00
|
|
|
/*
 * NOTE(review): no contract is documented at this declaration.
 * Judging by the parameter names, this reads the raw value of
 * `refname` from `ref_store`, filling in `oid`, `referent` and
 * `type`, and reporting an errno-style code through `failure_errno`
 * -- confirm against the definition before relying on this.
 */
int refs_read_raw_ref(struct ref_store *ref_store, const char *refname,
		      struct object_id *oid, struct strbuf *referent,
		      unsigned int *type, int *failure_errno);
|
2017-03-21 00:33:07 +08:00
|
|
|
|
2017-05-22 22:17:45 +08:00
|
|
|
/*
 * Write an error to `err` and return a nonzero value iff the same
 * refname appears multiple times in `refnames`. `refnames` must be
 * sorted on entry to this function.
 */
int ref_update_reject_duplicates(struct string_list *refnames,
				 struct strbuf *err);
|
|
|
|
|
2016-04-25 17:39:54 +08:00
|
|
|
/*
 * Add a ref_update with the specified properties to transaction, and
 * return a pointer to the new object. This function does not verify
 * that refname is well-formed. new_oid and old_oid are only
 * dereferenced if the REF_HAVE_NEW and REF_HAVE_OLD bits,
 * respectively, are set in flags.
 */
struct ref_update *ref_transaction_add_update(
		struct ref_transaction *transaction,
		const char *refname, unsigned int flags,
		const struct object_id *new_oid,
		const struct object_id *old_oid,
		const char *msg);
|
|
|
|
|
2015-11-10 19:42:36 +08:00
|
|
|
/*
 * Transaction states.
 *
 * OPEN:   The transaction is initialized and new updates can still be
 *         added to it. An OPEN transaction can be prepared,
 *         committed, freed, or aborted (freeing and aborting an open
 *         transaction are equivalent).
 *
 * PREPARED: ref_transaction_prepare(), which locks all of the
 *         references involved in the update and checks that the
 *         update has no errors, has been called successfully for the
 *         transaction. A PREPARED transaction can be committed or
 *         aborted.
 *
 * CLOSED: The transaction is no longer active. A transaction becomes
 *         CLOSED if there is a failure while building the transaction
 *         or if a transaction is committed or aborted. A CLOSED
 *         transaction can only be freed.
 */
enum ref_transaction_state {
	REF_TRANSACTION_OPEN     = 0,
	REF_TRANSACTION_PREPARED = 1,
	REF_TRANSACTION_CLOSED   = 2
};
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Data structure for holding a reference transaction, which can
|
|
|
|
* consist of checks and updates to multiple references, carried out
|
|
|
|
* as atomically as possible. This structure is opaque to callers.
|
|
|
|
*/
|
|
|
|
struct ref_transaction {
|
2017-03-26 10:42:35 +08:00
|
|
|
struct ref_store *ref_store;
|
2015-11-10 19:42:36 +08:00
|
|
|
struct ref_update **updates;
|
|
|
|
size_t alloc;
|
|
|
|
size_t nr;
|
|
|
|
enum ref_transaction_state state;
|
2017-09-08 21:51:44 +08:00
|
|
|
void *backend_data;
|
2015-11-10 19:42:36 +08:00
|
|
|
};
|
|
|
|
|
2015-11-10 19:42:40 +08:00
|
|
|
/*
 * Check for entries in extras that are within the specified
 * directory, where dirname is a reference directory name including
 * the trailing slash (e.g., "refs/heads/foo/"). Ignore any
 * conflicting references that are found in skip. If there is a
 * conflicting reference, return its name.
 *
 * extras and skip must be sorted lists of reference names. Either one
 * can be NULL, signifying the empty list.
 */
const char *find_descendant_ref(const char *dirname,
				const struct string_list *extras,
				const struct string_list *skip);
|
|
|
|
|
2016-04-08 03:03:10 +08:00
|
|
|
/* We allow "recursive" symbolic refs. Only within reason, though */
#define SYMREF_MAXDEPTH 5
|
2016-04-08 03:02:49 +08:00
|
|
|
|
2021-09-25 02:37:58 +08:00
|
|
|
/*
 * These flags are passed to refs_ref_iterator_begin() (and do_for_each_ref(),
 * which feeds it).
 */
enum do_for_each_ref_flags {
	/*
	 * Include broken references in a do_for_each_ref*() iteration, which
	 * would normally be omitted. This includes both refs that point to
	 * missing objects (a true repository corruption), ones with illegal
	 * names (which we prefer not to expose to callers), as well as
	 * dangling symbolic refs (i.e., those that point to a non-existent
	 * ref; this is not a corruption, but as they have no valid oid, we
	 * omit them from normal iteration results).
	 */
	DO_FOR_EACH_INCLUDE_BROKEN = (1 << 0),

	/*
	 * Only include per-worktree refs in a do_for_each_ref*() iteration.
	 * Normally this will be used with a files ref_store, since that's
	 * where all reference backends will presumably store their
	 * per-worktree refs.
	 */
	DO_FOR_EACH_PER_WORKTREE_ONLY = (1 << 1),

	/*
	 * Omit dangling symrefs from output; this only has an effect with
	 * INCLUDE_BROKEN, since they are otherwise not included at all.
	 */
	DO_FOR_EACH_OMIT_DANGLING_SYMREFS = (1 << 2),
};
|
2016-04-08 03:02:49 +08:00
|
|
|
|
refs: introduce an iterator interface
Currently, the API for iterating over references is via a family of
for_each_ref()-type functions that invoke a callback function for each
selected reference. All of these eventually call do_for_each_ref(),
which knows how to do one thing: iterate in parallel through two
ref_caches, one for loose and one for packed refs, giving loose
references precedence over packed refs. This is rather complicated code,
and is quite specialized to the files backend. It also requires callers
to encapsulate their work into a callback function, which often means
that they have to define and use a "cb_data" struct to manage their
context.
The current design is already bursting at the seams, and will become
even more awkward in the upcoming world of multiple reference storage
backends:
* Per-worktree vs. shared references are currently handled via a kludge
in git_path() rather than iterating over each part of the reference
namespace separately and merging the results. This kludge will cease
to work when we have multiple reference storage backends.
* The current scheme is inflexible. What if we sometimes want to bypass
the ref_cache, or use it only for packed or only for loose refs? What
if we want to store symbolic refs in one type of storage backend and
non-symbolic ones in another?
In the future, each reference backend will need to define its own way of
iterating over references. The crux of the problem with the current
design is that it is impossible to compose for_each_ref()-style
iterations, because the flow of control is owned by the for_each_ref()
function. There is nothing that a caller can do but iterate through all
references in a single burst, so there is no way for it to interleave
references from multiple backends and present the result to the rest of
the world as a single compound backend.
This commit introduces a new iteration primitive for references: a
ref_iterator. A ref_iterator is a polymorphic object that a reference
storage backend can be asked to instantiate. There are three functions
that can be applied to a ref_iterator:
* ref_iterator_advance(): move to the next reference in the iteration
* ref_iterator_abort(): end the iteration before it is exhausted
* ref_iterator_peel(): peel the reference currently being looked at
Iterating using a ref_iterator leaves the flow of control in the hands
of the caller, which means that ref_iterators from multiple
sources (e.g., loose and packed refs) can be composed and presented to
the world as a single compound ref_iterator.
It also means that the backend code for implementing reference iteration
will sometimes be more complicated. For example, the
cache_ref_iterator (which iterates over a ref_cache) can't use the C
stack to recurse; instead, it must manage its own stack internally as
explicit data structures. There is also a lot of boilerplate connected
with object-oriented programming in C.
Eventually, end-user callers will be able to be written in a more
natural way—managing their own flow of control rather than having to
work via callbacks. Since there will only be a few reference backends
but there are many consumers of this API, this is a good tradeoff.
More importantly, we gain composability, and especially the possibility
of writing interchangeable parts that can work with any ref_iterator.
For example, merge_ref_iterator implements a generic way of merging the
contents of any two ref_iterators. It is used to merge loose + packed
refs as part of the implementation of the files_ref_iterator. But it
will also be possible to use it to merge other pairs of reference
sources (e.g., per-worktree vs. shared refs).
Another example is prefix_ref_iterator, which can be used to trim a
prefix off the front of reference names before presenting them to the
caller (e.g., "refs/heads/master" -> "master").
In this patch, we introduce the iterator abstraction and many utilities,
and implement a reference iterator for the files ref storage backend.
(I've written several other obvious utilities, for example a generic way
to filter references being iterated over. These will probably be useful
in the future. But they are not needed for this patch series, so I am
not including them at this time.)
In a moment we will rewrite do_for_each_ref() to work via reference
iterators (allowing some special-purpose code to be discarded), and do
something similar for reflogs. In future patch series, we will expose
the ref_iterator abstraction in the public refs API so that callers can
use it directly.
Implementation note: I tried abstracting this a layer further to allow
generic iterators (over arbitrary types of objects) and generic
utilities like a generic merge_iterator. But the implementation in C was
very cumbersome, involving (in my opinion) too much boilerplate and too
much unsafe casting, some of which would have had to be done on the
caller side. However, I did put a few iterator-related constants in a
top-level header file, iterator.h, as they will be useful in a moment to
implement iteration over directory trees and possibly other types of
iterators in the future.
Signed-off-by: Ramsay Jones <ramsay@ramsayjones.plus.com>
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-06-18 12:15:15 +08:00
|
|
|
/*
|
|
|
|
* Reference iterators
|
|
|
|
*
|
|
|
|
* A reference iterator encapsulates the state of an in-progress
|
|
|
|
* iteration over references. Create an instance of `struct
|
|
|
|
* ref_iterator` via one of the functions in this module.
|
|
|
|
*
|
|
|
|
* A freshly-created ref_iterator doesn't yet point at a reference. To
|
|
|
|
* advance the iterator, call ref_iterator_advance(). If successful,
|
|
|
|
* this sets the iterator's refname, oid, and flags fields to describe
|
|
|
|
* the next reference and returns ITER_OK. The data pointed at by
|
|
|
|
* refname and oid belong to the iterator; if you want to retain them
|
|
|
|
* after calling ref_iterator_advance() again or calling
|
|
|
|
* ref_iterator_abort(), you must make a copy. When the iteration has
|
|
|
|
* been exhausted, ref_iterator_advance() releases any resources
|
2019-11-06 01:07:23 +08:00
|
|
|
* associated with the iteration, frees the ref_iterator object, and
|
refs: introduce an iterator interface
Currently, the API for iterating over references is via a family of
for_each_ref()-type functions that invoke a callback function for each
selected reference. All of these eventually call do_for_each_ref(),
which knows how to do one thing: iterate in parallel through two
ref_caches, one for loose and one for packed refs, giving loose
references precedence over packed refs. This is rather complicated code,
and is quite specialized to the files backend. It also requires callers
to encapsulate their work into a callback function, which often means
that they have to define and use a "cb_data" struct to manage their
context.
The current design is already bursting at the seams, and will become
even more awkward in the upcoming world of multiple reference storage
backends:
* Per-worktree vs. shared references are currently handled via a kludge
in git_path() rather than iterating over each part of the reference
namespace separately and merging the results. This kludge will cease
to work when we have multiple reference storage backends.
* The current scheme is inflexible. What if we sometimes want to bypass
the ref_cache, or use it only for packed or only for loose refs? What
if we want to store symbolic refs in one type of storage backend and
non-symbolic ones in another?
In the future, each reference backend will need to define its own way of
iterating over references. The crux of the problem with the current
design is that it is impossible to compose for_each_ref()-style
iterations, because the flow of control is owned by the for_each_ref()
function. There is nothing that a caller can do but iterate through all
references in a single burst, so there is no way for it to interleave
references from multiple backends and present the result to the rest of
the world as a single compound backend.
This commit introduces a new iteration primitive for references: a
ref_iterator. A ref_iterator is a polymorphic object that a reference
storage backend can be asked to instantiate. There are three functions
that can be applied to a ref_iterator:
* ref_iterator_advance(): move to the next reference in the iteration
* ref_iterator_abort(): end the iteration before it is exhausted
* ref_iterator_peel(): peel the reference currently being looked at
Iterating using a ref_iterator leaves the flow of control in the hands
of the caller, which means that ref_iterators from multiple
sources (e.g., loose and packed refs) can be composed and presented to
the world as a single compound ref_iterator.
It also means that the backend code for implementing reference iteration
will sometimes be more complicated. For example, the
cache_ref_iterator (which iterates over a ref_cache) can't use the C
stack to recurse; instead, it must manage its own stack internally as
explicit data structures. There is also a lot of boilerplate connected
with object-oriented programming in C.
Eventually, end-user callers will be able to be written in a more
natural way—managing their own flow of control rather than having to
work via callbacks. Since there will only be a few reference backends
but there are many consumers of this API, this is a good tradeoff.
More importantly, we gain composability, and especially the possibility
of writing interchangeable parts that can work with any ref_iterator.
For example, merge_ref_iterator implements a generic way of merging the
contents of any two ref_iterators. It is used to merge loose + packed
refs as part of the implementation of the files_ref_iterator. But it
will also be possible to use it to merge other pairs of reference
sources (e.g., per-worktree vs. shared refs).
Another example is prefix_ref_iterator, which can be used to trim a
prefix off the front of reference names before presenting them to the
caller (e.g., "refs/heads/master" -> "master").
In this patch, we introduce the iterator abstraction and many utilities,
and implement a reference iterator for the files ref storage backend.
(I've written several other obvious utilities, for example a generic way
to filter references being iterated over. These will probably be useful
in the future. But they are not needed for this patch series, so I am
not including them at this time.)
In a moment we will rewrite do_for_each_ref() to work via reference
iterators (allowing some special-purpose code to be discarded), and do
something similar for reflogs. In future patch series, we will expose
the ref_iterator abstraction in the public refs API so that callers can
use it directly.
Implementation note: I tried abstracting this a layer further to allow
generic iterators (over arbitrary types of objects) and generic
utilities like a generic merge_iterator. But the implementation in C was
very cumbersome, involving (in my opinion) too much boilerplate and too
much unsafe casting, some of which would have had to be done on the
caller side. However, I did put a few iterator-related constants in a
top-level header file, iterator.h, as they will be useful in a moment to
implement iteration over directory trees and possibly other types of
iterators in the future.
Signed-off-by: Ramsay Jones <ramsay@ramsayjones.plus.com>
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-06-18 12:15:15 +08:00
|
|
|
* returns ITER_DONE. If you want to abort the iteration early, call
|
|
|
|
* ref_iterator_abort(), which also frees the ref_iterator object and
|
|
|
|
* any associated resources. If there was an internal error advancing
|
|
|
|
* to the next entry, ref_iterator_advance() aborts the iteration,
|
|
|
|
* frees the ref_iterator, and returns ITER_ERROR.
|
|
|
|
*
|
|
|
|
* The reference currently being looked at can be peeled by calling
|
|
|
|
* ref_iterator_peel(). This function is often faster than peel_ref(),
|
|
|
|
* so it should be preferred when iterating over references.
|
|
|
|
*
|
|
|
|
* Putting it all together, a typical iteration looks like this:
|
|
|
|
*
|
|
|
|
* int ok;
|
|
|
|
* struct ref_iterator *iter = ...;
|
|
|
|
*
|
|
|
|
* while ((ok = ref_iterator_advance(iter)) == ITER_OK) {
|
|
|
|
* if (want_to_stop_iteration()) {
|
|
|
|
* ok = ref_iterator_abort(iter);
|
|
|
|
* break;
|
|
|
|
* }
|
|
|
|
*
|
|
|
|
* // Access information about the current reference:
|
|
|
|
* if (!(iter->flags & REF_ISSYMREF))
|
2018-09-15 10:15:46 +08:00
|
|
|
* printf("%s is %s\n", iter->refname, oid_to_hex(iter->oid));
|
refs: introduce an iterator interface
Currently, the API for iterating over references is via a family of
for_each_ref()-type functions that invoke a callback function for each
selected reference. All of these eventually call do_for_each_ref(),
which knows how to do one thing: iterate in parallel through two
ref_caches, one for loose and one for packed refs, giving loose
references precedence over packed refs. This is rather complicated code,
and is quite specialized to the files backend. It also requires callers
to encapsulate their work into a callback function, which often means
that they have to define and use a "cb_data" struct to manage their
context.
The current design is already bursting at the seams, and will become
even more awkward in the upcoming world of multiple reference storage
backends:
* Per-worktree vs. shared references are currently handled via a kludge
in git_path() rather than iterating over each part of the reference
namespace separately and merging the results. This kludge will cease
to work when we have multiple reference storage backends.
* The current scheme is inflexible. What if we sometimes want to bypass
the ref_cache, or use it only for packed or only for loose refs? What
if we want to store symbolic refs in one type of storage backend and
non-symbolic ones in another?
In the future, each reference backend will need to define its own way of
iterating over references. The crux of the problem with the current
design is that it is impossible to compose for_each_ref()-style
iterations, because the flow of control is owned by the for_each_ref()
function. There is nothing that a caller can do but iterate through all
references in a single burst, so there is no way for it to interleave
references from multiple backends and present the result to the rest of
the world as a single compound backend.
This commit introduces a new iteration primitive for references: a
ref_iterator. A ref_iterator is a polymorphic object that a reference
storage backend can be asked to instantiate. There are three functions
that can be applied to a ref_iterator:
* ref_iterator_advance(): move to the next reference in the iteration
* ref_iterator_abort(): end the iteration before it is exhausted
* ref_iterator_peel(): peel the reference currently being looked at
Iterating using a ref_iterator leaves the flow of control in the hands
of the caller, which means that ref_iterators from multiple
sources (e.g., loose and packed refs) can be composed and presented to
the world as a single compound ref_iterator.
It also means that the backend code for implementing reference iteration
will sometimes be more complicated. For example, the
cache_ref_iterator (which iterates over a ref_cache) can't use the C
stack to recurse; instead, it must manage its own stack internally as
explicit data structures. There is also a lot of boilerplate connected
with object-oriented programming in C.
Eventually, end-user callers will be able to be written in a more
natural way—managing their own flow of control rather than having to
work via callbacks. Since there will only be a few reference backends
but there are many consumers of this API, this is a good tradeoff.
More importantly, we gain composability, and especially the possibility
of writing interchangeable parts that can work with any ref_iterator.
For example, merge_ref_iterator implements a generic way of merging the
contents of any two ref_iterators. It is used to merge loose + packed
refs as part of the implementation of the files_ref_iterator. But it
will also be possible to use it to merge other pairs of reference
sources (e.g., per-worktree vs. shared refs).
Another example is prefix_ref_iterator, which can be used to trim a
prefix off the front of reference names before presenting them to the
caller (e.g., "refs/heads/master" -> "master").
In this patch, we introduce the iterator abstraction and many utilities,
and implement a reference iterator for the files ref storage backend.
(I've written several other obvious utilities, for example a generic way
to filter references being iterated over. These will probably be useful
in the future. But they are not needed for this patch series, so I am
not including them at this time.)
In a moment we will rewrite do_for_each_ref() to work via reference
iterators (allowing some special-purpose code to be discarded), and do
something similar for reflogs. In future patch series, we will expose
the ref_iterator abstraction in the public refs API so that callers can
use it directly.
Implementation note: I tried abstracting this a layer further to allow
generic iterators (over arbitrary types of objects) and generic
utilities like a generic merge_iterator. But the implementation in C was
very cumbersome, involving (in my opinion) too much boilerplate and too
much unsafe casting, some of which would have had to be done on the
caller side. However, I did put a few iterator-related constants in a
top-level header file, iterator.h, as they will be useful in a moment to
implement iteration over directory trees and possibly other types of
iterators in the future.
Signed-off-by: Ramsay Jones <ramsay@ramsayjones.plus.com>
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-06-18 12:15:15 +08:00
|
|
|
*
|
|
|
|
* // If you need to peel the reference:
|
|
|
|
* ref_iterator_peel(iter, &oid);
|
|
|
|
* }
|
|
|
|
*
|
|
|
|
* if (ok != ITER_DONE)
|
|
|
|
* handle_error();
|
|
|
|
*/
|
|
|
|
struct ref_iterator {
|
|
|
|
struct ref_iterator_vtable *vtable;
|
2017-09-14 01:15:55 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Does this `ref_iterator` iterate over references in order
|
|
|
|
* by refname?
|
|
|
|
*/
|
|
|
|
unsigned int ordered : 1;
|
|
|
|
|
refs: introduce an iterator interface
Currently, the API for iterating over references is via a family of
for_each_ref()-type functions that invoke a callback function for each
selected reference. All of these eventually call do_for_each_ref(),
which knows how to do one thing: iterate in parallel through two
ref_caches, one for loose and one for packed refs, giving loose
references precedence over packed refs. This is rather complicated code,
and is quite specialized to the files backend. It also requires callers
to encapsulate their work into a callback function, which often means
that they have to define and use a "cb_data" struct to manage their
context.
The current design is already bursting at the seams, and will become
even more awkward in the upcoming world of multiple reference storage
backends:
* Per-worktree vs. shared references are currently handled via a kludge
in git_path() rather than iterating over each part of the reference
namespace separately and merging the results. This kludge will cease
to work when we have multiple reference storage backends.
* The current scheme is inflexible. What if we sometimes want to bypass
the ref_cache, or use it only for packed or only for loose refs? What
if we want to store symbolic refs in one type of storage backend and
non-symbolic ones in another?
In the future, each reference backend will need to define its own way of
iterating over references. The crux of the problem with the current
design is that it is impossible to compose for_each_ref()-style
iterations, because the flow of control is owned by the for_each_ref()
function. There is nothing that a caller can do but iterate through all
references in a single burst, so there is no way for it to interleave
references from multiple backends and present the result to the rest of
the world as a single compound backend.
This commit introduces a new iteration primitive for references: a
ref_iterator. A ref_iterator is a polymorphic object that a reference
storage backend can be asked to instantiate. There are three functions
that can be applied to a ref_iterator:
* ref_iterator_advance(): move to the next reference in the iteration
* ref_iterator_abort(): end the iteration before it is exhausted
* ref_iterator_peel(): peel the reference currently being looked at
Iterating using a ref_iterator leaves the flow of control in the hands
of the caller, which means that ref_iterators from multiple
sources (e.g., loose and packed refs) can be composed and presented to
the world as a single compound ref_iterator.
It also means that the backend code for implementing reference iteration
will sometimes be more complicated. For example, the
cache_ref_iterator (which iterates over a ref_cache) can't use the C
stack to recurse; instead, it must manage its own stack internally as
explicit data structures. There is also a lot of boilerplate connected
with object-oriented programming in C.
Eventually, end-user callers will be able to be written in a more
natural way—managing their own flow of control rather than having to
work via callbacks. Since there will only be a few reference backends
but there are many consumers of this API, this is a good tradeoff.
More importantly, we gain composability, and especially the possibility
of writing interchangeable parts that can work with any ref_iterator.
For example, merge_ref_iterator implements a generic way of merging the
contents of any two ref_iterators. It is used to merge loose + packed
refs as part of the implementation of the files_ref_iterator. But it
will also be possible to use it to merge other pairs of reference
sources (e.g., per-worktree vs. shared refs).
Another example is prefix_ref_iterator, which can be used to trim a
prefix off the front of reference names before presenting them to the
caller (e.g., "refs/heads/master" -> "master").
In this patch, we introduce the iterator abstraction and many utilities,
and implement a reference iterator for the files ref storage backend.
(I've written several other obvious utilities, for example a generic way
to filter references being iterated over. These will probably be useful
in the future. But they are not needed for this patch series, so I am
not including them at this time.)
In a moment we will rewrite do_for_each_ref() to work via reference
iterators (allowing some special-purpose code to be discarded), and do
something similar for reflogs. In future patch series, we will expose
the ref_iterator abstraction in the public refs API so that callers can
use it directly.
Implementation note: I tried abstracting this a layer further to allow
generic iterators (over arbitrary types of objects) and generic
utilities like a generic merge_iterator. But the implementation in C was
very cumbersome, involving (in my opinion) too much boilerplate and too
much unsafe casting, some of which would have had to be done on the
caller side. However, I did put a few iterator-related constants in a
top-level header file, iterator.h, as they will be useful in a moment to
implement iteration over directory trees and possibly other types of
iterators in the future.
Signed-off-by: Ramsay Jones <ramsay@ramsayjones.plus.com>
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-06-18 12:15:15 +08:00
|
|
|
const char *refname;
|
|
|
|
const struct object_id *oid;
|
|
|
|
unsigned int flags;
|
|
|
|
};
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Advance the iterator to the first or next item and return ITER_OK.
|
|
|
|
* If the iteration is exhausted, free the resources associated with
|
|
|
|
* the ref_iterator and return ITER_DONE. On errors, free the iterator
|
|
|
|
* resources and return ITER_ERROR. It is a bug to use ref_iterator or
|
|
|
|
* call this function again after it has returned ITER_DONE or
|
|
|
|
* ITER_ERROR.
|
|
|
|
*/
|
|
|
|
int ref_iterator_advance(struct ref_iterator *ref_iterator);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If possible, peel the reference currently being viewed by the
|
|
|
|
* iterator. Return 0 on success.
|
|
|
|
*/
|
|
|
|
int ref_iterator_peel(struct ref_iterator *ref_iterator,
|
|
|
|
struct object_id *peeled);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* End the iteration before it has been exhausted, freeing the
|
|
|
|
* reference iterator and any associated resources and returning
|
|
|
|
* ITER_DONE. If the abort itself failed, return ITER_ERROR.
|
|
|
|
*/
|
|
|
|
int ref_iterator_abort(struct ref_iterator *ref_iterator);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* An iterator over nothing (its first ref_iterator_advance() call
|
|
|
|
* returns ITER_DONE).
|
|
|
|
*/
|
|
|
|
struct ref_iterator *empty_ref_iterator_begin(void);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Return true iff ref_iterator is an empty_ref_iterator.
|
|
|
|
*/
|
|
|
|
int is_empty_ref_iterator(struct ref_iterator *ref_iterator);
|
|
|
|
|
2017-03-21 00:33:08 +08:00
|
|
|
/*
|
|
|
|
* Return an iterator that goes over each reference in `refs` for
|
|
|
|
* which the refname begins with prefix. If trim is non-zero, then
|
2020-05-21 01:36:09 +08:00
|
|
|
* trim that many characters off the beginning of each refname.
|
2021-09-25 02:39:44 +08:00
|
|
|
* The output is ordered by refname.
|
2017-03-21 00:33:08 +08:00
|
|
|
*/
|
|
|
|
struct ref_iterator *refs_ref_iterator_begin(
|
|
|
|
struct ref_store *refs,
|
2021-09-25 02:39:44 +08:00
|
|
|
const char *prefix, int trim,
|
|
|
|
enum do_for_each_ref_flags flags);
|
2017-03-21 00:33:08 +08:00
|
|
|
|
refs: introduce an iterator interface
Currently, the API for iterating over references is via a family of
for_each_ref()-type functions that invoke a callback function for each
selected reference. All of these eventually call do_for_each_ref(),
which knows how to do one thing: iterate in parallel through two
ref_caches, one for loose and one for packed refs, giving loose
references precedence over packed refs. This is rather complicated code,
and is quite specialized to the files backend. It also requires callers
to encapsulate their work into a callback function, which often means
that they have to define and use a "cb_data" struct to manage their
context.
The current design is already bursting at the seams, and will become
even more awkward in the upcoming world of multiple reference storage
backends:
* Per-worktree vs. shared references are currently handled via a kludge
in git_path() rather than iterating over each part of the reference
namespace separately and merging the results. This kludge will cease
to work when we have multiple reference storage backends.
* The current scheme is inflexible. What if we sometimes want to bypass
the ref_cache, or use it only for packed or only for loose refs? What
if we want to store symbolic refs in one type of storage backend and
non-symbolic ones in another?
In the future, each reference backend will need to define its own way of
iterating over references. The crux of the problem with the current
design is that it is impossible to compose for_each_ref()-style
iterations, because the flow of control is owned by the for_each_ref()
function. There is nothing that a caller can do but iterate through all
references in a single burst, so there is no way for it to interleave
references from multiple backends and present the result to the rest of
the world as a single compound backend.
This commit introduces a new iteration primitive for references: a
ref_iterator. A ref_iterator is a polymorphic object that a reference
storage backend can be asked to instantiate. There are three functions
that can be applied to a ref_iterator:
* ref_iterator_advance(): move to the next reference in the iteration
* ref_iterator_abort(): end the iteration before it is exhausted
* ref_iterator_peel(): peel the reference currently being looked at
Iterating using a ref_iterator leaves the flow of control in the hands
of the caller, which means that ref_iterators from multiple
sources (e.g., loose and packed refs) can be composed and presented to
the world as a single compound ref_iterator.
It also means that the backend code for implementing reference iteration
will sometimes be more complicated. For example, the
cache_ref_iterator (which iterates over a ref_cache) can't use the C
stack to recurse; instead, it must manage its own stack internally as
explicit data structures. There is also a lot of boilerplate connected
with object-oriented programming in C.
Eventually, end-user callers will be able to be written in a more
natural way—managing their own flow of control rather than having to
work via callbacks. Since there will only be a few reference backends
but there are many consumers of this API, this is a good tradeoff.
More importantly, we gain composability, and especially the possibility
of writing interchangeable parts that can work with any ref_iterator.
For example, merge_ref_iterator implements a generic way of merging the
contents of any two ref_iterators. It is used to merge loose + packed
refs as part of the implementation of the files_ref_iterator. But it
will also be possible to use it to merge other pairs of reference
sources (e.g., per-worktree vs. shared refs).
Another example is prefix_ref_iterator, which can be used to trim a
prefix off the front of reference names before presenting them to the
caller (e.g., "refs/heads/master" -> "master").
In this patch, we introduce the iterator abstraction and many utilities,
and implement a reference iterator for the files ref storage backend.
(I've written several other obvious utilities, for example a generic way
to filter references being iterated over. These will probably be useful
in the future. But they are not needed for this patch series, so I am
not including them at this time.)
In a moment we will rewrite do_for_each_ref() to work via reference
iterators (allowing some special-purpose code to be discarded), and do
something similar for reflogs. In future patch series, we will expose
the ref_iterator abstraction in the public refs API so that callers can
use it directly.
Implementation note: I tried abstracting this a layer further to allow
generic iterators (over arbitrary types of objects) and generic
utilities like a generic merge_iterator. But the implementation in C was
very cumbersome, involving (in my opinion) too much boilerplate and too
much unsafe casting, some of which would have had to be done on the
caller side. However, I did put a few iterator-related constants in a
top-level header file, iterator.h, as they will be useful in a moment to
implement iteration over directory trees and possibly other types of
iterators in the future.
Signed-off-by: Ramsay Jones <ramsay@ramsayjones.plus.com>
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-06-18 12:15:15 +08:00
|
|
|
/*
|
|
|
|
* A callback function used to instruct merge_ref_iterator how to
|
|
|
|
* interleave the entries from iter0 and iter1. The function should
|
|
|
|
* return one of the constants defined in enum iterator_selection. It
|
|
|
|
* must not advance either of the iterators itself.
|
|
|
|
*
|
|
|
|
* The function must be prepared to handle the case that iter0 and/or
|
|
|
|
* iter1 is NULL, which indicates that the corresponding sub-iterator
|
|
|
|
* has been exhausted. Its return value must be consistent with the
|
|
|
|
* current states of the iterators; e.g., it must not return
|
|
|
|
* ITER_SKIP_1 if iter1 has already been exhausted.
|
|
|
|
*/
|
|
|
|
typedef enum iterator_selection ref_iterator_select_fn(
|
|
|
|
struct ref_iterator *iter0, struct ref_iterator *iter1,
|
|
|
|
void *cb_data);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Iterate over the entries from iter0 and iter1, with the values
|
|
|
|
* interleaved as directed by the select function. The iterator takes
|
|
|
|
* ownership of iter0 and iter1 and frees them when the iteration is
|
2017-09-14 01:15:55 +08:00
|
|
|
* over. A derived class should set `ordered` to 1 or 0 based on
|
|
|
|
* whether it generates its output in order by reference name.
|
refs: introduce an iterator interface
Currently, the API for iterating over references is via a family of
for_each_ref()-type functions that invoke a callback function for each
selected reference. All of these eventually call do_for_each_ref(),
which knows how to do one thing: iterate in parallel through two
ref_caches, one for loose and one for packed refs, giving loose
references precedence over packed refs. This is rather complicated code,
and is quite specialized to the files backend. It also requires callers
to encapsulate their work into a callback function, which often means
that they have to define and use a "cb_data" struct to manage their
context.
The current design is already bursting at the seams, and will become
even more awkward in the upcoming world of multiple reference storage
backends:
* Per-worktree vs. shared references are currently handled via a kludge
in git_path() rather than iterating over each part of the reference
namespace separately and merging the results. This kludge will cease
to work when we have multiple reference storage backends.
* The current scheme is inflexible. What if we sometimes want to bypass
the ref_cache, or use it only for packed or only for loose refs? What
if we want to store symbolic refs in one type of storage backend and
non-symbolic ones in another?
In the future, each reference backend will need to define its own way of
iterating over references. The crux of the problem with the current
design is that it is impossible to compose for_each_ref()-style
iterations, because the flow of control is owned by the for_each_ref()
function. There is nothing that a caller can do but iterate through all
references in a single burst, so there is no way for it to interleave
references from multiple backends and present the result to the rest of
the world as a single compound backend.
This commit introduces a new iteration primitive for references: a
ref_iterator. A ref_iterator is a polymorphic object that a reference
storage backend can be asked to instantiate. There are three functions
that can be applied to a ref_iterator:
* ref_iterator_advance(): move to the next reference in the iteration
* ref_iterator_abort(): end the iteration before it is exhausted
* ref_iterator_peel(): peel the reference currently being looked at
Iterating using a ref_iterator leaves the flow of control in the hands
of the caller, which means that ref_iterators from multiple
sources (e.g., loose and packed refs) can be composed and presented to
the world as a single compound ref_iterator.
It also means that the backend code for implementing reference iteration
will sometimes be more complicated. For example, the
cache_ref_iterator (which iterates over a ref_cache) can't use the C
stack to recurse; instead, it must manage its own stack internally as
explicit data structures. There is also a lot of boilerplate connected
with object-oriented programming in C.
Eventually, end-user callers will be able to be written in a more
natural way—managing their own flow of control rather than having to
work via callbacks. Since there will only be a few reference backends
but there are many consumers of this API, this is a good tradeoff.
More importantly, we gain composability, and especially the possibility
of writing interchangeable parts that can work with any ref_iterator.
For example, merge_ref_iterator implements a generic way of merging the
contents of any two ref_iterators. It is used to merge loose + packed
refs as part of the implementation of the files_ref_iterator. But it
will also be possible to use it to merge other pairs of reference
sources (e.g., per-worktree vs. shared refs).
Another example is prefix_ref_iterator, which can be used to trim a
prefix off the front of reference names before presenting them to the
caller (e.g., "refs/heads/master" -> "master").
In this patch, we introduce the iterator abstraction and many utilities,
and implement a reference iterator for the files ref storage backend.
(I've written several other obvious utilities, for example a generic way
to filter references being iterated over. These will probably be useful
in the future. But they are not needed for this patch series, so I am
not including them at this time.)
In a moment we will rewrite do_for_each_ref() to work via reference
iterators (allowing some special-purpose code to be discarded), and do
something similar for reflogs. In future patch series, we will expose
the ref_iterator abstraction in the public refs API so that callers can
use it directly.
Implementation note: I tried abstracting this a layer further to allow
generic iterators (over arbitrary types of objects) and generic
utilities like a generic merge_iterator. But the implementation in C was
very cumbersome, involving (in my opinion) too much boilerplate and too
much unsafe casting, some of which would have had to be done on the
caller side. However, I did put a few iterator-related constants in a
top-level header file, iterator.h, as they will be useful in a moment to
implement iteration over directory trees and possibly other types of
iterators in the future.
Signed-off-by: Ramsay Jones <ramsay@ramsayjones.plus.com>
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-06-18 12:15:15 +08:00
|
|
|
*/
|
|
|
|
struct ref_iterator *merge_ref_iterator_begin(
|
2017-09-14 01:15:55 +08:00
|
|
|
int ordered,
|
refs: introduce an iterator interface
Currently, the API for iterating over references is via a family of
for_each_ref()-type functions that invoke a callback function for each
selected reference. All of these eventually call do_for_each_ref(),
which knows how to do one thing: iterate in parallel through two
ref_caches, one for loose and one for packed refs, giving loose
references precedence over packed refs. This is rather complicated code,
and is quite specialized to the files backend. It also requires callers
to encapsulate their work into a callback function, which often means
that they have to define and use a "cb_data" struct to manage their
context.
The current design is already bursting at the seams, and will become
even more awkward in the upcoming world of multiple reference storage
backends:
* Per-worktree vs. shared references are currently handled via a kludge
in git_path() rather than iterating over each part of the reference
namespace separately and merging the results. This kludge will cease
to work when we have multiple reference storage backends.
* The current scheme is inflexible. What if we sometimes want to bypass
the ref_cache, or use it only for packed or only for loose refs? What
if we want to store symbolic refs in one type of storage backend and
non-symbolic ones in another?
In the future, each reference backend will need to define its own way of
iterating over references. The crux of the problem with the current
design is that it is impossible to compose for_each_ref()-style
iterations, because the flow of control is owned by the for_each_ref()
function. There is nothing that a caller can do but iterate through all
references in a single burst, so there is no way for it to interleave
references from multiple backends and present the result to the rest of
the world as a single compound backend.
This commit introduces a new iteration primitive for references: a
ref_iterator. A ref_iterator is a polymorphic object that a reference
storage backend can be asked to instantiate. There are three functions
that can be applied to a ref_iterator:
* ref_iterator_advance(): move to the next reference in the iteration
* ref_iterator_abort(): end the iteration before it is exhausted
* ref_iterator_peel(): peel the reference currently being looked at
Iterating using a ref_iterator leaves the flow of control in the hands
of the caller, which means that ref_iterators from multiple
sources (e.g., loose and packed refs) can be composed and presented to
the world as a single compound ref_iterator.
It also means that the backend code for implementing reference iteration
will sometimes be more complicated. For example, the
cache_ref_iterator (which iterates over a ref_cache) can't use the C
stack to recurse; instead, it must manage its own stack internally as
explicit data structures. There is also a lot of boilerplate connected
with object-oriented programming in C.
Eventually, end-user callers will be able to be written in a more
natural way—managing their own flow of control rather than having to
work via callbacks. Since there will only be a few reference backends
but there are many consumers of this API, this is a good tradeoff.
More importantly, we gain composability, and especially the possibility
of writing interchangeable parts that can work with any ref_iterator.
For example, merge_ref_iterator implements a generic way of merging the
contents of any two ref_iterators. It is used to merge loose + packed
refs as part of the implementation of the files_ref_iterator. But it
will also be possible to use it to merge other pairs of reference
sources (e.g., per-worktree vs. shared refs).
Another example is prefix_ref_iterator, which can be used to trim a
prefix off the front of reference names before presenting them to the
caller (e.g., "refs/heads/master" -> "master").
In this patch, we introduce the iterator abstraction and many utilities,
and implement a reference iterator for the files ref storage backend.
(I've written several other obvious utilities, for example a generic way
to filter references being iterated over. These will probably be useful
in the future. But they are not needed for this patch series, so I am
not including them at this time.)
In a moment we will rewrite do_for_each_ref() to work via reference
iterators (allowing some special-purpose code to be discarded), and do
something similar for reflogs. In future patch series, we will expose
the ref_iterator abstraction in the public refs API so that callers can
use it directly.
Implementation note: I tried abstracting this a layer further to allow
generic iterators (over arbitrary types of objects) and generic
utilities like a generic merge_iterator. But the implementation in C was
very cumbersome, involving (in my opinion) too much boilerplate and too
much unsafe casting, some of which would have had to be done on the
caller side. However, I did put a few iterator-related constants in a
top-level header file, iterator.h, as they will be useful in a moment to
implement iteration over directory trees and possibly other types of
iterators in the future.
Signed-off-by: Ramsay Jones <ramsay@ramsayjones.plus.com>
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-06-18 12:15:15 +08:00
|
|
|
struct ref_iterator *iter0, struct ref_iterator *iter1,
|
|
|
|
ref_iterator_select_fn *select, void *cb_data);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* An iterator consisting of the union of the entries from front and
|
|
|
|
* back. If there are entries common to the two sub-iterators, use the
|
|
|
|
* one from front. Each iterator must iterate over its entries in
|
|
|
|
* strcmp() order by refname for this to work.
|
|
|
|
*
|
|
|
|
* The new iterator takes ownership of its arguments and frees them
|
|
|
|
* when the iteration is over. As a convenience to callers, if front
|
|
|
|
* or back is an empty_ref_iterator, then abort that one immediately
|
|
|
|
* and return the other iterator directly, without wrapping it.
|
|
|
|
*/
|
|
|
|
struct ref_iterator *overlay_ref_iterator_begin(
|
|
|
|
struct ref_iterator *front, struct ref_iterator *back);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Wrap iter0, only letting through the references whose names start
|
|
|
|
* with prefix. If trim is set, set iter->refname to the name of the
|
|
|
|
* reference with that many characters trimmed off the front;
|
|
|
|
* otherwise set it to the full refname. The new iterator takes over
|
|
|
|
* ownership of iter0 and frees it when iteration is over. It makes
|
|
|
|
* its own copy of prefix.
|
|
|
|
*
|
|
|
|
* As a convenience to callers, if prefix is the empty string and
|
|
|
|
* trim is zero, this function returns iter0 directly, without
|
|
|
|
* wrapping it.
|
2017-09-14 01:15:55 +08:00
|
|
|
*
|
|
|
|
* The resulting ref_iterator is ordered if iter0 is.
|
refs: introduce an iterator interface
Currently, the API for iterating over references is via a family of
for_each_ref()-type functions that invoke a callback function for each
selected reference. All of these eventually call do_for_each_ref(),
which knows how to do one thing: iterate in parallel through two
ref_caches, one for loose and one for packed refs, giving loose
references precedence over packed refs. This is rather complicated code,
and is quite specialized to the files backend. It also requires callers
to encapsulate their work into a callback function, which often means
that they have to define and use a "cb_data" struct to manage their
context.
The current design is already bursting at the seams, and will become
even more awkward in the upcoming world of multiple reference storage
backends:
* Per-worktree vs. shared references are currently handled via a kludge
in git_path() rather than iterating over each part of the reference
namespace separately and merging the results. This kludge will cease
to work when we have multiple reference storage backends.
* The current scheme is inflexible. What if we sometimes want to bypass
the ref_cache, or use it only for packed or only for loose refs? What
if we want to store symbolic refs in one type of storage backend and
non-symbolic ones in another?
In the future, each reference backend will need to define its own way of
iterating over references. The crux of the problem with the current
design is that it is impossible to compose for_each_ref()-style
iterations, because the flow of control is owned by the for_each_ref()
function. There is nothing that a caller can do but iterate through all
references in a single burst, so there is no way for it to interleave
references from multiple backends and present the result to the rest of
the world as a single compound backend.
This commit introduces a new iteration primitive for references: a
ref_iterator. A ref_iterator is a polymorphic object that a reference
storage backend can be asked to instantiate. There are three functions
that can be applied to a ref_iterator:
* ref_iterator_advance(): move to the next reference in the iteration
* ref_iterator_abort(): end the iteration before it is exhausted
* ref_iterator_peel(): peel the reference currently being looked at
Iterating using a ref_iterator leaves the flow of control in the hands
of the caller, which means that ref_iterators from multiple
sources (e.g., loose and packed refs) can be composed and presented to
the world as a single compound ref_iterator.
It also means that the backend code for implementing reference iteration
will sometimes be more complicated. For example, the
cache_ref_iterator (which iterates over a ref_cache) can't use the C
stack to recurse; instead, it must manage its own stack internally as
explicit data structures. There is also a lot of boilerplate connected
with object-oriented programming in C.
Eventually, end-user callers will be able to be written in a more
natural way—managing their own flow of control rather than having to
work via callbacks. Since there will only be a few reference backends
but there are many consumers of this API, this is a good tradeoff.
More importantly, we gain composability, and especially the possibility
of writing interchangeable parts that can work with any ref_iterator.
For example, merge_ref_iterator implements a generic way of merging the
contents of any two ref_iterators. It is used to merge loose + packed
refs as part of the implementation of the files_ref_iterator. But it
will also be possible to use it to merge other pairs of reference
sources (e.g., per-worktree vs. shared refs).
Another example is prefix_ref_iterator, which can be used to trim a
prefix off the front of reference names before presenting them to the
caller (e.g., "refs/heads/master" -> "master").
In this patch, we introduce the iterator abstraction and many utilities,
and implement a reference iterator for the files ref storage backend.
(I've written several other obvious utilities, for example a generic way
to filter references being iterated over. These will probably be useful
in the future. But they are not needed for this patch series, so I am
not including them at this time.)
In a moment we will rewrite do_for_each_ref() to work via reference
iterators (allowing some special-purpose code to be discarded), and do
something similar for reflogs. In future patch series, we will expose
the ref_iterator abstraction in the public refs API so that callers can
use it directly.
Implementation note: I tried abstracting this a layer further to allow
generic iterators (over arbitrary types of objects) and generic
utilities like a generic merge_iterator. But the implementation in C was
very cumbersome, involving (in my opinion) too much boilerplate and too
much unsafe casting, some of which would have had to be done on the
caller side. However, I did put a few iterator-related constants in a
top-level header file, iterator.h, as they will be useful in a moment to
implement iteration over directory trees and possibly other types of
iterators in the future.
Signed-off-by: Ramsay Jones <ramsay@ramsayjones.plus.com>
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-06-18 12:15:15 +08:00
|
|
|
*/
|
|
|
|
struct ref_iterator *prefix_ref_iterator_begin(struct ref_iterator *iter0,
|
|
|
|
const char *prefix,
|
|
|
|
int trim);
|
|
|
|
|
|
|
|
/* Internal implementation of reference iteration: */
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Base class constructor for ref_iterators. Initialize the
|
|
|
|
* ref_iterator part of iter, setting its vtable pointer as specified.
|
2017-09-14 01:15:55 +08:00
|
|
|
* `ordered` should be set to 1 if the iterator will iterate over
|
|
|
|
* references in order by refname; otherwise it should be set to 0.
|
refs: introduce an iterator interface
Currently, the API for iterating over references is via a family of
for_each_ref()-type functions that invoke a callback function for each
selected reference. All of these eventually call do_for_each_ref(),
which knows how to do one thing: iterate in parallel through two
ref_caches, one for loose and one for packed refs, giving loose
references precedence over packed refs. This is rather complicated code,
and is quite specialized to the files backend. It also requires callers
to encapsulate their work into a callback function, which often means
that they have to define and use a "cb_data" struct to manage their
context.
The current design is already bursting at the seams, and will become
even more awkward in the upcoming world of multiple reference storage
backends:
* Per-worktree vs. shared references are currently handled via a kludge
in git_path() rather than iterating over each part of the reference
namespace separately and merging the results. This kludge will cease
to work when we have multiple reference storage backends.
* The current scheme is inflexible. What if we sometimes want to bypass
the ref_cache, or use it only for packed or only for loose refs? What
if we want to store symbolic refs in one type of storage backend and
non-symbolic ones in another?
In the future, each reference backend will need to define its own way of
iterating over references. The crux of the problem with the current
design is that it is impossible to compose for_each_ref()-style
iterations, because the flow of control is owned by the for_each_ref()
function. There is nothing that a caller can do but iterate through all
references in a single burst, so there is no way for it to interleave
references from multiple backends and present the result to the rest of
the world as a single compound backend.
This commit introduces a new iteration primitive for references: a
ref_iterator. A ref_iterator is a polymorphic object that a reference
storage backend can be asked to instantiate. There are three functions
that can be applied to a ref_iterator:
* ref_iterator_advance(): move to the next reference in the iteration
* ref_iterator_abort(): end the iteration before it is exhausted
* ref_iterator_peel(): peel the reference currently being looked at
Iterating using a ref_iterator leaves the flow of control in the hands
of the caller, which means that ref_iterators from multiple
sources (e.g., loose and packed refs) can be composed and presented to
the world as a single compound ref_iterator.
It also means that the backend code for implementing reference iteration
will sometimes be more complicated. For example, the
cache_ref_iterator (which iterates over a ref_cache) can't use the C
stack to recurse; instead, it must manage its own stack internally as
explicit data structures. There is also a lot of boilerplate connected
with object-oriented programming in C.
Eventually, end-user callers will be able to be written in a more
natural way—managing their own flow of control rather than having to
work via callbacks. Since there will only be a few reference backends
but there are many consumers of this API, this is a good tradeoff.
More importantly, we gain composability, and especially the possibility
of writing interchangeable parts that can work with any ref_iterator.
For example, merge_ref_iterator implements a generic way of merging the
contents of any two ref_iterators. It is used to merge loose + packed
refs as part of the implementation of the files_ref_iterator. But it
will also be possible to use it to merge other pairs of reference
sources (e.g., per-worktree vs. shared refs).
Another example is prefix_ref_iterator, which can be used to trim a
prefix off the front of reference names before presenting them to the
caller (e.g., "refs/heads/master" -> "master").
In this patch, we introduce the iterator abstraction and many utilities,
and implement a reference iterator for the files ref storage backend.
(I've written several other obvious utilities, for example a generic way
to filter references being iterated over. These will probably be useful
in the future. But they are not needed for this patch series, so I am
not including them at this time.)
In a moment we will rewrite do_for_each_ref() to work via reference
iterators (allowing some special-purpose code to be discarded), and do
something similar for reflogs. In future patch series, we will expose
the ref_iterator abstraction in the public refs API so that callers can
use it directly.
Implementation note: I tried abstracting this a layer further to allow
generic iterators (over arbitrary types of objects) and generic
utilities like a generic merge_iterator. But the implementation in C was
very cumbersome, involving (in my opinion) too much boilerplate and too
much unsafe casting, some of which would have had to be done on the
caller side. However, I did put a few iterator-related constants in a
top-level header file, iterator.h, as they will be useful in a moment to
implement iteration over directory trees and possibly other types of
iterators in the future.
Signed-off-by: Ramsay Jones <ramsay@ramsayjones.plus.com>
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-06-18 12:15:15 +08:00
|
|
|
* This is meant to be called only by the initializers of derived
|
|
|
|
* classes.
|
|
|
|
*/
|
|
|
|
void base_ref_iterator_init(struct ref_iterator *iter,
|
2017-09-14 01:15:55 +08:00
|
|
|
struct ref_iterator_vtable *vtable,
|
|
|
|
int ordered);
|
refs: introduce an iterator interface
Currently, the API for iterating over references is via a family of
for_each_ref()-type functions that invoke a callback function for each
selected reference. All of these eventually call do_for_each_ref(),
which knows how to do one thing: iterate in parallel through two
ref_caches, one for loose and one for packed refs, giving loose
references precedence over packed refs. This is rather complicated code,
and is quite specialized to the files backend. It also requires callers
to encapsulate their work into a callback function, which often means
that they have to define and use a "cb_data" struct to manage their
context.
The current design is already bursting at the seams, and will become
even more awkward in the upcoming world of multiple reference storage
backends:
* Per-worktree vs. shared references are currently handled via a kludge
in git_path() rather than iterating over each part of the reference
namespace separately and merging the results. This kludge will cease
to work when we have multiple reference storage backends.
* The current scheme is inflexible. What if we sometimes want to bypass
the ref_cache, or use it only for packed or only for loose refs? What
if we want to store symbolic refs in one type of storage backend and
non-symbolic ones in another?
In the future, each reference backend will need to define its own way of
iterating over references. The crux of the problem with the current
design is that it is impossible to compose for_each_ref()-style
iterations, because the flow of control is owned by the for_each_ref()
function. There is nothing that a caller can do but iterate through all
references in a single burst, so there is no way for it to interleave
references from multiple backends and present the result to the rest of
the world as a single compound backend.
This commit introduces a new iteration primitive for references: a
ref_iterator. A ref_iterator is a polymorphic object that a reference
storage backend can be asked to instantiate. There are three functions
that can be applied to a ref_iterator:
* ref_iterator_advance(): move to the next reference in the iteration
* ref_iterator_abort(): end the iteration before it is exhausted
* ref_iterator_peel(): peel the reference currently being looked at
Iterating using a ref_iterator leaves the flow of control in the hands
of the caller, which means that ref_iterators from multiple
sources (e.g., loose and packed refs) can be composed and presented to
the world as a single compound ref_iterator.
It also means that the backend code for implementing reference iteration
will sometimes be more complicated. For example, the
cache_ref_iterator (which iterates over a ref_cache) can't use the C
stack to recurse; instead, it must manage its own stack internally as
explicit data structures. There is also a lot of boilerplate connected
with object-oriented programming in C.
Eventually, end-user callers will be able to be written in a more
natural way—managing their own flow of control rather than having to
work via callbacks. Since there will only be a few reference backends
but there are many consumers of this API, this is a good tradeoff.
More importantly, we gain composability, and especially the possibility
of writing interchangeable parts that can work with any ref_iterator.
For example, merge_ref_iterator implements a generic way of merging the
contents of any two ref_iterators. It is used to merge loose + packed
refs as part of the implementation of the files_ref_iterator. But it
will also be possible to use it to merge other pairs of reference
sources (e.g., per-worktree vs. shared refs).
Another example is prefix_ref_iterator, which can be used to trim a
prefix off the front of reference names before presenting them to the
caller (e.g., "refs/heads/master" -> "master").
In this patch, we introduce the iterator abstraction and many utilities,
and implement a reference iterator for the files ref storage backend.
(I've written several other obvious utilities, for example a generic way
to filter references being iterated over. These will probably be useful
in the future. But they are not needed for this patch series, so I am
not including them at this time.)
In a moment we will rewrite do_for_each_ref() to work via reference
iterators (allowing some special-purpose code to be discarded), and do
something similar for reflogs. In future patch series, we will expose
the ref_iterator abstraction in the public refs API so that callers can
use it directly.
Implementation note: I tried abstracting this a layer further to allow
generic iterators (over arbitrary types of objects) and generic
utilities like a generic merge_iterator. But the implementation in C was
very cumbersome, involving (in my opinion) too much boilerplate and too
much unsafe casting, some of which would have had to be done on the
caller side. However, I did put a few iterator-related constants in a
top-level header file, iterator.h, as they will be useful in a moment to
implement iteration over directory trees and possibly other types of
iterators in the future.
Signed-off-by: Ramsay Jones <ramsay@ramsayjones.plus.com>
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-06-18 12:15:15 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Base class destructor for ref_iterators. Destroy the ref_iterator
|
|
|
|
* part of iter and shallow-free the object. This is meant to be
|
|
|
|
* called only by the destructors of derived classes.
|
|
|
|
*/
|
|
|
|
void base_ref_iterator_free(struct ref_iterator *iter);
|
|
|
|
|
|
|
|
/* Virtual function declarations for ref_iterators: */
|
|
|
|
|
2020-05-21 01:36:09 +08:00
|
|
|
/*
|
|
|
|
* Backend-specific implementation of ref_iterator_advance. For symrefs, the
|
|
|
|
* function should set REF_ISSYMREF, and it should also dereference the symref
|
2021-09-25 02:39:44 +08:00
|
|
|
* to provide the OID referent. It should respect do_for_each_ref_flags
|
|
|
|
* that were passed to refs_ref_iterator_begin().
|
2020-05-21 01:36:09 +08:00
|
|
|
*/
|
refs: introduce an iterator interface
Currently, the API for iterating over references is via a family of
for_each_ref()-type functions that invoke a callback function for each
selected reference. All of these eventually call do_for_each_ref(),
which knows how to do one thing: iterate in parallel through two
ref_caches, one for loose and one for packed refs, giving loose
references precedence over packed refs. This is rather complicated code,
and is quite specialized to the files backend. It also requires callers
to encapsulate their work into a callback function, which often means
that they have to define and use a "cb_data" struct to manage their
context.
The current design is already bursting at the seams, and will become
even more awkward in the upcoming world of multiple reference storage
backends:
* Per-worktree vs. shared references are currently handled via a kludge
in git_path() rather than iterating over each part of the reference
namespace separately and merging the results. This kludge will cease
to work when we have multiple reference storage backends.
* The current scheme is inflexible. What if we sometimes want to bypass
the ref_cache, or use it only for packed or only for loose refs? What
if we want to store symbolic refs in one type of storage backend and
non-symbolic ones in another?
In the future, each reference backend will need to define its own way of
iterating over references. The crux of the problem with the current
design is that it is impossible to compose for_each_ref()-style
iterations, because the flow of control is owned by the for_each_ref()
function. There is nothing that a caller can do but iterate through all
references in a single burst, so there is no way for it to interleave
references from multiple backends and present the result to the rest of
the world as a single compound backend.
This commit introduces a new iteration primitive for references: a
ref_iterator. A ref_iterator is a polymorphic object that a reference
storage backend can be asked to instantiate. There are three functions
that can be applied to a ref_iterator:
* ref_iterator_advance(): move to the next reference in the iteration
* ref_iterator_abort(): end the iteration before it is exhausted
* ref_iterator_peel(): peel the reference currently being looked at
Iterating using a ref_iterator leaves the flow of control in the hands
of the caller, which means that ref_iterators from multiple
sources (e.g., loose and packed refs) can be composed and presented to
the world as a single compound ref_iterator.
It also means that the backend code for implementing reference iteration
will sometimes be more complicated. For example, the
cache_ref_iterator (which iterates over a ref_cache) can't use the C
stack to recurse; instead, it must manage its own stack internally as
explicit data structures. There is also a lot of boilerplate connected
with object-oriented programming in C.
Eventually, end-user callers will be able to be written in a more
natural way—managing their own flow of control rather than having to
work via callbacks. Since there will only be a few reference backends
but there are many consumers of this API, this is a good tradeoff.
More importantly, we gain composability, and especially the possibility
of writing interchangeable parts that can work with any ref_iterator.
For example, merge_ref_iterator implements a generic way of merging the
contents of any two ref_iterators. It is used to merge loose + packed
refs as part of the implementation of the files_ref_iterator. But it
will also be possible to use it to merge other pairs of reference
sources (e.g., per-worktree vs. shared refs).
Another example is prefix_ref_iterator, which can be used to trim a
prefix off the front of reference names before presenting them to the
caller (e.g., "refs/heads/master" -> "master").
In this patch, we introduce the iterator abstraction and many utilities,
and implement a reference iterator for the files ref storage backend.
(I've written several other obvious utilities, for example a generic way
to filter references being iterated over. These will probably be useful
in the future. But they are not needed for this patch series, so I am
not including them at this time.)
In a moment we will rewrite do_for_each_ref() to work via reference
iterators (allowing some special-purpose code to be discarded), and do
something similar for reflogs. In future patch series, we will expose
the ref_iterator abstraction in the public refs API so that callers can
use it directly.
Implementation note: I tried abstracting this a layer further to allow
generic iterators (over arbitrary types of objects) and generic
utilities like a generic merge_iterator. But the implementation in C was
very cumbersome, involving (in my opinion) too much boilerplate and too
much unsafe casting, some of which would have had to be done on the
caller side. However, I did put a few iterator-related constants in a
top-level header file, iterator.h, as they will be useful in a moment to
implement iteration over directory trees and possibly other types of
iterators in the future.
Signed-off-by: Ramsay Jones <ramsay@ramsayjones.plus.com>
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-06-18 12:15:15 +08:00
|
|
|
typedef int ref_iterator_advance_fn(struct ref_iterator *ref_iterator);
|
|
|
|
|
2021-05-19 23:31:28 +08:00
|
|
|
/*
|
|
|
|
* Peels the current ref, returning 0 for success or -1 for failure.
|
|
|
|
*/
|
refs: introduce an iterator interface
Currently, the API for iterating over references is via a family of
for_each_ref()-type functions that invoke a callback function for each
selected reference. All of these eventually call do_for_each_ref(),
which knows how to do one thing: iterate in parallel through two
ref_caches, one for loose and one for packed refs, giving loose
references precedence over packed refs. This is rather complicated code,
and is quite specialized to the files backend. It also requires callers
to encapsulate their work into a callback function, which often means
that they have to define and use a "cb_data" struct to manage their
context.
The current design is already bursting at the seams, and will become
even more awkward in the upcoming world of multiple reference storage
backends:
* Per-worktree vs. shared references are currently handled via a kludge
in git_path() rather than iterating over each part of the reference
namespace separately and merging the results. This kludge will cease
to work when we have multiple reference storage backends.
* The current scheme is inflexible. What if we sometimes want to bypass
the ref_cache, or use it only for packed or only for loose refs? What
if we want to store symbolic refs in one type of storage backend and
non-symbolic ones in another?
In the future, each reference backend will need to define its own way of
iterating over references. The crux of the problem with the current
design is that it is impossible to compose for_each_ref()-style
iterations, because the flow of control is owned by the for_each_ref()
function. There is nothing that a caller can do but iterate through all
references in a single burst, so there is no way for it to interleave
references from multiple backends and present the result to the rest of
the world as a single compound backend.
This commit introduces a new iteration primitive for references: a
ref_iterator. A ref_iterator is a polymorphic object that a reference
storage backend can be asked to instantiate. There are three functions
that can be applied to a ref_iterator:
* ref_iterator_advance(): move to the next reference in the iteration
* ref_iterator_abort(): end the iteration before it is exhausted
* ref_iterator_peel(): peel the reference currently being looked at
Iterating using a ref_iterator leaves the flow of control in the hands
of the caller, which means that ref_iterators from multiple
sources (e.g., loose and packed refs) can be composed and presented to
the world as a single compound ref_iterator.
It also means that the backend code for implementing reference iteration
will sometimes be more complicated. For example, the
cache_ref_iterator (which iterates over a ref_cache) can't use the C
stack to recurse; instead, it must manage its own stack internally as
explicit data structures. There is also a lot of boilerplate connected
with object-oriented programming in C.
Eventually, end-user callers will be able to be written in a more
natural way—managing their own flow of control rather than having to
work via callbacks. Since there will only be a few reference backends
but there are many consumers of this API, this is a good tradeoff.
More importantly, we gain composability, and especially the possibility
of writing interchangeable parts that can work with any ref_iterator.
For example, merge_ref_iterator implements a generic way of merging the
contents of any two ref_iterators. It is used to merge loose + packed
refs as part of the implementation of the files_ref_iterator. But it
will also be possible to use it to merge other pairs of reference
sources (e.g., per-worktree vs. shared refs).
Another example is prefix_ref_iterator, which can be used to trim a
prefix off the front of reference names before presenting them to the
caller (e.g., "refs/heads/master" -> "master").
In this patch, we introduce the iterator abstraction and many utilities,
and implement a reference iterator for the files ref storage backend.
(I've written several other obvious utilities, for example a generic way
to filter references being iterated over. These will probably be useful
in the future. But they are not needed for this patch series, so I am
not including them at this time.)
In a moment we will rewrite do_for_each_ref() to work via reference
iterators (allowing some special-purpose code to be discarded), and do
something similar for reflogs. In future patch series, we will expose
the ref_iterator abstraction in the public refs API so that callers can
use it directly.
Implementation note: I tried abstracting this a layer further to allow
generic iterators (over arbitrary types of objects) and generic
utilities like a generic merge_iterator. But the implementation in C was
very cumbersome, involving (in my opinion) too much boilerplate and too
much unsafe casting, some of which would have had to be done on the
caller side. However, I did put a few iterator-related constants in a
top-level header file, iterator.h, as they will be useful in a moment to
implement iteration over directory trees and possibly other types of
iterators in the future.
Signed-off-by: Ramsay Jones <ramsay@ramsayjones.plus.com>
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-06-18 12:15:15 +08:00
|
|
|
typedef int ref_iterator_peel_fn(struct ref_iterator *ref_iterator,
|
|
|
|
struct object_id *peeled);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Implementations of this function should free any resources specific
|
|
|
|
* to the derived class, then call base_ref_iterator_free() to clean
|
|
|
|
* up and free the ref_iterator object.
|
|
|
|
*/
|
|
|
|
typedef int ref_iterator_abort_fn(struct ref_iterator *ref_iterator);
|
|
|
|
|
|
|
|
/*
 * Table of virtual functions for a ref_iterator. It holds one function
 * pointer for each of the advance, peel and abort operations.
 */
struct ref_iterator_vtable {
|
|
|
|
/* Backend-specific implementation of ref_iterator_advance. */
ref_iterator_advance_fn *advance;
|
|
|
|
/* Peels the current ref; returns 0 for success or -1 for failure. */
ref_iterator_peel_fn *peel;
|
|
|
|
/*
 * Frees any resources specific to the derived class, then calls
 * base_ref_iterator_free() to clean up and free the iterator.
 */
ref_iterator_abort_fn *abort;
|
|
|
|
};
|
|
|
|
|
2016-04-08 03:02:49 +08:00
|
|
|
/*
|
do_for_each_ref(): reimplement using reference iteration
Use the reference iterator interface to implement do_for_each_ref().
Delete a bunch of code supporting the old for_each_ref() implementation.
And now that do_for_each_ref() is generic code (it is no longer tied to
the files backend), move it to refs.c.
The implementation is via a new function, do_for_each_ref_iterator(),
which takes a reference iterator as argument and calls a callback
function for each of the references in the iterator.
This change requires the current_ref performance hack for peel_ref() to
be implemented via ref_iterator_peel() rather than peel_entry() because
we don't have a ref_entry handy (it is hidden under three layers:
file_ref_iterator, merge_ref_iterator, and cache_ref_iterator). So:
* do_for_each_ref_iterator() records the active iterator in
current_ref_iter while it is running.
* peel_ref() checks whether current_ref_iter is pointing at the
requested reference. If so, it asks the iterator to peel the
reference (which it can do efficiently via its "peel" virtual
function). For extra safety, we do the optimization only if the
refname *addresses* are the same, not only if the refname *strings*
are the same, to forestall possible mixups between refnames that come
from different ref_iterators.
Please note that this optimization of peel_ref() is only available when
iterating via do_for_each_ref_iterator() (including all of the
for_each_ref() functions, which call it indirectly). It would be
complicated to implement a similar optimization when iterating directly
using a reference iterator, because multiple reference iterators can be
in use at the same time, with interleaved calls to
ref_iterator_advance(). (In fact we do exactly that in
merge_ref_iterator.)
But that is not necessary. peel_ref() is only called while iterating
over references. Callers who iterate using the for_each_ref() functions
benefit from the optimization described above. Callers who iterate using
reference iterators directly have access to the ref_iterator, so they
can call ref_iterator_peel() themselves to get an analogous optimization
in a more straightforward manner.
If we rewrite all callers to use the reference iteration API, then we
can remove the current_ref_iter hack permanently.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-06-18 12:15:16 +08:00
|
|
|
* current_ref_iter is a performance hack: when iterating over
|
|
|
|
* references using the for_each_ref*() functions, current_ref_iter is
|
|
|
|
* set to the reference iterator before calling the callback function.
|
|
|
|
* If the callback function calls peel_ref(), then peel_ref() first
|
|
|
|
* checks whether the reference to be peeled is the one referred to by
|
|
|
|
* the iterator (it usually is) and if so, asks the iterator for the
|
|
|
|
* peeled version of the reference if it is available. This avoids a
|
|
|
|
* refname lookup in a common case. current_ref_iter is set to NULL
|
|
|
|
* when the iteration is over.
|
|
|
|
*/
|
|
|
|
extern struct ref_iterator *current_ref_iter;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The common backend for the for_each_*ref* functions. Call fn for
|
|
|
|
* each reference in iter. If the iterator itself ever returns
|
|
|
|
* ITER_ERROR, return -1. If fn ever returns a non-zero value, stop
|
|
|
|
* the iteration and return that value. Otherwise, return 0. In any
|
|
|
|
* case, free the iterator when done. This function is basically an
|
|
|
|
* adapter between the callback style of reference iteration and the
|
|
|
|
* iterator style.
|
|
|
|
*/
|
2018-08-21 02:24:16 +08:00
|
|
|
int do_for_each_repo_ref_iterator(struct repository *r,
|
|
|
|
struct ref_iterator *iter,
|
|
|
|
each_repo_ref_fn fn, void *cb_data);
|
2016-04-08 03:03:10 +08:00
|
|
|
|
2016-09-05 00:08:37 +08:00
|
|
|
struct ref_store;
|
|
|
|
|
2016-09-05 00:08:44 +08:00
|
|
|
/* refs backends */
|
|
|
|
|
2017-03-26 10:42:32 +08:00
|
|
|
/* ref_store_init flags */
|
|
|
|
#define REF_STORE_READ (1 << 0)
|
|
|
|
#define REF_STORE_WRITE (1 << 1) /* can perform update operations */
|
|
|
|
#define REF_STORE_ODB (1 << 2) /* has access to object database */
|
|
|
|
#define REF_STORE_MAIN (1 << 3)
|
2017-04-24 18:01:21 +08:00
|
|
|
#define REF_STORE_ALL_CAPS (REF_STORE_READ | \
|
|
|
|
REF_STORE_WRITE | \
|
|
|
|
REF_STORE_ODB | \
|
|
|
|
REF_STORE_MAIN)
|
2017-03-26 10:42:32 +08:00
|
|
|
|
2016-09-05 00:08:25 +08:00
|
|
|
/*
|
2017-03-26 10:42:31 +08:00
|
|
|
* Initialize the ref_store for the specified gitdir. These functions
|
|
|
|
* should call base_ref_store_init() to initialize the shared part of
|
|
|
|
* the ref_store and to record the ref_store for later lookup.
|
2016-09-05 00:08:25 +08:00
|
|
|
*/
|
2021-10-09 05:08:14 +08:00
|
|
|
typedef struct ref_store *ref_store_init_fn(struct repository *repo,
|
|
|
|
const char *gitdir,
|
2017-03-26 10:42:32 +08:00
|
|
|
unsigned int flags);
|
2016-09-05 00:08:25 +08:00
|
|
|
|
2016-09-05 00:08:41 +08:00
|
|
|
typedef int ref_init_db_fn(struct ref_store *refs, struct strbuf *err);
|
|
|
|
|
ref_transaction_prepare(): new optional step for reference updates
In the future, compound reference stores will sometimes need to modify
references in two different reference stores at the same time, meaning
that a single logical reference transaction might have to be
implemented as two internal sub-transactions. They won't want to call
`ref_transaction_commit()` for the two sub-transactions one after the
other, because that wouldn't be atomic (the first commit could succeed
and the second one fail). Instead, they will want to prepare both
sub-transactions (i.e., obtain any necessary locks and do any
pre-checks), and only if both prepare steps succeed, then commit both
sub-transactions.
Start preparing for that day by adding a new, optional
`ref_transaction_prepare()` step to the reference transaction
sequence, which obtains the locks and does any prechecks, reporting
any errors that occur. Also add a `ref_transaction_abort()` function
that can be used to abort a sub-transaction even if it has already
been prepared.
That is on the side of the public-facing API. On the side of the
`ref_store` VTABLE, get rid of `transaction_commit` and instead add
methods `transaction_prepare`, `transaction_finish`, and
`transaction_abort`. A `ref_transaction_commit()` now basically calls
methods `transaction_prepare` then `transaction_finish`.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-05-22 22:17:44 +08:00
|
|
|
typedef int ref_transaction_prepare_fn(struct ref_store *refs,
|
|
|
|
struct ref_transaction *transaction,
|
|
|
|
struct strbuf *err);
|
|
|
|
|
|
|
|
typedef int ref_transaction_finish_fn(struct ref_store *refs,
|
|
|
|
struct ref_transaction *transaction,
|
|
|
|
struct strbuf *err);
|
|
|
|
|
|
|
|
typedef int ref_transaction_abort_fn(struct ref_store *refs,
|
|
|
|
struct ref_transaction *transaction,
|
|
|
|
struct strbuf *err);
|
|
|
|
|
2016-09-05 00:08:25 +08:00
|
|
|
typedef int ref_transaction_commit_fn(struct ref_store *refs,
|
|
|
|
struct ref_transaction *transaction,
|
|
|
|
struct strbuf *err);
|
|
|
|
|
2016-09-05 00:08:27 +08:00
|
|
|
typedef int pack_refs_fn(struct ref_store *ref_store, unsigned int flags);
|
2016-09-05 00:08:28 +08:00
|
|
|
/*
 * Backend method that creates a symbolic reference in `ref_store`.
 *
 * `refname` is the name of the symbolic reference itself and `target`
 * is the name of the reference it should point at (e.g.
 * "refs/heads/master"). `logmsg` is the message associated with the
 * update (presumably recorded in the reflog -- confirm against the
 * backends).
 *
 * NOTE(review): the return convention is not visible here; it is
 * presumably 0 on success and non-zero on failure, like the other
 * backend methods -- confirm against the implementations.
 */
typedef int create_symref_fn(struct ref_store *ref_store,
			     const char *refname,
			     const char *target,
			     const char *logmsg);
|
2017-05-22 22:17:38 +08:00
|
|
|
typedef int delete_refs_fn(struct ref_store *ref_store, const char *msg,
|
2016-09-05 00:08:40 +08:00
|
|
|
struct string_list *refnames, unsigned int flags);
|
2016-09-05 00:08:42 +08:00
|
|
|
typedef int rename_ref_fn(struct ref_store *ref_store,
|
|
|
|
const char *oldref, const char *newref,
|
|
|
|
const char *logmsg);
|
branch: add a --copy (-c) option to go with --move (-m)
Add the ability to --copy a branch and its reflog and configuration,
this uses the same underlying machinery as the --move (-m) option
except the reflog and configuration is copied instead of being moved.
This is useful for e.g. copying a topic branch to a new version,
e.g. work to work-2 after submitting the work topic to the list, while
preserving all the tracking info and other configuration that goes
with the branch, and unlike --move keeping the other already-submitted
branch around for reference.
Like --move, when the source branch is the currently checked out
branch the HEAD is moved to the destination branch. In the case of
--move we don't really have a choice (other than remaining on a
detached HEAD) and in order to keep the functionality consistent, we
are doing it in similar way for --copy too.
The most common usage of this feature is expected to be moving to a
new topic branch which is a copy of the current one, in that case
moving to the target branch is what the user wants, and doesn't
unexpectedly behave differently than --move would.
One outstanding caveat of this implementation is that:
git checkout maint &&
git checkout master &&
git branch -c topic &&
git checkout -
Will check out 'maint' instead of 'master'. This is because the @{-N}
feature (or its -1 shorthand "-") relies on HEAD reflogs created by
the checkout command, so in this case we'll checkout maint instead of
master, as the user might expect. What to do about that is left to a
future change.
Helped-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Sahil Dua <sahildua2305@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-06-19 05:19:16 +08:00
|
|
|
typedef int copy_ref_fn(struct ref_store *ref_store,
|
|
|
|
const char *oldref, const char *newref,
|
|
|
|
const char *logmsg);
|
2016-09-05 00:08:27 +08:00
|
|
|
|
2016-09-05 00:08:37 +08:00
|
|
|
/*
|
2017-05-22 22:17:33 +08:00
|
|
|
* Iterate over the references in `ref_store` whose names start with
|
|
|
|
* `prefix`. `prefix` is matched as a literal string, without regard
|
|
|
|
* for path separators. If prefix is NULL or the empty string, iterate
|
2017-09-14 01:15:55 +08:00
|
|
|
* over all references in `ref_store`. The output is ordered by
|
|
|
|
* refname.
|
2016-09-05 00:08:37 +08:00
|
|
|
*/
|
|
|
|
typedef struct ref_iterator *ref_iterator_begin_fn(
|
|
|
|
struct ref_store *ref_store,
|
|
|
|
const char *prefix, unsigned int flags);
|
|
|
|
|
2016-09-05 00:08:38 +08:00
|
|
|
/* reflog functions */
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Iterate over the references in the specified ref_store that have a
|
|
|
|
* reflog. The refs are iterated over in arbitrary order.
|
|
|
|
*/
|
|
|
|
typedef struct ref_iterator *reflog_iterator_begin_fn(
|
|
|
|
struct ref_store *ref_store);
|
|
|
|
|
|
|
|
typedef int for_each_reflog_ent_fn(struct ref_store *ref_store,
|
|
|
|
const char *refname,
|
|
|
|
each_reflog_ent_fn fn,
|
|
|
|
void *cb_data);
|
|
|
|
typedef int for_each_reflog_ent_reverse_fn(struct ref_store *ref_store,
|
|
|
|
const char *refname,
|
|
|
|
each_reflog_ent_fn fn,
|
|
|
|
void *cb_data);
|
|
|
|
typedef int reflog_exists_fn(struct ref_store *ref_store, const char *refname);
|
|
|
|
typedef int create_reflog_fn(struct ref_store *ref_store, const char *refname,
|
2021-11-22 22:19:08 +08:00
|
|
|
struct strbuf *err);
|
2016-09-05 00:08:38 +08:00
|
|
|
typedef int delete_reflog_fn(struct ref_store *ref_store, const char *refname);
|
|
|
|
typedef int reflog_expire_fn(struct ref_store *ref_store,
|
2021-08-23 19:36:11 +08:00
|
|
|
const char *refname,
|
2016-09-05 00:08:38 +08:00
|
|
|
unsigned int flags,
|
|
|
|
reflog_expiry_prepare_fn prepare_fn,
|
|
|
|
reflog_expiry_should_prune_fn should_prune_fn,
|
|
|
|
reflog_expiry_cleanup_fn cleanup_fn,
|
|
|
|
void *policy_cb_data);
|
|
|
|
|
2016-05-06 23:25:31 +08:00
|
|
|
/*
|
2016-09-05 00:08:20 +08:00
|
|
|
* Read a reference from the specified reference store, non-recursively.
|
|
|
|
* Set type to describe the reference, and:
|
2016-05-06 23:25:31 +08:00
|
|
|
*
|
2017-10-16 06:07:11 +08:00
|
|
|
* - If refname is the name of a normal reference, fill in oid
|
2016-05-06 23:25:31 +08:00
|
|
|
* (leaving referent unchanged).
|
|
|
|
*
|
|
|
|
* - If refname is the name of a symbolic reference, write the full
|
|
|
|
* name of the reference to which it refers (e.g.
|
|
|
|
* "refs/heads/master") to referent and set the REF_ISSYMREF bit in
|
2017-10-16 06:07:11 +08:00
|
|
|
* type (leaving oid unchanged). The caller is responsible for
|
2016-05-06 23:25:31 +08:00
|
|
|
* validating that referent is a valid reference name.
|
|
|
|
*
|
|
|
|
* WARNING: refname might be used as part of a filename, so it is
|
|
|
|
* important from a security standpoint that it be safe in the sense
|
|
|
|
* of refname_is_safe(). Moreover, for symrefs this function sets
|
|
|
|
* referent to whatever the repository says, which might not be a
|
|
|
|
* properly-formatted or even safe reference name. NEITHER INPUT NOR
|
|
|
|
* OUTPUT REFERENCE NAMES ARE VALIDATED WITHIN THIS FUNCTION.
|
|
|
|
*
|
2021-08-23 19:52:40 +08:00
|
|
|
* Return 0 on success, or -1 on failure. If the ref exists but is neither a
|
|
|
|
* symbolic ref nor an object ID, it is broken. In this case set REF_ISBROKEN in
|
|
|
|
* type, and return -1 (failure_errno should not be ENOENT).
|
|
|
|
*
|
|
|
|
* failure_errno provides errno codes that are interpreted beyond error
|
|
|
|
* reporting. The following error codes have special meaning:
|
|
|
|
* * ENOENT: the ref doesn't exist
|
|
|
|
* * EISDIR: ref name is a directory
|
|
|
|
* * ENOTDIR: ref prefix is not a directory
|
2016-05-06 23:25:31 +08:00
|
|
|
*
|
|
|
|
* Backend-specific flags might be set in type as well, regardless of
|
|
|
|
* outcome.
|
|
|
|
*
|
|
|
|
* It is OK for refname to point into referent. If so:
|
|
|
|
*
|
|
|
|
* - if the function succeeds with REF_ISSYMREF, referent will be
|
|
|
|
* overwritten and the memory formerly pointed to by it might be
|
|
|
|
* changed or even freed.
|
|
|
|
*
|
|
|
|
* - in all other cases, referent will be untouched, and therefore
|
|
|
|
* refname will still be valid and unchanged.
|
|
|
|
*/
|
2021-08-23 19:52:40 +08:00
|
|
|
typedef int read_raw_ref_fn(struct ref_store *ref_store, const char *refname,
|
|
|
|
struct object_id *oid, struct strbuf *referent,
|
|
|
|
unsigned int *type, int *failure_errno);
|
2016-09-05 00:08:16 +08:00
|
|
|
|
refs: add ability for backends to special-case reading of symbolic refs
Reading of symbolic and non-symbolic references is currently treated the
same in reference backends: we always call `refs_read_raw_ref()` and
then decide based on the returned flags what type it is. This has one
downside though: symbolic references may be treated differently from
normal references in a backend. The packed-refs
backend for example doesn't even know about symbolic references, and as
a result it is pointless to even ask it for one.
There are cases where we really only care about whether a reference is
symbolic or not, but don't care about whether it exists at all or may be
a non-symbolic reference. But it is not possible to optimize for this
case right now, and as a consequence we will always first check for a
loose reference to exist, and if it doesn't, we'll query the packed-refs
backend for a known-to-not-be-symbolic reference. This is inefficient
and requires us to search all packed references even though we know to
not care for the result at all.
Introduce a new function `refs_read_symbolic_ref()` which allows us to
fix this case. This function will only ever return symbolic references
and can thus optimize for the scenario laid out above. By default, if
the backend doesn't provide an implementation for it, we just use the
old code path and fall back to `read_raw_ref()`. But in case the backend
provides its own, more efficient implementation, we will use that one
instead.
Note that this function is explicitly designed to not distinguish
between missing references and non-symbolic references. If it did, we'd
be forced to always search the packed-refs backend to see whether the
symbolic reference the user asked for really doesn't exist, or if it
exists as a non-symbolic reference.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-03-01 17:33:46 +08:00
|
|
|
/*
|
|
|
|
* Read a symbolic reference from the specified reference store. This function
|
|
|
|
* is optional: if not implemented by a backend, then `read_raw_ref_fn` is used
|
|
|
|
* to read the symbolic reference instead. It is intended to be implemented
|
|
|
|
* only in case the backend can optimize the reading of symbolic references.
|
|
|
|
*
|
|
|
|
* Return 0 on success, or -1 on failure. `referent` will be set to the target
|
|
|
|
* of the symbolic reference on success. This function explicitly does not
|
|
|
|
* distinguish between error cases and the reference not being a symbolic
|
|
|
|
* reference to allow backends to optimize this operation in case symbolic and
|
|
|
|
* non-symbolic references are treated differently.
|
|
|
|
*/
|
|
|
|
typedef int read_symbolic_ref_fn(struct ref_store *ref_store, const char *refname,
|
|
|
|
struct strbuf *referent);
|
|
|
|
|
2016-09-05 00:08:10 +08:00
|
|
|
struct ref_storage_be {
|
|
|
|
struct ref_storage_be *next;
|
|
|
|
const char *name;
|
2016-09-05 00:08:11 +08:00
|
|
|
ref_store_init_fn *init;
|
2016-09-05 00:08:41 +08:00
|
|
|
ref_init_db_fn *init_db;
|
ref_transaction_prepare(): new optional step for reference updates
In the future, compound reference stores will sometimes need to modify
references in two different reference stores at the same time, meaning
that a single logical reference transaction might have to be
implemented as two internal sub-transactions. They won't want to call
`ref_transaction_commit()` for the two sub-transactions one after the
other, because that wouldn't be atomic (the first commit could succeed
and the second one fail). Instead, they will want to prepare both
sub-transactions (i.e., obtain any necessary locks and do any
pre-checks), and only if both prepare steps succeed, then commit both
sub-transactions.
Start preparing for that day by adding a new, optional
`ref_transaction_prepare()` step to the reference transaction
sequence, which obtains the locks and does any prechecks, reporting
any errors that occur. Also add a `ref_transaction_abort()` function
that can be used to abort a sub-transaction even if it has already
been prepared.
That is on the side of the public-facing API. On the side of the
`ref_store` VTABLE, get rid of `transaction_commit` and instead add
methods `transaction_prepare`, `transaction_finish`, and
`transaction_abort`. A `ref_transaction_commit()` now basically calls
methods `transaction_prepare` then `transaction_finish`.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-05-22 22:17:44 +08:00
|
|
|
|
|
|
|
ref_transaction_prepare_fn *transaction_prepare;
|
|
|
|
ref_transaction_finish_fn *transaction_finish;
|
|
|
|
ref_transaction_abort_fn *transaction_abort;
|
2016-09-05 00:08:39 +08:00
|
|
|
ref_transaction_commit_fn *initial_transaction_commit;
|
2016-09-05 00:08:25 +08:00
|
|
|
|
2016-09-05 00:08:27 +08:00
|
|
|
pack_refs_fn *pack_refs;
|
2016-09-05 00:08:28 +08:00
|
|
|
create_symref_fn *create_symref;
|
2016-09-05 00:08:40 +08:00
|
|
|
delete_refs_fn *delete_refs;
|
2016-09-05 00:08:42 +08:00
|
|
|
rename_ref_fn *rename_ref;
|
branch: add a --copy (-c) option to go with --move (-m)
Add the ability to --copy a branch and its reflog and configuration,
this uses the same underlying machinery as the --move (-m) option
except the reflog and configuration is copied instead of being moved.
This is useful for e.g. copying a topic branch to a new version,
e.g. work to work-2 after submitting the work topic to the list, while
preserving all the tracking info and other configuration that goes
with the branch, and unlike --move keeping the other already-submitted
branch around for reference.
Like --move, when the source branch is the currently checked out
branch the HEAD is moved to the destination branch. In the case of
--move we don't really have a choice (other than remaining on a
detached HEAD) and in order to keep the functionality consistent, we
are doing it in similar way for --copy too.
The most common usage of this feature is expected to be moving to a
new topic branch which is a copy of the current one, in that case
moving to the target branch is what the user wants, and doesn't
unexpectedly behave differently than --move would.
One outstanding caveat of this implementation is that:
git checkout maint &&
git checkout master &&
git branch -c topic &&
git checkout -
Will check out 'maint' instead of 'master'. This is because the @{-N}
feature (or its -1 shorthand "-") relies on HEAD reflogs created by
the checkout command, so in this case we'll checkout maint instead of
master, as the user might expect. What to do about that is left to a
future change.
Helped-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Sahil Dua <sahildua2305@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-06-19 05:19:16 +08:00
|
|
|
copy_ref_fn *copy_ref;
|
2016-09-05 00:08:27 +08:00
|
|
|
|
2016-09-05 00:08:37 +08:00
|
|
|
ref_iterator_begin_fn *iterator_begin;
|
2016-09-05 00:08:25 +08:00
|
|
|
read_raw_ref_fn *read_raw_ref;
|
refs: add ability for backends to special-case reading of symbolic refs
Reading of symbolic and non-symbolic references is currently treated the
same in reference backends: we always call `refs_read_raw_ref()` and
then decide based on the returned flags what type it is. This has one
downside though: symbolic references may be treated differently from
normal references in a backend. The packed-refs
backend for example doesn't even know about symbolic references, and as
a result it is pointless to even ask it for one.
There are cases where we really only care about whether a reference is
symbolic or not, but don't care about whether it exists at all or may be
a non-symbolic reference. But it is not possible to optimize for this
case right now, and as a consequence we will always first check for a
loose reference to exist, and if it doesn't, we'll query the packed-refs
backend for a known-to-not-be-symbolic reference. This is inefficient
and requires us to search all packed references even though we know to
not care for the result at all.
Introduce a new function `refs_read_symbolic_ref()` which allows us to
fix this case. This function will only ever return symbolic references
and can thus optimize for the scenario laid out above. By default, if
the backend doesn't provide an implementation for it, we just use the
old code path and fall back to `read_raw_ref()`. But in case the backend
provides its own, more efficient implementation, we will use that one
instead.
Note that this function is explicitly designed to not distinguish
between missing references and non-symbolic references. If it did, we'd
be forced to always search the packed-refs backend to see whether the
symbolic reference the user asked for really doesn't exist, or if it
exists as a non-symbolic reference.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-03-01 17:33:46 +08:00
|
|
|
read_symbolic_ref_fn *read_symbolic_ref;
|
2016-09-05 00:08:38 +08:00
|
|
|
|
|
|
|
reflog_iterator_begin_fn *reflog_iterator_begin;
|
|
|
|
for_each_reflog_ent_fn *for_each_reflog_ent;
|
|
|
|
for_each_reflog_ent_reverse_fn *for_each_reflog_ent_reverse;
|
|
|
|
reflog_exists_fn *reflog_exists;
|
|
|
|
create_reflog_fn *create_reflog;
|
|
|
|
delete_reflog_fn *delete_reflog;
|
|
|
|
reflog_expire_fn *reflog_expire;
|
2016-09-05 00:08:10 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
extern struct ref_storage_be refs_be_files;
|
2017-06-23 15:01:38 +08:00
|
|
|
extern struct ref_storage_be refs_be_packed;
|
2016-09-05 00:08:10 +08:00
|
|
|
|
2016-09-05 00:08:11 +08:00
|
|
|
/*
|
|
|
|
* A representation of the reference store for the main repository or
|
|
|
|
* a submodule. The ref_store instances for submodules are kept in a
|
2020-08-19 22:27:56 +08:00
|
|
|
* hash map; see get_submodule_ref_store() for more info.
|
2016-09-05 00:08:11 +08:00
|
|
|
*/
|
|
|
|
struct ref_store {
|
|
|
|
/* The backend describing this ref_store's storage scheme: */
|
|
|
|
const struct ref_storage_be *be;
|
2020-08-19 22:27:57 +08:00
|
|
|
|
2021-10-09 05:08:14 +08:00
|
|
|
struct repository *repo;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The gitdir that this ref_store applies to. Note that this is not
|
|
|
|
* necessarily repo->gitdir if the repo has multiple worktrees.
|
|
|
|
*/
|
2020-08-19 22:27:57 +08:00
|
|
|
char *gitdir;
|
2016-09-05 00:08:11 +08:00
|
|
|
};
|
|
|
|
|
2020-08-19 22:27:55 +08:00
|
|
|
/*
|
2021-10-16 17:39:10 +08:00
|
|
|
* Parse contents of a loose ref file. *failure_errno may be set to EINVAL for
|
|
|
|
* invalid contents.
|
2020-08-19 22:27:55 +08:00
|
|
|
*/
|
|
|
|
int parse_loose_ref_contents(const char *buf, struct object_id *oid,
|
2021-10-16 17:39:10 +08:00
|
|
|
struct strbuf *referent, unsigned int *type,
|
|
|
|
int *failure_errno);
|
2020-08-19 22:27:55 +08:00
|
|
|
|
2016-09-05 00:08:11 +08:00
|
|
|
/*
|
2017-02-10 19:16:17 +08:00
|
|
|
* Fill in the generic part of refs and add it to our collection of
|
|
|
|
* reference stores.
|
2016-09-05 00:08:11 +08:00
|
|
|
*/
|
2021-12-23 02:11:54 +08:00
|
|
|
void base_ref_store_init(struct ref_store *refs, struct repository *repo,
|
|
|
|
const char *path, const struct ref_storage_be *be);
|
2016-09-05 00:08:11 +08:00
|
|
|
|
2020-09-09 18:15:08 +08:00
|
|
|
/*
|
|
|
|
* Support GIT_TRACE_REFS by optionally wrapping the given ref_store instance.
|
|
|
|
*/
|
|
|
|
struct ref_store *maybe_debug_wrap_ref_store(const char *gitdir, struct ref_store *store);
|
|
|
|
|
2015-11-10 19:42:36 +08:00
|
|
|
#endif /* REFS_REFS_INTERNAL_H */
|