2017-06-23 02:43:32 +08:00
|
|
|
#ifndef REPOSITORY_H
|
|
|
|
#define REPOSITORY_H
|
|
|
|
|
2024-05-17 16:18:34 +08:00
|
|
|
#include "strmap.h"
|
2024-09-12 19:30:04 +08:00
|
|
|
#include "repo-settings.h"
|
2024-05-17 16:18:34 +08:00
|
|
|
|
2017-06-23 02:43:42 +08:00
|
|
|
struct config_set;
|
2018-03-24 01:20:55 +08:00
|
|
|
struct git_hash_algo;
|
2017-06-23 02:43:43 +08:00
|
|
|
struct index_state;
|
2019-01-12 10:13:24 +08:00
|
|
|
struct lock_file;
|
2019-01-12 10:13:26 +08:00
|
|
|
struct pathspec;
|
2018-03-24 01:20:55 +08:00
|
|
|
struct raw_object_store;
|
2017-06-23 02:43:44 +08:00
|
|
|
struct submodule_cache;
|
2021-06-18 01:13:23 +08:00
|
|
|
struct promisor_remote_config;
|
2021-11-18 08:53:22 +08:00
|
|
|
struct remote_state;
|
2017-06-23 02:43:42 +08:00
|
|
|
|
2024-06-14 14:50:28 +08:00
|
|
|
/*
 * Formats in which a repository's references can be stored on disk.
 * REF_STORAGE_FORMAT_UNKNOWN is the first enumerator and therefore has the
 * value 0, so a zero-initialized field of this type reads as "unknown".
 */
enum ref_storage_format {
|
|
|
|
REF_STORAGE_FORMAT_UNKNOWN,
|
|
|
|
REF_STORAGE_FORMAT_FILES,
|
|
|
|
REF_STORAGE_FORMAT_REFTABLE,
|
|
|
|
};
|
|
|
|
|
2022-03-05 02:32:17 +08:00
|
|
|
/*
 * Cache of paths to often used files. It is embedded by value in
 * struct repository as the `cached_paths` member; each member holds one
 * path string.
 *
 * NOTE(review): the members presumably stay NULL until the corresponding
 * path is first requested and are owned by the repository -- confirm
 * against the code that fills and frees them.
 */
struct repo_path_cache {
|
|
|
|
char *squash_msg;
|
|
|
|
char *merge_msg;
|
|
|
|
char *merge_rr;
|
|
|
|
char *merge_mode;
|
|
|
|
char *merge_head;
|
|
|
|
char *fetch_head;
|
|
|
|
char *shallow;
|
|
|
|
};
|
|
|
|
|
2017-06-23 02:43:32 +08:00
|
|
|
struct repository {
|
|
|
|
/* Environment */
|
|
|
|
/*
|
|
|
|
* Path to the git directory.
|
|
|
|
* Cannot be NULL after initialization.
|
|
|
|
*/
|
|
|
|
char *gitdir;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Path to the common git directory.
|
|
|
|
* Cannot be NULL after initialization.
|
|
|
|
*/
|
|
|
|
char *commondir;
|
|
|
|
|
|
|
|
/*
|
2018-03-24 01:20:55 +08:00
|
|
|
* Holds any information related to accessing the raw object content.
|
2017-06-23 02:43:32 +08:00
|
|
|
*/
|
2018-03-24 01:20:55 +08:00
|
|
|
struct raw_object_store *objects;
|
2018-03-03 19:35:57 +08:00
|
|
|
|
2018-05-09 03:37:24 +08:00
|
|
|
/*
|
|
|
|
* All objects in this repository that have been parsed. This structure
|
|
|
|
* owns all objects it references, so users of "struct object *"
|
|
|
|
* generally do not need to free them; instead, when a repository is no
|
|
|
|
* longer used, call parsed_object_pool_clear() on this structure, which
|
|
|
|
* is called by the repository's repo_clear() on its destruction.
|
|
|
|
*/
|
|
|
|
struct parsed_object_pool *parsed_objects;
|
|
|
|
|
repository: mark the "refs" pointer as private
The "refs" pointer in a struct repository starts life as NULL, but then
is lazily initialized when it is accessed via get_main_ref_store().
However, it's easy for calling code to forget this and access it
directly, leading to code which works _some_ of the time, but fails if
it is called before anybody else accesses the refs.
This was the cause of the bug fixed by 5ff4b920eb (sha1-name: do not
assume that the ref store is initialized, 2020-04-09). In order to
prevent similar bugs, let's more clearly mark the "refs" field as
private.
In addition to helping future code, the name change will help us audit
any existing direct uses. Besides get_main_ref_store() itself, it turns
out there is only one. But we know it's OK as it is on the line directly
after the fix from 5ff4b920eb, which will have initialized the pointer.
However it's still a good idea for it to model the proper use of the
accessing function, so we'll convert it.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-04-10 11:04:11 +08:00
|
|
|
/*
|
|
|
|
* The store in which the refs are held. This should generally only be
|
|
|
|
* accessed via get_main_ref_store(), as that will lazily initialize
|
|
|
|
* the ref object.
|
|
|
|
*/
|
|
|
|
struct ref_store *refs_private;
|
2018-04-12 08:21:14 +08:00
|
|
|
|
2024-05-17 16:18:34 +08:00
|
|
|
/*
|
|
|
|
* A strmap of ref_stores, stored by submodule name, accessible via
|
|
|
|
* `repo_get_submodule_ref_store()`.
|
|
|
|
*/
|
|
|
|
struct strmap submodule_ref_stores;
|
|
|
|
|
2024-05-17 16:18:44 +08:00
|
|
|
/*
|
|
|
|
* A strmap of ref_stores, stored by worktree id, accessible via
|
|
|
|
* `get_worktree_ref_store()`.
|
|
|
|
*/
|
|
|
|
struct strmap worktree_ref_stores;
|
|
|
|
|
2018-05-18 06:51:51 +08:00
|
|
|
/*
|
|
|
|
* Contains paths to often used files.
|
|
|
|
*/
|
2022-03-05 02:32:17 +08:00
|
|
|
struct repo_path_cache cached_paths;
|
2018-05-18 06:51:51 +08:00
|
|
|
|
2017-06-23 02:43:32 +08:00
|
|
|
/*
|
|
|
|
* Path to the repository's graft file.
|
|
|
|
* Cannot be NULL after initialization.
|
|
|
|
*/
|
|
|
|
char *graft_file;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Path to the current worktree's index file.
|
|
|
|
* Cannot be NULL after initialization.
|
|
|
|
*/
|
|
|
|
char *index_file;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Path to the working directory.
|
|
|
|
* A NULL value indicates that there is no working directory.
|
|
|
|
*/
|
|
|
|
char *worktree;
|
|
|
|
|
2017-06-23 02:43:47 +08:00
|
|
|
/*
|
|
|
|
* Path from the root of the top-level superproject down to this
|
|
|
|
* repository. This is only non-NULL if the repository is initialized
|
|
|
|
* as a submodule of another repository.
|
|
|
|
*/
|
|
|
|
char *submodule_prefix;
|
|
|
|
|
2019-08-14 02:37:43 +08:00
|
|
|
struct repo_settings settings;
|
|
|
|
|
2017-06-23 02:43:42 +08:00
|
|
|
/* Subsystems */
|
|
|
|
/*
|
|
|
|
* Repository's config which contains key-value pairs from the usual
|
|
|
|
* set of config files (i.e. repo specific .git/config, user wide
|
|
|
|
* ~/.gitconfig, XDG config file and the system-wide /etc/gitconfig)
|
|
|
|
*/
|
|
|
|
struct config_set *config;
|
|
|
|
|
2017-06-23 02:43:44 +08:00
|
|
|
/* Repository's submodule config as defined by '.gitmodules' */
|
|
|
|
struct submodule_cache *submodule_cache;
|
|
|
|
|
2017-06-23 02:43:43 +08:00
|
|
|
/*
|
|
|
|
* Repository's in-memory index.
|
|
|
|
* 'repo_read_index()' can be used to populate 'index'.
|
|
|
|
*/
|
|
|
|
struct index_state *index;
|
|
|
|
|
2021-11-18 08:53:22 +08:00
|
|
|
/* Repository's remotes and associated structures. */
|
|
|
|
struct remote_state *remote_state;
|
|
|
|
|
2017-11-13 05:28:53 +08:00
|
|
|
/* Repository's current hash algorithm, as serialized on disk. */
|
|
|
|
const struct git_hash_algo *hash_algo;
|
|
|
|
|
2023-10-02 10:40:08 +08:00
|
|
|
/* Repository's compatibility hash algorithm. */
|
|
|
|
const struct git_hash_algo *compat_hash_algo;
|
|
|
|
|
2023-12-29 15:26:39 +08:00
|
|
|
/* Repository's reference storage format, as serialized on disk. */
|
2024-06-06 13:29:01 +08:00
|
|
|
enum ref_storage_format ref_storage_format;
|
2023-12-29 15:26:39 +08:00
|
|
|
|
2019-02-23 06:25:01 +08:00
|
|
|
/* A unique-id for tracing purposes. */
|
|
|
|
int trace2_repo_id;
|
|
|
|
|
upload-pack: disable commit graph more gently for shallow traversal
When the client has asked for certain shallow options like
"deepen-since", we do a custom rev-list walk that pretends to be
shallow. Before doing so, we have to disable the commit-graph, since it
is not compatible with the shallow view of the repository. That's
handled by 829a321569 (commit-graph: close_commit_graph before shallow
walk, 2018-08-20). That commit literally closes and frees our
repo->objects->commit_graph struct.
That creates an interesting problem for commits that have _already_ been
parsed using the commit graph. Their commit->object.parsed flag is set,
their commit->graph_pos is set, but their commit->maybe_tree may still
be NULL. When somebody later calls repo_get_commit_tree(), we see that
we haven't loaded the tree oid yet and try to get it from the commit
graph. But since it has been freed, we segfault!
So the root of the issue is a data dependency between the commit's
lazy-load of the tree oid and the fact that the commit graph can go
away mid-process. How can we resolve it?
There are a couple of general approaches:
1. The obvious answer is to avoid loading the tree from the graph when
we see that it's NULL. But then what do we return for the tree oid?
If we return NULL, our caller in do_traverse() will rightly
complain that we have no tree. We'd have to fallback to loading the
actual commit object and re-parsing it. That requires teaching
parse_commit_buffer() to understand re-parsing (i.e., not starting
from a clean slate and not leaking any allocated bits like parent
list pointers).
2. When we close the commit graph, walk through the set of in-memory
objects and clear any graph_pos pointers. But this means we also
have to "unparse" any such commits so that we know they still need
to open the commit object to fill in their trees. So it's no less
complicated than (1), and is more expensive (since we clear objects
we might not later need).
3. Stop freeing the commit-graph struct. Continue to let it be used
for lazy-loads of tree oids, but let upload-pack specify that it
shouldn't be used for further commit parsing.
4. Push the whole shallow rev-list out to its own sub-process, with
the commit-graph disabled from the start, giving it a clean memory
space to work from.
I've chosen (3) here. Options (1) and (2) would work, but are
non-trivial to implement. Option (4) is more expensive, and I'm not sure
how complicated it is (shelling out for the actual rev-list part is
easy, but we do then parse the resulting commits internally, and I'm not
clear which parts need to be handling shallow-ness).
The new test in t5500 triggers this segfault, but see the comments there
for how horribly intimate it has to be with how both upload-pack and
commit graphs work.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-09-12 22:44:45 +08:00
|
|
|
/* True if commit-graph has been disabled within this process. */
|
|
|
|
int commit_graph_disabled;
|
|
|
|
|
2021-06-18 01:13:22 +08:00
|
|
|
/* Configurations related to promisor remotes. */
|
|
|
|
char *repository_format_partial_clone;
|
2021-06-18 01:13:23 +08:00
|
|
|
struct promisor_remote_config *promisor_remote_config;
|
2021-06-18 01:13:22 +08:00
|
|
|
|
2017-06-23 02:43:32 +08:00
|
|
|
/* Configurations */
|
2023-05-26 09:33:00 +08:00
|
|
|
int repository_format_worktree_config;
|
2017-06-23 02:43:32 +08:00
|
|
|
|
|
|
|
/* Indicate if a repository has a different 'commondir' from 'gitdir' */
|
|
|
|
unsigned different_commondir:1;
|
|
|
|
};
|
|
|
|
|
global: introduce `USE_THE_REPOSITORY_VARIABLE` macro
Use of the `the_repository` variable is deprecated nowadays, and we
slowly but steadily convert the codebase to not use it anymore. Instead,
callers should be passing down the repository to work on via parameters.
It is hard though to prove that a given code unit does not use this
variable anymore. The most trivial case, merely demonstrating that there
is no direct use of `the_repository`, is already a bit of a pain during
code reviews as the reviewer needs to manually verify claims made by the
patch author. The bigger problem though is that we have many interfaces
that implicitly rely on `the_repository`.
Introduce a new `USE_THE_REPOSITORY_VARIABLE` macro that allows code
units to opt into usage of `the_repository`. The intent of this macro is
to demonstrate that a certain code unit does not use this variable
anymore, and to keep it from new dependencies on it in future changes,
be it explicit or implicit.
For now, the macro only guards `the_repository` itself as well as
`the_hash_algo`. There are many more known interfaces where we have an
implicit dependency on `the_repository`, but those are not guarded at
the current point in time. Over time though, we should start to add
guards as required (or even better, just remove them).
Define the macro as required in our code units. As expected, most of our
code still relies on the global variable. Nearly all of our builtins
rely on the variable as there is no way yet to pass `the_repository` to
their entry point. For now, declare the macro in "builtin.h" to keep the
required changes at least a little bit more contained.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2024-06-14 14:50:23 +08:00
|
|
|
#ifdef USE_THE_REPOSITORY_VARIABLE
|
2017-06-23 02:43:32 +08:00
|
|
|
extern struct repository *the_repository;
|
global: introduce `USE_THE_REPOSITORY_VARIABLE` macro
Use of the `the_repository` variable is deprecated nowadays, and we
slowly but steadily convert the codebase to not use it anymore. Instead,
callers should be passing down the repository to work on via parameters.
It is hard though to prove that a given code unit does not use this
variable anymore. The most trivial case, merely demonstrating that there
is no direct use of `the_repository`, is already a bit of a pain during
code reviews as the reviewer needs to manually verify claims made by the
patch author. The bigger problem though is that we have many interfaces
that implicitly rely on `the_repository`.
Introduce a new `USE_THE_REPOSITORY_VARIABLE` macro that allows code
units to opt into usage of `the_repository`. The intent of this macro is
to demonstrate that a certain code unit does not use this variable
anymore, and to keep it from new dependencies on it in future changes,
be it explicit or implicit.
For now, the macro only guards `the_repository` itself as well as
`the_hash_algo`. There are many more known interfaces where we have an
implicit dependency on `the_repository`, but those are not guarded at
the current point in time. Over time though, we should start to add
guards as required (or even better, just remove them).
Define the macro as required in our code units. As expected, most of our
code still relies on the global variable. Nearly all of our builtins
rely on the variable as there is no way yet to pass `the_repository` to
their entry point. For now, declare the macro in "builtin.h" to keep the
required changes at least a little bit more contained.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2024-06-14 14:50:23 +08:00
|
|
|
#endif
|
2017-06-23 02:43:32 +08:00
|
|
|
|
2024-09-12 19:29:24 +08:00
|
|
|
const char *repo_get_git_dir(struct repository *repo);
|
2024-09-12 19:29:27 +08:00
|
|
|
const char *repo_get_common_dir(struct repository *repo);
|
2024-09-12 19:29:30 +08:00
|
|
|
const char *repo_get_object_directory(struct repository *repo);
|
2024-09-12 19:29:32 +08:00
|
|
|
const char *repo_get_index_file(struct repository *repo);
|
2024-09-12 19:29:35 +08:00
|
|
|
const char *repo_get_graft_file(struct repository *repo);
|
2024-09-12 19:29:40 +08:00
|
|
|
const char *repo_get_work_tree(struct repository *repo);
|
2024-09-12 19:29:24 +08:00
|
|
|
|
2018-03-23 23:55:23 +08:00
|
|
|
/*
|
|
|
|
* Define a custom repository layout. Any field can be NULL, which
|
|
|
|
* will default back to the path according to the default layout.
|
|
|
|
*/
|
2018-03-03 19:35:55 +08:00
|
|
|
/*
 * Optional overrides handed to repo_set_gitdir() through its `extra_args`
 * parameter. The pointer members are paths; a NULL pointer selects the
 * default layout for that entry.
 */
struct set_gitdir_args {
|
|
|
|
const char *commondir;
|
|
|
|
const char *object_dir;
|
|
|
|
const char *graft_file;
|
|
|
|
const char *index_file;
|
2018-03-03 19:35:57 +08:00
|
|
|
const char *alternate_db;
|
2021-12-07 06:05:05 +08:00
|
|
|
/*
 * NOTE(review): the only non-pointer member, so the "can be NULL" rule
 * does not apply to it; presumably 0 leaves ref updates enabled --
 * confirm in repo_set_gitdir().
 */
int disable_ref_updates;
|
2018-03-03 19:35:55 +08:00
|
|
|
};
|
|
|
|
|
2018-06-30 17:20:29 +08:00
|
|
|
void repo_set_gitdir(struct repository *repo, const char *root,
|
|
|
|
const struct set_gitdir_args *extra_args);
|
|
|
|
void repo_set_worktree(struct repository *repo, const char *path);
|
|
|
|
void repo_set_hash_algo(struct repository *repo, int algo);
|
2023-10-02 10:40:08 +08:00
|
|
|
void repo_set_compat_hash_algo(struct repository *repo, int compat_algo);
|
2024-06-06 13:29:01 +08:00
|
|
|
void repo_set_ref_storage_format(struct repository *repo,
|
|
|
|
enum ref_storage_format format);
|
2024-04-18 20:14:33 +08:00
|
|
|
void initialize_repository(struct repository *repo);
|
2022-09-01 07:18:12 +08:00
|
|
|
RESULT_MUST_BE_USED
|
2018-06-30 17:20:29 +08:00
|
|
|
int repo_init(struct repository *r, const char *gitdir, const char *worktree);
|
2018-11-29 08:27:53 +08:00
|
|
|
|
|
|
|
/*
|
2021-09-10 02:47:28 +08:00
|
|
|
* Initialize the repository 'subrepo' as the submodule at the given path. If
|
|
|
|
* the submodule's gitdir cannot be found at <path>/.git, this function calls
|
|
|
|
* submodule_from_path() to try to find it. treeish_name is only used if
|
|
|
|
* submodule_from_path() needs to be called; see its documentation for more
|
|
|
|
* information.
|
|
|
|
* Return 0 upon success and a non-zero value upon failure.
|
2018-11-29 08:27:53 +08:00
|
|
|
*/
|
2021-09-10 02:47:28 +08:00
|
|
|
struct object_id;
|
2022-09-01 07:18:12 +08:00
|
|
|
RESULT_MUST_BE_USED
|
2018-11-29 08:27:53 +08:00
|
|
|
int repo_submodule_init(struct repository *subrepo,
|
2018-06-30 17:20:29 +08:00
|
|
|
struct repository *superproject,
|
2021-09-10 02:47:28 +08:00
|
|
|
const char *path,
|
|
|
|
const struct object_id *treeish_name);
|
2018-06-30 17:20:29 +08:00
|
|
|
void repo_clear(struct repository *repo);
|
2017-06-23 02:43:32 +08:00
|
|
|
|
2017-07-19 03:05:18 +08:00
|
|
|
/*
|
|
|
|
* Populates the repository's index from its index_file, an index struct will
|
|
|
|
* be allocated if needed.
|
|
|
|
*
|
|
|
|
* Return the number of index entries in the populated index or a value less
|
2019-11-06 01:07:23 +08:00
|
|
|
* than zero if an error occurred. If the repository's index has already been
|
2017-07-19 03:05:18 +08:00
|
|
|
* populated then the number of entries will simply be returned.
|
|
|
|
*/
|
2018-06-30 17:20:29 +08:00
|
|
|
int repo_read_index(struct repository *repo);
|
2019-01-12 10:13:24 +08:00
|
|
|
int repo_hold_locked_index(struct repository *repo,
|
|
|
|
struct lock_file *lf,
|
|
|
|
int flags);
|
2017-06-23 02:43:43 +08:00
|
|
|
|
2019-01-12 10:13:26 +08:00
|
|
|
int repo_read_index_unmerged(struct repository *);
|
2019-01-12 10:13:27 +08:00
|
|
|
/*
|
|
|
|
* Opportunistically update the index but do not complain if we can't.
|
|
|
|
* The lockfile is always committed or rolled back.
|
|
|
|
*/
|
|
|
|
void repo_update_index_if_able(struct repository *, struct lock_file *);
|
|
|
|
|
2020-06-05 17:10:01 +08:00
|
|
|
/*
|
|
|
|
* Return 1 if upgrade repository format to target_version succeeded,
|
|
|
|
* 0 if no upgrade is necessary, and -1 when upgrade is not possible.
|
|
|
|
*/
|
|
|
|
int upgrade_repository_format(int target_version);
|
|
|
|
|
2017-06-23 02:43:32 +08:00
|
|
|
#endif /* REPOSITORY_H */
|