2017-06-23 02:43:32 +08:00
|
|
|
#ifndef REPOSITORY_H
|
|
|
|
#define REPOSITORY_H
|
|
|
|
|
2024-05-17 16:18:34 +08:00
|
|
|
#include "strmap.h"
|
|
|
|
|
2017-06-23 02:43:42 +08:00
|
|
|
struct config_set;
|
2022-03-26 02:02:46 +08:00
|
|
|
struct fsmonitor_settings;
|
2018-03-24 01:20:55 +08:00
|
|
|
struct git_hash_algo;
|
2017-06-23 02:43:43 +08:00
|
|
|
struct index_state;
|
2019-01-12 10:13:24 +08:00
|
|
|
struct lock_file;
|
2019-01-12 10:13:26 +08:00
|
|
|
struct pathspec;
|
2018-03-24 01:20:55 +08:00
|
|
|
struct raw_object_store;
|
2017-06-23 02:43:44 +08:00
|
|
|
struct submodule_cache;
|
2021-06-18 01:13:23 +08:00
|
|
|
struct promisor_remote_config;
|
2021-11-18 08:53:22 +08:00
|
|
|
struct remote_state;
|
2017-06-23 02:43:42 +08:00
|
|
|
|
2019-08-14 02:37:46 +08:00
|
|
|
enum untracked_cache_setting {
|
repo-settings.c: simplify the setup
Simplify the setup code in repo-settings.c in various ways, making the
code shorter, easier to read, and requiring fewer hacks to do the same
thing as it did before:
Since 7211b9e7534 (repo-settings: consolidate some config settings,
2019-08-13) we have memset() the whole "settings" structure to -1 in
prepare_repo_settings(), and subsequently relied on the -1 value.
Most of the fields did not need to be initialized to -1, and because
we were doing that we had the enum labels "UNTRACKED_CACHE_UNSET" and
"FETCH_NEGOTIATION_UNSET" purely to reflect the resulting state
created by this memset() in prepare_repo_settings(). No other code used
or relied on them, more on that below.
For the rest most of the subsequent "are we -1, then read xyz" can
simply be removed by re-arranging what we read first. E.g. when
setting the "index.version" setting we should have first read
"feature.experimental", so that it (and "feature.manyfiles") can
provide a default for our "index.version".
Instead the code setting it, added when "feature.manyFiles"[1] was
created, was using the UPDATE_DEFAULT_BOOL() macro added in an earlier
commit[2]. That macro is now gone, since it was only needed for this
pattern of reading things in the wrong order.
This also fixes an (admittedly obscure) logic error where we'd
conflate an explicit "-1" value in the config with our own earlier
memset() -1.
We can also remove the UPDATE_DEFAULT_BOOL() wrapper added in
[3]. Using it is redundant to simply using the return value from
repo_config_get_bool(), which is non-zero if the provided key exists
in the config.
Details on edge cases relating to the memset() to -1, continued from
"more on that below" above:
* UNTRACKED_CACHE_KEEP:
In [4] the "unset" and "keep" handling for core.untrackedCache was
consolidated. But while we understand the "keep" value, we don't
handle it differently than the case of any other unknown value.
So let's retain UNTRACKED_CACHE_KEEP and remove the
UNTRACKED_CACHE_UNSET setting (which was always implicitly
UNTRACKED_CACHE_KEEP before). We don't need to inform any code
after prepare_repo_settings() that the setting was "unset", as far
as anyone else is concerned it's core.untrackedCache=keep if
"core.untrackedcache" isn't present in the config.
* FETCH_NEGOTIATION_UNSET & FETCH_NEGOTIATION_NONE:
Since these two enum fields added in [5] don't rely on the
memset() setting them to "-1" anymore we don't have to provide
them with explicit values.
1. c6cc4c5afd2 (repo-settings: create feature.manyFiles setting,
2019-08-13)
2. 31b1de6a09b (commit-graph: turn on commit-graph by default,
2019-08-13)
3. 31b1de6a09b (commit-graph: turn on commit-graph by default,
2019-08-13)
4. ad0fb659993 (repo-settings: parse core.untrackedCache,
2019-08-13)
5. aaf633c2ad1 (repo-settings: create feature.experimental setting,
2019-08-13)
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-09-21 21:13:02 +08:00
|
|
|
UNTRACKED_CACHE_KEEP,
|
|
|
|
UNTRACKED_CACHE_REMOVE,
|
|
|
|
UNTRACKED_CACHE_WRITE,
|
2019-08-14 02:37:46 +08:00
|
|
|
};
|
|
|
|
|
2019-08-14 02:37:48 +08:00
|
|
|
enum fetch_negotiation_setting {
|
2022-02-02 11:42:40 +08:00
|
|
|
FETCH_NEGOTIATION_CONSECUTIVE,
|
repo-settings.c: simplify the setup
Simplify the setup code in repo-settings.c in various ways, making the
code shorter, easier to read, and requiring fewer hacks to do the same
thing as it did before:
Since 7211b9e7534 (repo-settings: consolidate some config settings,
2019-08-13) we have memset() the whole "settings" structure to -1 in
prepare_repo_settings(), and subsequently relied on the -1 value.
Most of the fields did not need to be initialized to -1, and because
we were doing that we had the enum labels "UNTRACKED_CACHE_UNSET" and
"FETCH_NEGOTIATION_UNSET" purely to reflect the resulting state
created by this memset() in prepare_repo_settings(). No other code used
or relied on them, more on that below.
For the rest most of the subsequent "are we -1, then read xyz" can
simply be removed by re-arranging what we read first. E.g. when
setting the "index.version" setting we should have first read
"feature.experimental", so that it (and "feature.manyfiles") can
provide a default for our "index.version".
Instead the code setting it, added when "feature.manyFiles"[1] was
created, was using the UPDATE_DEFAULT_BOOL() macro added in an earlier
commit[2]. That macro is now gone, since it was only needed for this
pattern of reading things in the wrong order.
This also fixes an (admittedly obscure) logic error where we'd
conflate an explicit "-1" value in the config with our own earlier
memset() -1.
We can also remove the UPDATE_DEFAULT_BOOL() wrapper added in
[3]. Using it is redundant to simply using the return value from
repo_config_get_bool(), which is non-zero if the provided key exists
in the config.
Details on edge cases relating to the memset() to -1, continued from
"more on that below" above:
* UNTRACKED_CACHE_KEEP:
In [4] the "unset" and "keep" handling for core.untrackedCache was
consolidated. But while we understand the "keep" value, we don't
handle it differently than the case of any other unknown value.
So let's retain UNTRACKED_CACHE_KEEP and remove the
UNTRACKED_CACHE_UNSET setting (which was always implicitly
UNTRACKED_CACHE_KEEP before). We don't need to inform any code
after prepare_repo_settings() that the setting was "unset", as far
as anyone else is concerned it's core.untrackedCache=keep if
"core.untrackedcache" isn't present in the config.
* FETCH_NEGOTIATION_UNSET & FETCH_NEGOTIATION_NONE:
Since these two enum fields added in [5] don't rely on the
memset() setting them to "-1" anymore we don't have to provide
them with explicit values.
1. c6cc4c5afd2 (repo-settings: create feature.manyFiles setting,
2019-08-13)
2. 31b1de6a09b (commit-graph: turn on commit-graph by default,
2019-08-13)
3. 31b1de6a09b (commit-graph: turn on commit-graph by default,
2019-08-13)
4. ad0fb659993 (repo-settings: parse core.untrackedCache,
2019-08-13)
5. aaf633c2ad1 (repo-settings: create feature.experimental setting,
2019-08-13)
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-09-21 21:13:02 +08:00
|
|
|
FETCH_NEGOTIATION_SKIPPING,
|
|
|
|
FETCH_NEGOTIATION_NOOP,
|
2019-08-14 02:37:48 +08:00
|
|
|
};
|
|
|
|
|
2024-06-14 14:50:28 +08:00
|
|
|
/*
 * Identifies the reference storage format of a repository; struct
 * repository records one of these values in its ref_storage_format
 * member.
 *
 * No explicit values are assigned, so the enumerators number from 0 in
 * declaration order and REF_STORAGE_FORMAT_UNKNOWN is the zero value.
 */
enum ref_storage_format {
|
|
|
|
/* First enumerator, hence the value of zero-initialized storage. */
REF_STORAGE_FORMAT_UNKNOWN,
|
|
|
|
/* NOTE(review): presumably the "files" ref backend -- confirm. */
REF_STORAGE_FORMAT_FILES,
|
|
|
|
/* NOTE(review): presumably the "reftable" ref backend -- confirm. */
REF_STORAGE_FORMAT_REFTABLE,
|
|
|
|
};
|
|
|
|
|
2019-08-14 02:37:43 +08:00
|
|
|
struct repo_settings {
|
|
|
|
int initialized;
|
|
|
|
|
|
|
|
int core_commit_graph;
|
commit-graph: pass repo_settings instead of repository
The parse_commit_graph() function takes a 'struct repository *' pointer,
but it only ever accesses config settings (either directly or through
the .settings field of the repo struct). Move all relevant config
settings into the repo_settings struct, and update parse_commit_graph()
and its existing callers so that it takes 'struct repo_settings *'
instead.
Callers of parse_commit_graph() will now need to call
prepare_repo_settings() themselves, or initialize a 'struct
repo_settings' directly.
Prior to ab14d0676c (commit-graph: pass a 'struct repository *' in more
places, 2020-09-09), parsing a commit-graph was a pure function
depending only on the contents of the commit-graph itself. Commit
ab14d0676c introduced a dependency on a `struct repository` pointer, and
later commits such as b66d84756f (commit-graph: respect
'commitGraph.readChangedPaths', 2020-09-09) added dependencies on config
settings, which were accessed through the `settings` field of the
repository pointer. This field was initialized via a call to
`prepare_repo_settings()`.
Additionally, this fixes an issue in fuzz-commit-graph: In 44c7e62
(2021-12-06, repo-settings:prepare_repo_settings only in git repos),
prepare_repo_settings was changed to issue a BUG() if it is called by a
process whose CWD is not a Git repository.
The combination of commits mentioned above broke fuzz-commit-graph,
which attempts to parse arbitrary fuzzing-engine-provided bytes as a
commit graph file. Prior to this change, parse_commit_graph() called
prepare_repo_settings(), but since we run the fuzz tests without a valid
repository, we are hitting the BUG() from 44c7e62 for every test case.
Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Josh Steadmon <steadmon@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-07-15 05:43:06 +08:00
|
|
|
int commit_graph_generation_version;
|
2024-06-26 01:39:50 +08:00
|
|
|
int commit_graph_changed_paths_version;
|
2019-08-14 02:37:43 +08:00
|
|
|
int gc_write_commit_graph;
|
2019-09-03 10:22:02 +08:00
|
|
|
int fetch_write_commit_graph;
|
repository.h: don't use a mix of int and bitfields
Change the bitfield added in 58300f47432 (sparse-index: add
index.sparse config option, 2021-03-30) and 3964fc2aae7 (sparse-index:
add guard to ensure full index, 2021-03-30) to just use an "int"
boolean instead.
It might be smart to optimize the space here in the future, but by
consistently using an "int" we can take its address and pass it to
repo_cfg_bool(), and therefore don't need to handle "sparse_index" as
a special-case when reading the "index.sparse" setting.
There's no corresponding config for "command_requires_full_index", but
let's change it too for consistency and to prevent future bugs
creeping in due to one of these being "unsigned".
Using "int" consistently also prevents subtle bugs or undesired
control flow creeping in here. Before the preceding commit the
initialization of "command_requires_full_index" in
prepare_repo_settings() did nothing, i.e. this:
r->settings.command_requires_full_index = 1
Was redundant to the earlier memset() to -1. Likewise for
"sparse_index" added in 58300f47432 (sparse-index: add index.sparse
config option, 2021-03-30) the code and comment added there was
misleading, we weren't initializing it to off, but re-initializing it
from "1" to "0", and then finally checking the config, and perhaps
setting it to "1" again. I.e. we could have applied this patch before
the preceding commit:
+ assert(r->settings.command_requires_full_index == 1);
r->settings.command_requires_full_index = 1;
/*
* Initialize this as off.
*/
+ assert(r->settings.sparse_index == 1);
r->settings.sparse_index = 0;
if (!repo_config_get_bool(r, "index.sparse", &value) && value)
r->settings.sparse_index = 1;
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-09-21 21:13:03 +08:00
|
|
|
int command_requires_full_index;
|
|
|
|
int sparse_index;
|
pack-revindex: introduce `pack.readReverseIndex`
Since 1615c567b8 (Documentation/config/pack.txt: advertise
'pack.writeReverseIndex', 2021-01-25), we have had the
`pack.writeReverseIndex` configuration option, which tells Git whether
or not it is allowed to write a ".rev" file when indexing a pack.
Introduce a complementary configuration knob, `pack.readReverseIndex` to
control whether or not Git will read any ".rev" file(s) that may be
available on disk.
This option is useful for debugging, as well as disabling the effect of
".rev" files in certain instances.
This is useful because of the trade-off[^1] between the time it takes to
generate a reverse index (slow from scratch, fast when reading an
existing ".rev" file), and the time it takes to access a record (the
opposite).
For example, even though it is faster to use the on-disk reverse index
when computing the on-disk size of a packed object, it is slower to
enumerate the same value for all objects.
Here are a couple of examples from linux.git. When computing the above
for a single object, using the on-disk reverse index is significantly
faster:
$ git rev-parse HEAD >in
$ hyperfine -L v false,true 'git.compile -c pack.readReverseIndex={v} cat-file --batch-check="%(objectsize:disk)" <in'
Benchmark 1: git.compile -c pack.readReverseIndex=false cat-file --batch-check="%(objectsize:disk)" <in
Time (mean ± σ): 302.5 ms ± 12.5 ms [User: 258.7 ms, System: 43.6 ms]
Range (min … max): 291.1 ms … 328.1 ms 10 runs
Benchmark 2: git.compile -c pack.readReverseIndex=true cat-file --batch-check="%(objectsize:disk)" <in
Time (mean ± σ): 3.9 ms ± 0.3 ms [User: 1.6 ms, System: 2.4 ms]
Range (min … max): 2.0 ms … 4.4 ms 801 runs
Summary
'git.compile -c pack.readReverseIndex=true cat-file --batch-check="%(objectsize:disk)" <in' ran
77.29 ± 7.14 times faster than 'git.compile -c pack.readReverseIndex=false cat-file --batch-check="%(objectsize:disk)" <in'
, but when instead trying to compute the on-disk object size for all
objects in the repository, using the ".rev" file is a disadvantage over
creating the reverse index from scratch:
$ hyperfine -L v false,true 'git.compile -c pack.readReverseIndex={v} cat-file --batch-check="%(objectsize:disk)" --batch-all-objects'
Benchmark 1: git.compile -c pack.readReverseIndex=false cat-file --batch-check="%(objectsize:disk)" --batch-all-objects
Time (mean ± σ): 8.258 s ± 0.035 s [User: 7.949 s, System: 0.308 s]
Range (min … max): 8.199 s … 8.293 s 10 runs
Benchmark 2: git.compile -c pack.readReverseIndex=true cat-file --batch-check="%(objectsize:disk)" --batch-all-objects
Time (mean ± σ): 16.976 s ± 0.107 s [User: 16.706 s, System: 0.268 s]
Range (min … max): 16.839 s … 17.105 s 10 runs
Summary
'git.compile -c pack.readReverseIndex=false cat-file --batch-check="%(objectsize:disk)" --batch-all-objects' ran
2.06 ± 0.02 times faster than 'git.compile -c pack.readReverseIndex=true cat-file --batch-check="%(objectsize:disk)" --batch-all-objects'
Luckily, the results when running `git cat-file` with `--unordered` are
closer together:
$ hyperfine -L v false,true 'git.compile -c pack.readReverseIndex={v} cat-file --unordered --batch-check="%(objectsize:disk)" --batch-all-objects'
Benchmark 1: git.compile -c pack.readReverseIndex=false cat-file --unordered --batch-check="%(objectsize:disk)" --batch-all-objects
Time (mean ± σ): 5.066 s ± 0.105 s [User: 4.792 s, System: 0.274 s]
Range (min … max): 4.943 s … 5.220 s 10 runs
Benchmark 2: git.compile -c pack.readReverseIndex=true cat-file --unordered --batch-check="%(objectsize:disk)" --batch-all-objects
Time (mean ± σ): 6.193 s ± 0.069 s [User: 5.937 s, System: 0.255 s]
Range (min … max): 6.145 s … 6.356 s 10 runs
Summary
'git.compile -c pack.readReverseIndex=false cat-file --unordered --batch-check="%(objectsize:disk)" --batch-all-objects' ran
1.22 ± 0.03 times faster than 'git.compile -c pack.readReverseIndex=true cat-file --unordered --batch-check="%(objectsize:disk)" --batch-all-objects'
Because the equilibrium point between these two is highly machine- and
repository-dependent, allow users to configure whether or not they will
read any ".rev" file(s) with this configuration knob.
[^1]: Generating a reverse index in memory takes O(N) time (where N is
the number of objects in the repository), since we use a radix sort.
Reading an entry from an on-disk ".rev" file is slower since each
operation is bound by disk I/O instead of memory I/O.
In order to compute the on-disk size of a packed object, we need to
find the offset of our object, and the adjacent object (the on-disk
size difference of these two). Finding the first offset requires a
binary search. Finding the latter involves a single .rev lookup.
Signed-off-by: Taylor Blau <me@ttaylorr.com>
Acked-by: Derrick Stolee <derrickstolee@github.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-04-13 06:20:30 +08:00
|
|
|
int pack_read_reverse_index;
|
pack-bitmap.c: use commit boundary during bitmap traversal
When reachability bitmap coverage exists in a repository, Git will use a
different (and hopefully faster) traversal to compute revision walks.
Consider a set of positive and negative tips (which we'll refer to with
their standard bitmap parlance by "wants", and "haves"). In order to
figure out what objects exist between the tips, the existing traversal
in `prepare_bitmap_walk()` does something like:
1. Consider if we can even compute the set of objects with bitmaps,
and fall back to the usual traversal if we cannot. For example,
pathspec limiting traversals can't be computed using bitmaps (since
they don't know which objects are at which paths). The same is true
of certain kinds of non-trivial object filters.
2. If we can compute the traversal with bitmaps, partition the
(dereferenced) tips into two object lists, "haves", and "wants",
based on whether or not the objects have the UNINTERESTING flag,
respectively.
3. Fall back to the ordinary object traversal if either (a) there are
more than zero haves, none of which are in the bitmapped pack or
MIDX, or (b) there are no wants.
4. Construct a reachability bitmap for the "haves" side by walking
from the revision tips down to any existing bitmaps, OR-ing in any
bitmaps as they are found.
5. Then do the same for the "wants" side, stopping at any objects that
appear in the "haves" bitmap.
6. Filter the results if any object filter (that can be easily
computed with bitmaps alone) was given, and then return back to the
caller.
When there is good bitmap coverage relative to the traversal tips, this
walk is often significantly faster than an ordinary object traversal
because it can visit far fewer objects.
But in certain cases, it can be significantly *slower* than the usual
object traversal. Why? Because we need to compute complete bitmaps on
either side of the walk. If either one (or both) of the sides require
walking many (or all!) objects before they get to an existing bitmap,
the extra bitmap machinery is mostly or all overhead.
One of the benefits, however, is that even if the walk is slower, bitmap
traversals are guaranteed to provide an *exact* answer. Unlike the
traditional object traversal algorithm, which can over-count the results
by not opening trees for older commits, the bitmap walk builds an exact
reachability bitmap for either side, meaning the results are never
over-counted.
But producing non-exact results is OK for our traversal here (both in
the bitmap case and not), as long as the results are over-counted, not
under.
Relaxing the bitmap traversal to allow it to produce over-counted
results gives us the opportunity to make some significant improvements.
Instead of the above, the new algorithm only has to walk from the
*boundary* down to the nearest bitmap, instead of from each of the
UNINTERESTING tips.
The boundary-based approach still has degenerate cases, but we'll show
in a moment that it is often a significant improvement.
The new algorithm works as follows:
1. Build a (partial) bitmap of the haves side by first OR-ing any
bitmap(s) that already exist for UNINTERESTING commits between the
haves and the boundary.
2. For each commit along the boundary, add it as a fill-in traversal
tip (where the traversal terminates once an existing bitmap is
found), and perform fill-in traversal.
3. Build up a complete bitmap of the wants side as usual, stopping any
time we intersect the (partial) haves side.
4. Return the results.
And is more-or-less equivalent to using the *old* algorithm with this
invocation:
$ git rev-list --objects --use-bitmap-index $WANTS --not \
$(git rev-list --objects --boundary $WANTS --not $HAVES |
perl -lne 'print $1 if /^-(.*)/')
The new result performs significantly better in many cases, particularly
when the distance from the boundary commit(s) to an existing bitmap is
shorter than the distance from (all of) the have tips to the nearest
bitmapped commit.
Note that when using the old bitmap traversal algorithm, the results can
be *slower* than without bitmaps! Under the new algorithm, the result is
computed faster with bitmaps than without (at the cost of over-counting
the true number of objects in a similar fashion as the non-bitmap
traversal):
# (Computing the number of tagged objects not on any branches
# without bitmaps).
$ time git rev-list --count --objects --tags --not --branches
20
real 0m1.388s
user 0m1.092s
sys 0m0.296s
# (Computing the same query using the old bitmap traversal).
$ time git rev-list --count --objects --tags --not --branches --use-bitmap-index
19
real 0m22.709s
user 0m21.628s
sys 0m1.076s
# (this commit)
$ time git.compile rev-list --count --objects --tags --not --branches --use-bitmap-index
19
real 0m1.518s
user 0m1.234s
sys 0m0.284s
The new algorithm is still slower than not using bitmaps at all, but it
is nearly a 15-fold improvement over the existing traversal.
In a more realistic setting (using my local copy of git.git), I can
observe a similar (if more modest) speed-up:
$ argv="--count --objects --branches --not --tags"
hyperfine \
-n 'no bitmaps' "git.compile rev-list $argv" \
-n 'existing traversal' "git.compile rev-list --use-bitmap-index $argv" \
-n 'boundary traversal' "git.compile -c pack.useBitmapBoundaryTraversal=true rev-list --use-bitmap-index $argv"
Benchmark 1: no bitmaps
Time (mean ± σ): 124.6 ms ± 2.1 ms [User: 103.7 ms, System: 20.8 ms]
Range (min … max): 122.6 ms … 133.1 ms 22 runs
Benchmark 2: existing traversal
Time (mean ± σ): 368.6 ms ± 3.0 ms [User: 325.3 ms, System: 43.1 ms]
Range (min … max): 365.1 ms … 374.8 ms 10 runs
Benchmark 3: boundary traversal
Time (mean ± σ): 167.6 ms ± 0.9 ms [User: 139.5 ms, System: 27.9 ms]
Range (min … max): 166.1 ms … 169.2 ms 17 runs
Summary
'no bitmaps' ran
1.34 ± 0.02 times faster than 'boundary traversal'
2.96 ± 0.05 times faster than 'existing traversal'
Here, the new algorithm is also still slower than not using bitmaps, but
represents a more than 2-fold improvement over the existing traversal in
a more modest example.
Since this algorithm was originally written (nearly a year and a half
ago, at the time of writing), the bitmap lookup table shipped, making
the new algorithm's result more competitive. A few other future
directions for improving bitmap traversal times beyond not using bitmaps
at all:
- Decrease the cost to decompress and OR together many bitmaps
(particularly when enumerating the uninteresting side of
the walk). Here we could explore more efficient bitmap storage
techniques, like Roaring+Run and/or use SIMD instructions to speed
up ORing them together.
- Store pseudo-merge bitmaps, which could allow us to OR together
fewer "summary" bitmaps (which would also help with the above).
Helped-by: Jeff King <peff@peff.net>
Helped-by: Derrick Stolee <derrickstolee@github.com>
Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-05-09 01:38:12 +08:00
|
|
|
int pack_use_bitmap_boundary_traversal;
|
2024-02-06 06:50:23 +08:00
|
|
|
int pack_use_multi_pack_reuse;
|
2019-08-14 02:37:43 +08:00
|
|
|
|
2023-06-06 21:24:37 +08:00
|
|
|
/*
|
|
|
|
* Does this repository have core.useReplaceRefs=true (on by
|
|
|
|
* default)? This provides a repository-scoped version of this
|
|
|
|
* config, though it could be disabled process-wide via some Git
|
|
|
|
* builtins or the --no-replace-objects option. See
|
|
|
|
* replace_refs_enabled() for more details.
|
|
|
|
*/
|
|
|
|
int read_replace_refs;
|
|
|
|
|
2022-03-26 02:02:46 +08:00
|
|
|
struct fsmonitor_settings *fsmonitor; /* lazily loaded */
|
|
|
|
|
2019-08-14 02:37:43 +08:00
|
|
|
int index_version;
|
2023-01-07 00:31:56 +08:00
|
|
|
int index_skip_hash;
|
2019-08-14 02:37:46 +08:00
|
|
|
enum untracked_cache_setting core_untracked_cache;
|
2019-08-14 02:37:43 +08:00
|
|
|
|
|
|
|
int pack_use_sparse;
|
2019-08-14 02:37:48 +08:00
|
|
|
enum fetch_negotiation_setting fetch_negotiation_algorithm;
|
2020-09-25 20:33:34 +08:00
|
|
|
|
|
|
|
int core_multi_pack_index;
|
2019-08-14 02:37:43 +08:00
|
|
|
};
|
|
|
|
|
2022-03-05 02:32:17 +08:00
|
|
|
/*
 * Cached path strings for often used files. struct repository embeds
 * one of these by value as its cached_paths member.
 *
 * NOTE(review): each member presumably holds the path of the like-named
 * file (SQUASH_MSG, MERGE_MSG, MERGE_RR, ...) -- confirm against the
 * code that fills them in. Who allocates and frees these strings is not
 * visible from this declaration.
 */
struct repo_path_cache {
|
|
|
|
char *squash_msg;
|
|
|
|
char *merge_msg;
|
|
|
|
char *merge_rr;
|
|
|
|
char *merge_mode;
|
|
|
|
char *merge_head;
|
|
|
|
char *fetch_head;
|
|
|
|
char *shallow;
|
|
|
|
};
|
|
|
|
|
2017-06-23 02:43:32 +08:00
|
|
|
struct repository {
|
|
|
|
/* Environment */
|
|
|
|
/*
|
|
|
|
* Path to the git directory.
|
|
|
|
* Cannot be NULL after initialization.
|
|
|
|
*/
|
|
|
|
char *gitdir;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Path to the common git directory.
|
|
|
|
* Cannot be NULL after initialization.
|
|
|
|
*/
|
|
|
|
char *commondir;
|
|
|
|
|
|
|
|
/*
|
2018-03-24 01:20:55 +08:00
|
|
|
* Holds any information related to accessing the raw object content.
|
2017-06-23 02:43:32 +08:00
|
|
|
*/
|
2018-03-24 01:20:55 +08:00
|
|
|
struct raw_object_store *objects;
|
2018-03-03 19:35:57 +08:00
|
|
|
|
2018-05-09 03:37:24 +08:00
|
|
|
/*
|
|
|
|
* All objects in this repository that have been parsed. This structure
|
|
|
|
* owns all objects it references, so users of "struct object *"
|
|
|
|
* generally do not need to free them; instead, when a repository is no
|
|
|
|
* longer used, call parsed_object_pool_clear() on this structure, which
|
|
|
|
* is called by the repository's repo_clear on its deconstruction.
|
|
|
|
*/
|
|
|
|
struct parsed_object_pool *parsed_objects;
|
|
|
|
|
repository: mark the "refs" pointer as private
The "refs" pointer in a struct repository starts life as NULL, but then
is lazily initialized when it is accessed via get_main_ref_store().
However, it's easy for calling code to forget this and access it
directly, leading to code which works _some_ of the time, but fails if
it is called before anybody else accesses the refs.
This was the cause of the bug fixed by 5ff4b920eb (sha1-name: do not
assume that the ref store is initialized, 2020-04-09). In order to
prevent similar bugs, let's more clearly mark the "refs" field as
private.
In addition to helping future code, the name change will help us audit
any existing direct uses. Besides get_main_ref_store() itself, it turns
out there is only one. But we know it's OK as it is on the line directly
after the fix from 5ff4b920eb, which will have initialized the pointer.
However it's still a good idea for it to model the proper use of the
accessing function, so we'll convert it.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-04-10 11:04:11 +08:00
|
|
|
/*
|
|
|
|
* The store in which the refs are held. This should generally only be
|
|
|
|
* accessed via get_main_ref_store(), as that will lazily initialize
|
|
|
|
* the ref object.
|
|
|
|
*/
|
|
|
|
struct ref_store *refs_private;
|
2018-04-12 08:21:14 +08:00
|
|
|
|
2024-05-17 16:18:34 +08:00
|
|
|
/*
|
|
|
|
* A strmap of ref_stores, stored by submodule name, accessible via
|
|
|
|
* `repo_get_submodule_ref_store()`.
|
|
|
|
*/
|
|
|
|
struct strmap submodule_ref_stores;
|
|
|
|
|
2024-05-17 16:18:44 +08:00
|
|
|
/*
|
|
|
|
* A strmap of ref_stores, stored by worktree id, accessible via
|
|
|
|
* `get_worktree_ref_store()`.
|
|
|
|
*/
|
|
|
|
struct strmap worktree_ref_stores;
|
|
|
|
|
2018-05-18 06:51:51 +08:00
|
|
|
/*
|
|
|
|
* Contains path to often used file names.
|
|
|
|
*/
|
2022-03-05 02:32:17 +08:00
|
|
|
struct repo_path_cache cached_paths;
|
2018-05-18 06:51:51 +08:00
|
|
|
|
2017-06-23 02:43:32 +08:00
|
|
|
/*
|
|
|
|
* Path to the repository's graft file.
|
|
|
|
* Cannot be NULL after initialization.
|
|
|
|
*/
|
|
|
|
char *graft_file;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Path to the current worktree's index file.
|
|
|
|
* Cannot be NULL after initialization.
|
|
|
|
*/
|
|
|
|
char *index_file;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Path to the working directory.
|
|
|
|
* A NULL value indicates that there is no working directory.
|
|
|
|
*/
|
|
|
|
char *worktree;
|
|
|
|
|
2017-06-23 02:43:47 +08:00
|
|
|
/*
|
|
|
|
* Path from the root of the top-level superproject down to this
|
|
|
|
* repository. This is only non-NULL if the repository is initialized
|
|
|
|
* as a submodule of another repository.
|
|
|
|
*/
|
|
|
|
char *submodule_prefix;
|
|
|
|
|
2019-08-14 02:37:43 +08:00
|
|
|
struct repo_settings settings;
|
|
|
|
|
2017-06-23 02:43:42 +08:00
|
|
|
/* Subsystems */
|
|
|
|
/*
|
|
|
|
* Repository's config which contains key-value pairs from the usual
|
|
|
|
* set of config files (i.e. repo specific .git/config, user wide
|
|
|
|
* ~/.gitconfig, XDG config file and the global /etc/gitconfig)
|
|
|
|
*/
|
|
|
|
struct config_set *config;
|
|
|
|
|
2017-06-23 02:43:44 +08:00
|
|
|
/* Repository's submodule config as defined by '.gitmodules' */
|
|
|
|
struct submodule_cache *submodule_cache;
|
|
|
|
|
2017-06-23 02:43:43 +08:00
|
|
|
/*
|
|
|
|
* Repository's in-memory index.
|
|
|
|
* 'repo_read_index()' can be used to populate 'index'.
|
|
|
|
*/
|
|
|
|
struct index_state *index;
|
|
|
|
|
2021-11-18 08:53:22 +08:00
|
|
|
/* Repository's remotes and associated structures. */
|
|
|
|
struct remote_state *remote_state;
|
|
|
|
|
2017-11-13 05:28:53 +08:00
|
|
|
/* Repository's current hash algorithm, as serialized on disk. */
|
|
|
|
const struct git_hash_algo *hash_algo;
|
|
|
|
|
2023-10-02 10:40:08 +08:00
|
|
|
/* Repository's compatibility hash algorithm. */
|
|
|
|
const struct git_hash_algo *compat_hash_algo;
|
|
|
|
|
2023-12-29 15:26:39 +08:00
|
|
|
/* Repository's reference storage format, as serialized on disk. */
|
2024-06-06 13:29:01 +08:00
|
|
|
enum ref_storage_format ref_storage_format;
|
2023-12-29 15:26:39 +08:00
|
|
|
|
2019-02-23 06:25:01 +08:00
|
|
|
/* A unique-id for tracing purposes. */
|
|
|
|
int trace2_repo_id;
|
|
|
|
|
upload-pack: disable commit graph more gently for shallow traversal
When the client has asked for certain shallow options like
"deepen-since", we do a custom rev-list walk that pretends to be
shallow. Before doing so, we have to disable the commit-graph, since it
is not compatible with the shallow view of the repository. That's
handled by 829a321569 (commit-graph: close_commit_graph before shallow
walk, 2018-08-20). That commit literally closes and frees our
repo->objects->commit_graph struct.
That creates an interesting problem for commits that have _already_ been
parsed using the commit graph. Their commit->object.parsed flag is set,
their commit->graph_pos is set, but their commit->maybe_tree may still
be NULL. When somebody later calls repo_get_commit_tree(), we see that
we haven't loaded the tree oid yet and try to get it from the commit
graph. But since it has been freed, we segfault!
So the root of the issue is a data dependency between the commit's
lazy-load of the tree oid and the fact that the commit graph can go
away mid-process. How can we resolve it?
There are a couple of general approaches:
1. The obvious answer is to avoid loading the tree from the graph when
we see that it's NULL. But then what do we return for the tree oid?
If we return NULL, our caller in do_traverse() will rightly
complain that we have no tree. We'd have to fallback to loading the
actual commit object and re-parsing it. That requires teaching
parse_commit_buffer() to understand re-parsing (i.e., not starting
from a clean slate and not leaking any allocated bits like parent
list pointers).
2. When we close the commit graph, walk through the set of in-memory
objects and clear any graph_pos pointers. But this means we also
have to "unparse" any such commits so that we know they still need
to open the commit object to fill in their trees. So it's no less
complicated than (1), and is more expensive (since we clear objects
we might not later need).
3. Stop freeing the commit-graph struct. Continue to let it be used
for lazy-loads of tree oids, but let upload-pack specify that it
shouldn't be used for further commit parsing.
4. Push the whole shallow rev-list out to its own sub-process, with
the commit-graph disabled from the start, giving it a clean memory
space to work from.
I've chosen (3) here. Options (1) and (2) would work, but are
non-trivial to implement. Option (4) is more expensive, and I'm not sure
how complicated it is (shelling out for the actual rev-list part is
easy, but we do then parse the resulting commits internally, and I'm not
clear which parts need to be handling shallow-ness).
The new test in t5500 triggers this segfault, but see the comments there
for how horribly intimate it has to be with how both upload-pack and
commit graphs work.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-09-12 22:44:45 +08:00
|
|
|
/* True if commit-graph has been disabled within this process. */
|
|
|
|
int commit_graph_disabled;
|
|
|
|
|
2021-06-18 01:13:22 +08:00
|
|
|
/* Configurations related to promisor remotes. */
|
|
|
|
char *repository_format_partial_clone;
|
2021-06-18 01:13:23 +08:00
|
|
|
struct promisor_remote_config *promisor_remote_config;
|
2021-06-18 01:13:22 +08:00
|
|
|
|
2017-06-23 02:43:32 +08:00
|
|
|
/* Configurations */
|
2023-05-26 09:33:00 +08:00
|
|
|
int repository_format_worktree_config;
|
2017-06-23 02:43:32 +08:00
|
|
|
|
|
|
|
/* Indicate if a repository has a different 'commondir' from 'gitdir' */
|
|
|
|
unsigned different_commondir:1;
|
|
|
|
};
|
|
|
|
|
global: introduce `USE_THE_REPOSITORY_VARIABLE` macro
Use of the `the_repository` variable is deprecated nowadays, and we
slowly but steadily convert the codebase to not use it anymore. Instead,
callers should be passing down the repository to work on via parameters.
It is hard though to prove that a given code unit does not use this
variable anymore. The most trivial case, merely demonstrating that there
is no direct use of `the_repository`, is already a bit of a pain during
code reviews as the reviewer needs to manually verify claims made by the
patch author. The bigger problem though is that we have many interfaces
that implicitly rely on `the_repository`.
Introduce a new `USE_THE_REPOSITORY_VARIABLE` macro that allows code
units to opt into usage of `the_repository`. The intent of this macro is
to demonstrate that a certain code unit does not use this variable
anymore, and to keep it from new dependencies on it in future changes,
be it explicit or implicit.
For now, the macro only guards `the_repository` itself as well as
`the_hash_algo`. There are many more known interfaces where we have an
implicit dependency on `the_repository`, but those are not guarded at
the current point in time. Over time though, we should start to add
guards as required (or even better, just remove them).
Define the macro as required in our code units. As expected, most of our
code still relies on the global variable. Nearly all of our builtins
rely on the variable as there is no way yet to pass `the_repository` to
their entry point. For now, declare the macro in "builtin.h" to keep the
required changes at least a little bit more contained.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2024-06-14 14:50:23 +08:00
|
|
|
#ifdef USE_THE_REPOSITORY_VARIABLE
|
2017-06-23 02:43:32 +08:00
|
|
|
/*
 * Pointer to the global repository. This declaration is only visible to
 * code units that define USE_THE_REPOSITORY_VARIABLE before including this
 * header (see the surrounding #ifdef). Use of this variable is deprecated;
 * callers should pass the repository to work on via parameters instead.
 */
extern struct repository *the_repository;
|
global: introduce `USE_THE_REPOSITORY_VARIABLE` macro
Use of the `the_repository` variable is deprecated nowadays, and we
slowly but steadily convert the codebase to not use it anymore. Instead,
callers should be passing down the repository to work on via parameters.
It is hard though to prove that a given code unit does not use this
variable anymore. The most trivial case, merely demonstrating that there
is no direct use of `the_repository`, is already a bit of a pain during
code reviews as the reviewer needs to manually verify claims made by the
patch author. The bigger problem though is that we have many interfaces
that implicitly rely on `the_repository`.
Introduce a new `USE_THE_REPOSITORY_VARIABLE` macro that allows code
units to opt into usage of `the_repository`. The intent of this macro is
to demonstrate that a certain code unit does not use this variable
anymore, and to keep it from new dependencies on it in future changes,
be it explicit or implicit.
For now, the macro only guards `the_repository` itself as well as
`the_hash_algo`. There are many more known interfaces where we have an
implicit dependency on `the_repository`, but those are not guarded at
the current point in time. Over time though, we should start to add
guards as required (or even better, just remove them).
Define the macro as required in our code units. As expected, most of our
code still relies on the global variable. Nearly all of our builtins
rely on the variable as there is no way yet to pass `the_repository` to
their entry point. For now, declare the macro in "builtin.h" to keep the
required changes at least a little bit more contained.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2024-06-14 14:50:23 +08:00
|
|
|
#endif
|
2017-06-23 02:43:32 +08:00
|
|
|
|
2018-03-23 23:55:23 +08:00
|
|
|
/*
|
|
|
|
* Define a custom repository layout. Any field can be NULL, which
|
|
|
|
* will default back to the path according to the default layout.
|
|
|
|
*/
|
2018-03-03 19:35:55 +08:00
|
|
|
/*
 * Optional overrides handed to repo_set_gitdir() through its extra_args
 * parameter. A NULL path member selects the corresponding path of the
 * default layout.
 */
struct set_gitdir_args {
|
|
|
|
/* Override for the common directory. */
const char *commondir;
|
|
|
|
/* Override for the object directory. */
const char *object_dir;
|
|
|
|
/* Override for the graft file. */
const char *graft_file;
|
|
|
|
/* Override for the index file. */
const char *index_file;
|
2018-03-03 19:35:57 +08:00
|
|
|
/* NOTE(review): presumably the alternate object database location(s) -- confirm. */
const char *alternate_db;
|
2021-12-07 06:05:05 +08:00
|
|
|
/*
 * Not a path, so the "NULL means default" rule does not apply here.
 * NOTE(review): presumably a nonzero value disables ref updates --
 * confirm against repo_set_gitdir().
 */
int disable_ref_updates;
|
2018-03-03 19:35:55 +08:00
|
|
|
};
|
|
|
|
|
2018-06-30 17:20:29 +08:00
|
|
|
/*
 * repo:       the repository to configure.
 * root:       NOTE(review): presumably the path of the git directory --
 *             confirm against the definition.
 * extra_args: custom layout description (see struct set_gitdir_args).
 *             NOTE(review): whether NULL is accepted here cannot be told
 *             from this header -- confirm.
 */
void repo_set_gitdir(struct repository *repo, const char *root,
|
|
|
|
const struct set_gitdir_args *extra_args);
|
|
|
|
/*
 * NOTE(review): presumably records 'path' as the working tree of 'repo' --
 * confirm against the definition.
 */
void repo_set_worktree(struct repository *repo, const char *path);
|
|
|
|
/*
 * The algorithm is passed as an int. NOTE(review): presumably an identifier
 * that selects a struct git_hash_algo for 'repo' -- confirm against the
 * definition.
 */
void repo_set_hash_algo(struct repository *repo, int algo);
|
2023-10-02 10:40:08 +08:00
|
|
|
/*
 * 'compat_algo' is an int, whereas the compat_hash_algo member of
 * struct repository is a 'const struct git_hash_algo *'.
 * NOTE(review): presumably the int is resolved to that descriptor and
 * stored in repo->compat_hash_algo -- confirm against the definition.
 */
void repo_set_compat_hash_algo(struct repository *repo, int compat_algo);
|
2024-06-06 13:29:01 +08:00
|
|
|
/*
 * 'format' has the same type as the ref_storage_format member of
 * struct repository. NOTE(review): presumably stored there -- confirm
 * against the definition.
 */
void repo_set_ref_storage_format(struct repository *repo,
|
|
|
|
enum ref_storage_format format);
|
2024-04-18 20:14:33 +08:00
|
|
|
/*
 * Distinct from repo_init(): takes only the repository and returns nothing.
 * NOTE(review): presumably puts 'repo' into a valid initial state without
 * attaching it to a git directory -- confirm against the definition.
 */
void initialize_repository(struct repository *repo);
|
2022-09-01 07:18:12 +08:00
|
|
|
/*
 * Initialize 'r' from the given 'gitdir' and 'worktree' paths.
 * NOTE(review): presumably returns 0 on success and non-zero on failure,
 * like repo_submodule_init() -- confirm. RESULT_MUST_BE_USED marks the
 * return value as one callers are expected to check.
 */
RESULT_MUST_BE_USED
|
2018-06-30 17:20:29 +08:00
|
|
|
int repo_init(struct repository *r, const char *gitdir, const char *worktree);
|
2018-11-29 08:27:53 +08:00
|
|
|
|
|
|
|
/*
|
2021-09-10 02:47:28 +08:00
|
|
|
* Initialize the repository 'subrepo' as the submodule at the given path. If
|
|
|
|
* the submodule's gitdir cannot be found at <path>/.git, this function calls
|
|
|
|
* submodule_from_path() to try to find it. treeish_name is only used if
|
|
|
|
* submodule_from_path() needs to be called; see its documentation for more
|
|
|
|
* information.
|
|
|
|
* Return 0 upon success and a non-zero value upon failure.
|
2018-11-29 08:27:53 +08:00
|
|
|
*/
|
2021-09-10 02:47:28 +08:00
|
|
|
struct object_id;
|
2022-09-01 07:18:12 +08:00
|
|
|
/*
 * subrepo:      repository to initialize as the submodule.
 * superproject: NOTE(review): presumably the repository containing the
 *               submodule -- confirm.
 * path:         path of the submodule.
 * treeish_name: only used if submodule_from_path() needs to be called.
 * Returns 0 upon success and a non-zero value upon failure.
 */
RESULT_MUST_BE_USED
|
2018-11-29 08:27:53 +08:00
|
|
|
int repo_submodule_init(struct repository *subrepo,
|
2018-06-30 17:20:29 +08:00
|
|
|
struct repository *superproject,
|
2021-09-10 02:47:28 +08:00
|
|
|
const char *path,
|
|
|
|
const struct object_id *treeish_name);
|
2018-06-30 17:20:29 +08:00
|
|
|
/*
 * NOTE(review): presumably releases the resources held by 'repo'; whether
 * 'repo' itself is freed cannot be told from this header -- confirm.
 */
void repo_clear(struct repository *repo);
|
2017-06-23 02:43:32 +08:00
|
|
|
|
2017-07-19 03:05:18 +08:00
|
|
|
/*
|
|
|
|
* Populates the repository's index from its index_file; an index struct will
|
|
|
|
* be allocated if needed.
|
|
|
|
*
|
|
|
|
* Return the number of index entries in the populated index or a value less
|
2019-11-06 01:07:23 +08:00
|
|
|
* than zero if an error occurred. If the repository's index has already been
|
2017-07-19 03:05:18 +08:00
|
|
|
* populated then the number of entries will simply be returned.
|
|
|
|
*/
|
2018-06-30 17:20:29 +08:00
|
|
|
/* repo: the repository whose index is populated from its index_file. */
int repo_read_index(struct repository *repo);
|
2019-01-12 10:13:24 +08:00
|
|
|
/*
 * NOTE(review): presumably takes the lock on the index file of 'repo'
 * using 'lf'; the meaning of 'flags' and of the return value is not
 * visible from this header -- confirm against the definition.
 */
int repo_hold_locked_index(struct repository *repo,
|
|
|
|
struct lock_file *lf,
|
|
|
|
int flags);
|
2017-06-23 02:43:43 +08:00
|
|
|
|
2019-01-12 10:13:26 +08:00
|
|
|
/*
 * repo: the repository whose index is read.
 * NOTE(review): the handling of unmerged entries and the meaning of the
 * return value are not visible from this header -- confirm against the
 * definition.
 */
int repo_read_index_unmerged(struct repository *repo);
|
2019-01-12 10:13:27 +08:00
|
|
|
/*
|
|
|
|
* Opportunistically update the index but do not complain if we can't.
|
|
|
|
* The lockfile is always committed or rolled back.
|
|
|
|
*/
|
|
|
|
/*
 * repo: the repository whose index may be updated.
 * lf:   the lock file; it is always committed or rolled back.
 */
void repo_update_index_if_able(struct repository *repo, struct lock_file *lf);
|
|
|
|
|
2019-08-14 02:37:43 +08:00
|
|
|
/*
 * NOTE(review): presumably populates the settings of 'r' from its
 * configuration -- confirm against repo-settings.c.
 */
void prepare_repo_settings(struct repository *r);
|
2017-06-23 02:43:43 +08:00
|
|
|
|
2020-06-05 17:10:01 +08:00
|
|
|
/*
|
|
|
|
* Return 1 if upgrading the repository format to target_version succeeded,
|
|
|
|
* 0 if no upgrade is necessary, and -1 when upgrade is not possible.
|
|
|
|
*/
|
|
|
|
/*
 * Unlike the repo_*() functions declared in this header, this takes no
 * struct repository argument. NOTE(review): presumably it operates on
 * the_repository -- confirm against the definition.
 */
int upgrade_repository_format(int target_version);
|
|
|
|
|
2017-06-23 02:43:32 +08:00
|
|
|
#endif /* REPOSITORY_H */
|