git/object-name.c

2166 lines
54 KiB
C
Raw Normal View History

global: introduce `USE_THE_REPOSITORY_VARIABLE` macro Use of the `the_repository` variable is deprecated nowadays, and we slowly but steadily convert the codebase to not use it anymore. Instead, callers should be passing down the repository to work on via parameters. It is hard though to prove that a given code unit does not use this variable anymore. The most trivial case, merely demonstrating that there is no direct use of `the_repository`, is already a bit of a pain during code reviews as the reviewer needs to manually verify claims made by the patch author. The bigger problem though is that we have many interfaces that implicitly rely on `the_repository`. Introduce a new `USE_THE_REPOSITORY_VARIABLE` macro that allows code units to opt into usage of `the_repository`. The intent of this macro is to demonstrate that a certain code unit does not use this variable anymore, and to keep it from new dependencies on it in future changes, be it explicit or implicit. For now, the macro only guards `the_repository` itself as well as `the_hash_algo`. There are many more known interfaces where we have an implicit dependency on `the_repository`, but those are not guarded at the current point in time. Over time though, we should start to add guards as required (or even better, just remove them). Define the macro as required in our code units. As expected, most of our code still relies on the global variable. Nearly all of our builtins rely on the variable as there is no way yet to pass `the_repository` to their entry point. For now, declare the macro in "builtin.h" to keep the required changes at least a little bit more contained. Signed-off-by: Patrick Steinhardt <ps@pks.im> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2024-06-14 14:50:23 +08:00
#define USE_THE_REPOSITORY_VARIABLE
#include "git-compat-util.h"
#include "object-name.h"
#include "advice.h"
#include "config.h"
#include "environment.h"
#include "gettext.h"
#include "hex.h"
#include "tag.h"
#include "commit.h"
#include "tree.h"
#include "tree-walk.h"
#include "refs.h"
#include "remote.h"
#include "dir.h"
#include "oid-array.h"
#include "oidtree.h"
#include "packfile.h"
#include "pretty.h"
#include "object-store-ll.h"
#include "read-cache-ll.h"
#include "repository.h"
#include "setup.h"
#include "midx.h"
#include "commit-reach.h"
#include "date.h"
#include "object-file-convert.h"
/* Forward declaration of a static helper; its definition is not adjacent to this prototype. */
static int get_oid_oneline(struct repository *r, const char *, struct object_id *,
const struct commit_list *);
/*
 * Disambiguation hint callback.  It is handed the repository being
 * searched, one object ID that matches the abbreviated name, and the
 * caller's opaque cb_data; a non-zero return means the object satisfies
 * the hint.
 */
typedef int (*disambiguate_hint_fn)(struct repository *, const struct object_id *, void *);
/*
 * Working state for resolving one abbreviated object name.  It is
 * filled in by init_object_disambiguation(), updated once per matching
 * object by update_candidates(), and turned into a final answer by
 * finish_object_disambiguation().
 */
struct disambiguate_state {
int len; /* length of prefix in hex chars */
/* the prefix as lowercase hex digits, NUL-terminated */
char hex_pfx[GIT_MAX_HEXSZ + 1];
/* the prefix in binary form; .algo records the requested hash algorithm */
struct object_id bin_pfx;
/* repository whose object stores are searched */
struct repository *repo;
/* optional hint callback (may be NULL) and the opaque pointer passed to it */
disambiguate_hint_fn fn;
void *cb_data;
/* the object currently considered the answer; valid only if candidate_exists */
struct object_id candidate;
/* at least one matching object has been seen */
unsigned candidate_exists:1;
/* fn has already been evaluated on the current candidate */
unsigned candidate_checked:1;
/* result of evaluating fn on the current candidate */
unsigned candidate_ok:1;
/* fn was called at least once to choose between competing candidates */
unsigned disambiguate_fn_used:1;
/* the search cannot settle on a single object; scans stop once this is set */
unsigned ambiguous:1;
/*
 * call fn on every matching object instead of tracking a candidate;
 * fn's non-zero return is stored in "ambiguous"
 */
unsigned always_call_fn:1;
};
/*
 * Account for one more object ("current") whose name matches the
 * abbreviated prefix held in ds.
 *
 * In always_call_fn mode the hint callback is simply invoked on every
 * match and its truth value is recorded in ds->ambiguous.
 *
 * Otherwise a single candidate is tracked:
 *  - the first match becomes the candidate;
 *  - seeing the same object again changes nothing;
 *  - a second, different match makes the name ambiguous when there is
 *    no hint callback;
 *  - with a hint callback, a candidate that fails the hint is replaced
 *    by the new match (which is left unchecked for now), while a
 *    candidate that passes stays unless the new match passes as well,
 *    in which case the name is ambiguous.
 *
 * The hash algorithm of current has already been filtered by the caller.
 */
static void update_candidates(struct disambiguate_state *ds, const struct object_id *current)
{
	if (ds->always_call_fn) {
		if (ds->fn(ds->repo, current, ds->cb_data))
			ds->ambiguous = 1;
		else
			ds->ambiguous = 0;
		return;
	}

	if (!ds->candidate_exists) {
		/* nothing seen so far: remember this one */
		oidcpy(&ds->candidate, current);
		ds->candidate_exists = 1;
		return;
	}

	if (oideq(&ds->candidate, current))
		return; /* a duplicate of the candidate we already hold */

	if (!ds->fn) {
		/* two distinct matches and no hint to choose between them */
		ds->ambiguous = 1;
		return;
	}

	if (!ds->candidate_checked) {
		/* evaluate the hint on the held candidate lazily, only once */
		ds->candidate_ok = ds->fn(ds->repo, &ds->candidate, ds->cb_data);
		ds->disambiguate_fn_used = 1;
		ds->candidate_checked = 1;
	}

	if (ds->candidate_ok) {
		/*
		 * The held candidate satisfies the hint.  If the newcomer
		 * does too, there is no way to pick one; otherwise the
		 * newcomer is dropped and the candidate stays.
		 */
		if (ds->fn(ds->repo, current, ds->cb_data)) {
			ds->candidate_ok = 0;
			ds->ambiguous = 1;
		}
		return;
	}

	/* the held candidate failed the hint: the newcomer takes its place */
	oidcpy(&ds->candidate, current);
	ds->candidate_checked = 0;
}
/* Forward declaration: (prefix length in hex digits, hash a, hash b). */
static int match_hash(unsigned, const unsigned char *, const unsigned char *);
oidtree: a crit-bit tree for odb_loose_cache This saves 8K per `struct object_directory', meaning it saves around 800MB in my case involving 100K alternates (half or more of those alternates are unlikely to hold loose objects). This is implemented in two parts: a generic, allocation-free `cbtree' and the `oidtree' wrapper on top of it. The latter provides allocation using alloc_state as a memory pool to improve locality and reduce free(3) overhead. Unlike oid-array, the crit-bit tree does not require sorting. Performance is bound by the key length, for oidtree that is fixed at sizeof(struct object_id). There's no need to have 256 oidtrees to mitigate the O(n log n) overhead like we did with oid-array. Being a prefix trie, it is natively suited for expanding short object IDs via prefix-limited iteration in `find_short_object_filename'. On my busy workstation, p4205 performance seems to be roughly unchanged (+/-8%). Startup with 100K total alternates with no loose objects seems around 10-20% faster on a hot cache. (800MB in memory savings means more memory for the kernel FS cache). The generic cbtree implementation does impose some extra overhead for oidtree in that it uses memcmp(3) on "struct object_id" so it wastes cycles comparing 12 extra bytes on SHA-1 repositories. I've not yet explored reducing this overhead, but I expect there are many places in our code base where we'd want to investigate this. More information on crit-bit trees: https://cr.yp.to/critbit.html Signed-off-by: Eric Wong <e@80x24.org> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-07-08 07:10:19 +08:00
/*
 * Callback handed to oidtree_each(): record "oid" as a match in the
 * disambiguate_state passed through "arg" and ask the iteration to stop
 * as soon as the name has become ambiguous.
 *
 * oidtree_each() has already done the prefix comparison, so match_hash()
 * is not needed here.
 */
static enum cb_next match_prefix(const struct object_id *oid, void *arg)
{
	struct disambiguate_state *state = arg;

	update_candidates(state, oid);
	if (state->ambiguous)
		return CB_BREAK;
	return CB_CONTINUE;
}
/*
 * Feed every entry of the per-directory caches returned by
 * odb_loose_cache() whose name starts with ds->bin_pfx (ds->len hex
 * digits) to update_candidates(), using match_prefix() as the
 * oidtree_each() callback.  The object directories of ds->repo are
 * visited in list order, and the walk stops as soon as ds->ambiguous
 * is set.
 */
static void find_short_object_filename(struct disambiguate_state *ds)
{
struct object_directory *odb;
oidtree: a crit-bit tree for odb_loose_cache This saves 8K per `struct object_directory', meaning it saves around 800MB in my case involving 100K alternates (half or more of those alternates are unlikely to hold loose objects). This is implemented in two parts: a generic, allocation-free `cbtree' and the `oidtree' wrapper on top of it. The latter provides allocation using alloc_state as a memory pool to improve locality and reduce free(3) overhead. Unlike oid-array, the crit-bit tree does not require sorting. Performance is bound by the key length, for oidtree that is fixed at sizeof(struct object_id). There's no need to have 256 oidtrees to mitigate the O(n log n) overhead like we did with oid-array. Being a prefix trie, it is natively suited for expanding short object IDs via prefix-limited iteration in `find_short_object_filename'. On my busy workstation, p4205 performance seems to be roughly unchanged (+/-8%). Startup with 100K total alternates with no loose objects seems around 10-20% faster on a hot cache. (800MB in memory savings means more memory for the kernel FS cache). The generic cbtree implementation does impose some extra overhead for oidtree in that it uses memcmp(3) on "struct object_id" so it wastes cycles comparing 12 extra bytes on SHA-1 repositories. I've not yet explored reducing this overhead, but I expect there are many places in our code base where we'd want to investigate this. More information on crit-bit trees: https://cr.yp.to/critbit.html Signed-off-by: Eric Wong <e@80x24.org> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-07-08 07:10:19 +08:00
for (odb = ds->repo->objects->odb; odb && !ds->ambiguous; odb = odb->next)
oidtree_each(odb_loose_cache(odb, &ds->bin_pfx),
&ds->bin_pfx, ds->len, match_prefix, ds);
}
/*
 * Compare the first "len" hex digits of two binary hashes.
 *
 * len - prefix length counted in hex digits (nibbles), not bytes
 * a, b - the binary hashes; each must hold at least (len + 1) / 2 bytes
 *
 * Returns 1 when the prefixes are equal and 0 otherwise.  Whole bytes
 * are compared first; when len is odd, only the high nibble of the
 * byte that follows takes part in the comparison.
 *
 * A len of 0 matches anything and a len of 1 looks at a single nibble.
 * Counting whole bytes up front (instead of decrementing an unsigned
 * length by two on every step) keeps those short lengths from wrapping
 * around and walking off the end of the buffers.
 */
static int match_hash(unsigned len, const unsigned char *a, const unsigned char *b)
{
	unsigned whole_bytes = len / 2;
	unsigned i;

	for (i = 0; i < whole_bytes; i++)
		if (a[i] != b[i])
			return 0;

	/* odd length: one more hex digit lives in the next byte's high nibble */
	if ((len & 1) && ((a[i] ^ b[i]) & 0xf0))
		return 0;

	return 1;
}
/*
 * Report every object recorded in the multi-pack-index "m", and in each
 * index reachable through its base_midx links, whose name starts with
 * the prefix in ds.  Matches are passed to update_candidates(); the
 * scan of a layer ends at the first non-matching entry or as soon as
 * ds->ambiguous is set.
 */
static void unique_in_midx(struct multi_pack_index *m,
			   struct disambiguate_state *ds)
{
	while (m) {
		uint32_t pos = 0, end;
		/* never compare more hex digits than the repository's hash has */
		int prefix_len = ds->len;

		if (ds->len > ds->repo->hash_algo->hexsz)
			prefix_len = ds->repo->hash_algo->hexsz;

		if (m->num_objects) {
			end = m->num_objects + m->num_objects_in_base;
			bsearch_one_midx(&ds->bin_pfx, m, &pos);

			/*
			 * "pos" now sits on the lowest entry whose name could
			 * match the prefix; walk forward to find out whether
			 * zero, one or several entries really do.
			 */
			while (pos < end && !ds->ambiguous) {
				struct object_id oid;
				const struct object_id *found =
					nth_midxed_object_oid(&oid, m, pos);

				if (!match_hash(prefix_len, ds->bin_pfx.hash, found->hash))
					break;
				update_candidates(ds, found);
				pos++;
			}
		}
		m = m->base_midx;
	}
}
/*
 * Report every object in pack "p" whose name starts with the prefix in
 * ds by passing it to update_candidates().
 *
 * Packs flagged with multi_pack_index are skipped, as are packs whose
 * index cannot be opened (open_pack_index() returns non-zero) and packs
 * with no objects.  The comparison length is ds->len capped at the
 * repository hash's hexsz.  The scan starts at the position found by
 * bsearch_pack() and ends at the first non-matching entry or as soon
 * as ds->ambiguous is set.
 *
 * NOTE(review): the return value of nth_packed_object_id() is not
 * checked here - confirm it cannot fail for an index below num_objects.
 */
static void unique_in_pack(struct packed_git *p,
struct disambiguate_state *ds)
{
uint32_t num, i, first = 0;
int len = ds->len > ds->repo->hash_algo->hexsz ?
ds->repo->hash_algo->hexsz : ds->len;
midx: add packs to packed_git linked list The multi-pack-index allows searching for objects across multiple packs using one object list. The original design gains many of these performance benefits by keeping the packs in the multi-pack-index out of the packed_git list. Unfortunately, this has one major drawback. If the multi-pack-index covers thousands of packs, and a command loads many of those packs, then we can hit the limit for open file descriptors. The close_one_pack() method is used to limit this resource, but it only looks at the packed_git list, and uses an LRU cache to prevent thrashing. Instead of complicating this close_one_pack() logic to include direct references to the multi-pack-index, simply add the packs opened by the multi-pack-index to the packed_git list. This immediately solves the file-descriptor limit problem, but requires some extra steps to avoid performance issues or other problems: 1. Create a multi_pack_index bit in the packed_git struct that is one if and only if the pack was loaded from a multi-pack-index. 2. Skip packs with the multi_pack_index bit when doing object lookups and abbreviations. These algorithms already check the multi-pack-index before the packed_git struct. This has a very small performance hit, as we need to walk more packed_git structs. This is acceptable, since these operations run binary search on the other packs, so this walk-and-ignore logic is very fast by comparison. 3. When closing a multi-pack-index file, do not close its packs, as those packs will be closed using close_all_packs(). In some cases, such as 'git repack', we run 'close_midx()' without also closing the packs, so we need to un-set the multi_pack_index bit in those packs. This is necessary, and caught by running t6501-freshen-objects.sh with GIT_TEST_MULTI_PACK_INDEX=1. 
To manually test this change, I inserted trace2 logging into close_pack_fd() and set pack_max_fds to 10, then ran 'git rev-list --all --objects' on a copy of the Git repo with 300+ pack-files and a multi-pack-index. The logs verified the packs are closed as we read them beyond the file descriptor limit. Signed-off-by: Derrick Stolee <dstolee@microsoft.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-04-30 00:18:56 +08:00
if (p->multi_pack_index)
return;
if (open_pack_index(p) || !p->num_objects)
return;
num = p->num_objects;
bsearch_pack(&ds->bin_pfx, p, &first);
/*
 * At this point, "first" is the location of the lowest object
 * with an object name that could match "bin_pfx". See if we have
 * 0, 1 or more objects that actually match(es).
 */
for (i = first; i < num && !ds->ambiguous; i++) {
struct object_id oid;
nth_packed_object_id(&oid, p, i);
if (!match_hash(len, ds->bin_pfx.hash, oid.hash))
break;
update_candidates(ds, &oid);
}
}
/*
 * Search the packed object stores of ds->repo for objects matching the
 * prefix in ds: first every multi-pack-index, then every pack on the
 * packed_git list.  Each list walk ends early once ds->ambiguous is set.
 *
 * Nothing is searched when a specific hash algorithm was requested
 * (ds->bin_pfx.algo is non-zero) and it is not the repository's own
 * hash algorithm.
 */
static void find_short_packed_object(struct disambiguate_state *ds)
{
	struct multi_pack_index *midx;
	struct packed_git *pack;

	/* Skip, unless oids from the storage hash algorithm are wanted */
	if (ds->bin_pfx.algo &&
	    &hash_algos[ds->bin_pfx.algo] != ds->repo->hash_algo)
		return;

	midx = get_multi_pack_index(ds->repo);
	while (midx && !ds->ambiguous) {
		unique_in_midx(midx, ds);
		midx = midx->next;
	}

	pack = get_packed_git(ds->repo);
	while (pack && !ds->ambiguous) {
		unique_in_pack(pack, ds);
		pack = pack->next;
	}
}
/*
 * Turn the state accumulated by update_candidates() into a result.
 *
 * Returns SHORT_NAME_AMBIGUOUS when ds->ambiguous is set, or when the
 * remaining candidate does not satisfy the hint callback;
 * MISSING_OBJECT when no candidate was seen at all; otherwise copies
 * the candidate into "oid" and returns 0.
 */
static int finish_object_disambiguation(struct disambiguate_state *ds,
sha1_name: convert get_sha1* to get_oid* Now that all the callers of get_sha1 directly or indirectly use struct object_id, rename the functions starting with get_sha1 to start with get_oid. Convert the internals in sha1_name.c to use struct object_id as well, and eliminate explicit length checks where possible. Convert a use of 40 in get_oid_basic to GIT_SHA1_HEXSZ. Outside of sha1_name.c and cache.h, this transition was made with the following semantic patch: @@ expression E1, E2; @@ - get_sha1(E1, E2.hash) + get_oid(E1, &E2) @@ expression E1, E2; @@ - get_sha1(E1, E2->hash) + get_oid(E1, E2) @@ expression E1, E2; @@ - get_sha1_committish(E1, E2.hash) + get_oid_committish(E1, &E2) @@ expression E1, E2; @@ - get_sha1_committish(E1, E2->hash) + get_oid_committish(E1, E2) @@ expression E1, E2; @@ - get_sha1_treeish(E1, E2.hash) + get_oid_treeish(E1, &E2) @@ expression E1, E2; @@ - get_sha1_treeish(E1, E2->hash) + get_oid_treeish(E1, E2) @@ expression E1, E2; @@ - get_sha1_commit(E1, E2.hash) + get_oid_commit(E1, &E2) @@ expression E1, E2; @@ - get_sha1_commit(E1, E2->hash) + get_oid_commit(E1, E2) @@ expression E1, E2; @@ - get_sha1_tree(E1, E2.hash) + get_oid_tree(E1, &E2) @@ expression E1, E2; @@ - get_sha1_tree(E1, E2->hash) + get_oid_tree(E1, E2) @@ expression E1, E2; @@ - get_sha1_blob(E1, E2.hash) + get_oid_blob(E1, &E2) @@ expression E1, E2; @@ - get_sha1_blob(E1, E2->hash) + get_oid_blob(E1, E2) @@ expression E1, E2, E3, E4; @@ - get_sha1_with_context(E1, E2, E3.hash, E4) + get_oid_with_context(E1, E2, &E3, E4) @@ expression E1, E2, E3, E4; @@ - get_sha1_with_context(E1, E2, E3->hash, E4) + get_oid_with_context(E1, E2, E3, E4) Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-07-14 07:49:28 +08:00
struct object_id *oid)
{
if (ds->ambiguous)
return SHORT_NAME_AMBIGUOUS;
if (!ds->candidate_exists)
return MISSING_OBJECT;
if (!ds->candidate_checked)
/*
 * If this is the only candidate, there is no point
 * calling the disambiguation hint callback.
 *
 * On the other hand, if the current candidate
 * replaced an earlier candidate that did _not_ pass
 * the disambiguation hint callback, then we do have
 * more than one object that matches the short name
 * given, so we should make sure this one matches;
 * otherwise, if we discovered this one and the one
 * that we previously discarded in the reverse order,
 * we would end up showing different results in the
 * same repository!
 */
ds->candidate_ok = (!ds->disambiguate_fn_used ||
ds->fn(ds->repo, &ds->candidate, ds->cb_data));
if (!ds->candidate_ok)
return SHORT_NAME_AMBIGUOUS;
sha1_name: convert get_sha1* to get_oid* Now that all the callers of get_sha1 directly or indirectly use struct object_id, rename the functions starting with get_sha1 to start with get_oid. Convert the internals in sha1_name.c to use struct object_id as well, and eliminate explicit length checks where possible. Convert a use of 40 in get_oid_basic to GIT_SHA1_HEXSZ. Outside of sha1_name.c and cache.h, this transition was made with the following semantic patch: @@ expression E1, E2; @@ - get_sha1(E1, E2.hash) + get_oid(E1, &E2) @@ expression E1, E2; @@ - get_sha1(E1, E2->hash) + get_oid(E1, E2) @@ expression E1, E2; @@ - get_sha1_committish(E1, E2.hash) + get_oid_committish(E1, &E2) @@ expression E1, E2; @@ - get_sha1_committish(E1, E2->hash) + get_oid_committish(E1, E2) @@ expression E1, E2; @@ - get_sha1_treeish(E1, E2.hash) + get_oid_treeish(E1, &E2) @@ expression E1, E2; @@ - get_sha1_treeish(E1, E2->hash) + get_oid_treeish(E1, E2) @@ expression E1, E2; @@ - get_sha1_commit(E1, E2.hash) + get_oid_commit(E1, &E2) @@ expression E1, E2; @@ - get_sha1_commit(E1, E2->hash) + get_oid_commit(E1, E2) @@ expression E1, E2; @@ - get_sha1_tree(E1, E2.hash) + get_oid_tree(E1, &E2) @@ expression E1, E2; @@ - get_sha1_tree(E1, E2->hash) + get_oid_tree(E1, E2) @@ expression E1, E2; @@ - get_sha1_blob(E1, E2.hash) + get_oid_blob(E1, &E2) @@ expression E1, E2; @@ - get_sha1_blob(E1, E2->hash) + get_oid_blob(E1, E2) @@ expression E1, E2, E3, E4; @@ - get_sha1_with_context(E1, E2, E3.hash, E4) + get_oid_with_context(E1, E2, &E3, E4) @@ expression E1, E2, E3, E4; @@ - get_sha1_with_context(E1, E2, E3->hash, E4) + get_oid_with_context(E1, E2, E3, E4) Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-07-14 07:49:28 +08:00
oidcpy(oid, &ds->candidate);
return 0;
}
/*
 * Hint callback: accept "oid" only when oid_object_info() reports it
 * as a commit.  Returns 1 for a commit and 0 for anything else.
 */
static int disambiguate_commit_only(struct repository *r,
				    const struct object_id *oid,
				    void *cb_data UNUSED)
{
	return oid_object_info(r, oid, NULL) == OBJ_COMMIT;
}
/*
 * Hint callback: accept "oid" when it is a commit, or a tag that
 * deref_tag() resolves to a commit.  Returns 1 when accepted, 0
 * otherwise.
 */
static int disambiguate_committish_only(struct repository *r,
					const struct object_id *oid,
					void *cb_data UNUSED)
{
	struct object *peeled;

	switch (oid_object_info(r, oid, NULL)) {
	case OBJ_COMMIT:
		return 1;
	case OBJ_TAG:
		break;
	default:
		return 0;
	}

	/* a tag: it has to be parsed and followed to see what it yields */
	peeled = deref_tag(r, parse_object(r, oid), NULL, 0);
	return peeled && peeled->type == OBJ_COMMIT;
}
/*
 * Hint callback: accept "oid" only when oid_object_info() reports it
 * as a tree.  Returns 1 for a tree and 0 for anything else.
 */
static int disambiguate_tree_only(struct repository *r,
				  const struct object_id *oid,
				  void *cb_data UNUSED)
{
	return oid_object_info(r, oid, NULL) == OBJ_TREE;
}
/*
 * Hint callback: accept "oid" when it is a tree or a commit, or a tag
 * that deref_tag() resolves to a tree or a commit.  Returns 1 when
 * accepted, 0 otherwise.
 */
static int disambiguate_treeish_only(struct repository *r,
				     const struct object_id *oid,
				     void *cb_data UNUSED)
{
	struct object *peeled;

	switch (oid_object_info(r, oid, NULL)) {
	case OBJ_TREE:
	case OBJ_COMMIT:
		return 1;
	case OBJ_TAG:
		break;
	default:
		return 0;
	}

	/* a tag: it has to be parsed and followed to see what it yields */
	peeled = deref_tag(r, parse_object(r, oid), NULL, 0);
	if (!peeled)
		return 0;
	return peeled->type == OBJ_TREE || peeled->type == OBJ_COMMIT;
}
/*
 * Hint callback: accept "oid" only when oid_object_info() reports it
 * as a blob.  Returns 1 for a blob and 0 for anything else.
 */
static int disambiguate_blob_only(struct repository *r,
				  const struct object_id *oid,
				  void *cb_data UNUSED)
{
	return oid_object_info(r, oid, NULL) == OBJ_BLOB;
}
/*
 * Hint callback chosen through set_disambiguate_hint_config().  It is
 * NULL until configured, and also after the "none" setting is applied.
 */
static disambiguate_hint_fn default_disambiguate_hint;
/*
 * Select the default disambiguation hint from a configuration value.
 *
 * var   - name of the configuration variable, used in error messages
 * value - one of "none", "commit", "committish", "tree", "treeish" or
 *         "blob", compared without regard to case
 *
 * Returns 0 after storing the matching callback (NULL for "none") in
 * default_disambiguate_hint.  A NULL value yields the result of
 * config_error_nonbool(); an unrecognised value yields the result of
 * error().
 */
int set_disambiguate_hint_config(const char *var, const char *value)
{
	static const struct {
		const char *name;
		disambiguate_hint_fn fn;
	} hints[] = {
		{ "none", NULL },
		{ "commit", disambiguate_commit_only },
		{ "committish", disambiguate_committish_only },
		{ "tree", disambiguate_tree_only },
		{ "treeish", disambiguate_treeish_only },
		{ "blob", disambiguate_blob_only }
	};
	size_t idx;

	if (!value)
		return config_error_nonbool(var);

	for (idx = 0; idx < ARRAY_SIZE(hints); idx++) {
		if (strcasecmp(value, hints[idx].name))
			continue;
		default_disambiguate_hint = hints[idx].fn;
		return 0;
	}

	return error("unknown hint type for '%s': %s", var, value);
}
/*
 * Prepare "ds" for looking up the abbreviated object name made of the
 * first "len" characters of "name" in repository "r".
 *
 * r    - repository to search; prepare_alt_odb() is called on it
 * name - hex digits of the abbreviation; upper-case digits are accepted
 *        and stored in lower case
 * len  - number of hex digits; must lie between MINIMUM_ABBREV and
 *        GIT_MAX_HEXSZ inclusive
 * algo - hash algorithm the name belongs to, or NULL, in which case
 *        GIT_HASH_UNKNOWN is recorded
 *
 * Returns 0 on success.  Returns -1 when len is out of range (ds is then
 * left untouched) or when a non-hex character is met (ds is then only
 * partially filled in).
 */
static int init_object_disambiguation(struct repository *r,
				      const char *name, int len,
				      const struct git_hash_algo *algo,
				      struct disambiguate_state *ds)
{
	int pos;

	if (len < MINIMUM_ABBREV || len > GIT_MAX_HEXSZ)
		return -1;

	memset(ds, 0, sizeof(*ds));

	for (pos = 0; pos < len; pos++) {
		unsigned char ch = name[pos];
		unsigned char nibble;

		if ('0' <= ch && ch <= '9') {
			nibble = ch - '0';
		} else if ('a' <= ch && ch <= 'f') {
			nibble = ch - 'a' + 10;
		} else if ('A' <= ch && ch <= 'F') {
			nibble = ch - 'A' + 10;
			ch += 'a' - 'A'; /* keep the hex form in lower case */
		} else {
			return -1;
		}

		ds->hex_pfx[pos] = ch;

		/* even positions fill the high nibble of a byte, odd ones the low */
		if (pos % 2 == 0)
			nibble <<= 4;
		ds->bin_pfx.hash[pos / 2] |= nibble;
	}

	ds->hex_pfx[len] = '\0';
	ds->len = len;
	ds->repo = r;
	if (algo)
		ds->bin_pfx.algo = hash_algo_by_ptr(algo);
	else
		ds->bin_pfx.algo = GIT_HASH_UNKNOWN;

	prepare_alt_odb(r);
	return 0;
}
/*
 * State handed to show_ambiguous_object() through its "data" pointer.
 */
struct ambiguous_output {
/* the lookup whose candidates are listed; supplies repo, fn and cb_data */
const struct disambiguate_state *ds;
/* NOTE(review): presumably accumulates the advice text - confirm in show_ambiguous_object() */
struct strbuf advice;
/* NOTE(review): presumably a scratch buffer for one candidate's line - confirm in show_ambiguous_object() */
struct strbuf sb;
};
static int show_ambiguous_object(const struct object_id *oid, void *data)
get_short_sha1: list ambiguous objects on error When the user gives us an ambiguous short sha1, we print an error and refuse to resolve it. In some cases, the next step is for them to feed us more characters (e.g., if they were retyping or cut-and-pasting from a full sha1). But in other cases, that might be all they have. For example, an old commit message may have used a 7-character hex that was unique at the time, but is now ambiguous. Git doesn't provide any information about the ambiguous objects it found, so it's hard for the user to find out which one they probably meant. This patch teaches get_short_sha1() to list the sha1s of the objects it found, along with a few bits of information that may help the user decide which one they meant. Here's what it looks like on git.git: $ git rev-parse b2e1 error: short SHA1 b2e1 is ambiguous hint: The candidates are: hint: b2e1196 tag v2.8.0-rc1 hint: b2e11d1 tree hint: b2e1632 commit 2007-11-14 - Merge branch 'bs/maint-commit-options' hint: b2e1759 blob hint: b2e18954 blob hint: b2e1895c blob fatal: ambiguous argument 'b2e1': unknown revision or path not in the working tree. Use '--' to separate paths from revisions, like this: 'git <command> [<revision>...] -- [<file>...]' We show the tagname for tags, and the date and subject for commits. For trees and blobs, in theory we could dig in the history to find the paths at which they were present. But that's very expensive (on the order of 30s for the kernel), and it's not likely to be all that helpful. Most short references are to commits, so the useful information is typically going to be that the object in question _isn't_ a commit. So it's silly to spend a lot of CPU preemptively digging up the path; the user can do it themselves if they really need to. And of course it's somewhat ironic that we abbreviate the sha1s in the disambiguation hint. 
But full sha1s would cause annoying line wrapping for the commit lines, and presumably the user is going to just re-issue their command immediately with the corrected sha1. We also restrict the list to those that match any disambiguation hint. E.g.: $ git rev-parse b2e1:foo error: short SHA1 b2e1 is ambiguous hint: The candidates are: hint: b2e1196 tag v2.8.0-rc1 hint: b2e11d1 tree hint: b2e1632 commit 2007-11-14 - Merge branch 'bs/maint-commit-options' fatal: Invalid object name 'b2e1'. does not bother reporting the blobs, because they cannot work as a treeish. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-09-26 20:00:36 +08:00
{
struct ambiguous_output *state = data;
const struct disambiguate_state *ds = state->ds;
struct strbuf *advice = &state->advice;
struct strbuf *sb = &state->sb;
get_short_sha1: list ambiguous objects on error When the user gives us an ambiguous short sha1, we print an error and refuse to resolve it. In some cases, the next step is for them to feed us more characters (e.g., if they were retyping or cut-and-pasting from a full sha1). But in other cases, that might be all they have. For example, an old commit message may have used a 7-character hex that was unique at the time, but is now ambiguous. Git doesn't provide any information about the ambiguous objects it found, so it's hard for the user to find out which one they probably meant. This patch teaches get_short_sha1() to list the sha1s of the objects it found, along with a few bits of information that may help the user decide which one they meant. Here's what it looks like on git.git: $ git rev-parse b2e1 error: short SHA1 b2e1 is ambiguous hint: The candidates are: hint: b2e1196 tag v2.8.0-rc1 hint: b2e11d1 tree hint: b2e1632 commit 2007-11-14 - Merge branch 'bs/maint-commit-options' hint: b2e1759 blob hint: b2e18954 blob hint: b2e1895c blob fatal: ambiguous argument 'b2e1': unknown revision or path not in the working tree. Use '--' to separate paths from revisions, like this: 'git <command> [<revision>...] -- [<file>...]' We show the tagname for tags, and the date and subject for commits. For trees and blobs, in theory we could dig in the history to find the paths at which they were present. But that's very expensive (on the order of 30s for the kernel), and it's not likely to be all that helpful. Most short references are to commits, so the useful information is typically going to be that the object in question _isn't_ a commit. So it's silly to spend a lot of CPU preemptively digging up the path; the user can do it themselves if they really need to. And of course it's somewhat ironic that we abbreviate the sha1s in the disambiguation hint. 
But full sha1s would cause annoying line wrapping for the commit lines, and presumably the user is going to just re-issue their command immediately with the corrected sha1. We also restrict the list to those that match any disambiguation hint. E.g.: $ git rev-parse b2e1:foo error: short SHA1 b2e1 is ambiguous hint: The candidates are: hint: b2e1196 tag v2.8.0-rc1 hint: b2e11d1 tree hint: b2e1632 commit 2007-11-14 - Merge branch 'bs/maint-commit-options' fatal: Invalid object name 'b2e1'. does not bother reporting the blobs, because they cannot work as a treeish. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-09-26 20:00:36 +08:00
int type;
const char *hash;
get_short_sha1: list ambiguous objects on error When the user gives us an ambiguous short sha1, we print an error and refuse to resolve it. In some cases, the next step is for them to feed us more characters (e.g., if they were retyping or cut-and-pasting from a full sha1). But in other cases, that might be all they have. For example, an old commit message may have used a 7-character hex that was unique at the time, but is now ambiguous. Git doesn't provide any information about the ambiguous objects it found, so it's hard for the user to find out which one they probably meant. This patch teaches get_short_sha1() to list the sha1s of the objects it found, along with a few bits of information that may help the user decide which one they meant. Here's what it looks like on git.git: $ git rev-parse b2e1 error: short SHA1 b2e1 is ambiguous hint: The candidates are: hint: b2e1196 tag v2.8.0-rc1 hint: b2e11d1 tree hint: b2e1632 commit 2007-11-14 - Merge branch 'bs/maint-commit-options' hint: b2e1759 blob hint: b2e18954 blob hint: b2e1895c blob fatal: ambiguous argument 'b2e1': unknown revision or path not in the working tree. Use '--' to separate paths from revisions, like this: 'git <command> [<revision>...] -- [<file>...]' We show the tagname for tags, and the date and subject for commits. For trees and blobs, in theory we could dig in the history to find the paths at which they were present. But that's very expensive (on the order of 30s for the kernel), and it's not likely to be all that helpful. Most short references are to commits, so the useful information is typically going to be that the object in question _isn't_ a commit. So it's silly to spend a lot of CPU preemptively digging up the path; the user can do it themselves if they really need to. And of course it's somewhat ironic that we abbreviate the sha1s in the disambiguation hint. 
But full sha1s would cause annoying line wrapping for the commit lines, and presumably the user is going to just re-issue their command immediately with the corrected sha1. We also restrict the list to those that match any disambiguation hint. E.g.: $ git rev-parse b2e1:foo error: short SHA1 b2e1 is ambiguous hint: The candidates are: hint: b2e1196 tag v2.8.0-rc1 hint: b2e11d1 tree hint: b2e1632 commit 2007-11-14 - Merge branch 'bs/maint-commit-options' fatal: Invalid object name 'b2e1'. does not bother reporting the blobs, because they cannot work as a treeish. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-09-26 20:00:36 +08:00
if (ds->fn && !ds->fn(ds->repo, oid, ds->cb_data))
get_short_sha1: list ambiguous objects on error When the user gives us an ambiguous short sha1, we print an error and refuse to resolve it. In some cases, the next step is for them to feed us more characters (e.g., if they were retyping or cut-and-pasting from a full sha1). But in other cases, that might be all they have. For example, an old commit message may have used a 7-character hex that was unique at the time, but is now ambiguous. Git doesn't provide any information about the ambiguous objects it found, so it's hard for the user to find out which one they probably meant. This patch teaches get_short_sha1() to list the sha1s of the objects it found, along with a few bits of information that may help the user decide which one they meant. Here's what it looks like on git.git: $ git rev-parse b2e1 error: short SHA1 b2e1 is ambiguous hint: The candidates are: hint: b2e1196 tag v2.8.0-rc1 hint: b2e11d1 tree hint: b2e1632 commit 2007-11-14 - Merge branch 'bs/maint-commit-options' hint: b2e1759 blob hint: b2e18954 blob hint: b2e1895c blob fatal: ambiguous argument 'b2e1': unknown revision or path not in the working tree. Use '--' to separate paths from revisions, like this: 'git <command> [<revision>...] -- [<file>...]' We show the tagname for tags, and the date and subject for commits. For trees and blobs, in theory we could dig in the history to find the paths at which they were present. But that's very expensive (on the order of 30s for the kernel), and it's not likely to be all that helpful. Most short references are to commits, so the useful information is typically going to be that the object in question _isn't_ a commit. So it's silly to spend a lot of CPU preemptively digging up the path; the user can do it themselves if they really need to. And of course it's somewhat ironic that we abbreviate the sha1s in the disambiguation hint. 
But full sha1s would cause annoying line wrapping for the commit lines, and presumably the user is going to just re-issue their command immediately with the corrected sha1. We also restrict the list to those that match any disambiguation hint. E.g.: $ git rev-parse b2e1:foo error: short SHA1 b2e1 is ambiguous hint: The candidates are: hint: b2e1196 tag v2.8.0-rc1 hint: b2e11d1 tree hint: b2e1632 commit 2007-11-14 - Merge branch 'bs/maint-commit-options' fatal: Invalid object name 'b2e1'. does not bother reporting the blobs, because they cannot work as a treeish. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-09-26 20:00:36 +08:00
return 0;
hash = repo_find_unique_abbrev(ds->repo, oid, DEFAULT_ABBREV);
type = oid_object_info(ds->repo, oid, NULL);
object-name: explicitly handle OBJ_BAD in show_ambiguous_object() Amend the "unknown type" handling in the code that displays the ambiguous object list to assert() that we're either going to get the "real" object types we can pass to type_name(), or a -1 (OBJ_BAD) return value from oid_object_info(). See [1] for the current output, and [2] for the commit that added the "unknown type" handling. We are never going to get an "unknown type" in the sense of custom types crafted with "hash-object --literally", since we're not using the OBJECT_INFO_ALLOW_UNKNOWN_TYPE flag. If we manage to otherwise unpack such an object without errors we'll die() in parse_loose_header_extended() called by sort_ambiguous() before we get to show_ambiguous_object(), as is asserted by the test added in the preceding commit. So saying "unknown type" here was always misleading, we really meant to say that we had a failure parsing the object at all, i.e. that we had repository corruption. If the problem is only that its type is unknown we won't reach this code. So let's emit a generic "[bad object]" instead. As our tests added in the preceding commit show, we'll have emitted various "error" output already in those cases. We should do better in the truly "unknown type" cases, which we'd need to handle if we were passing down the OBJECT_INFO_ALLOW_UNKNOWN_TYPE flag. But let's leave that for some future improvement. In a subsequent commit I'll improve the output we do show, and not having to handle the "unknown type" (as in OBJECT_INFO_ALLOW_UNKNOWN_TYPE) simplifies that change. 1. 5cc044e0257 (get_short_oid: sort ambiguous objects by type, then SHA-1, 2018-05-10) 2. 1ffa26c461 (get_short_sha1: list ambiguous objects on error, 2016-09-26) Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-01-27 13:26:44 +08:00
if (type < 0) {
/*
* TRANSLATORS: This is a line of ambiguous object
* output shown when we cannot look up or parse the
* object in question. E.g. "deadbeef [bad object]".
*/
strbuf_addf(sb, _("%s [bad object]"), hash);
object-name: explicitly handle OBJ_BAD in show_ambiguous_object() Amend the "unknown type" handling in the code that displays the ambiguous object list to assert() that we're either going to get the "real" object types we can pass to type_name(), or a -1 (OBJ_BAD) return value from oid_object_info(). See [1] for the current output, and [2] for the commit that added the "unknown type" handling. We are never going to get an "unknown type" in the sense of custom types crafted with "hash-object --literally", since we're not using the OBJECT_INFO_ALLOW_UNKNOWN_TYPE flag. If we manage to otherwise unpack such an object without errors we'll die() in parse_loose_header_extended() called by sort_ambiguous() before we get to show_ambiguous_object(), as is asserted by the test added in the preceding commit. So saying "unknown type" here was always misleading, we really meant to say that we had a failure parsing the object at all, i.e. that we had repository corruption. If the problem is only that its type is unknown we won't reach this code. So let's emit a generic "[bad object]" instead. As our tests added in the preceding commit show, we'll have emitted various "error" output already in those cases. We should do better in the truly "unknown type" cases, which we'd need to handle if we were passing down the OBJECT_INFO_ALLOW_UNKNOWN_TYPE flag. But let's leave that for some future improvement. In a subsequent commit I'll improve the output we do show, and not having to handle the "unknown type" (as in OBJECT_INFO_ALLOW_UNKNOWN_TYPE) simplifies that change. 1. 5cc044e0257 (get_short_oid: sort ambiguous objects by type, then SHA-1, 2018-05-10) 2. 1ffa26c461 (get_short_sha1: list ambiguous objects on error, 2016-09-26) Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-01-27 13:26:44 +08:00
goto out;
}
assert(type == OBJ_TREE || type == OBJ_COMMIT ||
type == OBJ_BLOB || type == OBJ_TAG);
get_short_sha1: list ambiguous objects on error When the user gives us an ambiguous short sha1, we print an error and refuse to resolve it. In some cases, the next step is for them to feed us more characters (e.g., if they were retyping or cut-and-pasting from a full sha1). But in other cases, that might be all they have. For example, an old commit message may have used a 7-character hex that was unique at the time, but is now ambiguous. Git doesn't provide any information about the ambiguous objects it found, so it's hard for the user to find out which one they probably meant. This patch teaches get_short_sha1() to list the sha1s of the objects it found, along with a few bits of information that may help the user decide which one they meant. Here's what it looks like on git.git: $ git rev-parse b2e1 error: short SHA1 b2e1 is ambiguous hint: The candidates are: hint: b2e1196 tag v2.8.0-rc1 hint: b2e11d1 tree hint: b2e1632 commit 2007-11-14 - Merge branch 'bs/maint-commit-options' hint: b2e1759 blob hint: b2e18954 blob hint: b2e1895c blob fatal: ambiguous argument 'b2e1': unknown revision or path not in the working tree. Use '--' to separate paths from revisions, like this: 'git <command> [<revision>...] -- [<file>...]' We show the tagname for tags, and the date and subject for commits. For trees and blobs, in theory we could dig in the history to find the paths at which they were present. But that's very expensive (on the order of 30s for the kernel), and it's not likely to be all that helpful. Most short references are to commits, so the useful information is typically going to be that the object in question _isn't_ a commit. So it's silly to spend a lot of CPU preemptively digging up the path; the user can do it themselves if they really need to. And of course it's somewhat ironic that we abbreviate the sha1s in the disambiguation hint. 
But full sha1s would cause annoying line wrapping for the commit lines, and presumably the user is going to just re-issue their command immediately with the corrected sha1. We also restrict the list to those that match any disambiguation hint. E.g.: $ git rev-parse b2e1:foo error: short SHA1 b2e1 is ambiguous hint: The candidates are: hint: b2e1196 tag v2.8.0-rc1 hint: b2e11d1 tree hint: b2e1632 commit 2007-11-14 - Merge branch 'bs/maint-commit-options' fatal: Invalid object name 'b2e1'. does not bother reporting the blobs, because they cannot work as a treeish. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-09-26 20:00:36 +08:00
if (type == OBJ_COMMIT) {
struct strbuf date = STRBUF_INIT;
struct strbuf msg = STRBUF_INIT;
struct commit *commit = lookup_commit(ds->repo, oid);
get_short_sha1: list ambiguous objects on error When the user gives us an ambiguous short sha1, we print an error and refuse to resolve it. In some cases, the next step is for them to feed us more characters (e.g., if they were retyping or cut-and-pasting from a full sha1). But in other cases, that might be all they have. For example, an old commit message may have used a 7-character hex that was unique at the time, but is now ambiguous. Git doesn't provide any information about the ambiguous objects it found, so it's hard for the user to find out which one they probably meant. This patch teaches get_short_sha1() to list the sha1s of the objects it found, along with a few bits of information that may help the user decide which one they meant. Here's what it looks like on git.git: $ git rev-parse b2e1 error: short SHA1 b2e1 is ambiguous hint: The candidates are: hint: b2e1196 tag v2.8.0-rc1 hint: b2e11d1 tree hint: b2e1632 commit 2007-11-14 - Merge branch 'bs/maint-commit-options' hint: b2e1759 blob hint: b2e18954 blob hint: b2e1895c blob fatal: ambiguous argument 'b2e1': unknown revision or path not in the working tree. Use '--' to separate paths from revisions, like this: 'git <command> [<revision>...] -- [<file>...]' We show the tagname for tags, and the date and subject for commits. For trees and blobs, in theory we could dig in the history to find the paths at which they were present. But that's very expensive (on the order of 30s for the kernel), and it's not likely to be all that helpful. Most short references are to commits, so the useful information is typically going to be that the object in question _isn't_ a commit. So it's silly to spend a lot of CPU preemptively digging up the path; the user can do it themselves if they really need to. And of course it's somewhat ironic that we abbreviate the sha1s in the disambiguation hint. 
But full sha1s would cause annoying line wrapping for the commit lines, and presumably the user is going to just re-issue their command immediately with the corrected sha1. We also restrict the list to those that match any disambiguation hint. E.g.: $ git rev-parse b2e1:foo error: short SHA1 b2e1 is ambiguous hint: The candidates are: hint: b2e1196 tag v2.8.0-rc1 hint: b2e11d1 tree hint: b2e1632 commit 2007-11-14 - Merge branch 'bs/maint-commit-options' fatal: Invalid object name 'b2e1'. does not bother reporting the blobs, because they cannot work as a treeish. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-09-26 20:00:36 +08:00
if (commit) {
struct pretty_print_context pp = {0};
pp.date_mode.type = DATE_SHORT;
repo_format_commit_message(the_repository, commit,
"%ad", &date, &pp);
repo_format_commit_message(the_repository, commit,
"%s", &msg, &pp);
get_short_sha1: list ambiguous objects on error When the user gives us an ambiguous short sha1, we print an error and refuse to resolve it. In some cases, the next step is for them to feed us more characters (e.g., if they were retyping or cut-and-pasting from a full sha1). But in other cases, that might be all they have. For example, an old commit message may have used a 7-character hex that was unique at the time, but is now ambiguous. Git doesn't provide any information about the ambiguous objects it found, so it's hard for the user to find out which one they probably meant. This patch teaches get_short_sha1() to list the sha1s of the objects it found, along with a few bits of information that may help the user decide which one they meant. Here's what it looks like on git.git: $ git rev-parse b2e1 error: short SHA1 b2e1 is ambiguous hint: The candidates are: hint: b2e1196 tag v2.8.0-rc1 hint: b2e11d1 tree hint: b2e1632 commit 2007-11-14 - Merge branch 'bs/maint-commit-options' hint: b2e1759 blob hint: b2e18954 blob hint: b2e1895c blob fatal: ambiguous argument 'b2e1': unknown revision or path not in the working tree. Use '--' to separate paths from revisions, like this: 'git <command> [<revision>...] -- [<file>...]' We show the tagname for tags, and the date and subject for commits. For trees and blobs, in theory we could dig in the history to find the paths at which they were present. But that's very expensive (on the order of 30s for the kernel), and it's not likely to be all that helpful. Most short references are to commits, so the useful information is typically going to be that the object in question _isn't_ a commit. So it's silly to spend a lot of CPU preemptively digging up the path; the user can do it themselves if they really need to. And of course it's somewhat ironic that we abbreviate the sha1s in the disambiguation hint. 
But full sha1s would cause annoying line wrapping for the commit lines, and presumably the user is going to just re-issue their command immediately with the corrected sha1. We also restrict the list to those that match any disambiguation hint. E.g.: $ git rev-parse b2e1:foo error: short SHA1 b2e1 is ambiguous hint: The candidates are: hint: b2e1196 tag v2.8.0-rc1 hint: b2e11d1 tree hint: b2e1632 commit 2007-11-14 - Merge branch 'bs/maint-commit-options' fatal: Invalid object name 'b2e1'. does not bother reporting the blobs, because they cannot work as a treeish. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-09-26 20:00:36 +08:00
}
/*
* TRANSLATORS: This is a line of ambiguous commit
* object output. E.g.:
*
* "deadbeef commit 2021-01-01 - Some Commit Message"
*/
strbuf_addf(sb, _("%s commit %s - %s"), hash, date.buf,
msg.buf);
strbuf_release(&date);
strbuf_release(&msg);
get_short_sha1: list ambiguous objects on error When the user gives us an ambiguous short sha1, we print an error and refuse to resolve it. In some cases, the next step is for them to feed us more characters (e.g., if they were retyping or cut-and-pasting from a full sha1). But in other cases, that might be all they have. For example, an old commit message may have used a 7-character hex that was unique at the time, but is now ambiguous. Git doesn't provide any information about the ambiguous objects it found, so it's hard for the user to find out which one they probably meant. This patch teaches get_short_sha1() to list the sha1s of the objects it found, along with a few bits of information that may help the user decide which one they meant. Here's what it looks like on git.git: $ git rev-parse b2e1 error: short SHA1 b2e1 is ambiguous hint: The candidates are: hint: b2e1196 tag v2.8.0-rc1 hint: b2e11d1 tree hint: b2e1632 commit 2007-11-14 - Merge branch 'bs/maint-commit-options' hint: b2e1759 blob hint: b2e18954 blob hint: b2e1895c blob fatal: ambiguous argument 'b2e1': unknown revision or path not in the working tree. Use '--' to separate paths from revisions, like this: 'git <command> [<revision>...] -- [<file>...]' We show the tagname for tags, and the date and subject for commits. For trees and blobs, in theory we could dig in the history to find the paths at which they were present. But that's very expensive (on the order of 30s for the kernel), and it's not likely to be all that helpful. Most short references are to commits, so the useful information is typically going to be that the object in question _isn't_ a commit. So it's silly to spend a lot of CPU preemptively digging up the path; the user can do it themselves if they really need to. And of course it's somewhat ironic that we abbreviate the sha1s in the disambiguation hint. 
But full sha1s would cause annoying line wrapping for the commit lines, and presumably the user is going to just re-issue their command immediately with the corrected sha1. We also restrict the list to those that match any disambiguation hint. E.g.: $ git rev-parse b2e1:foo error: short SHA1 b2e1 is ambiguous hint: The candidates are: hint: b2e1196 tag v2.8.0-rc1 hint: b2e11d1 tree hint: b2e1632 commit 2007-11-14 - Merge branch 'bs/maint-commit-options' fatal: Invalid object name 'b2e1'. does not bother reporting the blobs, because they cannot work as a treeish. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-09-26 20:00:36 +08:00
} else if (type == OBJ_TAG) {
struct tag *tag = lookup_tag(ds->repo, oid);
if (!parse_tag(tag) && tag->tag) {
/*
* TRANSLATORS: This is a line of ambiguous
* tag object output. E.g.:
*
* "deadbeef tag 2022-01-01 - Some Tag Message"
*
* The second argument is the YYYY-MM-DD found
* in the tag.
*
* The third argument is the "tag" string
* from object.c.
*/
strbuf_addf(sb, _("%s tag %s - %s"), hash,
show_date(tag->date, 0, DATE_MODE(SHORT)),
tag->tag);
} else {
/*
* TRANSLATORS: This is a line of ambiguous
* tag object output where we couldn't parse
* the tag itself. E.g.:
*
* "deadbeef [bad tag, could not parse it]"
*/
strbuf_addf(sb, _("%s [bad tag, could not parse it]"),
hash);
}
} else if (type == OBJ_TREE) {
/*
* TRANSLATORS: This is a line of ambiguous <type>
* object output. E.g. "deadbeef tree".
*/
strbuf_addf(sb, _("%s tree"), hash);
} else if (type == OBJ_BLOB) {
/*
* TRANSLATORS: This is a line of ambiguous <type>
* object output. E.g. "deadbeef blob".
*/
strbuf_addf(sb, _("%s blob"), hash);
get_short_sha1: list ambiguous objects on error When the user gives us an ambiguous short sha1, we print an error and refuse to resolve it. In some cases, the next step is for them to feed us more characters (e.g., if they were retyping or cut-and-pasting from a full sha1). But in other cases, that might be all they have. For example, an old commit message may have used a 7-character hex that was unique at the time, but is now ambiguous. Git doesn't provide any information about the ambiguous objects it found, so it's hard for the user to find out which one they probably meant. This patch teaches get_short_sha1() to list the sha1s of the objects it found, along with a few bits of information that may help the user decide which one they meant. Here's what it looks like on git.git: $ git rev-parse b2e1 error: short SHA1 b2e1 is ambiguous hint: The candidates are: hint: b2e1196 tag v2.8.0-rc1 hint: b2e11d1 tree hint: b2e1632 commit 2007-11-14 - Merge branch 'bs/maint-commit-options' hint: b2e1759 blob hint: b2e18954 blob hint: b2e1895c blob fatal: ambiguous argument 'b2e1': unknown revision or path not in the working tree. Use '--' to separate paths from revisions, like this: 'git <command> [<revision>...] -- [<file>...]' We show the tagname for tags, and the date and subject for commits. For trees and blobs, in theory we could dig in the history to find the paths at which they were present. But that's very expensive (on the order of 30s for the kernel), and it's not likely to be all that helpful. Most short references are to commits, so the useful information is typically going to be that the object in question _isn't_ a commit. So it's silly to spend a lot of CPU preemptively digging up the path; the user can do it themselves if they really need to. And of course it's somewhat ironic that we abbreviate the sha1s in the disambiguation hint. 
But full sha1s would cause annoying line wrapping for the commit lines, and presumably the user is going to just re-issue their command immediately with the corrected sha1. We also restrict the list to those that match any disambiguation hint. E.g.: $ git rev-parse b2e1:foo error: short SHA1 b2e1 is ambiguous hint: The candidates are: hint: b2e1196 tag v2.8.0-rc1 hint: b2e11d1 tree hint: b2e1632 commit 2007-11-14 - Merge branch 'bs/maint-commit-options' fatal: Invalid object name 'b2e1'. does not bother reporting the blobs, because they cannot work as a treeish. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-09-26 20:00:36 +08:00
}
object-name: explicitly handle OBJ_BAD in show_ambiguous_object() Amend the "unknown type" handling in the code that displays the ambiguous object list to assert() that we're either going to get the "real" object types we can pass to type_name(), or a -1 (OBJ_BAD) return value from oid_object_info(). See [1] for the current output, and [2] for the commit that added the "unknown type" handling. We are never going to get an "unknown type" in the sense of custom types crafted with "hash-object --literally", since we're not using the OBJECT_INFO_ALLOW_UNKNOWN_TYPE flag. If we manage to otherwise unpack such an object without errors we'll die() in parse_loose_header_extended() called by sort_ambiguous() before we get to show_ambiguous_object(), as is asserted by the test added in the preceding commit. So saying "unknown type" here was always misleading, we really meant to say that we had a failure parsing the object at all, i.e. that we had repository corruption. If the problem is only that its type is unknown we won't reach this code. So let's emit a generic "[bad object]" instead. As our tests added in the preceding commit show, we'll have emitted various "error" output already in those cases. We should do better in the truly "unknown type" cases, which we'd need to handle if we were passing down the OBJECT_INFO_ALLOW_UNKNOWN_TYPE flag. But let's leave that for some future improvement. In a subsequent commit I'll improve the output we do show, and not having to handle the "unknown type" (as in OBJECT_INFO_ALLOW_UNKNOWN_TYPE) simplifies that change. 1. 5cc044e0257 (get_short_oid: sort ambiguous objects by type, then SHA-1, 2018-05-10) 2. 1ffa26c461 (get_short_sha1: list ambiguous objects on error, 2016-09-26) Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-01-27 13:26:44 +08:00
out:
/*
* TRANSLATORS: This is a line item of ambiguous object output
* from describe_ambiguous_object() above. For RTL languages
* you'll probably want to swap the "%s" and leading " " space
* around.
*/
strbuf_addf(advice, _(" %s\n"), sb->buf);
get_short_sha1: list ambiguous objects on error When the user gives us an ambiguous short sha1, we print an error and refuse to resolve it. In some cases, the next step is for them to feed us more characters (e.g., if they were retyping or cut-and-pasting from a full sha1). But in other cases, that might be all they have. For example, an old commit message may have used a 7-character hex that was unique at the time, but is now ambiguous. Git doesn't provide any information about the ambiguous objects it found, so it's hard for the user to find out which one they probably meant. This patch teaches get_short_sha1() to list the sha1s of the objects it found, along with a few bits of information that may help the user decide which one they meant. Here's what it looks like on git.git: $ git rev-parse b2e1 error: short SHA1 b2e1 is ambiguous hint: The candidates are: hint: b2e1196 tag v2.8.0-rc1 hint: b2e11d1 tree hint: b2e1632 commit 2007-11-14 - Merge branch 'bs/maint-commit-options' hint: b2e1759 blob hint: b2e18954 blob hint: b2e1895c blob fatal: ambiguous argument 'b2e1': unknown revision or path not in the working tree. Use '--' to separate paths from revisions, like this: 'git <command> [<revision>...] -- [<file>...]' We show the tagname for tags, and the date and subject for commits. For trees and blobs, in theory we could dig in the history to find the paths at which they were present. But that's very expensive (on the order of 30s for the kernel), and it's not likely to be all that helpful. Most short references are to commits, so the useful information is typically going to be that the object in question _isn't_ a commit. So it's silly to spend a lot of CPU preemptively digging up the path; the user can do it themselves if they really need to. And of course it's somewhat ironic that we abbreviate the sha1s in the disambiguation hint. 
But full sha1s would cause annoying line wrapping for the commit lines, and presumably the user is going to just re-issue their command immediately with the corrected sha1. We also restrict the list to those that match any disambiguation hint. E.g.: $ git rev-parse b2e1:foo error: short SHA1 b2e1 is ambiguous hint: The candidates are: hint: b2e1196 tag v2.8.0-rc1 hint: b2e11d1 tree hint: b2e1632 commit 2007-11-14 - Merge branch 'bs/maint-commit-options' fatal: Invalid object name 'b2e1'. does not bother reporting the blobs, because they cannot work as a treeish. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-09-26 20:00:36 +08:00
strbuf_reset(sb);
get_short_sha1: list ambiguous objects on error When the user gives us an ambiguous short sha1, we print an error and refuse to resolve it. In some cases, the next step is for them to feed us more characters (e.g., if they were retyping or cut-and-pasting from a full sha1). But in other cases, that might be all they have. For example, an old commit message may have used a 7-character hex that was unique at the time, but is now ambiguous. Git doesn't provide any information about the ambiguous objects it found, so it's hard for the user to find out which one they probably meant. This patch teaches get_short_sha1() to list the sha1s of the objects it found, along with a few bits of information that may help the user decide which one they meant. Here's what it looks like on git.git: $ git rev-parse b2e1 error: short SHA1 b2e1 is ambiguous hint: The candidates are: hint: b2e1196 tag v2.8.0-rc1 hint: b2e11d1 tree hint: b2e1632 commit 2007-11-14 - Merge branch 'bs/maint-commit-options' hint: b2e1759 blob hint: b2e18954 blob hint: b2e1895c blob fatal: ambiguous argument 'b2e1': unknown revision or path not in the working tree. Use '--' to separate paths from revisions, like this: 'git <command> [<revision>...] -- [<file>...]' We show the tagname for tags, and the date and subject for commits. For trees and blobs, in theory we could dig in the history to find the paths at which they were present. But that's very expensive (on the order of 30s for the kernel), and it's not likely to be all that helpful. Most short references are to commits, so the useful information is typically going to be that the object in question _isn't_ a commit. So it's silly to spend a lot of CPU preemptively digging up the path; the user can do it themselves if they really need to. And of course it's somewhat ironic that we abbreviate the sha1s in the disambiguation hint. 
But full sha1s would cause annoying line wrapping for the commit lines, and presumably the user is going to just re-issue their command immediately with the corrected sha1. We also restrict the list to those that match any disambiguation hint. E.g.: $ git rev-parse b2e1:foo error: short SHA1 b2e1 is ambiguous hint: The candidates are: hint: b2e1196 tag v2.8.0-rc1 hint: b2e11d1 tree hint: b2e1632 commit 2007-11-14 - Merge branch 'bs/maint-commit-options' fatal: Invalid object name 'b2e1'. does not bother reporting the blobs, because they cannot work as a treeish. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-09-26 20:00:36 +08:00
return 0;
}
/*
 * Per-object callback: record `oid` as one more entry in the array
 * handed in through `data`.
 *
 * `data` is passed straight to oid_array_append() as the destination
 * array, so the caller must supply a pointer to the oid_array it wants
 * filled. Always returns 0.
 */
static int collect_ambiguous(const struct object_id *oid, void *data)
{
	struct oid_array *candidates = data;

	oid_array_append(candidates, oid);
	return 0;
}
/*
 * Adapter around collect_ambiguous() for callers whose callback
 * signature carries a leading repository argument.
 *
 * The repository `r` is not used; `oid` and `data` are forwarded
 * unchanged and the result of collect_ambiguous() is returned as-is.
 */
static int repo_collect_ambiguous(struct repository *r UNUSED,
				  const struct object_id *oid,
				  void *data)
{
	int ret = collect_ambiguous(oid, data);

	return ret;
}
static int sort_ambiguous(const void *va, const void *vb, void *ctx)
get_short_oid: sort ambiguous objects by type, then SHA-1 Change the output emitted when an ambiguous object is encountered so that we show tags first, then commits, followed by trees, and finally blobs. Within each type we show objects in hashcmp() order. Before this change the objects were only ordered by hashcmp(). The reason for doing this is that the output looks better as a result, e.g. the v2.17.0 tag before this change on "git show e8f2" would display: hint: The candidates are: hint: e8f2093055 tree hint: e8f21caf94 commit 2013-06-24 - bash prompt: print unique detached HEAD abbreviated object name hint: e8f21d02f7 blob hint: e8f21d577c blob hint: e8f25a3a50 tree hint: e8f26250fa commit 2017-02-03 - Merge pull request #996 from jeffhostetler/jeffhostetler/register_rename_src hint: e8f2650052 tag v2.17.0 hint: e8f2867228 blob hint: e8f28d537c tree hint: e8f2a35526 blob hint: e8f2bc0c06 commit 2015-05-10 - Documentation: note behavior for multiple remote.url entries hint: e8f2cf6ec0 tree Now we'll instead show: hint: e8f2650052 tag v2.17.0 hint: e8f21caf94 commit 2013-06-24 - bash prompt: print unique detached HEAD abbreviated object name hint: e8f26250fa commit 2017-02-03 - Merge pull request #996 from jeffhostetler/jeffhostetler/register_rename_src hint: e8f2bc0c06 commit 2015-05-10 - Documentation: note behavior for multiple remote.url entries hint: e8f2093055 tree hint: e8f25a3a50 tree hint: e8f28d537c tree hint: e8f2cf6ec0 tree hint: e8f21d02f7 blob hint: e8f21d577c blob hint: e8f2867228 blob hint: e8f2a35526 blob Since we show the commit data in the output that's nicely aligned once we sort by object type. The decision to show tags before commits is pretty arbitrary. I don't want to order by object_type since there tags come last after blobs, which doesn't make sense if we want to show the most important things first. 
I could display them after commits, but it's much less likely that we'll display a tag, so if there is one it makes sense to show it prominently at the top. A note on the implementation: Derrick rightly pointed out[1] that we're bending over backwards here in get_short_oid() to first de-duplicate the list, and then emit it, but could simply do it in one step. The reason for that is that oid_array_for_each_unique() doesn't actually require that the array be sorted by oid_array_sort(), it just needs to be sorted in some order that guarantees that all objects with the same ID are adjacent to one another, which (barring a hash collision, which'll be someone else's problem) the sort_ambiguous() function does. I agree that would be simpler for this code, and had forgotten why I initially wrote it like this[2]. But on further reflection I think it's better to do more work here just so we're not underhandedly using the oid-array API where we lie about the list being sorted. That would break any subsequent use of oid_array_lookup() in subtle ways. I could get around that by hacking the API itself to support this use-case and documenting it, which I did as a WIP patch in [3], but I think it's too much code smell just for this one call site. It's simpler for the API to just introduce a oid_array_for_each() function to eagerly spew out the list without sorting or de-duplication, and then do the de-duplication and sorting in two passes. 1. https://public-inbox.org/git/20180501130318.58251-1-dstolee@microsoft.com/ 2. https://public-inbox.org/git/876047ze9v.fsf@evledraar.gmail.com/ 3. https://public-inbox.org/git/874ljrzctc.fsf@evledraar.gmail.com/ Helped-by: Derrick Stolee <dstolee@microsoft.com> Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-05-10 20:43:02 +08:00
{
struct repository *sort_ambiguous_repo = ctx;
const struct object_id *a = va, *b = vb;
int a_type = oid_object_info(sort_ambiguous_repo, a, NULL);
int b_type = oid_object_info(sort_ambiguous_repo, b, NULL);
get_short_oid: sort ambiguous objects by type, then SHA-1 Change the output emitted when an ambiguous object is encountered so that we show tags first, then commits, followed by trees, and finally blobs. Within each type we show objects in hashcmp() order. Before this change the objects were only ordered by hashcmp(). The reason for doing this is that the output looks better as a result, e.g. the v2.17.0 tag before this change on "git show e8f2" would display: hint: The candidates are: hint: e8f2093055 tree hint: e8f21caf94 commit 2013-06-24 - bash prompt: print unique detached HEAD abbreviated object name hint: e8f21d02f7 blob hint: e8f21d577c blob hint: e8f25a3a50 tree hint: e8f26250fa commit 2017-02-03 - Merge pull request #996 from jeffhostetler/jeffhostetler/register_rename_src hint: e8f2650052 tag v2.17.0 hint: e8f2867228 blob hint: e8f28d537c tree hint: e8f2a35526 blob hint: e8f2bc0c06 commit 2015-05-10 - Documentation: note behavior for multiple remote.url entries hint: e8f2cf6ec0 tree Now we'll instead show: hint: e8f2650052 tag v2.17.0 hint: e8f21caf94 commit 2013-06-24 - bash prompt: print unique detached HEAD abbreviated object name hint: e8f26250fa commit 2017-02-03 - Merge pull request #996 from jeffhostetler/jeffhostetler/register_rename_src hint: e8f2bc0c06 commit 2015-05-10 - Documentation: note behavior for multiple remote.url entries hint: e8f2093055 tree hint: e8f25a3a50 tree hint: e8f28d537c tree hint: e8f2cf6ec0 tree hint: e8f21d02f7 blob hint: e8f21d577c blob hint: e8f2867228 blob hint: e8f2a35526 blob Since we show the commit data in the output that's nicely aligned once we sort by object type. The decision to show tags before commits is pretty arbitrary. I don't want to order by object_type since there tags come last after blobs, which doesn't make sense if we want to show the most important things first. 
I could display them after commits, but it's much less likely that we'll display a tag, so if there is one it makes sense to show it prominently at the top. A note on the implementation: Derrick rightly pointed out[1] that we're bending over backwards here in get_short_oid() to first de-duplicate the list, and then emit it, but could simply do it in one step. The reason for that is that oid_array_for_each_unique() doesn't actually require that the array be sorted by oid_array_sort(), it just needs to be sorted in some order that guarantees that all objects with the same ID are adjacent to one another, which (barring a hash collision, which'll be someone else's problem) the sort_ambiguous() function does. I agree that would be simpler for this code, and had forgotten why I initially wrote it like this[2]. But on further reflection I think it's better to do more work here just so we're not underhandedly using the oid-array API where we lie about the list being sorted. That would break any subsequent use of oid_array_lookup() in subtle ways. I could get around that by hacking the API itself to support this use-case and documenting it, which I did as a WIP patch in [3], but I think it's too much code smell just for this one call site. It's simpler for the API to just introduce a oid_array_for_each() function to eagerly spew out the list without sorting or de-duplication, and then do the de-duplication and sorting in two passes. 1. https://public-inbox.org/git/20180501130318.58251-1-dstolee@microsoft.com/ 2. https://public-inbox.org/git/876047ze9v.fsf@evledraar.gmail.com/ 3. https://public-inbox.org/git/874ljrzctc.fsf@evledraar.gmail.com/ Helped-by: Derrick Stolee <dstolee@microsoft.com> Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-05-10 20:43:02 +08:00
int a_type_sort;
int b_type_sort;
/*
* Sorts by hash within the same object type, just as
* oid_array_for_each_unique() would do.
*/
if (a_type == b_type) {
if (a->algo == b->algo)
return oidcmp(a, b);
else
return a->algo > b->algo ? 1 : -1;
}
get_short_oid: sort ambiguous objects by type, then SHA-1 Change the output emitted when an ambiguous object is encountered so that we show tags first, then commits, followed by trees, and finally blobs. Within each type we show objects in hashcmp() order. Before this change the objects were only ordered by hashcmp(). The reason for doing this is that the output looks better as a result, e.g. the v2.17.0 tag before this change on "git show e8f2" would display: hint: The candidates are: hint: e8f2093055 tree hint: e8f21caf94 commit 2013-06-24 - bash prompt: print unique detached HEAD abbreviated object name hint: e8f21d02f7 blob hint: e8f21d577c blob hint: e8f25a3a50 tree hint: e8f26250fa commit 2017-02-03 - Merge pull request #996 from jeffhostetler/jeffhostetler/register_rename_src hint: e8f2650052 tag v2.17.0 hint: e8f2867228 blob hint: e8f28d537c tree hint: e8f2a35526 blob hint: e8f2bc0c06 commit 2015-05-10 - Documentation: note behavior for multiple remote.url entries hint: e8f2cf6ec0 tree Now we'll instead show: hint: e8f2650052 tag v2.17.0 hint: e8f21caf94 commit 2013-06-24 - bash prompt: print unique detached HEAD abbreviated object name hint: e8f26250fa commit 2017-02-03 - Merge pull request #996 from jeffhostetler/jeffhostetler/register_rename_src hint: e8f2bc0c06 commit 2015-05-10 - Documentation: note behavior for multiple remote.url entries hint: e8f2093055 tree hint: e8f25a3a50 tree hint: e8f28d537c tree hint: e8f2cf6ec0 tree hint: e8f21d02f7 blob hint: e8f21d577c blob hint: e8f2867228 blob hint: e8f2a35526 blob Since we show the commit data in the output that's nicely aligned once we sort by object type. The decision to show tags before commits is pretty arbitrary. I don't want to order by object_type since there tags come last after blobs, which doesn't make sense if we want to show the most important things first. 
I could display them after commits, but it's much less likely that we'll display a tag, so if there is one it makes sense to show it prominently at the top. A note on the implementation: Derrick rightly pointed out[1] that we're bending over backwards here in get_short_oid() to first de-duplicate the list, and then emit it, but could simply do it in one step. The reason for that is that oid_array_for_each_unique() doesn't actually require that the array be sorted by oid_array_sort(), it just needs to be sorted in some order that guarantees that all objects with the same ID are adjacent to one another, which (barring a hash collision, which'll be someone else's problem) the sort_ambiguous() function does. I agree that would be simpler for this code, and had forgotten why I initially wrote it like this[2]. But on further reflection I think it's better to do more work here just so we're not underhandedly using the oid-array API where we lie about the list being sorted. That would break any subsequent use of oid_array_lookup() in subtle ways. I could get around that by hacking the API itself to support this use-case and documenting it, which I did as a WIP patch in [3], but I think it's too much code smell just for this one call site. It's simpler for the API to just introduce a oid_array_for_each() function to eagerly spew out the list without sorting or de-duplication, and then do the de-duplication and sorting in two passes. 1. https://public-inbox.org/git/20180501130318.58251-1-dstolee@microsoft.com/ 2. https://public-inbox.org/git/876047ze9v.fsf@evledraar.gmail.com/ 3. https://public-inbox.org/git/874ljrzctc.fsf@evledraar.gmail.com/ Helped-by: Derrick Stolee <dstolee@microsoft.com> Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-05-10 20:43:02 +08:00
/*
* Between object types show tags, then commits, and finally
* trees and blobs.
*
* The object_type enum is commit, tree, blob, tag, but we
* want tag, commit, tree blob. Cleverly (perhaps too
* cleverly) do that with modulus, since the enum assigns 1 to
* commit, so tag becomes 0.
*/
a_type_sort = a_type % 4;
b_type_sort = b_type % 4;
return a_type_sort > b_type_sort ? 1 : -1;
}
/*
 * Sort the object IDs held in "a" in place.
 *
 * The a->nr entries of a->oid are handed to QSORT_S with sort_ambiguous()
 * as the comparator; "r" is forwarded as the comparator's context
 * argument.
 */
static void sort_ambiguous_oid_array(struct repository *r, struct oid_array *a)
{
QSORT_S(a->oid, a->nr, sort_ambiguous, r);
}
static enum get_oid_result get_short_oid(struct repository *r,
const char *name, int len,
struct object_id *oid,
unsigned flags)
{
int status;
struct disambiguate_state ds;
int quietly = !!(flags & GET_OID_QUIETLY);
const struct git_hash_algo *algo = r->hash_algo;
if (flags & GET_OID_HASH_ANY)
algo = NULL;
if (init_object_disambiguation(r, name, len, algo, &ds) < 0)
return -1;
if (HAS_MULTI_BITS(flags & GET_OID_DISAMBIGUATORS))
BUG("multiple get_short_oid disambiguator flags");
if (flags & GET_OID_COMMIT)
ds.fn = disambiguate_commit_only;
else if (flags & GET_OID_COMMITTISH)
ds.fn = disambiguate_committish_only;
else if (flags & GET_OID_TREE)
ds.fn = disambiguate_tree_only;
else if (flags & GET_OID_TREEISH)
ds.fn = disambiguate_treeish_only;
else if (flags & GET_OID_BLOB)
ds.fn = disambiguate_blob_only;
else
ds.fn = default_disambiguate_hint;
find_short_object_filename(&ds);
find_short_packed_object(&ds);
sha1_name: convert get_sha1* to get_oid* Now that all the callers of get_sha1 directly or indirectly use struct object_id, rename the functions starting with get_sha1 to start with get_oid. Convert the internals in sha1_name.c to use struct object_id as well, and eliminate explicit length checks where possible. Convert a use of 40 in get_oid_basic to GIT_SHA1_HEXSZ. Outside of sha1_name.c and cache.h, this transition was made with the following semantic patch: @@ expression E1, E2; @@ - get_sha1(E1, E2.hash) + get_oid(E1, &E2) @@ expression E1, E2; @@ - get_sha1(E1, E2->hash) + get_oid(E1, E2) @@ expression E1, E2; @@ - get_sha1_committish(E1, E2.hash) + get_oid_committish(E1, &E2) @@ expression E1, E2; @@ - get_sha1_committish(E1, E2->hash) + get_oid_committish(E1, E2) @@ expression E1, E2; @@ - get_sha1_treeish(E1, E2.hash) + get_oid_treeish(E1, &E2) @@ expression E1, E2; @@ - get_sha1_treeish(E1, E2->hash) + get_oid_treeish(E1, E2) @@ expression E1, E2; @@ - get_sha1_commit(E1, E2.hash) + get_oid_commit(E1, &E2) @@ expression E1, E2; @@ - get_sha1_commit(E1, E2->hash) + get_oid_commit(E1, E2) @@ expression E1, E2; @@ - get_sha1_tree(E1, E2.hash) + get_oid_tree(E1, &E2) @@ expression E1, E2; @@ - get_sha1_tree(E1, E2->hash) + get_oid_tree(E1, E2) @@ expression E1, E2; @@ - get_sha1_blob(E1, E2.hash) + get_oid_blob(E1, &E2) @@ expression E1, E2; @@ - get_sha1_blob(E1, E2->hash) + get_oid_blob(E1, E2) @@ expression E1, E2, E3, E4; @@ - get_sha1_with_context(E1, E2, E3.hash, E4) + get_oid_with_context(E1, E2, &E3, E4) @@ expression E1, E2, E3, E4; @@ - get_sha1_with_context(E1, E2, E3->hash, E4) + get_oid_with_context(E1, E2, E3, E4) Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-07-14 07:49:28 +08:00
status = finish_object_disambiguation(&ds, oid);
/*
* If we didn't find it, do the usual reprepare() slow-path,
* since the object may have recently been added to the repository
* or migrated from loose to packed.
*/
if (status == MISSING_OBJECT) {
reprepare_packed_git(r);
find_short_object_filename(&ds);
find_short_packed_object(&ds);
status = finish_object_disambiguation(&ds, oid);
}
get_short_sha1: list ambiguous objects on error When the user gives us an ambiguous short sha1, we print an error and refuse to resolve it. In some cases, the next step is for them to feed us more characters (e.g., if they were retyping or cut-and-pasting from a full sha1). But in other cases, that might be all they have. For example, an old commit message may have used a 7-character hex that was unique at the time, but is now ambiguous. Git doesn't provide any information about the ambiguous objects it found, so it's hard for the user to find out which one they probably meant. This patch teaches get_short_sha1() to list the sha1s of the objects it found, along with a few bits of information that may help the user decide which one they meant. Here's what it looks like on git.git: $ git rev-parse b2e1 error: short SHA1 b2e1 is ambiguous hint: The candidates are: hint: b2e1196 tag v2.8.0-rc1 hint: b2e11d1 tree hint: b2e1632 commit 2007-11-14 - Merge branch 'bs/maint-commit-options' hint: b2e1759 blob hint: b2e18954 blob hint: b2e1895c blob fatal: ambiguous argument 'b2e1': unknown revision or path not in the working tree. Use '--' to separate paths from revisions, like this: 'git <command> [<revision>...] -- [<file>...]' We show the tagname for tags, and the date and subject for commits. For trees and blobs, in theory we could dig in the history to find the paths at which they were present. But that's very expensive (on the order of 30s for the kernel), and it's not likely to be all that helpful. Most short references are to commits, so the useful information is typically going to be that the object in question _isn't_ a commit. So it's silly to spend a lot of CPU preemptively digging up the path; the user can do it themselves if they really need to. And of course it's somewhat ironic that we abbreviate the sha1s in the disambiguation hint. 
But full sha1s would cause annoying line wrapping for the commit lines, and presumably the user is going to just re-issue their command immediately with the corrected sha1. We also restrict the list to those that match any disambiguation hint. E.g.: $ git rev-parse b2e1:foo error: short SHA1 b2e1 is ambiguous hint: The candidates are: hint: b2e1196 tag v2.8.0-rc1 hint: b2e11d1 tree hint: b2e1632 commit 2007-11-14 - Merge branch 'bs/maint-commit-options' fatal: Invalid object name 'b2e1'. does not bother reporting the blobs, because they cannot work as a treeish. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-09-26 20:00:36 +08:00
if (!quietly && (status == SHORT_NAME_AMBIGUOUS)) {
get_short_oid: sort ambiguous objects by type, then SHA-1 Change the output emitted when an ambiguous object is encountered so that we show tags first, then commits, followed by trees, and finally blobs. Within each type we show objects in hashcmp() order. Before this change the objects were only ordered by hashcmp(). The reason for doing this is that the output looks better as a result, e.g. the v2.17.0 tag before this change on "git show e8f2" would display: hint: The candidates are: hint: e8f2093055 tree hint: e8f21caf94 commit 2013-06-24 - bash prompt: print unique detached HEAD abbreviated object name hint: e8f21d02f7 blob hint: e8f21d577c blob hint: e8f25a3a50 tree hint: e8f26250fa commit 2017-02-03 - Merge pull request #996 from jeffhostetler/jeffhostetler/register_rename_src hint: e8f2650052 tag v2.17.0 hint: e8f2867228 blob hint: e8f28d537c tree hint: e8f2a35526 blob hint: e8f2bc0c06 commit 2015-05-10 - Documentation: note behavior for multiple remote.url entries hint: e8f2cf6ec0 tree Now we'll instead show: hint: e8f2650052 tag v2.17.0 hint: e8f21caf94 commit 2013-06-24 - bash prompt: print unique detached HEAD abbreviated object name hint: e8f26250fa commit 2017-02-03 - Merge pull request #996 from jeffhostetler/jeffhostetler/register_rename_src hint: e8f2bc0c06 commit 2015-05-10 - Documentation: note behavior for multiple remote.url entries hint: e8f2093055 tree hint: e8f25a3a50 tree hint: e8f28d537c tree hint: e8f2cf6ec0 tree hint: e8f21d02f7 blob hint: e8f21d577c blob hint: e8f2867228 blob hint: e8f2a35526 blob Since we show the commit data in the output that's nicely aligned once we sort by object type. The decision to show tags before commits is pretty arbitrary. I don't want to order by object_type since there tags come last after blobs, which doesn't make sense if we want to show the most important things first. 
I could display them after commits, but it's much less likely that we'll display a tag, so if there is one it makes sense to show it prominently at the top. A note on the implementation: Derrick rightly pointed out[1] that we're bending over backwards here in get_short_oid() to first de-duplicate the list, and then emit it, but could simply do it in one step. The reason for that is that oid_array_for_each_unique() doesn't actually require that the array be sorted by oid_array_sort(), it just needs to be sorted in some order that guarantees that all objects with the same ID are adjacent to one another, which (barring a hash collision, which'll be someone else's problem) the sort_ambiguous() function does. I agree that would be simpler for this code, and had forgotten why I initially wrote it like this[2]. But on further reflection I think it's better to do more work here just so we're not underhandedly using the oid-array API where we lie about the list being sorted. That would break any subsequent use of oid_array_lookup() in subtle ways. I could get around that by hacking the API itself to support this use-case and documenting it, which I did as a WIP patch in [3], but I think it's too much code smell just for this one call site. It's simpler for the API to just introduce a oid_array_for_each() function to eagerly spew out the list without sorting or de-duplication, and then do the de-duplication and sorting in two passes. 1. https://public-inbox.org/git/20180501130318.58251-1-dstolee@microsoft.com/ 2. https://public-inbox.org/git/876047ze9v.fsf@evledraar.gmail.com/ 3. https://public-inbox.org/git/874ljrzctc.fsf@evledraar.gmail.com/ Helped-by: Derrick Stolee <dstolee@microsoft.com> Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-05-10 20:43:02 +08:00
struct oid_array collect = OID_ARRAY_INIT;
struct ambiguous_output out = {
.ds = &ds,
.sb = STRBUF_INIT,
.advice = STRBUF_INIT,
};
get_short_oid: sort ambiguous objects by type, then SHA-1 Change the output emitted when an ambiguous object is encountered so that we show tags first, then commits, followed by trees, and finally blobs. Within each type we show objects in hashcmp() order. Before this change the objects were only ordered by hashcmp(). The reason for doing this is that the output looks better as a result, e.g. the v2.17.0 tag before this change on "git show e8f2" would display: hint: The candidates are: hint: e8f2093055 tree hint: e8f21caf94 commit 2013-06-24 - bash prompt: print unique detached HEAD abbreviated object name hint: e8f21d02f7 blob hint: e8f21d577c blob hint: e8f25a3a50 tree hint: e8f26250fa commit 2017-02-03 - Merge pull request #996 from jeffhostetler/jeffhostetler/register_rename_src hint: e8f2650052 tag v2.17.0 hint: e8f2867228 blob hint: e8f28d537c tree hint: e8f2a35526 blob hint: e8f2bc0c06 commit 2015-05-10 - Documentation: note behavior for multiple remote.url entries hint: e8f2cf6ec0 tree Now we'll instead show: hint: e8f2650052 tag v2.17.0 hint: e8f21caf94 commit 2013-06-24 - bash prompt: print unique detached HEAD abbreviated object name hint: e8f26250fa commit 2017-02-03 - Merge pull request #996 from jeffhostetler/jeffhostetler/register_rename_src hint: e8f2bc0c06 commit 2015-05-10 - Documentation: note behavior for multiple remote.url entries hint: e8f2093055 tree hint: e8f25a3a50 tree hint: e8f28d537c tree hint: e8f2cf6ec0 tree hint: e8f21d02f7 blob hint: e8f21d577c blob hint: e8f2867228 blob hint: e8f2a35526 blob Since we show the commit data in the output that's nicely aligned once we sort by object type. The decision to show tags before commits is pretty arbitrary. I don't want to order by object_type since there tags come last after blobs, which doesn't make sense if we want to show the most important things first. 
I could display them after commits, but it's much less likely that we'll display a tag, so if there is one it makes sense to show it prominently at the top. A note on the implementation: Derrick rightly pointed out[1] that we're bending over backwards here in get_short_oid() to first de-duplicate the list, and then emit it, but could simply do it in one step. The reason for that is that oid_array_for_each_unique() doesn't actually require that the array be sorted by oid_array_sort(), it just needs to be sorted in some order that guarantees that all objects with the same ID are adjacent to one another, which (barring a hash collision, which'll be someone else's problem) the sort_ambiguous() function does. I agree that would be simpler for this code, and had forgotten why I initially wrote it like this[2]. But on further reflection I think it's better to do more work here just so we're not underhandedly using the oid-array API where we lie about the list being sorted. That would break any subsequent use of oid_array_lookup() in subtle ways. I could get around that by hacking the API itself to support this use-case and documenting it, which I did as a WIP patch in [3], but I think it's too much code smell just for this one call site. It's simpler for the API to just introduce a oid_array_for_each() function to eagerly spew out the list without sorting or de-duplication, and then do the de-duplication and sorting in two passes. 1. https://public-inbox.org/git/20180501130318.58251-1-dstolee@microsoft.com/ 2. https://public-inbox.org/git/876047ze9v.fsf@evledraar.gmail.com/ 3. https://public-inbox.org/git/874ljrzctc.fsf@evledraar.gmail.com/ Helped-by: Derrick Stolee <dstolee@microsoft.com> Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-05-10 20:43:02 +08:00
error(_("short object ID %s is ambiguous"), ds.hex_pfx);
get_short_sha1: list ambiguous objects on error When the user gives us an ambiguous short sha1, we print an error and refuse to resolve it. In some cases, the next step is for them to feed us more characters (e.g., if they were retyping or cut-and-pasting from a full sha1). But in other cases, that might be all they have. For example, an old commit message may have used a 7-character hex that was unique at the time, but is now ambiguous. Git doesn't provide any information about the ambiguous objects it found, so it's hard for the user to find out which one they probably meant. This patch teaches get_short_sha1() to list the sha1s of the objects it found, along with a few bits of information that may help the user decide which one they meant. Here's what it looks like on git.git: $ git rev-parse b2e1 error: short SHA1 b2e1 is ambiguous hint: The candidates are: hint: b2e1196 tag v2.8.0-rc1 hint: b2e11d1 tree hint: b2e1632 commit 2007-11-14 - Merge branch 'bs/maint-commit-options' hint: b2e1759 blob hint: b2e18954 blob hint: b2e1895c blob fatal: ambiguous argument 'b2e1': unknown revision or path not in the working tree. Use '--' to separate paths from revisions, like this: 'git <command> [<revision>...] -- [<file>...]' We show the tagname for tags, and the date and subject for commits. For trees and blobs, in theory we could dig in the history to find the paths at which they were present. But that's very expensive (on the order of 30s for the kernel), and it's not likely to be all that helpful. Most short references are to commits, so the useful information is typically going to be that the object in question _isn't_ a commit. So it's silly to spend a lot of CPU preemptively digging up the path; the user can do it themselves if they really need to. And of course it's somewhat ironic that we abbreviate the sha1s in the disambiguation hint. 
But full sha1s would cause annoying line wrapping for the commit lines, and presumably the user is going to just re-issue their command immediately with the corrected sha1. We also restrict the list to those that match any disambiguation hint. E.g.: $ git rev-parse b2e1:foo error: short SHA1 b2e1 is ambiguous hint: The candidates are: hint: b2e1196 tag v2.8.0-rc1 hint: b2e11d1 tree hint: b2e1632 commit 2007-11-14 - Merge branch 'bs/maint-commit-options' fatal: Invalid object name 'b2e1'. does not bother reporting the blobs, because they cannot work as a treeish. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-09-26 20:00:36 +08:00
/*
* We may still have ambiguity if we simply saw a series of
* candidates that did not satisfy our hint function. In
* that case, we still want to show them, so disable the hint
* function entirely.
*/
if (!ds.ambiguous)
ds.fn = NULL;
repo_for_each_abbrev(r, ds.hex_pfx, algo, collect_ambiguous, &collect);
sort_ambiguous_oid_array(r, &collect);
get_short_oid: sort ambiguous objects by type, then SHA-1 Change the output emitted when an ambiguous object is encountered so that we show tags first, then commits, followed by trees, and finally blobs. Within each type we show objects in hashcmp() order. Before this change the objects were only ordered by hashcmp(). The reason for doing this is that the output looks better as a result, e.g. the v2.17.0 tag before this change on "git show e8f2" would display: hint: The candidates are: hint: e8f2093055 tree hint: e8f21caf94 commit 2013-06-24 - bash prompt: print unique detached HEAD abbreviated object name hint: e8f21d02f7 blob hint: e8f21d577c blob hint: e8f25a3a50 tree hint: e8f26250fa commit 2017-02-03 - Merge pull request #996 from jeffhostetler/jeffhostetler/register_rename_src hint: e8f2650052 tag v2.17.0 hint: e8f2867228 blob hint: e8f28d537c tree hint: e8f2a35526 blob hint: e8f2bc0c06 commit 2015-05-10 - Documentation: note behavior for multiple remote.url entries hint: e8f2cf6ec0 tree Now we'll instead show: hint: e8f2650052 tag v2.17.0 hint: e8f21caf94 commit 2013-06-24 - bash prompt: print unique detached HEAD abbreviated object name hint: e8f26250fa commit 2017-02-03 - Merge pull request #996 from jeffhostetler/jeffhostetler/register_rename_src hint: e8f2bc0c06 commit 2015-05-10 - Documentation: note behavior for multiple remote.url entries hint: e8f2093055 tree hint: e8f25a3a50 tree hint: e8f28d537c tree hint: e8f2cf6ec0 tree hint: e8f21d02f7 blob hint: e8f21d577c blob hint: e8f2867228 blob hint: e8f2a35526 blob Since we show the commit data in the output that's nicely aligned once we sort by object type. The decision to show tags before commits is pretty arbitrary. I don't want to order by object_type since there tags come last after blobs, which doesn't make sense if we want to show the most important things first. 
I could display them after commits, but it's much less likely that we'll display a tag, so if there is one it makes sense to show it prominently at the top. A note on the implementation: Derrick rightly pointed out[1] that we're bending over backwards here in get_short_oid() to first de-duplicate the list, and then emit it, but could simply do it in one step. The reason for that is that oid_array_for_each_unique() doesn't actually require that the array be sorted by oid_array_sort(), it just needs to be sorted in some order that guarantees that all objects with the same ID are adjacent to one another, which (barring a hash collision, which'll be someone else's problem) the sort_ambiguous() function does. I agree that would be simpler for this code, and had forgotten why I initially wrote it like this[2]. But on further reflection I think it's better to do more work here just so we're not underhandedly using the oid-array API where we lie about the list being sorted. That would break any subsequent use of oid_array_lookup() in subtle ways. I could get around that by hacking the API itself to support this use-case and documenting it, which I did as a WIP patch in [3], but I think it's too much code smell just for this one call site. It's simpler for the API to just introduce a oid_array_for_each() function to eagerly spew out the list without sorting or de-duplication, and then do the de-duplication and sorting in two passes. 1. https://public-inbox.org/git/20180501130318.58251-1-dstolee@microsoft.com/ 2. https://public-inbox.org/git/876047ze9v.fsf@evledraar.gmail.com/ 3. https://public-inbox.org/git/874ljrzctc.fsf@evledraar.gmail.com/ Helped-by: Derrick Stolee <dstolee@microsoft.com> Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-05-10 20:43:02 +08:00
if (oid_array_for_each(&collect, show_ambiguous_object, &out))
get_short_oid: sort ambiguous objects by type, then SHA-1 Change the output emitted when an ambiguous object is encountered so that we show tags first, then commits, followed by trees, and finally blobs. Within each type we show objects in hashcmp() order. Before this change the objects were only ordered by hashcmp(). The reason for doing this is that the output looks better as a result, e.g. the v2.17.0 tag before this change on "git show e8f2" would display: hint: The candidates are: hint: e8f2093055 tree hint: e8f21caf94 commit 2013-06-24 - bash prompt: print unique detached HEAD abbreviated object name hint: e8f21d02f7 blob hint: e8f21d577c blob hint: e8f25a3a50 tree hint: e8f26250fa commit 2017-02-03 - Merge pull request #996 from jeffhostetler/jeffhostetler/register_rename_src hint: e8f2650052 tag v2.17.0 hint: e8f2867228 blob hint: e8f28d537c tree hint: e8f2a35526 blob hint: e8f2bc0c06 commit 2015-05-10 - Documentation: note behavior for multiple remote.url entries hint: e8f2cf6ec0 tree Now we'll instead show: hint: e8f2650052 tag v2.17.0 hint: e8f21caf94 commit 2013-06-24 - bash prompt: print unique detached HEAD abbreviated object name hint: e8f26250fa commit 2017-02-03 - Merge pull request #996 from jeffhostetler/jeffhostetler/register_rename_src hint: e8f2bc0c06 commit 2015-05-10 - Documentation: note behavior for multiple remote.url entries hint: e8f2093055 tree hint: e8f25a3a50 tree hint: e8f28d537c tree hint: e8f2cf6ec0 tree hint: e8f21d02f7 blob hint: e8f21d577c blob hint: e8f2867228 blob hint: e8f2a35526 blob Since we show the commit data in the output that's nicely aligned once we sort by object type. The decision to show tags before commits is pretty arbitrary. I don't want to order by object_type since there tags come last after blobs, which doesn't make sense if we want to show the most important things first. 
I could display them after commits, but it's much less likely that we'll display a tag, so if there is one it makes sense to show it prominently at the top. A note on the implementation: Derrick rightly pointed out[1] that we're bending over backwards here in get_short_oid() to first de-duplicate the list, and then emit it, but could simply do it in one step. The reason for that is that oid_array_for_each_unique() doesn't actually require that the array be sorted by oid_array_sort(), it just needs to be sorted in some order that guarantees that all objects with the same ID are adjacent to one another, which (barring a hash collision, which'll be someone else's problem) the sort_ambiguous() function does. I agree that would be simpler for this code, and had forgotten why I initially wrote it like this[2]. But on further reflection I think it's better to do more work here just so we're not underhandedly using the oid-array API where we lie about the list being sorted. That would break any subsequent use of oid_array_lookup() in subtle ways. I could get around that by hacking the API itself to support this use-case and documenting it, which I did as a WIP patch in [3], but I think it's too much code smell just for this one call site. It's simpler for the API to just introduce a oid_array_for_each() function to eagerly spew out the list without sorting or de-duplication, and then do the de-duplication and sorting in two passes. 1. https://public-inbox.org/git/20180501130318.58251-1-dstolee@microsoft.com/ 2. https://public-inbox.org/git/876047ze9v.fsf@evledraar.gmail.com/ 3. https://public-inbox.org/git/874ljrzctc.fsf@evledraar.gmail.com/ Helped-by: Derrick Stolee <dstolee@microsoft.com> Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-05-10 20:43:02 +08:00
BUG("show_ambiguous_object shouldn't return non-zero");
/*
* TRANSLATORS: The argument is the list of ambiguous
* objects composed in show_ambiguous_object(). See
* its "TRANSLATORS" comments for details.
*/
advise(_("The candidates are:\n%s"), out.advice.buf);
get_short_oid: sort ambiguous objects by type, then SHA-1 Change the output emitted when an ambiguous object is encountered so that we show tags first, then commits, followed by trees, and finally blobs. Within each type we show objects in hashcmp() order. Before this change the objects were only ordered by hashcmp(). The reason for doing this is that the output looks better as a result, e.g. the v2.17.0 tag before this change on "git show e8f2" would display: hint: The candidates are: hint: e8f2093055 tree hint: e8f21caf94 commit 2013-06-24 - bash prompt: print unique detached HEAD abbreviated object name hint: e8f21d02f7 blob hint: e8f21d577c blob hint: e8f25a3a50 tree hint: e8f26250fa commit 2017-02-03 - Merge pull request #996 from jeffhostetler/jeffhostetler/register_rename_src hint: e8f2650052 tag v2.17.0 hint: e8f2867228 blob hint: e8f28d537c tree hint: e8f2a35526 blob hint: e8f2bc0c06 commit 2015-05-10 - Documentation: note behavior for multiple remote.url entries hint: e8f2cf6ec0 tree Now we'll instead show: hint: e8f2650052 tag v2.17.0 hint: e8f21caf94 commit 2013-06-24 - bash prompt: print unique detached HEAD abbreviated object name hint: e8f26250fa commit 2017-02-03 - Merge pull request #996 from jeffhostetler/jeffhostetler/register_rename_src hint: e8f2bc0c06 commit 2015-05-10 - Documentation: note behavior for multiple remote.url entries hint: e8f2093055 tree hint: e8f25a3a50 tree hint: e8f28d537c tree hint: e8f2cf6ec0 tree hint: e8f21d02f7 blob hint: e8f21d577c blob hint: e8f2867228 blob hint: e8f2a35526 blob Since we show the commit data in the output that's nicely aligned once we sort by object type. The decision to show tags before commits is pretty arbitrary. I don't want to order by object_type since there tags come last after blobs, which doesn't make sense if we want to show the most important things first. 
I could display them after commits, but it's much less likely that we'll display a tag, so if there is one it makes sense to show it prominently at the top. A note on the implementation: Derrick rightly pointed out[1] that we're bending over backwards here in get_short_oid() to first de-duplicate the list, and then emit it, but could simply do it in one step. The reason for that is that oid_array_for_each_unique() doesn't actually require that the array be sorted by oid_array_sort(), it just needs to be sorted in some order that guarantees that all objects with the same ID are adjacent to one another, which (barring a hash collision, which'll be someone else's problem) the sort_ambiguous() function does. I agree that would be simpler for this code, and had forgotten why I initially wrote it like this[2]. But on further reflection I think it's better to do more work here just so we're not underhandedly using the oid-array API where we lie about the list being sorted. That would break any subsequent use of oid_array_lookup() in subtle ways. I could get around that by hacking the API itself to support this use-case and documenting it, which I did as a WIP patch in [3], but I think it's too much code smell just for this one call site. It's simpler for the API to just introduce a oid_array_for_each() function to eagerly spew out the list without sorting or de-duplication, and then do the de-duplication and sorting in two passes. 1. https://public-inbox.org/git/20180501130318.58251-1-dstolee@microsoft.com/ 2. https://public-inbox.org/git/876047ze9v.fsf@evledraar.gmail.com/ 3. https://public-inbox.org/git/874ljrzctc.fsf@evledraar.gmail.com/ Helped-by: Derrick Stolee <dstolee@microsoft.com> Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-05-10 20:43:02 +08:00
oid_array_clear(&collect);
strbuf_release(&out.advice);
strbuf_release(&out.sb);
get_short_sha1: list ambiguous objects on error When the user gives us an ambiguous short sha1, we print an error and refuse to resolve it. In some cases, the next step is for them to feed us more characters (e.g., if they were retyping or cut-and-pasting from a full sha1). But in other cases, that might be all they have. For example, an old commit message may have used a 7-character hex that was unique at the time, but is now ambiguous. Git doesn't provide any information about the ambiguous objects it found, so it's hard for the user to find out which one they probably meant. This patch teaches get_short_sha1() to list the sha1s of the objects it found, along with a few bits of information that may help the user decide which one they meant. Here's what it looks like on git.git: $ git rev-parse b2e1 error: short SHA1 b2e1 is ambiguous hint: The candidates are: hint: b2e1196 tag v2.8.0-rc1 hint: b2e11d1 tree hint: b2e1632 commit 2007-11-14 - Merge branch 'bs/maint-commit-options' hint: b2e1759 blob hint: b2e18954 blob hint: b2e1895c blob fatal: ambiguous argument 'b2e1': unknown revision or path not in the working tree. Use '--' to separate paths from revisions, like this: 'git <command> [<revision>...] -- [<file>...]' We show the tagname for tags, and the date and subject for commits. For trees and blobs, in theory we could dig in the history to find the paths at which they were present. But that's very expensive (on the order of 30s for the kernel), and it's not likely to be all that helpful. Most short references are to commits, so the useful information is typically going to be that the object in question _isn't_ a commit. So it's silly to spend a lot of CPU preemptively digging up the path; the user can do it themselves if they really need to. And of course it's somewhat ironic that we abbreviate the sha1s in the disambiguation hint. 
But full sha1s would cause annoying line wrapping for the commit lines, and presumably the user is going to just re-issue their command immediately with the corrected sha1. We also restrict the list to those that match any disambiguation hint. E.g.: $ git rev-parse b2e1:foo error: short SHA1 b2e1 is ambiguous hint: The candidates are: hint: b2e1196 tag v2.8.0-rc1 hint: b2e11d1 tree hint: b2e1632 commit 2007-11-14 - Merge branch 'bs/maint-commit-options' fatal: Invalid object name 'b2e1'. does not bother reporting the blobs, because they cannot work as a treeish. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-09-26 20:00:36 +08:00
}
return status;
}
/*
 * Call "fn" (with "cb_data") once for every distinct object ID collected
 * for the abbreviated name "prefix".
 *
 * Returns -1 when init_object_disambiguation() rejects the prefix;
 * otherwise returns the result of oid_array_for_each_unique().
 */
int repo_for_each_abbrev(struct repository *r, const char *prefix,
			 const struct git_hash_algo *algo,
			 each_abbrev_fn fn, void *cb_data)
{
	struct disambiguate_state state;
	struct oid_array matches = OID_ARRAY_INIT;
	int rc;

	if (init_object_disambiguation(r, prefix, strlen(prefix), algo, &state) < 0)
		return -1;

	/*
	 * Route every candidate into "matches" via repo_collect_ambiguous.
	 * NOTE(review): always_call_fn presumably asks the lookup helpers to
	 * invoke the callback for each candidate -- confirm against the
	 * definition of struct disambiguate_state.
	 */
	state.cb_data = &matches;
	state.fn = repo_collect_ambiguous;
	state.always_call_fn = 1;

	find_short_object_filename(&state);
	find_short_packed_object(&state);

	rc = oid_array_for_each_unique(&matches, fn, cb_data);
	oid_array_clear(&matches);
	return rc;
}
/*
 * Return the zero-based position of the highest bit set in "val" (0 when
 * "val" is 0 or 1). fls() or __builtin_clzl() would be faster, but speed
 * is probably not a big deal here.
 */
static unsigned msb(unsigned long val)
{
	unsigned slot = 0;

	/* Count how many times we can shift right before nothing is left. */
	for (val >>= 1; val; val >>= 1)
		slot++;
	return slot;
}
struct min_abbrev_data {
unsigned int init_len;
unsigned int cur_len;
char *hex;
struct repository *repo;
const struct object_id *oid;
};
/*
 * Return the lowercase hex digit found at hex position "pos" of "oid",
 * where position 0 is the high nibble of the first hash byte.
 */
static inline char get_hex_char_from_oid(const struct object_id *oid,
					 unsigned int pos)
{
	static const char hex[] = "0123456789abcdef";
	const int byte = oid->hash[pos >> 1];

	/* Even positions select the high nibble, odd positions the low one. */
	return hex[(pos & 1) ? (byte & 0xf) : (byte >> 4)];
}
/*
 * Compare the hex digits of "oid" against mad->hex, starting at
 * mad->init_len, and raise mad->cur_len so that it covers the first digit
 * at which the two names differ.
 *
 * "cb_data" must point to a struct min_abbrev_data. Always returns 0.
 */
static int extend_abbrev_len(const struct object_id *oid, void *cb_data)
{
	struct min_abbrev_data *mad = cb_data;
	unsigned int i = mad->init_len;

	while (mad->hex[i] && mad->hex[i] == get_hex_char_from_oid(oid, i))
		i++;

	/*
	 * "i" counts hex digits, so it must not be bounded by a raw-byte
	 * count such as GIT_MAX_RAWSZ: doing so silently ignored differences
	 * located in the second half of the name. The NUL that terminates
	 * mad->hex is the real end of the name; reaching it means "oid"
	 * matched every digit (it is the object being abbreviated), in which
	 * case there is nothing to extend.
	 */
	if (mad->hex[i] && i >= mad->cur_len)
		mad->cur_len = i + 1;

	return 0;
}
/*
 * Adapter with the three-argument callback shape (repository, oid, data):
 * the repository argument is unused and the call is forwarded unchanged to
 * extend_abbrev_len(), whose return value is passed back.
 */
static int repo_extend_abbrev_len(struct repository *r UNUSED,
const struct object_id *oid,
void *cb_data)
{
return extend_abbrev_len(oid, cb_data);
}
static void find_abbrev_len_for_midx(struct multi_pack_index *m,
struct min_abbrev_data *mad)
{
for (; m; m = m->base_midx) {
int match = 0;
uint32_t num, first = 0;
struct object_id oid;
const struct object_id *mad_oid;
if (!m->num_objects)
continue;
num = m->num_objects + m->num_objects_in_base;
mad_oid = mad->oid;
match = bsearch_one_midx(mad_oid, m, &first);
/*
* first is now the position in the packfile where we
* would insert mad->hash if it does not exist (or the
* position of mad->hash if it does exist). Hence, we
* consider a maximum of two objects nearby for the
* abbreviation length.
*/
mad->init_len = 0;
if (!match) {
if (nth_midxed_object_oid(&oid, m, first))
extend_abbrev_len(&oid, mad);
} else if (first < num - 1) {
if (nth_midxed_object_oid(&oid, m, first + 1))
extend_abbrev_len(&oid, mad);
}
if (first > 0) {
if (nth_midxed_object_oid(&oid, m, first - 1))
extend_abbrev_len(&oid, mad);
}
mad->init_len = mad->cur_len;
}
}
static void find_abbrev_len_for_pack(struct packed_git *p,
struct min_abbrev_data *mad)
{
int match = 0;
uint32_t num, first = 0;
struct object_id oid;
const struct object_id *mad_oid;
midx: add packs to packed_git linked list The multi-pack-index allows searching for objects across multiple packs using one object list. The original design gains many of these performance benefits by keeping the packs in the multi-pack-index out of the packed_git list. Unfortunately, this has one major drawback. If the multi-pack-index covers thousands of packs, and a command loads many of those packs, then we can hit the limit for open file descriptors. The close_one_pack() method is used to limit this resource, but it only looks at the packed_git list, and uses an LRU cache to prevent thrashing. Instead of complicating this close_one_pack() logic to include direct references to the multi-pack-index, simply add the packs opened by the multi-pack-index to the packed_git list. This immediately solves the file-descriptor limit problem, but requires some extra steps to avoid performance issues or other problems: 1. Create a multi_pack_index bit in the packed_git struct that is one if and only if the pack was loaded from a multi-pack-index. 2. Skip packs with the multi_pack_index bit when doing object lookups and abbreviations. These algorithms already check the multi-pack-index before the packed_git struct. This has a very small performance hit, as we need to walk more packed_git structs. This is acceptable, since these operations run binary search on the other packs, so this walk-and-ignore logic is very fast by comparison. 3. When closing a multi-pack-index file, do not close its packs, as those packs will be closed using close_all_packs(). In some cases, such as 'git repack', we run 'close_midx()' without also closing the packs, so we need to un-set the multi_pack_index bit in those packs. This is necessary, and caught by running t6501-freshen-objects.sh with GIT_TEST_MULTI_PACK_INDEX=1. 
To manually test this change, I inserted trace2 logging into close_pack_fd() and set pack_max_fds to 10, then ran 'git rev-list --all --objects' on a copy of the Git repo with 300+ pack-files and a multi-pack-index. The logs verified the packs are closed as we read them beyond the file descriptor limit. Signed-off-by: Derrick Stolee <dstolee@microsoft.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-04-30 00:18:56 +08:00
if (p->multi_pack_index)
return;
if (open_pack_index(p) || !p->num_objects)
return;
num = p->num_objects;
mad_oid = mad->oid;
match = bsearch_pack(mad_oid, p, &first);
/*
* first is now the position in the packfile where we would insert
* mad->hash if it does not exist (or the position of mad->hash if
* it does exist). Hence, we consider a maximum of two objects
* nearby for the abbreviation length.
*/
mad->init_len = 0;
if (!match) {
if (!nth_packed_object_id(&oid, p, first))
extend_abbrev_len(&oid, mad);
} else if (first < num - 1) {
if (!nth_packed_object_id(&oid, p, first + 1))
extend_abbrev_len(&oid, mad);
}
if (first > 0) {
if (!nth_packed_object_id(&oid, p, first - 1))
extend_abbrev_len(&oid, mad);
}
mad->init_len = mad->cur_len;
}
/*
 * Raise mad->cur_len against packed objects of mad->repo: first every
 * multi-pack-index in the repository's list, then every pack in its
 * packed_git list.
 */
static void find_abbrev_len_packed(struct min_abbrev_data *mad)
{
	struct multi_pack_index *midx;
	struct packed_git *pack;

	midx = get_multi_pack_index(mad->repo);
	while (midx) {
		find_abbrev_len_for_midx(midx, mad);
		midx = midx->next;
	}

	pack = get_packed_git(mad->repo);
	while (pack) {
		find_abbrev_len_for_pack(pack, mad);
		pack = pack->next;
	}
}
/*
 * Append the abbreviation of "oid" computed by repo_find_unique_abbrev_r()
 * for "repo" and "abbrev_len" to the end of "sb".
 */
void strbuf_repo_add_unique_abbrev(struct strbuf *sb, struct repository *repo,
				   const struct object_id *oid, int abbrev_len)
{
	char *dst;
	int written;

	/* Reserve room for a full-length hex name plus its terminator. */
	strbuf_grow(sb, GIT_MAX_HEXSZ + 1);

	/* The write position is taken only after strbuf_grow() has run. */
	dst = sb->buf + sb->len;
	written = repo_find_unique_abbrev_r(repo, dst, oid, abbrev_len);

	/* Account for the bytes written directly into the buffer. */
	strbuf_setlen(sb, sb->len + written);
}
/*
 * Same as strbuf_repo_add_unique_abbrev(), but always operating on the
 * global "the_repository".
 */
void strbuf_add_unique_abbrev(struct strbuf *sb, const struct object_id *oid,
int abbrev_len)
{
strbuf_repo_add_unique_abbrev(sb, the_repository, oid, abbrev_len);
}
int repo_find_unique_abbrev_r(struct repository *r, char *hex,
const struct object_id *oid, int len)
{
const struct git_hash_algo *algo =
oid->algo ? &hash_algos[oid->algo] : r->hash_algo;
struct disambiguate_state ds;
struct min_abbrev_data mad;
struct object_id oid_ret;
const unsigned hexsz = algo->hexsz;
if (len < 0) {
unsigned long count = repo_approximate_object_count(r);
/*
* Add one because the MSB only tells us the highest bit set,
* not including the value of all the _other_ bits (so "15"
* is only one off of 2^4, but the MSB is the 3rd bit.
*/
len = msb(count) + 1;
/*
* We now know we have on the order of 2^len objects, which
* expects a collision at 2^(len/2). But we also care about hex
* chars, not bits, and there are 4 bits per hex. So all
* together we need to divide by 2 and round up.
*/
len = DIV_ROUND_UP(len, 2);
/*
* For very small repos, we stick with our regular fallback.
*/
if (len < FALLBACK_DEFAULT_ABBREV)
len = FALLBACK_DEFAULT_ABBREV;
}
oid_to_hex_r(hex, oid);
object-name: don't try to abbreviate to lengths greater than hexsz When given a length that equals the current hash algorithm's hex size, then `repo_find_unique_abbrev_r()` exits early without trying to find an abbreviation. This is only sensible because there is nothing to abbreviate in the first place, so searching through objects to find a unique prefix would be a waste of compute. What we don't handle though is the case where the user passes a length greater than the hash length. This is fine in practice as we still compute the correct result. But at the very least, this is a waste of resources as we try to abbreviate a value that cannot be abbreviated, which causes us to hit the object database. Start to explicitly handle values larger than hexsz to avoid this performance penalty, which leads to a measureable speedup. The following benchmark has been executed in linux.git: Benchmark 1: git -c core.abbrev=9000 log --abbrev-commit (revision = HEAD~) Time (mean ± σ): 12.812 s ± 0.040 s [User: 12.225 s, System: 0.554 s] Range (min … max): 12.723 s … 12.857 s 10 runs Benchmark 2: git -c core.abbrev=9000 log --abbrev-commit (revision = HEAD) Time (mean ± σ): 11.095 s ± 0.029 s [User: 10.546 s, System: 0.521 s] Range (min … max): 11.037 s … 11.122 s 10 runs Summary git -c core.abbrev=9000 log --abbrev-commit HEAD (revision = HEAD) ran 1.15 ± 0.00 times faster than git -c core.abbrev=9000 log --abbrev-commit HEAD (revision = HEAD~) Signed-off-by: Patrick Steinhardt <ps@pks.im> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2024-06-12 16:03:36 +08:00
if (len >= hexsz || !len)
return hexsz;
mad.repo = r;
mad.init_len = len;
mad.cur_len = len;
mad.hex = hex;
mad.oid = oid;
find_abbrev_len_packed(&mad);
if (init_object_disambiguation(r, hex, mad.cur_len, algo, &ds) < 0)
return -1;
ds.fn = repo_extend_abbrev_len;
ds.always_call_fn = 1;
ds.cb_data = (void *)&mad;
find_short_object_filename(&ds);
(void)finish_object_disambiguation(&ds, &oid_ret);
hex[mad.cur_len] = 0;
return mad.cur_len;
}
/*
 * Convenience form of repo_find_unique_abbrev_r() that writes into one of
 * four function-local static buffers, used in rotation, and returns a
 * pointer to it. The returned string is overwritten again on the fourth
 * subsequent call.
 */
const char *repo_find_unique_abbrev(struct repository *r,
				    const struct object_id *oid,
				    int len)
{
	static char hexbuffer[4][GIT_MAX_HEXSZ + 1];
	static int bufno;
	char *slot = hexbuffer[bufno];

	/* Advance to the next buffer for the following call. */
	bufno = (bufno + 1) % ARRAY_SIZE(hexbuffer);

	repo_find_unique_abbrev_r(r, slot, oid, len);
	return slot;
}
/*
 * Scan up to "len" bytes of "path" and report whether it has an empty or
 * dots-only component.
 *
 * "slash" is 1 while the component currently being scanned is empty or
 * made only of '.' characters (this is also the state at the very start).
 * The scan stops early at a NUL byte, or at a '/' met in that state (e.g.
 * a leading "/", "//", "/./" or "/../"). The value of "slash" when the
 * scan stops is returned, so a path ending in "/", "/." or "/.." and the
 * empty path also yield 1.
 */
static int ambiguous_path(const char *path, int len)
{
	int slash = 1;
	int cnt;

	for (cnt = 0; cnt < len; cnt++) {
		switch (*path++) {
		case '\0':
			break;
		case '/':
			if (slash)
				break;
			slash = 1;
			continue;
		case '.':
			continue;
		default:
			slash = 0;
			continue;
		}
		/* Only reached through a "break" inside the switch. */
		break;
	}
	return slash;
}
/*
 * Check whether "string", of which "len" bytes may be examined, starts
 * with one of the "nr" candidates in "suffix". The comparison ignores
 * case, so "@{U}" matches a candidate spelled "@{u}".
 *
 * Returns the length of the first candidate that matches, or 0 if none
 * does.
 */
static inline int at_mark(const char *string, int len,
			  const char **suffix, int nr)
{
	int i;

	for (i = 0; i < nr; i++) {
		int suffix_len = strlen(suffix[i]);
		/* Candidates longer than the remaining input cannot match. */
		if (suffix_len <= len
		    && !strncasecmp(string, suffix[i], suffix_len))
			return suffix_len;
	}
	return 0;
}
/*
 * Return the length of the "@{upstream}" or "@{u}" mark found at the start
 * of "string" (matched by at_mark()), or 0 if neither is there.
 */
static inline int upstream_mark(const char *string, int len)
{
	const char *marks[] = { "@{upstream}", "@{u}" };

	return at_mark(string, len, marks, ARRAY_SIZE(marks));
}
/*
 * Return the length of the "@{push}" mark found at the start of "string"
 * (matched by at_mark()), or 0 if it is not there.
 */
static inline int push_mark(const char *string, int len)
{
	const char *marks[] = { "@{push}" };

	return at_mark(string, len, marks, ARRAY_SIZE(marks));
}
/*
 * Forward declarations of file-local helpers that the functions below call
 * ahead of any definition appearing in this part of the file.
 */
static enum get_oid_result get_oid_1(struct repository *r, const char *name, int len, struct object_id *oid, unsigned lookup_flags);
static int interpret_nth_prior_checkout(struct repository *r, const char *name, int namelen, struct strbuf *buf);
static int get_oid_basic(struct repository *r, const char *str, int len,
struct object_id *oid, unsigned int flags)
{
static const char *warn_msg = "refname '%.*s' is ambiguous.";
static const char *object_name_msg = N_(
"Git normally never creates a ref that ends with 40 hex characters\n"
"because it will be ignored when you just specify 40-hex. These refs\n"
"may be created by mistake. For example,\n"
"\n"
" git switch -c $br $(git rev-parse ...)\n"
"\n"
"where \"$br\" is somehow empty and a 40-hex ref is created. Please\n"
"examine these refs and maybe delete them. Turn this message off by\n"
"running \"git config advice.objectNameWarning false\"");
sha1_name: convert get_sha1* to get_oid* Now that all the callers of get_sha1 directly or indirectly use struct object_id, rename the functions starting with get_sha1 to start with get_oid. Convert the internals in sha1_name.c to use struct object_id as well, and eliminate explicit length checks where possible. Convert a use of 40 in get_oid_basic to GIT_SHA1_HEXSZ. Outside of sha1_name.c and cache.h, this transition was made with the following semantic patch: @@ expression E1, E2; @@ - get_sha1(E1, E2.hash) + get_oid(E1, &E2) @@ expression E1, E2; @@ - get_sha1(E1, E2->hash) + get_oid(E1, E2) @@ expression E1, E2; @@ - get_sha1_committish(E1, E2.hash) + get_oid_committish(E1, &E2) @@ expression E1, E2; @@ - get_sha1_committish(E1, E2->hash) + get_oid_committish(E1, E2) @@ expression E1, E2; @@ - get_sha1_treeish(E1, E2.hash) + get_oid_treeish(E1, &E2) @@ expression E1, E2; @@ - get_sha1_treeish(E1, E2->hash) + get_oid_treeish(E1, E2) @@ expression E1, E2; @@ - get_sha1_commit(E1, E2.hash) + get_oid_commit(E1, &E2) @@ expression E1, E2; @@ - get_sha1_commit(E1, E2->hash) + get_oid_commit(E1, E2) @@ expression E1, E2; @@ - get_sha1_tree(E1, E2.hash) + get_oid_tree(E1, &E2) @@ expression E1, E2; @@ - get_sha1_tree(E1, E2->hash) + get_oid_tree(E1, E2) @@ expression E1, E2; @@ - get_sha1_blob(E1, E2.hash) + get_oid_blob(E1, &E2) @@ expression E1, E2; @@ - get_sha1_blob(E1, E2->hash) + get_oid_blob(E1, E2) @@ expression E1, E2, E3, E4; @@ - get_sha1_with_context(E1, E2, E3.hash, E4) + get_oid_with_context(E1, E2, &E3, E4) @@ expression E1, E2, E3, E4; @@ - get_sha1_with_context(E1, E2, E3->hash, E4) + get_oid_with_context(E1, E2, E3, E4) Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-07-14 07:49:28 +08:00
struct object_id tmp_oid;
char *real_ref = NULL;
int refs_found = 0;
int at, reflog_len, nth_prior = 0;
int fatal = !(flags & GET_OID_QUIETLY);
if (len == r->hash_algo->hexsz && !get_oid_hex(str, oid)) {
if (warn_ambiguous_refs && warn_on_object_refname_ambiguity) {
refs_found = repo_dwim_ref(r, str, len, &tmp_oid, &real_ref, 0);
if (refs_found > 0) {
cat-file: disable object/refname ambiguity check for batch mode A common use of "cat-file --batch-check" is to feed a list of objects from "rev-list --objects" or a similar command. In this instance, all of our input objects are 40-byte sha1 ids. However, cat-file has always allowed arbitrary revision specifiers, and feeds the result to get_sha1(). Fortunately, get_sha1() recognizes a 40-byte sha1 before doing any hard work trying to look up refs, meaning this scenario should end up spending very little time converting the input into an object sha1. However, since 798c35f (get_sha1: warn about full or short object names that look like refs, 2013-05-29), when we encounter this case, we spend the extra effort to do a refname lookup anyway, just to print a warning. This is further exacerbated by ca91993 (get_packed_ref_cache: reload packed-refs file when it changes, 2013-06-20), which makes individual ref lookup more expensive by requiring a stat() of the packed-refs file for each missing ref. With no patches, this is the time it takes to run: $ git rev-list --objects --all >objects $ time git cat-file --batch-check='%(objectname)' <objects on the linux.git repository: real 1m13.494s user 0m25.924s sys 0m47.532s If we revert ca91993, the packed-refs up-to-date check, it gets a little better: real 0m54.697s user 0m21.692s sys 0m32.916s but we are still spending quite a bit of time on ref lookup (and we would not want to revert that patch, anyway, which has correctness issues). If we revert 798c35f, disabling the warning entirely, we get a much more reasonable time: real 0m7.452s user 0m6.836s sys 0m0.608s This patch does the moral equivalent of this final case (and gets similar speedups). We introduce a global flag that callers of get_sha1() can use to avoid paying the price for the warning. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-07-12 14:20:05 +08:00
warning(warn_msg, len, str);
if (advice_enabled(ADVICE_OBJECT_NAME_WARNING))
cat-file: disable object/refname ambiguity check for batch mode A common use of "cat-file --batch-check" is to feed a list of objects from "rev-list --objects" or a similar command. In this instance, all of our input objects are 40-byte sha1 ids. However, cat-file has always allowed arbitrary revision specifiers, and feeds the result to get_sha1(). Fortunately, get_sha1() recognizes a 40-byte sha1 before doing any hard work trying to look up refs, meaning this scenario should end up spending very little time converting the input into an object sha1. However, since 798c35f (get_sha1: warn about full or short object names that look like refs, 2013-05-29), when we encounter this case, we spend the extra effort to do a refname lookup anyway, just to print a warning. This is further exacerbated by ca91993 (get_packed_ref_cache: reload packed-refs file when it changes, 2013-06-20), which makes individual ref lookup more expensive by requiring a stat() of the packed-refs file for each missing ref. With no patches, this is the time it takes to run: $ git rev-list --objects --all >objects $ time git cat-file --batch-check='%(objectname)' <objects on the linux.git repository: real 1m13.494s user 0m25.924s sys 0m47.532s If we revert ca91993, the packed-refs up-to-date check, it gets a little better: real 0m54.697s user 0m21.692s sys 0m32.916s but we are still spending quite a bit of time on ref lookup (and we would not want to revert that patch, anyway, which has correctness issues). If we revert 798c35f, disabling the warning entirely, we get a much more reasonable time: real 0m7.452s user 0m6.836s sys 0m0.608s This patch does the moral equivalent of this final case (and gets similar speedups). We introduce a global flag that callers of get_sha1() can use to avoid paying the price for the warning. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-07-12 14:20:05 +08:00
fprintf(stderr, "%s\n", _(object_name_msg));
}
free(real_ref);
}
return 0;
}
/* basic@{time or number or -number} format to query ref-log */
reflog_len = at = 0;
if (len && str[len-1] == '}') {
for (at = len-4; at >= 0; at--) {
if (str[at] == '@' && str[at+1] == '{') {
if (str[at+2] == '-') {
if (at != 0)
/* @{-N} not at start */
return -1;
nth_prior = 1;
continue;
}
if (!upstream_mark(str + at, len - at) &&
!push_mark(str + at, len - at)) {
reflog_len = (len-1) - (at+2);
len = at;
}
break;
}
}
}
2005-10-29 03:41:49 +08:00
/* Accept only unambiguous ref paths. */
if (len && ambiguous_path(str, len))
2005-10-29 03:41:49 +08:00
return -1;
if (nth_prior) {
struct strbuf buf = STRBUF_INIT;
int detached;
if (interpret_nth_prior_checkout(r, str, len, &buf) > 0) {
detached = (buf.len == r->hash_algo->hexsz && !get_oid_hex(buf.buf, oid));
strbuf_release(&buf);
if (detached)
return 0;
}
}
if (!len && reflog_len)
/* allow "@{...}" to mean the current branch reflog */
refs_found = repo_dwim_ref(r, "HEAD", 4, oid, &real_ref, !fatal);
else if (reflog_len)
refs_found = repo_dwim_log(r, str, len, oid, &real_ref);
else
refs_found = repo_dwim_ref(r, str, len, oid, &real_ref, !fatal);
if (!refs_found)
return -1;
if (warn_ambiguous_refs && !(flags & GET_OID_QUIETLY) &&
(refs_found > 1 ||
!get_short_oid(r, str, len, &tmp_oid, GET_OID_QUIETLY)))
warning(warn_msg, len, str);
if (reflog_len) {
int nth, i;
timestamp_t at_time;
timestamp_t co_time;
int co_tz, co_cnt;
/* Is it asking for N-th entry, or approxidate? */
for (i = nth = 0; 0 <= nth && i < reflog_len; i++) {
char ch = str[at+2+i];
if ('0' <= ch && ch <= '9')
nth = nth * 10 + ch - '0';
else
nth = -1;
}
if (100000000 <= nth) {
at_time = nth;
nth = -1;
} else if (0 <= nth)
at_time = 0;
else {
int errors = 0;
char *tmp = xstrndup(str + at + 2, reflog_len);
at_time = approxidate_careful(tmp, &errors);
free(tmp);
if (errors) {
free(real_ref);
return -1;
}
}
if (read_ref_at(get_main_ref_store(r),
real_ref, flags, at_time, nth, oid, NULL,
&co_time, &co_tz, &co_cnt)) {
if (!len) {
if (!skip_prefix(real_ref, "refs/heads/", &str))
str = "HEAD";
len = strlen(str);
}
if (at_time) {
if (!(flags & GET_OID_QUIETLY)) {
warning(_("log for '%.*s' only goes back to %s"),
len, str,
convert "enum date_mode" into a struct In preparation for adding date modes that may carry extra information beyond the mode itself, this patch converts the date_mode enum into a struct. Most of the conversion is fairly straightforward; we pass the struct as a pointer and dereference the type field where necessary. Locations that declare a date_mode can use a "{}" constructor. However, the tricky case is where we use the enum labels as constants, like: show_date(t, tz, DATE_NORMAL); Ideally we could say: show_date(t, tz, &{ DATE_NORMAL }); but of course C does not allow that. Likewise, we cannot cast the constant to a struct, because we need to pass an actual address. Our options are basically: 1. Manually add a "struct date_mode d = { DATE_NORMAL }" definition to each caller, and pass "&d". This makes the callers uglier, because they sometimes do not even have their own scope (e.g., they are inside a switch statement). 2. Provide a pre-made global "date_normal" struct that can be passed by address. We'd also need "date_rfc2822", "date_iso8601", and so forth. But at least the ugliness is defined in one place. 3. Provide a wrapper that generates the correct struct on the fly. The big downside is that we end up pointing to a single global, which makes our wrapper non-reentrant. But show_date is already not reentrant, so it does not matter. This patch implements 3, along with a minor macro to keep the size of the callers sane. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-06-26 00:55:02 +08:00
show_date(co_time, co_tz, DATE_MODE(RFC2822)));
}
get_oid_basic(): special-case ref@{n} for oldest reflog entry The goal of 6436a20284 (refs: allow @{n} to work with n-sized reflog, 2021-01-07) was that if we have "n" entries in a reflog, we should still be able to resolve ref@{n} by looking at the "old" value of the oldest entry. Commit 6436a20284 tried to put the logic into read_ref_at() by shifting its idea of "n" by one. But we reverted that in the previous commit, since it led to bugs in other callers which cared about the details of the reflog entry we found. Instead, let's put the special case into the caller that resolves @{n}, as it cares only about the oid. read_ref_at() is even kind enough to return the "old" value from the final reflog; it just returns "1" to signal to us that we ran off the end of the reflog. But we can notice in the caller that we read just enough records for that "old" value to be the one we're looking for, and use it. Note that read_ref_at() could notice this case, too, and just return 0. But we don't want to do that, because the caller must be made aware that we only found the oid, not an actual reflog entry (and the call sites in show-branch do care about this). There is one complication, though. When read_ref_at() hits a truncated reflog, it will return the "old" value of the oldest entry only if it is not the null oid. Otherwise, it actually returns the "new" value from that entry! This bit of fudging is due to d1a4489a56 (avoid null SHA1 in oldest reflog, 2008-07-08), where asking for "ref@{20.years.ago}" for a ref created recently will produce the initial value as a convenience (even though technically it did not exist 20 years ago). But this convenience is only useful for time-based cutoffs. 
For count-based cutoffs, get_oid_basic() has always simply complained about going too far back: $ git rev-parse HEAD@{20} fatal: log for 'HEAD' only has 16 entries and we should continue to do so, rather than returning a nonsense value (there's even a test in t1508 already which covers this). So let's have the d1a4489a56 code kick in only when doing timestamp-based cutoffs. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2024-02-26 18:04:07 +08:00
} else if (nth == co_cnt && !is_null_oid(oid)) {
/*
* We were asked for the Nth reflog (counting
* from 0), but there were only N entries.
* read_ref_at() will have returned "1" to tell
* us it did not find an entry, but it did
* still fill in the oid with the "old" value,
* which we can use.
*/
} else {
if (flags & GET_OID_QUIETLY) {
exit(128);
}
die(_("log for '%.*s' only has %d entries"),
len, str, co_cnt);
}
}
}
free(real_ref);
return 0;
}
static enum get_oid_result get_parent(struct repository *r,
const char *name, int len,
struct object_id *result, int idx)
{
struct object_id oid;
enum get_oid_result ret = get_oid_1(r, name, len, &oid,
GET_OID_COMMITTISH);
struct commit *commit;
struct commit_list *p;
if (ret)
return ret;
commit = lookup_commit_reference(r, &oid);
libs: use "struct repository *" argument, not "the_repository" As can easily be seen from grepping in our sources, we had these uses of "the_repository" in various library code in cases where the function in question was already getting a "struct repository *" argument. Let's use that argument instead. Out of these changes only the changes to "cache-tree.c", "commit-reach.c", "shallow.c" and "upload-pack.c" would have cleanly applied before the migration away from the "repo_*()" wrapper macros in the preceding commits. The rest aren't new, as we'd previously implicitly refer to "the_repository", but it's now more obvious that we were doing the wrong thing all along, and should have used the parameter instead. The change to change "get_index_format_default(the_repository)" in "read-cache.c" to use the "r" variable instead should arguably have been part of [1], or in the subsequent cleanup in [2]. Let's do it here, as can be seen from the initial code in [3] it's not important that we use "the_repository" there, but would prefer to always use the current repository. This change excludes the "the_repository" use in "upload-pack.c"'s upload_pack_advertise(), as the in-flight [4] makes that change. 1. ee1f0c242ef (read-cache: add index.skipHash config option, 2023-01-06) 2. 6269f8eaad0 (treewide: always have a valid "index_state.repo" member, 2023-01-17) 3. 7211b9e7534 (repo-settings: consolidate some config settings, 2019-08-13) 4. <Y/hbUsGPVNAxTdmS@coredump.intra.peff.net> Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-03-28 21:58:58 +08:00
if (repo_parse_commit(r, commit))
return MISSING_OBJECT;
if (!idx) {
sha1_name: convert get_sha1* to get_oid* Now that all the callers of get_sha1 directly or indirectly use struct object_id, rename the functions starting with get_sha1 to start with get_oid. Convert the internals in sha1_name.c to use struct object_id as well, and eliminate explicit length checks where possible. Convert a use of 40 in get_oid_basic to GIT_SHA1_HEXSZ. Outside of sha1_name.c and cache.h, this transition was made with the following semantic patch: @@ expression E1, E2; @@ - get_sha1(E1, E2.hash) + get_oid(E1, &E2) @@ expression E1, E2; @@ - get_sha1(E1, E2->hash) + get_oid(E1, E2) @@ expression E1, E2; @@ - get_sha1_committish(E1, E2.hash) + get_oid_committish(E1, &E2) @@ expression E1, E2; @@ - get_sha1_committish(E1, E2->hash) + get_oid_committish(E1, E2) @@ expression E1, E2; @@ - get_sha1_treeish(E1, E2.hash) + get_oid_treeish(E1, &E2) @@ expression E1, E2; @@ - get_sha1_treeish(E1, E2->hash) + get_oid_treeish(E1, E2) @@ expression E1, E2; @@ - get_sha1_commit(E1, E2.hash) + get_oid_commit(E1, &E2) @@ expression E1, E2; @@ - get_sha1_commit(E1, E2->hash) + get_oid_commit(E1, E2) @@ expression E1, E2; @@ - get_sha1_tree(E1, E2.hash) + get_oid_tree(E1, &E2) @@ expression E1, E2; @@ - get_sha1_tree(E1, E2->hash) + get_oid_tree(E1, E2) @@ expression E1, E2; @@ - get_sha1_blob(E1, E2.hash) + get_oid_blob(E1, &E2) @@ expression E1, E2; @@ - get_sha1_blob(E1, E2->hash) + get_oid_blob(E1, E2) @@ expression E1, E2, E3, E4; @@ - get_sha1_with_context(E1, E2, E3.hash, E4) + get_oid_with_context(E1, E2, &E3, E4) @@ expression E1, E2, E3, E4; @@ - get_sha1_with_context(E1, E2, E3->hash, E4) + get_oid_with_context(E1, E2, E3, E4) Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-07-14 07:49:28 +08:00
oidcpy(result, &commit->object.oid);
return FOUND;
}
p = commit->parents;
while (p) {
if (!--idx) {
sha1_name: convert get_sha1* to get_oid* Now that all the callers of get_sha1 directly or indirectly use struct object_id, rename the functions starting with get_sha1 to start with get_oid. Convert the internals in sha1_name.c to use struct object_id as well, and eliminate explicit length checks where possible. Convert a use of 40 in get_oid_basic to GIT_SHA1_HEXSZ. Outside of sha1_name.c and cache.h, this transition was made with the following semantic patch: @@ expression E1, E2; @@ - get_sha1(E1, E2.hash) + get_oid(E1, &E2) @@ expression E1, E2; @@ - get_sha1(E1, E2->hash) + get_oid(E1, E2) @@ expression E1, E2; @@ - get_sha1_committish(E1, E2.hash) + get_oid_committish(E1, &E2) @@ expression E1, E2; @@ - get_sha1_committish(E1, E2->hash) + get_oid_committish(E1, E2) @@ expression E1, E2; @@ - get_sha1_treeish(E1, E2.hash) + get_oid_treeish(E1, &E2) @@ expression E1, E2; @@ - get_sha1_treeish(E1, E2->hash) + get_oid_treeish(E1, E2) @@ expression E1, E2; @@ - get_sha1_commit(E1, E2.hash) + get_oid_commit(E1, &E2) @@ expression E1, E2; @@ - get_sha1_commit(E1, E2->hash) + get_oid_commit(E1, E2) @@ expression E1, E2; @@ - get_sha1_tree(E1, E2.hash) + get_oid_tree(E1, &E2) @@ expression E1, E2; @@ - get_sha1_tree(E1, E2->hash) + get_oid_tree(E1, E2) @@ expression E1, E2; @@ - get_sha1_blob(E1, E2.hash) + get_oid_blob(E1, &E2) @@ expression E1, E2; @@ - get_sha1_blob(E1, E2->hash) + get_oid_blob(E1, E2) @@ expression E1, E2, E3, E4; @@ - get_sha1_with_context(E1, E2, E3.hash, E4) + get_oid_with_context(E1, E2, &E3, E4) @@ expression E1, E2, E3, E4; @@ - get_sha1_with_context(E1, E2, E3->hash, E4) + get_oid_with_context(E1, E2, E3, E4) Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-07-14 07:49:28 +08:00
oidcpy(result, &p->item->object.oid);
return FOUND;
}
p = p->next;
}
return MISSING_OBJECT;
}
static enum get_oid_result get_nth_ancestor(struct repository *r,
const char *name, int len,
struct object_id *result,
int generation)
{
struct object_id oid;
struct commit *commit;
int ret;
ret = get_oid_1(r, name, len, &oid, GET_OID_COMMITTISH);
if (ret)
return ret;
commit = lookup_commit_reference(r, &oid);
if (!commit)
return MISSING_OBJECT;
while (generation--) {
libs: use "struct repository *" argument, not "the_repository" As can easily be seen from grepping in our sources, we had these uses of "the_repository" in various library code in cases where the function in question was already getting a "struct repository *" argument. Let's use that argument instead. Out of these changes only the changes to "cache-tree.c", "commit-reach.c", "shallow.c" and "upload-pack.c" would have cleanly applied before the migration away from the "repo_*()" wrapper macros in the preceding commits. The rest aren't new, as we'd previously implicitly refer to "the_repository", but it's now more obvious that we were doing the wrong thing all along, and should have used the parameter instead. The change to change "get_index_format_default(the_repository)" in "read-cache.c" to use the "r" variable instead should arguably have been part of [1], or in the subsequent cleanup in [2]. Let's do it here, as can be seen from the initial code in [3] it's not important that we use "the_repository" there, but would prefer to always use the current repository. This change excludes the "the_repository" use in "upload-pack.c"'s upload_pack_advertise(), as the in-flight [4] makes that change. 1. ee1f0c242ef (read-cache: add index.skipHash config option, 2023-01-06) 2. 6269f8eaad0 (treewide: always have a valid "index_state.repo" member, 2023-01-17) 3. 7211b9e7534 (repo-settings: consolidate some config settings, 2019-08-13) 4. <Y/hbUsGPVNAxTdmS@coredump.intra.peff.net> Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-03-28 21:58:58 +08:00
if (repo_parse_commit(r, commit) || !commit->parents)
return MISSING_OBJECT;
commit = commit->parents->item;
}
sha1_name: convert get_sha1* to get_oid* Now that all the callers of get_sha1 directly or indirectly use struct object_id, rename the functions starting with get_sha1 to start with get_oid. Convert the internals in sha1_name.c to use struct object_id as well, and eliminate explicit length checks where possible. Convert a use of 40 in get_oid_basic to GIT_SHA1_HEXSZ. Outside of sha1_name.c and cache.h, this transition was made with the following semantic patch: @@ expression E1, E2; @@ - get_sha1(E1, E2.hash) + get_oid(E1, &E2) @@ expression E1, E2; @@ - get_sha1(E1, E2->hash) + get_oid(E1, E2) @@ expression E1, E2; @@ - get_sha1_committish(E1, E2.hash) + get_oid_committish(E1, &E2) @@ expression E1, E2; @@ - get_sha1_committish(E1, E2->hash) + get_oid_committish(E1, E2) @@ expression E1, E2; @@ - get_sha1_treeish(E1, E2.hash) + get_oid_treeish(E1, &E2) @@ expression E1, E2; @@ - get_sha1_treeish(E1, E2->hash) + get_oid_treeish(E1, E2) @@ expression E1, E2; @@ - get_sha1_commit(E1, E2.hash) + get_oid_commit(E1, &E2) @@ expression E1, E2; @@ - get_sha1_commit(E1, E2->hash) + get_oid_commit(E1, E2) @@ expression E1, E2; @@ - get_sha1_tree(E1, E2.hash) + get_oid_tree(E1, &E2) @@ expression E1, E2; @@ - get_sha1_tree(E1, E2->hash) + get_oid_tree(E1, E2) @@ expression E1, E2; @@ - get_sha1_blob(E1, E2.hash) + get_oid_blob(E1, &E2) @@ expression E1, E2; @@ - get_sha1_blob(E1, E2->hash) + get_oid_blob(E1, E2) @@ expression E1, E2, E3, E4; @@ - get_sha1_with_context(E1, E2, E3.hash, E4) + get_oid_with_context(E1, E2, &E3, E4) @@ expression E1, E2, E3, E4; @@ - get_sha1_with_context(E1, E2, E3->hash, E4) + get_oid_with_context(E1, E2, E3, E4) Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-07-14 07:49:28 +08:00
oidcpy(result, &commit->object.oid);
return FOUND;
}
struct object *repo_peel_to_type(struct repository *r, const char *name, int namelen,
struct object *o, enum object_type expected_type)
{
if (name && !namelen)
namelen = strlen(name);
while (1) {
if (!o || (!o->parsed && !parse_object(r, &o->oid)))
return NULL;
peel_onion(): teach $foo^{object} peeler A string that names an object can be suffixed with ^{type} peeler to say "I have this object name; peel it until you get this type. If you cannot do so, it is an error". v1.8.2^{commit} asks for a commit that is pointed at an annotated tag v1.8.2; v1.8.2^{tree} unwraps it further to the top-level tree object. A special suffix ^{} (i.e. no type specified) means "I do not care what it unwraps to; just peel annotated tag until you get something that is not a tag". When you have a random user-supplied string, you can turn it to a bare 40-hex object name, and cause it to error out if such an object does not exist, with: git rev-parse --verify "$userstring^{}" for most objects, but this does not yield the tag object name when $userstring refers to an annotated tag. Introduce a new suffix, ^{object}, that only makes sure the given name refers to an existing object. Then git rev-parse --verify "$userstring^{object}" becomes a way to make sure $userstring refers to an existing object. This is necessary because the plumbing "rev-parse --verify" is only about "make sure the argument is something we can feed to get_sha1() and turn it into a raw 20-byte object name SHA-1" and is not about "make sure that 20-byte object name SHA-1 refers to an object that exists in our object store". When the given $userstring is already a 40-hex, by definition "rev-parse --verify $userstring" can turn it into a raw 20-byte object name. With "$userstring^{object}", we can make sure that the 40-hex string names an object that exists in our object store before "--verify" kicks in. Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-04-01 06:24:12 +08:00
if (expected_type == OBJ_ANY || o->type == expected_type)
return o;
if (o->type == OBJ_TAG)
o = ((struct tag*) o)->tagged;
else if (o->type == OBJ_COMMIT)
o = &(repo_get_commit_tree(r, ((struct commit *)o))->object);
else {
if (name)
error("%.*s: expected %s type, but the object "
"dereferences to %s type",
namelen, name, type_name(expected_type),
type_name(o->type));
return NULL;
}
}
}
static int peel_onion(struct repository *r, const char *name, int len,
struct object_id *oid, unsigned lookup_flags)
{
struct object_id outer;
const char *sp;
unsigned int expected_type = 0;
struct object *o;
/*
* "ref^{type}" dereferences ref repeatedly until you cannot
* dereference anymore, or you get an object of given type,
* whichever comes first. "ref^{}" means just dereference
* tags until you get a non-tag. "ref^0" is a shorthand for
* "ref^{commit}". "commit^{tree}" could be used to find the
* top-level tree of the given commit.
*/
if (len < 4 || name[len-1] != '}')
return -1;
for (sp = name + len - 1; name <= sp; sp--) {
int ch = *sp;
if (ch == '{' && name < sp && sp[-1] == '^')
break;
}
if (sp <= name)
return -1;
sp++; /* beginning of type name, or closing brace for empty */
if (starts_with(sp, "commit}"))
expected_type = OBJ_COMMIT;
else if (starts_with(sp, "tag}"))
expected_type = OBJ_TAG;
else if (starts_with(sp, "tree}"))
expected_type = OBJ_TREE;
else if (starts_with(sp, "blob}"))
expected_type = OBJ_BLOB;
else if (starts_with(sp, "object}"))
peel_onion(): teach $foo^{object} peeler A string that names an object can be suffixed with ^{type} peeler to say "I have this object name; peel it until you get this type. If you cannot do so, it is an error". v1.8.2^{commit} asks for a commit that is pointed at an annotated tag v1.8.2; v1.8.2^{tree} unwraps it further to the top-level tree object. A special suffix ^{} (i.e. no type specified) means "I do not care what it unwraps to; just peel annotated tag until you get something that is not a tag". When you have a random user-supplied string, you can turn it to a bare 40-hex object name, and cause it to error out if such an object does not exist, with: git rev-parse --verify "$userstring^{}" for most objects, but this does not yield the tag object name when $userstring refers to an annotated tag. Introduce a new suffix, ^{object}, that only makes sure the given name refers to an existing object. Then git rev-parse --verify "$userstring^{object}" becomes a way to make sure $userstring refers to an existing object. This is necessary because the plumbing "rev-parse --verify" is only about "make sure the argument is something we can feed to get_sha1() and turn it into a raw 20-byte object name SHA-1" and is not about "make sure that 20-byte object name SHA-1 refers to an object that exists in our object store". When the given $userstring is already a 40-hex, by definition "rev-parse --verify $userstring" can turn it into a raw 20-byte object name. With "$userstring^{object}", we can make sure that the 40-hex string names an object that exists in our object store before "--verify" kicks in. Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-04-01 06:24:12 +08:00
expected_type = OBJ_ANY;
else if (sp[0] == '}')
expected_type = OBJ_NONE;
else if (sp[0] == '/')
expected_type = OBJ_COMMIT;
else
return -1;
lookup_flags &= ~GET_OID_DISAMBIGUATORS;
if (expected_type == OBJ_COMMIT)
lookup_flags |= GET_OID_COMMITTISH;
else if (expected_type == OBJ_TREE)
lookup_flags |= GET_OID_TREEISH;
if (get_oid_1(r, name, sp - name - 2, &outer, lookup_flags))
return -1;
o = parse_object(r, &outer);
if (!o)
return -1;
if (!expected_type) {
o = deref_tag(r, o, name, sp - name - 2);
if (!o || (!o->parsed && !parse_object(r, &o->oid)))
return -1;
sha1_name: convert get_sha1* to get_oid* Now that all the callers of get_sha1 directly or indirectly use struct object_id, rename the functions starting with get_sha1 to start with get_oid. Convert the internals in sha1_name.c to use struct object_id as well, and eliminate explicit length checks where possible. Convert a use of 40 in get_oid_basic to GIT_SHA1_HEXSZ. Outside of sha1_name.c and cache.h, this transition was made with the following semantic patch: @@ expression E1, E2; @@ - get_sha1(E1, E2.hash) + get_oid(E1, &E2) @@ expression E1, E2; @@ - get_sha1(E1, E2->hash) + get_oid(E1, E2) @@ expression E1, E2; @@ - get_sha1_committish(E1, E2.hash) + get_oid_committish(E1, &E2) @@ expression E1, E2; @@ - get_sha1_committish(E1, E2->hash) + get_oid_committish(E1, E2) @@ expression E1, E2; @@ - get_sha1_treeish(E1, E2.hash) + get_oid_treeish(E1, &E2) @@ expression E1, E2; @@ - get_sha1_treeish(E1, E2->hash) + get_oid_treeish(E1, E2) @@ expression E1, E2; @@ - get_sha1_commit(E1, E2.hash) + get_oid_commit(E1, &E2) @@ expression E1, E2; @@ - get_sha1_commit(E1, E2->hash) + get_oid_commit(E1, E2) @@ expression E1, E2; @@ - get_sha1_tree(E1, E2.hash) + get_oid_tree(E1, &E2) @@ expression E1, E2; @@ - get_sha1_tree(E1, E2->hash) + get_oid_tree(E1, E2) @@ expression E1, E2; @@ - get_sha1_blob(E1, E2.hash) + get_oid_blob(E1, &E2) @@ expression E1, E2; @@ - get_sha1_blob(E1, E2->hash) + get_oid_blob(E1, E2) @@ expression E1, E2, E3, E4; @@ - get_sha1_with_context(E1, E2, E3.hash, E4) + get_oid_with_context(E1, E2, &E3, E4) @@ expression E1, E2, E3, E4; @@ - get_sha1_with_context(E1, E2, E3->hash, E4) + get_oid_with_context(E1, E2, E3, E4) Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-07-14 07:49:28 +08:00
oidcpy(oid, &o->oid);
return 0;
}
/*
* At this point, the syntax look correct, so
* if we do not get the needed object, we should
* barf.
*/
o = repo_peel_to_type(r, name, len, o, expected_type);
if (!o)
return -1;
sha1_name: convert get_sha1* to get_oid* Now that all the callers of get_sha1 directly or indirectly use struct object_id, rename the functions starting with get_sha1 to start with get_oid. Convert the internals in sha1_name.c to use struct object_id as well, and eliminate explicit length checks where possible. Convert a use of 40 in get_oid_basic to GIT_SHA1_HEXSZ. Outside of sha1_name.c and cache.h, this transition was made with the following semantic patch: @@ expression E1, E2; @@ - get_sha1(E1, E2.hash) + get_oid(E1, &E2) @@ expression E1, E2; @@ - get_sha1(E1, E2->hash) + get_oid(E1, E2) @@ expression E1, E2; @@ - get_sha1_committish(E1, E2.hash) + get_oid_committish(E1, &E2) @@ expression E1, E2; @@ - get_sha1_committish(E1, E2->hash) + get_oid_committish(E1, E2) @@ expression E1, E2; @@ - get_sha1_treeish(E1, E2.hash) + get_oid_treeish(E1, &E2) @@ expression E1, E2; @@ - get_sha1_treeish(E1, E2->hash) + get_oid_treeish(E1, E2) @@ expression E1, E2; @@ - get_sha1_commit(E1, E2.hash) + get_oid_commit(E1, &E2) @@ expression E1, E2; @@ - get_sha1_commit(E1, E2->hash) + get_oid_commit(E1, E2) @@ expression E1, E2; @@ - get_sha1_tree(E1, E2.hash) + get_oid_tree(E1, &E2) @@ expression E1, E2; @@ - get_sha1_tree(E1, E2->hash) + get_oid_tree(E1, E2) @@ expression E1, E2; @@ - get_sha1_blob(E1, E2.hash) + get_oid_blob(E1, &E2) @@ expression E1, E2; @@ - get_sha1_blob(E1, E2->hash) + get_oid_blob(E1, E2) @@ expression E1, E2, E3, E4; @@ - get_sha1_with_context(E1, E2, E3.hash, E4) + get_oid_with_context(E1, E2, &E3, E4) @@ expression E1, E2, E3, E4; @@ - get_sha1_with_context(E1, E2, E3->hash, E4) + get_oid_with_context(E1, E2, E3, E4) Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-07-14 07:49:28 +08:00
oidcpy(oid, &o->oid);
if (sp[0] == '/') {
/* "$commit^{/foo}" */
char *prefix;
int ret;
struct commit_list *list = NULL;
/*
* $commit^{/}. Some regex implementation may reject.
* We don't need regex anyway. '' pattern always matches.
*/
if (sp[1] == '}')
return 0;
prefix = xstrndup(sp + 1, name + len - 1 - (sp + 1));
commit_list_insert((struct commit *)o, &list);
ret = get_oid_oneline(r, prefix, oid, list);
free_commit_list(list);
free(prefix);
return ret;
}
return 0;
}
/*
 * Try to read `name` as describe-style output, "SOMETHING-g<hex>".
 *
 * The name is scanned from the end down to its third byte. Hex digits
 * are skipped; at the first 'g' that directly follows a '-', the text
 * after the 'g' is handed to get_short_oid() as an abbreviated commit
 * name (looked up quietly). Any other non-hex byte is also skipped and
 * the scan continues.
 *
 * Returns the get_short_oid() result when "-g" is found, -1 otherwise.
 */
static int get_describe_name(struct repository *r,
			     const char *name, int len,
			     struct object_id *oid)
{
	unsigned flags = GET_OID_QUIETLY | GET_OID_COMMIT;
	const char *cp;

	for (cp = name + len - 1; name + 2 <= cp; cp--) {
		char ch = *cp;

		if (isxdigit(ch))
			continue;
		/* Only the 'g' of "-g" marks describe output. */
		if (ch != 'g' || cp[-1] != '-')
			continue;

		cp++;
		len -= cp - name;
		return get_short_oid(r, cp, len, oid, flags);
	}
	return -1;
}
/*
 * Resolve the first `len` bytes of `name` to an object id.
 *
 * A trailing "~<n>" or "^<n>" is handled first: "name~3" is "name^^^",
 * and a bare "name~" or "name^" means n == 1. A count that overflows
 * an unsigned int or exceeds INT_MAX yields MISSING_OBJECT.
 *
 * Otherwise the name is tried, in order, as a "^{...}" peel expression,
 * as a basic name via get_oid_basic(), as describe output
 * ("SOMETHING-gXXXX"), and finally as an abbreviated object name.
 */
static enum get_oid_result get_oid_1(struct repository *r,
				     const char *name, int len,
				     struct object_id *oid,
				     unsigned lookup_flags)
{
	const char *end = name + len;
	const char *cp = end - 1;
	int suffix = 0;

	/* Step over trailing digits, then see what precedes them. */
	while (name <= cp && '0' <= *cp && *cp <= '9')
		cp--;
	if (name <= cp && (*cp == '~' || *cp == '^'))
		suffix = *cp;

	if (suffix) {
		unsigned int num = 0;
		int len1 = cp - name;
		const char *d;

		for (d = cp + 1; d < end; d++) {
			unsigned int digit = *d - '0';

			if (unsigned_mult_overflows(num, 10))
				return MISSING_OBJECT;
			num *= 10;
			if (unsigned_add_overflows(num, digit))
				return MISSING_OBJECT;
			num += digit;
		}

		if (!num && len1 == len - 1)
			num = 1; /* no digits at all: "name^" or "name~" */
		else if (num > INT_MAX)
			return MISSING_OBJECT;

		if (suffix == '^')
			return get_parent(r, name, len1, oid, num);
		/* the only other possibility is '~' */
		return get_nth_ancestor(r, name, len1, oid, num);
	}

	if (!peel_onion(r, name, len, oid, lookup_flags))
		return FOUND;

	if (!get_oid_basic(r, name, len, oid, lookup_flags))
		return FOUND;

	/* It could be describe output that is "SOMETHING-gXXXX" */
	if (!get_describe_name(r, name, len, oid))
		return FOUND;

	return get_short_oid(r, name, len, oid, lookup_flags);
}
/*
 * This interprets names like ':/Initial revision of "git"' by searching
 * through history and returning the first commit whose message matches
 * the given regular expression.
*
* For negative-matching, prefix the pattern-part with '!-', like: ':/!-WIP'.
*
* For a literal '!' character at the beginning of a pattern, you have to repeat
* that, like: ':/!!foo'
*
* For future extension, all other sequences beginning with ':/!' are reserved.
*/
/* Remember to update object flag allocation in object.h */
#define ONELINE_SEEN (1u<<20)
/*
 * Callback state passed to handle_one_ref() through its `cb_data`
 * argument.
 */
struct handle_one_ref_cb {
/* repository used to parse and dereference each ref's object */
struct repository *repo;
/* list that handle_one_ref() inserts the resulting commits into */
struct commit_list **list;
};
/*
 * Ref callback: if the ref at `path` points at a commit, possibly
 * through tags, insert that commit into the list carried in `cb_data`
 * (a struct handle_one_ref_cb). Refs whose object cannot be parsed, or
 * that do not lead to a commit, are ignored.
 *
 * Always returns 0.
 */
static int handle_one_ref(const char *path, const char *referent UNUSED, const struct object_id *oid,
			  int flag UNUSED,
			  void *cb_data)
{
	struct handle_one_ref_cb *ctx = cb_data;
	struct object *obj;

	obj = parse_object(ctx->repo, oid);
	if (obj && obj->type == OBJ_TAG)
		obj = deref_tag(ctx->repo, obj, path, strlen(path));
	if (obj && obj->type == OBJ_COMMIT)
		commit_list_insert((struct commit *)obj, ctx->list);
	return 0;
}
/*
 * Find a commit whose message matches the extended regular expression
 * in `prefix`, starting from the commits in `list`.
 *
 * A leading "!-" inverts the match, and a leading "!!" stands for a
 * literal '!' at the start of the pattern. Any other sequence starting
 * with '!' is rejected.
 *
 * Commits are taken from the queue with pop_most_recent_commit(). The
 * pattern is applied to the text after the first blank line of the
 * commit buffer; a buffer with no blank line counts as "no match",
 * which the "!-" form still inverts.
 *
 * On success the commit's object id is stored in `oid` and 0 is
 * returned. -1 is returned for a reserved "!" sequence, a pattern that
 * fails to compile, or when no commit qualifies.
 */
static int get_oid_oneline(struct repository *r,
			   const char *prefix, struct object_id *oid,
			   const struct commit_list *list)
{
	struct commit_list *queue = NULL;
	const struct commit_list *it;
	regex_t re;
	int negate = 0;
	int status = -1;

	if (*prefix == '!') {
		switch (*++prefix) {
		case '-':
			negate = 1;
			prefix++;
			break;
		case '!':
			/* keep the second '!' as a literal pattern character */
			break;
		default:
			return -1;
		}
	}

	if (regcomp(&re, prefix, REG_EXTENDED))
		return -1;

	/* Seed the queue, marking every starting commit as seen. */
	for (it = list; it; it = it->next) {
		it->item->object.flags |= ONELINE_SEEN;
		commit_list_insert(it->item, &queue);
	}

	while (queue) {
		struct commit *c = pop_most_recent_commit(&queue, ONELINE_SEEN);
		const char *msg, *body;
		int hit;

		if (!parse_object(r, &c->object.oid))
			continue;

		msg = repo_get_commit_buffer(r, c, NULL);
		body = strstr(msg, "\n\n");
		hit = body && !regexec(&re, body + 2, 0, NULL, 0);
		repo_unuse_commit_buffer(r, c, msg);

		if (hit != negate) {
			oidcpy(oid, &c->object.oid);
			status = 0;
			break;
		}
	}

	regfree(&re);
	for (it = list; it; it = it->next)
		clear_commit_marks(it->item, ONELINE_SEEN);
	free_commit_list(queue);
	return status;
}
struct grab_nth_branch_switch_cbdata {
int remaining;
struct strbuf *sb;
};
/*
 * Reflog callback used to find the N-th branch switch.
 *
 * Only entries whose message has the form
 * "checkout: moving from <old> to <new>" are counted. Each one
 * decrements `remaining` in the callback data (a struct
 * grab_nth_branch_switch_cbdata). When the counter reaches zero, <old>
 * is stored in the callback's strbuf.
 *
 * Returns 1 once the wanted entry has been recorded, 0 otherwise.
 */
static int grab_nth_branch_switch(struct object_id *ooid UNUSED,
				  struct object_id *noid UNUSED,
				  const char *email UNUSED,
				  timestamp_t timestamp UNUSED,
				  int tz UNUSED,
				  const char *message, void *cb_data)
{
	struct grab_nth_branch_switch_cbdata *state = cb_data;
	const char *from = NULL;
	const char *to;

	if (!skip_prefix(message, "checkout: moving from ", &from))
		return 0;
	to = strstr(from, " to ");
	if (!to)
		return 0;

	state->remaining--;
	if (state->remaining != 0)
		return 0;

	strbuf_reset(state->sb);
	strbuf_add(state->sb, from, (size_t)(to - from));
	return 1; /* we are done */
}
/*
Teach @{upstream} syntax to strbuf_branchanme() This teaches @{upstream} syntax to interpret_branch_name(), instead of dwim_ref() machinery. There are places in git UI that behaves differently when you give a local branch name and when you give an extended SHA-1 expression that evaluates to the commit object name at the tip of the branch. The intent is that the special syntax such as @{-1} can stand in as if the user spelled the name of the branch in such places. The name of the branch "frotz" to switch to ("git checkout frotz"), and the name of the branch "nitfol" to fork a new branch "frotz" from ("git checkout -b frotz nitfol"), are examples of such places. These places take only the name of the branch (e.g. "frotz"), and they are supposed to act differently to an equivalent refname (e.g. "refs/heads/frotz"), so hooking the @{upstream} and @{-N} syntax to dwim_ref() is insufficient when we want to deal with cases a local branch is forked from another local branch and use "forked@{upstream}" to name the forkee branch. The "upstream" syntax "forked@{u}" is to specify the ref that "forked" is configured to merge with, and most often the forkee is a remote tracking branch, not a local branch. We cannot simply return a local branch name, but that does not necessarily mean we have to returns the full refname (e.g. refs/remotes/origin/frotz, when returning origin/frotz is enough). This update calls shorten_unambiguous_ref() to do so. Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-01-20 15:17:11 +08:00
* Parse @{-N} syntax, return the number of characters parsed
* if successful; otherwise signal an error with negative value.
*/
static int interpret_nth_prior_checkout(struct repository *r,
const char *name, int namelen,
interpret_branch_name: always respect "namelen" parameter interpret_branch_name gets passed a "name" buffer to parse, along with a "namelen" parameter representing its length. If "namelen" is zero, we fallback to the NUL-terminated string-length of "name". However, it does not necessarily follow that if we have gotten a non-zero "namelen", it is the NUL-terminated string-length of "name". E.g., when get_sha1() is parsing "foo:bar", we will be asked to operate only on the first three characters. Yet in interpret_branch_name and its helpers, we use string functions like strchr() to operate on "name", looking past the length we were given. This can result in us mis-parsing object names. We should instead be limiting our search to "namelen" bytes. There are three distinct types of object names this patch addresses: - The intrepret_empty_at helper uses strchr to find the next @-expression after our potential empty-at. In an expression like "@:foo@bar", it erroneously thinks that the second "@" is relevant, even if we were asked only to look at the first character. This case is easy to trigger (and we test it in this patch). - When finding the initial @-mark for @{upstream}, we use strchr. This means we might treat "foo:@{upstream}" as the upstream for "foo:", even though we were asked only to look at "foo". We cannot test this one in practice, because it is masked by another bug (which is fixed in the next patch). - The interpret_nth_prior_checkout helper did not receive the name length at all. This turns out not to be a problem in practice, though, because its parsing is so limited: it always starts from the far-left of the string, and will not tolerate a colon (which is currently the only way to get a smaller-than-strlen "namelen"). However, it's still worth fixing to make the code more obviously correct, and to future-proof us against callers with more exotic buffers. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-01-15 16:31:57 +08:00
struct strbuf *buf)
{
long nth;
int retval;
struct grab_nth_branch_switch_cbdata cb;
const char *brace;
char *num_end;
interpret_branch_name: always respect "namelen" parameter interpret_branch_name gets passed a "name" buffer to parse, along with a "namelen" parameter representing its length. If "namelen" is zero, we fall back to the NUL-terminated string-length of "name". However, it does not necessarily follow that if we have gotten a non-zero "namelen", it is the NUL-terminated string-length of "name". E.g., when get_sha1() is parsing "foo:bar", we will be asked to operate only on the first three characters. Yet in interpret_branch_name and its helpers, we use string functions like strchr() to operate on "name", looking past the length we were given. This can result in us mis-parsing object names. We should instead be limiting our search to "namelen" bytes. There are three distinct types of object names this patch addresses: - The interpret_empty_at helper uses strchr to find the next @-expression after our potential empty-at. In an expression like "@:foo@bar", it erroneously thinks that the second "@" is relevant, even if we were asked only to look at the first character. This case is easy to trigger (and we test it in this patch). - When finding the initial @-mark for @{upstream}, we use strchr. This means we might treat "foo:@{upstream}" as the upstream for "foo:", even though we were asked only to look at "foo". We cannot test this one in practice, because it is masked by another bug (which is fixed in the next patch). - The interpret_nth_prior_checkout helper did not receive the name length at all. This turns out not to be a problem in practice, though, because its parsing is so limited: it always starts from the far-left of the string, and will not tolerate a colon (which is currently the only way to get a smaller-than-strlen "namelen"). However, it's still worth fixing to make the code more obviously correct, and to future-proof us against callers with more exotic buffers. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-01-15 16:31:57 +08:00
if (namelen < 4)
return -1;
if (name[0] != '@' || name[1] != '{' || name[2] != '-')
return -1;
interpret_branch_name: always respect "namelen" parameter interpret_branch_name gets passed a "name" buffer to parse, along with a "namelen" parameter representing its length. If "namelen" is zero, we fall back to the NUL-terminated string-length of "name". However, it does not necessarily follow that if we have gotten a non-zero "namelen", it is the NUL-terminated string-length of "name". E.g., when get_sha1() is parsing "foo:bar", we will be asked to operate only on the first three characters. Yet in interpret_branch_name and its helpers, we use string functions like strchr() to operate on "name", looking past the length we were given. This can result in us mis-parsing object names. We should instead be limiting our search to "namelen" bytes. There are three distinct types of object names this patch addresses: - The interpret_empty_at helper uses strchr to find the next @-expression after our potential empty-at. In an expression like "@:foo@bar", it erroneously thinks that the second "@" is relevant, even if we were asked only to look at the first character. This case is easy to trigger (and we test it in this patch). - When finding the initial @-mark for @{upstream}, we use strchr. This means we might treat "foo:@{upstream}" as the upstream for "foo:", even though we were asked only to look at "foo". We cannot test this one in practice, because it is masked by another bug (which is fixed in the next patch). - The interpret_nth_prior_checkout helper did not receive the name length at all. This turns out not to be a problem in practice, though, because its parsing is so limited: it always starts from the far-left of the string, and will not tolerate a colon (which is currently the only way to get a smaller-than-strlen "namelen"). However, it's still worth fixing to make the code more obviously correct, and to future-proof us against callers with more exotic buffers. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-01-15 16:31:57 +08:00
brace = memchr(name, '}', namelen);
if (!brace)
return -1;
nth = strtol(name + 3, &num_end, 10);
if (num_end != brace)
return -1;
if (nth <= 0)
return -1;
cb.remaining = nth;
cb.sb = buf;
retval = refs_for_each_reflog_ent_reverse(get_main_ref_store(r),
"HEAD", grab_nth_branch_switch, &cb);
if (0 < retval) {
retval = brace - name + 1;
} else
retval = 0;
return retval;
}
int repo_get_oid_mb(struct repository *r,
const char *name,
struct object_id *oid)
{
struct commit *one, *two;
struct commit_list *mbs = NULL;
struct object_id oid_tmp;
const char *dots;
int st;
dots = strstr(name, "...");
if (!dots)
return repo_get_oid(r, name, oid);
if (dots == name)
st = repo_get_oid(r, "HEAD", &oid_tmp);
else {
struct strbuf sb;
strbuf_init(&sb, dots - name);
strbuf_add(&sb, name, dots - name);
st = repo_get_oid_committish(r, sb.buf, &oid_tmp);
strbuf_release(&sb);
}
if (st)
return st;
one = lookup_commit_reference_gently(r, &oid_tmp, 0);
if (!one)
return -1;
if (repo_get_oid_committish(r, dots[3] ? (dots + 3) : "HEAD", &oid_tmp))
return -1;
two = lookup_commit_reference_gently(r, &oid_tmp, 0);
if (!two)
return -1;
if (repo_get_merge_bases(r, one, two, &mbs) < 0) {
free_commit_list(mbs);
return -1;
}
if (!mbs || mbs->next)
st = -1;
else {
st = 0;
oidcpy(oid, &mbs->item->object.oid);
}
free_commit_list(mbs);
return st;
}
/*
 * Parse the "@something" syntax, when "something" is not "{.*}".
 *
 * "name"/"namelen" describe the buffer being parsed and "len" is the
 * offset of the '@' under consideration (the caller passes "at - name").
 * Only an '@' at offset 0 that is either the whole of the "namelen"
 * bytes, or is immediately followed by "@{", is accepted.
 *
 * On a match, "buf" is reset to contain "HEAD" and 1 is returned.
 * Otherwise -1 is returned and "buf" is not touched.
 */
static int interpret_empty_at(const char *name, int namelen, int len, struct strbuf *buf)
{
	const char *next;

	/* the '@' must come first and must not open an "@{...}" mark */
	if (len || name[1] == '{')
		return -1;

	/* make sure it's a single @, or @@{.*}, not @foo */
	next = memchr(name + len + 1, '@', namelen - len - 1);
	if (next && next[1] != '{')
		return -1;
	if (!next)
		next = name + namelen;
	if (next != name + 1)
		return -1;

	strbuf_reset(buf);
	strbuf_add(buf, "HEAD", 4);
	return 1;
}
static int reinterpret(struct repository *r,
const char *name, int namelen, int len,
interpret_branch_name: allow callers to restrict expansions The interpret_branch_name() function converts names like @{-1} and @{upstream} into branch names. The expanded ref names are not fully qualified, and may be outside of the refs/heads/ namespace (e.g., "@" expands to "HEAD", and "@{upstream}" is likely to be in "refs/remotes/"). This is OK for callers like dwim_ref() which are primarily interested in resolving the resulting name, no matter where it is. But callers like "git branch" treat the result as a branch name in refs/heads/. When we expand to a ref outside that namespace, the results are very confusing (e.g., "git branch @" tries to create refs/heads/HEAD, which is nonsense). Callers can't know from the returned string how the expansion happened (e.g., did the user really ask for a branch named "HEAD", or did we do a bogus expansion?). One fix would be to return some out-parameters describing the types of expansion that occurred. This has the benefit that the caller can generate precise error messages ("I understood @{upstream} to mean origin/master, but that is a remote tracking branch, so you cannot create it as a local name"). However, out-parameters make the function interface somewhat cumbersome. Instead, let's do the opposite: let the caller tell us which elements to expand. That's easier to pass in, and none of the callers give more precise error messages than "@{upstream} isn't a valid branch name" anyway (which should be sufficient). The strbuf_branchname() function needs a similar parameter, as most of the callers access interpret_branch_name() through it. We can break the callers down into two groups: 1. Callers that are happy with any kind of ref in the result. We pass "0" here, so they continue to work without restrictions. This includes merge_name(), the reflog handling in add_pending_object_with_path(), and substitute_branch_name(). This last is what powers dwim_ref(). 2. 
Callers that have funny corner cases (mostly in git-branch and git-checkout). These need to make use of the new parameter, but I've left them as "0" in this patch, and will address them individually in follow-on patches. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-03-02 16:23:01 +08:00
struct strbuf *buf, unsigned allowed)
{
/* we have extra data, which might need further processing */
struct strbuf tmp = STRBUF_INIT;
int used = buf->len;
int ret;
struct interpret_branch_name_options options = {
.allowed = allowed
};
strbuf_add(buf, name + len, namelen - len);
ret = repo_interpret_branch_name(r, buf->buf, buf->len, &tmp, &options);
/* that data was not interpreted, remove our cruft */
if (ret < 0) {
strbuf_setlen(buf, used);
return len;
}
strbuf_reset(buf);
strbuf_addbuf(buf, &tmp);
strbuf_release(&tmp);
/* tweak for size of {-N} versus expanded ref name */
return ret - used + len;
}
/*
 * Replace the contents of "buf" with the result of
 * refs_shorten_unambiguous_ref() for "ref", looked up in the main ref
 * store of "r".
 */
static void set_shortened_ref(struct repository *r, struct strbuf *buf, const char *ref)
{
	char *shortened;

	shortened = refs_shorten_unambiguous_ref(get_main_ref_store(r), ref, 0);

	strbuf_reset(buf);
	strbuf_addstr(buf, shortened);

	/* the string was copied into "buf" above, so free our copy */
	free(shortened);
}
interpret_branch_name: allow callers to restrict expansions The interpret_branch_name() function converts names like @{-1} and @{upstream} into branch names. The expanded ref names are not fully qualified, and may be outside of the refs/heads/ namespace (e.g., "@" expands to "HEAD", and "@{upstream}" is likely to be in "refs/remotes/"). This is OK for callers like dwim_ref() which are primarily interested in resolving the resulting name, no matter where it is. But callers like "git branch" treat the result as a branch name in refs/heads/. When we expand to a ref outside that namespace, the results are very confusing (e.g., "git branch @" tries to create refs/heads/HEAD, which is nonsense). Callers can't know from the returned string how the expansion happened (e.g., did the user really ask for a branch named "HEAD", or did we do a bogus expansion?). One fix would be to return some out-parameters describing the types of expansion that occurred. This has the benefit that the caller can generate precise error messages ("I understood @{upstream} to mean origin/master, but that is a remote tracking branch, so you cannot create it as a local name"). However, out-parameters make the function interface somewhat cumbersome. Instead, let's do the opposite: let the caller tell us which elements to expand. That's easier to pass in, and none of the callers give more precise error messages than "@{upstream} isn't a valid branch name" anyway (which should be sufficient). The strbuf_branchname() function needs a similar parameter, as most of the callers access interpret_branch_name() through it. We can break the callers down into two groups: 1. Callers that are happy with any kind of ref in the result. We pass "0" here, so they continue to work without restrictions. This includes merge_name(), the reflog handling in add_pending_object_with_path(), and substitute_branch_name(). This last is what powers dwim_ref(). 2. 
Callers that have funny corner cases (mostly in git-branch and git-checkout). These need to make use of the new parameter, but I've left them as "0" in this patch, and will address them individually in follow-on patches. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-03-02 16:23:01 +08:00
/*
 * Decide whether "refname" is acceptable under the "allowed" bitmask.
 *
 * An empty mask means "no restriction". Otherwise the ref must start
 * with "refs/heads/" when INTERPRET_BRANCH_LOCAL is set, or with
 * "refs/remotes/" when INTERPRET_BRANCH_REMOTE is set.
 *
 * Returns 1 if the ref is allowed, 0 if not.
 */
static int branch_interpret_allowed(const char *refname, unsigned allowed)
{
	if (allowed == 0)
		return 1;

	return ((allowed & INTERPRET_BRANCH_LOCAL) &&
		starts_with(refname, "refs/heads/")) ||
	       ((allowed & INTERPRET_BRANCH_REMOTE) &&
		starts_with(refname, "refs/remotes/"));
}
static int interpret_branch_mark(struct repository *r,
const char *name, int namelen,
int at, struct strbuf *buf,
int (*get_mark)(const char *, int),
const char *(*get_data)(struct branch *,
interpret_branch_name: allow callers to restrict expansions The interpret_branch_name() function converts names like @{-1} and @{upstream} into branch names. The expanded ref names are not fully qualified, and may be outside of the refs/heads/ namespace (e.g., "@" expands to "HEAD", and "@{upstream}" is likely to be in "refs/remotes/"). This is OK for callers like dwim_ref() which are primarily interested in resolving the resulting name, no matter where it is. But callers like "git branch" treat the result as a branch name in refs/heads/. When we expand to a ref outside that namespace, the results are very confusing (e.g., "git branch @" tries to create refs/heads/HEAD, which is nonsense). Callers can't know from the returned string how the expansion happened (e.g., did the user really ask for a branch named "HEAD", or did we do a bogus expansion?). One fix would be to return some out-parameters describing the types of expansion that occurred. This has the benefit that the caller can generate precise error messages ("I understood @{upstream} to mean origin/master, but that is a remote tracking branch, so you cannot create it as a local name"). However, out-parameters make the function interface somewhat cumbersome. Instead, let's do the opposite: let the caller tell us which elements to expand. That's easier to pass in, and none of the callers give more precise error messages than "@{upstream} isn't a valid branch name" anyway (which should be sufficient). The strbuf_branchname() function needs a similar parameter, as most of the callers access interpret_branch_name() through it. We can break the callers down into two groups: 1. Callers that are happy with any kind of ref in the result. We pass "0" here, so they continue to work without restrictions. This includes merge_name(), the reflog handling in add_pending_object_with_path(), and substitute_branch_name(). This last is what powers dwim_ref(). 2. 
Callers that have funny corner cases (mostly in git-branch and git-checkout). These need to make use of the new parameter, but I've left them as "0" in this patch, and will address them individually in follow-on patches. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-03-02 16:23:01 +08:00
struct strbuf *),
const struct interpret_branch_name_options *options)
{
int len;
struct branch *branch;
struct strbuf err = STRBUF_INIT;
const char *value;
len = get_mark(name + at, namelen - at);
if (!len)
return -1;
interpret_branch_name: avoid @{upstream} past colon get_sha1() cannot currently parse a valid object name like "HEAD:@{upstream}" (assuming that such an oddly named file exists in the HEAD commit). It takes two passes to parse the string: 1. It first considers the whole thing as a ref, which results in looking for the upstream of "HEAD:". 2. It finds the colon, parses "HEAD" as a tree-ish, and then finds the path "@{upstream}" in the tree. For a path that looks like a normal reflog (e.g., "HEAD:@{yesterday}"), the first pass is a no-op. We try to dwim_ref("HEAD:"), that returns zero refs, and we proceed with colon-parsing. For "HEAD:@{upstream}", though, the first pass ends up in interpret_upstream_mark, which tries to find the branch "HEAD:". When it sees that the branch does not exist, it actually dies rather than returning an error to the caller. As a result, we never make it to the second pass. One obvious way of fixing this would be to teach interpret_upstream_mark to simply report "no, this isn't an upstream" in such a case. However, that would make the error-reporting for legitimate upstream cases significantly worse. Something like "bogus@{upstream}" would simply report "unknown revision: bogus@{upstream}", while the current code diagnoses a wide variety of possible misconfigurations (no such branch, branch exists but does not have upstream, etc). However, we can take advantage of the fact that a branch name cannot contain a colon. Therefore even if we find an upstream mark, any prefix with a colon must mean that the upstream mark we found is actually a pathname, and should be disregarded completely. This patch implements that logic. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-01-15 16:37:23 +08:00
if (memchr(name, ':', at))
return -1;
if (at) {
char *name_str = xmemdupz(name, at);
branch = branch_get(name_str);
free(name_str);
} else
branch = branch_get(NULL);
value = get_data(branch, &err);
if (!value) {
if (options->nonfatal_dangling_mark) {
strbuf_release(&err);
return -1;
} else {
die("%s", err.buf);
}
}
if (!branch_interpret_allowed(value, options->allowed))
interpret_branch_name: allow callers to restrict expansions The interpret_branch_name() function converts names like @{-1} and @{upstream} into branch names. The expanded ref names are not fully qualified, and may be outside of the refs/heads/ namespace (e.g., "@" expands to "HEAD", and "@{upstream}" is likely to be in "refs/remotes/"). This is OK for callers like dwim_ref() which are primarily interested in resolving the resulting name, no matter where it is. But callers like "git branch" treat the result as a branch name in refs/heads/. When we expand to a ref outside that namespace, the results are very confusing (e.g., "git branch @" tries to create refs/heads/HEAD, which is nonsense). Callers can't know from the returned string how the expansion happened (e.g., did the user really ask for a branch named "HEAD", or did we do a bogus expansion?). One fix would be to return some out-parameters describing the types of expansion that occurred. This has the benefit that the caller can generate precise error messages ("I understood @{upstream} to mean origin/master, but that is a remote tracking branch, so you cannot create it as a local name"). However, out-parameters make the function interface somewhat cumbersome. Instead, let's do the opposite: let the caller tell us which elements to expand. That's easier to pass in, and none of the callers give more precise error messages than "@{upstream} isn't a valid branch name" anyway (which should be sufficient). The strbuf_branchname() function needs a similar parameter, as most of the callers access interpret_branch_name() through it. We can break the callers down into two groups: 1. Callers that are happy with any kind of ref in the result. We pass "0" here, so they continue to work without restrictions. This includes merge_name(), the reflog handling in add_pending_object_with_path(), and substitute_branch_name(). This last is what powers dwim_ref(). 2. 
Callers that have funny corner cases (mostly in git-branch and git-checkout). These need to make use of the new parameter, but I've left them as "0" in this patch, and will address them individually in follow-on patches. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-03-02 16:23:01 +08:00
return -1;
set_shortened_ref(r, buf, value);
return len + at;
}
int repo_interpret_branch_name(struct repository *r,
const char *name, int namelen,
struct strbuf *buf,
const struct interpret_branch_name_options *options)
Teach @{upstream} syntax to strbuf_branchname() This teaches @{upstream} syntax to interpret_branch_name(), instead of dwim_ref() machinery. There are places in git UI that behave differently when you give a local branch name and when you give an extended SHA-1 expression that evaluates to the commit object name at the tip of the branch. The intent is that the special syntax such as @{-1} can stand in as if the user spelled the name of the branch in such places. The name of the branch "frotz" to switch to ("git checkout frotz"), and the name of the branch "nitfol" to fork a new branch "frotz" from ("git checkout -b frotz nitfol"), are examples of such places. These places take only the name of the branch (e.g. "frotz"), and they are supposed to act differently to an equivalent refname (e.g. "refs/heads/frotz"), so hooking the @{upstream} and @{-N} syntax to dwim_ref() is insufficient when we want to deal with cases where a local branch is forked from another local branch and use "forked@{upstream}" to name the forkee branch. The "upstream" syntax "forked@{u}" is to specify the ref that "forked" is configured to merge with, and most often the forkee is a remote tracking branch, not a local branch. We cannot simply return a local branch name, but that does not necessarily mean we have to return the full refname (e.g. refs/remotes/origin/frotz, when returning origin/frotz is enough). This update calls shorten_unambiguous_ref() to do so. Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-01-20 15:17:11 +08:00
{
char *at;
const char *start;
int len;
Teach @{upstream} syntax to strbuf_branchname() This teaches @{upstream} syntax to interpret_branch_name(), instead of dwim_ref() machinery. There are places in git UI that behave differently when you give a local branch name and when you give an extended SHA-1 expression that evaluates to the commit object name at the tip of the branch. The intent is that the special syntax such as @{-1} can stand in as if the user spelled the name of the branch in such places. The name of the branch "frotz" to switch to ("git checkout frotz"), and the name of the branch "nitfol" to fork a new branch "frotz" from ("git checkout -b frotz nitfol"), are examples of such places. These places take only the name of the branch (e.g. "frotz"), and they are supposed to act differently to an equivalent refname (e.g. "refs/heads/frotz"), so hooking the @{upstream} and @{-N} syntax to dwim_ref() is insufficient when we want to deal with cases where a local branch is forked from another local branch and use "forked@{upstream}" to name the forkee branch. The "upstream" syntax "forked@{u}" is to specify the ref that "forked" is configured to merge with, and most often the forkee is a remote tracking branch, not a local branch. We cannot simply return a local branch name, but that does not necessarily mean we have to return the full refname (e.g. refs/remotes/origin/frotz, when returning origin/frotz is enough). This update calls shorten_unambiguous_ref() to do so. Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-01-20 15:17:11 +08:00
if (!namelen)
namelen = strlen(name);
if (!options->allowed || (options->allowed & INTERPRET_BRANCH_LOCAL)) {
len = interpret_nth_prior_checkout(r, name, namelen, buf);
interpret_branch_name: allow callers to restrict expansions The interpret_branch_name() function converts names like @{-1} and @{upstream} into branch names. The expanded ref names are not fully qualified, and may be outside of the refs/heads/ namespace (e.g., "@" expands to "HEAD", and "@{upstream}" is likely to be in "refs/remotes/"). This is OK for callers like dwim_ref() which are primarily interested in resolving the resulting name, no matter where it is. But callers like "git branch" treat the result as a branch name in refs/heads/. When we expand to a ref outside that namespace, the results are very confusing (e.g., "git branch @" tries to create refs/heads/HEAD, which is nonsense). Callers can't know from the returned string how the expansion happened (e.g., did the user really ask for a branch named "HEAD", or did we do a bogus expansion?). One fix would be to return some out-parameters describing the types of expansion that occurred. This has the benefit that the caller can generate precise error messages ("I understood @{upstream} to mean origin/master, but that is a remote tracking branch, so you cannot create it as a local name"). However, out-parameters make the function interface somewhat cumbersome. Instead, let's do the opposite: let the caller tell us which elements to expand. That's easier to pass in, and none of the callers give more precise error messages than "@{upstream} isn't a valid branch name" anyway (which should be sufficient). The strbuf_branchname() function needs a similar parameter, as most of the callers access interpret_branch_name() through it. We can break the callers down into two groups: 1. Callers that are happy with any kind of ref in the result. We pass "0" here, so they continue to work without restrictions. This includes merge_name(), the reflog handling in add_pending_object_with_path(), and substitute_branch_name(). This last is what powers dwim_ref(). 2. 
Callers that have funny corner cases (mostly in git-branch and git-checkout). These need to make use of the new parameter, but I've left them as "0" in this patch, and will address them individually in follow-on patches. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-03-02 16:23:01 +08:00
if (!len) {
return len; /* syntax Ok, not enough switches */
} else if (len > 0) {
if (len == namelen)
return len; /* consumed all */
else
return reinterpret(r, name, namelen, len, buf,
options->allowed);
interpret_branch_name: allow callers to restrict expansions The interpret_branch_name() function converts names like @{-1} and @{upstream} into branch names. The expanded ref names are not fully qualified, and may be outside of the refs/heads/ namespace (e.g., "@" expands to "HEAD", and "@{upstream}" is likely to be in "refs/remotes/"). This is OK for callers like dwim_ref() which are primarily interested in resolving the resulting name, no matter where it is. But callers like "git branch" treat the result as a branch name in refs/heads/. When we expand to a ref outside that namespace, the results are very confusing (e.g., "git branch @" tries to create refs/heads/HEAD, which is nonsense). Callers can't know from the returned string how the expansion happened (e.g., did the user really ask for a branch named "HEAD", or did we do a bogus expansion?). One fix would be to return some out-parameters describing the types of expansion that occurred. This has the benefit that the caller can generate precise error messages ("I understood @{upstream} to mean origin/master, but that is a remote tracking branch, so you cannot create it as a local name"). However, out-parameters make the function interface somewhat cumbersome. Instead, let's do the opposite: let the caller tell us which elements to expand. That's easier to pass in, and none of the callers give more precise error messages than "@{upstream} isn't a valid branch name" anyway (which should be sufficient). The strbuf_branchname() function needs a similar parameter, as most of the callers access interpret_branch_name() through it. We can break the callers down into two groups: 1. Callers that are happy with any kind of ref in the result. We pass "0" here, so they continue to work without restrictions. This includes merge_name(), the reflog handling in add_pending_object_with_path(), and substitute_branch_name(). This last is what powers dwim_ref(). 2. 
Callers that have funny corner cases (mostly in git-branch and git-checkout). These need to make use of the new parameter, but I've left them as "0" in this patch, and will address them individually in follow-on patches. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-03-02 16:23:01 +08:00
}
}
for (start = name;
(at = memchr(start, '@', namelen - (start - name)));
start = at + 1) {
if (!options->allowed || (options->allowed & INTERPRET_BRANCH_HEAD)) {
interpret_branch_name: allow callers to restrict expansions The interpret_branch_name() function converts names like @{-1} and @{upstream} into branch names. The expanded ref names are not fully qualified, and may be outside of the refs/heads/ namespace (e.g., "@" expands to "HEAD", and "@{upstream}" is likely to be in "refs/remotes/"). This is OK for callers like dwim_ref() which are primarily interested in resolving the resulting name, no matter where it is. But callers like "git branch" treat the result as a branch name in refs/heads/. When we expand to a ref outside that namespace, the results are very confusing (e.g., "git branch @" tries to create refs/heads/HEAD, which is nonsense). Callers can't know from the returned string how the expansion happened (e.g., did the user really ask for a branch named "HEAD", or did we do a bogus expansion?). One fix would be to return some out-parameters describing the types of expansion that occurred. This has the benefit that the caller can generate precise error messages ("I understood @{upstream} to mean origin/master, but that is a remote tracking branch, so you cannot create it as a local name"). However, out-parameters make the function interface somewhat cumbersome. Instead, let's do the opposite: let the caller tell us which elements to expand. That's easier to pass in, and none of the callers give more precise error messages than "@{upstream} isn't a valid branch name" anyway (which should be sufficient). The strbuf_branchname() function needs a similar parameter, as most of the callers access interpret_branch_name() through it. We can break the callers down into two groups: 1. Callers that are happy with any kind of ref in the result. We pass "0" here, so they continue to work without restrictions. This includes merge_name(), the reflog handling in add_pending_object_with_path(), and substitute_branch_name(). This last is what powers dwim_ref(). 2. 
Callers that have funny corner cases (mostly in git-branch and git-checkout). These need to make use of the new parameter, but I've left them as "0" in this patch, and will address them individually in follow-on patches. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-03-02 16:23:01 +08:00
len = interpret_empty_at(name, namelen, at - name, buf);
if (len > 0)
return reinterpret(r, name, namelen, len, buf,
options->allowed);
interpret_branch_name: allow callers to restrict expansions The interpret_branch_name() function converts names like @{-1} and @{upstream} into branch names. The expanded ref names are not fully qualified, and may be outside of the refs/heads/ namespace (e.g., "@" expands to "HEAD", and "@{upstream}" is likely to be in "refs/remotes/"). This is OK for callers like dwim_ref() which are primarily interested in resolving the resulting name, no matter where it is. But callers like "git branch" treat the result as a branch name in refs/heads/. When we expand to a ref outside that namespace, the results are very confusing (e.g., "git branch @" tries to create refs/heads/HEAD, which is nonsense). Callers can't know from the returned string how the expansion happened (e.g., did the user really ask for a branch named "HEAD", or did we do a bogus expansion?). One fix would be to return some out-parameters describing the types of expansion that occurred. This has the benefit that the caller can generate precise error messages ("I understood @{upstream} to mean origin/master, but that is a remote tracking branch, so you cannot create it as a local name"). However, out-parameters make the function interface somewhat cumbersome. Instead, let's do the opposite: let the caller tell us which elements to expand. That's easier to pass in, and none of the callers give more precise error messages than "@{upstream} isn't a valid branch name" anyway (which should be sufficient). The strbuf_branchname() function needs a similar parameter, as most of the callers access interpret_branch_name() through it. We can break the callers down into two groups: 1. Callers that are happy with any kind of ref in the result. We pass "0" here, so they continue to work without restrictions. This includes merge_name(), the reflog handling in add_pending_object_with_path(), and substitute_branch_name(). This last is what powers dwim_ref(). 2. 
Callers that have funny corner cases (mostly in git-branch and git-checkout). These need to make use of the new parameter, but I've left them as "0" in this patch, and will address them individually in follow-on patches. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-03-02 16:23:01 +08:00
}
len = interpret_branch_mark(r, name, namelen, at - name, buf,
interpret_branch_name: allow callers to restrict expansions The interpret_branch_name() function converts names like @{-1} and @{upstream} into branch names. The expanded ref names are not fully qualified, and may be outside of the refs/heads/ namespace (e.g., "@" expands to "HEAD", and "@{upstream}" is likely to be in "refs/remotes/"). This is OK for callers like dwim_ref() which are primarily interested in resolving the resulting name, no matter where it is. But callers like "git branch" treat the result as a branch name in refs/heads/. When we expand to a ref outside that namespace, the results are very confusing (e.g., "git branch @" tries to create refs/heads/HEAD, which is nonsense). Callers can't know from the returned string how the expansion happened (e.g., did the user really ask for a branch named "HEAD", or did we do a bogus expansion?). One fix would be to return some out-parameters describing the types of expansion that occurred. This has the benefit that the caller can generate precise error messages ("I understood @{upstream} to mean origin/master, but that is a remote tracking branch, so you cannot create it as a local name"). However, out-parameters make the function interface somewhat cumbersome. Instead, let's do the opposite: let the caller tell us which elements to expand. That's easier to pass in, and none of the callers give more precise error messages than "@{upstream} isn't a valid branch name" anyway (which should be sufficient). The strbuf_branchname() function needs a similar parameter, as most of the callers access interpret_branch_name() through it. We can break the callers down into two groups: 1. Callers that are happy with any kind of ref in the result. We pass "0" here, so they continue to work without restrictions. This includes merge_name(), the reflog handling in add_pending_object_with_path(), and substitute_branch_name(). This last is what powers dwim_ref(). 2. 
Callers that have funny corner cases (mostly in git-branch and git-checkout). These need to make use of the new parameter, but I've left them as "0" in this patch, and will address them individually in follow-on patches. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-03-02 16:23:01 +08:00
upstream_mark, branch_get_upstream,
options);
if (len > 0)
return len;
len = interpret_branch_mark(r, name, namelen, at - name, buf,
interpret_branch_name: allow callers to restrict expansions The interpret_branch_name() function converts names like @{-1} and @{upstream} into branch names. The expanded ref names are not fully qualified, and may be outside of the refs/heads/ namespace (e.g., "@" expands to "HEAD", and "@{upstream}" is likely to be in "refs/remotes/"). This is OK for callers like dwim_ref() which are primarily interested in resolving the resulting name, no matter where it is. But callers like "git branch" treat the result as a branch name in refs/heads/. When we expand to a ref outside that namespace, the results are very confusing (e.g., "git branch @" tries to create refs/heads/HEAD, which is nonsense). Callers can't know from the returned string how the expansion happened (e.g., did the user really ask for a branch named "HEAD", or did we do a bogus expansion?). One fix would be to return some out-parameters describing the types of expansion that occurred. This has the benefit that the caller can generate precise error messages ("I understood @{upstream} to mean origin/master, but that is a remote tracking branch, so you cannot create it as a local name"). However, out-parameters make the function interface somewhat cumbersome. Instead, let's do the opposite: let the caller tell us which elements to expand. That's easier to pass in, and none of the callers give more precise error messages than "@{upstream} isn't a valid branch name" anyway (which should be sufficient). The strbuf_branchname() function needs a similar parameter, as most of the callers access interpret_branch_name() through it. We can break the callers down into two groups: 1. Callers that are happy with any kind of ref in the result. We pass "0" here, so they continue to work without restrictions. This includes merge_name(), the reflog handling in add_pending_object_with_path(), and substitute_branch_name(). This last is what powers dwim_ref(). 2. 
Callers that have funny corner cases (mostly in git-branch and git-checkout). These need to make use of the new parameter, but I've left them as "0" in this patch, and will address them individually in follow-on patches. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-03-02 16:23:01 +08:00
push_mark, branch_get_push,
options);
if (len > 0)
return len;
}
return -1;
Teach @{upstream} syntax to strbuf_branchanme() This teaches @{upstream} syntax to interpret_branch_name(), instead of dwim_ref() machinery. There are places in git UI that behaves differently when you give a local branch name and when you give an extended SHA-1 expression that evaluates to the commit object name at the tip of the branch. The intent is that the special syntax such as @{-1} can stand in as if the user spelled the name of the branch in such places. The name of the branch "frotz" to switch to ("git checkout frotz"), and the name of the branch "nitfol" to fork a new branch "frotz" from ("git checkout -b frotz nitfol"), are examples of such places. These places take only the name of the branch (e.g. "frotz"), and they are supposed to act differently to an equivalent refname (e.g. "refs/heads/frotz"), so hooking the @{upstream} and @{-N} syntax to dwim_ref() is insufficient when we want to deal with cases a local branch is forked from another local branch and use "forked@{upstream}" to name the forkee branch. The "upstream" syntax "forked@{u}" is to specify the ref that "forked" is configured to merge with, and most often the forkee is a remote tracking branch, not a local branch. We cannot simply return a local branch name, but that does not necessarily mean we have to returns the full refname (e.g. refs/remotes/origin/frotz, when returning origin/frotz is enough). This update calls shorten_unambiguous_ref() to do so. Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-01-20 15:17:11 +08:00
}
interpret_branch_name: allow callers to restrict expansions The interpret_branch_name() function converts names like @{-1} and @{upstream} into branch names. The expanded ref names are not fully qualified, and may be outside of the refs/heads/ namespace (e.g., "@" expands to "HEAD", and "@{upstream}" is likely to be in "refs/remotes/"). This is OK for callers like dwim_ref() which are primarily interested in resolving the resulting name, no matter where it is. But callers like "git branch" treat the result as a branch name in refs/heads/. When we expand to a ref outside that namespace, the results are very confusing (e.g., "git branch @" tries to create refs/heads/HEAD, which is nonsense). Callers can't know from the returned string how the expansion happened (e.g., did the user really ask for a branch named "HEAD", or did we do a bogus expansion?). One fix would be to return some out-parameters describing the types of expansion that occurred. This has the benefit that the caller can generate precise error messages ("I understood @{upstream} to mean origin/master, but that is a remote tracking branch, so you cannot create it as a local name"). However, out-parameters make the function interface somewhat cumbersome. Instead, let's do the opposite: let the caller tell us which elements to expand. That's easier to pass in, and none of the callers give more precise error messages than "@{upstream} isn't a valid branch name" anyway (which should be sufficient). The strbuf_branchname() function needs a similar parameter, as most of the callers access interpret_branch_name() through it. We can break the callers down into two groups: 1. Callers that are happy with any kind of ref in the result. We pass "0" here, so they continue to work without restrictions. This includes merge_name(), the reflog handling in add_pending_object_with_path(), and substitute_branch_name(). This last is what powers dwim_ref(). 2. 
Callers that have funny corner cases (mostly in git-branch and git-checkout). These need to make use of the new parameter, but I've left them as "0" in this patch, and will address them individually in follow-on patches. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-03-02 16:23:01 +08:00
void strbuf_branchname(struct strbuf *sb, const char *name, unsigned allowed)
{
int len = strlen(name);
struct interpret_branch_name_options options = {
.allowed = allowed
};
int used = repo_interpret_branch_name(the_repository, name, len, sb,
&options);
if (used < 0)
used = 0;
strbuf_add(sb, name + used, len - used);
}
int strbuf_check_branch_ref(struct strbuf *sb, const char *name)
{
check-ref-format --branch: do not expand @{...} outside repository Running "git check-ref-format --branch @{-1}" from outside any repository produces $ git check-ref-format --branch @{-1} BUG: environment.c:182: git environment hasn't been setup This is because the expansion of @{-1} must come from the HEAD reflog, which involves opening the repository. @{u} and @{push} (which are more unusual because they typically would not expand to a local branch) trigger the same assertion. This has been broken since day one. Before v2.13.0-rc0~48^2 (setup_git_env: avoid blind fall-back to ".git", 2016-10-02), the breakage was more subtle: Git would read reflogs from ".git" within the current directory even if it was not a valid repository. Usually that is harmless because Git is not being run from the root directory of an invalid repository, but in edge cases such accesses can be confusing or harmful. Since v2.13.0, the problem is easier to diagnose because Git aborts with a BUG message. Erroring out is the right behavior: when asked to interpret a branch name like "@{-1}", there is no reasonable answer in this context. But we should print a message saying so instead of an assertion failure. We do not forbid "check-ref-format --branch" from outside a repository altogether because it is ok for a script to pre-process branch arguments without @{...} in such a context. For example, with pre-2.13 Git, a script that does branch='master'; # default value parse_options branch=$(git check-ref-format --branch "$branch") to normalize an optional branch name provided by the user would work both inside a repository (where the user could provide '@{-1}') and outside (where '@{-1}' should not be accepted). So disable the "expand @{...}" half of the feature when run outside a repository, but keep the check of the syntax of a proposed branch name. 
This way, when run from outside a repository, "git check-ref-format --branch @{-1}" will gracefully fail: $ git check-ref-format --branch @{-1} fatal: '@{-1}' is not a valid branch name and "git check-ref-format --branch master" will succeed as before: $ git check-ref-format --branch master master restoring the usual pre-2.13 behavior. [jn: split out from a larger patch; moved conditional to strbuf_check_branch_ref instead of its caller; fleshed out commit message; some style tweaks in tests] Reported-by: Marko Kungla <marko.kungla@gmail.com> Helped-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com> Signed-off-by: Jonathan Nieder <jrnieder@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-10-17 15:08:08 +08:00
if (startup_info->have_repository)
strbuf_branchname(sb, name, INTERPRET_BRANCH_LOCAL);
else
strbuf_addstr(sb, name);
/*
* This splice must be done even if we end up rejecting the
* name; builtin/branch.c::copy_or_rename_branch() still wants
* to see what the name expanded to so that "branch -m" can be
* used as a tool to correct earlier mistakes.
*/
strbuf_splice(sb, 0, 0, "refs/heads/", 11);
if (*name == '-' ||
!strcmp(sb->buf, "refs/heads/HEAD"))
return -1;
return check_refname_format(sb->buf, 0);
}
/*
 * Release the resources held by "ctx": its symlink_path strbuf and its
 * path string. The "ctx" structure itself is not freed.
 */
void object_context_release(struct object_context *ctx)
{
	strbuf_release(&ctx->symlink_path);
	free(ctx->path);
}
/*
sha1_name: convert get_sha1* to get_oid* Now that all the callers of get_sha1 directly or indirectly use struct object_id, rename the functions starting with get_sha1 to start with get_oid. Convert the internals in sha1_name.c to use struct object_id as well, and eliminate explicit length checks where possible. Convert a use of 40 in get_oid_basic to GIT_SHA1_HEXSZ. Outside of sha1_name.c and cache.h, this transition was made with the following semantic patch: @@ expression E1, E2; @@ - get_sha1(E1, E2.hash) + get_oid(E1, &E2) @@ expression E1, E2; @@ - get_sha1(E1, E2->hash) + get_oid(E1, E2) @@ expression E1, E2; @@ - get_sha1_committish(E1, E2.hash) + get_oid_committish(E1, &E2) @@ expression E1, E2; @@ - get_sha1_committish(E1, E2->hash) + get_oid_committish(E1, E2) @@ expression E1, E2; @@ - get_sha1_treeish(E1, E2.hash) + get_oid_treeish(E1, &E2) @@ expression E1, E2; @@ - get_sha1_treeish(E1, E2->hash) + get_oid_treeish(E1, E2) @@ expression E1, E2; @@ - get_sha1_commit(E1, E2.hash) + get_oid_commit(E1, &E2) @@ expression E1, E2; @@ - get_sha1_commit(E1, E2->hash) + get_oid_commit(E1, E2) @@ expression E1, E2; @@ - get_sha1_tree(E1, E2.hash) + get_oid_tree(E1, &E2) @@ expression E1, E2; @@ - get_sha1_tree(E1, E2->hash) + get_oid_tree(E1, E2) @@ expression E1, E2; @@ - get_sha1_blob(E1, E2.hash) + get_oid_blob(E1, &E2) @@ expression E1, E2; @@ - get_sha1_blob(E1, E2->hash) + get_oid_blob(E1, E2) @@ expression E1, E2, E3, E4; @@ - get_sha1_with_context(E1, E2, E3.hash, E4) + get_oid_with_context(E1, E2, &E3, E4) @@ expression E1, E2, E3, E4; @@ - get_sha1_with_context(E1, E2, E3->hash, E4) + get_oid_with_context(E1, E2, E3, E4) Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-07-14 07:49:28 +08:00
* This is like "get_oid_basic()", except it allows "object ID expressions",
* notably "xyz^" for "parent of xyz"
*/
int repo_get_oid(struct repository *r, const char *name, struct object_id *oid)
{
sha1_name: convert get_sha1* to get_oid* Now that all the callers of get_sha1 directly or indirectly use struct object_id, rename the functions starting with get_sha1 to start with get_oid. Convert the internals in sha1_name.c to use struct object_id as well, and eliminate explicit length checks where possible. Convert a use of 40 in get_oid_basic to GIT_SHA1_HEXSZ. Outside of sha1_name.c and cache.h, this transition was made with the following semantic patch: @@ expression E1, E2; @@ - get_sha1(E1, E2.hash) + get_oid(E1, &E2) @@ expression E1, E2; @@ - get_sha1(E1, E2->hash) + get_oid(E1, E2) @@ expression E1, E2; @@ - get_sha1_committish(E1, E2.hash) + get_oid_committish(E1, &E2) @@ expression E1, E2; @@ - get_sha1_committish(E1, E2->hash) + get_oid_committish(E1, E2) @@ expression E1, E2; @@ - get_sha1_treeish(E1, E2.hash) + get_oid_treeish(E1, &E2) @@ expression E1, E2; @@ - get_sha1_treeish(E1, E2->hash) + get_oid_treeish(E1, E2) @@ expression E1, E2; @@ - get_sha1_commit(E1, E2.hash) + get_oid_commit(E1, &E2) @@ expression E1, E2; @@ - get_sha1_commit(E1, E2->hash) + get_oid_commit(E1, E2) @@ expression E1, E2; @@ - get_sha1_tree(E1, E2.hash) + get_oid_tree(E1, &E2) @@ expression E1, E2; @@ - get_sha1_tree(E1, E2->hash) + get_oid_tree(E1, E2) @@ expression E1, E2; @@ - get_sha1_blob(E1, E2.hash) + get_oid_blob(E1, &E2) @@ expression E1, E2; @@ - get_sha1_blob(E1, E2->hash) + get_oid_blob(E1, E2) @@ expression E1, E2, E3, E4; @@ - get_sha1_with_context(E1, E2, E3.hash, E4) + get_oid_with_context(E1, E2, &E3, E4) @@ expression E1, E2, E3, E4; @@ - get_sha1_with_context(E1, E2, E3->hash, E4) + get_oid_with_context(E1, E2, E3, E4) Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-07-14 07:49:28 +08:00
struct object_context unused;
int ret = get_oid_with_context(r, name, 0, oid, &unused);
object_context_release(&unused);
return ret;
}
/*
* This returns a non-zero value if the string (built using printf
* format and the given arguments) is not a valid object.
*/
int get_oidf(struct object_id *oid, const char *fmt, ...)
{
va_list ap;
int ret;
struct strbuf sb = STRBUF_INIT;
va_start(ap, fmt);
strbuf_vaddf(&sb, fmt, ap);
va_end(ap);
ret = repo_get_oid(the_repository, sb.buf, oid);
strbuf_release(&sb);
return ret;
}
/*
* Many callers know that the user meant to name a commit-ish by
* syntactical positions where the object name appears. Calling this
* function allows the machinery to disambiguate shorter-than-unique
* abbreviated object names between commit-ish and others.
*
* Note that this does NOT error out when the named object is not a
* commit-ish. It is merely to give a hint to the disambiguation
* machinery.
*/
int repo_get_oid_committish(struct repository *r,
const char *name,
struct object_id *oid)
{
struct object_context unused;
int ret = get_oid_with_context(r, name, GET_OID_COMMITTISH,
oid, &unused);
object_context_release(&unused);
return ret;
}
int repo_get_oid_treeish(struct repository *r,
const char *name,
struct object_id *oid)
{
struct object_context unused;
int ret = get_oid_with_context(r, name, GET_OID_TREEISH,
oid, &unused);
object_context_release(&unused);
return ret;
}
int repo_get_oid_commit(struct repository *r,
const char *name,
struct object_id *oid)
{
struct object_context unused;
int ret = get_oid_with_context(r, name, GET_OID_COMMIT,
oid, &unused);
object_context_release(&unused);
return ret;
}
int repo_get_oid_tree(struct repository *r,
const char *name,
struct object_id *oid)
{
struct object_context unused;
int ret = get_oid_with_context(r, name, GET_OID_TREE,
oid, &unused);
object_context_release(&unused);
return ret;
}
int repo_get_oid_blob(struct repository *r,
const char *name,
struct object_id *oid)
{
struct object_context unused;
int ret = get_oid_with_context(r, name, GET_OID_BLOB,
oid, &unused);
object_context_release(&unused);
return ret;
}
/* Must be called only when object_name:filename doesn't exist. */
static void diagnose_invalid_oid_path(struct repository *r,
const char *prefix,
sha1_name: convert get_sha1* to get_oid* Now that all the callers of get_sha1 directly or indirectly use struct object_id, rename the functions starting with get_sha1 to start with get_oid. Convert the internals in sha1_name.c to use struct object_id as well, and eliminate explicit length checks where possible. Convert a use of 40 in get_oid_basic to GIT_SHA1_HEXSZ. Outside of sha1_name.c and cache.h, this transition was made with the following semantic patch: @@ expression E1, E2; @@ - get_sha1(E1, E2.hash) + get_oid(E1, &E2) @@ expression E1, E2; @@ - get_sha1(E1, E2->hash) + get_oid(E1, E2) @@ expression E1, E2; @@ - get_sha1_committish(E1, E2.hash) + get_oid_committish(E1, &E2) @@ expression E1, E2; @@ - get_sha1_committish(E1, E2->hash) + get_oid_committish(E1, E2) @@ expression E1, E2; @@ - get_sha1_treeish(E1, E2.hash) + get_oid_treeish(E1, &E2) @@ expression E1, E2; @@ - get_sha1_treeish(E1, E2->hash) + get_oid_treeish(E1, E2) @@ expression E1, E2; @@ - get_sha1_commit(E1, E2.hash) + get_oid_commit(E1, &E2) @@ expression E1, E2; @@ - get_sha1_commit(E1, E2->hash) + get_oid_commit(E1, E2) @@ expression E1, E2; @@ - get_sha1_tree(E1, E2.hash) + get_oid_tree(E1, &E2) @@ expression E1, E2; @@ - get_sha1_tree(E1, E2->hash) + get_oid_tree(E1, E2) @@ expression E1, E2; @@ - get_sha1_blob(E1, E2.hash) + get_oid_blob(E1, &E2) @@ expression E1, E2; @@ - get_sha1_blob(E1, E2->hash) + get_oid_blob(E1, E2) @@ expression E1, E2, E3, E4; @@ - get_sha1_with_context(E1, E2, E3.hash, E4) + get_oid_with_context(E1, E2, &E3, E4) @@ expression E1, E2, E3, E4; @@ - get_sha1_with_context(E1, E2, E3->hash, E4) + get_oid_with_context(E1, E2, E3, E4) Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-07-14 07:49:28 +08:00
const char *filename,
const struct object_id *tree_oid,
const char *object_name,
int object_name_len)
{
sha1_name: convert get_sha1* to get_oid* Now that all the callers of get_sha1 directly or indirectly use struct object_id, rename the functions starting with get_sha1 to start with get_oid. Convert the internals in sha1_name.c to use struct object_id as well, and eliminate explicit length checks where possible. Convert a use of 40 in get_oid_basic to GIT_SHA1_HEXSZ. Outside of sha1_name.c and cache.h, this transition was made with the following semantic patch: @@ expression E1, E2; @@ - get_sha1(E1, E2.hash) + get_oid(E1, &E2) @@ expression E1, E2; @@ - get_sha1(E1, E2->hash) + get_oid(E1, E2) @@ expression E1, E2; @@ - get_sha1_committish(E1, E2.hash) + get_oid_committish(E1, &E2) @@ expression E1, E2; @@ - get_sha1_committish(E1, E2->hash) + get_oid_committish(E1, E2) @@ expression E1, E2; @@ - get_sha1_treeish(E1, E2.hash) + get_oid_treeish(E1, &E2) @@ expression E1, E2; @@ - get_sha1_treeish(E1, E2->hash) + get_oid_treeish(E1, E2) @@ expression E1, E2; @@ - get_sha1_commit(E1, E2.hash) + get_oid_commit(E1, &E2) @@ expression E1, E2; @@ - get_sha1_commit(E1, E2->hash) + get_oid_commit(E1, E2) @@ expression E1, E2; @@ - get_sha1_tree(E1, E2.hash) + get_oid_tree(E1, &E2) @@ expression E1, E2; @@ - get_sha1_tree(E1, E2->hash) + get_oid_tree(E1, E2) @@ expression E1, E2; @@ - get_sha1_blob(E1, E2.hash) + get_oid_blob(E1, &E2) @@ expression E1, E2; @@ - get_sha1_blob(E1, E2->hash) + get_oid_blob(E1, E2) @@ expression E1, E2, E3, E4; @@ - get_sha1_with_context(E1, E2, E3.hash, E4) + get_oid_with_context(E1, E2, &E3, E4) @@ expression E1, E2, E3, E4; @@ - get_sha1_with_context(E1, E2, E3->hash, E4) + get_oid_with_context(E1, E2, E3, E4) Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-07-14 07:49:28 +08:00
struct object_id oid;
unsigned short mode;
if (!prefix)
prefix = "";
if (file_exists(filename))
die(_("path '%s' exists on disk, but not in '%.*s'"),
filename, object_name_len, object_name);
if (is_missing_file_error(errno)) {
char *fullname = xstrfmt("%s%s", prefix, filename);
if (!get_tree_entry(r, tree_oid, fullname, &oid, &mode)) {
die(_("path '%s' exists, but not '%s'\n"
"hint: Did you mean '%.*s:%s' aka '%.*s:./%s'?"),
fullname,
filename,
object_name_len, object_name,
fullname,
object_name_len, object_name,
filename);
}
die(_("path '%s' does not exist in '%.*s'"),
filename, object_name_len, object_name);
}
}
/* Must be called only when :stage:filename doesn't exist. */
static void diagnose_invalid_index_path(struct repository *r,
int stage,
const char *prefix,
const char *filename)
{
struct index_state *istate = r->index;
Convert "struct cache_entry *" to "const ..." wherever possible I attempted to make index_state->cache[] a "const struct cache_entry **" to find out how existing entries in index are modified and where. The question I have is what do we do if we really need to keep track of on-disk changes in the index. The result is - diff-lib.c: setting CE_UPTODATE - name-hash.c: setting CE_HASHED - preload-index.c, read-cache.c, unpack-trees.c and builtin/update-index: obvious - entry.c: write_entry() may refresh the checked out entry via fill_stat_cache_info(). This causes "non-const struct cache_entry *" in builtin/apply.c, builtin/checkout-index.c and builtin/checkout.c - builtin/ls-files.c: --with-tree changes stagemask and may set CE_UPDATE Of these, write_entry() and its call sites are probably most interesting because it modifies on-disk info. But this is stat info and can be retrieved via refresh, at least for porcelain commands. Other just uses ce_flags for local purposes. So, keeping track of "dirty" entries is just a matter of setting a flag in index modification functions exposed by read-cache.c. Except unpack-trees, the rest of the code base does not do anything funny behind read-cache's back. The actual patch is less valueable than the summary above. But if anyone wants to re-identify the above sites. Applying this patch, then this: diff --git a/cache.h b/cache.h index 430d021..1692891 100644 --- a/cache.h +++ b/cache.h @@ -267,7 +267,7 @@ static inline unsigned int canon_mode(unsigned int mode) #define cache_entry_size(len) (offsetof(struct cache_entry,name) + (len) + 1) struct index_state { - struct cache_entry **cache; + const struct cache_entry **cache; unsigned int version; unsigned int cache_nr, cache_alloc, cache_changed; struct string_list *resolve_undo; will help quickly identify them without bogus warnings. Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-07-09 23:29:00 +08:00
const struct cache_entry *ce;
int pos;
unsigned namelen = strlen(filename);
struct strbuf fullname = STRBUF_INIT;
if (!prefix)
prefix = "";
/* Wrong stage number? */
pos = index_name_pos(istate, filename, namelen);
if (pos < 0)
pos = -pos - 1;
if (pos < istate->cache_nr) {
ce = istate->cache[pos];
if (!S_ISSPARSEDIR(ce->ce_mode) &&
ce_namelen(ce) == namelen &&
!memcmp(ce->name, filename, namelen))
die(_("path '%s' is in the index, but not at stage %d\n"
"hint: Did you mean ':%d:%s'?"),
filename, stage,
ce_stage(ce), filename);
}
/* Confusion between relative and absolute filenames? */
strbuf_addstr(&fullname, prefix);
strbuf_addstr(&fullname, filename);
pos = index_name_pos(istate, fullname.buf, fullname.len);
if (pos < 0)
pos = -pos - 1;
if (pos < istate->cache_nr) {
ce = istate->cache[pos];
if (!S_ISSPARSEDIR(ce->ce_mode) &&
ce_namelen(ce) == fullname.len &&
!memcmp(ce->name, fullname.buf, fullname.len))
die(_("path '%s' is in the index, but not '%s'\n"
"hint: Did you mean ':%d:%s' aka ':%d:./%s'?"),
fullname.buf, filename,
ce_stage(ce), fullname.buf,
ce_stage(ce), filename);
}
if (repo_file_exists(r, filename))
die(_("path '%s' exists on disk, but not in the index"), filename);
if (is_missing_file_error(errno))
die(_("path '%s' does not exist (neither on disk nor in the index)"),
filename);
strbuf_release(&fullname);
}
/*
 * Resolve a path spelled relative to the current directory.
 *
 * Returns NULL when "rel" does not begin with "./" or "../". Otherwise
 * the path is resolved with prefix_path() against startup_info->prefix
 * and that result is returned.
 *
 * Dies when "r" is not the_repository or when not inside a work tree.
 */
static char *resolve_relative_path(struct repository *r, const char *rel)
{
	const char *prefix;
	size_t prefix_len;

	if (!(starts_with(rel, "./") || starts_with(rel, "../")))
		return NULL;

	if (r != the_repository || !is_inside_work_tree())
		die(_("relative path syntax can't be used outside working tree"));

	prefix = startup_info->prefix;
	prefix_len = prefix ? strlen(prefix) : 0;

	/* die() inside prefix_path() if resolved path is outside worktree */
	return prefix_path(prefix, prefix_len, rel);
}
/*
 * Refuse index entries that are sparse directories.
 *
 * Returns 0 when "ce" is not a sparse-directory entry. Otherwise
 * returns -1, after first calling diagnose_invalid_index_path() with
 * "repo", "stage", "prefix" and "cp" when "only_to_die" is set.
 */
static int reject_tree_in_index(struct repository *repo,
				int only_to_die,
				const struct cache_entry *ce,
				int stage,
				const char *prefix,
				const char *cp)
{
	if (S_ISSPARSEDIR(ce->ce_mode)) {
		if (only_to_die)
			diagnose_invalid_index_path(repo, stage, prefix, cp);
		return -1;
	}

	return 0;
}
static enum get_oid_result get_oid_with_context_1(struct repository *repo,
const char *name,
sha1_name: convert get_sha1* to get_oid* Now that all the callers of get_sha1 directly or indirectly use struct object_id, rename the functions starting with get_sha1 to start with get_oid. Convert the internals in sha1_name.c to use struct object_id as well, and eliminate explicit length checks where possible. Convert a use of 40 in get_oid_basic to GIT_SHA1_HEXSZ. Outside of sha1_name.c and cache.h, this transition was made with the following semantic patch: @@ expression E1, E2; @@ - get_sha1(E1, E2.hash) + get_oid(E1, &E2) @@ expression E1, E2; @@ - get_sha1(E1, E2->hash) + get_oid(E1, E2) @@ expression E1, E2; @@ - get_sha1_committish(E1, E2.hash) + get_oid_committish(E1, &E2) @@ expression E1, E2; @@ - get_sha1_committish(E1, E2->hash) + get_oid_committish(E1, E2) @@ expression E1, E2; @@ - get_sha1_treeish(E1, E2.hash) + get_oid_treeish(E1, &E2) @@ expression E1, E2; @@ - get_sha1_treeish(E1, E2->hash) + get_oid_treeish(E1, E2) @@ expression E1, E2; @@ - get_sha1_commit(E1, E2.hash) + get_oid_commit(E1, &E2) @@ expression E1, E2; @@ - get_sha1_commit(E1, E2->hash) + get_oid_commit(E1, E2) @@ expression E1, E2; @@ - get_sha1_tree(E1, E2.hash) + get_oid_tree(E1, &E2) @@ expression E1, E2; @@ - get_sha1_tree(E1, E2->hash) + get_oid_tree(E1, E2) @@ expression E1, E2; @@ - get_sha1_blob(E1, E2.hash) + get_oid_blob(E1, &E2) @@ expression E1, E2; @@ - get_sha1_blob(E1, E2->hash) + get_oid_blob(E1, E2) @@ expression E1, E2, E3, E4; @@ - get_sha1_with_context(E1, E2, E3.hash, E4) + get_oid_with_context(E1, E2, &E3, E4) @@ expression E1, E2, E3, E4; @@ - get_sha1_with_context(E1, E2, E3->hash, E4) + get_oid_with_context(E1, E2, E3, E4) Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-07-14 07:49:28 +08:00
unsigned flags,
const char *prefix,
struct object_id *oid,
struct object_context *oc)
{
int ret, bracket_depth;
int namelen = strlen(name);
const char *cp;
int only_to_die = flags & GET_OID_ONLY_TO_DIE;
memset(oc, 0, sizeof(*oc));
oc->mode = S_IFINVALID;
strbuf_init(&oc->symlink_path, 0);
ret = get_oid_1(repo, name, namelen, oid, flags);
if (!ret && flags & GET_OID_REQUIRE_PATH)
die(_("<object>:<path> required, only <object> '%s' given"),
name);
if (!ret)
return ret;
/*
* tree:path --> object name of path in tree
* :path -> object name of absolute path in index
* :./path -> object name of path relative to cwd in index
* :[0-3]:path -> object name of path in index at stage
* :/foo -> recent commit matching foo
*/
if (name[0] == ':') {
int stage = 0;
Convert "struct cache_entry *" to "const ..." wherever possible I attempted to make index_state->cache[] a "const struct cache_entry **" to find out how existing entries in index are modified and where. The question I have is what do we do if we really need to keep track of on-disk changes in the index. The result is - diff-lib.c: setting CE_UPTODATE - name-hash.c: setting CE_HASHED - preload-index.c, read-cache.c, unpack-trees.c and builtin/update-index: obvious - entry.c: write_entry() may refresh the checked out entry via fill_stat_cache_info(). This causes "non-const struct cache_entry *" in builtin/apply.c, builtin/checkout-index.c and builtin/checkout.c - builtin/ls-files.c: --with-tree changes stagemask and may set CE_UPDATE Of these, write_entry() and its call sites are probably most interesting because it modifies on-disk info. But this is stat info and can be retrieved via refresh, at least for porcelain commands. Other just uses ce_flags for local purposes. So, keeping track of "dirty" entries is just a matter of setting a flag in index modification functions exposed by read-cache.c. Except unpack-trees, the rest of the code base does not do anything funny behind read-cache's back. The actual patch is less valueable than the summary above. But if anyone wants to re-identify the above sites. Applying this patch, then this: diff --git a/cache.h b/cache.h index 430d021..1692891 100644 --- a/cache.h +++ b/cache.h @@ -267,7 +267,7 @@ static inline unsigned int canon_mode(unsigned int mode) #define cache_entry_size(len) (offsetof(struct cache_entry,name) + (len) + 1) struct index_state { - struct cache_entry **cache; + const struct cache_entry **cache; unsigned int version; unsigned int cache_nr, cache_alloc, cache_changed; struct string_list *resolve_undo; will help quickly identify them without bogus warnings. Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-07-09 23:29:00 +08:00
const struct cache_entry *ce;
char *new_path = NULL;
int pos;
if (!only_to_die && namelen > 2 && name[1] == '/') {
struct handle_one_ref_cb cb;
struct commit_list *list = NULL;
cb.repo = repo;
cb.list = &list;
refs_for_each_ref(get_main_ref_store(repo), handle_one_ref, &cb);
refs_head_ref(get_main_ref_store(repo), handle_one_ref, &cb);
commit_list_sort_by_date(&list);
ret = get_oid_oneline(repo, name + 2, oid, list);
free_commit_list(list);
return ret;
}
if (namelen < 3 ||
name[2] != ':' ||
name[1] < '0' || '3' < name[1])
cp = name + 1;
else {
stage = name[1] - '0';
cp = name + 3;
}
new_path = resolve_relative_path(repo, cp);
if (!new_path) {
namelen = namelen - (cp - name);
} else {
cp = new_path;
namelen = strlen(cp);
}
if (flags & GET_OID_RECORD_PATH)
oc->path = xstrdup(cp);
get_oid: handle NULL repo->index When get_oid() and its helpers see an index name like ":.gitmodules", they try to load the index on demand, like: if (repo->index->cache) repo_read_index(repo); However, that misses the case when "repo->index" itself is NULL; we'll segfault in the conditional. This never happens with the_repository; there we always point its index field to &the_index. But a submodule repository may have a NULL index field until somebody calls repo_read_index(). This bug is triggered by t7411, but it was hard to notice because it's in an expect_failure block. That test was added by 2b1257e463 (t/helper: add test-submodule-nested-repo-config, 2018-10-25). Back then we had no easy way to access the .gitmodules blob of a submodule repo, so we expected (and got) an error message to that effect. Later, d9b8b8f896 (submodule-config.c: use repo_get_oid for reading .gitmodules, 2019-04-16) started looking in the correct repo, which is when we started triggering the segfault. With this fix, the test starts passing (once we clean it up as its comment instructs). Note that as far as I know, this bug could not be triggered outside of the test suite. It requires resolving an index name in a submodule, and all of the code paths (aside from test-tool) which do that either load the index themselves, or always pass the_repository. Ultimately it comes from 3a7a698e93 (sha1-name.c: remove implicit dependency on the_index, 2019-01-12), which replaced a check of "the_index.cache" with "repo->index->cache". So even if there is another way to trigger it, it wouldn't affect any versions before then. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-05-14 21:54:55 +08:00
if (!repo->index || !repo->index->cache)
repo_read_index(repo);
pos = index_name_pos(repo->index, cp, namelen);
if (pos < 0)
pos = -pos - 1;
while (pos < repo->index->cache_nr) {
ce = repo->index->cache[pos];
if (ce_namelen(ce) != namelen ||
memcmp(ce->name, cp, namelen))
break;
if (ce_stage(ce) == stage) {
free(new_path);
if (reject_tree_in_index(repo, only_to_die, ce,
stage, prefix, cp))
return -1;
sha1_name: convert get_sha1* to get_oid* Now that all the callers of get_sha1 directly or indirectly use struct object_id, rename the functions starting with get_sha1 to start with get_oid. Convert the internals in sha1_name.c to use struct object_id as well, and eliminate explicit length checks where possible. Convert a use of 40 in get_oid_basic to GIT_SHA1_HEXSZ. Outside of sha1_name.c and cache.h, this transition was made with the following semantic patch: @@ expression E1, E2; @@ - get_sha1(E1, E2.hash) + get_oid(E1, &E2) @@ expression E1, E2; @@ - get_sha1(E1, E2->hash) + get_oid(E1, E2) @@ expression E1, E2; @@ - get_sha1_committish(E1, E2.hash) + get_oid_committish(E1, &E2) @@ expression E1, E2; @@ - get_sha1_committish(E1, E2->hash) + get_oid_committish(E1, E2) @@ expression E1, E2; @@ - get_sha1_treeish(E1, E2.hash) + get_oid_treeish(E1, &E2) @@ expression E1, E2; @@ - get_sha1_treeish(E1, E2->hash) + get_oid_treeish(E1, E2) @@ expression E1, E2; @@ - get_sha1_commit(E1, E2.hash) + get_oid_commit(E1, &E2) @@ expression E1, E2; @@ - get_sha1_commit(E1, E2->hash) + get_oid_commit(E1, E2) @@ expression E1, E2; @@ - get_sha1_tree(E1, E2.hash) + get_oid_tree(E1, &E2) @@ expression E1, E2; @@ - get_sha1_tree(E1, E2->hash) + get_oid_tree(E1, E2) @@ expression E1, E2; @@ - get_sha1_blob(E1, E2.hash) + get_oid_blob(E1, &E2) @@ expression E1, E2; @@ - get_sha1_blob(E1, E2->hash) + get_oid_blob(E1, E2) @@ expression E1, E2, E3, E4; @@ - get_sha1_with_context(E1, E2, E3.hash, E4) + get_oid_with_context(E1, E2, &E3, E4) @@ expression E1, E2, E3, E4; @@ - get_sha1_with_context(E1, E2, E3->hash, E4) + get_oid_with_context(E1, E2, E3, E4) Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-07-14 07:49:28 +08:00
oidcpy(oid, &ce->oid);
oc->mode = ce->ce_mode;
return 0;
}
pos++;
}
if (only_to_die && name[1] && name[1] != '/')
diagnose_invalid_index_path(repo, stage, prefix, cp);
free(new_path);
return -1;
}
for (cp = name, bracket_depth = 0; *cp; cp++) {
if (*cp == '{')
bracket_depth++;
else if (bracket_depth && *cp == '}')
bracket_depth--;
else if (!bracket_depth && *cp == ':')
break;
}
if (*cp == ':') {
sha1_name: convert get_sha1* to get_oid* Now that all the callers of get_sha1 directly or indirectly use struct object_id, rename the functions starting with get_sha1 to start with get_oid. Convert the internals in sha1_name.c to use struct object_id as well, and eliminate explicit length checks where possible. Convert a use of 40 in get_oid_basic to GIT_SHA1_HEXSZ. Outside of sha1_name.c and cache.h, this transition was made with the following semantic patch: @@ expression E1, E2; @@ - get_sha1(E1, E2.hash) + get_oid(E1, &E2) @@ expression E1, E2; @@ - get_sha1(E1, E2->hash) + get_oid(E1, E2) @@ expression E1, E2; @@ - get_sha1_committish(E1, E2.hash) + get_oid_committish(E1, &E2) @@ expression E1, E2; @@ - get_sha1_committish(E1, E2->hash) + get_oid_committish(E1, E2) @@ expression E1, E2; @@ - get_sha1_treeish(E1, E2.hash) + get_oid_treeish(E1, &E2) @@ expression E1, E2; @@ - get_sha1_treeish(E1, E2->hash) + get_oid_treeish(E1, E2) @@ expression E1, E2; @@ - get_sha1_commit(E1, E2.hash) + get_oid_commit(E1, &E2) @@ expression E1, E2; @@ - get_sha1_commit(E1, E2->hash) + get_oid_commit(E1, E2) @@ expression E1, E2; @@ - get_sha1_tree(E1, E2.hash) + get_oid_tree(E1, &E2) @@ expression E1, E2; @@ - get_sha1_tree(E1, E2->hash) + get_oid_tree(E1, E2) @@ expression E1, E2; @@ - get_sha1_blob(E1, E2.hash) + get_oid_blob(E1, &E2) @@ expression E1, E2; @@ - get_sha1_blob(E1, E2->hash) + get_oid_blob(E1, E2) @@ expression E1, E2, E3, E4; @@ - get_sha1_with_context(E1, E2, E3.hash, E4) + get_oid_with_context(E1, E2, &E3, E4) @@ expression E1, E2, E3, E4; @@ - get_sha1_with_context(E1, E2, E3->hash, E4) + get_oid_with_context(E1, E2, E3, E4) Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-07-14 07:49:28 +08:00
struct object_id tree_oid;
int len = cp - name;
unsigned sub_flags = flags;
sub_flags &= ~GET_OID_DISAMBIGUATORS;
sub_flags |= GET_OID_TREEISH;
if (!get_oid_1(repo, name, len, &tree_oid, sub_flags)) {
const char *filename = cp+1;
char *new_filename = NULL;
new_filename = resolve_relative_path(repo, filename);
if (new_filename)
filename = new_filename;
if (flags & GET_OID_FOLLOW_SYMLINKS) {
ret = get_tree_entry_follow_symlinks(repo, &tree_oid,
filename, oid, &oc->symlink_path,
&oc->mode);
} else {
ret = get_tree_entry(repo, &tree_oid, filename, oid,
&oc->mode);
if (ret && only_to_die) {
diagnose_invalid_oid_path(repo, prefix,
filename,
sha1_name: convert get_sha1* to get_oid* Now that all the callers of get_sha1 directly or indirectly use struct object_id, rename the functions starting with get_sha1 to start with get_oid. Convert the internals in sha1_name.c to use struct object_id as well, and eliminate explicit length checks where possible. Convert a use of 40 in get_oid_basic to GIT_SHA1_HEXSZ. Outside of sha1_name.c and cache.h, this transition was made with the following semantic patch: @@ expression E1, E2; @@ - get_sha1(E1, E2.hash) + get_oid(E1, &E2) @@ expression E1, E2; @@ - get_sha1(E1, E2->hash) + get_oid(E1, E2) @@ expression E1, E2; @@ - get_sha1_committish(E1, E2.hash) + get_oid_committish(E1, &E2) @@ expression E1, E2; @@ - get_sha1_committish(E1, E2->hash) + get_oid_committish(E1, E2) @@ expression E1, E2; @@ - get_sha1_treeish(E1, E2.hash) + get_oid_treeish(E1, &E2) @@ expression E1, E2; @@ - get_sha1_treeish(E1, E2->hash) + get_oid_treeish(E1, E2) @@ expression E1, E2; @@ - get_sha1_commit(E1, E2.hash) + get_oid_commit(E1, &E2) @@ expression E1, E2; @@ - get_sha1_commit(E1, E2->hash) + get_oid_commit(E1, E2) @@ expression E1, E2; @@ - get_sha1_tree(E1, E2.hash) + get_oid_tree(E1, &E2) @@ expression E1, E2; @@ - get_sha1_tree(E1, E2->hash) + get_oid_tree(E1, E2) @@ expression E1, E2; @@ - get_sha1_blob(E1, E2.hash) + get_oid_blob(E1, &E2) @@ expression E1, E2; @@ - get_sha1_blob(E1, E2->hash) + get_oid_blob(E1, E2) @@ expression E1, E2, E3, E4; @@ - get_sha1_with_context(E1, E2, E3.hash, E4) + get_oid_with_context(E1, E2, &E3, E4) @@ expression E1, E2, E3, E4; @@ - get_sha1_with_context(E1, E2, E3->hash, E4) + get_oid_with_context(E1, E2, E3, E4) Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-07-14 07:49:28 +08:00
&tree_oid,
name, len);
}
}
if (flags & GET_OID_RECORD_PATH)
oc->path = xstrdup(filename);
free(new_filename);
return ret;
} else {
if (only_to_die)
die(_("invalid object name '%.*s'."), len, name);
}
}
return ret;
}
/*
* Call this function when you know "name" given by the end user must
* name an object but it doesn't; the function _may_ die with a better
* diagnostic message than "no such object 'name'", e.g. "Path 'doc' does not
* exist in 'HEAD'" when given "HEAD:doc", or it may return in which case
* you have a chance to diagnose the error further.
*/
void maybe_die_on_misspelt_object_name(struct repository *r,
const char *name,
const char *prefix)
{
struct object_context oc;
sha1_name: convert get_sha1* to get_oid* Now that all the callers of get_sha1 directly or indirectly use struct object_id, rename the functions starting with get_sha1 to start with get_oid. Convert the internals in sha1_name.c to use struct object_id as well, and eliminate explicit length checks where possible. Convert a use of 40 in get_oid_basic to GIT_SHA1_HEXSZ. Outside of sha1_name.c and cache.h, this transition was made with the following semantic patch: @@ expression E1, E2; @@ - get_sha1(E1, E2.hash) + get_oid(E1, &E2) @@ expression E1, E2; @@ - get_sha1(E1, E2->hash) + get_oid(E1, E2) @@ expression E1, E2; @@ - get_sha1_committish(E1, E2.hash) + get_oid_committish(E1, &E2) @@ expression E1, E2; @@ - get_sha1_committish(E1, E2->hash) + get_oid_committish(E1, E2) @@ expression E1, E2; @@ - get_sha1_treeish(E1, E2.hash) + get_oid_treeish(E1, &E2) @@ expression E1, E2; @@ - get_sha1_treeish(E1, E2->hash) + get_oid_treeish(E1, E2) @@ expression E1, E2; @@ - get_sha1_commit(E1, E2.hash) + get_oid_commit(E1, &E2) @@ expression E1, E2; @@ - get_sha1_commit(E1, E2->hash) + get_oid_commit(E1, E2) @@ expression E1, E2; @@ - get_sha1_tree(E1, E2.hash) + get_oid_tree(E1, &E2) @@ expression E1, E2; @@ - get_sha1_tree(E1, E2->hash) + get_oid_tree(E1, E2) @@ expression E1, E2; @@ - get_sha1_blob(E1, E2.hash) + get_oid_blob(E1, &E2) @@ expression E1, E2; @@ - get_sha1_blob(E1, E2->hash) + get_oid_blob(E1, E2) @@ expression E1, E2, E3, E4; @@ - get_sha1_with_context(E1, E2, E3.hash, E4) + get_oid_with_context(E1, E2, &E3, E4) @@ expression E1, E2, E3, E4; @@ - get_sha1_with_context(E1, E2, E3->hash, E4) + get_oid_with_context(E1, E2, E3, E4) Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-07-14 07:49:28 +08:00
struct object_id oid;
get_oid_with_context_1(r, name, GET_OID_ONLY_TO_DIE | GET_OID_QUIETLY,
prefix, &oid, &oc);
object_context_release(&oc);
}
/*
 * Look up the object name "str" in "repo" by way of
 * get_oid_with_context_1(), which is given "oid" and "oc" to fill in.
 *
 * GET_OID_FOLLOW_SYMLINKS and GET_OID_ONLY_TO_DIE must not both be set
 * in "flags"; that combination is reported through BUG().  Otherwise
 * the request is forwarded with a NULL prefix and the helper's result
 * is returned as-is.
 */
enum get_oid_result get_oid_with_context(struct repository *repo,
					 const char *str,
					 unsigned flags,
					 struct object_id *oid,
					 struct object_context *oc)
{
	const int follow_symlinks = !!(flags & GET_OID_FOLLOW_SYMLINKS);
	const int only_to_die = !!(flags & GET_OID_ONLY_TO_DIE);

	if (follow_symlinks && only_to_die)
		BUG("incompatible flags for get_oid_with_context");

	return get_oid_with_context_1(repo, str, flags, NULL, oid, oc);
}