git/ref-filter.c

3593 lines
96 KiB
C
Raw Normal View History

global: introduce `USE_THE_REPOSITORY_VARIABLE` macro Use of the `the_repository` variable is deprecated nowadays, and we slowly but steadily convert the codebase to not use it anymore. Instead, callers should be passing down the repository to work on via parameters. It is hard though to prove that a given code unit does not use this variable anymore. The most trivial case, merely demonstrating that there is no direct use of `the_repository`, is already a bit of a pain during code reviews as the reviewer needs to manually verify claims made by the patch author. The bigger problem though is that we have many interfaces that implicitly rely on `the_repository`. Introduce a new `USE_THE_REPOSITORY_VARIABLE` macro that allows code units to opt into usage of `the_repository`. The intent of this macro is to demonstrate that a certain code unit does not use this variable anymore, and to keep it from new dependencies on it in future changes, be it explicit or implicit For now, the macro only guards `the_repository` itself as well as `the_hash_algo`. There are many more known interfaces where we have an implicit dependency on `the_repository`, but those are not guarded at the current point in time. Over time though, we should start to add guards as required (or even better, just remove them). Define the macro as required in our code units. As expected, most of our code still relies on the global variable. Nearly all of our builtins rely on the variable as there is no way yet to pass `the_repository` to their entry point. For now, declare the macro in "biultin.h" to keep the required changes at least a little bit more contained. Signed-off-by: Patrick Steinhardt <ps@pks.im> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2024-06-14 14:50:23 +08:00
#define USE_THE_REPOSITORY_VARIABLE
#include "git-compat-util.h"
#include "environment.h"
#include "gettext.h"
#include "config.h"
#include "gpg-interface.h"
#include "hex.h"
#include "parse-options.h"
#include "run-command.h"
#include "refs.h"
#include "wildmatch.h"
#include "object-name.h"
#include "object-store-ll.h"
#include "oid-array.h"
#include "repository.h"
#include "commit.h"
#include "mailmap.h"
#include "ident.h"
#include "remote.h"
#include "color.h"
#include "tag.h"
#include "quote.h"
#include "ref-filter.h"
#include "revision.h"
2015-09-11 23:03:07 +08:00
#include "utf8.h"
#include "versioncmp.h"
#include "trailer.h"
#include "wt-status.h"
#include "commit-slab.h"
#include "commit-reach.h"
#include "worktree.h"
#include "hashmap.h"
static struct ref_msg {
const char *gone;
const char *ahead;
const char *behind;
const char *ahead_behind;
} msgs = {
/* Untranslated plumbing messages: */
"gone",
"ahead %d",
"behind %d",
"ahead %d, behind %d"
};
void setup_ref_filter_porcelain_msg(void)
{
msgs.gone = _("gone");
msgs.ahead = _("ahead %d");
msgs.behind = _("behind %d");
msgs.ahead_behind = _("ahead %d, behind %d");
}
typedef enum { FIELD_STR, FIELD_ULONG, FIELD_TIME } cmp_type;
typedef enum { COMPARE_EQUAL, COMPARE_UNEQUAL, COMPARE_NONE } cmp_status;
typedef enum { SOURCE_NONE = 0, SOURCE_OBJ, SOURCE_OTHER } info_source;
struct align {
align_type position;
unsigned int width;
};
struct if_then_else {
cmp_status cmp_status;
const char *str;
unsigned int then_atom_seen : 1,
else_atom_seen : 1,
condition_satisfied : 1;
};
struct refname_atom {
enum { R_NORMAL, R_SHORT, R_LSTRIP, R_RSTRIP } option;
int lstrip, rstrip;
};
static struct ref_trailer_buf {
struct string_list filter_list;
struct strbuf sepbuf;
struct strbuf kvsepbuf;
} ref_trailer_buf = {STRING_LIST_INIT_NODUP, STRBUF_INIT, STRBUF_INIT};
static struct expand_data {
struct object_id oid;
enum object_type type;
unsigned long size;
off_t disk_size;
struct object_id delta_base_oid;
void *content;
struct object_info info;
} oi, oi_deref;
struct ref_to_worktree_entry {
struct hashmap_entry ent;
struct worktree *wt; /* key is wt->head_ref */
};
static int ref_to_worktree_map_cmpfnc(const void *lookupdata UNUSED,
const struct hashmap_entry *eptr,
const struct hashmap_entry *kptr,
const void *keydata_aka_refname)
{
const struct ref_to_worktree_entry *e, *k;
e = container_of(eptr, const struct ref_to_worktree_entry, ent);
k = container_of(kptr, const struct ref_to_worktree_entry, ent);
return strcmp(e->wt->head_ref,
keydata_aka_refname ? keydata_aka_refname : k->wt->head_ref);
}
static struct ref_to_worktree_map {
struct hashmap map;
struct worktree **worktrees;
} ref_to_worktree_map;
/*
* The enum atom_type is used as the index of valid_atom array.
* In the atom parsing stage, it will be passed to used_atom.atom_type
* as the identifier of the atom type. We can check the type of used_atom
* entry by `if (used_atom[i].atom_type == ATOM_*)`.
*/
enum atom_type {
ATOM_REFNAME,
ATOM_OBJECTTYPE,
ATOM_OBJECTSIZE,
ATOM_OBJECTNAME,
ATOM_DELTABASE,
ATOM_TREE,
ATOM_PARENT,
ATOM_NUMPARENT,
ATOM_OBJECT,
ATOM_TYPE,
ATOM_TAG,
ATOM_AUTHOR,
ATOM_AUTHORNAME,
ATOM_AUTHOREMAIL,
ATOM_AUTHORDATE,
ATOM_COMMITTER,
ATOM_COMMITTERNAME,
ATOM_COMMITTEREMAIL,
ATOM_COMMITTERDATE,
ATOM_TAGGER,
ATOM_TAGGERNAME,
ATOM_TAGGEREMAIL,
ATOM_TAGGERDATE,
ATOM_CREATOR,
ATOM_CREATORDATE,
ATOM_DESCRIBE,
ATOM_SUBJECT,
ATOM_BODY,
ATOM_TRAILERS,
ATOM_CONTENTS,
ATOM_SIGNATURE,
ref-filter: add %(raw) atom Add new formatting option `%(raw)`, which will print the raw object data without any changes. It will help further to migrate all cat-file formatting logic from cat-file to ref-filter. The raw data of blob, tree objects may contain '\0', but most of the logic in `ref-filter` depends on the output of the atom being text (specifically, no embedded NULs in it). E.g. `quote_formatting()` use `strbuf_addstr()` or `*._quote_buf()` add the data to the buffer. The raw data of a tree object is `100644 one\0...`, only the `100644 one` will be added to the buffer, which is incorrect. Therefore, we need to find a way to record the length of the atom_value's member `s`. Although strbuf can already record the string and its length, if we want to replace the type of atom_value's member `s` with strbuf, many places in ref-filter that are filled with dynamically allocated mermory in `v->s` are not easy to replace. At the same time, we need to check if `v->s == NULL` in populate_value(), and strbuf cannot easily distinguish NULL and empty strings, but c-style "const char *" can do it. So add a new member in `struct atom_value`: `s_size`, which can record raw object size, it can help us add raw object data to the buffer or compare two buffers which contain raw object data. Note that `--format=%(raw)` cannot be used with `--python`, `--shell`, `--tcl`, and `--perl` because if the binary raw data is passed to a variable in such languages, these may not support arbitrary binary data in their string variable type. Reviewed-by: Jacob Keller <jacob.keller@gmail.com> Mentored-by: Christian Couder <christian.couder@gmail.com> Mentored-by: Hariom Verma <hariom18599@gmail.com> Helped-by: Bagas Sanjaya <bagasdotme@gmail.com> Helped-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Helped-by: Felipe Contreras <felipe.contreras@gmail.com> Helped-by: Phillip Wood <phillip.wood@dunelm.org.uk> Helped-by: Junio C Hamano <gitster@pobox.com> Based-on-patch-by: Olga Telezhnaya <olyatelezhnaya@gmail.com> Signed-off-by: ZheNing Hu <adlternative@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-07-26 11:26:47 +08:00
ATOM_RAW,
ATOM_UPSTREAM,
ATOM_PUSH,
ATOM_SYMREF,
ATOM_FLAG,
ATOM_HEAD,
ATOM_COLOR,
ATOM_WORKTREEPATH,
ATOM_ALIGN,
ATOM_END,
ATOM_IF,
ATOM_THEN,
ATOM_ELSE,
ATOM_REST,
for-each-ref: add ahead-behind format atom The previous change implemented the ahead_behind() method, including an algorithm to compute the ahead/behind values for a number of commit tips relative to a number of commit bases. Now, integrate that algorithm as part of 'git for-each-ref' hidden behind a new format atom, ahead-behind. This naturally extends to 'git branch' and 'git tag' builtins, as well. This format allows specifying multiple bases, if so desired, and all matching references are compared against all of those bases. For this reason, failing to read a reference provided from these atoms results in an error. In order to translate the ahead_behind() method information to the format output code in ref-filter.c, we must populate arrays of ahead_behind_count structs. In struct ref_array, we store the full array that will be passed to ahead_behind(). In struct ref_array_item, we store an array of pointers that point to the relvant items within the full array. In this way, we can pull all relevant ahead/behind values directly when formatting output for a specific item. It also ensures the lifetime of the ahead_behind_count structs matches the time that the array is being used. Add specific tests of the ahead/behind counts in t6600-test-reach.sh, as it has an interesting repository shape. In particular, its merging strategy and its use of different commit-graphs would demonstrate over- counting if the ahead_behind() method did not already account for that possibility. Also add tests for the specific for-each-ref, branch, and tag builtins. In the case of 'git tag', there are intersting cases that happen when some of the selected tips are not commits. This requires careful logic around commits_nr in the second loop of filter_ahead_behind(). Also, the test in t7004 is carefully located to avoid being dependent on the GPG prereq. It also avoids using the test_commit helper, as that will add ticks to the time and disrupt the expected timestamps in later tag tests. Also add performance tests in a new p1300-graph-walks.sh script. This will be useful for more uses in the future, but for now compare the ahead-behind counting algorithm in 'git for-each-ref' to the naive implementation by running 'git rev-list --count' processes for each input. For the Git source code repository, the improvement is already obvious: Test this tree --------------------------------------------------------------- 1500.2: ahead-behind counts: git for-each-ref 0.07(0.07+0.00) 1500.3: ahead-behind counts: git branch 0.07(0.06+0.00) 1500.4: ahead-behind counts: git tag 0.07(0.06+0.00) 1500.5: ahead-behind counts: git rev-list 1.32(1.04+0.27) But the standard performance benchmark is the Linux kernel repository, which demosntrates a significant improvement: Test this tree --------------------------------------------------------------- 1500.2: ahead-behind counts: git for-each-ref 0.27(0.24+0.02) 1500.3: ahead-behind counts: git branch 0.27(0.24+0.03) 1500.4: ahead-behind counts: git tag 0.28(0.27+0.01) 1500.5: ahead-behind counts: git rev-list 4.57(4.03+0.54) The 'git rev-list' test exists in this change as a demonstration, but it will be removed in the next change to avoid wasting time on this comparison. Signed-off-by: Derrick Stolee <derrickstolee@github.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-03-20 19:26:54 +08:00
ATOM_AHEADBEHIND,
for-each-ref: add 'is-base' token The previous change introduced the get_branch_base_for_tip() method in commit-reach.c. The motivation of that change was about using a heuristic to deteremine the base branch for a source commit from a list of candidate commit tips. This change makes that algorithm visible to users via a new atom in the 'git for-each-ref' format. This change is very similar to the chang in 49abcd21da6 (for-each-ref: add ahead-behind format atom, 2023-03-20). Introduce the 'is-base:<source>' atom, which will indicate that the algorithm should be computed and the result of the algorithm is reported using an indicator of the form '(<source>)'. For example, using '%(is-base:HEAD)' would result in one line having the token '(HEAD)'. Use the sorted order of refs included in the ref filter to break ties in the algorithm's heuristic. In the previous change, the motivating examples include using an L0 trunk, long-lived L1 branches, and temporary release branches. A caller could communicate the ordered preference among these categories using the input refpecs and avoiding a different sort mechanism. This sorting behavior is tested in the test scripts. It is important to include this atom as a special case to can_do_iterative_format() to match the expectations created in bd98f9774e1 (ref-filter.c: filter & format refs in the same callback, 2023-11-14). The ahead-behind atom was one of the special cases, and this similarly requires using an algorithm across all input refs before starting the format of any single ref. In the test script, the format tokens use colons or lack whitespace to avoid Git complaining about trailing whitespace errors. Signed-off-by: Derrick Stolee <stolee@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2024-08-14 18:31:29 +08:00
ATOM_ISBASE,
};
/*
* An atom is a valid field atom listed below, possibly prefixed with
* a "*" to denote deref_tag().
*
* We parse given format string and sort specifiers, and make a list
* of properties that we need to extract out of objects. ref_array_item
* structure will hold an array of values extracted that can be
* indexed with the "atom number", which is an index into this
* array.
*/
static struct used_atom {
enum atom_type atom_type;
const char *name;
cmp_type type;
info_source source;
union {
char color[COLOR_MAXLEN];
struct align align;
struct {
for-each-ref: let upstream/push optionally report the remote name There are times when e.g. scripts want to know not only the name of the upstream branch on the remote repository, but also the name of the remote. This patch offers the new suffix :remotename for the upstream and for the push atoms, allowing to show exactly that. Example: $ cat .git/config ... [remote "origin"] url = https://where.do.we.come/from fetch = refs/heads/*:refs/remote/origin/* [remote "hello-world"] url = https://hello.world/git fetch = refs/heads/*:refs/remote/origin/* pushURL = hello.world:git push = refs/heads/*:refs/heads/* [branch "master"] remote = origin pushRemote = hello-world ... $ git for-each-ref \ --format='%(upstream) %(upstream:remotename) %(push:remotename)' \ refs/heads/master refs/remotes/origin/master origin hello-world The implementation chooses *not* to DWIM the push remote if no explicit push remote was configured; The reason is that it is possible to DWIM this by using %(if)%(push:remotename)%(then) %(push:remotename) %(else) %(upstream:remotename) %(end) while it would be impossible to "un-DWIM" the information in case the caller is really only interested in explicit push remotes. While `:remote` would be shorter, it would also be a bit more ambiguous, and it would also shut the door e.g. for `:remoteref` (which would obviously refer to the corresponding ref in the remote repository). Note: the dashless, non-CamelCased form `:remotename` follows the example of the `:trackshort` example. Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-10-05 20:19:09 +08:00
enum {
RR_REF, RR_TRACK, RR_TRACKSHORT, RR_REMOTE_NAME, RR_REMOTE_REF
for-each-ref: let upstream/push optionally report the remote name There are times when e.g. scripts want to know not only the name of the upstream branch on the remote repository, but also the name of the remote. This patch offers the new suffix :remotename for the upstream and for the push atoms, allowing to show exactly that. Example: $ cat .git/config ... [remote "origin"] url = https://where.do.we.come/from fetch = refs/heads/*:refs/remote/origin/* [remote "hello-world"] url = https://hello.world/git fetch = refs/heads/*:refs/remote/origin/* pushURL = hello.world:git push = refs/heads/*:refs/heads/* [branch "master"] remote = origin pushRemote = hello-world ... $ git for-each-ref \ --format='%(upstream) %(upstream:remotename) %(push:remotename)' \ refs/heads/master refs/remotes/origin/master origin hello-world The implementation chooses *not* to DWIM the push remote if no explicit push remote was configured; The reason is that it is possible to DWIM this by using %(if)%(push:remotename)%(then) %(push:remotename) %(else) %(upstream:remotename) %(end) while it would be impossible to "un-DWIM" the information in case the caller is really only interested in explicit push remotes. While `:remote` would be shorter, it would also be a bit more ambiguous, and it would also shut the door e.g. for `:remoteref` (which would obviously refer to the corresponding ref in the remote repository). Note: the dashless, non-CamelCased form `:remotename` follows the example of the `:trackshort` example. Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-10-05 20:19:09 +08:00
} option;
struct refname_atom refname;
for-each-ref: let upstream/push optionally report the remote name There are times when e.g. scripts want to know not only the name of the upstream branch on the remote repository, but also the name of the remote. This patch offers the new suffix :remotename for the upstream and for the push atoms, allowing to show exactly that. Example: $ cat .git/config ... [remote "origin"] url = https://where.do.we.come/from fetch = refs/heads/*:refs/remote/origin/* [remote "hello-world"] url = https://hello.world/git fetch = refs/heads/*:refs/remote/origin/* pushURL = hello.world:git push = refs/heads/*:refs/heads/* [branch "master"] remote = origin pushRemote = hello-world ... $ git for-each-ref \ --format='%(upstream) %(upstream:remotename) %(push:remotename)' \ refs/heads/master refs/remotes/origin/master origin hello-world The implementation chooses *not* to DWIM the push remote if no explicit push remote was configured; The reason is that it is possible to DWIM this by using %(if)%(push:remotename)%(then) %(push:remotename) %(else) %(upstream:remotename) %(end) while it would be impossible to "un-DWIM" the information in case the caller is really only interested in explicit push remotes. While `:remote` would be shorter, it would also be a bit more ambiguous, and it would also shut the door e.g. for `:remoteref` (which would obviously refer to the corresponding ref in the remote repository). Note: the dashless, non-CamelCased form `:remotename` follows the example of the `:trackshort` example. Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-10-05 20:19:09 +08:00
unsigned int nobracket : 1, push : 1, push_remote : 1;
} remote_ref;
struct {
enum { C_BARE, C_BODY, C_BODY_DEP, C_LENGTH, C_LINES,
C_SIG, C_SUB, C_SUB_SANITIZE, C_TRAILERS } option;
struct process_trailer_options trailer_opts;
unsigned int nlines;
} contents;
ref-filter: add %(raw) atom Add new formatting option `%(raw)`, which will print the raw object data without any changes. It will help further to migrate all cat-file formatting logic from cat-file to ref-filter. The raw data of blob, tree objects may contain '\0', but most of the logic in `ref-filter` depends on the output of the atom being text (specifically, no embedded NULs in it). E.g. `quote_formatting()` use `strbuf_addstr()` or `*._quote_buf()` add the data to the buffer. The raw data of a tree object is `100644 one\0...`, only the `100644 one` will be added to the buffer, which is incorrect. Therefore, we need to find a way to record the length of the atom_value's member `s`. Although strbuf can already record the string and its length, if we want to replace the type of atom_value's member `s` with strbuf, many places in ref-filter that are filled with dynamically allocated mermory in `v->s` are not easy to replace. At the same time, we need to check if `v->s == NULL` in populate_value(), and strbuf cannot easily distinguish NULL and empty strings, but c-style "const char *" can do it. So add a new member in `struct atom_value`: `s_size`, which can record raw object size, it can help us add raw object data to the buffer or compare two buffers which contain raw object data. Note that `--format=%(raw)` cannot be used with `--python`, `--shell`, `--tcl`, and `--perl` because if the binary raw data is passed to a variable in such languages, these may not support arbitrary binary data in their string variable type. Reviewed-by: Jacob Keller <jacob.keller@gmail.com> Mentored-by: Christian Couder <christian.couder@gmail.com> Mentored-by: Hariom Verma <hariom18599@gmail.com> Helped-by: Bagas Sanjaya <bagasdotme@gmail.com> Helped-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Helped-by: Felipe Contreras <felipe.contreras@gmail.com> Helped-by: Phillip Wood <phillip.wood@dunelm.org.uk> Helped-by: Junio C Hamano <gitster@pobox.com> Based-on-patch-by: Olga Telezhnaya <olyatelezhnaya@gmail.com> Signed-off-by: ZheNing Hu <adlternative@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-07-26 11:26:47 +08:00
struct {
enum { RAW_BARE, RAW_LENGTH } option;
} raw_data;
struct {
cmp_status cmp_status;
const char *str;
} if_then_else;
struct {
enum { O_FULL, O_LENGTH, O_SHORT } option;
unsigned int length;
} oid;
struct {
enum { O_SIZE, O_SIZE_DISK } option;
} objectsize;
struct {
enum { N_RAW, N_MAILMAP } option;
} name_option;
struct {
enum {
EO_RAW = 0,
EO_TRIM = 1<<0,
EO_LOCALPART = 1<<1,
EO_MAILMAP = 1<<2,
} option;
} email_option;
struct {
enum { S_BARE, S_GRADE, S_SIGNER, S_KEY,
S_FINGERPRINT, S_PRI_KEY_FP, S_TRUST_LEVEL } option;
} signature;
const char **describe_args;
struct refname_atom refname;
char *head;
} u;
} *used_atom;
static int used_atom_cnt, need_tagged, need_symref;
/*
* Expand string, append it to strbuf *sb, then return error code ret.
* Allow to save few lines of code.
*/
__attribute__((format (printf, 3, 4)))
static int strbuf_addf_ret(struct strbuf *sb, int ret, const char *fmt, ...)
{
va_list ap;
va_start(ap, fmt);
strbuf_vaddf(sb, fmt, ap);
va_end(ap);
return ret;
}
static int err_no_arg(struct strbuf *sb, const char *name)
{
ref-filter: truncate atom names in error messages If you pass a bogus argument to %(refname), you may end up with a message like this: $ git for-each-ref --format='%(refname:foo)' fatal: unrecognized %(refname:foo) argument: foo which is confusing. It should just say: fatal: unrecognized %(refname) argument: foo which is clearer, and is consistent with most other atom parsers. Those other parsers do not have the same problem because they pass the atom name from a string literal in the parser function. But because the parser for %(refname) also handles %(upstream) and %(push), it instead uses atom->name, which includes the arguments. The oid atom parser which handles %(tree), %(parent), etc suffers from the same problem. It seems like the cleanest fix would be for atom->name to be _just_ the name, since there's already a separate "args" field. But since that field is also used for other things, we can't change it easily (e.g., it's how we find things in the used_atoms array, and clearly %(refname) and %(refname:short) are not the same thing). Instead, we'll teach our error_bad_arg() function to stop at the first ":". This is a little hacky, as we're effectively re-parsing the name, but the format is simple enough to do this as a one-liner, and this localizes the change to the error-reporting code. We'll give the same treatment to err_no_arg(). None of its callers use this atom->name trick, but it's worth future-proofing it while we're here. Signed-off-by: Jeff King <peff@peff.net> Acked-by: Taylor Blau <me@ttaylorr.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-12-15 00:23:53 +08:00
size_t namelen = strchrnul(name, ':') - name;
strbuf_addf(sb, _("%%(%.*s) does not take arguments"),
(int)namelen, name);
return -1;
}
static int err_bad_arg(struct strbuf *sb, const char *name, const char *arg)
{
ref-filter: truncate atom names in error messages If you pass a bogus argument to %(refname), you may end up with a message like this: $ git for-each-ref --format='%(refname:foo)' fatal: unrecognized %(refname:foo) argument: foo which is confusing. It should just say: fatal: unrecognized %(refname) argument: foo which is clearer, and is consistent with most other atom parsers. Those other parsers do not have the same problem because they pass the atom name from a string literal in the parser function. But because the parser for %(refname) also handles %(upstream) and %(push), it instead uses atom->name, which includes the arguments. The oid atom parser which handles %(tree), %(parent), etc suffers from the same problem. It seems like the cleanest fix would be for atom->name to be _just_ the name, since there's already a separate "args" field. But since that field is also used for other things, we can't change it easily (e.g., it's how we find things in the used_atoms array, and clearly %(refname) and %(refname:short) are not the same thing). Instead, we'll teach our error_bad_arg() function to stop at the first ":". This is a little hacky, as we're effectively re-parsing the name, but the format is simple enough to do this as a one-liner, and this localizes the change to the error-reporting code. We'll give the same treatment to err_no_arg(). None of its callers use this atom->name trick, but it's worth future-proofing it while we're here. Signed-off-by: Jeff King <peff@peff.net> Acked-by: Taylor Blau <me@ttaylorr.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-12-15 00:23:53 +08:00
size_t namelen = strchrnul(name, ':') - name;
strbuf_addf(sb, _("unrecognized %%(%.*s) argument: %s"),
(int)namelen, name, arg);
return -1;
}
/*
* Parse option of name "candidate" in the option string "to_parse" of
* the form
*
* "candidate1[=val1],candidate2[=val2],candidate3[=val3],..."
*
* The remaining part of "to_parse" is stored in "end" (if we are
* parsing the last candidate, then this is NULL) and the value of
* the candidate is stored in "valuestart" and its length in "valuelen",
* that is the portion after "=". Since it is possible for a "candidate"
* to not have a value, in such cases, "valuestart" is set to point to
* NULL and "valuelen" to 0.
*
* The function returns 1 on success. It returns 0 if we don't find
* "candidate" in "to_parse" or we find "candidate" but it is followed
* by more chars (for example, "candidatefoo"), that is, we don't find
* an exact match.
*
* This function only does the above for one "candidate" at a time. So
* it has to be called each time trying to parse a "candidate" in the
* option string "to_parse".
*/
static int match_atom_arg_value(const char *to_parse, const char *candidate,
const char **end, const char **valuestart,
size_t *valuelen)
{
const char *atom;
if (!skip_prefix(to_parse, candidate, &atom))
return 0; /* definitely not "candidate" */
if (*atom == '=') {
/* we just saw "candidate=" */
*valuestart = atom + 1;
atom = strchrnul(*valuestart, ',');
*valuelen = atom - *valuestart;
} else if (*atom != ',' && *atom != '\0') {
/* key begins with "candidate" but has more chars */
return 0;
} else {
/* just "candidate" without "=val" */
*valuestart = NULL;
*valuelen = 0;
}
/* atom points at either the ',' or NUL after this key[=val] */
if (*atom == ',')
atom++;
else if (*atom)
BUG("Why is *atom not NULL yet?");
*end = atom;
return 1;
}
/*
* Parse boolean option of name "candidate" in the option list "to_parse"
* of the form
*
* "candidate1[=bool1],candidate2[=bool2],candidate3[=bool3],..."
*
* The remaining part of "to_parse" is stored in "end" (if we are parsing
* the last candidate, then this is NULL) and the value (if given) is
* parsed and stored in "val", so "val" always points to either 0 or 1.
* If the value is not given, then "val" is set to point to 1.
*
* The boolean value is parsed using "git_parse_maybe_bool()", so the
* accepted values are
*
* to set true - "1", "yes", "true"
* to set false - "0", "no", "false"
*
* This function returns 1 on success. It returns 0 when we don't find
* an exact match for "candidate" or when the boolean value given is
* not valid.
*/
static int match_atom_bool_arg(const char *to_parse, const char *candidate,
const char **end, int *val)
{
const char *argval;
char *strval;
size_t arglen;
int v;
if (!match_atom_arg_value(to_parse, candidate, end, &argval, &arglen))
return 0;
if (!argval) {
*val = 1;
return 1;
}
strval = xstrndup(argval, arglen);
v = git_parse_maybe_bool(strval);
free(strval);
if (v == -1)
return 0;
*val = v;
return 1;
}
static int color_atom_parser(struct ref_format *format, struct used_atom *atom,
const char *color_value, struct strbuf *err)
{
if (!color_value)
return strbuf_addf_ret(err, -1, _("expected format: %%(color:<color>)"));
if (color_parse(color_value, atom->u.color) < 0)
return strbuf_addf_ret(err, -1, _("unrecognized color: %%(color:%s)"),
color_value);
/*
* We check this after we've parsed the color, which lets us complain
* about syntactically bogus color names even if they won't be used.
*/
if (!want_color(format->use_color))
color_parse("", atom->u.color);
return 0;
}
static int refname_atom_parser_internal(struct refname_atom *atom, const char *arg,
const char *name, struct strbuf *err)
{
if (!arg)
atom->option = R_NORMAL;
else if (!strcmp(arg, "short"))
atom->option = R_SHORT;
else if (skip_prefix(arg, "lstrip=", &arg) ||
skip_prefix(arg, "strip=", &arg)) {
atom->option = R_LSTRIP;
if (strtol_i(arg, 10, &atom->lstrip))
return strbuf_addf_ret(err, -1, _("Integer value expected refname:lstrip=%s"), arg);
} else if (skip_prefix(arg, "rstrip=", &arg)) {
atom->option = R_RSTRIP;
if (strtol_i(arg, 10, &atom->rstrip))
return strbuf_addf_ret(err, -1, _("Integer value expected refname:rstrip=%s"), arg);
} else
ref-filter: truncate atom names in error messages If you pass a bogus argument to %(refname), you may end up with a message like this: $ git for-each-ref --format='%(refname:foo)' fatal: unrecognized %(refname:foo) argument: foo which is confusing. It should just say: fatal: unrecognized %(refname) argument: foo which is clearer, and is consistent with most other atom parsers. Those other parsers do not have the same problem because they pass the atom name from a string literal in the parser function. But because the parser for %(refname) also handles %(upstream) and %(push), it instead uses atom->name, which includes the arguments. The oid atom parser which handles %(tree), %(parent), etc suffers from the same problem. It seems like the cleanest fix would be for atom->name to be _just_ the name, since there's already a separate "args" field. But since that field is also used for other things, we can't change it easily (e.g., it's how we find things in the used_atoms array, and clearly %(refname) and %(refname:short) are not the same thing). Instead, we'll teach our error_bad_arg() function to stop at the first ":". This is a little hacky, as we're effectively re-parsing the name, but the format is simple enough to do this as a one-liner, and this localizes the change to the error-reporting code. We'll give the same treatment to err_no_arg(). None of its callers use this atom->name trick, but it's worth future-proofing it while we're here. Signed-off-by: Jeff King <peff@peff.net> Acked-by: Taylor Blau <me@ttaylorr.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-12-15 00:23:53 +08:00
return err_bad_arg(err, name, arg);
return 0;
}
static int remote_ref_atom_parser(struct ref_format *format UNUSED,
struct used_atom *atom,
const char *arg, struct strbuf *err)
{
struct string_list params = STRING_LIST_INIT_DUP;
int i;
for-each-ref: let upstream/push optionally report the remote name There are times when e.g. scripts want to know not only the name of the upstream branch on the remote repository, but also the name of the remote. This patch offers the new suffix :remotename for the upstream and for the push atoms, allowing to show exactly that. Example: $ cat .git/config ... [remote "origin"] url = https://where.do.we.come/from fetch = refs/heads/*:refs/remote/origin/* [remote "hello-world"] url = https://hello.world/git fetch = refs/heads/*:refs/remote/origin/* pushURL = hello.world:git push = refs/heads/*:refs/heads/* [branch "master"] remote = origin pushRemote = hello-world ... $ git for-each-ref \ --format='%(upstream) %(upstream:remotename) %(push:remotename)' \ refs/heads/master refs/remotes/origin/master origin hello-world The implementation chooses *not* to DWIM the push remote if no explicit push remote was configured; The reason is that it is possible to DWIM this by using %(if)%(push:remotename)%(then) %(push:remotename) %(else) %(upstream:remotename) %(end) while it would be impossible to "un-DWIM" the information in case the caller is really only interested in explicit push remotes. While `:remote` would be shorter, it would also be a bit more ambiguous, and it would also shut the door e.g. for `:remoteref` (which would obviously refer to the corresponding ref in the remote repository). Note: the dashless, non-CamelCased form `:remotename` follows the example of the `:trackshort` example. Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-10-05 20:19:09 +08:00
if (!strcmp(atom->name, "push") || starts_with(atom->name, "push:"))
atom->u.remote_ref.push = 1;
if (!arg) {
atom->u.remote_ref.option = RR_REF;
return refname_atom_parser_internal(&atom->u.remote_ref.refname,
arg, atom->name, err);
}
atom->u.remote_ref.nobracket = 0;
string_list_split(&params, arg, ',', -1);
for (i = 0; i < params.nr; i++) {
const char *s = params.items[i].string;
if (!strcmp(s, "track"))
atom->u.remote_ref.option = RR_TRACK;
else if (!strcmp(s, "trackshort"))
atom->u.remote_ref.option = RR_TRACKSHORT;
else if (!strcmp(s, "nobracket"))
atom->u.remote_ref.nobracket = 1;
for-each-ref: let upstream/push optionally report the remote name There are times when e.g. scripts want to know not only the name of the upstream branch on the remote repository, but also the name of the remote. This patch offers the new suffix :remotename for the upstream and for the push atoms, allowing to show exactly that. Example: $ cat .git/config ... [remote "origin"] url = https://where.do.we.come/from fetch = refs/heads/*:refs/remote/origin/* [remote "hello-world"] url = https://hello.world/git fetch = refs/heads/*:refs/remote/origin/* pushURL = hello.world:git push = refs/heads/*:refs/heads/* [branch "master"] remote = origin pushRemote = hello-world ... $ git for-each-ref \ --format='%(upstream) %(upstream:remotename) %(push:remotename)' \ refs/heads/master refs/remotes/origin/master origin hello-world The implementation chooses *not* to DWIM the push remote if no explicit push remote was configured; The reason is that it is possible to DWIM this by using %(if)%(push:remotename)%(then) %(push:remotename) %(else) %(upstream:remotename) %(end) while it would be impossible to "un-DWIM" the information in case the caller is really only interested in explicit push remotes. While `:remote` would be shorter, it would also be a bit more ambiguous, and it would also shut the door e.g. for `:remoteref` (which would obviously refer to the corresponding ref in the remote repository). Note: the dashless, non-CamelCased form `:remotename` follows the example of the `:trackshort` example. Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-10-05 20:19:09 +08:00
else if (!strcmp(s, "remotename")) {
atom->u.remote_ref.option = RR_REMOTE_NAME;
atom->u.remote_ref.push_remote = 1;
} else if (!strcmp(s, "remoteref")) {
atom->u.remote_ref.option = RR_REMOTE_REF;
atom->u.remote_ref.push_remote = 1;
for-each-ref: let upstream/push optionally report the remote name There are times when e.g. scripts want to know not only the name of the upstream branch on the remote repository, but also the name of the remote. This patch offers the new suffix :remotename for the upstream and for the push atoms, allowing to show exactly that. Example: $ cat .git/config ... [remote "origin"] url = https://where.do.we.come/from fetch = refs/heads/*:refs/remote/origin/* [remote "hello-world"] url = https://hello.world/git fetch = refs/heads/*:refs/remote/origin/* pushURL = hello.world:git push = refs/heads/*:refs/heads/* [branch "master"] remote = origin pushRemote = hello-world ... $ git for-each-ref \ --format='%(upstream) %(upstream:remotename) %(push:remotename)' \ refs/heads/master refs/remotes/origin/master origin hello-world The implementation chooses *not* to DWIM the push remote if no explicit push remote was configured; The reason is that it is possible to DWIM this by using %(if)%(push:remotename)%(then) %(push:remotename) %(else) %(upstream:remotename) %(end) while it would be impossible to "un-DWIM" the information in case the caller is really only interested in explicit push remotes. While `:remote` would be shorter, it would also be a bit more ambiguous, and it would also shut the door e.g. for `:remoteref` (which would obviously refer to the corresponding ref in the remote repository). Note: the dashless, non-CamelCased form `:remotename` follows the example of the `:trackshort` example. Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-10-05 20:19:09 +08:00
} else {
atom->u.remote_ref.option = RR_REF;
if (refname_atom_parser_internal(&atom->u.remote_ref.refname,
arg, atom->name, err)) {
string_list_clear(&params, 0);
return -1;
}
}
}
string_list_clear(&params, 0);
return 0;
}
static int objecttype_atom_parser(struct ref_format *format UNUSED,
struct used_atom *atom,
const char *arg, struct strbuf *err)
{
if (arg)
return err_no_arg(err, "objecttype");
if (*atom->name == '*')
oi_deref.info.typep = &oi_deref.type;
else
oi.info.typep = &oi.type;
return 0;
}
static int objectsize_atom_parser(struct ref_format *format UNUSED,
struct used_atom *atom,
const char *arg, struct strbuf *err)
{
if (!arg) {
atom->u.objectsize.option = O_SIZE;
if (*atom->name == '*')
oi_deref.info.sizep = &oi_deref.size;
else
oi.info.sizep = &oi.size;
} else if (!strcmp(arg, "disk")) {
atom->u.objectsize.option = O_SIZE_DISK;
if (*atom->name == '*')
oi_deref.info.disk_sizep = &oi_deref.disk_size;
else
oi.info.disk_sizep = &oi.disk_size;
} else
return err_bad_arg(err, "objectsize", arg);
return 0;
}
static int deltabase_atom_parser(struct ref_format *format UNUSED,
struct used_atom *atom,
const char *arg, struct strbuf *err)
{
if (arg)
return err_no_arg(err, "deltabase");
if (*atom->name == '*')
oi_deref.info.delta_base_oid = &oi_deref.delta_base_oid;
else
oi.info.delta_base_oid = &oi.delta_base_oid;
return 0;
}
static int body_atom_parser(struct ref_format *format UNUSED,
struct used_atom *atom,
const char *arg, struct strbuf *err)
{
if (arg)
return err_no_arg(err, "body");
atom->u.contents.option = C_BODY_DEP;
return 0;
}
static int subject_atom_parser(struct ref_format *format UNUSED,
struct used_atom *atom,
const char *arg, struct strbuf *err)
{
if (!arg)
atom->u.contents.option = C_SUB;
else if (!strcmp(arg, "sanitize"))
atom->u.contents.option = C_SUB_SANITIZE;
else
return err_bad_arg(err, "subject", arg);
return 0;
}
static int parse_signature_option(const char *arg)
{
if (!arg)
return S_BARE;
else if (!strcmp(arg, "signer"))
return S_SIGNER;
else if (!strcmp(arg, "grade"))
return S_GRADE;
else if (!strcmp(arg, "key"))
return S_KEY;
else if (!strcmp(arg, "fingerprint"))
return S_FINGERPRINT;
else if (!strcmp(arg, "primarykeyfingerprint"))
return S_PRI_KEY_FP;
else if (!strcmp(arg, "trustlevel"))
return S_TRUST_LEVEL;
return -1;
}
static int signature_atom_parser(struct ref_format *format UNUSED,
struct used_atom *atom,
const char *arg, struct strbuf *err)
{
int opt = parse_signature_option(arg);
if (opt < 0)
return err_bad_arg(err, "signature", arg);
atom->u.signature.option = opt;
return 0;
}
static int trailers_atom_parser(struct ref_format *format UNUSED,
struct used_atom *atom,
const char *arg, struct strbuf *err)
{
atom->u.contents.trailer_opts.no_divider = 1;
if (arg) {
const char *argbuf = xstrfmt("%s)", arg);
char *invalid_arg = NULL;
if (format_set_trailers_options(&atom->u.contents.trailer_opts,
&ref_trailer_buf.filter_list,
&ref_trailer_buf.sepbuf,
&ref_trailer_buf.kvsepbuf,
&argbuf, &invalid_arg)) {
if (!invalid_arg)
strbuf_addf(err, _("expected %%(trailers:key=<value>)"));
else
strbuf_addf(err, _("unknown %%(trailers) argument: %s"), invalid_arg);
free((char *)invalid_arg);
return -1;
}
}
atom->u.contents.option = C_TRAILERS;
return 0;
}
static int contents_atom_parser(struct ref_format *format, struct used_atom *atom,
const char *arg, struct strbuf *err)
{
if (!arg)
atom->u.contents.option = C_BARE;
else if (!strcmp(arg, "body"))
atom->u.contents.option = C_BODY;
else if (!strcmp(arg, "size")) {
atom->type = FIELD_ULONG;
atom->u.contents.option = C_LENGTH;
} else if (!strcmp(arg, "signature"))
atom->u.contents.option = C_SIG;
else if (!strcmp(arg, "subject"))
atom->u.contents.option = C_SUB;
else if (!strcmp(arg, "trailers")) {
if (trailers_atom_parser(format, atom, NULL, err))
return -1;
} else if (skip_prefix(arg, "trailers:", &arg)) {
if (trailers_atom_parser(format, atom, arg, err))
return -1;
} else if (skip_prefix(arg, "lines=", &arg)) {
atom->u.contents.option = C_LINES;
if (strtoul_ui(arg, 10, &atom->u.contents.nlines))
return strbuf_addf_ret(err, -1, _("positive value expected contents:lines=%s"), arg);
} else
return err_bad_arg(err, "contents", arg);
return 0;
}
static int describe_atom_option_parser(struct strvec *args, const char **arg,
struct strbuf *err)
{
const char *argval;
size_t arglen = 0;
int optval = 0;
if (match_atom_bool_arg(*arg, "tags", arg, &optval)) {
if (!optval)
strvec_push(args, "--no-tags");
else
strvec_push(args, "--tags");
return 1;
}
if (match_atom_arg_value(*arg, "abbrev", arg, &argval, &arglen)) {
char *endptr;
if (!arglen)
return strbuf_addf_ret(err, -1,
_("argument expected for %s"),
"describe:abbrev");
if (strtol(argval, &endptr, 10) < 0)
return strbuf_addf_ret(err, -1,
_("positive value expected %s=%s"),
"describe:abbrev", argval);
if (endptr - argval != arglen)
return strbuf_addf_ret(err, -1,
_("cannot fully parse %s=%s"),
"describe:abbrev", argval);
strvec_pushf(args, "--abbrev=%.*s", (int)arglen, argval);
return 1;
}
if (match_atom_arg_value(*arg, "match", arg, &argval, &arglen)) {
if (!arglen)
return strbuf_addf_ret(err, -1,
_("value expected %s="),
"describe:match");
strvec_pushf(args, "--match=%.*s", (int)arglen, argval);
return 1;
}
if (match_atom_arg_value(*arg, "exclude", arg, &argval, &arglen)) {
if (!arglen)
return strbuf_addf_ret(err, -1,
_("value expected %s="),
"describe:exclude");
strvec_pushf(args, "--exclude=%.*s", (int)arglen, argval);
return 1;
}
return 0;
}
static int describe_atom_parser(struct ref_format *format UNUSED,
struct used_atom *atom,
const char *arg, struct strbuf *err)
{
struct strvec args = STRVEC_INIT;
for (;;) {
int found = 0;
const char *bad_arg = arg;
if (!arg || !*arg)
break;
found = describe_atom_option_parser(&args, &arg, err);
if (found < 0)
return found;
if (!found)
return err_bad_arg(err, "describe", bad_arg);
}
atom->u.describe_args = strvec_detach(&args);
return 0;
}
static int raw_atom_parser(struct ref_format *format UNUSED,
struct used_atom *atom,
const char *arg, struct strbuf *err)
ref-filter: add %(raw) atom Add new formatting option `%(raw)`, which will print the raw object data without any changes. It will help further to migrate all cat-file formatting logic from cat-file to ref-filter. The raw data of blob, tree objects may contain '\0', but most of the logic in `ref-filter` depends on the output of the atom being text (specifically, no embedded NULs in it). E.g. `quote_formatting()` use `strbuf_addstr()` or `*._quote_buf()` add the data to the buffer. The raw data of a tree object is `100644 one\0...`, only the `100644 one` will be added to the buffer, which is incorrect. Therefore, we need to find a way to record the length of the atom_value's member `s`. Although strbuf can already record the string and its length, if we want to replace the type of atom_value's member `s` with strbuf, many places in ref-filter that are filled with dynamically allocated mermory in `v->s` are not easy to replace. At the same time, we need to check if `v->s == NULL` in populate_value(), and strbuf cannot easily distinguish NULL and empty strings, but c-style "const char *" can do it. So add a new member in `struct atom_value`: `s_size`, which can record raw object size, it can help us add raw object data to the buffer or compare two buffers which contain raw object data. Note that `--format=%(raw)` cannot be used with `--python`, `--shell`, `--tcl`, and `--perl` because if the binary raw data is passed to a variable in such languages, these may not support arbitrary binary data in their string variable type. Reviewed-by: Jacob Keller <jacob.keller@gmail.com> Mentored-by: Christian Couder <christian.couder@gmail.com> Mentored-by: Hariom Verma <hariom18599@gmail.com> Helped-by: Bagas Sanjaya <bagasdotme@gmail.com> Helped-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Helped-by: Felipe Contreras <felipe.contreras@gmail.com> Helped-by: Phillip Wood <phillip.wood@dunelm.org.uk> Helped-by: Junio C Hamano <gitster@pobox.com> Based-on-patch-by: Olga Telezhnaya <olyatelezhnaya@gmail.com> Signed-off-by: ZheNing Hu <adlternative@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-07-26 11:26:47 +08:00
{
if (!arg)
atom->u.raw_data.option = RAW_BARE;
else if (!strcmp(arg, "size")) {
atom->type = FIELD_ULONG;
ref-filter: add %(raw) atom Add new formatting option `%(raw)`, which will print the raw object data without any changes. It will help further to migrate all cat-file formatting logic from cat-file to ref-filter. The raw data of blob, tree objects may contain '\0', but most of the logic in `ref-filter` depends on the output of the atom being text (specifically, no embedded NULs in it). E.g. `quote_formatting()` use `strbuf_addstr()` or `*._quote_buf()` add the data to the buffer. The raw data of a tree object is `100644 one\0...`, only the `100644 one` will be added to the buffer, which is incorrect. Therefore, we need to find a way to record the length of the atom_value's member `s`. Although strbuf can already record the string and its length, if we want to replace the type of atom_value's member `s` with strbuf, many places in ref-filter that are filled with dynamically allocated mermory in `v->s` are not easy to replace. At the same time, we need to check if `v->s == NULL` in populate_value(), and strbuf cannot easily distinguish NULL and empty strings, but c-style "const char *" can do it. So add a new member in `struct atom_value`: `s_size`, which can record raw object size, it can help us add raw object data to the buffer or compare two buffers which contain raw object data. Note that `--format=%(raw)` cannot be used with `--python`, `--shell`, `--tcl`, and `--perl` because if the binary raw data is passed to a variable in such languages, these may not support arbitrary binary data in their string variable type. Reviewed-by: Jacob Keller <jacob.keller@gmail.com> Mentored-by: Christian Couder <christian.couder@gmail.com> Mentored-by: Hariom Verma <hariom18599@gmail.com> Helped-by: Bagas Sanjaya <bagasdotme@gmail.com> Helped-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Helped-by: Felipe Contreras <felipe.contreras@gmail.com> Helped-by: Phillip Wood <phillip.wood@dunelm.org.uk> Helped-by: Junio C Hamano <gitster@pobox.com> Based-on-patch-by: Olga Telezhnaya <olyatelezhnaya@gmail.com> Signed-off-by: ZheNing Hu <adlternative@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-07-26 11:26:47 +08:00
atom->u.raw_data.option = RAW_LENGTH;
} else
return err_bad_arg(err, "raw", arg);
ref-filter: add %(raw) atom Add new formatting option `%(raw)`, which will print the raw object data without any changes. It will help further to migrate all cat-file formatting logic from cat-file to ref-filter. The raw data of blob, tree objects may contain '\0', but most of the logic in `ref-filter` depends on the output of the atom being text (specifically, no embedded NULs in it). E.g. `quote_formatting()` use `strbuf_addstr()` or `*._quote_buf()` add the data to the buffer. The raw data of a tree object is `100644 one\0...`, only the `100644 one` will be added to the buffer, which is incorrect. Therefore, we need to find a way to record the length of the atom_value's member `s`. Although strbuf can already record the string and its length, if we want to replace the type of atom_value's member `s` with strbuf, many places in ref-filter that are filled with dynamically allocated mermory in `v->s` are not easy to replace. At the same time, we need to check if `v->s == NULL` in populate_value(), and strbuf cannot easily distinguish NULL and empty strings, but c-style "const char *" can do it. So add a new member in `struct atom_value`: `s_size`, which can record raw object size, it can help us add raw object data to the buffer or compare two buffers which contain raw object data. Note that `--format=%(raw)` cannot be used with `--python`, `--shell`, `--tcl`, and `--perl` because if the binary raw data is passed to a variable in such languages, these may not support arbitrary binary data in their string variable type. Reviewed-by: Jacob Keller <jacob.keller@gmail.com> Mentored-by: Christian Couder <christian.couder@gmail.com> Mentored-by: Hariom Verma <hariom18599@gmail.com> Helped-by: Bagas Sanjaya <bagasdotme@gmail.com> Helped-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Helped-by: Felipe Contreras <felipe.contreras@gmail.com> Helped-by: Phillip Wood <phillip.wood@dunelm.org.uk> Helped-by: Junio C Hamano <gitster@pobox.com> Based-on-patch-by: Olga Telezhnaya <olyatelezhnaya@gmail.com> Signed-off-by: ZheNing Hu <adlternative@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-07-26 11:26:47 +08:00
return 0;
}
static int oid_atom_parser(struct ref_format *format UNUSED,
struct used_atom *atom,
const char *arg, struct strbuf *err)
{
if (!arg)
atom->u.oid.option = O_FULL;
else if (!strcmp(arg, "short"))
atom->u.oid.option = O_SHORT;
else if (skip_prefix(arg, "short=", &arg)) {
atom->u.oid.option = O_LENGTH;
if (strtoul_ui(arg, 10, &atom->u.oid.length) ||
atom->u.oid.length == 0)
return strbuf_addf_ret(err, -1, _("positive value expected '%s' in %%(%s)"), arg, atom->name);
if (atom->u.oid.length < MINIMUM_ABBREV)
atom->u.oid.length = MINIMUM_ABBREV;
} else
ref-filter: truncate atom names in error messages If you pass a bogus argument to %(refname), you may end up with a message like this: $ git for-each-ref --format='%(refname:foo)' fatal: unrecognized %(refname:foo) argument: foo which is confusing. It should just say: fatal: unrecognized %(refname) argument: foo which is clearer, and is consistent with most other atom parsers. Those other parsers do not have the same problem because they pass the atom name from a string literal in the parser function. But because the parser for %(refname) also handles %(upstream) and %(push), it instead uses atom->name, which includes the arguments. The oid atom parser which handles %(tree), %(parent), etc suffers from the same problem. It seems like the cleanest fix would be for atom->name to be _just_ the name, since there's already a separate "args" field. But since that field is also used for other things, we can't change it easily (e.g., it's how we find things in the used_atoms array, and clearly %(refname) and %(refname:short) are not the same thing). Instead, we'll teach our error_bad_arg() function to stop at the first ":". This is a little hacky, as we're effectively re-parsing the name, but the format is simple enough to do this as a one-liner, and this localizes the change to the error-reporting code. We'll give the same treatment to err_no_arg(). None of its callers use this atom->name trick, but it's worth future-proofing it while we're here. Signed-off-by: Jeff King <peff@peff.net> Acked-by: Taylor Blau <me@ttaylorr.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-12-15 00:23:53 +08:00
return err_bad_arg(err, atom->name, arg);
return 0;
}
static int person_name_atom_parser(struct ref_format *format UNUSED,
struct used_atom *atom,
const char *arg, struct strbuf *err)
{
if (!arg)
atom->u.name_option.option = N_RAW;
else if (!strcmp(arg, "mailmap"))
atom->u.name_option.option = N_MAILMAP;
else
return err_bad_arg(err, atom->name, arg);
return 0;
}
static int email_atom_option_parser(const char **arg)
{
if (!*arg)
return EO_RAW;
if (skip_prefix(*arg, "trim", arg))
return EO_TRIM;
if (skip_prefix(*arg, "localpart", arg))
return EO_LOCALPART;
if (skip_prefix(*arg, "mailmap", arg))
return EO_MAILMAP;
return -1;
}
static int person_email_atom_parser(struct ref_format *format UNUSED,
struct used_atom *atom,
const char *arg, struct strbuf *err)
{
for (;;) {
int opt = email_atom_option_parser(&arg);
const char *bad_arg = arg;
if (opt < 0)
return err_bad_arg(err, atom->name, bad_arg);
atom->u.email_option.option |= opt;
if (!arg || !*arg)
break;
if (*arg == ',')
arg++;
else
return err_bad_arg(err, atom->name, bad_arg);
}
return 0;
}
static int refname_atom_parser(struct ref_format *format UNUSED,
struct used_atom *atom,
const char *arg, struct strbuf *err)
{
return refname_atom_parser_internal(&atom->u.refname, arg, atom->name, err);
}
static align_type parse_align_position(const char *s)
{
if (!strcmp(s, "right"))
return ALIGN_RIGHT;
else if (!strcmp(s, "middle"))
return ALIGN_MIDDLE;
else if (!strcmp(s, "left"))
return ALIGN_LEFT;
return -1;
}
static int align_atom_parser(struct ref_format *format UNUSED,
struct used_atom *atom,
const char *arg, struct strbuf *err)
{
struct align *align = &atom->u.align;
struct string_list params = STRING_LIST_INIT_DUP;
int i;
unsigned int width = ~0U;
if (!arg)
return strbuf_addf_ret(err, -1, _("expected format: %%(align:<width>,<position>)"));
align->position = ALIGN_LEFT;
string_list_split(&params, arg, ',', -1);
for (i = 0; i < params.nr; i++) {
const char *s = params.items[i].string;
int position;
if (skip_prefix(s, "position=", &s)) {
position = parse_align_position(s);
if (position < 0) {
strbuf_addf(err, _("unrecognized position:%s"), s);
string_list_clear(&params, 0);
return -1;
}
align->position = position;
} else if (skip_prefix(s, "width=", &s)) {
if (strtoul_ui(s, 10, &width)) {
strbuf_addf(err, _("unrecognized width:%s"), s);
string_list_clear(&params, 0);
return -1;
}
} else if (!strtoul_ui(s, 10, &width))
;
else if ((position = parse_align_position(s)) >= 0)
align->position = position;
else {
strbuf_addf(err, _("unrecognized %%(%s) argument: %s"), "align", s);
string_list_clear(&params, 0);
return -1;
}
}
if (width == ~0U) {
string_list_clear(&params, 0);
return strbuf_addf_ret(err, -1, _("positive width expected with the %%(align) atom"));
}
align->width = width;
string_list_clear(&params, 0);
return 0;
}
static int if_atom_parser(struct ref_format *format UNUSED,
struct used_atom *atom,
const char *arg, struct strbuf *err)
{
if (!arg) {
atom->u.if_then_else.cmp_status = COMPARE_NONE;
return 0;
} else if (skip_prefix(arg, "equals=", &atom->u.if_then_else.str)) {
atom->u.if_then_else.cmp_status = COMPARE_EQUAL;
} else if (skip_prefix(arg, "notequals=", &atom->u.if_then_else.str)) {
atom->u.if_then_else.cmp_status = COMPARE_UNEQUAL;
} else
return err_bad_arg(err, "if", arg);
return 0;
}
static int rest_atom_parser(struct ref_format *format UNUSED,
struct used_atom *atom UNUSED,
const char *arg, struct strbuf *err)
{
if (arg)
return err_no_arg(err, "rest");
return 0;
}
static int ahead_behind_atom_parser(struct ref_format *format,
struct used_atom *atom UNUSED,
for-each-ref: add ahead-behind format atom The previous change implemented the ahead_behind() method, including an algorithm to compute the ahead/behind values for a number of commit tips relative to a number of commit bases. Now, integrate that algorithm as part of 'git for-each-ref' hidden behind a new format atom, ahead-behind. This naturally extends to 'git branch' and 'git tag' builtins, as well. This format allows specifying multiple bases, if so desired, and all matching references are compared against all of those bases. For this reason, failing to read a reference provided from these atoms results in an error. In order to translate the ahead_behind() method information to the format output code in ref-filter.c, we must populate arrays of ahead_behind_count structs. In struct ref_array, we store the full array that will be passed to ahead_behind(). In struct ref_array_item, we store an array of pointers that point to the relvant items within the full array. In this way, we can pull all relevant ahead/behind values directly when formatting output for a specific item. It also ensures the lifetime of the ahead_behind_count structs matches the time that the array is being used. Add specific tests of the ahead/behind counts in t6600-test-reach.sh, as it has an interesting repository shape. In particular, its merging strategy and its use of different commit-graphs would demonstrate over- counting if the ahead_behind() method did not already account for that possibility. Also add tests for the specific for-each-ref, branch, and tag builtins. In the case of 'git tag', there are intersting cases that happen when some of the selected tips are not commits. This requires careful logic around commits_nr in the second loop of filter_ahead_behind(). Also, the test in t7004 is carefully located to avoid being dependent on the GPG prereq. It also avoids using the test_commit helper, as that will add ticks to the time and disrupt the expected timestamps in later tag tests. Also add performance tests in a new p1300-graph-walks.sh script. This will be useful for more uses in the future, but for now compare the ahead-behind counting algorithm in 'git for-each-ref' to the naive implementation by running 'git rev-list --count' processes for each input. For the Git source code repository, the improvement is already obvious: Test this tree --------------------------------------------------------------- 1500.2: ahead-behind counts: git for-each-ref 0.07(0.07+0.00) 1500.3: ahead-behind counts: git branch 0.07(0.06+0.00) 1500.4: ahead-behind counts: git tag 0.07(0.06+0.00) 1500.5: ahead-behind counts: git rev-list 1.32(1.04+0.27) But the standard performance benchmark is the Linux kernel repository, which demosntrates a significant improvement: Test this tree --------------------------------------------------------------- 1500.2: ahead-behind counts: git for-each-ref 0.27(0.24+0.02) 1500.3: ahead-behind counts: git branch 0.27(0.24+0.03) 1500.4: ahead-behind counts: git tag 0.28(0.27+0.01) 1500.5: ahead-behind counts: git rev-list 4.57(4.03+0.54) The 'git rev-list' test exists in this change as a demonstration, but it will be removed in the next change to avoid wasting time on this comparison. Signed-off-by: Derrick Stolee <derrickstolee@github.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-03-20 19:26:54 +08:00
const char *arg, struct strbuf *err)
{
struct string_list_item *item;
if (!arg)
return strbuf_addf_ret(err, -1, _("expected format: %%(ahead-behind:<committish>)"));
item = string_list_append(&format->bases, arg);
item->util = lookup_commit_reference_by_name(arg);
if (!item->util)
die("failed to find '%s'", arg);
return 0;
}
for-each-ref: add 'is-base' token The previous change introduced the get_branch_base_for_tip() method in commit-reach.c. The motivation of that change was about using a heuristic to deteremine the base branch for a source commit from a list of candidate commit tips. This change makes that algorithm visible to users via a new atom in the 'git for-each-ref' format. This change is very similar to the chang in 49abcd21da6 (for-each-ref: add ahead-behind format atom, 2023-03-20). Introduce the 'is-base:<source>' atom, which will indicate that the algorithm should be computed and the result of the algorithm is reported using an indicator of the form '(<source>)'. For example, using '%(is-base:HEAD)' would result in one line having the token '(HEAD)'. Use the sorted order of refs included in the ref filter to break ties in the algorithm's heuristic. In the previous change, the motivating examples include using an L0 trunk, long-lived L1 branches, and temporary release branches. A caller could communicate the ordered preference among these categories using the input refpecs and avoiding a different sort mechanism. This sorting behavior is tested in the test scripts. It is important to include this atom as a special case to can_do_iterative_format() to match the expectations created in bd98f9774e1 (ref-filter.c: filter & format refs in the same callback, 2023-11-14). The ahead-behind atom was one of the special cases, and this similarly requires using an algorithm across all input refs before starting the format of any single ref. In the test script, the format tokens use colons or lack whitespace to avoid Git complaining about trailing whitespace errors. Signed-off-by: Derrick Stolee <stolee@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2024-08-14 18:31:29 +08:00
static int is_base_atom_parser(struct ref_format *format,
struct used_atom *atom UNUSED,
const char *arg, struct strbuf *err)
{
struct string_list_item *item;
if (!arg)
return strbuf_addf_ret(err, -1, _("expected format: %%(is-base:<committish>)"));
item = string_list_append(&format->is_base_tips, arg);
item->util = lookup_commit_reference_by_name(arg);
if (!item->util)
die("failed to find '%s'", arg);
return 0;
}
static int head_atom_parser(struct ref_format *format UNUSED,
struct used_atom *atom,
const char *arg, struct strbuf *err)
{
if (arg)
return err_no_arg(err, "HEAD");
atom->u.head = refs_resolve_refdup(get_main_ref_store(the_repository),
"HEAD", RESOLVE_REF_READING, NULL,
NULL);
return 0;
}
static struct {
const char *name;
info_source source;
cmp_type cmp_type;
int (*parser)(struct ref_format *format, struct used_atom *atom,
const char *arg, struct strbuf *err);
} valid_atom[] = {
[ATOM_REFNAME] = { "refname", SOURCE_NONE, FIELD_STR, refname_atom_parser },
[ATOM_OBJECTTYPE] = { "objecttype", SOURCE_OTHER, FIELD_STR, objecttype_atom_parser },
[ATOM_OBJECTSIZE] = { "objectsize", SOURCE_OTHER, FIELD_ULONG, objectsize_atom_parser },
[ATOM_OBJECTNAME] = { "objectname", SOURCE_OTHER, FIELD_STR, oid_atom_parser },
[ATOM_DELTABASE] = { "deltabase", SOURCE_OTHER, FIELD_STR, deltabase_atom_parser },
[ATOM_TREE] = { "tree", SOURCE_OBJ, FIELD_STR, oid_atom_parser },
[ATOM_PARENT] = { "parent", SOURCE_OBJ, FIELD_STR, oid_atom_parser },
[ATOM_NUMPARENT] = { "numparent", SOURCE_OBJ, FIELD_ULONG },
[ATOM_OBJECT] = { "object", SOURCE_OBJ },
[ATOM_TYPE] = { "type", SOURCE_OBJ },
[ATOM_TAG] = { "tag", SOURCE_OBJ },
[ATOM_AUTHOR] = { "author", SOURCE_OBJ },
[ATOM_AUTHORNAME] = { "authorname", SOURCE_OBJ, FIELD_STR, person_name_atom_parser },
[ATOM_AUTHOREMAIL] = { "authoremail", SOURCE_OBJ, FIELD_STR, person_email_atom_parser },
[ATOM_AUTHORDATE] = { "authordate", SOURCE_OBJ, FIELD_TIME },
[ATOM_COMMITTER] = { "committer", SOURCE_OBJ },
[ATOM_COMMITTERNAME] = { "committername", SOURCE_OBJ, FIELD_STR, person_name_atom_parser },
[ATOM_COMMITTEREMAIL] = { "committeremail", SOURCE_OBJ, FIELD_STR, person_email_atom_parser },
[ATOM_COMMITTERDATE] = { "committerdate", SOURCE_OBJ, FIELD_TIME },
[ATOM_TAGGER] = { "tagger", SOURCE_OBJ },
[ATOM_TAGGERNAME] = { "taggername", SOURCE_OBJ, FIELD_STR, person_name_atom_parser },
[ATOM_TAGGEREMAIL] = { "taggeremail", SOURCE_OBJ, FIELD_STR, person_email_atom_parser },
[ATOM_TAGGERDATE] = { "taggerdate", SOURCE_OBJ, FIELD_TIME },
[ATOM_CREATOR] = { "creator", SOURCE_OBJ },
[ATOM_CREATORDATE] = { "creatordate", SOURCE_OBJ, FIELD_TIME },
[ATOM_DESCRIBE] = { "describe", SOURCE_OBJ, FIELD_STR, describe_atom_parser },
[ATOM_SUBJECT] = { "subject", SOURCE_OBJ, FIELD_STR, subject_atom_parser },
[ATOM_BODY] = { "body", SOURCE_OBJ, FIELD_STR, body_atom_parser },
[ATOM_TRAILERS] = { "trailers", SOURCE_OBJ, FIELD_STR, trailers_atom_parser },
[ATOM_CONTENTS] = { "contents", SOURCE_OBJ, FIELD_STR, contents_atom_parser },
[ATOM_SIGNATURE] = { "signature", SOURCE_OBJ, FIELD_STR, signature_atom_parser },
ref-filter: add %(raw) atom Add new formatting option `%(raw)`, which will print the raw object data without any changes. It will help further to migrate all cat-file formatting logic from cat-file to ref-filter. The raw data of blob, tree objects may contain '\0', but most of the logic in `ref-filter` depends on the output of the atom being text (specifically, no embedded NULs in it). E.g. `quote_formatting()` use `strbuf_addstr()` or `*._quote_buf()` add the data to the buffer. The raw data of a tree object is `100644 one\0...`, only the `100644 one` will be added to the buffer, which is incorrect. Therefore, we need to find a way to record the length of the atom_value's member `s`. Although strbuf can already record the string and its length, if we want to replace the type of atom_value's member `s` with strbuf, many places in ref-filter that are filled with dynamically allocated mermory in `v->s` are not easy to replace. At the same time, we need to check if `v->s == NULL` in populate_value(), and strbuf cannot easily distinguish NULL and empty strings, but c-style "const char *" can do it. So add a new member in `struct atom_value`: `s_size`, which can record raw object size, it can help us add raw object data to the buffer or compare two buffers which contain raw object data. Note that `--format=%(raw)` cannot be used with `--python`, `--shell`, `--tcl`, and `--perl` because if the binary raw data is passed to a variable in such languages, these may not support arbitrary binary data in their string variable type. Reviewed-by: Jacob Keller <jacob.keller@gmail.com> Mentored-by: Christian Couder <christian.couder@gmail.com> Mentored-by: Hariom Verma <hariom18599@gmail.com> Helped-by: Bagas Sanjaya <bagasdotme@gmail.com> Helped-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Helped-by: Felipe Contreras <felipe.contreras@gmail.com> Helped-by: Phillip Wood <phillip.wood@dunelm.org.uk> Helped-by: Junio C Hamano <gitster@pobox.com> Based-on-patch-by: Olga Telezhnaya <olyatelezhnaya@gmail.com> Signed-off-by: ZheNing Hu <adlternative@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-07-26 11:26:47 +08:00
[ATOM_RAW] = { "raw", SOURCE_OBJ, FIELD_STR, raw_atom_parser },
[ATOM_UPSTREAM] = { "upstream", SOURCE_NONE, FIELD_STR, remote_ref_atom_parser },
[ATOM_PUSH] = { "push", SOURCE_NONE, FIELD_STR, remote_ref_atom_parser },
[ATOM_SYMREF] = { "symref", SOURCE_NONE, FIELD_STR, refname_atom_parser },
[ATOM_FLAG] = { "flag", SOURCE_NONE },
[ATOM_HEAD] = { "HEAD", SOURCE_NONE, FIELD_STR, head_atom_parser },
[ATOM_COLOR] = { "color", SOURCE_NONE, FIELD_STR, color_atom_parser },
[ATOM_WORKTREEPATH] = { "worktreepath", SOURCE_NONE },
[ATOM_ALIGN] = { "align", SOURCE_NONE, FIELD_STR, align_atom_parser },
[ATOM_END] = { "end", SOURCE_NONE },
[ATOM_IF] = { "if", SOURCE_NONE, FIELD_STR, if_atom_parser },
[ATOM_THEN] = { "then", SOURCE_NONE },
[ATOM_ELSE] = { "else", SOURCE_NONE },
[ATOM_REST] = { "rest", SOURCE_NONE, FIELD_STR, rest_atom_parser },
for-each-ref: add ahead-behind format atom The previous change implemented the ahead_behind() method, including an algorithm to compute the ahead/behind values for a number of commit tips relative to a number of commit bases. Now, integrate that algorithm as part of 'git for-each-ref' hidden behind a new format atom, ahead-behind. This naturally extends to 'git branch' and 'git tag' builtins, as well. This format allows specifying multiple bases, if so desired, and all matching references are compared against all of those bases. For this reason, failing to read a reference provided from these atoms results in an error. In order to translate the ahead_behind() method information to the format output code in ref-filter.c, we must populate arrays of ahead_behind_count structs. In struct ref_array, we store the full array that will be passed to ahead_behind(). In struct ref_array_item, we store an array of pointers that point to the relvant items within the full array. In this way, we can pull all relevant ahead/behind values directly when formatting output for a specific item. It also ensures the lifetime of the ahead_behind_count structs matches the time that the array is being used. Add specific tests of the ahead/behind counts in t6600-test-reach.sh, as it has an interesting repository shape. In particular, its merging strategy and its use of different commit-graphs would demonstrate over- counting if the ahead_behind() method did not already account for that possibility. Also add tests for the specific for-each-ref, branch, and tag builtins. In the case of 'git tag', there are intersting cases that happen when some of the selected tips are not commits. This requires careful logic around commits_nr in the second loop of filter_ahead_behind(). Also, the test in t7004 is carefully located to avoid being dependent on the GPG prereq. It also avoids using the test_commit helper, as that will add ticks to the time and disrupt the expected timestamps in later tag tests. Also add performance tests in a new p1300-graph-walks.sh script. This will be useful for more uses in the future, but for now compare the ahead-behind counting algorithm in 'git for-each-ref' to the naive implementation by running 'git rev-list --count' processes for each input. For the Git source code repository, the improvement is already obvious: Test this tree --------------------------------------------------------------- 1500.2: ahead-behind counts: git for-each-ref 0.07(0.07+0.00) 1500.3: ahead-behind counts: git branch 0.07(0.06+0.00) 1500.4: ahead-behind counts: git tag 0.07(0.06+0.00) 1500.5: ahead-behind counts: git rev-list 1.32(1.04+0.27) But the standard performance benchmark is the Linux kernel repository, which demosntrates a significant improvement: Test this tree --------------------------------------------------------------- 1500.2: ahead-behind counts: git for-each-ref 0.27(0.24+0.02) 1500.3: ahead-behind counts: git branch 0.27(0.24+0.03) 1500.4: ahead-behind counts: git tag 0.28(0.27+0.01) 1500.5: ahead-behind counts: git rev-list 4.57(4.03+0.54) The 'git rev-list' test exists in this change as a demonstration, but it will be removed in the next change to avoid wasting time on this comparison. Signed-off-by: Derrick Stolee <derrickstolee@github.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-03-20 19:26:54 +08:00
[ATOM_AHEADBEHIND] = { "ahead-behind", SOURCE_OTHER, FIELD_STR, ahead_behind_atom_parser },
for-each-ref: add 'is-base' token The previous change introduced the get_branch_base_for_tip() method in commit-reach.c. The motivation of that change was about using a heuristic to deteremine the base branch for a source commit from a list of candidate commit tips. This change makes that algorithm visible to users via a new atom in the 'git for-each-ref' format. This change is very similar to the chang in 49abcd21da6 (for-each-ref: add ahead-behind format atom, 2023-03-20). Introduce the 'is-base:<source>' atom, which will indicate that the algorithm should be computed and the result of the algorithm is reported using an indicator of the form '(<source>)'. For example, using '%(is-base:HEAD)' would result in one line having the token '(HEAD)'. Use the sorted order of refs included in the ref filter to break ties in the algorithm's heuristic. In the previous change, the motivating examples include using an L0 trunk, long-lived L1 branches, and temporary release branches. A caller could communicate the ordered preference among these categories using the input refpecs and avoiding a different sort mechanism. This sorting behavior is tested in the test scripts. It is important to include this atom as a special case to can_do_iterative_format() to match the expectations created in bd98f9774e1 (ref-filter.c: filter & format refs in the same callback, 2023-11-14). The ahead-behind atom was one of the special cases, and this similarly requires using an algorithm across all input refs before starting the format of any single ref. In the test script, the format tokens use colons or lack whitespace to avoid Git complaining about trailing whitespace errors. Signed-off-by: Derrick Stolee <stolee@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2024-08-14 18:31:29 +08:00
[ATOM_ISBASE] = { "is-base", SOURCE_OTHER, FIELD_STR, is_base_atom_parser },
/*
* Please update $__git_ref_fieldlist in git-completion.bash
* when you add new atoms
*/
};
#define REF_FORMATTING_STATE_INIT { 0 }
struct ref_formatting_stack {
struct ref_formatting_stack *prev;
struct strbuf output;
void (*at_end)(struct ref_formatting_stack **stack);
2015-09-11 23:03:07 +08:00
void *at_end_data;
};
struct ref_formatting_state {
int quote_style;
struct ref_formatting_stack *stack;
};
struct atom_value {
const char *s;
ref-filter: add %(raw) atom Add new formatting option `%(raw)`, which will print the raw object data without any changes. It will help further to migrate all cat-file formatting logic from cat-file to ref-filter. The raw data of blob, tree objects may contain '\0', but most of the logic in `ref-filter` depends on the output of the atom being text (specifically, no embedded NULs in it). E.g. `quote_formatting()` use `strbuf_addstr()` or `*._quote_buf()` add the data to the buffer. The raw data of a tree object is `100644 one\0...`, only the `100644 one` will be added to the buffer, which is incorrect. Therefore, we need to find a way to record the length of the atom_value's member `s`. Although strbuf can already record the string and its length, if we want to replace the type of atom_value's member `s` with strbuf, many places in ref-filter that are filled with dynamically allocated mermory in `v->s` are not easy to replace. At the same time, we need to check if `v->s == NULL` in populate_value(), and strbuf cannot easily distinguish NULL and empty strings, but c-style "const char *" can do it. So add a new member in `struct atom_value`: `s_size`, which can record raw object size, it can help us add raw object data to the buffer or compare two buffers which contain raw object data. Note that `--format=%(raw)` cannot be used with `--python`, `--shell`, `--tcl`, and `--perl` because if the binary raw data is passed to a variable in such languages, these may not support arbitrary binary data in their string variable type. Reviewed-by: Jacob Keller <jacob.keller@gmail.com> Mentored-by: Christian Couder <christian.couder@gmail.com> Mentored-by: Hariom Verma <hariom18599@gmail.com> Helped-by: Bagas Sanjaya <bagasdotme@gmail.com> Helped-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Helped-by: Felipe Contreras <felipe.contreras@gmail.com> Helped-by: Phillip Wood <phillip.wood@dunelm.org.uk> Helped-by: Junio C Hamano <gitster@pobox.com> Based-on-patch-by: Olga Telezhnaya <olyatelezhnaya@gmail.com> Signed-off-by: ZheNing Hu <adlternative@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-07-26 11:26:47 +08:00
ssize_t s_size;
int (*handler)(struct atom_value *atomv, struct ref_formatting_state *state,
struct strbuf *err);
uintmax_t value; /* used for sorting when not FIELD_STR */
struct used_atom *atom;
};
ref-filter: add %(raw) atom Add new formatting option `%(raw)`, which will print the raw object data without any changes. It will help further to migrate all cat-file formatting logic from cat-file to ref-filter. The raw data of blob, tree objects may contain '\0', but most of the logic in `ref-filter` depends on the output of the atom being text (specifically, no embedded NULs in it). E.g. `quote_formatting()` use `strbuf_addstr()` or `*._quote_buf()` add the data to the buffer. The raw data of a tree object is `100644 one\0...`, only the `100644 one` will be added to the buffer, which is incorrect. Therefore, we need to find a way to record the length of the atom_value's member `s`. Although strbuf can already record the string and its length, if we want to replace the type of atom_value's member `s` with strbuf, many places in ref-filter that are filled with dynamically allocated mermory in `v->s` are not easy to replace. At the same time, we need to check if `v->s == NULL` in populate_value(), and strbuf cannot easily distinguish NULL and empty strings, but c-style "const char *" can do it. So add a new member in `struct atom_value`: `s_size`, which can record raw object size, it can help us add raw object data to the buffer or compare two buffers which contain raw object data. Note that `--format=%(raw)` cannot be used with `--python`, `--shell`, `--tcl`, and `--perl` because if the binary raw data is passed to a variable in such languages, these may not support arbitrary binary data in their string variable type. Reviewed-by: Jacob Keller <jacob.keller@gmail.com> Mentored-by: Christian Couder <christian.couder@gmail.com> Mentored-by: Hariom Verma <hariom18599@gmail.com> Helped-by: Bagas Sanjaya <bagasdotme@gmail.com> Helped-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Helped-by: Felipe Contreras <felipe.contreras@gmail.com> Helped-by: Phillip Wood <phillip.wood@dunelm.org.uk> Helped-by: Junio C Hamano <gitster@pobox.com> Based-on-patch-by: Olga Telezhnaya <olyatelezhnaya@gmail.com> Signed-off-by: ZheNing Hu <adlternative@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-07-26 11:26:47 +08:00
#define ATOM_SIZE_UNSPECIFIED (-1)
#define ATOM_VALUE_INIT { \
.s_size = ATOM_SIZE_UNSPECIFIED \
}
/*
* Used to parse format string and sort specifiers
*/
static int parse_ref_filter_atom(struct ref_format *format,
const char *atom, const char *ep,
struct strbuf *err)
{
const char *sp;
const char *arg;
int i, at, atom_len;
sp = atom;
if (*sp == '*' && sp < ep)
sp++; /* deref */
if (ep <= sp)
return strbuf_addf_ret(err, -1, _("malformed field name: %.*s"),
(int)(ep-atom), atom);
/*
* If the atom name has a colon, strip it and everything after
* it off - it specifies the format for this entry, and
* shouldn't be used for checking against the valid_atom
* table.
*/
arg = memchr(sp, ':', ep - sp);
atom_len = (arg ? arg : ep) - sp;
ref-filter: add %(raw) atom Add new formatting option `%(raw)`, which will print the raw object data without any changes. It will help further to migrate all cat-file formatting logic from cat-file to ref-filter. The raw data of blob, tree objects may contain '\0', but most of the logic in `ref-filter` depends on the output of the atom being text (specifically, no embedded NULs in it). E.g. `quote_formatting()` use `strbuf_addstr()` or `*._quote_buf()` add the data to the buffer. The raw data of a tree object is `100644 one\0...`, only the `100644 one` will be added to the buffer, which is incorrect. Therefore, we need to find a way to record the length of the atom_value's member `s`. Although strbuf can already record the string and its length, if we want to replace the type of atom_value's member `s` with strbuf, many places in ref-filter that are filled with dynamically allocated mermory in `v->s` are not easy to replace. At the same time, we need to check if `v->s == NULL` in populate_value(), and strbuf cannot easily distinguish NULL and empty strings, but c-style "const char *" can do it. So add a new member in `struct atom_value`: `s_size`, which can record raw object size, it can help us add raw object data to the buffer or compare two buffers which contain raw object data. Note that `--format=%(raw)` cannot be used with `--python`, `--shell`, `--tcl`, and `--perl` because if the binary raw data is passed to a variable in such languages, these may not support arbitrary binary data in their string variable type. Reviewed-by: Jacob Keller <jacob.keller@gmail.com> Mentored-by: Christian Couder <christian.couder@gmail.com> Mentored-by: Hariom Verma <hariom18599@gmail.com> Helped-by: Bagas Sanjaya <bagasdotme@gmail.com> Helped-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Helped-by: Felipe Contreras <felipe.contreras@gmail.com> Helped-by: Phillip Wood <phillip.wood@dunelm.org.uk> Helped-by: Junio C Hamano <gitster@pobox.com> Based-on-patch-by: Olga Telezhnaya <olyatelezhnaya@gmail.com> Signed-off-by: ZheNing Hu <adlternative@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-07-26 11:26:47 +08:00
/* Do we have the atom already used elsewhere? */
for (i = 0; i < used_atom_cnt; i++) {
int len = strlen(used_atom[i].name);
if (len == ep - atom && !memcmp(used_atom[i].name, atom, len))
return i;
}
/* Is the atom a valid one? */
for (i = 0; i < ARRAY_SIZE(valid_atom); i++) {
int len = strlen(valid_atom[i].name);
if (len == atom_len && !memcmp(valid_atom[i].name, sp, len))
break;
}
if (ARRAY_SIZE(valid_atom) <= i)
return strbuf_addf_ret(err, -1, _("unknown field name: %.*s"),
(int)(ep-atom), atom);
if (valid_atom[i].source != SOURCE_NONE && !have_git_dir())
return strbuf_addf_ret(err, -1,
_("not a git repository, but the field '%.*s' requires access to object data"),
(int)(ep-atom), atom);
/* Add it in, including the deref prefix */
at = used_atom_cnt;
used_atom_cnt++;
REALLOC_ARRAY(used_atom, used_atom_cnt);
used_atom[at].atom_type = i;
used_atom[at].name = xmemdupz(atom, ep - atom);
used_atom[at].type = valid_atom[i].cmp_type;
used_atom[at].source = valid_atom[i].source;
if (used_atom[at].source == SOURCE_OBJ) {
if (*atom == '*')
oi_deref.info.contentp = &oi_deref.content;
else
oi.info.contentp = &oi.content;
}
if (arg) {
arg = used_atom[at].name + (arg - atom) + 1;
if (!*arg) {
/*
* Treat empty sub-arguments list as NULL (i.e.,
* "%(atom:)" is equivalent to "%(atom)").
*/
arg = NULL;
}
}
memset(&used_atom[at].u, 0, sizeof(used_atom[at].u));
if (valid_atom[i].parser && valid_atom[i].parser(format, &used_atom[at], arg, err))
return -1;
if (*atom == '*')
need_tagged = 1;
if (i == ATOM_SYMREF)
need_symref = 1;
return at;
}
ref-filter: add %(raw) atom Add new formatting option `%(raw)`, which will print the raw object data without any changes. It will help further to migrate all cat-file formatting logic from cat-file to ref-filter. The raw data of blob, tree objects may contain '\0', but most of the logic in `ref-filter` depends on the output of the atom being text (specifically, no embedded NULs in it). E.g. `quote_formatting()` use `strbuf_addstr()` or `*._quote_buf()` add the data to the buffer. The raw data of a tree object is `100644 one\0...`, only the `100644 one` will be added to the buffer, which is incorrect. Therefore, we need to find a way to record the length of the atom_value's member `s`. Although strbuf can already record the string and its length, if we want to replace the type of atom_value's member `s` with strbuf, many places in ref-filter that are filled with dynamically allocated mermory in `v->s` are not easy to replace. At the same time, we need to check if `v->s == NULL` in populate_value(), and strbuf cannot easily distinguish NULL and empty strings, but c-style "const char *" can do it. So add a new member in `struct atom_value`: `s_size`, which can record raw object size, it can help us add raw object data to the buffer or compare two buffers which contain raw object data. Note that `--format=%(raw)` cannot be used with `--python`, `--shell`, `--tcl`, and `--perl` because if the binary raw data is passed to a variable in such languages, these may not support arbitrary binary data in their string variable type. Reviewed-by: Jacob Keller <jacob.keller@gmail.com> Mentored-by: Christian Couder <christian.couder@gmail.com> Mentored-by: Hariom Verma <hariom18599@gmail.com> Helped-by: Bagas Sanjaya <bagasdotme@gmail.com> Helped-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Helped-by: Felipe Contreras <felipe.contreras@gmail.com> Helped-by: Phillip Wood <phillip.wood@dunelm.org.uk> Helped-by: Junio C Hamano <gitster@pobox.com> Based-on-patch-by: Olga Telezhnaya <olyatelezhnaya@gmail.com> Signed-off-by: ZheNing Hu <adlternative@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-07-26 11:26:47 +08:00
static void quote_formatting(struct strbuf *s, const char *str, ssize_t len, int quote_style)
{
switch (quote_style) {
case QUOTE_NONE:
ref-filter: add %(raw) atom Add new formatting option `%(raw)`, which will print the raw object data without any changes. It will help further to migrate all cat-file formatting logic from cat-file to ref-filter. The raw data of blob, tree objects may contain '\0', but most of the logic in `ref-filter` depends on the output of the atom being text (specifically, no embedded NULs in it). E.g. `quote_formatting()` use `strbuf_addstr()` or `*._quote_buf()` add the data to the buffer. The raw data of a tree object is `100644 one\0...`, only the `100644 one` will be added to the buffer, which is incorrect. Therefore, we need to find a way to record the length of the atom_value's member `s`. Although strbuf can already record the string and its length, if we want to replace the type of atom_value's member `s` with strbuf, many places in ref-filter that are filled with dynamically allocated mermory in `v->s` are not easy to replace. At the same time, we need to check if `v->s == NULL` in populate_value(), and strbuf cannot easily distinguish NULL and empty strings, but c-style "const char *" can do it. So add a new member in `struct atom_value`: `s_size`, which can record raw object size, it can help us add raw object data to the buffer or compare two buffers which contain raw object data. Note that `--format=%(raw)` cannot be used with `--python`, `--shell`, `--tcl`, and `--perl` because if the binary raw data is passed to a variable in such languages, these may not support arbitrary binary data in their string variable type. Reviewed-by: Jacob Keller <jacob.keller@gmail.com> Mentored-by: Christian Couder <christian.couder@gmail.com> Mentored-by: Hariom Verma <hariom18599@gmail.com> Helped-by: Bagas Sanjaya <bagasdotme@gmail.com> Helped-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Helped-by: Felipe Contreras <felipe.contreras@gmail.com> Helped-by: Phillip Wood <phillip.wood@dunelm.org.uk> Helped-by: Junio C Hamano <gitster@pobox.com> Based-on-patch-by: Olga Telezhnaya <olyatelezhnaya@gmail.com> Signed-off-by: ZheNing Hu <adlternative@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-07-26 11:26:47 +08:00
if (len < 0)
strbuf_addstr(s, str);
else
strbuf_add(s, str, len);
break;
case QUOTE_SHELL:
sq_quote_buf(s, str);
break;
case QUOTE_PERL:
if (len < 0)
perl_quote_buf(s, str);
else
perl_quote_buf_with_len(s, str, len);
break;
case QUOTE_PYTHON:
python_quote_buf(s, str);
break;
case QUOTE_TCL:
tcl_quote_buf(s, str);
break;
}
}
static int append_atom(struct atom_value *v, struct ref_formatting_state *state,
struct strbuf *err UNUSED)
{
2015-09-11 23:03:07 +08:00
/*
* Quote formatting is only done when the stack has a single
* element. Otherwise quote formatting is done on the
* element's entire output strbuf when the %(end) atom is
* encountered.
*/
if (!state->stack->prev)
ref-filter: add %(raw) atom Add new formatting option `%(raw)`, which will print the raw object data without any changes. It will help further to migrate all cat-file formatting logic from cat-file to ref-filter. The raw data of blob, tree objects may contain '\0', but most of the logic in `ref-filter` depends on the output of the atom being text (specifically, no embedded NULs in it). E.g. `quote_formatting()` use `strbuf_addstr()` or `*._quote_buf()` add the data to the buffer. The raw data of a tree object is `100644 one\0...`, only the `100644 one` will be added to the buffer, which is incorrect. Therefore, we need to find a way to record the length of the atom_value's member `s`. Although strbuf can already record the string and its length, if we want to replace the type of atom_value's member `s` with strbuf, many places in ref-filter that are filled with dynamically allocated mermory in `v->s` are not easy to replace. At the same time, we need to check if `v->s == NULL` in populate_value(), and strbuf cannot easily distinguish NULL and empty strings, but c-style "const char *" can do it. So add a new member in `struct atom_value`: `s_size`, which can record raw object size, it can help us add raw object data to the buffer or compare two buffers which contain raw object data. Note that `--format=%(raw)` cannot be used with `--python`, `--shell`, `--tcl`, and `--perl` because if the binary raw data is passed to a variable in such languages, these may not support arbitrary binary data in their string variable type. Reviewed-by: Jacob Keller <jacob.keller@gmail.com> Mentored-by: Christian Couder <christian.couder@gmail.com> Mentored-by: Hariom Verma <hariom18599@gmail.com> Helped-by: Bagas Sanjaya <bagasdotme@gmail.com> Helped-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Helped-by: Felipe Contreras <felipe.contreras@gmail.com> Helped-by: Phillip Wood <phillip.wood@dunelm.org.uk> Helped-by: Junio C Hamano <gitster@pobox.com> Based-on-patch-by: Olga Telezhnaya <olyatelezhnaya@gmail.com> Signed-off-by: ZheNing Hu <adlternative@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-07-26 11:26:47 +08:00
quote_formatting(&state->stack->output, v->s, v->s_size, state->quote_style);
else if (v->s_size < 0)
2015-09-11 23:03:07 +08:00
strbuf_addstr(&state->stack->output, v->s);
ref-filter: add %(raw) atom Add new formatting option `%(raw)`, which will print the raw object data without any changes. It will help further to migrate all cat-file formatting logic from cat-file to ref-filter. The raw data of blob, tree objects may contain '\0', but most of the logic in `ref-filter` depends on the output of the atom being text (specifically, no embedded NULs in it). E.g. `quote_formatting()` use `strbuf_addstr()` or `*._quote_buf()` add the data to the buffer. The raw data of a tree object is `100644 one\0...`, only the `100644 one` will be added to the buffer, which is incorrect. Therefore, we need to find a way to record the length of the atom_value's member `s`. Although strbuf can already record the string and its length, if we want to replace the type of atom_value's member `s` with strbuf, many places in ref-filter that are filled with dynamically allocated mermory in `v->s` are not easy to replace. At the same time, we need to check if `v->s == NULL` in populate_value(), and strbuf cannot easily distinguish NULL and empty strings, but c-style "const char *" can do it. So add a new member in `struct atom_value`: `s_size`, which can record raw object size, it can help us add raw object data to the buffer or compare two buffers which contain raw object data. Note that `--format=%(raw)` cannot be used with `--python`, `--shell`, `--tcl`, and `--perl` because if the binary raw data is passed to a variable in such languages, these may not support arbitrary binary data in their string variable type. Reviewed-by: Jacob Keller <jacob.keller@gmail.com> Mentored-by: Christian Couder <christian.couder@gmail.com> Mentored-by: Hariom Verma <hariom18599@gmail.com> Helped-by: Bagas Sanjaya <bagasdotme@gmail.com> Helped-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Helped-by: Felipe Contreras <felipe.contreras@gmail.com> Helped-by: Phillip Wood <phillip.wood@dunelm.org.uk> Helped-by: Junio C Hamano <gitster@pobox.com> Based-on-patch-by: Olga Telezhnaya <olyatelezhnaya@gmail.com> Signed-off-by: ZheNing Hu <adlternative@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-07-26 11:26:47 +08:00
else
strbuf_add(&state->stack->output, v->s, v->s_size);
return 0;
}
static void push_stack_element(struct ref_formatting_stack **stack)
{
struct ref_formatting_stack *s = xcalloc(1, sizeof(struct ref_formatting_stack));
strbuf_init(&s->output, 0);
s->prev = *stack;
*stack = s;
}
static void pop_stack_element(struct ref_formatting_stack **stack)
{
struct ref_formatting_stack *current = *stack;
struct ref_formatting_stack *prev = current->prev;
if (prev)
strbuf_addbuf(&prev->output, &current->output);
strbuf_release(&current->output);
free(current);
*stack = prev;
}
static void end_align_handler(struct ref_formatting_stack **stack)
2015-09-11 23:03:07 +08:00
{
struct ref_formatting_stack *cur = *stack;
struct align *align = (struct align *)cur->at_end_data;
2015-09-11 23:03:07 +08:00
struct strbuf s = STRBUF_INIT;
strbuf_utf8_align(&s, align->position, align->width, cur->output.buf);
strbuf_swap(&cur->output, &s);
2015-09-11 23:03:07 +08:00
strbuf_release(&s);
}
static int align_atom_handler(struct atom_value *atomv, struct ref_formatting_state *state,
struct strbuf *err UNUSED)
2015-09-11 23:03:07 +08:00
{
struct ref_formatting_stack *new_stack;
2015-09-11 23:03:07 +08:00
push_stack_element(&state->stack);
new_stack = state->stack;
new_stack->at_end = end_align_handler;
new_stack->at_end_data = &atomv->atom->u.align;
return 0;
2015-09-11 23:03:07 +08:00
}
static void if_then_else_handler(struct ref_formatting_stack **stack)
{
struct ref_formatting_stack *cur = *stack;
struct ref_formatting_stack *prev = cur->prev;
struct if_then_else *if_then_else = (struct if_then_else *)cur->at_end_data;
if (!if_then_else->then_atom_seen)
die(_("format: %%(%s) atom used without a %%(%s) atom"), "if", "then");
if (if_then_else->else_atom_seen) {
/*
* There is an %(else) atom: we need to drop one state from the
* stack, either the %(else) branch if the condition is satisfied, or
* the %(then) branch if it isn't.
*/
if (if_then_else->condition_satisfied) {
strbuf_reset(&cur->output);
pop_stack_element(&cur);
} else {
strbuf_swap(&cur->output, &prev->output);
strbuf_reset(&cur->output);
pop_stack_element(&cur);
}
} else if (!if_then_else->condition_satisfied) {
/*
* No %(else) atom: just drop the %(then) branch if the
* condition is not satisfied.
*/
strbuf_reset(&cur->output);
}
*stack = cur;
free(if_then_else);
}
static int if_atom_handler(struct atom_value *atomv, struct ref_formatting_state *state,
struct strbuf *err UNUSED)
{
struct ref_formatting_stack *new_stack;
struct if_then_else *if_then_else = xcalloc(1,
sizeof(struct if_then_else));
if_then_else->str = atomv->atom->u.if_then_else.str;
if_then_else->cmp_status = atomv->atom->u.if_then_else.cmp_status;
push_stack_element(&state->stack);
new_stack = state->stack;
new_stack->at_end = if_then_else_handler;
new_stack->at_end_data = if_then_else;
return 0;
}
ref-filter: add %(raw) atom Add new formatting option `%(raw)`, which will print the raw object data without any changes. It will help further to migrate all cat-file formatting logic from cat-file to ref-filter. The raw data of blob, tree objects may contain '\0', but most of the logic in `ref-filter` depends on the output of the atom being text (specifically, no embedded NULs in it). E.g. `quote_formatting()` use `strbuf_addstr()` or `*._quote_buf()` add the data to the buffer. The raw data of a tree object is `100644 one\0...`, only the `100644 one` will be added to the buffer, which is incorrect. Therefore, we need to find a way to record the length of the atom_value's member `s`. Although strbuf can already record the string and its length, if we want to replace the type of atom_value's member `s` with strbuf, many places in ref-filter that are filled with dynamically allocated mermory in `v->s` are not easy to replace. At the same time, we need to check if `v->s == NULL` in populate_value(), and strbuf cannot easily distinguish NULL and empty strings, but c-style "const char *" can do it. So add a new member in `struct atom_value`: `s_size`, which can record raw object size, it can help us add raw object data to the buffer or compare two buffers which contain raw object data. Note that `--format=%(raw)` cannot be used with `--python`, `--shell`, `--tcl`, and `--perl` because if the binary raw data is passed to a variable in such languages, these may not support arbitrary binary data in their string variable type. Reviewed-by: Jacob Keller <jacob.keller@gmail.com> Mentored-by: Christian Couder <christian.couder@gmail.com> Mentored-by: Hariom Verma <hariom18599@gmail.com> Helped-by: Bagas Sanjaya <bagasdotme@gmail.com> Helped-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Helped-by: Felipe Contreras <felipe.contreras@gmail.com> Helped-by: Phillip Wood <phillip.wood@dunelm.org.uk> Helped-by: Junio C Hamano <gitster@pobox.com> Based-on-patch-by: Olga Telezhnaya <olyatelezhnaya@gmail.com> Signed-off-by: ZheNing Hu <adlternative@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-07-26 11:26:47 +08:00
static int is_empty(struct strbuf *buf)
{
ref-filter: add %(raw) atom Add new formatting option `%(raw)`, which will print the raw object data without any changes. It will help further to migrate all cat-file formatting logic from cat-file to ref-filter. The raw data of blob, tree objects may contain '\0', but most of the logic in `ref-filter` depends on the output of the atom being text (specifically, no embedded NULs in it). E.g. `quote_formatting()` use `strbuf_addstr()` or `*._quote_buf()` add the data to the buffer. The raw data of a tree object is `100644 one\0...`, only the `100644 one` will be added to the buffer, which is incorrect. Therefore, we need to find a way to record the length of the atom_value's member `s`. Although strbuf can already record the string and its length, if we want to replace the type of atom_value's member `s` with strbuf, many places in ref-filter that are filled with dynamically allocated mermory in `v->s` are not easy to replace. At the same time, we need to check if `v->s == NULL` in populate_value(), and strbuf cannot easily distinguish NULL and empty strings, but c-style "const char *" can do it. So add a new member in `struct atom_value`: `s_size`, which can record raw object size, it can help us add raw object data to the buffer or compare two buffers which contain raw object data. Note that `--format=%(raw)` cannot be used with `--python`, `--shell`, `--tcl`, and `--perl` because if the binary raw data is passed to a variable in such languages, these may not support arbitrary binary data in their string variable type. Reviewed-by: Jacob Keller <jacob.keller@gmail.com> Mentored-by: Christian Couder <christian.couder@gmail.com> Mentored-by: Hariom Verma <hariom18599@gmail.com> Helped-by: Bagas Sanjaya <bagasdotme@gmail.com> Helped-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Helped-by: Felipe Contreras <felipe.contreras@gmail.com> Helped-by: Phillip Wood <phillip.wood@dunelm.org.uk> Helped-by: Junio C Hamano <gitster@pobox.com> Based-on-patch-by: Olga Telezhnaya <olyatelezhnaya@gmail.com> Signed-off-by: ZheNing Hu <adlternative@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-07-26 11:26:47 +08:00
const char *cur = buf->buf;
const char *end = buf->buf + buf->len;
while (cur != end && (isspace(*cur)))
cur++;
return cur == end;
}
static int then_atom_handler(struct atom_value *atomv UNUSED,
struct ref_formatting_state *state,
struct strbuf *err)
{
struct ref_formatting_stack *cur = state->stack;
struct if_then_else *if_then_else = NULL;
ref-filter: add %(raw) atom Add new formatting option `%(raw)`, which will print the raw object data without any changes. It will help further to migrate all cat-file formatting logic from cat-file to ref-filter. The raw data of blob, tree objects may contain '\0', but most of the logic in `ref-filter` depends on the output of the atom being text (specifically, no embedded NULs in it). E.g. `quote_formatting()` use `strbuf_addstr()` or `*._quote_buf()` add the data to the buffer. The raw data of a tree object is `100644 one\0...`, only the `100644 one` will be added to the buffer, which is incorrect. Therefore, we need to find a way to record the length of the atom_value's member `s`. Although strbuf can already record the string and its length, if we want to replace the type of atom_value's member `s` with strbuf, many places in ref-filter that are filled with dynamically allocated mermory in `v->s` are not easy to replace. At the same time, we need to check if `v->s == NULL` in populate_value(), and strbuf cannot easily distinguish NULL and empty strings, but c-style "const char *" can do it. So add a new member in `struct atom_value`: `s_size`, which can record raw object size, it can help us add raw object data to the buffer or compare two buffers which contain raw object data. Note that `--format=%(raw)` cannot be used with `--python`, `--shell`, `--tcl`, and `--perl` because if the binary raw data is passed to a variable in such languages, these may not support arbitrary binary data in their string variable type. Reviewed-by: Jacob Keller <jacob.keller@gmail.com> Mentored-by: Christian Couder <christian.couder@gmail.com> Mentored-by: Hariom Verma <hariom18599@gmail.com> Helped-by: Bagas Sanjaya <bagasdotme@gmail.com> Helped-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Helped-by: Felipe Contreras <felipe.contreras@gmail.com> Helped-by: Phillip Wood <phillip.wood@dunelm.org.uk> Helped-by: Junio C Hamano <gitster@pobox.com> Based-on-patch-by: Olga Telezhnaya <olyatelezhnaya@gmail.com> Signed-off-by: ZheNing Hu <adlternative@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-07-26 11:26:47 +08:00
size_t str_len = 0;
if (cur->at_end == if_then_else_handler)
if_then_else = (struct if_then_else *)cur->at_end_data;
if (!if_then_else)
return strbuf_addf_ret(err, -1, _("format: %%(%s) atom used without a %%(%s) atom"), "then", "if");
if (if_then_else->then_atom_seen)
return strbuf_addf_ret(err, -1, _("format: %%(then) atom used more than once"));
if (if_then_else->else_atom_seen)
return strbuf_addf_ret(err, -1, _("format: %%(then) atom used after %%(else)"));
if_then_else->then_atom_seen = 1;
ref-filter: add %(raw) atom Add new formatting option `%(raw)`, which will print the raw object data without any changes. It will help further to migrate all cat-file formatting logic from cat-file to ref-filter. The raw data of blob, tree objects may contain '\0', but most of the logic in `ref-filter` depends on the output of the atom being text (specifically, no embedded NULs in it). E.g. `quote_formatting()` use `strbuf_addstr()` or `*._quote_buf()` add the data to the buffer. The raw data of a tree object is `100644 one\0...`, only the `100644 one` will be added to the buffer, which is incorrect. Therefore, we need to find a way to record the length of the atom_value's member `s`. Although strbuf can already record the string and its length, if we want to replace the type of atom_value's member `s` with strbuf, many places in ref-filter that are filled with dynamically allocated mermory in `v->s` are not easy to replace. At the same time, we need to check if `v->s == NULL` in populate_value(), and strbuf cannot easily distinguish NULL and empty strings, but c-style "const char *" can do it. So add a new member in `struct atom_value`: `s_size`, which can record raw object size, it can help us add raw object data to the buffer or compare two buffers which contain raw object data. Note that `--format=%(raw)` cannot be used with `--python`, `--shell`, `--tcl`, and `--perl` because if the binary raw data is passed to a variable in such languages, these may not support arbitrary binary data in their string variable type. Reviewed-by: Jacob Keller <jacob.keller@gmail.com> Mentored-by: Christian Couder <christian.couder@gmail.com> Mentored-by: Hariom Verma <hariom18599@gmail.com> Helped-by: Bagas Sanjaya <bagasdotme@gmail.com> Helped-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Helped-by: Felipe Contreras <felipe.contreras@gmail.com> Helped-by: Phillip Wood <phillip.wood@dunelm.org.uk> Helped-by: Junio C Hamano <gitster@pobox.com> Based-on-patch-by: Olga Telezhnaya <olyatelezhnaya@gmail.com> Signed-off-by: ZheNing Hu <adlternative@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-07-26 11:26:47 +08:00
if (if_then_else->str)
str_len = strlen(if_then_else->str);
/*
* If the 'equals' or 'notequals' attribute is used then
* perform the required comparison. If not, only non-empty
* strings satisfy the 'if' condition.
*/
if (if_then_else->cmp_status == COMPARE_EQUAL) {
ref-filter: add %(raw) atom Add new formatting option `%(raw)`, which will print the raw object data without any changes. It will help further to migrate all cat-file formatting logic from cat-file to ref-filter. The raw data of blob, tree objects may contain '\0', but most of the logic in `ref-filter` depends on the output of the atom being text (specifically, no embedded NULs in it). E.g. `quote_formatting()` use `strbuf_addstr()` or `*._quote_buf()` add the data to the buffer. The raw data of a tree object is `100644 one\0...`, only the `100644 one` will be added to the buffer, which is incorrect. Therefore, we need to find a way to record the length of the atom_value's member `s`. Although strbuf can already record the string and its length, if we want to replace the type of atom_value's member `s` with strbuf, many places in ref-filter that are filled with dynamically allocated mermory in `v->s` are not easy to replace. At the same time, we need to check if `v->s == NULL` in populate_value(), and strbuf cannot easily distinguish NULL and empty strings, but c-style "const char *" can do it. So add a new member in `struct atom_value`: `s_size`, which can record raw object size, it can help us add raw object data to the buffer or compare two buffers which contain raw object data. Note that `--format=%(raw)` cannot be used with `--python`, `--shell`, `--tcl`, and `--perl` because if the binary raw data is passed to a variable in such languages, these may not support arbitrary binary data in their string variable type. Reviewed-by: Jacob Keller <jacob.keller@gmail.com> Mentored-by: Christian Couder <christian.couder@gmail.com> Mentored-by: Hariom Verma <hariom18599@gmail.com> Helped-by: Bagas Sanjaya <bagasdotme@gmail.com> Helped-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Helped-by: Felipe Contreras <felipe.contreras@gmail.com> Helped-by: Phillip Wood <phillip.wood@dunelm.org.uk> Helped-by: Junio C Hamano <gitster@pobox.com> Based-on-patch-by: Olga Telezhnaya <olyatelezhnaya@gmail.com> Signed-off-by: ZheNing Hu <adlternative@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-07-26 11:26:47 +08:00
if (str_len == cur->output.len &&
!memcmp(if_then_else->str, cur->output.buf, cur->output.len))
if_then_else->condition_satisfied = 1;
} else if (if_then_else->cmp_status == COMPARE_UNEQUAL) {
ref-filter: add %(raw) atom Add new formatting option `%(raw)`, which will print the raw object data without any changes. It will help further to migrate all cat-file formatting logic from cat-file to ref-filter. The raw data of blob, tree objects may contain '\0', but most of the logic in `ref-filter` depends on the output of the atom being text (specifically, no embedded NULs in it). E.g. `quote_formatting()` use `strbuf_addstr()` or `*._quote_buf()` add the data to the buffer. The raw data of a tree object is `100644 one\0...`, only the `100644 one` will be added to the buffer, which is incorrect. Therefore, we need to find a way to record the length of the atom_value's member `s`. Although strbuf can already record the string and its length, if we want to replace the type of atom_value's member `s` with strbuf, many places in ref-filter that are filled with dynamically allocated mermory in `v->s` are not easy to replace. At the same time, we need to check if `v->s == NULL` in populate_value(), and strbuf cannot easily distinguish NULL and empty strings, but c-style "const char *" can do it. So add a new member in `struct atom_value`: `s_size`, which can record raw object size, it can help us add raw object data to the buffer or compare two buffers which contain raw object data. Note that `--format=%(raw)` cannot be used with `--python`, `--shell`, `--tcl`, and `--perl` because if the binary raw data is passed to a variable in such languages, these may not support arbitrary binary data in their string variable type. Reviewed-by: Jacob Keller <jacob.keller@gmail.com> Mentored-by: Christian Couder <christian.couder@gmail.com> Mentored-by: Hariom Verma <hariom18599@gmail.com> Helped-by: Bagas Sanjaya <bagasdotme@gmail.com> Helped-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Helped-by: Felipe Contreras <felipe.contreras@gmail.com> Helped-by: Phillip Wood <phillip.wood@dunelm.org.uk> Helped-by: Junio C Hamano <gitster@pobox.com> Based-on-patch-by: Olga Telezhnaya <olyatelezhnaya@gmail.com> Signed-off-by: ZheNing Hu <adlternative@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-07-26 11:26:47 +08:00
if (str_len != cur->output.len ||
memcmp(if_then_else->str, cur->output.buf, cur->output.len))
if_then_else->condition_satisfied = 1;
ref-filter: add %(raw) atom Add new formatting option `%(raw)`, which will print the raw object data without any changes. It will help further to migrate all cat-file formatting logic from cat-file to ref-filter. The raw data of blob, tree objects may contain '\0', but most of the logic in `ref-filter` depends on the output of the atom being text (specifically, no embedded NULs in it). E.g. `quote_formatting()` use `strbuf_addstr()` or `*._quote_buf()` add the data to the buffer. The raw data of a tree object is `100644 one\0...`, only the `100644 one` will be added to the buffer, which is incorrect. Therefore, we need to find a way to record the length of the atom_value's member `s`. Although strbuf can already record the string and its length, if we want to replace the type of atom_value's member `s` with strbuf, many places in ref-filter that are filled with dynamically allocated mermory in `v->s` are not easy to replace. At the same time, we need to check if `v->s == NULL` in populate_value(), and strbuf cannot easily distinguish NULL and empty strings, but c-style "const char *" can do it. So add a new member in `struct atom_value`: `s_size`, which can record raw object size, it can help us add raw object data to the buffer or compare two buffers which contain raw object data. Note that `--format=%(raw)` cannot be used with `--python`, `--shell`, `--tcl`, and `--perl` because if the binary raw data is passed to a variable in such languages, these may not support arbitrary binary data in their string variable type. Reviewed-by: Jacob Keller <jacob.keller@gmail.com> Mentored-by: Christian Couder <christian.couder@gmail.com> Mentored-by: Hariom Verma <hariom18599@gmail.com> Helped-by: Bagas Sanjaya <bagasdotme@gmail.com> Helped-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Helped-by: Felipe Contreras <felipe.contreras@gmail.com> Helped-by: Phillip Wood <phillip.wood@dunelm.org.uk> Helped-by: Junio C Hamano <gitster@pobox.com> Based-on-patch-by: Olga Telezhnaya <olyatelezhnaya@gmail.com> Signed-off-by: ZheNing Hu <adlternative@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-07-26 11:26:47 +08:00
} else if (cur->output.len && !is_empty(&cur->output))
if_then_else->condition_satisfied = 1;
strbuf_reset(&cur->output);
return 0;
}
static int else_atom_handler(struct atom_value *atomv UNUSED,
struct ref_formatting_state *state,
struct strbuf *err)
{
struct ref_formatting_stack *prev = state->stack;
struct if_then_else *if_then_else = NULL;
if (prev->at_end == if_then_else_handler)
if_then_else = (struct if_then_else *)prev->at_end_data;
if (!if_then_else)
return strbuf_addf_ret(err, -1, _("format: %%(%s) atom used without a %%(%s) atom"), "else", "if");
if (!if_then_else->then_atom_seen)
return strbuf_addf_ret(err, -1, _("format: %%(%s) atom used without a %%(%s) atom"), "else", "then");
if (if_then_else->else_atom_seen)
return strbuf_addf_ret(err, -1, _("format: %%(else) atom used more than once"));
if_then_else->else_atom_seen = 1;
push_stack_element(&state->stack);
state->stack->at_end_data = prev->at_end_data;
state->stack->at_end = prev->at_end;
return 0;
2015-09-11 23:03:07 +08:00
}
static int end_atom_handler(struct atom_value *atomv UNUSED,
struct ref_formatting_state *state,
struct strbuf *err)
2015-09-11 23:03:07 +08:00
{
struct ref_formatting_stack *current = state->stack;
struct strbuf s = STRBUF_INIT;
if (!current->at_end)
return strbuf_addf_ret(err, -1, _("format: %%(end) atom used without corresponding atom"));
current->at_end(&state->stack);
/* Stack may have been popped within at_end(), hence reset the current pointer */
current = state->stack;
2015-09-11 23:03:07 +08:00
/*
* Perform quote formatting when the stack element is that of
* a supporting atom. If nested then perform quote formatting
* only on the topmost supporting atom.
*/
if (!current->prev->prev) {
ref-filter: add %(raw) atom Add new formatting option `%(raw)`, which will print the raw object data without any changes. It will help further to migrate all cat-file formatting logic from cat-file to ref-filter. The raw data of blob, tree objects may contain '\0', but most of the logic in `ref-filter` depends on the output of the atom being text (specifically, no embedded NULs in it). E.g. `quote_formatting()` use `strbuf_addstr()` or `*._quote_buf()` add the data to the buffer. The raw data of a tree object is `100644 one\0...`, only the `100644 one` will be added to the buffer, which is incorrect. Therefore, we need to find a way to record the length of the atom_value's member `s`. Although strbuf can already record the string and its length, if we want to replace the type of atom_value's member `s` with strbuf, many places in ref-filter that are filled with dynamically allocated mermory in `v->s` are not easy to replace. At the same time, we need to check if `v->s == NULL` in populate_value(), and strbuf cannot easily distinguish NULL and empty strings, but c-style "const char *" can do it. So add a new member in `struct atom_value`: `s_size`, which can record raw object size, it can help us add raw object data to the buffer or compare two buffers which contain raw object data. Note that `--format=%(raw)` cannot be used with `--python`, `--shell`, `--tcl`, and `--perl` because if the binary raw data is passed to a variable in such languages, these may not support arbitrary binary data in their string variable type. Reviewed-by: Jacob Keller <jacob.keller@gmail.com> Mentored-by: Christian Couder <christian.couder@gmail.com> Mentored-by: Hariom Verma <hariom18599@gmail.com> Helped-by: Bagas Sanjaya <bagasdotme@gmail.com> Helped-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Helped-by: Felipe Contreras <felipe.contreras@gmail.com> Helped-by: Phillip Wood <phillip.wood@dunelm.org.uk> Helped-by: Junio C Hamano <gitster@pobox.com> Based-on-patch-by: Olga Telezhnaya <olyatelezhnaya@gmail.com> Signed-off-by: ZheNing Hu <adlternative@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-07-26 11:26:47 +08:00
quote_formatting(&s, current->output.buf, current->output.len, state->quote_style);
2015-09-11 23:03:07 +08:00
strbuf_swap(&current->output, &s);
}
strbuf_release(&s);
pop_stack_element(&state->stack);
return 0;
2015-09-11 23:03:07 +08:00
}
/*
* In a format string, find the next occurrence of %(atom).
*/
static const char *find_next(const char *cp)
{
while (*cp) {
if (*cp == '%') {
/*
* %( is the start of an atom;
* %% is a quoted per-cent.
*/
if (cp[1] == '(')
return cp;
else if (cp[1] == '%')
cp++; /* skip over two % */
/* otherwise this is a singleton, literal % */
}
cp++;
}
return NULL;
}
static int reject_atom(enum atom_type atom_type)
{
return atom_type == ATOM_REST;
}
/*
* Make sure the format string is well formed, and parse out
* the used atoms.
*/
int verify_ref_format(struct ref_format *format)
{
const char *cp, *sp;
format->need_color_reset_at_eol = 0;
for (cp = format->format; *cp && (sp = find_next(cp)); ) {
struct strbuf err = STRBUF_INIT;
const char *color, *ep = strchr(sp, ')');
int at;
if (!ep)
return error(_("malformed format string %s"), sp);
/* sp points at "%(" and ep points at the closing ")" */
at = parse_ref_filter_atom(format, sp + 2, ep, &err);
if (at < 0)
die("%s", err.buf);
if (reject_atom(used_atom[at].atom_type))
die(_("this command reject atom %%(%.*s)"), (int)(ep - sp - 2), sp + 2);
if ((format->quote_style == QUOTE_PYTHON ||
format->quote_style == QUOTE_SHELL ||
format->quote_style == QUOTE_TCL) &&
used_atom[at].atom_type == ATOM_RAW &&
used_atom[at].u.raw_data.option == RAW_BARE)
die(_("--format=%.*s cannot be used with "
"--python, --shell, --tcl"), (int)(ep - sp - 2), sp + 2);
cp = ep + 1;
if (skip_prefix(used_atom[at].name, "color:", &color))
format->need_color_reset_at_eol = !!strcmp(color, "reset");
strbuf_release(&err);
}
if (format->need_color_reset_at_eol && !want_color(format->use_color))
format->need_color_reset_at_eol = 0;
return 0;
}
static const char *do_grab_oid(const char *field, const struct object_id *oid,
struct used_atom *atom)
{
switch (atom->u.oid.option) {
case O_FULL:
return oid_to_hex(oid);
case O_LENGTH:
return repo_find_unique_abbrev(the_repository, oid,
atom->u.oid.length);
case O_SHORT:
return repo_find_unique_abbrev(the_repository, oid,
DEFAULT_ABBREV);
default:
BUG("unknown %%(%s) option", field);
}
}
static int grab_oid(const char *name, const char *field, const struct object_id *oid,
struct atom_value *v, struct used_atom *atom)
{
if (starts_with(name, field)) {
v->s = xstrdup(do_grab_oid(field, oid, atom));
return 1;
}
return 0;
}
/* See grab_values */
static void grab_common_values(struct atom_value *val, int deref, struct expand_data *oi)
{
int i;
for (i = 0; i < used_atom_cnt; i++) {
const char *name = used_atom[i].name;
enum atom_type atom_type = used_atom[i].atom_type;
struct atom_value *v = &val[i];
if (!!deref != (*name == '*'))
continue;
if (deref)
name++;
if (atom_type == ATOM_OBJECTTYPE)
v->s = xstrdup(type_name(oi->type));
else if (atom_type == ATOM_OBJECTSIZE) {
if (used_atom[i].u.objectsize.option == O_SIZE_DISK) {
v->value = oi->disk_size;
v->s = xstrfmt("%"PRIuMAX, (uintmax_t)oi->disk_size);
} else if (used_atom[i].u.objectsize.option == O_SIZE) {
v->value = oi->size;
v->s = xstrfmt("%"PRIuMAX , (uintmax_t)oi->size);
}
} else if (atom_type == ATOM_DELTABASE)
v->s = xstrdup(oid_to_hex(&oi->delta_base_oid));
else if (atom_type == ATOM_OBJECTNAME && deref)
grab_oid(name, "objectname", &oi->oid, v, &used_atom[i]);
}
}
/* See grab_values */
static void grab_tag_values(struct atom_value *val, int deref, struct object *obj)
{
int i;
struct tag *tag = (struct tag *) obj;
for (i = 0; i < used_atom_cnt; i++) {
const char *name = used_atom[i].name;
enum atom_type atom_type = used_atom[i].atom_type;
struct atom_value *v = &val[i];
if (!!deref != (*name == '*'))
continue;
if (deref)
name++;
if (atom_type == ATOM_TAG)
v->s = xstrdup(tag->tag);
else if (atom_type == ATOM_TYPE && tag->tagged)
v->s = xstrdup(type_name(tag->tagged->type));
else if (atom_type == ATOM_OBJECT && tag->tagged)
v->s = xstrdup(oid_to_hex(&tag->tagged->oid));
}
}
/* See grab_values */
static void grab_commit_values(struct atom_value *val, int deref, struct object *obj)
{
int i;
struct commit *commit = (struct commit *) obj;
for (i = 0; i < used_atom_cnt; i++) {
const char *name = used_atom[i].name;
enum atom_type atom_type = used_atom[i].atom_type;
struct atom_value *v = &val[i];
if (!!deref != (*name == '*'))
continue;
if (deref)
name++;
if (atom_type == ATOM_TREE &&
grab_oid(name, "tree", get_commit_tree_oid(commit), v, &used_atom[i]))
continue;
if (atom_type == ATOM_NUMPARENT) {
v->value = commit_list_count(commit->parents);
v->s = xstrfmt("%lu", (unsigned long)v->value);
}
else if (atom_type == ATOM_PARENT) {
struct commit_list *parents;
struct strbuf s = STRBUF_INIT;
for (parents = commit->parents; parents; parents = parents->next) {
struct object_id *oid = &parents->item->object.oid;
if (parents != commit->parents)
strbuf_addch(&s, ' ');
strbuf_addstr(&s, do_grab_oid("parent", oid, &used_atom[i]));
}
v->s = strbuf_detach(&s, NULL);
}
}
}
static const char *find_wholine(const char *who, int wholen, const char *buf)
{
const char *eol;
while (*buf) {
if (!strncmp(buf, who, wholen) &&
buf[wholen] == ' ')
return buf + wholen + 1;
eol = strchr(buf, '\n');
if (!eol)
return "";
eol++;
if (*eol == '\n')
return ""; /* end of header */
buf = eol;
}
return "";
}
static const char *copy_line(const char *buf)
{
const char *eol = strchrnul(buf, '\n');
return xmemdupz(buf, eol - buf);
}
static const char *copy_name(const char *buf)
{
const char *cp;
for (cp = buf; *cp && *cp != '\n'; cp++) {
convert trivial uses of strncmp() to starts_with() It's more readable to use starts_with() instead of strncmp() to match a prefix, as the latter requires a manually-computed length, and has the funny "matching is zero" return value common to cmp functions. This patch converts several cases which were found with: git grep 'strncmp(.*, [0-9]*)' But note that it doesn't convert all such cases. There are several where the magic length number is repeated elsewhere in the code, like: /* handle "buf" which isn't NUL-terminated and might be too small */ if (len >= 3 && !strncmp(buf, "foo", 3)) or: /* exact match for "foo", but within a larger string */ if (end - buf == 3 && !strncmp(buf, "foo", 3)) While it would not produce the wrong outcome to use starts_with() in these cases, we'd still be left with one instance of "3". We're better to leave them for now, as the repeated "3" makes it clear that the two are linked (there may be other refactorings that handle both, but they're out of scope for this patch). A few things to note while reading the patch: - all cases but one are trying to match, and so lose the extra "!". The case in the first hunk of urlmatch.c is not-matching, and hence gains a "!". - the case in remote-fd.c is matching the beginning of "connect foo", but we never look at str+8 to parse the "foo" part (which would make this a candidate for skip_prefix(), not starts_with()). This seems at first glance like a bug, but is a limitation of how remote-fd works. - the second hunk in urlmatch.c shows some cases adjacent to other strncmp() calls that are left. These are of the "exact match within a larger string" type, as described above. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-01-07 21:26:18 +08:00
if (starts_with(cp, " <"))
return xmemdupz(buf, cp - buf);
}
return xstrdup("");
}
static const char *find_end_of_email(const char *email, int opt)
{
const char *eoemail;
if (opt & EO_LOCALPART) {
eoemail = strchr(email, '@');
if (eoemail)
return eoemail;
return strchr(email, '>');
}
if (opt & EO_TRIM)
return strchr(email, '>');
/*
* The option here is either the raw email option or the raw
* mailmap option (that is EO_RAW or EO_MAILMAP). In such cases,
* we directly grab the whole email including the closing
* angle brackets.
*
* If EO_MAILMAP was set with any other option (that is either
* EO_TRIM or EO_LOCALPART), we already grab the end of email
* above.
*/
eoemail = strchr(email, '>');
if (eoemail)
eoemail++;
return eoemail;
}
static const char *copy_email(const char *buf, struct used_atom *atom)
{
const char *email = strchr(buf, '<');
const char *eoemail;
int opt = atom->u.email_option.option;
if (!email)
return xstrdup("");
if (opt & (EO_LOCALPART | EO_TRIM))
email++;
eoemail = find_end_of_email(email, opt);
if (!eoemail)
return xstrdup("");
return xmemdupz(email, eoemail - email);
}
static char *copy_subject(const char *buf, unsigned long len)
{
ref-filter: handle CRLF at end-of-line more gracefully The ref-filter code does not correctly handle commit or tag messages that use CRLF as the line terminator. Such messages can be created with the `--cleanup=verbatim` option of `git commit` and `git tag`, or by using `git commit-tree` directly. The function `find_subpos` in ref-filter.c looks for two consecutive LFs to find the end of the subject line, a sequence which is absent in messages using CRLF. This results in the whole message being parsed as the subject line (`%(contents:subject)`), and the body of the message (`%(contents:body)`) being empty. Moreover, in `copy_subject`, which wants to return the subject as a single line, '\n' is replaced by space, but '\r' is untouched. This impacts the output of `git branch`, `git tag` and `git for-each-ref`. This behaviour is a regression for `git branch --verbose`, which bisects down to 949af0684c (branch: use ref-filter printing APIs, 2017-01-10). Adjust the ref-filter code to be more lenient by hardening the logic in `copy_subject` and `find_subpos` to correctly parse messages containing CRLF. Add a new test script, 't3920-crlf-messages.sh', to test the behaviour of commands using either the ref-filter or the pretty APIs with messages using CRLF line endings. The function `test_crlf_subject_body_and_contents` can be used to test that the `--format` option of `branch`, `tag`, `for-each-ref`, `log` and `show` correctly displays the subject, body and raw content of commit and tag messages using CRLF. Test the output of `branch`, `tag` and `for-each-ref` with such commits. Helped-by: Junio C Hamano <gitster@pobox.com> Helped-by: Eric Sunshine <sunshine@sunshineco.com> Signed-off-by: Philippe Blain <levraiphilippeblain@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-10-29 20:48:28 +08:00
struct strbuf sb = STRBUF_INIT;
int i;
ref-filter: handle CRLF at end-of-line more gracefully The ref-filter code does not correctly handle commit or tag messages that use CRLF as the line terminator. Such messages can be created with the `--cleanup=verbatim` option of `git commit` and `git tag`, or by using `git commit-tree` directly. The function `find_subpos` in ref-filter.c looks for two consecutive LFs to find the end of the subject line, a sequence which is absent in messages using CRLF. This results in the whole message being parsed as the subject line (`%(contents:subject)`), and the body of the message (`%(contents:body)`) being empty. Moreover, in `copy_subject`, which wants to return the subject as a single line, '\n' is replaced by space, but '\r' is untouched. This impacts the output of `git branch`, `git tag` and `git for-each-ref`. This behaviour is a regression for `git branch --verbose`, which bisects down to 949af0684c (branch: use ref-filter printing APIs, 2017-01-10). Adjust the ref-filter code to be more lenient by hardening the logic in `copy_subject` and `find_subpos` to correctly parse messages containing CRLF. Add a new test script, 't3920-crlf-messages.sh', to test the behaviour of commands using either the ref-filter or the pretty APIs with messages using CRLF line endings. The function `test_crlf_subject_body_and_contents` can be used to test that the `--format` option of `branch`, `tag`, `for-each-ref`, `log` and `show` correctly displays the subject, body and raw content of commit and tag messages using CRLF. Test the output of `branch`, `tag` and `for-each-ref` with such commits. Helped-by: Junio C Hamano <gitster@pobox.com> Helped-by: Eric Sunshine <sunshine@sunshineco.com> Signed-off-by: Philippe Blain <levraiphilippeblain@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-10-29 20:48:28 +08:00
for (i = 0; i < len; i++) {
if (buf[i] == '\r' && i + 1 < len && buf[i + 1] == '\n')
continue; /* ignore CR in CRLF */
ref-filter: handle CRLF at end-of-line more gracefully The ref-filter code does not correctly handle commit or tag messages that use CRLF as the line terminator. Such messages can be created with the `--cleanup=verbatim` option of `git commit` and `git tag`, or by using `git commit-tree` directly. The function `find_subpos` in ref-filter.c looks for two consecutive LFs to find the end of the subject line, a sequence which is absent in messages using CRLF. This results in the whole message being parsed as the subject line (`%(contents:subject)`), and the body of the message (`%(contents:body)`) being empty. Moreover, in `copy_subject`, which wants to return the subject as a single line, '\n' is replaced by space, but '\r' is untouched. This impacts the output of `git branch`, `git tag` and `git for-each-ref`. This behaviour is a regression for `git branch --verbose`, which bisects down to 949af0684c (branch: use ref-filter printing APIs, 2017-01-10). Adjust the ref-filter code to be more lenient by hardening the logic in `copy_subject` and `find_subpos` to correctly parse messages containing CRLF. Add a new test script, 't3920-crlf-messages.sh', to test the behaviour of commands using either the ref-filter or the pretty APIs with messages using CRLF line endings. The function `test_crlf_subject_body_and_contents` can be used to test that the `--format` option of `branch`, `tag`, `for-each-ref`, `log` and `show` correctly displays the subject, body and raw content of commit and tag messages using CRLF. Test the output of `branch`, `tag` and `for-each-ref` with such commits. Helped-by: Junio C Hamano <gitster@pobox.com> Helped-by: Eric Sunshine <sunshine@sunshineco.com> Signed-off-by: Philippe Blain <levraiphilippeblain@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-10-29 20:48:28 +08:00
if (buf[i] == '\n')
strbuf_addch(&sb, ' ');
else
strbuf_addch(&sb, buf[i]);
}
return strbuf_detach(&sb, NULL);
}
static void grab_date(const char *buf, struct atom_value *v, const char *atomname)
{
const char *eoemail = strstr(buf, "> ");
char *zone;
timestamp_t timestamp;
long tz;
struct date_mode date_mode = DATE_MODE_INIT;
const char *formatp;
/*
* We got here because atomname ends in "date" or "date<something>";
* it's not possible that <something> is not ":<format>" because
* parse_ref_filter_atom() wouldn't have allowed it, so we can assume that no
* ":" means no format is specified, and use the default.
*/
formatp = strchr(atomname, ':');
if (formatp) {
formatp++;
parse_date_format(formatp, &date_mode);
/*
* If this is a sort field and a format was specified, we'll
* want to compare formatted date by string value.
*/
v->atom->type = FIELD_STR;
}
if (!eoemail)
goto bad;
timestamp = parse_timestamp(eoemail + 2, &zone, 10);
if (timestamp == TIME_MAX)
goto bad;
errno = 0;
tz = strtol(zone, NULL, 10);
if ((tz == LONG_MIN || tz == LONG_MAX) && errno == ERANGE)
goto bad;
v->s = xstrdup(show_date(timestamp, tz, date_mode));
v->value = timestamp;
date API: add and use a date_mode_release() Fix a memory leak in the parse_date_format() function by providing a new date_mode_release() companion function. By using this in "t/helper/test-date.c" we can mark the "t0006-date.sh" test as passing when git is compiled with SANITIZE=leak, and whitelist it to run under "GIT_TEST_PASSING_SANITIZE_LEAK=true" by adding "TEST_PASSES_SANITIZE_LEAK=true" to the test itself. The other tests that expose this memory leak (i.e. take the "mode->type == DATE_STRFTIME" branch in parse_date_format()) are "t6300-for-each-ref.sh" and "t7004-tag.sh". The former is due to an easily fixed leak in "ref-filter.c", and brings the failures in "t6300-for-each-ref.sh" down from 51 to 48. Fixing the remaining leaks will have to wait until there's a release_revisions() in "revision.c", as they have to do with leaks via "struct rev_info". There is also a leak in "builtin/blame.c" due to its call to parse_date_format() to parse the "blame.date" configuration. However as it declares a file-level "static struct date_mode blame_date_mode" to track the data, LSAN will not report it as a leak. It's possible to get valgrind(1) to complain about it with e.g.: valgrind --leak-check=full --show-leak-kinds=all ./git -P -c blame.date=format:%Y blame README.md But let's focus on things LSAN complains about, and are thus observable with "TEST_PASSES_SANITIZE_LEAK=true". We should get to fixing memory leaks in "builtin/blame.c", but as doing so would require some re-arrangement of cmd_blame() let's leave it for some other time. Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-02-16 16:14:05 +08:00
date_mode_release(&date_mode);
return;
bad:
v->s = xstrdup("");
v->value = 0;
}
static struct string_list mailmap = STRING_LIST_INIT_NODUP;
/* See grab_values */
static void grab_person(const char *who, struct atom_value *val, int deref, void *buf)
{
int i;
int wholen = strlen(who);
const char *wholine = NULL;
const char *headers[] = { "author ", "committer ",
"tagger ", NULL };
for (i = 0; i < used_atom_cnt; i++) {
struct used_atom *atom = &used_atom[i];
const char *name = atom->name;
struct atom_value *v = &val[i];
struct strbuf mailmap_buf = STRBUF_INIT;
if (!!deref != (*name == '*'))
continue;
if (deref)
name++;
if (strncmp(who, name, wholen))
continue;
if (name[wholen] != 0 &&
!starts_with(name + wholen, "name") &&
!starts_with(name + wholen, "email") &&
!starts_with(name + wholen, "date"))
continue;
if ((starts_with(name + wholen, "name") &&
(atom->u.name_option.option == N_MAILMAP)) ||
(starts_with(name + wholen, "email") &&
(atom->u.email_option.option & EO_MAILMAP))) {
if (!mailmap.items)
read_mailmap(&mailmap);
strbuf_addstr(&mailmap_buf, buf);
apply_mailmap_to_header(&mailmap_buf, headers, &mailmap);
wholine = find_wholine(who, wholen, mailmap_buf.buf);
} else {
wholine = find_wholine(who, wholen, buf);
}
if (!wholine)
return; /* no point looking for it */
if (name[wholen] == 0)
v->s = copy_line(wholine);
else if (starts_with(name + wholen, "name"))
v->s = copy_name(wholine);
else if (starts_with(name + wholen, "email"))
v->s = copy_email(wholine, &used_atom[i]);
else if (starts_with(name + wholen, "date"))
grab_date(wholine, v, name);
strbuf_release(&mailmap_buf);
}
/*
* For a tag or a commit object, if "creator" or "creatordate" is
* requested, do something special.
*/
if (strcmp(who, "tagger") && strcmp(who, "committer"))
return; /* "author" for commit object is not wanted */
if (!wholine)
wholine = find_wholine(who, wholen, buf);
if (!wholine)
return;
for (i = 0; i < used_atom_cnt; i++) {
const char *name = used_atom[i].name;
enum atom_type atom_type = used_atom[i].atom_type;
struct atom_value *v = &val[i];
if (!!deref != (*name == '*'))
continue;
if (deref)
name++;
if (atom_type == ATOM_CREATORDATE)
grab_date(wholine, v, name);
else if (atom_type == ATOM_CREATOR)
v->s = copy_line(wholine);
}
}
static void grab_signature(struct atom_value *val, int deref, struct object *obj)
{
int i;
struct commit *commit = (struct commit *) obj;
struct signature_check sigc = { 0 };
int signature_checked = 0;
for (i = 0; i < used_atom_cnt; i++) {
struct used_atom *atom = &used_atom[i];
const char *name = atom->name;
struct atom_value *v = &val[i];
int opt;
if (!!deref != (*name == '*'))
continue;
if (deref)
name++;
if (!skip_prefix(name, "signature", &name) ||
(*name && *name != ':'))
continue;
if (!*name)
name = NULL;
else
name++;
opt = parse_signature_option(name);
if (opt < 0)
continue;
if (!signature_checked) {
check_commit_signature(commit, &sigc);
signature_checked = 1;
}
switch (opt) {
case S_BARE:
v->s = xstrdup(sigc.output ? sigc.output: "");
break;
case S_SIGNER:
v->s = xstrdup(sigc.signer ? sigc.signer : "");
break;
case S_GRADE:
switch (sigc.result) {
case 'G':
switch (sigc.trust_level) {
case TRUST_UNDEFINED:
case TRUST_NEVER:
v->s = xstrfmt("%c", (char)'U');
break;
default:
v->s = xstrfmt("%c", (char)'G');
break;
}
break;
case 'B':
case 'E':
case 'N':
case 'X':
case 'Y':
case 'R':
v->s = xstrfmt("%c", (char)sigc.result);
break;
}
break;
case S_KEY:
v->s = xstrdup(sigc.key ? sigc.key : "");
break;
case S_FINGERPRINT:
v->s = xstrdup(sigc.fingerprint ?
sigc.fingerprint : "");
break;
case S_PRI_KEY_FP:
v->s = xstrdup(sigc.primary_key_fingerprint ?
sigc.primary_key_fingerprint : "");
break;
case S_TRUST_LEVEL:
v->s = xstrdup(gpg_trust_level_to_str(sigc.trust_level));
break;
}
}
if (signature_checked)
signature_check_clear(&sigc);
}
static void find_subpos(const char *buf,
const char **sub, size_t *sublen,
const char **body, size_t *bodylen,
size_t *nonsiglen,
const char **sig, size_t *siglen)
{
struct strbuf payload = STRBUF_INIT;
struct strbuf signature = STRBUF_INIT;
const char *eol;
const char *end = buf + strlen(buf);
const char *sigstart;
/* parse signature first; we might not even have a subject line */
parse_signature(buf, end - buf, &payload, &signature);
built-ins & libs & helpers: add/move destructors, fix leaks Fix various leaks in built-ins, libraries and a test helper here we were missing a call to strbuf_release(), string_list_clear() etc, or were calling them after a potential "return". Comments on individual changes: - builtin/checkout.c: Fix a memory leak that was introduced in [1]. A sibling leak introduced in [2] was recently fixed in [3]. As with [3] we should be using the wt_status_state_free_buffers() API introduced in [4]. - builtin/repack.c: Fix a leak that's been here since this use of "strbuf_release()" was added in a1bbc6c0176 (repack: rewrite the shell script in C, 2013-09-15). We don't use the variable for anything except this loop, so we can instead free it right afterwards. - builtin/rev-parse: Fix a leak that's been here since this code was added in 21d47835386 (Add a parseopt mode to git-rev-parse to bring parse-options to shell scripts., 2007-11-04). - builtin/stash.c: Fix a couple of leaks that have been here since this code was added in d4788af875c (stash: convert create to builtin, 2019-02-25), we strbuf_release()'d only some of the "struct strbuf" we allocated earlier in the function, let's release all of them. - ref-filter.c: Fix a leak in 482c1191869 (gpg-interface: improve interface for parsing tags, 2021-02-11), we don't use the "payload" variable that we ask parse_signature() to populate for us, so let's free it. - t/helper/test-fake-ssh.c: Fix a leak that's been here since this code was added in 3064d5a38c7 (mingw: fix t5601-clone.sh, 2016-01-27). Let's free the "struct strbuf" as soon as we don't need it anymore. 1. c45f0f525de (switch: reject if some operation is in progress, 2019-03-29) 2. 2708ce62d21 (branch: sort detached HEAD based on a flag, 2021-01-07) 3. abcac2e19fa (ref-filter.c: fix a leak in get_head_description, 2022-09-25) 4. 962dd7ebc3e (wt-status: introduce wt_status_state_free_buffers(), 2020-09-27). Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Taylor Blau <me@ttaylorr.com>
2022-11-09 02:17:42 +08:00
strbuf_release(&payload);
/* skip past header until we hit empty line */
while (*buf && *buf != '\n') {
eol = strchrnul(buf, '\n');
if (*eol)
eol++;
buf = eol;
}
/* skip any empty lines */
while (*buf == '\n')
buf++;
*sig = strbuf_detach(&signature, siglen);
sigstart = buf + parse_signed_buffer(buf, strlen(buf));
/* subject is first non-empty line */
*sub = buf;
ref-filter: handle CRLF at end-of-line more gracefully The ref-filter code does not correctly handle commit or tag messages that use CRLF as the line terminator. Such messages can be created with the `--cleanup=verbatim` option of `git commit` and `git tag`, or by using `git commit-tree` directly. The function `find_subpos` in ref-filter.c looks for two consecutive LFs to find the end of the subject line, a sequence which is absent in messages using CRLF. This results in the whole message being parsed as the subject line (`%(contents:subject)`), and the body of the message (`%(contents:body)`) being empty. Moreover, in `copy_subject`, which wants to return the subject as a single line, '\n' is replaced by space, but '\r' is untouched. This impacts the output of `git branch`, `git tag` and `git for-each-ref`. This behaviour is a regression for `git branch --verbose`, which bisects down to 949af0684c (branch: use ref-filter printing APIs, 2017-01-10). Adjust the ref-filter code to be more lenient by hardening the logic in `copy_subject` and `find_subpos` to correctly parse messages containing CRLF. Add a new test script, 't3920-crlf-messages.sh', to test the behaviour of commands using either the ref-filter or the pretty APIs with messages using CRLF line endings. The function `test_crlf_subject_body_and_contents` can be used to test that the `--format` option of `branch`, `tag`, `for-each-ref`, `log` and `show` correctly displays the subject, body and raw content of commit and tag messages using CRLF. Test the output of `branch`, `tag` and `for-each-ref` with such commits. Helped-by: Junio C Hamano <gitster@pobox.com> Helped-by: Eric Sunshine <sunshine@sunshineco.com> Signed-off-by: Philippe Blain <levraiphilippeblain@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-10-29 20:48:28 +08:00
/* subject goes to first empty line before signature begins */
ref-filter: fix parsing of signatures with CRLF and no body This commit fixes a bug when parsing tags that have CRLF line endings, a signature, and no body, like this (the "^M" are marking the CRs): this is the subject^M -----BEGIN PGP SIGNATURE-----^M ^M ...some stuff...^M -----END PGP SIGNATURE-----^M When trying to find the start of the body, we look for a blank line separating the subject and body. In this case, there isn't one. But we search for it using strstr(), which will find the blank line in the signature. In the non-CRLF code path, we check whether the line we found is past the start of the signature, and if so, put the body pointer at the start of the signature (effectively making the body empty). But the CRLF code path doesn't catch the same case, and we end up with the body pointer in the middle of the signature field. This has two visible problems: - printing %(contents:subject) will show part of the signature, too, since the subject length is computed as (body - subject) - the length of the body is (sig - body), which makes it negative. Asking for %(contents:body) causes us to cast this to a very large size_t when we feed it to xmemdupz(), which then complains about trying to allocate too much memory. These are essentially the same bugs fixed in the previous commit, except that they happen when there is a CRLF blank line in the signature, rather than no blank line at all. Both are caused by the refactoring in 9f75ce3d8f (ref-filter: handle CRLF at end-of-line more gracefully, 2020-10-29). We can fix this by doing the same "sigstart" check that we do in the non-CRLF case. And rather than repeat ourselves, we can just use short-circuiting OR to collapse both cases into a single conditional. I.e., rather than: if (strstr("\n\n")) ...found blank, check if it's in signature... else if (strstr("\r\n\r\n")) ...found blank, check if it's in signature... else ...no blank line found... we can collapse this to: if (strstr("\n\n")) || strstr("\r\n\r\n"))) ...found blank, check if it's in signature... else ...no blank line found... The tests show the problem and the fix. Though it wasn't broken, I included contents:signature here to make sure it still behaves as expected, but note the shell hackery needed to make it work. A less-clever option would be to skip using test_atom and just "append_cr >expected" ourselves. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Taylor Blau <me@ttaylorr.com>
2022-11-02 15:44:00 +08:00
if ((eol = strstr(*sub, "\n\n")) ||
(eol = strstr(*sub, "\r\n\r\n"))) {
eol = eol < sigstart ? eol : sigstart;
ref-filter: fix parsing of signatures with CRLF and no body This commit fixes a bug when parsing tags that have CRLF line endings, a signature, and no body, like this (the "^M" are marking the CRs): this is the subject^M -----BEGIN PGP SIGNATURE-----^M ^M ...some stuff...^M -----END PGP SIGNATURE-----^M When trying to find the start of the body, we look for a blank line separating the subject and body. In this case, there isn't one. But we search for it using strstr(), which will find the blank line in the signature. In the non-CRLF code path, we check whether the line we found is past the start of the signature, and if so, put the body pointer at the start of the signature (effectively making the body empty). But the CRLF code path doesn't catch the same case, and we end up with the body pointer in the middle of the signature field. This has two visible problems: - printing %(contents:subject) will show part of the signature, too, since the subject length is computed as (body - subject) - the length of the body is (sig - body), which makes it negative. Asking for %(contents:body) causes us to cast this to a very large size_t when we feed it to xmemdupz(), which then complains about trying to allocate too much memory. These are essentially the same bugs fixed in the previous commit, except that they happen when there is a CRLF blank line in the signature, rather than no blank line at all. Both are caused by the refactoring in 9f75ce3d8f (ref-filter: handle CRLF at end-of-line more gracefully, 2020-10-29). We can fix this by doing the same "sigstart" check that we do in the non-CRLF case. And rather than repeat ourselves, we can just use short-circuiting OR to collapse both cases into a single conditional. I.e., rather than: if (strstr("\n\n")) ...found blank, check if it's in signature... else if (strstr("\r\n\r\n")) ...found blank, check if it's in signature... else ...no blank line found... we can collapse this to: if (strstr("\n\n")) || strstr("\r\n\r\n"))) ...found blank, check if it's in signature... else ...no blank line found... The tests show the problem and the fix. Though it wasn't broken, I included contents:signature here to make sure it still behaves as expected, but note the shell hackery needed to make it work. A less-clever option would be to skip using test_atom and just "append_cr >expected" ourselves. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Taylor Blau <me@ttaylorr.com>
2022-11-02 15:44:00 +08:00
} else {
ref-filter: handle CRLF at end-of-line more gracefully The ref-filter code does not correctly handle commit or tag messages that use CRLF as the line terminator. Such messages can be created with the `--cleanup=verbatim` option of `git commit` and `git tag`, or by using `git commit-tree` directly. The function `find_subpos` in ref-filter.c looks for two consecutive LFs to find the end of the subject line, a sequence which is absent in messages using CRLF. This results in the whole message being parsed as the subject line (`%(contents:subject)`), and the body of the message (`%(contents:body)`) being empty. Moreover, in `copy_subject`, which wants to return the subject as a single line, '\n' is replaced by space, but '\r' is untouched. This impacts the output of `git branch`, `git tag` and `git for-each-ref`. This behaviour is a regression for `git branch --verbose`, which bisects down to 949af0684c (branch: use ref-filter printing APIs, 2017-01-10). Adjust the ref-filter code to be more lenient by hardening the logic in `copy_subject` and `find_subpos` to correctly parse messages containing CRLF. Add a new test script, 't3920-crlf-messages.sh', to test the behaviour of commands using either the ref-filter or the pretty APIs with messages using CRLF line endings. The function `test_crlf_subject_body_and_contents` can be used to test that the `--format` option of `branch`, `tag`, `for-each-ref`, `log` and `show` correctly displays the subject, body and raw content of commit and tag messages using CRLF. Test the output of `branch`, `tag` and `for-each-ref` with such commits. Helped-by: Junio C Hamano <gitster@pobox.com> Helped-by: Eric Sunshine <sunshine@sunshineco.com> Signed-off-by: Philippe Blain <levraiphilippeblain@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-10-29 20:48:28 +08:00
/* treat whole message as subject */
ref-filter: fix parsing of signatures without blank lines When ref-filter is asked to show %(content:subject), etc, we end up in find_subpos() to parse out the three major parts: the subject, the body, and the signature (if any). When searching for the blank line between the subject and body, if we don't find anything, we try to treat the whole message as the subject, with no body. But our idea of "the whole message" needs to take into account the signature, too. Since 9f75ce3d8f (ref-filter: handle CRLF at end-of-line more gracefully, 2020-10-29), the code instead goes all the way to the end of the buffer, which produces confusing output. Here's an example. If we have a tag message like this: this is the subject -----BEGIN SSH SIGNATURE----- ...some stuff... -----END SSH SIGNATURE----- then the current parser will put the start of the body at the end of the whole buffer. This produces two buggy outcomes: - since the subject length is computed as (body - subject), showing %(contents:subject) will print both the subject and the signature, rather than just the single line - since the body length is computed as (sig - body), and the body now starts _after_ the signature, we end up with a negative length! Fortunately we never access out-of-bounds memory, because the negative length is fed to xmemdupz(), which casts it to a size_t, and xmalloc() bails trying to allocate an absurdly large value. In theory it would be possible for somebody making a malicious tag to wrap it around to a more reasonable value, but it would require a tag on the order of 2^63 bytes. And even if they did, all they get is an out of bounds string read. So the security implications are probably not interesting. We can fix both by correctly putting the start of the body at the same index as the start of the signature (effectively making the body empty). Note that this is a real issue with signatures generated with gpg.format set to "ssh", which would look like the example above. In the new tests here I use a hard-coded tag message, for a few reasons: - regardless of what the ssh-signing code produces now or in the future, we should be testing this particular case - skipping the actual signature makes the tests simpler to write (and allows them to run on more systems) - t6300 has helpers for working with gpg signatures; for the purposes of this bug, "BEGIN PGP" is just as good a demonstration, and this simplifies the tests Curiously, the same issue doesn't happen with real gpg signatures (and there are even existing tests in t6300 with cover this). Those have a blank line between the header and the content, like: this is the subject -----BEGIN PGP SIGNATURE----- ...some stuff... -----END PGP SIGNATURE----- Because we search for the subject/body separator line with a strstr(), we find the blank line in the signature, even though it's outside of what we'd consider the body. But that puts us unto a separate code path, which realizes that we're now in the signature and adjusts the line back to "sigstart". So this patch is basically just making the "no line found at all" case match that. And note that "sigstart" is always defined (if there is no signature, it points to the end of the buffer as you'd expect). Reported-by: Martin Englund <martin@englund.nu> Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Taylor Blau <me@ttaylorr.com>
2022-11-02 15:42:07 +08:00
eol = sigstart;
}
ref-filter: handle CRLF at end-of-line more gracefully The ref-filter code does not correctly handle commit or tag messages that use CRLF as the line terminator. Such messages can be created with the `--cleanup=verbatim` option of `git commit` and `git tag`, or by using `git commit-tree` directly. The function `find_subpos` in ref-filter.c looks for two consecutive LFs to find the end of the subject line, a sequence which is absent in messages using CRLF. This results in the whole message being parsed as the subject line (`%(contents:subject)`), and the body of the message (`%(contents:body)`) being empty. Moreover, in `copy_subject`, which wants to return the subject as a single line, '\n' is replaced by space, but '\r' is untouched. This impacts the output of `git branch`, `git tag` and `git for-each-ref`. This behaviour is a regression for `git branch --verbose`, which bisects down to 949af0684c (branch: use ref-filter printing APIs, 2017-01-10). Adjust the ref-filter code to be more lenient by hardening the logic in `copy_subject` and `find_subpos` to correctly parse messages containing CRLF. Add a new test script, 't3920-crlf-messages.sh', to test the behaviour of commands using either the ref-filter or the pretty APIs with messages using CRLF line endings. The function `test_crlf_subject_body_and_contents` can be used to test that the `--format` option of `branch`, `tag`, `for-each-ref`, `log` and `show` correctly displays the subject, body and raw content of commit and tag messages using CRLF. Test the output of `branch`, `tag` and `for-each-ref` with such commits. Helped-by: Junio C Hamano <gitster@pobox.com> Helped-by: Eric Sunshine <sunshine@sunshineco.com> Signed-off-by: Philippe Blain <levraiphilippeblain@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-10-29 20:48:28 +08:00
buf = eol;
*sublen = buf - *sub;
/* drop trailing newline, if present */
ref-filter: handle CRLF at end-of-line more gracefully The ref-filter code does not correctly handle commit or tag messages that use CRLF as the line terminator. Such messages can be created with the `--cleanup=verbatim` option of `git commit` and `git tag`, or by using `git commit-tree` directly. The function `find_subpos` in ref-filter.c looks for two consecutive LFs to find the end of the subject line, a sequence which is absent in messages using CRLF. This results in the whole message being parsed as the subject line (`%(contents:subject)`), and the body of the message (`%(contents:body)`) being empty. Moreover, in `copy_subject`, which wants to return the subject as a single line, '\n' is replaced by space, but '\r' is untouched. This impacts the output of `git branch`, `git tag` and `git for-each-ref`. This behaviour is a regression for `git branch --verbose`, which bisects down to 949af0684c (branch: use ref-filter printing APIs, 2017-01-10). Adjust the ref-filter code to be more lenient by hardening the logic in `copy_subject` and `find_subpos` to correctly parse messages containing CRLF. Add a new test script, 't3920-crlf-messages.sh', to test the behaviour of commands using either the ref-filter or the pretty APIs with messages using CRLF line endings. The function `test_crlf_subject_body_and_contents` can be used to test that the `--format` option of `branch`, `tag`, `for-each-ref`, `log` and `show` correctly displays the subject, body and raw content of commit and tag messages using CRLF. Test the output of `branch`, `tag` and `for-each-ref` with such commits. Helped-by: Junio C Hamano <gitster@pobox.com> Helped-by: Eric Sunshine <sunshine@sunshineco.com> Signed-off-by: Philippe Blain <levraiphilippeblain@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-10-29 20:48:28 +08:00
while (*sublen && ((*sub)[*sublen - 1] == '\n' ||
(*sub)[*sublen - 1] == '\r'))
*sublen -= 1;
/* skip any empty lines */
ref-filter: handle CRLF at end-of-line more gracefully The ref-filter code does not correctly handle commit or tag messages that use CRLF as the line terminator. Such messages can be created with the `--cleanup=verbatim` option of `git commit` and `git tag`, or by using `git commit-tree` directly. The function `find_subpos` in ref-filter.c looks for two consecutive LFs to find the end of the subject line, a sequence which is absent in messages using CRLF. This results in the whole message being parsed as the subject line (`%(contents:subject)`), and the body of the message (`%(contents:body)`) being empty. Moreover, in `copy_subject`, which wants to return the subject as a single line, '\n' is replaced by space, but '\r' is untouched. This impacts the output of `git branch`, `git tag` and `git for-each-ref`. This behaviour is a regression for `git branch --verbose`, which bisects down to 949af0684c (branch: use ref-filter printing APIs, 2017-01-10). Adjust the ref-filter code to be more lenient by hardening the logic in `copy_subject` and `find_subpos` to correctly parse messages containing CRLF. Add a new test script, 't3920-crlf-messages.sh', to test the behaviour of commands using either the ref-filter or the pretty APIs with messages using CRLF line endings. The function `test_crlf_subject_body_and_contents` can be used to test that the `--format` option of `branch`, `tag`, `for-each-ref`, `log` and `show` correctly displays the subject, body and raw content of commit and tag messages using CRLF. Test the output of `branch`, `tag` and `for-each-ref` with such commits. Helped-by: Junio C Hamano <gitster@pobox.com> Helped-by: Eric Sunshine <sunshine@sunshineco.com> Signed-off-by: Philippe Blain <levraiphilippeblain@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-10-29 20:48:28 +08:00
while (*buf == '\n' || *buf == '\r')
buf++;
*body = buf;
*bodylen = strlen(buf);
*nonsiglen = sigstart - buf;
}
/*
* If 'lines' is greater than 0, append that many lines from the given
* 'buf' of length 'size' to the given strbuf.
*/
static void append_lines(struct strbuf *out, const char *buf, unsigned long size, int lines)
{
int i;
const char *sp, *eol;
size_t len;
sp = buf;
for (i = 0; i < lines && sp < buf + size; i++) {
if (i)
strbuf_addstr(out, "\n ");
eol = memchr(sp, '\n', size - (sp - buf));
len = eol ? eol - sp : size - (sp - buf);
strbuf_add(out, sp, len);
if (!eol)
break;
sp = eol + 1;
}
}
static void grab_describe_values(struct atom_value *val, int deref,
struct object *obj)
{
struct commit *commit = (struct commit *)obj;
int i;
for (i = 0; i < used_atom_cnt; i++) {
struct used_atom *atom = &used_atom[i];
enum atom_type type = atom->atom_type;
const char *name = atom->name;
struct atom_value *v = &val[i];
struct child_process cmd = CHILD_PROCESS_INIT;
struct strbuf out = STRBUF_INIT;
struct strbuf err = STRBUF_INIT;
if (type != ATOM_DESCRIBE)
continue;
if (!!deref != (*name == '*'))
continue;
cmd.git_cmd = 1;
strvec_push(&cmd.args, "describe");
strvec_pushv(&cmd.args, atom->u.describe_args);
strvec_push(&cmd.args, oid_to_hex(&commit->object.oid));
if (pipe_command(&cmd, NULL, 0, &out, 0, &err, 0) < 0) {
error(_("failed to run 'describe'"));
v->s = xstrdup("");
continue;
}
strbuf_rtrim(&out);
v->s = strbuf_detach(&out, NULL);
strbuf_release(&err);
}
}
/* See grab_values */
static void grab_sub_body_contents(struct atom_value *val, int deref, struct expand_data *data)
{
int i;
const char *subpos = NULL, *bodypos = NULL, *sigpos = NULL;
size_t sublen = 0, bodylen = 0, nonsiglen = 0, siglen = 0;
void *buf = data->content;
for (i = 0; i < used_atom_cnt; i++) {
struct used_atom *atom = &used_atom[i];
const char *name = atom->name;
struct atom_value *v = &val[i];
ref-filter: add %(raw) atom Add new formatting option `%(raw)`, which will print the raw object data without any changes. It will help further to migrate all cat-file formatting logic from cat-file to ref-filter. The raw data of blob, tree objects may contain '\0', but most of the logic in `ref-filter` depends on the output of the atom being text (specifically, no embedded NULs in it). E.g. `quote_formatting()` use `strbuf_addstr()` or `*._quote_buf()` add the data to the buffer. The raw data of a tree object is `100644 one\0...`, only the `100644 one` will be added to the buffer, which is incorrect. Therefore, we need to find a way to record the length of the atom_value's member `s`. Although strbuf can already record the string and its length, if we want to replace the type of atom_value's member `s` with strbuf, many places in ref-filter that are filled with dynamically allocated mermory in `v->s` are not easy to replace. At the same time, we need to check if `v->s == NULL` in populate_value(), and strbuf cannot easily distinguish NULL and empty strings, but c-style "const char *" can do it. So add a new member in `struct atom_value`: `s_size`, which can record raw object size, it can help us add raw object data to the buffer or compare two buffers which contain raw object data. Note that `--format=%(raw)` cannot be used with `--python`, `--shell`, `--tcl`, and `--perl` because if the binary raw data is passed to a variable in such languages, these may not support arbitrary binary data in their string variable type. Reviewed-by: Jacob Keller <jacob.keller@gmail.com> Mentored-by: Christian Couder <christian.couder@gmail.com> Mentored-by: Hariom Verma <hariom18599@gmail.com> Helped-by: Bagas Sanjaya <bagasdotme@gmail.com> Helped-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Helped-by: Felipe Contreras <felipe.contreras@gmail.com> Helped-by: Phillip Wood <phillip.wood@dunelm.org.uk> Helped-by: Junio C Hamano <gitster@pobox.com> Based-on-patch-by: Olga Telezhnaya <olyatelezhnaya@gmail.com> Signed-off-by: ZheNing Hu <adlternative@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-07-26 11:26:47 +08:00
enum atom_type atom_type = atom->atom_type;
if (!!deref != (*name == '*'))
continue;
if (deref)
name++;
ref-filter: add %(raw) atom Add new formatting option `%(raw)`, which will print the raw object data without any changes. It will help further to migrate all cat-file formatting logic from cat-file to ref-filter. The raw data of blob, tree objects may contain '\0', but most of the logic in `ref-filter` depends on the output of the atom being text (specifically, no embedded NULs in it). E.g. `quote_formatting()` use `strbuf_addstr()` or `*._quote_buf()` add the data to the buffer. The raw data of a tree object is `100644 one\0...`, only the `100644 one` will be added to the buffer, which is incorrect. Therefore, we need to find a way to record the length of the atom_value's member `s`. Although strbuf can already record the string and its length, if we want to replace the type of atom_value's member `s` with strbuf, many places in ref-filter that are filled with dynamically allocated mermory in `v->s` are not easy to replace. At the same time, we need to check if `v->s == NULL` in populate_value(), and strbuf cannot easily distinguish NULL and empty strings, but c-style "const char *" can do it. So add a new member in `struct atom_value`: `s_size`, which can record raw object size, it can help us add raw object data to the buffer or compare two buffers which contain raw object data. Note that `--format=%(raw)` cannot be used with `--python`, `--shell`, `--tcl`, and `--perl` because if the binary raw data is passed to a variable in such languages, these may not support arbitrary binary data in their string variable type. Reviewed-by: Jacob Keller <jacob.keller@gmail.com> Mentored-by: Christian Couder <christian.couder@gmail.com> Mentored-by: Hariom Verma <hariom18599@gmail.com> Helped-by: Bagas Sanjaya <bagasdotme@gmail.com> Helped-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Helped-by: Felipe Contreras <felipe.contreras@gmail.com> Helped-by: Phillip Wood <phillip.wood@dunelm.org.uk> Helped-by: Junio C Hamano <gitster@pobox.com> Based-on-patch-by: Olga Telezhnaya <olyatelezhnaya@gmail.com> Signed-off-by: ZheNing Hu <adlternative@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-07-26 11:26:47 +08:00
if (atom_type == ATOM_RAW) {
unsigned long buf_size = data->size;
if (atom->u.raw_data.option == RAW_BARE) {
v->s = xmemdupz(buf, buf_size);
v->s_size = buf_size;
} else if (atom->u.raw_data.option == RAW_LENGTH) {
v->value = buf_size;
v->s = xstrfmt("%"PRIuMAX, v->value);
ref-filter: add %(raw) atom Add new formatting option `%(raw)`, which will print the raw object data without any changes. It will help further to migrate all cat-file formatting logic from cat-file to ref-filter. The raw data of blob, tree objects may contain '\0', but most of the logic in `ref-filter` depends on the output of the atom being text (specifically, no embedded NULs in it). E.g. `quote_formatting()` use `strbuf_addstr()` or `*._quote_buf()` add the data to the buffer. The raw data of a tree object is `100644 one\0...`, only the `100644 one` will be added to the buffer, which is incorrect. Therefore, we need to find a way to record the length of the atom_value's member `s`. Although strbuf can already record the string and its length, if we want to replace the type of atom_value's member `s` with strbuf, many places in ref-filter that are filled with dynamically allocated mermory in `v->s` are not easy to replace. At the same time, we need to check if `v->s == NULL` in populate_value(), and strbuf cannot easily distinguish NULL and empty strings, but c-style "const char *" can do it. So add a new member in `struct atom_value`: `s_size`, which can record raw object size, it can help us add raw object data to the buffer or compare two buffers which contain raw object data. Note that `--format=%(raw)` cannot be used with `--python`, `--shell`, `--tcl`, and `--perl` because if the binary raw data is passed to a variable in such languages, these may not support arbitrary binary data in their string variable type. Reviewed-by: Jacob Keller <jacob.keller@gmail.com> Mentored-by: Christian Couder <christian.couder@gmail.com> Mentored-by: Hariom Verma <hariom18599@gmail.com> Helped-by: Bagas Sanjaya <bagasdotme@gmail.com> Helped-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Helped-by: Felipe Contreras <felipe.contreras@gmail.com> Helped-by: Phillip Wood <phillip.wood@dunelm.org.uk> Helped-by: Junio C Hamano <gitster@pobox.com> Based-on-patch-by: Olga Telezhnaya <olyatelezhnaya@gmail.com> Signed-off-by: ZheNing Hu <adlternative@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-07-26 11:26:47 +08:00
}
continue;
}
if ((data->type != OBJ_TAG &&
data->type != OBJ_COMMIT) ||
(strcmp(name, "body") &&
!starts_with(name, "subject") &&
!starts_with(name, "trailers") &&
!starts_with(name, "contents")))
continue;
if (!subpos)
find_subpos(buf,
&subpos, &sublen,
&bodypos, &bodylen, &nonsiglen,
&sigpos, &siglen);
if (atom->u.contents.option == C_SUB)
v->s = copy_subject(subpos, sublen);
else if (atom->u.contents.option == C_SUB_SANITIZE) {
struct strbuf sb = STRBUF_INIT;
format_sanitized_subject(&sb, subpos, sublen);
v->s = strbuf_detach(&sb, NULL);
} else if (atom->u.contents.option == C_BODY_DEP)
v->s = xmemdupz(bodypos, bodylen);
else if (atom->u.contents.option == C_LENGTH) {
v->value = strlen(subpos);
v->s = xstrfmt("%"PRIuMAX, v->value);
} else if (atom->u.contents.option == C_BODY)
v->s = xmemdupz(bodypos, nonsiglen);
else if (atom->u.contents.option == C_SIG)
v->s = xmemdupz(sigpos, siglen);
else if (atom->u.contents.option == C_LINES) {
struct strbuf s = STRBUF_INIT;
const char *contents_end = bodypos + nonsiglen;
/* Size is the length of the message after removing the signature */
append_lines(&s, subpos, contents_end - subpos, atom->u.contents.nlines);
v->s = strbuf_detach(&s, NULL);
} else if (atom->u.contents.option == C_TRAILERS) {
struct strbuf s = STRBUF_INIT;
/* Format the trailer info according to the trailer_opts given */
trailer: reorder format_trailers_from_commit() parameters Currently there are two functions for formatting trailers in <trailer.h>: void format_trailers(const struct process_trailer_options *, struct list_head *trailers, FILE *outfile); void format_trailers_from_commit(struct strbuf *out, const char *msg, const struct process_trailer_options *opts); and although they are similar enough (even taking the same process_trailer_options struct pointer) they are used quite differently. One might intuitively think that format_trailers_from_commit() builds on top of format_trailers(), but this is not the case. Instead format_trailers_from_commit() calls format_trailer_info() and format_trailers() is never called in that codepath. This is a preparatory refactor to help us deprecate format_trailers() in favor of format_trailer_info() (at which point we can rename the latter to the former). When the deprecation is complete, both format_trailers_from_commit(), and the interpret-trailers builtin will be able to call into the same helper function (instead of format_trailers() and format_trailer_info(), respectively). Unifying the formatters is desirable because it simplifies the API. Reorder parameters for format_trailers_from_commit() to prefer const struct process_trailer_options *opts as the first parameter, because these options are intimately tied to formatting trailers. And take struct strbuf *out last, because it's an "out parameter" (something that the caller wants to use as the output of this function). Similarly, reorder parameters for format_trailer_info(), because later on we will unify the two together. Signed-off-by: Linus Arver <linusa@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2024-03-01 08:14:42 +08:00
format_trailers_from_commit(&atom->u.contents.trailer_opts, subpos, &s);
v->s = strbuf_detach(&s, NULL);
} else if (atom->u.contents.option == C_BARE)
v->s = xstrdup(subpos);
}
free((void *)sigpos);
}
/*
* We want to have empty print-string for field requests
* that do not apply (e.g. "authordate" for a tag object)
*/
static void fill_missing_values(struct atom_value *val)
{
int i;
for (i = 0; i < used_atom_cnt; i++) {
struct atom_value *v = &val[i];
if (!v->s)
v->s = xstrdup("");
}
}
/*
* val is a list of atom_value to hold returned values. Extract
* the values for atoms in used_atom array out of (obj, buf, sz).
* when deref is false, (obj, buf, sz) is the object that is
* pointed at by the ref itself; otherwise it is the object the
* ref (which is a tag) refers to.
*/
static void grab_values(struct atom_value *val, int deref, struct object *obj, struct expand_data *data)
{
void *buf = data->content;
switch (obj->type) {
case OBJ_TAG:
grab_tag_values(val, deref, obj);
grab_sub_body_contents(val, deref, data);
grab_person("tagger", val, deref, buf);
grab_describe_values(val, deref, obj);
break;
case OBJ_COMMIT:
grab_commit_values(val, deref, obj);
grab_sub_body_contents(val, deref, data);
grab_person("author", val, deref, buf);
grab_person("committer", val, deref, buf);
grab_signature(val, deref, obj);
grab_describe_values(val, deref, obj);
break;
case OBJ_TREE:
/* grab_tree_values(val, deref, obj, buf, sz); */
ref-filter: add %(raw) atom Add new formatting option `%(raw)`, which will print the raw object data without any changes. It will help further to migrate all cat-file formatting logic from cat-file to ref-filter. The raw data of blob, tree objects may contain '\0', but most of the logic in `ref-filter` depends on the output of the atom being text (specifically, no embedded NULs in it). E.g. `quote_formatting()` use `strbuf_addstr()` or `*._quote_buf()` add the data to the buffer. The raw data of a tree object is `100644 one\0...`, only the `100644 one` will be added to the buffer, which is incorrect. Therefore, we need to find a way to record the length of the atom_value's member `s`. Although strbuf can already record the string and its length, if we want to replace the type of atom_value's member `s` with strbuf, many places in ref-filter that are filled with dynamically allocated mermory in `v->s` are not easy to replace. At the same time, we need to check if `v->s == NULL` in populate_value(), and strbuf cannot easily distinguish NULL and empty strings, but c-style "const char *" can do it. So add a new member in `struct atom_value`: `s_size`, which can record raw object size, it can help us add raw object data to the buffer or compare two buffers which contain raw object data. Note that `--format=%(raw)` cannot be used with `--python`, `--shell`, `--tcl`, and `--perl` because if the binary raw data is passed to a variable in such languages, these may not support arbitrary binary data in their string variable type. Reviewed-by: Jacob Keller <jacob.keller@gmail.com> Mentored-by: Christian Couder <christian.couder@gmail.com> Mentored-by: Hariom Verma <hariom18599@gmail.com> Helped-by: Bagas Sanjaya <bagasdotme@gmail.com> Helped-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Helped-by: Felipe Contreras <felipe.contreras@gmail.com> Helped-by: Phillip Wood <phillip.wood@dunelm.org.uk> Helped-by: Junio C Hamano <gitster@pobox.com> Based-on-patch-by: Olga Telezhnaya <olyatelezhnaya@gmail.com> Signed-off-by: ZheNing Hu <adlternative@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-07-26 11:26:47 +08:00
grab_sub_body_contents(val, deref, data);
break;
case OBJ_BLOB:
/* grab_blob_values(val, deref, obj, buf, sz); */
ref-filter: add %(raw) atom Add new formatting option `%(raw)`, which will print the raw object data without any changes. It will help further to migrate all cat-file formatting logic from cat-file to ref-filter. The raw data of blob, tree objects may contain '\0', but most of the logic in `ref-filter` depends on the output of the atom being text (specifically, no embedded NULs in it). E.g. `quote_formatting()` use `strbuf_addstr()` or `*._quote_buf()` add the data to the buffer. The raw data of a tree object is `100644 one\0...`, only the `100644 one` will be added to the buffer, which is incorrect. Therefore, we need to find a way to record the length of the atom_value's member `s`. Although strbuf can already record the string and its length, if we want to replace the type of atom_value's member `s` with strbuf, many places in ref-filter that are filled with dynamically allocated mermory in `v->s` are not easy to replace. At the same time, we need to check if `v->s == NULL` in populate_value(), and strbuf cannot easily distinguish NULL and empty strings, but c-style "const char *" can do it. So add a new member in `struct atom_value`: `s_size`, which can record raw object size, it can help us add raw object data to the buffer or compare two buffers which contain raw object data. Note that `--format=%(raw)` cannot be used with `--python`, `--shell`, `--tcl`, and `--perl` because if the binary raw data is passed to a variable in such languages, these may not support arbitrary binary data in their string variable type. Reviewed-by: Jacob Keller <jacob.keller@gmail.com> Mentored-by: Christian Couder <christian.couder@gmail.com> Mentored-by: Hariom Verma <hariom18599@gmail.com> Helped-by: Bagas Sanjaya <bagasdotme@gmail.com> Helped-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Helped-by: Felipe Contreras <felipe.contreras@gmail.com> Helped-by: Phillip Wood <phillip.wood@dunelm.org.uk> Helped-by: Junio C Hamano <gitster@pobox.com> Based-on-patch-by: Olga Telezhnaya <olyatelezhnaya@gmail.com> Signed-off-by: ZheNing Hu <adlternative@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-07-26 11:26:47 +08:00
grab_sub_body_contents(val, deref, data);
break;
default:
die("Eh? Object of type %d?", obj->type);
}
}
static inline char *copy_advance(char *dst, const char *src)
{
while (*src)
*dst++ = *src++;
return dst;
}
static const char *lstrip_ref_components(const char *refname, int len)
tag: do not show ambiguous tag names as "tags/foo" Since b7cc53e9 (tag.c: use 'ref-filter' APIs, 2015-07-11), git-tag has started showing tags with ambiguous names (i.e., when both "heads/foo" and "tags/foo" exists) as "tags/foo" instead of just "foo". This is both: - pointless; the output of "git tag" includes only refs/tags, so we know that "foo" means the one in "refs/tags". and - ambiguous; in the original output, we know that the line "foo" means that "refs/tags/foo" exists. In the new output, it is unclear whether we mean "refs/tags/foo" or "refs/tags/tags/foo". The reason this happens is that commit b7cc53e9 switched git-tag to use ref-filter's "%(refname:short)" output formatting, which was adapted from for-each-ref. This more general code does not know that we care only about tags, and uses shorten_unambiguous_ref to get the short-name. We need to tell it that we care only about "refs/tags/", and it should shorten with respect to that value. In theory, the ref-filter code could figure this out by us passing FILTER_REFS_TAGS. But there are two complications there: 1. The handling of refname:short is deep in formatting code that does not even have our ref_filter struct, let alone the arguments to the filter_ref struct. 2. In git v2.7.0, we expose the formatting language to the user. If we follow this path, it will mean that "%(refname:short)" behaves differently for "tag" versus "for-each-ref" (including "for-each-ref refs/tags/"), which can lead to confusion. Instead, let's add a new modifier to the formatting language, "strip", to remove a specific set of prefix components. This fixes "git tag", and lets users invoke the same behavior from their own custom formats (for "tag" or "for-each-ref") while leaving ":short" with its same consistent meaning in all places. We introduce a test in t7004 for "git tag", which fails without this patch. We also add a similar test in t3203 for "git branch", which does not actually fail. But since it is likely that "branch" will eventually use the same formatting code, the test helps defend against future regressions. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-01-26 11:00:05 +08:00
{
long remaining = len;
const char *start = xstrdup(refname);
const char *to_free = start;
tag: do not show ambiguous tag names as "tags/foo" Since b7cc53e9 (tag.c: use 'ref-filter' APIs, 2015-07-11), git-tag has started showing tags with ambiguous names (i.e., when both "heads/foo" and "tags/foo" exists) as "tags/foo" instead of just "foo". This is both: - pointless; the output of "git tag" includes only refs/tags, so we know that "foo" means the one in "refs/tags". and - ambiguous; in the original output, we know that the line "foo" means that "refs/tags/foo" exists. In the new output, it is unclear whether we mean "refs/tags/foo" or "refs/tags/tags/foo". The reason this happens is that commit b7cc53e9 switched git-tag to use ref-filter's "%(refname:short)" output formatting, which was adapted from for-each-ref. This more general code does not know that we care only about tags, and uses shorten_unambiguous_ref to get the short-name. We need to tell it that we care only about "refs/tags/", and it should shorten with respect to that value. In theory, the ref-filter code could figure this out by us passing FILTER_REFS_TAGS. But there are two complications there: 1. The handling of refname:short is deep in formatting code that does not even have our ref_filter struct, let alone the arguments to the filter_ref struct. 2. In git v2.7.0, we expose the formatting language to the user. If we follow this path, it will mean that "%(refname:short)" behaves differently for "tag" versus "for-each-ref" (including "for-each-ref refs/tags/"), which can lead to confusion. Instead, let's add a new modifier to the formatting language, "strip", to remove a specific set of prefix components. This fixes "git tag", and lets users invoke the same behavior from their own custom formats (for "tag" or "for-each-ref") while leaving ":short" with its same consistent meaning in all places. We introduce a test in t7004 for "git tag", which fails without this patch. We also add a similar test in t3203 for "git branch", which does not actually fail. But since it is likely that "branch" will eventually use the same formatting code, the test helps defend against future regressions. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-01-26 11:00:05 +08:00
if (len < 0) {
int i;
const char *p = refname;
/* Find total no of '/' separated path-components */
for (i = 0; p[i]; p[i] == '/' ? i++ : *p++)
;
/*
* The number of components we need to strip is now
* the total minus the components to be left (Plus one
* because we count the number of '/', but the number
* of components is one more than the no of '/').
*/
remaining = i + len + 1;
}
tag: do not show ambiguous tag names as "tags/foo" Since b7cc53e9 (tag.c: use 'ref-filter' APIs, 2015-07-11), git-tag has started showing tags with ambiguous names (i.e., when both "heads/foo" and "tags/foo" exists) as "tags/foo" instead of just "foo". This is both: - pointless; the output of "git tag" includes only refs/tags, so we know that "foo" means the one in "refs/tags". and - ambiguous; in the original output, we know that the line "foo" means that "refs/tags/foo" exists. In the new output, it is unclear whether we mean "refs/tags/foo" or "refs/tags/tags/foo". The reason this happens is that commit b7cc53e9 switched git-tag to use ref-filter's "%(refname:short)" output formatting, which was adapted from for-each-ref. This more general code does not know that we care only about tags, and uses shorten_unambiguous_ref to get the short-name. We need to tell it that we care only about "refs/tags/", and it should shorten with respect to that value. In theory, the ref-filter code could figure this out by us passing FILTER_REFS_TAGS. But there are two complications there: 1. The handling of refname:short is deep in formatting code that does not even have our ref_filter struct, let alone the arguments to the filter_ref struct. 2. In git v2.7.0, we expose the formatting language to the user. If we follow this path, it will mean that "%(refname:short)" behaves differently for "tag" versus "for-each-ref" (including "for-each-ref refs/tags/"), which can lead to confusion. Instead, let's add a new modifier to the formatting language, "strip", to remove a specific set of prefix components. This fixes "git tag", and lets users invoke the same behavior from their own custom formats (for "tag" or "for-each-ref") while leaving ":short" with its same consistent meaning in all places. We introduce a test in t7004 for "git tag", which fails without this patch. We also add a similar test in t3203 for "git branch", which does not actually fail. But since it is likely that "branch" will eventually use the same formatting code, the test helps defend against future regressions. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-01-26 11:00:05 +08:00
while (remaining > 0) {
tag: do not show ambiguous tag names as "tags/foo" Since b7cc53e9 (tag.c: use 'ref-filter' APIs, 2015-07-11), git-tag has started showing tags with ambiguous names (i.e., when both "heads/foo" and "tags/foo" exists) as "tags/foo" instead of just "foo". This is both: - pointless; the output of "git tag" includes only refs/tags, so we know that "foo" means the one in "refs/tags". and - ambiguous; in the original output, we know that the line "foo" means that "refs/tags/foo" exists. In the new output, it is unclear whether we mean "refs/tags/foo" or "refs/tags/tags/foo". The reason this happens is that commit b7cc53e9 switched git-tag to use ref-filter's "%(refname:short)" output formatting, which was adapted from for-each-ref. This more general code does not know that we care only about tags, and uses shorten_unambiguous_ref to get the short-name. We need to tell it that we care only about "refs/tags/", and it should shorten with respect to that value. In theory, the ref-filter code could figure this out by us passing FILTER_REFS_TAGS. But there are two complications there: 1. The handling of refname:short is deep in formatting code that does not even have our ref_filter struct, let alone the arguments to the filter_ref struct. 2. In git v2.7.0, we expose the formatting language to the user. If we follow this path, it will mean that "%(refname:short)" behaves differently for "tag" versus "for-each-ref" (including "for-each-ref refs/tags/"), which can lead to confusion. Instead, let's add a new modifier to the formatting language, "strip", to remove a specific set of prefix components. This fixes "git tag", and lets users invoke the same behavior from their own custom formats (for "tag" or "for-each-ref") while leaving ":short" with its same consistent meaning in all places. We introduce a test in t7004 for "git tag", which fails without this patch. We also add a similar test in t3203 for "git branch", which does not actually fail. But since it is likely that "branch" will eventually use the same formatting code, the test helps defend against future regressions. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-01-26 11:00:05 +08:00
switch (*start++) {
case '\0':
free((char *)to_free);
return xstrdup("");
tag: do not show ambiguous tag names as "tags/foo" Since b7cc53e9 (tag.c: use 'ref-filter' APIs, 2015-07-11), git-tag has started showing tags with ambiguous names (i.e., when both "heads/foo" and "tags/foo" exists) as "tags/foo" instead of just "foo". This is both: - pointless; the output of "git tag" includes only refs/tags, so we know that "foo" means the one in "refs/tags". and - ambiguous; in the original output, we know that the line "foo" means that "refs/tags/foo" exists. In the new output, it is unclear whether we mean "refs/tags/foo" or "refs/tags/tags/foo". The reason this happens is that commit b7cc53e9 switched git-tag to use ref-filter's "%(refname:short)" output formatting, which was adapted from for-each-ref. This more general code does not know that we care only about tags, and uses shorten_unambiguous_ref to get the short-name. We need to tell it that we care only about "refs/tags/", and it should shorten with respect to that value. In theory, the ref-filter code could figure this out by us passing FILTER_REFS_TAGS. But there are two complications there: 1. The handling of refname:short is deep in formatting code that does not even have our ref_filter struct, let alone the arguments to the filter_ref struct. 2. In git v2.7.0, we expose the formatting language to the user. If we follow this path, it will mean that "%(refname:short)" behaves differently for "tag" versus "for-each-ref" (including "for-each-ref refs/tags/"), which can lead to confusion. Instead, let's add a new modifier to the formatting language, "strip", to remove a specific set of prefix components. This fixes "git tag", and lets users invoke the same behavior from their own custom formats (for "tag" or "for-each-ref") while leaving ":short" with its same consistent meaning in all places. We introduce a test in t7004 for "git tag", which fails without this patch. We also add a similar test in t3203 for "git branch", which does not actually fail. But since it is likely that "branch" will eventually use the same formatting code, the test helps defend against future regressions. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-01-26 11:00:05 +08:00
case '/':
remaining--;
break;
}
}
start = xstrdup(start);
free((char *)to_free);
tag: do not show ambiguous tag names as "tags/foo" Since b7cc53e9 (tag.c: use 'ref-filter' APIs, 2015-07-11), git-tag has started showing tags with ambiguous names (i.e., when both "heads/foo" and "tags/foo" exists) as "tags/foo" instead of just "foo". This is both: - pointless; the output of "git tag" includes only refs/tags, so we know that "foo" means the one in "refs/tags". and - ambiguous; in the original output, we know that the line "foo" means that "refs/tags/foo" exists. In the new output, it is unclear whether we mean "refs/tags/foo" or "refs/tags/tags/foo". The reason this happens is that commit b7cc53e9 switched git-tag to use ref-filter's "%(refname:short)" output formatting, which was adapted from for-each-ref. This more general code does not know that we care only about tags, and uses shorten_unambiguous_ref to get the short-name. We need to tell it that we care only about "refs/tags/", and it should shorten with respect to that value. In theory, the ref-filter code could figure this out by us passing FILTER_REFS_TAGS. But there are two complications there: 1. The handling of refname:short is deep in formatting code that does not even have our ref_filter struct, let alone the arguments to the filter_ref struct. 2. In git v2.7.0, we expose the formatting language to the user. If we follow this path, it will mean that "%(refname:short)" behaves differently for "tag" versus "for-each-ref" (including "for-each-ref refs/tags/"), which can lead to confusion. Instead, let's add a new modifier to the formatting language, "strip", to remove a specific set of prefix components. This fixes "git tag", and lets users invoke the same behavior from their own custom formats (for "tag" or "for-each-ref") while leaving ":short" with its same consistent meaning in all places. We introduce a test in t7004 for "git tag", which fails without this patch. We also add a similar test in t3203 for "git branch", which does not actually fail. But since it is likely that "branch" will eventually use the same formatting code, the test helps defend against future regressions. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-01-26 11:00:05 +08:00
return start;
}
static const char *rstrip_ref_components(const char *refname, int len)
{
long remaining = len;
const char *start = xstrdup(refname);
const char *to_free = start;
if (len < 0) {
int i;
const char *p = refname;
/* Find total no of '/' separated path-components */
for (i = 0; p[i]; p[i] == '/' ? i++ : *p++)
;
/*
* The number of components we need to strip is now
* the total minus the components to be left (Plus one
* because we count the number of '/', but the number
* of components is one more than the no of '/').
*/
remaining = i + len + 1;
}
while (remaining-- > 0) {
char *p = strrchr(start, '/');
if (!p) {
free((char *)to_free);
return xstrdup("");
} else
p[0] = '\0';
}
return start;
}
static const char *show_ref(struct refname_atom *atom, const char *refname)
{
if (atom->option == R_SHORT)
return refs_shorten_unambiguous_ref(get_main_ref_store(the_repository),
refname,
warn_ambiguous_refs);
else if (atom->option == R_LSTRIP)
return lstrip_ref_components(refname, atom->lstrip);
else if (atom->option == R_RSTRIP)
return rstrip_ref_components(refname, atom->rstrip);
else
return xstrdup(refname);
}
static void fill_remote_ref_details(struct used_atom *atom, const char *refname,
struct branch *branch, const char **s)
{
int num_ours, num_theirs;
if (atom->u.remote_ref.option == RR_REF)
*s = show_ref(&atom->u.remote_ref.refname, refname);
else if (atom->u.remote_ref.option == RR_TRACK) {
if (stat_tracking_info(branch, &num_ours, &num_theirs,
NULL, atom->u.remote_ref.push,
AHEAD_BEHIND_FULL) < 0) {
*s = xstrdup(msgs.gone);
} else if (!num_ours && !num_theirs)
*s = xstrdup("");
else if (!num_ours)
*s = xstrfmt(msgs.behind, num_theirs);
else if (!num_theirs)
*s = xstrfmt(msgs.ahead, num_ours);
else
*s = xstrfmt(msgs.ahead_behind,
num_ours, num_theirs);
if (!atom->u.remote_ref.nobracket && *s[0]) {
const char *to_free = *s;
*s = xstrfmt("[%s]", *s);
free((void *)to_free);
}
} else if (atom->u.remote_ref.option == RR_TRACKSHORT) {
if (stat_tracking_info(branch, &num_ours, &num_theirs,
NULL, atom->u.remote_ref.push,
AHEAD_BEHIND_FULL) < 0) {
*s = xstrdup("");
return;
}
if (!num_ours && !num_theirs)
*s = xstrdup("=");
else if (!num_ours)
*s = xstrdup("<");
else if (!num_theirs)
*s = xstrdup(">");
else
*s = xstrdup("<>");
for-each-ref: let upstream/push optionally report the remote name There are times when e.g. scripts want to know not only the name of the upstream branch on the remote repository, but also the name of the remote. This patch offers the new suffix :remotename for the upstream and for the push atoms, allowing to show exactly that. Example: $ cat .git/config ... [remote "origin"] url = https://where.do.we.come/from fetch = refs/heads/*:refs/remote/origin/* [remote "hello-world"] url = https://hello.world/git fetch = refs/heads/*:refs/remote/origin/* pushURL = hello.world:git push = refs/heads/*:refs/heads/* [branch "master"] remote = origin pushRemote = hello-world ... $ git for-each-ref \ --format='%(upstream) %(upstream:remotename) %(push:remotename)' \ refs/heads/master refs/remotes/origin/master origin hello-world The implementation chooses *not* to DWIM the push remote if no explicit push remote was configured; The reason is that it is possible to DWIM this by using %(if)%(push:remotename)%(then) %(push:remotename) %(else) %(upstream:remotename) %(end) while it would be impossible to "un-DWIM" the information in case the caller is really only interested in explicit push remotes. While `:remote` would be shorter, it would also be a bit more ambiguous, and it would also shut the door e.g. for `:remoteref` (which would obviously refer to the corresponding ref in the remote repository). Note: the dashless, non-CamelCased form `:remotename` follows the example of the `:trackshort` example. Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-10-05 20:19:09 +08:00
} else if (atom->u.remote_ref.option == RR_REMOTE_NAME) {
int explicit;
const char *remote = atom->u.remote_ref.push ?
pushremote_for_branch(branch, &explicit) :
remote_for_branch(branch, &explicit);
*s = xstrdup(explicit ? remote : "");
} else if (atom->u.remote_ref.option == RR_REMOTE_REF) {
const char *merge;
merge = remote_ref_for_branch(branch, atom->u.remote_ref.push);
*s = xstrdup(merge ? merge : "");
} else
BUG("unhandled RR_* enum");
}
char *get_head_description(void)
{
struct strbuf desc = STRBUF_INIT;
struct wt_status_state state;
memset(&state, 0, sizeof(state));
wt_status_get_state(the_repository, &state, 1);
if (state.rebase_in_progress ||
state.rebase_interactive_in_progress) {
if (state.branch)
branch: sort detached HEAD based on a flag Change the ref-filter sorting of detached HEAD to check the FILTER_REFS_DETACHED_HEAD flag, instead of relying on the ref description filled-in by get_head_description() to start with "(", which in turn we expect to ASCII-sort before any other reference. For context, we'd like the detached line to appear first at the start of "git branch -l", e.g.: $ git branch -l * (HEAD detached at <hash>) master This doesn't change that, but improves on a fix made in 28438e84e04 (ref-filter: sort detached HEAD lines firstly, 2019-06-18) and gives the Chinese translation the ability to use its preferred punctuation marks again. In Chinese the fullwidth versions of punctuation like "()" are typically written as (U+FF08 fullwidth left parenthesis), (U+FF09 fullwidth right parenthesis) instead[1]. This form is used in both po/zh_{CN,TW}.po in most cases where "()" is translated in a string. Aside from that improvement to the Chinese translation, it also just makes for cleaner code that we mark any special cases in the ref_array we're sorting with flags and make the sort function aware of them, instead of piggy-backing on the general-case of strcmp() doing the right thing. As seen in the amended tests this made reverse sorting a bit more consistent. Before this we'd sometimes sort this message in the middle, now it's consistently at the beginning or end, depending on whether we're doing a normal or reverse sort. Having it at the end doesn't make much sense either, but at least it behaves consistently now. A follow-up commit will make this behavior under reverse sorting even better. I'm removing the "TRANSLATORS" comments that were in the old code while I'm at it. Those were added in d4919bb288e (ref-filter: move get_head_description() from branch.c, 2017-01-10). I think it's obvious from context, string and translation memory in typical translation tools that these are the same or similar string. 1. https://en.wikipedia.org/wiki/Chinese_punctuation#Marks_similar_to_European_punctuation Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-01-07 17:51:52 +08:00
strbuf_addf(&desc, _("(no branch, rebasing %s)"),
state.branch);
else
branch: sort detached HEAD based on a flag Change the ref-filter sorting of detached HEAD to check the FILTER_REFS_DETACHED_HEAD flag, instead of relying on the ref description filled-in by get_head_description() to start with "(", which in turn we expect to ASCII-sort before any other reference. For context, we'd like the detached line to appear first at the start of "git branch -l", e.g.: $ git branch -l * (HEAD detached at <hash>) master This doesn't change that, but improves on a fix made in 28438e84e04 (ref-filter: sort detached HEAD lines firstly, 2019-06-18) and gives the Chinese translation the ability to use its preferred punctuation marks again. In Chinese the fullwidth versions of punctuation like "()" are typically written as (U+FF08 fullwidth left parenthesis), (U+FF09 fullwidth right parenthesis) instead[1]. This form is used in both po/zh_{CN,TW}.po in most cases where "()" is translated in a string. Aside from that improvement to the Chinese translation, it also just makes for cleaner code that we mark any special cases in the ref_array we're sorting with flags and make the sort function aware of them, instead of piggy-backing on the general-case of strcmp() doing the right thing. As seen in the amended tests this made reverse sorting a bit more consistent. Before this we'd sometimes sort this message in the middle, now it's consistently at the beginning or end, depending on whether we're doing a normal or reverse sort. Having it at the end doesn't make much sense either, but at least it behaves consistently now. A follow-up commit will make this behavior under reverse sorting even better. I'm removing the "TRANSLATORS" comments that were in the old code while I'm at it. Those were added in d4919bb288e (ref-filter: move get_head_description() from branch.c, 2017-01-10). I think it's obvious from context, string and translation memory in typical translation tools that these are the same or similar string. 1. https://en.wikipedia.org/wiki/Chinese_punctuation#Marks_similar_to_European_punctuation Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-01-07 17:51:52 +08:00
strbuf_addf(&desc, _("(no branch, rebasing detached HEAD %s)"),
state.detached_from);
} else if (state.bisect_in_progress)
branch: sort detached HEAD based on a flag Change the ref-filter sorting of detached HEAD to check the FILTER_REFS_DETACHED_HEAD flag, instead of relying on the ref description filled-in by get_head_description() to start with "(", which in turn we expect to ASCII-sort before any other reference. For context, we'd like the detached line to appear first at the start of "git branch -l", e.g.: $ git branch -l * (HEAD detached at <hash>) master This doesn't change that, but improves on a fix made in 28438e84e04 (ref-filter: sort detached HEAD lines firstly, 2019-06-18) and gives the Chinese translation the ability to use its preferred punctuation marks again. In Chinese the fullwidth versions of punctuation like "()" are typically written as (U+FF08 fullwidth left parenthesis), (U+FF09 fullwidth right parenthesis) instead[1]. This form is used in both po/zh_{CN,TW}.po in most cases where "()" is translated in a string. Aside from that improvement to the Chinese translation, it also just makes for cleaner code that we mark any special cases in the ref_array we're sorting with flags and make the sort function aware of them, instead of piggy-backing on the general-case of strcmp() doing the right thing. As seen in the amended tests this made reverse sorting a bit more consistent. Before this we'd sometimes sort this message in the middle, now it's consistently at the beginning or end, depending on whether we're doing a normal or reverse sort. Having it at the end doesn't make much sense either, but at least it behaves consistently now. A follow-up commit will make this behavior under reverse sorting even better. I'm removing the "TRANSLATORS" comments that were in the old code while I'm at it. Those were added in d4919bb288e (ref-filter: move get_head_description() from branch.c, 2017-01-10). I think it's obvious from context, string and translation memory in typical translation tools that these are the same or similar string. 1. https://en.wikipedia.org/wiki/Chinese_punctuation#Marks_similar_to_European_punctuation Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-01-07 17:51:52 +08:00
strbuf_addf(&desc, _("(no branch, bisect started on %s)"),
status: fix branch shown when not only bisecting In 83c750acde (wt-status.*: better advice for git status added, 2012-06-05), git-status received new informative messages to describe the ongoing work in a worktree. These messages were enhanced in 0722c805d6 (status: show the branch name if possible in in-progress info, 2013-02-03), to show, if possible, the branch where the operation was initiated. Since then, we show incorrect information when several operations are in progress and one of them is bisect: $ git checkout -b foo $ GIT_SEQUENCE_EDITOR='echo break >' git rebase -i HEAD~ $ git checkout -b bar $ git bisect start $ git status ... You are currently editing a commit while rebasing branch 'bar' on '...'. You are currently bisecting, started from branch 'bar'. ... Note that we erroneously say "while rebasing branch 'bar'" when we should be referring to "foo". This must have gone unnoticed for so long because it must be unusual to start a bisection while another operation is in progress. And even less usual to involve different branches. It caught my attention reviewing a leak introduced in 8b87cfd000 (wt-status: move strbuf into read_and_strip_branch(), 2013-03-16). A simple change to deal with this situation can be to record in struct wt_status_state, the branch where the bisect starts separately from the branch related to other operations. Let's do it and so we'll be able to display correct information and we'll avoid the leak as well. Signed-off-by: Rubén Justo <rjusto@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-09-10 04:12:47 +08:00
state.bisecting_from);
else if (state.detached_from) {
if (state.detached_at)
branch: sort detached HEAD based on a flag Change the ref-filter sorting of detached HEAD to check the FILTER_REFS_DETACHED_HEAD flag, instead of relying on the ref description filled-in by get_head_description() to start with "(", which in turn we expect to ASCII-sort before any other reference. For context, we'd like the detached line to appear first at the start of "git branch -l", e.g.: $ git branch -l * (HEAD detached at <hash>) master This doesn't change that, but improves on a fix made in 28438e84e04 (ref-filter: sort detached HEAD lines firstly, 2019-06-18) and gives the Chinese translation the ability to use its preferred punctuation marks again. In Chinese the fullwidth versions of punctuation like "()" are typically written as (U+FF08 fullwidth left parenthesis), (U+FF09 fullwidth right parenthesis) instead[1]. This form is used in both po/zh_{CN,TW}.po in most cases where "()" is translated in a string. Aside from that improvement to the Chinese translation, it also just makes for cleaner code that we mark any special cases in the ref_array we're sorting with flags and make the sort function aware of them, instead of piggy-backing on the general-case of strcmp() doing the right thing. As seen in the amended tests this made reverse sorting a bit more consistent. Before this we'd sometimes sort this message in the middle, now it's consistently at the beginning or end, depending on whether we're doing a normal or reverse sort. Having it at the end doesn't make much sense either, but at least it behaves consistently now. A follow-up commit will make this behavior under reverse sorting even better. I'm removing the "TRANSLATORS" comments that were in the old code while I'm at it. Those were added in d4919bb288e (ref-filter: move get_head_description() from branch.c, 2017-01-10). I think it's obvious from context, string and translation memory in typical translation tools that these are the same or similar string. 1. https://en.wikipedia.org/wiki/Chinese_punctuation#Marks_similar_to_European_punctuation Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-01-07 17:51:52 +08:00
strbuf_addf(&desc, _("(HEAD detached at %s)"),
state.detached_from);
else
branch: sort detached HEAD based on a flag Change the ref-filter sorting of detached HEAD to check the FILTER_REFS_DETACHED_HEAD flag, instead of relying on the ref description filled-in by get_head_description() to start with "(", which in turn we expect to ASCII-sort before any other reference. For context, we'd like the detached line to appear first at the start of "git branch -l", e.g.: $ git branch -l * (HEAD detached at <hash>) master This doesn't change that, but improves on a fix made in 28438e84e04 (ref-filter: sort detached HEAD lines firstly, 2019-06-18) and gives the Chinese translation the ability to use its preferred punctuation marks again. In Chinese the fullwidth versions of punctuation like "()" are typically written as (U+FF08 fullwidth left parenthesis), (U+FF09 fullwidth right parenthesis) instead[1]. This form is used in both po/zh_{CN,TW}.po in most cases where "()" is translated in a string. Aside from that improvement to the Chinese translation, it also just makes for cleaner code that we mark any special cases in the ref_array we're sorting with flags and make the sort function aware of them, instead of piggy-backing on the general-case of strcmp() doing the right thing. As seen in the amended tests this made reverse sorting a bit more consistent. Before this we'd sometimes sort this message in the middle, now it's consistently at the beginning or end, depending on whether we're doing a normal or reverse sort. Having it at the end doesn't make much sense either, but at least it behaves consistently now. A follow-up commit will make this behavior under reverse sorting even better. I'm removing the "TRANSLATORS" comments that were in the old code while I'm at it. Those were added in d4919bb288e (ref-filter: move get_head_description() from branch.c, 2017-01-10). I think it's obvious from context, string and translation memory in typical translation tools that these are the same or similar string. 1. https://en.wikipedia.org/wiki/Chinese_punctuation#Marks_similar_to_European_punctuation Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-01-07 17:51:52 +08:00
strbuf_addf(&desc, _("(HEAD detached from %s)"),
state.detached_from);
} else
strbuf_addstr(&desc, _("(no branch)"));
wt_status_state_free_buffers(&state);
return strbuf_detach(&desc, NULL);
}
static const char *get_symref(struct used_atom *atom, struct ref_array_item *ref)
{
if (!ref->symref)
return xstrdup("");
else
return show_ref(&atom->u.refname, ref->symref);
}
static const char *get_refname(struct used_atom *atom, struct ref_array_item *ref)
{
if (ref->kind & FILTER_REFS_DETACHED_HEAD)
return get_head_description();
return show_ref(&atom->u.refname, ref->refname);
}
static int get_object(struct ref_array_item *ref, int deref, struct object **obj,
struct expand_data *oi, struct strbuf *err)
{
/* parse_object_buffer() will set eaten to 0 if free() will be needed */
int eaten = 1;
if (oi->info.contentp) {
/* We need to know that to use parse_object_buffer properly */
oi->info.sizep = &oi->size;
oi->info.typep = &oi->type;
}
if (oid_object_info_extended(the_repository, &oi->oid, &oi->info,
OBJECT_INFO_LOOKUP_REPLACE))
return strbuf_addf_ret(err, -1, _("missing object %s for %s"),
oid_to_hex(&oi->oid), ref->refname);
if (oi->info.disk_sizep && oi->disk_size < 0)
BUG("Object size is less than zero.");
if (oi->info.contentp) {
*obj = parse_object_buffer(the_repository, &oi->oid, oi->type, oi->size, oi->content, &eaten);
if (!*obj) {
if (!eaten)
free(oi->content);
return strbuf_addf_ret(err, -1, _("parse_object_buffer failed on %s for %s"),
oid_to_hex(&oi->oid), ref->refname);
}
grab_values(ref->value, deref, *obj, oi);
}
grab_common_values(ref->value, deref, oi);
if (!eaten)
free(oi->content);
return 0;
}
static void populate_worktree_map(struct hashmap *map, struct worktree **worktrees)
{
int i;
for (i = 0; worktrees[i]; i++) {
if (worktrees[i]->head_ref) {
struct ref_to_worktree_entry *entry;
entry = xmalloc(sizeof(*entry));
entry->wt = worktrees[i];
hashmap_entry_init(&entry->ent,
strhash(worktrees[i]->head_ref));
hashmap_add(map, &entry->ent);
}
}
}
static void lazy_init_worktree_map(void)
{
if (ref_to_worktree_map.worktrees)
return;
ref_to_worktree_map.worktrees = get_worktrees();
hashmap_init(&(ref_to_worktree_map.map), ref_to_worktree_map_cmpfnc, NULL, 0);
populate_worktree_map(&(ref_to_worktree_map.map), ref_to_worktree_map.worktrees);
}
static char *get_worktree_path(const struct ref_array_item *ref)
{
struct hashmap_entry entry, *e;
struct ref_to_worktree_entry *lookup_result;
lazy_init_worktree_map();
hashmap_entry_init(&entry, strhash(ref->refname));
e = hashmap_get(&(ref_to_worktree_map.map), &entry, ref->refname);
if (!e)
return xstrdup("");
lookup_result = container_of(e, struct ref_to_worktree_entry, ent);
return xstrdup(lookup_result->wt->path);
}
/*
* Parse the object referred by ref, and grab needed value.
*/
static int populate_value(struct ref_array_item *ref, struct strbuf *err)
{
struct object *obj;
int i;
struct object_info empty = OBJECT_INFO_INIT;
for-each-ref: add ahead-behind format atom The previous change implemented the ahead_behind() method, including an algorithm to compute the ahead/behind values for a number of commit tips relative to a number of commit bases. Now, integrate that algorithm as part of 'git for-each-ref' hidden behind a new format atom, ahead-behind. This naturally extends to 'git branch' and 'git tag' builtins, as well. This format allows specifying multiple bases, if so desired, and all matching references are compared against all of those bases. For this reason, failing to read a reference provided from these atoms results in an error. In order to translate the ahead_behind() method information to the format output code in ref-filter.c, we must populate arrays of ahead_behind_count structs. In struct ref_array, we store the full array that will be passed to ahead_behind(). In struct ref_array_item, we store an array of pointers that point to the relvant items within the full array. In this way, we can pull all relevant ahead/behind values directly when formatting output for a specific item. It also ensures the lifetime of the ahead_behind_count structs matches the time that the array is being used. Add specific tests of the ahead/behind counts in t6600-test-reach.sh, as it has an interesting repository shape. In particular, its merging strategy and its use of different commit-graphs would demonstrate over- counting if the ahead_behind() method did not already account for that possibility. Also add tests for the specific for-each-ref, branch, and tag builtins. In the case of 'git tag', there are intersting cases that happen when some of the selected tips are not commits. This requires careful logic around commits_nr in the second loop of filter_ahead_behind(). Also, the test in t7004 is carefully located to avoid being dependent on the GPG prereq. It also avoids using the test_commit helper, as that will add ticks to the time and disrupt the expected timestamps in later tag tests. Also add performance tests in a new p1300-graph-walks.sh script. This will be useful for more uses in the future, but for now compare the ahead-behind counting algorithm in 'git for-each-ref' to the naive implementation by running 'git rev-list --count' processes for each input. For the Git source code repository, the improvement is already obvious: Test this tree --------------------------------------------------------------- 1500.2: ahead-behind counts: git for-each-ref 0.07(0.07+0.00) 1500.3: ahead-behind counts: git branch 0.07(0.06+0.00) 1500.4: ahead-behind counts: git tag 0.07(0.06+0.00) 1500.5: ahead-behind counts: git rev-list 1.32(1.04+0.27) But the standard performance benchmark is the Linux kernel repository, which demosntrates a significant improvement: Test this tree --------------------------------------------------------------- 1500.2: ahead-behind counts: git for-each-ref 0.27(0.24+0.02) 1500.3: ahead-behind counts: git branch 0.27(0.24+0.03) 1500.4: ahead-behind counts: git tag 0.28(0.27+0.01) 1500.5: ahead-behind counts: git rev-list 4.57(4.03+0.54) The 'git rev-list' test exists in this change as a demonstration, but it will be removed in the next change to avoid wasting time on this comparison. Signed-off-by: Derrick Stolee <derrickstolee@github.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-03-20 19:26:54 +08:00
int ahead_behind_atoms = 0;
for-each-ref: add 'is-base' token The previous change introduced the get_branch_base_for_tip() method in commit-reach.c. The motivation of that change was about using a heuristic to deteremine the base branch for a source commit from a list of candidate commit tips. This change makes that algorithm visible to users via a new atom in the 'git for-each-ref' format. This change is very similar to the chang in 49abcd21da6 (for-each-ref: add ahead-behind format atom, 2023-03-20). Introduce the 'is-base:<source>' atom, which will indicate that the algorithm should be computed and the result of the algorithm is reported using an indicator of the form '(<source>)'. For example, using '%(is-base:HEAD)' would result in one line having the token '(HEAD)'. Use the sorted order of refs included in the ref filter to break ties in the algorithm's heuristic. In the previous change, the motivating examples include using an L0 trunk, long-lived L1 branches, and temporary release branches. A caller could communicate the ordered preference among these categories using the input refpecs and avoiding a different sort mechanism. This sorting behavior is tested in the test scripts. It is important to include this atom as a special case to can_do_iterative_format() to match the expectations created in bd98f9774e1 (ref-filter.c: filter & format refs in the same callback, 2023-11-14). The ahead-behind atom was one of the special cases, and this similarly requires using an algorithm across all input refs before starting the format of any single ref. In the test script, the format tokens use colons or lack whitespace to avoid Git complaining about trailing whitespace errors. Signed-off-by: Derrick Stolee <stolee@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2024-08-14 18:31:29 +08:00
int is_base_atoms = 0;
CALLOC_ARRAY(ref->value, used_atom_cnt);
/**
* NEEDSWORK: The following code might be unncessary if all codepaths
* that call populate_value() populates the symref member of ref_array_item
* like in apply_ref_filter(). Currently pretty_print_ref() is the only codepath
* that calls populate_value() without first populating symref.
*/
if (need_symref && (ref->flag & REF_ISSYMREF) && !ref->symref) {
ref->symref = refs_resolve_refdup(get_main_ref_store(the_repository),
ref->refname,
RESOLVE_REF_READING,
NULL, NULL);
if (!ref->symref)
ref->symref = xstrdup("");
}
/* Fill in specials first */
for (i = 0; i < used_atom_cnt; i++) {
struct used_atom *atom = &used_atom[i];
enum atom_type atom_type = atom->atom_type;
const char *name = used_atom[i].name;
struct atom_value *v = &ref->value[i];
int deref = 0;
const char *refname;
struct branch *branch = NULL;
ref-filter: add %(raw) atom Add new formatting option `%(raw)`, which will print the raw object data without any changes. It will help further to migrate all cat-file formatting logic from cat-file to ref-filter. The raw data of blob, tree objects may contain '\0', but most of the logic in `ref-filter` depends on the output of the atom being text (specifically, no embedded NULs in it). E.g. `quote_formatting()` use `strbuf_addstr()` or `*._quote_buf()` add the data to the buffer. The raw data of a tree object is `100644 one\0...`, only the `100644 one` will be added to the buffer, which is incorrect. Therefore, we need to find a way to record the length of the atom_value's member `s`. Although strbuf can already record the string and its length, if we want to replace the type of atom_value's member `s` with strbuf, many places in ref-filter that are filled with dynamically allocated mermory in `v->s` are not easy to replace. At the same time, we need to check if `v->s == NULL` in populate_value(), and strbuf cannot easily distinguish NULL and empty strings, but c-style "const char *" can do it. So add a new member in `struct atom_value`: `s_size`, which can record raw object size, it can help us add raw object data to the buffer or compare two buffers which contain raw object data. Note that `--format=%(raw)` cannot be used with `--python`, `--shell`, `--tcl`, and `--perl` because if the binary raw data is passed to a variable in such languages, these may not support arbitrary binary data in their string variable type. Reviewed-by: Jacob Keller <jacob.keller@gmail.com> Mentored-by: Christian Couder <christian.couder@gmail.com> Mentored-by: Hariom Verma <hariom18599@gmail.com> Helped-by: Bagas Sanjaya <bagasdotme@gmail.com> Helped-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Helped-by: Felipe Contreras <felipe.contreras@gmail.com> Helped-by: Phillip Wood <phillip.wood@dunelm.org.uk> Helped-by: Junio C Hamano <gitster@pobox.com> Based-on-patch-by: Olga Telezhnaya <olyatelezhnaya@gmail.com> Signed-off-by: ZheNing Hu <adlternative@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-07-26 11:26:47 +08:00
v->s_size = ATOM_SIZE_UNSPECIFIED;
v->handler = append_atom;
v->value = 0;
v->atom = atom;
if (*name == '*') {
deref = 1;
name++;
}
if (atom_type == ATOM_REFNAME)
refname = get_refname(atom, ref);
else if (atom_type == ATOM_WORKTREEPATH) {
if (ref->kind == FILTER_REFS_BRANCHES)
v->s = get_worktree_path(ref);
else
v->s = xstrdup("");
continue;
}
else if (atom_type == ATOM_SYMREF)
refname = get_symref(atom, ref);
else if (atom_type == ATOM_UPSTREAM) {
const char *branch_name;
/* only local branches may have an upstream */
if (!skip_prefix(ref->refname, "refs/heads/",
&branch_name)) {
v->s = xstrdup("");
continue;
}
branch = branch_get(branch_name);
refname = branch_get_upstream(branch, NULL);
if (refname)
fill_remote_ref_details(atom, refname, branch, &v->s);
else
v->s = xstrdup("");
continue;
} else if (atom_type == ATOM_PUSH && atom->u.remote_ref.push) {
const char *branch_name;
v->s = xstrdup("");
if (!skip_prefix(ref->refname, "refs/heads/",
&branch_name))
continue;
branch = branch_get(branch_name);
for-each-ref: let upstream/push optionally report the remote name There are times when e.g. scripts want to know not only the name of the upstream branch on the remote repository, but also the name of the remote. This patch offers the new suffix :remotename for the upstream and for the push atoms, allowing to show exactly that. Example: $ cat .git/config ... [remote "origin"] url = https://where.do.we.come/from fetch = refs/heads/*:refs/remote/origin/* [remote "hello-world"] url = https://hello.world/git fetch = refs/heads/*:refs/remote/origin/* pushURL = hello.world:git push = refs/heads/*:refs/heads/* [branch "master"] remote = origin pushRemote = hello-world ... $ git for-each-ref \ --format='%(upstream) %(upstream:remotename) %(push:remotename)' \ refs/heads/master refs/remotes/origin/master origin hello-world The implementation chooses *not* to DWIM the push remote if no explicit push remote was configured; The reason is that it is possible to DWIM this by using %(if)%(push:remotename)%(then) %(push:remotename) %(else) %(upstream:remotename) %(end) while it would be impossible to "un-DWIM" the information in case the caller is really only interested in explicit push remotes. While `:remote` would be shorter, it would also be a bit more ambiguous, and it would also shut the door e.g. for `:remoteref` (which would obviously refer to the corresponding ref in the remote repository). Note: the dashless, non-CamelCased form `:remotename` follows the example of the `:trackshort` example. Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-10-05 20:19:09 +08:00
if (atom->u.remote_ref.push_remote)
refname = NULL;
else {
refname = branch_get_push(branch, NULL);
if (!refname)
continue;
}
/* We will definitely re-init v->s on the next line. */
free((char *)v->s);
fill_remote_ref_details(atom, refname, branch, &v->s);
continue;
} else if (atom_type == ATOM_COLOR) {
v->s = xstrdup(atom->u.color);
continue;
} else if (atom_type == ATOM_FLAG) {
char buf[256], *cp = buf;
if (ref->flag & REF_ISSYMREF)
cp = copy_advance(cp, ",symref");
if (ref->flag & REF_ISPACKED)
cp = copy_advance(cp, ",packed");
if (cp == buf)
v->s = xstrdup("");
else {
*cp = '\0';
v->s = xstrdup(buf + 1);
}
continue;
} else if (!deref && atom_type == ATOM_OBJECTNAME &&
grab_oid(name, "objectname", &ref->objectname, v, atom)) {
continue;
} else if (atom_type == ATOM_HEAD) {
if (atom->u.head && !strcmp(ref->refname, atom->u.head))
v->s = xstrdup("*");
else
v->s = xstrdup(" ");
continue;
} else if (atom_type == ATOM_ALIGN) {
2015-09-11 23:03:07 +08:00
v->handler = align_atom_handler;
v->s = xstrdup("");
2015-09-11 23:03:07 +08:00
continue;
} else if (atom_type == ATOM_END) {
2015-09-11 23:03:07 +08:00
v->handler = end_atom_handler;
v->s = xstrdup("");
2015-09-11 23:03:07 +08:00
continue;
} else if (atom_type == ATOM_IF) {
const char *s;
if (skip_prefix(name, "if:", &s))
v->s = xstrdup(s);
else
v->s = xstrdup("");
v->handler = if_atom_handler;
continue;
} else if (atom_type == ATOM_THEN) {
v->handler = then_atom_handler;
v->s = xstrdup("");
continue;
} else if (atom_type == ATOM_ELSE) {
v->handler = else_atom_handler;
v->s = xstrdup("");
continue;
} else if (atom_type == ATOM_REST) {
if (ref->rest)
v->s = xstrdup(ref->rest);
else
v->s = xstrdup("");
continue;
for-each-ref: add ahead-behind format atom The previous change implemented the ahead_behind() method, including an algorithm to compute the ahead/behind values for a number of commit tips relative to a number of commit bases. Now, integrate that algorithm as part of 'git for-each-ref' hidden behind a new format atom, ahead-behind. This naturally extends to 'git branch' and 'git tag' builtins, as well. This format allows specifying multiple bases, if so desired, and all matching references are compared against all of those bases. For this reason, failing to read a reference provided from these atoms results in an error. In order to translate the ahead_behind() method information to the format output code in ref-filter.c, we must populate arrays of ahead_behind_count structs. In struct ref_array, we store the full array that will be passed to ahead_behind(). In struct ref_array_item, we store an array of pointers that point to the relvant items within the full array. In this way, we can pull all relevant ahead/behind values directly when formatting output for a specific item. It also ensures the lifetime of the ahead_behind_count structs matches the time that the array is being used. Add specific tests of the ahead/behind counts in t6600-test-reach.sh, as it has an interesting repository shape. In particular, its merging strategy and its use of different commit-graphs would demonstrate over- counting if the ahead_behind() method did not already account for that possibility. Also add tests for the specific for-each-ref, branch, and tag builtins. In the case of 'git tag', there are intersting cases that happen when some of the selected tips are not commits. This requires careful logic around commits_nr in the second loop of filter_ahead_behind(). Also, the test in t7004 is carefully located to avoid being dependent on the GPG prereq. It also avoids using the test_commit helper, as that will add ticks to the time and disrupt the expected timestamps in later tag tests. Also add performance tests in a new p1300-graph-walks.sh script. This will be useful for more uses in the future, but for now compare the ahead-behind counting algorithm in 'git for-each-ref' to the naive implementation by running 'git rev-list --count' processes for each input. For the Git source code repository, the improvement is already obvious: Test this tree --------------------------------------------------------------- 1500.2: ahead-behind counts: git for-each-ref 0.07(0.07+0.00) 1500.3: ahead-behind counts: git branch 0.07(0.06+0.00) 1500.4: ahead-behind counts: git tag 0.07(0.06+0.00) 1500.5: ahead-behind counts: git rev-list 1.32(1.04+0.27) But the standard performance benchmark is the Linux kernel repository, which demosntrates a significant improvement: Test this tree --------------------------------------------------------------- 1500.2: ahead-behind counts: git for-each-ref 0.27(0.24+0.02) 1500.3: ahead-behind counts: git branch 0.27(0.24+0.03) 1500.4: ahead-behind counts: git tag 0.28(0.27+0.01) 1500.5: ahead-behind counts: git rev-list 4.57(4.03+0.54) The 'git rev-list' test exists in this change as a demonstration, but it will be removed in the next change to avoid wasting time on this comparison. Signed-off-by: Derrick Stolee <derrickstolee@github.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-03-20 19:26:54 +08:00
} else if (atom_type == ATOM_AHEADBEHIND) {
if (ref->counts) {
const struct ahead_behind_count *count;
count = ref->counts[ahead_behind_atoms++];
v->s = xstrfmt("%d %d", count->ahead, count->behind);
} else {
/* Not a commit. */
v->s = xstrdup("");
}
continue;
for-each-ref: add 'is-base' token The previous change introduced the get_branch_base_for_tip() method in commit-reach.c. The motivation of that change was about using a heuristic to deteremine the base branch for a source commit from a list of candidate commit tips. This change makes that algorithm visible to users via a new atom in the 'git for-each-ref' format. This change is very similar to the chang in 49abcd21da6 (for-each-ref: add ahead-behind format atom, 2023-03-20). Introduce the 'is-base:<source>' atom, which will indicate that the algorithm should be computed and the result of the algorithm is reported using an indicator of the form '(<source>)'. For example, using '%(is-base:HEAD)' would result in one line having the token '(HEAD)'. Use the sorted order of refs included in the ref filter to break ties in the algorithm's heuristic. In the previous change, the motivating examples include using an L0 trunk, long-lived L1 branches, and temporary release branches. A caller could communicate the ordered preference among these categories using the input refpecs and avoiding a different sort mechanism. This sorting behavior is tested in the test scripts. It is important to include this atom as a special case to can_do_iterative_format() to match the expectations created in bd98f9774e1 (ref-filter.c: filter & format refs in the same callback, 2023-11-14). The ahead-behind atom was one of the special cases, and this similarly requires using an algorithm across all input refs before starting the format of any single ref. In the test script, the format tokens use colons or lack whitespace to avoid Git complaining about trailing whitespace errors. Signed-off-by: Derrick Stolee <stolee@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2024-08-14 18:31:29 +08:00
} else if (atom_type == ATOM_ISBASE) {
if (ref->is_base && ref->is_base[is_base_atoms]) {
v->s = xstrfmt("(%s)", ref->is_base[is_base_atoms]);
free(ref->is_base[is_base_atoms]);
} else {
v->s = xstrdup("");
}
is_base_atoms++;
continue;
} else
continue;
if (!deref)
v->s = xstrdup(refname);
else
v->s = xstrfmt("%s^{}", refname);
free((char *)refname);
}
for (i = 0; i < used_atom_cnt; i++) {
struct atom_value *v = &ref->value[i];
if (v->s == NULL && used_atom[i].source == SOURCE_NONE)
return strbuf_addf_ret(err, -1, _("missing object %s for %s"),
oid_to_hex(&ref->objectname), ref->refname);
}
if (need_tagged)
oi.info.contentp = &oi.content;
if (!memcmp(&oi.info, &empty, sizeof(empty)) &&
!memcmp(&oi_deref.info, &empty, sizeof(empty)))
return 0;
oi.oid = ref->objectname;
if (get_object(ref, 0, &obj, &oi, err))
return -1;
/*
* If there is no atom that wants to know about tagged
* object, we are done.
*/
if (!need_tagged || (obj->type != OBJ_TAG))
return 0;
/*
ref-filter.c: use peeled tag for '*' format fields In most builtins ('rev-parse <revision>^{}', 'show-ref --dereference'), "dereferencing" a tag refers to a recursive peel of the tag object. Unlike these cases, the dereferencing prefix ('*') in 'for-each-ref' format specifiers triggers only a single, non-recursive dereference of a given tag object. For most annotated tags, a single dereference is all that is needed to access the tag's associated commit or tree; "recursive" and "non-recursive" dereferencing are functionally equivalent in these cases. However, nested tags (annotated tags whose target is another annotated tag) dereferenced once return another tag, where a recursive dereference would return the commit or tree. Currently, if a user wants to filter & format refs and include information about a recursively-dereferenced tag, they can do so with something like 'cat-file --batch-check': git for-each-ref --format="%(objectname)^{} %(refname)" <pattern> | git cat-file --batch-check="%(objectname) %(rest)" But the combination of commands is inefficient. So, to improve the performance of this use case and align the defererencing behavior of 'for-each-ref' with that of other commands, update the ref formatting code to use the peeled tag (from 'peel_iterated_oid()') to populate '*' fields rather than the tag's immediate target object (from 'get_tagged_oid()'). Additionally, add a test to 't6300-for-each-ref' to verify new nested tag behavior and update 't6302-for-each-ref-filter.sh' to print the correct value for nested dereferenced fields. Signed-off-by: Victoria Dye <vdye@github.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-11-15 03:53:57 +08:00
* If it is a tag object, see if we use the peeled value. If we do,
* grab the peeled OID.
*/
if (need_tagged && peel_iterated_oid(the_repository, &obj->oid, &oi_deref.oid))
ref-filter.c: use peeled tag for '*' format fields In most builtins ('rev-parse <revision>^{}', 'show-ref --dereference'), "dereferencing" a tag refers to a recursive peel of the tag object. Unlike these cases, the dereferencing prefix ('*') in 'for-each-ref' format specifiers triggers only a single, non-recursive dereference of a given tag object. For most annotated tags, a single dereference is all that is needed to access the tag's associated commit or tree; "recursive" and "non-recursive" dereferencing are functionally equivalent in these cases. However, nested tags (annotated tags whose target is another annotated tag) dereferenced once return another tag, where a recursive dereference would return the commit or tree. Currently, if a user wants to filter & format refs and include information about a recursively-dereferenced tag, they can do so with something like 'cat-file --batch-check': git for-each-ref --format="%(objectname)^{} %(refname)" <pattern> | git cat-file --batch-check="%(objectname) %(rest)" But the combination of commands is inefficient. So, to improve the performance of this use case and align the defererencing behavior of 'for-each-ref' with that of other commands, update the ref formatting code to use the peeled tag (from 'peel_iterated_oid()') to populate '*' fields rather than the tag's immediate target object (from 'get_tagged_oid()'). Additionally, add a test to 't6300-for-each-ref' to verify new nested tag behavior and update 't6302-for-each-ref-filter.sh' to print the correct value for nested dereferenced fields. Signed-off-by: Victoria Dye <vdye@github.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-11-15 03:53:57 +08:00
die("bad tag");
return get_object(ref, 1, &obj, &oi_deref, err);
}
/*
* Given a ref, return the value for the atom. This lazily gets value
* out of the object by calling populate value.
*/
static int get_ref_atom_value(struct ref_array_item *ref, int atom,
struct atom_value **v, struct strbuf *err)
{
if (!ref->value) {
if (populate_value(ref, err))
return -1;
fill_missing_values(ref->value);
}
*v = &ref->value[atom];
return 0;
}
/*
* Return 1 if the refname matches one of the patterns, otherwise 0.
* A pattern can be a literal prefix (e.g. a refname "refs/heads/master"
* matches a pattern "refs/heads/mas") or a wildcard (e.g. the same ref
* matches "refs/heads/mas*", too).
*/
static int match_pattern(const char **patterns, const char *refname,
int ignore_case)
{
unsigned flags = 0;
if (ignore_case)
flags |= WM_CASEFOLD;
/*
* When no '--format' option is given we need to skip the prefix
* for matching refs of tags and branches.
*/
(void)(skip_prefix(refname, "refs/tags/", &refname) ||
skip_prefix(refname, "refs/heads/", &refname) ||
skip_prefix(refname, "refs/remotes/", &refname) ||
skip_prefix(refname, "refs/", &refname));
for (; *patterns; patterns++) {
if (!wildmatch(*patterns, refname, flags))
return 1;
}
return 0;
}
/*
* Return 1 if the refname matches one of the patterns, otherwise 0.
* A pattern can be path prefix (e.g. a refname "refs/heads/master"
* matches a pattern "refs/heads/" but not "refs/heads/m") or a
* wildcard (e.g. the same ref matches "refs/heads/m*", too).
*/
static int match_name_as_path(const char **pattern, const char *refname,
int ignore_case)
{
int namelen = strlen(refname);
unsigned flags = WM_PATHNAME;
if (ignore_case)
flags |= WM_CASEFOLD;
for (; *pattern; pattern++) {
const char *p = *pattern;
int plen = strlen(p);
if ((plen <= namelen) &&
!strncmp(refname, p, plen) &&
(refname[plen] == '\0' ||
refname[plen] == '/' ||
p[plen-1] == '/'))
return 1;
if (!wildmatch(p, refname, flags))
return 1;
}
return 0;
}
/* Return 1 if the refname matches one of the patterns, otherwise 0. */
static int filter_pattern_match(struct ref_filter *filter, const char *refname)
{
if (!*filter->name_patterns)
return 1; /* No pattern always matches */
if (filter->match_as_path)
return match_name_as_path(filter->name_patterns, refname,
filter->ignore_case);
return match_pattern(filter->name_patterns, refname,
filter->ignore_case);
}
builtin/for-each-ref.c: add `--exclude` option When using `for-each-ref`, it is sometimes convenient for the caller to be able to exclude certain parts of the references. For example, if there are many `refs/__hidden__/*` references, the caller may want to emit all references *except* the hidden ones. Currently, the only way to do this is to post-process the output, like: $ git for-each-ref --format='%(refname)' | grep -v '^refs/hidden/' Which is do-able, but requires processing a potentially large quantity of references. Teach `git for-each-ref` a new `--exclude=<pattern>` option, which excludes references from the results if they match one or more excluded patterns. This patch provides a naive implementation where the `ref_filter` still sees all references (including ones that it will discard) and is left to check whether each reference matches any excluded pattern(s) before emitting them. By culling out references we know the caller doesn't care about, we can avoid allocating memory for their storage, as well as spending time sorting the output (among other things). Even the naive implementation provides a significant speed-up on a modified copy of linux.git (that has a hidden ref pointing at each commit): $ hyperfine \ 'git.compile for-each-ref --format="%(objectname) %(refname)" | grep -vE "[0-9a-f]{40} refs/pull/"' \ 'git.compile for-each-ref --format="%(objectname) %(refname)" --exclude refs/pull/' Benchmark 1: git.compile for-each-ref --format="%(objectname) %(refname)" | grep -vE "[0-9a-f]{40} refs/pull/" Time (mean ± σ): 820.1 ms ± 2.0 ms [User: 703.7 ms, System: 152.0 ms] Range (min … max): 817.7 ms … 823.3 ms 10 runs Benchmark 2: git.compile for-each-ref --format="%(objectname) %(refname)" --exclude refs/pull/ Time (mean ± σ): 106.6 ms ± 1.1 ms [User: 99.4 ms, System: 7.1 ms] Range (min … max): 104.7 ms … 109.1 ms 27 runs Summary 'git.compile for-each-ref --format="%(objectname) %(refname)" --exclude refs/pull/' ran 7.69 ± 0.08 times faster than 'git.compile for-each-ref --format="%(objectname) %(refname)" | grep -vE "[0-9a-f]{40} refs/pull/"' Subsequent patches will improve on this by avoiding visiting excluded sections of the `packed-refs` file in certain cases. Co-authored-by: Jeff King <peff@peff.net> Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Taylor Blau <me@ttaylorr.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-07-11 05:12:19 +08:00
static int filter_exclude_match(struct ref_filter *filter, const char *refname)
{
if (!filter->exclude.nr)
return 0;
if (filter->match_as_path)
return match_name_as_path(filter->exclude.v, refname,
filter->ignore_case);
return match_pattern(filter->exclude.v, refname, filter->ignore_case);
}
/*
* This is the same as for_each_fullref_in(), but it tries to iterate
* only over the patterns we'll care about. Note that it _doesn't_ do a full
* pattern match, so the callback still has to match each ref individually.
*/
static int for_each_fullref_in_pattern(struct ref_filter *filter,
each_ref_fn cb,
void *cb_data)
{
if (filter->kind & FILTER_REFS_ROOT_REFS) {
/* In this case, we want to print all refs including root refs. */
return refs_for_each_include_root_refs(get_main_ref_store(the_repository),
cb, cb_data);
}
if (!filter->match_as_path) {
/*
* in this case, the patterns are applied after
* prefixes like "refs/heads/" etc. are stripped off,
* so we have to look at everything:
*/
return refs_for_each_fullref_in(get_main_ref_store(the_repository),
"", NULL, cb, cb_data);
}
if (filter->ignore_case) {
/*
* we can't handle case-insensitive comparisons,
* so just return everything and let the caller
* sort it out.
*/
return refs_for_each_fullref_in(get_main_ref_store(the_repository),
"", NULL, cb, cb_data);
}
if (!filter->name_patterns[0]) {
/* no patterns; we have to look at everything */
refs/packed-backend.c: implement jump lists to avoid excluded pattern(s) When iterating through the `packed-refs` file in order to answer a query like: $ git for-each-ref --exclude=refs/__hidden__ it would be useful to avoid walking over all of the entries in `refs/__hidden__/*` when possible, since we know that the ref-filter code is going to throw them away anyways. In certain circumstances, doing so is possible. The algorithm for doing so is as follows: - For each excluded pattern, find the first record that matches it, and the first record that *doesn't* match it (i.e. the location you'd next want to consider when excluding that pattern). - Sort the set of excluded regions from the previous step in ascending order of the first location within the `packed-refs` file that matches. - Clean up the results from the previous step: discard empty regions, and combine adjacent regions. The set of regions which remains is referred to as the "jump list", and never contains any references which should be included in the result set. Then when iterating through the `packed-refs` file, if `iter->pos` is ever contained in one of the regions from the previous steps, advance `iter->pos` past the end of that region, and continue enumeration. Note that we only perform this optimization when none of the excluded pattern(s) have special meta-characters in them. For a pattern like "refs/foo[ac]", the excluded regions ("refs/fooa", "refs/fooc", and everything underneath them) are not connected. A future implementation that handles this case may split the character class (pretending as if two patterns were excluded: "refs/fooa", and "refs/fooc"). There are a few other gotchas worth considering. First, note that the jump list is sorted, so once we jump past a region, we can avoid considering it (or any regions preceding it) again. The member `jump_pos` is used to track the first next-possible region to jump through. Second, note that the jump list is best-effort, since we do not handle loose references, and because of the meta-character issue above. The jump list may not skip past all references which won't appear in the results, but will never skip over a reference which does appear in the result set. In repositories with a large number of hidden references, the speed-up can be significant. Tests here are done with a copy of linux.git with a reference "refs/pull/N" pointing at every commit, as in: $ git rev-list HEAD | awk '{ print "create refs/pull/" NR " " $0 }' | git update-ref --stdin $ git pack-refs --all , it is significantly faster to have `for-each-ref` jump over the excluded references, as opposed to filtering them out after the fact: $ hyperfine \ 'git for-each-ref --format="%(objectname) %(refname)" | grep -vE "^[0-9a-f]{40} refs/pull/"' \ 'git.prev for-each-ref --format="%(objectname) %(refname)" --exclude="refs/pull"' \ 'git.compile for-each-ref --format="%(objectname) %(refname)" --exclude="refs/pull"' Benchmark 1: git for-each-ref --format="%(objectname) %(refname)" | grep -vE "^[0-9a-f]{40} refs/pull/" Time (mean ± σ): 798.1 ms ± 3.3 ms [User: 687.6 ms, System: 146.4 ms] Range (min … max): 794.5 ms … 805.5 ms 10 runs Benchmark 2: git.prev for-each-ref --format="%(objectname) %(refname)" --exclude="refs/pull" Time (mean ± σ): 98.9 ms ± 1.4 ms [User: 93.1 ms, System: 5.7 ms] Range (min … max): 97.0 ms … 104.0 ms 29 runs Benchmark 3: git.compile for-each-ref --format="%(objectname) %(refname)" --exclude="refs/pull" Time (mean ± σ): 4.5 ms ± 0.2 ms [User: 0.7 ms, System: 3.8 ms] Range (min … max): 4.1 ms … 5.8 ms 524 runs Summary 'git.compile for-each-ref --format="%(objectname) %(refname)" --exclude="refs/pull"' ran 21.87 ± 1.05 times faster than 'git.prev for-each-ref --format="%(objectname) %(refname)" --exclude="refs/pull"' 176.52 ± 8.19 times faster than 'git for-each-ref --format="%(objectname) %(refname)" | grep -vE "^[0-9a-f]{40} refs/pull/"' (Comparing stock git and this patch isn't quite fair, since an earlier commit in this series adds a naive implementation of the `--exclude` option. `git.prev` is built from the previous commit and includes this naive implementation). Using the jump list is fairly straightforward (see the changes to `refs/packed-backend.c::next_record()`), but constructing the list is not. To ensure that the construction is correct, add a new suite of tests in t1419 covering various corner cases (overlapping regions, partially overlapping regions, adjacent regions, etc.). Co-authored-by: Jeff King <peff@peff.net> Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Taylor Blau <me@ttaylorr.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-07-11 05:12:28 +08:00
return refs_for_each_fullref_in(get_main_ref_store(the_repository),
"", filter->exclude.v, cb, cb_data);
}
return refs_for_each_fullref_in_prefixes(get_main_ref_store(the_repository),
NULL, filter->name_patterns,
refs/packed-backend.c: implement jump lists to avoid excluded pattern(s) When iterating through the `packed-refs` file in order to answer a query like: $ git for-each-ref --exclude=refs/__hidden__ it would be useful to avoid walking over all of the entries in `refs/__hidden__/*` when possible, since we know that the ref-filter code is going to throw them away anyways. In certain circumstances, doing so is possible. The algorithm for doing so is as follows: - For each excluded pattern, find the first record that matches it, and the first record that *doesn't* match it (i.e. the location you'd next want to consider when excluding that pattern). - Sort the set of excluded regions from the previous step in ascending order of the first location within the `packed-refs` file that matches. - Clean up the results from the previous step: discard empty regions, and combine adjacent regions. The set of regions which remains is referred to as the "jump list", and never contains any references which should be included in the result set. Then when iterating through the `packed-refs` file, if `iter->pos` is ever contained in one of the regions from the previous steps, advance `iter->pos` past the end of that region, and continue enumeration. Note that we only perform this optimization when none of the excluded pattern(s) have special meta-characters in them. For a pattern like "refs/foo[ac]", the excluded regions ("refs/fooa", "refs/fooc", and everything underneath them) are not connected. A future implementation that handles this case may split the character class (pretending as if two patterns were excluded: "refs/fooa", and "refs/fooc"). There are a few other gotchas worth considering. First, note that the jump list is sorted, so once we jump past a region, we can avoid considering it (or any regions preceding it) again. The member `jump_pos` is used to track the first next-possible region to jump through. Second, note that the jump list is best-effort, since we do not handle loose references, and because of the meta-character issue above. The jump list may not skip past all references which won't appear in the results, but will never skip over a reference which does appear in the result set. In repositories with a large number of hidden references, the speed-up can be significant. Tests here are done with a copy of linux.git with a reference "refs/pull/N" pointing at every commit, as in: $ git rev-list HEAD | awk '{ print "create refs/pull/" NR " " $0 }' | git update-ref --stdin $ git pack-refs --all , it is significantly faster to have `for-each-ref` jump over the excluded references, as opposed to filtering them out after the fact: $ hyperfine \ 'git for-each-ref --format="%(objectname) %(refname)" | grep -vE "^[0-9a-f]{40} refs/pull/"' \ 'git.prev for-each-ref --format="%(objectname) %(refname)" --exclude="refs/pull"' \ 'git.compile for-each-ref --format="%(objectname) %(refname)" --exclude="refs/pull"' Benchmark 1: git for-each-ref --format="%(objectname) %(refname)" | grep -vE "^[0-9a-f]{40} refs/pull/" Time (mean ± σ): 798.1 ms ± 3.3 ms [User: 687.6 ms, System: 146.4 ms] Range (min … max): 794.5 ms … 805.5 ms 10 runs Benchmark 2: git.prev for-each-ref --format="%(objectname) %(refname)" --exclude="refs/pull" Time (mean ± σ): 98.9 ms ± 1.4 ms [User: 93.1 ms, System: 5.7 ms] Range (min … max): 97.0 ms … 104.0 ms 29 runs Benchmark 3: git.compile for-each-ref --format="%(objectname) %(refname)" --exclude="refs/pull" Time (mean ± σ): 4.5 ms ± 0.2 ms [User: 0.7 ms, System: 3.8 ms] Range (min … max): 4.1 ms … 5.8 ms 524 runs Summary 'git.compile for-each-ref --format="%(objectname) %(refname)" --exclude="refs/pull"' ran 21.87 ± 1.05 times faster than 'git.prev for-each-ref --format="%(objectname) %(refname)" --exclude="refs/pull"' 176.52 ± 8.19 times faster than 'git for-each-ref --format="%(objectname) %(refname)" | grep -vE "^[0-9a-f]{40} refs/pull/"' (Comparing stock git and this patch isn't quite fair, since an earlier commit in this series adds a naive implementation of the `--exclude` option. `git.prev` is built from the previous commit and includes this naive implementation). Using the jump list is fairly straightforward (see the changes to `refs/packed-backend.c::next_record()`), but constructing the list is not. To ensure that the construction is correct, add a new suite of tests in t1419 covering various corner cases (overlapping regions, partially overlapping regions, adjacent regions, etc.). Co-authored-by: Jeff King <peff@peff.net> Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Taylor Blau <me@ttaylorr.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-07-11 05:12:28 +08:00
filter->exclude.v,
cb, cb_data);
}
/*
* Given a ref (oid, refname), check if the ref belongs to the array
* of oids. If the given ref is a tag, check if the given tag points
* at one of the oids in the given oid array. Returns non-zero if a
* match is found.
*
* NEEDSWORK:
* As the refs are cached we might know what refname peels to without
* the need to parse the object via parse_object(). peel_ref() might be a
* more efficient alternative to obtain the pointee.
*/
static int match_points_at(struct oid_array *points_at,
const struct object_id *oid,
const char *refname)
{
struct object *obj;
if (oid_array_lookup(points_at, oid) >= 0)
return 1;
ref-filter: avoid parsing non-tags in match_points_at() When handling --points-at, we have to try to peel each ref to see if it's a tag that points at a requested oid. We start this process by calling parse_object() on the oid pointed to by each ref. The cost of parsing each object adds up, especially in an output that doesn't otherwise need to open the objects at all. Ideally we'd use peel_iterated_oid() here, which uses the cached information in the packed-refs file. But we can't, because our --points-at must match not only the fully peeled value, but any interim values (so if tag A points to tag B which points to commit C, we should match --points-at=B, but peel_iterated_oid() will only tell us about C). So the best we can do (absent changes to the packed-refs peel traits) is to avoid parsing non-tags. The obvious way to do that is to call oid_object_info() to check the type before parsing. But there are a few gotchas there, like checking if the object has already been parsed. Instead we can just tell parse_object() that we are OK skipping the hash check, which lets it turn on several optimizations. Commits can be loaded via the commit graph (so it's both fast and we have the benefit of the parsed data if we need it later at the output stage). Blobs are not loaded at all. Trees are still loaded, but it's rather rare to have a ref point directly to a tree (and since this is just an optimization, kicking in 99% of the time is OK). Even though we're paying for an extra lookup, the cost to avoid parsing the non-tags is a net benefit. In my git.git repository with 941 tags and 1440 other refs pointing to commits, this significantly cuts the runtime: Benchmark 1: ./git.old for-each-ref --points-at=HEAD Time (mean ± σ): 26.8 ms ± 0.5 ms [User: 24.5 ms, System: 2.2 ms] Range (min … max): 25.9 ms … 29.2 ms 107 runs Benchmark 2: ./git.new for-each-ref --points-at=HEAD Time (mean ± σ): 9.1 ms ± 0.3 ms [User: 6.8 ms, System: 2.2 ms] Range (min … max): 8.6 ms … 10.2 ms 308 runs Summary './git.new for-each-ref --points-at=HEAD' ran 2.96 ± 0.10 times faster than './git.old for-each-ref --points-at=HEAD' In a repository that is mostly annotated tags, we'd expect less improvement (we might still skip a few object loads, but that's balanced by the extra lookups). In my clone of linux.git, which has 782 tags and 3 branches, the run-time is about the same (it's actually ~1% faster on average after this patch, but that's within the run-to-run noise). Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-07-03 06:37:47 +08:00
obj = parse_object_with_flags(the_repository, oid,
PARSE_OBJECT_SKIP_HASH_CHECK);
while (obj && obj->type == OBJ_TAG) {
2023-07-03 06:35:40 +08:00
struct tag *tag = (struct tag *)obj;
if (parse_tag(tag) < 0) {
obj = NULL;
break;
}
if (oid_array_lookup(points_at, get_tagged_oid(tag)) >= 0)
return 1;
2023-07-03 06:35:40 +08:00
obj = tag->tagged;
}
if (!obj)
die(_("malformed object at '%s'"), refname);
return 0;
}
/*
* Allocate space for a new ref_array_item and copy the name and oid to it.
*
* Callers can then fill in other struct members at their leisure.
*/
static struct ref_array_item *new_ref_array_item(const char *refname,
const struct object_id *oid)
{
struct ref_array_item *ref;
FLEX_ALLOC_STR(ref, refname, refname);
oidcpy(&ref->objectname, oid);
ref->rest = NULL;
return ref;
}
static void ref_array_append(struct ref_array *array, struct ref_array_item *ref)
{
ALLOC_GROW(array->items, array->nr + 1, array->alloc);
array->items[array->nr++] = ref;
}
struct ref_array_item *ref_array_push(struct ref_array *array,
const char *refname,
const struct object_id *oid)
{
struct ref_array_item *ref = new_ref_array_item(refname, oid);
ref_array_append(array, ref);
return ref;
}
static int ref_kind_from_refname(const char *refname)
{
unsigned int i;
static struct {
const char *prefix;
unsigned int kind;
} ref_kind[] = {
{ "refs/heads/" , FILTER_REFS_BRANCHES },
{ "refs/remotes/" , FILTER_REFS_REMOTES },
{ "refs/tags/", FILTER_REFS_TAGS}
};
if (!strcmp(refname, "HEAD"))
return FILTER_REFS_DETACHED_HEAD;
for (i = 0; i < ARRAY_SIZE(ref_kind); i++) {
if (starts_with(refname, ref_kind[i].prefix))
return ref_kind[i].kind;
}
if (is_pseudo_ref(refname))
return FILTER_REFS_PSEUDOREFS;
if (is_root_ref(refname))
return FILTER_REFS_ROOT_REFS;
return FILTER_REFS_OTHERS;
}
static int filter_ref_kind(struct ref_filter *filter, const char *refname)
{
if (filter->kind == FILTER_REFS_BRANCHES ||
filter->kind == FILTER_REFS_REMOTES ||
filter->kind == FILTER_REFS_TAGS)
return filter->kind;
return ref_kind_from_refname(refname);
}
static struct ref_array_item *apply_ref_filter(const char *refname, const char *referent, const struct object_id *oid,
int flag, struct ref_filter *filter)
{
struct ref_array_item *ref;
struct commit *commit = NULL;
unsigned int kind;
if (flag & REF_BAD_NAME) {
warning(_("ignoring ref with broken name %s"), refname);
return NULL;
}
if (flag & REF_ISBROKEN) {
warning(_("ignoring broken ref %s"), refname);
return NULL;
}
/* Obtain the current ref kind from filter_ref_kind() and ignore unwanted refs. */
kind = filter_ref_kind(filter, refname);
/*
* Generally HEAD refs are printed with special description denoting a rebase,
* detached state and so forth. This is useful when only printing the HEAD ref
* But when it is being printed along with other root refs, it makes sense to
* keep the formatting consistent. So we mask the type to act like a root ref.
*/
if (filter->kind & FILTER_REFS_ROOT_REFS && kind == FILTER_REFS_DETACHED_HEAD)
kind = FILTER_REFS_ROOT_REFS;
else if (!(kind & filter->kind))
return NULL;
if (!filter_pattern_match(filter, refname))
return NULL;
builtin/for-each-ref.c: add `--exclude` option When using `for-each-ref`, it is sometimes convenient for the caller to be able to exclude certain parts of the references. For example, if there are many `refs/__hidden__/*` references, the caller may want to emit all references *except* the hidden ones. Currently, the only way to do this is to post-process the output, like: $ git for-each-ref --format='%(refname)' | grep -v '^refs/hidden/' Which is do-able, but requires processing a potentially large quantity of references. Teach `git for-each-ref` a new `--exclude=<pattern>` option, which excludes references from the results if they match one or more excluded patterns. This patch provides a naive implementation where the `ref_filter` still sees all references (including ones that it will discard) and is left to check whether each reference matches any excluded pattern(s) before emitting them. By culling out references we know the caller doesn't care about, we can avoid allocating memory for their storage, as well as spending time sorting the output (among other things). Even the naive implementation provides a significant speed-up on a modified copy of linux.git (that has a hidden ref pointing at each commit): $ hyperfine \ 'git.compile for-each-ref --format="%(objectname) %(refname)" | grep -vE "[0-9a-f]{40} refs/pull/"' \ 'git.compile for-each-ref --format="%(objectname) %(refname)" --exclude refs/pull/' Benchmark 1: git.compile for-each-ref --format="%(objectname) %(refname)" | grep -vE "[0-9a-f]{40} refs/pull/" Time (mean ± σ): 820.1 ms ± 2.0 ms [User: 703.7 ms, System: 152.0 ms] Range (min … max): 817.7 ms … 823.3 ms 10 runs Benchmark 2: git.compile for-each-ref --format="%(objectname) %(refname)" --exclude refs/pull/ Time (mean ± σ): 106.6 ms ± 1.1 ms [User: 99.4 ms, System: 7.1 ms] Range (min … max): 104.7 ms … 109.1 ms 27 runs Summary 'git.compile for-each-ref --format="%(objectname) %(refname)" --exclude refs/pull/' ran 7.69 ± 0.08 times faster than 'git.compile for-each-ref --format="%(objectname) %(refname)" | grep -vE "[0-9a-f]{40} refs/pull/"' Subsequent patches will improve on this by avoiding visiting excluded sections of the `packed-refs` file in certain cases. Co-authored-by: Jeff King <peff@peff.net> Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Taylor Blau <me@ttaylorr.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-07-11 05:12:19 +08:00
if (filter_exclude_match(filter, refname))
return NULL;
builtin/for-each-ref.c: add `--exclude` option When using `for-each-ref`, it is sometimes convenient for the caller to be able to exclude certain parts of the references. For example, if there are many `refs/__hidden__/*` references, the caller may want to emit all references *except* the hidden ones. Currently, the only way to do this is to post-process the output, like: $ git for-each-ref --format='%(refname)' | grep -v '^refs/hidden/' Which is do-able, but requires processing a potentially large quantity of references. Teach `git for-each-ref` a new `--exclude=<pattern>` option, which excludes references from the results if they match one or more excluded patterns. This patch provides a naive implementation where the `ref_filter` still sees all references (including ones that it will discard) and is left to check whether each reference matches any excluded pattern(s) before emitting them. By culling out references we know the caller doesn't care about, we can avoid allocating memory for their storage, as well as spending time sorting the output (among other things). Even the naive implementation provides a significant speed-up on a modified copy of linux.git (that has a hidden ref pointing at each commit): $ hyperfine \ 'git.compile for-each-ref --format="%(objectname) %(refname)" | grep -vE "[0-9a-f]{40} refs/pull/"' \ 'git.compile for-each-ref --format="%(objectname) %(refname)" --exclude refs/pull/' Benchmark 1: git.compile for-each-ref --format="%(objectname) %(refname)" | grep -vE "[0-9a-f]{40} refs/pull/" Time (mean ± σ): 820.1 ms ± 2.0 ms [User: 703.7 ms, System: 152.0 ms] Range (min … max): 817.7 ms … 823.3 ms 10 runs Benchmark 2: git.compile for-each-ref --format="%(objectname) %(refname)" --exclude refs/pull/ Time (mean ± σ): 106.6 ms ± 1.1 ms [User: 99.4 ms, System: 7.1 ms] Range (min … max): 104.7 ms … 109.1 ms 27 runs Summary 'git.compile for-each-ref --format="%(objectname) %(refname)" --exclude refs/pull/' ran 7.69 ± 0.08 times faster than 'git.compile for-each-ref --format="%(objectname) %(refname)" | grep -vE "[0-9a-f]{40} refs/pull/"' Subsequent patches will improve on this by avoiding visiting excluded sections of the `packed-refs` file in certain cases. Co-authored-by: Jeff King <peff@peff.net> Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Taylor Blau <me@ttaylorr.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-07-11 05:12:19 +08:00
if (filter->points_at.nr && !match_points_at(&filter->points_at, oid, refname))
return NULL;
/*
* A merge filter is applied on refs pointing to commits. Hence
* obtain the commit using the 'oid' available and discard all
* non-commits early. The actual filtering is done later.
*/
if (filter->reachable_from || filter->unreachable_from ||
filter->with_commit || filter->no_commit || filter->verbose) {
commit = lookup_commit_reference_gently(the_repository, oid, 1);
if (!commit)
return NULL;
ref-filter: add --no-contains option to tag/branch/for-each-ref Change the tag, branch & for-each-ref commands to have a --no-contains option in addition to their longstanding --contains options. This allows for finding the last-good rollout tag given a known-bad <commit>. Given a hypothetically bad commit cf5c7253e0, the git version to revert to can be found with this hacky two-liner: (git tag -l 'v[0-9]*'; git tag -l --contains cf5c7253e0 'v[0-9]*') | sort | uniq -c | grep -E '^ *1 ' | awk '{print $2}' | tail -n 10 With this new --no-contains option the same can be achieved with: git tag -l --no-contains cf5c7253e0 'v[0-9]*' | sort | tail -n 10 As the filtering machinery is shared between the tag, branch & for-each-ref commands, implement this for those commands too. A practical use for this with "branch" is e.g. finding branches which were branched off between v2.8.0 and v2.10.0: git branch --contains v2.8.0 --no-contains v2.10.0 The "describe" command also has a --contains option, but its semantics are unrelated to what tag/branch/for-each-ref use --contains for. A --no-contains option for "describe" wouldn't make any sense, other than being exactly equivalent to not supplying --contains at all, which would be confusing at best. Add a --without option to "tag" as an alias for --no-contains, for consistency with --with and --contains. The --with option is undocumented, and possibly the only user of it is Junio (<xmqqefy71iej.fsf@gitster.mtv.corp.google.com>). But it's trivial to support, so let's do that. The additions to the the test suite are inverse copies of the corresponding --contains tests. With this change --no-contains for tag, branch & for-each-ref is just as well tested as the existing --contains option. In addition to those tests, add a test for "tag" which asserts that --no-contains won't find tree/blob tags, which is slightly unintuitive, but consistent with how --contains works & is documented. Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-03-25 02:40:57 +08:00
/* We perform the filtering for the '--contains' option... */
if (filter->with_commit &&
!commit_contains(filter, commit, filter->with_commit, &filter->internal.contains_cache))
return NULL;
ref-filter: add --no-contains option to tag/branch/for-each-ref Change the tag, branch & for-each-ref commands to have a --no-contains option in addition to their longstanding --contains options. This allows for finding the last-good rollout tag given a known-bad <commit>. Given a hypothetically bad commit cf5c7253e0, the git version to revert to can be found with this hacky two-liner: (git tag -l 'v[0-9]*'; git tag -l --contains cf5c7253e0 'v[0-9]*') | sort | uniq -c | grep -E '^ *1 ' | awk '{print $2}' | tail -n 10 With this new --no-contains option the same can be achieved with: git tag -l --no-contains cf5c7253e0 'v[0-9]*' | sort | tail -n 10 As the filtering machinery is shared between the tag, branch & for-each-ref commands, implement this for those commands too. A practical use for this with "branch" is e.g. finding branches which were branched off between v2.8.0 and v2.10.0: git branch --contains v2.8.0 --no-contains v2.10.0 The "describe" command also has a --contains option, but its semantics are unrelated to what tag/branch/for-each-ref use --contains for. A --no-contains option for "describe" wouldn't make any sense, other than being exactly equivalent to not supplying --contains at all, which would be confusing at best. Add a --without option to "tag" as an alias for --no-contains, for consistency with --with and --contains. The --with option is undocumented, and possibly the only user of it is Junio (<xmqqefy71iej.fsf@gitster.mtv.corp.google.com>). But it's trivial to support, so let's do that. The additions to the the test suite are inverse copies of the corresponding --contains tests. With this change --no-contains for tag, branch & for-each-ref is just as well tested as the existing --contains option. In addition to those tests, add a test for "tag" which asserts that --no-contains won't find tree/blob tags, which is slightly unintuitive, but consistent with how --contains works & is documented. Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-03-25 02:40:57 +08:00
/* ...or for the `--no-contains' option */
if (filter->no_commit &&
commit_contains(filter, commit, filter->no_commit, &filter->internal.no_contains_cache))
return NULL;
}
/*
* We do not open the object yet; sort may only need refname
* to do its job and the resulting list may yet to be pruned
* by maxcount logic.
*/
ref = new_ref_array_item(refname, oid);
ref->commit = commit;
ref->flag = flag;
ref->kind = kind;
ref->symref = xstrdup_or_null(referent);
return ref;
}
struct ref_filter_cbdata {
struct ref_array *array;
struct ref_filter *filter;
};
/*
* A call-back given to for_each_ref(). Filter refs and keep them for
* later object processing.
*/
static int filter_one(const char *refname, const char *referent, const struct object_id *oid, int flag, void *cb_data)
{
struct ref_filter_cbdata *ref_cbdata = cb_data;
struct ref_array_item *ref;
ref = apply_ref_filter(refname, referent, oid, flag, ref_cbdata->filter);
if (ref)
ref_array_append(ref_cbdata->array, ref);
return 0;
}
/* Free memory allocated for a ref_array_item */
static void free_array_item(struct ref_array_item *item)
{
free((char *)item->symref);
if (item->value) {
int i;
for (i = 0; i < used_atom_cnt; i++)
free((char *)item->value[i].s);
free(item->value);
}
for-each-ref: add ahead-behind format atom The previous change implemented the ahead_behind() method, including an algorithm to compute the ahead/behind values for a number of commit tips relative to a number of commit bases. Now, integrate that algorithm as part of 'git for-each-ref' hidden behind a new format atom, ahead-behind. This naturally extends to 'git branch' and 'git tag' builtins, as well. This format allows specifying multiple bases, if so desired, and all matching references are compared against all of those bases. For this reason, failing to read a reference provided from these atoms results in an error. In order to translate the ahead_behind() method information to the format output code in ref-filter.c, we must populate arrays of ahead_behind_count structs. In struct ref_array, we store the full array that will be passed to ahead_behind(). In struct ref_array_item, we store an array of pointers that point to the relvant items within the full array. In this way, we can pull all relevant ahead/behind values directly when formatting output for a specific item. It also ensures the lifetime of the ahead_behind_count structs matches the time that the array is being used. Add specific tests of the ahead/behind counts in t6600-test-reach.sh, as it has an interesting repository shape. In particular, its merging strategy and its use of different commit-graphs would demonstrate over- counting if the ahead_behind() method did not already account for that possibility. Also add tests for the specific for-each-ref, branch, and tag builtins. In the case of 'git tag', there are intersting cases that happen when some of the selected tips are not commits. This requires careful logic around commits_nr in the second loop of filter_ahead_behind(). Also, the test in t7004 is carefully located to avoid being dependent on the GPG prereq. It also avoids using the test_commit helper, as that will add ticks to the time and disrupt the expected timestamps in later tag tests. Also add performance tests in a new p1300-graph-walks.sh script. This will be useful for more uses in the future, but for now compare the ahead-behind counting algorithm in 'git for-each-ref' to the naive implementation by running 'git rev-list --count' processes for each input. For the Git source code repository, the improvement is already obvious: Test this tree --------------------------------------------------------------- 1500.2: ahead-behind counts: git for-each-ref 0.07(0.07+0.00) 1500.3: ahead-behind counts: git branch 0.07(0.06+0.00) 1500.4: ahead-behind counts: git tag 0.07(0.06+0.00) 1500.5: ahead-behind counts: git rev-list 1.32(1.04+0.27) But the standard performance benchmark is the Linux kernel repository, which demosntrates a significant improvement: Test this tree --------------------------------------------------------------- 1500.2: ahead-behind counts: git for-each-ref 0.27(0.24+0.02) 1500.3: ahead-behind counts: git branch 0.27(0.24+0.03) 1500.4: ahead-behind counts: git tag 0.28(0.27+0.01) 1500.5: ahead-behind counts: git rev-list 4.57(4.03+0.54) The 'git rev-list' test exists in this change as a demonstration, but it will be removed in the next change to avoid wasting time on this comparison. Signed-off-by: Derrick Stolee <derrickstolee@github.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-03-20 19:26:54 +08:00
free(item->counts);
for-each-ref: add 'is-base' token The previous change introduced the get_branch_base_for_tip() method in commit-reach.c. The motivation of that change was about using a heuristic to deteremine the base branch for a source commit from a list of candidate commit tips. This change makes that algorithm visible to users via a new atom in the 'git for-each-ref' format. This change is very similar to the chang in 49abcd21da6 (for-each-ref: add ahead-behind format atom, 2023-03-20). Introduce the 'is-base:<source>' atom, which will indicate that the algorithm should be computed and the result of the algorithm is reported using an indicator of the form '(<source>)'. For example, using '%(is-base:HEAD)' would result in one line having the token '(HEAD)'. Use the sorted order of refs included in the ref filter to break ties in the algorithm's heuristic. In the previous change, the motivating examples include using an L0 trunk, long-lived L1 branches, and temporary release branches. A caller could communicate the ordered preference among these categories using the input refpecs and avoiding a different sort mechanism. This sorting behavior is tested in the test scripts. It is important to include this atom as a special case to can_do_iterative_format() to match the expectations created in bd98f9774e1 (ref-filter.c: filter & format refs in the same callback, 2023-11-14). The ahead-behind atom was one of the special cases, and this similarly requires using an algorithm across all input refs before starting the format of any single ref. In the test script, the format tokens use colons or lack whitespace to avoid Git complaining about trailing whitespace errors. Signed-off-by: Derrick Stolee <stolee@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2024-08-14 18:31:29 +08:00
free(item->is_base);
free(item);
}
struct ref_filter_and_format_cbdata {
struct ref_filter *filter;
struct ref_format *format;
struct ref_filter_and_format_internal {
int count;
} internal;
};
static int filter_and_format_one(const char *refname, const char *referent, const struct object_id *oid, int flag, void *cb_data)
{
struct ref_filter_and_format_cbdata *ref_cbdata = cb_data;
struct ref_array_item *ref;
struct strbuf output = STRBUF_INIT, err = STRBUF_INIT;
ref = apply_ref_filter(refname, referent, oid, flag, ref_cbdata->filter);
if (!ref)
return 0;
if (format_ref_array_item(ref, ref_cbdata->format, &output, &err))
die("%s", err.buf);
if (output.len || !ref_cbdata->format->array_opts.omit_empty) {
fwrite(output.buf, 1, output.len, stdout);
putchar('\n');
}
strbuf_release(&output);
strbuf_release(&err);
free_array_item(ref);
/*
* Increment the running count of refs that match the filter. If
* max_count is set and we've reached the max, stop the ref
* iteration by returning a nonzero value.
*/
if (ref_cbdata->format->array_opts.max_count &&
++ref_cbdata->internal.count >= ref_cbdata->format->array_opts.max_count)
return 1;
return 0;
}
/* Free all memory allocated for ref_array */
void ref_array_clear(struct ref_array *array)
{
int i;
for (i = 0; i < array->nr; i++)
free_array_item(array->items[i]);
FREE_AND_NULL(array->items);
array->nr = array->alloc = 0;
for (i = 0; i < used_atom_cnt; i++) {
struct used_atom *atom = &used_atom[i];
if (atom->atom_type == ATOM_HEAD)
free(atom->u.head);
free((char *)atom->name);
}
FREE_AND_NULL(used_atom);
used_atom_cnt = 0;
if (ref_to_worktree_map.worktrees) {
hashmap_clear_and_free(&(ref_to_worktree_map.map),
struct ref_to_worktree_entry, ent);
free_worktrees(ref_to_worktree_map.worktrees);
ref_to_worktree_map.worktrees = NULL;
}
for-each-ref: add ahead-behind format atom The previous change implemented the ahead_behind() method, including an algorithm to compute the ahead/behind values for a number of commit tips relative to a number of commit bases. Now, integrate that algorithm as part of 'git for-each-ref' hidden behind a new format atom, ahead-behind. This naturally extends to 'git branch' and 'git tag' builtins, as well. This format allows specifying multiple bases, if so desired, and all matching references are compared against all of those bases. For this reason, failing to read a reference provided from these atoms results in an error. In order to translate the ahead_behind() method information to the format output code in ref-filter.c, we must populate arrays of ahead_behind_count structs. In struct ref_array, we store the full array that will be passed to ahead_behind(). In struct ref_array_item, we store an array of pointers that point to the relvant items within the full array. In this way, we can pull all relevant ahead/behind values directly when formatting output for a specific item. It also ensures the lifetime of the ahead_behind_count structs matches the time that the array is being used. Add specific tests of the ahead/behind counts in t6600-test-reach.sh, as it has an interesting repository shape. In particular, its merging strategy and its use of different commit-graphs would demonstrate over- counting if the ahead_behind() method did not already account for that possibility. Also add tests for the specific for-each-ref, branch, and tag builtins. In the case of 'git tag', there are intersting cases that happen when some of the selected tips are not commits. This requires careful logic around commits_nr in the second loop of filter_ahead_behind(). Also, the test in t7004 is carefully located to avoid being dependent on the GPG prereq. It also avoids using the test_commit helper, as that will add ticks to the time and disrupt the expected timestamps in later tag tests. Also add performance tests in a new p1300-graph-walks.sh script. This will be useful for more uses in the future, but for now compare the ahead-behind counting algorithm in 'git for-each-ref' to the naive implementation by running 'git rev-list --count' processes for each input. For the Git source code repository, the improvement is already obvious: Test this tree --------------------------------------------------------------- 1500.2: ahead-behind counts: git for-each-ref 0.07(0.07+0.00) 1500.3: ahead-behind counts: git branch 0.07(0.06+0.00) 1500.4: ahead-behind counts: git tag 0.07(0.06+0.00) 1500.5: ahead-behind counts: git rev-list 1.32(1.04+0.27) But the standard performance benchmark is the Linux kernel repository, which demosntrates a significant improvement: Test this tree --------------------------------------------------------------- 1500.2: ahead-behind counts: git for-each-ref 0.27(0.24+0.02) 1500.3: ahead-behind counts: git branch 0.27(0.24+0.03) 1500.4: ahead-behind counts: git tag 0.28(0.27+0.01) 1500.5: ahead-behind counts: git rev-list 4.57(4.03+0.54) The 'git rev-list' test exists in this change as a demonstration, but it will be removed in the next change to avoid wasting time on this comparison. Signed-off-by: Derrick Stolee <derrickstolee@github.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-03-20 19:26:54 +08:00
FREE_AND_NULL(array->counts);
}
#define EXCLUDE_REACHED 0
#define INCLUDE_REACHED 1
static void reach_filter(struct ref_array *array,
struct commit_list **check_reachable,
int include_reached)
{
int i, old_nr;
struct commit **to_clear;
if (!*check_reachable)
return;
CALLOC_ARRAY(to_clear, array->nr);
for (i = 0; i < array->nr; i++) {
struct ref_array_item *item = array->items[i];
to_clear[i] = item->commit;
}
commit-reach: add tips_reachable_from_bases() Both 'git for-each-ref --merged=<X>' and 'git branch --merged=<X>' use the ref-filter machinery to select references or branches (respectively) that are reachable from a set of commits presented by one or more --merged arguments. This happens within reach_filter(), which uses the revision-walk machinery to walk history in a standard way. However, the commit-reach.c file is full of custom searches that are more efficient, especially for reachability queries that can terminate early when reachability is discovered. Add a new tips_reachable_from_bases() method to commit-reach.c and call it from within reach_filter() in ref-filter.c. This affects both 'git branch' and 'git for-each-ref' as tested in p1500-graph-walks.sh. For the Linux kernel repository, we take an already-fast algorithm and make it even faster: Test HEAD~1 HEAD ------------------------------------------------------------------- 1500.5: contains: git for-each-ref --merged 0.13 0.02 -84.6% 1500.6: contains: git branch --merged 0.14 0.02 -85.7% 1500.7: contains: git tag --merged 0.15 0.03 -80.0% (Note that we remove the iterative 'git rev-list' test from p1500 because it no longer makes sense as a comparison to 'git for-each-ref' and would just waste time running it for these comparisons.) The algorithm is implemented in commit-reach.c in the method tips_reachable_from_base(). This method takes a string_list of tips and assigns the 'util' for each item with the value 1 if the base commit can reach those tips. Like other reachability queries in commit-reach.c, the fastest way to search for "can A reach B?" is to do a depth-first search up to the generation number of B, preferring to explore first parents before later parents. While we must walk all reachable commits up to that generation number when the answer is "no", the depth-first search can answer "yes" much faster than other approaches in most cases. This search becomes trickier when there are multiple targets for the depth-first search. The commits with lower generation number are more likely to be within the history of the start commit, but we don't want to waste time searching commits of low generation number if the commit target with lowest generation number has already been found. The trick here is to take the input commits and sort them by generation number in ascending order. Track the index within this order as min_generation_index. When we find a commit, if its index in the list is equal to min_generation_index, then we can increase the generation number boundary of our search to the next-lowest value in the list. With this mechanism, the number of commits to search is minimized with respect to the depth-first search heuristic. We will walk all commits up to the minimum generation number of a commit that is _not_ reachable from the start, but we will walk only the necessary portion of the depth-first search for the reachable commits of lower generation. Add extra tests for this behavior in t6600-test-reach.sh as the interesting data shape of that repository can sometimes demonstrate corner case bugs. Signed-off-by: Derrick Stolee <derrickstolee@github.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-03-20 19:26:55 +08:00
tips_reachable_from_bases(the_repository,
*check_reachable,
commit-reach: add tips_reachable_from_bases() Both 'git for-each-ref --merged=<X>' and 'git branch --merged=<X>' use the ref-filter machinery to select references or branches (respectively) that are reachable from a set of commits presented by one or more --merged arguments. This happens within reach_filter(), which uses the revision-walk machinery to walk history in a standard way. However, the commit-reach.c file is full of custom searches that are more efficient, especially for reachability queries that can terminate early when reachability is discovered. Add a new tips_reachable_from_bases() method to commit-reach.c and call it from within reach_filter() in ref-filter.c. This affects both 'git branch' and 'git for-each-ref' as tested in p1500-graph-walks.sh. For the Linux kernel repository, we take an already-fast algorithm and make it even faster: Test HEAD~1 HEAD ------------------------------------------------------------------- 1500.5: contains: git for-each-ref --merged 0.13 0.02 -84.6% 1500.6: contains: git branch --merged 0.14 0.02 -85.7% 1500.7: contains: git tag --merged 0.15 0.03 -80.0% (Note that we remove the iterative 'git rev-list' test from p1500 because it no longer makes sense as a comparison to 'git for-each-ref' and would just waste time running it for these comparisons.) The algorithm is implemented in commit-reach.c in the method tips_reachable_from_base(). This method takes a string_list of tips and assigns the 'util' for each item with the value 1 if the base commit can reach those tips. Like other reachability queries in commit-reach.c, the fastest way to search for "can A reach B?" is to do a depth-first search up to the generation number of B, preferring to explore first parents before later parents. While we must walk all reachable commits up to that generation number when the answer is "no", the depth-first search can answer "yes" much faster than other approaches in most cases. This search becomes trickier when there are multiple targets for the depth-first search. The commits with lower generation number are more likely to be within the history of the start commit, but we don't want to waste time searching commits of low generation number if the commit target with lowest generation number has already been found. The trick here is to take the input commits and sort them by generation number in ascending order. Track the index within this order as min_generation_index. When we find a commit, if its index in the list is equal to min_generation_index, then we can increase the generation number boundary of our search to the next-lowest value in the list. With this mechanism, the number of commits to search is minimized with respect to the depth-first search heuristic. We will walk all commits up to the minimum generation number of a commit that is _not_ reachable from the start, but we will walk only the necessary portion of the depth-first search for the reachable commits of lower generation. Add extra tests for this behavior in t6600-test-reach.sh as the interesting data shape of that repository can sometimes demonstrate corner case bugs. Signed-off-by: Derrick Stolee <derrickstolee@github.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-03-20 19:26:55 +08:00
to_clear, array->nr,
UNINTERESTING);
old_nr = array->nr;
array->nr = 0;
for (i = 0; i < old_nr; i++) {
struct ref_array_item *item = array->items[i];
struct commit *commit = item->commit;
int is_merged = !!(commit->object.flags & UNINTERESTING);
if (is_merged == include_reached)
array->items[array->nr++] = array->items[i];
else
free_array_item(item);
}
clear_commit_marks_many(old_nr, to_clear, ALL_REV_FLAGS);
while (*check_reachable) {
struct commit *merge_commit = pop_commit(check_reachable);
clear_commit_marks(merge_commit, ALL_REV_FLAGS);
}
free(to_clear);
}
for-each-ref: add ahead-behind format atom The previous change implemented the ahead_behind() method, including an algorithm to compute the ahead/behind values for a number of commit tips relative to a number of commit bases. Now, integrate that algorithm as part of 'git for-each-ref' hidden behind a new format atom, ahead-behind. This naturally extends to 'git branch' and 'git tag' builtins, as well. This format allows specifying multiple bases, if so desired, and all matching references are compared against all of those bases. For this reason, failing to read a reference provided from these atoms results in an error. In order to translate the ahead_behind() method information to the format output code in ref-filter.c, we must populate arrays of ahead_behind_count structs. In struct ref_array, we store the full array that will be passed to ahead_behind(). In struct ref_array_item, we store an array of pointers that point to the relvant items within the full array. In this way, we can pull all relevant ahead/behind values directly when formatting output for a specific item. It also ensures the lifetime of the ahead_behind_count structs matches the time that the array is being used. Add specific tests of the ahead/behind counts in t6600-test-reach.sh, as it has an interesting repository shape. In particular, its merging strategy and its use of different commit-graphs would demonstrate over- counting if the ahead_behind() method did not already account for that possibility. Also add tests for the specific for-each-ref, branch, and tag builtins. In the case of 'git tag', there are intersting cases that happen when some of the selected tips are not commits. This requires careful logic around commits_nr in the second loop of filter_ahead_behind(). Also, the test in t7004 is carefully located to avoid being dependent on the GPG prereq. It also avoids using the test_commit helper, as that will add ticks to the time and disrupt the expected timestamps in later tag tests. Also add performance tests in a new p1300-graph-walks.sh script. This will be useful for more uses in the future, but for now compare the ahead-behind counting algorithm in 'git for-each-ref' to the naive implementation by running 'git rev-list --count' processes for each input. For the Git source code repository, the improvement is already obvious: Test this tree --------------------------------------------------------------- 1500.2: ahead-behind counts: git for-each-ref 0.07(0.07+0.00) 1500.3: ahead-behind counts: git branch 0.07(0.06+0.00) 1500.4: ahead-behind counts: git tag 0.07(0.06+0.00) 1500.5: ahead-behind counts: git rev-list 1.32(1.04+0.27) But the standard performance benchmark is the Linux kernel repository, which demosntrates a significant improvement: Test this tree --------------------------------------------------------------- 1500.2: ahead-behind counts: git for-each-ref 0.27(0.24+0.02) 1500.3: ahead-behind counts: git branch 0.27(0.24+0.03) 1500.4: ahead-behind counts: git tag 0.28(0.27+0.01) 1500.5: ahead-behind counts: git rev-list 4.57(4.03+0.54) The 'git rev-list' test exists in this change as a demonstration, but it will be removed in the next change to avoid wasting time on this comparison. Signed-off-by: Derrick Stolee <derrickstolee@github.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-03-20 19:26:54 +08:00
void filter_ahead_behind(struct repository *r,
struct ref_format *format,
struct ref_array *array)
{
struct commit **commits;
size_t commits_nr = format->bases.nr + array->nr;
if (!format->bases.nr || !array->nr)
return;
ALLOC_ARRAY(commits, commits_nr);
for (size_t i = 0; i < format->bases.nr; i++)
commits[i] = format->bases.items[i].util;
ALLOC_ARRAY(array->counts, st_mult(format->bases.nr, array->nr));
commits_nr = format->bases.nr;
array->counts_nr = 0;
for (size_t i = 0; i < array->nr; i++) {
const char *name = array->items[i]->refname;
commits[commits_nr] = lookup_commit_reference_by_name(name);
if (!commits[commits_nr])
continue;
CALLOC_ARRAY(array->items[i]->counts, format->bases.nr);
for (size_t j = 0; j < format->bases.nr; j++) {
struct ahead_behind_count *count;
count = &array->counts[array->counts_nr++];
count->tip_index = commits_nr;
count->base_index = j;
array->items[i]->counts[j] = count;
}
commits_nr++;
}
ahead_behind(r, commits, commits_nr, array->counts, array->counts_nr);
free(commits);
}
for-each-ref: add 'is-base' token The previous change introduced the get_branch_base_for_tip() method in commit-reach.c. The motivation of that change was about using a heuristic to deteremine the base branch for a source commit from a list of candidate commit tips. This change makes that algorithm visible to users via a new atom in the 'git for-each-ref' format. This change is very similar to the chang in 49abcd21da6 (for-each-ref: add ahead-behind format atom, 2023-03-20). Introduce the 'is-base:<source>' atom, which will indicate that the algorithm should be computed and the result of the algorithm is reported using an indicator of the form '(<source>)'. For example, using '%(is-base:HEAD)' would result in one line having the token '(HEAD)'. Use the sorted order of refs included in the ref filter to break ties in the algorithm's heuristic. In the previous change, the motivating examples include using an L0 trunk, long-lived L1 branches, and temporary release branches. A caller could communicate the ordered preference among these categories using the input refpecs and avoiding a different sort mechanism. This sorting behavior is tested in the test scripts. It is important to include this atom as a special case to can_do_iterative_format() to match the expectations created in bd98f9774e1 (ref-filter.c: filter & format refs in the same callback, 2023-11-14). The ahead-behind atom was one of the special cases, and this similarly requires using an algorithm across all input refs before starting the format of any single ref. In the test script, the format tokens use colons or lack whitespace to avoid Git complaining about trailing whitespace errors. Signed-off-by: Derrick Stolee <stolee@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2024-08-14 18:31:29 +08:00
void filter_is_base(struct repository *r,
struct ref_format *format,
struct ref_array *array)
{
struct commit **bases;
size_t bases_nr = 0;
struct ref_array_item **back_index;
if (!format->is_base_tips.nr || !array->nr)
return;
CALLOC_ARRAY(back_index, array->nr);
CALLOC_ARRAY(bases, array->nr);
for (size_t i = 0; i < array->nr; i++) {
const char *name = array->items[i]->refname;
struct commit *c = lookup_commit_reference_by_name_gently(name, 1);
CALLOC_ARRAY(array->items[i]->is_base, format->is_base_tips.nr);
if (!c)
continue;
back_index[bases_nr] = array->items[i];
bases[bases_nr] = c;
bases_nr++;
}
for (size_t i = 0; i < format->is_base_tips.nr; i++) {
struct commit *tip = format->is_base_tips.items[i].util;
int base_index = get_branch_base_for_tip(r, tip, bases, bases_nr);
if (base_index < 0)
continue;
/* Store the string for use in output later. */
back_index[base_index]->is_base[i] = xstrdup(format->is_base_tips.items[i].string);
}
free(back_index);
free(bases);
}
static int do_filter_refs(struct ref_filter *filter, unsigned int type, each_ref_fn fn, void *cb_data)
{
int ret = 0;
filter->kind = type & FILTER_REFS_KIND_MASK;
init_contains_cache(&filter->internal.contains_cache);
init_contains_cache(&filter->internal.no_contains_cache);
/* Simple per-ref filtering */
if (!filter->kind)
die("filter_refs: invalid type");
else {
/*
* For common cases where we need only branches or remotes or tags,
* we only iterate through those refs. If a mix of refs is needed,
* we iterate over all refs and filter out required refs with the help
* of filter_ref_kind().
*/
if (filter->kind == FILTER_REFS_BRANCHES)
ret = refs_for_each_fullref_in(get_main_ref_store(the_repository),
"refs/heads/", NULL,
fn, cb_data);
else if (filter->kind == FILTER_REFS_REMOTES)
ret = refs_for_each_fullref_in(get_main_ref_store(the_repository),
"refs/remotes/", NULL,
fn, cb_data);
else if (filter->kind == FILTER_REFS_TAGS)
ret = refs_for_each_fullref_in(get_main_ref_store(the_repository),
"refs/tags/", NULL, fn,
cb_data);
else if (filter->kind & FILTER_REFS_REGULAR)
ret = for_each_fullref_in_pattern(filter, fn, cb_data);
/*
* When printing all ref types, HEAD is already included,
* so we don't want to print HEAD again.
*/
if (!ret && !(filter->kind & FILTER_REFS_ROOT_REFS) &&
(filter->kind & FILTER_REFS_DETACHED_HEAD))
refs_head_ref(get_main_ref_store(the_repository), fn,
cb_data);
}
clear_contains_cache(&filter->internal.contains_cache);
clear_contains_cache(&filter->internal.no_contains_cache);
return ret;
}
/*
* API for filtering a set of refs. Based on the type of refs the user
* has requested, we iterate through those refs and apply filters
* as per the given ref_filter structure and finally store the
* filtered refs in the ref_array structure.
*/
int filter_refs(struct ref_array *array, struct ref_filter *filter, unsigned int type)
{
struct ref_filter_cbdata ref_cbdata;
int save_commit_buffer_orig;
int ret = 0;
ref_cbdata.array = array;
ref_cbdata.filter = filter;
save_commit_buffer_orig = save_commit_buffer;
save_commit_buffer = 0;
ret = do_filter_refs(filter, type, filter_one, &ref_cbdata);
/* Filters that need revision walking */
reach_filter(array, &filter->reachable_from, INCLUDE_REACHED);
reach_filter(array, &filter->unreachable_from, EXCLUDE_REACHED);
ref-filter: disable save_commit_buffer while traversing Various ref-filter options like "--contains" or "--merged" may cause us to traverse large segments of the history graph. It's counter-productive to have save_commit_buffer turned on, as that will instruct the commit code to cache in-memory the object contents for each commit we traverse. This increases the amount of heap memory used while providing little or no benefit, since we're not actually planning to display those commits (which is the usual reason that tools like git-log want to keep them around). We can easily disable this feature while ref-filter is running. This lowers peak heap (as measured by massif) for running: git tag --contains 1da177e4c3 in linux.git from ~100MB to ~20MB. It also seems to improve runtime by 4-5% (600ms vs 630ms). A few points to note: - it should be safe to temporarily disable save_commit_buffer like this. The saved buffers are accessed through get_commit_buffer(), which treats the saved ones like a cache, and loads on-demand from the object database on a cache miss. So any code that was using this would not be wrong, it might just incur an extra object lookup for some objects. But... - I don't think any ref-filter related code is using the cache. While it's true that an option like "--format=%(*contents:subject)" or "--sort=*authordate" will need to look at the commit contents, ref-filter doesn't use get_commit_buffer() to do so! It always reads the objects directly via read_object_file(), though it does avoid re-reading objects if the format can be satisfied without them. Timing "git tag --format=%(*authordate)" shows that we're the same before and after, as expected. - Note that all of this assumes you don't have a commit-graph file. if you do, then the heap usage is even lower, and the runtime is 10x faster. So in that sense this is not urgent, as there's a much better solution. But since it's such an obvious and easy win for fallback cases (including commits which aren't yet in the graph file), there's no reason not to. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-07-11 22:48:06 +08:00
save_commit_buffer = save_commit_buffer_orig;
return ret;
}
static inline int can_do_iterative_format(struct ref_filter *filter,
struct ref_sorting *sorting,
struct ref_format *format)
{
/*
* Filtering & formatting results within a single ref iteration
* callback is not compatible with options that require
* post-processing a filtered ref_array. These include:
* - filtering on reachability
* - sorting the filtered results
* - including ahead-behind information in the formatted output
*/
return !(filter->reachable_from ||
filter->unreachable_from ||
sorting ||
for-each-ref: add 'is-base' token The previous change introduced the get_branch_base_for_tip() method in commit-reach.c. The motivation of that change was about using a heuristic to deteremine the base branch for a source commit from a list of candidate commit tips. This change makes that algorithm visible to users via a new atom in the 'git for-each-ref' format. This change is very similar to the chang in 49abcd21da6 (for-each-ref: add ahead-behind format atom, 2023-03-20). Introduce the 'is-base:<source>' atom, which will indicate that the algorithm should be computed and the result of the algorithm is reported using an indicator of the form '(<source>)'. For example, using '%(is-base:HEAD)' would result in one line having the token '(HEAD)'. Use the sorted order of refs included in the ref filter to break ties in the algorithm's heuristic. In the previous change, the motivating examples include using an L0 trunk, long-lived L1 branches, and temporary release branches. A caller could communicate the ordered preference among these categories using the input refpecs and avoiding a different sort mechanism. This sorting behavior is tested in the test scripts. It is important to include this atom as a special case to can_do_iterative_format() to match the expectations created in bd98f9774e1 (ref-filter.c: filter & format refs in the same callback, 2023-11-14). The ahead-behind atom was one of the special cases, and this similarly requires using an algorithm across all input refs before starting the format of any single ref. In the test script, the format tokens use colons or lack whitespace to avoid Git complaining about trailing whitespace errors. Signed-off-by: Derrick Stolee <stolee@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2024-08-14 18:31:29 +08:00
format->bases.nr ||
format->is_base_tips.nr);
}
void filter_and_format_refs(struct ref_filter *filter, unsigned int type,
struct ref_sorting *sorting,
struct ref_format *format)
{
if (can_do_iterative_format(filter, sorting, format)) {
int save_commit_buffer_orig;
struct ref_filter_and_format_cbdata ref_cbdata = {
.filter = filter,
.format = format,
};
save_commit_buffer_orig = save_commit_buffer;
save_commit_buffer = 0;
do_filter_refs(filter, type, filter_and_format_one, &ref_cbdata);
save_commit_buffer = save_commit_buffer_orig;
} else {
struct ref_array array = { 0 };
filter_refs(&array, filter, type);
filter_ahead_behind(the_repository, format, &array);
for-each-ref: add 'is-base' token The previous change introduced the get_branch_base_for_tip() method in commit-reach.c. The motivation of that change was about using a heuristic to deteremine the base branch for a source commit from a list of candidate commit tips. This change makes that algorithm visible to users via a new atom in the 'git for-each-ref' format. This change is very similar to the chang in 49abcd21da6 (for-each-ref: add ahead-behind format atom, 2023-03-20). Introduce the 'is-base:<source>' atom, which will indicate that the algorithm should be computed and the result of the algorithm is reported using an indicator of the form '(<source>)'. For example, using '%(is-base:HEAD)' would result in one line having the token '(HEAD)'. Use the sorted order of refs included in the ref filter to break ties in the algorithm's heuristic. In the previous change, the motivating examples include using an L0 trunk, long-lived L1 branches, and temporary release branches. A caller could communicate the ordered preference among these categories using the input refpecs and avoiding a different sort mechanism. This sorting behavior is tested in the test scripts. It is important to include this atom as a special case to can_do_iterative_format() to match the expectations created in bd98f9774e1 (ref-filter.c: filter & format refs in the same callback, 2023-11-14). The ahead-behind atom was one of the special cases, and this similarly requires using an algorithm across all input refs before starting the format of any single ref. In the test script, the format tokens use colons or lack whitespace to avoid Git complaining about trailing whitespace errors. Signed-off-by: Derrick Stolee <stolee@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2024-08-14 18:31:29 +08:00
filter_is_base(the_repository, format, &array);
ref_array_sort(sorting, &array);
print_formatted_ref_array(&array, format);
ref_array_clear(&array);
}
}
branch: sort detached HEAD based on a flag Change the ref-filter sorting of detached HEAD to check the FILTER_REFS_DETACHED_HEAD flag, instead of relying on the ref description filled-in by get_head_description() to start with "(", which in turn we expect to ASCII-sort before any other reference. For context, we'd like the detached line to appear first at the start of "git branch -l", e.g.: $ git branch -l * (HEAD detached at <hash>) master This doesn't change that, but improves on a fix made in 28438e84e04 (ref-filter: sort detached HEAD lines firstly, 2019-06-18) and gives the Chinese translation the ability to use its preferred punctuation marks again. In Chinese the fullwidth versions of punctuation like "()" are typically written as (U+FF08 fullwidth left parenthesis), (U+FF09 fullwidth right parenthesis) instead[1]. This form is used in both po/zh_{CN,TW}.po in most cases where "()" is translated in a string. Aside from that improvement to the Chinese translation, it also just makes for cleaner code that we mark any special cases in the ref_array we're sorting with flags and make the sort function aware of them, instead of piggy-backing on the general-case of strcmp() doing the right thing. As seen in the amended tests this made reverse sorting a bit more consistent. Before this we'd sometimes sort this message in the middle, now it's consistently at the beginning or end, depending on whether we're doing a normal or reverse sort. Having it at the end doesn't make much sense either, but at least it behaves consistently now. A follow-up commit will make this behavior under reverse sorting even better. I'm removing the "TRANSLATORS" comments that were in the old code while I'm at it. Those were added in d4919bb288e (ref-filter: move get_head_description() from branch.c, 2017-01-10). I think it's obvious from context, string and translation memory in typical translation tools that these are the same or similar string. 1. https://en.wikipedia.org/wiki/Chinese_punctuation#Marks_similar_to_European_punctuation Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-01-07 17:51:52 +08:00
static int compare_detached_head(struct ref_array_item *a, struct ref_array_item *b)
{
if (!(a->kind ^ b->kind))
BUG("ref_kind_from_refname() should only mark one ref as HEAD");
if (a->kind & FILTER_REFS_DETACHED_HEAD)
return -1;
else if (b->kind & FILTER_REFS_DETACHED_HEAD)
return 1;
BUG("should have died in the xor check above");
return 0;
}
ref-filter: add %(raw) atom Add new formatting option `%(raw)`, which will print the raw object data without any changes. It will help further to migrate all cat-file formatting logic from cat-file to ref-filter. The raw data of blob, tree objects may contain '\0', but most of the logic in `ref-filter` depends on the output of the atom being text (specifically, no embedded NULs in it). E.g. `quote_formatting()` use `strbuf_addstr()` or `*._quote_buf()` add the data to the buffer. The raw data of a tree object is `100644 one\0...`, only the `100644 one` will be added to the buffer, which is incorrect. Therefore, we need to find a way to record the length of the atom_value's member `s`. Although strbuf can already record the string and its length, if we want to replace the type of atom_value's member `s` with strbuf, many places in ref-filter that are filled with dynamically allocated mermory in `v->s` are not easy to replace. At the same time, we need to check if `v->s == NULL` in populate_value(), and strbuf cannot easily distinguish NULL and empty strings, but c-style "const char *" can do it. So add a new member in `struct atom_value`: `s_size`, which can record raw object size, it can help us add raw object data to the buffer or compare two buffers which contain raw object data. Note that `--format=%(raw)` cannot be used with `--python`, `--shell`, `--tcl`, and `--perl` because if the binary raw data is passed to a variable in such languages, these may not support arbitrary binary data in their string variable type. Reviewed-by: Jacob Keller <jacob.keller@gmail.com> Mentored-by: Christian Couder <christian.couder@gmail.com> Mentored-by: Hariom Verma <hariom18599@gmail.com> Helped-by: Bagas Sanjaya <bagasdotme@gmail.com> Helped-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Helped-by: Felipe Contreras <felipe.contreras@gmail.com> Helped-by: Phillip Wood <phillip.wood@dunelm.org.uk> Helped-by: Junio C Hamano <gitster@pobox.com> Based-on-patch-by: Olga Telezhnaya <olyatelezhnaya@gmail.com> Signed-off-by: ZheNing Hu <adlternative@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-07-26 11:26:47 +08:00
static int memcasecmp(const void *vs1, const void *vs2, size_t n)
{
const char *s1 = vs1, *s2 = vs2;
const char *end = s1 + n;
for (; s1 < end; s1++, s2++) {
int diff = tolower(*s1) - tolower(*s2);
if (diff)
return diff;
}
return 0;
}
for-each-ref: delay parsing of --sort=<atom> options The for-each-ref family of commands invoke parsers immediately when it sees each --sort=<atom> option, and die before even seeing the other options on the command line when the <atom> is unrecognised. Instead, accumulate them in a string list, and have them parsed into a ref_sorting structure after the command line parsing is done. As a consequence, "git branch --sort=bogus -h" used to fail to give the brief help, which arguably may have been a feature, now does so, which is more consistent with how other options work. The patch is smaller than the actual extent of the "damage" to the codebase, thanks to the fact that the original code consistently used OPT_REF_SORT() macro to handle command line options. We only needed to replace the variable used for the list, and implementation of the callback function used in the macro. The old rule was for the users of the API to: - Declare ref_sorting and ref_sorting_tail variables; - OPT_REF_SORT() macro will instantiate ref_sorting instance (which may barf and die) and append it to the tail; - Append to the tail each ref_sorting read from the configuration by parsing in the config callback (which may barf and die); - See if ref_sorting is null and use ref_sorting_default() instead. Now the rule is not all that different but is simpler: - Declare ref_sorting_options string list. - OPT_REF_SORT() macro will append it to the string list; - Append to the string list the sort key read from the configuration; - call ref_sorting_options() to turn the string list to ref_sorting structure (which also deals with the default value). As side effects, this change also cleans up a few issues: - 95be717c (parse_opt_ref_sorting: always use with NONEG flag, 2019-03-20) muses that "git for-each-ref --no-sort" should simply clear the sort keys accumulated so far; it now does. - The implementation detail of "struct ref_sorting" and the helper function parse_ref_sorting() can now be private to the ref-filter API implementation. - If you set branch.sort to a bogus value, the any "git branch" invocation, not only the listing mode, would abort with the original code; now it doesn't Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-10-21 03:23:53 +08:00
struct ref_sorting {
struct ref_sorting *next;
int atom; /* index into used_atom array (internal) */
enum ref_sorting_order sort_flags;
};
static int cmp_ref_sorting(struct ref_sorting *s, struct ref_array_item *a, struct ref_array_item *b)
{
struct atom_value *va, *vb;
int cmp;
int cmp_detached_head = 0;
cmp_type cmp_type = used_atom[s->atom].type;
struct strbuf err = STRBUF_INIT;
if (get_ref_atom_value(a, s->atom, &va, &err))
die("%s", err.buf);
if (get_ref_atom_value(b, s->atom, &vb, &err))
die("%s", err.buf);
strbuf_release(&err);
branch: sort detached HEAD based on a flag Change the ref-filter sorting of detached HEAD to check the FILTER_REFS_DETACHED_HEAD flag, instead of relying on the ref description filled-in by get_head_description() to start with "(", which in turn we expect to ASCII-sort before any other reference. For context, we'd like the detached line to appear first at the start of "git branch -l", e.g.: $ git branch -l * (HEAD detached at <hash>) master This doesn't change that, but improves on a fix made in 28438e84e04 (ref-filter: sort detached HEAD lines firstly, 2019-06-18) and gives the Chinese translation the ability to use its preferred punctuation marks again. In Chinese the fullwidth versions of punctuation like "()" are typically written as (U+FF08 fullwidth left parenthesis), (U+FF09 fullwidth right parenthesis) instead[1]. This form is used in both po/zh_{CN,TW}.po in most cases where "()" is translated in a string. Aside from that improvement to the Chinese translation, it also just makes for cleaner code that we mark any special cases in the ref_array we're sorting with flags and make the sort function aware of them, instead of piggy-backing on the general-case of strcmp() doing the right thing. As seen in the amended tests this made reverse sorting a bit more consistent. Before this we'd sometimes sort this message in the middle, now it's consistently at the beginning or end, depending on whether we're doing a normal or reverse sort. Having it at the end doesn't make much sense either, but at least it behaves consistently now. A follow-up commit will make this behavior under reverse sorting even better. I'm removing the "TRANSLATORS" comments that were in the old code while I'm at it. Those were added in d4919bb288e (ref-filter: move get_head_description() from branch.c, 2017-01-10). I think it's obvious from context, string and translation memory in typical translation tools that these are the same or similar string. 1. https://en.wikipedia.org/wiki/Chinese_punctuation#Marks_similar_to_European_punctuation Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-01-07 17:51:52 +08:00
if (s->sort_flags & REF_SORTING_DETACHED_HEAD_FIRST &&
((a->kind | b->kind) & FILTER_REFS_DETACHED_HEAD)) {
cmp = compare_detached_head(a, b);
cmp_detached_head = 1;
branch: sort detached HEAD based on a flag Change the ref-filter sorting of detached HEAD to check the FILTER_REFS_DETACHED_HEAD flag, instead of relying on the ref description filled-in by get_head_description() to start with "(", which in turn we expect to ASCII-sort before any other reference. For context, we'd like the detached line to appear first at the start of "git branch -l", e.g.: $ git branch -l * (HEAD detached at <hash>) master This doesn't change that, but improves on a fix made in 28438e84e04 (ref-filter: sort detached HEAD lines firstly, 2019-06-18) and gives the Chinese translation the ability to use its preferred punctuation marks again. In Chinese the fullwidth versions of punctuation like "()" are typically written as (U+FF08 fullwidth left parenthesis), (U+FF09 fullwidth right parenthesis) instead[1]. This form is used in both po/zh_{CN,TW}.po in most cases where "()" is translated in a string. Aside from that improvement to the Chinese translation, it also just makes for cleaner code that we mark any special cases in the ref_array we're sorting with flags and make the sort function aware of them, instead of piggy-backing on the general-case of strcmp() doing the right thing. As seen in the amended tests this made reverse sorting a bit more consistent. Before this we'd sometimes sort this message in the middle, now it's consistently at the beginning or end, depending on whether we're doing a normal or reverse sort. Having it at the end doesn't make much sense either, but at least it behaves consistently now. A follow-up commit will make this behavior under reverse sorting even better. I'm removing the "TRANSLATORS" comments that were in the old code while I'm at it. Those were added in d4919bb288e (ref-filter: move get_head_description() from branch.c, 2017-01-10). I think it's obvious from context, string and translation memory in typical translation tools that these are the same or similar string. 1. https://en.wikipedia.org/wiki/Chinese_punctuation#Marks_similar_to_European_punctuation Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-01-07 17:51:52 +08:00
} else if (s->sort_flags & REF_SORTING_VERSION) {
cmp = versioncmp(va->s, vb->s);
} else if (cmp_type == FIELD_STR) {
ref-filter: add %(raw) atom Add new formatting option `%(raw)`, which will print the raw object data without any changes. It will help further to migrate all cat-file formatting logic from cat-file to ref-filter. The raw data of blob, tree objects may contain '\0', but most of the logic in `ref-filter` depends on the output of the atom being text (specifically, no embedded NULs in it). E.g. `quote_formatting()` use `strbuf_addstr()` or `*._quote_buf()` add the data to the buffer. The raw data of a tree object is `100644 one\0...`, only the `100644 one` will be added to the buffer, which is incorrect. Therefore, we need to find a way to record the length of the atom_value's member `s`. Although strbuf can already record the string and its length, if we want to replace the type of atom_value's member `s` with strbuf, many places in ref-filter that are filled with dynamically allocated mermory in `v->s` are not easy to replace. At the same time, we need to check if `v->s == NULL` in populate_value(), and strbuf cannot easily distinguish NULL and empty strings, but c-style "const char *" can do it. So add a new member in `struct atom_value`: `s_size`, which can record raw object size, it can help us add raw object data to the buffer or compare two buffers which contain raw object data. Note that `--format=%(raw)` cannot be used with `--python`, `--shell`, `--tcl`, and `--perl` because if the binary raw data is passed to a variable in such languages, these may not support arbitrary binary data in their string variable type. Reviewed-by: Jacob Keller <jacob.keller@gmail.com> Mentored-by: Christian Couder <christian.couder@gmail.com> Mentored-by: Hariom Verma <hariom18599@gmail.com> Helped-by: Bagas Sanjaya <bagasdotme@gmail.com> Helped-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Helped-by: Felipe Contreras <felipe.contreras@gmail.com> Helped-by: Phillip Wood <phillip.wood@dunelm.org.uk> Helped-by: Junio C Hamano <gitster@pobox.com> Based-on-patch-by: Olga Telezhnaya <olyatelezhnaya@gmail.com> Signed-off-by: ZheNing Hu <adlternative@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-07-26 11:26:47 +08:00
if (va->s_size < 0 && vb->s_size < 0) {
int (*cmp_fn)(const char *, const char *);
cmp_fn = s->sort_flags & REF_SORTING_ICASE
? strcasecmp : strcmp;
cmp = cmp_fn(va->s, vb->s);
} else {
size_t a_size = va->s_size < 0 ?
strlen(va->s) : va->s_size;
size_t b_size = vb->s_size < 0 ?
strlen(vb->s) : vb->s_size;
int (*cmp_fn)(const void *, const void *, size_t);
cmp_fn = s->sort_flags & REF_SORTING_ICASE
? memcasecmp : memcmp;
cmp = cmp_fn(va->s, vb->s, b_size > a_size ?
a_size : b_size);
if (!cmp) {
if (a_size > b_size)
cmp = 1;
else if (a_size < b_size)
cmp = -1;
}
}
} else {
if (va->value < vb->value)
cmp = -1;
else if (va->value == vb->value)
ref-filter: apply fallback refname sort only after all user sorts Commit 9e468334b4 (ref-filter: fallback on alphabetical comparison, 2015-10-30) taught ref-filter's sort to fallback to comparing refnames. But it did it at the wrong level, overriding the comparison result for a single "--sort" key from the user, rather than after all sort keys have been exhausted. This worked correctly for a single "--sort" option, but not for multiple ones. We'd break any ties in the first key with the refname and never evaluate the second key at all. To make matters even more interesting, we only applied this fallback sometimes! For a field like "taggeremail" which requires a string comparison, we'd truly return the result of strcmp(), even if it was 0. But for numerical "value" fields like "taggerdate", we did apply the fallback. And that's why our multiple-sort test missed this: it uses taggeremail as the main comparison. So let's start by adding a much more rigorous test. We'll have a set of commits expressing every combination of two tagger emails, dates, and refnames. Then we can confirm that our sort is applied with the correct precedence, and we'll be hitting both the string and value comparators. That does show the bug, and the fix is simple: moving the fallback to the outer compare_refs() function, after all ref_sorting keys have been exhausted. Note that in the outer function we don't have an "ignore_case" flag, as it's part of each individual ref_sorting element. It's debatable what such a fallback should do, since we didn't use the user's keys to match. But until now we have been trying to respect that flag, so the least-invasive thing is to try to continue to do so. Since all callers in the current code either set the flag for all keys or for none, we can just pull the flag from the first key. In a hypothetical world where the user really can flip the case-insensitivity of keys separately, we may want to extend the code to distinguish that case from a blanket "--ignore-case". Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-05-03 17:13:09 +08:00
cmp = 0;
else
cmp = 1;
}
return (s->sort_flags & REF_SORTING_REVERSE && !cmp_detached_head)
? -cmp : cmp;
}
static int compare_refs(const void *a_, const void *b_, void *ref_sorting)
{
struct ref_array_item *a = *((struct ref_array_item **)a_);
struct ref_array_item *b = *((struct ref_array_item **)b_);
struct ref_sorting *s;
for (s = ref_sorting; s; s = s->next) {
int cmp = cmp_ref_sorting(s, a, b);
if (cmp)
return cmp;
}
ref-filter: apply fallback refname sort only after all user sorts Commit 9e468334b4 (ref-filter: fallback on alphabetical comparison, 2015-10-30) taught ref-filter's sort to fallback to comparing refnames. But it did it at the wrong level, overriding the comparison result for a single "--sort" key from the user, rather than after all sort keys have been exhausted. This worked correctly for a single "--sort" option, but not for multiple ones. We'd break any ties in the first key with the refname and never evaluate the second key at all. To make matters even more interesting, we only applied this fallback sometimes! For a field like "taggeremail" which requires a string comparison, we'd truly return the result of strcmp(), even if it was 0. But for numerical "value" fields like "taggerdate", we did apply the fallback. And that's why our multiple-sort test missed this: it uses taggeremail as the main comparison. So let's start by adding a much more rigorous test. We'll have a set of commits expressing every combination of two tagger emails, dates, and refnames. Then we can confirm that our sort is applied with the correct precedence, and we'll be hitting both the string and value comparators. That does show the bug, and the fix is simple: moving the fallback to the outer compare_refs() function, after all ref_sorting keys have been exhausted. Note that in the outer function we don't have an "ignore_case" flag, as it's part of each individual ref_sorting element. It's debatable what such a fallback should do, since we didn't use the user's keys to match. But until now we have been trying to respect that flag, so the least-invasive thing is to try to continue to do so. Since all callers in the current code either set the flag for all keys or for none, we can just pull the flag from the first key. In a hypothetical world where the user really can flip the case-insensitivity of keys separately, we may want to extend the code to distinguish that case from a blanket "--ignore-case". Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-05-03 17:13:09 +08:00
s = ref_sorting;
return s && s->sort_flags & REF_SORTING_ICASE ?
ref-filter: apply fallback refname sort only after all user sorts Commit 9e468334b4 (ref-filter: fallback on alphabetical comparison, 2015-10-30) taught ref-filter's sort to fallback to comparing refnames. But it did it at the wrong level, overriding the comparison result for a single "--sort" key from the user, rather than after all sort keys have been exhausted. This worked correctly for a single "--sort" option, but not for multiple ones. We'd break any ties in the first key with the refname and never evaluate the second key at all. To make matters even more interesting, we only applied this fallback sometimes! For a field like "taggeremail" which requires a string comparison, we'd truly return the result of strcmp(), even if it was 0. But for numerical "value" fields like "taggerdate", we did apply the fallback. And that's why our multiple-sort test missed this: it uses taggeremail as the main comparison. So let's start by adding a much more rigorous test. We'll have a set of commits expressing every combination of two tagger emails, dates, and refnames. Then we can confirm that our sort is applied with the correct precedence, and we'll be hitting both the string and value comparators. That does show the bug, and the fix is simple: moving the fallback to the outer compare_refs() function, after all ref_sorting keys have been exhausted. Note that in the outer function we don't have an "ignore_case" flag, as it's part of each individual ref_sorting element. It's debatable what such a fallback should do, since we didn't use the user's keys to match. But until now we have been trying to respect that flag, so the least-invasive thing is to try to continue to do so. Since all callers in the current code either set the flag for all keys or for none, we can just pull the flag from the first key. In a hypothetical world where the user really can flip the case-insensitivity of keys separately, we may want to extend the code to distinguish that case from a blanket "--ignore-case". Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-05-03 17:13:09 +08:00
strcasecmp(a->refname, b->refname) :
strcmp(a->refname, b->refname);
}
void ref_sorting_set_sort_flags_all(struct ref_sorting *sorting,
unsigned int mask, int on)
{
for (; sorting; sorting = sorting->next) {
if (on)
sorting->sort_flags |= mask;
else
sorting->sort_flags &= ~mask;
}
}
void ref_array_sort(struct ref_sorting *sorting, struct ref_array *array)
{
ref-filter.c: really don't sort when using --no-sort When '--no-sort' is passed to 'for-each-ref', 'tag', and 'branch', the printed refs are still sorted by ascending refname. Change the handling of sort options in these commands so that '--no-sort' to truly disables sorting. '--no-sort' does not disable sorting in these commands is because their option parsing does not distinguish between "the absence of '--sort'" (and/or values for tag.sort & branch.sort) and '--no-sort'. Both result in an empty 'sorting_options' string list, which is parsed by 'ref_sorting_options()' to create the 'struct ref_sorting *' for the command. If the string list is empty, 'ref_sorting_options()' interprets that as "the absence of '--sort'" and returns the default ref sorting structure (equivalent to "refname" sort). To handle '--no-sort' properly while preserving the "refname" sort in the "absence of --sort'" case, first explicitly add "refname" to the string list *before* parsing options. This alone doesn't actually change any behavior, since 'compare_refs()' already falls back on comparing refnames if two refs are equal w.r.t all other sort keys. Now that the string list is populated by default, '--no-sort' is the only way to empty the 'sorting_options' string list. Update 'ref_sorting_options()' to return a NULL 'struct ref_sorting *' if the string list is empty, and add a condition to 'ref_array_sort()' to skip the sort altogether if the sort structure is NULL. Note that other functions using 'struct ref_sorting *' do not need any changes because they already ignore NULL values. Finally, remove the condition around sorting in 'ls-remote', since it's no longer necessary. Unlike 'for-each-ref' et. al., it does *not* do any sorting by default. This default is preserved by simply leaving its sort key string list empty before parsing options; if no additional sort keys are set, 'struct ref_sorting *' is NULL and sorting is skipped. Signed-off-by: Victoria Dye <vdye@github.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-11-15 03:53:49 +08:00
if (sorting)
QSORT_S(array->items, array->nr, compare_refs, sorting);
}
static void append_literal(const char *cp, const char *ep, struct ref_formatting_state *state)
{
struct strbuf *s = &state->stack->output;
while (*cp && (!ep || cp < ep)) {
if (*cp == '%') {
if (cp[1] == '%')
cp++;
else {
int ch = hex2chr(cp + 1);
if (0 <= ch) {
strbuf_addch(s, ch);
cp += 3;
continue;
}
}
}
strbuf_addch(s, *cp);
cp++;
}
}
int format_ref_array_item(struct ref_array_item *info,
struct ref_format *format,
struct strbuf *final_buf,
struct strbuf *error_buf)
{
const char *cp, *sp, *ep;
struct ref_formatting_state state = REF_FORMATTING_STATE_INIT;
state.quote_style = format->quote_style;
push_stack_element(&state.stack);
for (cp = format->format; *cp && (sp = find_next(cp)); cp = ep + 1) {
struct atom_value *atomv;
int pos;
ep = strchr(sp, ')');
if (cp < sp)
append_literal(cp, sp, &state);
pos = parse_ref_filter_atom(format, sp + 2, ep, error_buf);
if (pos < 0 || get_ref_atom_value(info, pos, &atomv, error_buf) ||
atomv->handler(atomv, &state, error_buf)) {
pop_stack_element(&state.stack);
return -1;
}
}
if (*cp) {
sp = cp + strlen(cp);
append_literal(cp, sp, &state);
}
if (format->need_color_reset_at_eol) {
ref-filter: add %(raw) atom Add new formatting option `%(raw)`, which will print the raw object data without any changes. It will help further to migrate all cat-file formatting logic from cat-file to ref-filter. The raw data of blob, tree objects may contain '\0', but most of the logic in `ref-filter` depends on the output of the atom being text (specifically, no embedded NULs in it). E.g. `quote_formatting()` use `strbuf_addstr()` or `*._quote_buf()` add the data to the buffer. The raw data of a tree object is `100644 one\0...`, only the `100644 one` will be added to the buffer, which is incorrect. Therefore, we need to find a way to record the length of the atom_value's member `s`. Although strbuf can already record the string and its length, if we want to replace the type of atom_value's member `s` with strbuf, many places in ref-filter that are filled with dynamically allocated mermory in `v->s` are not easy to replace. At the same time, we need to check if `v->s == NULL` in populate_value(), and strbuf cannot easily distinguish NULL and empty strings, but c-style "const char *" can do it. So add a new member in `struct atom_value`: `s_size`, which can record raw object size, it can help us add raw object data to the buffer or compare two buffers which contain raw object data. Note that `--format=%(raw)` cannot be used with `--python`, `--shell`, `--tcl`, and `--perl` because if the binary raw data is passed to a variable in such languages, these may not support arbitrary binary data in their string variable type. Reviewed-by: Jacob Keller <jacob.keller@gmail.com> Mentored-by: Christian Couder <christian.couder@gmail.com> Mentored-by: Hariom Verma <hariom18599@gmail.com> Helped-by: Bagas Sanjaya <bagasdotme@gmail.com> Helped-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Helped-by: Felipe Contreras <felipe.contreras@gmail.com> Helped-by: Phillip Wood <phillip.wood@dunelm.org.uk> Helped-by: Junio C Hamano <gitster@pobox.com> Based-on-patch-by: Olga Telezhnaya <olyatelezhnaya@gmail.com> Signed-off-by: ZheNing Hu <adlternative@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-07-26 11:26:47 +08:00
struct atom_value resetv = ATOM_VALUE_INIT;
resetv.s = GIT_COLOR_RESET;
if (append_atom(&resetv, &state, error_buf)) {
pop_stack_element(&state.stack);
return -1;
}
}
if (state.stack->prev) {
pop_stack_element(&state.stack);
return strbuf_addf_ret(error_buf, -1, _("format: %%(end) atom missing"));
}
strbuf_addbuf(final_buf, &state.stack->output);
pop_stack_element(&state.stack);
return 0;
}
void print_formatted_ref_array(struct ref_array *array, struct ref_format *format)
{
int total;
struct strbuf output = STRBUF_INIT, err = STRBUF_INIT;
total = format->array_opts.max_count;
if (!total || array->nr < total)
total = array->nr;
for (int i = 0; i < total; i++) {
strbuf_reset(&err);
strbuf_reset(&output);
if (format_ref_array_item(array->items[i], format, &output, &err))
die("%s", err.buf);
if (output.len || !format->array_opts.omit_empty) {
fwrite(output.buf, 1, output.len, stdout);
putchar('\n');
}
}
strbuf_release(&err);
strbuf_release(&output);
}
void pretty_print_ref(const char *name, const struct object_id *oid,
struct ref_format *format)
{
struct ref_array_item *ref_item;
struct strbuf output = STRBUF_INIT;
struct strbuf err = STRBUF_INIT;
ref_item = new_ref_array_item(name, oid);
ref_item->kind = ref_kind_from_refname(name);
if (format_ref_array_item(ref_item, format, &output, &err))
die("%s", err.buf);
fwrite(output.buf, 1, output.len, stdout);
putchar('\n');
strbuf_release(&err);
strbuf_release(&output);
free_array_item(ref_item);
}
static int parse_sorting_atom(const char *atom)
{
/*
* This parses an atom using a dummy ref_format, since we don't
* actually care about the formatting details.
*/
struct ref_format dummy = REF_FORMAT_INIT;
const char *end = atom + strlen(atom);
struct strbuf err = STRBUF_INIT;
int res = parse_ref_filter_atom(&dummy, atom, end, &err);
if (res < 0)
die("%s", err.buf);
strbuf_release(&err);
return res;
}
for-each-ref: delay parsing of --sort=<atom> options The for-each-ref family of commands invoke parsers immediately when it sees each --sort=<atom> option, and die before even seeing the other options on the command line when the <atom> is unrecognised. Instead, accumulate them in a string list, and have them parsed into a ref_sorting structure after the command line parsing is done. As a consequence, "git branch --sort=bogus -h" used to fail to give the brief help, which arguably may have been a feature, now does so, which is more consistent with how other options work. The patch is smaller than the actual extent of the "damage" to the codebase, thanks to the fact that the original code consistently used OPT_REF_SORT() macro to handle command line options. We only needed to replace the variable used for the list, and implementation of the callback function used in the macro. The old rule was for the users of the API to: - Declare ref_sorting and ref_sorting_tail variables; - OPT_REF_SORT() macro will instantiate ref_sorting instance (which may barf and die) and append it to the tail; - Append to the tail each ref_sorting read from the configuration by parsing in the config callback (which may barf and die); - See if ref_sorting is null and use ref_sorting_default() instead. Now the rule is not all that different but is simpler: - Declare ref_sorting_options string list. - OPT_REF_SORT() macro will append it to the string list; - Append to the string list the sort key read from the configuration; - call ref_sorting_options() to turn the string list to ref_sorting structure (which also deals with the default value). As side effects, this change also cleans up a few issues: - 95be717c (parse_opt_ref_sorting: always use with NONEG flag, 2019-03-20) muses that "git for-each-ref --no-sort" should simply clear the sort keys accumulated so far; it now does. - The implementation detail of "struct ref_sorting" and the helper function parse_ref_sorting() can now be private to the ref-filter API implementation. - If you set branch.sort to a bogus value, the any "git branch" invocation, not only the listing mode, would abort with the original code; now it doesn't Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-10-21 03:23:53 +08:00
static void parse_ref_sorting(struct ref_sorting **sorting_tail, const char *arg)
{
struct ref_sorting *s;
CALLOC_ARRAY(s, 1);
s->next = *sorting_tail;
*sorting_tail = s;
if (*arg == '-') {
s->sort_flags |= REF_SORTING_REVERSE;
arg++;
}
if (skip_prefix(arg, "version:", &arg) ||
skip_prefix(arg, "v:", &arg))
s->sort_flags |= REF_SORTING_VERSION;
s->atom = parse_sorting_atom(arg);
}
for-each-ref: delay parsing of --sort=<atom> options The for-each-ref family of commands invoke parsers immediately when it sees each --sort=<atom> option, and die before even seeing the other options on the command line when the <atom> is unrecognised. Instead, accumulate them in a string list, and have them parsed into a ref_sorting structure after the command line parsing is done. As a consequence, "git branch --sort=bogus -h" used to fail to give the brief help, which arguably may have been a feature, now does so, which is more consistent with how other options work. The patch is smaller than the actual extent of the "damage" to the codebase, thanks to the fact that the original code consistently used OPT_REF_SORT() macro to handle command line options. We only needed to replace the variable used for the list, and implementation of the callback function used in the macro. The old rule was for the users of the API to: - Declare ref_sorting and ref_sorting_tail variables; - OPT_REF_SORT() macro will instantiate ref_sorting instance (which may barf and die) and append it to the tail; - Append to the tail each ref_sorting read from the configuration by parsing in the config callback (which may barf and die); - See if ref_sorting is null and use ref_sorting_default() instead. Now the rule is not all that different but is simpler: - Declare ref_sorting_options string list. - OPT_REF_SORT() macro will append it to the string list; - Append to the string list the sort key read from the configuration; - call ref_sorting_options() to turn the string list to ref_sorting structure (which also deals with the default value). As side effects, this change also cleans up a few issues: - 95be717c (parse_opt_ref_sorting: always use with NONEG flag, 2019-03-20) muses that "git for-each-ref --no-sort" should simply clear the sort keys accumulated so far; it now does. - The implementation detail of "struct ref_sorting" and the helper function parse_ref_sorting() can now be private to the ref-filter API implementation. - If you set branch.sort to a bogus value, the any "git branch" invocation, not only the listing mode, would abort with the original code; now it doesn't Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-10-21 03:23:53 +08:00
struct ref_sorting *ref_sorting_options(struct string_list *options)
{
for-each-ref: delay parsing of --sort=<atom> options The for-each-ref family of commands invoke parsers immediately when it sees each --sort=<atom> option, and die before even seeing the other options on the command line when the <atom> is unrecognised. Instead, accumulate them in a string list, and have them parsed into a ref_sorting structure after the command line parsing is done. As a consequence, "git branch --sort=bogus -h" used to fail to give the brief help, which arguably may have been a feature, now does so, which is more consistent with how other options work. The patch is smaller than the actual extent of the "damage" to the codebase, thanks to the fact that the original code consistently used OPT_REF_SORT() macro to handle command line options. We only needed to replace the variable used for the list, and implementation of the callback function used in the macro. The old rule was for the users of the API to: - Declare ref_sorting and ref_sorting_tail variables; - OPT_REF_SORT() macro will instantiate ref_sorting instance (which may barf and die) and append it to the tail; - Append to the tail each ref_sorting read from the configuration by parsing in the config callback (which may barf and die); - See if ref_sorting is null and use ref_sorting_default() instead. Now the rule is not all that different but is simpler: - Declare ref_sorting_options string list. - OPT_REF_SORT() macro will append it to the string list; - Append to the string list the sort key read from the configuration; - call ref_sorting_options() to turn the string list to ref_sorting structure (which also deals with the default value). As side effects, this change also cleans up a few issues: - 95be717c (parse_opt_ref_sorting: always use with NONEG flag, 2019-03-20) muses that "git for-each-ref --no-sort" should simply clear the sort keys accumulated so far; it now does. - The implementation detail of "struct ref_sorting" and the helper function parse_ref_sorting() can now be private to the ref-filter API implementation. - If you set branch.sort to a bogus value, the any "git branch" invocation, not only the listing mode, would abort with the original code; now it doesn't Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-10-21 03:23:53 +08:00
struct string_list_item *item;
struct ref_sorting *sorting = NULL, **tail = &sorting;
ref-filter.c: really don't sort when using --no-sort When '--no-sort' is passed to 'for-each-ref', 'tag', and 'branch', the printed refs are still sorted by ascending refname. Change the handling of sort options in these commands so that '--no-sort' to truly disables sorting. '--no-sort' does not disable sorting in these commands is because their option parsing does not distinguish between "the absence of '--sort'" (and/or values for tag.sort & branch.sort) and '--no-sort'. Both result in an empty 'sorting_options' string list, which is parsed by 'ref_sorting_options()' to create the 'struct ref_sorting *' for the command. If the string list is empty, 'ref_sorting_options()' interprets that as "the absence of '--sort'" and returns the default ref sorting structure (equivalent to "refname" sort). To handle '--no-sort' properly while preserving the "refname" sort in the "absence of --sort'" case, first explicitly add "refname" to the string list *before* parsing options. This alone doesn't actually change any behavior, since 'compare_refs()' already falls back on comparing refnames if two refs are equal w.r.t all other sort keys. Now that the string list is populated by default, '--no-sort' is the only way to empty the 'sorting_options' string list. Update 'ref_sorting_options()' to return a NULL 'struct ref_sorting *' if the string list is empty, and add a condition to 'ref_array_sort()' to skip the sort altogether if the sort structure is NULL. Note that other functions using 'struct ref_sorting *' do not need any changes because they already ignore NULL values. Finally, remove the condition around sorting in 'ls-remote', since it's no longer necessary. Unlike 'for-each-ref' et. al., it does *not* do any sorting by default. This default is preserved by simply leaving its sort key string list empty before parsing options; if no additional sort keys are set, 'struct ref_sorting *' is NULL and sorting is skipped. Signed-off-by: Victoria Dye <vdye@github.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-11-15 03:53:49 +08:00
if (options->nr) {
for-each-ref: delay parsing of --sort=<atom> options The for-each-ref family of commands invoke parsers immediately when it sees each --sort=<atom> option, and die before even seeing the other options on the command line when the <atom> is unrecognised. Instead, accumulate them in a string list, and have them parsed into a ref_sorting structure after the command line parsing is done. As a consequence, "git branch --sort=bogus -h" used to fail to give the brief help, which arguably may have been a feature, now does so, which is more consistent with how other options work. The patch is smaller than the actual extent of the "damage" to the codebase, thanks to the fact that the original code consistently used OPT_REF_SORT() macro to handle command line options. We only needed to replace the variable used for the list, and implementation of the callback function used in the macro. The old rule was for the users of the API to: - Declare ref_sorting and ref_sorting_tail variables; - OPT_REF_SORT() macro will instantiate ref_sorting instance (which may barf and die) and append it to the tail; - Append to the tail each ref_sorting read from the configuration by parsing in the config callback (which may barf and die); - See if ref_sorting is null and use ref_sorting_default() instead. Now the rule is not all that different but is simpler: - Declare ref_sorting_options string list. - OPT_REF_SORT() macro will append it to the string list; - Append to the string list the sort key read from the configuration; - call ref_sorting_options() to turn the string list to ref_sorting structure (which also deals with the default value). As side effects, this change also cleans up a few issues: - 95be717c (parse_opt_ref_sorting: always use with NONEG flag, 2019-03-20) muses that "git for-each-ref --no-sort" should simply clear the sort keys accumulated so far; it now does. - The implementation detail of "struct ref_sorting" and the helper function parse_ref_sorting() can now be private to the ref-filter API implementation. - If you set branch.sort to a bogus value, the any "git branch" invocation, not only the listing mode, would abort with the original code; now it doesn't Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-10-21 03:23:53 +08:00
for_each_string_list_item(item, options)
parse_ref_sorting(tail, item->string);
}
/*
for-each-ref: delay parsing of --sort=<atom> options The for-each-ref family of commands invoke parsers immediately when it sees each --sort=<atom> option, and die before even seeing the other options on the command line when the <atom> is unrecognised. Instead, accumulate them in a string list, and have them parsed into a ref_sorting structure after the command line parsing is done. As a consequence, "git branch --sort=bogus -h" used to fail to give the brief help, which arguably may have been a feature, now does so, which is more consistent with how other options work. The patch is smaller than the actual extent of the "damage" to the codebase, thanks to the fact that the original code consistently used OPT_REF_SORT() macro to handle command line options. We only needed to replace the variable used for the list, and implementation of the callback function used in the macro. The old rule was for the users of the API to: - Declare ref_sorting and ref_sorting_tail variables; - OPT_REF_SORT() macro will instantiate ref_sorting instance (which may barf and die) and append it to the tail; - Append to the tail each ref_sorting read from the configuration by parsing in the config callback (which may barf and die); - See if ref_sorting is null and use ref_sorting_default() instead. Now the rule is not all that different but is simpler: - Declare ref_sorting_options string list. - OPT_REF_SORT() macro will append it to the string list; - Append to the string list the sort key read from the configuration; - call ref_sorting_options() to turn the string list to ref_sorting structure (which also deals with the default value). As side effects, this change also cleans up a few issues: - 95be717c (parse_opt_ref_sorting: always use with NONEG flag, 2019-03-20) muses that "git for-each-ref --no-sort" should simply clear the sort keys accumulated so far; it now does. - The implementation detail of "struct ref_sorting" and the helper function parse_ref_sorting() can now be private to the ref-filter API implementation. - If you set branch.sort to a bogus value, the any "git branch" invocation, not only the listing mode, would abort with the original code; now it doesn't Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-10-21 03:23:53 +08:00
* From here on, the ref_sorting list should be used to talk
* about the sort order used for the output. The caller
* should not touch the string form anymore.
*/
for-each-ref: delay parsing of --sort=<atom> options The for-each-ref family of commands invoke parsers immediately when it sees each --sort=<atom> option, and die before even seeing the other options on the command line when the <atom> is unrecognised. Instead, accumulate them in a string list, and have them parsed into a ref_sorting structure after the command line parsing is done. As a consequence, "git branch --sort=bogus -h" used to fail to give the brief help, which arguably may have been a feature, now does so, which is more consistent with how other options work. The patch is smaller than the actual extent of the "damage" to the codebase, thanks to the fact that the original code consistently used OPT_REF_SORT() macro to handle command line options. We only needed to replace the variable used for the list, and implementation of the callback function used in the macro. The old rule was for the users of the API to: - Declare ref_sorting and ref_sorting_tail variables; - OPT_REF_SORT() macro will instantiate ref_sorting instance (which may barf and die) and append it to the tail; - Append to the tail each ref_sorting read from the configuration by parsing in the config callback (which may barf and die); - See if ref_sorting is null and use ref_sorting_default() instead. Now the rule is not all that different but is simpler: - Declare ref_sorting_options string list. - OPT_REF_SORT() macro will append it to the string list; - Append to the string list the sort key read from the configuration; - call ref_sorting_options() to turn the string list to ref_sorting structure (which also deals with the default value). As side effects, this change also cleans up a few issues: - 95be717c (parse_opt_ref_sorting: always use with NONEG flag, 2019-03-20) muses that "git for-each-ref --no-sort" should simply clear the sort keys accumulated so far; it now does. - The implementation detail of "struct ref_sorting" and the helper function parse_ref_sorting() can now be private to the ref-filter API implementation. - If you set branch.sort to a bogus value, the any "git branch" invocation, not only the listing mode, would abort with the original code; now it doesn't Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-10-21 03:23:53 +08:00
string_list_clear(options, 0);
return sorting;
}
void ref_sorting_release(struct ref_sorting *sorting)
{
while (sorting) {
struct ref_sorting *next = sorting->next;
free(sorting);
sorting = next;
}
}
int parse_opt_merge_filter(const struct option *opt, const char *arg, int unset)
{
struct ref_filter *rf = opt->value;
struct object_id oid;
struct commit *merge_commit;
assert NOARG/NONEG behavior of parse-options callbacks When we define a parse-options callback, the flags we put in the option struct must match what the callback expects. For example, a callback which does not handle the "unset" parameter should only be used with PARSE_OPT_NONEG. But since the callback and the option struct are not defined next to each other, it's easy to get this wrong (as earlier patches in this series show). Fortunately, the compiler can help us here: compiling with -Wunused-parameters can show us which callbacks ignore their "unset" parameters (and likewise, ones that ignore "arg" expect to be triggered with PARSE_OPT_NOARG). But after we've inspected a callback and determined that all of its callers use the right flags, what do we do next? We'd like to silence the compiler warning, but do so in a way that will catch any wrong calls in the future. We can do that by actually checking those variables and asserting that they match our expectations. Because this is such a common pattern, we'll introduce some helper macros. The resulting messages aren't as descriptive as we could make them, but the file/line information from BUG() is enough to identify the problem (and anyway, the point is that these should never be seen). Each of the annotated callbacks in this patch triggers -Wunused-parameters, and was manually inspected to make sure all callers use the correct options (so none of these BUGs should be triggerable). Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-11-05 14:45:42 +08:00
BUG_ON_OPT_NEG(unset);
if (repo_get_oid(the_repository, arg, &oid))
die(_("malformed object name %s"), arg);
merge_commit = lookup_commit_reference_gently(the_repository, &oid, 0);
if (!merge_commit)
return error(_("option `%s' must point to a commit"), opt->long_name);
if (starts_with(opt->long_name, "no"))
commit_list_insert(merge_commit, &rf->unreachable_from);
else
commit_list_insert(merge_commit, &rf->reachable_from);
return 0;
}
void ref_filter_init(struct ref_filter *filter)
{
struct ref_filter blank = REF_FILTER_INIT;
memcpy(filter, &blank, sizeof(blank));
}
void ref_filter_clear(struct ref_filter *filter)
{
builtin/for-each-ref.c: add `--exclude` option When using `for-each-ref`, it is sometimes convenient for the caller to be able to exclude certain parts of the references. For example, if there are many `refs/__hidden__/*` references, the caller may want to emit all references *except* the hidden ones. Currently, the only way to do this is to post-process the output, like: $ git for-each-ref --format='%(refname)' | grep -v '^refs/hidden/' Which is do-able, but requires processing a potentially large quantity of references. Teach `git for-each-ref` a new `--exclude=<pattern>` option, which excludes references from the results if they match one or more excluded patterns. This patch provides a naive implementation where the `ref_filter` still sees all references (including ones that it will discard) and is left to check whether each reference matches any excluded pattern(s) before emitting them. By culling out references we know the caller doesn't care about, we can avoid allocating memory for their storage, as well as spending time sorting the output (among other things). Even the naive implementation provides a significant speed-up on a modified copy of linux.git (that has a hidden ref pointing at each commit): $ hyperfine \ 'git.compile for-each-ref --format="%(objectname) %(refname)" | grep -vE "[0-9a-f]{40} refs/pull/"' \ 'git.compile for-each-ref --format="%(objectname) %(refname)" --exclude refs/pull/' Benchmark 1: git.compile for-each-ref --format="%(objectname) %(refname)" | grep -vE "[0-9a-f]{40} refs/pull/" Time (mean ± σ): 820.1 ms ± 2.0 ms [User: 703.7 ms, System: 152.0 ms] Range (min … max): 817.7 ms … 823.3 ms 10 runs Benchmark 2: git.compile for-each-ref --format="%(objectname) %(refname)" --exclude refs/pull/ Time (mean ± σ): 106.6 ms ± 1.1 ms [User: 99.4 ms, System: 7.1 ms] Range (min … max): 104.7 ms … 109.1 ms 27 runs Summary 'git.compile for-each-ref --format="%(objectname) %(refname)" --exclude refs/pull/' ran 7.69 ± 0.08 times faster than 'git.compile for-each-ref --format="%(objectname) %(refname)" | grep -vE "[0-9a-f]{40} refs/pull/"' Subsequent patches will improve on this by avoiding visiting excluded sections of the `packed-refs` file in certain cases. Co-authored-by: Jeff King <peff@peff.net> Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Taylor Blau <me@ttaylorr.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-07-11 05:12:19 +08:00
strvec_clear(&filter->exclude);
oid_array_clear(&filter->points_at);
free_commit_list(filter->with_commit);
free_commit_list(filter->no_commit);
free_commit_list(filter->reachable_from);
free_commit_list(filter->unreachable_from);
ref_filter_init(filter);
}