2023-04-23 04:17:23 +08:00
|
|
|
#include "git-compat-util.h"
|
2023-03-21 14:26:03 +08:00
|
|
|
#include "environment.h"
|
2023-03-21 14:25:54 +08:00
|
|
|
#include "gettext.h"
|
2023-07-24 00:19:58 +08:00
|
|
|
#include "config.h"
|
2023-04-23 04:17:26 +08:00
|
|
|
#include "gpg-interface.h"
|
2023-02-24 08:09:27 +08:00
|
|
|
#include "hex.h"
|
2015-06-14 03:37:27 +08:00
|
|
|
#include "parse-options.h"
|
2023-07-24 00:19:59 +08:00
|
|
|
#include "run-command.h"
|
2015-06-14 03:37:27 +08:00
|
|
|
#include "refs.h"
|
|
|
|
#include "wildmatch.h"
|
2023-04-11 15:41:49 +08:00
|
|
|
#include "object-name.h"
|
2023-05-16 14:34:06 +08:00
|
|
|
#include "object-store-ll.h"
|
2023-04-11 11:00:42 +08:00
|
|
|
#include "oid-array.h"
|
2018-06-29 09:21:51 +08:00
|
|
|
#include "repository.h"
|
2015-06-14 03:37:27 +08:00
|
|
|
#include "commit.h"
|
|
|
|
#include "remote.h"
|
|
|
|
#include "color.h"
|
|
|
|
#include "tag.h"
|
|
|
|
#include "quote.h"
|
|
|
|
#include "ref-filter.h"
|
2015-07-08 00:06:12 +08:00
|
|
|
#include "revision.h"
|
2015-09-11 23:03:07 +08:00
|
|
|
#include "utf8.h"
|
2015-09-10 23:48:25 +08:00
|
|
|
#include "version.h"
|
2023-04-23 04:17:17 +08:00
|
|
|
#include "versioncmp.h"
|
2016-11-19 08:58:15 +08:00
|
|
|
#include "trailer.h"
|
2017-01-10 16:49:38 +08:00
|
|
|
#include "wt-status.h"
|
2017-03-09 21:29:49 +08:00
|
|
|
#include "commit-slab.h"
|
2018-05-01 20:47:15 +08:00
|
|
|
#include "commit-graph.h"
|
2018-07-21 00:33:04 +08:00
|
|
|
#include "commit-reach.h"
|
2019-04-29 13:19:42 +08:00
|
|
|
#include "worktree.h"
|
|
|
|
#include "hashmap.h"
|
2020-07-29 04:23:39 +08:00
|
|
|
#include "strvec.h"
|
2015-06-14 03:37:27 +08:00
|
|
|
|
2017-01-10 16:49:50 +08:00
|
|
|
static struct ref_msg {
|
|
|
|
const char *gone;
|
|
|
|
const char *ahead;
|
|
|
|
const char *behind;
|
|
|
|
const char *ahead_behind;
|
|
|
|
} msgs = {
|
|
|
|
/* Untranslated plumbing messages: */
|
|
|
|
"gone",
|
|
|
|
"ahead %d",
|
|
|
|
"behind %d",
|
|
|
|
"ahead %d, behind %d"
|
|
|
|
};
|
|
|
|
|
|
|
|
void setup_ref_filter_porcelain_msg(void)
|
|
|
|
{
|
|
|
|
msgs.gone = _("gone");
|
|
|
|
msgs.ahead = _("ahead %d");
|
|
|
|
msgs.behind = _("behind %d");
|
|
|
|
msgs.ahead_behind = _("ahead %d, behind %d");
|
|
|
|
}
|
2015-06-14 03:37:27 +08:00
|
|
|
|
|
|
|
typedef enum { FIELD_STR, FIELD_ULONG, FIELD_TIME } cmp_type;
|
2017-01-10 16:49:36 +08:00
|
|
|
typedef enum { COMPARE_EQUAL, COMPARE_UNEQUAL, COMPARE_NONE } cmp_status;
|
2018-07-17 16:22:57 +08:00
|
|
|
typedef enum { SOURCE_NONE = 0, SOURCE_OBJ, SOURCE_OTHER } info_source;
|
2015-06-14 03:37:27 +08:00
|
|
|
|
2016-02-18 02:06:15 +08:00
|
|
|
struct align {
|
|
|
|
align_type position;
|
|
|
|
unsigned int width;
|
|
|
|
};
|
|
|
|
|
2017-01-10 16:49:34 +08:00
|
|
|
struct if_then_else {
|
2017-01-10 16:49:36 +08:00
|
|
|
cmp_status cmp_status;
|
|
|
|
const char *str;
|
2017-01-10 16:49:34 +08:00
|
|
|
unsigned int then_atom_seen : 1,
|
|
|
|
else_atom_seen : 1,
|
|
|
|
condition_satisfied : 1;
|
|
|
|
};
|
|
|
|
|
2017-01-10 16:49:43 +08:00
|
|
|
struct refname_atom {
|
2017-01-10 16:49:49 +08:00
|
|
|
enum { R_NORMAL, R_SHORT, R_LSTRIP, R_RSTRIP } option;
|
|
|
|
int lstrip, rstrip;
|
2017-01-10 16:49:43 +08:00
|
|
|
};
|
|
|
|
|
2021-02-13 09:52:43 +08:00
|
|
|
static struct ref_trailer_buf {
|
|
|
|
struct string_list filter_list;
|
|
|
|
struct strbuf sepbuf;
|
|
|
|
struct strbuf kvsepbuf;
|
|
|
|
} ref_trailer_buf = {STRING_LIST_INIT_NODUP, STRBUF_INIT, STRBUF_INIT};
|
|
|
|
|
2018-07-17 16:22:57 +08:00
|
|
|
static struct expand_data {
|
|
|
|
struct object_id oid;
|
|
|
|
enum object_type type;
|
|
|
|
unsigned long size;
|
|
|
|
off_t disk_size;
|
|
|
|
struct object_id delta_base_oid;
|
|
|
|
void *content;
|
|
|
|
|
|
|
|
struct object_info info;
|
|
|
|
} oi, oi_deref;
|
|
|
|
|
2019-04-29 13:19:42 +08:00
|
|
|
struct ref_to_worktree_entry {
|
2019-10-07 07:30:43 +08:00
|
|
|
struct hashmap_entry ent;
|
2019-04-29 13:19:42 +08:00
|
|
|
struct worktree *wt; /* key is wt->head_ref */
|
|
|
|
};
|
|
|
|
|
2022-08-26 01:09:48 +08:00
|
|
|
/*
 * Comparison callback for the ref-to-worktree hashmap.  Entries are
 * compared by their worktree's head_ref: against the keydata string when
 * one is supplied, otherwise against the head_ref of the key entry.
 * Returns the strcmp() result, i.e. 0 when the two names are equal.
 */
static int ref_to_worktree_map_cmpfnc(const void *lookupdata UNUSED,
				      const struct hashmap_entry *eptr,
				      const struct hashmap_entry *kptr,
				      const void *keydata_aka_refname)
{
	const struct ref_to_worktree_entry *entry =
		container_of(eptr, const struct ref_to_worktree_entry, ent);
	const char *wanted = keydata_aka_refname;

	if (!wanted) {
		const struct ref_to_worktree_entry *key =
			container_of(kptr, const struct ref_to_worktree_entry, ent);

		wanted = key->wt->head_ref;
	}

	return strcmp(entry->wt->head_ref, wanted);
}
|
|
|
|
|
|
|
|
static struct ref_to_worktree_map {
|
|
|
|
struct hashmap map;
|
|
|
|
struct worktree **worktrees;
|
|
|
|
} ref_to_worktree_map;
|
|
|
|
|
2021-05-13 23:15:38 +08:00
|
|
|
/*
|
|
|
|
* The enum atom_type is used as the index of valid_atom array.
|
|
|
|
* In the atom parsing stage, it will be passed to used_atom.atom_type
|
|
|
|
* as the identifier of the atom type. We can check the type of used_atom
|
|
|
|
* entry by `if (used_atom[i].atom_type == ATOM_*)`.
|
|
|
|
*/
|
|
|
|
enum atom_type {
|
|
|
|
ATOM_REFNAME,
|
|
|
|
ATOM_OBJECTTYPE,
|
|
|
|
ATOM_OBJECTSIZE,
|
|
|
|
ATOM_OBJECTNAME,
|
|
|
|
ATOM_DELTABASE,
|
|
|
|
ATOM_TREE,
|
|
|
|
ATOM_PARENT,
|
|
|
|
ATOM_NUMPARENT,
|
|
|
|
ATOM_OBJECT,
|
|
|
|
ATOM_TYPE,
|
|
|
|
ATOM_TAG,
|
|
|
|
ATOM_AUTHOR,
|
|
|
|
ATOM_AUTHORNAME,
|
|
|
|
ATOM_AUTHOREMAIL,
|
|
|
|
ATOM_AUTHORDATE,
|
|
|
|
ATOM_COMMITTER,
|
|
|
|
ATOM_COMMITTERNAME,
|
|
|
|
ATOM_COMMITTEREMAIL,
|
|
|
|
ATOM_COMMITTERDATE,
|
|
|
|
ATOM_TAGGER,
|
|
|
|
ATOM_TAGGERNAME,
|
|
|
|
ATOM_TAGGEREMAIL,
|
|
|
|
ATOM_TAGGERDATE,
|
|
|
|
ATOM_CREATOR,
|
|
|
|
ATOM_CREATORDATE,
|
2023-07-24 00:19:59 +08:00
|
|
|
ATOM_DESCRIBE,
|
2021-05-13 23:15:38 +08:00
|
|
|
ATOM_SUBJECT,
|
|
|
|
ATOM_BODY,
|
|
|
|
ATOM_TRAILERS,
|
|
|
|
ATOM_CONTENTS,
|
2023-06-05 02:22:47 +08:00
|
|
|
ATOM_SIGNATURE,
|
ref-filter: add %(raw) atom
Add new formatting option `%(raw)`, which will print the raw
object data without any changes. It will help further to migrate
all cat-file formatting logic from cat-file to ref-filter.
The raw data of blob, tree objects may contain '\0', but most of
the logic in `ref-filter` depends on the output of the atom being
text (specifically, no embedded NULs in it).
E.g. `quote_formatting()` use `strbuf_addstr()` or `*._quote_buf()`
add the data to the buffer. The raw data of a tree object is
`100644 one\0...`, only the `100644 one` will be added to the buffer,
which is incorrect.
Therefore, we need to find a way to record the length of the
atom_value's member `s`. Although strbuf can already record the
string and its length, if we want to replace the type of atom_value's
member `s` with strbuf, many places in ref-filter that are filled
with dynamically allocated memory in `v->s` are not easy to replace.
At the same time, we need to check if `v->s == NULL` in
populate_value(), and strbuf cannot easily distinguish NULL and empty
strings, but c-style "const char *" can do it. So add a new member in
`struct atom_value`: `s_size`, which can record raw object size, it
can help us add raw object data to the buffer or compare two buffers
which contain raw object data.
Note that `--format=%(raw)` cannot be used with `--python`, `--shell`,
`--tcl`, and `--perl` because if the binary raw data is passed to a
variable in such languages, these may not support arbitrary binary data
in their string variable type.
Reviewed-by: Jacob Keller <jacob.keller@gmail.com>
Mentored-by: Christian Couder <christian.couder@gmail.com>
Mentored-by: Hariom Verma <hariom18599@gmail.com>
Helped-by: Bagas Sanjaya <bagasdotme@gmail.com>
Helped-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Helped-by: Felipe Contreras <felipe.contreras@gmail.com>
Helped-by: Phillip Wood <phillip.wood@dunelm.org.uk>
Helped-by: Junio C Hamano <gitster@pobox.com>
Based-on-patch-by: Olga Telezhnaya <olyatelezhnaya@gmail.com>
Signed-off-by: ZheNing Hu <adlternative@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-07-26 11:26:47 +08:00
|
|
|
ATOM_RAW,
|
2021-05-13 23:15:38 +08:00
|
|
|
ATOM_UPSTREAM,
|
|
|
|
ATOM_PUSH,
|
|
|
|
ATOM_SYMREF,
|
|
|
|
ATOM_FLAG,
|
|
|
|
ATOM_HEAD,
|
|
|
|
ATOM_COLOR,
|
|
|
|
ATOM_WORKTREEPATH,
|
|
|
|
ATOM_ALIGN,
|
|
|
|
ATOM_END,
|
|
|
|
ATOM_IF,
|
|
|
|
ATOM_THEN,
|
|
|
|
ATOM_ELSE,
|
2021-07-26 11:26:50 +08:00
|
|
|
ATOM_REST,
|
for-each-ref: add ahead-behind format atom
The previous change implemented the ahead_behind() method, including an
algorithm to compute the ahead/behind values for a number of commit tips
relative to a number of commit bases. Now, integrate that algorithm as
part of 'git for-each-ref' hidden behind a new format atom,
ahead-behind. This naturally extends to 'git branch' and 'git tag'
builtins, as well.
This format allows specifying multiple bases, if so desired, and all
matching references are compared against all of those bases. For this
reason, failing to read a reference provided from these atoms results in
an error.
In order to translate the ahead_behind() method information to the
format output code in ref-filter.c, we must populate arrays of
ahead_behind_count structs. In struct ref_array, we store the full array
that will be passed to ahead_behind(). In struct ref_array_item, we
store an array of pointers that point to the relevant items within the
full array. In this way, we can pull all relevant ahead/behind values
directly when formatting output for a specific item. It also ensures the
lifetime of the ahead_behind_count structs matches the time that the
array is being used.
Add specific tests of the ahead/behind counts in t6600-test-reach.sh, as
it has an interesting repository shape. In particular, its merging
strategy and its use of different commit-graphs would demonstrate over-
counting if the ahead_behind() method did not already account for that
possibility.
Also add tests for the specific for-each-ref, branch, and tag builtins.
In the case of 'git tag', there are interesting cases that happen when
some of the selected tips are not commits. This requires careful logic
around commits_nr in the second loop of filter_ahead_behind(). Also, the
test in t7004 is carefully located to avoid being dependent on the GPG
prereq. It also avoids using the test_commit helper, as that will add
ticks to the time and disrupt the expected timestamps in later tag
tests.
Also add performance tests in a new p1300-graph-walks.sh script. This
will be useful for more uses in the future, but for now compare the
ahead-behind counting algorithm in 'git for-each-ref' to the naive
implementation by running 'git rev-list --count' processes for each
input.
For the Git source code repository, the improvement is already obvious:
Test this tree
---------------------------------------------------------------
1500.2: ahead-behind counts: git for-each-ref 0.07(0.07+0.00)
1500.3: ahead-behind counts: git branch 0.07(0.06+0.00)
1500.4: ahead-behind counts: git tag 0.07(0.06+0.00)
1500.5: ahead-behind counts: git rev-list 1.32(1.04+0.27)
But the standard performance benchmark is the Linux kernel repository,
which demonstrates a significant improvement:
Test this tree
---------------------------------------------------------------
1500.2: ahead-behind counts: git for-each-ref 0.27(0.24+0.02)
1500.3: ahead-behind counts: git branch 0.27(0.24+0.03)
1500.4: ahead-behind counts: git tag 0.28(0.27+0.01)
1500.5: ahead-behind counts: git rev-list 4.57(4.03+0.54)
The 'git rev-list' test exists in this change as a demonstration, but it
will be removed in the next change to avoid wasting time on this
comparison.
Signed-off-by: Derrick Stolee <derrickstolee@github.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-03-20 19:26:54 +08:00
|
|
|
ATOM_AHEADBEHIND,
|
2021-05-13 23:15:38 +08:00
|
|
|
};
|
|
|
|
|
2016-02-18 02:06:10 +08:00
|
|
|
/*
|
|
|
|
* An atom is a valid field atom listed below, possibly prefixed with
|
|
|
|
* a "*" to denote deref_tag().
|
|
|
|
*
|
|
|
|
* We parse given format string and sort specifiers, and make a list
|
|
|
|
* of properties that we need to extract out of objects. ref_array_item
|
|
|
|
* structure will hold an array of values extracted that can be
|
|
|
|
* indexed with the "atom number", which is an index into this
|
|
|
|
* array.
|
|
|
|
*/
|
2016-02-18 02:06:11 +08:00
|
|
|
static struct used_atom {
|
2021-05-13 23:15:38 +08:00
|
|
|
enum atom_type atom_type;
|
2016-02-18 02:06:11 +08:00
|
|
|
const char *name;
|
|
|
|
cmp_type type;
|
2018-07-17 16:22:57 +08:00
|
|
|
info_source source;
|
2016-02-18 02:06:13 +08:00
|
|
|
union {
|
|
|
|
char color[COLOR_MAXLEN];
|
2016-02-18 02:06:15 +08:00
|
|
|
struct align align;
|
2017-01-10 16:49:41 +08:00
|
|
|
struct {
|
2017-10-05 20:19:09 +08:00
|
|
|
enum {
|
2017-11-08 00:31:08 +08:00
|
|
|
RR_REF, RR_TRACK, RR_TRACKSHORT, RR_REMOTE_NAME, RR_REMOTE_REF
|
2017-10-05 20:19:09 +08:00
|
|
|
} option;
|
2017-01-10 16:49:45 +08:00
|
|
|
struct refname_atom refname;
|
2017-10-05 20:19:09 +08:00
|
|
|
unsigned int nobracket : 1, push : 1, push_remote : 1;
|
2017-01-10 16:49:41 +08:00
|
|
|
} remote_ref;
|
2016-02-18 02:06:18 +08:00
|
|
|
struct {
|
2020-08-22 05:41:50 +08:00
|
|
|
enum { C_BARE, C_BODY, C_BODY_DEP, C_LENGTH, C_LINES,
|
|
|
|
C_SIG, C_SUB, C_SUB_SANITIZE, C_TRAILERS } option;
|
2017-10-02 13:25:23 +08:00
|
|
|
struct process_trailer_options trailer_opts;
|
2016-02-18 02:06:18 +08:00
|
|
|
unsigned int nlines;
|
|
|
|
} contents;
|
ref-filter: add %(raw) atom
Add new formatting option `%(raw)`, which will print the raw
object data without any changes. It will help further to migrate
all cat-file formatting logic from cat-file to ref-filter.
The raw data of blob, tree objects may contain '\0', but most of
the logic in `ref-filter` depends on the output of the atom being
text (specifically, no embedded NULs in it).
E.g. `quote_formatting()` use `strbuf_addstr()` or `*._quote_buf()`
add the data to the buffer. The raw data of a tree object is
`100644 one\0...`, only the `100644 one` will be added to the buffer,
which is incorrect.
Therefore, we need to find a way to record the length of the
atom_value's member `s`. Although strbuf can already record the
string and its length, if we want to replace the type of atom_value's
member `s` with strbuf, many places in ref-filter that are filled
with dynamically allocated memory in `v->s` are not easy to replace.
At the same time, we need to check if `v->s == NULL` in
populate_value(), and strbuf cannot easily distinguish NULL and empty
strings, but c-style "const char *" can do it. So add a new member in
`struct atom_value`: `s_size`, which can record raw object size, it
can help us add raw object data to the buffer or compare two buffers
which contain raw object data.
Note that `--format=%(raw)` cannot be used with `--python`, `--shell`,
`--tcl`, and `--perl` because if the binary raw data is passed to a
variable in such languages, these may not support arbitrary binary data
in their string variable type.
Reviewed-by: Jacob Keller <jacob.keller@gmail.com>
Mentored-by: Christian Couder <christian.couder@gmail.com>
Mentored-by: Hariom Verma <hariom18599@gmail.com>
Helped-by: Bagas Sanjaya <bagasdotme@gmail.com>
Helped-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Helped-by: Felipe Contreras <felipe.contreras@gmail.com>
Helped-by: Phillip Wood <phillip.wood@dunelm.org.uk>
Helped-by: Junio C Hamano <gitster@pobox.com>
Based-on-patch-by: Olga Telezhnaya <olyatelezhnaya@gmail.com>
Signed-off-by: ZheNing Hu <adlternative@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-07-26 11:26:47 +08:00
|
|
|
struct {
|
|
|
|
enum { RAW_BARE, RAW_LENGTH } option;
|
|
|
|
} raw_data;
|
2017-01-10 16:49:36 +08:00
|
|
|
struct {
|
|
|
|
cmp_status cmp_status;
|
|
|
|
const char *str;
|
|
|
|
} if_then_else;
|
2017-01-10 16:49:37 +08:00
|
|
|
struct {
|
|
|
|
enum { O_FULL, O_LENGTH, O_SHORT } option;
|
|
|
|
unsigned int length;
|
2020-08-22 05:41:46 +08:00
|
|
|
} oid;
|
2021-05-13 23:15:37 +08:00
|
|
|
struct {
|
|
|
|
enum { O_SIZE, O_SIZE_DISK } option;
|
|
|
|
} objectsize;
|
2020-08-22 05:41:43 +08:00
|
|
|
struct email_option {
|
|
|
|
enum { EO_RAW, EO_TRIM, EO_LOCALPART } option;
|
|
|
|
} email_option;
|
2023-06-05 02:22:47 +08:00
|
|
|
struct {
|
|
|
|
enum { S_BARE, S_GRADE, S_SIGNER, S_KEY,
|
|
|
|
S_FINGERPRINT, S_PRI_KEY_FP, S_TRUST_LEVEL } option;
|
|
|
|
} signature;
|
2023-07-24 00:19:59 +08:00
|
|
|
const char **describe_args;
|
2017-01-10 16:49:43 +08:00
|
|
|
struct refname_atom refname;
|
2017-05-19 14:12:12 +08:00
|
|
|
char *head;
|
2016-02-18 02:06:13 +08:00
|
|
|
} u;
|
2016-02-18 02:06:11 +08:00
|
|
|
} *used_atom;
|
2016-02-18 02:06:10 +08:00
|
|
|
static int used_atom_cnt, need_tagged, need_symref;
|
|
|
|
|
2018-03-29 20:49:45 +08:00
|
|
|
/*
 * Append the printf-style formatted message to "sb", then hand back "ret"
 * unchanged.  This lets a caller record a message and return its status
 * code in a single statement.
 */
__attribute__((format (printf, 3, 4)))
static int strbuf_addf_ret(struct strbuf *sb, int ret, const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	strbuf_vaddf(sb, fmt, args);
	va_end(args);

	return ret;
}
|
|
|
|
|
2022-12-15 00:19:43 +08:00
|
|
|
/*
 * Append to "sb" a message saying that the atom "name" does not take
 * arguments.  Always returns -1.
 */
static int err_no_arg(struct strbuf *sb, const char *name)
{
	/*
	 * "name" may be of the form "atom:args"; only the part before the
	 * first ':' is shown in the message.
	 */
	const char *name_end = strchrnul(name, ':');

	strbuf_addf(sb, _("%%(%.*s) does not take arguments"),
		    (int)(name_end - name), name);
	return -1;
}
|
|
|
|
|
2022-12-15 00:20:19 +08:00
|
|
|
/*
 * Append to "sb" a message saying that "arg" is not a recognized argument
 * of the atom "name".  Always returns -1.
 */
static int err_bad_arg(struct strbuf *sb, const char *name, const char *arg)
{
	/*
	 * Callers may pass a name that still includes its arguments (e.g.
	 * "refname:foo"); stop at the first ':' so that the message reads
	 * "unrecognized %(refname) argument: foo".
	 */
	const char *name_end = strchrnul(name, ':');

	strbuf_addf(sb, _("unrecognized %%(%.*s) argument: %s"),
		    (int)(name_end - name), name, arg);
	return -1;
}
|
|
|
|
|
2023-07-24 00:19:58 +08:00
|
|
|
/*
 * Parse option of name "candidate" at the start of the option string
 * "to_parse", which has the form
 *
 *	"candidate1[=val1],candidate2[=val2],candidate3[=val3],..."
 *
 * On a match, the remaining part of "to_parse" (the text following the
 * ',' that ends this option) is stored in "end". If we are parsing the
 * last candidate, "end" points at the terminating NUL, i.e. it is an
 * empty string and never a NULL pointer. The value of the candidate,
 * that is the portion after "=", is stored in "valuestart" and its
 * length in "valuelen"; the value is not necessarily NUL-terminated, so
 * "valuelen" must be honored. Since it is possible for a "candidate" to
 * not have a value, in such cases, "valuestart" is set to NULL and
 * "valuelen" to 0.
 *
 * The function returns 1 on success. It returns 0, without writing to
 * "end", "valuestart" or "valuelen", if "to_parse" does not begin with
 * "candidate" or if "candidate" is followed by more chars (for example,
 * "candidatefoo"), that is, we don't find an exact match.
 *
 * This function only does the above for one "candidate" at a time. So
 * it has to be called each time trying to parse a "candidate" in the
 * option string "to_parse".
 */
static int match_atom_arg_value(const char *to_parse, const char *candidate,
				const char **end, const char **valuestart,
				size_t *valuelen)
{
	const char *atom;

	if (!skip_prefix(to_parse, candidate, &atom))
		return 0; /* definitely not "candidate" */

	if (*atom == '=') {
		/* we just saw "candidate=" */
		*valuestart = atom + 1;
		atom = strchrnul(*valuestart, ',');
		*valuelen = atom - *valuestart;
	} else if (*atom != ',' && *atom != '\0') {
		/* key begins with "candidate" but has more chars */
		return 0;
	} else {
		/* just "candidate" without "=val" */
		*valuestart = NULL;
		*valuelen = 0;
	}

	/* atom points at either the ',' or NUL after this key[=val] */
	if (*atom == ',')
		atom++;
	else if (*atom)
		BUG("Why is *atom not NUL yet?");

	*end = atom;
	return 1;
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Parse boolean option of name "candidate" in the option list "to_parse"
|
|
|
|
* of the form
|
|
|
|
*
|
|
|
|
* "candidate1[=bool1],candidate2[=bool2],candidate3[=bool3],..."
|
|
|
|
*
|
|
|
|
* The remaining part of "to_parse" is stored in "end" (if we are parsing
|
|
|
|
 * the last candidate, then this is an empty string) and the value (if given) is
|
|
|
|
* parsed and stored in "val", so "val" always points to either 0 or 1.
|
|
|
|
* If the value is not given, then "val" is set to point to 1.
|
|
|
|
*
|
|
|
|
* The boolean value is parsed using "git_parse_maybe_bool()", so the
|
|
|
|
* accepted values are
|
|
|
|
*
|
|
|
|
* to set true - "1", "yes", "true"
|
|
|
|
* to set false - "0", "no", "false"
|
|
|
|
*
|
|
|
|
* This function returns 1 on success. It returns 0 when we don't find
|
|
|
|
* an exact match for "candidate" or when the boolean value given is
|
|
|
|
* not valid.
|
|
|
|
*/
|
|
|
|
static int match_atom_bool_arg(const char *to_parse, const char *candidate,
			       const char **end, int *val)
{
	const char *value;
	size_t value_len;
	char *copy;
	int parsed;

	if (!match_atom_arg_value(to_parse, candidate, end, &value, &value_len))
		return 0;

	/* a bare "candidate" without "=<bool>" counts as true */
	if (!value) {
		*val = 1;
		return 1;
	}

	/* copy the value so it can be handed over as a NUL-terminated string */
	copy = xstrndup(value, value_len);
	parsed = git_parse_maybe_bool(copy);
	free(copy);

	if (parsed == -1)
		return 0; /* value is not a recognized boolean */

	*val = parsed;
	return 1;
}
|
|
|
|
|
2021-07-26 11:26:49 +08:00
|
|
|
static int color_atom_parser(struct ref_format *format, struct used_atom *atom,
|
2018-03-29 20:49:45 +08:00
|
|
|
const char *color_value, struct strbuf *err)
|
2016-02-18 02:06:13 +08:00
|
|
|
{
|
|
|
|
if (!color_value)
|
2018-03-29 20:49:45 +08:00
|
|
|
return strbuf_addf_ret(err, -1, _("expected format: %%(color:<color>)"));
|
2016-02-18 02:06:13 +08:00
|
|
|
if (color_parse(color_value, atom->u.color) < 0)
|
2018-03-29 20:49:45 +08:00
|
|
|
return strbuf_addf_ret(err, -1, _("unrecognized color: %%(color:%s)"),
|
|
|
|
color_value);
|
ref-filter: consult want_color() before emitting colors
When color placeholders like %(color:red) are used in a
ref-filter format, we unconditionally output the colors,
even if the user has asked us for no colors. This usually
isn't a problem when the user is constructing a --format on
the command line, but it means we may do the wrong thing
when the format is fed from a script or alias. For example:
$ git config alias.b 'branch --format=%(color:green)%(refname)'
$ git b --no-color
should probably omit the green color. Likewise, running:
$ git b >branches
should probably also omit the color, just as we would for
all baked-in coloring (and as we recently started to do for
user-specified colors in --pretty formats).
This commit makes both of those cases work by teaching
the ref-filter code to consult want_color() before
outputting any color. The color flag in ref_format defaults
to "-1", which means we'll consult color.ui, which in turn
defaults to the usual isatty() check on stdout. However,
callers like git-branch which support their own color config
(and command-line options) can override that.
The new tests independently cover all three of the callers
of ref-filter (for-each-ref, tag, and branch). Even though
these seem redundant, it confirms that we've correctly
plumbed through all of the necessary config to make colors
work by default.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-07-13 23:09:32 +08:00
|
|
|
/*
|
|
|
|
* We check this after we've parsed the color, which lets us complain
|
|
|
|
* about syntactically bogus color names even if they won't be used.
|
|
|
|
*/
|
|
|
|
if (!want_color(format->use_color))
|
|
|
|
color_parse("", atom->u.color);
|
2018-03-29 20:49:45 +08:00
|
|
|
return 0;
|
2016-02-18 02:06:13 +08:00
|
|
|
}
|
|
|
|
|
2018-03-29 20:49:45 +08:00
|
|
|
static int refname_atom_parser_internal(struct refname_atom *atom, const char *arg,
|
|
|
|
const char *name, struct strbuf *err)
|
2016-02-18 02:06:17 +08:00
|
|
|
{
|
|
|
|
if (!arg)
|
2017-01-10 16:49:43 +08:00
|
|
|
atom->option = R_NORMAL;
|
2016-02-18 02:06:17 +08:00
|
|
|
else if (!strcmp(arg, "short"))
|
2017-01-10 16:49:43 +08:00
|
|
|
atom->option = R_SHORT;
|
2017-02-08 03:50:34 +08:00
|
|
|
else if (skip_prefix(arg, "lstrip=", &arg) ||
|
|
|
|
skip_prefix(arg, "strip=", &arg)) {
|
2017-01-10 16:49:46 +08:00
|
|
|
atom->option = R_LSTRIP;
|
2017-01-10 16:49:48 +08:00
|
|
|
if (strtol_i(arg, 10, &atom->lstrip))
|
2018-03-29 20:49:45 +08:00
|
|
|
return strbuf_addf_ret(err, -1, _("Integer value expected refname:lstrip=%s"), arg);
|
2017-01-10 16:49:49 +08:00
|
|
|
} else if (skip_prefix(arg, "rstrip=", &arg)) {
|
|
|
|
atom->option = R_RSTRIP;
|
|
|
|
if (strtol_i(arg, 10, &atom->rstrip))
|
2018-03-29 20:49:45 +08:00
|
|
|
return strbuf_addf_ret(err, -1, _("Integer value expected refname:rstrip=%s"), arg);
|
2017-01-10 16:49:43 +08:00
|
|
|
} else
|
ref-filter: truncate atom names in error messages
If you pass a bogus argument to %(refname), you may end up with a
message like this:
$ git for-each-ref --format='%(refname:foo)'
fatal: unrecognized %(refname:foo) argument: foo
which is confusing. It should just say:
fatal: unrecognized %(refname) argument: foo
which is clearer, and is consistent with most other atom parsers. Those
other parsers do not have the same problem because they pass the atom
name from a string literal in the parser function. But because the
parser for %(refname) also handles %(upstream) and %(push), it instead
uses atom->name, which includes the arguments. The oid atom parser which
handles %(tree), %(parent), etc suffers from the same problem.
It seems like the cleanest fix would be for atom->name to be _just_ the
name, since there's already a separate "args" field. But since that
field is also used for other things, we can't change it easily (e.g.,
it's how we find things in the used_atoms array, and clearly %(refname)
and %(refname:short) are not the same thing).
Instead, we'll teach our error_bad_arg() function to stop at the first
":". This is a little hacky, as we're effectively re-parsing the name,
but the format is simple enough to do this as a one-liner, and this
localizes the change to the error-reporting code.
We'll give the same treatment to err_no_arg(). None of its callers use
this atom->name trick, but it's worth future-proofing it while we're
here.
Signed-off-by: Jeff King <peff@peff.net>
Acked-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-12-15 00:23:53 +08:00
|
|
|
return err_bad_arg(err, name, arg);
|
2018-03-29 20:49:45 +08:00
|
|
|
return 0;
|
2017-01-10 16:49:43 +08:00
|
|
|
}
|
|
|
|
|
ref-filter: mark unused callback parameters
The ref-filter code uses virtual functions to handle specific atoms, but
many of the functions ignore some parameters:
- most atom parsers do not need the ref_format itself, unless they are
looking at centralized options like use_color, quote_style, etc.
- meta-atom handlers like append_atom(), align_atom_handler(), etc,
can't generate errors, so ignore their "err" parameter
- likewise, the handlers for then/else/end do not even need to look at
their atom_value, as the "if" handler put everything they need into
the ref_formatting_state stack
Since these functions all have to conform to virtual function
interfaces, we can't just drop the unused parameters, but must mark them
as UNUSED (to appease -Wunused-parameter).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-02-24 14:39:06 +08:00
|
|
|
/*
 * Parse the arguments of a remote-ref atom (such as %(push)) into
 * atom->u.remote_ref.
 *
 * The "push" flag is set when the atom's name is "push" or begins with
 * "push:".  Without arguments the atom is a plain RR_REF.  Otherwise
 * "arg" is split on ',' and each item is handled in turn:
 *
 *	"track"		option = RR_TRACK
 *	"trackshort"	option = RR_TRACKSHORT
 *	"nobracket"	nobracket = 1
 *	"remotename"	option = RR_REMOTE_NAME, push_remote = 1
 *	"remoteref"	option = RR_REMOTE_REF, push_remote = 1
 *
 * Any other item makes the atom an RR_REF whose refname options are
 * parsed by refname_atom_parser_internal().
 *
 * Returns 0 on success, or -1 with a message appended to "err" when the
 * refname options cannot be parsed.
 */
static int remote_ref_atom_parser(struct ref_format *format UNUSED,
				  struct used_atom *atom,
				  const char *arg, struct strbuf *err)
{
	struct string_list params = STRING_LIST_INIT_DUP;
	size_t i;
	int ret = 0;

	if (!strcmp(atom->name, "push") || starts_with(atom->name, "push:"))
		atom->u.remote_ref.push = 1;

	if (!arg) {
		atom->u.remote_ref.option = RR_REF;
		return refname_atom_parser_internal(&atom->u.remote_ref.refname,
						    arg, atom->name, err);
	}

	atom->u.remote_ref.nobracket = 0;
	string_list_split(&params, arg, ',', -1);

	for (i = 0; i < params.nr; i++) {
		const char *s = params.items[i].string;

		if (!strcmp(s, "track")) {
			atom->u.remote_ref.option = RR_TRACK;
		} else if (!strcmp(s, "trackshort")) {
			atom->u.remote_ref.option = RR_TRACKSHORT;
		} else if (!strcmp(s, "nobracket")) {
			atom->u.remote_ref.nobracket = 1;
		} else if (!strcmp(s, "remotename")) {
			atom->u.remote_ref.option = RR_REMOTE_NAME;
			atom->u.remote_ref.push_remote = 1;
		} else if (!strcmp(s, "remoteref")) {
			atom->u.remote_ref.option = RR_REMOTE_REF;
			atom->u.remote_ref.push_remote = 1;
		} else {
			atom->u.remote_ref.option = RR_REF;
			/*
			 * NOTE(review): the whole "arg" is passed here, not
			 * just the current item "s", so a refname option
			 * combined with other comma-separated items is
			 * rejected as unrecognized -- confirm this is the
			 * intended behavior before changing it.
			 */
			if (refname_atom_parser_internal(&atom->u.remote_ref.refname,
							 arg, atom->name, err)) {
				ret = -1;
				break;
			}
		}
	}

	/* single exit point so the split list is released on every path */
	string_list_clear(&params, 0);
	return ret;
}
|
|
|
|
|
ref-filter: mark unused callback parameters
The ref-filter code uses virtual functions to handle specific atoms, but
many of the functions ignore some parameters:
- most atom parsers do not need the ref_format itself, unless they are
looking at centralized options like use_color, quote_style, etc.
- meta-atom handlers like append_atom(), align_atom_handler(), etc,
can't generate errors, so ignore their "err" parameter
- likewise, the handlers for then/else/end do not even need to look at
their atom_value, as the "if" handler put everything they need into
the ref_formatting_state stack
Since these functions all have to conform to virtual function
interfaces, we can't just drop the unused parameters, but must mark them
as UNUSED (to appease -Wunused-parameter).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-02-24 14:39:06 +08:00
|
|
|
/*
 * Parser for the "objecttype" atom.  The atom accepts no argument; any
 * argument is reported through err_no_arg().
 *
 * On success the type pointer of the relevant object-info request is
 * aimed at its own "type" member: oi_deref when the atom name begins
 * with '*', oi otherwise.  Returns 0 on success.
 */
static int objecttype_atom_parser(struct ref_format *format UNUSED,
				  struct used_atom *atom,
				  const char *arg, struct strbuf *err)
{
	if (arg)
		return err_no_arg(err, "objecttype");

	if (*atom->name != '*') {
		oi.info.typep = &oi.type;
		return 0;
	}

	oi_deref.info.typep = &oi_deref.type;
	return 0;
}
|
|
|
|
|
ref-filter: mark unused callback parameters
The ref-filter code uses virtual functions to handle specific atoms, but
many of the functions ignore some parameters:
- most atom parsers do not need the ref_format itself, unless they are
looking at centralized options like use_color, quote_style, etc.
- meta-atom handlers like append_atom(), align_atom_handler(), etc,
can't generate errors, so ignore their "err" parameter
- likewise, the handlers for then/else/end do not even need to look at
their atom_value, as the "if" handler put everything they need into
the ref_formatting_state stack
Since these functions all have to conform to virtual function
interfaces, we can't just drop the unused parameters, but must mark them
as UNUSED (to appease -Wunused-parameter).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-02-24 14:39:06 +08:00
|
|
|
/*
 * Parser for the "objectsize" atom.
 *
 * Accepted forms:
 *   no argument -> O_SIZE,      requests the "size" field
 *   "disk"      -> O_SIZE_DISK, requests the "disk_size" field
 * Anything else is rejected via err_bad_arg().
 *
 * The request is recorded in oi_deref when the atom name begins with
 * '*', and in oi otherwise.  Returns 0 on success.
 */
static int objectsize_atom_parser(struct ref_format *format UNUSED,
				  struct used_atom *atom,
				  const char *arg, struct strbuf *err)
{
	/* reject unknown arguments up front, before touching the atom */
	if (arg && strcmp(arg, "disk"))
		return err_bad_arg(err, "objectsize", arg);

	if (arg) {
		atom->u.objectsize.option = O_SIZE_DISK;
		if (*atom->name == '*')
			oi_deref.info.disk_sizep = &oi_deref.disk_size;
		else
			oi.info.disk_sizep = &oi.disk_size;
		return 0;
	}

	atom->u.objectsize.option = O_SIZE;
	if (*atom->name == '*')
		oi_deref.info.sizep = &oi_deref.size;
	else
		oi.info.sizep = &oi.size;
	return 0;
}
|
|
|
|
|
ref-filter: mark unused callback parameters
The ref-filter code uses virtual functions to handle specific atoms, but
many of the functions ignore some parameters:
- most atom parsers do not need the ref_format itself, unless they are
looking at centralized options like use_color, quote_style, etc.
- meta-atom handlers like append_atom(), align_atom_handler(), etc,
can't generate errors, so ignore their "err" parameter
- likewise, the handlers for then/else/end do not even need to look at
their atom_value, as the "if" handler put everything they need into
the ref_formatting_state stack
Since these functions all have to conform to virtual function
interfaces, we can't just drop the unused parameters, but must mark them
as UNUSED (to appease -Wunused-parameter).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-02-24 14:39:06 +08:00
|
|
|
/*
 * Parser for the "deltabase" atom.  No argument is accepted; one that is
 * given is reported through err_no_arg().
 *
 * On success the delta_base_oid pointer of the object-info request is
 * set to the matching storage: oi_deref for a '*'-prefixed atom name,
 * oi otherwise.  Returns 0 on success.
 */
static int deltabase_atom_parser(struct ref_format *format UNUSED,
				 struct used_atom *atom,
				 const char *arg, struct strbuf *err)
{
	if (arg)
		return err_no_arg(err, "deltabase");

	if (*atom->name != '*') {
		oi.info.delta_base_oid = &oi.delta_base_oid;
		return 0;
	}

	oi_deref.info.delta_base_oid = &oi_deref.delta_base_oid;
	return 0;
}
|
|
|
|
|
ref-filter: mark unused callback parameters
The ref-filter code uses virtual functions to handle specific atoms, but
many of the functions ignore some parameters:
- most atom parsers do not need the ref_format itself, unless they are
looking at centralized options like use_color, quote_style, etc.
- meta-atom handlers like append_atom(), align_atom_handler(), etc,
can't generate errors, so ignore their "err" parameter
- likewise, the handlers for then/else/end do not even need to look at
their atom_value, as the "if" handler put everything they need into
the ref_formatting_state stack
Since these functions all have to conform to virtual function
interfaces, we can't just drop the unused parameters, but must mark them
as UNUSED (to appease -Wunused-parameter).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-02-24 14:39:06 +08:00
|
|
|
/*
 * Parser for the "body" atom: takes no argument and selects the
 * C_BODY_DEP contents option.  Returns 0 on success, or the result of
 * err_no_arg() when an argument was supplied.
 */
static int body_atom_parser(struct ref_format *format UNUSED,
			    struct used_atom *atom,
			    const char *arg, struct strbuf *err)
{
	if (!arg) {
		atom->u.contents.option = C_BODY_DEP;
		return 0;
	}
	return err_no_arg(err, "body");
}
|
|
|
|
|
ref-filter: mark unused callback parameters
The ref-filter code uses virtual functions to handle specific atoms, but
many of the functions ignore some parameters:
- most atom parsers do not need the ref_format itself, unless they are
looking at centralized options like use_color, quote_style, etc.
- meta-atom handlers like append_atom(), align_atom_handler(), etc,
can't generate errors, so ignore their "err" parameter
- likewise, the handlers for then/else/end do not even need to look at
their atom_value, as the "if" handler put everything they need into
the ref_formatting_state stack
Since these functions all have to conform to virtual function
interfaces, we can't just drop the unused parameters, but must mark them
as UNUSED (to appease -Wunused-parameter).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-02-24 14:39:06 +08:00
|
|
|
/*
 * Parser for the "subject" atom.
 *
 *   no argument -> C_SUB
 *   "sanitize"  -> C_SUB_SANITIZE
 *
 * Any other argument is rejected via err_bad_arg().  Returns 0 on
 * success.
 */
static int subject_atom_parser(struct ref_format *format UNUSED,
			       struct used_atom *atom,
			       const char *arg, struct strbuf *err)
{
	if (arg && strcmp(arg, "sanitize"))
		return err_bad_arg(err, "subject", arg);

	atom->u.contents.option = arg ? C_SUB_SANITIZE : C_SUB;
	return 0;
}
|
|
|
|
|
2023-06-05 02:22:47 +08:00
|
|
|
static int parse_signature_option(const char *arg)
|
|
|
|
{
|
|
|
|
if (!arg)
|
|
|
|
return S_BARE;
|
|
|
|
else if (!strcmp(arg, "signer"))
|
|
|
|
return S_SIGNER;
|
|
|
|
else if (!strcmp(arg, "grade"))
|
|
|
|
return S_GRADE;
|
|
|
|
else if (!strcmp(arg, "key"))
|
|
|
|
return S_KEY;
|
|
|
|
else if (!strcmp(arg, "fingerprint"))
|
|
|
|
return S_FINGERPRINT;
|
|
|
|
else if (!strcmp(arg, "primarykeyfingerprint"))
|
|
|
|
return S_PRI_KEY_FP;
|
|
|
|
else if (!strcmp(arg, "trustlevel"))
|
|
|
|
return S_TRUST_LEVEL;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Parser for the "signature" atom.  The argument is translated by
 * parse_signature_option(); a negative result is reported through
 * err_bad_arg(), otherwise the option is stored in the atom and 0 is
 * returned.
 */
static int signature_atom_parser(struct ref_format *format UNUSED,
				 struct used_atom *atom,
				 const char *arg, struct strbuf *err)
{
	const int parsed = parse_signature_option(arg);

	if (parsed >= 0) {
		atom->u.signature.option = parsed;
		return 0;
	}
	return err_bad_arg(err, "signature", arg);
}
|
|
|
|
|
|
|
|
static int trailers_atom_parser(struct ref_format *format, struct used_atom *atom,
|
2018-03-29 20:49:45 +08:00
|
|
|
const char *arg, struct strbuf *err)
|
2016-11-19 08:58:15 +08:00
|
|
|
{
|
2018-08-23 08:50:17 +08:00
|
|
|
atom->u.contents.trailer_opts.no_divider = 1;
|
|
|
|
|
2017-10-02 13:25:23 +08:00
|
|
|
if (arg) {
|
2021-02-13 09:52:43 +08:00
|
|
|
const char *argbuf = xstrfmt("%s)", arg);
|
|
|
|
char *invalid_arg = NULL;
|
|
|
|
|
|
|
|
if (format_set_trailers_options(&atom->u.contents.trailer_opts,
|
|
|
|
&ref_trailer_buf.filter_list,
|
|
|
|
&ref_trailer_buf.sepbuf,
|
|
|
|
&ref_trailer_buf.kvsepbuf,
|
|
|
|
&argbuf, &invalid_arg)) {
|
|
|
|
if (!invalid_arg)
|
|
|
|
strbuf_addf(err, _("expected %%(trailers:key=<value>)"));
|
|
|
|
else
|
|
|
|
strbuf_addf(err, _("unknown %%(trailers) argument: %s"), invalid_arg);
|
|
|
|
free((char *)invalid_arg);
|
|
|
|
return -1;
|
2017-10-02 13:25:23 +08:00
|
|
|
}
|
|
|
|
}
|
2016-11-19 08:58:15 +08:00
|
|
|
atom->u.contents.option = C_TRAILERS;
|
2018-03-29 20:49:45 +08:00
|
|
|
return 0;
|
2016-11-19 08:58:15 +08:00
|
|
|
}
|
|
|
|
|
2021-07-26 11:26:49 +08:00
|
|
|
static int contents_atom_parser(struct ref_format *format, struct used_atom *atom,
|
2018-03-29 20:49:45 +08:00
|
|
|
const char *arg, struct strbuf *err)
|
2016-02-18 02:06:18 +08:00
|
|
|
{
|
|
|
|
if (!arg)
|
|
|
|
atom->u.contents.option = C_BARE;
|
|
|
|
else if (!strcmp(arg, "body"))
|
|
|
|
atom->u.contents.option = C_BODY;
|
2023-09-02 17:00:39 +08:00
|
|
|
else if (!strcmp(arg, "size")) {
|
|
|
|
atom->type = FIELD_ULONG;
|
2020-07-16 20:19:40 +08:00
|
|
|
atom->u.contents.option = C_LENGTH;
|
2023-09-02 17:00:39 +08:00
|
|
|
} else if (!strcmp(arg, "signature"))
|
2016-02-18 02:06:18 +08:00
|
|
|
atom->u.contents.option = C_SIG;
|
|
|
|
else if (!strcmp(arg, "subject"))
|
|
|
|
atom->u.contents.option = C_SUB;
|
2020-08-22 05:06:14 +08:00
|
|
|
else if (!strcmp(arg, "trailers")) {
|
|
|
|
if (trailers_atom_parser(format, atom, NULL, err))
|
|
|
|
return -1;
|
|
|
|
} else if (skip_prefix(arg, "trailers:", &arg)) {
|
|
|
|
if (trailers_atom_parser(format, atom, arg, err))
|
2018-03-29 20:49:45 +08:00
|
|
|
return -1;
|
2017-10-02 13:25:24 +08:00
|
|
|
} else if (skip_prefix(arg, "lines=", &arg)) {
|
2016-02-18 02:06:18 +08:00
|
|
|
atom->u.contents.option = C_LINES;
|
|
|
|
if (strtoul_ui(arg, 10, &atom->u.contents.nlines))
|
2018-03-29 20:49:45 +08:00
|
|
|
return strbuf_addf_ret(err, -1, _("positive value expected contents:lines=%s"), arg);
|
2016-02-18 02:06:18 +08:00
|
|
|
} else
|
2022-12-15 00:20:19 +08:00
|
|
|
return err_bad_arg(err, "contents", arg);
|
2018-03-29 20:49:45 +08:00
|
|
|
return 0;
|
2016-02-18 02:06:18 +08:00
|
|
|
}
|
|
|
|
|
2023-07-24 00:19:59 +08:00
|
|
|
/*
 * Try to consume one option of the "describe" atom from *arg and append
 * the corresponding command-line switch to "args".
 *
 * Handled options: tags (boolean), abbrev=<n>, match=<pattern>,
 * exclude=<pattern>.  The match_atom_*() helpers receive "arg" as their
 * output position, so *arg may be advanced by them.
 *
 * Returns 1 when an option was consumed, 0 when none of the known
 * options matched, and the result of strbuf_addf_ret(err, -1, ...) when
 * an option was recognized but its value is unacceptable.
 */
static int describe_atom_option_parser(struct strvec *args, const char **arg,
				       struct strbuf *err)
{
	const char *value;
	size_t len = 0;
	int enabled = 0;

	if (match_atom_bool_arg(*arg, "tags", arg, &enabled)) {
		strvec_push(args, enabled ? "--tags" : "--no-tags");
		return 1;
	}

	if (match_atom_arg_value(*arg, "abbrev", arg, &value, &len)) {
		char *end;

		if (!len)
			return strbuf_addf_ret(err, -1,
					       _("argument expected for %s"),
					       "describe:abbrev");
		if (strtol(value, &end, 10) < 0)
			return strbuf_addf_ret(err, -1,
					       _("positive value expected %s=%s"),
					       "describe:abbrev", value);
		/* the number must span the entire value, nothing trailing */
		if ((size_t)(end - value) != len)
			return strbuf_addf_ret(err, -1,
					       _("cannot fully parse %s=%s"),
					       "describe:abbrev", value);

		strvec_pushf(args, "--abbrev=%.*s", (int)len, value);
		return 1;
	}

	if (match_atom_arg_value(*arg, "match", arg, &value, &len)) {
		if (!len)
			return strbuf_addf_ret(err, -1,
					       _("value expected %s="),
					       "describe:match");

		strvec_pushf(args, "--match=%.*s", (int)len, value);
		return 1;
	}

	if (match_atom_arg_value(*arg, "exclude", arg, &value, &len)) {
		if (!len)
			return strbuf_addf_ret(err, -1,
					       _("value expected %s="),
					       "describe:exclude");

		strvec_pushf(args, "--exclude=%.*s", (int)len, value);
		return 1;
	}

	return 0;
}
|
|
|
|
|
|
|
|
/*
 * Parser for the "describe" atom.
 *
 * Repeatedly calls describe_atom_option_parser() until the argument
 * string is exhausted, collecting the generated switches in a strvec.
 * On success the collected array is detached into
 * atom->u.describe_args and 0 is returned.
 *
 * On failure the partially built strvec is released before returning,
 * so nothing is leaked: a negative result from the option parser is
 * passed through unchanged, and an unrecognized option is reported via
 * err_bad_arg() using the position at which parsing stalled.
 */
static int describe_atom_parser(struct ref_format *format UNUSED,
				struct used_atom *atom,
				const char *arg, struct strbuf *err)
{
	struct strvec args = STRVEC_INIT;

	while (arg && *arg) {
		/* remember where this option starts, for the error message */
		const char *bad_arg = arg;
		int found = describe_atom_option_parser(&args, &arg, err);

		if (found < 0) {
			strvec_clear(&args);
			return found;
		}
		if (!found) {
			/* bad_arg points into the caller's string, not into args */
			strvec_clear(&args);
			return err_bad_arg(err, "describe", bad_arg);
		}
	}
	atom->u.describe_args = strvec_detach(&args);
	return 0;
}
|
|
|
|
|
ref-filter: mark unused callback parameters
The ref-filter code uses virtual functions to handle specific atoms, but
many of the functions ignore some parameters:
- most atom parsers do not need the ref_format itself, unless they are
looking at centralized options like use_color, quote_style, etc.
- meta-atom handlers like append_atom(), align_atom_handler(), etc,
can't generate errors, so ignore their "err" parameter
- likewise, the handlers for then/else/end do not even need to look at
their atom_value, as the "if" handler put everything they need into
the ref_formatting_state stack
Since these functions all have to conform to virtual function
interfaces, we can't just drop the unused parameters, but must mark them
as UNUSED (to appease -Wunused-parameter).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-02-24 14:39:06 +08:00
|
|
|
/*
 * Parser for the "raw" atom.
 *
 *   no argument -> RAW_BARE
 *   "size"      -> RAW_LENGTH (and the atom type becomes FIELD_ULONG)
 *
 * Any other argument is rejected via err_bad_arg().  Returns 0 on
 * success.
 */
static int raw_atom_parser(struct ref_format *format UNUSED,
			   struct used_atom *atom,
			   const char *arg, struct strbuf *err)
{
	if (!arg) {
		atom->u.raw_data.option = RAW_BARE;
		return 0;
	}
	if (strcmp(arg, "size"))
		return err_bad_arg(err, "raw", arg);

	atom->type = FIELD_ULONG;
	atom->u.raw_data.option = RAW_LENGTH;
	return 0;
}
|
|
|
|
|
ref-filter: mark unused callback parameters
The ref-filter code uses virtual functions to handle specific atoms, but
many of the functions ignore some parameters:
- most atom parsers do not need the ref_format itself, unless they are
looking at centralized options like use_color, quote_style, etc.
- meta-atom handlers like append_atom(), align_atom_handler(), etc,
can't generate errors, so ignore their "err" parameter
- likewise, the handlers for then/else/end do not even need to look at
their atom_value, as the "if" handler put everything they need into
the ref_formatting_state stack
Since these functions all have to conform to virtual function
interfaces, we can't just drop the unused parameters, but must mark them
as UNUSED (to appease -Wunused-parameter).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-02-24 14:39:06 +08:00
|
|
|
/*
 * Parser shared by the object-id style atoms.
 *
 *   no argument -> O_FULL
 *   "short"     -> O_SHORT
 *   "short=<n>" -> O_LENGTH with <n> stored in oid.length; <n> must parse
 *                  as a non-zero unsigned number and is raised to
 *                  MINIMUM_ABBREV when smaller.
 *
 * Any other argument is rejected via err_bad_arg(), using atom->name as
 * the atom name.  Returns 0 on success.
 */
static int oid_atom_parser(struct ref_format *format UNUSED,
			   struct used_atom *atom,
			   const char *arg, struct strbuf *err)
{
	const char *len_str;

	if (!arg) {
		atom->u.oid.option = O_FULL;
		return 0;
	}
	if (!strcmp(arg, "short")) {
		atom->u.oid.option = O_SHORT;
		return 0;
	}
	if (!skip_prefix(arg, "short=", &len_str))
		return err_bad_arg(err, atom->name, arg);

	/* the option is recorded before the length is validated */
	atom->u.oid.option = O_LENGTH;
	if (strtoul_ui(len_str, 10, &atom->u.oid.length) ||
	    !atom->u.oid.length)
		return strbuf_addf_ret(err, -1, _("positive value expected '%s' in %%(%s)"), len_str, atom->name);
	if (atom->u.oid.length < MINIMUM_ABBREV)
		atom->u.oid.length = MINIMUM_ABBREV;
	return 0;
}
|
|
|
|
|
ref-filter: mark unused callback parameters
The ref-filter code uses virtual functions to handle specific atoms, but
many of the functions ignore some parameters:
- most atom parsers do not need the ref_format itself, unless they are
looking at centralized options like use_color, quote_style, etc.
- meta-atom handlers like append_atom(), align_atom_handler(), etc,
can't generate errors, so ignore their "err" parameter
- likewise, the handlers for then/else/end do not even need to look at
their atom_value, as the "if" handler put everything they need into
the ref_formatting_state stack
Since these functions all have to conform to virtual function
interfaces, we can't just drop the unused parameters, but must mark them
as UNUSED (to appease -Wunused-parameter).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-02-24 14:39:06 +08:00
|
|
|
/*
 * Parser for the person-email atoms.
 *
 *   no argument -> EO_RAW
 *   "trim"      -> EO_TRIM
 *   "localpart" -> EO_LOCALPART
 *
 * Any other argument is rejected via err_bad_arg(), using atom->name as
 * the atom name.  Returns 0 on success.
 */
static int person_email_atom_parser(struct ref_format *format UNUSED,
				    struct used_atom *atom,
				    const char *arg, struct strbuf *err)
{
	if (!arg) {
		atom->u.email_option.option = EO_RAW;
		return 0;
	}
	if (!strcmp(arg, "trim")) {
		atom->u.email_option.option = EO_TRIM;
		return 0;
	}
	if (!strcmp(arg, "localpart")) {
		atom->u.email_option.option = EO_LOCALPART;
		return 0;
	}
	return err_bad_arg(err, atom->name, arg);
}
|
|
|
|
|
ref-filter: mark unused callback parameters
The ref-filter code uses virtual functions to handle specific atoms, but
many of the functions ignore some parameters:
- most atom parsers do not need the ref_format itself, unless they are
looking at centralized options like use_color, quote_style, etc.
- meta-atom handlers like append_atom(), align_atom_handler(), etc,
can't generate errors, so ignore their "err" parameter
- likewise, the handlers for then/else/end do not even need to look at
their atom_value, as the "if" handler put everything they need into
the ref_formatting_state stack
Since these functions all have to conform to virtual function
interfaces, we can't just drop the unused parameters, but must mark them
as UNUSED (to appease -Wunused-parameter).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-02-24 14:39:06 +08:00
|
|
|
/*
 * Parser callback for the refname atom: forwards to
 * refname_atom_parser_internal(), storing the parsed options in
 * atom->u.refname and passing atom->name along as the atom name.
 * The helper's return value is returned unchanged.
 */
static int refname_atom_parser(struct ref_format *format UNUSED,
			       struct used_atom *atom,
			       const char *arg, struct strbuf *err)
{
	int status;

	status = refname_atom_parser_internal(&atom->u.refname, arg,
					      atom->name, err);
	return status;
}
|
|
|
|
|
2016-02-18 02:06:14 +08:00
|
|
|
static align_type parse_align_position(const char *s)
|
|
|
|
{
|
|
|
|
if (!strcmp(s, "right"))
|
|
|
|
return ALIGN_RIGHT;
|
|
|
|
else if (!strcmp(s, "middle"))
|
|
|
|
return ALIGN_MIDDLE;
|
|
|
|
else if (!strcmp(s, "left"))
|
|
|
|
return ALIGN_LEFT;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
ref-filter: mark unused callback parameters
The ref-filter code uses virtual functions to handle specific atoms, but
many of the functions ignore some parameters:
- most atom parsers do not need the ref_format itself, unless they are
looking at centralized options like use_color, quote_style, etc.
- meta-atom handlers like append_atom(), align_atom_handler(), etc,
can't generate errors, so ignore their "err" parameter
- likewise, the handlers for then/else/end do not even need to look at
their atom_value, as the "if" handler put everything they need into
the ref_formatting_state stack
Since these functions all have to conform to virtual function
interfaces, we can't just drop the unused parameters, but must mark them
as UNUSED (to appease -Wunused-parameter).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-02-24 14:39:06 +08:00
|
|
|
/*
 * Parser for the "align" atom.
 *
 * The argument is a comma-separated list in which each item is one of:
 *   position=<left|middle|right>
 *   width=<n>
 *   <n>                    (bare number, taken as the width)
 *   <left|middle|right>    (bare keyword, taken as the position)
 *
 * The position defaults to ALIGN_LEFT.  A width must be supplied in one
 * of the two forms; "~0U" is used as the "not set" sentinel.
 *
 * Returns 0 on success.  On any error a message is appended to "err",
 * the temporary split list is released, and -1 is returned.
 */
static int align_atom_parser(struct ref_format *format UNUSED,
			     struct used_atom *atom,
			     const char *arg, struct strbuf *err)
{
	struct align *align = &atom->u.align;
	struct string_list params = STRING_LIST_INIT_DUP;
	size_t i;
	unsigned int width = ~0U;

	/* nothing has been allocated yet, so a plain return is fine */
	if (!arg)
		return strbuf_addf_ret(err, -1, _("expected format: %%(align:<width>,<position>)"));

	align->position = ALIGN_LEFT;

	string_list_split(&params, arg, ',', -1);
	for (i = 0; i < params.nr; i++) {
		const char *s = params.items[i].string;
		int position;

		if (skip_prefix(s, "position=", &s)) {
			position = parse_align_position(s);
			if (position < 0) {
				strbuf_addf(err, _("unrecognized position:%s"), s);
				string_list_clear(&params, 0);
				return -1;
			}
			align->position = position;
		} else if (skip_prefix(s, "width=", &s)) {
			if (strtoul_ui(s, 10, &width)) {
				strbuf_addf(err, _("unrecognized width:%s"), s);
				string_list_clear(&params, 0);
				return -1;
			}
		} else if (!strtoul_ui(s, 10, &width))
			; /* bare number: width was stored by strtoul_ui() */
		else if ((position = parse_align_position(s)) >= 0)
			align->position = position;
		else {
			strbuf_addf(err, _("unrecognized %%(%s) argument: %s"), "align", s);
			string_list_clear(&params, 0);
			return -1;
		}
	}

	/* sentinel untouched: no width was given in any form */
	if (width == ~0U) {
		string_list_clear(&params, 0);
		return strbuf_addf_ret(err, -1, _("positive width expected with the %%(align) atom"));
	}
	align->width = width;
	string_list_clear(&params, 0);
	return 0;
}
|
|
|
|
|
ref-filter: mark unused callback parameters
The ref-filter code uses virtual functions to handle specific atoms, but
many of the functions ignore some parameters:
- most atom parsers do not need the ref_format itself, unless they are
looking at centralized options like use_color, quote_style, etc.
- meta-atom handlers like append_atom(), align_atom_handler(), etc,
can't generate errors, so ignore their "err" parameter
- likewise, the handlers for then/else/end do not even need to look at
their atom_value, as the "if" handler put everything they need into
the ref_formatting_state stack
Since these functions all have to conform to virtual function
interfaces, we can't just drop the unused parameters, but must mark them
as UNUSED (to appease -Wunused-parameter).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-02-24 14:39:06 +08:00
|
|
|
/*
 * Parse the optional argument of the %(if) atom.
 *
 * Without an argument the comparison status becomes COMPARE_NONE.
 * "equals=<string>" selects COMPARE_EQUAL and "notequals=<string>"
 * selects COMPARE_UNEQUAL; in both cases skip_prefix() stores the
 * text after the '=' in atom->u.if_then_else.str.
 *
 * Returns 0 on success.  Any other argument is rejected with the
 * return value of err_bad_arg().
 */
static int if_atom_parser(struct ref_format *format UNUSED,
			  struct used_atom *atom,
			  const char *arg, struct strbuf *err)
{
	if (!arg)
		atom->u.if_then_else.cmp_status = COMPARE_NONE;
	else if (skip_prefix(arg, "equals=", &atom->u.if_then_else.str))
		atom->u.if_then_else.cmp_status = COMPARE_EQUAL;
	else if (skip_prefix(arg, "notequals=", &atom->u.if_then_else.str))
		atom->u.if_then_else.cmp_status = COMPARE_UNEQUAL;
	else
		return err_bad_arg(err, "if", arg);

	return 0;
}
|
|
|
|
|
ref-filter: mark unused callback parameters
The ref-filter code uses virtual functions to handle specific atoms, but
many of the functions ignore some parameters:
- most atom parsers do not need the ref_format itself, unless they are
looking at centralized options like use_color, quote_style, etc.
- meta-atom handlers like append_atom(), align_atom_handler(), etc,
can't generate errors, so ignore their "err" parameter
- likewise, the handlers for then/else/end do not even need to look at
their atom_value, as the "if" handler put everything they need into
the ref_formatting_state stack
Since these functions all have to conform to virtual function
interfaces, we can't just drop the unused parameters, but must mark them
as UNUSED (to appease -Wunused-parameter).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-02-24 14:39:06 +08:00
|
|
|
static int rest_atom_parser(struct ref_format *format,
|
|
|
|
struct used_atom *atom UNUSED,
|
2021-07-26 11:26:50 +08:00
|
|
|
const char *arg, struct strbuf *err)
|
|
|
|
{
|
|
|
|
if (arg)
|
2022-12-15 00:19:43 +08:00
|
|
|
return err_no_arg(err, "rest");
|
2021-07-26 11:26:50 +08:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
for-each-ref: add ahead-behind format atom
The previous change implemented the ahead_behind() method, including an
algorithm to compute the ahead/behind values for a number of commit tips
relative to a number of commit bases. Now, integrate that algorithm as
part of 'git for-each-ref' hidden behind a new format atom,
ahead-behind. This naturally extends to 'git branch' and 'git tag'
builtins, as well.
This format allows specifying multiple bases, if so desired, and all
matching references are compared against all of those bases. For this
reason, failing to read a reference provided from these atoms results in
an error.
In order to translate the ahead_behind() method information to the
format output code in ref-filter.c, we must populate arrays of
ahead_behind_count structs. In struct ref_array, we store the full array
that will be passed to ahead_behind(). In struct ref_array_item, we
store an array of pointers that point to the relevant items within the
full array. In this way, we can pull all relevant ahead/behind values
directly when formatting output for a specific item. It also ensures the
lifetime of the ahead_behind_count structs matches the time that the
array is being used.
Add specific tests of the ahead/behind counts in t6600-test-reach.sh, as
it has an interesting repository shape. In particular, its merging
strategy and its use of different commit-graphs would demonstrate over-
counting if the ahead_behind() method did not already account for that
possibility.
Also add tests for the specific for-each-ref, branch, and tag builtins.
In the case of 'git tag', there are interesting cases that happen when
some of the selected tips are not commits. This requires careful logic
around commits_nr in the second loop of filter_ahead_behind(). Also, the
test in t7004 is carefully located to avoid being dependent on the GPG
prereq. It also avoids using the test_commit helper, as that will add
ticks to the time and disrupt the expected timestamps in later tag
tests.
Also add performance tests in a new p1300-graph-walks.sh script. This
will be useful for more uses in the future, but for now compare the
ahead-behind counting algorithm in 'git for-each-ref' to the naive
implementation by running 'git rev-list --count' processes for each
input.
For the Git source code repository, the improvement is already obvious:
Test this tree
---------------------------------------------------------------
1500.2: ahead-behind counts: git for-each-ref 0.07(0.07+0.00)
1500.3: ahead-behind counts: git branch 0.07(0.06+0.00)
1500.4: ahead-behind counts: git tag 0.07(0.06+0.00)
1500.5: ahead-behind counts: git rev-list 1.32(1.04+0.27)
But the standard performance benchmark is the Linux kernel repository,
which demonstrates a significant improvement:
Test this tree
---------------------------------------------------------------
1500.2: ahead-behind counts: git for-each-ref 0.27(0.24+0.02)
1500.3: ahead-behind counts: git branch 0.27(0.24+0.03)
1500.4: ahead-behind counts: git tag 0.28(0.27+0.01)
1500.5: ahead-behind counts: git rev-list 4.57(4.03+0.54)
The 'git rev-list' test exists in this change as a demonstration, but it
will be removed in the next change to avoid wasting time on this
comparison.
Signed-off-by: Derrick Stolee <derrickstolee@github.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-03-20 19:26:54 +08:00
|
|
|
static int ahead_behind_atom_parser(struct ref_format *format, struct used_atom *atom,
|
|
|
|
const char *arg, struct strbuf *err)
|
|
|
|
{
|
|
|
|
struct string_list_item *item;
|
|
|
|
|
|
|
|
if (!arg)
|
|
|
|
return strbuf_addf_ret(err, -1, _("expected format: %%(ahead-behind:<committish>)"));
|
|
|
|
|
|
|
|
item = string_list_append(&format->bases, arg);
|
|
|
|
item->util = lookup_commit_reference_by_name(arg);
|
|
|
|
if (!item->util)
|
|
|
|
die("failed to find '%s'", arg);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
ref-filter: mark unused callback parameters
The ref-filter code uses virtual functions to handle specific atoms, but
many of the functions ignore some parameters:
- most atom parsers do not need the ref_format itself, unless they are
looking at centralized options like use_color, quote_style, etc.
- meta-atom handlers like append_atom(), align_atom_handler(), etc,
can't generate errors, so ignore their "err" parameter
- likewise, the handlers for then/else/end do not even need to look at
their atom_value, as the "if" handler put everything they need into
the ref_formatting_state stack
Since these functions all have to conform to virtual function
interfaces, we can't just drop the unused parameters, but must mark them
as UNUSED (to appease -Wunused-parameter).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-02-24 14:39:06 +08:00
|
|
|
/*
 * Parser for the %(HEAD) atom, which takes no argument.
 *
 * When no argument is given, the result of resolving "HEAD" with
 * resolve_refdup() is stored in atom->u.head and 0 is returned.
 * If an argument is present, the return value of err_no_arg() is
 * passed back instead and "atom" is left untouched.
 */
static int head_atom_parser(struct ref_format *format UNUSED,
			    struct used_atom *atom,
			    const char *arg, struct strbuf *err)
{
	if (!arg) {
		atom->u.head = resolve_refdup("HEAD", RESOLVE_REF_READING,
					      NULL, NULL);
		return 0;
	}

	return err_no_arg(err, "HEAD");
}
|
2017-01-10 16:49:36 +08:00
|
|
|
|
2015-06-14 03:37:27 +08:00
|
|
|
static struct {
|
|
|
|
const char *name;
|
2018-07-17 16:22:57 +08:00
|
|
|
info_source source;
|
2015-06-14 03:37:27 +08:00
|
|
|
cmp_type cmp_type;
|
2021-07-26 11:26:49 +08:00
|
|
|
int (*parser)(struct ref_format *format, struct used_atom *atom,
|
2018-03-29 20:49:45 +08:00
|
|
|
const char *arg, struct strbuf *err);
|
2015-06-14 03:37:27 +08:00
|
|
|
} valid_atom[] = {
|
2021-05-13 23:15:38 +08:00
|
|
|
[ATOM_REFNAME] = { "refname", SOURCE_NONE, FIELD_STR, refname_atom_parser },
|
|
|
|
[ATOM_OBJECTTYPE] = { "objecttype", SOURCE_OTHER, FIELD_STR, objecttype_atom_parser },
|
|
|
|
[ATOM_OBJECTSIZE] = { "objectsize", SOURCE_OTHER, FIELD_ULONG, objectsize_atom_parser },
|
|
|
|
[ATOM_OBJECTNAME] = { "objectname", SOURCE_OTHER, FIELD_STR, oid_atom_parser },
|
|
|
|
[ATOM_DELTABASE] = { "deltabase", SOURCE_OTHER, FIELD_STR, deltabase_atom_parser },
|
|
|
|
[ATOM_TREE] = { "tree", SOURCE_OBJ, FIELD_STR, oid_atom_parser },
|
|
|
|
[ATOM_PARENT] = { "parent", SOURCE_OBJ, FIELD_STR, oid_atom_parser },
|
|
|
|
[ATOM_NUMPARENT] = { "numparent", SOURCE_OBJ, FIELD_ULONG },
|
|
|
|
[ATOM_OBJECT] = { "object", SOURCE_OBJ },
|
|
|
|
[ATOM_TYPE] = { "type", SOURCE_OBJ },
|
|
|
|
[ATOM_TAG] = { "tag", SOURCE_OBJ },
|
|
|
|
[ATOM_AUTHOR] = { "author", SOURCE_OBJ },
|
|
|
|
[ATOM_AUTHORNAME] = { "authorname", SOURCE_OBJ },
|
|
|
|
[ATOM_AUTHOREMAIL] = { "authoremail", SOURCE_OBJ, FIELD_STR, person_email_atom_parser },
|
|
|
|
[ATOM_AUTHORDATE] = { "authordate", SOURCE_OBJ, FIELD_TIME },
|
|
|
|
[ATOM_COMMITTER] = { "committer", SOURCE_OBJ },
|
|
|
|
[ATOM_COMMITTERNAME] = { "committername", SOURCE_OBJ },
|
|
|
|
[ATOM_COMMITTEREMAIL] = { "committeremail", SOURCE_OBJ, FIELD_STR, person_email_atom_parser },
|
|
|
|
[ATOM_COMMITTERDATE] = { "committerdate", SOURCE_OBJ, FIELD_TIME },
|
|
|
|
[ATOM_TAGGER] = { "tagger", SOURCE_OBJ },
|
|
|
|
[ATOM_TAGGERNAME] = { "taggername", SOURCE_OBJ },
|
|
|
|
[ATOM_TAGGEREMAIL] = { "taggeremail", SOURCE_OBJ, FIELD_STR, person_email_atom_parser },
|
|
|
|
[ATOM_TAGGERDATE] = { "taggerdate", SOURCE_OBJ, FIELD_TIME },
|
|
|
|
[ATOM_CREATOR] = { "creator", SOURCE_OBJ },
|
|
|
|
[ATOM_CREATORDATE] = { "creatordate", SOURCE_OBJ, FIELD_TIME },
|
2023-07-24 00:19:59 +08:00
|
|
|
[ATOM_DESCRIBE] = { "describe", SOURCE_OBJ, FIELD_STR, describe_atom_parser },
|
2021-05-13 23:15:38 +08:00
|
|
|
[ATOM_SUBJECT] = { "subject", SOURCE_OBJ, FIELD_STR, subject_atom_parser },
|
|
|
|
[ATOM_BODY] = { "body", SOURCE_OBJ, FIELD_STR, body_atom_parser },
|
|
|
|
[ATOM_TRAILERS] = { "trailers", SOURCE_OBJ, FIELD_STR, trailers_atom_parser },
|
|
|
|
[ATOM_CONTENTS] = { "contents", SOURCE_OBJ, FIELD_STR, contents_atom_parser },
|
2023-06-05 02:22:47 +08:00
|
|
|
[ATOM_SIGNATURE] = { "signature", SOURCE_OBJ, FIELD_STR, signature_atom_parser },
|
ref-filter: add %(raw) atom
Add new formatting option `%(raw)`, which will print the raw
object data without any changes. It will help further to migrate
all cat-file formatting logic from cat-file to ref-filter.
The raw data of blob, tree objects may contain '\0', but most of
the logic in `ref-filter` depends on the output of the atom being
text (specifically, no embedded NULs in it).
E.g. `quote_formatting()` use `strbuf_addstr()` or `*._quote_buf()`
add the data to the buffer. The raw data of a tree object is
`100644 one\0...`, only the `100644 one` will be added to the buffer,
which is incorrect.
Therefore, we need to find a way to record the length of the
atom_value's member `s`. Although strbuf can already record the
string and its length, if we want to replace the type of atom_value's
member `s` with strbuf, many places in ref-filter that are filled
with dynamically allocated mermory in `v->s` are not easy to replace.
At the same time, we need to check if `v->s == NULL` in
populate_value(), and strbuf cannot easily distinguish NULL and empty
strings, but c-style "const char *" can do it. So add a new member in
`struct atom_value`: `s_size`, which can record raw object size, it
can help us add raw object data to the buffer or compare two buffers
which contain raw object data.
Note that `--format=%(raw)` cannot be used with `--python`, `--shell`,
`--tcl`, and `--perl` because if the binary raw data is passed to a
variable in such languages, these may not support arbitrary binary data
in their string variable type.
Reviewed-by: Jacob Keller <jacob.keller@gmail.com>
Mentored-by: Christian Couder <christian.couder@gmail.com>
Mentored-by: Hariom Verma <hariom18599@gmail.com>
Helped-by: Bagas Sanjaya <bagasdotme@gmail.com>
Helped-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Helped-by: Felipe Contreras <felipe.contreras@gmail.com>
Helped-by: Phillip Wood <phillip.wood@dunelm.org.uk>
Helped-by: Junio C Hamano <gitster@pobox.com>
Based-on-patch-by: Olga Telezhnaya <olyatelezhnaya@gmail.com>
Signed-off-by: ZheNing Hu <adlternative@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-07-26 11:26:47 +08:00
|
|
|
[ATOM_RAW] = { "raw", SOURCE_OBJ, FIELD_STR, raw_atom_parser },
|
2021-05-13 23:15:38 +08:00
|
|
|
[ATOM_UPSTREAM] = { "upstream", SOURCE_NONE, FIELD_STR, remote_ref_atom_parser },
|
|
|
|
[ATOM_PUSH] = { "push", SOURCE_NONE, FIELD_STR, remote_ref_atom_parser },
|
|
|
|
[ATOM_SYMREF] = { "symref", SOURCE_NONE, FIELD_STR, refname_atom_parser },
|
|
|
|
[ATOM_FLAG] = { "flag", SOURCE_NONE },
|
|
|
|
[ATOM_HEAD] = { "HEAD", SOURCE_NONE, FIELD_STR, head_atom_parser },
|
|
|
|
[ATOM_COLOR] = { "color", SOURCE_NONE, FIELD_STR, color_atom_parser },
|
|
|
|
[ATOM_WORKTREEPATH] = { "worktreepath", SOURCE_NONE },
|
|
|
|
[ATOM_ALIGN] = { "align", SOURCE_NONE, FIELD_STR, align_atom_parser },
|
|
|
|
[ATOM_END] = { "end", SOURCE_NONE },
|
|
|
|
[ATOM_IF] = { "if", SOURCE_NONE, FIELD_STR, if_atom_parser },
|
|
|
|
[ATOM_THEN] = { "then", SOURCE_NONE },
|
|
|
|
[ATOM_ELSE] = { "else", SOURCE_NONE },
|
2021-07-26 11:26:50 +08:00
|
|
|
[ATOM_REST] = { "rest", SOURCE_NONE, FIELD_STR, rest_atom_parser },
|
for-each-ref: add ahead-behind format atom
The previous change implemented the ahead_behind() method, including an
algorithm to compute the ahead/behind values for a number of commit tips
relative to a number of commit bases. Now, integrate that algorithm as
part of 'git for-each-ref' hidden behind a new format atom,
ahead-behind. This naturally extends to 'git branch' and 'git tag'
builtins, as well.
This format allows specifying multiple bases, if so desired, and all
matching references are compared against all of those bases. For this
reason, failing to read a reference provided from these atoms results in
an error.
In order to translate the ahead_behind() method information to the
format output code in ref-filter.c, we must populate arrays of
ahead_behind_count structs. In struct ref_array, we store the full array
that will be passed to ahead_behind(). In struct ref_array_item, we
store an array of pointers that point to the relvant items within the
full array. In this way, we can pull all relevant ahead/behind values
directly when formatting output for a specific item. It also ensures the
lifetime of the ahead_behind_count structs matches the time that the
array is being used.
Add specific tests of the ahead/behind counts in t6600-test-reach.sh, as
it has an interesting repository shape. In particular, its merging
strategy and its use of different commit-graphs would demonstrate over-
counting if the ahead_behind() method did not already account for that
possibility.
Also add tests for the specific for-each-ref, branch, and tag builtins.
In the case of 'git tag', there are intersting cases that happen when
some of the selected tips are not commits. This requires careful logic
around commits_nr in the second loop of filter_ahead_behind(). Also, the
test in t7004 is carefully located to avoid being dependent on the GPG
prereq. It also avoids using the test_commit helper, as that will add
ticks to the time and disrupt the expected timestamps in later tag
tests.
Also add performance tests in a new p1300-graph-walks.sh script. This
will be useful for more uses in the future, but for now compare the
ahead-behind counting algorithm in 'git for-each-ref' to the naive
implementation by running 'git rev-list --count' processes for each
input.
For the Git source code repository, the improvement is already obvious:
Test this tree
---------------------------------------------------------------
1500.2: ahead-behind counts: git for-each-ref 0.07(0.07+0.00)
1500.3: ahead-behind counts: git branch 0.07(0.06+0.00)
1500.4: ahead-behind counts: git tag 0.07(0.06+0.00)
1500.5: ahead-behind counts: git rev-list 1.32(1.04+0.27)
But the standard performance benchmark is the Linux kernel repository,
which demosntrates a significant improvement:
Test this tree
---------------------------------------------------------------
1500.2: ahead-behind counts: git for-each-ref 0.27(0.24+0.02)
1500.3: ahead-behind counts: git branch 0.27(0.24+0.03)
1500.4: ahead-behind counts: git tag 0.28(0.27+0.01)
1500.5: ahead-behind counts: git rev-list 4.57(4.03+0.54)
The 'git rev-list' test exists in this change as a demonstration, but it
will be removed in the next change to avoid wasting time on this
comparison.
Signed-off-by: Derrick Stolee <derrickstolee@github.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-03-20 19:26:54 +08:00
|
|
|
[ATOM_AHEADBEHIND] = { "ahead-behind", SOURCE_OTHER, FIELD_STR, ahead_behind_atom_parser },
|
2019-02-16 19:24:41 +08:00
|
|
|
/*
|
|
|
|
* Please update $__git_ref_fieldlist in git-completion.bash
|
|
|
|
* when you add new atoms
|
|
|
|
*/
|
2015-06-14 03:37:27 +08:00
|
|
|
};
|
|
|
|
|
2021-09-27 20:54:25 +08:00
|
|
|
/*
 * Aggregate initializer that zeroes every member; judging by its name
 * it is meant for "struct ref_formatting_state" (confirm at the users).
 */
#define REF_FORMATTING_STATE_INIT { 0 }
|
2015-09-10 23:48:18 +08:00
|
|
|
|
|
|
|
struct ref_formatting_stack {
|
|
|
|
struct ref_formatting_stack *prev;
|
|
|
|
struct strbuf output;
|
2017-01-10 16:49:34 +08:00
|
|
|
void (*at_end)(struct ref_formatting_stack **stack);
|
2015-09-11 23:03:07 +08:00
|
|
|
void *at_end_data;
|
2015-09-10 23:48:18 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
/*
 * State handed to every atom handler (see the "handler" member of
 * struct atom_value).
 */
struct ref_formatting_state {
	/* One of the QUOTE_* styles, as consumed by quote_formatting(). */
	int quote_style;
	/* Stack of output buffers (struct ref_formatting_stack). */
	struct ref_formatting_stack *stack;
};
|
|
|
|
|
2015-08-22 11:39:37 +08:00
|
|
|
struct atom_value {
|
|
|
|
const char *s;
|
ref-filter: add %(raw) atom
Add new formatting option `%(raw)`, which will print the raw
object data without any changes. It will help further to migrate
all cat-file formatting logic from cat-file to ref-filter.
The raw data of blob, tree objects may contain '\0', but most of
the logic in `ref-filter` depends on the output of the atom being
text (specifically, no embedded NULs in it).
E.g. `quote_formatting()` use `strbuf_addstr()` or `*._quote_buf()`
add the data to the buffer. The raw data of a tree object is
`100644 one\0...`, only the `100644 one` will be added to the buffer,
which is incorrect.
Therefore, we need to find a way to record the length of the
atom_value's member `s`. Although strbuf can already record the
string and its length, if we want to replace the type of atom_value's
member `s` with strbuf, many places in ref-filter that are filled
with dynamically allocated mermory in `v->s` are not easy to replace.
At the same time, we need to check if `v->s == NULL` in
populate_value(), and strbuf cannot easily distinguish NULL and empty
strings, but c-style "const char *" can do it. So add a new member in
`struct atom_value`: `s_size`, which can record raw object size, it
can help us add raw object data to the buffer or compare two buffers
which contain raw object data.
Note that `--format=%(raw)` cannot be used with `--python`, `--shell`,
`--tcl`, and `--perl` because if the binary raw data is passed to a
variable in such languages, these may not support arbitrary binary data
in their string variable type.
Reviewed-by: Jacob Keller <jacob.keller@gmail.com>
Mentored-by: Christian Couder <christian.couder@gmail.com>
Mentored-by: Hariom Verma <hariom18599@gmail.com>
Helped-by: Bagas Sanjaya <bagasdotme@gmail.com>
Helped-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Helped-by: Felipe Contreras <felipe.contreras@gmail.com>
Helped-by: Phillip Wood <phillip.wood@dunelm.org.uk>
Helped-by: Junio C Hamano <gitster@pobox.com>
Based-on-patch-by: Olga Telezhnaya <olyatelezhnaya@gmail.com>
Signed-off-by: ZheNing Hu <adlternative@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-07-26 11:26:47 +08:00
|
|
|
ssize_t s_size;
|
2018-03-29 20:49:45 +08:00
|
|
|
int (*handler)(struct atom_value *atomv, struct ref_formatting_state *state,
|
|
|
|
struct strbuf *err);
|
2017-04-21 04:52:09 +08:00
|
|
|
uintmax_t value; /* used for sorting when not FIELD_STR */
|
2017-01-10 16:49:35 +08:00
|
|
|
struct used_atom *atom;
|
2015-08-22 11:39:37 +08:00
|
|
|
};
|
|
|
|
|
ref-filter: add %(raw) atom
Add new formatting option `%(raw)`, which will print the raw
object data without any changes. It will help further to migrate
all cat-file formatting logic from cat-file to ref-filter.
The raw data of blob, tree objects may contain '\0', but most of
the logic in `ref-filter` depends on the output of the atom being
text (specifically, no embedded NULs in it).
E.g. `quote_formatting()` use `strbuf_addstr()` or `*._quote_buf()`
add the data to the buffer. The raw data of a tree object is
`100644 one\0...`, only the `100644 one` will be added to the buffer,
which is incorrect.
Therefore, we need to find a way to record the length of the
atom_value's member `s`. Although strbuf can already record the
string and its length, if we want to replace the type of atom_value's
member `s` with strbuf, many places in ref-filter that are filled
with dynamically allocated memory in `v->s` are not easy to replace.
At the same time, we need to check if `v->s == NULL` in
populate_value(), and strbuf cannot easily distinguish NULL and empty
strings, but c-style "const char *" can do it. So add a new member in
`struct atom_value`: `s_size`, which can record raw object size, it
can help us add raw object data to the buffer or compare two buffers
which contain raw object data.
Note that `--format=%(raw)` cannot be used with `--python`, `--shell`,
`--tcl`, and `--perl` because if the binary raw data is passed to a
variable in such languages, these may not support arbitrary binary data
in their string variable type.
Reviewed-by: Jacob Keller <jacob.keller@gmail.com>
Mentored-by: Christian Couder <christian.couder@gmail.com>
Mentored-by: Hariom Verma <hariom18599@gmail.com>
Helped-by: Bagas Sanjaya <bagasdotme@gmail.com>
Helped-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Helped-by: Felipe Contreras <felipe.contreras@gmail.com>
Helped-by: Phillip Wood <phillip.wood@dunelm.org.uk>
Helped-by: Junio C Hamano <gitster@pobox.com>
Based-on-patch-by: Olga Telezhnaya <olyatelezhnaya@gmail.com>
Signed-off-by: ZheNing Hu <adlternative@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-07-26 11:26:47 +08:00
|
|
|
/* Value that ATOM_VALUE_INIT assigns to the s_size member. */
#define ATOM_SIZE_UNSPECIFIED (-1)
|
|
|
|
|
|
|
|
/*
 * Initializer that sets s_size to ATOM_SIZE_UNSPECIFIED and leaves all
 * other members zero-initialized.
 */
#define ATOM_VALUE_INIT { .s_size = ATOM_SIZE_UNSPECIFIED }
|
|
|
|
|
2015-06-14 03:37:27 +08:00
|
|
|
/*
|
|
|
|
* Used to parse format string and sort specifiers
|
|
|
|
*/
|
2021-07-26 11:26:49 +08:00
|
|
|
static int parse_ref_filter_atom(struct ref_format *format,
|
2018-03-29 20:49:45 +08:00
|
|
|
const char *atom, const char *ep,
|
|
|
|
struct strbuf *err)
|
2015-06-14 03:37:27 +08:00
|
|
|
{
|
|
|
|
const char *sp;
|
2016-02-18 02:06:12 +08:00
|
|
|
const char *arg;
|
2016-10-03 00:35:11 +08:00
|
|
|
int i, at, atom_len;
|
2015-06-14 03:37:27 +08:00
|
|
|
|
|
|
|
sp = atom;
|
|
|
|
if (*sp == '*' && sp < ep)
|
|
|
|
sp++; /* deref */
|
|
|
|
if (ep <= sp)
|
2018-03-29 20:49:45 +08:00
|
|
|
return strbuf_addf_ret(err, -1, _("malformed field name: %.*s"),
|
|
|
|
(int)(ep-atom), atom);
|
2015-06-14 03:37:27 +08:00
|
|
|
|
2016-10-03 00:35:11 +08:00
|
|
|
/*
|
|
|
|
* If the atom name has a colon, strip it and everything after
|
|
|
|
* it off - it specifies the format for this entry, and
|
|
|
|
* shouldn't be used for checking against the valid_atom
|
|
|
|
* table.
|
|
|
|
*/
|
|
|
|
arg = memchr(sp, ':', ep - sp);
|
|
|
|
atom_len = (arg ? arg : ep) - sp;
|
|
|
|
|
ref-filter: add %(raw) atom
Add new formatting option `%(raw)`, which will print the raw
object data without any changes. It will help further to migrate
all cat-file formatting logic from cat-file to ref-filter.
The raw data of blob, tree objects may contain '\0', but most of
the logic in `ref-filter` depends on the output of the atom being
text (specifically, no embedded NULs in it).
E.g. `quote_formatting()` use `strbuf_addstr()` or `*._quote_buf()`
add the data to the buffer. The raw data of a tree object is
`100644 one\0...`, only the `100644 one` will be added to the buffer,
which is incorrect.
Therefore, we need to find a way to record the length of the
atom_value's member `s`. Although strbuf can already record the
string and its length, if we want to replace the type of atom_value's
member `s` with strbuf, many places in ref-filter that are filled
with dynamically allocated mermory in `v->s` are not easy to replace.
At the same time, we need to check if `v->s == NULL` in
populate_value(), and strbuf cannot easily distinguish NULL and empty
strings, but c-style "const char *" can do it. So add a new member in
`struct atom_value`: `s_size`, which can record raw object size, it
can help us add raw object data to the buffer or compare two buffers
which contain raw object data.
Note that `--format=%(raw)` cannot be used with `--python`, `--shell`,
`--tcl`, and `--perl` because if the binary raw data is passed to a
variable in such languages, these may not support arbitrary binary data
in their string variable type.
Reviewed-by: Jacob Keller <jacob.keller@gmail.com>
Mentored-by: Christian Couder <christian.couder@gmail.com>
Mentored-by: Hariom Verma <hariom18599@gmail.com>
Helped-by: Bagas Sanjaya <bagasdotme@gmail.com>
Helped-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Helped-by: Felipe Contreras <felipe.contreras@gmail.com>
Helped-by: Phillip Wood <phillip.wood@dunelm.org.uk>
Helped-by: Junio C Hamano <gitster@pobox.com>
Based-on-patch-by: Olga Telezhnaya <olyatelezhnaya@gmail.com>
Signed-off-by: ZheNing Hu <adlternative@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-07-26 11:26:47 +08:00
|
|
|
/* Do we have the atom already used elsewhere? */
|
|
|
|
for (i = 0; i < used_atom_cnt; i++) {
|
|
|
|
int len = strlen(used_atom[i].name);
|
|
|
|
if (len == ep - atom && !memcmp(used_atom[i].name, atom, len))
|
|
|
|
return i;
|
|
|
|
}
|
|
|
|
|
2015-06-14 03:37:27 +08:00
|
|
|
/* Is the atom a valid one? */
|
|
|
|
for (i = 0; i < ARRAY_SIZE(valid_atom); i++) {
|
|
|
|
int len = strlen(valid_atom[i].name);
|
2016-10-03 00:35:11 +08:00
|
|
|
if (len == atom_len && !memcmp(valid_atom[i].name, sp, len))
|
2015-06-14 03:37:27 +08:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (ARRAY_SIZE(valid_atom) <= i)
|
2018-03-29 20:49:45 +08:00
|
|
|
return strbuf_addf_ret(err, -1, _("unknown field name: %.*s"),
|
|
|
|
(int)(ep-atom), atom);
|
2018-11-14 20:27:25 +08:00
|
|
|
if (valid_atom[i].source != SOURCE_NONE && !have_git_dir())
|
|
|
|
return strbuf_addf_ret(err, -1,
|
|
|
|
_("not a git repository, but the field '%.*s' requires access to object data"),
|
|
|
|
(int)(ep-atom), atom);
|
2015-06-14 03:37:27 +08:00
|
|
|
|
|
|
|
/* Add it in, including the deref prefix */
|
|
|
|
at = used_atom_cnt;
|
|
|
|
used_atom_cnt++;
|
|
|
|
REALLOC_ARRAY(used_atom, used_atom_cnt);
|
2021-05-13 23:15:38 +08:00
|
|
|
used_atom[at].atom_type = i;
|
2016-02-18 02:06:11 +08:00
|
|
|
used_atom[at].name = xmemdupz(atom, ep - atom);
|
|
|
|
used_atom[at].type = valid_atom[i].cmp_type;
|
2018-07-17 16:22:57 +08:00
|
|
|
used_atom[at].source = valid_atom[i].source;
|
2018-07-17 16:22:57 +08:00
|
|
|
if (used_atom[at].source == SOURCE_OBJ) {
|
|
|
|
if (*atom == '*')
|
|
|
|
oi_deref.info.contentp = &oi_deref.content;
|
|
|
|
else
|
|
|
|
oi.info.contentp = &oi.content;
|
|
|
|
}
|
2017-10-03 00:10:34 +08:00
|
|
|
if (arg) {
|
2016-02-18 02:06:12 +08:00
|
|
|
arg = used_atom[at].name + (arg - atom) + 1;
|
2017-10-03 00:10:34 +08:00
|
|
|
if (!*arg) {
|
|
|
|
/*
|
|
|
|
* Treat empty sub-arguments list as NULL (i.e.,
|
|
|
|
* "%(atom:)" is equivalent to "%(atom)").
|
|
|
|
*/
|
|
|
|
arg = NULL;
|
|
|
|
}
|
|
|
|
}
|
2016-02-18 02:06:13 +08:00
|
|
|
memset(&used_atom[at].u, 0, sizeof(used_atom[at].u));
|
2018-03-29 20:49:45 +08:00
|
|
|
if (valid_atom[i].parser && valid_atom[i].parser(format, &used_atom[at], arg, err))
|
|
|
|
return -1;
|
2015-06-14 03:37:27 +08:00
|
|
|
if (*atom == '*')
|
|
|
|
need_tagged = 1;
|
2021-05-13 23:15:38 +08:00
|
|
|
if (i == ATOM_SYMREF)
|
2015-06-14 03:37:27 +08:00
|
|
|
need_symref = 1;
|
|
|
|
return at;
|
|
|
|
}
|
|
|
|
|
ref-filter: add %(raw) atom
Add new formatting option `%(raw)`, which will print the raw
object data without any changes. It will help further to migrate
all cat-file formatting logic from cat-file to ref-filter.
The raw data of blob, tree objects may contain '\0', but most of
the logic in `ref-filter` depends on the output of the atom being
text (specifically, no embedded NULs in it).
E.g. `quote_formatting()` use `strbuf_addstr()` or `*._quote_buf()`
add the data to the buffer. The raw data of a tree object is
`100644 one\0...`, only the `100644 one` will be added to the buffer,
which is incorrect.
Therefore, we need to find a way to record the length of the
atom_value's member `s`. Although strbuf can already record the
string and its length, if we want to replace the type of atom_value's
member `s` with strbuf, many places in ref-filter that are filled
with dynamically allocated memory in `v->s` are not easy to replace.
At the same time, we need to check if `v->s == NULL` in
populate_value(), and strbuf cannot easily distinguish NULL and empty
strings, but c-style "const char *" can do it. So add a new member in
`struct atom_value`: `s_size`, which can record raw object size, it
can help us add raw object data to the buffer or compare two buffers
which contain raw object data.
Note that `--format=%(raw)` cannot be used with `--python`, `--shell`,
`--tcl`, and `--perl` because if the binary raw data is passed to a
variable in such languages, these may not support arbitrary binary data
in their string variable type.
Reviewed-by: Jacob Keller <jacob.keller@gmail.com>
Mentored-by: Christian Couder <christian.couder@gmail.com>
Mentored-by: Hariom Verma <hariom18599@gmail.com>
Helped-by: Bagas Sanjaya <bagasdotme@gmail.com>
Helped-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Helped-by: Felipe Contreras <felipe.contreras@gmail.com>
Helped-by: Phillip Wood <phillip.wood@dunelm.org.uk>
Helped-by: Junio C Hamano <gitster@pobox.com>
Based-on-patch-by: Olga Telezhnaya <olyatelezhnaya@gmail.com>
Signed-off-by: ZheNing Hu <adlternative@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-07-26 11:26:47 +08:00
|
|
|
static void quote_formatting(struct strbuf *s, const char *str, ssize_t len, int quote_style)
|
2015-09-10 23:48:20 +08:00
|
|
|
{
|
|
|
|
switch (quote_style) {
|
|
|
|
case QUOTE_NONE:
|
ref-filter: add %(raw) atom
Add new formatting option `%(raw)`, which will print the raw
object data without any changes. It will help further to migrate
all cat-file formatting logic from cat-file to ref-filter.
The raw data of blob, tree objects may contain '\0', but most of
the logic in `ref-filter` depends on the output of the atom being
text (specifically, no embedded NULs in it).
E.g. `quote_formatting()` use `strbuf_addstr()` or `*._quote_buf()`
add the data to the buffer. The raw data of a tree object is
`100644 one\0...`, only the `100644 one` will be added to the buffer,
which is incorrect.
Therefore, we need to find a way to record the length of the
atom_value's member `s`. Although strbuf can already record the
string and its length, if we want to replace the type of atom_value's
member `s` with strbuf, many places in ref-filter that are filled
with dynamically allocated mermory in `v->s` are not easy to replace.
At the same time, we need to check if `v->s == NULL` in
populate_value(), and strbuf cannot easily distinguish NULL and empty
strings, but c-style "const char *" can do it. So add a new member in
`struct atom_value`: `s_size`, which can record raw object size, it
can help us add raw object data to the buffer or compare two buffers
which contain raw object data.
Note that `--format=%(raw)` cannot be used with `--python`, `--shell`,
`--tcl`, and `--perl` because if the binary raw data is passed to a
variable in such languages, these may not support arbitrary binary data
in their string variable type.
Reviewed-by: Jacob Keller <jacob.keller@gmail.com>
Mentored-by: Christian Couder <christian.couder@gmail.com>
Mentored-by: Hariom Verma <hariom18599@gmail.com>
Helped-by: Bagas Sanjaya <bagasdotme@gmail.com>
Helped-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Helped-by: Felipe Contreras <felipe.contreras@gmail.com>
Helped-by: Phillip Wood <phillip.wood@dunelm.org.uk>
Helped-by: Junio C Hamano <gitster@pobox.com>
Based-on-patch-by: Olga Telezhnaya <olyatelezhnaya@gmail.com>
Signed-off-by: ZheNing Hu <adlternative@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-07-26 11:26:47 +08:00
|
|
|
if (len < 0)
|
|
|
|
strbuf_addstr(s, str);
|
|
|
|
else
|
|
|
|
strbuf_add(s, str, len);
|
2015-09-10 23:48:20 +08:00
|
|
|
break;
|
|
|
|
case QUOTE_SHELL:
|
|
|
|
sq_quote_buf(s, str);
|
|
|
|
break;
|
|
|
|
case QUOTE_PERL:
|
2021-07-26 11:26:48 +08:00
|
|
|
if (len < 0)
|
|
|
|
perl_quote_buf(s, str);
|
|
|
|
else
|
|
|
|
perl_quote_buf_with_len(s, str, len);
|
2015-09-10 23:48:20 +08:00
|
|
|
break;
|
|
|
|
case QUOTE_PYTHON:
|
|
|
|
python_quote_buf(s, str);
|
|
|
|
break;
|
|
|
|
case QUOTE_TCL:
|
|
|
|
tcl_quote_buf(s, str);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-03-29 20:49:45 +08:00
|
|
|
static int append_atom(struct atom_value *v, struct ref_formatting_state *state,
|
ref-filter: mark unused callback parameters
The ref-filter code uses virtual functions to handle specific atoms, but
many of the functions ignore some parameters:
- most atom parsers do not need the ref_format itself, unless they are
looking at centralized options like use_color, quote_style, etc.
- meta-atom handlers like append_atom(), align_atom_handler(), etc,
can't generate errors, so ignore their "err" parameter
- likewise, the handlers for then/else/end do not even need to look at
their atom_value, as the "if" handler put everything they need into
the ref_formatting_state stack
Since these functions all have to conform to virtual function
interfaces, we can't just drop the unused parameters, but must mark them
as UNUSED (to appease -Wunused-parameter).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-02-24 14:39:06 +08:00
|
|
|
struct strbuf *err UNUSED)
|
2015-09-10 23:48:20 +08:00
|
|
|
{
|
2015-09-11 23:03:07 +08:00
|
|
|
/*
|
|
|
|
* Quote formatting is only done when the stack has a single
|
|
|
|
* element. Otherwise quote formatting is done on the
|
|
|
|
* element's entire output strbuf when the %(end) atom is
|
|
|
|
* encountered.
|
|
|
|
*/
|
|
|
|
if (!state->stack->prev)
|
ref-filter: add %(raw) atom
Add new formatting option `%(raw)`, which will print the raw
object data without any changes. It will help further to migrate
all cat-file formatting logic from cat-file to ref-filter.
The raw data of blob, tree objects may contain '\0', but most of
the logic in `ref-filter` depends on the output of the atom being
text (specifically, no embedded NULs in it).
E.g. `quote_formatting()` use `strbuf_addstr()` or `*._quote_buf()`
add the data to the buffer. The raw data of a tree object is
`100644 one\0...`, only the `100644 one` will be added to the buffer,
which is incorrect.
Therefore, we need to find a way to record the length of the
atom_value's member `s`. Although strbuf can already record the
string and its length, if we want to replace the type of atom_value's
member `s` with strbuf, many places in ref-filter that are filled
with dynamically allocated mermory in `v->s` are not easy to replace.
At the same time, we need to check if `v->s == NULL` in
populate_value(), and strbuf cannot easily distinguish NULL and empty
strings, but c-style "const char *" can do it. So add a new member in
`struct atom_value`: `s_size`, which can record raw object size, it
can help us add raw object data to the buffer or compare two buffers
which contain raw object data.
Note that `--format=%(raw)` cannot be used with `--python`, `--shell`,
`--tcl`, and `--perl` because if the binary raw data is passed to a
variable in such languages, these may not support arbitrary binary data
in their string variable type.
Reviewed-by: Jacob Keller <jacob.keller@gmail.com>
Mentored-by: Christian Couder <christian.couder@gmail.com>
Mentored-by: Hariom Verma <hariom18599@gmail.com>
Helped-by: Bagas Sanjaya <bagasdotme@gmail.com>
Helped-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Helped-by: Felipe Contreras <felipe.contreras@gmail.com>
Helped-by: Phillip Wood <phillip.wood@dunelm.org.uk>
Helped-by: Junio C Hamano <gitster@pobox.com>
Based-on-patch-by: Olga Telezhnaya <olyatelezhnaya@gmail.com>
Signed-off-by: ZheNing Hu <adlternative@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-07-26 11:26:47 +08:00
|
|
|
quote_formatting(&state->stack->output, v->s, v->s_size, state->quote_style);
|
|
|
|
else if (v->s_size < 0)
|
2015-09-11 23:03:07 +08:00
|
|
|
strbuf_addstr(&state->stack->output, v->s);
|
ref-filter: add %(raw) atom
Add new formatting option `%(raw)`, which will print the raw
object data without any changes. It will help further to migrate
all cat-file formatting logic from cat-file to ref-filter.
The raw data of blob, tree objects may contain '\0', but most of
the logic in `ref-filter` depends on the output of the atom being
text (specifically, no embedded NULs in it).
E.g. `quote_formatting()` use `strbuf_addstr()` or `*._quote_buf()`
add the data to the buffer. The raw data of a tree object is
`100644 one\0...`, only the `100644 one` will be added to the buffer,
which is incorrect.
Therefore, we need to find a way to record the length of the
atom_value's member `s`. Although strbuf can already record the
string and its length, if we want to replace the type of atom_value's
member `s` with strbuf, many places in ref-filter that are filled
with dynamically allocated mermory in `v->s` are not easy to replace.
At the same time, we need to check if `v->s == NULL` in
populate_value(), and strbuf cannot easily distinguish NULL and empty
strings, but c-style "const char *" can do it. So add a new member in
`struct atom_value`: `s_size`, which can record raw object size, it
can help us add raw object data to the buffer or compare two buffers
which contain raw object data.
Note that `--format=%(raw)` cannot be used with `--python`, `--shell`,
`--tcl`, and `--perl` because if the binary raw data is passed to a
variable in such languages, these may not support arbitrary binary data
in their string variable type.
Reviewed-by: Jacob Keller <jacob.keller@gmail.com>
Mentored-by: Christian Couder <christian.couder@gmail.com>
Mentored-by: Hariom Verma <hariom18599@gmail.com>
Helped-by: Bagas Sanjaya <bagasdotme@gmail.com>
Helped-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Helped-by: Felipe Contreras <felipe.contreras@gmail.com>
Helped-by: Phillip Wood <phillip.wood@dunelm.org.uk>
Helped-by: Junio C Hamano <gitster@pobox.com>
Based-on-patch-by: Olga Telezhnaya <olyatelezhnaya@gmail.com>
Signed-off-by: ZheNing Hu <adlternative@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-07-26 11:26:47 +08:00
|
|
|
else
|
|
|
|
strbuf_add(&state->stack->output, v->s, v->s_size);
|
2018-03-29 20:49:45 +08:00
|
|
|
return 0;
|
2015-09-10 23:48:20 +08:00
|
|
|
}
|
|
|
|
|
2015-09-10 23:48:18 +08:00
|
|
|
static void push_stack_element(struct ref_formatting_stack **stack)
|
|
|
|
{
|
|
|
|
struct ref_formatting_stack *s = xcalloc(1, sizeof(struct ref_formatting_stack));
|
|
|
|
|
|
|
|
strbuf_init(&s->output, 0);
|
|
|
|
s->prev = *stack;
|
|
|
|
*stack = s;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void pop_stack_element(struct ref_formatting_stack **stack)
|
|
|
|
{
|
|
|
|
struct ref_formatting_stack *current = *stack;
|
|
|
|
struct ref_formatting_stack *prev = current->prev;
|
|
|
|
|
|
|
|
if (prev)
|
|
|
|
strbuf_addbuf(&prev->output, ¤t->output);
|
|
|
|
strbuf_release(¤t->output);
|
|
|
|
free(current);
|
|
|
|
*stack = prev;
|
|
|
|
}
|
|
|
|
|
2017-01-10 16:49:34 +08:00
|
|
|
static void end_align_handler(struct ref_formatting_stack **stack)
|
2015-09-11 23:03:07 +08:00
|
|
|
{
|
2017-01-10 16:49:34 +08:00
|
|
|
struct ref_formatting_stack *cur = *stack;
|
|
|
|
struct align *align = (struct align *)cur->at_end_data;
|
2015-09-11 23:03:07 +08:00
|
|
|
struct strbuf s = STRBUF_INIT;
|
|
|
|
|
2017-01-10 16:49:34 +08:00
|
|
|
strbuf_utf8_align(&s, align->position, align->width, cur->output.buf);
|
|
|
|
strbuf_swap(&cur->output, &s);
|
2015-09-11 23:03:07 +08:00
|
|
|
strbuf_release(&s);
|
|
|
|
}
|
|
|
|
|
2018-03-29 20:49:45 +08:00
|
|
|
static int align_atom_handler(struct atom_value *atomv, struct ref_formatting_state *state,
|
ref-filter: mark unused callback parameters
The ref-filter code uses virtual functions to handle specific atoms, but
many of the functions ignore some parameters:
- most atom parsers do not need the ref_format itself, unless they are
looking at centralized options like use_color, quote_style, etc.
- meta-atom handlers like append_atom(), align_atom_handler(), etc,
can't generate errors, so ignore their "err" parameter
- likewise, the handlers for then/else/end do not even need to look at
their atom_value, as the "if" handler put everything they need into
the ref_formatting_state stack
Since these functions all have to conform to virtual function
interfaces, we can't just drop the unused parameters, but must mark them
as UNUSED (to appease -Wunused-parameter).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-02-24 14:39:06 +08:00
|
|
|
struct strbuf *err UNUSED)
|
2015-09-11 23:03:07 +08:00
|
|
|
{
|
2018-02-15 02:59:46 +08:00
|
|
|
struct ref_formatting_stack *new_stack;
|
2015-09-11 23:03:07 +08:00
|
|
|
|
|
|
|
push_stack_element(&state->stack);
|
2018-02-15 02:59:46 +08:00
|
|
|
new_stack = state->stack;
|
|
|
|
new_stack->at_end = end_align_handler;
|
|
|
|
new_stack->at_end_data = &atomv->atom->u.align;
|
2018-03-29 20:49:45 +08:00
|
|
|
return 0;
|
2015-09-11 23:03:07 +08:00
|
|
|
}
|
|
|
|
|
2017-01-10 16:49:34 +08:00
|
|
|
static void if_then_else_handler(struct ref_formatting_stack **stack)
|
|
|
|
{
|
|
|
|
struct ref_formatting_stack *cur = *stack;
|
|
|
|
struct ref_formatting_stack *prev = cur->prev;
|
|
|
|
struct if_then_else *if_then_else = (struct if_then_else *)cur->at_end_data;
|
|
|
|
|
|
|
|
if (!if_then_else->then_atom_seen)
|
2022-01-06 04:02:23 +08:00
|
|
|
die(_("format: %%(%s) atom used without a %%(%s) atom"), "if", "then");
|
2017-01-10 16:49:34 +08:00
|
|
|
|
|
|
|
if (if_then_else->else_atom_seen) {
|
|
|
|
/*
|
|
|
|
* There is an %(else) atom: we need to drop one state from the
|
|
|
|
* stack, either the %(else) branch if the condition is satisfied, or
|
|
|
|
* the %(then) branch if it isn't.
|
|
|
|
*/
|
|
|
|
if (if_then_else->condition_satisfied) {
|
|
|
|
strbuf_reset(&cur->output);
|
|
|
|
pop_stack_element(&cur);
|
|
|
|
} else {
|
|
|
|
strbuf_swap(&cur->output, &prev->output);
|
|
|
|
strbuf_reset(&cur->output);
|
|
|
|
pop_stack_element(&cur);
|
|
|
|
}
|
|
|
|
} else if (!if_then_else->condition_satisfied) {
|
|
|
|
/*
|
|
|
|
* No %(else) atom: just drop the %(then) branch if the
|
|
|
|
* condition is not satisfied.
|
|
|
|
*/
|
|
|
|
strbuf_reset(&cur->output);
|
|
|
|
}
|
|
|
|
|
|
|
|
*stack = cur;
|
|
|
|
free(if_then_else);
|
|
|
|
}
|
|
|
|
|
2018-03-29 20:49:45 +08:00
|
|
|
static int if_atom_handler(struct atom_value *atomv, struct ref_formatting_state *state,
|
ref-filter: mark unused callback parameters
The ref-filter code uses virtual functions to handle specific atoms, but
many of the functions ignore some parameters:
- most atom parsers do not need the ref_format itself, unless they are
looking at centralized options like use_color, quote_style, etc.
- meta-atom handlers like append_atom(), align_atom_handler(), etc,
can't generate errors, so ignore their "err" parameter
- likewise, the handlers for then/else/end do not even need to look at
their atom_value, as the "if" handler put everything they need into
the ref_formatting_state stack
Since these functions all have to conform to virtual function
interfaces, we can't just drop the unused parameters, but must mark them
as UNUSED (to appease -Wunused-parameter).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-02-24 14:39:06 +08:00
|
|
|
struct strbuf *err UNUSED)
|
2017-01-10 16:49:34 +08:00
|
|
|
{
|
2018-02-15 02:59:46 +08:00
|
|
|
struct ref_formatting_stack *new_stack;
|
2021-03-06 19:26:19 +08:00
|
|
|
struct if_then_else *if_then_else = xcalloc(1,
|
|
|
|
sizeof(struct if_then_else));
|
2017-01-10 16:49:34 +08:00
|
|
|
|
2017-01-10 16:49:36 +08:00
|
|
|
if_then_else->str = atomv->atom->u.if_then_else.str;
|
|
|
|
if_then_else->cmp_status = atomv->atom->u.if_then_else.cmp_status;
|
|
|
|
|
2017-01-10 16:49:34 +08:00
|
|
|
push_stack_element(&state->stack);
|
2018-02-15 02:59:46 +08:00
|
|
|
new_stack = state->stack;
|
|
|
|
new_stack->at_end = if_then_else_handler;
|
|
|
|
new_stack->at_end_data = if_then_else;
|
2018-03-29 20:49:45 +08:00
|
|
|
return 0;
|
2017-01-10 16:49:34 +08:00
|
|
|
}
|
|
|
|
|
ref-filter: add %(raw) atom
Add new formatting option `%(raw)`, which will print the raw
object data without any changes. It will help further to migrate
all cat-file formatting logic from cat-file to ref-filter.
The raw data of blob, tree objects may contain '\0', but most of
the logic in `ref-filter` depends on the output of the atom being
text (specifically, no embedded NULs in it).
E.g. `quote_formatting()` uses `strbuf_addstr()` or `*._quote_buf()`
to add the data to the buffer. The raw data of a tree object is
`100644 one\0...`, only the `100644 one` will be added to the buffer,
which is incorrect.
Therefore, we need to find a way to record the length of the
atom_value's member `s`. Although strbuf can already record the
string and its length, if we want to replace the type of atom_value's
member `s` with strbuf, many places in ref-filter that are filled
with dynamically allocated memory in `v->s` are not easy to replace.
At the same time, we need to check if `v->s == NULL` in
populate_value(), and strbuf cannot easily distinguish NULL and empty
strings, but c-style "const char *" can do it. So add a new member in
`struct atom_value`: `s_size`, which can record raw object size, it
can help us add raw object data to the buffer or compare two buffers
which contain raw object data.
Note that `--format=%(raw)` cannot be used with `--python`, `--shell`,
`--tcl`, and `--perl` because if the binary raw data is passed to a
variable in such languages, these may not support arbitrary binary data
in their string variable type.
Reviewed-by: Jacob Keller <jacob.keller@gmail.com>
Mentored-by: Christian Couder <christian.couder@gmail.com>
Mentored-by: Hariom Verma <hariom18599@gmail.com>
Helped-by: Bagas Sanjaya <bagasdotme@gmail.com>
Helped-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Helped-by: Felipe Contreras <felipe.contreras@gmail.com>
Helped-by: Phillip Wood <phillip.wood@dunelm.org.uk>
Helped-by: Junio C Hamano <gitster@pobox.com>
Based-on-patch-by: Olga Telezhnaya <olyatelezhnaya@gmail.com>
Signed-off-by: ZheNing Hu <adlternative@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-07-26 11:26:47 +08:00
|
|
|
static int is_empty(struct strbuf *buf)
|
2017-01-10 16:49:34 +08:00
|
|
|
{
|
ref-filter: add %(raw) atom
Add new formatting option `%(raw)`, which will print the raw
object data without any changes. It will help further to migrate
all cat-file formatting logic from cat-file to ref-filter.
The raw data of blob, tree objects may contain '\0', but most of
the logic in `ref-filter` depends on the output of the atom being
text (specifically, no embedded NULs in it).
E.g. `quote_formatting()` use `strbuf_addstr()` or `*._quote_buf()`
add the data to the buffer. The raw data of a tree object is
`100644 one\0...`, only the `100644 one` will be added to the buffer,
which is incorrect.
Therefore, we need to find a way to record the length of the
atom_value's member `s`. Although strbuf can already record the
string and its length, if we want to replace the type of atom_value's
member `s` with strbuf, many places in ref-filter that are filled
with dynamically allocated mermory in `v->s` are not easy to replace.
At the same time, we need to check if `v->s == NULL` in
populate_value(), and strbuf cannot easily distinguish NULL and empty
strings, but c-style "const char *" can do it. So add a new member in
`struct atom_value`: `s_size`, which can record raw object size, it
can help us add raw object data to the buffer or compare two buffers
which contain raw object data.
Note that `--format=%(raw)` cannot be used with `--python`, `--shell`,
`--tcl`, and `--perl` because if the binary raw data is passed to a
variable in such languages, these may not support arbitrary binary data
in their string variable type.
Reviewed-by: Jacob Keller <jacob.keller@gmail.com>
Mentored-by: Christian Couder <christian.couder@gmail.com>
Mentored-by: Hariom Verma <hariom18599@gmail.com>
Helped-by: Bagas Sanjaya <bagasdotme@gmail.com>
Helped-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Helped-by: Felipe Contreras <felipe.contreras@gmail.com>
Helped-by: Phillip Wood <phillip.wood@dunelm.org.uk>
Helped-by: Junio C Hamano <gitster@pobox.com>
Based-on-patch-by: Olga Telezhnaya <olyatelezhnaya@gmail.com>
Signed-off-by: ZheNing Hu <adlternative@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-07-26 11:26:47 +08:00
|
|
|
const char *cur = buf->buf;
|
|
|
|
const char *end = buf->buf + buf->len;
|
|
|
|
|
|
|
|
while (cur != end && (isspace(*cur)))
|
|
|
|
cur++;
|
|
|
|
|
|
|
|
return cur == end;
|
|
|
|
}
|
2017-01-10 16:49:34 +08:00
|
|
|
|
ref-filter: mark unused callback parameters
The ref-filter code uses virtual functions to handle specific atoms, but
many of the functions ignore some parameters:
- most atom parsers do not need the ref_format itself, unless they are
looking at centralized options like use_color, quote_style, etc.
- meta-atom handlers like append_atom(), align_atom_handler(), etc,
can't generate errors, so ignore their "err" parameter
- likewise, the handlers for then/else/end do not even need to look at
their atom_value, as the "if" handler put everything they need into
the ref_formatting_state stack
Since these functions all have to conform to virtual function
interfaces, we can't just drop the unused parameters, but must mark them
as UNUSED (to appease -Wunused-parameter).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-02-24 14:39:06 +08:00
|
|
|
/*
 * Handler for the %(then) atom: evaluate the %(if) condition against the
 * text accumulated so far in the top stack element, record the result,
 * and clear that text.
 *
 * Returns 0 on success. Returns -1 with a message added to `err` when
 * the top element does not belong to an %(if), when %(then) was already
 * seen, or when %(else) was already seen.
 */
static int then_atom_handler(struct atom_value *atomv UNUSED,
			     struct ref_formatting_state *state,
			     struct strbuf *err)
{
	struct ref_formatting_stack *top = state->stack;
	struct if_then_else *cond;
	size_t want_len;

	if (top->at_end != if_then_else_handler || !top->at_end_data)
		return strbuf_addf_ret(err, -1, _("format: %%(%s) atom used without a %%(%s) atom"), "then", "if");
	cond = (struct if_then_else *)top->at_end_data;

	if (cond->then_atom_seen)
		return strbuf_addf_ret(err, -1, _("format: %%(then) atom used more than once"));
	if (cond->else_atom_seen)
		return strbuf_addf_ret(err, -1, _("format: %%(then) atom used after %%(else)"));
	cond->then_atom_seen = 1;

	want_len = cond->str ? strlen(cond->str) : 0;

	/*
	 * With the 'equals' or 'notequals' attribute, compare the
	 * collected output byte-for-byte (length first, so embedded NULs
	 * are handled) against the wanted string. Without either, the
	 * condition holds only for output that is non-empty and not all
	 * whitespace.
	 */
	if (cond->cmp_status == COMPARE_EQUAL ||
	    cond->cmp_status == COMPARE_UNEQUAL) {
		int same = want_len == top->output.len &&
			   !memcmp(cond->str, top->output.buf, top->output.len);

		if (same == (cond->cmp_status == COMPARE_EQUAL))
			cond->condition_satisfied = 1;
	} else if (top->output.len && !is_empty(&top->output)) {
		cond->condition_satisfied = 1;
	}

	strbuf_reset(&top->output);
	return 0;
}
|
|
|
|
|
ref-filter: mark unused callback parameters
The ref-filter code uses virtual functions to handle specific atoms, but
many of the functions ignore some parameters:
- most atom parsers do not need the ref_format itself, unless they are
looking at centralized options like use_color, quote_style, etc.
- meta-atom handlers like append_atom(), align_atom_handler(), etc,
can't generate errors, so ignore their "err" parameter
- likewise, the handlers for then/else/end do not even need to look at
their atom_value, as the "if" handler put everything they need into
the ref_formatting_state stack
Since these functions all have to conform to virtual function
interfaces, we can't just drop the unused parameters, but must mark them
as UNUSED (to appease -Wunused-parameter).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-02-24 14:39:06 +08:00
|
|
|
/*
 * Handler for the %(else) atom.
 *
 * The element on top of the formatting stack must have been set up by an
 * %(if) atom (its at_end callback is if_then_else_handler), must already
 * have seen its %(then), and must not have seen an %(else) yet.  If any of
 * these does not hold, a message is added to "err" and the result of
 * strbuf_addf_ret(err, -1, ...) is returned.
 *
 * On success the %(else) is recorded, a fresh stack element that inherits
 * the at_end callback and its data is pushed, and 0 is returned.
 */
static int else_atom_handler(struct atom_value *atomv UNUSED,
			     struct ref_formatting_state *state,
			     struct strbuf *err)
{
	struct ref_formatting_stack *enclosing = state->stack;
	struct if_then_else *ite;

	ite = (enclosing->at_end == if_then_else_handler)
		? (struct if_then_else *)enclosing->at_end_data
		: NULL;

	if (!ite)
		return strbuf_addf_ret(err, -1, _("format: %%(%s) atom used without a %%(%s) atom"), "else", "if");
	if (!ite->then_atom_seen)
		return strbuf_addf_ret(err, -1, _("format: %%(%s) atom used without a %%(%s) atom"), "else", "then");
	if (ite->else_atom_seen)
		return strbuf_addf_ret(err, -1, _("format: %%(else) atom used more than once"));

	ite->else_atom_seen = 1;

	/* The new top element takes over the end-of-block callback. */
	push_stack_element(&state->stack);
	state->stack->at_end = enclosing->at_end;
	state->stack->at_end_data = enclosing->at_end_data;
	return 0;
}
|
|
|
|
|
ref-filter: mark unused callback parameters
The ref-filter code uses virtual functions to handle specific atoms, but
many of the functions ignore some parameters:
- most atom parsers do not need the ref_format itself, unless they are
looking at centralized options like use_color, quote_style, etc.
- meta-atom handlers like append_atom(), align_atom_handler(), etc,
can't generate errors, so ignore their "err" parameter
- likewise, the handlers for then/else/end do not even need to look at
their atom_value, as the "if" handler put everything they need into
the ref_formatting_state stack
Since these functions all have to conform to virtual function
interfaces, we can't just drop the unused parameters, but must mark them
as UNUSED (to appease -Wunused-parameter).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-02-24 14:39:06 +08:00
|
|
|
/*
 * Handler for the %(end) atom.
 *
 * Runs the at_end callback of the element on top of the formatting stack
 * and then pops that element.  If the top element has no at_end callback,
 * a message is added to "err" and the result of
 * strbuf_addf_ret(err, -1, ...) is returned.  Returns 0 on success.
 */
static int end_atom_handler(struct atom_value *atomv UNUSED,
			    struct ref_formatting_state *state,
			    struct strbuf *err)
{
	struct ref_formatting_stack *current = state->stack;
	struct strbuf s = STRBUF_INIT;

	if (!current->at_end)
		return strbuf_addf_ret(err, -1, _("format: %%(end) atom used without corresponding atom"));
	current->at_end(&state->stack);

	/* Stack may have been popped within at_end(), hence reset the current pointer */
	current = state->stack;

	/*
	 * Perform quote formatting when the stack element is that of
	 * a supporting atom. If nested then perform quote formatting
	 * only on the topmost supporting atom.
	 */
	if (!current->prev->prev) {
		quote_formatting(&s, current->output.buf, current->output.len, state->quote_style);
		/* Replace the element's output with its quoted form. */
		strbuf_swap(&current->output, &s);
	}
	strbuf_release(&s);
	pop_stack_element(&state->stack);
	return 0;
}
|
|
|
|
|
2015-06-14 03:37:27 +08:00
|
|
|
/*
 * Locate the next "%(" in a format string.
 *
 * Returns a pointer to the '%' that opens the atom, or NULL when the
 * string contains no further atom.  "%%" is a quoted per-cent and is
 * skipped as a pair; any other '%' is a literal singleton.
 */
static const char *find_next(const char *cp)
{
	const char *pct = strchr(cp, '%');

	while (pct) {
		if (pct[1] == '(')
			return pct;
		/* step over "%%" as a unit, over a lone '%' by itself */
		pct += (pct[1] == '%') ? 2 : 1;
		pct = strchr(pct, '%');
	}
	return NULL;
}
|
|
|
|
|
2021-07-26 11:26:50 +08:00
|
|
|
static int reject_atom(enum atom_type atom_type)
|
|
|
|
{
|
|
|
|
return atom_type == ATOM_REST;
|
|
|
|
}
|
|
|
|
|
2015-06-14 03:37:27 +08:00
|
|
|
/*
|
|
|
|
* Make sure the format string is well formed, and parse out
|
|
|
|
* the used atoms.
|
|
|
|
*/
|
2017-07-13 23:01:18 +08:00
|
|
|
int verify_ref_format(struct ref_format *format)
|
2015-06-14 03:37:27 +08:00
|
|
|
{
|
|
|
|
const char *cp, *sp;
|
|
|
|
|
2017-07-13 23:02:30 +08:00
|
|
|
format->need_color_reset_at_eol = 0;
|
2017-07-13 23:01:18 +08:00
|
|
|
for (cp = format->format; *cp && (sp = find_next(cp)); ) {
|
2018-03-29 20:49:45 +08:00
|
|
|
struct strbuf err = STRBUF_INIT;
|
2015-06-14 03:37:27 +08:00
|
|
|
const char *color, *ep = strchr(sp, ')');
|
|
|
|
int at;
|
|
|
|
|
|
|
|
if (!ep)
|
2016-02-27 14:42:04 +08:00
|
|
|
return error(_("malformed format string %s"), sp);
|
2015-06-14 03:37:27 +08:00
|
|
|
/* sp points at "%(" and ep points at the closing ")" */
|
2018-03-29 20:49:45 +08:00
|
|
|
at = parse_ref_filter_atom(format, sp + 2, ep, &err);
|
|
|
|
if (at < 0)
|
|
|
|
die("%s", err.buf);
|
2021-07-26 11:26:50 +08:00
|
|
|
if (reject_atom(used_atom[at].atom_type))
|
|
|
|
die(_("this command reject atom %%(%.*s)"), (int)(ep - sp - 2), sp + 2);
|
2021-07-26 11:26:48 +08:00
|
|
|
|
|
|
|
if ((format->quote_style == QUOTE_PYTHON ||
|
|
|
|
format->quote_style == QUOTE_SHELL ||
|
|
|
|
format->quote_style == QUOTE_TCL) &&
|
|
|
|
used_atom[at].atom_type == ATOM_RAW &&
|
|
|
|
used_atom[at].u.raw_data.option == RAW_BARE)
|
2021-11-01 10:14:17 +08:00
|
|
|
die(_("--format=%.*s cannot be used with "
|
2021-07-26 11:26:48 +08:00
|
|
|
"--python, --shell, --tcl"), (int)(ep - sp - 2), sp + 2);
|
2015-06-14 03:37:27 +08:00
|
|
|
cp = ep + 1;
|
|
|
|
|
2016-02-18 02:06:11 +08:00
|
|
|
if (skip_prefix(used_atom[at].name, "color:", &color))
|
2017-07-13 23:02:30 +08:00
|
|
|
format->need_color_reset_at_eol = !!strcmp(color, "reset");
|
2018-03-29 20:49:45 +08:00
|
|
|
strbuf_release(&err);
|
2015-06-14 03:37:27 +08:00
|
|
|
}
|
ref-filter: consult want_color() before emitting colors
When color placeholders like %(color:red) are used in a
ref-filter format, we unconditionally output the colors,
even if the user has asked us for no colors. This usually
isn't a problem when the user is constructing a --format on
the command line, but it means we may do the wrong thing
when the format is fed from a script or alias. For example:
$ git config alias.b 'branch --format=%(color:green)%(refname)'
$ git b --no-color
should probably omit the green color. Likewise, running:
$ git b >branches
should probably also omit the color, just as we would for
all baked-in coloring (and as we recently started to do for
user-specified colors in --pretty formats).
This commit makes both of those cases work by teaching
the ref-filter code to consult want_color() before
outputting any color. The color flag in ref_format defaults
to "-1", which means we'll consult color.ui, which in turn
defaults to the usual isatty() check on stdout. However,
callers like git-branch which support their own color config
(and command-line options) can override that.
The new tests independently cover all three of the callers
of ref-filter (for-each-ref, tag, and branch). Even though
these seem redundant, it confirms that we've correctly
plumbed through all of the necessary config to make colors
work by default.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-07-13 23:09:32 +08:00
|
|
|
if (format->need_color_reset_at_eol && !want_color(format->use_color))
|
|
|
|
format->need_color_reset_at_eol = 0;
|
2015-06-14 03:37:27 +08:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2020-08-22 05:41:46 +08:00
|
|
|
static const char *do_grab_oid(const char *field, const struct object_id *oid,
|
|
|
|
struct used_atom *atom)
|
2015-06-14 03:37:27 +08:00
|
|
|
{
|
2020-08-22 05:41:46 +08:00
|
|
|
switch (atom->u.oid.option) {
|
2020-08-22 05:41:44 +08:00
|
|
|
case O_FULL:
|
|
|
|
return oid_to_hex(oid);
|
|
|
|
case O_LENGTH:
|
2023-03-28 21:58:46 +08:00
|
|
|
return repo_find_unique_abbrev(the_repository, oid,
|
|
|
|
atom->u.oid.length);
|
2020-08-22 05:41:44 +08:00
|
|
|
case O_SHORT:
|
2023-03-28 21:58:46 +08:00
|
|
|
return repo_find_unique_abbrev(the_repository, oid,
|
|
|
|
DEFAULT_ABBREV);
|
2020-08-22 05:41:44 +08:00
|
|
|
default:
|
|
|
|
BUG("unknown %%(%s) option", field);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-08-22 05:41:46 +08:00
|
|
|
static int grab_oid(const char *name, const char *field, const struct object_id *oid,
|
|
|
|
struct atom_value *v, struct used_atom *atom)
|
2015-06-14 03:37:27 +08:00
|
|
|
{
|
2020-08-22 05:41:44 +08:00
|
|
|
if (starts_with(name, field)) {
|
2020-08-22 05:41:46 +08:00
|
|
|
v->s = xstrdup(do_grab_oid(field, oid, atom));
|
2020-08-22 05:41:44 +08:00
|
|
|
return 1;
|
2015-06-14 03:37:27 +08:00
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* See grab_values */
|
2018-07-17 16:22:57 +08:00
|
|
|
static void grab_common_values(struct atom_value *val, int deref, struct expand_data *oi)
|
2015-06-14 03:37:27 +08:00
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
|
|
|
for (i = 0; i < used_atom_cnt; i++) {
|
2016-02-18 02:06:11 +08:00
|
|
|
const char *name = used_atom[i].name;
|
2021-05-13 23:15:38 +08:00
|
|
|
enum atom_type atom_type = used_atom[i].atom_type;
|
2015-06-14 03:37:27 +08:00
|
|
|
struct atom_value *v = &val[i];
|
|
|
|
if (!!deref != (*name == '*'))
|
|
|
|
continue;
|
|
|
|
if (deref)
|
|
|
|
name++;
|
2021-05-13 23:15:38 +08:00
|
|
|
if (atom_type == ATOM_OBJECTTYPE)
|
2018-10-18 15:28:54 +08:00
|
|
|
v->s = xstrdup(type_name(oi->type));
|
2021-05-13 23:15:38 +08:00
|
|
|
else if (atom_type == ATOM_OBJECTSIZE) {
|
2021-05-13 23:15:37 +08:00
|
|
|
if (used_atom[i].u.objectsize.option == O_SIZE_DISK) {
|
|
|
|
v->value = oi->disk_size;
|
|
|
|
v->s = xstrfmt("%"PRIuMAX, (uintmax_t)oi->disk_size);
|
|
|
|
} else if (used_atom[i].u.objectsize.option == O_SIZE) {
|
|
|
|
v->value = oi->size;
|
|
|
|
v->s = xstrfmt("%"PRIuMAX , (uintmax_t)oi->size);
|
|
|
|
}
|
2021-05-13 23:15:38 +08:00
|
|
|
} else if (atom_type == ATOM_DELTABASE)
|
2018-12-24 21:24:30 +08:00
|
|
|
v->s = xstrdup(oid_to_hex(&oi->delta_base_oid));
|
2021-05-13 23:15:38 +08:00
|
|
|
else if (atom_type == ATOM_OBJECTNAME && deref)
|
2020-08-22 05:41:46 +08:00
|
|
|
grab_oid(name, "objectname", &oi->oid, v, &used_atom[i]);
|
2015-06-14 03:37:27 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* See grab_values */
|
2019-02-14 13:50:54 +08:00
|
|
|
static void grab_tag_values(struct atom_value *val, int deref, struct object *obj)
|
2015-06-14 03:37:27 +08:00
|
|
|
{
|
|
|
|
int i;
|
|
|
|
struct tag *tag = (struct tag *) obj;
|
|
|
|
|
|
|
|
for (i = 0; i < used_atom_cnt; i++) {
|
2016-02-18 02:06:11 +08:00
|
|
|
const char *name = used_atom[i].name;
|
2021-05-13 23:15:38 +08:00
|
|
|
enum atom_type atom_type = used_atom[i].atom_type;
|
2015-06-14 03:37:27 +08:00
|
|
|
struct atom_value *v = &val[i];
|
|
|
|
if (!!deref != (*name == '*'))
|
|
|
|
continue;
|
|
|
|
if (deref)
|
|
|
|
name++;
|
2021-05-13 23:15:38 +08:00
|
|
|
if (atom_type == ATOM_TAG)
|
2018-10-18 15:28:54 +08:00
|
|
|
v->s = xstrdup(tag->tag);
|
2021-05-13 23:15:38 +08:00
|
|
|
else if (atom_type == ATOM_TYPE && tag->tagged)
|
2018-10-18 15:28:54 +08:00
|
|
|
v->s = xstrdup(type_name(tag->tagged->type));
|
2021-05-13 23:15:38 +08:00
|
|
|
else if (atom_type == ATOM_OBJECT && tag->tagged)
|
2015-11-10 10:22:28 +08:00
|
|
|
v->s = xstrdup(oid_to_hex(&tag->tagged->oid));
|
2015-06-14 03:37:27 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* See grab_values */
|
2019-02-14 13:50:54 +08:00
|
|
|
static void grab_commit_values(struct atom_value *val, int deref, struct object *obj)
|
2015-06-14 03:37:27 +08:00
|
|
|
{
|
|
|
|
int i;
|
|
|
|
struct commit *commit = (struct commit *) obj;
|
|
|
|
|
|
|
|
for (i = 0; i < used_atom_cnt; i++) {
|
2016-02-18 02:06:11 +08:00
|
|
|
const char *name = used_atom[i].name;
|
2021-05-13 23:15:38 +08:00
|
|
|
enum atom_type atom_type = used_atom[i].atom_type;
|
2015-06-14 03:37:27 +08:00
|
|
|
struct atom_value *v = &val[i];
|
|
|
|
if (!!deref != (*name == '*'))
|
|
|
|
continue;
|
|
|
|
if (deref)
|
|
|
|
name++;
|
2021-05-13 23:15:38 +08:00
|
|
|
if (atom_type == ATOM_TREE &&
|
|
|
|
grab_oid(name, "tree", get_commit_tree_oid(commit), v, &used_atom[i]))
|
2020-08-22 05:41:47 +08:00
|
|
|
continue;
|
2021-05-13 23:15:38 +08:00
|
|
|
if (atom_type == ATOM_NUMPARENT) {
|
2017-04-21 04:52:09 +08:00
|
|
|
v->value = commit_list_count(commit->parents);
|
|
|
|
v->s = xstrfmt("%lu", (unsigned long)v->value);
|
2015-06-14 03:37:27 +08:00
|
|
|
}
|
2021-05-13 23:15:38 +08:00
|
|
|
else if (atom_type == ATOM_PARENT) {
|
2015-06-14 03:37:27 +08:00
|
|
|
struct commit_list *parents;
|
2015-09-25 05:07:12 +08:00
|
|
|
struct strbuf s = STRBUF_INIT;
|
|
|
|
for (parents = commit->parents; parents; parents = parents->next) {
|
2020-08-22 05:41:48 +08:00
|
|
|
struct object_id *oid = &parents->item->object.oid;
|
2015-09-25 05:07:12 +08:00
|
|
|
if (parents != commit->parents)
|
|
|
|
strbuf_addch(&s, ' ');
|
2020-08-22 05:41:48 +08:00
|
|
|
strbuf_addstr(&s, do_grab_oid("parent", oid, &used_atom[i]));
|
2015-06-14 03:37:27 +08:00
|
|
|
}
|
2015-09-25 05:07:12 +08:00
|
|
|
v->s = strbuf_detach(&s, NULL);
|
2015-06-14 03:37:27 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-02-14 13:51:03 +08:00
|
|
|
/*
 * Scan the header lines of "buf" for one that begins with "who" (whose
 * length is "wholen") followed by a space, and return a pointer to the
 * text right after that space.
 *
 * The scan stops at the blank line that ends the header, at a line
 * without a terminating newline, or at the end of the buffer; in all of
 * these cases an empty string literal is returned.
 */
static const char *find_wholine(const char *who, int wholen, const char *buf)
{
	const char *line;
	const char *nl;

	for (line = buf; *line; line = nl + 1) {
		if (strncmp(line, who, wholen) == 0 && line[wholen] == ' ')
			return line + wholen + 1;

		nl = strchr(line, '\n');
		if (!nl)
			break;
		if (nl[1] == '\n')
			break; /* end of header */
	}
	return "";
}
|
|
|
|
|
|
|
|
/*
 * Return a newly allocated copy of "buf" up to, but not including, the
 * first newline (or the whole string when it has none).
 */
static const char *copy_line(const char *buf)
{
	size_t line_len = strcspn(buf, "\n");

	return xmemdupz(buf, line_len);
}
|
|
|
|
|
|
|
|
/*
 * Return a newly allocated copy of the part of the first line of "buf"
 * that precedes the first " <".  When the line contains no " <", a
 * newly allocated empty string is returned.
 */
static const char *copy_name(const char *buf)
{
	const char *pos = buf;

	while (*pos && *pos != '\n') {
		if (starts_with(pos, " <"))
			return xmemdupz(buf, pos - buf);
		pos++;
	}
	return xstrdup("");
}
|
|
|
|
|
2020-08-22 05:41:43 +08:00
|
|
|
static const char *copy_email(const char *buf, struct used_atom *atom)
|
2015-06-14 03:37:27 +08:00
|
|
|
{
|
|
|
|
const char *email = strchr(buf, '<');
|
|
|
|
const char *eoemail;
|
|
|
|
if (!email)
|
2019-08-18 05:51:07 +08:00
|
|
|
return xstrdup("");
|
2020-08-22 05:41:43 +08:00
|
|
|
switch (atom->u.email_option.option) {
|
|
|
|
case EO_RAW:
|
|
|
|
eoemail = strchr(email, '>');
|
|
|
|
if (eoemail)
|
|
|
|
eoemail++;
|
|
|
|
break;
|
|
|
|
case EO_TRIM:
|
|
|
|
email++;
|
|
|
|
eoemail = strchr(email, '>');
|
|
|
|
break;
|
|
|
|
case EO_LOCALPART:
|
|
|
|
email++;
|
|
|
|
eoemail = strchr(email, '@');
|
|
|
|
if (!eoemail)
|
|
|
|
eoemail = strchr(email, '>');
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
BUG("unknown email option");
|
|
|
|
}
|
|
|
|
|
2015-06-14 03:37:27 +08:00
|
|
|
if (!eoemail)
|
2019-08-18 05:51:07 +08:00
|
|
|
return xstrdup("");
|
2020-08-22 05:41:43 +08:00
|
|
|
return xmemdupz(email, eoemail - email);
|
2015-06-14 03:37:27 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static char *copy_subject(const char *buf, unsigned long len)
|
|
|
|
{
|
ref-filter: handle CRLF at end-of-line more gracefully
The ref-filter code does not correctly handle commit or tag messages
that use CRLF as the line terminator. Such messages can be created with
the `--cleanup=verbatim` option of `git commit` and `git tag`, or by
using `git commit-tree` directly.
The function `find_subpos` in ref-filter.c looks for two consecutive
LFs to find the end of the subject line, a sequence which is absent in
messages using CRLF. This results in the whole message being parsed as
the subject line (`%(contents:subject)`), and the body of the message
(`%(contents:body)`) being empty.
Moreover, in `copy_subject`, which wants to return the subject as a
single line, '\n' is replaced by space, but '\r' is
untouched.
This impacts the output of `git branch`, `git tag` and `git
for-each-ref`.
This behaviour is a regression for `git branch --verbose`, which
bisects down to 949af0684c (branch: use ref-filter printing APIs,
2017-01-10).
Adjust the ref-filter code to be more lenient by hardening the logic in
`copy_subject` and `find_subpos` to correctly parse messages containing
CRLF.
Add a new test script, 't3920-crlf-messages.sh', to test the behaviour
of commands using either the ref-filter or the pretty APIs with messages
using CRLF line endings. The function `test_crlf_subject_body_and_contents`
can be used to test that the `--format` option of `branch`, `tag`,
`for-each-ref`, `log` and `show` correctly displays the subject, body
and raw content of commit and tag messages using CRLF. Test the
output of `branch`, `tag` and `for-each-ref` with such commits.
Helped-by: Junio C Hamano <gitster@pobox.com>
Helped-by: Eric Sunshine <sunshine@sunshineco.com>
Signed-off-by: Philippe Blain <levraiphilippeblain@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-10-29 20:48:28 +08:00
|
|
|
struct strbuf sb = STRBUF_INIT;
|
2015-06-14 03:37:27 +08:00
|
|
|
int i;
|
|
|
|
|
ref-filter: handle CRLF at end-of-line more gracefully
The ref-filter code does not correctly handle commit or tag messages
that use CRLF as the line terminator. Such messages can be created with
the `--cleanup=verbatim` option of `git commit` and `git tag`, or by
using `git commit-tree` directly.
The function `find_subpos` in ref-filter.c looks for two consecutive
LFs to find the end of the subject line, a sequence which is absent in
messages using CRLF. This results in the whole message being parsed as
the subject line (`%(contents:subject)`), and the body of the message
(`%(contents:body)`) being empty.
Moreover, in `copy_subject`, which wants to return the subject as a
single line, '\n' is replaced by space, but '\r' is
untouched.
This impacts the output of `git branch`, `git tag` and `git
for-each-ref`.
This behaviour is a regression for `git branch --verbose`, which
bisects down to 949af0684c (branch: use ref-filter printing APIs,
2017-01-10).
Adjust the ref-filter code to be more lenient by hardening the logic in
`copy_subject` and `find_subpos` to correctly parse messages containing
CRLF.
Add a new test script, 't3920-crlf-messages.sh', to test the behaviour
of commands using either the ref-filter or the pretty APIs with messages
using CRLF line endings. The function `test_crlf_subject_body_and_contents`
can be used to test that the `--format` option of `branch`, `tag`,
`for-each-ref`, `log` and `show` correctly displays the subject, body
and raw content of commit and tag messages using CRLF. Test the
output of `branch`, `tag` and `for-each-ref` with such commits.
Helped-by: Junio C Hamano <gitster@pobox.com>
Helped-by: Eric Sunshine <sunshine@sunshineco.com>
Signed-off-by: Philippe Blain <levraiphilippeblain@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-10-29 20:48:28 +08:00
|
|
|
for (i = 0; i < len; i++) {
|
|
|
|
if (buf[i] == '\r' && i + 1 < len && buf[i + 1] == '\n')
|
|
|
|
continue; /* ignore CR in CRLF */
|
2015-06-14 03:37:27 +08:00
|
|
|
|
ref-filter: handle CRLF at end-of-line more gracefully
The ref-filter code does not correctly handle commit or tag messages
that use CRLF as the line terminator. Such messages can be created with
the `--cleanup=verbatim` option of `git commit` and `git tag`, or by
using `git commit-tree` directly.
The function `find_subpos` in ref-filter.c looks for two consecutive
LFs to find the end of the subject line, a sequence which is absent in
messages using CRLF. This results in the whole message being parsed as
the subject line (`%(contents:subject)`), and the body of the message
(`%(contents:body)`) being empty.
Moreover, in `copy_subject`, which wants to return the subject as a
single line, '\n' is replaced by space, but '\r' is
untouched.
This impacts the output of `git branch`, `git tag` and `git
for-each-ref`.
This behaviour is a regression for `git branch --verbose`, which
bisects down to 949af0684c (branch: use ref-filter printing APIs,
2017-01-10).
Adjust the ref-filter code to be more lenient by hardening the logic in
`copy_subject` and `find_subpos` to correctly parse messages containing
CRLF.
Add a new test script, 't3920-crlf-messages.sh', to test the behaviour
of commands using either the ref-filter or the pretty APIs with messages
using CRLF line endings. The function `test_crlf_subject_body_and_contents`
can be used to test that the `--format` option of `branch`, `tag`,
`for-each-ref`, `log` and `show` correctly displays the subject, body
and raw content of commit and tag messages using CRLF. Test the
output of `branch`, `tag` and `for-each-ref` with such commits.
Helped-by: Junio C Hamano <gitster@pobox.com>
Helped-by: Eric Sunshine <sunshine@sunshineco.com>
Signed-off-by: Philippe Blain <levraiphilippeblain@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-10-29 20:48:28 +08:00
|
|
|
if (buf[i] == '\n')
|
|
|
|
strbuf_addch(&sb, ' ');
|
|
|
|
else
|
|
|
|
strbuf_addch(&sb, buf[i]);
|
|
|
|
}
|
|
|
|
return strbuf_detach(&sb, NULL);
|
2015-06-14 03:37:27 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static void grab_date(const char *buf, struct atom_value *v, const char *atomname)
|
|
|
|
{
|
|
|
|
const char *eoemail = strstr(buf, "> ");
|
|
|
|
char *zone;
|
2017-04-27 03:29:31 +08:00
|
|
|
timestamp_t timestamp;
|
2015-06-14 03:37:27 +08:00
|
|
|
long tz;
|
2022-02-16 16:14:03 +08:00
|
|
|
struct date_mode date_mode = DATE_MODE_INIT;
|
2015-06-14 03:37:27 +08:00
|
|
|
const char *formatp;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* We got here because atomname ends in "date" or "date<something>";
|
|
|
|
* it's not possible that <something> is not ":<format>" because
|
|
|
|
* parse_ref_filter_atom() wouldn't have allowed it, so we can assume that no
|
|
|
|
* ":" means no format is specified, and use the default.
|
|
|
|
*/
|
|
|
|
formatp = strchr(atomname, ':');
|
2022-05-03 00:50:37 +08:00
|
|
|
if (formatp) {
|
2015-06-14 03:37:27 +08:00
|
|
|
formatp++;
|
2015-08-04 02:01:27 +08:00
|
|
|
parse_date_format(formatp, &date_mode);
|
2015-06-14 03:37:27 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
if (!eoemail)
|
|
|
|
goto bad;
|
2017-04-21 18:45:44 +08:00
|
|
|
timestamp = parse_timestamp(eoemail + 2, &zone, 10);
|
2017-04-27 03:29:31 +08:00
|
|
|
if (timestamp == TIME_MAX)
|
2015-06-14 03:37:27 +08:00
|
|
|
goto bad;
|
|
|
|
tz = strtol(zone, NULL, 10);
|
|
|
|
if ((tz == LONG_MIN || tz == LONG_MAX) && errno == ERANGE)
|
|
|
|
goto bad;
|
2015-08-04 02:01:27 +08:00
|
|
|
v->s = xstrdup(show_date(timestamp, tz, &date_mode));
|
2017-04-21 04:52:09 +08:00
|
|
|
v->value = timestamp;
|
2022-02-16 16:14:05 +08:00
|
|
|
date_mode_release(&date_mode);
|
2015-06-14 03:37:27 +08:00
|
|
|
return;
|
|
|
|
bad:
|
2018-10-18 15:28:54 +08:00
|
|
|
v->s = xstrdup("");
|
2017-04-21 04:52:09 +08:00
|
|
|
v->value = 0;
|
2015-06-14 03:37:27 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/* See grab_values */
|
2019-02-14 13:51:03 +08:00
|
|
|
static void grab_person(const char *who, struct atom_value *val, int deref, void *buf)
|
2015-06-14 03:37:27 +08:00
|
|
|
{
|
|
|
|
int i;
|
|
|
|
int wholen = strlen(who);
|
|
|
|
const char *wholine = NULL;
|
|
|
|
|
|
|
|
for (i = 0; i < used_atom_cnt; i++) {
|
2016-02-18 02:06:11 +08:00
|
|
|
const char *name = used_atom[i].name;
|
2015-06-14 03:37:27 +08:00
|
|
|
struct atom_value *v = &val[i];
|
|
|
|
if (!!deref != (*name == '*'))
|
|
|
|
continue;
|
|
|
|
if (deref)
|
|
|
|
name++;
|
|
|
|
if (strncmp(who, name, wholen))
|
|
|
|
continue;
|
|
|
|
if (name[wholen] != 0 &&
|
|
|
|
strcmp(name + wholen, "name") &&
|
2020-08-22 05:41:43 +08:00
|
|
|
!starts_with(name + wholen, "email") &&
|
2015-06-14 03:37:27 +08:00
|
|
|
!starts_with(name + wholen, "date"))
|
|
|
|
continue;
|
|
|
|
if (!wholine)
|
2019-02-14 13:51:03 +08:00
|
|
|
wholine = find_wholine(who, wholen, buf);
|
2015-06-14 03:37:27 +08:00
|
|
|
if (!wholine)
|
|
|
|
return; /* no point looking for it */
|
|
|
|
if (name[wholen] == 0)
|
|
|
|
v->s = copy_line(wholine);
|
|
|
|
else if (!strcmp(name + wholen, "name"))
|
|
|
|
v->s = copy_name(wholine);
|
2020-08-22 05:41:43 +08:00
|
|
|
else if (starts_with(name + wholen, "email"))
|
|
|
|
v->s = copy_email(wholine, &used_atom[i]);
|
2015-06-14 03:37:27 +08:00
|
|
|
else if (starts_with(name + wholen, "date"))
|
|
|
|
grab_date(wholine, v, name);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* For a tag or a commit object, if "creator" or "creatordate" is
|
|
|
|
* requested, do something special.
|
|
|
|
*/
|
|
|
|
if (strcmp(who, "tagger") && strcmp(who, "committer"))
|
|
|
|
return; /* "author" for commit object is not wanted */
|
|
|
|
if (!wholine)
|
2019-02-14 13:51:03 +08:00
|
|
|
wholine = find_wholine(who, wholen, buf);
|
2015-06-14 03:37:27 +08:00
|
|
|
if (!wholine)
|
|
|
|
return;
|
|
|
|
for (i = 0; i < used_atom_cnt; i++) {
|
2016-02-18 02:06:11 +08:00
|
|
|
const char *name = used_atom[i].name;
|
2021-05-13 23:15:38 +08:00
|
|
|
enum atom_type atom_type = used_atom[i].atom_type;
|
2015-06-14 03:37:27 +08:00
|
|
|
struct atom_value *v = &val[i];
|
|
|
|
if (!!deref != (*name == '*'))
|
|
|
|
continue;
|
|
|
|
if (deref)
|
|
|
|
name++;
|
|
|
|
|
2021-05-13 23:15:38 +08:00
|
|
|
if (atom_type == ATOM_CREATORDATE)
|
2015-06-14 03:37:27 +08:00
|
|
|
grab_date(wholine, v, name);
|
2021-05-13 23:15:38 +08:00
|
|
|
else if (atom_type == ATOM_CREATOR)
|
2015-06-14 03:37:27 +08:00
|
|
|
v->s = copy_line(wholine);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-06-05 02:22:47 +08:00
|
|
|
static void grab_signature(struct atom_value *val, int deref, struct object *obj)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
struct commit *commit = (struct commit *) obj;
|
|
|
|
struct signature_check sigc = { 0 };
|
|
|
|
int signature_checked = 0;
|
|
|
|
|
|
|
|
for (i = 0; i < used_atom_cnt; i++) {
|
|
|
|
struct used_atom *atom = &used_atom[i];
|
|
|
|
const char *name = atom->name;
|
|
|
|
struct atom_value *v = &val[i];
|
|
|
|
int opt;
|
|
|
|
|
|
|
|
if (!!deref != (*name == '*'))
|
|
|
|
continue;
|
|
|
|
if (deref)
|
|
|
|
name++;
|
|
|
|
|
|
|
|
if (!skip_prefix(name, "signature", &name) ||
|
|
|
|
(*name && *name != ':'))
|
|
|
|
continue;
|
|
|
|
if (!*name)
|
|
|
|
name = NULL;
|
|
|
|
else
|
|
|
|
name++;
|
|
|
|
|
|
|
|
opt = parse_signature_option(name);
|
|
|
|
if (opt < 0)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
if (!signature_checked) {
|
|
|
|
check_commit_signature(commit, &sigc);
|
|
|
|
signature_checked = 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
switch (opt) {
|
|
|
|
case S_BARE:
|
|
|
|
v->s = xstrdup(sigc.output ? sigc.output: "");
|
|
|
|
break;
|
|
|
|
case S_SIGNER:
|
|
|
|
v->s = xstrdup(sigc.signer ? sigc.signer : "");
|
|
|
|
break;
|
|
|
|
case S_GRADE:
|
|
|
|
switch (sigc.result) {
|
|
|
|
case 'G':
|
|
|
|
switch (sigc.trust_level) {
|
|
|
|
case TRUST_UNDEFINED:
|
|
|
|
case TRUST_NEVER:
|
|
|
|
v->s = xstrfmt("%c", (char)'U');
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
v->s = xstrfmt("%c", (char)'G');
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case 'B':
|
|
|
|
case 'E':
|
|
|
|
case 'N':
|
|
|
|
case 'X':
|
|
|
|
case 'Y':
|
|
|
|
case 'R':
|
|
|
|
v->s = xstrfmt("%c", (char)sigc.result);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case S_KEY:
|
|
|
|
v->s = xstrdup(sigc.key ? sigc.key : "");
|
|
|
|
break;
|
|
|
|
case S_FINGERPRINT:
|
|
|
|
v->s = xstrdup(sigc.fingerprint ?
|
|
|
|
sigc.fingerprint : "");
|
|
|
|
break;
|
|
|
|
case S_PRI_KEY_FP:
|
|
|
|
v->s = xstrdup(sigc.primary_key_fingerprint ?
|
|
|
|
sigc.primary_key_fingerprint : "");
|
|
|
|
break;
|
|
|
|
case S_TRUST_LEVEL:
|
|
|
|
v->s = xstrdup(gpg_trust_level_to_str(sigc.trust_level));
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (signature_checked)
|
|
|
|
signature_check_clear(&sigc);
|
|
|
|
}
|
|
|
|
|
2019-02-14 13:51:03 +08:00
|
|
|
static void find_subpos(const char *buf,
|
2021-01-19 07:49:10 +08:00
|
|
|
const char **sub, size_t *sublen,
|
|
|
|
const char **body, size_t *bodylen,
|
|
|
|
size_t *nonsiglen,
|
|
|
|
const char **sig, size_t *siglen)
|
2015-06-14 03:37:27 +08:00
|
|
|
{
|
2021-02-11 10:08:03 +08:00
|
|
|
struct strbuf payload = STRBUF_INIT;
|
|
|
|
struct strbuf signature = STRBUF_INIT;
|
2015-06-14 03:37:27 +08:00
|
|
|
const char *eol;
|
2021-02-11 10:08:03 +08:00
|
|
|
const char *end = buf + strlen(buf);
|
|
|
|
const char *sigstart;
|
|
|
|
|
2021-02-11 10:08:05 +08:00
|
|
|
/* parse signature first; we might not even have a subject line */
|
|
|
|
parse_signature(buf, end - buf, &payload, &signature);
|
built-ins & libs & helpers: add/move destructors, fix leaks
Fix various leaks in built-ins, libraries and a test helper here we
were missing a call to strbuf_release(), string_list_clear() etc, or
were calling them after a potential "return".
Comments on individual changes:
- builtin/checkout.c: Fix a memory leak that was introduced in [1]. A
sibling leak introduced in [2] was recently fixed in [3]. As with [3]
we should be using the wt_status_state_free_buffers() API introduced
in [4].
- builtin/repack.c: Fix a leak that's been here since this use of
"strbuf_release()" was added in a1bbc6c0176 (repack: rewrite the shell
script in C, 2013-09-15). We don't use the variable for anything
except this loop, so we can instead free it right afterwards.
- builtin/rev-parse: Fix a leak that's been here since this code was
added in 21d47835386 (Add a parseopt mode to git-rev-parse to bring
parse-options to shell scripts., 2007-11-04).
- builtin/stash.c: Fix a couple of leaks that have been here since
this code was added in d4788af875c (stash: convert create to builtin,
2019-02-25), we strbuf_release()'d only some of the "struct strbuf" we
allocated earlier in the function, let's release all of them.
- ref-filter.c: Fix a leak in 482c1191869 (gpg-interface: improve
interface for parsing tags, 2021-02-11), we don't use the "payload"
variable that we ask parse_signature() to populate for us, so let's
free it.
- t/helper/test-fake-ssh.c: Fix a leak that's been here since this
code was added in 3064d5a38c7 (mingw: fix t5601-clone.sh,
2016-01-27). Let's free the "struct strbuf" as soon as we don't need
it anymore.
1. c45f0f525de (switch: reject if some operation is in progress,
2019-03-29)
2. 2708ce62d21 (branch: sort detached HEAD based on a flag,
2021-01-07)
3. abcac2e19fa (ref-filter.c: fix a leak in get_head_description,
2022-09-25)
4. 962dd7ebc3e (wt-status: introduce wt_status_state_free_buffers(),
2020-09-27).
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Taylor Blau <me@ttaylorr.com>
2022-11-09 02:17:42 +08:00
|
|
|
strbuf_release(&payload);
|
2021-02-11 10:08:03 +08:00
|
|
|
|
2015-06-14 03:37:27 +08:00
|
|
|
/* skip past header until we hit empty line */
|
|
|
|
while (*buf && *buf != '\n') {
|
|
|
|
eol = strchrnul(buf, '\n');
|
|
|
|
if (*eol)
|
|
|
|
eol++;
|
|
|
|
buf = eol;
|
|
|
|
}
|
|
|
|
/* skip any empty lines */
|
|
|
|
while (*buf == '\n')
|
|
|
|
buf++;
|
2021-02-11 10:08:03 +08:00
|
|
|
*sig = strbuf_detach(&signature, siglen);
|
|
|
|
sigstart = buf + parse_signed_buffer(buf, strlen(buf));
|
2015-06-14 03:37:27 +08:00
|
|
|
|
|
|
|
/* subject is first non-empty line */
|
|
|
|
*sub = buf;
|
ref-filter: handle CRLF at end-of-line more gracefully
The ref-filter code does not correctly handle commit or tag messages
that use CRLF as the line terminator. Such messages can be created with
the `--cleanup=verbatim` option of `git commit` and `git tag`, or by
using `git commit-tree` directly.
The function `find_subpos` in ref-filter.c looks for two consecutive
LFs to find the end of the subject line, a sequence which is absent in
messages using CRLF. This results in the whole message being parsed as
the subject line (`%(contents:subject)`), and the body of the message
(`%(contents:body)`) being empty.
Moreover, in `copy_subject`, which wants to return the subject as a
single line, '\n' is replaced by space, but '\r' is
untouched.
This impacts the output of `git branch`, `git tag` and `git
for-each-ref`.
This behaviour is a regression for `git branch --verbose`, which
bisects down to 949af0684c (branch: use ref-filter printing APIs,
2017-01-10).
Adjust the ref-filter code to be more lenient by hardening the logic in
`copy_subject` and `find_subpos` to correctly parse messages containing
CRLF.
Add a new test script, 't3920-crlf-messages.sh', to test the behaviour
of commands using either the ref-filter or the pretty APIs with messages
using CRLF line endings. The function `test_crlf_subject_body_and_contents`
can be used to test that the `--format` option of `branch`, `tag`,
`for-each-ref`, `log` and `show` correctly displays the subject, body
and raw content of commit and tag messages using CRLF. Test the
output of `branch`, `tag` and `for-each-ref` with such commits.
Helped-by: Junio C Hamano <gitster@pobox.com>
Helped-by: Eric Sunshine <sunshine@sunshineco.com>
Signed-off-by: Philippe Blain <levraiphilippeblain@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-10-29 20:48:28 +08:00
|
|
|
/* subject goes to first empty line before signature begins */
|
ref-filter: fix parsing of signatures with CRLF and no body
This commit fixes a bug when parsing tags that have CRLF line endings, a
signature, and no body, like this (the "^M" are marking the CRs):
this is the subject^M
-----BEGIN PGP SIGNATURE-----^M
^M
...some stuff...^M
-----END PGP SIGNATURE-----^M
When trying to find the start of the body, we look for a blank line
separating the subject and body. In this case, there isn't one. But we
search for it using strstr(), which will find the blank line in the
signature.
In the non-CRLF code path, we check whether the line we found is past
the start of the signature, and if so, put the body pointer at the start
of the signature (effectively making the body empty). But the CRLF code
path doesn't catch the same case, and we end up with the body pointer in
the middle of the signature field. This has two visible problems:
- printing %(contents:subject) will show part of the signature, too,
since the subject length is computed as (body - subject)
- the length of the body is (sig - body), which makes it negative.
Asking for %(contents:body) causes us to cast this to a very large
size_t when we feed it to xmemdupz(), which then complains about
trying to allocate too much memory.
These are essentially the same bugs fixed in the previous commit, except
that they happen when there is a CRLF blank line in the signature,
rather than no blank line at all. Both are caused by the refactoring in
9f75ce3d8f (ref-filter: handle CRLF at end-of-line more gracefully,
2020-10-29).
We can fix this by doing the same "sigstart" check that we do in the
non-CRLF case. And rather than repeat ourselves, we can just use
short-circuiting OR to collapse both cases into a single conditional.
I.e., rather than:
if (strstr("\n\n"))
...found blank, check if it's in signature...
else if (strstr("\r\n\r\n"))
...found blank, check if it's in signature...
else
...no blank line found...
we can collapse this to:
if (strstr("\n\n")) ||
strstr("\r\n\r\n")))
...found blank, check if it's in signature...
else
...no blank line found...
The tests show the problem and the fix. Though it wasn't broken, I
included contents:signature here to make sure it still behaves as
expected, but note the shell hackery needed to make it work. A
less-clever option would be to skip using test_atom and just "append_cr
>expected" ourselves.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Taylor Blau <me@ttaylorr.com>
2022-11-02 15:44:00 +08:00
|
|
|
if ((eol = strstr(*sub, "\n\n")) ||
|
|
|
|
(eol = strstr(*sub, "\r\n\r\n"))) {
|
2021-02-11 10:08:03 +08:00
|
|
|
eol = eol < sigstart ? eol : sigstart;
|
ref-filter: fix parsing of signatures with CRLF and no body
This commit fixes a bug when parsing tags that have CRLF line endings, a
signature, and no body, like this (the "^M" are marking the CRs):
this is the subject^M
-----BEGIN PGP SIGNATURE-----^M
^M
...some stuff...^M
-----END PGP SIGNATURE-----^M
When trying to find the start of the body, we look for a blank line
separating the subject and body. In this case, there isn't one. But we
search for it using strstr(), which will find the blank line in the
signature.
In the non-CRLF code path, we check whether the line we found is past
the start of the signature, and if so, put the body pointer at the start
of the signature (effectively making the body empty). But the CRLF code
path doesn't catch the same case, and we end up with the body pointer in
the middle of the signature field. This has two visible problems:
- printing %(contents:subject) will show part of the signature, too,
since the subject length is computed as (body - subject)
- the length of the body is (sig - body), which makes it negative.
Asking for %(contents:body) causes us to cast this to a very large
size_t when we feed it to xmemdupz(), which then complains about
trying to allocate too much memory.
These are essentially the same bugs fixed in the previous commit, except
that they happen when there is a CRLF blank line in the signature,
rather than no blank line at all. Both are caused by the refactoring in
9f75ce3d8f (ref-filter: handle CRLF at end-of-line more gracefully,
2020-10-29).
We can fix this by doing the same "sigstart" check that we do in the
non-CRLF case. And rather than repeat ourselves, we can just use
short-circuiting OR to collapse both cases into a single conditional.
I.e., rather than:
if (strstr("\n\n"))
...found blank, check if it's in signature...
else if (strstr("\r\n\r\n"))
...found blank, check if it's in signature...
else
...no blank line found...
we can collapse this to:
if (strstr("\n\n")) ||
strstr("\r\n\r\n")))
...found blank, check if it's in signature...
else
...no blank line found...
The tests show the problem and the fix. Though it wasn't broken, I
included contents:signature here to make sure it still behaves as
expected, but note the shell hackery needed to make it work. A
less-clever option would be to skip using test_atom and just "append_cr
>expected" ourselves.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Taylor Blau <me@ttaylorr.com>
2022-11-02 15:44:00 +08:00
|
|
|
} else {
|
ref-filter: handle CRLF at end-of-line more gracefully
The ref-filter code does not correctly handle commit or tag messages
that use CRLF as the line terminator. Such messages can be created with
the `--cleanup=verbatim` option of `git commit` and `git tag`, or by
using `git commit-tree` directly.
The function `find_subpos` in ref-filter.c looks for two consecutive
LFs to find the end of the subject line, a sequence which is absent in
messages using CRLF. This results in the whole message being parsed as
the subject line (`%(contents:subject)`), and the body of the message
(`%(contents:body)`) being empty.
Moreover, in `copy_subject`, which wants to return the subject as a
single line, '\n' is replaced by space, but '\r' is
untouched.
This impacts the output of `git branch`, `git tag` and `git
for-each-ref`.
This behaviour is a regression for `git branch --verbose`, which
bisects down to 949af0684c (branch: use ref-filter printing APIs,
2017-01-10).
Adjust the ref-filter code to be more lenient by hardening the logic in
`copy_subject` and `find_subpos` to correctly parse messages containing
CRLF.
Add a new test script, 't3920-crlf-messages.sh', to test the behaviour
of commands using either the ref-filter or the pretty APIs with messages
using CRLF line endings. The function `test_crlf_subject_body_and_contents`
can be used to test that the `--format` option of `branch`, `tag`,
`for-each-ref`, `log` and `show` correctly displays the subject, body
and raw content of commit and tag messages using CRLF. Test the
output of `branch`, `tag` and `for-each-ref` with such commits.
Helped-by: Junio C Hamano <gitster@pobox.com>
Helped-by: Eric Sunshine <sunshine@sunshineco.com>
Signed-off-by: Philippe Blain <levraiphilippeblain@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-10-29 20:48:28 +08:00
|
|
|
/* treat whole message as subject */
|
ref-filter: fix parsing of signatures without blank lines
When ref-filter is asked to show %(content:subject), etc, we end up in
find_subpos() to parse out the three major parts: the subject, the body,
and the signature (if any).
When searching for the blank line between the subject and body, if we
don't find anything, we try to treat the whole message as the subject,
with no body. But our idea of "the whole message" needs to take into
account the signature, too. Since 9f75ce3d8f (ref-filter: handle CRLF at
end-of-line more gracefully, 2020-10-29), the code instead goes all the
way to the end of the buffer, which produces confusing output.
Here's an example. If we have a tag message like this:
this is the subject
-----BEGIN SSH SIGNATURE-----
...some stuff...
-----END SSH SIGNATURE-----
then the current parser will put the start of the body at the end of the
whole buffer. This produces two buggy outcomes:
- since the subject length is computed as (body - subject), showing
%(contents:subject) will print both the subject and the signature,
rather than just the single line
- since the body length is computed as (sig - body), and the body now
starts _after_ the signature, we end up with a negative length!
Fortunately we never access out-of-bounds memory, because the
negative length is fed to xmemdupz(), which casts it to a size_t,
and xmalloc() bails trying to allocate an absurdly large value.
In theory it would be possible for somebody making a malicious tag
to wrap it around to a more reasonable value, but it would require a
tag on the order of 2^63 bytes. And even if they did, all they get
is an out of bounds string read. So the security implications are
probably not interesting.
We can fix both by correctly putting the start of the body at the same
index as the start of the signature (effectively making the body empty).
Note that this is a real issue with signatures generated with gpg.format
set to "ssh", which would look like the example above. In the new tests
here I use a hard-coded tag message, for a few reasons:
- regardless of what the ssh-signing code produces now or in the
future, we should be testing this particular case
- skipping the actual signature makes the tests simpler to write (and
allows them to run on more systems)
- t6300 has helpers for working with gpg signatures; for the purposes
of this bug, "BEGIN PGP" is just as good a demonstration, and this
simplifies the tests
Curiously, the same issue doesn't happen with real gpg signatures (and
there are even existing tests in t6300 with cover this). Those have a
blank line between the header and the content, like:
this is the subject
-----BEGIN PGP SIGNATURE-----
...some stuff...
-----END PGP SIGNATURE-----
Because we search for the subject/body separator line with a strstr(),
we find the blank line in the signature, even though it's outside of
what we'd consider the body. But that puts us unto a separate code path,
which realizes that we're now in the signature and adjusts the line back
to "sigstart". So this patch is basically just making the "no line found
at all" case match that. And note that "sigstart" is always defined (if
there is no signature, it points to the end of the buffer as you'd
expect).
Reported-by: Martin Englund <martin@englund.nu>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Taylor Blau <me@ttaylorr.com>
2022-11-02 15:42:07 +08:00
|
|
|
eol = sigstart;
|
2015-06-14 03:37:27 +08:00
|
|
|
}
|
ref-filter: handle CRLF at end-of-line more gracefully
The ref-filter code does not correctly handle commit or tag messages
that use CRLF as the line terminator. Such messages can be created with
the `--cleanup=verbatim` option of `git commit` and `git tag`, or by
using `git commit-tree` directly.
The function `find_subpos` in ref-filter.c looks for two consecutive
LFs to find the end of the subject line, a sequence which is absent in
messages using CRLF. This results in the whole message being parsed as
the subject line (`%(contents:subject)`), and the body of the message
(`%(contents:body)`) being empty.
Moreover, in `copy_subject`, which wants to return the subject as a
single line, '\n' is replaced by space, but '\r' is
untouched.
This impacts the output of `git branch`, `git tag` and `git
for-each-ref`.
This behaviour is a regression for `git branch --verbose`, which
bisects down to 949af0684c (branch: use ref-filter printing APIs,
2017-01-10).
Adjust the ref-filter code to be more lenient by hardening the logic in
`copy_subject` and `find_subpos` to correctly parse messages containing
CRLF.
Add a new test script, 't3920-crlf-messages.sh', to test the behaviour
of commands using either the ref-filter or the pretty APIs with messages
using CRLF line endings. The function `test_crlf_subject_body_and_contents`
can be used to test that the `--format` option of `branch`, `tag`,
`for-each-ref`, `log` and `show` correctly displays the subject, body
and raw content of commit and tag messages using CRLF. Test the
output of `branch`, `tag` and `for-each-ref` with such commits.
Helped-by: Junio C Hamano <gitster@pobox.com>
Helped-by: Eric Sunshine <sunshine@sunshineco.com>
Signed-off-by: Philippe Blain <levraiphilippeblain@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-10-29 20:48:28 +08:00
|
|
|
buf = eol;
|
2015-06-14 03:37:27 +08:00
|
|
|
*sublen = buf - *sub;
|
|
|
|
/* drop trailing newline, if present */
|
ref-filter: handle CRLF at end-of-line more gracefully
The ref-filter code does not correctly handle commit or tag messages
that use CRLF as the line terminator. Such messages can be created with
the `--cleanup=verbatim` option of `git commit` and `git tag`, or by
using `git commit-tree` directly.
The function `find_subpos` in ref-filter.c looks for two consecutive
LFs to find the end of the subject line, a sequence which is absent in
messages using CRLF. This results in the whole message being parsed as
the subject line (`%(contents:subject)`), and the body of the message
(`%(contents:body)`) being empty.
Moreover, in `copy_subject`, which wants to return the subject as a
single line, '\n' is replaced by space, but '\r' is
untouched.
This impacts the output of `git branch`, `git tag` and `git
for-each-ref`.
This behaviour is a regression for `git branch --verbose`, which
bisects down to 949af0684c (branch: use ref-filter printing APIs,
2017-01-10).
Adjust the ref-filter code to be more lenient by hardening the logic in
`copy_subject` and `find_subpos` to correctly parse messages containing
CRLF.
Add a new test script, 't3920-crlf-messages.sh', to test the behaviour
of commands using either the ref-filter or the pretty APIs with messages
using CRLF line endings. The function `test_crlf_subject_body_and_contents`
can be used to test that the `--format` option of `branch`, `tag`,
`for-each-ref`, `log` and `show` correctly displays the subject, body
and raw content of commit and tag messages using CRLF. Test the
output of `branch`, `tag` and `for-each-ref` with such commits.
Helped-by: Junio C Hamano <gitster@pobox.com>
Helped-by: Eric Sunshine <sunshine@sunshineco.com>
Signed-off-by: Philippe Blain <levraiphilippeblain@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-10-29 20:48:28 +08:00
|
|
|
while (*sublen && ((*sub)[*sublen - 1] == '\n' ||
|
|
|
|
(*sub)[*sublen - 1] == '\r'))
|
2015-06-14 03:37:27 +08:00
|
|
|
*sublen -= 1;
|
|
|
|
|
|
|
|
/* skip any empty lines */
|
ref-filter: handle CRLF at end-of-line more gracefully
The ref-filter code does not correctly handle commit or tag messages
that use CRLF as the line terminator. Such messages can be created with
the `--cleanup=verbatim` option of `git commit` and `git tag`, or by
using `git commit-tree` directly.
The function `find_subpos` in ref-filter.c looks for two consecutive
LFs to find the end of the subject line, a sequence which is absent in
messages using CRLF. This results in the whole message being parsed as
the subject line (`%(contents:subject)`), and the body of the message
(`%(contents:body)`) being empty.
Moreover, in `copy_subject`, which wants to return the subject as a
single line, '\n' is replaced by space, but '\r' is
untouched.
This impacts the output of `git branch`, `git tag` and `git
for-each-ref`.
This behaviour is a regression for `git branch --verbose`, which
bisects down to 949af0684c (branch: use ref-filter printing APIs,
2017-01-10).
Adjust the ref-filter code to be more lenient by hardening the logic in
`copy_subject` and `find_subpos` to correctly parse messages containing
CRLF.
Add a new test script, 't3920-crlf-messages.sh', to test the behaviour
of commands using either the ref-filter or the pretty APIs with messages
using CRLF line endings. The function `test_crlf_subject_body_and_contents`
can be used to test that the `--format` option of `branch`, `tag`,
`for-each-ref`, `log` and `show` correctly displays the subject, body
and raw content of commit and tag messages using CRLF. Test the
output of `branch`, `tag` and `for-each-ref` with such commits.
Helped-by: Junio C Hamano <gitster@pobox.com>
Helped-by: Eric Sunshine <sunshine@sunshineco.com>
Signed-off-by: Philippe Blain <levraiphilippeblain@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-10-29 20:48:28 +08:00
|
|
|
while (*buf == '\n' || *buf == '\r')
|
2015-06-14 03:37:27 +08:00
|
|
|
buf++;
|
|
|
|
*body = buf;
|
|
|
|
*bodylen = strlen(buf);
|
2021-02-11 10:08:03 +08:00
|
|
|
*nonsiglen = sigstart - buf;
|
2015-06-14 03:37:27 +08:00
|
|
|
}
|
|
|
|
|
2015-09-11 23:04:16 +08:00
|
|
|
/*
 * Append at most 'lines' lines taken from the first 'size' bytes of 'buf'
 * to the strbuf 'out'. Nothing is appended when 'lines' is not greater
 * than 0 or 'size' is 0.
 *
 * The newline ending each copied line is not copied; instead every line
 * after the first is preceded by a newline plus indentation.
 */
static void append_lines(struct strbuf *out, const char *buf, unsigned long size, int lines)
{
	const char *cur = buf;
	const char *limit = buf + size;
	int emitted = 0;

	while (emitted < lines && cur < limit) {
		size_t remaining = limit - cur;
		const char *nl;

		/* separator goes in front of every line but the first */
		if (emitted)
			strbuf_addstr(out, "\n ");

		nl = memchr(cur, '\n', remaining);
		if (!nl) {
			/* last (unterminated) line: take the rest and stop */
			strbuf_add(out, cur, remaining);
			return;
		}

		strbuf_add(out, cur, nl - cur);
		cur = nl + 1;
		emitted++;
	}
}
|
|
|
|
|
2023-07-24 00:19:59 +08:00
|
|
|
static void grab_describe_values(struct atom_value *val, int deref,
|
|
|
|
struct object *obj)
|
|
|
|
{
|
|
|
|
struct commit *commit = (struct commit *)obj;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
for (i = 0; i < used_atom_cnt; i++) {
|
|
|
|
struct used_atom *atom = &used_atom[i];
|
|
|
|
enum atom_type type = atom->atom_type;
|
|
|
|
const char *name = atom->name;
|
|
|
|
struct atom_value *v = &val[i];
|
|
|
|
|
|
|
|
struct child_process cmd = CHILD_PROCESS_INIT;
|
|
|
|
struct strbuf out = STRBUF_INIT;
|
|
|
|
struct strbuf err = STRBUF_INIT;
|
|
|
|
|
|
|
|
if (type != ATOM_DESCRIBE)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
if (!!deref != (*name == '*'))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
cmd.git_cmd = 1;
|
|
|
|
strvec_push(&cmd.args, "describe");
|
|
|
|
strvec_pushv(&cmd.args, atom->u.describe_args);
|
|
|
|
strvec_push(&cmd.args, oid_to_hex(&commit->object.oid));
|
|
|
|
if (pipe_command(&cmd, NULL, 0, &out, 0, &err, 0) < 0) {
|
|
|
|
error(_("failed to run 'describe'"));
|
|
|
|
v->s = xstrdup("");
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
strbuf_rtrim(&out);
|
|
|
|
v->s = strbuf_detach(&out, NULL);
|
|
|
|
|
|
|
|
strbuf_release(&err);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-06-14 03:37:27 +08:00
|
|
|
/* See grab_values */
|
2021-07-26 11:26:46 +08:00
|
|
|
static void grab_sub_body_contents(struct atom_value *val, int deref, struct expand_data *data)
|
2015-06-14 03:37:27 +08:00
|
|
|
{
|
|
|
|
int i;
|
|
|
|
const char *subpos = NULL, *bodypos = NULL, *sigpos = NULL;
|
2021-01-19 07:49:10 +08:00
|
|
|
size_t sublen = 0, bodylen = 0, nonsiglen = 0, siglen = 0;
|
2021-07-26 11:26:46 +08:00
|
|
|
void *buf = data->content;
|
2015-06-14 03:37:27 +08:00
|
|
|
|
|
|
|
for (i = 0; i < used_atom_cnt; i++) {
|
2016-02-18 02:06:18 +08:00
|
|
|
struct used_atom *atom = &used_atom[i];
|
|
|
|
const char *name = atom->name;
|
2015-06-14 03:37:27 +08:00
|
|
|
struct atom_value *v = &val[i];
|
ref-filter: add %(raw) atom
Add new formatting option `%(raw)`, which will print the raw
object data without any changes. It will help further to migrate
all cat-file formatting logic from cat-file to ref-filter.
The raw data of blob, tree objects may contain '\0', but most of
the logic in `ref-filter` depends on the output of the atom being
text (specifically, no embedded NULs in it).
E.g. `quote_formatting()` use `strbuf_addstr()` or `*._quote_buf()`
add the data to the buffer. The raw data of a tree object is
`100644 one\0...`, only the `100644 one` will be added to the buffer,
which is incorrect.
Therefore, we need to find a way to record the length of the
atom_value's member `s`. Although strbuf can already record the
string and its length, if we want to replace the type of atom_value's
member `s` with strbuf, many places in ref-filter that are filled
with dynamically allocated mermory in `v->s` are not easy to replace.
At the same time, we need to check if `v->s == NULL` in
populate_value(), and strbuf cannot easily distinguish NULL and empty
strings, but c-style "const char *" can do it. So add a new member in
`struct atom_value`: `s_size`, which can record raw object size, it
can help us add raw object data to the buffer or compare two buffers
which contain raw object data.
Note that `--format=%(raw)` cannot be used with `--python`, `--shell`,
`--tcl`, and `--perl` because if the binary raw data is passed to a
variable in such languages, these may not support arbitrary binary data
in their string variable type.
Reviewed-by: Jacob Keller <jacob.keller@gmail.com>
Mentored-by: Christian Couder <christian.couder@gmail.com>
Mentored-by: Hariom Verma <hariom18599@gmail.com>
Helped-by: Bagas Sanjaya <bagasdotme@gmail.com>
Helped-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Helped-by: Felipe Contreras <felipe.contreras@gmail.com>
Helped-by: Phillip Wood <phillip.wood@dunelm.org.uk>
Helped-by: Junio C Hamano <gitster@pobox.com>
Based-on-patch-by: Olga Telezhnaya <olyatelezhnaya@gmail.com>
Signed-off-by: ZheNing Hu <adlternative@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-07-26 11:26:47 +08:00
|
|
|
enum atom_type atom_type = atom->atom_type;
|
2021-02-11 10:08:03 +08:00
|
|
|
|
2015-06-14 03:37:27 +08:00
|
|
|
if (!!deref != (*name == '*'))
|
|
|
|
continue;
|
|
|
|
if (deref)
|
|
|
|
name++;
|
2021-07-26 11:26:46 +08:00
|
|
|
|
ref-filter: add %(raw) atom
Add new formatting option `%(raw)`, which will print the raw
object data without any changes. It will help further to migrate
all cat-file formatting logic from cat-file to ref-filter.
The raw data of blob, tree objects may contain '\0', but most of
the logic in `ref-filter` depends on the output of the atom being
text (specifically, no embedded NULs in it).
E.g. `quote_formatting()` use `strbuf_addstr()` or `*._quote_buf()`
add the data to the buffer. The raw data of a tree object is
`100644 one\0...`, only the `100644 one` will be added to the buffer,
which is incorrect.
Therefore, we need to find a way to record the length of the
atom_value's member `s`. Although strbuf can already record the
string and its length, if we want to replace the type of atom_value's
member `s` with strbuf, many places in ref-filter that are filled
with dynamically allocated mermory in `v->s` are not easy to replace.
At the same time, we need to check if `v->s == NULL` in
populate_value(), and strbuf cannot easily distinguish NULL and empty
strings, but c-style "const char *" can do it. So add a new member in
`struct atom_value`: `s_size`, which can record raw object size, it
can help us add raw object data to the buffer or compare two buffers
which contain raw object data.
Note that `--format=%(raw)` cannot be used with `--python`, `--shell`,
`--tcl`, and `--perl` because if the binary raw data is passed to a
variable in such languages, these may not support arbitrary binary data
in their string variable type.
Reviewed-by: Jacob Keller <jacob.keller@gmail.com>
Mentored-by: Christian Couder <christian.couder@gmail.com>
Mentored-by: Hariom Verma <hariom18599@gmail.com>
Helped-by: Bagas Sanjaya <bagasdotme@gmail.com>
Helped-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Helped-by: Felipe Contreras <felipe.contreras@gmail.com>
Helped-by: Phillip Wood <phillip.wood@dunelm.org.uk>
Helped-by: Junio C Hamano <gitster@pobox.com>
Based-on-patch-by: Olga Telezhnaya <olyatelezhnaya@gmail.com>
Signed-off-by: ZheNing Hu <adlternative@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-07-26 11:26:47 +08:00
|
|
|
if (atom_type == ATOM_RAW) {
|
|
|
|
unsigned long buf_size = data->size;
|
|
|
|
|
|
|
|
if (atom->u.raw_data.option == RAW_BARE) {
|
|
|
|
v->s = xmemdupz(buf, buf_size);
|
|
|
|
v->s_size = buf_size;
|
|
|
|
} else if (atom->u.raw_data.option == RAW_LENGTH) {
|
2023-09-02 17:00:39 +08:00
|
|
|
v->value = buf_size;
|
|
|
|
v->s = xstrfmt("%"PRIuMAX, v->value);
|
ref-filter: add %(raw) atom
Add new formatting option `%(raw)`, which will print the raw
object data without any changes. It will help further to migrate
all cat-file formatting logic from cat-file to ref-filter.
The raw data of blob, tree objects may contain '\0', but most of
the logic in `ref-filter` depends on the output of the atom being
text (specifically, no embedded NULs in it).
E.g. `quote_formatting()` use `strbuf_addstr()` or `*._quote_buf()`
add the data to the buffer. The raw data of a tree object is
`100644 one\0...`, only the `100644 one` will be added to the buffer,
which is incorrect.
Therefore, we need to find a way to record the length of the
atom_value's member `s`. Although strbuf can already record the
string and its length, if we want to replace the type of atom_value's
member `s` with strbuf, many places in ref-filter that are filled
with dynamically allocated mermory in `v->s` are not easy to replace.
At the same time, we need to check if `v->s == NULL` in
populate_value(), and strbuf cannot easily distinguish NULL and empty
strings, but c-style "const char *" can do it. So add a new member in
`struct atom_value`: `s_size`, which can record raw object size, it
can help us add raw object data to the buffer or compare two buffers
which contain raw object data.
Note that `--format=%(raw)` cannot be used with `--python`, `--shell`,
`--tcl`, and `--perl` because if the binary raw data is passed to a
variable in such languages, these may not support arbitrary binary data
in their string variable type.
Reviewed-by: Jacob Keller <jacob.keller@gmail.com>
Mentored-by: Christian Couder <christian.couder@gmail.com>
Mentored-by: Hariom Verma <hariom18599@gmail.com>
Helped-by: Bagas Sanjaya <bagasdotme@gmail.com>
Helped-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Helped-by: Felipe Contreras <felipe.contreras@gmail.com>
Helped-by: Phillip Wood <phillip.wood@dunelm.org.uk>
Helped-by: Junio C Hamano <gitster@pobox.com>
Based-on-patch-by: Olga Telezhnaya <olyatelezhnaya@gmail.com>
Signed-off-by: ZheNing Hu <adlternative@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-07-26 11:26:47 +08:00
|
|
|
}
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2021-07-26 11:26:46 +08:00
|
|
|
if ((data->type != OBJ_TAG &&
|
|
|
|
data->type != OBJ_COMMIT) ||
|
|
|
|
(strcmp(name, "body") &&
|
|
|
|
!starts_with(name, "subject") &&
|
|
|
|
!starts_with(name, "trailers") &&
|
|
|
|
!starts_with(name, "contents")))
|
2015-06-14 03:37:27 +08:00
|
|
|
continue;
|
|
|
|
if (!subpos)
|
2019-02-14 13:51:03 +08:00
|
|
|
find_subpos(buf,
|
2015-06-14 03:37:27 +08:00
|
|
|
&subpos, &sublen,
|
|
|
|
&bodypos, &bodylen, &nonsiglen,
|
|
|
|
&sigpos, &siglen);
|
|
|
|
|
2016-02-18 02:06:18 +08:00
|
|
|
if (atom->u.contents.option == C_SUB)
|
2015-06-14 03:37:27 +08:00
|
|
|
v->s = copy_subject(subpos, sublen);
|
2020-08-22 05:41:50 +08:00
|
|
|
else if (atom->u.contents.option == C_SUB_SANITIZE) {
|
|
|
|
struct strbuf sb = STRBUF_INIT;
|
|
|
|
format_sanitized_subject(&sb, subpos, sublen);
|
|
|
|
v->s = strbuf_detach(&sb, NULL);
|
|
|
|
} else if (atom->u.contents.option == C_BODY_DEP)
|
2015-06-14 03:37:27 +08:00
|
|
|
v->s = xmemdupz(bodypos, bodylen);
|
2023-09-02 17:00:39 +08:00
|
|
|
else if (atom->u.contents.option == C_LENGTH) {
|
|
|
|
v->value = strlen(subpos);
|
|
|
|
v->s = xstrfmt("%"PRIuMAX, v->value);
|
|
|
|
} else if (atom->u.contents.option == C_BODY)
|
2015-06-14 03:37:27 +08:00
|
|
|
v->s = xmemdupz(bodypos, nonsiglen);
|
2016-02-18 02:06:18 +08:00
|
|
|
else if (atom->u.contents.option == C_SIG)
|
2015-06-14 03:37:27 +08:00
|
|
|
v->s = xmemdupz(sigpos, siglen);
|
2016-02-18 02:06:18 +08:00
|
|
|
else if (atom->u.contents.option == C_LINES) {
|
2015-09-11 23:04:16 +08:00
|
|
|
struct strbuf s = STRBUF_INIT;
|
2021-02-11 10:08:05 +08:00
|
|
|
const char *contents_end = bodypos + nonsiglen;
|
2015-09-11 23:04:16 +08:00
|
|
|
|
|
|
|
/* Size is the length of the message after removing the signature */
|
2016-02-18 02:06:18 +08:00
|
|
|
append_lines(&s, subpos, contents_end - subpos, atom->u.contents.nlines);
|
2015-09-11 23:04:16 +08:00
|
|
|
v->s = strbuf_detach(&s, NULL);
|
2016-11-19 08:58:15 +08:00
|
|
|
} else if (atom->u.contents.option == C_TRAILERS) {
|
2017-10-02 13:25:23 +08:00
|
|
|
struct strbuf s = STRBUF_INIT;
|
2016-11-19 08:58:15 +08:00
|
|
|
|
2017-10-02 13:25:23 +08:00
|
|
|
/* Format the trailer info according to the trailer_opts given */
|
|
|
|
format_trailers_from_commit(&s, subpos, &atom->u.contents.trailer_opts);
|
|
|
|
|
|
|
|
v->s = strbuf_detach(&s, NULL);
|
2016-02-18 02:06:18 +08:00
|
|
|
} else if (atom->u.contents.option == C_BARE)
|
|
|
|
v->s = xstrdup(subpos);
|
2021-02-11 10:08:03 +08:00
|
|
|
|
2015-06-14 03:37:27 +08:00
|
|
|
}
|
2021-02-11 10:08:03 +08:00
|
|
|
free((void *)sigpos);
|
2015-06-14 03:37:27 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* We want to have empty print-string for field requests
|
|
|
|
* that do not apply (e.g. "authordate" for a tag object)
|
|
|
|
*/
|
|
|
|
static void fill_missing_values(struct atom_value *val)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
for (i = 0; i < used_atom_cnt; i++) {
|
|
|
|
struct atom_value *v = &val[i];
|
2022-05-03 00:50:37 +08:00
|
|
|
if (!v->s)
|
2018-10-18 15:28:54 +08:00
|
|
|
v->s = xstrdup("");
|
2015-06-14 03:37:27 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* val is a list of atom_value to hold returned values. Extract
|
|
|
|
* the values for atoms in used_atom array out of (obj, buf, sz).
|
|
|
|
* when deref is false, (obj, buf, sz) is the object that is
|
|
|
|
* pointed at by the ref itself; otherwise it is the object the
|
|
|
|
* ref (which is a tag) refers to.
|
|
|
|
*/
|
2021-07-26 11:26:46 +08:00
|
|
|
static void grab_values(struct atom_value *val, int deref, struct object *obj, struct expand_data *data)
|
2015-06-14 03:37:27 +08:00
|
|
|
{
|
2021-07-26 11:26:46 +08:00
|
|
|
void *buf = data->content;
|
|
|
|
|
2015-06-14 03:37:27 +08:00
|
|
|
switch (obj->type) {
|
|
|
|
case OBJ_TAG:
|
2019-02-14 13:50:54 +08:00
|
|
|
grab_tag_values(val, deref, obj);
|
2021-07-26 11:26:46 +08:00
|
|
|
grab_sub_body_contents(val, deref, data);
|
2019-02-14 13:51:03 +08:00
|
|
|
grab_person("tagger", val, deref, buf);
|
2023-07-24 00:19:59 +08:00
|
|
|
grab_describe_values(val, deref, obj);
|
2015-06-14 03:37:27 +08:00
|
|
|
break;
|
|
|
|
case OBJ_COMMIT:
|
2019-02-14 13:50:54 +08:00
|
|
|
grab_commit_values(val, deref, obj);
|
2021-07-26 11:26:46 +08:00
|
|
|
grab_sub_body_contents(val, deref, data);
|
2019-02-14 13:51:03 +08:00
|
|
|
grab_person("author", val, deref, buf);
|
|
|
|
grab_person("committer", val, deref, buf);
|
2023-06-05 02:22:47 +08:00
|
|
|
grab_signature(val, deref, obj);
|
2023-07-24 00:19:59 +08:00
|
|
|
grab_describe_values(val, deref, obj);
|
2015-06-14 03:37:27 +08:00
|
|
|
break;
|
|
|
|
case OBJ_TREE:
|
|
|
|
/* grab_tree_values(val, deref, obj, buf, sz); */
|
ref-filter: add %(raw) atom
Add new formatting option `%(raw)`, which will print the raw
object data without any changes. It will help further to migrate
all cat-file formatting logic from cat-file to ref-filter.
The raw data of blob, tree objects may contain '\0', but most of
the logic in `ref-filter` depends on the output of the atom being
text (specifically, no embedded NULs in it).
E.g. `quote_formatting()` use `strbuf_addstr()` or `*._quote_buf()`
add the data to the buffer. The raw data of a tree object is
`100644 one\0...`, only the `100644 one` will be added to the buffer,
which is incorrect.
Therefore, we need to find a way to record the length of the
atom_value's member `s`. Although strbuf can already record the
string and its length, if we want to replace the type of atom_value's
member `s` with strbuf, many places in ref-filter that are filled
with dynamically allocated mermory in `v->s` are not easy to replace.
At the same time, we need to check if `v->s == NULL` in
populate_value(), and strbuf cannot easily distinguish NULL and empty
strings, but c-style "const char *" can do it. So add a new member in
`struct atom_value`: `s_size`, which can record raw object size, it
can help us add raw object data to the buffer or compare two buffers
which contain raw object data.
Note that `--format=%(raw)` cannot be used with `--python`, `--shell`,
`--tcl`, and `--perl` because if the binary raw data is passed to a
variable in such languages, these may not support arbitrary binary data
in their string variable type.
Reviewed-by: Jacob Keller <jacob.keller@gmail.com>
Mentored-by: Christian Couder <christian.couder@gmail.com>
Mentored-by: Hariom Verma <hariom18599@gmail.com>
Helped-by: Bagas Sanjaya <bagasdotme@gmail.com>
Helped-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Helped-by: Felipe Contreras <felipe.contreras@gmail.com>
Helped-by: Phillip Wood <phillip.wood@dunelm.org.uk>
Helped-by: Junio C Hamano <gitster@pobox.com>
Based-on-patch-by: Olga Telezhnaya <olyatelezhnaya@gmail.com>
Signed-off-by: ZheNing Hu <adlternative@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-07-26 11:26:47 +08:00
|
|
|
grab_sub_body_contents(val, deref, data);
|
2015-06-14 03:37:27 +08:00
|
|
|
break;
|
|
|
|
case OBJ_BLOB:
|
|
|
|
/* grab_blob_values(val, deref, obj, buf, sz); */
|
ref-filter: add %(raw) atom
Add new formatting option `%(raw)`, which will print the raw
object data without any changes. It will help further to migrate
all cat-file formatting logic from cat-file to ref-filter.
The raw data of blob, tree objects may contain '\0', but most of
the logic in `ref-filter` depends on the output of the atom being
text (specifically, no embedded NULs in it).
E.g. `quote_formatting()` use `strbuf_addstr()` or `*._quote_buf()`
add the data to the buffer. The raw data of a tree object is
`100644 one\0...`, only the `100644 one` will be added to the buffer,
which is incorrect.
Therefore, we need to find a way to record the length of the
atom_value's member `s`. Although strbuf can already record the
string and its length, if we want to replace the type of atom_value's
member `s` with strbuf, many places in ref-filter that are filled
with dynamically allocated mermory in `v->s` are not easy to replace.
At the same time, we need to check if `v->s == NULL` in
populate_value(), and strbuf cannot easily distinguish NULL and empty
strings, but c-style "const char *" can do it. So add a new member in
`struct atom_value`: `s_size`, which can record raw object size, it
can help us add raw object data to the buffer or compare two buffers
which contain raw object data.
Note that `--format=%(raw)` cannot be used with `--python`, `--shell`,
`--tcl`, and `--perl` because if the binary raw data is passed to a
variable in such languages, these may not support arbitrary binary data
in their string variable type.
Reviewed-by: Jacob Keller <jacob.keller@gmail.com>
Mentored-by: Christian Couder <christian.couder@gmail.com>
Mentored-by: Hariom Verma <hariom18599@gmail.com>
Helped-by: Bagas Sanjaya <bagasdotme@gmail.com>
Helped-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Helped-by: Felipe Contreras <felipe.contreras@gmail.com>
Helped-by: Phillip Wood <phillip.wood@dunelm.org.uk>
Helped-by: Junio C Hamano <gitster@pobox.com>
Based-on-patch-by: Olga Telezhnaya <olyatelezhnaya@gmail.com>
Signed-off-by: ZheNing Hu <adlternative@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-07-26 11:26:47 +08:00
|
|
|
grab_sub_body_contents(val, deref, data);
|
2015-06-14 03:37:27 +08:00
|
|
|
break;
|
|
|
|
default:
|
|
|
|
die("Eh? Object of type %d?", obj->type);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Copy the characters of the NUL-terminated string `src` into `dst`,
 * without the terminating NUL, and return the position in `dst` just
 * past the last character written.  The caller must make sure `dst`
 * has enough room.
 */
static inline char *copy_advance(char *dst, const char *src)
{
	char *out = dst;
	const char *in;

	for (in = src; *in != '\0'; in++, out++)
		*out = *in;
	return out;
}
|
|
|
|
|
2017-01-10 16:49:48 +08:00
|
|
|
/*
 * Return a newly allocated copy of `refname` with leading
 * '/'-separated components removed.  The caller owns the result.
 *
 * A positive `len` strips that many leading components.  A negative
 * `len` instead leaves at most `-len` trailing components in place.
 * If the name runs out before the requested number of '/' separators
 * has been consumed, the result is an empty string.
 *
 * `refname` is only read, so it is scanned in place; the single
 * allocation made is the one for the returned string.
 */
static const char *lstrip_ref_components(const char *refname, int len)
{
	long remaining = len;
	const char *start = refname;

	if (len < 0) {
		long slashes = 0;
		const char *p;

		/* Find total no of '/' separated path-components */
		for (p = refname; *p; p++)
			if (*p == '/')
				slashes++;
		/*
		 * The number of components we need to strip is now
		 * the total minus the components to be left (Plus one
		 * because we count the number of '/', but the number
		 * of components is one more than the no of '/').
		 */
		remaining = slashes + len + 1;
	}

	while (remaining > 0) {
		switch (*start++) {
		case '\0':
			/* Ran out of name before stripping enough. */
			return xstrdup("");
		case '/':
			remaining--;
			break;
		default:
			break;
		}
	}

	return xstrdup(start);
}
|
|
|
|
|
2017-01-10 16:49:49 +08:00
|
|
|
/*
 * Return a newly allocated copy of `refname` with trailing
 * '/'-separated components removed.  The caller owns the result.
 *
 * A positive `len` removes that many trailing components.  A negative
 * `len` instead leaves at most `-len` leading components in place.
 * If a component has to be removed but no '/' is left in the name,
 * the result is an empty string.
 */
static const char *rstrip_ref_components(const char *refname, int len)
{
	char *copy = xstrdup(refname);
	long to_strip = len;

	if (len < 0) {
		int nr_slashes = 0;
		const char *cursor;

		/* Count the '/' separators; components are one more. */
		for (cursor = refname; *cursor; cursor++)
			if (*cursor == '/')
				nr_slashes++;
		/* Total components minus the ones that must stay. */
		to_strip = nr_slashes + len + 1;
	}

	for (; to_strip > 0; to_strip--) {
		char *last_slash = strrchr(copy, '/');

		if (!last_slash) {
			free(copy);
			return xstrdup("");
		}
		/* Cut the copy off at the last separator. */
		*last_slash = '\0';
	}
	return copy;
}
|
|
|
|
|
2017-01-10 16:49:44 +08:00
|
|
|
static const char *show_ref(struct refname_atom *atom, const char *refname)
|
|
|
|
{
|
|
|
|
if (atom->option == R_SHORT)
|
|
|
|
return shorten_unambiguous_ref(refname, warn_ambiguous_refs);
|
2017-01-10 16:49:46 +08:00
|
|
|
else if (atom->option == R_LSTRIP)
|
|
|
|
return lstrip_ref_components(refname, atom->lstrip);
|
2017-01-10 16:49:49 +08:00
|
|
|
else if (atom->option == R_RSTRIP)
|
|
|
|
return rstrip_ref_components(refname, atom->rstrip);
|
2017-01-10 16:49:44 +08:00
|
|
|
else
|
2018-10-18 15:28:54 +08:00
|
|
|
return xstrdup(refname);
|
2017-01-10 16:49:44 +08:00
|
|
|
}
|
|
|
|
|
2016-02-18 02:06:17 +08:00
|
|
|
static void fill_remote_ref_details(struct used_atom *atom, const char *refname,
|
|
|
|
struct branch *branch, const char **s)
|
|
|
|
{
|
|
|
|
int num_ours, num_theirs;
|
2017-01-10 16:49:45 +08:00
|
|
|
if (atom->u.remote_ref.option == RR_REF)
|
|
|
|
*s = show_ref(&atom->u.remote_ref.refname, refname);
|
2017-01-10 16:49:41 +08:00
|
|
|
else if (atom->u.remote_ref.option == RR_TRACK) {
|
2018-01-10 02:50:15 +08:00
|
|
|
if (stat_tracking_info(branch, &num_ours, &num_theirs,
|
2019-04-16 20:16:46 +08:00
|
|
|
NULL, atom->u.remote_ref.push,
|
|
|
|
AHEAD_BEHIND_FULL) < 0) {
|
2017-01-10 16:49:50 +08:00
|
|
|
*s = xstrdup(msgs.gone);
|
2017-01-10 16:49:41 +08:00
|
|
|
} else if (!num_ours && !num_theirs)
|
2018-10-18 15:28:54 +08:00
|
|
|
*s = xstrdup("");
|
2016-02-18 02:06:17 +08:00
|
|
|
else if (!num_ours)
|
2017-01-10 16:49:50 +08:00
|
|
|
*s = xstrfmt(msgs.behind, num_theirs);
|
2016-02-18 02:06:17 +08:00
|
|
|
else if (!num_theirs)
|
2017-01-10 16:49:50 +08:00
|
|
|
*s = xstrfmt(msgs.ahead, num_ours);
|
2016-02-18 02:06:17 +08:00
|
|
|
else
|
2017-01-10 16:49:50 +08:00
|
|
|
*s = xstrfmt(msgs.ahead_behind,
|
2016-02-18 02:06:17 +08:00
|
|
|
num_ours, num_theirs);
|
2017-01-10 16:49:41 +08:00
|
|
|
if (!atom->u.remote_ref.nobracket && *s[0]) {
|
|
|
|
const char *to_free = *s;
|
|
|
|
*s = xstrfmt("[%s]", *s);
|
|
|
|
free((void *)to_free);
|
|
|
|
}
|
|
|
|
} else if (atom->u.remote_ref.option == RR_TRACKSHORT) {
|
2018-01-10 02:50:15 +08:00
|
|
|
if (stat_tracking_info(branch, &num_ours, &num_theirs,
|
2019-04-16 20:16:46 +08:00
|
|
|
NULL, atom->u.remote_ref.push,
|
|
|
|
AHEAD_BEHIND_FULL) < 0) {
|
2018-10-18 15:28:54 +08:00
|
|
|
*s = xstrdup("");
|
2016-02-18 02:06:17 +08:00
|
|
|
return;
|
2018-10-18 15:28:54 +08:00
|
|
|
}
|
2016-02-18 02:06:17 +08:00
|
|
|
if (!num_ours && !num_theirs)
|
2018-10-18 15:28:54 +08:00
|
|
|
*s = xstrdup("=");
|
2016-02-18 02:06:17 +08:00
|
|
|
else if (!num_ours)
|
2018-10-18 15:28:54 +08:00
|
|
|
*s = xstrdup("<");
|
2016-02-18 02:06:17 +08:00
|
|
|
else if (!num_theirs)
|
2018-10-18 15:28:54 +08:00
|
|
|
*s = xstrdup(">");
|
2016-02-18 02:06:17 +08:00
|
|
|
else
|
2018-10-18 15:28:54 +08:00
|
|
|
*s = xstrdup("<>");
|
2017-10-05 20:19:09 +08:00
|
|
|
} else if (atom->u.remote_ref.option == RR_REMOTE_NAME) {
|
|
|
|
int explicit;
|
|
|
|
const char *remote = atom->u.remote_ref.push ?
|
|
|
|
pushremote_for_branch(branch, &explicit) :
|
|
|
|
remote_for_branch(branch, &explicit);
|
2018-10-18 15:28:54 +08:00
|
|
|
*s = xstrdup(explicit ? remote : "");
|
2017-11-08 00:31:08 +08:00
|
|
|
} else if (atom->u.remote_ref.option == RR_REMOTE_REF) {
|
|
|
|
const char *merge;
|
|
|
|
|
2020-03-04 00:12:22 +08:00
|
|
|
merge = remote_ref_for_branch(branch, atom->u.remote_ref.push);
|
|
|
|
*s = xstrdup(merge ? merge : "");
|
2017-01-10 16:49:45 +08:00
|
|
|
} else
|
2018-05-02 17:38:39 +08:00
|
|
|
BUG("unhandled RR_* enum");
|
2016-02-18 02:06:17 +08:00
|
|
|
}
|
|
|
|
|
2017-01-10 16:49:38 +08:00
|
|
|
char *get_head_description(void)
|
|
|
|
{
|
|
|
|
struct strbuf desc = STRBUF_INIT;
|
|
|
|
struct wt_status_state state;
|
|
|
|
memset(&state, 0, sizeof(state));
|
2018-11-10 13:48:50 +08:00
|
|
|
wt_status_get_state(the_repository, &state, 1);
|
2017-01-10 16:49:38 +08:00
|
|
|
if (state.rebase_in_progress ||
|
2018-04-03 12:31:00 +08:00
|
|
|
state.rebase_interactive_in_progress) {
|
|
|
|
if (state.branch)
|
branch: sort detached HEAD based on a flag
Change the ref-filter sorting of detached HEAD to check the
FILTER_REFS_DETACHED_HEAD flag, instead of relying on the ref
description filled-in by get_head_description() to start with "(",
which in turn we expect to ASCII-sort before any other reference.
For context, we'd like the detached line to appear first at the start
of "git branch -l", e.g.:
$ git branch -l
* (HEAD detached at <hash>)
master
This doesn't change that, but improves on a fix made in
28438e84e04 (ref-filter: sort detached HEAD lines firstly, 2019-06-18)
and gives the Chinese translation the ability to use its preferred
punctuation marks again.
In Chinese the fullwidth versions of punctuation like "()" are
typically written as (U+FF08 fullwidth left parenthesis), (U+FF09
fullwidth right parenthesis) instead[1]. This form is used in both
po/zh_{CN,TW}.po in most cases where "()" is translated in a string.
Aside from that improvement to the Chinese translation, it also just
makes for cleaner code that we mark any special cases in the ref_array
we're sorting with flags and make the sort function aware of them,
instead of piggy-backing on the general-case of strcmp() doing the
right thing.
As seen in the amended tests this made reverse sorting a bit more
consistent. Before this we'd sometimes sort this message in the
middle, now it's consistently at the beginning or end, depending on
whether we're doing a normal or reverse sort. Having it at the end
doesn't make much sense either, but at least it behaves consistently
now. A follow-up commit will make this behavior under reverse sorting
even better.
I'm removing the "TRANSLATORS" comments that were in the old code
while I'm at it. Those were added in d4919bb288e (ref-filter: move
get_head_description() from branch.c, 2017-01-10). I think it's
obvious from context, string and translation memory in typical
translation tools that these are the same or similar string.
1. https://en.wikipedia.org/wiki/Chinese_punctuation#Marks_similar_to_European_punctuation
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-01-07 17:51:52 +08:00
|
|
|
strbuf_addf(&desc, _("(no branch, rebasing %s)"),
|
2018-04-03 12:31:00 +08:00
|
|
|
state.branch);
|
|
|
|
else
|
branch: sort detached HEAD based on a flag
Change the ref-filter sorting of detached HEAD to check the
FILTER_REFS_DETACHED_HEAD flag, instead of relying on the ref
description filled-in by get_head_description() to start with "(",
which in turn we expect to ASCII-sort before any other reference.
For context, we'd like the detached line to appear first at the start
of "git branch -l", e.g.:
$ git branch -l
* (HEAD detached at <hash>)
master
This doesn't change that, but improves on a fix made in
28438e84e04 (ref-filter: sort detached HEAD lines firstly, 2019-06-18)
and gives the Chinese translation the ability to use its preferred
punctuation marks again.
In Chinese the fullwidth versions of punctuation like "()" are
typically written as (U+FF08 fullwidth left parenthesis), (U+FF09
fullwidth right parenthesis) instead[1]. This form is used in both
po/zh_{CN,TW}.po in most cases where "()" is translated in a string.
Aside from that improvement to the Chinese translation, it also just
makes for cleaner code that we mark any special cases in the ref_array
we're sorting with flags and make the sort function aware of them,
instead of piggy-backing on the general-case of strcmp() doing the
right thing.
As seen in the amended tests this made reverse sorting a bit more
consistent. Before this we'd sometimes sort this message in the
middle, now it's consistently at the beginning or end, depending on
whether we're doing a normal or reverse sort. Having it at the end
doesn't make much sense either, but at least it behaves consistently
now. A follow-up commit will make this behavior under reverse sorting
even better.
I'm removing the "TRANSLATORS" comments that were in the old code
while I'm at it. Those were added in d4919bb288e (ref-filter: move
get_head_description() from branch.c, 2017-01-10). I think it's
obvious from context, string and translation memory in typical
translation tools that these are the same or similar string.
1. https://en.wikipedia.org/wiki/Chinese_punctuation#Marks_similar_to_European_punctuation
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-01-07 17:51:52 +08:00
|
|
|
strbuf_addf(&desc, _("(no branch, rebasing detached HEAD %s)"),
|
2018-04-03 12:31:00 +08:00
|
|
|
state.detached_from);
|
|
|
|
} else if (state.bisect_in_progress)
|
branch: sort detached HEAD based on a flag
Change the ref-filter sorting of detached HEAD to check the
FILTER_REFS_DETACHED_HEAD flag, instead of relying on the ref
description filled-in by get_head_description() to start with "(",
which in turn we expect to ASCII-sort before any other reference.
For context, we'd like the detached line to appear first at the start
of "git branch -l", e.g.:
$ git branch -l
* (HEAD detached at <hash>)
master
This doesn't change that, but improves on a fix made in
28438e84e04 (ref-filter: sort detached HEAD lines firstly, 2019-06-18)
and gives the Chinese translation the ability to use its preferred
punctuation marks again.
In Chinese the fullwidth versions of punctuation like "()" are
typically written as (U+FF08 fullwidth left parenthesis), (U+FF09
fullwidth right parenthesis) instead[1]. This form is used in both
po/zh_{CN,TW}.po in most cases where "()" is translated in a string.
Aside from that improvement to the Chinese translation, it also just
makes for cleaner code that we mark any special cases in the ref_array
we're sorting with flags and make the sort function aware of them,
instead of piggy-backing on the general-case of strcmp() doing the
right thing.
As seen in the amended tests this made reverse sorting a bit more
consistent. Before this we'd sometimes sort this message in the
middle, now it's consistently at the beginning or end, depending on
whether we're doing a normal or reverse sort. Having it at the end
doesn't make much sense either, but at least it behaves consistently
now. A follow-up commit will make this behavior under reverse sorting
even better.
I'm removing the "TRANSLATORS" comments that were in the old code
while I'm at it. Those were added in d4919bb288e (ref-filter: move
get_head_description() from branch.c, 2017-01-10). I think it's
obvious from context, string and translation memory in typical
translation tools that these are the same or similar string.
1. https://en.wikipedia.org/wiki/Chinese_punctuation#Marks_similar_to_European_punctuation
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-01-07 17:51:52 +08:00
|
|
|
strbuf_addf(&desc, _("(no branch, bisect started on %s)"),
|
2017-01-10 16:49:38 +08:00
|
|
|
state.branch);
|
|
|
|
else if (state.detached_from) {
|
|
|
|
if (state.detached_at)
|
branch: sort detached HEAD based on a flag
Change the ref-filter sorting of detached HEAD to check the
FILTER_REFS_DETACHED_HEAD flag, instead of relying on the ref
description filled-in by get_head_description() to start with "(",
which in turn we expect to ASCII-sort before any other reference.
For context, we'd like the detached line to appear first at the start
of "git branch -l", e.g.:
$ git branch -l
* (HEAD detached at <hash>)
master
This doesn't change that, but improves on a fix made in
28438e84e04 (ref-filter: sort detached HEAD lines firstly, 2019-06-18)
and gives the Chinese translation the ability to use its preferred
punctuation marks again.
In Chinese the fullwidth versions of punctuation like "()" are
typically written as (U+FF08 fullwidth left parenthesis), (U+FF09
fullwidth right parenthesis) instead[1]. This form is used in both
po/zh_{CN,TW}.po in most cases where "()" is translated in a string.
Aside from that improvement to the Chinese translation, it also just
makes for cleaner code that we mark any special cases in the ref_array
we're sorting with flags and make the sort function aware of them,
instead of piggy-backing on the general-case of strcmp() doing the
right thing.
As seen in the amended tests this made reverse sorting a bit more
consistent. Before this we'd sometimes sort this message in the
middle, now it's consistently at the beginning or end, depending on
whether we're doing a normal or reverse sort. Having it at the end
doesn't make much sense either, but at least it behaves consistently
now. A follow-up commit will make this behavior under reverse sorting
even better.
I'm removing the "TRANSLATORS" comments that were in the old code
while I'm at it. Those were added in d4919bb288e (ref-filter: move
get_head_description() from branch.c, 2017-01-10). I think it's
obvious from context, string and translation memory in typical
translation tools that these are the same or similar string.
1. https://en.wikipedia.org/wiki/Chinese_punctuation#Marks_similar_to_European_punctuation
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-01-07 17:51:52 +08:00
|
|
|
strbuf_addf(&desc, _("(HEAD detached at %s)"),
|
|
|
|
state.detached_from);
|
2017-01-10 16:49:38 +08:00
|
|
|
else
|
branch: sort detached HEAD based on a flag
Change the ref-filter sorting of detached HEAD to check the
FILTER_REFS_DETACHED_HEAD flag, instead of relying on the ref
description filled-in by get_head_description() to start with "(",
which in turn we expect to ASCII-sort before any other reference.
For context, we'd like the detached line to appear first at the start
of "git branch -l", e.g.:
$ git branch -l
* (HEAD detached at <hash>)
master
This doesn't change that, but improves on a fix made in
28438e84e04 (ref-filter: sort detached HEAD lines firstly, 2019-06-18)
and gives the Chinese translation the ability to use its preferred
punctuation marks again.
In Chinese the fullwidth versions of punctuation like "()" are
typically written as (U+FF08 fullwidth left parenthesis), (U+FF09
fullwidth right parenthesis) instead[1]. This form is used in both
po/zh_{CN,TW}.po in most cases where "()" is translated in a string.
Aside from that improvement to the Chinese translation, it also just
makes for cleaner code that we mark any special cases in the ref_array
we're sorting with flags and make the sort function aware of them,
instead of piggy-backing on the general-case of strcmp() doing the
right thing.
As seen in the amended tests this made reverse sorting a bit more
consistent. Before this we'd sometimes sort this message in the
middle, now it's consistently at the beginning or end, depending on
whether we're doing a normal or reverse sort. Having it at the end
doesn't make much sense either, but at least it behaves consistently
now. A follow-up commit will make this behavior under reverse sorting
even better.
I'm removing the "TRANSLATORS" comments that were in the old code
while I'm at it. Those were added in d4919bb288e (ref-filter: move
get_head_description() from branch.c, 2017-01-10). I think it's
obvious from context, string and translation memory in typical
translation tools that these are the same or similar string.
1. https://en.wikipedia.org/wiki/Chinese_punctuation#Marks_similar_to_European_punctuation
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-01-07 17:51:52 +08:00
|
|
|
strbuf_addf(&desc, _("(HEAD detached from %s)"),
|
|
|
|
state.detached_from);
|
|
|
|
} else
|
|
|
|
strbuf_addstr(&desc, _("(no branch)"));
|
2019-06-19 06:29:15 +08:00
|
|
|
|
2022-09-25 06:53:18 +08:00
|
|
|
wt_status_state_free_buffers(&state);
|
|
|
|
|
2017-01-10 16:49:38 +08:00
|
|
|
return strbuf_detach(&desc, NULL);
|
|
|
|
}
|
|
|
|
|
2017-01-10 16:49:44 +08:00
|
|
|
static const char *get_symref(struct used_atom *atom, struct ref_array_item *ref)
|
|
|
|
{
|
|
|
|
if (!ref->symref)
|
2018-10-18 15:28:54 +08:00
|
|
|
return xstrdup("");
|
2017-01-10 16:49:44 +08:00
|
|
|
else
|
|
|
|
return show_ref(&atom->u.refname, ref->symref);
|
|
|
|
}
|
|
|
|
|
|
|
|
static const char *get_refname(struct used_atom *atom, struct ref_array_item *ref)
|
|
|
|
{
|
|
|
|
if (ref->kind & FILTER_REFS_DETACHED_HEAD)
|
|
|
|
return get_head_description();
|
|
|
|
return show_ref(&atom->u.refname, ref->refname);
|
2016-02-18 02:06:17 +08:00
|
|
|
}
|
|
|
|
|
2018-07-17 16:22:57 +08:00
|
|
|
static int get_object(struct ref_array_item *ref, int deref, struct object **obj,
|
|
|
|
struct expand_data *oi, struct strbuf *err)
|
2018-02-21 14:59:00 +08:00
|
|
|
{
|
2018-07-17 16:22:57 +08:00
|
|
|
/* parse_object_buffer() will set eaten to 0 if free() will be needed */
|
|
|
|
int eaten = 1;
|
2018-07-17 16:22:57 +08:00
|
|
|
if (oi->info.contentp) {
|
|
|
|
/* We need to know that to use parse_object_buffer properly */
|
|
|
|
oi->info.sizep = &oi->size;
|
|
|
|
oi->info.typep = &oi->type;
|
|
|
|
}
|
|
|
|
if (oid_object_info_extended(the_repository, &oi->oid, &oi->info,
|
|
|
|
OBJECT_INFO_LOOKUP_REPLACE))
|
|
|
|
return strbuf_addf_ret(err, -1, _("missing object %s for %s"),
|
|
|
|
oid_to_hex(&oi->oid), ref->refname);
|
2018-12-24 21:24:30 +08:00
|
|
|
if (oi->info.disk_sizep && oi->disk_size < 0)
|
|
|
|
BUG("Object size is less than zero.");
|
2018-07-17 16:22:57 +08:00
|
|
|
|
|
|
|
if (oi->info.contentp) {
|
2018-08-18 04:09:57 +08:00
|
|
|
*obj = parse_object_buffer(the_repository, &oi->oid, oi->type, oi->size, oi->content, &eaten);
|
2021-04-01 16:32:24 +08:00
|
|
|
if (!*obj) {
|
2018-07-17 16:22:57 +08:00
|
|
|
if (!eaten)
|
|
|
|
free(oi->content);
|
|
|
|
return strbuf_addf_ret(err, -1, _("parse_object_buffer failed on %s for %s"),
|
|
|
|
oid_to_hex(&oi->oid), ref->refname);
|
|
|
|
}
|
2021-07-26 11:26:46 +08:00
|
|
|
grab_values(ref->value, deref, *obj, oi);
|
2018-07-17 16:22:57 +08:00
|
|
|
}
|
2018-07-17 16:22:57 +08:00
|
|
|
|
|
|
|
grab_common_values(ref->value, deref, oi);
|
2018-02-21 14:59:00 +08:00
|
|
|
if (!eaten)
|
2018-07-17 16:22:57 +08:00
|
|
|
free(oi->content);
|
|
|
|
return 0;
|
2018-02-21 14:59:00 +08:00
|
|
|
}
|
|
|
|
|
2019-04-29 13:19:42 +08:00
|
|
|
static void populate_worktree_map(struct hashmap *map, struct worktree **worktrees)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
|
|
|
for (i = 0; worktrees[i]; i++) {
|
|
|
|
if (worktrees[i]->head_ref) {
|
|
|
|
struct ref_to_worktree_entry *entry;
|
|
|
|
entry = xmalloc(sizeof(*entry));
|
|
|
|
entry->wt = worktrees[i];
|
2019-10-07 07:30:27 +08:00
|
|
|
hashmap_entry_init(&entry->ent,
|
|
|
|
strhash(worktrees[i]->head_ref));
|
2019-04-29 13:19:42 +08:00
|
|
|
|
2019-10-07 07:30:29 +08:00
|
|
|
hashmap_add(map, &entry->ent);
|
2019-04-29 13:19:42 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void lazy_init_worktree_map(void)
|
|
|
|
{
|
|
|
|
if (ref_to_worktree_map.worktrees)
|
|
|
|
return;
|
|
|
|
|
2020-06-20 07:35:44 +08:00
|
|
|
ref_to_worktree_map.worktrees = get_worktrees();
|
2019-04-29 13:19:42 +08:00
|
|
|
hashmap_init(&(ref_to_worktree_map.map), ref_to_worktree_map_cmpfnc, NULL, 0);
|
|
|
|
populate_worktree_map(&(ref_to_worktree_map.map), ref_to_worktree_map.worktrees);
|
|
|
|
}
|
|
|
|
|
2023-02-24 14:34:44 +08:00
|
|
|
static char *get_worktree_path(const struct ref_array_item *ref)
|
2019-04-29 13:19:42 +08:00
|
|
|
{
|
2019-10-07 07:30:36 +08:00
|
|
|
struct hashmap_entry entry, *e;
|
2019-04-29 13:19:42 +08:00
|
|
|
struct ref_to_worktree_entry *lookup_result;
|
|
|
|
|
|
|
|
lazy_init_worktree_map();
|
|
|
|
|
|
|
|
hashmap_entry_init(&entry, strhash(ref->refname));
|
2019-10-07 07:30:36 +08:00
|
|
|
e = hashmap_get(&(ref_to_worktree_map.map), &entry, ref->refname);
|
2019-04-29 13:19:42 +08:00
|
|
|
|
2019-10-07 07:30:36 +08:00
|
|
|
if (!e)
|
2019-04-29 13:19:42 +08:00
|
|
|
return xstrdup("");
|
2019-10-07 07:30:36 +08:00
|
|
|
|
|
|
|
lookup_result = container_of(e, struct ref_to_worktree_entry, ent);
|
|
|
|
|
|
|
|
return xstrdup(lookup_result->wt->path);
|
2019-04-29 13:19:42 +08:00
|
|
|
}
|
|
|
|
|
2015-06-14 03:37:27 +08:00
|
|
|
/*
|
|
|
|
* Parse the object referred by ref, and grab needed value.
|
|
|
|
*/
|
2018-03-29 20:49:45 +08:00
|
|
|
static int populate_value(struct ref_array_item *ref, struct strbuf *err)
|
2015-06-14 03:37:27 +08:00
|
|
|
{
|
|
|
|
struct object *obj;
|
2018-02-21 14:59:00 +08:00
|
|
|
int i;
|
2018-07-17 16:22:57 +08:00
|
|
|
struct object_info empty = OBJECT_INFO_INIT;
|
for-each-ref: add ahead-behind format atom
The previous change implemented the ahead_behind() method, including an
algorithm to compute the ahead/behind values for a number of commit tips
relative to a number of commit bases. Now, integrate that algorithm as
part of 'git for-each-ref' hidden behind a new format atom,
ahead-behind. This naturally extends to 'git branch' and 'git tag'
builtins, as well.
This format allows specifying multiple bases, if so desired, and all
matching references are compared against all of those bases. For this
reason, failing to read a reference provided from these atoms results in
an error.
In order to translate the ahead_behind() method information to the
format output code in ref-filter.c, we must populate arrays of
ahead_behind_count structs. In struct ref_array, we store the full array
that will be passed to ahead_behind(). In struct ref_array_item, we
store an array of pointers that point to the relevant items within the
full array. In this way, we can pull all relevant ahead/behind values
directly when formatting output for a specific item. It also ensures the
lifetime of the ahead_behind_count structs matches the time that the
array is being used.
Add specific tests of the ahead/behind counts in t6600-test-reach.sh, as
it has an interesting repository shape. In particular, its merging
strategy and its use of different commit-graphs would demonstrate over-
counting if the ahead_behind() method did not already account for that
possibility.
Also add tests for the specific for-each-ref, branch, and tag builtins.
In the case of 'git tag', there are interesting cases that happen when
some of the selected tips are not commits. This requires careful logic
around commits_nr in the second loop of filter_ahead_behind(). Also, the
test in t7004 is carefully located to avoid being dependent on the GPG
prereq. It also avoids using the test_commit helper, as that will add
ticks to the time and disrupt the expected timestamps in later tag
tests.
Also add performance tests in a new p1300-graph-walks.sh script. This
will be useful for more uses in the future, but for now compare the
ahead-behind counting algorithm in 'git for-each-ref' to the naive
implementation by running 'git rev-list --count' processes for each
input.
For the Git source code repository, the improvement is already obvious:
Test this tree
---------------------------------------------------------------
1500.2: ahead-behind counts: git for-each-ref 0.07(0.07+0.00)
1500.3: ahead-behind counts: git branch 0.07(0.06+0.00)
1500.4: ahead-behind counts: git tag 0.07(0.06+0.00)
1500.5: ahead-behind counts: git rev-list 1.32(1.04+0.27)
But the standard performance benchmark is the Linux kernel repository,
which demonstrates a significant improvement:
Test this tree
---------------------------------------------------------------
1500.2: ahead-behind counts: git for-each-ref 0.27(0.24+0.02)
1500.3: ahead-behind counts: git branch 0.27(0.24+0.03)
1500.4: ahead-behind counts: git tag 0.28(0.27+0.01)
1500.5: ahead-behind counts: git rev-list 4.57(4.03+0.54)
The 'git rev-list' test exists in this change as a demonstration, but it
will be removed in the next change to avoid wasting time on this
comparison.
Signed-off-by: Derrick Stolee <derrickstolee@github.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-03-20 19:26:54 +08:00
|
|
|
int ahead_behind_atoms = 0;
|
2015-06-14 03:37:27 +08:00
|
|
|
|
2021-03-14 00:17:22 +08:00
|
|
|
CALLOC_ARRAY(ref->value, used_atom_cnt);
|
2015-06-14 03:37:27 +08:00
|
|
|
|
|
|
|
if (need_symref && (ref->flag & REF_ISSYMREF) && !ref->symref) {
|
|
|
|
ref->symref = resolve_refdup(ref->refname, RESOLVE_REF_READING,
|
2017-10-01 15:29:03 +08:00
|
|
|
NULL, NULL);
|
2015-06-14 03:37:27 +08:00
|
|
|
if (!ref->symref)
|
2018-10-18 15:28:54 +08:00
|
|
|
ref->symref = xstrdup("");
|
2015-06-14 03:37:27 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Fill in specials first */
|
|
|
|
for (i = 0; i < used_atom_cnt; i++) {
|
2016-02-18 02:06:13 +08:00
|
|
|
struct used_atom *atom = &used_atom[i];
|
2021-05-13 23:15:38 +08:00
|
|
|
enum atom_type atom_type = atom->atom_type;
|
2016-02-18 02:06:11 +08:00
|
|
|
const char *name = used_atom[i].name;
|
2015-06-14 03:37:27 +08:00
|
|
|
struct atom_value *v = &ref->value[i];
|
|
|
|
int deref = 0;
|
|
|
|
const char *refname;
|
|
|
|
struct branch *branch = NULL;
|
|
|
|
|
ref-filter: add %(raw) atom
Add new formatting option `%(raw)`, which will print the raw
object data without any changes. It will help further to migrate
all cat-file formatting logic from cat-file to ref-filter.
The raw data of blob, tree objects may contain '\0', but most of
the logic in `ref-filter` depends on the output of the atom being
text (specifically, no embedded NULs in it).
E.g. `quote_formatting()` use `strbuf_addstr()` or `*._quote_buf()`
add the data to the buffer. The raw data of a tree object is
`100644 one\0...`, only the `100644 one` will be added to the buffer,
which is incorrect.
Therefore, we need to find a way to record the length of the
atom_value's member `s`. Although strbuf can already record the
string and its length, if we want to replace the type of atom_value's
member `s` with strbuf, many places in ref-filter that are filled
with dynamically allocated memory in `v->s` are not easy to replace.
At the same time, we need to check if `v->s == NULL` in
populate_value(), and strbuf cannot easily distinguish NULL and empty
strings, but c-style "const char *" can do it. So add a new member in
`struct atom_value`: `s_size`, which can record raw object size, it
can help us add raw object data to the buffer or compare two buffers
which contain raw object data.
Note that `--format=%(raw)` cannot be used with `--python`, `--shell`,
`--tcl`, and `--perl` because if the binary raw data is passed to a
variable in such languages, these may not support arbitrary binary data
in their string variable type.
Reviewed-by: Jacob Keller <jacob.keller@gmail.com>
Mentored-by: Christian Couder <christian.couder@gmail.com>
Mentored-by: Hariom Verma <hariom18599@gmail.com>
Helped-by: Bagas Sanjaya <bagasdotme@gmail.com>
Helped-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Helped-by: Felipe Contreras <felipe.contreras@gmail.com>
Helped-by: Phillip Wood <phillip.wood@dunelm.org.uk>
Helped-by: Junio C Hamano <gitster@pobox.com>
Based-on-patch-by: Olga Telezhnaya <olyatelezhnaya@gmail.com>
Signed-off-by: ZheNing Hu <adlternative@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-07-26 11:26:47 +08:00
|
|
|
v->s_size = ATOM_SIZE_UNSPECIFIED;
|
2015-09-10 23:48:20 +08:00
|
|
|
v->handler = append_atom;
|
2023-09-02 17:00:39 +08:00
|
|
|
v->value = 0;
|
2017-01-10 16:49:35 +08:00
|
|
|
v->atom = atom;
|
2015-09-10 23:48:20 +08:00
|
|
|
|
2015-06-14 03:37:27 +08:00
|
|
|
if (*name == '*') {
|
|
|
|
deref = 1;
|
|
|
|
name++;
|
|
|
|
}
|
|
|
|
|
2021-05-13 23:15:38 +08:00
|
|
|
if (atom_type == ATOM_REFNAME)
|
2017-01-10 16:49:44 +08:00
|
|
|
refname = get_refname(atom, ref);
|
2021-05-13 23:15:38 +08:00
|
|
|
else if (atom_type == ATOM_WORKTREEPATH) {
|
2019-04-29 13:19:42 +08:00
|
|
|
if (ref->kind == FILTER_REFS_BRANCHES)
|
2023-02-24 14:34:44 +08:00
|
|
|
v->s = get_worktree_path(ref);
|
2019-04-29 13:19:42 +08:00
|
|
|
else
|
|
|
|
v->s = xstrdup("");
|
|
|
|
continue;
|
|
|
|
}
|
2021-05-13 23:15:38 +08:00
|
|
|
else if (atom_type == ATOM_SYMREF)
|
2017-01-10 16:49:44 +08:00
|
|
|
refname = get_symref(atom, ref);
|
2021-05-13 23:15:38 +08:00
|
|
|
else if (atom_type == ATOM_UPSTREAM) {
|
2015-06-14 03:37:27 +08:00
|
|
|
const char *branch_name;
|
|
|
|
/* only local branches may have an upstream */
|
|
|
|
if (!skip_prefix(ref->refname, "refs/heads/",
|
2018-10-18 15:28:54 +08:00
|
|
|
&branch_name)) {
|
|
|
|
v->s = xstrdup("");
|
2015-06-14 03:37:27 +08:00
|
|
|
continue;
|
2018-10-18 15:28:54 +08:00
|
|
|
}
|
2015-06-14 03:37:27 +08:00
|
|
|
branch = branch_get(branch_name);
|
|
|
|
|
|
|
|
refname = branch_get_upstream(branch, NULL);
|
2016-02-18 02:06:17 +08:00
|
|
|
if (refname)
|
|
|
|
fill_remote_ref_details(atom, refname, branch, &v->s);
|
2018-10-18 15:28:54 +08:00
|
|
|
else
|
|
|
|
v->s = xstrdup("");
|
2016-02-18 02:06:17 +08:00
|
|
|
continue;
|
2021-05-13 23:15:38 +08:00
|
|
|
} else if (atom_type == ATOM_PUSH && atom->u.remote_ref.push) {
|
2015-06-14 03:37:27 +08:00
|
|
|
const char *branch_name;
|
2018-10-18 15:28:54 +08:00
|
|
|
v->s = xstrdup("");
|
2015-06-14 03:37:27 +08:00
|
|
|
if (!skip_prefix(ref->refname, "refs/heads/",
|
|
|
|
&branch_name))
|
|
|
|
continue;
|
|
|
|
branch = branch_get(branch_name);
|
|
|
|
|
2017-10-05 20:19:09 +08:00
|
|
|
if (atom->u.remote_ref.push_remote)
|
|
|
|
refname = NULL;
|
|
|
|
else {
|
|
|
|
refname = branch_get_push(branch, NULL);
|
|
|
|
if (!refname)
|
|
|
|
continue;
|
|
|
|
}
|
2018-10-18 15:28:54 +08:00
|
|
|
/* We will definitely re-init v->s on the next line. */
|
|
|
|
free((char *)v->s);
|
2016-02-18 02:06:17 +08:00
|
|
|
fill_remote_ref_details(atom, refname, branch, &v->s);
|
|
|
|
continue;
|
2021-05-13 23:15:38 +08:00
|
|
|
} else if (atom_type == ATOM_COLOR) {
|
2018-10-18 15:28:54 +08:00
|
|
|
v->s = xstrdup(atom->u.color);
|
2015-06-14 03:37:27 +08:00
|
|
|
continue;
|
2021-05-13 23:15:38 +08:00
|
|
|
} else if (atom_type == ATOM_FLAG) {
|
2015-06-14 03:37:27 +08:00
|
|
|
char buf[256], *cp = buf;
|
|
|
|
if (ref->flag & REF_ISSYMREF)
|
|
|
|
cp = copy_advance(cp, ",symref");
|
|
|
|
if (ref->flag & REF_ISPACKED)
|
|
|
|
cp = copy_advance(cp, ",packed");
|
|
|
|
if (cp == buf)
|
2018-10-18 15:28:54 +08:00
|
|
|
v->s = xstrdup("");
|
2015-06-14 03:37:27 +08:00
|
|
|
else {
|
|
|
|
*cp = '\0';
|
|
|
|
v->s = xstrdup(buf + 1);
|
|
|
|
}
|
|
|
|
continue;
|
2021-05-13 23:15:38 +08:00
|
|
|
} else if (!deref && atom_type == ATOM_OBJECTNAME &&
|
|
|
|
grab_oid(name, "objectname", &ref->objectname, v, atom)) {
|
|
|
|
continue;
|
|
|
|
} else if (atom_type == ATOM_HEAD) {
|
2017-05-19 14:12:12 +08:00
|
|
|
if (atom->u.head && !strcmp(ref->refname, atom->u.head))
|
2018-10-18 15:28:54 +08:00
|
|
|
v->s = xstrdup("*");
|
2015-06-14 03:37:27 +08:00
|
|
|
else
|
2018-10-18 15:28:54 +08:00
|
|
|
v->s = xstrdup(" ");
|
2015-06-14 03:37:27 +08:00
|
|
|
continue;
|
2021-05-13 23:15:38 +08:00
|
|
|
} else if (atom_type == ATOM_ALIGN) {
|
2015-09-11 23:03:07 +08:00
|
|
|
v->handler = align_atom_handler;
|
2018-10-18 15:28:54 +08:00
|
|
|
v->s = xstrdup("");
|
2015-09-11 23:03:07 +08:00
|
|
|
continue;
|
2021-05-13 23:15:38 +08:00
|
|
|
} else if (atom_type == ATOM_END) {
|
2015-09-11 23:03:07 +08:00
|
|
|
v->handler = end_atom_handler;
|
2018-10-18 15:28:54 +08:00
|
|
|
v->s = xstrdup("");
|
2015-09-11 23:03:07 +08:00
|
|
|
continue;
|
2021-05-13 23:15:38 +08:00
|
|
|
} else if (atom_type == ATOM_IF) {
|
2017-01-10 16:49:36 +08:00
|
|
|
const char *s;
|
|
|
|
if (skip_prefix(name, "if:", &s))
|
|
|
|
v->s = xstrdup(s);
|
2018-10-18 15:28:54 +08:00
|
|
|
else
|
|
|
|
v->s = xstrdup("");
|
2017-01-10 16:49:34 +08:00
|
|
|
v->handler = if_atom_handler;
|
|
|
|
continue;
|
2021-05-13 23:15:38 +08:00
|
|
|
} else if (atom_type == ATOM_THEN) {
|
2017-01-10 16:49:34 +08:00
|
|
|
v->handler = then_atom_handler;
|
2018-10-18 15:28:54 +08:00
|
|
|
v->s = xstrdup("");
|
2017-01-10 16:49:34 +08:00
|
|
|
continue;
|
2021-05-13 23:15:38 +08:00
|
|
|
} else if (atom_type == ATOM_ELSE) {
|
2017-01-10 16:49:34 +08:00
|
|
|
v->handler = else_atom_handler;
|
2018-10-18 15:28:54 +08:00
|
|
|
v->s = xstrdup("");
|
2017-01-10 16:49:34 +08:00
|
|
|
continue;
|
2021-07-26 11:26:50 +08:00
|
|
|
} else if (atom_type == ATOM_REST) {
|
|
|
|
if (ref->rest)
|
|
|
|
v->s = xstrdup(ref->rest);
|
|
|
|
else
|
|
|
|
v->s = xstrdup("");
|
|
|
|
continue;
|
for-each-ref: add ahead-behind format atom
The previous change implemented the ahead_behind() method, including an
algorithm to compute the ahead/behind values for a number of commit tips
relative to a number of commit bases. Now, integrate that algorithm as
part of 'git for-each-ref' hidden behind a new format atom,
ahead-behind. This naturally extends to 'git branch' and 'git tag'
builtins, as well.
This format allows specifying multiple bases, if so desired, and all
matching references are compared against all of those bases. For this
reason, failing to read a reference provided from these atoms results in
an error.
In order to translate the ahead_behind() method information to the
format output code in ref-filter.c, we must populate arrays of
ahead_behind_count structs. In struct ref_array, we store the full array
that will be passed to ahead_behind(). In struct ref_array_item, we
store an array of pointers that point to the relevant items within the
full array. In this way, we can pull all relevant ahead/behind values
directly when formatting output for a specific item. It also ensures the
lifetime of the ahead_behind_count structs matches the time that the
array is being used.
Add specific tests of the ahead/behind counts in t6600-test-reach.sh, as
it has an interesting repository shape. In particular, its merging
strategy and its use of different commit-graphs would demonstrate over-
counting if the ahead_behind() method did not already account for that
possibility.
Also add tests for the specific for-each-ref, branch, and tag builtins.
In the case of 'git tag', there are interesting cases that happen when
some of the selected tips are not commits. This requires careful logic
around commits_nr in the second loop of filter_ahead_behind(). Also, the
test in t7004 is carefully located to avoid being dependent on the GPG
prereq. It also avoids using the test_commit helper, as that will add
ticks to the time and disrupt the expected timestamps in later tag
tests.
Also add performance tests in a new p1300-graph-walks.sh script. This
will be useful for more uses in the future, but for now compare the
ahead-behind counting algorithm in 'git for-each-ref' to the naive
implementation by running 'git rev-list --count' processes for each
input.
For the Git source code repository, the improvement is already obvious:
Test this tree
---------------------------------------------------------------
1500.2: ahead-behind counts: git for-each-ref 0.07(0.07+0.00)
1500.3: ahead-behind counts: git branch 0.07(0.06+0.00)
1500.4: ahead-behind counts: git tag 0.07(0.06+0.00)
1500.5: ahead-behind counts: git rev-list 1.32(1.04+0.27)
But the standard performance benchmark is the Linux kernel repository,
which demonstrates a significant improvement:
Test this tree
---------------------------------------------------------------
1500.2: ahead-behind counts: git for-each-ref 0.27(0.24+0.02)
1500.3: ahead-behind counts: git branch 0.27(0.24+0.03)
1500.4: ahead-behind counts: git tag 0.28(0.27+0.01)
1500.5: ahead-behind counts: git rev-list 4.57(4.03+0.54)
The 'git rev-list' test exists in this change as a demonstration, but it
will be removed in the next change to avoid wasting time on this
comparison.
Signed-off-by: Derrick Stolee <derrickstolee@github.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-03-20 19:26:54 +08:00
|
|
|
} else if (atom_type == ATOM_AHEADBEHIND) {
|
|
|
|
if (ref->counts) {
|
|
|
|
const struct ahead_behind_count *count;
|
|
|
|
count = ref->counts[ahead_behind_atoms++];
|
|
|
|
v->s = xstrfmt("%d %d", count->ahead, count->behind);
|
|
|
|
} else {
|
|
|
|
/* Not a commit. */
|
|
|
|
v->s = xstrdup("");
|
|
|
|
}
|
|
|
|
continue;
|
2015-06-14 03:37:27 +08:00
|
|
|
} else
|
|
|
|
continue;
|
|
|
|
|
|
|
|
if (!deref)
|
2018-10-18 15:28:54 +08:00
|
|
|
v->s = xstrdup(refname);
|
2015-09-25 05:07:12 +08:00
|
|
|
else
|
|
|
|
v->s = xstrfmt("%s^{}", refname);
|
2018-10-18 15:28:54 +08:00
|
|
|
free((char *)refname);
|
2015-06-14 03:37:27 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
for (i = 0; i < used_atom_cnt; i++) {
|
|
|
|
struct atom_value *v = &ref->value[i];
|
2018-07-17 16:22:57 +08:00
|
|
|
if (v->s == NULL && used_atom[i].source == SOURCE_NONE)
|
|
|
|
return strbuf_addf_ret(err, -1, _("missing object %s for %s"),
|
|
|
|
oid_to_hex(&ref->objectname), ref->refname);
|
2015-06-14 03:37:27 +08:00
|
|
|
}
|
2018-07-17 16:22:57 +08:00
|
|
|
|
|
|
|
if (need_tagged)
|
|
|
|
oi.info.contentp = &oi.content;
|
|
|
|
if (!memcmp(&oi.info, &empty, sizeof(empty)) &&
|
|
|
|
!memcmp(&oi_deref.info, &empty, sizeof(empty)))
|
2018-03-29 20:49:45 +08:00
|
|
|
return 0;
|
2015-06-14 03:37:27 +08:00
|
|
|
|
2018-07-17 16:22:57 +08:00
|
|
|
|
|
|
|
oi.oid = ref->objectname;
|
|
|
|
if (get_object(ref, 0, &obj, &oi, err))
|
2018-03-29 20:49:45 +08:00
|
|
|
return -1;
|
2015-06-14 03:37:27 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* If there is no atom that wants to know about tagged
|
|
|
|
* object, we are done.
|
|
|
|
*/
|
|
|
|
if (!need_tagged || (obj->type != OBJ_TAG))
|
2018-03-29 20:49:45 +08:00
|
|
|
return 0;
|
2015-06-14 03:37:27 +08:00
|
|
|
|
|
|
|
/*
|
ref-filter.c: use peeled tag for '*' format fields
In most builtins ('rev-parse <revision>^{}', 'show-ref --dereference'),
"dereferencing" a tag refers to a recursive peel of the tag object. Unlike
these cases, the dereferencing prefix ('*') in 'for-each-ref' format
specifiers triggers only a single, non-recursive dereference of a given tag
object. For most annotated tags, a single dereference is all that is needed
to access the tag's associated commit or tree; "recursive" and
"non-recursive" dereferencing are functionally equivalent in these cases.
However, nested tags (annotated tags whose target is another annotated tag)
dereferenced once return another tag, where a recursive dereference would
return the commit or tree.
Currently, if a user wants to filter & format refs and include information
about a recursively-dereferenced tag, they can do so with something like
'cat-file --batch-check':
git for-each-ref --format="%(objectname)^{} %(refname)" <pattern> |
git cat-file --batch-check="%(objectname) %(rest)"
But the combination of commands is inefficient. So, to improve the
performance of this use case and align the dereferencing behavior of
'for-each-ref' with that of other commands, update the ref formatting code
to use the peeled tag (from 'peel_iterated_oid()') to populate '*' fields
rather than the tag's immediate target object (from 'get_tagged_oid()').
Additionally, add a test to 't6300-for-each-ref' to verify new nested tag
behavior and update 't6302-for-each-ref-filter.sh' to print the correct
value for nested dereferenced fields.
Signed-off-by: Victoria Dye <vdye@github.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-11-15 03:53:57 +08:00
|
|
|
* If it is a tag object, see if we use the peeled value. If we do,
|
|
|
|
* grab the peeled OID.
|
2015-06-14 03:37:27 +08:00
|
|
|
*/
|
ref-filter.c: use peeled tag for '*' format fields
In most builtins ('rev-parse <revision>^{}', 'show-ref --dereference'),
"dereferencing" a tag refers to a recursive peel of the tag object. Unlike
these cases, the dereferencing prefix ('*') in 'for-each-ref' format
specifiers triggers only a single, non-recursive dereference of a given tag
object. For most annotated tags, a single dereference is all that is needed
to access the tag's associated commit or tree; "recursive" and
"non-recursive" dereferencing are functionally equivalent in these cases.
However, nested tags (annotated tags whose target is another annotated tag)
dereferenced once return another tag, where a recursive dereference would
return the commit or tree.
Currently, if a user wants to filter & format refs and include information
about a recursively-dereferenced tag, they can do so with something like
'cat-file --batch-check':
git for-each-ref --format="%(objectname)^{} %(refname)" <pattern> |
git cat-file --batch-check="%(objectname) %(rest)"
But the combination of commands is inefficient. So, to improve the
performance of this use case and align the dereferencing behavior of
'for-each-ref' with that of other commands, update the ref formatting code
to use the peeled tag (from 'peel_iterated_oid()') to populate '*' fields
rather than the tag's immediate target object (from 'get_tagged_oid()').
Additionally, add a test to 't6300-for-each-ref' to verify new nested tag
behavior and update 't6302-for-each-ref-filter.sh' to print the correct
value for nested dereferenced fields.
Signed-off-by: Victoria Dye <vdye@github.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-11-15 03:53:57 +08:00
|
|
|
if (need_tagged && peel_iterated_oid(&obj->oid, &oi_deref.oid))
|
|
|
|
die("bad tag");
|
2015-06-14 03:37:27 +08:00
|
|
|
|
2018-07-17 16:22:57 +08:00
|
|
|
return get_object(ref, 1, &obj, &oi_deref, err);
|
2015-06-14 03:37:27 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Given a ref, return the value for the atom. This lazily gets value
|
|
|
|
* out of the object by calling populate value.
|
|
|
|
*/
|
2018-03-29 20:49:45 +08:00
|
|
|
static int get_ref_atom_value(struct ref_array_item *ref, int atom,
|
|
|
|
struct atom_value **v, struct strbuf *err)
|
2015-06-14 03:37:27 +08:00
|
|
|
{
|
|
|
|
if (!ref->value) {
|
2018-03-29 20:49:45 +08:00
|
|
|
if (populate_value(ref, err))
|
|
|
|
return -1;
|
2015-06-14 03:37:27 +08:00
|
|
|
fill_missing_values(ref->value);
|
|
|
|
}
|
|
|
|
*v = &ref->value[atom];
|
2018-03-29 20:49:45 +08:00
|
|
|
return 0;
|
2015-06-14 03:37:27 +08:00
|
|
|
}
|
|
|
|
|
2015-09-10 23:48:26 +08:00
|
|
|
/*
|
|
|
|
* Return 1 if the refname matches one of the patterns, otherwise 0.
|
|
|
|
* A pattern can be a literal prefix (e.g. a refname "refs/heads/master"
|
|
|
|
* matches a pattern "refs/heads/mas") or a wildcard (e.g. the same ref
|
|
|
|
* matches "refs/heads/mas*", too).
|
|
|
|
*/
|
2023-07-11 05:12:16 +08:00
|
|
|
static int match_pattern(const char **patterns, const char *refname,
|
|
|
|
int ignore_case)
|
2015-09-10 23:48:26 +08:00
|
|
|
{
|
2016-12-04 10:52:25 +08:00
|
|
|
unsigned flags = 0;
|
|
|
|
|
2023-07-11 05:12:16 +08:00
|
|
|
if (ignore_case)
|
2016-12-04 10:52:25 +08:00
|
|
|
flags |= WM_CASEFOLD;
|
|
|
|
|
2015-09-10 23:48:26 +08:00
|
|
|
/*
|
|
|
|
* When no '--format' option is given we need to skip the prefix
|
|
|
|
* for matching refs of tags and branches.
|
|
|
|
*/
|
|
|
|
(void)(skip_prefix(refname, "refs/tags/", &refname) ||
|
|
|
|
skip_prefix(refname, "refs/heads/", &refname) ||
|
|
|
|
skip_prefix(refname, "refs/remotes/", &refname) ||
|
|
|
|
skip_prefix(refname, "refs/", &refname));
|
|
|
|
|
|
|
|
for (; *patterns; patterns++) {
|
2017-06-23 05:38:08 +08:00
|
|
|
if (!wildmatch(*patterns, refname, flags))
|
2015-09-10 23:48:26 +08:00
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2015-06-14 03:37:27 +08:00
|
|
|
/*
|
|
|
|
* Return 1 if the refname matches one of the patterns, otherwise 0.
|
|
|
|
* A pattern can be path prefix (e.g. a refname "refs/heads/master"
|
2015-09-10 23:48:26 +08:00
|
|
|
* matches a pattern "refs/heads/" but not "refs/heads/m") or a
|
|
|
|
* wildcard (e.g. the same ref matches "refs/heads/m*", too).
|
2015-06-14 03:37:27 +08:00
|
|
|
*/
|
2023-07-11 05:12:16 +08:00
|
|
|
static int match_name_as_path(const char **pattern, const char *refname,
|
|
|
|
int ignore_case)
|
2015-06-14 03:37:27 +08:00
|
|
|
{
|
|
|
|
int namelen = strlen(refname);
|
2016-12-04 10:52:25 +08:00
|
|
|
unsigned flags = WM_PATHNAME;
|
|
|
|
|
2023-07-11 05:12:16 +08:00
|
|
|
if (ignore_case)
|
2016-12-04 10:52:25 +08:00
|
|
|
flags |= WM_CASEFOLD;
|
|
|
|
|
2015-06-14 03:37:27 +08:00
|
|
|
for (; *pattern; pattern++) {
|
|
|
|
const char *p = *pattern;
|
|
|
|
int plen = strlen(p);
|
|
|
|
|
|
|
|
if ((plen <= namelen) &&
|
|
|
|
!strncmp(refname, p, plen) &&
|
|
|
|
(refname[plen] == '\0' ||
|
|
|
|
refname[plen] == '/' ||
|
|
|
|
p[plen-1] == '/'))
|
|
|
|
return 1;
|
2018-07-03 05:11:59 +08:00
|
|
|
if (!wildmatch(p, refname, flags))
|
2015-06-14 03:37:27 +08:00
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2015-09-10 23:48:26 +08:00
|
|
|
/* Return 1 if the refname matches one of the patterns, otherwise 0. */
|
|
|
|
static int filter_pattern_match(struct ref_filter *filter, const char *refname)
|
|
|
|
{
|
|
|
|
if (!*filter->name_patterns)
|
|
|
|
return 1; /* No pattern always matches */
|
|
|
|
if (filter->match_as_path)
|
2023-07-11 05:12:16 +08:00
|
|
|
return match_name_as_path(filter->name_patterns, refname,
|
|
|
|
filter->ignore_case);
|
|
|
|
return match_pattern(filter->name_patterns, refname,
|
|
|
|
filter->ignore_case);
|
2015-09-10 23:48:26 +08:00
|
|
|
}
|
|
|
|
|
builtin/for-each-ref.c: add `--exclude` option
When using `for-each-ref`, it is sometimes convenient for the caller to
be able to exclude certain parts of the references.
For example, if there are many `refs/__hidden__/*` references, the
caller may want to emit all references *except* the hidden ones.
Currently, the only way to do this is to post-process the output, like:
$ git for-each-ref --format='%(refname)' | grep -v '^refs/hidden/'
Which is do-able, but requires processing a potentially large quantity
of references.
Teach `git for-each-ref` a new `--exclude=<pattern>` option, which
excludes references from the results if they match one or more excluded
patterns.
This patch provides a naive implementation where the `ref_filter` still
sees all references (including ones that it will discard) and is left to
check whether each reference matches any excluded pattern(s) before
emitting them.
By culling out references we know the caller doesn't care about, we can
avoid allocating memory for their storage, as well as spending time
sorting the output (among other things). Even the naive implementation
provides a significant speed-up on a modified copy of linux.git (that
has a hidden ref pointing at each commit):
$ hyperfine \
'git.compile for-each-ref --format="%(objectname) %(refname)" | grep -vE "[0-9a-f]{40} refs/pull/"' \
'git.compile for-each-ref --format="%(objectname) %(refname)" --exclude refs/pull/'
Benchmark 1: git.compile for-each-ref --format="%(objectname) %(refname)" | grep -vE "[0-9a-f]{40} refs/pull/"
Time (mean ± σ): 820.1 ms ± 2.0 ms [User: 703.7 ms, System: 152.0 ms]
Range (min … max): 817.7 ms … 823.3 ms 10 runs
Benchmark 2: git.compile for-each-ref --format="%(objectname) %(refname)" --exclude refs/pull/
Time (mean ± σ): 106.6 ms ± 1.1 ms [User: 99.4 ms, System: 7.1 ms]
Range (min … max): 104.7 ms … 109.1 ms 27 runs
Summary
'git.compile for-each-ref --format="%(objectname) %(refname)" --exclude refs/pull/' ran
7.69 ± 0.08 times faster than 'git.compile for-each-ref --format="%(objectname) %(refname)" | grep -vE "[0-9a-f]{40} refs/pull/"'
Subsequent patches will improve on this by avoiding visiting excluded
sections of the `packed-refs` file in certain cases.
Co-authored-by: Jeff King <peff@peff.net>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-07-11 05:12:19 +08:00
|
|
|
static int filter_exclude_match(struct ref_filter *filter, const char *refname)
|
|
|
|
{
|
|
|
|
if (!filter->exclude.nr)
|
|
|
|
return 0;
|
|
|
|
if (filter->match_as_path)
|
|
|
|
return match_name_as_path(filter->exclude.v, refname,
|
|
|
|
filter->ignore_case);
|
|
|
|
return match_pattern(filter->exclude.v, refname, filter->ignore_case);
|
2015-09-10 23:48:26 +08:00
|
|
|
}
|
|
|
|
|
ref-filter: limit traversal to prefix
When we are matching refnames against a pattern, then we know that the
beginning of any refname that can match the pattern has to match the
part of the pattern up to the first glob character. For example, if
the pattern is `refs/heads/foo*bar`, then it can only match a
reference that has the prefix `refs/heads/foo`.
So pass that prefix to `for_each_fullref_in()`. This lets the ref code
avoid passing us the full set of refs, and in some cases avoid reading
them in the first place.
Note that this applies only when the `match_as_path` flag is set
(i.e., when `for-each-ref` is the caller), as the matching rules for
git-branch and git-tag are subtly different.
This could be generalized to the case of multiple patterns, but (a) it
probably doesn't come up that often, and (b) it is more awkward to
deal with multiple patterns (e.g., the patterns might not be
disjoint). So, since this is just an optimization, punt on the case of
multiple patterns.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-05-22 22:17:54 +08:00
|
|
|
/*
|
|
|
|
* This is the same as for_each_fullref_in(), but it tries to iterate
|
|
|
|
* only over the patterns we'll care about. Note that it _doesn't_ do a full
|
|
|
|
* pattern match, so the callback still has to match each ref individually.
|
|
|
|
*/
|
|
|
|
static int for_each_fullref_in_pattern(struct ref_filter *filter,
|
|
|
|
each_ref_fn cb,
|
2021-09-25 02:48:48 +08:00
|
|
|
void *cb_data)
|
ref-filter: limit traversal to prefix
When we are matching refnames against a pattern, then we know that the
beginning of any refname that can match the pattern has to match the
part of the pattern up to the first glob character. For example, if
the pattern is `refs/heads/foo*bar`, then it can only match a
reference that has the prefix `refs/heads/foo`.
So pass that prefix to `for_each_fullref_in()`. This lets the ref code
avoid passing us the full set of refs, and in some cases avoid reading
them in the first place.
Note that this applies only when the `match_as_path` flag is set
(i.e., when `for-each-ref` is the caller), as the matching rules for
git-branch and git-tag are subtly different.
This could be generalized to the case of multiple patterns, but (a) it
probably doesn't come up that often, and (b) it is more awkward to
deal with multiple patterns (e.g., the patterns might not be
disjoint). So, since this is just an optimization, punt on the case of
multiple patterns.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-05-22 22:17:54 +08:00
|
|
|
{
|
|
|
|
if (!filter->match_as_path) {
|
|
|
|
/*
|
|
|
|
* in this case, the patterns are applied after
|
|
|
|
* prefixes like "refs/heads/" etc. are stripped off,
|
|
|
|
* so we have to look at everything:
|
|
|
|
*/
|
2021-09-25 02:48:48 +08:00
|
|
|
return for_each_fullref_in("", cb, cb_data);
|
ref-filter: limit traversal to prefix
When we are matching refnames against a pattern, then we know that the
beginning of any refname that can match the pattern has to match the
part of the pattern up to the first glob character. For example, if
the pattern is `refs/heads/foo*bar`, then it can only match a
reference that has the prefix `refs/heads/foo`.
So pass that prefix to `for_each_fullref_in()`. This lets the ref code
avoid passing us the full set of refs, and in some cases avoid reading
them in the first place.
Note that this applies only when the `match_as_path` flag is set
(i.e., when `for-each-ref` is the caller), as the matching rules for
git-branch and git-tag are subtly different.
This could be generalized to the case of multiple patterns, but (a) it
probably doesn't come up that often, and (b) it is more awkward to
deal with multiple patterns (e.g., the patterns might not be
disjoint). So, since this is just an optimization, punt on the case of
multiple patterns.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-05-22 22:17:54 +08:00
|
|
|
}
|
|
|
|
|
2018-07-03 05:12:42 +08:00
|
|
|
if (filter->ignore_case) {
|
|
|
|
/*
|
|
|
|
* we can't handle case-insensitive comparisons,
|
|
|
|
* so just return everything and let the caller
|
|
|
|
* sort it out.
|
|
|
|
*/
|
2021-09-25 02:48:48 +08:00
|
|
|
return for_each_fullref_in("", cb, cb_data);
|
2018-07-03 05:12:42 +08:00
|
|
|
}
|
|
|
|
|
ref-filter: limit traversal to prefix
When we are matching refnames against a pattern, then we know that the
beginning of any refname that can match the pattern has to match the
part of the pattern up to the first glob character. For example, if
the pattern is `refs/heads/foo*bar`, then it can only match a
reference that has the prefix `refs/heads/foo`.
So pass that prefix to `for_each_fullref_in()`. This lets the ref code
avoid passing us the full set of refs, and in some cases avoid reading
them in the first place.
Note that this applies only when the `match_as_path` flag is set
(i.e., when `for-each-ref` is the caller), as the matching rules for
git-branch and git-tag are subtly different.
This could be generalized to the case of multiple patterns, but (a) it
probably doesn't come up that often, and (b) it is more awkward to
deal with multiple patterns (e.g., the patterns might not be
disjoint). So, since this is just an optimization, punt on the case of
multiple patterns.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-05-22 22:17:54 +08:00
|
|
|
if (!filter->name_patterns[0]) {
|
|
|
|
/* no patterns; we have to look at everything */
|
refs/packed-backend.c: implement jump lists to avoid excluded pattern(s)
When iterating through the `packed-refs` file in order to answer a query
like:
$ git for-each-ref --exclude=refs/__hidden__
it would be useful to avoid walking over all of the entries in
`refs/__hidden__/*` when possible, since we know that the ref-filter
code is going to throw them away anyways.
In certain circumstances, doing so is possible. The algorithm for doing
so is as follows:
- For each excluded pattern, find the first record that matches it,
and the first record that *doesn't* match it (i.e. the location
you'd next want to consider when excluding that pattern).
- Sort the set of excluded regions from the previous step in ascending
order of the first location within the `packed-refs` file that
matches.
- Clean up the results from the previous step: discard empty regions,
and combine adjacent regions. The set of regions which remains is
referred to as the "jump list", and never contains any references
which should be included in the result set.
Then when iterating through the `packed-refs` file, if `iter->pos` is
ever contained in one of the regions from the previous steps, advance
`iter->pos` past the end of that region, and continue enumeration.
Note that we only perform this optimization when none of the excluded
pattern(s) have special meta-characters in them. For a pattern like
"refs/foo[ac]", the excluded regions ("refs/fooa", "refs/fooc", and
everything underneath them) are not connected. A future implementation
that handles this case may split the character class (pretending as if
two patterns were excluded: "refs/fooa", and "refs/fooc").
There are a few other gotchas worth considering. First, note that the
jump list is sorted, so once we jump past a region, we can avoid
considering it (or any regions preceding it) again. The member
`jump_pos` is used to track the first next-possible region to jump
through.
Second, note that the jump list is best-effort, since we do not handle
loose references, and because of the meta-character issue above. The
jump list may not skip past all references which won't appear in the
results, but will never skip over a reference which does appear in the
result set.
In repositories with a large number of hidden references, the speed-up
can be significant. Tests here are done with a copy of linux.git with a
reference "refs/pull/N" pointing at every commit, as in:
$ git rev-list HEAD | awk '{ print "create refs/pull/" NR " " $0 }' |
git update-ref --stdin
$ git pack-refs --all
, it is significantly faster to have `for-each-ref` jump over the
excluded references, as opposed to filtering them out after the fact:
$ hyperfine \
'git for-each-ref --format="%(objectname) %(refname)" | grep -vE "^[0-9a-f]{40} refs/pull/"' \
'git.prev for-each-ref --format="%(objectname) %(refname)" --exclude="refs/pull"' \
'git.compile for-each-ref --format="%(objectname) %(refname)" --exclude="refs/pull"'
Benchmark 1: git for-each-ref --format="%(objectname) %(refname)" | grep -vE "^[0-9a-f]{40} refs/pull/"
Time (mean ± σ): 798.1 ms ± 3.3 ms [User: 687.6 ms, System: 146.4 ms]
Range (min … max): 794.5 ms … 805.5 ms 10 runs
Benchmark 2: git.prev for-each-ref --format="%(objectname) %(refname)" --exclude="refs/pull"
Time (mean ± σ): 98.9 ms ± 1.4 ms [User: 93.1 ms, System: 5.7 ms]
Range (min … max): 97.0 ms … 104.0 ms 29 runs
Benchmark 3: git.compile for-each-ref --format="%(objectname) %(refname)" --exclude="refs/pull"
Time (mean ± σ): 4.5 ms ± 0.2 ms [User: 0.7 ms, System: 3.8 ms]
Range (min … max): 4.1 ms … 5.8 ms 524 runs
Summary
'git.compile for-each-ref --format="%(objectname) %(refname)" --exclude="refs/pull"' ran
21.87 ± 1.05 times faster than 'git.prev for-each-ref --format="%(objectname) %(refname)" --exclude="refs/pull"'
176.52 ± 8.19 times faster than 'git for-each-ref --format="%(objectname) %(refname)" | grep -vE "^[0-9a-f]{40} refs/pull/"'
(Comparing stock git and this patch isn't quite fair, since an earlier
commit in this series adds a naive implementation of the `--exclude`
option. `git.prev` is built from the previous commit and includes this
naive implementation).
Using the jump list is fairly straightforward (see the changes to
`refs/packed-backend.c::next_record()`), but constructing the list is
not. To ensure that the construction is correct, add a new suite of
tests in t1419 covering various corner cases (overlapping regions,
partially overlapping regions, adjacent regions, etc.).
Co-authored-by: Jeff King <peff@peff.net>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-07-11 05:12:28 +08:00
|
|
|
return refs_for_each_fullref_in(get_main_ref_store(the_repository),
|
|
|
|
"", filter->exclude.v, cb, cb_data);
|
ref-filter: limit traversal to prefix
When we are matching refnames against a pattern, then we know that the
beginning of any refname that can match the pattern has to match the
part of the pattern up to the first glob character. For example, if
the pattern is `refs/heads/foo*bar`, then it can only match a
reference that has the prefix `refs/heads/foo`.
So pass that prefix to `for_each_fullref_in()`. This lets the ref code
avoid passing us the full set of refs, and in some cases avoid reading
them in the first place.
Note that this applies only when the `match_as_path` flag is set
(i.e., when `for-each-ref` is the caller), as the matching rules for
git-branch and git-tag are subtly different.
This could be generalized to the case of multiple patterns, but (a) it
probably doesn't come up that often, and (b) it is more awkward to
deal with multiple patterns (e.g., the patterns might not be
disjoint). So, since this is just an optimization, punt on the case of
multiple patterns.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-05-22 22:17:54 +08:00
|
|
|
}
|
|
|
|
|
2022-12-13 19:11:10 +08:00
|
|
|
return refs_for_each_fullref_in_prefixes(get_main_ref_store(the_repository),
|
|
|
|
NULL, filter->name_patterns,
|
refs/packed-backend.c: implement jump lists to avoid excluded pattern(s)
When iterating through the `packed-refs` file in order to answer a query
like:
$ git for-each-ref --exclude=refs/__hidden__
it would be useful to avoid walking over all of the entries in
`refs/__hidden__/*` when possible, since we know that the ref-filter
code is going to throw them away anyways.
In certain circumstances, doing so is possible. The algorithm for doing
so is as follows:
- For each excluded pattern, find the first record that matches it,
and the first record that *doesn't* match it (i.e. the location
you'd next want to consider when excluding that pattern).
- Sort the set of excluded regions from the previous step in ascending
order of the first location within the `packed-refs` file that
matches.
- Clean up the results from the previous step: discard empty regions,
and combine adjacent regions. The set of regions which remains is
referred to as the "jump list", and never contains any references
which should be included in the result set.
Then when iterating through the `packed-refs` file, if `iter->pos` is
ever contained in one of the regions from the previous steps, advance
`iter->pos` past the end of that region, and continue enumeration.
Note that we only perform this optimization when none of the excluded
pattern(s) have special meta-characters in them. For a pattern like
"refs/foo[ac]", the excluded regions ("refs/fooa", "refs/fooc", and
everything underneath them) are not connected. A future implementation
that handles this case may split the character class (pretending as if
two patterns were excluded: "refs/fooa", and "refs/fooc").
There are a few other gotchas worth considering. First, note that the
jump list is sorted, so once we jump past a region, we can avoid
considering it (or any regions preceding it) again. The member
`jump_pos` is used to track the first next-possible region to jump
through.
Second, note that the jump list is best-effort, since we do not handle
loose references, and because of the meta-character issue above. The
jump list may not skip past all references which won't appear in the
results, but will never skip over a reference which does appear in the
result set.
In repositories with a large number of hidden references, the speed-up
can be significant. Tests here are done with a copy of linux.git with a
reference "refs/pull/N" pointing at every commit, as in:
$ git rev-list HEAD | awk '{ print "create refs/pull/" NR " " $0 }' |
git update-ref --stdin
$ git pack-refs --all
, it is significantly faster to have `for-each-ref` jump over the
excluded references, as opposed to filtering them out after the fact:
$ hyperfine \
'git for-each-ref --format="%(objectname) %(refname)" | grep -vE "^[0-9a-f]{40} refs/pull/"' \
'git.prev for-each-ref --format="%(objectname) %(refname)" --exclude="refs/pull"' \
'git.compile for-each-ref --format="%(objectname) %(refname)" --exclude="refs/pull"'
Benchmark 1: git for-each-ref --format="%(objectname) %(refname)" | grep -vE "^[0-9a-f]{40} refs/pull/"
Time (mean ± σ): 798.1 ms ± 3.3 ms [User: 687.6 ms, System: 146.4 ms]
Range (min … max): 794.5 ms … 805.5 ms 10 runs
Benchmark 2: git.prev for-each-ref --format="%(objectname) %(refname)" --exclude="refs/pull"
Time (mean ± σ): 98.9 ms ± 1.4 ms [User: 93.1 ms, System: 5.7 ms]
Range (min … max): 97.0 ms … 104.0 ms 29 runs
Benchmark 3: git.compile for-each-ref --format="%(objectname) %(refname)" --exclude="refs/pull"
Time (mean ± σ): 4.5 ms ± 0.2 ms [User: 0.7 ms, System: 3.8 ms]
Range (min … max): 4.1 ms … 5.8 ms 524 runs
Summary
'git.compile for-each-ref --format="%(objectname) %(refname)" --exclude="refs/pull"' ran
21.87 ± 1.05 times faster than 'git.prev for-each-ref --format="%(objectname) %(refname)" --exclude="refs/pull"'
176.52 ± 8.19 times faster than 'git for-each-ref --format="%(objectname) %(refname)" | grep -vE "^[0-9a-f]{40} refs/pull/"'
(Comparing stock git and this patch isn't quite fair, since an earlier
commit in this series adds a naive implementation of the `--exclude`
option. `git.prev` is built from the previous commit and includes this
naive implementation).
Using the jump list is fairly straightforward (see the changes to
`refs/packed-backend.c::next_record()`), but constructing the list is
not. To ensure that the construction is correct, add a new suite of
tests in t1419 covering various corner cases (overlapping regions,
partially overlapping regions, adjacent regions, etc.).
Co-authored-by: Jeff King <peff@peff.net>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-07-11 05:12:28 +08:00
|
|
|
filter->exclude.v,
|
2022-12-13 19:11:10 +08:00
|
|
|
cb, cb_data);
|
ref-filter: limit traversal to prefix
When we are matching refnames against a pattern, then we know that the
beginning of any refname that can match the pattern has to match the
part of the pattern up to the first glob character. For example, if
the pattern is `refs/heads/foo*bar`, then it can only match a
reference that has the prefix `refs/heads/foo`.
So pass that prefix to `for_each_fullref_in()`. This lets the ref code
avoid passing us the full set of refs, and in some cases avoid reading
them in the first place.
Note that this applies only when the `match_as_path` flag is set
(i.e., when `for-each-ref` is the caller), as the matching rules for
git-branch and git-tag are subtly different.
This could be generalized to the case of multiple patterns, but (a) it
probably doesn't come up that often, and (b) it is more awkward to
deal with multiple patterns (e.g., the patterns might not be
disjoint). So, since this is just an optimization, punt on the case of
multiple patterns.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-05-22 22:17:54 +08:00
|
|
|
}
|
|
|
|
|
2015-07-08 00:06:09 +08:00
|
|
|
/*
|
2020-03-30 22:04:11 +08:00
|
|
|
* Given a ref (oid, refname), check if the ref belongs to the array
|
|
|
|
* of oids. If the given ref is a tag, check if the given tag points
|
2023-07-03 06:38:29 +08:00
|
|
|
* at one of the oids in the given oid array. Returns non-zero if a
|
|
|
|
* match is found.
|
|
|
|
*
|
2015-07-08 00:06:09 +08:00
|
|
|
* NEEDSWORK:
|
2023-07-02 04:57:02 +08:00
|
|
|
* As the refs are cached we might know what refname peels to without
|
2015-07-08 00:06:09 +08:00
|
|
|
* the need to parse the object via parse_object(). peel_ref() might be a
|
|
|
|
* more efficient alternative to obtain the pointee.
|
|
|
|
*/
|
2023-07-03 06:38:29 +08:00
|
|
|
static int match_points_at(struct oid_array *points_at,
|
|
|
|
const struct object_id *oid,
|
|
|
|
const char *refname)
|
2015-07-08 00:06:09 +08:00
|
|
|
{
|
|
|
|
struct object *obj;
|
|
|
|
|
2017-03-31 09:40:00 +08:00
|
|
|
if (oid_array_lookup(points_at, oid) >= 0)
|
2023-07-03 06:38:29 +08:00
|
|
|
return 1;
|
ref-filter: avoid parsing non-tags in match_points_at()
When handling --points-at, we have to try to peel each ref to see if
it's a tag that points at a requested oid. We start this process by
calling parse_object() on the oid pointed to by each ref.
The cost of parsing each object adds up, especially in an output that
doesn't otherwise need to open the objects at all. Ideally we'd use
peel_iterated_oid() here, which uses the cached information in the
packed-refs file. But we can't, because our --points-at must match not
only the fully peeled value, but any interim values (so if tag A points
to tag B which points to commit C, we should match --points-at=B, but
peel_iterated_oid() will only tell us about C).
So the best we can do (absent changes to the packed-refs peel traits) is
to avoid parsing non-tags. The obvious way to do that is to call
oid_object_info() to check the type before parsing. But there are a few
gotchas there, like checking if the object has already been parsed.
Instead we can just tell parse_object() that we are OK skipping the hash
check, which lets it turn on several optimizations. Commits can be
loaded via the commit graph (so it's both fast and we have the benefit
of the parsed data if we need it later at the output stage). Blobs are
not loaded at all. Trees are still loaded, but it's rather rare to have
a ref point directly to a tree (and since this is just an optimization,
kicking in 99% of the time is OK).
Even though we're paying for an extra lookup, the cost to avoid parsing
the non-tags is a net benefit. In my git.git repository with 941 tags
and 1440 other refs pointing to commits, this significantly cuts the
runtime:
Benchmark 1: ./git.old for-each-ref --points-at=HEAD
Time (mean ± σ): 26.8 ms ± 0.5 ms [User: 24.5 ms, System: 2.2 ms]
Range (min … max): 25.9 ms … 29.2 ms 107 runs
Benchmark 2: ./git.new for-each-ref --points-at=HEAD
Time (mean ± σ): 9.1 ms ± 0.3 ms [User: 6.8 ms, System: 2.2 ms]
Range (min … max): 8.6 ms … 10.2 ms 308 runs
Summary
'./git.new for-each-ref --points-at=HEAD' ran
2.96 ± 0.10 times faster than './git.old for-each-ref --points-at=HEAD'
In a repository that is mostly annotated tags, we'd expect less
improvement (we might still skip a few object loads, but that's balanced
by the extra lookups). In my clone of linux.git, which has 782 tags and
3 branches, the run-time is about the same (it's actually ~1% faster on
average after this patch, but that's within the run-to-run noise).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-07-03 06:37:47 +08:00
|
|
|
obj = parse_object_with_flags(the_repository, oid,
|
|
|
|
PARSE_OBJECT_SKIP_HASH_CHECK);
|
2023-07-02 04:57:02 +08:00
|
|
|
while (obj && obj->type == OBJ_TAG) {
|
2023-07-03 06:35:40 +08:00
|
|
|
struct tag *tag = (struct tag *)obj;
|
|
|
|
|
|
|
|
if (parse_tag(tag) < 0) {
|
|
|
|
obj = NULL;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2023-07-03 06:38:29 +08:00
|
|
|
if (oid_array_lookup(points_at, get_tagged_oid(tag)) >= 0)
|
|
|
|
return 1;
|
2023-07-03 06:35:40 +08:00
|
|
|
|
|
|
|
obj = tag->tagged;
|
2023-07-02 04:57:02 +08:00
|
|
|
}
|
2015-07-08 00:06:09 +08:00
|
|
|
if (!obj)
|
|
|
|
die(_("malformed object at '%s'"), refname);
|
2023-07-03 06:38:29 +08:00
|
|
|
return 0;
|
2015-07-08 00:06:09 +08:00
|
|
|
}
|
|
|
|
|
2018-04-07 02:59:26 +08:00
|
|
|
/*
|
|
|
|
* Allocate space for a new ref_array_item and copy the name and oid to it.
|
|
|
|
*
|
|
|
|
* Callers can then fill in other struct members at their leisure.
|
|
|
|
*/
|
2015-06-14 03:37:27 +08:00
|
|
|
static struct ref_array_item *new_ref_array_item(const char *refname,
|
2018-04-07 02:59:26 +08:00
|
|
|
const struct object_id *oid)
|
2015-06-14 03:37:27 +08:00
|
|
|
{
|
2016-02-23 06:44:32 +08:00
|
|
|
struct ref_array_item *ref;
|
2018-04-07 02:59:26 +08:00
|
|
|
|
2016-02-23 06:44:32 +08:00
|
|
|
FLEX_ALLOC_STR(ref, refname, refname);
|
2018-04-07 02:58:32 +08:00
|
|
|
oidcpy(&ref->objectname, oid);
|
2021-07-26 11:26:50 +08:00
|
|
|
ref->rest = NULL;
|
2015-06-14 03:37:27 +08:00
|
|
|
|
|
|
|
return ref;
|
|
|
|
}
|
|
|
|
|
2023-11-15 03:53:54 +08:00
|
|
|
static void ref_array_append(struct ref_array *array, struct ref_array_item *ref)
|
|
|
|
{
|
|
|
|
ALLOC_GROW(array->items, array->nr + 1, array->alloc);
|
|
|
|
array->items[array->nr++] = ref;
|
|
|
|
}
|
|
|
|
|
2018-04-07 02:59:45 +08:00
|
|
|
/*
 * Build a new item for (refname, oid), append it to `array`, and hand
 * the item back so the caller can fill in its remaining members.
 */
struct ref_array_item *ref_array_push(struct ref_array *array,
				      const char *refname,
				      const struct object_id *oid)
{
	struct ref_array_item *item;

	item = new_ref_array_item(refname, oid);
	ref_array_append(array, item);

	return item;
}
|
|
|
|
|
2017-01-18 07:37:19 +08:00
|
|
|
static int ref_kind_from_refname(const char *refname)
|
2015-09-10 23:48:23 +08:00
|
|
|
{
|
|
|
|
unsigned int i;
|
|
|
|
|
|
|
|
static struct {
|
|
|
|
const char *prefix;
|
|
|
|
unsigned int kind;
|
|
|
|
} ref_kind[] = {
|
|
|
|
{ "refs/heads/" , FILTER_REFS_BRANCHES },
|
|
|
|
{ "refs/remotes/" , FILTER_REFS_REMOTES },
|
|
|
|
{ "refs/tags/", FILTER_REFS_TAGS}
|
|
|
|
};
|
|
|
|
|
2017-01-18 07:37:19 +08:00
|
|
|
if (!strcmp(refname, "HEAD"))
|
2015-09-10 23:48:23 +08:00
|
|
|
return FILTER_REFS_DETACHED_HEAD;
|
|
|
|
|
|
|
|
for (i = 0; i < ARRAY_SIZE(ref_kind); i++) {
|
|
|
|
if (starts_with(refname, ref_kind[i].prefix))
|
|
|
|
return ref_kind[i].kind;
|
|
|
|
}
|
|
|
|
|
|
|
|
return FILTER_REFS_OTHERS;
|
|
|
|
}
|
|
|
|
|
2017-01-18 07:37:19 +08:00
|
|
|
static int filter_ref_kind(struct ref_filter *filter, const char *refname)
|
|
|
|
{
|
|
|
|
if (filter->kind == FILTER_REFS_BRANCHES ||
|
|
|
|
filter->kind == FILTER_REFS_REMOTES ||
|
|
|
|
filter->kind == FILTER_REFS_TAGS)
|
|
|
|
return filter->kind;
|
|
|
|
return ref_kind_from_refname(refname);
|
|
|
|
}
|
|
|
|
|
2023-11-15 03:53:54 +08:00
|
|
|
static struct ref_array_item *apply_ref_filter(const char *refname, const struct object_id *oid,
|
|
|
|
int flag, struct ref_filter *filter)
|
2015-06-14 03:37:27 +08:00
|
|
|
{
|
|
|
|
struct ref_array_item *ref;
|
2015-07-08 00:06:12 +08:00
|
|
|
struct commit *commit = NULL;
|
2015-09-10 23:48:23 +08:00
|
|
|
unsigned int kind;
|
2015-06-14 03:37:27 +08:00
|
|
|
|
|
|
|
if (flag & REF_BAD_NAME) {
|
2016-02-27 14:42:04 +08:00
|
|
|
warning(_("ignoring ref with broken name %s"), refname);
|
2023-11-15 03:53:54 +08:00
|
|
|
return NULL;
|
2015-06-14 03:37:27 +08:00
|
|
|
}
|
|
|
|
|
2015-08-04 02:01:10 +08:00
|
|
|
if (flag & REF_ISBROKEN) {
|
2016-02-27 14:42:04 +08:00
|
|
|
warning(_("ignoring broken ref %s"), refname);
|
2023-11-15 03:53:54 +08:00
|
|
|
return NULL;
|
2015-08-04 02:01:10 +08:00
|
|
|
}
|
|
|
|
|
2015-09-10 23:48:23 +08:00
|
|
|
/* Obtain the current ref kind from filter_ref_kind() and ignore unwanted refs. */
|
|
|
|
kind = filter_ref_kind(filter, refname);
|
|
|
|
if (!(kind & filter->kind))
|
2023-11-15 03:53:54 +08:00
|
|
|
return NULL;
|
2015-09-10 23:48:23 +08:00
|
|
|
|
2015-09-10 23:48:26 +08:00
|
|
|
if (!filter_pattern_match(filter, refname))
|
2023-11-15 03:53:54 +08:00
|
|
|
return NULL;
|
2015-06-14 03:37:27 +08:00
|
|
|
|
builtin/for-each-ref.c: add `--exclude` option
When using `for-each-ref`, it is sometimes convenient for the caller to
be able to exclude certain parts of the references.
For example, if there are many `refs/__hidden__/*` references, the
caller may want to emit all references *except* the hidden ones.
Currently, the only way to do this is to post-process the output, like:
$ git for-each-ref --format='%(refname)' | grep -v '^refs/hidden/'
Which is do-able, but requires processing a potentially large quantity
of references.
Teach `git for-each-ref` a new `--exclude=<pattern>` option, which
excludes references from the results if they match one or more excluded
patterns.
This patch provides a naive implementation where the `ref_filter` still
sees all references (including ones that it will discard) and is left to
check whether each reference matches any excluded pattern(s) before
emitting them.
By culling out references we know the caller doesn't care about, we can
avoid allocating memory for their storage, as well as spending time
sorting the output (among other things). Even the naive implementation
provides a significant speed-up on a modified copy of linux.git (that
has a hidden ref pointing at each commit):
$ hyperfine \
'git.compile for-each-ref --format="%(objectname) %(refname)" | grep -vE "[0-9a-f]{40} refs/pull/"' \
'git.compile for-each-ref --format="%(objectname) %(refname)" --exclude refs/pull/'
Benchmark 1: git.compile for-each-ref --format="%(objectname) %(refname)" | grep -vE "[0-9a-f]{40} refs/pull/"
Time (mean ± σ): 820.1 ms ± 2.0 ms [User: 703.7 ms, System: 152.0 ms]
Range (min … max): 817.7 ms … 823.3 ms 10 runs
Benchmark 2: git.compile for-each-ref --format="%(objectname) %(refname)" --exclude refs/pull/
Time (mean ± σ): 106.6 ms ± 1.1 ms [User: 99.4 ms, System: 7.1 ms]
Range (min … max): 104.7 ms … 109.1 ms 27 runs
Summary
'git.compile for-each-ref --format="%(objectname) %(refname)" --exclude refs/pull/' ran
7.69 ± 0.08 times faster than 'git.compile for-each-ref --format="%(objectname) %(refname)" | grep -vE "[0-9a-f]{40} refs/pull/"'
Subsequent patches will improve on this by avoiding visiting excluded
sections of the `packed-refs` file in certain cases.
Co-authored-by: Jeff King <peff@peff.net>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-07-11 05:12:19 +08:00
|
|
|
if (filter_exclude_match(filter, refname))
|
2023-11-15 03:53:54 +08:00
|
|
|
return NULL;
|
builtin/for-each-ref.c: add `--exclude` option
When using `for-each-ref`, it is sometimes convenient for the caller to
be able to exclude certain parts of the references.
For example, if there are many `refs/__hidden__/*` references, the
caller may want to emit all references *except* the hidden ones.
Currently, the only way to do this is to post-process the output, like:
$ git for-each-ref --format='%(refname)' | grep -v '^refs/hidden/'
Which is do-able, but requires processing a potentially large quantity
of references.
Teach `git for-each-ref` a new `--exclude=<pattern>` option, which
excludes references from the results if they match one or more excluded
patterns.
This patch provides a naive implementation where the `ref_filter` still
sees all references (including ones that it will discard) and is left to
check whether each reference matches any excluded pattern(s) before
emitting them.
By culling out references we know the caller doesn't care about, we can
avoid allocating memory for their storage, as well as spending time
sorting the output (among other things). Even the naive implementation
provides a significant speed-up on a modified copy of linux.git (that
has a hidden ref pointing at each commit):
$ hyperfine \
'git.compile for-each-ref --format="%(objectname) %(refname)" | grep -vE "[0-9a-f]{40} refs/pull/"' \
'git.compile for-each-ref --format="%(objectname) %(refname)" --exclude refs/pull/'
Benchmark 1: git.compile for-each-ref --format="%(objectname) %(refname)" | grep -vE "[0-9a-f]{40} refs/pull/"
Time (mean ± σ): 820.1 ms ± 2.0 ms [User: 703.7 ms, System: 152.0 ms]
Range (min … max): 817.7 ms … 823.3 ms 10 runs
Benchmark 2: git.compile for-each-ref --format="%(objectname) %(refname)" --exclude refs/pull/
Time (mean ± σ): 106.6 ms ± 1.1 ms [User: 99.4 ms, System: 7.1 ms]
Range (min … max): 104.7 ms … 109.1 ms 27 runs
Summary
'git.compile for-each-ref --format="%(objectname) %(refname)" --exclude refs/pull/' ran
7.69 ± 0.08 times faster than 'git.compile for-each-ref --format="%(objectname) %(refname)" | grep -vE "[0-9a-f]{40} refs/pull/"'
Subsequent patches will improve on this by avoiding visiting excluded
sections of the `packed-refs` file in certain cases.
Co-authored-by: Jeff King <peff@peff.net>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-07-11 05:12:19 +08:00
|
|
|
|
2017-03-31 09:39:57 +08:00
|
|
|
if (filter->points_at.nr && !match_points_at(&filter->points_at, oid, refname))
|
2023-11-15 03:53:54 +08:00
|
|
|
return NULL;
|
2015-07-08 00:06:09 +08:00
|
|
|
|
2015-07-08 00:06:12 +08:00
|
|
|
/*
|
|
|
|
* A merge filter is applied on refs pointing to commits. Hence
|
|
|
|
* obtain the commit using the 'oid' available and discard all
|
|
|
|
* non-commits early. The actual filtering is done later.
|
|
|
|
*/
|
2020-09-16 10:08:40 +08:00
|
|
|
if (filter->reachable_from || filter->unreachable_from ||
|
|
|
|
filter->with_commit || filter->no_commit || filter->verbose) {
|
|
|
|
commit = lookup_commit_reference_gently(the_repository, oid, 1);
|
2015-07-08 00:06:12 +08:00
|
|
|
if (!commit)
|
2023-11-15 03:53:54 +08:00
|
|
|
return NULL;
|
ref-filter: add --no-contains option to tag/branch/for-each-ref
Change the tag, branch & for-each-ref commands to have a --no-contains
option in addition to their longstanding --contains options.
This allows for finding the last-good rollout tag given a known-bad
<commit>. Given a hypothetically bad commit cf5c7253e0, the git
version to revert to can be found with this hacky two-liner:
(git tag -l 'v[0-9]*'; git tag -l --contains cf5c7253e0 'v[0-9]*') |
sort | uniq -c | grep -E '^ *1 ' | awk '{print $2}' | tail -n 10
With this new --no-contains option the same can be achieved with:
git tag -l --no-contains cf5c7253e0 'v[0-9]*' | sort | tail -n 10
As the filtering machinery is shared between the tag, branch &
for-each-ref commands, implement this for those commands too. A
practical use for this with "branch" is e.g. finding branches which
were branched off between v2.8.0 and v2.10.0:
git branch --contains v2.8.0 --no-contains v2.10.0
The "describe" command also has a --contains option, but its semantics
are unrelated to what tag/branch/for-each-ref use --contains for. A
--no-contains option for "describe" wouldn't make any sense, other
than being exactly equivalent to not supplying --contains at all,
which would be confusing at best.
Add a --without option to "tag" as an alias for --no-contains, for
consistency with --with and --contains. The --with option is
undocumented, and possibly the only user of it is
Junio (<xmqqefy71iej.fsf@gitster.mtv.corp.google.com>). But it's
trivial to support, so let's do that.
The additions to the test suite are inverse copies of the
corresponding --contains tests. With this change --no-contains for
tag, branch & for-each-ref is just as well tested as the existing
--contains option.
In addition to those tests, add a test for "tag" which asserts that
--no-contains won't find tree/blob tags, which is slightly
unintuitive, but consistent with how --contains works & is documented.
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-03-25 02:40:57 +08:00
|
|
|
/* We perform the filtering for the '--contains' option... */
|
2015-07-08 00:06:16 +08:00
|
|
|
if (filter->with_commit &&
|
2023-11-15 03:53:51 +08:00
|
|
|
!commit_contains(filter, commit, filter->with_commit, &filter->internal.contains_cache))
|
2023-11-15 03:53:54 +08:00
|
|
|
return NULL;
|
ref-filter: add --no-contains option to tag/branch/for-each-ref
Change the tag, branch & for-each-ref commands to have a --no-contains
option in addition to their longstanding --contains options.
This allows for finding the last-good rollout tag given a known-bad
<commit>. Given a hypothetically bad commit cf5c7253e0, the git
version to revert to can be found with this hacky two-liner:
(git tag -l 'v[0-9]*'; git tag -l --contains cf5c7253e0 'v[0-9]*') |
sort | uniq -c | grep -E '^ *1 ' | awk '{print $2}' | tail -n 10
With this new --no-contains option the same can be achieved with:
git tag -l --no-contains cf5c7253e0 'v[0-9]*' | sort | tail -n 10
As the filtering machinery is shared between the tag, branch &
for-each-ref commands, implement this for those commands too. A
practical use for this with "branch" is e.g. finding branches which
were branched off between v2.8.0 and v2.10.0:
git branch --contains v2.8.0 --no-contains v2.10.0
The "describe" command also has a --contains option, but its semantics
are unrelated to what tag/branch/for-each-ref use --contains for. A
--no-contains option for "describe" wouldn't make any sense, other
than being exactly equivalent to not supplying --contains at all,
which would be confusing at best.
Add a --without option to "tag" as an alias for --no-contains, for
consistency with --with and --contains. The --with option is
undocumented, and possibly the only user of it is
Junio (<xmqqefy71iej.fsf@gitster.mtv.corp.google.com>). But it's
trivial to support, so let's do that.
The additions to the test suite are inverse copies of the
corresponding --contains tests. With this change --no-contains for
tag, branch & for-each-ref is just as well tested as the existing
--contains option.
In addition to those tests, add a test for "tag" which asserts that
--no-contains won't find tree/blob tags, which is slightly
unintuitive, but consistent with how --contains works & is documented.
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-03-25 02:40:57 +08:00
|
|
|
/* ...or for the `--no-contains' option */
|
|
|
|
if (filter->no_commit &&
|
2023-11-15 03:53:51 +08:00
|
|
|
commit_contains(filter, commit, filter->no_commit, &filter->internal.no_contains_cache))
|
2023-11-15 03:53:54 +08:00
|
|
|
return NULL;
|
2015-07-08 00:06:12 +08:00
|
|
|
}
|
|
|
|
|
2015-06-14 03:37:27 +08:00
|
|
|
/*
|
|
|
|
* We do not open the object yet; sort may only need refname
|
|
|
|
* to do its job and the resulting list may yet to be pruned
|
|
|
|
* by maxcount logic.
|
|
|
|
*/
|
2023-11-15 03:53:54 +08:00
|
|
|
ref = new_ref_array_item(refname, oid);
|
2015-07-08 00:06:12 +08:00
|
|
|
ref->commit = commit;
|
2018-04-07 02:59:26 +08:00
|
|
|
ref->flag = flag;
|
2015-09-10 23:48:23 +08:00
|
|
|
ref->kind = kind;
|
2015-06-14 03:37:27 +08:00
|
|
|
|
2023-11-15 03:53:54 +08:00
|
|
|
return ref;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Callback payload consumed by filter_one(). */
struct ref_filter_cbdata {
	struct ref_array *array;	/* refs that pass the filter are appended here */
	struct ref_filter *filter;	/* criteria handed to apply_ref_filter() */
};
|
|
|
|
|
|
|
|
/*
|
|
|
|
* A call-back given to for_each_ref(). Filter refs and keep them for
|
|
|
|
* later object processing.
|
|
|
|
*/
|
|
|
|
static int filter_one(const char *refname, const struct object_id *oid, int flag, void *cb_data)
|
|
|
|
{
|
|
|
|
struct ref_filter_cbdata *ref_cbdata = cb_data;
|
|
|
|
struct ref_array_item *ref;
|
|
|
|
|
|
|
|
ref = apply_ref_filter(refname, oid, flag, ref_cbdata->filter);
|
|
|
|
if (ref)
|
|
|
|
ref_array_append(ref_cbdata->array, ref);
|
|
|
|
|
2015-06-14 03:37:27 +08:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Free memory allocated for a ref_array_item */
|
|
|
|
static void free_array_item(struct ref_array_item *item)
|
|
|
|
{
|
|
|
|
free((char *)item->symref);
|
2018-10-18 15:28:54 +08:00
|
|
|
if (item->value) {
|
2019-07-11 02:36:39 +08:00
|
|
|
int i;
|
|
|
|
for (i = 0; i < used_atom_cnt; i++)
|
|
|
|
free((char *)item->value[i].s);
|
2018-10-18 15:28:54 +08:00
|
|
|
free(item->value);
|
|
|
|
}
|
for-each-ref: add ahead-behind format atom
The previous change implemented the ahead_behind() method, including an
algorithm to compute the ahead/behind values for a number of commit tips
relative to a number of commit bases. Now, integrate that algorithm as
part of 'git for-each-ref' hidden behind a new format atom,
ahead-behind. This naturally extends to 'git branch' and 'git tag'
builtins, as well.
This format allows specifying multiple bases, if so desired, and all
matching references are compared against all of those bases. For this
reason, failing to read a reference provided from these atoms results in
an error.
In order to translate the ahead_behind() method information to the
format output code in ref-filter.c, we must populate arrays of
ahead_behind_count structs. In struct ref_array, we store the full array
that will be passed to ahead_behind(). In struct ref_array_item, we
store an array of pointers that point to the relevant items within the
full array. In this way, we can pull all relevant ahead/behind values
directly when formatting output for a specific item. It also ensures the
lifetime of the ahead_behind_count structs matches the time that the
array is being used.
Add specific tests of the ahead/behind counts in t6600-test-reach.sh, as
it has an interesting repository shape. In particular, its merging
strategy and its use of different commit-graphs would demonstrate over-
counting if the ahead_behind() method did not already account for that
possibility.
Also add tests for the specific for-each-ref, branch, and tag builtins.
In the case of 'git tag', there are interesting cases that happen when
some of the selected tips are not commits. This requires careful logic
around commits_nr in the second loop of filter_ahead_behind(). Also, the
test in t7004 is carefully located to avoid being dependent on the GPG
prereq. It also avoids using the test_commit helper, as that will add
ticks to the time and disrupt the expected timestamps in later tag
tests.
Also add performance tests in a new p1300-graph-walks.sh script. This
will be useful for more uses in the future, but for now compare the
ahead-behind counting algorithm in 'git for-each-ref' to the naive
implementation by running 'git rev-list --count' processes for each
input.
For the Git source code repository, the improvement is already obvious:
Test this tree
---------------------------------------------------------------
1500.2: ahead-behind counts: git for-each-ref 0.07(0.07+0.00)
1500.3: ahead-behind counts: git branch 0.07(0.06+0.00)
1500.4: ahead-behind counts: git tag 0.07(0.06+0.00)
1500.5: ahead-behind counts: git rev-list 1.32(1.04+0.27)
But the standard performance benchmark is the Linux kernel repository,
which demonstrates a significant improvement:
Test this tree
---------------------------------------------------------------
1500.2: ahead-behind counts: git for-each-ref 0.27(0.24+0.02)
1500.3: ahead-behind counts: git branch 0.27(0.24+0.03)
1500.4: ahead-behind counts: git tag 0.28(0.27+0.01)
1500.5: ahead-behind counts: git rev-list 4.57(4.03+0.54)
The 'git rev-list' test exists in this change as a demonstration, but it
will be removed in the next change to avoid wasting time on this
comparison.
Signed-off-by: Derrick Stolee <derrickstolee@github.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-03-20 19:26:54 +08:00
|
|
|
free(item->counts);
|
2015-06-14 03:37:27 +08:00
|
|
|
free(item);
|
|
|
|
}
|
|
|
|
|
2023-11-15 03:53:55 +08:00
|
|
|
/* Callback payload consumed by filter_and_format_one(). */
struct ref_filter_and_format_cbdata {
	struct ref_filter *filter;	/* criteria handed to apply_ref_filter() */
	struct ref_format *format;	/* output format and its array options */

	/* State maintained by the callback itself. */
	struct ref_filter_and_format_internal {
		int count;	/* matching refs seen so far; only advanced when max_count is set */
	} internal;
};
|
|
|
|
|
|
|
|
static int filter_and_format_one(const char *refname, const struct object_id *oid, int flag, void *cb_data)
|
|
|
|
{
|
|
|
|
struct ref_filter_and_format_cbdata *ref_cbdata = cb_data;
|
|
|
|
struct ref_array_item *ref;
|
|
|
|
struct strbuf output = STRBUF_INIT, err = STRBUF_INIT;
|
|
|
|
|
|
|
|
ref = apply_ref_filter(refname, oid, flag, ref_cbdata->filter);
|
|
|
|
if (!ref)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
if (format_ref_array_item(ref, ref_cbdata->format, &output, &err))
|
|
|
|
die("%s", err.buf);
|
|
|
|
|
|
|
|
if (output.len || !ref_cbdata->format->array_opts.omit_empty) {
|
|
|
|
fwrite(output.buf, 1, output.len, stdout);
|
|
|
|
putchar('\n');
|
|
|
|
}
|
|
|
|
|
|
|
|
strbuf_release(&output);
|
|
|
|
strbuf_release(&err);
|
|
|
|
free_array_item(ref);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Increment the running count of refs that match the filter. If
|
|
|
|
* max_count is set and we've reached the max, stop the ref
|
|
|
|
* iteration by returning a nonzero value.
|
|
|
|
*/
|
|
|
|
if (ref_cbdata->format->array_opts.max_count &&
|
|
|
|
++ref_cbdata->internal.count >= ref_cbdata->format->array_opts.max_count)
|
|
|
|
return 1;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2015-06-14 03:37:27 +08:00
|
|
|
/* Free all memory allocated for ref_array */
|
|
|
|
void ref_array_clear(struct ref_array *array)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
|
|
|
for (i = 0; i < array->nr; i++)
|
|
|
|
free_array_item(array->items[i]);
|
2017-06-16 07:15:46 +08:00
|
|
|
FREE_AND_NULL(array->items);
|
2015-06-14 03:37:27 +08:00
|
|
|
array->nr = array->alloc = 0;
|
2019-07-11 02:36:39 +08:00
|
|
|
|
2021-07-25 21:08:24 +08:00
|
|
|
for (i = 0; i < used_atom_cnt; i++) {
|
|
|
|
struct used_atom *atom = &used_atom[i];
|
|
|
|
if (atom->atom_type == ATOM_HEAD)
|
|
|
|
free(atom->u.head);
|
|
|
|
free((char *)atom->name);
|
|
|
|
}
|
2019-07-11 02:36:39 +08:00
|
|
|
FREE_AND_NULL(used_atom);
|
|
|
|
used_atom_cnt = 0;
|
|
|
|
|
2019-04-29 13:19:42 +08:00
|
|
|
if (ref_to_worktree_map.worktrees) {
|
2020-11-03 02:55:05 +08:00
|
|
|
hashmap_clear_and_free(&(ref_to_worktree_map.map),
|
2019-10-07 07:30:40 +08:00
|
|
|
struct ref_to_worktree_entry, ent);
|
2019-04-29 13:19:42 +08:00
|
|
|
free_worktrees(ref_to_worktree_map.worktrees);
|
|
|
|
ref_to_worktree_map.worktrees = NULL;
|
|
|
|
}
|
for-each-ref: add ahead-behind format atom
The previous change implemented the ahead_behind() method, including an
algorithm to compute the ahead/behind values for a number of commit tips
relative to a number of commit bases. Now, integrate that algorithm as
part of 'git for-each-ref' hidden behind a new format atom,
ahead-behind. This naturally extends to 'git branch' and 'git tag'
builtins, as well.
This format allows specifying multiple bases, if so desired, and all
matching references are compared against all of those bases. For this
reason, failing to read a reference provided from these atoms results in
an error.
In order to translate the ahead_behind() method information to the
format output code in ref-filter.c, we must populate arrays of
ahead_behind_count structs. In struct ref_array, we store the full array
that will be passed to ahead_behind(). In struct ref_array_item, we
store an array of pointers that point to the relevant items within the
full array. In this way, we can pull all relevant ahead/behind values
directly when formatting output for a specific item. It also ensures the
lifetime of the ahead_behind_count structs matches the time that the
array is being used.
Add specific tests of the ahead/behind counts in t6600-test-reach.sh, as
it has an interesting repository shape. In particular, its merging
strategy and its use of different commit-graphs would demonstrate over-
counting if the ahead_behind() method did not already account for that
possibility.
Also add tests for the specific for-each-ref, branch, and tag builtins.
In the case of 'git tag', there are interesting cases that happen when
some of the selected tips are not commits. This requires careful logic
around commits_nr in the second loop of filter_ahead_behind(). Also, the
test in t7004 is carefully located to avoid being dependent on the GPG
prereq. It also avoids using the test_commit helper, as that will add
ticks to the time and disrupt the expected timestamps in later tag
tests.
Also add performance tests in a new p1300-graph-walks.sh script. This
will be useful for more uses in the future, but for now compare the
ahead-behind counting algorithm in 'git for-each-ref' to the naive
implementation by running 'git rev-list --count' processes for each
input.
For the Git source code repository, the improvement is already obvious:
Test this tree
---------------------------------------------------------------
1500.2: ahead-behind counts: git for-each-ref 0.07(0.07+0.00)
1500.3: ahead-behind counts: git branch 0.07(0.06+0.00)
1500.4: ahead-behind counts: git tag 0.07(0.06+0.00)
1500.5: ahead-behind counts: git rev-list 1.32(1.04+0.27)
But the standard performance benchmark is the Linux kernel repository,
which demonstrates a significant improvement:
Test this tree
---------------------------------------------------------------
1500.2: ahead-behind counts: git for-each-ref 0.27(0.24+0.02)
1500.3: ahead-behind counts: git branch 0.27(0.24+0.03)
1500.4: ahead-behind counts: git tag 0.28(0.27+0.01)
1500.5: ahead-behind counts: git rev-list 4.57(4.03+0.54)
The 'git rev-list' test exists in this change as a demonstration, but it
will be removed in the next change to avoid wasting time on this
comparison.
Signed-off-by: Derrick Stolee <derrickstolee@github.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-03-20 19:26:54 +08:00
|
|
|
|
|
|
|
FREE_AND_NULL(array->counts);
|
2015-06-14 03:37:27 +08:00
|
|
|
}
|
|
|
|
|
2020-09-19 05:58:41 +08:00
|
|
|
#define EXCLUDE_REACHED 0
|
|
|
|
#define INCLUDE_REACHED 1
|
|
|
|
static void reach_filter(struct ref_array *array,
|
2023-07-11 05:12:10 +08:00
|
|
|
struct commit_list **check_reachable,
|
2020-09-19 05:58:41 +08:00
|
|
|
int include_reached)
|
2015-07-08 00:06:12 +08:00
|
|
|
{
|
|
|
|
int i, old_nr;
|
2020-09-26 16:37:29 +08:00
|
|
|
struct commit **to_clear;
|
2020-09-16 10:08:40 +08:00
|
|
|
|
2023-07-11 05:12:10 +08:00
|
|
|
if (!*check_reachable)
|
2020-09-16 10:08:40 +08:00
|
|
|
return;
|
2015-07-08 00:06:12 +08:00
|
|
|
|
2021-03-14 00:17:22 +08:00
|
|
|
CALLOC_ARRAY(to_clear, array->nr);
|
2015-07-08 00:06:12 +08:00
|
|
|
for (i = 0; i < array->nr; i++) {
|
|
|
|
struct ref_array_item *item = array->items[i];
|
|
|
|
to_clear[i] = item->commit;
|
|
|
|
}
|
|
|
|
|
commit-reach: add tips_reachable_from_bases()
Both 'git for-each-ref --merged=<X>' and 'git branch --merged=<X>' use
the ref-filter machinery to select references or branches (respectively)
that are reachable from a set of commits presented by one or more
--merged arguments. This happens within reach_filter(), which uses the
revision-walk machinery to walk history in a standard way.
However, the commit-reach.c file is full of custom searches that are
more efficient, especially for reachability queries that can terminate
early when reachability is discovered. Add a new
tips_reachable_from_bases() method to commit-reach.c and call it from
within reach_filter() in ref-filter.c. This affects both 'git branch'
and 'git for-each-ref' as tested in p1500-graph-walks.sh.
For the Linux kernel repository, we take an already-fast algorithm and
make it even faster:
Test HEAD~1 HEAD
-------------------------------------------------------------------
1500.5: contains: git for-each-ref --merged 0.13 0.02 -84.6%
1500.6: contains: git branch --merged 0.14 0.02 -85.7%
1500.7: contains: git tag --merged 0.15 0.03 -80.0%
(Note that we remove the iterative 'git rev-list' test from p1500
because it no longer makes sense as a comparison to 'git for-each-ref'
and would just waste time running it for these comparisons.)
The algorithm is implemented in commit-reach.c in the method
tips_reachable_from_bases(). This method takes a string_list of tips and
assigns the 'util' for each item with the value 1 if the base commit can
reach those tips.
Like other reachability queries in commit-reach.c, the fastest way to
search for "can A reach B?" is to do a depth-first search up to the
generation number of B, preferring to explore first parents before later
parents. While we must walk all reachable commits up to that generation
number when the answer is "no", the depth-first search can answer "yes"
much faster than other approaches in most cases.
This search becomes trickier when there are multiple targets for the
depth-first search. The commits with lower generation number are more
likely to be within the history of the start commit, but we don't want
to waste time searching commits of low generation number if the commit
target with lowest generation number has already been found.
The trick here is to take the input commits and sort them by generation
number in ascending order. Track the index within this order as
min_generation_index. When we find a commit, if its index in the list is
equal to min_generation_index, then we can increase the generation
number boundary of our search to the next-lowest value in the list.
With this mechanism, the number of commits to search is minimized with
respect to the depth-first search heuristic. We will walk all commits up
to the minimum generation number of a commit that is _not_ reachable
from the start, but we will walk only the necessary portion of the
depth-first search for the reachable commits of lower generation.
Add extra tests for this behavior in t6600-test-reach.sh as the
interesting data shape of that repository can sometimes demonstrate
corner case bugs.
Signed-off-by: Derrick Stolee <derrickstolee@github.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-03-20 19:26:55 +08:00
|
|
|
tips_reachable_from_bases(the_repository,
|
2023-07-11 05:12:10 +08:00
|
|
|
*check_reachable,
|
commit-reach: add tips_reachable_from_bases()
Both 'git for-each-ref --merged=<X>' and 'git branch --merged=<X>' use
the ref-filter machinery to select references or branches (respectively)
that are reachable from a set of commits presented by one or more
--merged arguments. This happens within reach_filter(), which uses the
revision-walk machinery to walk history in a standard way.
However, the commit-reach.c file is full of custom searches that are
more efficient, especially for reachability queries that can terminate
early when reachability is discovered. Add a new
tips_reachable_from_bases() method to commit-reach.c and call it from
within reach_filter() in ref-filter.c. This affects both 'git branch'
and 'git for-each-ref' as tested in p1500-graph-walks.sh.
For the Linux kernel repository, we take an already-fast algorithm and
make it even faster:
Test HEAD~1 HEAD
-------------------------------------------------------------------
1500.5: contains: git for-each-ref --merged 0.13 0.02 -84.6%
1500.6: contains: git branch --merged 0.14 0.02 -85.7%
1500.7: contains: git tag --merged 0.15 0.03 -80.0%
(Note that we remove the iterative 'git rev-list' test from p1500
because it no longer makes sense as a comparison to 'git for-each-ref'
and would just waste time running it for these comparisons.)
The algorithm is implemented in commit-reach.c in the method
tips_reachable_from_bases(). This method takes a string_list of tips and
assigns the 'util' for each item with the value 1 if the base commit can
reach those tips.
Like other reachability queries in commit-reach.c, the fastest way to
search for "can A reach B?" is to do a depth-first search up to the
generation number of B, preferring to explore first parents before later
parents. While we must walk all reachable commits up to that generation
number when the answer is "no", the depth-first search can answer "yes"
much faster than other approaches in most cases.
This search becomes trickier when there are multiple targets for the
depth-first search. The commits with lower generation number are more
likely to be within the history of the start commit, but we don't want
to waste time searching commits of low generation number if the commit
target with lowest generation number has already been found.
The trick here is to take the input commits and sort them by generation
number in ascending order. Track the index within this order as
min_generation_index. When we find a commit, if its index in the list is
equal to min_generation_index, then we can increase the generation
number boundary of our search to the next-lowest value in the list.
With this mechanism, the number of commits to search is minimized with
respect to the depth-first search heuristic. We will walk all commits up
to the minimum generation number of a commit that is _not_ reachable
from the start, but we will walk only the necessary portion of the
depth-first search for the reachable commits of lower generation.
Add extra tests for this behavior in t6600-test-reach.sh as the
interesting data shape of that repository can sometimes demonstrate
corner case bugs.
Signed-off-by: Derrick Stolee <derrickstolee@github.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-03-20 19:26:55 +08:00
|
|
|
to_clear, array->nr,
|
|
|
|
UNINTERESTING);
|
2015-07-08 00:06:12 +08:00
|
|
|
|
|
|
|
old_nr = array->nr;
|
|
|
|
array->nr = 0;
|
|
|
|
|
|
|
|
for (i = 0; i < old_nr; i++) {
|
|
|
|
struct ref_array_item *item = array->items[i];
|
|
|
|
struct commit *commit = item->commit;
|
|
|
|
|
|
|
|
int is_merged = !!(commit->object.flags & UNINTERESTING);
|
|
|
|
|
2020-09-19 05:58:41 +08:00
|
|
|
if (is_merged == include_reached)
|
2015-07-08 00:06:12 +08:00
|
|
|
array->items[array->nr++] = array->items[i];
|
|
|
|
else
|
|
|
|
free_array_item(item);
|
|
|
|
}
|
|
|
|
|
2017-12-26 01:44:12 +08:00
|
|
|
clear_commit_marks_many(old_nr, to_clear, ALL_REV_FLAGS);
|
2020-09-16 10:08:40 +08:00
|
|
|
|
2023-07-11 05:12:10 +08:00
|
|
|
while (*check_reachable) {
|
|
|
|
struct commit *merge_commit = pop_commit(check_reachable);
|
2020-09-16 10:08:40 +08:00
|
|
|
clear_commit_marks(merge_commit, ALL_REV_FLAGS);
|
|
|
|
}
|
|
|
|
|
2015-07-08 00:06:12 +08:00
|
|
|
free(to_clear);
|
|
|
|
}
|
|
|
|
|
for-each-ref: add ahead-behind format atom
The previous change implemented the ahead_behind() method, including an
algorithm to compute the ahead/behind values for a number of commit tips
relative to a number of commit bases. Now, integrate that algorithm as
part of 'git for-each-ref' hidden behind a new format atom,
ahead-behind. This naturally extends to 'git branch' and 'git tag'
builtins, as well.
This format allows specifying multiple bases, if so desired, and all
matching references are compared against all of those bases. For this
reason, failing to read a reference provided from these atoms results in
an error.
In order to translate the ahead_behind() method information to the
format output code in ref-filter.c, we must populate arrays of
ahead_behind_count structs. In struct ref_array, we store the full array
that will be passed to ahead_behind(). In struct ref_array_item, we
store an array of pointers that point to the relevant items within the
full array. In this way, we can pull all relevant ahead/behind values
directly when formatting output for a specific item. It also ensures the
lifetime of the ahead_behind_count structs matches the time that the
array is being used.
Add specific tests of the ahead/behind counts in t6600-test-reach.sh, as
it has an interesting repository shape. In particular, its merging
strategy and its use of different commit-graphs would demonstrate over-
counting if the ahead_behind() method did not already account for that
possibility.
Also add tests for the specific for-each-ref, branch, and tag builtins.
In the case of 'git tag', there are interesting cases that happen when
some of the selected tips are not commits. This requires careful logic
around commits_nr in the second loop of filter_ahead_behind(). Also, the
test in t7004 is carefully located to avoid being dependent on the GPG
prereq. It also avoids using the test_commit helper, as that will add
ticks to the time and disrupt the expected timestamps in later tag
tests.
Also add performance tests in a new p1300-graph-walks.sh script. This
will be useful for more uses in the future, but for now compare the
ahead-behind counting algorithm in 'git for-each-ref' to the naive
implementation by running 'git rev-list --count' processes for each
input.
For the Git source code repository, the improvement is already obvious:
Test this tree
---------------------------------------------------------------
1500.2: ahead-behind counts: git for-each-ref 0.07(0.07+0.00)
1500.3: ahead-behind counts: git branch 0.07(0.06+0.00)
1500.4: ahead-behind counts: git tag 0.07(0.06+0.00)
1500.5: ahead-behind counts: git rev-list 1.32(1.04+0.27)
But the standard performance benchmark is the Linux kernel repository,
which demonstrates a significant improvement:
Test this tree
---------------------------------------------------------------
1500.2: ahead-behind counts: git for-each-ref 0.27(0.24+0.02)
1500.3: ahead-behind counts: git branch 0.27(0.24+0.03)
1500.4: ahead-behind counts: git tag 0.28(0.27+0.01)
1500.5: ahead-behind counts: git rev-list 4.57(4.03+0.54)
The 'git rev-list' test exists in this change as a demonstration, but it
will be removed in the next change to avoid wasting time on this
comparison.
Signed-off-by: Derrick Stolee <derrickstolee@github.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-03-20 19:26:54 +08:00
|
|
|
void filter_ahead_behind(struct repository *r,
|
|
|
|
struct ref_format *format,
|
|
|
|
struct ref_array *array)
|
|
|
|
{
|
|
|
|
struct commit **commits;
|
|
|
|
size_t commits_nr = format->bases.nr + array->nr;
|
|
|
|
|
|
|
|
if (!format->bases.nr || !array->nr)
|
|
|
|
return;
|
|
|
|
|
|
|
|
ALLOC_ARRAY(commits, commits_nr);
|
|
|
|
for (size_t i = 0; i < format->bases.nr; i++)
|
|
|
|
commits[i] = format->bases.items[i].util;
|
|
|
|
|
|
|
|
ALLOC_ARRAY(array->counts, st_mult(format->bases.nr, array->nr));
|
|
|
|
|
|
|
|
commits_nr = format->bases.nr;
|
|
|
|
array->counts_nr = 0;
|
|
|
|
for (size_t i = 0; i < array->nr; i++) {
|
|
|
|
const char *name = array->items[i]->refname;
|
|
|
|
commits[commits_nr] = lookup_commit_reference_by_name(name);
|
|
|
|
|
|
|
|
if (!commits[commits_nr])
|
|
|
|
continue;
|
|
|
|
|
|
|
|
CALLOC_ARRAY(array->items[i]->counts, format->bases.nr);
|
|
|
|
for (size_t j = 0; j < format->bases.nr; j++) {
|
|
|
|
struct ahead_behind_count *count;
|
|
|
|
count = &array->counts[array->counts_nr++];
|
|
|
|
count->tip_index = commits_nr;
|
|
|
|
count->base_index = j;
|
|
|
|
|
|
|
|
array->items[i]->counts[j] = count;
|
|
|
|
}
|
|
|
|
commits_nr++;
|
|
|
|
}
|
|
|
|
|
|
|
|
ahead_behind(r, commits, commits_nr, array->counts, array->counts_nr);
|
|
|
|
free(commits);
|
|
|
|
}
|
|
|
|
|
2023-11-15 03:53:54 +08:00
|
|
|
/*
 * Iterate over the refs selected by 'type', invoking 'fn' with 'cb_data'
 * for each one.
 *
 * The kind bits of 'type' are stored in filter->kind, and the filter's
 * two internal contains caches are initialized before the iteration and
 * cleared after it.
 *
 * Returns the value produced by the ref iteration (0 if none ran). The
 * result of the extra head_ref() call is not folded into that value.
 */
static int do_filter_refs(struct ref_filter *filter, unsigned int type, each_ref_fn fn, void *cb_data)
{
	const char *prefix = NULL;
	int ret = 0;

	filter->kind = type & FILTER_REFS_KIND_MASK;

	init_contains_cache(&filter->internal.contains_cache);
	init_contains_cache(&filter->internal.no_contains_cache);

	/* Simple per-ref filtering */
	if (!filter->kind)
		die("filter_refs: invalid type");

	/*
	 * For common cases where we need only branches or remotes or tags,
	 * we only iterate through those refs. If a mix of refs is needed,
	 * we iterate over all refs and filter out required refs with the help
	 * of filter_ref_kind().
	 */
	if (filter->kind == FILTER_REFS_BRANCHES)
		prefix = "refs/heads/";
	else if (filter->kind == FILTER_REFS_REMOTES)
		prefix = "refs/remotes/";
	else if (filter->kind == FILTER_REFS_TAGS)
		prefix = "refs/tags/";

	if (prefix)
		ret = for_each_fullref_in(prefix, fn, cb_data);
	else if (filter->kind & FILTER_REFS_ALL)
		ret = for_each_fullref_in_pattern(filter, fn, cb_data);

	/* Only visit the detached HEAD when the iteration above succeeded. */
	if (!ret && (filter->kind & FILTER_REFS_DETACHED_HEAD))
		head_ref(fn, cb_data);

	clear_contains_cache(&filter->internal.contains_cache);
	clear_contains_cache(&filter->internal.no_contains_cache);

	return ret;
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* API for filtering a set of refs. Based on the type of refs the user
|
|
|
|
* has requested, we iterate through those refs and apply filters
|
|
|
|
* as per the given ref_filter structure and finally store the
|
|
|
|
* filtered refs in the ref_array structure.
|
|
|
|
*/
|
|
|
|
int filter_refs(struct ref_array *array, struct ref_filter *filter, unsigned int type)
|
|
|
|
{
|
|
|
|
struct ref_filter_cbdata ref_cbdata;
|
|
|
|
int save_commit_buffer_orig;
|
|
|
|
int ret = 0;
|
|
|
|
|
|
|
|
ref_cbdata.array = array;
|
|
|
|
ref_cbdata.filter = filter;
|
|
|
|
|
|
|
|
save_commit_buffer_orig = save_commit_buffer;
|
|
|
|
save_commit_buffer = 0;
|
|
|
|
|
|
|
|
ret = do_filter_refs(filter, type, filter_one, &ref_cbdata);
|
|
|
|
|
2015-07-08 00:06:12 +08:00
|
|
|
/* Filters that need revision walking */
|
2023-07-11 05:12:10 +08:00
|
|
|
reach_filter(array, &filter->reachable_from, INCLUDE_REACHED);
|
|
|
|
reach_filter(array, &filter->unreachable_from, EXCLUDE_REACHED);
|
2015-07-08 00:06:12 +08:00
|
|
|
|
ref-filter: disable save_commit_buffer while traversing
Various ref-filter options like "--contains" or "--merged" may cause us
to traverse large segments of the history graph. It's counter-productive
to have save_commit_buffer turned on, as that will instruct the commit
code to cache in-memory the object contents for each commit we traverse.
This increases the amount of heap memory used while providing little or
no benefit, since we're not actually planning to display those commits
(which is the usual reason that tools like git-log want to keep them
around). We can easily disable this feature while ref-filter is running.
This lowers peak heap (as measured by massif) for running:
git tag --contains 1da177e4c3
in linux.git from ~100MB to ~20MB. It also seems to improve runtime by
4-5% (600ms vs 630ms).
A few points to note:
- it should be safe to temporarily disable save_commit_buffer like
this. The saved buffers are accessed through get_commit_buffer(),
which treats the saved ones like a cache, and loads on-demand from
the object database on a cache miss. So any code that was using this
would not be wrong, it might just incur an extra object lookup for
some objects. But...
- I don't think any ref-filter related code is using the cache. While
it's true that an option like "--format=%(*contents:subject)" or
"--sort=*authordate" will need to look at the commit contents,
ref-filter doesn't use get_commit_buffer() to do so! It always reads
the objects directly via read_object_file(), though it does avoid
re-reading objects if the format can be satisfied without them.
Timing "git tag --format=%(*authordate)" shows that we're the same
before and after, as expected.
- Note that all of this assumes you don't have a commit-graph file. if
you do, then the heap usage is even lower, and the runtime is 10x
faster. So in that sense this is not urgent, as there's a much
better solution. But since it's such an obvious and easy win for
fallback cases (including commits which aren't yet in the graph
file), there's no reason not to.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-07-11 22:48:06 +08:00
|
|
|
save_commit_buffer = save_commit_buffer_orig;
|
2015-07-08 00:06:12 +08:00
|
|
|
return ret;
|
2015-06-14 03:37:28 +08:00
|
|
|
}
|
|
|
|
|
2023-11-15 03:53:55 +08:00
|
|
|
static inline int can_do_iterative_format(struct ref_filter *filter,
|
|
|
|
struct ref_sorting *sorting,
|
|
|
|
struct ref_format *format)
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
* Filtering & formatting results within a single ref iteration
|
|
|
|
* callback is not compatible with options that require
|
|
|
|
* post-processing a filtered ref_array. These include:
|
|
|
|
* - filtering on reachability
|
|
|
|
* - sorting the filtered results
|
|
|
|
* - including ahead-behind information in the formatted output
|
|
|
|
*/
|
|
|
|
return !(filter->reachable_from ||
|
|
|
|
filter->unreachable_from ||
|
|
|
|
sorting ||
|
|
|
|
format->bases.nr);
|
|
|
|
}
|
|
|
|
|
2023-11-15 03:53:52 +08:00
|
|
|
void filter_and_format_refs(struct ref_filter *filter, unsigned int type,
|
|
|
|
struct ref_sorting *sorting,
|
|
|
|
struct ref_format *format)
|
|
|
|
{
|
2023-11-15 03:53:55 +08:00
|
|
|
if (can_do_iterative_format(filter, sorting, format)) {
|
|
|
|
int save_commit_buffer_orig;
|
|
|
|
struct ref_filter_and_format_cbdata ref_cbdata = {
|
|
|
|
.filter = filter,
|
|
|
|
.format = format,
|
|
|
|
};
|
|
|
|
|
|
|
|
save_commit_buffer_orig = save_commit_buffer;
|
|
|
|
save_commit_buffer = 0;
|
|
|
|
|
|
|
|
do_filter_refs(filter, type, filter_and_format_one, &ref_cbdata);
|
|
|
|
|
|
|
|
save_commit_buffer = save_commit_buffer_orig;
|
|
|
|
} else {
|
|
|
|
struct ref_array array = { 0 };
|
|
|
|
filter_refs(&array, filter, type);
|
|
|
|
filter_ahead_behind(the_repository, format, &array);
|
|
|
|
ref_array_sort(sorting, &array);
|
|
|
|
print_formatted_ref_array(&array, format);
|
|
|
|
ref_array_clear(&array);
|
|
|
|
}
|
2023-11-15 03:53:52 +08:00
|
|
|
}
|
|
|
|
|
branch: sort detached HEAD based on a flag
Change the ref-filter sorting of detached HEAD to check the
FILTER_REFS_DETACHED_HEAD flag, instead of relying on the ref
description filled-in by get_head_description() to start with "(",
which in turn we expect to ASCII-sort before any other reference.
For context, we'd like the detached line to appear first at the start
of "git branch -l", e.g.:
$ git branch -l
* (HEAD detached at <hash>)
master
This doesn't change that, but improves on a fix made in
28438e84e04 (ref-filter: sort detached HEAD lines firstly, 2019-06-18)
and gives the Chinese translation the ability to use its preferred
punctuation marks again.
In Chinese the fullwidth versions of punctuation like "()" are
typically written as （ (U+FF08 fullwidth left parenthesis), ） (U+FF09
fullwidth right parenthesis) instead[1]. This form is used in both
po/zh_{CN,TW}.po in most cases where "()" is translated in a string.
Aside from that improvement to the Chinese translation, it also just
makes for cleaner code that we mark any special cases in the ref_array
we're sorting with flags and make the sort function aware of them,
instead of piggy-backing on the general-case of strcmp() doing the
right thing.
As seen in the amended tests this made reverse sorting a bit more
consistent. Before this we'd sometimes sort this message in the
middle, now it's consistently at the beginning or end, depending on
whether we're doing a normal or reverse sort. Having it at the end
doesn't make much sense either, but at least it behaves consistently
now. A follow-up commit will make this behavior under reverse sorting
even better.
I'm removing the "TRANSLATORS" comments that were in the old code
while I'm at it. Those were added in d4919bb288e (ref-filter: move
get_head_description() from branch.c, 2017-01-10). I think it's
obvious from context, string and translation memory in typical
translation tools that these are the same or similar string.
1. https://en.wikipedia.org/wiki/Chinese_punctuation#Marks_similar_to_European_punctuation
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-01-07 17:51:52 +08:00
|
|
|
static int compare_detached_head(struct ref_array_item *a, struct ref_array_item *b)
|
|
|
|
{
|
|
|
|
if (!(a->kind ^ b->kind))
|
|
|
|
BUG("ref_kind_from_refname() should only mark one ref as HEAD");
|
|
|
|
if (a->kind & FILTER_REFS_DETACHED_HEAD)
|
|
|
|
return -1;
|
|
|
|
else if (b->kind & FILTER_REFS_DETACHED_HEAD)
|
|
|
|
return 1;
|
|
|
|
BUG("should have died in the xor check above");
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
ref-filter: add %(raw) atom
Add new formatting option `%(raw)`, which will print the raw
object data without any changes. It will help further to migrate
all cat-file formatting logic from cat-file to ref-filter.
The raw data of blob, tree objects may contain '\0', but most of
the logic in `ref-filter` depends on the output of the atom being
text (specifically, no embedded NULs in it).
E.g. `quote_formatting()` uses `strbuf_addstr()` or `*._quote_buf()`
to add the data to the buffer. The raw data of a tree object is
`100644 one\0...`, only the `100644 one` will be added to the buffer,
which is incorrect.
Therefore, we need to find a way to record the length of the
atom_value's member `s`. Although strbuf can already record the
string and its length, if we want to replace the type of atom_value's
member `s` with strbuf, many places in ref-filter that are filled
with dynamically allocated memory in `v->s` are not easy to replace.
At the same time, we need to check if `v->s == NULL` in
populate_value(), and strbuf cannot easily distinguish NULL and empty
strings, but c-style "const char *" can do it. So add a new member in
`struct atom_value`: `s_size`, which can record raw object size, it
can help us add raw object data to the buffer or compare two buffers
which contain raw object data.
Note that `--format=%(raw)` cannot be used with `--python`, `--shell`,
`--tcl`, and `--perl` because if the binary raw data is passed to a
variable in such languages, these may not support arbitrary binary data
in their string variable type.
Reviewed-by: Jacob Keller <jacob.keller@gmail.com>
Mentored-by: Christian Couder <christian.couder@gmail.com>
Mentored-by: Hariom Verma <hariom18599@gmail.com>
Helped-by: Bagas Sanjaya <bagasdotme@gmail.com>
Helped-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Helped-by: Felipe Contreras <felipe.contreras@gmail.com>
Helped-by: Phillip Wood <phillip.wood@dunelm.org.uk>
Helped-by: Junio C Hamano <gitster@pobox.com>
Based-on-patch-by: Olga Telezhnaya <olyatelezhnaya@gmail.com>
Signed-off-by: ZheNing Hu <adlternative@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-07-26 11:26:47 +08:00
|
|
|
/*
 * Case-insensitive counterpart of memcmp(): compare the first 'n' bytes
 * of 'vs1' and 'vs2' after folding each byte with tolower().
 *
 * Unlike strcasecmp(), NUL bytes are not treated as terminators; exactly
 * 'n' bytes are examined unless a difference is found earlier.
 *
 * Returns 0 when the ranges are equal (always the case for n == 0),
 * otherwise the difference between the first pair of folded bytes that
 * differ: negative if 'vs1' sorts first, positive if 'vs2' does.
 */
static int memcasecmp(const void *vs1, const void *vs2, size_t n)
{
	/*
	 * Walk the buffers as unsigned char: the standard tolower() is only
	 * defined for values representable as unsigned char (or EOF), and
	 * comparing bytes as unsigned keeps the ordering of bytes >= 0x80
	 * in line with memcmp(), which this function is used alongside.
	 */
	const unsigned char *s1 = vs1, *s2 = vs2;
	const unsigned char *end = s1 + n;

	for (; s1 < end; s1++, s2++) {
		int diff = tolower(*s1) - tolower(*s2);
		if (diff)
			return diff;
	}
	return 0;
}
|
|
|
|
|
for-each-ref: delay parsing of --sort=<atom> options
The for-each-ref family of commands invoke parsers immediately when
it sees each --sort=<atom> option, and die before even seeing the
other options on the command line when the <atom> is unrecognised.
Instead, accumulate them in a string list, and have them parsed into
a ref_sorting structure after the command line parsing is done. As
a consequence, "git branch --sort=bogus -h" used to fail to give the
brief help, which arguably may have been a feature, now does so,
which is more consistent with how other options work.
The patch is smaller than the actual extent of the "damage" to the
codebase, thanks to the fact that the original code consistently
used OPT_REF_SORT() macro to handle command line options. We only
needed to replace the variable used for the list, and implementation
of the callback function used in the macro.
The old rule was for the users of the API to:
- Declare ref_sorting and ref_sorting_tail variables;
- OPT_REF_SORT() macro will instantiate ref_sorting instance (which
may barf and die) and append it to the tail;
- Append to the tail each ref_sorting read from the configuration
by parsing in the config callback (which may barf and die);
- See if ref_sorting is null and use ref_sorting_default() instead.
Now the rule is not all that different but is simpler:
- Declare ref_sorting_options string list.
- OPT_REF_SORT() macro will append it to the string list;
- Append to the string list the sort key read from the
configuration;
- call ref_sorting_options() to turn the string list to ref_sorting
structure (which also deals with the default value).
As side effects, this change also cleans up a few issues:
- 95be717c (parse_opt_ref_sorting: always use with NONEG flag,
2019-03-20) muses that "git for-each-ref --no-sort" should simply
clear the sort keys accumulated so far; it now does.
- The implementation detail of "struct ref_sorting" and the helper
function parse_ref_sorting() can now be private to the ref-filter
API implementation.
- If you set branch.sort to a bogus value, then any "git branch"
invocation, not only the listing mode, would abort with the
original code; now it doesn't.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-10-21 03:23:53 +08:00
|
|
|
struct ref_sorting {
|
|
|
|
struct ref_sorting *next;
|
|
|
|
int atom; /* index into used_atom array (internal) */
|
|
|
|
enum ref_sorting_order sort_flags;
|
|
|
|
};
|
|
|
|
|
2015-06-14 03:37:27 +08:00
|
|
|
static int cmp_ref_sorting(struct ref_sorting *s, struct ref_array_item *a, struct ref_array_item *b)
|
|
|
|
{
|
|
|
|
struct atom_value *va, *vb;
|
|
|
|
int cmp;
|
2021-01-07 17:51:53 +08:00
|
|
|
int cmp_detached_head = 0;
|
2016-02-18 02:06:11 +08:00
|
|
|
cmp_type cmp_type = used_atom[s->atom].type;
|
2018-03-29 20:49:45 +08:00
|
|
|
struct strbuf err = STRBUF_INIT;
|
2015-06-14 03:37:27 +08:00
|
|
|
|
2018-03-29 20:49:45 +08:00
|
|
|
if (get_ref_atom_value(a, s->atom, &va, &err))
|
|
|
|
die("%s", err.buf);
|
|
|
|
if (get_ref_atom_value(b, s->atom, &vb, &err))
|
|
|
|
die("%s", err.buf);
|
|
|
|
strbuf_release(&err);
|
branch: sort detached HEAD based on a flag
Change the ref-filter sorting of detached HEAD to check the
FILTER_REFS_DETACHED_HEAD flag, instead of relying on the ref
description filled-in by get_head_description() to start with "(",
which in turn we expect to ASCII-sort before any other reference.
For context, we'd like the detached line to appear first at the start
of "git branch -l", e.g.:
$ git branch -l
* (HEAD detached at <hash>)
master
This doesn't change that, but improves on a fix made in
28438e84e04 (ref-filter: sort detached HEAD lines firstly, 2019-06-18)
and gives the Chinese translation the ability to use its preferred
punctuation marks again.
In Chinese the fullwidth versions of punctuation like "()" are
typically written as （ (U+FF08 fullwidth left parenthesis), ） (U+FF09
fullwidth right parenthesis) instead[1]. This form is used in both
po/zh_{CN,TW}.po in most cases where "()" is translated in a string.
Aside from that improvement to the Chinese translation, it also just
makes for cleaner code that we mark any special cases in the ref_array
we're sorting with flags and make the sort function aware of them,
instead of piggy-backing on the general-case of strcmp() doing the
right thing.
As seen in the amended tests this made reverse sorting a bit more
consistent. Before this we'd sometimes sort this message in the
middle, now it's consistently at the beginning or end, depending on
whether we're doing a normal or reverse sort. Having it at the end
doesn't make much sense either, but at least it behaves consistently
now. A follow-up commit will make this behavior under reverse sorting
even better.
I'm removing the "TRANSLATORS" comments that were in the old code
while I'm at it. Those were added in d4919bb288e (ref-filter: move
get_head_description() from branch.c, 2017-01-10). I think it's
obvious from context, string and translation memory in typical
translation tools that these are the same or similar string.
1. https://en.wikipedia.org/wiki/Chinese_punctuation#Marks_similar_to_European_punctuation
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-01-07 17:51:52 +08:00
|
|
|
if (s->sort_flags & REF_SORTING_DETACHED_HEAD_FIRST &&
|
|
|
|
((a->kind | b->kind) & FILTER_REFS_DETACHED_HEAD)) {
|
|
|
|
cmp = compare_detached_head(a, b);
|
2021-01-07 17:51:53 +08:00
|
|
|
cmp_detached_head = 1;
|
branch: sort detached HEAD based on a flag
Change the ref-filter sorting of detached HEAD to check the
FILTER_REFS_DETACHED_HEAD flag, instead of relying on the ref
description filled-in by get_head_description() to start with "(",
which in turn we expect to ASCII-sort before any other reference.
For context, we'd like the detached line to appear first at the start
of "git branch -l", e.g.:
$ git branch -l
* (HEAD detached at <hash>)
master
This doesn't change that, but improves on a fix made in
28438e84e04 (ref-filter: sort detached HEAD lines firstly, 2019-06-18)
and gives the Chinese translation the ability to use its preferred
punctuation marks again.
In Chinese the fullwidth versions of punctuation like "()" are
typically written as （ (U+FF08 fullwidth left parenthesis), ） (U+FF09
fullwidth right parenthesis) instead[1]. This form is used in both
po/zh_{CN,TW}.po in most cases where "()" is translated in a string.
Aside from that improvement to the Chinese translation, it also just
makes for cleaner code that we mark any special cases in the ref_array
we're sorting with flags and make the sort function aware of them,
instead of piggy-backing on the general-case of strcmp() doing the
right thing.
As seen in the amended tests this made reverse sorting a bit more
consistent. Before this we'd sometimes sort this message in the
middle, now it's consistently at the beginning or end, depending on
whether we're doing a normal or reverse sort. Having it at the end
doesn't make much sense either, but at least it behaves consistently
now. A follow-up commit will make this behavior under reverse sorting
even better.
I'm removing the "TRANSLATORS" comments that were in the old code
while I'm at it. Those were added in d4919bb288e (ref-filter: move
get_head_description() from branch.c, 2017-01-10). I think it's
obvious from context, string and translation memory in typical
translation tools that these are the same or similar string.
1. https://en.wikipedia.org/wiki/Chinese_punctuation#Marks_similar_to_European_punctuation
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-01-07 17:51:52 +08:00
|
|
|
} else if (s->sort_flags & REF_SORTING_VERSION) {
|
2015-09-10 23:48:25 +08:00
|
|
|
cmp = versioncmp(va->s, vb->s);
|
2021-01-07 17:51:49 +08:00
|
|
|
} else if (cmp_type == FIELD_STR) {
|
ref-filter: add %(raw) atom
Add new formatting option `%(raw)`, which will print the raw
object data without any changes. It will help further to migrate
all cat-file formatting logic from cat-file to ref-filter.
The raw data of blob, tree objects may contain '\0', but most of
the logic in `ref-filter` depends on the output of the atom being
text (specifically, no embedded NULs in it).
E.g. `quote_formatting()` uses `strbuf_addstr()` or `*._quote_buf()`
to add the data to the buffer. The raw data of a tree object is
`100644 one\0...`, only the `100644 one` will be added to the buffer,
which is incorrect.
Therefore, we need to find a way to record the length of the
atom_value's member `s`. Although strbuf can already record the
string and its length, if we want to replace the type of atom_value's
member `s` with strbuf, many places in ref-filter that are filled
with dynamically allocated memory in `v->s` are not easy to replace.
At the same time, we need to check if `v->s == NULL` in
populate_value(), and strbuf cannot easily distinguish NULL and empty
strings, but c-style "const char *" can do it. So add a new member in
`struct atom_value`: `s_size`, which can record raw object size, it
can help us add raw object data to the buffer or compare two buffers
which contain raw object data.
Note that `--format=%(raw)` cannot be used with `--python`, `--shell`,
`--tcl`, and `--perl` because if the binary raw data is passed to a
variable in such languages, these may not support arbitrary binary data
in their string variable type.
Reviewed-by: Jacob Keller <jacob.keller@gmail.com>
Mentored-by: Christian Couder <christian.couder@gmail.com>
Mentored-by: Hariom Verma <hariom18599@gmail.com>
Helped-by: Bagas Sanjaya <bagasdotme@gmail.com>
Helped-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Helped-by: Felipe Contreras <felipe.contreras@gmail.com>
Helped-by: Phillip Wood <phillip.wood@dunelm.org.uk>
Helped-by: Junio C Hamano <gitster@pobox.com>
Based-on-patch-by: Olga Telezhnaya <olyatelezhnaya@gmail.com>
Signed-off-by: ZheNing Hu <adlternative@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-07-26 11:26:47 +08:00
|
|
|
if (va->s_size < 0 && vb->s_size < 0) {
|
|
|
|
int (*cmp_fn)(const char *, const char *);
|
|
|
|
cmp_fn = s->sort_flags & REF_SORTING_ICASE
|
|
|
|
? strcasecmp : strcmp;
|
|
|
|
cmp = cmp_fn(va->s, vb->s);
|
|
|
|
} else {
|
|
|
|
size_t a_size = va->s_size < 0 ?
|
|
|
|
strlen(va->s) : va->s_size;
|
|
|
|
size_t b_size = vb->s_size < 0 ?
|
|
|
|
strlen(vb->s) : vb->s_size;
|
|
|
|
int (*cmp_fn)(const void *, const void *, size_t);
|
|
|
|
cmp_fn = s->sort_flags & REF_SORTING_ICASE
|
|
|
|
? memcasecmp : memcmp;
|
|
|
|
|
|
|
|
cmp = cmp_fn(va->s, vb->s, b_size > a_size ?
|
|
|
|
a_size : b_size);
|
|
|
|
if (!cmp) {
|
|
|
|
if (a_size > b_size)
|
|
|
|
cmp = 1;
|
|
|
|
else if (a_size < b_size)
|
|
|
|
cmp = -1;
|
|
|
|
}
|
|
|
|
}
|
2021-01-07 17:51:49 +08:00
|
|
|
} else {
|
2017-04-21 04:52:09 +08:00
|
|
|
if (va->value < vb->value)
|
2015-06-14 03:37:27 +08:00
|
|
|
cmp = -1;
|
2017-04-21 04:52:09 +08:00
|
|
|
else if (va->value == vb->value)
|
ref-filter: apply fallback refname sort only after all user sorts
Commit 9e468334b4 (ref-filter: fallback on alphabetical comparison,
2015-10-30) taught ref-filter's sort to fallback to comparing refnames.
But it did it at the wrong level, overriding the comparison result for a
single "--sort" key from the user, rather than after all sort keys have
been exhausted.
This worked correctly for a single "--sort" option, but not for multiple
ones. We'd break any ties in the first key with the refname and never
evaluate the second key at all.
To make matters even more interesting, we only applied this fallback
sometimes! For a field like "taggeremail" which requires a string
comparison, we'd truly return the result of strcmp(), even if it was 0.
But for numerical "value" fields like "taggerdate", we did apply the
fallback. And that's why our multiple-sort test missed this: it uses
taggeremail as the main comparison.
So let's start by adding a much more rigorous test. We'll have a set of
commits expressing every combination of two tagger emails, dates, and
refnames. Then we can confirm that our sort is applied with the correct
precedence, and we'll be hitting both the string and value comparators.
That does show the bug, and the fix is simple: moving the fallback to
the outer compare_refs() function, after all ref_sorting keys have been
exhausted.
Note that in the outer function we don't have an "ignore_case" flag, as
it's part of each individual ref_sorting element. It's debatable what
such a fallback should do, since we didn't use the user's keys to match.
But until now we have been trying to respect that flag, so the
least-invasive thing is to try to continue to do so. Since all callers
in the current code either set the flag for all keys or for none, we can
just pull the flag from the first key. In a hypothetical world where the
user really can flip the case-insensitivity of keys separately, we may
want to extend the code to distinguish that case from a blanket
"--ignore-case".
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-05-03 17:13:09 +08:00
|
|
|
cmp = 0;
|
2015-06-14 03:37:27 +08:00
|
|
|
else
|
|
|
|
cmp = 1;
|
|
|
|
}
|
2015-09-10 23:48:25 +08:00
|
|
|
|
2021-01-07 17:51:53 +08:00
|
|
|
return (s->sort_flags & REF_SORTING_REVERSE && !cmp_detached_head)
|
|
|
|
? -cmp : cmp;
|
2015-06-14 03:37:27 +08:00
|
|
|
}
|
|
|
|
|
2017-01-23 01:58:07 +08:00
|
|
|
static int compare_refs(const void *a_, const void *b_, void *ref_sorting)
|
2015-06-14 03:37:27 +08:00
|
|
|
{
|
|
|
|
struct ref_array_item *a = *((struct ref_array_item **)a_);
|
|
|
|
struct ref_array_item *b = *((struct ref_array_item **)b_);
|
|
|
|
struct ref_sorting *s;
|
|
|
|
|
|
|
|
for (s = ref_sorting; s; s = s->next) {
|
|
|
|
int cmp = cmp_ref_sorting(s, a, b);
|
|
|
|
if (cmp)
|
|
|
|
return cmp;
|
|
|
|
}
|
ref-filter: apply fallback refname sort only after all user sorts
Commit 9e468334b4 (ref-filter: fallback on alphabetical comparison,
2015-10-30) taught ref-filter's sort to fallback to comparing refnames.
But it did it at the wrong level, overriding the comparison result for a
single "--sort" key from the user, rather than after all sort keys have
been exhausted.
This worked correctly for a single "--sort" option, but not for multiple
ones. We'd break any ties in the first key with the refname and never
evaluate the second key at all.
To make matters even more interesting, we only applied this fallback
sometimes! For a field like "taggeremail" which requires a string
comparison, we'd truly return the result of strcmp(), even if it was 0.
But for numerical "value" fields like "taggerdate", we did apply the
fallback. And that's why our multiple-sort test missed this: it uses
taggeremail as the main comparison.
So let's start by adding a much more rigorous test. We'll have a set of
commits expressing every combination of two tagger emails, dates, and
refnames. Then we can confirm that our sort is applied with the correct
precedence, and we'll be hitting both the string and value comparators.
That does show the bug, and the fix is simple: moving the fallback to
the outer compare_refs() function, after all ref_sorting keys have been
exhausted.
Note that in the outer function we don't have an "ignore_case" flag, as
it's part of each individual ref_sorting element. It's debatable what
such a fallback should do, since we didn't use the user's keys to match.
But until now we have been trying to respect that flag, so the
least-invasive thing is to try to continue to do so. Since all callers
in the current code either set the flag for all keys or for none, we can
just pull the flag from the first key. In a hypothetical world where the
user really can flip the case-insensitivity of keys separately, we may
want to extend the code to distinguish that case from a blanket
"--ignore-case".
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-05-03 17:13:09 +08:00
|
|
|
s = ref_sorting;
|
2021-01-07 17:51:51 +08:00
|
|
|
return s && s->sort_flags & REF_SORTING_ICASE ?
|
ref-filter: apply fallback refname sort only after all user sorts
Commit 9e468334b4 (ref-filter: fallback on alphabetical comparison,
2015-10-30) taught ref-filter's sort to fallback to comparing refnames.
But it did it at the wrong level, overriding the comparison result for a
single "--sort" key from the user, rather than after all sort keys have
been exhausted.
This worked correctly for a single "--sort" option, but not for multiple
ones. We'd break any ties in the first key with the refname and never
evaluate the second key at all.
To make matters even more interesting, we only applied this fallback
sometimes! For a field like "taggeremail" which requires a string
comparison, we'd truly return the result of strcmp(), even if it was 0.
But for numerical "value" fields like "taggerdate", we did apply the
fallback. And that's why our multiple-sort test missed this: it uses
taggeremail as the main comparison.
So let's start by adding a much more rigorous test. We'll have a set of
commits expressing every combination of two tagger emails, dates, and
refnames. Then we can confirm that our sort is applied with the correct
precedence, and we'll be hitting both the string and value comparators.
That does show the bug, and the fix is simple: moving the fallback to
the outer compare_refs() function, after all ref_sorting keys have been
exhausted.
Note that in the outer function we don't have an "ignore_case" flag, as
it's part of each individual ref_sorting element. It's debatable what
such a fallback should do, since we didn't use the user's keys to match.
But until now we have been trying to respect that flag, so the
least-invasive thing is to try to continue to do so. Since all callers
in the current code either set the flag for all keys or for none, we can
just pull the flag from the first key. In a hypothetical world where the
user really can flip the case-insensitivity of keys separately, we may
want to extend the code to distinguish that case from a blanket
"--ignore-case".
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-05-03 17:13:09 +08:00
|
|
|
strcasecmp(a->refname, b->refname) :
|
|
|
|
strcmp(a->refname, b->refname);
|
2015-06-14 03:37:27 +08:00
|
|
|
}
|
|
|
|
|
2021-01-07 17:51:51 +08:00
|
|
|
void ref_sorting_set_sort_flags_all(struct ref_sorting *sorting,
|
|
|
|
unsigned int mask, int on)
|
ref-filter: apply --ignore-case to all sorting keys
All of the ref-filter users (for-each-ref, branch, and tag) take an
--ignore-case option which makes filtering and sorting case-insensitive.
However, this option was applied only to the first element of the
ref_sorting list. So:
git for-each-ref --ignore-case --sort=refname
would do what you expect, but:
git for-each-ref --ignore-case --sort=refname --sort=taggername
would sort the primary key (taggername) case-insensitively, but sort the
refname case-sensitively. We have two options here:
- teach callers to set ignore_case on the whole list
- replace the ref_sorting list with a struct that contains both the
list of sorting keys, as well as options that apply to _all_
keys
I went with the first one here, as it gives more flexibility if we later
want to let the users set the flag per-key (presumably through some
special syntax when defining the key; for now it's all or nothing
through --ignore-case).
The new test covers this by sorting on both tagger and subject
case-insensitively, which should compare "a" and "A" identically, but
still sort them before "b" and "B". We'll break ties by sorting on the
refname to give ourselves a stable output (this is actually supposed to
be done automatically, but there's another bug which will be fixed in
the next commit).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-05-03 17:11:57 +08:00
|
|
|
{
|
2021-01-07 17:51:51 +08:00
|
|
|
for (; sorting; sorting = sorting->next) {
|
|
|
|
if (on)
|
|
|
|
sorting->sort_flags |= mask;
|
|
|
|
else
|
|
|
|
sorting->sort_flags &= ~mask;
|
|
|
|
}
|
2015-06-14 03:37:27 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
void ref_array_sort(struct ref_sorting *sorting, struct ref_array *array)
|
|
|
|
{
|
ref-filter.c: really don't sort when using --no-sort
When '--no-sort' is passed to 'for-each-ref', 'tag', and 'branch', the
printed refs are still sorted by ascending refname. Change the handling of
sort options in these commands so that '--no-sort' to truly disables
sorting.
'--no-sort' does not disable sorting in these commands is because their
option parsing does not distinguish between "the absence of '--sort'"
(and/or values for tag.sort & branch.sort) and '--no-sort'. Both result in
an empty 'sorting_options' string list, which is parsed by
'ref_sorting_options()' to create the 'struct ref_sorting *' for the
command. If the string list is empty, 'ref_sorting_options()' interprets
that as "the absence of '--sort'" and returns the default ref sorting
structure (equivalent to "refname" sort).
To handle '--no-sort' properly while preserving the "refname" sort in the
"absence of --sort'" case, first explicitly add "refname" to the string list
*before* parsing options. This alone doesn't actually change any behavior,
since 'compare_refs()' already falls back on comparing refnames if two refs
are equal w.r.t all other sort keys.
Now that the string list is populated by default, '--no-sort' is the only
way to empty the 'sorting_options' string list. Update
'ref_sorting_options()' to return a NULL 'struct ref_sorting *' if the
string list is empty, and add a condition to 'ref_array_sort()' to skip the
sort altogether if the sort structure is NULL. Note that other functions
using 'struct ref_sorting *' do not need any changes because they already
ignore NULL values.
Finally, remove the condition around sorting in 'ls-remote', since it's no
longer necessary. Unlike 'for-each-ref' et. al., it does *not* do any
sorting by default. This default is preserved by simply leaving its sort key
string list empty before parsing options; if no additional sort keys are
set, 'struct ref_sorting *' is NULL and sorting is skipped.
Signed-off-by: Victoria Dye <vdye@github.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-11-15 03:53:49 +08:00
|
|
|
if (sorting)
|
|
|
|
QSORT_S(array->items, array->nr, compare_refs, sorting);
|
2015-06-14 03:37:27 +08:00
|
|
|
}
|
|
|
|
|
2015-09-10 23:48:18 +08:00
|
|
|
static void append_literal(const char *cp, const char *ep, struct ref_formatting_state *state)
|
2015-06-14 03:37:27 +08:00
|
|
|
{
|
2015-09-10 23:48:18 +08:00
|
|
|
struct strbuf *s = &state->stack->output;
|
|
|
|
|
2015-06-14 03:37:27 +08:00
|
|
|
while (*cp && (!ep || cp < ep)) {
|
|
|
|
if (*cp == '%') {
|
|
|
|
if (cp[1] == '%')
|
|
|
|
cp++;
|
|
|
|
else {
|
2016-09-03 23:59:20 +08:00
|
|
|
int ch = hex2chr(cp + 1);
|
2015-06-14 03:37:27 +08:00
|
|
|
if (0 <= ch) {
|
2015-09-10 23:48:18 +08:00
|
|
|
strbuf_addch(s, ch);
|
2015-06-14 03:37:27 +08:00
|
|
|
cp += 3;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2015-09-10 23:48:18 +08:00
|
|
|
strbuf_addch(s, *cp);
|
2015-06-14 03:37:27 +08:00
|
|
|
cp++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-03-29 20:49:45 +08:00
|
|
|
int format_ref_array_item(struct ref_array_item *info,
|
2021-07-26 11:26:49 +08:00
|
|
|
struct ref_format *format,
|
|
|
|
struct strbuf *final_buf,
|
|
|
|
struct strbuf *error_buf)
|
2015-06-14 03:37:27 +08:00
|
|
|
{
|
|
|
|
const char *cp, *sp, *ep;
|
2015-09-10 23:48:18 +08:00
|
|
|
struct ref_formatting_state state = REF_FORMATTING_STATE_INIT;
|
|
|
|
|
2017-07-13 23:01:18 +08:00
|
|
|
state.quote_style = format->quote_style;
|
2015-09-10 23:48:18 +08:00
|
|
|
push_stack_element(&state.stack);
|
2015-06-14 03:37:27 +08:00
|
|
|
|
2017-07-13 23:01:18 +08:00
|
|
|
for (cp = format->format; *cp && (sp = find_next(cp)); cp = ep + 1) {
|
2015-06-14 03:37:27 +08:00
|
|
|
struct atom_value *atomv;
|
2018-03-29 20:49:45 +08:00
|
|
|
int pos;
|
2015-06-14 03:37:27 +08:00
|
|
|
|
|
|
|
ep = strchr(sp, ')');
|
|
|
|
if (cp < sp)
|
2015-09-10 23:48:18 +08:00
|
|
|
append_literal(cp, sp, &state);
|
2018-03-29 20:49:45 +08:00
|
|
|
pos = parse_ref_filter_atom(format, sp + 2, ep, error_buf);
|
2018-03-29 20:49:45 +08:00
|
|
|
if (pos < 0 || get_ref_atom_value(info, pos, &atomv, error_buf) ||
|
|
|
|
atomv->handler(atomv, &state, error_buf)) {
|
2018-03-29 20:49:45 +08:00
|
|
|
pop_stack_element(&state.stack);
|
|
|
|
return -1;
|
|
|
|
}
|
2015-06-14 03:37:27 +08:00
|
|
|
}
|
|
|
|
if (*cp) {
|
|
|
|
sp = cp + strlen(cp);
|
2015-09-10 23:48:18 +08:00
|
|
|
append_literal(cp, sp, &state);
|
2015-06-14 03:37:27 +08:00
|
|
|
}
|
2017-07-13 23:02:30 +08:00
|
|
|
if (format->need_color_reset_at_eol) {
|
ref-filter: add %(raw) atom
Add new formatting option `%(raw)`, which will print the raw
object data without any changes. It will help further to migrate
all cat-file formatting logic from cat-file to ref-filter.
The raw data of blob, tree objects may contain '\0', but most of
the logic in `ref-filter` depends on the output of the atom being
text (specifically, no embedded NULs in it).
E.g. `quote_formatting()` use `strbuf_addstr()` or `*._quote_buf()`
add the data to the buffer. The raw data of a tree object is
`100644 one\0...`, only the `100644 one` will be added to the buffer,
which is incorrect.
Therefore, we need to find a way to record the length of the
atom_value's member `s`. Although strbuf can already record the
string and its length, if we want to replace the type of atom_value's
member `s` with strbuf, many places in ref-filter that are filled
with dynamically allocated mermory in `v->s` are not easy to replace.
At the same time, we need to check if `v->s == NULL` in
populate_value(), and strbuf cannot easily distinguish NULL and empty
strings, but c-style "const char *" can do it. So add a new member in
`struct atom_value`: `s_size`, which can record raw object size, it
can help us add raw object data to the buffer or compare two buffers
which contain raw object data.
Note that `--format=%(raw)` cannot be used with `--python`, `--shell`,
`--tcl`, and `--perl` because if the binary raw data is passed to a
variable in such languages, these may not support arbitrary binary data
in their string variable type.
Reviewed-by: Jacob Keller <jacob.keller@gmail.com>
Mentored-by: Christian Couder <christian.couder@gmail.com>
Mentored-by: Hariom Verma <hariom18599@gmail.com>
Helped-by: Bagas Sanjaya <bagasdotme@gmail.com>
Helped-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Helped-by: Felipe Contreras <felipe.contreras@gmail.com>
Helped-by: Phillip Wood <phillip.wood@dunelm.org.uk>
Helped-by: Junio C Hamano <gitster@pobox.com>
Based-on-patch-by: Olga Telezhnaya <olyatelezhnaya@gmail.com>
Signed-off-by: ZheNing Hu <adlternative@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-07-26 11:26:47 +08:00
|
|
|
struct atom_value resetv = ATOM_VALUE_INIT;
|
2017-07-13 22:58:56 +08:00
|
|
|
resetv.s = GIT_COLOR_RESET;
|
2018-03-29 20:49:45 +08:00
|
|
|
if (append_atom(&resetv, &state, error_buf)) {
|
|
|
|
pop_stack_element(&state.stack);
|
|
|
|
return -1;
|
|
|
|
}
|
2015-06-14 03:37:27 +08:00
|
|
|
}
|
2018-03-29 20:49:45 +08:00
|
|
|
if (state.stack->prev) {
|
|
|
|
pop_stack_element(&state.stack);
|
|
|
|
return strbuf_addf_ret(error_buf, -1, _("format: %%(end) atom missing"));
|
2015-06-14 03:37:27 +08:00
|
|
|
}
|
2017-01-10 16:49:39 +08:00
|
|
|
strbuf_addbuf(final_buf, &state.stack->output);
|
2015-09-10 23:48:18 +08:00
|
|
|
pop_stack_element(&state.stack);
|
2018-03-29 20:49:45 +08:00
|
|
|
return 0;
|
2017-01-10 16:49:39 +08:00
|
|
|
}
|
|
|
|
|
2023-11-15 03:53:52 +08:00
|
|
|
void print_formatted_ref_array(struct ref_array *array, struct ref_format *format)
|
|
|
|
{
|
|
|
|
int total;
|
|
|
|
struct strbuf output = STRBUF_INIT, err = STRBUF_INIT;
|
|
|
|
|
|
|
|
total = format->array_opts.max_count;
|
|
|
|
if (!total || array->nr < total)
|
|
|
|
total = array->nr;
|
|
|
|
for (int i = 0; i < total; i++) {
|
|
|
|
strbuf_reset(&err);
|
|
|
|
strbuf_reset(&output);
|
|
|
|
if (format_ref_array_item(array->items[i], format, &output, &err))
|
|
|
|
die("%s", err.buf);
|
|
|
|
if (output.len || !format->array_opts.omit_empty) {
|
|
|
|
fwrite(output.buf, 1, output.len, stdout);
|
|
|
|
putchar('\n');
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
strbuf_release(&err);
|
|
|
|
strbuf_release(&output);
|
|
|
|
}
|
|
|
|
|
2018-04-07 02:58:32 +08:00
|
|
|
void pretty_print_ref(const char *name, const struct object_id *oid,
|
2021-07-26 11:26:49 +08:00
|
|
|
struct ref_format *format)
|
2017-01-18 07:37:19 +08:00
|
|
|
{
|
|
|
|
struct ref_array_item *ref_item;
|
2021-04-19 19:28:44 +08:00
|
|
|
struct strbuf output = STRBUF_INIT;
|
|
|
|
struct strbuf err = STRBUF_INIT;
|
|
|
|
|
2018-04-07 02:59:26 +08:00
|
|
|
ref_item = new_ref_array_item(name, oid);
|
2017-01-18 07:37:19 +08:00
|
|
|
ref_item->kind = ref_kind_from_refname(name);
|
2021-04-19 19:28:44 +08:00
|
|
|
if (format_ref_array_item(ref_item, format, &output, &err))
|
|
|
|
die("%s", err.buf);
|
|
|
|
fwrite(output.buf, 1, output.len, stdout);
|
|
|
|
putchar('\n');
|
|
|
|
|
|
|
|
strbuf_release(&err);
|
|
|
|
strbuf_release(&output);
|
2017-01-18 07:37:19 +08:00
|
|
|
free_array_item(ref_item);
|
|
|
|
}
|
|
|
|
|
2017-07-13 23:02:58 +08:00
|
|
|
static int parse_sorting_atom(const char *atom)
|
|
|
|
{
|
2017-07-13 23:06:40 +08:00
|
|
|
/*
|
|
|
|
* This parses an atom using a dummy ref_format, since we don't
|
|
|
|
* actually care about the formatting details.
|
|
|
|
*/
|
|
|
|
struct ref_format dummy = REF_FORMAT_INIT;
|
2017-07-13 23:02:58 +08:00
|
|
|
const char *end = atom + strlen(atom);
|
2018-03-29 20:49:45 +08:00
|
|
|
struct strbuf err = STRBUF_INIT;
|
|
|
|
int res = parse_ref_filter_atom(&dummy, atom, end, &err);
|
|
|
|
if (res < 0)
|
|
|
|
die("%s", err.buf);
|
|
|
|
strbuf_release(&err);
|
|
|
|
return res;
|
2017-07-13 23:02:58 +08:00
|
|
|
}
|
|
|
|
|
for-each-ref: delay parsing of --sort=<atom> options
The for-each-ref family of commands invoke parsers immediately when
it sees each --sort=<atom> option, and die before even seeing the
other options on the command line when the <atom> is unrecognised.
Instead, accumulate them in a string list, and have them parsed into
a ref_sorting structure after the command line parsing is done. As
a consequence, "git branch --sort=bogus -h" used to fail to give the
brief help, which arguably may have been a feature, now does so,
which is more consistent with how other options work.
The patch is smaller than the actual extent of the "damage" to the
codebase, thanks to the fact that the original code consistently
used OPT_REF_SORT() macro to handle command line options. We only
needed to replace the variable used for the list, and implementation
of the callback function used in the macro.
The old rule was for the users of the API to:
- Declare ref_sorting and ref_sorting_tail variables;
- OPT_REF_SORT() macro will instantiate ref_sorting instance (which
may barf and die) and append it to the tail;
- Append to the tail each ref_sorting read from the configuration
by parsing in the config callback (which may barf and die);
- See if ref_sorting is null and use ref_sorting_default() instead.
Now the rule is not all that different but is simpler:
- Declare ref_sorting_options string list.
- OPT_REF_SORT() macro will append it to the string list;
- Append to the string list the sort key read from the
configuration;
- call ref_sorting_options() to turn the string list to ref_sorting
structure (which also deals with the default value).
As side effects, this change also cleans up a few issues:
- 95be717c (parse_opt_ref_sorting: always use with NONEG flag,
2019-03-20) muses that "git for-each-ref --no-sort" should simply
clear the sort keys accumulated so far; it now does.
- The implementation detail of "struct ref_sorting" and the helper
function parse_ref_sorting() can now be private to the ref-filter
API implementation.
- If you set branch.sort to a bogus value, then any "git branch"
invocation, not only the listing mode, would abort with the
original code; now it doesn't
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-10-21 03:23:53 +08:00
|
|
|
static void parse_ref_sorting(struct ref_sorting **sorting_tail, const char *arg)
|
2015-06-14 03:37:27 +08:00
|
|
|
{
|
|
|
|
struct ref_sorting *s;
|
|
|
|
|
2021-03-14 00:17:22 +08:00
|
|
|
CALLOC_ARRAY(s, 1);
|
2015-06-14 03:37:27 +08:00
|
|
|
s->next = *sorting_tail;
|
|
|
|
*sorting_tail = s;
|
|
|
|
|
|
|
|
if (*arg == '-') {
|
2021-01-07 17:51:51 +08:00
|
|
|
s->sort_flags |= REF_SORTING_REVERSE;
|
2015-06-14 03:37:27 +08:00
|
|
|
arg++;
|
|
|
|
}
|
2015-09-10 23:48:25 +08:00
|
|
|
if (skip_prefix(arg, "version:", &arg) ||
|
|
|
|
skip_prefix(arg, "v:", &arg))
|
2021-01-07 17:51:51 +08:00
|
|
|
s->sort_flags |= REF_SORTING_VERSION;
|
2017-07-13 23:02:58 +08:00
|
|
|
s->atom = parse_sorting_atom(arg);
|
2017-07-13 23:02:44 +08:00
|
|
|
}
|
|
|
|
|
for-each-ref: delay parsing of --sort=<atom> options
The for-each-ref family of commands invoke parsers immediately when
it sees each --sort=<atom> option, and die before even seeing the
other options on the command line when the <atom> is unrecognised.
Instead, accumulate them in a string list, and have them parsed into
a ref_sorting structure after the command line parsing is done. As
a consequence, "git branch --sort=bogus -h" used to fail to give the
brief help, which arguably may have been a feature, now does so,
which is more consistent with how other options work.
The patch is smaller than the actual extent of the "damage" to the
codebase, thanks to the fact that the original code consistently
used OPT_REF_SORT() macro to handle command line options. We only
needed to replace the variable used for the list, and implementation
of the callback function used in the macro.
The old rule was for the users of the API to:
- Declare ref_sorting and ref_sorting_tail variables;
- OPT_REF_SORT() macro will instantiate ref_sorting instance (which
may barf and die) and append it to the tail;
- Append to the tail each ref_sorting read from the configuration
by parsing in the config callback (which may barf and die);
- See if ref_sorting is null and use ref_sorting_default() instead.
Now the rule is not all that different but is simpler:
- Declare ref_sorting_options string list.
- OPT_REF_SORT() macro will append it to the string list;
- Append to the string list the sort key read from the
configuration;
- call ref_sorting_options() to turn the string list to ref_sorting
structure (which also deals with the default value).
As side effects, this change also cleans up a few issues:
- 95be717c (parse_opt_ref_sorting: always use with NONEG flag,
2019-03-20) muses that "git for-each-ref --no-sort" should simply
clear the sort keys accumulated so far; it now does.
- The implementation detail of "struct ref_sorting" and the helper
function parse_ref_sorting() can now be private to the ref-filter
API implementation.
- If you set branch.sort to a bogus value, then any "git branch"
invocation, not only the listing mode, would abort with the
original code; now it doesn't
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-10-21 03:23:53 +08:00
|
|
|
struct ref_sorting *ref_sorting_options(struct string_list *options)
|
2017-07-13 23:02:44 +08:00
|
|
|
{
|
for-each-ref: delay parsing of --sort=<atom> options
The for-each-ref family of commands invoke parsers immediately when
it sees each --sort=<atom> option, and die before even seeing the
other options on the command line when the <atom> is unrecognised.
Instead, accumulate them in a string list, and have them parsed into
a ref_sorting structure after the command line parsing is done. As
a consequence, "git branch --sort=bogus -h" used to fail to give the
brief help, which arguably may have been a feature, now does so,
which is more consistent with how other options work.
The patch is smaller than the actual extent of the "damage" to the
codebase, thanks to the fact that the original code consistently
used OPT_REF_SORT() macro to handle command line options. We only
needed to replace the variable used for the list, and implementation
of the callback function used in the macro.
The old rule was for the users of the API to:
- Declare ref_sorting and ref_sorting_tail variables;
- OPT_REF_SORT() macro will instantiate ref_sorting instance (which
may barf and die) and append it to the tail;
- Append to the tail each ref_sorting read from the configuration
by parsing in the config callback (which may barf and die);
- See if ref_sorting is null and use ref_sorting_default() instead.
Now the rule is not all that different but is simpler:
- Declare ref_sorting_options string list.
- OPT_REF_SORT() macro will append it to the string list;
- Append to the string list the sort key read from the
configuration;
- call ref_sorting_options() to turn the string list to ref_sorting
structure (which also deals with the default value).
As side effects, this change also cleans up a few issues:
- 95be717c (parse_opt_ref_sorting: always use with NONEG flag,
2019-03-20) muses that "git for-each-ref --no-sort" should simply
clear the sort keys accumulated so far; it now does.
- The implementation detail of "struct ref_sorting" and the helper
function parse_ref_sorting() can now be private to the ref-filter
API implementation.
- If you set branch.sort to a bogus value, then any "git branch"
invocation, not only the listing mode, would abort with the
original code; now it doesn't
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-10-21 03:23:53 +08:00
|
|
|
struct string_list_item *item;
|
|
|
|
struct ref_sorting *sorting = NULL, **tail = &sorting;
|
|
|
|
|
ref-filter.c: really don't sort when using --no-sort
When '--no-sort' is passed to 'for-each-ref', 'tag', and 'branch', the
printed refs are still sorted by ascending refname. Change the handling of
sort options in these commands so that '--no-sort' truly disables
sorting.
'--no-sort' does not disable sorting in these commands because their
option parsing does not distinguish between "the absence of '--sort'"
(and/or values for tag.sort & branch.sort) and '--no-sort'. Both result in
an empty 'sorting_options' string list, which is parsed by
'ref_sorting_options()' to create the 'struct ref_sorting *' for the
command. If the string list is empty, 'ref_sorting_options()' interprets
that as "the absence of '--sort'" and returns the default ref sorting
structure (equivalent to "refname" sort).
To handle '--no-sort' properly while preserving the "refname" sort in the
"absence of '--sort'" case, first explicitly add "refname" to the string list
*before* parsing options. This alone doesn't actually change any behavior,
since 'compare_refs()' already falls back on comparing refnames if two refs
are equal w.r.t all other sort keys.
Now that the string list is populated by default, '--no-sort' is the only
way to empty the 'sorting_options' string list. Update
'ref_sorting_options()' to return a NULL 'struct ref_sorting *' if the
string list is empty, and add a condition to 'ref_array_sort()' to skip the
sort altogether if the sort structure is NULL. Note that other functions
using 'struct ref_sorting *' do not need any changes because they already
ignore NULL values.
Finally, remove the condition around sorting in 'ls-remote', since it's no
longer necessary. Unlike 'for-each-ref' et. al., it does *not* do any
sorting by default. This default is preserved by simply leaving its sort key
string list empty before parsing options; if no additional sort keys are
set, 'struct ref_sorting *' is NULL and sorting is skipped.
Signed-off-by: Victoria Dye <vdye@github.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-11-15 03:53:49 +08:00
|
|
|
if (options->nr) {
|
for-each-ref: delay parsing of --sort=<atom> options
The for-each-ref family of commands invoke parsers immediately when
it sees each --sort=<atom> option, and die before even seeing the
other options on the command line when the <atom> is unrecognised.
Instead, accumulate them in a string list, and have them parsed into
a ref_sorting structure after the command line parsing is done. As
a consequence, "git branch --sort=bogus -h" used to fail to give the
brief help, which arguably may have been a feature, now does so,
which is more consistent with how other options work.
The patch is smaller than the actual extent of the "damage" to the
codebase, thanks to the fact that the original code consistently
used OPT_REF_SORT() macro to handle command line options. We only
needed to replace the variable used for the list, and implementation
of the callback function used in the macro.
The old rule was for the users of the API to:
- Declare ref_sorting and ref_sorting_tail variables;
- OPT_REF_SORT() macro will instantiate ref_sorting instance (which
may barf and die) and append it to the tail;
- Append to the tail each ref_sorting read from the configuration
by parsing in the config callback (which may barf and die);
- See if ref_sorting is null and use ref_sorting_default() instead.
Now the rule is not all that different but is simpler:
- Declare ref_sorting_options string list.
- OPT_REF_SORT() macro will append it to the string list;
- Append to the string list the sort key read from the
configuration;
- call ref_sorting_options() to turn the string list to ref_sorting
structure (which also deals with the default value).
As side effects, this change also cleans up a few issues:
- 95be717c (parse_opt_ref_sorting: always use with NONEG flag,
2019-03-20) muses that "git for-each-ref --no-sort" should simply
clear the sort keys accumulated so far; it now does.
- The implementation detail of "struct ref_sorting" and the helper
function parse_ref_sorting() can now be private to the ref-filter
API implementation.
- If you set branch.sort to a bogus value, then any "git branch"
invocation, not only the listing mode, would abort with the
original code; now it doesn't.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-10-21 03:23:53 +08:00
|
|
|
for_each_string_list_item(item, options)
|
|
|
|
parse_ref_sorting(tail, item->string);
|
|
|
|
}
|
|
|
|
|
parse_opt_ref_sorting: always use with NONEG flag
The "--sort" parameter of for-each-ref, etc, does not handle negation,
and instead returns an error to the parse-options code. But neither
piece of code prints anything for the user, which may leave them
confused:
$ git for-each-ref --no-sort
$ echo $?
129
As the comment in the callback function notes, this probably should
clear the list, which would make it consistent with other list-like
options (i.e., anything that uses OPT_STRING_LIST currently).
Unfortunately that's a bit tricky due to the way the ref-filter code
works. But in the meantime, let's at least make the error a little less
confusing:
- switch to using PARSE_OPT_NONEG in the option definition, which will
cause the options code to produce a useful message
- since this was cut-and-pasted to four different spots, let's define
a single OPT_REF_SORT() macro that we can use everywhere
- the callback can use BUG_ON_OPT_NEG() to make sure the correct flags
are used (incidentally, this also satisfies -Wunused-parameters,
since we're now looking at "unset")
- expand the comment into a NEEDSWORK to make it clear that the
direction is right, but the details need to be worked out
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-03-21 04:22:15 +08:00
|
|
|
/*
|
for-each-ref: delay parsing of --sort=<atom> options
The for-each-ref family of commands invokes parsers immediately when
it sees each --sort=<atom> option, and dies before even seeing the
other options on the command line when the <atom> is unrecognised.
Instead, accumulate them in a string list, and have them parsed into
a ref_sorting structure after the command line parsing is done. As
a consequence, "git branch --sort=bogus -h" used to fail to give the
brief help, which arguably may have been a feature, now does so,
which is more consistent with how other options work.
The patch is smaller than the actual extent of the "damage" to the
codebase, thanks to the fact that the original code consistently
used OPT_REF_SORT() macro to handle command line options. We only
needed to replace the variable used for the list, and implementation
of the callback function used in the macro.
The old rule was for the users of the API to:
- Declare ref_sorting and ref_sorting_tail variables;
- OPT_REF_SORT() macro will instantiate ref_sorting instance (which
may barf and die) and append it to the tail;
- Append to the tail each ref_sorting read from the configuration
by parsing in the config callback (which may barf and die);
- See if ref_sorting is null and use ref_sorting_default() instead.
Now the rule is not all that different but is simpler:
- Declare ref_sorting_options string list.
- OPT_REF_SORT() macro will append it to the string list;
- Append to the string list the sort key read from the
configuration;
- call ref_sorting_options() to turn the string list to ref_sorting
structure (which also deals with the default value).
As side effects, this change also cleans up a few issues:
- 95be717c (parse_opt_ref_sorting: always use with NONEG flag,
2019-03-20) muses that "git for-each-ref --no-sort" should simply
clear the sort keys accumulated so far; it now does.
- The implementation detail of "struct ref_sorting" and the helper
function parse_ref_sorting() can now be private to the ref-filter
API implementation.
- If you set branch.sort to a bogus value, then any "git branch"
invocation, not only the listing mode, would abort with the
original code; now it doesn't.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-10-21 03:23:53 +08:00
|
|
|
* From here on, the ref_sorting list should be used to talk
|
|
|
|
* about the sort order used for the output. The caller
|
|
|
|
* should not touch the string form anymore.
|
parse_opt_ref_sorting: always use with NONEG flag
The "--sort" parameter of for-each-ref, etc, does not handle negation,
and instead returns an error to the parse-options code. But neither
piece of code prints anything for the user, which may leave them
confused:
$ git for-each-ref --no-sort
$ echo $?
129
As the comment in the callback function notes, this probably should
clear the list, which would make it consistent with other list-like
options (i.e., anything that uses OPT_STRING_LIST currently).
Unfortunately that's a bit tricky due to the way the ref-filter code
works. But in the meantime, let's at least make the error a little less
confusing:
- switch to using PARSE_OPT_NONEG in the option definition, which will
cause the options code to produce a useful message
- since this was cut-and-pasted to four different spots, let's define
a single OPT_REF_SORT() macro that we can use everywhere
- the callback can use BUG_ON_OPT_NEG() to make sure the correct flags
are used (incidentally, this also satisfies -Wunused-parameters,
since we're now looking at "unset")
- expand the comment into a NEEDSWORK to make it clear that the
direction is right, but the details need to be worked out
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-03-21 04:22:15 +08:00
|
|
|
*/
|
for-each-ref: delay parsing of --sort=<atom> options
The for-each-ref family of commands invokes parsers immediately when
it sees each --sort=<atom> option, and dies before even seeing the
other options on the command line when the <atom> is unrecognised.
Instead, accumulate them in a string list, and have them parsed into
a ref_sorting structure after the command line parsing is done. As
a consequence, "git branch --sort=bogus -h" used to fail to give the
brief help, which arguably may have been a feature, now does so,
which is more consistent with how other options work.
The patch is smaller than the actual extent of the "damage" to the
codebase, thanks to the fact that the original code consistently
used OPT_REF_SORT() macro to handle command line options. We only
needed to replace the variable used for the list, and implementation
of the callback function used in the macro.
The old rule was for the users of the API to:
- Declare ref_sorting and ref_sorting_tail variables;
- OPT_REF_SORT() macro will instantiate ref_sorting instance (which
may barf and die) and append it to the tail;
- Append to the tail each ref_sorting read from the configuration
by parsing in the config callback (which may barf and die);
- See if ref_sorting is null and use ref_sorting_default() instead.
Now the rule is not all that different but is simpler:
- Declare ref_sorting_options string list.
- OPT_REF_SORT() macro will append it to the string list;
- Append to the string list the sort key read from the
configuration;
- call ref_sorting_options() to turn the string list to ref_sorting
structure (which also deals with the default value).
As side effects, this change also cleans up a few issues:
- 95be717c (parse_opt_ref_sorting: always use with NONEG flag,
2019-03-20) muses that "git for-each-ref --no-sort" should simply
clear the sort keys accumulated so far; it now does.
- The implementation detail of "struct ref_sorting" and the helper
function parse_ref_sorting() can now be private to the ref-filter
API implementation.
- If you set branch.sort to a bogus value, then any "git branch"
invocation, not only the listing mode, would abort with the
original code; now it doesn't.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-10-21 03:23:53 +08:00
|
|
|
string_list_clear(options, 0);
|
|
|
|
return sorting;
|
2015-06-14 03:37:27 +08:00
|
|
|
}
|
2015-07-08 00:06:11 +08:00
|
|
|
|
2021-10-21 02:27:20 +08:00
|
|
|
void ref_sorting_release(struct ref_sorting *sorting)
|
|
|
|
{
|
|
|
|
while (sorting) {
|
|
|
|
struct ref_sorting *next = sorting->next;
|
|
|
|
free(sorting);
|
|
|
|
sorting = next;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-07-08 00:06:11 +08:00
|
|
|
int parse_opt_merge_filter(const struct option *opt, const char *arg, int unset)
|
|
|
|
{
|
|
|
|
struct ref_filter *rf = opt->value;
|
2017-05-07 06:10:09 +08:00
|
|
|
struct object_id oid;
|
2020-09-16 10:08:40 +08:00
|
|
|
struct commit *merge_commit;
|
2015-07-08 00:06:11 +08:00
|
|
|
|
assert NOARG/NONEG behavior of parse-options callbacks
When we define a parse-options callback, the flags we put in the option
struct must match what the callback expects. For example, a callback
which does not handle the "unset" parameter should only be used with
PARSE_OPT_NONEG. But since the callback and the option struct are not
defined next to each other, it's easy to get this wrong (as earlier
patches in this series show).
Fortunately, the compiler can help us here: compiling with
-Wunused-parameters can show us which callbacks ignore their "unset"
parameters (and likewise, ones that ignore "arg" expect to be triggered
with PARSE_OPT_NOARG).
But after we've inspected a callback and determined that all of its
callers use the right flags, what do we do next? We'd like to silence
the compiler warning, but do so in a way that will catch any wrong calls
in the future.
We can do that by actually checking those variables and asserting that
they match our expectations. Because this is such a common pattern,
we'll introduce some helper macros. The resulting messages aren't
as descriptive as we could make them, but the file/line information from
BUG() is enough to identify the problem (and anyway, the point is that
these should never be seen).
Each of the annotated callbacks in this patch triggers
-Wunused-parameters, and was manually inspected to make sure all callers
use the correct options (so none of these BUGs should be triggerable).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-11-05 14:45:42 +08:00
|
|
|
BUG_ON_OPT_NEG(unset);
|
|
|
|
|
2023-03-28 21:58:46 +08:00
|
|
|
if (repo_get_oid(the_repository, arg, &oid))
|
2015-07-08 00:06:11 +08:00
|
|
|
die(_("malformed object name %s"), arg);
|
|
|
|
|
2020-09-16 10:08:40 +08:00
|
|
|
merge_commit = lookup_commit_reference_gently(the_repository, &oid, 0);
|
|
|
|
|
|
|
|
if (!merge_commit)
|
2018-11-10 13:16:11 +08:00
|
|
|
return error(_("option `%s' must point to a commit"), opt->long_name);
|
2015-07-08 00:06:11 +08:00
|
|
|
|
2020-09-16 10:08:40 +08:00
|
|
|
if (starts_with(opt->long_name, "no"))
|
|
|
|
commit_list_insert(merge_commit, &rf->unreachable_from);
|
|
|
|
else
|
|
|
|
commit_list_insert(merge_commit, &rf->reachable_from);
|
|
|
|
|
2015-07-08 00:06:11 +08:00
|
|
|
return 0;
|
|
|
|
}
|
2023-07-11 05:12:13 +08:00
|
|
|
|
|
|
|
void ref_filter_init(struct ref_filter *filter)
|
|
|
|
{
|
|
|
|
struct ref_filter blank = REF_FILTER_INIT;
|
|
|
|
memcpy(filter, &blank, sizeof(blank));
|
|
|
|
}
|
|
|
|
|
|
|
|
void ref_filter_clear(struct ref_filter *filter)
|
|
|
|
{
|
builtin/for-each-ref.c: add `--exclude` option
When using `for-each-ref`, it is sometimes convenient for the caller to
be able to exclude certain parts of the references.
For example, if there are many `refs/__hidden__/*` references, the
caller may want to emit all references *except* the hidden ones.
Currently, the only way to do this is to post-process the output, like:
$ git for-each-ref --format='%(refname)' | grep -v '^refs/hidden/'
Which is do-able, but requires processing a potentially large quantity
of references.
Teach `git for-each-ref` a new `--exclude=<pattern>` option, which
excludes references from the results if they match one or more excluded
patterns.
This patch provides a naive implementation where the `ref_filter` still
sees all references (including ones that it will discard) and is left to
check whether each reference matches any excluded pattern(s) before
emitting them.
By culling out references we know the caller doesn't care about, we can
avoid allocating memory for their storage, as well as spending time
sorting the output (among other things). Even the naive implementation
provides a significant speed-up on a modified copy of linux.git (that
has a hidden ref pointing at each commit):
$ hyperfine \
'git.compile for-each-ref --format="%(objectname) %(refname)" | grep -vE "[0-9a-f]{40} refs/pull/"' \
'git.compile for-each-ref --format="%(objectname) %(refname)" --exclude refs/pull/'
Benchmark 1: git.compile for-each-ref --format="%(objectname) %(refname)" | grep -vE "[0-9a-f]{40} refs/pull/"
Time (mean ± σ): 820.1 ms ± 2.0 ms [User: 703.7 ms, System: 152.0 ms]
Range (min … max): 817.7 ms … 823.3 ms 10 runs
Benchmark 2: git.compile for-each-ref --format="%(objectname) %(refname)" --exclude refs/pull/
Time (mean ± σ): 106.6 ms ± 1.1 ms [User: 99.4 ms, System: 7.1 ms]
Range (min … max): 104.7 ms … 109.1 ms 27 runs
Summary
'git.compile for-each-ref --format="%(objectname) %(refname)" --exclude refs/pull/' ran
7.69 ± 0.08 times faster than 'git.compile for-each-ref --format="%(objectname) %(refname)" | grep -vE "[0-9a-f]{40} refs/pull/"'
Subsequent patches will improve on this by avoiding visiting excluded
sections of the `packed-refs` file in certain cases.
Co-authored-by: Jeff King <peff@peff.net>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-07-11 05:12:19 +08:00
|
|
|
strvec_clear(&filter->exclude);
|
2023-07-11 05:12:13 +08:00
|
|
|
oid_array_clear(&filter->points_at);
|
|
|
|
free_commit_list(filter->with_commit);
|
|
|
|
free_commit_list(filter->no_commit);
|
|
|
|
free_commit_list(filter->reachable_from);
|
|
|
|
free_commit_list(filter->unreachable_from);
|
|
|
|
ref_filter_init(filter);
|
|
|
|
}
|