git/object.h
Junio C Hamano ecf7fc600a Merge branch 'tb/path-filter-fix'
The Bloom filter used for path limited history traversal was broken
on systems whose "char" is unsigned; update the implementation and
bump the format version to 2.

* tb/path-filter-fix:
  bloom: introduce `deinit_bloom_filters()`
  commit-graph: reuse existing Bloom filters where possible
  object.h: fix mis-aligned flag bits table
  commit-graph: new Bloom filter version that fixes murmur3
  commit-graph: unconditionally load Bloom filters
  bloom: prepare to discard incompatible Bloom filters
  bloom: annotate filters with hash version
  repo-settings: introduce commitgraph.changedPathsVersion
  t4216: test changed path filters with high bit paths
  t/helper/test-read-graph: implement `bloom-filters` mode
  bloom.h: make `load_bloom_filter_from_graph()` public
  t/helper/test-read-graph.c: extract `dump_graph_info()`
  gitformat-commit-graph: describe version 2 of BDAT
  commit-graph: ensure Bloom filters are read with consistent settings
  revision.c: consult Bloom filters for root commits
  t/t4216-log-bloom.sh: harden `test_bloom_filters_not_used()`
2024-07-08 14:53:10 -07:00

346 lines
10 KiB
C

#ifndef OBJECT_H
#define OBJECT_H
#include "hash.h"
struct buffer_slab;
struct repository;
struct parsed_object_pool {
struct object **obj_hash;
int nr_objs, obj_hash_size;
/* TODO: migrate alloc_states to mem-pool? */
struct alloc_state *blob_state;
struct alloc_state *tree_state;
struct alloc_state *commit_state;
struct alloc_state *tag_state;
struct alloc_state *object_state;
/* parent substitutions from .git/info/grafts and .git/shallow */
struct commit_graft **grafts;
int grafts_alloc, grafts_nr;
int is_shallow;
struct stat_validity *shallow_stat;
char *alternate_shallow_file;
int commit_graft_prepared;
int substituted_parent;
struct buffer_slab *buffer_slab;
};
struct parsed_object_pool *parsed_object_pool_new(void);
void parsed_object_pool_clear(struct parsed_object_pool *o);
struct object_list {
struct object *item;
struct object_list *next;
};
struct object_array {
unsigned int nr;
unsigned int alloc;
struct object_array_entry {
struct object *item;
/*
* name or NULL. If non-NULL, the memory pointed to
* is owned by this object *except* if it points at
* object_array_slopbuf, which is a static copy of the
* empty string.
*/
char *name;
char *path;
unsigned mode;
} *objects;
};
#define OBJECT_ARRAY_INIT { 0 }
void object_array_init(struct object_array *array);
/*
* object flag allocation:
* revision.h: 0---------10 15 23------27
* fetch-pack.c: 01 67
* negotiator/default.c: 2--5
* walker.c: 0-2
* upload-pack.c: 4 11-----14 16-----19
* builtin/blame.c: 12-13
* bisect.c: 16
* bundle.c: 16
* http-push.c: 11-----14
* commit-graph.c: 15
* commit-reach.c: 16-----19
* sha1-name.c: 20
* list-objects-filter.c: 21
* bloom.c: 2122
* builtin/fsck.c: 0--3
* builtin/gc.c: 0
* builtin/index-pack.c: 2021
* reflog.c: 10--12
* builtin/show-branch.c: 0-------------------------------------------26
* builtin/unpack-objects.c: 2021
* pack-bitmap.h: 2122
*/
#define FLAG_BITS 28
#define TYPE_BITS 3
/*
* Values in this enum (except those outside the 3 bit range) are part
* of pack file format. See gitformat-pack(5) for more information.
*/
enum object_type {
OBJ_BAD = -1,
OBJ_NONE = 0,
OBJ_COMMIT = 1,
OBJ_TREE = 2,
OBJ_BLOB = 3,
OBJ_TAG = 4,
/* 5 for future expansion */
OBJ_OFS_DELTA = 6,
OBJ_REF_DELTA = 7,
OBJ_ANY,
OBJ_MAX
};
/* unknown mode (impossible combination S_IFIFO|S_IFCHR) */
#define S_IFINVALID 0030000
/*
* A "directory link" is a link to another git directory.
*
* The value 0160000 is not normally a valid mode, and
* also just happens to be S_IFDIR + S_IFLNK
*/
#define S_IFGITLINK 0160000
#define S_ISGITLINK(m) (((m) & S_IFMT) == S_IFGITLINK)
#define S_ISSPARSEDIR(m) ((m) == S_IFDIR)
static inline enum object_type object_type(unsigned int mode)
{
return S_ISDIR(mode) ? OBJ_TREE :
S_ISGITLINK(mode) ? OBJ_COMMIT :
OBJ_BLOB;
}
#define ce_permissions(mode) (((mode) & 0100) ? 0755 : 0644)
static inline unsigned int create_ce_mode(unsigned int mode)
{
if (S_ISLNK(mode))
return S_IFLNK;
if (S_ISSPARSEDIR(mode))
return S_IFDIR;
if (S_ISDIR(mode) || S_ISGITLINK(mode))
return S_IFGITLINK;
return S_IFREG | ce_permissions(mode);
}
static inline unsigned int canon_mode(unsigned int mode)
{
if (S_ISREG(mode))
return S_IFREG | ce_permissions(mode);
if (S_ISLNK(mode))
return S_IFLNK;
if (S_ISDIR(mode))
return S_IFDIR;
return S_IFGITLINK;
}
/*
* The object type is stored in 3 bits.
*/
struct object {
unsigned parsed : 1;
unsigned type : TYPE_BITS;
unsigned flags : FLAG_BITS;
struct object_id oid;
};
const char *type_name(unsigned int type);
int type_from_string_gently(const char *str, ssize_t, int gentle);
#define type_from_string(str) type_from_string_gently(str, -1, 0)
/*
* Return the current number of buckets in the object hashmap.
*/
unsigned int get_max_object_index(void);
/*
* Return the object from the specified bucket in the object hashmap.
*/
struct object *get_indexed_object(unsigned int);
/*
* This can be used to see if we have heard of the object before, but
* it can return "yes we have, and here is a half-initialised object"
* for an object that we haven't loaded/parsed yet.
*
* When parsing a commit to create an in-core commit object, its
* parents list holds commit objects that represent its parents, but
* they are expected to be lazily initialized and do not know what
* their trees or parents are yet. When this function returns such a
* half-initialised objects, the caller is expected to initialize them
* by calling parse_object() on them.
*/
struct object *lookup_object(struct repository *r, const struct object_id *oid);
void *create_object(struct repository *r, const struct object_id *oid, void *obj);
void *object_as_type(struct object *obj, enum object_type type, int quiet);
static inline const char *parse_mode(const char *str, uint16_t *modep)
{
unsigned char c;
unsigned int mode = 0;
if (*str == ' ')
return NULL;
while ((c = *str++) != ' ') {
if (c < '0' || c > '7')
return NULL;
mode = (mode << 3) + (c - '0');
}
*modep = mode;
return str;
}
/*
* Returns the object, having parsed it to find out what it is.
*
* Returns NULL if the object is missing or corrupt.
*/
enum parse_object_flags {
PARSE_OBJECT_SKIP_HASH_CHECK = 1 << 0,
PARSE_OBJECT_DISCARD_TREE = 1 << 1,
};
struct object *parse_object(struct repository *r, const struct object_id *oid);
struct object *parse_object_with_flags(struct repository *r,
const struct object_id *oid,
enum parse_object_flags flags);
/*
* Like parse_object, but will die() instead of returning NULL. If the
* "name" parameter is not NULL, it is included in the error message
* (otherwise, the hex object ID is given).
*/
struct object *parse_object_or_die(const struct object_id *oid, const char *name);
/* Given the result of read_sha1_file(), returns the object after
* parsing it. eaten_p indicates if the object has a borrowed copy
* of buffer and the caller should not free() it.
*/
struct object *parse_object_buffer(struct repository *r, const struct object_id *oid, enum object_type type, unsigned long size, void *buffer, int *eaten_p);
/*
* Allocate and return an object struct, even if you do not know the type of
* the object. The returned object may have its "type" field set to a real type
* (if somebody previously called lookup_blob(), etc), or it may be set to
* OBJ_NONE. In the latter case, subsequent calls to lookup_blob(), etc, will
* set the type field as appropriate.
*
* Use this when you do not know the expected type of an object and want to
* avoid parsing it for efficiency reasons. Try to avoid it otherwise; it
* may allocate excess memory, since the returned object must be as large as
* the maximum struct of any type.
*/
struct object *lookup_unknown_object(struct repository *r, const struct object_id *oid);
/*
* Dispatch to the appropriate lookup_blob(), lookup_commit(), etc, based on
* "type".
*/
struct object *lookup_object_by_type(struct repository *r, const struct object_id *oid,
enum object_type type);
enum peel_status {
/* object was peeled successfully: */
PEEL_PEELED = 0,
/*
* object cannot be peeled because the named object (or an
* object referred to by a tag in the peel chain), does not
* exist.
*/
PEEL_INVALID = -1,
/* object cannot be peeled because it is not a tag: */
PEEL_NON_TAG = -2,
/* ref_entry contains no peeled value because it is a symref: */
PEEL_IS_SYMREF = -3,
/*
* ref_entry cannot be peeled because it is broken (i.e., the
* symbolic reference cannot even be resolved to an object
* name):
*/
PEEL_BROKEN = -4
};
/*
* Peel the named object; i.e., if the object is a tag, resolve the
* tag recursively until a non-tag is found. If successful, store the
* result to oid and return PEEL_PEELED. If the object is not a tag
* or is not valid, return PEEL_NON_TAG or PEEL_INVALID, respectively,
* and leave oid unchanged.
*/
enum peel_status peel_object(struct repository *r,
const struct object_id *name, struct object_id *oid);
struct object_list *object_list_insert(struct object *item,
struct object_list **list_p);
int object_list_contains(struct object_list *list, struct object *obj);
void object_list_free(struct object_list **list);
/* Object array handling .. */
void add_object_array(struct object *obj, const char *name, struct object_array *array);
void add_object_array_with_path(struct object *obj, const char *name, struct object_array *array, unsigned mode, const char *path);
/*
* Returns NULL if the array is empty. Otherwise, returns the last object
* after removing its entry from the array. Other resources associated
* with that object are left in an unspecified state and should not be
* examined.
*/
struct object *object_array_pop(struct object_array *array);
typedef int (*object_array_each_func_t)(struct object_array_entry *, void *);
/*
* Apply want to each entry in array, retaining only the entries for
* which the function returns true. Preserve the order of the entries
* that are retained.
*/
void object_array_filter(struct object_array *array,
object_array_each_func_t want, void *cb_data);
/*
* Remove from array all but the first entry with a given name.
* Warning: this function uses an O(N^2) algorithm.
*/
void object_array_remove_duplicates(struct object_array *array);
/*
* Remove any objects from the array, freeing all used memory; afterwards
* the array is ready to store more objects with add_object_array().
*/
void object_array_clear(struct object_array *array);
void clear_object_flags(unsigned flags);
/*
* Clear the specified object flags from all in-core commit objects from
* the specified repository.
*/
void repo_clear_commit_marks(struct repository *r, unsigned int flags);
#endif /* OBJECT_H */