diff --git a/Documentation/gitrepository-layout.txt b/Documentation/gitrepository-layout.txt index 17d2ea6c1e..79653f3134 100644 --- a/Documentation/gitrepository-layout.txt +++ b/Documentation/gitrepository-layout.txt @@ -155,6 +155,10 @@ index:: The current index file for the repository. It is usually not found in a bare repository. +sharedindex.:: + The shared index part, to be referenced by $GIT_DIR/index and + other temporary index files. Only valid in split index mode. + info:: Additional information about the repository is recorded in this directory. diff --git a/Documentation/technical/index-format.txt b/Documentation/technical/index-format.txt index f352a9b22e..fe6f31667d 100644 --- a/Documentation/technical/index-format.txt +++ b/Documentation/technical/index-format.txt @@ -129,6 +129,9 @@ Git index format (Version 4) In version 4, the padding after the pathname does not exist. + Interpretation of index entries in split index mode is completely + different. See below for details. + == Extensions === Cached tree @@ -198,3 +201,35 @@ Git index format - At most three 160-bit object names of the entry in stages from 1 to 3 (nothing is written for a missing stage). +=== Split index + + In split index mode, the majority of index entries could be stored + in a separate file. This extension records the changes to be made on + top of that to produce the final index. + + The signature for this extension is { 'l', 'i, 'n', 'k' }. + + The extension consists of: + + - 160-bit SHA-1 of the shared index file. The shared index file path + is $GIT_DIR/sharedindex.. If all 160 bits are zero, the + index does not require a shared index file. + + - An ewah-encoded delete bitmap, each bit represents an entry in the + shared index. If a bit is set, its corresponding entry in the + shared index will be removed from the final index. Note, because + a delete operation changes index entry positions, but we do need + original positions in replace phase, it's best to just mark + entries for removal, then do a mass deletion after replacement. + + - An ewah-encoded replace bitmap, each bit represents an entry in + the shared index. If a bit is set, its corresponding entry in the + shared index will be replaced with an entry in this index + file. All replaced entries are stored in sorted order in this + index. The first "1" bit in the replace bitmap corresponds to the + first index entry, the second "1" bit to the second entry and so + on. Replaced entries may have empty path names to save space. + + The remaining index entries after replaced ones will be added to the + final index. These added entries are also sorted by entry namme then + stage. diff --git a/Makefile b/Makefile index 81e8214236..94ad3cec7f 100644 --- a/Makefile +++ b/Makefile @@ -887,6 +887,7 @@ LIB_OBJS += sha1_name.o LIB_OBJS += shallow.o LIB_OBJS += sideband.o LIB_OBJS += sigchain.o +LIB_OBJS += split-index.o LIB_OBJS += strbuf.o LIB_OBJS += streaming.o LIB_OBJS += string-list.o diff --git a/cache.h b/cache.h index 41cdcd0633..47fe3746be 100644 --- a/cache.h +++ b/cache.h @@ -135,6 +135,7 @@ struct cache_entry { unsigned int ce_mode; unsigned int ce_flags; unsigned int ce_namelen; + unsigned int index; /* for link extension */ unsigned char sha1[20]; char name[FLEX_ARRAY]; /* more */ }; @@ -275,12 +276,14 @@ static inline unsigned int canon_mode(unsigned int mode) #define RESOLVE_UNDO_CHANGED (1 << 4) #define CACHE_TREE_CHANGED (1 << 5) +struct split_index; struct index_state { struct cache_entry **cache; unsigned int version; unsigned int cache_nr, cache_alloc, cache_changed; struct string_list *resolve_undo; struct cache_tree *cache_tree; + struct split_index *split_index; struct cache_time timestamp; unsigned name_hash_initialized : 1, initialized : 1; diff --git a/read-cache.c b/read-cache.c index b4653c083a..90a3f09db9 100644 --- a/read-cache.c +++ b/read-cache.c @@ -14,6 +14,7 @@ #include "resolve-undo.h" #include "strbuf.h" #include "varint.h" +#include "split-index.h" static struct cache_entry *refresh_cache_entry(struct cache_entry *ce, unsigned int options); @@ -34,6 +35,10 @@ static struct cache_entry *refresh_cache_entry(struct cache_entry *ce, #define CACHE_EXT(s) ( (s[0]<<24)|(s[1]<<16)|(s[2]<<8)|(s[3]) ) #define CACHE_EXT_TREE 0x54524545 /* "TREE" */ #define CACHE_EXT_RESOLVE_UNDO 0x52455543 /* "REUC" */ +#define CACHE_EXT_LINK 0x6c696e6b /* "link" */ + +/* changes that can be kept in $GIT_DIR/index (basically all extensions) */ +#define EXTMASK (RESOLVE_UNDO_CHANGED | CACHE_TREE_CHANGED) struct index_state the_index; static const char *alternate_index_output; @@ -63,6 +68,7 @@ void rename_index_entry_at(struct index_state *istate, int nr, const char *new_n copy_cache_entry(new, old); new->ce_flags &= ~CE_HASHED; new->ce_namelen = namelen; + new->index = 0; memcpy(new->name, new_name, namelen + 1); cache_tree_invalidate_path(istate, old->name); @@ -1335,6 +1341,10 @@ static int read_index_extension(struct index_state *istate, case CACHE_EXT_RESOLVE_UNDO: istate->resolve_undo = resolve_undo_read(data, sz); break; + case CACHE_EXT_LINK: + if (read_link_extension(istate, data, sz)) + return -1; + break; default: if (*ext < 'A' || 'Z' < *ext) return error("index uses %.4s extension, which we do not understand", @@ -1369,6 +1379,7 @@ static struct cache_entry *cache_entry_from_ondisk(struct ondisk_cache_entry *on ce->ce_stat_data.sd_size = get_be32(&ondisk->size); ce->ce_flags = flags & ~CE_NAMEMASK; ce->ce_namelen = len; + ce->index = 0; hashcpy(ce->sha1, ondisk->sha1); memcpy(ce->name, name, len); ce->name[len] = '\0'; @@ -1443,7 +1454,8 @@ static struct cache_entry *create_from_disk(struct ondisk_cache_entry *ondisk, } /* remember to discard_cache() before reading a different cache! */ -int read_index_from(struct index_state *istate, const char *path) +static int do_read_index(struct index_state *istate, const char *path, + int must_exist) { int fd, i; struct stat st; @@ -1460,9 +1472,9 @@ int read_index_from(struct index_state *istate, const char *path) istate->timestamp.nsec = 0; fd = open(path, O_RDONLY); if (fd < 0) { - if (errno == ENOENT) + if (!must_exist && errno == ENOENT) return 0; - die_errno("index file open failed"); + die_errno("%s: index file open failed", path); } if (fstat(fd, &st)) @@ -1535,6 +1547,42 @@ unmap: die("index file corrupt"); } +int read_index_from(struct index_state *istate, const char *path) +{ + struct split_index *split_index; + int ret; + + /* istate->initialized covers both .git/index and .git/sharedindex.xxx */ + if (istate->initialized) + return istate->cache_nr; + + ret = do_read_index(istate, path, 0); + split_index = istate->split_index; + if (!split_index) + return ret; + + if (is_null_sha1(split_index->base_sha1)) + return ret; + if (istate->cache_nr) + die("index in split-index mode must contain no entries"); + + if (split_index->base) + discard_index(split_index->base); + else + split_index->base = xcalloc(1, sizeof(*split_index->base)); + ret = do_read_index(split_index->base, + git_path("sharedindex.%s", + sha1_to_hex(split_index->base_sha1)), 1); + if (hashcmp(split_index->base_sha1, split_index->base->sha1)) + die("broken index, expect %s in %s, got %s", + sha1_to_hex(split_index->base_sha1), + git_path("sharedindex.%s", + sha1_to_hex(split_index->base_sha1)), + sha1_to_hex(split_index->base->sha1)); + merge_base_index(istate); + return ret; +} + int is_index_unborn(struct index_state *istate) { return (!istate->cache_nr && !istate->timestamp.sec); @@ -1544,8 +1592,15 @@ int discard_index(struct index_state *istate) { int i; - for (i = 0; i < istate->cache_nr; i++) + for (i = 0; i < istate->cache_nr; i++) { + if (istate->cache[i]->index && + istate->split_index && + istate->split_index->base && + istate->cache[i]->index <= istate->split_index->base->cache_nr && + istate->cache[i] == istate->split_index->base->cache[istate->cache[i]->index - 1]) + continue; free(istate->cache[i]); + } resolve_undo_clear_index(istate); istate->cache_nr = 0; istate->cache_changed = 0; @@ -1557,6 +1612,7 @@ int discard_index(struct index_state *istate) free(istate->cache); istate->cache = NULL; istate->cache_alloc = 0; + discard_split_index(istate); return 0; } @@ -1852,6 +1908,17 @@ static int do_write_index(struct index_state *istate, int newfd) strbuf_release(&previous_name_buf); /* Write extension data here */ + if (istate->split_index) { + struct strbuf sb = STRBUF_INIT; + + err = write_link_extension(&sb, istate) < 0 || + write_index_ext_header(&c, newfd, CACHE_EXT_LINK, + sb.len) < 0 || + ce_write(&c, newfd, sb.buf, sb.len) < 0; + strbuf_release(&sb); + if (err) + return -1; + } if (istate->cache_tree) { struct strbuf sb = STRBUF_INIT; @@ -1916,10 +1983,29 @@ static int do_write_locked_index(struct index_state *istate, struct lock_file *l return ret; } +static int write_split_index(struct index_state *istate, + struct lock_file *lock, + unsigned flags) +{ + int ret; + prepare_to_write_split_index(istate); + ret = do_write_locked_index(istate, lock, flags); + finish_writing_split_index(istate); + return ret; +} + int write_locked_index(struct index_state *istate, struct lock_file *lock, unsigned flags) { - return do_write_locked_index(istate, lock, flags); + struct split_index *si = istate->split_index; + + if (!si || (istate->cache_changed & ~EXTMASK)) { + if (si) + hashclr(si->base_sha1); + return do_write_locked_index(istate, lock, flags); + } + + return write_split_index(istate, lock, flags); } /* diff --git a/split-index.c b/split-index.c new file mode 100644 index 0000000000..63b52bb086 --- /dev/null +++ b/split-index.c @@ -0,0 +1,90 @@ +#include "cache.h" +#include "split-index.h" + +struct split_index *init_split_index(struct index_state *istate) +{ + if (!istate->split_index) { + istate->split_index = xcalloc(1, sizeof(*istate->split_index)); + istate->split_index->refcount = 1; + } + return istate->split_index; +} + +int read_link_extension(struct index_state *istate, + const void *data_, unsigned long sz) +{ + const unsigned char *data = data_; + struct split_index *si; + if (sz < 20) + return error("corrupt link extension (too short)"); + si = init_split_index(istate); + hashcpy(si->base_sha1, data); + data += 20; + sz -= 20; + if (sz) + return error("garbage at the end of link extension"); + return 0; +} + +int write_link_extension(struct strbuf *sb, + struct index_state *istate) +{ + struct split_index *si = istate->split_index; + strbuf_add(sb, si->base_sha1, 20); + return 0; +} + +static void mark_base_index_entries(struct index_state *base) +{ + int i; + /* + * To keep track of the shared entries between + * istate->base->cache[] and istate->cache[], base entry + * position is stored in each base entry. All positions start + * from 1 instead of 0, which is resrved to say "this is a new + * entry". + */ + for (i = 0; i < base->cache_nr; i++) + base->cache[i]->index = i + 1; +} + +void merge_base_index(struct index_state *istate) +{ + struct split_index *si = istate->split_index; + + mark_base_index_entries(si->base); + istate->cache_nr = si->base->cache_nr; + ALLOC_GROW(istate->cache, istate->cache_nr, istate->cache_alloc); + memcpy(istate->cache, si->base->cache, + sizeof(*istate->cache) * istate->cache_nr); +} + +void prepare_to_write_split_index(struct index_state *istate) +{ + struct split_index *si = init_split_index(istate); + /* take cache[] out temporarily */ + si->saved_cache_nr = istate->cache_nr; + istate->cache_nr = 0; +} + +void finish_writing_split_index(struct index_state *istate) +{ + struct split_index *si = init_split_index(istate); + istate->cache_nr = si->saved_cache_nr; +} + +void discard_split_index(struct index_state *istate) +{ + struct split_index *si = istate->split_index; + if (!si) + return; + istate->split_index = NULL; + si->refcount--; + if (si->refcount) + return; + if (si->base) { + discard_index(si->base); + free(si->base); + } + free(si); +} diff --git a/split-index.h b/split-index.h new file mode 100644 index 0000000000..8d7404117e --- /dev/null +++ b/split-index.h @@ -0,0 +1,25 @@ +#ifndef SPLIT_INDEX_H +#define SPLIT_INDEX_H + +struct index_state; +struct strbuf; + +struct split_index { + unsigned char base_sha1[20]; + struct index_state *base; + unsigned int saved_cache_nr; + int refcount; +}; + +struct split_index *init_split_index(struct index_state *istate); +int read_link_extension(struct index_state *istate, + const void *data, unsigned long sz); +int write_link_extension(struct strbuf *sb, + struct index_state *istate); +void move_cache_to_base_index(struct index_state *istate); +void merge_base_index(struct index_state *istate); +void prepare_to_write_split_index(struct index_state *istate); +void finish_writing_split_index(struct index_state *istate); +void discard_split_index(struct index_state *istate); + +#endif diff --git a/unpack-trees.c b/unpack-trees.c index f594932080..a941f7c33e 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -8,6 +8,7 @@ #include "progress.h" #include "refs.h" #include "attr.h" +#include "split-index.h" /* * Error messages expected by scripts out of plumbing commands such as @@ -1046,6 +1047,9 @@ int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options o->result.timestamp.sec = o->src_index->timestamp.sec; o->result.timestamp.nsec = o->src_index->timestamp.nsec; o->result.version = o->src_index->version; + o->result.split_index = o->src_index->split_index; + if (o->result.split_index) + o->result.split_index->refcount++; hashcpy(o->result.sha1, o->src_index->sha1); o->merge_size = len; mark_all_ce_unused(o->src_index);