From debed2a6291a29fd7b0e96f63fbf0142ed3280eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Mon, 24 Oct 2011 23:59:14 +0200 Subject: [PATCH 1/2] read-cache.c: allocate index entries individually The code to estimate the in-memory size of the index based on its on-disk representation is subtly wrong for certain architecture-dependent struct layouts. Instead of fixing it, replace the code to keep the index entries in a single large block of memory and allocate each entry separately instead. This is both simpler and more flexible, as individual entries can now be freed. Actually using that added flexibility is left for a later patch. Suggested-by: Junio C Hamano Signed-off-by: Rene Scharfe Signed-off-by: Junio C Hamano --- cache.h | 1 - read-cache.c | 93 +++++++++++++++++++++------------------------------- 2 files changed, 37 insertions(+), 57 deletions(-) diff --git a/cache.h b/cache.h index 2e6ad3604e..59840a4dce 100644 --- a/cache.h +++ b/cache.h @@ -316,7 +316,6 @@ struct index_state { struct string_list *resolve_undo; struct cache_tree *cache_tree; struct cache_time timestamp; - void *alloc; unsigned name_hash_initialized : 1, initialized : 1; struct hash_table name_hash; diff --git a/read-cache.c b/read-cache.c index 01a0e25051..dea7cd85f4 100644 --- a/read-cache.c +++ b/read-cache.c @@ -1202,10 +1202,35 @@ int read_index(struct index_state *istate) return read_index_from(istate, get_index_file()); } -static void convert_from_disk(struct ondisk_cache_entry *ondisk, struct cache_entry *ce) +static struct cache_entry *create_from_disk(struct ondisk_cache_entry *ondisk) { + struct cache_entry *ce; size_t len; const char *name; + unsigned int flags; + + /* On-disk flags are just 16 bits */ + flags = ntohs(ondisk->flags); + len = flags & CE_NAMEMASK; + + if (flags & CE_EXTENDED) { + struct ondisk_cache_entry_extended *ondisk2; + int extended_flags; + ondisk2 = (struct ondisk_cache_entry_extended *)ondisk; + extended_flags = ntohs(ondisk2->flags2) << 16; + /* We do not yet understand any bit out of CE_EXTENDED_FLAGS */ + if (extended_flags & ~CE_EXTENDED_FLAGS) + die("Unknown index entry format %08x", extended_flags); + flags |= extended_flags; + name = ondisk2->name; + } + else + name = ondisk->name; + + if (len == CE_NAMEMASK) + len = strlen(name); + + ce = xmalloc(cache_entry_size(len)); ce->ce_ctime.sec = ntohl(ondisk->ctime.sec); ce->ce_mtime.sec = ntohl(ondisk->mtime.sec); @@ -1217,48 +1242,13 @@ static void convert_from_disk(struct ondisk_cache_entry *ondisk, struct cache_en ce->ce_uid = ntohl(ondisk->uid); ce->ce_gid = ntohl(ondisk->gid); ce->ce_size = ntohl(ondisk->size); - /* On-disk flags are just 16 bits */ - ce->ce_flags = ntohs(ondisk->flags); + ce->ce_flags = flags; hashcpy(ce->sha1, ondisk->sha1); - len = ce->ce_flags & CE_NAMEMASK; - - if (ce->ce_flags & CE_EXTENDED) { - struct ondisk_cache_entry_extended *ondisk2; - int extended_flags; - ondisk2 = (struct ondisk_cache_entry_extended *)ondisk; - extended_flags = ntohs(ondisk2->flags2) << 16; - /* We do not yet understand any bit out of CE_EXTENDED_FLAGS */ - if (extended_flags & ~CE_EXTENDED_FLAGS) - die("Unknown index entry format %08x", extended_flags); - ce->ce_flags |= extended_flags; - name = ondisk2->name; - } - else - name = ondisk->name; - - if (len == CE_NAMEMASK) - len = strlen(name); - /* - * NEEDSWORK: If the original index is crafted, this copy could - * go unchecked. - */ - memcpy(ce->name, name, len + 1); -} - -static inline size_t estimate_cache_size(size_t ondisk_size, unsigned int entries) -{ - long per_entry; - - per_entry = sizeof(struct cache_entry) - sizeof(struct ondisk_cache_entry); - - /* - * Alignment can cause differences. This should be "alignof", but - * since that's a gcc'ism, just use the size of a pointer. - */ - per_entry += sizeof(void *); - return ondisk_size + entries*per_entry; + memcpy(ce->name, name, len); + ce->name[len] = '\0'; + return ce; } /* remember to discard_cache() before reading a different cache! */ @@ -1266,7 +1256,7 @@ int read_index_from(struct index_state *istate, const char *path) { int fd, i; struct stat st; - unsigned long src_offset, dst_offset; + unsigned long src_offset; struct cache_header *hdr; void *mmap; size_t mmap_size; @@ -1305,29 +1295,18 @@ int read_index_from(struct index_state *istate, const char *path) istate->cache_nr = ntohl(hdr->hdr_entries); istate->cache_alloc = alloc_nr(istate->cache_nr); istate->cache = xcalloc(istate->cache_alloc, sizeof(struct cache_entry *)); - - /* - * The disk format is actually larger than the in-memory format, - * due to space for nsec etc, so even though the in-memory one - * has room for a few more flags, we can allocate using the same - * index size - */ - istate->alloc = xmalloc(estimate_cache_size(mmap_size, istate->cache_nr)); istate->initialized = 1; src_offset = sizeof(*hdr); - dst_offset = 0; for (i = 0; i < istate->cache_nr; i++) { struct ondisk_cache_entry *disk_ce; struct cache_entry *ce; disk_ce = (struct ondisk_cache_entry *)((char *)mmap + src_offset); - ce = (struct cache_entry *)((char *)istate->alloc + dst_offset); - convert_from_disk(disk_ce, ce); + ce = create_from_disk(disk_ce); set_index_entry(istate, i, ce); src_offset += ondisk_ce_size(ce); - dst_offset += ce_size(ce); } istate->timestamp.sec = st.st_mtime; istate->timestamp.nsec = ST_MTIME_NSEC(st); @@ -1361,11 +1340,15 @@ unmap: int is_index_unborn(struct index_state *istate) { - return (!istate->cache_nr && !istate->alloc && !istate->timestamp.sec); + return (!istate->cache_nr && !istate->timestamp.sec); } int discard_index(struct index_state *istate) { + int i; + + for (i = 0; i < istate->cache_nr; i++) + free(istate->cache[i]); resolve_undo_clear_index(istate); istate->cache_nr = 0; istate->cache_changed = 0; @@ -1374,8 +1357,6 @@ int discard_index(struct index_state *istate) istate->name_hash_initialized = 0; free_hash(&istate->name_hash); cache_tree_free(&(istate->cache_tree)); - free(istate->alloc); - istate->alloc = NULL; istate->initialized = 0; /* no need to throw away allocated active_cache */ From ee7825b58cd423ea63a215c59fc2b7c7a2878af5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Tue, 25 Oct 2011 20:00:04 +0200 Subject: [PATCH 2/2] cache.h: put single NUL at end of struct cache_entry Since in-memory index entries are allocated individually now, the variable slack at the end meant to provide an eight byte alignment is not needed anymore. Have a single NUL instead. This saves zero to seven bytes for an entry, depending on its filename length. Signed-off-by: Rene Scharfe Signed-off-by: Junio C Hamano --- cache.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cache.h b/cache.h index 59840a4dce..15c619d161 100644 --- a/cache.h +++ b/cache.h @@ -306,7 +306,7 @@ static inline unsigned int canon_mode(unsigned int mode) } #define flexible_size(STRUCT,len) ((offsetof(struct STRUCT,name) + (len) + 8) & ~7) -#define cache_entry_size(len) flexible_size(cache_entry,len) +#define cache_entry_size(len) (offsetof(struct cache_entry,name) + (len) + 1) #define ondisk_cache_entry_size(len) flexible_size(ondisk_cache_entry,len) #define ondisk_cache_entry_extended_size(len) flexible_size(ondisk_cache_entry_extended,len)