e2fsck: speed up runs when using tdb for large atypical filesystems

Optimize how the tdb library is used so that running with [scratch_files] in /etc/e2fsck.conf is more efficient. Use a better hash function, supplied by Rogier Wolff, and supply an estimate of the size of the hash table to tdb_open instead of using the default (which is way too small in most cases). Also, disable the tdb locking and fsync calls, since they are not necessary for our use in this case (which is essentially as cheap swap space; the tdb files do not contain persistent data). Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
2024-12-11 19:03:40 +08:00 · 2011-11-29 11:24:52 -05:00 · 2011-11-29 11:24:52 -05:00 · 4e523bbe00
commit 4e523bbe00
parent db40c20e3f
3 changed files with 21 additions and 6 deletions
--- a/e2fsck/dirinfo.c
+++ b/e2fsck/dirinfo.c
@@ -67,7 +67,11 @@ static void setup_tdb(e2fsck_t ctx, ext2_ino_t num_dirs)
 		db->tdb = NULL;
 		return;
 	}
-	db->tdb = tdb_open(db->tdb_fn, 0, TDB_CLEAR_IF_FIRST,
+
+	if (num_dirs < 99991)
+		num_dirs = 99991; /* largest 5 digit prime */
+
+	db->tdb = tdb_open(db->tdb_fn, num_dirs, TDB_NOLOCK | TDB_NOSYNC,
 			   O_RDWR | O_CREAT | O_TRUNC, 0600);
 	close(fd);
 }
--- a/lib/ext2fs/icount.c
+++ b/lib/ext2fs/icount.c
@@ -180,6 +180,7 @@ errcode_t ext2fs_create_icount_tdb(ext2_filsys fs, char *tdb_dir,
 	ext2_icount_t	icount;
 	errcode_t	retval;
 	char 		*fn, uuid[40];
+	ext2_ino_t	num_inodes;
 	int		fd;

 	retval = alloc_icount(fs, flags,  &icount);
@@ -193,8 +194,18 @@ errcode_t ext2fs_create_icount_tdb(ext2_filsys fs, char *tdb_dir,
 	sprintf(fn, "%s/%s-icount-XXXXXX", tdb_dir, uuid);
 	fd = mkstemp(fn);

+	/*
+	 * This is an overestimate of the size that we will need; the
+	 * ideal value is the number of used inodes with a count
+	 * greater than 1.  OTOH the times when we really need this is
+	 * with the backup programs that use lots of hard links, in
+	 * which case the number of inodes in use approaches the ideal
+	 * value.
+	 */
+	num_inodes = fs->super->s_inodes_count - fs->super->s_free_inodes_count;
+
 	icount->tdb_fn = fn;
-	icount->tdb = tdb_open(fn, 0, TDB_CLEAR_IF_FIRST,
+	icount->tdb = tdb_open(fn, num_inodes, TDB_NOLOCK | TDB_NOSYNC,
 			       O_RDWR | O_CREAT | O_TRUNC, 0600);
 	if (icount->tdb) {
 		close(fd);
--- a/lib/ext2fs/tdb.c
+++ b/lib/ext2fs/tdb.c
@@ -3711,17 +3711,17 @@ void tdb_enable_seqnum(struct tdb_context *tdb)
 static struct tdb_context *tdbs = NULL;


-/* This is based on the hash algorithm from gdbm */
+/* This is from a hash algorithm suggested by Rogier Wolff */
 static unsigned int default_tdb_hash(TDB_DATA *key)
 {
 	u32 value;	/* Used to compute the hash value.  */
 	u32   i;	/* Used to cycle through random values. */

 	/* Set the initial value from the key size. */
-	for (value = 0x238F13AF * key->dsize, i=0; i < key->dsize; i++)
-		value = (value + (key->dptr[i] << (i*5 % 24)));
+	for (value = 0, i=0; i < key->dsize; i++)
+		value = value * 256 + key->dptr[i] + (value >> 24) * 241;

-	return (1103515243 * value + 12345);
+	return value;
 }