radix_tree: exceptional entries and indices

A patchset to extend tmpfs to MAX_LFS_FILESIZE by abandoning its
peculiar swap vector, instead keeping a file's swap entries in the same
radix tree as its struct page pointers: thus saving memory, and
simplifying its code and locking.

This patch:

The radix_tree is used by several subsystems for different purposes.  A
major use is to store the struct page pointers of a file's pagecache for
memory management.  But what if mm wanted to store something other than
page pointers there too?

The low bit of a radix_tree entry is already used to denote an indirect
pointer, for internal use, and the unlikely radix_tree_deref_retry()
case.

Define the next bit as denoting an exceptional entry, and supply inline
functions radix_tree_exception() to return non-0 in either unlikely
case, and radix_tree_exceptional_entry() to return non-0 in the second
case.

If a subsystem already uses radix_tree with that bit set, no problem: it
does not affect internal workings at all, but is defined for the
convenience of those storing well-aligned pointers in the radix_tree.

The radix_tree_gang_lookups have an implicit assumption that the caller
can deduce the offset of each entry returned e.g.  by the page->index of
a struct page.  But that may not be feasible for some kinds of item to
be stored there.

Make radix_tree_gang_lookup_slot() accept an optional indices argument:
an output array in which to return those offsets.  The same could be
added to other radix_tree_gang_lookups, but for now keep it to the only
one for which we need it.

Signed-off-by: Hugh Dickins <hughd@google.com>
Acked-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
Hugh Dickins 2011-08-03 16:21:18 -07:00 committed by Linus Torvalds
parent 70d327198a
commit 6328650bb4
3 changed files with 54 additions and 15 deletions

View File

@ -39,7 +39,15 @@
* when it is shrunk, before we rcu free the node. See shrink code for * when it is shrunk, before we rcu free the node. See shrink code for
* details. * details.
*/ */
#define RADIX_TREE_INDIRECT_PTR 1 #define RADIX_TREE_INDIRECT_PTR 1
/*
 * A common use of the radix tree is to store pointers to struct pages;
 * but shmem/tmpfs needs also to store swap entries in the same tree:
 * those are marked as exceptional entries to distinguish them.
 *
 * RADIX_TREE_EXCEPTIONAL_ENTRY is the mask (value 2, i.e. the bit just
 * above RADIX_TREE_INDIRECT_PTR) used to test whether an entry is
 * exceptional.  RADIX_TREE_EXCEPTIONAL_SHIFT is the number of low bits
 * (2) that the content of an exceptional entry must be shifted past, so
 * that it collides with neither of those two marker bits.
 */
#define RADIX_TREE_EXCEPTIONAL_ENTRY 2
#define RADIX_TREE_EXCEPTIONAL_SHIFT 2
#define radix_tree_indirect_to_ptr(ptr) \ #define radix_tree_indirect_to_ptr(ptr) \
radix_tree_indirect_to_ptr((void __force *)(ptr)) radix_tree_indirect_to_ptr((void __force *)(ptr))
@ -173,6 +181,28 @@ static inline int radix_tree_deref_retry(void *arg)
return unlikely((unsigned long)arg & RADIX_TREE_INDIRECT_PTR); return unlikely((unsigned long)arg & RADIX_TREE_INDIRECT_PTR);
} }
/**
 * radix_tree_exceptional_entry - is this slot value an exceptional entry?
 * @arg:	value returned by radix_tree_deref_slot
 * Returns:	0 if well-aligned pointer, non-0 if exceptional entry.
 */
static inline int radix_tree_exceptional_entry(void *arg)
{
	unsigned long entry = (unsigned long)arg;

	/* No unlikely() hint: radix_tree_exception is often tested first */
	return entry & RADIX_TREE_EXCEPTIONAL_ENTRY;
}
/**
 * radix_tree_exception - does this slot value carry either exception bit?
 * @arg:	value returned by radix_tree_deref_slot
 * Returns:	0 if well-aligned pointer, non-0 if either kind of exception
 *		(RADIX_TREE_INDIRECT_PTR or RADIX_TREE_EXCEPTIONAL_ENTRY set).
 */
static inline int radix_tree_exception(void *arg)
{
	const unsigned long exception_bits =
		RADIX_TREE_INDIRECT_PTR | RADIX_TREE_EXCEPTIONAL_ENTRY;
	unsigned long entry = (unsigned long)arg;

	/* Both kinds of exception are expected to be rare */
	return unlikely(entry & exception_bits);
}
/** /**
* radix_tree_replace_slot - replace item in a slot * radix_tree_replace_slot - replace item in a slot
* @pslot: pointer to slot, returned by radix_tree_lookup_slot * @pslot: pointer to slot, returned by radix_tree_lookup_slot
@ -194,8 +224,8 @@ void *radix_tree_delete(struct radix_tree_root *, unsigned long);
unsigned int unsigned int
radix_tree_gang_lookup(struct radix_tree_root *root, void **results, radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
unsigned long first_index, unsigned int max_items); unsigned long first_index, unsigned int max_items);
unsigned int unsigned int radix_tree_gang_lookup_slot(struct radix_tree_root *root,
radix_tree_gang_lookup_slot(struct radix_tree_root *root, void ***results, void ***results, unsigned long *indices,
unsigned long first_index, unsigned int max_items); unsigned long first_index, unsigned int max_items);
unsigned long radix_tree_next_hole(struct radix_tree_root *root, unsigned long radix_tree_next_hole(struct radix_tree_root *root,
unsigned long index, unsigned long max_scan); unsigned long index, unsigned long max_scan);

View File

@ -823,8 +823,8 @@ unsigned long radix_tree_prev_hole(struct radix_tree_root *root,
EXPORT_SYMBOL(radix_tree_prev_hole); EXPORT_SYMBOL(radix_tree_prev_hole);
static unsigned int static unsigned int
__lookup(struct radix_tree_node *slot, void ***results, unsigned long index, __lookup(struct radix_tree_node *slot, void ***results, unsigned long *indices,
unsigned int max_items, unsigned long *next_index) unsigned long index, unsigned int max_items, unsigned long *next_index)
{ {
unsigned int nr_found = 0; unsigned int nr_found = 0;
unsigned int shift, height; unsigned int shift, height;
@ -857,12 +857,16 @@ __lookup(struct radix_tree_node *slot, void ***results, unsigned long index,
/* Bottom level: grab some items */ /* Bottom level: grab some items */
for (i = index & RADIX_TREE_MAP_MASK; i < RADIX_TREE_MAP_SIZE; i++) { for (i = index & RADIX_TREE_MAP_MASK; i < RADIX_TREE_MAP_SIZE; i++) {
index++;
if (slot->slots[i]) { if (slot->slots[i]) {
results[nr_found++] = &(slot->slots[i]); results[nr_found] = &(slot->slots[i]);
if (nr_found == max_items) if (indices)
indices[nr_found] = index;
if (++nr_found == max_items) {
index++;
goto out; goto out;
}
} }
index++;
} }
out: out:
*next_index = index; *next_index = index;
@ -918,8 +922,8 @@ radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
if (cur_index > max_index) if (cur_index > max_index)
break; break;
slots_found = __lookup(node, (void ***)results + ret, cur_index, slots_found = __lookup(node, (void ***)results + ret, NULL,
max_items - ret, &next_index); cur_index, max_items - ret, &next_index);
nr_found = 0; nr_found = 0;
for (i = 0; i < slots_found; i++) { for (i = 0; i < slots_found; i++) {
struct radix_tree_node *slot; struct radix_tree_node *slot;
@ -944,6 +948,7 @@ EXPORT_SYMBOL(radix_tree_gang_lookup);
* radix_tree_gang_lookup_slot - perform multiple slot lookup on radix tree * radix_tree_gang_lookup_slot - perform multiple slot lookup on radix tree
* @root: radix tree root * @root: radix tree root
* @results: where the results of the lookup are placed * @results: where the results of the lookup are placed
* @indices: where their indices should be placed (but usually NULL)
* @first_index: start the lookup from this key * @first_index: start the lookup from this key
* @max_items: place up to this many items at *results * @max_items: place up to this many items at *results
* *
@ -958,7 +963,8 @@ EXPORT_SYMBOL(radix_tree_gang_lookup);
* protection, radix_tree_deref_slot may fail requiring a retry. * protection, radix_tree_deref_slot may fail requiring a retry.
*/ */
unsigned int unsigned int
radix_tree_gang_lookup_slot(struct radix_tree_root *root, void ***results, radix_tree_gang_lookup_slot(struct radix_tree_root *root,
void ***results, unsigned long *indices,
unsigned long first_index, unsigned int max_items) unsigned long first_index, unsigned int max_items)
{ {
unsigned long max_index; unsigned long max_index;
@ -974,6 +980,8 @@ radix_tree_gang_lookup_slot(struct radix_tree_root *root, void ***results,
if (first_index > 0) if (first_index > 0)
return 0; return 0;
results[0] = (void **)&root->rnode; results[0] = (void **)&root->rnode;
if (indices)
indices[0] = 0;
return 1; return 1;
} }
node = indirect_to_ptr(node); node = indirect_to_ptr(node);
@ -987,8 +995,9 @@ radix_tree_gang_lookup_slot(struct radix_tree_root *root, void ***results,
if (cur_index > max_index) if (cur_index > max_index)
break; break;
slots_found = __lookup(node, results + ret, cur_index, slots_found = __lookup(node, results + ret,
max_items - ret, &next_index); indices ? indices + ret : NULL,
cur_index, max_items - ret, &next_index);
ret += slots_found; ret += slots_found;
if (next_index == 0) if (next_index == 0)
break; break;

View File

@ -840,7 +840,7 @@ unsigned find_get_pages(struct address_space *mapping, pgoff_t start,
rcu_read_lock(); rcu_read_lock();
restart: restart:
nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree, nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree,
(void ***)pages, start, nr_pages); (void ***)pages, NULL, start, nr_pages);
ret = 0; ret = 0;
for (i = 0; i < nr_found; i++) { for (i = 0; i < nr_found; i++) {
struct page *page; struct page *page;
@ -903,7 +903,7 @@ unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index,
rcu_read_lock(); rcu_read_lock();
restart: restart:
nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree, nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree,
(void ***)pages, index, nr_pages); (void ***)pages, NULL, index, nr_pages);
ret = 0; ret = 0;
for (i = 0; i < nr_found; i++) { for (i = 0; i < nr_found; i++) {
struct page *page; struct page *page;