From 5e4def20381678ba3ce0a4e117f97e378ecd81bc Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 2 Nov 2017 15:27:44 +0000 Subject: [PATCH 01/35] Pass mode to wait_on_atomic_t() action funcs and provide default actions Make wait_on_atomic_t() pass the TASK_* mode onto its action function as an extra argument and make it 'unsigned int' throughout. Also, consolidate a bunch of identical action functions into a default function that can do the appropriate thing for the mode. Also, change the argument name in the bit_wait*() function declarations to reflect the fact that it's the mode and not the bit number. [Peter Z gives this a grudging ACK, but thinks that the whole atomic_t wait should be done differently, though he's not immediately sure as to how] Signed-off-by: David Howells Acked-by: Peter Zijlstra cc: Ingo Molnar --- arch/mips/kernel/traps.c | 14 +--------- drivers/gpu/drm/drm_dp_aux_dev.c | 8 +----- .../drm/i915/selftests/intel_breadcrumbs.c | 10 ++----- drivers/media/platform/qcom/venus/hfi.c | 8 +----- fs/afs/rxrpc.c | 8 +----- fs/btrfs/extent-tree.c | 27 +++---------------- fs/fscache/cookie.c | 2 +- fs/fscache/internal.h | 2 -- fs/fscache/main.c | 9 ------- fs/nfs/inode.c | 4 +-- fs/nfs/internal.h | 2 +- fs/ocfs2/filecheck.c | 8 +----- include/linux/wait_bit.h | 15 ++++++----- kernel/sched/wait_bit.c | 18 ++++++++++--- 14 files changed, 37 insertions(+), 98 deletions(-) diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index 5669d3b8bd38..5d19ed07e99d 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -1233,18 +1233,6 @@ static int default_cu2_call(struct notifier_block *nfb, unsigned long action, return NOTIFY_OK; } -static int wait_on_fp_mode_switch(atomic_t *p) -{ - /* - * The FP mode for this task is currently being switched. That may - * involve modifications to the format of this tasks FP context which - * make it unsafe to proceed with execution for the moment. Instead, - * schedule some other task.
- */ - schedule(); - return 0; -} - static int enable_restore_fp_context(int msa) { int err, was_fpu_owner, prior_msa; @@ -1254,7 +1242,7 @@ static int enable_restore_fp_context(int msa) * complete before proceeding. */ wait_on_atomic_t(&current->mm->context.fp_mode_switching, - wait_on_fp_mode_switch, TASK_KILLABLE); + atomic_t_wait, TASK_KILLABLE); if (!used_math()) { /* First time FP context user. */ diff --git a/drivers/gpu/drm/drm_dp_aux_dev.c b/drivers/gpu/drm/drm_dp_aux_dev.c index d34e5096887a..053044201e31 100644 --- a/drivers/gpu/drm/drm_dp_aux_dev.c +++ b/drivers/gpu/drm/drm_dp_aux_dev.c @@ -263,12 +263,6 @@ static struct drm_dp_aux_dev *drm_dp_aux_dev_get_by_aux(struct drm_dp_aux *aux) return aux_dev; } -static int auxdev_wait_atomic_t(atomic_t *p) -{ - schedule(); - return 0; -} - void drm_dp_aux_unregister_devnode(struct drm_dp_aux *aux) { struct drm_dp_aux_dev *aux_dev; @@ -283,7 +277,7 @@ void drm_dp_aux_unregister_devnode(struct drm_dp_aux *aux) mutex_unlock(&aux_idr_mutex); atomic_dec(&aux_dev->usecount); - wait_on_atomic_t(&aux_dev->usecount, auxdev_wait_atomic_t, + wait_on_atomic_t(&aux_dev->usecount, atomic_t_wait, TASK_UNINTERRUPTIBLE); minor = aux_dev->index; diff --git a/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c b/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c index 828904b7d468..54fc571b1102 100644 --- a/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c @@ -271,13 +271,7 @@ struct igt_wakeup { u32 seqno; }; -static int wait_atomic(atomic_t *p) -{ - schedule(); - return 0; -} - -static int wait_atomic_timeout(atomic_t *p) +static int wait_atomic_timeout(atomic_t *p, unsigned int mode) { return schedule_timeout(10 * HZ) ?
0 : -ETIMEDOUT; } @@ -348,7 +342,7 @@ static void igt_wake_all_sync(atomic_t *ready, atomic_set(ready, 0); wake_up_all(wq); - wait_on_atomic_t(set, wait_atomic, TASK_UNINTERRUPTIBLE); + wait_on_atomic_t(set, atomic_t_wait, TASK_UNINTERRUPTIBLE); atomic_set(ready, count); atomic_set(done, count); } diff --git a/drivers/media/platform/qcom/venus/hfi.c b/drivers/media/platform/qcom/venus/hfi.c index c09490876516..e374c7d1a618 100644 --- a/drivers/media/platform/qcom/venus/hfi.c +++ b/drivers/media/platform/qcom/venus/hfi.c @@ -88,12 +88,6 @@ unlock: return ret; } -static int core_deinit_wait_atomic_t(atomic_t *p) -{ - schedule(); - return 0; -} - int hfi_core_deinit(struct venus_core *core, bool blocking) { int ret = 0, empty; @@ -112,7 +106,7 @@ int hfi_core_deinit(struct venus_core *core, bool blocking) if (!empty) { mutex_unlock(&core->lock); - wait_on_atomic_t(&core->insts_count, core_deinit_wait_atomic_t, + wait_on_atomic_t(&core->insts_count, atomic_t_wait, TASK_UNINTERRUPTIBLE); mutex_lock(&core->lock); } diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index bb1e2caa1720..77f5420a1a24 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -41,12 +41,6 @@ static void afs_charge_preallocation(struct work_struct *); static DECLARE_WORK(afs_charge_preallocation_work, afs_charge_preallocation); -static int afs_wait_atomic_t(atomic_t *p) -{ - schedule(); - return 0; -} - /* * open an RxRPC socket and bind it to be a server for callback notifications * - the socket is left in blocking mode and non-blocking ops use MSG_DONTWAIT @@ -121,7 +115,7 @@ void afs_close_socket(void) } _debug("outstanding %u", atomic_read(&afs_outstanding_calls)); - wait_on_atomic_t(&afs_outstanding_calls, afs_wait_atomic_t, + wait_on_atomic_t(&afs_outstanding_calls, atomic_t_wait, TASK_UNINTERRUPTIBLE); _debug("no outstanding calls"); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index e2d7e86b51d1..24cefde30e30 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ 
-4016,16 +4016,9 @@ void btrfs_dec_nocow_writers(struct btrfs_fs_info *fs_info, u64 bytenr) btrfs_put_block_group(bg); } -static int btrfs_wait_nocow_writers_atomic_t(atomic_t *a) -{ - schedule(); - return 0; -} - void btrfs_wait_nocow_writers(struct btrfs_block_group_cache *bg) { - wait_on_atomic_t(&bg->nocow_writers, - btrfs_wait_nocow_writers_atomic_t, + wait_on_atomic_t(&bg->nocow_writers, atomic_t_wait, TASK_UNINTERRUPTIBLE); } @@ -6595,12 +6588,6 @@ void btrfs_dec_block_group_reservations(struct btrfs_fs_info *fs_info, btrfs_put_block_group(bg); } -static int btrfs_wait_bg_reservations_atomic_t(atomic_t *a) -{ - schedule(); - return 0; -} - void btrfs_wait_block_group_reservations(struct btrfs_block_group_cache *bg) { struct btrfs_space_info *space_info = bg->space_info; @@ -6623,8 +6610,7 @@ void btrfs_wait_block_group_reservations(struct btrfs_block_group_cache *bg) down_write(&space_info->groups_sem); up_write(&space_info->groups_sem); - wait_on_atomic_t(&bg->reservations, - btrfs_wait_bg_reservations_atomic_t, + wait_on_atomic_t(&bg->reservations, atomic_t_wait, TASK_UNINTERRUPTIBLE); } @@ -11106,12 +11092,6 @@ int btrfs_start_write_no_snapshotting(struct btrfs_root *root) return 1; } -static int wait_snapshotting_atomic_t(atomic_t *a) -{ - schedule(); - return 0; -} - void btrfs_wait_for_snapshot_creation(struct btrfs_root *root) { while (true) { @@ -11120,8 +11100,7 @@ void btrfs_wait_for_snapshot_creation(struct btrfs_root *root) ret = btrfs_start_write_no_snapshotting(root); if (ret) break; - wait_on_atomic_t(&root->will_be_snapshotted, - wait_snapshotting_atomic_t, + wait_on_atomic_t(&root->will_be_snapshotted, atomic_t_wait, TASK_UNINTERRUPTIBLE); } } diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c index 40d61077bead..ff84258132bb 100644 --- a/fs/fscache/cookie.c +++ b/fs/fscache/cookie.c @@ -558,7 +558,7 @@ void __fscache_disable_cookie(struct fscache_cookie *cookie, bool invalidate) * have completed. 
*/ if (!atomic_dec_and_test(&cookie->n_active)) - wait_on_atomic_t(&cookie->n_active, fscache_wait_atomic_t, + wait_on_atomic_t(&cookie->n_active, atomic_t_wait, TASK_UNINTERRUPTIBLE); /* Make sure any pending writes are cancelled. */ diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h index 97ec45110957..0ff4b49a0037 100644 --- a/fs/fscache/internal.h +++ b/fs/fscache/internal.h @@ -97,8 +97,6 @@ static inline bool fscache_object_congested(void) return workqueue_congested(WORK_CPU_UNBOUND, fscache_object_wq); } -extern int fscache_wait_atomic_t(atomic_t *); - /* * object.c */ diff --git a/fs/fscache/main.c b/fs/fscache/main.c index b39d487ccfb0..249968dcbf5c 100644 --- a/fs/fscache/main.c +++ b/fs/fscache/main.c @@ -195,12 +195,3 @@ static void __exit fscache_exit(void) } module_exit(fscache_exit); - -/* - * wait_on_atomic_t() sleep function for uninterruptible waiting - */ -int fscache_wait_atomic_t(atomic_t *p) -{ - schedule(); - return 0; -} diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 134d9f560240..1629056aa2c9 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -85,9 +85,9 @@ int nfs_wait_bit_killable(struct wait_bit_key *key, int mode) } EXPORT_SYMBOL_GPL(nfs_wait_bit_killable); -int nfs_wait_atomic_killable(atomic_t *p) +int nfs_wait_atomic_killable(atomic_t *p, unsigned int mode) { - return nfs_wait_killable(TASK_KILLABLE); + return nfs_wait_killable(mode); } /** diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index f9a4a5524bd5..5ab17fd4700a 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -388,7 +388,7 @@ extern void nfs_evict_inode(struct inode *); void nfs_zap_acl_cache(struct inode *inode); extern bool nfs_check_cache_invalid(struct inode *, unsigned long); extern int nfs_wait_bit_killable(struct wait_bit_key *key, int mode); -extern int nfs_wait_atomic_killable(atomic_t *p); +extern int nfs_wait_atomic_killable(atomic_t *p, unsigned int mode); /* super.c */ extern const struct super_operations nfs_sops; diff --git 
a/fs/ocfs2/filecheck.c b/fs/ocfs2/filecheck.c index 2cabbcf2f28e..e87279e49ba3 100644 --- a/fs/ocfs2/filecheck.c +++ b/fs/ocfs2/filecheck.c @@ -129,19 +129,13 @@ static struct kobj_attribute ocfs2_attr_filecheck_set = ocfs2_filecheck_show, ocfs2_filecheck_store); -static int ocfs2_filecheck_sysfs_wait(atomic_t *p) -{ - schedule(); - return 0; -} - static void ocfs2_filecheck_sysfs_free(struct ocfs2_filecheck_sysfs_entry *entry) { struct ocfs2_filecheck_entry *p; if (!atomic_dec_and_test(&entry->fs_count)) - wait_on_atomic_t(&entry->fs_count, ocfs2_filecheck_sysfs_wait, + wait_on_atomic_t(&entry->fs_count, atomic_t_wait, TASK_UNINTERRUPTIBLE); spin_lock(&entry->fs_fcheck->fc_lock); diff --git a/include/linux/wait_bit.h b/include/linux/wait_bit.h index af0d495430d7..61b39eaf7cad 100644 --- a/include/linux/wait_bit.h +++ b/include/linux/wait_bit.h @@ -26,6 +26,8 @@ struct wait_bit_queue_entry { { .flags = p, .bit_nr = WAIT_ATOMIC_T_BIT_NR, } typedef int wait_bit_action_f(struct wait_bit_key *key, int mode); +typedef int wait_atomic_t_action_f(atomic_t *counter, unsigned int mode); + void __wake_up_bit(struct wait_queue_head *wq_head, void *word, int bit); int __wait_on_bit(struct wait_queue_head *wq_head, struct wait_bit_queue_entry *wbq_entry, wait_bit_action_f *action, unsigned int mode); int __wait_on_bit_lock(struct wait_queue_head *wq_head, struct wait_bit_queue_entry *wbq_entry, wait_bit_action_f *action, unsigned int mode); @@ -34,7 +36,7 @@ void wake_up_atomic_t(atomic_t *p); int out_of_line_wait_on_bit(void *word, int, wait_bit_action_f *action, unsigned int mode); int out_of_line_wait_on_bit_timeout(void *word, int, wait_bit_action_f *action, unsigned int mode, unsigned long timeout); int out_of_line_wait_on_bit_lock(void *word, int, wait_bit_action_f *action, unsigned int mode); -int out_of_line_wait_on_atomic_t(atomic_t *p, int (*)(atomic_t *), unsigned int mode); +int out_of_line_wait_on_atomic_t(atomic_t *p, wait_atomic_t_action_f action, unsigned int 
mode); struct wait_queue_head *bit_waitqueue(void *word, int bit); extern void __init wait_bit_init(void); @@ -51,10 +53,11 @@ int wake_bit_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync }, \ } -extern int bit_wait(struct wait_bit_key *key, int bit); -extern int bit_wait_io(struct wait_bit_key *key, int bit); -extern int bit_wait_timeout(struct wait_bit_key *key, int bit); -extern int bit_wait_io_timeout(struct wait_bit_key *key, int bit); +extern int bit_wait(struct wait_bit_key *key, int mode); +extern int bit_wait_io(struct wait_bit_key *key, int mode); +extern int bit_wait_timeout(struct wait_bit_key *key, int mode); +extern int bit_wait_io_timeout(struct wait_bit_key *key, int mode); +extern int atomic_t_wait(atomic_t *counter, unsigned int mode); /** * wait_on_bit - wait for a bit to be cleared @@ -251,7 +254,7 @@ wait_on_bit_lock_action(unsigned long *word, int bit, wait_bit_action_f *action, * outside of the target 'word'. */ static inline -int wait_on_atomic_t(atomic_t *val, int (*action)(atomic_t *), unsigned mode) +int wait_on_atomic_t(atomic_t *val, wait_atomic_t_action_f action, unsigned mode) { might_sleep(); if (atomic_read(val) == 0) diff --git a/kernel/sched/wait_bit.c b/kernel/sched/wait_bit.c index f8159698aa4d..84cb3acd9260 100644 --- a/kernel/sched/wait_bit.c +++ b/kernel/sched/wait_bit.c @@ -183,7 +183,7 @@ static int wake_atomic_t_function(struct wait_queue_entry *wq_entry, unsigned mo */ static __sched int __wait_on_atomic_t(struct wait_queue_head *wq_head, struct wait_bit_queue_entry *wbq_entry, - int (*action)(atomic_t *), unsigned mode) + wait_atomic_t_action_f action, unsigned int mode) { atomic_t *val; int ret = 0; @@ -193,7 +193,7 @@ int __wait_on_atomic_t(struct wait_queue_head *wq_head, struct wait_bit_queue_en val = wbq_entry->key.flags; if (atomic_read(val) == 0) break; - ret = (*action)(val); + ret = (*action)(val, mode); } while (!ret && atomic_read(val) != 0); finish_wait(wq_head, &wbq_entry->wq_entry); 
return ret; @@ -210,8 +210,9 @@ int __wait_on_atomic_t(struct wait_queue_head *wq_head, struct wait_bit_queue_en }, \ } -__sched int out_of_line_wait_on_atomic_t(atomic_t *p, int (*action)(atomic_t *), - unsigned mode) +__sched int out_of_line_wait_on_atomic_t(atomic_t *p, + wait_atomic_t_action_f action, + unsigned int mode) { struct wait_queue_head *wq_head = atomic_t_waitqueue(p); DEFINE_WAIT_ATOMIC_T(wq_entry, p); @@ -220,6 +221,15 @@ __sched int out_of_line_wait_on_atomic_t(atomic_t *p, int (*action)(atomic_t *), } EXPORT_SYMBOL(out_of_line_wait_on_atomic_t); +__sched int atomic_t_wait(atomic_t *counter, unsigned int mode) +{ + schedule(); + if (signal_pending_state(mode, current)) + return -EINTR; + return 0; +} +EXPORT_SYMBOL(atomic_t_wait); + /** * wake_up_atomic_t - Wake up a waiter on a atomic_t * @p: The atomic_t being waited on, a kernel virtual address From f044c8847bb61eff5e1e95b6f6bb950e7f4a73a4 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 2 Nov 2017 15:27:45 +0000 Subject: [PATCH 02/35] afs: Lay the groundwork for supporting network namespaces Lay the groundwork for supporting network namespaces (netns) to the AFS filesystem by moving various global features to a network-namespace struct (afs_net) and providing an instance of this as a temporary global variable that everything uses via accessor functions for the moment. The following changes have been made: (1) Store the netns in the superblock info. This will be obtained from the mounter's nsproxy on a manual mount and inherited from the parent superblock on an automount. (2) The cell list is made per-netns. It can be viewed through /proc/net/afs/cells and also be modified by writing commands to that file. (3) The local workstation cell is set per-ns in /proc/net/afs/rootcell. This is unset by default. (4) The 'rootcell' module parameter, which sets a cell and VL server list modifies the init net namespace, thereby allowing an AFS root fs to be theoretically used. 
(5) The volume location lists and the file lock manager are made per-netns. (6) The AF_RXRPC socket and associated I/O bits are made per-ns. The various workqueues remain global for the moment. Changes still to be made: (1) /proc/fs/afs/ should be moved to /proc/net/afs/ and a symlink emplaced from the old name. (2) A per-netns subsys needs to be registered for AFS into which it can store its per-netns data. (3) Rather than the AF_RXRPC socket being opened on module init, it needs to be opened on the creation of a superblock in that netns. (4) The socket needs to be closed when the last superblock using it is destroyed and all outstanding client calls on it have been completed. This prevents a reference loop on the namespace. (5) It is possible that several namespaces will want to use AFS, in which case each one will need its own UDP port. These can either be set through /proc/net/afs/cm_port or the kernel can pick one at random. The init_ns gets 7001 by default. Other issues that need resolving: (1) The DNS keyring needs net-namespacing. (2) Where do upcalls go (eg. DNS request-key upcall)? (3) Need something like open_socket_in_file_ns() syscall so that AFS command line tools attempting to operate on an AFS file/volume have their RPC calls go to the right place. 
Signed-off-by: David Howells --- fs/afs/afs.h | 9 ++ fs/afs/callback.c | 24 +----- fs/afs/cell.c | 130 ++++++++++++++--------------- fs/afs/cmservice.c | 26 +++--- fs/afs/flock.c | 39 +-------- fs/afs/fsclient.c | 56 ++++++++----- fs/afs/internal.h | 163 +++++++++++++++++++++++++++---------- fs/afs/main.c | 153 +++++++++++++++++++++++----------- fs/afs/proc.c | 64 +++++++++------ fs/afs/rxrpc.c | 132 +++++++++++++++--------------- fs/afs/server.c | 82 +++++++++---------- fs/afs/super.c | 45 +++++----- fs/afs/vlclient.c | 10 ++- fs/afs/vlocation.c | 151 ++++++++++++++++------------------ fs/afs/volume.c | 10 +-- include/uapi/linux/magic.h | 1 + 16 files changed, 603 insertions(+), 492 deletions(-) diff --git a/fs/afs/afs.h b/fs/afs/afs.h index 3c462ff6db63..93053115bcfc 100644 --- a/fs/afs/afs.h +++ b/fs/afs/afs.h @@ -72,6 +72,15 @@ struct afs_callback { #define AFSCBMAX 50 /* maximum callbacks transferred per bulk op */ +struct afs_uuid { + __be32 time_low; /* low part of timestamp */ + __be16 time_mid; /* mid part of timestamp */ + __be16 time_hi_and_version; /* high part of timestamp and version */ + __u8 clock_seq_hi_and_reserved; /* clock seq hi and variant */ + __u8 clock_seq_low; /* clock seq low */ + __u8 node[6]; /* spatially unique node ID (MAC addr) */ +}; + /* * AFS volume information */ diff --git a/fs/afs/callback.c b/fs/afs/callback.c index 25d404d22cae..d12dffb76b67 100644 --- a/fs/afs/callback.c +++ b/fs/afs/callback.c @@ -28,9 +28,7 @@ unsigned afs_vnode_update_timeout = 10; CIRC_SPACE((server)->cb_break_head, (server)->cb_break_tail, \ ARRAY_SIZE((server)->cb_break)) -//static void afs_callback_updater(struct work_struct *); - -static struct workqueue_struct *afs_callback_update_worker; +struct workqueue_struct *afs_callback_update_worker; /* * allow the fileserver to request callback state (re-)initialisation @@ -343,7 +341,7 @@ void afs_dispatch_give_up_callbacks(struct work_struct *work) * had callbacks entirely, and the server will call us 
later to break * them */ - afs_fs_give_up_callbacks(server, true); + afs_fs_give_up_callbacks(server->cell->net, server, true); } /* @@ -456,21 +454,3 @@ static void afs_callback_updater(struct work_struct *work) afs_put_vnode(vl); } #endif - -/* - * initialise the callback update process - */ -int __init afs_callback_update_init(void) -{ - afs_callback_update_worker = alloc_ordered_workqueue("kafs_callbackd", - WQ_MEM_RECLAIM); - return afs_callback_update_worker ? 0 : -ENOMEM; -} - -/* - * shut down the callback update process - */ -void afs_callback_update_kill(void) -{ - destroy_workqueue(afs_callback_update_worker); -} diff --git a/fs/afs/cell.c b/fs/afs/cell.c index ca0a3cf93791..bd570fa539a0 100644 --- a/fs/afs/cell.c +++ b/fs/afs/cell.c @@ -18,20 +18,12 @@ #include #include "internal.h" -DECLARE_RWSEM(afs_proc_cells_sem); -LIST_HEAD(afs_proc_cells); - -static LIST_HEAD(afs_cells); -static DEFINE_RWLOCK(afs_cells_lock); -static DECLARE_RWSEM(afs_cells_sem); /* add/remove serialisation */ -static DECLARE_WAIT_QUEUE_HEAD(afs_cells_freeable_wq); -static struct afs_cell *afs_cell_root; - /* * allocate a cell record and fill in its name, VL server address list and * allocate an anonymous key */ -static struct afs_cell *afs_cell_alloc(const char *name, unsigned namelen, +static struct afs_cell *afs_cell_alloc(struct afs_net *net, + const char *name, unsigned namelen, char *vllist) { struct afs_cell *cell; @@ -62,6 +54,7 @@ static struct afs_cell *afs_cell_alloc(const char *name, unsigned namelen, atomic_set(&cell->usage, 1); INIT_LIST_HEAD(&cell->link); + cell->net = net; rwlock_init(&cell->servers_lock); INIT_LIST_HEAD(&cell->servers); init_rwsem(&cell->vl_sem); @@ -142,12 +135,14 @@ error: /* * afs_cell_crate() - create a cell record + * @net: The network namespace * @name: is the name of the cell. * @namsesz: is the strlen of the cell name. * @vllist: is a colon separated list of IP addresses in "a.b.c.d" format. 
* @retref: is T to return the cell reference when the cell exists. */ -struct afs_cell *afs_cell_create(const char *name, unsigned namesz, +struct afs_cell *afs_cell_create(struct afs_net *net, + const char *name, unsigned namesz, char *vllist, bool retref) { struct afs_cell *cell; @@ -155,23 +150,23 @@ struct afs_cell *afs_cell_create(const char *name, unsigned namesz, _enter("%*.*s,%s", namesz, namesz, name ?: "", vllist); - down_write(&afs_cells_sem); - read_lock(&afs_cells_lock); - list_for_each_entry(cell, &afs_cells, link) { + down_write(&net->cells_sem); + read_lock(&net->cells_lock); + list_for_each_entry(cell, &net->cells, link) { if (strncasecmp(cell->name, name, namesz) == 0) goto duplicate_name; } - read_unlock(&afs_cells_lock); + read_unlock(&net->cells_lock); - cell = afs_cell_alloc(name, namesz, vllist); + cell = afs_cell_alloc(net, name, namesz, vllist); if (IS_ERR(cell)) { _leave(" = %ld", PTR_ERR(cell)); - up_write(&afs_cells_sem); + up_write(&net->cells_sem); return cell; } /* add a proc directory for this cell */ - ret = afs_proc_cell_setup(cell); + ret = afs_proc_cell_setup(net, cell); if (ret < 0) goto error; @@ -183,20 +178,20 @@ struct afs_cell *afs_cell_create(const char *name, unsigned namesz, #endif /* add to the cell lists */ - write_lock(&afs_cells_lock); - list_add_tail(&cell->link, &afs_cells); - write_unlock(&afs_cells_lock); + write_lock(&net->cells_lock); + list_add_tail(&cell->link, &net->cells); + write_unlock(&net->cells_lock); - down_write(&afs_proc_cells_sem); - list_add_tail(&cell->proc_link, &afs_proc_cells); - up_write(&afs_proc_cells_sem); - up_write(&afs_cells_sem); + down_write(&net->proc_cells_sem); + list_add_tail(&cell->proc_link, &net->proc_cells); + up_write(&net->proc_cells_sem); + up_write(&net->cells_sem); _leave(" = %p", cell); return cell; error: - up_write(&afs_cells_sem); + up_write(&net->cells_sem); key_put(cell->anonymous_key); kfree(cell); _leave(" = %d", ret); @@ -206,8 +201,8 @@ duplicate_name: if 
(retref && !IS_ERR(cell)) afs_get_cell(cell); - read_unlock(&afs_cells_lock); - up_write(&afs_cells_sem); + read_unlock(&net->cells_lock); + up_write(&net->cells_sem); if (retref) { _leave(" = %p", cell); @@ -223,7 +218,7 @@ duplicate_name: * - can be called with a module parameter string * - can be called from a write to /proc/fs/afs/rootcell */ -int afs_cell_init(char *rootcell) +int afs_cell_init(struct afs_net *net, char *rootcell) { struct afs_cell *old_root, *new_root; char *cp; @@ -245,17 +240,17 @@ int afs_cell_init(char *rootcell) *cp++ = 0; /* allocate a cell record for the root cell */ - new_root = afs_cell_create(rootcell, strlen(rootcell), cp, false); + new_root = afs_cell_create(net, rootcell, strlen(rootcell), cp, false); if (IS_ERR(new_root)) { _leave(" = %ld", PTR_ERR(new_root)); return PTR_ERR(new_root); } /* install the new cell */ - write_lock(&afs_cells_lock); - old_root = afs_cell_root; - afs_cell_root = new_root; - write_unlock(&afs_cells_lock); + write_lock(&net->cells_lock); + old_root = net->ws_cell; + net->ws_cell = new_root; + write_unlock(&net->cells_lock); afs_put_cell(old_root); _leave(" = 0"); @@ -265,19 +260,20 @@ int afs_cell_init(char *rootcell) /* * lookup a cell record */ -struct afs_cell *afs_cell_lookup(const char *name, unsigned namesz, +struct afs_cell *afs_cell_lookup(struct afs_net *net, + const char *name, unsigned namesz, bool dns_cell) { struct afs_cell *cell; _enter("\"%*.*s\",", namesz, namesz, name ?: ""); - down_read(&afs_cells_sem); - read_lock(&afs_cells_lock); + down_read(&net->cells_sem); + read_lock(&net->cells_lock); if (name) { /* if the cell was named, look for it in the cell record list */ - list_for_each_entry(cell, &afs_cells, link) { + list_for_each_entry(cell, &net->cells, link) { if (strncmp(cell->name, name, namesz) == 0) { afs_get_cell(cell); goto found; @@ -289,7 +285,7 @@ struct afs_cell *afs_cell_lookup(const char *name, unsigned namesz, found: ; } else { - cell = afs_cell_root; + cell = 
net->ws_cell; if (!cell) { /* this should not happen unless user tries to mount * when root cell is not set. Return an impossibly @@ -304,16 +300,16 @@ struct afs_cell *afs_cell_lookup(const char *name, unsigned namesz, } - read_unlock(&afs_cells_lock); - up_read(&afs_cells_sem); + read_unlock(&net->cells_lock); + up_read(&net->cells_sem); _leave(" = %p", cell); return cell; create_cell: - read_unlock(&afs_cells_lock); - up_read(&afs_cells_sem); + read_unlock(&net->cells_lock); + up_read(&net->cells_sem); - cell = afs_cell_create(name, namesz, NULL, true); + cell = afs_cell_create(net, name, namesz, NULL, true); _leave(" = %p", cell); return cell; @@ -325,14 +321,14 @@ create_cell: */ struct afs_cell *afs_get_cell_maybe(struct afs_cell *cell) { - write_lock(&afs_cells_lock); + write_lock(&net->cells_lock); if (cell && !list_empty(&cell->link)) afs_get_cell(cell); else cell = NULL; - write_unlock(&afs_cells_lock); + write_unlock(&net->cells_lock); return cell; } #endif /* 0 */ @@ -351,10 +347,10 @@ void afs_put_cell(struct afs_cell *cell) /* to prevent a race, the decrement and the dequeue must be effectively * atomic */ - write_lock(&afs_cells_lock); + write_lock(&cell->net->cells_lock); if (likely(!atomic_dec_and_test(&cell->usage))) { - write_unlock(&afs_cells_lock); + write_unlock(&cell->net->cells_lock); _leave(""); return; } @@ -362,19 +358,19 @@ void afs_put_cell(struct afs_cell *cell) ASSERT(list_empty(&cell->servers)); ASSERT(list_empty(&cell->vl_list)); - write_unlock(&afs_cells_lock); + wake_up(&cell->net->cells_freeable_wq); - wake_up(&afs_cells_freeable_wq); + write_unlock(&cell->net->cells_lock); _leave(" [unused]"); } /* * destroy a cell record - * - must be called with the afs_cells_sem write-locked + * - must be called with the net->cells_sem write-locked * - cell->link should have been broken by the caller */ -static void afs_cell_destroy(struct afs_cell *cell) +static void afs_cell_destroy(struct afs_net *net, struct afs_cell *cell) { 
_enter("%p{%d,%s}", cell, atomic_read(&cell->usage), cell->name); @@ -387,14 +383,14 @@ static void afs_cell_destroy(struct afs_cell *cell) _debug("wait for cell %s", cell->name); set_current_state(TASK_UNINTERRUPTIBLE); - add_wait_queue(&afs_cells_freeable_wq, &myself); + add_wait_queue(&net->cells_freeable_wq, &myself); while (atomic_read(&cell->usage) > 0) { schedule(); set_current_state(TASK_UNINTERRUPTIBLE); } - remove_wait_queue(&afs_cells_freeable_wq, &myself); + remove_wait_queue(&net->cells_freeable_wq, &myself); set_current_state(TASK_RUNNING); } @@ -403,11 +399,11 @@ static void afs_cell_destroy(struct afs_cell *cell) ASSERT(list_empty(&cell->servers)); ASSERT(list_empty(&cell->vl_list)); - afs_proc_cell_remove(cell); + afs_proc_cell_remove(net, cell); - down_write(&afs_proc_cells_sem); + down_write(&net->proc_cells_sem); list_del_init(&cell->proc_link); - up_write(&afs_proc_cells_sem); + up_write(&net->proc_cells_sem); #ifdef CONFIG_AFS_FSCACHE fscache_relinquish_cookie(cell->cache, 0); @@ -422,39 +418,39 @@ static void afs_cell_destroy(struct afs_cell *cell) * purge in-memory cell database on module unload or afs_init() failure * - the timeout daemon is stopped before calling this */ -void afs_cell_purge(void) +void afs_cell_purge(struct afs_net *net) { struct afs_cell *cell; _enter(""); - afs_put_cell(afs_cell_root); + afs_put_cell(net->ws_cell); - down_write(&afs_cells_sem); + down_write(&net->cells_sem); - while (!list_empty(&afs_cells)) { + while (!list_empty(&net->cells)) { cell = NULL; /* remove the next cell from the front of the list */ - write_lock(&afs_cells_lock); + write_lock(&net->cells_lock); - if (!list_empty(&afs_cells)) { - cell = list_entry(afs_cells.next, + if (!list_empty(&net->cells)) { + cell = list_entry(net->cells.next, struct afs_cell, link); list_del_init(&cell->link); } - write_unlock(&afs_cells_lock); + write_unlock(&net->cells_lock); if (cell) { _debug("PURGING CELL %s (%d)", cell->name, atomic_read(&cell->usage)); /* now 
the cell should be left with no references */ - afs_cell_destroy(cell); + afs_cell_destroy(net, cell); } } - up_write(&afs_cells_sem); + up_write(&net->cells_sem); _leave(""); } diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c index 782d4d05a53b..30ce4be4165f 100644 --- a/fs/afs/cmservice.c +++ b/fs/afs/cmservice.c @@ -193,7 +193,7 @@ static int afs_deliver_cb_callback(struct afs_call *call) switch (call->unmarshall) { case 0: - rxrpc_kernel_get_peer(afs_socket, call->rxcall, &srx); + rxrpc_kernel_get_peer(call->net->socket, call->rxcall, &srx); call->offset = 0; call->unmarshall++; @@ -290,7 +290,7 @@ static int afs_deliver_cb_callback(struct afs_call *call) /* we'll need the file server record as that tells us which set of * vnodes to operate upon */ - server = afs_find_server(&srx); + server = afs_find_server(call->net, &srx); if (!server) return -ENOTCONN; call->server = server; @@ -324,7 +324,7 @@ static int afs_deliver_cb_init_call_back_state(struct afs_call *call) _enter(""); - rxrpc_kernel_get_peer(afs_socket, call->rxcall, &srx); + rxrpc_kernel_get_peer(call->net->socket, call->rxcall, &srx); ret = afs_extract_data(call, NULL, 0, false); if (ret < 0) @@ -335,7 +335,7 @@ static int afs_deliver_cb_init_call_back_state(struct afs_call *call) /* we'll need the file server record as that tells us which set of * vnodes to operate upon */ - server = afs_find_server(&srx); + server = afs_find_server(call->net, &srx); if (!server) return -ENOTCONN; call->server = server; @@ -357,7 +357,7 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call) _enter(""); - rxrpc_kernel_get_peer(afs_socket, call->rxcall, &srx); + rxrpc_kernel_get_peer(call->net->socket, call->rxcall, &srx); _enter("{%u}", call->unmarshall); @@ -407,7 +407,7 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call) /* we'll need the file server record as that tells us which set of * vnodes to operate upon */ - server = afs_find_server(&srx); + server = 
afs_find_server(call->net, &srx); if (!server) return -ENOTCONN; call->server = server; @@ -461,7 +461,7 @@ static void SRXAFSCB_ProbeUuid(struct work_struct *work) _enter(""); - if (memcmp(r, &afs_uuid, sizeof(afs_uuid)) == 0) + if (memcmp(r, &call->net->uuid, sizeof(call->net->uuid)) == 0) reply.match = htonl(0); else reply.match = htonl(1); @@ -568,13 +568,13 @@ static void SRXAFSCB_TellMeAboutYourself(struct work_struct *work) memset(&reply, 0, sizeof(reply)); reply.ia.nifs = htonl(nifs); - reply.ia.uuid[0] = afs_uuid.time_low; - reply.ia.uuid[1] = htonl(ntohs(afs_uuid.time_mid)); - reply.ia.uuid[2] = htonl(ntohs(afs_uuid.time_hi_and_version)); - reply.ia.uuid[3] = htonl((s8) afs_uuid.clock_seq_hi_and_reserved); - reply.ia.uuid[4] = htonl((s8) afs_uuid.clock_seq_low); + reply.ia.uuid[0] = call->net->uuid.time_low; + reply.ia.uuid[1] = htonl(ntohs(call->net->uuid.time_mid)); + reply.ia.uuid[2] = htonl(ntohs(call->net->uuid.time_hi_and_version)); + reply.ia.uuid[3] = htonl((s8) call->net->uuid.clock_seq_hi_and_reserved); + reply.ia.uuid[4] = htonl((s8) call->net->uuid.clock_seq_low); for (loop = 0; loop < 6; loop++) - reply.ia.uuid[loop + 5] = htonl((s8) afs_uuid.node[loop]); + reply.ia.uuid[loop + 5] = htonl((s8) call->net->uuid.node[loop]); if (ifs) { for (loop = 0; loop < nifs; loop++) { diff --git a/fs/afs/flock.c b/fs/afs/flock.c index 3191dff2c156..559ac00af5f7 100644 --- a/fs/afs/flock.c +++ b/fs/afs/flock.c @@ -14,47 +14,16 @@ #define AFS_LOCK_GRANTED 0 #define AFS_LOCK_PENDING 1 +struct workqueue_struct *afs_lock_manager; + static void afs_fl_copy_lock(struct file_lock *new, struct file_lock *fl); static void afs_fl_release_private(struct file_lock *fl); -static struct workqueue_struct *afs_lock_manager; -static DEFINE_MUTEX(afs_lock_manager_mutex); - static const struct file_lock_operations afs_lock_ops = { .fl_copy_lock = afs_fl_copy_lock, .fl_release_private = afs_fl_release_private, }; -/* - * initialise the lock manager thread if it isn't already 
running - */ -static int afs_init_lock_manager(void) -{ - int ret; - - ret = 0; - if (!afs_lock_manager) { - mutex_lock(&afs_lock_manager_mutex); - if (!afs_lock_manager) { - afs_lock_manager = alloc_workqueue("kafs_lockd", - WQ_MEM_RECLAIM, 0); - if (!afs_lock_manager) - ret = -ENOMEM; - } - mutex_unlock(&afs_lock_manager_mutex); - } - return ret; -} - -/* - * destroy the lock manager thread if it's running - */ -void __exit afs_kill_lock_manager(void) -{ - if (afs_lock_manager) - destroy_workqueue(afs_lock_manager); -} - /* * if the callback is broken on this vnode, then the lock may now be available */ @@ -264,10 +233,6 @@ static int afs_do_setlk(struct file *file, struct file_lock *fl) if (fl->fl_start != 0 || fl->fl_end != OFFSET_MAX) return -EINVAL; - ret = afs_init_lock_manager(); - if (ret < 0) - return ret; - fl->fl_ops = &afs_lock_ops; INIT_LIST_HEAD(&fl->fl_u.afs.link); fl->fl_u.afs.state = AFS_LOCK_PENDING; diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index 19f76ae36982..ce6f0159e1d4 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -284,12 +284,13 @@ int afs_fs_fetch_file_status(struct afs_server *server, bool async) { struct afs_call *call; + struct afs_net *net = afs_v2net(vnode); __be32 *bp; _enter(",%x,{%x:%u},,", key_serial(key), vnode->fid.vid, vnode->fid.vnode); - call = afs_alloc_flat_call(&afs_RXFSFetchStatus, 16, (21 + 3 + 6) * 4); + call = afs_alloc_flat_call(net, &afs_RXFSFetchStatus, 16, (21 + 3 + 6) * 4); if (!call) return -ENOMEM; @@ -490,11 +491,12 @@ static int afs_fs_fetch_data64(struct afs_server *server, bool async) { struct afs_call *call; + struct afs_net *net = afs_v2net(vnode); __be32 *bp; _enter(""); - call = afs_alloc_flat_call(&afs_RXFSFetchData64, 32, (21 + 3 + 6) * 4); + call = afs_alloc_flat_call(net, &afs_RXFSFetchData64, 32, (21 + 3 + 6) * 4); if (!call) return -ENOMEM; @@ -531,6 +533,7 @@ int afs_fs_fetch_data(struct afs_server *server, bool async) { struct afs_call *call; + struct afs_net *net = 
afs_v2net(vnode); __be32 *bp; if (upper_32_bits(req->pos) || @@ -540,7 +543,7 @@ int afs_fs_fetch_data(struct afs_server *server, _enter(""); - call = afs_alloc_flat_call(&afs_RXFSFetchData, 24, (21 + 3 + 6) * 4); + call = afs_alloc_flat_call(net, &afs_RXFSFetchData, 24, (21 + 3 + 6) * 4); if (!call) return -ENOMEM; @@ -590,7 +593,8 @@ static const struct afs_call_type afs_RXFSGiveUpCallBacks = { * give up a set of callbacks * - the callbacks are held in the server->cb_break ring */ -int afs_fs_give_up_callbacks(struct afs_server *server, +int afs_fs_give_up_callbacks(struct afs_net *net, + struct afs_server *server, bool async) { struct afs_call *call; @@ -610,7 +614,7 @@ int afs_fs_give_up_callbacks(struct afs_server *server, _debug("break %zu callbacks", ncallbacks); - call = afs_alloc_flat_call(&afs_RXFSGiveUpCallBacks, + call = afs_alloc_flat_call(net, &afs_RXFSGiveUpCallBacks, 12 + ncallbacks * 6 * 4, 0); if (!call) return -ENOMEM; @@ -699,6 +703,7 @@ int afs_fs_create(struct afs_server *server, bool async) { struct afs_call *call; + struct afs_net *net = afs_v2net(vnode); size_t namesz, reqsz, padsz; __be32 *bp; @@ -708,7 +713,7 @@ int afs_fs_create(struct afs_server *server, padsz = (4 - (namesz & 3)) & 3; reqsz = (5 * 4) + namesz + padsz + (6 * 4); - call = afs_alloc_flat_call(&afs_RXFSCreateXXXX, reqsz, + call = afs_alloc_flat_call(net, &afs_RXFSCreateXXXX, reqsz, (3 + 21 + 21 + 3 + 6) * 4); if (!call) return -ENOMEM; @@ -789,6 +794,7 @@ int afs_fs_remove(struct afs_server *server, bool async) { struct afs_call *call; + struct afs_net *net = afs_v2net(vnode); size_t namesz, reqsz, padsz; __be32 *bp; @@ -798,7 +804,7 @@ int afs_fs_remove(struct afs_server *server, padsz = (4 - (namesz & 3)) & 3; reqsz = (5 * 4) + namesz + padsz; - call = afs_alloc_flat_call(&afs_RXFSRemoveXXXX, reqsz, (21 + 6) * 4); + call = afs_alloc_flat_call(net, &afs_RXFSRemoveXXXX, reqsz, (21 + 6) * 4); if (!call) return -ENOMEM; @@ -870,6 +876,7 @@ int afs_fs_link(struct afs_server 
*server, bool async) { struct afs_call *call; + struct afs_net *net = afs_v2net(vnode); size_t namesz, reqsz, padsz; __be32 *bp; @@ -879,7 +886,7 @@ int afs_fs_link(struct afs_server *server, padsz = (4 - (namesz & 3)) & 3; reqsz = (5 * 4) + namesz + padsz + (3 * 4); - call = afs_alloc_flat_call(&afs_RXFSLink, reqsz, (21 + 21 + 6) * 4); + call = afs_alloc_flat_call(net, &afs_RXFSLink, reqsz, (21 + 21 + 6) * 4); if (!call) return -ENOMEM; @@ -958,6 +965,7 @@ int afs_fs_symlink(struct afs_server *server, bool async) { struct afs_call *call; + struct afs_net *net = afs_v2net(vnode); size_t namesz, reqsz, padsz, c_namesz, c_padsz; __be32 *bp; @@ -971,7 +979,7 @@ int afs_fs_symlink(struct afs_server *server, reqsz = (6 * 4) + namesz + padsz + c_namesz + c_padsz + (6 * 4); - call = afs_alloc_flat_call(&afs_RXFSSymlink, reqsz, + call = afs_alloc_flat_call(net, &afs_RXFSSymlink, reqsz, (3 + 21 + 21 + 6) * 4); if (!call) return -ENOMEM; @@ -1062,6 +1070,7 @@ int afs_fs_rename(struct afs_server *server, bool async) { struct afs_call *call; + struct afs_net *net = afs_v2net(orig_dvnode); size_t reqsz, o_namesz, o_padsz, n_namesz, n_padsz; __be32 *bp; @@ -1078,7 +1087,7 @@ int afs_fs_rename(struct afs_server *server, (3 * 4) + 4 + n_namesz + n_padsz; - call = afs_alloc_flat_call(&afs_RXFSRename, reqsz, (21 + 21 + 6) * 4); + call = afs_alloc_flat_call(net, &afs_RXFSRename, reqsz, (21 + 21 + 6) * 4); if (!call) return -ENOMEM; @@ -1172,12 +1181,13 @@ static int afs_fs_store_data64(struct afs_server *server, { struct afs_vnode *vnode = wb->vnode; struct afs_call *call; + struct afs_net *net = afs_v2net(vnode); __be32 *bp; _enter(",%x,{%x:%u},,", key_serial(wb->key), vnode->fid.vid, vnode->fid.vnode); - call = afs_alloc_flat_call(&afs_RXFSStoreData64, + call = afs_alloc_flat_call(net, &afs_RXFSStoreData64, (4 + 6 + 3 * 2) * 4, (21 + 6) * 4); if (!call) @@ -1230,6 +1240,7 @@ int afs_fs_store_data(struct afs_server *server, struct afs_writeback *wb, { struct afs_vnode *vnode = 
wb->vnode; struct afs_call *call; + struct afs_net *net = afs_v2net(vnode); loff_t size, pos, i_size; __be32 *bp; @@ -1254,7 +1265,7 @@ int afs_fs_store_data(struct afs_server *server, struct afs_writeback *wb, return afs_fs_store_data64(server, wb, first, last, offset, to, size, pos, i_size, async); - call = afs_alloc_flat_call(&afs_RXFSStoreData, + call = afs_alloc_flat_call(net, &afs_RXFSStoreData, (4 + 6 + 3) * 4, (21 + 6) * 4); if (!call) @@ -1356,6 +1367,7 @@ static int afs_fs_setattr_size64(struct afs_server *server, struct key *key, bool async) { struct afs_call *call; + struct afs_net *net = afs_v2net(vnode); __be32 *bp; _enter(",%x,{%x:%u},,", @@ -1363,7 +1375,7 @@ static int afs_fs_setattr_size64(struct afs_server *server, struct key *key, ASSERT(attr->ia_valid & ATTR_SIZE); - call = afs_alloc_flat_call(&afs_RXFSStoreData64_as_Status, + call = afs_alloc_flat_call(net, &afs_RXFSStoreData64_as_Status, (4 + 6 + 3 * 2) * 4, (21 + 6) * 4); if (!call) @@ -1404,6 +1416,7 @@ static int afs_fs_setattr_size(struct afs_server *server, struct key *key, bool async) { struct afs_call *call; + struct afs_net *net = afs_v2net(vnode); __be32 *bp; _enter(",%x,{%x:%u},,", @@ -1414,7 +1427,7 @@ static int afs_fs_setattr_size(struct afs_server *server, struct key *key, return afs_fs_setattr_size64(server, key, vnode, attr, async); - call = afs_alloc_flat_call(&afs_RXFSStoreData_as_Status, + call = afs_alloc_flat_call(net, &afs_RXFSStoreData_as_Status, (4 + 6 + 3) * 4, (21 + 6) * 4); if (!call) @@ -1452,6 +1465,7 @@ int afs_fs_setattr(struct afs_server *server, struct key *key, bool async) { struct afs_call *call; + struct afs_net *net = afs_v2net(vnode); __be32 *bp; if (attr->ia_valid & ATTR_SIZE) @@ -1461,7 +1475,7 @@ int afs_fs_setattr(struct afs_server *server, struct key *key, _enter(",%x,{%x:%u},,", key_serial(key), vnode->fid.vid, vnode->fid.vnode); - call = afs_alloc_flat_call(&afs_RXFSStoreStatus, + call = afs_alloc_flat_call(net, &afs_RXFSStoreStatus, (4 + 6) * 4, 
(21 + 6) * 4); if (!call) @@ -1687,6 +1701,7 @@ int afs_fs_get_volume_status(struct afs_server *server, bool async) { struct afs_call *call; + struct afs_net *net = afs_v2net(vnode); __be32 *bp; void *tmpbuf; @@ -1696,7 +1711,7 @@ int afs_fs_get_volume_status(struct afs_server *server, if (!tmpbuf) return -ENOMEM; - call = afs_alloc_flat_call(&afs_RXFSGetVolumeStatus, 2 * 4, 12 * 4); + call = afs_alloc_flat_call(net, &afs_RXFSGetVolumeStatus, 2 * 4, 12 * 4); if (!call) { kfree(tmpbuf); return -ENOMEM; @@ -1779,11 +1794,12 @@ int afs_fs_set_lock(struct afs_server *server, bool async) { struct afs_call *call; + struct afs_net *net = afs_v2net(vnode); __be32 *bp; _enter(""); - call = afs_alloc_flat_call(&afs_RXFSSetLock, 5 * 4, 6 * 4); + call = afs_alloc_flat_call(net, &afs_RXFSSetLock, 5 * 4, 6 * 4); if (!call) return -ENOMEM; @@ -1812,11 +1828,12 @@ int afs_fs_extend_lock(struct afs_server *server, bool async) { struct afs_call *call; + struct afs_net *net = afs_v2net(vnode); __be32 *bp; _enter(""); - call = afs_alloc_flat_call(&afs_RXFSExtendLock, 4 * 4, 6 * 4); + call = afs_alloc_flat_call(net, &afs_RXFSExtendLock, 4 * 4, 6 * 4); if (!call) return -ENOMEM; @@ -1844,11 +1861,12 @@ int afs_fs_release_lock(struct afs_server *server, bool async) { struct afs_call *call; + struct afs_net *net = afs_v2net(vnode); __be32 *bp; _enter(""); - call = afs_alloc_flat_call(&afs_RXFSReleaseLock, 4 * 4, 6 * 4); + call = afs_alloc_flat_call(net, &afs_RXFSReleaseLock, 4 * 4, 6 * 4); if (!call) return -ENOMEM; diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 3f03f7888302..53bd11d73469 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -21,6 +21,7 @@ #include #include #include +#include #include #include "afs.h" @@ -48,6 +49,7 @@ struct afs_mount_params { afs_voltype_t type; /* type of volume requested */ int volnamesz; /* size of volume name */ const char *volname; /* name of volume to mount */ + struct afs_net *net; /* Network namespace in effect */ struct afs_cell 
*cell; /* cell in which to find volume */ struct afs_volume *volume; /* volume record */ struct key *key; /* key to use for secure mounting */ @@ -62,6 +64,7 @@ enum afs_call_state { AFS_CALL_AWAIT_ACK, /* awaiting final ACK of incoming call */ AFS_CALL_COMPLETE, /* Completed or failed */ }; + /* * a record of an in-progress RxRPC call */ @@ -72,6 +75,7 @@ struct afs_call { struct work_struct work; /* actual work processor */ struct rxrpc_call *rxcall; /* RxRPC call handle */ struct key *key; /* security for this call */ + struct afs_net *net; /* The network namespace */ struct afs_server *server; /* server affected by incoming CM call */ void *request; /* request data (first part) */ struct address_space *mapping; /* page set */ @@ -173,6 +177,7 @@ struct afs_writeback { * - there's one superblock per volume */ struct afs_super_info { + struct afs_net *net; /* Network namespace */ struct afs_volume *volume; /* volume record */ char rwparent; /* T if parent is R/W AFS volume */ }; @@ -192,12 +197,62 @@ struct afs_cache_cell { struct in_addr vl_servers[15]; /* cached cell VL servers */ }; +/* + * AFS network namespace record. 
+ */ +struct afs_net { + struct afs_uuid uuid; + bool live; /* F if this namespace is being removed */ + + /* AF_RXRPC I/O stuff */ + struct socket *socket; + struct afs_call *spare_incoming_call; + struct work_struct charge_preallocation_work; + struct mutex socket_mutex; + atomic_t nr_outstanding_calls; + atomic_t nr_superblocks; + + /* Cell database */ + struct list_head cells; + struct afs_cell *ws_cell; + rwlock_t cells_lock; + struct rw_semaphore cells_sem; + wait_queue_head_t cells_freeable_wq; + + struct rw_semaphore proc_cells_sem; + struct list_head proc_cells; + + /* Volume location database */ + struct list_head vl_updates; /* VL records in need-update order */ + struct list_head vl_graveyard; /* Inactive VL records */ + struct delayed_work vl_reaper; + struct delayed_work vl_updater; + spinlock_t vl_updates_lock; + spinlock_t vl_graveyard_lock; + + /* File locking renewal management */ + struct mutex lock_manager_mutex; + + /* Server database */ + struct rb_root servers; /* Active servers */ + rwlock_t servers_lock; + struct list_head server_graveyard; /* Inactive server LRU list */ + spinlock_t server_graveyard_lock; + struct delayed_work server_reaper; + + /* Misc */ + struct proc_dir_entry *proc_afs; /* /proc/net/afs directory */ +}; + +extern struct afs_net __afs_net;// Dummy AFS network namespace; TODO: replace with real netns + /* * AFS cell record */ struct afs_cell { atomic_t usage; struct list_head link; /* main cell list link */ + struct afs_net *net; /* The network namespace */ struct key *anonymous_key; /* anonymous user key for this cell */ struct list_head proc_link; /* /proc cell list link */ #ifdef CONFIG_AFS_FSCACHE @@ -411,15 +466,6 @@ struct afs_interface { unsigned mtu; /* MTU of interface */ }; -struct afs_uuid { - __be32 time_low; /* low part of timestamp */ - __be16 time_mid; /* mid part of timestamp */ - __be16 time_hi_and_version; /* high part of timestamp and version */ - __u8 clock_seq_hi_and_reserved; /* clock seq hi and 
variant */ - __u8 clock_seq_low; /* clock seq low */ - __u8 node[6]; /* spatially unique node ID (MAC addr) */ -}; - /*****************************************************************************/ /* * cache.c @@ -440,6 +486,8 @@ extern struct fscache_cookie_def afs_vnode_cache_index_def; /* * callback.c */ +extern struct workqueue_struct *afs_callback_update_worker; + extern void afs_init_callback_state(struct afs_server *); extern void afs_broken_callback_work(struct work_struct *); extern void afs_break_callbacks(struct afs_server *, size_t, @@ -448,22 +496,17 @@ extern void afs_discard_callback_on_delete(struct afs_vnode *); extern void afs_give_up_callback(struct afs_vnode *); extern void afs_dispatch_give_up_callbacks(struct work_struct *); extern void afs_flush_callback_breaks(struct afs_server *); -extern int __init afs_callback_update_init(void); -extern void afs_callback_update_kill(void); /* * cell.c */ -extern struct rw_semaphore afs_proc_cells_sem; -extern struct list_head afs_proc_cells; - #define afs_get_cell(C) do { atomic_inc(&(C)->usage); } while(0) -extern int afs_cell_init(char *); -extern struct afs_cell *afs_cell_create(const char *, unsigned, char *, bool); -extern struct afs_cell *afs_cell_lookup(const char *, unsigned, bool); +extern int afs_cell_init(struct afs_net *, char *); +extern struct afs_cell *afs_cell_create(struct afs_net *, const char *, unsigned, char *, bool); +extern struct afs_cell *afs_cell_lookup(struct afs_net *, const char *, unsigned, bool); extern struct afs_cell *afs_grab_cell(struct afs_cell *); extern void afs_put_cell(struct afs_cell *); -extern void afs_cell_purge(void); +extern void __net_exit afs_cell_purge(struct afs_net *); /* * cmservice.c @@ -492,7 +535,8 @@ extern void afs_put_read(struct afs_read *); /* * flock.c */ -extern void __exit afs_kill_lock_manager(void); +extern struct workqueue_struct *afs_lock_manager; + extern void afs_lock_work(struct work_struct *); extern void 
afs_lock_may_be_available(struct afs_vnode *); extern int afs_lock(struct file *, int, struct file_lock *); @@ -504,7 +548,7 @@ extern int afs_flock(struct file *, int, struct file_lock *); extern int afs_fs_fetch_file_status(struct afs_server *, struct key *, struct afs_vnode *, struct afs_volsync *, bool); -extern int afs_fs_give_up_callbacks(struct afs_server *, bool); +extern int afs_fs_give_up_callbacks(struct afs_net *, struct afs_server *, bool); extern int afs_fs_fetch_data(struct afs_server *, struct key *, struct afs_vnode *, struct afs_read *, bool); extern int afs_fs_create(struct afs_server *, struct key *, @@ -554,7 +598,35 @@ extern int afs_drop_inode(struct inode *); * main.c */ extern struct workqueue_struct *afs_wq; -extern struct afs_uuid afs_uuid; + +static inline struct afs_net *afs_d2net(struct dentry *dentry) +{ + return &__afs_net; +} + +static inline struct afs_net *afs_i2net(struct inode *inode) +{ + return &__afs_net; +} + +static inline struct afs_net *afs_v2net(struct afs_vnode *vnode) +{ + return &__afs_net; +} + +static inline struct afs_net *afs_sock2net(struct sock *sk) +{ + return &__afs_net; +} + +static inline struct afs_net *afs_get_net(struct afs_net *net) +{ + return net; +} + +static inline void afs_put_net(struct afs_net *net) +{ +} /* * misc.c @@ -579,23 +651,24 @@ extern int afs_get_ipv4_interfaces(struct afs_interface *, size_t, bool); /* * proc.c */ -extern int afs_proc_init(void); -extern void afs_proc_cleanup(void); -extern int afs_proc_cell_setup(struct afs_cell *); -extern void afs_proc_cell_remove(struct afs_cell *); +extern int __net_init afs_proc_init(struct afs_net *); +extern void __net_exit afs_proc_cleanup(struct afs_net *); +extern int afs_proc_cell_setup(struct afs_net *, struct afs_cell *); +extern void afs_proc_cell_remove(struct afs_net *, struct afs_cell *); /* * rxrpc.c */ -extern struct socket *afs_socket; -extern atomic_t afs_outstanding_calls; +extern struct workqueue_struct *afs_async_calls; -extern 
int afs_open_socket(void); -extern void afs_close_socket(void); +extern int __net_init afs_open_socket(struct afs_net *); +extern void __net_exit afs_close_socket(struct afs_net *); +extern void afs_charge_preallocation(struct work_struct *); extern void afs_put_call(struct afs_call *); extern int afs_queue_call_work(struct afs_call *); extern int afs_make_call(struct in_addr *, struct afs_call *, gfp_t, bool); -extern struct afs_call *afs_alloc_flat_call(const struct afs_call_type *, +extern struct afs_call *afs_alloc_flat_call(struct afs_net *, + const struct afs_call_type *, size_t, size_t); extern void afs_flat_call_destructor(struct afs_call *); extern void afs_send_empty_reply(struct afs_call *); @@ -629,37 +702,45 @@ do { \ extern struct afs_server *afs_lookup_server(struct afs_cell *, const struct in_addr *); -extern struct afs_server *afs_find_server(const struct sockaddr_rxrpc *); +extern struct afs_server *afs_find_server(struct afs_net *, + const struct sockaddr_rxrpc *); extern void afs_put_server(struct afs_server *); -extern void __exit afs_purge_servers(void); +extern void afs_reap_server(struct work_struct *); +extern void __net_exit afs_purge_servers(struct afs_net *); /* * super.c */ -extern int afs_fs_init(void); -extern void afs_fs_exit(void); +extern int __init afs_fs_init(void); +extern void __exit afs_fs_exit(void); /* * vlclient.c */ -extern int afs_vl_get_entry_by_name(struct in_addr *, struct key *, +extern int afs_vl_get_entry_by_name(struct afs_net *, + struct in_addr *, struct key *, const char *, struct afs_cache_vlocation *, bool); -extern int afs_vl_get_entry_by_id(struct in_addr *, struct key *, +extern int afs_vl_get_entry_by_id(struct afs_net *, + struct in_addr *, struct key *, afs_volid_t, afs_voltype_t, struct afs_cache_vlocation *, bool); /* * vlocation.c */ +extern struct workqueue_struct *afs_vlocation_update_worker; + #define afs_get_vlocation(V) do { atomic_inc(&(V)->usage); } while(0) -extern int __init 
afs_vlocation_update_init(void); -extern struct afs_vlocation *afs_vlocation_lookup(struct afs_cell *, +extern struct afs_vlocation *afs_vlocation_lookup(struct afs_net *, + struct afs_cell *, struct key *, const char *, size_t); -extern void afs_put_vlocation(struct afs_vlocation *); -extern void afs_vlocation_purge(void); +extern void afs_put_vlocation(struct afs_net *, struct afs_vlocation *); +extern void afs_vlocation_updater(struct work_struct *); +extern void afs_vlocation_reaper(struct work_struct *); +extern void __net_exit afs_vlocation_purge(struct afs_net *); /* * vnode.c @@ -707,7 +788,7 @@ extern int afs_vnode_release_lock(struct afs_vnode *, struct key *); */ #define afs_get_volume(V) do { atomic_inc(&(V)->usage); } while(0) -extern void afs_put_volume(struct afs_volume *); +extern void afs_put_volume(struct afs_net *, struct afs_volume *); extern struct afs_volume *afs_volume_lookup(struct afs_mount_params *); extern struct afs_server *afs_volume_pick_fileserver(struct afs_vnode *); extern int afs_volume_release_fileserver(struct afs_vnode *, diff --git a/fs/afs/main.c b/fs/afs/main.c index 9944770849da..87b1a9c8000d 100644 --- a/fs/afs/main.c +++ b/fs/afs/main.c @@ -31,30 +31,104 @@ static char *rootcell; module_param(rootcell, charp, 0); MODULE_PARM_DESC(rootcell, "root AFS cell name and VL server IP addr list"); -struct afs_uuid afs_uuid; struct workqueue_struct *afs_wq; +struct afs_net __afs_net; + +/* + * Initialise an AFS network namespace record. 
+ */ +static int __net_init afs_net_init(struct afs_net *net) +{ + int ret; + + net->live = true; + generate_random_uuid((unsigned char *)&net->uuid); + + INIT_WORK(&net->charge_preallocation_work, afs_charge_preallocation); + mutex_init(&net->socket_mutex); + INIT_LIST_HEAD(&net->cells); + rwlock_init(&net->cells_lock); + init_rwsem(&net->cells_sem); + init_waitqueue_head(&net->cells_freeable_wq); + init_rwsem(&net->proc_cells_sem); + INIT_LIST_HEAD(&net->proc_cells); + INIT_LIST_HEAD(&net->vl_updates); + INIT_LIST_HEAD(&net->vl_graveyard); + INIT_DELAYED_WORK(&net->vl_reaper, afs_vlocation_reaper); + INIT_DELAYED_WORK(&net->vl_updater, afs_vlocation_updater); + spin_lock_init(&net->vl_updates_lock); + spin_lock_init(&net->vl_graveyard_lock); + net->servers = RB_ROOT; + rwlock_init(&net->servers_lock); + INIT_LIST_HEAD(&net->server_graveyard); + spin_lock_init(&net->server_graveyard_lock); + INIT_DELAYED_WORK(&net->server_reaper, afs_reap_server); + + /* Register the /proc stuff */ + ret = afs_proc_init(net); + if (ret < 0) + goto error_proc; + + /* Initialise the cell DB */ + ret = afs_cell_init(net, rootcell); + if (ret < 0) + goto error_cell_init; + + /* Create the RxRPC transport */ + ret = afs_open_socket(net); + if (ret < 0) + goto error_open_socket; + + return 0; + +error_open_socket: + afs_vlocation_purge(net); + afs_cell_purge(net); +error_cell_init: + afs_proc_cleanup(net); +error_proc: + return ret; +} + +/* + * Clean up and destroy an AFS network namespace record. 
+ */ +static void __net_exit afs_net_exit(struct afs_net *net) +{ + net->live = false; + afs_close_socket(net); + afs_purge_servers(net); + afs_vlocation_purge(net); + afs_cell_purge(net); + afs_proc_cleanup(net); +} /* * initialise the AFS client FS module */ static int __init afs_init(void) { - int ret; + int ret = -ENOMEM; printk(KERN_INFO "kAFS: Red Hat AFS client v0.1 registering.\n"); - generate_random_uuid((unsigned char *)&afs_uuid); - - /* create workqueue */ - ret = -ENOMEM; afs_wq = alloc_workqueue("afs", 0, 0); if (!afs_wq) - return ret; - - /* register the /proc stuff */ - ret = afs_proc_init(); - if (ret < 0) - goto error_proc; + goto error_afs_wq; + afs_async_calls = alloc_workqueue("kafsd", WQ_MEM_RECLAIM, 0); + if (!afs_async_calls) + goto error_async; + afs_vlocation_update_worker = + alloc_workqueue("kafs_vlupdated", WQ_MEM_RECLAIM, 0); + if (!afs_vlocation_update_worker) + goto error_vl_up; + afs_callback_update_worker = + alloc_ordered_workqueue("kafs_callbackd", WQ_MEM_RECLAIM); + if (!afs_callback_update_worker) + goto error_callback; + afs_lock_manager = alloc_workqueue("kafs_lockd", WQ_MEM_RECLAIM, 0); + if (!afs_lock_manager) + goto error_lockmgr; #ifdef CONFIG_AFS_FSCACHE /* we want to be able to cache */ @@ -63,25 +137,9 @@ static int __init afs_init(void) goto error_cache; #endif - /* initialise the cell DB */ - ret = afs_cell_init(rootcell); + ret = afs_net_init(&__afs_net); if (ret < 0) - goto error_cell_init; - - /* initialise the VL update process */ - ret = afs_vlocation_update_init(); - if (ret < 0) - goto error_vl_update_init; - - /* initialise the callback update process */ - ret = afs_callback_update_init(); - if (ret < 0) - goto error_callback_update_init; - - /* create the RxRPC transport */ - ret = afs_open_socket(); - if (ret < 0) - goto error_open_socket; + goto error_net; /* register the filesystems */ ret = afs_fs_init(); @@ -91,21 +149,22 @@ static int __init afs_init(void) return ret; error_fs: - afs_close_socket(); 
-error_open_socket: - afs_callback_update_kill(); -error_callback_update_init: - afs_vlocation_purge(); -error_vl_update_init: - afs_cell_purge(); -error_cell_init: + afs_net_exit(&__afs_net); +error_net: #ifdef CONFIG_AFS_FSCACHE fscache_unregister_netfs(&afs_cache_netfs); error_cache: #endif - afs_proc_cleanup(); -error_proc: + destroy_workqueue(afs_lock_manager); +error_lockmgr: + destroy_workqueue(afs_callback_update_worker); +error_callback: + destroy_workqueue(afs_vlocation_update_worker); +error_vl_up: + destroy_workqueue(afs_async_calls); +error_async: destroy_workqueue(afs_wq); +error_afs_wq: rcu_barrier(); printk(KERN_ERR "kAFS: failed to register: %d\n", ret); return ret; @@ -124,17 +183,15 @@ static void __exit afs_exit(void) printk(KERN_INFO "kAFS: Red Hat AFS client v0.1 unregistering.\n"); afs_fs_exit(); - afs_kill_lock_manager(); - afs_close_socket(); - afs_purge_servers(); - afs_callback_update_kill(); - afs_vlocation_purge(); - destroy_workqueue(afs_wq); - afs_cell_purge(); + afs_net_exit(&__afs_net); #ifdef CONFIG_AFS_FSCACHE fscache_unregister_netfs(&afs_cache_netfs); #endif - afs_proc_cleanup(); + destroy_workqueue(afs_lock_manager); + destroy_workqueue(afs_callback_update_worker); + destroy_workqueue(afs_vlocation_update_worker); + destroy_workqueue(afs_async_calls); + destroy_workqueue(afs_wq); rcu_barrier(); } diff --git a/fs/afs/proc.c b/fs/afs/proc.c index 35efb9a31dd7..c93433460348 100644 --- a/fs/afs/proc.c +++ b/fs/afs/proc.c @@ -17,8 +17,15 @@ #include #include "internal.h" -static struct proc_dir_entry *proc_afs; +static inline struct afs_net *afs_proc2net(struct file *f) +{ + return &__afs_net; +} +static inline struct afs_net *afs_seq2net(struct seq_file *m) +{ + return &__afs_net; // TODO: use seq_file_net(m) +} static int afs_proc_cells_open(struct inode *inode, struct file *file); static void *afs_proc_cells_start(struct seq_file *p, loff_t *pos); @@ -122,23 +129,23 @@ static const struct file_operations 
afs_proc_cell_servers_fops = { /* * initialise the /proc/fs/afs/ directory */ -int afs_proc_init(void) +int afs_proc_init(struct afs_net *net) { _enter(""); - proc_afs = proc_mkdir("fs/afs", NULL); - if (!proc_afs) + net->proc_afs = proc_mkdir("fs/afs", NULL); + if (!net->proc_afs) goto error_dir; - if (!proc_create("cells", 0644, proc_afs, &afs_proc_cells_fops) || - !proc_create("rootcell", 0644, proc_afs, &afs_proc_rootcell_fops)) + if (!proc_create("cells", 0644, net->proc_afs, &afs_proc_cells_fops) || + !proc_create("rootcell", 0644, net->proc_afs, &afs_proc_rootcell_fops)) goto error_tree; _leave(" = 0"); return 0; error_tree: - remove_proc_subtree("fs/afs", NULL); + proc_remove(net->proc_afs); error_dir: _leave(" = -ENOMEM"); return -ENOMEM; @@ -147,9 +154,10 @@ error_dir: /* * clean up the /proc/fs/afs/ directory */ -void afs_proc_cleanup(void) +void afs_proc_cleanup(struct afs_net *net) { - remove_proc_subtree("fs/afs", NULL); + proc_remove(net->proc_afs); + net->proc_afs = NULL; } /* @@ -176,25 +184,30 @@ static int afs_proc_cells_open(struct inode *inode, struct file *file) */ static void *afs_proc_cells_start(struct seq_file *m, loff_t *_pos) { - /* lock the list against modification */ - down_read(&afs_proc_cells_sem); - return seq_list_start_head(&afs_proc_cells, *_pos); + struct afs_net *net = afs_seq2net(m); + + down_read(&net->proc_cells_sem); + return seq_list_start_head(&net->proc_cells, *_pos); } /* * move to next cell in cells list */ -static void *afs_proc_cells_next(struct seq_file *p, void *v, loff_t *pos) +static void *afs_proc_cells_next(struct seq_file *m, void *v, loff_t *pos) { - return seq_list_next(v, &afs_proc_cells, pos); + struct afs_net *net = afs_seq2net(m); + + return seq_list_next(v, &net->proc_cells, pos); } /* * clean up after reading from the cells list */ -static void afs_proc_cells_stop(struct seq_file *p, void *v) +static void afs_proc_cells_stop(struct seq_file *m, void *v) { - up_read(&afs_proc_cells_sem); + struct 
afs_net *net = afs_seq2net(m); + + up_read(&net->proc_cells_sem); } /* @@ -203,8 +216,9 @@ static void afs_proc_cells_stop(struct seq_file *p, void *v) static int afs_proc_cells_show(struct seq_file *m, void *v) { struct afs_cell *cell = list_entry(v, struct afs_cell, proc_link); + struct afs_net *net = afs_seq2net(m); - if (v == &afs_proc_cells) { + if (v == &net->proc_cells) { /* display header on line 1 */ seq_puts(m, "USE NAME\n"); return 0; @@ -223,6 +237,7 @@ static int afs_proc_cells_show(struct seq_file *m, void *v) static ssize_t afs_proc_cells_write(struct file *file, const char __user *buf, size_t size, loff_t *_pos) { + struct afs_net *net = afs_proc2net(file); char *kbuf, *name, *args; int ret; @@ -264,7 +279,7 @@ static ssize_t afs_proc_cells_write(struct file *file, const char __user *buf, if (strcmp(kbuf, "add") == 0) { struct afs_cell *cell; - cell = afs_cell_create(name, strlen(name), args, false); + cell = afs_cell_create(net, name, strlen(name), args, false); if (IS_ERR(cell)) { ret = PTR_ERR(cell); goto done; @@ -303,6 +318,7 @@ static ssize_t afs_proc_rootcell_write(struct file *file, const char __user *buf, size_t size, loff_t *_pos) { + struct afs_net *net = afs_proc2net(file); char *kbuf, *s; int ret; @@ -322,7 +338,7 @@ static ssize_t afs_proc_rootcell_write(struct file *file, /* determine command to perform */ _debug("rootcell=%s", kbuf); - ret = afs_cell_init(kbuf); + ret = afs_cell_init(net, kbuf); if (ret >= 0) ret = size; /* consume everything, always */ @@ -334,13 +350,13 @@ static ssize_t afs_proc_rootcell_write(struct file *file, /* * initialise /proc/fs/afs// */ -int afs_proc_cell_setup(struct afs_cell *cell) +int afs_proc_cell_setup(struct afs_net *net, struct afs_cell *cell) { struct proc_dir_entry *dir; _enter("%p{%s}", cell, cell->name); - dir = proc_mkdir(cell->name, proc_afs); + dir = proc_mkdir(cell->name, net->proc_afs); if (!dir) goto error_dir; @@ -356,7 +372,7 @@ int afs_proc_cell_setup(struct afs_cell *cell) return 0; 
error_tree: - remove_proc_subtree(cell->name, proc_afs); + remove_proc_subtree(cell->name, net->proc_afs); error_dir: _leave(" = -ENOMEM"); return -ENOMEM; @@ -365,11 +381,11 @@ error_dir: /* * remove /proc/fs/afs// */ -void afs_proc_cell_remove(struct afs_cell *cell) +void afs_proc_cell_remove(struct afs_net *net, struct afs_cell *cell) { _enter(""); - remove_proc_subtree(cell->name, proc_afs); + remove_proc_subtree(cell->name, net->proc_afs); _leave(""); } diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 77f5420a1a24..656ceb285b85 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -17,10 +17,7 @@ #include "internal.h" #include "afs_cm.h" -struct socket *afs_socket; /* my RxRPC socket */ -static struct workqueue_struct *afs_async_calls; -static struct afs_call *afs_spare_incoming_call; -atomic_t afs_outstanding_calls; +struct workqueue_struct *afs_async_calls; static void afs_wake_up_call_waiter(struct sock *, struct rxrpc_call *, unsigned long); static int afs_wait_for_call_to_complete(struct afs_call *); @@ -37,15 +34,11 @@ static const struct afs_call_type afs_RXCMxxxx = { .abort_to_error = afs_abort_to_error, }; -static void afs_charge_preallocation(struct work_struct *); - -static DECLARE_WORK(afs_charge_preallocation_work, afs_charge_preallocation); - /* * open an RxRPC socket and bind it to be a server for callback notifications * - the socket is left in blocking mode and non-blocking ops use MSG_DONTWAIT */ -int afs_open_socket(void) +int afs_open_socket(struct afs_net *net) { struct sockaddr_rxrpc srx; struct socket *socket; @@ -53,11 +46,6 @@ int afs_open_socket(void) _enter(""); - ret = -ENOMEM; - afs_async_calls = alloc_workqueue("kafsd", WQ_MEM_RECLAIM, 0); - if (!afs_async_calls) - goto error_0; - ret = sock_create_kern(&init_net, AF_RXRPC, SOCK_DGRAM, PF_INET, &socket); if (ret < 0) goto error_1; @@ -85,16 +73,14 @@ int afs_open_socket(void) if (ret < 0) goto error_2; - afs_socket = socket; - afs_charge_preallocation(NULL); + net->socket = 
socket; + afs_charge_preallocation(&net->charge_preallocation_work); _leave(" = 0"); return 0; error_2: sock_release(socket); error_1: - destroy_workqueue(afs_async_calls); -error_0: _leave(" = %d", ret); return ret; } @@ -102,36 +88,36 @@ error_0: /* * close the RxRPC socket AFS was using */ -void afs_close_socket(void) +void afs_close_socket(struct afs_net *net) { _enter(""); - kernel_listen(afs_socket, 0); + kernel_listen(net->socket, 0); flush_workqueue(afs_async_calls); - if (afs_spare_incoming_call) { - afs_put_call(afs_spare_incoming_call); - afs_spare_incoming_call = NULL; + if (net->spare_incoming_call) { + afs_put_call(net->spare_incoming_call); + net->spare_incoming_call = NULL; } - _debug("outstanding %u", atomic_read(&afs_outstanding_calls)); - wait_on_atomic_t(&afs_outstanding_calls, atomic_t_wait, + _debug("outstanding %u", atomic_read(&net->nr_outstanding_calls)); + wait_on_atomic_t(&net->nr_outstanding_calls, atomic_t_wait, TASK_UNINTERRUPTIBLE); _debug("no outstanding calls"); - kernel_sock_shutdown(afs_socket, SHUT_RDWR); + kernel_sock_shutdown(net->socket, SHUT_RDWR); flush_workqueue(afs_async_calls); - sock_release(afs_socket); + sock_release(net->socket); _debug("dework"); - destroy_workqueue(afs_async_calls); _leave(""); } /* * Allocate a call. 
*/ -static struct afs_call *afs_alloc_call(const struct afs_call_type *type, +static struct afs_call *afs_alloc_call(struct afs_net *net, + const struct afs_call_type *type, gfp_t gfp) { struct afs_call *call; @@ -142,11 +128,12 @@ static struct afs_call *afs_alloc_call(const struct afs_call_type *type, return NULL; call->type = type; + call->net = net; atomic_set(&call->usage, 1); INIT_WORK(&call->async_work, afs_process_async_call); init_waitqueue_head(&call->waitq); - o = atomic_inc_return(&afs_outstanding_calls); + o = atomic_inc_return(&net->nr_outstanding_calls); trace_afs_call(call, afs_call_trace_alloc, 1, o, __builtin_return_address(0)); return call; @@ -157,8 +144,9 @@ static struct afs_call *afs_alloc_call(const struct afs_call_type *type, */ void afs_put_call(struct afs_call *call) { + struct afs_net *net = call->net; int n = atomic_dec_return(&call->usage); - int o = atomic_read(&afs_outstanding_calls); + int o = atomic_read(&net->nr_outstanding_calls); trace_afs_call(call, afs_call_trace_put, n + 1, o, __builtin_return_address(0)); @@ -169,7 +157,7 @@ void afs_put_call(struct afs_call *call) ASSERT(call->type->name != NULL); if (call->rxcall) { - rxrpc_kernel_end_call(afs_socket, call->rxcall); + rxrpc_kernel_end_call(net->socket, call->rxcall); call->rxcall = NULL; } if (call->type->destructor) @@ -178,11 +166,11 @@ void afs_put_call(struct afs_call *call) kfree(call->request); kfree(call); - o = atomic_dec_return(&afs_outstanding_calls); + o = atomic_dec_return(&net->nr_outstanding_calls); trace_afs_call(call, afs_call_trace_free, 0, o, __builtin_return_address(0)); if (o == 0) - wake_up_atomic_t(&afs_outstanding_calls); + wake_up_atomic_t(&net->nr_outstanding_calls); } } @@ -194,7 +182,7 @@ int afs_queue_call_work(struct afs_call *call) int u = atomic_inc_return(&call->usage); trace_afs_call(call, afs_call_trace_work, u, - atomic_read(&afs_outstanding_calls), + atomic_read(&call->net->nr_outstanding_calls), __builtin_return_address(0)); 
INIT_WORK(&call->work, call->type->work); @@ -207,12 +195,13 @@ int afs_queue_call_work(struct afs_call *call) /* * allocate a call with flat request and reply buffers */ -struct afs_call *afs_alloc_flat_call(const struct afs_call_type *type, +struct afs_call *afs_alloc_flat_call(struct afs_net *net, + const struct afs_call_type *type, size_t request_size, size_t reply_max) { struct afs_call *call; - call = afs_alloc_call(type, GFP_NOFS); + call = afs_alloc_call(net, type, GFP_NOFS); if (!call) goto nomem_call; @@ -317,7 +306,7 @@ static int afs_send_pages(struct afs_call *call, struct msghdr *msg) bytes = msg->msg_iter.count; nr = msg->msg_iter.nr_segs; - ret = rxrpc_kernel_send_data(afs_socket, call->rxcall, msg, + ret = rxrpc_kernel_send_data(call->net->socket, call->rxcall, msg, bytes, afs_notify_end_request_tx); for (loop = 0; loop < nr; loop++) put_page(bv[loop].bv_page); @@ -352,7 +341,7 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp, _debug("____MAKE %p{%s,%x} [%d]____", call, call->type->name, key_serial(call->key), - atomic_read(&afs_outstanding_calls)); + atomic_read(&call->net->nr_outstanding_calls)); call->async = async; @@ -376,7 +365,7 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp, } /* create a call */ - rxcall = rxrpc_kernel_begin_call(afs_socket, &srx, call->key, + rxcall = rxrpc_kernel_begin_call(call->net->socket, &srx, call->key, (unsigned long)call, tx_total_len, gfp, (async ? 
@@ -410,7 +399,7 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp, */ if (!call->send_pages) call->state = AFS_CALL_AWAIT_REPLY; - ret = rxrpc_kernel_send_data(afs_socket, rxcall, + ret = rxrpc_kernel_send_data(call->net->socket, rxcall, &msg, call->request_size, afs_notify_end_request_tx); if (ret < 0) @@ -432,13 +421,14 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp, error_do_abort: call->state = AFS_CALL_COMPLETE; if (ret != -ECONNABORTED) { - rxrpc_kernel_abort_call(afs_socket, rxcall, RX_USER_ABORT, - ret, "KSD"); + rxrpc_kernel_abort_call(call->net->socket, rxcall, + RX_USER_ABORT, ret, "KSD"); } else { abort_code = 0; offset = 0; - rxrpc_kernel_recv_data(afs_socket, rxcall, NULL, 0, &offset, - false, &abort_code, &call->service_id); + rxrpc_kernel_recv_data(call->net->socket, rxcall, NULL, + 0, &offset, false, &abort_code, + &call->service_id); ret = call->type->abort_to_error(abort_code); } error_kill_call: @@ -464,7 +454,8 @@ static void afs_deliver_to_call(struct afs_call *call) ) { if (call->state == AFS_CALL_AWAIT_ACK) { size_t offset = 0; - ret = rxrpc_kernel_recv_data(afs_socket, call->rxcall, + ret = rxrpc_kernel_recv_data(call->net->socket, + call->rxcall, NULL, 0, &offset, false, &call->abort_code, &call->service_id); @@ -492,12 +483,12 @@ static void afs_deliver_to_call(struct afs_call *call) goto call_complete; case -ENOTCONN: abort_code = RX_CALL_DEAD; - rxrpc_kernel_abort_call(afs_socket, call->rxcall, + rxrpc_kernel_abort_call(call->net->socket, call->rxcall, abort_code, ret, "KNC"); goto save_error; case -ENOTSUPP: abort_code = RXGEN_OPCODE; - rxrpc_kernel_abort_call(afs_socket, call->rxcall, + rxrpc_kernel_abort_call(call->net->socket, call->rxcall, abort_code, ret, "KIV"); goto save_error; case -ENODATA: @@ -507,7 +498,7 @@ static void afs_deliver_to_call(struct afs_call *call) abort_code = RXGEN_CC_UNMARSHAL; if (call->state != AFS_CALL_AWAIT_REPLY) abort_code = RXGEN_SS_UNMARSHAL; - 
rxrpc_kernel_abort_call(afs_socket, call->rxcall, + rxrpc_kernel_abort_call(call->net->socket, call->rxcall, abort_code, -EBADMSG, "KUM"); goto save_error; } @@ -541,13 +532,13 @@ static int afs_wait_for_call_to_complete(struct afs_call *call) _enter(""); - rtt = rxrpc_kernel_get_rtt(afs_socket, call->rxcall); + rtt = rxrpc_kernel_get_rtt(call->net->socket, call->rxcall); rtt2 = nsecs_to_jiffies64(rtt) * 2; if (rtt2 < 2) rtt2 = 2; timeout = rtt2; - last_life = rxrpc_kernel_check_life(afs_socket, call->rxcall); + last_life = rxrpc_kernel_check_life(call->net->socket, call->rxcall); add_wait_queue(&call->waitq, &myself); for (;;) { @@ -564,7 +555,7 @@ static int afs_wait_for_call_to_complete(struct afs_call *call) if (call->state == AFS_CALL_COMPLETE) break; - life = rxrpc_kernel_check_life(afs_socket, call->rxcall); + life = rxrpc_kernel_check_life(call->net->socket, call->rxcall); if (timeout == 0 && life == last_life && signal_pending(current)) break; @@ -583,7 +574,7 @@ static int afs_wait_for_call_to_complete(struct afs_call *call) /* Kill off the call if it's still live. */ if (call->state < AFS_CALL_COMPLETE) { _debug("call interrupted"); - rxrpc_kernel_abort_call(afs_socket, call->rxcall, + rxrpc_kernel_abort_call(call->net->socket, call->rxcall, RX_USER_ABORT, -EINTR, "KWI"); } @@ -621,7 +612,7 @@ static void afs_wake_up_async_call(struct sock *sk, struct rxrpc_call *rxcall, u = __atomic_add_unless(&call->usage, 1, 0); if (u != 0) { trace_afs_call(call, afs_call_trace_wake, u, - atomic_read(&afs_outstanding_calls), + atomic_read(&call->net->nr_outstanding_calls), __builtin_return_address(0)); if (!queue_work(afs_async_calls, &call->async_work)) @@ -685,13 +676,15 @@ static void afs_rx_attach(struct rxrpc_call *rxcall, unsigned long user_call_ID) /* * Charge the incoming call preallocation. 
*/ -static void afs_charge_preallocation(struct work_struct *work) +void afs_charge_preallocation(struct work_struct *work) { - struct afs_call *call = afs_spare_incoming_call; + struct afs_net *net = + container_of(work, struct afs_net, charge_preallocation_work); + struct afs_call *call = net->spare_incoming_call; for (;;) { if (!call) { - call = afs_alloc_call(&afs_RXCMxxxx, GFP_KERNEL); + call = afs_alloc_call(net, &afs_RXCMxxxx, GFP_KERNEL); if (!call) break; @@ -700,7 +693,7 @@ static void afs_charge_preallocation(struct work_struct *work) init_waitqueue_head(&call->waitq); } - if (rxrpc_kernel_charge_accept(afs_socket, + if (rxrpc_kernel_charge_accept(net->socket, afs_wake_up_async_call, afs_rx_attach, (unsigned long)call, @@ -708,7 +701,7 @@ static void afs_charge_preallocation(struct work_struct *work) break; call = NULL; } - afs_spare_incoming_call = call; + net->spare_incoming_call = call; } /* @@ -729,7 +722,9 @@ static void afs_rx_discard_new_call(struct rxrpc_call *rxcall, static void afs_rx_new_call(struct sock *sk, struct rxrpc_call *rxcall, unsigned long user_call_ID) { - queue_work(afs_wq, &afs_charge_preallocation_work); + struct afs_net *net = afs_sock2net(sk); + + queue_work(afs_wq, &net->charge_preallocation_work); } /* @@ -784,11 +779,12 @@ static void afs_notify_end_reply_tx(struct sock *sock, */ void afs_send_empty_reply(struct afs_call *call) { + struct afs_net *net = call->net; struct msghdr msg; _enter(""); - rxrpc_kernel_set_tx_length(afs_socket, call->rxcall, 0); + rxrpc_kernel_set_tx_length(net->socket, call->rxcall, 0); msg.msg_name = NULL; msg.msg_namelen = 0; @@ -798,7 +794,7 @@ void afs_send_empty_reply(struct afs_call *call) msg.msg_flags = 0; call->state = AFS_CALL_AWAIT_ACK; - switch (rxrpc_kernel_send_data(afs_socket, call->rxcall, &msg, 0, + switch (rxrpc_kernel_send_data(net->socket, call->rxcall, &msg, 0, afs_notify_end_reply_tx)) { case 0: _leave(" [replied]"); @@ -806,7 +802,7 @@ void afs_send_empty_reply(struct afs_call 
*call) case -ENOMEM: _debug("oom"); - rxrpc_kernel_abort_call(afs_socket, call->rxcall, + rxrpc_kernel_abort_call(net->socket, call->rxcall, RX_USER_ABORT, -ENOMEM, "KOO"); default: _leave(" [error]"); @@ -819,13 +815,14 @@ void afs_send_empty_reply(struct afs_call *call) */ void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len) { + struct afs_net *net = call->net; struct msghdr msg; struct kvec iov[1]; int n; _enter(""); - rxrpc_kernel_set_tx_length(afs_socket, call->rxcall, len); + rxrpc_kernel_set_tx_length(net->socket, call->rxcall, len); iov[0].iov_base = (void *) buf; iov[0].iov_len = len; @@ -837,7 +834,7 @@ void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len) msg.msg_flags = 0; call->state = AFS_CALL_AWAIT_ACK; - n = rxrpc_kernel_send_data(afs_socket, call->rxcall, &msg, len, + n = rxrpc_kernel_send_data(net->socket, call->rxcall, &msg, len, afs_notify_end_reply_tx); if (n >= 0) { /* Success */ @@ -847,7 +844,7 @@ void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len) if (n == -ENOMEM) { _debug("oom"); - rxrpc_kernel_abort_call(afs_socket, call->rxcall, + rxrpc_kernel_abort_call(net->socket, call->rxcall, RX_USER_ABORT, -ENOMEM, "KOO"); } _leave(" [error]"); @@ -859,6 +856,7 @@ void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len) int afs_extract_data(struct afs_call *call, void *buf, size_t count, bool want_more) { + struct afs_net *net = call->net; int ret; _enter("{%s,%zu},,%zu,%d", @@ -866,7 +864,7 @@ int afs_extract_data(struct afs_call *call, void *buf, size_t count, ASSERTCMP(call->offset, <=, count); - ret = rxrpc_kernel_recv_data(afs_socket, call->rxcall, + ret = rxrpc_kernel_recv_data(net->socket, call->rxcall, buf, count, &call->offset, want_more, &call->abort_code, &call->service_id); diff --git a/fs/afs/server.c b/fs/afs/server.c index c001b1f2455f..e47fd9bc0ddc 100644 --- a/fs/afs/server.c +++ b/fs/afs/server.c @@ -15,32 +15,22 @@ static unsigned 
afs_server_timeout = 10; /* server timeout in seconds */ -static void afs_reap_server(struct work_struct *); - -/* tree of all the servers, indexed by IP address */ -static struct rb_root afs_servers = RB_ROOT; -static DEFINE_RWLOCK(afs_servers_lock); - -/* LRU list of all the servers not currently in use */ -static LIST_HEAD(afs_server_graveyard); -static DEFINE_SPINLOCK(afs_server_graveyard_lock); -static DECLARE_DELAYED_WORK(afs_server_reaper, afs_reap_server); - /* * install a server record in the master tree */ static int afs_install_server(struct afs_server *server) { struct afs_server *xserver; + struct afs_net *net = server->cell->net; struct rb_node **pp, *p; int ret; _enter("%p", server); - write_lock(&afs_servers_lock); + write_lock(&net->servers_lock); ret = -EEXIST; - pp = &afs_servers.rb_node; + pp = &net->servers.rb_node; p = NULL; while (*pp) { p = *pp; @@ -55,11 +45,11 @@ static int afs_install_server(struct afs_server *server) } rb_link_node(&server->master_rb, p, pp); - rb_insert_color(&server->master_rb, &afs_servers); + rb_insert_color(&server->master_rb, &net->servers); ret = 0; error: - write_unlock(&afs_servers_lock); + write_unlock(&net->servers_lock); return ret; } @@ -150,9 +140,9 @@ found_server_quickly: read_unlock(&cell->servers_lock); no_longer_unused: if (!list_empty(&server->grave)) { - spin_lock(&afs_server_graveyard_lock); + spin_lock(&cell->net->server_graveyard_lock); list_del_init(&server->grave); - spin_unlock(&afs_server_graveyard_lock); + spin_unlock(&cell->net->server_graveyard_lock); } _leave(" = %p{%d}", server, atomic_read(&server->usage)); return server; @@ -178,7 +168,8 @@ server_in_two_cells: /* * look up a server by its IP address */ -struct afs_server *afs_find_server(const struct sockaddr_rxrpc *srx) +struct afs_server *afs_find_server(struct afs_net *net, + const struct sockaddr_rxrpc *srx) { struct afs_server *server = NULL; struct rb_node *p; @@ -191,9 +182,9 @@ struct afs_server *afs_find_server(const struct 
sockaddr_rxrpc *srx) return NULL; } - read_lock(&afs_servers_lock); + read_lock(&net->servers_lock); - p = afs_servers.rb_node; + p = net->servers.rb_node; while (p) { server = rb_entry(p, struct afs_server, master_rb); @@ -211,7 +202,7 @@ struct afs_server *afs_find_server(const struct sockaddr_rxrpc *srx) server = NULL; found: - read_unlock(&afs_servers_lock); + read_unlock(&net->servers_lock); ASSERTIFCMP(server, server->addr.s_addr, ==, addr.s_addr); _leave(" = %p", server); return server; @@ -223,6 +214,8 @@ found: */ void afs_put_server(struct afs_server *server) { + struct afs_net *net = server->cell->net; + if (!server) return; @@ -239,14 +232,14 @@ void afs_put_server(struct afs_server *server) afs_flush_callback_breaks(server); - spin_lock(&afs_server_graveyard_lock); + spin_lock(&net->server_graveyard_lock); if (atomic_read(&server->usage) == 0) { - list_move_tail(&server->grave, &afs_server_graveyard); + list_move_tail(&server->grave, &net->server_graveyard); server->time_of_death = ktime_get_real_seconds(); - queue_delayed_work(afs_wq, &afs_server_reaper, - afs_server_timeout * HZ); + queue_delayed_work(afs_wq, &net->server_reaper, + net->live ? 
afs_server_timeout * HZ : 0); } - spin_unlock(&afs_server_graveyard_lock); + spin_unlock(&net->server_graveyard_lock); _leave(" [dead]"); } @@ -272,42 +265,45 @@ static void afs_destroy_server(struct afs_server *server) /* * reap dead server records */ -static void afs_reap_server(struct work_struct *work) +void afs_reap_server(struct work_struct *work) { LIST_HEAD(corpses); struct afs_server *server; + struct afs_net *net = container_of(work, struct afs_net, server_reaper.work); unsigned long delay, expiry; time64_t now; now = ktime_get_real_seconds(); - spin_lock(&afs_server_graveyard_lock); + spin_lock(&net->server_graveyard_lock); - while (!list_empty(&afs_server_graveyard)) { - server = list_entry(afs_server_graveyard.next, + while (!list_empty(&net->server_graveyard)) { + server = list_entry(net->server_graveyard.next, struct afs_server, grave); /* the queue is ordered most dead first */ - expiry = server->time_of_death + afs_server_timeout; - if (expiry > now) { - delay = (expiry - now) * HZ; - mod_delayed_work(afs_wq, &afs_server_reaper, delay); - break; + if (net->live) { + expiry = server->time_of_death + afs_server_timeout; + if (expiry > now) { + delay = (expiry - now) * HZ; + mod_delayed_work(afs_wq, &net->server_reaper, delay); + break; + } } write_lock(&server->cell->servers_lock); - write_lock(&afs_servers_lock); + write_lock(&net->servers_lock); if (atomic_read(&server->usage) > 0) { list_del_init(&server->grave); } else { list_move_tail(&server->grave, &corpses); list_del_init(&server->link); - rb_erase(&server->master_rb, &afs_servers); + rb_erase(&server->master_rb, &net->servers); } - write_unlock(&afs_servers_lock); + write_unlock(&net->servers_lock); write_unlock(&server->cell->servers_lock); } - spin_unlock(&afs_server_graveyard_lock); + spin_unlock(&net->server_graveyard_lock); /* now reap the corpses we've extracted */ while (!list_empty(&corpses)) { @@ -318,10 +314,10 @@ static void afs_reap_server(struct work_struct *work) } /* - * 
discard all the server records for rmmod + * Discard all the server records from a net namespace when it is destroyed or + * the afs module is removed. */ -void __exit afs_purge_servers(void) +void __net_exit afs_purge_servers(struct afs_net *net) { - afs_server_timeout = 0; - mod_delayed_work(afs_wq, &afs_server_reaper, 0); + mod_delayed_work(afs_wq, &net->server_reaper, 0); } diff --git a/fs/afs/super.c b/fs/afs/super.c index 689173c0a682..d47a9bc46a69 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -25,11 +25,10 @@ #include #include #include +#include #include #include "internal.h" -#define AFS_FS_MAGIC 0x6B414653 /* 'kAFS' */ - static void afs_i_init_once(void *foo); static struct dentry *afs_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data); @@ -201,7 +200,8 @@ static int afs_parse_options(struct afs_mount_params *params, token = match_token(p, afs_options_list, args); switch (token) { case afs_opt_cell: - cell = afs_cell_lookup(args[0].from, + cell = afs_cell_lookup(params->net, + args[0].from, args[0].to - args[0].from, false); if (IS_ERR(cell)) @@ -308,7 +308,7 @@ static int afs_parse_device_name(struct afs_mount_params *params, /* lookup the cell record */ if (cellname || !params->cell) { - cell = afs_cell_lookup(cellname, cellnamesz, true); + cell = afs_cell_lookup(params->net, cellname, cellnamesz, true); if (IS_ERR(cell)) { printk(KERN_ERR "kAFS: unable to lookup cell '%*.*s'\n", cellnamesz, cellnamesz, cellname ?: ""); @@ -334,7 +334,7 @@ static int afs_test_super(struct super_block *sb, void *data) struct afs_super_info *as1 = data; struct afs_super_info *as = sb->s_fs_info; - return as->volume == as1->volume; + return as->net == as1->net && as->volume == as1->volume; } static int afs_set_super(struct super_block *sb, void *data) @@ -411,6 +411,7 @@ static struct dentry *afs_mount(struct file_system_type *fs_type, _enter(",,%s,%p", dev_name, options); memset(&params, 0, sizeof(params)); + params.net = &__afs_net; ret 
= -EINVAL; if (current->nsproxy->net_ns != &init_net) @@ -444,36 +445,32 @@ static struct dentry *afs_mount(struct file_system_type *fs_type, } /* allocate a superblock info record */ + ret = -ENOMEM; as = kzalloc(sizeof(struct afs_super_info), GFP_KERNEL); - if (!as) { - ret = -ENOMEM; - afs_put_volume(vol); - goto error; - } + if (!as) + goto error_vol; + + as->net = afs_get_net(params.net); as->volume = vol; /* allocate a deviceless superblock */ sb = sget(fs_type, afs_test_super, afs_set_super, flags, as); if (IS_ERR(sb)) { ret = PTR_ERR(sb); - afs_put_volume(vol); - kfree(as); - goto error; + goto error_as; } if (!sb->s_root) { /* initial superblock/root creation */ _debug("create"); ret = afs_fill_super(sb, &params); - if (ret < 0) { - deactivate_locked_super(sb); - goto error; - } + if (ret < 0) + goto error_sb; sb->s_flags |= MS_ACTIVE; } else { _debug("reuse"); ASSERTCMP(sb->s_flags, &, MS_ACTIVE); - afs_put_volume(vol); + afs_put_volume(params.net, vol); kfree(as); } @@ -482,6 +479,14 @@ static struct dentry *afs_mount(struct file_system_type *fs_type, _leave(" = 0 [%p]", sb); return dget(sb->s_root); +error_sb: + deactivate_locked_super(sb); + goto error; +error_as: + afs_put_net(as->net); + kfree(as); +error_vol: + afs_put_volume(params.net, vol); error: afs_put_cell(params.cell); key_put(params.key); @@ -493,8 +498,10 @@ error: static void afs_kill_super(struct super_block *sb) { struct afs_super_info *as = sb->s_fs_info; + struct afs_net *net = as->net; + kill_anon_super(sb); - afs_put_volume(as->volume); + afs_put_volume(net, as->volume); kfree(as); } diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c index a5e4cc561b6c..f5a043a9ba61 100644 --- a/fs/afs/vlclient.c +++ b/fs/afs/vlclient.c @@ -143,7 +143,8 @@ static const struct afs_call_type afs_RXVLGetEntryById = { /* * dispatch a get volume entry by name operation */ -int afs_vl_get_entry_by_name(struct in_addr *addr, +int afs_vl_get_entry_by_name(struct afs_net *net, + struct in_addr *addr, struct key 
*key, const char *volname, struct afs_cache_vlocation *entry, @@ -159,7 +160,7 @@ int afs_vl_get_entry_by_name(struct in_addr *addr, padsz = (4 - (volnamesz & 3)) & 3; reqsz = 8 + volnamesz + padsz; - call = afs_alloc_flat_call(&afs_RXVLGetEntryByName, reqsz, 384); + call = afs_alloc_flat_call(net, &afs_RXVLGetEntryByName, reqsz, 384); if (!call) return -ENOMEM; @@ -183,7 +184,8 @@ int afs_vl_get_entry_by_name(struct in_addr *addr, /* * dispatch a get volume entry by ID operation */ -int afs_vl_get_entry_by_id(struct in_addr *addr, +int afs_vl_get_entry_by_id(struct afs_net *net, + struct in_addr *addr, struct key *key, afs_volid_t volid, afs_voltype_t voltype, @@ -195,7 +197,7 @@ int afs_vl_get_entry_by_id(struct in_addr *addr, _enter(""); - call = afs_alloc_flat_call(&afs_RXVLGetEntryById, 12, 384); + call = afs_alloc_flat_call(net, &afs_RXVLGetEntryById, 12, 384); if (!call) return -ENOMEM; diff --git a/fs/afs/vlocation.c b/fs/afs/vlocation.c index 37b7c3b342a6..ccb7aacfbeca 100644 --- a/fs/afs/vlocation.c +++ b/fs/afs/vlocation.c @@ -16,20 +16,11 @@ #include #include "internal.h" +struct workqueue_struct *afs_vlocation_update_worker; + static unsigned afs_vlocation_timeout = 10; /* volume location timeout in seconds */ static unsigned afs_vlocation_update_timeout = 10 * 60; -static void afs_vlocation_reaper(struct work_struct *); -static void afs_vlocation_updater(struct work_struct *); - -static LIST_HEAD(afs_vlocation_updates); -static LIST_HEAD(afs_vlocation_graveyard); -static DEFINE_SPINLOCK(afs_vlocation_updates_lock); -static DEFINE_SPINLOCK(afs_vlocation_graveyard_lock); -static DECLARE_DELAYED_WORK(afs_vlocation_reap, afs_vlocation_reaper); -static DECLARE_DELAYED_WORK(afs_vlocation_update, afs_vlocation_updater); -static struct workqueue_struct *afs_vlocation_update_worker; - /* * iterate through the VL servers in a cell until one of them admits knowing * about the volume in question @@ -52,8 +43,8 @@ static int afs_vlocation_access_vl_by_name(struct 
afs_vlocation *vl, _debug("CellServ[%hu]: %08x", cell->vl_curr_svix, addr.s_addr); /* attempt to access the VL server */ - ret = afs_vl_get_entry_by_name(&addr, key, vl->vldb.name, vldb, - false); + ret = afs_vl_get_entry_by_name(cell->net, &addr, key, + vl->vldb.name, vldb, false); switch (ret) { case 0: goto out; @@ -110,8 +101,8 @@ static int afs_vlocation_access_vl_by_id(struct afs_vlocation *vl, _debug("CellServ[%hu]: %08x", cell->vl_curr_svix, addr.s_addr); /* attempt to access the VL server */ - ret = afs_vl_get_entry_by_id(&addr, key, volid, voltype, vldb, - false); + ret = afs_vl_get_entry_by_id(cell->net, &addr, key, volid, + voltype, vldb, false); switch (ret) { case 0: goto out; @@ -335,7 +326,8 @@ static int afs_vlocation_fill_in_record(struct afs_vlocation *vl, /* * queue a vlocation record for updates */ -static void afs_vlocation_queue_for_updates(struct afs_vlocation *vl) +static void afs_vlocation_queue_for_updates(struct afs_net *net, + struct afs_vlocation *vl) { struct afs_vlocation *xvl; @@ -343,25 +335,25 @@ static void afs_vlocation_queue_for_updates(struct afs_vlocation *vl) vl->update_at = ktime_get_real_seconds() + afs_vlocation_update_timeout; - spin_lock(&afs_vlocation_updates_lock); + spin_lock(&net->vl_updates_lock); - if (!list_empty(&afs_vlocation_updates)) { + if (!list_empty(&net->vl_updates)) { /* ... 
but wait at least 1 second more than the newest record * already queued so that we don't spam the VL server suddenly * with lots of requests */ - xvl = list_entry(afs_vlocation_updates.prev, + xvl = list_entry(net->vl_updates.prev, struct afs_vlocation, update); if (vl->update_at <= xvl->update_at) vl->update_at = xvl->update_at + 1; - } else { + } else if (net->live) { queue_delayed_work(afs_vlocation_update_worker, - &afs_vlocation_update, + &net->vl_updater, afs_vlocation_update_timeout * HZ); } - list_add_tail(&vl->update, &afs_vlocation_updates); - spin_unlock(&afs_vlocation_updates_lock); + list_add_tail(&vl->update, &net->vl_updates); + spin_unlock(&net->vl_updates_lock); } /* @@ -371,7 +363,8 @@ static void afs_vlocation_queue_for_updates(struct afs_vlocation *vl) * - lookup in the local cache if not able to find on the VL server * - insert/update in the local cache if did get a VL response */ -struct afs_vlocation *afs_vlocation_lookup(struct afs_cell *cell, +struct afs_vlocation *afs_vlocation_lookup(struct afs_net *net, + struct afs_cell *cell, struct key *key, const char *name, size_t namesz) @@ -427,7 +420,7 @@ fill_in_record: #endif /* schedule for regular updates */ - afs_vlocation_queue_for_updates(vl); + afs_vlocation_queue_for_updates(net, vl); goto success; found_in_memory: @@ -436,9 +429,9 @@ found_in_memory: atomic_inc(&vl->usage); spin_unlock(&cell->vl_lock); if (!list_empty(&vl->grave)) { - spin_lock(&afs_vlocation_graveyard_lock); + spin_lock(&net->vl_graveyard_lock); list_del_init(&vl->grave); - spin_unlock(&afs_vlocation_graveyard_lock); + spin_unlock(&net->vl_graveyard_lock); } up_write(&cell->vl_sem); @@ -481,7 +474,7 @@ error_abandon: wake_up(&vl->waitq); error: ASSERT(vl != NULL); - afs_put_vlocation(vl); + afs_put_vlocation(net, vl); _leave(" = %d", ret); return ERR_PTR(ret); } @@ -489,7 +482,7 @@ error: /* * finish using a volume location record */ -void afs_put_vlocation(struct afs_vlocation *vl) +void afs_put_vlocation(struct 
afs_net *net, struct afs_vlocation *vl) { if (!vl) return; @@ -503,22 +496,22 @@ void afs_put_vlocation(struct afs_vlocation *vl) return; } - spin_lock(&afs_vlocation_graveyard_lock); + spin_lock(&net->vl_graveyard_lock); if (atomic_read(&vl->usage) == 0) { _debug("buried"); - list_move_tail(&vl->grave, &afs_vlocation_graveyard); + list_move_tail(&vl->grave, &net->vl_graveyard); vl->time_of_death = ktime_get_real_seconds(); - queue_delayed_work(afs_wq, &afs_vlocation_reap, + queue_delayed_work(afs_wq, &net->vl_reaper, afs_vlocation_timeout * HZ); /* suspend updates on this record */ if (!list_empty(&vl->update)) { - spin_lock(&afs_vlocation_updates_lock); + spin_lock(&net->vl_updates_lock); list_del_init(&vl->update); - spin_unlock(&afs_vlocation_updates_lock); + spin_unlock(&net->vl_updates_lock); } } - spin_unlock(&afs_vlocation_graveyard_lock); + spin_unlock(&net->vl_graveyard_lock); _leave(" [killed?]"); } @@ -539,31 +532,34 @@ static void afs_vlocation_destroy(struct afs_vlocation *vl) /* * reap dead volume location records */ -static void afs_vlocation_reaper(struct work_struct *work) +void afs_vlocation_reaper(struct work_struct *work) { LIST_HEAD(corpses); struct afs_vlocation *vl; + struct afs_net *net = container_of(work, struct afs_net, vl_reaper.work); unsigned long delay, expiry; time64_t now; _enter(""); now = ktime_get_real_seconds(); - spin_lock(&afs_vlocation_graveyard_lock); + spin_lock(&net->vl_graveyard_lock); - while (!list_empty(&afs_vlocation_graveyard)) { - vl = list_entry(afs_vlocation_graveyard.next, + while (!list_empty(&net->vl_graveyard)) { + vl = list_entry(net->vl_graveyard.next, struct afs_vlocation, grave); _debug("check %p", vl); /* the queue is ordered most dead first */ - expiry = vl->time_of_death + afs_vlocation_timeout; - if (expiry > now) { - delay = (expiry - now) * HZ; - _debug("delay %lu", delay); - mod_delayed_work(afs_wq, &afs_vlocation_reap, delay); - break; + if (net->live) { + expiry = vl->time_of_death + 
afs_vlocation_timeout; + if (expiry > now) { + delay = (expiry - now) * HZ; + _debug("delay %lu", delay); + mod_delayed_work(afs_wq, &net->vl_reaper, delay); + break; + } } spin_lock(&vl->cell->vl_lock); @@ -578,7 +574,7 @@ static void afs_vlocation_reaper(struct work_struct *work) spin_unlock(&vl->cell->vl_lock); } - spin_unlock(&afs_vlocation_graveyard_lock); + spin_unlock(&net->vl_graveyard_lock); /* now reap the corpses we've extracted */ while (!list_empty(&corpses)) { @@ -590,57 +586,47 @@ static void afs_vlocation_reaper(struct work_struct *work) _leave(""); } -/* - * initialise the VL update process - */ -int __init afs_vlocation_update_init(void) -{ - afs_vlocation_update_worker = alloc_workqueue("kafs_vlupdated", - WQ_MEM_RECLAIM, 0); - return afs_vlocation_update_worker ? 0 : -ENOMEM; -} - /* * discard all the volume location records for rmmod */ -void afs_vlocation_purge(void) +void __net_exit afs_vlocation_purge(struct afs_net *net) { - afs_vlocation_timeout = 0; - - spin_lock(&afs_vlocation_updates_lock); - list_del_init(&afs_vlocation_updates); - spin_unlock(&afs_vlocation_updates_lock); - mod_delayed_work(afs_vlocation_update_worker, &afs_vlocation_update, 0); - destroy_workqueue(afs_vlocation_update_worker); - - mod_delayed_work(afs_wq, &afs_vlocation_reap, 0); + spin_lock(&net->vl_updates_lock); + list_del_init(&net->vl_updates); + spin_unlock(&net->vl_updates_lock); + mod_delayed_work(afs_vlocation_update_worker, &net->vl_updater, 0); + mod_delayed_work(afs_wq, &net->vl_reaper, 0); } /* * update a volume location */ -static void afs_vlocation_updater(struct work_struct *work) +void afs_vlocation_updater(struct work_struct *work) { struct afs_cache_vlocation vldb; struct afs_vlocation *vl, *xvl; + struct afs_net *net = container_of(work, struct afs_net, vl_updater.work); time64_t now; long timeout; int ret; + if (!net->live) + return; + _enter(""); now = ktime_get_real_seconds(); /* find a record to update */ - 
spin_lock(&afs_vlocation_updates_lock); + spin_lock(&net->vl_updates_lock); for (;;) { - if (list_empty(&afs_vlocation_updates)) { - spin_unlock(&afs_vlocation_updates_lock); + if (list_empty(&net->vl_updates) || !net->live) { + spin_unlock(&net->vl_updates_lock); _leave(" [nothing]"); return; } - vl = list_entry(afs_vlocation_updates.next, + vl = list_entry(net->vl_updates.next, struct afs_vlocation, update); if (atomic_read(&vl->usage) > 0) break; @@ -650,15 +636,15 @@ static void afs_vlocation_updater(struct work_struct *work) timeout = vl->update_at - now; if (timeout > 0) { queue_delayed_work(afs_vlocation_update_worker, - &afs_vlocation_update, timeout * HZ); - spin_unlock(&afs_vlocation_updates_lock); + &net->vl_updater, timeout * HZ); + spin_unlock(&net->vl_updates_lock); _leave(" [nothing]"); return; } list_del_init(&vl->update); atomic_inc(&vl->usage); - spin_unlock(&afs_vlocation_updates_lock); + spin_unlock(&net->vl_updates_lock); /* we can now perform the update */ _debug("update %s", vl->vldb.name); @@ -688,18 +674,18 @@ static void afs_vlocation_updater(struct work_struct *work) vl->update_at = ktime_get_real_seconds() + afs_vlocation_update_timeout; - spin_lock(&afs_vlocation_updates_lock); + spin_lock(&net->vl_updates_lock); - if (!list_empty(&afs_vlocation_updates)) { + if (!list_empty(&net->vl_updates)) { /* next update in 10 minutes, but wait at least 1 second more * than the newest record already queued so that we don't spam * the VL server suddenly with lots of requests */ - xvl = list_entry(afs_vlocation_updates.prev, + xvl = list_entry(net->vl_updates.prev, struct afs_vlocation, update); if (vl->update_at <= xvl->update_at) vl->update_at = xvl->update_at + 1; - xvl = list_entry(afs_vlocation_updates.next, + xvl = list_entry(net->vl_updates.next, struct afs_vlocation, update); timeout = xvl->update_at - now; if (timeout < 0) @@ -710,11 +696,10 @@ static void afs_vlocation_updater(struct work_struct *work) ASSERT(list_empty(&vl->update)); - 
list_add_tail(&vl->update, &afs_vlocation_updates); + list_add_tail(&vl->update, &net->vl_updates); _debug("timeout %ld", timeout); - queue_delayed_work(afs_vlocation_update_worker, - &afs_vlocation_update, timeout * HZ); - spin_unlock(&afs_vlocation_updates_lock); - afs_put_vlocation(vl); + queue_delayed_work(afs_vlocation_update_worker, &net->vl_updater, timeout * HZ); + spin_unlock(&net->vl_updates_lock); + afs_put_vlocation(net, vl); } diff --git a/fs/afs/volume.c b/fs/afs/volume.c index db73d6dad02b..3d5363e0b7e1 100644 --- a/fs/afs/volume.c +++ b/fs/afs/volume.c @@ -54,7 +54,7 @@ struct afs_volume *afs_volume_lookup(struct afs_mount_params *params) params->volnamesz, params->volnamesz, params->volname, params->rwpath); /* lookup the volume location record */ - vlocation = afs_vlocation_lookup(params->cell, params->key, + vlocation = afs_vlocation_lookup(params->net, params->cell, params->key, params->volname, params->volnamesz); if (IS_ERR(vlocation)) { ret = PTR_ERR(vlocation); @@ -138,7 +138,7 @@ success: _debug("kAFS selected %s volume %08x", afs_voltypes[volume->type], volume->vid); up_write(&params->cell->vl_sem); - afs_put_vlocation(vlocation); + afs_put_vlocation(params->net, vlocation); _leave(" = %p", volume); return volume; @@ -146,7 +146,7 @@ success: error_up: up_write(&params->cell->vl_sem); error: - afs_put_vlocation(vlocation); + afs_put_vlocation(params->net, vlocation); _leave(" = %d", ret); return ERR_PTR(ret); @@ -163,7 +163,7 @@ error_discard: /* * destroy a volume record */ -void afs_put_volume(struct afs_volume *volume) +void afs_put_volume(struct afs_net *net, struct afs_volume *volume) { struct afs_vlocation *vlocation; int loop; @@ -195,7 +195,7 @@ void afs_put_volume(struct afs_volume *volume) #ifdef CONFIG_AFS_FSCACHE fscache_relinquish_cookie(volume->cache, 0); #endif - afs_put_vlocation(vlocation); + afs_put_vlocation(net, vlocation); for (loop = volume->nservers - 1; loop >= 0; loop--) afs_put_server(volume->servers[loop]); diff --git 
a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h index aa50113ebe5b..1a6fee974116 100644 --- a/include/uapi/linux/magic.h +++ b/include/uapi/linux/magic.h @@ -47,6 +47,7 @@ #define OPENPROM_SUPER_MAGIC 0x9fa1 #define QNX4_SUPER_MAGIC 0x002f /* qnx4 fs detection */ #define QNX6_SUPER_MAGIC 0x68191122 /* qnx6 fs detection */ +#define AFS_FS_MAGIC 0x6B414653 #define REISERFS_SUPER_MAGIC 0x52654973 /* used by gcc */ /* used by file system utilities that From e3b2ffe0f0e1471854dc53bb69ff452e65cc88f2 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 2 Nov 2017 15:27:45 +0000 Subject: [PATCH 03/35] afs: Close the rxrpc socket only after purging the servers Close the rxrpc socket only after we've purged the server records (and also cell and volume records which might refer to servers) so that we can give up the callbacks on each server. Signed-off-by: David Howells --- fs/afs/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/afs/main.c b/fs/afs/main.c index 87b1a9c8000d..6bd2f3a426de 100644 --- a/fs/afs/main.c +++ b/fs/afs/main.c @@ -96,10 +96,10 @@ error_proc: static void __net_exit afs_net_exit(struct afs_net *net) { net->live = false; - afs_close_socket(net); afs_purge_servers(net); afs_vlocation_purge(net); afs_cell_purge(net); + afs_close_socket(net); afs_proc_cleanup(net); } From 59fa1c4a9f528c2a1556f4b2cd4e055b560c1c0a Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 2 Nov 2017 15:27:45 +0000 Subject: [PATCH 04/35] afs: Fix server reaping Fix server reaping and make sure it's all done before we start trying to purge cells, given that servers currently pin cells. 
Signed-off-by: David Howells --- fs/afs/internal.h | 5 +++- fs/afs/main.c | 3 ++- fs/afs/server.c | 59 ++++++++++++++++++++++++++++++++++++++++------- 3 files changed, 57 insertions(+), 10 deletions(-) diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 53bd11d73469..bc9ded443b11 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -238,7 +238,9 @@ struct afs_net { rwlock_t servers_lock; struct list_head server_graveyard; /* Inactive server LRU list */ spinlock_t server_graveyard_lock; - struct delayed_work server_reaper; + struct timer_list server_timer; + struct work_struct server_reaper; + atomic_t servers_outstanding; /* Misc */ struct proc_dir_entry *proc_afs; /* /proc/net/afs directory */ @@ -700,6 +702,7 @@ do { \ atomic_inc(&(S)->usage); \ } while(0) +extern void afs_server_timer(struct timer_list *); extern struct afs_server *afs_lookup_server(struct afs_cell *, const struct in_addr *); extern struct afs_server *afs_find_server(struct afs_net *, diff --git a/fs/afs/main.c b/fs/afs/main.c index 6bd2f3a426de..38e15b1f0eec 100644 --- a/fs/afs/main.c +++ b/fs/afs/main.c @@ -62,7 +62,8 @@ static int __net_init afs_net_init(struct afs_net *net) rwlock_init(&net->servers_lock); INIT_LIST_HEAD(&net->server_graveyard); spin_lock_init(&net->server_graveyard_lock); - INIT_DELAYED_WORK(&net->server_reaper, afs_reap_server); + INIT_WORK(&net->server_reaper, afs_reap_server); + timer_setup(&net->server_timer, afs_server_timer, 0); /* Register the /proc stuff */ ret = afs_proc_init(net); diff --git a/fs/afs/server.c b/fs/afs/server.c index e47fd9bc0ddc..33aeb527ac7e 100644 --- a/fs/afs/server.c +++ b/fs/afs/server.c @@ -15,6 +15,25 @@ static unsigned afs_server_timeout = 10; /* server timeout in seconds */ +static void afs_inc_servers_outstanding(struct afs_net *net) +{ + atomic_inc(&net->servers_outstanding); +} + +static void afs_dec_servers_outstanding(struct afs_net *net) +{ + if (atomic_dec_and_test(&net->servers_outstanding)) + 
wake_up_atomic_t(&net->servers_outstanding); +} + +void afs_server_timer(struct timer_list *timer) +{ + struct afs_net *net = container_of(timer, struct afs_net, server_timer); + + if (!queue_work(afs_wq, &net->server_reaper)) + afs_dec_servers_outstanding(net); +} + /* * install a server record in the master tree */ @@ -81,6 +100,7 @@ static struct afs_server *afs_alloc_server(struct afs_cell *cell, memcpy(&server->addr, addr, sizeof(struct in_addr)); server->addr.s_addr = addr->s_addr; + afs_inc_servers_outstanding(cell->net); _leave(" = %p{%d}", server, atomic_read(&server->usage)); } else { _leave(" = NULL [nomem]"); @@ -159,6 +179,7 @@ found_server: server_in_two_cells: write_unlock(&cell->servers_lock); kfree(candidate); + afs_dec_servers_outstanding(cell->net); printk(KERN_NOTICE "kAFS: Server %pI4 appears to be in two cells\n", addr); _leave(" = -EEXIST"); @@ -208,6 +229,18 @@ found: return server; } +static void afs_set_server_timer(struct afs_net *net, time64_t delay) +{ + afs_inc_servers_outstanding(net); + if (net->live) { + if (timer_reduce(&net->server_timer, jiffies + delay * HZ)) + afs_dec_servers_outstanding(net); + } else { + if (!queue_work(afs_wq, &net->server_reaper)) + afs_dec_servers_outstanding(net); + } +} + /* * destroy a server record * - removes from the cell list @@ -236,8 +269,7 @@ void afs_put_server(struct afs_server *server) if (atomic_read(&server->usage) == 0) { list_move_tail(&server->grave, &net->server_graveyard); server->time_of_death = ktime_get_real_seconds(); - queue_delayed_work(afs_wq, &net->server_reaper, - net->live ? 
afs_server_timeout * HZ : 0); + afs_set_server_timer(net, afs_server_timeout); } spin_unlock(&net->server_graveyard_lock); _leave(" [dead]"); @@ -246,7 +278,7 @@ void afs_put_server(struct afs_server *server) /* * destroy a dead server */ -static void afs_destroy_server(struct afs_server *server) +static void afs_destroy_server(struct afs_net *net, struct afs_server *server) { _enter("%p", server); @@ -260,6 +292,7 @@ static void afs_destroy_server(struct afs_server *server) afs_put_cell(server->cell); kfree(server); + afs_dec_servers_outstanding(net); } /* @@ -269,7 +302,7 @@ void afs_reap_server(struct work_struct *work) { LIST_HEAD(corpses); struct afs_server *server; - struct afs_net *net = container_of(work, struct afs_net, server_reaper.work); + struct afs_net *net = container_of(work, struct afs_net, server_reaper); unsigned long delay, expiry; time64_t now; @@ -284,8 +317,8 @@ void afs_reap_server(struct work_struct *work) if (net->live) { expiry = server->time_of_death + afs_server_timeout; if (expiry > now) { - delay = (expiry - now) * HZ; - mod_delayed_work(afs_wq, &net->server_reaper, delay); + delay = (expiry - now); + afs_set_server_timer(net, delay); break; } } @@ -309,8 +342,10 @@ void afs_reap_server(struct work_struct *work) while (!list_empty(&corpses)) { server = list_entry(corpses.next, struct afs_server, grave); list_del(&server->grave); - afs_destroy_server(server); + afs_destroy_server(net, server); } + + afs_dec_servers_outstanding(net); } /* @@ -319,5 +354,13 @@ void afs_reap_server(struct work_struct *work) */ void __net_exit afs_purge_servers(struct afs_net *net) { - mod_delayed_work(afs_wq, &net->server_reaper, 0); + if (del_timer_sync(&net->server_timer)) + atomic_dec(&net->servers_outstanding); + + afs_inc_servers_outstanding(net); + if (!queue_work(afs_wq, &net->server_reaper)) + afs_dec_servers_outstanding(net); + + wait_on_atomic_t(&net->servers_outstanding, atomic_t_wait, + TASK_UNINTERRUPTIBLE); } From 
49566f6f06b38d7c1a5c7eacc8a38c6ea2e36549 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 2 Nov 2017 15:27:46 +0000 Subject: [PATCH 05/35] afs: Note the cell in the superblock info also Keep a reference to the cell in the superblock info structure in addition to the volume and net pointers. This will make it easier to clean up in a future patch in which afs_put_volume() will need the cell pointer. Whilst we're at it, make the cell and volume getting functions return a pointer to the object got to make the call sites look neater. Signed-off-by: David Howells --- fs/afs/internal.h | 15 +++++++++++-- fs/afs/super.c | 57 +++++++++++++++++++++++++++++------------------ 2 files changed, 48 insertions(+), 24 deletions(-) diff --git a/fs/afs/internal.h b/fs/afs/internal.h index bc9ded443b11..2d90cb7605f3 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -178,6 +178,7 @@ struct afs_writeback { */ struct afs_super_info { struct afs_net *net; /* Network namespace */ + struct afs_cell *cell; /* The cell in which the volume resides */ struct afs_volume *volume; /* volume record */ char rwparent; /* T if parent is R/W AFS volume */ }; @@ -502,7 +503,12 @@ extern void afs_flush_callback_breaks(struct afs_server *); /* * cell.c */ -#define afs_get_cell(C) do { atomic_inc(&(C)->usage); } while(0) +static inline struct afs_cell *afs_get_cell(struct afs_cell *cell) +{ + if (cell) + atomic_inc(&cell->usage); + return cell; +} extern int afs_cell_init(struct afs_net *, char *); extern struct afs_cell *afs_cell_create(struct afs_net *, const char *, unsigned, char *, bool); extern struct afs_cell *afs_cell_lookup(struct afs_net *, const char *, unsigned, bool); @@ -789,7 +795,12 @@ extern int afs_vnode_release_lock(struct afs_vnode *, struct key *); /* * volume.c */ -#define afs_get_volume(V) do { atomic_inc(&(V)->usage); } while(0) +static inline struct afs_volume *afs_get_volume(struct afs_volume *volume) +{ + if (volume) + atomic_inc(&volume->usage); + return volume; 
+} extern void afs_put_volume(struct afs_net *, struct afs_volume *); extern struct afs_volume *afs_volume_lookup(struct afs_mount_params *); diff --git a/fs/afs/super.c b/fs/afs/super.c index d47a9bc46a69..e43f94ecc391 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -394,17 +394,38 @@ error: return ret; } +static struct afs_super_info *afs_alloc_sbi(struct afs_mount_params *params) +{ + struct afs_super_info *as; + + as = kzalloc(sizeof(struct afs_super_info), GFP_KERNEL); + if (as) { + as->net = afs_get_net(params->net); + as->cell = afs_get_cell(params->cell); + } + return as; +} + +static void afs_destroy_sbi(struct afs_super_info *as) +{ + if (as) { + afs_put_volume(as->net, as->volume); + afs_put_cell(as->cell); + afs_put_net(as->net); + kfree(as); + } +} + /* * get an AFS superblock */ static struct dentry *afs_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *options) + int flags, const char *dev_name, void *options) { struct afs_mount_params params; struct super_block *sb; struct afs_volume *vol; struct key *key; - char *new_opts = kstrdup(options, GFP_KERNEL); struct afs_super_info *as; int ret; @@ -437,20 +458,18 @@ static struct dentry *afs_mount(struct file_system_type *fs_type, } params.key = key; + /* allocate a superblock info record */ + ret = -ENOMEM; + as = afs_alloc_sbi(&params); + if (!as) + goto error; + /* parse the device name */ vol = afs_volume_lookup(&params); if (IS_ERR(vol)) { ret = PTR_ERR(vol); goto error; } - - /* allocate a superblock info record */ - ret = -ENOMEM; - as = kzalloc(sizeof(struct afs_super_info), GFP_KERNEL); - if (!as) - goto error_vol; - - as->net = afs_get_net(params.net); as->volume = vol; /* allocate a deviceless superblock */ @@ -466,31 +485,27 @@ static struct dentry *afs_mount(struct file_system_type *fs_type, ret = afs_fill_super(sb, &params); if (ret < 0) goto error_sb; + as = NULL; sb->s_flags |= MS_ACTIVE; } else { _debug("reuse"); ASSERTCMP(sb->s_flags, &, MS_ACTIVE); - 
afs_put_volume(params.net, vol); - kfree(as); + afs_destroy_sbi(as); + as = NULL; } afs_put_cell(params.cell); - kfree(new_opts); + key_put(params.key); _leave(" = 0 [%p]", sb); return dget(sb->s_root); error_sb: deactivate_locked_super(sb); - goto error; error_as: - afs_put_net(as->net); - kfree(as); -error_vol: - afs_put_volume(params.net, vol); + afs_destroy_sbi(as); error: afs_put_cell(params.cell); key_put(params.key); - kfree(new_opts); _leave(" = %d", ret); return ERR_PTR(ret); } @@ -498,11 +513,9 @@ error: static void afs_kill_super(struct super_block *sb) { struct afs_super_info *as = sb->s_fs_info; - struct afs_net *net = as->net; kill_anon_super(sb); - afs_put_volume(net, as->volume); - kfree(as); + afs_destroy_sbi(as); } /* From 9ed900b1160ef306bc74ad0228d7ab199234c758 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 2 Nov 2017 15:27:46 +0000 Subject: [PATCH 06/35] afs: Push the net ns pointer to more places Push the network namespace pointer to more places in AFS, including the afs_server structure (which doesn't hold a ref on the netns). In particular, afs_put_cell() now requires a net ns parameter so that it can safely alter the netns after decrementing the cell usage count - the cell will be deallocated by a background thread after being cached for a period, which means that it's not safe to access it after reducing its usage count. 
Signed-off-by: David Howells --- fs/afs/cell.c | 14 +++++++------- fs/afs/cmservice.c | 2 +- fs/afs/dir.c | 12 ++++++------ fs/afs/inode.c | 2 +- fs/afs/internal.h | 7 ++++--- fs/afs/proc.c | 2 +- fs/afs/server.c | 7 +++---- fs/afs/super.c | 12 ++++++------ fs/afs/vlocation.c | 6 +++--- fs/afs/vnode.c | 28 ++++++++++++++-------------- fs/afs/volume.c | 20 ++++++++++---------- 11 files changed, 56 insertions(+), 56 deletions(-) diff --git a/fs/afs/cell.c b/fs/afs/cell.c index bd570fa539a0..2224e335eed7 100644 --- a/fs/afs/cell.c +++ b/fs/afs/cell.c @@ -251,7 +251,7 @@ int afs_cell_init(struct afs_net *net, char *rootcell) old_root = net->ws_cell; net->ws_cell = new_root; write_unlock(&net->cells_lock); - afs_put_cell(old_root); + afs_put_cell(net, old_root); _leave(" = 0"); return 0; @@ -336,7 +336,7 @@ struct afs_cell *afs_get_cell_maybe(struct afs_cell *cell) /* * destroy a cell record */ -void afs_put_cell(struct afs_cell *cell) +void afs_put_cell(struct afs_net *net, struct afs_cell *cell) { if (!cell) return; @@ -347,10 +347,10 @@ void afs_put_cell(struct afs_cell *cell) /* to prevent a race, the decrement and the dequeue must be effectively * atomic */ - write_lock(&cell->net->cells_lock); + write_lock(&net->cells_lock); if (likely(!atomic_dec_and_test(&cell->usage))) { - write_unlock(&cell->net->cells_lock); + write_unlock(&net->cells_lock); _leave(""); return; } @@ -358,9 +358,9 @@ void afs_put_cell(struct afs_cell *cell) ASSERT(list_empty(&cell->servers)); ASSERT(list_empty(&cell->vl_list)); - wake_up(&cell->net->cells_freeable_wq); + wake_up(&net->cells_freeable_wq); - write_unlock(&cell->net->cells_lock); + write_unlock(&net->cells_lock); _leave(" [unused]"); } @@ -424,7 +424,7 @@ void afs_cell_purge(struct afs_net *net) _enter(""); - afs_put_cell(net->ws_cell); + afs_put_cell(net, net->ws_cell); down_write(&net->cells_sem); diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c index 30ce4be4165f..9ad39f8a7e87 100644 --- a/fs/afs/cmservice.c +++ 
b/fs/afs/cmservice.c @@ -151,7 +151,7 @@ static void afs_cm_destructor(struct afs_call *call) afs_break_callbacks(call->server, call->count, call->request); } - afs_put_server(call->server); + afs_put_server(call->net, call->server); call->server = NULL; kfree(call->buffer); call->buffer = NULL; diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 613a77058263..97ec6a74589e 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -771,7 +771,7 @@ static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) vnode->update_cnt++; spin_unlock(&vnode->lock); afs_vnode_finalise_status_update(vnode, server); - afs_put_server(server); + afs_put_server(afs_i2net(dir), server); d_instantiate(dentry, inode); if (d_unhashed(dentry)) { @@ -783,7 +783,7 @@ static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) return 0; iget_error: - afs_put_server(server); + afs_put_server(afs_i2net(dir), server); mkdir_error: key_put(key); error: @@ -948,7 +948,7 @@ static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode, vnode->update_cnt++; spin_unlock(&vnode->lock); afs_vnode_finalise_status_update(vnode, server); - afs_put_server(server); + afs_put_server(afs_i2net(dir), server); d_instantiate(dentry, inode); if (d_unhashed(dentry)) { @@ -960,7 +960,7 @@ static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode, return 0; iget_error: - afs_put_server(server); + afs_put_server(afs_i2net(dir), server); create_error: key_put(key); error: @@ -1060,7 +1060,7 @@ static int afs_symlink(struct inode *dir, struct dentry *dentry, vnode->update_cnt++; spin_unlock(&vnode->lock); afs_vnode_finalise_status_update(vnode, server); - afs_put_server(server); + afs_put_server(afs_i2net(dir), server); d_instantiate(dentry, inode); if (d_unhashed(dentry)) { @@ -1072,7 +1072,7 @@ static int afs_symlink(struct inode *dir, struct dentry *dentry, return 0; iget_error: - afs_put_server(server); + afs_put_server(afs_i2net(dir), server); create_error: 
key_put(key); error: diff --git a/fs/afs/inode.c b/fs/afs/inode.c index 342316a9e3e0..fbb441d25022 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -437,7 +437,7 @@ void afs_evict_inode(struct inode *inode) spin_lock(&vnode->server->fs_lock); rb_erase(&vnode->server_rb, &vnode->server->fs_vnodes); spin_unlock(&vnode->server->fs_lock); - afs_put_server(vnode->server); + afs_put_server(afs_i2net(inode), vnode->server); vnode->server = NULL; } diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 2d90cb7605f3..7cd30ae71f91 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -335,6 +335,7 @@ struct afs_server { atomic_t usage; time64_t time_of_death; /* time at which put reduced usage to 0 */ struct in_addr addr; /* server address */ + struct afs_net *net; /* Network namespace in which the server resides */ struct afs_cell *cell; /* cell in which server resides */ struct list_head link; /* link in cell's server list */ struct list_head grave; /* link in master graveyard list */ @@ -513,7 +514,7 @@ extern int afs_cell_init(struct afs_net *, char *); extern struct afs_cell *afs_cell_create(struct afs_net *, const char *, unsigned, char *, bool); extern struct afs_cell *afs_cell_lookup(struct afs_net *, const char *, unsigned, bool); extern struct afs_cell *afs_grab_cell(struct afs_cell *); -extern void afs_put_cell(struct afs_cell *); +extern void afs_put_cell(struct afs_net *, struct afs_cell *); extern void __net_exit afs_cell_purge(struct afs_net *); /* @@ -713,7 +714,7 @@ extern struct afs_server *afs_lookup_server(struct afs_cell *, const struct in_addr *); extern struct afs_server *afs_find_server(struct afs_net *, const struct sockaddr_rxrpc *); -extern void afs_put_server(struct afs_server *); +extern void afs_put_server(struct afs_net *, struct afs_server *); extern void afs_reap_server(struct work_struct *); extern void __net_exit afs_purge_servers(struct afs_net *); @@ -802,7 +803,7 @@ static inline struct afs_volume *afs_get_volume(struct 
afs_volume *volume) return volume; } -extern void afs_put_volume(struct afs_net *, struct afs_volume *); +extern void afs_put_volume(struct afs_cell *, struct afs_volume *); extern struct afs_volume *afs_volume_lookup(struct afs_mount_params *); extern struct afs_server *afs_volume_pick_fileserver(struct afs_vnode *); extern int afs_volume_release_fileserver(struct afs_vnode *, diff --git a/fs/afs/proc.c b/fs/afs/proc.c index c93433460348..677a453b08bf 100644 --- a/fs/afs/proc.c +++ b/fs/afs/proc.c @@ -285,7 +285,7 @@ static ssize_t afs_proc_cells_write(struct file *file, const char __user *buf, goto done; } - afs_put_cell(cell); + afs_put_cell(net, cell); printk("kAFS: Added new cell '%s'\n", name); } else { goto inval; diff --git a/fs/afs/server.c b/fs/afs/server.c index 33aeb527ac7e..d8044be913f0 100644 --- a/fs/afs/server.c +++ b/fs/afs/server.c @@ -85,6 +85,7 @@ static struct afs_server *afs_alloc_server(struct afs_cell *cell, server = kzalloc(sizeof(struct afs_server), GFP_KERNEL); if (server) { atomic_set(&server->usage, 1); + server->net = cell->net; server->cell = cell; INIT_LIST_HEAD(&server->link); @@ -245,10 +246,8 @@ static void afs_set_server_timer(struct afs_net *net, time64_t delay) * destroy a server record * - removes from the cell list */ -void afs_put_server(struct afs_server *server) +void afs_put_server(struct afs_net *net, struct afs_server *server) { - struct afs_net *net = server->cell->net; - if (!server) return; @@ -290,7 +289,7 @@ static void afs_destroy_server(struct afs_net *net, struct afs_server *server) ASSERTCMP(server->cb_break_head, ==, server->cb_break_tail); ASSERTCMP(atomic_read(&server->cb_break_n), ==, 0); - afs_put_cell(server->cell); + afs_put_cell(server->net, server->cell); kfree(server); afs_dec_servers_outstanding(net); } diff --git a/fs/afs/super.c b/fs/afs/super.c index e43f94ecc391..dd218f370359 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -206,7 +206,7 @@ static int afs_parse_options(struct afs_mount_params 
*params, false); if (IS_ERR(cell)) return PTR_ERR(cell); - afs_put_cell(params->cell); + afs_put_cell(params->net, params->cell); params->cell = cell; break; @@ -314,7 +314,7 @@ static int afs_parse_device_name(struct afs_mount_params *params, cellnamesz, cellnamesz, cellname ?: ""); return PTR_ERR(cell); } - afs_put_cell(params->cell); + afs_put_cell(params->net, params->cell); params->cell = cell; } @@ -409,8 +409,8 @@ static struct afs_super_info *afs_alloc_sbi(struct afs_mount_params *params) static void afs_destroy_sbi(struct afs_super_info *as) { if (as) { - afs_put_volume(as->net, as->volume); - afs_put_cell(as->cell); + afs_put_volume(as->cell, as->volume); + afs_put_cell(as->net, as->cell); afs_put_net(as->net); kfree(as); } @@ -494,7 +494,7 @@ static struct dentry *afs_mount(struct file_system_type *fs_type, as = NULL; } - afs_put_cell(params.cell); + afs_put_cell(params.net, params.cell); key_put(params.key); _leave(" = 0 [%p]", sb); return dget(sb->s_root); @@ -504,7 +504,7 @@ error_sb: error_as: afs_destroy_sbi(as); error: - afs_put_cell(params.cell); + afs_put_cell(params.net, params.cell); key_put(params.key); _leave(" = %d", ret); return ERR_PTR(ret); diff --git a/fs/afs/vlocation.c b/fs/afs/vlocation.c index ccb7aacfbeca..cf7e02d5fa3f 100644 --- a/fs/afs/vlocation.c +++ b/fs/afs/vlocation.c @@ -518,14 +518,14 @@ void afs_put_vlocation(struct afs_net *net, struct afs_vlocation *vl) /* * destroy a dead volume location record */ -static void afs_vlocation_destroy(struct afs_vlocation *vl) +static void afs_vlocation_destroy(struct afs_net *net, struct afs_vlocation *vl) { _enter("%p", vl); #ifdef CONFIG_AFS_FSCACHE fscache_relinquish_cookie(vl->cache, 0); #endif - afs_put_cell(vl->cell); + afs_put_cell(net, vl->cell); kfree(vl); } @@ -580,7 +580,7 @@ void afs_vlocation_reaper(struct work_struct *work) while (!list_empty(&corpses)) { vl = list_entry(corpses.next, struct afs_vlocation, grave); list_del(&vl->grave); - afs_vlocation_destroy(vl); + 
afs_vlocation_destroy(net, vl); } _leave(""); diff --git a/fs/afs/vnode.c b/fs/afs/vnode.c index dcb956143c86..d5ef834ba4ac 100644 --- a/fs/afs/vnode.c +++ b/fs/afs/vnode.c @@ -73,7 +73,7 @@ static void afs_install_vnode(struct afs_vnode *vnode, afs_get_server(server); vnode->server = server; - afs_put_server(old_server); + afs_put_server(afs_v2net(vnode), old_server); /* insert into the server's vnode tree in FID order */ spin_lock(&server->fs_lock); @@ -196,7 +196,7 @@ static void afs_vnode_deleted_remotely(struct afs_vnode *vnode) spin_unlock(&server->fs_lock); vnode->server = NULL; - afs_put_server(server); + afs_put_server(afs_v2net(vnode), server); } else { ASSERT(!vnode->cb_promised); } @@ -225,7 +225,7 @@ void afs_vnode_finalise_status_update(struct afs_vnode *vnode, spin_unlock(&vnode->lock); wake_up_all(&vnode->update_waitq); - afs_put_server(oldserver); + afs_put_server(afs_v2net(vnode), oldserver); _leave(""); } @@ -368,7 +368,7 @@ get_anyway: if (auth_vnode) afs_cache_permit(vnode, key, acl_order); afs_vnode_finalise_status_update(vnode, server); - afs_put_server(server); + afs_put_server(afs_v2net(vnode), server); } else { _debug("failed [%d]", ret); afs_vnode_status_update_failed(vnode, ret); @@ -428,7 +428,7 @@ int afs_vnode_fetch_data(struct afs_vnode *vnode, struct key *key, /* adjust the flags */ if (ret == 0) { afs_vnode_finalise_status_update(vnode, server); - afs_put_server(server); + afs_put_server(afs_v2net(vnode), server); } else { afs_vnode_status_update_failed(vnode, ret); } @@ -540,7 +540,7 @@ int afs_vnode_remove(struct afs_vnode *vnode, struct key *key, const char *name, /* adjust the flags */ if (ret == 0) { afs_vnode_finalise_status_update(vnode, server); - afs_put_server(server); + afs_put_server(afs_v2net(vnode), server); } else { afs_vnode_status_update_failed(vnode, ret); } @@ -603,7 +603,7 @@ int afs_vnode_link(struct afs_vnode *dvnode, struct afs_vnode *vnode, if (ret == 0) { afs_vnode_finalise_status_update(vnode, server); 
afs_vnode_finalise_status_update(dvnode, server); - afs_put_server(server); + afs_put_server(afs_v2net(dvnode), server); } else { afs_vnode_status_update_failed(vnode, ret); afs_vnode_status_update_failed(dvnode, ret); @@ -738,7 +738,7 @@ int afs_vnode_rename(struct afs_vnode *orig_dvnode, afs_vnode_finalise_status_update(orig_dvnode, server); if (new_dvnode != orig_dvnode) afs_vnode_finalise_status_update(new_dvnode, server); - afs_put_server(server); + afs_put_server(afs_v2net(orig_dvnode), server); } else { afs_vnode_status_update_failed(orig_dvnode, ret); if (new_dvnode != orig_dvnode) @@ -802,7 +802,7 @@ int afs_vnode_store_data(struct afs_writeback *wb, pgoff_t first, pgoff_t last, /* adjust the flags */ if (ret == 0) { afs_vnode_finalise_status_update(vnode, server); - afs_put_server(server); + afs_put_server(afs_v2net(vnode), server); } else { afs_vnode_status_update_failed(vnode, ret); } @@ -854,7 +854,7 @@ int afs_vnode_setattr(struct afs_vnode *vnode, struct key *key, /* adjust the flags */ if (ret == 0) { afs_vnode_finalise_status_update(vnode, server); - afs_put_server(server); + afs_put_server(afs_v2net(vnode), server); } else { afs_vnode_status_update_failed(vnode, ret); } @@ -900,7 +900,7 @@ int afs_vnode_get_volume_status(struct afs_vnode *vnode, struct key *key, /* adjust the flags */ if (ret == 0) - afs_put_server(server); + afs_put_server(afs_v2net(vnode), server); _leave(" = %d", ret); return ret; @@ -939,7 +939,7 @@ int afs_vnode_set_lock(struct afs_vnode *vnode, struct key *key, /* adjust the flags */ if (ret == 0) - afs_put_server(server); + afs_put_server(afs_v2net(vnode), server); _leave(" = %d", ret); return ret; @@ -977,7 +977,7 @@ int afs_vnode_extend_lock(struct afs_vnode *vnode, struct key *key) /* adjust the flags */ if (ret == 0) - afs_put_server(server); + afs_put_server(afs_v2net(vnode), server); _leave(" = %d", ret); return ret; @@ -1015,7 +1015,7 @@ int afs_vnode_release_lock(struct afs_vnode *vnode, struct key *key) /* adjust 
the flags */ if (ret == 0) - afs_put_server(server); + afs_put_server(afs_v2net(vnode), server); _leave(" = %d", ret); return ret; diff --git a/fs/afs/volume.c b/fs/afs/volume.c index 3d5363e0b7e1..e2f0e8ec527d 100644 --- a/fs/afs/volume.c +++ b/fs/afs/volume.c @@ -154,7 +154,7 @@ error_discard: up_write(&params->cell->vl_sem); for (loop = volume->nservers - 1; loop >= 0; loop--) - afs_put_server(volume->servers[loop]); + afs_put_server(params->net, volume->servers[loop]); kfree(volume); goto error; @@ -163,7 +163,7 @@ error_discard: /* * destroy a volume record */ -void afs_put_volume(struct afs_net *net, struct afs_volume *volume) +void afs_put_volume(struct afs_cell *cell, struct afs_volume *volume) { struct afs_vlocation *vlocation; int loop; @@ -179,7 +179,7 @@ void afs_put_volume(struct afs_net *net, struct afs_volume *volume) /* to prevent a race, the decrement and the dequeue must be effectively * atomic */ - down_write(&vlocation->cell->vl_sem); + down_write(&cell->vl_sem); if (likely(!atomic_dec_and_test(&volume->usage))) { up_write(&vlocation->cell->vl_sem); @@ -189,16 +189,16 @@ void afs_put_volume(struct afs_net *net, struct afs_volume *volume) vlocation->vols[volume->type] = NULL; - up_write(&vlocation->cell->vl_sem); + up_write(&cell->vl_sem); /* finish cleaning up the volume */ #ifdef CONFIG_AFS_FSCACHE fscache_relinquish_cookie(volume->cache, 0); #endif - afs_put_vlocation(net, vlocation); + afs_put_vlocation(cell->net, vlocation); for (loop = volume->nservers - 1; loop >= 0; loop--) - afs_put_server(volume->servers[loop]); + afs_put_server(cell->net, volume->servers[loop]); kfree(volume); @@ -336,7 +336,7 @@ int afs_volume_release_fileserver(struct afs_vnode *vnode, sizeof(volume->servers[loop]) * (volume->nservers - loop)); volume->servers[volume->nservers] = NULL; - afs_put_server(server); + afs_put_server(afs_v2net(vnode), server); volume->rjservers++; if (volume->nservers > 0) @@ -350,7 +350,7 @@ int afs_volume_release_fileserver(struct afs_vnode 
*vnode, * no longer registered */ up_write(&volume->server_sem); - afs_put_server(server); + afs_put_server(afs_v2net(vnode), server); _leave(" [completely rejected]"); return 1; @@ -379,7 +379,7 @@ int afs_volume_release_fileserver(struct afs_vnode *vnode, case -ENOMEM: case -ENONET: /* tell the caller to accept the result */ - afs_put_server(server); + afs_put_server(afs_v2net(vnode), server); _leave(" [local failure]"); return 1; } @@ -388,7 +388,7 @@ int afs_volume_release_fileserver(struct afs_vnode *vnode, try_next_server_upw: up_write(&volume->server_sem); try_next_server: - afs_put_server(server); + afs_put_server(afs_v2net(vnode), server); _leave(" [try next server]"); return 0; } From 91a90380efbc896eb129878553202c97213d0861 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 2 Nov 2017 15:27:46 +0000 Subject: [PATCH 07/35] afs: Add some protocol defs Add some protocol definitions, including max field lengths, flag defs, an XDR-encoded UUID def, more VL operation IDs and more fileserver abort codes. 
Signed-off-by: David Howells --- fs/afs/afs.h | 25 ++++++++++++++++++++----- fs/afs/afs_fs.h | 4 ++++ fs/afs/afs_vl.h | 15 +++++++++++---- 3 files changed, 35 insertions(+), 9 deletions(-) diff --git a/fs/afs/afs.h b/fs/afs/afs.h index 93053115bcfc..0d837bddbf7d 100644 --- a/fs/afs/afs.h +++ b/fs/afs/afs.h @@ -14,11 +14,14 @@ #include -#define AFS_MAXCELLNAME 64 /* maximum length of a cell name */ -#define AFS_MAXVOLNAME 64 /* maximum length of a volume name */ -#define AFSNAMEMAX 256 /* maximum length of a filename plus NUL */ -#define AFSPATHMAX 1024 /* maximum length of a pathname plus NUL */ -#define AFSOPAQUEMAX 1024 /* maximum length of an opaque field */ +#define AFS_MAXCELLNAME 64 /* Maximum length of a cell name */ +#define AFS_MAXVOLNAME 64 /* Maximum length of a volume name */ +#define AFS_MAXNSERVERS 8 /* Maximum servers in a basic volume record */ +#define AFS_NMAXNSERVERS 13 /* Maximum servers in a N/U-class volume record */ +#define AFS_MAXTYPES 3 /* Maximum number of volume types */ +#define AFSNAMEMAX 256 /* Maximum length of a filename plus NUL */ +#define AFSPATHMAX 1024 /* Maximum length of a pathname plus NUL */ +#define AFSOPAQUEMAX 1024 /* Maximum length of an opaque field */ typedef unsigned afs_volid_t; typedef unsigned afs_vnodeid_t; @@ -176,4 +179,16 @@ struct afs_volume_status { #define AFS_BLOCK_SIZE 1024 +/* + * XDR encoding of UUID in AFS. 
+ */ +struct afs_uuid__xdr { + __be32 time_low; + __be32 time_mid; + __be32 time_hi_and_version; + __be32 clock_seq_hi_and_reserved; + __be32 clock_seq_low; + __be32 node[6]; +}; + #endif /* AFS_H */ diff --git a/fs/afs/afs_fs.h b/fs/afs/afs_fs.h index eb647323d8f0..accd886b4372 100644 --- a/fs/afs/afs_fs.h +++ b/fs/afs/afs_fs.h @@ -40,6 +40,7 @@ enum AFS_FS_Operations { }; enum AFS_FS_Errors { + VRESTARTING = -100, /* Server is restarting */ VSALVAGE = 101, /* volume needs salvaging */ VNOVNODE = 102, /* no such file/dir (vnode) */ VNOVOL = 103, /* no such volume or volume unavailable */ @@ -51,6 +52,9 @@ enum AFS_FS_Errors { VOVERQUOTA = 109, /* volume's maximum quota exceeded */ VBUSY = 110, /* volume is temporarily unavailable */ VMOVED = 111, /* volume moved to new server - ask this FS where */ + VIO = 112, /* I/O error in volume */ + VSALVAGING = 113, /* Volume is being salvaged */ + VRESTRICTED = 120, /* Volume is restricted from using */ }; #endif /* AFS_FS_H */ diff --git a/fs/afs/afs_vl.h b/fs/afs/afs_vl.h index 800f607ffaf5..4eaa620992c8 100644 --- a/fs/afs/afs_vl.h +++ b/fs/afs/afs_vl.h @@ -18,9 +18,12 @@ #define VL_SERVICE 52 /* RxRPC service ID for the Volume Location service */ enum AFSVL_Operations { - VLGETENTRYBYID = 503, /* AFS Get Cache Entry By ID operation ID */ - VLGETENTRYBYNAME = 504, /* AFS Get Cache Entry By Name operation ID */ - VLPROBE = 514, /* AFS Probe Volume Location Service operation ID */ + VLGETENTRYBYID = 503, /* AFS Get VLDB entry by ID */ + VLGETENTRYBYNAME = 504, /* AFS Get VLDB entry by name */ + VLPROBE = 514, /* AFS probe VL service */ + VLGETENTRYBYIDU = 526, /* AFS Get VLDB entry by ID (UUID-variant) */ + VLGETENTRYBYNAMEU = 527, /* AFS Get VLDB entry by name (UUID-variant) */ + VLGETADDRSU = 533, /* AFS Get addrs for fileserver */ }; enum AFSVL_Errors { @@ -74,11 +77,15 @@ struct afs_vldbentry { struct in_addr addr; /* server address */ unsigned partition; /* partition ID on this server */ unsigned flags; /* server 
specific flags */ -#define AFS_VLSF_NEWREPSITE 0x0001 /* unused */ +#define AFS_VLSF_NEWREPSITE 0x0001 /* Ignore all 'non-new' servers */ #define AFS_VLSF_ROVOL 0x0002 /* this server holds a R/O instance of the volume */ #define AFS_VLSF_RWVOL 0x0004 /* this server holds a R/W instance of the volume */ #define AFS_VLSF_BACKVOL 0x0008 /* this server holds a backup instance of the volume */ +#define AFS_VLSF_UUID 0x0010 /* This server is referred to by its UUID */ +#define AFS_VLSF_DONTUSE 0x0020 /* This server ref should be ignored */ } servers[8]; }; +#define AFS_VLDB_MAXNAMELEN 65 + #endif /* AFS_VL_H */ From ad6a942a9e74edea8a4a126a1e434feff6a6d5c2 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 2 Nov 2017 15:27:47 +0000 Subject: [PATCH 08/35] afs: Update the cache index structure Update the cache index structure in the following ways: (1) Don't use the volume name followed by the volume type as levels in the cache index. Volumes can be renamed. Use the volume ID instead. (2) Don't store the VLDB data for a volume in the tree. If the volume database should be cached locally, then it should be done in a separate tree. (3) Expand the volume ID stored in the cache to 64 bits. (4) Expand the file/vnode ID stored in the cache to 96 bits. (5) Increment the cache structure version number to 1. 
Signed-off-by: David Howells --- fs/afs/cache.c | 239 ++++++++------------------------------------- fs/afs/internal.h | 21 ---- fs/afs/vlocation.c | 39 ++------ fs/afs/volume.c | 2 +- 4 files changed, 50 insertions(+), 251 deletions(-) diff --git a/fs/afs/cache.c b/fs/afs/cache.c index 1fe855191261..f62ff71d28c9 100644 --- a/fs/afs/cache.c +++ b/fs/afs/cache.c @@ -14,19 +14,6 @@ static uint16_t afs_cell_cache_get_key(const void *cookie_netfs_data, void *buffer, uint16_t buflen); -static uint16_t afs_cell_cache_get_aux(const void *cookie_netfs_data, - void *buffer, uint16_t buflen); -static enum fscache_checkaux afs_cell_cache_check_aux(void *cookie_netfs_data, - const void *buffer, - uint16_t buflen); - -static uint16_t afs_vlocation_cache_get_key(const void *cookie_netfs_data, - void *buffer, uint16_t buflen); -static uint16_t afs_vlocation_cache_get_aux(const void *cookie_netfs_data, - void *buffer, uint16_t buflen); -static enum fscache_checkaux afs_vlocation_cache_check_aux( - void *cookie_netfs_data, const void *buffer, uint16_t buflen); - static uint16_t afs_volume_cache_get_key(const void *cookie_netfs_data, void *buffer, uint16_t buflen); @@ -42,23 +29,13 @@ static enum fscache_checkaux afs_vnode_cache_check_aux(void *cookie_netfs_data, struct fscache_netfs afs_cache_netfs = { .name = "afs", - .version = 0, + .version = 1, }; struct fscache_cookie_def afs_cell_cache_index_def = { .name = "AFS.cell", .type = FSCACHE_COOKIE_TYPE_INDEX, .get_key = afs_cell_cache_get_key, - .get_aux = afs_cell_cache_get_aux, - .check_aux = afs_cell_cache_check_aux, -}; - -struct fscache_cookie_def afs_vlocation_cache_index_def = { - .name = "AFS.vldb", - .type = FSCACHE_COOKIE_TYPE_INDEX, - .get_key = afs_vlocation_cache_get_key, - .get_aux = afs_vlocation_cache_get_aux, - .check_aux = afs_vlocation_cache_check_aux, }; struct fscache_cookie_def afs_volume_cache_index_def = { @@ -95,150 +72,26 @@ static uint16_t afs_cell_cache_get_key(const void *cookie_netfs_data, return klen; 
} -/* - * provide new auxiliary cache data - */ -static uint16_t afs_cell_cache_get_aux(const void *cookie_netfs_data, - void *buffer, uint16_t bufmax) -{ - const struct afs_cell *cell = cookie_netfs_data; - uint16_t dlen; - - _enter("%p,%p,%u", cell, buffer, bufmax); - - dlen = cell->vl_naddrs * sizeof(cell->vl_addrs[0]); - dlen = min(dlen, bufmax); - dlen &= ~(sizeof(cell->vl_addrs[0]) - 1); - - memcpy(buffer, cell->vl_addrs, dlen); - return dlen; -} - -/* - * check that the auxiliary data indicates that the entry is still valid - */ -static enum fscache_checkaux afs_cell_cache_check_aux(void *cookie_netfs_data, - const void *buffer, - uint16_t buflen) -{ - _leave(" = OKAY"); - return FSCACHE_CHECKAUX_OKAY; -} - -/*****************************************************************************/ -/* - * set the key for the index entry - */ -static uint16_t afs_vlocation_cache_get_key(const void *cookie_netfs_data, - void *buffer, uint16_t bufmax) -{ - const struct afs_vlocation *vlocation = cookie_netfs_data; - uint16_t klen; - - _enter("{%s},%p,%u", vlocation->vldb.name, buffer, bufmax); - - klen = strnlen(vlocation->vldb.name, sizeof(vlocation->vldb.name)); - if (klen > bufmax) - return 0; - - memcpy(buffer, vlocation->vldb.name, klen); - - _leave(" = %u", klen); - return klen; -} - -/* - * provide new auxiliary cache data - */ -static uint16_t afs_vlocation_cache_get_aux(const void *cookie_netfs_data, - void *buffer, uint16_t bufmax) -{ - const struct afs_vlocation *vlocation = cookie_netfs_data; - uint16_t dlen; - - _enter("{%s},%p,%u", vlocation->vldb.name, buffer, bufmax); - - dlen = sizeof(struct afs_cache_vlocation); - dlen -= offsetof(struct afs_cache_vlocation, nservers); - if (dlen > bufmax) - return 0; - - memcpy(buffer, (uint8_t *)&vlocation->vldb.nservers, dlen); - - _leave(" = %u", dlen); - return dlen; -} - -/* - * check that the auxiliary data indicates that the entry is still valid - */ -static -enum fscache_checkaux 
afs_vlocation_cache_check_aux(void *cookie_netfs_data, - const void *buffer, - uint16_t buflen) -{ - const struct afs_cache_vlocation *cvldb; - struct afs_vlocation *vlocation = cookie_netfs_data; - uint16_t dlen; - - _enter("{%s},%p,%u", vlocation->vldb.name, buffer, buflen); - - /* check the size of the data is what we're expecting */ - dlen = sizeof(struct afs_cache_vlocation); - dlen -= offsetof(struct afs_cache_vlocation, nservers); - if (dlen != buflen) - return FSCACHE_CHECKAUX_OBSOLETE; - - cvldb = container_of(buffer, struct afs_cache_vlocation, nservers); - - /* if what's on disk is more valid than what's in memory, then use the - * VL record from the cache */ - if (!vlocation->valid || vlocation->vldb.rtime == cvldb->rtime) { - memcpy((uint8_t *)&vlocation->vldb.nservers, buffer, dlen); - vlocation->valid = 1; - _leave(" = SUCCESS [c->m]"); - return FSCACHE_CHECKAUX_OKAY; - } - - /* need to update the cache if the cached info differs */ - if (memcmp(&vlocation->vldb, buffer, dlen) != 0) { - /* delete if the volume IDs for this name differ */ - if (memcmp(&vlocation->vldb.vid, &cvldb->vid, - sizeof(cvldb->vid)) != 0 - ) { - _leave(" = OBSOLETE"); - return FSCACHE_CHECKAUX_OBSOLETE; - } - - _leave(" = UPDATE"); - return FSCACHE_CHECKAUX_NEEDS_UPDATE; - } - - _leave(" = OKAY"); - return FSCACHE_CHECKAUX_OKAY; -} - /*****************************************************************************/ /* * set the key for the volume index entry */ static uint16_t afs_volume_cache_get_key(const void *cookie_netfs_data, - void *buffer, uint16_t bufmax) + void *buffer, uint16_t bufmax) { const struct afs_volume *volume = cookie_netfs_data; - uint16_t klen; + struct { + u64 volid; + } __packed key; _enter("{%u},%p,%u", volume->type, buffer, bufmax); - klen = sizeof(volume->type); - if (klen > bufmax) + if (bufmax < sizeof(key)) return 0; - memcpy(buffer, &volume->type, sizeof(volume->type)); - - _leave(" = %u", klen); - return klen; - + key.volid = volume->vid; + 
memcpy(buffer, &key, sizeof(key)); + return sizeof(key); } /*****************************************************************************/ @@ -249,20 +102,25 @@ static uint16_t afs_vnode_cache_get_key(const void *cookie_netfs_data, void *buffer, uint16_t bufmax) { const struct afs_vnode *vnode = cookie_netfs_data; - uint16_t klen; + struct { + u32 vnode_id[3]; + } __packed key; _enter("{%x,%x,%llx},%p,%u", vnode->fid.vnode, vnode->fid.unique, vnode->status.data_version, buffer, bufmax); - klen = sizeof(vnode->fid.vnode); - if (klen > bufmax) + /* Allow for a 96-bit key */ + memset(&key, 0, sizeof(key)); + key.vnode_id[0] = vnode->fid.vnode; + key.vnode_id[1] = 0; + key.vnode_id[2] = 0; + + if (sizeof(key) > bufmax) return 0; - memcpy(buffer, &vnode->fid.vnode, sizeof(vnode->fid.vnode)); - - _leave(" = %u", klen); - return klen; + memcpy(buffer, &key, sizeof(key)); + return sizeof(key); } /* @@ -280,6 +138,11 @@ static void afs_vnode_cache_get_attr(const void *cookie_netfs_data, *size = vnode->status.size; } +struct afs_vnode_cache_aux { + u64 data_version; + u32 fid_unique; +} __packed; + /* * provide new auxiliary cache data */ @@ -287,23 +150,21 @@ static uint16_t afs_vnode_cache_get_aux(const void *cookie_netfs_data, void *buffer, uint16_t bufmax) { const struct afs_vnode *vnode = cookie_netfs_data; - uint16_t dlen; + struct afs_vnode_cache_aux aux; _enter("{%x,%x,%Lx},%p,%u", vnode->fid.vnode, vnode->fid.unique, vnode->status.data_version, buffer, bufmax); - dlen = sizeof(vnode->fid.unique) + sizeof(vnode->status.data_version); - if (dlen > bufmax) + memset(&aux, 0, sizeof(aux)); + aux.data_version = vnode->status.data_version; + aux.fid_unique = vnode->fid.unique; + + if (bufmax < sizeof(aux)) return 0; - memcpy(buffer, &vnode->fid.unique, sizeof(vnode->fid.unique)); - buffer += sizeof(vnode->fid.unique); - memcpy(buffer, &vnode->status.data_version, - sizeof(vnode->status.data_version)); - - _leave(" = %u", dlen); - return dlen; + memcpy(buffer, &aux, 
sizeof(aux)); + return sizeof(aux); } /* @@ -314,43 +175,29 @@ static enum fscache_checkaux afs_vnode_cache_check_aux(void *cookie_netfs_data, uint16_t buflen) { struct afs_vnode *vnode = cookie_netfs_data; - uint16_t dlen; + struct afs_vnode_cache_aux aux; _enter("{%x,%x,%llx},%p,%u", vnode->fid.vnode, vnode->fid.unique, vnode->status.data_version, buffer, buflen); + memcpy(&aux, buffer, sizeof(aux)); + /* check the size of the data is what we're expecting */ - dlen = sizeof(vnode->fid.unique) + sizeof(vnode->status.data_version); - if (dlen != buflen) { - _leave(" = OBSOLETE [len %hx != %hx]", dlen, buflen); + if (buflen != sizeof(aux)) { + _leave(" = OBSOLETE [len %hx != %zx]", buflen, sizeof(aux)); return FSCACHE_CHECKAUX_OBSOLETE; } - if (memcmp(buffer, - &vnode->fid.unique, - sizeof(vnode->fid.unique) - ) != 0) { - unsigned unique; - - memcpy(&unique, buffer, sizeof(unique)); - + if (vnode->fid.unique != aux.fid_unique) { _leave(" = OBSOLETE [uniq %x != %x]", - unique, vnode->fid.unique); + aux.fid_unique, vnode->fid.unique); return FSCACHE_CHECKAUX_OBSOLETE; } - if (memcmp(buffer + sizeof(vnode->fid.unique), - &vnode->status.data_version, - sizeof(vnode->status.data_version) - ) != 0) { - afs_dataversion_t version; - - memcpy(&version, buffer + sizeof(vnode->fid.unique), - sizeof(version)); - + if (vnode->status.data_version != aux.data_version) { _leave(" = OBSOLETE [vers %llx != %llx]", - version, vnode->status.data_version); + aux.data_version, vnode->status.data_version); return FSCACHE_CHECKAUX_OBSOLETE; } diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 7cd30ae71f91..b16181b2fa43 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -190,14 +190,6 @@ static inline struct afs_super_info *AFS_FS_S(struct super_block *sb) extern struct file_system_type afs_fs_type; -/* - * entry in the cached cell catalogue - */ -struct afs_cache_cell { - char name[AFS_MAXCELLNAME]; /* cell name (padded with NULs) */ - struct in_addr vl_servers[15]; /* cached 
cell VL servers */ -}; - /* * AFS network namespace record. */ @@ -296,14 +288,6 @@ struct afs_cache_vlocation { time_t rtime; /* last retrieval time */ }; -/* - * volume -> vnode hash table entry - */ -struct afs_cache_vhash { - afs_voltype_t vtype; /* which volume variation */ - uint8_t hash_bucket; /* which hash bucket this represents */ -} __attribute__((packed)); - /* * AFS volume location record */ @@ -314,9 +298,6 @@ struct afs_vlocation { struct list_head grave; /* link in master graveyard list */ struct list_head update; /* link in master update list */ struct afs_cell *cell; /* cell to which volume belongs */ -#ifdef CONFIG_AFS_FSCACHE - struct fscache_cookie *cache; /* caching cookie */ -#endif struct afs_cache_vlocation vldb; /* volume information DB record */ struct afs_volume *vols[3]; /* volume access record pointer (index by type) */ wait_queue_head_t waitq; /* status change waitqueue */ @@ -477,12 +458,10 @@ struct afs_interface { #ifdef CONFIG_AFS_FSCACHE extern struct fscache_netfs afs_cache_netfs; extern struct fscache_cookie_def afs_cell_cache_index_def; -extern struct fscache_cookie_def afs_vlocation_cache_index_def; extern struct fscache_cookie_def afs_volume_cache_index_def; extern struct fscache_cookie_def afs_vnode_cache_index_def; #else #define afs_cell_cache_index_def (*(struct fscache_cookie_def *) NULL) -#define afs_vlocation_cache_index_def (*(struct fscache_cookie_def *) NULL) #define afs_volume_cache_index_def (*(struct fscache_cookie_def *) NULL) #define afs_vnode_cache_index_def (*(struct fscache_cookie_def *) NULL) #endif diff --git a/fs/afs/vlocation.c b/fs/afs/vlocation.c index cf7e02d5fa3f..745921171058 100644 --- a/fs/afs/vlocation.c +++ b/fs/afs/vlocation.c @@ -273,10 +273,6 @@ static void afs_vlocation_apply_update(struct afs_vlocation *vl, vl->vldb.name, vldb->name); vl->vldb = *vldb; - -#ifdef CONFIG_AFS_FSCACHE - fscache_update_cookie(vl->cache); -#endif } /* @@ -295,27 +291,12 @@ static int 
afs_vlocation_fill_in_record(struct afs_vlocation *vl, memset(&vldb, 0, sizeof(vldb)); - /* see if we have an in-cache copy (will set vl->valid if there is) */ -#ifdef CONFIG_AFS_FSCACHE - vl->cache = fscache_acquire_cookie(vl->cell->cache, - &afs_vlocation_cache_index_def, vl, - true); -#endif - - if (vl->valid) { - /* try to update a known volume in the cell VL databases by - * ID as the name may have changed */ - _debug("found in cache"); - ret = afs_vlocation_update_record(vl, key, &vldb); - } else { - /* try to look up an unknown volume in the cell VL databases by - * name */ - ret = afs_vlocation_access_vl_by_name(vl, key, &vldb); - if (ret < 0) { - printk("kAFS: failed to locate '%s' in cell '%s'\n", - vl->vldb.name, vl->cell->name); - return ret; - } + /* Try to look up an unknown volume in the cell VL databases by name */ + ret = afs_vlocation_access_vl_by_name(vl, key, &vldb); + if (ret < 0) { + printk("kAFS: failed to locate '%s' in cell '%s'\n", + vl->vldb.name, vl->cell->name); + return ret; } afs_vlocation_apply_update(vl, &vldb); @@ -414,11 +395,6 @@ fill_in_record: spin_unlock(&vl->lock); wake_up(&vl->waitq); - /* update volume entry in local cache */ -#ifdef CONFIG_AFS_FSCACHE - fscache_update_cookie(vl->cache); -#endif - /* schedule for regular updates */ afs_vlocation_queue_for_updates(net, vl); goto success; @@ -522,9 +498,6 @@ static void afs_vlocation_destroy(struct afs_net *net, struct afs_vlocation *vl) { _enter("%p", vl); -#ifdef CONFIG_AFS_FSCACHE - fscache_relinquish_cookie(vl->cache, 0); -#endif afs_put_cell(net, vl->cell); kfree(vl); } diff --git a/fs/afs/volume.c b/fs/afs/volume.c index e2f0e8ec527d..ccac5b1e079d 100644 --- a/fs/afs/volume.c +++ b/fs/afs/volume.c @@ -125,7 +125,7 @@ struct afs_volume *afs_volume_lookup(struct afs_mount_params *params) /* attach the cache and volume location */ #ifdef CONFIG_AFS_FSCACHE - volume->cache = fscache_acquire_cookie(vlocation->cache, + volume->cache = 
fscache_acquire_cookie(volume->cell->cache, &afs_volume_cache_index_def, volume, true); #endif From 4d9df9868f31df6725481135c10ac6419ce58d44 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 2 Nov 2017 15:27:47 +0000 Subject: [PATCH 09/35] afs: Keep and pass sockaddr_rxrpc addresses rather than in_addr Keep and pass sockaddr_rxrpc addresses around rather than keeping and passing in_addr addresses to allow for the use of IPv6 and non-standard port numbers in future. This also allows the port and service_id fields to be removed from the afs_call struct. Signed-off-by: David Howells --- fs/afs/cell.c | 18 +++++++++++++++--- fs/afs/fsclient.c | 36 ------------------------------------ fs/afs/internal.h | 15 +++++++-------- fs/afs/proc.c | 10 +++++----- fs/afs/rxrpc.c | 18 ++++-------------- fs/afs/server.c | 31 ++++++++++++++++--------------- fs/afs/vlclient.c | 20 ++++++++++++-------- fs/afs/vlocation.c | 30 ++++++++---------------------- fs/afs/vnode.c | 28 ++++++++++++++-------------- fs/afs/volume.c | 9 ++++----- 10 files changed, 85 insertions(+), 130 deletions(-) diff --git a/fs/afs/cell.c b/fs/afs/cell.c index 2224e335eed7..5523fa3c05d9 100644 --- a/fs/afs/cell.c +++ b/fs/afs/cell.c @@ -31,7 +31,7 @@ static struct afs_cell *afs_cell_alloc(struct afs_net *net, char keyname[4 + AFS_MAXCELLNAME + 1], *cp, *dp, *next; char *dvllist = NULL, *_vllist = NULL; char delimiter = ':'; - int ret; + int ret, i; _enter("%*.*s,%s", namelen, namelen, name ?: "", vllist); @@ -61,6 +61,14 @@ static struct afs_cell *afs_cell_alloc(struct afs_net *net, INIT_LIST_HEAD(&cell->vl_list); spin_lock_init(&cell->vl_lock); + for (i = 0; i < AFS_CELL_MAX_ADDRS; i++) { + struct sockaddr_rxrpc *srx = &cell->vl_addrs[i]; + srx->srx_family = AF_RXRPC; + srx->srx_service = VL_SERVICE; + srx->transport_type = SOCK_DGRAM; + srx->transport.sin.sin_port = htons(AFS_VL_PORT); + } + /* if the ip address is invalid, try dns query */ if (!vllist || strlen(vllist) < 7) { ret = dns_query("afsdb", 
name, namelen, "ipv4", &dvllist, NULL); @@ -83,6 +91,7 @@ static struct afs_cell *afs_cell_alloc(struct afs_net *net, /* fill in the VL server list from the rest of the string */ do { + struct sockaddr_rxrpc *srx = &cell->vl_addrs[cell->vl_naddrs]; unsigned a, b, c, d; next = strchr(_vllist, delimiter); @@ -95,10 +104,13 @@ static struct afs_cell *afs_cell_alloc(struct afs_net *net, if (a > 255 || b > 255 || c > 255 || d > 255) goto bad_address; - cell->vl_addrs[cell->vl_naddrs++].s_addr = + srx->transport_len = sizeof(struct sockaddr_in); + srx->transport.sin.sin_family = AF_INET; + srx->transport.sin.sin_addr.s_addr = htonl((a << 24) | (b << 16) | (c << 8) | d); - } while (cell->vl_naddrs < AFS_CELL_MAX_ADDRS && (_vllist = next)); + } while (cell->vl_naddrs++, + cell->vl_naddrs < AFS_CELL_MAX_ADDRS && (_vllist = next)); /* create a key to represent an anonymous user */ memcpy(keyname, "afs@", 4); diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index ce6f0159e1d4..bac2e8db6e75 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -297,8 +297,6 @@ int afs_fs_fetch_file_status(struct afs_server *server, call->key = key; call->reply = vnode; call->reply2 = volsync; - call->service_id = FS_SERVICE; - call->port = htons(AFS_FS_PORT); /* marshall the parameters */ bp = call->request; @@ -504,8 +502,6 @@ static int afs_fs_fetch_data64(struct afs_server *server, call->reply = vnode; call->reply2 = NULL; /* volsync */ call->reply3 = req; - call->service_id = FS_SERVICE; - call->port = htons(AFS_FS_PORT); call->operation_ID = FSFETCHDATA64; /* marshall the parameters */ @@ -551,8 +547,6 @@ int afs_fs_fetch_data(struct afs_server *server, call->reply = vnode; call->reply2 = NULL; /* volsync */ call->reply3 = req; - call->service_id = FS_SERVICE; - call->port = htons(AFS_FS_PORT); call->operation_ID = FSFETCHDATA; /* marshall the parameters */ @@ -619,8 +613,6 @@ int afs_fs_give_up_callbacks(struct afs_net *net, if (!call) return -ENOMEM; - call->service_id = 
FS_SERVICE; - call->port = htons(AFS_FS_PORT); /* marshall the parameters */ bp = call->request; @@ -723,8 +715,6 @@ int afs_fs_create(struct afs_server *server, call->reply2 = newfid; call->reply3 = newstatus; call->reply4 = newcb; - call->service_id = FS_SERVICE; - call->port = htons(AFS_FS_PORT); /* marshall the parameters */ bp = call->request; @@ -810,8 +800,6 @@ int afs_fs_remove(struct afs_server *server, call->key = key; call->reply = vnode; - call->service_id = FS_SERVICE; - call->port = htons(AFS_FS_PORT); /* marshall the parameters */ bp = call->request; @@ -893,8 +881,6 @@ int afs_fs_link(struct afs_server *server, call->key = key; call->reply = dvnode; call->reply2 = vnode; - call->service_id = FS_SERVICE; - call->port = htons(AFS_FS_PORT); /* marshall the parameters */ bp = call->request; @@ -988,8 +974,6 @@ int afs_fs_symlink(struct afs_server *server, call->reply = vnode; call->reply2 = newfid; call->reply3 = newstatus; - call->service_id = FS_SERVICE; - call->port = htons(AFS_FS_PORT); /* marshall the parameters */ bp = call->request; @@ -1094,8 +1078,6 @@ int afs_fs_rename(struct afs_server *server, call->key = key; call->reply = orig_dvnode; call->reply2 = new_dvnode; - call->service_id = FS_SERVICE; - call->port = htons(AFS_FS_PORT); /* marshall the parameters */ bp = call->request; @@ -1196,8 +1178,6 @@ static int afs_fs_store_data64(struct afs_server *server, call->wb = wb; call->key = wb->key; call->reply = vnode; - call->service_id = FS_SERVICE; - call->port = htons(AFS_FS_PORT); call->mapping = vnode->vfs_inode.i_mapping; call->first = first; call->last = last; @@ -1274,8 +1254,6 @@ int afs_fs_store_data(struct afs_server *server, struct afs_writeback *wb, call->wb = wb; call->key = wb->key; call->reply = vnode; - call->service_id = FS_SERVICE; - call->port = htons(AFS_FS_PORT); call->mapping = vnode->vfs_inode.i_mapping; call->first = first; call->last = last; @@ -1383,8 +1361,6 @@ static int afs_fs_setattr_size64(struct afs_server 
*server, struct key *key, call->key = key; call->reply = vnode; - call->service_id = FS_SERVICE; - call->port = htons(AFS_FS_PORT); call->store_version = vnode->status.data_version + 1; call->operation_ID = FSSTOREDATA; @@ -1435,8 +1411,6 @@ static int afs_fs_setattr_size(struct afs_server *server, struct key *key, call->key = key; call->reply = vnode; - call->service_id = FS_SERVICE; - call->port = htons(AFS_FS_PORT); call->store_version = vnode->status.data_version + 1; call->operation_ID = FSSTOREDATA; @@ -1483,8 +1457,6 @@ int afs_fs_setattr(struct afs_server *server, struct key *key, call->key = key; call->reply = vnode; - call->service_id = FS_SERVICE; - call->port = htons(AFS_FS_PORT); call->operation_ID = FSSTORESTATUS; /* marshall the parameters */ @@ -1721,8 +1693,6 @@ int afs_fs_get_volume_status(struct afs_server *server, call->reply = vnode; call->reply2 = vs; call->reply3 = tmpbuf; - call->service_id = FS_SERVICE; - call->port = htons(AFS_FS_PORT); /* marshall the parameters */ bp = call->request; @@ -1805,8 +1775,6 @@ int afs_fs_set_lock(struct afs_server *server, call->key = key; call->reply = vnode; - call->service_id = FS_SERVICE; - call->port = htons(AFS_FS_PORT); /* marshall the parameters */ bp = call->request; @@ -1839,8 +1807,6 @@ int afs_fs_extend_lock(struct afs_server *server, call->key = key; call->reply = vnode; - call->service_id = FS_SERVICE; - call->port = htons(AFS_FS_PORT); /* marshall the parameters */ bp = call->request; @@ -1872,8 +1838,6 @@ int afs_fs_release_lock(struct afs_server *server, call->key = key; call->reply = vnode; - call->service_id = FS_SERVICE; - call->port = htons(AFS_FS_PORT); /* marshall the parameters */ bp = call->request; diff --git a/fs/afs/internal.h b/fs/afs/internal.h index b16181b2fa43..4fe26bd7bfb0 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -106,7 +106,6 @@ struct afs_call { bool async; /* T if asynchronous */ bool upgrade; /* T to request service upgrade */ u16 service_id; /* RxRPC 
service ID to call */ - __be16 port; /* target UDP port */ u32 operation_ID; /* operation ID for an incoming call */ u32 count; /* count for use in unmarshalling */ __be32 tmp; /* place to extract temporary data */ @@ -264,7 +263,7 @@ struct afs_cell { spinlock_t vl_lock; /* vl_list lock */ unsigned short vl_naddrs; /* number of VL servers in addr list */ unsigned short vl_curr_svix; /* current server index */ - struct in_addr vl_addrs[AFS_CELL_MAX_ADDRS]; /* cell VL server addresses */ + struct sockaddr_rxrpc vl_addrs[AFS_CELL_MAX_ADDRS]; /* cell VL server addresses */ char name[0]; /* cell name - must go last */ }; @@ -284,7 +283,7 @@ struct afs_cache_vlocation { #define AFS_VOL_VTM_BAK 0x04 /* backup version of the volume is available (on this server) */ afs_volid_t vid[3]; /* volume IDs for R/W, R/O and Bak volumes */ - struct in_addr servers[8]; /* fileserver addresses */ + struct sockaddr_rxrpc servers[8]; /* fileserver addresses */ time_t rtime; /* last retrieval time */ }; @@ -315,7 +314,7 @@ struct afs_vlocation { struct afs_server { atomic_t usage; time64_t time_of_death; /* time at which put reduced usage to 0 */ - struct in_addr addr; /* server address */ + struct sockaddr_rxrpc addr; /* server address */ struct afs_net *net; /* Network namespace in which the server resides */ struct afs_cell *cell; /* cell in which server resides */ struct list_head link; /* link in cell's server list */ @@ -654,7 +653,7 @@ extern void __net_exit afs_close_socket(struct afs_net *); extern void afs_charge_preallocation(struct work_struct *); extern void afs_put_call(struct afs_call *); extern int afs_queue_call_work(struct afs_call *); -extern int afs_make_call(struct in_addr *, struct afs_call *, gfp_t, bool); +extern int afs_make_call(struct sockaddr_rxrpc *, struct afs_call *, gfp_t, bool); extern struct afs_call *afs_alloc_flat_call(struct afs_net *, const struct afs_call_type *, size_t, size_t); @@ -690,7 +689,7 @@ do { \ extern void afs_server_timer(struct 
timer_list *); extern struct afs_server *afs_lookup_server(struct afs_cell *, - const struct in_addr *); + struct sockaddr_rxrpc *); extern struct afs_server *afs_find_server(struct afs_net *, const struct sockaddr_rxrpc *); extern void afs_put_server(struct afs_net *, struct afs_server *); @@ -707,11 +706,11 @@ extern void __exit afs_fs_exit(void); * vlclient.c */ extern int afs_vl_get_entry_by_name(struct afs_net *, - struct in_addr *, struct key *, + struct sockaddr_rxrpc *, struct key *, const char *, struct afs_cache_vlocation *, bool); extern int afs_vl_get_entry_by_id(struct afs_net *, - struct in_addr *, struct key *, + struct sockaddr_rxrpc *, struct key *, afs_volid_t, afs_voltype_t, struct afs_cache_vlocation *, bool); diff --git a/fs/afs/proc.c b/fs/afs/proc.c index 677a453b08bf..f76018104ae0 100644 --- a/fs/afs/proc.c +++ b/fs/afs/proc.c @@ -570,16 +570,16 @@ static void afs_proc_cell_vlservers_stop(struct seq_file *p, void *v) */ static int afs_proc_cell_vlservers_show(struct seq_file *m, void *v) { - struct in_addr *addr = v; + struct sockaddr_rxrpc *addr = v; /* display header on line 1 */ - if (v == (struct in_addr *) 1) { + if (v == (void *)1) { seq_puts(m, "ADDRESS\n"); return 0; } /* display one cell per line on subsequent lines */ - seq_printf(m, "%pI4\n", &addr->s_addr); + seq_printf(m, "%pISp\n", &addr->transport); return 0; } @@ -652,7 +652,7 @@ static int afs_proc_cell_servers_show(struct seq_file *m, void *v) { struct afs_cell *cell = m->private; struct afs_server *server = list_entry(v, struct afs_server, link); - char ipaddr[20]; + char ipaddr[64]; /* display header on line 1 */ if (v == &cell->servers) { @@ -661,7 +661,7 @@ static int afs_proc_cell_servers_show(struct seq_file *m, void *v) } /* display one cell per line on subsequent lines */ - sprintf(ipaddr, "%pI4", &server->addr); + sprintf(ipaddr, "%pISp", &server->addr.transport); seq_printf(m, "%3d %-15.15s %5d\n", atomic_read(&server->usage), ipaddr, server->fs_state); diff --git 
a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 656ceb285b85..c108effb54be 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -322,10 +322,9 @@ static int afs_send_pages(struct afs_call *call, struct msghdr *msg) /* * initiate a call */ -int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp, - bool async) +int afs_make_call(struct sockaddr_rxrpc *srx, struct afs_call *call, + gfp_t gfp, bool async) { - struct sockaddr_rxrpc srx; struct rxrpc_call *rxcall; struct msghdr msg; struct kvec iov[1]; @@ -334,7 +333,7 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp, u32 abort_code; int ret; - _enter("%x,{%d},", addr->s_addr, ntohs(call->port)); + _enter(",{%pISp},", &srx->transport); ASSERT(call->type != NULL); ASSERT(call->type->name != NULL); @@ -345,15 +344,6 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp, call->async = async; - memset(&srx, 0, sizeof(srx)); - srx.srx_family = AF_RXRPC; - srx.srx_service = call->service_id; - srx.transport_type = SOCK_DGRAM; - srx.transport_len = sizeof(srx.transport.sin); - srx.transport.sin.sin_family = AF_INET; - srx.transport.sin.sin_port = call->port; - memcpy(&srx.transport.sin.sin_addr, addr, 4); - /* Work out the length we're going to transmit. This is awkward for * calls such as FS.StoreData where there's an extra injection of data * after the initial fixed part. @@ -365,7 +355,7 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp, } /* create a call */ - rxcall = rxrpc_kernel_begin_call(call->net->socket, &srx, call->key, + rxcall = rxrpc_kernel_begin_call(call->net->socket, srx, call->key, (unsigned long)call, tx_total_len, gfp, (async ? 
diff --git a/fs/afs/server.c b/fs/afs/server.c index d8044be913f0..662f7fbf5d05 100644 --- a/fs/afs/server.c +++ b/fs/afs/server.c @@ -11,6 +11,7 @@ #include #include +#include "afs_fs.h" #include "internal.h" static unsigned afs_server_timeout = 10; /* server timeout in seconds */ @@ -42,7 +43,7 @@ static int afs_install_server(struct afs_server *server) struct afs_server *xserver; struct afs_net *net = server->cell->net; struct rb_node **pp, *p; - int ret; + int ret, diff; _enter("%p", server); @@ -55,9 +56,10 @@ static int afs_install_server(struct afs_server *server) p = *pp; _debug("- consider %p", p); xserver = rb_entry(p, struct afs_server, master_rb); - if (server->addr.s_addr < xserver->addr.s_addr) + diff = memcmp(&server->addr, &xserver->addr, sizeof(server->addr)); + if (diff < 0) pp = &(*pp)->rb_left; - else if (server->addr.s_addr > xserver->addr.s_addr) + else if (diff > 0) pp = &(*pp)->rb_right; else goto error; @@ -76,7 +78,7 @@ error: * allocate a new server record */ static struct afs_server *afs_alloc_server(struct afs_cell *cell, - const struct in_addr *addr) + const struct sockaddr_rxrpc *addr) { struct afs_server *server; @@ -99,8 +101,7 @@ static struct afs_server *afs_alloc_server(struct afs_cell *cell, INIT_DELAYED_WORK(&server->cb_break_work, afs_dispatch_give_up_callbacks); - memcpy(&server->addr, addr, sizeof(struct in_addr)); - server->addr.s_addr = addr->s_addr; + server->addr = *addr; afs_inc_servers_outstanding(cell->net); _leave(" = %p{%d}", server, atomic_read(&server->usage)); } else { @@ -113,17 +114,17 @@ static struct afs_server *afs_alloc_server(struct afs_cell *cell, * get an FS-server record for a cell */ struct afs_server *afs_lookup_server(struct afs_cell *cell, - const struct in_addr *addr) + struct sockaddr_rxrpc *addr) { struct afs_server *server, *candidate; - _enter("%p,%pI4", cell, &addr->s_addr); + _enter("%p,%pIS", cell, &addr->transport); /* quick scan of the list to see if we already have the server */ 
read_lock(&cell->servers_lock); list_for_each_entry(server, &cell->servers, link) { - if (server->addr.s_addr == addr->s_addr) + if (memcmp(&server->addr, addr, sizeof(*addr)) == 0) goto found_server_quickly; } read_unlock(&cell->servers_lock); @@ -138,7 +139,7 @@ struct afs_server *afs_lookup_server(struct afs_cell *cell, /* check the cell's server list again */ list_for_each_entry(server, &cell->servers, link) { - if (server->addr.s_addr == addr->s_addr) + if (memcmp(&server->addr, addr, sizeof(*addr)) == 0) goto found_server; } @@ -195,9 +196,9 @@ struct afs_server *afs_find_server(struct afs_net *net, { struct afs_server *server = NULL; struct rb_node *p; - struct in_addr addr = srx->transport.sin.sin_addr; + int diff; - _enter("{%d,%pI4}", srx->transport.family, &addr.s_addr); + _enter("{%d,%pIS}", srx->transport.family, &srx->transport); if (srx->transport.family != AF_INET) { WARN(true, "AFS does not yes support non-IPv4 addresses\n"); @@ -212,9 +213,10 @@ struct afs_server *afs_find_server(struct afs_net *net, _debug("- consider %p", p); - if (addr.s_addr < server->addr.s_addr) { + diff = memcmp(srx, &server->addr, sizeof(*srx)); + if (diff < 0) { p = p->rb_left; - } else if (addr.s_addr > server->addr.s_addr) { + } else if (diff > 0) { p = p->rb_right; } else { afs_get_server(server); @@ -225,7 +227,6 @@ struct afs_server *afs_find_server(struct afs_net *net, server = NULL; found: read_unlock(&net->servers_lock); - ASSERTIFCMP(server, server->addr.s_addr, ==, addr.s_addr); _leave(" = %p", server); return server; } diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c index f5a043a9ba61..48d137628d6a 100644 --- a/fs/afs/vlclient.c +++ b/fs/afs/vlclient.c @@ -12,6 +12,7 @@ #include #include #include +#include "afs_fs.h" #include "internal.h" /* @@ -83,8 +84,15 @@ static int afs_deliver_vl_get_entry_by_xxx(struct afs_call *call) bp++; /* type */ entry->nservers = ntohl(*bp++); - for (loop = 0; loop < 8; loop++) - entry->servers[loop].s_addr = *bp++; + for (loop 
= 0; loop < 8; loop++) { + entry->servers[loop].srx_family = AF_RXRPC; + entry->servers[loop].srx_service = FS_SERVICE; + entry->servers[loop].transport_type = SOCK_DGRAM; + entry->servers[loop].transport_len = sizeof(entry->servers[loop].transport.sin); + entry->servers[loop].transport.sin.sin_family = AF_INET; + entry->servers[loop].transport.sin.sin_port = htons(AFS_FS_PORT); + entry->servers[loop].transport.sin.sin_addr.s_addr = *bp++; + } bp += 8; /* partition IDs */ @@ -144,7 +152,7 @@ static const struct afs_call_type afs_RXVLGetEntryById = { * dispatch a get volume entry by name operation */ int afs_vl_get_entry_by_name(struct afs_net *net, - struct in_addr *addr, + struct sockaddr_rxrpc *addr, struct key *key, const char *volname, struct afs_cache_vlocation *entry, @@ -166,8 +174,6 @@ int afs_vl_get_entry_by_name(struct afs_net *net, call->key = key; call->reply = entry; - call->service_id = VL_SERVICE; - call->port = htons(AFS_VL_PORT); /* marshall the parameters */ bp = call->request; @@ -185,7 +191,7 @@ int afs_vl_get_entry_by_name(struct afs_net *net, * dispatch a get volume entry by ID operation */ int afs_vl_get_entry_by_id(struct afs_net *net, - struct in_addr *addr, + struct sockaddr_rxrpc *addr, struct key *key, afs_volid_t volid, afs_voltype_t voltype, @@ -203,8 +209,6 @@ int afs_vl_get_entry_by_id(struct afs_net *net, call->key = key; call->reply = entry; - call->service_id = VL_SERVICE; - call->port = htons(AFS_VL_PORT); /* marshall the parameters */ bp = call->request; diff --git a/fs/afs/vlocation.c b/fs/afs/vlocation.c index 745921171058..ec5ab8dc9bc8 100644 --- a/fs/afs/vlocation.c +++ b/fs/afs/vlocation.c @@ -30,7 +30,6 @@ static int afs_vlocation_access_vl_by_name(struct afs_vlocation *vl, struct afs_cache_vlocation *vldb) { struct afs_cell *cell = vl->cell; - struct in_addr addr; int count, ret; _enter("%s,%s", cell->name, vl->vldb.name); @@ -38,12 +37,12 @@ static int afs_vlocation_access_vl_by_name(struct afs_vlocation *vl, 
down_write(&vl->cell->vl_sem); ret = -ENOMEDIUM; for (count = cell->vl_naddrs; count > 0; count--) { - addr = cell->vl_addrs[cell->vl_curr_svix]; + struct sockaddr_rxrpc *addr = &cell->vl_addrs[cell->vl_curr_svix]; - _debug("CellServ[%hu]: %08x", cell->vl_curr_svix, addr.s_addr); + _debug("CellServ[%hu]: %pIS", cell->vl_curr_svix, &addr->transport); /* attempt to access the VL server */ - ret = afs_vl_get_entry_by_name(cell->net, &addr, key, + ret = afs_vl_get_entry_by_name(cell->net, addr, key, vl->vldb.name, vldb, false); switch (ret) { case 0: @@ -88,7 +87,6 @@ static int afs_vlocation_access_vl_by_id(struct afs_vlocation *vl, struct afs_cache_vlocation *vldb) { struct afs_cell *cell = vl->cell; - struct in_addr addr; int count, ret; _enter("%s,%x,%d,", cell->name, volid, voltype); @@ -96,12 +94,12 @@ static int afs_vlocation_access_vl_by_id(struct afs_vlocation *vl, down_write(&vl->cell->vl_sem); ret = -ENOMEDIUM; for (count = cell->vl_naddrs; count > 0; count--) { - addr = cell->vl_addrs[cell->vl_curr_svix]; + struct sockaddr_rxrpc *addr = &cell->vl_addrs[cell->vl_curr_svix]; - _debug("CellServ[%hu]: %08x", cell->vl_curr_svix, addr.s_addr); + _debug("CellServ[%hu]: %pIS", cell->vl_curr_svix, &addr->transport); /* attempt to access the VL server */ - ret = afs_vl_get_entry_by_id(cell->net, &addr, key, volid, + ret = afs_vl_get_entry_by_id(cell->net, addr, key, volid, voltype, vldb, false); switch (ret) { case 0: @@ -192,15 +190,7 @@ static int afs_vlocation_update_record(struct afs_vlocation *vl, int ret; /* try to look up a cached volume in the cell VL databases by ID */ - _debug("Locally Cached: %s %02x { %08x(%x) %08x(%x) %08x(%x) }", - vl->vldb.name, - vl->vldb.vidmask, - ntohl(vl->vldb.servers[0].s_addr), - vl->vldb.srvtmask[0], - ntohl(vl->vldb.servers[1].s_addr), - vl->vldb.srvtmask[1], - ntohl(vl->vldb.servers[2].s_addr), - vl->vldb.srvtmask[2]); + _debug("Locally Cached: %s %02x", vl->vldb.name, vl->vldb.vidmask); _debug("Vids: %08x %08x %08x", 
vl->vldb.vid[0], @@ -258,11 +248,7 @@ static int afs_vlocation_update_record(struct afs_vlocation *vl, static void afs_vlocation_apply_update(struct afs_vlocation *vl, struct afs_cache_vlocation *vldb) { - _debug("Done VL Lookup: %s %02x { %08x(%x) %08x(%x) %08x(%x) }", - vldb->name, vldb->vidmask, - ntohl(vldb->servers[0].s_addr), vldb->srvtmask[0], - ntohl(vldb->servers[1].s_addr), vldb->srvtmask[1], - ntohl(vldb->servers[2].s_addr), vldb->srvtmask[2]); + _debug("Done VL Lookup: %s %02x", vldb->name, vldb->vidmask); _debug("Vids: %08x %08x %08x", vldb->vid[0], vldb->vid[1], vldb->vid[2]); diff --git a/fs/afs/vnode.c b/fs/afs/vnode.c index d5ef834ba4ac..b79d05374878 100644 --- a/fs/afs/vnode.c +++ b/fs/afs/vnode.c @@ -354,8 +354,8 @@ get_anyway: if (IS_ERR(server)) goto no_server; - _debug("USING SERVER: %p{%08x}", - server, ntohl(server->addr.s_addr)); + _debug("USING SERVER: %p{%pIS}", + server, &server->addr.transport); ret = afs_fs_fetch_file_status(server, key, vnode, NULL, false); @@ -418,7 +418,7 @@ int afs_vnode_fetch_data(struct afs_vnode *vnode, struct key *key, if (IS_ERR(server)) goto no_server; - _debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr)); + _debug("USING SERVER: %pIS\n", &server->addr.transport); ret = afs_fs_fetch_data(server, key, vnode, desc, false); @@ -474,7 +474,7 @@ int afs_vnode_create(struct afs_vnode *vnode, struct key *key, if (IS_ERR(server)) goto no_server; - _debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr)); + _debug("USING SERVER: %pIS\n", &server->addr.transport); ret = afs_fs_create(server, key, vnode, name, mode, newfid, newstatus, newcb, false); @@ -530,7 +530,7 @@ int afs_vnode_remove(struct afs_vnode *vnode, struct key *key, const char *name, if (IS_ERR(server)) goto no_server; - _debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr)); + _debug("USING SERVER: %pIS\n", &server->addr.transport); ret = afs_fs_remove(server, key, vnode, name, isdir, false); @@ -592,7 +592,7 @@ int afs_vnode_link(struct 
afs_vnode *dvnode, struct afs_vnode *vnode, if (IS_ERR(server)) goto no_server; - _debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr)); + _debug("USING SERVER: %pIS\n", &server->addr.transport); ret = afs_fs_link(server, key, dvnode, vnode, name, false); @@ -656,7 +656,7 @@ int afs_vnode_symlink(struct afs_vnode *vnode, struct key *key, if (IS_ERR(server)) goto no_server; - _debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr)); + _debug("USING SERVER: %pIS\n", &server->addr.transport); ret = afs_fs_symlink(server, key, vnode, name, content, newfid, newstatus, false); @@ -726,7 +726,7 @@ int afs_vnode_rename(struct afs_vnode *orig_dvnode, if (IS_ERR(server)) goto no_server; - _debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr)); + _debug("USING SERVER: %pIS\n", &server->addr.transport); ret = afs_fs_rename(server, key, orig_dvnode, orig_name, new_dvnode, new_name, false); @@ -792,7 +792,7 @@ int afs_vnode_store_data(struct afs_writeback *wb, pgoff_t first, pgoff_t last, if (IS_ERR(server)) goto no_server; - _debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr)); + _debug("USING SERVER: %pIS\n", &server->addr.transport); ret = afs_fs_store_data(server, wb, first, last, offset, to, false); @@ -845,7 +845,7 @@ int afs_vnode_setattr(struct afs_vnode *vnode, struct key *key, if (IS_ERR(server)) goto no_server; - _debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr)); + _debug("USING SERVER: %pIS\n", &server->addr.transport); ret = afs_fs_setattr(server, key, vnode, attr, false); @@ -892,7 +892,7 @@ int afs_vnode_get_volume_status(struct afs_vnode *vnode, struct key *key, if (IS_ERR(server)) goto no_server; - _debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr)); + _debug("USING SERVER: %pIS\n", &server->addr.transport); ret = afs_fs_get_volume_status(server, key, vnode, vs, false); @@ -931,7 +931,7 @@ int afs_vnode_set_lock(struct afs_vnode *vnode, struct key *key, if (IS_ERR(server)) goto no_server; - _debug("USING SERVER: %08x\n", 
ntohl(server->addr.s_addr)); + _debug("USING SERVER: %pIS\n", &server->addr.transport); ret = afs_fs_set_lock(server, key, vnode, type, false); @@ -969,7 +969,7 @@ int afs_vnode_extend_lock(struct afs_vnode *vnode, struct key *key) if (IS_ERR(server)) goto no_server; - _debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr)); + _debug("USING SERVER: %pIS\n", &server->addr.transport); ret = afs_fs_extend_lock(server, key, vnode, false); @@ -1007,7 +1007,7 @@ int afs_vnode_release_lock(struct afs_vnode *vnode, struct key *key) if (IS_ERR(server)) goto no_server; - _debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr)); + _debug("USING SERVER: %pIS\n", &server->addr.transport); ret = afs_fs_release_lock(server, key, vnode, false); diff --git a/fs/afs/volume.c b/fs/afs/volume.c index ccac5b1e079d..52f0dc40732b 100644 --- a/fs/afs/volume.c +++ b/fs/afs/volume.c @@ -248,8 +248,8 @@ struct afs_server *afs_volume_pick_fileserver(struct afs_vnode *vnode) case 0: afs_get_server(server); up_read(&volume->server_sem); - _leave(" = %p (picked %08x)", - server, ntohl(server->addr.s_addr)); + _leave(" = %p (picked %pIS)", + server, &server->addr.transport); return server; case -ENETUNREACH: @@ -303,9 +303,8 @@ int afs_volume_release_fileserver(struct afs_vnode *vnode, struct afs_volume *volume = vnode->volume; unsigned loop; - _enter("%s,%08x,%d", - volume->vlocation->vldb.name, ntohl(server->addr.s_addr), - result); + _enter("%s,%pIS,%d", + volume->vlocation->vldb.name, &server->addr.transport, result); switch (result) { /* success */ From 3838d3ecdea496699a8c13c183d4df5dfe8e1a3e Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 2 Nov 2017 15:27:47 +0000 Subject: [PATCH 10/35] afs: Allow IPv6 address specification of VL servers Allow VL server specifications to be given IPv6 addresses as well as IPv4 addresses, for example as: echo add foo.org 1111:2222:3333:0:4444:5555:6666:7777 >/proc/fs/afs/cells Note that ':' is the expected separator between IPv4
addresses, but if a ',' is detected or no '.' is detected in the string, the delimiter is switched to ','. This also works with DNS AFSDB or SRV record strings fetched by upcall from userspace. Signed-off-by: David Howells --- fs/afs/cell.c | 31 +++++++++++++++++++++---------- fs/afs/proc.c | 2 +- fs/afs/rxrpc.c | 11 +++++------ fs/afs/server.c | 5 ----- fs/afs/vlclient.c | 13 +++++++++---- 5 files changed, 36 insertions(+), 26 deletions(-) diff --git a/fs/afs/cell.c b/fs/afs/cell.c index 5523fa3c05d9..216821fd1a61 100644 --- a/fs/afs/cell.c +++ b/fs/afs/cell.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include "internal.h" @@ -86,28 +87,38 @@ static struct afs_cell *afs_cell_alloc(struct afs_net *net, delimiter = ','; } else { + if (strchr(vllist, ',') || !strchr(vllist, '.')) + delimiter = ','; _vllist = vllist; } /* fill in the VL server list from the rest of the string */ do { struct sockaddr_rxrpc *srx = &cell->vl_addrs[cell->vl_naddrs]; - unsigned a, b, c, d; + const char *end; next = strchr(_vllist, delimiter); if (next) *next++ = 0; - if (sscanf(_vllist, "%u.%u.%u.%u", &a, &b, &c, &d) != 4) + if (in4_pton(_vllist, -1, (u8 *)&srx->transport.sin6.sin6_addr.s6_addr32[3], + -1, &end)) { + srx->transport_len = sizeof(struct sockaddr_in6); + srx->transport.sin6.sin6_family = AF_INET6; + srx->transport.sin6.sin6_flowinfo = 0; + srx->transport.sin6.sin6_scope_id = 0; + srx->transport.sin6.sin6_addr.s6_addr32[0] = 0; + srx->transport.sin6.sin6_addr.s6_addr32[1] = 0; + srx->transport.sin6.sin6_addr.s6_addr32[2] = htonl(0xffff); + } else if (in6_pton(_vllist, -1, srx->transport.sin6.sin6_addr.s6_addr, + -1, &end)) { + srx->transport_len = sizeof(struct sockaddr_in6); + srx->transport.sin6.sin6_family = AF_INET6; + srx->transport.sin6.sin6_flowinfo = 0; + srx->transport.sin6.sin6_scope_id = 0; + } else { goto bad_address; - - if (a > 255 || b > 255 || c > 255 || d > 255) - goto bad_address; - - srx->transport_len = sizeof(struct sockaddr_in); - 
srx->transport.sin.sin_family = AF_INET; - srx->transport.sin.sin_addr.s_addr = - htonl((a << 24) | (b << 16) | (c << 8) | d); + } } while (cell->vl_naddrs++, cell->vl_naddrs < AFS_CELL_MAX_ADDRS && (_vllist = next)); diff --git a/fs/afs/proc.c b/fs/afs/proc.c index f76018104ae0..d00d550ff2ef 100644 --- a/fs/afs/proc.c +++ b/fs/afs/proc.c @@ -662,7 +662,7 @@ static int afs_proc_cell_servers_show(struct seq_file *m, void *v) /* display one cell per line on subsequent lines */ sprintf(ipaddr, "%pISp", &server->addr.transport); - seq_printf(m, "%3d %-15.15s %5d\n", + seq_printf(m, "%3d %-15s %5d\n", atomic_read(&server->usage), ipaddr, server->fs_state); return 0; diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index c108effb54be..5d2c1a34ffd5 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -46,21 +46,20 @@ int afs_open_socket(struct afs_net *net) _enter(""); - ret = sock_create_kern(&init_net, AF_RXRPC, SOCK_DGRAM, PF_INET, &socket); + ret = sock_create_kern(&init_net, AF_RXRPC, SOCK_DGRAM, PF_INET6, &socket); if (ret < 0) goto error_1; socket->sk->sk_allocation = GFP_NOFS; /* bind the callback manager's address to make this a server socket */ + memset(&srx, 0, sizeof(srx)); srx.srx_family = AF_RXRPC; srx.srx_service = CM_SERVICE; srx.transport_type = SOCK_DGRAM; - srx.transport_len = sizeof(srx.transport.sin); - srx.transport.sin.sin_family = AF_INET; - srx.transport.sin.sin_port = htons(AFS_CM_PORT); - memset(&srx.transport.sin.sin_addr, 0, - sizeof(srx.transport.sin.sin_addr)); + srx.transport_len = sizeof(srx.transport.sin6); + srx.transport.sin6.sin6_family = AF_INET6; + srx.transport.sin6.sin6_port = htons(AFS_CM_PORT); ret = kernel_bind(socket, (struct sockaddr *) &srx, sizeof(srx)); if (ret < 0) diff --git a/fs/afs/server.c b/fs/afs/server.c index 662f7fbf5d05..c63974f06385 100644 --- a/fs/afs/server.c +++ b/fs/afs/server.c @@ -200,11 +200,6 @@ struct afs_server *afs_find_server(struct afs_net *net, _enter("{%d,%pIS}", srx->transport.family, 
&srx->transport); - if (srx->transport.family != AF_INET) { - WARN(true, "AFS does not yes support non-IPv4 addresses\n"); - return NULL; - } - read_lock(&net->servers_lock); p = net->servers.rb_node; diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c index 48d137628d6a..276319aa86d8 100644 --- a/fs/afs/vlclient.c +++ b/fs/afs/vlclient.c @@ -88,10 +88,15 @@ static int afs_deliver_vl_get_entry_by_xxx(struct afs_call *call) entry->servers[loop].srx_family = AF_RXRPC; entry->servers[loop].srx_service = FS_SERVICE; entry->servers[loop].transport_type = SOCK_DGRAM; - entry->servers[loop].transport_len = sizeof(entry->servers[loop].transport.sin); - entry->servers[loop].transport.sin.sin_family = AF_INET; - entry->servers[loop].transport.sin.sin_port = htons(AFS_FS_PORT); - entry->servers[loop].transport.sin.sin_addr.s_addr = *bp++; + entry->servers[loop].transport_len = sizeof(entry->servers[loop].transport.sin6); + entry->servers[loop].transport.sin6.sin6_family = AF_INET6; + entry->servers[loop].transport.sin6.sin6_port = htons(AFS_FS_PORT); + entry->servers[loop].transport.sin6.sin6_flowinfo = 0; + entry->servers[loop].transport.sin6.sin6_scope_id = 0; + entry->servers[loop].transport.sin6.sin6_addr.s6_addr32[0] = 0; + entry->servers[loop].transport.sin6.sin6_addr.s6_addr32[1] = 0; + entry->servers[loop].transport.sin6.sin6_addr.s6_addr32[2] = htonl(0xffff); + entry->servers[loop].transport.sin6.sin6_addr.s6_addr32[3] = *bp++; } bp += 8; /* partition IDs */ From f780c8ea0e73c0d371ffbebff91bb7555697219f Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 2 Nov 2017 15:27:48 +0000 Subject: [PATCH 11/35] afs: Consolidate abort_to_error translators The AFS abort code space is shared across all services, so there's no need for separate abort_to_error translators for each service. Consolidate them into a single function and remove the function pointers for them. 
Signed-off-by: David Howells --- fs/afs/cmservice.c | 6 ------ fs/afs/fsclient.c | 18 ------------------ fs/afs/internal.h | 3 --- fs/afs/misc.c | 38 ++++++++++++++++++++++++++++++++++---- fs/afs/rxrpc.c | 5 ++--- fs/afs/vlclient.c | 43 ------------------------------------------- 6 files changed, 36 insertions(+), 77 deletions(-) diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c index 9ad39f8a7e87..24032916e0ca 100644 --- a/fs/afs/cmservice.c +++ b/fs/afs/cmservice.c @@ -41,7 +41,6 @@ static CM_NAME(CallBack); static const struct afs_call_type afs_SRXCBCallBack = { .name = afs_SRXCBCallBack_name, .deliver = afs_deliver_cb_callback, - .abort_to_error = afs_abort_to_error, .destructor = afs_cm_destructor, .work = SRXAFSCB_CallBack, }; @@ -53,7 +52,6 @@ static CM_NAME(InitCallBackState); static const struct afs_call_type afs_SRXCBInitCallBackState = { .name = afs_SRXCBInitCallBackState_name, .deliver = afs_deliver_cb_init_call_back_state, - .abort_to_error = afs_abort_to_error, .destructor = afs_cm_destructor, .work = SRXAFSCB_InitCallBackState, }; @@ -65,7 +63,6 @@ static CM_NAME(InitCallBackState3); static const struct afs_call_type afs_SRXCBInitCallBackState3 = { .name = afs_SRXCBInitCallBackState3_name, .deliver = afs_deliver_cb_init_call_back_state3, - .abort_to_error = afs_abort_to_error, .destructor = afs_cm_destructor, .work = SRXAFSCB_InitCallBackState, }; @@ -77,7 +74,6 @@ static CM_NAME(Probe); static const struct afs_call_type afs_SRXCBProbe = { .name = afs_SRXCBProbe_name, .deliver = afs_deliver_cb_probe, - .abort_to_error = afs_abort_to_error, .destructor = afs_cm_destructor, .work = SRXAFSCB_Probe, }; @@ -89,7 +85,6 @@ static CM_NAME(ProbeUuid); static const struct afs_call_type afs_SRXCBProbeUuid = { .name = afs_SRXCBProbeUuid_name, .deliver = afs_deliver_cb_probe_uuid, - .abort_to_error = afs_abort_to_error, .destructor = afs_cm_destructor, .work = SRXAFSCB_ProbeUuid, }; @@ -101,7 +96,6 @@ static CM_NAME(TellMeAboutYourself); static const struct 
afs_call_type afs_SRXCBTellMeAboutYourself = { .name = afs_SRXCBTellMeAboutYourself_name, .deliver = afs_deliver_cb_tell_me_about_yourself, - .abort_to_error = afs_abort_to_error, .destructor = afs_cm_destructor, .work = SRXAFSCB_TellMeAboutYourself, }; diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index bac2e8db6e75..7acfbc6e1f20 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -270,7 +270,6 @@ static int afs_deliver_fs_fetch_status(struct afs_call *call) static const struct afs_call_type afs_RXFSFetchStatus = { .name = "FS.FetchStatus", .deliver = afs_deliver_fs_fetch_status, - .abort_to_error = afs_abort_to_error, .destructor = afs_flat_call_destructor, }; @@ -468,14 +467,12 @@ static void afs_fetch_data_destructor(struct afs_call *call) static const struct afs_call_type afs_RXFSFetchData = { .name = "FS.FetchData", .deliver = afs_deliver_fs_fetch_data, - .abort_to_error = afs_abort_to_error, .destructor = afs_fetch_data_destructor, }; static const struct afs_call_type afs_RXFSFetchData64 = { .name = "FS.FetchData64", .deliver = afs_deliver_fs_fetch_data, - .abort_to_error = afs_abort_to_error, .destructor = afs_fetch_data_destructor, }; @@ -579,7 +576,6 @@ static int afs_deliver_fs_give_up_callbacks(struct afs_call *call) static const struct afs_call_type afs_RXFSGiveUpCallBacks = { .name = "FS.GiveUpCallBacks", .deliver = afs_deliver_fs_give_up_callbacks, - .abort_to_error = afs_abort_to_error, .destructor = afs_flat_call_destructor, }; @@ -677,7 +673,6 @@ static int afs_deliver_fs_create_vnode(struct afs_call *call) static const struct afs_call_type afs_RXFSCreateXXXX = { .name = "FS.CreateXXXX", .deliver = afs_deliver_fs_create_vnode, - .abort_to_error = afs_abort_to_error, .destructor = afs_flat_call_destructor, }; @@ -769,7 +764,6 @@ static int afs_deliver_fs_remove(struct afs_call *call) static const struct afs_call_type afs_RXFSRemoveXXXX = { .name = "FS.RemoveXXXX", .deliver = afs_deliver_fs_remove, - .abort_to_error = afs_abort_to_error, 
.destructor = afs_flat_call_destructor, }; @@ -849,7 +843,6 @@ static int afs_deliver_fs_link(struct afs_call *call) static const struct afs_call_type afs_RXFSLink = { .name = "FS.Link", .deliver = afs_deliver_fs_link, - .abort_to_error = afs_abort_to_error, .destructor = afs_flat_call_destructor, }; @@ -934,7 +927,6 @@ static int afs_deliver_fs_symlink(struct afs_call *call) static const struct afs_call_type afs_RXFSSymlink = { .name = "FS.Symlink", .deliver = afs_deliver_fs_symlink, - .abort_to_error = afs_abort_to_error, .destructor = afs_flat_call_destructor, }; @@ -1038,7 +1030,6 @@ static int afs_deliver_fs_rename(struct afs_call *call) static const struct afs_call_type afs_RXFSRename = { .name = "FS.Rename", .deliver = afs_deliver_fs_rename, - .abort_to_error = afs_abort_to_error, .destructor = afs_flat_call_destructor, }; @@ -1140,14 +1131,12 @@ static int afs_deliver_fs_store_data(struct afs_call *call) static const struct afs_call_type afs_RXFSStoreData = { .name = "FS.StoreData", .deliver = afs_deliver_fs_store_data, - .abort_to_error = afs_abort_to_error, .destructor = afs_flat_call_destructor, }; static const struct afs_call_type afs_RXFSStoreData64 = { .name = "FS.StoreData64", .deliver = afs_deliver_fs_store_data, - .abort_to_error = afs_abort_to_error, .destructor = afs_flat_call_destructor, }; @@ -1318,21 +1307,18 @@ static int afs_deliver_fs_store_status(struct afs_call *call) static const struct afs_call_type afs_RXFSStoreStatus = { .name = "FS.StoreStatus", .deliver = afs_deliver_fs_store_status, - .abort_to_error = afs_abort_to_error, .destructor = afs_flat_call_destructor, }; static const struct afs_call_type afs_RXFSStoreData_as_Status = { .name = "FS.StoreData", .deliver = afs_deliver_fs_store_status, - .abort_to_error = afs_abort_to_error, .destructor = afs_flat_call_destructor, }; static const struct afs_call_type afs_RXFSStoreData64_as_Status = { .name = "FS.StoreData64", .deliver = afs_deliver_fs_store_status, - .abort_to_error = 
afs_abort_to_error, .destructor = afs_flat_call_destructor, }; @@ -1659,7 +1645,6 @@ static void afs_get_volume_status_call_destructor(struct afs_call *call) static const struct afs_call_type afs_RXFSGetVolumeStatus = { .name = "FS.GetVolumeStatus", .deliver = afs_deliver_fs_get_volume_status, - .abort_to_error = afs_abort_to_error, .destructor = afs_get_volume_status_call_destructor, }; @@ -1730,7 +1715,6 @@ static int afs_deliver_fs_xxxx_lock(struct afs_call *call) static const struct afs_call_type afs_RXFSSetLock = { .name = "FS.SetLock", .deliver = afs_deliver_fs_xxxx_lock, - .abort_to_error = afs_abort_to_error, .destructor = afs_flat_call_destructor, }; @@ -1740,7 +1724,6 @@ static const struct afs_call_type afs_RXFSSetLock = { static const struct afs_call_type afs_RXFSExtendLock = { .name = "FS.ExtendLock", .deliver = afs_deliver_fs_xxxx_lock, - .abort_to_error = afs_abort_to_error, .destructor = afs_flat_call_destructor, }; @@ -1750,7 +1733,6 @@ static const struct afs_call_type afs_RXFSExtendLock = { static const struct afs_call_type afs_RXFSReleaseLock = { .name = "FS.ReleaseLock", .deliver = afs_deliver_fs_xxxx_lock, - .abort_to_error = afs_abort_to_error, .destructor = afs_flat_call_destructor, }; diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 4fe26bd7bfb0..ba31a386bb2d 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -120,9 +120,6 @@ struct afs_call_type { */ int (*deliver)(struct afs_call *call); - /* map an abort code to an error number */ - int (*abort_to_error)(u32 abort_code); - /* clean up a call */ void (*destructor)(struct afs_call *call); diff --git a/fs/afs/misc.c b/fs/afs/misc.c index c05f1f1c0d41..700a5fa7f4ec 100644 --- a/fs/afs/misc.c +++ b/fs/afs/misc.c @@ -21,12 +21,12 @@ int afs_abort_to_error(u32 abort_code) { switch (abort_code) { - /* low errno codes inserted into abort namespace */ + /* Low errno codes inserted into abort namespace */ case 13: return -EACCES; case 27: return -EFBIG; case 30: return -EROFS; - /* 
VICE "special error" codes; 101 - 111 */ + /* VICE "special error" codes; 101 - 111 */ case VSALVAGE: return -EIO; case VNOVNODE: return -ENOENT; case VNOVOL: return -ENOMEDIUM; @@ -39,7 +39,37 @@ int afs_abort_to_error(u32 abort_code) case VBUSY: return -EBUSY; case VMOVED: return -ENXIO; - /* Unified AFS error table; ET "uae" == 0x2f6df00 */ + /* Volume Location server errors */ + case AFSVL_IDEXIST: return -EEXIST; + case AFSVL_IO: return -EREMOTEIO; + case AFSVL_NAMEEXIST: return -EEXIST; + case AFSVL_CREATEFAIL: return -EREMOTEIO; + case AFSVL_NOENT: return -ENOMEDIUM; + case AFSVL_EMPTY: return -ENOMEDIUM; + case AFSVL_ENTDELETED: return -ENOMEDIUM; + case AFSVL_BADNAME: return -EINVAL; + case AFSVL_BADINDEX: return -EINVAL; + case AFSVL_BADVOLTYPE: return -EINVAL; + case AFSVL_BADSERVER: return -EINVAL; + case AFSVL_BADPARTITION: return -EINVAL; + case AFSVL_REPSFULL: return -EFBIG; + case AFSVL_NOREPSERVER: return -ENOENT; + case AFSVL_DUPREPSERVER: return -EEXIST; + case AFSVL_RWNOTFOUND: return -ENOENT; + case AFSVL_BADREFCOUNT: return -EINVAL; + case AFSVL_SIZEEXCEEDED: return -EINVAL; + case AFSVL_BADENTRY: return -EINVAL; + case AFSVL_BADVOLIDBUMP: return -EINVAL; + case AFSVL_IDALREADYHASHED: return -EINVAL; + case AFSVL_ENTRYLOCKED: return -EBUSY; + case AFSVL_BADVOLOPER: return -EBADRQC; + case AFSVL_BADRELLOCKTYPE: return -EINVAL; + case AFSVL_RERELEASE: return -EREMOTEIO; + case AFSVL_BADSERVERFLAG: return -EINVAL; + case AFSVL_PERM: return -EACCES; + case AFSVL_NOMEM: return -EREMOTEIO; + + /* Unified AFS error table; ET "uae" == 0x2f6df00 */ case 0x2f6df00: return -EPERM; case 0x2f6df01: return -ENOENT; case 0x2f6df04: return -EIO; @@ -68,7 +98,7 @@ int afs_abort_to_error(u32 abort_code) case 0x2f6df6c: return -ETIMEDOUT; case 0x2f6df78: return -EDQUOT; - /* RXKAD abort codes; from include/rxrpc/packet.h. ET "RXK" == 0x1260B00 */ + /* RXKAD abort codes; from include/rxrpc/packet.h. 
ET "RXK" == 0x1260B00 */ case RXKADINCONSISTENCY: return -EPROTO; case RXKADPACKETSHORT: return -EPROTO; case RXKADLEVELFAIL: return -EKEYREJECTED; diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 5d2c1a34ffd5..5f06cf720340 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -31,7 +31,6 @@ static int afs_deliver_cm_op_id(struct afs_call *); static const struct afs_call_type afs_RXCMxxxx = { .name = "CB.xxxx", .deliver = afs_deliver_cm_op_id, - .abort_to_error = afs_abort_to_error, }; /* @@ -418,7 +417,7 @@ error_do_abort: rxrpc_kernel_recv_data(call->net->socket, rxcall, NULL, 0, &offset, false, &abort_code, &call->service_id); - ret = call->type->abort_to_error(abort_code); + ret = afs_abort_to_error(abort_code); } error_kill_call: afs_put_call(call); @@ -876,7 +875,7 @@ int afs_extract_data(struct afs_call *call, void *buf, size_t count, } if (ret == -ECONNABORTED) - call->error = call->type->abort_to_error(call->abort_code); + call->error = afs_abort_to_error(call->abort_code); else call->error = ret; call->state = AFS_CALL_COMPLETE; diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c index 276319aa86d8..66e62be07b63 100644 --- a/fs/afs/vlclient.c +++ b/fs/afs/vlclient.c @@ -15,47 +15,6 @@ #include "afs_fs.h" #include "internal.h" -/* - * map volume locator abort codes to error codes - */ -static int afs_vl_abort_to_error(u32 abort_code) -{ - _enter("%u", abort_code); - - switch (abort_code) { - case AFSVL_IDEXIST: return -EEXIST; - case AFSVL_IO: return -EREMOTEIO; - case AFSVL_NAMEEXIST: return -EEXIST; - case AFSVL_CREATEFAIL: return -EREMOTEIO; - case AFSVL_NOENT: return -ENOMEDIUM; - case AFSVL_EMPTY: return -ENOMEDIUM; - case AFSVL_ENTDELETED: return -ENOMEDIUM; - case AFSVL_BADNAME: return -EINVAL; - case AFSVL_BADINDEX: return -EINVAL; - case AFSVL_BADVOLTYPE: return -EINVAL; - case AFSVL_BADSERVER: return -EINVAL; - case AFSVL_BADPARTITION: return -EINVAL; - case AFSVL_REPSFULL: return -EFBIG; - case AFSVL_NOREPSERVER: return -ENOENT; - case 
AFSVL_DUPREPSERVER: return -EEXIST; - case AFSVL_RWNOTFOUND: return -ENOENT; - case AFSVL_BADREFCOUNT: return -EINVAL; - case AFSVL_SIZEEXCEEDED: return -EINVAL; - case AFSVL_BADENTRY: return -EINVAL; - case AFSVL_BADVOLIDBUMP: return -EINVAL; - case AFSVL_IDALREADYHASHED: return -EINVAL; - case AFSVL_ENTRYLOCKED: return -EBUSY; - case AFSVL_BADVOLOPER: return -EBADRQC; - case AFSVL_BADRELLOCKTYPE: return -EINVAL; - case AFSVL_RERELEASE: return -EREMOTEIO; - case AFSVL_BADSERVERFLAG: return -EINVAL; - case AFSVL_PERM: return -EACCES; - case AFSVL_NOMEM: return -EREMOTEIO; - default: - return afs_abort_to_error(abort_code); - } -} - /* * deliver reply data to a VL.GetEntryByXXX call */ @@ -139,7 +98,6 @@ static int afs_deliver_vl_get_entry_by_xxx(struct afs_call *call) static const struct afs_call_type afs_RXVLGetEntryByName = { .name = "VL.GetEntryByName", .deliver = afs_deliver_vl_get_entry_by_xxx, - .abort_to_error = afs_vl_abort_to_error, .destructor = afs_flat_call_destructor, }; @@ -149,7 +107,6 @@ static const struct afs_call_type afs_RXVLGetEntryByName = { static const struct afs_call_type afs_RXVLGetEntryById = { .name = "VL.GetEntryById", .deliver = afs_deliver_vl_get_entry_by_xxx, - .abort_to_error = afs_vl_abort_to_error, .destructor = afs_flat_call_destructor, }; From 97e3043ad82c93b7c2e3c4bfc518f7401f175821 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 2 Nov 2017 15:27:48 +0000 Subject: [PATCH 12/35] afs: Condense afs_call's reply{,2,3,4} into an array Condense struct afs_call's reply anchor members - reply{,2,3,4} - into an array. 
Signed-off-by: David Howells --- fs/afs/file.c | 2 +- fs/afs/fsclient.c | 136 +++++++++++++++++++++++----------------------- fs/afs/internal.h | 5 +- fs/afs/rxrpc.c | 2 +- fs/afs/vlclient.c | 6 +- 5 files changed, 74 insertions(+), 77 deletions(-) diff --git a/fs/afs/file.c b/fs/afs/file.c index 510cba15fa56..08f9f0c5dfac 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c @@ -281,7 +281,7 @@ static int afs_readpage(struct file *file, struct page *page) static void afs_readpages_page_done(struct afs_call *call, struct afs_read *req) { #ifdef CONFIG_AFS_FSCACHE - struct afs_vnode *vnode = call->reply; + struct afs_vnode *vnode = call->reply[0]; #endif struct page *page = req->pages[req->index]; diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index 7acfbc6e1f20..36f58adde030 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -243,7 +243,7 @@ static void xdr_decode_AFSFetchVolumeStatus(const __be32 **_bp, */ static int afs_deliver_fs_fetch_status(struct afs_call *call) { - struct afs_vnode *vnode = call->reply; + struct afs_vnode *vnode = call->reply[0]; const __be32 *bp; int ret; @@ -257,8 +257,8 @@ static int afs_deliver_fs_fetch_status(struct afs_call *call) bp = call->buffer; xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL); xdr_decode_AFSCallBack(&bp, vnode); - if (call->reply2) - xdr_decode_AFSVolSync(&bp, call->reply2); + if (call->reply[1]) + xdr_decode_AFSVolSync(&bp, call->reply[1]); _leave(" = 0 [done]"); return 0; @@ -294,8 +294,8 @@ int afs_fs_fetch_file_status(struct afs_server *server, return -ENOMEM; call->key = key; - call->reply = vnode; - call->reply2 = volsync; + call->reply[0] = vnode; + call->reply[1] = volsync; /* marshall the parameters */ bp = call->request; @@ -312,8 +312,8 @@ int afs_fs_fetch_file_status(struct afs_server *server, */ static int afs_deliver_fs_fetch_data(struct afs_call *call) { - struct afs_vnode *vnode = call->reply; - struct afs_read *req = call->reply3; + struct afs_vnode *vnode = call->reply[0]; + struct 
afs_read *req = call->reply[2]; const __be32 *bp; unsigned int size; void *buffer; @@ -430,8 +430,8 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call) bp = call->buffer; xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL); xdr_decode_AFSCallBack(&bp, vnode); - if (call->reply2) - xdr_decode_AFSVolSync(&bp, call->reply2); + if (call->reply[1]) + xdr_decode_AFSVolSync(&bp, call->reply[1]); call->offset = 0; call->unmarshall++; @@ -455,7 +455,7 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call) static void afs_fetch_data_destructor(struct afs_call *call) { - struct afs_read *req = call->reply3; + struct afs_read *req = call->reply[2]; afs_put_read(req); afs_flat_call_destructor(call); @@ -496,9 +496,9 @@ static int afs_fs_fetch_data64(struct afs_server *server, return -ENOMEM; call->key = key; - call->reply = vnode; - call->reply2 = NULL; /* volsync */ - call->reply3 = req; + call->reply[0] = vnode; + call->reply[1] = NULL; /* volsync */ + call->reply[2] = req; call->operation_ID = FSFETCHDATA64; /* marshall the parameters */ @@ -541,9 +541,9 @@ int afs_fs_fetch_data(struct afs_server *server, return -ENOMEM; call->key = key; - call->reply = vnode; - call->reply2 = NULL; /* volsync */ - call->reply3 = req; + call->reply[0] = vnode; + call->reply[1] = NULL; /* volsync */ + call->reply[2] = req; call->operation_ID = FSFETCHDATA; /* marshall the parameters */ @@ -645,7 +645,7 @@ int afs_fs_give_up_callbacks(struct afs_net *net, */ static int afs_deliver_fs_create_vnode(struct afs_call *call) { - struct afs_vnode *vnode = call->reply; + struct afs_vnode *vnode = call->reply[0]; const __be32 *bp; int ret; @@ -657,11 +657,11 @@ static int afs_deliver_fs_create_vnode(struct afs_call *call) /* unmarshall the reply once we've received all of it */ bp = call->buffer; - xdr_decode_AFSFid(&bp, call->reply2); - xdr_decode_AFSFetchStatus(&bp, call->reply3, NULL, NULL); + xdr_decode_AFSFid(&bp, call->reply[1]); + xdr_decode_AFSFetchStatus(&bp, 
call->reply[2], NULL, NULL); xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL); - xdr_decode_AFSCallBack_raw(&bp, call->reply4); - /* xdr_decode_AFSVolSync(&bp, call->replyX); */ + xdr_decode_AFSCallBack_raw(&bp, call->reply[3]); + /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */ _leave(" = 0 [done]"); return 0; @@ -706,10 +706,10 @@ int afs_fs_create(struct afs_server *server, return -ENOMEM; call->key = key; - call->reply = vnode; - call->reply2 = newfid; - call->reply3 = newstatus; - call->reply4 = newcb; + call->reply[0] = vnode; + call->reply[1] = newfid; + call->reply[2] = newstatus; + call->reply[3] = newcb; /* marshall the parameters */ bp = call->request; @@ -739,7 +739,7 @@ int afs_fs_create(struct afs_server *server, */ static int afs_deliver_fs_remove(struct afs_call *call) { - struct afs_vnode *vnode = call->reply; + struct afs_vnode *vnode = call->reply[0]; const __be32 *bp; int ret; @@ -752,7 +752,7 @@ static int afs_deliver_fs_remove(struct afs_call *call) /* unmarshall the reply once we've received all of it */ bp = call->buffer; xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL); - /* xdr_decode_AFSVolSync(&bp, call->replyX); */ + /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */ _leave(" = 0 [done]"); return 0; @@ -793,7 +793,7 @@ int afs_fs_remove(struct afs_server *server, return -ENOMEM; call->key = key; - call->reply = vnode; + call->reply[0] = vnode; /* marshall the parameters */ bp = call->request; @@ -817,7 +817,7 @@ int afs_fs_remove(struct afs_server *server, */ static int afs_deliver_fs_link(struct afs_call *call) { - struct afs_vnode *dvnode = call->reply, *vnode = call->reply2; + struct afs_vnode *dvnode = call->reply[0], *vnode = call->reply[1]; const __be32 *bp; int ret; @@ -831,7 +831,7 @@ static int afs_deliver_fs_link(struct afs_call *call) bp = call->buffer; xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL); xdr_decode_AFSFetchStatus(&bp, &dvnode->status, dvnode, NULL); - /* 
xdr_decode_AFSVolSync(&bp, call->replyX); */ + /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */ _leave(" = 0 [done]"); return 0; @@ -872,8 +872,8 @@ int afs_fs_link(struct afs_server *server, return -ENOMEM; call->key = key; - call->reply = dvnode; - call->reply2 = vnode; + call->reply[0] = dvnode; + call->reply[1] = vnode; /* marshall the parameters */ bp = call->request; @@ -900,7 +900,7 @@ int afs_fs_link(struct afs_server *server, */ static int afs_deliver_fs_symlink(struct afs_call *call) { - struct afs_vnode *vnode = call->reply; + struct afs_vnode *vnode = call->reply[0]; const __be32 *bp; int ret; @@ -912,10 +912,10 @@ static int afs_deliver_fs_symlink(struct afs_call *call) /* unmarshall the reply once we've received all of it */ bp = call->buffer; - xdr_decode_AFSFid(&bp, call->reply2); - xdr_decode_AFSFetchStatus(&bp, call->reply3, NULL, NULL); + xdr_decode_AFSFid(&bp, call->reply[1]); + xdr_decode_AFSFetchStatus(&bp, call->reply[2], NULL, NULL); xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL); - /* xdr_decode_AFSVolSync(&bp, call->replyX); */ + /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */ _leave(" = 0 [done]"); return 0; @@ -963,9 +963,9 @@ int afs_fs_symlink(struct afs_server *server, return -ENOMEM; call->key = key; - call->reply = vnode; - call->reply2 = newfid; - call->reply3 = newstatus; + call->reply[0] = vnode; + call->reply[1] = newfid; + call->reply[2] = newstatus; /* marshall the parameters */ bp = call->request; @@ -1002,7 +1002,7 @@ int afs_fs_symlink(struct afs_server *server, */ static int afs_deliver_fs_rename(struct afs_call *call) { - struct afs_vnode *orig_dvnode = call->reply, *new_dvnode = call->reply2; + struct afs_vnode *orig_dvnode = call->reply[0], *new_dvnode = call->reply[1]; const __be32 *bp; int ret; @@ -1018,7 +1018,7 @@ static int afs_deliver_fs_rename(struct afs_call *call) if (new_dvnode != orig_dvnode) xdr_decode_AFSFetchStatus(&bp, &new_dvnode->status, new_dvnode, NULL); - /* xdr_decode_AFSVolSync(&bp, 
call->replyX); */ + /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */ _leave(" = 0 [done]"); return 0; @@ -1067,8 +1067,8 @@ int afs_fs_rename(struct afs_server *server, return -ENOMEM; call->key = key; - call->reply = orig_dvnode; - call->reply2 = new_dvnode; + call->reply[0] = orig_dvnode; + call->reply[1] = new_dvnode; /* marshall the parameters */ bp = call->request; @@ -1103,7 +1103,7 @@ int afs_fs_rename(struct afs_server *server, */ static int afs_deliver_fs_store_data(struct afs_call *call) { - struct afs_vnode *vnode = call->reply; + struct afs_vnode *vnode = call->reply[0]; const __be32 *bp; int ret; @@ -1117,7 +1117,7 @@ static int afs_deliver_fs_store_data(struct afs_call *call) bp = call->buffer; xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, &call->store_version); - /* xdr_decode_AFSVolSync(&bp, call->replyX); */ + /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */ afs_pages_written_back(vnode, call); @@ -1166,7 +1166,7 @@ static int afs_fs_store_data64(struct afs_server *server, call->wb = wb; call->key = wb->key; - call->reply = vnode; + call->reply[0] = vnode; call->mapping = vnode->vfs_inode.i_mapping; call->first = first; call->last = last; @@ -1242,7 +1242,7 @@ int afs_fs_store_data(struct afs_server *server, struct afs_writeback *wb, call->wb = wb; call->key = wb->key; - call->reply = vnode; + call->reply[0] = vnode; call->mapping = vnode->vfs_inode.i_mapping; call->first = first; call->last = last; @@ -1278,7 +1278,7 @@ int afs_fs_store_data(struct afs_server *server, struct afs_writeback *wb, static int afs_deliver_fs_store_status(struct afs_call *call) { afs_dataversion_t *store_version; - struct afs_vnode *vnode = call->reply; + struct afs_vnode *vnode = call->reply[0]; const __be32 *bp; int ret; @@ -1295,7 +1295,7 @@ static int afs_deliver_fs_store_status(struct afs_call *call) bp = call->buffer; xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, store_version); - /* xdr_decode_AFSVolSync(&bp, call->replyX); */ + /* 
xdr_decode_AFSVolSync(&bp, call->reply[X]); */ _leave(" = 0 [done]"); return 0; @@ -1346,7 +1346,7 @@ static int afs_fs_setattr_size64(struct afs_server *server, struct key *key, return -ENOMEM; call->key = key; - call->reply = vnode; + call->reply[0] = vnode; call->store_version = vnode->status.data_version + 1; call->operation_ID = FSSTOREDATA; @@ -1396,7 +1396,7 @@ static int afs_fs_setattr_size(struct afs_server *server, struct key *key, return -ENOMEM; call->key = key; - call->reply = vnode; + call->reply[0] = vnode; call->store_version = vnode->status.data_version + 1; call->operation_ID = FSSTOREDATA; @@ -1442,7 +1442,7 @@ int afs_fs_setattr(struct afs_server *server, struct key *key, return -ENOMEM; call->key = key; - call->reply = vnode; + call->reply[0] = vnode; call->operation_ID = FSSTORESTATUS; /* marshall the parameters */ @@ -1482,7 +1482,7 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call) return ret; bp = call->buffer; - xdr_decode_AFSFetchVolumeStatus(&bp, call->reply2); + xdr_decode_AFSFetchVolumeStatus(&bp, call->reply[1]); call->offset = 0; call->unmarshall++; @@ -1503,13 +1503,13 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call) case 3: _debug("extract volname"); if (call->count > 0) { - ret = afs_extract_data(call, call->reply3, + ret = afs_extract_data(call, call->reply[2], call->count, true); if (ret < 0) return ret; } - p = call->reply3; + p = call->reply[2]; p[call->count] = 0; _debug("volname '%s'", p); @@ -1550,13 +1550,13 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call) case 6: _debug("extract offline"); if (call->count > 0) { - ret = afs_extract_data(call, call->reply3, + ret = afs_extract_data(call, call->reply[2], call->count, true); if (ret < 0) return ret; } - p = call->reply3; + p = call->reply[2]; p[call->count] = 0; _debug("offline '%s'", p); @@ -1597,13 +1597,13 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call) case 9: _debug("extract motd"); if 
(call->count > 0) { - ret = afs_extract_data(call, call->reply3, + ret = afs_extract_data(call, call->reply[2], call->count, true); if (ret < 0) return ret; } - p = call->reply3; + p = call->reply[2]; p[call->count] = 0; _debug("motd '%s'", p); @@ -1634,8 +1634,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call) */ static void afs_get_volume_status_call_destructor(struct afs_call *call) { - kfree(call->reply3); - call->reply3 = NULL; + kfree(call->reply[2]); + call->reply[2] = NULL; afs_flat_call_destructor(call); } @@ -1675,9 +1675,9 @@ int afs_fs_get_volume_status(struct afs_server *server, } call->key = key; - call->reply = vnode; - call->reply2 = vs; - call->reply3 = tmpbuf; + call->reply[0] = vnode; + call->reply[1] = vs; + call->reply[2] = tmpbuf; /* marshall the parameters */ bp = call->request; @@ -1703,7 +1703,7 @@ static int afs_deliver_fs_xxxx_lock(struct afs_call *call) /* unmarshall the reply once we've received all of it */ bp = call->buffer; - /* xdr_decode_AFSVolSync(&bp, call->replyX); */ + /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */ _leave(" = 0 [done]"); return 0; @@ -1756,7 +1756,7 @@ int afs_fs_set_lock(struct afs_server *server, return -ENOMEM; call->key = key; - call->reply = vnode; + call->reply[0] = vnode; /* marshall the parameters */ bp = call->request; @@ -1788,7 +1788,7 @@ int afs_fs_extend_lock(struct afs_server *server, return -ENOMEM; call->key = key; - call->reply = vnode; + call->reply[0] = vnode; /* marshall the parameters */ bp = call->request; @@ -1819,7 +1819,7 @@ int afs_fs_release_lock(struct afs_server *server, return -ENOMEM; call->key = key; - call->reply = vnode; + call->reply[0] = vnode; /* marshall the parameters */ bp = call->request; diff --git a/fs/afs/internal.h b/fs/afs/internal.h index ba31a386bb2d..77a83e1bf56f 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -81,10 +81,7 @@ struct afs_call { struct address_space *mapping; /* page set */ struct afs_writeback *wb; /* writeback 
being performed */ void *buffer; /* reply receive buffer */ - void *reply; /* reply buffer (first part) */ - void *reply2; /* reply buffer (second part) */ - void *reply3; /* reply buffer (third part) */ - void *reply4; /* reply buffer (fourth part) */ + void *reply[4]; /* Where to put the reply */ pgoff_t first; /* first page in mapping to deal with */ pgoff_t last; /* last page in mapping to deal with */ size_t offset; /* offset into received data store */ diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 5f06cf720340..fc49193e12c4 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -639,7 +639,7 @@ static void afs_process_async_call(struct work_struct *work) } if (call->state == AFS_CALL_COMPLETE) { - call->reply = NULL; + call->reply[0] = NULL; /* We have two refs to release - one from the alloc and one * queued with the work item - and we can't just deallocate the diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c index 66e62be07b63..aa79fe3f168b 100644 --- a/fs/afs/vlclient.c +++ b/fs/afs/vlclient.c @@ -32,7 +32,7 @@ static int afs_deliver_vl_get_entry_by_xxx(struct afs_call *call) return ret; /* unmarshall the reply once we've received all of it */ - entry = call->reply; + entry = call->reply[0]; bp = call->buffer; for (loop = 0; loop < 64; loop++) @@ -135,7 +135,7 @@ int afs_vl_get_entry_by_name(struct afs_net *net, return -ENOMEM; call->key = key; - call->reply = entry; + call->reply[0] = entry; /* marshall the parameters */ bp = call->request; @@ -170,7 +170,7 @@ int afs_vl_get_entry_by_id(struct afs_net *net, return -ENOMEM; call->key = key; - call->reply = entry; + call->reply[0] = entry; /* marshall the parameters */ bp = call->request; From 33cd7f2b76717ac8dda566d8b4f518e803ae2618 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 2 Nov 2017 15:27:48 +0000 Subject: [PATCH 13/35] afs: Potentially return call->reply[0] from afs_make_call() If call->ret_reply0 is set, return call->reply[0] on success. 
Change the return type of afs_make_call() to long so that this can be passed back without bit loss and then cast to a pointer if required. Signed-off-by: David Howells --- fs/afs/internal.h | 3 ++- fs/afs/rxrpc.c | 26 ++++++++++++++++---------- 2 files changed, 18 insertions(+), 11 deletions(-) diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 77a83e1bf56f..94b676c433c7 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -101,6 +101,7 @@ struct afs_call { bool send_pages; /* T if data from mapping should be sent */ bool need_attention; /* T if RxRPC poked us */ bool async; /* T if asynchronous */ + bool ret_reply0; /* T if should return reply[0] on success */ bool upgrade; /* T to request service upgrade */ u16 service_id; /* RxRPC service ID to call */ u32 operation_ID; /* operation ID for an incoming call */ @@ -647,7 +648,7 @@ extern void __net_exit afs_close_socket(struct afs_net *); extern void afs_charge_preallocation(struct work_struct *); extern void afs_put_call(struct afs_call *); extern int afs_queue_call_work(struct afs_call *); -extern int afs_make_call(struct sockaddr_rxrpc *, struct afs_call *, gfp_t, bool); +extern long afs_make_call(struct sockaddr_rxrpc *, struct afs_call *, gfp_t, bool); extern struct afs_call *afs_alloc_flat_call(struct afs_net *, const struct afs_call_type *, size_t, size_t); diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index fc49193e12c4..d57f09b5d27b 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -20,7 +20,7 @@ struct workqueue_struct *afs_async_calls; static void afs_wake_up_call_waiter(struct sock *, struct rxrpc_call *, unsigned long); -static int afs_wait_for_call_to_complete(struct afs_call *); +static long afs_wait_for_call_to_complete(struct afs_call *); static void afs_wake_up_async_call(struct sock *, struct rxrpc_call *, unsigned long); static void afs_process_async_call(struct work_struct *); static void afs_rx_new_call(struct sock *, struct rxrpc_call *, unsigned long); @@ -320,8 +320,8 @@ static 
int afs_send_pages(struct afs_call *call, struct msghdr *msg) /* * initiate a call */ -int afs_make_call(struct sockaddr_rxrpc *srx, struct afs_call *call, - gfp_t gfp, bool async) +long afs_make_call(struct sockaddr_rxrpc *srx, struct afs_call *call, + gfp_t gfp, bool async) { struct rxrpc_call *rxcall; struct msghdr msg; @@ -415,9 +415,9 @@ error_do_abort: abort_code = 0; offset = 0; rxrpc_kernel_recv_data(call->net->socket, rxcall, NULL, - 0, &offset, false, &abort_code, + 0, &offset, false, &call->abort_code, &call->service_id); - ret = afs_abort_to_error(abort_code); + ret = afs_abort_to_error(call->abort_code); } error_kill_call: afs_put_call(call); @@ -468,7 +468,7 @@ static void afs_deliver_to_call(struct afs_call *call) case -EAGAIN: goto out; case -ECONNABORTED: - goto call_complete; + goto save_error; case -ENOTCONN: abort_code = RX_CALL_DEAD; rxrpc_kernel_abort_call(call->net->socket, call->rxcall, @@ -501,7 +501,6 @@ out: save_error: call->error = ret; -call_complete: call->state = AFS_CALL_COMPLETE; goto done; } @@ -509,10 +508,10 @@ call_complete: /* * wait synchronously for a call to complete */ -static int afs_wait_for_call_to_complete(struct afs_call *call) +static long afs_wait_for_call_to_complete(struct afs_call *call) { signed long rtt2, timeout; - int ret; + long ret; u64 rtt; u32 life, last_life; @@ -567,9 +566,16 @@ static int afs_wait_for_call_to_complete(struct afs_call *call) } ret = call->error; + if (ret < 0) { + ret = afs_abort_to_error(call->abort_code); + } else if (ret == 0 && call->ret_reply0) { + ret = (long)call->reply[0]; + call->reply[0] = NULL; + } + _debug("call complete"); afs_put_call(call); - _leave(" = %d", ret); + _leave(" = %p", (void *)ret); return ret; } From f4b3526d83c40dd8bf5948b9d7a1b2c340f0dcc8 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 2 Nov 2017 15:27:48 +0000 Subject: [PATCH 14/35] afs: Connect up the CB.ProbeUuid The handler for the CB.ProbeUuid operation in the cache manager is implemented, but 
isn't listed in the switch-statement of operation selection, so won't be used. Fix this by adding it. Signed-off-by: David Howells --- fs/afs/cmservice.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c index 24032916e0ca..03c0a8572ef4 100644 --- a/fs/afs/cmservice.c +++ b/fs/afs/cmservice.c @@ -121,6 +121,9 @@ bool afs_cm_incoming_call(struct afs_call *call) case CBProbe: call->type = &afs_SRXCBProbe; return true; + case CBProbeUuid: + call->type = &afs_SRXCBProbeUuid; + return true; case CBTellMeAboutYourself: call->type = &afs_SRXCBTellMeAboutYourself; return true; From 03dc2cfca536df89f4b1747caad9324c9be482fa Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 2 Nov 2017 15:27:49 +0000 Subject: [PATCH 15/35] afs: Fix the afs_uuid struct to make the char-sized fields signed In AFS's encoding of a UUID, the eight 'char' fields are all signed, so represent them with __s8 rather than __u8. This makes the compiler sign-extend them correctly when XDR-encoding them. 
Signed-off-by: David Howells --- fs/afs/afs.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/afs/afs.h b/fs/afs/afs.h index 0d837bddbf7d..2e2887a7d331 100644 --- a/fs/afs/afs.h +++ b/fs/afs/afs.h @@ -79,9 +79,9 @@ struct afs_uuid { __be32 time_low; /* low part of timestamp */ __be16 time_mid; /* mid part of timestamp */ __be16 time_hi_and_version; /* high part of timestamp and version */ - __u8 clock_seq_hi_and_reserved; /* clock seq hi and variant */ - __u8 clock_seq_low; /* clock seq low */ - __u8 node[6]; /* spatially unique node ID (MAC addr) */ + __s8 clock_seq_hi_and_reserved; /* clock seq hi and variant */ + __s8 clock_seq_low; /* clock seq low */ + __s8 node[6]; /* spatially unique node ID (MAC addr) */ }; /* From d0676a16781d0972969dff8b3f3f819599cc4b07 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 2 Nov 2017 15:27:49 +0000 Subject: [PATCH 16/35] afs: Rename struct afs_call server member to cm_server Rename the server member of struct afs_call to cm_server as we're only going to be using it for incoming calls for the Cache Manager service. This makes it easier to differentiate from the pointer to the target server for the client, which will point to a different structure to allow for callback handling. Signed-off-by: David Howells --- fs/afs/cmservice.c | 18 ++++++++---------- fs/afs/internal.h | 2 +- fs/afs/rxrpc.c | 1 + 3 files changed, 10 insertions(+), 11 deletions(-) diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c index 03c0a8572ef4..91e921553453 100644 --- a/fs/afs/cmservice.c +++ b/fs/afs/cmservice.c @@ -144,12 +144,10 @@ static void afs_cm_destructor(struct afs_call *call) * afs_deliver_cb_callback(). 
*/ if (call->unmarshall == 5) { - ASSERT(call->server && call->count && call->request); - afs_break_callbacks(call->server, call->count, call->request); + ASSERT(call->cm_server && call->count && call->request); + afs_break_callbacks(call->cm_server, call->count, call->request); } - afs_put_server(call->net, call->server); - call->server = NULL; kfree(call->buffer); call->buffer = NULL; } @@ -170,7 +168,7 @@ static void SRXAFSCB_CallBack(struct work_struct *work) * yet */ afs_send_empty_reply(call); - afs_break_callbacks(call->server, call->count, call->request); + afs_break_callbacks(call->cm_server, call->count, call->request); afs_put_call(call); _leave(""); } @@ -290,7 +288,7 @@ static int afs_deliver_cb_callback(struct afs_call *call) server = afs_find_server(call->net, &srx); if (!server) return -ENOTCONN; - call->server = server; + call->cm_server = server; return afs_queue_call_work(call); } @@ -302,9 +300,9 @@ static void SRXAFSCB_InitCallBackState(struct work_struct *work) { struct afs_call *call = container_of(work, struct afs_call, work); - _enter("{%p}", call->server); + _enter("{%p}", call->cm_server); - afs_init_callback_state(call->server); + afs_init_callback_state(call->cm_server); afs_send_empty_reply(call); afs_put_call(call); _leave(""); @@ -335,7 +333,7 @@ static int afs_deliver_cb_init_call_back_state(struct afs_call *call) server = afs_find_server(call->net, &srx); if (!server) return -ENOTCONN; - call->server = server; + call->cm_server = server; return afs_queue_call_work(call); } @@ -407,7 +405,7 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call) server = afs_find_server(call->net, &srx); if (!server) return -ENOTCONN; - call->server = server; + call->cm_server = server; return afs_queue_call_work(call); } diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 94b676c433c7..6fa81e04aff3 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -76,7 +76,7 @@ struct afs_call { struct rxrpc_call *rxcall; /* RxRPC 
call handle */ struct key *key; /* security for this call */ struct afs_net *net; /* The network namespace */ - struct afs_server *server; /* server affected by incoming CM call */ + struct afs_server *cm_server; /* Server affected by incoming CM call */ void *request; /* request data (first part) */ struct address_space *mapping; /* page set */ struct afs_writeback *wb; /* writeback being performed */ diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index d57f09b5d27b..ac1e25f957b1 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -161,6 +161,7 @@ void afs_put_call(struct afs_call *call) if (call->type->destructor) call->type->destructor(call); + afs_put_server(call->net, call->cm_server); kfree(call->request); kfree(call); From c435ee34551e1f5a02a253ca8e235287efd2727c Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 2 Nov 2017 15:27:49 +0000 Subject: [PATCH 17/35] afs: Overhaul the callback handling Overhaul the AFS callback handling by the following means: (1) Don't give up callback promises on vnodes that we are no longer using, rather let them just expire on the server or let the server break them. This is actually more efficient for the server as the callback lookup is expensive if there are lots of extant callbacks. (2) Only give up the callback promises we have from a server when the server record is destroyed. Then we can just give up *all* the callback promises on it in one go. (3) Servers can end up being shared between cells if cells are aliased, so don't add all the vnodes being backed by a particular server into a big FID-indexed tree on that server as there may be duplicates. Instead have each volume instance (~= superblock) register an interest in a server as it starts to make use of it and use this to allow the processor for callbacks from the server to find the superblock and thence the inode corresponding to the FID being broken by means of ilookup_nowait(). 
(4) Rather than iterating over the entire callback list when a mass-break comes in from the server, maintain a counter of mass-breaks in afs_server (cb_s_break) and make afs_validate() check it against the copy in afs_vnode. It would be nice not to have to take a read_lock whilst doing this, but that's tricky without using RCU. (5) Save a ref on the fileserver we're using for a call in the afs_call struct so that we can access its cb_s_break during call decoding. (6) Write-lock around callback and status storage in a vnode and read-lock around getattr so that we don't see the status mid-update. This has the following consequences: (1) Data invalidation isn't seen until someone calls afs_validate() on a vnode. Unfortunately, we need to use a key to query the server, but getting one from a background thread is tricky without caching loads of keys all over the place. (2) Mass invalidation isn't seen until someone calls afs_validate(). (3) Callback breaking is going to hit the inode_hash_lock quite a bit. Could this be replaced with rcu_read_lock(), since inodes are destroyed under RCU conditions?
Signed-off-by: David Howells --- fs/afs/afs_fs.h | 1 + fs/afs/callback.c | 504 +++++++++++++-------------------------------- fs/afs/cmservice.c | 2 +- fs/afs/dir.c | 52 +++-- fs/afs/flock.c | 6 +- fs/afs/fsclient.c | 190 ++++++++--------- fs/afs/inode.c | 109 +++++----- fs/afs/internal.h | 93 +++++---- fs/afs/main.c | 7 - fs/afs/security.c | 15 +- fs/afs/server.c | 21 +- fs/afs/super.c | 11 +- fs/afs/vnode.c | 203 ++---------------- fs/afs/volume.c | 43 ++-- 14 files changed, 453 insertions(+), 804 deletions(-) diff --git a/fs/afs/afs_fs.h b/fs/afs/afs_fs.h index accd886b4372..05395d0f1941 100644 --- a/fs/afs/afs_fs.h +++ b/fs/afs/afs_fs.h @@ -37,6 +37,7 @@ enum AFS_FS_Operations { FSLOOKUP = 161, /* AFS lookup file in directory */ FSFETCHDATA64 = 65537, /* AFS Fetch file data */ FSSTOREDATA64 = 65538, /* AFS Store file data */ + FSGIVEUPALLCALLBACKS = 65539, /* AFS Give up all outstanding callbacks on a server */ }; enum AFS_FS_Errors { diff --git a/fs/afs/callback.c b/fs/afs/callback.c index d12dffb76b67..82f4c7a3b7b6 100644 --- a/fs/afs/callback.c +++ b/fs/afs/callback.c @@ -20,116 +20,151 @@ #include #include "internal.h" -#if 0 -unsigned afs_vnode_update_timeout = 10; -#endif /* 0 */ +/* + * Set up an interest-in-callbacks record for a volume on a server and + * register it with the server. + * - Called with volume->server_sem held. 
+ */ +int afs_register_server_cb_interest(struct afs_vnode *vnode, + struct afs_cb_interest **ppcbi, + struct afs_server *server) +{ + struct afs_cb_interest *cbi = *ppcbi, *vcbi, *new, *x; -#define afs_breakring_space(server) \ - CIRC_SPACE((server)->cb_break_head, (server)->cb_break_tail, \ - ARRAY_SIZE((server)->cb_break)) +again: + vcbi = vnode->cb_interest; + if (vcbi) { + if (vcbi == cbi) + return 0; -struct workqueue_struct *afs_callback_update_worker; + if (cbi && vcbi->server == cbi->server) { + write_seqlock(&vnode->cb_lock); + vnode->cb_interest = afs_get_cb_interest(cbi); + write_sequnlock(&vnode->cb_lock); + afs_put_cb_interest(afs_v2net(vnode), cbi); + return 0; + } + + if (!cbi && vcbi->server == server) { + afs_get_cb_interest(vcbi); + x = cmpxchg(ppcbi, cbi, vcbi); + if (x != cbi) { + cbi = x; + afs_put_cb_interest(afs_v2net(vnode), vcbi); + goto again; + } + return 0; + } + } + + if (!cbi) { + new = kzalloc(sizeof(struct afs_cb_interest), GFP_KERNEL); + if (!new) + return -ENOMEM; + + refcount_set(&new->usage, 1); + new->sb = vnode->vfs_inode.i_sb; + new->vid = vnode->volume->vid; + new->server = afs_get_server(server); + INIT_LIST_HEAD(&new->cb_link); + + write_lock(&server->cb_break_lock); + list_add_tail(&new->cb_link, &server->cb_interests); + write_unlock(&server->cb_break_lock); + + x = cmpxchg(ppcbi, cbi, new); + if (x == cbi) { + cbi = new; + } else { + cbi = x; + afs_put_cb_interest(afs_v2net(vnode), new); + } + } + + ASSERT(cbi); + + /* Change the server the vnode is using. This entails scrubbing any + * interest the vnode had in the previous server it was using. + */ + write_seqlock(&vnode->cb_lock); + + vnode->cb_interest = afs_get_cb_interest(cbi); + vnode->cb_s_break = cbi->server->cb_s_break; + clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags); + + write_sequnlock(&vnode->cb_lock); + return 0; +} + +/* + * Set a vnode's interest on a server. 
+ */ +void afs_set_cb_interest(struct afs_vnode *vnode, struct afs_cb_interest *cbi) +{ + struct afs_cb_interest *old_cbi = NULL; + + if (vnode->cb_interest == cbi) + return; + + write_seqlock(&vnode->cb_lock); + if (vnode->cb_interest != cbi) { + afs_get_cb_interest(cbi); + old_cbi = vnode->cb_interest; + vnode->cb_interest = cbi; + } + write_sequnlock(&vnode->cb_lock); + afs_put_cb_interest(afs_v2net(vnode), cbi); +} + +/* + * Remove an interest on a server. + */ +void afs_put_cb_interest(struct afs_net *net, struct afs_cb_interest *cbi) +{ + if (cbi && refcount_dec_and_test(&cbi->usage)) { + if (!list_empty(&cbi->cb_link)) { + write_lock(&cbi->server->cb_break_lock); + list_del_init(&cbi->cb_link); + write_unlock(&cbi->server->cb_break_lock); + afs_put_server(net, cbi->server); + } + kfree(cbi); + } +} /* * allow the fileserver to request callback state (re-)initialisation */ void afs_init_callback_state(struct afs_server *server) { - struct afs_vnode *vnode; - - _enter("{%p}", server); - - spin_lock(&server->cb_lock); - - /* kill all the promises on record from this server */ - while (!RB_EMPTY_ROOT(&server->cb_promises)) { - vnode = rb_entry(server->cb_promises.rb_node, - struct afs_vnode, cb_promise); - _debug("UNPROMISE { vid=%x:%u uq=%u}", - vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique); - rb_erase(&vnode->cb_promise, &server->cb_promises); - vnode->cb_promised = false; - } - - spin_unlock(&server->cb_lock); - _leave(""); -} - -/* - * handle the data invalidation side of a callback being broken - */ -void afs_broken_callback_work(struct work_struct *work) -{ - struct afs_vnode *vnode = - container_of(work, struct afs_vnode, cb_broken_work); - - _enter(""); - - if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) - return; - - /* we're only interested in dealing with a broken callback on *this* - * vnode and only if no-one else has dealt with it yet */ - if (!mutex_trylock(&vnode->validate_lock)) - return; /* someone else is dealing with it */ - - if 
(test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags)) { - if (S_ISDIR(vnode->vfs_inode.i_mode)) - afs_clear_permits(vnode); - - if (afs_vnode_fetch_status(vnode, NULL, NULL) < 0) - goto out; - - if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) - goto out; - - /* if the vnode's data version number changed then its contents - * are different */ - if (test_and_clear_bit(AFS_VNODE_ZAP_DATA, &vnode->flags)) - afs_zap_data(vnode); - } - -out: - mutex_unlock(&vnode->validate_lock); - - /* avoid the potential race whereby the mutex_trylock() in this - * function happens again between the clear_bit() and the - * mutex_unlock() */ - if (test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags)) { - _debug("requeue"); - queue_work(afs_callback_update_worker, &vnode->cb_broken_work); - } - _leave(""); + if (!test_and_clear_bit(AFS_SERVER_NEW, &server->flags)) + server->cb_s_break++; } /* * actually break a callback */ -static void afs_break_callback(struct afs_server *server, - struct afs_vnode *vnode) +void afs_break_callback(struct afs_vnode *vnode) { _enter(""); - set_bit(AFS_VNODE_CB_BROKEN, &vnode->flags); + write_seqlock(&vnode->cb_lock); + + if (test_and_clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) { + vnode->cb_break++; + afs_clear_permits(vnode); - if (vnode->cb_promised) { spin_lock(&vnode->lock); _debug("break callback"); - spin_lock(&server->cb_lock); - if (vnode->cb_promised) { - rb_erase(&vnode->cb_promise, &server->cb_promises); - vnode->cb_promised = false; - } - spin_unlock(&server->cb_lock); - - queue_work(afs_callback_update_worker, &vnode->cb_broken_work); if (list_empty(&vnode->granted_locks) && !list_empty(&vnode->pending_locks)) afs_lock_may_be_available(vnode); spin_unlock(&vnode->lock); } + + write_sequnlock(&vnode->cb_lock); } /* @@ -141,49 +176,31 @@ static void afs_break_callback(struct afs_server *server, static void afs_break_one_callback(struct afs_server *server, struct afs_fid *fid) { + struct afs_cb_interest *cbi; + struct afs_iget_data data; struct afs_vnode 
*vnode; - struct rb_node *p; + struct inode *inode; - _debug("find"); - spin_lock(&server->fs_lock); - p = server->fs_vnodes.rb_node; - while (p) { - vnode = rb_entry(p, struct afs_vnode, server_rb); - if (fid->vid < vnode->fid.vid) - p = p->rb_left; - else if (fid->vid > vnode->fid.vid) - p = p->rb_right; - else if (fid->vnode < vnode->fid.vnode) - p = p->rb_left; - else if (fid->vnode > vnode->fid.vnode) - p = p->rb_right; - else if (fid->unique < vnode->fid.unique) - p = p->rb_left; - else if (fid->unique > vnode->fid.unique) - p = p->rb_right; - else - goto found; + read_lock(&server->cb_break_lock); + + /* Step through all interested superblocks. There may be more than one + * because of cell aliasing. + */ + list_for_each_entry(cbi, &server->cb_interests, cb_link) { + if (cbi->vid != fid->vid) + continue; + + data.volume = NULL; + data.fid = *fid; + inode = ilookup5_nowait(cbi->sb, fid->vnode, afs_iget5_test, &data); + if (inode) { + vnode = AFS_FS_I(inode); + afs_break_callback(vnode); + iput(inode); + } } - /* not found so we just ignore it (it may have moved to another - * server) */ -not_available: - _debug("not avail"); - spin_unlock(&server->fs_lock); - _leave(""); - return; - -found: - _debug("found"); - ASSERTCMP(server, ==, vnode->server); - - if (!igrab(AFS_VNODE_TO_I(vnode))) - goto not_available; - spin_unlock(&server->fs_lock); - - afs_break_callback(server, vnode); - iput(&vnode->vfs_inode); - _leave(""); + read_unlock(&server->cb_break_lock); } /* @@ -214,243 +231,14 @@ void afs_break_callbacks(struct afs_server *server, size_t count, } /* - * record the callback for breaking - * - the caller must hold server->cb_lock + * Clear the callback interests in a server list. 
*/ -static void afs_do_give_up_callback(struct afs_server *server, - struct afs_vnode *vnode) +void afs_clear_callback_interests(struct afs_net *net, struct afs_volume *volume) { - struct afs_callback *cb; + int i; - _enter("%p,%p", server, vnode); - - cb = &server->cb_break[server->cb_break_head]; - cb->fid = vnode->fid; - cb->version = vnode->cb_version; - cb->expiry = vnode->cb_expiry; - cb->type = vnode->cb_type; - smp_wmb(); - server->cb_break_head = - (server->cb_break_head + 1) & - (ARRAY_SIZE(server->cb_break) - 1); - - /* defer the breaking of callbacks to try and collect as many as - * possible to ship in one operation */ - switch (atomic_inc_return(&server->cb_break_n)) { - case 1 ... AFSCBMAX - 1: - queue_delayed_work(afs_callback_update_worker, - &server->cb_break_work, HZ * 2); - break; - case AFSCBMAX: - afs_flush_callback_breaks(server); - break; - default: - break; + for (i = 0; i < ARRAY_SIZE(volume->cb_interests); i++) { + afs_put_cb_interest(net, volume->cb_interests[i]); + volume->cb_interests[i] = NULL; } - - ASSERT(server->cb_promises.rb_node != NULL); - rb_erase(&vnode->cb_promise, &server->cb_promises); - vnode->cb_promised = false; - _leave(""); } - -/* - * discard the callback on a deleted item - */ -void afs_discard_callback_on_delete(struct afs_vnode *vnode) -{ - struct afs_server *server = vnode->server; - - _enter("%d", vnode->cb_promised); - - if (!vnode->cb_promised) { - _leave(" [not promised]"); - return; - } - - ASSERT(server != NULL); - - spin_lock(&server->cb_lock); - if (vnode->cb_promised) { - ASSERT(server->cb_promises.rb_node != NULL); - rb_erase(&vnode->cb_promise, &server->cb_promises); - vnode->cb_promised = false; - } - spin_unlock(&server->cb_lock); - _leave(""); -} - -/* - * give up the callback registered for a vnode on the file server when the - * inode is being cleared - */ -void afs_give_up_callback(struct afs_vnode *vnode) -{ - struct afs_server *server = vnode->server; - - DECLARE_WAITQUEUE(myself, current); - - 
_enter("%d", vnode->cb_promised); - - _debug("GIVE UP INODE %p", &vnode->vfs_inode); - - if (!vnode->cb_promised) { - _leave(" [not promised]"); - return; - } - - ASSERT(server != NULL); - - spin_lock(&server->cb_lock); - if (vnode->cb_promised && afs_breakring_space(server) == 0) { - add_wait_queue(&server->cb_break_waitq, &myself); - for (;;) { - set_current_state(TASK_UNINTERRUPTIBLE); - if (!vnode->cb_promised || - afs_breakring_space(server) != 0) - break; - spin_unlock(&server->cb_lock); - schedule(); - spin_lock(&server->cb_lock); - } - remove_wait_queue(&server->cb_break_waitq, &myself); - __set_current_state(TASK_RUNNING); - } - - /* of course, it's always possible for the server to break this vnode's - * callback first... */ - if (vnode->cb_promised) - afs_do_give_up_callback(server, vnode); - - spin_unlock(&server->cb_lock); - _leave(""); -} - -/* - * dispatch a deferred give up callbacks operation - */ -void afs_dispatch_give_up_callbacks(struct work_struct *work) -{ - struct afs_server *server = - container_of(work, struct afs_server, cb_break_work.work); - - _enter(""); - - /* tell the fileserver to discard the callback promises it has - * - in the event of ENOMEM or some other error, we just forget that we - * had callbacks entirely, and the server will call us later to break - * them - */ - afs_fs_give_up_callbacks(server->cell->net, server, true); -} - -/* - * flush the outstanding callback breaks on a server - */ -void afs_flush_callback_breaks(struct afs_server *server) -{ - mod_delayed_work(afs_callback_update_worker, &server->cb_break_work, 0); -} - -#if 0 -/* - * update a bunch of callbacks - */ -static void afs_callback_updater(struct work_struct *work) -{ - struct afs_server *server; - struct afs_vnode *vnode, *xvnode; - time64_t now; - long timeout; - int ret; - - server = container_of(work, struct afs_server, updater); - - _enter(""); - - now = ktime_get_real_seconds(); - - /* find the first vnode to update */ - 
spin_lock(&server->cb_lock); - for (;;) { - if (RB_EMPTY_ROOT(&server->cb_promises)) { - spin_unlock(&server->cb_lock); - _leave(" [nothing]"); - return; - } - - vnode = rb_entry(rb_first(&server->cb_promises), - struct afs_vnode, cb_promise); - if (atomic_read(&vnode->usage) > 0) - break; - rb_erase(&vnode->cb_promise, &server->cb_promises); - vnode->cb_promised = false; - } - - timeout = vnode->update_at - now; - if (timeout > 0) { - queue_delayed_work(afs_vnode_update_worker, - &afs_vnode_update, timeout * HZ); - spin_unlock(&server->cb_lock); - _leave(" [nothing]"); - return; - } - - list_del_init(&vnode->update); - atomic_inc(&vnode->usage); - spin_unlock(&server->cb_lock); - - /* we can now perform the update */ - _debug("update %s", vnode->vldb.name); - vnode->state = AFS_VL_UPDATING; - vnode->upd_rej_cnt = 0; - vnode->upd_busy_cnt = 0; - - ret = afs_vnode_update_record(vl, &vldb); - switch (ret) { - case 0: - afs_vnode_apply_update(vl, &vldb); - vnode->state = AFS_VL_UPDATING; - break; - case -ENOMEDIUM: - vnode->state = AFS_VL_VOLUME_DELETED; - break; - default: - vnode->state = AFS_VL_UNCERTAIN; - break; - } - - /* and then reschedule */ - _debug("reschedule"); - vnode->update_at = ktime_get_real_seconds() + - afs_vnode_update_timeout; - - spin_lock(&server->cb_lock); - - if (!list_empty(&server->cb_promises)) { - /* next update in 10 minutes, but wait at least 1 second more - * than the newest record already queued so that we don't spam - * the VL server suddenly with lots of requests - */ - xvnode = list_entry(server->cb_promises.prev, - struct afs_vnode, update); - if (vnode->update_at <= xvnode->update_at) - vnode->update_at = xvnode->update_at + 1; - xvnode = list_entry(server->cb_promises.next, - struct afs_vnode, update); - timeout = xvnode->update_at - now; - if (timeout < 0) - timeout = 0; - } else { - timeout = afs_vnode_update_timeout; - } - - list_add_tail(&vnode->update, &server->cb_promises); - - _debug("timeout %ld", timeout); - 
queue_delayed_work(afs_vnode_update_worker, - &afs_vnode_update, timeout * HZ); - spin_unlock(&server->cb_lock); - afs_put_vnode(vl); -} -#endif diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c index 91e921553453..5767f540e0e1 100644 --- a/fs/afs/cmservice.c +++ b/fs/afs/cmservice.c @@ -153,7 +153,7 @@ static void afs_cm_destructor(struct afs_call *call) } /* - * allow the fileserver to see if the cache manager is still alive + * The server supplied a list of callbacks that it wanted to break. */ static void SRXAFSCB_CallBack(struct work_struct *work) { diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 97ec6a74589e..37083699a0df 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -384,7 +384,7 @@ out: */ static int afs_readdir(struct file *file, struct dir_context *ctx) { - return afs_dir_iterate(file_inode(file), + return afs_dir_iterate(file_inode(file), ctx, file->private_data); } @@ -581,6 +581,7 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags) struct afs_vnode *vnode, *dir; struct afs_fid uninitialized_var(fid); struct dentry *parent; + struct inode *inode; struct key *key; void *dir_version; int ret; @@ -588,30 +589,39 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags) if (flags & LOOKUP_RCU) return -ECHILD; - vnode = AFS_FS_I(d_inode(dentry)); - - if (d_really_is_positive(dentry)) + if (d_really_is_positive(dentry)) { + vnode = AFS_FS_I(d_inode(dentry)); _enter("{v={%x:%u} n=%pd fl=%lx},", vnode->fid.vid, vnode->fid.vnode, dentry, vnode->flags); - else + } else { _enter("{neg n=%pd}", dentry); + } key = afs_request_key(AFS_FS_S(dentry->d_sb)->volume->cell); if (IS_ERR(key)) key = NULL; + if (d_really_is_positive(dentry)) { + inode = d_inode(dentry); + if (inode) { + vnode = AFS_FS_I(inode); + afs_validate(vnode, key); + if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) + goto out_bad; + } + } + /* lock down the parent dentry so we can peer at it */ parent = dget_parent(dentry); dir = AFS_FS_I(d_inode(parent)); /* 
validate the parent directory */ - if (test_bit(AFS_VNODE_MODIFIED, &dir->flags)) - afs_validate(dir, key); + afs_validate(dir, key); if (test_bit(AFS_VNODE_DELETED, &dir->flags)) { _debug("%pd: parent dir deleted", dentry); - goto out_bad; + goto out_bad_parent; } dir_version = (void *) (unsigned long) dir->status.data_version; @@ -626,13 +636,16 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags) case 0: /* the filename maps to something */ if (d_really_is_negative(dentry)) - goto out_bad; - if (is_bad_inode(d_inode(dentry))) { + goto out_bad_parent; + inode = d_inode(dentry); + if (is_bad_inode(inode)) { printk("kAFS: afs_d_revalidate: %pd2 has bad inode\n", dentry); - goto out_bad; + goto out_bad_parent; } + vnode = AFS_FS_I(inode); + /* if the vnode ID has changed, then the dirent points to a * different file */ if (fid.vnode != vnode->fid.vnode) { @@ -649,10 +662,10 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags) _debug("%pd: file deleted (uq %u -> %u I:%u)", dentry, fid.unique, vnode->fid.unique, - d_inode(dentry)->i_generation); - spin_lock(&vnode->lock); + vnode->vfs_inode.i_generation); + write_seqlock(&vnode->cb_lock); set_bit(AFS_VNODE_DELETED, &vnode->flags); - spin_unlock(&vnode->lock); + write_sequnlock(&vnode->cb_lock); goto not_found; } goto out_valid; @@ -667,7 +680,7 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags) default: _debug("failed to iterate dir %pd: %d", parent, ret); - goto out_bad; + goto out_bad_parent; } out_valid: @@ -683,9 +696,10 @@ not_found: dentry->d_flags |= DCACHE_NFSFS_RENAMED; spin_unlock(&dentry->d_lock); -out_bad: +out_bad_parent: _debug("dropping dentry %pd2", dentry); dput(parent); +out_bad: key_put(key); _leave(" = 0 [bad]"); @@ -820,7 +834,7 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry) vnode = AFS_FS_I(d_inode(dentry)); clear_nlink(&vnode->vfs_inode); set_bit(AFS_VNODE_DELETED, &vnode->flags); - 
afs_discard_callback_on_delete(vnode); + clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags); } key_put(key); @@ -884,9 +898,7 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry) vnode = AFS_FS_I(d_inode(dentry)); if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) _debug("AFS_VNODE_DELETED"); - if (test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags)) - _debug("AFS_VNODE_CB_BROKEN"); - set_bit(AFS_VNODE_CB_BROKEN, &vnode->flags); + clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags); ret = afs_validate(vnode, key); _debug("nlink %d [val %d]", vnode->vfs_inode.i_nlink, ret); } diff --git a/fs/afs/flock.c b/fs/afs/flock.c index 559ac00af5f7..aba36e0b1460 100644 --- a/fs/afs/flock.c +++ b/fs/afs/flock.c @@ -243,7 +243,7 @@ static int afs_do_setlk(struct file *file, struct file_lock *fl) /* make sure we've got a callback on this file and that our view of the * data version is up to date */ - ret = afs_vnode_fetch_status(vnode, NULL, key); + ret = afs_validate(vnode, key); if (ret < 0) goto error; @@ -383,7 +383,7 @@ given_lock: /* again, make sure we've got a callback on this file and, again, make * sure that our view of the data version is up to date (we ignore * errors incurred here and deal with the consequences elsewhere) */ - afs_vnode_fetch_status(vnode, NULL, key); + afs_vnode_fetch_status(vnode, NULL, key, false); error: spin_unlock(&inode->i_lock); @@ -455,7 +455,7 @@ static int afs_do_getlk(struct file *file, struct file_lock *fl) posix_test_lock(file, fl); if (fl->fl_type == F_UNLCK) { /* no local locks; consult the server */ - ret = afs_vnode_fetch_status(vnode, NULL, key); + ret = afs_vnode_fetch_status(vnode, NULL, key, true); if (ret < 0) goto error; lock_count = vnode->status.lock_count; diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index 36f58adde030..c6658405fe91 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -22,6 +22,11 @@ */ static u8 afs_discard_buffer[64]; +static inline void afs_use_fs_server(struct afs_call *call, struct afs_server 
*server) +{ + call->server = afs_get_server(server); +} + /* * decode an AFSFid block */ @@ -47,14 +52,17 @@ static void xdr_decode_AFSFetchStatus(const __be32 **_bp, const __be32 *bp = *_bp; umode_t mode; u64 data_version, size; - u32 changed = 0; /* becomes non-zero if ctime-type changes seen */ + bool changed = false; kuid_t owner; kgid_t group; + write_seqlock(&vnode->cb_lock); + #define EXTRACT(DST) \ do { \ u32 x = ntohl(*bp++); \ - changed |= DST - x; \ + if (DST != x) \ + changed |= true; \ DST = x; \ } while (0) @@ -127,25 +135,39 @@ static void xdr_decode_AFSFetchStatus(const __be32 **_bp, _debug("vnode modified %llx on {%x:%u}", (unsigned long long) data_version, vnode->fid.vid, vnode->fid.vnode); - set_bit(AFS_VNODE_MODIFIED, &vnode->flags); + set_bit(AFS_VNODE_DIR_MODIFIED, &vnode->flags); set_bit(AFS_VNODE_ZAP_DATA, &vnode->flags); } } else if (store_version) { status->data_version = data_version; } + + write_sequnlock(&vnode->cb_lock); } /* * decode an AFSCallBack block */ -static void xdr_decode_AFSCallBack(const __be32 **_bp, struct afs_vnode *vnode) +static void xdr_decode_AFSCallBack(struct afs_call *call, + struct afs_vnode *vnode, + const __be32 **_bp) { const __be32 *bp = *_bp; + u32 cb_expiry; - vnode->cb_version = ntohl(*bp++); - vnode->cb_expiry = ntohl(*bp++); - vnode->cb_type = ntohl(*bp++); - vnode->cb_expires = vnode->cb_expiry + ktime_get_real_seconds(); + write_seqlock(&vnode->cb_lock); + + if (call->cb_break == (vnode->cb_break + call->server->cb_s_break)) { + vnode->cb_version = ntohl(*bp++); + cb_expiry = ntohl(*bp++); + vnode->cb_type = ntohl(*bp++); + vnode->cb_expires_at = cb_expiry + ktime_get_real_seconds(); + set_bit(AFS_VNODE_CB_PROMISED, &vnode->flags); + } else { + bp += 3; + } + + write_sequnlock(&vnode->cb_lock); *_bp = bp; } @@ -247,16 +269,16 @@ static int afs_deliver_fs_fetch_status(struct afs_call *call) const __be32 *bp; int ret; - _enter(""); - ret = afs_transfer_reply(call); if (ret < 0) return ret; + 
_enter("{%x:%u}", vnode->fid.vid, vnode->fid.vnode); + /* unmarshall the reply once we've received all of it */ bp = call->buffer; xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL); - xdr_decode_AFSCallBack(&bp, vnode); + xdr_decode_AFSCallBack(call, vnode, &bp); if (call->reply[1]) xdr_decode_AFSVolSync(&bp, call->reply[1]); @@ -304,6 +326,8 @@ int afs_fs_fetch_file_status(struct afs_server *server, bp[2] = htonl(vnode->fid.vnode); bp[3] = htonl(vnode->fid.unique); + call->cb_break = vnode->cb_break + server->cb_s_break; + afs_use_fs_server(call, server); return afs_make_call(&server->addr, call, GFP_NOFS, async); } @@ -429,7 +453,7 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call) bp = call->buffer; xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL); - xdr_decode_AFSCallBack(&bp, vnode); + xdr_decode_AFSCallBack(call, vnode, &bp); if (call->reply[1]) xdr_decode_AFSVolSync(&bp, call->reply[1]); @@ -513,6 +537,8 @@ static int afs_fs_fetch_data64(struct afs_server *server, bp[7] = htonl(lower_32_bits(req->len)); atomic_inc(&req->usage); + call->cb_break = vnode->cb_break + server->cb_s_break; + afs_use_fs_server(call, server); return afs_make_call(&server->addr, call, GFP_NOFS, async); } @@ -556,87 +582,8 @@ int afs_fs_fetch_data(struct afs_server *server, bp[5] = htonl(lower_32_bits(req->len)); atomic_inc(&req->usage); - return afs_make_call(&server->addr, call, GFP_NOFS, async); -} - -/* - * deliver reply data to an FS.GiveUpCallBacks - */ -static int afs_deliver_fs_give_up_callbacks(struct afs_call *call) -{ - _enter(""); - - /* shouldn't be any reply data */ - return afs_extract_data(call, NULL, 0, false); -} - -/* - * FS.GiveUpCallBacks operation type - */ -static const struct afs_call_type afs_RXFSGiveUpCallBacks = { - .name = "FS.GiveUpCallBacks", - .deliver = afs_deliver_fs_give_up_callbacks, - .destructor = afs_flat_call_destructor, -}; - -/* - * give up a set of callbacks - * - the callbacks are held in the server->cb_break 
ring - */ -int afs_fs_give_up_callbacks(struct afs_net *net, - struct afs_server *server, - bool async) -{ - struct afs_call *call; - size_t ncallbacks; - __be32 *bp, *tp; - int loop; - - ncallbacks = CIRC_CNT(server->cb_break_head, server->cb_break_tail, - ARRAY_SIZE(server->cb_break)); - - _enter("{%zu},", ncallbacks); - - if (ncallbacks == 0) - return 0; - if (ncallbacks > AFSCBMAX) - ncallbacks = AFSCBMAX; - - _debug("break %zu callbacks", ncallbacks); - - call = afs_alloc_flat_call(net, &afs_RXFSGiveUpCallBacks, - 12 + ncallbacks * 6 * 4, 0); - if (!call) - return -ENOMEM; - - - /* marshall the parameters */ - bp = call->request; - tp = bp + 2 + ncallbacks * 3; - *bp++ = htonl(FSGIVEUPCALLBACKS); - *bp++ = htonl(ncallbacks); - *tp++ = htonl(ncallbacks); - - atomic_sub(ncallbacks, &server->cb_break_n); - for (loop = ncallbacks; loop > 0; loop--) { - struct afs_callback *cb = - &server->cb_break[server->cb_break_tail]; - - *bp++ = htonl(cb->fid.vid); - *bp++ = htonl(cb->fid.vnode); - *bp++ = htonl(cb->fid.unique); - *tp++ = htonl(cb->version); - *tp++ = htonl(cb->expiry); - *tp++ = htonl(cb->type); - smp_mb(); - server->cb_break_tail = - (server->cb_break_tail + 1) & - (ARRAY_SIZE(server->cb_break) - 1); - } - - ASSERT(ncallbacks > 0); - wake_up_nr(&server->cb_break_waitq, ncallbacks); - + call->cb_break = vnode->cb_break + server->cb_s_break; + afs_use_fs_server(call, server); return afs_make_call(&server->addr, call, GFP_NOFS, async); } @@ -731,6 +678,7 @@ int afs_fs_create(struct afs_server *server, *bp++ = htonl(mode & S_IALLUGO); /* unix mode */ *bp++ = 0; /* segment size */ + afs_use_fs_server(call, server); return afs_make_call(&server->addr, call, GFP_NOFS, async); } @@ -809,6 +757,7 @@ int afs_fs_remove(struct afs_server *server, bp = (void *) bp + padsz; } + afs_use_fs_server(call, server); return afs_make_call(&server->addr, call, GFP_NOFS, async); } @@ -892,6 +841,7 @@ int afs_fs_link(struct afs_server *server, *bp++ = htonl(vnode->fid.vnode); *bp++ 
= htonl(vnode->fid.unique); + afs_use_fs_server(call, server); return afs_make_call(&server->addr, call, GFP_NOFS, async); } @@ -994,6 +944,7 @@ int afs_fs_symlink(struct afs_server *server, *bp++ = htonl(S_IRWXUGO); /* unix mode */ *bp++ = 0; /* segment size */ + afs_use_fs_server(call, server); return afs_make_call(&server->addr, call, GFP_NOFS, async); } @@ -1095,6 +1046,7 @@ int afs_fs_rename(struct afs_server *server, bp = (void *) bp + n_padsz; } + afs_use_fs_server(call, server); return afs_make_call(&server->addr, call, GFP_NOFS, async); } @@ -1196,6 +1148,7 @@ static int afs_fs_store_data64(struct afs_server *server, *bp++ = htonl(i_size >> 32); *bp++ = htonl((u32) i_size); + afs_use_fs_server(call, server); return afs_make_call(&server->addr, call, GFP_NOFS, async); } @@ -1269,6 +1222,7 @@ int afs_fs_store_data(struct afs_server *server, struct afs_writeback *wb, *bp++ = htonl(size); *bp++ = htonl(i_size); + afs_use_fs_server(call, server); return afs_make_call(&server->addr, call, GFP_NOFS, async); } @@ -1366,6 +1320,7 @@ static int afs_fs_setattr_size64(struct afs_server *server, struct key *key, *bp++ = htonl(attr->ia_size >> 32); /* new file length */ *bp++ = htonl((u32) attr->ia_size); + afs_use_fs_server(call, server); return afs_make_call(&server->addr, call, GFP_NOFS, async); } @@ -1413,6 +1368,7 @@ static int afs_fs_setattr_size(struct afs_server *server, struct key *key, *bp++ = 0; /* size of write */ *bp++ = htonl(attr->ia_size); /* new file length */ + afs_use_fs_server(call, server); return afs_make_call(&server->addr, call, GFP_NOFS, async); } @@ -1454,6 +1410,7 @@ int afs_fs_setattr(struct afs_server *server, struct key *key, xdr_encode_AFS_StoreStatus(&bp, attr); + afs_use_fs_server(call, server); return afs_make_call(&server->addr, call, GFP_NOFS, async); } @@ -1684,6 +1641,7 @@ int afs_fs_get_volume_status(struct afs_server *server, bp[0] = htonl(FSGETVOLUMESTATUS); bp[1] = htonl(vnode->fid.vid); + afs_use_fs_server(call, server); return 
afs_make_call(&server->addr, call, GFP_NOFS, async); } @@ -1766,6 +1724,7 @@ int afs_fs_set_lock(struct afs_server *server, *bp++ = htonl(vnode->fid.unique); *bp++ = htonl(type); + afs_use_fs_server(call, server); return afs_make_call(&server->addr, call, GFP_NOFS, async); } @@ -1797,6 +1756,7 @@ int afs_fs_extend_lock(struct afs_server *server, *bp++ = htonl(vnode->fid.vnode); *bp++ = htonl(vnode->fid.unique); + afs_use_fs_server(call, server); return afs_make_call(&server->addr, call, GFP_NOFS, async); } @@ -1828,5 +1788,49 @@ int afs_fs_release_lock(struct afs_server *server, *bp++ = htonl(vnode->fid.vnode); *bp++ = htonl(vnode->fid.unique); + afs_use_fs_server(call, server); + return afs_make_call(&server->addr, call, GFP_NOFS, async); +} + +/* + * Deliver reply data to an FS.GiveUpAllCallBacks operation. + */ +static int afs_deliver_fs_give_up_all_callbacks(struct afs_call *call) +{ + return afs_transfer_reply(call); +} + +/* + * FS.GiveUpAllCallBacks operation type + */ +static const struct afs_call_type afs_RXFSGiveUpAllCallBacks = { + .name = "FS.GiveUpAllCallBacks", + .deliver = afs_deliver_fs_give_up_all_callbacks, + .destructor = afs_flat_call_destructor, +}; + +/* + * Flush all the callbacks we have on a server. 
+ */ +int afs_fs_give_up_all_callbacks(struct afs_server *server, + struct key *key, + bool async) +{ + struct afs_call *call; + __be32 *bp; + + _enter(""); + + call = afs_alloc_flat_call(server->net, &afs_RXFSGiveUpAllCallBacks, 2 * 4, 0); + if (!call) + return -ENOMEM; + + call->key = key; + + /* marshall the parameters */ + bp = call->request; + *bp++ = htonl(FSGIVEUPALLCALLBACKS); + + /* Can't take a ref on server */ return afs_make_call(&server->addr, call, GFP_NOFS, async); } diff --git a/fs/afs/inode.c b/fs/afs/inode.c index fbb441d25022..4822a2a50a61 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -23,11 +23,6 @@ #include #include "internal.h" -struct afs_iget_data { - struct afs_fid fid; - struct afs_volume *volume; /* volume on which resides */ -}; - static const struct inode_operations afs_symlink_inode_operations = { .get_link = page_get_link, .listxattr = afs_listxattr, @@ -39,6 +34,7 @@ static const struct inode_operations afs_symlink_inode_operations = { static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key) { struct inode *inode = AFS_VNODE_TO_I(vnode); + bool changed; _debug("FS: ft=%d lk=%d sz=%llu ver=%Lu mod=%hu", vnode->status.type, @@ -47,6 +43,8 @@ static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key) vnode->status.data_version, vnode->status.mode); + read_seqlock_excl(&vnode->cb_lock); + switch (vnode->status.type) { case AFS_FTYPE_FILE: inode->i_mode = S_IFREG | vnode->status.mode; @@ -63,9 +61,7 @@ static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key) if ((vnode->status.mode & 0777) == 0644) { inode->i_flags |= S_AUTOMOUNT; - spin_lock(&vnode->lock); set_bit(AFS_VNODE_MOUNTPOINT, &vnode->flags); - spin_unlock(&vnode->lock); inode->i_mode = S_IFDIR | 0555; inode->i_op = &afs_mntpt_inode_operations; @@ -78,13 +74,11 @@ static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key) break; default: printk("kAFS: AFS vnode with undefined type\n"); + 
read_sequnlock_excl(&vnode->cb_lock); return -EBADMSG; } -#ifdef CONFIG_AFS_FSCACHE - if (vnode->status.size != inode->i_size) - fscache_attr_changed(vnode->cache); -#endif + changed = (vnode->status.size != inode->i_size); set_nlink(inode, vnode->status.nlink); inode->i_uid = vnode->status.owner; @@ -97,13 +91,20 @@ static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key) inode->i_generation = vnode->fid.unique; inode->i_version = vnode->status.data_version; inode->i_mapping->a_ops = &afs_fs_aops; + + read_sequnlock_excl(&vnode->cb_lock); + +#ifdef CONFIG_AFS_FSCACHE + if (changed) + fscache_attr_changed(vnode->cache); +#endif return 0; } /* * iget5() comparator */ -static int afs_iget5_test(struct inode *inode, void *opaque) +int afs_iget5_test(struct inode *inode, void *opaque) { struct afs_iget_data *data = opaque; @@ -237,8 +238,7 @@ struct inode *afs_iget(struct super_block *sb, struct key *key, if (!status) { /* it's a remotely extant inode */ - set_bit(AFS_VNODE_CB_BROKEN, &vnode->flags); - ret = afs_vnode_fetch_status(vnode, NULL, key); + ret = afs_vnode_fetch_status(vnode, NULL, key, true); if (ret < 0) goto bad_inode; } else { @@ -249,16 +249,16 @@ struct inode *afs_iget(struct super_block *sb, struct key *key, /* it's a symlink we just created (the fileserver * didn't give us a callback) */ vnode->cb_version = 0; - vnode->cb_expiry = 0; vnode->cb_type = 0; - vnode->cb_expires = ktime_get_real_seconds(); + vnode->cb_expires_at = 0; } else { vnode->cb_version = cb->version; - vnode->cb_expiry = cb->expiry; vnode->cb_type = cb->type; - vnode->cb_expires = vnode->cb_expiry + - ktime_get_real_seconds(); + vnode->cb_expires_at = cb->expiry; + set_bit(AFS_VNODE_CB_PROMISED, &vnode->flags); } + + vnode->cb_expires_at += ktime_get_real_seconds(); } /* set up caching before mapping the status, as map-status reads the @@ -320,25 +320,34 @@ void afs_zap_data(struct afs_vnode *vnode) */ int afs_validate(struct afs_vnode *vnode, struct key *key) { + 
time64_t now = ktime_get_real_seconds(); + bool valid = false; int ret; _enter("{v={%x:%u} fl=%lx},%x", vnode->fid.vid, vnode->fid.vnode, vnode->flags, key_serial(key)); - if (vnode->cb_promised && - !test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags) && - !test_bit(AFS_VNODE_MODIFIED, &vnode->flags) && - !test_bit(AFS_VNODE_ZAP_DATA, &vnode->flags)) { - if (vnode->cb_expires < ktime_get_real_seconds() + 10) { - _debug("callback expired"); - set_bit(AFS_VNODE_CB_BROKEN, &vnode->flags); - } else { - goto valid; + /* Quickly check the callback state. Ideally, we'd use read_seqbegin + * here, but we have no way to pass the net namespace to the RCU + * cleanup for the server record. + */ + read_seqlock_excl(&vnode->cb_lock); + + if (test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) { + if (vnode->cb_s_break != vnode->cb_interest->server->cb_s_break) { + vnode->cb_s_break = vnode->cb_interest->server->cb_s_break; + } else if (!test_bit(AFS_VNODE_DIR_MODIFIED, &vnode->flags) && + !test_bit(AFS_VNODE_ZAP_DATA, &vnode->flags) && + vnode->cb_expires_at - 10 > now) { + valid = true; } + } else if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) { + valid = true; } - if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) + read_sequnlock_excl(&vnode->cb_lock); + if (valid) goto valid; mutex_lock(&vnode->validate_lock); @@ -347,12 +356,16 @@ int afs_validate(struct afs_vnode *vnode, struct key *key) * a new promise - note that if the (parent) directory's metadata was * changed then the security may be different and we may no longer have * access */ - if (!vnode->cb_promised || - test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags)) { + if (!test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) { _debug("not promised"); - ret = afs_vnode_fetch_status(vnode, NULL, key); - if (ret < 0) + ret = afs_vnode_fetch_status(vnode, NULL, key, false); + if (ret < 0) { + if (ret == -ENOENT) { + set_bit(AFS_VNODE_DELETED, &vnode->flags); + ret = -ESTALE; + } goto error_unlock; + } _debug("new promise [fl=%lx]", 
vnode->flags); } @@ -367,7 +380,7 @@ int afs_validate(struct afs_vnode *vnode, struct key *key) if (test_and_clear_bit(AFS_VNODE_ZAP_DATA, &vnode->flags)) afs_zap_data(vnode); - clear_bit(AFS_VNODE_MODIFIED, &vnode->flags); + clear_bit(AFS_VNODE_DIR_MODIFIED, &vnode->flags); mutex_unlock(&vnode->validate_lock); valid: _leave(" = 0"); @@ -386,10 +399,17 @@ int afs_getattr(const struct path *path, struct kstat *stat, u32 request_mask, unsigned int query_flags) { struct inode *inode = d_inode(path->dentry); + struct afs_vnode *vnode = AFS_FS_I(inode); + int seq = 0; _enter("{ ino=%lu v=%u }", inode->i_ino, inode->i_generation); - generic_fillattr(inode, stat); + do { + read_seqbegin_or_lock(&vnode->cb_lock, &seq); + generic_fillattr(inode, stat); + } while (need_seqretry(&vnode->cb_lock, seq)); + + done_seqretry(&vnode->cb_lock, seq); return 0; } @@ -416,13 +436,10 @@ void afs_evict_inode(struct inode *inode) vnode = AFS_FS_I(inode); - _enter("{%x:%u.%d} v=%u x=%u t=%u }", + _enter("{%x:%u.%d}", vnode->fid.vid, vnode->fid.vnode, - vnode->fid.unique, - vnode->cb_version, - vnode->cb_expiry, - vnode->cb_type); + vnode->fid.unique); _debug("CLEAR INODE %p", inode); @@ -431,18 +448,12 @@ void afs_evict_inode(struct inode *inode) truncate_inode_pages_final(&inode->i_data); clear_inode(inode); - afs_give_up_callback(vnode); - - if (vnode->server) { - spin_lock(&vnode->server->fs_lock); - rb_erase(&vnode->server_rb, &vnode->server->fs_vnodes); - spin_unlock(&vnode->server->fs_lock); - afs_put_server(afs_i2net(inode), vnode->server); - vnode->server = NULL; + if (vnode->cb_interest) { + afs_put_cb_interest(afs_i2net(inode), vnode->cb_interest); + vnode->cb_interest = NULL; } ASSERT(list_empty(&vnode->writebacks)); - ASSERT(!vnode->cb_promised); #ifdef CONFIG_AFS_FSCACHE fscache_relinquish_cookie(vnode->cache, 0); diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 6fa81e04aff3..e3c99437f6e0 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -55,6 +55,11 @@ struct 
afs_mount_params { struct key *key; /* key to use for secure mounting */ }; +struct afs_iget_data { + struct afs_fid fid; + struct afs_volume *volume; /* volume on which resides */ +}; + enum afs_call_state { AFS_CALL_REQUESTING, /* request is being sent for outgoing call */ AFS_CALL_AWAIT_REPLY, /* awaiting reply to outgoing call */ @@ -77,6 +82,7 @@ struct afs_call { struct key *key; /* security for this call */ struct afs_net *net; /* The network namespace */ struct afs_server *cm_server; /* Server affected by incoming CM call */ + struct afs_server *server; /* Server used by client call */ void *request; /* request data (first part) */ struct address_space *mapping; /* page set */ struct afs_writeback *wb; /* writeback being performed */ @@ -92,6 +98,7 @@ struct afs_call { unsigned request_size; /* size of request data */ unsigned reply_max; /* maximum size of reply */ unsigned first_offset; /* offset into mapping[first] */ + unsigned int cb_break; /* cb_break + cb_s_break before the call */ union { unsigned last_to; /* amount of mapping[last] */ unsigned count2; /* count used in unmarshalling */ @@ -314,26 +321,31 @@ struct afs_server { struct afs_cell *cell; /* cell in which server resides */ struct list_head link; /* link in cell's server list */ struct list_head grave; /* link in master graveyard list */ + struct rb_node master_rb; /* link in master by-addr tree */ struct rw_semaphore sem; /* access lock */ + unsigned long flags; +#define AFS_SERVER_NEW 0 /* New server, don't inc cb_s_break */ /* file service access */ - struct rb_root fs_vnodes; /* vnodes backed by this server (ordered by FID) */ - unsigned long fs_act_jif; /* time at which last activity occurred */ - unsigned long fs_dead_jif; /* time at which no longer to be considered dead */ - spinlock_t fs_lock; /* access lock */ int fs_state; /* 0 or reason FS currently marked dead (-errno) */ + spinlock_t fs_lock; /* access lock */ /* callback promise management */ - struct rb_root cb_promises; /* 
vnode expiration list (ordered earliest first) */ - struct delayed_work cb_updater; /* callback updater */ - struct delayed_work cb_break_work; /* collected break dispatcher */ - wait_queue_head_t cb_break_waitq; /* space available in cb_break waitqueue */ - spinlock_t cb_lock; /* access lock */ - struct afs_callback cb_break[64]; /* ring of callbacks awaiting breaking */ - atomic_t cb_break_n; /* number of pending breaks */ - u8 cb_break_head; /* head of callback breaking ring */ - u8 cb_break_tail; /* tail of callback breaking ring */ + struct list_head cb_interests; /* List of superblocks using this server */ + unsigned cb_s_break; /* Break-everything counter. */ + rwlock_t cb_break_lock; /* Volume finding lock */ +}; + +/* + * Interest by a superblock on a server. + */ +struct afs_cb_interest { + struct list_head cb_link; /* Link in server->cb_interests */ + struct afs_server *server; /* Server on which this interest resides */ + struct super_block *sb; /* Superblock on which inodes reside */ + afs_volid_t vid; /* Volume ID to match */ + refcount_t usage; }; /* @@ -352,6 +364,7 @@ struct afs_volume { unsigned short nservers; /* number of server slots filled */ unsigned short rjservers; /* number of servers discarded due to -ENOMEDIUM */ struct afs_server *servers[8]; /* servers on which volume resides (ordered) */ + struct afs_cb_interest *cb_interests[8]; /* Interests on servers for callbacks */ struct rw_semaphore server_sem; /* lock for accessing current server */ }; @@ -371,7 +384,6 @@ struct afs_vnode { struct inode vfs_inode; /* the VFS's inode record */ struct afs_volume *volume; /* volume on which vnode resides */ - struct afs_server *server; /* server currently supplying this file */ struct afs_fid fid; /* the file identifier for this inode */ struct afs_file_status status; /* AFS status info for this file */ #ifdef CONFIG_AFS_FSCACHE @@ -386,9 +398,9 @@ struct afs_vnode { spinlock_t writeback_lock; /* lock for writebacks */ spinlock_t lock; /* 
waitqueue/flags lock */ unsigned long flags; -#define AFS_VNODE_CB_BROKEN 0 /* set if vnode's callback was broken */ +#define AFS_VNODE_CB_PROMISED 0 /* Set if vnode has a callback promise */ #define AFS_VNODE_UNSET 1 /* set if vnode attributes not yet set */ -#define AFS_VNODE_MODIFIED 2 /* set if vnode's data modified */ +#define AFS_VNODE_DIR_MODIFIED 2 /* set if dir vnode's data modified */ #define AFS_VNODE_ZAP_DATA 3 /* set if vnode's data should be invalidated */ #define AFS_VNODE_DELETED 4 /* set if vnode deleted on server */ #define AFS_VNODE_MOUNTPOINT 5 /* set if vnode is a mountpoint symlink */ @@ -408,15 +420,14 @@ struct afs_vnode { struct key *unlock_key; /* key to be used in unlocking */ /* outstanding callback notification on this file */ - struct rb_node server_rb; /* link in server->fs_vnodes */ - struct rb_node cb_promise; /* link in server->cb_promises */ - struct work_struct cb_broken_work; /* work to be done on callback break */ - time64_t cb_expires; /* time at which callback expires */ - time64_t cb_expires_at; /* time used to order cb_promise */ + struct afs_cb_interest *cb_interest; /* Server on which this resides */ + unsigned int cb_s_break; /* Mass break counter on ->server */ + unsigned int cb_break; /* Break counter on vnode */ + seqlock_t cb_lock; /* Lock for ->cb_interest, ->status, ->cb_*break */ + + time64_t cb_expires_at; /* time at which callback expires */ unsigned cb_version; /* callback version */ - unsigned cb_expiry; /* callback expiry time */ afs_callback_type_t cb_type; /* type of callback */ - bool cb_promised; /* true if promise still holds */ }; /* @@ -463,16 +474,20 @@ extern struct fscache_cookie_def afs_vnode_cache_index_def; /* * callback.c */ -extern struct workqueue_struct *afs_callback_update_worker; - extern void afs_init_callback_state(struct afs_server *); -extern void afs_broken_callback_work(struct work_struct *); -extern void afs_break_callbacks(struct afs_server *, size_t, - struct afs_callback[]); 
-extern void afs_discard_callback_on_delete(struct afs_vnode *); -extern void afs_give_up_callback(struct afs_vnode *); -extern void afs_dispatch_give_up_callbacks(struct work_struct *); -extern void afs_flush_callback_breaks(struct afs_server *); +extern void afs_break_callback(struct afs_vnode *); +extern void afs_break_callbacks(struct afs_server *, size_t,struct afs_callback[]); + +extern int afs_register_server_cb_interest(struct afs_vnode *, struct afs_cb_interest **, + struct afs_server *); +extern void afs_put_cb_interest(struct afs_net *, struct afs_cb_interest *); +extern void afs_clear_callback_interests(struct afs_net *, struct afs_volume *); + +static inline struct afs_cb_interest *afs_get_cb_interest(struct afs_cb_interest *cbi) +{ + refcount_inc(&cbi->usage); + return cbi; +} /* * cell.c @@ -560,10 +575,12 @@ extern int afs_fs_extend_lock(struct afs_server *, struct key *, struct afs_vnode *, bool); extern int afs_fs_release_lock(struct afs_server *, struct key *, struct afs_vnode *, bool); +extern int afs_fs_give_up_all_callbacks(struct afs_server *, struct key *, bool); /* * inode.c */ +extern int afs_iget5_test(struct inode *, void *); extern struct inode *afs_iget_autocell(struct inode *, const char *, int, struct key *); extern struct inode *afs_iget(struct super_block *, struct key *, @@ -676,11 +693,11 @@ extern int afs_permission(struct inode *, int); */ extern spinlock_t afs_server_peer_lock; -#define afs_get_server(S) \ -do { \ - _debug("GET SERVER %d", atomic_read(&(S)->usage)); \ - atomic_inc(&(S)->usage); \ -} while(0) +static inline struct afs_server *afs_get_server(struct afs_server *server) +{ + atomic_inc(&server->usage); + return server; +} extern void afs_server_timer(struct timer_list *); extern struct afs_server *afs_lookup_server(struct afs_cell *, @@ -741,7 +758,7 @@ static inline struct inode *AFS_VNODE_TO_I(struct afs_vnode *vnode) extern void afs_vnode_finalise_status_update(struct afs_vnode *, struct afs_server *); extern 
int afs_vnode_fetch_status(struct afs_vnode *, struct afs_vnode *, - struct key *); + struct key *, bool); extern int afs_vnode_fetch_data(struct afs_vnode *, struct key *, struct afs_read *); extern int afs_vnode_create(struct afs_vnode *, struct key *, const char *, diff --git a/fs/afs/main.c b/fs/afs/main.c index 38e15b1f0eec..331c08740861 100644 --- a/fs/afs/main.c +++ b/fs/afs/main.c @@ -123,10 +123,6 @@ static int __init afs_init(void) alloc_workqueue("kafs_vlupdated", WQ_MEM_RECLAIM, 0); if (!afs_vlocation_update_worker) goto error_vl_up; - afs_callback_update_worker = - alloc_ordered_workqueue("kafs_callbackd", WQ_MEM_RECLAIM); - if (!afs_callback_update_worker) - goto error_callback; afs_lock_manager = alloc_workqueue("kafs_lockd", WQ_MEM_RECLAIM, 0); if (!afs_lock_manager) goto error_lockmgr; @@ -158,8 +154,6 @@ error_cache: #endif destroy_workqueue(afs_lock_manager); error_lockmgr: - destroy_workqueue(afs_callback_update_worker); -error_callback: destroy_workqueue(afs_vlocation_update_worker); error_vl_up: destroy_workqueue(afs_async_calls); @@ -189,7 +183,6 @@ static void __exit afs_exit(void) fscache_unregister_netfs(&afs_cache_netfs); #endif destroy_workqueue(afs_lock_manager); - destroy_workqueue(afs_callback_update_worker); destroy_workqueue(afs_vlocation_update_worker); destroy_workqueue(afs_async_calls); destroy_workqueue(afs_wq); diff --git a/fs/afs/security.c b/fs/afs/security.c index faca66227ecf..7cc61c8b748b 100644 --- a/fs/afs/security.c +++ b/fs/afs/security.c @@ -115,6 +115,7 @@ void afs_clear_permits(struct afs_vnode *vnode) mutex_lock(&vnode->permits_lock); permits = vnode->permits; RCU_INIT_POINTER(vnode->permits, NULL); + vnode->cb_break++; mutex_unlock(&vnode->permits_lock); if (permits) @@ -264,8 +265,7 @@ static int afs_check_permit(struct afs_vnode *vnode, struct key *key, * (the post-processing will cache the result on auth_vnode) */ _debug("no valid permit"); - set_bit(AFS_VNODE_CB_BROKEN, &vnode->flags); - ret = 
afs_vnode_fetch_status(vnode, auth_vnode, key); + ret = afs_vnode_fetch_status(vnode, auth_vnode, key, true); if (ret < 0) { iput(&auth_vnode->vfs_inode); *_access = 0; @@ -304,14 +304,9 @@ int afs_permission(struct inode *inode, int mask) return PTR_ERR(key); } - /* if the promise has expired, we need to check the server again */ - if (!vnode->cb_promised) { - _debug("not promised"); - ret = afs_vnode_fetch_status(vnode, NULL, key); - if (ret < 0) - goto error; - _debug("new promise [fl=%lx]", vnode->flags); - } + ret = afs_validate(vnode, key); + if (ret < 0) + goto error; /* check the permits to see if we've got one yet */ ret = afs_check_permit(vnode, key, &access); diff --git a/fs/afs/server.c b/fs/afs/server.c index c63974f06385..4e66608fc805 100644 --- a/fs/afs/server.c +++ b/fs/afs/server.c @@ -94,12 +94,8 @@ static struct afs_server *afs_alloc_server(struct afs_cell *cell, INIT_LIST_HEAD(&server->grave); init_rwsem(&server->sem); spin_lock_init(&server->fs_lock); - server->fs_vnodes = RB_ROOT; - server->cb_promises = RB_ROOT; - spin_lock_init(&server->cb_lock); - init_waitqueue_head(&server->cb_break_waitq); - INIT_DELAYED_WORK(&server->cb_break_work, - afs_dispatch_give_up_callbacks); + INIT_LIST_HEAD(&server->cb_interests); + rwlock_init(&server->cb_break_lock); server->addr = *addr; afs_inc_servers_outstanding(cell->net); @@ -258,8 +254,6 @@ void afs_put_server(struct afs_net *net, struct afs_server *server) return; } - afs_flush_callback_breaks(server); - spin_lock(&net->server_graveyard_lock); if (atomic_read(&server->usage) == 0) { list_move_tail(&server->grave, &net->server_graveyard); @@ -277,15 +271,8 @@ static void afs_destroy_server(struct afs_net *net, struct afs_server *server) { _enter("%p", server); - ASSERTIF(server->cb_break_head != server->cb_break_tail, - delayed_work_pending(&server->cb_break_work)); - - ASSERTCMP(server->fs_vnodes.rb_node, ==, NULL); - ASSERTCMP(server->cb_promises.rb_node, ==, NULL); - ASSERTCMP(server->cb_break_head, 
==, server->cb_break_tail); - ASSERTCMP(atomic_read(&server->cb_break_n), ==, 0); - - afs_put_cell(server->net, server->cell); + afs_fs_give_up_all_callbacks(server, NULL, false); + afs_put_cell(net, server->cell); kfree(server); afs_dec_servers_outstanding(net); } diff --git a/fs/afs/super.c b/fs/afs/super.c index dd218f370359..c8fb1a497a84 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -512,8 +512,12 @@ error: static void afs_kill_super(struct super_block *sb) { - struct afs_super_info *as = sb->s_fs_info; + struct afs_super_info *as = AFS_FS_S(sb); + /* Clear the callback interests (which will do ilookup5) before + * deactivating the superblock. + */ + afs_clear_callback_interests(as->net, as->volume); kill_anon_super(sb); afs_destroy_sbi(as); } @@ -536,7 +540,7 @@ static void afs_i_init_once(void *_vnode) INIT_LIST_HEAD(&vnode->pending_locks); INIT_LIST_HEAD(&vnode->granted_locks); INIT_DELAYED_WORK(&vnode->lock_work, afs_lock_work); - INIT_WORK(&vnode->cb_broken_work, afs_broken_callback_work); + seqlock_init(&vnode->cb_lock); } /* @@ -558,7 +562,6 @@ static struct inode *afs_alloc_inode(struct super_block *sb) vnode->volume = NULL; vnode->update_cnt = 0; vnode->flags = 1 << AFS_VNODE_UNSET; - vnode->cb_promised = false; _leave(" = %p", &vnode->vfs_inode); return &vnode->vfs_inode; @@ -582,7 +585,7 @@ static void afs_destroy_inode(struct inode *inode) _debug("DESTROY INODE %p", inode); - ASSERTCMP(vnode->server, ==, NULL); + ASSERTCMP(vnode->cb_interest, ==, NULL); call_rcu(&inode->i_rcu, afs_i_callback); atomic_dec(&afs_count_active_inodes); diff --git a/fs/afs/vnode.c b/fs/afs/vnode.c index b79d05374878..c1bf2124cef5 100644 --- a/fs/afs/vnode.c +++ b/fs/afs/vnode.c @@ -16,189 +16,20 @@ #include #include "internal.h" -#if 0 -static noinline bool dump_tree_aux(struct rb_node *node, struct rb_node *parent, - int depth, char lr) -{ - struct afs_vnode *vnode; - bool bad = false; - - if (!node) - return false; - - if (node->rb_left) - bad = 
dump_tree_aux(node->rb_left, node, depth + 2, '/'); - - vnode = rb_entry(node, struct afs_vnode, cb_promise); - _debug("%c %*.*s%c%p {%d}", - rb_is_red(node) ? 'R' : 'B', - depth, depth, "", lr, - vnode, vnode->cb_expires_at); - if (rb_parent(node) != parent) { - printk("BAD: %p != %p\n", rb_parent(node), parent); - bad = true; - } - - if (node->rb_right) - bad |= dump_tree_aux(node->rb_right, node, depth + 2, '\\'); - - return bad; -} - -static noinline void dump_tree(const char *name, struct afs_server *server) -{ - _enter("%s", name); - if (dump_tree_aux(server->cb_promises.rb_node, NULL, 0, '-')) - BUG(); -} -#endif - /* - * insert a vnode into the backing server's vnode tree - */ -static void afs_install_vnode(struct afs_vnode *vnode, - struct afs_server *server) -{ - struct afs_server *old_server = vnode->server; - struct afs_vnode *xvnode; - struct rb_node *parent, **p; - - _enter("%p,%p", vnode, server); - - if (old_server) { - spin_lock(&old_server->fs_lock); - rb_erase(&vnode->server_rb, &old_server->fs_vnodes); - spin_unlock(&old_server->fs_lock); - } - - afs_get_server(server); - vnode->server = server; - afs_put_server(afs_v2net(vnode), old_server); - - /* insert into the server's vnode tree in FID order */ - spin_lock(&server->fs_lock); - - parent = NULL; - p = &server->fs_vnodes.rb_node; - while (*p) { - parent = *p; - xvnode = rb_entry(parent, struct afs_vnode, server_rb); - if (vnode->fid.vid < xvnode->fid.vid) - p = &(*p)->rb_left; - else if (vnode->fid.vid > xvnode->fid.vid) - p = &(*p)->rb_right; - else if (vnode->fid.vnode < xvnode->fid.vnode) - p = &(*p)->rb_left; - else if (vnode->fid.vnode > xvnode->fid.vnode) - p = &(*p)->rb_right; - else if (vnode->fid.unique < xvnode->fid.unique) - p = &(*p)->rb_left; - else if (vnode->fid.unique > xvnode->fid.unique) - p = &(*p)->rb_right; - else - BUG(); /* can't happen unless afs_iget() malfunctions */ - } - - rb_link_node(&vnode->server_rb, parent, p); - rb_insert_color(&vnode->server_rb, 
&server->fs_vnodes); - - spin_unlock(&server->fs_lock); - _leave(""); -} - -/* - * insert a vnode into the promising server's update/expiration tree - * - caller must hold vnode->lock - */ -static void afs_vnode_note_promise(struct afs_vnode *vnode, - struct afs_server *server) -{ - struct afs_server *old_server; - struct afs_vnode *xvnode; - struct rb_node *parent, **p; - - _enter("%p,%p", vnode, server); - - ASSERT(server != NULL); - - old_server = vnode->server; - if (vnode->cb_promised) { - if (server == old_server && - vnode->cb_expires == vnode->cb_expires_at) { - _leave(" [no change]"); - return; - } - - spin_lock(&old_server->cb_lock); - if (vnode->cb_promised) { - _debug("delete"); - rb_erase(&vnode->cb_promise, &old_server->cb_promises); - vnode->cb_promised = false; - } - spin_unlock(&old_server->cb_lock); - } - - if (vnode->server != server) - afs_install_vnode(vnode, server); - - vnode->cb_expires_at = vnode->cb_expires; - _debug("PROMISE on %p {%lu}", - vnode, (unsigned long) vnode->cb_expires_at); - - /* abuse an RB-tree to hold the expiration order (we may have multiple - * items with the same expiration time) */ - spin_lock(&server->cb_lock); - - parent = NULL; - p = &server->cb_promises.rb_node; - while (*p) { - parent = *p; - xvnode = rb_entry(parent, struct afs_vnode, cb_promise); - if (vnode->cb_expires_at < xvnode->cb_expires_at) - p = &(*p)->rb_left; - else - p = &(*p)->rb_right; - } - - rb_link_node(&vnode->cb_promise, parent, p); - rb_insert_color(&vnode->cb_promise, &server->cb_promises); - vnode->cb_promised = true; - - spin_unlock(&server->cb_lock); - _leave(""); -} - -/* - * handle remote file deletion by discarding the callback promise + * Handle remote file deletion. 
*/ static void afs_vnode_deleted_remotely(struct afs_vnode *vnode) { - struct afs_server *server; + struct afs_cb_interest *cbi = vnode->cb_interest; - _enter("{%p}", vnode->server); + _enter("{%p}", cbi); set_bit(AFS_VNODE_DELETED, &vnode->flags); - server = vnode->server; - if (server) { - if (vnode->cb_promised) { - spin_lock(&server->cb_lock); - if (vnode->cb_promised) { - rb_erase(&vnode->cb_promise, - &server->cb_promises); - vnode->cb_promised = false; - } - spin_unlock(&server->cb_lock); - } - - spin_lock(&server->fs_lock); - rb_erase(&vnode->server_rb, &server->fs_vnodes); - spin_unlock(&server->fs_lock); - - vnode->server = NULL; - afs_put_server(afs_v2net(vnode), server); - } else { - ASSERT(!vnode->cb_promised); + if (cbi) { + vnode->cb_interest = NULL; + afs_put_cb_interest(afs_v2net(vnode), cbi); } _leave(""); @@ -218,8 +49,6 @@ void afs_vnode_finalise_status_update(struct afs_vnode *vnode, _enter("%p,%p", vnode, server); spin_lock(&vnode->lock); - clear_bit(AFS_VNODE_CB_BROKEN, &vnode->flags); - afs_vnode_note_promise(vnode, server); vnode->update_cnt--; ASSERTCMP(vnode->update_cnt, >=, 0); spin_unlock(&vnode->lock); @@ -238,8 +67,6 @@ static void afs_vnode_status_update_failed(struct afs_vnode *vnode, int ret) spin_lock(&vnode->lock); - clear_bit(AFS_VNODE_CB_BROKEN, &vnode->flags); - if (ret == -ENOENT) { /* the file was deleted on the server */ _debug("got NOENT from server - marking file deleted"); @@ -261,8 +88,8 @@ static void afs_vnode_status_update_failed(struct afs_vnode *vnode, int ret) * - there are any outstanding ops that will fetch the status * - TODO implement local caching */ -int afs_vnode_fetch_status(struct afs_vnode *vnode, - struct afs_vnode *auth_vnode, struct key *key) +int afs_vnode_fetch_status(struct afs_vnode *vnode, struct afs_vnode *auth_vnode, + struct key *key, bool force) { struct afs_server *server; unsigned long acl_order; @@ -270,12 +97,13 @@ int afs_vnode_fetch_status(struct afs_vnode *vnode, 
DECLARE_WAITQUEUE(myself, current); - _enter("%s,{%x:%u.%u}", + _enter("%s,{%x:%u.%u,S=%lx},%u", vnode->volume->vlocation->vldb.name, - vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique); + vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique, + vnode->flags, + force); - if (!test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags) && - vnode->cb_promised) { + if (!force && test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) { _leave(" [unchanged]"); return 0; } @@ -291,8 +119,7 @@ int afs_vnode_fetch_status(struct afs_vnode *vnode, spin_lock(&vnode->lock); - if (!test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags) && - vnode->cb_promised) { + if (!force && test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) { spin_unlock(&vnode->lock); _leave(" [unchanged]"); return 0; @@ -310,7 +137,7 @@ int afs_vnode_fetch_status(struct afs_vnode *vnode, /* wait for the status to be updated */ for (;;) { - if (!test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags)) + if (test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) break; if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) break; diff --git a/fs/afs/volume.c b/fs/afs/volume.c index 52f0dc40732b..4f6fd10094c6 100644 --- a/fs/afs/volume.c +++ b/fs/afs/volume.c @@ -153,8 +153,10 @@ error: error_discard: up_write(&params->cell->vl_sem); - for (loop = volume->nservers - 1; loop >= 0; loop--) + for (loop = volume->nservers - 1; loop >= 0; loop--) { + afs_put_cb_interest(params->net, volume->cb_interests[loop]); afs_put_server(params->net, volume->servers[loop]); + } kfree(volume); goto error; @@ -197,8 +199,10 @@ void afs_put_volume(struct afs_cell *cell, struct afs_volume *volume) #endif afs_put_vlocation(cell->net, vlocation); - for (loop = volume->nservers - 1; loop >= 0; loop--) + for (loop = volume->nservers - 1; loop >= 0; loop--) { + afs_put_cb_interest(cell->net, volume->cb_interests[loop]); afs_put_server(cell->net, volume->servers[loop]); + } kfree(volume); @@ -218,10 +222,10 @@ struct afs_server *afs_volume_pick_fileserver(struct afs_vnode *vnode) _enter("%s",
volume->vlocation->vldb.name); /* stick with the server we're already using if we can */ - if (vnode->server && vnode->server->fs_state == 0) { - afs_get_server(vnode->server); - _leave(" = %p [current]", vnode->server); - return vnode->server; + if (vnode->cb_interest && vnode->cb_interest->server->fs_state == 0) { + afs_get_server(vnode->cb_interest->server); + _leave(" = %p [current]", vnode->cb_interest->server); + return vnode->cb_interest->server; } down_read(&volume->server_sem); @@ -244,13 +248,8 @@ struct afs_server *afs_volume_pick_fileserver(struct afs_vnode *vnode) _debug("consider %d [%d]", loop, state); switch (state) { - /* found an apparently healthy server */ case 0: - afs_get_server(server); - up_read(&volume->server_sem); - _leave(" = %p (picked %pIS)", - server, &server->addr.transport); - return server; + goto picked_server; case -ENETUNREACH: if (ret == 0) @@ -284,9 +283,25 @@ struct afs_server *afs_volume_pick_fileserver(struct afs_vnode *vnode) /* no available servers * - TODO: handle the no active servers case better */ +error: up_read(&volume->server_sem); _leave(" = %d", ret); return ERR_PTR(ret); + +picked_server: + /* Found an apparently healthy server. We need to register an interest + * in receiving callbacks before we talk to it. 
+ */ + ret = afs_register_server_cb_interest(vnode, + &volume->cb_interests[loop], server); + if (ret < 0) + goto error; + + afs_get_server(server); + up_read(&volume->server_sem); + _leave(" = %p (picked %pIS)", + server, &server->addr.transport); + return server; } /* @@ -309,14 +324,12 @@ int afs_volume_release_fileserver(struct afs_vnode *vnode, switch (result) { /* success */ case 0: - server->fs_act_jif = jiffies; server->fs_state = 0; _leave(""); return 1; /* the fileserver denied all knowledge of the volume */ case -ENOMEDIUM: - server->fs_act_jif = jiffies; down_write(&volume->server_sem); /* firstly, find where the server is in the active list (if it @@ -365,7 +378,6 @@ int afs_volume_release_fileserver(struct afs_vnode *vnode, */ spin_lock(&server->fs_lock); if (!server->fs_state) { - server->fs_dead_jif = jiffies + HZ * 10; server->fs_state = result; printk("kAFS: SERVER DEAD state=%d\n", result); } @@ -374,7 +386,6 @@ int afs_volume_release_fileserver(struct afs_vnode *vnode, /* miscellaneous error */ default: - server->fs_act_jif = jiffies; case -ENOMEM: case -ENONET: /* tell the caller to accept the result */ From be080a6f43c40976afc950ee55e9b7f8e2b53525 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 2 Nov 2017 15:27:49 +0000 Subject: [PATCH 18/35] afs: Overhaul permit caching Overhaul permit caching in AFS by making it per-vnode and sharing permit lists where possible. When most of the fileserver operations are called, they return a status structure indicating the (revised) details of the vnode or vnodes involved in the operation. This includes the access mask derived from the ACL (named CallerAccess in the protocol definition file). This is cacheable and if the ACL changes, the server will tell us that it is breaking the callback promise, at which point we can discard the currently cached permits. With this patch, the afs_permits structure has, at the end, an array of { key, CallerAccess } elements, sorted by key pointer.
This is then cached in a hash table so that it can be shared between vnodes with the same access permits. Permit lists can only be shared if they contain the exact same set of key->CallerAccess mappings. Note that that table is global rather than being per-net_ns. If the keys in a permit list cross net_ns boundaries, there is no problem sharing the cached permits, since the permits are just integer masks. Since permit lists pin keys, the permit cache also makes it easier for a future patch to find all occurrences of a key and remove them by means of setting the afs_permits::invalidated flag and then clearing the appropriate key pointer. In such an event, memory barriers will need adding. Lastly, the permit caching is skipped if the server has sent either a vnode-specific or an entire-server callback since the start of the operation. Signed-off-by: David Howells --- fs/afs/afs.h | 1 - fs/afs/flock.c | 4 +- fs/afs/fsclient.c | 5 +- fs/afs/inode.c | 13 +- fs/afs/internal.h | 27 ++-- fs/afs/main.c | 1 + fs/afs/security.c | 365 +++++++++++++++++++++++++++------------------- fs/afs/super.c | 1 - fs/afs/vnode.c | 12 +- 9 files changed, 243 insertions(+), 186 deletions(-) diff --git a/fs/afs/afs.h b/fs/afs/afs.h index 2e2887a7d331..b94d0edc2b78 100644 --- a/fs/afs/afs.h +++ b/fs/afs/afs.h @@ -136,7 +136,6 @@ struct afs_file_status { afs_access_t caller_access; /* access rights for authenticated caller */ afs_access_t anon_access; /* access rights for unauthenticated caller */ umode_t mode; /* UNIX mode */ - struct afs_fid parent; /* parent dir ID for non-dirs only */ time_t mtime_client; /* last time client changed data */ time_t mtime_server; /* last time server changed data */ s32 lock_count; /* file lock count (0=UNLK -1=WRLCK +ve=#RDLCK */ diff --git a/fs/afs/flock.c b/fs/afs/flock.c index aba36e0b1460..2b31ea58c50c 100644 --- a/fs/afs/flock.c +++ b/fs/afs/flock.c @@ -383,7 +383,7 @@ given_lock: /* again, make sure we've got a callback on this file and, again, make * 
sure that our view of the data version is up to date (we ignore * errors incurred here and deal with the consequences elsewhere) */ - afs_vnode_fetch_status(vnode, NULL, key, false); + afs_vnode_fetch_status(vnode, key, false); error: spin_unlock(&inode->i_lock); @@ -455,7 +455,7 @@ static int afs_do_getlk(struct file *file, struct file_lock *fl) posix_test_lock(file, fl); if (fl->fl_type == F_UNLCK) { /* no local locks; consult the server */ - ret = afs_vnode_fetch_status(vnode, NULL, key, true); + ret = afs_vnode_fetch_status(vnode, key, true); if (ret < 0) goto error; lock_count = vnode->status.lock_count; diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index c6658405fe91..680c02d510f7 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -78,8 +78,8 @@ static void xdr_decode_AFSFetchStatus(const __be32 **_bp, EXTRACT(status->caller_access); /* call ticket dependent */ EXTRACT(status->anon_access); EXTRACT(status->mode); - EXTRACT(status->parent.vnode); - EXTRACT(status->parent.unique); + bp++; /* parent.vnode */ + bp++; /* parent.unique */ bp++; /* seg size */ status->mtime_client = ntohl(*bp++); status->mtime_server = ntohl(*bp++); @@ -103,7 +103,6 @@ static void xdr_decode_AFSFetchStatus(const __be32 **_bp, status->mtime_client, status->mtime_server); if (vnode) { - status->parent.vid = vnode->fid.vid; if (changed && !test_bit(AFS_VNODE_UNSET, &vnode->flags)) { _debug("vnode changed"); i_size_write(&vnode->vfs_inode, size); diff --git a/fs/afs/inode.c b/fs/afs/inode.c index 4822a2a50a61..ee86d5ad22d1 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -238,7 +238,7 @@ struct inode *afs_iget(struct super_block *sb, struct key *key, if (!status) { /* it's a remotely extant inode */ - ret = afs_vnode_fetch_status(vnode, NULL, key, true); + ret = afs_vnode_fetch_status(vnode, key, true); if (ret < 0) goto bad_inode; } else { @@ -358,7 +358,7 @@ int afs_validate(struct afs_vnode *vnode, struct key *key) * access */ if (!test_bit(AFS_VNODE_CB_PROMISED, 
&vnode->flags)) { _debug("not promised"); - ret = afs_vnode_fetch_status(vnode, NULL, key, false); + ret = afs_vnode_fetch_status(vnode, key, false); if (ret < 0) { if (ret == -ENOENT) { set_bit(AFS_VNODE_DELETED, &vnode->flags); @@ -431,7 +431,6 @@ int afs_drop_inode(struct inode *inode) */ void afs_evict_inode(struct inode *inode) { - struct afs_permits *permits; struct afs_vnode *vnode; vnode = AFS_FS_I(inode); @@ -460,13 +459,7 @@ void afs_evict_inode(struct inode *inode) vnode->cache = NULL; #endif - mutex_lock(&vnode->permits_lock); - permits = vnode->permits; - RCU_INIT_POINTER(vnode->permits, NULL); - mutex_unlock(&vnode->permits_lock); - if (permits) - call_rcu(&permits->rcu, afs_zap_permits); - + afs_put_permits(vnode->permit_cache); _leave(""); } diff --git a/fs/afs/internal.h b/fs/afs/internal.h index e3c99437f6e0..7c318666e436 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -389,8 +389,7 @@ struct afs_vnode { #ifdef CONFIG_AFS_FSCACHE struct fscache_cookie *cache; /* caching cookie */ #endif - struct afs_permits *permits; /* cache of permits so far obtained */ - struct mutex permits_lock; /* lock for altering permits list */ + struct afs_permits *permit_cache; /* cache of permits so far obtained */ struct mutex validate_lock; /* lock for validating this vnode */ wait_queue_head_t update_waitq; /* status fetch waitqueue */ int update_cnt; /* number of outstanding ops that will update the @@ -411,8 +410,6 @@ struct afs_vnode { #define AFS_VNODE_AUTOCELL 10 /* set if Vnode is an auto mount point */ #define AFS_VNODE_PSEUDODIR 11 /* set if Vnode is a pseudo directory */ - long acl_order; /* ACL check count (callback break count) */ - struct list_head writebacks; /* alterations in pagecache that need writing */ struct list_head pending_locks; /* locks waiting to be granted */ struct list_head granted_locks; /* locks granted on this file */ @@ -435,16 +432,21 @@ struct afs_vnode { */ struct afs_permit { struct key *key; /* RxRPC ticket holding a 
security context */ - afs_access_t access_mask; /* access mask for this key */ + afs_access_t access; /* CallerAccess value for this key */ }; /* - * cache of security records from attempts to access a vnode + * Immutable cache of CallerAccess records from attempts to access vnodes. + * These may be shared between multiple vnodes. */ struct afs_permits { - struct rcu_head rcu; /* disposal procedure */ - int count; /* number of records */ - struct afs_permit permits[0]; /* the permits so far examined */ + struct rcu_head rcu; + struct hlist_node hash_node; /* Link in hash */ + unsigned long h; /* Hash value for this permit list */ + refcount_t usage; + unsigned short nr_permits; /* Number of records */ + bool invalidated; /* Invalidated due to key change */ + struct afs_permit permits[]; /* List of permits sorted by key pointer */ }; /* @@ -682,11 +684,13 @@ static inline int afs_transfer_reply(struct afs_call *call) /* * security.c */ +extern void afs_put_permits(struct afs_permits *); extern void afs_clear_permits(struct afs_vnode *); -extern void afs_cache_permit(struct afs_vnode *, struct key *, long); +extern void afs_cache_permit(struct afs_vnode *, struct key *, unsigned int); extern void afs_zap_permits(struct rcu_head *); extern struct key *afs_request_key(struct afs_cell *); extern int afs_permission(struct inode *, int); +extern void __exit afs_clean_up_permit_cache(void); /* * server.c @@ -757,8 +761,7 @@ static inline struct inode *AFS_VNODE_TO_I(struct afs_vnode *vnode) extern void afs_vnode_finalise_status_update(struct afs_vnode *, struct afs_server *); -extern int afs_vnode_fetch_status(struct afs_vnode *, struct afs_vnode *, - struct key *, bool); +extern int afs_vnode_fetch_status(struct afs_vnode *, struct key *, bool); extern int afs_vnode_fetch_data(struct afs_vnode *, struct key *, struct afs_read *); extern int afs_vnode_create(struct afs_vnode *, struct key *, const char *, diff --git a/fs/afs/main.c b/fs/afs/main.c index 
331c08740861..010e2e1a40f4 100644 --- a/fs/afs/main.c +++ b/fs/afs/main.c @@ -186,6 +186,7 @@ static void __exit afs_exit(void) destroy_workqueue(afs_vlocation_update_worker); destroy_workqueue(afs_async_calls); destroy_workqueue(afs_wq); + afs_clean_up_permit_cache(); rcu_barrier(); } diff --git a/fs/afs/security.c b/fs/afs/security.c index 7cc61c8b748b..1b5198fc1657 100644 --- a/fs/afs/security.c +++ b/fs/afs/security.c @@ -1,6 +1,6 @@ /* AFS security handling * - * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. + * Copyright (C) 2007, 2017 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) * * This program is free software; you can redistribute it and/or @@ -14,9 +14,13 @@ #include #include #include +#include #include #include "internal.h" +static DEFINE_HASHTABLE(afs_permits_cache, 10); +static DEFINE_SPINLOCK(afs_permits_lock); + /* * get a key */ @@ -46,168 +50,233 @@ struct key *afs_request_key(struct afs_cell *cell) } /* - * dispose of a permits list + * Dispose of a list of permits. */ -void afs_zap_permits(struct rcu_head *rcu) +static void afs_permits_rcu(struct rcu_head *rcu) { struct afs_permits *permits = container_of(rcu, struct afs_permits, rcu); - int loop; + int i; - _enter("{%d}", permits->count); - - for (loop = permits->count - 1; loop >= 0; loop--) - key_put(permits->permits[loop].key); + for (i = 0; i < permits->nr_permits; i++) + key_put(permits->permits[i].key); kfree(permits); } /* - * dispose of a permits list in which all the key pointers have been copied + * Discard a permission cache. 
*/ -static void afs_dispose_of_permits(struct rcu_head *rcu) +void afs_put_permits(struct afs_permits *permits) { - struct afs_permits *permits = - container_of(rcu, struct afs_permits, rcu); - - _enter("{%d}", permits->count); - - kfree(permits); -} - -/* - * get the authorising vnode - this is the specified inode itself if it's a - * directory or it's the parent directory if the specified inode is a file or - * symlink - * - the caller must release the ref on the inode - */ -static struct afs_vnode *afs_get_auth_inode(struct afs_vnode *vnode, - struct key *key) -{ - struct afs_vnode *auth_vnode; - struct inode *auth_inode; - - _enter(""); - - if (S_ISDIR(vnode->vfs_inode.i_mode)) { - auth_inode = igrab(&vnode->vfs_inode); - ASSERT(auth_inode != NULL); - } else { - auth_inode = afs_iget(vnode->vfs_inode.i_sb, key, - &vnode->status.parent, NULL, NULL); - if (IS_ERR(auth_inode)) - return ERR_CAST(auth_inode); + if (permits && refcount_dec_and_test(&permits->usage)) { + spin_lock(&afs_permits_lock); + hash_del_rcu(&permits->hash_node); + spin_unlock(&afs_permits_lock); + call_rcu(&permits->rcu, afs_permits_rcu); } - - auth_vnode = AFS_FS_I(auth_inode); - _leave(" = {%x}", auth_vnode->fid.vnode); - return auth_vnode; } /* - * clear the permit cache on a directory vnode + * Clear a permit cache on callback break. 
*/ void afs_clear_permits(struct afs_vnode *vnode) { struct afs_permits *permits; - _enter("{%x:%u}", vnode->fid.vid, vnode->fid.vnode); - - mutex_lock(&vnode->permits_lock); - permits = vnode->permits; - RCU_INIT_POINTER(vnode->permits, NULL); + spin_lock(&vnode->lock); + permits = rcu_dereference_protected(vnode->permit_cache, + lockdep_is_held(&vnode->lock)); + RCU_INIT_POINTER(vnode->permit_cache, NULL); vnode->cb_break++; - mutex_unlock(&vnode->permits_lock); + spin_unlock(&vnode->lock); if (permits) - call_rcu(&permits->rcu, afs_zap_permits); - _leave(""); + afs_put_permits(permits); } /* - * add the result obtained for a vnode to its or its parent directory's cache - * for the key used to access it + * Hash a list of permits. Use simple addition to make it easy to add an extra + * one at an as-yet indeterminate position in the list. */ -void afs_cache_permit(struct afs_vnode *vnode, struct key *key, long acl_order) +static void afs_hash_permits(struct afs_permits *permits) { - struct afs_permits *permits, *xpermits; - struct afs_permit *permit; - struct afs_vnode *auth_vnode; - int count, loop; + unsigned long h = permits->nr_permits; + int i; - _enter("{%x:%u},%x,%lx", - vnode->fid.vid, vnode->fid.vnode, key_serial(key), acl_order); - - auth_vnode = afs_get_auth_inode(vnode, key); - if (IS_ERR(auth_vnode)) { - _leave(" [get error %ld]", PTR_ERR(auth_vnode)); - return; + for (i = 0; i < permits->nr_permits; i++) { + h += (unsigned long)permits->permits[i].key / sizeof(void *); + h += permits->permits[i].access; } - mutex_lock(&auth_vnode->permits_lock); + permits->h = h; +} - /* guard against a rename being detected whilst we waited for the - * lock */ - if (memcmp(&auth_vnode->fid, &vnode->status.parent, - sizeof(struct afs_fid)) != 0) { - _debug("renamed"); - goto out_unlock; - } +/* + * Cache the CallerAccess result obtained from doing a fileserver operation + * that returned a vnode status for a particular key. 
If a callback break + * occurs whilst the operation was in progress then we have to ditch the cache + * as the ACL *may* have changed. + */ +void afs_cache_permit(struct afs_vnode *vnode, struct key *key, + unsigned int cb_break) +{ + struct afs_permits *permits, *xpermits, *replacement, *new = NULL; + afs_access_t caller_access = READ_ONCE(vnode->status.caller_access); + size_t size = 0; + bool changed = false; + int i, j; - /* have to be careful as the directory's callback may be broken between - * us receiving the status we're trying to cache and us getting the - * lock to update the cache for the status */ - if (auth_vnode->acl_order - acl_order > 0) { - _debug("ACL changed?"); - goto out_unlock; - } + _enter("{%x:%u},%x,%x", + vnode->fid.vid, vnode->fid.vnode, key_serial(key), caller_access); - /* always update the anonymous mask */ - _debug("anon access %x", vnode->status.anon_access); - auth_vnode->status.anon_access = vnode->status.anon_access; - if (key == vnode->volume->cell->anonymous_key) - goto out_unlock; + rcu_read_lock(); - xpermits = auth_vnode->permits; - count = 0; - if (xpermits) { - /* see if the permit is already in the list - * - if it is then we just amend the list - */ - count = xpermits->count; - permit = xpermits->permits; - for (loop = count; loop > 0; loop--) { - if (permit->key == key) { - permit->access_mask = - vnode->status.caller_access; - goto out_unlock; + /* Check for the common case first: We got back the same access as last + * time we tried and already have it recorded. 
+ */ + permits = rcu_dereference(vnode->permit_cache); + if (permits) { + if (!permits->invalidated) { + for (i = 0; i < permits->nr_permits; i++) { + if (permits->permits[i].key < key) + continue; + if (permits->permits[i].key > key) + break; + if (permits->permits[i].access != caller_access) { + changed = true; + break; + } + + if (cb_break != (vnode->cb_break + + vnode->cb_interest->server->cb_s_break)) { + changed = true; + break; + } + + /* The cache is still good. */ + rcu_read_unlock(); + return; } - permit++; + } + + changed |= permits->invalidated; + size = permits->nr_permits; + + /* If this set of permits is now wrong, clear the permits + * pointer so that no one tries to use the stale information. + */ + if (changed) { + spin_lock(&vnode->lock); + if (permits != rcu_access_pointer(vnode->permit_cache)) + goto someone_else_changed_it_unlock; + RCU_INIT_POINTER(vnode->permit_cache, NULL); + spin_unlock(&vnode->lock); + + afs_put_permits(permits); + permits = NULL; + size = 0; } } - permits = kmalloc(sizeof(*permits) + sizeof(*permit) * (count + 1), - GFP_NOFS); - if (!permits) - goto out_unlock; + if (cb_break != (vnode->cb_break + vnode->cb_interest->server->cb_s_break)) { + rcu_read_unlock(); + goto someone_else_changed_it; + } - if (xpermits) - memcpy(permits->permits, xpermits->permits, - count * sizeof(struct afs_permit)); + /* We need a ref on any permits list we want to copy as we'll have to + * drop the lock to do memory allocation. 
+ */ + if (permits && !refcount_inc_not_zero(&permits->usage)) { + rcu_read_unlock(); + goto someone_else_changed_it; + } - _debug("key %x access %x", - key_serial(key), vnode->status.caller_access); - permits->permits[count].access_mask = vnode->status.caller_access; - permits->permits[count].key = key_get(key); - permits->count = count + 1; + rcu_read_unlock(); - rcu_assign_pointer(auth_vnode->permits, permits); - if (xpermits) - call_rcu(&xpermits->rcu, afs_dispose_of_permits); + /* Speculatively create a new list with the revised permission set. We + * discard this if we find an extant match already in the hash, but + * it's easier to compare with memcmp this way. + * + * We fill in the key pointers at this time, but we don't get the refs + * yet. + */ + size++; + new = kzalloc(sizeof(struct afs_permits) + + sizeof(struct afs_permit) * size, GFP_NOFS); + if (!new) + return; -out_unlock: - mutex_unlock(&auth_vnode->permits_lock); - iput(&auth_vnode->vfs_inode); - _leave(""); + refcount_set(&new->usage, 1); + new->nr_permits = size; + i = j = 0; + if (permits) { + for (i = 0; i < permits->nr_permits; i++) { + if (j == i && permits->permits[i].key > key) { + new->permits[j].key = key; + new->permits[j].access = caller_access; + j++; + } + new->permits[j].key = permits->permits[i].key; + new->permits[j].access = permits->permits[i].access; + j++; + } + } + + if (j == i) { + new->permits[j].key = key; + new->permits[j].access = caller_access; + } + + afs_hash_permits(new); + + afs_put_permits(permits); + + /* Now see if the permit list we want is actually already available */ + spin_lock(&afs_permits_lock); + + hash_for_each_possible(afs_permits_cache, xpermits, hash_node, new->h) { + if (xpermits->h != new->h || + xpermits->invalidated || + xpermits->nr_permits != new->nr_permits || + memcmp(xpermits->permits, new->permits, + new->nr_permits * sizeof(struct afs_permit)) != 0) + continue; + + if (refcount_inc_not_zero(&xpermits->usage)) { + replacement = xpermits; + 
goto found; + } + + break; + } + + for (i = 0; i < new->nr_permits; i++) + key_get(new->permits[i].key); + hash_add_rcu(afs_permits_cache, &new->hash_node, new->h); + replacement = new; + new = NULL; + +found: + spin_unlock(&afs_permits_lock); + + kfree(new); + + spin_lock(&vnode->lock); + if (cb_break != (vnode->cb_break + vnode->cb_interest->server->cb_s_break) || + permits != rcu_access_pointer(vnode->permit_cache)) + goto someone_else_changed_it_unlock; + rcu_assign_pointer(vnode->permit_cache, replacement); + spin_unlock(&vnode->lock); + afs_put_permits(permits); + return; + +someone_else_changed_it_unlock: + spin_unlock(&vnode->lock); +someone_else_changed_it: + /* Someone else changed the cache under us - don't recheck at this + * time. + */ + return; } /* @@ -219,55 +288,45 @@ static int afs_check_permit(struct afs_vnode *vnode, struct key *key, afs_access_t *_access) { struct afs_permits *permits; - struct afs_permit *permit; - struct afs_vnode *auth_vnode; - bool valid; - int loop, ret; + bool valid = false; + int i, ret; _enter("{%x:%u},%x", vnode->fid.vid, vnode->fid.vnode, key_serial(key)); - auth_vnode = afs_get_auth_inode(vnode, key); - if (IS_ERR(auth_vnode)) { - *_access = 0; - _leave(" = %ld", PTR_ERR(auth_vnode)); - return PTR_ERR(auth_vnode); - } - - ASSERT(S_ISDIR(auth_vnode->vfs_inode.i_mode)); + permits = vnode->permit_cache; /* check the permits to see if we've got one yet */ - if (key == auth_vnode->volume->cell->anonymous_key) { + if (key == vnode->volume->cell->anonymous_key) { _debug("anon"); - *_access = auth_vnode->status.anon_access; + *_access = vnode->status.anon_access; valid = true; } else { - valid = false; rcu_read_lock(); - permits = rcu_dereference(auth_vnode->permits); + permits = rcu_dereference(vnode->permit_cache); if (permits) { - permit = permits->permits; - for (loop = permits->count; loop > 0; loop--) { - if (permit->key == key) { - _debug("found in cache"); - *_access = permit->access_mask; - valid = true; + for (i = 
0; i < permits->nr_permits; i++) { + if (permits->permits[i].key < key) + continue; + if (permits->permits[i].key > key) break; - } - permit++; + + *_access = permits->permits[i].access; + valid = !permits->invalidated; + break; } } rcu_read_unlock(); } if (!valid) { - /* check the status on the file we're actually interested in - * (the post-processing will cache the result on auth_vnode) */ + /* Check the status on the file we're actually interested in + * (the post-processing will cache the result). + */ _debug("no valid permit"); - ret = afs_vnode_fetch_status(vnode, auth_vnode, key, true); + ret = afs_vnode_fetch_status(vnode, key, true); if (ret < 0) { - iput(&auth_vnode->vfs_inode); *_access = 0; _leave(" = %d", ret); return ret; @@ -275,7 +334,6 @@ static int afs_check_permit(struct afs_vnode *vnode, struct key *key, *_access = vnode->status.caller_access; } - iput(&auth_vnode->vfs_inode); _leave(" = 0 [access %x]", *_access); return 0; } @@ -360,3 +418,12 @@ error: _leave(" = %d", ret); return ret; } + +void __exit afs_clean_up_permit_cache(void) +{ + int i; + + for (i = 0; i < HASH_SIZE(afs_permits_cache); i++) + WARN_ON_ONCE(!hlist_empty(&afs_permits_cache[i])); + +} diff --git a/fs/afs/super.c b/fs/afs/super.c index c8fb1a497a84..e62fb1bdadc6 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -532,7 +532,6 @@ static void afs_i_init_once(void *_vnode) memset(vnode, 0, sizeof(*vnode)); inode_init_once(&vnode->vfs_inode); init_waitqueue_head(&vnode->update_waitq); - mutex_init(&vnode->permits_lock); mutex_init(&vnode->validate_lock); spin_lock_init(&vnode->writeback_lock); spin_lock_init(&vnode->lock); diff --git a/fs/afs/vnode.c b/fs/afs/vnode.c index c1bf2124cef5..622e1100099b 100644 --- a/fs/afs/vnode.c +++ b/fs/afs/vnode.c @@ -88,11 +88,10 @@ static void afs_vnode_status_update_failed(struct afs_vnode *vnode, int ret) * - there are any outstanding ops that will fetch the status * - TODO implement local caching */ -int afs_vnode_fetch_status(struct 
afs_vnode *vnode, struct afs_vnode *auth_vnode, - struct key *key, bool force) +int afs_vnode_fetch_status(struct afs_vnode *vnode, struct key *key, bool force) { struct afs_server *server; - unsigned long acl_order; + unsigned int cb_break = 0; int ret; DECLARE_WAITQUEUE(myself, current); @@ -113,9 +112,7 @@ int afs_vnode_fetch_status(struct afs_vnode *vnode, struct afs_vnode *auth_vnode return -ENOENT; } - acl_order = 0; - if (auth_vnode) - acl_order = auth_vnode->acl_order; + cb_break = vnode->cb_break + vnode->cb_s_break; spin_lock(&vnode->lock); @@ -192,8 +189,7 @@ get_anyway: /* adjust the flags */ if (ret == 0) { _debug("adjust"); - if (auth_vnode) - afs_cache_permit(vnode, key, acl_order); + afs_cache_permit(vnode, key, cb_break); afs_vnode_finalise_status_update(vnode, server); afs_put_server(afs_v2net(vnode), server); } else { From 989782dcdc91a5e6d5999c7a52a84a60a0811e56 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 2 Nov 2017 15:27:50 +0000 Subject: [PATCH 19/35] afs: Overhaul cell database management Overhaul the way that the in-kernel AFS client keeps track of cells in the following manner: (1) Cells are now held in an rbtree to make walking them quicker and RCU managed (though this is probably overkill). (2) Cells now have a manager work item that: (A) Looks after fetching and refreshing the VL server list. (B) Manages cell record lifetime, including initialising and destruction. (C) Manages cell record caching whereby threads are kept around for a certain time after last use and then destroyed. (D) Manages the FS-Cache index cookie for a cell. It is not permitted for a cookie to be in use twice, so we have to be careful to not allow a new cell record to exist at the same time as an old record of the same name. (3) Each AFS network namespace is given a manager work item that manages the cells within it, maintaining a single timer to prod cells into updating their DNS records.
This uses the timer_reduce() facility to make the timer expire at the soonest timed event that needs happening. (4) When a module is being unloaded, cells and cell managers are now counted out using dec_after_work() to make sure the module text is pinned until after the data structures have been cleaned up. (5) Each cell's VL server list is now protected by a seqlock rather than a semaphore. Signed-off-by: David Howells --- fs/afs/cell.c | 988 +++++++++++++++++++++++++++++++--------------- fs/afs/internal.h | 60 ++- fs/afs/main.c | 16 +- fs/afs/proc.c | 15 +- fs/afs/super.c | 12 +- fs/afs/xattr.c | 2 +- 6 files changed, 740 insertions(+), 353 deletions(-) diff --git a/fs/afs/cell.c b/fs/afs/cell.c index 216821fd1a61..e83103e8a6fb 100644 --- a/fs/afs/cell.c +++ b/fs/afs/cell.c @@ -1,6 +1,6 @@ /* AFS cell and server record management * - * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved. + * Copyright (C) 2002, 2017 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) * * This program is free software; you can redistribute it and/or @@ -19,221 +19,324 @@ #include #include "internal.h" +unsigned __read_mostly afs_cell_gc_delay = 10; + +static void afs_manage_cell(struct work_struct *); + +static void afs_dec_cells_outstanding(struct afs_net *net) +{ + if (atomic_dec_and_test(&net->cells_outstanding)) + wake_up_atomic_t(&net->cells_outstanding); +} + /* - * allocate a cell record and fill in its name, VL server address list and + * Set the cell timer to fire after a given delay, assuming it's not already + * set for an earlier time. + */ +static void afs_set_cell_timer(struct afs_net *net, time64_t delay) +{ + if (net->live) { + atomic_inc(&net->cells_outstanding); + if (timer_reduce(&net->cells_timer, jiffies + delay * HZ)) + afs_dec_cells_outstanding(net); + } +} + +/* + * Look up and get an activation reference on a cell record under RCU + * conditions. The caller must hold the RCU read lock.
+ */ +struct afs_cell *afs_lookup_cell_rcu(struct afs_net *net, + const char *name, unsigned int namesz) +{ + struct afs_cell *cell = NULL; + struct rb_node *p; + int n, seq = 0, ret = 0; + + _enter("%*.*s", namesz, namesz, name); + + if (name && namesz == 0) + return ERR_PTR(-EINVAL); + if (namesz > AFS_MAXCELLNAME) + return ERR_PTR(-ENAMETOOLONG); + + do { + /* Unfortunately, rbtree walking doesn't give reliable results + * under just the RCU read lock, so we have to check for + * changes. + */ + if (cell) + afs_put_cell(net, cell); + cell = NULL; + ret = -ENOENT; + + read_seqbegin_or_lock(&net->cells_lock, &seq); + + if (!name) { + cell = rcu_dereference_raw(net->ws_cell); + if (cell) { + afs_get_cell(cell); + continue; + } + ret = -EDESTADDRREQ; + continue; + } + + p = rcu_dereference_raw(net->cells.rb_node); + while (p) { + cell = rb_entry(p, struct afs_cell, net_node); + + n = strncasecmp(cell->name, name, + min_t(size_t, cell->name_len, namesz)); + if (n == 0) + n = cell->name_len - namesz; + if (n < 0) { + p = rcu_dereference_raw(p->rb_left); + } else if (n > 0) { + p = rcu_dereference_raw(p->rb_right); + } else { + if (atomic_inc_not_zero(&cell->usage)) { + ret = 0; + break; + } + /* We want to repeat the search, this time with + * the lock properly locked. + */ + } + cell = NULL; + } + + } while (need_seqretry(&net->cells_lock, seq)); + + done_seqretry(&net->cells_lock, seq); + + return ret == 0 ? 
cell : ERR_PTR(ret); +} + +/* + * Set up a cell record and fill in its name, VL server address list and * allocate an anonymous key */ -static struct afs_cell *afs_cell_alloc(struct afs_net *net, - const char *name, unsigned namelen, - char *vllist) +static struct afs_cell *afs_alloc_cell(struct afs_net *net, + const char *name, unsigned int namelen, + const char *vllist) { struct afs_cell *cell; - struct key *key; - char keyname[4 + AFS_MAXCELLNAME + 1], *cp, *dp, *next; - char *dvllist = NULL, *_vllist = NULL; - char delimiter = ':'; - int ret, i; - - _enter("%*.*s,%s", namelen, namelen, name ?: "", vllist); - - BUG_ON(!name); /* TODO: want to look up "this cell" in the cache */ + int i, ret; + ASSERT(name); + if (namelen == 0) + return ERR_PTR(-EINVAL); if (namelen > AFS_MAXCELLNAME) { _leave(" = -ENAMETOOLONG"); return ERR_PTR(-ENAMETOOLONG); } - /* allocate and initialise a cell record */ - cell = kzalloc(sizeof(struct afs_cell) + namelen + 1, GFP_KERNEL); + _enter("%*.*s,%s", namelen, namelen, name, vllist); + + cell = kzalloc(sizeof(struct afs_cell), GFP_KERNEL); if (!cell) { _leave(" = -ENOMEM"); return ERR_PTR(-ENOMEM); } - memcpy(cell->name, name, namelen); - cell->name[namelen] = 0; - - atomic_set(&cell->usage, 1); - INIT_LIST_HEAD(&cell->link); cell->net = net; + cell->name_len = namelen; + for (i = 0; i < namelen; i++) + cell->name[i] = tolower(name[i]); + + atomic_set(&cell->usage, 2); + INIT_WORK(&cell->manager, afs_manage_cell); rwlock_init(&cell->servers_lock); INIT_LIST_HEAD(&cell->servers); init_rwsem(&cell->vl_sem); INIT_LIST_HEAD(&cell->vl_list); spin_lock_init(&cell->vl_lock); + seqlock_init(&cell->vl_addrs_lock); + cell->flags = (1 << AFS_CELL_FL_NOT_READY); for (i = 0; i < AFS_CELL_MAX_ADDRS; i++) { struct sockaddr_rxrpc *srx = &cell->vl_addrs[i]; srx->srx_family = AF_RXRPC; srx->srx_service = VL_SERVICE; srx->transport_type = SOCK_DGRAM; - srx->transport.sin.sin_port = htons(AFS_VL_PORT); + srx->transport.sin6.sin6_family = AF_INET6; + 
srx->transport.sin6.sin6_port = htons(AFS_VL_PORT); } - /* if the ip address is invalid, try dns query */ - if (!vllist || strlen(vllist) < 7) { - ret = dns_query("afsdb", name, namelen, "ipv4", &dvllist, NULL); - if (ret < 0) { - if (ret == -ENODATA || ret == -EAGAIN || ret == -ENOKEY) - /* translate these errors into something - * userspace might understand */ - ret = -EDESTADDRREQ; - _leave(" = %d", ret); - return ERR_PTR(ret); - } - _vllist = dvllist; + /* Fill in the VL server list if we were given a list of addresses to + * use. + */ + if (vllist) { + char delim = ':'; - /* change the delimiter for user-space reply */ - delimiter = ','; - - } else { if (strchr(vllist, ',') || !strchr(vllist, '.')) - delimiter = ','; - _vllist = vllist; + delim = ','; + + do { + struct sockaddr_rxrpc *srx = &cell->vl_addrs[cell->vl_naddrs]; + + if (in4_pton(vllist, -1, + (u8 *)&srx->transport.sin6.sin6_addr.s6_addr32[3], + delim, &vllist)) { + srx->transport_len = sizeof(struct sockaddr_in6); + srx->transport.sin6.sin6_addr.s6_addr32[0] = 0; + srx->transport.sin6.sin6_addr.s6_addr32[1] = 0; + srx->transport.sin6.sin6_addr.s6_addr32[2] = htonl(0xffff); + } else if (in6_pton(vllist, -1, + srx->transport.sin6.sin6_addr.s6_addr, + delim, &vllist)) { + srx->transport_len = sizeof(struct sockaddr_in6); + srx->transport.sin6.sin6_family = AF_INET6; + } else { + goto bad_address; + } + + cell->vl_naddrs++; + if (!*vllist) + break; + vllist++; + + } while (cell->vl_naddrs < AFS_CELL_MAX_ADDRS && vllist); + + /* Disable DNS refresh for manually-specified cells */ + cell->dns_expiry = TIME64_MAX; + } else { + /* We're going to need to 'refresh' this cell's VL server list + * from the DNS before we can use it. 
+ */ + cell->dns_expiry = S64_MIN; } - /* fill in the VL server list from the rest of the string */ - do { - struct sockaddr_rxrpc *srx = &cell->vl_addrs[cell->vl_naddrs]; - const char *end; - - next = strchr(_vllist, delimiter); - if (next) - *next++ = 0; - - if (in4_pton(_vllist, -1, (u8 *)&srx->transport.sin6.sin6_addr.s6_addr32[3], - -1, &end)) { - srx->transport_len = sizeof(struct sockaddr_in6); - srx->transport.sin6.sin6_family = AF_INET6; - srx->transport.sin6.sin6_flowinfo = 0; - srx->transport.sin6.sin6_scope_id = 0; - srx->transport.sin6.sin6_addr.s6_addr32[0] = 0; - srx->transport.sin6.sin6_addr.s6_addr32[1] = 0; - srx->transport.sin6.sin6_addr.s6_addr32[2] = htonl(0xffff); - } else if (in6_pton(_vllist, -1, srx->transport.sin6.sin6_addr.s6_addr, - -1, &end)) { - srx->transport_len = sizeof(struct sockaddr_in6); - srx->transport.sin6.sin6_family = AF_INET6; - srx->transport.sin6.sin6_flowinfo = 0; - srx->transport.sin6.sin6_scope_id = 0; - } else { - goto bad_address; - } - - } while (cell->vl_naddrs++, - cell->vl_naddrs < AFS_CELL_MAX_ADDRS && (_vllist = next)); - - /* create a key to represent an anonymous user */ - memcpy(keyname, "afs@", 4); - dp = keyname + 4; - cp = cell->name; - do { - *dp++ = toupper(*cp); - } while (*cp++); - - key = rxrpc_get_null_key(keyname); - if (IS_ERR(key)) { - _debug("no key"); - ret = PTR_ERR(key); - goto error; - } - cell->anonymous_key = key; - - _debug("anon key %p{%x}", - cell->anonymous_key, key_serial(cell->anonymous_key)); - _leave(" = %p", cell); return cell; bad_address: printk(KERN_ERR "kAFS: bad VL server IP address\n"); ret = -EINVAL; -error: - key_put(cell->anonymous_key); - kfree(dvllist); kfree(cell); _leave(" = %d", ret); return ERR_PTR(ret); } /* - * afs_cell_crate() - create a cell record + * afs_lookup_cell - Look up or create a cell record. * @net: The network namespace - * @name: is the name of the cell. - * @namsesz: is the strlen of the cell name. 
- * @vllist: is a colon separated list of IP addresses in "a.b.c.d" format. - * @retref: is T to return the cell reference when the cell exists. + * @name: The name of the cell. + * @namesz: The strlen of the cell name. + * @vllist: A colon/comma separated list of numeric IP addresses or NULL. + * @excl: T if an error should be given if the cell name already exists. + * + * Look up a cell record by name and query the DNS for VL server addresses if + * needed. Note that the actual DNS query is punted off to the manager thread + * so that this function can return immediately if interrupted whilst allowing + * cell records to be shared even if not yet fully constructed. */ -struct afs_cell *afs_cell_create(struct afs_net *net, - const char *name, unsigned namesz, - char *vllist, bool retref) +struct afs_cell *afs_lookup_cell(struct afs_net *net, + const char *name, unsigned int namesz, + const char *vllist, bool excl) { - struct afs_cell *cell; - int ret; + struct afs_cell *cell, *candidate, *cursor; + struct rb_node *parent, **pp; + int ret, n; - _enter("%*.*s,%s", namesz, namesz, name ?: "", vllist); + _enter("%s,%s", name, vllist); - down_write(&net->cells_sem); - read_lock(&net->cells_lock); - list_for_each_entry(cell, &net->cells, link) { - if (strncasecmp(cell->name, name, namesz) == 0) - goto duplicate_name; - } - read_unlock(&net->cells_lock); - - cell = afs_cell_alloc(net, name, namesz, vllist); - if (IS_ERR(cell)) { - _leave(" = %ld", PTR_ERR(cell)); - up_write(&net->cells_sem); - return cell; + if (!excl) { + rcu_read_lock(); + cell = afs_lookup_cell_rcu(net, name, namesz); + rcu_read_unlock(); + if (!IS_ERR(cell)) { + if (excl) { + afs_put_cell(net, cell); + return ERR_PTR(-EEXIST); + } + goto wait_for_cell; + } } - /* add a proc directory for this cell */ - ret = afs_proc_cell_setup(net, cell); - if (ret < 0) + /* Assume we're probably going to create a cell and preallocate and + * mostly set up a candidate record.
We can then use this to stash the + * name, the net namespace and VL server addresses. + * + * We also want to do this before we hold any locks as it may involve + * upcalling to userspace to make DNS queries. + */ + candidate = afs_alloc_cell(net, name, namesz, vllist); + if (IS_ERR(candidate)) { + _leave(" = %ld", PTR_ERR(candidate)); + return candidate; + } + + /* Find the insertion point and check to see if someone else added a + * cell whilst we were allocating. + */ + write_seqlock(&net->cells_lock); + + pp = &net->cells.rb_node; + parent = NULL; + while (*pp) { + parent = *pp; + cursor = rb_entry(parent, struct afs_cell, net_node); + + n = strncasecmp(cursor->name, name, + min_t(size_t, cursor->name_len, namesz)); + if (n == 0) + n = cursor->name_len - namesz; + if (n < 0) + pp = &(*pp)->rb_left; + else if (n > 0) + pp = &(*pp)->rb_right; + else + goto cell_already_exists; + } + + cell = candidate; + candidate = NULL; + rb_link_node_rcu(&cell->net_node, parent, pp); + rb_insert_color(&cell->net_node, &net->cells); + atomic_inc(&net->cells_outstanding); + write_sequnlock(&net->cells_lock); + + queue_work(afs_wq, &cell->manager); + +wait_for_cell: + _debug("wait_for_cell"); + ret = wait_on_bit(&cell->flags, AFS_CELL_FL_NOT_READY, TASK_INTERRUPTIBLE); + smp_rmb(); + + switch (READ_ONCE(cell->state)) { + case AFS_CELL_FAILED: + ret = cell->error; goto error; + default: + _debug("weird %u %d", cell->state, cell->error); + goto error; + case AFS_CELL_ACTIVE: + break; + } -#ifdef CONFIG_AFS_FSCACHE - /* put it up for caching (this never returns an error) */ - cell->cache = fscache_acquire_cookie(afs_cache_netfs.primary_index, - &afs_cell_cache_index_def, - cell, true); -#endif - - /* add to the cell lists */ - write_lock(&net->cells_lock); - list_add_tail(&cell->link, &net->cells); - write_unlock(&net->cells_lock); - - down_write(&net->proc_cells_sem); - list_add_tail(&cell->proc_link, &net->proc_cells); - up_write(&net->proc_cells_sem); - 
up_write(&net->cells_sem); - - _leave(" = %p", cell); + _leave(" = %p [cell]", cell); return cell; -error: - up_write(&net->cells_sem); - key_put(cell->anonymous_key); - kfree(cell); - _leave(" = %d", ret); - return ERR_PTR(ret); - -duplicate_name: - if (retref && !IS_ERR(cell)) - afs_get_cell(cell); - - read_unlock(&net->cells_lock); - up_write(&net->cells_sem); - - if (retref) { - _leave(" = %p", cell); - return cell; +cell_already_exists: + _debug("cell exists"); + cell = cursor; + if (excl) { + ret = -EEXIST; + } else { + ASSERTCMP(atomic_read(&cursor->usage), >=, 1); + afs_get_cell(cursor); + ret = 0; } - - _leave(" = -EEXIST"); - return ERR_PTR(-EEXIST); + write_sequnlock(&net->cells_lock); + kfree(candidate); + if (ret == 0) + goto wait_for_cell; +error: + afs_put_cell(net, cell); + _leave(" = %d [error]", ret); + return ERR_PTR(ret); } /* @@ -241,10 +344,11 @@ duplicate_name: * - can be called with a module parameter string * - can be called from a write to /proc/fs/afs/rootcell */ -int afs_cell_init(struct afs_net *net, char *rootcell) +int afs_cell_init(struct afs_net *net, const char *rootcell) { struct afs_cell *old_root, *new_root; - char *cp; + const char *cp, *vllist; + size_t len; _enter(""); @@ -257,180 +361,127 @@ int afs_cell_init(struct afs_net *net, char *rootcell) } cp = strchr(rootcell, ':'); - if (!cp) + if (!cp) { _debug("kAFS: no VL server IP addresses specified"); - else - *cp++ = 0; + vllist = NULL; + len = strlen(rootcell); + } else { + vllist = cp + 1; + len = cp - rootcell; + } /* allocate a cell record for the root cell */ - new_root = afs_cell_create(net, rootcell, strlen(rootcell), cp, false); + new_root = afs_lookup_cell(net, rootcell, len, vllist, false); if (IS_ERR(new_root)) { _leave(" = %ld", PTR_ERR(new_root)); return PTR_ERR(new_root); } + set_bit(AFS_CELL_FL_NO_GC, &new_root->flags); + afs_get_cell(new_root); + /* install the new cell */ - write_lock(&net->cells_lock); + write_seqlock(&net->cells_lock); old_root = 
net->ws_cell; net->ws_cell = new_root; - write_unlock(&net->cells_lock); - afs_put_cell(net, old_root); + write_sequnlock(&net->cells_lock); + afs_put_cell(net, old_root); _leave(" = 0"); return 0; } /* - * lookup a cell record + * Update a cell's VL server address list from the DNS. */ -struct afs_cell *afs_cell_lookup(struct afs_net *net, - const char *name, unsigned namesz, - bool dns_cell) +static void afs_update_cell(struct afs_cell *cell) { - struct afs_cell *cell; + time64_t now, expiry; + char *vllist = NULL; + int ret; - _enter("\"%*.*s\",", namesz, namesz, name ?: ""); + _enter("%s", cell->name); - down_read(&net->cells_sem); - read_lock(&net->cells_lock); + ret = dns_query("afsdb", cell->name, cell->name_len, + "ipv4", &vllist, &expiry); + _debug("query %d", ret); + switch (ret) { + case 0 ... INT_MAX: + clear_bit(AFS_CELL_FL_DNS_FAIL, &cell->flags); + clear_bit(AFS_CELL_FL_NOT_FOUND, &cell->flags); + goto parse_dns_data; - if (name) { - /* if the cell was named, look for it in the cell record list */ - list_for_each_entry(cell, &net->cells, link) { - if (strncmp(cell->name, name, namesz) == 0) { - afs_get_cell(cell); - goto found; - } - } - cell = ERR_PTR(-ENOENT); - if (dns_cell) - goto create_cell; - found: - ; - } else { - cell = net->ws_cell; - if (!cell) { - /* this should not happen unless user tries to mount - * when root cell is not set. Return an impossibly - * bizarre errno to alert the user. Things like - * ENOENT might be "more appropriate" but they happen - * for other reasons. - */ - cell = ERR_PTR(-EDESTADDRREQ); + case -ENODATA: + clear_bit(AFS_CELL_FL_DNS_FAIL, &cell->flags); + set_bit(AFS_CELL_FL_NOT_FOUND, &cell->flags); + cell->dns_expiry = ktime_get_real_seconds() + 61; + cell->error = -EDESTADDRREQ; + goto out; + + case -EAGAIN: + case -ECONNREFUSED: + default: + /* Unable to query DNS. 
*/ + set_bit(AFS_CELL_FL_DNS_FAIL, &cell->flags); + cell->dns_expiry = ktime_get_real_seconds() + 10; + cell->error = -EDESTADDRREQ; + goto out; + } + +parse_dns_data: + write_seqlock(&cell->vl_addrs_lock); + + ret = -EINVAL; + do { + struct sockaddr_rxrpc *srx = &cell->vl_addrs[cell->vl_naddrs]; + + if (in4_pton(vllist, -1, + (u8 *)&srx->transport.sin6.sin6_addr.s6_addr32[3], + ',', (const char **)&vllist)) { + srx->transport_len = sizeof(struct sockaddr_in6); + srx->transport.sin6.sin6_addr.s6_addr32[0] = 0; + srx->transport.sin6.sin6_addr.s6_addr32[1] = 0; + srx->transport.sin6.sin6_addr.s6_addr32[2] = htonl(0xffff); + } else if (in6_pton(vllist, -1, + srx->transport.sin6.sin6_addr.s6_addr, + ',', (const char **)&vllist)) { + srx->transport_len = sizeof(struct sockaddr_in6); + srx->transport.sin6.sin6_family = AF_INET6; } else { - afs_get_cell(cell); + goto bad_address; } - } + cell->vl_naddrs++; + if (!*vllist) + break; + vllist++; - read_unlock(&net->cells_lock); - up_read(&net->cells_sem); - _leave(" = %p", cell); - return cell; + } while (cell->vl_naddrs < AFS_CELL_MAX_ADDRS); -create_cell: - read_unlock(&net->cells_lock); - up_read(&net->cells_sem); + if (cell->vl_naddrs < AFS_CELL_MAX_ADDRS) + memset(cell->vl_addrs + cell->vl_naddrs, 0, + (AFS_CELL_MAX_ADDRS - cell->vl_naddrs) * sizeof(cell->vl_addrs[0])); - cell = afs_cell_create(net, name, namesz, NULL, true); - - _leave(" = %p", cell); - return cell; -} - -#if 0 -/* - * try and get a cell record - */ -struct afs_cell *afs_get_cell_maybe(struct afs_cell *cell) -{ - write_lock(&net->cells_lock); - - if (cell && !list_empty(&cell->link)) - afs_get_cell(cell); - else - cell = NULL; - - write_unlock(&net->cells_lock); - return cell; -} -#endif /* 0 */ - -/* - * destroy a cell record - */ -void afs_put_cell(struct afs_net *net, struct afs_cell *cell) -{ - if (!cell) - return; - - _enter("%p{%d,%s}", cell, atomic_read(&cell->usage), cell->name); - - ASSERTCMP(atomic_read(&cell->usage), >, 0); - - /* to prevent 
a race, the decrement and the dequeue must be effectively - * atomic */ - write_lock(&net->cells_lock); - - if (likely(!atomic_dec_and_test(&cell->usage))) { - write_unlock(&net->cells_lock); - _leave(""); - return; - } - - ASSERT(list_empty(&cell->servers)); - ASSERT(list_empty(&cell->vl_list)); - - wake_up(&net->cells_freeable_wq); - - write_unlock(&net->cells_lock); - - _leave(" [unused]"); + now = ktime_get_real_seconds(); + cell->dns_expiry = expiry; + afs_set_cell_timer(cell->net, expiry - now); +bad_address: + write_sequnlock(&cell->vl_addrs_lock); +out: + _leave(""); } /* - * destroy a cell record - * - must be called with the net->cells_sem write-locked - * - cell->link should have been broken by the caller + * Destroy a cell record */ -static void afs_cell_destroy(struct afs_net *net, struct afs_cell *cell) +static void afs_cell_destroy(struct rcu_head *rcu) { - _enter("%p{%d,%s}", cell, atomic_read(&cell->usage), cell->name); + struct afs_cell *cell = container_of(rcu, struct afs_cell, rcu); - ASSERTCMP(atomic_read(&cell->usage), >=, 0); - ASSERT(list_empty(&cell->link)); + _enter("%p{%s}", cell, cell->name); - /* wait for everyone to stop using the cell */ - if (atomic_read(&cell->usage) > 0) { - DECLARE_WAITQUEUE(myself, current); - - _debug("wait for cell %s", cell->name); - set_current_state(TASK_UNINTERRUPTIBLE); - add_wait_queue(&net->cells_freeable_wq, &myself); - - while (atomic_read(&cell->usage) > 0) { - schedule(); - set_current_state(TASK_UNINTERRUPTIBLE); - } - - remove_wait_queue(&net->cells_freeable_wq, &myself); - set_current_state(TASK_RUNNING); - } - - _debug("cell dead"); ASSERTCMP(atomic_read(&cell->usage), ==, 0); - ASSERT(list_empty(&cell->servers)); - ASSERT(list_empty(&cell->vl_list)); - afs_proc_cell_remove(net, cell); - - down_write(&net->proc_cells_sem); - list_del_init(&cell->proc_link); - up_write(&net->proc_cells_sem); - -#ifdef CONFIG_AFS_FSCACHE - fscache_relinquish_cookie(cell->cache, 0); -#endif 
key_put(cell->anonymous_key); kfree(cell); @@ -438,42 +489,343 @@ static void afs_cell_destroy(struct afs_net *net, struct afs_cell *cell) } /* - * purge in-memory cell database on module unload or afs_init() failure - * - the timeout daemon is stopped before calling this + * Queue the cell manager. */ -void afs_cell_purge(struct afs_net *net) +static void afs_queue_cell_manager(struct afs_net *net) { - struct afs_cell *cell; + int outstanding = atomic_inc_return(&net->cells_outstanding); + + _enter("%d", outstanding); + + if (!queue_work(afs_wq, &net->cells_manager)) + afs_dec_cells_outstanding(net); +} + +/* + * Cell management timer. We have an increment on cells_outstanding that we + * need to pass along to the work item. + */ +void afs_cells_timer(struct timer_list *timer) +{ + struct afs_net *net = container_of(timer, struct afs_net, cells_timer); + + _enter(""); + if (!queue_work(afs_wq, &net->cells_manager)) + afs_dec_cells_outstanding(net); +} + +/* + * Drop a reference on a cell record. + */ +void afs_put_cell(struct afs_net *net, struct afs_cell *cell) +{ + time64_t now, expire_delay; + + if (!cell) + return; + + _enter("%s", cell->name); + + now = ktime_get_real_seconds(); + cell->last_inactive = now; + expire_delay = 0; + if (!test_bit(AFS_CELL_FL_DNS_FAIL, &cell->flags) && + !test_bit(AFS_CELL_FL_NOT_FOUND, &cell->flags)) + expire_delay = afs_cell_gc_delay; + + if (atomic_dec_return(&cell->usage) > 1) + return; + + /* 'cell' may now be garbage collected. */ + afs_set_cell_timer(net, expire_delay); +} + +/* + * Allocate a key to use as a placeholder for anonymous user security. + */ +static int afs_alloc_anon_key(struct afs_cell *cell) +{ + struct key *key; + char keyname[4 + AFS_MAXCELLNAME + 1], *cp, *dp; + + /* Create a key to represent an anonymous user. 
*/ + memcpy(keyname, "afs@", 4); + dp = keyname + 4; + cp = cell->name; + do { + *dp++ = tolower(*cp); + } while (*cp++); + + key = rxrpc_get_null_key(keyname); + if (IS_ERR(key)) + return PTR_ERR(key); + + cell->anonymous_key = key; + + _debug("anon key %p{%x}", + cell->anonymous_key, key_serial(cell->anonymous_key)); + return 0; +} + +/* + * Activate a cell. + */ +static int afs_activate_cell(struct afs_net *net, struct afs_cell *cell) +{ + int ret; + + if (!cell->anonymous_key) { + ret = afs_alloc_anon_key(cell); + if (ret < 0) + return ret; + } + +#ifdef CONFIG_AFS_FSCACHE + cell->cache = fscache_acquire_cookie(afs_cache_netfs.primary_index, + &afs_cell_cache_index_def, + cell, true); +#endif + ret = afs_proc_cell_setup(net, cell); + if (ret < 0) + return ret; + spin_lock(&net->proc_cells_lock); + list_add_tail(&cell->proc_link, &net->proc_cells); + spin_unlock(&net->proc_cells_lock); + return 0; +} + +/* + * Deactivate a cell. + */ +static void afs_deactivate_cell(struct afs_net *net, struct afs_cell *cell) +{ + _enter("%s", cell->name); + + afs_proc_cell_remove(net, cell); + + spin_lock(&net->proc_cells_lock); + list_del_init(&cell->proc_link); + spin_unlock(&net->proc_cells_lock); + +#ifdef CONFIG_AFS_FSCACHE + fscache_relinquish_cookie(cell->cache, 0); + cell->cache = NULL; +#endif + + _leave(""); +} + +/* + * Manage a cell record, initialising and destroying it, maintaining its DNS + * records. 
+ */ +static void afs_manage_cell(struct work_struct *work) +{ + struct afs_cell *cell = container_of(work, struct afs_cell, manager); + struct afs_net *net = cell->net; + bool deleted; + int ret, usage; + + _enter("%s", cell->name); + +again: + _debug("state %u", cell->state); + switch (cell->state) { + case AFS_CELL_INACTIVE: + case AFS_CELL_FAILED: + write_seqlock(&net->cells_lock); + usage = 1; + deleted = atomic_try_cmpxchg_relaxed(&cell->usage, &usage, 0); + if (deleted) + rb_erase(&cell->net_node, &net->cells); + write_sequnlock(&net->cells_lock); + if (deleted) + goto final_destruction; + if (cell->state == AFS_CELL_FAILED) + goto done; + cell->state = AFS_CELL_UNSET; + goto again; + + case AFS_CELL_UNSET: + cell->state = AFS_CELL_ACTIVATING; + goto again; + + case AFS_CELL_ACTIVATING: + ret = afs_activate_cell(net, cell); + if (ret < 0) + goto activation_failed; + + cell->state = AFS_CELL_ACTIVE; + smp_wmb(); + clear_bit(AFS_CELL_FL_NOT_READY, &cell->flags); + wake_up_bit(&cell->flags, AFS_CELL_FL_NOT_READY); + goto again; + + case AFS_CELL_ACTIVE: + if (atomic_read(&cell->usage) > 1) { + time64_t now = ktime_get_real_seconds(); + if (cell->dns_expiry <= now && net->live) + afs_update_cell(cell); + goto done; + } + cell->state = AFS_CELL_DEACTIVATING; + goto again; + + case AFS_CELL_DEACTIVATING: + set_bit(AFS_CELL_FL_NOT_READY, &cell->flags); + if (atomic_read(&cell->usage) > 1) + goto reverse_deactivation; + afs_deactivate_cell(net, cell); + cell->state = AFS_CELL_INACTIVE; + goto again; + + default: + break; + } + _debug("bad state %u", cell->state); + BUG(); /* Unhandled state */ + +activation_failed: + cell->error = ret; + afs_deactivate_cell(net, cell); + + cell->state = AFS_CELL_FAILED; + smp_wmb(); + if (test_and_clear_bit(AFS_CELL_FL_NOT_READY, &cell->flags)) + wake_up_bit(&cell->flags, AFS_CELL_FL_NOT_READY); + goto again; + +reverse_deactivation: + cell->state = AFS_CELL_ACTIVE; + smp_wmb(); + clear_bit(AFS_CELL_FL_NOT_READY, &cell->flags); + 
wake_up_bit(&cell->flags, AFS_CELL_FL_NOT_READY); + _leave(" [deact->act]"); + return; + +done: + _leave(" [done %u]", cell->state); + return; + +final_destruction: + call_rcu(&cell->rcu, afs_cell_destroy); + afs_dec_cells_outstanding(net); + _leave(" [destruct %d]", atomic_read(&net->cells_outstanding)); +} + +/* + * Manage the records of cells known to a network namespace. This includes + * updating the DNS records and garbage collecting unused cells that were + * automatically added. + * + * Note that constructed cell records may only be removed from net->cells by + * this work item, so it is safe for this work item to stash a cursor pointing + * into the tree and then return to caller (provided it skips cells that are + * still under construction). + * + * Note also that we were given an increment on net->cells_outstanding by + * whoever queued us that we need to deal with before returning. + */ +void afs_manage_cells(struct work_struct *work) +{ + struct afs_net *net = container_of(work, struct afs_net, cells_manager); + struct rb_node *cursor; + time64_t now = ktime_get_real_seconds(), next_manage = TIME64_MAX; + bool purging = !net->live; _enter(""); - afs_put_cell(net, net->ws_cell); + /* Trawl the cell database looking for cells that have expired from + * lack of use and cells whose DNS results have expired and dispatch + * their managers. 
+ */ + read_seqlock_excl(&net->cells_lock); - down_write(&net->cells_sem); + for (cursor = rb_first(&net->cells); cursor; cursor = rb_next(cursor)) { + struct afs_cell *cell = + rb_entry(cursor, struct afs_cell, net_node); + unsigned usage; + bool sched_cell = false; - while (!list_empty(&net->cells)) { - cell = NULL; + usage = atomic_read(&cell->usage); + _debug("manage %s %u", cell->name, usage); - /* remove the next cell from the front of the list */ - write_lock(&net->cells_lock); + ASSERTCMP(usage, >=, 1); - if (!list_empty(&net->cells)) { - cell = list_entry(net->cells.next, - struct afs_cell, link); - list_del_init(&cell->link); + if (purging) { + if (test_and_clear_bit(AFS_CELL_FL_NO_GC, &cell->flags)) + usage = atomic_dec_return(&cell->usage); + ASSERTCMP(usage, ==, 1); } - write_unlock(&net->cells_lock); + if (usage == 1) { + time64_t expire_at = cell->last_inactive; - if (cell) { - _debug("PURGING CELL %s (%d)", - cell->name, atomic_read(&cell->usage)); + if (!test_bit(AFS_CELL_FL_DNS_FAIL, &cell->flags) && + !test_bit(AFS_CELL_FL_NOT_FOUND, &cell->flags)) + expire_at += afs_cell_gc_delay; + if (purging || expire_at <= now) + sched_cell = true; + else if (expire_at < next_manage) + next_manage = expire_at; + } - /* now the cell should be left with no references */ - afs_cell_destroy(net, cell); + if (!purging) { + if (cell->dns_expiry <= now) + sched_cell = true; + else if (cell->dns_expiry <= next_manage) + next_manage = cell->dns_expiry; + } + + if (sched_cell) + queue_work(afs_wq, &cell->manager); + } + + read_sequnlock_excl(&net->cells_lock); + + /* Update the timer on the way out. We have to pass an increment on + * cells_outstanding in the namespace that we are in to the timer or + * the work scheduler. 
+ */ + if (!purging && next_manage < TIME64_MAX) { + now = ktime_get_real_seconds(); + + if (next_manage - now <= 0) { + if (queue_work(afs_wq, &net->cells_manager)) + atomic_inc(&net->cells_outstanding); + } else { + afs_set_cell_timer(net, next_manage - now); } } - up_write(&net->cells_sem); + afs_dec_cells_outstanding(net); + _leave(" [%d]", atomic_read(&net->cells_outstanding)); +} + +/* + * Purge in-memory cell database. + */ +void afs_cell_purge(struct afs_net *net) +{ + struct afs_cell *ws; + + _enter(""); + + write_seqlock(&net->cells_lock); + ws = net->ws_cell; + net->ws_cell = NULL; + write_sequnlock(&net->cells_lock); + afs_put_cell(net, ws); + + _debug("del timer"); + if (del_timer_sync(&net->cells_timer)) + atomic_dec(&net->cells_outstanding); + + _debug("kick mgr"); + afs_queue_cell_manager(net); + + _debug("wait"); + wait_on_atomic_t(&net->cells_outstanding, atomic_t_wait, + TASK_UNINTERRUPTIBLE); _leave(""); } diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 7c318666e436..51e3825b5ffb 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -207,13 +207,14 @@ struct afs_net { atomic_t nr_superblocks; /* Cell database */ - struct list_head cells; + struct rb_root cells; struct afs_cell *ws_cell; - rwlock_t cells_lock; - struct rw_semaphore cells_sem; - wait_queue_head_t cells_freeable_wq; + struct work_struct cells_manager; + struct timer_list cells_timer; + atomic_t cells_outstanding; + seqlock_t cells_lock; - struct rw_semaphore proc_cells_sem; + spinlock_t proc_cells_lock; struct list_head proc_cells; /* Volume location database */ @@ -242,14 +243,26 @@ struct afs_net { extern struct afs_net __afs_net;// Dummy AFS network namespace; TODO: replace with real netns +enum afs_cell_state { + AFS_CELL_UNSET, + AFS_CELL_ACTIVATING, + AFS_CELL_ACTIVE, + AFS_CELL_DEACTIVATING, + AFS_CELL_INACTIVE, + AFS_CELL_FAILED, +}; + /* * AFS cell record */ struct afs_cell { - atomic_t usage; - struct list_head link; /* main cell list link */ - struct afs_net 
*net; /* The network namespace */ + union { + struct rcu_head rcu; + struct rb_node net_node; /* Node in net->cells */ + }; + struct afs_net *net; struct key *anonymous_key; /* anonymous user key for this cell */ + struct work_struct manager; /* Manager for init/deinit/dns */ struct list_head proc_link; /* /proc cell list link */ #ifdef CONFIG_AFS_FSCACHE struct fscache_cookie *cache; /* caching cookie */ @@ -262,12 +275,26 @@ struct afs_cell { /* volume location record management */ struct rw_semaphore vl_sem; /* volume management serialisation semaphore */ struct list_head vl_list; /* cell's active VL record list */ + time64_t dns_expiry; /* Time AFSDB/SRV record expires */ + time64_t last_inactive; /* Time of last drop of usage count */ + atomic_t usage; + unsigned long flags; +#define AFS_CELL_FL_NOT_READY 0 /* The cell record is not ready for use */ +#define AFS_CELL_FL_NO_GC 1 /* The cell was added manually, don't auto-gc */ +#define AFS_CELL_FL_NOT_FOUND 2 /* Permanent DNS error */ +#define AFS_CELL_FL_DNS_FAIL 3 /* Failed to access DNS */ + enum afs_cell_state state; + short error; + spinlock_t vl_lock; /* vl_list lock */ + + /* VLDB server list. 
*/ + seqlock_t vl_addrs_lock; unsigned short vl_naddrs; /* number of VL servers in addr list */ unsigned short vl_curr_svix; /* current server index */ struct sockaddr_rxrpc vl_addrs[AFS_CELL_MAX_ADDRS]; /* cell VL server addresses */ - - char name[0]; /* cell name - must go last */ + u8 name_len; /* Length of name */ + char name[64 + 1]; /* Cell name, case-flattened and NUL-padded */ }; /* @@ -494,17 +521,20 @@ static inline struct afs_cb_interest *afs_get_cb_interest(struct afs_cb_interest /* * cell.c */ -static inline struct afs_cell *afs_get_cell(struct afs_cell *cell) + static inline struct afs_cell *afs_get_cell(struct afs_cell *cell) { if (cell) atomic_inc(&cell->usage); return cell; } -extern int afs_cell_init(struct afs_net *, char *); -extern struct afs_cell *afs_cell_create(struct afs_net *, const char *, unsigned, char *, bool); -extern struct afs_cell *afs_cell_lookup(struct afs_net *, const char *, unsigned, bool); -extern struct afs_cell *afs_grab_cell(struct afs_cell *); + +extern int afs_cell_init(struct afs_net *, const char *); +extern struct afs_cell *afs_lookup_cell_rcu(struct afs_net *, const char *, unsigned); +extern struct afs_cell *afs_lookup_cell(struct afs_net *, const char *, unsigned, + const char *, bool); extern void afs_put_cell(struct afs_net *, struct afs_cell *); +extern void afs_manage_cells(struct work_struct *); +extern void afs_cells_timer(struct timer_list *); extern void __net_exit afs_cell_purge(struct afs_net *); /* diff --git a/fs/afs/main.c b/fs/afs/main.c index 010e2e1a40f4..e7f87d723761 100644 --- a/fs/afs/main.c +++ b/fs/afs/main.c @@ -46,12 +46,15 @@ static int __net_init afs_net_init(struct afs_net *net) INIT_WORK(&net->charge_preallocation_work, afs_charge_preallocation); mutex_init(&net->socket_mutex); - INIT_LIST_HEAD(&net->cells); - rwlock_init(&net->cells_lock); - init_rwsem(&net->cells_sem); - init_waitqueue_head(&net->cells_freeable_wq); - init_rwsem(&net->proc_cells_sem); + + net->cells = RB_ROOT; + 
seqlock_init(&net->cells_lock); + INIT_WORK(&net->cells_manager, afs_manage_cells); + timer_setup(&net->cells_timer, afs_cells_timer, 0); + + spin_lock_init(&net->proc_cells_lock); INIT_LIST_HEAD(&net->proc_cells); + INIT_LIST_HEAD(&net->vl_updates); INIT_LIST_HEAD(&net->vl_graveyard); INIT_DELAYED_WORK(&net->vl_reaper, afs_vlocation_reaper); @@ -83,11 +86,14 @@ static int __net_init afs_net_init(struct afs_net *net) return 0; error_open_socket: + net->live = false; afs_vlocation_purge(net); afs_cell_purge(net); error_cell_init: + net->live = false; afs_proc_cleanup(net); error_proc: + net->live = false; return ret; } diff --git a/fs/afs/proc.c b/fs/afs/proc.c index d00d550ff2ef..08565429615d 100644 --- a/fs/afs/proc.c +++ b/fs/afs/proc.c @@ -186,7 +186,7 @@ static void *afs_proc_cells_start(struct seq_file *m, loff_t *_pos) { struct afs_net *net = afs_seq2net(m); - down_read(&net->proc_cells_sem); + rcu_read_lock(); return seq_list_start_head(&net->proc_cells, *_pos); } @@ -205,9 +205,7 @@ static void *afs_proc_cells_next(struct seq_file *m, void *v, loff_t *pos) */ static void afs_proc_cells_stop(struct seq_file *m, void *v) { - struct afs_net *net = afs_seq2net(m); - - up_read(&net->proc_cells_sem); + rcu_read_unlock(); } /* @@ -225,8 +223,7 @@ static int afs_proc_cells_show(struct seq_file *m, void *v) } /* display one cell per line on subsequent lines */ - seq_printf(m, "%3d %s\n", - atomic_read(&cell->usage), cell->name); + seq_printf(m, "%3u %s\n", atomic_read(&cell->usage), cell->name); return 0; } @@ -279,13 +276,13 @@ static ssize_t afs_proc_cells_write(struct file *file, const char __user *buf, if (strcmp(kbuf, "add") == 0) { struct afs_cell *cell; - cell = afs_cell_create(net, name, strlen(name), args, false); + cell = afs_lookup_cell(net, name, strlen(name), args, true); if (IS_ERR(cell)) { ret = PTR_ERR(cell); goto done; } - afs_put_cell(net, cell); + set_bit(AFS_CELL_FL_NO_GC, &cell->flags); printk("kAFS: Added new cell '%s'\n", name); } else { goto 
inval; @@ -354,7 +351,7 @@ int afs_proc_cell_setup(struct afs_net *net, struct afs_cell *cell) { struct proc_dir_entry *dir; - _enter("%p{%s}", cell, cell->name); + _enter("%p{%s},%p", cell, cell->name, net->proc_afs); dir = proc_mkdir(cell->name, net->proc_afs); if (!dir) diff --git a/fs/afs/super.c b/fs/afs/super.c index e62fb1bdadc6..3d53b78b350d 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -200,10 +200,11 @@ static int afs_parse_options(struct afs_mount_params *params, token = match_token(p, afs_options_list, args); switch (token) { case afs_opt_cell: - cell = afs_cell_lookup(params->net, - args[0].from, - args[0].to - args[0].from, - false); + rcu_read_lock(); + cell = afs_lookup_cell_rcu(params->net, + args[0].from, + args[0].to - args[0].from); + rcu_read_unlock(); if (IS_ERR(cell)) return PTR_ERR(cell); afs_put_cell(params->net, params->cell); @@ -308,7 +309,8 @@ static int afs_parse_device_name(struct afs_mount_params *params, /* lookup the cell record */ if (cellname || !params->cell) { - cell = afs_cell_lookup(params->net, cellname, cellnamesz, true); + cell = afs_lookup_cell(params->net, cellname, cellnamesz, + NULL, false); if (IS_ERR(cell)) { printk(KERN_ERR "kAFS: unable to lookup cell '%*.*s'\n", cellnamesz, cellnamesz, cellname ?: ""); diff --git a/fs/afs/xattr.c b/fs/afs/xattr.c index 2830e4f48d85..e58e00ee9747 100644 --- a/fs/afs/xattr.c +++ b/fs/afs/xattr.c @@ -45,7 +45,7 @@ static int afs_xattr_get_cell(const struct xattr_handler *handler, struct afs_cell *cell = vnode->volume->cell; size_t namelen; - namelen = strlen(cell->name); + namelen = cell->name_len; if (size == 0) return namelen; if (namelen > size) From 8b2a464ced77fe35be72ab7d38152a9439daf8d3 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 2 Nov 2017 15:27:50 +0000 Subject: [PATCH 20/35] afs: Add an address list concept Add an RCU replaceable address list structure to hold a list of server addresses. 
The list also holds the index of the preferred address - the one that last responded - so that iteration over the list can start from there. To this end: (1) A cell's VL server address list can be loaded directly via insmod or echo to /proc/fs/afs/cells or dynamically from a DNS query for AFSDB or SRV records. (2) Anyone wanting to use a cell's VL server address must wait until the cell record comes online and has tried to obtain some addresses. (3) An FS server's address list, for the moment, has a single entry that is the key to the server list. This will change in the future when a server is instead keyed on its UUID and the VL.GetAddrsU operation is used. (4) An 'address cursor' concept is introduced to handle iteration through the address list. This is passed to afs_make_call() as, in the future, stuff (such as abort code) that doesn't outlast the call will be returned in it. In the future, we might want to annotate the list with information about how each address fares. We might then want to propagate such annotations over address list replacement. Whilst we're at it, we allow IPv6 addresses to be specified in colon-delimited lists by enclosing them in square brackets. 
Signed-off-by: David Howells --- fs/afs/Makefile | 1 + fs/afs/addr_list.c | 308 +++++++++++++++++++++++++++++++++++ fs/afs/cell.c | 171 +++++++------------- fs/afs/fsclient.c | 121 +++++++------- fs/afs/internal.h | 120 +++++++++----- fs/afs/proc.c | 23 ++- fs/afs/rxrpc.c | 3 +- fs/afs/server.c | 66 +++++--- fs/afs/vlclient.c | 8 +- fs/afs/vlocation.c | 75 +++++---- fs/afs/vnode.c | 392 +++++++++++++++++---------------------------- fs/afs/volume.c | 115 +++++++++---- 12 files changed, 843 insertions(+), 560 deletions(-) create mode 100644 fs/afs/addr_list.c diff --git a/fs/afs/Makefile b/fs/afs/Makefile index 641148208e90..849383986d3b 100644 --- a/fs/afs/Makefile +++ b/fs/afs/Makefile @@ -7,6 +7,7 @@ afs-cache-$(CONFIG_AFS_FSCACHE) := cache.o kafs-objs := \ $(afs-cache-y) \ + addr_list.o \ callback.o \ cell.o \ cmservice.o \ diff --git a/fs/afs/addr_list.c b/fs/afs/addr_list.c new file mode 100644 index 000000000000..ecb9c72aebd2 --- /dev/null +++ b/fs/afs/addr_list.c @@ -0,0 +1,308 @@ +/* Server address list management + * + * Copyright (C) 2017 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include "internal.h" +#include "afs_fs.h" + +#define AFS_MAX_ADDRESSES \ + ((unsigned int)((PAGE_SIZE - sizeof(struct afs_addr_list)) / \ + sizeof(struct sockaddr_rxrpc))) + +/* + * Release an address list. + */ +void afs_put_addrlist(struct afs_addr_list *alist) +{ + if (alist && refcount_dec_and_test(&alist->usage)) + call_rcu(&alist->rcu, (rcu_callback_t)kfree); +} + +/* + * Allocate an address list. 
+ */ +struct afs_addr_list *afs_alloc_addrlist(unsigned int nr, + unsigned short service, + unsigned short port) +{ + struct afs_addr_list *alist; + unsigned int i; + + _enter("%u,%u,%u", nr, service, port); + + alist = kzalloc(sizeof(*alist) + sizeof(alist->addrs[0]) * nr, + GFP_KERNEL); + if (!alist) + return NULL; + + refcount_set(&alist->usage, 1); + + for (i = 0; i < nr; i++) { + struct sockaddr_rxrpc *srx = &alist->addrs[i]; + srx->srx_family = AF_RXRPC; + srx->srx_service = service; + srx->transport_type = SOCK_DGRAM; + srx->transport_len = sizeof(srx->transport.sin6); + srx->transport.sin6.sin6_family = AF_INET6; + srx->transport.sin6.sin6_port = htons(port); + } + + return alist; +} + +/* + * Parse a text string consisting of delimited addresses. + */ +struct afs_addr_list *afs_parse_text_addrs(const char *text, size_t len, + char delim, + unsigned short service, + unsigned short port) +{ + struct afs_addr_list *alist; + const char *p, *end = text + len; + unsigned int nr = 0; + + _enter("%*.*s,%c", (int)len, (int)len, text, delim); + + if (!len) + return ERR_PTR(-EDESTADDRREQ); + + if (delim == ':' && (memchr(text, ',', len) || !memchr(text, '.', len))) + delim = ','; + + /* Count the addresses */ + p = text; + do { + if (!*p) + return ERR_PTR(-EINVAL); + if (*p == delim) + continue; + nr++; + if (*p == '[') { + p++; + if (p == end) + return ERR_PTR(-EINVAL); + p = memchr(p, ']', end - p); + if (!p) + return ERR_PTR(-EINVAL); + p++; + if (p >= end) + break; + } + + p = memchr(p, delim, end - p); + if (!p) + break; + p++; + } while (p < end); + + _debug("%u/%u addresses", nr, AFS_MAX_ADDRESSES); + if (nr > AFS_MAX_ADDRESSES) + nr = AFS_MAX_ADDRESSES; + + alist = afs_alloc_addrlist(nr, service, port); + if (!alist) + return ERR_PTR(-ENOMEM); + + /* Extract the addresses */ + p = text; + do { + struct sockaddr_rxrpc *srx = &alist->addrs[alist->nr_addrs]; + char tdelim = delim; + + if (*p == delim) { + p++; + continue; + } + + if (*p == '[') { + p++; + tdelim 
= ']'; + } + + if (in4_pton(p, end - p, + (u8 *)&srx->transport.sin6.sin6_addr.s6_addr32[3], + tdelim, &p)) { + srx->transport.sin6.sin6_addr.s6_addr32[0] = 0; + srx->transport.sin6.sin6_addr.s6_addr32[1] = 0; + srx->transport.sin6.sin6_addr.s6_addr32[2] = htonl(0xffff); + } else if (in6_pton(p, end - p, + srx->transport.sin6.sin6_addr.s6_addr, + tdelim, &p)) { + /* Nothing to do */ + } else { + goto bad_address; + } + + if (tdelim == ']') { + if (p == end || *p != ']') + goto bad_address; + p++; + } + + if (p < end) { + if (*p == '+') { + /* Port number specification "+1234" */ + unsigned int xport = 0; + p++; + if (p >= end || !isdigit(*p)) + goto bad_address; + do { + xport *= 10; + xport += *p - '0'; + if (xport > 65535) + goto bad_address; + p++; + } while (p < end && isdigit(*p)); + srx->transport.sin6.sin6_port = htons(xport); + } else if (*p == delim) { + p++; + } else { + goto bad_address; + } + } + + alist->nr_addrs++; + } while (p < end && alist->nr_addrs < AFS_MAX_ADDRESSES); + + _leave(" = [nr %u]", alist->nr_addrs); + return alist; + +bad_address: + kfree(alist); + return ERR_PTR(-EINVAL); +} + +/* + * Compare old and new address lists to see if there's been any change. + * - How to do this in better than O(Nlog(N)) time? + * - We don't really want to sort the address list, but would rather take the + * list as we got it so as not to undo record rotation by the DNS server. + */ +#if 0 +static int afs_cmp_addr_list(const struct afs_addr_list *a1, + const struct afs_addr_list *a2) +{ +} +#endif + +/* + * Perform a DNS query for VL servers and build up an address list. 
+ */ +struct afs_addr_list *afs_dns_query(struct afs_cell *cell, time64_t *_expiry) +{ + struct afs_addr_list *alist; + char *vllist = NULL; + int ret; + + _enter("%s", cell->name); + + ret = dns_query("afsdb", cell->name, cell->name_len, + "ipv4", &vllist, _expiry); + if (ret < 0) + return ERR_PTR(ret); + + alist = afs_parse_text_addrs(vllist, strlen(vllist), ',', + VL_SERVICE, AFS_VL_PORT); + if (IS_ERR(alist)) { + kfree(vllist); + if (alist != ERR_PTR(-ENOMEM)) + pr_err("Failed to parse DNS data\n"); + return alist; + } + + kfree(vllist); + return alist; +} + +/* + * Get an address to try. + */ +bool afs_iterate_addresses(struct afs_addr_cursor *ac) +{ + _enter("%hu+%hd", ac->start, (short)ac->index); + + if (!ac->alist) + return false; + + if (ac->begun) { + ac->index++; + if (ac->index == ac->alist->nr_addrs) + ac->index = 0; + + if (ac->index == ac->start) { + ac->error = -EDESTADDRREQ; + return false; + } + } + + ac->begun = true; + ac->responded = false; + ac->addr = &ac->alist->addrs[ac->index]; + return true; +} + +/* + * Release an address list cursor. + */ +int afs_end_cursor(struct afs_addr_cursor *ac) +{ + if (ac->responded && ac->index != ac->start) + WRITE_ONCE(ac->alist->index, ac->index); + + afs_put_addrlist(ac->alist); + ac->alist = NULL; + return ac->error; +} + +/* + * Set the address cursor for iterating over VL servers. 
+ */ +int afs_set_vl_cursor(struct afs_addr_cursor *ac, struct afs_cell *cell) +{ + struct afs_addr_list *alist; + int ret; + + if (!rcu_access_pointer(cell->vl_addrs)) { + ret = wait_on_bit(&cell->flags, AFS_CELL_FL_NO_LOOKUP_YET, + TASK_INTERRUPTIBLE); + if (ret < 0) + return ret; + + if (!rcu_access_pointer(cell->vl_addrs) && + ktime_get_real_seconds() < cell->dns_expiry) + return cell->error; + } + + read_lock(&cell->vl_addrs_lock); + alist = rcu_dereference_protected(cell->vl_addrs, + lockdep_is_held(&cell->vl_addrs_lock)); + if (alist->nr_addrs > 0) + afs_get_addrlist(alist); + else + alist = NULL; + read_unlock(&cell->vl_addrs_lock); + + if (!alist) + return -EDESTADDRREQ; + + ac->alist = alist; + ac->addr = NULL; + ac->start = READ_ONCE(alist->index); + ac->index = ac->start; + ac->error = 0; + ac->begun = false; + return 0; +} diff --git a/fs/afs/cell.c b/fs/afs/cell.c index e83103e8a6fb..a0e08d3a108c 100644 --- a/fs/afs/cell.c +++ b/fs/afs/cell.c @@ -9,7 +9,6 @@ * 2 of the License, or (at your option) any later version. */ -#include #include #include #include @@ -152,68 +151,33 @@ static struct afs_cell *afs_alloc_cell(struct afs_net *net, init_rwsem(&cell->vl_sem); INIT_LIST_HEAD(&cell->vl_list); spin_lock_init(&cell->vl_lock); - seqlock_init(&cell->vl_addrs_lock); - cell->flags = (1 << AFS_CELL_FL_NOT_READY); - - for (i = 0; i < AFS_CELL_MAX_ADDRS; i++) { - struct sockaddr_rxrpc *srx = &cell->vl_addrs[i]; - srx->srx_family = AF_RXRPC; - srx->srx_service = VL_SERVICE; - srx->transport_type = SOCK_DGRAM; - srx->transport.sin6.sin6_family = AF_INET6; - srx->transport.sin6.sin6_port = htons(AFS_VL_PORT); - } + cell->flags = ((1 << AFS_CELL_FL_NOT_READY) | + (1 << AFS_CELL_FL_NO_LOOKUP_YET)); + rwlock_init(&cell->vl_addrs_lock); /* Fill in the VL server list if we were given a list of addresses to * use. 
*/ if (vllist) { - char delim = ':'; + struct afs_addr_list *alist; - if (strchr(vllist, ',') || !strchr(vllist, '.')) - delim = ','; + alist = afs_parse_text_addrs(vllist, strlen(vllist), ':', + VL_SERVICE, AFS_VL_PORT); + if (IS_ERR(alist)) { + ret = PTR_ERR(alist); + goto parse_failed; + } - do { - struct sockaddr_rxrpc *srx = &cell->vl_addrs[cell->vl_naddrs]; - - if (in4_pton(vllist, -1, - (u8 *)&srx->transport.sin6.sin6_addr.s6_addr32[3], - delim, &vllist)) { - srx->transport_len = sizeof(struct sockaddr_in6); - srx->transport.sin6.sin6_addr.s6_addr32[0] = 0; - srx->transport.sin6.sin6_addr.s6_addr32[1] = 0; - srx->transport.sin6.sin6_addr.s6_addr32[2] = htonl(0xffff); - } else if (in6_pton(vllist, -1, - srx->transport.sin6.sin6_addr.s6_addr, - delim, &vllist)) { - srx->transport_len = sizeof(struct sockaddr_in6); - srx->transport.sin6.sin6_family = AF_INET6; - } else { - goto bad_address; - } - - cell->vl_naddrs++; - if (!*vllist) - break; - vllist++; - - } while (cell->vl_naddrs < AFS_CELL_MAX_ADDRS && vllist); - - /* Disable DNS refresh for manually-specified cells */ + rcu_assign_pointer(cell->vl_addrs, alist); cell->dns_expiry = TIME64_MAX; - } else { - /* We're going to need to 'refresh' this cell's VL server list - * from the DNS before we can use it. 
- */ - cell->dns_expiry = S64_MIN; } _leave(" = %p", cell); return cell; -bad_address: - printk(KERN_ERR "kAFS: bad VL server IP address\n"); - ret = -EINVAL; +parse_failed: + if (ret == -EINVAL) + printk(KERN_ERR "kAFS: bad VL server IP address\n"); kfree(cell); _leave(" = %d", ret); return ERR_PTR(ret); @@ -325,7 +289,6 @@ cell_already_exists: if (excl) { ret = -EEXIST; } else { - ASSERTCMP(atomic_read(&cursor->usage), >=, 1); afs_get_cell(cursor); ret = 0; } @@ -333,8 +296,10 @@ cell_already_exists: kfree(candidate); if (ret == 0) goto wait_for_cell; + goto error_noput; error: afs_put_cell(net, cell); +error_noput: _leave(" = %d [error]", ret); return ERR_PTR(ret); } @@ -396,78 +361,50 @@ int afs_cell_init(struct afs_net *net, const char *rootcell) */ static void afs_update_cell(struct afs_cell *cell) { + struct afs_addr_list *alist, *old; time64_t now, expiry; - char *vllist = NULL; - int ret; _enter("%s", cell->name); - ret = dns_query("afsdb", cell->name, cell->name_len, - "ipv4", &vllist, &expiry); - _debug("query %d", ret); - switch (ret) { - case 0 ... INT_MAX: - clear_bit(AFS_CELL_FL_DNS_FAIL, &cell->flags); - clear_bit(AFS_CELL_FL_NOT_FOUND, &cell->flags); - goto parse_dns_data; + alist = afs_dns_query(cell, &expiry); + if (IS_ERR(alist)) { + switch (PTR_ERR(alist)) { + case -ENODATA: + /* The DNS said that the cell does not exist */ + set_bit(AFS_CELL_FL_NOT_FOUND, &cell->flags); + clear_bit(AFS_CELL_FL_DNS_FAIL, &cell->flags); + cell->dns_expiry = ktime_get_real_seconds() + 61; + break; - case -ENODATA: - clear_bit(AFS_CELL_FL_DNS_FAIL, &cell->flags); - set_bit(AFS_CELL_FL_NOT_FOUND, &cell->flags); - cell->dns_expiry = ktime_get_real_seconds() + 61; - cell->error = -EDESTADDRREQ; - goto out; - - case -EAGAIN: - case -ECONNREFUSED: - default: - /* Unable to query DNS. 
*/ - set_bit(AFS_CELL_FL_DNS_FAIL, &cell->flags); - cell->dns_expiry = ktime_get_real_seconds() + 10; - cell->error = -EDESTADDRREQ; - goto out; - } - -parse_dns_data: - write_seqlock(&cell->vl_addrs_lock); - - ret = -EINVAL; - do { - struct sockaddr_rxrpc *srx = &cell->vl_addrs[cell->vl_naddrs]; - - if (in4_pton(vllist, -1, - (u8 *)&srx->transport.sin6.sin6_addr.s6_addr32[3], - ',', (const char **)&vllist)) { - srx->transport_len = sizeof(struct sockaddr_in6); - srx->transport.sin6.sin6_addr.s6_addr32[0] = 0; - srx->transport.sin6.sin6_addr.s6_addr32[1] = 0; - srx->transport.sin6.sin6_addr.s6_addr32[2] = htonl(0xffff); - } else if (in6_pton(vllist, -1, - srx->transport.sin6.sin6_addr.s6_addr, - ',', (const char **)&vllist)) { - srx->transport_len = sizeof(struct sockaddr_in6); - srx->transport.sin6.sin6_family = AF_INET6; - } else { - goto bad_address; + case -EAGAIN: + case -ECONNREFUSED: + default: + set_bit(AFS_CELL_FL_DNS_FAIL, &cell->flags); + cell->dns_expiry = ktime_get_real_seconds() + 10; + break; } - cell->vl_naddrs++; - if (!*vllist) - break; - vllist++; + cell->error = -EDESTADDRREQ; + } else { + clear_bit(AFS_CELL_FL_DNS_FAIL, &cell->flags); + clear_bit(AFS_CELL_FL_NOT_FOUND, &cell->flags); - } while (cell->vl_naddrs < AFS_CELL_MAX_ADDRS); + /* Exclusion on changing vl_addrs is achieved by a + * non-reentrant work item. 
+ */ + old = rcu_dereference_protected(cell->vl_addrs, true); + rcu_assign_pointer(cell->vl_addrs, alist); + cell->dns_expiry = expiry; - if (cell->vl_naddrs < AFS_CELL_MAX_ADDRS) - memset(cell->vl_addrs + cell->vl_naddrs, 0, - (AFS_CELL_MAX_ADDRS - cell->vl_naddrs) * sizeof(cell->vl_addrs[0])); + if (old) + afs_put_addrlist(old); + } + + if (test_and_clear_bit(AFS_CELL_FL_NO_LOOKUP_YET, &cell->flags)) + wake_up_bit(&cell->flags, AFS_CELL_FL_NO_LOOKUP_YET); now = ktime_get_real_seconds(); - cell->dns_expiry = expiry; - afs_set_cell_timer(cell->net, expiry - now); -bad_address: - write_sequnlock(&cell->vl_addrs_lock); -out: + afs_set_cell_timer(cell->net, cell->dns_expiry - now); _leave(""); } @@ -482,6 +419,7 @@ static void afs_cell_destroy(struct rcu_head *rcu) ASSERTCMP(atomic_read(&cell->usage), ==, 0); + afs_put_addrlist(cell->vl_addrs); key_put(cell->anonymous_key); kfree(cell); @@ -514,6 +452,15 @@ void afs_cells_timer(struct timer_list *timer) afs_dec_cells_outstanding(net); } +/* + * Get a reference on a cell record. + */ +struct afs_cell *afs_get_cell(struct afs_cell *cell) +{ + atomic_inc(&cell->usage); + return cell; +} + /* * Drop a reference on a cell record. 
*/ diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index 680c02d510f7..6614d0a78daa 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -297,7 +297,7 @@ static const struct afs_call_type afs_RXFSFetchStatus = { /* * fetch the status information for a file */ -int afs_fs_fetch_file_status(struct afs_server *server, +int afs_fs_fetch_file_status(struct afs_fs_cursor *fc, struct key *key, struct afs_vnode *vnode, struct afs_volsync *volsync, @@ -325,9 +325,9 @@ int afs_fs_fetch_file_status(struct afs_server *server, bp[2] = htonl(vnode->fid.vnode); bp[3] = htonl(vnode->fid.unique); - call->cb_break = vnode->cb_break + server->cb_s_break; - afs_use_fs_server(call, server); - return afs_make_call(&server->addr, call, GFP_NOFS, async); + call->cb_break = vnode->cb_break + fc->server->cb_s_break; + afs_use_fs_server(call, fc->server); + return afs_make_call(&fc->ac, call, GFP_NOFS, async); } /* @@ -502,7 +502,7 @@ static const struct afs_call_type afs_RXFSFetchData64 = { /* * fetch data from a very large file */ -static int afs_fs_fetch_data64(struct afs_server *server, +static int afs_fs_fetch_data64(struct afs_fs_cursor *fc, struct key *key, struct afs_vnode *vnode, struct afs_read *req, @@ -536,15 +536,15 @@ static int afs_fs_fetch_data64(struct afs_server *server, bp[7] = htonl(lower_32_bits(req->len)); atomic_inc(&req->usage); - call->cb_break = vnode->cb_break + server->cb_s_break; - afs_use_fs_server(call, server); - return afs_make_call(&server->addr, call, GFP_NOFS, async); + call->cb_break = vnode->cb_break + fc->server->cb_s_break; + afs_use_fs_server(call, fc->server); + return afs_make_call(&fc->ac, call, GFP_NOFS, async); } /* * fetch data from a file */ -int afs_fs_fetch_data(struct afs_server *server, +int afs_fs_fetch_data(struct afs_fs_cursor *fc, struct key *key, struct afs_vnode *vnode, struct afs_read *req, @@ -557,7 +557,7 @@ int afs_fs_fetch_data(struct afs_server *server, if (upper_32_bits(req->pos) || upper_32_bits(req->len) || 
upper_32_bits(req->pos + req->len)) - return afs_fs_fetch_data64(server, key, vnode, req, async); + return afs_fs_fetch_data64(fc, key, vnode, req, async); _enter(""); @@ -581,9 +581,9 @@ int afs_fs_fetch_data(struct afs_server *server, bp[5] = htonl(lower_32_bits(req->len)); atomic_inc(&req->usage); - call->cb_break = vnode->cb_break + server->cb_s_break; - afs_use_fs_server(call, server); - return afs_make_call(&server->addr, call, GFP_NOFS, async); + call->cb_break = vnode->cb_break + fc->server->cb_s_break; + afs_use_fs_server(call, fc->server); + return afs_make_call(&fc->ac, call, GFP_NOFS, async); } /* @@ -625,7 +625,7 @@ static const struct afs_call_type afs_RXFSCreateXXXX = { /* * create a file or make a directory */ -int afs_fs_create(struct afs_server *server, +int afs_fs_create(struct afs_fs_cursor *fc, struct key *key, struct afs_vnode *vnode, const char *name, @@ -677,8 +677,8 @@ int afs_fs_create(struct afs_server *server, *bp++ = htonl(mode & S_IALLUGO); /* unix mode */ *bp++ = 0; /* segment size */ - afs_use_fs_server(call, server); - return afs_make_call(&server->addr, call, GFP_NOFS, async); + afs_use_fs_server(call, fc->server); + return afs_make_call(&fc->ac, call, GFP_NOFS, async); } /* @@ -717,7 +717,7 @@ static const struct afs_call_type afs_RXFSRemoveXXXX = { /* * remove a file or directory */ -int afs_fs_remove(struct afs_server *server, +int afs_fs_remove(struct afs_fs_cursor *fc, struct key *key, struct afs_vnode *vnode, const char *name, @@ -756,8 +756,8 @@ int afs_fs_remove(struct afs_server *server, bp = (void *) bp + padsz; } - afs_use_fs_server(call, server); - return afs_make_call(&server->addr, call, GFP_NOFS, async); + afs_use_fs_server(call, fc->server); + return afs_make_call(&fc->ac, call, GFP_NOFS, async); } /* @@ -797,7 +797,7 @@ static const struct afs_call_type afs_RXFSLink = { /* * make a hard link */ -int afs_fs_link(struct afs_server *server, +int afs_fs_link(struct afs_fs_cursor *fc, struct key *key, struct afs_vnode 
*dvnode, struct afs_vnode *vnode, @@ -840,8 +840,8 @@ int afs_fs_link(struct afs_server *server, *bp++ = htonl(vnode->fid.vnode); *bp++ = htonl(vnode->fid.unique); - afs_use_fs_server(call, server); - return afs_make_call(&server->addr, call, GFP_NOFS, async); + afs_use_fs_server(call, fc->server); + return afs_make_call(&fc->ac, call, GFP_NOFS, async); } /* @@ -882,7 +882,7 @@ static const struct afs_call_type afs_RXFSSymlink = { /* * create a symbolic link */ -int afs_fs_symlink(struct afs_server *server, +int afs_fs_symlink(struct afs_fs_cursor *fc, struct key *key, struct afs_vnode *vnode, const char *name, @@ -943,8 +943,8 @@ int afs_fs_symlink(struct afs_server *server, *bp++ = htonl(S_IRWXUGO); /* unix mode */ *bp++ = 0; /* segment size */ - afs_use_fs_server(call, server); - return afs_make_call(&server->addr, call, GFP_NOFS, async); + afs_use_fs_server(call, fc->server); + return afs_make_call(&fc->ac, call, GFP_NOFS, async); } /* @@ -986,7 +986,7 @@ static const struct afs_call_type afs_RXFSRename = { /* * create a symbolic link */ -int afs_fs_rename(struct afs_server *server, +int afs_fs_rename(struct afs_fs_cursor *fc, struct key *key, struct afs_vnode *orig_dvnode, const char *orig_name, @@ -1045,8 +1045,8 @@ int afs_fs_rename(struct afs_server *server, bp = (void *) bp + n_padsz; } - afs_use_fs_server(call, server); - return afs_make_call(&server->addr, call, GFP_NOFS, async); + afs_use_fs_server(call, fc->server); + return afs_make_call(&fc->ac, call, GFP_NOFS, async); } /* @@ -1094,7 +1094,7 @@ static const struct afs_call_type afs_RXFSStoreData64 = { /* * store a set of pages to a very large file */ -static int afs_fs_store_data64(struct afs_server *server, +static int afs_fs_store_data64(struct afs_fs_cursor *fc, struct afs_writeback *wb, pgoff_t first, pgoff_t last, unsigned offset, unsigned to, @@ -1147,14 +1147,14 @@ static int afs_fs_store_data64(struct afs_server *server, *bp++ = htonl(i_size >> 32); *bp++ = htonl((u32) i_size); - 
afs_use_fs_server(call, server); - return afs_make_call(&server->addr, call, GFP_NOFS, async); + afs_use_fs_server(call, fc->server); + return afs_make_call(&fc->ac, call, GFP_NOFS, async); } /* * store a set of pages */ -int afs_fs_store_data(struct afs_server *server, struct afs_writeback *wb, +int afs_fs_store_data(struct afs_fs_cursor *fc, struct afs_writeback *wb, pgoff_t first, pgoff_t last, unsigned offset, unsigned to, bool async) @@ -1183,7 +1183,7 @@ int afs_fs_store_data(struct afs_server *server, struct afs_writeback *wb, (unsigned long long) i_size); if (pos >> 32 || i_size >> 32 || size >> 32 || (pos + size) >> 32) - return afs_fs_store_data64(server, wb, first, last, offset, to, + return afs_fs_store_data64(fc, wb, first, last, offset, to, size, pos, i_size, async); call = afs_alloc_flat_call(net, &afs_RXFSStoreData, @@ -1221,8 +1221,8 @@ int afs_fs_store_data(struct afs_server *server, struct afs_writeback *wb, *bp++ = htonl(size); *bp++ = htonl(i_size); - afs_use_fs_server(call, server); - return afs_make_call(&server->addr, call, GFP_NOFS, async); + afs_use_fs_server(call, fc->server); + return afs_make_call(&fc->ac, call, GFP_NOFS, async); } /* @@ -1279,7 +1279,7 @@ static const struct afs_call_type afs_RXFSStoreData64_as_Status = { * set the attributes on a very large file, using FS.StoreData rather than * FS.StoreStatus so as to alter the file size also */ -static int afs_fs_setattr_size64(struct afs_server *server, struct key *key, +static int afs_fs_setattr_size64(struct afs_fs_cursor *fc, struct key *key, struct afs_vnode *vnode, struct iattr *attr, bool async) { @@ -1319,15 +1319,15 @@ static int afs_fs_setattr_size64(struct afs_server *server, struct key *key, *bp++ = htonl(attr->ia_size >> 32); /* new file length */ *bp++ = htonl((u32) attr->ia_size); - afs_use_fs_server(call, server); - return afs_make_call(&server->addr, call, GFP_NOFS, async); + afs_use_fs_server(call, fc->server); + return afs_make_call(&fc->ac, call, GFP_NOFS, 
async); } /* * set the attributes on a file, using FS.StoreData rather than FS.StoreStatus * so as to alter the file size also */ -static int afs_fs_setattr_size(struct afs_server *server, struct key *key, +static int afs_fs_setattr_size(struct afs_fs_cursor *fc, struct key *key, struct afs_vnode *vnode, struct iattr *attr, bool async) { @@ -1340,8 +1340,7 @@ static int afs_fs_setattr_size(struct afs_server *server, struct key *key, ASSERT(attr->ia_valid & ATTR_SIZE); if (attr->ia_size >> 32) - return afs_fs_setattr_size64(server, key, vnode, attr, - async); + return afs_fs_setattr_size64(fc, key, vnode, attr, async); call = afs_alloc_flat_call(net, &afs_RXFSStoreData_as_Status, (4 + 6 + 3) * 4, @@ -1367,15 +1366,15 @@ static int afs_fs_setattr_size(struct afs_server *server, struct key *key, *bp++ = 0; /* size of write */ *bp++ = htonl(attr->ia_size); /* new file length */ - afs_use_fs_server(call, server); - return afs_make_call(&server->addr, call, GFP_NOFS, async); + afs_use_fs_server(call, fc->server); + return afs_make_call(&fc->ac, call, GFP_NOFS, async); } /* * set the attributes on a file, using FS.StoreData if there's a change in file * size, and FS.StoreStatus otherwise */ -int afs_fs_setattr(struct afs_server *server, struct key *key, +int afs_fs_setattr(struct afs_fs_cursor *fc, struct key *key, struct afs_vnode *vnode, struct iattr *attr, bool async) { @@ -1384,8 +1383,7 @@ int afs_fs_setattr(struct afs_server *server, struct key *key, __be32 *bp; if (attr->ia_valid & ATTR_SIZE) - return afs_fs_setattr_size(server, key, vnode, attr, - async); + return afs_fs_setattr_size(fc, key, vnode, attr, async); _enter(",%x,{%x:%u},,", key_serial(key), vnode->fid.vid, vnode->fid.vnode); @@ -1409,8 +1407,8 @@ int afs_fs_setattr(struct afs_server *server, struct key *key, xdr_encode_AFS_StoreStatus(&bp, attr); - afs_use_fs_server(call, server); - return afs_make_call(&server->addr, call, GFP_NOFS, async); + afs_use_fs_server(call, fc->server); + return 
afs_make_call(&fc->ac, call, GFP_NOFS, async); } /* @@ -1607,7 +1605,7 @@ static const struct afs_call_type afs_RXFSGetVolumeStatus = { /* * fetch the status of a volume */ -int afs_fs_get_volume_status(struct afs_server *server, +int afs_fs_get_volume_status(struct afs_fs_cursor *fc, struct key *key, struct afs_vnode *vnode, struct afs_volume_status *vs, @@ -1640,8 +1638,8 @@ int afs_fs_get_volume_status(struct afs_server *server, bp[0] = htonl(FSGETVOLUMESTATUS); bp[1] = htonl(vnode->fid.vid); - afs_use_fs_server(call, server); - return afs_make_call(&server->addr, call, GFP_NOFS, async); + afs_use_fs_server(call, fc->server); + return afs_make_call(&fc->ac, call, GFP_NOFS, async); } /* @@ -1696,7 +1694,7 @@ static const struct afs_call_type afs_RXFSReleaseLock = { /* * get a lock on a file */ -int afs_fs_set_lock(struct afs_server *server, +int afs_fs_set_lock(struct afs_fs_cursor *fc, struct key *key, struct afs_vnode *vnode, afs_lock_type_t type, @@ -1723,14 +1721,14 @@ int afs_fs_set_lock(struct afs_server *server, *bp++ = htonl(vnode->fid.unique); *bp++ = htonl(type); - afs_use_fs_server(call, server); - return afs_make_call(&server->addr, call, GFP_NOFS, async); + afs_use_fs_server(call, fc->server); + return afs_make_call(&fc->ac, call, GFP_NOFS, async); } /* * extend a lock on a file */ -int afs_fs_extend_lock(struct afs_server *server, +int afs_fs_extend_lock(struct afs_fs_cursor *fc, struct key *key, struct afs_vnode *vnode, bool async) @@ -1755,14 +1753,14 @@ int afs_fs_extend_lock(struct afs_server *server, *bp++ = htonl(vnode->fid.vnode); *bp++ = htonl(vnode->fid.unique); - afs_use_fs_server(call, server); - return afs_make_call(&server->addr, call, GFP_NOFS, async); + afs_use_fs_server(call, fc->server); + return afs_make_call(&fc->ac, call, GFP_NOFS, async); } /* * release a lock on a file */ -int afs_fs_release_lock(struct afs_server *server, +int afs_fs_release_lock(struct afs_fs_cursor *fc, struct key *key, struct afs_vnode *vnode, bool async) 
@@ -1787,8 +1785,8 @@ int afs_fs_release_lock(struct afs_server *server, *bp++ = htonl(vnode->fid.vnode); *bp++ = htonl(vnode->fid.unique); - afs_use_fs_server(call, server); - return afs_make_call(&server->addr, call, GFP_NOFS, async); + afs_use_fs_server(call, fc->server); + return afs_make_call(&fc->ac, call, GFP_NOFS, async); } /* @@ -1812,6 +1810,7 @@ static const struct afs_call_type afs_RXFSGiveUpAllCallBacks = { * Flush all the callbacks we have on a server. */ int afs_fs_give_up_all_callbacks(struct afs_server *server, + struct afs_addr_cursor *ac, struct key *key, bool async) { @@ -1831,5 +1830,5 @@ int afs_fs_give_up_all_callbacks(struct afs_server *server, *bp++ = htonl(FSGIVEUPALLCALLBACKS); /* Can't take a ref on server */ - return afs_make_call(&server->addr, call, GFP_NOFS, async); + return afs_make_call(ac, call, GFP_NOFS, async); } diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 51e3825b5ffb..df52bf18a263 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -70,6 +70,17 @@ enum afs_call_state { AFS_CALL_COMPLETE, /* Completed or failed */ }; +/* + * List of server addresses. + */ +struct afs_addr_list { + struct rcu_head rcu; /* Must be first */ + refcount_t usage; + unsigned short nr_addrs; + unsigned short index; /* Address currently in use */ + struct sockaddr_rxrpc addrs[]; +}; + /* * a record of an in-progress RxRPC call */ @@ -283,16 +294,15 @@ struct afs_cell { #define AFS_CELL_FL_NO_GC 1 /* The cell was added manually, don't auto-gc */ #define AFS_CELL_FL_NOT_FOUND 2 /* Permanent DNS error */ #define AFS_CELL_FL_DNS_FAIL 3 /* Failed to access DNS */ +#define AFS_CELL_FL_NO_LOOKUP_YET 4 /* Not completed first DNS lookup yet */ enum afs_cell_state state; short error; spinlock_t vl_lock; /* vl_list lock */ /* VLDB server list. 
*/ - seqlock_t vl_addrs_lock; - unsigned short vl_naddrs; /* number of VL servers in addr list */ - unsigned short vl_curr_svix; /* current server index */ - struct sockaddr_rxrpc vl_addrs[AFS_CELL_MAX_ADDRS]; /* cell VL server addresses */ + rwlock_t vl_addrs_lock; /* Lock on vl_addrs */ + struct afs_addr_list __rcu *vl_addrs; /* List of VL servers */ u8 name_len; /* Length of name */ char name[64 + 1]; /* Cell name, case-flattened and NUL-padded */ }; @@ -343,7 +353,7 @@ struct afs_vlocation { struct afs_server { atomic_t usage; time64_t time_of_death; /* time at which put reduced usage to 0 */ - struct sockaddr_rxrpc addr; /* server address */ + struct afs_addr_list __rcu *addrs; /* List of addresses for this server */ struct afs_net *net; /* Network namespace in which the server resides */ struct afs_cell *cell; /* cell in which server resides */ struct list_head link; /* link in cell's server list */ @@ -485,7 +495,48 @@ struct afs_interface { unsigned mtu; /* MTU of interface */ }; +/* + * Cursor for iterating over a server's address list. + */ +struct afs_addr_cursor { + struct afs_addr_list *alist; /* Current address list (pins ref) */ + struct sockaddr_rxrpc *addr; + unsigned short start; /* Starting point in alist->addrs[] */ + unsigned short index; /* Wrapping offset from start to current addr */ + short error; + bool begun; /* T if we've begun iteration */ + bool responded; /* T if the current address responded */ +}; + +/* + * Cursor for iterating over a set of fileservers. 
+ */ +struct afs_fs_cursor { + struct afs_addr_cursor ac; + struct afs_server *server; /* Current server (pins ref) */ +}; + /*****************************************************************************/ +/* + * addr_list.c + */ +static inline struct afs_addr_list *afs_get_addrlist(struct afs_addr_list *alist) +{ + if (alist) + refcount_inc(&alist->usage); + return alist; +} +extern struct afs_addr_list *afs_alloc_addrlist(unsigned int, + unsigned short, + unsigned short); +extern void afs_put_addrlist(struct afs_addr_list *); +extern struct afs_addr_list *afs_parse_text_addrs(const char *, size_t, char, + unsigned short, unsigned short); +extern struct afs_addr_list *afs_dns_query(struct afs_cell *, time64_t *); +extern bool afs_iterate_addresses(struct afs_addr_cursor *); +extern int afs_end_cursor(struct afs_addr_cursor *); +extern int afs_set_vl_cursor(struct afs_addr_cursor *, struct afs_cell *); + /* * cache.c */ @@ -521,17 +572,11 @@ static inline struct afs_cb_interest *afs_get_cb_interest(struct afs_cb_interest /* * cell.c */ - static inline struct afs_cell *afs_get_cell(struct afs_cell *cell) -{ - if (cell) - atomic_inc(&cell->usage); - return cell; -} - extern int afs_cell_init(struct afs_net *, const char *); extern struct afs_cell *afs_lookup_cell_rcu(struct afs_net *, const char *, unsigned); extern struct afs_cell *afs_lookup_cell(struct afs_net *, const char *, unsigned, const char *, bool); +extern struct afs_cell *afs_get_cell(struct afs_cell *); extern void afs_put_cell(struct afs_net *, struct afs_cell *); extern void afs_manage_cells(struct work_struct *); extern void afs_cells_timer(struct timer_list *); @@ -574,40 +619,41 @@ extern int afs_flock(struct file *, int, struct file_lock *); /* * fsclient.c */ -extern int afs_fs_fetch_file_status(struct afs_server *, struct key *, +extern int afs_fs_fetch_file_status(struct afs_fs_cursor *, struct key *, struct afs_vnode *, struct afs_volsync *, bool); extern int afs_fs_give_up_callbacks(struct 
afs_net *, struct afs_server *, bool); -extern int afs_fs_fetch_data(struct afs_server *, struct key *, +extern int afs_fs_fetch_data(struct afs_fs_cursor *, struct key *, struct afs_vnode *, struct afs_read *, bool); -extern int afs_fs_create(struct afs_server *, struct key *, +extern int afs_fs_create(struct afs_fs_cursor *, struct key *, struct afs_vnode *, const char *, umode_t, struct afs_fid *, struct afs_file_status *, struct afs_callback *, bool); -extern int afs_fs_remove(struct afs_server *, struct key *, +extern int afs_fs_remove(struct afs_fs_cursor *, struct key *, struct afs_vnode *, const char *, bool, bool); -extern int afs_fs_link(struct afs_server *, struct key *, struct afs_vnode *, +extern int afs_fs_link(struct afs_fs_cursor *, struct key *, struct afs_vnode *, struct afs_vnode *, const char *, bool); -extern int afs_fs_symlink(struct afs_server *, struct key *, +extern int afs_fs_symlink(struct afs_fs_cursor *, struct key *, struct afs_vnode *, const char *, const char *, struct afs_fid *, struct afs_file_status *, bool); -extern int afs_fs_rename(struct afs_server *, struct key *, +extern int afs_fs_rename(struct afs_fs_cursor *, struct key *, struct afs_vnode *, const char *, struct afs_vnode *, const char *, bool); -extern int afs_fs_store_data(struct afs_server *, struct afs_writeback *, +extern int afs_fs_store_data(struct afs_fs_cursor *, struct afs_writeback *, pgoff_t, pgoff_t, unsigned, unsigned, bool); -extern int afs_fs_setattr(struct afs_server *, struct key *, +extern int afs_fs_setattr(struct afs_fs_cursor *, struct key *, struct afs_vnode *, struct iattr *, bool); -extern int afs_fs_get_volume_status(struct afs_server *, struct key *, +extern int afs_fs_get_volume_status(struct afs_fs_cursor *, struct key *, struct afs_vnode *, struct afs_volume_status *, bool); -extern int afs_fs_set_lock(struct afs_server *, struct key *, +extern int afs_fs_set_lock(struct afs_fs_cursor *, struct key *, struct afs_vnode *, afs_lock_type_t, 
bool); -extern int afs_fs_extend_lock(struct afs_server *, struct key *, +extern int afs_fs_extend_lock(struct afs_fs_cursor *, struct key *, struct afs_vnode *, bool); -extern int afs_fs_release_lock(struct afs_server *, struct key *, +extern int afs_fs_release_lock(struct afs_fs_cursor *, struct key *, struct afs_vnode *, bool); -extern int afs_fs_give_up_all_callbacks(struct afs_server *, struct key *, bool); +extern int afs_fs_give_up_all_callbacks(struct afs_server *, struct afs_addr_cursor *, + struct key *, bool); /* * inode.c @@ -697,7 +743,7 @@ extern void __net_exit afs_close_socket(struct afs_net *); extern void afs_charge_preallocation(struct work_struct *); extern void afs_put_call(struct afs_call *); extern int afs_queue_call_work(struct afs_call *); -extern long afs_make_call(struct sockaddr_rxrpc *, struct afs_call *, gfp_t, bool); +extern long afs_make_call(struct afs_addr_cursor *, struct afs_call *, gfp_t, bool); extern struct afs_call *afs_alloc_flat_call(struct afs_net *, const struct afs_call_type *, size_t, size_t); @@ -751,13 +797,11 @@ extern void __exit afs_fs_exit(void); /* * vlclient.c */ -extern int afs_vl_get_entry_by_name(struct afs_net *, - struct sockaddr_rxrpc *, struct key *, - const char *, struct afs_cache_vlocation *, - bool); -extern int afs_vl_get_entry_by_id(struct afs_net *, - struct sockaddr_rxrpc *, struct key *, - afs_volid_t, afs_voltype_t, +extern int afs_vl_get_entry_by_name(struct afs_net *, struct afs_addr_cursor *, + struct key *, const char *, + struct afs_cache_vlocation *, bool); +extern int afs_vl_get_entry_by_id(struct afs_net *, struct afs_addr_cursor *, + struct key *, afs_volid_t, afs_voltype_t, struct afs_cache_vlocation *, bool); /* @@ -828,9 +872,11 @@ static inline struct afs_volume *afs_get_volume(struct afs_volume *volume) extern void afs_put_volume(struct afs_cell *, struct afs_volume *); extern struct afs_volume *afs_volume_lookup(struct afs_mount_params *); -extern struct afs_server 
*afs_volume_pick_fileserver(struct afs_vnode *); -extern int afs_volume_release_fileserver(struct afs_vnode *, - struct afs_server *, int); +extern void afs_init_fs_cursor(struct afs_fs_cursor *, struct afs_vnode *); +extern int afs_set_fs_cursor(struct afs_fs_cursor *, struct afs_vnode *); +extern bool afs_volume_pick_fileserver(struct afs_fs_cursor *, struct afs_vnode *); +extern bool afs_iterate_fs_cursor(struct afs_fs_cursor *, struct afs_vnode *); +extern int afs_end_fs_cursor(struct afs_fs_cursor *, struct afs_net *); /* * write.c diff --git a/fs/afs/proc.c b/fs/afs/proc.c index 08565429615d..9cf9ce88a8dd 100644 --- a/fs/afs/proc.c +++ b/fs/afs/proc.c @@ -514,23 +514,23 @@ static int afs_proc_cell_vlservers_open(struct inode *inode, struct file *file) */ static void *afs_proc_cell_vlservers_start(struct seq_file *m, loff_t *_pos) { + struct afs_addr_list *alist; struct afs_cell *cell = m->private; loff_t pos = *_pos; - _enter("cell=%p pos=%Ld", cell, *_pos); + rcu_read_lock(); - /* lock the list against modification */ - down_read(&cell->vl_sem); + alist = rcu_dereference(cell->vl_addrs); /* allow for the header line */ if (!pos) return (void *) 1; pos--; - if (pos >= cell->vl_naddrs) + if (!alist || pos >= alist->nr_addrs) return NULL; - return &cell->vl_addrs[pos]; + return alist->addrs + pos; } /* @@ -539,17 +539,18 @@ static void *afs_proc_cell_vlservers_start(struct seq_file *m, loff_t *_pos) static void *afs_proc_cell_vlservers_next(struct seq_file *p, void *v, loff_t *_pos) { + struct afs_addr_list *alist; struct afs_cell *cell = p->private; loff_t pos; - _enter("cell=%p{nad=%u} pos=%Ld", cell, cell->vl_naddrs, *_pos); + alist = rcu_dereference(cell->vl_addrs); pos = *_pos; (*_pos)++; - if (pos >= cell->vl_naddrs) + if (!alist || pos >= alist->nr_addrs) return NULL; - return &cell->vl_addrs[pos]; + return alist->addrs + pos; } /* @@ -557,9 +558,7 @@ static void *afs_proc_cell_vlservers_next(struct seq_file *p, void *v, */ static void 
afs_proc_cell_vlservers_stop(struct seq_file *p, void *v) { - struct afs_cell *cell = p->private; - - up_read(&cell->vl_sem); + rcu_read_unlock(); } /* @@ -658,7 +657,7 @@ static int afs_proc_cell_servers_show(struct seq_file *m, void *v) } /* display one cell per line on subsequent lines */ - sprintf(ipaddr, "%pISp", &server->addr.transport); + sprintf(ipaddr, "%pISp", &server->addrs->addrs[0].transport); seq_printf(m, "%3d %-15s %5d\n", atomic_read(&server->usage), ipaddr, server->fs_state); diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index ac1e25f957b1..5ddfb7c4cf78 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -321,9 +321,10 @@ static int afs_send_pages(struct afs_call *call, struct msghdr *msg) /* * initiate a call */ -long afs_make_call(struct sockaddr_rxrpc *srx, struct afs_call *call, +long afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call, gfp_t gfp, bool async) { + struct sockaddr_rxrpc *srx = ac->addr; struct rxrpc_call *rxcall; struct msghdr msg; struct kvec iov[1]; diff --git a/fs/afs/server.c b/fs/afs/server.c index 4e66608fc805..9ca174b24f5b 100644 --- a/fs/afs/server.c +++ b/fs/afs/server.c @@ -56,7 +56,9 @@ static int afs_install_server(struct afs_server *server) p = *pp; _debug("- consider %p", p); xserver = rb_entry(p, struct afs_server, master_rb); - diff = memcmp(&server->addr, &xserver->addr, sizeof(server->addr)); + diff = memcmp(&server->addrs->addrs[0], + &xserver->addrs->addrs[0], + sizeof(server->addrs->addrs[0])); if (diff < 0) pp = &(*pp)->rb_left; else if (diff > 0) @@ -85,25 +87,38 @@ static struct afs_server *afs_alloc_server(struct afs_cell *cell, _enter(""); server = kzalloc(sizeof(struct afs_server), GFP_KERNEL); - if (server) { - atomic_set(&server->usage, 1); - server->net = cell->net; - server->cell = cell; + if (!server) + goto enomem; + server->addrs = kzalloc(sizeof(struct afs_addr_list) + + sizeof(struct sockaddr_rxrpc), + GFP_KERNEL); + if (!server->addrs) + goto enomem_server; - 
INIT_LIST_HEAD(&server->link); - INIT_LIST_HEAD(&server->grave); - init_rwsem(&server->sem); - spin_lock_init(&server->fs_lock); - INIT_LIST_HEAD(&server->cb_interests); - rwlock_init(&server->cb_break_lock); + atomic_set(&server->usage, 1); + server->net = cell->net; + server->cell = cell; - server->addr = *addr; - afs_inc_servers_outstanding(cell->net); - _leave(" = %p{%d}", server, atomic_read(&server->usage)); - } else { - _leave(" = NULL [nomem]"); - } + INIT_LIST_HEAD(&server->link); + INIT_LIST_HEAD(&server->grave); + init_rwsem(&server->sem); + spin_lock_init(&server->fs_lock); + INIT_LIST_HEAD(&server->cb_interests); + rwlock_init(&server->cb_break_lock); + + refcount_set(&server->addrs->usage, 1); + server->addrs->nr_addrs = 1; + server->addrs->addrs[0] = *addr; + afs_inc_servers_outstanding(cell->net); + + _leave(" = %p{%d}", server, atomic_read(&server->usage)); return server; + +enomem_server: + kfree(server); +enomem: + _leave(" = NULL [nomem]"); + return NULL; } /* @@ -120,7 +135,7 @@ struct afs_server *afs_lookup_server(struct afs_cell *cell, read_lock(&cell->servers_lock); list_for_each_entry(server, &cell->servers, link) { - if (memcmp(&server->addr, addr, sizeof(*addr)) == 0) + if (memcmp(&server->addrs->addrs[0], addr, sizeof(*addr)) == 0) goto found_server_quickly; } read_unlock(&cell->servers_lock); @@ -135,7 +150,7 @@ struct afs_server *afs_lookup_server(struct afs_cell *cell, /* check the cell's server list again */ list_for_each_entry(server, &cell->servers, link) { - if (memcmp(&server->addr, addr, sizeof(*addr)) == 0) + if (memcmp(&server->addrs->addrs[0], addr, sizeof(*addr)) == 0) goto found_server; } @@ -204,7 +219,7 @@ struct afs_server *afs_find_server(struct afs_net *net, _debug("- consider %p", p); - diff = memcmp(srx, &server->addr, sizeof(*srx)); + diff = memcmp(srx, &server->addrs->addrs[0], sizeof(*srx)); if (diff < 0) { p = p->rb_left; } else if (diff > 0) { @@ -269,10 +284,19 @@ void afs_put_server(struct afs_net *net, struct 
afs_server *server) */ static void afs_destroy_server(struct afs_net *net, struct afs_server *server) { + struct afs_addr_list *alist = server->addrs; + struct afs_addr_cursor ac = { + .alist = alist, + .addr = &alist->addrs[0], + .start = alist->index, + .index = alist->index, + .error = 0, + }; _enter("%p", server); - afs_fs_give_up_all_callbacks(server, NULL, false); + afs_fs_give_up_all_callbacks(server, &ac, NULL, false); afs_put_cell(net, server->cell); + afs_put_addrlist(server->addrs); kfree(server); afs_dec_servers_outstanding(net); } diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c index aa79fe3f168b..1d1e7df77dd5 100644 --- a/fs/afs/vlclient.c +++ b/fs/afs/vlclient.c @@ -114,7 +114,7 @@ static const struct afs_call_type afs_RXVLGetEntryById = { * dispatch a get volume entry by name operation */ int afs_vl_get_entry_by_name(struct afs_net *net, - struct sockaddr_rxrpc *addr, + struct afs_addr_cursor *ac, struct key *key, const char *volname, struct afs_cache_vlocation *entry, @@ -146,14 +146,14 @@ int afs_vl_get_entry_by_name(struct afs_net *net, memset((void *) bp + volnamesz, 0, padsz); /* initiate the call */ - return afs_make_call(addr, call, GFP_KERNEL, async); + return afs_make_call(ac, call, GFP_KERNEL, async); } /* * dispatch a get volume entry by ID operation */ int afs_vl_get_entry_by_id(struct afs_net *net, - struct sockaddr_rxrpc *addr, + struct afs_addr_cursor *ac, struct key *key, afs_volid_t volid, afs_voltype_t voltype, @@ -179,5 +179,5 @@ int afs_vl_get_entry_by_id(struct afs_net *net, *bp = htonl(voltype); /* initiate the call */ - return afs_make_call(addr, call, GFP_KERNEL, async); + return afs_make_call(ac, call, GFP_KERNEL, async); } diff --git a/fs/afs/vlocation.c b/fs/afs/vlocation.c index ec5ab8dc9bc8..52c31ad0ef60 100644 --- a/fs/afs/vlocation.c +++ b/fs/afs/vlocation.c @@ -29,22 +29,25 @@ static int afs_vlocation_access_vl_by_name(struct afs_vlocation *vl, struct key *key, struct afs_cache_vlocation *vldb) { - struct afs_cell 
*cell = vl->cell; - int count, ret; + struct afs_addr_cursor ac; + int ret; - _enter("%s,%s", cell->name, vl->vldb.name); + _enter("%s,%s", vl->cell->name, vl->vldb.name); + + ret = afs_set_vl_cursor(&ac, vl->cell); + if (ret < 0) + return ret; down_write(&vl->cell->vl_sem); + ret = -ENOMEDIUM; - for (count = cell->vl_naddrs; count > 0; count--) { - struct sockaddr_rxrpc *addr = &cell->vl_addrs[cell->vl_curr_svix]; - - _debug("CellServ[%hu]: %pIS", cell->vl_curr_svix, &addr->transport); + while (afs_iterate_addresses(&ac)) { + _debug("CellServ[%hu]: %pIS", ac.index, &ac.addr->transport); /* attempt to access the VL server */ - ret = afs_vl_get_entry_by_name(cell->net, addr, key, - vl->vldb.name, vldb, false); - switch (ret) { + ac.error = afs_vl_get_entry_by_name(vl->cell->net, &ac, key, + vl->vldb.name, vldb, false); + switch (ac.error) { case 0: goto out; case -ENOMEM: @@ -52,26 +55,24 @@ static int afs_vlocation_access_vl_by_name(struct afs_vlocation *vl, case -ENETUNREACH: case -EHOSTUNREACH: case -ECONNREFUSED: - if (ret == -ENOMEM || ret == -ENONET) + if (ac.error == -ENOMEM || ac.error == -ENONET) goto out; - goto rotate; + break; case -ENOMEDIUM: case -EKEYREJECTED: case -EKEYEXPIRED: + ac.responded = true; goto out; default: - ret = -EIO; - goto rotate; + ac.responded = true; + ac.error = -EIO; + break; } - - /* rotate the server records upon lookup failure */ - rotate: - cell->vl_curr_svix++; - cell->vl_curr_svix %= cell->vl_naddrs; } out: up_write(&vl->cell->vl_sem); + ret = afs_end_cursor(&ac); _leave(" = %d", ret); return ret; } @@ -86,22 +87,24 @@ static int afs_vlocation_access_vl_by_id(struct afs_vlocation *vl, afs_voltype_t voltype, struct afs_cache_vlocation *vldb) { - struct afs_cell *cell = vl->cell; - int count, ret; + struct afs_addr_cursor ac; + int ret; - _enter("%s,%x,%d,", cell->name, volid, voltype); + _enter("%s,%x,%d,", vl->cell->name, volid, voltype); + + ret = afs_set_vl_cursor(&ac, vl->cell); + if (ret < 0) + return ret; 
down_write(&vl->cell->vl_sem); ret = -ENOMEDIUM; - for (count = cell->vl_naddrs; count > 0; count--) { - struct sockaddr_rxrpc *addr = &cell->vl_addrs[cell->vl_curr_svix]; - - _debug("CellServ[%hu]: %pIS", cell->vl_curr_svix, &addr->transport); + while (afs_iterate_addresses(&ac)) { + _debug("CellServ[%hu]: %pIS", ac.index, &ac.addr->transport); /* attempt to access the VL server */ - ret = afs_vl_get_entry_by_id(cell->net, addr, key, volid, - voltype, vldb, false); - switch (ret) { + ac.error = afs_vl_get_entry_by_id(vl->cell->net, &ac, key, volid, + voltype, vldb, false); + switch (ac.error) { case 0: goto out; case -ENOMEM: @@ -109,10 +112,11 @@ static int afs_vlocation_access_vl_by_id(struct afs_vlocation *vl, case -ENETUNREACH: case -EHOSTUNREACH: case -ECONNREFUSED: - if (ret == -ENOMEM || ret == -ENONET) + if (ac.error == -ENOMEM || ac.error == -ENONET) goto out; goto rotate; case -EBUSY: + ac.responded = true; vl->upd_busy_cnt++; if (vl->upd_busy_cnt <= 3) { if (vl->upd_busy_cnt > 1) { @@ -124,30 +128,31 @@ static int afs_vlocation_access_vl_by_id(struct afs_vlocation *vl, } break; case -ENOMEDIUM: + ac.responded = true; vl->upd_rej_cnt++; goto rotate; default: - ret = -EIO; + ac.responded = true; + ac.error = -EIO; goto rotate; } /* rotate the server records upon lookup failure */ rotate: - cell->vl_curr_svix++; - cell->vl_curr_svix %= cell->vl_naddrs; vl->upd_busy_cnt = 0; } out: - if (ret < 0 && vl->upd_rej_cnt > 0) { + if (ac.error < 0 && vl->upd_rej_cnt > 0) { printk(KERN_NOTICE "kAFS:" " Active volume no longer valid '%s'\n", vl->vldb.name); vl->valid = 0; - ret = -ENOMEDIUM; + ac.error = -ENOMEDIUM; } up_write(&vl->cell->vl_sem); + ret = afs_end_cursor(&ac); _leave(" = %d", ret); return ret; } diff --git a/fs/afs/vnode.c b/fs/afs/vnode.c index 622e1100099b..9c7333eb01c2 100644 --- a/fs/afs/vnode.c +++ b/fs/afs/vnode.c @@ -44,30 +44,26 @@ static void afs_vnode_deleted_remotely(struct afs_vnode *vnode) void afs_vnode_finalise_status_update(struct 
afs_vnode *vnode, struct afs_server *server) { - struct afs_server *oldserver = NULL; - - _enter("%p,%p", vnode, server); - spin_lock(&vnode->lock); vnode->update_cnt--; ASSERTCMP(vnode->update_cnt, >=, 0); spin_unlock(&vnode->lock); wake_up_all(&vnode->update_waitq); - afs_put_server(afs_v2net(vnode), oldserver); _leave(""); } /* * finish off updating the recorded status of a file after an operation failed */ -static void afs_vnode_status_update_failed(struct afs_vnode *vnode, int ret) +static void afs_vnode_status_update_failed(struct afs_fs_cursor *fc, + struct afs_vnode *vnode) { - _enter("{%x:%u},%d", vnode->fid.vid, vnode->fid.vnode, ret); + _enter("{%x:%u},%d", vnode->fid.vid, vnode->fid.vnode, fc->ac.error); spin_lock(&vnode->lock); - if (ret == -ENOENT) { + if (fc->ac.error == -ENOENT) { /* the file was deleted on the server */ _debug("got NOENT from server - marking file deleted"); afs_vnode_deleted_remotely(vnode); @@ -90,9 +86,8 @@ static void afs_vnode_status_update_failed(struct afs_vnode *vnode, int ret) */ int afs_vnode_fetch_status(struct afs_vnode *vnode, struct key *key, bool force) { - struct afs_server *server; + struct afs_fs_cursor fc; unsigned int cb_break = 0; - int ret; DECLARE_WAITQUEUE(myself, current); @@ -172,43 +167,37 @@ get_anyway: /* merge AFS status fetches and clear outstanding callback on this * vnode */ + afs_init_fs_cursor(&fc, vnode); do { /* pick a server to query */ - server = afs_volume_pick_fileserver(vnode); - if (IS_ERR(server)) + if (!afs_volume_pick_fileserver(&fc, vnode)) goto no_server; - _debug("USING SERVER: %p{%pIS}", - server, &server->addr.transport); + fc.ac.error = afs_fs_fetch_file_status(&fc, key, vnode, NULL, false); - ret = afs_fs_fetch_file_status(server, key, vnode, NULL, - false); - - } while (!afs_volume_release_fileserver(vnode, server, ret)); + } while (afs_iterate_fs_cursor(&fc, vnode)); /* adjust the flags */ - if (ret == 0) { + if (fc.ac.error == 0) { _debug("adjust"); afs_cache_permit(vnode, 
key, cb_break); - afs_vnode_finalise_status_update(vnode, server); - afs_put_server(afs_v2net(vnode), server); + afs_vnode_finalise_status_update(vnode, fc.server); } else { - _debug("failed [%d]", ret); - afs_vnode_status_update_failed(vnode, ret); + _debug("failed [%d]", fc.ac.error); + afs_vnode_status_update_failed(&fc, vnode); } +out: + afs_end_fs_cursor(&fc, afs_v2net(vnode)); ASSERTCMP(vnode->update_cnt, >=, 0); - - _leave(" = %d [cnt %d]", ret, vnode->update_cnt); - return ret; + _leave(" = %d [cnt %d]", fc.ac.error, vnode->update_cnt); + return fc.ac.error; no_server: spin_lock(&vnode->lock); vnode->update_cnt--; - ASSERTCMP(vnode->update_cnt, >=, 0); spin_unlock(&vnode->lock); - _leave(" = %ld [cnt %d]", PTR_ERR(server), vnode->update_cnt); - return PTR_ERR(server); + goto out; } /* @@ -218,8 +207,7 @@ no_server: int afs_vnode_fetch_data(struct afs_vnode *vnode, struct key *key, struct afs_read *desc) { - struct afs_server *server; - int ret; + struct afs_fs_cursor fc; _enter("%s{%x:%u.%u},%x,,,", vnode->volume->vlocation->vldb.name, @@ -235,36 +223,31 @@ int afs_vnode_fetch_data(struct afs_vnode *vnode, struct key *key, /* merge in AFS status fetches and clear outstanding callback on this * vnode */ + afs_init_fs_cursor(&fc, vnode); do { /* pick a server to query */ - server = afs_volume_pick_fileserver(vnode); - if (IS_ERR(server)) + if (!afs_volume_pick_fileserver(&fc, vnode)) goto no_server; - _debug("USING SERVER: %pIS\n", &server->addr.transport); + fc.ac.error = afs_fs_fetch_data(&fc, key, vnode, desc, false); - ret = afs_fs_fetch_data(server, key, vnode, desc, - false); - - } while (!afs_volume_release_fileserver(vnode, server, ret)); + } while (afs_iterate_fs_cursor(&fc, vnode)); /* adjust the flags */ - if (ret == 0) { - afs_vnode_finalise_status_update(vnode, server); - afs_put_server(afs_v2net(vnode), server); - } else { - afs_vnode_status_update_failed(vnode, ret); - } + if (fc.ac.error == 0) + afs_vnode_finalise_status_update(vnode, 
fc.server); + else + afs_vnode_status_update_failed(&fc, vnode); - _leave(" = %d", ret); - return ret; +out: + return afs_end_fs_cursor(&fc, afs_v2net(vnode)); no_server: spin_lock(&vnode->lock); vnode->update_cnt--; ASSERTCMP(vnode->update_cnt, >=, 0); spin_unlock(&vnode->lock); - return PTR_ERR(server); + goto out; } /* @@ -275,8 +258,7 @@ int afs_vnode_create(struct afs_vnode *vnode, struct key *key, struct afs_file_status *newstatus, struct afs_callback *newcb, struct afs_server **_server) { - struct afs_server *server; - int ret; + struct afs_fs_cursor fc; _enter("%s{%x:%u.%u},%x,%s,,", vnode->volume->vlocation->vldb.name, @@ -291,38 +273,36 @@ int afs_vnode_create(struct afs_vnode *vnode, struct key *key, vnode->update_cnt++; spin_unlock(&vnode->lock); + afs_init_fs_cursor(&fc, vnode); do { /* pick a server to query */ - server = afs_volume_pick_fileserver(vnode); - if (IS_ERR(server)) + if (!afs_volume_pick_fileserver(&fc, vnode)) goto no_server; - _debug("USING SERVER: %pIS\n", &server->addr.transport); + fc.ac.error = afs_fs_create(&fc, key, vnode, name, mode, newfid, + newstatus, newcb, false); - ret = afs_fs_create(server, key, vnode, name, mode, newfid, - newstatus, newcb, false); - - } while (!afs_volume_release_fileserver(vnode, server, ret)); + } while (afs_iterate_fs_cursor(&fc, vnode)); /* adjust the flags */ - if (ret == 0) { - afs_vnode_finalise_status_update(vnode, server); - *_server = server; + if (fc.ac.error == 0) { + afs_vnode_finalise_status_update(vnode, fc.server); + *_server = fc.server; + fc.server = NULL; } else { - afs_vnode_status_update_failed(vnode, ret); + afs_vnode_status_update_failed(&fc, vnode); *_server = NULL; } - _leave(" = %d [cnt %d]", ret, vnode->update_cnt); - return ret; +out: + return afs_end_fs_cursor(&fc, afs_v2net(vnode)); no_server: spin_lock(&vnode->lock); vnode->update_cnt--; ASSERTCMP(vnode->update_cnt, >=, 0); spin_unlock(&vnode->lock); - _leave(" = %ld [cnt %d]", PTR_ERR(server), vnode->update_cnt); - return 
PTR_ERR(server); + goto out; } /* @@ -331,8 +311,7 @@ no_server: int afs_vnode_remove(struct afs_vnode *vnode, struct key *key, const char *name, bool isdir) { - struct afs_server *server; - int ret; + struct afs_fs_cursor fc; _enter("%s{%x:%u.%u},%x,%s", vnode->volume->vlocation->vldb.name, @@ -347,37 +326,31 @@ int afs_vnode_remove(struct afs_vnode *vnode, struct key *key, const char *name, vnode->update_cnt++; spin_unlock(&vnode->lock); + afs_init_fs_cursor(&fc, vnode); do { /* pick a server to query */ - server = afs_volume_pick_fileserver(vnode); - if (IS_ERR(server)) + if (!afs_volume_pick_fileserver(&fc, vnode)) goto no_server; - _debug("USING SERVER: %pIS\n", &server->addr.transport); + fc.ac.error = afs_fs_remove(&fc, key, vnode, name, isdir, false); - ret = afs_fs_remove(server, key, vnode, name, isdir, - false); - - } while (!afs_volume_release_fileserver(vnode, server, ret)); + } while (afs_iterate_fs_cursor(&fc, vnode)); /* adjust the flags */ - if (ret == 0) { - afs_vnode_finalise_status_update(vnode, server); - afs_put_server(afs_v2net(vnode), server); - } else { - afs_vnode_status_update_failed(vnode, ret); - } + if (fc.ac.error == 0) + afs_vnode_finalise_status_update(vnode, fc.server); + else + afs_vnode_status_update_failed(&fc, vnode); - _leave(" = %d [cnt %d]", ret, vnode->update_cnt); - return ret; +out: + return afs_end_fs_cursor(&fc, afs_v2net(vnode)); no_server: spin_lock(&vnode->lock); vnode->update_cnt--; ASSERTCMP(vnode->update_cnt, >=, 0); spin_unlock(&vnode->lock); - _leave(" = %ld [cnt %d]", PTR_ERR(server), vnode->update_cnt); - return PTR_ERR(server); + goto out; } /* @@ -386,8 +359,7 @@ no_server: int afs_vnode_link(struct afs_vnode *dvnode, struct afs_vnode *vnode, struct key *key, const char *name) { - struct afs_server *server; - int ret; + struct afs_fs_cursor fc; _enter("%s{%x:%u.%u},%s{%x:%u.%u},%x,%s", dvnode->volume->vlocation->vldb.name, @@ -409,31 +381,27 @@ int afs_vnode_link(struct afs_vnode *dvnode, struct afs_vnode 
*vnode, dvnode->update_cnt++; spin_unlock(&dvnode->lock); + afs_init_fs_cursor(&fc, vnode); do { /* pick a server to query */ - server = afs_volume_pick_fileserver(dvnode); - if (IS_ERR(server)) + if (!afs_volume_pick_fileserver(&fc, dvnode)) goto no_server; - _debug("USING SERVER: %pIS\n", &server->addr.transport); + fc.ac.error = afs_fs_link(&fc, key, dvnode, vnode, name, false); - ret = afs_fs_link(server, key, dvnode, vnode, name, - false); - - } while (!afs_volume_release_fileserver(dvnode, server, ret)); + } while (afs_iterate_fs_cursor(&fc, dvnode)); /* adjust the flags */ - if (ret == 0) { - afs_vnode_finalise_status_update(vnode, server); - afs_vnode_finalise_status_update(dvnode, server); - afs_put_server(afs_v2net(dvnode), server); + if (fc.ac.error == 0) { + afs_vnode_finalise_status_update(vnode, fc.server); + afs_vnode_finalise_status_update(dvnode, fc.server); } else { - afs_vnode_status_update_failed(vnode, ret); - afs_vnode_status_update_failed(dvnode, ret); + afs_vnode_status_update_failed(&fc, vnode); + afs_vnode_status_update_failed(&fc, dvnode); } - _leave(" = %d [cnt %d]", ret, vnode->update_cnt); - return ret; +out: + return afs_end_fs_cursor(&fc, afs_v2net(vnode)); no_server: spin_lock(&vnode->lock); @@ -444,8 +412,7 @@ no_server: dvnode->update_cnt--; ASSERTCMP(dvnode->update_cnt, >=, 0); spin_unlock(&dvnode->lock); - _leave(" = %ld [cnt %d]", PTR_ERR(server), vnode->update_cnt); - return PTR_ERR(server); + goto out; } /* @@ -457,8 +424,7 @@ int afs_vnode_symlink(struct afs_vnode *vnode, struct key *key, struct afs_file_status *newstatus, struct afs_server **_server) { - struct afs_server *server; - int ret; + struct afs_fs_cursor fc; _enter("%s{%x:%u.%u},%x,%s,%s,,,", vnode->volume->vlocation->vldb.name, @@ -473,38 +439,37 @@ int afs_vnode_symlink(struct afs_vnode *vnode, struct key *key, vnode->update_cnt++; spin_unlock(&vnode->lock); + afs_init_fs_cursor(&fc, vnode); do { /* pick a server to query */ - server = 
afs_volume_pick_fileserver(vnode); - if (IS_ERR(server)) + if (!afs_volume_pick_fileserver(&fc, vnode)) goto no_server; - _debug("USING SERVER: %pIS\n", &server->addr.transport); + fc.ac.error = afs_fs_symlink(&fc, key, vnode, name, content, + newfid, newstatus, false); - ret = afs_fs_symlink(server, key, vnode, name, content, - newfid, newstatus, false); - - } while (!afs_volume_release_fileserver(vnode, server, ret)); + } while (afs_iterate_fs_cursor(&fc, vnode)); /* adjust the flags */ - if (ret == 0) { - afs_vnode_finalise_status_update(vnode, server); - *_server = server; + if (fc.ac.error == 0) { + afs_vnode_finalise_status_update(vnode, fc.server); + *_server = fc.server; + fc.server = NULL; } else { - afs_vnode_status_update_failed(vnode, ret); + afs_vnode_status_update_failed(&fc, vnode); *_server = NULL; } - _leave(" = %d [cnt %d]", ret, vnode->update_cnt); - return ret; +out: + return afs_end_fs_cursor(&fc, afs_v2net(vnode)); no_server: spin_lock(&vnode->lock); vnode->update_cnt--; ASSERTCMP(vnode->update_cnt, >=, 0); spin_unlock(&vnode->lock); - _leave(" = %ld [cnt %d]", PTR_ERR(server), vnode->update_cnt); - return PTR_ERR(server); + *_server = NULL; + goto out; } /* @@ -516,8 +481,7 @@ int afs_vnode_rename(struct afs_vnode *orig_dvnode, const char *orig_name, const char *new_name) { - struct afs_server *server; - int ret; + struct afs_fs_cursor fc; _enter("%s{%x:%u.%u},%s{%u,%u,%u},%x,%s,%s", orig_dvnode->volume->vlocation->vldb.name, @@ -543,33 +507,30 @@ int afs_vnode_rename(struct afs_vnode *orig_dvnode, spin_unlock(&new_dvnode->lock); } + afs_init_fs_cursor(&fc, orig_dvnode); do { /* pick a server to query */ - server = afs_volume_pick_fileserver(orig_dvnode); - if (IS_ERR(server)) + if (!afs_volume_pick_fileserver(&fc, orig_dvnode)) goto no_server; - _debug("USING SERVER: %pIS\n", &server->addr.transport); + fc.ac.error = afs_fs_rename(&fc, key, orig_dvnode, orig_name, + new_dvnode, new_name, false); - ret = afs_fs_rename(server, key, 
orig_dvnode, orig_name, - new_dvnode, new_name, false); - - } while (!afs_volume_release_fileserver(orig_dvnode, server, ret)); + } while (afs_iterate_fs_cursor(&fc, orig_dvnode)); /* adjust the flags */ - if (ret == 0) { - afs_vnode_finalise_status_update(orig_dvnode, server); + if (fc.ac.error == 0) { + afs_vnode_finalise_status_update(orig_dvnode, fc.server); if (new_dvnode != orig_dvnode) - afs_vnode_finalise_status_update(new_dvnode, server); - afs_put_server(afs_v2net(orig_dvnode), server); + afs_vnode_finalise_status_update(new_dvnode, fc.server); } else { - afs_vnode_status_update_failed(orig_dvnode, ret); + afs_vnode_status_update_failed(&fc, orig_dvnode); if (new_dvnode != orig_dvnode) - afs_vnode_status_update_failed(new_dvnode, ret); + afs_vnode_status_update_failed(&fc, new_dvnode); } - _leave(" = %d [cnt %d]", ret, orig_dvnode->update_cnt); - return ret; +out: + return afs_end_fs_cursor(&fc, afs_v2net(orig_dvnode)); no_server: spin_lock(&orig_dvnode->lock); @@ -582,8 +543,7 @@ no_server: ASSERTCMP(new_dvnode->update_cnt, >=, 0); spin_unlock(&new_dvnode->lock); } - _leave(" = %ld [cnt %d]", PTR_ERR(server), orig_dvnode->update_cnt); - return PTR_ERR(server); + goto out; } /* @@ -592,9 +552,8 @@ no_server: int afs_vnode_store_data(struct afs_writeback *wb, pgoff_t first, pgoff_t last, unsigned offset, unsigned to) { - struct afs_server *server; + struct afs_fs_cursor fc; struct afs_vnode *vnode = wb->vnode; - int ret; _enter("%s{%x:%u.%u},%x,%lx,%lx,%x,%x", vnode->volume->vlocation->vldb.name, @@ -609,36 +568,33 @@ int afs_vnode_store_data(struct afs_writeback *wb, pgoff_t first, pgoff_t last, vnode->update_cnt++; spin_unlock(&vnode->lock); + afs_init_fs_cursor(&fc, vnode); do { /* pick a server to query */ - server = afs_volume_pick_fileserver(vnode); - if (IS_ERR(server)) + if (!afs_volume_pick_fileserver(&fc, vnode)) goto no_server; - _debug("USING SERVER: %pIS\n", &server->addr.transport); + fc.ac.error = afs_fs_store_data(&fc, wb, first, last, 
offset, to, + false); - ret = afs_fs_store_data(server, wb, first, last, offset, to, - false); - - } while (!afs_volume_release_fileserver(vnode, server, ret)); + } while (afs_iterate_fs_cursor(&fc, vnode)); /* adjust the flags */ - if (ret == 0) { - afs_vnode_finalise_status_update(vnode, server); - afs_put_server(afs_v2net(vnode), server); + if (fc.ac.error == 0) { + afs_vnode_finalise_status_update(vnode, fc.server); } else { - afs_vnode_status_update_failed(vnode, ret); + afs_vnode_status_update_failed(&fc, vnode); } - _leave(" = %d", ret); - return ret; +out: + return afs_end_fs_cursor(&fc, afs_v2net(vnode)); no_server: spin_lock(&vnode->lock); vnode->update_cnt--; ASSERTCMP(vnode->update_cnt, >=, 0); spin_unlock(&vnode->lock); - return PTR_ERR(server); + goto out; } /* @@ -647,8 +603,7 @@ no_server: int afs_vnode_setattr(struct afs_vnode *vnode, struct key *key, struct iattr *attr) { - struct afs_server *server; - int ret; + struct afs_fs_cursor fc; _enter("%s{%x:%u.%u},%x", vnode->volume->vlocation->vldb.name, @@ -662,35 +617,32 @@ int afs_vnode_setattr(struct afs_vnode *vnode, struct key *key, vnode->update_cnt++; spin_unlock(&vnode->lock); + afs_init_fs_cursor(&fc, vnode); do { /* pick a server to query */ - server = afs_volume_pick_fileserver(vnode); - if (IS_ERR(server)) + if (!afs_volume_pick_fileserver(&fc, vnode)) goto no_server; - _debug("USING SERVER: %pIS\n", &server->addr.transport); + fc.ac.error = afs_fs_setattr(&fc, key, vnode, attr, false); - ret = afs_fs_setattr(server, key, vnode, attr, false); - - } while (!afs_volume_release_fileserver(vnode, server, ret)); + } while (afs_iterate_fs_cursor(&fc, vnode)); /* adjust the flags */ - if (ret == 0) { - afs_vnode_finalise_status_update(vnode, server); - afs_put_server(afs_v2net(vnode), server); + if (fc.ac.error == 0) { + afs_vnode_finalise_status_update(vnode, fc.server); } else { - afs_vnode_status_update_failed(vnode, ret); + afs_vnode_status_update_failed(&fc, vnode); } - _leave(" = %d", ret); 
- return ret; +out: + return afs_end_fs_cursor(&fc, afs_v2net(vnode)); no_server: spin_lock(&vnode->lock); vnode->update_cnt--; ASSERTCMP(vnode->update_cnt, >=, 0); spin_unlock(&vnode->lock); - return PTR_ERR(server); + goto out; } /* @@ -699,8 +651,7 @@ no_server: int afs_vnode_get_volume_status(struct afs_vnode *vnode, struct key *key, struct afs_volume_status *vs) { - struct afs_server *server; - int ret; + struct afs_fs_cursor fc; _enter("%s{%x:%u.%u},%x,", vnode->volume->vlocation->vldb.name, @@ -709,27 +660,17 @@ int afs_vnode_get_volume_status(struct afs_vnode *vnode, struct key *key, vnode->fid.unique, key_serial(key)); + afs_init_fs_cursor(&fc, vnode); do { /* pick a server to query */ - server = afs_volume_pick_fileserver(vnode); - if (IS_ERR(server)) - goto no_server; + if (!afs_volume_pick_fileserver(&fc, vnode)) + break; - _debug("USING SERVER: %pIS\n", &server->addr.transport); + fc.ac.error = afs_fs_get_volume_status(&fc, key, vnode, vs, false); - ret = afs_fs_get_volume_status(server, key, vnode, vs, false); + } while (afs_iterate_fs_cursor(&fc, vnode)); - } while (!afs_volume_release_fileserver(vnode, server, ret)); - - /* adjust the flags */ - if (ret == 0) - afs_put_server(afs_v2net(vnode), server); - - _leave(" = %d", ret); - return ret; - -no_server: - return PTR_ERR(server); + return afs_end_fs_cursor(&fc, afs_v2net(vnode)); } /* @@ -738,8 +679,7 @@ no_server: int afs_vnode_set_lock(struct afs_vnode *vnode, struct key *key, afs_lock_type_t type) { - struct afs_server *server; - int ret; + struct afs_fs_cursor fc; _enter("%s{%x:%u.%u},%x,%u", vnode->volume->vlocation->vldb.name, @@ -748,27 +688,17 @@ int afs_vnode_set_lock(struct afs_vnode *vnode, struct key *key, vnode->fid.unique, key_serial(key), type); + afs_init_fs_cursor(&fc, vnode); do { /* pick a server to query */ - server = afs_volume_pick_fileserver(vnode); - if (IS_ERR(server)) - goto no_server; + if (!afs_volume_pick_fileserver(&fc, vnode)) + break; - _debug("USING SERVER: %pIS\n", 
&server->addr.transport); + fc.ac.error = afs_fs_set_lock(&fc, key, vnode, type, false); - ret = afs_fs_set_lock(server, key, vnode, type, false); + } while (afs_iterate_fs_cursor(&fc, vnode)); - } while (!afs_volume_release_fileserver(vnode, server, ret)); - - /* adjust the flags */ - if (ret == 0) - afs_put_server(afs_v2net(vnode), server); - - _leave(" = %d", ret); - return ret; - -no_server: - return PTR_ERR(server); + return afs_end_fs_cursor(&fc, afs_v2net(vnode)); } /* @@ -776,7 +706,7 @@ no_server: */ int afs_vnode_extend_lock(struct afs_vnode *vnode, struct key *key) { - struct afs_server *server; + struct afs_fs_cursor fc; int ret; _enter("%s{%x:%u.%u},%x", @@ -786,27 +716,13 @@ int afs_vnode_extend_lock(struct afs_vnode *vnode, struct key *key) vnode->fid.unique, key_serial(key)); - do { - /* pick a server to query */ - server = afs_volume_pick_fileserver(vnode); - if (IS_ERR(server)) - goto no_server; + ret = afs_set_fs_cursor(&fc, vnode); + if (ret < 0) + return ret; - _debug("USING SERVER: %pIS\n", &server->addr.transport); + fc.ac.error = afs_fs_extend_lock(&fc, key, vnode, false); - ret = afs_fs_extend_lock(server, key, vnode, false); - - } while (!afs_volume_release_fileserver(vnode, server, ret)); - - /* adjust the flags */ - if (ret == 0) - afs_put_server(afs_v2net(vnode), server); - - _leave(" = %d", ret); - return ret; - -no_server: - return PTR_ERR(server); + return afs_end_fs_cursor(&fc, afs_v2net(vnode)); } /* @@ -814,7 +730,7 @@ no_server: */ int afs_vnode_release_lock(struct afs_vnode *vnode, struct key *key) { - struct afs_server *server; + struct afs_fs_cursor fc; int ret; _enter("%s{%x:%u.%u},%x", @@ -824,25 +740,11 @@ int afs_vnode_release_lock(struct afs_vnode *vnode, struct key *key) vnode->fid.unique, key_serial(key)); - do { - /* pick a server to query */ - server = afs_volume_pick_fileserver(vnode); - if (IS_ERR(server)) - goto no_server; + ret = afs_set_fs_cursor(&fc, vnode); + if (ret < 0) + return ret; - _debug("USING SERVER: 
%pIS\n", &server->addr.transport); + fc.ac.error = afs_fs_release_lock(&fc, key, vnode, false); - ret = afs_fs_release_lock(server, key, vnode, false); - - } while (!afs_volume_release_fileserver(vnode, server, ret)); - - /* adjust the flags */ - if (ret == 0) - afs_put_server(afs_v2net(vnode), server); - - _leave(" = %d", ret); - return ret; - -no_server: - return PTR_ERR(server); + return afs_end_fs_cursor(&fc, afs_v2net(vnode)); } diff --git a/fs/afs/volume.c b/fs/afs/volume.c index 4f6fd10094c6..d282cd0ff268 100644 --- a/fs/afs/volume.c +++ b/fs/afs/volume.c @@ -209,11 +209,45 @@ void afs_put_volume(struct afs_cell *cell, struct afs_volume *volume) _leave(" [destroyed]"); } +/* + * Initialise a filesystem server cursor for iterating over FS servers. + */ +void afs_init_fs_cursor(struct afs_fs_cursor *fc, struct afs_vnode *vnode) +{ + fc->ac.alist = NULL; + fc->ac.addr = NULL; + fc->ac.start = 0; + fc->ac.index = 0; + fc->ac.error = 0; + fc->server = NULL; +} + +/* + * Set a filesystem server cursor for using a specific FS server. 
+ */ +int afs_set_fs_cursor(struct afs_fs_cursor *fc, struct afs_vnode *vnode) +{ + afs_init_fs_cursor(fc, vnode); + + read_seqlock_excl(&vnode->cb_lock); + if (vnode->cb_interest) { + if (vnode->cb_interest->server->fs_state == 0) + fc->server = afs_get_server(vnode->cb_interest->server); + else + fc->ac.error = vnode->cb_interest->server->fs_state; + } else { + fc->ac.error = -ESTALE; + } + read_sequnlock_excl(&vnode->cb_lock); + + return fc->ac.error; +} + /* * pick a server to use to try accessing this volume * - returns with an elevated usage count on the server chosen */ -struct afs_server *afs_volume_pick_fileserver(struct afs_vnode *vnode) +bool afs_volume_pick_fileserver(struct afs_fs_cursor *fc, struct afs_vnode *vnode) { struct afs_volume *volume = vnode->volume; struct afs_server *server; @@ -223,19 +257,18 @@ struct afs_server *afs_volume_pick_fileserver(struct afs_vnode *vnode) /* stick with the server we're already using if we can */ if (vnode->cb_interest && vnode->cb_interest->server->fs_state == 0) { - afs_get_server(vnode->cb_interest->server); - _leave(" = %p [current]", vnode->cb_interest->server); - return vnode->cb_interest->server; + fc->server = afs_get_server(vnode->cb_interest->server); + goto set_server; } down_read(&volume->server_sem); /* handle the no-server case */ if (volume->nservers == 0) { - ret = volume->rjservers ? -ENOMEDIUM : -ESTALE; + fc->ac.error = volume->rjservers ? 
-ENOMEDIUM : -ESTALE; up_read(&volume->server_sem); - _leave(" = %d [no servers]", ret); - return ERR_PTR(ret); + _leave(" = f [no servers %d]", fc->ac.error); + return false; } /* basically, just search the list for the first live server and use @@ -280,13 +313,15 @@ struct afs_server *afs_volume_pick_fileserver(struct afs_vnode *vnode) } } +error: + fc->ac.error = ret; + /* no available servers * - TODO: handle the no active servers case better */ -error: up_read(&volume->server_sem); - _leave(" = %d", ret); - return ERR_PTR(ret); + _leave(" = f [%d]", fc->ac.error); + return false; picked_server: /* Found an apparently healthy server. We need to register an interest @@ -296,37 +331,41 @@ picked_server: &volume->cb_interests[loop], server); if (ret < 0) goto error; - - afs_get_server(server); + + fc->server = afs_get_server(server); up_read(&volume->server_sem); - _leave(" = %p (picked %pIS)", - server, &server->addr.transport); - return server; +set_server: + fc->ac.alist = afs_get_addrlist(fc->server->addrs); + fc->ac.addr = &fc->ac.alist->addrs[0]; + _debug("USING SERVER: %pIS\n", &fc->ac.addr->transport); + _leave(" = t (picked %pIS)", &fc->ac.addr->transport); + return true; } /* * release a server after use * - releases the ref on the server struct that was acquired by picking * - records result of using a particular server to access a volume - * - return 0 to try again, 1 if okay or to issue error - * - the caller must release the server struct if result was 0 + * - return true to try again, false if okay or to issue error + * - the caller must release the server struct if result was false */ -int afs_volume_release_fileserver(struct afs_vnode *vnode, - struct afs_server *server, - int result) +bool afs_iterate_fs_cursor(struct afs_fs_cursor *fc, + struct afs_vnode *vnode) { struct afs_volume *volume = vnode->volume; + struct afs_server *server = fc->server; unsigned loop; _enter("%s,%pIS,%d", - volume->vlocation->vldb.name, &server->addr.transport, 
result); + volume->vlocation->vldb.name, &fc->ac.addr->transport, + fc->ac.error); - switch (result) { + switch (fc->ac.error) { /* success */ case 0: server->fs_state = 0; - _leave(""); - return 1; + _leave(" = f"); + return false; /* the fileserver denied all knowledge of the volume */ case -ENOMEDIUM: @@ -363,8 +402,9 @@ int afs_volume_release_fileserver(struct afs_vnode *vnode, */ up_write(&volume->server_sem); afs_put_server(afs_v2net(vnode), server); - _leave(" [completely rejected]"); - return 1; + fc->server = NULL; + _leave(" = f [completely rejected]"); + return false; /* problem reaching the server */ case -ENETUNREACH: @@ -378,8 +418,8 @@ int afs_volume_release_fileserver(struct afs_vnode *vnode, */ spin_lock(&server->fs_lock); if (!server->fs_state) { - server->fs_state = result; - printk("kAFS: SERVER DEAD state=%d\n", result); + server->fs_state = fc->ac.error; + printk("kAFS: SERVER DEAD state=%d\n", fc->ac.error); } spin_unlock(&server->fs_lock); goto try_next_server; @@ -390,8 +430,9 @@ int afs_volume_release_fileserver(struct afs_vnode *vnode, case -ENONET: /* tell the caller to accept the result */ afs_put_server(afs_v2net(vnode), server); - _leave(" [local failure]"); - return 1; + fc->server = NULL; + _leave(" = f [local failure]"); + return false; } /* tell the caller to loop around and try the next server */ @@ -399,6 +440,16 @@ try_next_server_upw: up_write(&volume->server_sem); try_next_server: afs_put_server(afs_v2net(vnode), server); - _leave(" [try next server]"); - return 0; + _leave(" = t [try next server]"); + return true; +} + +/* + * Clean up a fileserver cursor. 
+ */ +int afs_end_fs_cursor(struct afs_fs_cursor *fc, struct afs_net *net) +{ + afs_end_cursor(&fc->ac); + afs_put_server(net, fc->server); + return fc->ac.error; } From 9cc6fc50f7bc69ac28bee45eed13cbc65a86210f Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 2 Nov 2017 15:27:50 +0000 Subject: [PATCH 21/35] afs: Move server rotation code into its own file Move server rotation code into its own file. Signed-off-by: David Howells --- fs/afs/Makefile | 1 + fs/afs/rotate.c | 254 ++++++++++++++++++++++++++++++++++++++++++++++++ fs/afs/volume.c | 250 ----------------------------------------------- 3 files changed, 255 insertions(+), 250 deletions(-) create mode 100644 fs/afs/rotate.c diff --git a/fs/afs/Makefile b/fs/afs/Makefile index 849383986d3b..192d476d7c76 100644 --- a/fs/afs/Makefile +++ b/fs/afs/Makefile @@ -20,6 +20,7 @@ kafs-objs := \ misc.o \ mntpt.o \ proc.o \ + rotate.o \ rxrpc.o \ security.o \ server.o \ diff --git a/fs/afs/rotate.c b/fs/afs/rotate.c new file mode 100644 index 000000000000..c7975b3ba59a --- /dev/null +++ b/fs/afs/rotate.c @@ -0,0 +1,254 @@ +/* Handle fileserver selection and rotation. + * + * Copyright (C) 2017 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#include +#include +#include "internal.h" + +/* + * Initialise a filesystem server cursor for iterating over FS servers. + */ +void afs_init_fs_cursor(struct afs_fs_cursor *fc, struct afs_vnode *vnode) +{ + memset(fc, 0, sizeof(*fc)); +} + +/* + * Set a filesystem server cursor for using a specific FS server. 
+ */ +int afs_set_fs_cursor(struct afs_fs_cursor *fc, struct afs_vnode *vnode) +{ + afs_init_fs_cursor(fc, vnode); + + read_seqlock_excl(&vnode->cb_lock); + if (vnode->cb_interest) { + if (vnode->cb_interest->server->fs_state == 0) + fc->server = afs_get_server(vnode->cb_interest->server); + else + fc->ac.error = vnode->cb_interest->server->fs_state; + } else { + fc->ac.error = -ESTALE; + } + read_sequnlock_excl(&vnode->cb_lock); + + return fc->ac.error; +} + +/* + * pick a server to use to try accessing this volume + * - returns with an elevated usage count on the server chosen + */ +bool afs_volume_pick_fileserver(struct afs_fs_cursor *fc, struct afs_vnode *vnode) +{ + struct afs_volume *volume = vnode->volume; + struct afs_server *server; + int ret, state, loop; + + _enter("%s", volume->vlocation->vldb.name); + + /* stick with the server we're already using if we can */ + if (vnode->cb_interest && vnode->cb_interest->server->fs_state == 0) { + fc->server = afs_get_server(vnode->cb_interest->server); + goto set_server; + } + + down_read(&volume->server_sem); + + /* handle the no-server case */ + if (volume->nservers == 0) { + fc->ac.error = volume->rjservers ? 
-ENOMEDIUM : -ESTALE; + up_read(&volume->server_sem); + _leave(" = f [no servers %d]", fc->ac.error); + return false; + } + + /* basically, just search the list for the first live server and use + * that */ + ret = 0; + for (loop = 0; loop < volume->nservers; loop++) { + server = volume->servers[loop]; + state = server->fs_state; + + _debug("consider %d [%d]", loop, state); + + switch (state) { + case 0: + goto picked_server; + + case -ENETUNREACH: + if (ret == 0) + ret = state; + break; + + case -EHOSTUNREACH: + if (ret == 0 || + ret == -ENETUNREACH) + ret = state; + break; + + case -ECONNREFUSED: + if (ret == 0 || + ret == -ENETUNREACH || + ret == -EHOSTUNREACH) + ret = state; + break; + + default: + case -EREMOTEIO: + if (ret == 0 || + ret == -ENETUNREACH || + ret == -EHOSTUNREACH || + ret == -ECONNREFUSED) + ret = state; + break; + } + } + +error: + fc->ac.error = ret; + + /* no available servers + * - TODO: handle the no active servers case better + */ + up_read(&volume->server_sem); + _leave(" = f [%d]", fc->ac.error); + return false; + +picked_server: + /* Found an apparently healthy server. We need to register an interest + * in receiving callbacks before we talk to it. 
+ */ + ret = afs_register_server_cb_interest(vnode, + &volume->cb_interests[loop], server); + if (ret < 0) + goto error; + + fc->server = afs_get_server(server); + up_read(&volume->server_sem); +set_server: + fc->ac.alist = afs_get_addrlist(fc->server->addrs); + fc->ac.addr = &fc->ac.alist->addrs[0]; + _debug("USING SERVER: %pIS\n", &fc->ac.addr->transport); + _leave(" = t (picked %pIS)", &fc->ac.addr->transport); + return true; +} + +/* + * release a server after use + * - releases the ref on the server struct that was acquired by picking + * - records result of using a particular server to access a volume + * - return true to try again, false if okay or to issue error + * - the caller must release the server struct if result was false + */ +bool afs_iterate_fs_cursor(struct afs_fs_cursor *fc, + struct afs_vnode *vnode) +{ + struct afs_volume *volume = vnode->volume; + struct afs_server *server = fc->server; + unsigned loop; + + _enter("%s,%pIS,%d", + volume->vlocation->vldb.name, &fc->ac.addr->transport, + fc->ac.error); + + switch (fc->ac.error) { + /* success */ + case 0: + server->fs_state = 0; + _leave(" = f"); + return false; + + /* the fileserver denied all knowledge of the volume */ + case -ENOMEDIUM: + down_write(&volume->server_sem); + + /* firstly, find where the server is in the active list (if it + * is) */ + for (loop = 0; loop < volume->nservers; loop++) + if (volume->servers[loop] == server) + goto present; + + /* no longer there - may have been discarded by another op */ + goto try_next_server_upw; + + present: + volume->nservers--; + memmove(&volume->servers[loop], + &volume->servers[loop + 1], + sizeof(volume->servers[loop]) * + (volume->nservers - loop)); + volume->servers[volume->nservers] = NULL; + afs_put_server(afs_v2net(vnode), server); + volume->rjservers++; + + if (volume->nservers > 0) + /* another server might acknowledge its existence */ + goto try_next_server_upw; + + /* handle the case where all the fileservers have rejected the + * 
volume + * - TODO: try asking the fileservers for volume information + * - TODO: contact the VL server again to see if the volume is + * no longer registered + */ + up_write(&volume->server_sem); + afs_put_server(afs_v2net(vnode), server); + fc->server = NULL; + _leave(" = f [completely rejected]"); + return false; + + /* problem reaching the server */ + case -ENETUNREACH: + case -EHOSTUNREACH: + case -ECONNREFUSED: + case -ETIME: + case -ETIMEDOUT: + case -EREMOTEIO: + /* mark the server as dead + * TODO: vary dead timeout depending on error + */ + spin_lock(&server->fs_lock); + if (!server->fs_state) { + server->fs_state = fc->ac.error; + printk("kAFS: SERVER DEAD state=%d\n", fc->ac.error); + } + spin_unlock(&server->fs_lock); + goto try_next_server; + + /* miscellaneous error */ + default: + case -ENOMEM: + case -ENONET: + /* tell the caller to accept the result */ + afs_put_server(afs_v2net(vnode), server); + fc->server = NULL; + _leave(" = f [local failure]"); + return false; + } + + /* tell the caller to loop around and try the next server */ +try_next_server_upw: + up_write(&volume->server_sem); +try_next_server: + afs_put_server(afs_v2net(vnode), server); + _leave(" = t [try next server]"); + return true; +} + +/* + * Clean up a fileserver cursor. + */ +int afs_end_fs_cursor(struct afs_fs_cursor *fc, struct afs_net *net) +{ + afs_end_cursor(&fc->ac); + afs_put_server(net, fc->server); + return fc->ac.error; +} diff --git a/fs/afs/volume.c b/fs/afs/volume.c index d282cd0ff268..3c5ad1cc50f3 100644 --- a/fs/afs/volume.c +++ b/fs/afs/volume.c @@ -10,12 +10,7 @@ */ #include -#include -#include #include -#include -#include -#include #include "internal.h" static const char *afs_voltypes[] = { "R/W", "R/O", "BAK" }; @@ -208,248 +203,3 @@ void afs_put_volume(struct afs_cell *cell, struct afs_volume *volume) _leave(" [destroyed]"); } - -/* - * Initialise a filesystem server cursor for iterating over FS servers. 
- */ -void afs_init_fs_cursor(struct afs_fs_cursor *fc, struct afs_vnode *vnode) -{ - fc->ac.alist = NULL; - fc->ac.addr = NULL; - fc->ac.start = 0; - fc->ac.index = 0; - fc->ac.error = 0; - fc->server = NULL; -} - -/* - * Set a filesystem server cursor for using a specific FS server. - */ -int afs_set_fs_cursor(struct afs_fs_cursor *fc, struct afs_vnode *vnode) -{ - afs_init_fs_cursor(fc, vnode); - - read_seqlock_excl(&vnode->cb_lock); - if (vnode->cb_interest) { - if (vnode->cb_interest->server->fs_state == 0) - fc->server = afs_get_server(vnode->cb_interest->server); - else - fc->ac.error = vnode->cb_interest->server->fs_state; - } else { - fc->ac.error = -ESTALE; - } - read_sequnlock_excl(&vnode->cb_lock); - - return fc->ac.error; -} - -/* - * pick a server to use to try accessing this volume - * - returns with an elevated usage count on the server chosen - */ -bool afs_volume_pick_fileserver(struct afs_fs_cursor *fc, struct afs_vnode *vnode) -{ - struct afs_volume *volume = vnode->volume; - struct afs_server *server; - int ret, state, loop; - - _enter("%s", volume->vlocation->vldb.name); - - /* stick with the server we're already using if we can */ - if (vnode->cb_interest && vnode->cb_interest->server->fs_state == 0) { - fc->server = afs_get_server(vnode->cb_interest->server); - goto set_server; - } - - down_read(&volume->server_sem); - - /* handle the no-server case */ - if (volume->nservers == 0) { - fc->ac.error = volume->rjservers ? 
-ENOMEDIUM : -ESTALE; - up_read(&volume->server_sem); - _leave(" = f [no servers %d]", fc->ac.error); - return false; - } - - /* basically, just search the list for the first live server and use - * that */ - ret = 0; - for (loop = 0; loop < volume->nservers; loop++) { - server = volume->servers[loop]; - state = server->fs_state; - - _debug("consider %d [%d]", loop, state); - - switch (state) { - case 0: - goto picked_server; - - case -ENETUNREACH: - if (ret == 0) - ret = state; - break; - - case -EHOSTUNREACH: - if (ret == 0 || - ret == -ENETUNREACH) - ret = state; - break; - - case -ECONNREFUSED: - if (ret == 0 || - ret == -ENETUNREACH || - ret == -EHOSTUNREACH) - ret = state; - break; - - default: - case -EREMOTEIO: - if (ret == 0 || - ret == -ENETUNREACH || - ret == -EHOSTUNREACH || - ret == -ECONNREFUSED) - ret = state; - break; - } - } - -error: - fc->ac.error = ret; - - /* no available servers - * - TODO: handle the no active servers case better - */ - up_read(&volume->server_sem); - _leave(" = f [%d]", fc->ac.error); - return false; - -picked_server: - /* Found an apparently healthy server. We need to register an interest - * in receiving callbacks before we talk to it. 
- */ - ret = afs_register_server_cb_interest(vnode, - &volume->cb_interests[loop], server); - if (ret < 0) - goto error; - - fc->server = afs_get_server(server); - up_read(&volume->server_sem); -set_server: - fc->ac.alist = afs_get_addrlist(fc->server->addrs); - fc->ac.addr = &fc->ac.alist->addrs[0]; - _debug("USING SERVER: %pIS\n", &fc->ac.addr->transport); - _leave(" = t (picked %pIS)", &fc->ac.addr->transport); - return true; -} - -/* - * release a server after use - * - releases the ref on the server struct that was acquired by picking - * - records result of using a particular server to access a volume - * - return true to try again, false if okay or to issue error - * - the caller must release the server struct if result was false - */ -bool afs_iterate_fs_cursor(struct afs_fs_cursor *fc, - struct afs_vnode *vnode) -{ - struct afs_volume *volume = vnode->volume; - struct afs_server *server = fc->server; - unsigned loop; - - _enter("%s,%pIS,%d", - volume->vlocation->vldb.name, &fc->ac.addr->transport, - fc->ac.error); - - switch (fc->ac.error) { - /* success */ - case 0: - server->fs_state = 0; - _leave(" = f"); - return false; - - /* the fileserver denied all knowledge of the volume */ - case -ENOMEDIUM: - down_write(&volume->server_sem); - - /* firstly, find where the server is in the active list (if it - * is) */ - for (loop = 0; loop < volume->nservers; loop++) - if (volume->servers[loop] == server) - goto present; - - /* no longer there - may have been discarded by another op */ - goto try_next_server_upw; - - present: - volume->nservers--; - memmove(&volume->servers[loop], - &volume->servers[loop + 1], - sizeof(volume->servers[loop]) * - (volume->nservers - loop)); - volume->servers[volume->nservers] = NULL; - afs_put_server(afs_v2net(vnode), server); - volume->rjservers++; - - if (volume->nservers > 0) - /* another server might acknowledge its existence */ - goto try_next_server_upw; - - /* handle the case where all the fileservers have rejected the - * 
volume - * - TODO: try asking the fileservers for volume information - * - TODO: contact the VL server again to see if the volume is - * no longer registered - */ - up_write(&volume->server_sem); - afs_put_server(afs_v2net(vnode), server); - fc->server = NULL; - _leave(" = f [completely rejected]"); - return false; - - /* problem reaching the server */ - case -ENETUNREACH: - case -EHOSTUNREACH: - case -ECONNREFUSED: - case -ETIME: - case -ETIMEDOUT: - case -EREMOTEIO: - /* mark the server as dead - * TODO: vary dead timeout depending on error - */ - spin_lock(&server->fs_lock); - if (!server->fs_state) { - server->fs_state = fc->ac.error; - printk("kAFS: SERVER DEAD state=%d\n", fc->ac.error); - } - spin_unlock(&server->fs_lock); - goto try_next_server; - - /* miscellaneous error */ - default: - case -ENOMEM: - case -ENONET: - /* tell the caller to accept the result */ - afs_put_server(afs_v2net(vnode), server); - fc->server = NULL; - _leave(" = f [local failure]"); - return false; - } - - /* tell the caller to loop around and try the next server */ -try_next_server_upw: - up_write(&volume->server_sem); -try_next_server: - afs_put_server(afs_v2net(vnode), server); - _leave(" = t [try next server]"); - return true; -} - -/* - * Clean up a fileserver cursor. - */ -int afs_end_fs_cursor(struct afs_fs_cursor *fc, struct afs_net *net) -{ - afs_end_cursor(&fc->ac); - afs_put_server(net, fc->server); - return fc->ac.error; -} From d2ddc776a4581d900fc3bdc7803b403daae64d88 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 2 Nov 2017 15:27:50 +0000 Subject: [PATCH 22/35] afs: Overhaul volume and server record caching and fileserver rotation The current code assumes that volumes and servers are per-cell and are never shared, but this is not enforced, and, indeed, public cells do exist that are aliases of each other. Further, an organisation can, say, set up a public cell and a private cell with overlapping, but not identical, sets of servers. 
The difference is purely in the database attached to the VL servers. The current code will malfunction if it sees a server in two cells as it assumes global address -> server record mappings and that each server is in just one cell. Further, each server may have multiple addresses - and may have addresses of different families (IPv4 and IPv6, say). To this end, the following structural changes are made: (1) Server record management is overhauled: (a) Server records are made independent of cell. The namespace keeps track of them, volume records have lists of them and each vnode has a server on which its callback interest currently resides. (b) The cell record no longer keeps a list of servers known to be in that cell. (c) The server records are now kept in a flat list because there's no single address to sort on. (d) Server records are now keyed by their UUID within the namespace. (e) The addresses for a server are obtained with VL.GetAddrsU rather than with VL.GetEntryByName, using the server's UUID as a parameter. (f) Cached server records are garbage collected after a period of non-use and are counted out of existence before purging is allowed to complete. This protects the work functions against rmmod. (g) The servers list is now in /proc/fs/afs/servers. (2) Volume record management is overhauled: (a) An RCU-replaceable server list is introduced. This tracks both servers and their corresponding callback interests. (b) The superblock is now keyed on cell record and numeric volume ID. (c) The volume record is now tied to the superblock which mounts it, and is activated when mounted and deactivated when unmounted. This makes it easier to handle the cache cookie without causing a double-use in fscache. (d) The volume record is loaded from the VLDB using VL.GetEntryByNameU to get the server UUID list. (e) The volume name is updated if it is seen to have changed when the volume is updated (the update is keyed on the volume ID). 
(3) The vlocation record is removed and VLDB records are no longer cached. Sufficient information is stored in the volume record, though an update to a volume record is now no longer shared between related volumes (volumes come in bundles of three: R/W, R/O and backup). In addition, the following procedural changes are made: (1) The fileserver cursor introduced previously is now fleshed out and used to iterate over fileservers and their addresses. (2) Volume status is checked during iteration, and the server list is replaced if a change is detected. (3) Server status is checked during iteration, and the address list is replaced if a change is detected. (4) The abort code is saved into the address list cursor and -ECONNABORTED returned in afs_make_call() if a remote abort happened rather than translating the abort into an error message. This allows actions to be taken depending on the abort code more easily. (a) If a VMOVED abort is seen then this is handled by rechecking the volume and restarting the iteration. (b) If a VBUSY, VRESTARTING or VSALVAGING abort is seen then this is handled by sleeping for a short period and retrying and/or trying other servers that might serve that volume. A message is also displayed once until the condition has cleared. (c) If a VOFFLINE abort is seen, then this is handled as VBUSY for the moment. (d) If a VNOVOL abort is seen, the volume is rechecked in the VLDB to see if it has been deleted; if not, the fileserver is probably indicating that the volume couldn't be attached and needs salvaging. (e) If statfs() sees one of these aborts, it does not sleep, but rather returns an error, so as not to block the umount program. (5) The fileserver iteration functions in vnode.c are now merged into their callers and more heavily macroised around the cursor. vnode.c is removed. 
(6) Operations on a particular vnode are serialised on that vnode because the server will lock that vnode whilst it operates on it, so a second op sent will just have to wait. (7) Fileservers are probed with FS.GetCapabilities before being used. This is where service upgrade will be done. (8) A callback interest on a fileserver is set up before an FS operation is performed and passed through to afs_make_call() so that it can be set on the vnode if the operation returns a callback. The callback interest is passed through to afs_iget() also so that it can be set there too. In general, record updating is done on an as-needed basis when we try to access servers, volumes or vnodes rather than offloading it to work items and special threads. Notes: (1) Pre AFS-3.4 servers are no longer supported, though this can be added back if necessary (AFS-3.4 was released in 1998). (2) VBUSY is retried forever for the moment at intervals of 1s. (3) /proc/fs/afs/<cell>/servers no longer exists. Signed-off-by: David Howells --- fs/afs/Makefile | 3 +- fs/afs/addr_list.c | 31 ++ fs/afs/afs_fs.h | 1 + fs/afs/afs_vl.h | 42 +++ fs/afs/callback.c | 20 +- fs/afs/cell.c | 7 +- fs/afs/dir.c | 432 +++++++++++++----------- fs/afs/file.c | 37 ++- fs/afs/flock.c | 108 +++++- fs/afs/fsclient.c | 347 ++++++++++++-------- fs/afs/inode.c | 50 ++- fs/afs/internal.h | 457 +++++++++++++------------- fs/afs/main.c | 35 +- fs/afs/proc.c | 151 ++++----- fs/afs/rotate.c | 461 ++++++++++++++++++++++++++ fs/afs/rxrpc.c | 45 +-- fs/afs/security.c | 2 +- fs/afs/server.c | 762 ++++++++++++++++++++++++++++++------------- fs/afs/server_list.c | 153 +++++++++ fs/afs/super.c | 92 ++++-- fs/afs/vlclient.c | 351 +++++++++++++------- fs/afs/vlocation.c | 669 ------------------------------------- fs/afs/vnode.c | 750 ------------------------------------------ fs/afs/volume.c | 450 +++++++++++++++++-------- fs/afs/write.c | 38 ++- fs/afs/xattr.c | 2 +- 26 files changed, 2858 insertions(+), 2638 deletions(-) create mode 100644 
fs/afs/server_list.c delete mode 100644 fs/afs/vlocation.c delete mode 100644 fs/afs/vnode.c diff --git a/fs/afs/Makefile b/fs/afs/Makefile index 192d476d7c76..45b7fc405fa6 100644 --- a/fs/afs/Makefile +++ b/fs/afs/Makefile @@ -24,11 +24,10 @@ kafs-objs := \ rxrpc.o \ security.o \ server.o \ + server_list.o \ super.o \ netdevices.o \ vlclient.o \ - vlocation.o \ - vnode.o \ volume.o \ write.o \ xattr.o diff --git a/fs/afs/addr_list.c b/fs/afs/addr_list.c index ecb9c72aebd2..b91e59a77f0e 100644 --- a/fs/afs/addr_list.c +++ b/fs/afs/addr_list.c @@ -227,6 +227,37 @@ struct afs_addr_list *afs_dns_query(struct afs_cell *cell, time64_t *_expiry) return alist; } +/* + * Merge an IPv4 entry into a fileserver address list. + */ +void afs_merge_fs_addr4(struct afs_addr_list *alist, __be32 xdr) +{ + struct sockaddr_in6 *a; + int i; + + for (i = 0; i < alist->nr_ipv4; i++) { + a = &alist->addrs[i].transport.sin6; + if (xdr == a->sin6_addr.s6_addr32[3]) + return; + if (xdr < a->sin6_addr.s6_addr32[3]) + break; + } + + if (i < alist->nr_addrs) + memmove(alist->addrs + i + 1, + alist->addrs + i, + sizeof(alist->addrs[0]) * (alist->nr_addrs - i)); + + a = &alist->addrs[i].transport.sin6; + a->sin6_port = htons(AFS_FS_PORT); + a->sin6_addr.s6_addr32[0] = 0; + a->sin6_addr.s6_addr32[1] = 0; + a->sin6_addr.s6_addr32[2] = htonl(0xffff); + a->sin6_addr.s6_addr32[3] = xdr; + alist->nr_ipv4++; + alist->nr_addrs++; +} + /* * Get an address to try. 
*/ diff --git a/fs/afs/afs_fs.h b/fs/afs/afs_fs.h index 05395d0f1941..d47b6d01e4c0 100644 --- a/fs/afs/afs_fs.h +++ b/fs/afs/afs_fs.h @@ -38,6 +38,7 @@ enum AFS_FS_Operations { FSFETCHDATA64 = 65537, /* AFS Fetch file data */ FSSTOREDATA64 = 65538, /* AFS Store file data */ FSGIVEUPALLCALLBACKS = 65539, /* AFS Give up all outstanding callbacks on a server */ + FSGETCAPABILITIES = 65540, /* Probe and get the capabilities of a fileserver */ }; enum AFS_FS_Errors { diff --git a/fs/afs/afs_vl.h b/fs/afs/afs_vl.h index 4eaa620992c8..6350b417aee9 100644 --- a/fs/afs/afs_vl.h +++ b/fs/afs/afs_vl.h @@ -88,4 +88,46 @@ struct afs_vldbentry { #define AFS_VLDB_MAXNAMELEN 65 + +struct afs_ListAddrByAttributes__xdr { + __be32 Mask; +#define AFS_VLADDR_IPADDR 0x1 /* Match by ->ipaddr */ +#define AFS_VLADDR_INDEX 0x2 /* Match by ->index */ +#define AFS_VLADDR_UUID 0x4 /* Match by ->uuid */ + __be32 ipaddr; + __be32 index; + __be32 spare; + struct afs_uuid__xdr uuid; +}; + +struct afs_uvldbentry__xdr { + __be32 name[AFS_VLDB_MAXNAMELEN]; + __be32 nServers; + struct afs_uuid__xdr serverNumber[AFS_NMAXNSERVERS]; + __be32 serverUnique[AFS_NMAXNSERVERS]; + __be32 serverPartition[AFS_NMAXNSERVERS]; + __be32 serverFlags[AFS_NMAXNSERVERS]; + __be32 volumeId[AFS_MAXTYPES]; + __be32 cloneId; + __be32 flags; + __be32 spares1; + __be32 spares2; + __be32 spares3; + __be32 spares4; + __be32 spares5; + __be32 spares6; + __be32 spares7; + __be32 spares8; + __be32 spares9; +}; + +struct afs_address_list { + refcount_t usage; + unsigned int version; + unsigned int nr_addrs; + struct sockaddr_rxrpc addrs[]; +}; + +extern void afs_put_address_list(struct afs_address_list *alist); + #endif /* AFS_VL_H */ diff --git a/fs/afs/callback.c b/fs/afs/callback.c index 82f4c7a3b7b6..f4291b576054 100644 --- a/fs/afs/callback.c +++ b/fs/afs/callback.c @@ -26,10 +26,10 @@ * - Called with volume->server_sem held. 
*/ int afs_register_server_cb_interest(struct afs_vnode *vnode, - struct afs_cb_interest **ppcbi, - struct afs_server *server) + struct afs_server_entry *entry) { - struct afs_cb_interest *cbi = *ppcbi, *vcbi, *new, *x; + struct afs_cb_interest *cbi = entry->cb_interest, *vcbi, *new, *x; + struct afs_server *server = entry->server; again: vcbi = vnode->cb_interest; @@ -47,7 +47,7 @@ again: if (!cbi && vcbi->server == server) { afs_get_cb_interest(vcbi); - x = cmpxchg(ppcbi, cbi, vcbi); + x = cmpxchg(&entry->cb_interest, cbi, vcbi); if (x != cbi) { cbi = x; afs_put_cb_interest(afs_v2net(vnode), vcbi); @@ -72,7 +72,7 @@ again: list_add_tail(&new->cb_link, &server->cb_interests); write_unlock(&server->cb_break_lock); - x = cmpxchg(ppcbi, cbi, new); + x = cmpxchg(&entry->cb_interest, cbi, new); if (x == cbi) { cbi = new; } else { @@ -137,7 +137,7 @@ void afs_put_cb_interest(struct afs_net *net, struct afs_cb_interest *cbi) */ void afs_init_callback_state(struct afs_server *server) { - if (!test_and_clear_bit(AFS_SERVER_NEW, &server->flags)) + if (!test_and_clear_bit(AFS_SERVER_FL_NEW, &server->flags)) server->cb_s_break++; } @@ -233,12 +233,12 @@ void afs_break_callbacks(struct afs_server *server, size_t count, /* * Clear the callback interests in a server list. 
*/ -void afs_clear_callback_interests(struct afs_net *net, struct afs_volume *volume) +void afs_clear_callback_interests(struct afs_net *net, struct afs_server_list *slist) { int i; - for (i = 0; i < ARRAY_SIZE(volume->cb_interests); i++) { - afs_put_cb_interest(net, volume->cb_interests[i]); - volume->cb_interests[i] = NULL; + for (i = 0; i < slist->nr_servers; i++) { + afs_put_cb_interest(net, slist->servers[i].cb_interest); + slist->servers[i].cb_interest = NULL; } } diff --git a/fs/afs/cell.c b/fs/afs/cell.c index a0e08d3a108c..1858c91169e4 100644 --- a/fs/afs/cell.c +++ b/fs/afs/cell.c @@ -146,13 +146,10 @@ static struct afs_cell *afs_alloc_cell(struct afs_net *net, atomic_set(&cell->usage, 2); INIT_WORK(&cell->manager, afs_manage_cell); - rwlock_init(&cell->servers_lock); - INIT_LIST_HEAD(&cell->servers); - init_rwsem(&cell->vl_sem); - INIT_LIST_HEAD(&cell->vl_list); - spin_lock_init(&cell->vl_lock); cell->flags = ((1 << AFS_CELL_FL_NOT_READY) | (1 << AFS_CELL_FL_NO_LOOKUP_YET)); + INIT_LIST_HEAD(&cell->proc_volumes); + rwlock_init(&cell->proc_lock); rwlock_init(&cell->vl_addrs_lock); /* Fill in the VL server list if we were given a list of addresses to diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 37083699a0df..53f3917440e7 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -553,7 +553,7 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry, dentry->d_fsdata = (void *)(unsigned long) vnode->status.data_version; /* instantiate the dentry */ - inode = afs_iget(dir->i_sb, key, &fid, NULL, NULL); + inode = afs_iget(dir->i_sb, key, &fid, NULL, NULL, NULL); key_put(key); if (IS_ERR(inode)) { _leave(" = %ld", PTR_ERR(inode)); @@ -740,21 +740,49 @@ static void afs_d_release(struct dentry *dentry) _enter("%pd", dentry); } +/* + * Create a new inode for create/mkdir/symlink + */ +static void afs_vnode_new_inode(struct afs_fs_cursor *fc, + struct dentry *new_dentry, + struct afs_fid *newfid, + struct afs_file_status *newstatus, + struct 
afs_callback *newcb) +{ + struct inode *inode; + + if (fc->ac.error < 0) + return; + + inode = afs_iget(fc->vnode->vfs_inode.i_sb, fc->key, + newfid, newstatus, newcb, fc->cbi); + if (IS_ERR(inode)) { + /* ENOMEM or EINTR at a really inconvenient time - just abandon + * the new directory on the server. + */ + fc->ac.error = PTR_ERR(inode); + return; + } + + d_instantiate(new_dentry, inode); + if (d_unhashed(new_dentry)) + d_rehash(new_dentry); +} + /* * create a directory on an AFS filesystem */ static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) { - struct afs_file_status status; - struct afs_callback cb; - struct afs_server *server; - struct afs_vnode *dvnode, *vnode; - struct afs_fid fid; - struct inode *inode; + struct afs_file_status newstatus; + struct afs_fs_cursor fc; + struct afs_callback newcb; + struct afs_vnode *dvnode = AFS_FS_I(dir); + struct afs_fid newfid; struct key *key; int ret; - dvnode = AFS_FS_I(dir); + mode |= S_IFDIR; _enter("{%x:%u},{%pd},%ho", dvnode->fid.vid, dvnode->fid.vnode, dentry, mode); @@ -765,40 +793,27 @@ static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) goto error; } - mode |= S_IFDIR; - ret = afs_vnode_create(dvnode, key, dentry->d_name.name, - mode, &fid, &status, &cb, &server); - if (ret < 0) - goto mkdir_error; + ret = -ERESTARTSYS; + if (afs_begin_vnode_operation(&fc, dvnode, key)) { + while (afs_select_fileserver(&fc)) { + fc.cb_break = dvnode->cb_break + dvnode->cb_s_break; + afs_fs_create(&fc, dentry->d_name.name, mode, + &newfid, &newstatus, &newcb); + } - inode = afs_iget(dir->i_sb, key, &fid, &status, &cb); - if (IS_ERR(inode)) { - /* ENOMEM at a really inconvenient time - just abandon the new - * directory on the server */ - ret = PTR_ERR(inode); - goto iget_error; + afs_check_for_remote_deletion(&fc, fc.vnode); + afs_vnode_commit_status(&fc, dvnode, fc.cb_break); + afs_vnode_new_inode(&fc, dentry, &newfid, &newstatus, &newcb); + ret = afs_end_vnode_operation(&fc); 
+ if (ret < 0) + goto error_key; } - /* apply the status report we've got for the new vnode */ - vnode = AFS_FS_I(inode); - spin_lock(&vnode->lock); - vnode->update_cnt++; - spin_unlock(&vnode->lock); - afs_vnode_finalise_status_update(vnode, server); - afs_put_server(afs_i2net(dir), server); - - d_instantiate(dentry, inode); - if (d_unhashed(dentry)) { - _debug("not hashed"); - d_rehash(dentry); - } key_put(key); _leave(" = 0"); return 0; -iget_error: - afs_put_server(afs_i2net(dir), server); -mkdir_error: +error_key: key_put(key); error: d_drop(dentry); @@ -807,107 +822,130 @@ error: } /* - * remove a directory from an AFS filesystem + * Remove a subdir from a directory. */ -static int afs_rmdir(struct inode *dir, struct dentry *dentry) +static void afs_dir_remove_subdir(struct dentry *dentry) { - struct afs_vnode *dvnode, *vnode; - struct key *key; - int ret; - - dvnode = AFS_FS_I(dir); - - _enter("{%x:%u},{%pd}", - dvnode->fid.vid, dvnode->fid.vnode, dentry); - - key = afs_request_key(dvnode->volume->cell); - if (IS_ERR(key)) { - ret = PTR_ERR(key); - goto error; - } - - ret = afs_vnode_remove(dvnode, key, dentry->d_name.name, true); - if (ret < 0) - goto rmdir_error; - if (d_really_is_positive(dentry)) { - vnode = AFS_FS_I(d_inode(dentry)); + struct afs_vnode *vnode = AFS_FS_I(d_inode(dentry)); + clear_nlink(&vnode->vfs_inode); set_bit(AFS_VNODE_DELETED, &vnode->flags); clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags); } - - key_put(key); - _leave(" = 0"); - return 0; - -rmdir_error: - key_put(key); -error: - _leave(" = %d", ret); - return ret; } /* - * remove a file from an AFS filesystem + * remove a directory from an AFS filesystem */ -static int afs_unlink(struct inode *dir, struct dentry *dentry) +static int afs_rmdir(struct inode *dir, struct dentry *dentry) { - struct afs_vnode *dvnode, *vnode; + struct afs_fs_cursor fc; + struct afs_vnode *dvnode = AFS_FS_I(dir); struct key *key; int ret; - dvnode = AFS_FS_I(dir); - _enter("{%x:%u},{%pd}", 
dvnode->fid.vid, dvnode->fid.vnode, dentry); - ret = -ENAMETOOLONG; - if (dentry->d_name.len >= AFSNAMEMAX) - goto error; - key = afs_request_key(dvnode->volume->cell); if (IS_ERR(key)) { ret = PTR_ERR(key); goto error; } - if (d_really_is_positive(dentry)) { - vnode = AFS_FS_I(d_inode(dentry)); + ret = -ERESTARTSYS; + if (afs_begin_vnode_operation(&fc, dvnode, key)) { + while (afs_select_fileserver(&fc)) { + fc.cb_break = dvnode->cb_break + dvnode->cb_s_break; + afs_fs_remove(&fc, dentry->d_name.name, true); + } - /* make sure we have a callback promise on the victim */ - ret = afs_validate(vnode, key); - if (ret < 0) - goto error; - } - - ret = afs_vnode_remove(dvnode, key, dentry->d_name.name, false); - if (ret < 0) - goto remove_error; - - if (d_really_is_positive(dentry)) { - /* if the file wasn't deleted due to excess hard links, the - * fileserver will break the callback promise on the file - if - * it had one - before it returns to us, and if it was deleted, - * it won't - * - * however, if we didn't have a callback promise outstanding, - * or it was outstanding on a different server, then it won't - * break it either... - */ - vnode = AFS_FS_I(d_inode(dentry)); - if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) - _debug("AFS_VNODE_DELETED"); - clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags); - ret = afs_validate(vnode, key); - _debug("nlink %d [val %d]", vnode->vfs_inode.i_nlink, ret); + afs_vnode_commit_status(&fc, dvnode, fc.cb_break); + ret = afs_end_vnode_operation(&fc); + if (ret == 0) + afs_dir_remove_subdir(dentry); } key_put(key); - _leave(" = 0"); - return 0; +error: + return ret; +} -remove_error: +/* + * Remove a link to a file or symlink from a directory. 
+ * + * If the file was not deleted due to excess hard links, the fileserver will + * break the callback promise on the file - if it had one - before it returns + * to us, and if it was deleted, it won't + * + * However, if we didn't have a callback promise outstanding, or it was + * outstanding on a different server, then it won't break it either... + */ +static int afs_dir_remove_link(struct dentry *dentry, struct key *key) +{ + int ret = 0; + + if (d_really_is_positive(dentry)) { + struct afs_vnode *vnode = AFS_FS_I(d_inode(dentry)); + + if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) + kdebug("AFS_VNODE_DELETED"); + clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags); + + ret = afs_validate(vnode, key); + if (ret == -ESTALE) + ret = 0; + _debug("nlink %d [val %d]", vnode->vfs_inode.i_nlink, ret); + } + + return ret; +} + +/* + * Remove a file or symlink from an AFS filesystem. + */ +static int afs_unlink(struct inode *dir, struct dentry *dentry) +{ + struct afs_fs_cursor fc; + struct afs_vnode *dvnode = AFS_FS_I(dir), *vnode; + struct key *key; + int ret; + + _enter("{%x:%u},{%pd}", + dvnode->fid.vid, dvnode->fid.vnode, dentry); + + if (dentry->d_name.len >= AFSNAMEMAX) + return -ENAMETOOLONG; + + key = afs_request_key(dvnode->volume->cell); + if (IS_ERR(key)) { + ret = PTR_ERR(key); + goto error; + } + + /* Try to make sure we have a callback promise on the victim. 
*/ + if (d_really_is_positive(dentry)) { + vnode = AFS_FS_I(d_inode(dentry)); + ret = afs_validate(vnode, key); + if (ret < 0) + goto error_key; + } + + ret = -ERESTARTSYS; + if (afs_begin_vnode_operation(&fc, dvnode, key)) { + while (afs_select_fileserver(&fc)) { + fc.cb_break = dvnode->cb_break + dvnode->cb_s_break; + afs_fs_remove(&fc, dentry->d_name.name, false); + } + + afs_vnode_commit_status(&fc, dvnode, fc.cb_break); + ret = afs_end_vnode_operation(&fc); + if (ret == 0) + ret = afs_dir_remove_link(dentry, key); + } + +error_key: key_put(key); error: _leave(" = %d", ret); @@ -920,60 +958,50 @@ error: static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool excl) { - struct afs_file_status status; - struct afs_callback cb; - struct afs_server *server; - struct afs_vnode *dvnode, *vnode; - struct afs_fid fid; - struct inode *inode; + struct afs_fs_cursor fc; + struct afs_file_status newstatus; + struct afs_callback newcb; + struct afs_vnode *dvnode = dvnode = AFS_FS_I(dir); + struct afs_fid newfid; struct key *key; int ret; - dvnode = AFS_FS_I(dir); + mode |= S_IFREG; _enter("{%x:%u},{%pd},%ho,", dvnode->fid.vid, dvnode->fid.vnode, dentry, mode); + ret = -ENAMETOOLONG; + if (dentry->d_name.len >= AFSNAMEMAX) + goto error; + key = afs_request_key(dvnode->volume->cell); if (IS_ERR(key)) { ret = PTR_ERR(key); goto error; } - mode |= S_IFREG; - ret = afs_vnode_create(dvnode, key, dentry->d_name.name, - mode, &fid, &status, &cb, &server); - if (ret < 0) - goto create_error; + ret = -ERESTARTSYS; + if (afs_begin_vnode_operation(&fc, dvnode, key)) { + while (afs_select_fileserver(&fc)) { + fc.cb_break = dvnode->cb_break + dvnode->cb_s_break; + afs_fs_create(&fc, dentry->d_name.name, mode, + &newfid, &newstatus, &newcb); + } - inode = afs_iget(dir->i_sb, key, &fid, &status, &cb); - if (IS_ERR(inode)) { - /* ENOMEM at a really inconvenient time - just abandon the new - * directory on the server */ - ret = PTR_ERR(inode); - goto iget_error; + 
afs_check_for_remote_deletion(&fc, fc.vnode); + afs_vnode_commit_status(&fc, dvnode, fc.cb_break); + afs_vnode_new_inode(&fc, dentry, &newfid, &newstatus, &newcb); + ret = afs_end_vnode_operation(&fc); + if (ret < 0) + goto error_key; } - /* apply the status report we've got for the new vnode */ - vnode = AFS_FS_I(inode); - spin_lock(&vnode->lock); - vnode->update_cnt++; - spin_unlock(&vnode->lock); - afs_vnode_finalise_status_update(vnode, server); - afs_put_server(afs_i2net(dir), server); - - d_instantiate(dentry, inode); - if (d_unhashed(dentry)) { - _debug("not hashed"); - d_rehash(dentry); - } key_put(key); _leave(" = 0"); return 0; -iget_error: - afs_put_server(afs_i2net(dir), server); -create_error: +error_key: key_put(key); error: d_drop(dentry); @@ -987,6 +1015,7 @@ error: static int afs_link(struct dentry *from, struct inode *dir, struct dentry *dentry) { + struct afs_fs_cursor fc; struct afs_vnode *dvnode, *vnode; struct key *key; int ret; @@ -999,23 +1028,45 @@ static int afs_link(struct dentry *from, struct inode *dir, dvnode->fid.vid, dvnode->fid.vnode, dentry); + ret = -ENAMETOOLONG; + if (dentry->d_name.len >= AFSNAMEMAX) + goto error; + key = afs_request_key(dvnode->volume->cell); if (IS_ERR(key)) { ret = PTR_ERR(key); goto error; } - ret = afs_vnode_link(dvnode, vnode, key, dentry->d_name.name); - if (ret < 0) - goto link_error; + ret = -ERESTARTSYS; + if (afs_begin_vnode_operation(&fc, dvnode, key)) { + if (mutex_lock_interruptible_nested(&vnode->io_lock, 1) < 0) { + afs_end_vnode_operation(&fc); + return -ERESTARTSYS; + } + + while (afs_select_fileserver(&fc)) { + fc.cb_break = dvnode->cb_break + dvnode->cb_s_break; + fc.cb_break_2 = vnode->cb_break + vnode->cb_s_break; + afs_fs_link(&fc, vnode, dentry->d_name.name); + } + + afs_vnode_commit_status(&fc, dvnode, fc.cb_break); + afs_vnode_commit_status(&fc, vnode, fc.cb_break_2); + ihold(&vnode->vfs_inode); + d_instantiate(dentry, &vnode->vfs_inode); + + mutex_unlock(&vnode->io_lock); + ret = 
afs_end_vnode_operation(&fc); + if (ret < 0) + goto error_key; + } - ihold(&vnode->vfs_inode); - d_instantiate(dentry, &vnode->vfs_inode); key_put(key); _leave(" = 0"); return 0; -link_error: +error_key: key_put(key); error: d_drop(dentry); @@ -1029,20 +1080,21 @@ error: static int afs_symlink(struct inode *dir, struct dentry *dentry, const char *content) { - struct afs_file_status status; - struct afs_server *server; - struct afs_vnode *dvnode, *vnode; - struct afs_fid fid; - struct inode *inode; + struct afs_fs_cursor fc; + struct afs_file_status newstatus; + struct afs_vnode *dvnode = AFS_FS_I(dir); + struct afs_fid newfid; struct key *key; int ret; - dvnode = AFS_FS_I(dir); - _enter("{%x:%u},{%pd},%s", dvnode->fid.vid, dvnode->fid.vnode, dentry, content); + ret = -ENAMETOOLONG; + if (dentry->d_name.len >= AFSNAMEMAX) + goto error; + ret = -EINVAL; if (strlen(content) >= AFSPATHMAX) goto error; @@ -1053,39 +1105,27 @@ static int afs_symlink(struct inode *dir, struct dentry *dentry, goto error; } - ret = afs_vnode_symlink(dvnode, key, dentry->d_name.name, content, - &fid, &status, &server); - if (ret < 0) - goto create_error; + ret = -ERESTARTSYS; + if (afs_begin_vnode_operation(&fc, dvnode, key)) { + while (afs_select_fileserver(&fc)) { + fc.cb_break = dvnode->cb_break + dvnode->cb_s_break; + afs_fs_symlink(&fc, dentry->d_name.name, content, + &newfid, &newstatus); + } - inode = afs_iget(dir->i_sb, key, &fid, &status, NULL); - if (IS_ERR(inode)) { - /* ENOMEM at a really inconvenient time - just abandon the new - * directory on the server */ - ret = PTR_ERR(inode); - goto iget_error; + afs_check_for_remote_deletion(&fc, fc.vnode); + afs_vnode_commit_status(&fc, dvnode, fc.cb_break); + afs_vnode_new_inode(&fc, dentry, &newfid, &newstatus, NULL); + ret = afs_end_vnode_operation(&fc); + if (ret < 0) + goto error_key; } - /* apply the status report we've got for the new vnode */ - vnode = AFS_FS_I(inode); - spin_lock(&vnode->lock); - vnode->update_cnt++; - 
spin_unlock(&vnode->lock); - afs_vnode_finalise_status_update(vnode, server); - afs_put_server(afs_i2net(dir), server); - - d_instantiate(dentry, inode); - if (d_unhashed(dentry)) { - _debug("not hashed"); - d_rehash(dentry); - } key_put(key); _leave(" = 0"); return 0; -iget_error: - afs_put_server(afs_i2net(dir), server); -create_error: +error_key: key_put(key); error: d_drop(dentry); @@ -1100,6 +1140,7 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags) { + struct afs_fs_cursor fc; struct afs_vnode *orig_dvnode, *new_dvnode, *vnode; struct key *key; int ret; @@ -1123,16 +1164,35 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry, goto error; } - ret = afs_vnode_rename(orig_dvnode, new_dvnode, key, - old_dentry->d_name.name, - new_dentry->d_name.name); - if (ret < 0) - goto rename_error; + ret = -ERESTARTSYS; + if (afs_begin_vnode_operation(&fc, orig_dvnode, key)) { + if (orig_dvnode != new_dvnode) { + if (mutex_lock_interruptible_nested(&new_dvnode->io_lock, 1) < 0) { + afs_end_vnode_operation(&fc); + return -ERESTARTSYS; + } + } + while (afs_select_fileserver(&fc)) { + fc.cb_break = orig_dvnode->cb_break + orig_dvnode->cb_s_break; + fc.cb_break_2 = new_dvnode->cb_break + new_dvnode->cb_s_break; + afs_fs_rename(&fc, old_dentry->d_name.name, + new_dvnode, new_dentry->d_name.name); + } + + afs_vnode_commit_status(&fc, orig_dvnode, fc.cb_break); + afs_vnode_commit_status(&fc, new_dvnode, fc.cb_break_2); + if (orig_dvnode != new_dvnode) + mutex_unlock(&new_dvnode->io_lock); + ret = afs_end_vnode_operation(&fc); + if (ret < 0) + goto error_key; + } + key_put(key); _leave(" = 0"); return 0; -rename_error: +error_key: key_put(key); error: d_drop(new_dentry); diff --git a/fs/afs/file.c b/fs/afs/file.c index 08f9f0c5dfac..1f26ac9f816d 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c @@ -137,6 +137,37 @@ static void afs_file_readpage_read_complete(struct page 
*page, } #endif +/* + * Fetch file data from the volume. + */ +int afs_fetch_data(struct afs_vnode *vnode, struct key *key, struct afs_read *desc) +{ + struct afs_fs_cursor fc; + int ret; + + _enter("%s{%x:%u.%u},%x,,,", + vnode->volume->name, + vnode->fid.vid, + vnode->fid.vnode, + vnode->fid.unique, + key_serial(key)); + + ret = -ERESTARTSYS; + if (afs_begin_vnode_operation(&fc, vnode, key)) { + while (afs_select_fileserver(&fc)) { + fc.cb_break = vnode->cb_break + vnode->cb_s_break; + afs_fs_fetch_data(&fc, desc); + } + + afs_check_for_remote_deletion(&fc, fc.vnode); + afs_vnode_commit_status(&fc, vnode, fc.cb_break); + ret = afs_end_vnode_operation(&fc); + } + + _leave(" = %d", ret); + return ret; +} + /* * read page from file, directory or symlink, given a key to use */ @@ -199,7 +230,7 @@ int afs_page_filler(void *data, struct page *page) /* read the contents of the file from the server into the * page */ - ret = afs_vnode_fetch_data(vnode, key, req); + ret = afs_fetch_data(vnode, key, req); afs_put_read(req); if (ret < 0) { if (ret == -ENOENT) { @@ -264,7 +295,7 @@ static int afs_readpage(struct file *file, struct page *page) ret = afs_page_filler(key, page); } else { struct inode *inode = page->mapping->host; - key = afs_request_key(AFS_FS_S(inode->i_sb)->volume->cell); + key = afs_request_key(AFS_FS_S(inode->i_sb)->cell); if (IS_ERR(key)) { ret = PTR_ERR(key); } else { @@ -369,7 +400,7 @@ static int afs_readpages_one(struct file *file, struct address_space *mapping, return 0; } - ret = afs_vnode_fetch_data(vnode, key, req); + ret = afs_fetch_data(vnode, key, req); if (ret < 0) goto error; diff --git a/fs/afs/flock.c b/fs/afs/flock.c index 2b31ea58c50c..77b0a4606efd 100644 --- a/fs/afs/flock.c +++ b/fs/afs/flock.c @@ -67,6 +67,100 @@ static void afs_grant_locks(struct afs_vnode *vnode, struct file_lock *fl) } } +/* + * Get a lock on a file + */ +static int afs_set_lock(struct afs_vnode *vnode, struct key *key, + afs_lock_type_t type) +{ + struct 
afs_fs_cursor fc; + int ret; + + _enter("%s{%x:%u.%u},%x,%u", + vnode->volume->name, + vnode->fid.vid, + vnode->fid.vnode, + vnode->fid.unique, + key_serial(key), type); + + ret = -ERESTARTSYS; + if (afs_begin_vnode_operation(&fc, vnode, key)) { + while (afs_select_fileserver(&fc)) { + fc.cb_break = vnode->cb_break + vnode->cb_s_break; + afs_fs_set_lock(&fc, type); + } + + afs_check_for_remote_deletion(&fc, fc.vnode); + afs_vnode_commit_status(&fc, vnode, fc.cb_break); + ret = afs_end_vnode_operation(&fc); + } + + _leave(" = %d", ret); + return ret; +} + +/* + * Extend a lock on a file + */ +static int afs_extend_lock(struct afs_vnode *vnode, struct key *key) +{ + struct afs_fs_cursor fc; + int ret; + + _enter("%s{%x:%u.%u},%x", + vnode->volume->name, + vnode->fid.vid, + vnode->fid.vnode, + vnode->fid.unique, + key_serial(key)); + + ret = -ERESTARTSYS; + if (afs_begin_vnode_operation(&fc, vnode, key)) { + while (afs_select_current_fileserver(&fc)) { + fc.cb_break = vnode->cb_break + vnode->cb_s_break; + afs_fs_extend_lock(&fc); + } + + afs_check_for_remote_deletion(&fc, fc.vnode); + afs_vnode_commit_status(&fc, vnode, fc.cb_break); + ret = afs_end_vnode_operation(&fc); + } + + _leave(" = %d", ret); + return ret; +} + +/* + * Release a lock on a file + */ +static int afs_release_lock(struct afs_vnode *vnode, struct key *key) +{ + struct afs_fs_cursor fc; + int ret; + + _enter("%s{%x:%u.%u},%x", + vnode->volume->name, + vnode->fid.vid, + vnode->fid.vnode, + vnode->fid.unique, + key_serial(key)); + + ret = -ERESTARTSYS; + if (afs_begin_vnode_operation(&fc, vnode, key)) { + while (afs_select_current_fileserver(&fc)) { + fc.cb_break = vnode->cb_break + vnode->cb_s_break; + afs_fs_release_lock(&fc); + } + + afs_check_for_remote_deletion(&fc, fc.vnode); + afs_vnode_commit_status(&fc, vnode, fc.cb_break); + ret = afs_end_vnode_operation(&fc); + } + + _leave(" = %d", ret); + return ret; +} + /* * do work for a lock, including: * - probing for a lock we're waiting on but 
didn't get immediately @@ -91,7 +185,7 @@ void afs_lock_work(struct work_struct *work) /* attempt to release the server lock; if it fails, we just * wait 5 minutes and it'll time out anyway */ - ret = afs_vnode_release_lock(vnode, vnode->unlock_key); + ret = afs_release_lock(vnode, vnode->unlock_key); if (ret < 0) printk(KERN_WARNING "AFS:" " Failed to release lock on {%x:%x} error %d\n", @@ -115,7 +209,7 @@ void afs_lock_work(struct work_struct *work) key = key_get(fl->fl_file->private_data); spin_unlock(&vnode->lock); - ret = afs_vnode_extend_lock(vnode, key); + ret = afs_extend_lock(vnode, key); clear_bit(AFS_VNODE_LOCKING, &vnode->flags); key_put(key); switch (ret) { @@ -151,7 +245,7 @@ void afs_lock_work(struct work_struct *work) AFS_LOCK_READ : AFS_LOCK_WRITE; spin_unlock(&vnode->lock); - ret = afs_vnode_set_lock(vnode, key, type); + ret = afs_set_lock(vnode, key, type); clear_bit(AFS_VNODE_LOCKING, &vnode->flags); switch (ret) { case -EWOULDBLOCK: @@ -182,7 +276,7 @@ void afs_lock_work(struct work_struct *work) clear_bit(AFS_VNODE_READLOCKED, &vnode->flags); clear_bit(AFS_VNODE_WRITELOCKED, &vnode->flags); spin_unlock(&vnode->lock); - afs_vnode_release_lock(vnode, key); + afs_release_lock(vnode, key); if (!list_empty(&vnode->pending_locks)) afs_lock_may_be_available(vnode); } @@ -280,7 +374,7 @@ static int afs_do_setlk(struct file *file, struct file_lock *fl) set_bit(AFS_VNODE_LOCKING, &vnode->flags); spin_unlock(&vnode->lock); - ret = afs_vnode_set_lock(vnode, key, type); + ret = afs_set_lock(vnode, key, type); clear_bit(AFS_VNODE_LOCKING, &vnode->flags); switch (ret) { case 0: @@ -383,7 +477,7 @@ given_lock: /* again, make sure we've got a callback on this file and, again, make * sure that our view of the data version is up to date (we ignore * errors incurred here and deal with the consequences elsewhere) */ - afs_vnode_fetch_status(vnode, key, false); + afs_validate(vnode, key); error: spin_unlock(&inode->i_lock); @@ -455,7 +549,7 @@ static int 
afs_do_getlk(struct file *file, struct file_lock *fl) posix_test_lock(file, fl); if (fl->fl_type == F_UNLCK) { /* no local locks; consult the server */ - ret = afs_vnode_fetch_status(vnode, key, true); + ret = afs_fetch_status(vnode, key); if (ret < 0) goto error; lock_count = vnode->status.lock_count; diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index 6614d0a78daa..72ff3679fa2a 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -22,9 +22,9 @@ */ static u8 afs_discard_buffer[64]; -static inline void afs_use_fs_server(struct afs_call *call, struct afs_server *server) +static inline void afs_use_fs_server(struct afs_call *call, struct afs_cb_interest *cbi) { - call->server = afs_get_server(server); + call->cbi = afs_get_cb_interest(cbi); } /* @@ -56,7 +56,8 @@ static void xdr_decode_AFSFetchStatus(const __be32 **_bp, kuid_t owner; kgid_t group; - write_seqlock(&vnode->cb_lock); + if (vnode) + write_seqlock(&vnode->cb_lock); #define EXTRACT(DST) \ do { \ @@ -141,7 +142,8 @@ static void xdr_decode_AFSFetchStatus(const __be32 **_bp, status->data_version = data_version; } - write_sequnlock(&vnode->cb_lock); + if (vnode) + write_sequnlock(&vnode->cb_lock); } /* @@ -151,22 +153,29 @@ static void xdr_decode_AFSCallBack(struct afs_call *call, struct afs_vnode *vnode, const __be32 **_bp) { + struct afs_cb_interest *old, *cbi = call->cbi; const __be32 *bp = *_bp; u32 cb_expiry; write_seqlock(&vnode->cb_lock); - if (call->cb_break == (vnode->cb_break + call->server->cb_s_break)) { + if (call->cb_break == (vnode->cb_break + cbi->server->cb_s_break)) { vnode->cb_version = ntohl(*bp++); cb_expiry = ntohl(*bp++); vnode->cb_type = ntohl(*bp++); vnode->cb_expires_at = cb_expiry + ktime_get_real_seconds(); + old = vnode->cb_interest; + if (old != call->cbi) { + vnode->cb_interest = cbi; + cbi = old; + } set_bit(AFS_VNODE_CB_PROMISED, &vnode->flags); } else { bp += 3; } write_sequnlock(&vnode->cb_lock); + call->cbi = cbi; *_bp = bp; } @@ -297,24 +306,23 @@ static const 
struct afs_call_type afs_RXFSFetchStatus = { /* * fetch the status information for a file */ -int afs_fs_fetch_file_status(struct afs_fs_cursor *fc, - struct key *key, - struct afs_vnode *vnode, - struct afs_volsync *volsync, - bool async) +int afs_fs_fetch_file_status(struct afs_fs_cursor *fc, struct afs_volsync *volsync) { + struct afs_vnode *vnode = fc->vnode; struct afs_call *call; struct afs_net *net = afs_v2net(vnode); __be32 *bp; _enter(",%x,{%x:%u},,", - key_serial(key), vnode->fid.vid, vnode->fid.vnode); + key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode); call = afs_alloc_flat_call(net, &afs_RXFSFetchStatus, 16, (21 + 3 + 6) * 4); - if (!call) + if (!call) { + fc->ac.error = -ENOMEM; return -ENOMEM; + } - call->key = key; + call->key = fc->key; call->reply[0] = vnode; call->reply[1] = volsync; @@ -325,9 +333,9 @@ int afs_fs_fetch_file_status(struct afs_fs_cursor *fc, bp[2] = htonl(vnode->fid.vnode); bp[3] = htonl(vnode->fid.unique); - call->cb_break = vnode->cb_break + fc->server->cb_s_break; - afs_use_fs_server(call, fc->server); - return afs_make_call(&fc->ac, call, GFP_NOFS, async); + call->cb_break = fc->cb_break; + afs_use_fs_server(call, fc->cbi); + return afs_make_call(&fc->ac, call, GFP_NOFS, false); } /* @@ -502,12 +510,9 @@ static const struct afs_call_type afs_RXFSFetchData64 = { /* * fetch data from a very large file */ -static int afs_fs_fetch_data64(struct afs_fs_cursor *fc, - struct key *key, - struct afs_vnode *vnode, - struct afs_read *req, - bool async) +static int afs_fs_fetch_data64(struct afs_fs_cursor *fc, struct afs_read *req) { + struct afs_vnode *vnode = fc->vnode; struct afs_call *call; struct afs_net *net = afs_v2net(vnode); __be32 *bp; @@ -518,7 +523,7 @@ static int afs_fs_fetch_data64(struct afs_fs_cursor *fc, if (!call) return -ENOMEM; - call->key = key; + call->key = fc->key; call->reply[0] = vnode; call->reply[1] = NULL; /* volsync */ call->reply[2] = req; @@ -536,20 +541,17 @@ static int afs_fs_fetch_data64(struct 
afs_fs_cursor *fc, bp[7] = htonl(lower_32_bits(req->len)); atomic_inc(&req->usage); - call->cb_break = vnode->cb_break + fc->server->cb_s_break; - afs_use_fs_server(call, fc->server); - return afs_make_call(&fc->ac, call, GFP_NOFS, async); + call->cb_break = fc->cb_break; + afs_use_fs_server(call, fc->cbi); + return afs_make_call(&fc->ac, call, GFP_NOFS, false); } /* * fetch data from a file */ -int afs_fs_fetch_data(struct afs_fs_cursor *fc, - struct key *key, - struct afs_vnode *vnode, - struct afs_read *req, - bool async) +int afs_fs_fetch_data(struct afs_fs_cursor *fc, struct afs_read *req) { + struct afs_vnode *vnode = fc->vnode; struct afs_call *call; struct afs_net *net = afs_v2net(vnode); __be32 *bp; @@ -557,7 +559,7 @@ int afs_fs_fetch_data(struct afs_fs_cursor *fc, if (upper_32_bits(req->pos) || upper_32_bits(req->len) || upper_32_bits(req->pos + req->len)) - return afs_fs_fetch_data64(fc, key, vnode, req, async); + return afs_fs_fetch_data64(fc, req); _enter(""); @@ -565,7 +567,7 @@ int afs_fs_fetch_data(struct afs_fs_cursor *fc, if (!call) return -ENOMEM; - call->key = key; + call->key = fc->key; call->reply[0] = vnode; call->reply[1] = NULL; /* volsync */ call->reply[2] = req; @@ -581,9 +583,9 @@ int afs_fs_fetch_data(struct afs_fs_cursor *fc, bp[5] = htonl(lower_32_bits(req->len)); atomic_inc(&req->usage); - call->cb_break = vnode->cb_break + fc->server->cb_s_break; - afs_use_fs_server(call, fc->server); - return afs_make_call(&fc->ac, call, GFP_NOFS, async); + call->cb_break = fc->cb_break; + afs_use_fs_server(call, fc->cbi); + return afs_make_call(&fc->ac, call, GFP_NOFS, false); } /* @@ -626,15 +628,13 @@ static const struct afs_call_type afs_RXFSCreateXXXX = { * create a file or make a directory */ int afs_fs_create(struct afs_fs_cursor *fc, - struct key *key, - struct afs_vnode *vnode, const char *name, umode_t mode, struct afs_fid *newfid, struct afs_file_status *newstatus, - struct afs_callback *newcb, - bool async) + struct afs_callback 
*newcb) { + struct afs_vnode *vnode = fc->vnode; struct afs_call *call; struct afs_net *net = afs_v2net(vnode); size_t namesz, reqsz, padsz; @@ -651,7 +651,7 @@ int afs_fs_create(struct afs_fs_cursor *fc, if (!call) return -ENOMEM; - call->key = key; + call->key = fc->key; call->reply[0] = vnode; call->reply[1] = newfid; call->reply[2] = newstatus; @@ -677,8 +677,8 @@ int afs_fs_create(struct afs_fs_cursor *fc, *bp++ = htonl(mode & S_IALLUGO); /* unix mode */ *bp++ = 0; /* segment size */ - afs_use_fs_server(call, fc->server); - return afs_make_call(&fc->ac, call, GFP_NOFS, async); + afs_use_fs_server(call, fc->cbi); + return afs_make_call(&fc->ac, call, GFP_NOFS, false); } /* @@ -717,13 +717,9 @@ static const struct afs_call_type afs_RXFSRemoveXXXX = { /* * remove a file or directory */ -int afs_fs_remove(struct afs_fs_cursor *fc, - struct key *key, - struct afs_vnode *vnode, - const char *name, - bool isdir, - bool async) +int afs_fs_remove(struct afs_fs_cursor *fc, const char *name, bool isdir) { + struct afs_vnode *vnode = fc->vnode; struct afs_call *call; struct afs_net *net = afs_v2net(vnode); size_t namesz, reqsz, padsz; @@ -739,7 +735,7 @@ int afs_fs_remove(struct afs_fs_cursor *fc, if (!call) return -ENOMEM; - call->key = key; + call->key = fc->key; call->reply[0] = vnode; /* marshall the parameters */ @@ -756,8 +752,8 @@ int afs_fs_remove(struct afs_fs_cursor *fc, bp = (void *) bp + padsz; } - afs_use_fs_server(call, fc->server); - return afs_make_call(&fc->ac, call, GFP_NOFS, async); + afs_use_fs_server(call, fc->cbi); + return afs_make_call(&fc->ac, call, GFP_NOFS, false); } /* @@ -797,13 +793,10 @@ static const struct afs_call_type afs_RXFSLink = { /* * make a hard link */ -int afs_fs_link(struct afs_fs_cursor *fc, - struct key *key, - struct afs_vnode *dvnode, - struct afs_vnode *vnode, - const char *name, - bool async) +int afs_fs_link(struct afs_fs_cursor *fc, struct afs_vnode *vnode, + const char *name) { + struct afs_vnode *dvnode = fc->vnode; 
struct afs_call *call; struct afs_net *net = afs_v2net(vnode); size_t namesz, reqsz, padsz; @@ -819,7 +812,7 @@ int afs_fs_link(struct afs_fs_cursor *fc, if (!call) return -ENOMEM; - call->key = key; + call->key = fc->key; call->reply[0] = dvnode; call->reply[1] = vnode; @@ -840,8 +833,8 @@ int afs_fs_link(struct afs_fs_cursor *fc, *bp++ = htonl(vnode->fid.vnode); *bp++ = htonl(vnode->fid.unique); - afs_use_fs_server(call, fc->server); - return afs_make_call(&fc->ac, call, GFP_NOFS, async); + afs_use_fs_server(call, fc->cbi); + return afs_make_call(&fc->ac, call, GFP_NOFS, false); } /* @@ -883,14 +876,12 @@ static const struct afs_call_type afs_RXFSSymlink = { * create a symbolic link */ int afs_fs_symlink(struct afs_fs_cursor *fc, - struct key *key, - struct afs_vnode *vnode, const char *name, const char *contents, struct afs_fid *newfid, - struct afs_file_status *newstatus, - bool async) + struct afs_file_status *newstatus) { + struct afs_vnode *vnode = fc->vnode; struct afs_call *call; struct afs_net *net = afs_v2net(vnode); size_t namesz, reqsz, padsz, c_namesz, c_padsz; @@ -911,7 +902,7 @@ int afs_fs_symlink(struct afs_fs_cursor *fc, if (!call) return -ENOMEM; - call->key = key; + call->key = fc->key; call->reply[0] = vnode; call->reply[1] = newfid; call->reply[2] = newstatus; @@ -943,8 +934,8 @@ int afs_fs_symlink(struct afs_fs_cursor *fc, *bp++ = htonl(S_IRWXUGO); /* unix mode */ *bp++ = 0; /* segment size */ - afs_use_fs_server(call, fc->server); - return afs_make_call(&fc->ac, call, GFP_NOFS, async); + afs_use_fs_server(call, fc->cbi); + return afs_make_call(&fc->ac, call, GFP_NOFS, false); } /* @@ -987,13 +978,11 @@ static const struct afs_call_type afs_RXFSRename = { * create a symbolic link */ int afs_fs_rename(struct afs_fs_cursor *fc, - struct key *key, - struct afs_vnode *orig_dvnode, const char *orig_name, struct afs_vnode *new_dvnode, - const char *new_name, - bool async) + const char *new_name) { + struct afs_vnode *orig_dvnode = fc->vnode; struct 
afs_call *call; struct afs_net *net = afs_v2net(orig_dvnode); size_t reqsz, o_namesz, o_padsz, n_namesz, n_padsz; @@ -1016,7 +1005,7 @@ int afs_fs_rename(struct afs_fs_cursor *fc, if (!call) return -ENOMEM; - call->key = key; + call->key = fc->key; call->reply[0] = orig_dvnode; call->reply[1] = new_dvnode; @@ -1045,8 +1034,8 @@ int afs_fs_rename(struct afs_fs_cursor *fc, bp = (void *) bp + n_padsz; } - afs_use_fs_server(call, fc->server); - return afs_make_call(&fc->ac, call, GFP_NOFS, async); + afs_use_fs_server(call, fc->cbi); + return afs_make_call(&fc->ac, call, GFP_NOFS, false); } /* @@ -1098,8 +1087,7 @@ static int afs_fs_store_data64(struct afs_fs_cursor *fc, struct afs_writeback *wb, pgoff_t first, pgoff_t last, unsigned offset, unsigned to, - loff_t size, loff_t pos, loff_t i_size, - bool async) + loff_t size, loff_t pos, loff_t i_size) { struct afs_vnode *vnode = wb->vnode; struct afs_call *call; @@ -1147,8 +1135,7 @@ static int afs_fs_store_data64(struct afs_fs_cursor *fc, *bp++ = htonl(i_size >> 32); *bp++ = htonl((u32) i_size); - afs_use_fs_server(call, fc->server); - return afs_make_call(&fc->ac, call, GFP_NOFS, async); + return afs_make_call(&fc->ac, call, GFP_NOFS, false); } /* @@ -1156,8 +1143,7 @@ static int afs_fs_store_data64(struct afs_fs_cursor *fc, */ int afs_fs_store_data(struct afs_fs_cursor *fc, struct afs_writeback *wb, pgoff_t first, pgoff_t last, - unsigned offset, unsigned to, - bool async) + unsigned offset, unsigned to) { struct afs_vnode *vnode = wb->vnode; struct afs_call *call; @@ -1184,7 +1170,7 @@ int afs_fs_store_data(struct afs_fs_cursor *fc, struct afs_writeback *wb, if (pos >> 32 || i_size >> 32 || size >> 32 || (pos + size) >> 32) return afs_fs_store_data64(fc, wb, first, last, offset, to, - size, pos, i_size, async); + size, pos, i_size); call = afs_alloc_flat_call(net, &afs_RXFSStoreData, (4 + 6 + 3) * 4, @@ -1221,8 +1207,8 @@ int afs_fs_store_data(struct afs_fs_cursor *fc, struct afs_writeback *wb, *bp++ = htonl(size); 
*bp++ = htonl(i_size); - afs_use_fs_server(call, fc->server); - return afs_make_call(&fc->ac, call, GFP_NOFS, async); + afs_use_fs_server(call, fc->cbi); + return afs_make_call(&fc->ac, call, GFP_NOFS, false); } /* @@ -1279,16 +1265,15 @@ static const struct afs_call_type afs_RXFSStoreData64_as_Status = { * set the attributes on a very large file, using FS.StoreData rather than * FS.StoreStatus so as to alter the file size also */ -static int afs_fs_setattr_size64(struct afs_fs_cursor *fc, struct key *key, - struct afs_vnode *vnode, struct iattr *attr, - bool async) +static int afs_fs_setattr_size64(struct afs_fs_cursor *fc, struct iattr *attr) { + struct afs_vnode *vnode = fc->vnode; struct afs_call *call; struct afs_net *net = afs_v2net(vnode); __be32 *bp; _enter(",%x,{%x:%u},,", - key_serial(key), vnode->fid.vid, vnode->fid.vnode); + key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode); ASSERT(attr->ia_valid & ATTR_SIZE); @@ -1298,7 +1283,7 @@ static int afs_fs_setattr_size64(struct afs_fs_cursor *fc, struct key *key, if (!call) return -ENOMEM; - call->key = key; + call->key = fc->key; call->reply[0] = vnode; call->store_version = vnode->status.data_version + 1; call->operation_ID = FSSTOREDATA; @@ -1319,28 +1304,27 @@ static int afs_fs_setattr_size64(struct afs_fs_cursor *fc, struct key *key, *bp++ = htonl(attr->ia_size >> 32); /* new file length */ *bp++ = htonl((u32) attr->ia_size); - afs_use_fs_server(call, fc->server); - return afs_make_call(&fc->ac, call, GFP_NOFS, async); + afs_use_fs_server(call, fc->cbi); + return afs_make_call(&fc->ac, call, GFP_NOFS, false); } /* * set the attributes on a file, using FS.StoreData rather than FS.StoreStatus * so as to alter the file size also */ -static int afs_fs_setattr_size(struct afs_fs_cursor *fc, struct key *key, - struct afs_vnode *vnode, struct iattr *attr, - bool async) +static int afs_fs_setattr_size(struct afs_fs_cursor *fc, struct iattr *attr) { + struct afs_vnode *vnode = fc->vnode; struct afs_call 
*call; struct afs_net *net = afs_v2net(vnode); __be32 *bp; _enter(",%x,{%x:%u},,", - key_serial(key), vnode->fid.vid, vnode->fid.vnode); + key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode); ASSERT(attr->ia_valid & ATTR_SIZE); if (attr->ia_size >> 32) - return afs_fs_setattr_size64(fc, key, vnode, attr, async); + return afs_fs_setattr_size64(fc, attr); call = afs_alloc_flat_call(net, &afs_RXFSStoreData_as_Status, (4 + 6 + 3) * 4, @@ -1348,7 +1332,7 @@ static int afs_fs_setattr_size(struct afs_fs_cursor *fc, struct key *key, if (!call) return -ENOMEM; - call->key = key; + call->key = fc->key; call->reply[0] = vnode; call->store_version = vnode->status.data_version + 1; call->operation_ID = FSSTOREDATA; @@ -1366,27 +1350,26 @@ static int afs_fs_setattr_size(struct afs_fs_cursor *fc, struct key *key, *bp++ = 0; /* size of write */ *bp++ = htonl(attr->ia_size); /* new file length */ - afs_use_fs_server(call, fc->server); - return afs_make_call(&fc->ac, call, GFP_NOFS, async); + afs_use_fs_server(call, fc->cbi); + return afs_make_call(&fc->ac, call, GFP_NOFS, false); } /* * set the attributes on a file, using FS.StoreData if there's a change in file * size, and FS.StoreStatus otherwise */ -int afs_fs_setattr(struct afs_fs_cursor *fc, struct key *key, - struct afs_vnode *vnode, struct iattr *attr, - bool async) +int afs_fs_setattr(struct afs_fs_cursor *fc, struct iattr *attr) { + struct afs_vnode *vnode = fc->vnode; struct afs_call *call; struct afs_net *net = afs_v2net(vnode); __be32 *bp; if (attr->ia_valid & ATTR_SIZE) - return afs_fs_setattr_size(fc, key, vnode, attr, async); + return afs_fs_setattr_size(fc, attr); _enter(",%x,{%x:%u},,", - key_serial(key), vnode->fid.vid, vnode->fid.vnode); + key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode); call = afs_alloc_flat_call(net, &afs_RXFSStoreStatus, (4 + 6) * 4, @@ -1394,7 +1377,7 @@ int afs_fs_setattr(struct afs_fs_cursor *fc, struct key *key, if (!call) return -ENOMEM; - call->key = key; + call->key = fc->key; 
call->reply[0] = vnode; call->operation_ID = FSSTORESTATUS; @@ -1407,8 +1390,8 @@ int afs_fs_setattr(struct afs_fs_cursor *fc, struct key *key, xdr_encode_AFS_StoreStatus(&bp, attr); - afs_use_fs_server(call, fc->server); - return afs_make_call(&fc->ac, call, GFP_NOFS, async); + afs_use_fs_server(call, fc->cbi); + return afs_make_call(&fc->ac, call, GFP_NOFS, false); } /* @@ -1606,11 +1589,9 @@ static const struct afs_call_type afs_RXFSGetVolumeStatus = { * fetch the status of a volume */ int afs_fs_get_volume_status(struct afs_fs_cursor *fc, - struct key *key, - struct afs_vnode *vnode, - struct afs_volume_status *vs, - bool async) + struct afs_volume_status *vs) { + struct afs_vnode *vnode = fc->vnode; struct afs_call *call; struct afs_net *net = afs_v2net(vnode); __be32 *bp; @@ -1628,7 +1609,7 @@ int afs_fs_get_volume_status(struct afs_fs_cursor *fc, return -ENOMEM; } - call->key = key; + call->key = fc->key; call->reply[0] = vnode; call->reply[1] = vs; call->reply[2] = tmpbuf; @@ -1638,8 +1619,8 @@ int afs_fs_get_volume_status(struct afs_fs_cursor *fc, bp[0] = htonl(FSGETVOLUMESTATUS); bp[1] = htonl(vnode->fid.vid); - afs_use_fs_server(call, fc->server); - return afs_make_call(&fc->ac, call, GFP_NOFS, async); + afs_use_fs_server(call, fc->cbi); + return afs_make_call(&fc->ac, call, GFP_NOFS, false); } /* @@ -1692,14 +1673,11 @@ static const struct afs_call_type afs_RXFSReleaseLock = { }; /* - * get a lock on a file + * Set a lock on a file */ -int afs_fs_set_lock(struct afs_fs_cursor *fc, - struct key *key, - struct afs_vnode *vnode, - afs_lock_type_t type, - bool async) +int afs_fs_set_lock(struct afs_fs_cursor *fc, afs_lock_type_t type) { + struct afs_vnode *vnode = fc->vnode; struct afs_call *call; struct afs_net *net = afs_v2net(vnode); __be32 *bp; @@ -1710,7 +1688,7 @@ int afs_fs_set_lock(struct afs_fs_cursor *fc, if (!call) return -ENOMEM; - call->key = key; + call->key = fc->key; call->reply[0] = vnode; /* marshall the parameters */ @@ -1721,18 +1699,16 
@@ int afs_fs_set_lock(struct afs_fs_cursor *fc, *bp++ = htonl(vnode->fid.unique); *bp++ = htonl(type); - afs_use_fs_server(call, fc->server); - return afs_make_call(&fc->ac, call, GFP_NOFS, async); + afs_use_fs_server(call, fc->cbi); + return afs_make_call(&fc->ac, call, GFP_NOFS, false); } /* * extend a lock on a file */ -int afs_fs_extend_lock(struct afs_fs_cursor *fc, - struct key *key, - struct afs_vnode *vnode, - bool async) +int afs_fs_extend_lock(struct afs_fs_cursor *fc) { + struct afs_vnode *vnode = fc->vnode; struct afs_call *call; struct afs_net *net = afs_v2net(vnode); __be32 *bp; @@ -1743,7 +1719,7 @@ int afs_fs_extend_lock(struct afs_fs_cursor *fc, if (!call) return -ENOMEM; - call->key = key; + call->key = fc->key; call->reply[0] = vnode; /* marshall the parameters */ @@ -1753,18 +1729,16 @@ int afs_fs_extend_lock(struct afs_fs_cursor *fc, *bp++ = htonl(vnode->fid.vnode); *bp++ = htonl(vnode->fid.unique); - afs_use_fs_server(call, fc->server); - return afs_make_call(&fc->ac, call, GFP_NOFS, async); + afs_use_fs_server(call, fc->cbi); + return afs_make_call(&fc->ac, call, GFP_NOFS, false); } /* * release a lock on a file */ -int afs_fs_release_lock(struct afs_fs_cursor *fc, - struct key *key, - struct afs_vnode *vnode, - bool async) +int afs_fs_release_lock(struct afs_fs_cursor *fc) { + struct afs_vnode *vnode = fc->vnode; struct afs_call *call; struct afs_net *net = afs_v2net(vnode); __be32 *bp; @@ -1775,7 +1749,7 @@ int afs_fs_release_lock(struct afs_fs_cursor *fc, if (!call) return -ENOMEM; - call->key = key; + call->key = fc->key; call->reply[0] = vnode; /* marshall the parameters */ @@ -1785,8 +1759,8 @@ int afs_fs_release_lock(struct afs_fs_cursor *fc, *bp++ = htonl(vnode->fid.vnode); *bp++ = htonl(vnode->fid.unique); - afs_use_fs_server(call, fc->server); - return afs_make_call(&fc->ac, call, GFP_NOFS, async); + afs_use_fs_server(call, fc->cbi); + return afs_make_call(&fc->ac, call, GFP_NOFS, false); } /* @@ -1809,17 +1783,17 @@ static const 
struct afs_call_type afs_RXFSGiveUpAllCallBacks = { /* * Flush all the callbacks we have on a server. */ -int afs_fs_give_up_all_callbacks(struct afs_server *server, +int afs_fs_give_up_all_callbacks(struct afs_net *net, + struct afs_server *server, struct afs_addr_cursor *ac, - struct key *key, - bool async) + struct key *key) { struct afs_call *call; __be32 *bp; _enter(""); - call = afs_alloc_flat_call(server->net, &afs_RXFSGiveUpAllCallBacks, 2 * 4, 0); + call = afs_alloc_flat_call(net, &afs_RXFSGiveUpAllCallBacks, 1 * 4, 0); if (!call) return -ENOMEM; @@ -1830,5 +1804,96 @@ int afs_fs_give_up_all_callbacks(struct afs_server *server, *bp++ = htonl(FSGIVEUPALLCALLBACKS); /* Can't take a ref on server */ - return afs_make_call(ac, call, GFP_NOFS, async); + return afs_make_call(ac, call, GFP_NOFS, false); +} + +/* + * Deliver reply data to an FS.GetCapabilities operation. + */ +static int afs_deliver_fs_get_capabilities(struct afs_call *call) +{ + u32 count; + int ret; + + _enter("{%u,%zu/%u}", call->unmarshall, call->offset, call->count); + +again: + switch (call->unmarshall) { + case 0: + call->offset = 0; + call->unmarshall++; + + /* Extract the capabilities word count */ + case 1: + ret = afs_extract_data(call, &call->tmp, + 1 * sizeof(__be32), + true); + if (ret < 0) + return ret; + + count = ntohl(call->tmp); + + call->count = count; + call->count2 = count; + call->offset = 0; + call->unmarshall++; + + /* Extract capabilities words */ + case 2: + count = min(call->count, 16U); + ret = afs_extract_data(call, call->buffer, + count * sizeof(__be32), + call->count > 16); + if (ret < 0) + return ret; + + /* TODO: Examine capabilities */ + + call->count -= count; + if (call->count > 0) + goto again; + call->offset = 0; + call->unmarshall++; + break; + } + + _leave(" = 0 [done]"); + return 0; +} + +/* + * FS.GetCapabilities operation type + */ +static const struct afs_call_type afs_RXFSGetCapabilities = { + .name = "FS.GetCapabilities", + .deliver = 
afs_deliver_fs_get_capabilities, + .destructor = afs_flat_call_destructor, +}; + +/* + * Probe a fileserver for the capabilities that it supports. This can + * return up to 196 words. + */ +int afs_fs_get_capabilities(struct afs_net *net, + struct afs_server *server, + struct afs_addr_cursor *ac, + struct key *key) +{ + struct afs_call *call; + __be32 *bp; + + _enter(""); + + call = afs_alloc_flat_call(net, &afs_RXFSGetCapabilities, 1 * 4, 16 * 4); + if (!call) + return -ENOMEM; + + call->key = key; + + /* marshall the parameters */ + bp = call->request; + *bp++ = htonl(FSGETCAPABILITIES); + + /* Can't take a ref on server */ + return afs_make_call(ac, call, GFP_NOFS, false); } diff --git a/fs/afs/inode.c b/fs/afs/inode.c index ee86d5ad22d1..5a2f5854f349 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -101,6 +101,35 @@ static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key) return 0; } +/* + * Fetch file status from the volume. + */ +int afs_fetch_status(struct afs_vnode *vnode, struct key *key) +{ + struct afs_fs_cursor fc; + int ret; + + _enter("%s,{%x:%u.%u,S=%lx}", + vnode->volume->name, + vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique, + vnode->flags); + + ret = -ERESTARTSYS; + if (afs_begin_vnode_operation(&fc, vnode, key)) { + while (afs_select_fileserver(&fc)) { + fc.cb_break = vnode->cb_break + vnode->cb_s_break; + afs_fs_fetch_file_status(&fc, NULL); + } + + afs_check_for_remote_deletion(&fc, fc.vnode); + afs_vnode_commit_status(&fc, vnode, fc.cb_break); + ret = afs_end_vnode_operation(&fc); + } + + _leave(" = %d", ret); + return ret; +} + /* * iget5() comparator */ @@ -205,7 +234,7 @@ struct inode *afs_iget_autocell(struct inode *dir, const char *dev_name, */ struct inode *afs_iget(struct super_block *sb, struct key *key, struct afs_fid *fid, struct afs_file_status *status, - struct afs_callback *cb) + struct afs_callback *cb, struct afs_cb_interest *cbi) { struct afs_iget_data data = { .fid = *fid }; struct afs_super_info *as; 
@@ -238,7 +267,7 @@ struct inode *afs_iget(struct super_block *sb, struct key *key, if (!status) { /* it's a remotely extant inode */ - ret = afs_vnode_fetch_status(vnode, key, true); + ret = afs_fetch_status(vnode, key); if (ret < 0) goto bad_inode; } else { @@ -255,6 +284,7 @@ struct inode *afs_iget(struct super_block *sb, struct key *key, vnode->cb_version = cb->version; vnode->cb_type = cb->type; vnode->cb_expires_at = cb->expiry; + vnode->cb_interest = afs_get_cb_interest(cbi); set_bit(AFS_VNODE_CB_PROMISED, &vnode->flags); } @@ -358,7 +388,7 @@ int afs_validate(struct afs_vnode *vnode, struct key *key) * access */ if (!test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) { _debug("not promised"); - ret = afs_vnode_fetch_status(vnode, key, false); + ret = afs_fetch_status(vnode, key); if (ret < 0) { if (ret == -ENOENT) { set_bit(AFS_VNODE_DELETED, &vnode->flags); @@ -468,6 +498,7 @@ void afs_evict_inode(struct inode *inode) */ int afs_setattr(struct dentry *dentry, struct iattr *attr) { + struct afs_fs_cursor fc; struct afs_vnode *vnode = AFS_FS_I(d_inode(dentry)); struct key *key; int ret; @@ -498,7 +529,18 @@ int afs_setattr(struct dentry *dentry, struct iattr *attr) } } - ret = afs_vnode_setattr(vnode, key, attr); + ret = -ERESTARTSYS; + if (afs_begin_vnode_operation(&fc, vnode, key)) { + while (afs_select_fileserver(&fc)) { + fc.cb_break = vnode->cb_break + vnode->cb_s_break; + afs_fs_setattr(&fc, attr); + } + + afs_check_for_remote_deletion(&fc, fc.vnode); + afs_vnode_commit_status(&fc, vnode, fc.cb_break); + ret = afs_end_vnode_operation(&fc); + } + if (!(attr->ia_valid & ATTR_FILE)) key_put(key); diff --git a/fs/afs/internal.h b/fs/afs/internal.h index df52bf18a263..1fadf40551fd 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -32,16 +32,6 @@ struct pagevec; struct afs_call; -typedef enum { - AFS_VL_NEW, /* new, uninitialised record */ - AFS_VL_CREATING, /* creating record */ - AFS_VL_VALID, /* record is pending */ - AFS_VL_NO_VOLUME, /* no such 
volume available */ - AFS_VL_UPDATING, /* update in progress */ - AFS_VL_VOLUME_DELETED, /* volume was deleted */ - AFS_VL_UNCERTAIN, /* uncertain state (update failed) */ -} __attribute__((packed)) afs_vlocation_state_t; - struct afs_mount_params { bool rwpath; /* T if the parent should be considered R/W */ bool force; /* T to force cell type */ @@ -76,8 +66,10 @@ enum afs_call_state { struct afs_addr_list { struct rcu_head rcu; /* Must be first */ refcount_t usage; + u32 version; /* Version */ unsigned short nr_addrs; unsigned short index; /* Address currently in use */ + unsigned short nr_ipv4; /* Number of IPv4 addresses */ struct sockaddr_rxrpc addrs[]; }; @@ -93,7 +85,7 @@ struct afs_call { struct key *key; /* security for this call */ struct afs_net *net; /* The network namespace */ struct afs_server *cm_server; /* Server affected by incoming CM call */ - struct afs_server *server; /* Server used by client call */ + struct afs_cb_interest *cbi; /* Callback interest for server used */ void *request; /* request data (first part) */ struct address_space *mapping; /* page set */ struct afs_writeback *wb; /* writeback being performed */ @@ -192,7 +184,6 @@ struct afs_super_info { struct afs_net *net; /* Network namespace */ struct afs_cell *cell; /* The cell in which the volume resides */ struct afs_volume *volume; /* volume record */ - char rwparent; /* T if parent is R/W AFS volume */ }; static inline struct afs_super_info *AFS_FS_S(struct super_block *sb) @@ -228,26 +219,26 @@ struct afs_net { spinlock_t proc_cells_lock; struct list_head proc_cells; - /* Volume location database */ - struct list_head vl_updates; /* VL records in need-update order */ - struct list_head vl_graveyard; /* Inactive VL records */ - struct delayed_work vl_reaper; - struct delayed_work vl_updater; - spinlock_t vl_updates_lock; - spinlock_t vl_graveyard_lock; + /* Known servers. 
Theoretically each fileserver can only be in one + * cell, but in practice, people create aliases and subsets and there's + * no easy way to distinguish them. + */ + seqlock_t fs_lock; /* For fs_servers */ + struct rb_root fs_servers; /* afs_server (by server UUID or address) */ + struct list_head fs_updates; /* afs_server (by update_at) */ + struct hlist_head fs_proc; /* procfs servers list */ + + struct hlist_head fs_addresses4; /* afs_server (by lowest IPv4 addr) */ + struct hlist_head fs_addresses6; /* afs_server (by lowest IPv6 addr) */ + seqlock_t fs_addr_lock; /* For fs_addresses[46] */ + + struct work_struct fs_manager; + struct timer_list fs_timer; + atomic_t servers_outstanding; /* File locking renewal management */ struct mutex lock_manager_mutex; - /* Server database */ - struct rb_root servers; /* Active servers */ - rwlock_t servers_lock; - struct list_head server_graveyard; /* Inactive server LRU list */ - spinlock_t server_graveyard_lock; - struct timer_list server_timer; - struct work_struct server_reaper; - atomic_t servers_outstanding; - /* Misc */ struct proc_dir_entry *proc_afs; /* /proc/net/afs directory */ }; @@ -264,7 +255,21 @@ enum afs_cell_state { }; /* - * AFS cell record + * AFS cell record. + * + * This is a tricky concept to get right as it is possible to create aliases + * simply by pointing AFSDB/SRV records for two names at the same set of VL + * servers; it is also possible to do things like setting up two sets of VL + * servers, one of which provides a superset of the volumes provided by the + * other (for internal/external division, for example). + * + * Cells only exist in the sense that (a) a cell's name maps to a set of VL + * servers and (b) a cell's name is used by the client to select the key to use + * for authentication and encryption. The cell name is not typically used in + * the protocol. + * + * There is no easy way to determine if two cells are aliases or one is a + * subset of another. 
*/ struct afs_cell { union { @@ -278,14 +283,6 @@ struct afs_cell { #ifdef CONFIG_AFS_FSCACHE struct fscache_cookie *cache; /* caching cookie */ #endif - - /* server record management */ - rwlock_t servers_lock; /* active server list lock */ - struct list_head servers; /* active server list */ - - /* volume location record management */ - struct rw_semaphore vl_sem; /* volume management serialisation semaphore */ - struct list_head vl_list; /* cell's active VL record list */ time64_t dns_expiry; /* Time AFSDB/SRV record expires */ time64_t last_inactive; /* Time of last drop of usage count */ atomic_t usage; @@ -298,9 +295,11 @@ struct afs_cell { enum afs_cell_state state; short error; - spinlock_t vl_lock; /* vl_list lock */ + /* Active fileserver interaction state. */ + struct list_head proc_volumes; /* procfs volume list */ + rwlock_t proc_lock; - /* VLDB server list. */ + /* VL server list. */ rwlock_t vl_addrs_lock; /* Lock on vl_addrs */ struct afs_addr_list __rcu *vl_addrs; /* List of VL servers */ u8 name_len; /* Length of name */ @@ -308,65 +307,62 @@ struct afs_cell { }; /* - * entry in the cached volume location catalogue + * Cached VLDB entry. + * + * This is pointed to by cell->vldb_entries, indexed by name. 
*/ -struct afs_cache_vlocation { - /* volume name (lowercase, padded with NULs) */ - uint8_t name[AFS_MAXVOLNAME + 1]; +struct afs_vldb_entry { + afs_volid_t vid[3]; /* Volume IDs for R/W, R/O and Bak volumes */ - uint8_t nservers; /* number of entries used in servers[] */ - uint8_t vidmask; /* voltype mask for vid[] */ - uint8_t srvtmask[8]; /* voltype masks for servers[] */ + unsigned long flags; +#define AFS_VLDB_HAS_RW 0 /* - R/W volume exists */ +#define AFS_VLDB_HAS_RO 1 /* - R/O volume exists */ +#define AFS_VLDB_HAS_BAK 2 /* - Backup volume exists */ +#define AFS_VLDB_QUERY_VALID 3 /* - Record is valid */ +#define AFS_VLDB_QUERY_ERROR 4 /* - VL server returned error */ + + uuid_t fs_server[AFS_NMAXNSERVERS]; + u8 fs_mask[AFS_NMAXNSERVERS]; #define AFS_VOL_VTM_RW 0x01 /* R/W version of the volume is available (on this server) */ #define AFS_VOL_VTM_RO 0x02 /* R/O version of the volume is available (on this server) */ #define AFS_VOL_VTM_BAK 0x04 /* backup version of the volume is available (on this server) */ - - afs_volid_t vid[3]; /* volume IDs for R/W, R/O and Bak volumes */ - struct sockaddr_rxrpc servers[8]; /* fileserver addresses */ - time_t rtime; /* last retrieval time */ + short error; + u8 nr_servers; /* Number of server records */ + u8 name_len; + u8 name[AFS_MAXVOLNAME + 1]; /* NUL-padded volume name */ }; /* - * AFS volume location record - */ -struct afs_vlocation { - atomic_t usage; - time64_t time_of_death; /* time at which put reduced usage to 0 */ - struct list_head link; /* link in cell volume location list */ - struct list_head grave; /* link in master graveyard list */ - struct list_head update; /* link in master update list */ - struct afs_cell *cell; /* cell to which volume belongs */ - struct afs_cache_vlocation vldb; /* volume information DB record */ - struct afs_volume *vols[3]; /* volume access record pointer (index by type) */ - wait_queue_head_t waitq; /* status change waitqueue */ - time64_t update_at; /* time at which record 
should be updated */ - spinlock_t lock; /* access lock */ - afs_vlocation_state_t state; /* volume location state */ - unsigned short upd_rej_cnt; /* ENOMEDIUM count during update */ - unsigned short upd_busy_cnt; /* EBUSY count during update */ - bool valid; /* T if valid */ -}; - -/* - * AFS fileserver record + * Record of fileserver with which we're actively communicating. */ struct afs_server { - atomic_t usage; - time64_t time_of_death; /* time at which put reduced usage to 0 */ - struct afs_addr_list __rcu *addrs; /* List of addresses for this server */ - struct afs_net *net; /* Network namespace in which the server resides */ - struct afs_cell *cell; /* cell in which server resides */ - struct list_head link; /* link in cell's server list */ - struct list_head grave; /* link in master graveyard list */ + struct rcu_head rcu; + union { + uuid_t uuid; /* Server ID */ + struct afs_uuid _uuid; + }; - struct rb_node master_rb; /* link in master by-addr tree */ - struct rw_semaphore sem; /* access lock */ + struct afs_addr_list __rcu *addresses; + struct rb_node uuid_rb; /* Link in net->fs_servers */ + struct hlist_node addr4_link; /* Link in net->fs_addresses4 */ + struct hlist_node addr6_link; /* Link in net->fs_addresses6 */ + struct hlist_node proc_link; /* Link in net->fs_proc */ + struct afs_server *gc_next; /* Next server in manager's list */ + time64_t put_time; /* Time at which last put */ + time64_t update_at; /* Time at which to next update the record */ unsigned long flags; -#define AFS_SERVER_NEW 0 /* New server, don't inc cb_s_break */ +#define AFS_SERVER_FL_NEW 0 /* New server, don't inc cb_s_break */ +#define AFS_SERVER_FL_NOT_READY 1 /* The record is not ready for use */ +#define AFS_SERVER_FL_NOT_FOUND 2 /* VL server says no such server */ +#define AFS_SERVER_FL_VL_FAIL 3 /* Failed to access VL server */ +#define AFS_SERVER_FL_UPDATING 4 +#define AFS_SERVER_FL_PROBED 5 /* The fileserver has been probed */ +#define AFS_SERVER_FL_PROBING 6 /*
Fileserver is being probed */ + atomic_t usage; + u32 addr_version; /* Address list version */ /* file service access */ - int fs_state; /* 0 or reason FS currently marked dead (-errno) */ - spinlock_t fs_lock; /* access lock */ + rwlock_t fs_lock; /* access lock */ /* callback promise management */ struct list_head cb_interests; /* List of superblocks using this server */ @@ -386,32 +382,50 @@ struct afs_cb_interest { }; /* - * AFS volume access record + * Replaceable server list. */ -struct afs_volume { - atomic_t usage; - struct afs_cell *cell; /* cell to which belongs (unrefd ptr) */ - struct afs_vlocation *vlocation; /* volume location */ -#ifdef CONFIG_AFS_FSCACHE - struct fscache_cookie *cache; /* caching cookie */ -#endif - afs_volid_t vid; /* volume ID */ - afs_voltype_t type; /* type of volume */ - char type_force; /* force volume type (suppress R/O -> R/W) */ - unsigned short nservers; /* number of server slots filled */ - unsigned short rjservers; /* number of servers discarded due to -ENOMEDIUM */ - struct afs_server *servers[8]; /* servers on which volume resides (ordered) */ - struct afs_cb_interest *cb_interests[8]; /* Interests on servers for callbacks */ - struct rw_semaphore server_sem; /* lock for accessing current server */ +struct afs_server_entry { + struct afs_server *server; + struct afs_cb_interest *cb_interest; +}; + +struct afs_server_list { + refcount_t usage; + unsigned short nr_servers; + unsigned short index; /* Server currently in use */ + unsigned short vnovol_mask; /* Servers to be skipped due to VNOVOL */ + unsigned int seq; /* Set to ->servers_seq when installed */ + struct afs_server_entry servers[]; }; /* - * vnode catalogue entry + * Live AFS volume management. 
*/ -struct afs_cache_vnode { - afs_vnodeid_t vnode_id; /* vnode ID */ - unsigned vnode_unique; /* vnode ID uniquifier */ - afs_dataversion_t data_version; /* data version */ +struct afs_volume { + afs_volid_t vid; /* volume ID */ + atomic_t usage; + time64_t update_at; /* Time at which to next update */ + struct afs_cell *cell; /* Cell to which belongs (pins ref) */ + struct list_head proc_link; /* Link in cell->proc_volumes */ + unsigned long flags; +#define AFS_VOLUME_NEEDS_UPDATE 0 /* - T if an update needs performing */ +#define AFS_VOLUME_UPDATING 1 /* - T if an update is in progress */ +#define AFS_VOLUME_WAIT 2 /* - T if users must wait for update */ +#define AFS_VOLUME_DELETED 3 /* - T if volume appears deleted */ +#define AFS_VOLUME_OFFLINE 4 /* - T if volume offline notice given */ +#define AFS_VOLUME_BUSY 5 /* - T if volume busy notice given */ +#ifdef CONFIG_AFS_FSCACHE + struct fscache_cookie *cache; /* caching cookie */ +#endif + struct afs_server_list *servers; /* List of servers on which volume resides */ + rwlock_t servers_lock; /* Lock for ->servers */ + unsigned int servers_seq; /* Incremented each time ->servers changes */ + + afs_voltype_t type; /* type of volume */ + short error; + char type_force; /* force volume type (suppress R/O -> R/W) */ + u8 name_len; + u8 name[AFS_MAXVOLNAME + 1]; /* NUL-padded volume name */ }; /* @@ -427,10 +441,8 @@ struct afs_vnode { struct fscache_cookie *cache; /* caching cookie */ #endif struct afs_permits *permit_cache; /* cache of permits so far obtained */ + struct mutex io_lock; /* Lock for serialising I/O on this vnode */ struct mutex validate_lock; /* lock for validating this vnode */ - wait_queue_head_t update_waitq; /* status fetch waitqueue */ - int update_cnt; /* number of outstanding ops that will update the - * status */ spinlock_t writeback_lock; /* lock for writebacks */ spinlock_t lock; /* waitqueue/flags lock */ unsigned long flags; @@ -501,6 +513,7 @@ struct afs_interface { struct afs_addr_cursor {
struct afs_addr_list *alist; /* Current address list (pins ref) */ struct sockaddr_rxrpc *addr; + u32 abort_code; unsigned short start; /* Starting point in alist->addrs[] */ unsigned short index; /* Wrapping offset from start to current addr */ short error; @@ -513,7 +526,21 @@ struct afs_addr_cursor { */ struct afs_fs_cursor { struct afs_addr_cursor ac; - struct afs_server *server; /* Current server (pins ref) */ + struct afs_vnode *vnode; + struct afs_server_list *server_list; /* Current server list (pins ref) */ + struct afs_cb_interest *cbi; /* Server on which this resides (pins ref) */ + struct key *key; /* Key for the server */ + unsigned int cb_break; /* cb_break + cb_s_break before the call */ + unsigned int cb_break_2; /* cb_break + cb_s_break (2nd vnode) */ + unsigned char start; /* Initial index in server list */ + unsigned char index; /* Number of servers tried beyond start */ + unsigned short flags; +#define AFS_FS_CURSOR_STOP 0x0001 /* Set to cease iteration */ +#define AFS_FS_CURSOR_VBUSY 0x0002 /* Set if seen VBUSY */ +#define AFS_FS_CURSOR_VMOVED 0x0004 /* Set if seen VMOVED */ +#define AFS_FS_CURSOR_VNOVOL 0x0008 /* Set if seen VNOVOL */ +#define AFS_FS_CURSOR_CUR_ONLY 0x0010 /* Set if current server only (file lock held) */ +#define AFS_FS_CURSOR_NO_VSLEEP 0x0020 /* Set to prevent sleep on VBUSY, VOFFLINE, ... 
*/ }; /*****************************************************************************/ @@ -537,6 +564,8 @@ extern bool afs_iterate_addresses(struct afs_addr_cursor *); extern int afs_end_cursor(struct afs_addr_cursor *); extern int afs_set_vl_cursor(struct afs_addr_cursor *, struct afs_cell *); +extern void afs_merge_fs_addr4(struct afs_addr_list *, __be32); + /* * cache.c */ @@ -558,10 +587,9 @@ extern void afs_init_callback_state(struct afs_server *); extern void afs_break_callback(struct afs_vnode *); extern void afs_break_callbacks(struct afs_server *, size_t,struct afs_callback[]); -extern int afs_register_server_cb_interest(struct afs_vnode *, struct afs_cb_interest **, - struct afs_server *); +extern int afs_register_server_cb_interest(struct afs_vnode *, struct afs_server_entry *); extern void afs_put_cb_interest(struct afs_net *, struct afs_cb_interest *); -extern void afs_clear_callback_interests(struct afs_net *, struct afs_volume *); +extern void afs_clear_callback_interests(struct afs_net *, struct afs_server_list *); static inline struct afs_cb_interest *afs_get_cb_interest(struct afs_cb_interest *cbi) { @@ -603,6 +631,7 @@ extern const struct file_operations afs_file_operations; extern int afs_open(struct inode *, struct file *); extern int afs_release(struct inode *, struct file *); +extern int afs_fetch_data(struct afs_vnode *, struct key *, struct afs_read *); extern int afs_page_filler(void *, struct page *); extern void afs_put_read(struct afs_read *); @@ -619,51 +648,40 @@ extern int afs_flock(struct file *, int, struct file_lock *); /* * fsclient.c */ -extern int afs_fs_fetch_file_status(struct afs_fs_cursor *, struct key *, - struct afs_vnode *, struct afs_volsync *, - bool); -extern int afs_fs_give_up_callbacks(struct afs_net *, struct afs_server *, bool); -extern int afs_fs_fetch_data(struct afs_fs_cursor *, struct key *, - struct afs_vnode *, struct afs_read *, bool); -extern int afs_fs_create(struct afs_fs_cursor *, struct key *, - struct 
afs_vnode *, const char *, umode_t, - struct afs_fid *, struct afs_file_status *, - struct afs_callback *, bool); -extern int afs_fs_remove(struct afs_fs_cursor *, struct key *, - struct afs_vnode *, const char *, bool, bool); -extern int afs_fs_link(struct afs_fs_cursor *, struct key *, struct afs_vnode *, - struct afs_vnode *, const char *, bool); -extern int afs_fs_symlink(struct afs_fs_cursor *, struct key *, - struct afs_vnode *, const char *, const char *, - struct afs_fid *, struct afs_file_status *, bool); -extern int afs_fs_rename(struct afs_fs_cursor *, struct key *, - struct afs_vnode *, const char *, - struct afs_vnode *, const char *, bool); +extern int afs_fs_fetch_file_status(struct afs_fs_cursor *, struct afs_volsync *); +extern int afs_fs_give_up_callbacks(struct afs_net *, struct afs_server *); +extern int afs_fs_fetch_data(struct afs_fs_cursor *, struct afs_read *); +extern int afs_fs_create(struct afs_fs_cursor *, const char *, umode_t, + struct afs_fid *, struct afs_file_status *, struct afs_callback *); +extern int afs_fs_remove(struct afs_fs_cursor *, const char *, bool); +extern int afs_fs_link(struct afs_fs_cursor *, struct afs_vnode *, const char *); +extern int afs_fs_symlink(struct afs_fs_cursor *, const char *, const char *, + struct afs_fid *, struct afs_file_status *); +extern int afs_fs_rename(struct afs_fs_cursor *, const char *, + struct afs_vnode *, const char *); extern int afs_fs_store_data(struct afs_fs_cursor *, struct afs_writeback *, - pgoff_t, pgoff_t, unsigned, unsigned, bool); -extern int afs_fs_setattr(struct afs_fs_cursor *, struct key *, - struct afs_vnode *, struct iattr *, bool); -extern int afs_fs_get_volume_status(struct afs_fs_cursor *, struct key *, - struct afs_vnode *, - struct afs_volume_status *, bool); -extern int afs_fs_set_lock(struct afs_fs_cursor *, struct key *, - struct afs_vnode *, afs_lock_type_t, bool); -extern int afs_fs_extend_lock(struct afs_fs_cursor *, struct key *, - struct afs_vnode *, bool); 
-extern int afs_fs_release_lock(struct afs_fs_cursor *, struct key *, - struct afs_vnode *, bool); -extern int afs_fs_give_up_all_callbacks(struct afs_server *, struct afs_addr_cursor *, - struct key *, bool); + pgoff_t, pgoff_t, unsigned, unsigned); +extern int afs_fs_setattr(struct afs_fs_cursor *, struct iattr *); +extern int afs_fs_get_volume_status(struct afs_fs_cursor *, struct afs_volume_status *); +extern int afs_fs_set_lock(struct afs_fs_cursor *, afs_lock_type_t); +extern int afs_fs_extend_lock(struct afs_fs_cursor *); +extern int afs_fs_release_lock(struct afs_fs_cursor *); +extern int afs_fs_give_up_all_callbacks(struct afs_net *, struct afs_server *, + struct afs_addr_cursor *, struct key *); +extern int afs_fs_get_capabilities(struct afs_net *, struct afs_server *, + struct afs_addr_cursor *, struct key *); /* * inode.c */ +extern int afs_fetch_status(struct afs_vnode *, struct key *); extern int afs_iget5_test(struct inode *, void *); extern struct inode *afs_iget_autocell(struct inode *, const char *, int, struct key *); extern struct inode *afs_iget(struct super_block *, struct key *, struct afs_fid *, struct afs_file_status *, - struct afs_callback *); + struct afs_callback *, + struct afs_cb_interest *); extern void afs_zap_data(struct afs_vnode *); extern int afs_validate(struct afs_vnode *, struct key *); extern int afs_getattr(const struct path *, struct kstat *, u32, unsigned int); @@ -733,6 +751,15 @@ extern void __net_exit afs_proc_cleanup(struct afs_net *); extern int afs_proc_cell_setup(struct afs_net *, struct afs_cell *); extern void afs_proc_cell_remove(struct afs_net *, struct afs_cell *); +/* + * rotate.c + */ +extern bool afs_begin_vnode_operation(struct afs_fs_cursor *, struct afs_vnode *, + struct key *); +extern bool afs_select_fileserver(struct afs_fs_cursor *); +extern bool afs_select_current_fileserver(struct afs_fs_cursor *); +extern int afs_end_vnode_operation(struct afs_fs_cursor *); + /* * rxrpc.c */ @@ -779,14 +806,31 @@ 
static inline struct afs_server *afs_get_server(struct afs_server *server) return server; } -extern void afs_server_timer(struct timer_list *); -extern struct afs_server *afs_lookup_server(struct afs_cell *, - struct sockaddr_rxrpc *); extern struct afs_server *afs_find_server(struct afs_net *, const struct sockaddr_rxrpc *); +extern struct afs_server *afs_find_server_by_uuid(struct afs_net *, const uuid_t *); +extern struct afs_server *afs_lookup_server(struct afs_cell *, struct key *, const uuid_t *); extern void afs_put_server(struct afs_net *, struct afs_server *); -extern void afs_reap_server(struct work_struct *); +extern void afs_manage_servers(struct work_struct *); +extern void afs_servers_timer(struct timer_list *); extern void __net_exit afs_purge_servers(struct afs_net *); +extern bool afs_probe_fileserver(struct afs_fs_cursor *); +extern bool afs_check_server_record(struct afs_fs_cursor *, struct afs_server *); + +/* + * server_list.c + */ +static inline struct afs_server_list *afs_get_serverlist(struct afs_server_list *slist) +{ + refcount_inc(&slist->usage); + return slist; +} + +extern void afs_put_serverlist(struct afs_net *, struct afs_server_list *); +extern struct afs_server_list *afs_alloc_server_list(struct afs_cell *, struct key *, + struct afs_vldb_entry *, + u8); +extern bool afs_annotate_server_list(struct afs_server_list *, struct afs_server_list *); /* * super.c @@ -797,86 +841,27 @@ extern void __exit afs_fs_exit(void); /* * vlclient.c */ -extern int afs_vl_get_entry_by_name(struct afs_net *, struct afs_addr_cursor *, - struct key *, const char *, - struct afs_cache_vlocation *, bool); -extern int afs_vl_get_entry_by_id(struct afs_net *, struct afs_addr_cursor *, - struct key *, afs_volid_t, afs_voltype_t, - struct afs_cache_vlocation *, bool); - -/* - * vlocation.c - */ -extern struct workqueue_struct *afs_vlocation_update_worker; - -#define afs_get_vlocation(V) do { atomic_inc(&(V)->usage); } while(0) - -extern struct afs_vlocation 
*afs_vlocation_lookup(struct afs_net *, - struct afs_cell *, - struct key *, - const char *, size_t); -extern void afs_put_vlocation(struct afs_net *, struct afs_vlocation *); -extern void afs_vlocation_updater(struct work_struct *); -extern void afs_vlocation_reaper(struct work_struct *); -extern void __net_exit afs_vlocation_purge(struct afs_net *); - -/* - * vnode.c - */ -static inline struct afs_vnode *AFS_FS_I(struct inode *inode) -{ - return container_of(inode, struct afs_vnode, vfs_inode); -} - -static inline struct inode *AFS_VNODE_TO_I(struct afs_vnode *vnode) -{ - return &vnode->vfs_inode; -} - -extern void afs_vnode_finalise_status_update(struct afs_vnode *, - struct afs_server *); -extern int afs_vnode_fetch_status(struct afs_vnode *, struct key *, bool); -extern int afs_vnode_fetch_data(struct afs_vnode *, struct key *, - struct afs_read *); -extern int afs_vnode_create(struct afs_vnode *, struct key *, const char *, - umode_t, struct afs_fid *, struct afs_file_status *, - struct afs_callback *, struct afs_server **); -extern int afs_vnode_remove(struct afs_vnode *, struct key *, const char *, - bool); -extern int afs_vnode_link(struct afs_vnode *, struct afs_vnode *, struct key *, - const char *); -extern int afs_vnode_symlink(struct afs_vnode *, struct key *, const char *, - const char *, struct afs_fid *, - struct afs_file_status *, struct afs_server **); -extern int afs_vnode_rename(struct afs_vnode *, struct afs_vnode *, - struct key *, const char *, const char *); -extern int afs_vnode_store_data(struct afs_writeback *, pgoff_t, pgoff_t, - unsigned, unsigned); -extern int afs_vnode_setattr(struct afs_vnode *, struct key *, struct iattr *); -extern int afs_vnode_get_volume_status(struct afs_vnode *, struct key *, - struct afs_volume_status *); -extern int afs_vnode_set_lock(struct afs_vnode *, struct key *, - afs_lock_type_t); -extern int afs_vnode_extend_lock(struct afs_vnode *, struct key *); -extern int afs_vnode_release_lock(struct afs_vnode 
*, struct key *); +extern struct afs_vldb_entry *afs_vl_get_entry_by_name_u(struct afs_net *, + struct afs_addr_cursor *, + struct key *, const char *, int); +extern struct afs_addr_list *afs_vl_get_addrs_u(struct afs_net *, struct afs_addr_cursor *, + struct key *, const uuid_t *); /* * volume.c */ -static inline struct afs_volume *afs_get_volume(struct afs_volume *volume) +static inline struct afs_volume *__afs_get_volume(struct afs_volume *volume) { if (volume) atomic_inc(&volume->usage); return volume; } +extern struct afs_volume *afs_create_volume(struct afs_mount_params *); +extern void afs_activate_volume(struct afs_volume *); +extern void afs_deactivate_volume(struct afs_volume *); extern void afs_put_volume(struct afs_cell *, struct afs_volume *); -extern struct afs_volume *afs_volume_lookup(struct afs_mount_params *); -extern void afs_init_fs_cursor(struct afs_fs_cursor *, struct afs_vnode *); -extern int afs_set_fs_cursor(struct afs_fs_cursor *, struct afs_vnode *); -extern bool afs_volume_pick_fileserver(struct afs_fs_cursor *, struct afs_vnode *); -extern bool afs_iterate_fs_cursor(struct afs_fs_cursor *, struct afs_vnode *); -extern int afs_end_fs_cursor(struct afs_fs_cursor *, struct afs_net *); +extern int afs_check_volume_status(struct afs_volume *, struct key *); /* * write.c @@ -903,6 +888,38 @@ extern int afs_fsync(struct file *, loff_t, loff_t, int); extern const struct xattr_handler *afs_xattr_handlers[]; extern ssize_t afs_listxattr(struct dentry *, char *, size_t); + +/* + * Miscellaneous inline functions. 
+ */ +static inline struct afs_vnode *AFS_FS_I(struct inode *inode) +{ + return container_of(inode, struct afs_vnode, vfs_inode); +} + +static inline struct inode *AFS_VNODE_TO_I(struct afs_vnode *vnode) +{ + return &vnode->vfs_inode; +} + +static inline void afs_vnode_commit_status(struct afs_fs_cursor *fc, + struct afs_vnode *vnode, + unsigned int cb_break) +{ + if (fc->ac.error == 0) + afs_cache_permit(vnode, fc->key, cb_break); +} + +static inline void afs_check_for_remote_deletion(struct afs_fs_cursor *fc, + struct afs_vnode *vnode) +{ + if (fc->ac.error == -ENOENT) { + set_bit(AFS_VNODE_DELETED, &vnode->flags); + afs_break_callback(vnode); + } +} + + /*****************************************************************************/ /* * debug tracing diff --git a/fs/afs/main.c b/fs/afs/main.c index e7f87d723761..15a02a05ff40 100644 --- a/fs/afs/main.c +++ b/fs/afs/main.c @@ -55,18 +55,17 @@ static int __net_init afs_net_init(struct afs_net *net) spin_lock_init(&net->proc_cells_lock); INIT_LIST_HEAD(&net->proc_cells); - INIT_LIST_HEAD(&net->vl_updates); - INIT_LIST_HEAD(&net->vl_graveyard); - INIT_DELAYED_WORK(&net->vl_reaper, afs_vlocation_reaper); - INIT_DELAYED_WORK(&net->vl_updater, afs_vlocation_updater); - spin_lock_init(&net->vl_updates_lock); - spin_lock_init(&net->vl_graveyard_lock); - net->servers = RB_ROOT; - rwlock_init(&net->servers_lock); - INIT_LIST_HEAD(&net->server_graveyard); - spin_lock_init(&net->server_graveyard_lock); - INIT_WORK(&net->server_reaper, afs_reap_server); - timer_setup(&net->server_timer, afs_server_timer, 0); + seqlock_init(&net->fs_lock); + net->fs_servers = RB_ROOT; + INIT_LIST_HEAD(&net->fs_updates); + INIT_HLIST_HEAD(&net->fs_proc); + + INIT_HLIST_HEAD(&net->fs_addresses4); + INIT_HLIST_HEAD(&net->fs_addresses6); + seqlock_init(&net->fs_addr_lock); + + INIT_WORK(&net->fs_manager, afs_manage_servers); + timer_setup(&net->fs_timer, afs_servers_timer, 0); /* Register the /proc stuff */ ret = afs_proc_init(net); @@ -87,8 +86,8 
@@ static int __net_init afs_net_init(struct afs_net *net) error_open_socket: net->live = false; - afs_vlocation_purge(net); afs_cell_purge(net); + afs_purge_servers(net); error_cell_init: net->live = false; afs_proc_cleanup(net); @@ -103,9 +102,8 @@ error_proc: static void __net_exit afs_net_exit(struct afs_net *net) { net->live = false; - afs_purge_servers(net); - afs_vlocation_purge(net); afs_cell_purge(net); + afs_purge_servers(net); afs_close_socket(net); afs_proc_cleanup(net); } @@ -125,10 +123,6 @@ static int __init afs_init(void) afs_async_calls = alloc_workqueue("kafsd", WQ_MEM_RECLAIM, 0); if (!afs_async_calls) goto error_async; - afs_vlocation_update_worker = - alloc_workqueue("kafs_vlupdated", WQ_MEM_RECLAIM, 0); - if (!afs_vlocation_update_worker) - goto error_vl_up; afs_lock_manager = alloc_workqueue("kafs_lockd", WQ_MEM_RECLAIM, 0); if (!afs_lock_manager) goto error_lockmgr; @@ -160,8 +154,6 @@ error_cache: #endif destroy_workqueue(afs_lock_manager); error_lockmgr: - destroy_workqueue(afs_vlocation_update_worker); -error_vl_up: destroy_workqueue(afs_async_calls); error_async: destroy_workqueue(afs_wq); @@ -189,7 +181,6 @@ static void __exit afs_exit(void) fscache_unregister_netfs(&afs_cache_netfs); #endif destroy_workqueue(afs_lock_manager); - destroy_workqueue(afs_vlocation_update_worker); destroy_workqueue(afs_async_calls); destroy_workqueue(afs_wq); afs_clean_up_permit_cache(); diff --git a/fs/afs/proc.c b/fs/afs/proc.c index 9cf9ce88a8dd..4508dd54f789 100644 --- a/fs/afs/proc.c +++ b/fs/afs/proc.c @@ -105,22 +105,22 @@ static const struct file_operations afs_proc_cell_vlservers_fops = { .release = seq_release, }; -static int afs_proc_cell_servers_open(struct inode *inode, struct file *file); -static void *afs_proc_cell_servers_start(struct seq_file *p, loff_t *pos); -static void *afs_proc_cell_servers_next(struct seq_file *p, void *v, +static int afs_proc_servers_open(struct inode *inode, struct file *file); +static void 
*afs_proc_servers_start(struct seq_file *p, loff_t *pos); +static void *afs_proc_servers_next(struct seq_file *p, void *v, loff_t *pos); -static void afs_proc_cell_servers_stop(struct seq_file *p, void *v); -static int afs_proc_cell_servers_show(struct seq_file *m, void *v); +static void afs_proc_servers_stop(struct seq_file *p, void *v); +static int afs_proc_servers_show(struct seq_file *m, void *v); -static const struct seq_operations afs_proc_cell_servers_ops = { - .start = afs_proc_cell_servers_start, - .next = afs_proc_cell_servers_next, - .stop = afs_proc_cell_servers_stop, - .show = afs_proc_cell_servers_show, +static const struct seq_operations afs_proc_servers_ops = { + .start = afs_proc_servers_start, + .next = afs_proc_servers_next, + .stop = afs_proc_servers_stop, + .show = afs_proc_servers_show, }; -static const struct file_operations afs_proc_cell_servers_fops = { - .open = afs_proc_cell_servers_open, +static const struct file_operations afs_proc_servers_fops = { + .open = afs_proc_servers_open, .read = seq_read, .llseek = seq_lseek, .release = seq_release, @@ -138,7 +138,8 @@ int afs_proc_init(struct afs_net *net) goto error_dir; if (!proc_create("cells", 0644, net->proc_afs, &afs_proc_cells_fops) || - !proc_create("rootcell", 0644, net->proc_afs, &afs_proc_rootcell_fops)) + !proc_create("rootcell", 0644, net->proc_afs, &afs_proc_rootcell_fops) || + !proc_create("servers", 0644, net->proc_afs, &afs_proc_servers_fops)) goto error_tree; _leave(" = 0"); @@ -174,7 +175,6 @@ static int afs_proc_cells_open(struct inode *inode, struct file *file) m = file->private_data; m->private = PDE_DATA(inode); - return 0; } @@ -357,12 +357,10 @@ int afs_proc_cell_setup(struct afs_net *net, struct afs_cell *cell) if (!dir) goto error_dir; - if (!proc_create_data("servers", 0, dir, - &afs_proc_cell_servers_fops, cell) || - !proc_create_data("vlservers", 0, dir, - &afs_proc_cell_vlservers_fops, cell) || + if (!proc_create_data("vlservers", 0, dir, + 
&afs_proc_cell_vlservers_fops, cell) || !proc_create_data("volumes", 0, dir, - &afs_proc_cell_volumes_fops, cell)) + &afs_proc_cell_volumes_fops, cell)) goto error_tree; _leave(" = 0"); @@ -420,9 +418,8 @@ static void *afs_proc_cell_volumes_start(struct seq_file *m, loff_t *_pos) _enter("cell=%p pos=%Ld", cell, *_pos); - /* lock the list against modification */ - down_read(&cell->vl_sem); - return seq_list_start_head(&cell->vl_list, *_pos); + read_lock(&cell->proc_lock); + return seq_list_start_head(&cell->proc_volumes, *_pos); } /* @@ -434,7 +431,7 @@ static void *afs_proc_cell_volumes_next(struct seq_file *p, void *v, struct afs_cell *cell = p->private; _enter("cell=%p pos=%Ld", cell, *_pos); - return seq_list_next(v, &cell->vl_list, _pos); + return seq_list_next(v, &cell->proc_volumes, _pos); } /* @@ -444,17 +441,13 @@ static void afs_proc_cell_volumes_stop(struct seq_file *p, void *v) { struct afs_cell *cell = p->private; - up_read(&cell->vl_sem); + read_unlock(&cell->proc_lock); } -static const char afs_vlocation_states[][4] = { - [AFS_VL_NEW] = "New", - [AFS_VL_CREATING] = "Crt", - [AFS_VL_VALID] = "Val", - [AFS_VL_NO_VOLUME] = "NoV", - [AFS_VL_UPDATING] = "Upd", - [AFS_VL_VOLUME_DELETED] = "Del", - [AFS_VL_UNCERTAIN] = "Unc", +static const char afs_vol_types[3][3] = { + [AFSVL_RWVOL] = "RW", + [AFSVL_ROVOL] = "RO", + [AFSVL_BACKVOL] = "BK", }; /* @@ -463,23 +456,17 @@ static const char afs_vlocation_states[][4] = { static int afs_proc_cell_volumes_show(struct seq_file *m, void *v) { struct afs_cell *cell = m->private; - struct afs_vlocation *vlocation = - list_entry(v, struct afs_vlocation, link); + struct afs_volume *vol = list_entry(v, struct afs_volume, proc_link); - /* display header on line 1 */ - if (v == &cell->vl_list) { - seq_puts(m, "USE STT VLID[0] VLID[1] VLID[2] NAME\n"); + /* Display header on line 1 */ + if (v == &cell->proc_volumes) { + seq_puts(m, "USE VID TY\n"); return 0; } - /* display one cell per line on subsequent lines */ - 
seq_printf(m, "%3d %s %08x %08x %08x %s\n", - atomic_read(&vlocation->usage), - afs_vlocation_states[vlocation->state], - vlocation->vldb.vid[0], - vlocation->vldb.vid[1], - vlocation->vldb.vid[2], - vlocation->vldb.name); + seq_printf(m, "%3d %08x %s\n", + atomic_read(&vol->usage), vol->vid, + afs_vol_types[vol->type]); return 0; } @@ -580,86 +567,62 @@ static int afs_proc_cell_vlservers_show(struct seq_file *m, void *v) } /* - * open "/proc/fs/afs//servers" which provides a summary of active + * open "/proc/fs/afs/servers" which provides a summary of active * servers */ -static int afs_proc_cell_servers_open(struct inode *inode, struct file *file) +static int afs_proc_servers_open(struct inode *inode, struct file *file) { - struct afs_cell *cell; - struct seq_file *m; - int ret; - - cell = PDE_DATA(inode); - if (!cell) - return -ENOENT; - - ret = seq_open(file, &afs_proc_cell_servers_ops); - if (ret < 0) - return ret; - - m = file->private_data; - m->private = cell; - return 0; + return seq_open(file, &afs_proc_servers_ops); } /* - * set up the iterator to start reading from the cells list and return the - * first item + * Set up the iterator to start reading from the server list and return the + * first item. 
*/ -static void *afs_proc_cell_servers_start(struct seq_file *m, loff_t *_pos) - __acquires(m->private->servers_lock) +static void *afs_proc_servers_start(struct seq_file *m, loff_t *_pos) { - struct afs_cell *cell = m->private; + struct afs_net *net = afs_seq2net(m); - _enter("cell=%p pos=%Ld", cell, *_pos); - - /* lock the list against modification */ - read_lock(&cell->servers_lock); - return seq_list_start_head(&cell->servers, *_pos); + rcu_read_lock(); + return seq_hlist_start_head_rcu(&net->fs_proc, *_pos); } /* * move to next cell in cells list */ -static void *afs_proc_cell_servers_next(struct seq_file *p, void *v, - loff_t *_pos) +static void *afs_proc_servers_next(struct seq_file *m, void *v, loff_t *_pos) { - struct afs_cell *cell = p->private; + struct afs_net *net = afs_seq2net(m); - _enter("cell=%p pos=%Ld", cell, *_pos); - return seq_list_next(v, &cell->servers, _pos); + return seq_hlist_next_rcu(v, &net->fs_proc, _pos); } /* * clean up after reading from the cells list */ -static void afs_proc_cell_servers_stop(struct seq_file *p, void *v) - __releases(p->private->servers_lock) +static void afs_proc_servers_stop(struct seq_file *p, void *v) { - struct afs_cell *cell = p->private; - - read_unlock(&cell->servers_lock); + rcu_read_unlock(); } /* * display a header line followed by a load of volume lines */ -static int afs_proc_cell_servers_show(struct seq_file *m, void *v) +static int afs_proc_servers_show(struct seq_file *m, void *v) { - struct afs_cell *cell = m->private; - struct afs_server *server = list_entry(v, struct afs_server, link); - char ipaddr[64]; + struct afs_server *server; + struct afs_addr_list *alist; - /* display header on line 1 */ - if (v == &cell->servers) { - seq_puts(m, "USE ADDR STATE\n"); + if (v == SEQ_START_TOKEN) { + seq_puts(m, "UUID USE ADDR\n"); return 0; } - /* display one cell per line on subsequent lines */ - sprintf(ipaddr, "%pISp", &server->addrs->addrs[0].transport); - seq_printf(m, "%3d %-15s %5d\n", - 
atomic_read(&server->usage), ipaddr, server->fs_state); - + server = list_entry(v, struct afs_server, proc_link); + alist = rcu_dereference(server->addresses); + seq_printf(m, "%pU %3d %pISp\n", + &server->uuid, + atomic_read(&server->usage), + &alist->addrs[alist->index].transport); return 0; } diff --git a/fs/afs/rotate.c b/fs/afs/rotate.c index c7975b3ba59a..e728ca1776c9 100644 --- a/fs/afs/rotate.c +++ b/fs/afs/rotate.c @@ -11,7 +11,12 @@ #include #include +#include +#include +#include +#include #include "internal.h" +#include "afs_fs.h" /* * Initialise a filesystem server cursor for iterating over FS servers. @@ -21,6 +26,460 @@ void afs_init_fs_cursor(struct afs_fs_cursor *fc, struct afs_vnode *vnode) memset(fc, 0, sizeof(*fc)); } +/* + * Begin an operation on the fileserver. + * + * Fileserver operations are serialised on the server by vnode, so we serialise + * them here also using the io_lock. + */ +bool afs_begin_vnode_operation(struct afs_fs_cursor *fc, struct afs_vnode *vnode, + struct key *key) +{ + afs_init_fs_cursor(fc, vnode); + fc->vnode = vnode; + fc->key = key; + fc->ac.error = SHRT_MAX; + + if (mutex_lock_interruptible(&vnode->io_lock) < 0) { + fc->ac.error = -EINTR; + fc->flags |= AFS_FS_CURSOR_STOP; + return false; + } + + if (test_bit(AFS_VNODE_READLOCKED, &vnode->flags) || + test_bit(AFS_VNODE_WRITELOCKED, &vnode->flags)) + fc->flags |= AFS_FS_CURSOR_CUR_ONLY; + return true; +} + +/* + * Begin iteration through a server list, starting with the vnode's last used + * server if possible, or the last recorded good server if not. 
+ */ +static bool afs_start_fs_iteration(struct afs_fs_cursor *fc, + struct afs_vnode *vnode) +{ + struct afs_cb_interest *cbi; + int i; + + read_lock(&vnode->volume->servers_lock); + fc->server_list = afs_get_serverlist(vnode->volume->servers); + read_unlock(&vnode->volume->servers_lock); + + cbi = vnode->cb_interest; + if (cbi) { + /* See if the vnode's preferred record is still available */ + for (i = 0; i < fc->server_list->nr_servers; i++) { + if (fc->server_list->servers[i].cb_interest == cbi) { + fc->start = i; + goto found_interest; + } + } + + /* If we have a lock outstanding on a server that's no longer + * serving this vnode, then we can't switch to another server + * and have to return an error. + */ + if (fc->flags & AFS_FS_CURSOR_CUR_ONLY) { + fc->ac.error = -ESTALE; + return false; + } + + /* Note that the callback promise is effectively broken */ + write_seqlock(&vnode->cb_lock); + ASSERTCMP(cbi, ==, vnode->cb_interest); + vnode->cb_interest = NULL; + if (test_and_clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) + vnode->cb_break++; + write_sequnlock(&vnode->cb_lock); + + afs_put_cb_interest(afs_v2net(vnode), cbi); + cbi = NULL; + } else { + fc->start = READ_ONCE(fc->server_list->index); + } + +found_interest: + fc->index = fc->start; + return true; +} + +/* + * Post volume busy note. + */ +static void afs_busy(struct afs_volume *volume, u32 abort_code) +{ + const char *m; + + switch (abort_code) { + case VOFFLINE: m = "offline"; break; + case VRESTARTING: m = "restarting"; break; + case VSALVAGING: m = "being salvaged"; break; + default: m = "busy"; break; + } + + pr_notice("kAFS: Volume %u '%s' is %s\n", volume->vid, volume->name, m); +} + +/* + * Sleep and retry the operation to the same fileserver. + */ +static bool afs_sleep_and_retry(struct afs_fs_cursor *fc) +{ + msleep_interruptible(1000); + if (signal_pending(current)) { + fc->ac.error = -ERESTARTSYS; + return false; + } + + return true; +} + +/* + * Select the fileserver to use. 
May be called multiple times to rotate + * through the fileservers. + */ +bool afs_select_fileserver(struct afs_fs_cursor *fc) +{ + struct afs_addr_list *alist; + struct afs_server *server; + struct afs_vnode *vnode = fc->vnode; + + _enter("%u/%u,%u/%u,%d,%d", + fc->index, fc->start, + fc->ac.index, fc->ac.start, + fc->ac.error, fc->ac.abort_code); + + if (fc->flags & AFS_FS_CURSOR_STOP) { + _leave(" = f [stopped]"); + return false; + } + + /* Evaluate the result of the previous operation, if there was one. */ + switch (fc->ac.error) { + case SHRT_MAX: + goto start; + + case 0: + default: + /* Success or local failure. Stop. */ + fc->flags |= AFS_FS_CURSOR_STOP; + _leave(" = f [okay/local %d]", fc->ac.error); + return false; + + case -ECONNABORTED: + /* The far side rejected the operation on some grounds. This + * might involve the server being busy or the volume having been moved. + */ + switch (fc->ac.abort_code) { + case VNOVOL: + /* This fileserver doesn't know about the volume. + * - May indicate that the VL is wrong - retry once and compare + * the results. + * - May indicate that the fileserver couldn't attach to the vol. + */ + if (fc->flags & AFS_FS_CURSOR_VNOVOL) { + fc->ac.error = -EREMOTEIO; + goto failed; + } + + write_lock(&vnode->volume->servers_lock); + fc->server_list->vnovol_mask |= 1 << fc->index; + write_unlock(&vnode->volume->servers_lock); + + set_bit(AFS_VOLUME_NEEDS_UPDATE, &vnode->volume->flags); + fc->ac.error = afs_check_volume_status(vnode->volume, fc->key); + if (fc->ac.error < 0) + goto failed; + + if (test_bit(AFS_VOLUME_DELETED, &vnode->volume->flags)) { + fc->ac.error = -ENOMEDIUM; + goto failed; + } + + /* If the server list didn't change, then assume that + * it's the fileserver having trouble. 
+ */ + if (vnode->volume->servers == fc->server_list) { + fc->ac.error = -EREMOTEIO; + goto failed; + } + + /* Try again */ + fc->flags |= AFS_FS_CURSOR_VNOVOL; + _leave(" = t [vnovol]"); + return true; + + case VSALVAGE: /* TODO: Should this return an error or iterate? */ + case VVOLEXISTS: + case VNOSERVICE: + case VONLINE: + case VDISKFULL: + case VOVERQUOTA: + fc->ac.error = afs_abort_to_error(fc->ac.abort_code); + goto next_server; + + case VOFFLINE: + if (!test_and_set_bit(AFS_VOLUME_OFFLINE, &vnode->volume->flags)) { + afs_busy(vnode->volume, fc->ac.abort_code); + clear_bit(AFS_VOLUME_BUSY, &vnode->volume->flags); + } + if (fc->flags & AFS_FS_CURSOR_NO_VSLEEP) { + fc->ac.error = -EADV; + goto failed; + } + if (fc->flags & AFS_FS_CURSOR_CUR_ONLY) { + fc->ac.error = -ESTALE; + goto failed; + } + goto busy; + + case VSALVAGING: + case VRESTARTING: + case VBUSY: + /* Retry after going round all the servers unless we + * have a file lock we need to maintain. + */ + if (fc->flags & AFS_FS_CURSOR_NO_VSLEEP) { + fc->ac.error = -EBUSY; + goto failed; + } + if (!test_and_set_bit(AFS_VOLUME_BUSY, &vnode->volume->flags)) { + afs_busy(vnode->volume, fc->ac.abort_code); + clear_bit(AFS_VOLUME_OFFLINE, &vnode->volume->flags); + } + busy: + if (fc->flags & AFS_FS_CURSOR_CUR_ONLY) { + if (!afs_sleep_and_retry(fc)) + goto failed; + + /* Retry with same server & address */ + _leave(" = t [vbusy]"); + return true; + } + + fc->flags |= AFS_FS_CURSOR_VBUSY; + goto next_server; + + case VMOVED: + /* The volume migrated to another server. We consider + * all locks and callbacks broken and request + * an update from the VLDB. + * + * We also limit the number of VMOVED hops we will + * honour, just in case someone sets up a loop.
+ */ + if (fc->flags & AFS_FS_CURSOR_VMOVED) { + fc->ac.error = -EREMOTEIO; + goto failed; + } + fc->flags |= AFS_FS_CURSOR_VMOVED; + + set_bit(AFS_VOLUME_WAIT, &vnode->volume->flags); + set_bit(AFS_VOLUME_NEEDS_UPDATE, &vnode->volume->flags); + fc->ac.error = afs_check_volume_status(vnode->volume, fc->key); + if (fc->ac.error < 0) + goto failed; + + /* If the server list didn't change, then the VLDB is + * out of sync with the fileservers. This is hopefully + * a temporary condition, however, so we don't want to + * permanently block access to the file. + * + * TODO: Try other fileservers if we can. + * + * TODO: Retry a few times with sleeps. + */ + if (vnode->volume->servers == fc->server_list) { + fc->ac.error = -ENOMEDIUM; + goto failed; + } + + goto restart_from_beginning; + + default: + clear_bit(AFS_VOLUME_OFFLINE, &vnode->volume->flags); + clear_bit(AFS_VOLUME_BUSY, &vnode->volume->flags); + fc->ac.error = afs_abort_to_error(fc->ac.abort_code); + goto failed; + } + + case -ENETUNREACH: + case -EHOSTUNREACH: + case -ECONNREFUSED: + case -ETIMEDOUT: + case -ETIME: + _debug("no conn"); + goto iterate_address; + } + +restart_from_beginning: + _debug("restart"); + afs_end_cursor(&fc->ac); + afs_put_cb_interest(afs_v2net(vnode), fc->cbi); + fc->cbi = NULL; + afs_put_serverlist(afs_v2net(vnode), fc->server_list); + fc->server_list = NULL; +start: + _debug("start"); + /* See if we need to do an update of the volume record. Note that the + * volume may have moved or even have been deleted. + */ + fc->ac.error = afs_check_volume_status(vnode->volume, fc->key); + if (fc->ac.error < 0) + goto failed; + + if (!afs_start_fs_iteration(fc, vnode)) + goto failed; + goto use_server; + +next_server: + _debug("next"); + afs_put_cb_interest(afs_v2net(vnode), fc->cbi); + fc->cbi = NULL; + fc->index++; + if (fc->index >= fc->server_list->nr_servers) + fc->index = 0; + if (fc->index != fc->start) + goto use_server; + + /* That's all the servers poked to no good effect. 
Try again if some + * of them were busy. + */ + if (fc->flags & AFS_FS_CURSOR_VBUSY) + goto restart_from_beginning; + + fc->ac.error = -EDESTADDRREQ; + goto failed; + +use_server: + _debug("use"); + /* We're starting on a different fileserver from the list. We need to + * check it, create a callback intercept, find its address list and + * probe its capabilities before we use it. + */ + ASSERTCMP(fc->ac.alist, ==, NULL); + server = fc->server_list->servers[fc->index].server; + + if (!afs_check_server_record(fc, server)) + goto failed; + + _debug("USING SERVER: %pU", &server->uuid); + + /* Make sure we've got a callback interest record for this server. We + * have to link it in before we send the request as we can be sent a + * break request before we've finished decoding the reply and + * installing the vnode. + */ + fc->ac.error = afs_register_server_cb_interest( + vnode, &fc->server_list->servers[fc->index]); + if (fc->ac.error < 0) + goto failed; + + fc->cbi = afs_get_cb_interest(vnode->cb_interest); + + read_lock(&server->fs_lock); + alist = rcu_dereference_protected(server->addresses, + lockdep_is_held(&server->fs_lock)); + afs_get_addrlist(alist); + read_unlock(&server->fs_lock); + + + /* Probe the current fileserver if we haven't done so yet. */ + if (!test_bit(AFS_SERVER_FL_PROBED, &server->flags)) { + fc->ac.alist = afs_get_addrlist(alist); + + if (!afs_probe_fileserver(fc)) + goto failed; + } + + if (!fc->ac.alist) + fc->ac.alist = alist; + else + afs_put_addrlist(alist); + + fc->ac.addr = NULL; + fc->ac.start = READ_ONCE(alist->index); + fc->ac.index = fc->ac.start; + fc->ac.error = 0; + fc->ac.begun = false; + goto iterate_address; + +iterate_address: + ASSERT(fc->ac.alist); + _debug("iterate %d/%d", fc->ac.index, fc->ac.alist->nr_addrs); + /* Iterate over the current server's address list to try and find an + * address on which it will respond to us. 
+ */ + if (afs_iterate_addresses(&fc->ac)) { + _leave(" = t"); + return true; + } + + afs_end_cursor(&fc->ac); + goto next_server; + +failed: + fc->flags |= AFS_FS_CURSOR_STOP; + _leave(" = f [failed %d]", fc->ac.error); + return false; +} + +/* + * Select the same fileserver we used for a vnode before and only that + * fileserver. We use this when we have a lock on that file, which is backed + * only by the fileserver we obtained it from. + */ +bool afs_select_current_fileserver(struct afs_fs_cursor *fc) +{ + struct afs_vnode *vnode = fc->vnode; + struct afs_cb_interest *cbi = vnode->cb_interest; + struct afs_addr_list *alist; + + _enter(""); + + if (!cbi) { + fc->ac.error = -ESTALE; + fc->flags |= AFS_FS_CURSOR_STOP; + return false; + } + + read_lock(&cbi->server->fs_lock); + alist = afs_get_addrlist(cbi->server->addresses); + read_unlock(&cbi->server->fs_lock); + if (!alist) { + fc->ac.error = -ESTALE; + fc->flags |= AFS_FS_CURSOR_STOP; + return false; + } + + fc->ac.alist = alist; + fc->ac.error = 0; + return true; +} + +/* + * Tidy up a filesystem cursor and unlock the vnode. Returns the cursor's final error code, with a server abort (-ECONNABORTED) mapped through afs_abort_to_error(). + */ +int afs_end_vnode_operation(struct afs_fs_cursor *fc) +{ + struct afs_net *net = afs_v2net(fc->vnode); + int ret; + + mutex_unlock(&fc->vnode->io_lock); + + afs_end_cursor(&fc->ac); + afs_put_cb_interest(net, fc->cbi); + afs_put_serverlist(net, fc->server_list); + + ret = fc->ac.error; + if (ret == -ECONNABORTED) + ret = afs_abort_to_error(fc->ac.abort_code); + + return ret; +} + +#if 0 /* * Set a filesystem server cursor for using a specific FS server.
*/ @@ -252,3 +711,5 @@ int afs_end_fs_cursor(struct afs_fs_cursor *fc, struct afs_net *net) afs_put_server(net, fc->server); return fc->ac.error; } + +#endif diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 5ddfb7c4cf78..1bbd5854507d 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -20,7 +20,7 @@ struct workqueue_struct *afs_async_calls; static void afs_wake_up_call_waiter(struct sock *, struct rxrpc_call *, unsigned long); -static long afs_wait_for_call_to_complete(struct afs_call *); +static long afs_wait_for_call_to_complete(struct afs_call *, struct afs_addr_cursor *); static void afs_wake_up_async_call(struct sock *, struct rxrpc_call *, unsigned long); static void afs_process_async_call(struct work_struct *); static void afs_rx_new_call(struct sock *, struct rxrpc_call *, unsigned long); @@ -162,6 +162,7 @@ void afs_put_call(struct afs_call *call) call->type->destructor(call); afs_put_server(call->net, call->cm_server); + afs_put_cb_interest(call->net, call->cbi); kfree(call->request); kfree(call); @@ -330,7 +331,6 @@ long afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call, struct kvec iov[1]; size_t offset; s64 tx_total_len; - u32 abort_code; int ret; _enter(",{%pISp},", &srx->transport); @@ -362,7 +362,6 @@ long afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call, afs_wake_up_async_call : afs_wake_up_call_waiter), call->upgrade); - call->key = NULL; if (IS_ERR(rxcall)) { ret = PTR_ERR(rxcall); goto error_kill_call; @@ -406,7 +405,7 @@ long afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call, if (call->async) return -EINPROGRESS; - return afs_wait_for_call_to_complete(call); + return afs_wait_for_call_to_complete(call, ac); error_do_abort: call->state = AFS_CALL_COMPLETE; @@ -414,15 +413,16 @@ error_do_abort: rxrpc_kernel_abort_call(call->net->socket, rxcall, RX_USER_ABORT, ret, "KSD"); } else { - abort_code = 0; offset = 0; rxrpc_kernel_recv_data(call->net->socket, rxcall, NULL, 0, &offset, false, &call->abort_code, 
&call->service_id); - ret = afs_abort_to_error(call->abort_code); + ac->abort_code = call->abort_code; + ac->responded = true; } error_kill_call: afs_put_call(call); + ac->error = ret; _leave(" = %d", ret); return ret; } @@ -510,7 +510,8 @@ save_error: /* * wait synchronously for a call to complete */ -static long afs_wait_for_call_to_complete(struct afs_call *call) +static long afs_wait_for_call_to_complete(struct afs_call *call, + struct afs_addr_cursor *ac) { signed long rtt2, timeout; long ret; @@ -563,16 +564,25 @@ static long afs_wait_for_call_to_complete(struct afs_call *call) /* Kill off the call if it's still live. */ if (call->state < AFS_CALL_COMPLETE) { _debug("call interrupted"); - rxrpc_kernel_abort_call(call->net->socket, call->rxcall, - RX_USER_ABORT, -EINTR, "KWI"); + if (rxrpc_kernel_abort_call(call->net->socket, call->rxcall, + RX_USER_ABORT, -EINTR, "KWI")) + call->error = -ERESTARTSYS; } - ret = call->error; - if (ret < 0) { - ret = afs_abort_to_error(call->abort_code); - } else if (ret == 0 && call->ret_reply0) { - ret = (long)call->reply[0]; - call->reply[0] = NULL; + ac->abort_code = call->abort_code; + ac->error = call->error; + + ret = ac->error; + switch (ret) { + case 0: + if (call->ret_reply0) { + ret = (long)call->reply[0]; + call->reply[0] = NULL; + } + /* Fall through */ + case -ECONNABORTED: + ac->responded = true; + break; } _debug("call complete"); @@ -882,10 +892,7 @@ int afs_extract_data(struct afs_call *call, void *buf, size_t count, return 0; } - if (ret == -ECONNABORTED) - call->error = afs_abort_to_error(call->abort_code); - else - call->error = ret; + call->error = ret; call->state = AFS_CALL_COMPLETE; return ret; } diff --git a/fs/afs/security.c b/fs/afs/security.c index 1b5198fc1657..46a881a4d08f 100644 --- a/fs/afs/security.c +++ b/fs/afs/security.c @@ -325,7 +325,7 @@ static int afs_check_permit(struct afs_vnode *vnode, struct key *key, */ _debug("no valid permit"); - ret = afs_vnode_fetch_status(vnode, key, true); + 
ret = afs_fetch_status(vnode, key); if (ret < 0) { *_access = 0; _leave(" = %d", ret); diff --git a/fs/afs/server.c b/fs/afs/server.c index 9ca174b24f5b..a6c860bcf391 100644 --- a/fs/afs/server.c +++ b/fs/afs/server.c @@ -14,7 +14,8 @@ #include "afs_fs.h" #include "internal.h" -static unsigned afs_server_timeout = 10; /* server timeout in seconds */ +static unsigned afs_server_gc_delay = 10; /* Server record timeout in seconds */ +static unsigned afs_server_update_delay = 30; /* Time till VLDB recheck in secs */ static void afs_inc_servers_outstanding(struct afs_net *net) { @@ -27,60 +28,201 @@ static void afs_dec_servers_outstanding(struct afs_net *net) wake_up_atomic_t(&net->servers_outstanding); } -void afs_server_timer(struct timer_list *timer) +/* + * Find a server by one of its addresses; a v4-mapped (::ffff:a.b.c.d) address is looked up in the IPv4 list, anything else in the IPv6 list. + */ +struct afs_server *afs_find_server(struct afs_net *net, + const struct sockaddr_rxrpc *srx) { - struct afs_net *net = container_of(timer, struct afs_net, server_timer); + const struct sockaddr_in6 *a = &srx->transport.sin6, *b; + const struct afs_addr_list *alist; + struct afs_server *server = NULL; + unsigned int i; + bool ipv6 = true; + int seq = 0, diff; - if (!queue_work(afs_wq, &net->server_reaper)) - afs_dec_servers_outstanding(net); + if (srx->transport.sin6.sin6_addr.s6_addr32[0] == 0 && + srx->transport.sin6.sin6_addr.s6_addr32[1] == 0 && + srx->transport.sin6.sin6_addr.s6_addr32[2] == htonl(0xffff)) + ipv6 = false; + + rcu_read_lock(); + + do { + if (server) + afs_put_server(net, server); + server = NULL; + read_seqbegin_or_lock(&net->fs_addr_lock, &seq); + + if (ipv6) { + hlist_for_each_entry_rcu(server, &net->fs_addresses6, addr6_link) { + alist = rcu_dereference(server->addresses); + for (i = alist->nr_ipv4; i < alist->nr_addrs; i++) { + b = &alist->addrs[i].transport.sin6; + diff = (u16)a->sin6_port - (u16)b->sin6_port; + if (diff == 0) + diff = memcmp(&a->sin6_addr, + &b->sin6_addr, + sizeof(struct in6_addr)); + if (diff == 0) + goto found; + if (diff <
0) { + // TODO: Sort the list + //if (i == alist->nr_ipv4) + // goto not_found; + break; + } + } + } + } else { + hlist_for_each_entry_rcu(server, &net->fs_addresses4, addr4_link) { + alist = rcu_dereference(server->addresses); + for (i = 0; i < alist->nr_ipv4; i++) { + b = &alist->addrs[i].transport.sin6; + diff = (u16)a->sin6_port - (u16)b->sin6_port; + if (diff == 0) + diff = ((u32)a->sin6_addr.s6_addr32[3] - + (u32)b->sin6_addr.s6_addr32[3]); + if (diff == 0) + goto found; + if (diff < 0) { + // TODO: Sort the list + //if (i == 0) + // goto not_found; + break; + } + } + } + } + + //not_found: + server = NULL; + found: + if (server && !atomic_inc_not_zero(&server->usage)) + server = NULL; + + } while (need_seqretry(&net->fs_addr_lock, seq)); + + done_seqretry(&net->fs_addr_lock, seq); + + rcu_read_unlock(); + return server; } /* - * install a server record in the master tree + * Look up a server by its UUID */ -static int afs_install_server(struct afs_server *server) +struct afs_server *afs_find_server_by_uuid(struct afs_net *net, const uuid_t *uuid) { - struct afs_server *xserver; - struct afs_net *net = server->cell->net; + struct afs_server *server = NULL; + struct rb_node *p; + int diff, seq = 0; + + _enter("%pU", uuid); + + do { + /* Unfortunately, rbtree walking doesn't give reliable results + * under just the RCU read lock, so we have to check for + * changes. 
+ */ + if (server) + afs_put_server(net, server); + server = NULL; + + read_seqbegin_or_lock(&net->fs_lock, &seq); + + p = net->fs_servers.rb_node; + while (p) { + server = rb_entry(p, struct afs_server, uuid_rb); + + diff = memcmp(uuid, &server->uuid, sizeof(*uuid)); + if (diff < 0) { + p = p->rb_left; + } else if (diff > 0) { + p = p->rb_right; + } else { + afs_get_server(server); + break; + } + + server = NULL; + } + } while (need_seqretry(&net->fs_lock, seq)); + + done_seqretry(&net->fs_lock, seq); + + _leave(" = %p", server); + return server; +} + +/* + * Install a server record in the namespace tree + */ +static struct afs_server *afs_install_server(struct afs_net *net, + struct afs_server *candidate) +{ + const struct afs_addr_list *alist; + struct afs_server *server; struct rb_node **pp, *p; - int ret, diff; + int ret = -EEXIST, diff; - _enter("%p", server); + _enter("%p", candidate); - write_lock(&net->servers_lock); + write_seqlock(&net->fs_lock); - ret = -EEXIST; - pp = &net->servers.rb_node; + /* Firstly install the server in the UUID lookup tree */ + pp = &net->fs_servers.rb_node; p = NULL; while (*pp) { p = *pp; _debug("- consider %p", p); - xserver = rb_entry(p, struct afs_server, master_rb); - diff = memcmp(&server->addrs->addrs[0], - &xserver->addrs->addrs[0], - sizeof(sizeof(server->addrs->addrs[0]))); + server = rb_entry(p, struct afs_server, uuid_rb); + diff = memcmp(&candidate->uuid, &server->uuid, sizeof(uuid_t)); if (diff < 0) pp = &(*pp)->rb_left; else if (diff > 0) pp = &(*pp)->rb_right; else - goto error; + goto exists; } - rb_link_node(&server->master_rb, p, pp); - rb_insert_color(&server->master_rb, &net->servers); + server = candidate; + rb_link_node(&server->uuid_rb, p, pp); + rb_insert_color(&server->uuid_rb, &net->fs_servers); + hlist_add_head_rcu(&server->proc_link, &net->fs_proc); + + write_seqlock(&net->fs_addr_lock); + alist = rcu_dereference_protected(server->addresses, + lockdep_is_held(&net->fs_addr_lock.lock)); + + /* 
Secondly, if the server has any IPv4 and/or IPv6 addresses, install + * it in the IPv4 and/or IPv6 reverse-map lists. + * + * TODO: For speed we want to use something other than a flat list + * here; even sorting the list in terms of lowest address would help a + * bit, but anything we might want to do gets messy and memory + * intensive. + */ + if (alist->nr_ipv4 > 0) + hlist_add_head_rcu(&server->addr4_link, &net->fs_addresses4); + if (alist->nr_addrs > alist->nr_ipv4) + hlist_add_head_rcu(&server->addr6_link, &net->fs_addresses6); + + write_sequnlock(&net->fs_addr_lock); ret = 0; -error: - write_unlock(&net->servers_lock); - return ret; +exists: + afs_get_server(server); + write_sequnlock(&net->fs_lock); + return server; } /* * allocate a new server record */ -static struct afs_server *afs_alloc_server(struct afs_cell *cell, - const struct sockaddr_rxrpc *addr) +static struct afs_server *afs_alloc_server(struct afs_net *net, + const uuid_t *uuid, + struct afs_addr_list *alist) { struct afs_server *server; @@ -89,194 +231,155 @@ static struct afs_server *afs_alloc_server(struct afs_cell *cell, server = kzalloc(sizeof(struct afs_server), GFP_KERNEL); if (!server) goto enomem; - server->addrs = kzalloc(sizeof(struct afs_addr_list) + - sizeof(struct sockaddr_rxrpc), - GFP_KERNEL); - if (!server->addrs) - goto enomem_server; atomic_set(&server->usage, 1); - server->net = cell->net; - server->cell = cell; - - INIT_LIST_HEAD(&server->link); - INIT_LIST_HEAD(&server->grave); - init_rwsem(&server->sem); - spin_lock_init(&server->fs_lock); + RCU_INIT_POINTER(server->addresses, alist); + server->addr_version = alist->version; + server->uuid = *uuid; + server->flags = (1UL << AFS_SERVER_FL_NEW); + server->update_at = ktime_get_real_seconds() + afs_server_update_delay; + rwlock_init(&server->fs_lock); INIT_LIST_HEAD(&server->cb_interests); rwlock_init(&server->cb_break_lock); - refcount_set(&server->addrs->usage, 1); - server->addrs->nr_addrs = 1; - server->addrs->addrs[0] = 
*addr; - afs_inc_servers_outstanding(cell->net); - - _leave(" = %p{%d}", server, atomic_read(&server->usage)); + afs_inc_servers_outstanding(net); + _leave(" = %p", server); return server; -enomem_server: - kfree(server); enomem: _leave(" = NULL [nomem]"); return NULL; } /* - * get an FS-server record for a cell + * Look up an address record for a server */ -struct afs_server *afs_lookup_server(struct afs_cell *cell, - struct sockaddr_rxrpc *addr) +static struct afs_addr_list *afs_vl_lookup_addrs(struct afs_cell *cell, + struct key *key, const uuid_t *uuid) { - struct afs_server *server, *candidate; + struct afs_addr_cursor ac; + struct afs_addr_list *alist; + int ret; - _enter("%p,%pIS", cell, &addr->transport); + ret = afs_set_vl_cursor(&ac, cell); + if (ret < 0) + return ERR_PTR(ret); - /* quick scan of the list to see if we already have the server */ - read_lock(&cell->servers_lock); - - list_for_each_entry(server, &cell->servers, link) { - if (memcmp(&server->addrs->addrs[0], addr, sizeof(*addr)) == 0) - goto found_server_quickly; - } - read_unlock(&cell->servers_lock); - - candidate = afs_alloc_server(cell, addr); - if (!candidate) { - _leave(" = -ENOMEM"); - return ERR_PTR(-ENOMEM); - } - - write_lock(&cell->servers_lock); - - /* check the cell's server list again */ - list_for_each_entry(server, &cell->servers, link) { - if (memcmp(&server->addrs->addrs[0], addr, sizeof(*addr)) == 0) - goto found_server; - } - - _debug("new"); - server = candidate; - if (afs_install_server(server) < 0) - goto server_in_two_cells; - - afs_get_cell(cell); - list_add_tail(&server->link, &cell->servers); - - write_unlock(&cell->servers_lock); - _leave(" = %p{%d}", server, atomic_read(&server->usage)); - return server; - - /* found a matching server quickly */ -found_server_quickly: - _debug("found quickly"); - afs_get_server(server); - read_unlock(&cell->servers_lock); -no_longer_unused: - if (!list_empty(&server->grave)) { - spin_lock(&cell->net->server_graveyard_lock); - 
list_del_init(&server->grave); - spin_unlock(&cell->net->server_graveyard_lock); - } - _leave(" = %p{%d}", server, atomic_read(&server->usage)); - return server; - - /* found a matching server on the second pass */ -found_server: - _debug("found"); - afs_get_server(server); - write_unlock(&cell->servers_lock); - kfree(candidate); - goto no_longer_unused; - - /* found a server that seems to be in two cells */ -server_in_two_cells: - write_unlock(&cell->servers_lock); - kfree(candidate); - afs_dec_servers_outstanding(cell->net); - printk(KERN_NOTICE "kAFS: Server %pI4 appears to be in two cells\n", - addr); - _leave(" = -EEXIST"); - return ERR_PTR(-EEXIST); -} - -/* - * look up a server by its IP address - */ -struct afs_server *afs_find_server(struct afs_net *net, - const struct sockaddr_rxrpc *srx) -{ - struct afs_server *server = NULL; - struct rb_node *p; - int diff; - - _enter("{%d,%pIS}", srx->transport.family, &srx->transport); - - read_lock(&net->servers_lock); - - p = net->servers.rb_node; - while (p) { - server = rb_entry(p, struct afs_server, master_rb); - - _debug("- consider %p", p); - - diff = memcmp(srx, &server->addrs->addrs[0], sizeof(*srx)); - if (diff < 0) { - p = p->rb_left; - } else if (diff > 0) { - p = p->rb_right; - } else { - afs_get_server(server); - goto found; + while (afs_iterate_addresses(&ac)) { + alist = afs_vl_get_addrs_u(cell->net, &ac, key, uuid); + switch (ac.error) { + case 0: + afs_end_cursor(&ac); + return alist; + case -ECONNABORTED: + ac.error = afs_abort_to_error(ac.abort_code); + goto error; + case -ENOMEM: + case -ENONET: + goto error; + case -ENETUNREACH: + case -EHOSTUNREACH: + case -ECONNREFUSED: + break; + default: + ac.error = -EIO; + goto error; } } - server = NULL; -found: - read_unlock(&net->servers_lock); - _leave(" = %p", server); +error: + return ERR_PTR(afs_end_cursor(&ac)); +} + +/* + * Get or create a fileserver record. 
+ */ +struct afs_server *afs_lookup_server(struct afs_cell *cell, struct key *key, + const uuid_t *uuid) +{ + struct afs_addr_list *alist; + struct afs_server *server, *candidate; + + _enter("%p,%pU", cell->net, uuid); + + server = afs_find_server_by_uuid(cell->net, uuid); + if (server) + return server; + + alist = afs_vl_lookup_addrs(cell, key, uuid); + if (IS_ERR(alist)) + return ERR_CAST(alist); + + candidate = afs_alloc_server(cell->net, uuid, alist); + if (!candidate) { + afs_put_addrlist(alist); + return ERR_PTR(-ENOMEM); + } + + server = afs_install_server(cell->net, candidate); + if (server != candidate) { + afs_put_addrlist(alist); + kfree(candidate); + } + + _leave(" = %p{%d}", server, atomic_read(&server->usage)); return server; } +/* + * Set the server timer to fire after a given delay, assuming it's not already + * set for an earlier time. + */ static void afs_set_server_timer(struct afs_net *net, time64_t delay) { - afs_inc_servers_outstanding(net); if (net->live) { - if (timer_reduce(&net->server_timer, jiffies + delay * HZ)) - afs_dec_servers_outstanding(net); - } else { - if (!queue_work(afs_wq, &net->server_reaper)) + afs_inc_servers_outstanding(net); + if (timer_reduce(&net->fs_timer, jiffies + delay * HZ)) afs_dec_servers_outstanding(net); } } /* - * destroy a server record - * - removes from the cell list + * Server management timer. We have an increment on fs_outstanding that we + * need to pass along to the work item. + */ +void afs_servers_timer(struct timer_list *timer) +{ + struct afs_net *net = container_of(timer, struct afs_net, fs_timer); + + _enter(""); + if (!queue_work(afs_wq, &net->fs_manager)) + afs_dec_servers_outstanding(net); +} + +/* + * Release a reference on a server record. 
*/ void afs_put_server(struct afs_net *net, struct afs_server *server) { + unsigned int usage; + if (!server) return; - _enter("%p{%d}", server, atomic_read(&server->usage)); + server->put_time = ktime_get_real_seconds(); - _debug("PUT SERVER %d", atomic_read(&server->usage)); + usage = atomic_dec_return(&server->usage); - ASSERTCMP(atomic_read(&server->usage), >, 0); + _enter("{%u}", usage); - if (likely(!atomic_dec_and_test(&server->usage))) { - _leave(""); + if (likely(usage > 0)) return; - } - spin_lock(&net->server_graveyard_lock); - if (atomic_read(&server->usage) == 0) { - list_move_tail(&server->grave, &net->server_graveyard); - server->time_of_death = ktime_get_real_seconds(); - afs_set_server_timer(net, afs_server_timeout); - } - spin_unlock(&net->server_graveyard_lock); - _leave(" [dead]"); + afs_set_server_timer(net, afs_server_gc_delay); +} + +static void afs_server_rcu(struct rcu_head *rcu) +{ + struct afs_server *server = container_of(rcu, struct afs_server, rcu); + + afs_put_addrlist(server->addresses); + kfree(server); } /* @@ -284,7 +387,7 @@ void afs_put_server(struct afs_net *net, struct afs_server *server) */ static void afs_destroy_server(struct afs_net *net, struct afs_server *server) { - struct afs_addr_list *alist = server->addrs; + struct afs_addr_list *alist = server->addresses; struct afs_addr_cursor ac = { .alist = alist, .addr = &alist->addrs[0], @@ -294,79 +397,300 @@ static void afs_destroy_server(struct afs_net *net, struct afs_server *server) }; _enter("%p", server); - afs_fs_give_up_all_callbacks(server, &ac, NULL, false); - afs_put_cell(net, server->cell); - afs_put_addrlist(server->addrs); - kfree(server); + afs_fs_give_up_all_callbacks(net, server, &ac, NULL); + call_rcu(&server->rcu, afs_server_rcu); afs_dec_servers_outstanding(net); } /* - * reap dead server records + * Garbage collect any expired servers. 
*/ -void afs_reap_server(struct work_struct *work) +static void afs_gc_servers(struct afs_net *net, struct afs_server *gc_list) { - LIST_HEAD(corpses); struct afs_server *server; - struct afs_net *net = container_of(work, struct afs_net, server_reaper); - unsigned long delay, expiry; - time64_t now; + bool deleted; + int usage; - now = ktime_get_real_seconds(); - spin_lock(&net->server_graveyard_lock); + while ((server = gc_list)) { + gc_list = server->gc_next; - while (!list_empty(&net->server_graveyard)) { - server = list_entry(net->server_graveyard.next, - struct afs_server, grave); + write_seqlock(&net->fs_lock); + usage = 1; + deleted = atomic_try_cmpxchg(&server->usage, &usage, 0); + if (deleted) { + rb_erase(&server->uuid_rb, &net->fs_servers); + hlist_del_rcu(&server->proc_link); + } + write_sequnlock(&net->fs_lock); - /* the queue is ordered most dead first */ - if (net->live) { - expiry = server->time_of_death + afs_server_timeout; - if (expiry > now) { - delay = (expiry - now); - afs_set_server_timer(net, delay); - break; + if (deleted) + afs_destroy_server(net, server); + } +} + +/* + * Manage the records of servers known to be within a network namespace. This + * includes garbage collecting unused servers. + * + * Note also that we were given an increment on net->servers_outstanding by + * whoever queued us that we need to deal with before returning. + */ +void afs_manage_servers(struct work_struct *work) +{ + struct afs_net *net = container_of(work, struct afs_net, fs_manager); + struct afs_server *gc_list = NULL; + struct rb_node *cursor; + time64_t now = ktime_get_real_seconds(), next_manage = TIME64_MAX; + bool purging = !net->live; + + _enter(""); + + /* Trawl the server list looking for servers that have expired from + * lack of use. 
+ */ + read_seqlock_excl(&net->fs_lock); + + for (cursor = rb_first(&net->fs_servers); cursor; cursor = rb_next(cursor)) { + struct afs_server *server = + rb_entry(cursor, struct afs_server, uuid_rb); + int usage = atomic_read(&server->usage); + + _debug("manage %pU %u", &server->uuid, usage); + + ASSERTCMP(usage, >=, 1); + ASSERTIFCMP(purging, usage, ==, 1); + + if (usage == 1) { + time64_t expire_at = server->put_time; + + if (!test_bit(AFS_SERVER_FL_VL_FAIL, &server->flags) && + !test_bit(AFS_SERVER_FL_NOT_FOUND, &server->flags)) + expire_at += afs_server_gc_delay; + if (purging || expire_at <= now) { + server->gc_next = gc_list; + gc_list = server; + } else if (expire_at < next_manage) { + next_manage = expire_at; } } + } - write_lock(&server->cell->servers_lock); - write_lock(&net->servers_lock); - if (atomic_read(&server->usage) > 0) { - list_del_init(&server->grave); + read_sequnlock_excl(&net->fs_lock); + + /* Update the timer on the way out. We have to pass an increment on + * servers_outstanding in the namespace that we are in to the timer or + * the work scheduler. 
+ */ + if (!purging && next_manage < TIME64_MAX) { + now = ktime_get_real_seconds(); + + if (next_manage - now <= 0) { + if (queue_work(afs_wq, &net->fs_manager)) + afs_inc_servers_outstanding(net); } else { - list_move_tail(&server->grave, &corpses); - list_del_init(&server->link); - rb_erase(&server->master_rb, &net->servers); + afs_set_server_timer(net, next_manage - now); } - write_unlock(&net->servers_lock); - write_unlock(&server->cell->servers_lock); } - spin_unlock(&net->server_graveyard_lock); - - /* now reap the corpses we've extracted */ - while (!list_empty(&corpses)) { - server = list_entry(corpses.next, struct afs_server, grave); - list_del(&server->grave); - afs_destroy_server(net, server); - } + afs_gc_servers(net, gc_list); afs_dec_servers_outstanding(net); + _leave(" [%d]", atomic_read(&net->servers_outstanding)); +} + +static void afs_queue_server_manager(struct afs_net *net) +{ + afs_inc_servers_outstanding(net); + if (!queue_work(afs_wq, &net->fs_manager)) + afs_dec_servers_outstanding(net); } /* - * Discard all the server records from a net namespace when it is destroyed or - * the afs module is removed. + * Purge list of servers. */ -void __net_exit afs_purge_servers(struct afs_net *net) +void afs_purge_servers(struct afs_net *net) { - if (del_timer_sync(&net->server_timer)) + _enter(""); + + if (del_timer_sync(&net->fs_timer)) atomic_dec(&net->servers_outstanding); - afs_inc_servers_outstanding(net); - if (!queue_work(afs_wq, &net->server_reaper)) - afs_dec_servers_outstanding(net); + afs_queue_server_manager(net); + _debug("wait"); wait_on_atomic_t(&net->servers_outstanding, atomic_t_wait, TASK_UNINTERRUPTIBLE); + _leave(""); +} + +/* + * Probe a fileserver to find its capabilities. + * + * TODO: Try service upgrade. 
+ */ +static bool afs_do_probe_fileserver(struct afs_fs_cursor *fc) +{ + _enter(""); + + fc->ac.addr = NULL; + fc->ac.start = READ_ONCE(fc->ac.alist->index); + fc->ac.index = fc->ac.start; + fc->ac.error = 0; + fc->ac.begun = false; + + while (afs_iterate_addresses(&fc->ac)) { + afs_fs_get_capabilities(afs_v2net(fc->vnode), fc->cbi->server, + &fc->ac, fc->key); + switch (fc->ac.error) { + case 0: + afs_end_cursor(&fc->ac); + set_bit(AFS_SERVER_FL_PROBED, &fc->cbi->server->flags); + return true; + case -ECONNABORTED: + fc->ac.error = afs_abort_to_error(fc->ac.abort_code); + goto error; + case -ENOMEM: + case -ENONET: + goto error; + case -ENETUNREACH: + case -EHOSTUNREACH: + case -ECONNREFUSED: + case -ETIMEDOUT: + case -ETIME: + break; + default: + fc->ac.error = -EIO; + goto error; + } + } + +error: + afs_end_cursor(&fc->ac); + return false; +} + +/* + * If we haven't already, try probing the fileserver to get its capabilities. + * We try not to instigate parallel probes, but it's possible that the parallel + * probes will fail due to authentication failure when ours would succeed. + * + * TODO: Try sending an anonymous probe if an authenticated probe fails. 
+ */ +bool afs_probe_fileserver(struct afs_fs_cursor *fc) +{ + bool success; + int ret, retries = 0; + + _enter(""); + +retry: + if (test_bit(AFS_SERVER_FL_PROBED, &fc->cbi->server->flags)) { + _leave(" = t"); + return true; + } + + if (!test_and_set_bit_lock(AFS_SERVER_FL_PROBING, &fc->cbi->server->flags)) { + success = afs_do_probe_fileserver(fc); + clear_bit_unlock(AFS_SERVER_FL_PROBING, &fc->cbi->server->flags); + wake_up_bit(&fc->cbi->server->flags, AFS_SERVER_FL_PROBING); + _leave(" = t"); + return success; + } + + _debug("wait"); + ret = wait_on_bit(&fc->cbi->server->flags, AFS_SERVER_FL_PROBING, + TASK_INTERRUPTIBLE); + if (ret == -ERESTARTSYS) { + fc->ac.error = ret; + _leave(" = f [%d]", ret); + return false; + } + + retries++; + if (retries == 4) { + fc->ac.error = -ESTALE; + _leave(" = f [stale]"); + return false; + } + _debug("retry"); + goto retry; +} + +/* + * Get an update for a server's address list. + */ +static noinline bool afs_update_server_record(struct afs_fs_cursor *fc, struct afs_server *server) +{ + struct afs_addr_list *alist, *discard; + + _enter(""); + + alist = afs_vl_lookup_addrs(fc->vnode->volume->cell, fc->key, + &server->uuid); + if (IS_ERR(alist)) { + fc->ac.error = PTR_ERR(alist); + _leave(" = f [%d]", fc->ac.error); + return false; + } + + discard = alist; + if (server->addr_version != alist->version) { + write_lock(&server->fs_lock); + discard = rcu_dereference_protected(server->addresses, + lockdep_is_held(&server->fs_lock)); + rcu_assign_pointer(server->addresses, alist); + server->addr_version = alist->version; + write_unlock(&server->fs_lock); + } + + server->update_at = ktime_get_real_seconds() + afs_server_update_delay; + afs_put_addrlist(discard); + _leave(" = t"); + return true; +} + +/* + * See if a server's address list needs updating. 
+ */ +bool afs_check_server_record(struct afs_fs_cursor *fc, struct afs_server *server) +{ + time64_t now = ktime_get_real_seconds(); + long diff; + bool success; + int ret, retries = 0; + + _enter(""); + + ASSERT(server); + +retry: + diff = READ_ONCE(server->update_at) - now; + if (diff > 0) { + _leave(" = t [not now %ld]", diff); + return true; + } + + if (!test_and_set_bit_lock(AFS_SERVER_FL_UPDATING, &server->flags)) { + success = afs_update_server_record(fc, server); + clear_bit_unlock(AFS_SERVER_FL_UPDATING, &server->flags); + wake_up_bit(&server->flags, AFS_SERVER_FL_UPDATING); + _leave(" = %d", success); + return success; + } + + ret = wait_on_bit(&server->flags, AFS_SERVER_FL_UPDATING, + TASK_INTERRUPTIBLE); + if (ret == -ERESTARTSYS) { + fc->ac.error = ret; + _leave(" = f [intr]"); + return false; + } + + retries++; + if (retries == 4) { + _leave(" = f [stale]"); + ret = -ESTALE; + return false; + } + goto retry; } diff --git a/fs/afs/server_list.c b/fs/afs/server_list.c new file mode 100644 index 000000000000..26bad7032bba --- /dev/null +++ b/fs/afs/server_list.c @@ -0,0 +1,153 @@ +/* AFS fileserver list management. + * + * Copyright (C) 2017 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include "internal.h" + +void afs_put_serverlist(struct afs_net *net, struct afs_server_list *slist) +{ + int i; + + if (refcount_dec_and_test(&slist->usage)) { + for (i = 0; i < slist->nr_servers; i++) { + afs_put_cb_interest(net, slist->servers[i].cb_interest); + afs_put_server(net, slist->servers[i].server); + } + kfree(slist); + } +} + +/* + * Build a server list from a VLDB record. 
+ */ +struct afs_server_list *afs_alloc_server_list(struct afs_cell *cell, + struct key *key, + struct afs_vldb_entry *vldb, + u8 type_mask) +{ + struct afs_server_list *slist; + struct afs_server *server; + int ret = -ENOMEM, nr_servers = 0, i, j; + + for (i = 0; i < vldb->nr_servers; i++) + if (vldb->fs_mask[i] & type_mask) + nr_servers++; + + slist = kzalloc(sizeof(struct afs_server_list) + + sizeof(struct afs_server_entry) * nr_servers, + GFP_KERNEL); + if (!slist) + goto error; + + refcount_set(&slist->usage, 1); + + /* Make sure a records exists for each server in the list. */ + for (i = 0; i < vldb->nr_servers; i++) { + if (!(vldb->fs_mask[i] & type_mask)) + continue; + + server = afs_lookup_server(cell, key, &vldb->fs_server[i]); + if (IS_ERR(server)) { + ret = PTR_ERR(server); + if (ret == -ENOENT) + continue; + goto error_2; + } + + /* Insertion-sort by server pointer */ + for (j = 0; j < slist->nr_servers; j++) + if (slist->servers[j].server >= server) + break; + if (j < slist->nr_servers) { + if (slist->servers[j].server == server) { + afs_put_server(cell->net, server); + continue; + } + + memmove(slist->servers + j + 1, + slist->servers + j, + (slist->nr_servers - j) * sizeof(struct afs_server_entry)); + } + + slist->servers[j].server = server; + slist->nr_servers++; + } + + if (slist->nr_servers == 0) { + ret = -EDESTADDRREQ; + goto error_2; + } + + return slist; + +error_2: + afs_put_serverlist(cell->net, slist); +error: + return ERR_PTR(ret); +} + +/* + * Copy the annotations from an old server list to its potential replacement. + */ +bool afs_annotate_server_list(struct afs_server_list *new, + struct afs_server_list *old) +{ + struct afs_server *cur; + int i, j; + + if (old->nr_servers != new->nr_servers) + goto changed; + + for (i = 0; i < old->nr_servers; i++) + if (old->servers[i].server != new->servers[i].server) + goto changed; + + return false; + +changed: + /* Maintain the same current server as before if possible. 
*/ + cur = old->servers[old->index].server; + for (j = 0; j < new->nr_servers; j++) { + if (new->servers[j].server == cur) { + new->index = j; + break; + } + } + + /* Keep the old callback interest records where possible so that we + * maintain callback interception. + */ + i = 0; + j = 0; + while (i < old->nr_servers && j < new->nr_servers) { + if (new->servers[j].server == old->servers[i].server) { + struct afs_cb_interest *cbi = old->servers[i].cb_interest; + if (cbi) { + new->servers[j].cb_interest = cbi; + refcount_inc(&cbi->usage); + } + i++; + j++; + continue; + } + + if (new->servers[j].server < old->servers[i].server) { + j++; + continue; + } + + i++; + continue; + } + + return true; +} diff --git a/fs/afs/super.c b/fs/afs/super.c index 3d53b78b350d..af1e769aaebf 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -142,9 +142,9 @@ void __exit afs_fs_exit(void) */ static int afs_show_devname(struct seq_file *m, struct dentry *root) { - struct afs_super_info *as = root->d_sb->s_fs_info; + struct afs_super_info *as = AFS_FS_S(root->d_sb); struct afs_volume *volume = as->volume; - struct afs_cell *cell = volume->cell; + struct afs_cell *cell = as->cell; const char *suf = ""; char pref = '%'; @@ -162,7 +162,7 @@ static int afs_show_devname(struct seq_file *m, struct dentry *root) break; } - seq_printf(m, "%c%s:%s%s", pref, cell->name, volume->vlocation->vldb.name, suf); + seq_printf(m, "%c%s:%s%s", pref, cell->name, volume->name, suf); return 0; } @@ -334,14 +334,16 @@ static int afs_parse_device_name(struct afs_mount_params *params, static int afs_test_super(struct super_block *sb, void *data) { struct afs_super_info *as1 = data; - struct afs_super_info *as = sb->s_fs_info; + struct afs_super_info *as = AFS_FS_S(sb); - return as->net == as1->net && as->volume == as1->volume; + return as->net == as1->net && as->volume->vid == as1->volume->vid; } static int afs_set_super(struct super_block *sb, void *data) { - sb->s_fs_info = data; + struct afs_super_info *as = 
data; + + sb->s_fs_info = as; return set_anon_super(sb, NULL); } @@ -351,7 +353,7 @@ static int afs_set_super(struct super_block *sb, void *data) static int afs_fill_super(struct super_block *sb, struct afs_mount_params *params) { - struct afs_super_info *as = sb->s_fs_info; + struct afs_super_info *as = AFS_FS_S(sb); struct afs_fid fid; struct inode *inode = NULL; int ret; @@ -368,13 +370,15 @@ static int afs_fill_super(struct super_block *sb, if (ret) return ret; sb->s_bdi->ra_pages = VM_MAX_READAHEAD * 1024 / PAGE_SIZE; - strlcpy(sb->s_id, as->volume->vlocation->vldb.name, sizeof(sb->s_id)); + sprintf(sb->s_id, "%u", as->volume->vid); + + afs_activate_volume(as->volume); /* allocate the root inode and dentry */ fid.vid = as->volume->vid; fid.vnode = 1; fid.unique = 1; - inode = afs_iget(sb, params->key, &fid, NULL, NULL); + inode = afs_iget(sb, params->key, &fid, NULL, NULL, NULL); if (IS_ERR(inode)) return PTR_ERR(inode); @@ -426,7 +430,7 @@ static struct dentry *afs_mount(struct file_system_type *fs_type, { struct afs_mount_params params; struct super_block *sb; - struct afs_volume *vol; + struct afs_volume *candidate; struct key *key; struct afs_super_info *as; int ret; @@ -464,15 +468,19 @@ static struct dentry *afs_mount(struct file_system_type *fs_type, ret = -ENOMEM; as = afs_alloc_sbi(¶ms); if (!as) - goto error; + goto error_key; - /* parse the device name */ - vol = afs_volume_lookup(¶ms); - if (IS_ERR(vol)) { - ret = PTR_ERR(vol); - goto error; + /* Assume we're going to need a volume record; at the very least we can + * use it to update the volume record if we have one already. This + * checks that the volume exists within the cell. 
+ */ + candidate = afs_create_volume(¶ms); + if (IS_ERR(candidate)) { + ret = PTR_ERR(candidate); + goto error_as; } - as->volume = vol; + + as->volume = candidate; /* allocate a deviceless superblock */ sb = sget(fs_type, afs_test_super, afs_set_super, flags, as); @@ -503,11 +511,13 @@ static struct dentry *afs_mount(struct file_system_type *fs_type, error_sb: deactivate_locked_super(sb); + goto error_key; error_as: afs_destroy_sbi(as); +error_key: + key_put(params.key); error: afs_put_cell(params.net, params.cell); - key_put(params.key); _leave(" = %d", ret); return ERR_PTR(ret); } @@ -519,8 +529,9 @@ static void afs_kill_super(struct super_block *sb) /* Clear the callback interests (which will do ilookup5) before * deactivating the superblock. */ - afs_clear_callback_interests(as->net, as->volume); + afs_clear_callback_interests(as->net, as->volume->servers); kill_anon_super(sb); + afs_deactivate_volume(as->volume); afs_destroy_sbi(as); } @@ -533,7 +544,7 @@ static void afs_i_init_once(void *_vnode) memset(vnode, 0, sizeof(*vnode)); inode_init_once(&vnode->vfs_inode); - init_waitqueue_head(&vnode->update_waitq); + mutex_init(&vnode->io_lock); mutex_init(&vnode->validate_lock); spin_lock_init(&vnode->writeback_lock); spin_lock_init(&vnode->lock); @@ -561,7 +572,6 @@ static struct inode *afs_alloc_inode(struct super_block *sb) memset(&vnode->status, 0, sizeof(vnode->status)); vnode->volume = NULL; - vnode->update_cnt = 0; vnode->flags = 1 << AFS_VNODE_UNSET; _leave(" = %p", &vnode->vfs_inode); @@ -597,6 +607,7 @@ static void afs_destroy_inode(struct inode *inode) */ static int afs_statfs(struct dentry *dentry, struct kstatfs *buf) { + struct afs_fs_cursor fc; struct afs_volume_status vs; struct afs_vnode *vnode = AFS_FS_I(d_inode(dentry)); struct key *key; @@ -606,21 +617,32 @@ static int afs_statfs(struct dentry *dentry, struct kstatfs *buf) if (IS_ERR(key)) return PTR_ERR(key); - ret = afs_vnode_get_volume_status(vnode, key, &vs); - key_put(key); - if (ret < 0) 
{ - _leave(" = %d", ret); - return ret; + ret = -ERESTARTSYS; + if (afs_begin_vnode_operation(&fc, vnode, key)) { + fc.flags |= AFS_FS_CURSOR_NO_VSLEEP; + while (afs_select_fileserver(&fc)) { + fc.cb_break = vnode->cb_break + vnode->cb_s_break; + afs_fs_get_volume_status(&fc, &vs); + } + + afs_check_for_remote_deletion(&fc, fc.vnode); + afs_vnode_commit_status(&fc, vnode, fc.cb_break); + ret = afs_end_vnode_operation(&fc); } - buf->f_type = dentry->d_sb->s_magic; - buf->f_bsize = AFS_BLOCK_SIZE; - buf->f_namelen = AFSNAMEMAX - 1; + key_put(key); - if (vs.max_quota == 0) - buf->f_blocks = vs.part_max_blocks; - else - buf->f_blocks = vs.max_quota; - buf->f_bavail = buf->f_bfree = buf->f_blocks - vs.blocks_in_use; - return 0; + if (ret == 0) { + buf->f_type = dentry->d_sb->s_magic; + buf->f_bsize = AFS_BLOCK_SIZE; + buf->f_namelen = AFSNAMEMAX - 1; + + if (vs.max_quota == 0) + buf->f_blocks = vs.part_max_blocks; + else + buf->f_blocks = vs.max_quota; + buf->f_bavail = buf->f_bfree = buf->f_blocks - vs.blocks_in_use; + } + + return ret; } diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c index 1d1e7df77dd5..173c652fe875 100644 --- a/fs/afs/vlclient.c +++ b/fs/afs/vlclient.c @@ -16,14 +16,15 @@ #include "internal.h" /* - * deliver reply data to a VL.GetEntryByXXX call + * Deliver reply data to a VL.GetEntryByNameU call. 
*/ -static int afs_deliver_vl_get_entry_by_xxx(struct afs_call *call) +static int afs_deliver_vl_get_entry_by_name_u(struct afs_call *call) { - struct afs_cache_vlocation *entry; - __be32 *bp; + struct afs_uvldbentry__xdr *uvldb; + struct afs_vldb_entry *entry; + bool new_only = false; u32 tmp; - int loop, ret; + int i, ret; _enter(""); @@ -32,152 +33,270 @@ static int afs_deliver_vl_get_entry_by_xxx(struct afs_call *call) return ret; /* unmarshall the reply once we've received all of it */ + uvldb = call->buffer; entry = call->reply[0]; - bp = call->buffer; - for (loop = 0; loop < 64; loop++) - entry->name[loop] = ntohl(*bp++); - entry->name[loop] = 0; - bp++; /* final NUL */ + for (i = 0; i < ARRAY_SIZE(uvldb->name) - 1; i++) + entry->name[i] = (u8)ntohl(uvldb->name[i]); + entry->name[i] = 0; + entry->name_len = strlen(entry->name); - bp++; /* type */ - entry->nservers = ntohl(*bp++); - - for (loop = 0; loop < 8; loop++) { - entry->servers[loop].srx_family = AF_RXRPC; - entry->servers[loop].srx_service = FS_SERVICE; - entry->servers[loop].transport_type = SOCK_DGRAM; - entry->servers[loop].transport_len = sizeof(entry->servers[loop].transport.sin6); - entry->servers[loop].transport.sin6.sin6_family = AF_INET6; - entry->servers[loop].transport.sin6.sin6_port = htons(AFS_FS_PORT); - entry->servers[loop].transport.sin6.sin6_flowinfo = 0; - entry->servers[loop].transport.sin6.sin6_scope_id = 0; - entry->servers[loop].transport.sin6.sin6_addr.s6_addr32[0] = 0; - entry->servers[loop].transport.sin6.sin6_addr.s6_addr32[1] = 0; - entry->servers[loop].transport.sin6.sin6_addr.s6_addr32[2] = htonl(0xffff); - entry->servers[loop].transport.sin6.sin6_addr.s6_addr32[3] = *bp++; + /* If there is a new replication site that we can use, ignore all the + * sites that aren't marked as new. 
+ */ + for (i = 0; i < AFS_NMAXNSERVERS; i++) { + tmp = ntohl(uvldb->serverFlags[i]); + if (!(tmp & AFS_VLSF_DONTUSE) && + (tmp & AFS_VLSF_NEWREPSITE)) + new_only = true; } - bp += 8; /* partition IDs */ + for (i = 0; i < AFS_NMAXNSERVERS; i++) { + struct afs_uuid__xdr *xdr; + struct afs_uuid *uuid; + int j; - for (loop = 0; loop < 8; loop++) { - tmp = ntohl(*bp++); - entry->srvtmask[loop] = 0; + tmp = ntohl(uvldb->serverFlags[i]); + if (tmp & AFS_VLSF_DONTUSE || + (new_only && !(tmp & AFS_VLSF_NEWREPSITE))) + continue; if (tmp & AFS_VLSF_RWVOL) - entry->srvtmask[loop] |= AFS_VOL_VTM_RW; + entry->fs_mask[i] |= AFS_VOL_VTM_RW; if (tmp & AFS_VLSF_ROVOL) - entry->srvtmask[loop] |= AFS_VOL_VTM_RO; + entry->fs_mask[i] |= AFS_VOL_VTM_RO; if (tmp & AFS_VLSF_BACKVOL) - entry->srvtmask[loop] |= AFS_VOL_VTM_BAK; + entry->fs_mask[i] |= AFS_VOL_VTM_BAK; + if (!entry->fs_mask[i]) + continue; + + xdr = &uvldb->serverNumber[i]; + uuid = (struct afs_uuid *)&entry->fs_server[i]; + uuid->time_low = xdr->time_low; + uuid->time_mid = htons(ntohl(xdr->time_mid)); + uuid->time_hi_and_version = htons(ntohl(xdr->time_hi_and_version)); + uuid->clock_seq_hi_and_reserved = (u8)ntohl(xdr->clock_seq_hi_and_reserved); + uuid->clock_seq_low = (u8)ntohl(xdr->clock_seq_low); + for (j = 0; j < 6; j++) + uuid->node[j] = (u8)ntohl(xdr->node[j]); + + entry->nr_servers++; } - entry->vid[0] = ntohl(*bp++); - entry->vid[1] = ntohl(*bp++); - entry->vid[2] = ntohl(*bp++); + for (i = 0; i < AFS_MAXTYPES; i++) + entry->vid[i] = ntohl(uvldb->volumeId[i]); - bp++; /* clone ID */ - - tmp = ntohl(*bp++); /* flags */ - entry->vidmask = 0; + tmp = ntohl(uvldb->flags); if (tmp & AFS_VLF_RWEXISTS) - entry->vidmask |= AFS_VOL_VTM_RW; + __set_bit(AFS_VLDB_HAS_RW, &entry->flags); if (tmp & AFS_VLF_ROEXISTS) - entry->vidmask |= AFS_VOL_VTM_RO; + __set_bit(AFS_VLDB_HAS_RO, &entry->flags); if (tmp & AFS_VLF_BACKEXISTS) - entry->vidmask |= AFS_VOL_VTM_BAK; - if (!entry->vidmask) - return -EBADMSG; + 
__set_bit(AFS_VLDB_HAS_BAK, &entry->flags); + + if (!(tmp & (AFS_VLF_RWEXISTS | AFS_VLF_ROEXISTS | AFS_VLF_BACKEXISTS))) { + entry->error = -ENOMEDIUM; + __set_bit(AFS_VLDB_QUERY_ERROR, &entry->flags); + } + + __set_bit(AFS_VLDB_QUERY_VALID, &entry->flags); + _leave(" = 0 [done]"); + return 0; +} + +static void afs_destroy_vl_get_entry_by_name_u(struct afs_call *call) +{ + kfree(call->reply[0]); + afs_flat_call_destructor(call); +} + +/* + * VL.GetEntryByNameU operation type. + */ +static const struct afs_call_type afs_RXVLGetEntryByNameU = { + .name = "VL.GetEntryByNameU", + .deliver = afs_deliver_vl_get_entry_by_name_u, + .destructor = afs_destroy_vl_get_entry_by_name_u, +}; + +/* + * Dispatch a get volume entry by name or ID operation (uuid variant). If the + * volname is a decimal number then it's a volume ID not a volume name. + */ +struct afs_vldb_entry *afs_vl_get_entry_by_name_u(struct afs_net *net, + struct afs_addr_cursor *ac, + struct key *key, + const char *volname, + int volnamesz) +{ + struct afs_vldb_entry *entry; + struct afs_call *call; + size_t reqsz, padsz; + __be32 *bp; + + _enter(""); + + padsz = (4 - (volnamesz & 3)) & 3; + reqsz = 8 + volnamesz + padsz; + + entry = kzalloc(sizeof(struct afs_vldb_entry), GFP_KERNEL); + if (!entry) + return ERR_PTR(-ENOMEM); + + call = afs_alloc_flat_call(net, &afs_RXVLGetEntryByNameU, reqsz, + sizeof(struct afs_uvldbentry__xdr)); + if (!call) { + kfree(entry); + return ERR_PTR(-ENOMEM); + } + + call->key = key; + call->reply[0] = entry; + call->ret_reply0 = true; + + /* Marshall the parameters */ + bp = call->request; + *bp++ = htonl(VLGETENTRYBYNAMEU); + *bp++ = htonl(volnamesz); + memcpy(bp, volname, volnamesz); + if (padsz > 0) + memset((void *)bp + volnamesz, 0, padsz); + + return (struct afs_vldb_entry *)afs_make_call(ac, call, GFP_KERNEL, false); +} + +/* + * Deliver reply data to a VL.GetAddrsU call. 
+ * + * GetAddrsU(IN ListAddrByAttributes *inaddr, + * OUT afsUUID *uuidp1, + * OUT uint32_t *uniquifier, + * OUT uint32_t *nentries, + * OUT bulkaddrs *blkaddrs); + */ +static int afs_deliver_vl_get_addrs_u(struct afs_call *call) +{ + struct afs_addr_list *alist; + __be32 *bp; + u32 uniquifier, nentries, count; + int i, ret; + + _enter("{%u,%zu/%u}", call->unmarshall, call->offset, call->count); + +again: + switch (call->unmarshall) { + case 0: + call->offset = 0; + call->unmarshall++; + + /* Extract the returned uuid, uniquifier, nentries and blkaddrs size */ + case 1: + ret = afs_extract_data(call, call->buffer, + sizeof(struct afs_uuid__xdr) + 3 * sizeof(__be32), + true); + if (ret < 0) + return ret; + + bp = call->buffer + sizeof(struct afs_uuid__xdr); + uniquifier = ntohl(*bp++); + nentries = ntohl(*bp++); + count = ntohl(*bp); + + nentries = min(nentries, count); + alist = afs_alloc_addrlist(nentries, FS_SERVICE, AFS_FS_PORT); + if (!alist) + return -ENOMEM; + alist->version = uniquifier; + call->reply[0] = alist; + call->count = count; + call->count2 = nentries; + call->offset = 0; + call->unmarshall++; + + /* Extract entries */ + case 2: + count = min(call->count, 4U); + ret = afs_extract_data(call, call->buffer, + count * sizeof(__be32), + call->count > 4); + if (ret < 0) + return ret; + + alist = call->reply[0]; + bp = call->buffer; + for (i = 0; i < count; i++) + if (alist->nr_addrs < call->count2) + afs_merge_fs_addr4(alist, *bp++); + + call->count -= count; + if (call->count > 0) + goto again; + call->offset = 0; + call->unmarshall++; + break; + } _leave(" = 0 [done]"); return 0; } -/* - * VL.GetEntryByName operation type - */ -static const struct afs_call_type afs_RXVLGetEntryByName = { - .name = "VL.GetEntryByName", - .deliver = afs_deliver_vl_get_entry_by_xxx, - .destructor = afs_flat_call_destructor, -}; - -/* - * VL.GetEntryById operation type - */ -static const struct afs_call_type afs_RXVLGetEntryById = { - .name = "VL.GetEntryById", - .deliver 
= afs_deliver_vl_get_entry_by_xxx, - .destructor = afs_flat_call_destructor, -}; - -/* - * dispatch a get volume entry by name operation - */ -int afs_vl_get_entry_by_name(struct afs_net *net, - struct afs_addr_cursor *ac, - struct key *key, - const char *volname, - struct afs_cache_vlocation *entry, - bool async) +static void afs_vl_get_addrs_u_destructor(struct afs_call *call) { - struct afs_call *call; - size_t volnamesz, reqsz, padsz; - __be32 *bp; - - _enter(""); - - volnamesz = strlen(volname); - padsz = (4 - (volnamesz & 3)) & 3; - reqsz = 8 + volnamesz + padsz; - - call = afs_alloc_flat_call(net, &afs_RXVLGetEntryByName, reqsz, 384); - if (!call) - return -ENOMEM; - - call->key = key; - call->reply[0] = entry; - - /* marshall the parameters */ - bp = call->request; - *bp++ = htonl(VLGETENTRYBYNAME); - *bp++ = htonl(volnamesz); - memcpy(bp, volname, volnamesz); - if (padsz > 0) - memset((void *) bp + volnamesz, 0, padsz); - - /* initiate the call */ - return afs_make_call(ac, call, GFP_KERNEL, async); + afs_put_server(call->net, (struct afs_server *)call->reply[0]); + kfree(call->reply[1]); + return afs_flat_call_destructor(call); } /* - * dispatch a get volume entry by ID operation + * VL.GetAddrsU operation type. */ -int afs_vl_get_entry_by_id(struct afs_net *net, - struct afs_addr_cursor *ac, - struct key *key, - afs_volid_t volid, - afs_voltype_t voltype, - struct afs_cache_vlocation *entry, - bool async) +static const struct afs_call_type afs_RXVLGetAddrsU = { + .name = "VL.GetAddrsU", + .deliver = afs_deliver_vl_get_addrs_u, + .destructor = afs_vl_get_addrs_u_destructor, +}; + +/* + * Dispatch an operation to get the addresses for a server, where the server is + * nominated by UUID. 
+ */ +struct afs_addr_list *afs_vl_get_addrs_u(struct afs_net *net, + struct afs_addr_cursor *ac, + struct key *key, + const uuid_t *uuid) { + struct afs_ListAddrByAttributes__xdr *r; + const struct afs_uuid *u = (const struct afs_uuid *)uuid; struct afs_call *call; __be32 *bp; + int i; _enter(""); - call = afs_alloc_flat_call(net, &afs_RXVLGetEntryById, 12, 384); + call = afs_alloc_flat_call(net, &afs_RXVLGetAddrsU, + sizeof(__be32) + sizeof(struct afs_ListAddrByAttributes__xdr), + sizeof(struct afs_uuid__xdr) + 3 * sizeof(__be32)); if (!call) - return -ENOMEM; + return ERR_PTR(-ENOMEM); call->key = key; - call->reply[0] = entry; + call->reply[0] = NULL; + call->ret_reply0 = true; - /* marshall the parameters */ + /* Marshall the parameters */ bp = call->request; - *bp++ = htonl(VLGETENTRYBYID); - *bp++ = htonl(volid); - *bp = htonl(voltype); + *bp++ = htonl(VLGETADDRSU); + r = (struct afs_ListAddrByAttributes__xdr *)bp; + r->Mask = htonl(AFS_VLADDR_UUID); + r->ipaddr = 0; + r->index = 0; + r->spare = 0; + r->uuid.time_low = u->time_low; + r->uuid.time_mid = htonl(ntohs(u->time_mid)); + r->uuid.time_hi_and_version = htonl(ntohs(u->time_hi_and_version)); + r->uuid.clock_seq_hi_and_reserved = htonl(u->clock_seq_hi_and_reserved); + r->uuid.clock_seq_low = htonl(u->clock_seq_low); + for (i = 0; i < 6; i++) + r->uuid.node[i] = ntohl(u->node[i]); - /* initiate the call */ - return afs_make_call(ac, call, GFP_KERNEL, async); + return (struct afs_addr_list *)afs_make_call(ac, call, GFP_KERNEL, false); } diff --git a/fs/afs/vlocation.c b/fs/afs/vlocation.c deleted file mode 100644 index 52c31ad0ef60..000000000000 --- a/fs/afs/vlocation.c +++ /dev/null @@ -1,669 +0,0 @@ -/* AFS volume location management - * - * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved. 
- * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include -#include -#include -#include -#include -#include "internal.h" - -struct workqueue_struct *afs_vlocation_update_worker; - -static unsigned afs_vlocation_timeout = 10; /* volume location timeout in seconds */ -static unsigned afs_vlocation_update_timeout = 10 * 60; - -/* - * iterate through the VL servers in a cell until one of them admits knowing - * about the volume in question - */ -static int afs_vlocation_access_vl_by_name(struct afs_vlocation *vl, - struct key *key, - struct afs_cache_vlocation *vldb) -{ - struct afs_addr_cursor ac; - int ret; - - _enter("%s,%s", vl->cell->name, vl->vldb.name); - - ret = afs_set_vl_cursor(&ac, vl->cell); - if (ret < 0) - return ret; - - down_write(&vl->cell->vl_sem); - - ret = -ENOMEDIUM; - while (afs_iterate_addresses(&ac)) { - _debug("CellServ[%hu]: %pIS", ac.index, &ac.addr->transport); - - /* attempt to access the VL server */ - ac.error = afs_vl_get_entry_by_name(vl->cell->net, &ac, key, - vl->vldb.name, vldb, false); - switch (ac.error) { - case 0: - goto out; - case -ENOMEM: - case -ENONET: - case -ENETUNREACH: - case -EHOSTUNREACH: - case -ECONNREFUSED: - if (ac.error == -ENOMEM || ac.error == -ENONET) - goto out; - break; - case -ENOMEDIUM: - case -EKEYREJECTED: - case -EKEYEXPIRED: - ac.responded = true; - goto out; - default: - ac.responded = true; - ac.error = -EIO; - break; - } - } - -out: - up_write(&vl->cell->vl_sem); - ret = afs_end_cursor(&ac); - _leave(" = %d", ret); - return ret; -} - -/* - * iterate through the VL servers in a cell until one of them admits knowing - * about the volume in question - */ -static int afs_vlocation_access_vl_by_id(struct afs_vlocation *vl, - struct key 
*key, - afs_volid_t volid, - afs_voltype_t voltype, - struct afs_cache_vlocation *vldb) -{ - struct afs_addr_cursor ac; - int ret; - - _enter("%s,%x,%d,", vl->cell->name, volid, voltype); - - ret = afs_set_vl_cursor(&ac, vl->cell); - if (ret < 0) - return ret; - - down_write(&vl->cell->vl_sem); - ret = -ENOMEDIUM; - while (afs_iterate_addresses(&ac)) { - _debug("CellServ[%hu]: %pIS", ac.index, &ac.addr->transport); - - /* attempt to access the VL server */ - ac.error = afs_vl_get_entry_by_id(vl->cell->net, &ac, key, volid, - voltype, vldb, false); - switch (ac.error) { - case 0: - goto out; - case -ENOMEM: - case -ENONET: - case -ENETUNREACH: - case -EHOSTUNREACH: - case -ECONNREFUSED: - if (ac.error == -ENOMEM || ac.error == -ENONET) - goto out; - goto rotate; - case -EBUSY: - ac.responded = true; - vl->upd_busy_cnt++; - if (vl->upd_busy_cnt <= 3) { - if (vl->upd_busy_cnt > 1) { - /* second+ BUSY - sleep a little bit */ - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(1); - } - continue; - } - break; - case -ENOMEDIUM: - ac.responded = true; - vl->upd_rej_cnt++; - goto rotate; - default: - ac.responded = true; - ac.error = -EIO; - goto rotate; - } - - /* rotate the server records upon lookup failure */ - rotate: - vl->upd_busy_cnt = 0; - } - -out: - if (ac.error < 0 && vl->upd_rej_cnt > 0) { - printk(KERN_NOTICE "kAFS:" - " Active volume no longer valid '%s'\n", - vl->vldb.name); - vl->valid = 0; - ac.error = -ENOMEDIUM; - } - - up_write(&vl->cell->vl_sem); - ret = afs_end_cursor(&ac); - _leave(" = %d", ret); - return ret; -} - -/* - * allocate a volume location record - */ -static struct afs_vlocation *afs_vlocation_alloc(struct afs_cell *cell, - const char *name, - size_t namesz) -{ - struct afs_vlocation *vl; - - vl = kzalloc(sizeof(struct afs_vlocation), GFP_KERNEL); - if (vl) { - vl->cell = cell; - vl->state = AFS_VL_NEW; - atomic_set(&vl->usage, 1); - INIT_LIST_HEAD(&vl->link); - INIT_LIST_HEAD(&vl->grave); - INIT_LIST_HEAD(&vl->update); - 
init_waitqueue_head(&vl->waitq); - spin_lock_init(&vl->lock); - memcpy(vl->vldb.name, name, namesz); - } - - _leave(" = %p", vl); - return vl; -} - -/* - * update record if we found it in the cache - */ -static int afs_vlocation_update_record(struct afs_vlocation *vl, - struct key *key, - struct afs_cache_vlocation *vldb) -{ - afs_voltype_t voltype; - afs_volid_t vid; - int ret; - - /* try to look up a cached volume in the cell VL databases by ID */ - _debug("Locally Cached: %s %02x", vl->vldb.name, vl->vldb.vidmask); - - _debug("Vids: %08x %08x %08x", - vl->vldb.vid[0], - vl->vldb.vid[1], - vl->vldb.vid[2]); - - if (vl->vldb.vidmask & AFS_VOL_VTM_RW) { - vid = vl->vldb.vid[0]; - voltype = AFSVL_RWVOL; - } else if (vl->vldb.vidmask & AFS_VOL_VTM_RO) { - vid = vl->vldb.vid[1]; - voltype = AFSVL_ROVOL; - } else if (vl->vldb.vidmask & AFS_VOL_VTM_BAK) { - vid = vl->vldb.vid[2]; - voltype = AFSVL_BACKVOL; - } else { - BUG(); - vid = 0; - voltype = 0; - } - - /* contact the server to make sure the volume is still available - * - TODO: need to handle disconnected operation here - */ - ret = afs_vlocation_access_vl_by_id(vl, key, vid, voltype, vldb); - switch (ret) { - /* net error */ - default: - printk(KERN_WARNING "kAFS:" - " failed to update volume '%s' (%x) up in '%s': %d\n", - vl->vldb.name, vid, vl->cell->name, ret); - _leave(" = %d", ret); - return ret; - - /* pulled from local cache into memory */ - case 0: - _leave(" = 0"); - return 0; - - /* uh oh... 
looks like the volume got deleted */ - case -ENOMEDIUM: - printk(KERN_ERR "kAFS:" - " volume '%s' (%x) does not exist '%s'\n", - vl->vldb.name, vid, vl->cell->name); - - /* TODO: make existing record unavailable */ - _leave(" = %d", ret); - return ret; - } -} - -/* - * apply the update to a VL record - */ -static void afs_vlocation_apply_update(struct afs_vlocation *vl, - struct afs_cache_vlocation *vldb) -{ - _debug("Done VL Lookup: %s %02x", vldb->name, vldb->vidmask); - - _debug("Vids: %08x %08x %08x", - vldb->vid[0], vldb->vid[1], vldb->vid[2]); - - if (strcmp(vldb->name, vl->vldb.name) != 0) - printk(KERN_NOTICE "kAFS:" - " name of volume '%s' changed to '%s' on server\n", - vl->vldb.name, vldb->name); - - vl->vldb = *vldb; -} - -/* - * fill in a volume location record, consulting the cache and the VL server - * both - */ -static int afs_vlocation_fill_in_record(struct afs_vlocation *vl, - struct key *key) -{ - struct afs_cache_vlocation vldb; - int ret; - - _enter(""); - - ASSERTCMP(vl->valid, ==, 0); - - memset(&vldb, 0, sizeof(vldb)); - - /* Try to look up an unknown volume in the cell VL databases by name */ - ret = afs_vlocation_access_vl_by_name(vl, key, &vldb); - if (ret < 0) { - printk("kAFS: failed to locate '%s' in cell '%s'\n", - vl->vldb.name, vl->cell->name); - return ret; - } - - afs_vlocation_apply_update(vl, &vldb); - _leave(" = 0"); - return 0; -} - -/* - * queue a vlocation record for updates - */ -static void afs_vlocation_queue_for_updates(struct afs_net *net, - struct afs_vlocation *vl) -{ - struct afs_vlocation *xvl; - - /* wait at least 10 minutes before updating... */ - vl->update_at = ktime_get_real_seconds() + - afs_vlocation_update_timeout; - - spin_lock(&net->vl_updates_lock); - - if (!list_empty(&net->vl_updates)) { - /* ... 
but wait at least 1 second more than the newest record - * already queued so that we don't spam the VL server suddenly - * with lots of requests - */ - xvl = list_entry(net->vl_updates.prev, - struct afs_vlocation, update); - if (vl->update_at <= xvl->update_at) - vl->update_at = xvl->update_at + 1; - } else if (net->live) { - queue_delayed_work(afs_vlocation_update_worker, - &net->vl_updater, - afs_vlocation_update_timeout * HZ); - } - - list_add_tail(&vl->update, &net->vl_updates); - spin_unlock(&net->vl_updates_lock); -} - -/* - * lookup volume location - * - iterate through the VL servers in a cell until one of them admits knowing - * about the volume in question - * - lookup in the local cache if not able to find on the VL server - * - insert/update in the local cache if did get a VL response - */ -struct afs_vlocation *afs_vlocation_lookup(struct afs_net *net, - struct afs_cell *cell, - struct key *key, - const char *name, - size_t namesz) -{ - struct afs_vlocation *vl; - int ret; - - _enter("{%s},{%x},%*.*s,%zu", - cell->name, key_serial(key), - (int) namesz, (int) namesz, name, namesz); - - if (namesz >= sizeof(vl->vldb.name)) { - _leave(" = -ENAMETOOLONG"); - return ERR_PTR(-ENAMETOOLONG); - } - - /* see if we have an in-memory copy first */ - down_write(&cell->vl_sem); - spin_lock(&cell->vl_lock); - list_for_each_entry(vl, &cell->vl_list, link) { - if (vl->vldb.name[namesz] != '\0') - continue; - if (memcmp(vl->vldb.name, name, namesz) == 0) - goto found_in_memory; - } - spin_unlock(&cell->vl_lock); - - /* not in the cell's in-memory lists - create a new record */ - vl = afs_vlocation_alloc(cell, name, namesz); - if (!vl) { - up_write(&cell->vl_sem); - return ERR_PTR(-ENOMEM); - } - - afs_get_cell(cell); - - list_add_tail(&vl->link, &cell->vl_list); - vl->state = AFS_VL_CREATING; - up_write(&cell->vl_sem); - -fill_in_record: - ret = afs_vlocation_fill_in_record(vl, key); - if (ret < 0) - goto error_abandon; - spin_lock(&vl->lock); - vl->state = 
AFS_VL_VALID; - spin_unlock(&vl->lock); - wake_up(&vl->waitq); - - /* schedule for regular updates */ - afs_vlocation_queue_for_updates(net, vl); - goto success; - -found_in_memory: - /* found in memory */ - _debug("found in memory"); - atomic_inc(&vl->usage); - spin_unlock(&cell->vl_lock); - if (!list_empty(&vl->grave)) { - spin_lock(&net->vl_graveyard_lock); - list_del_init(&vl->grave); - spin_unlock(&net->vl_graveyard_lock); - } - up_write(&cell->vl_sem); - - /* see if it was an abandoned record that we might try filling in */ - spin_lock(&vl->lock); - while (vl->state != AFS_VL_VALID) { - afs_vlocation_state_t state = vl->state; - - _debug("invalid [state %d]", state); - - if (state == AFS_VL_NEW || state == AFS_VL_NO_VOLUME) { - vl->state = AFS_VL_CREATING; - spin_unlock(&vl->lock); - goto fill_in_record; - } - - /* must now wait for creation or update by someone else to - * complete */ - _debug("wait"); - - spin_unlock(&vl->lock); - ret = wait_event_interruptible(vl->waitq, - vl->state == AFS_VL_NEW || - vl->state == AFS_VL_VALID || - vl->state == AFS_VL_NO_VOLUME); - if (ret < 0) - goto error; - spin_lock(&vl->lock); - } - spin_unlock(&vl->lock); - -success: - _leave(" = %p", vl); - return vl; - -error_abandon: - spin_lock(&vl->lock); - vl->state = AFS_VL_NEW; - spin_unlock(&vl->lock); - wake_up(&vl->waitq); -error: - ASSERT(vl != NULL); - afs_put_vlocation(net, vl); - _leave(" = %d", ret); - return ERR_PTR(ret); -} - -/* - * finish using a volume location record - */ -void afs_put_vlocation(struct afs_net *net, struct afs_vlocation *vl) -{ - if (!vl) - return; - - _enter("%s", vl->vldb.name); - - ASSERTCMP(atomic_read(&vl->usage), >, 0); - - if (likely(!atomic_dec_and_test(&vl->usage))) { - _leave(""); - return; - } - - spin_lock(&net->vl_graveyard_lock); - if (atomic_read(&vl->usage) == 0) { - _debug("buried"); - list_move_tail(&vl->grave, &net->vl_graveyard); - vl->time_of_death = ktime_get_real_seconds(); - queue_delayed_work(afs_wq, &net->vl_reaper, - 
afs_vlocation_timeout * HZ); - - /* suspend updates on this record */ - if (!list_empty(&vl->update)) { - spin_lock(&net->vl_updates_lock); - list_del_init(&vl->update); - spin_unlock(&net->vl_updates_lock); - } - } - spin_unlock(&net->vl_graveyard_lock); - _leave(" [killed?]"); -} - -/* - * destroy a dead volume location record - */ -static void afs_vlocation_destroy(struct afs_net *net, struct afs_vlocation *vl) -{ - _enter("%p", vl); - - afs_put_cell(net, vl->cell); - kfree(vl); -} - -/* - * reap dead volume location records - */ -void afs_vlocation_reaper(struct work_struct *work) -{ - LIST_HEAD(corpses); - struct afs_vlocation *vl; - struct afs_net *net = container_of(work, struct afs_net, vl_reaper.work); - unsigned long delay, expiry; - time64_t now; - - _enter(""); - - now = ktime_get_real_seconds(); - spin_lock(&net->vl_graveyard_lock); - - while (!list_empty(&net->vl_graveyard)) { - vl = list_entry(net->vl_graveyard.next, - struct afs_vlocation, grave); - - _debug("check %p", vl); - - /* the queue is ordered most dead first */ - if (net->live) { - expiry = vl->time_of_death + afs_vlocation_timeout; - if (expiry > now) { - delay = (expiry - now) * HZ; - _debug("delay %lu", delay); - mod_delayed_work(afs_wq, &net->vl_reaper, delay); - break; - } - } - - spin_lock(&vl->cell->vl_lock); - if (atomic_read(&vl->usage) > 0) { - _debug("no reap"); - list_del_init(&vl->grave); - } else { - _debug("reap"); - list_move_tail(&vl->grave, &corpses); - list_del_init(&vl->link); - } - spin_unlock(&vl->cell->vl_lock); - } - - spin_unlock(&net->vl_graveyard_lock); - - /* now reap the corpses we've extracted */ - while (!list_empty(&corpses)) { - vl = list_entry(corpses.next, struct afs_vlocation, grave); - list_del(&vl->grave); - afs_vlocation_destroy(net, vl); - } - - _leave(""); -} - -/* - * discard all the volume location records for rmmod - */ -void __net_exit afs_vlocation_purge(struct afs_net *net) -{ - spin_lock(&net->vl_updates_lock); - 
list_del_init(&net->vl_updates); - spin_unlock(&net->vl_updates_lock); - mod_delayed_work(afs_vlocation_update_worker, &net->vl_updater, 0); - mod_delayed_work(afs_wq, &net->vl_reaper, 0); -} - -/* - * update a volume location - */ -void afs_vlocation_updater(struct work_struct *work) -{ - struct afs_cache_vlocation vldb; - struct afs_vlocation *vl, *xvl; - struct afs_net *net = container_of(work, struct afs_net, vl_updater.work); - time64_t now; - long timeout; - int ret; - - if (!net->live) - return; - - _enter(""); - - now = ktime_get_real_seconds(); - - /* find a record to update */ - spin_lock(&net->vl_updates_lock); - for (;;) { - if (list_empty(&net->vl_updates) || !net->live) { - spin_unlock(&net->vl_updates_lock); - _leave(" [nothing]"); - return; - } - - vl = list_entry(net->vl_updates.next, - struct afs_vlocation, update); - if (atomic_read(&vl->usage) > 0) - break; - list_del_init(&vl->update); - } - - timeout = vl->update_at - now; - if (timeout > 0) { - queue_delayed_work(afs_vlocation_update_worker, - &net->vl_updater, timeout * HZ); - spin_unlock(&net->vl_updates_lock); - _leave(" [nothing]"); - return; - } - - list_del_init(&vl->update); - atomic_inc(&vl->usage); - spin_unlock(&net->vl_updates_lock); - - /* we can now perform the update */ - _debug("update %s", vl->vldb.name); - vl->state = AFS_VL_UPDATING; - vl->upd_rej_cnt = 0; - vl->upd_busy_cnt = 0; - - ret = afs_vlocation_update_record(vl, NULL, &vldb); - spin_lock(&vl->lock); - switch (ret) { - case 0: - afs_vlocation_apply_update(vl, &vldb); - vl->state = AFS_VL_VALID; - break; - case -ENOMEDIUM: - vl->state = AFS_VL_VOLUME_DELETED; - break; - default: - vl->state = AFS_VL_UNCERTAIN; - break; - } - spin_unlock(&vl->lock); - wake_up(&vl->waitq); - - /* and then reschedule */ - _debug("reschedule"); - vl->update_at = ktime_get_real_seconds() + - afs_vlocation_update_timeout; - - spin_lock(&net->vl_updates_lock); - - if (!list_empty(&net->vl_updates)) { - /* next update in 10 minutes, but wait 
at least 1 second more - * than the newest record already queued so that we don't spam - * the VL server suddenly with lots of requests - */ - xvl = list_entry(net->vl_updates.prev, - struct afs_vlocation, update); - if (vl->update_at <= xvl->update_at) - vl->update_at = xvl->update_at + 1; - xvl = list_entry(net->vl_updates.next, - struct afs_vlocation, update); - timeout = xvl->update_at - now; - if (timeout < 0) - timeout = 0; - } else { - timeout = afs_vlocation_update_timeout; - } - - ASSERT(list_empty(&vl->update)); - - list_add_tail(&vl->update, &net->vl_updates); - - _debug("timeout %ld", timeout); - queue_delayed_work(afs_vlocation_update_worker, &net->vl_updater, timeout * HZ); - spin_unlock(&net->vl_updates_lock); - afs_put_vlocation(net, vl); -} diff --git a/fs/afs/vnode.c b/fs/afs/vnode.c deleted file mode 100644 index 9c7333eb01c2..000000000000 --- a/fs/afs/vnode.c +++ /dev/null @@ -1,750 +0,0 @@ -/* AFS vnode management - * - * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved. - * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include -#include -#include -#include -#include -#include "internal.h" - -/* - * Handle remote file deletion. 
- */ -static void afs_vnode_deleted_remotely(struct afs_vnode *vnode) -{ - struct afs_cb_interest *cbi = vnode->cb_interest; - - _enter("{%p}", cbi); - - set_bit(AFS_VNODE_DELETED, &vnode->flags); - - if (cbi) { - vnode->cb_interest = NULL; - afs_put_cb_interest(afs_v2net(vnode), cbi); - } - - _leave(""); -} - -/* - * finish off updating the recorded status of a file after a successful - * operation completion - * - starts callback expiry timer - * - adds to server's callback list - */ -void afs_vnode_finalise_status_update(struct afs_vnode *vnode, - struct afs_server *server) -{ - spin_lock(&vnode->lock); - vnode->update_cnt--; - ASSERTCMP(vnode->update_cnt, >=, 0); - spin_unlock(&vnode->lock); - - wake_up_all(&vnode->update_waitq); - _leave(""); -} - -/* - * finish off updating the recorded status of a file after an operation failed - */ -static void afs_vnode_status_update_failed(struct afs_fs_cursor *fc, - struct afs_vnode *vnode) -{ - _enter("{%x:%u},%d", vnode->fid.vid, vnode->fid.vnode, fc->ac.error); - - spin_lock(&vnode->lock); - - if (fc->ac.error == -ENOENT) { - /* the file was deleted on the server */ - _debug("got NOENT from server - marking file deleted"); - afs_vnode_deleted_remotely(vnode); - } - - vnode->update_cnt--; - ASSERTCMP(vnode->update_cnt, >=, 0); - spin_unlock(&vnode->lock); - - wake_up_all(&vnode->update_waitq); - _leave(""); -} - -/* - * fetch file status from the volume - * - don't issue a fetch if: - * - the changed bit is not set and there's a valid callback - * - there are any outstanding ops that will fetch the status - * - TODO implement local caching - */ -int afs_vnode_fetch_status(struct afs_vnode *vnode, struct key *key, bool force) -{ - struct afs_fs_cursor fc; - unsigned int cb_break = 0; - - DECLARE_WAITQUEUE(myself, current); - - _enter("%s,{%x:%u.%u,S=%lx},%u", - vnode->volume->vlocation->vldb.name, - vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique, - vnode->flags, - force); - - if (!force && 
test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) { - _leave(" [unchanged]"); - return 0; - } - - if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) { - _leave(" [deleted]"); - return -ENOENT; - } - - cb_break = vnode->cb_break + vnode->cb_s_break; - - spin_lock(&vnode->lock); - - if (!force && test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) { - spin_unlock(&vnode->lock); - _leave(" [unchanged]"); - return 0; - } - - ASSERTCMP(vnode->update_cnt, >=, 0); - - if (vnode->update_cnt > 0) { - /* someone else started a fetch */ - _debug("wait on fetch %d", vnode->update_cnt); - - set_current_state(TASK_UNINTERRUPTIBLE); - ASSERT(myself.func != NULL); - add_wait_queue(&vnode->update_waitq, &myself); - - /* wait for the status to be updated */ - for (;;) { - if (test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) - break; - if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) - break; - - /* check to see if it got updated and invalidated all - * before we saw it */ - if (vnode->update_cnt == 0) { - remove_wait_queue(&vnode->update_waitq, - &myself); - set_current_state(TASK_RUNNING); - goto get_anyway; - } - - spin_unlock(&vnode->lock); - - schedule(); - set_current_state(TASK_UNINTERRUPTIBLE); - - spin_lock(&vnode->lock); - } - - remove_wait_queue(&vnode->update_waitq, &myself); - spin_unlock(&vnode->lock); - set_current_state(TASK_RUNNING); - - return test_bit(AFS_VNODE_DELETED, &vnode->flags) ? - -ENOENT : 0; - } - -get_anyway: - /* okay... 
we're going to have to initiate the op */ - vnode->update_cnt++; - - spin_unlock(&vnode->lock); - - /* merge AFS status fetches and clear outstanding callback on this - * vnode */ - afs_init_fs_cursor(&fc, vnode); - do { - /* pick a server to query */ - if (!afs_volume_pick_fileserver(&fc, vnode)) - goto no_server; - - fc.ac.error = afs_fs_fetch_file_status(&fc, key, vnode, NULL, false); - - } while (afs_iterate_fs_cursor(&fc, vnode)); - - /* adjust the flags */ - if (fc.ac.error == 0) { - _debug("adjust"); - afs_cache_permit(vnode, key, cb_break); - afs_vnode_finalise_status_update(vnode, fc.server); - } else { - _debug("failed [%d]", fc.ac.error); - afs_vnode_status_update_failed(&fc, vnode); - } - -out: - afs_end_fs_cursor(&fc, afs_v2net(vnode)); - ASSERTCMP(vnode->update_cnt, >=, 0); - _leave(" = %d [cnt %d]", fc.ac.error, vnode->update_cnt); - return fc.ac.error; - -no_server: - spin_lock(&vnode->lock); - vnode->update_cnt--; - spin_unlock(&vnode->lock); - goto out; -} - -/* - * fetch file data from the volume - * - TODO implement caching - */ -int afs_vnode_fetch_data(struct afs_vnode *vnode, struct key *key, - struct afs_read *desc) -{ - struct afs_fs_cursor fc; - - _enter("%s{%x:%u.%u},%x,,,", - vnode->volume->vlocation->vldb.name, - vnode->fid.vid, - vnode->fid.vnode, - vnode->fid.unique, - key_serial(key)); - - /* this op will fetch the status */ - spin_lock(&vnode->lock); - vnode->update_cnt++; - spin_unlock(&vnode->lock); - - /* merge in AFS status fetches and clear outstanding callback on this - * vnode */ - afs_init_fs_cursor(&fc, vnode); - do { - /* pick a server to query */ - if (!afs_volume_pick_fileserver(&fc, vnode)) - goto no_server; - - fc.ac.error = afs_fs_fetch_data(&fc, key, vnode, desc, false); - - } while (afs_iterate_fs_cursor(&fc, vnode)); - - /* adjust the flags */ - if (fc.ac.error == 0) - afs_vnode_finalise_status_update(vnode, fc.server); - else - afs_vnode_status_update_failed(&fc, vnode); - -out: - return afs_end_fs_cursor(&fc, 
afs_v2net(vnode)); - -no_server: - spin_lock(&vnode->lock); - vnode->update_cnt--; - ASSERTCMP(vnode->update_cnt, >=, 0); - spin_unlock(&vnode->lock); - goto out; -} - -/* - * make a file or a directory - */ -int afs_vnode_create(struct afs_vnode *vnode, struct key *key, - const char *name, umode_t mode, struct afs_fid *newfid, - struct afs_file_status *newstatus, - struct afs_callback *newcb, struct afs_server **_server) -{ - struct afs_fs_cursor fc; - - _enter("%s{%x:%u.%u},%x,%s,,", - vnode->volume->vlocation->vldb.name, - vnode->fid.vid, - vnode->fid.vnode, - vnode->fid.unique, - key_serial(key), - name); - - /* this op will fetch the status on the directory we're creating in */ - spin_lock(&vnode->lock); - vnode->update_cnt++; - spin_unlock(&vnode->lock); - - afs_init_fs_cursor(&fc, vnode); - do { - /* pick a server to query */ - if (!afs_volume_pick_fileserver(&fc, vnode)) - goto no_server; - - fc.ac.error = afs_fs_create(&fc, key, vnode, name, mode, newfid, - newstatus, newcb, false); - - } while (afs_iterate_fs_cursor(&fc, vnode)); - - /* adjust the flags */ - if (fc.ac.error == 0) { - afs_vnode_finalise_status_update(vnode, fc.server); - *_server = fc.server; - fc.server = NULL; - } else { - afs_vnode_status_update_failed(&fc, vnode); - *_server = NULL; - } - -out: - return afs_end_fs_cursor(&fc, afs_v2net(vnode)); - -no_server: - spin_lock(&vnode->lock); - vnode->update_cnt--; - ASSERTCMP(vnode->update_cnt, >=, 0); - spin_unlock(&vnode->lock); - goto out; -} - -/* - * remove a file or directory - */ -int afs_vnode_remove(struct afs_vnode *vnode, struct key *key, const char *name, - bool isdir) -{ - struct afs_fs_cursor fc; - - _enter("%s{%x:%u.%u},%x,%s", - vnode->volume->vlocation->vldb.name, - vnode->fid.vid, - vnode->fid.vnode, - vnode->fid.unique, - key_serial(key), - name); - - /* this op will fetch the status on the directory we're removing from */ - spin_lock(&vnode->lock); - vnode->update_cnt++; - spin_unlock(&vnode->lock); - - 
afs_init_fs_cursor(&fc, vnode); - do { - /* pick a server to query */ - if (!afs_volume_pick_fileserver(&fc, vnode)) - goto no_server; - - fc.ac.error = afs_fs_remove(&fc, key, vnode, name, isdir, false); - - } while (afs_iterate_fs_cursor(&fc, vnode)); - - /* adjust the flags */ - if (fc.ac.error == 0) - afs_vnode_finalise_status_update(vnode, fc.server); - else - afs_vnode_status_update_failed(&fc, vnode); - -out: - return afs_end_fs_cursor(&fc, afs_v2net(vnode)); - -no_server: - spin_lock(&vnode->lock); - vnode->update_cnt--; - ASSERTCMP(vnode->update_cnt, >=, 0); - spin_unlock(&vnode->lock); - goto out; -} - -/* - * create a hard link - */ -int afs_vnode_link(struct afs_vnode *dvnode, struct afs_vnode *vnode, - struct key *key, const char *name) -{ - struct afs_fs_cursor fc; - - _enter("%s{%x:%u.%u},%s{%x:%u.%u},%x,%s", - dvnode->volume->vlocation->vldb.name, - dvnode->fid.vid, - dvnode->fid.vnode, - dvnode->fid.unique, - vnode->volume->vlocation->vldb.name, - vnode->fid.vid, - vnode->fid.vnode, - vnode->fid.unique, - key_serial(key), - name); - - /* this op will fetch the status on the directory we're removing from */ - spin_lock(&vnode->lock); - vnode->update_cnt++; - spin_unlock(&vnode->lock); - spin_lock(&dvnode->lock); - dvnode->update_cnt++; - spin_unlock(&dvnode->lock); - - afs_init_fs_cursor(&fc, vnode); - do { - /* pick a server to query */ - if (!afs_volume_pick_fileserver(&fc, dvnode)) - goto no_server; - - fc.ac.error = afs_fs_link(&fc, key, dvnode, vnode, name, false); - - } while (afs_iterate_fs_cursor(&fc, dvnode)); - - /* adjust the flags */ - if (fc.ac.error == 0) { - afs_vnode_finalise_status_update(vnode, fc.server); - afs_vnode_finalise_status_update(dvnode, fc.server); - } else { - afs_vnode_status_update_failed(&fc, vnode); - afs_vnode_status_update_failed(&fc, dvnode); - } - -out: - return afs_end_fs_cursor(&fc, afs_v2net(vnode)); - -no_server: - spin_lock(&vnode->lock); - vnode->update_cnt--; - ASSERTCMP(vnode->update_cnt, >=, 0); - 
spin_unlock(&vnode->lock); - spin_lock(&dvnode->lock); - dvnode->update_cnt--; - ASSERTCMP(dvnode->update_cnt, >=, 0); - spin_unlock(&dvnode->lock); - goto out; -} - -/* - * create a symbolic link - */ -int afs_vnode_symlink(struct afs_vnode *vnode, struct key *key, - const char *name, const char *content, - struct afs_fid *newfid, - struct afs_file_status *newstatus, - struct afs_server **_server) -{ - struct afs_fs_cursor fc; - - _enter("%s{%x:%u.%u},%x,%s,%s,,,", - vnode->volume->vlocation->vldb.name, - vnode->fid.vid, - vnode->fid.vnode, - vnode->fid.unique, - key_serial(key), - name, content); - - /* this op will fetch the status on the directory we're creating in */ - spin_lock(&vnode->lock); - vnode->update_cnt++; - spin_unlock(&vnode->lock); - - afs_init_fs_cursor(&fc, vnode); - do { - /* pick a server to query */ - if (!afs_volume_pick_fileserver(&fc, vnode)) - goto no_server; - - fc.ac.error = afs_fs_symlink(&fc, key, vnode, name, content, - newfid, newstatus, false); - - } while (afs_iterate_fs_cursor(&fc, vnode)); - - /* adjust the flags */ - if (fc.ac.error == 0) { - afs_vnode_finalise_status_update(vnode, fc.server); - *_server = fc.server; - fc.server = NULL; - } else { - afs_vnode_status_update_failed(&fc, vnode); - *_server = NULL; - } - -out: - return afs_end_fs_cursor(&fc, afs_v2net(vnode)); - -no_server: - spin_lock(&vnode->lock); - vnode->update_cnt--; - ASSERTCMP(vnode->update_cnt, >=, 0); - spin_unlock(&vnode->lock); - *_server = NULL; - goto out; -} - -/* - * rename a file - */ -int afs_vnode_rename(struct afs_vnode *orig_dvnode, - struct afs_vnode *new_dvnode, - struct key *key, - const char *orig_name, - const char *new_name) -{ - struct afs_fs_cursor fc; - - _enter("%s{%x:%u.%u},%s{%u,%u,%u},%x,%s,%s", - orig_dvnode->volume->vlocation->vldb.name, - orig_dvnode->fid.vid, - orig_dvnode->fid.vnode, - orig_dvnode->fid.unique, - new_dvnode->volume->vlocation->vldb.name, - new_dvnode->fid.vid, - new_dvnode->fid.vnode, - new_dvnode->fid.unique, 
- key_serial(key), - orig_name, - new_name); - - /* this op will fetch the status on both the directories we're dealing - * with */ - spin_lock(&orig_dvnode->lock); - orig_dvnode->update_cnt++; - spin_unlock(&orig_dvnode->lock); - if (new_dvnode != orig_dvnode) { - spin_lock(&new_dvnode->lock); - new_dvnode->update_cnt++; - spin_unlock(&new_dvnode->lock); - } - - afs_init_fs_cursor(&fc, orig_dvnode); - do { - /* pick a server to query */ - if (!afs_volume_pick_fileserver(&fc, orig_dvnode)) - goto no_server; - - fc.ac.error = afs_fs_rename(&fc, key, orig_dvnode, orig_name, - new_dvnode, new_name, false); - - } while (afs_iterate_fs_cursor(&fc, orig_dvnode)); - - /* adjust the flags */ - if (fc.ac.error == 0) { - afs_vnode_finalise_status_update(orig_dvnode, fc.server); - if (new_dvnode != orig_dvnode) - afs_vnode_finalise_status_update(new_dvnode, fc.server); - } else { - afs_vnode_status_update_failed(&fc, orig_dvnode); - if (new_dvnode != orig_dvnode) - afs_vnode_status_update_failed(&fc, new_dvnode); - } - -out: - return afs_end_fs_cursor(&fc, afs_v2net(orig_dvnode)); - -no_server: - spin_lock(&orig_dvnode->lock); - orig_dvnode->update_cnt--; - ASSERTCMP(orig_dvnode->update_cnt, >=, 0); - spin_unlock(&orig_dvnode->lock); - if (new_dvnode != orig_dvnode) { - spin_lock(&new_dvnode->lock); - new_dvnode->update_cnt--; - ASSERTCMP(new_dvnode->update_cnt, >=, 0); - spin_unlock(&new_dvnode->lock); - } - goto out; -} - -/* - * write to a file - */ -int afs_vnode_store_data(struct afs_writeback *wb, pgoff_t first, pgoff_t last, - unsigned offset, unsigned to) -{ - struct afs_fs_cursor fc; - struct afs_vnode *vnode = wb->vnode; - - _enter("%s{%x:%u.%u},%x,%lx,%lx,%x,%x", - vnode->volume->vlocation->vldb.name, - vnode->fid.vid, - vnode->fid.vnode, - vnode->fid.unique, - key_serial(wb->key), - first, last, offset, to); - - /* this op will fetch the status */ - spin_lock(&vnode->lock); - vnode->update_cnt++; - spin_unlock(&vnode->lock); - - afs_init_fs_cursor(&fc, vnode); - 
do { - /* pick a server to query */ - if (!afs_volume_pick_fileserver(&fc, vnode)) - goto no_server; - - fc.ac.error = afs_fs_store_data(&fc, wb, first, last, offset, to, - false); - - } while (afs_iterate_fs_cursor(&fc, vnode)); - - /* adjust the flags */ - if (fc.ac.error == 0) { - afs_vnode_finalise_status_update(vnode, fc.server); - } else { - afs_vnode_status_update_failed(&fc, vnode); - } - -out: - return afs_end_fs_cursor(&fc, afs_v2net(vnode)); - -no_server: - spin_lock(&vnode->lock); - vnode->update_cnt--; - ASSERTCMP(vnode->update_cnt, >=, 0); - spin_unlock(&vnode->lock); - goto out; -} - -/* - * set the attributes on a file - */ -int afs_vnode_setattr(struct afs_vnode *vnode, struct key *key, - struct iattr *attr) -{ - struct afs_fs_cursor fc; - - _enter("%s{%x:%u.%u},%x", - vnode->volume->vlocation->vldb.name, - vnode->fid.vid, - vnode->fid.vnode, - vnode->fid.unique, - key_serial(key)); - - /* this op will fetch the status */ - spin_lock(&vnode->lock); - vnode->update_cnt++; - spin_unlock(&vnode->lock); - - afs_init_fs_cursor(&fc, vnode); - do { - /* pick a server to query */ - if (!afs_volume_pick_fileserver(&fc, vnode)) - goto no_server; - - fc.ac.error = afs_fs_setattr(&fc, key, vnode, attr, false); - - } while (afs_iterate_fs_cursor(&fc, vnode)); - - /* adjust the flags */ - if (fc.ac.error == 0) { - afs_vnode_finalise_status_update(vnode, fc.server); - } else { - afs_vnode_status_update_failed(&fc, vnode); - } - -out: - return afs_end_fs_cursor(&fc, afs_v2net(vnode)); - -no_server: - spin_lock(&vnode->lock); - vnode->update_cnt--; - ASSERTCMP(vnode->update_cnt, >=, 0); - spin_unlock(&vnode->lock); - goto out; -} - -/* - * get the status of a volume - */ -int afs_vnode_get_volume_status(struct afs_vnode *vnode, struct key *key, - struct afs_volume_status *vs) -{ - struct afs_fs_cursor fc; - - _enter("%s{%x:%u.%u},%x,", - vnode->volume->vlocation->vldb.name, - vnode->fid.vid, - vnode->fid.vnode, - vnode->fid.unique, - key_serial(key)); - - 
afs_init_fs_cursor(&fc, vnode); - do { - /* pick a server to query */ - if (!afs_volume_pick_fileserver(&fc, vnode)) - break; - - fc.ac.error = afs_fs_get_volume_status(&fc, key, vnode, vs, false); - - } while (afs_iterate_fs_cursor(&fc, vnode)); - - return afs_end_fs_cursor(&fc, afs_v2net(vnode)); -} - -/* - * get a lock on a file - */ -int afs_vnode_set_lock(struct afs_vnode *vnode, struct key *key, - afs_lock_type_t type) -{ - struct afs_fs_cursor fc; - - _enter("%s{%x:%u.%u},%x,%u", - vnode->volume->vlocation->vldb.name, - vnode->fid.vid, - vnode->fid.vnode, - vnode->fid.unique, - key_serial(key), type); - - afs_init_fs_cursor(&fc, vnode); - do { - /* pick a server to query */ - if (!afs_volume_pick_fileserver(&fc, vnode)) - break; - - fc.ac.error = afs_fs_set_lock(&fc, key, vnode, type, false); - - } while (afs_iterate_fs_cursor(&fc, vnode)); - - return afs_end_fs_cursor(&fc, afs_v2net(vnode)); -} - -/* - * extend a lock on a file - */ -int afs_vnode_extend_lock(struct afs_vnode *vnode, struct key *key) -{ - struct afs_fs_cursor fc; - int ret; - - _enter("%s{%x:%u.%u},%x", - vnode->volume->vlocation->vldb.name, - vnode->fid.vid, - vnode->fid.vnode, - vnode->fid.unique, - key_serial(key)); - - ret = afs_set_fs_cursor(&fc, vnode); - if (ret < 0) - return ret; - - fc.ac.error = afs_fs_extend_lock(&fc, key, vnode, false); - - return afs_end_fs_cursor(&fc, afs_v2net(vnode)); -} - -/* - * release a lock on a file - */ -int afs_vnode_release_lock(struct afs_vnode *vnode, struct key *key) -{ - struct afs_fs_cursor fc; - int ret; - - _enter("%s{%x:%u.%u},%x", - vnode->volume->vlocation->vldb.name, - vnode->fid.vid, - vnode->fid.vnode, - vnode->fid.unique, - key_serial(key)); - - ret = afs_set_fs_cursor(&fc, vnode); - if (ret < 0) - return ret; - - fc.ac.error = afs_fs_release_lock(&fc, key, vnode, false); - - return afs_end_fs_cursor(&fc, afs_v2net(vnode)); -} diff --git a/fs/afs/volume.c b/fs/afs/volume.c index 3c5ad1cc50f3..2295dd4f9b15 100644 --- a/fs/afs/volume.c 
+++ b/fs/afs/volume.c @@ -13,11 +13,148 @@ #include #include "internal.h" -static const char *afs_voltypes[] = { "R/W", "R/O", "BAK" }; +unsigned __read_mostly afs_volume_gc_delay = 10; +unsigned __read_mostly afs_volume_record_life = 60 * 60; + +static const char *const afs_voltypes[] = { "R/W", "R/O", "BAK" }; /* - * lookup a volume by name - * - this can be one of the following: + * Allocate a volume record and load it up from a vldb record. + */ +static struct afs_volume *afs_alloc_volume(struct afs_mount_params *params, + struct afs_vldb_entry *vldb, + unsigned long type_mask) +{ + struct afs_server_list *slist; + struct afs_server *server; + struct afs_volume *volume; + int ret = -ENOMEM, nr_servers = 0, i, j; + + for (i = 0; i < vldb->nr_servers; i++) + if (vldb->fs_mask[i] & type_mask) + nr_servers++; + + volume = kzalloc(sizeof(struct afs_volume), GFP_KERNEL); + if (!volume) + goto error_0; + + volume->vid = vldb->vid[params->type]; + volume->update_at = ktime_get_real_seconds() + afs_volume_record_life; + volume->cell = afs_get_cell(params->cell); + volume->type = params->type; + volume->type_force = params->force; + volume->name_len = vldb->name_len; + + atomic_set(&volume->usage, 1); + INIT_LIST_HEAD(&volume->proc_link); + rwlock_init(&volume->servers_lock); + memcpy(volume->name, vldb->name, vldb->name_len + 1); + + slist = afs_alloc_server_list(params->cell, params->key, vldb, type_mask); + if (IS_ERR(slist)) { + ret = PTR_ERR(slist); + goto error_1; + } + + refcount_set(&slist->usage, 1); + volume->servers = slist; + + /* Make sure a records exists for each server this volume occupies. 
*/ + for (i = 0; i < nr_servers; i++) { + if (!(vldb->fs_mask[i] & type_mask)) + continue; + + server = afs_lookup_server(params->cell, params->key, + &vldb->fs_server[i]); + if (IS_ERR(server)) { + ret = PTR_ERR(server); + if (ret == -ENOENT) + continue; + goto error_2; + } + + /* Insertion-sort by server pointer */ + for (j = 0; j < slist->nr_servers; j++) + if (slist->servers[j].server >= server) + break; + if (j < slist->nr_servers) { + if (slist->servers[j].server == server) { + afs_put_server(params->net, server); + continue; + } + + memmove(slist->servers + j + 1, + slist->servers + j, + (slist->nr_servers - j) * sizeof(struct afs_server_entry)); + } + + slist->servers[j].server = server; + slist->nr_servers++; + } + + if (slist->nr_servers == 0) { + ret = -EDESTADDRREQ; + goto error_2; + } + + return volume; + +error_2: + afs_put_serverlist(params->net, slist); +error_1: + kfree(volume); +error_0: + return ERR_PTR(ret); +} + +/* + * Look up a VLDB record for a volume. + */ +static struct afs_vldb_entry *afs_vl_lookup_vldb(struct afs_cell *cell, + struct key *key, + const char *volname, + size_t volnamesz) +{ + struct afs_addr_cursor ac; + struct afs_vldb_entry *vldb; + int ret; + + ret = afs_set_vl_cursor(&ac, cell); + if (ret < 0) + return ERR_PTR(ret); + + while (afs_iterate_addresses(&ac)) { + vldb = afs_vl_get_entry_by_name_u(cell->net, &ac, key, + volname, volnamesz); + switch (ac.error) { + case 0: + afs_end_cursor(&ac); + return vldb; + case -ECONNABORTED: + ac.error = afs_abort_to_error(ac.abort_code); + goto error; + case -ENOMEM: + case -ENONET: + goto error; + case -ENETUNREACH: + case -EHOSTUNREACH: + case -ECONNREFUSED: + break; + default: + ac.error = -EIO; + goto error; + } + } + +error: + return ERR_PTR(afs_end_cursor(&ac)); +} + +/* + * Look up a volume in the VL server and create a candidate volume record for + * it. 
+ * + * The volume name can be one of the following: * "%[cell:]volume[.]" R/W volume * "#[cell:]volume[.]" R/O or R/W volume (rwparent=0), * or R/W (rwparent=1) volume @@ -37,169 +174,218 @@ static const char *afs_voltypes[] = { "R/W", "R/O", "BAK" }; * - Rule 3: If parent volume is R/W, then only mount R/W volume unless * explicitly told otherwise */ -struct afs_volume *afs_volume_lookup(struct afs_mount_params *params) +struct afs_volume *afs_create_volume(struct afs_mount_params *params) { - struct afs_vlocation *vlocation = NULL; - struct afs_volume *volume = NULL; - struct afs_server *server = NULL; - char srvtmask; - int ret, loop; + struct afs_vldb_entry *vldb; + struct afs_volume *volume; + unsigned long type_mask = 1UL << params->type; - _enter("{%*.*s,%d}", - params->volnamesz, params->volnamesz, params->volname, params->rwpath); + vldb = afs_vl_lookup_vldb(params->cell, params->key, + params->volname, params->volnamesz); + if (IS_ERR(vldb)) + return ERR_CAST(vldb); - /* lookup the volume location record */ - vlocation = afs_vlocation_lookup(params->net, params->cell, params->key, - params->volname, params->volnamesz); - if (IS_ERR(vlocation)) { - ret = PTR_ERR(vlocation); - vlocation = NULL; + if (test_bit(AFS_VLDB_QUERY_ERROR, &vldb->flags)) { + volume = ERR_PTR(vldb->error); goto error; } - /* make the final decision on the type we want */ - ret = -ENOMEDIUM; - if (params->force && !(vlocation->vldb.vidmask & (1 << params->type))) - goto error; - - srvtmask = 0; - for (loop = 0; loop < vlocation->vldb.nservers; loop++) - srvtmask |= vlocation->vldb.srvtmask[loop]; - + /* Make the final decision on the type we want */ + volume = ERR_PTR(-ENOMEDIUM); if (params->force) { - if (!(srvtmask & (1 << params->type))) + if (!(vldb->flags & type_mask)) goto error; - } else if (srvtmask & AFS_VOL_VTM_RO) { + } else if (test_bit(AFS_VLDB_HAS_RO, &vldb->flags)) { params->type = AFSVL_ROVOL; - } else if (srvtmask & AFS_VOL_VTM_RW) { + } else if 
(test_bit(AFS_VLDB_HAS_RW, &vldb->flags)) { params->type = AFSVL_RWVOL; } else { goto error; } - down_write(&params->cell->vl_sem); + type_mask = 1UL << params->type; + volume = afs_alloc_volume(params, vldb, type_mask); - /* is the volume already active? */ - if (vlocation->vols[params->type]) { - /* yes - re-use it */ - volume = vlocation->vols[params->type]; - afs_get_volume(volume); - goto success; +error: + kfree(vldb); + return volume; +} + +/* + * Destroy a volume record + */ +static void afs_destroy_volume(struct afs_net *net, struct afs_volume *volume) +{ + _enter("%p", volume); + +#ifdef CONFIG_AFS_FSCACHE + ASSERTCMP(volume->cache, ==, NULL); +#endif + + afs_put_serverlist(net, volume->servers); + afs_put_cell(net, volume->cell); + kfree(volume); + + _leave(" [destroyed]"); +} + +/* + * Drop a reference on a volume record. + */ +void afs_put_volume(struct afs_cell *cell, struct afs_volume *volume) +{ + if (volume) { + _enter("%s", volume->name); + + if (atomic_dec_and_test(&volume->usage)) + afs_destroy_volume(cell->net, volume); } +} - /* create a new volume record */ - _debug("creating new volume record"); - - ret = -ENOMEM; - volume = kzalloc(sizeof(struct afs_volume), GFP_KERNEL); - if (!volume) - goto error_up; - - atomic_set(&volume->usage, 1); - volume->type = params->type; - volume->type_force = params->force; - volume->cell = params->cell; - volume->vid = vlocation->vldb.vid[params->type]; - - init_rwsem(&volume->server_sem); - - /* look up all the applicable server records */ - for (loop = 0; loop < 8; loop++) { - if (vlocation->vldb.srvtmask[loop] & (1 << volume->type)) { - server = afs_lookup_server( - volume->cell, &vlocation->vldb.servers[loop]); - if (IS_ERR(server)) { - ret = PTR_ERR(server); - goto error_discard; - } - - volume->servers[volume->nservers] = server; - volume->nservers++; - } - } - - /* attach the cache and volume location */ +/* + * Activate a volume.
+ */ +void afs_activate_volume(struct afs_volume *volume) +{ #ifdef CONFIG_AFS_FSCACHE volume->cache = fscache_acquire_cookie(volume->cell->cache, &afs_volume_cache_index_def, volume, true); #endif - afs_get_vlocation(vlocation); - volume->vlocation = vlocation; - vlocation->vols[volume->type] = volume; - -success: - _debug("kAFS selected %s volume %08x", - afs_voltypes[volume->type], volume->vid); - up_write(&params->cell->vl_sem); - afs_put_vlocation(params->net, vlocation); - _leave(" = %p", volume); - return volume; - - /* clean up */ -error_up: - up_write(&params->cell->vl_sem); -error: - afs_put_vlocation(params->net, vlocation); - _leave(" = %d", ret); - return ERR_PTR(ret); - -error_discard: - up_write(&params->cell->vl_sem); - - for (loop = volume->nservers - 1; loop >= 0; loop--) { - afs_put_cb_interest(params->net, volume->cb_interests[loop]); - afs_put_server(params->net, volume->servers[loop]); - } - - kfree(volume); - goto error; + write_lock(&volume->cell->proc_lock); + list_add_tail(&volume->proc_link, &volume->cell->proc_volumes); + write_unlock(&volume->cell->proc_lock); } /* - * destroy a volume record + * Deactivate a volume.
*/ -void afs_put_volume(struct afs_cell *cell, struct afs_volume *volume) +void afs_deactivate_volume(struct afs_volume *volume) { - struct afs_vlocation *vlocation; - int loop; + _enter("%s", volume->name); - if (!volume) - return; + write_lock(&volume->cell->proc_lock); + list_del_init(&volume->proc_link); + write_unlock(&volume->cell->proc_lock); - _enter("%p", volume); - - ASSERTCMP(atomic_read(&volume->usage), >, 0); - - vlocation = volume->vlocation; - - /* to prevent a race, the decrement and the dequeue must be effectively - * atomic */ - down_write(&cell->vl_sem); - - if (likely(!atomic_dec_and_test(&volume->usage))) { - up_write(&vlocation->cell->vl_sem); - _leave(""); - return; - } - - vlocation->vols[volume->type] = NULL; - - up_write(&cell->vl_sem); - - /* finish cleaning up the volume */ #ifdef CONFIG_AFS_FSCACHE - fscache_relinquish_cookie(volume->cache, 0); + fscache_relinquish_cookie(volume->cache, + test_bit(AFS_VOLUME_DELETED, &volume->flags)); + volume->cache = NULL; #endif - afs_put_vlocation(cell->net, vlocation); - for (loop = volume->nservers - 1; loop >= 0; loop--) { - afs_put_cb_interest(cell->net, volume->cb_interests[loop]); - afs_put_server(cell->net, volume->servers[loop]); + _leave(""); +} + +/* + * Query the VL service to update the volume status. + */ +static int afs_update_volume_status(struct afs_volume *volume, struct key *key) +{ + struct afs_server_list *new, *old, *discard; + struct afs_vldb_entry *vldb; + char idbuf[16]; + int ret, idsz; + + _enter(""); + + /* We look up an ID by passing it as a decimal string in the + * operation's name parameter. + */ + idsz = sprintf(idbuf, "%u", volume->vid); + + vldb = afs_vl_lookup_vldb(volume->cell, key, idbuf, idsz); + if (IS_ERR(vldb)) { + ret = PTR_ERR(vldb); + goto error; } - kfree(volume); + /* See if the volume got renamed. */ + if (vldb->name_len != volume->name_len || + memcmp(vldb->name, volume->name, vldb->name_len) != 0) { + /* TODO: Use RCU'd string. 
*/ + memcpy(volume->name, vldb->name, AFS_MAXVOLNAME); + volume->name_len = vldb->name_len; + } - _leave(" [destroyed]"); + /* See if the volume's server list got updated. */ + new = afs_alloc_server_list(volume->cell, key, + vldb, (1 << volume->type)); + if (IS_ERR(new)) { + ret = PTR_ERR(new); + goto error_vldb; + } + + write_lock(&volume->servers_lock); + + discard = new; + old = volume->servers; + if (afs_annotate_server_list(new, old)) { + new->seq = volume->servers_seq + 1; + volume->servers = new; + smp_wmb(); + volume->servers_seq++; + discard = old; + } + + volume->update_at = ktime_get_real_seconds() + afs_volume_record_life; + clear_bit(AFS_VOLUME_NEEDS_UPDATE, &volume->flags); + write_unlock(&volume->servers_lock); + ret = 0; + + afs_put_serverlist(volume->cell->net, discard); +error_vldb: + kfree(vldb); +error: + _leave(" = %d", ret); + return ret; +} + +/* + * Make sure the volume record is up to date. + */ +int afs_check_volume_status(struct afs_volume *volume, struct key *key) +{ + time64_t now = ktime_get_real_seconds(); + int ret, retries = 0; + + _enter(""); + + if (volume->update_at <= now) + set_bit(AFS_VOLUME_NEEDS_UPDATE, &volume->flags); + +retry: + if (!test_bit(AFS_VOLUME_NEEDS_UPDATE, &volume->flags) && + !test_bit(AFS_VOLUME_WAIT, &volume->flags)) { + _leave(" = 0"); + return 0; + } + + if (!test_and_set_bit_lock(AFS_VOLUME_UPDATING, &volume->flags)) { + ret = afs_update_volume_status(volume, key); + clear_bit_unlock(AFS_VOLUME_WAIT, &volume->flags); + clear_bit_unlock(AFS_VOLUME_UPDATING, &volume->flags); + wake_up_bit(&volume->flags, AFS_VOLUME_WAIT); + _leave(" = %d", ret); + return ret; + } + + if (!test_bit(AFS_VOLUME_WAIT, &volume->flags)) { + _leave(" = 0 [no wait]"); + return 0; + } + + ret = wait_on_bit(&volume->flags, AFS_VOLUME_WAIT, TASK_INTERRUPTIBLE); + if (ret == -ERESTARTSYS) { + _leave(" = %d", ret); + return ret; + } + + retries++; + if (retries == 4) { + _leave(" = -ESTALE"); + return -ESTALE; + } + goto retry; } diff 
--git a/fs/afs/write.c b/fs/afs/write.c index 106e43db1115..1377a40ecdbb 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -103,7 +103,7 @@ static int afs_fill_page(struct afs_vnode *vnode, struct key *key, req->pages[0] = page; get_page(page); - ret = afs_vnode_fetch_data(vnode, key, req); + ret = afs_fetch_data(vnode, key, req); afs_put_read(req); if (ret < 0) { if (ret == -ENOENT) { @@ -337,6 +337,40 @@ static void afs_kill_pages(struct afs_vnode *vnode, bool error, _leave(""); } +/* + * write to a file + */ +static int afs_store_data(struct afs_writeback *wb, pgoff_t first, pgoff_t last, + unsigned offset, unsigned to) +{ + struct afs_fs_cursor fc; + struct afs_vnode *vnode = wb->vnode; + int ret; + + _enter("%s{%x:%u.%u},%x,%lx,%lx,%x,%x", + vnode->volume->name, + vnode->fid.vid, + vnode->fid.vnode, + vnode->fid.unique, + key_serial(wb->key), + first, last, offset, to); + + ret = -ERESTARTSYS; + if (afs_begin_vnode_operation(&fc, vnode, wb->key)) { + while (afs_select_fileserver(&fc)) { + fc.cb_break = vnode->cb_break + vnode->cb_s_break; + afs_fs_store_data(&fc, wb, first, last, offset, to); + } + + afs_check_for_remote_deletion(&fc, fc.vnode); + afs_vnode_commit_status(&fc, vnode, fc.cb_break); + ret = afs_end_vnode_operation(&fc); + } + + _leave(" = %d", ret); + return ret; +} + /* * synchronously write back the locked page and any subsequent non-locked dirty * pages also covered by the same writeback record @@ -420,7 +454,7 @@ no_more: _debug("write back %lx[%u..] 
to %lx[..%u]", first, offset, last, to); - ret = afs_vnode_store_data(wb, first, last, offset, to); + ret = afs_store_data(wb, first, last, offset, to); if (ret < 0) { switch (ret) { case -EDQUOT: diff --git a/fs/afs/xattr.c b/fs/afs/xattr.c index e58e00ee9747..cfcc674e64a5 100644 --- a/fs/afs/xattr.c +++ b/fs/afs/xattr.c @@ -96,7 +96,7 @@ static int afs_xattr_get_volume(const struct xattr_handler *handler, void *buffer, size_t size) { struct afs_vnode *vnode = AFS_FS_I(inode); - const char *volname = vnode->volume->vlocation->vldb.name; + const char *volname = vnode->volume->name; size_t namelen; namelen = strlen(volname); From bf99a53ce22a29d64d3190093edf52f1d44d53b3 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 2 Nov 2017 15:27:51 +0000 Subject: [PATCH 23/35] afs: Make use of the YFS service upgrade to fully support IPv6 YFS VL servers offer an upgraded Volume Location service that can return IPv6 addresses to fileservers and volume servers in addition to IPv4 addresses using the YFSVL.GetEndpoints operation which we should use if it's available. To this end: (1) Make rxrpc_kernel_recv_data() return the call's current service ID so that the caller can detect service upgrade and see what the service was upgraded to. (2) When we see a VL server address we haven't seen before, send a VL.GetCapabilities operation to it with the service upgrade bit set. If we get an upgrade to the YFS VL service, change the service ID in the address list for that address to use the upgraded service and set a flag to note that this appears to be a YFS-compatible server. (3) If, when a server's addresses are being looked up, we note that we previously detected a YFS-compatible server, then send the YFSVL.GetEndpoints operation rather than VL.GetAddrsU. (4) Build a fileserver address list from the reply of YFSVL.GetEndpoints, including both IPv4 and IPv6 addresses. Volume server addresses are discarded. 
(5) The address list is sorted by address and port now, instead of just address. This allows multiple servers on the same host sitting on different ports. Signed-off-by: David Howells --- fs/afs/addr_list.c | 54 +++++++- fs/afs/afs_vl.h | 16 +++ fs/afs/internal.h | 10 +- fs/afs/server.c | 5 +- fs/afs/vlclient.c | 337 ++++++++++++++++++++++++++++++++++++++++++++- fs/afs/volume.c | 16 +++ 6 files changed, 428 insertions(+), 10 deletions(-) diff --git a/fs/afs/addr_list.c b/fs/afs/addr_list.c index b91e59a77f0e..a537368ba0db 100644 --- a/fs/afs/addr_list.c +++ b/fs/afs/addr_list.c @@ -17,9 +17,10 @@ #include "internal.h" #include "afs_fs.h" -#define AFS_MAX_ADDRESSES \ - ((unsigned int)((PAGE_SIZE - sizeof(struct afs_addr_list)) / \ - sizeof(struct sockaddr_rxrpc))) +//#define AFS_MAX_ADDRESSES +// ((unsigned int)((PAGE_SIZE - sizeof(struct afs_addr_list)) / +// sizeof(struct sockaddr_rxrpc))) +#define AFS_MAX_ADDRESSES ((unsigned int)(sizeof(unsigned long) * 8)) /* * Release an address list. @@ -230,15 +231,20 @@ struct afs_addr_list *afs_dns_query(struct afs_cell *cell, time64_t *_expiry) /* * Merge an IPv4 entry into a fileserver address list. 
*/ -void afs_merge_fs_addr4(struct afs_addr_list *alist, __be32 xdr) +void afs_merge_fs_addr4(struct afs_addr_list *alist, __be32 xdr, u16 port) { struct sockaddr_in6 *a; + __be16 xport = htons(port); int i; for (i = 0; i < alist->nr_ipv4; i++) { a = &alist->addrs[i].transport.sin6; - if (xdr == a->sin6_addr.s6_addr32[3]) + if (xdr == a->sin6_addr.s6_addr32[3] && + xport == a->sin6_port) return; + if (xdr == a->sin6_addr.s6_addr32[3] && + xport < a->sin6_port) + break; if (xdr < a->sin6_addr.s6_addr32[3]) break; } @@ -249,7 +255,7 @@ void afs_merge_fs_addr4(struct afs_addr_list *alist, __be32 xdr) sizeof(alist->addrs[0]) * (alist->nr_addrs - i)); a = &alist->addrs[i].transport.sin6; - a->sin6_port = htons(AFS_FS_PORT); + a->sin6_port = xport; a->sin6_addr.s6_addr32[0] = 0; a->sin6_addr.s6_addr32[1] = 0; a->sin6_addr.s6_addr32[2] = htonl(0xffff); @@ -258,6 +264,42 @@ void afs_merge_fs_addr4(struct afs_addr_list *alist, __be32 xdr) alist->nr_addrs++; } +/* + * Merge an IPv6 entry into a fileserver address list. + */ +void afs_merge_fs_addr6(struct afs_addr_list *alist, __be32 *xdr, u16 port) +{ + struct sockaddr_in6 *a; + __be16 xport = htons(port); + int i, diff; + + for (i = alist->nr_ipv4; i < alist->nr_addrs; i++) { + a = &alist->addrs[i].transport.sin6; + diff = memcmp(xdr, &a->sin6_addr, 16); + if (diff == 0 && + xport == a->sin6_port) + return; + if (diff == 0 && + xport < a->sin6_port) + break; + if (diff < 0) + break; + } + + if (i < alist->nr_addrs) + memmove(alist->addrs + i + 1, + alist->addrs + i, + sizeof(alist->addrs[0]) * (alist->nr_addrs - i)); + + a = &alist->addrs[i].transport.sin6; + a->sin6_port = xport; + a->sin6_addr.s6_addr32[0] = xdr[0]; + a->sin6_addr.s6_addr32[1] = xdr[1]; + a->sin6_addr.s6_addr32[2] = xdr[2]; + a->sin6_addr.s6_addr32[3] = xdr[3]; + alist->nr_addrs++; +} + /* * Get an address to try. 
*/ diff --git a/fs/afs/afs_vl.h b/fs/afs/afs_vl.h index 6350b417aee9..e3c4688f573b 100644 --- a/fs/afs/afs_vl.h +++ b/fs/afs/afs_vl.h @@ -16,6 +16,7 @@ #define AFS_VL_PORT 7003 /* volume location service port */ #define VL_SERVICE 52 /* RxRPC service ID for the Volume Location service */ +#define YFS_VL_SERVICE 2503 /* Service ID for AuriStor upgraded VL service */ enum AFSVL_Operations { VLGETENTRYBYID = 503, /* AFS Get VLDB entry by ID */ @@ -24,6 +25,8 @@ enum AFSVL_Operations { VLGETENTRYBYIDU = 526, /* AFS Get VLDB entry by ID (UUID-variant) */ VLGETENTRYBYNAMEU = 527, /* AFS Get VLDB entry by name (UUID-variant) */ VLGETADDRSU = 533, /* AFS Get addrs for fileserver */ + YVLGETENDPOINTS = 64002, /* YFS Get endpoints for file/volume server */ + VLGETCAPABILITIES = 65537, /* AFS Get server capabilities */ }; enum AFSVL_Errors { @@ -57,6 +60,19 @@ enum AFSVL_Errors { AFSVL_NOMEM = 363547, /* malloc/realloc failed to alloc enough memory */ }; +enum { + YFS_SERVER_INDEX = 0, + YFS_SERVER_UUID = 1, + YFS_SERVER_ENDPOINT = 2, +}; + +enum { + YFS_ENDPOINT_IPV4 = 0, + YFS_ENDPOINT_IPV6 = 1, +}; + +#define YFS_MAXENDPOINTS 16 + /* * maps to "struct vldbentry" in vvl-spec.pdf */ diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 1fadf40551fd..767317bf33db 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -70,6 +70,8 @@ struct afs_addr_list { unsigned short nr_addrs; unsigned short index; /* Address currently in use */ unsigned short nr_ipv4; /* Number of IPv4 addresses */ + unsigned long probed; /* Mask of servers that have been probed */ + unsigned long yfs; /* Mask of servers that are YFS */ struct sockaddr_rxrpc addrs[]; }; @@ -113,7 +115,7 @@ struct afs_call { bool async; /* T if asynchronous */ bool ret_reply0; /* T if should return reply[0] on success */ bool upgrade; /* T to request service upgrade */ - u16 service_id; /* RxRPC service ID to call */ + u16 service_id; /* Actual service ID (after upgrade) */ u32 operation_ID; /* operation ID for an 
incoming call */ u32 count; /* count for use in unmarshalling */ __be32 tmp; /* place to extract temporary data */ @@ -564,7 +566,8 @@ extern bool afs_iterate_addresses(struct afs_addr_cursor *); extern int afs_end_cursor(struct afs_addr_cursor *); extern int afs_set_vl_cursor(struct afs_addr_cursor *, struct afs_cell *); -extern void afs_merge_fs_addr4(struct afs_addr_list *, __be32); +extern void afs_merge_fs_addr4(struct afs_addr_list *, __be32, u16); +extern void afs_merge_fs_addr6(struct afs_addr_list *, __be32 *, u16); /* * cache.c @@ -846,6 +849,9 @@ extern struct afs_vldb_entry *afs_vl_get_entry_by_name_u(struct afs_net *, struct key *, const char *, int); extern struct afs_addr_list *afs_vl_get_addrs_u(struct afs_net *, struct afs_addr_cursor *, struct key *, const uuid_t *); +extern int afs_vl_get_capabilities(struct afs_net *, struct afs_addr_cursor *, struct key *); +extern struct afs_addr_list *afs_yfsvl_get_endpoints(struct afs_net *, struct afs_addr_cursor *, + struct key *, const uuid_t *); /* * volume.c diff --git a/fs/afs/server.c b/fs/afs/server.c index a6c860bcf391..1880f1b6a9f1 100644 --- a/fs/afs/server.c +++ b/fs/afs/server.c @@ -266,7 +266,10 @@ static struct afs_addr_list *afs_vl_lookup_addrs(struct afs_cell *cell, return ERR_PTR(ret); while (afs_iterate_addresses(&ac)) { - alist = afs_vl_get_addrs_u(cell->net, &ac, key, uuid); + if (test_bit(ac.index, &ac.alist->yfs)) + alist = afs_yfsvl_get_endpoints(cell->net, &ac, key, uuid); + else + alist = afs_vl_get_addrs_u(cell->net, &ac, key, uuid); switch (ac.error) { case 0: afs_end_cursor(&ac); diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c index 173c652fe875..1d38cbdf6cad 100644 --- a/fs/afs/vlclient.c +++ b/fs/afs/vlclient.c @@ -225,7 +225,7 @@ again: bp = call->buffer; for (i = 0; i < count; i++) if (alist->nr_addrs < call->count2) - afs_merge_fs_addr4(alist, *bp++); + afs_merge_fs_addr4(alist, *bp++, AFS_FS_PORT); call->count -= count; if (call->count > 0) @@ -300,3 +300,338 @@ struct 
afs_addr_list *afs_vl_get_addrs_u(struct afs_net *net, return (struct afs_addr_list *)afs_make_call(ac, call, GFP_KERNEL, false); } + +/* + * Deliver reply data to a VL.GetCapabilities operation. + */ +static int afs_deliver_vl_get_capabilities(struct afs_call *call) +{ + u32 count; + int ret; + + _enter("{%u,%zu/%u}", call->unmarshall, call->offset, call->count); + +again: + switch (call->unmarshall) { + case 0: + call->offset = 0; + call->unmarshall++; + + /* Extract the capabilities word count */ + case 1: + ret = afs_extract_data(call, &call->tmp, + 1 * sizeof(__be32), + true); + if (ret < 0) + return ret; + + count = ntohl(call->tmp); + + call->count = count; + call->count2 = count; + call->offset = 0; + call->unmarshall++; + + /* Extract capabilities words */ + case 2: + count = min(call->count, 16U); + ret = afs_extract_data(call, call->buffer, + count * sizeof(__be32), + call->count > 16); + if (ret < 0) + return ret; + + /* TODO: Examine capabilities */ + + call->count -= count; + if (call->count > 0) + goto again; + call->offset = 0; + call->unmarshall++; + break; + } + + call->reply[0] = (void *)(unsigned long)call->service_id; + + _leave(" = 0 [done]"); + return 0; +} + +/* + * VL.GetCapabilities operation type + */ +static const struct afs_call_type afs_RXVLGetCapabilities = { + .name = "VL.GetCapabilities", + .deliver = afs_deliver_vl_get_capabilities, + .destructor = afs_flat_call_destructor, +}; + +/* + * Probe a VL server for the capabilities that it supports. This can + * return up to 196 words. + * + * We use this to probe for service upgrade to determine what the server at the + * other end supports.
+ */ +int afs_vl_get_capabilities(struct afs_net *net, + struct afs_addr_cursor *ac, + struct key *key) +{ + struct afs_call *call; + __be32 *bp; + + _enter(""); + + call = afs_alloc_flat_call(net, &afs_RXVLGetCapabilities, 1 * 4, 16 * 4); + if (!call) + return -ENOMEM; + + call->key = key; + call->upgrade = true; /* Let's see if this is a YFS server */ + call->reply[0] = (void *)VLGETCAPABILITIES; + call->ret_reply0 = true; + + /* marshall the parameters */ + bp = call->request; + *bp++ = htonl(VLGETCAPABILITIES); + + /* Can't take a ref on server */ + return afs_make_call(ac, call, GFP_KERNEL, false); +} + +/* + * Deliver reply data to a YFSVL.GetEndpoints call. + * + * GetEndpoints(IN yfsServerAttributes *attr, + * OUT opr_uuid *uuid, + * OUT afs_int32 *uniquifier, + * OUT endpoints *fsEndpoints, + * OUT endpoints *volEndpoints) + */ +static int afs_deliver_yfsvl_get_endpoints(struct afs_call *call) +{ + struct afs_addr_list *alist; + __be32 *bp; + u32 uniquifier, size; + int ret; + + _enter("{%u,%zu/%u,%u}", call->unmarshall, call->offset, call->count, call->count2); + +again: + switch (call->unmarshall) { + case 0: + call->offset = 0; + call->unmarshall = 1; + + /* Extract the returned uuid, uniquifier, fsEndpoints count and + * either the first fsEndpoint type or the volEndpoints + * count if there are no fsEndpoints. 
*/ + case 1: + ret = afs_extract_data(call, call->buffer, + sizeof(uuid_t) + + 3 * sizeof(__be32), + true); + if (ret < 0) + return ret; + + bp = call->buffer + sizeof(uuid_t); + uniquifier = ntohl(*bp++); + call->count = ntohl(*bp++); + call->count2 = ntohl(*bp); /* Type or next count */ + + if (call->count > YFS_MAXENDPOINTS) + return -EBADMSG; + + alist = afs_alloc_addrlist(call->count, FS_SERVICE, AFS_FS_PORT); + if (!alist) + return -ENOMEM; + alist->version = uniquifier; + call->reply[0] = alist; + call->offset = 0; + + if (call->count == 0) + goto extract_volendpoints; + + call->unmarshall = 2; + + /* Extract fsEndpoints[] entries */ + case 2: + switch (call->count2) { + case YFS_ENDPOINT_IPV4: + size = sizeof(__be32) * (1 + 1 + 1); + break; + case YFS_ENDPOINT_IPV6: + size = sizeof(__be32) * (1 + 4 + 1); + break; + default: + return -EBADMSG; + } + + size += sizeof(__be32); + ret = afs_extract_data(call, call->buffer, size, true); + if (ret < 0) + return ret; + + alist = call->reply[0]; + bp = call->buffer; + switch (call->count2) { + case YFS_ENDPOINT_IPV4: + if (ntohl(bp[0]) != sizeof(__be32) * 2) + return -EBADMSG; + afs_merge_fs_addr4(alist, bp[1], ntohl(bp[2])); + bp += 3; + break; + case YFS_ENDPOINT_IPV6: + if (ntohl(bp[0]) != sizeof(__be32) * 5) + return -EBADMSG; + afs_merge_fs_addr6(alist, bp + 1, ntohl(bp[5])); + bp += 6; + break; + default: + return -EBADMSG; + } + + /* Got either the type of the next entry or the count of + * volEndpoints if no more fsEndpoints. + */ + call->count2 = htonl(*bp++); + + call->offset = 0; + call->count--; + if (call->count > 0) + goto again; + + extract_volendpoints: + /* Extract the list of volEndpoints. */ + call->count = call->count2; + if (!call->count) + goto end; + if (call->count > YFS_MAXENDPOINTS) + return -EBADMSG; + + call->unmarshall = 3; + + /* Extract the type of volEndpoints[0]. 
Normally we would + * extract the type of the next endpoint when we extract the + * data of the current one, but this is the first... + */ + case 3: + ret = afs_extract_data(call, call->buffer, sizeof(__be32), true); + if (ret < 0) + return ret; + + bp = call->buffer; + call->count2 = htonl(*bp++); + call->offset = 0; + call->unmarshall = 4; + + /* Extract volEndpoints[] entries */ + case 4: + switch (call->count2) { + case YFS_ENDPOINT_IPV4: + size = sizeof(__be32) * (1 + 1 + 1); + break; + case YFS_ENDPOINT_IPV6: + size = sizeof(__be32) * (1 + 4 + 1); + break; + default: + return -EBADMSG; + } + + if (call->count > 1) + size += sizeof(__be32); + ret = afs_extract_data(call, call->buffer, size, true); + if (ret < 0) + return ret; + + bp = call->buffer; + switch (call->count2) { + case YFS_ENDPOINT_IPV4: + if (ntohl(bp[0]) != sizeof(__be32) * 2) + return -EBADMSG; + bp += 3; + break; + case YFS_ENDPOINT_IPV6: + if (ntohl(bp[0]) != sizeof(__be32) * 5) + return -EBADMSG; + bp += 6; + break; + default: + return -EBADMSG; + } + + /* Got either the type of the next entry or the count of + * volEndpoints if no more fsEndpoints. + */ + call->offset = 0; + call->count--; + if (call->count > 0) { + call->count2 = htonl(*bp++); + goto again; + } + + end: + call->unmarshall = 5; + + /* Done */ + case 5: + ret = afs_extract_data(call, call->buffer, 0, false); + if (ret < 0) + return ret; + call->unmarshall = 6; + + case 6: + break; + } + + alist = call->reply[0]; + + /* Start with IPv6 if available. */ + if (alist->nr_ipv4 < alist->nr_addrs) + alist->index = alist->nr_ipv4; + + _leave(" = 0 [done]"); + return 0; +} + +/* + * YFSVL.GetEndpoints operation type. + */ +static const struct afs_call_type afs_YFSVLGetEndpoints = { + .name = "VL.GetEndpoints", + .deliver = afs_deliver_yfsvl_get_endpoints, + .destructor = afs_vl_get_addrs_u_destructor, +}; + +/* + * Dispatch an operation to get the addresses for a server, where the server is + * nominated by UUID. 
+ */ +struct afs_addr_list *afs_yfsvl_get_endpoints(struct afs_net *net, + struct afs_addr_cursor *ac, + struct key *key, + const uuid_t *uuid) +{ + struct afs_call *call; + __be32 *bp; + + _enter(""); + + call = afs_alloc_flat_call(net, &afs_YFSVLGetEndpoints, + sizeof(__be32) * 2 + sizeof(*uuid), + sizeof(struct in6_addr) + sizeof(__be32) * 3); + if (!call) + return ERR_PTR(-ENOMEM); + + call->key = key; + call->reply[0] = NULL; + call->ret_reply0 = true; + + /* Marshall the parameters */ + bp = call->request; + *bp++ = htonl(YVLGETENDPOINTS); + *bp++ = htonl(YFS_SERVER_UUID); + memcpy(bp, uuid, sizeof(*uuid)); /* Type opr_uuid */ + + return (struct afs_addr_list *)afs_make_call(ac, call, GFP_KERNEL, false); +} diff --git a/fs/afs/volume.c b/fs/afs/volume.c index 2295dd4f9b15..684c48293353 100644 --- a/fs/afs/volume.c +++ b/fs/afs/volume.c @@ -124,6 +124,22 @@ static struct afs_vldb_entry *afs_vl_lookup_vldb(struct afs_cell *cell, return ERR_PTR(ret); while (afs_iterate_addresses(&ac)) { + if (!test_bit(ac.index, &ac.alist->probed)) { + ret = afs_vl_get_capabilities(cell->net, &ac, key); + switch (ret) { + case VL_SERVICE: + clear_bit(ac.index, &ac.alist->yfs); + set_bit(ac.index, &ac.alist->probed); + ac.addr->srx_service = ret; + break; + case YFS_VL_SERVICE: + set_bit(ac.index, &ac.alist->yfs); + set_bit(ac.index, &ac.alist->probed); + ac.addr->srx_service = ret; + break; + } + } + vldb = afs_vl_get_entry_by_name_u(cell->net, &ac, key, volname, volnamesz); switch (ac.error) { From 5f0fc8ba6a1eec510a1e43def48697985d948a2c Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 2 Nov 2017 15:27:51 +0000 Subject: [PATCH 24/35] afs: Only progress call state at end of Tx phase from rxrpc callback Only progress the AFS call state at the end of Tx phase from the callback passed to rxrpc_kernel_send_data() rather than setting it before the last data send call. 
Signed-off-by: David Howells --- fs/afs/rxrpc.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 1bbd5854507d..21f43d3acb91 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -381,13 +381,6 @@ long afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call, msg.msg_controllen = 0; msg.msg_flags = MSG_WAITALL | (call->send_pages ? MSG_MORE : 0); - /* We have to change the state *before* sending the last packet as - * rxrpc might give us the reply before it returns from sending the - * request. Further, if the send fails, we may already have been given - * a notification and may have collected it. - */ - if (!call->send_pages) - call->state = AFS_CALL_AWAIT_REPLY; ret = rxrpc_kernel_send_data(call->net->socket, rxcall, &msg, call->request_size, afs_notify_end_request_tx); @@ -799,7 +792,6 @@ void afs_send_empty_reply(struct afs_call *call) msg.msg_controllen = 0; msg.msg_flags = 0; - call->state = AFS_CALL_AWAIT_ACK; switch (rxrpc_kernel_send_data(net->socket, call->rxcall, &msg, 0, afs_notify_end_reply_tx)) { case 0: @@ -839,7 +831,6 @@ void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len) msg.msg_controllen = 0; msg.msg_flags = 0; - call->state = AFS_CALL_AWAIT_ACK; n = rxrpc_kernel_send_data(net->socket, call->rxcall, &msg, len, afs_notify_end_reply_tx); if (n >= 0) { From 1199db603511d7463d9d3840f96f61967affc766 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 2 Nov 2017 15:27:51 +0000 Subject: [PATCH 25/35] afs: Fix total-length calculation for multiple-page send Fix the total-length calculation in afs_make_call() when the operation being dispatched has data from a series of pages attached. Despite the patched code looking like it should reduce mathematically to the current code, it doesn't because the 32-bit unsigned arithmetic being used to calculate the page-offset-difference doesn't correctly extend to a 64-bit value when the result is effectively negative.
Without this, some FS.StoreData operations that span multiple pages fail, reporting too little or too much data. Signed-off-by: David Howells --- fs/afs/rxrpc.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 21f43d3acb91..1d075696bf55 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -350,8 +350,17 @@ long afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call, */ tx_total_len = call->request_size; if (call->send_pages) { - tx_total_len += call->last_to - call->first_offset; - tx_total_len += (call->last - call->first) * PAGE_SIZE; + if (call->last == call->first) { + tx_total_len += call->last_to - call->first_offset; + } else { + /* It looks mathematically like you should be able to + * combine the following lines with the ones above, but + * unsigned arithmetic is fun when it wraps... + */ + tx_total_len += PAGE_SIZE - call->first_offset; + tx_total_len += call->last_to; + tx_total_len += (call->last - call->first - 1) * PAGE_SIZE; + } } /* create a call */ From becfcc7e576eed03b93f412769573c93de550527 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 2 Nov 2017 15:27:51 +0000 Subject: [PATCH 26/35] afs: Fix documentation on # vs % prefix in mount source specification The documentation that describes the #-prefix and the %-prefix used when specifying the source to mount has the descriptions the wrong way round. Switch them over. Reported-by: Marc Dionne Signed-off-by: David Howells --- Documentation/filesystems/afs.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/filesystems/afs.txt b/Documentation/filesystems/afs.txt index 060da408923b..ba99b5ac4fd8 100644 --- a/Documentation/filesystems/afs.txt +++ b/Documentation/filesystems/afs.txt @@ -91,8 +91,8 @@ Filesystems can be mounted anywhere by commands similar to the following: mount -t afs "#root.cell."
/afs/cambridge Where the initial character is either a hash or a percent symbol depending on -whether you definitely want a R/W volume (hash) or whether you'd prefer a R/O -volume, but are willing to use a R/W volume instead (percent). +whether you definitely want a R/W volume (percent) or whether you'd prefer a +R/O volume, but are willing to use a R/W volume instead (hash). The name of the volume can be suffixes with ".backup" or ".readonly" to specify connection to only volumes of those types. From 025db80c9e421efef11f2b83b7f78a11476f06db Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 2 Nov 2017 15:27:51 +0000 Subject: [PATCH 27/35] afs: Trace the initiation and completion of client calls Add tracepoints to trace the initiation and completion of client calls within the kafs filesystem. The afs_make_vl_call tracepoint watches calls to the volume location database server. The afs_make_fs_call tracepoint watches calls to the file server. The afs_call_done tracepoint watches for call completion. Signed-off-by: David Howells --- fs/afs/fsclient.c | 75 ++++++++++++++++---- fs/afs/internal.h | 1 + fs/afs/rxrpc.c | 25 +++++-- fs/afs/vlclient.c | 10 ++- include/trace/events/afs.h | 142 +++++++++++++++++++++++++++++++++++++ 5 files changed, 233 insertions(+), 20 deletions(-) diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index 72ff3679fa2a..10e5ead629c2 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -16,6 +16,8 @@ #include "internal.h" #include "afs_fs.h" +static const struct afs_fid afs_zero_fid; + /* * We need somewhere to discard into in case the server helpfully returns more * than we asked for in FS.FetchData{,64}. 
@@ -299,6 +301,7 @@ static int afs_deliver_fs_fetch_status(struct afs_call *call) */ static const struct afs_call_type afs_RXFSFetchStatus = { .name = "FS.FetchStatus", + .op = afs_FS_FetchStatus, .deliver = afs_deliver_fs_fetch_status, .destructor = afs_flat_call_destructor, }; @@ -335,6 +338,7 @@ int afs_fs_fetch_file_status(struct afs_fs_cursor *fc, struct afs_volsync *volsy call->cb_break = fc->cb_break; afs_use_fs_server(call, fc->cbi); + trace_afs_make_fs_call(call, &vnode->fid); return afs_make_call(&fc->ac, call, GFP_NOFS, false); } @@ -497,12 +501,14 @@ static void afs_fetch_data_destructor(struct afs_call *call) */ static const struct afs_call_type afs_RXFSFetchData = { .name = "FS.FetchData", + .op = afs_FS_FetchData, .deliver = afs_deliver_fs_fetch_data, .destructor = afs_fetch_data_destructor, }; static const struct afs_call_type afs_RXFSFetchData64 = { .name = "FS.FetchData64", + .op = afs_FS_FetchData64, .deliver = afs_deliver_fs_fetch_data, .destructor = afs_fetch_data_destructor, }; @@ -527,7 +533,6 @@ static int afs_fs_fetch_data64(struct afs_fs_cursor *fc, struct afs_read *req) call->reply[0] = vnode; call->reply[1] = NULL; /* volsync */ call->reply[2] = req; - call->operation_ID = FSFETCHDATA64; /* marshall the parameters */ bp = call->request; @@ -543,6 +548,7 @@ static int afs_fs_fetch_data64(struct afs_fs_cursor *fc, struct afs_read *req) atomic_inc(&req->usage); call->cb_break = fc->cb_break; afs_use_fs_server(call, fc->cbi); + trace_afs_make_fs_call(call, &vnode->fid); return afs_make_call(&fc->ac, call, GFP_NOFS, false); } @@ -571,7 +577,6 @@ int afs_fs_fetch_data(struct afs_fs_cursor *fc, struct afs_read *req) call->reply[0] = vnode; call->reply[1] = NULL; /* volsync */ call->reply[2] = req; - call->operation_ID = FSFETCHDATA; /* marshall the parameters */ bp = call->request; @@ -585,6 +590,7 @@ int afs_fs_fetch_data(struct afs_fs_cursor *fc, struct afs_read *req) atomic_inc(&req->usage); call->cb_break = fc->cb_break; 
afs_use_fs_server(call, fc->cbi); + trace_afs_make_fs_call(call, &vnode->fid); return afs_make_call(&fc->ac, call, GFP_NOFS, false); } @@ -618,8 +624,16 @@ static int afs_deliver_fs_create_vnode(struct afs_call *call) /* * FS.CreateFile and FS.MakeDir operation type */ -static const struct afs_call_type afs_RXFSCreateXXXX = { - .name = "FS.CreateXXXX", +static const struct afs_call_type afs_RXFSCreateFile = { + .name = "FS.CreateFile", + .op = afs_FS_CreateFile, + .deliver = afs_deliver_fs_create_vnode, + .destructor = afs_flat_call_destructor, +}; + +static const struct afs_call_type afs_RXFSMakeDir = { + .name = "FS.MakeDir", + .op = afs_FS_MakeDir, .deliver = afs_deliver_fs_create_vnode, .destructor = afs_flat_call_destructor, }; @@ -646,8 +660,9 @@ int afs_fs_create(struct afs_fs_cursor *fc, padsz = (4 - (namesz & 3)) & 3; reqsz = (5 * 4) + namesz + padsz + (6 * 4); - call = afs_alloc_flat_call(net, &afs_RXFSCreateXXXX, reqsz, - (3 + 21 + 21 + 3 + 6) * 4); + call = afs_alloc_flat_call( + net, S_ISDIR(mode) ? 
&afs_RXFSMakeDir : &afs_RXFSCreateFile, + reqsz, (3 + 21 + 21 + 3 + 6) * 4); if (!call) return -ENOMEM; @@ -678,6 +693,7 @@ int afs_fs_create(struct afs_fs_cursor *fc, *bp++ = 0; /* segment size */ afs_use_fs_server(call, fc->cbi); + trace_afs_make_fs_call(call, &vnode->fid); return afs_make_call(&fc->ac, call, GFP_NOFS, false); } @@ -708,8 +724,16 @@ static int afs_deliver_fs_remove(struct afs_call *call) /* * FS.RemoveDir/FS.RemoveFile operation type */ -static const struct afs_call_type afs_RXFSRemoveXXXX = { - .name = "FS.RemoveXXXX", +static const struct afs_call_type afs_RXFSRemoveFile = { + .name = "FS.RemoveFile", + .op = afs_FS_RemoveFile, + .deliver = afs_deliver_fs_remove, + .destructor = afs_flat_call_destructor, +}; + +static const struct afs_call_type afs_RXFSRemoveDir = { + .name = "FS.RemoveDir", + .op = afs_FS_RemoveDir, .deliver = afs_deliver_fs_remove, .destructor = afs_flat_call_destructor, }; @@ -731,7 +755,9 @@ int afs_fs_remove(struct afs_fs_cursor *fc, const char *name, bool isdir) padsz = (4 - (namesz & 3)) & 3; reqsz = (5 * 4) + namesz + padsz; - call = afs_alloc_flat_call(net, &afs_RXFSRemoveXXXX, reqsz, (21 + 6) * 4); + call = afs_alloc_flat_call( + net, isdir ? 
&afs_RXFSRemoveDir : &afs_RXFSRemoveFile, + reqsz, (21 + 6) * 4); if (!call) return -ENOMEM; @@ -753,6 +779,7 @@ int afs_fs_remove(struct afs_fs_cursor *fc, const char *name, bool isdir) } afs_use_fs_server(call, fc->cbi); + trace_afs_make_fs_call(call, &vnode->fid); return afs_make_call(&fc->ac, call, GFP_NOFS, false); } @@ -786,6 +813,7 @@ static int afs_deliver_fs_link(struct afs_call *call) */ static const struct afs_call_type afs_RXFSLink = { .name = "FS.Link", + .op = afs_FS_Link, .deliver = afs_deliver_fs_link, .destructor = afs_flat_call_destructor, }; @@ -834,6 +862,7 @@ int afs_fs_link(struct afs_fs_cursor *fc, struct afs_vnode *vnode, *bp++ = htonl(vnode->fid.unique); afs_use_fs_server(call, fc->cbi); + trace_afs_make_fs_call(call, &vnode->fid); return afs_make_call(&fc->ac, call, GFP_NOFS, false); } @@ -868,6 +897,7 @@ static int afs_deliver_fs_symlink(struct afs_call *call) */ static const struct afs_call_type afs_RXFSSymlink = { .name = "FS.Symlink", + .op = afs_FS_Symlink, .deliver = afs_deliver_fs_symlink, .destructor = afs_flat_call_destructor, }; @@ -935,6 +965,7 @@ int afs_fs_symlink(struct afs_fs_cursor *fc, *bp++ = 0; /* segment size */ afs_use_fs_server(call, fc->cbi); + trace_afs_make_fs_call(call, &vnode->fid); return afs_make_call(&fc->ac, call, GFP_NOFS, false); } @@ -970,6 +1001,7 @@ static int afs_deliver_fs_rename(struct afs_call *call) */ static const struct afs_call_type afs_RXFSRename = { .name = "FS.Rename", + .op = afs_FS_Rename, .deliver = afs_deliver_fs_rename, .destructor = afs_flat_call_destructor, }; @@ -1035,6 +1067,7 @@ int afs_fs_rename(struct afs_fs_cursor *fc, } afs_use_fs_server(call, fc->cbi); + trace_afs_make_fs_call(call, &orig_dvnode->fid); return afs_make_call(&fc->ac, call, GFP_NOFS, false); } @@ -1070,12 +1103,14 @@ static int afs_deliver_fs_store_data(struct afs_call *call) */ static const struct afs_call_type afs_RXFSStoreData = { .name = "FS.StoreData", + .op = afs_FS_StoreData, .deliver = 
afs_deliver_fs_store_data, .destructor = afs_flat_call_destructor, }; static const struct afs_call_type afs_RXFSStoreData64 = { .name = "FS.StoreData64", + .op = afs_FS_StoreData64, .deliver = afs_deliver_fs_store_data, .destructor = afs_flat_call_destructor, }; @@ -1135,6 +1170,7 @@ static int afs_fs_store_data64(struct afs_fs_cursor *fc, *bp++ = htonl(i_size >> 32); *bp++ = htonl((u32) i_size); + trace_afs_make_fs_call(call, &vnode->fid); return afs_make_call(&fc->ac, call, GFP_NOFS, false); } @@ -1208,6 +1244,7 @@ int afs_fs_store_data(struct afs_fs_cursor *fc, struct afs_writeback *wb, *bp++ = htonl(i_size); afs_use_fs_server(call, fc->cbi); + trace_afs_make_fs_call(call, &vnode->fid); return afs_make_call(&fc->ac, call, GFP_NOFS, false); } @@ -1245,18 +1282,21 @@ static int afs_deliver_fs_store_status(struct afs_call *call) */ static const struct afs_call_type afs_RXFSStoreStatus = { .name = "FS.StoreStatus", + .op = afs_FS_StoreStatus, .deliver = afs_deliver_fs_store_status, .destructor = afs_flat_call_destructor, }; static const struct afs_call_type afs_RXFSStoreData_as_Status = { .name = "FS.StoreData", + .op = afs_FS_StoreData, .deliver = afs_deliver_fs_store_status, .destructor = afs_flat_call_destructor, }; static const struct afs_call_type afs_RXFSStoreData64_as_Status = { .name = "FS.StoreData64", + .op = afs_FS_StoreData64, .deliver = afs_deliver_fs_store_status, .destructor = afs_flat_call_destructor, }; @@ -1286,7 +1326,6 @@ static int afs_fs_setattr_size64(struct afs_fs_cursor *fc, struct iattr *attr) call->key = fc->key; call->reply[0] = vnode; call->store_version = vnode->status.data_version + 1; - call->operation_ID = FSSTOREDATA; /* marshall the parameters */ bp = call->request; @@ -1305,6 +1344,7 @@ static int afs_fs_setattr_size64(struct afs_fs_cursor *fc, struct iattr *attr) *bp++ = htonl((u32) attr->ia_size); afs_use_fs_server(call, fc->cbi); + trace_afs_make_fs_call(call, &vnode->fid); return afs_make_call(&fc->ac, call, GFP_NOFS, false); 
} @@ -1335,7 +1375,6 @@ static int afs_fs_setattr_size(struct afs_fs_cursor *fc, struct iattr *attr) call->key = fc->key; call->reply[0] = vnode; call->store_version = vnode->status.data_version + 1; - call->operation_ID = FSSTOREDATA; /* marshall the parameters */ bp = call->request; @@ -1351,6 +1390,7 @@ static int afs_fs_setattr_size(struct afs_fs_cursor *fc, struct iattr *attr) *bp++ = htonl(attr->ia_size); /* new file length */ afs_use_fs_server(call, fc->cbi); + trace_afs_make_fs_call(call, &vnode->fid); return afs_make_call(&fc->ac, call, GFP_NOFS, false); } @@ -1379,7 +1419,6 @@ int afs_fs_setattr(struct afs_fs_cursor *fc, struct iattr *attr) call->key = fc->key; call->reply[0] = vnode; - call->operation_ID = FSSTORESTATUS; /* marshall the parameters */ bp = call->request; @@ -1391,6 +1430,7 @@ int afs_fs_setattr(struct afs_fs_cursor *fc, struct iattr *attr) xdr_encode_AFS_StoreStatus(&bp, attr); afs_use_fs_server(call, fc->cbi); + trace_afs_make_fs_call(call, &vnode->fid); return afs_make_call(&fc->ac, call, GFP_NOFS, false); } @@ -1581,6 +1621,7 @@ static void afs_get_volume_status_call_destructor(struct afs_call *call) */ static const struct afs_call_type afs_RXFSGetVolumeStatus = { .name = "FS.GetVolumeStatus", + .op = afs_FS_GetVolumeStatus, .deliver = afs_deliver_fs_get_volume_status, .destructor = afs_get_volume_status_call_destructor, }; @@ -1620,6 +1661,7 @@ int afs_fs_get_volume_status(struct afs_fs_cursor *fc, bp[1] = htonl(vnode->fid.vid); afs_use_fs_server(call, fc->cbi); + trace_afs_make_fs_call(call, &vnode->fid); return afs_make_call(&fc->ac, call, GFP_NOFS, false); } @@ -1650,6 +1692,7 @@ static int afs_deliver_fs_xxxx_lock(struct afs_call *call) */ static const struct afs_call_type afs_RXFSSetLock = { .name = "FS.SetLock", + .op = afs_FS_SetLock, .deliver = afs_deliver_fs_xxxx_lock, .destructor = afs_flat_call_destructor, }; @@ -1659,6 +1702,7 @@ static const struct afs_call_type afs_RXFSSetLock = { */ static const struct afs_call_type 
afs_RXFSExtendLock = { .name = "FS.ExtendLock", + .op = afs_FS_ExtendLock, .deliver = afs_deliver_fs_xxxx_lock, .destructor = afs_flat_call_destructor, }; @@ -1668,6 +1712,7 @@ static const struct afs_call_type afs_RXFSExtendLock = { */ static const struct afs_call_type afs_RXFSReleaseLock = { .name = "FS.ReleaseLock", + .op = afs_FS_ReleaseLock, .deliver = afs_deliver_fs_xxxx_lock, .destructor = afs_flat_call_destructor, }; @@ -1700,6 +1745,7 @@ int afs_fs_set_lock(struct afs_fs_cursor *fc, afs_lock_type_t type) *bp++ = htonl(type); afs_use_fs_server(call, fc->cbi); + trace_afs_make_fs_call(call, &vnode->fid); return afs_make_call(&fc->ac, call, GFP_NOFS, false); } @@ -1730,6 +1776,7 @@ int afs_fs_extend_lock(struct afs_fs_cursor *fc) *bp++ = htonl(vnode->fid.unique); afs_use_fs_server(call, fc->cbi); + trace_afs_make_fs_call(call, &vnode->fid); return afs_make_call(&fc->ac, call, GFP_NOFS, false); } @@ -1760,6 +1807,7 @@ int afs_fs_release_lock(struct afs_fs_cursor *fc) *bp++ = htonl(vnode->fid.unique); afs_use_fs_server(call, fc->cbi); + trace_afs_make_fs_call(call, &vnode->fid); return afs_make_call(&fc->ac, call, GFP_NOFS, false); } @@ -1776,6 +1824,7 @@ static int afs_deliver_fs_give_up_all_callbacks(struct afs_call *call) */ static const struct afs_call_type afs_RXFSGiveUpAllCallBacks = { .name = "FS.GiveUpAllCallBacks", + .op = afs_FS_GiveUpAllCallBacks, .deliver = afs_deliver_fs_give_up_all_callbacks, .destructor = afs_flat_call_destructor, }; @@ -1866,6 +1915,7 @@ again: */ static const struct afs_call_type afs_RXFSGetCapabilities = { .name = "FS.GetCapabilities", + .op = afs_FS_GetCapabilities, .deliver = afs_deliver_fs_get_capabilities, .destructor = afs_flat_call_destructor, }; @@ -1895,5 +1945,6 @@ int afs_fs_get_capabilities(struct afs_net *net, *bp++ = htonl(FSGETCAPABILITIES); /* Can't take a ref on server */ + trace_afs_make_fs_call(call, NULL); return afs_make_call(ac, call, GFP_NOFS, false); } diff --git a/fs/afs/internal.h b/fs/afs/internal.h 
index 767317bf33db..aad12546e0ea 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -124,6 +124,7 @@ struct afs_call { struct afs_call_type { const char *name; + unsigned int op; /* Really enum afs_fs_operation */ /* deliver request or reply data to an call * - returning an error will cause the call to be aborted diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 1d075696bf55..59cc58022c4d 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -219,6 +219,7 @@ struct afs_call *afs_alloc_flat_call(struct afs_net *net, goto nomem_free; } + call->operation_ID = type->op; init_waitqueue_head(&call->waitq); return call; @@ -422,6 +423,8 @@ error_do_abort: ac->abort_code = call->abort_code; ac->responded = true; } + call->error = ret; + trace_afs_call_done(call); error_kill_call: afs_put_call(call); ac->error = ret; @@ -455,10 +458,10 @@ static void afs_deliver_to_call(struct afs_call *call) if (ret == -EINPROGRESS || ret == -EAGAIN) return; - if (ret == 1 || ret < 0) { - call->state = AFS_CALL_COMPLETE; - goto done; - } + if (ret < 0) + call->error = ret; + if (ret < 0 || ret == 1) + goto call_complete; return; } @@ -466,7 +469,7 @@ static void afs_deliver_to_call(struct afs_call *call) switch (ret) { case 0: if (call->state == AFS_CALL_AWAIT_REPLY) - call->state = AFS_CALL_COMPLETE; + goto call_complete; goto done; case -EINPROGRESS: case -EAGAIN: @@ -505,7 +508,11 @@ out: save_error: call->error = ret; - call->state = AFS_CALL_COMPLETE; +call_complete: + if (call->state != AFS_CALL_COMPLETE) { + call->state = AFS_CALL_COMPLETE; + trace_afs_call_done(call); + } goto done; } @@ -567,8 +574,10 @@ static long afs_wait_for_call_to_complete(struct afs_call *call, if (call->state < AFS_CALL_COMPLETE) { _debug("call interrupted"); if (rxrpc_kernel_abort_call(call->net->socket, call->rxcall, - RX_USER_ABORT, -EINTR, "KWI")) + RX_USER_ABORT, -EINTR, "KWI")) { call->error = -ERESTARTSYS; + trace_afs_call_done(call); + } } ac->abort_code = call->abort_code; @@ -882,6 +891,7 
@@ int afs_extract_data(struct afs_call *call, void *buf, size_t count, switch (call->state) { case AFS_CALL_AWAIT_REPLY: call->state = AFS_CALL_COMPLETE; + trace_afs_call_done(call); break; case AFS_CALL_AWAIT_REQUEST: call->state = AFS_CALL_REPLYING; @@ -894,5 +904,6 @@ int afs_extract_data(struct afs_call *call, void *buf, size_t count, call->error = ret; call->state = AFS_CALL_COMPLETE; + trace_afs_call_done(call); return ret; } diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c index 1d38cbdf6cad..e372f89fd36a 100644 --- a/fs/afs/vlclient.c +++ b/fs/afs/vlclient.c @@ -114,6 +114,7 @@ static void afs_destroy_vl_get_entry_by_name_u(struct afs_call *call) */ static const struct afs_call_type afs_RXVLGetEntryByNameU = { .name = "VL.GetEntryByNameU", + .op = afs_VL_GetEntryByNameU, .deliver = afs_deliver_vl_get_entry_by_name_u, .destructor = afs_destroy_vl_get_entry_by_name_u, }; @@ -161,6 +162,7 @@ struct afs_vldb_entry *afs_vl_get_entry_by_name_u(struct afs_net *net, if (padsz > 0) memset((void *)bp + volnamesz, 0, padsz); + trace_afs_make_vl_call(call); return (struct afs_vldb_entry *)afs_make_call(ac, call, GFP_KERNEL, false); } @@ -251,6 +253,7 @@ static void afs_vl_get_addrs_u_destructor(struct afs_call *call) */ static const struct afs_call_type afs_RXVLGetAddrsU = { .name = "VL.GetAddrsU", + .op = afs_VL_GetAddrsU, .deliver = afs_deliver_vl_get_addrs_u, .destructor = afs_vl_get_addrs_u_destructor, }; @@ -298,6 +301,7 @@ struct afs_addr_list *afs_vl_get_addrs_u(struct afs_net *net, for (i = 0; i < 6; i++) r->uuid.node[i] = ntohl(u->node[i]); + trace_afs_make_vl_call(call); return (struct afs_addr_list *)afs_make_call(ac, call, GFP_KERNEL, false); } @@ -362,6 +366,7 @@ again: */ static const struct afs_call_type afs_RXVLGetCapabilities = { .name = "VL.GetCapabilities", + .op = afs_VL_GetCapabilities, .deliver = afs_deliver_vl_get_capabilities, .destructor = afs_flat_call_destructor, }; @@ -396,6 +401,7 @@ int afs_vl_get_capabilities(struct afs_net *net, *bp++ 
= htonl(VLGETCAPABILITIES); /* Can't take a ref on server */ + trace_afs_make_vl_call(call); return afs_make_call(ac, call, GFP_KERNEL, false); } @@ -598,7 +604,8 @@ again: * YFSVL.GetEndpoints operation type. */ static const struct afs_call_type afs_YFSVLGetEndpoints = { - .name = "VL.GetEndpoints", + .name = "YFSVL.GetEndpoints", + .op = afs_YFSVL_GetEndpoints, .deliver = afs_deliver_yfsvl_get_endpoints, .destructor = afs_vl_get_addrs_u_destructor, }; @@ -633,5 +640,6 @@ struct afs_addr_list *afs_yfsvl_get_endpoints(struct afs_net *net, *bp++ = htonl(YFS_SERVER_UUID); memcpy(bp, uuid, sizeof(*uuid)); /* Type opr_uuid */ + trace_afs_make_vl_call(call); return (struct afs_addr_list *)afs_make_call(ac, call, GFP_KERNEL, false); } diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h index 8b95c16b7045..dd9d2600ee98 100644 --- a/include/trace/events/afs.h +++ b/include/trace/events/afs.h @@ -30,6 +30,38 @@ enum afs_call_trace { afs_call_trace_work, }; +enum afs_fs_operation { + afs_FS_FetchData = 130, /* AFS Fetch file data */ + afs_FS_FetchStatus = 132, /* AFS Fetch file status */ + afs_FS_StoreData = 133, /* AFS Store file data */ + afs_FS_StoreStatus = 135, /* AFS Store file status */ + afs_FS_RemoveFile = 136, /* AFS Remove a file */ + afs_FS_CreateFile = 137, /* AFS Create a file */ + afs_FS_Rename = 138, /* AFS Rename or move a file or directory */ + afs_FS_Symlink = 139, /* AFS Create a symbolic link */ + afs_FS_Link = 140, /* AFS Create a hard link */ + afs_FS_MakeDir = 141, /* AFS Create a directory */ + afs_FS_RemoveDir = 142, /* AFS Remove a directory */ + afs_FS_GetVolumeInfo = 148, /* AFS Get information about a volume */ + afs_FS_GetVolumeStatus = 149, /* AFS Get volume status information */ + afs_FS_GetRootVolume = 151, /* AFS Get root volume name */ + afs_FS_SetLock = 156, /* AFS Request a file lock */ + afs_FS_ExtendLock = 157, /* AFS Extend a file lock */ + afs_FS_ReleaseLock = 158, /* AFS Release a file lock */ + afs_FS_Lookup = 161, 
/* AFS lookup file in directory */ + afs_FS_FetchData64 = 65537, /* AFS Fetch file data */ + afs_FS_StoreData64 = 65538, /* AFS Store file data */ + afs_FS_GiveUpAllCallBacks = 65539, /* AFS Give up all our callbacks on a server */ + afs_FS_GetCapabilities = 65540, /* AFS Get FS server capabilities */ +}; + +enum afs_vl_operation { + afs_VL_GetEntryByNameU = 527, /* AFS Get Vol Entry By Name operation ID */ + afs_VL_GetAddrsU = 533, /* AFS Get FS server addresses */ + afs_YFSVL_GetEndpoints = 64002, /* YFS Get FS & Vol server addresses */ + afs_VL_GetCapabilities = 65537, /* AFS Get VL server capabilities */ +}; + #endif /* end __AFS_DECLARE_TRACE_ENUMS_ONCE_ONLY */ /* @@ -42,6 +74,37 @@ enum afs_call_trace { EM(afs_call_trace_wake, "WAKE ") \ E_(afs_call_trace_work, "WORK ") +#define afs_fs_operations \ + EM(afs_FS_FetchData, "FS.FetchData") \ + EM(afs_FS_FetchStatus, "FS.FetchStatus") \ + EM(afs_FS_StoreData, "FS.StoreData") \ + EM(afs_FS_StoreStatus, "FS.StoreStatus") \ + EM(afs_FS_RemoveFile, "FS.RemoveFile") \ + EM(afs_FS_CreateFile, "FS.CreateFile") \ + EM(afs_FS_Rename, "FS.Rename") \ + EM(afs_FS_Symlink, "FS.Symlink") \ + EM(afs_FS_Link, "FS.Link") \ + EM(afs_FS_MakeDir, "FS.MakeDir") \ + EM(afs_FS_RemoveDir, "FS.RemoveDir") \ + EM(afs_FS_GetVolumeInfo, "FS.GetVolumeInfo") \ + EM(afs_FS_GetVolumeStatus, "FS.GetVolumeStatus") \ + EM(afs_FS_GetRootVolume, "FS.GetRootVolume") \ + EM(afs_FS_SetLock, "FS.SetLock") \ + EM(afs_FS_ExtendLock, "FS.ExtendLock") \ + EM(afs_FS_ReleaseLock, "FS.ReleaseLock") \ + EM(afs_FS_Lookup, "FS.Lookup") \ + EM(afs_FS_FetchData64, "FS.FetchData64") \ + EM(afs_FS_StoreData64, "FS.StoreData64") \ + EM(afs_FS_GiveUpAllCallBacks, "FS.GiveUpAllCallBacks") \ + E_(afs_FS_GetCapabilities, "FS.GetCapabilities") + +#define afs_vl_operations \ + EM(afs_VL_GetEntryByNameU, "VL.GetEntryByNameU") \ + EM(afs_VL_GetAddrsU, "VL.GetAddrsU") \ + EM(afs_YFSVL_GetEndpoints, "YFSVL.GetEndpoints") \ + E_(afs_VL_GetCapabilities, "VL.GetCapabilities") + + 
/* * Export enum symbols via userspace. */ @@ -51,6 +114,8 @@ enum afs_call_trace { #define E_(a, b) TRACE_DEFINE_ENUM(a); afs_call_traces; +afs_fs_operations; +afs_vl_operations; /* * Now redefine the EM() and E_() macros to map the enums to the strings that @@ -178,6 +243,83 @@ TRACE_EVENT(afs_call, __entry->where) ); +TRACE_EVENT(afs_make_fs_call, + TP_PROTO(struct afs_call *call, const struct afs_fid *fid), + + TP_ARGS(call, fid), + + TP_STRUCT__entry( + __field(struct afs_call *, call ) + __field(enum afs_fs_operation, op ) + __field_struct(struct afs_fid, fid ) + ), + + TP_fast_assign( + __entry->call = call; + __entry->op = call->operation_ID; + if (fid) { + __entry->fid = *fid; + } else { + __entry->fid.vid = 0; + __entry->fid.vnode = 0; + __entry->fid.unique = 0; + } + ), + + TP_printk("c=%p %06x:%06x:%06x %s", + __entry->call, + __entry->fid.vid, + __entry->fid.vnode, + __entry->fid.unique, + __print_symbolic(__entry->op, afs_fs_operations)) + ); + +TRACE_EVENT(afs_make_vl_call, + TP_PROTO(struct afs_call *call), + + TP_ARGS(call), + + TP_STRUCT__entry( + __field(struct afs_call *, call ) + __field(enum afs_vl_operation, op ) + ), + + TP_fast_assign( + __entry->call = call; + __entry->op = call->operation_ID; + ), + + TP_printk("c=%p %s", + __entry->call, + __print_symbolic(__entry->op, afs_vl_operations)) + ); + +TRACE_EVENT(afs_call_done, + TP_PROTO(struct afs_call *call), + + TP_ARGS(call), + + TP_STRUCT__entry( + __field(struct afs_call *, call ) + __field(struct rxrpc_call *, rx_call ) + __field(int, ret ) + __field(u32, abort_code ) + ), + + TP_fast_assign( + __entry->call = call; + __entry->rx_call = call->rxcall; + __entry->ret = call->error; + __entry->abort_code = call->abort_code; + ), + + TP_printk(" c=%p ret=%d ab=%d [%p]", + __entry->call, + __entry->ret, + __entry->abort_code, + __entry->rx_call) + ); + #endif /* _TRACE_AFS_H */ /* This part must be outside protection */ From 2c099014a0a456012c1778e80adce839bf956b77 Mon Sep 17 00:00:00 2001 
From: David Howells Date: Thu, 2 Nov 2017 15:27:51 +0000 Subject: [PATCH 28/35] afs: Trace the sending of pages Add a pair of tracepoints to log the sending of pages for an FS.StoreData or FS.StoreData64 operation. Tracepoint afs_send_pages notes each set of pages added to the operation. There may be several of these per operation as we get at most 8 contiguous pages in one go because the bvec we're using is on the stack. Tracepoint afs_sent_pages notes the end of adding data from a whole run of pages to the operation and the completion of the request phase. Signed-off-by: David Howells --- fs/afs/rxrpc.c | 3 ++ include/trace/events/afs.h | 61 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+) diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 59cc58022c4d..06a51d70b82b 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -303,6 +303,8 @@ static int afs_send_pages(struct afs_call *call, struct msghdr *msg) do { afs_load_bvec(call, msg, bv, first, last, offset); + trace_afs_send_pages(call, msg, first, last, offset); + offset = 0; bytes = msg->msg_iter.count; nr = msg->msg_iter.nr_segs; @@ -317,6 +319,7 @@ static int afs_send_pages(struct afs_call *call, struct msghdr *msg) first += nr; } while (first <= last); + trace_afs_sent_pages(call, call->first, last, first, ret); return ret; } diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h index dd9d2600ee98..c780f4c39a53 100644 --- a/include/trace/events/afs.h +++ b/include/trace/events/afs.h @@ -320,6 +320,67 @@ TRACE_EVENT(afs_call_done, __entry->rx_call) ); +TRACE_EVENT(afs_send_pages, + TP_PROTO(struct afs_call *call, struct msghdr *msg, + pgoff_t first, pgoff_t last, unsigned int offset), + + TP_ARGS(call, msg, first, last, offset), + + TP_STRUCT__entry( + __field(struct afs_call *, call ) + __field(pgoff_t, first ) + __field(pgoff_t, last ) + __field(unsigned int, nr ) + __field(unsigned int, bytes ) + __field(unsigned int, offset ) + __field(unsigned int, flags ) + ), + +
TP_fast_assign( + __entry->call = call; + __entry->first = first; + __entry->last = last; + __entry->nr = msg->msg_iter.nr_segs; + __entry->bytes = msg->msg_iter.count; + __entry->offset = offset; + __entry->flags = msg->msg_flags; + ), + + TP_printk(" c=%p %lx-%lx-%lx b=%x o=%x f=%x", + __entry->call, + __entry->first, __entry->first + __entry->nr - 1, __entry->last, + __entry->bytes, __entry->offset, + __entry->flags) + ); + +TRACE_EVENT(afs_sent_pages, + TP_PROTO(struct afs_call *call, pgoff_t first, pgoff_t last, + pgoff_t cursor, int ret), + + TP_ARGS(call, first, last, cursor, ret), + + TP_STRUCT__entry( + __field(struct afs_call *, call ) + __field(pgoff_t, first ) + __field(pgoff_t, last ) + __field(pgoff_t, cursor ) + __field(int, ret ) + ), + + TP_fast_assign( + __entry->call = call; + __entry->first = first; + __entry->last = last; + __entry->cursor = cursor; + __entry->ret = ret; + ), + + TP_printk(" c=%p %lx-%lx c=%lx r=%d", + __entry->call, + __entry->first, __entry->last, + __entry->cursor, __entry->ret) + ); + #endif /* _TRACE_AFS_H */ /* This part must be outside protection */ From dab17c1add5c51b68027a9a3861af3a99cb5485a Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 2 Nov 2017 15:27:52 +0000 Subject: [PATCH 29/35] afs: Fix directory read/modify race Because parsing of the directory wasn't being done under any sort of lock, the pages holding the directory content can get invalidated whilst the parsing is ongoing. Further, the directory page check function gets called outside of the page lock, so if the page gets cleared or updated, this may return reports of bad magic numbers in the directory page. Also, the directory may change size whilst checking and parsing are ongoing, so more care needs to be taken here. Fix this by: (1) Perform the page check from the page filling function before we set PageUptodate and drop the page lock. (2) Check for the file having shrunk and the page having been abandoned before checking the page contents. 
(3) Lock the page whilst parsing it for the directory iterator. Whilst we're at it, add a tracepoint to report check failure. Signed-off-by: David Howells --- fs/afs/dir.c | 27 +++++++++++++++++++-------- fs/afs/file.c | 5 +++++ fs/afs/internal.h | 1 + include/trace/events/afs.h | 21 +++++++++++++++++++++ 4 files changed, 46 insertions(+), 8 deletions(-) diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 53f3917440e7..ecda0e6a9f7e 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -130,10 +130,11 @@ struct afs_lookup_cookie { /* * check that a directory page is valid */ -static inline bool afs_dir_check_page(struct inode *dir, struct page *page) +bool afs_dir_check_page(struct inode *dir, struct page *page) { struct afs_dir_page *dbuf; - loff_t latter; + struct afs_vnode *vnode = AFS_FS_I(dir); + loff_t latter, i_size, off; int tmp, qty; #if 0 @@ -150,8 +151,15 @@ static inline bool afs_dir_check_page(struct inode *dir, struct page *page) } #endif - /* determine how many magic numbers there should be in this page */ - latter = dir->i_size - page_offset(page); + /* Determine how many magic numbers there should be in this page, but + * we must take care because the directory may change size under us. 
+ */ + off = page_offset(page); + i_size = i_size_read(dir); + if (i_size <= off) + goto checked; + + latter = i_size - off; if (latter >= PAGE_SIZE) qty = PAGE_SIZE; else @@ -162,13 +170,15 @@ static inline bool afs_dir_check_page(struct inode *dir, struct page *page) dbuf = page_address(page); for (tmp = 0; tmp < qty; tmp++) { if (dbuf->blocks[tmp].pagehdr.magic != AFS_DIR_MAGIC) { - printk("kAFS: %s(%lu): bad magic %d/%d is %04hx\n", + printk("kAFS: %s(%lx): bad magic %d/%d is %04hx\n", __func__, dir->i_ino, tmp, qty, ntohs(dbuf->blocks[tmp].pagehdr.magic)); + trace_afs_dir_check_failed(vnode, off, i_size); goto error; } } +checked: SetPageChecked(page); return true; @@ -183,6 +193,7 @@ error: static inline void afs_dir_put_page(struct page *page) { kunmap(page); + unlock_page(page); put_page(page); } @@ -197,9 +208,10 @@ static struct page *afs_dir_get_page(struct inode *dir, unsigned long index, page = read_cache_page(dir->i_mapping, index, afs_page_filler, key); if (!IS_ERR(page)) { + lock_page(page); kmap(page); if (unlikely(!PageChecked(page))) { - if (PageError(page) || !afs_dir_check_page(dir, page)) + if (PageError(page)) goto fail; } } @@ -384,8 +396,7 @@ out: */ static int afs_readdir(struct file *file, struct dir_context *ctx) { - return afs_dir_iterate(file_inode(file), - ctx, file->private_data); + return afs_dir_iterate(file_inode(file), ctx, file->private_data); } /* diff --git a/fs/afs/file.c b/fs/afs/file.c index 1f26ac9f816d..5786f68f87f1 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c @@ -232,6 +232,11 @@ int afs_page_filler(void *data, struct page *page) * page */ ret = afs_fetch_data(vnode, key, req); afs_put_read(req); + + if (ret >= 0 && S_ISDIR(inode->i_mode) && + !afs_dir_check_page(inode, page)) + ret = -EIO; + if (ret < 0) { if (ret == -ENOENT) { _debug("got NOENT from server" diff --git a/fs/afs/internal.h b/fs/afs/internal.h index aad12546e0ea..6aa6e9957c44 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -622,6 +622,7 @@ 
extern bool afs_cm_incoming_call(struct afs_call *); /* * dir.c */ +extern bool afs_dir_check_page(struct inode *, struct page *); extern const struct inode_operations afs_dir_inode_operations; extern const struct dentry_operations afs_fs_dentry_operations; extern const struct file_operations afs_dir_file_operations; diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h index c780f4c39a53..9cfb7657b72c 100644 --- a/include/trace/events/afs.h +++ b/include/trace/events/afs.h @@ -381,6 +381,27 @@ TRACE_EVENT(afs_sent_pages, __entry->cursor, __entry->ret) ); +TRACE_EVENT(afs_dir_check_failed, + TP_PROTO(struct afs_vnode *vnode, loff_t off, loff_t i_size), + + TP_ARGS(vnode, off, i_size), + + TP_STRUCT__entry( + __field(struct afs_vnode *, vnode ) + __field(loff_t, off ) + __field(loff_t, i_size ) + ), + + TP_fast_assign( + __entry->vnode = vnode; + __entry->off = off; + __entry->i_size = i_size; + ), + + TP_printk("vn=%p %llx/%llx", + __entry->vnode, __entry->off, __entry->i_size) + ); + #endif /* _TRACE_AFS_H */ /* This part must be outside protection */ From 83732ec5146916bd49b3036b0ea7dedb7831b90e Mon Sep 17 00:00:00 2001 From: Marc Dionne Date: Thu, 2 Nov 2017 15:27:52 +0000 Subject: [PATCH 30/35] afs: Use a dynamic port if 7001 is in use It is not required that the afs client operate on port 7001. The port could be in use because another kernel or userspace client has already bound to it. If the port is in use, just fall back to using a dynamic port. 
Signed-off-by: Marc Dionne Signed-off-by: David Howells --- fs/afs/rxrpc.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 06a51d70b82b..bd44ae8b63d8 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -61,6 +61,10 @@ int afs_open_socket(struct afs_net *net) srx.transport.sin6.sin6_port = htons(AFS_CM_PORT); ret = kernel_bind(socket, (struct sockaddr *) &srx, sizeof(srx)); + if (ret == -EADDRINUSE) { + srx.transport.sin6.sin6_port = 0; + ret = kernel_bind(socket, (struct sockaddr *) &srx, sizeof(srx)); + } if (ret < 0) goto error_2; From 215804a99283c57fdd869aab350fdf6acc3460b6 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 2 Nov 2017 15:27:52 +0000 Subject: [PATCH 31/35] afs: Introduce a file-private data record Introduce a file-private data record for kAFS and put the key into it rather than storing the key in file->private_data. Signed-off-by: David Howells --- fs/afs/dir.c | 2 +- fs/afs/file.c | 39 ++++++++++++++++++++++++++++----------- fs/afs/flock.c | 10 +++++----- fs/afs/inode.c | 2 +- fs/afs/internal.h | 14 ++++++++++++++ fs/afs/write.c | 4 ++-- 6 files changed, 51 insertions(+), 20 deletions(-) diff --git a/fs/afs/dir.c b/fs/afs/dir.c index ecda0e6a9f7e..ab618d32554c 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -396,7 +396,7 @@ out: */ static int afs_readdir(struct file *file, struct dir_context *ctx) { - return afs_dir_iterate(file_inode(file), ctx, file->private_data); + return afs_dir_iterate(file_inode(file), ctx, afs_file_key(file)); } /* diff --git a/fs/afs/file.c b/fs/afs/file.c index 5786f68f87f1..e33b34f01795 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c @@ -68,6 +68,7 @@ const struct address_space_operations afs_fs_aops = { int afs_open(struct inode *inode, struct file *file) { struct afs_vnode *vnode = AFS_FS_I(inode); + struct afs_file *af; struct key *key; int ret; @@ -75,19 +76,32 @@ int afs_open(struct inode *inode, struct file *file) key = afs_request_key(vnode->volume->cell); if 
(IS_ERR(key)) { - _leave(" = %ld [key]", PTR_ERR(key)); - return PTR_ERR(key); + ret = PTR_ERR(key); + goto error; + } + + af = kzalloc(sizeof(*af), GFP_KERNEL); + if (!af) { + ret = -ENOMEM; + goto error_key; } ret = afs_validate(vnode, key); - if (ret < 0) { - _leave(" = %d [val]", ret); - return ret; - } + if (ret < 0) + goto error_af; - file->private_data = key; + af->key = key; + file->private_data = af; _leave(" = 0"); return 0; + +error_af: + kfree(af); +error_key: + key_put(key); +error: + _leave(" = %d", ret); + return ret; } /* @@ -96,10 +110,13 @@ int afs_open(struct inode *inode, struct file *file) int afs_release(struct inode *inode, struct file *file) { struct afs_vnode *vnode = AFS_FS_I(inode); + struct afs_file *af = file->private_data; _enter("{%x:%u},", vnode->fid.vid, vnode->fid.vnode); - key_put(file->private_data); + file->private_data = NULL; + key_put(af->key); + kfree(af); _leave(" = 0"); return 0; } @@ -295,7 +312,7 @@ static int afs_readpage(struct file *file, struct page *page) int ret; if (file) { - key = file->private_data; + key = afs_file_key(file); ASSERT(key != NULL); ret = afs_page_filler(key, page); } else { @@ -346,7 +363,7 @@ static int afs_readpages_one(struct file *file, struct address_space *mapping, struct afs_read *req; struct list_head *p; struct page *first, *page; - struct key *key = file->private_data; + struct key *key = afs_file_key(file); pgoff_t index; int ret, n, i; @@ -442,7 +459,7 @@ error: static int afs_readpages(struct file *file, struct address_space *mapping, struct list_head *pages, unsigned nr_pages) { - struct key *key = file->private_data; + struct key *key = afs_file_key(file); struct afs_vnode *vnode; int ret = 0; diff --git a/fs/afs/flock.c b/fs/afs/flock.c index 77b0a4606efd..7571a5dfd5a3 100644 --- a/fs/afs/flock.c +++ b/fs/afs/flock.c @@ -206,7 +206,7 @@ void afs_lock_work(struct work_struct *work) BUG(); fl = list_entry(vnode->granted_locks.next, struct file_lock, fl_u.afs.link); - key = 
key_get(fl->fl_file->private_data); + key = key_get(afs_file_key(fl->fl_file)); spin_unlock(&vnode->lock); ret = afs_extend_lock(vnode, key); @@ -240,7 +240,7 @@ void afs_lock_work(struct work_struct *work) BUG(); fl = list_entry(vnode->pending_locks.next, struct file_lock, fl_u.afs.link); - key = key_get(fl->fl_file->private_data); + key = key_get(afs_file_key(fl->fl_file)); type = (fl->fl_type == F_RDLCK) ? AFS_LOCK_READ : AFS_LOCK_WRITE; spin_unlock(&vnode->lock); @@ -318,7 +318,7 @@ static int afs_do_setlk(struct file *file, struct file_lock *fl) struct inode *inode = file_inode(file); struct afs_vnode *vnode = AFS_FS_I(inode); afs_lock_type_t type; - struct key *key = file->private_data; + struct key *key = afs_file_key(file); int ret; _enter("{%x:%u},%u", vnode->fid.vid, vnode->fid.vnode, fl->fl_type); @@ -500,7 +500,7 @@ vfs_rejected_lock: static int afs_do_unlk(struct file *file, struct file_lock *fl) { struct afs_vnode *vnode = AFS_FS_I(file->f_mapping->host); - struct key *key = file->private_data; + struct key *key = afs_file_key(file); int ret; _enter("{%x:%u},%u", vnode->fid.vid, vnode->fid.vnode, fl->fl_type); @@ -535,7 +535,7 @@ static int afs_do_unlk(struct file *file, struct file_lock *fl) static int afs_do_getlk(struct file *file, struct file_lock *fl) { struct afs_vnode *vnode = AFS_FS_I(file->f_mapping->host); - struct key *key = file->private_data; + struct key *key = afs_file_key(file); int ret, lock_count; _enter(""); diff --git a/fs/afs/inode.c b/fs/afs/inode.c index 5a2f5854f349..da2ba7a68cac 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -520,7 +520,7 @@ int afs_setattr(struct dentry *dentry, struct iattr *attr) } if (attr->ia_valid & ATTR_FILE) { - key = attr->ia_file->private_data; + key = afs_file_key(attr->ia_file); } else { key = afs_request_key(vnode->volume->cell); if (IS_ERR(key)) { diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 6aa6e9957c44..facf5b9844d2 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ 
-138,6 +138,20 @@ struct afs_call_type { void (*work)(struct work_struct *work); }; +/* + * AFS open file information record. Pointed to by file->private_data. + */ +struct afs_file { + struct key *key; /* The key this file was opened with */ +}; + +static inline struct key *afs_file_key(struct file *file) +{ + struct afs_file *af = file->private_data; + + return af->key; +} + /* * Record of an outstanding read operation on a vnode. */ diff --git a/fs/afs/write.c b/fs/afs/write.c index 1377a40ecdbb..1cdd0e3cd531 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -128,7 +128,7 @@ int afs_write_begin(struct file *file, struct address_space *mapping, struct afs_writeback *candidate, *wb; struct afs_vnode *vnode = AFS_FS_I(file_inode(file)); struct page *page; - struct key *key = file->private_data; + struct key *key = afs_file_key(file); unsigned from = pos & (PAGE_SIZE - 1); unsigned to = from + len; pgoff_t index = pos >> PAGE_SHIFT; @@ -255,7 +255,7 @@ int afs_write_end(struct file *file, struct address_space *mapping, struct page *page, void *fsdata) { struct afs_vnode *vnode = AFS_FS_I(file_inode(file)); - struct key *key = file->private_data; + struct key *key = afs_file_key(file); loff_t i_size, maybe_i_size; int ret; From 4343d00872e1de9a470d951bf09bdd18bc73f555 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 2 Nov 2017 15:27:52 +0000 Subject: [PATCH 32/35] afs: Get rid of the afs_writeback record Get rid of the afs_writeback record that kAFS is using to match keys with writes made by that key. Instead, keep a list of keys that have a file open for writing and/or sync'ing and iterate through those. 
Signed-off-by: David Howells --- fs/afs/file.c | 83 ++++-- fs/afs/fsclient.c | 24 +- fs/afs/inode.c | 11 +- fs/afs/internal.h | 51 ++-- fs/afs/super.c | 4 +- fs/afs/write.c | 633 +++++++++++++++++++++++----------------------- mm/filemap.c | 1 + 7 files changed, 412 insertions(+), 395 deletions(-) diff --git a/fs/afs/file.c b/fs/afs/file.c index e33b34f01795..c3a7bc1281f5 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c @@ -23,7 +23,6 @@ static int afs_readpage(struct file *file, struct page *page); static void afs_invalidatepage(struct page *page, unsigned int offset, unsigned int length); static int afs_releasepage(struct page *page, gfp_t gfp_flags); -static int afs_launder_page(struct page *page); static int afs_readpages(struct file *filp, struct address_space *mapping, struct list_head *pages, unsigned nr_pages); @@ -62,6 +61,50 @@ const struct address_space_operations afs_fs_aops = { .writepages = afs_writepages, }; +/* + * Discard a pin on a writeback key. + */ +void afs_put_wb_key(struct afs_wb_key *wbk) +{ + if (refcount_dec_and_test(&wbk->usage)) { + key_put(wbk->key); + kfree(wbk); + } +} + +/* + * Cache key for writeback. 
+ */ +int afs_cache_wb_key(struct afs_vnode *vnode, struct afs_file *af) +{ + struct afs_wb_key *wbk, *p; + + wbk = kzalloc(sizeof(struct afs_wb_key), GFP_KERNEL); + if (!wbk) + return -ENOMEM; + refcount_set(&wbk->usage, 2); + wbk->key = af->key; + + spin_lock(&vnode->wb_lock); + list_for_each_entry(p, &vnode->wb_keys, vnode_link) { + if (p->key == wbk->key) + goto found; + } + + key_get(wbk->key); + list_add_tail(&wbk->vnode_link, &vnode->wb_keys); + spin_unlock(&vnode->wb_lock); + af->wb = wbk; + return 0; + +found: + refcount_inc(&p->usage); + spin_unlock(&vnode->wb_lock); + af->wb = p; + kfree(wbk); + return 0; +} + /* * open an AFS file or directory and attach a key to it */ @@ -85,12 +128,18 @@ int afs_open(struct inode *inode, struct file *file) ret = -ENOMEM; goto error_key; } + af->key = key; ret = afs_validate(vnode, key); if (ret < 0) goto error_af; - af->key = key; + if (file->f_mode & FMODE_WRITE) { + ret = afs_cache_wb_key(vnode, af); + if (ret < 0) + goto error_af; + } + file->private_data = af; _leave(" = 0"); return 0; @@ -115,8 +164,11 @@ int afs_release(struct inode *inode, struct file *file) _enter("{%x:%u},", vnode->fid.vid, vnode->fid.vnode); file->private_data = NULL; + if (af->wb) + afs_put_wb_key(af->wb); key_put(af->key); kfree(af); + afs_prune_wb_keys(vnode); _leave(" = 0"); return 0; } @@ -516,16 +568,6 @@ static int afs_readpages(struct file *file, struct address_space *mapping, return ret; } -/* - * write back a dirty page - */ -static int afs_launder_page(struct page *page) -{ - _enter("{%lu}", page->index); - - return 0; -} - /* * invalidate part or all of a page * - release a page and clean up its private data if offset is 0 (indicating @@ -534,8 +576,6 @@ static int afs_launder_page(struct page *page) static void afs_invalidatepage(struct page *page, unsigned int offset, unsigned int length) { - struct afs_writeback *wb = (struct afs_writeback *) page_private(page); - _enter("{%lu},%u,%u", page->index, offset, length); 
BUG_ON(!PageLocked(page)); @@ -551,13 +591,8 @@ static void afs_invalidatepage(struct page *page, unsigned int offset, #endif if (PagePrivate(page)) { - if (wb && !PageWriteback(page)) { - set_page_private(page, 0); - afs_put_writeback(wb); - } - - if (!page_private(page)) - ClearPagePrivate(page); + set_page_private(page, 0); + ClearPagePrivate(page); } } @@ -570,7 +605,6 @@ static void afs_invalidatepage(struct page *page, unsigned int offset, */ static int afs_releasepage(struct page *page, gfp_t gfp_flags) { - struct afs_writeback *wb = (struct afs_writeback *) page_private(page); struct afs_vnode *vnode = AFS_FS_I(page->mapping->host); _enter("{{%x:%u}[%lu],%lx},%x", @@ -587,10 +621,7 @@ static int afs_releasepage(struct page *page, gfp_t gfp_flags) #endif if (PagePrivate(page)) { - if (wb) { - set_page_private(page, 0); - afs_put_writeback(wb); - } + set_page_private(page, 0); ClearPagePrivate(page); } diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index 10e5ead629c2..b90ef39ae914 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -1119,18 +1119,18 @@ static const struct afs_call_type afs_RXFSStoreData64 = { * store a set of pages to a very large file */ static int afs_fs_store_data64(struct afs_fs_cursor *fc, - struct afs_writeback *wb, + struct address_space *mapping, pgoff_t first, pgoff_t last, unsigned offset, unsigned to, loff_t size, loff_t pos, loff_t i_size) { - struct afs_vnode *vnode = wb->vnode; + struct afs_vnode *vnode = fc->vnode; struct afs_call *call; struct afs_net *net = afs_v2net(vnode); __be32 *bp; _enter(",%x,{%x:%u},,", - key_serial(wb->key), vnode->fid.vid, vnode->fid.vnode); + key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode); call = afs_alloc_flat_call(net, &afs_RXFSStoreData64, (4 + 6 + 3 * 2) * 4, @@ -1138,10 +1138,9 @@ static int afs_fs_store_data64(struct afs_fs_cursor *fc, if (!call) return -ENOMEM; - call->wb = wb; - call->key = wb->key; + call->key = fc->key; + call->mapping = mapping; call->reply[0] = vnode; - 
call->mapping = vnode->vfs_inode.i_mapping; call->first = first; call->last = last; call->first_offset = offset; @@ -1177,18 +1176,18 @@ static int afs_fs_store_data64(struct afs_fs_cursor *fc, /* * store a set of pages */ -int afs_fs_store_data(struct afs_fs_cursor *fc, struct afs_writeback *wb, +int afs_fs_store_data(struct afs_fs_cursor *fc, struct address_space *mapping, pgoff_t first, pgoff_t last, unsigned offset, unsigned to) { - struct afs_vnode *vnode = wb->vnode; + struct afs_vnode *vnode = fc->vnode; struct afs_call *call; struct afs_net *net = afs_v2net(vnode); loff_t size, pos, i_size; __be32 *bp; _enter(",%x,{%x:%u},,", - key_serial(wb->key), vnode->fid.vid, vnode->fid.vnode); + key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode); size = (loff_t)to - (loff_t)offset; if (first != last) @@ -1205,7 +1204,7 @@ int afs_fs_store_data(struct afs_fs_cursor *fc, struct afs_writeback *wb, (unsigned long long) i_size); if (pos >> 32 || i_size >> 32 || size >> 32 || (pos + size) >> 32) - return afs_fs_store_data64(fc, wb, first, last, offset, to, + return afs_fs_store_data64(fc, mapping, first, last, offset, to, size, pos, i_size); call = afs_alloc_flat_call(net, &afs_RXFSStoreData, @@ -1214,10 +1213,9 @@ int afs_fs_store_data(struct afs_fs_cursor *fc, struct afs_writeback *wb, if (!call) return -ENOMEM; - call->wb = wb; - call->key = wb->key; + call->key = fc->key; + call->mapping = mapping; call->reply[0] = vnode; - call->mapping = vnode->vfs_inode.i_mapping; call->first = first; call->last = last; call->first_offset = offset; diff --git a/fs/afs/inode.c b/fs/afs/inode.c index da2ba7a68cac..3415eb7484f6 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -482,7 +482,12 @@ void afs_evict_inode(struct inode *inode) vnode->cb_interest = NULL; } - ASSERT(list_empty(&vnode->writebacks)); + while (!list_empty(&vnode->wb_keys)) { + struct afs_wb_key *wbk = list_entry(vnode->wb_keys.next, + struct afs_wb_key, vnode_link); + list_del(&wbk->vnode_link); + 
afs_put_wb_key(wbk); + } #ifdef CONFIG_AFS_FSCACHE fscache_relinquish_cookie(vnode->cache, 0); @@ -514,10 +519,8 @@ int afs_setattr(struct dentry *dentry, struct iattr *attr) } /* flush any dirty data outstanding on a regular file */ - if (S_ISREG(vnode->vfs_inode.i_mode)) { + if (S_ISREG(vnode->vfs_inode.i_mode)) filemap_write_and_wait(vnode->vfs_inode.i_mapping); - afs_writeback_all(vnode); - } if (attr->ia_valid & ATTR_FILE) { key = afs_file_key(attr->ia_file); diff --git a/fs/afs/internal.h b/fs/afs/internal.h index facf5b9844d2..688562ae3bf8 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -89,8 +89,7 @@ struct afs_call { struct afs_server *cm_server; /* Server affected by incoming CM call */ struct afs_cb_interest *cbi; /* Callback interest for server used */ void *request; /* request data (first part) */ - struct address_space *mapping; /* page set */ - struct afs_writeback *wb; /* writeback being performed */ + struct address_space *mapping; /* Pages being written from */ void *buffer; /* reply receive buffer */ void *reply[4]; /* Where to put the reply */ pgoff_t first; /* first page in mapping to deal with */ @@ -138,11 +137,21 @@ struct afs_call_type { void (*work)(struct work_struct *work); }; +/* + * Key available for writeback on a file. + */ +struct afs_wb_key { + refcount_t usage; + struct key *key; + struct list_head vnode_link; /* Link in vnode->wb_keys */ +}; + /* * AFS open file information record. Pointed to by file->private_data. 
*/ struct afs_file { struct key *key; /* The key this file was opened with */ + struct afs_wb_key *wb; /* Writeback key record for this file */ }; static inline struct key *afs_file_key(struct file *file) @@ -167,32 +176,6 @@ struct afs_read { struct page *pages[]; }; -/* - * record of an outstanding writeback on a vnode - */ -struct afs_writeback { - struct list_head link; /* link in vnode->writebacks */ - struct work_struct writer; /* work item to perform the writeback */ - struct afs_vnode *vnode; /* vnode to which this write applies */ - struct key *key; /* owner of this write */ - wait_queue_head_t waitq; /* completion and ready wait queue */ - pgoff_t first; /* first page in batch */ - pgoff_t point; /* last page in current store op */ - pgoff_t last; /* last page in batch (inclusive) */ - unsigned offset_first; /* offset into first page of start of write */ - unsigned to_last; /* offset into last page of end of write */ - int num_conflicts; /* count of conflicting writes in list */ - int usage; - bool conflicts; /* T if has dependent conflicts */ - enum { - AFS_WBACK_SYNCING, /* synchronisation being performed */ - AFS_WBACK_PENDING, /* write pending */ - AFS_WBACK_CONFLICTING, /* conflicting writes posted */ - AFS_WBACK_WRITING, /* writing back */ - AFS_WBACK_COMPLETE /* the writeback record has been unlinked */ - } state __attribute__((packed)); -}; - /* * AFS superblock private data * - there's one superblock per volume @@ -460,7 +443,7 @@ struct afs_vnode { struct afs_permits *permit_cache; /* cache of permits so far obtained */ struct mutex io_lock; /* Lock for serialising I/O on this mutex */ struct mutex validate_lock; /* lock for validating this vnode */ - spinlock_t writeback_lock; /* lock for writebacks */ + spinlock_t wb_lock; /* lock for wb_keys */ spinlock_t lock; /* waitqueue/flags lock */ unsigned long flags; #define AFS_VNODE_CB_PROMISED 0 /* Set if vnode has a callback promise */ @@ -476,7 +459,7 @@ struct afs_vnode { #define 
AFS_VNODE_AUTOCELL 10 /* set if Vnode is an auto mount point */ #define AFS_VNODE_PSEUDODIR 11 /* set if Vnode is a pseudo directory */ - struct list_head writebacks; /* alterations in pagecache that need writing */ + struct list_head wb_keys; /* List of keys available for writeback */ struct list_head pending_locks; /* locks waiting to be granted */ struct list_head granted_locks; /* locks granted on this file */ struct delayed_work lock_work; /* work to be done in locking */ @@ -648,6 +631,8 @@ extern const struct address_space_operations afs_fs_aops; extern const struct inode_operations afs_file_inode_operations; extern const struct file_operations afs_file_operations; +extern int afs_cache_wb_key(struct afs_vnode *, struct afs_file *); +extern void afs_put_wb_key(struct afs_wb_key *); extern int afs_open(struct inode *, struct file *); extern int afs_release(struct inode *, struct file *); extern int afs_fetch_data(struct afs_vnode *, struct key *, struct afs_read *); @@ -678,7 +663,7 @@ extern int afs_fs_symlink(struct afs_fs_cursor *, const char *, const char *, struct afs_fid *, struct afs_file_status *); extern int afs_fs_rename(struct afs_fs_cursor *, const char *, struct afs_vnode *, const char *); -extern int afs_fs_store_data(struct afs_fs_cursor *, struct afs_writeback *, +extern int afs_fs_store_data(struct afs_fs_cursor *, struct address_space *, pgoff_t, pgoff_t, unsigned, unsigned); extern int afs_fs_setattr(struct afs_fs_cursor *, struct iattr *); extern int afs_fs_get_volume_status(struct afs_fs_cursor *, struct afs_volume_status *); @@ -889,7 +874,6 @@ extern int afs_check_volume_status(struct afs_volume *, struct key *); * write.c */ extern int afs_set_page_dirty(struct page *); -extern void afs_put_writeback(struct afs_writeback *); extern int afs_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata); @@ -900,9 +884,10 @@ extern int afs_writepage(struct page 
*, struct writeback_control *); extern int afs_writepages(struct address_space *, struct writeback_control *); extern void afs_pages_written_back(struct afs_vnode *, struct afs_call *); extern ssize_t afs_file_write(struct kiocb *, struct iov_iter *); -extern int afs_writeback_all(struct afs_vnode *); extern int afs_flush(struct file *, fl_owner_t); extern int afs_fsync(struct file *, loff_t, loff_t, int); +extern void afs_prune_wb_keys(struct afs_vnode *); +extern int afs_launder_page(struct page *); /* * xattr.c diff --git a/fs/afs/super.c b/fs/afs/super.c index af1e769aaebf..875b5eb02242 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -546,9 +546,9 @@ static void afs_i_init_once(void *_vnode) inode_init_once(&vnode->vfs_inode); mutex_init(&vnode->io_lock); mutex_init(&vnode->validate_lock); - spin_lock_init(&vnode->writeback_lock); + spin_lock_init(&vnode->wb_lock); spin_lock_init(&vnode->lock); - INIT_LIST_HEAD(&vnode->writebacks); + INIT_LIST_HEAD(&vnode->wb_keys); INIT_LIST_HEAD(&vnode->pending_locks); INIT_LIST_HEAD(&vnode->granted_locks); INIT_DELAYED_WORK(&vnode->lock_work, afs_lock_work); diff --git a/fs/afs/write.c b/fs/afs/write.c index 1cdd0e3cd531..4c131371005b 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -8,6 +8,7 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. */ + #include #include #include @@ -16,8 +17,18 @@ #include #include "internal.h" -static int afs_write_back_from_locked_page(struct afs_writeback *wb, - struct page *page); +/* + * We use page->private to hold the amount of the page that we've written to, + * splitting the field into two parts. However, we need to represent a range + * 0...PAGE_SIZE inclusive, so we can't support 64K pages on a 32-bit system. 
+ */ +#if PAGE_SIZE > 32768 +#define AFS_PRIV_MAX 0xffffffff +#define AFS_PRIV_SHIFT 32 +#else +#define AFS_PRIV_MAX 0xffff +#define AFS_PRIV_SHIFT 16 +#endif /* * mark a page as having been made dirty and thus needing writeback @@ -28,58 +39,6 @@ int afs_set_page_dirty(struct page *page) return __set_page_dirty_nobuffers(page); } -/* - * unlink a writeback record because its usage has reached zero - * - must be called with the wb->vnode->writeback_lock held - */ -static void afs_unlink_writeback(struct afs_writeback *wb) -{ - struct afs_writeback *front; - struct afs_vnode *vnode = wb->vnode; - - list_del_init(&wb->link); - if (!list_empty(&vnode->writebacks)) { - /* if an fsync rises to the front of the queue then wake it - * up */ - front = list_entry(vnode->writebacks.next, - struct afs_writeback, link); - if (front->state == AFS_WBACK_SYNCING) { - _debug("wake up sync"); - front->state = AFS_WBACK_COMPLETE; - wake_up(&front->waitq); - } - } -} - -/* - * free a writeback record - */ -static void afs_free_writeback(struct afs_writeback *wb) -{ - _enter(""); - key_put(wb->key); - kfree(wb); -} - -/* - * dispose of a reference to a writeback record - */ -void afs_put_writeback(struct afs_writeback *wb) -{ - struct afs_vnode *vnode = wb->vnode; - - _enter("{%d}", wb->usage); - - spin_lock(&vnode->writeback_lock); - if (--wb->usage == 0) - afs_unlink_writeback(wb); - else - wb = NULL; - spin_unlock(&vnode->writeback_lock); - if (wb) - afs_free_writeback(wb); -} - /* * partly or wholly fill a page that's under preparation for writing */ @@ -125,42 +84,32 @@ int afs_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata) { - struct afs_writeback *candidate, *wb; struct afs_vnode *vnode = AFS_FS_I(file_inode(file)); struct page *page; struct key *key = afs_file_key(file); - unsigned from = pos & (PAGE_SIZE - 1); - unsigned to = from + len; + unsigned long priv; + unsigned f, from = pos 
& (PAGE_SIZE - 1); + unsigned t, to = from + len; pgoff_t index = pos >> PAGE_SHIFT; int ret; _enter("{%x:%u},{%lx},%u,%u", vnode->fid.vid, vnode->fid.vnode, index, from, to); - candidate = kzalloc(sizeof(*candidate), GFP_KERNEL); - if (!candidate) - return -ENOMEM; - candidate->vnode = vnode; - candidate->first = candidate->last = index; - candidate->offset_first = from; - candidate->to_last = to; - INIT_LIST_HEAD(&candidate->link); - candidate->usage = 1; - candidate->state = AFS_WBACK_PENDING; - init_waitqueue_head(&candidate->waitq); + /* We want to store information about how much of a page is altered in + * page->private. + */ + BUILD_BUG_ON(PAGE_SIZE > 32768 && sizeof(page->private) < 8); page = grab_cache_page_write_begin(mapping, index, flags); - if (!page) { - kfree(candidate); + if (!page) return -ENOMEM; - } if (!PageUptodate(page) && len != PAGE_SIZE) { ret = afs_fill_page(vnode, key, pos & PAGE_MASK, PAGE_SIZE, page); if (ret < 0) { unlock_page(page); put_page(page); - kfree(candidate); _leave(" = %d [prep]", ret); return ret; } @@ -171,79 +120,52 @@ int afs_write_begin(struct file *file, struct address_space *mapping, *pagep = page; try_again: - spin_lock(&vnode->writeback_lock); - - /* see if this page is already pending a writeback under a suitable key - * - if so we can just join onto that one */ - wb = (struct afs_writeback *) page_private(page); - if (wb) { - if (wb->key == key && wb->state == AFS_WBACK_PENDING) - goto subsume_in_current_wb; - goto flush_conflicting_wb; + /* See if this page is already partially written in a way that we can + * merge the new write with. 
+ */ + t = f = 0; + if (PagePrivate(page)) { + priv = page_private(page); + f = priv & AFS_PRIV_MAX; + t = priv >> AFS_PRIV_SHIFT; + ASSERTCMP(f, <=, t); } - if (index > 0) { - /* see if we can find an already pending writeback that we can - * append this page to */ - list_for_each_entry(wb, &vnode->writebacks, link) { - if (wb->last == index - 1 && wb->key == key && - wb->state == AFS_WBACK_PENDING) - goto append_to_previous_wb; - } + if (f != t) { + if (to < f || from > t) + goto flush_conflicting_write; + if (from < f) + f = from; + if (to > t) + t = to; + } else { + f = from; + t = to; } - list_add_tail(&candidate->link, &vnode->writebacks); - candidate->key = key_get(key); - spin_unlock(&vnode->writeback_lock); + priv = (unsigned long)t << AFS_PRIV_SHIFT; + priv |= f; SetPagePrivate(page); - set_page_private(page, (unsigned long) candidate); - _leave(" = 0 [new]"); + set_page_private(page, priv); + _leave(" = 0"); return 0; -subsume_in_current_wb: - _debug("subsume"); - ASSERTRANGE(wb->first, <=, index, <=, wb->last); - if (index == wb->first && from < wb->offset_first) - wb->offset_first = from; - if (index == wb->last && to > wb->to_last) - wb->to_last = to; - spin_unlock(&vnode->writeback_lock); - kfree(candidate); - _leave(" = 0 [sub]"); - return 0; - -append_to_previous_wb: - _debug("append into %lx-%lx", wb->first, wb->last); - wb->usage++; - wb->last++; - wb->to_last = to; - spin_unlock(&vnode->writeback_lock); - SetPagePrivate(page); - set_page_private(page, (unsigned long) wb); - kfree(candidate); - _leave(" = 0 [app]"); - return 0; - - /* the page is currently bound to another context, so if it's dirty we - * need to flush it before we can use the new context */ -flush_conflicting_wb: + /* The previous write and this write aren't adjacent or overlapping, so + * flush the page out. 
+ */ +flush_conflicting_write: _debug("flush conflict"); - if (wb->state == AFS_WBACK_PENDING) - wb->state = AFS_WBACK_CONFLICTING; - spin_unlock(&vnode->writeback_lock); - if (clear_page_dirty_for_io(page)) { - ret = afs_write_back_from_locked_page(wb, page); - if (ret < 0) { - afs_put_writeback(candidate); - _leave(" = %d", ret); - return ret; - } + ret = write_one_page(page); + if (ret < 0) { + _leave(" = %d", ret); + return ret; } - /* the page holds a ref on the writeback record */ - afs_put_writeback(wb); - set_page_private(page, 0); - ClearPagePrivate(page); + ret = lock_page_killable(page); + if (ret < 0) { + _leave(" = %d", ret); + return ret; + } goto try_again; } @@ -266,11 +188,11 @@ int afs_write_end(struct file *file, struct address_space *mapping, i_size = i_size_read(&vnode->vfs_inode); if (maybe_i_size > i_size) { - spin_lock(&vnode->writeback_lock); + spin_lock(&vnode->wb_lock); i_size = i_size_read(&vnode->vfs_inode); if (maybe_i_size > i_size) i_size_write(&vnode->vfs_inode, maybe_i_size); - spin_unlock(&vnode->writeback_lock); + spin_unlock(&vnode->wb_lock); } if (!PageUptodate(page)) { @@ -299,9 +221,10 @@ int afs_write_end(struct file *file, struct address_space *mapping, /* * kill all the pages in the given range */ -static void afs_kill_pages(struct afs_vnode *vnode, bool error, +static void afs_kill_pages(struct address_space *mapping, pgoff_t first, pgoff_t last) { + struct afs_vnode *vnode = AFS_FS_I(mapping->host); struct pagevec pv; unsigned count, loop; @@ -316,23 +239,62 @@ static void afs_kill_pages(struct afs_vnode *vnode, bool error, count = last - first + 1; if (count > PAGEVEC_SIZE) count = PAGEVEC_SIZE; - pv.nr = find_get_pages_contig(vnode->vfs_inode.i_mapping, - first, count, pv.pages); + pv.nr = find_get_pages_contig(mapping, first, count, pv.pages); ASSERTCMP(pv.nr, ==, count); for (loop = 0; loop < count; loop++) { struct page *page = pv.pages[loop]; ClearPageUptodate(page); - if (error) - SetPageError(page); - if 
(PageWriteback(page)) - end_page_writeback(page); + SetPageError(page); + end_page_writeback(page); + if (page->index >= first) + first = page->index + 1; + lock_page(page); + generic_error_remove_page(mapping, page); + } + + __pagevec_release(&pv); + } while (first <= last); + + _leave(""); +} + +/* + * Redirty all the pages in a given range. + */ +static void afs_redirty_pages(struct writeback_control *wbc, + struct address_space *mapping, + pgoff_t first, pgoff_t last) +{ + struct afs_vnode *vnode = AFS_FS_I(mapping->host); + struct pagevec pv; + unsigned count, loop; + + _enter("{%x:%u},%lx-%lx", + vnode->fid.vid, vnode->fid.vnode, first, last); + + pagevec_init(&pv, 0); + + do { + _debug("redirty %lx-%lx", first, last); + + count = last - first + 1; + if (count > PAGEVEC_SIZE) + count = PAGEVEC_SIZE; + pv.nr = find_get_pages_contig(mapping, first, count, pv.pages); + ASSERTCMP(pv.nr, ==, count); + + for (loop = 0; loop < count; loop++) { + struct page *page = pv.pages[loop]; + + redirty_page_for_writepage(wbc, page); + end_page_writeback(page); if (page->index >= first) first = page->index + 1; } __pagevec_release(&pv); - } while (first < last); + } while (first <= last); _leave(""); } @@ -340,26 +302,55 @@ static void afs_kill_pages(struct afs_vnode *vnode, bool error, /* * write to a file */ -static int afs_store_data(struct afs_writeback *wb, pgoff_t first, pgoff_t last, +static int afs_store_data(struct address_space *mapping, + pgoff_t first, pgoff_t last, unsigned offset, unsigned to) { + struct afs_vnode *vnode = AFS_FS_I(mapping->host); struct afs_fs_cursor fc; - struct afs_vnode *vnode = wb->vnode; - int ret; + struct afs_wb_key *wbk = NULL; + struct list_head *p; + int ret = -ENOKEY, ret2; - _enter("%s{%x:%u.%u},%x,%lx,%lx,%x,%x", + _enter("%s{%x:%u.%u},%lx,%lx,%x,%x", vnode->volume->name, vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique, - key_serial(wb->key), first, last, offset, to); + spin_lock(&vnode->wb_lock); + p = vnode->wb_keys.next; + + 
/* Iterate through the list looking for a valid key to use. */ +try_next_key: + while (p != &vnode->wb_keys) { + wbk = list_entry(p, struct afs_wb_key, vnode_link); + _debug("wbk %u", key_serial(wbk->key)); + ret2 = key_validate(wbk->key); + if (ret2 == 0) + goto found_key; + if (ret == -ENOKEY) + ret = ret2; + p = p->next; + } + + spin_unlock(&vnode->wb_lock); + afs_put_wb_key(wbk); + _leave(" = %d [no keys]", ret); + return ret; + +found_key: + refcount_inc(&wbk->usage); + spin_unlock(&vnode->wb_lock); + + _debug("USE WB KEY %u", key_serial(wbk->key)); + ret = -ERESTARTSYS; - if (afs_begin_vnode_operation(&fc, vnode, wb->key)) { + if (afs_begin_vnode_operation(&fc, vnode, wbk->key)) { while (afs_select_fileserver(&fc)) { fc.cb_break = vnode->cb_break + vnode->cb_s_break; - afs_fs_store_data(&fc, wb, first, last, offset, to); + afs_fs_store_data(&fc, mapping, first, last, offset, to); } afs_check_for_remote_deletion(&fc, fc.vnode); @@ -367,20 +358,37 @@ static int afs_store_data(struct afs_writeback *wb, pgoff_t first, pgoff_t last, ret = afs_end_vnode_operation(&fc); } + switch (ret) { + case -EACCES: + case -EPERM: + case -ENOKEY: + case -EKEYEXPIRED: + case -EKEYREJECTED: + case -EKEYREVOKED: + _debug("next"); + spin_lock(&vnode->wb_lock); + p = wbk->vnode_link.next; + afs_put_wb_key(wbk); + goto try_next_key; + } + + afs_put_wb_key(wbk); _leave(" = %d", ret); return ret; } /* - * synchronously write back the locked page and any subsequent non-locked dirty - * pages also covered by the same writeback record + * Synchronously write back the locked page and any subsequent non-locked dirty + * pages. 
*/ -static int afs_write_back_from_locked_page(struct afs_writeback *wb, - struct page *primary_page) +static int afs_write_back_from_locked_page(struct address_space *mapping, + struct writeback_control *wbc, + struct page *primary_page, + pgoff_t final_page) { struct page *pages[8], *page; - unsigned long count; - unsigned n, offset, to; + unsigned long count, priv; + unsigned n, offset, to, f, t; pgoff_t start, first, last; int loop, ret; @@ -390,20 +398,28 @@ static int afs_write_back_from_locked_page(struct afs_writeback *wb, if (test_set_page_writeback(primary_page)) BUG(); - /* find all consecutive lockable dirty pages, stopping when we find a - * page that is not immediately lockable, is not dirty or is missing, - * or we reach the end of the range */ + /* Find all consecutive lockable dirty pages that have contiguous + * written regions, stopping when we find a page that is not + * immediately lockable, is not dirty or is missing, or we reach the + * end of the range. + */ start = primary_page->index; - if (start >= wb->last) + priv = page_private(primary_page); + offset = priv & AFS_PRIV_MAX; + to = priv >> AFS_PRIV_SHIFT; + + WARN_ON(offset == to); + + if (start >= final_page || to < PAGE_SIZE) goto no_more; + start++; do { _debug("more %lx [%lx]", start, count); - n = wb->last - start + 1; + n = final_page - start + 1; if (n > ARRAY_SIZE(pages)) n = ARRAY_SIZE(pages); - n = find_get_pages_contig(wb->vnode->vfs_inode.i_mapping, - start, n, pages); + n = find_get_pages_contig(mapping, start, ARRAY_SIZE(pages), pages); _debug("fgpc %u", n); if (n == 0) goto no_more; @@ -415,16 +431,27 @@ static int afs_write_back_from_locked_page(struct afs_writeback *wb, } for (loop = 0; loop < n; loop++) { + if (to != PAGE_SIZE) + break; page = pages[loop]; - if (page->index > wb->last) + if (page->index > final_page) break; if (!trylock_page(page)) break; - if (!PageDirty(page) || - page_private(page) != (unsigned long) wb) { + if (!PageDirty(page) || 
PageWriteback(page)) { unlock_page(page); break; } + + priv = page_private(page); + f = priv & AFS_PRIV_MAX; + t = priv >> AFS_PRIV_SHIFT; + if (f != 0) { + unlock_page(page); + break; + } + to = t; + if (!clear_page_dirty_for_io(page)) BUG(); if (test_set_page_writeback(page)) @@ -440,50 +467,55 @@ static int afs_write_back_from_locked_page(struct afs_writeback *wb, } start += loop; - } while (start <= wb->last && count < 65536); + } while (start <= final_page && count < 65536); no_more: - /* we now have a contiguous set of dirty pages, each with writeback set - * and the dirty mark cleared; the first page is locked and must remain - * so, all the rest are unlocked */ + /* We now have a contiguous set of dirty pages, each with writeback + * set; the first page is still locked at this point, but all the rest + * have been unlocked. + */ + unlock_page(primary_page); + first = primary_page->index; last = first + count - 1; - offset = (first == wb->first) ? wb->offset_first : 0; - to = (last == wb->last) ? wb->to_last : PAGE_SIZE; - _debug("write back %lx[%u..] 
to %lx[..%u]", first, offset, last, to); - ret = afs_store_data(wb, first, last, offset, to); - if (ret < 0) { - switch (ret) { - case -EDQUOT: - case -ENOSPC: - mapping_set_error(wb->vnode->vfs_inode.i_mapping, -ENOSPC); - break; - case -EROFS: - case -EIO: - case -EREMOTEIO: - case -EFBIG: - case -ENOENT: - case -ENOMEDIUM: - case -ENXIO: - afs_kill_pages(wb->vnode, true, first, last); - mapping_set_error(wb->vnode->vfs_inode.i_mapping, -EIO); - break; - case -EACCES: - case -EPERM: - case -ENOKEY: - case -EKEYEXPIRED: - case -EKEYREJECTED: - case -EKEYREVOKED: - afs_kill_pages(wb->vnode, false, first, last); - break; - default: - break; - } - } else { + ret = afs_store_data(mapping, first, last, offset, to); + switch (ret) { + case 0: ret = count; + break; + + default: + pr_notice("kAFS: Unexpected error from FS.StoreData %d\n", ret); + /* Fall through */ + case -EACCES: + case -EPERM: + case -ENOKEY: + case -EKEYEXPIRED: + case -EKEYREJECTED: + case -EKEYREVOKED: + afs_redirty_pages(wbc, mapping, first, last); + mapping_set_error(mapping, ret); + break; + + case -EDQUOT: + case -ENOSPC: + afs_redirty_pages(wbc, mapping, first, last); + mapping_set_error(mapping, -ENOSPC); + break; + + case -EROFS: + case -EIO: + case -EREMOTEIO: + case -EFBIG: + case -ENOENT: + case -ENOMEDIUM: + case -ENXIO: + afs_kill_pages(mapping, first, last); + mapping_set_error(mapping, ret); + break; } _leave(" = %d", ret); @@ -496,16 +528,12 @@ no_more: */ int afs_writepage(struct page *page, struct writeback_control *wbc) { - struct afs_writeback *wb; int ret; _enter("{%lx},", page->index); - wb = (struct afs_writeback *) page_private(page); - ASSERT(wb != NULL); - - ret = afs_write_back_from_locked_page(wb, page); - unlock_page(page); + ret = afs_write_back_from_locked_page(page->mapping, wbc, page, + wbc->range_end >> PAGE_SHIFT); if (ret < 0) { _leave(" = %d", ret); return 0; @@ -524,7 +552,6 @@ static int afs_writepages_region(struct address_space *mapping, struct 
writeback_control *wbc, pgoff_t index, pgoff_t end, pgoff_t *_next) { - struct afs_writeback *wb; struct page *page; int ret, n; @@ -550,7 +577,12 @@ static int afs_writepages_region(struct address_space *mapping, * (changing page->mapping to NULL), or even swizzled back from * swapper_space to tmpfs file mapping */ - lock_page(page); + ret = lock_page_killable(page); + if (ret < 0) { + put_page(page); + _leave(" = %d", ret); + return ret; + } if (page->mapping != mapping || !PageDirty(page)) { unlock_page(page); @@ -566,17 +598,9 @@ static int afs_writepages_region(struct address_space *mapping, continue; } - wb = (struct afs_writeback *) page_private(page); - ASSERT(wb != NULL); - - spin_lock(&wb->vnode->writeback_lock); - wb->state = AFS_WBACK_WRITING; - spin_unlock(&wb->vnode->writeback_lock); - if (!clear_page_dirty_for_io(page)) BUG(); - ret = afs_write_back_from_locked_page(wb, page); - unlock_page(page); + ret = afs_write_back_from_locked_page(mapping, wbc, page, end); put_page(page); if (ret < 0) { _leave(" = %d", ret); @@ -632,17 +656,13 @@ int afs_writepages(struct address_space *mapping, */ void afs_pages_written_back(struct afs_vnode *vnode, struct afs_call *call) { - struct afs_writeback *wb = call->wb; struct pagevec pv; unsigned count, loop; pgoff_t first = call->first, last = call->last; - bool free_wb; _enter("{%x:%u},{%lx-%lx}", vnode->fid.vid, vnode->fid.vnode, first, last); - ASSERT(wb != NULL); - pagevec_init(&pv, 0); do { @@ -651,35 +671,19 @@ void afs_pages_written_back(struct afs_vnode *vnode, struct afs_call *call) count = last - first + 1; if (count > PAGEVEC_SIZE) count = PAGEVEC_SIZE; - pv.nr = find_get_pages_contig(call->mapping, first, count, - pv.pages); + pv.nr = find_get_pages_contig(vnode->vfs_inode.i_mapping, + first, count, pv.pages); ASSERTCMP(pv.nr, ==, count); - spin_lock(&vnode->writeback_lock); for (loop = 0; loop < count; loop++) { - struct page *page = pv.pages[loop]; - end_page_writeback(page); - if (page_private(page) 
== (unsigned long) wb) { - set_page_private(page, 0); - ClearPagePrivate(page); - wb->usage--; - } + set_page_private(pv.pages[loop], 0); + end_page_writeback(pv.pages[loop]); } - free_wb = false; - if (wb->usage == 0) { - afs_unlink_writeback(wb); - free_wb = true; - } - spin_unlock(&vnode->writeback_lock); first += count; - if (free_wb) { - afs_free_writeback(wb); - wb = NULL; - } - __pagevec_release(&pv); } while (first <= last); + afs_prune_wb_keys(vnode); _leave(""); } @@ -710,28 +714,6 @@ ssize_t afs_file_write(struct kiocb *iocb, struct iov_iter *from) return result; } -/* - * flush the vnode to the fileserver - */ -int afs_writeback_all(struct afs_vnode *vnode) -{ - struct address_space *mapping = vnode->vfs_inode.i_mapping; - struct writeback_control wbc = { - .sync_mode = WB_SYNC_ALL, - .nr_to_write = LONG_MAX, - .range_cyclic = 1, - }; - int ret; - - _enter(""); - - ret = mapping->a_ops->writepages(mapping, &wbc); - __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); - - _leave(" = %d", ret); - return ret; -} - /* * flush any dirty pages for this process, and check for write errors. 
* - the return status from this call provides a reliable indication of @@ -740,61 +722,13 @@ int afs_writeback_all(struct afs_vnode *vnode) int afs_fsync(struct file *file, loff_t start, loff_t end, int datasync) { struct inode *inode = file_inode(file); - struct afs_writeback *wb, *xwb; struct afs_vnode *vnode = AFS_FS_I(inode); - int ret; _enter("{%x:%u},{n=%pD},%d", vnode->fid.vid, vnode->fid.vnode, file, datasync); - ret = file_write_and_wait_range(file, start, end); - if (ret) - return ret; - inode_lock(inode); - - /* use a writeback record as a marker in the queue - when this reaches - * the front of the queue, all the outstanding writes are either - * completed or rejected */ - wb = kzalloc(sizeof(*wb), GFP_KERNEL); - if (!wb) { - ret = -ENOMEM; - goto out; - } - wb->vnode = vnode; - wb->first = 0; - wb->last = -1; - wb->offset_first = 0; - wb->to_last = PAGE_SIZE; - wb->usage = 1; - wb->state = AFS_WBACK_SYNCING; - init_waitqueue_head(&wb->waitq); - - spin_lock(&vnode->writeback_lock); - list_for_each_entry(xwb, &vnode->writebacks, link) { - if (xwb->state == AFS_WBACK_PENDING) - xwb->state = AFS_WBACK_CONFLICTING; - } - list_add_tail(&wb->link, &vnode->writebacks); - spin_unlock(&vnode->writeback_lock); - - /* push all the outstanding writebacks to the server */ - ret = afs_writeback_all(vnode); - if (ret < 0) { - afs_put_writeback(wb); - _leave(" = %d [wb]", ret); - goto out; - } - - /* wait for the preceding writes to actually complete */ - ret = wait_event_interruptible(wb->waitq, - wb->state == AFS_WBACK_COMPLETE || - vnode->writebacks.next == &wb->link); - afs_put_writeback(wb); - _leave(" = %d", ret); -out: - inode_unlock(inode); - return ret; + return file_write_and_wait_range(file, start, end); } /* @@ -831,3 +765,68 @@ int afs_page_mkwrite(struct vm_area_struct *vma, struct page *page) _leave(" = 0"); return 0; } + +/* + * Prune the keys cached for writeback. Takes vnode->wb_lock itself.
+ */ +void afs_prune_wb_keys(struct afs_vnode *vnode) +{ + LIST_HEAD(graveyard); + struct afs_wb_key *wbk, *tmp; + + /* Discard unused keys */ + spin_lock(&vnode->wb_lock); + + if (!mapping_tagged(&vnode->vfs_inode.i_data, PAGECACHE_TAG_WRITEBACK) && + !mapping_tagged(&vnode->vfs_inode.i_data, PAGECACHE_TAG_DIRTY)) { + list_for_each_entry_safe(wbk, tmp, &vnode->wb_keys, vnode_link) { + if (refcount_read(&wbk->usage) == 1) + list_move(&wbk->vnode_link, &graveyard); + } + } + + spin_unlock(&vnode->wb_lock); + + while (!list_empty(&graveyard)) { + wbk = list_entry(graveyard.next, struct afs_wb_key, vnode_link); + list_del(&wbk->vnode_link); + afs_put_wb_key(wbk); + } +} + +/* + * Clean up a page during invalidation. + */ +int afs_launder_page(struct page *page) +{ + struct address_space *mapping = page->mapping; + struct afs_vnode *vnode = AFS_FS_I(mapping->host); + unsigned long priv; + unsigned int f, t; + int ret = 0; + + _enter("{%lx}", page->index); + + priv = page_private(page); + if (clear_page_dirty_for_io(page)) { + f = 0; + t = PAGE_SIZE; + if (PagePrivate(page)) { + f = priv & AFS_PRIV_MAX; + t = priv >> AFS_PRIV_SHIFT; + } + + ret = afs_store_data(mapping, page->index, page->index, t, f); + } + + set_page_private(page, 0); + ClearPagePrivate(page); + +#ifdef CONFIG_AFS_FSCACHE + if (PageFsCache(page)) { + fscache_wait_on_page_write(vnode->cache, page); + fscache_uncache_page(vnode->cache, page); + } +#endif + return ret; +} diff --git a/mm/filemap.c b/mm/filemap.c index 594d73fef8b4..5bcc87adbeeb 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1041,6 +1041,7 @@ int wait_on_page_bit_killable(struct page *page, int bit_nr) wait_queue_head_t *q = page_waitqueue(page); return wait_on_page_bit_common(q, page, bit_nr, TASK_KILLABLE, false); } +EXPORT_SYMBOL(wait_on_page_bit_killable); /** * add_page_wait_queue - Add an arbitrary waiter to a page's wait queue From 1cf7a1518aefa69ac6ba0c3f9206073e4221e3c8 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 2 
Nov 2017 15:27:52 +0000 Subject: [PATCH 33/35] afs: Implement shared-writeable mmap Implement shared-writeable mmap for AFS. Signed-off-by: David Howells --- fs/afs/file.c | 22 +++++++++++++++++++++- fs/afs/internal.h | 1 + fs/afs/write.c | 40 ++++++++++++++++++++++++++++++++-------- 3 files changed, 54 insertions(+), 9 deletions(-) diff --git a/fs/afs/file.c b/fs/afs/file.c index c3a7bc1281f5..675c5c268a52 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c @@ -19,6 +19,7 @@ #include #include "internal.h" +static int afs_file_mmap(struct file *file, struct vm_area_struct *vma); static int afs_readpage(struct file *file, struct page *page); static void afs_invalidatepage(struct page *page, unsigned int offset, unsigned int length); @@ -34,7 +35,7 @@ const struct file_operations afs_file_operations = { .llseek = generic_file_llseek, .read_iter = generic_file_read_iter, .write_iter = afs_file_write, - .mmap = generic_file_readonly_mmap, + .mmap = afs_file_mmap, .splice_read = generic_file_splice_read, .fsync = afs_fsync, .lock = afs_lock, @@ -61,6 +62,12 @@ const struct address_space_operations afs_fs_aops = { .writepages = afs_writepages, }; +static const struct vm_operations_struct afs_vm_ops = { + .fault = filemap_fault, + .map_pages = filemap_map_pages, + .page_mkwrite = afs_page_mkwrite, +}; + /* * Discard a pin on a writeback key. */ @@ -629,3 +636,16 @@ static int afs_releasepage(struct page *page, gfp_t gfp_flags) _leave(" = T"); return 1; } + +/* + * Handle setting up a memory mapping on an AFS file. 
+ */ +static int afs_file_mmap(struct file *file, struct vm_area_struct *vma) +{ + int ret; + + ret = generic_file_mmap(file, vma); + if (ret == 0) + vma->vm_ops = &afs_vm_ops; + return ret; +} diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 688562ae3bf8..1de36e6abd5e 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -886,6 +886,7 @@ extern void afs_pages_written_back(struct afs_vnode *, struct afs_call *); extern ssize_t afs_file_write(struct kiocb *, struct iov_iter *); extern int afs_flush(struct file *, fl_owner_t); extern int afs_fsync(struct file *, loff_t, loff_t, int); +extern int afs_page_mkwrite(struct vm_fault *); extern void afs_prune_wb_keys(struct afs_vnode *); extern int afs_launder_page(struct page *); diff --git a/fs/afs/write.c b/fs/afs/write.c index 4c131371005b..6807277ef956 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -749,21 +749,45 @@ int afs_flush(struct file *file, fl_owner_t id) * notification that a previously read-only page is about to become writable * - if it returns an error, the caller will deliver a bus error signal */ -int afs_page_mkwrite(struct vm_area_struct *vma, struct page *page) +int afs_page_mkwrite(struct vm_fault *vmf) { - struct afs_vnode *vnode = AFS_FS_I(vma->vm_file->f_mapping->host); + struct file *file = vmf->vma->vm_file; + struct inode *inode = file_inode(file); + struct afs_vnode *vnode = AFS_FS_I(inode); + unsigned long priv; _enter("{{%x:%u}},{%lx}", - vnode->fid.vid, vnode->fid.vnode, page->index); + vnode->fid.vid, vnode->fid.vnode, vmf->page->index); - /* wait for the page to be written to the cache before we allow it to - * be modified */ + sb_start_pagefault(inode->i_sb); + + /* Wait for the page to be written to the cache before we allow it to + * be modified. We then assume the entire page will need writing back. 
+ */ #ifdef CONFIG_AFS_FSCACHE - fscache_wait_on_page_write(vnode->cache, page); + fscache_wait_on_page_write(vnode->cache, vmf->page); #endif - _leave(" = 0"); - return 0; + if (PageWriteback(vmf->page) && + wait_on_page_bit_killable(vmf->page, PG_writeback) < 0) + return VM_FAULT_RETRY; + + if (lock_page_killable(vmf->page) < 0) + return VM_FAULT_RETRY; + + /* We mustn't change page->private until writeback is complete as that + * details the portion of the page we need to write back and we might + * need to redirty the page if there's a problem. + */ + wait_on_page_writeback(vmf->page); + + priv = (unsigned long)PAGE_SIZE << AFS_PRIV_SHIFT; /* To */ + priv |= 0; /* From */ + SetPagePrivate(vmf->page); + set_page_private(vmf->page, priv); + + sb_end_pagefault(inode->i_sb); + return VM_FAULT_LOCKED; } /* From 13524ab3c6f41bcd257d28644414297bea8282b7 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 2 Nov 2017 15:27:53 +0000 Subject: [PATCH 34/35] afs: Trace page dirty/clean Add a trace event that logs the dirtying and cleaning of pages attached to AFS inodes. 
Signed-off-by: David Howells --- fs/afs/file.c | 10 ++++++++++ fs/afs/write.c | 34 ++++++++++++++++++++------------- include/trace/events/afs.h | 39 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 70 insertions(+), 13 deletions(-) diff --git a/fs/afs/file.c b/fs/afs/file.c index 675c5c268a52..a39192ced99e 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c @@ -583,6 +583,9 @@ static int afs_readpages(struct file *file, struct address_space *mapping, static void afs_invalidatepage(struct page *page, unsigned int offset, unsigned int length) { + struct afs_vnode *vnode = AFS_FS_I(page->mapping->host); + unsigned long priv; + _enter("{%lu},%u,%u", page->index, offset, length); BUG_ON(!PageLocked(page)); @@ -598,6 +601,9 @@ static void afs_invalidatepage(struct page *page, unsigned int offset, #endif if (PagePrivate(page)) { + priv = page_private(page); + trace_afs_page_dirty(vnode, tracepoint_string("inval"), + page->index, priv); set_page_private(page, 0); ClearPagePrivate(page); } @@ -613,6 +619,7 @@ static void afs_invalidatepage(struct page *page, unsigned int offset, static int afs_releasepage(struct page *page, gfp_t gfp_flags) { struct afs_vnode *vnode = AFS_FS_I(page->mapping->host); + unsigned long priv; _enter("{{%x:%u}[%lu],%lx},%x", vnode->fid.vid, vnode->fid.vnode, page->index, page->flags, @@ -628,6 +635,9 @@ static int afs_releasepage(struct page *page, gfp_t gfp_flags) #endif if (PagePrivate(page)) { + priv = page_private(page); + trace_afs_page_dirty(vnode, tracepoint_string("rel"), + page->index, priv); set_page_private(page, 0); ClearPagePrivate(page); } diff --git a/fs/afs/write.c b/fs/afs/write.c index 6807277ef956..4472882f06df 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -17,19 +17,6 @@ #include #include "internal.h" -/* - * We use page->private to hold the amount of the page that we've written to, - * splitting the field into two parts. 
However, we need to represent a range - * 0...PAGE_SIZE inclusive, so we can't support 64K pages on a 32-bit system. - */ -#if PAGE_SIZE > 32768 -#define AFS_PRIV_MAX 0xffffffff -#define AFS_PRIV_SHIFT 32 -#else -#define AFS_PRIV_MAX 0xffff -#define AFS_PRIV_SHIFT 16 -#endif - /* * mark a page as having been made dirty and thus needing writeback */ @@ -145,6 +132,8 @@ try_again: priv = (unsigned long)t << AFS_PRIV_SHIFT; priv |= f; + trace_afs_page_dirty(vnode, tracepoint_string("begin"), + page->index, priv); SetPagePrivate(page); set_page_private(page, priv); _leave(" = 0"); @@ -386,6 +375,7 @@ static int afs_write_back_from_locked_page(struct address_space *mapping, struct page *primary_page, pgoff_t final_page) { + struct afs_vnode *vnode = AFS_FS_I(mapping->host); struct page *pages[8], *page; unsigned long count, priv; unsigned n, offset, to, f, t; @@ -407,8 +397,13 @@ static int afs_write_back_from_locked_page(struct address_space *mapping, priv = page_private(primary_page); offset = priv & AFS_PRIV_MAX; to = priv >> AFS_PRIV_SHIFT; + trace_afs_page_dirty(vnode, tracepoint_string("store"), + primary_page->index, priv); WARN_ON(offset == to); + if (offset == to) + trace_afs_page_dirty(vnode, tracepoint_string("WARN"), + primary_page->index, priv); if (start >= final_page || to < PAGE_SIZE) goto no_more; @@ -452,6 +447,9 @@ static int afs_write_back_from_locked_page(struct address_space *mapping, } to = t; + trace_afs_page_dirty(vnode, tracepoint_string("store+"), + page->index, priv); + if (!clear_page_dirty_for_io(page)) BUG(); if (test_set_page_writeback(page)) @@ -657,6 +655,7 @@ int afs_writepages(struct address_space *mapping, void afs_pages_written_back(struct afs_vnode *vnode, struct afs_call *call) { struct pagevec pv; + unsigned long priv; unsigned count, loop; pgoff_t first = call->first, last = call->last; @@ -676,6 +675,9 @@ void afs_pages_written_back(struct afs_vnode *vnode, struct afs_call *call) ASSERTCMP(pv.nr, ==, count); for (loop = 0; loop 
< count; loop++) { + priv = page_private(pv.pages[loop]); + trace_afs_page_dirty(vnode, tracepoint_string("clear"), + pv.pages[loop]->index, priv); set_page_private(pv.pages[loop], 0); end_page_writeback(pv.pages[loop]); } @@ -783,6 +785,8 @@ int afs_page_mkwrite(struct vm_fault *vmf) priv = (unsigned long)PAGE_SIZE << AFS_PRIV_SHIFT; /* To */ priv |= 0; /* From */ + trace_afs_page_dirty(vnode, tracepoint_string("mkwrite"), + vmf->page->index, priv); SetPagePrivate(vmf->page); set_page_private(vmf->page, priv); @@ -840,9 +844,13 @@ int afs_launder_page(struct page *page) t = priv >> AFS_PRIV_SHIFT; } + trace_afs_page_dirty(vnode, tracepoint_string("launder"), + page->index, priv); ret = afs_store_data(mapping, page->index, page->index, t, f); } + trace_afs_page_dirty(vnode, tracepoint_string("laundered"), + page->index, priv); set_page_private(page, 0); ClearPagePrivate(page); diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h index 9cfb7657b72c..5f4e8193932d 100644 --- a/include/trace/events/afs.h +++ b/include/trace/events/afs.h @@ -402,6 +402,45 @@ TRACE_EVENT(afs_dir_check_failed, __entry->vnode, __entry->off, __entry->i_size) ); +/* + * We use page->private to hold the amount of the page that we've written to, + * splitting the field into two parts. However, we need to represent a range + * 0...PAGE_SIZE inclusive, so we can't support 64K pages on a 32-bit system. 
+ */ +#if PAGE_SIZE > 32768 +#define AFS_PRIV_MAX 0xffffffff +#define AFS_PRIV_SHIFT 32 +#else +#define AFS_PRIV_MAX 0xffff +#define AFS_PRIV_SHIFT 16 +#endif + +TRACE_EVENT(afs_page_dirty, + TP_PROTO(struct afs_vnode *vnode, const char *where, + pgoff_t page, unsigned long priv), + + TP_ARGS(vnode, where, page, priv), + + TP_STRUCT__entry( + __field(struct afs_vnode *, vnode ) + __field(const char *, where ) + __field(pgoff_t, page ) + __field(unsigned long, priv ) + ), + + TP_fast_assign( + __entry->vnode = vnode; + __entry->where = where; + __entry->page = page; + __entry->priv = priv; + ), + + TP_printk("vn=%p %lx %s %lu-%lu", + __entry->vnode, __entry->page, __entry->where, + __entry->priv & AFS_PRIV_MAX, + __entry->priv >> AFS_PRIV_SHIFT) + ); + #endif /* _TRACE_AFS_H */ /* This part must be outside protection */ From 98bf40cd99fcfed0705812b6cbdbb3b441a42970 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 2 Nov 2017 15:27:53 +0000 Subject: [PATCH 35/35] afs: Protect call->state changes against signals Protect call->state changes against the call being prematurely terminated due to a signal. What can happen is that a signal causes afs_wait_for_call_to_complete() to abort an afs_call because it's not yet complete whilst afs_deliver_to_call() is delivering data to that call. If the data delivery causes the state to change, this may overwrite the state of the afs_call, making it not-yet-complete again - but no further notifications will be forthcoming from AF_RXRPC as the rxrpc call has been aborted and completed, so kAFS will just hang in various places waiting for that call or on page bits that need clearing by that call. A tracepoint to monitor call state changes is also provided. 
Signed-off-by: David Howells --- fs/afs/cmservice.c | 26 +++++----- fs/afs/internal.h | 63 +++++++++++++++++++---- fs/afs/rxrpc.c | 100 ++++++++++++++++++++----------------- include/trace/events/afs.h | 30 +++++++++++ 4 files changed, 150 insertions(+), 69 deletions(-) diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c index 5767f540e0e1..41e277f57b20 100644 --- a/fs/afs/cmservice.c +++ b/fs/afs/cmservice.c @@ -188,7 +188,6 @@ static int afs_deliver_cb_callback(struct afs_call *call) switch (call->unmarshall) { case 0: - rxrpc_kernel_get_peer(call->net->socket, call->rxcall, &srx); call->offset = 0; call->unmarshall++; @@ -281,10 +280,12 @@ static int afs_deliver_cb_callback(struct afs_call *call) break; } - call->state = AFS_CALL_REPLYING; + if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING)) + return -EIO; /* we'll need the file server record as that tells us which set of * vnodes to operate upon */ + rxrpc_kernel_get_peer(call->net->socket, call->rxcall, &srx); server = afs_find_server(call->net, &srx); if (!server) return -ENOTCONN; @@ -325,9 +326,6 @@ static int afs_deliver_cb_init_call_back_state(struct afs_call *call) if (ret < 0) return ret; - /* no unmarshalling required */ - call->state = AFS_CALL_REPLYING; - /* we'll need the file server record as that tells us which set of * vnodes to operate upon */ server = afs_find_server(call->net, &srx); @@ -352,8 +350,6 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call) _enter(""); - rxrpc_kernel_get_peer(call->net->socket, call->rxcall, &srx); - _enter("{%u}", call->unmarshall); switch (call->unmarshall) { @@ -397,11 +393,12 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call) break; } - /* no unmarshalling required */ - call->state = AFS_CALL_REPLYING; + if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING)) + return -EIO; /* we'll need the file server record as that tells us which set of * vnodes to operate upon */ + rxrpc_kernel_get_peer(call->net->socket, 
call->rxcall, &srx); server = afs_find_server(call->net, &srx); if (!server) return -ENOTCONN; @@ -436,8 +433,8 @@ static int afs_deliver_cb_probe(struct afs_call *call) if (ret < 0) return ret; - /* no unmarshalling required */ - call->state = AFS_CALL_REPLYING; + if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING)) + return -EIO; return afs_queue_call_work(call); } @@ -519,7 +516,8 @@ static int afs_deliver_cb_probe_uuid(struct afs_call *call) break; } - call->state = AFS_CALL_REPLYING; + if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING)) + return -EIO; return afs_queue_call_work(call); } @@ -600,8 +598,8 @@ static int afs_deliver_cb_tell_me_about_yourself(struct afs_call *call) if (ret < 0) return ret; - /* no unmarshalling required */ - call->state = AFS_CALL_REPLYING; + if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING)) + return -EIO; return afs_queue_call_work(call); } diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 1de36e6abd5e..bd8dcee7e066 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -51,13 +51,14 @@ struct afs_iget_data { }; enum afs_call_state { - AFS_CALL_REQUESTING, /* request is being sent for outgoing call */ - AFS_CALL_AWAIT_REPLY, /* awaiting reply to outgoing call */ - AFS_CALL_AWAIT_OP_ID, /* awaiting op ID on incoming call */ - AFS_CALL_AWAIT_REQUEST, /* awaiting request data on incoming call */ - AFS_CALL_REPLYING, /* replying to incoming call */ - AFS_CALL_AWAIT_ACK, /* awaiting final ACK of incoming call */ - AFS_CALL_COMPLETE, /* Completed or failed */ + AFS_CALL_CL_REQUESTING, /* Client: Request is being sent */ + AFS_CALL_CL_AWAIT_REPLY, /* Client: Awaiting reply */ + AFS_CALL_CL_PROC_REPLY, /* Client: rxrpc call complete; processing reply */ + AFS_CALL_SV_AWAIT_OP_ID, /* Server: Awaiting op ID */ + AFS_CALL_SV_AWAIT_REQUEST, /* Server: Awaiting request data */ + AFS_CALL_SV_REPLYING, /* Server: Replying */ + AFS_CALL_SV_AWAIT_ACK, /* Server: Awaiting final ACK */ + AFS_CALL_COMPLETE, /* Completed or failed 
*/ }; /* @@ -97,6 +98,7 @@ struct afs_call { size_t offset; /* offset into received data store */ atomic_t usage; enum afs_call_state state; + spinlock_t state_lock; int error; /* error code */ u32 abort_code; /* Remote abort ID or 0 */ unsigned request_size; /* size of request data */ @@ -543,6 +545,8 @@ struct afs_fs_cursor { #define AFS_FS_CURSOR_NO_VSLEEP 0x0020 /* Set to prevent sleep on VBUSY, VOFFLINE, ... */ }; +#include + /*****************************************************************************/ /* * addr_list.c @@ -788,6 +792,49 @@ static inline int afs_transfer_reply(struct afs_call *call) return afs_extract_data(call, call->buffer, call->reply_max, false); } +static inline bool afs_check_call_state(struct afs_call *call, + enum afs_call_state state) +{ + return READ_ONCE(call->state) == state; +} + +static inline bool afs_set_call_state(struct afs_call *call, + enum afs_call_state from, + enum afs_call_state to) +{ + bool ok = false; + + spin_lock_bh(&call->state_lock); + if (call->state == from) { + call->state = to; + trace_afs_call_state(call, from, to, 0, 0); + ok = true; + } + spin_unlock_bh(&call->state_lock); + return ok; +} + +static inline void afs_set_call_complete(struct afs_call *call, + int error, u32 remote_abort) +{ + enum afs_call_state state; + bool ok = false; + + spin_lock_bh(&call->state_lock); + state = call->state; + if (state != AFS_CALL_COMPLETE) { + call->abort_code = remote_abort; + call->error = error; + call->state = AFS_CALL_COMPLETE; + trace_afs_call_state(call, state, AFS_CALL_COMPLETE, + error, remote_abort); + ok = true; + } + spin_unlock_bh(&call->state_lock); + if (ok) + trace_afs_call_done(call); +} + /* * security.c */ @@ -932,8 +979,6 @@ static inline void afs_check_for_remote_deletion(struct afs_fs_cursor *fc, /* * debug tracing */ -#include - extern unsigned afs_debug; #define dbgprintk(FMT,...) 
\ diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index bd44ae8b63d8..ea1460b9b71a 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -134,6 +134,7 @@ static struct afs_call *afs_alloc_call(struct afs_net *net, atomic_set(&call->usage, 1); INIT_WORK(&call->async_work, afs_process_async_call); init_waitqueue_head(&call->waitq); + spin_lock_init(&call->state_lock); o = atomic_inc_return(&net->nr_outstanding_calls); trace_afs_call(call, afs_call_trace_alloc, 1, o, @@ -288,8 +289,7 @@ static void afs_notify_end_request_tx(struct sock *sock, { struct afs_call *call = (struct afs_call *)call_user_ID; - if (call->state == AFS_CALL_REQUESTING) - call->state = AFS_CALL_AWAIT_REPLY; + afs_set_call_state(call, AFS_CALL_CL_REQUESTING, AFS_CALL_CL_AWAIT_REPLY); } /* @@ -444,82 +444,87 @@ error_kill_call: */ static void afs_deliver_to_call(struct afs_call *call) { - u32 abort_code; + enum afs_call_state state; + u32 abort_code, remote_abort = 0; int ret; _enter("%s", call->type->name); - while (call->state == AFS_CALL_AWAIT_REPLY || - call->state == AFS_CALL_AWAIT_OP_ID || - call->state == AFS_CALL_AWAIT_REQUEST || - call->state == AFS_CALL_AWAIT_ACK + while (state = READ_ONCE(call->state), + state == AFS_CALL_CL_AWAIT_REPLY || + state == AFS_CALL_SV_AWAIT_OP_ID || + state == AFS_CALL_SV_AWAIT_REQUEST || + state == AFS_CALL_SV_AWAIT_ACK ) { - if (call->state == AFS_CALL_AWAIT_ACK) { + if (state == AFS_CALL_SV_AWAIT_ACK) { size_t offset = 0; ret = rxrpc_kernel_recv_data(call->net->socket, call->rxcall, NULL, 0, &offset, false, - &call->abort_code, + &remote_abort, &call->service_id); trace_afs_recv_data(call, 0, offset, false, ret); if (ret == -EINPROGRESS || ret == -EAGAIN) return; - if (ret < 0) - call->error = ret; - if (ret < 0 || ret == 1) + if (ret < 0 || ret == 1) { + if (ret == 1) + ret = 0; goto call_complete; + } return; } ret = call->type->deliver(call); + state = READ_ONCE(call->state); switch (ret) { case 0: - if (call->state == AFS_CALL_AWAIT_REPLY) + if (state == 
AFS_CALL_CL_PROC_REPLY) goto call_complete; + ASSERTCMP(state, >, AFS_CALL_CL_PROC_REPLY); goto done; case -EINPROGRESS: case -EAGAIN: goto out; + case -EIO: case -ECONNABORTED: - goto save_error; + ASSERTCMP(state, ==, AFS_CALL_COMPLETE); + goto done; case -ENOTCONN: abort_code = RX_CALL_DEAD; rxrpc_kernel_abort_call(call->net->socket, call->rxcall, abort_code, ret, "KNC"); - goto save_error; + goto local_abort; case -ENOTSUPP: abort_code = RXGEN_OPCODE; rxrpc_kernel_abort_call(call->net->socket, call->rxcall, abort_code, ret, "KIV"); - goto save_error; + goto local_abort; case -ENODATA: case -EBADMSG: case -EMSGSIZE: default: abort_code = RXGEN_CC_UNMARSHAL; - if (call->state != AFS_CALL_AWAIT_REPLY) + if (state != AFS_CALL_CL_AWAIT_REPLY) abort_code = RXGEN_SS_UNMARSHAL; rxrpc_kernel_abort_call(call->net->socket, call->rxcall, abort_code, -EBADMSG, "KUM"); - goto save_error; + goto local_abort; } } done: - if (call->state == AFS_CALL_COMPLETE && call->incoming) + if (state == AFS_CALL_COMPLETE && call->incoming) afs_put_call(call); out: _leave(""); return; -save_error: - call->error = ret; +local_abort: + abort_code = 0; call_complete: - if (call->state != AFS_CALL_COMPLETE) { - call->state = AFS_CALL_COMPLETE; - trace_afs_call_done(call); - } + afs_set_call_complete(call, ret, remote_abort); + state = AFS_CALL_COMPLETE; goto done; } @@ -551,14 +556,15 @@ static long afs_wait_for_call_to_complete(struct afs_call *call, set_current_state(TASK_UNINTERRUPTIBLE); /* deliver any messages that are in the queue */ - if (call->state < AFS_CALL_COMPLETE && call->need_attention) { + if (!afs_check_call_state(call, AFS_CALL_COMPLETE) && + call->need_attention) { call->need_attention = false; __set_current_state(TASK_RUNNING); afs_deliver_to_call(call); continue; } - if (call->state == AFS_CALL_COMPLETE) + if (afs_check_call_state(call, AFS_CALL_COMPLETE)) break; life = rxrpc_kernel_check_life(call->net->socket, call->rxcall); @@ -578,17 +584,17 @@ static long 
afs_wait_for_call_to_complete(struct afs_call *call, __set_current_state(TASK_RUNNING); /* Kill off the call if it's still live. */ - if (call->state < AFS_CALL_COMPLETE) { + if (!afs_check_call_state(call, AFS_CALL_COMPLETE)) { _debug("call interrupted"); if (rxrpc_kernel_abort_call(call->net->socket, call->rxcall, - RX_USER_ABORT, -EINTR, "KWI")) { - call->error = -ERESTARTSYS; - trace_afs_call_done(call); - } + RX_USER_ABORT, -EINTR, "KWI")) + afs_set_call_complete(call, -EINTR, 0); } + spin_lock_bh(&call->state_lock); ac->abort_code = call->abort_code; ac->error = call->error; + spin_unlock_bh(&call->state_lock); ret = ac->error; switch (ret) { @@ -713,7 +719,7 @@ void afs_charge_preallocation(struct work_struct *work) break; call->async = true; - call->state = AFS_CALL_AWAIT_OP_ID; + call->state = AFS_CALL_SV_AWAIT_OP_ID; init_waitqueue_head(&call->waitq); } @@ -769,7 +775,7 @@ static int afs_deliver_cm_op_id(struct afs_call *call) return ret; call->operation_ID = ntohl(call->tmp); - call->state = AFS_CALL_AWAIT_REQUEST; + afs_set_call_state(call, AFS_CALL_SV_AWAIT_OP_ID, AFS_CALL_SV_AWAIT_REQUEST); call->offset = 0; /* ask the cache manager to route the call (it'll change the call type @@ -794,8 +800,7 @@ static void afs_notify_end_reply_tx(struct sock *sock, { struct afs_call *call = (struct afs_call *)call_user_ID; - if (call->state == AFS_CALL_REPLYING) - call->state = AFS_CALL_AWAIT_ACK; + afs_set_call_state(call, AFS_CALL_SV_REPLYING, AFS_CALL_SV_AWAIT_ACK); } /* @@ -879,6 +884,8 @@ int afs_extract_data(struct afs_call *call, void *buf, size_t count, bool want_more) { struct afs_net *net = call->net; + enum afs_call_state state; + u32 remote_abort; int ret; _enter("{%s,%zu},,%zu,%d", @@ -888,29 +895,30 @@ int afs_extract_data(struct afs_call *call, void *buf, size_t count, ret = rxrpc_kernel_recv_data(net->socket, call->rxcall, buf, count, &call->offset, - want_more, &call->abort_code, + want_more, &remote_abort, &call->service_id); 
trace_afs_recv_data(call, count, call->offset, want_more, ret); if (ret == 0 || ret == -EAGAIN) return ret; + state = READ_ONCE(call->state); if (ret == 1) { - switch (call->state) { - case AFS_CALL_AWAIT_REPLY: - call->state = AFS_CALL_COMPLETE; - trace_afs_call_done(call); + switch (state) { + case AFS_CALL_CL_AWAIT_REPLY: + afs_set_call_state(call, state, AFS_CALL_CL_PROC_REPLY); break; - case AFS_CALL_AWAIT_REQUEST: - call->state = AFS_CALL_REPLYING; + case AFS_CALL_SV_AWAIT_REQUEST: + afs_set_call_state(call, state, AFS_CALL_SV_REPLYING); break; + case AFS_CALL_COMPLETE: + kdebug("prem complete %d", call->error); + return -EIO; default: break; } return 0; } - call->error = ret; - call->state = AFS_CALL_COMPLETE; - trace_afs_call_done(call); + afs_set_call_complete(call, ret, remote_abort); return ret; } diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h index 5f4e8193932d..6b59c63a8e51 100644 --- a/include/trace/events/afs.h +++ b/include/trace/events/afs.h @@ -441,6 +441,36 @@ TRACE_EVENT(afs_page_dirty, __entry->priv >> AFS_PRIV_SHIFT) ); +TRACE_EVENT(afs_call_state, + TP_PROTO(struct afs_call *call, + enum afs_call_state from, + enum afs_call_state to, + int ret, u32 remote_abort), + + TP_ARGS(call, from, to, ret, remote_abort), + + TP_STRUCT__entry( + __field(struct afs_call *, call ) + __field(enum afs_call_state, from ) + __field(enum afs_call_state, to ) + __field(int, ret ) + __field(u32, abort ) + ), + + TP_fast_assign( + __entry->call = call; + __entry->from = from; + __entry->to = to; + __entry->ret = ret; + __entry->abort = remote_abort; + ), + + TP_printk("c=%p %u->%u r=%d ab=%d", + __entry->call, + __entry->from, __entry->to, + __entry->ret, __entry->abort) + ); + #endif /* _TRACE_AFS_H */ /* This part must be outside protection */