2005-04-17 06:20:36 +08:00
|
|
|
#ifndef _NFS_FS_SB
|
|
|
|
#define _NFS_FS_SB
|
|
|
|
|
|
|
|
#include <linux/list.h>
|
|
|
|
#include <linux/backing-dev.h>
|
2012-01-18 11:04:24 +08:00
|
|
|
#include <linux/idr.h>
|
2007-11-08 17:05:04 +08:00
|
|
|
#include <linux/wait.h>
|
2009-04-01 21:21:53 +08:00
|
|
|
#include <linux/nfs_xdr.h>
|
|
|
|
#include <linux/sunrpc/xprt.h>
|
2007-11-08 17:05:04 +08:00
|
|
|
|
2011-07-27 07:09:06 +08:00
|
|
|
#include <linux/atomic.h>
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2009-04-01 21:21:53 +08:00
|
|
|
struct nfs4_session;
|
2006-03-21 02:44:13 +08:00
|
|
|
struct nfs_iostats;
|
2008-01-12 06:09:52 +08:00
|
|
|
struct nlm_host;
|
2009-04-01 21:22:03 +08:00
|
|
|
struct nfs4_sequence_args;
|
|
|
|
struct nfs4_sequence_res;
|
|
|
|
struct nfs_server;
|
2010-06-16 21:52:26 +08:00
|
|
|
struct nfs4_minor_version_ops;
|
2012-05-22 10:44:31 +08:00
|
|
|
struct nfs41_server_scope;
|
2012-02-18 04:20:26 +08:00
|
|
|
struct nfs41_impl_id;
|
2006-03-21 02:44:13 +08:00
|
|
|
|
2006-08-23 08:06:10 +08:00
|
|
|
/*
|
|
|
|
* The nfs_client identifies our client state to the server.
|
|
|
|
*/
|
|
|
|
/*
 * Fields cover the server's address and hostname, the RPC client and
 * protocol vector, and (under CONFIG_NFS_V4) clientid, lease and session
 * state.
 */
struct nfs_client {
|
|
|
|
atomic_t cl_count; /* NOTE(review): presumably the reference count - confirm */
|
|
|
|
int cl_cons_state; /* current construction state (-ve: init error) */
|
|
|
|
#define NFS_CS_READY 0 /* ready to be used */
|
|
|
|
#define NFS_CS_INITING 1 /* busy initialising */
|
2009-04-01 21:22:38 +08:00
|
|
|
#define NFS_CS_SESSION_INITING 2 /* busy initialising session */
|
2006-08-23 08:06:10 +08:00
|
|
|
/* NOTE(review): the NFS_CS_* values that follow cl_res_state and cl_flags
 * look like bit numbers rather than masks (one of them is 0) - confirm
 * against the users of these fields.
 */
unsigned long cl_res_state; /* NFS resources state */
|
|
|
|
#define NFS_CS_CALLBACK 1 /* - callback started */
|
|
|
|
#define NFS_CS_IDMAP 2 /* - idmap started */
|
2006-08-24 13:03:05 +08:00
|
|
|
#define NFS_CS_RENEWD 3 /* - renewd started */
|
2011-03-01 09:34:10 +08:00
|
|
|
#define NFS_CS_STOP_RENEW 4 /* no more state to renew */
|
2011-03-01 09:34:11 +08:00
|
|
|
#define NFS_CS_CHECK_LEASE_TIME 5 /* need to check lease time */
|
2012-05-22 10:46:07 +08:00
|
|
|
unsigned long cl_flags; /* behavior switches */
|
|
|
|
#define NFS_CS_NORESVPORT 0 /* - use ephemeral src port */
|
|
|
|
#define NFS_CS_DISCRTRY 1 /* - disconnect on RPC retry */
|
2007-12-11 03:58:15 +08:00
|
|
|
struct sockaddr_storage cl_addr; /* server identifier */
|
|
|
|
size_t cl_addrlen; /* NOTE(review): presumably the used length of cl_addr - confirm */
|
2006-08-23 08:06:10 +08:00
|
|
|
char * cl_hostname; /* hostname of server */
|
|
|
|
struct list_head cl_share_link; /* link in global client list */
|
|
|
|
struct list_head cl_superblocks; /* List of nfs_server structs */
|
|
|
|
|
|
|
|
struct rpc_clnt * cl_rpcclient; /* pointer to the RPC client */
|
2006-08-23 08:06:12 +08:00
|
|
|
const struct nfs_rpc_ops *rpc_ops; /* NFS protocol vector */
|
2008-01-04 05:29:06 +08:00
|
|
|
int cl_proto; /* Network transport protocol */
|
2006-08-23 08:06:10 +08:00
|
|
|
|
2009-04-01 21:21:49 +08:00
|
|
|
u32 cl_minorversion;/* NFSv4 minorversion */
|
2008-04-08 08:50:11 +08:00
|
|
|
struct rpc_cred *cl_machine_cred; /* NOTE(review): presumably the machine credential - confirm */
|
|
|
|
|
2006-08-23 08:06:10 +08:00
|
|
|
#ifdef CONFIG_NFS_V4 /* fields up to the matching #endif exist only with CONFIG_NFS_V4 */
|
|
|
|
u64 cl_clientid; /* constant */
|
2011-04-25 02:28:18 +08:00
|
|
|
nfs4_verifier cl_confirm; /* Clientid verifier */
|
2006-08-23 08:06:10 +08:00
|
|
|
unsigned long cl_state; /* NOTE(review): meaning of the bits is not visible here - confirm */
|
|
|
|
|
|
|
|
spinlock_t cl_lock; /* nfs_server documents its state_owners field as protected by this lock */
|
|
|
|
|
|
|
|
unsigned long cl_lease_time; /* NOTE(review): units not visible here (jiffies?) - confirm */
|
|
|
|
unsigned long cl_last_renewal; /* NOTE(review): units not visible here (jiffies?) - confirm */
|
2006-11-22 22:54:01 +08:00
|
|
|
struct delayed_work cl_renewd; /* NOTE(review): presumably the lease renewal work item - confirm */
|
2006-08-23 08:06:10 +08:00
|
|
|
|
|
|
|
struct rpc_wait_queue cl_rpcwaitq;
|
|
|
|
|
|
|
|
/* idmapper */
|
|
|
|
struct idmap * cl_idmap;
|
|
|
|
|
|
|
|
/* Our own IP address, as a null-terminated string.
|
|
|
|
* This is used to generate the clientid, and the callback address.
|
|
|
|
*/
|
2007-12-11 03:57:01 +08:00
|
|
|
char cl_ipaddr[48];
|
2006-08-23 08:06:10 +08:00
|
|
|
unsigned char cl_id_uniquifier;
|
2011-01-06 10:04:30 +08:00
|
|
|
u32 cl_cb_ident; /* v4.0 callback identifier */
|
2010-06-16 21:52:26 +08:00
|
|
|
const struct nfs4_minor_version_ops *cl_mvops; /* per-minor-version operations table (see type name) */
|
2009-04-03 23:42:42 +08:00
|
|
|
|
2009-04-01 21:22:29 +08:00
|
|
|
/* The sequence id to use for the next CREATE_SESSION */
|
|
|
|
u32 cl_seqid;
|
|
|
|
/* The flags used for obtaining the clientid during EXCHANGE_ID */
|
|
|
|
u32 cl_exchange_flags;
|
2012-05-22 10:44:22 +08:00
|
|
|
struct nfs4_session *cl_session; /* shared session */
|
2012-05-22 10:46:16 +08:00
|
|
|
struct nfs41_server_owner *cl_serverowner; /* NOTE(review): nfs41_ prefix suggests NFSv4.1 server owner - confirm */
|
2012-05-22 10:44:31 +08:00
|
|
|
struct nfs41_server_scope *cl_serverscope; /* NOTE(review): nfs41_ prefix suggests NFSv4.1 server scope - confirm */
|
2012-05-22 10:44:41 +08:00
|
|
|
struct nfs41_impl_id *cl_implid; /* NOTE(review): nfs41_ prefix suggests NFSv4.1 implementation id - confirm */
|
2011-03-10 05:00:53 +08:00
|
|
|
#endif /* CONFIG_NFS_V4 */
|
2009-04-01 21:21:53 +08:00
|
|
|
|
2009-04-03 23:42:42 +08:00
|
|
|
#ifdef CONFIG_NFS_FSCACHE
|
|
|
|
struct fscache_cookie *fscache; /* client index cache cookie */
|
|
|
|
#endif /* CONFIG_NFS_FSCACHE */
|
2011-06-01 07:05:47 +08:00
|
|
|
|
2012-05-22 10:44:50 +08:00
|
|
|
struct net *cl_net; /* NOTE(review): presumably the network namespace of this client - confirm */
|
2006-08-23 08:06:10 +08:00
|
|
|
};
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
/*
|
|
|
|
* NFS client parameters stored in the superblock.
|
|
|
|
*/
|
|
|
|
struct nfs_server {
|
2006-08-23 08:06:11 +08:00
|
|
|
struct nfs_client * nfs_client; /* shared client and NFS4 state */
|
NFS: Share NFS superblocks per-protocol per-server per-FSID
The attached patch makes NFS share superblocks between mounts from the same
server and FSID over the same protocol.
It does this by creating each superblock with a false root and returning the
real root dentry in the vfsmount presented by get_sb(). The root dentry set
starts off as an anonymous dentry if we don't already have the dentry for its
inode, otherwise it simply returns the dentry we already have.
We may thus end up with several trees of dentries in the superblock, and if at
some later point one of the anonymous tree roots is discovered by normal filesystem
activity to be located in another tree within the superblock, the anonymous
root is named and materialises attached to the second tree at the appropriate
point.
Why do it this way? Why not pass an extra argument to the mount() syscall to
indicate the subpath and then pathwalk from the server root to the desired
directory? You can't guarantee this will work for two reasons:
(1) The root and intervening nodes may not be accessible to the client.
With NFS2 and NFS3, for instance, mountd is called on the server to get
the filehandle for the tip of a path. mountd won't give us handles for
anything we don't have permission to access, and so we can't set up NFS
inodes for such nodes, and so can't easily set up dentries (we'd have to
have ghost inodes or something).
With this patch we don't actually create dentries until we get handles
from the server that we can use to set up their inodes, and we don't
actually bind them into the tree until we know for sure where they go.
(2) Inaccessible symbolic links.
If we're asked to mount two exports from the server, eg:
mount warthog:/warthog/aaa/xxx /mmm
mount warthog:/warthog/bbb/yyy /nnn
We may not be able to access anything nearer the root than xxx and yyy,
but we may find out later that /mmm/www/yyy, say, is actually the same
directory as the one mounted on /nnn. What we might then find out, for
example, is that /warthog/bbb was actually a symbolic link to
/warthog/aaa/xxx/www, but we can't actually determine that by talking to
the server until /warthog is made available by NFS.
This would lead to having constructed an erroneous dentry tree which we
can't easily fix. We can end up with a dentry marked as a directory when
it should actually be a symlink, or we could end up with an apparently
hardlinked directory.
With this patch we need not make assumptions about the type of a dentry
for which we can't retrieve information, nor need we assume we know its
place in the grand scheme of things until we actually see that place.
This patch reduces the possibility of aliasing in the inode and page caches for
inodes that may be accessed by more than one NFS export. It also reduces the
number of superblocks required for NFS where there are many NFS exports being
used from a server (home directory server + autofs for example).
This in turn makes it simpler to do local caching of network filesystems, as it
can then be guaranteed that there won't be links from multiple inodes in
separate superblocks to the same cache file.
Obviously, cache aliasing between different levels of NFS protocol could still
be a problem, but at least that gives us another key to use when indexing the
cache.
This patch makes the following changes:
(1) The server record construction/destruction has been abstracted out into
its own set of functions to make things easier to get right. These have
been moved into fs/nfs/client.c.
All the code in fs/nfs/client.c has to do with the management of
connections to servers, and doesn't touch superblocks in any way; the
remaining code in fs/nfs/super.c has to do with VFS superblock management.
(2) The sequence of events undertaken by NFS mount is now reordered:
(a) A volume representation (struct nfs_server) is allocated.
(b) A server representation (struct nfs_client) is acquired. This may be
allocated or shared, and is keyed on server address, port and NFS
version.
(c) If allocated, the client representation is initialised. The state
member variable of nfs_client is used to prevent a race during
initialisation from two mounts.
(d) For NFS4 a simple pathwalk is performed, walking from FH to FH to find
the root filehandle for the mount (fs/nfs/getroot.c). For NFS2/3 we
are given the root FH in advance.
(e) The volume FSID is probed for on the root FH.
(f) The volume representation is initialised from the FSINFO record
retrieved on the root FH.
(g) sget() is called to acquire a superblock. This may be allocated or
shared, keyed on client pointer and FSID.
(h) If allocated, the superblock is initialised.
(i) If the superblock is shared, then the new nfs_server record is
discarded.
(j) The root dentry for this mount is looked up from the root FH.
(k) The root dentry for this mount is assigned to the vfsmount.
(3) nfs_readdir_lookup() creates dentries for each of the entries readdir()
returns; this function now attaches disconnected trees from alternate
roots that happen to be discovered attached to a directory being read (in
the same way nfs_lookup() is made to do for lookup ops).
The new d_materialise_unique() function is now used to do this, thus
permitting the whole thing to be done under one set of locks, and thus
avoiding any race between mount and lookup operations on the same
directory.
(4) The client management code uses a new debug facility: NFSDBG_CLIENT which
is set by echoing 1024 to /proc/net/sunrpc/nfs_debug.
(5) Clone mounts are now called xdev mounts.
(6) Use the dentry passed to the statfs() op as the handle for retrieving fs
statistics rather than the root dentry of the superblock (which is now a
dummy).
Signed-Off-By: David Howells <dhowells@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
2006-08-23 08:06:13 +08:00
|
|
|
struct list_head client_link; /* List of other nfs_server structs
|
|
|
|
* that share the same client
|
|
|
|
*/
|
|
|
|
struct list_head master_link; /* link in master servers list */
|
2005-04-17 06:20:36 +08:00
|
|
|
struct rpc_clnt * client; /* RPC client handle */
|
2005-06-23 01:16:27 +08:00
|
|
|
struct rpc_clnt * client_acl; /* ACL RPC client handle */
|
2008-01-12 06:09:52 +08:00
|
|
|
struct nlm_host *nlm_host; /* NLM client handle */
|
2010-02-02 13:39:01 +08:00
|
|
|
struct nfs_iostats __percpu *io_stats; /* I/O statistics */
|
2005-04-17 06:20:36 +08:00
|
|
|
struct backing_dev_info backing_dev_info;
|
2007-05-08 15:35:12 +08:00
|
|
|
atomic_long_t writeback; /* number of writeback pages */
|
2005-04-17 06:20:36 +08:00
|
|
|
int flags; /* various flags */
|
|
|
|
unsigned int caps; /* server capabilities */
|
|
|
|
unsigned int rsize; /* read size */
|
|
|
|
unsigned int rpages; /* read size (in pages) */
|
|
|
|
unsigned int wsize; /* write size */
|
|
|
|
unsigned int wpages; /* write size (in pages) */
|
|
|
|
unsigned int wtmult; /* server disk block size */
|
|
|
|
unsigned int dtsize; /* readdir size */
|
2008-03-15 02:10:22 +08:00
|
|
|
unsigned short port; /* "port=" setting */
|
2005-04-17 06:20:36 +08:00
|
|
|
unsigned int bsize; /* server block size */
|
|
|
|
unsigned int acregmin; /* attr cache timeouts */
|
|
|
|
unsigned int acregmax;
|
|
|
|
unsigned int acdirmin;
|
|
|
|
unsigned int acdirmax;
|
|
|
|
unsigned int namelen;
|
2009-04-03 23:42:42 +08:00
|
|
|
unsigned int options; /* extra options enabled by mount */
|
|
|
|
#define NFS_OPTION_FSCACHE 0x00000001 /* - local caching enabled */
|
NFS: Share NFS superblocks per-protocol per-server per-FSID
The attached patch makes NFS share superblocks between mounts from the same
server and FSID over the same protocol.
It does this by creating each superblock with a false root and returning the
real root dentry in the vfsmount presented by get_sb(). The root dentry set
starts off as an anonymous dentry if we don't already have the dentry for its
inode, otherwise it simply returns the dentry we already have.
We may thus end up with several trees of dentries in the superblock, and if at
some later point one of the anonymous tree roots is discovered by normal filesystem
activity to be located in another tree within the superblock, the anonymous
root is named and materialises attached to the second tree at the appropriate
point.
Why do it this way? Why not pass an extra argument to the mount() syscall to
indicate the subpath and then pathwalk from the server root to the desired
directory? You can't guarantee this will work for two reasons:
(1) The root and intervening nodes may not be accessible to the client.
With NFS2 and NFS3, for instance, mountd is called on the server to get
the filehandle for the tip of a path. mountd won't give us handles for
anything we don't have permission to access, and so we can't set up NFS
inodes for such nodes, and so can't easily set up dentries (we'd have to
have ghost inodes or something).
With this patch we don't actually create dentries until we get handles
from the server that we can use to set up their inodes, and we don't
actually bind them into the tree until we know for sure where they go.
(2) Inaccessible symbolic links.
If we're asked to mount two exports from the server, eg:
mount warthog:/warthog/aaa/xxx /mmm
mount warthog:/warthog/bbb/yyy /nnn
We may not be able to access anything nearer the root than xxx and yyy,
but we may find out later that /mmm/www/yyy, say, is actually the same
directory as the one mounted on /nnn. What we might then find out, for
example, is that /warthog/bbb was actually a symbolic link to
/warthog/aaa/xxx/www, but we can't actually determine that by talking to
the server until /warthog is made available by NFS.
This would lead to having constructed an erroneous dentry tree which we
can't easily fix. We can end up with a dentry marked as a directory when
it should actually be a symlink, or we could end up with an apparently
hardlinked directory.
With this patch we need not make assumptions about the type of a dentry
for which we can't retrieve information, nor need we assume we know its
place in the grand scheme of things until we actually see that place.
This patch reduces the possibility of aliasing in the inode and page caches for
inodes that may be accessed by more than one NFS export. It also reduces the
number of superblocks required for NFS where there are many NFS exports being
used from a server (home directory server + autofs for example).
This in turn makes it simpler to do local caching of network filesystems, as it
can then be guaranteed that there won't be links from multiple inodes in
separate superblocks to the same cache file.
Obviously, cache aliasing between different levels of NFS protocol could still
be a problem, but at least that gives us another key to use when indexing the
cache.
This patch makes the following changes:
(1) The server record construction/destruction has been abstracted out into
its own set of functions to make things easier to get right. These have
been moved into fs/nfs/client.c.
All the code in fs/nfs/client.c has to do with the management of
connections to servers, and doesn't touch superblocks in any way; the
remaining code in fs/nfs/super.c has to do with VFS superblock management.
(2) The sequence of events undertaken by NFS mount is now reordered:
(a) A volume representation (struct nfs_server) is allocated.
(b) A server representation (struct nfs_client) is acquired. This may be
allocated or shared, and is keyed on server address, port and NFS
version.
(c) If allocated, the client representation is initialised. The state
member variable of nfs_client is used to prevent a race during
initialisation from two mounts.
(d) For NFS4 a simple pathwalk is performed, walking from FH to FH to find
the root filehandle for the mount (fs/nfs/getroot.c). For NFS2/3 we
are given the root FH in advance.
(e) The volume FSID is probed for on the root FH.
(f) The volume representation is initialised from the FSINFO record
retrieved on the root FH.
(g) sget() is called to acquire a superblock. This may be allocated or
shared, keyed on client pointer and FSID.
(h) If allocated, the superblock is initialised.
(i) If the superblock is shared, then the new nfs_server record is
discarded.
(j) The root dentry for this mount is looked up from the root FH.
(k) The root dentry for this mount is assigned to the vfsmount.
(3) nfs_readdir_lookup() creates dentries for each of the entries readdir()
returns; this function now attaches disconnected trees from alternate
roots that happen to be discovered attached to a directory being read (in
the same way nfs_lookup() is made to do for lookup ops).
The new d_materialise_unique() function is now used to do this, thus
permitting the whole thing to be done under one set of locks, and thus
avoiding any race between mount and lookup operations on the same
directory.
(4) The client management code uses a new debug facility: NFSDBG_CLIENT which
is set by echoing 1024 to /proc/net/sunrpc/nfs_debug.
(5) Clone mounts are now called xdev mounts.
(6) Use the dentry passed to the statfs() op as the handle for retrieving fs
statistics rather than the root dentry of the superblock (which is now a
dummy).
Signed-Off-By: David Howells <dhowells@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
2006-08-23 08:06:13 +08:00
|
|
|
|
2006-06-09 21:34:19 +08:00
|
|
|
struct nfs_fsid fsid;
|
NFS: Share NFS superblocks per-protocol per-server per-FSID
The attached patch makes NFS share superblocks between mounts from the same
server and FSID over the same protocol.
It does this by creating each superblock with a false root and returning the
real root dentry in the vfsmount presented by get_sb(). The root dentry set
starts off as an anonymous dentry if we don't already have the dentry for its
inode, otherwise it simply returns the dentry we already have.
We may thus end up with several trees of dentries in the superblock, and if at
some later point one of the anonymous tree roots is discovered by normal filesystem
activity to be located in another tree within the superblock, the anonymous
root is named and materialises attached to the second tree at the appropriate
point.
Why do it this way? Why not pass an extra argument to the mount() syscall to
indicate the subpath and then pathwalk from the server root to the desired
directory? You can't guarantee this will work for two reasons:
(1) The root and intervening nodes may not be accessible to the client.
With NFS2 and NFS3, for instance, mountd is called on the server to get
the filehandle for the tip of a path. mountd won't give us handles for
anything we don't have permission to access, and so we can't set up NFS
inodes for such nodes, and so can't easily set up dentries (we'd have to
have ghost inodes or something).
With this patch we don't actually create dentries until we get handles
from the server that we can use to set up their inodes, and we don't
actually bind them into the tree until we know for sure where they go.
(2) Inaccessible symbolic links.
If we're asked to mount two exports from the server, eg:
mount warthog:/warthog/aaa/xxx /mmm
mount warthog:/warthog/bbb/yyy /nnn
We may not be able to access anything nearer the root than xxx and yyy,
but we may find out later that /mmm/www/yyy, say, is actually the same
directory as the one mounted on /nnn. What we might then find out, for
example, is that /warthog/bbb was actually a symbolic link to
/warthog/aaa/xxx/www, but we can't actually determine that by talking to
the server until /warthog is made available by NFS.
This would lead to having constructed an erroneous dentry tree which we
can't easily fix. We can end up with a dentry marked as a directory when
it should actually be a symlink, or we could end up with an apparently
hardlinked directory.
With this patch we need not make assumptions about the type of a dentry
for which we can't retrieve information, nor need we assume we know its
place in the grand scheme of things until we actually see that place.
This patch reduces the possibility of aliasing in the inode and page caches for
inodes that may be accessed by more than one NFS export. It also reduces the
number of superblocks required for NFS where there are many NFS exports being
used from a server (home directory server + autofs for example).
This in turn makes it simpler to do local caching of network filesystems, as it
can then be guaranteed that there won't be links from multiple inodes in
separate superblocks to the same cache file.
Obviously, cache aliasing between different levels of NFS protocol could still
be a problem, but at least that gives us another key to use when indexing the
cache.
This patch makes the following changes:
(1) The server record construction/destruction has been abstracted out into
its own set of functions to make things easier to get right. These have
been moved into fs/nfs/client.c.
All the code in fs/nfs/client.c has to do with the management of
connections to servers, and doesn't touch superblocks in any way; the
remaining code in fs/nfs/super.c has to do with VFS superblock management.
(2) The sequence of events undertaken by NFS mount is now reordered:
(a) A volume representation (struct nfs_server) is allocated.
(b) A server representation (struct nfs_client) is acquired. This may be
allocated or shared, and is keyed on server address, port and NFS
version.
(c) If allocated, the client representation is initialised. The state
member variable of nfs_client is used to prevent a race during
initialisation from two mounts.
(d) For NFS4 a simple pathwalk is performed, walking from FH to FH to find
the root filehandle for the mount (fs/nfs/getroot.c). For NFS2/3 we
are given the root FH in advance.
(e) The volume FSID is probed for on the root FH.
(f) The volume representation is initialised from the FSINFO record
retrieved on the root FH.
(g) sget() is called to acquire a superblock. This may be allocated or
shared, keyed on client pointer and FSID.
(h) If allocated, the superblock is initialised.
(i) If the superblock is shared, then the new nfs_server record is
discarded.
(j) The root dentry for this mount is looked up from the root FH.
(k) The root dentry for this mount is assigned to the vfsmount.
(3) nfs_readdir_lookup() creates dentries for each of the entries readdir()
returns; this function now attaches disconnected trees from alternate
roots that happen to be discovered attached to a directory being read (in
the same way nfs_lookup() is made to do for lookup ops).
The new d_materialise_unique() function is now used to do this, thus
permitting the whole thing to be done under one set of locks, and thus
avoiding any race between mount and lookup operations on the same
directory.
(4) The client management code uses a new debug facility: NFSDBG_CLIENT which
is set by echoing 1024 to /proc/net/sunrpc/nfs_debug.
(5) Clone mounts are now called xdev mounts.
(6) Use the dentry passed to the statfs() op as the handle for retrieving fs
statistics rather than the root dentry of the superblock (which is now a
dummy).
Signed-Off-By: David Howells <dhowells@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
2006-08-23 08:06:13 +08:00
|
|
|
__u64 maxfilesize; /* maximum file size */
|
2010-10-13 07:30:05 +08:00
|
|
|
struct timespec time_delta; /* smallest time granularity */
|
2006-03-21 02:44:15 +08:00
|
|
|
unsigned long mount_time; /* when this fs was mounted */
|
NFS: Share NFS superblocks per-protocol per-server per-FSID
The attached patch makes NFS share superblocks between mounts from the same
server and FSID over the same protocol.
It does this by creating each superblock with a false root and returning the
real root dentry in the vfsmount presented by get_sb(). The root dentry set
starts off as an anonymous dentry if we don't already have the dentry for its
inode, otherwise it simply returns the dentry we already have.
We may thus end up with several trees of dentries in the superblock, and if at
some later point one of the anonymous tree roots is discovered by normal filesystem
activity to be located in another tree within the superblock, the anonymous
root is named and materialises attached to the second tree at the appropriate
point.
Why do it this way? Why not pass an extra argument to the mount() syscall to
indicate the subpath and then pathwalk from the server root to the desired
directory? You can't guarantee this will work for two reasons:
(1) The root and intervening nodes may not be accessible to the client.
With NFS2 and NFS3, for instance, mountd is called on the server to get
the filehandle for the tip of a path. mountd won't give us handles for
anything we don't have permission to access, and so we can't set up NFS
inodes for such nodes, and so can't easily set up dentries (we'd have to
have ghost inodes or something).
With this patch we don't actually create dentries until we get handles
from the server that we can use to set up their inodes, and we don't
actually bind them into the tree until we know for sure where they go.
(2) Inaccessible symbolic links.
If we're asked to mount two exports from the server, eg:
mount warthog:/warthog/aaa/xxx /mmm
mount warthog:/warthog/bbb/yyy /nnn
We may not be able to access anything nearer the root than xxx and yyy,
but we may find out later that /mmm/www/yyy, say, is actually the same
directory as the one mounted on /nnn. What we might then find out, for
example, is that /warthog/bbb was actually a symbolic link to
/warthog/aaa/xxx/www, but we can't actually determine that by talking to
the server until /warthog is made available by NFS.
This would lead to having constructed an erroneous dentry tree which we
can't easily fix. We can end up with a dentry marked as a directory when
it should actually be a symlink, or we could end up with an apparently
hardlinked directory.
With this patch we need not make assumptions about the type of a dentry
for which we can't retrieve information, nor need we assume we know its
place in the grand scheme of things until we actually see that place.
This patch reduces the possibility of aliasing in the inode and page caches for
inodes that may be accessed by more than one NFS export. It also reduces the
number of superblocks required for NFS where there are many NFS exports being
used from a server (home directory server + autofs for example).
This in turn makes it simpler to do local caching of network filesystems, as it
can then be guaranteed that there won't be links from multiple inodes in
separate superblocks to the same cache file.
Obviously, cache aliasing between different levels of NFS protocol could still
be a problem, but at least that gives us another key to use when indexing the
cache.
This patch makes the following changes:
(1) The server record construction/destruction has been abstracted out into
its own set of functions to make things easier to get right. These have
been moved into fs/nfs/client.c.
All the code in fs/nfs/client.c has to do with the management of
connections to servers, and doesn't touch superblocks in any way; the
remaining code in fs/nfs/super.c has to do with VFS superblock management.
(2) The sequence of events undertaken by NFS mount is now reordered:
(a) A volume representation (struct nfs_server) is allocated.
(b) A server representation (struct nfs_client) is acquired. This may be
allocated or shared, and is keyed on server address, port and NFS
version.
(c) If allocated, the client representation is initialised. The state
member variable of nfs_client is used to prevent a race during
initialisation from two mounts.
(d) For NFS4 a simple pathwalk is performed, walking from FH to FH to find
the root filehandle for the mount (fs/nfs/getroot.c). For NFS2/3 we
are given the root FH in advance.
(e) The volume FSID is probed for on the root FH.
(f) The volume representation is initialised from the FSINFO record
retrieved on the root FH.
(g) sget() is called to acquire a superblock. This may be allocated or
shared, keyed on client pointer and FSID.
(h) If allocated, the superblock is initialised.
(i) If the superblock is shared, then the new nfs_server record is
discarded.
(j) The root dentry for this mount is looked up from the root FH.
(k) The root dentry for this mount is assigned to the vfsmount.
(3) nfs_readdir_lookup() creates dentries for each of the entries readdir()
returns; this function now attaches disconnected trees from alternate
roots that happen to be discovered attached to a directory being read (in
the same way nfs_lookup() is made to do for lookup ops).
The new d_materialise_unique() function is now used to do this, thus
permitting the whole thing to be done under one set of locks, and thus
avoiding any race between mount and lookup operations on the same
directory.
(4) The client management code uses a new debug facility: NFSDBG_CLIENT which
is set by echoing 1024 to /proc/net/sunrpc/nfs_debug.
(5) Clone mounts are now called xdev mounts.
(6) Use the dentry passed to the statfs() op as the handle for retrieving fs
statistics rather than the root dentry of the superblock (which is now a
dummy).
Signed-Off-By: David Howells <dhowells@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
2006-08-23 08:06:13 +08:00
|
|
|
dev_t s_dev; /* superblock dev numbers */
|
|
|
|
|
2009-04-03 23:42:42 +08:00
|
|
|
#ifdef CONFIG_NFS_FSCACHE
|
|
|
|
struct nfs_fscache_key *fscache_key; /* unique key for superblock */
|
|
|
|
struct fscache_cookie *fscache; /* superblock cookie */
|
|
|
|
#endif
|
|
|
|
|
2011-08-01 04:39:04 +08:00
|
|
|
u32 pnfs_blksize; /* layout_blksize attr */
|
2005-04-17 06:20:36 +08:00
|
|
|
#ifdef CONFIG_NFS_V4
|
2011-07-31 08:52:37 +08:00
|
|
|
u32 attr_bitmask[3];/* V4 bitmask representing the set
|
2005-04-17 06:20:36 +08:00
|
|
|
of attributes supported on this
|
|
|
|
filesystem */
|
2009-03-12 02:10:28 +08:00
|
|
|
u32 cache_consistency_bitmask[2];
|
|
|
|
/* V4 bitmask representing the subset
|
|
|
|
of change attribute, size, ctime
|
|
|
|
and mtime attributes supported by
|
|
|
|
the server */
|
2005-04-17 06:20:36 +08:00
|
|
|
u32 acl_bitmask; /* V4 bitmask representing the ACEs
|
|
|
|
that are supported on this
|
|
|
|
filesystem */
|
2012-03-02 06:02:05 +08:00
|
|
|
u32 fh_expire_type; /* V4 bitmask representing file
|
|
|
|
handle volatility type for
|
|
|
|
this filesystem */
|
2010-10-20 12:17:58 +08:00
|
|
|
struct pnfs_layoutdriver_type *pnfs_curr_ld; /* Active layout driver */
|
2011-01-06 19:36:32 +08:00
|
|
|
struct rpc_wait_queue roc_rpcwaitq;
|
2011-07-31 08:52:46 +08:00
|
|
|
void *pnfs_ld_data; /* per mount point data */
|
2010-12-24 09:32:43 +08:00
|
|
|
|
|
|
|
/* the following fields are protected by nfs_client->cl_lock */
|
|
|
|
struct rb_root state_owners;
|
2005-04-17 06:20:36 +08:00
|
|
|
#endif
|
2012-01-18 11:04:24 +08:00
|
|
|
struct ida openowner_id;
|
2012-01-18 11:04:25 +08:00
|
|
|
struct ida lockowner_id;
|
NFS: Cache state owners after files are closed
Servers have a finite amount of memory to store NFSv4 open and lock
owners. Moreover, servers may have a difficult time determining when
they can reap their state owner table, thanks to gray areas in the
NFSv4 protocol specification. Thus clients should be careful to reuse
state owners when possible.
Currently Linux is not too careful. When a user has closed all her
files on one mount point, the state owner's reference count goes to
zero, and it is released. The next OPEN allocates a new one. A
workload that serially opens and closes files can run through a large
number of open owners this way.
When a state owner's reference count goes to zero, slap it onto a free
list for that nfs_server, with an expiry time. Garbage collect before
looking for a state owner. This makes state owners for active users
available for re-use.
Now that there can be unused state owners remaining at umount time,
purge the state owner free list when a server is destroyed. Also be
sure not to reclaim unused state owners during state recovery.
This change has benefits for the client as well. For some workloads,
this approach drops the number of OPEN_CONFIRM calls from the same as
the number of OPEN calls, down to just one. This reduces wire traffic
and thus open(2) latency. Before this patch, untarring a kernel
source tarball shows the OPEN_CONFIRM call counter steadily increasing
through the test. With the patch, the OPEN_CONFIRM count remains at 1
throughout the entire untar.
As long as the expiry time is kept short, I don't think garbage
collection should be terribly expensive, although it does bounce the
clp->cl_lock around a bit.
[ At some point we should rationalize the use of the nfs_server
->destroy method. ]
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
[Trond: Fixed a garbage collection race and a few efficiency issues]
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
2011-12-07 05:13:48 +08:00
|
|
|
struct list_head state_owners_lru;
|
2011-06-02 04:44:44 +08:00
|
|
|
struct list_head layouts;
|
2010-12-24 09:33:04 +08:00
|
|
|
struct list_head delegations;
|
NFS: Share NFS superblocks per-protocol per-server per-FSID
The attached patch makes NFS share superblocks between mounts from the same
server and FSID over the same protocol.
It does this by creating each superblock with a false root and returning the
real root dentry in the vfsmount presented by get_sb(). The root dentry set
starts off as an anonymous dentry if we don't already have the dentry for its
inode, otherwise it simply returns the dentry we already have.
We may thus end up with several trees of dentries in the superblock, and if at
some later point one of the anonymous tree roots is discovered by normal filesystem
activity to be located in another tree within the superblock, the anonymous
root is named and materialises attached to the second tree at the appropriate
point.
Why do it this way? Why not pass an extra argument to the mount() syscall to
indicate the subpath and then pathwalk from the server root to the desired
directory? You can't guarantee this will work for two reasons:
(1) The root and intervening nodes may not be accessible to the client.
With NFS2 and NFS3, for instance, mountd is called on the server to get
the filehandle for the tip of a path. mountd won't give us handles for
anything we don't have permission to access, and so we can't set up NFS
inodes for such nodes, and so can't easily set up dentries (we'd have to
have ghost inodes or something).
With this patch we don't actually create dentries until we get handles
from the server that we can use to set up their inodes, and we don't
actually bind them into the tree until we know for sure where they go.
(2) Inaccessible symbolic links.
If we're asked to mount two exports from the server, eg:
mount warthog:/warthog/aaa/xxx /mmm
mount warthog:/warthog/bbb/yyy /nnn
We may not be able to access anything nearer the root than xxx and yyy,
but we may find out later that /mmm/www/yyy, say, is actually the same
directory as the one mounted on /nnn. What we might then find out, for
example, is that /warthog/bbb was actually a symbolic link to
/warthog/aaa/xxx/www, but we can't actually determine that by talking to
the server until /warthog is made available by NFS.
This would lead to having constructed an erroneous dentry tree which we
can't easily fix. We can end up with a dentry marked as a directory when
it should actually be a symlink, or we could end up with an apparently
hardlinked directory.
With this patch we need not make assumptions about the type of a dentry
for which we can't retrieve information, nor need we assume we know its
place in the grand scheme of things until we actually see that place.
This patch reduces the possibility of aliasing in the inode and page caches for
inodes that may be accessed by more than one NFS export. It also reduces the
number of superblocks required for NFS where there are many NFS exports being
used from a server (home directory server + autofs for example).
This in turn makes it simpler to do local caching of network filesystems, as it
can then be guaranteed that there won't be links from multiple inodes in
separate superblocks to the same cache file.
Obviously, cache aliasing between different levels of NFS protocol could still
be a problem, but at least that gives us another key to use when indexing the
cache.
This patch makes the following changes:
(1) The server record construction/destruction has been abstracted out into
its own set of functions to make things easier to get right. These have
been moved into fs/nfs/client.c.
All the code in fs/nfs/client.c has to do with the management of
connections to servers, and doesn't touch superblocks in any way; the
remaining code in fs/nfs/super.c has to do with VFS superblock management.
(2) The sequence of events undertaken by NFS mount is now reordered:
(a) A volume representation (struct nfs_server) is allocated.
(b) A server representation (struct nfs_client) is acquired. This may be
allocated or shared, and is keyed on server address, port and NFS
version.
(c) If allocated, the client representation is initialised. The state
member variable of nfs_client is used to prevent a race during
initialisation from two mounts.
(d) For NFS4 a simple pathwalk is performed, walking from FH to FH to find
the root filehandle for the mount (fs/nfs/getroot.c). For NFS2/3 we
are given the root FH in advance.
(e) The volume FSID is probed for on the root FH.
(f) The volume representation is initialised from the FSINFO record
retrieved on the root FH.
(g) sget() is called to acquire a superblock. This may be allocated or
shared, keyed on client pointer and FSID.
(h) If allocated, the superblock is initialised.
(i) If the superblock is shared, then the new nfs_server record is
discarded.
(j) The root dentry for this mount is looked up from the root FH.
(k) The root dentry for this mount is assigned to the vfsmount.
(3) nfs_readdir_lookup() creates dentries for each of the entries readdir()
returns; this function now attaches disconnected trees from alternate
roots that happen to be discovered attached to a directory being read (in
the same way nfs_lookup() is made to do for lookup ops).
The new d_materialise_unique() function is now used to do this, thus
permitting the whole thing to be done under one set of locks, and thus
avoiding any race between mount and lookup operations on the same
directory.
(4) The client management code uses a new debug facility: NFSDBG_CLIENT which
is set by echoing 1024 to /proc/net/sunrpc/nfs_debug.
(5) Clone mounts are now called xdev mounts.
(6) Use the dentry passed to the statfs() op as the handle for retrieving fs
statistics rather than the root dentry of the superblock (which is now a
dummy).
Signed-Off-By: David Howells <dhowells@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
2006-08-23 08:06:13 +08:00
|
|
|
void (*destroy)(struct nfs_server *);
|
2007-11-08 17:05:04 +08:00
|
|
|
|
|
|
|
atomic_t active; /* Keep track of any activity to this server */
|
2008-03-15 02:10:30 +08:00
|
|
|
|
|
|
|
/* mountd-related mount options */
|
|
|
|
struct sockaddr_storage mountd_address;
|
|
|
|
size_t mountd_addrlen;
|
|
|
|
u32 mountd_version;
|
|
|
|
unsigned short mountd_port;
|
|
|
|
unsigned short mountd_protocol;
|
2005-04-17 06:20:36 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
/*
 * Server capabilities
 *
 * Each flag occupies its own bit (bits 0 through 15), so any combination
 * of capabilities can be held in a single unsigned bitmask and tested
 * with a bitwise AND.
 */
#define NFS_CAP_READDIRPLUS	(1U << 0)
#define NFS_CAP_HARDLINKS	(1U << 1)
#define NFS_CAP_SYMLINKS	(1U << 2)
#define NFS_CAP_ACLS		(1U << 3)
#define NFS_CAP_ATOMIC_OPEN	(1U << 4)
#define NFS_CAP_CHANGE_ATTR	(1U << 5)
#define NFS_CAP_FILEID		(1U << 6)
#define NFS_CAP_MODE		(1U << 7)
#define NFS_CAP_NLINK		(1U << 8)
#define NFS_CAP_OWNER		(1U << 9)
#define NFS_CAP_OWNER_GROUP	(1U << 10)
#define NFS_CAP_ATIME		(1U << 11)
#define NFS_CAP_CTIME		(1U << 12)
#define NFS_CAP_MTIME		(1U << 13)
#define NFS_CAP_POSIX_LOCK	(1U << 14)
#define NFS_CAP_UIDGID_NOMAP	(1U << 15)
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2009-04-01 21:21:53 +08:00
|
|
|
|
|
|
|
/* maximum number of slots to use */
#define NFS4_DEF_SLOT_TABLE_SIZE (16U)
#define NFS4_MAX_SLOT_TABLE (256U)
/*
 * All-ones u32 value.
 * NOTE(review): by its name this is the "no slot" sentinel for slot ids;
 * confirm against the call sites.
 */
#define NFS4_NO_SLOT ((u32)-1)
|
2009-04-01 21:21:53 +08:00
|
|
|
|
2011-03-10 05:00:53 +08:00
|
|
|
#if defined(CONFIG_NFS_V4)
|
2009-04-01 21:21:53 +08:00
|
|
|
|
|
|
|
/* Sessions */
|
2012-02-07 08:50:40 +08:00
|
|
|
#define SLOT_TABLE_SZ DIV_ROUND_UP(NFS4_MAX_SLOT_TABLE, 8*sizeof(long))
|
2009-04-01 21:21:53 +08:00
|
|
|
struct nfs4_slot_table {
|
|
|
|
struct nfs4_slot *slots; /* seqid per slot */
|
|
|
|
unsigned long used_slots[SLOT_TABLE_SZ]; /* used/unused bitmap */
|
|
|
|
spinlock_t slot_tbl_lock;
|
|
|
|
struct rpc_wait_queue slot_tbl_waitq; /* allocators may wait here */
|
2012-02-07 08:38:51 +08:00
|
|
|
u32 max_slots; /* # slots in table */
|
|
|
|
u32 highest_used_slotid; /* sent to server on each SEQ.
|
2009-04-01 21:21:53 +08:00
|
|
|
* op for dynamic resizing */
|
2012-02-07 08:38:51 +08:00
|
|
|
u32 target_max_slots; /* Set by CB_RECALL_SLOT as
|
2010-01-21 05:06:27 +08:00
|
|
|
* the new max_slots */
|
2011-01-06 10:04:34 +08:00
|
|
|
struct completion complete;
|
2009-04-01 21:21:53 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
static inline int slot_idx(struct nfs4_slot_table *tbl, struct nfs4_slot *sp)
|
|
|
|
{
|
|
|
|
return sp - tbl->slots;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Session related parameters
|
|
|
|
*/
|
|
|
|
struct nfs4_session {
|
|
|
|
struct nfs4_sessionid sess_id;
|
|
|
|
u32 flags;
|
|
|
|
unsigned long session_state;
|
|
|
|
u32 hash_alg;
|
|
|
|
u32 ssv_len;
|
|
|
|
|
|
|
|
/* The fore and back channel */
|
|
|
|
struct nfs4_channel_attrs fc_attrs;
|
|
|
|
struct nfs4_slot_table fc_slot_table;
|
|
|
|
struct nfs4_channel_attrs bc_attrs;
|
2009-04-01 21:23:33 +08:00
|
|
|
struct nfs4_slot_table bc_slot_table;
|
2009-04-01 21:21:53 +08:00
|
|
|
struct nfs_client *clp;
|
|
|
|
};
|
|
|
|
|
2011-03-10 05:00:53 +08:00
|
|
|
#endif /* CONFIG_NFS_V4 */
|
2005-04-17 06:20:36 +08:00
|
|
|
#endif
|