2007-04-27 06:49:28 +08:00
|
|
|
/* internal AFS stuff
|
2005-04-17 06:20:36 +08:00
|
|
|
*
|
2007-04-27 06:55:03 +08:00
|
|
|
* Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
|
2005-04-17 06:20:36 +08:00
|
|
|
* Written by David Howells (dhowells@redhat.com)
|
|
|
|
*
|
|
|
|
* This program is free software; you can redistribute it and/or
|
|
|
|
* modify it under the terms of the GNU General Public License
|
|
|
|
* as published by the Free Software Foundation; either version
|
|
|
|
* 2 of the License, or (at your option) any later version.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <linux/compiler.h>
|
|
|
|
#include <linux/kernel.h>
|
2017-03-17 00:27:46 +08:00
|
|
|
#include <linux/ktime.h>
|
2005-04-17 06:20:36 +08:00
|
|
|
#include <linux/fs.h>
|
|
|
|
#include <linux/pagemap.h>
|
2007-04-27 06:55:03 +08:00
|
|
|
#include <linux/rxrpc.h>
|
2007-04-27 06:57:07 +08:00
|
|
|
#include <linux/key.h>
|
Detach sched.h from mm.h
First thing mm.h does is including sched.h solely for can_do_mlock() inline
function which has "current" dereference inside. By dealing with can_do_mlock()
mm.h can be detached from sched.h which is good. See below, why.
This patch
a) removes unconditional inclusion of sched.h from mm.h
b) makes can_do_mlock() normal function in mm/mlock.c
c) exports can_do_mlock() to not break compilation
d) adds sched.h inclusions back to files that were getting it indirectly.
e) adds less bloated headers to some files (asm/signal.h, jiffies.h) that were
getting them indirectly
Net result is:
a) mm.h users would get less code to open, read, preprocess, parse, ... if
they don't need sched.h
b) sched.h stops being dependency for significant number of files:
on x86_64 allmodconfig touching sched.h results in recompile of 4083 files,
after patch it's only 3744 (-8.3%).
Cross-compile tested on
all arm defconfigs, all mips defconfigs, all powerpc defconfigs,
alpha alpha-up
arm
i386 i386-up i386-defconfig i386-allnoconfig
ia64 ia64-up
m68k
mips
parisc parisc-up
powerpc powerpc-up
s390 s390-up
sparc sparc-up
sparc64 sparc64-up
um-x86_64
x86_64 x86_64-up x86_64-defconfig x86_64-allnoconfig
as well as my two usual configs.
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-05-21 05:22:52 +08:00
|
|
|
#include <linux/workqueue.h>
|
2007-05-31 15:40:52 +08:00
|
|
|
#include <linux/sched.h>
|
2009-10-02 06:44:27 +08:00
|
|
|
#include <linux/fscache.h>
|
2010-04-22 17:58:18 +08:00
|
|
|
#include <linux/backing-dev.h>
|
2017-02-11 00:34:07 +08:00
|
|
|
#include <linux/uuid.h>
|
2017-11-02 23:27:45 +08:00
|
|
|
#include <net/net_namespace.h>
|
2016-08-30 16:49:29 +08:00
|
|
|
#include <net/af_rxrpc.h>
|
2007-05-31 15:40:52 +08:00
|
|
|
|
2007-04-27 06:55:03 +08:00
|
|
|
#include "afs.h"
|
|
|
|
#include "afs_vl.h"
|
|
|
|
|
|
|
|
/* Capacity of the per-cell VL server address array (afs_cell::vl_addrs[]). */
#define AFS_CELL_MAX_ADDRS 15
|
|
|
|
|
AFS: implement basic file write support
Implement support for writing to regular AFS files, including:
(1) write
(2) truncate
(3) fsync, fdatasync
(4) chmod, chown, chgrp, utime.
AFS writeback attempts to batch writes into as chunks as large as it can manage
up to the point that it writes back 65535 pages in one chunk or it meets a
locked page.
Furthermore, if a page has been written to using a particular key, then should
another write to that page use some other key, the first write will be flushed
before the second is allowed to take place. If the first write fails due to a
security error, then the page will be scrapped and reread before the second
write takes place.
If a page is dirty and the callback on it is broken by the server, then the
dirty data is not discarded (same behaviour as NFS).
Shared-writable mappings are not supported by this patch.
[akpm@linux-foundation.org: fix a bunch of warnings]
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-05-09 17:33:46 +08:00
|
|
|
struct pagevec;		/* forward declaration only; not defined in this header */
|
2007-04-27 06:55:03 +08:00
|
|
|
struct afs_call;	/* forward declaration; the full definition follows below */
|
|
|
|
|
|
|
|
/*
 * States of a VL (volume location) record.
 *
 * The enum is packed so that it occupies the smallest integer type able to
 * hold its values.
 */
typedef enum {
	AFS_VL_NEW,			/* new, uninitialised record */
	AFS_VL_CREATING,		/* creating record */
	AFS_VL_VALID,			/* record is valid (NOTE(review): comment
					 * previously read "pending" - confirm) */
	AFS_VL_NO_VOLUME,		/* no such volume available */
	AFS_VL_UPDATING,		/* update in progress */
	AFS_VL_VOLUME_DELETED,		/* volume was deleted */
	AFS_VL_UNCERTAIN,		/* uncertain state (update failed) */
} __attribute__((packed)) afs_vlocation_state_t;
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2007-04-27 06:57:07 +08:00
|
|
|
struct afs_mount_params {
|
|
|
|
bool rwpath; /* T if the parent should be considered R/W */
|
|
|
|
bool force; /* T to force cell type */
|
2010-08-11 16:38:04 +08:00
|
|
|
bool autocell; /* T if set auto mount operation */
|
2007-04-27 06:57:07 +08:00
|
|
|
afs_voltype_t type; /* type of volume requested */
|
|
|
|
int volnamesz; /* size of volume name */
|
|
|
|
const char *volname; /* name of volume to mount */
|
2017-11-02 23:27:45 +08:00
|
|
|
struct afs_net *net; /* Network namespace in effect */
|
2007-04-27 06:57:07 +08:00
|
|
|
struct afs_cell *cell; /* cell in which to find volume */
|
|
|
|
struct afs_volume *volume; /* volume record */
|
|
|
|
struct key *key; /* key to use for secure mounting */
|
|
|
|
};
|
|
|
|
|
2017-11-02 23:27:49 +08:00
|
|
|
struct afs_iget_data {
|
|
|
|
struct afs_fid fid;
|
|
|
|
struct afs_volume *volume; /* volume on which resides */
|
|
|
|
};
|
|
|
|
|
2017-01-05 18:38:34 +08:00
|
|
|
/*
 * Phases of an RxRPC call, covering both outgoing and incoming calls.
 */
enum afs_call_state {
	AFS_CALL_REQUESTING,	/* request is being sent for outgoing call */
	AFS_CALL_AWAIT_REPLY,	/* awaiting reply to outgoing call */
	AFS_CALL_AWAIT_OP_ID,	/* awaiting op ID on incoming call */
	AFS_CALL_AWAIT_REQUEST,	/* awaiting request data on incoming call */
	AFS_CALL_REPLYING,	/* replying to incoming call */
	AFS_CALL_AWAIT_ACK,	/* awaiting final ACK of incoming call */
	AFS_CALL_COMPLETE,	/* Completed or failed */
};
|
2017-11-02 23:27:45 +08:00
|
|
|
|
2007-04-27 06:55:03 +08:00
|
|
|
/*
|
|
|
|
* a record of an in-progress RxRPC call
|
|
|
|
*/
|
|
|
|
struct afs_call {
|
|
|
|
const struct afs_call_type *type; /* type of call */
|
|
|
|
wait_queue_head_t waitq; /* processes awaiting completion */
|
2017-01-05 18:38:36 +08:00
|
|
|
struct work_struct async_work; /* async I/O processor */
|
2007-04-27 06:55:03 +08:00
|
|
|
struct work_struct work; /* actual work processor */
|
|
|
|
struct rxrpc_call *rxcall; /* RxRPC call handle */
|
|
|
|
struct key *key; /* security for this call */
|
2017-11-02 23:27:45 +08:00
|
|
|
struct afs_net *net; /* The network namespace */
|
2017-11-02 23:27:49 +08:00
|
|
|
struct afs_server *cm_server; /* Server affected by incoming CM call */
|
2017-11-02 23:27:49 +08:00
|
|
|
struct afs_server *server; /* Server used by client call */
|
2007-04-27 06:55:03 +08:00
|
|
|
void *request; /* request data (first part) */
|
AFS: implement basic file write support
Implement support for writing to regular AFS files, including:
(1) write
(2) truncate
(3) fsync, fdatasync
(4) chmod, chown, chgrp, utime.
AFS writeback attempts to batch writes into as chunks as large as it can manage
up to the point that it writes back 65535 pages in one chunk or it meets a
locked page.
Furthermore, if a page has been written to using a particular key, then should
another write to that page use some other key, the first write will be flushed
before the second is allowed to take place. If the first write fails due to a
security error, then the page will be scrapped and reread before the second
write takes place.
If a page is dirty and the callback on it is broken by the server, then the
dirty data is not discarded (same behaviour as NFS).
Shared-writable mappings are not supported by this patch.
[akpm@linux-foundation.org: fix a bunch of warnings]
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-05-09 17:33:46 +08:00
|
|
|
struct address_space *mapping; /* page set */
|
|
|
|
struct afs_writeback *wb; /* writeback being performed */
|
2007-04-27 06:55:03 +08:00
|
|
|
void *buffer; /* reply receive buffer */
|
2017-11-02 23:27:48 +08:00
|
|
|
void *reply[4]; /* Where to put the reply */
|
AFS: implement basic file write support
Implement support for writing to regular AFS files, including:
(1) write
(2) truncate
(3) fsync, fdatasync
(4) chmod, chown, chgrp, utime.
AFS writeback attempts to batch writes into as chunks as large as it can manage
up to the point that it writes back 65535 pages in one chunk or it meets a
locked page.
Furthermore, if a page has been written to using a particular key, then should
another write to that page use some other key, the first write will be flushed
before the second is allowed to take place. If the first write fails due to a
security error, then the page will be scrapped and reread before the second
write takes place.
If a page is dirty and the callback on it is broken by the server, then the
dirty data is not discarded (same behaviour as NFS).
Shared-writable mappings are not supported by this patch.
[akpm@linux-foundation.org: fix a bunch of warnings]
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-05-09 17:33:46 +08:00
|
|
|
pgoff_t first; /* first page in mapping to deal with */
|
|
|
|
pgoff_t last; /* last page in mapping to deal with */
|
rxrpc: Don't expose skbs to in-kernel users [ver #2]
Don't expose skbs to in-kernel users, such as the AFS filesystem, but
instead provide a notification hook the indicates that a call needs
attention and another that indicates that there's a new call to be
collected.
This makes the following possibilities more achievable:
(1) Call refcounting can be made simpler if skbs don't hold refs to calls.
(2) skbs referring to non-data events will be able to be freed much sooner
rather than being queued for AFS to pick up as rxrpc_kernel_recv_data
will be able to consult the call state.
(3) We can shortcut the receive phase when a call is remotely aborted
because we don't have to go through all the packets to get to the one
cancelling the operation.
(4) It makes it easier to do encryption/decryption directly between AFS's
buffers and sk_buffs.
(5) Encryption/decryption can more easily be done in the AFS's thread
contexts - usually that of the userspace process that issued a syscall
- rather than in one of rxrpc's background threads on a workqueue.
(6) AFS will be able to wait synchronously on a call inside AF_RXRPC.
To make this work, the following interface function has been added:
int rxrpc_kernel_recv_data(
struct socket *sock, struct rxrpc_call *call,
void *buffer, size_t bufsize, size_t *_offset,
bool want_more, u32 *_abort_code);
This is the recvmsg equivalent. It allows the caller to find out about the
state of a specific call and to transfer received data into a buffer
piecemeal.
afs_extract_data() and rxrpc_kernel_recv_data() now do all the extraction
logic between them. They don't wait synchronously yet because the socket
lock needs to be dealt with.
Five interface functions have been removed:
rxrpc_kernel_is_data_last()
rxrpc_kernel_get_abort_code()
rxrpc_kernel_get_error_number()
rxrpc_kernel_free_skb()
rxrpc_kernel_data_consumed()
As a temporary hack, sk_buffs going to an in-kernel call are queued on the
rxrpc_call struct (->knlrecv_queue) rather than being handed over to the
in-kernel user. To process the queue internally, a temporary function,
temp_deliver_data() has been added. This will be replaced with common code
between the rxrpc_recvmsg() path and the kernel_rxrpc_recv_data() path in a
future patch.
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-08-31 03:42:14 +08:00
|
|
|
size_t offset; /* offset into received data store */
|
2017-01-05 18:38:36 +08:00
|
|
|
atomic_t usage;
|
2017-01-05 18:38:34 +08:00
|
|
|
enum afs_call_state state;
|
2007-04-27 06:55:03 +08:00
|
|
|
int error; /* error code */
|
rxrpc: Don't expose skbs to in-kernel users [ver #2]
Don't expose skbs to in-kernel users, such as the AFS filesystem, but
instead provide a notification hook the indicates that a call needs
attention and another that indicates that there's a new call to be
collected.
This makes the following possibilities more achievable:
(1) Call refcounting can be made simpler if skbs don't hold refs to calls.
(2) skbs referring to non-data events will be able to be freed much sooner
rather than being queued for AFS to pick up as rxrpc_kernel_recv_data
will be able to consult the call state.
(3) We can shortcut the receive phase when a call is remotely aborted
because we don't have to go through all the packets to get to the one
cancelling the operation.
(4) It makes it easier to do encryption/decryption directly between AFS's
buffers and sk_buffs.
(5) Encryption/decryption can more easily be done in the AFS's thread
contexts - usually that of the userspace process that issued a syscall
- rather than in one of rxrpc's background threads on a workqueue.
(6) AFS will be able to wait synchronously on a call inside AF_RXRPC.
To make this work, the following interface function has been added:
int rxrpc_kernel_recv_data(
struct socket *sock, struct rxrpc_call *call,
void *buffer, size_t bufsize, size_t *_offset,
bool want_more, u32 *_abort_code);
This is the recvmsg equivalent. It allows the caller to find out about the
state of a specific call and to transfer received data into a buffer
piecemeal.
afs_extract_data() and rxrpc_kernel_recv_data() now do all the extraction
logic between them. They don't wait synchronously yet because the socket
lock needs to be dealt with.
Five interface functions have been removed:
rxrpc_kernel_is_data_last()
rxrpc_kernel_get_abort_code()
rxrpc_kernel_get_error_number()
rxrpc_kernel_free_skb()
rxrpc_kernel_data_consumed()
As a temporary hack, sk_buffs going to an in-kernel call are queued on the
rxrpc_call struct (->knlrecv_queue) rather than being handed over to the
in-kernel user. To process the queue internally, a temporary function,
temp_deliver_data() has been added. This will be replaced with common code
between the rxrpc_recvmsg() path and the kernel_rxrpc_recv_data() path in a
future patch.
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-08-31 03:42:14 +08:00
|
|
|
u32 abort_code; /* Remote abort ID or 0 */
|
2007-04-27 06:55:03 +08:00
|
|
|
unsigned request_size; /* size of request data */
|
|
|
|
unsigned reply_max; /* maximum size of reply */
|
AFS: implement basic file write support
Implement support for writing to regular AFS files, including:
(1) write
(2) truncate
(3) fsync, fdatasync
(4) chmod, chown, chgrp, utime.
AFS writeback attempts to batch writes into as chunks as large as it can manage
up to the point that it writes back 65535 pages in one chunk or it meets a
locked page.
Furthermore, if a page has been written to using a particular key, then should
another write to that page use some other key, the first write will be flushed
before the second is allowed to take place. If the first write fails due to a
security error, then the page will be scrapped and reread before the second
write takes place.
If a page is dirty and the callback on it is broken by the server, then the
dirty data is not discarded (same behaviour as NFS).
Shared-writable mappings are not supported by this patch.
[akpm@linux-foundation.org: fix a bunch of warnings]
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-05-09 17:33:46 +08:00
|
|
|
unsigned first_offset; /* offset into mapping[first] */
|
2017-11-02 23:27:49 +08:00
|
|
|
unsigned int cb_break; /* cb_break + cb_s_break before the call */
|
2017-03-17 00:27:44 +08:00
|
|
|
union {
|
|
|
|
unsigned last_to; /* amount of mapping[last] */
|
|
|
|
unsigned count2; /* count used in unmarshalling */
|
|
|
|
};
|
2007-04-27 06:55:03 +08:00
|
|
|
unsigned char unmarshall; /* unmarshalling phase */
|
|
|
|
bool incoming; /* T if incoming call */
|
AFS: implement basic file write support
Implement support for writing to regular AFS files, including:
(1) write
(2) truncate
(3) fsync, fdatasync
(4) chmod, chown, chgrp, utime.
AFS writeback attempts to batch writes into as chunks as large as it can manage
up to the point that it writes back 65535 pages in one chunk or it meets a
locked page.
Furthermore, if a page has been written to using a particular key, then should
another write to that page use some other key, the first write will be flushed
before the second is allowed to take place. If the first write fails due to a
security error, then the page will be scrapped and reread before the second
write takes place.
If a page is dirty and the callback on it is broken by the server, then the
dirty data is not discarded (same behaviour as NFS).
Shared-writable mappings are not supported by this patch.
[akpm@linux-foundation.org: fix a bunch of warnings]
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-05-09 17:33:46 +08:00
|
|
|
bool send_pages; /* T if data from mapping should be sent */
|
rxrpc: Don't expose skbs to in-kernel users [ver #2]
Don't expose skbs to in-kernel users, such as the AFS filesystem, but
instead provide a notification hook the indicates that a call needs
attention and another that indicates that there's a new call to be
collected.
This makes the following possibilities more achievable:
(1) Call refcounting can be made simpler if skbs don't hold refs to calls.
(2) skbs referring to non-data events will be able to be freed much sooner
rather than being queued for AFS to pick up as rxrpc_kernel_recv_data
will be able to consult the call state.
(3) We can shortcut the receive phase when a call is remotely aborted
because we don't have to go through all the packets to get to the one
cancelling the operation.
(4) It makes it easier to do encryption/decryption directly between AFS's
buffers and sk_buffs.
(5) Encryption/decryption can more easily be done in the AFS's thread
contexts - usually that of the userspace process that issued a syscall
- rather than in one of rxrpc's background threads on a workqueue.
(6) AFS will be able to wait synchronously on a call inside AF_RXRPC.
To make this work, the following interface function has been added:
int rxrpc_kernel_recv_data(
struct socket *sock, struct rxrpc_call *call,
void *buffer, size_t bufsize, size_t *_offset,
bool want_more, u32 *_abort_code);
This is the recvmsg equivalent. It allows the caller to find out about the
state of a specific call and to transfer received data into a buffer
piecemeal.
afs_extract_data() and rxrpc_kernel_recv_data() now do all the extraction
logic between them. They don't wait synchronously yet because the socket
lock needs to be dealt with.
Five interface functions have been removed:
rxrpc_kernel_is_data_last()
rxrpc_kernel_get_abort_code()
rxrpc_kernel_get_error_number()
rxrpc_kernel_free_skb()
rxrpc_kernel_data_consumed()
As a temporary hack, sk_buffs going to an in-kernel call are queued on the
rxrpc_call struct (->knlrecv_queue) rather than being handed over to the
in-kernel user. To process the queue internally, a temporary function,
temp_deliver_data() has been added. This will be replaced with common code
between the rxrpc_recvmsg() path and the kernel_rxrpc_recv_data() path in a
future patch.
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-08-31 03:42:14 +08:00
|
|
|
bool need_attention; /* T if RxRPC poked us */
|
2017-01-05 18:38:36 +08:00
|
|
|
bool async; /* T if asynchronous */
|
2017-11-02 23:27:48 +08:00
|
|
|
bool ret_reply0; /* T if should return reply[0] on success */
|
2017-10-18 18:36:39 +08:00
|
|
|
bool upgrade; /* T to request service upgrade */
|
2007-04-27 06:55:03 +08:00
|
|
|
u16 service_id; /* RxRPC service ID to call */
|
2016-10-13 15:27:10 +08:00
|
|
|
u32 operation_ID; /* operation ID for an incoming call */
|
2007-04-27 06:55:03 +08:00
|
|
|
u32 count; /* count for use in unmarshalling */
|
|
|
|
__be32 tmp; /* place to extract temporary data */
|
AFS: implement basic file write support
Implement support for writing to regular AFS files, including:
(1) write
(2) truncate
(3) fsync, fdatasync
(4) chmod, chown, chgrp, utime.
AFS writeback attempts to batch writes into as chunks as large as it can manage
up to the point that it writes back 65535 pages in one chunk or it meets a
locked page.
Furthermore, if a page has been written to using a particular key, then should
another write to that page use some other key, the first write will be flushed
before the second is allowed to take place. If the first write fails due to a
security error, then the page will be scrapped and reread before the second
write takes place.
If a page is dirty and the callback on it is broken by the server, then the
dirty data is not discarded (same behaviour as NFS).
Shared-writable mappings are not supported by this patch.
[akpm@linux-foundation.org: fix a bunch of warnings]
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-05-09 17:33:46 +08:00
|
|
|
afs_dataversion_t store_version; /* updated version expected from store */
|
2007-04-27 06:55:03 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
/*
 * Per-call-type operations table; struct afs_call points at one of these
 * through its ->type member.
 */
struct afs_call_type {
	const char *name;

	/* deliver request or reply data to a call
	 * - returning an error will cause the call to be aborted
	 */
	int (*deliver)(struct afs_call *call);

	/* clean up a call */
	void (*destructor)(struct afs_call *call);

	/* Work function */
	void (*work)(struct work_struct *work);
};
|
|
|
|
|
2017-01-05 18:38:34 +08:00
|
|
|
/*
|
|
|
|
* Record of an outstanding read operation on a vnode.
|
|
|
|
*/
|
|
|
|
struct afs_read {
|
|
|
|
loff_t pos; /* Where to start reading */
|
2017-03-17 00:27:44 +08:00
|
|
|
loff_t len; /* How much we're asking for */
|
2017-01-05 18:38:34 +08:00
|
|
|
loff_t actual_len; /* How much we're actually getting */
|
2017-03-17 00:27:46 +08:00
|
|
|
loff_t remain; /* Amount remaining */
|
2017-01-05 18:38:34 +08:00
|
|
|
atomic_t usage;
|
|
|
|
unsigned int index; /* Which page we're reading into */
|
|
|
|
unsigned int nr_pages;
|
|
|
|
void (*page_done)(struct afs_call *, struct afs_read *);
|
|
|
|
struct page *pages[];
|
|
|
|
};
|
|
|
|
|
AFS: implement basic file write support
Implement support for writing to regular AFS files, including:
(1) write
(2) truncate
(3) fsync, fdatasync
(4) chmod, chown, chgrp, utime.
AFS writeback attempts to batch writes into as chunks as large as it can manage
up to the point that it writes back 65535 pages in one chunk or it meets a
locked page.
Furthermore, if a page has been written to using a particular key, then should
another write to that page use some other key, the first write will be flushed
before the second is allowed to take place. If the first write fails due to a
security error, then the page will be scrapped and reread before the second
write takes place.
If a page is dirty and the callback on it is broken by the server, then the
dirty data is not discarded (same behaviour as NFS).
Shared-writable mappings are not supported by this patch.
[akpm@linux-foundation.org: fix a bunch of warnings]
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-05-09 17:33:46 +08:00
|
|
|
/*
|
|
|
|
* record of an outstanding writeback on a vnode
|
|
|
|
*/
|
|
|
|
struct afs_writeback {
|
|
|
|
struct list_head link; /* link in vnode->writebacks */
|
|
|
|
struct work_struct writer; /* work item to perform the writeback */
|
|
|
|
struct afs_vnode *vnode; /* vnode to which this write applies */
|
|
|
|
struct key *key; /* owner of this write */
|
|
|
|
wait_queue_head_t waitq; /* completion and ready wait queue */
|
|
|
|
pgoff_t first; /* first page in batch */
|
|
|
|
pgoff_t point; /* last page in current store op */
|
|
|
|
pgoff_t last; /* last page in batch (inclusive) */
|
|
|
|
unsigned offset_first; /* offset into first page of start of write */
|
|
|
|
unsigned to_last; /* offset into last page of end of write */
|
|
|
|
int num_conflicts; /* count of conflicting writes in list */
|
|
|
|
int usage;
|
|
|
|
bool conflicts; /* T if has dependent conflicts */
|
|
|
|
enum {
|
|
|
|
AFS_WBACK_SYNCING, /* synchronisation being performed */
|
|
|
|
AFS_WBACK_PENDING, /* write pending */
|
|
|
|
AFS_WBACK_CONFLICTING, /* conflicting writes posted */
|
|
|
|
AFS_WBACK_WRITING, /* writing back */
|
|
|
|
AFS_WBACK_COMPLETE /* the writeback record has been unlinked */
|
|
|
|
} state __attribute__((packed));
|
|
|
|
};
|
|
|
|
|
2007-04-27 06:55:03 +08:00
|
|
|
/*
 * AFS superblock private data
 * - there's one superblock per volume
 */
struct afs_super_info {
	struct afs_net		*net;		/* Network namespace */
	struct afs_cell		*cell;		/* The cell in which the volume resides */
	struct afs_volume	*volume;	/* volume record */
	char			rwparent;	/* T if parent is R/W AFS volume */
};
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2007-04-27 06:55:03 +08:00
|
|
|
static inline struct afs_super_info *AFS_FS_S(struct super_block *sb)
|
|
|
|
{
|
|
|
|
return sb->s_fs_info;
|
2005-04-17 06:20:36 +08:00
|
|
|
}
|
|
|
|
|
2007-04-27 06:55:03 +08:00
|
|
|
/* AFS filesystem type object; declared here, defined outside this header. */
extern struct file_system_type afs_fs_type;
|
|
|
|
|
2017-11-02 23:27:45 +08:00
|
|
|
/*
|
|
|
|
* AFS network namespace record.
|
|
|
|
*/
|
|
|
|
struct afs_net {
|
|
|
|
struct afs_uuid uuid;
|
|
|
|
bool live; /* F if this namespace is being removed */
|
|
|
|
|
|
|
|
/* AF_RXRPC I/O stuff */
|
|
|
|
struct socket *socket;
|
|
|
|
struct afs_call *spare_incoming_call;
|
|
|
|
struct work_struct charge_preallocation_work;
|
|
|
|
struct mutex socket_mutex;
|
|
|
|
atomic_t nr_outstanding_calls;
|
|
|
|
atomic_t nr_superblocks;
|
|
|
|
|
|
|
|
/* Cell database */
|
2017-11-02 23:27:50 +08:00
|
|
|
struct rb_root cells;
|
2017-11-02 23:27:45 +08:00
|
|
|
struct afs_cell *ws_cell;
|
2017-11-02 23:27:50 +08:00
|
|
|
struct work_struct cells_manager;
|
|
|
|
struct timer_list cells_timer;
|
|
|
|
atomic_t cells_outstanding;
|
|
|
|
seqlock_t cells_lock;
|
2017-11-02 23:27:45 +08:00
|
|
|
|
2017-11-02 23:27:50 +08:00
|
|
|
spinlock_t proc_cells_lock;
|
2017-11-02 23:27:45 +08:00
|
|
|
struct list_head proc_cells;
|
|
|
|
|
|
|
|
/* Volume location database */
|
|
|
|
struct list_head vl_updates; /* VL records in need-update order */
|
|
|
|
struct list_head vl_graveyard; /* Inactive VL records */
|
|
|
|
struct delayed_work vl_reaper;
|
|
|
|
struct delayed_work vl_updater;
|
|
|
|
spinlock_t vl_updates_lock;
|
|
|
|
spinlock_t vl_graveyard_lock;
|
|
|
|
|
|
|
|
/* File locking renewal management */
|
|
|
|
struct mutex lock_manager_mutex;
|
|
|
|
|
|
|
|
/* Server database */
|
|
|
|
struct rb_root servers; /* Active servers */
|
|
|
|
rwlock_t servers_lock;
|
|
|
|
struct list_head server_graveyard; /* Inactive server LRU list */
|
|
|
|
spinlock_t server_graveyard_lock;
|
2017-11-02 23:27:45 +08:00
|
|
|
struct timer_list server_timer;
|
|
|
|
struct work_struct server_reaper;
|
|
|
|
atomic_t servers_outstanding;
|
2017-11-02 23:27:45 +08:00
|
|
|
|
|
|
|
/* Misc */
|
|
|
|
struct proc_dir_entry *proc_afs; /* /proc/net/afs directory */
|
|
|
|
};
|
|
|
|
|
|
|
|
extern struct afs_net __afs_net; /* Dummy AFS network namespace; TODO: replace with real netns */
|
|
|
|
|
2017-11-02 23:27:50 +08:00
|
|
|
/*
 * States of an AFS cell record (stored in afs_cell::state).
 */
enum afs_cell_state {
	AFS_CELL_UNSET,
	AFS_CELL_ACTIVATING,
	AFS_CELL_ACTIVE,
	AFS_CELL_DEACTIVATING,
	AFS_CELL_INACTIVE,
	AFS_CELL_FAILED,
};
|
|
|
|
|
2007-04-27 06:55:03 +08:00
|
|
|
/*
|
|
|
|
* AFS cell record
|
|
|
|
*/
|
|
|
|
struct afs_cell {
|
2017-11-02 23:27:50 +08:00
|
|
|
union {
|
|
|
|
struct rcu_head rcu;
|
|
|
|
struct rb_node net_node; /* Node in net->cells */
|
|
|
|
};
|
|
|
|
struct afs_net *net;
|
2007-04-27 06:57:07 +08:00
|
|
|
struct key *anonymous_key; /* anonymous user key for this cell */
|
2017-11-02 23:27:50 +08:00
|
|
|
struct work_struct manager; /* Manager for init/deinit/dns */
|
2007-04-27 06:55:03 +08:00
|
|
|
struct list_head proc_link; /* /proc cell list link */
|
2009-04-03 23:42:41 +08:00
|
|
|
#ifdef CONFIG_AFS_FSCACHE
|
|
|
|
struct fscache_cookie *cache; /* caching cookie */
|
2007-04-27 06:55:03 +08:00
|
|
|
#endif
|
|
|
|
|
|
|
|
/* server record management */
|
|
|
|
rwlock_t servers_lock; /* active server list lock */
|
|
|
|
struct list_head servers; /* active server list */
|
|
|
|
|
|
|
|
/* volume location record management */
|
|
|
|
struct rw_semaphore vl_sem; /* volume management serialisation semaphore */
|
|
|
|
struct list_head vl_list; /* cell's active VL record list */
|
2017-11-02 23:27:50 +08:00
|
|
|
time64_t dns_expiry; /* Time AFSDB/SRV record expires */
|
|
|
|
time64_t last_inactive; /* Time of last drop of usage count */
|
|
|
|
atomic_t usage;
|
|
|
|
unsigned long flags;
|
|
|
|
#define AFS_CELL_FL_NOT_READY 0 /* The cell record is not ready for use */
|
|
|
|
#define AFS_CELL_FL_NO_GC 1 /* The cell was added manually, don't auto-gc */
|
|
|
|
#define AFS_CELL_FL_NOT_FOUND 2 /* Permanent DNS error */
|
|
|
|
#define AFS_CELL_FL_DNS_FAIL 3 /* Failed to access DNS */
|
|
|
|
enum afs_cell_state state;
|
|
|
|
short error;
|
|
|
|
|
2007-04-27 06:55:03 +08:00
|
|
|
spinlock_t vl_lock; /* vl_list lock */
|
2017-11-02 23:27:50 +08:00
|
|
|
|
|
|
|
/* VLDB server list. */
|
|
|
|
seqlock_t vl_addrs_lock;
|
2007-04-27 06:55:03 +08:00
|
|
|
unsigned short vl_naddrs; /* number of VL servers in addr list */
|
|
|
|
unsigned short vl_curr_svix; /* current server index */
|
2017-11-02 23:27:47 +08:00
|
|
|
struct sockaddr_rxrpc vl_addrs[AFS_CELL_MAX_ADDRS]; /* cell VL server addresses */
|
2017-11-02 23:27:50 +08:00
|
|
|
u8 name_len; /* Length of name */
|
|
|
|
char name[64 + 1]; /* Cell name, case-flattened and NUL-padded */
|
2007-04-27 06:55:03 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
/*
|
|
|
|
* entry in the cached volume location catalogue
|
|
|
|
*/
|
|
|
|
struct afs_cache_vlocation {
|
2007-04-27 06:57:07 +08:00
|
|
|
/* volume name (lowercase, padded with NULs) */
|
|
|
|
uint8_t name[AFS_MAXVOLNAME + 1];
|
|
|
|
|
2007-04-27 06:55:03 +08:00
|
|
|
uint8_t nservers; /* number of entries used in servers[] */
|
|
|
|
uint8_t vidmask; /* voltype mask for vid[] */
|
|
|
|
uint8_t srvtmask[8]; /* voltype masks for servers[] */
|
|
|
|
#define AFS_VOL_VTM_RW 0x01 /* R/W version of the volume is available (on this server) */
|
|
|
|
#define AFS_VOL_VTM_RO 0x02 /* R/O version of the volume is available (on this server) */
|
|
|
|
#define AFS_VOL_VTM_BAK 0x04 /* backup version of the volume is available (on this server) */
|
|
|
|
|
|
|
|
afs_volid_t vid[3]; /* volume IDs for R/W, R/O and Bak volumes */
|
2017-11-02 23:27:47 +08:00
|
|
|
struct sockaddr_rxrpc servers[8]; /* fileserver addresses */
|
2007-04-27 06:55:03 +08:00
|
|
|
time_t rtime; /* last retrieval time */
|
|
|
|
};
|
|
|
|
|
|
|
|
/*
|
|
|
|
* AFS volume location record
|
|
|
|
*/
|
|
|
|
struct afs_vlocation {
|
|
|
|
atomic_t usage;
|
2017-03-17 00:27:46 +08:00
|
|
|
time64_t time_of_death; /* time at which put reduced usage to 0 */
|
2007-04-27 06:55:03 +08:00
|
|
|
struct list_head link; /* link in cell volume location list */
|
|
|
|
struct list_head grave; /* link in master graveyard list */
|
|
|
|
struct list_head update; /* link in master update list */
|
|
|
|
struct afs_cell *cell; /* cell to which volume belongs */
|
|
|
|
struct afs_cache_vlocation vldb; /* volume information DB record */
|
|
|
|
struct afs_volume *vols[3]; /* volume access record pointer (index by type) */
|
|
|
|
wait_queue_head_t waitq; /* status change waitqueue */
|
2017-03-17 00:27:46 +08:00
|
|
|
time64_t update_at; /* time at which record should be updated */
|
2007-04-27 11:39:14 +08:00
|
|
|
spinlock_t lock; /* access lock */
|
2007-04-27 06:55:03 +08:00
|
|
|
afs_vlocation_state_t state; /* volume location state */
|
|
|
|
unsigned short upd_rej_cnt; /* ENOMEDIUM count during update */
|
|
|
|
unsigned short upd_busy_cnt; /* EBUSY count during update */
|
|
|
|
bool valid; /* T if valid */
|
|
|
|
};
|
|
|
|
|
|
|
|
/*
|
|
|
|
* AFS fileserver record
|
|
|
|
*/
|
|
|
|
struct afs_server {
|
|
|
|
atomic_t usage;
|
2017-03-17 00:27:46 +08:00
|
|
|
time64_t time_of_death; /* time at which put reduced usage to 0 */
|
2017-11-02 23:27:47 +08:00
|
|
|
struct sockaddr_rxrpc addr; /* server address */
|
2017-11-02 23:27:46 +08:00
|
|
|
struct afs_net *net; /* Network namespace in which the server resides */
|
2007-04-27 06:55:03 +08:00
|
|
|
struct afs_cell *cell; /* cell in which server resides */
|
|
|
|
struct list_head link; /* link in cell's server list */
|
|
|
|
struct list_head grave; /* link in master graveyard list */
|
2017-11-02 23:27:49 +08:00
|
|
|
|
2007-04-27 06:55:03 +08:00
|
|
|
struct rb_node master_rb; /* link in master by-addr tree */
|
|
|
|
struct rw_semaphore sem; /* access lock */
|
2017-11-02 23:27:49 +08:00
|
|
|
unsigned long flags;
|
|
|
|
#define AFS_SERVER_NEW 0 /* New server, don't inc cb_s_break */
|
2007-04-27 06:55:03 +08:00
|
|
|
|
|
|
|
/* file service access */
|
|
|
|
int fs_state; /* 0 or reason FS currently marked dead (-errno) */
|
2017-11-02 23:27:49 +08:00
|
|
|
spinlock_t fs_lock; /* access lock */
|
2007-04-27 06:55:03 +08:00
|
|
|
|
|
|
|
/* callback promise management */
|
2017-11-02 23:27:49 +08:00
|
|
|
struct list_head cb_interests; /* List of superblocks using this server */
|
|
|
|
unsigned cb_s_break; /* Break-everything counter. */
|
|
|
|
rwlock_t cb_break_lock; /* Volume finding lock */
|
|
|
|
};
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Interest by a superblock on a server.
|
|
|
|
*/
|
|
|
|
struct afs_cb_interest {
|
|
|
|
struct list_head cb_link; /* Link in server->cb_interests */
|
|
|
|
struct afs_server *server; /* Server on which this interest resides */
|
|
|
|
struct super_block *sb; /* Superblock on which inodes reside */
|
|
|
|
afs_volid_t vid; /* Volume ID to match */
|
|
|
|
refcount_t usage;
|
2007-04-27 06:55:03 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
/*
|
|
|
|
* AFS volume access record
|
|
|
|
*/
|
|
|
|
struct afs_volume {
|
|
|
|
atomic_t usage;
|
|
|
|
struct afs_cell *cell; /* cell to which belongs (unrefd ptr) */
|
|
|
|
struct afs_vlocation *vlocation; /* volume location */
|
2009-04-03 23:42:41 +08:00
|
|
|
#ifdef CONFIG_AFS_FSCACHE
|
|
|
|
struct fscache_cookie *cache; /* caching cookie */
|
2007-04-27 06:55:03 +08:00
|
|
|
#endif
|
|
|
|
afs_volid_t vid; /* volume ID */
|
|
|
|
afs_voltype_t type; /* type of volume */
|
|
|
|
char type_force; /* force volume type (suppress R/O -> R/W) */
|
|
|
|
unsigned short nservers; /* number of server slots filled */
|
|
|
|
unsigned short rjservers; /* number of servers discarded due to -ENOMEDIUM */
|
|
|
|
struct afs_server *servers[8]; /* servers on which volume resides (ordered) */
|
2017-11-02 23:27:49 +08:00
|
|
|
struct afs_cb_interest *cb_interests[8]; /* Interests on servers for callbacks */
|
2007-04-27 06:55:03 +08:00
|
|
|
struct rw_semaphore server_sem; /* lock for accessing current server */
|
|
|
|
};
|
|
|
|
|
|
|
|
/*
|
|
|
|
* vnode catalogue entry
|
|
|
|
*/
|
|
|
|
struct afs_cache_vnode {
|
|
|
|
afs_vnodeid_t vnode_id; /* vnode ID */
|
|
|
|
unsigned vnode_unique; /* vnode ID uniquifier */
|
|
|
|
afs_dataversion_t data_version; /* data version */
|
|
|
|
};
|
|
|
|
|
|
|
|
/*
|
|
|
|
* AFS inode private data
|
|
|
|
*/
|
|
|
|
struct afs_vnode {
|
|
|
|
struct inode vfs_inode; /* the VFS's inode record */
|
|
|
|
|
|
|
|
struct afs_volume *volume; /* volume on which vnode resides */
|
|
|
|
struct afs_fid fid; /* the file identifier for this inode */
|
|
|
|
struct afs_file_status status; /* AFS status info for this file */
|
2009-04-03 23:42:41 +08:00
|
|
|
#ifdef CONFIG_AFS_FSCACHE
|
|
|
|
struct fscache_cookie *cache; /* caching cookie */
|
2007-04-27 06:55:03 +08:00
|
|
|
#endif
|
afs: Overhaul permit caching
Overhaul permit caching in AFS by making it per-vnode and sharing permit
lists where possible.
When most of the fileserver operations are called, they return a status
structure indicating the (revised) details of the vnode or vnodes involved
in the operation. This includes the access mark derived from the ACL
(named CallerAccess in the protocol definition file). This is cacheable
and if the ACL changes, the server will tell us that it is breaking the
callback promise, at which point we can discard the currently cached
permits.
With this patch, the afs_permits structure has, at the end, an array of
{ key, CallerAccess } elements, sorted by key pointer. This is then cached
in a hash table so that it can be shared between vnodes with the same
access permits.
Permit lists can only be shared if they contain the exact same set of
key->CallerAccess mappings.
Note that that table is global rather than being per-net_ns. If the keys
in a permit list cross net_ns boundaries, there is no problem sharing the
cached permits, since the permits are just integer masks.
Since permit lists pin keys, the permit cache also makes it easier for a
future patch to find all occurrences of a key and remove them by means of
setting the afs_permits::invalidated flag and then clearing the appropriate
key pointer. In such an event, memory barriers will need adding.
Lastly, the permit caching is skipped if the server has sent either a
vnode-specific or an entire-server callback since the start of the
operation.
Signed-off-by: David Howells <dhowells@redhat.com>
2017-11-02 23:27:49 +08:00
|
|
|
struct afs_permits *permit_cache; /* cache of permits so far obtained */
|
2007-04-27 06:59:35 +08:00
|
|
|
struct mutex validate_lock; /* lock for validating this vnode */
|
2007-04-27 06:55:03 +08:00
|
|
|
wait_queue_head_t update_waitq; /* status fetch waitqueue */
|
2007-04-27 06:59:35 +08:00
|
|
|
int update_cnt; /* number of outstanding ops that will update the
|
2007-04-27 06:55:03 +08:00
|
|
|
* status */
|
AFS: implement basic file write support
Implement support for writing to regular AFS files, including:
(1) write
(2) truncate
(3) fsync, fdatasync
(4) chmod, chown, chgrp, utime.
AFS writeback attempts to batch writes into as chunks as large as it can manage
up to the point that it writes back 65535 pages in one chunk or it meets a
locked page.
Furthermore, if a page has been written to using a particular key, then should
another write to that page use some other key, the first write will be flushed
before the second is allowed to take place. If the first write fails due to a
security error, then the page will be scrapped and reread before the second
write takes place.
If a page is dirty and the callback on it is broken by the server, then the
dirty data is not discarded (same behaviour as NFS).
Shared-writable mappings are not supported by this patch.
[akpm@linux-foundation.org: fix a bunch of warnings]
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-05-09 17:33:46 +08:00
|
|
|
spinlock_t writeback_lock; /* lock for writebacks */
|
2007-04-27 06:55:03 +08:00
|
|
|
spinlock_t lock; /* waitqueue/flags lock */
|
|
|
|
unsigned long flags;
|
2017-11-02 23:27:49 +08:00
|
|
|
#define AFS_VNODE_CB_PROMISED 0 /* Set if vnode has a callback promise */
|
2007-04-27 06:59:35 +08:00
|
|
|
#define AFS_VNODE_UNSET 1 /* set if vnode attributes not yet set */
|
2017-11-02 23:27:49 +08:00
|
|
|
#define AFS_VNODE_DIR_MODIFIED 2 /* set if dir vnode's data modified */
|
2007-04-27 06:55:03 +08:00
|
|
|
#define AFS_VNODE_ZAP_DATA 3 /* set if vnode's data should be invalidated */
|
|
|
|
#define AFS_VNODE_DELETED 4 /* set if vnode deleted on server */
|
|
|
|
#define AFS_VNODE_MOUNTPOINT 5 /* set if vnode is a mountpoint symlink */
|
2007-07-16 14:40:12 +08:00
|
|
|
#define AFS_VNODE_LOCKING 6 /* set if waiting for lock on vnode */
|
|
|
|
#define AFS_VNODE_READLOCKED 7 /* set if vnode is read-locked on the server */
|
|
|
|
#define AFS_VNODE_WRITELOCKED 8 /* set if vnode is write-locked on the server */
|
|
|
|
#define AFS_VNODE_UNLOCKING 9 /* set if vnode is being unlocked on the server */
|
2010-08-11 16:38:04 +08:00
|
|
|
#define AFS_VNODE_AUTOCELL 10 /* set if Vnode is an auto mount point */
|
|
|
|
#define AFS_VNODE_PSEUDODIR 11 /* set if Vnode is a pseudo directory */
|
2007-04-27 06:55:03 +08:00
|
|
|
|
AFS: implement basic file write support
Implement support for writing to regular AFS files, including:
(1) write
(2) truncate
(3) fsync, fdatasync
(4) chmod, chown, chgrp, utime.
AFS writeback attempts to batch writes into as chunks as large as it can manage
up to the point that it writes back 65535 pages in one chunk or it meets a
locked page.
Furthermore, if a page has been written to using a particular key, then should
another write to that page use some other key, the first write will be flushed
before the second is allowed to take place. If the first write fails due to a
security error, then the page will be scrapped and reread before the second
write takes place.
If a page is dirty and the callback on it is broken by the server, then the
dirty data is not discarded (same behaviour as NFS).
Shared-writable mappings are not supported by this patch.
[akpm@linux-foundation.org: fix a bunch of warnings]
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-05-09 17:33:46 +08:00
|
|
|
struct list_head writebacks; /* alterations in pagecache that need writing */
|
2007-07-16 14:40:12 +08:00
|
|
|
struct list_head pending_locks; /* locks waiting to be granted */
|
|
|
|
struct list_head granted_locks; /* locks granted on this file */
|
|
|
|
struct delayed_work lock_work; /* work to be done in locking */
|
|
|
|
struct key *unlock_key; /* key to be used in unlocking */
|
AFS: implement basic file write support
Implement support for writing to regular AFS files, including:
(1) write
(2) truncate
(3) fsync, fdatasync
(4) chmod, chown, chgrp, utime.
AFS writeback attempts to batch writes into as chunks as large as it can manage
up to the point that it writes back 65535 pages in one chunk or it meets a
locked page.
Furthermore, if a page has been written to using a particular key, then should
another write to that page use some other key, the first write will be flushed
before the second is allowed to take place. If the first write fails due to a
security error, then the page will be scrapped and reread before the second
write takes place.
If a page is dirty and the callback on it is broken by the server, then the
dirty data is not discarded (same behaviour as NFS).
Shared-writable mappings are not supported by this patch.
[akpm@linux-foundation.org: fix a bunch of warnings]
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-05-09 17:33:46 +08:00
|
|
|
|
2007-04-27 06:55:03 +08:00
|
|
|
/* outstanding callback notification on this file */
|
2017-11-02 23:27:49 +08:00
|
|
|
struct afs_cb_interest *cb_interest; /* Server on which this resides */
|
|
|
|
unsigned int cb_s_break; /* Mass break counter on ->server */
|
|
|
|
unsigned int cb_break; /* Break counter on vnode */
|
|
|
|
seqlock_t cb_lock; /* Lock for ->cb_interest, ->status, ->cb_*break */
|
|
|
|
|
|
|
|
time64_t cb_expires_at; /* time at which callback expires */
|
2007-04-27 06:55:03 +08:00
|
|
|
unsigned cb_version; /* callback version */
|
|
|
|
afs_callback_type_t cb_type; /* type of callback */
|
|
|
|
};
|
|
|
|
|
2007-04-27 06:57:07 +08:00
|
|
|
/*
|
|
|
|
* cached security record for one user's attempt to access a vnode
|
|
|
|
*/
|
|
|
|
struct afs_permit {
|
|
|
|
struct key *key; /* RxRPC ticket holding a security context */
|
afs: Overhaul permit caching
Overhaul permit caching in AFS by making it per-vnode and sharing permit
lists where possible.
When most of the fileserver operations are called, they return a status
structure indicating the (revised) details of the vnode or vnodes involved
in the operation. This includes the access mark derived from the ACL
(named CallerAccess in the protocol definition file). This is cacheable
and if the ACL changes, the server will tell us that it is breaking the
callback promise, at which point we can discard the currently cached
permits.
With this patch, the afs_permits structure has, at the end, an array of
{ key, CallerAccess } elements, sorted by key pointer. This is then cached
in a hash table so that it can be shared between vnodes with the same
access permits.
Permit lists can only be shared if they contain the exact same set of
key->CallerAccess mappings.
Note that that table is global rather than being per-net_ns. If the keys
in a permit list cross net_ns boundaries, there is no problem sharing the
cached permits, since the permits are just integer masks.
Since permit lists pin keys, the permit cache also makes it easier for a
future patch to find all occurrences of a key and remove them by means of
setting the afs_permits::invalidated flag and then clearing the appropriate
key pointer. In such an event, memory barriers will need adding.
Lastly, the permit caching is skipped if the server has sent either a
vnode-specific or an entire-server callback since the start of the
operation.
Signed-off-by: David Howells <dhowells@redhat.com>
2017-11-02 23:27:49 +08:00
|
|
|
afs_access_t access; /* CallerAccess value for this key */
|
2007-04-27 06:57:07 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
/*
|
afs: Overhaul permit caching
Overhaul permit caching in AFS by making it per-vnode and sharing permit
lists where possible.
When most of the fileserver operations are called, they return a status
structure indicating the (revised) details of the vnode or vnodes involved
in the operation. This includes the access mark derived from the ACL
(named CallerAccess in the protocol definition file). This is cacheable
and if the ACL changes, the server will tell us that it is breaking the
callback promise, at which point we can discard the currently cached
permits.
With this patch, the afs_permits structure has, at the end, an array of
{ key, CallerAccess } elements, sorted by key pointer. This is then cached
in a hash table so that it can be shared between vnodes with the same
access permits.
Permit lists can only be shared if they contain the exact same set of
key->CallerAccess mappings.
Note that that table is global rather than being per-net_ns. If the keys
in a permit list cross net_ns boundaries, there is no problem sharing the
cached permits, since the permits are just integer masks.
Since permit lists pin keys, the permit cache also makes it easier for a
future patch to find all occurrences of a key and remove them by means of
setting the afs_permits::invalidated flag and then clearing the appropriate
key pointer. In such an event, memory barriers will need adding.
Lastly, the permit caching is skipped if the server has sent either a
vnode-specific or an entire-server callback since the start of the
operation.
Signed-off-by: David Howells <dhowells@redhat.com>
2017-11-02 23:27:49 +08:00
|
|
|
* Immutable cache of CallerAccess records from attempts to access vnodes.
|
|
|
|
* These may be shared between multiple vnodes.
|
2007-04-27 06:57:07 +08:00
|
|
|
*/
|
|
|
|
struct afs_permits {
|
afs: Overhaul permit caching
Overhaul permit caching in AFS by making it per-vnode and sharing permit
lists where possible.
When most of the fileserver operations are called, they return a status
structure indicating the (revised) details of the vnode or vnodes involved
in the operation. This includes the access mark derived from the ACL
(named CallerAccess in the protocol definition file). This is cacheable
and if the ACL changes, the server will tell us that it is breaking the
callback promise, at which point we can discard the currently cached
permits.
With this patch, the afs_permits structure has, at the end, an array of
{ key, CallerAccess } elements, sorted by key pointer. This is then cached
in a hash table so that it can be shared between vnodes with the same
access permits.
Permit lists can only be shared if they contain the exact same set of
key->CallerAccess mappings.
Note that that table is global rather than being per-net_ns. If the keys
in a permit list cross net_ns boundaries, there is no problem sharing the
cached permits, since the permits are just integer masks.
Since permit lists pin keys, the permit cache also makes it easier for a
future patch to find all occurrences of a key and remove them by means of
setting the afs_permits::invalidated flag and then clearing the appropriate
key pointer. In such an event, memory barriers will need adding.
Lastly, the permit caching is skipped if the server has sent either a
vnode-specific or an entire-server callback since the start of the
operation.
Signed-off-by: David Howells <dhowells@redhat.com>
2017-11-02 23:27:49 +08:00
|
|
|
struct rcu_head rcu;
|
|
|
|
struct hlist_node hash_node; /* Link in hash */
|
|
|
|
unsigned long h; /* Hash value for this permit list */
|
|
|
|
refcount_t usage;
|
|
|
|
unsigned short nr_permits; /* Number of records */
|
|
|
|
bool invalidated; /* Invalidated due to key change */
|
|
|
|
struct afs_permit permits[]; /* List of permits sorted by key pointer */
|
2007-04-27 06:57:07 +08:00
|
|
|
};
|
|
|
|
|
2007-04-27 06:58:17 +08:00
|
|
|
/*
|
|
|
|
* record of one of a system's set of network interfaces
|
|
|
|
*/
|
|
|
|
struct afs_interface {
|
|
|
|
struct in_addr address; /* IPv4 address bound to interface */
|
|
|
|
struct in_addr netmask; /* netmask applied to address */
|
|
|
|
unsigned mtu; /* MTU of interface */
|
|
|
|
};
|
|
|
|
|
2007-04-27 06:55:03 +08:00
|
|
|
/*****************************************************************************/
|
2009-04-03 23:42:41 +08:00
|
|
|
/*
 * cache.c
 *
 * With CONFIG_AFS_FSCACHE the cookie definitions are real objects.  Without
 * it the three *_cache_index_def names expand to a dereference of a NULL
 * struct fscache_cookie_def pointer.
 * NOTE(review): presumably these placeholders only exist so that callers
 * still compile and are never actually evaluated - confirm against the
 * users of these names.
 */
#ifdef CONFIG_AFS_FSCACHE
extern struct fscache_netfs afs_cache_netfs;
extern struct fscache_cookie_def afs_cell_cache_index_def;
extern struct fscache_cookie_def afs_volume_cache_index_def;
extern struct fscache_cookie_def afs_vnode_cache_index_def;
#else
#define afs_cell_cache_index_def	(*(struct fscache_cookie_def *) NULL)
#define afs_volume_cache_index_def	(*(struct fscache_cookie_def *) NULL)
#define afs_vnode_cache_index_def	(*(struct fscache_cookie_def *) NULL)
#endif
|
|
|
|
|
2007-04-27 06:55:03 +08:00
|
|
|
/*
|
|
|
|
* callback.c
|
|
|
|
*/
|
|
|
|
extern void afs_init_callback_state(struct afs_server *);
|
2017-11-02 23:27:49 +08:00
|
|
|
extern void afs_break_callback(struct afs_vnode *);
|
|
|
|
extern void afs_break_callbacks(struct afs_server *, size_t,struct afs_callback[]);
|
|
|
|
|
|
|
|
extern int afs_register_server_cb_interest(struct afs_vnode *, struct afs_cb_interest **,
|
|
|
|
struct afs_server *);
|
|
|
|
extern void afs_put_cb_interest(struct afs_net *, struct afs_cb_interest *);
|
|
|
|
extern void afs_clear_callback_interests(struct afs_net *, struct afs_volume *);
|
|
|
|
|
|
|
|
static inline struct afs_cb_interest *afs_get_cb_interest(struct afs_cb_interest *cbi)
|
|
|
|
{
|
|
|
|
refcount_inc(&cbi->usage);
|
|
|
|
return cbi;
|
|
|
|
}
|
2007-04-27 06:55:03 +08:00
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
/*
|
|
|
|
* cell.c
|
|
|
|
*/
|
2017-11-02 23:27:50 +08:00
|
|
|
static inline struct afs_cell *afs_get_cell(struct afs_cell *cell)
|
2017-11-02 23:27:46 +08:00
|
|
|
{
|
|
|
|
if (cell)
|
|
|
|
atomic_inc(&cell->usage);
|
|
|
|
return cell;
|
|
|
|
}
|
2017-11-02 23:27:50 +08:00
|
|
|
|
|
|
|
extern int afs_cell_init(struct afs_net *, const char *);
|
|
|
|
extern struct afs_cell *afs_lookup_cell_rcu(struct afs_net *, const char *, unsigned);
|
|
|
|
extern struct afs_cell *afs_lookup_cell(struct afs_net *, const char *, unsigned,
|
|
|
|
const char *, bool);
|
2017-11-02 23:27:46 +08:00
|
|
|
extern void afs_put_cell(struct afs_net *, struct afs_cell *);
|
2017-11-02 23:27:50 +08:00
|
|
|
extern void afs_manage_cells(struct work_struct *);
|
|
|
|
extern void afs_cells_timer(struct timer_list *);
|
2017-11-02 23:27:45 +08:00
|
|
|
extern void __net_exit afs_cell_purge(struct afs_net *);
|
2007-04-27 06:55:03 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* cmservice.c
|
|
|
|
*/
|
|
|
|
extern bool afs_cm_incoming_call(struct afs_call *);
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
/*
 * dir.c
 */
extern const struct inode_operations afs_dir_inode_operations;
extern const struct dentry_operations afs_fs_dentry_operations;
extern const struct file_operations afs_dir_file_operations;
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
/*
 * file.c
 */
extern const struct address_space_operations afs_fs_aops;
extern const struct inode_operations afs_file_inode_operations;
extern const struct file_operations afs_file_operations;

extern int afs_open(struct inode *, struct file *);
extern int afs_release(struct inode *, struct file *);
extern int afs_page_filler(void *, struct page *);
extern void afs_put_read(struct afs_read *);
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2007-07-16 14:40:12 +08:00
|
|
|
/*
 * flock.c
 */
extern struct workqueue_struct *afs_lock_manager;

extern void afs_lock_work(struct work_struct *);
extern void afs_lock_may_be_available(struct afs_vnode *);
extern int afs_lock(struct file *, int, struct file_lock *);
extern int afs_flock(struct file *, int, struct file_lock *);
|
|
|
|
|
2007-04-27 06:55:03 +08:00
|
|
|
/*
|
|
|
|
* fsclient.c
|
|
|
|
*/
|
2007-04-27 06:57:07 +08:00
|
|
|
extern int afs_fs_fetch_file_status(struct afs_server *, struct key *,
|
|
|
|
struct afs_vnode *, struct afs_volsync *,
|
2017-01-05 18:38:36 +08:00
|
|
|
bool);
|
2017-11-02 23:27:45 +08:00
|
|
|
extern int afs_fs_give_up_callbacks(struct afs_net *, struct afs_server *, bool);
|
2007-04-27 06:57:07 +08:00
|
|
|
extern int afs_fs_fetch_data(struct afs_server *, struct key *,
|
2017-01-05 18:38:36 +08:00
|
|
|
struct afs_vnode *, struct afs_read *, bool);
|
2007-04-27 06:59:35 +08:00
|
|
|
extern int afs_fs_create(struct afs_server *, struct key *,
|
|
|
|
struct afs_vnode *, const char *, umode_t,
|
|
|
|
struct afs_fid *, struct afs_file_status *,
|
2017-01-05 18:38:36 +08:00
|
|
|
struct afs_callback *, bool);
|
2007-04-27 06:59:35 +08:00
|
|
|
extern int afs_fs_remove(struct afs_server *, struct key *,
|
2017-01-05 18:38:36 +08:00
|
|
|
struct afs_vnode *, const char *, bool, bool);
|
2007-04-27 06:59:35 +08:00
|
|
|
extern int afs_fs_link(struct afs_server *, struct key *, struct afs_vnode *,
|
2017-01-05 18:38:36 +08:00
|
|
|
struct afs_vnode *, const char *, bool);
|
2007-04-27 06:59:35 +08:00
|
|
|
extern int afs_fs_symlink(struct afs_server *, struct key *,
|
|
|
|
struct afs_vnode *, const char *, const char *,
|
2017-01-05 18:38:36 +08:00
|
|
|
struct afs_fid *, struct afs_file_status *, bool);
|
2007-04-27 06:59:35 +08:00
|
|
|
extern int afs_fs_rename(struct afs_server *, struct key *,
|
|
|
|
struct afs_vnode *, const char *,
|
2017-01-05 18:38:36 +08:00
|
|
|
struct afs_vnode *, const char *, bool);
|
AFS: implement basic file write support
Implement support for writing to regular AFS files, including:
(1) write
(2) truncate
(3) fsync, fdatasync
(4) chmod, chown, chgrp, utime.
AFS writeback attempts to batch writes into as chunks as large as it can manage
up to the point that it writes back 65535 pages in one chunk or it meets a
locked page.
Furthermore, if a page has been written to using a particular key, then should
another write to that page use some other key, the first write will be flushed
before the second is allowed to take place. If the first write fails due to a
security error, then the page will be scrapped and reread before the second
write takes place.
If a page is dirty and the callback on it is broken by the server, then the
dirty data is not discarded (same behaviour as NFS).
Shared-writable mappings are not supported by this patch.
[akpm@linux-foundation.org: fix a bunch of warnings]
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-05-09 17:33:46 +08:00
|
|
|
extern int afs_fs_store_data(struct afs_server *, struct afs_writeback *,
|
2017-01-05 18:38:36 +08:00
|
|
|
pgoff_t, pgoff_t, unsigned, unsigned, bool);
|
AFS: implement basic file write support
Implement support for writing to regular AFS files, including:
(1) write
(2) truncate
(3) fsync, fdatasync
(4) chmod, chown, chgrp, utime.
AFS writeback attempts to batch writes into as chunks as large as it can manage
up to the point that it writes back 65535 pages in one chunk or it meets a
locked page.
Furthermore, if a page has been written to using a particular key, then should
another write to that page use some other key, the first write will be flushed
before the second is allowed to take place. If the first write fails due to a
security error, then the page will be scrapped and reread before the second
write takes place.
If a page is dirty and the callback on it is broken by the server, then the
dirty data is not discarded (same behaviour as NFS).
Shared-writable mappings are not supported by this patch.
[akpm@linux-foundation.org: fix a bunch of warnings]
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-05-09 17:33:46 +08:00
|
|
|
extern int afs_fs_setattr(struct afs_server *, struct key *,
|
2017-01-05 18:38:36 +08:00
|
|
|
struct afs_vnode *, struct iattr *, bool);
|
2007-05-11 13:22:20 +08:00
|
|
|
extern int afs_fs_get_volume_status(struct afs_server *, struct key *,
|
|
|
|
struct afs_vnode *,
|
2017-01-05 18:38:36 +08:00
|
|
|
struct afs_volume_status *, bool);
|
2007-07-16 14:40:12 +08:00
|
|
|
extern int afs_fs_set_lock(struct afs_server *, struct key *,
|
2017-01-05 18:38:36 +08:00
|
|
|
struct afs_vnode *, afs_lock_type_t, bool);
|
2007-07-16 14:40:12 +08:00
|
|
|
extern int afs_fs_extend_lock(struct afs_server *, struct key *,
|
2017-01-05 18:38:36 +08:00
|
|
|
struct afs_vnode *, bool);
|
2007-07-16 14:40:12 +08:00
|
|
|
extern int afs_fs_release_lock(struct afs_server *, struct key *,
|
2017-01-05 18:38:36 +08:00
|
|
|
struct afs_vnode *, bool);
|
2017-11-02 23:27:49 +08:00
|
|
|
extern int afs_fs_give_up_all_callbacks(struct afs_server *, struct key *, bool);
|
2007-04-27 06:55:03 +08:00
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
/*
 * inode.c
 */
extern int afs_iget5_test(struct inode *, void *);
extern struct inode *afs_iget_autocell(struct inode *, const char *, int,
				       struct key *);
extern struct inode *afs_iget(struct super_block *, struct key *,
			      struct afs_fid *, struct afs_file_status *,
			      struct afs_callback *);
extern void afs_zap_data(struct afs_vnode *);
extern int afs_validate(struct afs_vnode *, struct key *);
|
statx: Add a system call to make enhanced file info available
Add a system call to make extended file information available, including
file creation and some attribute flags where available through the
underlying filesystem.
The getattr inode operation is altered to take two additional arguments: a
u32 request_mask and an unsigned int flags that indicate the
synchronisation mode. This change is propagated to the vfs_getattr*()
function.
Functions like vfs_stat() are now inline wrappers around new functions
vfs_statx() and vfs_statx_fd() to reduce stack usage.
========
OVERVIEW
========
The idea was initially proposed as a set of xattrs that could be retrieved
with getxattr(), but the general preference proved to be for a new syscall
with an extended stat structure.
A number of requests were gathered for features to be included. The
following have been included:
(1) Make the fields a consistent size on all arches and make them large.
(2) Spare space, request flags and information flags are provided for
future expansion.
(3) Better support for the y2038 problem [Arnd Bergmann] (tv_sec is an
__s64).
(4) Creation time: The SMB protocol carries the creation time, which could
be exported by Samba, which will in turn help CIFS make use of
FS-Cache as that can be used for coherency data (stx_btime).
This is also specified in NFSv4 as a recommended attribute and could
be exported by NFSD [Steve French].
(5) Lightweight stat: Ask for just those details of interest, and allow a
netfs (such as NFS) to approximate anything not of interest, possibly
without going to the server [Trond Myklebust, Ulrich Drepper, Andreas
Dilger] (AT_STATX_DONT_SYNC).
(6) Heavyweight stat: Force a netfs to go to the server, even if it thinks
its cached attributes are up to date [Trond Myklebust]
(AT_STATX_FORCE_SYNC).
And the following have been left out for future extension:
(7) Data version number: Could be used by userspace NFS servers [Aneesh
Kumar].
Can also be used to modify fill_post_wcc() in NFSD which retrieves
i_version directly, but has just called vfs_getattr(). It could get
it from the kstat struct if it used vfs_xgetattr() instead.
(There's disagreement on the exact semantics of a single field, since
not all filesystems do this the same way).
(8) BSD stat compatibility: Including more fields from the BSD stat such
as creation time (st_btime) and inode generation number (st_gen)
[Jeremy Allison, Bernd Schubert].
(9) Inode generation number: Useful for FUSE and userspace NFS servers
[Bernd Schubert].
(This was asked for but later deemed unnecessary with the
open-by-handle capability available and caused disagreement as to
whether it's a security hole or not).
(10) Extra coherency data may be useful in making backups [Andreas Dilger].
(No particular data were offered, but things like last backup
timestamp, the data version number and the DOS archive bit would come
into this category).
(11) Allow the filesystem to indicate what it can/cannot provide: A
filesystem can now say it doesn't support a standard stat feature if
that isn't available, so if, for instance, inode numbers or UIDs don't
exist or are fabricated locally...
(This requires a separate system call - I have an fsinfo() call idea
for this).
(12) Store a 16-byte volume ID in the superblock that can be returned in
struct xstat [Steve French].
(Deferred to fsinfo).
(13) Include granularity fields in the time data to indicate the
granularity of each of the times (NFSv4 time_delta) [Steve French].
(Deferred to fsinfo).
(14) FS_IOC_GETFLAGS value. These could be translated to BSD's st_flags.
Note that the Linux IOC flags are a mess and filesystems such as Ext4
define flags that aren't in linux/fs.h, so translation in the kernel
may be a necessity (or, possibly, we provide the filesystem type too).
(Some attributes are made available in stx_attributes, but the general
feeling was that the IOC flags were too ext[234]-specific and shouldn't
be exposed through statx this way).
(15) Mask of features available on file (eg: ACLs, seclabel) [Brad Boyer,
Michael Kerrisk].
(Deferred, probably to fsinfo. Finding out if there's an ACL or
seclabel might require extra filesystem operations).
(16) Femtosecond-resolution timestamps [Dave Chinner].
(A __reserved field has been left in the statx_timestamp struct for
this - if there proves to be a need).
(17) A set multiple attributes syscall to go with this.
===============
NEW SYSTEM CALL
===============
The new system call is:
int ret = statx(int dfd,
const char *filename,
unsigned int flags,
unsigned int mask,
struct statx *buffer);
The dfd, filename and flags parameters indicate the file to query, in a
similar way to fstatat(). There is no equivalent of lstat() as that can be
emulated with statx() by passing AT_SYMLINK_NOFOLLOW in flags. There is
also no equivalent of fstat() as that can be emulated by passing a NULL
filename to statx() with the fd of interest in dfd.
Whether or not statx() synchronises the attributes with the backing store
can be controlled by OR'ing a value into the flags argument (this typically
only affects network filesystems):
(1) AT_STATX_SYNC_AS_STAT tells statx() to behave as stat() does in this
respect.
(2) AT_STATX_FORCE_SYNC will require a network filesystem to synchronise
its attributes with the server - which might require data writeback to
occur to get the timestamps correct.
(3) AT_STATX_DONT_SYNC will suppress synchronisation with the server in a
network filesystem. The resulting values should be considered
approximate.
mask is a bitmask indicating the fields in struct statx that are of
interest to the caller. The user should set this to STATX_BASIC_STATS to
get the basic set returned by stat(). It should be noted that asking for
more information may entail extra I/O operations.
buffer points to the destination for the data. This must be 256 bytes in
size.
======================
MAIN ATTRIBUTES RECORD
======================
The following structures are defined in which to return the main attribute
set:
struct statx_timestamp {
__s64 tv_sec;
__s32 tv_nsec;
__s32 __reserved;
};
struct statx {
__u32 stx_mask;
__u32 stx_blksize;
__u64 stx_attributes;
__u32 stx_nlink;
__u32 stx_uid;
__u32 stx_gid;
__u16 stx_mode;
__u16 __spare0[1];
__u64 stx_ino;
__u64 stx_size;
__u64 stx_blocks;
__u64 __spare1[1];
struct statx_timestamp stx_atime;
struct statx_timestamp stx_btime;
struct statx_timestamp stx_ctime;
struct statx_timestamp stx_mtime;
__u32 stx_rdev_major;
__u32 stx_rdev_minor;
__u32 stx_dev_major;
__u32 stx_dev_minor;
__u64 __spare2[14];
};
The defined bits in request_mask and stx_mask are:
STATX_TYPE Want/got stx_mode & S_IFMT
STATX_MODE Want/got stx_mode & ~S_IFMT
STATX_NLINK Want/got stx_nlink
STATX_UID Want/got stx_uid
STATX_GID Want/got stx_gid
STATX_ATIME Want/got stx_atime{,_ns}
STATX_MTIME Want/got stx_mtime{,_ns}
STATX_CTIME Want/got stx_ctime{,_ns}
STATX_INO Want/got stx_ino
STATX_SIZE Want/got stx_size
STATX_BLOCKS Want/got stx_blocks
STATX_BASIC_STATS [The stuff in the normal stat struct]
STATX_BTIME Want/got stx_btime{,_ns}
STATX_ALL [All currently available stuff]
stx_btime is the file creation time, stx_mask is a bitmask indicating the
data provided and __spare*[] are where as-yet undefined fields can be
placed.
Time fields are structures with separate seconds and nanoseconds fields
plus a reserved field in case we want to add even finer resolution. Note
that times will be negative if before 1970; in such a case, the nanosecond
fields will also be negative if not zero.
The bits defined in the stx_attributes field convey information about a
file, how it is accessed, where it is and what it does. The following
attributes map to FS_*_FL flags and are the same numerical value:
STATX_ATTR_COMPRESSED File is compressed by the fs
STATX_ATTR_IMMUTABLE File is marked immutable
STATX_ATTR_APPEND File is append-only
STATX_ATTR_NODUMP File is not to be dumped
STATX_ATTR_ENCRYPTED File requires key to decrypt in fs
Within the kernel, the supported flags are listed by:
KSTAT_ATTR_FS_IOC_FLAGS
[Are any other IOC flags of sufficient general interest to be exposed
through this interface?]
New flags include:
STATX_ATTR_AUTOMOUNT Object is an automount trigger
These are for the use of GUI tools that might want to mark files specially,
depending on what they are.
Fields in struct statx come in a number of classes:
(0) stx_dev_*, stx_blksize.
These are local system information and are always available.
(1) stx_mode, stx_nlink, stx_uid, stx_gid, stx_[amc]time, stx_ino,
stx_size, stx_blocks.
These will be returned whether the caller asks for them or not. The
corresponding bits in stx_mask will be set to indicate whether they
actually have valid values.
If the caller didn't ask for them, then they may be approximated. For
example, NFS won't waste any time updating them from the server,
unless as a byproduct of updating something requested.
If the values don't actually exist for the underlying object (such as
UID or GID on a DOS file), then the bit won't be set in the stx_mask,
even if the caller asked for the value. In such a case, the returned
value will be a fabrication.
Note that there are instances where the type might not be valid, for
instance Windows reparse points.
(2) stx_rdev_*.
This will be set only if stx_mode indicates we're looking at a
blockdev or a chardev, otherwise will be 0.
(3) stx_btime.
Similar to (1), except this will be set to 0 if it doesn't exist.
=======
TESTING
=======
The following test program can be used to test the statx system call:
samples/statx/test-statx.c
Just compile and run, passing it paths to the files you want to examine.
The file is built automatically if CONFIG_SAMPLES is enabled.
Here's some example output. Firstly, an NFS directory that crosses to
another FSID. Note that the AUTOMOUNT attribute is set because transiting
this directory will cause d_automount to be invoked by the VFS.
[root@andromeda ~]# /tmp/test-statx -A /warthog/data
statx(/warthog/data) = 0
results=7ff
Size: 4096 Blocks: 8 IO Block: 1048576 directory
Device: 00:26 Inode: 1703937 Links: 125
Access: (3777/drwxrwxrwx) Uid: 0 Gid: 4041
Access: 2016-11-24 09:02:12.219699527+0000
Modify: 2016-11-17 10:44:36.225653653+0000
Change: 2016-11-17 10:44:36.225653653+0000
Attributes: 0000000000001000 (-------- -------- -------- -------- -------- -------- ---m---- --------)
Secondly, the result of automounting on that directory.
[root@andromeda ~]# /tmp/test-statx /warthog/data
statx(/warthog/data) = 0
results=7ff
Size: 4096 Blocks: 8 IO Block: 1048576 directory
Device: 00:27 Inode: 2 Links: 125
Access: (3777/drwxrwxrwx) Uid: 0 Gid: 4041
Access: 2016-11-24 09:02:12.219699527+0000
Modify: 2016-11-17 10:44:36.225653653+0000
Change: 2016-11-17 10:44:36.225653653+0000
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
2017-02-01 00:46:22 +08:00
|
|
|
extern int afs_getattr(const struct path *, struct kstat *, u32, unsigned int);
|
AFS: implement basic file write support
Implement support for writing to regular AFS files, including:
(1) write
(2) truncate
(3) fsync, fdatasync
(4) chmod, chown, chgrp, utime.
AFS writeback attempts to batch writes into chunks as large as it can manage
up to the point that it writes back 65535 pages in one chunk or it meets a
locked page.
Furthermore, if a page has been written to using a particular key, then should
another write to that page use some other key, the first write will be flushed
before the second is allowed to take place. If the first write fails due to a
security error, then the page will be scrapped and reread before the second
write takes place.
If a page is dirty and the callback on it is broken by the server, then the
dirty data is not discarded (same behaviour as NFS).
Shared-writable mappings are not supported by this patch.
[akpm@linux-foundation.org: fix a bunch of warnings]
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-05-09 17:33:46 +08:00
|
|
|
extern int afs_setattr(struct dentry *, struct iattr *);
|
2010-06-08 02:34:48 +08:00
|
|
|
extern void afs_evict_inode(struct inode *);
|
2010-08-11 16:38:04 +08:00
|
|
|
extern int afs_drop_inode(struct inode *);
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* main.c
|
|
|
|
*/
|
2011-01-14 23:56:37 +08:00
|
|
|
extern struct workqueue_struct *afs_wq;
|
2017-11-02 23:27:45 +08:00
|
|
|
|
|
|
|
/*
 * Get the AFS network namespace record for a dentry.  The dentry argument is
 * not examined here: the address of __afs_net is returned unconditionally.
 */
static inline struct afs_net *afs_d2net(struct dentry *dentry)
|
|
|
|
{
|
|
|
|
return &__afs_net;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Get the AFS network namespace record for an inode.  The inode argument is
 * not examined here: the address of __afs_net is returned unconditionally.
 */
static inline struct afs_net *afs_i2net(struct inode *inode)
|
|
|
|
{
|
|
|
|
return &__afs_net;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Get the AFS network namespace record for a vnode.  The vnode argument is
 * not examined here: the address of __afs_net is returned unconditionally.
 */
static inline struct afs_net *afs_v2net(struct afs_vnode *vnode)
|
|
|
|
{
|
|
|
|
return &__afs_net;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Get the AFS network namespace record for a socket.  The sock argument is
 * not examined here: the address of __afs_net is returned unconditionally.
 */
static inline struct afs_net *afs_sock2net(struct sock *sk)
|
|
|
|
{
|
|
|
|
return &__afs_net;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * "Get" an AFS network namespace record.  As written this simply hands back
 * the pointer it was given; no count or other state is modified here.
 * NOTE(review): presumably a placeholder for reference counting - confirm.
 */
static inline struct afs_net *afs_get_net(struct afs_net *net)
|
|
|
|
{
|
|
|
|
return net;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * "Put" an AFS network namespace record.  The body is empty, so calling this
 * has no effect on the record passed in.
 * NOTE(review): presumably a placeholder for dropping a reference - confirm.
 */
static inline void afs_put_net(struct afs_net *net)
|
|
|
|
{
|
|
|
|
}
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2007-04-27 06:55:03 +08:00
|
|
|
/*
|
|
|
|
* misc.c
|
|
|
|
*/
|
|
|
|
extern int afs_abort_to_error(u32);
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
/*
|
|
|
|
* mntpt.c
|
|
|
|
*/
|
2007-02-12 16:55:38 +08:00
|
|
|
extern const struct inode_operations afs_mntpt_inode_operations;
|
2010-08-11 16:38:04 +08:00
|
|
|
extern const struct inode_operations afs_autocell_inode_operations;
|
2006-03-28 17:56:42 +08:00
|
|
|
extern const struct file_operations afs_mntpt_file_operations;
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2011-01-15 03:04:05 +08:00
|
|
|
extern struct vfsmount *afs_d_automount(struct path *);
|
2007-04-27 06:55:03 +08:00
|
|
|
extern void afs_mntpt_kill_timer(void);
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2017-02-11 00:34:07 +08:00
|
|
|
/*
|
|
|
|
* netdevices.c
|
|
|
|
*/
|
|
|
|
extern int afs_get_ipv4_interfaces(struct afs_interface *, size_t, bool);
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
/*
|
|
|
|
* proc.c
|
|
|
|
*/
|
2017-11-02 23:27:45 +08:00
|
|
|
extern int __net_init afs_proc_init(struct afs_net *);
|
|
|
|
extern void __net_exit afs_proc_cleanup(struct afs_net *);
|
|
|
|
extern int afs_proc_cell_setup(struct afs_net *, struct afs_cell *);
|
|
|
|
extern void afs_proc_cell_remove(struct afs_net *, struct afs_cell *);
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2007-04-27 06:55:03 +08:00
|
|
|
/*
|
|
|
|
* rxrpc.c
|
|
|
|
*/
|
2017-11-02 23:27:45 +08:00
|
|
|
extern struct workqueue_struct *afs_async_calls;
|
2016-08-30 16:49:29 +08:00
|
|
|
|
2017-11-02 23:27:45 +08:00
|
|
|
extern int __net_init afs_open_socket(struct afs_net *);
|
|
|
|
extern void __net_exit afs_close_socket(struct afs_net *);
|
|
|
|
extern void afs_charge_preallocation(struct work_struct *);
|
2017-01-05 18:38:36 +08:00
|
|
|
extern void afs_put_call(struct afs_call *);
|
|
|
|
extern int afs_queue_call_work(struct afs_call *);
|
2017-11-02 23:27:48 +08:00
|
|
|
extern long afs_make_call(struct sockaddr_rxrpc *, struct afs_call *, gfp_t, bool);
|
2017-11-02 23:27:45 +08:00
|
|
|
extern struct afs_call *afs_alloc_flat_call(struct afs_net *,
|
|
|
|
const struct afs_call_type *,
|
2007-04-27 06:55:03 +08:00
|
|
|
size_t, size_t);
|
|
|
|
extern void afs_flat_call_destructor(struct afs_call *);
|
|
|
|
extern void afs_send_empty_reply(struct afs_call *);
|
2007-04-27 06:58:17 +08:00
|
|
|
extern void afs_send_simple_reply(struct afs_call *, const void *, size_t);
|
rxrpc: Don't expose skbs to in-kernel users [ver #2]
Don't expose skbs to in-kernel users, such as the AFS filesystem, but
instead provide a notification hook that indicates that a call needs
attention and another that indicates that there's a new call to be
collected.
This makes the following possibilities more achievable:
(1) Call refcounting can be made simpler if skbs don't hold refs to calls.
(2) skbs referring to non-data events will be able to be freed much sooner
rather than being queued for AFS to pick up as rxrpc_kernel_recv_data
will be able to consult the call state.
(3) We can shortcut the receive phase when a call is remotely aborted
because we don't have to go through all the packets to get to the one
cancelling the operation.
(4) It makes it easier to do encryption/decryption directly between AFS's
buffers and sk_buffs.
(5) Encryption/decryption can more easily be done in the AFS's thread
contexts - usually that of the userspace process that issued a syscall
- rather than in one of rxrpc's background threads on a workqueue.
(6) AFS will be able to wait synchronously on a call inside AF_RXRPC.
To make this work, the following interface function has been added:
int rxrpc_kernel_recv_data(
struct socket *sock, struct rxrpc_call *call,
void *buffer, size_t bufsize, size_t *_offset,
bool want_more, u32 *_abort_code);
This is the recvmsg equivalent. It allows the caller to find out about the
state of a specific call and to transfer received data into a buffer
piecemeal.
afs_extract_data() and rxrpc_kernel_recv_data() now do all the extraction
logic between them. They don't wait synchronously yet because the socket
lock needs to be dealt with.
Five interface functions have been removed:
rxrpc_kernel_is_data_last()
rxrpc_kernel_get_abort_code()
rxrpc_kernel_get_error_number()
rxrpc_kernel_free_skb()
rxrpc_kernel_data_consumed()
As a temporary hack, sk_buffs going to an in-kernel call are queued on the
rxrpc_call struct (->knlrecv_queue) rather than being handed over to the
in-kernel user. To process the queue internally, a temporary function,
temp_deliver_data() has been added. This will be replaced with common code
between the rxrpc_recvmsg() path and the rxrpc_kernel_recv_data() path in a
future patch.
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-08-31 03:42:14 +08:00
|
|
|
extern int afs_extract_data(struct afs_call *, void *, size_t, bool);
|
2007-04-27 06:55:03 +08:00
|
|
|
|
rxrpc: Don't expose skbs to in-kernel users [ver #2]
Don't expose skbs to in-kernel users, such as the AFS filesystem, but
instead provide a notification hook that indicates that a call needs
attention and another that indicates that there's a new call to be
collected.
This makes the following possibilities more achievable:
(1) Call refcounting can be made simpler if skbs don't hold refs to calls.
(2) skbs referring to non-data events will be able to be freed much sooner
rather than being queued for AFS to pick up as rxrpc_kernel_recv_data
will be able to consult the call state.
(3) We can shortcut the receive phase when a call is remotely aborted
because we don't have to go through all the packets to get to the one
cancelling the operation.
(4) It makes it easier to do encryption/decryption directly between AFS's
buffers and sk_buffs.
(5) Encryption/decryption can more easily be done in the AFS's thread
contexts - usually that of the userspace process that issued a syscall
- rather than in one of rxrpc's background threads on a workqueue.
(6) AFS will be able to wait synchronously on a call inside AF_RXRPC.
To make this work, the following interface function has been added:
int rxrpc_kernel_recv_data(
struct socket *sock, struct rxrpc_call *call,
void *buffer, size_t bufsize, size_t *_offset,
bool want_more, u32 *_abort_code);
This is the recvmsg equivalent. It allows the caller to find out about the
state of a specific call and to transfer received data into a buffer
piecemeal.
afs_extract_data() and rxrpc_kernel_recv_data() now do all the extraction
logic between them. They don't wait synchronously yet because the socket
lock needs to be dealt with.
Five interface functions have been removed:
rxrpc_kernel_is_data_last()
rxrpc_kernel_get_abort_code()
rxrpc_kernel_get_error_number()
rxrpc_kernel_free_skb()
rxrpc_kernel_data_consumed()
As a temporary hack, sk_buffs going to an in-kernel call are queued on the
rxrpc_call struct (->knlrecv_queue) rather than being handed over to the
in-kernel user. To process the queue internally, a temporary function,
temp_deliver_data() has been added. This will be replaced with common code
between the rxrpc_recvmsg() path and the rxrpc_kernel_recv_data() path in a
future patch.
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-08-31 03:42:14 +08:00
|
|
|
static inline int afs_transfer_reply(struct afs_call *call)
|
rxrpc: Fix races between skb free, ACK generation and replying
Inside the kafs filesystem it is possible to occasionally have a call
processed and terminated before we've had a chance to check whether we need
to clean up the rx queue for that call because afs_send_simple_reply() ends
the call when it is done, but this is done in a workqueue item that might
happen to run to completion before afs_deliver_to_call() completes.
Further, it is possible for rxrpc_kernel_send_data() to be called to send a
reply before the last request-phase data skb is released. The rxrpc skb
destructor is where the ACK processing is done and the call state is
advanced upon release of the last skb. ACK generation is also deferred to
a work item because it's possible that the skb destructor is not called in
a context where kernel_sendmsg() can be invoked.
To this end, the following changes are made:
(1) kernel_rxrpc_data_consumed() is added. This should be called whenever
an skb is emptied so as to crank the ACK and call states. This does
not release the skb, however. kernel_rxrpc_free_skb() must now be
called to achieve that. These together replace
rxrpc_kernel_data_delivered().
(2) kernel_rxrpc_data_consumed() is wrapped by afs_data_consumed().
This makes afs_deliver_to_call() easier to work as the skb can simply
be discarded unconditionally here without trying to work out what the
return value of the ->deliver() function means.
The ->deliver() functions can, via afs_data_complete(),
afs_transfer_reply() and afs_extract_data() mark that an skb has been
consumed (thereby cranking the state) without the need to
conditionally free the skb to make sure the state is correct on an
incoming call for when the call processor tries to send the reply.
(3) rxrpc_recvmsg() now has to call kernel_rxrpc_data_consumed() when it
has finished with a packet and MSG_PEEK isn't set.
(4) rxrpc_packet_destructor() no longer calls rxrpc_hard_ACK_data().
Because of this, we no longer need to clear the destructor and put the
call before we free the skb in cases where we don't want the ACK/call
state to be cranked.
(5) The ->deliver() call-type callbacks are made to return -EAGAIN rather
than 0 if they expect more data (afs_extract_data() returns -EAGAIN to
the delivery function already), and the caller is now responsible for
producing an abort if that was the last packet.
(6) There are many bits of unmarshalling code where:
ret = afs_extract_data(call, skb, last, ...);
switch (ret) {
case 0: break;
case -EAGAIN: return 0;
default: return ret;
}
is to be found. As -EAGAIN can now be passed back to the caller, we
now just return if ret < 0:
ret = afs_extract_data(call, skb, last, ...);
if (ret < 0)
return ret;
(7) Checks for trailing data and empty final data packets have been
consolidated as afs_data_complete(). So:
if (skb->len > 0)
return -EBADMSG;
if (!last)
return 0;
becomes:
ret = afs_data_complete(call, skb, last);
if (ret < 0)
return ret;
(8) afs_transfer_reply() now checks the amount of data it has against the
amount of data desired and the amount of data in the skb and returns
an error to induce an abort if we don't get exactly what we want.
Without these changes, the following oops can occasionally be observed,
particularly if some printks are inserted into the delivery path:
general protection fault: 0000 [#1] SMP
Modules linked in: kafs(E) af_rxrpc(E) [last unloaded: af_rxrpc]
CPU: 0 PID: 1305 Comm: kworker/u8:3 Tainted: G E 4.7.0-fsdevel+ #1303
Hardware name: ASUS All Series/H97-PLUS, BIOS 2306 10/09/2014
Workqueue: kafsd afs_async_workfn [kafs]
task: ffff88040be041c0 ti: ffff88040c070000 task.ti: ffff88040c070000
RIP: 0010:[<ffffffff8108fd3c>] [<ffffffff8108fd3c>] __lock_acquire+0xcf/0x15a1
RSP: 0018:ffff88040c073bc0 EFLAGS: 00010002
RAX: 6b6b6b6b6b6b6b6b RBX: 0000000000000000 RCX: ffff88040d29a710
RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff88040d29a710
RBP: ffff88040c073c70 R08: 0000000000000001 R09: 0000000000000001
R10: 0000000000000001 R11: 0000000000000000 R12: 0000000000000000
R13: 0000000000000000 R14: ffff88040be041c0 R15: ffffffff814c928f
FS: 0000000000000000(0000) GS:ffff88041fa00000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007fa4595f4750 CR3: 0000000001c14000 CR4: 00000000001406f0
Stack:
0000000000000006 000000000be04930 0000000000000000 ffff880400000000
ffff880400000000 ffffffff8108f847 ffff88040be041c0 ffffffff81050446
ffff8803fc08a920 ffff8803fc08a958 ffff88040be041c0 ffff88040c073c38
Call Trace:
[<ffffffff8108f847>] ? mark_held_locks+0x5e/0x74
[<ffffffff81050446>] ? __local_bh_enable_ip+0x9b/0xa1
[<ffffffff8108f9ca>] ? trace_hardirqs_on_caller+0x16d/0x189
[<ffffffff810915f4>] lock_acquire+0x122/0x1b6
[<ffffffff810915f4>] ? lock_acquire+0x122/0x1b6
[<ffffffff814c928f>] ? skb_dequeue+0x18/0x61
[<ffffffff81609dbf>] _raw_spin_lock_irqsave+0x35/0x49
[<ffffffff814c928f>] ? skb_dequeue+0x18/0x61
[<ffffffff814c928f>] skb_dequeue+0x18/0x61
[<ffffffffa009aa92>] afs_deliver_to_call+0x344/0x39d [kafs]
[<ffffffffa009ab37>] afs_process_async_call+0x4c/0xd5 [kafs]
[<ffffffffa0099e9c>] afs_async_workfn+0xe/0x10 [kafs]
[<ffffffff81063a3a>] process_one_work+0x29d/0x57c
[<ffffffff81064ac2>] worker_thread+0x24a/0x385
[<ffffffff81064878>] ? rescuer_thread+0x2d0/0x2d0
[<ffffffff810696f5>] kthread+0xf3/0xfb
[<ffffffff8160a6ff>] ret_from_fork+0x1f/0x40
[<ffffffff81069602>] ? kthread_create_on_node+0x1cf/0x1cf
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-08-03 21:11:40 +08:00
|
|
|
{
|
rxrpc: Don't expose skbs to in-kernel users [ver #2]
Don't expose skbs to in-kernel users, such as the AFS filesystem, but
instead provide a notification hook that indicates that a call needs
attention and another that indicates that there's a new call to be
collected.
This makes the following possibilities more achievable:
(1) Call refcounting can be made simpler if skbs don't hold refs to calls.
(2) skbs referring to non-data events will be able to be freed much sooner
rather than being queued for AFS to pick up as rxrpc_kernel_recv_data
will be able to consult the call state.
(3) We can shortcut the receive phase when a call is remotely aborted
because we don't have to go through all the packets to get to the one
cancelling the operation.
(4) It makes it easier to do encryption/decryption directly between AFS's
buffers and sk_buffs.
(5) Encryption/decryption can more easily be done in the AFS's thread
contexts - usually that of the userspace process that issued a syscall
- rather than in one of rxrpc's background threads on a workqueue.
(6) AFS will be able to wait synchronously on a call inside AF_RXRPC.
To make this work, the following interface function has been added:
int rxrpc_kernel_recv_data(
struct socket *sock, struct rxrpc_call *call,
void *buffer, size_t bufsize, size_t *_offset,
bool want_more, u32 *_abort_code);
This is the recvmsg equivalent. It allows the caller to find out about the
state of a specific call and to transfer received data into a buffer
piecemeal.
afs_extract_data() and rxrpc_kernel_recv_data() now do all the extraction
logic between them. They don't wait synchronously yet because the socket
lock needs to be dealt with.
Five interface functions have been removed:
rxrpc_kernel_is_data_last()
rxrpc_kernel_get_abort_code()
rxrpc_kernel_get_error_number()
rxrpc_kernel_free_skb()
rxrpc_kernel_data_consumed()
As a temporary hack, sk_buffs going to an in-kernel call are queued on the
rxrpc_call struct (->knlrecv_queue) rather than being handed over to the
in-kernel user. To process the queue internally, a temporary function,
temp_deliver_data() has been added. This will be replaced with common code
between the rxrpc_recvmsg() path and the rxrpc_kernel_recv_data() path in a
future patch.
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-08-31 03:42:14 +08:00
|
|
|
return afs_extract_data(call, call->buffer, call->reply_max, false);
|
rxrpc: Fix races between skb free, ACK generation and replying
Inside the kafs filesystem it is possible to occasionally have a call
processed and terminated before we've had a chance to check whether we need
to clean up the rx queue for that call because afs_send_simple_reply() ends
the call when it is done, but this is done in a workqueue item that might
happen to run to completion before afs_deliver_to_call() completes.
Further, it is possible for rxrpc_kernel_send_data() to be called to send a
reply before the last request-phase data skb is released. The rxrpc skb
destructor is where the ACK processing is done and the call state is
advanced upon release of the last skb. ACK generation is also deferred to
a work item because it's possible that the skb destructor is not called in
a context where kernel_sendmsg() can be invoked.
To this end, the following changes are made:
(1) kernel_rxrpc_data_consumed() is added. This should be called whenever
an skb is emptied so as to crank the ACK and call states. This does
not release the skb, however. kernel_rxrpc_free_skb() must now be
called to achieve that. These together replace
rxrpc_kernel_data_delivered().
(2) kernel_rxrpc_data_consumed() is wrapped by afs_data_consumed().
This makes afs_deliver_to_call() easier to work with, as the skb can simply
be discarded unconditionally here without trying to work out what the
return value of the ->deliver() function means.
The ->deliver() functions can, via afs_data_complete(),
afs_transfer_reply() and afs_extract_data() mark that an skb has been
consumed (thereby cranking the state) without the need to
conditionally free the skb to make sure the state is correct on an
incoming call for when the call processor tries to send the reply.
(3) rxrpc_recvmsg() now has to call kernel_rxrpc_data_consumed() when it
has finished with a packet and MSG_PEEK isn't set.
(4) rxrpc_packet_destructor() no longer calls rxrpc_hard_ACK_data().
Because of this, we no longer need to clear the destructor and put the
call before we free the skb in cases where we don't want the ACK/call
state to be cranked.
(5) The ->deliver() call-type callbacks are made to return -EAGAIN rather
than 0 if they expect more data (afs_extract_data() returns -EAGAIN to
the delivery function already), and the caller is now responsible for
producing an abort if that was the last packet.
(6) There are many bits of unmarshalling code where:
ret = afs_extract_data(call, skb, last, ...);
switch (ret) {
case 0: break;
case -EAGAIN: return 0;
default: return ret;
}
is to be found. As -EAGAIN can now be passed back to the caller, we
now just return if ret < 0:
ret = afs_extract_data(call, skb, last, ...);
if (ret < 0)
return ret;
(7) Checks for trailing data and empty final data packets have been
consolidated as afs_data_complete(). So:
if (skb->len > 0)
return -EBADMSG;
if (!last)
return 0;
becomes:
ret = afs_data_complete(call, skb, last);
if (ret < 0)
return ret;
(8) afs_transfer_reply() now checks the amount of data it has against the
amount of data desired and the amount of data in the skb and returns
an error to induce an abort if we don't get exactly what we want.
Without these changes, the following oops can occasionally be observed,
particularly if some printks are inserted into the delivery path:
general protection fault: 0000 [#1] SMP
Modules linked in: kafs(E) af_rxrpc(E) [last unloaded: af_rxrpc]
CPU: 0 PID: 1305 Comm: kworker/u8:3 Tainted: G E 4.7.0-fsdevel+ #1303
Hardware name: ASUS All Series/H97-PLUS, BIOS 2306 10/09/2014
Workqueue: kafsd afs_async_workfn [kafs]
task: ffff88040be041c0 ti: ffff88040c070000 task.ti: ffff88040c070000
RIP: 0010:[<ffffffff8108fd3c>] [<ffffffff8108fd3c>] __lock_acquire+0xcf/0x15a1
RSP: 0018:ffff88040c073bc0 EFLAGS: 00010002
RAX: 6b6b6b6b6b6b6b6b RBX: 0000000000000000 RCX: ffff88040d29a710
RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff88040d29a710
RBP: ffff88040c073c70 R08: 0000000000000001 R09: 0000000000000001
R10: 0000000000000001 R11: 0000000000000000 R12: 0000000000000000
R13: 0000000000000000 R14: ffff88040be041c0 R15: ffffffff814c928f
FS: 0000000000000000(0000) GS:ffff88041fa00000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007fa4595f4750 CR3: 0000000001c14000 CR4: 00000000001406f0
Stack:
0000000000000006 000000000be04930 0000000000000000 ffff880400000000
ffff880400000000 ffffffff8108f847 ffff88040be041c0 ffffffff81050446
ffff8803fc08a920 ffff8803fc08a958 ffff88040be041c0 ffff88040c073c38
Call Trace:
[<ffffffff8108f847>] ? mark_held_locks+0x5e/0x74
[<ffffffff81050446>] ? __local_bh_enable_ip+0x9b/0xa1
[<ffffffff8108f9ca>] ? trace_hardirqs_on_caller+0x16d/0x189
[<ffffffff810915f4>] lock_acquire+0x122/0x1b6
[<ffffffff810915f4>] ? lock_acquire+0x122/0x1b6
[<ffffffff814c928f>] ? skb_dequeue+0x18/0x61
[<ffffffff81609dbf>] _raw_spin_lock_irqsave+0x35/0x49
[<ffffffff814c928f>] ? skb_dequeue+0x18/0x61
[<ffffffff814c928f>] skb_dequeue+0x18/0x61
[<ffffffffa009aa92>] afs_deliver_to_call+0x344/0x39d [kafs]
[<ffffffffa009ab37>] afs_process_async_call+0x4c/0xd5 [kafs]
[<ffffffffa0099e9c>] afs_async_workfn+0xe/0x10 [kafs]
[<ffffffff81063a3a>] process_one_work+0x29d/0x57c
[<ffffffff81064ac2>] worker_thread+0x24a/0x385
[<ffffffff81064878>] ? rescuer_thread+0x2d0/0x2d0
[<ffffffff810696f5>] kthread+0xf3/0xfb
[<ffffffff8160a6ff>] ret_from_fork+0x1f/0x40
[<ffffffff81069602>] ? kthread_create_on_node+0x1cf/0x1cf
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-08-03 21:11:40 +08:00
|
|
|
}
|
|
|
|
|
2007-04-27 06:57:07 +08:00
|
|
|
/*
|
|
|
|
* security.c
|
|
|
|
*/
|
afs: Overhaul permit caching
Overhaul permit caching in AFS by making it per-vnode and sharing permit
lists where possible.
When most of the fileserver operations are called, they return a status
structure indicating the (revised) details of the vnode or vnodes involved
in the operation. This includes the access mark derived from the ACL
(named CallerAccess in the protocol definition file). This is cacheable
and if the ACL changes, the server will tell us that it is breaking the
callback promise, at which point we can discard the currently cached
permits.
With this patch, the afs_permits structure has, at the end, an array of
{ key, CallerAccess } elements, sorted by key pointer. This is then cached
in a hash table so that it can be shared between vnodes with the same
access permits.
Permit lists can only be shared if they contain the exact same set of
key->CallerAccess mappings.
Note that that table is global rather than being per-net_ns. If the keys
in a permit list cross net_ns boundaries, there is no problem sharing the
cached permits, since the permits are just integer masks.
Since permit lists pin keys, the permit cache also makes it easier for a
future patch to find all occurrences of a key and remove them by means of
setting the afs_permits::invalidated flag and then clearing the appropriate
key pointer. In such an event, memory barriers will need adding.
Lastly, the permit caching is skipped if the server has sent either a
vnode-specific or an entire-server callback since the start of the
operation.
Signed-off-by: David Howells <dhowells@redhat.com>
2017-11-02 23:27:49 +08:00
|
|
|
extern void afs_put_permits(struct afs_permits *);
|
2007-04-27 06:57:07 +08:00
|
|
|
extern void afs_clear_permits(struct afs_vnode *);
|
afs: Overhaul permit caching
Overhaul permit caching in AFS by making it per-vnode and sharing permit
lists where possible.
When most of the fileserver operations are called, they return a status
structure indicating the (revised) details of the vnode or vnodes involved
in the operation. This includes the access mark derived from the ACL
(named CallerAccess in the protocol definition file). This is cacheable
and if the ACL changes, the server will tell us that it is breaking the
callback promise, at which point we can discard the currently cached
permits.
With this patch, the afs_permits structure has, at the end, an array of
{ key, CallerAccess } elements, sorted by key pointer. This is then cached
in a hash table so that it can be shared between vnodes with the same
access permits.
Permit lists can only be shared if they contain the exact same set of
key->CallerAccess mappings.
Note that that table is global rather than being per-net_ns. If the keys
in a permit list cross net_ns boundaries, there is no problem sharing the
cached permits, since the permits are just integer masks.
Since permit lists pin keys, the permit cache also makes it easier for a
future patch to find all occurrences of a key and remove them by means of
setting the afs_permits::invalidated flag and then clearing the appropriate
key pointer. In such an event, memory barriers will need adding.
Lastly, the permit caching is skipped if the server has sent either a
vnode-specific or an entire-server callback since the start of the
operation.
Signed-off-by: David Howells <dhowells@redhat.com>
2017-11-02 23:27:49 +08:00
|
|
|
extern void afs_cache_permit(struct afs_vnode *, struct key *, unsigned int);
|
2007-05-09 17:33:45 +08:00
|
|
|
extern void afs_zap_permits(struct rcu_head *);
|
2007-04-27 06:57:07 +08:00
|
|
|
extern struct key *afs_request_key(struct afs_cell *);
|
2011-06-21 07:28:19 +08:00
|
|
|
extern int afs_permission(struct inode *, int);
|
afs: Overhaul permit caching
Overhaul permit caching in AFS by making it per-vnode and sharing permit
lists where possible.
When most of the fileserver operations are called, they return a status
structure indicating the (revised) details of the vnode or vnodes involved
in the operation. This includes the access mark derived from the ACL
(named CallerAccess in the protocol definition file). This is cacheable
and if the ACL changes, the server will tell us that it is breaking the
callback promise, at which point we can discard the currently cached
permits.
With this patch, the afs_permits structure has, at the end, an array of
{ key, CallerAccess } elements, sorted by key pointer. This is then cached
in a hash table so that it can be shared between vnodes with the same
access permits.
Permit lists can only be shared if they contain the exact same set of
key->CallerAccess mappings.
Note that that table is global rather than being per-net_ns. If the keys
in a permit list cross net_ns boundaries, there is no problem sharing the
cached permits, since the permits are just integer masks.
Since permit lists pin keys, the permit cache also makes it easier for a
future patch to find all occurrences of a key and remove them by means of
setting the afs_permits::invalidated flag and then clearing the appropriate
key pointer. In such an event, memory barriers will need adding.
Lastly, the permit caching is skipped if the server has sent either a
vnode-specific or an entire-server callback since the start of the
operation.
Signed-off-by: David Howells <dhowells@redhat.com>
2017-11-02 23:27:49 +08:00
|
|
|
extern void __exit afs_clean_up_permit_cache(void);
|
2007-04-27 06:57:07 +08:00
|
|
|
|
2007-04-27 06:55:03 +08:00
|
|
|
/*
|
|
|
|
* server.c
|
|
|
|
*/
|
|
|
|
extern spinlock_t afs_server_peer_lock;
|
|
|
|
|
2017-11-02 23:27:49 +08:00
|
|
|
/*
 * Atomically increment the usage counter of a server record and return the
 * same pointer that was passed in.  The pointer is dereferenced
 * unconditionally, so the caller must not pass NULL.
 * NOTE(review): presumably balanced by afs_put_server() - confirm in server.c.
 */
static inline struct afs_server *afs_get_server(struct afs_server *server)
|
|
|
|
{
|
|
|
|
atomic_inc(&server->usage);
|
|
|
|
return server;
|
|
|
|
}
|
2007-04-27 06:55:03 +08:00
|
|
|
|
2017-11-02 23:27:45 +08:00
|
|
|
extern void afs_server_timer(struct timer_list *);
|
2007-04-27 06:55:03 +08:00
|
|
|
extern struct afs_server *afs_lookup_server(struct afs_cell *,
|
2017-11-02 23:27:47 +08:00
|
|
|
struct sockaddr_rxrpc *);
|
2017-11-02 23:27:45 +08:00
|
|
|
extern struct afs_server *afs_find_server(struct afs_net *,
|
|
|
|
const struct sockaddr_rxrpc *);
|
2017-11-02 23:27:46 +08:00
|
|
|
extern void afs_put_server(struct afs_net *, struct afs_server *);
|
2017-11-02 23:27:45 +08:00
|
|
|
extern void afs_reap_server(struct work_struct *);
|
|
|
|
extern void __net_exit afs_purge_servers(struct afs_net *);
|
2007-04-27 06:55:03 +08:00
|
|
|
|
2007-04-27 06:57:07 +08:00
|
|
|
/*
|
|
|
|
* super.c
|
|
|
|
*/
|
2017-11-02 23:27:45 +08:00
|
|
|
extern int __init afs_fs_init(void);
|
|
|
|
extern void __exit afs_fs_exit(void);
|
2007-04-27 06:57:07 +08:00
|
|
|
|
2007-04-27 06:55:03 +08:00
|
|
|
/*
|
|
|
|
* vlclient.c
|
|
|
|
*/
|
2017-11-02 23:27:45 +08:00
|
|
|
extern int afs_vl_get_entry_by_name(struct afs_net *,
|
2017-11-02 23:27:47 +08:00
|
|
|
struct sockaddr_rxrpc *, struct key *,
|
2007-04-27 06:57:07 +08:00
|
|
|
const char *, struct afs_cache_vlocation *,
|
2017-01-05 18:38:36 +08:00
|
|
|
bool);
|
2017-11-02 23:27:45 +08:00
|
|
|
extern int afs_vl_get_entry_by_id(struct afs_net *,
|
2017-11-02 23:27:47 +08:00
|
|
|
struct sockaddr_rxrpc *, struct key *,
|
2007-04-27 06:57:07 +08:00
|
|
|
afs_volid_t, afs_voltype_t,
|
2017-01-05 18:38:36 +08:00
|
|
|
struct afs_cache_vlocation *, bool);
|
2007-04-27 06:55:03 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* vlocation.c
|
|
|
|
*/
|
2017-11-02 23:27:45 +08:00
|
|
|
extern struct workqueue_struct *afs_vlocation_update_worker;
|
|
|
|
|
2007-04-27 06:55:03 +08:00
|
|
|
/*
 * Atomically increment the usage counter of vlocation record V.  Expands to a
 * do/while(0) statement, so it yields no value, and V is dereferenced
 * unconditionally.
 */
#define afs_get_vlocation(V) do { atomic_inc(&(V)->usage); } while(0)
|
|
|
|
|
2017-11-02 23:27:45 +08:00
|
|
|
extern struct afs_vlocation *afs_vlocation_lookup(struct afs_net *,
|
|
|
|
struct afs_cell *,
|
2007-04-27 06:57:07 +08:00
|
|
|
struct key *,
|
2007-04-27 06:55:03 +08:00
|
|
|
const char *, size_t);
|
2017-11-02 23:27:45 +08:00
|
|
|
extern void afs_put_vlocation(struct afs_net *, struct afs_vlocation *);
|
|
|
|
extern void afs_vlocation_updater(struct work_struct *);
|
|
|
|
extern void afs_vlocation_reaper(struct work_struct *);
|
|
|
|
extern void __net_exit afs_vlocation_purge(struct afs_net *);
|
2007-04-27 06:55:03 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* vnode.c
|
|
|
|
*/
|
|
|
|
/*
 * Convert a pointer to a VFS inode into a pointer to the afs_vnode that
 * contains it as its vfs_inode member (container_of() pointer arithmetic).
 * Only meaningful for inodes that really are embedded in an afs_vnode.
 */
static inline struct afs_vnode *AFS_FS_I(struct inode *inode)
|
|
|
|
{
|
|
|
|
return container_of(inode, struct afs_vnode, vfs_inode);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Return the address of the VFS inode embedded in an AFS vnode (its
 * vfs_inode member).  This is the inverse mapping of AFS_FS_I().
 */
static inline struct inode *AFS_VNODE_TO_I(struct afs_vnode *vnode)
|
|
|
|
{
|
|
|
|
return &vnode->vfs_inode;
|
|
|
|
}
|
|
|
|
|
2007-04-27 06:59:35 +08:00
|
|
|
extern void afs_vnode_finalise_status_update(struct afs_vnode *,
|
|
|
|
struct afs_server *);
|
afs: Overhaul permit caching
Overhaul permit caching in AFS by making it per-vnode and sharing permit
lists where possible.
When most of the fileserver operations are called, they return a status
structure indicating the (revised) details of the vnode or vnodes involved
in the operation. This includes the access mark derived from the ACL
(named CallerAccess in the protocol definition file). This is cacheable
and if the ACL changes, the server will tell us that it is breaking the
callback promise, at which point we can discard the currently cached
permits.
With this patch, the afs_permits structure has, at the end, an array of
{ key, CallerAccess } elements, sorted by key pointer. This is then cached
in a hash table so that it can be shared between vnodes with the same
access permits.
Permit lists can only be shared if they contain the exact same set of
key->CallerAccess mappings.
Note that that table is global rather than being per-net_ns. If the keys
in a permit list cross net_ns boundaries, there is no problem sharing the
cached permits, since the permits are just integer masks.
Since permit lists pin keys, the permit cache also makes it easier for a
future patch to find all occurrences of a key and remove them by means of
setting the afs_permits::invalidated flag and then clearing the appropriate
key pointer. In such an event, memory barriers will need adding.
Lastly, the permit caching is skipped if the server has sent either a
vnode-specific or an entire-server callback since the start of the
operation.
Signed-off-by: David Howells <dhowells@redhat.com>
2017-11-02 23:27:49 +08:00
|
|
|
extern int afs_vnode_fetch_status(struct afs_vnode *, struct key *, bool);
|
2007-04-27 06:57:07 +08:00
|
|
|
extern int afs_vnode_fetch_data(struct afs_vnode *, struct key *,
|
2017-01-05 18:38:34 +08:00
|
|
|
struct afs_read *);
|
2007-04-27 06:59:35 +08:00
|
|
|
extern int afs_vnode_create(struct afs_vnode *, struct key *, const char *,
|
|
|
|
umode_t, struct afs_fid *, struct afs_file_status *,
|
|
|
|
struct afs_callback *, struct afs_server **);
|
|
|
|
extern int afs_vnode_remove(struct afs_vnode *, struct key *, const char *,
|
|
|
|
bool);
|
|
|
|
extern int afs_vnode_link(struct afs_vnode *, struct afs_vnode *, struct key *,
|
|
|
|
const char *);
|
|
|
|
extern int afs_vnode_symlink(struct afs_vnode *, struct key *, const char *,
|
|
|
|
const char *, struct afs_fid *,
|
|
|
|
struct afs_file_status *, struct afs_server **);
|
|
|
|
extern int afs_vnode_rename(struct afs_vnode *, struct afs_vnode *,
|
|
|
|
struct key *, const char *, const char *);
|
AFS: implement basic file write support
Implement support for writing to regular AFS files, including:
(1) write
(2) truncate
(3) fsync, fdatasync
(4) chmod, chown, chgrp, utime.
AFS writeback attempts to batch writes into chunks as large as it can manage
up to the point that it writes back 65535 pages in one chunk or it meets a
locked page.
Furthermore, if a page has been written to using a particular key, then should
another write to that page use some other key, the first write will be flushed
before the second is allowed to take place. If the first write fails due to a
security error, then the page will be scrapped and reread before the second
write takes place.
If a page is dirty and the callback on it is broken by the server, then the
dirty data is not discarded (same behaviour as NFS).
Shared-writable mappings are not supported by this patch.
[akpm@linux-foundation.org: fix a bunch of warnings]
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-05-09 17:33:46 +08:00
|
|
|
extern int afs_vnode_store_data(struct afs_writeback *, pgoff_t, pgoff_t,
|
|
|
|
unsigned, unsigned);
|
|
|
|
extern int afs_vnode_setattr(struct afs_vnode *, struct key *, struct iattr *);
|
2007-05-11 13:22:20 +08:00
|
|
|
extern int afs_vnode_get_volume_status(struct afs_vnode *, struct key *,
|
|
|
|
struct afs_volume_status *);
|
2007-07-16 14:40:12 +08:00
|
|
|
extern int afs_vnode_set_lock(struct afs_vnode *, struct key *,
|
|
|
|
afs_lock_type_t);
|
|
|
|
extern int afs_vnode_extend_lock(struct afs_vnode *, struct key *);
|
|
|
|
extern int afs_vnode_release_lock(struct afs_vnode *, struct key *);
|
2007-04-27 06:55:03 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* volume.c
|
|
|
|
*/
|
2017-11-02 23:27:46 +08:00
|
|
|
/*
 * Atomically increment the usage counter of a volume record, if one was
 * given, and return the pointer that was passed in.  A NULL argument is
 * tolerated: nothing is incremented and NULL is returned.
 */
static inline struct afs_volume *afs_get_volume(struct afs_volume *volume)
|
|
|
|
{
|
|
|
|
if (volume)
|
|
|
|
atomic_inc(&volume->usage);
|
|
|
|
return volume;
|
|
|
|
}
|
2007-04-27 06:55:03 +08:00
|
|
|
|
2017-11-02 23:27:46 +08:00
|
|
|
extern void afs_put_volume(struct afs_cell *, struct afs_volume *);
|
2007-04-27 06:57:07 +08:00
|
|
|
extern struct afs_volume *afs_volume_lookup(struct afs_mount_params *);
|
2007-04-27 06:55:03 +08:00
|
|
|
extern struct afs_server *afs_volume_pick_fileserver(struct afs_vnode *);
|
|
|
|
extern int afs_volume_release_fileserver(struct afs_vnode *,
|
|
|
|
struct afs_server *, int);
|
|
|
|
|
AFS: implement basic file write support
Implement support for writing to regular AFS files, including:
(1) write
(2) truncate
(3) fsync, fdatasync
(4) chmod, chown, chgrp, utime.
AFS writeback attempts to batch writes into chunks as large as it can manage
up to the point that it writes back 65535 pages in one chunk or it meets a
locked page.
Furthermore, if a page has been written to using a particular key, then should
another write to that page use some other key, the first write will be flushed
before the second is allowed to take place. If the first write fails due to a
security error, then the page will be scrapped and reread before the second
write takes place.
If a page is dirty and the callback on it is broken by the server, then the
dirty data is not discarded (same behaviour as NFS).
Shared-writable mappings are not supported by this patch.
[akpm@linux-foundation.org: fix a bunch of warnings]
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-05-09 17:33:46 +08:00
|
|
|
/*
|
|
|
|
* write.c
|
|
|
|
*/
|
|
|
|
extern int afs_set_page_dirty(struct page *);
|
|
|
|
extern void afs_put_writeback(struct afs_writeback *);
|
2008-10-16 13:04:32 +08:00
|
|
|
extern int afs_write_begin(struct file *file, struct address_space *mapping,
|
|
|
|
loff_t pos, unsigned len, unsigned flags,
|
|
|
|
struct page **pagep, void **fsdata);
|
|
|
|
extern int afs_write_end(struct file *file, struct address_space *mapping,
|
|
|
|
loff_t pos, unsigned len, unsigned copied,
|
|
|
|
struct page *page, void *fsdata);
|
AFS: implement basic file write support
Implement support for writing to regular AFS files, including:
(1) write
(2) truncate
(3) fsync, fdatasync
(4) chmod, chown, chgrp, utime.
AFS writeback attempts to batch writes into chunks as large as it can manage
up to the point that it writes back 65535 pages in one chunk or it meets a
locked page.
Furthermore, if a page has been written to using a particular key, then should
another write to that page use some other key, the first write will be flushed
before the second is allowed to take place. If the first write fails due to a
security error, then the page will be scrapped and reread before the second
write takes place.
If a page is dirty and the callback on it is broken by the server, then the
dirty data is not discarded (same behaviour as NFS).
Shared-writable mappings are not supported by this patch.
[akpm@linux-foundation.org: fix a bunch of warnings]
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-05-09 17:33:46 +08:00
|
|
|
extern int afs_writepage(struct page *, struct writeback_control *);
|
|
|
|
extern int afs_writepages(struct address_space *, struct writeback_control *);
|
|
|
|
extern void afs_pages_written_back(struct afs_vnode *, struct afs_call *);
|
2014-04-04 02:13:46 +08:00
|
|
|
extern ssize_t afs_file_write(struct kiocb *, struct iov_iter *);
|
AFS: implement basic file write support
Implement support for writing to regular AFS files, including:
(1) write
(2) truncate
(3) fsync, fdatasync
(4) chmod, chown, chgrp, utime.
AFS writeback attempts to batch writes into chunks as large as it can manage
up to the point that it writes back 65535 pages in one chunk or it meets a
locked page.
Furthermore, if a page has been written to using a particular key, then should
another write to that page use some other key, the first write will be flushed
before the second is allowed to take place. If the first write fails due to a
security error, then the page will be scrapped and reread before the second
write takes place.
If a page is dirty and the callback on it is broken by the server, then the
dirty data is not discarded (same behaviour as NFS).
Shared-writable mappings are not supported by this patch.
[akpm@linux-foundation.org: fix a bunch of warnings]
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-05-09 17:33:46 +08:00
|
|
|
extern int afs_writeback_all(struct afs_vnode *);
|
2017-03-17 00:27:45 +08:00
|
|
|
extern int afs_flush(struct file *, fl_owner_t);
|
2011-07-17 08:44:56 +08:00
|
|
|
extern int afs_fsync(struct file *, loff_t, loff_t, int);
|
AFS: implement basic file write support
Implement support for writing to regular AFS files, including:
(1) write
(2) truncate
(3) fsync, fdatasync
(4) chmod, chown, chgrp, utime.
AFS writeback attempts to batch writes into chunks as large as it can manage
up to the point that it writes back 65535 pages in one chunk or it meets a
locked page.
Furthermore, if a page has been written to using a particular key, then should
another write to that page use some other key, the first write will be flushed
before the second is allowed to take place. If the first write fails due to a
security error, then the page will be scrapped and reread before the second
write takes place.
If a page is dirty and the callback on it is broken by the server, then the
dirty data is not discarded (same behaviour as NFS).
Shared-writable mappings are not supported by this patch.
[akpm@linux-foundation.org: fix a bunch of warnings]
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-05-09 17:33:46 +08:00
|
|
|
|
2017-07-06 22:50:27 +08:00
|
|
|
/*
|
|
|
|
* xattr.c
|
|
|
|
*/
|
|
|
|
extern const struct xattr_handler *afs_xattr_handlers[];
|
|
|
|
extern ssize_t afs_listxattr(struct dentry *, char *, size_t);
|
AFS: implement basic file write support
Implement support for writing to regular AFS files, including:
(1) write
(2) truncate
(3) fsync, fdatasync
(4) chmod, chown, chgrp, utime.
AFS writeback attempts to batch writes into chunks as large as it can manage
up to the point that it writes back 65535 pages in one chunk or it meets a
locked page.
Furthermore, if a page has been written to using a particular key, then should
another write to that page use some other key, the first write will be flushed
before the second is allowed to take place. If the first write fails due to a
security error, then the page will be scrapped and reread before the second
write takes place.
If a page is dirty and the callback on it is broken by the server, then the
dirty data is not discarded (same behaviour as NFS).
Shared-writable mappings are not supported by this patch.
[akpm@linux-foundation.org: fix a bunch of warnings]
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-05-09 17:33:46 +08:00
|
|
|
|
2007-04-27 06:55:03 +08:00
|
|
|
/*****************************************************************************/
|
|
|
|
/*
|
|
|
|
* debug tracing
|
|
|
|
*/
|
2017-01-05 18:38:34 +08:00
|
|
|
#include <trace/events/afs.h>
|
|
|
|
|
2007-04-27 06:55:03 +08:00
|
|
|
extern unsigned afs_debug;
|
|
|
|
|
|
|
|
/*
 * Emit one log line through printk(): the current task's name
 * (current->comm) in a left-justified field of exactly six characters inside
 * square brackets, followed by the formatted message and a newline.
 * FMT must be a string literal, since it is concatenated with the adjacent
 * literals at compile time.
 */
#define dbgprintk(FMT,...) \
|
2008-04-03 17:44:01 +08:00
|
|
|
printk("[%-6.6s] "FMT"\n", current->comm ,##__VA_ARGS__)
|
2007-04-27 06:55:03 +08:00
|
|
|
|
2008-04-30 15:55:09 +08:00
|
|
|
/*
 * Log function entry as "==> <function>(<text>)", where <function> is
 * __func__ and <text> is produced from FMT and the variadic arguments.
 */
#define kenter(FMT,...) dbgprintk("==> %s("FMT")",__func__ ,##__VA_ARGS__)
|
|
|
|
/*
 * Log function exit as "<== <function>()<text>"; the text produced from FMT
 * is appended directly after the "()".
 */
#define kleave(FMT,...) dbgprintk("<== %s()"FMT"",__func__ ,##__VA_ARGS__)
|
2007-04-27 06:55:03 +08:00
|
|
|
/* Log a free-form debug message, prefixed with leading whitespace. */
#define kdebug(FMT,...) dbgprintk(" "FMT ,##__VA_ARGS__)
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * _enter()/_leave()/_debug(): tracing macros whose behaviour is chosen at
 * compile time.
 *
 *  - __KDEBUG defined: always forward to kenter()/kleave()/kdebug().
 *  - otherwise, CONFIG_AFS_DEBUG defined: forward only when the matching
 *    AFS_DEBUG_* bit is set in the afs_debug variable.
 *  - otherwise: expand to no_printk() with the same format and arguments.
 *    NOTE(review): no_printk() is expected to emit nothing while still
 *    letting the compiler check the format - confirm against linux/printk.h.
 */
#if defined(__KDEBUG)
|
|
|
|
#define _enter(FMT,...) kenter(FMT,##__VA_ARGS__)
|
|
|
|
#define _leave(FMT,...) kleave(FMT,##__VA_ARGS__)
|
|
|
|
#define _debug(FMT,...) kdebug(FMT,##__VA_ARGS__)
|
|
|
|
|
|
|
|
#elif defined(CONFIG_AFS_DEBUG)
|
|
|
|
/* Bit flags tested against afs_debug to enable each class of message. */
#define AFS_DEBUG_KENTER 0x01
|
|
|
|
#define AFS_DEBUG_KLEAVE 0x02
|
|
|
|
#define AFS_DEBUG_KDEBUG 0x04
|
|
|
|
|
|
|
|
/* Entry trace, emitted only if AFS_DEBUG_KENTER is set in afs_debug. */
#define _enter(FMT,...) \
|
|
|
|
do { \
|
|
|
|
if (unlikely(afs_debug & AFS_DEBUG_KENTER)) \
|
|
|
|
kenter(FMT,##__VA_ARGS__); \
|
|
|
|
} while (0)
|
|
|
|
|
|
|
|
/* Exit trace, emitted only if AFS_DEBUG_KLEAVE is set in afs_debug. */
#define _leave(FMT,...) \
|
|
|
|
do { \
|
|
|
|
if (unlikely(afs_debug & AFS_DEBUG_KLEAVE)) \
|
|
|
|
kleave(FMT,##__VA_ARGS__); \
|
|
|
|
} while (0)
|
|
|
|
|
|
|
|
/* Debug message, emitted only if AFS_DEBUG_KDEBUG is set in afs_debug. */
#define _debug(FMT,...) \
|
|
|
|
do { \
|
|
|
|
if (unlikely(afs_debug & AFS_DEBUG_KDEBUG)) \
|
|
|
|
kdebug(FMT,##__VA_ARGS__); \
|
|
|
|
} while (0)
|
|
|
|
|
|
|
|
#else
|
2010-08-12 23:54:57 +08:00
|
|
|
/* Tracing disabled: same format strings as kenter()/kleave()/kdebug(), routed to no_printk(). */
#define _enter(FMT,...) no_printk("==> %s("FMT")",__func__ ,##__VA_ARGS__)
|
|
|
|
#define _leave(FMT,...) no_printk("<== %s()"FMT"",__func__ ,##__VA_ARGS__)
|
|
|
|
#define _debug(FMT,...) no_printk(" "FMT ,##__VA_ARGS__)
|
2007-04-27 06:55:03 +08:00
|
|
|
#endif
|
|
|
|
|
|
|
|
/*
|
|
|
|
* debug assertion checking
|
|
|
|
*/
|
|
|
|
#if 1 // defined(__KDEBUGALL)
|
|
|
|
|
|
|
|
/*
 * ASSERT(X): if X evaluates false, print a blank KERN_ERR line and
 * "AFS: Assertion failed", then call BUG().  X is evaluated exactly once.
 * The failing path is wrapped in unlikely() as a branch-prediction hint.
 */
#define ASSERT(X) \
|
|
|
|
do { \
|
|
|
|
if (unlikely(!(X))) { \
|
|
|
|
printk(KERN_ERR "\n"); \
|
|
|
|
printk(KERN_ERR "AFS: Assertion failed\n"); \
|
|
|
|
BUG(); \
|
|
|
|
} \
|
|
|
|
} while(0)
|
|
|
|
|
|
|
|
/*
 * ASSERTCMP(X, OP, Y): assert that the comparison "X OP Y" holds, where OP is
 * a comparison operator token (it is also stringified into the message).
 * On failure, report both operands in decimal and in hexadecimal, then call
 * BUG().
 *
 * Both operands are cast to unsigned long for printing, so they are shown as
 * unsigned values of that width.  X and Y are re-evaluated for each report
 * line on the failure path, so arguments should be free of side effects.
 */
#define ASSERTCMP(X, OP, Y) \
|
|
|
|
do { \
|
|
|
|
if (unlikely(!((X) OP (Y)))) { \
|
|
|
|
printk(KERN_ERR "\n"); \
|
|
|
|
printk(KERN_ERR "AFS: Assertion failed\n"); \
|
|
|
|
printk(KERN_ERR "%lu " #OP " %lu is false\n", \
|
|
|
|
(unsigned long)(X), (unsigned long)(Y)); \
|
|
|
|
printk(KERN_ERR "0x%lx " #OP " 0x%lx is false\n", \
|
|
|
|
(unsigned long)(X), (unsigned long)(Y)); \
|
|
|
|
BUG(); \
|
|
|
|
} \
|
|
|
|
} while(0)
|
|
|
|
|
2007-05-09 17:33:45 +08:00
|
|
|
/*
 * ASSERTRANGE(L, OP1, N, OP2, H): assert that both "L OP1 N" and "N OP2 H"
 * hold, e.g. ASSERTRANGE(lo, <=, n, <, hi).  If either comparison fails,
 * report the three operands in decimal and in hexadecimal, then call BUG().
 *
 * Operands are cast to unsigned long for printing.  N is evaluated in both
 * comparisons, and all three operands are re-evaluated for each report line,
 * so arguments should be free of side effects.
 */
#define ASSERTRANGE(L, OP1, N, OP2, H) \
|
|
|
|
do { \
|
|
|
|
if (unlikely(!((L) OP1 (N)) || !((N) OP2 (H)))) { \
|
|
|
|
printk(KERN_ERR "\n"); \
|
|
|
|
printk(KERN_ERR "AFS: Assertion failed\n"); \
|
|
|
|
printk(KERN_ERR "%lu "#OP1" %lu "#OP2" %lu is false\n", \
|
|
|
|
(unsigned long)(L), (unsigned long)(N), \
|
|
|
|
(unsigned long)(H)); \
|
|
|
|
printk(KERN_ERR "0x%lx "#OP1" 0x%lx "#OP2" 0x%lx is false\n", \
|
|
|
|
(unsigned long)(L), (unsigned long)(N), \
|
|
|
|
(unsigned long)(H)); \
|
|
|
|
BUG(); \
|
|
|
|
} \
|
|
|
|
} while(0)
|
|
|
|
|
2007-04-27 06:55:03 +08:00
|
|
|
/*
 * ASSERTIF(C, X): conditional assertion.  X is only checked when C is true;
 * if C is true and X is false, print "AFS: Assertion failed" and call BUG().
 * Because of the short-circuit &&, X is not evaluated when C is false.
 */
#define ASSERTIF(C, X) \
|
|
|
|
do { \
|
|
|
|
if (unlikely((C) && !(X))) { \
|
|
|
|
printk(KERN_ERR "\n"); \
|
|
|
|
printk(KERN_ERR "AFS: Assertion failed\n"); \
|
|
|
|
BUG(); \
|
|
|
|
} \
|
|
|
|
} while(0)
|
|
|
|
|
|
|
|
/*
 * ASSERTIFCMP(C, X, OP, Y): conditional comparison assertion.  When C is
 * true, assert that "X OP Y" holds; on failure report both operands in
 * decimal and in hexadecimal, then call BUG().  When C is false the
 * comparison is not evaluated (short-circuit &&).
 *
 * Operands are cast to unsigned long for printing and are re-evaluated for
 * each report line, so arguments should be free of side effects.
 */
#define ASSERTIFCMP(C, X, OP, Y) \
|
|
|
|
do { \
|
|
|
|
if (unlikely((C) && !((X) OP (Y)))) { \
|
|
|
|
printk(KERN_ERR "\n"); \
|
|
|
|
printk(KERN_ERR "AFS: Assertion failed\n"); \
|
|
|
|
printk(KERN_ERR "%lu " #OP " %lu is false\n", \
|
|
|
|
(unsigned long)(X), (unsigned long)(Y)); \
|
|
|
|
printk(KERN_ERR "0x%lx " #OP " 0x%lx is false\n", \
|
|
|
|
(unsigned long)(X), (unsigned long)(Y)); \
|
|
|
|
BUG(); \
|
|
|
|
} \
|
|
|
|
} while(0)
|
|
|
|
|
|
|
|
#else
|
|
|
|
|
|
|
|
/*
 * No-op variants of the assertion macros: each expands to an empty
 * do/while(0) statement, so none of the arguments is evaluated.  These sit in
 * the #else branch of a guard that is currently hard-wired to "#if 1", so
 * they are not compiled unless that guard is changed.
 */
#define ASSERT(X) \
|
|
|
|
do { \
|
|
|
|
} while(0)
|
|
|
|
|
|
|
|
#define ASSERTCMP(X, OP, Y) \
|
|
|
|
do { \
|
|
|
|
} while(0)
|
|
|
|
|
2007-05-09 17:33:45 +08:00
|
|
|
#define ASSERTRANGE(L, OP1, N, OP2, H) \
|
|
|
|
do { \
|
|
|
|
} while(0)
|
|
|
|
|
2007-04-27 06:55:03 +08:00
|
|
|
#define ASSERTIF(C, X) \
|
|
|
|
do { \
|
|
|
|
} while(0)
|
|
|
|
|
|
|
|
#define ASSERTIFCMP(C, X, OP, Y) \
|
|
|
|
do { \
|
|
|
|
} while(0)
|
|
|
|
|
|
|
|
#endif /* __KDEBUGALL */
|