linux/include/net/9p/client.h

301 lines
8.7 KiB
C
Raw Normal View History

/* SPDX-License-Identifier: GPL-2.0-only */
/*
* 9P Client Definitions
*
* Copyright (C) 2008 by Eric Van Hensbergen <ericvh@gmail.com>
* Copyright (C) 2007 by Latchesar Ionkov <lucho@ionkov.net>
*/
#ifndef NET_9P_CLIENT_H
#define NET_9P_CLIENT_H
#include <linux/utsname.h>
#include <linux/idr.h>
#include <linux/tracepoint-defs.h>
/* Number of requests per row */
#define P9_ROW_MAXTAG 255
/** enum p9_proto_versions - 9P protocol versions
* @p9_proto_legacy: 9P Legacy mode, pre-9P2000.u
* @p9_proto_2000u: 9P2000.u extension
* @p9_proto_2000L: 9P2000.L extension
*/
enum p9_proto_versions {
p9_proto_legacy,
p9_proto_2000u,
p9_proto_2000L,
};
/**
* enum p9_trans_status - different states of underlying transports
* @Connected: transport is connected and healthy
* @Disconnected: transport has been disconnected
* @Hung: transport is connected by wedged
*
* This enumeration details the various states a transport
* instatiation can be in.
*/
enum p9_trans_status {
Connected,
BeginDisconnect,
Disconnected,
Hung,
};
/**
* enum p9_req_status_t - status of a request
* @REQ_STATUS_ALLOC: request has been allocated but not sent
* @REQ_STATUS_UNSENT: request waiting to be sent
* @REQ_STATUS_SENT: request sent to server
* @REQ_STATUS_RCVD: response received from server
* @REQ_STATUS_FLSHD: request has been flushed
* @REQ_STATUS_ERROR: request encountered an error on the client side
*/
enum p9_req_status_t {
REQ_STATUS_ALLOC,
REQ_STATUS_UNSENT,
REQ_STATUS_SENT,
REQ_STATUS_RCVD,
REQ_STATUS_FLSHD,
REQ_STATUS_ERROR,
};
/**
* struct p9_req_t - request slots
* @status: status of this request slot
* @t_err: transport error
* @wq: wait_queue for the client to block on for this request
* @tc: the request fcall structure
* @rc: the response fcall structure
* @req_list: link for higher level objects to chain requests
*/
struct p9_req_t {
int status;
int t_err;
refcount_t refcount;
wait_queue_head_t wq;
struct p9_fcall tc;
struct p9_fcall rc;
struct list_head req_list;
};
/**
* struct p9_client - per client instance state
* @lock: protect @fids and @reqs
* @msize: maximum data size negotiated by protocol
* @proto_version: 9P protocol version to use
* @trans_mod: module API instantiated with this client
* @status: connection state
* @trans: tranport instance state and API
* @fids: All active FID handles
* @reqs: All active requests.
* @name: node name used as client id
*
* The client structure is used to keep track of various per-client
* state that has been instantiated.
*/
struct p9_client {
spinlock_t lock;
unsigned int msize;
unsigned char proto_version;
struct p9_trans_module *trans_mod;
enum p9_trans_status status;
void *trans;
struct kmem_cache *fcall_cache;
union {
struct {
int rfd;
int wfd;
} fd;
struct {
u16 port;
bool privport;
} tcp;
} trans_opts;
struct idr fids;
struct idr reqs;
char name[__NEW_UTS_LEN + 1];
};
/**
* struct p9_fid - file system entity handle
* @clnt: back pointer to instantiating &p9_client
* @fid: numeric identifier for this handle
* @mode: current mode of this fid (enum?)
* @qid: the &p9_qid server identifier this handle points to
* @iounit: the server reported maximum transaction size for this file
* @uid: the numeric uid of the local user who owns this handle
* @rdir: readdir accounting structure (allocated on demand)
* @dlist: per-dentry fid tracking
*
* TODO: This needs lots of explanation.
*/
9p: add refcount to p9_fid struct Fix race issue in fid contention. Eric's and Greg's patch offer a mechanism to fix open-unlink-f*syscall bug in 9p. But there is race issue in fid parallel accesses. As Greg's patch stores all of fids from opened files into according inode, so all the lookup fid ops can retrieve fid from inode preferentially. But there is no mechanism to handle the fid contention issue. For example, there are two threads get the same fid in the same time and one of them clunk the fid before the other thread ready to discard the fid. In this scenario, it will lead to some fatal problems, even kernel core dump. I introduce a mechanism to fix this race issue. A counter field introduced into p9_fid struct to store the reference counter to the fid. When a fid is allocated from the inode or dentry, the counter will increase, and will decrease at the end of its occupation. It is guaranteed that the fid won't be clunked before the reference counter go down to 0, then we can avoid the clunked fid to be used. tests: race issue test from the old test case: for file in {01..50}; do touch f.${file}; done seq 1 1000 | xargs -n 1 -P 50 -I{} cat f.* > /dev/null open-unlink-f*syscall test: I have tested for f*syscall include: ftruncate fstat fchown fchmod faccessat. Link: http://lkml.kernel.org/r/20200923141146.90046-5-jianyong.wu@arm.com Fixes: 478ba09edc1f ("fs/9p: search open fids first") Signed-off-by: Jianyong Wu <jianyong.wu@arm.com> Signed-off-by: Dominique Martinet <asmadeus@codewreck.org>
2020-09-23 22:11:46 +08:00
enum fid_source {
FID_FROM_OTHER,
FID_FROM_INODE,
FID_FROM_DENTRY,
};
struct p9_fid {
struct p9_client *clnt;
u32 fid;
refcount_t count;
int mode;
struct p9_qid qid;
u32 iounit;
kuid_t uid;
void *rdir;
struct hlist_node dlist; /* list of all fids attached to a dentry */
struct hlist_node ilist;
};
9p: readdir implementation for 9p2000.L This patch implements the kernel part of readdir() implementation for 9p2000.L Change from V3: Instead of inode, server now sends qids for each dirent SYNOPSIS size[4] Treaddir tag[2] fid[4] offset[8] count[4] size[4] Rreaddir tag[2] count[4] data[count] DESCRIPTION The readdir request asks the server to read the directory specified by 'fid' at an offset specified by 'offset' and return as many dirent structures as possible that fit into count bytes. Each dirent structure is laid out as follows. qid.type[1] the type of the file (directory, etc.), represented as a bit vector corresponding to the high 8 bits of the file's mode word. qid.vers[4] version number for given path qid.path[8] the file server's unique identification for the file offset[8] offset into the next dirent. type[1] type of this directory entry. name[256] name of this directory entry. This patch adds v9fs_dir_readdir_dotl() as the readdir() call for 9p2000.L. This function sends P9_TREADDIR command to the server. In response the server sends a buffer filled with dirent structures. This is different from the existing v9fs_dir_readdir() call which receives stat structures from the server. This results in significant speedup of readdir() on large directories. For example, doing 'ls >/dev/null' on a directory with 10000 files on my laptop takes 1.088 seconds with the existing code, but only takes 0.339 seconds with the new readdir. Signed-off-by: Sripathi Kodi <sripathik@in.ibm.com> Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
2010-06-04 21:41:26 +08:00
/**
* struct p9_dirent - directory entry structure
* @qid: The p9 server qid for this dirent
* @d_off: offset to the next dirent
* @d_type: type of file
* @d_name: file name
*/
struct p9_dirent {
struct p9_qid qid;
u64 d_off;
unsigned char d_type;
char d_name[256];
};
struct iov_iter;
int p9_show_client_options(struct seq_file *m, struct p9_client *clnt);
int p9_client_statfs(struct p9_fid *fid, struct p9_rstatfs *sb);
int p9_client_rename(struct p9_fid *fid, struct p9_fid *newdirfid,
const char *name);
int p9_client_renameat(struct p9_fid *olddirfid, const char *old_name,
struct p9_fid *newdirfid, const char *new_name);
struct p9_client *p9_client_create(const char *dev_name, char *options);
void p9_client_destroy(struct p9_client *clnt);
void p9_client_disconnect(struct p9_client *clnt);
void p9_client_begin_disconnect(struct p9_client *clnt);
struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid,
const char *uname, kuid_t n_uname, const char *aname);
struct p9_fid *p9_client_walk(struct p9_fid *oldfid, uint16_t nwname,
const unsigned char * const *wnames, int clone);
int p9_client_open(struct p9_fid *fid, int mode);
int p9_client_fcreate(struct p9_fid *fid, const char *name, u32 perm, int mode,
char *extension);
int p9_client_link(struct p9_fid *fid, struct p9_fid *oldfid, const char *newname);
int p9_client_symlink(struct p9_fid *fid, const char *name, const char *symname,
kgid_t gid, struct p9_qid *qid);
int p9_client_create_dotl(struct p9_fid *ofid, const char *name, u32 flags, u32 mode,
kgid_t gid, struct p9_qid *qid);
int p9_client_clunk(struct p9_fid *fid);
int p9_client_fsync(struct p9_fid *fid, int datasync);
int p9_client_remove(struct p9_fid *fid);
int p9_client_unlinkat(struct p9_fid *dfid, const char *name, int flags);
int p9_client_read(struct p9_fid *fid, u64 offset, struct iov_iter *to, int *err);
int p9_client_read_once(struct p9_fid *fid, u64 offset, struct iov_iter *to,
int *err);
int p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err);
9p: readdir implementation for 9p2000.L This patch implements the kernel part of readdir() implementation for 9p2000.L Change from V3: Instead of inode, server now sends qids for each dirent SYNOPSIS size[4] Treaddir tag[2] fid[4] offset[8] count[4] size[4] Rreaddir tag[2] count[4] data[count] DESCRIPTION The readdir request asks the server to read the directory specified by 'fid' at an offset specified by 'offset' and return as many dirent structures as possible that fit into count bytes. Each dirent structure is laid out as follows. qid.type[1] the type of the file (directory, etc.), represented as a bit vector corresponding to the high 8 bits of the file's mode word. qid.vers[4] version number for given path qid.path[8] the file server's unique identification for the file offset[8] offset into the next dirent. type[1] type of this directory entry. name[256] name of this directory entry. This patch adds v9fs_dir_readdir_dotl() as the readdir() call for 9p2000.L. This function sends P9_TREADDIR command to the server. In response the server sends a buffer filled with dirent structures. This is different from the existing v9fs_dir_readdir() call which receives stat structures from the server. This results in significant speedup of readdir() on large directories. For example, doing 'ls >/dev/null' on a directory with 10000 files on my laptop takes 1.088 seconds with the existing code, but only takes 0.339 seconds with the new readdir. Signed-off-by: Sripathi Kodi <sripathik@in.ibm.com> Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
2010-06-04 21:41:26 +08:00
int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset);
int p9dirent_read(struct p9_client *clnt, char *buf, int len,
struct p9_dirent *dirent);
struct p9_wstat *p9_client_stat(struct p9_fid *fid);
int p9_client_wstat(struct p9_fid *fid, struct p9_wstat *wst);
9p: Implement client side of setattr for 9P2000.L protocol. SYNOPSIS size[4] Tsetattr tag[2] attr[n] size[4] Rsetattr tag[2] DESCRIPTION The setattr command changes some of the file status information. attr resembles the iattr structure used in Linux kernel. It specifies which status parameter is to be changed and to what value. It is laid out as follows: valid[4] specifies which status information is to be changed. Possible values are: ATTR_MODE (1 << 0) ATTR_UID (1 << 1) ATTR_GID (1 << 2) ATTR_SIZE (1 << 3) ATTR_ATIME (1 << 4) ATTR_MTIME (1 << 5) ATTR_ATIME_SET (1 << 7) ATTR_MTIME_SET (1 << 8) The last two bits represent whether the time information is being sent by the client's user space. In the absense of these bits the server always uses server's time. mode[4] File permission bits uid[4] Owner id of file gid[4] Group id of the file size[8] File size atime_sec[8] Time of last file access, seconds atime_nsec[8] Time of last file access, nanoseconds mtime_sec[8] Time of last file modification, seconds mtime_nsec[8] Time of last file modification, nanoseconds Explanation of the patches: -------------------------- *) The kernel just copies relevent contents of iattr structure to p9_iattr_dotl structure and passes it down to the client. The only check it has is calling inode_change_ok() *) The p9_iattr_dotl structure does not have ctime and ia_file parameters because I don't think these are needed in our case. The client user space can request updating just ctime by calling chown(fd, -1, -1). This is handled on server side without a need for putting ctime on the wire. *) The server currently supports changing mode, time, ownership and size of the file. *) 9P RFC says "Either all the changes in wstat request happen, or none of them does: if the request succeeds, all changes were made; if it fails, none were." I have not done anything to implement this specifically because I don't see a reason. Signed-off-by: Sripathi Kodi <sripathik@in.ibm.com> Signed-off-by: Venkateswararao Jujjuri <jvrao@linux.vnet.ibm.com> Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
2010-06-18 14:20:10 +08:00
int p9_client_setattr(struct p9_fid *fid, struct p9_iattr_dotl *attr);
9p: getattr client implementation for 9P2000.L protocol. SYNOPSIS size[4] Tgetattr tag[2] fid[4] request_mask[8] size[4] Rgetattr tag[2] lstat[n] DESCRIPTION The getattr transaction inquires about the file identified by fid. request_mask is a bit mask that specifies which fields of the stat structure is the client interested in. The reply will contain a machine-independent directory entry, laid out as follows: st_result_mask[8] Bit mask that indicates which fields in the stat structure have been populated by the server qid.type[1] the type of the file (directory, etc.), represented as a bit vector corresponding to the high 8 bits of the file's mode word. qid.vers[4] version number for given path qid.path[8] the file server's unique identification for the file st_mode[4] Permission and flags st_uid[4] User id of owner st_gid[4] Group ID of owner st_nlink[8] Number of hard links st_rdev[8] Device ID (if special file) st_size[8] Size, in bytes st_blksize[8] Block size for file system IO st_blocks[8] Number of file system blocks allocated st_atime_sec[8] Time of last access, seconds st_atime_nsec[8] Time of last access, nanoseconds st_mtime_sec[8] Time of last modification, seconds st_mtime_nsec[8] Time of last modification, nanoseconds st_ctime_sec[8] Time of last status change, seconds st_ctime_nsec[8] Time of last status change, nanoseconds st_btime_sec[8] Time of creation (birth) of file, seconds st_btime_nsec[8] Time of creation (birth) of file, nanoseconds st_gen[8] Inode generation st_data_version[8] Data version number request_mask and result_mask bit masks contain the following bits #define P9_STATS_MODE 0x00000001ULL #define P9_STATS_NLINK 0x00000002ULL #define P9_STATS_UID 0x00000004ULL #define P9_STATS_GID 0x00000008ULL #define P9_STATS_RDEV 0x00000010ULL #define P9_STATS_ATIME 0x00000020ULL #define P9_STATS_MTIME 0x00000040ULL #define P9_STATS_CTIME 0x00000080ULL #define P9_STATS_INO 0x00000100ULL #define P9_STATS_SIZE 0x00000200ULL #define P9_STATS_BLOCKS 0x00000400ULL #define P9_STATS_BTIME 0x00000800ULL #define P9_STATS_GEN 0x00001000ULL #define P9_STATS_DATA_VERSION 0x00002000ULL #define P9_STATS_BASIC 0x000007ffULL #define P9_STATS_ALL 0x00003fffULL This patch implements the client side of getattr implementation for 9P2000.L. It introduces a new structure p9_stat_dotl for getting Linux stat information along with QID. The data layout is similar to stat structure in Linux user space with the following major differences: inode (st_ino) is not part of data. Instead qid is. device (st_dev) is not part of data because this doesn't make sense on the client. All time variables are 64 bit wide on the wire. The kernel seems to use 32 bit variables for these variables. However, some of the architectures have used 64 bit variables and glibc exposes 64 bit variables to user space on some architectures. Hence to be on the safer side we have made these 64 bit in the protocol. Refer to the comments in include/asm-generic/stat.h There are some additional fields: st_btime_sec, st_btime_nsec, st_gen, st_data_version apart from the bitmask, st_result_mask. The bit mask is filled by the server to indicate which stat fields have been populated by the server. Currently there is no clean way for the server to obtain these additional fields, so it sends back just the basic fields. Signed-off-by: Sripathi Kodi <sripathik@in.ibm.com> Signed-off-by: Eric Van Hensbegren <ericvh@gmail.com>
2010-07-12 22:37:23 +08:00
struct p9_stat_dotl *p9_client_getattr_dotl(struct p9_fid *fid,
u64 request_mask);
int p9_client_mknod_dotl(struct p9_fid *oldfid, const char *name, int mode,
dev_t rdev, kgid_t gid, struct p9_qid *qid);
int p9_client_mkdir_dotl(struct p9_fid *fid, const char *name, int mode,
kgid_t gid, struct p9_qid *qid);
9p: Implement TLOCK Synopsis size[4] TLock tag[2] fid[4] flock[n] size[4] RLock tag[2] status[1] Description Tlock is used to acquire/release byte range posix locks on a file identified by given fid. The reply contains status of the lock request flock structure: type[1] - Type of lock: F_RDLCK, F_WRLCK, F_UNLCK flags[4] - Flags could be either of P9_LOCK_FLAGS_BLOCK - Blocked lock request, if there is a conflicting lock exists, wait for that lock to be released. P9_LOCK_FLAGS_RECLAIM - Reclaim lock request, used when client is trying to reclaim a lock after a server restrart (due to crash) start[8] - Starting offset for lock length[8] - Number of bytes to lock If length is 0, lock all bytes starting at the location 'start' through to the end of file pid[4] - PID of the process that wants to take lock client_id[4] - Unique client id status[1] - Status of the lock request, can be P9_LOCK_SUCCESS(0), P9_LOCK_BLOCKED(1), P9_LOCK_ERROR(2) or P9_LOCK_GRACE(3) P9_LOCK_SUCCESS - Request was successful P9_LOCK_BLOCKED - A conflicting lock is held by another process P9_LOCK_ERROR - Error while processing the lock request P9_LOCK_GRACE - Server is in grace period, it can't accept new lock requests in this period (except locks with P9_LOCK_FLAGS_RECLAIM flag set) Signed-off-by: M. Mohan Kumar <mohan@in.ibm.com> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> Signed-off-by: Venkateswararao Jujjuri <jvrao@linux.vnet.ibm.com> Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
2010-09-27 14:04:24 +08:00
int p9_client_lock_dotl(struct p9_fid *fid, struct p9_flock *flock, u8 *status);
int p9_client_getlock_dotl(struct p9_fid *fid, struct p9_getlock *fl);
void p9_fcall_fini(struct p9_fcall *fc);
struct p9_req_t *p9_tag_lookup(struct p9_client *c, u16 tag);
static inline void p9_req_get(struct p9_req_t *r)
{
refcount_inc(&r->refcount);
}
static inline int p9_req_try_get(struct p9_req_t *r)
{
return refcount_inc_not_zero(&r->refcount);
}
int p9_req_put(struct p9_client *c, struct p9_req_t *r);
/* We cannot have the real tracepoints in header files,
* use a wrapper function */
DECLARE_TRACEPOINT(9p_fid_ref);
void do_trace_9p_fid_get(struct p9_fid *fid);
void do_trace_9p_fid_put(struct p9_fid *fid);
/* fid reference counting helpers:
* - fids used for any length of time should always be referenced through
* p9_fid_get(), and released with p9_fid_put()
* - v9fs_fid_lookup() or similar will automatically call get for you
* and also require a put
* - the *_fid_add() helpers will stash the fid in the inode,
* at which point it is the responsibility of evict_inode()
* to call the put
* - the last put will automatically send a clunk to the server
*/
static inline struct p9_fid *p9_fid_get(struct p9_fid *fid)
{
if (tracepoint_enabled(9p_fid_ref))
do_trace_9p_fid_get(fid);
refcount_inc(&fid->count);
return fid;
}
static inline int p9_fid_put(struct p9_fid *fid)
{
if (!fid || IS_ERR(fid))
return 0;
if (tracepoint_enabled(9p_fid_ref))
do_trace_9p_fid_put(fid);
if (!refcount_dec_and_test(&fid->count))
return 0;
return p9_client_clunk(fid);
}
void p9_client_cb(struct p9_client *c, struct p9_req_t *req, int status);
int p9_parse_header(struct p9_fcall *pdu, int32_t *size, int8_t *type,
int16_t *tag, int rewind);
int p9stat_read(struct p9_client *clnt, char *buf, int len,
struct p9_wstat *st);
void p9stat_free(struct p9_wstat *stbuf);
int p9_is_proto_dotu(struct p9_client *clnt);
int p9_is_proto_dotl(struct p9_client *clnt);
struct p9_fid *p9_client_xattrwalk(struct p9_fid *file_fid,
const char *attr_name, u64 *attr_size);
int p9_client_xattrcreate(struct p9_fid *fid, const char *name,
u64 attr_size, int flags);
int p9_client_readlink(struct p9_fid *fid, char **target);
int p9_client_init(void);
void p9_client_exit(void);
#endif /* NET_9P_CLIENT_H */