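/*
 * Web-view residue captured together with this file (not part of the
 * original header):
 *
 * 2
 * 0
 * mirror of https://github.com/edk2-porting/linux-next.git synced 2024-12-21 03:33:59 +08:00
 * linux-next/include/linux/uio.h
 *
 * 196 lines
 * 6.3 KiB
 * C
 * Raw Normal View History
 */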

/*
* Berkeley style UIO structures - Alan Cox 1994.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#ifndef __LINUX_UIO_H
#define __LINUX_UIO_H
#include <linux/kernel.h>
#include <uapi/linux/uio.h>
struct page;
struct pipe_inode_info;
/*
 * One contiguous buffer described by a start pointer and a length.
 *
 * The note on iov_base says it must never hold a userland pointer, so this
 * is presumably the kernel-memory counterpart of struct iovec (confirm
 * against the users of ITER_KVEC).
 */
struct kvec {
void *iov_base; /* and that should *never* hold a userland pointer */
size_t iov_len; /* length of the buffer at iov_base, presumably in bytes */
};
/*
 * Backing-store kinds of a struct iov_iter, kept in iov_iter.type.
 *
 * The non-zero values are distinct single bits, so they can be tested with a
 * mask (see iter_is_iovec() and iov_for_each()). ITER_IOVEC is 0: it is
 * recognised by none of the other kind bits being set.
 *
 * NOTE(review): bit 0 is unused here; presumably it carries the READ/WRITE
 * direction that iov_iter_rw() masks out of the same field - confirm against
 * the definitions of READ and WRITE. Each kind presumably selects the
 * like-named pointer in the first union of struct iov_iter.
 */
enum {
ITER_IOVEC = 0,
ITER_KVEC = 2,
ITER_BVEC = 4,
ITER_PIPE = 8,
};
/*
 * Cursor over a sequence of memory segments.
 *
 * Only the layout is defined here; the helpers declared in this header
 * operate on it.
 */
struct iov_iter {
/* ITER_* kind bits; iov_iter_rw() also extracts READ/WRITE from this field */
int type;
/* byte offset into the segment currently pointed at (see iov_iter_iovec()) */
size_t iov_offset;
/* amount of data left in the iterator; this is what iov_iter_count() returns */
size_t count;
/* backing store; which member is valid presumably follows the ITER_* kind in type */
union {
const struct iovec *iov;
const struct kvec *kvec;
const struct bio_vec *bvec;
struct pipe_inode_info *pipe;
};
/*
 * NOTE(review): nr_segs looks like the segment count for the array-backed
 * kinds, and idx/start_idx like state used only by the pipe kind - confirm
 * against the implementation, which is not visible in this header.
 */
union {
unsigned long nr_segs;
struct {
int idx;
int start_idx;
};
};
};
/*
 * Sum the iov_len fields of the first nr_segs entries of an iovec array.
 *
 * Callers must have validated every segment length beforehand: the sum is
 * accumulated in a size_t without any overflow check, so unvalidated lengths
 * can wrap around when added together.
 */
static inline size_t iov_length(const struct iovec *iov, unsigned long nr_segs)
{
	size_t total = 0;

	/* Walk the array by pointer, counting the remaining segments down. */
	for (; nr_segs != 0; nr_segs--, iov++)
		total += iov->iov_len;

	return total;
}
/*
 * Describe the part of the current segment that the iterator still covers.
 *
 * The returned iovec starts iov_offset bytes into the segment iter->iov
 * points at, and is no longer than either the remainder of that segment or
 * iter->count. The iterator itself is not modified.
 */
static inline struct iovec iov_iter_iovec(const struct iov_iter *iter)
{
	const struct iovec *seg = iter->iov;
	size_t skip = iter->iov_offset;
	struct iovec cur;

	cur.iov_base = seg->iov_base + skip;
	cur.iov_len = min(iter->count, seg->iov_len - skip);

	return cur;
}
/*
 * iov_for_each - visit each remaining segment of an iterator.
 *
 * @iov:   struct iovec lvalue that receives the current segment
 * @iter:  struct iov_iter lvalue used as the loop cursor; it is overwritten
 *         with a copy of @start
 * @start: iterator to walk; it is copied, never advanced
 *
 * Nothing is visited when @start has ITER_BVEC or ITER_PIPE set. Otherwise
 * the loop runs while the cursor's count is non-zero, loading @iov with
 * iov_iter_iovec() and stepping with iov_iter_advance() by the length of the
 * segment just visited.
 *
 * The kind test is spelled "if (...) {} else for (...)" on purpose: the
 * hidden if already owns an else, so an "else" written by the caller after
 * the loop body cannot be captured by the macro (dangling else).
 */
#define iov_for_each(iov, iter, start) \
	if ((start).type & (ITER_BVEC | ITER_PIPE)) { \
	} else \
		for ((iter) = (start); \
		     (iter).count && \
		     (((iov) = iov_iter_iovec(&(iter))), 1); \
		     iov_iter_advance(&(iter), (iov).iov_len))
/*
 * Out-of-line iov_iter primitives. Only the prototypes are visible in this
 * header, so the notes below are limited to what the header itself shows;
 * anything marked NOTE(review) is inferred from names and signatures and
 * must be confirmed against the definitions.
 */
/* NOTE(review): presumably trims an iovec array so it covers at most 'to' bytes - confirm. */
unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to);
size_t iov_iter_copy_from_user_atomic(struct page *page,
struct iov_iter *i, unsigned long offset, size_t bytes);
/* Used by iov_for_each() to step its cursor past the segment just visited. */
void iov_iter_advance(struct iov_iter *i, size_t bytes);
/* NOTE(review): presumably the inverse of iov_iter_advance() - confirm. */
void iov_iter_revert(struct iov_iter *i, size_t bytes);
int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes);
size_t iov_iter_single_seg_count(const struct iov_iter *i);
/*
 * Copy helpers.
 * NOTE(review): the size_t-returning variants presumably report the number
 * of bytes copied, and the bool-returning *_full variant whether the whole
 * request was satisfied - confirm.
 */
size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
struct iov_iter *i);
size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
struct iov_iter *i);
size_t copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i);
size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i);
bool copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i);
/*
 * Also the fallback that copy_from_iter_flushcache() forwards to when
 * CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE is not set.
 */
size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i);
/*
 * [History annotation captured with the page view; not part of the header.]
 *
 * x86, uaccess: introduce copy_from_iter_flushcache for pmem / cache-bypass
 * operations
 *
 * The pmem driver has a need to transfer data with a persistent memory
 * destination and be able to rely on the fact that the destination writes
 * are not cached. It is sufficient for the writes to be flushed to a
 * cpu-store-buffer (non-temporal / "movnt" in x86 terms), as we expect
 * userspace to call fsync() to ensure data-writes have reached a
 * power-fail-safe zone in the platform. The fsync() triggers a REQ_FUA or
 * REQ_FLUSH to the pmem driver which will turn around and fence previous
 * writes with an "sfence".
 *
 * Implement a __copy_from_user_inatomic_flushcache, memcpy_page_flushcache,
 * and memcpy_flushcache, that guarantee that the destination buffer is not
 * dirty in the cpu cache on completion. The new copy_from_iter_flushcache
 * and sub-routines will be used to replace the "pmem api"
 * (include/linux/pmem.h + arch/x86/include/asm/pmem.h). The availability of
 * copy_from_iter_flushcache() and memcpy_flushcache() are gated by the
 * CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE config symbol, and fallback to
 * copy_from_iter_nocache() and plain memcpy() otherwise.
 *
 * This is meant to satisfy the concern from Linus that if a driver wants to
 * do something beyond the normal nocache semantics it should be something
 * private to that driver [1], and Al's concern that anything uaccess related
 * belongs with the rest of the uaccess code [2].
 *
 * The first consumer of this interface is a new 'copy_from_iter' dax
 * operation so that pmem can inject cache maintenance operations without
 * imposing this overhead on other dax-capable drivers.
 *
 * [1]: https://lists.01.org/pipermail/linux-nvdimm/2017-January/008364.html
 * [2]: https://lists.01.org/pipermail/linux-nvdimm/2017-April/009942.html
 *
 * Cc: <x86@kernel.org>
 * Cc: Jan Kara <jack@suse.cz>
 * Cc: Jeff Moyer <jmoyer@redhat.com>
 * Cc: Ingo Molnar <mingo@redhat.com>
 * Cc: Christoph Hellwig <hch@lst.de>
 * Cc: Toshi Kani <toshi.kani@hpe.com>
 * Cc: "H. Peter Anvin" <hpa@zytor.com>
 * Cc: Al Viro <viro@zeniv.linux.org.uk>
 * Cc: Thomas Gleixner <tglx@linutronix.de>
 * Cc: Matthew Wilcox <mawilcox@microsoft.com>
 * Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>
 * Signed-off-by: Dan Williams <dan.j.williams@intel.com>
 *
 * 2017-05-30 03:22:50 +08:00
 */
/*
 * copy_from_iter_flushcache() - copy from an iterator into addr.
 *
 * When CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE is set, an out-of-line
 * implementation is used. Otherwise the call is forwarded unchanged to
 * copy_from_iter_nocache().
 *
 * Note, users like pmem that depend on the stricter semantics of
 * copy_from_iter_flushcache() compared to copy_from_iter_nocache() must
 * check IS_ENABLED(CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE) before assuming that
 * the destination is flushed from the cache on return.
 */
#ifndef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
static inline size_t copy_from_iter_flushcache(void *addr, size_t bytes,
					       struct iov_iter *i)
{
	/* No flushcache support configured: fall back to the nocache copy. */
	return copy_from_iter_nocache(addr, bytes, i);
}
#else
size_t copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i);
#endif
/*
 * Further out-of-line iov_iter helpers; only the prototypes are visible in
 * this header. Notes marked NOTE(review) are inferred from names and
 * signatures, not from the implementations.
 */
bool copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i);
size_t iov_iter_zero(size_t bytes, struct iov_iter *);
unsigned long iov_iter_alignment(const struct iov_iter *i);
unsigned long iov_iter_gap_alignment(const struct iov_iter *i);
/*
 * Initialisers, one per backing store (iovec, kvec, bio_vec, pipe). Each
 * takes a direction and a total byte count; the array-backed ones also take
 * the number of segments.
 * NOTE(review): 'direction' is presumably READ or WRITE, matching what
 * iov_iter_rw() extracts from iov_iter.type - confirm.
 */
void iov_iter_init(struct iov_iter *i, int direction, const struct iovec *iov,
unsigned long nr_segs, size_t count);
void iov_iter_kvec(struct iov_iter *i, int direction, const struct kvec *kvec,
unsigned long nr_segs, size_t count);
void iov_iter_bvec(struct iov_iter *i, int direction, const struct bio_vec *bvec,
unsigned long nr_segs, size_t count);
void iov_iter_pipe(struct iov_iter *i, int direction, struct pipe_inode_info *pipe,
size_t count);
/*
 * NOTE(review): the ssize_t return type suggests a byte count on success and
 * a negative error code on failure - confirm against the definitions.
 */
ssize_t iov_iter_get_pages(struct iov_iter *i, struct page **pages,
size_t maxsize, unsigned maxpages, size_t *start);
ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, struct page ***pages,
size_t maxsize, size_t *start);
int iov_iter_npages(const struct iov_iter *i, int maxpages);
/*
 * NOTE(review): presumably copies 'old' into 'new' using an allocation made
 * with 'flags'; what the returned pointer means is not visible here - confirm.
 */
const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags);
/* Report the iterator's count field, i.e. the amount of data left in it. */
static inline size_t iov_iter_count(const struct iov_iter *i)
{
	const size_t remaining = i->count;

	return remaining;
}
/*
 * True when none of ITER_BVEC, ITER_KVEC or ITER_PIPE is set in i->type,
 * which is how the ITER_IOVEC kind (value 0) is recognised.
 */
static inline bool iter_is_iovec(const struct iov_iter *i)
{
	const int other_kinds = ITER_BVEC | ITER_KVEC | ITER_PIPE;

	return (i->type & other_kinds) == 0;
}
/*
 * Get one of READ or WRITE out of iter->type without any other flags OR'd in
 * with it.
 *
 * The ?: is just for type safety: its condition is the constant 0, so the
 * expression always yields (i), but having (struct iov_iter *)0 as the other
 * arm makes the compiler diagnose an argument that is not a pointer
 * compatible with struct iov_iter *.
 *
 * NOTE(review): READ and WRITE are not defined in this header; they must be
 * in scope wherever the macro is expanded.
 */
#define iov_iter_rw(i) ((0 ? (struct iov_iter *)0 : (i))->type & (READ | WRITE))
/*
 * Clamp the iterator to at most count bytes.
 *
 * count is an upper limit, *not* a new size: when the iterator already
 * holds no more data than count, nothing happens.
 */
static inline void iov_iter_truncate(struct iov_iter *i, u64 count)
{
	/*
	 * count does not have to fit in size_t. The comparison is carried out
	 * in u64, and any count that would be truncated by the assignment
	 * below is by definition greater than every size_t value, including
	 * the current i->count - so such a count never reaches the
	 * assignment.
	 */
	if (i->count <= count)
		return;

	i->count = count;
}
/*
 * Undo an earlier truncation by setting the iterator's byte count to count.
 *
 * No validation is done here: the caller must pass no more than the
 * iterator held before it was shrunk.
 */
static inline void iov_iter_reexpand(struct iov_iter *i, size_t count)
{
	i->count = count;
}
/*
 * Checksumming copy helpers; only the prototypes are visible in this header.
 * NOTE(review): presumably these behave like copy_to_iter(),
 * copy_from_iter() and copy_from_iter_full() while also updating *csum -
 * confirm against the definitions.
 */
size_t csum_and_copy_to_iter(const void *addr, size_t bytes, __wsum *csum, struct iov_iter *i);
size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i);
bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i);
/*
 * [History annotation captured with the page view; not part of the header.]
 *
 * saner iov_iter initialization primitives
 *
 * iovec-backed iov_iter instances are assumed to satisfy several properties:
 *   * no more than UIO_MAXIOV elements in iovec array
 *   * total size of all ranges is no more than MAX_RW_COUNT
 *   * all ranges pass access_ok().
 *
 * The problem is, invariants of data structures should be established in the
 * primitives creating those data structures, not in the code using those
 * primitives. And iov_iter_init() violates that principle. For a while we
 * managed to get away with that, but once the use of iov_iter started to
 * spread, it didn't take long for shit to hit the fan - missed check in
 * sys_sendto() had introduced a roothole.
 *
 * We _do_ have primitives for importing and validating iovecs (both native
 * and compat ones) and those primitives are almost always followed by
 * shoving the resulting iovec into iov_iter. Life would be considerably
 * simpler (and safer) if we combined those primitives with initializing
 * iov_iter.
 *
 * That gives us two new primitives - import_iovec() and
 * compat_import_iovec(). Calling conventions:
 *
 *	iovec = iov_array;
 *	err = import_iovec(direction, uvec, nr_segs,
 *			   ARRAY_SIZE(iov_array), &iovec, &iter);
 *
 * imports user vector into kernel space (into iov_array if it fits,
 * allocated if it doesn't fit or if iovec was NULL), validates it and sets
 * iter up to refer to it. On success 0 is returned and allocated kernel copy
 * (or NULL if the array had fit into caller-supplied one) is returned via
 * iovec. On failure all allocations are undone and -E... is returned. If the
 * total size of ranges exceeds MAX_RW_COUNT, the excess is silently
 * truncated.
 *
 * compat_import_iovec() expects uvec to be a pointer to user array of
 * compat_iovec; otherwise it's identical to import_iovec().
 *
 * Finally, import_single_range() sets iov_iter backed by single-element
 * iovec covering a user-supplied range -
 *
 *	err = import_single_range(direction, address, size, iovec, &iter);
 *
 * does validation and sets iter up. Again, size in excess of MAX_RW_COUNT
 * gets silently truncated.
 *
 * Next commits will be switching the things up to use of those and reducing
 * the amount of iov_iter_init() instances.
 *
 * Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
 *
 * 2015-03-22 05:45:43 +08:00
 */
/*
 * Import a user-supplied iovec array and set up an iterator over it.
 *
 * According to the commit that introduced these helpers: the user vector is
 * copied into kernel space - into the caller-supplied array passed in via
 * *iov if it fits in fast_segs entries, into a fresh allocation if it does
 * not fit or if *iov was NULL - then validated, and i is set up to refer to
 * it. On success 0 is returned and *iov holds the allocated copy, or NULL if
 * the caller's array was used. On failure all allocations are undone and a
 * negative error code is returned. A total size above MAX_RW_COUNT is
 * silently truncated.
 *
 * NOTE(review): 'type' corresponds to the 'direction' argument in that
 * description; the implementation is not visible in this header.
 */
int import_iovec(int type, const struct iovec __user * uvector,
unsigned nr_segs, unsigned fast_segs,
struct iovec **iov, struct iov_iter *i);
#ifdef CONFIG_COMPAT
struct compat_iovec;
/*
 * Same as import_iovec(), except that uvector points to a user array of
 * struct compat_iovec (per the same commit description).
 */
int compat_import_iovec(int type, const struct compat_iovec __user * uvector,
unsigned nr_segs, unsigned fast_segs,
struct iovec **iov, struct iov_iter *i);
#endif
/*
 * Set up i as an iterator backed by the single-element iovec at iov, covering
 * the user range [buf, buf + len). Per the introducing commit, the range is
 * validated and a len above MAX_RW_COUNT is silently truncated.
 */
int import_single_range(int type, void __user *buf, size_t len,
struct iovec *iov, struct iov_iter *i);
#endif