Merge branch 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/djbw/async_tx into for-linus

This commit is contained in:
NeilBrown 2009-09-23 18:31:11 +10:00
commit 4b3df5668c
61 changed files with 9135 additions and 3307 deletions

View File

@ -54,20 +54,23 @@ features surfaced as a result:
3.1 General format of the API:
struct dma_async_tx_descriptor *
async_<operation>(<op specific parameters>,
enum async_tx_flags flags,
struct dma_async_tx_descriptor *dependency,
dma_async_tx_callback callback_routine,
void *callback_parameter);
async_<operation>(<op specific parameters>, struct async_submit ctl *submit)
3.2 Supported operations:
memcpy - memory copy between a source and a destination buffer
memset - fill a destination buffer with a byte value
xor - xor a series of source buffers and write the result to a
destination buffer
xor_zero_sum - xor a series of source buffers and set a flag if the
xor_val - xor a series of source buffers and set a flag if the
result is zero. The implementation attempts to prevent
writes to memory
pq - generate the p+q (raid6 syndrome) from a series of source buffers
pq_val - validate that a p and or q buffer are in sync with a given series of
sources
datap - (raid6_datap_recov) recover a raid6 data block and the p block
from the given sources
2data - (raid6_2data_recov) recover 2 raid6 data blocks from the given
sources
3.3 Descriptor management:
The return value is non-NULL and points to a 'descriptor' when the operation
@ -80,8 +83,8 @@ acknowledged by the application before the offload engine driver is allowed to
recycle (or free) the descriptor. A descriptor can be acked by one of the
following methods:
1/ setting the ASYNC_TX_ACK flag if no child operations are to be submitted
2/ setting the ASYNC_TX_DEP_ACK flag to acknowledge the parent
descriptor of a new operation.
2/ submitting an unacknowledged descriptor as a dependency to another
async_tx call will implicitly set the acknowledged state.
3/ calling async_tx_ack() on the descriptor.
3.4 When does the operation execute?
@ -119,12 +122,14 @@ of an operation.
Perform a xor->copy->xor operation where each operation depends on the
result from the previous operation:
void complete_xor_copy_xor(void *param)
void callback(void *param)
{
printk("complete\n");
struct completion *cmp = param;
complete(cmp);
}
int run_xor_copy_xor(struct page **xor_srcs,
void run_xor_copy_xor(struct page **xor_srcs,
int xor_src_cnt,
struct page *xor_dest,
size_t xor_len,
@ -133,16 +138,26 @@ int run_xor_copy_xor(struct page **xor_srcs,
size_t copy_len)
{
struct dma_async_tx_descriptor *tx;
addr_conv_t addr_conv[xor_src_cnt];
struct async_submit_ctl submit;
addr_conv_t addr_conv[NDISKS];
struct completion cmp;
tx = async_xor(xor_dest, xor_srcs, 0, xor_src_cnt, xor_len,
ASYNC_TX_XOR_DROP_DST, NULL, NULL, NULL);
tx = async_memcpy(copy_dest, copy_src, 0, 0, copy_len,
ASYNC_TX_DEP_ACK, tx, NULL, NULL);
tx = async_xor(xor_dest, xor_srcs, 0, xor_src_cnt, xor_len,
ASYNC_TX_XOR_DROP_DST | ASYNC_TX_DEP_ACK | ASYNC_TX_ACK,
tx, complete_xor_copy_xor, NULL);
init_async_submit(&submit, ASYNC_TX_XOR_DROP_DST, NULL, NULL, NULL,
addr_conv);
tx = async_xor(xor_dest, xor_srcs, 0, xor_src_cnt, xor_len, &submit)
submit->depend_tx = tx;
tx = async_memcpy(copy_dest, copy_src, 0, 0, copy_len, &submit);
init_completion(&cmp);
init_async_submit(&submit, ASYNC_TX_XOR_DROP_DST | ASYNC_TX_ACK, tx,
callback, &cmp, addr_conv);
tx = async_xor(xor_dest, xor_srcs, 0, xor_src_cnt, xor_len, &submit);
async_tx_issue_pending_all();
wait_for_completion(&cmp);
}
See include/linux/async_tx.h for more information on the flags. See the

View File

@ -187,11 +187,74 @@ union iop3xx_desc {
void *ptr;
};
/* No support for p+q operations */
static inline int
iop_chan_pq_slot_count(size_t len, int src_cnt, int *slots_per_op)
{
BUG();
return 0;
}
static inline void
iop_desc_init_pq(struct iop_adma_desc_slot *desc, int src_cnt,
unsigned long flags)
{
BUG();
}
static inline void
iop_desc_set_pq_addr(struct iop_adma_desc_slot *desc, dma_addr_t *addr)
{
BUG();
}
static inline void
iop_desc_set_pq_src_addr(struct iop_adma_desc_slot *desc, int src_idx,
dma_addr_t addr, unsigned char coef)
{
BUG();
}
static inline int
iop_chan_pq_zero_sum_slot_count(size_t len, int src_cnt, int *slots_per_op)
{
BUG();
return 0;
}
static inline void
iop_desc_init_pq_zero_sum(struct iop_adma_desc_slot *desc, int src_cnt,
unsigned long flags)
{
BUG();
}
static inline void
iop_desc_set_pq_zero_sum_byte_count(struct iop_adma_desc_slot *desc, u32 len)
{
BUG();
}
#define iop_desc_set_pq_zero_sum_src_addr iop_desc_set_pq_src_addr
static inline void
iop_desc_set_pq_zero_sum_addr(struct iop_adma_desc_slot *desc, int pq_idx,
dma_addr_t *src)
{
BUG();
}
static inline int iop_adma_get_max_xor(void)
{
return 32;
}
static inline int iop_adma_get_max_pq(void)
{
BUG();
return 0;
}
static inline u32 iop_chan_get_current_descriptor(struct iop_adma_chan *chan)
{
int id = chan->device->id;
@ -332,6 +395,11 @@ static inline int iop_chan_zero_sum_slot_count(size_t len, int src_cnt,
return slot_cnt;
}
static inline int iop_desc_is_pq(struct iop_adma_desc_slot *desc)
{
return 0;
}
static inline u32 iop_desc_get_dest_addr(struct iop_adma_desc_slot *desc,
struct iop_adma_chan *chan)
{
@ -349,6 +417,14 @@ static inline u32 iop_desc_get_dest_addr(struct iop_adma_desc_slot *desc,
return 0;
}
static inline u32 iop_desc_get_qdest_addr(struct iop_adma_desc_slot *desc,
struct iop_adma_chan *chan)
{
BUG();
return 0;
}
static inline u32 iop_desc_get_byte_count(struct iop_adma_desc_slot *desc,
struct iop_adma_chan *chan)
{
@ -756,13 +832,14 @@ static inline void iop_desc_set_block_fill_val(struct iop_adma_desc_slot *desc,
hw_desc->src[0] = val;
}
static inline int iop_desc_get_zero_result(struct iop_adma_desc_slot *desc)
static inline enum sum_check_flags
iop_desc_get_zero_result(struct iop_adma_desc_slot *desc)
{
struct iop3xx_desc_aau *hw_desc = desc->hw_desc;
struct iop3xx_aau_desc_ctrl desc_ctrl = hw_desc->desc_ctrl_field;
iop_paranoia(!(desc_ctrl.tx_complete && desc_ctrl.zero_result_en));
return desc_ctrl.zero_result_err;
return desc_ctrl.zero_result_err << SUM_CHECK_P;
}
static inline void iop_chan_append(struct iop_adma_chan *chan)

View File

@ -86,6 +86,7 @@ struct iop_adma_chan {
* @idx: pool index
* @unmap_src_cnt: number of xor sources
* @unmap_len: transaction bytecount
* @tx_list: list of descriptors that are associated with one operation
* @async_tx: support for the async_tx api
* @group_list: list of slots that make up a multi-descriptor transaction
* for example transfer lengths larger than the supported hw max
@ -102,10 +103,12 @@ struct iop_adma_desc_slot {
u16 idx;
u16 unmap_src_cnt;
size_t unmap_len;
struct list_head tx_list;
struct dma_async_tx_descriptor async_tx;
union {
u32 *xor_check_result;
u32 *crc32_result;
u32 *pq_check_result;
};
};

View File

@ -150,6 +150,8 @@ static inline int iop_adma_get_max_xor(void)
return 16;
}
#define iop_adma_get_max_pq iop_adma_get_max_xor
static inline u32 iop_chan_get_current_descriptor(struct iop_adma_chan *chan)
{
return __raw_readl(ADMA_ADAR(chan));
@ -211,7 +213,10 @@ iop_chan_xor_slot_count(size_t len, int src_cnt, int *slots_per_op)
#define IOP_ADMA_MAX_BYTE_COUNT ADMA_MAX_BYTE_COUNT
#define IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT ADMA_MAX_BYTE_COUNT
#define IOP_ADMA_XOR_MAX_BYTE_COUNT ADMA_MAX_BYTE_COUNT
#define IOP_ADMA_PQ_MAX_BYTE_COUNT ADMA_MAX_BYTE_COUNT
#define iop_chan_zero_sum_slot_count(l, s, o) iop_chan_xor_slot_count(l, s, o)
#define iop_chan_pq_slot_count iop_chan_xor_slot_count
#define iop_chan_pq_zero_sum_slot_count iop_chan_xor_slot_count
static inline u32 iop_desc_get_dest_addr(struct iop_adma_desc_slot *desc,
struct iop_adma_chan *chan)
@ -220,6 +225,13 @@ static inline u32 iop_desc_get_dest_addr(struct iop_adma_desc_slot *desc,
return hw_desc->dest_addr;
}
static inline u32 iop_desc_get_qdest_addr(struct iop_adma_desc_slot *desc,
struct iop_adma_chan *chan)
{
struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
return hw_desc->q_dest_addr;
}
static inline u32 iop_desc_get_byte_count(struct iop_adma_desc_slot *desc,
struct iop_adma_chan *chan)
{
@ -319,6 +331,58 @@ iop_desc_init_zero_sum(struct iop_adma_desc_slot *desc, int src_cnt,
return 1;
}
static inline void
iop_desc_init_pq(struct iop_adma_desc_slot *desc, int src_cnt,
unsigned long flags)
{
struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
union {
u32 value;
struct iop13xx_adma_desc_ctrl field;
} u_desc_ctrl;
u_desc_ctrl.value = 0;
u_desc_ctrl.field.src_select = src_cnt - 1;
u_desc_ctrl.field.xfer_dir = 3; /* local to internal bus */
u_desc_ctrl.field.pq_xfer_en = 1;
u_desc_ctrl.field.p_xfer_dis = !!(flags & DMA_PREP_PQ_DISABLE_P);
u_desc_ctrl.field.int_en = flags & DMA_PREP_INTERRUPT;
hw_desc->desc_ctrl = u_desc_ctrl.value;
}
static inline int iop_desc_is_pq(struct iop_adma_desc_slot *desc)
{
struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
union {
u32 value;
struct iop13xx_adma_desc_ctrl field;
} u_desc_ctrl;
u_desc_ctrl.value = hw_desc->desc_ctrl;
return u_desc_ctrl.field.pq_xfer_en;
}
static inline void
iop_desc_init_pq_zero_sum(struct iop_adma_desc_slot *desc, int src_cnt,
unsigned long flags)
{
struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
union {
u32 value;
struct iop13xx_adma_desc_ctrl field;
} u_desc_ctrl;
u_desc_ctrl.value = 0;
u_desc_ctrl.field.src_select = src_cnt - 1;
u_desc_ctrl.field.xfer_dir = 3; /* local to internal bus */
u_desc_ctrl.field.zero_result = 1;
u_desc_ctrl.field.status_write_back_en = 1;
u_desc_ctrl.field.pq_xfer_en = 1;
u_desc_ctrl.field.p_xfer_dis = !!(flags & DMA_PREP_PQ_DISABLE_P);
u_desc_ctrl.field.int_en = flags & DMA_PREP_INTERRUPT;
hw_desc->desc_ctrl = u_desc_ctrl.value;
}
static inline void iop_desc_set_byte_count(struct iop_adma_desc_slot *desc,
struct iop_adma_chan *chan,
u32 byte_count)
@ -351,6 +415,7 @@ iop_desc_set_zero_sum_byte_count(struct iop_adma_desc_slot *desc, u32 len)
}
}
#define iop_desc_set_pq_zero_sum_byte_count iop_desc_set_zero_sum_byte_count
static inline void iop_desc_set_dest_addr(struct iop_adma_desc_slot *desc,
struct iop_adma_chan *chan,
@ -361,6 +426,16 @@ static inline void iop_desc_set_dest_addr(struct iop_adma_desc_slot *desc,
hw_desc->upper_dest_addr = 0;
}
static inline void
iop_desc_set_pq_addr(struct iop_adma_desc_slot *desc, dma_addr_t *addr)
{
struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
hw_desc->dest_addr = addr[0];
hw_desc->q_dest_addr = addr[1];
hw_desc->upper_dest_addr = 0;
}
static inline void iop_desc_set_memcpy_src_addr(struct iop_adma_desc_slot *desc,
dma_addr_t addr)
{
@ -388,6 +463,29 @@ static inline void iop_desc_set_xor_src_addr(struct iop_adma_desc_slot *desc,
} while (slot_cnt);
}
static inline void
iop_desc_set_pq_src_addr(struct iop_adma_desc_slot *desc, int src_idx,
dma_addr_t addr, unsigned char coef)
{
int slot_cnt = desc->slot_cnt, slots_per_op = desc->slots_per_op;
struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc, *iter;
struct iop13xx_adma_src *src;
int i = 0;
do {
iter = iop_hw_desc_slot_idx(hw_desc, i);
src = &iter->src[src_idx];
src->src_addr = addr;
src->pq_upper_src_addr = 0;
src->pq_dmlt = coef;
slot_cnt -= slots_per_op;
if (slot_cnt) {
i += slots_per_op;
addr += IOP_ADMA_PQ_MAX_BYTE_COUNT;
}
} while (slot_cnt);
}
static inline void
iop_desc_init_interrupt(struct iop_adma_desc_slot *desc,
struct iop_adma_chan *chan)
@ -399,6 +497,15 @@ iop_desc_init_interrupt(struct iop_adma_desc_slot *desc,
}
#define iop_desc_set_zero_sum_src_addr iop_desc_set_xor_src_addr
#define iop_desc_set_pq_zero_sum_src_addr iop_desc_set_pq_src_addr
static inline void
iop_desc_set_pq_zero_sum_addr(struct iop_adma_desc_slot *desc, int pq_idx,
dma_addr_t *src)
{
iop_desc_set_xor_src_addr(desc, pq_idx, src[pq_idx]);
iop_desc_set_xor_src_addr(desc, pq_idx+1, src[pq_idx+1]);
}
static inline void iop_desc_set_next_desc(struct iop_adma_desc_slot *desc,
u32 next_desc_addr)
@ -428,18 +535,20 @@ static inline void iop_desc_set_block_fill_val(struct iop_adma_desc_slot *desc,
hw_desc->block_fill_data = val;
}
static inline int iop_desc_get_zero_result(struct iop_adma_desc_slot *desc)
static inline enum sum_check_flags
iop_desc_get_zero_result(struct iop_adma_desc_slot *desc)
{
struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
struct iop13xx_adma_desc_ctrl desc_ctrl = hw_desc->desc_ctrl_field;
struct iop13xx_adma_byte_count byte_count = hw_desc->byte_count_field;
enum sum_check_flags flags;
BUG_ON(!(byte_count.tx_complete && desc_ctrl.zero_result));
if (desc_ctrl.pq_xfer_en)
return byte_count.zero_result_err_q;
else
return byte_count.zero_result_err;
flags = byte_count.zero_result_err_q << SUM_CHECK_Q;
flags |= byte_count.zero_result_err << SUM_CHECK_P;
return flags;
}
static inline void iop_chan_append(struct iop_adma_chan *chan)

View File

@ -477,10 +477,8 @@ void __init iop13xx_platform_init(void)
plat_data = &iop13xx_adma_0_data;
dma_cap_set(DMA_MEMCPY, plat_data->cap_mask);
dma_cap_set(DMA_XOR, plat_data->cap_mask);
dma_cap_set(DMA_DUAL_XOR, plat_data->cap_mask);
dma_cap_set(DMA_ZERO_SUM, plat_data->cap_mask);
dma_cap_set(DMA_XOR_VAL, plat_data->cap_mask);
dma_cap_set(DMA_MEMSET, plat_data->cap_mask);
dma_cap_set(DMA_MEMCPY_CRC32C, plat_data->cap_mask);
dma_cap_set(DMA_INTERRUPT, plat_data->cap_mask);
break;
case IOP13XX_INIT_ADMA_1:
@ -489,10 +487,8 @@ void __init iop13xx_platform_init(void)
plat_data = &iop13xx_adma_1_data;
dma_cap_set(DMA_MEMCPY, plat_data->cap_mask);
dma_cap_set(DMA_XOR, plat_data->cap_mask);
dma_cap_set(DMA_DUAL_XOR, plat_data->cap_mask);
dma_cap_set(DMA_ZERO_SUM, plat_data->cap_mask);
dma_cap_set(DMA_XOR_VAL, plat_data->cap_mask);
dma_cap_set(DMA_MEMSET, plat_data->cap_mask);
dma_cap_set(DMA_MEMCPY_CRC32C, plat_data->cap_mask);
dma_cap_set(DMA_INTERRUPT, plat_data->cap_mask);
break;
case IOP13XX_INIT_ADMA_2:
@ -501,14 +497,11 @@ void __init iop13xx_platform_init(void)
plat_data = &iop13xx_adma_2_data;
dma_cap_set(DMA_MEMCPY, plat_data->cap_mask);
dma_cap_set(DMA_XOR, plat_data->cap_mask);
dma_cap_set(DMA_DUAL_XOR, plat_data->cap_mask);
dma_cap_set(DMA_ZERO_SUM, plat_data->cap_mask);
dma_cap_set(DMA_XOR_VAL, plat_data->cap_mask);
dma_cap_set(DMA_MEMSET, plat_data->cap_mask);
dma_cap_set(DMA_MEMCPY_CRC32C, plat_data->cap_mask);
dma_cap_set(DMA_INTERRUPT, plat_data->cap_mask);
dma_cap_set(DMA_PQ_XOR, plat_data->cap_mask);
dma_cap_set(DMA_PQ_UPDATE, plat_data->cap_mask);
dma_cap_set(DMA_PQ_ZERO_SUM, plat_data->cap_mask);
dma_cap_set(DMA_PQ, plat_data->cap_mask);
dma_cap_set(DMA_PQ_VAL, plat_data->cap_mask);
break;
}
}

View File

@ -179,7 +179,6 @@ static int __init iop3xx_adma_cap_init(void)
dma_cap_set(DMA_INTERRUPT, iop3xx_dma_0_data.cap_mask);
#else
dma_cap_set(DMA_MEMCPY, iop3xx_dma_0_data.cap_mask);
dma_cap_set(DMA_MEMCPY_CRC32C, iop3xx_dma_0_data.cap_mask);
dma_cap_set(DMA_INTERRUPT, iop3xx_dma_0_data.cap_mask);
#endif
@ -188,7 +187,6 @@ static int __init iop3xx_adma_cap_init(void)
dma_cap_set(DMA_INTERRUPT, iop3xx_dma_1_data.cap_mask);
#else
dma_cap_set(DMA_MEMCPY, iop3xx_dma_1_data.cap_mask);
dma_cap_set(DMA_MEMCPY_CRC32C, iop3xx_dma_1_data.cap_mask);
dma_cap_set(DMA_INTERRUPT, iop3xx_dma_1_data.cap_mask);
#endif
@ -198,7 +196,7 @@ static int __init iop3xx_adma_cap_init(void)
dma_cap_set(DMA_INTERRUPT, iop3xx_aau_data.cap_mask);
#else
dma_cap_set(DMA_XOR, iop3xx_aau_data.cap_mask);
dma_cap_set(DMA_ZERO_SUM, iop3xx_aau_data.cap_mask);
dma_cap_set(DMA_XOR_VAL, iop3xx_aau_data.cap_mask);
dma_cap_set(DMA_MEMSET, iop3xx_aau_data.cap_mask);
dma_cap_set(DMA_INTERRUPT, iop3xx_aau_data.cap_mask);
#endif

View File

@ -0,0 +1,136 @@
/*
* Freescale MPC83XX / MPC85XX DMA Controller
*
* Copyright (c) 2009 Ira W. Snyder <iws@ovro.caltech.edu>
*
* This file is licensed under the terms of the GNU General Public License
* version 2. This program is licensed "as is" without any warranty of any
* kind, whether express or implied.
*/
#ifndef __ARCH_POWERPC_ASM_FSLDMA_H__
#define __ARCH_POWERPC_ASM_FSLDMA_H__
#include <linux/dmaengine.h>
/*
* Definitions for the Freescale DMA controller's DMA_SLAVE implemention
*
* The Freescale DMA_SLAVE implementation was designed to handle many-to-many
* transfers. An example usage would be an accelerated copy between two
* scatterlists. Another example use would be an accelerated copy from
* multiple non-contiguous device buffers into a single scatterlist.
*
* A DMA_SLAVE transaction is defined by a struct fsl_dma_slave. This
* structure contains a list of hardware addresses that should be copied
* to/from the scatterlist passed into device_prep_slave_sg(). The structure
* also has some fields to enable hardware-specific features.
*/
/**
* struct fsl_dma_hw_addr
* @entry: linked list entry
* @address: the hardware address
* @length: length to transfer
*
* Holds a single physical hardware address / length pair for use
* with the DMAEngine DMA_SLAVE API.
*/
struct fsl_dma_hw_addr {
struct list_head entry;
dma_addr_t address;
size_t length;
};
/**
* struct fsl_dma_slave
* @addresses: a linked list of struct fsl_dma_hw_addr structures
* @request_count: value for DMA request count
* @src_loop_size: setup and enable constant source-address DMA transfers
* @dst_loop_size: setup and enable constant destination address DMA transfers
* @external_start: enable externally started DMA transfers
* @external_pause: enable externally paused DMA transfers
*
* Holds a list of address / length pairs for use with the DMAEngine
* DMA_SLAVE API implementation for the Freescale DMA controller.
*/
struct fsl_dma_slave {
/* List of hardware address/length pairs */
struct list_head addresses;
/* Support for extra controller features */
unsigned int request_count;
unsigned int src_loop_size;
unsigned int dst_loop_size;
bool external_start;
bool external_pause;
};
/**
* fsl_dma_slave_append - add an address/length pair to a struct fsl_dma_slave
* @slave: the &struct fsl_dma_slave to add to
* @address: the hardware address to add
* @length: the length of bytes to transfer from @address
*
* Add a hardware address/length pair to a struct fsl_dma_slave. Returns 0 on
* success, -ERRNO otherwise.
*/
static inline int fsl_dma_slave_append(struct fsl_dma_slave *slave,
dma_addr_t address, size_t length)
{
struct fsl_dma_hw_addr *addr;
addr = kzalloc(sizeof(*addr), GFP_ATOMIC);
if (!addr)
return -ENOMEM;
INIT_LIST_HEAD(&addr->entry);
addr->address = address;
addr->length = length;
list_add_tail(&addr->entry, &slave->addresses);
return 0;
}
/**
* fsl_dma_slave_free - free a struct fsl_dma_slave
* @slave: the struct fsl_dma_slave to free
*
* Free a struct fsl_dma_slave and all associated address/length pairs
*/
static inline void fsl_dma_slave_free(struct fsl_dma_slave *slave)
{
struct fsl_dma_hw_addr *addr, *tmp;
if (slave) {
list_for_each_entry_safe(addr, tmp, &slave->addresses, entry) {
list_del(&addr->entry);
kfree(addr);
}
kfree(slave);
}
}
/**
* fsl_dma_slave_alloc - allocate a struct fsl_dma_slave
* @gfp: the flags to pass to kmalloc when allocating this structure
*
* Allocate a struct fsl_dma_slave for use by the DMA_SLAVE API. Returns a new
* struct fsl_dma_slave on success, or NULL on failure.
*/
static inline struct fsl_dma_slave *fsl_dma_slave_alloc(gfp_t gfp)
{
struct fsl_dma_slave *slave;
slave = kzalloc(sizeof(*slave), gfp);
if (!slave)
return NULL;
INIT_LIST_HEAD(&slave->addresses);
return slave;
}
#endif /* __ARCH_POWERPC_ASM_FSLDMA_H__ */

View File

@ -1,12 +1,9 @@
menu "DMA support"
config SH_DMA_API
bool
config SH_DMA
bool "SuperH on-chip DMA controller (DMAC) support"
depends on CPU_SH3 || CPU_SH4
select SH_DMA_API
default n
config SH_DMA_IRQ_MULTI
@ -19,6 +16,15 @@ config SH_DMA_IRQ_MULTI
CPU_SUBTYPE_SH7780 || CPU_SUBTYPE_SH7785 || \
CPU_SUBTYPE_SH7760
config SH_DMA_API
depends on SH_DMA
bool "SuperH DMA API support"
default n
help
SH_DMA_API always enabled DMA API of used SuperH.
If you want to use DMA ENGINE, you must not enable this.
Please enable DMA_ENGINE and SH_DMAE.
config NR_ONCHIP_DMA_CHANNELS
int
depends on SH_DMA

View File

@ -2,8 +2,7 @@
# Makefile for the SuperH DMA specific kernel interface routines under Linux.
#
obj-$(CONFIG_SH_DMA_API) += dma-api.o dma-sysfs.o
obj-$(CONFIG_SH_DMA) += dma-sh.o
obj-$(CONFIG_SH_DMA_API) += dma-sh.o dma-api.o dma-sysfs.o
obj-$(CONFIG_PVR2_DMA) += dma-pvr2.o
obj-$(CONFIG_G2_DMA) += dma-g2.o
obj-$(CONFIG_SH_DMABRG) += dmabrg.o

View File

@ -116,4 +116,17 @@ static u32 dma_base_addr[] __maybe_unused = {
#define CHCR 0x0C
#define DMAOR 0x40
/*
* for dma engine
*
* SuperH DMA mode
*/
#define SHDMA_MIX_IRQ (1 << 1)
#define SHDMA_DMAOR1 (1 << 2)
#define SHDMA_DMAE1 (1 << 3)
struct sh_dmae_pdata {
unsigned int mode;
};
#endif /* __DMA_SH_H */

View File

@ -14,3 +14,12 @@ config ASYNC_MEMSET
tristate
select ASYNC_CORE
config ASYNC_PQ
tristate
select ASYNC_CORE
config ASYNC_RAID6_RECOV
tristate
select ASYNC_CORE
select ASYNC_PQ

View File

@ -2,3 +2,6 @@ obj-$(CONFIG_ASYNC_CORE) += async_tx.o
obj-$(CONFIG_ASYNC_MEMCPY) += async_memcpy.o
obj-$(CONFIG_ASYNC_MEMSET) += async_memset.o
obj-$(CONFIG_ASYNC_XOR) += async_xor.o
obj-$(CONFIG_ASYNC_PQ) += async_pq.o
obj-$(CONFIG_ASYNC_RAID6_RECOV) += async_raid6_recov.o
obj-$(CONFIG_ASYNC_RAID6_TEST) += raid6test.o

View File

@ -33,28 +33,31 @@
* async_memcpy - attempt to copy memory with a dma engine.
* @dest: destination page
* @src: src page
* @offset: offset in pages to start transaction
* @dest_offset: offset into 'dest' to start transaction
* @src_offset: offset into 'src' to start transaction
* @len: length in bytes
* @flags: ASYNC_TX_ACK, ASYNC_TX_DEP_ACK,
* @depend_tx: memcpy depends on the result of this transaction
* @cb_fn: function to call when the memcpy completes
* @cb_param: parameter to pass to the callback routine
* @submit: submission / completion modifiers
*
* honored flags: ASYNC_TX_ACK
*/
struct dma_async_tx_descriptor *
async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset,
unsigned int src_offset, size_t len, enum async_tx_flags flags,
struct dma_async_tx_descriptor *depend_tx,
dma_async_tx_callback cb_fn, void *cb_param)
unsigned int src_offset, size_t len,
struct async_submit_ctl *submit)
{
struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_MEMCPY,
struct dma_chan *chan = async_tx_find_channel(submit, DMA_MEMCPY,
&dest, 1, &src, 1, len);
struct dma_device *device = chan ? chan->device : NULL;
struct dma_async_tx_descriptor *tx = NULL;
if (device) {
if (device && is_dma_copy_aligned(device, src_offset, dest_offset, len)) {
dma_addr_t dma_dest, dma_src;
unsigned long dma_prep_flags = cb_fn ? DMA_PREP_INTERRUPT : 0;
unsigned long dma_prep_flags = 0;
if (submit->cb_fn)
dma_prep_flags |= DMA_PREP_INTERRUPT;
if (submit->flags & ASYNC_TX_FENCE)
dma_prep_flags |= DMA_PREP_FENCE;
dma_dest = dma_map_page(device->dev, dest, dest_offset, len,
DMA_FROM_DEVICE);
@ -67,13 +70,13 @@ async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset,
if (tx) {
pr_debug("%s: (async) len: %zu\n", __func__, len);
async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param);
async_tx_submit(chan, tx, submit);
} else {
void *dest_buf, *src_buf;
pr_debug("%s: (sync) len: %zu\n", __func__, len);
/* wait for any prerequisite operations */
async_tx_quiesce(&depend_tx);
async_tx_quiesce(&submit->depend_tx);
dest_buf = kmap_atomic(dest, KM_USER0) + dest_offset;
src_buf = kmap_atomic(src, KM_USER1) + src_offset;
@ -83,26 +86,13 @@ async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset,
kunmap_atomic(dest_buf, KM_USER0);
kunmap_atomic(src_buf, KM_USER1);
async_tx_sync_epilog(cb_fn, cb_param);
async_tx_sync_epilog(submit);
}
return tx;
}
EXPORT_SYMBOL_GPL(async_memcpy);
static int __init async_memcpy_init(void)
{
return 0;
}
static void __exit async_memcpy_exit(void)
{
do { } while (0);
}
module_init(async_memcpy_init);
module_exit(async_memcpy_exit);
MODULE_AUTHOR("Intel Corporation");
MODULE_DESCRIPTION("asynchronous memcpy api");
MODULE_LICENSE("GPL");

View File

@ -35,26 +35,26 @@
* @val: fill value
* @offset: offset in pages to start transaction
* @len: length in bytes
* @flags: ASYNC_TX_ACK, ASYNC_TX_DEP_ACK
* @depend_tx: memset depends on the result of this transaction
* @cb_fn: function to call when the memcpy completes
* @cb_param: parameter to pass to the callback routine
*
* honored flags: ASYNC_TX_ACK
*/
struct dma_async_tx_descriptor *
async_memset(struct page *dest, int val, unsigned int offset,
size_t len, enum async_tx_flags flags,
struct dma_async_tx_descriptor *depend_tx,
dma_async_tx_callback cb_fn, void *cb_param)
async_memset(struct page *dest, int val, unsigned int offset, size_t len,
struct async_submit_ctl *submit)
{
struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_MEMSET,
struct dma_chan *chan = async_tx_find_channel(submit, DMA_MEMSET,
&dest, 1, NULL, 0, len);
struct dma_device *device = chan ? chan->device : NULL;
struct dma_async_tx_descriptor *tx = NULL;
if (device) {
if (device && is_dma_fill_aligned(device, offset, 0, len)) {
dma_addr_t dma_dest;
unsigned long dma_prep_flags = cb_fn ? DMA_PREP_INTERRUPT : 0;
unsigned long dma_prep_flags = 0;
if (submit->cb_fn)
dma_prep_flags |= DMA_PREP_INTERRUPT;
if (submit->flags & ASYNC_TX_FENCE)
dma_prep_flags |= DMA_PREP_FENCE;
dma_dest = dma_map_page(device->dev, dest, offset, len,
DMA_FROM_DEVICE);
@ -64,38 +64,25 @@ async_memset(struct page *dest, int val, unsigned int offset,
if (tx) {
pr_debug("%s: (async) len: %zu\n", __func__, len);
async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param);
async_tx_submit(chan, tx, submit);
} else { /* run the memset synchronously */
void *dest_buf;
pr_debug("%s: (sync) len: %zu\n", __func__, len);
dest_buf = (void *) (((char *) page_address(dest)) + offset);
dest_buf = page_address(dest) + offset;
/* wait for any prerequisite operations */
async_tx_quiesce(&depend_tx);
async_tx_quiesce(&submit->depend_tx);
memset(dest_buf, val, len);
async_tx_sync_epilog(cb_fn, cb_param);
async_tx_sync_epilog(submit);
}
return tx;
}
EXPORT_SYMBOL_GPL(async_memset);
static int __init async_memset_init(void)
{
return 0;
}
static void __exit async_memset_exit(void)
{
do { } while (0);
}
module_init(async_memset_init);
module_exit(async_memset_exit);
MODULE_AUTHOR("Intel Corporation");
MODULE_DESCRIPTION("asynchronous memset api");
MODULE_LICENSE("GPL");

395
crypto/async_tx/async_pq.c Normal file
View File

@ -0,0 +1,395 @@
/*
* Copyright(c) 2007 Yuri Tikhonov <yur@emcraft.com>
* Copyright(c) 2009 Intel Corporation
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc., 59
* Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
* The full GNU General Public License is included in this distribution in the
* file called COPYING.
*/
#include <linux/kernel.h>
#include <linux/interrupt.h>
#include <linux/dma-mapping.h>
#include <linux/raid/pq.h>
#include <linux/async_tx.h>
/**
* scribble - space to hold throwaway P buffer for synchronous gen_syndrome
*/
static struct page *scribble;
static bool is_raid6_zero_block(struct page *p)
{
return p == (void *) raid6_empty_zero_page;
}
/* the struct page *blocks[] parameter passed to async_gen_syndrome()
* and async_syndrome_val() contains the 'P' destination address at
* blocks[disks-2] and the 'Q' destination address at blocks[disks-1]
*
* note: these are macros as they are used as lvalues
*/
#define P(b, d) (b[d-2])
#define Q(b, d) (b[d-1])
/**
* do_async_gen_syndrome - asynchronously calculate P and/or Q
*/
static __async_inline struct dma_async_tx_descriptor *
do_async_gen_syndrome(struct dma_chan *chan, struct page **blocks,
const unsigned char *scfs, unsigned int offset, int disks,
size_t len, dma_addr_t *dma_src,
struct async_submit_ctl *submit)
{
struct dma_async_tx_descriptor *tx = NULL;
struct dma_device *dma = chan->device;
enum dma_ctrl_flags dma_flags = 0;
enum async_tx_flags flags_orig = submit->flags;
dma_async_tx_callback cb_fn_orig = submit->cb_fn;
dma_async_tx_callback cb_param_orig = submit->cb_param;
int src_cnt = disks - 2;
unsigned char coefs[src_cnt];
unsigned short pq_src_cnt;
dma_addr_t dma_dest[2];
int src_off = 0;
int idx;
int i;
/* DMAs use destinations as sources, so use BIDIRECTIONAL mapping */
if (P(blocks, disks))
dma_dest[0] = dma_map_page(dma->dev, P(blocks, disks), offset,
len, DMA_BIDIRECTIONAL);
else
dma_flags |= DMA_PREP_PQ_DISABLE_P;
if (Q(blocks, disks))
dma_dest[1] = dma_map_page(dma->dev, Q(blocks, disks), offset,
len, DMA_BIDIRECTIONAL);
else
dma_flags |= DMA_PREP_PQ_DISABLE_Q;
/* convert source addresses being careful to collapse 'empty'
* sources and update the coefficients accordingly
*/
for (i = 0, idx = 0; i < src_cnt; i++) {
if (is_raid6_zero_block(blocks[i]))
continue;
dma_src[idx] = dma_map_page(dma->dev, blocks[i], offset, len,
DMA_TO_DEVICE);
coefs[idx] = scfs[i];
idx++;
}
src_cnt = idx;
while (src_cnt > 0) {
submit->flags = flags_orig;
pq_src_cnt = min(src_cnt, dma_maxpq(dma, dma_flags));
/* if we are submitting additional pqs, leave the chain open,
* clear the callback parameters, and leave the destination
* buffers mapped
*/
if (src_cnt > pq_src_cnt) {
submit->flags &= ~ASYNC_TX_ACK;
submit->flags |= ASYNC_TX_FENCE;
dma_flags |= DMA_COMPL_SKIP_DEST_UNMAP;
submit->cb_fn = NULL;
submit->cb_param = NULL;
} else {
dma_flags &= ~DMA_COMPL_SKIP_DEST_UNMAP;
submit->cb_fn = cb_fn_orig;
submit->cb_param = cb_param_orig;
if (cb_fn_orig)
dma_flags |= DMA_PREP_INTERRUPT;
}
if (submit->flags & ASYNC_TX_FENCE)
dma_flags |= DMA_PREP_FENCE;
/* Since we have clobbered the src_list we are committed
* to doing this asynchronously. Drivers force forward
* progress in case they can not provide a descriptor
*/
for (;;) {
tx = dma->device_prep_dma_pq(chan, dma_dest,
&dma_src[src_off],
pq_src_cnt,
&coefs[src_off], len,
dma_flags);
if (likely(tx))
break;
async_tx_quiesce(&submit->depend_tx);
dma_async_issue_pending(chan);
}
async_tx_submit(chan, tx, submit);
submit->depend_tx = tx;
/* drop completed sources */
src_cnt -= pq_src_cnt;
src_off += pq_src_cnt;
dma_flags |= DMA_PREP_CONTINUE;
}
return tx;
}
/**
* do_sync_gen_syndrome - synchronously calculate a raid6 syndrome
*/
static void
do_sync_gen_syndrome(struct page **blocks, unsigned int offset, int disks,
size_t len, struct async_submit_ctl *submit)
{
void **srcs;
int i;
if (submit->scribble)
srcs = submit->scribble;
else
srcs = (void **) blocks;
for (i = 0; i < disks; i++) {
if (is_raid6_zero_block(blocks[i])) {
BUG_ON(i > disks - 3); /* P or Q can't be zero */
srcs[i] = blocks[i];
} else
srcs[i] = page_address(blocks[i]) + offset;
}
raid6_call.gen_syndrome(disks, len, srcs);
async_tx_sync_epilog(submit);
}
/**
* async_gen_syndrome - asynchronously calculate a raid6 syndrome
* @blocks: source blocks from idx 0..disks-3, P @ disks-2 and Q @ disks-1
* @offset: common offset into each block (src and dest) to start transaction
* @disks: number of blocks (including missing P or Q, see below)
* @len: length of operation in bytes
* @submit: submission/completion modifiers
*
* General note: This routine assumes a field of GF(2^8) with a
* primitive polynomial of 0x11d and a generator of {02}.
*
* 'disks' note: callers can optionally omit either P or Q (but not
* both) from the calculation by setting blocks[disks-2] or
* blocks[disks-1] to NULL. When P or Q is omitted 'len' must be <=
* PAGE_SIZE as a temporary buffer of this size is used in the
* synchronous path. 'disks' always accounts for both destination
* buffers.
*
* 'blocks' note: if submit->scribble is NULL then the contents of
* 'blocks' may be overridden
*/
struct dma_async_tx_descriptor *
async_gen_syndrome(struct page **blocks, unsigned int offset, int disks,
size_t len, struct async_submit_ctl *submit)
{
int src_cnt = disks - 2;
struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ,
&P(blocks, disks), 2,
blocks, src_cnt, len);
struct dma_device *device = chan ? chan->device : NULL;
dma_addr_t *dma_src = NULL;
BUG_ON(disks > 255 || !(P(blocks, disks) || Q(blocks, disks)));
if (submit->scribble)
dma_src = submit->scribble;
else if (sizeof(dma_addr_t) <= sizeof(struct page *))
dma_src = (dma_addr_t *) blocks;
if (dma_src && device &&
(src_cnt <= dma_maxpq(device, 0) ||
dma_maxpq(device, DMA_PREP_CONTINUE) > 0) &&
is_dma_pq_aligned(device, offset, 0, len)) {
/* run the p+q asynchronously */
pr_debug("%s: (async) disks: %d len: %zu\n",
__func__, disks, len);
return do_async_gen_syndrome(chan, blocks, raid6_gfexp, offset,
disks, len, dma_src, submit);
}
/* run the pq synchronously */
pr_debug("%s: (sync) disks: %d len: %zu\n", __func__, disks, len);
/* wait for any prerequisite operations */
async_tx_quiesce(&submit->depend_tx);
if (!P(blocks, disks)) {
P(blocks, disks) = scribble;
BUG_ON(len + offset > PAGE_SIZE);
}
if (!Q(blocks, disks)) {
Q(blocks, disks) = scribble;
BUG_ON(len + offset > PAGE_SIZE);
}
do_sync_gen_syndrome(blocks, offset, disks, len, submit);
return NULL;
}
EXPORT_SYMBOL_GPL(async_gen_syndrome);
/**
* async_syndrome_val - asynchronously validate a raid6 syndrome
* @blocks: source blocks from idx 0..disks-3, P @ disks-2 and Q @ disks-1
* @offset: common offset into each block (src and dest) to start transaction
* @disks: number of blocks (including missing P or Q, see below)
* @len: length of operation in bytes
* @pqres: on val failure SUM_CHECK_P_RESULT and/or SUM_CHECK_Q_RESULT are set
* @spare: temporary result buffer for the synchronous case
* @submit: submission / completion modifiers
*
* The same notes from async_gen_syndrome apply to the 'blocks',
* and 'disks' parameters of this routine. The synchronous path
* requires a temporary result buffer and submit->scribble to be
* specified.
*/
struct dma_async_tx_descriptor *
async_syndrome_val(struct page **blocks, unsigned int offset, int disks,
size_t len, enum sum_check_flags *pqres, struct page *spare,
struct async_submit_ctl *submit)
{
struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ_VAL,
NULL, 0, blocks, disks,
len);
struct dma_device *device = chan ? chan->device : NULL;
struct dma_async_tx_descriptor *tx;
enum dma_ctrl_flags dma_flags = submit->cb_fn ? DMA_PREP_INTERRUPT : 0;
dma_addr_t *dma_src = NULL;
BUG_ON(disks < 4);
if (submit->scribble)
dma_src = submit->scribble;
else if (sizeof(dma_addr_t) <= sizeof(struct page *))
dma_src = (dma_addr_t *) blocks;
if (dma_src && device && disks <= dma_maxpq(device, 0) &&
is_dma_pq_aligned(device, offset, 0, len)) {
struct device *dev = device->dev;
dma_addr_t *pq = &dma_src[disks-2];
int i;
pr_debug("%s: (async) disks: %d len: %zu\n",
__func__, disks, len);
if (!P(blocks, disks))
dma_flags |= DMA_PREP_PQ_DISABLE_P;
if (!Q(blocks, disks))
dma_flags |= DMA_PREP_PQ_DISABLE_Q;
if (submit->flags & ASYNC_TX_FENCE)
dma_flags |= DMA_PREP_FENCE;
for (i = 0; i < disks; i++)
if (likely(blocks[i])) {
BUG_ON(is_raid6_zero_block(blocks[i]));
dma_src[i] = dma_map_page(dev, blocks[i],
offset, len,
DMA_TO_DEVICE);
}
for (;;) {
tx = device->device_prep_dma_pq_val(chan, pq, dma_src,
disks - 2,
raid6_gfexp,
len, pqres,
dma_flags);
if (likely(tx))
break;
async_tx_quiesce(&submit->depend_tx);
dma_async_issue_pending(chan);
}
async_tx_submit(chan, tx, submit);
return tx;
} else {
struct page *p_src = P(blocks, disks);
struct page *q_src = Q(blocks, disks);
enum async_tx_flags flags_orig = submit->flags;
dma_async_tx_callback cb_fn_orig = submit->cb_fn;
void *scribble = submit->scribble;
void *cb_param_orig = submit->cb_param;
void *p, *q, *s;
pr_debug("%s: (sync) disks: %d len: %zu\n",
__func__, disks, len);
/* caller must provide a temporary result buffer and
* allow the input parameters to be preserved
*/
BUG_ON(!spare || !scribble);
/* wait for any prerequisite operations */
async_tx_quiesce(&submit->depend_tx);
/* recompute p and/or q into the temporary buffer and then
* check to see the result matches the current value
*/
tx = NULL;
*pqres = 0;
if (p_src) {
init_async_submit(submit, ASYNC_TX_XOR_ZERO_DST, NULL,
NULL, NULL, scribble);
tx = async_xor(spare, blocks, offset, disks-2, len, submit);
async_tx_quiesce(&tx);
p = page_address(p_src) + offset;
s = page_address(spare) + offset;
*pqres |= !!memcmp(p, s, len) << SUM_CHECK_P;
}
if (q_src) {
P(blocks, disks) = NULL;
Q(blocks, disks) = spare;
init_async_submit(submit, 0, NULL, NULL, NULL, scribble);
tx = async_gen_syndrome(blocks, offset, disks, len, submit);
async_tx_quiesce(&tx);
q = page_address(q_src) + offset;
s = page_address(spare) + offset;
*pqres |= !!memcmp(q, s, len) << SUM_CHECK_Q;
}
/* restore P, Q and submit */
P(blocks, disks) = p_src;
Q(blocks, disks) = q_src;
submit->cb_fn = cb_fn_orig;
submit->cb_param = cb_param_orig;
submit->flags = flags_orig;
async_tx_sync_epilog(submit);
return NULL;
}
}
EXPORT_SYMBOL_GPL(async_syndrome_val);
static int __init async_pq_init(void)
{
scribble = alloc_page(GFP_KERNEL);
if (scribble)
return 0;
pr_err("%s: failed to allocate required spare page\n", __func__);
return -ENOMEM;
}
static void __exit async_pq_exit(void)
{
put_page(scribble);
}
module_init(async_pq_init);
module_exit(async_pq_exit);
MODULE_DESCRIPTION("asynchronous raid6 syndrome generation/validation");
MODULE_LICENSE("GPL");

View File

@ -0,0 +1,468 @@
/*
* Asynchronous RAID-6 recovery calculations ASYNC_TX API.
* Copyright(c) 2009 Intel Corporation
*
* based on raid6recov.c:
* Copyright 2002 H. Peter Anvin
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc., 51
* Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
*
*/
#include <linux/kernel.h>
#include <linux/interrupt.h>
#include <linux/dma-mapping.h>
#include <linux/raid/pq.h>
#include <linux/async_tx.h>
static struct dma_async_tx_descriptor *
async_sum_product(struct page *dest, struct page **srcs, unsigned char *coef,
size_t len, struct async_submit_ctl *submit)
{
struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ,
&dest, 1, srcs, 2, len);
struct dma_device *dma = chan ? chan->device : NULL;
const u8 *amul, *bmul;
u8 ax, bx;
u8 *a, *b, *c;
if (dma) {
dma_addr_t dma_dest[2];
dma_addr_t dma_src[2];
struct device *dev = dma->dev;
struct dma_async_tx_descriptor *tx;
enum dma_ctrl_flags dma_flags = DMA_PREP_PQ_DISABLE_P;
if (submit->flags & ASYNC_TX_FENCE)
dma_flags |= DMA_PREP_FENCE;
dma_dest[1] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL);
dma_src[0] = dma_map_page(dev, srcs[0], 0, len, DMA_TO_DEVICE);
dma_src[1] = dma_map_page(dev, srcs[1], 0, len, DMA_TO_DEVICE);
tx = dma->device_prep_dma_pq(chan, dma_dest, dma_src, 2, coef,
len, dma_flags);
if (tx) {
async_tx_submit(chan, tx, submit);
return tx;
}
/* could not get a descriptor, unmap and fall through to
* the synchronous path
*/
dma_unmap_page(dev, dma_dest[1], len, DMA_BIDIRECTIONAL);
dma_unmap_page(dev, dma_src[0], len, DMA_TO_DEVICE);
dma_unmap_page(dev, dma_src[1], len, DMA_TO_DEVICE);
}
/* run the operation synchronously */
async_tx_quiesce(&submit->depend_tx);
amul = raid6_gfmul[coef[0]];
bmul = raid6_gfmul[coef[1]];
a = page_address(srcs[0]);
b = page_address(srcs[1]);
c = page_address(dest);
while (len--) {
ax = amul[*a++];
bx = bmul[*b++];
*c++ = ax ^ bx;
}
return NULL;
}
static struct dma_async_tx_descriptor *
async_mult(struct page *dest, struct page *src, u8 coef, size_t len,
struct async_submit_ctl *submit)
{
struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ,
&dest, 1, &src, 1, len);
struct dma_device *dma = chan ? chan->device : NULL;
const u8 *qmul; /* Q multiplier table */
u8 *d, *s;
if (dma) {
dma_addr_t dma_dest[2];
dma_addr_t dma_src[1];
struct device *dev = dma->dev;
struct dma_async_tx_descriptor *tx;
enum dma_ctrl_flags dma_flags = DMA_PREP_PQ_DISABLE_P;
if (submit->flags & ASYNC_TX_FENCE)
dma_flags |= DMA_PREP_FENCE;
dma_dest[1] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL);
dma_src[0] = dma_map_page(dev, src, 0, len, DMA_TO_DEVICE);
tx = dma->device_prep_dma_pq(chan, dma_dest, dma_src, 1, &coef,
len, dma_flags);
if (tx) {
async_tx_submit(chan, tx, submit);
return tx;
}
/* could not get a descriptor, unmap and fall through to
* the synchronous path
*/
dma_unmap_page(dev, dma_dest[1], len, DMA_BIDIRECTIONAL);
dma_unmap_page(dev, dma_src[0], len, DMA_TO_DEVICE);
}
/* no channel available, or failed to allocate a descriptor, so
* perform the operation synchronously
*/
async_tx_quiesce(&submit->depend_tx);
qmul = raid6_gfmul[coef];
d = page_address(dest);
s = page_address(src);
while (len--)
*d++ = qmul[*s++];
return NULL;
}
static struct dma_async_tx_descriptor *
__2data_recov_4(size_t bytes, int faila, int failb, struct page **blocks,
struct async_submit_ctl *submit)
{
struct dma_async_tx_descriptor *tx = NULL;
struct page *p, *q, *a, *b;
struct page *srcs[2];
unsigned char coef[2];
enum async_tx_flags flags = submit->flags;
dma_async_tx_callback cb_fn = submit->cb_fn;
void *cb_param = submit->cb_param;
void *scribble = submit->scribble;
p = blocks[4-2];
q = blocks[4-1];
a = blocks[faila];
b = blocks[failb];
/* in the 4 disk case P + Pxy == P and Q + Qxy == Q */
/* Dx = A*(P+Pxy) + B*(Q+Qxy) */
srcs[0] = p;
srcs[1] = q;
coef[0] = raid6_gfexi[failb-faila];
coef[1] = raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]];
init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
tx = async_sum_product(b, srcs, coef, bytes, submit);
/* Dy = P+Pxy+Dx */
srcs[0] = p;
srcs[1] = b;
init_async_submit(submit, flags | ASYNC_TX_XOR_ZERO_DST, tx, cb_fn,
cb_param, scribble);
tx = async_xor(a, srcs, 0, 2, bytes, submit);
return tx;
}
static struct dma_async_tx_descriptor *
__2data_recov_5(size_t bytes, int faila, int failb, struct page **blocks,
struct async_submit_ctl *submit)
{
struct dma_async_tx_descriptor *tx = NULL;
struct page *p, *q, *g, *dp, *dq;
struct page *srcs[2];
unsigned char coef[2];
enum async_tx_flags flags = submit->flags;
dma_async_tx_callback cb_fn = submit->cb_fn;
void *cb_param = submit->cb_param;
void *scribble = submit->scribble;
int uninitialized_var(good);
int i;
for (i = 0; i < 3; i++) {
if (i == faila || i == failb)
continue;
else {
good = i;
break;
}
}
BUG_ON(i >= 3);
p = blocks[5-2];
q = blocks[5-1];
g = blocks[good];
/* Compute syndrome with zero for the missing data pages
* Use the dead data pages as temporary storage for delta p and
* delta q
*/
dp = blocks[faila];
dq = blocks[failb];
init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
tx = async_memcpy(dp, g, 0, 0, bytes, submit);
init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
tx = async_mult(dq, g, raid6_gfexp[good], bytes, submit);
/* compute P + Pxy */
srcs[0] = dp;
srcs[1] = p;
init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
NULL, NULL, scribble);
tx = async_xor(dp, srcs, 0, 2, bytes, submit);
/* compute Q + Qxy */
srcs[0] = dq;
srcs[1] = q;
init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
NULL, NULL, scribble);
tx = async_xor(dq, srcs, 0, 2, bytes, submit);
/* Dx = A*(P+Pxy) + B*(Q+Qxy) */
srcs[0] = dp;
srcs[1] = dq;
coef[0] = raid6_gfexi[failb-faila];
coef[1] = raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]];
init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
tx = async_sum_product(dq, srcs, coef, bytes, submit);
/* Dy = P+Pxy+Dx */
srcs[0] = dp;
srcs[1] = dq;
init_async_submit(submit, flags | ASYNC_TX_XOR_DROP_DST, tx, cb_fn,
cb_param, scribble);
tx = async_xor(dp, srcs, 0, 2, bytes, submit);
return tx;
}
static struct dma_async_tx_descriptor *
__2data_recov_n(int disks, size_t bytes, int faila, int failb,
struct page **blocks, struct async_submit_ctl *submit)
{
struct dma_async_tx_descriptor *tx = NULL;
struct page *p, *q, *dp, *dq;
struct page *srcs[2];
unsigned char coef[2];
enum async_tx_flags flags = submit->flags;
dma_async_tx_callback cb_fn = submit->cb_fn;
void *cb_param = submit->cb_param;
void *scribble = submit->scribble;
p = blocks[disks-2];
q = blocks[disks-1];
/* Compute syndrome with zero for the missing data pages
* Use the dead data pages as temporary storage for
* delta p and delta q
*/
dp = blocks[faila];
blocks[faila] = (void *)raid6_empty_zero_page;
blocks[disks-2] = dp;
dq = blocks[failb];
blocks[failb] = (void *)raid6_empty_zero_page;
blocks[disks-1] = dq;
init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
tx = async_gen_syndrome(blocks, 0, disks, bytes, submit);
/* Restore pointer table */
blocks[faila] = dp;
blocks[failb] = dq;
blocks[disks-2] = p;
blocks[disks-1] = q;
/* compute P + Pxy */
srcs[0] = dp;
srcs[1] = p;
init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
NULL, NULL, scribble);
tx = async_xor(dp, srcs, 0, 2, bytes, submit);
/* compute Q + Qxy */
srcs[0] = dq;
srcs[1] = q;
init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
NULL, NULL, scribble);
tx = async_xor(dq, srcs, 0, 2, bytes, submit);
/* Dx = A*(P+Pxy) + B*(Q+Qxy) */
srcs[0] = dp;
srcs[1] = dq;
coef[0] = raid6_gfexi[failb-faila];
coef[1] = raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]];
init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
tx = async_sum_product(dq, srcs, coef, bytes, submit);
/* Dy = P+Pxy+Dx */
srcs[0] = dp;
srcs[1] = dq;
init_async_submit(submit, flags | ASYNC_TX_XOR_DROP_DST, tx, cb_fn,
cb_param, scribble);
tx = async_xor(dp, srcs, 0, 2, bytes, submit);
return tx;
}
/**
* async_raid6_2data_recov - asynchronously calculate two missing data blocks
* @disks: number of disks in the RAID-6 array
* @bytes: block size
* @faila: first failed drive index
* @failb: second failed drive index
* @blocks: array of source pointers where the last two entries are p and q
* @submit: submission/completion modifiers
*/
struct dma_async_tx_descriptor *
async_raid6_2data_recov(int disks, size_t bytes, int faila, int failb,
struct page **blocks, struct async_submit_ctl *submit)
{
BUG_ON(faila == failb);
if (failb < faila)
swap(faila, failb);
pr_debug("%s: disks: %d len: %zu\n", __func__, disks, bytes);
/* we need to preserve the contents of 'blocks' for the async
* case, so punt to synchronous if a scribble buffer is not available
*/
if (!submit->scribble) {
void **ptrs = (void **) blocks;
int i;
async_tx_quiesce(&submit->depend_tx);
for (i = 0; i < disks; i++)
ptrs[i] = page_address(blocks[i]);
raid6_2data_recov(disks, bytes, faila, failb, ptrs);
async_tx_sync_epilog(submit);
return NULL;
}
switch (disks) {
case 4:
/* dma devices do not uniformly understand a zero source pq
* operation (in contrast to the synchronous case), so
* explicitly handle the 4 disk special case
*/
return __2data_recov_4(bytes, faila, failb, blocks, submit);
case 5:
/* dma devices do not uniformly understand a single
* source pq operation (in contrast to the synchronous
* case), so explicitly handle the 5 disk special case
*/
return __2data_recov_5(bytes, faila, failb, blocks, submit);
default:
return __2data_recov_n(disks, bytes, faila, failb, blocks, submit);
}
}
EXPORT_SYMBOL_GPL(async_raid6_2data_recov);
/**
* async_raid6_datap_recov - asynchronously calculate a data and the 'p' block
* @disks: number of disks in the RAID-6 array
* @bytes: block size
* @faila: failed drive index
* @blocks: array of source pointers where the last two entries are p and q
* @submit: submission/completion modifiers
*/
struct dma_async_tx_descriptor *
async_raid6_datap_recov(int disks, size_t bytes, int faila,
struct page **blocks, struct async_submit_ctl *submit)
{
struct dma_async_tx_descriptor *tx = NULL;
struct page *p, *q, *dq;
u8 coef;
enum async_tx_flags flags = submit->flags;
dma_async_tx_callback cb_fn = submit->cb_fn;
void *cb_param = submit->cb_param;
void *scribble = submit->scribble;
struct page *srcs[2];
pr_debug("%s: disks: %d len: %zu\n", __func__, disks, bytes);
/* we need to preserve the contents of 'blocks' for the async
* case, so punt to synchronous if a scribble buffer is not available
*/
if (!scribble) {
void **ptrs = (void **) blocks;
int i;
async_tx_quiesce(&submit->depend_tx);
for (i = 0; i < disks; i++)
ptrs[i] = page_address(blocks[i]);
raid6_datap_recov(disks, bytes, faila, ptrs);
async_tx_sync_epilog(submit);
return NULL;
}
p = blocks[disks-2];
q = blocks[disks-1];
/* Compute syndrome with zero for the missing data page
* Use the dead data page as temporary storage for delta q
*/
dq = blocks[faila];
blocks[faila] = (void *)raid6_empty_zero_page;
blocks[disks-1] = dq;
/* in the 4 disk case we only need to perform a single source
* multiplication
*/
if (disks == 4) {
int good = faila == 0 ? 1 : 0;
struct page *g = blocks[good];
init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL,
scribble);
tx = async_memcpy(p, g, 0, 0, bytes, submit);
init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL,
scribble);
tx = async_mult(dq, g, raid6_gfexp[good], bytes, submit);
} else {
init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL,
scribble);
tx = async_gen_syndrome(blocks, 0, disks, bytes, submit);
}
/* Restore pointer table */
blocks[faila] = dq;
blocks[disks-1] = q;
/* calculate g^{-faila} */
coef = raid6_gfinv[raid6_gfexp[faila]];
srcs[0] = dq;
srcs[1] = q;
init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
NULL, NULL, scribble);
tx = async_xor(dq, srcs, 0, 2, bytes, submit);
init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
tx = async_mult(dq, dq, coef, bytes, submit);
srcs[0] = p;
srcs[1] = dq;
init_async_submit(submit, flags | ASYNC_TX_XOR_DROP_DST, tx, cb_fn,
cb_param, scribble);
tx = async_xor(p, srcs, 0, 2, bytes, submit);
return tx;
}
EXPORT_SYMBOL_GPL(async_raid6_datap_recov);
MODULE_AUTHOR("Dan Williams <dan.j.williams@intel.com>");
MODULE_DESCRIPTION("asynchronous RAID-6 recovery api");
MODULE_LICENSE("GPL");

View File

@ -42,16 +42,21 @@ static void __exit async_tx_exit(void)
async_dmaengine_put();
}
module_init(async_tx_init);
module_exit(async_tx_exit);
/**
* __async_tx_find_channel - find a channel to carry out the operation or let
* the transaction execute synchronously
* @depend_tx: transaction dependency
* @submit: transaction dependency and submission modifiers
* @tx_type: transaction type
*/
struct dma_chan *
__async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx,
__async_tx_find_channel(struct async_submit_ctl *submit,
enum dma_transaction_type tx_type)
{
struct dma_async_tx_descriptor *depend_tx = submit->depend_tx;
/* see if we can keep the chain on one channel */
if (depend_tx &&
dma_has_cap(tx_type, depend_tx->chan->device->cap_mask))
@ -59,17 +64,6 @@ __async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx,
return async_dma_find_channel(tx_type);
}
EXPORT_SYMBOL_GPL(__async_tx_find_channel);
#else
static int __init async_tx_init(void)
{
printk(KERN_INFO "async_tx: api initialized (sync-only)\n");
return 0;
}
static void __exit async_tx_exit(void)
{
do { } while (0);
}
#endif
@ -83,10 +77,14 @@ static void
async_tx_channel_switch(struct dma_async_tx_descriptor *depend_tx,
struct dma_async_tx_descriptor *tx)
{
struct dma_chan *chan;
struct dma_device *device;
struct dma_chan *chan = depend_tx->chan;
struct dma_device *device = chan->device;
struct dma_async_tx_descriptor *intr_tx = (void *) ~0;
#ifdef CONFIG_ASYNC_TX_DISABLE_CHANNEL_SWITCH
BUG();
#endif
/* first check to see if we can still append to depend_tx */
spin_lock_bh(&depend_tx->lock);
if (depend_tx->parent && depend_tx->chan == tx->chan) {
@ -96,11 +94,11 @@ async_tx_channel_switch(struct dma_async_tx_descriptor *depend_tx,
}
spin_unlock_bh(&depend_tx->lock);
if (!intr_tx)
/* attached dependency, flush the parent channel */
if (!intr_tx) {
device->device_issue_pending(chan);
return;
chan = depend_tx->chan;
device = chan->device;
}
/* see if we can schedule an interrupt
* otherwise poll for completion
@ -134,6 +132,7 @@ async_tx_channel_switch(struct dma_async_tx_descriptor *depend_tx,
intr_tx->tx_submit(intr_tx);
async_tx_ack(intr_tx);
}
device->device_issue_pending(chan);
} else {
if (dma_wait_for_async_tx(depend_tx) == DMA_ERROR)
panic("%s: DMA_ERROR waiting for depend_tx\n",
@ -144,13 +143,14 @@ async_tx_channel_switch(struct dma_async_tx_descriptor *depend_tx,
/**
* submit_disposition - while holding depend_tx->lock we must avoid submitting
* new operations to prevent a circular locking dependency with
* drivers that already hold a channel lock when calling
* async_tx_run_dependencies.
* submit_disposition - flags for routing an incoming operation
* @ASYNC_TX_SUBMITTED: we were able to append the new operation under the lock
* @ASYNC_TX_CHANNEL_SWITCH: when the lock is dropped schedule a channel switch
* @ASYNC_TX_DIRECT_SUBMIT: when the lock is dropped submit directly
*
* while holding depend_tx->lock we must avoid submitting new operations
* to prevent a circular locking dependency with drivers that already
* hold a channel lock when calling async_tx_run_dependencies.
*/
enum submit_disposition {
ASYNC_TX_SUBMITTED,
@ -160,11 +160,12 @@ enum submit_disposition {
void
async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx,
enum async_tx_flags flags, struct dma_async_tx_descriptor *depend_tx,
dma_async_tx_callback cb_fn, void *cb_param)
struct async_submit_ctl *submit)
{
tx->callback = cb_fn;
tx->callback_param = cb_param;
struct dma_async_tx_descriptor *depend_tx = submit->depend_tx;
tx->callback = submit->cb_fn;
tx->callback_param = submit->cb_param;
if (depend_tx) {
enum submit_disposition s;
@ -220,30 +221,29 @@ async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx,
tx->tx_submit(tx);
}
if (flags & ASYNC_TX_ACK)
if (submit->flags & ASYNC_TX_ACK)
async_tx_ack(tx);
if (depend_tx && (flags & ASYNC_TX_DEP_ACK))
if (depend_tx)
async_tx_ack(depend_tx);
}
EXPORT_SYMBOL_GPL(async_tx_submit);
/**
* async_trigger_callback - schedules the callback function to be run after
* any dependent operations have been completed.
* @flags: ASYNC_TX_ACK, ASYNC_TX_DEP_ACK
* @depend_tx: 'callback' requires the completion of this transaction
* @cb_fn: function to call after depend_tx completes
* @cb_param: parameter to pass to the callback routine
* async_trigger_callback - schedules the callback function to be run
* @submit: submission and completion parameters
*
* honored flags: ASYNC_TX_ACK
*
* The callback is run after any dependent operations have completed.
*/
struct dma_async_tx_descriptor *
async_trigger_callback(enum async_tx_flags flags,
struct dma_async_tx_descriptor *depend_tx,
dma_async_tx_callback cb_fn, void *cb_param)
async_trigger_callback(struct async_submit_ctl *submit)
{
struct dma_chan *chan;
struct dma_device *device;
struct dma_async_tx_descriptor *tx;
struct dma_async_tx_descriptor *depend_tx = submit->depend_tx;
if (depend_tx) {
chan = depend_tx->chan;
@ -262,14 +262,14 @@ async_trigger_callback(enum async_tx_flags flags,
if (tx) {
pr_debug("%s: (async)\n", __func__);
async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param);
async_tx_submit(chan, tx, submit);
} else {
pr_debug("%s: (sync)\n", __func__);
/* wait for any prerequisite operations */
async_tx_quiesce(&depend_tx);
async_tx_quiesce(&submit->depend_tx);
async_tx_sync_epilog(cb_fn, cb_param);
async_tx_sync_epilog(submit);
}
return tx;
@ -295,9 +295,6 @@ void async_tx_quiesce(struct dma_async_tx_descriptor **tx)
}
EXPORT_SYMBOL_GPL(async_tx_quiesce);
module_init(async_tx_init);
module_exit(async_tx_exit);
MODULE_AUTHOR("Intel Corporation");
MODULE_DESCRIPTION("Asynchronous Bulk Memory Transactions API");
MODULE_LICENSE("GPL");

View File

@ -33,19 +33,16 @@
/* do_async_xor - dma map the pages and perform the xor with an engine */
static __async_inline struct dma_async_tx_descriptor *
do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list,
unsigned int offset, int src_cnt, size_t len,
enum async_tx_flags flags,
struct dma_async_tx_descriptor *depend_tx,
dma_async_tx_callback cb_fn, void *cb_param)
unsigned int offset, int src_cnt, size_t len, dma_addr_t *dma_src,
struct async_submit_ctl *submit)
{
struct dma_device *dma = chan->device;
dma_addr_t *dma_src = (dma_addr_t *) src_list;
struct dma_async_tx_descriptor *tx = NULL;
int src_off = 0;
int i;
dma_async_tx_callback _cb_fn;
void *_cb_param;
enum async_tx_flags async_flags;
dma_async_tx_callback cb_fn_orig = submit->cb_fn;
void *cb_param_orig = submit->cb_param;
enum async_tx_flags flags_orig = submit->flags;
enum dma_ctrl_flags dma_flags;
int xor_src_cnt;
dma_addr_t dma_dest;
@ -63,25 +60,27 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list,
}
while (src_cnt) {
async_flags = flags;
submit->flags = flags_orig;
dma_flags = 0;
xor_src_cnt = min(src_cnt, dma->max_xor);
xor_src_cnt = min(src_cnt, (int)dma->max_xor);
/* if we are submitting additional xors, leave the chain open,
* clear the callback parameters, and leave the destination
* buffer mapped
*/
if (src_cnt > xor_src_cnt) {
async_flags &= ~ASYNC_TX_ACK;
submit->flags &= ~ASYNC_TX_ACK;
submit->flags |= ASYNC_TX_FENCE;
dma_flags = DMA_COMPL_SKIP_DEST_UNMAP;
_cb_fn = NULL;
_cb_param = NULL;
submit->cb_fn = NULL;
submit->cb_param = NULL;
} else {
_cb_fn = cb_fn;
_cb_param = cb_param;
submit->cb_fn = cb_fn_orig;
submit->cb_param = cb_param_orig;
}
if (_cb_fn)
if (submit->cb_fn)
dma_flags |= DMA_PREP_INTERRUPT;
if (submit->flags & ASYNC_TX_FENCE)
dma_flags |= DMA_PREP_FENCE;
/* Since we have clobbered the src_list we are committed
* to doing this asynchronously. Drivers force forward progress
* in case they can not provide a descriptor
@ -90,7 +89,7 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list,
xor_src_cnt, len, dma_flags);
if (unlikely(!tx))
async_tx_quiesce(&depend_tx);
async_tx_quiesce(&submit->depend_tx);
/* spin wait for the preceeding transactions to complete */
while (unlikely(!tx)) {
@ -101,11 +100,8 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list,
dma_flags);
}
async_tx_submit(chan, tx, async_flags, depend_tx, _cb_fn,
_cb_param);
depend_tx = tx;
flags |= ASYNC_TX_DEP_ACK;
async_tx_submit(chan, tx, submit);
submit->depend_tx = tx;
if (src_cnt > xor_src_cnt) {
/* drop completed sources */
@ -124,23 +120,27 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list,
static void
do_sync_xor(struct page *dest, struct page **src_list, unsigned int offset,
int src_cnt, size_t len, enum async_tx_flags flags,
dma_async_tx_callback cb_fn, void *cb_param)
int src_cnt, size_t len, struct async_submit_ctl *submit)
{
int i;
int xor_src_cnt;
int src_off = 0;
void *dest_buf;
void **srcs = (void **) src_list;
void **srcs;
/* reuse the 'src_list' array to convert to buffer pointers */
if (submit->scribble)
srcs = submit->scribble;
else
srcs = (void **) src_list;
/* convert to buffer pointers */
for (i = 0; i < src_cnt; i++)
srcs[i] = page_address(src_list[i]) + offset;
/* set destination address */
dest_buf = page_address(dest) + offset;
if (flags & ASYNC_TX_XOR_ZERO_DST)
if (submit->flags & ASYNC_TX_XOR_ZERO_DST)
memset(dest_buf, 0, len);
while (src_cnt > 0) {
@ -153,61 +153,70 @@ do_sync_xor(struct page *dest, struct page **src_list, unsigned int offset,
src_off += xor_src_cnt;
}
async_tx_sync_epilog(cb_fn, cb_param);
async_tx_sync_epilog(submit);
}
/**
* async_xor - attempt to xor a set of blocks with a dma engine.
* xor_blocks always uses the dest as a source so the ASYNC_TX_XOR_ZERO_DST
* flag must be set to not include dest data in the calculation. The
* assumption with dma eninges is that they only use the destination
* buffer as a source when it is explicity specified in the source list.
* @dest: destination page
* @src_list: array of source pages (if the dest is also a source it must be
* at index zero). The contents of this array may be overwritten.
* @offset: offset in pages to start transaction
* @src_list: array of source pages
* @offset: common src/dst offset to start transaction
* @src_cnt: number of source pages
* @len: length in bytes
* @flags: ASYNC_TX_XOR_ZERO_DST, ASYNC_TX_XOR_DROP_DEST,
* ASYNC_TX_ACK, ASYNC_TX_DEP_ACK
* @depend_tx: xor depends on the result of this transaction.
* @cb_fn: function to call when the xor completes
* @cb_param: parameter to pass to the callback routine
* @submit: submission / completion modifiers
*
* honored flags: ASYNC_TX_ACK, ASYNC_TX_XOR_ZERO_DST, ASYNC_TX_XOR_DROP_DST
*
* xor_blocks always uses the dest as a source so the
* ASYNC_TX_XOR_ZERO_DST flag must be set to not include dest data in
* the calculation. The assumption with dma eninges is that they only
* use the destination buffer as a source when it is explicity specified
* in the source list.
*
* src_list note: if the dest is also a source it must be at index zero.
* The contents of this array will be overwritten if a scribble region
* is not specified.
*/
struct dma_async_tx_descriptor *
async_xor(struct page *dest, struct page **src_list, unsigned int offset,
int src_cnt, size_t len, enum async_tx_flags flags,
struct dma_async_tx_descriptor *depend_tx,
dma_async_tx_callback cb_fn, void *cb_param)
int src_cnt, size_t len, struct async_submit_ctl *submit)
{
struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_XOR,
struct dma_chan *chan = async_tx_find_channel(submit, DMA_XOR,
&dest, 1, src_list,
src_cnt, len);
dma_addr_t *dma_src = NULL;
BUG_ON(src_cnt <= 1);
if (chan) {
if (submit->scribble)
dma_src = submit->scribble;
else if (sizeof(dma_addr_t) <= sizeof(struct page *))
dma_src = (dma_addr_t *) src_list;
if (dma_src && chan && is_dma_xor_aligned(chan->device, offset, 0, len)) {
/* run the xor asynchronously */
pr_debug("%s (async): len: %zu\n", __func__, len);
return do_async_xor(chan, dest, src_list, offset, src_cnt, len,
flags, depend_tx, cb_fn, cb_param);
dma_src, submit);
} else {
/* run the xor synchronously */
pr_debug("%s (sync): len: %zu\n", __func__, len);
WARN_ONCE(chan, "%s: no space for dma address conversion\n",
__func__);
/* in the sync case the dest is an implied source
* (assumes the dest is the first source)
*/
if (flags & ASYNC_TX_XOR_DROP_DST) {
if (submit->flags & ASYNC_TX_XOR_DROP_DST) {
src_cnt--;
src_list++;
}
/* wait for any prerequisite operations */
async_tx_quiesce(&depend_tx);
async_tx_quiesce(&submit->depend_tx);
do_sync_xor(dest, src_list, offset, src_cnt, len,
flags, cb_fn, cb_param);
do_sync_xor(dest, src_list, offset, src_cnt, len, submit);
return NULL;
}
@ -222,104 +231,94 @@ static int page_is_zero(struct page *p, unsigned int offset, size_t len)
}
/**
* async_xor_zero_sum - attempt a xor parity check with a dma engine.
* async_xor_val - attempt a xor parity check with a dma engine.
* @dest: destination page used if the xor is performed synchronously
* @src_list: array of source pages. The dest page must be listed as a source
* at index zero. The contents of this array may be overwritten.
* @src_list: array of source pages
* @offset: offset in pages to start transaction
* @src_cnt: number of source pages
* @len: length in bytes
* @result: 0 if sum == 0 else non-zero
* @flags: ASYNC_TX_ACK, ASYNC_TX_DEP_ACK
* @depend_tx: xor depends on the result of this transaction.
* @cb_fn: function to call when the xor completes
* @cb_param: parameter to pass to the callback routine
* @submit: submission / completion modifiers
*
* honored flags: ASYNC_TX_ACK
*
* src_list note: if the dest is also a source it must be at index zero.
* The contents of this array will be overwritten if a scribble region
* is not specified.
*/
struct dma_async_tx_descriptor *
async_xor_zero_sum(struct page *dest, struct page **src_list,
unsigned int offset, int src_cnt, size_t len,
u32 *result, enum async_tx_flags flags,
struct dma_async_tx_descriptor *depend_tx,
dma_async_tx_callback cb_fn, void *cb_param)
async_xor_val(struct page *dest, struct page **src_list, unsigned int offset,
int src_cnt, size_t len, enum sum_check_flags *result,
struct async_submit_ctl *submit)
{
struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_ZERO_SUM,
struct dma_chan *chan = async_tx_find_channel(submit, DMA_XOR_VAL,
&dest, 1, src_list,
src_cnt, len);
struct dma_device *device = chan ? chan->device : NULL;
struct dma_async_tx_descriptor *tx = NULL;
dma_addr_t *dma_src = NULL;
BUG_ON(src_cnt <= 1);
if (device && src_cnt <= device->max_xor) {
dma_addr_t *dma_src = (dma_addr_t *) src_list;
unsigned long dma_prep_flags = cb_fn ? DMA_PREP_INTERRUPT : 0;
if (submit->scribble)
dma_src = submit->scribble;
else if (sizeof(dma_addr_t) <= sizeof(struct page *))
dma_src = (dma_addr_t *) src_list;
if (dma_src && device && src_cnt <= device->max_xor &&
is_dma_xor_aligned(device, offset, 0, len)) {
unsigned long dma_prep_flags = 0;
int i;
pr_debug("%s: (async) len: %zu\n", __func__, len);
if (submit->cb_fn)
dma_prep_flags |= DMA_PREP_INTERRUPT;
if (submit->flags & ASYNC_TX_FENCE)
dma_prep_flags |= DMA_PREP_FENCE;
for (i = 0; i < src_cnt; i++)
dma_src[i] = dma_map_page(device->dev, src_list[i],
offset, len, DMA_TO_DEVICE);
tx = device->device_prep_dma_zero_sum(chan, dma_src, src_cnt,
tx = device->device_prep_dma_xor_val(chan, dma_src, src_cnt,
len, result,
dma_prep_flags);
if (unlikely(!tx)) {
async_tx_quiesce(&depend_tx);
async_tx_quiesce(&submit->depend_tx);
while (!tx) {
dma_async_issue_pending(chan);
tx = device->device_prep_dma_zero_sum(chan,
tx = device->device_prep_dma_xor_val(chan,
dma_src, src_cnt, len, result,
dma_prep_flags);
}
}
async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param);
async_tx_submit(chan, tx, submit);
} else {
unsigned long xor_flags = flags;
enum async_tx_flags flags_orig = submit->flags;
pr_debug("%s: (sync) len: %zu\n", __func__, len);
WARN_ONCE(device && src_cnt <= device->max_xor,
"%s: no space for dma address conversion\n",
__func__);
xor_flags |= ASYNC_TX_XOR_DROP_DST;
xor_flags &= ~ASYNC_TX_ACK;
submit->flags |= ASYNC_TX_XOR_DROP_DST;
submit->flags &= ~ASYNC_TX_ACK;
tx = async_xor(dest, src_list, offset, src_cnt, len, xor_flags,
depend_tx, NULL, NULL);
tx = async_xor(dest, src_list, offset, src_cnt, len, submit);
async_tx_quiesce(&tx);
*result = page_is_zero(dest, offset, len) ? 0 : 1;
*result = !page_is_zero(dest, offset, len) << SUM_CHECK_P;
async_tx_sync_epilog(cb_fn, cb_param);
async_tx_sync_epilog(submit);
submit->flags = flags_orig;
}
return tx;
}
EXPORT_SYMBOL_GPL(async_xor_zero_sum);
static int __init async_xor_init(void)
{
#ifdef CONFIG_ASYNC_TX_DMA
/* To conserve stack space the input src_list (array of page pointers)
* is reused to hold the array of dma addresses passed to the driver.
* This conversion is only possible when dma_addr_t is less than the
* the size of a pointer. HIGHMEM64G is known to violate this
* assumption.
*/
BUILD_BUG_ON(sizeof(dma_addr_t) > sizeof(struct page *));
#endif
return 0;
}
static void __exit async_xor_exit(void)
{
do { } while (0);
}
module_init(async_xor_init);
module_exit(async_xor_exit);
EXPORT_SYMBOL_GPL(async_xor_val);
MODULE_AUTHOR("Intel Corporation");
MODULE_DESCRIPTION("asynchronous xor/xor-zero-sum api");

240
crypto/async_tx/raid6test.c Normal file
View File

@ -0,0 +1,240 @@
/*
* asynchronous raid6 recovery self test
* Copyright (c) 2009, Intel Corporation.
*
* based on drivers/md/raid6test/test.c:
* Copyright 2002-2007 H. Peter Anvin
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
*
*/
#include <linux/async_tx.h>
#include <linux/random.h>
#undef pr
#define pr(fmt, args...) pr_info("raid6test: " fmt, ##args)
#define NDISKS 16 /* Including P and Q */
static struct page *dataptrs[NDISKS];
static addr_conv_t addr_conv[NDISKS];
static struct page *data[NDISKS+3];
static struct page *spare;
static struct page *recovi;
static struct page *recovj;
static void callback(void *param)
{
struct completion *cmp = param;
complete(cmp);
}
static void makedata(int disks)
{
int i, j;
for (i = 0; i < disks; i++) {
for (j = 0; j < PAGE_SIZE/sizeof(u32); j += sizeof(u32)) {
u32 *p = page_address(data[i]) + j;
*p = random32();
}
dataptrs[i] = data[i];
}
}
static char disk_type(int d, int disks)
{
if (d == disks - 2)
return 'P';
else if (d == disks - 1)
return 'Q';
else
return 'D';
}
/* Recover two failed blocks. */
static void raid6_dual_recov(int disks, size_t bytes, int faila, int failb, struct page **ptrs)
{
struct async_submit_ctl submit;
struct completion cmp;
struct dma_async_tx_descriptor *tx = NULL;
enum sum_check_flags result = ~0;
if (faila > failb)
swap(faila, failb);
if (failb == disks-1) {
if (faila == disks-2) {
/* P+Q failure. Just rebuild the syndrome. */
init_async_submit(&submit, 0, NULL, NULL, NULL, addr_conv);
tx = async_gen_syndrome(ptrs, 0, disks, bytes, &submit);
} else {
struct page *blocks[disks];
struct page *dest;
int count = 0;
int i;
/* data+Q failure. Reconstruct data from P,
* then rebuild syndrome
*/
for (i = disks; i-- ; ) {
if (i == faila || i == failb)
continue;
blocks[count++] = ptrs[i];
}
dest = ptrs[faila];
init_async_submit(&submit, ASYNC_TX_XOR_ZERO_DST, NULL,
NULL, NULL, addr_conv);
tx = async_xor(dest, blocks, 0, count, bytes, &submit);
init_async_submit(&submit, 0, tx, NULL, NULL, addr_conv);
tx = async_gen_syndrome(ptrs, 0, disks, bytes, &submit);
}
} else {
if (failb == disks-2) {
/* data+P failure. */
init_async_submit(&submit, 0, NULL, NULL, NULL, addr_conv);
tx = async_raid6_datap_recov(disks, bytes, faila, ptrs, &submit);
} else {
/* data+data failure. */
init_async_submit(&submit, 0, NULL, NULL, NULL, addr_conv);
tx = async_raid6_2data_recov(disks, bytes, faila, failb, ptrs, &submit);
}
}
init_completion(&cmp);
init_async_submit(&submit, ASYNC_TX_ACK, tx, callback, &cmp, addr_conv);
tx = async_syndrome_val(ptrs, 0, disks, bytes, &result, spare, &submit);
async_tx_issue_pending(tx);
if (wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)) == 0)
pr("%s: timeout! (faila: %d failb: %d disks: %d)\n",
__func__, faila, failb, disks);
if (result != 0)
pr("%s: validation failure! faila: %d failb: %d sum_check_flags: %x\n",
__func__, faila, failb, result);
}
static int test_disks(int i, int j, int disks)
{
int erra, errb;
memset(page_address(recovi), 0xf0, PAGE_SIZE);
memset(page_address(recovj), 0xba, PAGE_SIZE);
dataptrs[i] = recovi;
dataptrs[j] = recovj;
raid6_dual_recov(disks, PAGE_SIZE, i, j, dataptrs);
erra = memcmp(page_address(data[i]), page_address(recovi), PAGE_SIZE);
errb = memcmp(page_address(data[j]), page_address(recovj), PAGE_SIZE);
pr("%s(%d, %d): faila=%3d(%c) failb=%3d(%c) %s\n",
__func__, i, j, i, disk_type(i, disks), j, disk_type(j, disks),
(!erra && !errb) ? "OK" : !erra ? "ERRB" : !errb ? "ERRA" : "ERRAB");
dataptrs[i] = data[i];
dataptrs[j] = data[j];
return erra || errb;
}
static int test(int disks, int *tests)
{
struct dma_async_tx_descriptor *tx;
struct async_submit_ctl submit;
struct completion cmp;
int err = 0;
int i, j;
recovi = data[disks];
recovj = data[disks+1];
spare = data[disks+2];
makedata(disks);
/* Nuke syndromes */
memset(page_address(data[disks-2]), 0xee, PAGE_SIZE);
memset(page_address(data[disks-1]), 0xee, PAGE_SIZE);
/* Generate assumed good syndrome */
init_completion(&cmp);
init_async_submit(&submit, ASYNC_TX_ACK, NULL, callback, &cmp, addr_conv);
tx = async_gen_syndrome(dataptrs, 0, disks, PAGE_SIZE, &submit);
async_tx_issue_pending(tx);
if (wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)) == 0) {
pr("error: initial gen_syndrome(%d) timed out\n", disks);
return 1;
}
pr("testing the %d-disk case...\n", disks);
for (i = 0; i < disks-1; i++)
for (j = i+1; j < disks; j++) {
(*tests)++;
err += test_disks(i, j, disks);
}
return err;
}
static int raid6_test(void)
{
int err = 0;
int tests = 0;
int i;
for (i = 0; i < NDISKS+3; i++) {
data[i] = alloc_page(GFP_KERNEL);
if (!data[i]) {
while (i--)
put_page(data[i]);
return -ENOMEM;
}
}
/* the 4-disk and 5-disk cases are special for the recovery code */
if (NDISKS > 4)
err += test(4, &tests);
if (NDISKS > 5)
err += test(5, &tests);
err += test(NDISKS, &tests);
pr("\n");
pr("complete (%d tests, %d failure%s)\n",
tests, err, err == 1 ? "" : "s");
for (i = 0; i < NDISKS+3; i++)
put_page(data[i]);
return 0;
}
static void raid6_test_exit(void)
{
}
/* when compiled-in wait for drivers to load first (assumes dma drivers
* are also compliled-in)
*/
late_initcall(raid6_test);
module_exit(raid6_test_exit);
MODULE_AUTHOR("Dan Williams <dan.j.williams@intel.com>");
MODULE_DESCRIPTION("asynchronous RAID-6 recovery self tests");
MODULE_LICENSE("GPL");

View File

@ -28,7 +28,7 @@
#include <linux/device.h>
#include <linux/dca.h>
#define DCA_VERSION "1.8"
#define DCA_VERSION "1.12.1"
MODULE_VERSION(DCA_VERSION);
MODULE_LICENSE("GPL");
@ -36,20 +36,92 @@ MODULE_AUTHOR("Intel Corporation");
static DEFINE_SPINLOCK(dca_lock);
static LIST_HEAD(dca_providers);
static LIST_HEAD(dca_domains);
static struct pci_bus *dca_pci_rc_from_dev(struct device *dev)
{
struct pci_dev *pdev = to_pci_dev(dev);
struct pci_bus *bus = pdev->bus;
while (bus->parent)
bus = bus->parent;
return bus;
}
static struct dca_domain *dca_allocate_domain(struct pci_bus *rc)
{
struct dca_domain *domain;
domain = kzalloc(sizeof(*domain), GFP_NOWAIT);
if (!domain)
return NULL;
INIT_LIST_HEAD(&domain->dca_providers);
domain->pci_rc = rc;
return domain;
}
static void dca_free_domain(struct dca_domain *domain)
{
list_del(&domain->node);
kfree(domain);
}
static struct dca_domain *dca_find_domain(struct pci_bus *rc)
{
struct dca_domain *domain;
list_for_each_entry(domain, &dca_domains, node)
if (domain->pci_rc == rc)
return domain;
return NULL;
}
static struct dca_domain *dca_get_domain(struct device *dev)
{
struct pci_bus *rc;
struct dca_domain *domain;
rc = dca_pci_rc_from_dev(dev);
domain = dca_find_domain(rc);
if (!domain) {
domain = dca_allocate_domain(rc);
if (domain)
list_add(&domain->node, &dca_domains);
}
return domain;
}
static struct dca_provider *dca_find_provider_by_dev(struct device *dev)
{
struct dca_provider *dca, *ret = NULL;
struct dca_provider *dca;
struct pci_bus *rc;
struct dca_domain *domain;
list_for_each_entry(dca, &dca_providers, node) {
if ((!dev) || (dca->ops->dev_managed(dca, dev))) {
ret = dca;
break;
}
if (dev) {
rc = dca_pci_rc_from_dev(dev);
domain = dca_find_domain(rc);
if (!domain)
return NULL;
} else {
if (!list_empty(&dca_domains))
domain = list_first_entry(&dca_domains,
struct dca_domain,
node);
else
return NULL;
}
return ret;
list_for_each_entry(dca, &domain->dca_providers, node)
if ((!dev) || (dca->ops->dev_managed(dca, dev)))
return dca;
return NULL;
}
/**
@ -61,6 +133,8 @@ int dca_add_requester(struct device *dev)
struct dca_provider *dca;
int err, slot = -ENODEV;
unsigned long flags;
struct pci_bus *pci_rc;
struct dca_domain *domain;
if (!dev)
return -EFAULT;
@ -74,7 +148,14 @@ int dca_add_requester(struct device *dev)
return -EEXIST;
}
list_for_each_entry(dca, &dca_providers, node) {
pci_rc = dca_pci_rc_from_dev(dev);
domain = dca_find_domain(pci_rc);
if (!domain) {
spin_unlock_irqrestore(&dca_lock, flags);
return -ENODEV;
}
list_for_each_entry(dca, &domain->dca_providers, node) {
slot = dca->ops->add_requester(dca, dev);
if (slot >= 0)
break;
@ -222,13 +303,19 @@ int register_dca_provider(struct dca_provider *dca, struct device *dev)
{
int err;
unsigned long flags;
struct dca_domain *domain;
err = dca_sysfs_add_provider(dca, dev);
if (err)
return err;
spin_lock_irqsave(&dca_lock, flags);
list_add(&dca->node, &dca_providers);
domain = dca_get_domain(dev);
if (!domain) {
spin_unlock_irqrestore(&dca_lock, flags);
return -ENODEV;
}
list_add(&dca->node, &domain->dca_providers);
spin_unlock_irqrestore(&dca_lock, flags);
blocking_notifier_call_chain(&dca_provider_chain,
@ -241,15 +328,24 @@ EXPORT_SYMBOL_GPL(register_dca_provider);
* unregister_dca_provider - remove a dca provider
* @dca - struct created by alloc_dca_provider()
*/
void unregister_dca_provider(struct dca_provider *dca)
void unregister_dca_provider(struct dca_provider *dca, struct device *dev)
{
unsigned long flags;
struct pci_bus *pci_rc;
struct dca_domain *domain;
blocking_notifier_call_chain(&dca_provider_chain,
DCA_PROVIDER_REMOVE, NULL);
spin_lock_irqsave(&dca_lock, flags);
list_del(&dca->node);
pci_rc = dca_pci_rc_from_dev(dev);
domain = dca_find_domain(pci_rc);
if (list_empty(&domain->dca_providers))
dca_free_domain(domain);
spin_unlock_irqrestore(&dca_lock, flags);
dca_sysfs_remove_provider(dca);
@ -276,7 +372,7 @@ EXPORT_SYMBOL_GPL(dca_unregister_notify);
static int __init dca_init(void)
{
printk(KERN_ERR "dca service started, version %s\n", DCA_VERSION);
pr_info("dca service started, version %s\n", DCA_VERSION);
return dca_sysfs_init();
}

View File

@ -17,11 +17,15 @@ if DMADEVICES
comment "DMA Devices"
config ASYNC_TX_DISABLE_CHANNEL_SWITCH
bool
config INTEL_IOATDMA
tristate "Intel I/OAT DMA support"
depends on PCI && X86
select DMA_ENGINE
select DCA
select ASYNC_TX_DISABLE_CHANNEL_SWITCH
help
Enable support for the Intel(R) I/OAT DMA engine present
in recent Intel Xeon chipsets.
@ -97,6 +101,14 @@ config TXX9_DMAC
Support the TXx9 SoC internal DMA controller. This can be
integrated in chips such as the Toshiba TX4927/38/39.
config SH_DMAE
tristate "Renesas SuperH DMAC support"
depends on SUPERH && SH_DMA
depends on !SH_DMA_API
select DMA_ENGINE
help
Enable support for the Renesas SuperH DMA controllers.
config DMA_ENGINE
bool
@ -116,7 +128,7 @@ config NET_DMA
config ASYNC_TX_DMA
bool "Async_tx: Offload support for the async_tx api"
depends on DMA_ENGINE && !HIGHMEM64G
depends on DMA_ENGINE
help
This allows the async_tx api to take advantage of offload engines for
memcpy, memset, xor, and raid6 p+q operations. If your platform has

View File

@ -1,8 +1,7 @@
obj-$(CONFIG_DMA_ENGINE) += dmaengine.o
obj-$(CONFIG_NET_DMA) += iovlock.o
obj-$(CONFIG_DMATEST) += dmatest.o
obj-$(CONFIG_INTEL_IOATDMA) += ioatdma.o
ioatdma-objs := ioat.o ioat_dma.o ioat_dca.o
obj-$(CONFIG_INTEL_IOATDMA) += ioat/
obj-$(CONFIG_INTEL_IOP_ADMA) += iop-adma.o
obj-$(CONFIG_FSL_DMA) += fsldma.o
obj-$(CONFIG_MV_XOR) += mv_xor.o
@ -10,3 +9,4 @@ obj-$(CONFIG_DW_DMAC) += dw_dmac.o
obj-$(CONFIG_AT_HDMAC) += at_hdmac.o
obj-$(CONFIG_MX3_IPU) += ipu/
obj-$(CONFIG_TXX9_DMAC) += txx9dmac.o
obj-$(CONFIG_SH_DMAE) += shdma.o

View File

@ -87,6 +87,7 @@ static struct at_desc *atc_alloc_descriptor(struct dma_chan *chan,
desc = dma_pool_alloc(atdma->dma_desc_pool, gfp_flags, &phys);
if (desc) {
memset(desc, 0, sizeof(struct at_desc));
INIT_LIST_HEAD(&desc->tx_list);
dma_async_tx_descriptor_init(&desc->txd, chan);
/* txd.flags will be overwritten in prep functions */
desc->txd.flags = DMA_CTRL_ACK;
@ -150,11 +151,11 @@ static void atc_desc_put(struct at_dma_chan *atchan, struct at_desc *desc)
struct at_desc *child;
spin_lock_bh(&atchan->lock);
list_for_each_entry(child, &desc->txd.tx_list, desc_node)
list_for_each_entry(child, &desc->tx_list, desc_node)
dev_vdbg(chan2dev(&atchan->chan_common),
"moving child desc %p to freelist\n",
child);
list_splice_init(&desc->txd.tx_list, &atchan->free_list);
list_splice_init(&desc->tx_list, &atchan->free_list);
dev_vdbg(chan2dev(&atchan->chan_common),
"moving desc %p to freelist\n", desc);
list_add(&desc->desc_node, &atchan->free_list);
@ -247,31 +248,34 @@ atc_chain_complete(struct at_dma_chan *atchan, struct at_desc *desc)
param = txd->callback_param;
/* move children to free_list */
list_splice_init(&txd->tx_list, &atchan->free_list);
list_splice_init(&desc->tx_list, &atchan->free_list);
/* move myself to free_list */
list_move(&desc->desc_node, &atchan->free_list);
/* unmap dma addresses */
if (!atchan->chan_common.private) {
struct device *parent = chan2parent(&atchan->chan_common);
if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
if (txd->flags & DMA_COMPL_DEST_UNMAP_SINGLE)
dma_unmap_single(chan2parent(&atchan->chan_common),
dma_unmap_single(parent,
desc->lli.daddr,
desc->len, DMA_FROM_DEVICE);
else
dma_unmap_page(chan2parent(&atchan->chan_common),
dma_unmap_page(parent,
desc->lli.daddr,
desc->len, DMA_FROM_DEVICE);
}
if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
if (txd->flags & DMA_COMPL_SRC_UNMAP_SINGLE)
dma_unmap_single(chan2parent(&atchan->chan_common),
dma_unmap_single(parent,
desc->lli.saddr,
desc->len, DMA_TO_DEVICE);
else
dma_unmap_page(chan2parent(&atchan->chan_common),
dma_unmap_page(parent,
desc->lli.saddr,
desc->len, DMA_TO_DEVICE);
}
}
/*
* The API requires that no submissions are done from a
@ -334,7 +338,7 @@ static void atc_cleanup_descriptors(struct at_dma_chan *atchan)
/* This one is currently in progress */
return;
list_for_each_entry(child, &desc->txd.tx_list, desc_node)
list_for_each_entry(child, &desc->tx_list, desc_node)
if (!(child->lli.ctrla & ATC_DONE))
/* Currently in progress */
return;
@ -407,7 +411,7 @@ static void atc_handle_error(struct at_dma_chan *atchan)
dev_crit(chan2dev(&atchan->chan_common),
" cookie: %d\n", bad_desc->txd.cookie);
atc_dump_lli(atchan, &bad_desc->lli);
list_for_each_entry(child, &bad_desc->txd.tx_list, desc_node)
list_for_each_entry(child, &bad_desc->tx_list, desc_node)
atc_dump_lli(atchan, &child->lli);
/* Pretend the descriptor completed successfully */
@ -587,7 +591,7 @@ atc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
prev->lli.dscr = desc->txd.phys;
/* insert the link descriptor to the LD ring */
list_add_tail(&desc->desc_node,
&first->txd.tx_list);
&first->tx_list);
}
prev = desc;
}
@ -646,8 +650,6 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
reg_width = atslave->reg_width;
sg_len = dma_map_sg(chan2parent(chan), sgl, sg_len, direction);
ctrla = ATC_DEFAULT_CTRLA | atslave->ctrla;
ctrlb = ATC_DEFAULT_CTRLB | ATC_IEN;
@ -687,7 +689,7 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
prev->lli.dscr = desc->txd.phys;
/* insert the link descriptor to the LD ring */
list_add_tail(&desc->desc_node,
&first->txd.tx_list);
&first->tx_list);
}
prev = desc;
total_len += len;
@ -729,7 +731,7 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
prev->lli.dscr = desc->txd.phys;
/* insert the link descriptor to the LD ring */
list_add_tail(&desc->desc_node,
&first->txd.tx_list);
&first->tx_list);
}
prev = desc;
total_len += len;

View File

@ -165,6 +165,7 @@ struct at_desc {
struct at_lli lli;
/* THEN values for driver housekeeping */
struct list_head tx_list;
struct dma_async_tx_descriptor txd;
struct list_head desc_node;
size_t len;

View File

@ -608,6 +608,40 @@ void dmaengine_put(void)
}
EXPORT_SYMBOL(dmaengine_put);
static bool device_has_all_tx_types(struct dma_device *device)
{
/* A device that satisfies this test has channels that will never cause
* an async_tx channel switch event as all possible operation types can
* be handled.
*/
#ifdef CONFIG_ASYNC_TX_DMA
if (!dma_has_cap(DMA_INTERRUPT, device->cap_mask))
return false;
#endif
#if defined(CONFIG_ASYNC_MEMCPY) || defined(CONFIG_ASYNC_MEMCPY_MODULE)
if (!dma_has_cap(DMA_MEMCPY, device->cap_mask))
return false;
#endif
#if defined(CONFIG_ASYNC_MEMSET) || defined(CONFIG_ASYNC_MEMSET_MODULE)
if (!dma_has_cap(DMA_MEMSET, device->cap_mask))
return false;
#endif
#if defined(CONFIG_ASYNC_XOR) || defined(CONFIG_ASYNC_XOR_MODULE)
if (!dma_has_cap(DMA_XOR, device->cap_mask))
return false;
#endif
#if defined(CONFIG_ASYNC_PQ) || defined(CONFIG_ASYNC_PQ_MODULE)
if (!dma_has_cap(DMA_PQ, device->cap_mask))
return false;
#endif
return true;
}
static int get_dma_id(struct dma_device *device)
{
int rc;
@ -644,8 +678,12 @@ int dma_async_device_register(struct dma_device *device)
!device->device_prep_dma_memcpy);
BUG_ON(dma_has_cap(DMA_XOR, device->cap_mask) &&
!device->device_prep_dma_xor);
BUG_ON(dma_has_cap(DMA_ZERO_SUM, device->cap_mask) &&
!device->device_prep_dma_zero_sum);
BUG_ON(dma_has_cap(DMA_XOR_VAL, device->cap_mask) &&
!device->device_prep_dma_xor_val);
BUG_ON(dma_has_cap(DMA_PQ, device->cap_mask) &&
!device->device_prep_dma_pq);
BUG_ON(dma_has_cap(DMA_PQ_VAL, device->cap_mask) &&
!device->device_prep_dma_pq_val);
BUG_ON(dma_has_cap(DMA_MEMSET, device->cap_mask) &&
!device->device_prep_dma_memset);
BUG_ON(dma_has_cap(DMA_INTERRUPT, device->cap_mask) &&
@ -661,6 +699,12 @@ int dma_async_device_register(struct dma_device *device)
BUG_ON(!device->device_issue_pending);
BUG_ON(!device->dev);
/* note: this only matters in the
* CONFIG_ASYNC_TX_DISABLE_CHANNEL_SWITCH=y case
*/
if (device_has_all_tx_types(device))
dma_cap_set(DMA_ASYNC_TX, device->cap_mask);
idr_ref = kmalloc(sizeof(*idr_ref), GFP_KERNEL);
if (!idr_ref)
return -ENOMEM;
@ -933,55 +977,29 @@ void dma_async_tx_descriptor_init(struct dma_async_tx_descriptor *tx,
{
tx->chan = chan;
spin_lock_init(&tx->lock);
INIT_LIST_HEAD(&tx->tx_list);
}
EXPORT_SYMBOL(dma_async_tx_descriptor_init);
/* dma_wait_for_async_tx - spin wait for a transaction to complete
* @tx: in-flight transaction to wait on
*
* This routine assumes that tx was obtained from a call to async_memcpy,
* async_xor, async_memset, etc which ensures that tx is "in-flight" (prepped
* and submitted). Walking the parent chain is only meant to cover for DMA
* drivers that do not implement the DMA_INTERRUPT capability and may race with
* the driver's descriptor cleanup routine.
*/
enum dma_status
dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx)
{
enum dma_status status;
struct dma_async_tx_descriptor *iter;
struct dma_async_tx_descriptor *parent;
unsigned long dma_sync_wait_timeout = jiffies + msecs_to_jiffies(5000);
if (!tx)
return DMA_SUCCESS;
WARN_ONCE(tx->parent, "%s: speculatively walking dependency chain for"
" %s\n", __func__, dma_chan_name(tx->chan));
/* poll through the dependency chain, return when tx is complete */
do {
iter = tx;
/* find the root of the unsubmitted dependency chain */
do {
parent = iter->parent;
if (!parent)
break;
else
iter = parent;
} while (parent);
/* there is a small window for ->parent == NULL and
* ->cookie == -EBUSY
*/
while (iter->cookie == -EBUSY)
while (tx->cookie == -EBUSY) {
if (time_after_eq(jiffies, dma_sync_wait_timeout)) {
pr_err("%s timeout waiting for descriptor submission\n",
__func__);
return DMA_ERROR;
}
cpu_relax();
status = dma_sync_wait(iter->chan, iter->cookie);
} while (status == DMA_IN_PROGRESS || (iter != tx));
return status;
}
return dma_sync_wait(tx->chan, tx->cookie);
}
EXPORT_SYMBOL_GPL(dma_wait_for_async_tx);

View File

@ -48,6 +48,11 @@ module_param(xor_sources, uint, S_IRUGO);
MODULE_PARM_DESC(xor_sources,
"Number of xor source buffers (default: 3)");
static unsigned int pq_sources = 3;
module_param(pq_sources, uint, S_IRUGO);
MODULE_PARM_DESC(pq_sources,
"Number of p+q source buffers (default: 3)");
/*
* Initialization patterns. All bytes in the source buffer has bit 7
* set, all bytes in the destination buffer has bit 7 cleared.
@ -232,6 +237,7 @@ static int dmatest_func(void *data)
dma_cookie_t cookie;
enum dma_status status;
enum dma_ctrl_flags flags;
u8 pq_coefs[pq_sources];
int ret;
int src_cnt;
int dst_cnt;
@ -248,6 +254,11 @@ static int dmatest_func(void *data)
else if (thread->type == DMA_XOR) {
src_cnt = xor_sources | 1; /* force odd to ensure dst = src */
dst_cnt = 1;
} else if (thread->type == DMA_PQ) {
src_cnt = pq_sources | 1; /* force odd to ensure dst = src */
dst_cnt = 2;
for (i = 0; i < pq_sources; i++)
pq_coefs[i] = 1;
} else
goto err_srcs;
@ -283,6 +294,7 @@ static int dmatest_func(void *data)
dma_addr_t dma_dsts[dst_cnt];
struct completion cmp;
unsigned long tmo = msecs_to_jiffies(3000);
u8 align = 0;
total_tests++;
@ -290,6 +302,18 @@ static int dmatest_func(void *data)
src_off = dmatest_random() % (test_buf_size - len + 1);
dst_off = dmatest_random() % (test_buf_size - len + 1);
/* honor alignment restrictions */
if (thread->type == DMA_MEMCPY)
align = dev->copy_align;
else if (thread->type == DMA_XOR)
align = dev->xor_align;
else if (thread->type == DMA_PQ)
align = dev->pq_align;
len = (len >> align) << align;
src_off = (src_off >> align) << align;
dst_off = (dst_off >> align) << align;
dmatest_init_srcs(thread->srcs, src_off, len);
dmatest_init_dsts(thread->dsts, dst_off, len);
@ -306,6 +330,7 @@ static int dmatest_func(void *data)
DMA_BIDIRECTIONAL);
}
if (thread->type == DMA_MEMCPY)
tx = dev->device_prep_dma_memcpy(chan,
dma_dsts[0] + dst_off,
@ -316,6 +341,15 @@ static int dmatest_func(void *data)
dma_dsts[0] + dst_off,
dma_srcs, xor_sources,
len, flags);
else if (thread->type == DMA_PQ) {
dma_addr_t dma_pq[dst_cnt];
for (i = 0; i < dst_cnt; i++)
dma_pq[i] = dma_dsts[i] + dst_off;
tx = dev->device_prep_dma_pq(chan, dma_pq, dma_srcs,
pq_sources, pq_coefs,
len, flags);
}
if (!tx) {
for (i = 0; i < src_cnt; i++)
@ -459,6 +493,8 @@ static int dmatest_add_threads(struct dmatest_chan *dtc, enum dma_transaction_ty
op = "copy";
else if (type == DMA_XOR)
op = "xor";
else if (type == DMA_PQ)
op = "pq";
else
return -EINVAL;
@ -514,6 +550,10 @@ static int dmatest_add_channel(struct dma_chan *chan)
cnt = dmatest_add_threads(dtc, DMA_XOR);
thread_count += cnt > 0 ? cnt : 0;
}
if (dma_has_cap(DMA_PQ, dma_dev->cap_mask)) {
cnt = dmatest_add_threads(dtc, DMA_PQ);
thread_count += cnt > 0 ?: 0;
}
pr_info("dmatest: Started %u threads using %s\n",
thread_count, dma_chan_name(chan));

View File

@ -116,7 +116,7 @@ static void dwc_sync_desc_for_cpu(struct dw_dma_chan *dwc, struct dw_desc *desc)
{
struct dw_desc *child;
list_for_each_entry(child, &desc->txd.tx_list, desc_node)
list_for_each_entry(child, &desc->tx_list, desc_node)
dma_sync_single_for_cpu(chan2parent(&dwc->chan),
child->txd.phys, sizeof(child->lli),
DMA_TO_DEVICE);
@ -137,11 +137,11 @@ static void dwc_desc_put(struct dw_dma_chan *dwc, struct dw_desc *desc)
dwc_sync_desc_for_cpu(dwc, desc);
spin_lock_bh(&dwc->lock);
list_for_each_entry(child, &desc->txd.tx_list, desc_node)
list_for_each_entry(child, &desc->tx_list, desc_node)
dev_vdbg(chan2dev(&dwc->chan),
"moving child desc %p to freelist\n",
child);
list_splice_init(&desc->txd.tx_list, &dwc->free_list);
list_splice_init(&desc->tx_list, &dwc->free_list);
dev_vdbg(chan2dev(&dwc->chan), "moving desc %p to freelist\n", desc);
list_add(&desc->desc_node, &dwc->free_list);
spin_unlock_bh(&dwc->lock);
@ -209,19 +209,28 @@ dwc_descriptor_complete(struct dw_dma_chan *dwc, struct dw_desc *desc)
param = txd->callback_param;
dwc_sync_desc_for_cpu(dwc, desc);
list_splice_init(&txd->tx_list, &dwc->free_list);
list_splice_init(&desc->tx_list, &dwc->free_list);
list_move(&desc->desc_node, &dwc->free_list);
/*
* We use dma_unmap_page() regardless of how the buffers were
* mapped before they were submitted...
*/
if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP))
dma_unmap_page(chan2parent(&dwc->chan), desc->lli.dar,
if (!dwc->chan.private) {
struct device *parent = chan2parent(&dwc->chan);
if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
if (txd->flags & DMA_COMPL_DEST_UNMAP_SINGLE)
dma_unmap_single(parent, desc->lli.dar,
desc->len, DMA_FROM_DEVICE);
if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP))
dma_unmap_page(chan2parent(&dwc->chan), desc->lli.sar,
else
dma_unmap_page(parent, desc->lli.dar,
desc->len, DMA_FROM_DEVICE);
}
if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
if (txd->flags & DMA_COMPL_SRC_UNMAP_SINGLE)
dma_unmap_single(parent, desc->lli.sar,
desc->len, DMA_TO_DEVICE);
else
dma_unmap_page(parent, desc->lli.sar,
desc->len, DMA_TO_DEVICE);
}
}
/*
* The API requires that no submissions are done from a
@ -289,7 +298,7 @@ static void dwc_scan_descriptors(struct dw_dma *dw, struct dw_dma_chan *dwc)
/* This one is currently in progress */
return;
list_for_each_entry(child, &desc->txd.tx_list, desc_node)
list_for_each_entry(child, &desc->tx_list, desc_node)
if (child->lli.llp == llp)
/* Currently in progress */
return;
@ -356,7 +365,7 @@ static void dwc_handle_error(struct dw_dma *dw, struct dw_dma_chan *dwc)
dev_printk(KERN_CRIT, chan2dev(&dwc->chan),
" cookie: %d\n", bad_desc->txd.cookie);
dwc_dump_lli(dwc, &bad_desc->lli);
list_for_each_entry(child, &bad_desc->txd.tx_list, desc_node)
list_for_each_entry(child, &bad_desc->tx_list, desc_node)
dwc_dump_lli(dwc, &child->lli);
/* Pretend the descriptor completed successfully */
@ -608,7 +617,7 @@ dwc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
prev->txd.phys, sizeof(prev->lli),
DMA_TO_DEVICE);
list_add_tail(&desc->desc_node,
&first->txd.tx_list);
&first->tx_list);
}
prev = desc;
}
@ -658,8 +667,6 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
reg_width = dws->reg_width;
prev = first = NULL;
sg_len = dma_map_sg(chan2parent(chan), sgl, sg_len, direction);
switch (direction) {
case DMA_TO_DEVICE:
ctllo = (DWC_DEFAULT_CTLLO
@ -700,7 +707,7 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
sizeof(prev->lli),
DMA_TO_DEVICE);
list_add_tail(&desc->desc_node,
&first->txd.tx_list);
&first->tx_list);
}
prev = desc;
total_len += len;
@ -746,7 +753,7 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
sizeof(prev->lli),
DMA_TO_DEVICE);
list_add_tail(&desc->desc_node,
&first->txd.tx_list);
&first->tx_list);
}
prev = desc;
total_len += len;
@ -902,6 +909,7 @@ static int dwc_alloc_chan_resources(struct dma_chan *chan)
break;
}
INIT_LIST_HEAD(&desc->tx_list);
dma_async_tx_descriptor_init(&desc->txd, chan);
desc->txd.tx_submit = dwc_tx_submit;
desc->txd.flags = DMA_CTRL_ACK;

View File

@ -217,6 +217,7 @@ struct dw_desc {
/* THEN values for driver housekeeping */
struct list_head desc_node;
struct list_head tx_list;
struct dma_async_tx_descriptor txd;
size_t len;
};

View File

@ -34,6 +34,7 @@
#include <linux/dmapool.h>
#include <linux/of_platform.h>
#include <asm/fsldma.h>
#include "fsldma.h"
static void dma_init(struct fsl_dma_chan *fsl_chan)
@ -280,28 +281,40 @@ static void fsl_chan_set_dest_loop_size(struct fsl_dma_chan *fsl_chan, int size)
}
/**
* fsl_chan_toggle_ext_pause - Toggle channel external pause status
* fsl_chan_set_request_count - Set DMA Request Count for external control
* @fsl_chan : Freescale DMA channel
* @size : Pause control size, 0 for disable external pause control.
* The maximum is 1024.
* @size : Number of bytes to transfer in a single request
*
* The Freescale DMA channel can be controlled by the external
* signal DREQ#. The pause control size is how many bytes are allowed
* to transfer before pausing the channel, after which a new assertion
* of DREQ# resumes channel operation.
* The Freescale DMA channel can be controlled by the external signal DREQ#.
* The DMA request count is how many bytes are allowed to transfer before
* pausing the channel, after which a new assertion of DREQ# resumes channel
* operation.
*
* A size of 0 disables external pause control. The maximum size is 1024.
*/
static void fsl_chan_toggle_ext_pause(struct fsl_dma_chan *fsl_chan, int size)
static void fsl_chan_set_request_count(struct fsl_dma_chan *fsl_chan, int size)
{
if (size > 1024)
return;
if (size) {
BUG_ON(size > 1024);
DMA_OUT(fsl_chan, &fsl_chan->reg_base->mr,
DMA_IN(fsl_chan, &fsl_chan->reg_base->mr, 32)
| ((__ilog2(size) << 24) & 0x0f000000),
32);
}
/**
* fsl_chan_toggle_ext_pause - Toggle channel external pause status
* @fsl_chan : Freescale DMA channel
* @enable : 0 is disabled, 1 is enabled.
*
* The Freescale DMA channel can be controlled by the external signal DREQ#.
* The DMA Request Count feature should be used in addition to this feature
* to set the number of bytes to transfer before pausing the channel.
*/
static void fsl_chan_toggle_ext_pause(struct fsl_dma_chan *fsl_chan, int enable)
{
if (enable)
fsl_chan->feature |= FSL_DMA_CHAN_PAUSE_EXT;
} else
else
fsl_chan->feature &= ~FSL_DMA_CHAN_PAUSE_EXT;
}
@ -326,7 +339,8 @@ static void fsl_chan_toggle_ext_start(struct fsl_dma_chan *fsl_chan, int enable)
static dma_cookie_t fsl_dma_tx_submit(struct dma_async_tx_descriptor *tx)
{
struct fsl_dma_chan *fsl_chan = to_fsl_chan(tx->chan);
struct fsl_desc_sw *desc;
struct fsl_desc_sw *desc = tx_to_fsl_desc(tx);
struct fsl_desc_sw *child;
unsigned long flags;
dma_cookie_t cookie;
@ -334,7 +348,7 @@ static dma_cookie_t fsl_dma_tx_submit(struct dma_async_tx_descriptor *tx)
spin_lock_irqsave(&fsl_chan->desc_lock, flags);
cookie = fsl_chan->common.cookie;
list_for_each_entry(desc, &tx->tx_list, node) {
list_for_each_entry(child, &desc->tx_list, node) {
cookie++;
if (cookie < 0)
cookie = 1;
@ -343,8 +357,8 @@ static dma_cookie_t fsl_dma_tx_submit(struct dma_async_tx_descriptor *tx)
}
fsl_chan->common.cookie = cookie;
append_ld_queue(fsl_chan, tx_to_fsl_desc(tx));
list_splice_init(&tx->tx_list, fsl_chan->ld_queue.prev);
append_ld_queue(fsl_chan, desc);
list_splice_init(&desc->tx_list, fsl_chan->ld_queue.prev);
spin_unlock_irqrestore(&fsl_chan->desc_lock, flags);
@ -366,6 +380,7 @@ static struct fsl_desc_sw *fsl_dma_alloc_descriptor(
desc_sw = dma_pool_alloc(fsl_chan->desc_pool, GFP_ATOMIC, &pdesc);
if (desc_sw) {
memset(desc_sw, 0, sizeof(struct fsl_desc_sw));
INIT_LIST_HEAD(&desc_sw->tx_list);
dma_async_tx_descriptor_init(&desc_sw->async_tx,
&fsl_chan->common);
desc_sw->async_tx.tx_submit = fsl_dma_tx_submit;
@ -455,7 +470,7 @@ fsl_dma_prep_interrupt(struct dma_chan *chan, unsigned long flags)
new->async_tx.flags = flags;
/* Insert the link descriptor to the LD ring */
list_add_tail(&new->node, &new->async_tx.tx_list);
list_add_tail(&new->node, &new->tx_list);
/* Set End-of-link to the last link descriptor of new list*/
set_ld_eol(fsl_chan, new);
@ -513,7 +528,7 @@ static struct dma_async_tx_descriptor *fsl_dma_prep_memcpy(
dma_dest += copy;
/* Insert the link descriptor to the LD ring */
list_add_tail(&new->node, &first->async_tx.tx_list);
list_add_tail(&new->node, &first->tx_list);
} while (len);
new->async_tx.flags = flags; /* client is in control of this ack */
@ -528,7 +543,7 @@ fail:
if (!first)
return NULL;
list = &first->async_tx.tx_list;
list = &first->tx_list;
list_for_each_entry_safe_reverse(new, prev, list, node) {
list_del(&new->node);
dma_pool_free(fsl_chan->desc_pool, new, new->async_tx.phys);
@ -537,6 +552,229 @@ fail:
return NULL;
}
/**
* fsl_dma_prep_slave_sg - prepare descriptors for a DMA_SLAVE transaction
* @chan: DMA channel
* @sgl: scatterlist to transfer to/from
* @sg_len: number of entries in @scatterlist
* @direction: DMA direction
* @flags: DMAEngine flags
*
* Prepare a set of descriptors for a DMA_SLAVE transaction. Following the
* DMA_SLAVE API, this gets the device-specific information from the
* chan->private variable.
*/
static struct dma_async_tx_descriptor *fsl_dma_prep_slave_sg(
struct dma_chan *chan, struct scatterlist *sgl, unsigned int sg_len,
enum dma_data_direction direction, unsigned long flags)
{
struct fsl_dma_chan *fsl_chan;
struct fsl_desc_sw *first = NULL, *prev = NULL, *new = NULL;
struct fsl_dma_slave *slave;
struct list_head *tx_list;
size_t copy;
int i;
struct scatterlist *sg;
size_t sg_used;
size_t hw_used;
struct fsl_dma_hw_addr *hw;
dma_addr_t dma_dst, dma_src;
if (!chan)
return NULL;
if (!chan->private)
return NULL;
fsl_chan = to_fsl_chan(chan);
slave = chan->private;
if (list_empty(&slave->addresses))
return NULL;
hw = list_first_entry(&slave->addresses, struct fsl_dma_hw_addr, entry);
hw_used = 0;
/*
* Build the hardware transaction to copy from the scatterlist to
* the hardware, or from the hardware to the scatterlist
*
* If you are copying from the hardware to the scatterlist and it
* takes two hardware entries to fill an entire page, then both
* hardware entries will be coalesced into the same page
*
* If you are copying from the scatterlist to the hardware and a
* single page can fill two hardware entries, then the data will
* be read out of the page into the first hardware entry, and so on
*/
for_each_sg(sgl, sg, sg_len, i) {
sg_used = 0;
/* Loop until the entire scatterlist entry is used */
while (sg_used < sg_dma_len(sg)) {
/*
* If we've used up the current hardware address/length
* pair, we need to load a new one
*
* This is done in a while loop so that descriptors with
* length == 0 will be skipped
*/
while (hw_used >= hw->length) {
/*
* If the current hardware entry is the last
* entry in the list, we're finished
*/
if (list_is_last(&hw->entry, &slave->addresses))
goto finished;
/* Get the next hardware address/length pair */
hw = list_entry(hw->entry.next,
struct fsl_dma_hw_addr, entry);
hw_used = 0;
}
/* Allocate the link descriptor from DMA pool */
new = fsl_dma_alloc_descriptor(fsl_chan);
if (!new) {
dev_err(fsl_chan->dev, "No free memory for "
"link descriptor\n");
goto fail;
}
#ifdef FSL_DMA_LD_DEBUG
dev_dbg(fsl_chan->dev, "new link desc alloc %p\n", new);
#endif
/*
* Calculate the maximum number of bytes to transfer,
* making sure it is less than the DMA controller limit
*/
copy = min_t(size_t, sg_dma_len(sg) - sg_used,
hw->length - hw_used);
copy = min_t(size_t, copy, FSL_DMA_BCR_MAX_CNT);
/*
* DMA_FROM_DEVICE
* from the hardware to the scatterlist
*
* DMA_TO_DEVICE
* from the scatterlist to the hardware
*/
if (direction == DMA_FROM_DEVICE) {
dma_src = hw->address + hw_used;
dma_dst = sg_dma_address(sg) + sg_used;
} else {
dma_src = sg_dma_address(sg) + sg_used;
dma_dst = hw->address + hw_used;
}
/* Fill in the descriptor */
set_desc_cnt(fsl_chan, &new->hw, copy);
set_desc_src(fsl_chan, &new->hw, dma_src);
set_desc_dest(fsl_chan, &new->hw, dma_dst);
/*
* If this is not the first descriptor, chain the
* current descriptor after the previous descriptor
*/
if (!first) {
first = new;
} else {
set_desc_next(fsl_chan, &prev->hw,
new->async_tx.phys);
}
new->async_tx.cookie = 0;
async_tx_ack(&new->async_tx);
prev = new;
sg_used += copy;
hw_used += copy;
/* Insert the link descriptor into the LD ring */
list_add_tail(&new->node, &first->tx_list);
}
}
finished:
/* All of the hardware address/length pairs had length == 0 */
if (!first || !new)
return NULL;
new->async_tx.flags = flags;
new->async_tx.cookie = -EBUSY;
/* Set End-of-link to the last link descriptor of new list */
set_ld_eol(fsl_chan, new);
/* Enable extra controller features */
if (fsl_chan->set_src_loop_size)
fsl_chan->set_src_loop_size(fsl_chan, slave->src_loop_size);
if (fsl_chan->set_dest_loop_size)
fsl_chan->set_dest_loop_size(fsl_chan, slave->dst_loop_size);
if (fsl_chan->toggle_ext_start)
fsl_chan->toggle_ext_start(fsl_chan, slave->external_start);
if (fsl_chan->toggle_ext_pause)
fsl_chan->toggle_ext_pause(fsl_chan, slave->external_pause);
if (fsl_chan->set_request_count)
fsl_chan->set_request_count(fsl_chan, slave->request_count);
return &first->async_tx;
fail:
/* If first was not set, then we failed to allocate the very first
* descriptor, and we're done */
if (!first)
return NULL;
/*
* First is set, so all of the descriptors we allocated have been added
* to first->tx_list, INCLUDING "first" itself. Therefore we
* must traverse the list backwards freeing each descriptor in turn
*
* We're re-using variables for the loop, oh well
*/
tx_list = &first->tx_list;
list_for_each_entry_safe_reverse(new, prev, tx_list, node) {
list_del_init(&new->node);
dma_pool_free(fsl_chan->desc_pool, new, new->async_tx.phys);
}
return NULL;
}
static void fsl_dma_device_terminate_all(struct dma_chan *chan)
{
struct fsl_dma_chan *fsl_chan;
struct fsl_desc_sw *desc, *tmp;
unsigned long flags;
if (!chan)
return;
fsl_chan = to_fsl_chan(chan);
/* Halt the DMA engine */
dma_halt(fsl_chan);
spin_lock_irqsave(&fsl_chan->desc_lock, flags);
/* Remove and free all of the descriptors in the LD queue */
list_for_each_entry_safe(desc, tmp, &fsl_chan->ld_queue, node) {
list_del(&desc->node);
dma_pool_free(fsl_chan->desc_pool, desc, desc->async_tx.phys);
}
spin_unlock_irqrestore(&fsl_chan->desc_lock, flags);
}
/**
* fsl_dma_update_completed_cookie - Update the completed cookie.
* @fsl_chan : Freescale DMA channel
@ -883,6 +1121,7 @@ static int __devinit fsl_dma_chan_probe(struct fsl_dma_device *fdev,
new_fsl_chan->toggle_ext_start = fsl_chan_toggle_ext_start;
new_fsl_chan->set_src_loop_size = fsl_chan_set_src_loop_size;
new_fsl_chan->set_dest_loop_size = fsl_chan_set_dest_loop_size;
new_fsl_chan->set_request_count = fsl_chan_set_request_count;
}
spin_lock_init(&new_fsl_chan->desc_lock);
@ -962,12 +1201,15 @@ static int __devinit of_fsl_dma_probe(struct of_device *dev,
dma_cap_set(DMA_MEMCPY, fdev->common.cap_mask);
dma_cap_set(DMA_INTERRUPT, fdev->common.cap_mask);
dma_cap_set(DMA_SLAVE, fdev->common.cap_mask);
fdev->common.device_alloc_chan_resources = fsl_dma_alloc_chan_resources;
fdev->common.device_free_chan_resources = fsl_dma_free_chan_resources;
fdev->common.device_prep_dma_interrupt = fsl_dma_prep_interrupt;
fdev->common.device_prep_dma_memcpy = fsl_dma_prep_memcpy;
fdev->common.device_is_tx_complete = fsl_dma_is_complete;
fdev->common.device_issue_pending = fsl_dma_memcpy_issue_pending;
fdev->common.device_prep_slave_sg = fsl_dma_prep_slave_sg;
fdev->common.device_terminate_all = fsl_dma_device_terminate_all;
fdev->common.dev = &dev->dev;
fdev->irq = irq_of_parse_and_map(dev->node, 0);

View File

@ -90,6 +90,7 @@ struct fsl_dma_ld_hw {
struct fsl_desc_sw {
struct fsl_dma_ld_hw hw;
struct list_head node;
struct list_head tx_list;
struct dma_async_tx_descriptor async_tx;
struct list_head *ld;
void *priv;
@ -143,10 +144,11 @@ struct fsl_dma_chan {
struct tasklet_struct tasklet;
u32 feature;
void (*toggle_ext_pause)(struct fsl_dma_chan *fsl_chan, int size);
void (*toggle_ext_pause)(struct fsl_dma_chan *fsl_chan, int enable);
void (*toggle_ext_start)(struct fsl_dma_chan *fsl_chan, int enable);
void (*set_src_loop_size)(struct fsl_dma_chan *fsl_chan, int size);
void (*set_dest_loop_size)(struct fsl_dma_chan *fsl_chan, int size);
void (*set_request_count)(struct fsl_dma_chan *fsl_chan, int size);
};
#define to_fsl_chan(chan) container_of(chan, struct fsl_dma_chan, common)

View File

@ -1,202 +0,0 @@
/*
* Intel I/OAT DMA Linux driver
* Copyright(c) 2007 - 2009 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
*
* The full GNU General Public License is included in this distribution in
* the file called "COPYING".
*
*/
/*
* This driver supports an Intel I/OAT DMA engine, which does asynchronous
* copy operations.
*/
#include <linux/init.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/interrupt.h>
#include <linux/dca.h>
#include "ioatdma.h"
#include "ioatdma_registers.h"
#include "ioatdma_hw.h"
MODULE_VERSION(IOAT_DMA_VERSION);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Intel Corporation");
static struct pci_device_id ioat_pci_tbl[] = {
/* I/OAT v1 platforms */
{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT) },
{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_CNB) },
{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SCNB) },
{ PCI_DEVICE(PCI_VENDOR_ID_UNISYS, PCI_DEVICE_ID_UNISYS_DMA_DIRECTOR) },
/* I/OAT v2 platforms */
{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB) },
/* I/OAT v3 platforms */
{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG0) },
{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG1) },
{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG2) },
{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG3) },
{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG4) },
{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG5) },
{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG6) },
{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG7) },
{ 0, }
};
struct ioat_device {
struct pci_dev *pdev;
void __iomem *iobase;
struct ioatdma_device *dma;
struct dca_provider *dca;
};
static int __devinit ioat_probe(struct pci_dev *pdev,
const struct pci_device_id *id);
static void __devexit ioat_remove(struct pci_dev *pdev);
static int ioat_dca_enabled = 1;
module_param(ioat_dca_enabled, int, 0644);
MODULE_PARM_DESC(ioat_dca_enabled, "control support of dca service (default: 1)");
static struct pci_driver ioat_pci_driver = {
.name = "ioatdma",
.id_table = ioat_pci_tbl,
.probe = ioat_probe,
.remove = __devexit_p(ioat_remove),
};
static int __devinit ioat_probe(struct pci_dev *pdev,
const struct pci_device_id *id)
{
void __iomem *iobase;
struct ioat_device *device;
unsigned long mmio_start, mmio_len;
int err;
err = pci_enable_device(pdev);
if (err)
goto err_enable_device;
err = pci_request_regions(pdev, ioat_pci_driver.name);
if (err)
goto err_request_regions;
err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
if (err)
err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
if (err)
goto err_set_dma_mask;
err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
if (err)
err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
if (err)
goto err_set_dma_mask;
mmio_start = pci_resource_start(pdev, 0);
mmio_len = pci_resource_len(pdev, 0);
iobase = ioremap(mmio_start, mmio_len);
if (!iobase) {
err = -ENOMEM;
goto err_ioremap;
}
device = kzalloc(sizeof(*device), GFP_KERNEL);
if (!device) {
err = -ENOMEM;
goto err_kzalloc;
}
device->pdev = pdev;
pci_set_drvdata(pdev, device);
device->iobase = iobase;
pci_set_master(pdev);
switch (readb(iobase + IOAT_VER_OFFSET)) {
case IOAT_VER_1_2:
device->dma = ioat_dma_probe(pdev, iobase);
if (device->dma && ioat_dca_enabled)
device->dca = ioat_dca_init(pdev, iobase);
break;
case IOAT_VER_2_0:
device->dma = ioat_dma_probe(pdev, iobase);
if (device->dma && ioat_dca_enabled)
device->dca = ioat2_dca_init(pdev, iobase);
break;
case IOAT_VER_3_0:
device->dma = ioat_dma_probe(pdev, iobase);
if (device->dma && ioat_dca_enabled)
device->dca = ioat3_dca_init(pdev, iobase);
break;
default:
err = -ENODEV;
break;
}
if (!device->dma)
err = -ENODEV;
if (err)
goto err_version;
return 0;
err_version:
kfree(device);
err_kzalloc:
iounmap(iobase);
err_ioremap:
err_set_dma_mask:
pci_release_regions(pdev);
pci_disable_device(pdev);
err_request_regions:
err_enable_device:
return err;
}
static void __devexit ioat_remove(struct pci_dev *pdev)
{
struct ioat_device *device = pci_get_drvdata(pdev);
dev_err(&pdev->dev, "Removing dma and dca services\n");
if (device->dca) {
unregister_dca_provider(device->dca);
free_dca_provider(device->dca);
device->dca = NULL;
}
if (device->dma) {
ioat_dma_remove(device->dma);
device->dma = NULL;
}
kfree(device);
}
static int __init ioat_init_module(void)
{
return pci_register_driver(&ioat_pci_driver);
}
module_init(ioat_init_module);
static void __exit ioat_exit_module(void)
{
pci_unregister_driver(&ioat_pci_driver);
}
module_exit(ioat_exit_module);

View File

@ -0,0 +1,2 @@
obj-$(CONFIG_INTEL_IOATDMA) += ioatdma.o
ioatdma-objs := pci.o dma.o dma_v2.o dma_v3.o dca.o

View File

@ -33,8 +33,8 @@
#define cpu_physical_id(cpu) (cpuid_ebx(1) >> 24)
#endif
#include "ioatdma.h"
#include "ioatdma_registers.h"
#include "dma.h"
#include "registers.h"
/*
* Bit 7 of a tag map entry is the "valid" bit, if it is set then bits 0:6
@ -242,7 +242,8 @@ static struct dca_ops ioat_dca_ops = {
};
struct dca_provider *ioat_dca_init(struct pci_dev *pdev, void __iomem *iobase)
struct dca_provider * __devinit
ioat_dca_init(struct pci_dev *pdev, void __iomem *iobase)
{
struct dca_provider *dca;
struct ioat_dca_priv *ioatdca;
@ -407,7 +408,8 @@ static int ioat2_dca_count_dca_slots(void __iomem *iobase, u16 dca_offset)
return slots;
}
struct dca_provider *ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase)
struct dca_provider * __devinit
ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase)
{
struct dca_provider *dca;
struct ioat_dca_priv *ioatdca;
@ -602,7 +604,8 @@ static int ioat3_dca_count_dca_slots(void *iobase, u16 dca_offset)
return slots;
}
struct dca_provider *ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase)
struct dca_provider * __devinit
ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase)
{
struct dca_provider *dca;
struct ioat_dca_priv *ioatdca;

1238
drivers/dma/ioat/dma.c Normal file

File diff suppressed because it is too large Load Diff

337
drivers/dma/ioat/dma.h Normal file
View File

@ -0,0 +1,337 @@
/*
* Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc., 59
* Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
* The full GNU General Public License is included in this distribution in the
* file called COPYING.
*/
#ifndef IOATDMA_H
#define IOATDMA_H
#include <linux/dmaengine.h>
#include "hw.h"
#include "registers.h"
#include <linux/init.h>
#include <linux/dmapool.h>
#include <linux/cache.h>
#include <linux/pci_ids.h>
#include <net/tcp.h>
#define IOAT_DMA_VERSION "4.00"
#define IOAT_LOW_COMPLETION_MASK 0xffffffc0
#define IOAT_DMA_DCA_ANY_CPU ~0
#define to_ioatdma_device(dev) container_of(dev, struct ioatdma_device, common)
#define to_ioat_desc(lh) container_of(lh, struct ioat_desc_sw, node)
#define tx_to_ioat_desc(tx) container_of(tx, struct ioat_desc_sw, txd)
#define to_dev(ioat_chan) (&(ioat_chan)->device->pdev->dev)
#define chan_num(ch) ((int)((ch)->reg_base - (ch)->device->reg_base) / 0x80)
/*
* workaround for IOAT ver.3.0 null descriptor issue
* (channel returns error when size is 0)
*/
#define NULL_DESC_BUFFER_SIZE 1
/**
* struct ioatdma_device - internal representation of a IOAT device
* @pdev: PCI-Express device
* @reg_base: MMIO register space base address
* @dma_pool: for allocating DMA descriptors
* @common: embedded struct dma_device
* @version: version of ioatdma device
* @msix_entries: irq handlers
* @idx: per channel data
* @dca: direct cache access context
* @intr_quirk: interrupt setup quirk (for ioat_v1 devices)
* @enumerate_channels: hw version specific channel enumeration
* @cleanup_tasklet: select between the v2 and v3 cleanup routines
* @timer_fn: select between the v2 and v3 timer watchdog routines
* @self_test: hardware version specific self test for each supported op type
*
* Note: the v3 cleanup routine supports raid operations
*/
struct ioatdma_device {
struct pci_dev *pdev;
void __iomem *reg_base;
struct pci_pool *dma_pool;
struct pci_pool *completion_pool;
struct dma_device common;
u8 version;
struct msix_entry msix_entries[4];
struct ioat_chan_common *idx[4];
struct dca_provider *dca;
void (*intr_quirk)(struct ioatdma_device *device);
int (*enumerate_channels)(struct ioatdma_device *device);
void (*cleanup_tasklet)(unsigned long data);
void (*timer_fn)(unsigned long data);
int (*self_test)(struct ioatdma_device *device);
};
struct ioat_chan_common {
struct dma_chan common;
void __iomem *reg_base;
unsigned long last_completion;
spinlock_t cleanup_lock;
dma_cookie_t completed_cookie;
unsigned long state;
#define IOAT_COMPLETION_PENDING 0
#define IOAT_COMPLETION_ACK 1
#define IOAT_RESET_PENDING 2
#define IOAT_KOBJ_INIT_FAIL 3
struct timer_list timer;
#define COMPLETION_TIMEOUT msecs_to_jiffies(100)
#define IDLE_TIMEOUT msecs_to_jiffies(2000)
#define RESET_DELAY msecs_to_jiffies(100)
struct ioatdma_device *device;
dma_addr_t completion_dma;
u64 *completion;
struct tasklet_struct cleanup_task;
struct kobject kobj;
};
struct ioat_sysfs_entry {
struct attribute attr;
ssize_t (*show)(struct dma_chan *, char *);
};
/**
* struct ioat_dma_chan - internal representation of a DMA channel
*/
struct ioat_dma_chan {
struct ioat_chan_common base;
size_t xfercap; /* XFERCAP register value expanded out */
spinlock_t desc_lock;
struct list_head free_desc;
struct list_head used_desc;
int pending;
u16 desccount;
u16 active;
};
static inline struct ioat_chan_common *to_chan_common(struct dma_chan *c)
{
return container_of(c, struct ioat_chan_common, common);
}
static inline struct ioat_dma_chan *to_ioat_chan(struct dma_chan *c)
{
struct ioat_chan_common *chan = to_chan_common(c);
return container_of(chan, struct ioat_dma_chan, base);
}
/**
* ioat_is_complete - poll the status of an ioat transaction
* @c: channel handle
* @cookie: transaction identifier
* @done: if set, updated with last completed transaction
* @used: if set, updated with last used transaction
*/
static inline enum dma_status
ioat_is_complete(struct dma_chan *c, dma_cookie_t cookie,
dma_cookie_t *done, dma_cookie_t *used)
{
struct ioat_chan_common *chan = to_chan_common(c);
dma_cookie_t last_used;
dma_cookie_t last_complete;
last_used = c->cookie;
last_complete = chan->completed_cookie;
if (done)
*done = last_complete;
if (used)
*used = last_used;
return dma_async_is_complete(cookie, last_complete, last_used);
}
/* wrapper around hardware descriptor format + additional software fields */
/**
* struct ioat_desc_sw - wrapper around hardware descriptor
* @hw: hardware DMA descriptor (for memcpy)
* @node: this descriptor will either be on the free list,
* or attached to a transaction list (tx_list)
* @txd: the generic software descriptor for all engines
* @id: identifier for debug
*/
struct ioat_desc_sw {
struct ioat_dma_descriptor *hw;
struct list_head node;
size_t len;
struct list_head tx_list;
struct dma_async_tx_descriptor txd;
#ifdef DEBUG
int id;
#endif
};
#ifdef DEBUG
#define set_desc_id(desc, i) ((desc)->id = (i))
#define desc_id(desc) ((desc)->id)
#else
#define set_desc_id(desc, i)
#define desc_id(desc) (0)
#endif
static inline void
__dump_desc_dbg(struct ioat_chan_common *chan, struct ioat_dma_descriptor *hw,
struct dma_async_tx_descriptor *tx, int id)
{
struct device *dev = to_dev(chan);
dev_dbg(dev, "desc[%d]: (%#llx->%#llx) cookie: %d flags: %#x"
" ctl: %#x (op: %d int_en: %d compl: %d)\n", id,
(unsigned long long) tx->phys,
(unsigned long long) hw->next, tx->cookie, tx->flags,
hw->ctl, hw->ctl_f.op, hw->ctl_f.int_en, hw->ctl_f.compl_write);
}
#define dump_desc_dbg(c, d) \
({ if (d) __dump_desc_dbg(&c->base, d->hw, &d->txd, desc_id(d)); 0; })
static inline void ioat_set_tcp_copy_break(unsigned long copybreak)
{
#ifdef CONFIG_NET_DMA
sysctl_tcp_dma_copybreak = copybreak;
#endif
}
static inline struct ioat_chan_common *
ioat_chan_by_index(struct ioatdma_device *device, int index)
{
return device->idx[index];
}
static inline u64 ioat_chansts(struct ioat_chan_common *chan)
{
u8 ver = chan->device->version;
u64 status;
u32 status_lo;
/* We need to read the low address first as this causes the
* chipset to latch the upper bits for the subsequent read
*/
status_lo = readl(chan->reg_base + IOAT_CHANSTS_OFFSET_LOW(ver));
status = readl(chan->reg_base + IOAT_CHANSTS_OFFSET_HIGH(ver));
status <<= 32;
status |= status_lo;
return status;
}
static inline void ioat_start(struct ioat_chan_common *chan)
{
u8 ver = chan->device->version;
writeb(IOAT_CHANCMD_START, chan->reg_base + IOAT_CHANCMD_OFFSET(ver));
}
static inline u64 ioat_chansts_to_addr(u64 status)
{
return status & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR;
}
static inline u32 ioat_chanerr(struct ioat_chan_common *chan)
{
return readl(chan->reg_base + IOAT_CHANERR_OFFSET);
}
static inline void ioat_suspend(struct ioat_chan_common *chan)
{
u8 ver = chan->device->version;
writeb(IOAT_CHANCMD_SUSPEND, chan->reg_base + IOAT_CHANCMD_OFFSET(ver));
}
static inline void ioat_set_chainaddr(struct ioat_dma_chan *ioat, u64 addr)
{
struct ioat_chan_common *chan = &ioat->base;
writel(addr & 0x00000000FFFFFFFF,
chan->reg_base + IOAT1_CHAINADDR_OFFSET_LOW);
writel(addr >> 32,
chan->reg_base + IOAT1_CHAINADDR_OFFSET_HIGH);
}
static inline bool is_ioat_active(unsigned long status)
{
return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_ACTIVE);
}
static inline bool is_ioat_idle(unsigned long status)
{
return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_DONE);
}
static inline bool is_ioat_halted(unsigned long status)
{
return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_HALTED);
}
static inline bool is_ioat_suspended(unsigned long status)
{
return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_SUSPENDED);
}
/* channel was fatally programmed */
static inline bool is_ioat_bug(unsigned long err)
{
return !!(err & (IOAT_CHANERR_SRC_ADDR_ERR|IOAT_CHANERR_DEST_ADDR_ERR|
IOAT_CHANERR_NEXT_ADDR_ERR|IOAT_CHANERR_CONTROL_ERR|
IOAT_CHANERR_LENGTH_ERR));
}
static inline void ioat_unmap(struct pci_dev *pdev, dma_addr_t addr, size_t len,
int direction, enum dma_ctrl_flags flags, bool dst)
{
if ((dst && (flags & DMA_COMPL_DEST_UNMAP_SINGLE)) ||
(!dst && (flags & DMA_COMPL_SRC_UNMAP_SINGLE)))
pci_unmap_single(pdev, addr, len, direction);
else
pci_unmap_page(pdev, addr, len, direction);
}
int __devinit ioat_probe(struct ioatdma_device *device);
int __devinit ioat_register(struct ioatdma_device *device);
int __devinit ioat1_dma_probe(struct ioatdma_device *dev, int dca);
int __devinit ioat_dma_self_test(struct ioatdma_device *device);
void __devexit ioat_dma_remove(struct ioatdma_device *device);
struct dca_provider * __devinit ioat_dca_init(struct pci_dev *pdev,
void __iomem *iobase);
unsigned long ioat_get_current_completion(struct ioat_chan_common *chan);
void ioat_init_channel(struct ioatdma_device *device,
struct ioat_chan_common *chan, int idx,
void (*timer_fn)(unsigned long),
void (*tasklet)(unsigned long),
unsigned long ioat);
void ioat_dma_unmap(struct ioat_chan_common *chan, enum dma_ctrl_flags flags,
size_t len, struct ioat_dma_descriptor *hw);
bool ioat_cleanup_preamble(struct ioat_chan_common *chan,
unsigned long *phys_complete);
void ioat_kobject_add(struct ioatdma_device *device, struct kobj_type *type);
void ioat_kobject_del(struct ioatdma_device *device);
extern struct sysfs_ops ioat_sysfs_ops;
extern struct ioat_sysfs_entry ioat_version_attr;
extern struct ioat_sysfs_entry ioat_cap_attr;
#endif /* IOATDMA_H */

871
drivers/dma/ioat/dma_v2.c Normal file
View File

@ -0,0 +1,871 @@
/*
* Intel I/OAT DMA Linux driver
* Copyright(c) 2004 - 2009 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
*
* The full GNU General Public License is included in this distribution in
* the file called "COPYING".
*
*/
/*
* This driver supports an Intel I/OAT DMA engine (versions >= 2), which
* does asynchronous data movement and checksumming operations.
*/
#include <linux/init.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/interrupt.h>
#include <linux/dmaengine.h>
#include <linux/delay.h>
#include <linux/dma-mapping.h>
#include <linux/workqueue.h>
#include <linux/i7300_idle.h>
#include "dma.h"
#include "dma_v2.h"
#include "registers.h"
#include "hw.h"
int ioat_ring_alloc_order = 8;
module_param(ioat_ring_alloc_order, int, 0644);
MODULE_PARM_DESC(ioat_ring_alloc_order,
"ioat2+: allocate 2^n descriptors per channel"
" (default: 8 max: 16)");
static int ioat_ring_max_alloc_order = IOAT_MAX_ORDER;
module_param(ioat_ring_max_alloc_order, int, 0644);
MODULE_PARM_DESC(ioat_ring_max_alloc_order,
"ioat2+: upper limit for ring size (default: 16)");
void __ioat2_issue_pending(struct ioat2_dma_chan *ioat)
{
void * __iomem reg_base = ioat->base.reg_base;
ioat->pending = 0;
ioat->dmacount += ioat2_ring_pending(ioat);
ioat->issued = ioat->head;
/* make descriptor updates globally visible before notifying channel */
wmb();
writew(ioat->dmacount, reg_base + IOAT_CHAN_DMACOUNT_OFFSET);
dev_dbg(to_dev(&ioat->base),
"%s: head: %#x tail: %#x issued: %#x count: %#x\n",
__func__, ioat->head, ioat->tail, ioat->issued, ioat->dmacount);
}
void ioat2_issue_pending(struct dma_chan *chan)
{
struct ioat2_dma_chan *ioat = to_ioat2_chan(chan);
spin_lock_bh(&ioat->ring_lock);
if (ioat->pending == 1)
__ioat2_issue_pending(ioat);
spin_unlock_bh(&ioat->ring_lock);
}
/**
* ioat2_update_pending - log pending descriptors
* @ioat: ioat2+ channel
*
* set pending to '1' unless pending is already set to '2', pending == 2
* indicates that submission is temporarily blocked due to an in-flight
* reset. If we are already above the ioat_pending_level threshold then
* just issue pending.
*
* called with ring_lock held
*/
static void ioat2_update_pending(struct ioat2_dma_chan *ioat)
{
if (unlikely(ioat->pending == 2))
return;
else if (ioat2_ring_pending(ioat) > ioat_pending_level)
__ioat2_issue_pending(ioat);
else
ioat->pending = 1;
}
static void __ioat2_start_null_desc(struct ioat2_dma_chan *ioat)
{
struct ioat_ring_ent *desc;
struct ioat_dma_descriptor *hw;
int idx;
if (ioat2_ring_space(ioat) < 1) {
dev_err(to_dev(&ioat->base),
"Unable to start null desc - ring full\n");
return;
}
dev_dbg(to_dev(&ioat->base), "%s: head: %#x tail: %#x issued: %#x\n",
__func__, ioat->head, ioat->tail, ioat->issued);
idx = ioat2_desc_alloc(ioat, 1);
desc = ioat2_get_ring_ent(ioat, idx);
hw = desc->hw;
hw->ctl = 0;
hw->ctl_f.null = 1;
hw->ctl_f.int_en = 1;
hw->ctl_f.compl_write = 1;
/* set size to non-zero value (channel returns error when size is 0) */
hw->size = NULL_DESC_BUFFER_SIZE;
hw->src_addr = 0;
hw->dst_addr = 0;
async_tx_ack(&desc->txd);
ioat2_set_chainaddr(ioat, desc->txd.phys);
dump_desc_dbg(ioat, desc);
__ioat2_issue_pending(ioat);
}
static void ioat2_start_null_desc(struct ioat2_dma_chan *ioat)
{
spin_lock_bh(&ioat->ring_lock);
__ioat2_start_null_desc(ioat);
spin_unlock_bh(&ioat->ring_lock);
}
static void __cleanup(struct ioat2_dma_chan *ioat, unsigned long phys_complete)
{
struct ioat_chan_common *chan = &ioat->base;
struct dma_async_tx_descriptor *tx;
struct ioat_ring_ent *desc;
bool seen_current = false;
u16 active;
int i;
dev_dbg(to_dev(chan), "%s: head: %#x tail: %#x issued: %#x\n",
__func__, ioat->head, ioat->tail, ioat->issued);
active = ioat2_ring_active(ioat);
for (i = 0; i < active && !seen_current; i++) {
prefetch(ioat2_get_ring_ent(ioat, ioat->tail + i + 1));
desc = ioat2_get_ring_ent(ioat, ioat->tail + i);
tx = &desc->txd;
dump_desc_dbg(ioat, desc);
if (tx->cookie) {
ioat_dma_unmap(chan, tx->flags, desc->len, desc->hw);
chan->completed_cookie = tx->cookie;
tx->cookie = 0;
if (tx->callback) {
tx->callback(tx->callback_param);
tx->callback = NULL;
}
}
if (tx->phys == phys_complete)
seen_current = true;
}
ioat->tail += i;
BUG_ON(!seen_current); /* no active descs have written a completion? */
chan->last_completion = phys_complete;
if (ioat->head == ioat->tail) {
dev_dbg(to_dev(chan), "%s: cancel completion timeout\n",
__func__);
clear_bit(IOAT_COMPLETION_PENDING, &chan->state);
mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT);
}
}
/**
* ioat2_cleanup - clean finished descriptors (advance tail pointer)
* @chan: ioat channel to be cleaned up
*/
static void ioat2_cleanup(struct ioat2_dma_chan *ioat)
{
struct ioat_chan_common *chan = &ioat->base;
unsigned long phys_complete;
prefetch(chan->completion);
if (!spin_trylock_bh(&chan->cleanup_lock))
return;
if (!ioat_cleanup_preamble(chan, &phys_complete)) {
spin_unlock_bh(&chan->cleanup_lock);
return;
}
if (!spin_trylock_bh(&ioat->ring_lock)) {
spin_unlock_bh(&chan->cleanup_lock);
return;
}
__cleanup(ioat, phys_complete);
spin_unlock_bh(&ioat->ring_lock);
spin_unlock_bh(&chan->cleanup_lock);
}
void ioat2_cleanup_tasklet(unsigned long data)
{
struct ioat2_dma_chan *ioat = (void *) data;
ioat2_cleanup(ioat);
writew(IOAT_CHANCTRL_RUN, ioat->base.reg_base + IOAT_CHANCTRL_OFFSET);
}
void __ioat2_restart_chan(struct ioat2_dma_chan *ioat)
{
struct ioat_chan_common *chan = &ioat->base;
/* set the tail to be re-issued */
ioat->issued = ioat->tail;
ioat->dmacount = 0;
set_bit(IOAT_COMPLETION_PENDING, &chan->state);
mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
dev_dbg(to_dev(chan),
"%s: head: %#x tail: %#x issued: %#x count: %#x\n",
__func__, ioat->head, ioat->tail, ioat->issued, ioat->dmacount);
if (ioat2_ring_pending(ioat)) {
struct ioat_ring_ent *desc;
desc = ioat2_get_ring_ent(ioat, ioat->tail);
ioat2_set_chainaddr(ioat, desc->txd.phys);
__ioat2_issue_pending(ioat);
} else
__ioat2_start_null_desc(ioat);
}
static void ioat2_restart_channel(struct ioat2_dma_chan *ioat)
{
struct ioat_chan_common *chan = &ioat->base;
unsigned long phys_complete;
u32 status;
status = ioat_chansts(chan);
if (is_ioat_active(status) || is_ioat_idle(status))
ioat_suspend(chan);
while (is_ioat_active(status) || is_ioat_idle(status)) {
status = ioat_chansts(chan);
cpu_relax();
}
if (ioat_cleanup_preamble(chan, &phys_complete))
__cleanup(ioat, phys_complete);
__ioat2_restart_chan(ioat);
}
void ioat2_timer_event(unsigned long data)
{
struct ioat2_dma_chan *ioat = (void *) data;
struct ioat_chan_common *chan = &ioat->base;
spin_lock_bh(&chan->cleanup_lock);
if (test_bit(IOAT_COMPLETION_PENDING, &chan->state)) {
unsigned long phys_complete;
u64 status;
spin_lock_bh(&ioat->ring_lock);
status = ioat_chansts(chan);
/* when halted due to errors check for channel
* programming errors before advancing the completion state
*/
if (is_ioat_halted(status)) {
u32 chanerr;
chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET);
BUG_ON(is_ioat_bug(chanerr));
}
/* if we haven't made progress and we have already
* acknowledged a pending completion once, then be more
* forceful with a restart
*/
if (ioat_cleanup_preamble(chan, &phys_complete))
__cleanup(ioat, phys_complete);
else if (test_bit(IOAT_COMPLETION_ACK, &chan->state))
ioat2_restart_channel(ioat);
else {
set_bit(IOAT_COMPLETION_ACK, &chan->state);
mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
}
spin_unlock_bh(&ioat->ring_lock);
} else {
u16 active;
/* if the ring is idle, empty, and oversized try to step
* down the size
*/
spin_lock_bh(&ioat->ring_lock);
active = ioat2_ring_active(ioat);
if (active == 0 && ioat->alloc_order > ioat_get_alloc_order())
reshape_ring(ioat, ioat->alloc_order-1);
spin_unlock_bh(&ioat->ring_lock);
/* keep shrinking until we get back to our minimum
* default size
*/
if (ioat->alloc_order > ioat_get_alloc_order())
mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT);
}
spin_unlock_bh(&chan->cleanup_lock);
}
/**
* ioat2_enumerate_channels - find and initialize the device's channels
* @device: the device to be enumerated
*/
int ioat2_enumerate_channels(struct ioatdma_device *device)
{
struct ioat2_dma_chan *ioat;
struct device *dev = &device->pdev->dev;
struct dma_device *dma = &device->common;
u8 xfercap_log;
int i;
INIT_LIST_HEAD(&dma->channels);
dma->chancnt = readb(device->reg_base + IOAT_CHANCNT_OFFSET);
dma->chancnt &= 0x1f; /* bits [4:0] valid */
if (dma->chancnt > ARRAY_SIZE(device->idx)) {
dev_warn(dev, "(%d) exceeds max supported channels (%zu)\n",
dma->chancnt, ARRAY_SIZE(device->idx));
dma->chancnt = ARRAY_SIZE(device->idx);
}
xfercap_log = readb(device->reg_base + IOAT_XFERCAP_OFFSET);
xfercap_log &= 0x1f; /* bits [4:0] valid */
if (xfercap_log == 0)
return 0;
dev_dbg(dev, "%s: xfercap = %d\n", __func__, 1 << xfercap_log);
/* FIXME which i/oat version is i7300? */
#ifdef CONFIG_I7300_IDLE_IOAT_CHANNEL
if (i7300_idle_platform_probe(NULL, NULL, 1) == 0)
dma->chancnt--;
#endif
for (i = 0; i < dma->chancnt; i++) {
ioat = devm_kzalloc(dev, sizeof(*ioat), GFP_KERNEL);
if (!ioat)
break;
ioat_init_channel(device, &ioat->base, i,
device->timer_fn,
device->cleanup_tasklet,
(unsigned long) ioat);
ioat->xfercap_log = xfercap_log;
spin_lock_init(&ioat->ring_lock);
}
dma->chancnt = i;
return i;
}
static dma_cookie_t ioat2_tx_submit_unlock(struct dma_async_tx_descriptor *tx)
{
struct dma_chan *c = tx->chan;
struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
struct ioat_chan_common *chan = &ioat->base;
dma_cookie_t cookie = c->cookie;
cookie++;
if (cookie < 0)
cookie = 1;
tx->cookie = cookie;
c->cookie = cookie;
dev_dbg(to_dev(&ioat->base), "%s: cookie: %d\n", __func__, cookie);
if (!test_and_set_bit(IOAT_COMPLETION_PENDING, &chan->state))
mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
ioat2_update_pending(ioat);
spin_unlock_bh(&ioat->ring_lock);
return cookie;
}
static struct ioat_ring_ent *ioat2_alloc_ring_ent(struct dma_chan *chan, gfp_t flags)
{
struct ioat_dma_descriptor *hw;
struct ioat_ring_ent *desc;
struct ioatdma_device *dma;
dma_addr_t phys;
dma = to_ioatdma_device(chan->device);
hw = pci_pool_alloc(dma->dma_pool, flags, &phys);
if (!hw)
return NULL;
memset(hw, 0, sizeof(*hw));
desc = kmem_cache_alloc(ioat2_cache, flags);
if (!desc) {
pci_pool_free(dma->dma_pool, hw, phys);
return NULL;
}
memset(desc, 0, sizeof(*desc));
dma_async_tx_descriptor_init(&desc->txd, chan);
desc->txd.tx_submit = ioat2_tx_submit_unlock;
desc->hw = hw;
desc->txd.phys = phys;
return desc;
}
static void ioat2_free_ring_ent(struct ioat_ring_ent *desc, struct dma_chan *chan)
{
struct ioatdma_device *dma;
dma = to_ioatdma_device(chan->device);
pci_pool_free(dma->dma_pool, desc->hw, desc->txd.phys);
kmem_cache_free(ioat2_cache, desc);
}
static struct ioat_ring_ent **ioat2_alloc_ring(struct dma_chan *c, int order, gfp_t flags)
{
struct ioat_ring_ent **ring;
int descs = 1 << order;
int i;
if (order > ioat_get_max_alloc_order())
return NULL;
/* allocate the array to hold the software ring */
ring = kcalloc(descs, sizeof(*ring), flags);
if (!ring)
return NULL;
for (i = 0; i < descs; i++) {
ring[i] = ioat2_alloc_ring_ent(c, flags);
if (!ring[i]) {
while (i--)
ioat2_free_ring_ent(ring[i], c);
kfree(ring);
return NULL;
}
set_desc_id(ring[i], i);
}
/* link descs */
for (i = 0; i < descs-1; i++) {
struct ioat_ring_ent *next = ring[i+1];
struct ioat_dma_descriptor *hw = ring[i]->hw;
hw->next = next->txd.phys;
}
ring[i]->hw->next = ring[0]->txd.phys;
return ring;
}
/* ioat2_alloc_chan_resources - allocate/initialize ioat2 descriptor ring
* @chan: channel to be initialized
*/
int ioat2_alloc_chan_resources(struct dma_chan *c)
{
struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
struct ioat_chan_common *chan = &ioat->base;
struct ioat_ring_ent **ring;
u32 chanerr;
int order;
/* have we already been set up? */
if (ioat->ring)
return 1 << ioat->alloc_order;
/* Setup register to interrupt and write completion status on error */
writew(IOAT_CHANCTRL_RUN, chan->reg_base + IOAT_CHANCTRL_OFFSET);
chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET);
if (chanerr) {
dev_err(to_dev(chan), "CHANERR = %x, clearing\n", chanerr);
writel(chanerr, chan->reg_base + IOAT_CHANERR_OFFSET);
}
/* allocate a completion writeback area */
/* doing 2 32bit writes to mmio since 1 64b write doesn't work */
chan->completion = pci_pool_alloc(chan->device->completion_pool,
GFP_KERNEL, &chan->completion_dma);
if (!chan->completion)
return -ENOMEM;
memset(chan->completion, 0, sizeof(*chan->completion));
writel(((u64) chan->completion_dma) & 0x00000000FFFFFFFF,
chan->reg_base + IOAT_CHANCMP_OFFSET_LOW);
writel(((u64) chan->completion_dma) >> 32,
chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH);
order = ioat_get_alloc_order();
ring = ioat2_alloc_ring(c, order, GFP_KERNEL);
if (!ring)
return -ENOMEM;
spin_lock_bh(&ioat->ring_lock);
ioat->ring = ring;
ioat->head = 0;
ioat->issued = 0;
ioat->tail = 0;
ioat->pending = 0;
ioat->alloc_order = order;
spin_unlock_bh(&ioat->ring_lock);
tasklet_enable(&chan->cleanup_task);
ioat2_start_null_desc(ioat);
return 1 << ioat->alloc_order;
}
bool reshape_ring(struct ioat2_dma_chan *ioat, int order)
{
/* reshape differs from normal ring allocation in that we want
* to allocate a new software ring while only
* extending/truncating the hardware ring
*/
struct ioat_chan_common *chan = &ioat->base;
struct dma_chan *c = &chan->common;
const u16 curr_size = ioat2_ring_mask(ioat) + 1;
const u16 active = ioat2_ring_active(ioat);
const u16 new_size = 1 << order;
struct ioat_ring_ent **ring;
u16 i;
if (order > ioat_get_max_alloc_order())
return false;
/* double check that we have at least 1 free descriptor */
if (active == curr_size)
return false;
/* when shrinking, verify that we can hold the current active
* set in the new ring
*/
if (active >= new_size)
return false;
/* allocate the array to hold the software ring */
ring = kcalloc(new_size, sizeof(*ring), GFP_NOWAIT);
if (!ring)
return false;
/* allocate/trim descriptors as needed */
if (new_size > curr_size) {
/* copy current descriptors to the new ring */
for (i = 0; i < curr_size; i++) {
u16 curr_idx = (ioat->tail+i) & (curr_size-1);
u16 new_idx = (ioat->tail+i) & (new_size-1);
ring[new_idx] = ioat->ring[curr_idx];
set_desc_id(ring[new_idx], new_idx);
}
/* add new descriptors to the ring */
for (i = curr_size; i < new_size; i++) {
u16 new_idx = (ioat->tail+i) & (new_size-1);
ring[new_idx] = ioat2_alloc_ring_ent(c, GFP_NOWAIT);
if (!ring[new_idx]) {
while (i--) {
u16 new_idx = (ioat->tail+i) & (new_size-1);
ioat2_free_ring_ent(ring[new_idx], c);
}
kfree(ring);
return false;
}
set_desc_id(ring[new_idx], new_idx);
}
/* hw link new descriptors */
for (i = curr_size-1; i < new_size; i++) {
u16 new_idx = (ioat->tail+i) & (new_size-1);
struct ioat_ring_ent *next = ring[(new_idx+1) & (new_size-1)];
struct ioat_dma_descriptor *hw = ring[new_idx]->hw;
hw->next = next->txd.phys;
}
} else {
struct ioat_dma_descriptor *hw;
struct ioat_ring_ent *next;
/* copy current descriptors to the new ring, dropping the
* removed descriptors
*/
for (i = 0; i < new_size; i++) {
u16 curr_idx = (ioat->tail+i) & (curr_size-1);
u16 new_idx = (ioat->tail+i) & (new_size-1);
ring[new_idx] = ioat->ring[curr_idx];
set_desc_id(ring[new_idx], new_idx);
}
/* free deleted descriptors */
for (i = new_size; i < curr_size; i++) {
struct ioat_ring_ent *ent;
ent = ioat2_get_ring_ent(ioat, ioat->tail+i);
ioat2_free_ring_ent(ent, c);
}
/* fix up hardware ring */
hw = ring[(ioat->tail+new_size-1) & (new_size-1)]->hw;
next = ring[(ioat->tail+new_size) & (new_size-1)];
hw->next = next->txd.phys;
}
dev_dbg(to_dev(chan), "%s: allocated %d descriptors\n",
__func__, new_size);
kfree(ioat->ring);
ioat->ring = ring;
ioat->alloc_order = order;
return true;
}
/**
* ioat2_alloc_and_lock - common descriptor alloc boilerplate for ioat2,3 ops
* @idx: gets starting descriptor index on successful allocation
* @ioat: ioat2,3 channel (ring) to operate on
* @num_descs: allocation length
*/
int ioat2_alloc_and_lock(u16 *idx, struct ioat2_dma_chan *ioat, int num_descs)
{
struct ioat_chan_common *chan = &ioat->base;
spin_lock_bh(&ioat->ring_lock);
/* never allow the last descriptor to be consumed, we need at
* least one free at all times to allow for on-the-fly ring
* resizing.
*/
while (unlikely(ioat2_ring_space(ioat) <= num_descs)) {
if (reshape_ring(ioat, ioat->alloc_order + 1) &&
ioat2_ring_space(ioat) > num_descs)
break;
if (printk_ratelimit())
dev_dbg(to_dev(chan),
"%s: ring full! num_descs: %d (%x:%x:%x)\n",
__func__, num_descs, ioat->head, ioat->tail,
ioat->issued);
spin_unlock_bh(&ioat->ring_lock);
/* progress reclaim in the allocation failure case we
* may be called under bh_disabled so we need to trigger
* the timer event directly
*/
spin_lock_bh(&chan->cleanup_lock);
if (jiffies > chan->timer.expires &&
timer_pending(&chan->timer)) {
struct ioatdma_device *device = chan->device;
mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
spin_unlock_bh(&chan->cleanup_lock);
device->timer_fn((unsigned long) ioat);
} else
spin_unlock_bh(&chan->cleanup_lock);
return -ENOMEM;
}
dev_dbg(to_dev(chan), "%s: num_descs: %d (%x:%x:%x)\n",
__func__, num_descs, ioat->head, ioat->tail, ioat->issued);
*idx = ioat2_desc_alloc(ioat, num_descs);
return 0; /* with ioat->ring_lock held */
}
struct dma_async_tx_descriptor *
ioat2_dma_prep_memcpy_lock(struct dma_chan *c, dma_addr_t dma_dest,
dma_addr_t dma_src, size_t len, unsigned long flags)
{
struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
struct ioat_dma_descriptor *hw;
struct ioat_ring_ent *desc;
dma_addr_t dst = dma_dest;
dma_addr_t src = dma_src;
size_t total_len = len;
int num_descs;
u16 idx;
int i;
num_descs = ioat2_xferlen_to_descs(ioat, len);
if (likely(num_descs) &&
ioat2_alloc_and_lock(&idx, ioat, num_descs) == 0)
/* pass */;
else
return NULL;
i = 0;
do {
size_t copy = min_t(size_t, len, 1 << ioat->xfercap_log);
desc = ioat2_get_ring_ent(ioat, idx + i);
hw = desc->hw;
hw->size = copy;
hw->ctl = 0;
hw->src_addr = src;
hw->dst_addr = dst;
len -= copy;
dst += copy;
src += copy;
dump_desc_dbg(ioat, desc);
} while (++i < num_descs);
desc->txd.flags = flags;
desc->len = total_len;
hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
hw->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
hw->ctl_f.compl_write = 1;
dump_desc_dbg(ioat, desc);
/* we leave the channel locked to ensure in order submission */
return &desc->txd;
}
/**
* ioat2_free_chan_resources - release all the descriptors
* @chan: the channel to be cleaned
*/
void ioat2_free_chan_resources(struct dma_chan *c)
{
struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
struct ioat_chan_common *chan = &ioat->base;
struct ioatdma_device *device = chan->device;
struct ioat_ring_ent *desc;
const u16 total_descs = 1 << ioat->alloc_order;
int descs;
int i;
/* Before freeing channel resources first check
* if they have been previously allocated for this channel.
*/
if (!ioat->ring)
return;
tasklet_disable(&chan->cleanup_task);
del_timer_sync(&chan->timer);
device->cleanup_tasklet((unsigned long) ioat);
/* Delay 100ms after reset to allow internal DMA logic to quiesce
* before removing DMA descriptor resources.
*/
writeb(IOAT_CHANCMD_RESET,
chan->reg_base + IOAT_CHANCMD_OFFSET(chan->device->version));
mdelay(100);
spin_lock_bh(&ioat->ring_lock);
descs = ioat2_ring_space(ioat);
dev_dbg(to_dev(chan), "freeing %d idle descriptors\n", descs);
for (i = 0; i < descs; i++) {
desc = ioat2_get_ring_ent(ioat, ioat->head + i);
ioat2_free_ring_ent(desc, c);
}
if (descs < total_descs)
dev_err(to_dev(chan), "Freeing %d in use descriptors!\n",
total_descs - descs);
for (i = 0; i < total_descs - descs; i++) {
desc = ioat2_get_ring_ent(ioat, ioat->tail + i);
dump_desc_dbg(ioat, desc);
ioat2_free_ring_ent(desc, c);
}
kfree(ioat->ring);
ioat->ring = NULL;
ioat->alloc_order = 0;
pci_pool_free(device->completion_pool, chan->completion,
chan->completion_dma);
spin_unlock_bh(&ioat->ring_lock);
chan->last_completion = 0;
chan->completion_dma = 0;
ioat->pending = 0;
ioat->dmacount = 0;
}
enum dma_status
ioat2_is_complete(struct dma_chan *c, dma_cookie_t cookie,
dma_cookie_t *done, dma_cookie_t *used)
{
struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
struct ioatdma_device *device = ioat->base.device;
if (ioat_is_complete(c, cookie, done, used) == DMA_SUCCESS)
return DMA_SUCCESS;
device->cleanup_tasklet((unsigned long) ioat);
return ioat_is_complete(c, cookie, done, used);
}
static ssize_t ring_size_show(struct dma_chan *c, char *page)
{
struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
return sprintf(page, "%d\n", (1 << ioat->alloc_order) & ~1);
}
static struct ioat_sysfs_entry ring_size_attr = __ATTR_RO(ring_size);
static ssize_t ring_active_show(struct dma_chan *c, char *page)
{
struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
/* ...taken outside the lock, no need to be precise */
return sprintf(page, "%d\n", ioat2_ring_active(ioat));
}
static struct ioat_sysfs_entry ring_active_attr = __ATTR_RO(ring_active);
static struct attribute *ioat2_attrs[] = {
&ring_size_attr.attr,
&ring_active_attr.attr,
&ioat_cap_attr.attr,
&ioat_version_attr.attr,
NULL,
};
struct kobj_type ioat2_ktype = {
.sysfs_ops = &ioat_sysfs_ops,
.default_attrs = ioat2_attrs,
};
int __devinit ioat2_dma_probe(struct ioatdma_device *device, int dca)
{
struct pci_dev *pdev = device->pdev;
struct dma_device *dma;
struct dma_chan *c;
struct ioat_chan_common *chan;
int err;
device->enumerate_channels = ioat2_enumerate_channels;
device->cleanup_tasklet = ioat2_cleanup_tasklet;
device->timer_fn = ioat2_timer_event;
device->self_test = ioat_dma_self_test;
dma = &device->common;
dma->device_prep_dma_memcpy = ioat2_dma_prep_memcpy_lock;
dma->device_issue_pending = ioat2_issue_pending;
dma->device_alloc_chan_resources = ioat2_alloc_chan_resources;
dma->device_free_chan_resources = ioat2_free_chan_resources;
dma->device_is_tx_complete = ioat2_is_complete;
err = ioat_probe(device);
if (err)
return err;
ioat_set_tcp_copy_break(2048);
list_for_each_entry(c, &dma->channels, device_node) {
chan = to_chan_common(c);
writel(IOAT_DCACTRL_CMPL_WRITE_ENABLE | IOAT_DMA_DCA_ANY_CPU,
chan->reg_base + IOAT_DCACTRL_OFFSET);
}
err = ioat_register(device);
if (err)
return err;
ioat_kobject_add(device, &ioat2_ktype);
if (dca)
device->dca = ioat2_dca_init(pdev, device->reg_base);
return err;
}

190
drivers/dma/ioat/dma_v2.h Normal file
View File

@ -0,0 +1,190 @@
/*
* Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc., 59
* Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
* The full GNU General Public License is included in this distribution in the
* file called COPYING.
*/
#ifndef IOATDMA_V2_H
#define IOATDMA_V2_H
#include <linux/dmaengine.h>
#include "dma.h"
#include "hw.h"
extern int ioat_pending_level;
extern int ioat_ring_alloc_order;
/*
* workaround for IOAT ver.3.0 null descriptor issue
* (channel returns error when size is 0)
*/
#define NULL_DESC_BUFFER_SIZE 1
#define IOAT_MAX_ORDER 16
#define ioat_get_alloc_order() \
(min(ioat_ring_alloc_order, IOAT_MAX_ORDER))
#define ioat_get_max_alloc_order() \
(min(ioat_ring_max_alloc_order, IOAT_MAX_ORDER))
/* struct ioat2_dma_chan - ioat v2 / v3 channel attributes
* @base: common ioat channel parameters
* @xfercap_log; log2 of channel max transfer length (for fast division)
* @head: allocated index
* @issued: hardware notification point
* @tail: cleanup index
* @pending: lock free indicator for issued != head
* @dmacount: identical to 'head' except for occasionally resetting to zero
* @alloc_order: log2 of the number of allocated descriptors
* @ring: software ring buffer implementation of hardware ring
* @ring_lock: protects ring attributes
*/
struct ioat2_dma_chan {
struct ioat_chan_common base;
size_t xfercap_log;
u16 head;
u16 issued;
u16 tail;
u16 dmacount;
u16 alloc_order;
int pending;
struct ioat_ring_ent **ring;
spinlock_t ring_lock;
};
static inline struct ioat2_dma_chan *to_ioat2_chan(struct dma_chan *c)
{
struct ioat_chan_common *chan = to_chan_common(c);
return container_of(chan, struct ioat2_dma_chan, base);
}
static inline u16 ioat2_ring_mask(struct ioat2_dma_chan *ioat)
{
return (1 << ioat->alloc_order) - 1;
}
/* count of descriptors in flight with the engine */
static inline u16 ioat2_ring_active(struct ioat2_dma_chan *ioat)
{
return (ioat->head - ioat->tail) & ioat2_ring_mask(ioat);
}
/* count of descriptors pending submission to hardware */
static inline u16 ioat2_ring_pending(struct ioat2_dma_chan *ioat)
{
return (ioat->head - ioat->issued) & ioat2_ring_mask(ioat);
}
static inline u16 ioat2_ring_space(struct ioat2_dma_chan *ioat)
{
u16 num_descs = ioat2_ring_mask(ioat) + 1;
u16 active = ioat2_ring_active(ioat);
BUG_ON(active > num_descs);
return num_descs - active;
}
/* assumes caller already checked space */
static inline u16 ioat2_desc_alloc(struct ioat2_dma_chan *ioat, u16 len)
{
ioat->head += len;
return ioat->head - len;
}
static inline u16 ioat2_xferlen_to_descs(struct ioat2_dma_chan *ioat, size_t len)
{
u16 num_descs = len >> ioat->xfercap_log;
num_descs += !!(len & ((1 << ioat->xfercap_log) - 1));
return num_descs;
}
/**
* struct ioat_ring_ent - wrapper around hardware descriptor
* @hw: hardware DMA descriptor (for memcpy)
* @fill: hardware fill descriptor
* @xor: hardware xor descriptor
* @xor_ex: hardware xor extension descriptor
* @pq: hardware pq descriptor
* @pq_ex: hardware pq extension descriptor
* @pqu: hardware pq update descriptor
* @raw: hardware raw (un-typed) descriptor
* @txd: the generic software descriptor for all engines
* @len: total transaction length for unmap
* @result: asynchronous result of validate operations
* @id: identifier for debug
*/
struct ioat_ring_ent {
union {
struct ioat_dma_descriptor *hw;
struct ioat_fill_descriptor *fill;
struct ioat_xor_descriptor *xor;
struct ioat_xor_ext_descriptor *xor_ex;
struct ioat_pq_descriptor *pq;
struct ioat_pq_ext_descriptor *pq_ex;
struct ioat_pq_update_descriptor *pqu;
struct ioat_raw_descriptor *raw;
};
size_t len;
struct dma_async_tx_descriptor txd;
enum sum_check_flags *result;
#ifdef DEBUG
int id;
#endif
};
static inline struct ioat_ring_ent *
ioat2_get_ring_ent(struct ioat2_dma_chan *ioat, u16 idx)
{
return ioat->ring[idx & ioat2_ring_mask(ioat)];
}
static inline void ioat2_set_chainaddr(struct ioat2_dma_chan *ioat, u64 addr)
{
struct ioat_chan_common *chan = &ioat->base;
writel(addr & 0x00000000FFFFFFFF,
chan->reg_base + IOAT2_CHAINADDR_OFFSET_LOW);
writel(addr >> 32,
chan->reg_base + IOAT2_CHAINADDR_OFFSET_HIGH);
}
int __devinit ioat2_dma_probe(struct ioatdma_device *dev, int dca);
int __devinit ioat3_dma_probe(struct ioatdma_device *dev, int dca);
struct dca_provider * __devinit ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase);
struct dca_provider * __devinit ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase);
int ioat2_alloc_and_lock(u16 *idx, struct ioat2_dma_chan *ioat, int num_descs);
int ioat2_enumerate_channels(struct ioatdma_device *device);
struct dma_async_tx_descriptor *
ioat2_dma_prep_memcpy_lock(struct dma_chan *c, dma_addr_t dma_dest,
dma_addr_t dma_src, size_t len, unsigned long flags);
void ioat2_issue_pending(struct dma_chan *chan);
int ioat2_alloc_chan_resources(struct dma_chan *c);
void ioat2_free_chan_resources(struct dma_chan *c);
enum dma_status ioat2_is_complete(struct dma_chan *c, dma_cookie_t cookie,
dma_cookie_t *done, dma_cookie_t *used);
void __ioat2_restart_chan(struct ioat2_dma_chan *ioat);
bool reshape_ring(struct ioat2_dma_chan *ioat, int order);
void __ioat2_issue_pending(struct ioat2_dma_chan *ioat);
void ioat2_cleanup_tasklet(unsigned long data);
void ioat2_timer_event(unsigned long data);
extern struct kobj_type ioat2_ktype;
extern struct kmem_cache *ioat2_cache;
#endif /* IOATDMA_V2_H */

1223
drivers/dma/ioat/dma_v3.c Normal file

File diff suppressed because it is too large Load Diff

215
drivers/dma/ioat/hw.h Normal file
View File

@ -0,0 +1,215 @@
/*
* Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc., 59
* Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
* The full GNU General Public License is included in this distribution in the
* file called COPYING.
*/
#ifndef _IOAT_HW_H_
#define _IOAT_HW_H_
/* PCI Configuration Space Values */
#define IOAT_PCI_VID 0x8086
#define IOAT_MMIO_BAR 0
/* CB device ID's */
#define IOAT_PCI_DID_5000 0x1A38
#define IOAT_PCI_DID_CNB 0x360B
#define IOAT_PCI_DID_SCNB 0x65FF
#define IOAT_PCI_DID_SNB 0x402F
#define IOAT_PCI_RID 0x00
#define IOAT_PCI_SVID 0x8086
#define IOAT_PCI_SID 0x8086
#define IOAT_VER_1_2 0x12 /* Version 1.2 */
#define IOAT_VER_2_0 0x20 /* Version 2.0 */
#define IOAT_VER_3_0 0x30 /* Version 3.0 */
#define IOAT_VER_3_2 0x32 /* Version 3.2 */
struct ioat_dma_descriptor {
uint32_t size;
union {
uint32_t ctl;
struct {
unsigned int int_en:1;
unsigned int src_snoop_dis:1;
unsigned int dest_snoop_dis:1;
unsigned int compl_write:1;
unsigned int fence:1;
unsigned int null:1;
unsigned int src_brk:1;
unsigned int dest_brk:1;
unsigned int bundle:1;
unsigned int dest_dca:1;
unsigned int hint:1;
unsigned int rsvd2:13;
#define IOAT_OP_COPY 0x00
unsigned int op:8;
} ctl_f;
};
uint64_t src_addr;
uint64_t dst_addr;
uint64_t next;
uint64_t rsv1;
uint64_t rsv2;
/* store some driver data in an unused portion of the descriptor */
union {
uint64_t user1;
uint64_t tx_cnt;
};
uint64_t user2;
};
struct ioat_fill_descriptor {
uint32_t size;
union {
uint32_t ctl;
struct {
unsigned int int_en:1;
unsigned int rsvd:1;
unsigned int dest_snoop_dis:1;
unsigned int compl_write:1;
unsigned int fence:1;
unsigned int rsvd2:2;
unsigned int dest_brk:1;
unsigned int bundle:1;
unsigned int rsvd4:15;
#define IOAT_OP_FILL 0x01
unsigned int op:8;
} ctl_f;
};
uint64_t src_data;
uint64_t dst_addr;
uint64_t next;
uint64_t rsv1;
uint64_t next_dst_addr;
uint64_t user1;
uint64_t user2;
};
struct ioat_xor_descriptor {
uint32_t size;
union {
uint32_t ctl;
struct {
unsigned int int_en:1;
unsigned int src_snoop_dis:1;
unsigned int dest_snoop_dis:1;
unsigned int compl_write:1;
unsigned int fence:1;
unsigned int src_cnt:3;
unsigned int bundle:1;
unsigned int dest_dca:1;
unsigned int hint:1;
unsigned int rsvd:13;
#define IOAT_OP_XOR 0x87
#define IOAT_OP_XOR_VAL 0x88
unsigned int op:8;
} ctl_f;
};
uint64_t src_addr;
uint64_t dst_addr;
uint64_t next;
uint64_t src_addr2;
uint64_t src_addr3;
uint64_t src_addr4;
uint64_t src_addr5;
};
struct ioat_xor_ext_descriptor {
uint64_t src_addr6;
uint64_t src_addr7;
uint64_t src_addr8;
uint64_t next;
uint64_t rsvd[4];
};
struct ioat_pq_descriptor {
uint32_t size;
union {
uint32_t ctl;
struct {
unsigned int int_en:1;
unsigned int src_snoop_dis:1;
unsigned int dest_snoop_dis:1;
unsigned int compl_write:1;
unsigned int fence:1;
unsigned int src_cnt:3;
unsigned int bundle:1;
unsigned int dest_dca:1;
unsigned int hint:1;
unsigned int p_disable:1;
unsigned int q_disable:1;
unsigned int rsvd:11;
#define IOAT_OP_PQ 0x89
#define IOAT_OP_PQ_VAL 0x8a
unsigned int op:8;
} ctl_f;
};
uint64_t src_addr;
uint64_t p_addr;
uint64_t next;
uint64_t src_addr2;
uint64_t src_addr3;
uint8_t coef[8];
uint64_t q_addr;
};
struct ioat_pq_ext_descriptor {
uint64_t src_addr4;
uint64_t src_addr5;
uint64_t src_addr6;
uint64_t next;
uint64_t src_addr7;
uint64_t src_addr8;
uint64_t rsvd[2];
};
struct ioat_pq_update_descriptor {
uint32_t size;
union {
uint32_t ctl;
struct {
unsigned int int_en:1;
unsigned int src_snoop_dis:1;
unsigned int dest_snoop_dis:1;
unsigned int compl_write:1;
unsigned int fence:1;
unsigned int src_cnt:3;
unsigned int bundle:1;
unsigned int dest_dca:1;
unsigned int hint:1;
unsigned int p_disable:1;
unsigned int q_disable:1;
unsigned int rsvd:3;
unsigned int coef:8;
#define IOAT_OP_PQ_UP 0x8b
unsigned int op:8;
} ctl_f;
};
uint64_t src_addr;
uint64_t p_addr;
uint64_t next;
uint64_t src_addr2;
uint64_t p_src;
uint64_t q_src;
uint64_t q_addr;
};
struct ioat_raw_descriptor {
uint64_t field[8];
};
#endif

210
drivers/dma/ioat/pci.c Normal file
View File

@ -0,0 +1,210 @@
/*
* Intel I/OAT DMA Linux driver
* Copyright(c) 2007 - 2009 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
*
* The full GNU General Public License is included in this distribution in
* the file called "COPYING".
*
*/
/*
* This driver supports an Intel I/OAT DMA engine, which does asynchronous
* copy operations.
*/
#include <linux/init.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/interrupt.h>
#include <linux/dca.h>
#include "dma.h"
#include "dma_v2.h"
#include "registers.h"
#include "hw.h"
MODULE_VERSION(IOAT_DMA_VERSION);
MODULE_LICENSE("Dual BSD/GPL");
MODULE_AUTHOR("Intel Corporation");
static struct pci_device_id ioat_pci_tbl[] = {
/* I/OAT v1 platforms */
{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT) },
{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_CNB) },
{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SCNB) },
{ PCI_VDEVICE(UNISYS, PCI_DEVICE_ID_UNISYS_DMA_DIRECTOR) },
/* I/OAT v2 platforms */
{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB) },
/* I/OAT v3 platforms */
{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG0) },
{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG1) },
{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG2) },
{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG3) },
{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG4) },
{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG5) },
{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG6) },
{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG7) },
/* I/OAT v3.2 platforms */
{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF0) },
{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF1) },
{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF2) },
{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF3) },
{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF4) },
{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF5) },
{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF6) },
{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF7) },
{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF8) },
{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF9) },
{ 0, }
};
MODULE_DEVICE_TABLE(pci, ioat_pci_tbl);
static int __devinit ioat_pci_probe(struct pci_dev *pdev,
const struct pci_device_id *id);
static void __devexit ioat_remove(struct pci_dev *pdev);
static int ioat_dca_enabled = 1;
module_param(ioat_dca_enabled, int, 0644);
MODULE_PARM_DESC(ioat_dca_enabled, "control support of dca service (default: 1)");
struct kmem_cache *ioat2_cache;
#define DRV_NAME "ioatdma"
static struct pci_driver ioat_pci_driver = {
.name = DRV_NAME,
.id_table = ioat_pci_tbl,
.probe = ioat_pci_probe,
.remove = __devexit_p(ioat_remove),
};
static struct ioatdma_device *
alloc_ioatdma(struct pci_dev *pdev, void __iomem *iobase)
{
struct device *dev = &pdev->dev;
struct ioatdma_device *d = devm_kzalloc(dev, sizeof(*d), GFP_KERNEL);
if (!d)
return NULL;
d->pdev = pdev;
d->reg_base = iobase;
return d;
}
static int __devinit ioat_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
void __iomem * const *iomap;
struct device *dev = &pdev->dev;
struct ioatdma_device *device;
int err;
err = pcim_enable_device(pdev);
if (err)
return err;
err = pcim_iomap_regions(pdev, 1 << IOAT_MMIO_BAR, DRV_NAME);
if (err)
return err;
iomap = pcim_iomap_table(pdev);
if (!iomap)
return -ENOMEM;
err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
if (err)
err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
if (err)
return err;
err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
if (err)
err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
if (err)
return err;
device = devm_kzalloc(dev, sizeof(*device), GFP_KERNEL);
if (!device)
return -ENOMEM;
pci_set_master(pdev);
device = alloc_ioatdma(pdev, iomap[IOAT_MMIO_BAR]);
if (!device)
return -ENOMEM;
pci_set_drvdata(pdev, device);
device->version = readb(device->reg_base + IOAT_VER_OFFSET);
if (device->version == IOAT_VER_1_2)
err = ioat1_dma_probe(device, ioat_dca_enabled);
else if (device->version == IOAT_VER_2_0)
err = ioat2_dma_probe(device, ioat_dca_enabled);
else if (device->version >= IOAT_VER_3_0)
err = ioat3_dma_probe(device, ioat_dca_enabled);
else
return -ENODEV;
if (err) {
dev_err(dev, "Intel(R) I/OAT DMA Engine init failed\n");
return -ENODEV;
}
return 0;
}
static void __devexit ioat_remove(struct pci_dev *pdev)
{
struct ioatdma_device *device = pci_get_drvdata(pdev);
if (!device)
return;
dev_err(&pdev->dev, "Removing dma and dca services\n");
if (device->dca) {
unregister_dca_provider(device->dca, &pdev->dev);
free_dca_provider(device->dca);
device->dca = NULL;
}
ioat_dma_remove(device);
}
static int __init ioat_init_module(void)
{
int err;
pr_info("%s: Intel(R) QuickData Technology Driver %s\n",
DRV_NAME, IOAT_DMA_VERSION);
ioat2_cache = kmem_cache_create("ioat2", sizeof(struct ioat_ring_ent),
0, SLAB_HWCACHE_ALIGN, NULL);
if (!ioat2_cache)
return -ENOMEM;
err = pci_register_driver(&ioat_pci_driver);
if (err)
kmem_cache_destroy(ioat2_cache);
return err;
}
module_init(ioat_init_module);
static void __exit ioat_exit_module(void)
{
pci_unregister_driver(&ioat_pci_driver);
kmem_cache_destroy(ioat2_cache);
}
module_exit(ioat_exit_module);

View File

@ -64,18 +64,37 @@
#define IOAT_DEVICE_STATUS_OFFSET 0x0E /* 16-bit */
#define IOAT_DEVICE_STATUS_DEGRADED_MODE 0x0001
#define IOAT_DEVICE_MMIO_RESTRICTED 0x0002
#define IOAT_DEVICE_MEMORY_BYPASS 0x0004
#define IOAT_DEVICE_ADDRESS_REMAPPING 0x0008
#define IOAT_DMA_CAP_OFFSET 0x10 /* 32-bit */
#define IOAT_CAP_PAGE_BREAK 0x00000001
#define IOAT_CAP_CRC 0x00000002
#define IOAT_CAP_SKIP_MARKER 0x00000004
#define IOAT_CAP_DCA 0x00000010
#define IOAT_CAP_CRC_MOVE 0x00000020
#define IOAT_CAP_FILL_BLOCK 0x00000040
#define IOAT_CAP_APIC 0x00000080
#define IOAT_CAP_XOR 0x00000100
#define IOAT_CAP_PQ 0x00000200
#define IOAT_CHANNEL_MMIO_SIZE 0x80 /* Each Channel MMIO space is this size */
/* DMA Channel Registers */
#define IOAT_CHANCTRL_OFFSET 0x00 /* 16-bit Channel Control Register */
#define IOAT_CHANCTRL_CHANNEL_PRIORITY_MASK 0xF000
#define IOAT3_CHANCTRL_COMPL_DCA_EN 0x0200
#define IOAT_CHANCTRL_CHANNEL_IN_USE 0x0100
#define IOAT_CHANCTRL_DESCRIPTOR_ADDR_SNOOP_CONTROL 0x0020
#define IOAT_CHANCTRL_ERR_INT_EN 0x0010
#define IOAT_CHANCTRL_ANY_ERR_ABORT_EN 0x0008
#define IOAT_CHANCTRL_ERR_COMPLETION_EN 0x0004
#define IOAT_CHANCTRL_INT_DISABLE 0x0001
#define IOAT_CHANCTRL_INT_REARM 0x0001
#define IOAT_CHANCTRL_RUN (IOAT_CHANCTRL_INT_REARM |\
IOAT_CHANCTRL_ERR_COMPLETION_EN |\
IOAT_CHANCTRL_ANY_ERR_ABORT_EN |\
IOAT_CHANCTRL_ERR_INT_EN)
#define IOAT_DMA_COMP_OFFSET 0x02 /* 16-bit DMA channel compatibility */
#define IOAT_DMA_COMP_V1 0x0001 /* Compatibility with DMA version 1 */
@ -94,14 +113,14 @@
#define IOAT2_CHANSTS_OFFSET_HIGH 0x0C
#define IOAT_CHANSTS_OFFSET_HIGH(ver) ((ver) < IOAT_VER_2_0 \
? IOAT1_CHANSTS_OFFSET_HIGH : IOAT2_CHANSTS_OFFSET_HIGH)
#define IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR ~0x3F
#define IOAT_CHANSTS_SOFT_ERR 0x0000000000000010
#define IOAT_CHANSTS_UNAFFILIATED_ERR 0x0000000000000008
#define IOAT_CHANSTS_DMA_TRANSFER_STATUS 0x0000000000000007
#define IOAT_CHANSTS_DMA_TRANSFER_STATUS_ACTIVE 0x0
#define IOAT_CHANSTS_DMA_TRANSFER_STATUS_DONE 0x1
#define IOAT_CHANSTS_DMA_TRANSFER_STATUS_SUSPENDED 0x2
#define IOAT_CHANSTS_DMA_TRANSFER_STATUS_HALTED 0x3
#define IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR (~0x3fULL)
#define IOAT_CHANSTS_SOFT_ERR 0x10ULL
#define IOAT_CHANSTS_UNAFFILIATED_ERR 0x8ULL
#define IOAT_CHANSTS_STATUS 0x7ULL
#define IOAT_CHANSTS_ACTIVE 0x0
#define IOAT_CHANSTS_DONE 0x1
#define IOAT_CHANSTS_SUSPENDED 0x2
#define IOAT_CHANSTS_HALTED 0x3
@ -204,22 +223,27 @@
#define IOAT_CDAR_OFFSET_HIGH 0x24
#define IOAT_CHANERR_OFFSET 0x28 /* 32-bit Channel Error Register */
#define IOAT_CHANERR_DMA_TRANSFER_SRC_ADDR_ERR 0x0001
#define IOAT_CHANERR_DMA_TRANSFER_DEST_ADDR_ERR 0x0002
#define IOAT_CHANERR_NEXT_DESCRIPTOR_ADDR_ERR 0x0004
#define IOAT_CHANERR_NEXT_DESCRIPTOR_ALIGNMENT_ERR 0x0008
#define IOAT_CHANERR_SRC_ADDR_ERR 0x0001
#define IOAT_CHANERR_DEST_ADDR_ERR 0x0002
#define IOAT_CHANERR_NEXT_ADDR_ERR 0x0004
#define IOAT_CHANERR_NEXT_DESC_ALIGN_ERR 0x0008
#define IOAT_CHANERR_CHAIN_ADDR_VALUE_ERR 0x0010
#define IOAT_CHANERR_CHANCMD_ERR 0x0020
#define IOAT_CHANERR_CHIPSET_UNCORRECTABLE_DATA_INTEGRITY_ERR 0x0040
#define IOAT_CHANERR_DMA_UNCORRECTABLE_DATA_INTEGRITY_ERR 0x0080
#define IOAT_CHANERR_READ_DATA_ERR 0x0100
#define IOAT_CHANERR_WRITE_DATA_ERR 0x0200
#define IOAT_CHANERR_DESCRIPTOR_CONTROL_ERR 0x0400
#define IOAT_CHANERR_DESCRIPTOR_LENGTH_ERR 0x0800
#define IOAT_CHANERR_CONTROL_ERR 0x0400
#define IOAT_CHANERR_LENGTH_ERR 0x0800
#define IOAT_CHANERR_COMPLETION_ADDR_ERR 0x1000
#define IOAT_CHANERR_INT_CONFIGURATION_ERR 0x2000
#define IOAT_CHANERR_SOFT_ERR 0x4000
#define IOAT_CHANERR_UNAFFILIATED_ERR 0x8000
#define IOAT_CHANERR_XOR_P_OR_CRC_ERR 0x10000
#define IOAT_CHANERR_XOR_Q_ERR 0x20000
#define IOAT_CHANERR_DESCRIPTOR_COUNT_ERR 0x40000
#define IOAT_CHANERR_HANDLE_MASK (IOAT_CHANERR_XOR_P_OR_CRC_ERR | IOAT_CHANERR_XOR_Q_ERR)
#define IOAT_CHANERR_MASK_OFFSET 0x2C /* 32-bit Channel Error Register */

File diff suppressed because it is too large Load Diff

View File

@ -1,165 +0,0 @@
/*
* Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc., 59
* Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
* The full GNU General Public License is included in this distribution in the
* file called COPYING.
*/
#ifndef IOATDMA_H
#define IOATDMA_H
#include <linux/dmaengine.h>
#include "ioatdma_hw.h"
#include <linux/init.h>
#include <linux/dmapool.h>
#include <linux/cache.h>
#include <linux/pci_ids.h>
#include <net/tcp.h>
#define IOAT_DMA_VERSION "3.64"
enum ioat_interrupt {
none = 0,
msix_multi_vector = 1,
msix_single_vector = 2,
msi = 3,
intx = 4,
};
#define IOAT_LOW_COMPLETION_MASK 0xffffffc0
#define IOAT_DMA_DCA_ANY_CPU ~0
#define IOAT_WATCHDOG_PERIOD (2 * HZ)
/**
* struct ioatdma_device - internal representation of a IOAT device
* @pdev: PCI-Express device
* @reg_base: MMIO register space base address
* @dma_pool: for allocating DMA descriptors
* @common: embedded struct dma_device
* @version: version of ioatdma device
* @irq_mode: which style irq to use
* @msix_entries: irq handlers
* @idx: per channel data
*/
struct ioatdma_device {
struct pci_dev *pdev;
void __iomem *reg_base;
struct pci_pool *dma_pool;
struct pci_pool *completion_pool;
struct dma_device common;
u8 version;
enum ioat_interrupt irq_mode;
struct delayed_work work;
struct msix_entry msix_entries[4];
struct ioat_dma_chan *idx[4];
};
/**
* struct ioat_dma_chan - internal representation of a DMA channel
*/
struct ioat_dma_chan {
void __iomem *reg_base;
dma_cookie_t completed_cookie;
unsigned long last_completion;
unsigned long last_completion_time;
size_t xfercap; /* XFERCAP register value expanded out */
spinlock_t cleanup_lock;
spinlock_t desc_lock;
struct list_head free_desc;
struct list_head used_desc;
unsigned long watchdog_completion;
int watchdog_tcp_cookie;
u32 watchdog_last_tcp_cookie;
struct delayed_work work;
int pending;
int dmacount;
int desccount;
struct ioatdma_device *device;
struct dma_chan common;
dma_addr_t completion_addr;
union {
u64 full; /* HW completion writeback */
struct {
u32 low;
u32 high;
};
} *completion_virt;
unsigned long last_compl_desc_addr_hw;
struct tasklet_struct cleanup_task;
};
/* wrapper around hardware descriptor format + additional software fields */
/**
* struct ioat_desc_sw - wrapper around hardware descriptor
* @hw: hardware DMA descriptor
* @node: this descriptor will either be on the free list,
* or attached to a transaction list (async_tx.tx_list)
* @tx_cnt: number of descriptors required to complete the transaction
* @async_tx: the generic software descriptor for all engines
*/
struct ioat_desc_sw {
struct ioat_dma_descriptor *hw;
struct list_head node;
int tx_cnt;
size_t len;
dma_addr_t src;
dma_addr_t dst;
struct dma_async_tx_descriptor async_tx;
};
static inline void ioat_set_tcp_copy_break(struct ioatdma_device *dev)
{
#ifdef CONFIG_NET_DMA
switch (dev->version) {
case IOAT_VER_1_2:
sysctl_tcp_dma_copybreak = 4096;
break;
case IOAT_VER_2_0:
sysctl_tcp_dma_copybreak = 2048;
break;
case IOAT_VER_3_0:
sysctl_tcp_dma_copybreak = 262144;
break;
}
#endif
}
#if defined(CONFIG_INTEL_IOATDMA) || defined(CONFIG_INTEL_IOATDMA_MODULE)
struct ioatdma_device *ioat_dma_probe(struct pci_dev *pdev,
void __iomem *iobase);
void ioat_dma_remove(struct ioatdma_device *device);
struct dca_provider *ioat_dca_init(struct pci_dev *pdev, void __iomem *iobase);
struct dca_provider *ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase);
struct dca_provider *ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase);
#else
#define ioat_dma_probe(pdev, iobase) NULL
#define ioat_dma_remove(device) do { } while (0)
#define ioat_dca_init(pdev, iobase) NULL
#define ioat2_dca_init(pdev, iobase) NULL
#define ioat3_dca_init(pdev, iobase) NULL
#endif
#endif /* IOATDMA_H */

View File

@ -1,70 +0,0 @@
/*
* Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc., 59
* Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
* The full GNU General Public License is included in this distribution in the
* file called COPYING.
*/
#ifndef _IOAT_HW_H_
#define _IOAT_HW_H_
/* PCI Configuration Space Values */
#define IOAT_PCI_VID 0x8086
/* CB device ID's */
#define IOAT_PCI_DID_5000 0x1A38
#define IOAT_PCI_DID_CNB 0x360B
#define IOAT_PCI_DID_SCNB 0x65FF
#define IOAT_PCI_DID_SNB 0x402F
#define IOAT_PCI_RID 0x00
#define IOAT_PCI_SVID 0x8086
#define IOAT_PCI_SID 0x8086
#define IOAT_VER_1_2 0x12 /* Version 1.2 */
#define IOAT_VER_2_0 0x20 /* Version 2.0 */
#define IOAT_VER_3_0 0x30 /* Version 3.0 */
struct ioat_dma_descriptor {
uint32_t size;
uint32_t ctl;
uint64_t src_addr;
uint64_t dst_addr;
uint64_t next;
uint64_t rsv1;
uint64_t rsv2;
uint64_t user1;
uint64_t user2;
};
#define IOAT_DMA_DESCRIPTOR_CTL_INT_GN 0x00000001
#define IOAT_DMA_DESCRIPTOR_CTL_SRC_SN 0x00000002
#define IOAT_DMA_DESCRIPTOR_CTL_DST_SN 0x00000004
#define IOAT_DMA_DESCRIPTOR_CTL_CP_STS 0x00000008
#define IOAT_DMA_DESCRIPTOR_CTL_FRAME 0x00000010
#define IOAT_DMA_DESCRIPTOR_NUL 0x00000020
#define IOAT_DMA_DESCRIPTOR_CTL_SP_BRK 0x00000040
#define IOAT_DMA_DESCRIPTOR_CTL_DP_BRK 0x00000080
#define IOAT_DMA_DESCRIPTOR_CTL_BNDL 0x00000100
#define IOAT_DMA_DESCRIPTOR_CTL_DCA 0x00000200
#define IOAT_DMA_DESCRIPTOR_CTL_BUFHINT 0x00000400
#define IOAT_DMA_DESCRIPTOR_CTL_OPCODE_CONTEXT 0xFF000000
#define IOAT_DMA_DESCRIPTOR_CTL_OPCODE_DMA 0x00000000
#define IOAT_DMA_DESCRIPTOR_CTL_CONTEXT_DCA 0x00000001
#define IOAT_DMA_DESCRIPTOR_CTL_OPCODE_MASK 0xFF000000
#endif

View File

@ -31,6 +31,7 @@
#include <linux/platform_device.h>
#include <linux/memory.h>
#include <linux/ioport.h>
#include <linux/raid/pq.h>
#include <mach/adma.h>
@ -57,31 +58,14 @@ static void iop_adma_free_slots(struct iop_adma_desc_slot *slot)
}
}
static dma_cookie_t
iop_adma_run_tx_complete_actions(struct iop_adma_desc_slot *desc,
struct iop_adma_chan *iop_chan, dma_cookie_t cookie)
static void
iop_desc_unmap(struct iop_adma_chan *iop_chan, struct iop_adma_desc_slot *desc)
{
BUG_ON(desc->async_tx.cookie < 0);
if (desc->async_tx.cookie > 0) {
cookie = desc->async_tx.cookie;
desc->async_tx.cookie = 0;
/* call the callback (must not sleep or submit new
* operations to this channel)
*/
if (desc->async_tx.callback)
desc->async_tx.callback(
desc->async_tx.callback_param);
/* unmap dma addresses
* (unmap_single vs unmap_page?)
*/
if (desc->group_head && desc->unmap_len) {
struct dma_async_tx_descriptor *tx = &desc->async_tx;
struct iop_adma_desc_slot *unmap = desc->group_head;
struct device *dev =
&iop_chan->device->pdev->dev;
struct device *dev = &iop_chan->device->pdev->dev;
u32 len = unmap->unmap_len;
enum dma_ctrl_flags flags = desc->async_tx.flags;
enum dma_ctrl_flags flags = tx->flags;
u32 src_cnt;
dma_addr_t addr;
dma_addr_t dest;
@ -101,21 +85,83 @@ iop_adma_run_tx_complete_actions(struct iop_adma_desc_slot *desc,
if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
while (src_cnt--) {
addr = iop_desc_get_src_addr(unmap,
iop_chan,
src_cnt);
addr = iop_desc_get_src_addr(unmap, iop_chan, src_cnt);
if (addr == dest)
continue;
dma_unmap_page(dev, addr, len,
DMA_TO_DEVICE);
dma_unmap_page(dev, addr, len, DMA_TO_DEVICE);
}
}
desc->group_head = NULL;
}
static void
iop_desc_unmap_pq(struct iop_adma_chan *iop_chan, struct iop_adma_desc_slot *desc)
{
struct dma_async_tx_descriptor *tx = &desc->async_tx;
struct iop_adma_desc_slot *unmap = desc->group_head;
struct device *dev = &iop_chan->device->pdev->dev;
u32 len = unmap->unmap_len;
enum dma_ctrl_flags flags = tx->flags;
u32 src_cnt = unmap->unmap_src_cnt;
dma_addr_t pdest = iop_desc_get_dest_addr(unmap, iop_chan);
dma_addr_t qdest = iop_desc_get_qdest_addr(unmap, iop_chan);
int i;
if (tx->flags & DMA_PREP_CONTINUE)
src_cnt -= 3;
if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP) && !desc->pq_check_result) {
dma_unmap_page(dev, pdest, len, DMA_BIDIRECTIONAL);
dma_unmap_page(dev, qdest, len, DMA_BIDIRECTIONAL);
}
if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
dma_addr_t addr;
for (i = 0; i < src_cnt; i++) {
addr = iop_desc_get_src_addr(unmap, iop_chan, i);
dma_unmap_page(dev, addr, len, DMA_TO_DEVICE);
}
if (desc->pq_check_result) {
dma_unmap_page(dev, pdest, len, DMA_TO_DEVICE);
dma_unmap_page(dev, qdest, len, DMA_TO_DEVICE);
}
}
desc->group_head = NULL;
}
static dma_cookie_t
iop_adma_run_tx_complete_actions(struct iop_adma_desc_slot *desc,
struct iop_adma_chan *iop_chan, dma_cookie_t cookie)
{
struct dma_async_tx_descriptor *tx = &desc->async_tx;
BUG_ON(tx->cookie < 0);
if (tx->cookie > 0) {
cookie = tx->cookie;
tx->cookie = 0;
/* call the callback (must not sleep or submit new
* operations to this channel)
*/
if (tx->callback)
tx->callback(tx->callback_param);
/* unmap dma addresses
* (unmap_single vs unmap_page?)
*/
if (desc->group_head && desc->unmap_len) {
if (iop_desc_is_pq(desc))
iop_desc_unmap_pq(iop_chan, desc);
else
iop_desc_unmap(iop_chan, desc);
}
}
/* run dependent operations */
dma_run_dependencies(&desc->async_tx);
dma_run_dependencies(tx);
return cookie;
}
@ -287,7 +333,12 @@ static void iop_adma_tasklet(unsigned long data)
{
struct iop_adma_chan *iop_chan = (struct iop_adma_chan *) data;
spin_lock(&iop_chan->lock);
/* lockdep will flag depedency submissions as potentially
* recursive locking, this is not the case as a dependency
* submission will never recurse a channels submit routine.
* There are checks in async_tx.c to prevent this.
*/
spin_lock_nested(&iop_chan->lock, SINGLE_DEPTH_NESTING);
__iop_adma_slot_cleanup(iop_chan);
spin_unlock(&iop_chan->lock);
}
@ -370,7 +421,7 @@ retry:
}
alloc_tail->group_head = alloc_start;
alloc_tail->async_tx.cookie = -EBUSY;
list_splice(&chain, &alloc_tail->async_tx.tx_list);
list_splice(&chain, &alloc_tail->tx_list);
iop_chan->last_used = last_used;
iop_desc_clear_next_desc(alloc_start);
iop_desc_clear_next_desc(alloc_tail);
@ -429,7 +480,7 @@ iop_adma_tx_submit(struct dma_async_tx_descriptor *tx)
old_chain_tail = list_entry(iop_chan->chain.prev,
struct iop_adma_desc_slot, chain_node);
list_splice_init(&sw_desc->async_tx.tx_list,
list_splice_init(&sw_desc->tx_list,
&old_chain_tail->chain_node);
/* fix up the hardware chain */
@ -496,6 +547,7 @@ static int iop_adma_alloc_chan_resources(struct dma_chan *chan)
dma_async_tx_descriptor_init(&slot->async_tx, chan);
slot->async_tx.tx_submit = iop_adma_tx_submit;
INIT_LIST_HEAD(&slot->tx_list);
INIT_LIST_HEAD(&slot->chain_node);
INIT_LIST_HEAD(&slot->slot_node);
hw_desc = (char *) iop_chan->device->dma_desc_pool;
@ -660,7 +712,7 @@ iop_adma_prep_dma_xor(struct dma_chan *chan, dma_addr_t dma_dest,
}
static struct dma_async_tx_descriptor *
iop_adma_prep_dma_zero_sum(struct dma_chan *chan, dma_addr_t *dma_src,
iop_adma_prep_dma_xor_val(struct dma_chan *chan, dma_addr_t *dma_src,
unsigned int src_cnt, size_t len, u32 *result,
unsigned long flags)
{
@ -696,6 +748,118 @@ iop_adma_prep_dma_zero_sum(struct dma_chan *chan, dma_addr_t *dma_src,
return sw_desc ? &sw_desc->async_tx : NULL;
}
static struct dma_async_tx_descriptor *
iop_adma_prep_dma_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src,
unsigned int src_cnt, const unsigned char *scf, size_t len,
unsigned long flags)
{
struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
struct iop_adma_desc_slot *sw_desc, *g;
int slot_cnt, slots_per_op;
int continue_srcs;
if (unlikely(!len))
return NULL;
BUG_ON(len > IOP_ADMA_XOR_MAX_BYTE_COUNT);
dev_dbg(iop_chan->device->common.dev,
"%s src_cnt: %d len: %u flags: %lx\n",
__func__, src_cnt, len, flags);
if (dmaf_p_disabled_continue(flags))
continue_srcs = 1+src_cnt;
else if (dmaf_continue(flags))
continue_srcs = 3+src_cnt;
else
continue_srcs = 0+src_cnt;
spin_lock_bh(&iop_chan->lock);
slot_cnt = iop_chan_pq_slot_count(len, continue_srcs, &slots_per_op);
sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
if (sw_desc) {
int i;
g = sw_desc->group_head;
iop_desc_set_byte_count(g, iop_chan, len);
/* even if P is disabled its destination address (bits
* [3:0]) must match Q. It is ok if P points to an
* invalid address, it won't be written.
*/
if (flags & DMA_PREP_PQ_DISABLE_P)
dst[0] = dst[1] & 0x7;
iop_desc_set_pq_addr(g, dst);
sw_desc->unmap_src_cnt = src_cnt;
sw_desc->unmap_len = len;
sw_desc->async_tx.flags = flags;
for (i = 0; i < src_cnt; i++)
iop_desc_set_pq_src_addr(g, i, src[i], scf[i]);
/* if we are continuing a previous operation factor in
* the old p and q values, see the comment for dma_maxpq
* in include/linux/dmaengine.h
*/
if (dmaf_p_disabled_continue(flags))
iop_desc_set_pq_src_addr(g, i++, dst[1], 1);
else if (dmaf_continue(flags)) {
iop_desc_set_pq_src_addr(g, i++, dst[0], 0);
iop_desc_set_pq_src_addr(g, i++, dst[1], 1);
iop_desc_set_pq_src_addr(g, i++, dst[1], 0);
}
iop_desc_init_pq(g, i, flags);
}
spin_unlock_bh(&iop_chan->lock);
return sw_desc ? &sw_desc->async_tx : NULL;
}
static struct dma_async_tx_descriptor *
iop_adma_prep_dma_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src,
unsigned int src_cnt, const unsigned char *scf,
size_t len, enum sum_check_flags *pqres,
unsigned long flags)
{
struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
struct iop_adma_desc_slot *sw_desc, *g;
int slot_cnt, slots_per_op;
if (unlikely(!len))
return NULL;
BUG_ON(len > IOP_ADMA_XOR_MAX_BYTE_COUNT);
dev_dbg(iop_chan->device->common.dev, "%s src_cnt: %d len: %u\n",
__func__, src_cnt, len);
spin_lock_bh(&iop_chan->lock);
slot_cnt = iop_chan_pq_zero_sum_slot_count(len, src_cnt + 2, &slots_per_op);
sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
if (sw_desc) {
/* for validate operations p and q are tagged onto the
* end of the source list
*/
int pq_idx = src_cnt;
g = sw_desc->group_head;
iop_desc_init_pq_zero_sum(g, src_cnt+2, flags);
iop_desc_set_pq_zero_sum_byte_count(g, len);
g->pq_check_result = pqres;
pr_debug("\t%s: g->pq_check_result: %p\n",
__func__, g->pq_check_result);
sw_desc->unmap_src_cnt = src_cnt+2;
sw_desc->unmap_len = len;
sw_desc->async_tx.flags = flags;
while (src_cnt--)
iop_desc_set_pq_zero_sum_src_addr(g, src_cnt,
src[src_cnt],
scf[src_cnt]);
iop_desc_set_pq_zero_sum_addr(g, pq_idx, src);
}
spin_unlock_bh(&iop_chan->lock);
return sw_desc ? &sw_desc->async_tx : NULL;
}
static void iop_adma_free_chan_resources(struct dma_chan *chan)
{
struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
@ -906,7 +1070,7 @@ out:
#define IOP_ADMA_NUM_SRC_TEST 4 /* must be <= 15 */
static int __devinit
iop_adma_xor_zero_sum_self_test(struct iop_adma_device *device)
iop_adma_xor_val_self_test(struct iop_adma_device *device)
{
int i, src_idx;
struct page *dest;
@ -1002,7 +1166,7 @@ iop_adma_xor_zero_sum_self_test(struct iop_adma_device *device)
PAGE_SIZE, DMA_TO_DEVICE);
/* skip zero sum if the capability is not present */
if (!dma_has_cap(DMA_ZERO_SUM, dma_chan->device->cap_mask))
if (!dma_has_cap(DMA_XOR_VAL, dma_chan->device->cap_mask))
goto free_resources;
/* zero sum the sources with the destintation page */
@ -1016,7 +1180,7 @@ iop_adma_xor_zero_sum_self_test(struct iop_adma_device *device)
dma_srcs[i] = dma_map_page(dma_chan->device->dev,
zero_sum_srcs[i], 0, PAGE_SIZE,
DMA_TO_DEVICE);
tx = iop_adma_prep_dma_zero_sum(dma_chan, dma_srcs,
tx = iop_adma_prep_dma_xor_val(dma_chan, dma_srcs,
IOP_ADMA_NUM_SRC_TEST + 1, PAGE_SIZE,
&zero_sum_result,
DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
@ -1072,7 +1236,7 @@ iop_adma_xor_zero_sum_self_test(struct iop_adma_device *device)
dma_srcs[i] = dma_map_page(dma_chan->device->dev,
zero_sum_srcs[i], 0, PAGE_SIZE,
DMA_TO_DEVICE);
tx = iop_adma_prep_dma_zero_sum(dma_chan, dma_srcs,
tx = iop_adma_prep_dma_xor_val(dma_chan, dma_srcs,
IOP_ADMA_NUM_SRC_TEST + 1, PAGE_SIZE,
&zero_sum_result,
DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
@ -1105,6 +1269,170 @@ out:
return err;
}
#ifdef CONFIG_MD_RAID6_PQ
static int __devinit
iop_adma_pq_zero_sum_self_test(struct iop_adma_device *device)
{
/* combined sources, software pq results, and extra hw pq results */
struct page *pq[IOP_ADMA_NUM_SRC_TEST+2+2];
/* ptr to the extra hw pq buffers defined above */
struct page **pq_hw = &pq[IOP_ADMA_NUM_SRC_TEST+2];
/* address conversion buffers (dma_map / page_address) */
void *pq_sw[IOP_ADMA_NUM_SRC_TEST+2];
dma_addr_t pq_src[IOP_ADMA_NUM_SRC_TEST];
dma_addr_t pq_dest[2];
int i;
struct dma_async_tx_descriptor *tx;
struct dma_chan *dma_chan;
dma_cookie_t cookie;
u32 zero_sum_result;
int err = 0;
struct device *dev;
dev_dbg(device->common.dev, "%s\n", __func__);
for (i = 0; i < ARRAY_SIZE(pq); i++) {
pq[i] = alloc_page(GFP_KERNEL);
if (!pq[i]) {
while (i--)
__free_page(pq[i]);
return -ENOMEM;
}
}
/* Fill in src buffers */
for (i = 0; i < IOP_ADMA_NUM_SRC_TEST; i++) {
pq_sw[i] = page_address(pq[i]);
memset(pq_sw[i], 0x11111111 * (1<<i), PAGE_SIZE);
}
pq_sw[i] = page_address(pq[i]);
pq_sw[i+1] = page_address(pq[i+1]);
dma_chan = container_of(device->common.channels.next,
struct dma_chan,
device_node);
if (iop_adma_alloc_chan_resources(dma_chan) < 1) {
err = -ENODEV;
goto out;
}
dev = dma_chan->device->dev;
/* initialize the dests */
memset(page_address(pq_hw[0]), 0 , PAGE_SIZE);
memset(page_address(pq_hw[1]), 0 , PAGE_SIZE);
/* test pq */
pq_dest[0] = dma_map_page(dev, pq_hw[0], 0, PAGE_SIZE, DMA_FROM_DEVICE);
pq_dest[1] = dma_map_page(dev, pq_hw[1], 0, PAGE_SIZE, DMA_FROM_DEVICE);
for (i = 0; i < IOP_ADMA_NUM_SRC_TEST; i++)
pq_src[i] = dma_map_page(dev, pq[i], 0, PAGE_SIZE,
DMA_TO_DEVICE);
tx = iop_adma_prep_dma_pq(dma_chan, pq_dest, pq_src,
IOP_ADMA_NUM_SRC_TEST, (u8 *)raid6_gfexp,
PAGE_SIZE,
DMA_PREP_INTERRUPT |
DMA_CTRL_ACK);
cookie = iop_adma_tx_submit(tx);
iop_adma_issue_pending(dma_chan);
msleep(8);
if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) !=
DMA_SUCCESS) {
dev_err(dev, "Self-test pq timed out, disabling\n");
err = -ENODEV;
goto free_resources;
}
raid6_call.gen_syndrome(IOP_ADMA_NUM_SRC_TEST+2, PAGE_SIZE, pq_sw);
if (memcmp(pq_sw[IOP_ADMA_NUM_SRC_TEST],
page_address(pq_hw[0]), PAGE_SIZE) != 0) {
dev_err(dev, "Self-test p failed compare, disabling\n");
err = -ENODEV;
goto free_resources;
}
if (memcmp(pq_sw[IOP_ADMA_NUM_SRC_TEST+1],
page_address(pq_hw[1]), PAGE_SIZE) != 0) {
dev_err(dev, "Self-test q failed compare, disabling\n");
err = -ENODEV;
goto free_resources;
}
/* test correct zero sum using the software generated pq values */
for (i = 0; i < IOP_ADMA_NUM_SRC_TEST + 2; i++)
pq_src[i] = dma_map_page(dev, pq[i], 0, PAGE_SIZE,
DMA_TO_DEVICE);
zero_sum_result = ~0;
tx = iop_adma_prep_dma_pq_val(dma_chan, &pq_src[IOP_ADMA_NUM_SRC_TEST],
pq_src, IOP_ADMA_NUM_SRC_TEST,
raid6_gfexp, PAGE_SIZE, &zero_sum_result,
DMA_PREP_INTERRUPT|DMA_CTRL_ACK);
cookie = iop_adma_tx_submit(tx);
iop_adma_issue_pending(dma_chan);
msleep(8);
if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) !=
DMA_SUCCESS) {
dev_err(dev, "Self-test pq-zero-sum timed out, disabling\n");
err = -ENODEV;
goto free_resources;
}
if (zero_sum_result != 0) {
dev_err(dev, "Self-test pq-zero-sum failed to validate: %x\n",
zero_sum_result);
err = -ENODEV;
goto free_resources;
}
/* test incorrect zero sum */
i = IOP_ADMA_NUM_SRC_TEST;
memset(pq_sw[i] + 100, 0, 100);
memset(pq_sw[i+1] + 200, 0, 200);
for (i = 0; i < IOP_ADMA_NUM_SRC_TEST + 2; i++)
pq_src[i] = dma_map_page(dev, pq[i], 0, PAGE_SIZE,
DMA_TO_DEVICE);
zero_sum_result = 0;
tx = iop_adma_prep_dma_pq_val(dma_chan, &pq_src[IOP_ADMA_NUM_SRC_TEST],
pq_src, IOP_ADMA_NUM_SRC_TEST,
raid6_gfexp, PAGE_SIZE, &zero_sum_result,
DMA_PREP_INTERRUPT|DMA_CTRL_ACK);
cookie = iop_adma_tx_submit(tx);
iop_adma_issue_pending(dma_chan);
msleep(8);
if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) !=
DMA_SUCCESS) {
dev_err(dev, "Self-test !pq-zero-sum timed out, disabling\n");
err = -ENODEV;
goto free_resources;
}
if (zero_sum_result != (SUM_CHECK_P_RESULT | SUM_CHECK_Q_RESULT)) {
dev_err(dev, "Self-test !pq-zero-sum failed to validate: %x\n",
zero_sum_result);
err = -ENODEV;
goto free_resources;
}
free_resources:
iop_adma_free_chan_resources(dma_chan);
out:
i = ARRAY_SIZE(pq);
while (i--)
__free_page(pq[i]);
return err;
}
#endif
static int __devexit iop_adma_remove(struct platform_device *dev)
{
struct iop_adma_device *device = platform_get_drvdata(dev);
@ -1192,9 +1520,16 @@ static int __devinit iop_adma_probe(struct platform_device *pdev)
dma_dev->max_xor = iop_adma_get_max_xor();
dma_dev->device_prep_dma_xor = iop_adma_prep_dma_xor;
}
if (dma_has_cap(DMA_ZERO_SUM, dma_dev->cap_mask))
dma_dev->device_prep_dma_zero_sum =
iop_adma_prep_dma_zero_sum;
if (dma_has_cap(DMA_XOR_VAL, dma_dev->cap_mask))
dma_dev->device_prep_dma_xor_val =
iop_adma_prep_dma_xor_val;
if (dma_has_cap(DMA_PQ, dma_dev->cap_mask)) {
dma_set_maxpq(dma_dev, iop_adma_get_max_pq(), 0);
dma_dev->device_prep_dma_pq = iop_adma_prep_dma_pq;
}
if (dma_has_cap(DMA_PQ_VAL, dma_dev->cap_mask))
dma_dev->device_prep_dma_pq_val =
iop_adma_prep_dma_pq_val;
if (dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask))
dma_dev->device_prep_dma_interrupt =
iop_adma_prep_dma_interrupt;
@ -1249,22 +1584,34 @@ static int __devinit iop_adma_probe(struct platform_device *pdev)
if (dma_has_cap(DMA_XOR, dma_dev->cap_mask) ||
dma_has_cap(DMA_MEMSET, dma_dev->cap_mask)) {
ret = iop_adma_xor_zero_sum_self_test(adev);
ret = iop_adma_xor_val_self_test(adev);
dev_dbg(&pdev->dev, "xor self test returned %d\n", ret);
if (ret)
goto err_free_iop_chan;
}
if (dma_has_cap(DMA_PQ, dma_dev->cap_mask) &&
dma_has_cap(DMA_PQ_VAL, dma_dev->cap_mask)) {
#ifdef CONFIG_MD_RAID6_PQ
ret = iop_adma_pq_zero_sum_self_test(adev);
dev_dbg(&pdev->dev, "pq self test returned %d\n", ret);
#else
/* can not test raid6, so do not publish capability */
dma_cap_clear(DMA_PQ, dma_dev->cap_mask);
dma_cap_clear(DMA_PQ_VAL, dma_dev->cap_mask);
ret = 0;
#endif
if (ret)
goto err_free_iop_chan;
}
dev_printk(KERN_INFO, &pdev->dev, "Intel(R) IOP: "
"( %s%s%s%s%s%s%s%s%s%s)\n",
dma_has_cap(DMA_PQ_XOR, dma_dev->cap_mask) ? "pq_xor " : "",
dma_has_cap(DMA_PQ_UPDATE, dma_dev->cap_mask) ? "pq_update " : "",
dma_has_cap(DMA_PQ_ZERO_SUM, dma_dev->cap_mask) ? "pq_zero_sum " : "",
"( %s%s%s%s%s%s%s)\n",
dma_has_cap(DMA_PQ, dma_dev->cap_mask) ? "pq " : "",
dma_has_cap(DMA_PQ_VAL, dma_dev->cap_mask) ? "pq_val " : "",
dma_has_cap(DMA_XOR, dma_dev->cap_mask) ? "xor " : "",
dma_has_cap(DMA_DUAL_XOR, dma_dev->cap_mask) ? "dual_xor " : "",
dma_has_cap(DMA_ZERO_SUM, dma_dev->cap_mask) ? "xor_zero_sum " : "",
dma_has_cap(DMA_XOR_VAL, dma_dev->cap_mask) ? "xor_val " : "",
dma_has_cap(DMA_MEMSET, dma_dev->cap_mask) ? "fill " : "",
dma_has_cap(DMA_MEMCPY_CRC32C, dma_dev->cap_mask) ? "cpy+crc " : "",
dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask) ? "cpy " : "",
dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask) ? "intr " : "");
@ -1296,7 +1643,7 @@ static void iop_chan_start_null_memcpy(struct iop_adma_chan *iop_chan)
if (sw_desc) {
grp_start = sw_desc->group_head;
list_splice_init(&sw_desc->async_tx.tx_list, &iop_chan->chain);
list_splice_init(&sw_desc->tx_list, &iop_chan->chain);
async_tx_ack(&sw_desc->async_tx);
iop_desc_init_memcpy(grp_start, 0);
iop_desc_set_byte_count(grp_start, iop_chan, 0);
@ -1352,7 +1699,7 @@ static void iop_chan_start_null_xor(struct iop_adma_chan *iop_chan)
sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
if (sw_desc) {
grp_start = sw_desc->group_head;
list_splice_init(&sw_desc->async_tx.tx_list, &iop_chan->chain);
list_splice_init(&sw_desc->tx_list, &iop_chan->chain);
async_tx_ack(&sw_desc->async_tx);
iop_desc_init_null_xor(grp_start, 2, 0);
iop_desc_set_byte_count(grp_start, iop_chan, 0);

View File

@ -183,6 +183,11 @@ dma_cookie_t dma_memcpy_to_iovec(struct dma_chan *chan, struct iovec *iov,
iov_byte_offset,
kdata,
copy);
/* poll for a descriptor slot */
if (unlikely(dma_cookie < 0)) {
dma_async_issue_pending(chan);
continue;
}
len -= copy;
iov[iovec_idx].iov_len -= copy;
@ -248,6 +253,11 @@ dma_cookie_t dma_memcpy_pg_to_iovec(struct dma_chan *chan, struct iovec *iov,
page,
offset,
copy);
/* poll for a descriptor slot */
if (unlikely(dma_cookie < 0)) {
dma_async_issue_pending(chan);
continue;
}
len -= copy;
iov[iovec_idx].iov_len -= copy;

View File

@ -517,7 +517,7 @@ retry:
}
alloc_tail->group_head = alloc_start;
alloc_tail->async_tx.cookie = -EBUSY;
list_splice(&chain, &alloc_tail->async_tx.tx_list);
list_splice(&chain, &alloc_tail->tx_list);
mv_chan->last_used = last_used;
mv_desc_clear_next_desc(alloc_start);
mv_desc_clear_next_desc(alloc_tail);
@ -565,14 +565,14 @@ mv_xor_tx_submit(struct dma_async_tx_descriptor *tx)
cookie = mv_desc_assign_cookie(mv_chan, sw_desc);
if (list_empty(&mv_chan->chain))
list_splice_init(&sw_desc->async_tx.tx_list, &mv_chan->chain);
list_splice_init(&sw_desc->tx_list, &mv_chan->chain);
else {
new_hw_chain = 0;
old_chain_tail = list_entry(mv_chan->chain.prev,
struct mv_xor_desc_slot,
chain_node);
list_splice_init(&grp_start->async_tx.tx_list,
list_splice_init(&grp_start->tx_list,
&old_chain_tail->chain_node);
if (!mv_can_chain(grp_start))
@ -632,6 +632,7 @@ static int mv_xor_alloc_chan_resources(struct dma_chan *chan)
slot->async_tx.tx_submit = mv_xor_tx_submit;
INIT_LIST_HEAD(&slot->chain_node);
INIT_LIST_HEAD(&slot->slot_node);
INIT_LIST_HEAD(&slot->tx_list);
hw_desc = (char *) mv_chan->device->dma_desc_pool;
slot->async_tx.phys =
(dma_addr_t) &hw_desc[idx * MV_XOR_SLOT_SIZE];

View File

@ -126,9 +126,8 @@ struct mv_xor_chan {
* @idx: pool index
* @unmap_src_cnt: number of xor sources
* @unmap_len: transaction bytecount
* @tx_list: list of slots that make up a multi-descriptor transaction
* @async_tx: support for the async_tx api
* @group_list: list of slots that make up a multi-descriptor transaction
* for example transfer lengths larger than the supported hw max
* @xor_check_result: result of zero sum
* @crc32_result: result crc calculation
*/
@ -145,6 +144,7 @@ struct mv_xor_desc_slot {
u16 unmap_src_cnt;
u32 value;
size_t unmap_len;
struct list_head tx_list;
struct dma_async_tx_descriptor async_tx;
union {
u32 *xor_check_result;

786
drivers/dma/shdma.c Normal file
View File

@ -0,0 +1,786 @@
/*
* Renesas SuperH DMA Engine support
*
* base is drivers/dma/flsdma.c
*
* Copyright (C) 2009 Nobuhiro Iwamatsu <iwamatsu.nobuhiro@renesas.com>
* Copyright (C) 2009 Renesas Solutions, Inc. All rights reserved.
* Copyright (C) 2007 Freescale Semiconductor, Inc. All rights reserved.
*
* This is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* - DMA of SuperH does not have Hardware DMA chain mode.
* - MAX DMA size is 16MB.
*
*/
#include <linux/init.h>
#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/dmaengine.h>
#include <linux/delay.h>
#include <linux/dma-mapping.h>
#include <linux/dmapool.h>
#include <linux/platform_device.h>
#include <cpu/dma.h>
#include <asm/dma-sh.h>
#include "shdma.h"
/* DMA descriptor control */
#define DESC_LAST (-1)
#define DESC_COMP (1)
#define DESC_NCOMP (0)
#define NR_DESCS_PER_CHANNEL 32
/*
* Define the default configuration for dual address memory-memory transfer.
* The 0x400 value represents auto-request, external->external.
*
* And this driver set 4byte burst mode.
* If you want to change mode, you need to change RS_DEFAULT of value.
* (ex 1byte burst mode -> (RS_DUAL & ~TS_32)
*/
#define RS_DEFAULT (RS_DUAL)
#define SH_DMAC_CHAN_BASE(id) (dma_base_addr[id])
static void sh_dmae_writel(struct sh_dmae_chan *sh_dc, u32 data, u32 reg)
{
ctrl_outl(data, (SH_DMAC_CHAN_BASE(sh_dc->id) + reg));
}
static u32 sh_dmae_readl(struct sh_dmae_chan *sh_dc, u32 reg)
{
return ctrl_inl((SH_DMAC_CHAN_BASE(sh_dc->id) + reg));
}
static void dmae_init(struct sh_dmae_chan *sh_chan)
{
u32 chcr = RS_DEFAULT; /* default is DUAL mode */
sh_dmae_writel(sh_chan, chcr, CHCR);
}
/*
* Reset DMA controller
*
* SH7780 has two DMAOR register
*/
static void sh_dmae_ctl_stop(int id)
{
unsigned short dmaor = dmaor_read_reg(id);
dmaor &= ~(DMAOR_NMIF | DMAOR_AE);
dmaor_write_reg(id, dmaor);
}
static int sh_dmae_rst(int id)
{
unsigned short dmaor;
sh_dmae_ctl_stop(id);
dmaor = (dmaor_read_reg(id)|DMAOR_INIT);
dmaor_write_reg(id, dmaor);
if ((dmaor_read_reg(id) & (DMAOR_AE | DMAOR_NMIF))) {
pr_warning(KERN_ERR "dma-sh: Can't initialize DMAOR.\n");
return -EINVAL;
}
return 0;
}
static int dmae_is_idle(struct sh_dmae_chan *sh_chan)
{
u32 chcr = sh_dmae_readl(sh_chan, CHCR);
if (chcr & CHCR_DE) {
if (!(chcr & CHCR_TE))
return -EBUSY; /* working */
}
return 0; /* waiting */
}
static inline unsigned int calc_xmit_shift(struct sh_dmae_chan *sh_chan)
{
u32 chcr = sh_dmae_readl(sh_chan, CHCR);
return ts_shift[(chcr & CHCR_TS_MASK) >> CHCR_TS_SHIFT];
}
static void dmae_set_reg(struct sh_dmae_chan *sh_chan, struct sh_dmae_regs hw)
{
sh_dmae_writel(sh_chan, hw.sar, SAR);
sh_dmae_writel(sh_chan, hw.dar, DAR);
sh_dmae_writel(sh_chan,
(hw.tcr >> calc_xmit_shift(sh_chan)), TCR);
}
static void dmae_start(struct sh_dmae_chan *sh_chan)
{
u32 chcr = sh_dmae_readl(sh_chan, CHCR);
chcr |= (CHCR_DE|CHCR_IE);
sh_dmae_writel(sh_chan, chcr, CHCR);
}
static void dmae_halt(struct sh_dmae_chan *sh_chan)
{
u32 chcr = sh_dmae_readl(sh_chan, CHCR);
chcr &= ~(CHCR_DE | CHCR_TE | CHCR_IE);
sh_dmae_writel(sh_chan, chcr, CHCR);
}
static int dmae_set_chcr(struct sh_dmae_chan *sh_chan, u32 val)
{
int ret = dmae_is_idle(sh_chan);
/* When DMA was working, can not set data to CHCR */
if (ret)
return ret;
sh_dmae_writel(sh_chan, val, CHCR);
return 0;
}
#define DMARS1_ADDR 0x04
#define DMARS2_ADDR 0x08
#define DMARS_SHIFT 8
#define DMARS_CHAN_MSK 0x01
static int dmae_set_dmars(struct sh_dmae_chan *sh_chan, u16 val)
{
u32 addr;
int shift = 0;
int ret = dmae_is_idle(sh_chan);
if (ret)
return ret;
if (sh_chan->id & DMARS_CHAN_MSK)
shift = DMARS_SHIFT;
switch (sh_chan->id) {
/* DMARS0 */
case 0:
case 1:
addr = SH_DMARS_BASE;
break;
/* DMARS1 */
case 2:
case 3:
addr = (SH_DMARS_BASE + DMARS1_ADDR);
break;
/* DMARS2 */
case 4:
case 5:
addr = (SH_DMARS_BASE + DMARS2_ADDR);
break;
default:
return -EINVAL;
}
ctrl_outw((val << shift) |
(ctrl_inw(addr) & (shift ? 0xFF00 : 0x00FF)),
addr);
return 0;
}
static dma_cookie_t sh_dmae_tx_submit(struct dma_async_tx_descriptor *tx)
{
struct sh_desc *desc = tx_to_sh_desc(tx);
struct sh_dmae_chan *sh_chan = to_sh_chan(tx->chan);
dma_cookie_t cookie;
spin_lock_bh(&sh_chan->desc_lock);
cookie = sh_chan->common.cookie;
cookie++;
if (cookie < 0)
cookie = 1;
/* If desc only in the case of 1 */
if (desc->async_tx.cookie != -EBUSY)
desc->async_tx.cookie = cookie;
sh_chan->common.cookie = desc->async_tx.cookie;
list_splice_init(&desc->tx_list, sh_chan->ld_queue.prev);
spin_unlock_bh(&sh_chan->desc_lock);
return cookie;
}
static struct sh_desc *sh_dmae_get_desc(struct sh_dmae_chan *sh_chan)
{
struct sh_desc *desc, *_desc, *ret = NULL;
spin_lock_bh(&sh_chan->desc_lock);
list_for_each_entry_safe(desc, _desc, &sh_chan->ld_free, node) {
if (async_tx_test_ack(&desc->async_tx)) {
list_del(&desc->node);
ret = desc;
break;
}
}
spin_unlock_bh(&sh_chan->desc_lock);
return ret;
}
static void sh_dmae_put_desc(struct sh_dmae_chan *sh_chan, struct sh_desc *desc)
{
if (desc) {
spin_lock_bh(&sh_chan->desc_lock);
list_splice_init(&desc->tx_list, &sh_chan->ld_free);
list_add(&desc->node, &sh_chan->ld_free);
spin_unlock_bh(&sh_chan->desc_lock);
}
}
static int sh_dmae_alloc_chan_resources(struct dma_chan *chan)
{
struct sh_dmae_chan *sh_chan = to_sh_chan(chan);
struct sh_desc *desc;
spin_lock_bh(&sh_chan->desc_lock);
while (sh_chan->descs_allocated < NR_DESCS_PER_CHANNEL) {
spin_unlock_bh(&sh_chan->desc_lock);
desc = kzalloc(sizeof(struct sh_desc), GFP_KERNEL);
if (!desc) {
spin_lock_bh(&sh_chan->desc_lock);
break;
}
dma_async_tx_descriptor_init(&desc->async_tx,
&sh_chan->common);
desc->async_tx.tx_submit = sh_dmae_tx_submit;
desc->async_tx.flags = DMA_CTRL_ACK;
INIT_LIST_HEAD(&desc->tx_list);
sh_dmae_put_desc(sh_chan, desc);
spin_lock_bh(&sh_chan->desc_lock);
sh_chan->descs_allocated++;
}
spin_unlock_bh(&sh_chan->desc_lock);
return sh_chan->descs_allocated;
}
/*
* sh_dma_free_chan_resources - Free all resources of the channel.
*/
static void sh_dmae_free_chan_resources(struct dma_chan *chan)
{
struct sh_dmae_chan *sh_chan = to_sh_chan(chan);
struct sh_desc *desc, *_desc;
LIST_HEAD(list);
BUG_ON(!list_empty(&sh_chan->ld_queue));
spin_lock_bh(&sh_chan->desc_lock);
list_splice_init(&sh_chan->ld_free, &list);
sh_chan->descs_allocated = 0;
spin_unlock_bh(&sh_chan->desc_lock);
list_for_each_entry_safe(desc, _desc, &list, node)
kfree(desc);
}
static struct dma_async_tx_descriptor *sh_dmae_prep_memcpy(
struct dma_chan *chan, dma_addr_t dma_dest, dma_addr_t dma_src,
size_t len, unsigned long flags)
{
struct sh_dmae_chan *sh_chan;
struct sh_desc *first = NULL, *prev = NULL, *new;
size_t copy_size;
if (!chan)
return NULL;
if (!len)
return NULL;
sh_chan = to_sh_chan(chan);
do {
/* Allocate the link descriptor from DMA pool */
new = sh_dmae_get_desc(sh_chan);
if (!new) {
dev_err(sh_chan->dev,
"No free memory for link descriptor\n");
goto err_get_desc;
}
copy_size = min(len, (size_t)SH_DMA_TCR_MAX);
new->hw.sar = dma_src;
new->hw.dar = dma_dest;
new->hw.tcr = copy_size;
if (!first)
first = new;
new->mark = DESC_NCOMP;
async_tx_ack(&new->async_tx);
prev = new;
len -= copy_size;
dma_src += copy_size;
dma_dest += copy_size;
/* Insert the link descriptor to the LD ring */
list_add_tail(&new->node, &first->tx_list);
} while (len);
new->async_tx.flags = flags; /* client is in control of this ack */
new->async_tx.cookie = -EBUSY; /* Last desc */
return &first->async_tx;
err_get_desc:
sh_dmae_put_desc(sh_chan, first);
return NULL;
}
/*
* sh_chan_ld_cleanup - Clean up link descriptors
*
* This function clean up the ld_queue of DMA channel.
*/
static void sh_dmae_chan_ld_cleanup(struct sh_dmae_chan *sh_chan)
{
struct sh_desc *desc, *_desc;
spin_lock_bh(&sh_chan->desc_lock);
list_for_each_entry_safe(desc, _desc, &sh_chan->ld_queue, node) {
dma_async_tx_callback callback;
void *callback_param;
/* non send data */
if (desc->mark == DESC_NCOMP)
break;
/* send data sesc */
callback = desc->async_tx.callback;
callback_param = desc->async_tx.callback_param;
/* Remove from ld_queue list */
list_splice_init(&desc->tx_list, &sh_chan->ld_free);
dev_dbg(sh_chan->dev, "link descriptor %p will be recycle.\n",
desc);
list_move(&desc->node, &sh_chan->ld_free);
/* Run the link descriptor callback function */
if (callback) {
spin_unlock_bh(&sh_chan->desc_lock);
dev_dbg(sh_chan->dev, "link descriptor %p callback\n",
desc);
callback(callback_param);
spin_lock_bh(&sh_chan->desc_lock);
}
}
spin_unlock_bh(&sh_chan->desc_lock);
}
static void sh_chan_xfer_ld_queue(struct sh_dmae_chan *sh_chan)
{
struct list_head *ld_node;
struct sh_dmae_regs hw;
/* DMA work check */
if (dmae_is_idle(sh_chan))
return;
/* Find the first un-transfer desciptor */
for (ld_node = sh_chan->ld_queue.next;
(ld_node != &sh_chan->ld_queue)
&& (to_sh_desc(ld_node)->mark == DESC_COMP);
ld_node = ld_node->next)
cpu_relax();
if (ld_node != &sh_chan->ld_queue) {
/* Get the ld start address from ld_queue */
hw = to_sh_desc(ld_node)->hw;
dmae_set_reg(sh_chan, hw);
dmae_start(sh_chan);
}
}
static void sh_dmae_memcpy_issue_pending(struct dma_chan *chan)
{
struct sh_dmae_chan *sh_chan = to_sh_chan(chan);
sh_chan_xfer_ld_queue(sh_chan);
}
static enum dma_status sh_dmae_is_complete(struct dma_chan *chan,
dma_cookie_t cookie,
dma_cookie_t *done,
dma_cookie_t *used)
{
struct sh_dmae_chan *sh_chan = to_sh_chan(chan);
dma_cookie_t last_used;
dma_cookie_t last_complete;
sh_dmae_chan_ld_cleanup(sh_chan);
last_used = chan->cookie;
last_complete = sh_chan->completed_cookie;
if (last_complete == -EBUSY)
last_complete = last_used;
if (done)
*done = last_complete;
if (used)
*used = last_used;
return dma_async_is_complete(cookie, last_complete, last_used);
}
static irqreturn_t sh_dmae_interrupt(int irq, void *data)
{
irqreturn_t ret = IRQ_NONE;
struct sh_dmae_chan *sh_chan = (struct sh_dmae_chan *)data;
u32 chcr = sh_dmae_readl(sh_chan, CHCR);
if (chcr & CHCR_TE) {
/* DMA stop */
dmae_halt(sh_chan);
ret = IRQ_HANDLED;
tasklet_schedule(&sh_chan->tasklet);
}
return ret;
}
#if defined(CONFIG_CPU_SH4)
static irqreturn_t sh_dmae_err(int irq, void *data)
{
int err = 0;
struct sh_dmae_device *shdev = (struct sh_dmae_device *)data;
/* IRQ Multi */
if (shdev->pdata.mode & SHDMA_MIX_IRQ) {
int cnt = 0;
switch (irq) {
#if defined(DMTE6_IRQ) && defined(DMAE1_IRQ)
case DMTE6_IRQ:
cnt++;
#endif
case DMTE0_IRQ:
if (dmaor_read_reg(cnt) & (DMAOR_NMIF | DMAOR_AE)) {
disable_irq(irq);
return IRQ_HANDLED;
}
default:
return IRQ_NONE;
}
} else {
/* reset dma controller */
err = sh_dmae_rst(0);
if (err)
return err;
if (shdev->pdata.mode & SHDMA_DMAOR1) {
err = sh_dmae_rst(1);
if (err)
return err;
}
disable_irq(irq);
return IRQ_HANDLED;
}
}
#endif
static void dmae_do_tasklet(unsigned long data)
{
struct sh_dmae_chan *sh_chan = (struct sh_dmae_chan *)data;
struct sh_desc *desc, *_desc, *cur_desc = NULL;
u32 sar_buf = sh_dmae_readl(sh_chan, SAR);
list_for_each_entry_safe(desc, _desc,
&sh_chan->ld_queue, node) {
if ((desc->hw.sar + desc->hw.tcr) == sar_buf) {
cur_desc = desc;
break;
}
}
if (cur_desc) {
switch (cur_desc->async_tx.cookie) {
case 0: /* other desc data */
break;
case -EBUSY: /* last desc */
sh_chan->completed_cookie =
cur_desc->async_tx.cookie;
break;
default: /* first desc ( 0 < )*/
sh_chan->completed_cookie =
cur_desc->async_tx.cookie - 1;
break;
}
cur_desc->mark = DESC_COMP;
}
/* Next desc */
sh_chan_xfer_ld_queue(sh_chan);
sh_dmae_chan_ld_cleanup(sh_chan);
}
static unsigned int get_dmae_irq(unsigned int id)
{
unsigned int irq = 0;
if (id < ARRAY_SIZE(dmte_irq_map))
irq = dmte_irq_map[id];
return irq;
}
static int __devinit sh_dmae_chan_probe(struct sh_dmae_device *shdev, int id)
{
int err;
unsigned int irq = get_dmae_irq(id);
unsigned long irqflags = IRQF_DISABLED;
struct sh_dmae_chan *new_sh_chan;
/* alloc channel */
new_sh_chan = kzalloc(sizeof(struct sh_dmae_chan), GFP_KERNEL);
if (!new_sh_chan) {
dev_err(shdev->common.dev, "No free memory for allocating "
"dma channels!\n");
return -ENOMEM;
}
new_sh_chan->dev = shdev->common.dev;
new_sh_chan->id = id;
/* Init DMA tasklet */
tasklet_init(&new_sh_chan->tasklet, dmae_do_tasklet,
(unsigned long)new_sh_chan);
/* Init the channel */
dmae_init(new_sh_chan);
spin_lock_init(&new_sh_chan->desc_lock);
/* Init descripter manage list */
INIT_LIST_HEAD(&new_sh_chan->ld_queue);
INIT_LIST_HEAD(&new_sh_chan->ld_free);
/* copy struct dma_device */
new_sh_chan->common.device = &shdev->common;
/* Add the channel to DMA device channel list */
list_add_tail(&new_sh_chan->common.device_node,
&shdev->common.channels);
shdev->common.chancnt++;
if (shdev->pdata.mode & SHDMA_MIX_IRQ) {
irqflags = IRQF_SHARED;
#if defined(DMTE6_IRQ)
if (irq >= DMTE6_IRQ)
irq = DMTE6_IRQ;
else
#endif
irq = DMTE0_IRQ;
}
snprintf(new_sh_chan->dev_id, sizeof(new_sh_chan->dev_id),
"sh-dmae%d", new_sh_chan->id);
/* set up channel irq */
err = request_irq(irq, &sh_dmae_interrupt,
irqflags, new_sh_chan->dev_id, new_sh_chan);
if (err) {
dev_err(shdev->common.dev, "DMA channel %d request_irq error "
"with return %d\n", id, err);
goto err_no_irq;
}
/* CHCR register control function */
new_sh_chan->set_chcr = dmae_set_chcr;
/* DMARS register control function */
new_sh_chan->set_dmars = dmae_set_dmars;
shdev->chan[id] = new_sh_chan;
return 0;
err_no_irq:
/* remove from dmaengine device node */
list_del(&new_sh_chan->common.device_node);
kfree(new_sh_chan);
return err;
}
static void sh_dmae_chan_remove(struct sh_dmae_device *shdev)
{
int i;
for (i = shdev->common.chancnt - 1 ; i >= 0 ; i--) {
if (shdev->chan[i]) {
struct sh_dmae_chan *shchan = shdev->chan[i];
if (!(shdev->pdata.mode & SHDMA_MIX_IRQ))
free_irq(dmte_irq_map[i], shchan);
list_del(&shchan->common.device_node);
kfree(shchan);
shdev->chan[i] = NULL;
}
}
shdev->common.chancnt = 0;
}
static int __init sh_dmae_probe(struct platform_device *pdev)
{
int err = 0, cnt, ecnt;
unsigned long irqflags = IRQF_DISABLED;
#if defined(CONFIG_CPU_SH4)
int eirq[] = { DMAE0_IRQ,
#if defined(DMAE1_IRQ)
DMAE1_IRQ
#endif
};
#endif
struct sh_dmae_device *shdev;
shdev = kzalloc(sizeof(struct sh_dmae_device), GFP_KERNEL);
if (!shdev) {
dev_err(&pdev->dev, "No enough memory\n");
err = -ENOMEM;
goto shdev_err;
}
/* get platform data */
if (!pdev->dev.platform_data)
goto shdev_err;
/* platform data */
memcpy(&shdev->pdata, pdev->dev.platform_data,
sizeof(struct sh_dmae_pdata));
/* reset dma controller */
err = sh_dmae_rst(0);
if (err)
goto rst_err;
/* SH7780/85/23 has DMAOR1 */
if (shdev->pdata.mode & SHDMA_DMAOR1) {
err = sh_dmae_rst(1);
if (err)
goto rst_err;
}
INIT_LIST_HEAD(&shdev->common.channels);
dma_cap_set(DMA_MEMCPY, shdev->common.cap_mask);
shdev->common.device_alloc_chan_resources
= sh_dmae_alloc_chan_resources;
shdev->common.device_free_chan_resources = sh_dmae_free_chan_resources;
shdev->common.device_prep_dma_memcpy = sh_dmae_prep_memcpy;
shdev->common.device_is_tx_complete = sh_dmae_is_complete;
shdev->common.device_issue_pending = sh_dmae_memcpy_issue_pending;
shdev->common.dev = &pdev->dev;
#if defined(CONFIG_CPU_SH4)
/* Non Mix IRQ mode SH7722/SH7730 etc... */
if (shdev->pdata.mode & SHDMA_MIX_IRQ) {
irqflags = IRQF_SHARED;
eirq[0] = DMTE0_IRQ;
#if defined(DMTE6_IRQ) && defined(DMAE1_IRQ)
eirq[1] = DMTE6_IRQ;
#endif
}
for (ecnt = 0 ; ecnt < ARRAY_SIZE(eirq); ecnt++) {
err = request_irq(eirq[ecnt], sh_dmae_err,
irqflags, "DMAC Address Error", shdev);
if (err) {
dev_err(&pdev->dev, "DMA device request_irq"
"error (irq %d) with return %d\n",
eirq[ecnt], err);
goto eirq_err;
}
}
#endif /* CONFIG_CPU_SH4 */
/* Create DMA Channel */
for (cnt = 0 ; cnt < MAX_DMA_CHANNELS ; cnt++) {
err = sh_dmae_chan_probe(shdev, cnt);
if (err)
goto chan_probe_err;
}
platform_set_drvdata(pdev, shdev);
dma_async_device_register(&shdev->common);
return err;
chan_probe_err:
sh_dmae_chan_remove(shdev);
eirq_err:
for (ecnt-- ; ecnt >= 0; ecnt--)
free_irq(eirq[ecnt], shdev);
rst_err:
kfree(shdev);
shdev_err:
return err;
}
static int __exit sh_dmae_remove(struct platform_device *pdev)
{
struct sh_dmae_device *shdev = platform_get_drvdata(pdev);
dma_async_device_unregister(&shdev->common);
if (shdev->pdata.mode & SHDMA_MIX_IRQ) {
free_irq(DMTE0_IRQ, shdev);
#if defined(DMTE6_IRQ)
free_irq(DMTE6_IRQ, shdev);
#endif
}
/* channel data remove */
sh_dmae_chan_remove(shdev);
if (!(shdev->pdata.mode & SHDMA_MIX_IRQ)) {
free_irq(DMAE0_IRQ, shdev);
#if defined(DMAE1_IRQ)
free_irq(DMAE1_IRQ, shdev);
#endif
}
kfree(shdev);
return 0;
}
static void sh_dmae_shutdown(struct platform_device *pdev)
{
struct sh_dmae_device *shdev = platform_get_drvdata(pdev);
sh_dmae_ctl_stop(0);
if (shdev->pdata.mode & SHDMA_DMAOR1)
sh_dmae_ctl_stop(1);
}
static struct platform_driver sh_dmae_driver = {
.remove = __exit_p(sh_dmae_remove),
.shutdown = sh_dmae_shutdown,
.driver = {
.name = "sh-dma-engine",
},
};
static int __init sh_dmae_init(void)
{
return platform_driver_probe(&sh_dmae_driver, sh_dmae_probe);
}
module_init(sh_dmae_init);
static void __exit sh_dmae_exit(void)
{
platform_driver_unregister(&sh_dmae_driver);
}
module_exit(sh_dmae_exit);
MODULE_AUTHOR("Nobuhiro Iwamatsu <iwamatsu.nobuhiro@renesas.com>");
MODULE_DESCRIPTION("Renesas SH DMA Engine driver");
MODULE_LICENSE("GPL");

64
drivers/dma/shdma.h Normal file
View File

@ -0,0 +1,64 @@
/*
* Renesas SuperH DMA Engine support
*
* Copyright (C) 2009 Nobuhiro Iwamatsu <iwamatsu.nobuhiro@renesas.com>
* Copyright (C) 2009 Renesas Solutions, Inc. All rights reserved.
*
* This is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
*/
#ifndef __DMA_SHDMA_H
#define __DMA_SHDMA_H
#include <linux/device.h>
#include <linux/dmapool.h>
#include <linux/dmaengine.h>
#define SH_DMA_TCR_MAX 0x00FFFFFF /* 16MB */
struct sh_dmae_regs {
u32 sar; /* SAR / source address */
u32 dar; /* DAR / destination address */
u32 tcr; /* TCR / transfer count */
};
struct sh_desc {
struct list_head tx_list;
struct sh_dmae_regs hw;
struct list_head node;
struct dma_async_tx_descriptor async_tx;
int mark;
};
struct sh_dmae_chan {
dma_cookie_t completed_cookie; /* The maximum cookie completed */
spinlock_t desc_lock; /* Descriptor operation lock */
struct list_head ld_queue; /* Link descriptors queue */
struct list_head ld_free; /* Link descriptors free */
struct dma_chan common; /* DMA common channel */
struct device *dev; /* Channel device */
struct tasklet_struct tasklet; /* Tasklet */
int descs_allocated; /* desc count */
int id; /* Raw id of this channel */
char dev_id[16]; /* unique name per DMAC of channel */
/* Set chcr */
int (*set_chcr)(struct sh_dmae_chan *sh_chan, u32 regs);
/* Set DMA resource */
int (*set_dmars)(struct sh_dmae_chan *sh_chan, u16 res);
};
struct sh_dmae_device {
struct dma_device common;
struct sh_dmae_chan *chan[MAX_DMA_CHANNELS];
struct sh_dmae_pdata pdata;
};
#define to_sh_chan(chan) container_of(chan, struct sh_dmae_chan, common)
#define to_sh_desc(lh) container_of(lh, struct sh_desc, node)
#define tx_to_sh_desc(tx) container_of(tx, struct sh_desc, async_tx)
#endif /* __DMA_SHDMA_H */

View File

@ -180,9 +180,8 @@ static struct txx9dmac_desc *txx9dmac_first_queued(struct txx9dmac_chan *dc)
static struct txx9dmac_desc *txx9dmac_last_child(struct txx9dmac_desc *desc)
{
if (!list_empty(&desc->txd.tx_list))
desc = list_entry(desc->txd.tx_list.prev,
struct txx9dmac_desc, desc_node);
if (!list_empty(&desc->tx_list))
desc = list_entry(desc->tx_list.prev, typeof(*desc), desc_node);
return desc;
}
@ -197,6 +196,7 @@ static struct txx9dmac_desc *txx9dmac_desc_alloc(struct txx9dmac_chan *dc,
desc = kzalloc(sizeof(*desc), flags);
if (!desc)
return NULL;
INIT_LIST_HEAD(&desc->tx_list);
dma_async_tx_descriptor_init(&desc->txd, &dc->chan);
desc->txd.tx_submit = txx9dmac_tx_submit;
/* txd.flags will be overwritten in prep funcs */
@ -245,7 +245,7 @@ static void txx9dmac_sync_desc_for_cpu(struct txx9dmac_chan *dc,
struct txx9dmac_dev *ddev = dc->ddev;
struct txx9dmac_desc *child;
list_for_each_entry(child, &desc->txd.tx_list, desc_node)
list_for_each_entry(child, &desc->tx_list, desc_node)
dma_sync_single_for_cpu(chan2parent(&dc->chan),
child->txd.phys, ddev->descsize,
DMA_TO_DEVICE);
@ -267,11 +267,11 @@ static void txx9dmac_desc_put(struct txx9dmac_chan *dc,
txx9dmac_sync_desc_for_cpu(dc, desc);
spin_lock_bh(&dc->lock);
list_for_each_entry(child, &desc->txd.tx_list, desc_node)
list_for_each_entry(child, &desc->tx_list, desc_node)
dev_vdbg(chan2dev(&dc->chan),
"moving child desc %p to freelist\n",
child);
list_splice_init(&desc->txd.tx_list, &dc->free_list);
list_splice_init(&desc->tx_list, &dc->free_list);
dev_vdbg(chan2dev(&dc->chan), "moving desc %p to freelist\n",
desc);
list_add(&desc->desc_node, &dc->free_list);
@ -429,7 +429,7 @@ txx9dmac_descriptor_complete(struct txx9dmac_chan *dc,
param = txd->callback_param;
txx9dmac_sync_desc_for_cpu(dc, desc);
list_splice_init(&txd->tx_list, &dc->free_list);
list_splice_init(&desc->tx_list, &dc->free_list);
list_move(&desc->desc_node, &dc->free_list);
if (!ds) {
@ -571,7 +571,7 @@ static void txx9dmac_handle_error(struct txx9dmac_chan *dc, u32 csr)
"Bad descriptor submitted for DMA! (cookie: %d)\n",
bad_desc->txd.cookie);
txx9dmac_dump_desc(dc, &bad_desc->hwdesc);
list_for_each_entry(child, &bad_desc->txd.tx_list, desc_node)
list_for_each_entry(child, &bad_desc->tx_list, desc_node)
txx9dmac_dump_desc(dc, &child->hwdesc);
/* Pretend the descriptor completed successfully */
txx9dmac_descriptor_complete(dc, bad_desc);
@ -613,7 +613,7 @@ static void txx9dmac_scan_descriptors(struct txx9dmac_chan *dc)
return;
}
list_for_each_entry(child, &desc->txd.tx_list, desc_node)
list_for_each_entry(child, &desc->tx_list, desc_node)
if (desc_read_CHAR(dc, child) == chain) {
/* Currently in progress */
if (csr & TXX9_DMA_CSR_ABCHC)
@ -823,8 +823,7 @@ txx9dmac_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
dma_sync_single_for_device(chan2parent(&dc->chan),
prev->txd.phys, ddev->descsize,
DMA_TO_DEVICE);
list_add_tail(&desc->desc_node,
&first->txd.tx_list);
list_add_tail(&desc->desc_node, &first->tx_list);
}
prev = desc;
}
@ -919,8 +918,7 @@ txx9dmac_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
prev->txd.phys,
ddev->descsize,
DMA_TO_DEVICE);
list_add_tail(&desc->desc_node,
&first->txd.tx_list);
list_add_tail(&desc->desc_node, &first->tx_list);
}
prev = desc;
}

View File

@ -231,6 +231,7 @@ struct txx9dmac_desc {
/* THEN values for driver housekeeping */
struct list_head desc_node ____cacheline_aligned;
struct list_head tx_list;
struct dma_async_tx_descriptor txd;
size_t len;
};

View File

@ -29,8 +29,8 @@
#include <asm/idle.h>
#include "../dma/ioatdma_hw.h"
#include "../dma/ioatdma_registers.h"
#include "../dma/ioat/hw.h"
#include "../dma/ioat/registers.h"
#define I7300_IDLE_DRIVER_VERSION "1.55"
#define I7300_PRINT "i7300_idle:"
@ -126,9 +126,9 @@ static void i7300_idle_ioat_stop(void)
udelay(10);
sts = readq(ioat_chanbase + IOAT1_CHANSTS_OFFSET) &
IOAT_CHANSTS_DMA_TRANSFER_STATUS;
IOAT_CHANSTS_STATUS;
if (sts != IOAT_CHANSTS_DMA_TRANSFER_STATUS_ACTIVE)
if (sts != IOAT_CHANSTS_ACTIVE)
break;
}
@ -160,9 +160,9 @@ static int __init i7300_idle_ioat_selftest(u8 *ctl,
udelay(1000);
chan_sts = readq(ioat_chanbase + IOAT1_CHANSTS_OFFSET) &
IOAT_CHANSTS_DMA_TRANSFER_STATUS;
IOAT_CHANSTS_STATUS;
if (chan_sts != IOAT_CHANSTS_DMA_TRANSFER_STATUS_DONE) {
if (chan_sts != IOAT_CHANSTS_DONE) {
/* Not complete, reset the channel */
writeb(IOAT_CHANCMD_RESET,
ioat_chanbase + IOAT1_CHANCMD_OFFSET);
@ -288,9 +288,9 @@ static void __exit i7300_idle_ioat_exit(void)
ioat_chanbase + IOAT1_CHANCMD_OFFSET);
chan_sts = readq(ioat_chanbase + IOAT1_CHANSTS_OFFSET) &
IOAT_CHANSTS_DMA_TRANSFER_STATUS;
IOAT_CHANSTS_STATUS;
if (chan_sts != IOAT_CHANSTS_DMA_TRANSFER_STATUS_ACTIVE) {
if (chan_sts != IOAT_CHANSTS_ACTIVE) {
writew(0, ioat_chanbase + IOAT_CHANCTRL_OFFSET);
break;
}
@ -298,14 +298,14 @@ static void __exit i7300_idle_ioat_exit(void)
}
chan_sts = readq(ioat_chanbase + IOAT1_CHANSTS_OFFSET) &
IOAT_CHANSTS_DMA_TRANSFER_STATUS;
IOAT_CHANSTS_STATUS;
/*
* We tried to reset multiple times. If IO A/T channel is still active
* flag an error and return without cleanup. Memory leak is better
* than random corruption in that extreme error situation.
*/
if (chan_sts == IOAT_CHANSTS_DMA_TRANSFER_STATUS_ACTIVE) {
if (chan_sts == IOAT_CHANSTS_ACTIVE) {
printk(KERN_ERR I7300_PRINT "Unable to stop IO A/T channels."
" Not freeing resources\n");
return;

View File

@ -124,6 +124,8 @@ config MD_RAID456
select MD_RAID6_PQ
select ASYNC_MEMCPY
select ASYNC_XOR
select ASYNC_PQ
select ASYNC_RAID6_RECOV
---help---
A RAID-5 set of N drives with a capacity of C MB per drive provides
the capacity of C * (N - 1) MB, and protects against a failure
@ -152,9 +154,33 @@ config MD_RAID456
If unsure, say Y.
config MULTICORE_RAID456
bool "RAID-4/RAID-5/RAID-6 Multicore processing (EXPERIMENTAL)"
depends on MD_RAID456
depends on SMP
depends on EXPERIMENTAL
---help---
Enable the raid456 module to dispatch per-stripe raid operations to a
thread pool.
If unsure, say N.
config MD_RAID6_PQ
tristate
config ASYNC_RAID6_TEST
tristate "Self test for hardware accelerated raid6 recovery"
depends on MD_RAID6_PQ
select ASYNC_RAID6_RECOV
---help---
This is a one-shot self test that permutes through the
recovery of all the possible two disk failure scenarios for a
N-disk array. Recovery is performed with the asynchronous
raid6 recovery routines, and will optionally use an offload
engine if one is available.
If unsure, say N.
config MD_MULTIPATH
tristate "Multipath I/O support"
depends on BLK_DEV_MD

File diff suppressed because it is too large Load Diff

View File

@ -2,6 +2,7 @@
#define _RAID5_H
#include <linux/raid/xor.h>
#include <linux/dmaengine.h>
/*
*
@ -175,7 +176,9 @@
*/
enum check_states {
check_state_idle = 0,
check_state_run, /* parity check */
check_state_run, /* xor parity check */
check_state_run_q, /* q-parity check */
check_state_run_pq, /* pq dual parity check */
check_state_check_result,
check_state_compute_run, /* parity repair */
check_state_compute_result,
@ -215,8 +218,8 @@ struct stripe_head {
* @target - STRIPE_OP_COMPUTE_BLK target
*/
struct stripe_operations {
int target;
u32 zero_sum_result;
int target, target2;
enum sum_check_flags zero_sum_result;
} ops;
struct r5dev {
struct bio req;
@ -298,7 +301,7 @@ struct r6_state {
#define STRIPE_OP_COMPUTE_BLK 1
#define STRIPE_OP_PREXOR 2
#define STRIPE_OP_BIODRAIN 3
#define STRIPE_OP_POSTXOR 4
#define STRIPE_OP_RECONSTRUCT 4
#define STRIPE_OP_CHECK 5
/*
@ -385,8 +388,21 @@ struct raid5_private_data {
* (fresh device added).
* Cleared when a sync completes.
*/
/* per cpu variables */
struct raid5_percpu {
struct page *spare_page; /* Used when checking P/Q in raid6 */
void *scribble; /* space for constructing buffer
* lists and performing address
* conversions
*/
} *percpu;
size_t scribble_len; /* size of scribble region must be
* associated with conf to handle
* cpu hotplug while reshaping
*/
#ifdef CONFIG_HOTPLUG_CPU
struct notifier_block cpu_notify;
#endif
/*
* Free stripes pool

View File

@ -576,6 +576,7 @@ atmci_submit_data_dma(struct atmel_mci *host, struct mmc_data *data)
struct scatterlist *sg;
unsigned int i;
enum dma_data_direction direction;
unsigned int sglen;
/*
* We don't do DMA on "complex" transfers, i.e. with
@ -605,11 +606,14 @@ atmci_submit_data_dma(struct atmel_mci *host, struct mmc_data *data)
else
direction = DMA_TO_DEVICE;
sglen = dma_map_sg(&host->pdev->dev, data->sg, data->sg_len, direction);
if (sglen != data->sg_len)
goto unmap_exit;
desc = chan->device->device_prep_slave_sg(chan,
data->sg, data->sg_len, direction,
DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
if (!desc)
return -ENOMEM;
goto unmap_exit;
host->dma.data_desc = desc;
desc->callback = atmci_dma_complete;
@ -620,6 +624,9 @@ atmci_submit_data_dma(struct atmel_mci *host, struct mmc_data *data)
chan->device->device_issue_pending(chan);
return 0;
unmap_exit:
dma_unmap_sg(&host->pdev->dev, data->sg, sglen, direction);
return -ENOMEM;
}
#else /* CONFIG_MMC_ATMELMCI_DMA */

View File

@ -58,24 +58,59 @@ struct dma_chan_ref {
* array.
* @ASYNC_TX_ACK: immediately ack the descriptor, precludes setting up a
* dependency chain
* @ASYNC_TX_DEP_ACK: ack the dependency descriptor. Useful for chaining.
* @ASYNC_TX_FENCE: specify that the next operation in the dependency
* chain uses this operation's result as an input
*/
enum async_tx_flags {
ASYNC_TX_XOR_ZERO_DST = (1 << 0),
ASYNC_TX_XOR_DROP_DST = (1 << 1),
ASYNC_TX_ACK = (1 << 3),
ASYNC_TX_DEP_ACK = (1 << 4),
ASYNC_TX_ACK = (1 << 2),
ASYNC_TX_FENCE = (1 << 3),
};
/**
* struct async_submit_ctl - async_tx submission/completion modifiers
* @flags: submission modifiers
* @depend_tx: parent dependency of the current operation being submitted
* @cb_fn: callback routine to run at operation completion
* @cb_param: parameter for the callback routine
* @scribble: caller provided space for dma/page address conversions
*/
struct async_submit_ctl {
enum async_tx_flags flags;
struct dma_async_tx_descriptor *depend_tx;
dma_async_tx_callback cb_fn;
void *cb_param;
void *scribble;
};
#ifdef CONFIG_DMA_ENGINE
#define async_tx_issue_pending_all dma_issue_pending_all
/**
* async_tx_issue_pending - send pending descriptor to the hardware channel
* @tx: descriptor handle to retrieve hardware context
*
* Note: any dependent operations will have already been issued by
* async_tx_channel_switch, or (in the case of no channel switch) will
* be already pending on this channel.
*/
static inline void async_tx_issue_pending(struct dma_async_tx_descriptor *tx)
{
if (likely(tx)) {
struct dma_chan *chan = tx->chan;
struct dma_device *dma = chan->device;
dma->device_issue_pending(chan);
}
}
#ifdef CONFIG_ARCH_HAS_ASYNC_TX_FIND_CHANNEL
#include <asm/async_tx.h>
#else
#define async_tx_find_channel(dep, type, dst, dst_count, src, src_count, len) \
__async_tx_find_channel(dep, type)
struct dma_chan *
__async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx,
__async_tx_find_channel(struct async_submit_ctl *submit,
enum dma_transaction_type tx_type);
#endif /* CONFIG_ARCH_HAS_ASYNC_TX_FIND_CHANNEL */
#else
@ -84,10 +119,16 @@ static inline void async_tx_issue_pending_all(void)
do { } while (0);
}
static inline void async_tx_issue_pending(struct dma_async_tx_descriptor *tx)
{
do { } while (0);
}
static inline struct dma_chan *
async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx,
enum dma_transaction_type tx_type, struct page **dst, int dst_count,
struct page **src, int src_count, size_t len)
async_tx_find_channel(struct async_submit_ctl *submit,
enum dma_transaction_type tx_type, struct page **dst,
int dst_count, struct page **src, int src_count,
size_t len)
{
return NULL;
}
@ -99,46 +140,70 @@ async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx,
* @cb_fn_param: parameter to pass to the callback routine
*/
static inline void
async_tx_sync_epilog(dma_async_tx_callback cb_fn, void *cb_fn_param)
async_tx_sync_epilog(struct async_submit_ctl *submit)
{
if (cb_fn)
cb_fn(cb_fn_param);
if (submit->cb_fn)
submit->cb_fn(submit->cb_param);
}
void
async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx,
enum async_tx_flags flags, struct dma_async_tx_descriptor *depend_tx,
dma_async_tx_callback cb_fn, void *cb_fn_param);
typedef union {
unsigned long addr;
struct page *page;
dma_addr_t dma;
} addr_conv_t;
static inline void
init_async_submit(struct async_submit_ctl *args, enum async_tx_flags flags,
struct dma_async_tx_descriptor *tx,
dma_async_tx_callback cb_fn, void *cb_param,
addr_conv_t *scribble)
{
args->flags = flags;
args->depend_tx = tx;
args->cb_fn = cb_fn;
args->cb_param = cb_param;
args->scribble = scribble;
}
void async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx,
struct async_submit_ctl *submit);
struct dma_async_tx_descriptor *
async_xor(struct page *dest, struct page **src_list, unsigned int offset,
int src_cnt, size_t len, enum async_tx_flags flags,
struct dma_async_tx_descriptor *depend_tx,
dma_async_tx_callback cb_fn, void *cb_fn_param);
int src_cnt, size_t len, struct async_submit_ctl *submit);
struct dma_async_tx_descriptor *
async_xor_zero_sum(struct page *dest, struct page **src_list,
unsigned int offset, int src_cnt, size_t len,
u32 *result, enum async_tx_flags flags,
struct dma_async_tx_descriptor *depend_tx,
dma_async_tx_callback cb_fn, void *cb_fn_param);
async_xor_val(struct page *dest, struct page **src_list, unsigned int offset,
int src_cnt, size_t len, enum sum_check_flags *result,
struct async_submit_ctl *submit);
struct dma_async_tx_descriptor *
async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset,
unsigned int src_offset, size_t len, enum async_tx_flags flags,
struct dma_async_tx_descriptor *depend_tx,
dma_async_tx_callback cb_fn, void *cb_fn_param);
unsigned int src_offset, size_t len,
struct async_submit_ctl *submit);
struct dma_async_tx_descriptor *
async_memset(struct page *dest, int val, unsigned int offset,
size_t len, enum async_tx_flags flags,
struct dma_async_tx_descriptor *depend_tx,
dma_async_tx_callback cb_fn, void *cb_fn_param);
size_t len, struct async_submit_ctl *submit);
struct dma_async_tx_descriptor *async_trigger_callback(struct async_submit_ctl *submit);
struct dma_async_tx_descriptor *
async_trigger_callback(enum async_tx_flags flags,
struct dma_async_tx_descriptor *depend_tx,
dma_async_tx_callback cb_fn, void *cb_fn_param);
async_gen_syndrome(struct page **blocks, unsigned int offset, int src_cnt,
size_t len, struct async_submit_ctl *submit);
struct dma_async_tx_descriptor *
async_syndrome_val(struct page **blocks, unsigned int offset, int src_cnt,
size_t len, enum sum_check_flags *pqres, struct page *spare,
struct async_submit_ctl *submit);
struct dma_async_tx_descriptor *
async_raid6_2data_recov(int src_num, size_t bytes, int faila, int failb,
struct page **ptrs, struct async_submit_ctl *submit);
struct dma_async_tx_descriptor *
async_raid6_datap_recov(int src_num, size_t bytes, int faila,
struct page **ptrs, struct async_submit_ctl *submit);
void async_tx_quiesce(struct dma_async_tx_descriptor **tx);
#endif /* _ASYNC_TX_H_ */

View File

@ -20,6 +20,9 @@
*/
#ifndef DCA_H
#define DCA_H
#include <linux/pci.h>
/* DCA Provider API */
/* DCA Notifier Interface */
@ -36,6 +39,12 @@ struct dca_provider {
int id;
};
struct dca_domain {
struct list_head node;
struct list_head dca_providers;
struct pci_bus *pci_rc;
};
struct dca_ops {
int (*add_requester) (struct dca_provider *, struct device *);
int (*remove_requester) (struct dca_provider *, struct device *);
@ -47,7 +56,7 @@ struct dca_ops {
struct dca_provider *alloc_dca_provider(struct dca_ops *ops, int priv_size);
void free_dca_provider(struct dca_provider *dca);
int register_dca_provider(struct dca_provider *dca, struct device *dev);
void unregister_dca_provider(struct dca_provider *dca);
void unregister_dca_provider(struct dca_provider *dca, struct device *dev);
static inline void *dca_priv(struct dca_provider *dca)
{

View File

@ -48,19 +48,20 @@ enum dma_status {
/**
* enum dma_transaction_type - DMA transaction types/indexes
*
* Note: The DMA_ASYNC_TX capability is not to be set by drivers. It is
* automatically set as dma devices are registered.
*/
enum dma_transaction_type {
DMA_MEMCPY,
DMA_XOR,
DMA_PQ_XOR,
DMA_DUAL_XOR,
DMA_PQ_UPDATE,
DMA_ZERO_SUM,
DMA_PQ_ZERO_SUM,
DMA_PQ,
DMA_XOR_VAL,
DMA_PQ_VAL,
DMA_MEMSET,
DMA_MEMCPY_CRC32C,
DMA_INTERRUPT,
DMA_PRIVATE,
DMA_ASYNC_TX,
DMA_SLAVE,
};
@ -74,14 +75,21 @@ enum dma_transaction_type {
* @DMA_PREP_INTERRUPT - trigger an interrupt (callback) upon completion of
* this transaction
* @DMA_CTRL_ACK - the descriptor cannot be reused until the client
* acknowledges receipt, i.e. has has a chance to establish any
* dependency chains
* acknowledges receipt, i.e. has has a chance to establish any dependency
* chains
* @DMA_COMPL_SKIP_SRC_UNMAP - set to disable dma-unmapping the source buffer(s)
* @DMA_COMPL_SKIP_DEST_UNMAP - set to disable dma-unmapping the destination(s)
* @DMA_COMPL_SRC_UNMAP_SINGLE - set to do the source dma-unmapping as single
* (if not set, do the source dma-unmapping as page)
* @DMA_COMPL_DEST_UNMAP_SINGLE - set to do the destination dma-unmapping as single
* (if not set, do the destination dma-unmapping as page)
* @DMA_PREP_PQ_DISABLE_P - prevent generation of P while generating Q
* @DMA_PREP_PQ_DISABLE_Q - prevent generation of Q while generating P
* @DMA_PREP_CONTINUE - indicate to a driver that it is reusing buffers as
* sources that were the result of a previous operation, in the case of a PQ
* operation it continues the calculation with new sources
* @DMA_PREP_FENCE - tell the driver that subsequent operations depend
* on the result of this operation
*/
enum dma_ctrl_flags {
DMA_PREP_INTERRUPT = (1 << 0),
@ -90,8 +98,31 @@ enum dma_ctrl_flags {
DMA_COMPL_SKIP_DEST_UNMAP = (1 << 3),
DMA_COMPL_SRC_UNMAP_SINGLE = (1 << 4),
DMA_COMPL_DEST_UNMAP_SINGLE = (1 << 5),
DMA_PREP_PQ_DISABLE_P = (1 << 6),
DMA_PREP_PQ_DISABLE_Q = (1 << 7),
DMA_PREP_CONTINUE = (1 << 8),
DMA_PREP_FENCE = (1 << 9),
};
/**
* enum sum_check_bits - bit position of pq_check_flags
*/
enum sum_check_bits {
SUM_CHECK_P = 0,
SUM_CHECK_Q = 1,
};
/**
* enum pq_check_flags - result of async_{xor,pq}_zero_sum operations
* @SUM_CHECK_P_RESULT - 1 if xor zero sum error, 0 otherwise
* @SUM_CHECK_Q_RESULT - 1 if reed-solomon zero sum error, 0 otherwise
*/
enum sum_check_flags {
SUM_CHECK_P_RESULT = (1 << SUM_CHECK_P),
SUM_CHECK_Q_RESULT = (1 << SUM_CHECK_Q),
};
/**
* dma_cap_mask_t - capabilities bitmap modeled after cpumask_t.
* See linux/cpumask.h
@ -180,8 +211,6 @@ typedef void (*dma_async_tx_callback)(void *dma_async_param);
* @flags: flags to augment operation preparation, control completion, and
* communicate status
* @phys: physical address of the descriptor
* @tx_list: driver common field for operations that require multiple
* descriptors
* @chan: target channel for this operation
* @tx_submit: set the prepared descriptor(s) to be executed by the engine
* @callback: routine to call after this operation is complete
@ -195,7 +224,6 @@ struct dma_async_tx_descriptor {
dma_cookie_t cookie;
enum dma_ctrl_flags flags; /* not a 'long' to pack with cookie */
dma_addr_t phys;
struct list_head tx_list;
struct dma_chan *chan;
dma_cookie_t (*tx_submit)(struct dma_async_tx_descriptor *tx);
dma_async_tx_callback callback;
@ -213,6 +241,11 @@ struct dma_async_tx_descriptor {
* @global_node: list_head for global dma_device_list
* @cap_mask: one or more dma_capability flags
* @max_xor: maximum number of xor sources, 0 if no capability
* @max_pq: maximum number of PQ sources and PQ-continue capability
* @copy_align: alignment shift for memcpy operations
* @xor_align: alignment shift for xor operations
* @pq_align: alignment shift for pq operations
* @fill_align: alignment shift for memset operations
* @dev_id: unique device ID
* @dev: struct device reference for dma mapping api
* @device_alloc_chan_resources: allocate resources and return the
@ -220,7 +253,9 @@ struct dma_async_tx_descriptor {
* @device_free_chan_resources: release DMA channel's resources
* @device_prep_dma_memcpy: prepares a memcpy operation
* @device_prep_dma_xor: prepares a xor operation
* @device_prep_dma_zero_sum: prepares a zero_sum operation
* @device_prep_dma_xor_val: prepares a xor validation operation
* @device_prep_dma_pq: prepares a pq operation
* @device_prep_dma_pq_val: prepares a pqzero_sum operation
* @device_prep_dma_memset: prepares a memset operation
* @device_prep_dma_interrupt: prepares an end of chain interrupt operation
* @device_prep_slave_sg: prepares a slave dma operation
@ -235,7 +270,13 @@ struct dma_device {
struct list_head channels;
struct list_head global_node;
dma_cap_mask_t cap_mask;
int max_xor;
unsigned short max_xor;
unsigned short max_pq;
u8 copy_align;
u8 xor_align;
u8 pq_align;
u8 fill_align;
#define DMA_HAS_PQ_CONTINUE (1 << 15)
int dev_id;
struct device *dev;
@ -249,9 +290,17 @@ struct dma_device {
struct dma_async_tx_descriptor *(*device_prep_dma_xor)(
struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src,
unsigned int src_cnt, size_t len, unsigned long flags);
struct dma_async_tx_descriptor *(*device_prep_dma_zero_sum)(
struct dma_async_tx_descriptor *(*device_prep_dma_xor_val)(
struct dma_chan *chan, dma_addr_t *src, unsigned int src_cnt,
size_t len, u32 *result, unsigned long flags);
size_t len, enum sum_check_flags *result, unsigned long flags);
struct dma_async_tx_descriptor *(*device_prep_dma_pq)(
struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src,
unsigned int src_cnt, const unsigned char *scf,
size_t len, unsigned long flags);
struct dma_async_tx_descriptor *(*device_prep_dma_pq_val)(
struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src,
unsigned int src_cnt, const unsigned char *scf, size_t len,
enum sum_check_flags *pqres, unsigned long flags);
struct dma_async_tx_descriptor *(*device_prep_dma_memset)(
struct dma_chan *chan, dma_addr_t dest, int value, size_t len,
unsigned long flags);
@ -270,6 +319,96 @@ struct dma_device {
void (*device_issue_pending)(struct dma_chan *chan);
};
static inline bool dmaengine_check_align(u8 align, size_t off1, size_t off2, size_t len)
{
size_t mask;
if (!align)
return true;
mask = (1 << align) - 1;
if (mask & (off1 | off2 | len))
return false;
return true;
}
static inline bool is_dma_copy_aligned(struct dma_device *dev, size_t off1,
size_t off2, size_t len)
{
return dmaengine_check_align(dev->copy_align, off1, off2, len);
}
static inline bool is_dma_xor_aligned(struct dma_device *dev, size_t off1,
size_t off2, size_t len)
{
return dmaengine_check_align(dev->xor_align, off1, off2, len);
}
static inline bool is_dma_pq_aligned(struct dma_device *dev, size_t off1,
size_t off2, size_t len)
{
return dmaengine_check_align(dev->pq_align, off1, off2, len);
}
static inline bool is_dma_fill_aligned(struct dma_device *dev, size_t off1,
size_t off2, size_t len)
{
return dmaengine_check_align(dev->fill_align, off1, off2, len);
}
static inline void
dma_set_maxpq(struct dma_device *dma, int maxpq, int has_pq_continue)
{
dma->max_pq = maxpq;
if (has_pq_continue)
dma->max_pq |= DMA_HAS_PQ_CONTINUE;
}
static inline bool dmaf_continue(enum dma_ctrl_flags flags)
{
return (flags & DMA_PREP_CONTINUE) == DMA_PREP_CONTINUE;
}
static inline bool dmaf_p_disabled_continue(enum dma_ctrl_flags flags)
{
enum dma_ctrl_flags mask = DMA_PREP_CONTINUE | DMA_PREP_PQ_DISABLE_P;
return (flags & mask) == mask;
}
static inline bool dma_dev_has_pq_continue(struct dma_device *dma)
{
return (dma->max_pq & DMA_HAS_PQ_CONTINUE) == DMA_HAS_PQ_CONTINUE;
}
static unsigned short dma_dev_to_maxpq(struct dma_device *dma)
{
return dma->max_pq & ~DMA_HAS_PQ_CONTINUE;
}
/* dma_maxpq - reduce maxpq in the face of continued operations
* @dma - dma device with PQ capability
* @flags - to check if DMA_PREP_CONTINUE and DMA_PREP_PQ_DISABLE_P are set
*
* When an engine does not support native continuation we need 3 extra
* source slots to reuse P and Q with the following coefficients:
* 1/ {00} * P : remove P from Q', but use it as a source for P'
* 2/ {01} * Q : use Q to continue Q' calculation
* 3/ {00} * Q : subtract Q from P' to cancel (2)
*
* In the case where P is disabled we only need 1 extra source:
* 1/ {01} * Q : use Q to continue Q' calculation
*/
static inline int dma_maxpq(struct dma_device *dma, enum dma_ctrl_flags flags)
{
if (dma_dev_has_pq_continue(dma) || !dmaf_continue(flags))
return dma_dev_to_maxpq(dma);
else if (dmaf_p_disabled_continue(flags))
return dma_dev_to_maxpq(dma) - 1;
else if (dmaf_continue(flags))
return dma_dev_to_maxpq(dma) - 3;
BUG();
}
/* --- public DMA engine API --- */
#ifdef CONFIG_DMA_ENGINE
@ -299,7 +438,11 @@ static inline void net_dmaengine_put(void)
#ifdef CONFIG_ASYNC_TX_DMA
#define async_dmaengine_get() dmaengine_get()
#define async_dmaengine_put() dmaengine_put()
#ifdef CONFIG_ASYNC_TX_DISABLE_CHANNEL_SWITCH
#define async_dma_find_channel(type) dma_find_channel(DMA_ASYNC_TX)
#else
#define async_dma_find_channel(type) dma_find_channel(type)
#endif /* CONFIG_ASYNC_TX_DISABLE_CHANNEL_SWITCH */
#else
static inline void async_dmaengine_get(void)
{
@ -312,7 +455,7 @@ async_dma_find_channel(enum dma_transaction_type type)
{
return NULL;
}
#endif
#endif /* CONFIG_ASYNC_TX_DMA */
dma_cookie_t dma_async_memcpy_buf_to_buf(struct dma_chan *chan,
void *dest, void *src, size_t len);

View File

@ -2526,6 +2526,16 @@
#define PCI_DEVICE_ID_INTEL_E7525_MCH 0x359e
#define PCI_DEVICE_ID_INTEL_IOAT_CNB 0x360b
#define PCI_DEVICE_ID_INTEL_FBD_CNB 0x360c
#define PCI_DEVICE_ID_INTEL_IOAT_JSF0 0x3710
#define PCI_DEVICE_ID_INTEL_IOAT_JSF1 0x3711
#define PCI_DEVICE_ID_INTEL_IOAT_JSF2 0x3712
#define PCI_DEVICE_ID_INTEL_IOAT_JSF3 0x3713
#define PCI_DEVICE_ID_INTEL_IOAT_JSF4 0x3714
#define PCI_DEVICE_ID_INTEL_IOAT_JSF5 0x3715
#define PCI_DEVICE_ID_INTEL_IOAT_JSF6 0x3716
#define PCI_DEVICE_ID_INTEL_IOAT_JSF7 0x3717
#define PCI_DEVICE_ID_INTEL_IOAT_JSF8 0x3718
#define PCI_DEVICE_ID_INTEL_IOAT_JSF9 0x3719
#define PCI_DEVICE_ID_INTEL_ICH10_0 0x3a14
#define PCI_DEVICE_ID_INTEL_ICH10_1 0x3a16
#define PCI_DEVICE_ID_INTEL_ICH10_2 0x3a18