Skip to content

Commit

Permalink
async_tx: kill tx_set_src and tx_set_dest methods
Browse files Browse the repository at this point in the history
The tx_set_src and tx_set_dest methods were originally implemented to allow
an array of addresses to be passed down from async_xor to the dmaengine
driver while minimizing stack overhead.  Removing these methods allows
drivers to have all transaction parameters available at 'prep' time, saves
two function pointers in struct dma_async_tx_descriptor, and reduces the
number of indirect branches.

A consequence of moving this data to the 'prep' routine is that
multi-source routines like async_xor need temporary storage to convert an
array of linear addresses into an array of dma addresses.  In order to keep
the same stack footprint as the previous implementation, the input array is
reused as storage for the dma addresses.  This requires that
sizeof(dma_addr_t) be less than or equal to sizeof(void *).  As a
consequence CONFIG_DMADEVICES now depends on !CONFIG_HIGHMEM64G.  It also
requires that drivers be able to make descriptor resources available when
the 'prep' routine is polled.

Signed-off-by: Dan Williams <[email protected]>
Acked-by: Shannon Nelson <[email protected]>
  • Loading branch information
djbw committed Feb 6, 2008
1 parent d909b34 commit 0036731
Show file tree
Hide file tree
Showing 8 changed files with 178 additions and 196 deletions.
27 changes: 13 additions & 14 deletions crypto/async_tx/async_memcpy.c
Original file line number Diff line number Diff line change
Expand Up @@ -48,26 +48,25 @@ async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset,
{
struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_MEMCPY);
struct dma_device *device = chan ? chan->device : NULL;
int int_en = cb_fn ? 1 : 0;
struct dma_async_tx_descriptor *tx = device ?
device->device_prep_dma_memcpy(chan, len,
int_en) : NULL;
struct dma_async_tx_descriptor *tx = NULL;

if (tx) { /* run the memcpy asynchronously */
dma_addr_t addr;
if (device) {
dma_addr_t dma_dest, dma_src;

pr_debug("%s: (async) len: %zu\n", __FUNCTION__, len);
dma_dest = dma_map_page(device->dev, dest, dest_offset, len,
DMA_FROM_DEVICE);

addr = dma_map_page(device->dev, dest, dest_offset, len,
DMA_FROM_DEVICE);
tx->tx_set_dest(addr, tx, 0);
dma_src = dma_map_page(device->dev, src, src_offset, len,
DMA_TO_DEVICE);

addr = dma_map_page(device->dev, src, src_offset, len,
DMA_TO_DEVICE);
tx->tx_set_src(addr, tx, 0);
tx = device->device_prep_dma_memcpy(chan, dma_dest, dma_src,
len, cb_fn != NULL);
}

if (tx) {
pr_debug("%s: (async) len: %zu\n", __FUNCTION__, len);
async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param);
} else { /* run the memcpy synchronously */
} else {
void *dest_buf, *src_buf;
pr_debug("%s: (sync) len: %zu\n", __FUNCTION__, len);

Expand Down
20 changes: 10 additions & 10 deletions crypto/async_tx/async_memset.c
Original file line number Diff line number Diff line change
Expand Up @@ -48,20 +48,20 @@ async_memset(struct page *dest, int val, unsigned int offset,
{
struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_MEMSET);
struct dma_device *device = chan ? chan->device : NULL;
int int_en = cb_fn ? 1 : 0;
struct dma_async_tx_descriptor *tx = device ?
device->device_prep_dma_memset(chan, val, len,
int_en) : NULL;
struct dma_async_tx_descriptor *tx = NULL;

if (tx) { /* run the memset asynchronously */
dma_addr_t dma_addr;
if (device) {
dma_addr_t dma_dest;

pr_debug("%s: (async) len: %zu\n", __FUNCTION__, len);

dma_addr = dma_map_page(device->dev, dest, offset, len,
dma_dest = dma_map_page(device->dev, dest, offset, len,
DMA_FROM_DEVICE);
tx->tx_set_dest(dma_addr, tx, 0);

tx = device->device_prep_dma_memset(chan, dma_dest, val, len,
cb_fn != NULL);
}

if (tx) {
pr_debug("%s: (async) len: %zu\n", __FUNCTION__, len);
async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param);
} else { /* run the memset synchronously */
void *dest_buf;
Expand Down
94 changes: 60 additions & 34 deletions crypto/async_tx/async_xor.c
Original file line number Diff line number Diff line change
Expand Up @@ -34,29 +34,46 @@
* This routine is marked __always_inline so it can be compiled away
* when CONFIG_DMA_ENGINE=n
*/
static __always_inline void
do_async_xor(struct dma_async_tx_descriptor *tx, struct dma_device *device,
static __always_inline struct dma_async_tx_descriptor *
do_async_xor(struct dma_device *device,
struct dma_chan *chan, struct page *dest, struct page **src_list,
unsigned int offset, unsigned int src_cnt, size_t len,
enum async_tx_flags flags, struct dma_async_tx_descriptor *depend_tx,
dma_async_tx_callback cb_fn, void *cb_param)
{
dma_addr_t dma_addr;
dma_addr_t dma_dest;
dma_addr_t *dma_src = (dma_addr_t *) src_list;
struct dma_async_tx_descriptor *tx;
int i;

pr_debug("%s: len: %zu\n", __FUNCTION__, len);

dma_addr = dma_map_page(device->dev, dest, offset, len,
dma_dest = dma_map_page(device->dev, dest, offset, len,
DMA_FROM_DEVICE);
tx->tx_set_dest(dma_addr, tx, 0);

for (i = 0; i < src_cnt; i++) {
dma_addr = dma_map_page(device->dev, src_list[i],
offset, len, DMA_TO_DEVICE);
tx->tx_set_src(dma_addr, tx, i);
for (i = 0; i < src_cnt; i++)
dma_src[i] = dma_map_page(device->dev, src_list[i], offset,
len, DMA_TO_DEVICE);

/* Since we have clobbered the src_list we are committed
* to doing this asynchronously. Drivers force forward progress
* in case they can not provide a descriptor
*/
tx = device->device_prep_dma_xor(chan, dma_dest, dma_src, src_cnt, len,
cb_fn != NULL);
if (!tx) {
if (depend_tx)
dma_wait_for_async_tx(depend_tx);

while (!tx)
tx = device->device_prep_dma_xor(chan, dma_dest,
dma_src, src_cnt, len,
cb_fn != NULL);
}

async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param);

return tx;
}

static void
Expand Down Expand Up @@ -118,7 +135,7 @@ async_xor(struct page *dest, struct page **src_list, unsigned int offset,
void *_cb_param;
unsigned long local_flags;
int xor_src_cnt;
int i = 0, src_off = 0, int_en;
int i = 0, src_off = 0;

BUG_ON(src_cnt <= 1);

Expand All @@ -138,20 +155,11 @@ async_xor(struct page *dest, struct page **src_list, unsigned int offset,
_cb_param = cb_param;
}

int_en = _cb_fn ? 1 : 0;

tx = device->device_prep_dma_xor(
chan, xor_src_cnt, len, int_en);

if (tx) {
do_async_xor(tx, device, chan, dest,
&src_list[src_off], offset, xor_src_cnt, len,
local_flags, depend_tx, _cb_fn,
_cb_param);
} else /* fall through */
goto xor_sync;
tx = do_async_xor(device, chan, dest,
&src_list[src_off], offset,
xor_src_cnt, len, local_flags,
depend_tx, _cb_fn, _cb_param);
} else { /* run the xor synchronously */
xor_sync:
/* in the sync case the dest is an implied source
* (assumes the dest is at the src_off index)
*/
Expand Down Expand Up @@ -254,23 +262,31 @@ async_xor_zero_sum(struct page *dest, struct page **src_list,
{
struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_ZERO_SUM);
struct dma_device *device = chan ? chan->device : NULL;
int int_en = cb_fn ? 1 : 0;
struct dma_async_tx_descriptor *tx = device ?
device->device_prep_dma_zero_sum(chan, src_cnt, len, result,
int_en) : NULL;
int i;
struct dma_async_tx_descriptor *tx = NULL;

BUG_ON(src_cnt <= 1);

if (tx) {
dma_addr_t dma_addr;
if (device) {
dma_addr_t *dma_src = (dma_addr_t *) src_list;
int i;

pr_debug("%s: (async) len: %zu\n", __FUNCTION__, len);

for (i = 0; i < src_cnt; i++) {
dma_addr = dma_map_page(device->dev, src_list[i],
offset, len, DMA_TO_DEVICE);
tx->tx_set_src(dma_addr, tx, i);
for (i = 0; i < src_cnt; i++)
dma_src[i] = dma_map_page(device->dev, src_list[i],
offset, len, DMA_TO_DEVICE);

tx = device->device_prep_dma_zero_sum(chan, dma_src, src_cnt,
len, result,
cb_fn != NULL);
if (!tx) {
if (depend_tx)
dma_wait_for_async_tx(depend_tx);

while (!tx)
tx = device->device_prep_dma_zero_sum(chan,
dma_src, src_cnt, len, result,
cb_fn != NULL);
}

async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param);
Expand Down Expand Up @@ -305,6 +321,16 @@ EXPORT_SYMBOL_GPL(async_xor_zero_sum);

/* Init routine: does no runtime work beyond returning 0; when
 * CONFIG_DMA_ENGINE is defined it carries a compile-time size check.
 */
static int __init async_xor_init(void)
{
#ifdef CONFIG_DMA_ENGINE
/* To conserve stack space the input src_list (array of page pointers)
* is reused to hold the array of dma addresses passed to the driver.
* This conversion is only possible when dma_addr_t is no larger than
* a pointer; the BUILD_BUG_ON below rejects the build otherwise.
* HIGHMEM64G is known to violate this assumption.
*/
BUILD_BUG_ON(sizeof(dma_addr_t) > sizeof(struct page *));
#endif

return 0;
}

Expand Down
1 change: 1 addition & 0 deletions drivers/dma/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
menuconfig DMADEVICES
bool "DMA Engine support"
depends on (PCI && X86) || ARCH_IOP32X || ARCH_IOP33X || ARCH_IOP13XX
depends on !HIGHMEM64G
help
DMA engines can do asynchronous data transfers without
involving the host CPU. Currently, this framework can be
Expand Down
49 changes: 28 additions & 21 deletions drivers/dma/dmaengine.c
Original file line number Diff line number Diff line change
Expand Up @@ -473,20 +473,22 @@ dma_async_memcpy_buf_to_buf(struct dma_chan *chan, void *dest,
{
struct dma_device *dev = chan->device;
struct dma_async_tx_descriptor *tx;
dma_addr_t addr;
dma_addr_t dma_dest, dma_src;
dma_cookie_t cookie;
int cpu;

tx = dev->device_prep_dma_memcpy(chan, len, 0);
if (!tx)
dma_src = dma_map_single(dev->dev, src, len, DMA_TO_DEVICE);
dma_dest = dma_map_single(dev->dev, dest, len, DMA_FROM_DEVICE);
tx = dev->device_prep_dma_memcpy(chan, dma_dest, dma_src, len, 0);

if (!tx) {
dma_unmap_single(dev->dev, dma_src, len, DMA_TO_DEVICE);
dma_unmap_single(dev->dev, dma_dest, len, DMA_FROM_DEVICE);
return -ENOMEM;
}

tx->ack = 1;
tx->callback = NULL;
addr = dma_map_single(dev->dev, src, len, DMA_TO_DEVICE);
tx->tx_set_src(addr, tx, 0);
addr = dma_map_single(dev->dev, dest, len, DMA_FROM_DEVICE);
tx->tx_set_dest(addr, tx, 0);
cookie = tx->tx_submit(tx);

cpu = get_cpu();
Expand Down Expand Up @@ -517,20 +519,22 @@ dma_async_memcpy_buf_to_pg(struct dma_chan *chan, struct page *page,
{
struct dma_device *dev = chan->device;
struct dma_async_tx_descriptor *tx;
dma_addr_t addr;
dma_addr_t dma_dest, dma_src;
dma_cookie_t cookie;
int cpu;

tx = dev->device_prep_dma_memcpy(chan, len, 0);
if (!tx)
dma_src = dma_map_single(dev->dev, kdata, len, DMA_TO_DEVICE);
dma_dest = dma_map_page(dev->dev, page, offset, len, DMA_FROM_DEVICE);
tx = dev->device_prep_dma_memcpy(chan, dma_dest, dma_src, len, 0);

if (!tx) {
dma_unmap_single(dev->dev, dma_src, len, DMA_TO_DEVICE);
dma_unmap_page(dev->dev, dma_dest, len, DMA_FROM_DEVICE);
return -ENOMEM;
}

tx->ack = 1;
tx->callback = NULL;
addr = dma_map_single(dev->dev, kdata, len, DMA_TO_DEVICE);
tx->tx_set_src(addr, tx, 0);
addr = dma_map_page(dev->dev, page, offset, len, DMA_FROM_DEVICE);
tx->tx_set_dest(addr, tx, 0);
cookie = tx->tx_submit(tx);

cpu = get_cpu();
Expand Down Expand Up @@ -563,20 +567,23 @@ dma_async_memcpy_pg_to_pg(struct dma_chan *chan, struct page *dest_pg,
{
struct dma_device *dev = chan->device;
struct dma_async_tx_descriptor *tx;
dma_addr_t addr;
dma_addr_t dma_dest, dma_src;
dma_cookie_t cookie;
int cpu;

tx = dev->device_prep_dma_memcpy(chan, len, 0);
if (!tx)
dma_src = dma_map_page(dev->dev, src_pg, src_off, len, DMA_TO_DEVICE);
dma_dest = dma_map_page(dev->dev, dest_pg, dest_off, len,
DMA_FROM_DEVICE);
tx = dev->device_prep_dma_memcpy(chan, dma_dest, dma_src, len, 0);

if (!tx) {
dma_unmap_page(dev->dev, dma_src, len, DMA_TO_DEVICE);
dma_unmap_page(dev->dev, dma_dest, len, DMA_FROM_DEVICE);
return -ENOMEM;
}

tx->ack = 1;
tx->callback = NULL;
addr = dma_map_page(dev->dev, src_pg, src_off, len, DMA_TO_DEVICE);
tx->tx_set_src(addr, tx, 0);
addr = dma_map_page(dev->dev, dest_pg, dest_off, len, DMA_FROM_DEVICE);
tx->tx_set_dest(addr, tx, 0);
cookie = tx->tx_submit(tx);

cpu = get_cpu();
Expand Down
Loading

0 comments on commit 0036731

Please sign in to comment.