Skip to content

Commit

Permalink
Merge branch 'for-4.5/drivers' of git://git.kernel.dk/linux-block
Browse files Browse the repository at this point in the history
Pull block driver updates from Jens Axboe:
 "This is the block driver pull request for 4.5, with the exception of
  NVMe, which is in a separate branch and will be posted after this one.

  This pull request contains:

   - A set of bcache stability fixes, which have been acked by Kent.
     These have been used and tested for more than a year by the
     community, so it's about time that they got in.

   - A set of drbd updates from the drbd team (Andreas, Lars, Philipp)
     and Markus Elfring, Oleg Drokin.

   - A set of fixes for xen blkback/front from the usual suspects (Bob,
     Konrad), as well as community-based fixes from Kiri, Julien, and
     Peng.

   - A 2038 time fix for sx8 from Shraddha, with a fix from me.

   - A small mtip32xx cleanup from Zhu Yanjun.

   - A null_blk division fix from Arnd"

* 'for-4.5/drivers' of git://git.kernel.dk/linux-block: (71 commits)
  null_blk: use sector_div instead of do_div
  mtip32xx: restrict variables visible in current code module
  xen/blkfront: Fix crash if backend doesn't follow the right states.
  xen/blkback: Fix two memory leaks.
  xen/blkback: make st_ statistics per ring
  xen/blkfront: Handle non-indirect grant with 64KB pages
  xen-blkfront: Introduce blkif_ring_get_request
  xen-blkback: clear PF_NOFREEZE for xen_blkif_schedule()
  xen/blkback: Free resources if connect_ring failed.
  xen/blocks: Return -EXX instead of -1
  xen/blkback: make pool of persistent grants and free pages per-queue
  xen/blkback: get the number of hardware queues/rings from blkfront
  xen/blkback: pseudo support for multi hardware queues/rings
  xen/blkback: separate ring information out of struct xen_blkif
  xen/blkfront: correct setting for xen_blkif_max_ring_order
  xen/blkfront: make persistent grants pool per-queue
  xen/blkfront: Remove duplicate setting of ->xbdev.
  xen/blkfront: Cleanup of comments, fix unaligned variables, and syntax errors.
  xen/blkfront: negotiate number of queues/rings to be used with backend
  xen/blkfront: split per device io_lock
  ...
  • Loading branch information
torvalds committed Jan 22, 2016
2 parents 404a474 + e93d12a commit 6412035
Show file tree
Hide file tree
Showing 33 changed files with 3,893 additions and 1,326 deletions.
11 changes: 5 additions & 6 deletions MAINTAINERS
Original file line number Diff line number Diff line change
Expand Up @@ -3665,13 +3665,12 @@ F: drivers/scsi/dpt*
F: drivers/scsi/dpt/

DRBD DRIVER
P: Philipp Reisner
P: Lars Ellenberg
M: [email protected]
L: [email protected]
M: Philipp Reisner <[email protected]>
M: Lars Ellenberg <[email protected]>
L: [email protected]
W: http://www.drbd.org
T: git git://git.drbd.org/linux-2.6-drbd.git drbd
T: git git://git.drbd.org/drbd-8.3.git
T: git git://git.linbit.com/linux-drbd.git
T: git git://git.linbit.com/drbd-8.4.git
S: Supported
F: drivers/block/drbd/
F: lib/lru_cache.c
Expand Down
323 changes: 167 additions & 156 deletions drivers/block/drbd/drbd_actlog.c
Original file line number Diff line number Diff line change
Expand Up @@ -288,7 +288,162 @@ bool drbd_al_begin_io_prepare(struct drbd_device *device, struct drbd_interval *
return need_transaction;
}

static int al_write_transaction(struct drbd_device *device);
#if (PAGE_SHIFT + 3) < (AL_EXTENT_SHIFT - BM_BLOCK_SHIFT)
/* Compile-time guard for al_extent_to_bm_page(), which shifts right by
 * (PAGE_SHIFT + 3) - (AL_EXTENT_SHIFT - BM_BLOCK_SHIFT). If that
 * difference were negative, the shift count would be invalid.
 *
 * Currently BM_BLOCK_SHIFT, BM_EXT_SHIFT and AL_EXTENT_SHIFT
 * are still coupled, or assume too much about their relation.
 * Code below will not work if this is violated.
 * Will be cleaned up with some followup patch.
 */
# error FIXME
#endif

/*
 * Translate an activity log extent number into a bitmap page number.
 *
 * The shift is the difference between the "bit to page" shift and the
 * "al extent number to bit" shift; the preprocessor check preceding this
 * function keeps that difference from going negative.
 */
static unsigned int al_extent_to_bm_page(unsigned int al_enr)
{
	/* bit to page */
	const unsigned int bit_to_page_shift = PAGE_SHIFT + 3;
	/* al extent number to bit */
	const unsigned int extent_to_bit_shift = AL_EXTENT_SHIFT - BM_BLOCK_SHIFT;

	return al_enr >> (bit_to_page_shift - extent_to_bit_shift);
}

static sector_t al_tr_number_to_on_disk_sector(struct drbd_device *device)
{
const unsigned int stripes = device->ldev->md.al_stripes;
const unsigned int stripe_size_4kB = device->ldev->md.al_stripe_size_4k;

/* transaction number, modulo on-disk ring buffer wrap around */
unsigned int t = device->al_tr_number % (device->ldev->md.al_size_4k);

/* ... to aligned 4k on disk block */
t = ((t % stripes) * stripe_size_4kB) + t/stripes;

/* ... to 512 byte sector in activity log */
t *= 8;

/* ... plus offset to the on disk position */
return device->ldev->md.md_offset + device->ldev->md.al_offset + t;
}

static int __al_write_transaction(struct drbd_device *device, struct al_transaction_on_disk *buffer)
{
struct lc_element *e;
sector_t sector;
int i, mx;
unsigned extent_nr;
unsigned crc = 0;
int err = 0;

memset(buffer, 0, sizeof(*buffer));
buffer->magic = cpu_to_be32(DRBD_AL_MAGIC);
buffer->tr_number = cpu_to_be32(device->al_tr_number);

i = 0;

/* Even though no one can start to change this list
* once we set the LC_LOCKED -- from drbd_al_begin_io(),
* lc_try_lock_for_transaction() --, someone may still
* be in the process of changing it. */
spin_lock_irq(&device->al_lock);
list_for_each_entry(e, &device->act_log->to_be_changed, list) {
if (i == AL_UPDATES_PER_TRANSACTION) {
i++;
break;
}
buffer->update_slot_nr[i] = cpu_to_be16(e->lc_index);
buffer->update_extent_nr[i] = cpu_to_be32(e->lc_new_number);
if (e->lc_number != LC_FREE)
drbd_bm_mark_for_writeout(device,
al_extent_to_bm_page(e->lc_number));
i++;
}
spin_unlock_irq(&device->al_lock);
BUG_ON(i > AL_UPDATES_PER_TRANSACTION);

buffer->n_updates = cpu_to_be16(i);
for ( ; i < AL_UPDATES_PER_TRANSACTION; i++) {
buffer->update_slot_nr[i] = cpu_to_be16(-1);
buffer->update_extent_nr[i] = cpu_to_be32(LC_FREE);
}

buffer->context_size = cpu_to_be16(device->act_log->nr_elements);
buffer->context_start_slot_nr = cpu_to_be16(device->al_tr_cycle);

mx = min_t(int, AL_CONTEXT_PER_TRANSACTION,
device->act_log->nr_elements - device->al_tr_cycle);
for (i = 0; i < mx; i++) {
unsigned idx = device->al_tr_cycle + i;
extent_nr = lc_element_by_index(device->act_log, idx)->lc_number;
buffer->context[i] = cpu_to_be32(extent_nr);
}
for (; i < AL_CONTEXT_PER_TRANSACTION; i++)
buffer->context[i] = cpu_to_be32(LC_FREE);

device->al_tr_cycle += AL_CONTEXT_PER_TRANSACTION;
if (device->al_tr_cycle >= device->act_log->nr_elements)
device->al_tr_cycle = 0;

sector = al_tr_number_to_on_disk_sector(device);

crc = crc32c(0, buffer, 4096);
buffer->crc32c = cpu_to_be32(crc);

if (drbd_bm_write_hinted(device))
err = -EIO;
else {
bool write_al_updates;
rcu_read_lock();
write_al_updates = rcu_dereference(device->ldev->disk_conf)->al_updates;
rcu_read_unlock();
if (write_al_updates) {
if (drbd_md_sync_page_io(device, device->ldev, sector, WRITE)) {
err = -EIO;
drbd_chk_io_error(device, 1, DRBD_META_IO_ERROR);
} else {
device->al_tr_number++;
device->al_writ_cnt++;
}
}
}

return err;
}

/*
 * Write one activity log transaction for @device.
 *
 * Takes a local disk reference and the meta data IO buffer, hands the
 * actual work to __al_write_transaction(), and releases both again.
 *
 * Returns the result of __al_write_transaction(), -EIO if no usable
 * local disk is available, or -ENODEV if the meta data IO buffer could
 * not be obtained.
 */
static int al_write_transaction(struct drbd_device *device)
{
	struct al_transaction_on_disk *buffer;
	int err;

	if (!get_ldev(device)) {
		drbd_err(device, "disk is %s, cannot start al transaction\n",
			drbd_disk_str(device->state.disk));
		return -EIO;
	}

	/* The bitmap write may have failed, causing a state change. */
	if (device->state.disk < D_INCONSISTENT) {
		drbd_err(device,
			"disk is %s, cannot write al transaction\n",
			drbd_disk_str(device->state.disk));
		err = -EIO;
		goto out_put_ldev;
	}

	/* protects md_io_buffer, al_tr_cycle, ... */
	buffer = drbd_md_get_buffer(device, __func__);
	if (!buffer) {
		drbd_err(device, "disk failed while waiting for md_io buffer\n");
		err = -ENODEV;
		goto out_put_ldev;
	}

	err = __al_write_transaction(device, buffer);
	drbd_md_put_buffer(device);

out_put_ldev:
	put_ldev(device);
	return err;
}


void drbd_al_begin_io_commit(struct drbd_device *device)
{
Expand Down Expand Up @@ -420,153 +575,6 @@ void drbd_al_complete_io(struct drbd_device *device, struct drbd_interval *i)
wake_up(&device->al_wait);
}

#if (PAGE_SHIFT + 3) < (AL_EXTENT_SHIFT - BM_BLOCK_SHIFT)
/* Compile-time guard for al_extent_to_bm_page(), which shifts right by
 * (PAGE_SHIFT + 3) - (AL_EXTENT_SHIFT - BM_BLOCK_SHIFT). If that
 * difference were negative, the shift count would be invalid.
 *
 * Currently BM_BLOCK_SHIFT, BM_EXT_SHIFT and AL_EXTENT_SHIFT
 * are still coupled, or assume too much about their relation.
 * Code below will not work if this is violated.
 * Will be cleaned up with some followup patch.
 */
# error FIXME
#endif

/*
 * Translate an activity log extent number into a bitmap page number.
 *
 * The shift is the difference between the "bit to page" shift and the
 * "al extent number to bit" shift; the preprocessor check preceding this
 * function keeps that difference from going negative.
 */
static unsigned int al_extent_to_bm_page(unsigned int al_enr)
{
	/* bit to page */
	const unsigned int bit_to_page_shift = PAGE_SHIFT + 3;
	/* al extent number to bit */
	const unsigned int extent_to_bit_shift = AL_EXTENT_SHIFT - BM_BLOCK_SHIFT;

	return al_enr >> (bit_to_page_shift - extent_to_bit_shift);
}

static sector_t al_tr_number_to_on_disk_sector(struct drbd_device *device)
{
const unsigned int stripes = device->ldev->md.al_stripes;
const unsigned int stripe_size_4kB = device->ldev->md.al_stripe_size_4k;

/* transaction number, modulo on-disk ring buffer wrap around */
unsigned int t = device->al_tr_number % (device->ldev->md.al_size_4k);

/* ... to aligned 4k on disk block */
t = ((t % stripes) * stripe_size_4kB) + t/stripes;

/* ... to 512 byte sector in activity log */
t *= 8;

/* ... plus offset to the on disk position */
return device->ldev->md.md_offset + device->ldev->md.al_offset + t;
}

int al_write_transaction(struct drbd_device *device)
{
struct al_transaction_on_disk *buffer;
struct lc_element *e;
sector_t sector;
int i, mx;
unsigned extent_nr;
unsigned crc = 0;
int err = 0;

if (!get_ldev(device)) {
drbd_err(device, "disk is %s, cannot start al transaction\n",
drbd_disk_str(device->state.disk));
return -EIO;
}

/* The bitmap write may have failed, causing a state change. */
if (device->state.disk < D_INCONSISTENT) {
drbd_err(device,
"disk is %s, cannot write al transaction\n",
drbd_disk_str(device->state.disk));
put_ldev(device);
return -EIO;
}

/* protects md_io_buffer, al_tr_cycle, ... */
buffer = drbd_md_get_buffer(device, __func__);
if (!buffer) {
drbd_err(device, "disk failed while waiting for md_io buffer\n");
put_ldev(device);
return -ENODEV;
}

memset(buffer, 0, sizeof(*buffer));
buffer->magic = cpu_to_be32(DRBD_AL_MAGIC);
buffer->tr_number = cpu_to_be32(device->al_tr_number);

i = 0;

/* Even though no one can start to change this list
* once we set the LC_LOCKED -- from drbd_al_begin_io(),
* lc_try_lock_for_transaction() --, someone may still
* be in the process of changing it. */
spin_lock_irq(&device->al_lock);
list_for_each_entry(e, &device->act_log->to_be_changed, list) {
if (i == AL_UPDATES_PER_TRANSACTION) {
i++;
break;
}
buffer->update_slot_nr[i] = cpu_to_be16(e->lc_index);
buffer->update_extent_nr[i] = cpu_to_be32(e->lc_new_number);
if (e->lc_number != LC_FREE)
drbd_bm_mark_for_writeout(device,
al_extent_to_bm_page(e->lc_number));
i++;
}
spin_unlock_irq(&device->al_lock);
BUG_ON(i > AL_UPDATES_PER_TRANSACTION);

buffer->n_updates = cpu_to_be16(i);
for ( ; i < AL_UPDATES_PER_TRANSACTION; i++) {
buffer->update_slot_nr[i] = cpu_to_be16(-1);
buffer->update_extent_nr[i] = cpu_to_be32(LC_FREE);
}

buffer->context_size = cpu_to_be16(device->act_log->nr_elements);
buffer->context_start_slot_nr = cpu_to_be16(device->al_tr_cycle);

mx = min_t(int, AL_CONTEXT_PER_TRANSACTION,
device->act_log->nr_elements - device->al_tr_cycle);
for (i = 0; i < mx; i++) {
unsigned idx = device->al_tr_cycle + i;
extent_nr = lc_element_by_index(device->act_log, idx)->lc_number;
buffer->context[i] = cpu_to_be32(extent_nr);
}
for (; i < AL_CONTEXT_PER_TRANSACTION; i++)
buffer->context[i] = cpu_to_be32(LC_FREE);

device->al_tr_cycle += AL_CONTEXT_PER_TRANSACTION;
if (device->al_tr_cycle >= device->act_log->nr_elements)
device->al_tr_cycle = 0;

sector = al_tr_number_to_on_disk_sector(device);

crc = crc32c(0, buffer, 4096);
buffer->crc32c = cpu_to_be32(crc);

if (drbd_bm_write_hinted(device))
err = -EIO;
else {
bool write_al_updates;
rcu_read_lock();
write_al_updates = rcu_dereference(device->ldev->disk_conf)->al_updates;
rcu_read_unlock();
if (write_al_updates) {
if (drbd_md_sync_page_io(device, device->ldev, sector, WRITE)) {
err = -EIO;
drbd_chk_io_error(device, 1, DRBD_META_IO_ERROR);
} else {
device->al_tr_number++;
device->al_writ_cnt++;
}
}
}

drbd_md_put_buffer(device);
put_ldev(device);

return err;
}

static int _try_lc_del(struct drbd_device *device, struct lc_element *al_ext)
{
int rv;
Expand Down Expand Up @@ -606,21 +614,24 @@ void drbd_al_shrink(struct drbd_device *device)
wake_up(&device->al_wait);
}

int drbd_initialize_al(struct drbd_device *device, void *buffer)
int drbd_al_initialize(struct drbd_device *device, void *buffer)
{
struct al_transaction_on_disk *al = buffer;
struct drbd_md *md = &device->ldev->md;
sector_t al_base = md->md_offset + md->al_offset;
int al_size_4k = md->al_stripes * md->al_stripe_size_4k;
int i;

memset(al, 0, 4096);
al->magic = cpu_to_be32(DRBD_AL_MAGIC);
al->transaction_type = cpu_to_be16(AL_TR_INITIALIZED);
al->crc32c = cpu_to_be32(crc32c(0, al, 4096));
__al_write_transaction(device, al);
/* There may or may not have been a pending transaction. */
spin_lock_irq(&device->al_lock);
lc_committed(device->act_log);
spin_unlock_irq(&device->al_lock);

for (i = 0; i < al_size_4k; i++) {
int err = drbd_md_sync_page_io(device, device->ldev, al_base + i * 8, WRITE);
/* The rest of the transactions will have an empty "updates" list, and
* are written out only to provide the context, and to initialize the
* on-disk ring buffer. */
for (i = 1; i < al_size_4k; i++) {
int err = __al_write_transaction(device, al);
if (err)
return err;
}
Expand Down
Loading

0 comments on commit 6412035

Please sign in to comment.