Merge tag 'dm-4.3-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm

Pull device mapper update from Mike Snitzer:

 - a couple of small cleanups in dm-cache, dm-verity, persistent-data's
   dm-btree, and DM core.

 - a 4.1-stable fix for dm-cache that fixes the leaking of deferred bio
   prison cells

 - a 4.2-stable fix that adds feature reporting for the dm-stats
   features added in 4.2

 - improve DM-snapshot to not invalidate the on-disk snapshot if a write
   overflow occurs on the snapshot device; a write overflow triggered
   through the origin device will still invalidate the snapshot.

 - optimize DM-thinp's async discard submission a bit now that late bio
   splitting has been included in block core.

 - switch DM-cache's SMQ policy lock from a mutex to a spinlock; this
   improves performance on very low latency devices (e.g. NVMe SSD).

 - document DM RAID 4/5/6's discard support

[ I did not pull the slab changes, which weren't appropriate for this
  tree, and weren't obviously the right thing to do anyway.  At the very
  least they need some discussion and explanation before getting merged.

  Because I'm not pulling the actual tagged commit but doing a partial
  pull instead, this merge commit is obviously also missing the git
  signature from the original tag ]

* tag 'dm-4.3-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm:
  dm cache: fix use after freeing migrations
  dm cache: small cleanups related to deferred prison cell cleanup
  dm cache: fix leaking of deferred bio prison cells
  dm raid: document RAID 4/5/6 discard support
  dm stats: report precise_timestamps and histogram in @stats_list output
  dm thin: optimize async discard submission
  dm snapshot: don't invalidate on-disk image on snapshot write overflow
  dm: remove unlikely() before IS_ERR()
  dm: do not override error code returned from dm_get_device()
  dm: test return value for DM_MAPIO_SUBMITTED
  dm verity: remove unused mempool
  dm cache: move wake_waker() from free_migrations() to where it is needed
  dm btree remove: remove unused function get_nr_entries()
  dm btree: remove unused "dm_block_t root" parameter in btree_split_sibling()
  dm cache policy smq: change the mutex to a spinlock
torvalds committed Sep 2, 2015
2 parents d975f30 + cc7da0b commit 1e1a4e8
Showing 21 changed files with 190 additions and 238 deletions.
31 changes: 31 additions & 0 deletions Documentation/device-mapper/dm-raid.txt
@@ -209,6 +209,37 @@ include:
"repair" - Initiate a repair of the array.
"reshape"- Currently unsupported (-EINVAL).


Discard Support
---------------
The implementation of discard support among hardware vendors varies.
When a block is discarded, some storage devices will return zeroes when
the block is read. These devices set the 'discard_zeroes_data'
attribute. Other devices will return random data. Confusingly, some
devices that advertise 'discard_zeroes_data' will not reliably return
zeroes when discarded blocks are read! Since RAID 4/5/6 uses blocks
from a number of devices to calculate parity blocks and (for performance
reasons) relies on 'discard_zeroes_data' being reliable, it is important
that the devices be consistent. Blocks may be discarded in the middle
of a RAID 4/5/6 stripe, and if subsequent read results are not
consistent, the parity blocks may be calculated differently at any time,
making the parity blocks useless for redundancy. It is important to
understand how your hardware behaves with discards before enabling
discards with RAID 4/5/6.
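
To make the failure mode concrete, here is a small userspace sketch (not
kernel code; the block size, contents and helper are invented for
illustration): parity computed while a discarded block read back as zeroes
no longer matches once the device returns different data for that block,
so the stripe can no longer be reconstructed correctly.

```c
/* Illustrative userspace sketch (not kernel code): why inconsistent reads
 * of discarded blocks break RAID 4/5/6 parity. Block size and contents
 * are made up for the example. */
#include <stdio.h>
#include <string.h>

#define BLOCK 8 /* toy block size in bytes */

/* XOR parity over the data members of one stripe (RAID 5 style). */
static void xor_parity(unsigned char parity[BLOCK],
		       unsigned char data[][BLOCK], int ndata)
{
	memset(parity, 0, BLOCK);
	for (int d = 0; d < ndata; d++)
		for (int i = 0; i < BLOCK; i++)
			parity[i] ^= data[d][i];
}

int main(void)
{
	unsigned char data[3][BLOCK] = {
		"AAAAAAA", "BBBBBBB", "\0\0\0\0\0\0\0" /* block 2 was discarded */
	};
	unsigned char parity_on_disk[BLOCK], parity_now[BLOCK];

	/* Parity written while the discarded block read back as zeroes. */
	xor_parity(parity_on_disk, data, 3);

	/* Later the device returns different data for the discarded block
	 * instead of zeroes, so recomputed parity no longer matches. */
	memcpy(data[2], "garbage", BLOCK);
	xor_parity(parity_now, data, 3);

	printf("parity still valid: %s\n",
	       memcmp(parity_on_disk, parity_now, BLOCK) ? "no" : "yes");
	return 0;
}
```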

Since the behavior of storage devices is unreliable in this respect,
even when reporting 'discard_zeroes_data', by default RAID 4/5/6
discard support is disabled -- this ensures data integrity at the
expense of losing some performance.

Storage devices that properly support 'discard_zeroes_data' are
increasingly whitelisted in the kernel and can thus be trusted.

For trusted devices, the following dm-raid module parameter can be set
to safely enable discard support for RAID 4/5/6:
'devices_handle_discard_safely'


Version History
---------------
1.0.0 Initial version. Support for RAID 4/5/6
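
As an aside on the mechanism, a boolean module parameter of this kind is
normally wired up with module_param(). The following is a sketch of that
pattern using the parameter name from the documentation above, not a
verbatim copy of drivers/md/dm-raid.c; the permission bits and description
text are illustrative. With a writable permission like this, such a
parameter can typically be set at module load time or flipped later via
the module's entry under /sys/module.

```c
/* Sketch of the usual module_param() pattern for a boolean option like
 * the one documented above; not the literal dm-raid.c code. */
#include <linux/module.h>
#include <linux/moduleparam.h>

/* Off by default: RAID 4/5/6 discard support stays disabled unless the
 * administrator opts in for devices known to handle discard safely. */
static bool devices_handle_discard_safely = false;
module_param(devices_handle_discard_safely, bool, 0644);
MODULE_PARM_DESC(devices_handle_discard_safely,
		 "Set to Y if all devices in each array reliably return zeroes on reads from discarded regions");
```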
4 changes: 4 additions & 0 deletions Documentation/device-mapper/statistics.txt
@@ -121,6 +121,10 @@ Messages

Output format:
<region_id>: <start_sector>+<length> <step> <program_id> <aux_data>
precise_timestamps histogram:n1,n2,n3,...

The strings "precise_timestamps" and "histogram" are printed only
if they were specified when creating the region.

@stats_print <region_id> [<starting_line> <number_of_lines>]

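
For consumers of this output, the optional strings can be detected with
plain string matching. The sketch below is a userspace illustration only:
the sample line and the naive strstr() matching are invented for the
example and are not part of the dm-stats interface.

```c
/* Userspace sketch: detecting the optional feature strings in one line of
 * @stats_list output as described above. The sample line is made up. */
#include <stdio.h>
#include <string.h>

int main(void)
{
	const char *line =
		"0: 0+976773168 976773168 dmstats - precise_timestamps histogram:1,2,5,10";

	/* The two strings only appear if the features were requested when
	 * the region was created, so their absence is also meaningful. */
	int precise = strstr(line, "precise_timestamps") != NULL;
	const char *hist = strstr(line, "histogram:");

	printf("precise_timestamps: %s\n", precise ? "yes" : "no");
	if (hist)
		printf("histogram boundaries: %s\n", hist + strlen("histogram:"));
	return 0;
}
```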
110 changes: 39 additions & 71 deletions drivers/md/dm-cache-policy-smq.c
@@ -772,7 +772,7 @@ struct smq_policy {
struct dm_cache_policy policy;

/* protects everything */
struct mutex lock;
spinlock_t lock;
dm_cblock_t cache_size;
sector_t cache_block_size;

@@ -807,13 +807,7 @@ struct smq_policy {
/*
* Keeps track of time, incremented by the core. We use this to
* avoid attributing multiple hits within the same tick.
*
* Access to tick_protected should be done with the spin lock held.
* It's copied to tick at the start of the map function (within the
* mutex).
*/
spinlock_t tick_lock;
unsigned tick_protected;
unsigned tick;

/*
@@ -1296,67 +1290,39 @@ static void smq_destroy(struct dm_cache_policy *p)
kfree(mq);
}

static void copy_tick(struct smq_policy *mq)
{
unsigned long flags, tick;

spin_lock_irqsave(&mq->tick_lock, flags);
tick = mq->tick_protected;
if (tick != mq->tick) {
update_sentinels(mq);
end_hotspot_period(mq);
end_cache_period(mq);
mq->tick = tick;
}
spin_unlock_irqrestore(&mq->tick_lock, flags);
}

static bool maybe_lock(struct smq_policy *mq, bool can_block)
{
if (can_block) {
mutex_lock(&mq->lock);
return true;
} else
return mutex_trylock(&mq->lock);
}

static int smq_map(struct dm_cache_policy *p, dm_oblock_t oblock,
bool can_block, bool can_migrate, bool fast_promote,
struct bio *bio, struct policy_locker *locker,
struct policy_result *result)
{
int r;
unsigned long flags;
struct smq_policy *mq = to_smq_policy(p);

result->op = POLICY_MISS;

if (!maybe_lock(mq, can_block))
return -EWOULDBLOCK;

copy_tick(mq);
spin_lock_irqsave(&mq->lock, flags);
r = map(mq, bio, oblock, can_migrate, fast_promote, locker, result);
mutex_unlock(&mq->lock);
spin_unlock_irqrestore(&mq->lock, flags);

return r;
}

static int smq_lookup(struct dm_cache_policy *p, dm_oblock_t oblock, dm_cblock_t *cblock)
{
int r;
unsigned long flags;
struct smq_policy *mq = to_smq_policy(p);
struct entry *e;

if (!mutex_trylock(&mq->lock))
return -EWOULDBLOCK;

spin_lock_irqsave(&mq->lock, flags);
e = h_lookup(&mq->table, oblock);
if (e) {
*cblock = infer_cblock(mq, e);
r = 0;
} else
r = -ENOENT;

mutex_unlock(&mq->lock);
spin_unlock_irqrestore(&mq->lock, flags);

return r;
}
@@ -1375,20 +1341,22 @@ static void __smq_set_clear_dirty(struct smq_policy *mq, dm_oblock_t oblock, boo

static void smq_set_dirty(struct dm_cache_policy *p, dm_oblock_t oblock)
{
unsigned long flags;
struct smq_policy *mq = to_smq_policy(p);

mutex_lock(&mq->lock);
spin_lock_irqsave(&mq->lock, flags);
__smq_set_clear_dirty(mq, oblock, true);
mutex_unlock(&mq->lock);
spin_unlock_irqrestore(&mq->lock, flags);
}

static void smq_clear_dirty(struct dm_cache_policy *p, dm_oblock_t oblock)
{
struct smq_policy *mq = to_smq_policy(p);
unsigned long flags;

mutex_lock(&mq->lock);
spin_lock_irqsave(&mq->lock, flags);
__smq_set_clear_dirty(mq, oblock, false);
mutex_unlock(&mq->lock);
spin_unlock_irqrestore(&mq->lock, flags);
}

static int smq_load_mapping(struct dm_cache_policy *p,
@@ -1433,14 +1401,14 @@ static int smq_walk_mappings(struct dm_cache_policy *p, policy_walk_fn fn,
struct smq_policy *mq = to_smq_policy(p);
int r = 0;

mutex_lock(&mq->lock);

/*
* We don't need to lock here since this method is only called once
* the IO has stopped.
*/
r = smq_save_hints(mq, &mq->clean, fn, context);
if (!r)
r = smq_save_hints(mq, &mq->dirty, fn, context);

mutex_unlock(&mq->lock);

return r;
}

@@ -1458,10 +1426,11 @@ static void __remove_mapping(struct smq_policy *mq, dm_oblock_t oblock)
static void smq_remove_mapping(struct dm_cache_policy *p, dm_oblock_t oblock)
{
struct smq_policy *mq = to_smq_policy(p);
unsigned long flags;

mutex_lock(&mq->lock);
spin_lock_irqsave(&mq->lock, flags);
__remove_mapping(mq, oblock);
mutex_unlock(&mq->lock);
spin_unlock_irqrestore(&mq->lock, flags);
}

static int __remove_cblock(struct smq_policy *mq, dm_cblock_t cblock)
@@ -1480,11 +1449,12 @@ static int __remove_cblock(struct smq_policy *mq, dm_cblock_t cblock)
static int smq_remove_cblock(struct dm_cache_policy *p, dm_cblock_t cblock)
{
int r;
unsigned long flags;
struct smq_policy *mq = to_smq_policy(p);

mutex_lock(&mq->lock);
spin_lock_irqsave(&mq->lock, flags);
r = __remove_cblock(mq, cblock);
mutex_unlock(&mq->lock);
spin_unlock_irqrestore(&mq->lock, flags);

return r;
}
@@ -1537,11 +1507,12 @@ static int smq_writeback_work(struct dm_cache_policy *p, dm_oblock_t *oblock,
dm_cblock_t *cblock, bool critical_only)
{
int r;
unsigned long flags;
struct smq_policy *mq = to_smq_policy(p);

mutex_lock(&mq->lock);
spin_lock_irqsave(&mq->lock, flags);
r = __smq_writeback_work(mq, oblock, cblock, critical_only);
mutex_unlock(&mq->lock);
spin_unlock_irqrestore(&mq->lock, flags);

return r;
}
@@ -1562,21 +1533,23 @@ static void __force_mapping(struct smq_policy *mq,
static void smq_force_mapping(struct dm_cache_policy *p,
dm_oblock_t current_oblock, dm_oblock_t new_oblock)
{
unsigned long flags;
struct smq_policy *mq = to_smq_policy(p);

mutex_lock(&mq->lock);
spin_lock_irqsave(&mq->lock, flags);
__force_mapping(mq, current_oblock, new_oblock);
mutex_unlock(&mq->lock);
spin_unlock_irqrestore(&mq->lock, flags);
}

static dm_cblock_t smq_residency(struct dm_cache_policy *p)
{
dm_cblock_t r;
unsigned long flags;
struct smq_policy *mq = to_smq_policy(p);

mutex_lock(&mq->lock);
spin_lock_irqsave(&mq->lock, flags);
r = to_cblock(mq->cache_alloc.nr_allocated);
mutex_unlock(&mq->lock);
spin_unlock_irqrestore(&mq->lock, flags);

return r;
}
@@ -1586,15 +1559,12 @@ static void smq_tick(struct dm_cache_policy *p, bool can_block)
struct smq_policy *mq = to_smq_policy(p);
unsigned long flags;

spin_lock_irqsave(&mq->tick_lock, flags);
mq->tick_protected++;
spin_unlock_irqrestore(&mq->tick_lock, flags);

if (can_block) {
mutex_lock(&mq->lock);
copy_tick(mq);
mutex_unlock(&mq->lock);
}
spin_lock_irqsave(&mq->lock, flags);
mq->tick++;
update_sentinels(mq);
end_hotspot_period(mq);
end_cache_period(mq);
spin_unlock_irqrestore(&mq->lock, flags);
}

/* Init the policy plugin interface function pointers. */
@@ -1694,10 +1664,8 @@ static struct dm_cache_policy *smq_create(dm_cblock_t cache_size,
} else
mq->cache_hit_bits = NULL;

mq->tick_protected = 0;
mq->tick = 0;
mutex_init(&mq->lock);
spin_lock_init(&mq->tick_lock);
spin_lock_init(&mq->lock);

q_init(&mq->hotspot, &mq->es, NR_HOTSPOT_LEVELS);
mq->hotspot.nr_top_levels = 8;
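
Since the rendered diff above does not mark removed versus added lines, the
locking conversion is summarized below as a self-contained sketch with an
invented structure (example_policy and its fields are illustrative, not dm
code): the sleeping mutex becomes a spinlock taken with spin_lock_irqsave(),
which never sleeps, keeps the critical sections short, and removes the need
for the separate tick_lock/tick_protected bookkeeping and the
mutex_trylock()/-EWOULDBLOCK fallback seen in the old smq_map() path.

```c
/* Minimal sketch of the conversion pattern used above, with a made-up
 * structure; field and function names are illustrative, not dm code. */
#include <linux/mutex.h>
#include <linux/spinlock.h>

struct example_policy {
	struct mutex big_lock;   /* old scheme: may sleep under contention */
	spinlock_t lock;         /* new scheme: short, non-sleeping sections */
	unsigned tick;
};

static void example_init(struct example_policy *ex)
{
	mutex_init(&ex->big_lock);   /* old */
	spin_lock_init(&ex->lock);   /* new */
	ex->tick = 0;
}

/* Old style: only callable from contexts that are allowed to sleep. */
static void example_tick_mutex(struct example_policy *ex)
{
	mutex_lock(&ex->big_lock);
	ex->tick++;
	mutex_unlock(&ex->big_lock);
}

/* New style: safe regardless of the interrupt state at the call site,
 * and cheap on very low latency devices where lock hold times are tiny. */
static void example_tick_spinlock(struct example_policy *ex)
{
	unsigned long flags;

	spin_lock_irqsave(&ex->lock, flags);
	ex->tick++;
	spin_unlock_irqrestore(&ex->lock, flags);
}
```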
