block-6.14-20250228

-----BEGIN PGP SIGNATURE-----

iQJEBAABCAAuFiEEwPw5LcreJtl1+l5K99NY+ylx4KYFAmfBwygQHGF4Ym9lQGtl
cm5lbC5kawAKCRD301j7KXHgpuyVD/9kem557zNDkps/+2k8Q86FGZ/XmD+GPu1H
l30qlar1XubeC/AE/bxgyI8G6rWY9li3PPn0tu/LeTgTVW5noIZCyvrtxl8g6yKV
Gptm3H5AJypMU9cDz1/KTYTgrEypDJ22092/V1cuoeJxUS3srIEx6rlBp1wXzoG6
WdEIBhk9hM3hwXghyEarJeacHFe6xzd9lJM9ZODXBMkKtee85zXDLSAEPJsnjCcH
t2tU/EAa6O0MLuYorG4Lkfs0ggDP+UDRdwh2MbANZXZdUCG2SwBS3pKDYtn684A1
gSsPnJGVZjLTog9jzaGkw64ebZ8tdLU4szjzroAJYkIbz9kO3QxT+H4TfW5UMoip
TVPdNDqvypqs8ENKUvv3XuGsKuOfYjpBEiU2oGUUuioHJnWlh6CPnt8V8t3YKnbP
xreqnIOjRJni1/OOZOMcWfRLlIRMG2dGFwhskWBWY8dmt4eHoge3RQzPZtAFelcG
eM+Gkczz+GAXAnFHt5JQIPnfmcVmXqkbX12uoxUyuoa4AFaDLT+7nVtu3Gj5/beJ
bcvk8q6ww8oXGVvJ0sYwic9tOX4XoxHsdr8u80Wd0uvHUB6uU/HTAxQxUO3uMSD5
0pk9l/zGDjDcEcuOUiIAUldl2M1eIyoBIOK3svMq6TKiC13j7+xkGI1uSA9cKws6
/+OsNMd9JQ==
=tcA2
-----END PGP SIGNATURE-----

Merge tag 'block-6.14-20250228' of git://git.kernel.dk/linux

Pull block fixes from Jens Axboe:

 - Fix plugging for native zone writes

 - Fix segment limit settings for != 4K page size archs

 - Fix for slab names overflowing

* tag 'block-6.14-20250228' of git://git.kernel.dk/linux:
  block: fix 'kmem_cache of name 'bio-108' already exists'
  block: Remove zone write plugs when handling native zone append writes
  block: make segment size limit workable for > 4K PAGE_SIZE
commit 276f98efb6

6 changed files with 94 additions and 17 deletions
--- a/block/bio.c
+++ b/block/bio.c
@@ -77,7 +77,7 @@ struct bio_slab {
 	struct kmem_cache *slab;
 	unsigned int slab_ref;
 	unsigned int slab_size;
-	char name[8];
+	char name[12];
 };
 static DEFINE_MUTEX(bio_slab_lock);
 static DEFINE_XARRAY(bio_slabs);
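The slab-name fix is easiest to see in isolation: bio slab cache names are generated with snprintf() into the fixed-size name[] field, and an 8-byte buffer silently truncates longer names, so distinct slab sizes collide on the same kmem_cache name. A minimal userspace sketch, not kernel code; the sizes 1080 and 1088 are illustrative:

	#include <stdio.h>

	int main(void)
	{
		char name[8];	/* the old struct bio_slab field size */

		/* snprintf() silently truncates to 7 chars + NUL, so two
		 * different slab sizes collapse to the same cache name. */
		snprintf(name, sizeof(name), "bio-%d", 1080);
		printf("%s\n", name);		/* prints "bio-108" */
		snprintf(name, sizeof(name), "bio-%d", 1088);
		printf("%s\n", name);		/* also "bio-108": duplicate */
		return 0;
	}

Widening the field to 12 bytes leaves room for the "bio-" prefix plus the slab sizes that occur in practice, so the duplicate-name failure at cache creation goes away.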
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -329,7 +329,7 @@ int bio_split_rw_at(struct bio *bio, const struct queue_limits *lim,
 
 		if (nsegs < lim->max_segments &&
 		    bytes + bv.bv_len <= max_bytes &&
-		    bv.bv_offset + bv.bv_len <= PAGE_SIZE) {
+		    bv.bv_offset + bv.bv_len <= lim->min_segment_size) {
 			nsegs++;
 			bytes += bv.bv_len;
 		} else {
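Why the bound in bio_split_rw_at() changes: the fast path may count a bvec as a single segment only if it cannot exceed any hardware segment limit. Once segment size limits smaller than PAGE_SIZE are accepted (see the blk-settings.c hunk below), comparing against PAGE_SIZE is no longer safe on large-page architectures. A standalone sketch of just this test, with a hypothetical helper name and illustrative numbers:

	#include <stdbool.h>
	#include <stdio.h>

	#define PAGE_SIZE 65536u	/* e.g. a 64K page architecture */

	/* Hypothetical reduction of the fast-path test: a bvec wholly
	 * contained in the given bound is counted as one segment without
	 * walking the full queue limits. */
	static bool fast_path_one_segment(unsigned int bv_offset,
					  unsigned int bv_len,
					  unsigned int bound)
	{
		return bv_offset + bv_len <= bound;
	}

	int main(void)
	{
		/* Device with an 8K max_segment_size: a 16K bvec fits in
		 * the page, so the old "<= PAGE_SIZE" bound would wrongly
		 * skip the limit checks. */
		printf("old: %d\n", fast_path_one_segment(0, 16384, PAGE_SIZE));
		printf("new: %d\n", fast_path_one_segment(0, 16384, 8192));
		return 0;
	}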
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -246,6 +246,7 @@ int blk_validate_limits(struct queue_limits *lim)
 {
 	unsigned int max_hw_sectors;
 	unsigned int logical_block_sectors;
+	unsigned long seg_size;
 	int err;
 
 	/*
@@ -303,7 +304,7 @@ int blk_validate_limits(struct queue_limits *lim)
 	max_hw_sectors = min_not_zero(lim->max_hw_sectors,
 				lim->max_dev_sectors);
 	if (lim->max_user_sectors) {
-		if (lim->max_user_sectors < PAGE_SIZE / SECTOR_SIZE)
+		if (lim->max_user_sectors < BLK_MIN_SEGMENT_SIZE / SECTOR_SIZE)
 			return -EINVAL;
 		lim->max_sectors = min(max_hw_sectors, lim->max_user_sectors);
 	} else if (lim->io_opt > (BLK_DEF_MAX_SECTORS_CAP << SECTOR_SHIFT)) {
@@ -341,7 +342,7 @@ int blk_validate_limits(struct queue_limits *lim)
 	 */
 	if (!lim->seg_boundary_mask)
 		lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK;
-	if (WARN_ON_ONCE(lim->seg_boundary_mask < PAGE_SIZE - 1))
+	if (WARN_ON_ONCE(lim->seg_boundary_mask < BLK_MIN_SEGMENT_SIZE - 1))
 		return -EINVAL;
 
 	/*
@@ -362,10 +363,17 @@ int blk_validate_limits(struct queue_limits *lim)
 		 */
 		if (!lim->max_segment_size)
 			lim->max_segment_size = BLK_MAX_SEGMENT_SIZE;
-		if (WARN_ON_ONCE(lim->max_segment_size < PAGE_SIZE))
+		if (WARN_ON_ONCE(lim->max_segment_size < BLK_MIN_SEGMENT_SIZE))
 			return -EINVAL;
 	}
 
+	/* setup min segment size for building new segment in fast path */
+	if (lim->seg_boundary_mask > lim->max_segment_size - 1)
+		seg_size = lim->max_segment_size;
+	else
+		seg_size = lim->seg_boundary_mask + 1;
+	lim->min_segment_size = min_t(unsigned int, seg_size, PAGE_SIZE);
+
 	/*
 	 * We require drivers to at least do logical block aligned I/O, but
	 * historically could not check for that due to the separate calls
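The new min_segment_size limit ties the two changes above together: validation now only requires limits of at least BLK_MIN_SEGMENT_SIZE (4K) rather than PAGE_SIZE, and the derived min_segment_size gives the bio fast path a single conservative bound. The program below mirrors the derivation in the hunk just shown; derive_min_segment_size is a hypothetical name and the limit values are illustrative:

	#include <stdio.h>

	#define BLK_MIN_SEGMENT_SIZE	4096u
	#define PAGE_SIZE		65536ul	/* illustrative 64K page arch */

	/* Standalone mirror of the new logic in blk_validate_limits():
	 * min_segment_size is the smallest of the max segment size, the
	 * span allowed by the segment boundary mask, and PAGE_SIZE. */
	static unsigned int derive_min_segment_size(unsigned int max_segment_size,
						    unsigned long seg_boundary_mask)
	{
		unsigned long seg_size;

		if (seg_boundary_mask > max_segment_size - 1)
			seg_size = max_segment_size;
		else
			seg_size = seg_boundary_mask + 1;
		return seg_size < PAGE_SIZE ? seg_size : PAGE_SIZE;
	}

	int main(void)
	{
		/* An 8K segment limit now validates on a 64K page arch, and
		 * the bio fast path compares against 8K, not PAGE_SIZE. */
		printf("%u\n", derive_min_segment_size(8192, ~0ul));	/* 8192 */
		/* A large segment limit is capped at PAGE_SIZE. */
		printf("%u\n", derive_min_segment_size(1u << 20, ~0ul));	/* 65536 */
		return 0;
	}

On 4K page systems both inputs are already at least 4K, so min_segment_size degenerates to PAGE_SIZE and behavior there is unchanged.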
--- a/block/blk-zoned.c
+++ b/block/blk-zoned.c
@@ -410,13 +410,14 @@ static bool disk_insert_zone_wplug(struct gendisk *disk,
 		}
 	}
 	hlist_add_head_rcu(&zwplug->node, &disk->zone_wplugs_hash[idx]);
+	atomic_inc(&disk->nr_zone_wplugs);
 	spin_unlock_irqrestore(&disk->zone_wplugs_lock, flags);
 
 	return true;
 }
 
-static struct blk_zone_wplug *disk_get_zone_wplug(struct gendisk *disk,
+static struct blk_zone_wplug *disk_get_hashed_zone_wplug(struct gendisk *disk,
 						  sector_t sector)
 {
 	unsigned int zno = disk_zone_no(disk, sector);
 	unsigned int idx = hash_32(zno, disk->zone_wplugs_hash_bits);
@@ -437,6 +438,15 @@ static struct blk_zone_wplug *disk_get_zone_wplug(struct gendisk *disk,
 	return NULL;
 }
 
+static inline struct blk_zone_wplug *disk_get_zone_wplug(struct gendisk *disk,
+							 sector_t sector)
+{
+	if (!atomic_read(&disk->nr_zone_wplugs))
+		return NULL;
+
+	return disk_get_hashed_zone_wplug(disk, sector);
+}
+
 static void disk_free_zone_wplug_rcu(struct rcu_head *rcu_head)
 {
 	struct blk_zone_wplug *zwplug =
@@ -503,6 +513,7 @@ static void disk_remove_zone_wplug(struct gendisk *disk,
 	zwplug->flags |= BLK_ZONE_WPLUG_UNHASHED;
 	spin_lock_irqsave(&disk->zone_wplugs_lock, flags);
 	hlist_del_init_rcu(&zwplug->node);
+	atomic_dec(&disk->nr_zone_wplugs);
 	spin_unlock_irqrestore(&disk->zone_wplugs_lock, flags);
 	disk_put_zone_wplug(zwplug);
 }
@@ -593,6 +604,11 @@ static void disk_zone_wplug_abort(struct blk_zone_wplug *zwplug)
 {
 	struct bio *bio;
 
+	if (bio_list_empty(&zwplug->bio_list))
+		return;
+
+	pr_warn_ratelimited("%s: zone %u: Aborting plugged BIOs\n",
+			    zwplug->disk->disk_name, zwplug->zone_no);
 	while ((bio = bio_list_pop(&zwplug->bio_list)))
 		blk_zone_wplug_bio_io_error(zwplug, bio);
 }
@@ -1040,6 +1056,47 @@ plug:
 	return true;
 }
 
+static void blk_zone_wplug_handle_native_zone_append(struct bio *bio)
+{
+	struct gendisk *disk = bio->bi_bdev->bd_disk;
+	struct blk_zone_wplug *zwplug;
+	unsigned long flags;
+
+	/*
+	 * We have native support for zone append operations, so we are not
+	 * going to handle @bio through plugging. However, we may already have a
+	 * zone write plug for the target zone if that zone was previously
+	 * partially written using regular writes. In such case, we risk leaving
+	 * the plug in the disk hash table if the zone is fully written using
+	 * zone append operations. Avoid this by removing the zone write plug.
+	 */
+	zwplug = disk_get_zone_wplug(disk, bio->bi_iter.bi_sector);
+	if (likely(!zwplug))
+		return;
+
+	spin_lock_irqsave(&zwplug->lock, flags);
+
+	/*
+	 * We are about to remove the zone write plug. But if the user
+	 * (mistakenly) has issued regular writes together with native zone
+	 * append, we must abort the writes as otherwise the plugged BIOs would
+	 * not be executed by the plug BIO work as disk_get_zone_wplug() will
+	 * return NULL after the plug is removed. Aborting the plugged write
+	 * BIOs is consistent with the fact that these writes will most likely
+	 * fail anyway as there are no ordering guarantees between zone append
+	 * operations and regular write operations.
+	 */
+	if (!bio_list_empty(&zwplug->bio_list)) {
+		pr_warn_ratelimited("%s: zone %u: Invalid mix of zone append and regular writes\n",
+				    disk->disk_name, zwplug->zone_no);
+		disk_zone_wplug_abort(zwplug);
+	}
+	disk_remove_zone_wplug(disk, zwplug);
+	spin_unlock_irqrestore(&zwplug->lock, flags);
+
+	disk_put_zone_wplug(zwplug);
+}
+
 /**
  * blk_zone_plug_bio - Handle a zone write BIO with zone write plugging
  * @bio: The BIO being submitted
@@ -1096,8 +1153,10 @@ bool blk_zone_plug_bio(struct bio *bio, unsigned int nr_segs)
 	 */
 	switch (bio_op(bio)) {
 	case REQ_OP_ZONE_APPEND:
-		if (!bdev_emulates_zone_append(bdev))
+		if (!bdev_emulates_zone_append(bdev)) {
+			blk_zone_wplug_handle_native_zone_append(bio);
 			return false;
+		}
 		fallthrough;
 	case REQ_OP_WRITE:
 	case REQ_OP_WRITE_ZEROES:
@@ -1284,6 +1343,7 @@ static int disk_alloc_zone_resources(struct gendisk *disk,
 {
 	unsigned int i;
 
+	atomic_set(&disk->nr_zone_wplugs, 0);
 	disk->zone_wplugs_hash_bits =
 		min(ilog2(pool_size) + 1, BLK_ZONE_WPLUG_MAX_HASH_BITS);
 
@@ -1338,6 +1398,7 @@ static void disk_destroy_zone_wplugs_hash_table(struct gendisk *disk)
 		}
 	}
 
+	WARN_ON_ONCE(atomic_read(&disk->nr_zone_wplugs));
 	kfree(disk->zone_wplugs_hash);
 	disk->zone_wplugs_hash = NULL;
 	disk->zone_wplugs_hash_bits = 0;
@@ -1550,11 +1611,12 @@ static int blk_revalidate_seq_zone(struct blk_zone *zone, unsigned int idx,
 	}
 
 	/*
-	 * We need to track the write pointer of all zones that are not
-	 * empty nor full. So make sure we have a zone write plug for
-	 * such zone if the device has a zone write plug hash table.
+	 * If the device needs zone append emulation, we need to track the
+	 * write pointer of all zones that are not empty nor full. So make sure
+	 * we have a zone write plug for such zone if the device has a zone
+	 * write plug hash table.
 	 */
-	if (!disk->zone_wplugs_hash)
+	if (!queue_emulates_zone_append(disk->queue) || !disk->zone_wplugs_hash)
 		return 0;
 
 	disk_zone_wplug_sync_wp_offset(disk, zone);
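The nr_zone_wplugs counter added across these hunks exists so that disk_get_zone_wplug() can bail out before computing a hash when no plugs exist at all, which is the steady state for devices with native zone append. A hypothetical miniature of the pattern (plug_table, lookup_hashed and lookup are made-up names, using C11 atomics in place of the kernel's atomic_t):

	#include <stdatomic.h>
	#include <stddef.h>

	/* Made-up miniature, not the kernel code: a counter of hashed
	 * entries lets the hot path skip both the hash computation and the
	 * bucket walk when the table is known to be empty. */
	struct plug_table {
		atomic_uint nr_entries;
		/* ... hash buckets, lock ... */
	};

	static void *lookup_hashed(struct plug_table *t, unsigned long zone_no)
	{
		/* hash_32() + hlist walk under rcu_read_lock() in the
		 * real code; elided here */
		(void)t;
		(void)zone_no;
		return NULL;
	}

	static inline void *lookup(struct plug_table *t, unsigned long zone_no)
	{
		if (!atomic_load(&t->nr_entries))
			return NULL;	/* fast path: table known empty */
		return lookup_hashed(t, zone_no);
	}

	int main(void)
	{
		struct plug_table t = { .nr_entries = 0 };

		return lookup(&t, 42) != NULL;	/* takes the fast path */
	}

In the hunks above the counter is updated under zone_wplugs_lock while the fast-path read is lock-free, so a racing reader can only observe a slightly stale count, which is how the kernel code uses it.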
--- a/block/blk.h
+++ b/block/blk.h
@@ -14,6 +14,7 @@
 struct elevator_type;
 
 #define BLK_DEV_MAX_SECTORS	(LLONG_MAX >> 9)
+#define BLK_MIN_SEGMENT_SIZE	4096
 
 /* Max future timer expiry for timeouts */
 #define BLK_MAX_TIMEOUT		(5 * HZ)
@@ -358,8 +359,12 @@ struct bio *bio_split_zone_append(struct bio *bio,
 static inline bool bio_may_need_split(struct bio *bio,
 				      const struct queue_limits *lim)
 {
-	return lim->chunk_sectors || bio->bi_vcnt != 1 ||
-		bio->bi_io_vec->bv_len + bio->bi_io_vec->bv_offset > PAGE_SIZE;
+	if (lim->chunk_sectors)
+		return true;
+	if (bio->bi_vcnt != 1)
+		return true;
+	return bio->bi_io_vec->bv_len + bio->bi_io_vec->bv_offset >
+		lim->min_segment_size;
 }
 
 /**
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -196,10 +196,11 @@ struct gendisk {
 	unsigned int		zone_capacity;
 	unsigned int		last_zone_capacity;
 	unsigned long __rcu	*conv_zones_bitmap;
 	unsigned int		zone_wplugs_hash_bits;
+	atomic_t		nr_zone_wplugs;
 	spinlock_t		zone_wplugs_lock;
 	struct mempool_s	*zone_wplugs_pool;
 	struct hlist_head	*zone_wplugs_hash;
 	struct workqueue_struct *zone_wplugs_wq;
 #endif /* CONFIG_BLK_DEV_ZONED */
@@ -367,6 +368,7 @@ struct queue_limits {
 	unsigned int		max_sectors;
 	unsigned int		max_user_sectors;
 	unsigned int		max_segment_size;
+	unsigned int		min_segment_size;
 	unsigned int		physical_block_size;
 	unsigned int		logical_block_size;
 	unsigned int		alignment_offset;