From e3c3155bc95ab6a7b21ac40418bf80bedb204949 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Tue, 21 Jul 2020 21:10:26 +0900 Subject: [PATCH 1/2] zonefs: add zone-capacity support In the zoned storage model, the sectors within a zone are typically all writeable. With the introduction of the Zoned Namespace (ZNS) Command Set in the NVM Express organization, the model was extended to have a specific writeable capacity. This zone capacity can be less than the overall zone size for a NVMe ZNS device or null_blk in zoned-mode. For other ZBC/ZAC devices the zone capacity is always equal to the zone size. Use the zone capacity field instead from blk_zone for determining the maximum inode size and inode blocks in zonefs. Signed-off-by: Johannes Thumshirn Reviewed-by: Christoph Hellwig Signed-off-by: Damien Le Moal --- fs/zonefs/super.c | 16 ++++++++++++---- fs/zonefs/zonefs.h | 3 +++ 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c index 4a0bff65d3d6..8ec7c8f109d7 100644 --- a/fs/zonefs/super.c +++ b/fs/zonefs/super.c @@ -335,7 +335,7 @@ static void zonefs_io_error(struct inode *inode, bool write) struct zonefs_sb_info *sbi = ZONEFS_SB(sb); unsigned int noio_flag; unsigned int nr_zones = - zi->i_max_size >> (sbi->s_zone_sectors_shift + SECTOR_SHIFT); + zi->i_zone_size >> (sbi->s_zone_sectors_shift + SECTOR_SHIFT); struct zonefs_ioerr_data err = { .inode = inode, .write = write, @@ -398,7 +398,7 @@ static int zonefs_file_truncate(struct inode *inode, loff_t isize) goto unlock; ret = blkdev_zone_mgmt(inode->i_sb->s_bdev, op, zi->i_zsector, - zi->i_max_size >> SECTOR_SHIFT, GFP_NOFS); + zi->i_zone_size >> SECTOR_SHIFT, GFP_NOFS); if (ret) { zonefs_err(inode->i_sb, "Zone management operation at %llu failed %d", @@ -1053,14 +1053,16 @@ static void zonefs_init_file_inode(struct inode *inode, struct blk_zone *zone, zi->i_ztype = type; zi->i_zsector = zone->start; + zi->i_zone_size = zone->len << SECTOR_SHIFT; + zi->i_max_size = min_t(loff_t, MAX_LFS_FILESIZE, - zone->len << SECTOR_SHIFT); + zone->capacity << SECTOR_SHIFT); zi->i_wpoffset = zonefs_check_zone_condition(inode, zone, true, true); inode->i_uid = sbi->s_uid; inode->i_gid = sbi->s_gid; inode->i_size = zi->i_wpoffset; - inode->i_blocks = zone->len; + inode->i_blocks = zi->i_max_size >> SECTOR_SHIFT; inode->i_op = &zonefs_file_inode_operations; inode->i_fop = &zonefs_file_operations; @@ -1167,12 +1169,18 @@ static int zonefs_create_zgroup(struct zonefs_zone_data *zd, if (zonefs_zone_type(next) != type) break; zone->len += next->len; + zone->capacity += next->capacity; if (next->cond == BLK_ZONE_COND_READONLY && zone->cond != BLK_ZONE_COND_OFFLINE) zone->cond = BLK_ZONE_COND_READONLY; else if (next->cond == BLK_ZONE_COND_OFFLINE) zone->cond = BLK_ZONE_COND_OFFLINE; } + if (zone->capacity != zone->len) { + zonefs_err(sb, "Invalid conventional zone capacity\n"); + ret = -EINVAL; + goto free; + } } /* diff --git a/fs/zonefs/zonefs.h b/fs/zonefs/zonefs.h index ad17fef7ce91..55b39970acb2 100644 --- a/fs/zonefs/zonefs.h +++ b/fs/zonefs/zonefs.h @@ -56,6 +56,9 @@ struct zonefs_inode_info { /* File maximum size */ loff_t i_max_size; + /* File zone size */ + loff_t i_zone_size; + /* * To serialise fully against both syscall and mmap based IO and * sequential file truncation, two locks are used. For serializing From 4c96870e58f8bce1c7eba5f92ec69089ae6798f4 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Tue, 21 Jul 2020 21:10:27 +0900 Subject: [PATCH 2/2] zonefs: update documentation to reflect zone size vs capacity Update the zonefs documentation to reflect the difference between a zone's size and it's capacity. The maximum file size in zonefs is the zones capacity, for ZBC and ZAC based devices, which do not have a separate zone capacity, the zone capacity is equal to the zone size. Signed-off-by: Johannes Thumshirn Reviewed-by: Christoph Hellwig Signed-off-by: Damien Le Moal --- Documentation/filesystems/zonefs.rst | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/Documentation/filesystems/zonefs.rst b/Documentation/filesystems/zonefs.rst index 71d845c6a700..6c18bc8ce332 100644 --- a/Documentation/filesystems/zonefs.rst +++ b/Documentation/filesystems/zonefs.rst @@ -110,14 +110,14 @@ contain files named "0", "1", "2", ... The file numbers also represent increasing zone start sector on the device. All read and write operations to zone files are not allowed beyond the file -maximum size, that is, beyond the zone size. Any access exceeding the zone -size is failed with the -EFBIG error. +maximum size, that is, beyond the zone capacity. Any access exceeding the zone +capacity is failed with the -EFBIG error. Creating, deleting, renaming or modifying any attribute of files and sub-directories is not allowed. The number of blocks of a file as reported by stat() and fstat() indicates the -size of the file zone, or in other words, the maximum file size. +capacity of the zone file, or in other words, the maximum file size. Conventional zone files ----------------------- @@ -156,8 +156,8 @@ all accepted. Truncating sequential zone files is allowed only down to 0, in which case, the zone is reset to rewind the file zone write pointer position to the start of -the zone, or up to the zone size, in which case the file's zone is transitioned -to the FULL state (finish zone operation). +the zone, or up to the zone capacity, in which case the file's zone is +transitioned to the FULL state (finish zone operation). Format options -------------- @@ -324,7 +324,7 @@ file size set to 0. This is necessary as the write pointer of read-only zones is defined as invalib by the ZBC and ZAC standards, making it impossible to discover the amount of data that has been written to the zone. In the case of a read-only zone discovered at run-time, as indicated in the previous section. -the size of the zone file is left unchanged from its last updated value. +The size of the zone file is left unchanged from its last updated value. Zonefs User Space Tools ======================= @@ -401,8 +401,9 @@ append-writes to the file:: # ls -l /mnt/seq/0 -rw-r----- 1 root root 0 Nov 25 13:49 /mnt/seq/0 -Since files are statically mapped to zones on the disk, the number of blocks of -a file as reported by stat() and fstat() indicates the size of the file zone:: +Since files are statically mapped to zones on the disk, the number of blocks +of a file as reported by stat() and fstat() indicates the capacity of the file +zone:: # stat /mnt/seq/0 File: /mnt/seq/0 @@ -416,5 +417,6 @@ a file as reported by stat() and fstat() indicates the size of the file zone:: The number of blocks of the file ("Blocks") in units of 512B blocks gives the maximum file size of 524288 * 512 B = 256 MB, corresponding to the device zone -size in this example. Of note is that the "IO block" field always indicates the -minimum I/O size for writes and corresponds to the device physical sector size. +capacity in this example. Of note is that the "IO block" field always +indicates the minimum I/O size for writes and corresponds to the device +physical sector size.