diff --git a/Documentation/devicetree/bindings/mtd/partitions/linux,ubi.yaml b/Documentation/devicetree/bindings/mtd/partitions/linux,ubi.yaml
new file mode 100644
index 000000000000..27e1ac1f252e
--- /dev/null
+++ b/Documentation/devicetree/bindings/mtd/partitions/linux,ubi.yaml
@@ -0,0 +1,75 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/mtd/partitions/linux,ubi.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Unsorted Block Images
+
+description: |
+  UBI ("Unsorted Block Images") is a volume management system for raw
+  flash devices which manages multiple logical volumes on a single
+  physical flash device and spreads the I/O load (i.e. wear-leveling)
+  across the whole flash chip.
+
+maintainers:
+  - Daniel Golle
+
+allOf:
+  - $ref: partition.yaml#
+
+properties:
+  compatible:
+    const: linux,ubi
+
+  volumes:
+    type: object
+    description: UBI Volumes
+
+    patternProperties:
+      "^ubi-volume-.*$":
+        $ref: /schemas/mtd/partitions/ubi-volume.yaml#
+
+    unevaluatedProperties: false
+
+required:
+  - compatible
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    partitions {
+        compatible = "fixed-partitions";
+        #address-cells = <1>;
+        #size-cells = <1>;
+
+        partition@0 {
+            reg = <0x0 0x100000>;
+            label = "bootloader";
+            read-only;
+        };
+
+        partition@100000 {
+            reg = <0x100000 0x1ff00000>;
+            label = "ubi";
+            compatible = "linux,ubi";
+
+            volumes {
+                ubi-volume-caldata {
+                    volid = <2>;
+                    volname = "rf";
+
+                    nvmem-layout {
+                        compatible = "fixed-layout";
+                        #address-cells = <1>;
+                        #size-cells = <1>;
+
+                        eeprom@0 {
+                            reg = <0x0 0x1000>;
+                        };
+                    };
+                };
+            };
+        };
+    };
diff --git a/Documentation/devicetree/bindings/mtd/partitions/ubi-volume.yaml b/Documentation/devicetree/bindings/mtd/partitions/ubi-volume.yaml
new file mode 100644
index 000000000000..19736b26056b
--- /dev/null
+++ b/Documentation/devicetree/bindings/mtd/partitions/ubi-volume.yaml
@@ -0,0 +1,40 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/mtd/partitions/ubi-volume.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: UBI volume
+
+description: |
+  This binding describes a single UBI volume. Volumes can be matched either
+  by their ID or their name, or both.
+
+maintainers:
+  - Daniel Golle
+
+properties:
+  volid:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description:
+      Match UBI volume ID
+
+  volname:
+    $ref: /schemas/types.yaml#/definitions/string
+    description:
+      Match UBI volume name
+
+  nvmem-layout:
+    $ref: /schemas/nvmem/layouts/nvmem-layout.yaml#
+    description:
+      This container may reference an NVMEM layout parser.
+
+anyOf:
+  - required:
+      - volid
+
+  - required:
+      - volname
+
+# This is a generic file other bindings inherit from and extend
+additionalProperties: true
diff --git a/Documentation/mm/page_cache.rst b/Documentation/mm/page_cache.rst
index 75eba7c431b2..138d61f869df 100644
--- a/Documentation/mm/page_cache.rst
+++ b/Documentation/mm/page_cache.rst
@@ -3,3 +3,13 @@
 ==========
 Page Cache
 ==========
+
+The page cache is the primary way that the user and the rest of the kernel
+interact with filesystems. It can be bypassed (e.g. with O_DIRECT),
+but normal reads, writes and mmaps go through the page cache.
+
+Folios
+======
+
+The folio is the unit of memory management within the page cache.
+Operations
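A volume exposed through the new linux,ubi binding behaves like any other NVMEM provider, so cells described under its nvmem-layout can be consumed with the usual NVMEM consumer API. The following is a minimal sketch of such a consumer, not part of this series: the cell name "calibration", the demo-cal driver name and the platform-driver boilerplate are illustrative assumptions.

	// Hypothetical consumer of a cell provided by a UBI volume's nvmem-layout.
	// Assumes the consumer's DT node carries:
	//   nvmem-cells = <&cal>;  nvmem-cell-names = "calibration";
	// where &cal points at a cell inside the volume's nvmem-layout above.
	#include <linux/module.h>
	#include <linux/platform_device.h>
	#include <linux/nvmem-consumer.h>
	#include <linux/slab.h>

	static int demo_cal_probe(struct platform_device *pdev)
	{
		struct nvmem_cell *cell;
		size_t len;
		void *buf;

		cell = devm_nvmem_cell_get(&pdev->dev, "calibration");
		if (IS_ERR(cell))
			return PTR_ERR(cell);

		/* nvmem_cell_read() returns a kmalloc'ed copy; free it when done. */
		buf = nvmem_cell_read(cell, &len);
		if (IS_ERR(buf))
			return PTR_ERR(buf);

		dev_info(&pdev->dev, "read %zu bytes of calibration data\n", len);
		kfree(buf);
		return 0;
	}

	static struct platform_driver demo_cal_driver = {
		.probe = demo_cal_probe,
		.driver = { .name = "demo-cal" },
	};
	module_platform_driver(demo_cal_driver);
	MODULE_LICENSE("GPL");

Under these assumptions the provider side needs no extra code: the nvmem.c driver added later in this series registers the NVMEM device automatically when the volume carries an nvmem-layout node.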
diff --git a/MAINTAINERS b/MAINTAINERS
index f736af98d7b5..97ec8ecf33bb 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -22573,6 +22573,7 @@ F:	include/uapi/misc/uacce/
 UBI FILE SYSTEM (UBIFS)
 M:	Richard Weinberger
+R:	Zhihao Cheng
 L:	linux-mtd@lists.infradead.org
 S:	Supported
 W:	http://www.linux-mtd.infradead.org/doc/ubifs.html
@@ -22718,6 +22719,7 @@ F:	drivers/ufs/host/ufs-renesas.c
 UNSORTED BLOCK IMAGES (UBI)
 M:	Richard Weinberger
+R:	Zhihao Cheng
 L:	linux-mtd@lists.infradead.org
 S:	Supported
 W:	http://www.linux-mtd.infradead.org/
diff --git a/drivers/mtd/ubi/Kconfig b/drivers/mtd/ubi/Kconfig
index 7499a540121e..e28a3af83c0e 100644
--- a/drivers/mtd/ubi/Kconfig
+++ b/drivers/mtd/ubi/Kconfig
@@ -113,4 +113,17 @@ config MTD_UBI_FAULT_INJECTION
 	  testing purposes.
 
 	  If in doubt, say "N".
+
+config MTD_UBI_NVMEM
+	tristate "UBI virtual NVMEM"
+	default n
+	depends on NVMEM
+	help
+	  This option enables an additional driver exposing UBI volumes as
+	  NVMEM providers, intended for platforms where UBI is part of the
+	  firmware specification and is also used to store e.g. MAC
+	  addresses or board-specific Wi-Fi calibration data.
+
+	  If in doubt, say "N".
+
 endif # MTD_UBI
diff --git a/drivers/mtd/ubi/Makefile b/drivers/mtd/ubi/Makefile
index 543673605ca7..4b51aaf00d1a 100644
--- a/drivers/mtd/ubi/Makefile
+++ b/drivers/mtd/ubi/Makefile
@@ -7,3 +7,4 @@ ubi-$(CONFIG_MTD_UBI_FASTMAP) += fastmap.o
 ubi-$(CONFIG_MTD_UBI_BLOCK) += block.o
 
 obj-$(CONFIG_MTD_UBI_GLUEBI) += gluebi.o
+obj-$(CONFIG_MTD_UBI_NVMEM) += nvmem.o
diff --git a/drivers/mtd/ubi/block.c b/drivers/mtd/ubi/block.c
index 5c8fdcc088a0..f82e3423acb9 100644
--- a/drivers/mtd/ubi/block.c
+++ b/drivers/mtd/ubi/block.c
@@ -65,10 +65,10 @@ struct ubiblock_pdu {
 };
 
 /* Numbers of elements set in the @ubiblock_param array */
-static int ubiblock_devs __initdata;
+static int ubiblock_devs;
 
 /* MTD devices specification parameters */
-static struct ubiblock_param ubiblock_param[UBIBLOCK_MAX_DEVICES] __initdata;
+static struct ubiblock_param ubiblock_param[UBIBLOCK_MAX_DEVICES];
 
 struct ubiblock {
 	struct ubi_volume_desc *desc;
@@ -536,6 +536,70 @@ static int ubiblock_resize(struct ubi_volume_info *vi)
 	return 0;
 }
 
+static bool
+match_volume_desc(struct ubi_volume_info *vi, const char *name, int ubi_num, int vol_id)
+{
+	int err, len, cur_ubi_num, cur_vol_id;
+
+	if (ubi_num == -1) {
+		/* No ubi num, name must be a vol device path */
+		err = ubi_get_num_by_path(name, &cur_ubi_num, &cur_vol_id);
+		if (err || vi->ubi_num != cur_ubi_num || vi->vol_id != cur_vol_id)
+			return false;
+
+		return true;
+	}
+
+	if (vol_id == -1) {
+		/* Got ubi_num, but no vol_id, name must be volume name */
+		if (vi->ubi_num != ubi_num)
+			return false;
+
+		len = strnlen(name, UBI_VOL_NAME_MAX + 1);
+		if (len < 1 || vi->name_len != len)
+			return false;
+
+		if (strcmp(name, vi->name))
+			return false;
+
+		return true;
+	}
+
+	if (vi->ubi_num != ubi_num)
+		return false;
+
+	if (vi->vol_id != vol_id)
+		return false;
+
+	return true;
+}
+
+static void
+ubiblock_create_from_param(struct ubi_volume_info *vi)
+{
+	int i, ret = 0;
+	struct ubiblock_param *p;
+
+	/*
+	 * Iterate over ubiblock cmdline parameters. If a parameter matches the
+	 * newly added volume, create the ubiblock device for it.
+ */ + for (i = 0; i < ubiblock_devs; i++) { + p = &ubiblock_param[i]; + + if (!match_volume_desc(vi, p->name, p->ubi_num, p->vol_id)) + continue; + + ret = ubiblock_create(vi); + if (ret) { + pr_err( + "UBI: block: can't add '%s' volume on ubi%d_%d, err=%d\n", + vi->name, p->ubi_num, p->vol_id, ret); + } + break; + } +} + static int ubiblock_notify(struct notifier_block *nb, unsigned long notification_type, void *ns_ptr) { @@ -543,10 +607,7 @@ static int ubiblock_notify(struct notifier_block *nb, switch (notification_type) { case UBI_VOLUME_ADDED: - /* - * We want to enforce explicit block device creation for - * volumes, so when a volume is added we do nothing. - */ + ubiblock_create_from_param(&nt->vi); break; case UBI_VOLUME_REMOVED: ubiblock_remove(&nt->vi); @@ -572,56 +633,6 @@ static struct notifier_block ubiblock_notifier = { .notifier_call = ubiblock_notify, }; -static struct ubi_volume_desc * __init -open_volume_desc(const char *name, int ubi_num, int vol_id) -{ - if (ubi_num == -1) - /* No ubi num, name must be a vol device path */ - return ubi_open_volume_path(name, UBI_READONLY); - else if (vol_id == -1) - /* No vol_id, must be vol_name */ - return ubi_open_volume_nm(ubi_num, name, UBI_READONLY); - else - return ubi_open_volume(ubi_num, vol_id, UBI_READONLY); -} - -static void __init ubiblock_create_from_param(void) -{ - int i, ret = 0; - struct ubiblock_param *p; - struct ubi_volume_desc *desc; - struct ubi_volume_info vi; - - /* - * If there is an error creating one of the ubiblocks, continue on to - * create the following ubiblocks. This helps in a circumstance where - * the kernel command-line specifies multiple block devices and some - * may be broken, but we still want the working ones to come up. - */ - for (i = 0; i < ubiblock_devs; i++) { - p = &ubiblock_param[i]; - - desc = open_volume_desc(p->name, p->ubi_num, p->vol_id); - if (IS_ERR(desc)) { - pr_err( - "UBI: block: can't open volume on ubi%d_%d, err=%ld\n", - p->ubi_num, p->vol_id, PTR_ERR(desc)); - continue; - } - - ubi_get_volume_info(desc, &vi); - ubi_close_volume(desc); - - ret = ubiblock_create(&vi); - if (ret) { - pr_err( - "UBI: block: can't add '%s' volume on ubi%d_%d, err=%d\n", - vi.name, p->ubi_num, p->vol_id, ret); - continue; - } - } -} - static void ubiblock_remove_all(void) { struct ubiblock *next; @@ -647,18 +658,7 @@ int __init ubiblock_init(void) if (ubiblock_major < 0) return ubiblock_major; - /* - * Attach block devices from 'block=' module param. - * Even if one block device in the param list fails to come up, - * still allow the module to load and leave any others up. - */ - ubiblock_create_from_param(); - - /* - * Block devices are only created upon user requests, so we ignore - * existing volumes. 
- */ - ret = ubi_register_volume_notifier(&ubiblock_notifier, 1); + ret = ubi_register_volume_notifier(&ubiblock_notifier, 0); if (ret) goto err_unreg; return 0; diff --git a/drivers/mtd/ubi/build.c b/drivers/mtd/ubi/build.c index 7d4ff1193db6..a7e3a6246c0e 100644 --- a/drivers/mtd/ubi/build.c +++ b/drivers/mtd/ubi/build.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include "ubi.h" @@ -92,7 +93,7 @@ static struct ubi_device *ubi_devices[UBI_MAX_DEVICES]; /* Serializes UBI devices creations and removals */ DEFINE_MUTEX(ubi_devices_mutex); -/* Protects @ubi_devices and @ubi->ref_count */ +/* Protects @ubi_devices, @ubi->ref_count and @ubi->is_dead */ static DEFINE_SPINLOCK(ubi_devices_lock); /* "Show" method for files in '//class/ubi/' */ @@ -260,6 +261,9 @@ struct ubi_device *ubi_get_device(int ubi_num) spin_lock(&ubi_devices_lock); ubi = ubi_devices[ubi_num]; + if (ubi && ubi->is_dead) + ubi = NULL; + if (ubi) { ubi_assert(ubi->ref_count >= 0); ubi->ref_count += 1; @@ -297,7 +301,7 @@ struct ubi_device *ubi_get_by_major(int major) spin_lock(&ubi_devices_lock); for (i = 0; i < UBI_MAX_DEVICES; i++) { ubi = ubi_devices[i]; - if (ubi && MAJOR(ubi->cdev.dev) == major) { + if (ubi && !ubi->is_dead && MAJOR(ubi->cdev.dev) == major) { ubi_assert(ubi->ref_count >= 0); ubi->ref_count += 1; get_device(&ubi->dev); @@ -326,7 +330,7 @@ int ubi_major2num(int major) for (i = 0; i < UBI_MAX_DEVICES; i++) { struct ubi_device *ubi = ubi_devices[i]; - if (ubi && MAJOR(ubi->cdev.dev) == major) { + if (ubi && !ubi->is_dead && MAJOR(ubi->cdev.dev) == major) { ubi_num = ubi->ubi_num; break; } @@ -513,7 +517,7 @@ static void ubi_free_volumes_from(struct ubi_device *ubi, int from) int i; for (i = from; i < ubi->vtbl_slots + UBI_INT_VOL_COUNT; i++) { - if (!ubi->volumes[i]) + if (!ubi->volumes[i] || ubi->volumes[i]->is_dead) continue; ubi_eba_replace_table(ubi->volumes[i], NULL); ubi_fastmap_destroy_checkmap(ubi->volumes[i]); @@ -1098,7 +1102,6 @@ int ubi_detach_mtd_dev(int ubi_num, int anyway) return -EINVAL; spin_lock(&ubi_devices_lock); - put_device(&ubi->dev); ubi->ref_count -= 1; if (ubi->ref_count) { if (!anyway) { @@ -1109,6 +1112,13 @@ int ubi_detach_mtd_dev(int ubi_num, int anyway) ubi_err(ubi, "%s reference count %d, destroy anyway", ubi->ubi_name, ubi->ref_count); } + ubi->is_dead = true; + spin_unlock(&ubi_devices_lock); + + ubi_notify_all(ubi, UBI_VOLUME_SHUTDOWN, NULL); + + spin_lock(&ubi_devices_lock); + put_device(&ubi->dev); ubi_devices[ubi_num] = NULL; spin_unlock(&ubi_devices_lock); @@ -1219,44 +1229,44 @@ static struct mtd_info * __init open_mtd_device(const char *mtd_dev) return mtd; } -static int __init ubi_init(void) +static void ubi_notify_add(struct mtd_info *mtd) +{ + struct device_node *np = mtd_get_of_node(mtd); + int err; + + if (!of_device_is_compatible(np, "linux,ubi")) + return; + + /* + * we are already holding &mtd_table_mutex, but still need + * to bump refcount + */ + err = __get_mtd_device(mtd); + if (err) + return; + + /* called while holding mtd_table_mutex */ + mutex_lock_nested(&ubi_devices_mutex, SINGLE_DEPTH_NESTING); + err = ubi_attach_mtd_dev(mtd, UBI_DEV_NUM_AUTO, 0, 0, false, false); + mutex_unlock(&ubi_devices_mutex); + if (err < 0) + __put_mtd_device(mtd); +} + +static void ubi_notify_remove(struct mtd_info *mtd) +{ + /* do nothing for now */ +} + +static struct mtd_notifier ubi_mtd_notifier = { + .add = ubi_notify_add, + .remove = ubi_notify_remove, +}; + +static int __init ubi_init_attach(void) { int err, i, k; - /* Ensure that EC and 
VID headers have correct size */ - BUILD_BUG_ON(sizeof(struct ubi_ec_hdr) != 64); - BUILD_BUG_ON(sizeof(struct ubi_vid_hdr) != 64); - - if (mtd_devs > UBI_MAX_DEVICES) { - pr_err("UBI error: too many MTD devices, maximum is %d\n", - UBI_MAX_DEVICES); - return -EINVAL; - } - - /* Create base sysfs directory and sysfs files */ - err = class_register(&ubi_class); - if (err < 0) - return err; - - err = misc_register(&ubi_ctrl_cdev); - if (err) { - pr_err("UBI error: cannot register device\n"); - goto out; - } - - ubi_wl_entry_slab = kmem_cache_create("ubi_wl_entry_slab", - sizeof(struct ubi_wl_entry), - 0, 0, NULL); - if (!ubi_wl_entry_slab) { - err = -ENOMEM; - goto out_dev_unreg; - } - - err = ubi_debugfs_init(); - if (err) - goto out_slab; - - /* Attach MTD devices */ for (i = 0; i < mtd_devs; i++) { struct mtd_dev_param *p = &mtd_dev_param[i]; @@ -1304,15 +1314,6 @@ static int __init ubi_init(void) } } - err = ubiblock_init(); - if (err) { - pr_err("UBI error: block: cannot initialize, error %d\n", err); - - /* See comment above re-ubi_is_module(). */ - if (ubi_is_module()) - goto out_detach; - } - return 0; out_detach: @@ -1322,7 +1323,70 @@ out_detach: ubi_detach_mtd_dev(ubi_devices[k]->ubi_num, 1); mutex_unlock(&ubi_devices_mutex); } - ubi_debugfs_exit(); + return err; +} +#ifndef CONFIG_MTD_UBI_MODULE +late_initcall(ubi_init_attach); +#endif + +static int __init ubi_init(void) +{ + int err; + + /* Ensure that EC and VID headers have correct size */ + BUILD_BUG_ON(sizeof(struct ubi_ec_hdr) != 64); + BUILD_BUG_ON(sizeof(struct ubi_vid_hdr) != 64); + + if (mtd_devs > UBI_MAX_DEVICES) { + pr_err("UBI error: too many MTD devices, maximum is %d\n", + UBI_MAX_DEVICES); + return -EINVAL; + } + + /* Create base sysfs directory and sysfs files */ + err = class_register(&ubi_class); + if (err < 0) + return err; + + err = misc_register(&ubi_ctrl_cdev); + if (err) { + pr_err("UBI error: cannot register device\n"); + goto out; + } + + ubi_wl_entry_slab = kmem_cache_create("ubi_wl_entry_slab", + sizeof(struct ubi_wl_entry), + 0, 0, NULL); + if (!ubi_wl_entry_slab) { + err = -ENOMEM; + goto out_dev_unreg; + } + + err = ubi_debugfs_init(); + if (err) + goto out_slab; + + err = ubiblock_init(); + if (err) { + pr_err("UBI error: block: cannot initialize, error %d\n", err); + + /* See comment above re-ubi_is_module(). */ + if (ubi_is_module()) + goto out_slab; + } + + register_mtd_user(&ubi_mtd_notifier); + + if (ubi_is_module()) { + err = ubi_init_attach(); + if (err) + goto out_mtd_notifier; + } + + return 0; + +out_mtd_notifier: + unregister_mtd_user(&ubi_mtd_notifier); out_slab: kmem_cache_destroy(ubi_wl_entry_slab); out_dev_unreg: @@ -1332,13 +1396,15 @@ out: pr_err("UBI error: cannot initialize UBI, error %d\n", err); return err; } -late_initcall(ubi_init); +device_initcall(ubi_init); + static void __exit ubi_exit(void) { int i; ubiblock_exit(); + unregister_mtd_user(&ubi_mtd_notifier); for (i = 0; i < UBI_MAX_DEVICES; i++) if (ubi_devices[i]) { diff --git a/drivers/mtd/ubi/eba.c b/drivers/mtd/ubi/eba.c index 8d1f0e05892c..e5ac3cd0bbae 100644 --- a/drivers/mtd/ubi/eba.c +++ b/drivers/mtd/ubi/eba.c @@ -1456,7 +1456,14 @@ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to, } ubi_assert(vol->eba_tbl->entries[lnum].pnum == from); + + /** + * The volumes_lock lock is needed here to prevent the expired old eba_tbl + * being updated when the eba_tbl is copied in the ubi_resize_volume() process. 
+ */ + spin_lock(&ubi->volumes_lock); vol->eba_tbl->entries[lnum].pnum = to; + spin_unlock(&ubi->volumes_lock); out_unlock_buf: mutex_unlock(&ubi->buf_mutex); diff --git a/drivers/mtd/ubi/fastmap.c b/drivers/mtd/ubi/fastmap.c index 2a728c31e6b8..9a4940874be5 100644 --- a/drivers/mtd/ubi/fastmap.c +++ b/drivers/mtd/ubi/fastmap.c @@ -85,9 +85,10 @@ size_t ubi_calc_fm_size(struct ubi_device *ubi) sizeof(struct ubi_fm_scan_pool) + sizeof(struct ubi_fm_scan_pool) + (ubi->peb_count * sizeof(struct ubi_fm_ec)) + - (sizeof(struct ubi_fm_eba) + - (ubi->peb_count * sizeof(__be32))) + - sizeof(struct ubi_fm_volhdr) * UBI_MAX_VOLUMES; + ((sizeof(struct ubi_fm_eba) + + sizeof(struct ubi_fm_volhdr)) * + (UBI_MAX_VOLUMES + UBI_INT_VOL_COUNT)) + + (ubi->peb_count * sizeof(__be32)); return roundup(size, ubi->leb_size); } diff --git a/drivers/mtd/ubi/kapi.c b/drivers/mtd/ubi/kapi.c index 5db653eacbd4..f1ea8677467f 100644 --- a/drivers/mtd/ubi/kapi.c +++ b/drivers/mtd/ubi/kapi.c @@ -152,7 +152,7 @@ struct ubi_volume_desc *ubi_open_volume(int ubi_num, int vol_id, int mode) spin_lock(&ubi->volumes_lock); vol = ubi->volumes[vol_id]; - if (!vol) + if (!vol || vol->is_dead) goto out_unlock; err = -EBUSY; @@ -279,6 +279,41 @@ struct ubi_volume_desc *ubi_open_volume_nm(int ubi_num, const char *name, } EXPORT_SYMBOL_GPL(ubi_open_volume_nm); +/** + * ubi_get_num_by_path - get UBI device and volume number from device path + * @pathname: volume character device node path + * @ubi_num: pointer to UBI device number to be set + * @vol_id: pointer to UBI volume ID to be set + * + * Returns 0 on success and sets ubi_num and vol_id, returns error otherwise. + */ +int ubi_get_num_by_path(const char *pathname, int *ubi_num, int *vol_id) +{ + int error; + struct path path; + struct kstat stat; + + error = kern_path(pathname, LOOKUP_FOLLOW, &path); + if (error) + return error; + + error = vfs_getattr(&path, &stat, STATX_TYPE, AT_STATX_SYNC_AS_STAT); + path_put(&path); + if (error) + return error; + + if (!S_ISCHR(stat.mode)) + return -EINVAL; + + *ubi_num = ubi_major2num(MAJOR(stat.rdev)); + *vol_id = MINOR(stat.rdev) - 1; + + if (*vol_id < 0 || *ubi_num < 0) + return -ENODEV; + + return 0; +} + /** * ubi_open_volume_path - open UBI volume by its character device node path. 
 * @pathname: volume character device node path
@@ -290,32 +325,17 @@ EXPORT_SYMBOL_GPL(ubi_open_volume_nm);
 struct ubi_volume_desc *ubi_open_volume_path(const char *pathname, int mode)
 {
 	int error, ubi_num, vol_id;
-	struct path path;
-	struct kstat stat;
 
 	dbg_gen("open volume %s, mode %d", pathname, mode);
 
 	if (!pathname || !*pathname)
 		return ERR_PTR(-EINVAL);
 
-	error = kern_path(pathname, LOOKUP_FOLLOW, &path);
+	error = ubi_get_num_by_path(pathname, &ubi_num, &vol_id);
 	if (error)
 		return ERR_PTR(error);
 
-	error = vfs_getattr(&path, &stat, STATX_TYPE, AT_STATX_SYNC_AS_STAT);
-	path_put(&path);
-	if (error)
-		return ERR_PTR(error);
-
-	if (!S_ISCHR(stat.mode))
-		return ERR_PTR(-EINVAL);
-
-	ubi_num = ubi_major2num(MAJOR(stat.rdev));
-	vol_id = MINOR(stat.rdev) - 1;
-
-	if (vol_id >= 0 && ubi_num >= 0)
-		return ubi_open_volume(ubi_num, vol_id, mode);
-	return ERR_PTR(-ENODEV);
+	return ubi_open_volume(ubi_num, vol_id, mode);
 }
 EXPORT_SYMBOL_GPL(ubi_open_volume_path);
diff --git a/drivers/mtd/ubi/nvmem.c b/drivers/mtd/ubi/nvmem.c
new file mode 100644
index 000000000000..8aeb9c428e51
--- /dev/null
+++ b/drivers/mtd/ubi/nvmem.c
@@ -0,0 +1,191 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2023 Daniel Golle
+ */
+
+/* UBI NVMEM provider */
+#include "ubi.h"
+#include <linux/module.h>
+#include <linux/nvmem-provider.h>
+
+/* List of all NVMEM devices */
+static LIST_HEAD(nvmem_devices);
+static DEFINE_MUTEX(devices_mutex);
+
+struct ubi_nvmem {
+	struct nvmem_device *nvmem;
+	int ubi_num;
+	int vol_id;
+	int usable_leb_size;
+	struct list_head list;
+};
+
+static int ubi_nvmem_reg_read(void *priv, unsigned int from,
+			      void *val, size_t bytes)
+{
+	size_t to_read, bytes_left = bytes;
+	struct ubi_nvmem *unv = priv;
+	struct ubi_volume_desc *desc;
+	uint32_t offs;
+	uint64_t lnum = from;
+	int err = 0;
+
+	desc = ubi_open_volume(unv->ubi_num, unv->vol_id, UBI_READONLY);
+	if (IS_ERR(desc))
+		return PTR_ERR(desc);
+
+	offs = do_div(lnum, unv->usable_leb_size);
+	while (bytes_left) {
+		to_read = unv->usable_leb_size - offs;
+
+		if (to_read > bytes_left)
+			to_read = bytes_left;
+
+		err = ubi_read(desc, lnum, val, offs, to_read);
+		if (err)
+			break;
+
+		lnum += 1;
+		offs = 0;
+		bytes_left -= to_read;
+		val += to_read;
+	}
+	ubi_close_volume(desc);
+
+	if (err)
+		return err;
+
+	return bytes_left == 0 ?
0 : -EIO; +} + +static int ubi_nvmem_add(struct ubi_volume_info *vi) +{ + struct device_node *np = dev_of_node(vi->dev); + struct nvmem_config config = {}; + struct ubi_nvmem *unv; + int ret; + + if (!np) + return 0; + + if (!of_get_child_by_name(np, "nvmem-layout")) + return 0; + + if (WARN_ON_ONCE(vi->usable_leb_size <= 0) || + WARN_ON_ONCE(vi->size <= 0)) + return -EINVAL; + + unv = kzalloc(sizeof(struct ubi_nvmem), GFP_KERNEL); + if (!unv) + return -ENOMEM; + + config.id = NVMEM_DEVID_NONE; + config.dev = vi->dev; + config.name = dev_name(vi->dev); + config.owner = THIS_MODULE; + config.priv = unv; + config.reg_read = ubi_nvmem_reg_read; + config.size = vi->usable_leb_size * vi->size; + config.word_size = 1; + config.stride = 1; + config.read_only = true; + config.root_only = true; + config.ignore_wp = true; + config.of_node = np; + + unv->ubi_num = vi->ubi_num; + unv->vol_id = vi->vol_id; + unv->usable_leb_size = vi->usable_leb_size; + unv->nvmem = nvmem_register(&config); + if (IS_ERR(unv->nvmem)) { + ret = dev_err_probe(vi->dev, PTR_ERR(unv->nvmem), + "Failed to register NVMEM device\n"); + kfree(unv); + return ret; + } + + mutex_lock(&devices_mutex); + list_add_tail(&unv->list, &nvmem_devices); + mutex_unlock(&devices_mutex); + + return 0; +} + +static void ubi_nvmem_remove(struct ubi_volume_info *vi) +{ + struct ubi_nvmem *unv_c, *unv = NULL; + + mutex_lock(&devices_mutex); + list_for_each_entry(unv_c, &nvmem_devices, list) + if (unv_c->ubi_num == vi->ubi_num && unv_c->vol_id == vi->vol_id) { + unv = unv_c; + break; + } + + if (!unv) { + mutex_unlock(&devices_mutex); + return; + } + + list_del(&unv->list); + mutex_unlock(&devices_mutex); + nvmem_unregister(unv->nvmem); + kfree(unv); +} + +/** + * nvmem_notify - UBI notification handler. + * @nb: registered notifier block + * @l: notification type + * @ns_ptr: pointer to the &struct ubi_notification object + */ +static int nvmem_notify(struct notifier_block *nb, unsigned long l, + void *ns_ptr) +{ + struct ubi_notification *nt = ns_ptr; + + switch (l) { + case UBI_VOLUME_RESIZED: + ubi_nvmem_remove(&nt->vi); + fallthrough; + case UBI_VOLUME_ADDED: + ubi_nvmem_add(&nt->vi); + break; + case UBI_VOLUME_SHUTDOWN: + ubi_nvmem_remove(&nt->vi); + break; + default: + break; + } + return NOTIFY_OK; +} + +static struct notifier_block nvmem_notifier = { + .notifier_call = nvmem_notify, +}; + +static int __init ubi_nvmem_init(void) +{ + return ubi_register_volume_notifier(&nvmem_notifier, 0); +} + +static void __exit ubi_nvmem_exit(void) +{ + struct ubi_nvmem *unv, *tmp; + + mutex_lock(&devices_mutex); + list_for_each_entry_safe(unv, tmp, &nvmem_devices, list) { + nvmem_unregister(unv->nvmem); + list_del(&unv->list); + kfree(unv); + } + mutex_unlock(&devices_mutex); + + ubi_unregister_volume_notifier(&nvmem_notifier); +} + +module_init(ubi_nvmem_init); +module_exit(ubi_nvmem_exit); +MODULE_DESCRIPTION("NVMEM layer over UBI volumes"); +MODULE_AUTHOR("Daniel Golle"); +MODULE_LICENSE("GPL"); diff --git a/drivers/mtd/ubi/ubi.h b/drivers/mtd/ubi/ubi.h index 0b42bb45dd84..32009a24869e 100644 --- a/drivers/mtd/ubi/ubi.h +++ b/drivers/mtd/ubi/ubi.h @@ -337,6 +337,7 @@ struct ubi_volume { int writers; int exclusive; int metaonly; + bool is_dead; int reserved_pebs; int vol_type; @@ -561,6 +562,7 @@ struct ubi_device { spinlock_t volumes_lock; int ref_count; int image_seq; + bool is_dead; int rsvd_pebs; int avail_pebs; @@ -955,6 +957,7 @@ void ubi_free_internal_volumes(struct ubi_device *ubi); void ubi_do_get_device_info(struct ubi_device *ubi, struct 
ubi_device_info *di);
 void ubi_do_get_volume_info(struct ubi_device *ubi, struct ubi_volume *vol,
 			    struct ubi_volume_info *vi);
+int ubi_get_num_by_path(const char *pathname, int *ubi_num, int *vol_id);
 
 /* scan.c */
 int ubi_compare_lebs(struct ubi_device *ubi, const struct ubi_ainf_peb *aeb,
 		     int pnum, const struct ubi_vid_hdr *vid_hdr);
diff --git a/drivers/mtd/ubi/vmt.c b/drivers/mtd/ubi/vmt.c
index 2c867d16f89f..5a3558bbb903 100644
--- a/drivers/mtd/ubi/vmt.c
+++ b/drivers/mtd/ubi/vmt.c
@@ -59,7 +59,7 @@ static ssize_t vol_attribute_show(struct device *dev,
 	struct ubi_device *ubi = vol->ubi;
 
 	spin_lock(&ubi->volumes_lock);
-	if (!ubi->volumes[vol->vol_id]) {
+	if (!ubi->volumes[vol->vol_id] || ubi->volumes[vol->vol_id]->is_dead) {
 		spin_unlock(&ubi->volumes_lock);
 		return -ENODEV;
 	}
@@ -124,6 +124,31 @@ static void vol_release(struct device *dev)
 	kfree(vol);
 }
 
+static struct fwnode_handle *find_volume_fwnode(struct ubi_volume *vol)
+{
+	struct fwnode_handle *fw_vols, *fw_vol;
+	const char *volname;
+	u32 volid;
+
+	fw_vols = device_get_named_child_node(vol->dev.parent->parent, "volumes");
+	if (!fw_vols)
+		return NULL;
+
+	fwnode_for_each_child_node(fw_vols, fw_vol) {
+		if (!fwnode_property_read_string(fw_vol, "volname", &volname) &&
+		    strncmp(volname, vol->name, vol->name_len))
+			continue;
+
+		if (!fwnode_property_read_u32(fw_vol, "volid", &volid) &&
+		    vol->vol_id != volid)
+			continue;
+
+		return fw_vol;
+	}
+
+	return NULL;
+}
+
 /**
  * ubi_create_volume - create volume.
  * @ubi: UBI device description object
@@ -189,7 +214,7 @@ int ubi_create_volume(struct ubi_device *ubi, struct ubi_mkvol_req *req)
 
 	/* Ensure that the name is unique */
 	for (i = 0; i < ubi->vtbl_slots; i++)
-		if (ubi->volumes[i] &&
+		if (ubi->volumes[i] && !ubi->volumes[i]->is_dead &&
 		    ubi->volumes[i]->name_len == req->name_len &&
 		    !strcmp(ubi->volumes[i]->name, req->name)) {
 			ubi_err(ubi, "volume \"%s\" exists (ID %d)",
@@ -223,6 +248,7 @@ int ubi_create_volume(struct ubi_device *ubi, struct ubi_mkvol_req *req)
 	vol->name_len = req->name_len;
 	memcpy(vol->name, req->name, vol->name_len);
 	vol->ubi = ubi;
+	device_set_node(&vol->dev, find_volume_fwnode(vol));
 
 	/*
 	 * Finish all pending erases because there may be some LEBs belonging
@@ -352,6 +378,19 @@ int ubi_remove_volume(struct ubi_volume_desc *desc, int no_vtbl)
 		err = -EBUSY;
 		goto out_unlock;
 	}
+
+	/*
+	 * Mark the volume as dead at this point to prevent anyone from
+	 * taking a new reference to the volume from now on. This is
+	 * necessary because we have to release the spinlock before
+	 * calling ubi_volume_notify.
+ */ + vol->is_dead = true; + spin_unlock(&ubi->volumes_lock); + + ubi_volume_notify(ubi, vol, UBI_VOLUME_SHUTDOWN); + + spin_lock(&ubi->volumes_lock); ubi->volumes[vol_id] = NULL; spin_unlock(&ubi->volumes_lock); @@ -408,6 +447,7 @@ int ubi_resize_volume(struct ubi_volume_desc *desc, int reserved_pebs) struct ubi_device *ubi = vol->ubi; struct ubi_vtbl_record vtbl_rec; struct ubi_eba_table *new_eba_tbl = NULL; + struct ubi_eba_table *old_eba_tbl = NULL; int vol_id = vol->vol_id; if (ubi->ro_mode) @@ -453,10 +493,13 @@ int ubi_resize_volume(struct ubi_volume_desc *desc, int reserved_pebs) err = -ENOSPC; goto out_free; } + ubi->avail_pebs -= pebs; ubi->rsvd_pebs += pebs; ubi_eba_copy_table(vol, new_eba_tbl, vol->reserved_pebs); - ubi_eba_replace_table(vol, new_eba_tbl); + old_eba_tbl = vol->eba_tbl; + vol->eba_tbl = new_eba_tbl; + vol->reserved_pebs = reserved_pebs; spin_unlock(&ubi->volumes_lock); } @@ -471,7 +514,9 @@ int ubi_resize_volume(struct ubi_volume_desc *desc, int reserved_pebs) ubi->avail_pebs -= pebs; ubi_update_reserved(ubi); ubi_eba_copy_table(vol, new_eba_tbl, reserved_pebs); - ubi_eba_replace_table(vol, new_eba_tbl); + old_eba_tbl = vol->eba_tbl; + vol->eba_tbl = new_eba_tbl; + vol->reserved_pebs = reserved_pebs; spin_unlock(&ubi->volumes_lock); } @@ -493,7 +538,6 @@ int ubi_resize_volume(struct ubi_volume_desc *desc, int reserved_pebs) if (err) goto out_acc; - vol->reserved_pebs = reserved_pebs; if (vol->vol_type == UBI_DYNAMIC_VOLUME) { vol->used_ebs = reserved_pebs; vol->last_eb_bytes = vol->usable_leb_size; @@ -501,19 +545,23 @@ int ubi_resize_volume(struct ubi_volume_desc *desc, int reserved_pebs) (long long)vol->used_ebs * vol->usable_leb_size; } + /* destroy old table */ + ubi_eba_destroy_table(old_eba_tbl); ubi_volume_notify(ubi, vol, UBI_VOLUME_RESIZED); self_check_volumes(ubi); return err; out_acc: - if (pebs > 0) { - spin_lock(&ubi->volumes_lock); - ubi->rsvd_pebs -= pebs; - ubi->avail_pebs += pebs; - spin_unlock(&ubi->volumes_lock); - } - return err; - + spin_lock(&ubi->volumes_lock); + vol->reserved_pebs = reserved_pebs - pebs; + ubi->rsvd_pebs -= pebs; + ubi->avail_pebs += pebs; + if (pebs > 0) + ubi_eba_copy_table(vol, old_eba_tbl, vol->reserved_pebs); + else + ubi_eba_copy_table(vol, old_eba_tbl, reserved_pebs); + vol->eba_tbl = old_eba_tbl; + spin_unlock(&ubi->volumes_lock); out_free: ubi_eba_destroy_table(new_eba_tbl); return err; @@ -592,6 +640,7 @@ int ubi_add_volume(struct ubi_device *ubi, struct ubi_volume *vol) vol->dev.class = &ubi_class; vol->dev.groups = volume_dev_groups; dev_set_name(&vol->dev, "%s_%d", ubi->ubi_name, vol->vol_id); + device_set_node(&vol->dev, find_volume_fwnode(vol)); err = device_register(&vol->dev); if (err) { cdev_del(&vol->cdev); diff --git a/drivers/mtd/ubi/vtbl.c b/drivers/mtd/ubi/vtbl.c index f700f0e4f2ec..6e5489e233dd 100644 --- a/drivers/mtd/ubi/vtbl.c +++ b/drivers/mtd/ubi/vtbl.c @@ -791,6 +791,12 @@ int ubi_read_volume_table(struct ubi_device *ubi, struct ubi_attach_info *ai) * The number of supported volumes is limited by the eraseblock size * and by the UBI_MAX_VOLUMES constant. 
*/ + + if (ubi->leb_size < UBI_VTBL_RECORD_SIZE) { + ubi_err(ubi, "LEB size too small for a volume record"); + return -EINVAL; + } + ubi->vtbl_slots = ubi->leb_size / UBI_VTBL_RECORD_SIZE; if (ubi->vtbl_slots > UBI_MAX_VOLUMES) ubi->vtbl_slots = UBI_MAX_VOLUMES; diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c index d013c5b3f1ed..ac77ac1fd73e 100644 --- a/fs/ubifs/debug.c +++ b/fs/ubifs/debug.c @@ -1742,17 +1742,22 @@ int dbg_check_idx_size(struct ubifs_info *c, long long idx_size) err = dbg_walk_index(c, NULL, add_size, &calc); if (err) { ubifs_err(c, "error %d while walking the index", err); - return err; + goto out_err; } if (calc != idx_size) { ubifs_err(c, "index size check failed: calculated size is %lld, should be %lld", calc, idx_size); dump_stack(); - return -EINVAL; + err = -EINVAL; + goto out_err; } return 0; + +out_err: + ubifs_destroy_tnc_tree(c); + return err; } /** diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index 551148de66cd..eac0fef801f1 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -1133,6 +1133,8 @@ out_cancel: dir_ui->ui_size = dir->i_size; mutex_unlock(&dir_ui->ui_mutex); out_inode: + /* Free inode->i_link before inode is marked as bad. */ + fscrypt_free_inode(inode); make_bad_inode(inode); iput(inode); out_fname: diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index 5029eb3390a5..a1f46919934c 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -96,36 +96,36 @@ dump: return -EINVAL; } -static int do_readpage(struct page *page) +static int do_readpage(struct folio *folio) { void *addr; int err = 0, i; unsigned int block, beyond; - struct ubifs_data_node *dn; - struct inode *inode = page->mapping->host; + struct ubifs_data_node *dn = NULL; + struct inode *inode = folio->mapping->host; struct ubifs_info *c = inode->i_sb->s_fs_info; loff_t i_size = i_size_read(inode); dbg_gen("ino %lu, pg %lu, i_size %lld, flags %#lx", - inode->i_ino, page->index, i_size, page->flags); - ubifs_assert(c, !PageChecked(page)); - ubifs_assert(c, !PagePrivate(page)); + inode->i_ino, folio->index, i_size, folio->flags); + ubifs_assert(c, !folio_test_checked(folio)); + ubifs_assert(c, !folio->private); - addr = kmap(page); + addr = kmap_local_folio(folio, 0); - block = page->index << UBIFS_BLOCKS_PER_PAGE_SHIFT; + block = folio->index << UBIFS_BLOCKS_PER_PAGE_SHIFT; beyond = (i_size + UBIFS_BLOCK_SIZE - 1) >> UBIFS_BLOCK_SHIFT; if (block >= beyond) { /* Reading beyond inode */ - SetPageChecked(page); - memset(addr, 0, PAGE_SIZE); + folio_set_checked(folio); + addr = folio_zero_tail(folio, 0, addr); goto out; } dn = kmalloc(UBIFS_MAX_DATA_NODE_SZ, GFP_NOFS); if (!dn) { err = -ENOMEM; - goto error; + goto out; } i = 0; @@ -150,39 +150,35 @@ static int do_readpage(struct page *page) memset(addr + ilen, 0, dlen - ilen); } } - if (++i >= UBIFS_BLOCKS_PER_PAGE) + if (++i >= (UBIFS_BLOCKS_PER_PAGE << folio_order(folio))) break; block += 1; addr += UBIFS_BLOCK_SIZE; + if (folio_test_highmem(folio) && (offset_in_page(addr) == 0)) { + kunmap_local(addr - UBIFS_BLOCK_SIZE); + addr = kmap_local_folio(folio, i * UBIFS_BLOCK_SIZE); + } } + if (err) { struct ubifs_info *c = inode->i_sb->s_fs_info; if (err == -ENOENT) { /* Not found, so it must be a hole */ - SetPageChecked(page); + folio_set_checked(folio); dbg_gen("hole"); - goto out_free; + err = 0; + } else { + ubifs_err(c, "cannot read page %lu of inode %lu, error %d", + folio->index, inode->i_ino, err); } - ubifs_err(c, "cannot read page %lu of inode %lu, error %d", - page->index, inode->i_ino, err); - goto error; } -out_free: - kfree(dn); out: - 
SetPageUptodate(page); - ClearPageError(page); - flush_dcache_page(page); - kunmap(page); - return 0; - -error: kfree(dn); - ClearPageUptodate(page); - SetPageError(page); - flush_dcache_page(page); - kunmap(page); + if (!err) + folio_mark_uptodate(folio); + flush_dcache_folio(folio); + kunmap_local(addr); return err; } @@ -222,16 +218,16 @@ static int write_begin_slow(struct address_space *mapping, pgoff_t index = pos >> PAGE_SHIFT; struct ubifs_budget_req req = { .new_page = 1 }; int err, appending = !!(pos + len > inode->i_size); - struct page *page; + struct folio *folio; dbg_gen("ino %lu, pos %llu, len %u, i_size %lld", inode->i_ino, pos, len, inode->i_size); /* - * At the slow path we have to budget before locking the page, because - * budgeting may force write-back, which would wait on locked pages and - * deadlock if we had the page locked. At this point we do not know - * anything about the page, so assume that this is a new page which is + * At the slow path we have to budget before locking the folio, because + * budgeting may force write-back, which would wait on locked folios and + * deadlock if we had the folio locked. At this point we do not know + * anything about the folio, so assume that this is a new folio which is * written to a hole. This corresponds to largest budget. Later the * budget will be amended if this is not true. */ @@ -243,45 +239,43 @@ static int write_begin_slow(struct address_space *mapping, if (unlikely(err)) return err; - page = grab_cache_page_write_begin(mapping, index); - if (unlikely(!page)) { + folio = __filemap_get_folio(mapping, index, FGP_WRITEBEGIN, + mapping_gfp_mask(mapping)); + if (IS_ERR(folio)) { ubifs_release_budget(c, &req); - return -ENOMEM; + return PTR_ERR(folio); } - if (!PageUptodate(page)) { - if (!(pos & ~PAGE_MASK) && len == PAGE_SIZE) - SetPageChecked(page); + if (!folio_test_uptodate(folio)) { + if (pos == folio_pos(folio) && len >= folio_size(folio)) + folio_set_checked(folio); else { - err = do_readpage(page); + err = do_readpage(folio); if (err) { - unlock_page(page); - put_page(page); + folio_unlock(folio); + folio_put(folio); ubifs_release_budget(c, &req); return err; } } - - SetPageUptodate(page); - ClearPageError(page); } - if (PagePrivate(page)) + if (folio->private) /* - * The page is dirty, which means it was budgeted twice: + * The folio is dirty, which means it was budgeted twice: * o first time the budget was allocated by the task which - * made the page dirty and set the PG_private flag; + * made the folio dirty and set the private field; * o and then we budgeted for it for the second time at the * very beginning of this function. * - * So what we have to do is to release the page budget we + * So what we have to do is to release the folio budget we * allocated. */ release_new_page_budget(c); - else if (!PageChecked(page)) + else if (!folio_test_checked(folio)) /* - * We are changing a page which already exists on the media. - * This means that changing the page does not make the amount + * We are changing a folio which already exists on the media. + * This means that changing the folio does not make the amount * of indexing information larger, and this part of the budget * which we have already acquired may be released. */ @@ -304,14 +298,14 @@ static int write_begin_slow(struct address_space *mapping, ubifs_release_dirty_inode_budget(c, ui); } - *pagep = page; + *pagep = &folio->page; return 0; } /** * allocate_budget - allocate budget for 'ubifs_write_begin()'. 
* @c: UBIFS file-system description object - * @page: page to allocate budget for + * @folio: folio to allocate budget for * @ui: UBIFS inode object the page belongs to * @appending: non-zero if the page is appended * @@ -322,15 +316,15 @@ static int write_begin_slow(struct address_space *mapping, * * Returns: %0 in case of success and %-ENOSPC in case of failure. */ -static int allocate_budget(struct ubifs_info *c, struct page *page, +static int allocate_budget(struct ubifs_info *c, struct folio *folio, struct ubifs_inode *ui, int appending) { struct ubifs_budget_req req = { .fast = 1 }; - if (PagePrivate(page)) { + if (folio->private) { if (!appending) /* - * The page is dirty and we are not appending, which + * The folio is dirty and we are not appending, which * means no budget is needed at all. */ return 0; @@ -354,11 +348,11 @@ static int allocate_budget(struct ubifs_info *c, struct page *page, */ req.dirtied_ino = 1; } else { - if (PageChecked(page)) + if (folio_test_checked(folio)) /* * The page corresponds to a hole and does not * exist on the media. So changing it makes - * make the amount of indexing information + * the amount of indexing information * larger, and we have to budget for a new * page. */ @@ -428,7 +422,7 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping, pgoff_t index = pos >> PAGE_SHIFT; int err, appending = !!(pos + len > inode->i_size); int skipped_read = 0; - struct page *page; + struct folio *folio; ubifs_assert(c, ubifs_inode(inode)->ui_size == inode->i_size); ubifs_assert(c, !c->ro_media && !c->ro_mount); @@ -437,13 +431,14 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping, return -EROFS; /* Try out the fast-path part first */ - page = grab_cache_page_write_begin(mapping, index); - if (unlikely(!page)) - return -ENOMEM; + folio = __filemap_get_folio(mapping, index, FGP_WRITEBEGIN, + mapping_gfp_mask(mapping)); + if (IS_ERR(folio)) + return PTR_ERR(folio); - if (!PageUptodate(page)) { + if (!folio_test_uptodate(folio)) { /* The page is not loaded from the flash */ - if (!(pos & ~PAGE_MASK) && len == PAGE_SIZE) { + if (pos == folio_pos(folio) && len >= folio_size(folio)) { /* * We change whole page so no need to load it. But we * do not know whether this page exists on the media or @@ -453,32 +448,27 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping, * media. Thus, we are setting the @PG_checked flag * here. */ - SetPageChecked(page); + folio_set_checked(folio); skipped_read = 1; } else { - err = do_readpage(page); + err = do_readpage(folio); if (err) { - unlock_page(page); - put_page(page); + folio_unlock(folio); + folio_put(folio); return err; } } - - SetPageUptodate(page); - ClearPageError(page); } - err = allocate_budget(c, page, ui, appending); + err = allocate_budget(c, folio, ui, appending); if (unlikely(err)) { ubifs_assert(c, err == -ENOSPC); /* * If we skipped reading the page because we were going to * write all of it, then it is not up to date. 
*/ - if (skipped_read) { - ClearPageChecked(page); - ClearPageUptodate(page); - } + if (skipped_read) + folio_clear_checked(folio); /* * Budgeting failed which means it would have to force * write-back but didn't, because we set the @fast flag in the @@ -490,8 +480,8 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping, ubifs_assert(c, mutex_is_locked(&ui->ui_mutex)); mutex_unlock(&ui->ui_mutex); } - unlock_page(page); - put_page(page); + folio_unlock(folio); + folio_put(folio); return write_begin_slow(mapping, pos, len, pagep); } @@ -502,22 +492,21 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping, * with @ui->ui_mutex locked if we are appending pages, and unlocked * otherwise. This is an optimization (slightly hacky though). */ - *pagep = page; + *pagep = &folio->page; return 0; - } /** * cancel_budget - cancel budget. * @c: UBIFS file-system description object - * @page: page to cancel budget for + * @folio: folio to cancel budget for * @ui: UBIFS inode object the page belongs to * @appending: non-zero if the page is appended * * This is a helper function for a page write operation. It unlocks the * @ui->ui_mutex in case of appending. */ -static void cancel_budget(struct ubifs_info *c, struct page *page, +static void cancel_budget(struct ubifs_info *c, struct folio *folio, struct ubifs_inode *ui, int appending) { if (appending) { @@ -525,8 +514,8 @@ static void cancel_budget(struct ubifs_info *c, struct page *page, ubifs_release_dirty_inode_budget(c, ui); mutex_unlock(&ui->ui_mutex); } - if (!PagePrivate(page)) { - if (PageChecked(page)) + if (!folio->private) { + if (folio_test_checked(folio)) release_new_page_budget(c); else release_existing_page_budget(c); @@ -537,6 +526,7 @@ static int ubifs_write_end(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned copied, struct page *page, void *fsdata) { + struct folio *folio = page_folio(page); struct inode *inode = mapping->host; struct ubifs_inode *ui = ubifs_inode(inode); struct ubifs_info *c = inode->i_sb->s_fs_info; @@ -544,44 +534,47 @@ static int ubifs_write_end(struct file *file, struct address_space *mapping, int appending = !!(end_pos > inode->i_size); dbg_gen("ino %lu, pos %llu, pg %lu, len %u, copied %d, i_size %lld", - inode->i_ino, pos, page->index, len, copied, inode->i_size); + inode->i_ino, pos, folio->index, len, copied, inode->i_size); - if (unlikely(copied < len && len == PAGE_SIZE)) { + if (unlikely(copied < len && !folio_test_uptodate(folio))) { /* - * VFS copied less data to the page that it intended and + * VFS copied less data to the folio than it intended and * declared in its '->write_begin()' call via the @len - * argument. If the page was not up-to-date, and @len was - * @PAGE_SIZE, the 'ubifs_write_begin()' function did + * argument. If the folio was not up-to-date, + * the 'ubifs_write_begin()' function did * not load it from the media (for optimization reasons). This - * means that part of the page contains garbage. So read the - * page now. + * means that part of the folio contains garbage. So read the + * folio now. */ dbg_gen("copied %d instead of %d, read page and repeat", copied, len); - cancel_budget(c, page, ui, appending); - ClearPageChecked(page); + cancel_budget(c, folio, ui, appending); + folio_clear_checked(folio); /* * Return 0 to force VFS to repeat the whole operation, or the * error code if 'do_readpage()' fails. 
*/ - copied = do_readpage(page); + copied = do_readpage(folio); goto out; } - if (!PagePrivate(page)) { - attach_page_private(page, (void *)1); + if (len == folio_size(folio)) + folio_mark_uptodate(folio); + + if (!folio->private) { + folio_attach_private(folio, (void *)1); atomic_long_inc(&c->dirty_pg_cnt); - __set_page_dirty_nobuffers(page); + filemap_dirty_folio(mapping, folio); } if (appending) { i_size_write(inode, end_pos); ui->ui_size = end_pos; /* - * Note, we do not set @I_DIRTY_PAGES (which means that the - * inode has dirty pages), this has been done in - * '__set_page_dirty_nobuffers()'. + * We do not set @I_DIRTY_PAGES (which means that + * the inode has dirty pages), this was done in + * filemap_dirty_folio(). */ __mark_inode_dirty(inode, I_DIRTY_DATASYNC); ubifs_assert(c, mutex_is_locked(&ui->ui_mutex)); @@ -589,43 +582,43 @@ static int ubifs_write_end(struct file *file, struct address_space *mapping, } out: - unlock_page(page); - put_page(page); + folio_unlock(folio); + folio_put(folio); return copied; } /** * populate_page - copy data nodes into a page for bulk-read. * @c: UBIFS file-system description object - * @page: page + * @folio: folio * @bu: bulk-read information * @n: next zbranch slot * * Returns: %0 on success and a negative error code on failure. */ -static int populate_page(struct ubifs_info *c, struct page *page, +static int populate_page(struct ubifs_info *c, struct folio *folio, struct bu_info *bu, int *n) { int i = 0, nn = *n, offs = bu->zbranch[0].offs, hole = 0, read = 0; - struct inode *inode = page->mapping->host; + struct inode *inode = folio->mapping->host; loff_t i_size = i_size_read(inode); unsigned int page_block; void *addr, *zaddr; pgoff_t end_index; dbg_gen("ino %lu, pg %lu, i_size %lld, flags %#lx", - inode->i_ino, page->index, i_size, page->flags); + inode->i_ino, folio->index, i_size, folio->flags); - addr = zaddr = kmap(page); + addr = zaddr = kmap_local_folio(folio, 0); end_index = (i_size - 1) >> PAGE_SHIFT; - if (!i_size || page->index > end_index) { + if (!i_size || folio->index > end_index) { hole = 1; - memset(addr, 0, PAGE_SIZE); + addr = folio_zero_tail(folio, 0, addr); goto out_hole; } - page_block = page->index << UBIFS_BLOCKS_PER_PAGE_SHIFT; + page_block = folio->index << UBIFS_BLOCKS_PER_PAGE_SHIFT; while (1) { int err, len, out_len, dlen; @@ -674,9 +667,13 @@ static int populate_page(struct ubifs_info *c, struct page *page, break; addr += UBIFS_BLOCK_SIZE; page_block += 1; + if (folio_test_highmem(folio) && (offset_in_page(addr) == 0)) { + kunmap_local(addr - UBIFS_BLOCK_SIZE); + addr = kmap_local_folio(folio, i * UBIFS_BLOCK_SIZE); + } } - if (end_index == page->index) { + if (end_index == folio->index) { int len = i_size & (PAGE_SIZE - 1); if (len && len < read) @@ -685,22 +682,19 @@ static int populate_page(struct ubifs_info *c, struct page *page, out_hole: if (hole) { - SetPageChecked(page); + folio_set_checked(folio); dbg_gen("hole"); } - SetPageUptodate(page); - ClearPageError(page); - flush_dcache_page(page); - kunmap(page); + folio_mark_uptodate(folio); + flush_dcache_folio(folio); + kunmap_local(addr); *n = nn; return 0; out_err: - ClearPageUptodate(page); - SetPageError(page); - flush_dcache_page(page); - kunmap(page); + flush_dcache_folio(folio); + kunmap_local(addr); ubifs_err(c, "bad data node (block %u, inode %lu)", page_block, inode->i_ino); return -EINVAL; @@ -710,15 +704,15 @@ out_err: * ubifs_do_bulk_read - do bulk-read. 
* @c: UBIFS file-system description object * @bu: bulk-read information - * @page1: first page to read + * @folio1: first folio to read * * Returns: %1 if the bulk-read is done, otherwise %0 is returned. */ static int ubifs_do_bulk_read(struct ubifs_info *c, struct bu_info *bu, - struct page *page1) + struct folio *folio1) { - pgoff_t offset = page1->index, end_index; - struct address_space *mapping = page1->mapping; + pgoff_t offset = folio1->index, end_index; + struct address_space *mapping = folio1->mapping; struct inode *inode = mapping->host; struct ubifs_inode *ui = ubifs_inode(inode); int err, page_idx, page_cnt, ret = 0, n = 0; @@ -768,11 +762,11 @@ static int ubifs_do_bulk_read(struct ubifs_info *c, struct bu_info *bu, goto out_warn; } - err = populate_page(c, page1, bu, &n); + err = populate_page(c, folio1, bu, &n); if (err) goto out_warn; - unlock_page(page1); + folio_unlock(folio1); ret = 1; isize = i_size_read(inode); @@ -782,19 +776,19 @@ static int ubifs_do_bulk_read(struct ubifs_info *c, struct bu_info *bu, for (page_idx = 1; page_idx < page_cnt; page_idx++) { pgoff_t page_offset = offset + page_idx; - struct page *page; + struct folio *folio; if (page_offset > end_index) break; - page = pagecache_get_page(mapping, page_offset, + folio = __filemap_get_folio(mapping, page_offset, FGP_LOCK|FGP_ACCESSED|FGP_CREAT|FGP_NOWAIT, ra_gfp_mask); - if (!page) + if (IS_ERR(folio)) break; - if (!PageUptodate(page)) - err = populate_page(c, page, bu, &n); - unlock_page(page); - put_page(page); + if (!folio_test_uptodate(folio)) + err = populate_page(c, folio, bu, &n); + folio_unlock(folio); + folio_put(folio); if (err) break; } @@ -817,7 +811,7 @@ out_bu_off: /** * ubifs_bulk_read - determine whether to bulk-read and, if so, do it. - * @page: page from which to start bulk-read. + * @folio: folio from which to start bulk-read. * * Some flash media are capable of reading sequentially at faster rates. UBIFS * bulk-read facility is designed to take advantage of that, by reading in one @@ -826,12 +820,12 @@ out_bu_off: * * Returns: %1 if a bulk-read is done and %0 otherwise. 
*/ -static int ubifs_bulk_read(struct page *page) +static int ubifs_bulk_read(struct folio *folio) { - struct inode *inode = page->mapping->host; + struct inode *inode = folio->mapping->host; struct ubifs_info *c = inode->i_sb->s_fs_info; struct ubifs_inode *ui = ubifs_inode(inode); - pgoff_t index = page->index, last_page_read = ui->last_page_read; + pgoff_t index = folio->index, last_page_read = ui->last_page_read; struct bu_info *bu; int err = 0, allocated = 0; @@ -879,8 +873,8 @@ static int ubifs_bulk_read(struct page *page) bu->buf_len = c->max_bu_buf_len; data_key_init(c, &bu->key, inode->i_ino, - page->index << UBIFS_BLOCKS_PER_PAGE_SHIFT); - err = ubifs_do_bulk_read(c, bu, page); + folio->index << UBIFS_BLOCKS_PER_PAGE_SHIFT); + err = ubifs_do_bulk_read(c, bu, folio); if (!allocated) mutex_unlock(&c->bu_mutex); @@ -894,69 +888,71 @@ out_unlock: static int ubifs_read_folio(struct file *file, struct folio *folio) { - struct page *page = &folio->page; - - if (ubifs_bulk_read(page)) + if (ubifs_bulk_read(folio)) return 0; - do_readpage(page); + do_readpage(folio); folio_unlock(folio); return 0; } -static int do_writepage(struct page *page, int len) +static int do_writepage(struct folio *folio, size_t len) { - int err = 0, i, blen; + int err = 0, blen; unsigned int block; void *addr; + size_t offset = 0; union ubifs_key key; - struct inode *inode = page->mapping->host; + struct inode *inode = folio->mapping->host; struct ubifs_info *c = inode->i_sb->s_fs_info; #ifdef UBIFS_DEBUG struct ubifs_inode *ui = ubifs_inode(inode); spin_lock(&ui->ui_lock); - ubifs_assert(c, page->index <= ui->synced_i_size >> PAGE_SHIFT); + ubifs_assert(c, folio->index <= ui->synced_i_size >> PAGE_SHIFT); spin_unlock(&ui->ui_lock); #endif - /* Update radix tree tags */ - set_page_writeback(page); + folio_start_writeback(folio); - addr = kmap(page); - block = page->index << UBIFS_BLOCKS_PER_PAGE_SHIFT; - i = 0; - while (len) { - blen = min_t(int, len, UBIFS_BLOCK_SIZE); + addr = kmap_local_folio(folio, offset); + block = folio->index << UBIFS_BLOCKS_PER_PAGE_SHIFT; + for (;;) { + blen = min_t(size_t, len, UBIFS_BLOCK_SIZE); data_key_init(c, &key, inode->i_ino, block); err = ubifs_jnl_write_data(c, inode, &key, addr, blen); if (err) break; - if (++i >= UBIFS_BLOCKS_PER_PAGE) + len -= blen; + if (!len) break; block += 1; addr += blen; - len -= blen; + if (folio_test_highmem(folio) && !offset_in_page(addr)) { + kunmap_local(addr - blen); + offset += PAGE_SIZE; + addr = kmap_local_folio(folio, offset); + } } + kunmap_local(addr); if (err) { - SetPageError(page); - ubifs_err(c, "cannot write page %lu of inode %lu, error %d", - page->index, inode->i_ino, err); + mapping_set_error(folio->mapping, err); + ubifs_err(c, "cannot write folio %lu of inode %lu, error %d", + folio->index, inode->i_ino, err); ubifs_ro_mode(c, err); } - ubifs_assert(c, PagePrivate(page)); - if (PageChecked(page)) + ubifs_assert(c, folio->private != NULL); + if (folio_test_checked(folio)) release_new_page_budget(c); else release_existing_page_budget(c); atomic_long_dec(&c->dirty_pg_cnt); - detach_page_private(page); - ClearPageChecked(page); + folio_detach_private(folio); + folio_clear_checked(folio); - kunmap(page); - unlock_page(page); - end_page_writeback(page); + folio_unlock(folio); + folio_end_writeback(folio); return err; } @@ -1006,22 +1002,21 @@ static int do_writepage(struct page *page, int len) * on the page lock and it would not write the truncated inode node to the * journal before we have finished. 
*/ -static int ubifs_writepage(struct page *page, struct writeback_control *wbc) +static int ubifs_writepage(struct folio *folio, struct writeback_control *wbc, + void *data) { - struct inode *inode = page->mapping->host; + struct inode *inode = folio->mapping->host; struct ubifs_info *c = inode->i_sb->s_fs_info; struct ubifs_inode *ui = ubifs_inode(inode); loff_t i_size = i_size_read(inode), synced_i_size; - pgoff_t end_index = i_size >> PAGE_SHIFT; - int err, len = i_size & (PAGE_SIZE - 1); - void *kaddr; + int err, len = folio_size(folio); dbg_gen("ino %lu, pg %lu, pg flags %#lx", - inode->i_ino, page->index, page->flags); - ubifs_assert(c, PagePrivate(page)); + inode->i_ino, folio->index, folio->flags); + ubifs_assert(c, folio->private != NULL); - /* Is the page fully outside @i_size? (truncate in progress) */ - if (page->index > end_index || (page->index == end_index && !len)) { + /* Is the folio fully outside @i_size? (truncate in progress) */ + if (folio_pos(folio) >= i_size) { err = 0; goto out_unlock; } @@ -1030,9 +1025,9 @@ static int ubifs_writepage(struct page *page, struct writeback_control *wbc) synced_i_size = ui->synced_i_size; spin_unlock(&ui->ui_lock); - /* Is the page fully inside @i_size? */ - if (page->index < end_index) { - if (page->index >= synced_i_size >> PAGE_SHIFT) { + /* Is the folio fully inside i_size? */ + if (folio_pos(folio) + len <= i_size) { + if (folio_pos(folio) >= synced_i_size) { err = inode->i_sb->s_op->write_inode(inode, NULL); if (err) goto out_redirty; @@ -1045,20 +1040,18 @@ static int ubifs_writepage(struct page *page, struct writeback_control *wbc) * with this. */ } - return do_writepage(page, PAGE_SIZE); + return do_writepage(folio, len); } /* - * The page straddles @i_size. It must be zeroed out on each and every + * The folio straddles @i_size. It must be zeroed out on each and every * writepage invocation because it may be mmapped. "A file is mapped * in multiples of the page size. For a file that is not a multiple of * the page size, the remaining memory is zeroed when mapped, and * writes to that region are not written out to the file." */ - kaddr = kmap_atomic(page); - memset(kaddr + len, 0, PAGE_SIZE - len); - flush_dcache_page(page); - kunmap_atomic(kaddr); + len = i_size - folio_pos(folio); + folio_zero_segment(folio, len, folio_size(folio)); if (i_size > synced_i_size) { err = inode->i_sb->s_op->write_inode(inode, NULL); @@ -1066,19 +1059,25 @@ static int ubifs_writepage(struct page *page, struct writeback_control *wbc) goto out_redirty; } - return do_writepage(page, len); + return do_writepage(folio, len); out_redirty: /* - * redirty_page_for_writepage() won't call ubifs_dirty_inode() because + * folio_redirty_for_writepage() won't call ubifs_dirty_inode() because * it passes I_DIRTY_PAGES flag while calling __mark_inode_dirty(), so * there is no need to do space budget for dirty inode. */ - redirty_page_for_writepage(wbc, page); + folio_redirty_for_writepage(wbc, folio); out_unlock: - unlock_page(page); + folio_unlock(folio); return err; } +static int ubifs_writepages(struct address_space *mapping, + struct writeback_control *wbc) +{ + return write_cache_pages(mapping, wbc, ubifs_writepage, NULL); +} + /** * do_attr_changes - change inode attributes. 
* @inode: inode to change attributes for @@ -1155,11 +1154,11 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode, if (offset) { pgoff_t index = new_size >> PAGE_SHIFT; - struct page *page; + struct folio *folio; - page = find_lock_page(inode->i_mapping, index); - if (page) { - if (PageDirty(page)) { + folio = filemap_lock_folio(inode->i_mapping, index); + if (!IS_ERR(folio)) { + if (folio_test_dirty(folio)) { /* * 'ubifs_jnl_truncate()' will try to truncate * the last data node, but it contains @@ -1168,14 +1167,14 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode, * 'ubifs_jnl_truncate()' will see an already * truncated (and up to date) data node. */ - ubifs_assert(c, PagePrivate(page)); + ubifs_assert(c, folio->private != NULL); - clear_page_dirty_for_io(page); + folio_clear_dirty_for_io(folio); if (UBIFS_BLOCKS_PER_PAGE_SHIFT) - offset = new_size & - (PAGE_SIZE - 1); - err = do_writepage(page, offset); - put_page(page); + offset = offset_in_folio(folio, + new_size); + err = do_writepage(folio, offset); + folio_put(folio); if (err) goto out_budg; /* @@ -1188,8 +1187,8 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode, * to 'ubifs_jnl_truncate()' to save it from * having to read it. */ - unlock_page(page); - put_page(page); + folio_unlock(folio); + folio_put(folio); } } } @@ -1512,14 +1511,14 @@ static bool ubifs_release_folio(struct folio *folio, gfp_t unused_gfp_flags) */ static vm_fault_t ubifs_vm_page_mkwrite(struct vm_fault *vmf) { - struct page *page = vmf->page; + struct folio *folio = page_folio(vmf->page); struct inode *inode = file_inode(vmf->vma->vm_file); struct ubifs_info *c = inode->i_sb->s_fs_info; struct timespec64 now = current_time(inode); struct ubifs_budget_req req = { .new_page = 1 }; int err, update_time; - dbg_gen("ino %lu, pg %lu, i_size %lld", inode->i_ino, page->index, + dbg_gen("ino %lu, pg %lu, i_size %lld", inode->i_ino, folio->index, i_size_read(inode)); ubifs_assert(c, !c->ro_media && !c->ro_mount); @@ -1527,17 +1526,17 @@ static vm_fault_t ubifs_vm_page_mkwrite(struct vm_fault *vmf) return VM_FAULT_SIGBUS; /* -EROFS */ /* - * We have not locked @page so far so we may budget for changing the - * page. Note, we cannot do this after we locked the page, because + * We have not locked @folio so far so we may budget for changing the + * folio. Note, we cannot do this after we locked the folio, because * budgeting may cause write-back which would cause deadlock. * - * At the moment we do not know whether the page is dirty or not, so we - * assume that it is not and budget for a new page. We could look at + * At the moment we do not know whether the folio is dirty or not, so we + * assume that it is not and budget for a new folio. We could look at * the @PG_private flag and figure this out, but we may race with write - * back and the page state may change by the time we lock it, so this + * back and the folio state may change by the time we lock it, so this * would need additional care. We do not bother with this at the * moment, although it might be good idea to do. Instead, we allocate - * budget for a new page and amend it later on if the page was in fact + * budget for a new folio and amend it later on if the folio was in fact * dirty. 
* * The budgeting-related logic of this function is similar to what we @@ -1560,21 +1559,21 @@ static vm_fault_t ubifs_vm_page_mkwrite(struct vm_fault *vmf) return VM_FAULT_SIGBUS; } - lock_page(page); - if (unlikely(page->mapping != inode->i_mapping || - page_offset(page) > i_size_read(inode))) { - /* Page got truncated out from underneath us */ + folio_lock(folio); + if (unlikely(folio->mapping != inode->i_mapping || + folio_pos(folio) >= i_size_read(inode))) { + /* Folio got truncated out from underneath us */ goto sigbus; } - if (PagePrivate(page)) + if (folio->private) release_new_page_budget(c); else { - if (!PageChecked(page)) + if (!folio_test_checked(folio)) ubifs_convert_page_budget(c); - attach_page_private(page, (void *)1); + folio_attach_private(folio, (void *)1); atomic_long_inc(&c->dirty_pg_cnt); - __set_page_dirty_nobuffers(page); + filemap_dirty_folio(folio->mapping, folio); } if (update_time) { @@ -1590,11 +1589,11 @@ static vm_fault_t ubifs_vm_page_mkwrite(struct vm_fault *vmf) ubifs_release_dirty_inode_budget(c, ui); } - wait_for_stable_page(page); + folio_wait_stable(folio); return VM_FAULT_LOCKED; sigbus: - unlock_page(page); + folio_unlock(folio); ubifs_release_budget(c, &req); return VM_FAULT_SIGBUS; } @@ -1648,7 +1647,7 @@ static int ubifs_symlink_getattr(struct mnt_idmap *idmap, const struct address_space_operations ubifs_file_address_operations = { .read_folio = ubifs_read_folio, - .writepage = ubifs_writepage, + .writepages = ubifs_writepages, .write_begin = ubifs_write_begin, .write_end = ubifs_write_end, .invalidate_folio = ubifs_invalidate_folio, diff --git a/fs/ubifs/find.c b/fs/ubifs/find.c index 873e6e1c92b5..6ebf3c04ac5f 100644 --- a/fs/ubifs/find.c +++ b/fs/ubifs/find.c @@ -82,8 +82,9 @@ static int valuable(struct ubifs_info *c, const struct ubifs_lprops *lprops) */ static int scan_for_dirty_cb(struct ubifs_info *c, const struct ubifs_lprops *lprops, int in_tree, - struct scan_data *data) + void *arg) { + struct scan_data *data = arg; int ret = LPT_SCAN_CONTINUE; /* Exclude LEBs that are currently in use */ @@ -166,8 +167,7 @@ static const struct ubifs_lprops *scan_for_dirty(struct ubifs_info *c, data.pick_free = pick_free; data.lnum = -1; data.exclude_index = exclude_index; - err = ubifs_lpt_scan_nolock(c, -1, c->lscan_lnum, - (ubifs_lpt_scan_callback)scan_for_dirty_cb, + err = ubifs_lpt_scan_nolock(c, -1, c->lscan_lnum, scan_for_dirty_cb, &data); if (err) return ERR_PTR(err); @@ -349,8 +349,9 @@ out: */ static int scan_for_free_cb(struct ubifs_info *c, const struct ubifs_lprops *lprops, int in_tree, - struct scan_data *data) + void *arg) { + struct scan_data *data = arg; int ret = LPT_SCAN_CONTINUE; /* Exclude LEBs that are currently in use */ @@ -446,7 +447,7 @@ const struct ubifs_lprops *do_find_free_space(struct ubifs_info *c, data.pick_free = pick_free; data.lnum = -1; err = ubifs_lpt_scan_nolock(c, -1, c->lscan_lnum, - (ubifs_lpt_scan_callback)scan_for_free_cb, + scan_for_free_cb, &data); if (err) return ERR_PTR(err); @@ -589,8 +590,9 @@ out: */ static int scan_for_idx_cb(struct ubifs_info *c, const struct ubifs_lprops *lprops, int in_tree, - struct scan_data *data) + void *arg) { + struct scan_data *data = arg; int ret = LPT_SCAN_CONTINUE; /* Exclude LEBs that are currently in use */ @@ -625,8 +627,7 @@ static const struct ubifs_lprops *scan_for_leb_for_idx(struct ubifs_info *c) int err; data.lnum = -1; - err = ubifs_lpt_scan_nolock(c, -1, c->lscan_lnum, - (ubifs_lpt_scan_callback)scan_for_idx_cb, + err = ubifs_lpt_scan_nolock(c, -1, 
c->lscan_lnum, scan_for_idx_cb,
 				    &data);
 	if (err)
 		return ERR_PTR(err);
@@ -726,11 +727,10 @@ out:
 	return err;
 }
 
-static int cmp_dirty_idx(const struct ubifs_lprops **a,
-			 const struct ubifs_lprops **b)
+static int cmp_dirty_idx(const void *a, const void *b)
 {
-	const struct ubifs_lprops *lpa = *a;
-	const struct ubifs_lprops *lpb = *b;
+	const struct ubifs_lprops *lpa = *(const struct ubifs_lprops **)a;
+	const struct ubifs_lprops *lpb = *(const struct ubifs_lprops **)b;
 
 	return lpa->dirty + lpa->free - lpb->dirty - lpb->free;
 }
@@ -754,7 +754,7 @@ int ubifs_save_dirty_idx_lnums(struct ubifs_info *c)
 	       sizeof(void *) * c->dirty_idx.cnt);
 	/* Sort it so that the dirtiest is now at the end */
 	sort(c->dirty_idx.arr, c->dirty_idx.cnt, sizeof(void *),
-	     (int (*)(const void *, const void *))cmp_dirty_idx, NULL);
+	     cmp_dirty_idx, NULL);
 	dbg_find("found %d dirty index LEBs", c->dirty_idx.cnt);
 	if (c->dirty_idx.cnt)
 		dbg_find("dirtiest index LEB is %d with dirty %d and free %d",
@@ -782,8 +782,9 @@ int ubifs_save_dirty_idx_lnums(struct ubifs_info *c)
  */
 static int scan_dirty_idx_cb(struct ubifs_info *c,
 			   const struct ubifs_lprops *lprops, int in_tree,
-			   struct scan_data *data)
+			   void *arg)
 {
+	struct scan_data *data = arg;
 	int ret = LPT_SCAN_CONTINUE;
 
 	/* Exclude LEBs that are currently in use */
@@ -842,8 +843,7 @@ static int find_dirty_idx_leb(struct ubifs_info *c)
 	if (c->pnodes_have >= c->pnode_cnt)
 		/* All pnodes are in memory, so skip scan */
 		return -ENOSPC;
-	err = ubifs_lpt_scan_nolock(c, -1, c->lscan_lnum,
-				    (ubifs_lpt_scan_callback)scan_dirty_idx_cb,
+	err = ubifs_lpt_scan_nolock(c, -1, c->lscan_lnum, scan_dirty_idx_cb,
 				    &data);
 	if (err)
 		return err;
diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c
index f0a5538c84b0..74aee92433d7 100644
--- a/fs/ubifs/journal.c
+++ b/fs/ubifs/journal.c
@@ -292,6 +292,96 @@ static int write_head(struct ubifs_info *c, int jhead, void *buf, int len,
 	return err;
 }
 
+/**
+ * __queue_and_wait - queue the current task and wait until it is woken up.
+ * @c: UBIFS file-system description object
+ *
+ * This function adds the current task to the queue and waits until it is
+ * woken up. This function should be called with @c->reserve_space_wq locked.
+ */
+static void __queue_and_wait(struct ubifs_info *c)
+{
+	DEFINE_WAIT(wait);
+
+	__add_wait_queue_entry_tail_exclusive(&c->reserve_space_wq, &wait);
+	set_current_state(TASK_UNINTERRUPTIBLE);
+	spin_unlock(&c->reserve_space_wq.lock);
+
+	schedule();
+	finish_wait(&c->reserve_space_wq, &wait);
+}
+
+/**
+ * wait_for_reservation - try queuing the current task to wait until woken up.
+ * @c: UBIFS file-system description object
+ *
+ * This function queues the current task to wait until it is woken up, if
+ * queuing has started (@c->need_wait_space is not %0). Returns %true if the
+ * current task was added to the queue, otherwise %false is returned.
+ */
+static bool wait_for_reservation(struct ubifs_info *c)
+{
+	if (likely(atomic_read(&c->need_wait_space) == 0))
+		/* Quick path to check whether queuing has started. */
+		return false;
+
+	spin_lock(&c->reserve_space_wq.lock);
+	if (atomic_read(&c->need_wait_space) == 0) {
+		/* Queuing has not started, don't queue the current task. */
+		spin_unlock(&c->reserve_space_wq.lock);
+		return false;
+	}
+
+	__queue_and_wait(c);
+	return true;
+}
+
+/**
+ * wake_up_reservation - wake up the first task in the queue or stop queuing.
+ * @c: UBIFS file-system description object
+ *
+ * This function wakes up the first task in the queue if one exists, or stops
+ * queuing if the queue is empty.
+ */
+static void wake_up_reservation(struct ubifs_info *c)
+{
+	spin_lock(&c->reserve_space_wq.lock);
+	if (waitqueue_active(&c->reserve_space_wq))
+		wake_up_locked(&c->reserve_space_wq);
+	else
+		/*
+		 * Unlike wait_for_reservation(), set @c->need_wait_space under
+		 * the protection of the wait queue lock, which prevents it from
+		 * being set to 0 after a new task has queued.
+		 */
+		atomic_set(&c->need_wait_space, 0);
+	spin_unlock(&c->reserve_space_wq.lock);
+}
+
+/**
+ * add_or_start_queue - add the current task to the queue or start queuing.
+ * @c: UBIFS file-system description object
+ *
+ * This function starts queuing if queuing has not started, otherwise it
+ * adds the current task to the queue.
+ */
+static void add_or_start_queue(struct ubifs_info *c)
+{
+	spin_lock(&c->reserve_space_wq.lock);
+	if (atomic_cmpxchg(&c->need_wait_space, 0, 1) == 0) {
+		/* Queuing has just started, the task can go on directly. */
+		spin_unlock(&c->reserve_space_wq.lock);
+		return;
+	}
+
+	/*
+	 * At this point at least two tasks have retried more than 32
+	 * times: the first task has started queuing, so just queue the
+	 * remaining tasks.
+	 */
+	__queue_and_wait(c);
+}
+
 /**
  * make_reservation - reserve journal space.
  * @c: UBIFS file-system description object
@@ -311,33 +401,27 @@ static int write_head(struct ubifs_info *c, int jhead, void *buf, int len,
 static int make_reservation(struct ubifs_info *c, int jhead, int len)
 {
 	int err, cmt_retries = 0, nospc_retries = 0;
+	bool blocked = wait_for_reservation(c);
 
 again:
 	down_read(&c->commit_sem);
 	err = reserve_space(c, jhead, len);
-	if (!err)
+	if (!err) {
 		/* c->commit_sem will get released via finish_reservation(). */
-		return 0;
+		goto out_wake_up;
+	}
 	up_read(&c->commit_sem);
 
 	if (err == -ENOSPC) {
 		/*
 		 * GC could not make any progress. We should try to commit
-		 * once because it could make some dirty space and GC would
-		 * make progress, so make the error -EAGAIN so that the below
+		 * because it could make some dirty space and GC would make
+		 * progress, so make the error -EAGAIN so that the below
 		 * will commit and re-try.
 		 */
-		if (nospc_retries++ < 2) {
-			dbg_jnl("no space, retry");
-			err = -EAGAIN;
-		}
-
-		/*
-		 * This means that the budgeting is incorrect. We always have
-		 * to be able to write to the media, because all operations are
-		 * budgeted. Deletions are not budgeted, though, but we reserve
-		 * an extra LEB for them.
-		 */
+		nospc_retries++;
+		dbg_jnl("no space, retry");
+		err = -EAGAIN;
 	}
 
 	if (err != -EAGAIN)
@@ -349,15 +433,37 @@ again:
 	 */
 	if (cmt_retries > 128) {
 		/*
-		 * This should not happen unless the journal size limitations
-		 * are too tough.
+		 * This should not happen unless:
+		 * 1. The journal size limitations are too tough.
+		 * 2. The budgeting is incorrect. We always have to be able to
+		 *    write to the media, because all operations are budgeted.
+		 *    Deletions are not budgeted, though, but we reserve an
+		 *    extra LEB for them.
 		 */
-		ubifs_err(c, "stuck in space allocation");
+		ubifs_err(c, "stuck in space allocation, nospc_retries %d",
+			  nospc_retries);
 		err = -ENOSPC;
 		goto out;
-	} else if (cmt_retries > 32)
-		ubifs_warn(c, "too many space allocation re-tries (%d)",
-			   cmt_retries);
+	} else if (cmt_retries > 32) {
+		/*
+		 * It's almost impossible, unless many tasks are making
+		 * reservations concurrently and some task has retried gc +
+		 * commit many times, while the available space generated
+		 * during this period is grabbed by other tasks.
+		 * But if it happens, start queuing up all tasks that will make
+		 * space reservation, then there is only one task making space
+		 * reservation at any time, and it can always succeed under the
+		 * premise of correct budgeting.
+		 */
+		ubifs_warn(c, "too many space allocation cmt_retries (%d) "
+			   "nospc_retries (%d), start queuing tasks",
+			   cmt_retries, nospc_retries);
+
+		if (!blocked) {
+			blocked = true;
+			add_or_start_queue(c);
+		}
+	}
 
 	dbg_jnl("-EAGAIN, commit and retry (retried %d times)",
 		cmt_retries);
@@ -365,7 +471,7 @@ again:
 
 	err = ubifs_run_commit(c);
 	if (err)
-		return err;
+		goto out_wake_up;
 	goto again;
 
 out:
@@ -380,6 +486,27 @@ out:
 		cmt_retries = dbg_check_lprops(c);
 		up_write(&c->commit_sem);
 	}
+out_wake_up:
+	if (blocked) {
+		/*
+		 * Only tasks that have ever started queuing or have ever been
+		 * queued can wake up other queued tasks, which ensures that
+		 * only one task is woken up to make a space reservation.
+		 * For example:
+		 *    task A          task B                 task C
+		 *                    make_reservation       make_reservation
+		 * reserve_space // 0
+		 * wake_up_reservation
+		 *                    atomic_cmpxchg // 0, start queuing
+		 *                                           reserve_space
+		 *                                           wait_for_reservation
+		 *                                            __queue_and_wait
+		 *                                             add_wait_queue
+		 * if (blocked) // false
+		 * // So that task C won't be woken up to race with task B
+		 */
+		wake_up_reservation(c);
+	}
 	return err;
 }
 
diff --git a/fs/ubifs/lprops.c b/fs/ubifs/lprops.c
index 6d6cd85c2b4c..a11c3dab7e16 100644
--- a/fs/ubifs/lprops.c
+++ b/fs/ubifs/lprops.c
@@ -1014,8 +1014,9 @@ out:
  */
 static int scan_check_cb(struct ubifs_info *c,
 			 const struct ubifs_lprops *lp, int in_tree,
-			 struct ubifs_lp_stats *lst)
+			 void *arg)
 {
+	struct ubifs_lp_stats *lst = arg;
 	struct ubifs_scan_leb *sleb;
 	struct ubifs_scan_node *snod;
 	int cat, lnum = lp->lnum, is_idx = 0, used = 0, free, dirty, ret;
@@ -1269,8 +1270,7 @@ int dbg_check_lprops(struct ubifs_info *c)
 	memset(&lst, 0, sizeof(struct ubifs_lp_stats));
 	err = ubifs_lpt_scan_nolock(c, c->main_first, c->leb_cnt - 1,
-				    (ubifs_lpt_scan_callback)scan_check_cb,
-				    &lst);
+				    scan_check_cb, &lst);
 	if (err && err != -ENOSPC)
 		goto out;
 
diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c
index c4d079328b92..07351fdce722 100644
--- a/fs/ubifs/lpt_commit.c
+++ b/fs/ubifs/lpt_commit.c
@@ -1646,7 +1646,6 @@ static int dbg_check_ltab_lnum(struct ubifs_info *c, int lnum)
 		len -= node_len;
 	}
 
-	err = 0;
 out:
 	vfree(buf);
 	return err;
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 7f4031a15f4d..291583005dd1 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -2151,6 +2151,8 @@ static struct ubifs_info *alloc_ubifs_info(struct ubi_volume_desc *ubi)
 	mutex_init(&c->bu_mutex);
 	mutex_init(&c->write_reserve_mutex);
 	init_waitqueue_head(&c->cmt_wq);
+	init_waitqueue_head(&c->reserve_space_wq);
+	atomic_set(&c->need_wait_space, 0);
 	c->buds = RB_ROOT;
 	c->old_idx = RB_ROOT;
 	c->size_tree = RB_ROOT;
diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c
index f4728e65d1bd..45cacdcd4746 100644
--- a/fs/ubifs/tnc.c
+++ b/fs/ubifs/tnc.c
@@ -3116,14 +3116,7 @@ static void tnc_destroy_cnext(struct ubifs_info *c)
 void ubifs_tnc_close(struct ubifs_info *c)
 {
 	tnc_destroy_cnext(c);
-	if (c->zroot.znode) {
-		long n, freed;
-
-		n = atomic_long_read(&c->clean_zn_cnt);
-		freed = ubifs_destroy_tnc_subtree(c, c->zroot.znode);
-		ubifs_assert(c, freed == n);
-		atomic_long_sub(n, &ubifs_clean_zn_cnt);
-	}
+	ubifs_destroy_tnc_tree(c);
 	kfree(c->gap_lebs);
 	kfree(c->ilebs);
 	destroy_old_idx(c);
diff --git a/fs/ubifs/tnc_misc.c b/fs/ubifs/tnc_misc.c
index 4d686e34e64d..d3f8a6aa1f49 100644
--- a/fs/ubifs/tnc_misc.c
+++ b/fs/ubifs/tnc_misc.c
@@ -250,6 +250,28 @@ long ubifs_destroy_tnc_subtree(const struct ubifs_info *c,
 	}
 }
 
+/**
+ * ubifs_destroy_tnc_tree - destroy all znodes connected to the TNC tree.
+ * @c: UBIFS file-system description object
+ *
+ * This function destroys the whole TNC tree and updates the global count of
+ * clean znodes.
+ */
+void ubifs_destroy_tnc_tree(struct ubifs_info *c)
+{
+	long n, freed;
+
+	if (!c->zroot.znode)
+		return;
+
+	n = atomic_long_read(&c->clean_zn_cnt);
+	freed = ubifs_destroy_tnc_subtree(c, c->zroot.znode);
+	ubifs_assert(c, freed == n);
+	atomic_long_sub(n, &ubifs_clean_zn_cnt);
+
+	c->zroot.znode = NULL;
+}
+
 /**
  * read_znode - read an indexing node from flash and fill znode.
  * @c: UBIFS file-system description object
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index 3916dc4f30ca..1f3ea879d93a 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -1047,6 +1047,8 @@ struct ubifs_debug_info;
 * @bg_bud_bytes: number of bud bytes when background commit is initiated
 * @old_buds: buds to be released after commit ends
 * @max_bud_cnt: maximum number of buds
+ * @need_wait_space: non-%0 means space reservation tasks need to wait in queue
+ * @reserve_space_wq: wait queue to sleep on if @need_wait_space is not %0
 *
 * @commit_sem: synchronizes committer with other processes
 * @cmt_state: commit state
@@ -1305,6 +1307,8 @@ struct ubifs_info {
 	long long bg_bud_bytes;
 	struct list_head old_buds;
 	int max_bud_cnt;
+	atomic_t need_wait_space;
+	wait_queue_head_t reserve_space_wq;
 
 	struct rw_semaphore commit_sem;
 	int cmt_state;
@@ -1903,6 +1907,7 @@ struct ubifs_znode *ubifs_tnc_postorder_next(const struct ubifs_info *c,
 					     struct ubifs_znode *znode);
 long ubifs_destroy_tnc_subtree(const struct ubifs_info *c,
 			       struct ubifs_znode *zr);
+void ubifs_destroy_tnc_tree(struct ubifs_info *c);
 struct ubifs_znode *ubifs_load_znode(struct ubifs_info *c,
 				     struct ubifs_zbranch *zbr,
 				     struct ubifs_znode *parent, int iip);
diff --git a/include/linux/mtd/ubi.h b/include/linux/mtd/ubi.h
index a529347fd75b..562f92504f2b 100644
--- a/include/linux/mtd/ubi.h
+++ b/include/linux/mtd/ubi.h
@@ -192,6 +192,7 @@ struct ubi_device_info {
 *	or a volume was removed)
 * @UBI_VOLUME_RESIZED: a volume has been re-sized
 * @UBI_VOLUME_RENAMED: a volume has been re-named
+ * @UBI_VOLUME_SHUTDOWN: a volume is about to be removed, shut down its users
 * @UBI_VOLUME_UPDATED: data has been written to a volume
 *
 * These constants define which type of event has happened when a volume
@@ -202,6 +203,7 @@ enum {
 	UBI_VOLUME_REMOVED,
 	UBI_VOLUME_RESIZED,
 	UBI_VOLUME_RENAMED,
+	UBI_VOLUME_SHUTDOWN,
 	UBI_VOLUME_UPDATED,
 };
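
A note on the fs/ubifs/file.c conversion above: with ->writepage gone, writeback
enters the filesystem only through ->writepages, and write_cache_pages() hands
each dirty folio, already locked and with its dirty flag cleared for I/O, to the
per-folio callback. The following is a minimal sketch of that contract, not
UBIFS code; the myfs_* names are hypothetical and the callback body is reduced
to the bare locking rules.

/* Minimal sketch of the write_cache_pages() contract (hypothetical myfs_*). */
#include <linux/fs.h>
#include <linux/pagemap.h>
#include <linux/writeback.h>

static int myfs_write_folio(struct folio *folio,
			    struct writeback_control *wbc, void *data)
{
	/*
	 * The folio arrives locked with its dirty flag already cleared for
	 * I/O. A real implementation submits the folio's blocks here; on
	 * failure it must call folio_redirty_for_writepage(). Either way
	 * the folio must be unlocked before returning.
	 */
	folio_unlock(folio);
	return 0;
}

static int myfs_writepages(struct address_space *mapping,
			   struct writeback_control *wbc)
{
	/* The last argument is an opaque cookie passed to each callback. */
	return write_cache_pages(mapping, wbc, myfs_write_folio, NULL);
}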
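
The reservation queuing added to fs/ubifs/journal.c above follows a general
pattern: a lock-free fast path on an atomic flag, a re-check under the
wait-queue lock, exclusive enqueueing so that each wake-up releases exactly one
task, and an exit path that either hands off to one waiter or clears the flag
under the same lock so the two cannot race. A skeleton of that pattern, assuming
hypothetical demo_* names and omitting the retry logic that triggers it:

/* Skeleton of the flag-plus-exclusive-waitqueue handoff (hypothetical demo_*). */
#include <linux/atomic.h>
#include <linux/sched.h>
#include <linux/wait.h>

static DECLARE_WAIT_QUEUE_HEAD(demo_wq);
static atomic_t demo_queuing = ATOMIC_INIT(0);

static void demo_enter(void)
{
	if (likely(atomic_read(&demo_queuing) == 0))
		return;			/* fast path: nobody is queuing */

	spin_lock(&demo_wq.lock);
	if (atomic_read(&demo_queuing) == 0) {
		spin_unlock(&demo_wq.lock);
		return;			/* queuing stopped while we raced */
	}

	{
		DEFINE_WAIT(wait);

		/* Exclusive entry: wake_up_locked() later wakes only one task. */
		__add_wait_queue_entry_tail_exclusive(&demo_wq, &wait);
		set_current_state(TASK_UNINTERRUPTIBLE);
		spin_unlock(&demo_wq.lock);
		schedule();
		finish_wait(&demo_wq, &wait);
	}
}

static void demo_exit(void)
{
	spin_lock(&demo_wq.lock);
	if (waitqueue_active(&demo_wq))
		wake_up_locked(&demo_wq);	/* hand off to one waiter */
	else
		atomic_set(&demo_queuing, 0);	/* queue drained: stop queuing */
	spin_unlock(&demo_wq.lock);
}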
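
The new UBI_VOLUME_SHUTDOWN notification fires before the volume disappears,
unlike UBI_VOLUME_REMOVED, which reports removal after the fact, so in-kernel
users get a chance to quiesce. A hypothetical client built on the existing
ubi_register_volume_notifier() interface might handle it as sketched below; the
demo_* names are assumptions and teardown is reduced to a log line.

/* Hypothetical consumer of the new UBI_VOLUME_SHUTDOWN notification. */
#include <linux/mtd/ubi.h>
#include <linux/notifier.h>
#include <linux/printk.h>

static int demo_ubi_notify(struct notifier_block *nb,
			   unsigned long notification_type, void *ns_ptr)
{
	struct ubi_notification *nt = ns_ptr;

	switch (notification_type) {
	case UBI_VOLUME_SHUTDOWN:
		/*
		 * The volume is about to be removed: stop I/O and drop any
		 * references now, before UBI_VOLUME_REMOVED is delivered.
		 */
		pr_info("ubi%d volume %d is shutting down\n",
			nt->vi.ubi_num, nt->vi.vol_id);
		break;
	default:
		break;
	}
	return NOTIFY_OK;
}

static struct notifier_block demo_ubi_nb = {
	.notifier_call = demo_ubi_notify,
};

/* Registered once with: ubi_register_volume_notifier(&demo_ubi_nb, 0); */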