From 0c67c37e1710b2a8f61c8a02db95a51fe577e2c1 Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Tue, 11 Feb 2025 13:47:50 -0800 Subject: [PATCH 1/8] fuse: revert back to __readahead_folio() for readahead MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In commit 3eab9d7bc2f4 ("fuse: convert readahead to use folios"), the logic was converted to using the new folio readahead code, which drops the reference on the folio once it is locked, using an inferred reference on the folio. Previously we held a reference on the folio for the entire duration of the readpages call. This is fine, however for the case for splice pipe responses where we will remove the old folio and splice in the new folio (see fuse_try_move_page()), we assume that there is a reference held on the folio for ap->folios, which is no longer the case. To fix this, revert back to __readahead_folio() which allows us to hold the reference on the folio for the duration of readpages until either we drop the reference ourselves in fuse_readpages_end() or the reference is dropped after it's replaced in the page cache in the splice case. This will fix the UAF bug that was reported. Link: https://lore.kernel.org/linux-fsdevel/2f681f48-00f5-4e09-8431-2b3dbfaa881e@heusel.eu/ Fixes: 3eab9d7bc2f4 ("fuse: convert readahead to use folios") Reported-by: Christian Heusel Closes: https://lore.kernel.org/all/2f681f48-00f5-4e09-8431-2b3dbfaa881e@heusel.eu/ Closes: https://gitlab.archlinux.org/archlinux/packaging/packages/linux/-/issues/110 Reported-by: Mantas Mikulėnas Closes: https://lore.kernel.org/all/34feb867-09e2-46e4-aa31-d9660a806d1a@gmail.com/ Closes: https://bugzilla.opensuse.org/show_bug.cgi?id=1236660 Cc: # v6.13 Signed-off-by: Joanne Koong Reviewed-by: Jeff Layton Signed-off-by: Miklos Szeredi --- fs/fuse/dev.c | 6 ++++++ fs/fuse/file.c | 13 +++++++++++-- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 5b5f789b37eb..2b2d1b755544 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -838,6 +838,12 @@ static int fuse_check_folio(struct folio *folio) return 0; } +/* + * Attempt to steal a page from the splice() pipe and move it into the + * pagecache. If successful, the pointer in @pagep will be updated. The + * folio that was originally in @pagep will lose a reference and the new + * folio returned in @pagep will carry a reference. + */ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep) { int err; diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 7d92a5479998..d63e56fd3dd2 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -955,8 +955,10 @@ static void fuse_readpages_end(struct fuse_mount *fm, struct fuse_args *args, fuse_invalidate_atime(inode); } - for (i = 0; i < ap->num_folios; i++) + for (i = 0; i < ap->num_folios; i++) { folio_end_read(ap->folios[i], !err); + folio_put(ap->folios[i]); + } if (ia->ff) fuse_file_put(ia->ff, false); @@ -1048,7 +1050,14 @@ static void fuse_readahead(struct readahead_control *rac) ap = &ia->ap; while (ap->num_folios < cur_pages) { - folio = readahead_folio(rac); + /* + * This returns a folio with a ref held on it. + * The ref needs to be held until the request is + * completed, since the splice case (see + * fuse_try_move_page()) drops the ref after it's + * replaced in the page cache. + */ + folio = __readahead_folio(rac); ap->folios[ap->num_folios] = folio; ap->descs[ap->num_folios].length = folio_size(folio); ap->num_folios++; From c84e125fff2615b4d9c259e762596134eddd2f27 Mon Sep 17 00:00:00 2001 From: Vasiliy Kovalev Date: Sat, 15 Feb 2025 00:51:48 +0300 Subject: [PATCH 2/8] ovl: fix UAF in ovl_dentry_update_reval by moving dput() in ovl_link_up The issue was caused by dput(upper) being called before ovl_dentry_update_reval(), while upper->d_flags was still accessed in ovl_dentry_remote(). Move dput(upper) after its last use to prevent use-after-free. BUG: KASAN: slab-use-after-free in ovl_dentry_remote fs/overlayfs/util.c:162 [inline] BUG: KASAN: slab-use-after-free in ovl_dentry_update_reval+0xd2/0xf0 fs/overlayfs/util.c:167 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0x116/0x1f0 lib/dump_stack.c:114 print_address_description mm/kasan/report.c:377 [inline] print_report+0xc3/0x620 mm/kasan/report.c:488 kasan_report+0xd9/0x110 mm/kasan/report.c:601 ovl_dentry_remote fs/overlayfs/util.c:162 [inline] ovl_dentry_update_reval+0xd2/0xf0 fs/overlayfs/util.c:167 ovl_link_up fs/overlayfs/copy_up.c:610 [inline] ovl_copy_up_one+0x2105/0x3490 fs/overlayfs/copy_up.c:1170 ovl_copy_up_flags+0x18d/0x200 fs/overlayfs/copy_up.c:1223 ovl_rename+0x39e/0x18c0 fs/overlayfs/dir.c:1136 vfs_rename+0xf84/0x20a0 fs/namei.c:4893 ... Fixes: b07d5cc93e1b ("ovl: update of dentry revalidate flags after copy up") Reported-by: syzbot+316db8a1191938280eb6@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=316db8a1191938280eb6 Signed-off-by: Vasiliy Kovalev Link: https://lore.kernel.org/r/20250214215148.761147-1-kovalev@altlinux.org Reviewed-by: Amir Goldstein Signed-off-by: Christian Brauner --- fs/overlayfs/copy_up.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index 0c28e5fa3407..d7310fcf3888 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -618,7 +618,6 @@ static int ovl_link_up(struct ovl_copy_up_ctx *c) err = PTR_ERR(upper); if (!IS_ERR(upper)) { err = ovl_do_link(ofs, ovl_dentry_upper(c->dentry), udir, upper); - dput(upper); if (!err) { /* Restore timestamps on parent (best effort) */ @@ -626,6 +625,7 @@ static int ovl_link_up(struct ovl_copy_up_ctx *c) ovl_dentry_set_upper_alias(c->dentry); ovl_dentry_update_reval(c->dentry, upper); } + dput(upper); } inode_unlock(udir); if (err) From b4c173dfbb6c78568578ff18f9e8822d7bd0e31b Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 20 Feb 2025 11:02:58 +0100 Subject: [PATCH 3/8] fuse: don't truncate cached, mutated symlink Fuse allows the value of a symlink to change and this property is exploited by some filesystems (e.g. CVMFS). It has been observed, that sometimes after changing the symlink contents, the value is truncated to the old size. This is caused by fuse_getattr() racing with fuse_reverse_inval_inode(). fuse_reverse_inval_inode() updates the fuse_inode's attr_version, which results in fuse_change_attributes() exiting before updating the cached attributes This is okay, as the cached attributes remain invalid and the next call to fuse_change_attributes() will likely update the inode with the correct values. The reason this causes problems is that cached symlinks will be returned through page_get_link(), which truncates the symlink to inode->i_size. This is correct for filesystems that don't mutate symlinks, but in this case it causes bad behavior. The solution is to just remove this truncation. This can cause a regression in a filesystem that relies on supplying a symlink larger than the file size, but this is unlikely. If that happens we'd need to make this behavior conditional. Reported-by: Laura Promberger Tested-by: Sam Lewis Signed-off-by: Miklos Szeredi Link: https://lore.kernel.org/r/20250220100258.793363-1-mszeredi@redhat.com Reviewed-by: Bernd Schubert Signed-off-by: Christian Brauner --- fs/fuse/dir.c | 2 +- fs/namei.c | 24 +++++++++++++++++++----- include/linux/fs.h | 2 ++ 3 files changed, 22 insertions(+), 6 deletions(-) diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 198862b086ff..3805f9b06c9d 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -1636,7 +1636,7 @@ static const char *fuse_get_link(struct dentry *dentry, struct inode *inode, goto out_err; if (fc->cache_symlinks) - return page_get_link(dentry, inode, callback); + return page_get_link_raw(dentry, inode, callback); err = -ECHILD; if (!dentry) diff --git a/fs/namei.c b/fs/namei.c index 3ab9440c5b93..ecb7b95c2ca3 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -5356,10 +5356,9 @@ const char *vfs_get_link(struct dentry *dentry, struct delayed_call *done) EXPORT_SYMBOL(vfs_get_link); /* get the link contents into pagecache */ -const char *page_get_link(struct dentry *dentry, struct inode *inode, - struct delayed_call *callback) +static char *__page_get_link(struct dentry *dentry, struct inode *inode, + struct delayed_call *callback) { - char *kaddr; struct page *page; struct address_space *mapping = inode->i_mapping; @@ -5378,8 +5377,23 @@ const char *page_get_link(struct dentry *dentry, struct inode *inode, } set_delayed_call(callback, page_put_link, page); BUG_ON(mapping_gfp_mask(mapping) & __GFP_HIGHMEM); - kaddr = page_address(page); - nd_terminate_link(kaddr, inode->i_size, PAGE_SIZE - 1); + return page_address(page); +} + +const char *page_get_link_raw(struct dentry *dentry, struct inode *inode, + struct delayed_call *callback) +{ + return __page_get_link(dentry, inode, callback); +} +EXPORT_SYMBOL_GPL(page_get_link_raw); + +const char *page_get_link(struct dentry *dentry, struct inode *inode, + struct delayed_call *callback) +{ + char *kaddr = __page_get_link(dentry, inode, callback); + + if (!IS_ERR(kaddr)) + nd_terminate_link(kaddr, inode->i_size, PAGE_SIZE - 1); return kaddr; } diff --git a/include/linux/fs.h b/include/linux/fs.h index 2c3b2f8a621f..9346adf28f7b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3452,6 +3452,8 @@ extern const struct file_operations generic_ro_fops; extern int readlink_copy(char __user *, int, const char *, int); extern int page_readlink(struct dentry *, char __user *, int); +extern const char *page_get_link_raw(struct dentry *, struct inode *, + struct delayed_call *); extern const char *page_get_link(struct dentry *, struct inode *, struct delayed_call *); extern void page_put_link(void *); From 8510edf191d2df0822ea22d6226e4eef87562271 Mon Sep 17 00:00:00 2001 From: Jingbo Xu Date: Tue, 18 Feb 2025 20:02:08 +0800 Subject: [PATCH 4/8] mm/filemap: fix miscalculated file range for filemap_fdatawrite_range_kick() iocb->ki_pos has been updated with the number of written bytes since generic_perform_write(). Besides __filemap_fdatawrite_range() accepts the inclusive end of the data range. Fixes: 1d4457576570 ("mm: call filemap_fdatawrite_range_kick() after IOCB_DONTCACHE issue") Signed-off-by: Jingbo Xu Link: https://lore.kernel.org/r/20250218120209.88093-2-jefflexu@linux.alibaba.com Reviewed-by: Jens Axboe Signed-off-by: Christian Brauner --- include/linux/fs.h | 4 ++-- mm/filemap.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/include/linux/fs.h b/include/linux/fs.h index 9346adf28f7b..2788df98080f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2975,8 +2975,8 @@ static inline ssize_t generic_write_sync(struct kiocb *iocb, ssize_t count) } else if (iocb->ki_flags & IOCB_DONTCACHE) { struct address_space *mapping = iocb->ki_filp->f_mapping; - filemap_fdatawrite_range_kick(mapping, iocb->ki_pos, - iocb->ki_pos + count); + filemap_fdatawrite_range_kick(mapping, iocb->ki_pos - count, + iocb->ki_pos - 1); } return count; diff --git a/mm/filemap.c b/mm/filemap.c index 804d7365680c..d4564a79eb35 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -445,7 +445,7 @@ EXPORT_SYMBOL(filemap_fdatawrite_range); * filemap_fdatawrite_range_kick - start writeback on a range * @mapping: target address_space * @start: index to start writeback on - * @end: last (non-inclusive) index for writeback + * @end: last (inclusive) index for writeback * * This is a non-integrity writeback helper, to start writing back folios * for the indicated range. From 927289988068a65ccc168eda881ce60f8712707b Mon Sep 17 00:00:00 2001 From: Jingbo Xu Date: Tue, 18 Feb 2025 20:02:09 +0800 Subject: [PATCH 5/8] mm/truncate: don't skip dirty page in folio_unmap_invalidate() ... otherwise this is a behavior change for the previous callers of invalidate_complete_folio2(), e.g. the page invalidation routine. Fixes: 4a9e23159fd3 ("mm/truncate: add folio_unmap_invalidate() helper") Signed-off-by: Jingbo Xu Link: https://lore.kernel.org/r/20250218120209.88093-3-jefflexu@linux.alibaba.com Signed-off-by: Christian Brauner --- mm/truncate.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/mm/truncate.c b/mm/truncate.c index e2e115adfbc5..76d8fcd89bd0 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -548,8 +548,6 @@ int folio_unmap_invalidate(struct address_space *mapping, struct folio *folio, VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio); - if (folio_test_dirty(folio)) - return 0; if (folio_mapped(folio)) unmap_mapping_folio(folio); BUG_ON(folio_mapped(folio)); From 02cfe2b6529c6c5fcf39d52a826927f4f93392af Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 24 Feb 2025 11:27:02 +0100 Subject: [PATCH 6/8] pidfs: remove d_op->d_delete Pidfs only deals with unhashed dentries and there's currently no way for them to become hashed. So remove d_op->d_delete. Signed-off-by: Christian Brauner --- fs/pidfs.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/pidfs.c b/fs/pidfs.c index 63f9699ebac3..c0478b3c55d9 100644 --- a/fs/pidfs.c +++ b/fs/pidfs.c @@ -521,7 +521,6 @@ static char *pidfs_dname(struct dentry *dentry, char *buffer, int buflen) } const struct dentry_operations pidfs_dentry_operations = { - .d_delete = always_delete_dentry, .d_dname = pidfs_dname, .d_prune = stashed_dentry_prune, }; From 425e3e3bd62c568a4365af0923d6ebad71a7dcfc Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 24 Feb 2025 11:30:35 +0100 Subject: [PATCH 7/8] nsfs: remove d_op->d_delete Nsfs only deals with unhashed dentries and there's currently no way for them to become hashed. So remove d_op->d_delete. Signed-off-by: Christian Brauner --- fs/nsfs.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/nsfs.c b/fs/nsfs.c index 663f8656158d..f7fddf8ecf73 100644 --- a/fs/nsfs.c +++ b/fs/nsfs.c @@ -37,7 +37,6 @@ static char *ns_dname(struct dentry *dentry, char *buffer, int buflen) } const struct dentry_operations ns_dentry_operations = { - .d_delete = always_delete_dentry, .d_dname = ns_dname, .d_prune = stashed_dentry_prune, }; From b5799106b44e1df594f4696500dbbc3b326bba18 Mon Sep 17 00:00:00 2001 From: John Garry Date: Mon, 24 Feb 2025 15:45:38 +0000 Subject: [PATCH 8/8] iomap: Minor code simplification in iomap_dio_bio_iter() Combine 'else' and 'if' conditional statements onto a single line and drop unrequired braces, as is standard coding style. The code had been like this since commit c3b0e880bbfa ("iomap: support REQ_OP_ZONE_APPEND"). Signed-off-by: John Garry Link: https://lore.kernel.org/r/20250224154538.548028-1-john.g.garry@oracle.com Reviewed-by: "Darrick J. Wong" Signed-off-by: Christian Brauner --- fs/iomap/direct-io.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c index b521eb15759e..0e47da82b0c2 100644 --- a/fs/iomap/direct-io.c +++ b/fs/iomap/direct-io.c @@ -427,12 +427,10 @@ static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter, bio_put(bio); goto zero_tail; } - if (dio->flags & IOMAP_DIO_WRITE) { + if (dio->flags & IOMAP_DIO_WRITE) task_io_account_write(n); - } else { - if (dio->flags & IOMAP_DIO_DIRTY) - bio_set_pages_dirty(bio); - } + else if (dio->flags & IOMAP_DIO_DIRTY) + bio_set_pages_dirty(bio); dio->size += n; copied += n;