From 225c33f8279e041df1f5165a5c58e6281945e3f9 Mon Sep 17 00:00:00 2001 From: ptr1337 Date: Sat, 2 Jul 2022 19:43:22 +0000 Subject: [PATCH] enable BCACHEFS since the module is broken, update bcachefs patch (#548) --- linux-tkg-config/prepare | 3 +- .../5.18/0008-5.18-bcachefs.patch | 4322 ++++++++--------- 2 files changed, 1974 insertions(+), 2351 deletions(-) diff --git a/linux-tkg-config/prepare b/linux-tkg-config/prepare index 5ae5f03..0c17932 100644 --- a/linux-tkg-config/prepare +++ b/linux-tkg-config/prepare @@ -1176,8 +1176,7 @@ _tkg_srcprep() { _msg="Patching Bcache filesystem support override" _tkg_patcher - _module "BCACHEFS_FS" - _enable "BCACHEFS_QUOTA" "BCACHEFS_POSIX_ACL" + _enable "BCACHEFS_FS" "BCACHEFS_QUOTA" "BCACHEFS_POSIX_ACL" _disable "BCACHEFS_DEBUG" "BCACHEFS_TESTS" "DEBUG_CLOSURES" fi fi diff --git a/linux-tkg-patches/5.18/0008-5.18-bcachefs.patch b/linux-tkg-patches/5.18/0008-5.18-bcachefs.patch index 5639395..8cb13cf 100644 --- a/linux-tkg-patches/5.18/0008-5.18-bcachefs.patch +++ b/linux-tkg-patches/5.18/0008-5.18-bcachefs.patch @@ -1,72 +1,417 @@ -diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md -new file mode 100644 -index 000000000000..8af34357dd98 ---- /dev/null -+++ b/.github/ISSUE_TEMPLATE/bug_report.md -@@ -0,0 +1,61 @@ -+--- -+name: Bug report -+about: Create a report to help us improve -+title: " [short commit id]" -+labels: bug -+assignees: YellowOnion -+ -+--- -+ -+**Please search for duplicates** -+ -+**Version** -+ -+Make sure you're using a reasonably new version. -+ -+Provide the commit hash from the kernel version (preferable) or tools, don't say "I'm using the latest master" as that will very quickly become out of date. -+ -+**Generic info** -+Provide the output of: -+``` -+bcachefs fs usage -+bcachefs show-super -+``` -+**Tools bugs** -+ -+* pull the latest version, compile it, do not strip the binary. -+* provide the exact commands you used to run. 
-+* run with gdb: `gdb -ex run --args ./bcacehfs ` -+ -+If you get an assert/segfault etc: -+* type `bt` in to and provide the output here. -+ -+If the tools lockup: -+* run `perf top -p $(pidof bcachefs)` and provide a screenshot. -+* press ctrl+c to interrupt the process and provide the output of `bt`. -+ -+**Kernel bugs** -+Compile the kernel with these flags: -+ -+``` -+CONFIG_PREEMPT=y -+CONFIG_BCACHEFS_DEBUG=y -+CONFIG_KALLSYMS=y -+CONFIG_KALLSYMS_ALL=y -+CONFIG_DEBUG_FS=y -+CONFIG_DYNAMIC_FTRACE=y -+CONFIG_FTRACE=y -+``` -+Provide the output of `dmesg` either in a paste-bin or as attachment, if less than 30~ lines just provide inline here. -+ -+ -+**Optional Advanced** -+ -+If lockup or performance issues: -+* run `perf record` and `perf record -e 'bcachefs:*' -o events.data` both during the window of issue and then ctrl+c. -+* run `perf archive` to dump symbols. -+* archive, compress and upload the files: `perf.data`, `events.data` and `perf.data.tar.bz2`. -+ -+Upload large files to a file storage provider: -+* provide the output of `bcachefs list_journal -a | zstd -f -T0 -o ../journal.log.zst` -+*compress & upload all the `metdata.dump.*` files from: bcachefs dump -o metadata.dump +From e2bc97d02026d17fad53c5b34ff4ca9aacf45080 Mon Sep 17 00:00:00 2001 +From: Piotr Gorski +Date: Sat, 2 Jul 2022 02:47:15 +0200 +Subject: [PATCH] bcachefs-5.18: introduce bcachefs patchset + +Signed-off-by: Piotr Gorski +--- + Documentation/core-api/printk-formats.rst | 22 + + arch/powerpc/kernel/process.c | 16 +- + arch/powerpc/kernel/security.c | 75 +- + arch/powerpc/platforms/pseries/papr_scm.c | 34 +- + arch/x86/kernel/cpu/resctrl/rdtgroup.c | 16 +- + block/bio.c | 34 +- + block/blk-core.c | 1 + + block/blk.h | 1 - + drivers/acpi/apei/erst-dbg.c | 1 + + drivers/block/loop.c | 2 - + drivers/clk/tegra/clk-bpmp.c | 21 +- + drivers/input/joystick/analog.c | 23 +- + drivers/md/bcache/Kconfig | 10 +- + drivers/md/bcache/Makefile | 4 +- + drivers/md/bcache/bcache.h | 2 +- + 
drivers/md/bcache/super.c | 1 - + drivers/md/bcache/util.h | 3 +- + drivers/pci/p2pdma.c | 21 +- + fs/Kconfig | 1 + + fs/Makefile | 1 + + fs/bcachefs/Kconfig | 52 + + fs/bcachefs/Makefile | 68 + + fs/bcachefs/acl.c | 406 ++ + fs/bcachefs/acl.h | 58 + + fs/bcachefs/alloc_background.c | 1600 ++++++++ + fs/bcachefs/alloc_background.h | 181 + + fs/bcachefs/alloc_foreground.c | 1282 ++++++ + fs/bcachefs/alloc_foreground.h | 173 + + fs/bcachefs/alloc_types.h | 87 + + fs/bcachefs/backpointers.c | 891 +++++ + fs/bcachefs/backpointers.h | 38 + + fs/bcachefs/bcachefs.h | 988 +++++ + fs/bcachefs/bcachefs_format.h | 2052 ++++++++++ + fs/bcachefs/bcachefs_ioctl.h | 368 ++ + fs/bcachefs/bkey.c | 1175 ++++++ + fs/bcachefs/bkey.h | 566 +++ + fs/bcachefs/bkey_buf.h | 60 + + fs/bcachefs/bkey_methods.c | 503 +++ + fs/bcachefs/bkey_methods.h | 175 + + fs/bcachefs/bkey_sort.c | 198 + + fs/bcachefs/bkey_sort.h | 44 + + fs/bcachefs/bset.c | 1598 ++++++++ + fs/bcachefs/bset.h | 615 +++ + fs/bcachefs/btree_cache.c | 1162 ++++++ + fs/bcachefs/btree_cache.h | 107 + + fs/bcachefs/btree_gc.c | 2128 ++++++++++ + fs/bcachefs/btree_gc.h | 112 + + fs/bcachefs/btree_io.c | 2150 ++++++++++ + fs/bcachefs/btree_io.h | 222 ++ + fs/bcachefs/btree_iter.c | 3471 ++++++++++++++++ + fs/bcachefs/btree_iter.h | 411 ++ + fs/bcachefs/btree_key_cache.c | 850 ++++ + fs/bcachefs/btree_key_cache.h | 47 + + fs/bcachefs/btree_locking.h | 259 ++ + fs/bcachefs/btree_types.h | 687 ++++ + fs/bcachefs/btree_update.h | 156 + + fs/bcachefs/btree_update_interior.c | 2253 +++++++++++ + fs/bcachefs/btree_update_interior.h | 321 ++ + fs/bcachefs/btree_update_leaf.c | 1815 +++++++++ + fs/bcachefs/buckets.c | 2114 ++++++++++ + fs/bcachefs/buckets.h | 300 ++ + fs/bcachefs/buckets_types.h | 103 + + fs/bcachefs/buckets_waiting_for_journal.c | 167 + + fs/bcachefs/buckets_waiting_for_journal.h | 15 + + .../buckets_waiting_for_journal_types.h | 23 + + fs/bcachefs/chardev.c | 760 ++++ + fs/bcachefs/chardev.h | 31 + + 
fs/bcachefs/checksum.c | 707 ++++ + fs/bcachefs/checksum.h | 204 + + fs/bcachefs/clock.c | 191 + + fs/bcachefs/clock.h | 38 + + fs/bcachefs/clock_types.h | 37 + + fs/bcachefs/compress.c | 639 +++ + fs/bcachefs/compress.h | 18 + + fs/bcachefs/counters.c | 107 + + fs/bcachefs/counters.h | 17 + + fs/bcachefs/darray.h | 77 + + fs/bcachefs/data_update.c | 379 ++ + fs/bcachefs/data_update.h | 38 + + fs/bcachefs/debug.c | 707 ++++ + fs/bcachefs/debug.h | 30 + + fs/bcachefs/dirent.c | 565 +++ + fs/bcachefs/dirent.h | 67 + + fs/bcachefs/disk_groups.c | 506 +++ + fs/bcachefs/disk_groups.h | 90 + + fs/bcachefs/ec.c | 1695 ++++++++ + fs/bcachefs/ec.h | 230 ++ + fs/bcachefs/ec_types.h | 46 + + fs/bcachefs/errcode.h | 12 + + fs/bcachefs/error.c | 185 + + fs/bcachefs/error.h | 238 ++ + fs/bcachefs/extent_update.c | 178 + + fs/bcachefs/extent_update.h | 12 + + fs/bcachefs/extents.c | 1324 +++++++ + fs/bcachefs/extents.h | 685 ++++ + fs/bcachefs/extents_types.h | 40 + + fs/bcachefs/eytzinger.h | 281 ++ + fs/bcachefs/fifo.h | 127 + + fs/bcachefs/fs-common.c | 496 +++ + fs/bcachefs/fs-common.h | 43 + + fs/bcachefs/fs-io.c | 3496 +++++++++++++++++ + fs/bcachefs/fs-io.h | 56 + + fs/bcachefs/fs-ioctl.c | 523 +++ + fs/bcachefs/fs-ioctl.h | 81 + + fs/bcachefs/fs.c | 1939 +++++++++ + fs/bcachefs/fs.h | 208 + + fs/bcachefs/fsck.c | 2413 ++++++++++++ + fs/bcachefs/fsck.h | 8 + + fs/bcachefs/inode.c | 771 ++++ + fs/bcachefs/inode.h | 189 + + fs/bcachefs/io.c | 2417 ++++++++++++ + fs/bcachefs/io.h | 189 + + fs/bcachefs/io_types.h | 161 + + fs/bcachefs/journal.c | 1429 +++++++ + fs/bcachefs/journal.h | 521 +++ + fs/bcachefs/journal_io.c | 1735 ++++++++ + fs/bcachefs/journal_io.h | 59 + + fs/bcachefs/journal_reclaim.c | 849 ++++ + fs/bcachefs/journal_reclaim.h | 86 + + fs/bcachefs/journal_sb.c | 220 ++ + fs/bcachefs/journal_sb.h | 24 + + fs/bcachefs/journal_seq_blacklist.c | 322 ++ + fs/bcachefs/journal_seq_blacklist.h | 22 + + fs/bcachefs/journal_types.h | 340 ++ + fs/bcachefs/keylist.c | 67 + 
+ fs/bcachefs/keylist.h | 76 + + fs/bcachefs/keylist_types.h | 16 + + fs/bcachefs/lru.c | 219 ++ + fs/bcachefs/lru.h | 19 + + fs/bcachefs/migrate.c | 193 + + fs/bcachefs/migrate.h | 7 + + fs/bcachefs/move.c | 951 +++++ + fs/bcachefs/move.h | 67 + + fs/bcachefs/move_types.h | 19 + + fs/bcachefs/movinggc.c | 282 ++ + fs/bcachefs/movinggc.h | 9 + + fs/bcachefs/opts.c | 578 +++ + fs/bcachefs/opts.h | 504 +++ + fs/bcachefs/quota.c | 859 ++++ + fs/bcachefs/quota.h | 71 + + fs/bcachefs/quota_types.h | 43 + + fs/bcachefs/rebalance.c | 358 ++ + fs/bcachefs/rebalance.h | 28 + + fs/bcachefs/rebalance_types.h | 26 + + fs/bcachefs/recovery.c | 1584 ++++++++ + fs/bcachefs/recovery.h | 58 + + fs/bcachefs/reflink.c | 421 ++ + fs/bcachefs/reflink.h | 76 + + fs/bcachefs/replicas.c | 1073 +++++ + fs/bcachefs/replicas.h | 106 + + fs/bcachefs/replicas_types.h | 10 + + fs/bcachefs/siphash.c | 173 + + fs/bcachefs/siphash.h | 87 + + fs/bcachefs/str_hash.h | 351 ++ + fs/bcachefs/subvolume.c | 1095 ++++++ + fs/bcachefs/subvolume.h | 126 + + fs/bcachefs/subvolume_types.h | 9 + + fs/bcachefs/super-io.c | 1602 ++++++++ + fs/bcachefs/super-io.h | 126 + + fs/bcachefs/super.c | 1970 ++++++++++ + fs/bcachefs/super.h | 264 ++ + fs/bcachefs/super_types.h | 51 + + fs/bcachefs/sysfs.c | 943 +++++ + fs/bcachefs/sysfs.h | 48 + + fs/bcachefs/tests.c | 947 +++++ + fs/bcachefs/tests.h | 15 + + fs/bcachefs/trace.c | 12 + + fs/bcachefs/util.c | 958 +++++ + fs/bcachefs/util.h | 783 ++++ + fs/bcachefs/varint.c | 121 + + fs/bcachefs/varint.h | 11 + + fs/bcachefs/vstructs.h | 63 + + fs/bcachefs/xattr.c | 648 +++ + fs/bcachefs/xattr.h | 50 + + fs/d_path.c | 35 + + fs/dcache.c | 10 +- + fs/inode.c | 218 +- + include/linux/bio.h | 7 +- + include/linux/blkdev.h | 1 + + .../md/bcache => include/linux}/closure.h | 39 +- + include/linux/compiler_attributes.h | 5 + + include/linux/dcache.h | 2 + + include/linux/exportfs.h | 6 + + include/linux/fs.h | 9 +- + include/linux/generic-radix-tree.h | 68 +- + 
include/linux/kernel.h | 12 + + include/linux/list_bl.h | 22 + + include/linux/lockdep.h | 4 + + include/linux/pretty-printers.h | 10 + + include/linux/printbuf.h | 283 ++ + include/linux/sched.h | 1 + + include/linux/seq_buf.h | 162 - + include/linux/shrinker.h | 8 + + include/linux/six.h | 203 + + include/linux/string.h | 5 + + include/linux/string_helpers.h | 8 +- + include/linux/trace_events.h | 2 +- + include/linux/trace_seq.h | 17 +- + include/linux/vmalloc.h | 1 + + include/trace/events/bcachefs.h | 1020 +++++ + init/init_task.c | 1 + + kernel/Kconfig.locks | 3 + + kernel/locking/Makefile | 1 + + kernel/locking/lockdep.c | 20 + + kernel/locking/six.c | 759 ++++ + kernel/module.c | 4 +- + kernel/trace/trace.c | 45 +- + kernel/trace/trace_dynevent.c | 34 +- + kernel/trace/trace_events_filter.c | 2 +- + kernel/trace/trace_events_synth.c | 32 +- + kernel/trace/trace_functions_graph.c | 6 +- + kernel/trace/trace_kprobe.c | 2 +- + kernel/trace/trace_seq.c | 111 +- + lib/Kconfig | 3 + + lib/Kconfig.debug | 9 + + lib/Makefile | 8 +- + {drivers/md/bcache => lib}/closure.c | 35 +- + lib/generic-radix-tree.c | 76 +- + lib/hexdump.c | 246 +- + lib/pretty-printers.c | 60 + + lib/printbuf.c | 258 ++ + lib/seq_buf.c | 397 -- + lib/string_helpers.c | 224 +- + lib/test_hexdump.c | 30 +- + lib/test_printf.c | 33 +- + lib/vsprintf.c | 1740 ++++---- + mm/Makefile | 2 +- + mm/filemap.c | 1 + + mm/memcontrol.c | 68 +- + mm/nommu.c | 18 + + mm/oom_kill.c | 23 - + {lib => mm}/show_mem.c | 8 + + mm/slab.h | 6 +- + mm/slab_common.c | 53 +- + mm/vmalloc.c | 21 + + mm/vmscan.c | 88 + + tools/testing/nvdimm/test/ndtest.c | 22 +- + 237 files changed, 83816 insertions(+), 2162 deletions(-) + create mode 100644 fs/bcachefs/Kconfig + create mode 100644 fs/bcachefs/Makefile + create mode 100644 fs/bcachefs/acl.c + create mode 100644 fs/bcachefs/acl.h + create mode 100644 fs/bcachefs/alloc_background.c + create mode 100644 fs/bcachefs/alloc_background.h + create mode 100644 
fs/bcachefs/alloc_foreground.c + create mode 100644 fs/bcachefs/alloc_foreground.h + create mode 100644 fs/bcachefs/alloc_types.h + create mode 100644 fs/bcachefs/backpointers.c + create mode 100644 fs/bcachefs/backpointers.h + create mode 100644 fs/bcachefs/bcachefs.h + create mode 100644 fs/bcachefs/bcachefs_format.h + create mode 100644 fs/bcachefs/bcachefs_ioctl.h + create mode 100644 fs/bcachefs/bkey.c + create mode 100644 fs/bcachefs/bkey.h + create mode 100644 fs/bcachefs/bkey_buf.h + create mode 100644 fs/bcachefs/bkey_methods.c + create mode 100644 fs/bcachefs/bkey_methods.h + create mode 100644 fs/bcachefs/bkey_sort.c + create mode 100644 fs/bcachefs/bkey_sort.h + create mode 100644 fs/bcachefs/bset.c + create mode 100644 fs/bcachefs/bset.h + create mode 100644 fs/bcachefs/btree_cache.c + create mode 100644 fs/bcachefs/btree_cache.h + create mode 100644 fs/bcachefs/btree_gc.c + create mode 100644 fs/bcachefs/btree_gc.h + create mode 100644 fs/bcachefs/btree_io.c + create mode 100644 fs/bcachefs/btree_io.h + create mode 100644 fs/bcachefs/btree_iter.c + create mode 100644 fs/bcachefs/btree_iter.h + create mode 100644 fs/bcachefs/btree_key_cache.c + create mode 100644 fs/bcachefs/btree_key_cache.h + create mode 100644 fs/bcachefs/btree_locking.h + create mode 100644 fs/bcachefs/btree_types.h + create mode 100644 fs/bcachefs/btree_update.h + create mode 100644 fs/bcachefs/btree_update_interior.c + create mode 100644 fs/bcachefs/btree_update_interior.h + create mode 100644 fs/bcachefs/btree_update_leaf.c + create mode 100644 fs/bcachefs/buckets.c + create mode 100644 fs/bcachefs/buckets.h + create mode 100644 fs/bcachefs/buckets_types.h + create mode 100644 fs/bcachefs/buckets_waiting_for_journal.c + create mode 100644 fs/bcachefs/buckets_waiting_for_journal.h + create mode 100644 fs/bcachefs/buckets_waiting_for_journal_types.h + create mode 100644 fs/bcachefs/chardev.c + create mode 100644 fs/bcachefs/chardev.h + create mode 100644 fs/bcachefs/checksum.c + 
create mode 100644 fs/bcachefs/checksum.h + create mode 100644 fs/bcachefs/clock.c + create mode 100644 fs/bcachefs/clock.h + create mode 100644 fs/bcachefs/clock_types.h + create mode 100644 fs/bcachefs/compress.c + create mode 100644 fs/bcachefs/compress.h + create mode 100644 fs/bcachefs/counters.c + create mode 100644 fs/bcachefs/counters.h + create mode 100644 fs/bcachefs/darray.h + create mode 100644 fs/bcachefs/data_update.c + create mode 100644 fs/bcachefs/data_update.h + create mode 100644 fs/bcachefs/debug.c + create mode 100644 fs/bcachefs/debug.h + create mode 100644 fs/bcachefs/dirent.c + create mode 100644 fs/bcachefs/dirent.h + create mode 100644 fs/bcachefs/disk_groups.c + create mode 100644 fs/bcachefs/disk_groups.h + create mode 100644 fs/bcachefs/ec.c + create mode 100644 fs/bcachefs/ec.h + create mode 100644 fs/bcachefs/ec_types.h + create mode 100644 fs/bcachefs/errcode.h + create mode 100644 fs/bcachefs/error.c + create mode 100644 fs/bcachefs/error.h + create mode 100644 fs/bcachefs/extent_update.c + create mode 100644 fs/bcachefs/extent_update.h + create mode 100644 fs/bcachefs/extents.c + create mode 100644 fs/bcachefs/extents.h + create mode 100644 fs/bcachefs/extents_types.h + create mode 100644 fs/bcachefs/eytzinger.h + create mode 100644 fs/bcachefs/fifo.h + create mode 100644 fs/bcachefs/fs-common.c + create mode 100644 fs/bcachefs/fs-common.h + create mode 100644 fs/bcachefs/fs-io.c + create mode 100644 fs/bcachefs/fs-io.h + create mode 100644 fs/bcachefs/fs-ioctl.c + create mode 100644 fs/bcachefs/fs-ioctl.h + create mode 100644 fs/bcachefs/fs.c + create mode 100644 fs/bcachefs/fs.h + create mode 100644 fs/bcachefs/fsck.c + create mode 100644 fs/bcachefs/fsck.h + create mode 100644 fs/bcachefs/inode.c + create mode 100644 fs/bcachefs/inode.h + create mode 100644 fs/bcachefs/io.c + create mode 100644 fs/bcachefs/io.h + create mode 100644 fs/bcachefs/io_types.h + create mode 100644 fs/bcachefs/journal.c + create mode 100644 
fs/bcachefs/journal.h + create mode 100644 fs/bcachefs/journal_io.c + create mode 100644 fs/bcachefs/journal_io.h + create mode 100644 fs/bcachefs/journal_reclaim.c + create mode 100644 fs/bcachefs/journal_reclaim.h + create mode 100644 fs/bcachefs/journal_sb.c + create mode 100644 fs/bcachefs/journal_sb.h + create mode 100644 fs/bcachefs/journal_seq_blacklist.c + create mode 100644 fs/bcachefs/journal_seq_blacklist.h + create mode 100644 fs/bcachefs/journal_types.h + create mode 100644 fs/bcachefs/keylist.c + create mode 100644 fs/bcachefs/keylist.h + create mode 100644 fs/bcachefs/keylist_types.h + create mode 100644 fs/bcachefs/lru.c + create mode 100644 fs/bcachefs/lru.h + create mode 100644 fs/bcachefs/migrate.c + create mode 100644 fs/bcachefs/migrate.h + create mode 100644 fs/bcachefs/move.c + create mode 100644 fs/bcachefs/move.h + create mode 100644 fs/bcachefs/move_types.h + create mode 100644 fs/bcachefs/movinggc.c + create mode 100644 fs/bcachefs/movinggc.h + create mode 100644 fs/bcachefs/opts.c + create mode 100644 fs/bcachefs/opts.h + create mode 100644 fs/bcachefs/quota.c + create mode 100644 fs/bcachefs/quota.h + create mode 100644 fs/bcachefs/quota_types.h + create mode 100644 fs/bcachefs/rebalance.c + create mode 100644 fs/bcachefs/rebalance.h + create mode 100644 fs/bcachefs/rebalance_types.h + create mode 100644 fs/bcachefs/recovery.c + create mode 100644 fs/bcachefs/recovery.h + create mode 100644 fs/bcachefs/reflink.c + create mode 100644 fs/bcachefs/reflink.h + create mode 100644 fs/bcachefs/replicas.c + create mode 100644 fs/bcachefs/replicas.h + create mode 100644 fs/bcachefs/replicas_types.h + create mode 100644 fs/bcachefs/siphash.c + create mode 100644 fs/bcachefs/siphash.h + create mode 100644 fs/bcachefs/str_hash.h + create mode 100644 fs/bcachefs/subvolume.c + create mode 100644 fs/bcachefs/subvolume.h + create mode 100644 fs/bcachefs/subvolume_types.h + create mode 100644 fs/bcachefs/super-io.c + create mode 100644 
fs/bcachefs/super-io.h + create mode 100644 fs/bcachefs/super.c + create mode 100644 fs/bcachefs/super.h + create mode 100644 fs/bcachefs/super_types.h + create mode 100644 fs/bcachefs/sysfs.c + create mode 100644 fs/bcachefs/sysfs.h + create mode 100644 fs/bcachefs/tests.c + create mode 100644 fs/bcachefs/tests.h + create mode 100644 fs/bcachefs/trace.c + create mode 100644 fs/bcachefs/util.c + create mode 100644 fs/bcachefs/util.h + create mode 100644 fs/bcachefs/varint.c + create mode 100644 fs/bcachefs/varint.h + create mode 100644 fs/bcachefs/vstructs.h + create mode 100644 fs/bcachefs/xattr.c + create mode 100644 fs/bcachefs/xattr.h + rename {drivers/md/bcache => include/linux}/closure.h (94%) + create mode 100644 include/linux/pretty-printers.h + create mode 100644 include/linux/printbuf.h + delete mode 100644 include/linux/seq_buf.h + create mode 100644 include/linux/six.h + create mode 100644 include/trace/events/bcachefs.h + create mode 100644 kernel/locking/six.c + rename {drivers/md/bcache => lib}/closure.c (88%) + create mode 100644 lib/pretty-printers.c + create mode 100644 lib/printbuf.c + delete mode 100644 lib/seq_buf.c + rename {lib => mm}/show_mem.c (83%) + diff --git a/Documentation/core-api/printk-formats.rst b/Documentation/core-api/printk-formats.rst -index 5e89497ba314..8fc0b62af158 100644 +index 5e89497ba..4f4a35b3a 100644 --- a/Documentation/core-api/printk-formats.rst +++ b/Documentation/core-api/printk-formats.rst @@ -625,6 +625,28 @@ Examples:: @@ -90,16 +435,16 @@ index 5e89497ba314..8fc0b62af158 100644 + pr_buf(out, "bar=%u baz=%u", foo->bar, foo->baz); + } + -+ printf("%pf(%p)", foo_to_text, foo); ++ printf("%pf(%p)", CALL_PP(foo_to_text, foo)); + -+Note that a pretty-printer may not sleep, if called from printk(). If called -+from pr_buf() or sprintf() there are no such restrictions. ++Note that a pretty-printer may not sleep if called from printk(). If called from ++pr_buf() or sprintf() there are no such restrictions. 
+ Thanks ====== diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c -index 984813a4d5dc..fb8ba502239f 100644 +index 9be279469..4212864c8 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -39,7 +39,7 @@ @@ -151,7 +496,7 @@ index 984813a4d5dc..fb8ba502239f 100644 } diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c -index d96fd14bd7c9..b34de62e65ce 100644 +index d96fd14bd..b34de62e6 100644 --- a/arch/powerpc/kernel/security.c +++ b/arch/powerpc/kernel/security.c @@ -10,7 +10,7 @@ @@ -300,7 +645,7 @@ index d96fd14bd7c9..b34de62e65ce 100644 #ifdef CONFIG_PPC_BOOK3S_64 diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c -index 39962c905542..317d4513db7b 100644 +index 82cae0897..fe2b41858 100644 --- a/arch/powerpc/platforms/pseries/papr_scm.c +++ b/arch/powerpc/platforms/pseries/papr_scm.c @@ -12,7 +12,7 @@ @@ -312,7 +657,7 @@ index 39962c905542..317d4513db7b 100644 #include #include -@@ -1145,7 +1145,7 @@ static ssize_t perf_stats_show(struct device *dev, +@@ -1142,7 +1142,7 @@ static ssize_t perf_stats_show(struct device *dev, { int index; ssize_t rc; @@ -321,7 +666,7 @@ index 39962c905542..317d4513db7b 100644 struct papr_scm_perf_stat *stat; struct papr_scm_perf_stats *stats; struct nvdimm *dimm = to_nvdimm(dev); -@@ -1168,18 +1168,17 @@ static ssize_t perf_stats_show(struct device *dev, +@@ -1165,18 +1165,17 @@ static ssize_t perf_stats_show(struct device *dev, * values. Since stat_id is essentially a char string of * 8 bytes, simply use the string format specifier to print it. 
*/ @@ -344,7 +689,7 @@ index 39962c905542..317d4513db7b 100644 } static DEVICE_ATTR_ADMIN_RO(perf_stats); -@@ -1188,7 +1187,7 @@ static ssize_t flags_show(struct device *dev, +@@ -1185,7 +1184,7 @@ static ssize_t flags_show(struct device *dev, { struct nvdimm *dimm = to_nvdimm(dev); struct papr_scm_priv *p = nvdimm_provider_data(dimm); @@ -353,7 +698,7 @@ index 39962c905542..317d4513db7b 100644 u64 health; int rc; -@@ -1199,29 +1198,28 @@ static ssize_t flags_show(struct device *dev, +@@ -1196,29 +1195,28 @@ static ssize_t flags_show(struct device *dev, /* Copy health_bitmap locally, check masks & update out buffer */ health = READ_ONCE(p->health_bitmap); @@ -393,7 +738,7 @@ index 39962c905542..317d4513db7b 100644 DEVICE_ATTR_RO(flags); diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c -index 83f901e2c2df..5b6720b6a417 100644 +index 83f901e2c..5b6720b6a 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -19,7 +19,7 @@ @@ -460,7 +805,7 @@ index 83f901e2c2df..5b6720b6a417 100644 ret = rdtgroup_setup_root(); if (ret) diff --git a/block/bio.c b/block/bio.c -index 4259125e16ab..b2425b8d8851 100644 +index d3ca79c3e..8779a80f8 100644 --- a/block/bio.c +++ b/block/bio.c @@ -553,15 +553,15 @@ struct bio *bio_kmalloc(gfp_t gfp_mask, unsigned short nr_iovecs) @@ -482,7 +827,7 @@ index 4259125e16ab..b2425b8d8851 100644 /** * bio_truncate - truncate the bio to small size of @new_size -@@ -1332,17 +1332,28 @@ EXPORT_SYMBOL(__bio_advance); +@@ -1333,17 +1333,27 @@ EXPORT_SYMBOL(__bio_advance); void bio_copy_data_iter(struct bio *dst, struct bvec_iter *dst_iter, struct bio *src, struct bvec_iter *src_iter) { @@ -507,19 +852,18 @@ index 4259125e16ab..b2425b8d8851 100644 + memcpy(dst_p + dst_bv.bv_offset, + src_p + src_bv.bv_offset, + bytes); -+ + +- memcpy(dst_buf, src_buf, bytes); + kunmap_atomic(dst_p); + kunmap_atomic(src_p); -- memcpy(dst_buf, src_buf, bytes); - - kunmap_local(dst_buf); 
- kunmap_local(src_buf); + flush_dcache_page(dst_bv.bv_page); bio_advance_iter_single(src, src_iter, bytes); bio_advance_iter_single(dst, dst_iter, bytes); -@@ -1414,6 +1426,7 @@ void bio_set_pages_dirty(struct bio *bio) +@@ -1417,6 +1427,7 @@ void bio_set_pages_dirty(struct bio *bio) set_page_dirty_lock(bvec->bv_page); } } @@ -527,7 +871,7 @@ index 4259125e16ab..b2425b8d8851 100644 /* * bio_check_pages_dirty() will check that all the BIO's pages are still dirty. -@@ -1473,6 +1486,7 @@ void bio_check_pages_dirty(struct bio *bio) +@@ -1476,6 +1487,7 @@ void bio_check_pages_dirty(struct bio *bio) spin_unlock_irqrestore(&bio_dirty_lock, flags); schedule_work(&bio_dirty_work); } @@ -536,7 +880,7 @@ index 4259125e16ab..b2425b8d8851 100644 static inline bool bio_remaining_done(struct bio *bio) { diff --git a/block/blk-core.c b/block/blk-core.c -index bc0506772152..b805a7aa7259 100644 +index a7329475a..a0929889c 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -207,6 +207,7 @@ const char *blk_status_to_str(blk_status_t status) @@ -548,7 +892,7 @@ index bc0506772152..b805a7aa7259 100644 /** * blk_sync_queue - cancel any pending callbacks on a queue diff --git a/block/blk.h b/block/blk.h -index 8ccbc6e07636..16067c4ac775 100644 +index 8ccbc6e07..16067c4ac 100644 --- a/block/blk.h +++ b/block/blk.h @@ -240,7 +240,6 @@ static inline void blk_integrity_del(struct gendisk *disk) @@ -560,7 +904,7 @@ index 8ccbc6e07636..16067c4ac775 100644 bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio, unsigned int nr_segs); diff --git a/drivers/acpi/apei/erst-dbg.c b/drivers/acpi/apei/erst-dbg.c -index c740f0faad39..90aa034dceb0 100644 +index c740f0faa..90aa034dc 100644 --- a/drivers/acpi/apei/erst-dbg.c +++ b/drivers/acpi/apei/erst-dbg.c @@ -11,6 +11,7 @@ @@ -572,7 +916,7 @@ index c740f0faad39..90aa034dceb0 100644 #include #include diff --git a/drivers/block/loop.c b/drivers/block/loop.c -index a58595f5ee2c..a8a4582621c2 100644 +index 4e1dce3be..0e822f3ef 100644 
--- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1153,8 +1153,6 @@ static void __loop_clr_fd(struct loop_device *lo, bool release) @@ -585,7 +929,7 @@ index a58595f5ee2c..a8a4582621c2 100644 int err; diff --git a/drivers/clk/tegra/clk-bpmp.c b/drivers/clk/tegra/clk-bpmp.c -index 6ecf18f71c32..301551174c13 100644 +index 6ecf18f71..301551174 100644 --- a/drivers/clk/tegra/clk-bpmp.c +++ b/drivers/clk/tegra/clk-bpmp.c @@ -5,7 +5,7 @@ @@ -647,7 +991,7 @@ index 6ecf18f71c32..301551174c13 100644 static int tegra_bpmp_probe_clocks(struct tegra_bpmp *bpmp, diff --git a/drivers/input/joystick/analog.c b/drivers/input/joystick/analog.c -index 3088c5b829f0..a8c5f90e8208 100644 +index 3088c5b82..a8c5f90e8 100644 --- a/drivers/input/joystick/analog.c +++ b/drivers/input/joystick/analog.c @@ -19,7 +19,7 @@ @@ -694,7 +1038,7 @@ index 3088c5b829f0..a8c5f90e8208 100644 /* diff --git a/drivers/md/bcache/Kconfig b/drivers/md/bcache/Kconfig -index cf3e8096942a..f1a1f0c4a0ea 100644 +index cf3e80969..f1a1f0c4a 100644 --- a/drivers/md/bcache/Kconfig +++ b/drivers/md/bcache/Kconfig @@ -4,6 +4,7 @@ config BCACHE @@ -722,7 +1066,7 @@ index cf3e8096942a..f1a1f0c4a0ea 100644 bool "Asynchronous device registration (EXPERIMENTAL)" depends on BCACHE diff --git a/drivers/md/bcache/Makefile b/drivers/md/bcache/Makefile -index 5b87e59676b8..054e8a33a7ab 100644 +index 5b87e5967..054e8a33a 100644 --- a/drivers/md/bcache/Makefile +++ b/drivers/md/bcache/Makefile @@ -2,6 +2,6 @@ @@ -735,7 +1079,7 @@ index 5b87e59676b8..054e8a33a7ab 100644 + journal.o movinggc.o request.o stats.o super.o sysfs.o trace.o\ util.o writeback.o features.o diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h -index 9ed9c955add7..dbb72beb036c 100644 +index 9ed9c955a..dbb72beb0 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -179,6 +179,7 @@ @@ -754,608 +1098,11 @@ index 9ed9c955add7..dbb72beb036c 100644 struct bucket { atomic_t pin; -diff --git a/drivers/md/bcache/closure.c 
b/drivers/md/bcache/closure.c -deleted file mode 100644 -index d8d9394a6beb..000000000000 ---- a/drivers/md/bcache/closure.c -+++ /dev/null -@@ -1,207 +0,0 @@ --// SPDX-License-Identifier: GPL-2.0 --/* -- * Asynchronous refcounty things -- * -- * Copyright 2010, 2011 Kent Overstreet -- * Copyright 2012 Google, Inc. -- */ -- --#include --#include --#include --#include -- --#include "closure.h" -- --static inline void closure_put_after_sub(struct closure *cl, int flags) --{ -- int r = flags & CLOSURE_REMAINING_MASK; -- -- BUG_ON(flags & CLOSURE_GUARD_MASK); -- BUG_ON(!r && (flags & ~CLOSURE_DESTRUCTOR)); -- -- if (!r) { -- if (cl->fn && !(flags & CLOSURE_DESTRUCTOR)) { -- atomic_set(&cl->remaining, -- CLOSURE_REMAINING_INITIALIZER); -- closure_queue(cl); -- } else { -- struct closure *parent = cl->parent; -- closure_fn *destructor = cl->fn; -- -- closure_debug_destroy(cl); -- -- if (destructor) -- destructor(cl); -- -- if (parent) -- closure_put(parent); -- } -- } --} -- --/* For clearing flags with the same atomic op as a put */ --void closure_sub(struct closure *cl, int v) --{ -- closure_put_after_sub(cl, atomic_sub_return(v, &cl->remaining)); --} -- --/* -- * closure_put - decrement a closure's refcount -- */ --void closure_put(struct closure *cl) --{ -- closure_put_after_sub(cl, atomic_dec_return(&cl->remaining)); --} -- --/* -- * closure_wake_up - wake up all closures on a wait list, without memory barrier -- */ --void __closure_wake_up(struct closure_waitlist *wait_list) --{ -- struct llist_node *list; -- struct closure *cl, *t; -- struct llist_node *reverse = NULL; -- -- list = llist_del_all(&wait_list->list); -- -- /* We first reverse the list to preserve FIFO ordering and fairness */ -- reverse = llist_reverse_order(list); -- -- /* Then do the wakeups */ -- llist_for_each_entry_safe(cl, t, reverse, list) { -- closure_set_waiting(cl, 0); -- closure_sub(cl, CLOSURE_WAITING + 1); -- } --} -- --/** -- * closure_wait - add a closure to a waitlist -- * @waitlist: 
will own a ref on @cl, which will be released when -- * closure_wake_up() is called on @waitlist. -- * @cl: closure pointer. -- * -- */ --bool closure_wait(struct closure_waitlist *waitlist, struct closure *cl) --{ -- if (atomic_read(&cl->remaining) & CLOSURE_WAITING) -- return false; -- -- closure_set_waiting(cl, _RET_IP_); -- atomic_add(CLOSURE_WAITING + 1, &cl->remaining); -- llist_add(&cl->list, &waitlist->list); -- -- return true; --} -- --struct closure_syncer { -- struct task_struct *task; -- int done; --}; -- --static void closure_sync_fn(struct closure *cl) --{ -- struct closure_syncer *s = cl->s; -- struct task_struct *p; -- -- rcu_read_lock(); -- p = READ_ONCE(s->task); -- s->done = 1; -- wake_up_process(p); -- rcu_read_unlock(); --} -- --void __sched __closure_sync(struct closure *cl) --{ -- struct closure_syncer s = { .task = current }; -- -- cl->s = &s; -- continue_at(cl, closure_sync_fn, NULL); -- -- while (1) { -- set_current_state(TASK_UNINTERRUPTIBLE); -- if (s.done) -- break; -- schedule(); -- } -- -- __set_current_state(TASK_RUNNING); --} -- --#ifdef CONFIG_BCACHE_CLOSURES_DEBUG -- --static LIST_HEAD(closure_list); --static DEFINE_SPINLOCK(closure_list_lock); -- --void closure_debug_create(struct closure *cl) --{ -- unsigned long flags; -- -- BUG_ON(cl->magic == CLOSURE_MAGIC_ALIVE); -- cl->magic = CLOSURE_MAGIC_ALIVE; -- -- spin_lock_irqsave(&closure_list_lock, flags); -- list_add(&cl->all, &closure_list); -- spin_unlock_irqrestore(&closure_list_lock, flags); --} -- --void closure_debug_destroy(struct closure *cl) --{ -- unsigned long flags; -- -- BUG_ON(cl->magic != CLOSURE_MAGIC_ALIVE); -- cl->magic = CLOSURE_MAGIC_DEAD; -- -- spin_lock_irqsave(&closure_list_lock, flags); -- list_del(&cl->all); -- spin_unlock_irqrestore(&closure_list_lock, flags); --} -- --static struct dentry *closure_debug; -- --static int debug_show(struct seq_file *f, void *data) --{ -- struct closure *cl; -- -- spin_lock_irq(&closure_list_lock); -- -- 
list_for_each_entry(cl, &closure_list, all) { -- int r = atomic_read(&cl->remaining); -- -- seq_printf(f, "%p: %pS -> %pS p %p r %i ", -- cl, (void *) cl->ip, cl->fn, cl->parent, -- r & CLOSURE_REMAINING_MASK); -- -- seq_printf(f, "%s%s\n", -- test_bit(WORK_STRUCT_PENDING_BIT, -- work_data_bits(&cl->work)) ? "Q" : "", -- r & CLOSURE_RUNNING ? "R" : ""); -- -- if (r & CLOSURE_WAITING) -- seq_printf(f, " W %pS\n", -- (void *) cl->waiting_on); -- -- seq_printf(f, "\n"); -- } -- -- spin_unlock_irq(&closure_list_lock); -- return 0; --} -- --DEFINE_SHOW_ATTRIBUTE(debug); -- --void __init closure_debug_init(void) --{ -- if (!IS_ERR_OR_NULL(bcache_debug)) -- /* -- * it is unnecessary to check return value of -- * debugfs_create_file(), we should not care -- * about this. -- */ -- closure_debug = debugfs_create_file( -- "closures", 0400, bcache_debug, NULL, &debug_fops); --} --#endif -- --MODULE_AUTHOR("Kent Overstreet "); --MODULE_LICENSE("GPL"); -diff --git a/drivers/md/bcache/closure.h b/drivers/md/bcache/closure.h -deleted file mode 100644 -index c88cdc4ae4ec..000000000000 ---- a/drivers/md/bcache/closure.h -+++ /dev/null -@@ -1,378 +0,0 @@ --/* SPDX-License-Identifier: GPL-2.0 */ --#ifndef _LINUX_CLOSURE_H --#define _LINUX_CLOSURE_H -- --#include --#include --#include --#include -- --/* -- * Closure is perhaps the most overused and abused term in computer science, but -- * since I've been unable to come up with anything better you're stuck with it -- * again. -- * -- * What are closures? -- * -- * They embed a refcount. The basic idea is they count "things that are in -- * progress" - in flight bios, some other thread that's doing something else - -- * anything you might want to wait on. -- * -- * The refcount may be manipulated with closure_get() and closure_put(). -- * closure_put() is where many of the interesting things happen, when it causes -- * the refcount to go to 0. 
-- * -- * Closures can be used to wait on things both synchronously and asynchronously, -- * and synchronous and asynchronous use can be mixed without restriction. To -- * wait synchronously, use closure_sync() - you will sleep until your closure's -- * refcount hits 1. -- * -- * To wait asynchronously, use -- * continue_at(cl, next_function, workqueue); -- * -- * passing it, as you might expect, the function to run when nothing is pending -- * and the workqueue to run that function out of. -- * -- * continue_at() also, critically, requires a 'return' immediately following the -- * location where this macro is referenced, to return to the calling function. -- * There's good reason for this. -- * -- * To use safely closures asynchronously, they must always have a refcount while -- * they are running owned by the thread that is running them. Otherwise, suppose -- * you submit some bios and wish to have a function run when they all complete: -- * -- * foo_endio(struct bio *bio) -- * { -- * closure_put(cl); -- * } -- * -- * closure_init(cl); -- * -- * do_stuff(); -- * closure_get(cl); -- * bio1->bi_endio = foo_endio; -- * bio_submit(bio1); -- * -- * do_more_stuff(); -- * closure_get(cl); -- * bio2->bi_endio = foo_endio; -- * bio_submit(bio2); -- * -- * continue_at(cl, complete_some_read, system_wq); -- * -- * If closure's refcount started at 0, complete_some_read() could run before the -- * second bio was submitted - which is almost always not what you want! More -- * importantly, it wouldn't be possible to say whether the original thread or -- * complete_some_read()'s thread owned the closure - and whatever state it was -- * associated with! -- * -- * So, closure_init() initializes a closure's refcount to 1 - and when a -- * closure_fn is run, the refcount will be reset to 1 first. -- * -- * Then, the rule is - if you got the refcount with closure_get(), release it -- * with closure_put() (i.e, in a bio->bi_endio function). 
If you have a refcount -- * on a closure because you called closure_init() or you were run out of a -- * closure - _always_ use continue_at(). Doing so consistently will help -- * eliminate an entire class of particularly pernicious races. -- * -- * Lastly, you might have a wait list dedicated to a specific event, and have no -- * need for specifying the condition - you just want to wait until someone runs -- * closure_wake_up() on the appropriate wait list. In that case, just use -- * closure_wait(). It will return either true or false, depending on whether the -- * closure was already on a wait list or not - a closure can only be on one wait -- * list at a time. -- * -- * Parents: -- * -- * closure_init() takes two arguments - it takes the closure to initialize, and -- * a (possibly null) parent. -- * -- * If parent is non null, the new closure will have a refcount for its lifetime; -- * a closure is considered to be "finished" when its refcount hits 0 and the -- * function to run is null. Hence -- * -- * continue_at(cl, NULL, NULL); -- * -- * returns up the (spaghetti) stack of closures, precisely like normal return -- * returns up the C stack. continue_at() with non null fn is better thought of -- * as doing a tail call. -- * -- * All this implies that a closure should typically be embedded in a particular -- * struct (which its refcount will normally control the lifetime of), and that -- * struct can very much be thought of as a stack frame. -- */ -- --struct closure; --struct closure_syncer; --typedef void (closure_fn) (struct closure *); --extern struct dentry *bcache_debug; -- --struct closure_waitlist { -- struct llist_head list; --}; -- --enum closure_state { -- /* -- * CLOSURE_WAITING: Set iff the closure is on a waitlist. Must be set by -- * the thread that owns the closure, and cleared by the thread that's -- * waking up the closure. 
-- * -- * The rest are for debugging and don't affect behaviour: -- * -- * CLOSURE_RUNNING: Set when a closure is running (i.e. by -- * closure_init() and when closure_put() runs then next function), and -- * must be cleared before remaining hits 0. Primarily to help guard -- * against incorrect usage and accidentally transferring references. -- * continue_at() and closure_return() clear it for you, if you're doing -- * something unusual you can use closure_set_dead() which also helps -- * annotate where references are being transferred. -- */ -- -- CLOSURE_BITS_START = (1U << 26), -- CLOSURE_DESTRUCTOR = (1U << 26), -- CLOSURE_WAITING = (1U << 28), -- CLOSURE_RUNNING = (1U << 30), --}; -- --#define CLOSURE_GUARD_MASK \ -- ((CLOSURE_DESTRUCTOR|CLOSURE_WAITING|CLOSURE_RUNNING) << 1) -- --#define CLOSURE_REMAINING_MASK (CLOSURE_BITS_START - 1) --#define CLOSURE_REMAINING_INITIALIZER (1|CLOSURE_RUNNING) -- --struct closure { -- union { -- struct { -- struct workqueue_struct *wq; -- struct closure_syncer *s; -- struct llist_node list; -- closure_fn *fn; -- }; -- struct work_struct work; -- }; -- -- struct closure *parent; -- -- atomic_t remaining; -- --#ifdef CONFIG_BCACHE_CLOSURES_DEBUG --#define CLOSURE_MAGIC_DEAD 0xc054dead --#define CLOSURE_MAGIC_ALIVE 0xc054a11e -- -- unsigned int magic; -- struct list_head all; -- unsigned long ip; -- unsigned long waiting_on; --#endif --}; -- --void closure_sub(struct closure *cl, int v); --void closure_put(struct closure *cl); --void __closure_wake_up(struct closure_waitlist *list); --bool closure_wait(struct closure_waitlist *list, struct closure *cl); --void __closure_sync(struct closure *cl); -- --/** -- * closure_sync - sleep until a closure a closure has nothing left to wait on -- * -- * Sleeps until the refcount hits 1 - the thread that's running the closure owns -- * the last refcount. 
-- */ --static inline void closure_sync(struct closure *cl) --{ -- if ((atomic_read(&cl->remaining) & CLOSURE_REMAINING_MASK) != 1) -- __closure_sync(cl); --} -- --#ifdef CONFIG_BCACHE_CLOSURES_DEBUG -- --void closure_debug_init(void); --void closure_debug_create(struct closure *cl); --void closure_debug_destroy(struct closure *cl); -- --#else -- --static inline void closure_debug_init(void) {} --static inline void closure_debug_create(struct closure *cl) {} --static inline void closure_debug_destroy(struct closure *cl) {} -- --#endif -- --static inline void closure_set_ip(struct closure *cl) --{ --#ifdef CONFIG_BCACHE_CLOSURES_DEBUG -- cl->ip = _THIS_IP_; --#endif --} -- --static inline void closure_set_ret_ip(struct closure *cl) --{ --#ifdef CONFIG_BCACHE_CLOSURES_DEBUG -- cl->ip = _RET_IP_; --#endif --} -- --static inline void closure_set_waiting(struct closure *cl, unsigned long f) --{ --#ifdef CONFIG_BCACHE_CLOSURES_DEBUG -- cl->waiting_on = f; --#endif --} -- --static inline void closure_set_stopped(struct closure *cl) --{ -- atomic_sub(CLOSURE_RUNNING, &cl->remaining); --} -- --static inline void set_closure_fn(struct closure *cl, closure_fn *fn, -- struct workqueue_struct *wq) --{ -- closure_set_ip(cl); -- cl->fn = fn; -- cl->wq = wq; -- /* between atomic_dec() in closure_put() */ -- smp_mb__before_atomic(); --} -- --static inline void closure_queue(struct closure *cl) --{ -- struct workqueue_struct *wq = cl->wq; -- /** -- * Changes made to closure, work_struct, or a couple of other structs -- * may cause work.func not pointing to the right location. 
-- */ -- BUILD_BUG_ON(offsetof(struct closure, fn) -- != offsetof(struct work_struct, func)); -- if (wq) { -- INIT_WORK(&cl->work, cl->work.func); -- BUG_ON(!queue_work(wq, &cl->work)); -- } else -- cl->fn(cl); --} -- --/** -- * closure_get - increment a closure's refcount -- */ --static inline void closure_get(struct closure *cl) --{ --#ifdef CONFIG_BCACHE_CLOSURES_DEBUG -- BUG_ON((atomic_inc_return(&cl->remaining) & -- CLOSURE_REMAINING_MASK) <= 1); --#else -- atomic_inc(&cl->remaining); --#endif --} -- --/** -- * closure_init - Initialize a closure, setting the refcount to 1 -- * @cl: closure to initialize -- * @parent: parent of the new closure. cl will take a refcount on it for its -- * lifetime; may be NULL. -- */ --static inline void closure_init(struct closure *cl, struct closure *parent) --{ -- memset(cl, 0, sizeof(struct closure)); -- cl->parent = parent; -- if (parent) -- closure_get(parent); -- -- atomic_set(&cl->remaining, CLOSURE_REMAINING_INITIALIZER); -- -- closure_debug_create(cl); -- closure_set_ip(cl); --} -- --static inline void closure_init_stack(struct closure *cl) --{ -- memset(cl, 0, sizeof(struct closure)); -- atomic_set(&cl->remaining, CLOSURE_REMAINING_INITIALIZER); --} -- --/** -- * closure_wake_up - wake up all closures on a wait list, -- * with memory barrier -- */ --static inline void closure_wake_up(struct closure_waitlist *list) --{ -- /* Memory barrier for the wait list */ -- smp_mb(); -- __closure_wake_up(list); --} -- --/** -- * continue_at - jump to another function with barrier -- * -- * After @cl is no longer waiting on anything (i.e. all outstanding refs have -- * been dropped with closure_put()), it will resume execution at @fn running out -- * of @wq (or, if @wq is NULL, @fn will be called by closure_put() directly). 
-- * -- * This is because after calling continue_at() you no longer have a ref on @cl, -- * and whatever @cl owns may be freed out from under you - a running closure fn -- * has a ref on its own closure which continue_at() drops. -- * -- * Note you are expected to immediately return after using this macro. -- */ --#define continue_at(_cl, _fn, _wq) \ --do { \ -- set_closure_fn(_cl, _fn, _wq); \ -- closure_sub(_cl, CLOSURE_RUNNING + 1); \ --} while (0) -- --/** -- * closure_return - finish execution of a closure -- * -- * This is used to indicate that @cl is finished: when all outstanding refs on -- * @cl have been dropped @cl's ref on its parent closure (as passed to -- * closure_init()) will be dropped, if one was specified - thus this can be -- * thought of as returning to the parent closure. -- */ --#define closure_return(_cl) continue_at((_cl), NULL, NULL) -- --/** -- * continue_at_nobarrier - jump to another function without barrier -- * -- * Causes @fn to be executed out of @cl, in @wq context (or called directly if -- * @wq is NULL). -- * -- * The ref the caller of continue_at_nobarrier() had on @cl is now owned by @fn, -- * thus it's not safe to touch anything protected by @cl after a -- * continue_at_nobarrier(). -- */ --#define continue_at_nobarrier(_cl, _fn, _wq) \ --do { \ -- set_closure_fn(_cl, _fn, _wq); \ -- closure_queue(_cl); \ --} while (0) -- --/** -- * closure_return_with_destructor - finish execution of a closure, -- * with destructor -- * -- * Works like closure_return(), except @destructor will be called when all -- * outstanding refs on @cl have been dropped; @destructor may be used to safely -- * free the memory occupied by @cl, and it is called with the ref on the parent -- * closure still held - so @destructor could safely return an item to a -- * freelist protected by @cl's parent. 
-- */ --#define closure_return_with_destructor(_cl, _destructor) \ --do { \ -- set_closure_fn(_cl, _destructor, NULL); \ -- closure_sub(_cl, CLOSURE_RUNNING - CLOSURE_DESTRUCTOR + 1); \ --} while (0) -- --/** -- * closure_call - execute @fn out of a new, uninitialized closure -- * -- * Typically used when running out of one closure, and we want to run @fn -- * asynchronously out of a new closure - @parent will then wait for @cl to -- * finish. -- */ --static inline void closure_call(struct closure *cl, closure_fn fn, -- struct workqueue_struct *wq, -- struct closure *parent) --{ -- closure_init(cl, parent); -- continue_at_nobarrier(cl, fn, wq); --} -- --#endif /* _LINUX_CLOSURE_H */ diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c -index bf3de149d3c9..c0af1f4690d4 100644 +index 2bb55278d..4a517301d 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c -@@ -2913,7 +2913,6 @@ static int __init bcache_init(void) +@@ -2914,7 +2914,6 @@ static int __init bcache_init(void) goto err; bch_debug_init(); @@ -1364,7 +1111,7 @@ index bf3de149d3c9..c0af1f4690d4 100644 bcache_is_reboot = false; diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h -index 6f3cb7c92130..f61ab1bada6c 100644 +index 6f3cb7c92..f61ab1bad 100644 --- a/drivers/md/bcache/util.h +++ b/drivers/md/bcache/util.h @@ -4,6 +4,7 @@ @@ -1385,7 +1132,7 @@ index 6f3cb7c92130..f61ab1bada6c 100644 #ifdef CONFIG_BCACHE_DEBUG diff --git a/drivers/pci/p2pdma.c b/drivers/pci/p2pdma.c -index 30b1df3c9d2f..3b7a6ca44668 100644 +index 30b1df3c9..3b7a6ca44 100644 --- a/drivers/pci/p2pdma.c +++ b/drivers/pci/p2pdma.c @@ -17,7 +17,7 @@ @@ -1460,7 +1207,7 @@ index 30b1df3c9d2f..3b7a6ca44668 100644 acs_redirects = true; diff --git a/fs/Kconfig b/fs/Kconfig -index 30b751c7f11a..1160311af303 100644 +index 30b751c7f..1160311af 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -40,6 +40,7 @@ source "fs/ocfs2/Kconfig" @@ -1472,7 +1219,7 @@ index 30b751c7f11a..1160311af303 100644 endif # BLOCK 
diff --git a/fs/Makefile b/fs/Makefile -index 208a74e0b00e..5d5c8c792058 100644 +index 208a74e0b..5d5c8c792 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -134,6 +134,7 @@ obj-$(CONFIG_OCFS2_FS) += ocfs2/ @@ -1485,7 +1232,7 @@ index 208a74e0b00e..5d5c8c792058 100644 obj-$(CONFIG_EFIVAR_FS) += efivarfs/ diff --git a/fs/bcachefs/Kconfig b/fs/bcachefs/Kconfig new file mode 100644 -index 000000000000..27742ce276cd +index 000000000..27742ce27 --- /dev/null +++ b/fs/bcachefs/Kconfig @@ -0,0 +1,52 @@ @@ -1543,7 +1290,7 @@ index 000000000000..27742ce276cd + Include some unit and performance tests for the core btree code diff --git a/fs/bcachefs/Makefile b/fs/bcachefs/Makefile new file mode 100644 -index 000000000000..d68aaf1a284d +index 000000000..d68aaf1a2 --- /dev/null +++ b/fs/bcachefs/Makefile @@ -0,0 +1,68 @@ @@ -1617,7 +1364,7 @@ index 000000000000..d68aaf1a284d +bcachefs-$(CONFIG_BCACHEFS_POSIX_ACL) += acl.o diff --git a/fs/bcachefs/acl.c b/fs/bcachefs/acl.c new file mode 100644 -index 000000000000..5070caf8f349 +index 000000000..5070caf8f --- /dev/null +++ b/fs/bcachefs/acl.c @@ -0,0 +1,406 @@ @@ -2029,7 +1776,7 @@ index 000000000000..5070caf8f349 +#endif /* CONFIG_BCACHEFS_POSIX_ACL */ diff --git a/fs/bcachefs/acl.h b/fs/bcachefs/acl.h new file mode 100644 -index 000000000000..2d76a4897ba8 +index 000000000..2d76a4897 --- /dev/null +++ b/fs/bcachefs/acl.h @@ -0,0 +1,58 @@ @@ -2093,10 +1840,10 @@ index 000000000000..2d76a4897ba8 +#endif /* _BCACHEFS_ACL_H */ diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c new file mode 100644 -index 000000000000..359cb23f037b +index 000000000..738567173 --- /dev/null +++ b/fs/bcachefs/alloc_background.c -@@ -0,0 +1,1576 @@ +@@ -0,0 +1,1600 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "bcachefs.h" +#include "alloc_background.h" @@ -2784,21 +2531,23 @@ index 000000000000..359cb23f037b +} + +static int bch2_check_alloc_key(struct btree_trans *trans, -+ struct btree_iter *alloc_iter) ++ struct btree_iter 
*alloc_iter, ++ struct btree_iter *discard_iter, ++ struct btree_iter *freespace_iter) +{ + struct bch_fs *c = trans->c; + struct bch_dev *ca; -+ struct btree_iter discard_iter, freespace_iter; + struct bch_alloc_v4 a; + unsigned discard_key_type, freespace_key_type; + struct bkey_s_c alloc_k, k; + struct printbuf buf = PRINTBUF; -+ struct printbuf buf2 = PRINTBUF; + int ret; + -+ alloc_k = bch2_btree_iter_peek(alloc_iter); ++ alloc_k = bch2_dev_bucket_exists(c, alloc_iter->pos) ++ ? bch2_btree_iter_peek_slot(alloc_iter) ++ : bch2_btree_iter_peek(alloc_iter); + if (!alloc_k.k) -+ return 0; ++ return 1; + + ret = bkey_err(alloc_k); + if (ret) @@ -2820,12 +2569,10 @@ index 000000000000..359cb23f037b + freespace_key_type = a.data_type == BCH_DATA_free + ? KEY_TYPE_set : 0; + -+ bch2_trans_iter_init(trans, &discard_iter, BTREE_ID_need_discard, -+ alloc_k.k->p, 0); -+ bch2_trans_iter_init(trans, &freespace_iter, BTREE_ID_freespace, -+ alloc_freespace_pos(alloc_k.k->p, a), 0); ++ bch2_btree_iter_set_pos(discard_iter, alloc_k.k->p); ++ bch2_btree_iter_set_pos(freespace_iter, alloc_freespace_pos(alloc_k.k->p, a)); + -+ k = bch2_btree_iter_peek_slot(&discard_iter); ++ k = bch2_btree_iter_peek_slot(discard_iter); + ret = bkey_err(k); + if (ret) + goto err; @@ -2845,14 +2592,14 @@ index 000000000000..359cb23f037b + + bkey_init(&update->k); + update->k.type = discard_key_type; -+ update->k.p = discard_iter.pos; ++ update->k.p = discard_iter->pos; + -+ ret = bch2_trans_update(trans, &discard_iter, update, 0); ++ ret = bch2_trans_update(trans, discard_iter, update, 0); + if (ret) + goto err; + } + -+ k = bch2_btree_iter_peek_slot(&freespace_iter); ++ k = bch2_btree_iter_peek_slot(freespace_iter); + ret = bkey_err(k); + if (ret) + goto err; @@ -2873,18 +2620,15 @@ index 000000000000..359cb23f037b + + bkey_init(&update->k); + update->k.type = freespace_key_type; -+ update->k.p = freespace_iter.pos; ++ update->k.p = freespace_iter->pos; + bch2_key_resize(&update->k, 1); + -+ ret = 
bch2_trans_update(trans, &freespace_iter, update, 0); ++ ret = bch2_trans_update(trans, freespace_iter, update, 0); + if (ret) + goto err; + } +err: +fsck_err: -+ bch2_trans_iter_exit(trans, &freespace_iter); -+ bch2_trans_iter_exit(trans, &discard_iter); -+ printbuf_exit(&buf2); + printbuf_exit(&buf); + return ret; +} @@ -2954,48 +2698,64 @@ index 000000000000..359cb23f037b +int bch2_check_alloc_info(struct bch_fs *c) +{ + struct btree_trans trans; -+ struct btree_iter iter; -+ struct bkey_s_c k; ++ struct btree_iter iter, discard_iter, freespace_iter; + int ret = 0; + + bch2_trans_init(&trans, c, 0, 0); + -+ for_each_btree_key(&trans, iter, BTREE_ID_alloc, POS_MIN, -+ BTREE_ITER_PREFETCH, k, ret) { -+ ret = __bch2_trans_do(&trans, NULL, NULL, 0, -+ bch2_check_alloc_key(&trans, &iter)); ++ bch2_trans_iter_init(&trans, &iter, BTREE_ID_alloc, POS_MIN, ++ BTREE_ITER_PREFETCH); ++ bch2_trans_iter_init(&trans, &discard_iter, BTREE_ID_need_discard, POS_MIN, ++ BTREE_ITER_PREFETCH); ++ bch2_trans_iter_init(&trans, &freespace_iter, BTREE_ID_freespace, POS_MIN, ++ BTREE_ITER_PREFETCH); ++ while (1) { ++ ret = __bch2_trans_do(&trans, NULL, NULL, ++ BTREE_INSERT_NOFAIL| ++ BTREE_INSERT_LAZY_RW, ++ bch2_check_alloc_key(&trans, &iter, ++ &discard_iter, ++ &freespace_iter)); + if (ret) + break; ++ ++ bch2_btree_iter_advance(&iter); + } ++ bch2_trans_iter_exit(&trans, &freespace_iter); ++ bch2_trans_iter_exit(&trans, &discard_iter); + bch2_trans_iter_exit(&trans, &iter); + -+ if (ret) ++ if (ret < 0) + goto err; + + bch2_trans_iter_init(&trans, &iter, BTREE_ID_need_discard, POS_MIN, + BTREE_ITER_PREFETCH); + while (1) { -+ ret = __bch2_trans_do(&trans, NULL, NULL, 0, ++ ret = __bch2_trans_do(&trans, NULL, NULL, ++ BTREE_INSERT_NOFAIL| ++ BTREE_INSERT_LAZY_RW, + bch2_check_discard_freespace_key(&trans, &iter)); + if (ret) + break; + -+ bch2_btree_iter_set_pos(&iter, bpos_nosnap_successor(iter.pos)); ++ bch2_btree_iter_advance(&iter); + } + bch2_trans_iter_exit(&trans, &iter); + 
-+ if (ret) ++ if (ret < 0) + goto err; + + bch2_trans_iter_init(&trans, &iter, BTREE_ID_freespace, POS_MIN, + BTREE_ITER_PREFETCH); + while (1) { -+ ret = __bch2_trans_do(&trans, NULL, NULL, 0, ++ ret = __bch2_trans_do(&trans, NULL, NULL, ++ BTREE_INSERT_NOFAIL| ++ BTREE_INSERT_LAZY_RW, + bch2_check_discard_freespace_key(&trans, &iter)); + if (ret) + break; + -+ bch2_btree_iter_set_pos(&iter, bpos_nosnap_successor(iter.pos)); ++ bch2_btree_iter_advance(&iter); + } + bch2_trans_iter_exit(&trans, &iter); +err: @@ -3250,12 +3010,13 @@ index 000000000000..359cb23f037b + +void bch2_do_discards(struct bch_fs *c) +{ -+ if (percpu_ref_tryget(&c->writes) && ++ if (percpu_ref_tryget_live(&c->writes) && + !queue_work(system_long_wq, &c->discard_work)) + percpu_ref_put(&c->writes); +} + -+static int invalidate_one_bucket(struct btree_trans *trans, struct bch_dev *ca) ++static int invalidate_one_bucket(struct btree_trans *trans, struct bch_dev *ca, ++ struct bpos *bucket_pos, unsigned *cached_sectors) +{ + struct bch_fs *c = trans->c; + struct btree_iter lru_iter, alloc_iter = { NULL }; @@ -3273,8 +3034,10 @@ index 000000000000..359cb23f037b + if (ret) + goto out; + -+ if (!k.k || k.k->p.inode != ca->dev_idx) ++ if (!k.k || k.k->p.inode != ca->dev_idx) { ++ ret = 1; + goto out; ++ } + + if (k.k->type != KEY_TYPE_lru) { + prt_printf(&buf, "non lru key in lru btree:\n "); @@ -3294,8 +3057,9 @@ index 000000000000..359cb23f037b + idx = k.k->p.offset; + bucket = le64_to_cpu(bkey_s_c_to_lru(k).v->idx); + -+ a = bch2_trans_start_alloc_update(trans, &alloc_iter, -+ POS(ca->dev_idx, bucket)); ++ *bucket_pos = POS(ca->dev_idx, bucket); ++ ++ a = bch2_trans_start_alloc_update(trans, &alloc_iter, *bucket_pos); + ret = PTR_ERR_OR_ZERO(a); + if (ret) + goto out; @@ -3317,6 +3081,11 @@ index 000000000000..359cb23f037b + } + } + ++ if (!a->v.cached_sectors) ++ bch_err(c, "invalidating empty bucket, confused"); ++ ++ *cached_sectors = a->v.cached_sectors; ++ + 
SET_BCH_ALLOC_V4_NEED_INC_GEN(&a->v, false); + a->v.gen++; + a->v.data_type = 0; @@ -3329,8 +3098,6 @@ index 000000000000..359cb23f037b + BTREE_TRIGGER_BUCKET_INVALIDATE); + if (ret) + goto out; -+ -+ trace_invalidate_bucket(c, a->k.p.inode, a->k.p.offset); +out: + bch2_trans_iter_exit(trans, &alloc_iter); + bch2_trans_iter_exit(trans, &lru_iter); @@ -3343,7 +3110,8 @@ index 000000000000..359cb23f037b + struct bch_fs *c = container_of(work, struct bch_fs, invalidate_work); + struct bch_dev *ca; + struct btree_trans trans; -+ unsigned i; ++ struct bpos bucket; ++ unsigned i, sectors; + int ret = 0; + + bch2_trans_init(&trans, c, 0, 0); @@ -3356,10 +3124,12 @@ index 000000000000..359cb23f037b + ret = __bch2_trans_do(&trans, NULL, NULL, + BTREE_INSERT_USE_RESERVE| + BTREE_INSERT_NOFAIL, -+ invalidate_one_bucket(&trans, ca)); ++ invalidate_one_bucket(&trans, ca, &bucket, ++ §ors)); + if (ret) + break; + ++ trace_invalidate_bucket(c, bucket.inode, bucket.offset, sectors); + this_cpu_inc(c->counters[BCH_COUNTER_bucket_invalidate]); + } + } @@ -3370,8 +3140,9 @@ index 000000000000..359cb23f037b + +void bch2_do_invalidates(struct bch_fs *c) +{ -+ if (percpu_ref_tryget(&c->writes)) -+ queue_work(system_long_wq, &c->invalidate_work); ++ if (percpu_ref_tryget_live(&c->writes) && ++ !queue_work(system_long_wq, &c->invalidate_work)) ++ percpu_ref_put(&c->writes); +} + +static int bucket_freespace_init(struct btree_trans *trans, struct btree_iter *iter) @@ -3675,7 +3446,7 @@ index 000000000000..359cb23f037b +} diff --git a/fs/bcachefs/alloc_background.h b/fs/bcachefs/alloc_background.h new file mode 100644 -index 000000000000..2ac6b5046c67 +index 000000000..2ac6b5046 --- /dev/null +++ b/fs/bcachefs/alloc_background.h @@ -0,0 +1,181 @@ @@ -3862,7 +3633,7 @@ index 000000000000..2ac6b5046c67 +#endif /* _BCACHEFS_ALLOC_BACKGROUND_H */ diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c new file mode 100644 -index 000000000000..7a878a6906ab +index 
000000000..7a878a690 --- /dev/null +++ b/fs/bcachefs/alloc_foreground.c @@ -0,0 +1,1282 @@ @@ -5150,7 +4921,7 @@ index 000000000000..7a878a6906ab +} diff --git a/fs/bcachefs/alloc_foreground.h b/fs/bcachefs/alloc_foreground.h new file mode 100644 -index 000000000000..8bc78877f0fc +index 000000000..8bc78877f --- /dev/null +++ b/fs/bcachefs/alloc_foreground.h @@ -0,0 +1,173 @@ @@ -5329,7 +5100,7 @@ index 000000000000..8bc78877f0fc +#endif /* _BCACHEFS_ALLOC_FOREGROUND_H */ diff --git a/fs/bcachefs/alloc_types.h b/fs/bcachefs/alloc_types.h new file mode 100644 -index 000000000000..e078584d46f6 +index 000000000..e078584d4 --- /dev/null +++ b/fs/bcachefs/alloc_types.h @@ -0,0 +1,87 @@ @@ -5422,7 +5193,7 @@ index 000000000000..e078584d46f6 +#endif /* _BCACHEFS_ALLOC_TYPES_H */ diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c new file mode 100644 -index 000000000000..f3260bbef71a +index 000000000..f3260bbef --- /dev/null +++ b/fs/bcachefs/backpointers.c @@ -0,0 +1,891 @@ @@ -6319,7 +6090,7 @@ index 000000000000..f3260bbef71a +} diff --git a/fs/bcachefs/backpointers.h b/fs/bcachefs/backpointers.h new file mode 100644 -index 000000000000..fe42af296e9c +index 000000000..fe42af296 --- /dev/null +++ b/fs/bcachefs/backpointers.h @@ -0,0 +1,38 @@ @@ -6363,10 +6134,10 @@ index 000000000000..fe42af296e9c +#endif /* _BCACHEFS_BACKPOINTERS_BACKGROUND_H */ diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h new file mode 100644 -index 000000000000..1f0484aa6501 +index 000000000..8b4d0eb5c --- /dev/null +++ b/fs/bcachefs/bcachefs.h -@@ -0,0 +1,986 @@ +@@ -0,0 +1,988 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BCACHEFS_H +#define _BCACHEFS_H @@ -7195,6 +6966,8 @@ index 000000000000..1f0484aa6501 + copygc_heap copygc_heap; + struct write_point copygc_write_point; + s64 copygc_wait; ++ bool copygc_running; ++ wait_queue_head_t copygc_running_wq; + + /* DATA PROGRESS STATS */ + struct list_head data_progress_list; @@ -7355,7 +7128,7 @@ index 
000000000000..1f0484aa6501 +#endif /* _BCACHEFS_H */ diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h new file mode 100644 -index 000000000000..147fde1417b0 +index 000000000..147fde141 --- /dev/null +++ b/fs/bcachefs/bcachefs_format.h @@ -0,0 +1,2052 @@ @@ -9413,7 +9186,7 @@ index 000000000000..147fde1417b0 +#endif /* _BCACHEFS_FORMAT_H */ diff --git a/fs/bcachefs/bcachefs_ioctl.h b/fs/bcachefs/bcachefs_ioctl.h new file mode 100644 -index 000000000000..b2edabf58260 +index 000000000..b2edabf58 --- /dev/null +++ b/fs/bcachefs/bcachefs_ioctl.h @@ -0,0 +1,368 @@ @@ -9787,7 +9560,7 @@ index 000000000000..b2edabf58260 +#endif /* _BCACHEFS_IOCTL_H */ diff --git a/fs/bcachefs/bkey.c b/fs/bcachefs/bkey.c new file mode 100644 -index 000000000000..cc0689635164 +index 000000000..cc0689635 --- /dev/null +++ b/fs/bcachefs/bkey.c @@ -0,0 +1,1175 @@ @@ -10968,7 +10741,7 @@ index 000000000000..cc0689635164 +#endif diff --git a/fs/bcachefs/bkey.h b/fs/bcachefs/bkey.h new file mode 100644 -index 000000000000..7dee3d8e0a3d +index 000000000..7dee3d8e0 --- /dev/null +++ b/fs/bcachefs/bkey.h @@ -0,0 +1,566 @@ @@ -11540,7 +11313,7 @@ index 000000000000..7dee3d8e0a3d +#endif /* _BCACHEFS_BKEY_H */ diff --git a/fs/bcachefs/bkey_buf.h b/fs/bcachefs/bkey_buf.h new file mode 100644 -index 000000000000..0d7c67a959af +index 000000000..0d7c67a95 --- /dev/null +++ b/fs/bcachefs/bkey_buf.h @@ -0,0 +1,60 @@ @@ -11606,7 +11379,7 @@ index 000000000000..0d7c67a959af +#endif /* _BCACHEFS_BKEY_BUF_H */ diff --git a/fs/bcachefs/bkey_methods.c b/fs/bcachefs/bkey_methods.c new file mode 100644 -index 000000000000..e0cbac8811af +index 000000000..e0cbac881 --- /dev/null +++ b/fs/bcachefs/bkey_methods.c @@ -0,0 +1,503 @@ @@ -12115,7 +11888,7 @@ index 000000000000..e0cbac8811af +} diff --git a/fs/bcachefs/bkey_methods.h b/fs/bcachefs/bkey_methods.h new file mode 100644 -index 000000000000..db894b40d2ca +index 000000000..db894b40d --- /dev/null +++ b/fs/bcachefs/bkey_methods.h @@ -0,0 
+1,175 @@ @@ -12296,7 +12069,7 @@ index 000000000000..db894b40d2ca +#endif /* _BCACHEFS_BKEY_METHODS_H */ diff --git a/fs/bcachefs/bkey_sort.c b/fs/bcachefs/bkey_sort.c new file mode 100644 -index 000000000000..b1385a77da11 +index 000000000..b1385a77d --- /dev/null +++ b/fs/bcachefs/bkey_sort.c @@ -0,0 +1,198 @@ @@ -12500,7 +12273,7 @@ index 000000000000..b1385a77da11 +} diff --git a/fs/bcachefs/bkey_sort.h b/fs/bcachefs/bkey_sort.h new file mode 100644 -index 000000000000..79cf11d1b4e7 +index 000000000..79cf11d1b --- /dev/null +++ b/fs/bcachefs/bkey_sort.h @@ -0,0 +1,44 @@ @@ -12550,7 +12323,7 @@ index 000000000000..79cf11d1b4e7 +#endif /* _BCACHEFS_BKEY_SORT_H */ diff --git a/fs/bcachefs/bset.c b/fs/bcachefs/bset.c new file mode 100644 -index 000000000000..fa60ef84e4ef +index 000000000..fa60ef84e --- /dev/null +++ b/fs/bcachefs/bset.c @@ -0,0 +1,1598 @@ @@ -14154,7 +13927,7 @@ index 000000000000..fa60ef84e4ef +} diff --git a/fs/bcachefs/bset.h b/fs/bcachefs/bset.h new file mode 100644 -index 000000000000..0d46534c3dcd +index 000000000..0d46534c3 --- /dev/null +++ b/fs/bcachefs/bset.h @@ -0,0 +1,615 @@ @@ -14775,7 +14548,7 @@ index 000000000000..0d46534c3dcd +#endif /* _BCACHEFS_BSET_H */ diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c new file mode 100644 -index 000000000000..4d032ae3b7f4 +index 000000000..4d032ae3b --- /dev/null +++ b/fs/bcachefs/btree_cache.c @@ -0,0 +1,1162 @@ @@ -15943,7 +15716,7 @@ index 000000000000..4d032ae3b7f4 +} diff --git a/fs/bcachefs/btree_cache.h b/fs/bcachefs/btree_cache.h new file mode 100644 -index 000000000000..25906127c023 +index 000000000..25906127c --- /dev/null +++ b/fs/bcachefs/btree_cache.h @@ -0,0 +1,107 @@ @@ -16056,10 +15829,10 @@ index 000000000000..25906127c023 +#endif /* _BCACHEFS_BTREE_CACHE_H */ diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c new file mode 100644 -index 000000000000..0bc7896cdda6 +index 000000000..0e2c8745c --- /dev/null +++ b/fs/bcachefs/btree_gc.c -@@ -0,0 +1,2132 
@@ +@@ -0,0 +1,2128 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2010 Kent Overstreet @@ -16457,6 +16230,7 @@ index 000000000000..0bc7896cdda6 + bch2_btree_node_evict(c, cur_k.k); + ret = bch2_journal_key_delete(c, b->c.btree_id, + b->c.level, cur_k.k->k.p); ++ cur = NULL; + if (ret) + break; + continue; @@ -16475,6 +16249,7 @@ index 000000000000..0bc7896cdda6 + bch2_btree_node_evict(c, cur_k.k); + ret = bch2_journal_key_delete(c, b->c.btree_id, + b->c.level, cur_k.k->k.p); ++ cur = NULL; + if (ret) + break; + continue; @@ -16911,10 +16686,7 @@ index 000000000000..0bc7896cdda6 + struct bch_fs *c = trans->c; + struct btree_iter iter; + struct btree *b; -+ unsigned depth = metadata_only ? 1 -+ : bch2_expensive_debug_checks ? 0 -+ : !btree_node_type_needs_gc(btree_id) ? 1 -+ : 0; ++ unsigned depth = metadata_only ? 1 : 0; + int ret = 0; + + gc_pos_set(c, gc_pos_btree(btree_id, POS_MIN, 0)); @@ -17057,10 +16829,7 @@ index 000000000000..0bc7896cdda6 +{ + struct bch_fs *c = trans->c; + struct btree *b; -+ unsigned target_depth = metadata_only ? 1 -+ : bch2_expensive_debug_checks ? 0 -+ : !btree_node_type_needs_gc(btree_id) ? 1 -+ : 0; ++ unsigned target_depth = metadata_only ? 
1 : 0; + struct printbuf buf = PRINTBUF; + int ret = 0; + @@ -18194,7 +17963,7 @@ index 000000000000..0bc7896cdda6 +} diff --git a/fs/bcachefs/btree_gc.h b/fs/bcachefs/btree_gc.h new file mode 100644 -index 000000000000..95d803b5743d +index 000000000..95d803b57 --- /dev/null +++ b/fs/bcachefs/btree_gc.h @@ -0,0 +1,112 @@ @@ -18312,7 +18081,7 @@ index 000000000000..95d803b5743d +#endif /* _BCACHEFS_BTREE_GC_H */ diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c new file mode 100644 -index 000000000000..9bf3f77bcae6 +index 000000000..9bf3f77bc --- /dev/null +++ b/fs/bcachefs/btree_io.c @@ -0,0 +1,2150 @@ @@ -20468,7 +20237,7 @@ index 000000000000..9bf3f77bcae6 +} diff --git a/fs/bcachefs/btree_io.h b/fs/bcachefs/btree_io.h new file mode 100644 -index 000000000000..8af853642123 +index 000000000..8af853642 --- /dev/null +++ b/fs/bcachefs/btree_io.h @@ -0,0 +1,222 @@ @@ -20696,10 +20465,10 @@ index 000000000000..8af853642123 +#endif /* _BCACHEFS_BTREE_IO_H */ diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c new file mode 100644 -index 000000000000..5c5e14d1d360 +index 000000000..a1512eb06 --- /dev/null +++ b/fs/bcachefs/btree_iter.c -@@ -0,0 +1,3480 @@ +@@ -0,0 +1,3471 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "bcachefs.h" @@ -23975,11 +23744,14 @@ index 000000000000..5c5e14d1d360 + const char *fn) + __acquires(&c->btree_trans_barrier) +{ ++ struct btree_trans *pos; ++ + BUG_ON(lock_class_is_held(&bch2_btree_node_lock_key)); + + memset(trans, 0, sizeof(*trans)); + trans->c = c; + trans->fn = fn; ++ trans->task = current; + + bch2_trans_alloc_paths(trans, c); + @@ -23995,9 +23767,15 @@ index 000000000000..5c5e14d1d360 + + trans->srcu_idx = srcu_read_lock(&c->btree_trans_barrier); + -+ trans->pid = current->pid; + mutex_lock(&c->btree_trans_lock); -+ list_add(&trans->list, &c->btree_trans_list); ++ list_for_each_entry(pos, &c->btree_trans_list, list) { ++ if (trans->task->pid < pos->task->pid) { ++ list_add_tail(&trans->list, 
&pos->list); ++ goto list_add_done; ++ } ++ } ++ list_add_tail(&trans->list, &c->btree_trans_list); ++list_add_done: + mutex_unlock(&c->btree_trans_lock); +} + @@ -24085,73 +23863,55 @@ index 000000000000..5c5e14d1d360 + bch2_bpos_to_text(out, btree_node_pos(_b, cached)); +} + -+static bool trans_has_locks(struct btree_trans *trans) ++void bch2_btree_trans_to_text(struct printbuf *out, struct btree_trans *trans) +{ + struct btree_path *path; -+ -+ trans_for_each_path(trans, path) -+ if (path->nodes_locked) -+ return true; -+ return false; -+} -+ -+void bch2_btree_trans_to_text(struct printbuf *out, struct bch_fs *c) -+{ -+ struct btree_trans *trans; -+ struct btree_path *path; + struct btree *b; + static char lock_types[] = { 'r', 'i', 'w' }; + unsigned l; + -+ mutex_lock(&c->btree_trans_lock); -+ list_for_each_entry(trans, &c->btree_trans_list, list) { -+ if (!trans_has_locks(trans)) ++ prt_printf(out, "%i %s\n", trans->task->pid, trans->fn); ++ ++ trans_for_each_path(trans, path) { ++ if (!path->nodes_locked) + continue; + -+ prt_printf(out, "%i %s\n", trans->pid, trans->fn); ++ prt_printf(out, " path %u %c l=%u %s:", ++ path->idx, ++ path->cached ? 'c' : 'b', ++ path->level, ++ bch2_btree_ids[path->btree_id]); ++ bch2_bpos_to_text(out, path->pos); ++ prt_printf(out, "\n"); + -+ trans_for_each_path(trans, path) { -+ if (!path->nodes_locked) -+ continue; -+ -+ prt_printf(out, " path %u %c l=%u %s:", -+ path->idx, -+ path->cached ? 'c' : 'b', -+ path->level, -+ bch2_btree_ids[path->btree_id]); -+ bch2_bpos_to_text(out, path->pos); -+ prt_printf(out, "\n"); -+ -+ for (l = 0; l < BTREE_MAX_DEPTH; l++) { -+ if (btree_node_locked(path, l)) { -+ prt_printf(out, " %s l=%u ", -+ btree_node_intent_locked(path, l) ? 
"i" : "r", l); -+ bch2_btree_path_node_to_text(out, -+ (void *) path->l[l].b, -+ path->cached); -+ prt_printf(out, "\n"); -+ } ++ for (l = 0; l < BTREE_MAX_DEPTH; l++) { ++ if (btree_node_locked(path, l)) { ++ prt_printf(out, " %s l=%u ", ++ btree_node_intent_locked(path, l) ? "i" : "r", l); ++ bch2_btree_path_node_to_text(out, ++ (void *) path->l[l].b, ++ path->cached); ++ prt_printf(out, "\n"); + } + } -+ -+ b = READ_ONCE(trans->locking); -+ if (b) { -+ path = &trans->paths[trans->locking_path_idx]; -+ prt_printf(out, " locking path %u %c l=%u %c %s:", -+ trans->locking_path_idx, -+ path->cached ? 'c' : 'b', -+ trans->locking_level, -+ lock_types[trans->locking_lock_type], -+ bch2_btree_ids[trans->locking_btree_id]); -+ bch2_bpos_to_text(out, trans->locking_pos); -+ -+ prt_printf(out, " node "); -+ bch2_btree_path_node_to_text(out, -+ (void *) b, path->cached); -+ prt_printf(out, "\n"); -+ } + } -+ mutex_unlock(&c->btree_trans_lock); ++ ++ b = READ_ONCE(trans->locking); ++ if (b) { ++ path = &trans->paths[trans->locking_path_idx]; ++ prt_printf(out, " locking path %u %c l=%u %c %s:", ++ trans->locking_path_idx, ++ path->cached ? 'c' : 'b', ++ trans->locking_level, ++ lock_types[trans->locking_lock_type], ++ bch2_btree_ids[trans->locking_btree_id]); ++ bch2_bpos_to_text(out, trans->locking_pos); ++ ++ prt_printf(out, " node "); ++ bch2_btree_path_node_to_text(out, ++ (void *) b, path->cached); ++ prt_printf(out, "\n"); ++ } +} + +void bch2_fs_btree_iter_exit(struct bch_fs *c) @@ -24182,7 +23942,7 @@ index 000000000000..5c5e14d1d360 +} diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h new file mode 100644 -index 000000000000..f2b302c8150c +index 000000000..9da0a4152 --- /dev/null +++ b/fs/bcachefs/btree_iter.h @@ -0,0 +1,411 @@ @@ -24591,7 +24351,7 @@ index 000000000000..f2b302c8150c + +#define bch2_trans_init(...) 
__bch2_trans_init(__VA_ARGS__, __func__) + -+void bch2_btree_trans_to_text(struct printbuf *, struct bch_fs *); ++void bch2_btree_trans_to_text(struct printbuf *, struct btree_trans *); + +void bch2_fs_btree_iter_exit(struct bch_fs *); +int bch2_fs_btree_iter_init(struct bch_fs *); @@ -24599,10 +24359,10 @@ index 000000000000..f2b302c8150c +#endif /* _BCACHEFS_BTREE_ITER_H */ diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c new file mode 100644 -index 000000000000..0750951603eb +index 000000000..a5b0a956e --- /dev/null +++ b/fs/bcachefs/btree_key_cache.c -@@ -0,0 +1,769 @@ +@@ -0,0 +1,850 @@ + +#include "bcachefs.h" +#include "btree_cache.h" @@ -24689,7 +24449,7 @@ index 000000000000..0750951603eb + start_poll_synchronize_srcu(&c->btree_trans_barrier); + + list_move_tail(&ck->list, &bc->freed); -+ bc->nr_freed++; ++ atomic_long_inc(&bc->nr_freed); + + kfree(ck->k); + ck->k = NULL; @@ -24699,10 +24459,88 @@ index 000000000000..0750951603eb + six_unlock_intent(&ck->c.lock); +} + ++static void bkey_cached_free_fast(struct btree_key_cache *bc, ++ struct bkey_cached *ck) ++{ ++ struct bch_fs *c = container_of(bc, struct bch_fs, btree_key_cache); ++ struct btree_key_cache_freelist *f; ++ bool freed = false; ++ ++ BUG_ON(test_bit(BKEY_CACHED_DIRTY, &ck->flags)); ++ ++ ck->btree_trans_barrier_seq = ++ start_poll_synchronize_srcu(&c->btree_trans_barrier); ++ ++ list_del_init(&ck->list); ++ atomic_long_inc(&bc->nr_freed); ++ ++ kfree(ck->k); ++ ck->k = NULL; ++ ck->u64s = 0; ++ ++ preempt_disable(); ++ f = this_cpu_ptr(bc->pcpu_freed); ++ ++ if (f->nr < ARRAY_SIZE(f->objs)) { ++ f->objs[f->nr++] = ck; ++ freed = true; ++ } ++ preempt_enable(); ++ ++ if (!freed) { ++ mutex_lock(&bc->lock); ++ preempt_disable(); ++ f = this_cpu_ptr(bc->pcpu_freed); ++ ++ while (f->nr > ARRAY_SIZE(f->objs) / 2) { ++ struct bkey_cached *ck2 = f->objs[--f->nr]; ++ ++ list_move_tail(&ck2->list, &bc->freed); ++ } ++ preempt_enable(); ++ ++ list_move_tail(&ck->list, 
&bc->freed); ++ mutex_unlock(&bc->lock); ++ } ++ ++ six_unlock_write(&ck->c.lock); ++ six_unlock_intent(&ck->c.lock); ++} ++ +static struct bkey_cached * +bkey_cached_alloc(struct btree_key_cache *c) +{ -+ struct bkey_cached *ck; ++ struct bkey_cached *ck = NULL; ++ struct btree_key_cache_freelist *f; ++ ++ preempt_disable(); ++ f = this_cpu_ptr(c->pcpu_freed); ++ if (f->nr) ++ ck = f->objs[--f->nr]; ++ preempt_enable(); ++ ++ if (!ck) { ++ mutex_lock(&c->lock); ++ preempt_disable(); ++ f = this_cpu_ptr(c->pcpu_freed); ++ ++ while (!list_empty(&c->freed) && ++ f->nr < ARRAY_SIZE(f->objs) / 2) { ++ ck = list_last_entry(&c->freed, struct bkey_cached, list); ++ list_del_init(&ck->list); ++ f->objs[f->nr++] = ck; ++ } ++ ++ ck = f->nr ? f->objs[--f->nr] : NULL; ++ preempt_enable(); ++ mutex_unlock(&c->lock); ++ } ++ ++ if (ck) { ++ six_lock_intent(&ck->c.lock, NULL, NULL); ++ six_lock_write(&ck->c.lock, NULL, NULL); ++ return ck; ++ } + + ck = kmem_cache_alloc(bch2_key_cache, GFP_NOFS|__GFP_ZERO); + if (likely(ck)) { @@ -24724,16 +24562,6 @@ index 000000000000..0750951603eb + struct bkey_cached *ck; + unsigned i; + -+ mutex_lock(&c->lock); -+ list_for_each_entry_reverse(ck, &c->freed, list) -+ if (bkey_cached_lock_for_evict(ck)) { -+ c->nr_freed--; -+ list_del(&ck->list); -+ mutex_unlock(&c->lock); -+ return ck; -+ } -+ mutex_unlock(&c->lock); -+ + rcu_read_lock(); + tbl = rht_dereference_rcu(c->table.tbl, &c->table); + for (i = 0; i < tbl->size; i++) @@ -24794,9 +24622,7 @@ index 000000000000..0750951603eb + six_unlock_intent(&ck->c.lock); + kfree(ck); + } else { -+ mutex_lock(&bc->lock); -+ bkey_cached_free(bc, ck); -+ mutex_unlock(&bc->lock); ++ bkey_cached_free_fast(bc, ck); + } + + return NULL; @@ -25070,9 +24896,7 @@ index 000000000000..0750951603eb + + bkey_cached_evict(&c->btree_key_cache, ck); + -+ mutex_lock(&c->btree_key_cache.lock); -+ bkey_cached_free(&c->btree_key_cache, ck); -+ mutex_unlock(&c->btree_key_cache.lock); ++ 
bkey_cached_free_fast(&c->btree_key_cache, ck); + } +out: + bch2_trans_iter_exit(trans, &b_iter); @@ -25209,7 +25033,7 @@ index 000000000000..0750951603eb + + list_del(&ck->list); + kmem_cache_free(bch2_key_cache, ck); -+ bc->nr_freed--; ++ atomic_long_dec(&bc->nr_freed); + scanned++; + freed++; + } @@ -25282,6 +25106,7 @@ index 000000000000..0750951603eb + struct bkey_cached *ck, *n; + struct rhash_head *pos; + unsigned i; ++ int cpu; + + if (bc->shrink.list.next) + unregister_shrinker(&bc->shrink); @@ -25298,6 +25123,16 @@ index 000000000000..0750951603eb + } + rcu_read_unlock(); + ++ for_each_possible_cpu(cpu) { ++ struct btree_key_cache_freelist *f = ++ per_cpu_ptr(bc->pcpu_freed, cpu); ++ ++ for (i = 0; i < f->nr; i++) { ++ ck = f->objs[i]; ++ list_add(&ck->list, &bc->freed); ++ } ++ } ++ + list_for_each_entry_safe(ck, n, &bc->freed, list) { + cond_resched(); + @@ -25318,6 +25153,8 @@ index 000000000000..0750951603eb + + if (bc->table_init_done) + rhashtable_destroy(&bc->table); ++ ++ free_percpu(bc->pcpu_freed); +} + +void bch2_fs_btree_key_cache_init_early(struct btree_key_cache *c) @@ -25338,6 +25175,10 @@ index 000000000000..0750951603eb +{ + int ret; + ++ c->pcpu_freed = alloc_percpu(struct btree_key_cache_freelist); ++ if (!c->pcpu_freed) ++ return -ENOMEM; ++ + ret = rhashtable_init(&c->table, &bch2_btree_key_cache_params); + if (ret) + return ret; @@ -25353,7 +25194,7 @@ index 000000000000..0750951603eb + +void bch2_btree_key_cache_to_text(struct printbuf *out, struct btree_key_cache *c) +{ -+ prt_printf(out, "nr_freed:\t%zu\n", c->nr_freed); ++ prt_printf(out, "nr_freed:\t%lu\n", atomic_long_read(&c->nr_freed)); + prt_printf(out, "nr_keys:\t%lu\n", atomic_long_read(&c->nr_keys)); + prt_printf(out, "nr_dirty:\t%lu\n", atomic_long_read(&c->nr_dirty)); +} @@ -25374,7 +25215,7 @@ index 000000000000..0750951603eb +} diff --git a/fs/bcachefs/btree_key_cache.h b/fs/bcachefs/btree_key_cache.h new file mode 100644 -index 000000000000..670746e72dab +index
000000000..670746e72 --- /dev/null +++ b/fs/bcachefs/btree_key_cache.h @@ -0,0 +1,47 @@ @@ -25427,7 +25268,7 @@ index 000000000000..670746e72dab +#endif /* _BCACHEFS_BTREE_KEY_CACHE_H */ diff --git a/fs/bcachefs/btree_locking.h b/fs/bcachefs/btree_locking.h new file mode 100644 -index 000000000000..67c970d727ac +index 000000000..67c970d72 --- /dev/null +++ b/fs/bcachefs/btree_locking.h @@ -0,0 +1,259 @@ @@ -25692,10 +25533,10 @@ index 000000000000..67c970d727ac + diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h new file mode 100644 -index 000000000000..5382f2b85e19 +index 000000000..1e4d1fecc --- /dev/null +++ b/fs/bcachefs/btree_types.h -@@ -0,0 +1,681 @@ +@@ -0,0 +1,687 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BCACHEFS_BTREE_TYPES_H +#define _BCACHEFS_BTREE_TYPES_H @@ -25999,6 +25840,11 @@ index 000000000000..5382f2b85e19 +#endif +}; + ++struct btree_key_cache_freelist { ++ struct bkey_cached *objs[16]; ++ unsigned nr; ++}; ++ +struct btree_key_cache { + struct mutex lock; + struct rhashtable table; @@ -26006,8 +25852,9 @@ index 000000000000..5382f2b85e19 + struct list_head freed; + struct shrinker shrink; + unsigned shrink_iter; ++ struct btree_key_cache_freelist __percpu *pcpu_freed; + -+ size_t nr_freed; ++ atomic_long_t nr_freed; + atomic_long_t nr_keys; + atomic_long_t nr_dirty; +}; @@ -26086,7 +25933,7 @@ index 000000000000..5382f2b85e19 + u8 locking_btree_id; + u8 locking_level; + u8 locking_lock_type; -+ pid_t pid; ++ struct task_struct *task; + int srcu_idx; + + u8 nr_sorted; @@ -26379,7 +26226,7 @@ index 000000000000..5382f2b85e19 +#endif /* _BCACHEFS_BTREE_TYPES_H */ diff --git a/fs/bcachefs/btree_update.h b/fs/bcachefs/btree_update.h new file mode 100644 -index 000000000000..28f958577006 +index 000000000..28f958577 --- /dev/null +++ b/fs/bcachefs/btree_update.h @@ -0,0 +1,156 @@ @@ -26541,7 +26388,7 @@ index 000000000000..28f958577006 +#endif /* _BCACHEFS_BTREE_UPDATE_H */ diff --git a/fs/bcachefs/btree_update_interior.c 
b/fs/bcachefs/btree_update_interior.c new file mode 100644 -index 000000000000..ceb8484eebe7 +index 000000000..965fdfbfa --- /dev/null +++ b/fs/bcachefs/btree_update_interior.c @@ -0,0 +1,2253 @@ @@ -28430,7 +28277,7 @@ index 000000000000..ceb8484eebe7 +{ + struct async_btree_rewrite *a; + -+ if (!percpu_ref_tryget(&c->writes)) ++ if (!percpu_ref_tryget_live(&c->writes)) + return; + + a = kmalloc(sizeof(*a), GFP_NOFS); @@ -28800,7 +28647,7 @@ index 000000000000..ceb8484eebe7 +} diff --git a/fs/bcachefs/btree_update_interior.h b/fs/bcachefs/btree_update_interior.h new file mode 100644 -index 000000000000..adfc6c24a7a4 +index 000000000..adfc6c24a --- /dev/null +++ b/fs/bcachefs/btree_update_interior.h @@ -0,0 +1,321 @@ @@ -29127,7 +28974,7 @@ index 000000000000..adfc6c24a7a4 +#endif /* _BCACHEFS_BTREE_UPDATE_INTERIOR_H */ diff --git a/fs/bcachefs/btree_update_leaf.c b/fs/bcachefs/btree_update_leaf.c new file mode 100644 -index 000000000000..0ab0dc5742ac +index 000000000..aed26b579 --- /dev/null +++ b/fs/bcachefs/btree_update_leaf.c @@ -0,0 +1,1815 @@ @@ -30235,7 +30082,7 @@ index 000000000000..0ab0dc5742ac + } + + if (!(trans->flags & BTREE_INSERT_NOCHECK_RW) && -+ unlikely(!percpu_ref_tryget(&c->writes))) { ++ unlikely(!percpu_ref_tryget_live(&c->writes))) { + ret = bch2_trans_commit_get_rw_cold(trans); + if (ret) + goto out_reset; @@ -30948,7 +30795,7 @@ index 000000000000..0ab0dc5742ac +} diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c new file mode 100644 -index 000000000000..1ea7e2baf323 +index 000000000..1ea7e2baf --- /dev/null +++ b/fs/bcachefs/buckets.c @@ -0,0 +1,2114 @@ @@ -33068,7 +32915,7 @@ index 000000000000..1ea7e2baf323 +} diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h new file mode 100644 -index 000000000000..670b95b82442 +index 000000000..6881502d9 --- /dev/null +++ b/fs/bcachefs/buckets.h @@ -0,0 +1,300 @@ @@ -33252,12 +33099,12 @@ index 000000000000..670b95b82442 + enum alloc_reserve reserve) +{ + return max_t(s64, 0, -+ 
usage.d[BCH_DATA_free].buckets - -+ usage.d[BCH_DATA_cached].buckets - -+ usage.d[BCH_DATA_need_gc_gens].buckets - -+ usage.d[BCH_DATA_need_discard].buckets - -+ ca->nr_open_buckets - -+ bch2_dev_buckets_reserved(ca, reserve)); ++ usage.d[BCH_DATA_free].buckets ++ + usage.d[BCH_DATA_cached].buckets ++ + usage.d[BCH_DATA_need_gc_gens].buckets ++ + usage.d[BCH_DATA_need_discard].buckets ++ - ca->nr_open_buckets ++ - bch2_dev_buckets_reserved(ca, reserve)); +} + +static inline u64 dev_buckets_available(struct bch_dev *ca, @@ -33374,7 +33221,7 @@ index 000000000000..670b95b82442 +#endif /* _BUCKETS_H */ diff --git a/fs/bcachefs/buckets_types.h b/fs/bcachefs/buckets_types.h new file mode 100644 -index 000000000000..1dbba7d906dd +index 000000000..1dbba7d90 --- /dev/null +++ b/fs/bcachefs/buckets_types.h @@ -0,0 +1,103 @@ @@ -33483,7 +33330,7 @@ index 000000000000..1dbba7d906dd +#endif /* _BUCKETS_TYPES_H */ diff --git a/fs/bcachefs/buckets_waiting_for_journal.c b/fs/bcachefs/buckets_waiting_for_journal.c new file mode 100644 -index 000000000000..2e5b955080de +index 000000000..2e5b95508 --- /dev/null +++ b/fs/bcachefs/buckets_waiting_for_journal.c @@ -0,0 +1,167 @@ @@ -33656,7 +33503,7 @@ index 000000000000..2e5b955080de +} diff --git a/fs/bcachefs/buckets_waiting_for_journal.h b/fs/bcachefs/buckets_waiting_for_journal.h new file mode 100644 -index 000000000000..d2ae19cbe18c +index 000000000..d2ae19cbe --- /dev/null +++ b/fs/bcachefs/buckets_waiting_for_journal.h @@ -0,0 +1,15 @@ @@ -33677,7 +33524,7 @@ index 000000000000..d2ae19cbe18c +#endif /* _BUCKETS_WAITING_FOR_JOURNAL_H */ diff --git a/fs/bcachefs/buckets_waiting_for_journal_types.h b/fs/bcachefs/buckets_waiting_for_journal_types.h new file mode 100644 -index 000000000000..fea7f944d0ed +index 000000000..fea7f944d --- /dev/null +++ b/fs/bcachefs/buckets_waiting_for_journal_types.h @@ -0,0 +1,23 @@ @@ -33706,7 +33553,7 @@ index 000000000000..fea7f944d0ed +#endif /* _BUCKETS_WAITING_FOR_JOURNAL_TYPES_H */ diff --git 
a/fs/bcachefs/chardev.c b/fs/bcachefs/chardev.c new file mode 100644 -index 000000000000..dbb7e5e0b35b +index 000000000..dbb7e5e0b --- /dev/null +++ b/fs/bcachefs/chardev.c @@ -0,0 +1,760 @@ @@ -34472,7 +34319,7 @@ index 000000000000..dbb7e5e0b35b +#endif /* NO_BCACHEFS_CHARDEV */ diff --git a/fs/bcachefs/chardev.h b/fs/bcachefs/chardev.h new file mode 100644 -index 000000000000..3a4890d39ff9 +index 000000000..3a4890d39 --- /dev/null +++ b/fs/bcachefs/chardev.h @@ -0,0 +1,31 @@ @@ -34509,10 +34356,10 @@ index 000000000000..3a4890d39ff9 +#endif /* _BCACHEFS_CHARDEV_H */ diff --git a/fs/bcachefs/checksum.c b/fs/bcachefs/checksum.c new file mode 100644 -index 000000000000..e23b221cd377 +index 000000000..7c2af6754 --- /dev/null +++ b/fs/bcachefs/checksum.c -@@ -0,0 +1,698 @@ +@@ -0,0 +1,707 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "bcachefs.h" +#include "checksum.h" @@ -34940,8 +34787,17 @@ index 000000000000..e23b221cd377 + merged = bch2_checksum_bio(c, crc_old.csum_type, + extent_nonce(version, crc_old), bio); + -+ if (bch2_crc_cmp(merged, crc_old.csum)) ++ if (bch2_crc_cmp(merged, crc_old.csum)) { ++ bch_err(c, "checksum error in bch2_rechecksum_bio() (memory corruption or bug?)\n" ++ "expected %0llx:%0llx got %0llx:%0llx (old type %s new type %s)", ++ crc_old.csum.hi, ++ crc_old.csum.lo, ++ merged.hi, ++ merged.lo, ++ bch2_csum_types[crc_old.csum_type], ++ bch2_csum_types[new_csum_type]); + return -EIO; ++ } + + for (i = splits; i < splits + ARRAY_SIZE(splits); i++) { + if (i->crc) @@ -35213,7 +35069,7 @@ index 000000000000..e23b221cd377 +} diff --git a/fs/bcachefs/checksum.h b/fs/bcachefs/checksum.h new file mode 100644 -index 000000000000..c86c3c05d620 +index 000000000..c86c3c05d --- /dev/null +++ b/fs/bcachefs/checksum.h @@ -0,0 +1,204 @@ @@ -35423,7 +35279,7 @@ index 000000000000..c86c3c05d620 +#endif /* _BCACHEFS_CHECKSUM_H */ diff --git a/fs/bcachefs/clock.c b/fs/bcachefs/clock.c new file mode 100644 -index 000000000000..f3ffdbc38485 +index 
000000000..f3ffdbc38 --- /dev/null +++ b/fs/bcachefs/clock.c @@ -0,0 +1,191 @@ @@ -35620,7 +35476,7 @@ index 000000000000..f3ffdbc38485 +} diff --git a/fs/bcachefs/clock.h b/fs/bcachefs/clock.h new file mode 100644 -index 000000000000..70a0f7436c84 +index 000000000..70a0f7436 --- /dev/null +++ b/fs/bcachefs/clock.h @@ -0,0 +1,38 @@ @@ -35664,7 +35520,7 @@ index 000000000000..70a0f7436c84 +#endif /* _BCACHEFS_CLOCK_H */ diff --git a/fs/bcachefs/clock_types.h b/fs/bcachefs/clock_types.h new file mode 100644 -index 000000000000..5fae0012d808 +index 000000000..5fae0012d --- /dev/null +++ b/fs/bcachefs/clock_types.h @@ -0,0 +1,37 @@ @@ -35707,7 +35563,7 @@ index 000000000000..5fae0012d808 +#endif /* _BCACHEFS_CLOCK_TYPES_H */ diff --git a/fs/bcachefs/compress.c b/fs/bcachefs/compress.c new file mode 100644 -index 000000000000..f692f35a6a98 +index 000000000..f692f35a6 --- /dev/null +++ b/fs/bcachefs/compress.c @@ -0,0 +1,639 @@ @@ -36352,7 +36208,7 @@ index 000000000000..f692f35a6a98 +} diff --git a/fs/bcachefs/compress.h b/fs/bcachefs/compress.h new file mode 100644 -index 000000000000..4bab1f61b3b5 +index 000000000..4bab1f61b --- /dev/null +++ b/fs/bcachefs/compress.h @@ -0,0 +1,18 @@ @@ -36376,7 +36232,7 @@ index 000000000000..4bab1f61b3b5 +#endif /* _BCACHEFS_COMPRESS_H */ diff --git a/fs/bcachefs/counters.c b/fs/bcachefs/counters.c new file mode 100644 -index 000000000000..745f856e6d3e +index 000000000..745f856e6 --- /dev/null +++ b/fs/bcachefs/counters.c @@ -0,0 +1,107 @@ @@ -36489,7 +36345,7 @@ index 000000000000..745f856e6d3e +}; diff --git a/fs/bcachefs/counters.h b/fs/bcachefs/counters.h new file mode 100644 -index 000000000000..4778aa19bf34 +index 000000000..4778aa19b --- /dev/null +++ b/fs/bcachefs/counters.h @@ -0,0 +1,17 @@ @@ -36512,7 +36368,7 @@ index 000000000000..4778aa19bf34 +#endif // _BCACHEFS_COUNTERS_H diff --git a/fs/bcachefs/darray.h b/fs/bcachefs/darray.h new file mode 100644 -index 000000000000..519ab9b96e67 +index 000000000..519ab9b96 --- 
/dev/null +++ b/fs/bcachefs/darray.h @@ -0,0 +1,77 @@ @@ -36595,10 +36451,10 @@ index 000000000000..519ab9b96e67 +#endif /* _BCACHEFS_DARRAY_H */ diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c new file mode 100644 -index 000000000000..4b0a866ab1fc +index 000000000..cc9ae6dad --- /dev/null +++ b/fs/bcachefs/data_update.c -@@ -0,0 +1,383 @@ +@@ -0,0 +1,379 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "bcachefs.h" @@ -36691,6 +36547,16 @@ index 000000000000..4b0a866ab1fc + return ret; +} + ++static void bch2_bkey_mark_dev_cached(struct bkey_s k, unsigned dev) ++{ ++ struct bkey_ptrs ptrs = bch2_bkey_ptrs(k); ++ struct bch_extent_ptr *ptr; ++ ++ bkey_for_each_ptr(ptrs, ptr) ++ if (ptr->dev == dev) ++ ptr->cached = true; ++} ++ +static int bch2_data_update_index_update(struct bch_write_op *op) +{ + struct bch_fs *c = op->c; @@ -36715,6 +36581,7 @@ index 000000000000..4b0a866ab1fc + + while (1) { + struct bkey_s_c k; ++ struct bkey_s_c old = bkey_i_to_s_c(m->k.k); + struct bkey_i *insert; + struct bkey_i_extent *new; + const union bch_extent_entry *entry; @@ -36723,6 +36590,7 @@ index 000000000000..4b0a866ab1fc + bool did_work = false; + bool should_check_enospc; + s64 i_sectors_delta = 0, disk_sectors_delta = 0; ++ unsigned i; + + bch2_trans_begin(&trans); + @@ -36733,8 +36601,7 @@ index 000000000000..4b0a866ab1fc + + new = bkey_i_to_extent(bch2_keylist_front(keys)); + -+ if (bversion_cmp(k.k->version, new->k.version) || -+ !bch2_bkey_matches_ptr(c, k, m->ptr, m->offset)) ++ if (!bch2_extents_match(k, old)) + goto nomatch; + + bkey_reassemble(_insert.k, k); @@ -36748,20 +36615,34 @@ index 000000000000..4b0a866ab1fc + bch2_cut_back(new->k.p, insert); + bch2_cut_back(insert->k.p, &new->k_i); + -+ if (m->data_cmd == DATA_REWRITE) { -+ struct bch_extent_ptr *new_ptr, *old_ptr = (void *) -+ bch2_bkey_has_device(bkey_i_to_s_c(insert), -+ m->data_opts.rewrite_dev); -+ if (!old_ptr) -+ goto nomatch; -+ -+ if (old_ptr->cached) -+ 
extent_for_each_ptr(extent_i_to_s(new), new_ptr) -+ new_ptr->cached = true; -+ -+ __bch2_bkey_drop_ptr(bkey_i_to_s(insert), old_ptr); ++ /* ++ * @old: extent that we read from ++ * @insert: key that we're going to update, initialized from ++ * extent currently in btree - same as @old unless we raced with ++ * other updates ++ * @new: extent with new pointers that we'll be adding to @insert ++ * ++ * First, drop rewrite_ptrs from @new: ++ */ ++ i = 0; ++ bkey_for_each_ptr_decode(old.k, bch2_bkey_ptrs_c(old), p, entry) { ++ if (((1U << i) & m->data_opts.rewrite_ptrs) && ++ bch2_extent_has_ptr(old, p, bkey_i_to_s_c(insert))) { ++ /* ++ * If we're going to be adding a pointer to the ++ * same device, we have to drop the old one - ++ * otherwise, we can just mark it cached: ++ */ ++ if (bch2_bkey_has_device(bkey_i_to_s_c(&new->k_i), p.ptr.dev)) ++ bch2_bkey_drop_device_noerror(bkey_i_to_s(insert), p.ptr.dev); ++ else ++ bch2_bkey_mark_dev_cached(bkey_i_to_s(insert), p.ptr.dev); ++ } ++ i++; + } + ++ ++ /* Add new ptrs: */ + extent_for_each_ptr_decode(extent_i_to_s(new), p, entry) { + if (bch2_bkey_has_device(bkey_i_to_s_c(insert), p.ptr.dev)) { + /* @@ -36779,12 +36660,8 @@ index 000000000000..4b0a866ab1fc + if (!did_work) + goto nomatch; + -+ bch2_bkey_narrow_crcs(insert, -+ (struct bch_extent_crc_unpacked) { 0 }); ++ bch2_bkey_narrow_crcs(insert, (struct bch_extent_crc_unpacked) { 0 }); + bch2_extent_normalize(c, bkey_i_to_s(insert)); -+ bch2_bkey_mark_replicas_cached(c, bkey_i_to_s(insert), -+ op->opts.background_target, -+ op->opts.data_replicas); + + ret = bch2_sum_sector_overwrites(&trans, &iter, insert, + &should_check_enospc, @@ -36831,6 +36708,14 @@ index 000000000000..4b0a866ab1fc + } + continue; +nomatch: ++ if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) { ++ struct printbuf buf = PRINTBUF; ++ ++ bch2_bkey_val_to_text(&buf, c, old); ++ bch_info(c, "no match for %s", buf.buf); ++ printbuf_exit(&buf); ++ } ++ + if (m->ctxt) { + BUG_ON(k.k->p.offset <= iter.pos.offset);
+ atomic64_inc(&m->ctxt->stats->keys_raced); @@ -36851,195 +36736,154 @@ index 000000000000..4b0a866ab1fc + return ret; +} + -+void bch2_data_update_read_done(struct data_update *m, struct bch_read_bio *rbio) ++void bch2_data_update_read_done(struct data_update *m, ++ struct bch_extent_crc_unpacked crc, ++ struct closure *cl) +{ + /* write bio must own pages: */ + BUG_ON(!m->op.wbio.bio.bi_vcnt); + -+ m->ptr = rbio->pick.ptr; -+ m->offset = rbio->data_pos.offset - rbio->pick.crc.offset; -+ m->op.devs_have = rbio->devs_have; -+ m->op.pos = rbio->data_pos; -+ m->op.version = rbio->version; -+ m->op.crc = rbio->pick.crc; -+ m->op.wbio.bio.bi_iter.bi_size = m->op.crc.compressed_size << 9; ++ m->op.crc = crc; ++ m->op.wbio.bio.bi_iter.bi_size = crc.compressed_size << 9; + -+ if (m->data_cmd == DATA_REWRITE) -+ bch2_dev_list_drop_dev(&m->op.devs_have, m->data_opts.rewrite_dev); ++ closure_call(&m->op.cl, bch2_write, NULL, cl); ++} ++ ++void bch2_data_update_exit(struct data_update *update) ++{ ++ struct bch_fs *c = update->op.c; ++ ++ bch2_bkey_buf_exit(&update->k, c); ++ bch2_disk_reservation_put(c, &update->op.res); ++ bch2_bio_free_pages_pool(c, &update->op.wbio.bio); +} + +int bch2_data_update_init(struct bch_fs *c, struct data_update *m, + struct write_point_specifier wp, + struct bch_io_opts io_opts, -+ enum data_cmd data_cmd, -+ struct data_opts data_opts, ++ struct data_update_opts data_opts, + enum btree_id btree_id, + struct bkey_s_c k) +{ + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); + const union bch_extent_entry *entry; -+ struct bch_extent_crc_unpacked crc; + struct extent_ptr_decoded p; ++ unsigned i, reserve_sectors = k.k->size * data_opts.extra_replicas; + int ret; + ++ bch2_bkey_buf_init(&m->k); ++ bch2_bkey_buf_reassemble(&m->k, c, k); + m->btree_id = btree_id; -+ m->data_cmd = data_cmd; + m->data_opts = data_opts; -+ m->nr_ptrs_reserved = 0; + + bch2_write_op_init(&m->op, c, io_opts); -+ -+ if (!bch2_bkey_is_incompressible(k)) -+ 
m->op.compression_type = -+ bch2_compression_opt_to_type[io_opts.background_compression ?: -+ io_opts.compression]; -+ else -+ m->op.incompressible = true; -+ ++ m->op.pos = bkey_start_pos(k.k); ++ m->op.version = k.k->version; + m->op.target = data_opts.target, + m->op.write_point = wp; -+ -+ /* -+ * op->csum_type is normally initialized from the fs/file's current -+ * options - but if an extent is encrypted, we require that it stays -+ * encrypted: -+ */ -+ bkey_for_each_crc(k.k, ptrs, crc, entry) -+ if (bch2_csum_type_is_encryption(crc.csum_type)) { -+ m->op.nonce = crc.nonce + crc.offset; -+ m->op.csum_type = crc.csum_type; -+ break; -+ } -+ -+ if (m->data_opts.btree_insert_flags & BTREE_INSERT_USE_RESERVE) { -+ m->op.alloc_reserve = RESERVE_movinggc; -+ } else { -+ /* XXX: this should probably be passed in */ -+ m->op.flags |= BCH_WRITE_ONLY_SPECIFIED_DEVS; -+ } -+ -+ m->op.flags |= BCH_WRITE_PAGES_STABLE| ++ m->op.flags |= BCH_WRITE_PAGES_STABLE| + BCH_WRITE_PAGES_OWNED| + BCH_WRITE_DATA_ENCODED| -+ BCH_WRITE_FROM_INTERNAL; -+ -+ m->op.nr_replicas = data_opts.nr_replicas; -+ m->op.nr_replicas_required = data_opts.nr_replicas; ++ BCH_WRITE_FROM_INTERNAL| ++ m->data_opts.write_flags; ++ m->op.compression_type = ++ bch2_compression_opt_to_type[io_opts.background_compression ?: ++ io_opts.compression]; ++ if (m->data_opts.btree_insert_flags & BTREE_INSERT_USE_RESERVE) ++ m->op.alloc_reserve = RESERVE_movinggc; + m->op.index_update_fn = bch2_data_update_index_update; + -+ switch (data_cmd) { -+ case DATA_ADD_REPLICAS: { ++ i = 0; ++ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { ++ if (p.ptr.cached) ++ m->data_opts.rewrite_ptrs &= ~(1U << i); ++ ++ if (!((1U << i) & m->data_opts.rewrite_ptrs)) ++ bch2_dev_list_add_dev(&m->op.devs_have, p.ptr.dev); ++ ++ if (((1U << i) & m->data_opts.rewrite_ptrs) && ++ crc_is_compressed(p.crc)) ++ reserve_sectors += k.k->size; ++ + /* -+ * DATA_ADD_REPLICAS is used for moving data to a different -+ * device in the background, and 
due to compression the new copy -+ * might take up more space than the old copy: ++ * op->csum_type is normally initialized from the fs/file's ++ * current options - but if an extent is encrypted, we require ++ * that it stays encrypted: + */ -+#if 0 -+ int nr = (int) io_opts.data_replicas - -+ bch2_bkey_nr_ptrs_allocated(k); -+#endif -+ int nr = (int) io_opts.data_replicas; -+ -+ if (nr > 0) { -+ m->op.nr_replicas = m->nr_ptrs_reserved = nr; -+ -+ ret = bch2_disk_reservation_get(c, &m->op.res, -+ k.k->size, m->op.nr_replicas, 0); -+ if (ret) -+ return ret; ++ if (bch2_csum_type_is_encryption(p.crc.csum_type)) { ++ m->op.nonce = p.crc.nonce + p.crc.offset; ++ m->op.csum_type = p.crc.csum_type; + } -+ break; -+ } -+ case DATA_REWRITE: { -+ unsigned compressed_sectors = 0; + -+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) -+ if (p.ptr.dev == data_opts.rewrite_dev) { -+ if (p.ptr.cached) -+ m->op.flags |= BCH_WRITE_CACHED; ++ if (p.crc.compression_type == BCH_COMPRESSION_TYPE_incompressible) ++ m->op.incompressible = true; + -+ if (!p.ptr.cached && -+ crc_is_compressed(p.crc)) -+ compressed_sectors += p.crc.compressed_size; -+ } -+ -+ if (compressed_sectors) { -+ ret = bch2_disk_reservation_add(c, &m->op.res, -+ k.k->size * m->op.nr_replicas, -+ BCH_DISK_RESERVATION_NOFAIL); -+ if (ret) -+ return ret; -+ } -+ break; -+ } -+ case DATA_PROMOTE: -+ m->op.flags |= BCH_WRITE_ALLOC_NOWAIT; -+ m->op.flags |= BCH_WRITE_CACHED; -+ break; -+ default: -+ BUG(); ++ i++; + } + ++ if (reserve_sectors) { ++ ret = bch2_disk_reservation_add(c, &m->op.res, reserve_sectors, ++ m->data_opts.extra_replicas ++ ? 
0 ++ : BCH_DISK_RESERVATION_NOFAIL); ++ if (ret) ++ return ret; ++ } ++ ++ m->op.nr_replicas = m->op.nr_replicas_required = ++ hweight32(m->data_opts.rewrite_ptrs) + m->data_opts.extra_replicas; + return 0; +} diff --git a/fs/bcachefs/data_update.h b/fs/bcachefs/data_update.h new file mode 100644 -index 000000000000..81388a442a95 +index 000000000..e64505453 --- /dev/null +++ b/fs/bcachefs/data_update.h -@@ -0,0 +1,46 @@ +@@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _BCACHEFS_DATA_UPDATE_H +#define _BCACHEFS_DATA_UPDATE_H + ++#include "bkey_buf.h" +#include "io_types.h" + -+enum data_cmd { -+ DATA_SKIP, -+ DATA_SCRUB, -+ DATA_ADD_REPLICAS, -+ DATA_REWRITE, -+ DATA_PROMOTE, -+}; ++struct moving_context; + -+struct data_opts { ++struct data_update_opts { ++ unsigned rewrite_ptrs; + u16 target; -+ u8 rewrite_dev; -+ u8 nr_replicas; -+ int btree_insert_flags; ++ u8 extra_replicas; ++ unsigned btree_insert_flags; ++ unsigned write_flags; +}; + +struct data_update { ++ /* extent being updated: */ + enum btree_id btree_id; -+ enum data_cmd data_cmd; -+ struct data_opts data_opts; -+ -+ unsigned nr_ptrs_reserved; -+ ++ struct bkey_buf k; ++ struct data_update_opts data_opts; + struct moving_context *ctxt; -+ -+ /* what we read: */ -+ struct bch_extent_ptr ptr; -+ u64 offset; -+ + struct bch_write_op op; +}; + -+void bch2_data_update_read_done(struct data_update *, struct bch_read_bio *); ++void bch2_data_update_read_done(struct data_update *, ++ struct bch_extent_crc_unpacked, ++ struct closure *); ++ ++void bch2_data_update_exit(struct data_update *); +int bch2_data_update_init(struct bch_fs *, struct data_update *, + struct write_point_specifier, -+ struct bch_io_opts, -+ enum data_cmd, struct data_opts, ++ struct bch_io_opts, struct data_update_opts, + enum btree_id, struct bkey_s_c); + +#endif /* _BCACHEFS_DATA_UPDATE_H */ diff --git a/fs/bcachefs/debug.c b/fs/bcachefs/debug.c new file mode 100644 -index 000000000000..bdc50d5ba3a2 +index 
000000000..05cae0ed4 --- /dev/null +++ b/fs/bcachefs/debug.c -@@ -0,0 +1,634 @@ +@@ -0,0 +1,707 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Assorted bcachefs debug code @@ -37571,6 +37415,76 @@ index 000000000000..bdc50d5ba3a2 + .read = bch2_cached_btree_nodes_read, +}; + ++static int prt_backtrace(struct printbuf *out, struct task_struct *task) ++{ ++ unsigned long entries[32]; ++ unsigned i, nr_entries; ++ int ret; ++ ++ ret = down_read_killable(&task->signal->exec_update_lock); ++ if (ret) ++ return ret; ++ ++ nr_entries = stack_trace_save_tsk(task, entries, ARRAY_SIZE(entries), 0); ++ for (i = 0; i < nr_entries; i++) { ++ prt_printf(out, "[<0>] %pB", (void *)entries[i]); ++ prt_newline(out); ++ } ++ ++ up_read(&task->signal->exec_update_lock); ++ return 0; ++} ++ ++static ssize_t bch2_btree_transactions_read(struct file *file, char __user *buf, ++ size_t size, loff_t *ppos) ++{ ++ struct dump_iter *i = file->private_data; ++ struct bch_fs *c = i->c; ++ struct btree_trans *trans; ++ int err; ++ ++ i->ubuf = buf; ++ i->size = size; ++ i->ret = 0; ++ ++ mutex_lock(&c->btree_trans_lock); ++ list_for_each_entry(trans, &c->btree_trans_list, list) { ++ if (trans->task->pid <= i->iter) ++ continue; ++ ++ err = flush_buf(i); ++ if (err) ++ return err; ++ ++ if (!i->size) ++ break; ++ ++ bch2_btree_trans_to_text(&i->buf, trans); ++ ++ prt_printf(&i->buf, "backtrace:"); ++ prt_newline(&i->buf); ++ printbuf_indent_add(&i->buf, 2); ++ prt_backtrace(&i->buf, trans->task); ++ printbuf_indent_sub(&i->buf, 2); ++ prt_newline(&i->buf); ++ ++ i->iter = trans->task->pid; ++ } ++ mutex_unlock(&c->btree_trans_lock); ++ ++ if (i->buf.allocation_failure) ++ return -ENOMEM; ++ ++ return i->ret; ++} ++ ++static const struct file_operations btree_transactions_ops = { ++ .owner = THIS_MODULE, ++ .open = bch2_dump_open, ++ .release = bch2_dump_release, ++ .read = bch2_btree_transactions_read, ++}; ++ +static ssize_t bch2_journal_pins_read(struct file *file, char __user *buf, + size_t 
size, loff_t *ppos) +{ @@ -37630,6 +37544,9 @@ index 000000000000..bdc50d5ba3a2 + debugfs_create_file("cached_btree_nodes", 0400, c->fs_debug_dir, + c->btree_debug, &cached_btree_nodes_ops); + ++ debugfs_create_file("btree_transactions", 0400, c->fs_debug_dir, ++ c->btree_debug, &btree_transactions_ops); ++ + debugfs_create_file("journal_pins", 0400, c->fs_debug_dir, + c->btree_debug, &journal_pins_ops); + @@ -37676,7 +37593,7 @@ index 000000000000..bdc50d5ba3a2 +} diff --git a/fs/bcachefs/debug.h b/fs/bcachefs/debug.h new file mode 100644 -index 000000000000..0b86736e5e1b +index 000000000..0b86736e5 --- /dev/null +++ b/fs/bcachefs/debug.h @@ -0,0 +1,30 @@ @@ -37712,7 +37629,7 @@ index 000000000000..0b86736e5e1b +#endif /* _BCACHEFS_DEBUG_H */ diff --git a/fs/bcachefs/dirent.c b/fs/bcachefs/dirent.c new file mode 100644 -index 000000000000..0cbb765cde54 +index 000000000..0cbb765cd --- /dev/null +++ b/fs/bcachefs/dirent.c @@ -0,0 +1,565 @@ @@ -38283,7 +38200,7 @@ index 000000000000..0cbb765cde54 +} diff --git a/fs/bcachefs/dirent.h b/fs/bcachefs/dirent.h new file mode 100644 -index 000000000000..b1466932c768 +index 000000000..b1466932c --- /dev/null +++ b/fs/bcachefs/dirent.h @@ -0,0 +1,67 @@ @@ -38356,7 +38273,7 @@ index 000000000000..b1466932c768 +#endif /* _BCACHEFS_DIRENT_H */ diff --git a/fs/bcachefs/disk_groups.c b/fs/bcachefs/disk_groups.c new file mode 100644 -index 000000000000..7bd4413671d2 +index 000000000..7bd441367 --- /dev/null +++ b/fs/bcachefs/disk_groups.c @@ -0,0 +1,506 @@ @@ -38868,7 +38785,7 @@ index 000000000000..7bd4413671d2 +} diff --git a/fs/bcachefs/disk_groups.h b/fs/bcachefs/disk_groups.h new file mode 100644 -index 000000000000..de915480514b +index 000000000..de9154805 --- /dev/null +++ b/fs/bcachefs/disk_groups.h @@ -0,0 +1,90 @@ @@ -38964,7 +38881,7 @@ index 000000000000..de915480514b +#endif /* _BCACHEFS_DISK_GROUPS_H */ diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c new file mode 100644 -index 000000000000..faabaa64dcdb +index 
000000000..6ce352c52 --- /dev/null +++ b/fs/bcachefs/ec.c @@ -0,0 +1,1695 @@ @@ -39909,7 +39826,7 @@ index 000000000000..faabaa64dcdb + + BUG_ON(!s->allocated); + -+ if (!percpu_ref_tryget(&c->writes)) ++ if (!percpu_ref_tryget_live(&c->writes)) + goto err; + + ec_generate_ec(&s->new_stripe); @@ -40665,7 +40582,7 @@ index 000000000000..faabaa64dcdb +} diff --git a/fs/bcachefs/ec.h b/fs/bcachefs/ec.h new file mode 100644 -index 000000000000..a4c13d61af10 +index 000000000..a4c13d61a --- /dev/null +++ b/fs/bcachefs/ec.h @@ -0,0 +1,230 @@ @@ -40901,7 +40818,7 @@ index 000000000000..a4c13d61af10 +#endif /* _BCACHEFS_EC_H */ diff --git a/fs/bcachefs/ec_types.h b/fs/bcachefs/ec_types.h new file mode 100644 -index 000000000000..edd93da663c1 +index 000000000..edd93da66 --- /dev/null +++ b/fs/bcachefs/ec_types.h @@ -0,0 +1,46 @@ @@ -40953,7 +40870,7 @@ index 000000000000..edd93da663c1 +#endif /* _BCACHEFS_EC_TYPES_H */ diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h new file mode 100644 -index 000000000000..f7d12915c1cc +index 000000000..f7d12915c --- /dev/null +++ b/fs/bcachefs/errcode.h @@ -0,0 +1,12 @@ @@ -40971,7 +40888,7 @@ index 000000000000..f7d12915c1cc +#endif /* _BCACHFES_ERRCODE_H */ diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c new file mode 100644 -index 000000000000..8279a9ba76a5 +index 000000000..8279a9ba7 --- /dev/null +++ b/fs/bcachefs/error.c @@ -0,0 +1,185 @@ @@ -41162,7 +41079,7 @@ index 000000000000..8279a9ba76a5 +} diff --git a/fs/bcachefs/error.h b/fs/bcachefs/error.h new file mode 100644 -index 000000000000..6e63c38186f3 +index 000000000..6e63c3818 --- /dev/null +++ b/fs/bcachefs/error.h @@ -0,0 +1,238 @@ @@ -41406,7 +41323,7 @@ index 000000000000..6e63c38186f3 +#endif /* _BCACHEFS_ERROR_H */ diff --git a/fs/bcachefs/extent_update.c b/fs/bcachefs/extent_update.c new file mode 100644 -index 000000000000..2fd5d9672a44 +index 000000000..2fd5d9672 --- /dev/null +++ b/fs/bcachefs/extent_update.c @@ -0,0 +1,178 @@ @@ -41590,7 +41507,7 @@ 
index 000000000000..2fd5d9672a44 +} diff --git a/fs/bcachefs/extent_update.h b/fs/bcachefs/extent_update.h new file mode 100644 -index 000000000000..6f5cf449361a +index 000000000..6f5cf4493 --- /dev/null +++ b/fs/bcachefs/extent_update.h @@ -0,0 +1,12 @@ @@ -41608,10 +41525,10 @@ index 000000000000..6f5cf449361a +#endif /* _BCACHEFS_EXTENT_UPDATE_H */ diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c new file mode 100644 -index 000000000000..73d756a63572 +index 000000000..2ca13014b --- /dev/null +++ b/fs/bcachefs/extents.c -@@ -0,0 +1,1300 @@ +@@ -0,0 +1,1324 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2010 Kent Overstreet @@ -41640,6 +41557,8 @@ index 000000000000..73d756a63572 + +#include + ++static union bch_extent_entry *__bch2_bkey_drop_ptr(struct bkey_s, struct bch_extent_ptr *); ++ +static unsigned bch2_crc_field_size_max[] = { + [BCH_EXTENT_ENTRY_crc32] = CRC32_SIZE_MAX, + [BCH_EXTENT_ENTRY_crc64] = CRC64_SIZE_MAX, @@ -42302,37 +42221,6 @@ index 000000000000..73d756a63572 + return durability; +} + -+void bch2_bkey_mark_replicas_cached(struct bch_fs *c, struct bkey_s k, -+ unsigned target, -+ unsigned nr_desired_replicas) -+{ -+ struct bkey_ptrs ptrs = bch2_bkey_ptrs(k); -+ union bch_extent_entry *entry; -+ struct extent_ptr_decoded p; -+ int extra = bch2_bkey_durability(c, k.s_c) - nr_desired_replicas; -+ -+ if (target && extra > 0) -+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { -+ int n = bch2_extent_ptr_durability(c, p); -+ -+ if (n && n <= extra && -+ !bch2_dev_in_target(c, p.ptr.dev, target)) { -+ entry->ptr.cached = true; -+ extra -= n; -+ } -+ } -+ -+ if (extra > 0) -+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { -+ int n = bch2_extent_ptr_durability(c, p); -+ -+ if (n && n <= extra) { -+ entry->ptr.cached = true; -+ extra -= n; -+ } -+ } -+} -+ +void bch2_bkey_extent_entry_drop(struct bkey_i *k, union bch_extent_entry *entry) +{ + union bch_extent_entry *end = bkey_val_end(bkey_i_to_s(k)); @@ -42436,8 +42324,8 @@ 
index 000000000000..73d756a63572 +/* + * Returns pointer to the next entry after the one being dropped: + */ -+union bch_extent_entry *__bch2_bkey_drop_ptr(struct bkey_s k, -+ struct bch_extent_ptr *ptr) ++static union bch_extent_entry *__bch2_bkey_drop_ptr(struct bkey_s k, ++ struct bch_extent_ptr *ptr) +{ + struct bkey_ptrs ptrs = bch2_bkey_ptrs(k); + union bch_extent_entry *entry = to_entry(ptr), *next; @@ -42509,6 +42397,14 @@ index 000000000000..73d756a63572 + bch2_bkey_drop_ptrs(k, ptr, ptr->dev == dev); +} + ++void bch2_bkey_drop_device_noerror(struct bkey_s k, unsigned dev) ++{ ++ struct bch_extent_ptr *ptr = (void *) bch2_bkey_has_device(k.s_c, dev); ++ ++ if (ptr) ++ __bch2_bkey_drop_ptr(k, ptr); ++} ++ +const struct bch_extent_ptr * +bch2_bkey_has_device(struct bkey_s_c k, unsigned dev) +{ @@ -42554,6 +42450,44 @@ index 000000000000..73d756a63572 +} + +/* ++ * Returns true if two extents refer to the same data: ++ */ ++bool bch2_extents_match(struct bkey_s_c k1, struct bkey_s_c k2) ++{ ++ struct bkey_ptrs_c ptrs1 = bch2_bkey_ptrs_c(k1); ++ struct bkey_ptrs_c ptrs2 = bch2_bkey_ptrs_c(k2); ++ const union bch_extent_entry *entry1, *entry2; ++ struct extent_ptr_decoded p1, p2; ++ ++ bkey_for_each_ptr_decode(k1.k, ptrs1, p1, entry1) ++ bkey_for_each_ptr_decode(k2.k, ptrs2, p2, entry2) ++ if (p1.ptr.dev == p2.ptr.dev && ++ p1.ptr.gen == p2.ptr.gen && ++ (s64) p1.ptr.offset + p1.crc.offset - bkey_start_offset(k1.k) == ++ (s64) p2.ptr.offset + p2.crc.offset - bkey_start_offset(k2.k)) ++ return true; ++ ++ return false; ++} ++ ++bool bch2_extent_has_ptr(struct bkey_s_c k1, struct extent_ptr_decoded p1, ++ struct bkey_s_c k2) ++{ ++ struct bkey_ptrs_c ptrs2 = bch2_bkey_ptrs_c(k2); ++ const union bch_extent_entry *entry2; ++ struct extent_ptr_decoded p2; ++ ++ bkey_for_each_ptr_decode(k2.k, ptrs2, p2, entry2) ++ if (p1.ptr.dev == p2.ptr.dev && ++ p1.ptr.gen == p2.ptr.gen && ++ (s64) p1.ptr.offset + p1.crc.offset - bkey_start_offset(k1.k) == ++ (s64) p2.ptr.offset + 
p2.crc.offset - bkey_start_offset(k2.k)) ++ return true; ++ ++ return false; ++} ++ ++/* + * bch_extent_normalize - clean up an extent, dropping stale pointers etc. + * + * Returns true if @k should be dropped entirely @@ -42693,6 +42627,7 @@ index 000000000000..73d756a63572 + struct bch_extent_crc_unpacked crc; + unsigned size_ondisk = k.k->size; + unsigned nonce = UINT_MAX; ++ unsigned nr_ptrs = 0; + int ret; + + if (bkey_is_btree_ptr(k.k)) @@ -42717,6 +42652,7 @@ index 000000000000..73d756a63572 + false, err); + if (ret) + return ret; ++ nr_ptrs++; + break; + case BCH_EXTENT_ENTRY_crc32: + case BCH_EXTENT_ENTRY_crc64: @@ -42755,6 +42691,11 @@ index 000000000000..73d756a63572 + } + } + ++ if (nr_ptrs >= BCH_BKEY_PTRS_MAX) { ++ prt_str(err, "too many ptrs"); ++ return -EINVAL; ++ } ++ + return 0; +} + @@ -42914,10 +42855,10 @@ index 000000000000..73d756a63572 +} diff --git a/fs/bcachefs/extents.h b/fs/bcachefs/extents.h new file mode 100644 -index 000000000000..4f41f0fd6cb1 +index 000000000..3c17b8113 --- /dev/null +++ b/fs/bcachefs/extents.h -@@ -0,0 +1,687 @@ +@@ -0,0 +1,685 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BCACHEFS_EXTENTS_H +#define _BCACHEFS_EXTENTS_H @@ -43497,15 +43438,10 @@ index 000000000000..4f41f0fd6cb1 +unsigned bch2_bkey_replicas(struct bch_fs *, struct bkey_s_c); +unsigned bch2_bkey_durability(struct bch_fs *, struct bkey_s_c); + -+void bch2_bkey_mark_replicas_cached(struct bch_fs *, struct bkey_s, -+ unsigned, unsigned); -+ +void bch2_bkey_extent_entry_drop(struct bkey_i *, union bch_extent_entry *); +void bch2_bkey_append_ptr(struct bkey_i *, struct bch_extent_ptr); +void bch2_extent_ptr_decoded_append(struct bkey_i *, + struct extent_ptr_decoded *); -+union bch_extent_entry *__bch2_bkey_drop_ptr(struct bkey_s, -+ struct bch_extent_ptr *); +union bch_extent_entry *bch2_bkey_drop_ptr(struct bkey_s, + struct bch_extent_ptr *); + @@ -43527,11 +43463,14 @@ index 000000000000..4f41f0fd6cb1 +} while (0) + +void 
bch2_bkey_drop_device(struct bkey_s, unsigned); ++void bch2_bkey_drop_device_noerror(struct bkey_s, unsigned); +const struct bch_extent_ptr *bch2_bkey_has_device(struct bkey_s_c, unsigned); +bool bch2_bkey_has_target(struct bch_fs *, struct bkey_s_c, unsigned); + +bool bch2_bkey_matches_ptr(struct bch_fs *, struct bkey_s_c, + struct bch_extent_ptr, u64); ++bool bch2_extents_match(struct bkey_s_c, struct bkey_s_c); ++bool bch2_extent_has_ptr(struct bkey_s_c, struct extent_ptr_decoded, struct bkey_s_c); + +bool bch2_extent_normalize(struct bch_fs *, struct bkey_s); +void bch2_bkey_ptrs_to_text(struct printbuf *, struct bch_fs *, @@ -43607,7 +43546,7 @@ index 000000000000..4f41f0fd6cb1 +#endif /* _BCACHEFS_EXTENTS_H */ diff --git a/fs/bcachefs/extents_types.h b/fs/bcachefs/extents_types.h new file mode 100644 -index 000000000000..43d6c341ecca +index 000000000..43d6c341e --- /dev/null +++ b/fs/bcachefs/extents_types.h @@ -0,0 +1,40 @@ @@ -43653,7 +43592,7 @@ index 000000000000..43d6c341ecca +#endif /* _BCACHEFS_EXTENTS_TYPES_H */ diff --git a/fs/bcachefs/eytzinger.h b/fs/bcachefs/eytzinger.h new file mode 100644 -index 000000000000..05429c9631cd +index 000000000..05429c963 --- /dev/null +++ b/fs/bcachefs/eytzinger.h @@ -0,0 +1,281 @@ @@ -43940,7 +43879,7 @@ index 000000000000..05429c9631cd +#endif /* _EYTZINGER_H */ diff --git a/fs/bcachefs/fifo.h b/fs/bcachefs/fifo.h new file mode 100644 -index 000000000000..cdb272708a4b +index 000000000..cdb272708 --- /dev/null +++ b/fs/bcachefs/fifo.h @@ -0,0 +1,127 @@ @@ -44073,10 +44012,10 @@ index 000000000000..cdb272708a4b +#endif /* _BCACHEFS_FIFO_H */ diff --git a/fs/bcachefs/fs-common.c b/fs/bcachefs/fs-common.c new file mode 100644 -index 000000000000..d543480be111 +index 000000000..53ffc6842 --- /dev/null +++ b/fs/bcachefs/fs-common.c -@@ -0,0 +1,494 @@ +@@ -0,0 +1,496 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "bcachefs.h" @@ -44283,7 +44222,9 @@ index 000000000000..d543480be111 + goto err; + + inode_u->bi_ctime = 
now; -+ bch2_inode_nlink_inc(inode_u); ++ ret = bch2_inode_nlink_inc(inode_u); ++ if (ret) ++ return ret; + + ret = bch2_inode_peek(trans, &dir_iter, dir_u, dir, BTREE_ITER_INTENT); + if (ret) @@ -44376,7 +44317,7 @@ index 000000000000..d543480be111 + if (ret) + goto err; + } else { -+ bch2_inode_nlink_dec(inode_u); ++ bch2_inode_nlink_dec(trans, inode_u); + } + + if (inode_u->bi_dir == dirent_iter.pos.inode && @@ -44541,7 +44482,7 @@ index 000000000000..d543480be111 + } + + if (mode == BCH_RENAME_OVERWRITE) -+ bch2_inode_nlink_dec(dst_inode_u); ++ bch2_inode_nlink_dec(trans, dst_inode_u); + + src_dir_u->bi_mtime = now; + src_dir_u->bi_ctime = now; @@ -44573,7 +44514,7 @@ index 000000000000..d543480be111 +} diff --git a/fs/bcachefs/fs-common.h b/fs/bcachefs/fs-common.h new file mode 100644 -index 000000000000..dde237859514 +index 000000000..dde237859 --- /dev/null +++ b/fs/bcachefs/fs-common.h @@ -0,0 +1,43 @@ @@ -44622,7 +44563,7 @@ index 000000000000..dde237859514 +#endif /* _BCACHEFS_FS_COMMON_H */ diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c new file mode 100644 -index 000000000000..06f3e263a13b +index 000000000..bcfd9e5f3 --- /dev/null +++ b/fs/bcachefs/fs-io.c @@ -0,0 +1,3496 @@ @@ -47755,7 +47696,7 @@ index 000000000000..06f3e263a13b + struct bch_fs *c = inode->v.i_sb->s_fs_info; + long ret; + -+ if (!percpu_ref_tryget(&c->writes)) ++ if (!percpu_ref_tryget_live(&c->writes)) + return -EROFS; + + inode_lock(&inode->v); @@ -48124,7 +48065,7 @@ index 000000000000..06f3e263a13b +#endif /* NO_BCACHEFS_FS */ diff --git a/fs/bcachefs/fs-io.h b/fs/bcachefs/fs-io.h new file mode 100644 -index 000000000000..7f2d7f454be4 +index 000000000..7f2d7f454 --- /dev/null +++ b/fs/bcachefs/fs-io.h @@ -0,0 +1,56 @@ @@ -48186,7 +48127,7 @@ index 000000000000..7f2d7f454be4 +#endif /* _BCACHEFS_FS_IO_H */ diff --git a/fs/bcachefs/fs-ioctl.c b/fs/bcachefs/fs-ioctl.c new file mode 100644 -index 000000000000..9f329a624c12 +index 000000000..9f329a624 --- /dev/null +++ 
b/fs/bcachefs/fs-ioctl.c @@ -0,0 +1,523 @@ @@ -48715,7 +48656,7 @@ index 000000000000..9f329a624c12 +#endif /* NO_BCACHEFS_FS */ diff --git a/fs/bcachefs/fs-ioctl.h b/fs/bcachefs/fs-ioctl.h new file mode 100644 -index 000000000000..f201980ef2c3 +index 000000000..f201980ef --- /dev/null +++ b/fs/bcachefs/fs-ioctl.h @@ -0,0 +1,81 @@ @@ -48802,7 +48743,7 @@ index 000000000000..f201980ef2c3 +#endif /* _BCACHEFS_FS_IOCTL_H */ diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c new file mode 100644 -index 000000000000..bb94ba58a796 +index 000000000..bb94ba58a --- /dev/null +++ b/fs/bcachefs/fs.c @@ -0,0 +1,1939 @@ @@ -50747,7 +50688,7 @@ index 000000000000..bb94ba58a796 +#endif /* NO_BCACHEFS_FS */ diff --git a/fs/bcachefs/fs.h b/fs/bcachefs/fs.h new file mode 100644 -index 000000000000..9f4b57e30e2a +index 000000000..9f4b57e30 --- /dev/null +++ b/fs/bcachefs/fs.h @@ -0,0 +1,208 @@ @@ -50961,7 +50902,7 @@ index 000000000000..9f4b57e30e2a +#endif /* _BCACHEFS_FS_H */ diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c new file mode 100644 -index 000000000000..81bfd6ea273e +index 000000000..81bfd6ea2 --- /dev/null +++ b/fs/bcachefs/fsck.c @@ -0,0 +1,2413 @@ @@ -53380,7 +53321,7 @@ index 000000000000..81bfd6ea273e +} diff --git a/fs/bcachefs/fsck.h b/fs/bcachefs/fsck.h new file mode 100644 -index 000000000000..264f2706b12d +index 000000000..264f2706b --- /dev/null +++ b/fs/bcachefs/fsck.h @@ -0,0 +1,8 @@ @@ -53394,10 +53335,10 @@ index 000000000000..264f2706b12d +#endif /* _BCACHEFS_FSCK_H */ diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c new file mode 100644 -index 000000000000..88d83d91546a +index 000000000..6a2b94908 --- /dev/null +++ b/fs/bcachefs/inode.c -@@ -0,0 +1,738 @@ +@@ -0,0 +1,771 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "bcachefs.h" @@ -54136,12 +54077,45 @@ index 000000000000..88d83d91546a + return bch2_trans_do(c, NULL, NULL, 0, + bch2_inode_find_by_inum_trans(&trans, inum, inode)); +} ++ ++int bch2_inode_nlink_inc(struct bch_inode_unpacked 
*bi) ++{ ++ if (bi->bi_flags & BCH_INODE_UNLINKED) ++ bi->bi_flags &= ~BCH_INODE_UNLINKED; ++ else { ++ if (bi->bi_nlink == U32_MAX) ++ return -EINVAL; ++ ++ bi->bi_nlink++; ++ } ++ ++ return 0; ++} ++ ++void bch2_inode_nlink_dec(struct btree_trans *trans, struct bch_inode_unpacked *bi) ++{ ++ if (bi->bi_nlink && (bi->bi_flags & BCH_INODE_UNLINKED)) { ++ bch2_trans_inconsistent(trans, "inode %llu unlinked but link count nonzero", ++ bi->bi_inum); ++ return; ++ } ++ ++ if (bi->bi_flags & BCH_INODE_UNLINKED) { ++ bch2_trans_inconsistent(trans, "inode %llu link count underflow", bi->bi_inum); ++ return; ++ } ++ ++ if (bi->bi_nlink) ++ bi->bi_nlink--; ++ else ++ bi->bi_flags |= BCH_INODE_UNLINKED; ++} diff --git a/fs/bcachefs/inode.h b/fs/bcachefs/inode.h new file mode 100644 -index 000000000000..9442600a7440 +index 000000000..2ac2fc105 --- /dev/null +++ b/fs/bcachefs/inode.h -@@ -0,0 +1,203 @@ +@@ -0,0 +1,189 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BCACHEFS_INODE_H +#define _BCACHEFS_INODE_H @@ -54308,23 +54282,6 @@ index 000000000000..9442600a7440 + return S_ISDIR(mode) ? 
2 : 1; +} + -+static inline void bch2_inode_nlink_inc(struct bch_inode_unpacked *bi) -+{ -+ if (bi->bi_flags & BCH_INODE_UNLINKED) -+ bi->bi_flags &= ~BCH_INODE_UNLINKED; -+ else -+ bi->bi_nlink++; -+} -+ -+static inline void bch2_inode_nlink_dec(struct bch_inode_unpacked *bi) -+{ -+ BUG_ON(bi->bi_flags & BCH_INODE_UNLINKED); -+ if (bi->bi_nlink) -+ bi->bi_nlink--; -+ else -+ bi->bi_flags |= BCH_INODE_UNLINKED; -+} -+ +static inline unsigned bch2_inode_nlink_get(struct bch_inode_unpacked *bi) +{ + return bi->bi_flags & BCH_INODE_UNLINKED @@ -54344,13 +54301,16 @@ index 000000000000..9442600a7440 + } +} + ++int bch2_inode_nlink_inc(struct bch_inode_unpacked *); ++void bch2_inode_nlink_dec(struct btree_trans *, struct bch_inode_unpacked *); ++ +#endif /* _BCACHEFS_INODE_H */ diff --git a/fs/bcachefs/io.c b/fs/bcachefs/io.c new file mode 100644 -index 000000000000..7756d4b36ccf +index 000000000..50fa57234 --- /dev/null +++ b/fs/bcachefs/io.c -@@ -0,0 +1,2427 @@ +@@ -0,0 +1,2417 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Some low level IO code, and hacks for various block layer limitations @@ -55396,8 +55356,7 @@ index 000000000000..7756d4b36ccf + *_dst = dst; + return more; +csum_err: -+ bch_err(c, "error verifying existing checksum while " -+ "rewriting existing data (memory corruption?)"); ++ bch_err(c, "error verifying existing checksum while rewriting existing data (memory corruption?)"); + ret = -EIO; +err: + if (to_wbio(dst)->bounce) @@ -55438,12 +55397,6 @@ index 000000000000..7756d4b36ccf + BKEY_EXTENT_U64s_MAX)) + goto flush_io; + -+ if ((op->flags & BCH_WRITE_FROM_INTERNAL) && -+ percpu_ref_is_dying(&c->writes)) { -+ ret = -EROFS; -+ goto err; -+ } -+ + /* + * The copygc thread is now global, which means it's no longer + * freeing up space on specific disks, which means that @@ -55637,7 +55590,7 @@ index 000000000000..7756d4b36ccf + } + + if (c->opts.nochanges || -+ !percpu_ref_tryget(&c->writes)) { ++ !percpu_ref_tryget_live(&c->writes)) { + op->error 
= -EROFS; + goto err; + } @@ -55734,13 +55687,12 @@ index 000000000000..7756d4b36ccf + bch2_time_stats_update(&c->times[BCH_TIME_data_promote], + op->start_time); + -+ bch2_bio_free_pages_pool(c, &op->write.op.wbio.bio); ++ bch2_data_update_exit(&op->write); + promote_free(c, op); +} + +static void promote_start(struct promote_op *op, struct bch_read_bio *rbio) +{ -+ struct bch_fs *c = rbio->c; + struct closure *cl = &op->cl; + struct bio *bio = &op->write.op.wbio.bio; + @@ -55754,10 +55706,8 @@ index 000000000000..7756d4b36ccf + sizeof(struct bio_vec) * rbio->bio.bi_vcnt); + swap(bio->bi_vcnt, rbio->bio.bi_vcnt); + -+ bch2_data_update_read_done(&op->write, rbio); -+ + closure_init(cl, NULL); -+ closure_call(&op->write.op.cl, bch2_write, c->btree_update_wq, cl); ++ bch2_data_update_read_done(&op->write, rbio->pick.crc, cl); + closure_return_with_destructor(cl, promote_done); +} + @@ -55775,7 +55725,7 @@ index 000000000000..7756d4b36ccf + unsigned pages = DIV_ROUND_UP(sectors, PAGE_SECTORS); + int ret; + -+ if (!percpu_ref_tryget(&c->writes)) ++ if (!percpu_ref_tryget_live(&c->writes)) + return NULL; + + op = kzalloc(sizeof(*op) + sizeof(struct bio_vec) * pages, GFP_NOIO); @@ -55816,10 +55766,10 @@ index 000000000000..7756d4b36ccf + ret = bch2_data_update_init(c, &op->write, + writepoint_hashed((unsigned long) current), + opts, -+ DATA_PROMOTE, -+ (struct data_opts) { ++ (struct data_update_opts) { + .target = opts.promote_target, -+ .nr_replicas = 1, ++ .extra_replicas = 1, ++ .write_flags = BCH_WRITE_ALLOC_NOWAIT|BCH_WRITE_CACHED, + }, + btree_id, k); + BUG_ON(ret); @@ -56223,9 +56173,9 @@ index 000000000000..7756d4b36ccf + } + + bch2_dev_inum_io_error(ca, rbio->read_pos.inode, (u64) rbio->bvec_iter.bi_sector, -+ "data checksum error: expected %0llx:%0llx got %0llx:%0llx (type %u)", ++ "data checksum error: expected %0llx:%0llx got %0llx:%0llx (type %s)", + rbio->pick.crc.csum.hi, rbio->pick.crc.csum.lo, -+ csum.hi, csum.lo, crc.csum_type); ++ csum.hi, csum.lo, 
bch2_csum_types[crc.csum_type]); + bch2_rbio_error(rbio, READ_RETRY_AVOID, BLK_STS_IOERR); + goto out; +decompression_err: @@ -56780,7 +56730,7 @@ index 000000000000..7756d4b36ccf +} diff --git a/fs/bcachefs/io.h b/fs/bcachefs/io.h new file mode 100644 -index 000000000000..fb5114518666 +index 000000000..fb5114518 --- /dev/null +++ b/fs/bcachefs/io.h @@ -0,0 +1,189 @@ @@ -56975,7 +56925,7 @@ index 000000000000..fb5114518666 +#endif /* _BCACHEFS_IO_H */ diff --git a/fs/bcachefs/io_types.h b/fs/bcachefs/io_types.h new file mode 100644 -index 000000000000..78bff13d36f2 +index 000000000..78bff13d3 --- /dev/null +++ b/fs/bcachefs/io_types.h @@ -0,0 +1,161 @@ @@ -57142,7 +57092,7 @@ index 000000000000..78bff13d36f2 +#endif /* _BCACHEFS_IO_TYPES_H */ diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c new file mode 100644 -index 000000000000..b561ed787493 +index 000000000..b561ed787 --- /dev/null +++ b/fs/bcachefs/journal.c @@ -0,0 +1,1429 @@ @@ -58577,7 +58527,7 @@ index 000000000000..b561ed787493 +} diff --git a/fs/bcachefs/journal.h b/fs/bcachefs/journal.h new file mode 100644 -index 000000000000..d3caa7ea7ce9 +index 000000000..d3caa7ea7 --- /dev/null +++ b/fs/bcachefs/journal.h @@ -0,0 +1,521 @@ @@ -59104,7 +59054,7 @@ index 000000000000..d3caa7ea7ce9 +#endif /* _BCACHEFS_JOURNAL_H */ diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c new file mode 100644 -index 000000000000..0ff78a274d4c +index 000000000..0ff78a274 --- /dev/null +++ b/fs/bcachefs/journal_io.c @@ -0,0 +1,1735 @@ @@ -60845,7 +60795,7 @@ index 000000000000..0ff78a274d4c +} diff --git a/fs/bcachefs/journal_io.h b/fs/bcachefs/journal_io.h new file mode 100644 -index 000000000000..30e995c81fc4 +index 000000000..30e995c81 --- /dev/null +++ b/fs/bcachefs/journal_io.h @@ -0,0 +1,59 @@ @@ -60910,7 +60860,7 @@ index 000000000000..30e995c81fc4 +#endif /* _BCACHEFS_JOURNAL_IO_H */ diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c new file mode 100644 -index 
000000000000..fdc94e831a86 +index 000000000..fdc94e831 --- /dev/null +++ b/fs/bcachefs/journal_reclaim.c @@ -0,0 +1,849 @@ @@ -61765,7 +61715,7 @@ index 000000000000..fdc94e831a86 +} diff --git a/fs/bcachefs/journal_reclaim.h b/fs/bcachefs/journal_reclaim.h new file mode 100644 -index 000000000000..0fd1af120db5 +index 000000000..0fd1af120 --- /dev/null +++ b/fs/bcachefs/journal_reclaim.h @@ -0,0 +1,86 @@ @@ -61857,7 +61807,7 @@ index 000000000000..0fd1af120db5 +#endif /* _BCACHEFS_JOURNAL_RECLAIM_H */ diff --git a/fs/bcachefs/journal_sb.c b/fs/bcachefs/journal_sb.c new file mode 100644 -index 000000000000..001cecec1291 +index 000000000..001cecec1 --- /dev/null +++ b/fs/bcachefs/journal_sb.c @@ -0,0 +1,220 @@ @@ -62083,7 +62033,7 @@ index 000000000000..001cecec1291 +} diff --git a/fs/bcachefs/journal_sb.h b/fs/bcachefs/journal_sb.h new file mode 100644 -index 000000000000..a39192e9f6f4 +index 000000000..a39192e9f --- /dev/null +++ b/fs/bcachefs/journal_sb.h @@ -0,0 +1,24 @@ @@ -62113,7 +62063,7 @@ index 000000000000..a39192e9f6f4 +int bch2_journal_buckets_to_sb(struct bch_fs *, struct bch_dev *); diff --git a/fs/bcachefs/journal_seq_blacklist.c b/fs/bcachefs/journal_seq_blacklist.c new file mode 100644 -index 000000000000..d9b4042a2e4a +index 000000000..d9b4042a2 --- /dev/null +++ b/fs/bcachefs/journal_seq_blacklist.c @@ -0,0 +1,322 @@ @@ -62441,7 +62391,7 @@ index 000000000000..d9b4042a2e4a +} diff --git a/fs/bcachefs/journal_seq_blacklist.h b/fs/bcachefs/journal_seq_blacklist.h new file mode 100644 -index 000000000000..afb886ec8e25 +index 000000000..afb886ec8 --- /dev/null +++ b/fs/bcachefs/journal_seq_blacklist.h @@ -0,0 +1,22 @@ @@ -62469,7 +62419,7 @@ index 000000000000..afb886ec8e25 +#endif /* _BCACHEFS_JOURNAL_SEQ_BLACKLIST_H */ diff --git a/fs/bcachefs/journal_types.h b/fs/bcachefs/journal_types.h new file mode 100644 -index 000000000000..a6cdb885ad41 +index 000000000..a6cdb885a --- /dev/null +++ b/fs/bcachefs/journal_types.h @@ -0,0 +1,340 @@ @@ -62815,7 
+62765,7 @@ index 000000000000..a6cdb885ad41 +#endif /* _BCACHEFS_JOURNAL_TYPES_H */ diff --git a/fs/bcachefs/keylist.c b/fs/bcachefs/keylist.c new file mode 100644 -index 000000000000..cda77835b9ea +index 000000000..cda77835b --- /dev/null +++ b/fs/bcachefs/keylist.c @@ -0,0 +1,67 @@ @@ -62888,7 +62838,7 @@ index 000000000000..cda77835b9ea +#endif diff --git a/fs/bcachefs/keylist.h b/fs/bcachefs/keylist.h new file mode 100644 -index 000000000000..195799bb20bc +index 000000000..195799bb2 --- /dev/null +++ b/fs/bcachefs/keylist.h @@ -0,0 +1,76 @@ @@ -62970,7 +62920,7 @@ index 000000000000..195799bb20bc +#endif /* _BCACHEFS_KEYLIST_H */ diff --git a/fs/bcachefs/keylist_types.h b/fs/bcachefs/keylist_types.h new file mode 100644 -index 000000000000..4b3ff7d8a875 +index 000000000..4b3ff7d8a --- /dev/null +++ b/fs/bcachefs/keylist_types.h @@ -0,0 +1,16 @@ @@ -62992,7 +62942,7 @@ index 000000000000..4b3ff7d8a875 +#endif /* _BCACHEFS_KEYLIST_TYPES_H */ diff --git a/fs/bcachefs/lru.c b/fs/bcachefs/lru.c new file mode 100644 -index 000000000000..5a09b55006ff +index 000000000..5a09b5500 --- /dev/null +++ b/fs/bcachefs/lru.c @@ -0,0 +1,219 @@ @@ -63217,7 +63167,7 @@ index 000000000000..5a09b55006ff +} diff --git a/fs/bcachefs/lru.h b/fs/bcachefs/lru.h new file mode 100644 -index 000000000000..3decb7b1dde2 +index 000000000..3decb7b1d --- /dev/null +++ b/fs/bcachefs/lru.h @@ -0,0 +1,19 @@ @@ -63242,7 +63192,7 @@ index 000000000000..3decb7b1dde2 +#endif /* _BCACHEFS_LRU_H */ diff --git a/fs/bcachefs/migrate.c b/fs/bcachefs/migrate.c new file mode 100644 -index 000000000000..5345697f2712 +index 000000000..5345697f2 --- /dev/null +++ b/fs/bcachefs/migrate.c @@ -0,0 +1,193 @@ @@ -63441,7 +63391,7 @@ index 000000000000..5345697f2712 +} diff --git a/fs/bcachefs/migrate.h b/fs/bcachefs/migrate.h new file mode 100644 -index 000000000000..027efaa0d575 +index 000000000..027efaa0d --- /dev/null +++ b/fs/bcachefs/migrate.h @@ -0,0 +1,7 @@ @@ -63454,10 +63404,10 @@ index 
000000000000..027efaa0d575 +#endif /* _BCACHEFS_MIGRATE_H */ diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c new file mode 100644 -index 000000000000..bc507c59df33 +index 000000000..9748b8653 --- /dev/null +++ b/fs/bcachefs/move.c -@@ -0,0 +1,925 @@ +@@ -0,0 +1,951 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "bcachefs.h" @@ -63483,7 +63433,19 @@ index 000000000000..bc507c59df33 + +#include + -+#define SECTORS_IN_FLIGHT_PER_DEVICE 2048 ++static void progress_list_add(struct bch_fs *c, struct bch_move_stats *stats) ++{ ++ mutex_lock(&c->data_progress_lock); ++ list_add(&stats->list, &c->data_progress_list); ++ mutex_unlock(&c->data_progress_lock); ++} ++ ++static void progress_list_del(struct bch_fs *c, struct bch_move_stats *stats) ++{ ++ mutex_lock(&c->data_progress_lock); ++ list_del(&stats->list); ++ mutex_unlock(&c->data_progress_lock); ++} + +struct moving_io { + struct list_head list; @@ -63504,23 +63466,21 @@ index 000000000000..bc507c59df33 +{ + struct moving_io *io = container_of(cl, struct moving_io, cl); + struct moving_context *ctxt = io->write.ctxt; -+ struct bvec_iter_all iter; -+ struct bio_vec *bv; -+ -+ bch2_disk_reservation_put(io->write.op.c, &io->write.op.res); -+ -+ bio_for_each_segment_all(bv, &io->write.op.wbio.bio, iter) -+ if (bv->bv_page) -+ __free_page(bv->bv_page); ++ struct bch_fs *c = ctxt->c; + ++ bch2_data_update_exit(&io->write); + wake_up(&ctxt->wait); -+ ++ percpu_ref_put(&c->writes); + kfree(io); +} + +static void move_write_done(struct closure *cl) +{ + struct moving_io *io = container_of(cl, struct moving_io, cl); ++ struct moving_context *ctxt = io->write.ctxt; ++ ++ if (io->write.op.error) ++ ctxt->write_error = true; + + atomic_sub(io->write_sectors, &io->write.ctxt->write_sectors); + closure_return_with_destructor(cl, move_free); @@ -63535,10 +63495,9 @@ index 000000000000..bc507c59df33 + return; + } + -+ bch2_data_update_read_done(&io->write, &io->rbio); -+ + atomic_add(io->write_sectors, 
&io->write.ctxt->write_sectors); -+ closure_call(&io->write.op.cl, bch2_write, NULL, cl); ++ ++ bch2_data_update_read_done(&io->write, io->rbio.pick.crc, cl); + continue_at(cl, move_write_done, NULL); +} + @@ -63595,14 +63554,58 @@ index 000000000000..bc507c59df33 + atomic_read(&ctxt->write_sectors) != sectors_pending); +} + ++void bch2_moving_ctxt_exit(struct moving_context *ctxt) ++{ ++ move_ctxt_wait_event(ctxt, NULL, list_empty(&ctxt->reads)); ++ closure_sync(&ctxt->cl); ++ EBUG_ON(atomic_read(&ctxt->write_sectors)); ++ ++ if (ctxt->stats) { ++ progress_list_del(ctxt->c, ctxt->stats); ++ ++ trace_move_data(ctxt->c, ++ atomic64_read(&ctxt->stats->sectors_moved), ++ atomic64_read(&ctxt->stats->keys_moved)); ++ } ++} ++ ++void bch2_moving_ctxt_init(struct moving_context *ctxt, ++ struct bch_fs *c, ++ struct bch_ratelimit *rate, ++ struct bch_move_stats *stats, ++ struct write_point_specifier wp, ++ bool wait_on_copygc) ++{ ++ memset(ctxt, 0, sizeof(*ctxt)); ++ ++ ctxt->c = c; ++ ctxt->rate = rate; ++ ctxt->stats = stats; ++ ctxt->wp = wp; ++ ctxt->wait_on_copygc = wait_on_copygc; ++ ++ closure_init_stack(&ctxt->cl); ++ INIT_LIST_HEAD(&ctxt->reads); ++ init_waitqueue_head(&ctxt->wait); ++ ++ if (stats) { ++ progress_list_add(c, stats); ++ stats->data_type = BCH_DATA_user; ++ } ++} ++ ++void bch_move_stats_init(struct bch_move_stats *stats, char *name) ++{ ++ memset(stats, 0, sizeof(*stats)); ++ scnprintf(stats->name, sizeof(stats->name), "%s", name); ++} ++ +static int bch2_move_extent(struct btree_trans *trans, + struct moving_context *ctxt, -+ struct write_point_specifier wp, + struct bch_io_opts io_opts, + enum btree_id btree_id, + struct bkey_s_c k, -+ enum data_cmd data_cmd, -+ struct data_opts data_opts) ++ struct data_update_opts data_opts) +{ + struct bch_fs *c = trans->c; + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); @@ -63612,6 +63615,9 @@ index 000000000000..bc507c59df33 + unsigned sectors = k.k->size, pages; + int ret = -ENOMEM; + ++ if 
(!percpu_ref_tryget_live(&c->writes)) ++ return -EROFS; ++ + /* write path might have to decompress data: */ + bkey_for_each_ptr_decode(k.k, ptrs, p, entry) + sectors = max_t(unsigned, sectors, p.crc.uncompressed_size); @@ -63645,11 +63651,13 @@ index 000000000000..bc507c59df33 + io->rbio.bio.bi_iter.bi_sector = bkey_start_offset(k.k); + io->rbio.bio.bi_end_io = move_read_endio; + -+ ret = bch2_data_update_init(c, &io->write, wp, io_opts, -+ data_cmd, data_opts, btree_id, k); ++ ret = bch2_data_update_init(c, &io->write, ctxt->wp, io_opts, ++ data_opts, btree_id, k); + if (ret) + goto err_free_pages; + ++ io->write.ctxt = ctxt; ++ + atomic64_inc(&ctxt->stats->keys_moved); + atomic64_add(k.k->size, &ctxt->stats->sectors_moved); + this_cpu_add(c->counters[BCH_COUNTER_io_move], k.k->size); @@ -63675,6 +63683,7 @@ index 000000000000..bc507c59df33 +err_free: + kfree(io); +err: ++ percpu_ref_put(&c->writes); + trace_move_alloc_mem_fail(k.k); + return ret; +} @@ -63711,13 +63720,20 @@ index 000000000000..bc507c59df33 +} + +static int move_ratelimit(struct btree_trans *trans, -+ struct moving_context *ctxt, -+ struct bch_ratelimit *rate) ++ struct moving_context *ctxt) +{ ++ struct bch_fs *c = trans->c; + u64 delay; + ++ if (ctxt->wait_on_copygc) { ++ bch2_trans_unlock(trans); ++ wait_event_killable(c->copygc_running_wq, ++ !c->copygc_running || ++ kthread_should_stop()); ++ } ++ + do { -+ delay = rate ? bch2_ratelimit_delay(rate) : 0; ++ delay = ctxt->rate ? 
bch2_ratelimit_delay(ctxt->rate) : 0; + + if (delay) { + bch2_trans_unlock(trans); @@ -63740,11 +63756,11 @@ index 000000000000..bc507c59df33 + + move_ctxt_wait_event(ctxt, trans, + atomic_read(&ctxt->write_sectors) < -+ SECTORS_IN_FLIGHT_PER_DEVICE); ++ c->opts.move_bytes_in_flight >> 9); + + move_ctxt_wait_event(ctxt, trans, + atomic_read(&ctxt->read_sectors) < -+ SECTORS_IN_FLIGHT_PER_DEVICE); ++ c->opts.move_bytes_in_flight >> 9); + + return 0; +} @@ -63774,41 +63790,37 @@ index 000000000000..bc507c59df33 + return 0; +} + -+static int __bch2_move_data(struct bch_fs *c, -+ struct moving_context *ctxt, -+ struct bch_ratelimit *rate, -+ struct write_point_specifier wp, -+ struct bpos start, -+ struct bpos end, -+ move_pred_fn pred, void *arg, -+ struct bch_move_stats *stats, -+ enum btree_id btree_id) ++static int __bch2_move_data(struct moving_context *ctxt, ++ struct bpos start, ++ struct bpos end, ++ move_pred_fn pred, void *arg, ++ enum btree_id btree_id) +{ ++ struct bch_fs *c = ctxt->c; + struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts); + struct bkey_buf sk; + struct btree_trans trans; + struct btree_iter iter; + struct bkey_s_c k; -+ struct data_opts data_opts; -+ enum data_cmd data_cmd; ++ struct data_update_opts data_opts; + u64 cur_inum = U64_MAX; + int ret = 0, ret2; + + bch2_bkey_buf_init(&sk); + bch2_trans_init(&trans, c, 0, 0); + -+ stats->data_type = BCH_DATA_user; -+ stats->btree_id = btree_id; -+ stats->pos = start; ++ ctxt->stats->data_type = BCH_DATA_user; ++ ctxt->stats->btree_id = btree_id; ++ ctxt->stats->pos = start; + + bch2_trans_iter_init(&trans, &iter, btree_id, start, + BTREE_ITER_PREFETCH| + BTREE_ITER_ALL_SNAPSHOTS); + -+ if (rate) -+ bch2_ratelimit_reset(rate); ++ if (ctxt->rate) ++ bch2_ratelimit_reset(ctxt->rate); + -+ while (!move_ratelimit(&trans, ctxt, rate)) { ++ while (!move_ratelimit(&trans, ctxt)) { + bch2_trans_begin(&trans); + + k = bch2_btree_iter_peek(&iter); @@ -63824,7 +63836,7 @@ index 
000000000000..bc507c59df33 + if (bkey_cmp(bkey_start_pos(k.k), end) >= 0) + break; + -+ stats->pos = iter.pos; ++ ctxt->stats->pos = iter.pos; + + if (!bkey_extent_is_direct_data(k.k)) + goto next_nondata; @@ -63833,18 +63845,9 @@ index 000000000000..bc507c59df33 + if (ret) + continue; + -+ switch ((data_cmd = pred(c, arg, k, &io_opts, &data_opts))) { -+ case DATA_SKIP: ++ memset(&data_opts, 0, sizeof(data_opts)); ++ if (!pred(c, arg, k, &io_opts, &data_opts)) + goto next; -+ case DATA_SCRUB: -+ BUG(); -+ case DATA_ADD_REPLICAS: -+ case DATA_REWRITE: -+ case DATA_PROMOTE: -+ break; -+ default: -+ BUG(); -+ } + + /* + * The iterator gets unlocked by __bch2_read_extent - need to @@ -63853,8 +63856,8 @@ index 000000000000..bc507c59df33 + bch2_bkey_buf_reassemble(&sk, c, k); + k = bkey_i_to_s_c(sk.k); + -+ ret2 = bch2_move_extent(&trans, ctxt, wp, io_opts, btree_id, k, -+ data_cmd, data_opts); ++ ret2 = bch2_move_extent(&trans, ctxt, io_opts, ++ btree_id, k, data_opts); + if (ret2) { + if (ret2 == -EINTR) + continue; @@ -63869,10 +63872,10 @@ index 000000000000..bc507c59df33 + goto next; + } + -+ if (rate) -+ bch2_ratelimit_increment(rate, k.k->size); ++ if (ctxt->rate) ++ bch2_ratelimit_increment(ctxt->rate, k.k->size); +next: -+ atomic64_add(k.k->size, &stats->sectors_seen); ++ atomic64_add(k.k->size, &ctxt->stats->sectors_seen); +next_nondata: + bch2_btree_iter_advance(&iter); + } @@ -63884,48 +63887,20 @@ index 000000000000..bc507c59df33 + return ret; +} + -+inline void bch_move_stats_init(struct bch_move_stats *stats, char *name) -+{ -+ memset(stats, 0, sizeof(*stats)); -+ -+ scnprintf(stats->name, sizeof(stats->name), -+ "%s", name); -+} -+ -+static inline void progress_list_add(struct bch_fs *c, -+ struct bch_move_stats *stats) -+{ -+ mutex_lock(&c->data_progress_lock); -+ list_add(&stats->list, &c->data_progress_list); -+ mutex_unlock(&c->data_progress_lock); -+} -+ -+static inline void progress_list_del(struct bch_fs *c, -+ struct bch_move_stats *stats) -+{ -+ 
mutex_lock(&c->data_progress_lock); -+ list_del(&stats->list); -+ mutex_unlock(&c->data_progress_lock); -+} -+ +int bch2_move_data(struct bch_fs *c, + enum btree_id start_btree_id, struct bpos start_pos, + enum btree_id end_btree_id, struct bpos end_pos, + struct bch_ratelimit *rate, ++ struct bch_move_stats *stats, + struct write_point_specifier wp, -+ move_pred_fn pred, void *arg, -+ struct bch_move_stats *stats) ++ bool wait_on_copygc, ++ move_pred_fn pred, void *arg) +{ -+ struct moving_context ctxt = { .stats = stats }; ++ struct moving_context ctxt; + enum btree_id id; + int ret; + -+ progress_list_add(c, stats); -+ closure_init_stack(&ctxt.cl); -+ INIT_LIST_HEAD(&ctxt.reads); -+ init_waitqueue_head(&ctxt.wait); -+ -+ stats->data_type = BCH_DATA_user; ++ bch2_moving_ctxt_init(&ctxt, c, rate, stats, wp, wait_on_copygc); + + for (id = start_btree_id; + id <= min_t(unsigned, end_btree_id, BTREE_ID_NR - 1); @@ -63936,24 +63911,16 @@ index 000000000000..bc507c59df33 + id != BTREE_ID_reflink) + continue; + -+ ret = __bch2_move_data(c, &ctxt, rate, wp, ++ ret = __bch2_move_data(&ctxt, + id == start_btree_id ? start_pos : POS_MIN, + id == end_btree_id ? 
end_pos : POS_MAX, -+ pred, arg, stats, id); ++ pred, arg, id); + if (ret) + break; + } + -+ move_ctxt_wait_event(&ctxt, NULL, list_empty(&ctxt.reads)); -+ closure_sync(&ctxt.cl); ++ bch2_moving_ctxt_exit(&ctxt); + -+ EBUG_ON(atomic_read(&ctxt.write_sectors)); -+ -+ trace_move_data(c, -+ atomic64_read(&stats->sectors_moved), -+ atomic64_read(&stats->keys_moved)); -+ -+ progress_list_del(c, stats); + return ret; +} + @@ -63966,6 +63933,7 @@ index 000000000000..bc507c59df33 + + bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc, + bucket, BTREE_ITER_CACHED); ++again: + k = bch2_btree_iter_peek_slot(&iter); + ret = bkey_err(k); + @@ -63976,10 +63944,16 @@ index 000000000000..bc507c59df33 + a.v->dirty_sectors) { + struct printbuf buf = PRINTBUF; + ++ if (a.v->data_type == BCH_DATA_btree) { ++ bch2_trans_unlock(trans); ++ if (bch2_btree_interior_updates_flush(c)) ++ goto again; ++ } ++ + prt_str(&buf, "failed to evacuate bucket "); + bch2_bkey_val_to_text(&buf, c, k); + -+ bch_err_ratelimited(c, "%s", buf.buf); ++ bch2_trans_inconsistent(trans, "%s", buf.buf); + printbuf_exit(&buf); + } + } @@ -63988,33 +63962,24 @@ index 000000000000..bc507c59df33 + return ret; +} + -+int bch2_evacuate_bucket(struct bch_fs *c, -+ struct bpos bucket, int gen, -+ struct bch_ratelimit *rate, -+ struct write_point_specifier wp, -+ enum data_cmd data_cmd, -+ struct data_opts *data_opts, -+ struct bch_move_stats *stats) ++int __bch2_evacuate_bucket(struct moving_context *ctxt, ++ struct bpos bucket, int gen, ++ struct data_update_opts _data_opts) +{ ++ struct bch_fs *c = ctxt->c; + struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts); -+ struct moving_context ctxt = { .stats = stats }; + struct btree_trans trans; + struct btree_iter iter; + struct bkey_buf sk; + struct bch_backpointer bp; ++ struct data_update_opts data_opts; + u64 bp_offset = 0, cur_inum = U64_MAX; + int ret = 0; + + bch2_bkey_buf_init(&sk); + bch2_trans_init(&trans, c, 0, 0); -+ progress_list_add(c, stats); -+ 
closure_init_stack(&ctxt.cl); -+ INIT_LIST_HEAD(&ctxt.reads); -+ init_waitqueue_head(&ctxt.wait); + -+ stats->data_type = BCH_DATA_user; -+ -+ while (!(ret = move_ratelimit(&trans, &ctxt, rate))) { ++ while (!(ret = move_ratelimit(&trans, ctxt))) { + bch2_trans_begin(&trans); + + ret = bch2_get_next_backpointer(&trans, bucket, gen, @@ -64027,7 +63992,9 @@ index 000000000000..bc507c59df33 + break; + + if (!bp.level) { ++ const struct bch_extent_ptr *ptr; + struct bkey_s_c k; ++ unsigned i = 0; + + k = bch2_backpointer_get_key(&trans, &iter, + bucket, bp_offset, bp); @@ -64047,24 +64014,31 @@ index 000000000000..bc507c59df33 + if (ret) + continue; + -+ data_opts->target = io_opts.background_target; -+ data_opts->rewrite_dev = bucket.inode; ++ data_opts = _data_opts; ++ data_opts.target = io_opts.background_target; ++ data_opts.rewrite_ptrs = 0; + -+ ret = bch2_move_extent(&trans, &ctxt, wp, io_opts, bp.btree_id, k, -+ data_cmd, *data_opts); ++ bkey_for_each_ptr(bch2_bkey_ptrs_c(k), ptr) { ++ if (ptr->dev == bucket.inode) ++ data_opts.rewrite_ptrs |= 1U << i; ++ i++; ++ } ++ ++ ret = bch2_move_extent(&trans, ctxt, io_opts, ++ bp.btree_id, k, data_opts); + if (ret == -EINTR) + continue; + if (ret == -ENOMEM) { + /* memory allocation failure, wait for some IO to finish */ -+ bch2_move_ctxt_wait_for_io(&ctxt, &trans); ++ bch2_move_ctxt_wait_for_io(ctxt, &trans); + continue; + } + if (ret) + goto err; + -+ if (rate) -+ bch2_ratelimit_increment(rate, k.k->size); -+ atomic64_add(k.k->size, &stats->sectors_seen); ++ if (ctxt->rate) ++ bch2_ratelimit_increment(ctxt->rate, k.k->size); ++ atomic64_add(k.k->size, &ctxt->stats->sectors_seen); + } else { + struct btree *b; + @@ -64086,10 +64060,11 @@ index 000000000000..bc507c59df33 + if (ret) + goto err; + -+ if (rate) -+ bch2_ratelimit_increment(rate, c->opts.btree_node_size >> 9); -+ atomic64_add(c->opts.btree_node_size >> 9, &stats->sectors_seen); -+ atomic64_add(c->opts.btree_node_size >> 9, &stats->sectors_moved); ++ if 
(ctxt->rate) ++ bch2_ratelimit_increment(ctxt->rate, ++ c->opts.btree_node_size >> 9); ++ atomic64_add(c->opts.btree_node_size >> 9, &ctxt->stats->sectors_seen); ++ atomic64_add(c->opts.btree_node_size >> 9, &ctxt->stats->sectors_moved); + } + + bp_offset++; @@ -64097,30 +64072,38 @@ index 000000000000..bc507c59df33 + + if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG) && gen >= 0) { + bch2_trans_unlock(&trans); -+ move_ctxt_wait_event(&ctxt, NULL, list_empty(&ctxt.reads)); -+ closure_sync(&ctxt.cl); -+ lockrestart_do(&trans, verify_bucket_evacuated(&trans, bucket, gen)); ++ move_ctxt_wait_event(ctxt, NULL, list_empty(&ctxt->reads)); ++ closure_sync(&ctxt->cl); ++ if (!ctxt->write_error) ++ lockrestart_do(&trans, verify_bucket_evacuated(&trans, bucket, gen)); + } +err: + bch2_trans_exit(&trans); + bch2_bkey_buf_exit(&sk, c); ++ return ret; ++} + -+ move_ctxt_wait_event(&ctxt, NULL, list_empty(&ctxt.reads)); -+ closure_sync(&ctxt.cl); -+ progress_list_del(c, stats); ++int bch2_evacuate_bucket(struct bch_fs *c, ++ struct bpos bucket, int gen, ++ struct data_update_opts data_opts, ++ struct bch_ratelimit *rate, ++ struct bch_move_stats *stats, ++ struct write_point_specifier wp, ++ bool wait_on_copygc) ++{ ++ struct moving_context ctxt; ++ int ret; + -+ EBUG_ON(atomic_read(&ctxt.write_sectors)); -+ -+ trace_move_data(c, -+ atomic64_read(&stats->sectors_moved), -+ atomic64_read(&stats->keys_moved)); ++ bch2_moving_ctxt_init(&ctxt, c, rate, stats, wp, wait_on_copygc); ++ ret = __bch2_evacuate_bucket(&ctxt, bucket, gen, data_opts); ++ bch2_moving_ctxt_exit(&ctxt); + + return ret; +} + -+typedef enum data_cmd (*move_btree_pred)(struct bch_fs *, void *, -+ struct btree *, struct bch_io_opts *, -+ struct data_opts *); ++typedef bool (*move_btree_pred)(struct bch_fs *, void *, ++ struct btree *, struct bch_io_opts *, ++ struct data_update_opts *); + +static int bch2_move_btree(struct bch_fs *c, + enum btree_id start_btree_id, struct bpos start_pos, @@ -64134,8 +64117,7 @@ index 
000000000000..bc507c59df33 + struct btree_iter iter; + struct btree *b; + enum btree_id id; -+ struct data_opts data_opts; -+ enum data_cmd cmd; ++ struct data_update_opts data_opts; + int ret = 0; + + bch2_trans_init(&trans, c, 0, 0); @@ -64164,17 +64146,8 @@ index 000000000000..bc507c59df33 + + stats->pos = iter.pos; + -+ switch ((cmd = pred(c, arg, b, &io_opts, &data_opts))) { -+ case DATA_SKIP: ++ if (!pred(c, arg, b, &io_opts, &data_opts)) + goto next; -+ case DATA_SCRUB: -+ BUG(); -+ case DATA_ADD_REPLICAS: -+ case DATA_REWRITE: -+ break; -+ default: -+ BUG(); -+ } + + ret = bch2_btree_node_rewrite(&trans, &iter, b, 0) ?: ret; + if (ret == -EINTR) @@ -64204,20 +64177,10 @@ index 000000000000..bc507c59df33 + return ret; +} + -+#if 0 -+static enum data_cmd scrub_pred(struct bch_fs *c, void *arg, -+ struct bkey_s_c k, -+ struct bch_io_opts *io_opts, -+ struct data_opts *data_opts) -+{ -+ return DATA_SCRUB; -+} -+#endif -+ -+static enum data_cmd rereplicate_pred(struct bch_fs *c, void *arg, -+ struct bkey_s_c k, -+ struct bch_io_opts *io_opts, -+ struct data_opts *data_opts) ++static bool rereplicate_pred(struct bch_fs *c, void *arg, ++ struct bkey_s_c k, ++ struct bch_io_opts *io_opts, ++ struct data_update_opts *data_opts) +{ + unsigned nr_good = bch2_bkey_durability(c, k); + unsigned replicas = bkey_is_btree_ptr(k.k) @@ -64225,43 +64188,50 @@ index 000000000000..bc507c59df33 + : io_opts->data_replicas; + + if (!nr_good || nr_good >= replicas) -+ return DATA_SKIP; ++ return false; + + data_opts->target = 0; -+ data_opts->nr_replicas = 1; ++ data_opts->extra_replicas = replicas - nr_good; + data_opts->btree_insert_flags = 0; -+ return DATA_ADD_REPLICAS; ++ return true; +} + -+static enum data_cmd migrate_pred(struct bch_fs *c, void *arg, -+ struct bkey_s_c k, -+ struct bch_io_opts *io_opts, -+ struct data_opts *data_opts) ++static bool migrate_pred(struct bch_fs *c, void *arg, ++ struct bkey_s_c k, ++ struct bch_io_opts *io_opts, ++ struct data_update_opts 
*data_opts) +{ ++ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); ++ const struct bch_extent_ptr *ptr; + struct bch_ioctl_data *op = arg; ++ unsigned i = 0; + -+ if (!bch2_bkey_has_device(k, op->migrate.dev)) -+ return DATA_SKIP; -+ ++ data_opts->rewrite_ptrs = 0; + data_opts->target = 0; -+ data_opts->nr_replicas = 1; ++ data_opts->extra_replicas = 0; + data_opts->btree_insert_flags = 0; -+ data_opts->rewrite_dev = op->migrate.dev; -+ return DATA_REWRITE; ++ ++ bkey_for_each_ptr(ptrs, ptr) { ++ if (ptr->dev == op->migrate.dev) ++ data_opts->rewrite_ptrs |= 1U << i; ++ i++; ++ } ++ ++ return data_opts->rewrite_ptrs != 0;; +} + -+static enum data_cmd rereplicate_btree_pred(struct bch_fs *c, void *arg, -+ struct btree *b, -+ struct bch_io_opts *io_opts, -+ struct data_opts *data_opts) ++static bool rereplicate_btree_pred(struct bch_fs *c, void *arg, ++ struct btree *b, ++ struct bch_io_opts *io_opts, ++ struct data_update_opts *data_opts) +{ + return rereplicate_pred(c, arg, bkey_i_to_s_c(&b->key), io_opts, data_opts); +} + -+static enum data_cmd migrate_btree_pred(struct bch_fs *c, void *arg, -+ struct btree *b, -+ struct bch_io_opts *io_opts, -+ struct data_opts *data_opts) ++static bool migrate_btree_pred(struct bch_fs *c, void *arg, ++ struct btree *b, ++ struct bch_io_opts *io_opts, ++ struct data_update_opts *data_opts) +{ + return migrate_pred(c, arg, bkey_i_to_s_c(&b->key), io_opts, data_opts); +} @@ -64290,21 +64260,21 @@ index 000000000000..bc507c59df33 + return false; +} + -+static enum data_cmd rewrite_old_nodes_pred(struct bch_fs *c, void *arg, -+ struct btree *b, -+ struct bch_io_opts *io_opts, -+ struct data_opts *data_opts) ++static bool rewrite_old_nodes_pred(struct bch_fs *c, void *arg, ++ struct btree *b, ++ struct bch_io_opts *io_opts, ++ struct data_update_opts *data_opts) +{ + if (b->version_ondisk != c->sb.version || + btree_node_need_rewrite(b) || + bformat_needs_redo(&b->format)) { + data_opts->target = 0; -+ data_opts->nr_replicas = 1; ++ 
data_opts->extra_replicas = 0; + data_opts->btree_insert_flags = 0; -+ return DATA_REWRITE; ++ return true; + } + -+ return DATA_SKIP; ++ return false; +} + +int bch2_scan_old_btree_nodes(struct bch_fs *c, struct bch_move_stats *stats) @@ -64348,8 +64318,11 @@ index 000000000000..bc507c59df33 + ret = bch2_move_data(c, + op.start_btree, op.start_pos, + op.end_btree, op.end_pos, -+ NULL, writepoint_hashed((unsigned long) current), -+ rereplicate_pred, c, stats) ?: ret; ++ NULL, ++ stats, ++ writepoint_hashed((unsigned long) current), ++ true, ++ rereplicate_pred, c) ?: ret; + ret = bch2_replicas_gc2(c) ?: ret; + break; + case BCH_DATA_OP_MIGRATE: @@ -64369,8 +64342,11 @@ index 000000000000..bc507c59df33 + ret = bch2_move_data(c, + op.start_btree, op.start_pos, + op.end_btree, op.end_pos, -+ NULL, writepoint_hashed((unsigned long) current), -+ migrate_pred, &op, stats) ?: ret; ++ NULL, ++ stats, ++ writepoint_hashed((unsigned long) current), ++ true, ++ migrate_pred, &op) ?: ret; + ret = bch2_replicas_gc2(c) ?: ret; + break; + case BCH_DATA_OP_REWRITE_OLD_NODES: @@ -64385,10 +64361,10 @@ index 000000000000..bc507c59df33 +} diff --git a/fs/bcachefs/move.h b/fs/bcachefs/move.h new file mode 100644 -index 000000000000..8a0500450d52 +index 000000000..c0fec69bb --- /dev/null +++ b/fs/bcachefs/move.h -@@ -0,0 +1,55 @@ +@@ -0,0 +1,67 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BCACHEFS_MOVE_H +#define _BCACHEFS_MOVE_H @@ -64401,11 +64377,15 @@ index 000000000000..8a0500450d52 +struct bch_read_bio; + +struct moving_context { -+ /* Closure for waiting on all reads and writes to complete */ -+ struct closure cl; -+ ++ struct bch_fs *c; ++ struct bch_ratelimit *rate; + struct bch_move_stats *stats; ++ struct write_point_specifier wp; ++ bool wait_on_copygc; ++ bool write_error; + ++ /* For waiting on outstanding reads and writes: */ ++ struct closure cl; + struct list_head reads; + + /* in flight sectors: */ @@ -64415,9 +64395,13 @@ index 000000000000..8a0500450d52 + 
wait_queue_head_t wait; +}; + -+typedef enum data_cmd (*move_pred_fn)(struct bch_fs *, void *, -+ struct bkey_s_c, -+ struct bch_io_opts *, struct data_opts *); ++typedef bool (*move_pred_fn)(struct bch_fs *, void *, struct bkey_s_c, ++ struct bch_io_opts *, struct data_update_opts *); ++ ++void bch2_moving_ctxt_exit(struct moving_context *); ++void bch2_moving_ctxt_init(struct moving_context *, struct bch_fs *, ++ struct bch_ratelimit *, struct bch_move_stats *, ++ struct write_point_specifier, bool); + +int bch2_scan_old_btree_nodes(struct bch_fs *, struct bch_move_stats *); + @@ -64425,16 +64409,20 @@ index 000000000000..8a0500450d52 + enum btree_id, struct bpos, + enum btree_id, struct bpos, + struct bch_ratelimit *, ++ struct bch_move_stats *, + struct write_point_specifier, -+ move_pred_fn, void *, -+ struct bch_move_stats *); ++ bool, ++ move_pred_fn, void *); + ++int __bch2_evacuate_bucket(struct moving_context *, ++ struct bpos, int, ++ struct data_update_opts); +int bch2_evacuate_bucket(struct bch_fs *, struct bpos, int, ++ struct data_update_opts, + struct bch_ratelimit *, ++ struct bch_move_stats *, + struct write_point_specifier, -+ enum data_cmd, -+ struct data_opts *, -+ struct bch_move_stats *); ++ bool); +int bch2_data_job(struct bch_fs *, + struct bch_move_stats *, + struct bch_ioctl_data); @@ -64446,7 +64434,7 @@ index 000000000000..8a0500450d52 +#endif /* _BCACHEFS_MOVE_H */ diff --git a/fs/bcachefs/move_types.h b/fs/bcachefs/move_types.h new file mode 100644 -index 000000000000..9df6d18137a5 +index 000000000..9df6d1813 --- /dev/null +++ b/fs/bcachefs/move_types.h @@ -0,0 +1,19 @@ @@ -64471,10 +64459,10 @@ index 000000000000..9df6d18137a5 +#endif /* _BCACHEFS_MOVE_TYPES_H */ diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c new file mode 100644 -index 000000000000..fb629129d794 +index 000000000..f9ad4cb26 --- /dev/null +++ b/fs/bcachefs/movinggc.c -@@ -0,0 +1,274 @@ +@@ -0,0 +1,282 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * 
Moving/copying garbage collector @@ -64572,11 +64560,11 @@ index 000000000000..fb629129d794 + struct bch_dev *ca; + unsigned dev_idx; + size_t heap_size = 0; -+ struct data_opts data_opts = { -+ .nr_replicas = 1, -+ .btree_insert_flags = BTREE_INSERT_USE_RESERVE|JOURNAL_WATERMARK_copygc, ++ struct moving_context ctxt; ++ struct data_update_opts data_opts = { ++ .btree_insert_flags = BTREE_INSERT_USE_RESERVE|JOURNAL_WATERMARK_copygc, + }; -+ int ret; ++ int ret = 0; + + bch_move_stats_init(&move_stats, "copygc"); + @@ -64625,20 +64613,22 @@ index 000000000000..fb629129d794 + + heap_resort(h, fragmentation_cmp, NULL); + -+ while (h->used) { -+ BUG_ON(!heap_pop(h, e, -fragmentation_cmp, NULL)); -+ /* not correct w.r.t. device removal */ ++ bch2_moving_ctxt_init(&ctxt, c, NULL, &move_stats, ++ writepoint_ptr(&c->copygc_write_point), ++ false); + -+ ret = bch2_evacuate_bucket(c, POS(e.dev, e.bucket), e.gen, NULL, -+ writepoint_ptr(&c->copygc_write_point), -+ DATA_REWRITE, &data_opts, -+ &move_stats); -+ if (ret < 0) -+ bch_err(c, "error %i from bch2_move_data() in copygc", ret); -+ if (ret) -+ return ret; ++ /* not correct w.r.t. 
device removal */ ++ while (h->used && !ret) { ++ BUG_ON(!heap_pop(h, e, -fragmentation_cmp, NULL)); ++ ret = __bch2_evacuate_bucket(&ctxt, POS(e.dev, e.bucket), e.gen, ++ data_opts); + } + ++ bch2_moving_ctxt_exit(&ctxt); ++ ++ if (ret < 0) ++ bch_err(c, "error %i from bch2_move_data() in copygc", ret); ++ + trace_copygc(c, atomic64_read(&move_stats.sectors_moved), 0, 0, 0); + return ret; +} @@ -64681,10 +64671,11 @@ index 000000000000..fb629129d794 + struct bch_fs *c = arg; + struct io_clock *clock = &c->io_clock[WRITE]; + u64 last, wait; ++ int ret = 0; + + set_freezable(); + -+ while (!kthread_should_stop()) { ++ while (!ret && !kthread_should_stop()) { + cond_resched(); + + if (kthread_wait_freezable(c->copy_gc_enabled)) @@ -64703,8 +64694,11 @@ index 000000000000..fb629129d794 + + c->copygc_wait = 0; + -+ if (bch2_copygc(c)) -+ break; ++ c->copygc_running = true; ++ ret = bch2_copygc(c); ++ c->copygc_running = false; ++ ++ wake_up(&c->copygc_running_wq); + } + + return 0; @@ -64748,10 +64742,12 @@ index 000000000000..fb629129d794 + +void bch2_fs_copygc_init(struct bch_fs *c) +{ ++ init_waitqueue_head(&c->copygc_running_wq); ++ c->copygc_running = false; +} diff --git a/fs/bcachefs/movinggc.h b/fs/bcachefs/movinggc.h new file mode 100644 -index 000000000000..922738247d03 +index 000000000..922738247 --- /dev/null +++ b/fs/bcachefs/movinggc.h @@ -0,0 +1,9 @@ @@ -64766,7 +64762,7 @@ index 000000000000..922738247d03 +#endif /* _BCACHEFS_MOVINGGC_H */ diff --git a/fs/bcachefs/opts.c b/fs/bcachefs/opts.c new file mode 100644 -index 000000000000..407b221e8f6c +index 000000000..407b221e8 --- /dev/null +++ b/fs/bcachefs/opts.c @@ -0,0 +1,578 @@ @@ -65350,10 +65346,10 @@ index 000000000000..407b221e8f6c +} diff --git a/fs/bcachefs/opts.h b/fs/bcachefs/opts.h new file mode 100644 -index 000000000000..54e3575f4d0a +index 000000000..2f5f49cb7 --- /dev/null +++ b/fs/bcachefs/opts.h -@@ -0,0 +1,499 @@ +@@ -0,0 +1,504 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef 
_BCACHEFS_OPTS_H +#define _BCACHEFS_OPTS_H @@ -65625,7 +65621,7 @@ index 000000000000..54e3575f4d0a + BCH2_NO_SB_OPT, true, \ + NULL, "Enable discard/TRIM support") \ + x(verbose, u8, \ -+ OPT_FS|OPT_MOUNT, \ ++ OPT_FS|OPT_MOUNT|OPT_RUNTIME, \ + OPT_BOOL(), \ + BCH2_NO_SB_OPT, false, \ + NULL, "Extra debugging information during mount/recovery")\ @@ -65646,6 +65642,11 @@ index 000000000000..54e3575f4d0a + OPT_UINT(0, U32_MAX), \ + BCH_SB_JOURNAL_RECLAIM_DELAY, 100, \ + NULL, "Delay in milliseconds before automatic journal reclaim")\ ++ x(move_bytes_in_flight, u32, \ ++ OPT_HUMAN_READABLE|OPT_FS|OPT_MOUNT|OPT_RUNTIME, \ ++ OPT_UINT(1024, U32_MAX), \ ++ BCH2_NO_SB_OPT, 1U << 20, \ ++ NULL, "Amount of IO in flight to keep in flight by the move path")\ + x(fsck, u8, \ + OPT_FS|OPT_MOUNT, \ + OPT_BOOL(), \ @@ -65855,7 +65856,7 @@ index 000000000000..54e3575f4d0a +#endif /* _BCACHEFS_OPTS_H */ diff --git a/fs/bcachefs/quota.c b/fs/bcachefs/quota.c new file mode 100644 -index 000000000000..d764dc7abfe8 +index 000000000..d764dc7ab --- /dev/null +++ b/fs/bcachefs/quota.c @@ -0,0 +1,859 @@ @@ -66720,7 +66721,7 @@ index 000000000000..d764dc7abfe8 +#endif /* CONFIG_BCACHEFS_QUOTA */ diff --git a/fs/bcachefs/quota.h b/fs/bcachefs/quota.h new file mode 100644 -index 000000000000..8c67ae1da7c7 +index 000000000..8c67ae1da --- /dev/null +++ b/fs/bcachefs/quota.h @@ -0,0 +1,71 @@ @@ -66797,7 +66798,7 @@ index 000000000000..8c67ae1da7c7 +#endif /* _BCACHEFS_QUOTA_H */ diff --git a/fs/bcachefs/quota_types.h b/fs/bcachefs/quota_types.h new file mode 100644 -index 000000000000..6a136083d389 +index 000000000..6a136083d --- /dev/null +++ b/fs/bcachefs/quota_types.h @@ -0,0 +1,43 @@ @@ -66846,10 +66847,10 @@ index 000000000000..6a136083d389 +#endif /* _BCACHEFS_QUOTA_TYPES_H */ diff --git a/fs/bcachefs/rebalance.c b/fs/bcachefs/rebalance.c new file mode 100644 -index 000000000000..23cc46e95021 +index 000000000..31da40933 --- /dev/null +++ b/fs/bcachefs/rebalance.c -@@ -0,0 +1,349 @@ +@@ 
-0,0 +1,358 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "bcachefs.h" @@ -66874,62 +66875,70 @@ index 000000000000..23cc46e95021 + * returns -1 if it should not be moved, or + * device of pointer that should be moved, if known, or INT_MAX if unknown + */ -+static int __bch2_rebalance_pred(struct bch_fs *c, -+ struct bkey_s_c k, -+ struct bch_io_opts *io_opts) ++static bool rebalance_pred(struct bch_fs *c, void *arg, ++ struct bkey_s_c k, ++ struct bch_io_opts *io_opts, ++ struct data_update_opts *data_opts) +{ + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ const union bch_extent_entry *entry; -+ struct extent_ptr_decoded p; ++ unsigned i; ++ ++ data_opts->rewrite_ptrs = 0; ++ data_opts->target = io_opts->background_target; ++ data_opts->extra_replicas = 0; ++ data_opts->btree_insert_flags = 0; + + if (io_opts->background_compression && -+ !bch2_bkey_is_incompressible(k)) -+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) ++ !bch2_bkey_is_incompressible(k)) { ++ const union bch_extent_entry *entry; ++ struct extent_ptr_decoded p; ++ ++ i = 0; ++ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { + if (!p.ptr.cached && + p.crc.compression_type != + bch2_compression_opt_to_type[io_opts->background_compression]) -+ return p.ptr.dev; ++ data_opts->rewrite_ptrs |= 1U << i; ++ i++; ++ } ++ } + -+ if (io_opts->background_target) -+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) -+ if (!p.ptr.cached && -+ !bch2_dev_in_target(c, p.ptr.dev, io_opts->background_target)) -+ return p.ptr.dev; ++ if (io_opts->background_target) { ++ const struct bch_extent_ptr *ptr; + -+ return -1; ++ i = 0; ++ bkey_for_each_ptr(ptrs, ptr) { ++ if (!ptr->cached && ++ !bch2_dev_in_target(c, ptr->dev, io_opts->background_target)) ++ data_opts->rewrite_ptrs |= 1U << i; ++ i++; ++ } ++ } ++ ++ return data_opts->rewrite_ptrs != 0; +} + +void bch2_rebalance_add_key(struct bch_fs *c, + struct bkey_s_c k, + struct bch_io_opts *io_opts) +{ -+ atomic64_t *counter; -+ int dev; ++ struct 
data_update_opts update_opts = { 0 }; ++ struct bkey_ptrs_c ptrs; ++ const struct bch_extent_ptr *ptr; ++ unsigned i; + -+ dev = __bch2_rebalance_pred(c, k, io_opts); -+ if (dev < 0) ++ if (!rebalance_pred(c, NULL, k, io_opts, &update_opts)) + return; + -+ counter = dev < INT_MAX -+ ? &bch_dev_bkey_exists(c, dev)->rebalance_work -+ : &c->rebalance.work_unknown_dev; -+ -+ if (atomic64_add_return(k.k->size, counter) == k.k->size) -+ rebalance_wakeup(c); -+} -+ -+static enum data_cmd rebalance_pred(struct bch_fs *c, void *arg, -+ struct bkey_s_c k, -+ struct bch_io_opts *io_opts, -+ struct data_opts *data_opts) -+{ -+ if (__bch2_rebalance_pred(c, k, io_opts) >= 0) { -+ data_opts->target = io_opts->background_target; -+ data_opts->nr_replicas = 1; -+ data_opts->btree_insert_flags = 0; -+ return DATA_ADD_REPLICAS; -+ } else { -+ return DATA_SKIP; ++ i = 0; ++ ptrs = bch2_bkey_ptrs_c(k); ++ bkey_for_each_ptr(ptrs, ptr) { ++ if ((1U << i) && update_opts.rewrite_ptrs) ++ if (atomic64_add_return(k.k->size, ++ &bch_dev_bkey_exists(c, ptr->dev)->rebalance_work) == ++ k.k->size) ++ rebalance_wakeup(c); ++ i++; + } +} + @@ -67097,9 +67106,10 @@ index 000000000000..23cc46e95021 + BTREE_ID_NR, POS_MAX, + /* ratelimiting disabled for now */ + NULL, /* &r->pd.rate, */ ++ &move_stats, + writepoint_ptr(&c->rebalance_write_point), -+ rebalance_pred, NULL, -+ &move_stats); ++ true, ++ rebalance_pred, NULL); + } + + return 0; @@ -67201,7 +67211,7 @@ index 000000000000..23cc46e95021 +} diff --git a/fs/bcachefs/rebalance.h b/fs/bcachefs/rebalance.h new file mode 100644 -index 000000000000..7ade0bb81cce +index 000000000..7ade0bb81 --- /dev/null +++ b/fs/bcachefs/rebalance.h @@ -0,0 +1,28 @@ @@ -67235,7 +67245,7 @@ index 000000000000..7ade0bb81cce +#endif /* _BCACHEFS_REBALANCE_H */ diff --git a/fs/bcachefs/rebalance_types.h b/fs/bcachefs/rebalance_types.h new file mode 100644 -index 000000000000..7462a92e9598 +index 000000000..7462a92e9 --- /dev/null +++ b/fs/bcachefs/rebalance_types.h @@ 
-0,0 +1,26 @@ @@ -67267,10 +67277,10 @@ index 000000000000..7462a92e9598 +#endif /* _BCACHEFS_REBALANCE_TYPES_H */ diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c new file mode 100644 -index 000000000000..63e8c1c3d940 +index 000000000..eea025a83 --- /dev/null +++ b/fs/bcachefs/recovery.c -@@ -0,0 +1,1570 @@ +@@ -0,0 +1,1584 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "bcachefs.h" @@ -68192,6 +68202,19 @@ index 000000000000..63e8c1c3d940 + return ERR_PTR(ret); +} + ++static bool btree_id_is_alloc(enum btree_id id) ++{ ++ switch (id) { ++ case BTREE_ID_alloc: ++ case BTREE_ID_backpointers: ++ case BTREE_ID_need_discard: ++ case BTREE_ID_freespace: ++ return true; ++ default: ++ return false; ++ } ++} ++ +static int read_btree_roots(struct bch_fs *c) +{ + unsigned i; @@ -68203,14 +68226,14 @@ index 000000000000..63e8c1c3d940 + if (!r->alive) + continue; + -+ if (i == BTREE_ID_alloc && ++ if (btree_id_is_alloc(i) && + c->opts.reconstruct_alloc) { + c->sb.compat &= ~(1ULL << BCH_COMPAT_alloc_info); + continue; + } + + if (r->error) { -+ __fsck_err(c, i == BTREE_ID_alloc ++ __fsck_err(c, btree_id_is_alloc(i) + ? FSCK_CAN_IGNORE : 0, + "invalid btree root %s", + bch2_btree_ids[i]); @@ -68220,7 +68243,8 @@ index 000000000000..63e8c1c3d940 + + ret = bch2_btree_root_read(c, i, &r->key, r->level); + if (ret) { -+ __fsck_err(c, i == BTREE_ID_alloc ++ __fsck_err(c, ++ btree_id_is_alloc(i) + ? 
FSCK_CAN_IGNORE : 0, + "error reading btree root %s", + bch2_btree_ids[i]); @@ -68843,7 +68867,7 @@ index 000000000000..63e8c1c3d940 +} diff --git a/fs/bcachefs/recovery.h b/fs/bcachefs/recovery.h new file mode 100644 -index 000000000000..8c0348e8b84c +index 000000000..8c0348e8b --- /dev/null +++ b/fs/bcachefs/recovery.h @@ -0,0 +1,58 @@ @@ -68907,7 +68931,7 @@ index 000000000000..8c0348e8b84c +#endif /* _BCACHEFS_RECOVERY_H */ diff --git a/fs/bcachefs/reflink.c b/fs/bcachefs/reflink.c new file mode 100644 -index 000000000000..4e589c02a93b +index 000000000..2038e3502 --- /dev/null +++ b/fs/bcachefs/reflink.c @@ -0,0 +1,421 @@ @@ -69195,7 +69219,7 @@ index 000000000000..4e589c02a93b + u32 dst_snapshot, src_snapshot; + int ret = 0, ret2 = 0; + -+ if (!percpu_ref_tryget(&c->writes)) ++ if (!percpu_ref_tryget_live(&c->writes)) + return -EROFS; + + bch2_check_set_feature(c, BCH_FEATURE_reflink); @@ -69334,7 +69358,7 @@ index 000000000000..4e589c02a93b +} diff --git a/fs/bcachefs/reflink.h b/fs/bcachefs/reflink.h new file mode 100644 -index 000000000000..f9848dc3eebb +index 000000000..f9848dc3e --- /dev/null +++ b/fs/bcachefs/reflink.h @@ -0,0 +1,76 @@ @@ -69416,7 +69440,7 @@ index 000000000000..f9848dc3eebb +#endif /* _BCACHEFS_REFLINK_H */ diff --git a/fs/bcachefs/replicas.c b/fs/bcachefs/replicas.c new file mode 100644 -index 000000000000..9cb47ba62bc3 +index 000000000..9cb47ba62 --- /dev/null +++ b/fs/bcachefs/replicas.c @@ -0,0 +1,1073 @@ @@ -70495,7 +70519,7 @@ index 000000000000..9cb47ba62bc3 +} diff --git a/fs/bcachefs/replicas.h b/fs/bcachefs/replicas.h new file mode 100644 -index 000000000000..87820b2e1ad3 +index 000000000..87820b2e1 --- /dev/null +++ b/fs/bcachefs/replicas.h @@ -0,0 +1,106 @@ @@ -70607,7 +70631,7 @@ index 000000000000..87820b2e1ad3 +#endif /* _BCACHEFS_REPLICAS_H */ diff --git a/fs/bcachefs/replicas_types.h b/fs/bcachefs/replicas_types.h new file mode 100644 -index 000000000000..0535b1d3760e +index 000000000..0535b1d37 --- /dev/null +++ 
b/fs/bcachefs/replicas_types.h @@ -0,0 +1,10 @@ @@ -70623,7 +70647,7 @@ index 000000000000..0535b1d3760e +#endif /* _BCACHEFS_REPLICAS_TYPES_H */ diff --git a/fs/bcachefs/siphash.c b/fs/bcachefs/siphash.c new file mode 100644 -index 000000000000..c062edb3fbc2 +index 000000000..c062edb3f --- /dev/null +++ b/fs/bcachefs/siphash.c @@ -0,0 +1,173 @@ @@ -70802,7 +70826,7 @@ index 000000000000..c062edb3fbc2 +} diff --git a/fs/bcachefs/siphash.h b/fs/bcachefs/siphash.h new file mode 100644 -index 000000000000..3dfaf34a43b2 +index 000000000..3dfaf34a4 --- /dev/null +++ b/fs/bcachefs/siphash.h @@ -0,0 +1,87 @@ @@ -70895,7 +70919,7 @@ index 000000000000..3dfaf34a43b2 +#endif /* _SIPHASH_H_ */ diff --git a/fs/bcachefs/str_hash.h b/fs/bcachefs/str_hash.h new file mode 100644 -index 000000000000..591bbb9f8beb +index 000000000..591bbb9f8 --- /dev/null +++ b/fs/bcachefs/str_hash.h @@ -0,0 +1,351 @@ @@ -71252,7 +71276,7 @@ index 000000000000..591bbb9f8beb +#endif /* _BCACHEFS_STR_HASH_H */ diff --git a/fs/bcachefs/subvolume.c b/fs/bcachefs/subvolume.c new file mode 100644 -index 000000000000..8f41a06c3e11 +index 000000000..60b60de83 --- /dev/null +++ b/fs/bcachefs/subvolume.c @@ -0,0 +1,1095 @@ @@ -71987,7 +72011,7 @@ index 000000000000..8f41a06c3e11 + +static void bch2_delete_dead_snapshots(struct bch_fs *c) +{ -+ if (unlikely(!percpu_ref_tryget(&c->writes))) ++ if (unlikely(!percpu_ref_tryget_live(&c->writes))) + return; + + if (!queue_work(system_long_wq, &c->snapshot_delete_work)) @@ -72189,7 +72213,7 @@ index 000000000000..8f41a06c3e11 + if (ret) + return ret; + -+ if (unlikely(!percpu_ref_tryget(&c->writes))) ++ if (unlikely(!percpu_ref_tryget_live(&c->writes))) + return -EROFS; + + if (!queue_work(system_long_wq, &c->snapshot_wait_for_pagecache_and_delete_work)) @@ -72353,7 +72377,7 @@ index 000000000000..8f41a06c3e11 +} diff --git a/fs/bcachefs/subvolume.h b/fs/bcachefs/subvolume.h new file mode 100644 -index 000000000000..b1739d29c7d4 +index 000000000..b1739d29c --- 
/dev/null +++ b/fs/bcachefs/subvolume.h @@ -0,0 +1,126 @@ @@ -72485,7 +72509,7 @@ index 000000000000..b1739d29c7d4 +#endif /* _BCACHEFS_SUBVOLUME_H */ diff --git a/fs/bcachefs/subvolume_types.h b/fs/bcachefs/subvolume_types.h new file mode 100644 -index 000000000000..f7562b5d51df +index 000000000..f7562b5d5 --- /dev/null +++ b/fs/bcachefs/subvolume_types.h @@ -0,0 +1,9 @@ @@ -72500,7 +72524,7 @@ index 000000000000..f7562b5d51df +#endif /* _BCACHEFS_SUBVOLUME_TYPES_H */ diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c new file mode 100644 -index 000000000000..8b8130993a59 +index 000000000..8b8130993 --- /dev/null +++ b/fs/bcachefs/super-io.c @@ -0,0 +1,1602 @@ @@ -74108,7 +74132,7 @@ index 000000000000..8b8130993a59 +} diff --git a/fs/bcachefs/super-io.h b/fs/bcachefs/super-io.h new file mode 100644 -index 000000000000..14a25f6fe29a +index 000000000..14a25f6fe --- /dev/null +++ b/fs/bcachefs/super-io.h @@ -0,0 +1,126 @@ @@ -74240,7 +74264,7 @@ index 000000000000..14a25f6fe29a +#endif /* _BCACHEFS_SUPER_IO_H */ diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c new file mode 100644 -index 000000000000..2908974034ca +index 000000000..290897403 --- /dev/null +++ b/fs/bcachefs/super.c @@ -0,0 +1,1970 @@ @@ -76216,7 +76240,7 @@ index 000000000000..2908974034ca +module_init(bcachefs_init); diff --git a/fs/bcachefs/super.h b/fs/bcachefs/super.h new file mode 100644 -index 000000000000..8501adaff4c2 +index 000000000..8501adaff --- /dev/null +++ b/fs/bcachefs/super.h @@ -0,0 +1,264 @@ @@ -76486,7 +76510,7 @@ index 000000000000..8501adaff4c2 +#endif /* _BCACHEFS_SUPER_H */ diff --git a/fs/bcachefs/super_types.h b/fs/bcachefs/super_types.h new file mode 100644 -index 000000000000..89419fc7930d +index 000000000..89419fc79 --- /dev/null +++ b/fs/bcachefs/super_types.h @@ -0,0 +1,51 @@ @@ -76543,10 +76567,10 @@ index 000000000000..89419fc7930d +#endif /* _BCACHEFS_SUPER_TYPES_H */ diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c new file mode 100644 -index 
000000000000..d72ec0629a37 +index 000000000..2c650055f --- /dev/null +++ b/fs/bcachefs/sysfs.c -@@ -0,0 +1,948 @@ +@@ -0,0 +1,943 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * bcache sysfs interfaces @@ -76731,7 +76755,6 @@ index 000000000000..d72ec0629a37 +read_attribute(btree_updates); +read_attribute(btree_cache); +read_attribute(btree_key_cache); -+read_attribute(btree_transactions); +read_attribute(stripes_heap); +read_attribute(open_buckets); + @@ -76969,9 +76992,6 @@ index 000000000000..d72ec0629a37 + if (attr == &sysfs_btree_key_cache) + bch2_btree_key_cache_to_text(out, &c->btree_key_cache); + -+ if (attr == &sysfs_btree_transactions) -+ bch2_btree_trans_to_text(out, c); -+ + if (attr == &sysfs_stripes_heap) + bch2_stripes_heap_to_text(out, c); + @@ -77166,7 +77186,6 @@ index 000000000000..d72ec0629a37 + &sysfs_btree_updates, + &sysfs_btree_cache, + &sysfs_btree_key_cache, -+ &sysfs_btree_transactions, + &sysfs_new_stripes, + &sysfs_stripes_heap, + &sysfs_open_buckets, @@ -77225,7 +77244,7 @@ index 000000000000..d72ec0629a37 + * We don't need to take c->writes for correctness, but it eliminates an + * unsightly error message in the dmesg log when we're RO: + */ -+ if (unlikely(!percpu_ref_tryget(&c->writes))) ++ if (unlikely(!percpu_ref_tryget_live(&c->writes))) + return -EROFS; + + tmp = kstrdup(buf, GFP_KERNEL); @@ -77497,7 +77516,7 @@ index 000000000000..d72ec0629a37 +#endif /* _BCACHEFS_SYSFS_H_ */ diff --git a/fs/bcachefs/sysfs.h b/fs/bcachefs/sysfs.h new file mode 100644 -index 000000000000..222cd5062702 +index 000000000..222cd5062 --- /dev/null +++ b/fs/bcachefs/sysfs.h @@ -0,0 +1,48 @@ @@ -77551,7 +77570,7 @@ index 000000000000..222cd5062702 +#endif /* _BCACHEFS_SYSFS_H_ */ diff --git a/fs/bcachefs/tests.c b/fs/bcachefs/tests.c new file mode 100644 -index 000000000000..1954891ce7ee +index 000000000..1954891ce --- /dev/null +++ b/fs/bcachefs/tests.c @@ -0,0 +1,947 @@ @@ -78504,7 +78523,7 @@ index 000000000000..1954891ce7ee +#endif /* 
CONFIG_BCACHEFS_TESTS */ diff --git a/fs/bcachefs/tests.h b/fs/bcachefs/tests.h new file mode 100644 -index 000000000000..c73b18aea7e0 +index 000000000..c73b18aea --- /dev/null +++ b/fs/bcachefs/tests.h @@ -0,0 +1,15 @@ @@ -78525,7 +78544,7 @@ index 000000000000..c73b18aea7e0 +#endif /* _BCACHEFS_TEST_H */ diff --git a/fs/bcachefs/trace.c b/fs/bcachefs/trace.c new file mode 100644 -index 000000000000..59e8dfa3d245 +index 000000000..59e8dfa3d --- /dev/null +++ b/fs/bcachefs/trace.c @@ -0,0 +1,12 @@ @@ -78543,10 +78562,10 @@ index 000000000000..59e8dfa3d245 +#include diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c new file mode 100644 -index 000000000000..85b8f3df22e0 +index 000000000..8ef4b5915 --- /dev/null +++ b/fs/bcachefs/util.c -@@ -0,0 +1,957 @@ +@@ -0,0 +1,958 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * random utiility code, for bcache but in theory not specific to bcache @@ -78694,9 +78713,10 @@ index 000000000000..85b8f3df22e0 + if (f_n > div_u64(U64_MAX, b)) + return -ERANGE; + -+ if (v + (f_n * b) / f_d < v) ++ f_n = div_u64(f_n * b, f_d); ++ if (v + f_n < v) + return -ERANGE; -+ v += (f_n * b) / f_d; ++ v += f_n; + + *res = v; + return cp - start; @@ -79506,7 +79526,7 @@ index 000000000000..85b8f3df22e0 +} diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h new file mode 100644 -index 000000000000..1fe66fd91ccc +index 000000000..1fe66fd91 --- /dev/null +++ b/fs/bcachefs/util.h @@ -0,0 +1,783 @@ @@ -80295,7 +80315,7 @@ index 000000000000..1fe66fd91ccc +#endif /* _BCACHEFS_UTIL_H */ diff --git a/fs/bcachefs/varint.c b/fs/bcachefs/varint.c new file mode 100644 -index 000000000000..5143b603bf67 +index 000000000..5143b603b --- /dev/null +++ b/fs/bcachefs/varint.c @@ -0,0 +1,121 @@ @@ -80422,7 +80442,7 @@ index 000000000000..5143b603bf67 +} diff --git a/fs/bcachefs/varint.h b/fs/bcachefs/varint.h new file mode 100644 -index 000000000000..92a182fb3d7a +index 000000000..92a182fb3 --- /dev/null +++ b/fs/bcachefs/varint.h @@ -0,0 +1,11 @@ @@ -80439,7 
+80459,7 @@ index 000000000000..92a182fb3d7a +#endif /* _BCACHEFS_VARINT_H */ diff --git a/fs/bcachefs/vstructs.h b/fs/bcachefs/vstructs.h new file mode 100644 -index 000000000000..53a694d71967 +index 000000000..53a694d71 --- /dev/null +++ b/fs/bcachefs/vstructs.h @@ -0,0 +1,63 @@ @@ -80508,7 +80528,7 @@ index 000000000000..53a694d71967 +#endif /* _VSTRUCTS_H */ diff --git a/fs/bcachefs/xattr.c b/fs/bcachefs/xattr.c new file mode 100644 -index 000000000000..1236127162e5 +index 000000000..123612716 --- /dev/null +++ b/fs/bcachefs/xattr.c @@ -0,0 +1,648 @@ @@ -81162,7 +81182,7 @@ index 000000000000..1236127162e5 +} diff --git a/fs/bcachefs/xattr.h b/fs/bcachefs/xattr.h new file mode 100644 -index 000000000000..66d7a1e30350 +index 000000000..66d7a1e30 --- /dev/null +++ b/fs/bcachefs/xattr.h @@ -0,0 +1,50 @@ @@ -81217,7 +81237,7 @@ index 000000000000..66d7a1e30350 + +#endif /* _BCACHEFS_XATTR_H */ diff --git a/fs/d_path.c b/fs/d_path.c -index e4e0ebad1f15..1bd9e85f2f65 100644 +index e4e0ebad1..1bd9e85f2 100644 --- a/fs/d_path.c +++ b/fs/d_path.c @@ -5,6 +5,7 @@ @@ -81270,7 +81290,7 @@ index e4e0ebad1f15..1bd9e85f2f65 100644 * Helper function for dentry_operations.d_dname() members */ diff --git a/fs/dcache.c b/fs/dcache.c -index 93f4f5ee07bf..d90ed65e2a75 100644 +index 93f4f5ee0..d90ed65e2 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -3193,9 +3193,8 @@ void d_genocide(struct dentry *parent) @@ -81299,7 +81319,7 @@ index 93f4f5ee07bf..d90ed65e2a75 100644 } EXPORT_SYMBOL(d_tmpfile); diff --git a/fs/inode.c b/fs/inode.c -index 9d9b422504d1..8694e55820ec 100644 +index bd4da9c52..ac0da28a1 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -56,8 +56,23 @@ @@ -81786,7 +81806,7 @@ index 9d9b422504d1..8694e55820ec 100644 14, HASH_ZERO, diff --git a/include/linux/bio.h b/include/linux/bio.h -index 00450fd86bb4..c11103a8720a 100644 +index 00450fd86..c11103a87 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -483,7 +483,12 @@ extern void bio_copy_data_iter(struct bio *dst, 
struct bvec_iter *dst_iter, @@ -81804,10 +81824,10 @@ index 00450fd86bb4..c11103a8720a 100644 static inline void bio_release_pages(struct bio *bio, bool mark_dirty) { diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h -index 60d016138997..1757ffc35d09 100644 +index 108e3d114..20f76bd27 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h -@@ -872,6 +872,7 @@ extern const char *blk_op_str(unsigned int op); +@@ -873,6 +873,7 @@ extern const char *blk_op_str(unsigned int op); int blk_status_to_errno(blk_status_t status); blk_status_t errno_to_blk_status(int errno); @@ -81815,388 +81835,94 @@ index 60d016138997..1757ffc35d09 100644 /* only poll the hardware once, don't continue until a completion was found */ #define BLK_POLL_ONESHOT (1 << 0) -diff --git a/include/linux/closure.h b/include/linux/closure.h -new file mode 100644 -index 000000000000..36b4a83f9b77 ---- /dev/null +diff --git a/drivers/md/bcache/closure.h b/include/linux/closure.h +similarity index 94% +rename from drivers/md/bcache/closure.h +rename to include/linux/closure.h +index c88cdc4ae..36b4a83f9 100644 +--- a/drivers/md/bcache/closure.h +++ b/include/linux/closure.h -@@ -0,0 +1,399 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _LINUX_CLOSURE_H -+#define _LINUX_CLOSURE_H -+ -+#include -+#include -+#include -+#include -+ -+/* -+ * Closure is perhaps the most overused and abused term in computer science, but -+ * since I've been unable to come up with anything better you're stuck with it -+ * again. -+ * -+ * What are closures? -+ * -+ * They embed a refcount. The basic idea is they count "things that are in -+ * progress" - in flight bios, some other thread that's doing something else - -+ * anything you might want to wait on. -+ * -+ * The refcount may be manipulated with closure_get() and closure_put(). -+ * closure_put() is where many of the interesting things happen, when it causes -+ * the refcount to go to 0. 
-+ * -+ * Closures can be used to wait on things both synchronously and asynchronously, -+ * and synchronous and asynchronous use can be mixed without restriction. To -+ * wait synchronously, use closure_sync() - you will sleep until your closure's -+ * refcount hits 1. -+ * -+ * To wait asynchronously, use -+ * continue_at(cl, next_function, workqueue); -+ * -+ * passing it, as you might expect, the function to run when nothing is pending -+ * and the workqueue to run that function out of. -+ * -+ * continue_at() also, critically, requires a 'return' immediately following the -+ * location where this macro is referenced, to return to the calling function. -+ * There's good reason for this. -+ * -+ * To use safely closures asynchronously, they must always have a refcount while -+ * they are running owned by the thread that is running them. Otherwise, suppose -+ * you submit some bios and wish to have a function run when they all complete: -+ * -+ * foo_endio(struct bio *bio) -+ * { -+ * closure_put(cl); -+ * } -+ * -+ * closure_init(cl); -+ * -+ * do_stuff(); -+ * closure_get(cl); -+ * bio1->bi_endio = foo_endio; -+ * bio_submit(bio1); -+ * -+ * do_more_stuff(); -+ * closure_get(cl); -+ * bio2->bi_endio = foo_endio; -+ * bio_submit(bio2); -+ * -+ * continue_at(cl, complete_some_read, system_wq); -+ * -+ * If closure's refcount started at 0, complete_some_read() could run before the -+ * second bio was submitted - which is almost always not what you want! More -+ * importantly, it wouldn't be possible to say whether the original thread or -+ * complete_some_read()'s thread owned the closure - and whatever state it was -+ * associated with! -+ * -+ * So, closure_init() initializes a closure's refcount to 1 - and when a -+ * closure_fn is run, the refcount will be reset to 1 first. -+ * -+ * Then, the rule is - if you got the refcount with closure_get(), release it -+ * with closure_put() (i.e, in a bio->bi_endio function). 
If you have a refcount -+ * on a closure because you called closure_init() or you were run out of a -+ * closure - _always_ use continue_at(). Doing so consistently will help -+ * eliminate an entire class of particularly pernicious races. -+ * -+ * Lastly, you might have a wait list dedicated to a specific event, and have no -+ * need for specifying the condition - you just want to wait until someone runs -+ * closure_wake_up() on the appropriate wait list. In that case, just use -+ * closure_wait(). It will return either true or false, depending on whether the -+ * closure was already on a wait list or not - a closure can only be on one wait -+ * list at a time. -+ * -+ * Parents: -+ * -+ * closure_init() takes two arguments - it takes the closure to initialize, and -+ * a (possibly null) parent. -+ * -+ * If parent is non null, the new closure will have a refcount for its lifetime; -+ * a closure is considered to be "finished" when its refcount hits 0 and the -+ * function to run is null. Hence -+ * -+ * continue_at(cl, NULL, NULL); -+ * -+ * returns up the (spaghetti) stack of closures, precisely like normal return -+ * returns up the C stack. continue_at() with non null fn is better thought of -+ * as doing a tail call. -+ * -+ * All this implies that a closure should typically be embedded in a particular -+ * struct (which its refcount will normally control the lifetime of), and that -+ * struct can very much be thought of as a stack frame. -+ */ -+ -+struct closure; -+struct closure_syncer; -+typedef void (closure_fn) (struct closure *); -+extern struct dentry *bcache_debug; -+ -+struct closure_waitlist { -+ struct llist_head list; -+}; -+ -+enum closure_state { -+ /* -+ * CLOSURE_WAITING: Set iff the closure is on a waitlist. Must be set by -+ * the thread that owns the closure, and cleared by the thread that's -+ * waking up the closure. 
-+ * -+ * The rest are for debugging and don't affect behaviour: -+ * -+ * CLOSURE_RUNNING: Set when a closure is running (i.e. by -+ * closure_init() and when closure_put() runs then next function), and -+ * must be cleared before remaining hits 0. Primarily to help guard -+ * against incorrect usage and accidentally transferring references. -+ * continue_at() and closure_return() clear it for you, if you're doing -+ * something unusual you can use closure_set_dead() which also helps -+ * annotate where references are being transferred. -+ */ -+ -+ CLOSURE_BITS_START = (1U << 26), -+ CLOSURE_DESTRUCTOR = (1U << 26), -+ CLOSURE_WAITING = (1U << 28), -+ CLOSURE_RUNNING = (1U << 30), -+}; -+ -+#define CLOSURE_GUARD_MASK \ -+ ((CLOSURE_DESTRUCTOR|CLOSURE_WAITING|CLOSURE_RUNNING) << 1) -+ -+#define CLOSURE_REMAINING_MASK (CLOSURE_BITS_START - 1) -+#define CLOSURE_REMAINING_INITIALIZER (1|CLOSURE_RUNNING) -+ -+struct closure { -+ union { -+ struct { -+ struct workqueue_struct *wq; -+ struct closure_syncer *s; -+ struct llist_node list; -+ closure_fn *fn; -+ }; -+ struct work_struct work; -+ }; -+ -+ struct closure *parent; -+ -+ atomic_t remaining; -+ +@@ -155,7 +155,7 @@ struct closure { + + atomic_t remaining; + +-#ifdef CONFIG_BCACHE_CLOSURES_DEBUG +#ifdef CONFIG_DEBUG_CLOSURES -+#define CLOSURE_MAGIC_DEAD 0xc054dead -+#define CLOSURE_MAGIC_ALIVE 0xc054a11e -+ -+ unsigned int magic; -+ struct list_head all; -+ unsigned long ip; -+ unsigned long waiting_on; -+#endif -+}; -+ -+void closure_sub(struct closure *cl, int v); -+void closure_put(struct closure *cl); -+void __closure_wake_up(struct closure_waitlist *list); -+bool closure_wait(struct closure_waitlist *list, struct closure *cl); -+void __closure_sync(struct closure *cl); -+ -+/** -+ * closure_sync - sleep until a closure a closure has nothing left to wait on -+ * -+ * Sleeps until the refcount hits 1 - the thread that's running the closure owns -+ * the last refcount. 
-+ */ -+static inline void closure_sync(struct closure *cl) -+{ -+ if ((atomic_read(&cl->remaining) & CLOSURE_REMAINING_MASK) != 1) -+ __closure_sync(cl); -+} -+ + #define CLOSURE_MAGIC_DEAD 0xc054dead + #define CLOSURE_MAGIC_ALIVE 0xc054a11e + +@@ -184,15 +184,13 @@ static inline void closure_sync(struct closure *cl) + __closure_sync(cl); + } + +-#ifdef CONFIG_BCACHE_CLOSURES_DEBUG +#ifdef CONFIG_DEBUG_CLOSURES -+ -+void closure_debug_create(struct closure *cl); -+void closure_debug_destroy(struct closure *cl); -+ -+#else -+ -+static inline void closure_debug_create(struct closure *cl) {} -+static inline void closure_debug_destroy(struct closure *cl) {} -+ -+#endif -+ -+static inline void closure_set_ip(struct closure *cl) -+{ + +-void closure_debug_init(void); + void closure_debug_create(struct closure *cl); + void closure_debug_destroy(struct closure *cl); + + #else + +-static inline void closure_debug_init(void) {} + static inline void closure_debug_create(struct closure *cl) {} + static inline void closure_debug_destroy(struct closure *cl) {} + +@@ -200,21 +198,21 @@ static inline void closure_debug_destroy(struct closure *cl) {} + + static inline void closure_set_ip(struct closure *cl) + { +-#ifdef CONFIG_BCACHE_CLOSURES_DEBUG +#ifdef CONFIG_DEBUG_CLOSURES -+ cl->ip = _THIS_IP_; -+#endif -+} -+ -+static inline void closure_set_ret_ip(struct closure *cl) -+{ + cl->ip = _THIS_IP_; + #endif + } + + static inline void closure_set_ret_ip(struct closure *cl) + { +-#ifdef CONFIG_BCACHE_CLOSURES_DEBUG +#ifdef CONFIG_DEBUG_CLOSURES -+ cl->ip = _RET_IP_; -+#endif -+} -+ -+static inline void closure_set_waiting(struct closure *cl, unsigned long f) -+{ + cl->ip = _RET_IP_; + #endif + } + + static inline void closure_set_waiting(struct closure *cl, unsigned long f) + { +-#ifdef CONFIG_BCACHE_CLOSURES_DEBUG +#ifdef CONFIG_DEBUG_CLOSURES -+ cl->waiting_on = f; -+#endif -+} + cl->waiting_on = f; + #endif + } +@@ -243,6 +241,7 @@ static inline void closure_queue(struct 
closure *cl) + */ + BUILD_BUG_ON(offsetof(struct closure, fn) + != offsetof(struct work_struct, func)); + -+static inline void closure_set_stopped(struct closure *cl) -+{ -+ atomic_sub(CLOSURE_RUNNING, &cl->remaining); -+} -+ -+static inline void set_closure_fn(struct closure *cl, closure_fn *fn, -+ struct workqueue_struct *wq) -+{ -+ closure_set_ip(cl); -+ cl->fn = fn; -+ cl->wq = wq; -+ /* between atomic_dec() in closure_put() */ -+ smp_mb__before_atomic(); -+} -+ -+static inline void closure_queue(struct closure *cl) -+{ -+ struct workqueue_struct *wq = cl->wq; -+ /** -+ * Changes made to closure, work_struct, or a couple of other structs -+ * may cause work.func not pointing to the right location. -+ */ -+ BUILD_BUG_ON(offsetof(struct closure, fn) -+ != offsetof(struct work_struct, func)); -+ -+ if (wq) { -+ INIT_WORK(&cl->work, cl->work.func); -+ BUG_ON(!queue_work(wq, &cl->work)); -+ } else -+ cl->fn(cl); -+} -+ -+/** -+ * closure_get - increment a closure's refcount -+ */ -+static inline void closure_get(struct closure *cl) -+{ + if (wq) { + INIT_WORK(&cl->work, cl->work.func); + BUG_ON(!queue_work(wq, &cl->work)); +@@ -255,7 +254,7 @@ static inline void closure_queue(struct closure *cl) + */ + static inline void closure_get(struct closure *cl) + { +-#ifdef CONFIG_BCACHE_CLOSURES_DEBUG +#ifdef CONFIG_DEBUG_CLOSURES -+ BUG_ON((atomic_inc_return(&cl->remaining) & -+ CLOSURE_REMAINING_MASK) <= 1); -+#else -+ atomic_inc(&cl->remaining); -+#endif -+} -+ -+/** -+ * closure_init - Initialize a closure, setting the refcount to 1 -+ * @cl: closure to initialize -+ * @parent: parent of the new closure. cl will take a refcount on it for its -+ * lifetime; may be NULL. 
-+ */ -+static inline void closure_init(struct closure *cl, struct closure *parent) -+{ + BUG_ON((atomic_inc_return(&cl->remaining) & + CLOSURE_REMAINING_MASK) <= 1); + #else +@@ -271,7 +270,7 @@ static inline void closure_get(struct closure *cl) + */ + static inline void closure_init(struct closure *cl, struct closure *parent) + { +- memset(cl, 0, sizeof(struct closure)); + cl->fn = NULL; -+ cl->parent = parent; -+ if (parent) -+ closure_get(parent); -+ -+ atomic_set(&cl->remaining, CLOSURE_REMAINING_INITIALIZER); -+ -+ closure_debug_create(cl); -+ closure_set_ip(cl); -+} -+ -+static inline void closure_init_stack(struct closure *cl) -+{ -+ memset(cl, 0, sizeof(struct closure)); -+ atomic_set(&cl->remaining, CLOSURE_REMAINING_INITIALIZER); -+} -+ -+/** -+ * closure_wake_up - wake up all closures on a wait list, -+ * with memory barrier -+ */ -+static inline void closure_wake_up(struct closure_waitlist *list) -+{ -+ /* Memory barrier for the wait list */ -+ smp_mb(); -+ __closure_wake_up(list); -+} -+ -+/** -+ * continue_at - jump to another function with barrier -+ * -+ * After @cl is no longer waiting on anything (i.e. all outstanding refs have -+ * been dropped with closure_put()), it will resume execution at @fn running out -+ * of @wq (or, if @wq is NULL, @fn will be called by closure_put() directly). -+ * -+ * This is because after calling continue_at() you no longer have a ref on @cl, -+ * and whatever @cl owns may be freed out from under you - a running closure fn -+ * has a ref on its own closure which continue_at() drops. -+ * -+ * Note you are expected to immediately return after using this macro. 
-+ */ -+#define continue_at(_cl, _fn, _wq) \ -+do { \ -+ set_closure_fn(_cl, _fn, _wq); \ -+ closure_sub(_cl, CLOSURE_RUNNING + 1); \ -+} while (0) -+ -+/** -+ * closure_return - finish execution of a closure -+ * -+ * This is used to indicate that @cl is finished: when all outstanding refs on -+ * @cl have been dropped @cl's ref on its parent closure (as passed to -+ * closure_init()) will be dropped, if one was specified - thus this can be -+ * thought of as returning to the parent closure. -+ */ -+#define closure_return(_cl) continue_at((_cl), NULL, NULL) -+ -+/** -+ * continue_at_nobarrier - jump to another function without barrier -+ * -+ * Causes @fn to be executed out of @cl, in @wq context (or called directly if -+ * @wq is NULL). -+ * -+ * The ref the caller of continue_at_nobarrier() had on @cl is now owned by @fn, -+ * thus it's not safe to touch anything protected by @cl after a -+ * continue_at_nobarrier(). -+ */ -+#define continue_at_nobarrier(_cl, _fn, _wq) \ -+do { \ -+ set_closure_fn(_cl, _fn, _wq); \ -+ closure_queue(_cl); \ -+} while (0) -+ -+/** -+ * closure_return_with_destructor - finish execution of a closure, -+ * with destructor -+ * -+ * Works like closure_return(), except @destructor will be called when all -+ * outstanding refs on @cl have been dropped; @destructor may be used to safely -+ * free the memory occupied by @cl, and it is called with the ref on the parent -+ * closure still held - so @destructor could safely return an item to a -+ * freelist protected by @cl's parent. -+ */ -+#define closure_return_with_destructor(_cl, _destructor) \ -+do { \ -+ set_closure_fn(_cl, _destructor, NULL); \ -+ closure_sub(_cl, CLOSURE_RUNNING - CLOSURE_DESTRUCTOR + 1); \ -+} while (0) -+ -+/** -+ * closure_call - execute @fn out of a new, uninitialized closure -+ * -+ * Typically used when running out of one closure, and we want to run @fn -+ * asynchronously out of a new closure - @parent will then wait for @cl to -+ * finish. 
-+ */ -+static inline void closure_call(struct closure *cl, closure_fn fn, -+ struct workqueue_struct *wq, -+ struct closure *parent) -+{ -+ closure_init(cl, parent); -+ continue_at_nobarrier(cl, fn, wq); -+} -+ + cl->parent = parent; + if (parent) + closure_get(parent); +@@ -375,4 +374,26 @@ static inline void closure_call(struct closure *cl, closure_fn fn, + continue_at_nobarrier(cl, fn, wq); + } + +#define __closure_wait_event(waitlist, _cond) \ +do { \ + struct closure cl; \ @@ -82219,9 +81945,9 @@ index 000000000000..36b4a83f9b77 + __closure_wait_event(waitlist, _cond); \ +} while (0) + -+#endif /* _LINUX_CLOSURE_H */ + #endif /* _LINUX_CLOSURE_H */ diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h -index 445e80517cab..57e7d0b94119 100644 +index 445e80517..57e7d0b94 100644 --- a/include/linux/compiler_attributes.h +++ b/include/linux/compiler_attributes.h @@ -371,4 +371,9 @@ @@ -82235,7 +81961,7 @@ index 445e80517cab..57e7d0b94119 100644 + #endif /* __LINUX_COMPILER_ATTRIBUTES_H */ diff --git a/include/linux/dcache.h b/include/linux/dcache.h -index f5bba51480b2..6c661059a55b 100644 +index f5bba5148..6c661059a 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -248,6 +248,7 @@ extern struct dentry * d_make_root(struct inode *); @@ -82255,7 +81981,7 @@ index f5bba51480b2..6c661059a55b 100644 /* Allocation counts.. 
*/ diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h -index fe848901fcc3..5a3cc0e1da9b 100644 +index fe848901f..5a3cc0e1d 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -98,6 +98,12 @@ enum fid_type { @@ -82272,7 +81998,7 @@ index fe848901fcc3..5a3cc0e1da9b 100644 * 128 bit child FID (struct lu_fid) * 128 bit parent FID (struct lu_fid) diff --git a/include/linux/fs.h b/include/linux/fs.h -index bbde95387a23..98f62ebf9224 100644 +index bbde95387..98f62ebf9 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -637,7 +637,8 @@ struct inode { @@ -82313,7 +82039,7 @@ index bbde95387a23..98f62ebf9224 100644 } diff --git a/include/linux/generic-radix-tree.h b/include/linux/generic-radix-tree.h -index 107613f7d792..c74b7376990d 100644 +index 107613f7d..c74b73769 100644 --- a/include/linux/generic-radix-tree.h +++ b/include/linux/generic-radix-tree.h @@ -38,6 +38,7 @@ @@ -82424,7 +82150,7 @@ index 107613f7d792..c74b7376990d 100644 /** diff --git a/include/linux/kernel.h b/include/linux/kernel.h -index fe6efb24d151..9ba5a53c6ad5 100644 +index fe6efb24d..9ba5a53c6 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -202,11 +202,17 @@ static inline void might_fault(void) { } @@ -82459,7 +82185,7 @@ index fe6efb24d151..9ba5a53c6ad5 100644 /* diff --git a/include/linux/list_bl.h b/include/linux/list_bl.h -index ae1b541446c9..8ee2bf5af131 100644 +index ae1b54144..8ee2bf5af 100644 --- a/include/linux/list_bl.h +++ b/include/linux/list_bl.h @@ -143,6 +143,28 @@ static inline void hlist_bl_del_init(struct hlist_bl_node *n) @@ -82492,7 +82218,7 @@ index ae1b541446c9..8ee2bf5af131 100644 { bit_spin_lock(0, (unsigned long *)b); diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h -index 467b94257105..c46b0c76c064 100644 +index 467b94257..c46b0c76c 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -336,6 +336,8 @@ extern void lock_unpin_lock(struct lockdep_map *lock, struct pin_cookie); @@ 
-82515,7 +82241,7 @@ index 467b94257105..c46b0c76c064 100644 enum xhlock_context_t { diff --git a/include/linux/pretty-printers.h b/include/linux/pretty-printers.h new file mode 100644 -index 000000000000..f39d8edfba02 +index 000000000..f39d8edfb --- /dev/null +++ b/include/linux/pretty-printers.h @@ -0,0 +1,10 @@ @@ -82531,10 +82257,10 @@ index 000000000000..f39d8edfba02 +#endif /* _LINUX_PRETTY_PRINTERS_H */ diff --git a/include/linux/printbuf.h b/include/linux/printbuf.h new file mode 100644 -index 000000000000..fa8e73d5766a +index 000000000..861c5d75f --- /dev/null +++ b/include/linux/printbuf.h -@@ -0,0 +1,248 @@ +@@ -0,0 +1,283 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +/* Copyright (C) 2022 Kent Overstreet */ + @@ -82569,6 +82295,10 @@ index 000000000000..fa8e73d5766a + * Since no equivalent yet exists for GFP_ATOMIC/GFP_NOWAIT, memory allocations + * will be done with GFP_NOWAIT if printbuf->atomic is nonzero. + * ++ * It's allowed to grab the output buffer and free it later with kfree() instead ++ * of using printbuf_exit(), if the user just needs a heap allocated string at ++ * the end. ++ * + * Memory allocation failures: We don't return errors directly, because on + * memory allocation failure we usually don't want to bail out and unwind - we + * want to print what we've got, on a best-effort basis. But code that does want @@ -82666,7 +82396,7 @@ index 000000000000..fa8e73d5766a + +static inline unsigned printbuf_written(struct printbuf *out) +{ -+ return min(out->pos, out->size); ++ return out->size ? 
min(out->pos, out->size - 1) : 0; +} + +/* @@ -82687,21 +82417,6 @@ index 000000000000..fa8e73d5766a + out->buf[out->size - 1] = 0; +} + -+static inline void __prt_chars_reserved(struct printbuf *out, char c, unsigned n) -+{ -+ memset(out->buf + out->pos, -+ c, -+ min(n, printbuf_remaining(out))); -+ out->pos += n; -+} -+ -+static inline void prt_chars(struct printbuf *out, char c, unsigned n) -+{ -+ printbuf_make_room(out, n); -+ __prt_chars_reserved(out, c, n); -+ printbuf_nul_terminate(out); -+} -+ +/* Doesn't call printbuf_make_room(), doesn't nul terminate: */ +static inline void __prt_char_reserved(struct printbuf *out, char c) +{ @@ -82723,14 +82438,34 @@ index 000000000000..fa8e73d5766a + printbuf_nul_terminate(out); +} + -+static inline void prt_bytes(struct printbuf *out, const void *b, unsigned n) ++static inline void __prt_chars_reserved(struct printbuf *out, char c, unsigned n) ++{ ++ unsigned i, can_print = min(n, printbuf_remaining(out)); ++ ++ for (i = 0; i < can_print; i++) ++ out->buf[out->pos++] = c; ++ out->pos += n - can_print; ++} ++ ++static inline void prt_chars(struct printbuf *out, char c, unsigned n) +{ + printbuf_make_room(out, n); ++ __prt_chars_reserved(out, c, n); ++ printbuf_nul_terminate(out); ++} ++ ++static inline void prt_bytes(struct printbuf *out, const void *b, unsigned n) ++{ ++ unsigned i, can_print; ++ ++ printbuf_make_room(out, n); ++ ++ can_print = min(n, printbuf_remaining(out)); ++ ++ for (i = 0; i < can_print; i++) ++ out->buf[out->pos++] = ((char *) b)[i]; ++ out->pos += n - can_print; + -+ memcpy(out->buf + out->pos, -+ b, -+ min(n, printbuf_remaining(out))); -+ out->pos += n; + printbuf_nul_terminate(out); +} + @@ -82782,9 +82517,35 @@ index 000000000000..fa8e73d5766a + buf->atomic--; +} + ++/* ++ * This is used for the %pf(%p) sprintf format extension, where we pass a pretty ++ * printer and arguments to the pretty-printer to sprintf ++ * ++ * Instead of passing a pretty-printer function to sprintf directly, we 
pass it ++ * a pointer to a struct call_pp, so that sprintf can check that the magic ++ * number is present, which in turn ensures that the CALL_PP() macro has been ++ * used in order to typecheck the arguments to the pretty printer function ++ * ++ * Example usage: ++ * sprintf("%pf(%p)", CALL_PP(prt_bdev, bdev)); ++ */ ++struct call_pp { ++ unsigned long magic; ++ void *fn; ++}; ++ ++#define PP_TYPECHECK(fn, ...) \ ++ ({ while (0) fn((struct printbuf *) NULL, ##__VA_ARGS__); }) ++ ++#define CALL_PP_MAGIC (unsigned long) 0xce0b92d22f6b6be4 ++ ++#define CALL_PP(fn, ...) \ ++ (PP_TYPECHECK(fn, ##__VA_ARGS__), \ ++ &((struct call_pp) { CALL_PP_MAGIC, fn })), ##__VA_ARGS__ ++ +#endif /* _LINUX_PRINTBUF_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h -index a8911b1f35aa..252bac976763 100644 +index a8911b1f3..252bac976 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -859,6 +859,7 @@ struct task_struct { @@ -82797,7 +82558,7 @@ index a8911b1f35aa..252bac976763 100644 struct vmacache vmacache; diff --git a/include/linux/seq_buf.h b/include/linux/seq_buf.h deleted file mode 100644 -index 5b31c5147969..000000000000 +index 5b31c5147..000000000 --- a/include/linux/seq_buf.h +++ /dev/null @@ -1,162 +0,0 @@ @@ -82964,7 +82725,7 @@ index 5b31c5147969..000000000000 - -#endif /* _LINUX_SEQ_BUF_H */ diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h -index 76fbf92b04d9..12967748f9f7 100644 +index 76fbf92b0..12967748f 100644 --- a/include/linux/shrinker.h +++ b/include/linux/shrinker.h @@ -2,6 +2,8 @@ @@ -83007,7 +82768,7 @@ index 76fbf92b04d9..12967748f9f7 100644 #endif diff --git a/include/linux/six.h b/include/linux/six.h new file mode 100644 -index 000000000000..477c33eb00d7 +index 000000000..477c33eb0 --- /dev/null +++ b/include/linux/six.h @@ -0,0 +1,203 @@ @@ -83215,7 +82976,7 @@ index 000000000000..477c33eb00d7 + +#endif /* _LINUX_SIX_H */ diff --git a/include/linux/string.h b/include/linux/string.h -index 
b6572aeca2f5..0a737d5b9203 100644 +index b6572aeca..0a737d5b9 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -195,7 +195,12 @@ int __sysfs_match_string(const char * const *array, size_t n, const char *s); @@ -83232,7 +82993,7 @@ index b6572aeca2f5..0a737d5b9203 100644 int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf); int bprintf(u32 *bin_buf, size_t size, const char *fmt, ...) __printf(3, 4); diff --git a/include/linux/string_helpers.h b/include/linux/string_helpers.h -index 4d72258d42fd..52e0f1d283b9 100644 +index 4d72258d4..52e0f1d28 100644 --- a/include/linux/string_helpers.h +++ b/include/linux/string_helpers.h @@ -10,6 +10,7 @@ @@ -83272,7 +83033,7 @@ index 4d72258d42fd..52e0f1d283b9 100644 unsigned int flags, const char *only) { diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h -index e6e95a9f07a5..48471e32f8e4 100644 +index e6e95a9f0..48471e32f 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -496,7 +496,7 @@ struct dynevent_cmd; @@ -83285,7 +83046,7 @@ index e6e95a9f07a5..48471e32f8e4 100644 unsigned int n_fields; enum dynevent_type type; diff --git a/include/linux/trace_seq.h b/include/linux/trace_seq.h -index 5a2c650d9e1c..d2b51007b3b9 100644 +index 5a2c650d9..d2b51007b 100644 --- a/include/linux/trace_seq.h +++ b/include/linux/trace_seq.h @@ -2,10 +2,12 @@ @@ -83357,7 +83118,7 @@ index 5a2c650d9e1c..d2b51007b3b9 100644 extern void trace_seq_bitmask(struct trace_seq *s, const unsigned long *maskp, diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h -index b159c2789961..0f4151e98331 100644 +index b159c2789..0f4151e98 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -144,6 +144,7 @@ extern void *vzalloc(unsigned long size) __alloc_size(1); @@ -83370,10 +83131,10 @@ index b159c2789961..0f4151e98331 100644 extern void *__vmalloc(unsigned long size, gfp_t gfp_mask) __alloc_size(1); diff --git a/include/trace/events/bcachefs.h 
b/include/trace/events/bcachefs.h new file mode 100644 -index 000000000000..b96b25741b68 +index 000000000..66ad356e9 --- /dev/null +++ b/include/trace/events/bcachefs.h -@@ -0,0 +1,1017 @@ +@@ -0,0 +1,1020 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM bcachefs @@ -83910,24 +83671,27 @@ index 000000000000..b96b25741b68 +); + +TRACE_EVENT(invalidate_bucket, -+ TP_PROTO(struct bch_fs *c, unsigned dev, u64 bucket), -+ TP_ARGS(c, dev, bucket), ++ TP_PROTO(struct bch_fs *c, unsigned dev, u64 bucket, u32 sectors), ++ TP_ARGS(c, dev, bucket, sectors), + + TP_STRUCT__entry( + __field(dev_t, dev ) + __field(u32, dev_idx ) ++ __field(u32, sectors ) + __field(u64, bucket ) + ), + + TP_fast_assign( + __entry->dev = c->dev; + __entry->dev_idx = dev; ++ __entry->sectors = sectors; + __entry->bucket = bucket; + ), + -+ TP_printk("%d:%d invalidated %u:%llu", ++ TP_printk("%d:%d invalidated %u:%llu cached sectors %u", + MAJOR(__entry->dev), MINOR(__entry->dev), -+ __entry->dev_idx, __entry->bucket) ++ __entry->dev_idx, __entry->bucket, ++ __entry->sectors) +); + +/* Moving IO */ @@ -84392,7 +84156,7 @@ index 000000000000..b96b25741b68 +/* This part must be outside protection */ +#include diff --git a/init/init_task.c b/init/init_task.c -index 73cc8f03511a..3e3aed110153 100644 +index 73cc8f035..3e3aed110 100644 --- a/init/init_task.c +++ b/init/init_task.c @@ -85,6 +85,7 @@ struct task_struct init_task @@ -84404,7 +84168,7 @@ index 73cc8f03511a..3e3aed110153 100644 .fn = do_no_restart_syscall, }, diff --git a/kernel/Kconfig.locks b/kernel/Kconfig.locks -index 4198f0273ecd..b2abd9a5d9ab 100644 +index 4198f0273..b2abd9a5d 100644 --- a/kernel/Kconfig.locks +++ b/kernel/Kconfig.locks @@ -259,3 +259,6 @@ config ARCH_HAS_MMIOWB @@ -84415,7 +84179,7 @@ index 4198f0273ecd..b2abd9a5d9ab 100644 +config SIXLOCKS + bool diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile -index d51cabf28f38..cadbf6520c4b 100644 +index d51cabf28..cadbf6520 100644 
--- a/kernel/locking/Makefile +++ b/kernel/locking/Makefile @@ -32,3 +32,4 @@ obj-$(CONFIG_QUEUED_RWLOCKS) += qrwlock.o @@ -84424,7 +84188,7 @@ index d51cabf28f38..cadbf6520c4b 100644 obj-$(CONFIG_LOCK_EVENT_COUNTS) += lock_events.o +obj-$(CONFIG_SIXLOCKS) += six.o diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c -index c06cab6546ed..9426050d30d9 100644 +index c06cab654..9426050d3 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -6459,6 +6459,26 @@ void debug_check_no_locks_held(void) @@ -84456,7 +84220,7 @@ index c06cab6546ed..9426050d30d9 100644 { diff --git a/kernel/locking/six.c b/kernel/locking/six.c new file mode 100644 -index 000000000000..fca1208720b6 +index 000000000..fca120872 --- /dev/null +++ b/kernel/locking/six.c @@ -0,0 +1,759 @@ @@ -85220,7 +84984,7 @@ index 000000000000..fca1208720b6 +} +EXPORT_SYMBOL_GPL(six_lock_pcpu_alloc); diff --git a/kernel/module.c b/kernel/module.c -index 6cea788fd965..207cf0430f72 100644 +index 6529c84c5..df4959bda 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2834,9 +2834,7 @@ static void dynamic_debug_remove(struct module *mod, struct _ddebug *debug) @@ -85235,10 +84999,10 @@ index 6cea788fd965..207cf0430f72 100644 bool __weak module_init_section(const char *name) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c -index f4de111fa18f..b815a914b50a 100644 +index 114c31bdf..7c7fd7b66 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c -@@ -1670,15 +1670,15 @@ static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt) +@@ -1672,15 +1672,15 @@ static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt) { int len; @@ -85258,7 +85022,7 @@ index f4de111fa18f..b815a914b50a 100644 return cnt; } -@@ -3725,11 +3725,7 @@ static bool trace_safe_str(struct trace_iterator *iter, const char *str, +@@ -3727,11 +3727,7 @@ static bool trace_safe_str(struct trace_iterator *iter, const char *str, static const char *show_buffer(struct 
trace_seq *s) { @@ -85271,7 +85035,7 @@ index f4de111fa18f..b815a914b50a 100644 } static DEFINE_STATIC_KEY_FALSE(trace_no_verify); -@@ -6762,12 +6758,12 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, +@@ -6770,12 +6766,12 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, trace_access_lock(iter->cpu_file); while (trace_find_next_entry_inc(iter) != NULL) { enum print_line_t ret; @@ -85286,7 +85050,7 @@ index f4de111fa18f..b815a914b50a 100644 break; } if (ret != TRACE_TYPE_NO_CONSUME) -@@ -6789,7 +6785,7 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, +@@ -6797,7 +6793,7 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, /* Now copy what we have to the user */ sret = trace_seq_to_user(&iter->seq, ubuf, cnt); @@ -85295,7 +85059,7 @@ index f4de111fa18f..b815a914b50a 100644 trace_seq_init(&iter->seq); /* -@@ -6815,16 +6811,15 @@ static size_t +@@ -6823,16 +6819,15 @@ static size_t tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter) { size_t count; @@ -85314,7 +85078,7 @@ index f4de111fa18f..b815a914b50a 100644 break; } -@@ -6834,14 +6829,14 @@ tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter) +@@ -6842,14 +6837,14 @@ tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter) * anyway to be safe. 
*/ if (ret == TRACE_TYPE_PARTIAL_LINE) { @@ -85332,7 +85096,7 @@ index f4de111fa18f..b815a914b50a 100644 break; } -@@ -9817,20 +9812,8 @@ static struct notifier_block trace_die_notifier = { +@@ -9826,20 +9821,8 @@ static struct notifier_block trace_die_notifier = { void trace_printk_seq(struct trace_seq *s) { @@ -85355,7 +85119,7 @@ index f4de111fa18f..b815a914b50a 100644 printk(KERN_TRACE "%s", s->buffer); diff --git a/kernel/trace/trace_dynevent.c b/kernel/trace/trace_dynevent.c -index e34e8182ee4b..eabeeb97b55e 100644 +index e34e8182e..eabeeb97b 100644 --- a/kernel/trace/trace_dynevent.c +++ b/kernel/trace/trace_dynevent.c @@ -295,21 +295,19 @@ int dynevent_arg_add(struct dynevent_cmd *cmd, @@ -85445,7 +85209,7 @@ index e34e8182ee4b..eabeeb97b55e 100644 cmd->run_command = run_command; } diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c -index b458a9afa2c0..70cfd1241018 100644 +index b458a9afa..70cfd1241 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -1059,7 +1059,7 @@ static void append_filter_err(struct trace_array *tr, @@ -85458,7 +85222,7 @@ index b458a9afa2c0..70cfd1241018 100644 kfree(filter->filter_string); filter->filter_string = buf; diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c -index 5e8c07aef071..ddb2a2737b82 100644 +index 5e8c07aef..ddb2a2737 100644 --- a/kernel/trace/trace_events_synth.c +++ b/kernel/trace/trace_events_synth.c @@ -5,13 +5,14 @@ @@ -85539,7 +85303,7 @@ index 5e8c07aef071..ddb2a2737b82 100644 return ret; diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c -index 203204cadf92..9f270fdde99b 100644 +index 203204cad..9f270fdde 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -1022,9 +1022,9 @@ print_graph_comment(struct trace_seq *s, struct trace_entry *ent, @@ -85556,7 +85320,7 @@ index 203204cadf92..9f270fdde99b 100644 trace_seq_puts(s, " 
*/\n"); diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c -index 47cebef78532..b97a912eede6 100644 +index 134397432..6e4485b04 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -915,7 +915,7 @@ static int create_or_delete_trace_kprobe(const char *raw_command) @@ -85569,7 +85333,7 @@ index 47cebef78532..b97a912eede6 100644 /** diff --git a/kernel/trace/trace_seq.c b/kernel/trace/trace_seq.c -index 9c90b3a7dce2..48c08f29c342 100644 +index 9c90b3a7d..48c08f29c 100644 --- a/kernel/trace/trace_seq.c +++ b/kernel/trace/trace_seq.c @@ -25,11 +25,9 @@ @@ -85848,10 +85612,10 @@ index 9c90b3a7dce2..48c08f29c342 100644 return 0; } diff --git a/lib/Kconfig b/lib/Kconfig -index 087e06b4cdfd..89a324ac0551 100644 +index 55f0bba8f..9161ac314 100644 --- a/lib/Kconfig +++ b/lib/Kconfig -@@ -488,6 +488,9 @@ config ASSOCIATIVE_ARRAY +@@ -491,6 +491,9 @@ config ASSOCIATIVE_ARRAY for more information. @@ -85862,10 +85626,10 @@ index 087e06b4cdfd..89a324ac0551 100644 bool depends on !NO_IOMEM diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug -index 075cd25363ac..759494896f0c 100644 +index 7e2829701..2bef39841 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug -@@ -1724,6 +1724,15 @@ config DEBUG_CREDENTIALS +@@ -1723,6 +1723,15 @@ config DEBUG_CREDENTIALS source "kernel/rcu/Kconfig.debug" @@ -85882,7 +85646,7 @@ index 075cd25363ac..759494896f0c 100644 bool "Force round-robin CPU selection for unbound work items" depends on DEBUG_KERNEL diff --git a/lib/Makefile b/lib/Makefile -index 6b9ffc1bd1ee..9230bc9ebcd7 100644 +index 60843ab66..d98f3c92b 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -30,11 +30,11 @@ endif @@ -85909,218 +85673,125 @@ index 6b9ffc1bd1ee..9230bc9ebcd7 100644 obj-$(CONFIG_DQL) += dynamic_queue_limits.o obj-$(CONFIG_GLOB) += glob.o -diff --git a/lib/closure.c b/lib/closure.c -new file mode 100644 -index 000000000000..b38ded00b9b0 ---- /dev/null +diff --git a/drivers/md/bcache/closure.c b/lib/closure.c +similarity 
index 88% +rename from drivers/md/bcache/closure.c +rename to lib/closure.c +index d8d9394a6..b38ded00b 100644 +--- a/drivers/md/bcache/closure.c +++ b/lib/closure.c -@@ -0,0 +1,204 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * Asynchronous refcounty things -+ * -+ * Copyright 2010, 2011 Kent Overstreet -+ * Copyright 2012 Google, Inc. -+ */ -+ +@@ -6,13 +6,12 @@ + * Copyright 2012 Google, Inc. + */ + +#include -+#include + #include +-#include +#include -+#include -+#include -+ -+static inline void closure_put_after_sub(struct closure *cl, int flags) -+{ -+ int r = flags & CLOSURE_REMAINING_MASK; -+ -+ BUG_ON(flags & CLOSURE_GUARD_MASK); -+ BUG_ON(!r && (flags & ~CLOSURE_DESTRUCTOR)); -+ -+ if (!r) { -+ if (cl->fn && !(flags & CLOSURE_DESTRUCTOR)) { -+ atomic_set(&cl->remaining, -+ CLOSURE_REMAINING_INITIALIZER); -+ closure_queue(cl); -+ } else { -+ struct closure *parent = cl->parent; -+ closure_fn *destructor = cl->fn; -+ -+ closure_debug_destroy(cl); -+ -+ if (destructor) -+ destructor(cl); -+ -+ if (parent) -+ closure_put(parent); -+ } -+ } -+} -+ -+/* For clearing flags with the same atomic op as a put */ -+void closure_sub(struct closure *cl, int v) -+{ -+ closure_put_after_sub(cl, atomic_sub_return(v, &cl->remaining)); -+} + #include + #include + +-#include "closure.h" +- + static inline void closure_put_after_sub(struct closure *cl, int flags) + { + int r = flags & CLOSURE_REMAINING_MASK; +@@ -45,6 +44,7 @@ void closure_sub(struct closure *cl, int v) + { + closure_put_after_sub(cl, atomic_sub_return(v, &cl->remaining)); + } +EXPORT_SYMBOL(closure_sub); -+ -+/* -+ * closure_put - decrement a closure's refcount -+ */ -+void closure_put(struct closure *cl) -+{ -+ closure_put_after_sub(cl, atomic_dec_return(&cl->remaining)); -+} + + /* + * closure_put - decrement a closure's refcount +@@ -53,6 +53,7 @@ void closure_put(struct closure *cl) + { + closure_put_after_sub(cl, atomic_dec_return(&cl->remaining)); + } +EXPORT_SYMBOL(closure_put); -+ -+/* -+ * 
closure_wake_up - wake up all closures on a wait list, without memory barrier -+ */ -+void __closure_wake_up(struct closure_waitlist *wait_list) -+{ -+ struct llist_node *list; -+ struct closure *cl, *t; -+ struct llist_node *reverse = NULL; -+ -+ list = llist_del_all(&wait_list->list); -+ -+ /* We first reverse the list to preserve FIFO ordering and fairness */ -+ reverse = llist_reverse_order(list); -+ -+ /* Then do the wakeups */ -+ llist_for_each_entry_safe(cl, t, reverse, list) { -+ closure_set_waiting(cl, 0); -+ closure_sub(cl, CLOSURE_WAITING + 1); -+ } -+} + + /* + * closure_wake_up - wake up all closures on a wait list, without memory barrier +@@ -74,6 +75,7 @@ void __closure_wake_up(struct closure_waitlist *wait_list) + closure_sub(cl, CLOSURE_WAITING + 1); + } + } +EXPORT_SYMBOL(__closure_wake_up); -+ -+/** -+ * closure_wait - add a closure to a waitlist -+ * @waitlist: will own a ref on @cl, which will be released when -+ * closure_wake_up() is called on @waitlist. -+ * @cl: closure pointer. 
-+ * -+ */ -+bool closure_wait(struct closure_waitlist *waitlist, struct closure *cl) -+{ -+ if (atomic_read(&cl->remaining) & CLOSURE_WAITING) -+ return false; -+ -+ closure_set_waiting(cl, _RET_IP_); -+ atomic_add(CLOSURE_WAITING + 1, &cl->remaining); -+ llist_add(&cl->list, &waitlist->list); -+ -+ return true; -+} + + /** + * closure_wait - add a closure to a waitlist +@@ -93,6 +95,7 @@ bool closure_wait(struct closure_waitlist *waitlist, struct closure *cl) + + return true; + } +EXPORT_SYMBOL(closure_wait); -+ -+struct closure_syncer { -+ struct task_struct *task; -+ int done; -+}; -+ -+static void closure_sync_fn(struct closure *cl) -+{ -+ struct closure_syncer *s = cl->s; -+ struct task_struct *p; -+ -+ rcu_read_lock(); -+ p = READ_ONCE(s->task); -+ s->done = 1; -+ wake_up_process(p); -+ rcu_read_unlock(); -+} -+ -+void __sched __closure_sync(struct closure *cl) -+{ -+ struct closure_syncer s = { .task = current }; -+ -+ cl->s = &s; -+ continue_at(cl, closure_sync_fn, NULL); -+ -+ while (1) { -+ set_current_state(TASK_UNINTERRUPTIBLE); -+ if (s.done) -+ break; -+ schedule(); -+ } -+ -+ __set_current_state(TASK_RUNNING); -+} + + struct closure_syncer { + struct task_struct *task; +@@ -127,8 +130,9 @@ void __sched __closure_sync(struct closure *cl) + + __set_current_state(TASK_RUNNING); + } +EXPORT_SYMBOL(__closure_sync); -+ + +-#ifdef CONFIG_BCACHE_CLOSURES_DEBUG +#ifdef CONFIG_DEBUG_CLOSURES -+ -+static LIST_HEAD(closure_list); -+static DEFINE_SPINLOCK(closure_list_lock); -+ -+void closure_debug_create(struct closure *cl) -+{ -+ unsigned long flags; -+ -+ BUG_ON(cl->magic == CLOSURE_MAGIC_ALIVE); -+ cl->magic = CLOSURE_MAGIC_ALIVE; -+ -+ spin_lock_irqsave(&closure_list_lock, flags); -+ list_add(&cl->all, &closure_list); -+ spin_unlock_irqrestore(&closure_list_lock, flags); -+} + + static LIST_HEAD(closure_list); + static DEFINE_SPINLOCK(closure_list_lock); +@@ -144,6 +148,7 @@ void closure_debug_create(struct closure *cl) + list_add(&cl->all, &closure_list); 
+ spin_unlock_irqrestore(&closure_list_lock, flags); + } +EXPORT_SYMBOL(closure_debug_create); -+ -+void closure_debug_destroy(struct closure *cl) -+{ -+ unsigned long flags; -+ -+ BUG_ON(cl->magic != CLOSURE_MAGIC_ALIVE); -+ cl->magic = CLOSURE_MAGIC_DEAD; -+ -+ spin_lock_irqsave(&closure_list_lock, flags); -+ list_del(&cl->all); -+ spin_unlock_irqrestore(&closure_list_lock, flags); -+} + + void closure_debug_destroy(struct closure *cl) + { +@@ -156,8 +161,7 @@ void closure_debug_destroy(struct closure *cl) + list_del(&cl->all); + spin_unlock_irqrestore(&closure_list_lock, flags); + } +- +-static struct dentry *closure_debug; +EXPORT_SYMBOL(closure_debug_destroy); -+ -+static int debug_show(struct seq_file *f, void *data) -+{ -+ struct closure *cl; -+ -+ spin_lock_irq(&closure_list_lock); -+ -+ list_for_each_entry(cl, &closure_list, all) { -+ int r = atomic_read(&cl->remaining); -+ -+ seq_printf(f, "%p: %pS -> %pS p %p r %i ", -+ cl, (void *) cl->ip, cl->fn, cl->parent, -+ r & CLOSURE_REMAINING_MASK); -+ -+ seq_printf(f, "%s%s\n", -+ test_bit(WORK_STRUCT_PENDING_BIT, -+ work_data_bits(&cl->work)) ? "Q" : "", -+ r & CLOSURE_RUNNING ? 
"R" : ""); -+ -+ if (r & CLOSURE_WAITING) -+ seq_printf(f, " W %pS\n", -+ (void *) cl->waiting_on); -+ + + static int debug_show(struct seq_file *f, void *data) + { +@@ -181,7 +185,7 @@ static int debug_show(struct seq_file *f, void *data) + seq_printf(f, " W %pS\n", + (void *) cl->waiting_on); + +- seq_printf(f, "\n"); + seq_puts(f, "\n"); -+ } -+ -+ spin_unlock_irq(&closure_list_lock); -+ return 0; -+} -+ -+DEFINE_SHOW_ATTRIBUTE(debug); -+ + } + + spin_unlock_irq(&closure_list_lock); +@@ -190,18 +194,11 @@ static int debug_show(struct seq_file *f, void *data) + + DEFINE_SHOW_ATTRIBUTE(debug); + +-void __init closure_debug_init(void) +static int __init closure_debug_init(void) -+{ + { +- if (!IS_ERR_OR_NULL(bcache_debug)) +- /* +- * it is unnecessary to check return value of +- * debugfs_create_file(), we should not care +- * about this. +- */ +- closure_debug = debugfs_create_file( +- "closures", 0400, bcache_debug, NULL, &debug_fops); + debugfs_create_file("closures", 0400, NULL, NULL, &debug_fops); + return 0; -+} + } +-#endif +late_initcall(closure_debug_init) -+ + +-MODULE_AUTHOR("Kent Overstreet "); +-MODULE_LICENSE("GPL"); +#endif diff --git a/lib/generic-radix-tree.c b/lib/generic-radix-tree.c -index f25eb111c051..41f1bcdc4488 100644 +index f25eb111c..41f1bcdc4 100644 --- a/lib/generic-radix-tree.c +++ b/lib/generic-radix-tree.c @@ -1,4 +1,5 @@ @@ -86227,7 +85898,7 @@ index f25eb111c051..41f1bcdc4488 100644 { if (level) { diff --git a/lib/hexdump.c b/lib/hexdump.c -index 06833d404398..9556f15ad295 100644 +index 06833d404..9556f15ad 100644 --- a/lib/hexdump.c +++ b/lib/hexdump.c @@ -9,6 +9,7 @@ @@ -86534,7 +86205,7 @@ index 06833d404398..9556f15ad295 100644 unsigned char linebuf[32 * 3 + 2 + 32 + 1]; diff --git a/lib/pretty-printers.c b/lib/pretty-printers.c new file mode 100644 -index 000000000000..addbac95e065 +index 000000000..addbac95e --- /dev/null +++ b/lib/pretty-printers.c @@ -0,0 +1,60 @@ @@ -86600,10 +86271,10 @@ index 000000000000..addbac95e065 
+EXPORT_SYMBOL(prt_bitflags); diff --git a/lib/printbuf.c b/lib/printbuf.c new file mode 100644 -index 000000000000..553f89ebc1dc +index 000000000..047470025 --- /dev/null +++ b/lib/printbuf.c -@@ -0,0 +1,253 @@ +@@ -0,0 +1,258 @@ +// SPDX-License-Identifier: LGPL-2.1+ +/* Copyright (C) 2022 Kent Overstreet */ + @@ -86639,6 +86310,11 @@ index 000000000000..553f89ebc1dc + return 0; + + new_size = roundup_pow_of_two(out->size + extra); ++ ++ /* ++ * Note: output buffer must be freeable with kfree(), it's not required ++ * that the user use printbuf_exit(). ++ */ + buf = krealloc(out->buf, new_size, !out->atomic ? GFP_KERNEL : GFP_NOWAIT); + + if (!buf) { @@ -86859,7 +86535,7 @@ index 000000000000..553f89ebc1dc +EXPORT_SYMBOL(prt_units_s64); diff --git a/lib/seq_buf.c b/lib/seq_buf.c deleted file mode 100644 -index 0a68f7aa85d6..000000000000 +index 0a68f7aa8..000000000 --- a/lib/seq_buf.c +++ /dev/null @@ -1,397 +0,0 @@ @@ -87260,58 +86936,8 @@ index 0a68f7aa85d6..000000000000 - } - return 0; -} -diff --git a/lib/show_mem.c b/lib/show_mem.c -deleted file mode 100644 -index 1c26c14ffbb9..000000000000 ---- a/lib/show_mem.c -+++ /dev/null -@@ -1,44 +0,0 @@ --// SPDX-License-Identifier: GPL-2.0-only --/* -- * Generic show_mem() implementation -- * -- * Copyright (C) 2008 Johannes Weiner -- */ -- --#include --#include -- --void show_mem(unsigned int filter, nodemask_t *nodemask) --{ -- pg_data_t *pgdat; -- unsigned long total = 0, reserved = 0, highmem = 0; -- -- printk("Mem-Info:\n"); -- show_free_areas(filter, nodemask); -- -- for_each_online_pgdat(pgdat) { -- int zoneid; -- -- for (zoneid = 0; zoneid < MAX_NR_ZONES; zoneid++) { -- struct zone *zone = &pgdat->node_zones[zoneid]; -- if (!populated_zone(zone)) -- continue; -- -- total += zone->present_pages; -- reserved += zone->present_pages - zone_managed_pages(zone); -- -- if (is_highmem_idx(zoneid)) -- highmem += zone->present_pages; -- } -- } -- -- printk("%lu pages RAM\n", total); -- printk("%lu pages 
HighMem/MovableOnly\n", highmem); -- printk("%lu pages reserved\n", reserved); --#ifdef CONFIG_CMA -- printk("%lu pages cma reserved\n", totalcma_pages); --#endif --#ifdef CONFIG_MEMORY_FAILURE -- printk("%lu pages hwpoisoned\n", atomic_long_read(&num_poisoned_pages)); --#endif --} diff --git a/lib/string_helpers.c b/lib/string_helpers.c -index 4f877e9551d5..c1c8d4dfc9c9 100644 +index 5ed3beb06..d247bf945 100644 --- a/lib/string_helpers.c +++ b/lib/string_helpers.c @@ -15,6 +15,7 @@ @@ -87651,7 +87277,7 @@ index 4f877e9551d5..c1c8d4dfc9c9 100644 EXPORT_SYMBOL(string_escape_mem); diff --git a/lib/test_hexdump.c b/lib/test_hexdump.c -index 5144899d3c6b..f9e97879dcdf 100644 +index 5144899d3..f9e97879d 100644 --- a/lib/test_hexdump.c +++ b/lib/test_hexdump.c @@ -25,36 +25,19 @@ static const char * const test_data_1[] __initconst = { @@ -87718,10 +87344,18 @@ index 5144899d3c6b..f9e97879dcdf 100644 result = test_data_1; diff --git a/lib/test_printf.c b/lib/test_printf.c -index 07309c45f327..a702dd5dc034 100644 +index 07309c45f..ac5f9f0eb 100644 --- a/lib/test_printf.c +++ b/lib/test_printf.c -@@ -78,12 +78,6 @@ do_test(int bufsize, const char *expect, int elen, +@@ -9,6 +9,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -78,12 +79,6 @@ do_test(int bufsize, const char *expect, int elen, return 1; } @@ -87734,11 +87368,16 @@ index 07309c45f327..a702dd5dc034 100644 if (memcmp(test_buffer, expect, written)) { pr_warn("vsnprintf(buf, %d, \"%s\", ...) 
wrote '%s', expected '%.*s'\n", bufsize, fmt, test_buffer, written, expect); -@@ -783,6 +777,25 @@ test_pointer(void) +@@ -783,6 +778,31 @@ test_pointer(void) fourcc_pointer(); } -+static void printf_test_fn(struct printbuf *out, void *p) ++static void printf_test_fn_0(struct printbuf *out) ++{ ++ prt_str(out, "0"); ++} ++ ++static void printf_test_fn_1(struct printbuf *out, void *p) +{ + int *i = p; + @@ -87750,7 +87389,8 @@ index 07309c45f327..a702dd5dc034 100644 +{ + int i = 1; + -+ test("1", "%pf(%p)", printf_test_fn, &i); ++ test("0", "%pf()", CALL_PP(printf_test_fn_0)); ++ test("1", "%pf(%p)", CALL_PP(printf_test_fn_1, &i)); + /* + * Not tested, so we don't fail the build with -Werror: + */ @@ -87760,7 +87400,7 @@ index 07309c45f327..a702dd5dc034 100644 static void __init selftest(void) { alloced_buffer = kmalloc(BUF_SIZE + 2*PAD_SIZE, GFP_KERNEL); -@@ -794,6 +807,7 @@ static void __init selftest(void) +@@ -794,6 +814,7 @@ static void __init selftest(void) test_number(); test_string(); test_pointer(); @@ -87769,7 +87409,7 @@ index 07309c45f327..a702dd5dc034 100644 kfree(alloced_buffer); } diff --git a/lib/vsprintf.c b/lib/vsprintf.c -index 40d26a07a133..99051e345b1b 100644 +index 40d26a07a..dfca8a7c9 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -44,6 +44,7 @@ @@ -87836,7 +87476,9 @@ index 40d26a07a133..99051e345b1b 100644 + __prt_char_reserved(out, tmp[--len]); + printbuf_nul_terminate(out); +} -+ + +- for (idx = 0; idx < len; ++idx) +- buf[idx + width] = tmp[len - idx - 1]; +/** + * prt_u64 - print a simple u64, in decimal + * @out: printbuf to output to @@ -87846,9 +87488,7 @@ index 40d26a07a133..99051e345b1b 100644 +{ + prt_u64_minwidth(out, num, 0); +} - -- for (idx = 0; idx < len; ++idx) -- buf[idx + width] = tmp[len - idx - 1]; ++ +/* + * Convert passed number to decimal string. + * Returns the length of string. On buffer overflow, returns 0. 
@@ -87966,12 +87606,13 @@ index 40d26a07a133..99051e345b1b 100644 + /* leading space padding */ - field_width -= precision; - if (!(spec.flags & (ZEROPAD | LEFT))) { +- if (!(spec.flags & (ZEROPAD | LEFT))) { - while (--field_width >= 0) { - if (buf < end) - *buf = ' '; - ++buf; - } ++ if (!(spec.flags & (ZEROPAD | LEFT)) && field_width) { + __prt_chars_reserved(out, ' ', field_width); + field_width = 0; } @@ -88019,7 +87660,7 @@ index 40d26a07a133..99051e345b1b 100644 - ++buf; - } + /* zero padding */ -+ if (!(spec.flags & LEFT)) ++ if (!(spec.flags & LEFT) && field_width) + __prt_chars_reserved(out, '0', field_width); + + /* zero padding from precision */ @@ -88041,7 +87682,7 @@ index 40d26a07a133..99051e345b1b 100644 - *buf = ' '; - ++buf; - } -+ if (spec.flags & LEFT) ++ if ((spec.flags & LEFT) && field_width) + __prt_chars_reserved(out, ' ', field_width); - return buf; @@ -88646,14 +88287,14 @@ index 40d26a07a133..99051e345b1b 100644 + /* nothing to print */ + if (len == 0) + return; - -- if (check_pointer(&buf, end, addr, spec)) -- return buf; ++ + /* if we pass '%ph[CDN]', field width remains + negative value, fallback to the default */ + if (len < 0) + len = 1; -+ + +- if (check_pointer(&buf, end, addr, spec)) +- return buf; + len = min(len, 64); + + if (check_pointer(out, addr)) @@ -89894,11 +89535,12 @@ index 40d26a07a133..99051e345b1b 100644 } } -@@ -2623,7 +2477,16 @@ int format_decode(const char *fmt, struct printf_spec *spec) +@@ -2623,8 +2477,14 @@ int format_decode(const char *fmt, struct printf_spec *spec) return ++fmt - start; case 'p': - spec->type = FORMAT_TYPE_PTR; +- return ++fmt - start; + fmt++; + if (fmt[0] == 'f' && + fmt[1] == '(') { @@ -89907,16 +89549,14 @@ index 40d26a07a133..99051e345b1b 100644 + } else + spec->type = FORMAT_TYPE_PTR; + return fmt - start; -+ case '(': -+ spec->type = FORMAT_TYPE_FN; - return ++fmt - start; case '%': -@@ -2705,71 +2568,83 @@ set_precision(struct printf_spec *spec, int prec) + spec->type = 
FORMAT_TYPE_PERCENT_CHAR; +@@ -2705,53 +2565,89 @@ set_precision(struct printf_spec *spec, int prec) } } -+static void call_prt_fn(struct printbuf *out, void *fn, void **fn_args, unsigned nr_args) ++static void call_prt_fn(struct printbuf *out, struct call_pp *call_pp, void **fn_args, unsigned nr_args) +{ + typedef void (*printf_fn_0)(struct printbuf *); + typedef void (*printf_fn_1)(struct printbuf *, void *); @@ -89927,6 +89567,24 @@ index 40d26a07a133..99051e345b1b 100644 + typedef void (*printf_fn_6)(struct printbuf *, void *, void *, void *, void *, void *, void *); + typedef void (*printf_fn_7)(struct printbuf *, void *, void *, void *, void *, void *, void *, void *); + typedef void (*printf_fn_8)(struct printbuf *, void *, void *, void *, void *, void *, void *, void *, void *); ++ void *fn; ++ unsigned i; ++ ++ if (check_pointer(out, call_pp)) ++ return; ++ ++ if (call_pp->magic != CALL_PP_MAGIC) { ++ error_string(out, "bad pretty-printer magic"); ++ return; ++ } ++ ++ fn = call_pp->fn; ++ if (check_pointer(out, fn)) ++ return; ++ ++ for (i = 0; i < nr_args; i++) ++ if (check_pointer(out, fn_args[i])) ++ return; + + switch (nr_args) { + case 0: @@ -89970,17 +89628,18 @@ index 40d26a07a133..99051e345b1b 100644 * - * This function generally follows C99 vsnprintf, but has some - * extensions and a few limitations: -- * ++ * prt_vprintf works much like the traditional vsnprintf(), but outputs to a ++ * printbuf instead of raw pointer/size. + * - * - ``%n`` is unsupported - * - ``%p*`` is handled by pointer() - * - * See pointer() or Documentation/core-api/printk-formats.rst for more - * extensive description. -- * -- * **Please update the documentation in both places when making changes** -+ * prt_vprintf works much like the traditional vsnprintf(), but outputs to a -+ * printbuf instead of raw pointer/size. ++ * If you're not already dealing with a va_list consider using prt_printf(). 
* +- * **Please update the documentation in both places when making changes** +- * - * The return value is the number of characters which would - * be generated for the given input, excluding the trailing - * '\0', as per ISO C99. If you want to have the exact @@ -89988,8 +89647,7 @@ index 40d26a07a133..99051e345b1b 100644 - * (not including the trailing '\0'), use vscnprintf(). If the - * return is greater than or equal to @size, the resulting - * string is truncated. -+ * If you're not already dealing with a va_list consider using prt_printf(). - * +- * - * If you're not already dealing with a va_list consider using snprintf(). + * See the vsnprintf() documentation for format string extensions over C99. */ @@ -90018,9 +89676,7 @@ index 40d26a07a133..99051e345b1b 100644 while (*fmt) { const char *old_fmt = fmt; -+ unsigned prev_pos = out->pos; - int read = format_decode(fmt, &spec); - +@@ -2760,16 +2656,9 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args) fmt += read; switch (spec.type) { @@ -90039,7 +89695,7 @@ index 40d26a07a133..99051e345b1b 100644 case FORMAT_TYPE_WIDTH: set_field_width(&spec, va_arg(args, int)); -@@ -2779,44 +2654,58 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args) +@@ -2779,44 +2668,60 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args) set_precision(&spec, va_arg(args, int)); break; @@ -90096,18 +89752,20 @@ index 40d26a07a133..99051e345b1b 100644 + void *fn_args[8]; + void *fn = va_arg(args, void *); + -+ while (1) { ++ while (*fmt != ')') { ++ if (nr_args) { ++ if (fmt[0] != ',') ++ goto out; ++ fmt++; ++ } ++ ++ if (fmt[0] != '%' || fmt[1] != 'p') ++ goto out; ++ fmt += 2; ++ + if (WARN_ON_ONCE(nr_args == ARRAY_SIZE(fn_args))) + goto out; -+ if (*fmt++ != '%') -+ goto out; -+ if (*fmt++ != 'p') -+ goto out; + fn_args[nr_args++] = va_arg(args, void *); -+ if (*fmt == ')') -+ break; -+ if (*fmt++ != ',') -+ goto out; + } + + call_prt_fn(out, fn, fn_args, nr_args); @@ -90123,7 
+89781,7 @@ index 40d26a07a133..99051e345b1b 100644 break; case FORMAT_TYPE_INVALID: -@@ -2869,21 +2758,70 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args) +@@ -2869,21 +2774,70 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args) num = va_arg(args, unsigned int); } @@ -90142,7 +89800,9 @@ index 40d26a07a133..99051e345b1b 100644 + printbuf_nul_terminate(out); +} +EXPORT_SYMBOL(prt_vprintf); -+ + +- /* the trailing null byte doesn't count towards the total */ +- return str-buf; +/** + * prt_printf - Format a string, outputting to a printbuf + * @out: The printbuf to output to @@ -90164,9 +89824,7 @@ index 40d26a07a133..99051e345b1b 100644 + va_end(args); +} +EXPORT_SYMBOL(prt_printf); - -- /* the trailing null byte doesn't count towards the total */ -- return str-buf; ++ +/** + * vsnprintf - Format a string and place it in a buffer + * @buf: The buffer to place the result into @@ -90204,7 +89862,7 @@ index 40d26a07a133..99051e345b1b 100644 } EXPORT_SYMBOL(vsnprintf); -@@ -3021,53 +2959,46 @@ EXPORT_SYMBOL(sprintf); +@@ -3021,53 +2975,46 @@ EXPORT_SYMBOL(sprintf); * bstr_printf() - Binary data to text string */ @@ -90275,7 +89933,7 @@ index 40d26a07a133..99051e345b1b 100644 value; \ }) -@@ -3098,16 +3029,12 @@ int vbin_printf(u32 *bin_buf, size_t size, const char *fmt, va_list args) +@@ -3098,16 +3045,12 @@ int vbin_printf(u32 *bin_buf, size_t size, const char *fmt, va_list args) case FORMAT_TYPE_STR: { const char *save_str = va_arg(args, char *); const char *err_msg; @@ -90293,7 +89951,7 @@ index 40d26a07a133..99051e345b1b 100644 break; } -@@ -3127,12 +3054,7 @@ int vbin_printf(u32 *bin_buf, size_t size, const char *fmt, va_list args) +@@ -3127,12 +3070,7 @@ int vbin_printf(u32 *bin_buf, size_t size, const char *fmt, va_list args) save_arg(void *); break; } @@ -90307,7 +89965,7 @@ index 40d26a07a133..99051e345b1b 100644 } /* skip all alphanumeric pointer suffixes */ while (isalnum(*fmt)) -@@ -3170,15 +3092,15 @@ int 
vbin_printf(u32 *bin_buf, size_t size, const char *fmt, va_list args) +@@ -3170,15 +3108,15 @@ int vbin_printf(u32 *bin_buf, size_t size, const char *fmt, va_list args) } out: @@ -90327,7 +89985,7 @@ index 40d26a07a133..99051e345b1b 100644 * @fmt: The format string to use * @bin_buf: Binary arguments for the format string * -@@ -3188,26 +3110,14 @@ EXPORT_SYMBOL_GPL(vbin_printf); +@@ -3188,26 +3126,14 @@ EXPORT_SYMBOL_GPL(vbin_printf); * * The format follows C99 vsnprintf, but has some extensions: * see vsnprintf comment for details. @@ -90357,7 +90015,7 @@ index 40d26a07a133..99051e345b1b 100644 #define get_arg(type) \ ({ \ -@@ -3224,12 +3134,6 @@ int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf) +@@ -3224,12 +3150,6 @@ int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf) value; \ }) @@ -90370,7 +90028,7 @@ index 40d26a07a133..99051e345b1b 100644 while (*fmt) { const char *old_fmt = fmt; int read = format_decode(fmt, &spec); -@@ -3237,16 +3141,9 @@ int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf) +@@ -3237,16 +3157,9 @@ int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf) fmt += read; switch (spec.type) { @@ -90389,7 +90047,7 @@ index 40d26a07a133..99051e345b1b 100644 case FORMAT_TYPE_WIDTH: set_field_width(&spec, get_arg(int)); -@@ -3256,38 +3153,24 @@ int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf) +@@ -3256,38 +3169,24 @@ int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf) set_precision(&spec, get_arg(int)); break; @@ -90436,7 +90094,7 @@ index 40d26a07a133..99051e345b1b 100644 /* Non function dereferences were already done */ switch (*fmt) { case 'S': -@@ -3303,17 +3186,12 @@ int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf) +@@ -3303,17 +3202,12 @@ int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf) break; } /* Pointer dereference was already processed 
*/ @@ -90458,7 +90116,7 @@ index 40d26a07a133..99051e345b1b 100644 while (isalnum(*fmt)) fmt++; -@@ -3321,9 +3199,7 @@ int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf) +@@ -3321,9 +3215,7 @@ int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf) } case FORMAT_TYPE_PERCENT_CHAR: @@ -90469,7 +90127,7 @@ index 40d26a07a133..99051e345b1b 100644 break; case FORMAT_TYPE_INVALID: -@@ -3366,23 +3242,87 @@ int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf) +@@ -3366,23 +3258,87 @@ int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf) num = get_arg(int); } @@ -90568,7 +90226,7 @@ index 40d26a07a133..99051e345b1b 100644 EXPORT_SYMBOL_GPL(bstr_printf); diff --git a/mm/Makefile b/mm/Makefile -index 4cc13f3179a5..7e852599b917 100644 +index 4cc13f317..7e852599b 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -54,7 +54,7 @@ obj-y := filemap.o mempool.o oom_kill.o fadvise.o \ @@ -90581,7 +90239,7 @@ index 4cc13f3179a5..7e852599b917 100644 # Give 'page_alloc' its own module-parameter namespace page-alloc-y := page_alloc.o diff --git a/mm/filemap.c b/mm/filemap.c -index 9a1eef6c5d35..3f1aa900a1c4 100644 +index be1859a27..222bcfe7a 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2223,6 +2223,7 @@ unsigned find_get_pages_range(struct address_space *mapping, pgoff_t *start, @@ -90593,7 +90251,7 @@ index 9a1eef6c5d35..3f1aa900a1c4 100644 /** * find_get_pages_contig - gang contiguous pagecache lookup diff --git a/mm/memcontrol.c b/mm/memcontrol.c -index 598fece89e2b..57861dc9fee5 100644 +index 598fece89..57861dc9f 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -62,7 +62,7 @@ @@ -90702,7 +90360,7 @@ index 598fece89e2b..57861dc9fee5 100644 #define K(x) ((x) << (PAGE_SHIFT-10)) diff --git a/mm/nommu.c b/mm/nommu.c -index 9d7afc2d959e..dd53020262d8 100644 +index 9d7afc2d9..dd5302026 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -281,6 +281,24 @@ void *vzalloc_node(unsigned long size, int 
node) @@ -90731,7 +90389,7 @@ index 9d7afc2d959e..dd53020262d8 100644 * vmalloc_32 - allocate virtually contiguous memory (32bit addressable) * @size: allocation size diff --git a/mm/oom_kill.c b/mm/oom_kill.c -index 49d7df39b02d..9c550a283037 100644 +index 49d7df39b..9c550a283 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -168,27 +168,6 @@ static bool oom_unkillable_task(struct task_struct *p) @@ -90771,72 +90429,35 @@ index 49d7df39b02d..9c550a283037 100644 } if (sysctl_oom_dump_tasks) dump_tasks(oc); -diff --git a/mm/show_mem.c b/mm/show_mem.c -new file mode 100644 -index 000000000000..24b662f64d40 ---- /dev/null +diff --git a/lib/show_mem.c b/mm/show_mem.c +similarity index 83% +rename from lib/show_mem.c +rename to mm/show_mem.c +index 1c26c14ff..47225158c 100644 +--- a/lib/show_mem.c +++ b/mm/show_mem.c -@@ -0,0 +1,58 @@ -+// SPDX-License-Identifier: GPL-2.0-only -+/* -+ * Generic show_mem() implementation -+ * -+ * Copyright (C) 2008 Johannes Weiner -+ */ -+ -+#include -+#include +@@ -7,6 +7,9 @@ + + #include + #include +#include + +#include "slab.h" -+ -+void show_mem(unsigned int filter, nodemask_t *nodemask) -+{ -+ pg_data_t *pgdat; -+ unsigned long total = 0, reserved = 0, highmem = 0; -+ struct printbuf buf = PRINTBUF; -+ -+ printk("Mem-Info:\n"); -+ show_free_areas(filter, nodemask); -+ -+ for_each_online_pgdat(pgdat) { -+ int zoneid; -+ -+ for (zoneid = 0; zoneid < MAX_NR_ZONES; zoneid++) { -+ struct zone *zone = &pgdat->node_zones[zoneid]; -+ if (!populated_zone(zone)) -+ continue; -+ -+ total += zone->present_pages; -+ reserved += zone->present_pages - zone_managed_pages(zone); -+ -+ if (is_highmem_idx(zoneid)) -+ highmem += zone->present_pages; -+ } -+ } -+ -+ printk("%lu pages RAM\n", total); -+ printk("%lu pages HighMem/MovableOnly\n", highmem); -+ printk("%lu pages reserved\n", reserved); -+#ifdef CONFIG_CMA -+ printk("%lu pages cma reserved\n", totalcma_pages); -+#endif -+#ifdef CONFIG_MEMORY_FAILURE -+ printk("%lu pages hwpoisoned\n", 
atomic_long_read(&num_poisoned_pages)); -+#endif -+ -+ pr_info("Unreclaimable slab info:\n"); -+ dump_unreclaimable_slab(&buf); -+ printk("%s", printbuf_str(&buf)); -+ printbuf_reset(&buf); + + void show_mem(unsigned int filter, nodemask_t *nodemask) + { +@@ -41,4 +44,9 @@ void show_mem(unsigned int filter, nodemask_t *nodemask) + #ifdef CONFIG_MEMORY_FAILURE + printk("%lu pages hwpoisoned\n", atomic_long_read(&num_poisoned_pages)); + #endif ++ printk("Unreclaimable slab info:\n"); ++ printk("%pf()", CALL_PP(dump_unreclaimable_slab)); + + printk("Shrinkers:\n"); -+ shrinkers_to_text(&buf); -+ printk("%s", printbuf_str(&buf)); -+ printbuf_exit(&buf); -+} ++ printk("%pf()", CALL_PP(shrinkers_to_text)); + } diff --git a/mm/slab.h b/mm/slab.h -index 95eb34174c1b..a91fc5aa1054 100644 +index 95eb34174..a91fc5aa1 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -805,10 +805,12 @@ static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node) @@ -90855,7 +90476,7 @@ index 95eb34174c1b..a91fc5aa1054 100644 } #endif diff --git a/mm/slab_common.c b/mm/slab_common.c -index 2b3206a2c3b5..333f431e0708 100644 +index 2b3206a2c..333f431e0 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -24,6 +24,7 @@ @@ -90947,7 +90568,7 @@ index 2b3206a2c3b5..333f431e0708 100644 } diff --git a/mm/vmalloc.c b/mm/vmalloc.c -index cadfbb5155ea..60456a184b6a 100644 +index cadfbb515..60456a184 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -3363,6 +3363,27 @@ void *vzalloc_node(unsigned long size, int node) @@ -90979,7 +90600,7 @@ index cadfbb5155ea..60456a184b6a 100644 #define GFP_VMALLOC32 (GFP_DMA32 | GFP_KERNEL) #elif defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA) diff --git a/mm/vmscan.c b/mm/vmscan.c -index 1678802e03e7..e0bd7af711cd 100644 +index 1678802e0..d911c5e33 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -50,6 +50,7 @@ @@ -90999,7 +90620,7 @@ index 1678802e03e7..e0bd7af711cd 100644 + struct shrink_control sc = { .gfp_mask = GFP_KERNEL, }; + + if (shrinker->name[0]) 
-+ prt_printf(out, "%s", shrinker->name); ++ prt_str(out, shrinker->name); + else + prt_printf(out, "%ps:", shrinker->scan_objects); + prt_newline(out); @@ -91036,7 +90657,7 @@ index 1678802e03e7..e0bd7af711cd 100644 + int i, nr = 0; + + if (!down_read_trylock(&shrinker_rwsem)) { -+ prt_printf(out, "(couldn't take shrinker lock)"); ++ prt_str(out, "(couldn't take shrinker lock)"); + return; + } + @@ -91098,7 +90719,7 @@ index 1678802e03e7..e0bd7af711cd 100644 count_vm_events(SLABS_SCANNED, shrinkctl->nr_scanned); total_scan -= shrinkctl->nr_scanned; diff --git a/tools/testing/nvdimm/test/ndtest.c b/tools/testing/nvdimm/test/ndtest.c -index 4d1a947367f9..a2097955dace 100644 +index 4d1a94736..a2097955d 100644 --- a/tools/testing/nvdimm/test/ndtest.c +++ b/tools/testing/nvdimm/test/ndtest.c @@ -12,7 +12,7 @@ @@ -91152,3 +90773,6 @@ index 4d1a947367f9..a2097955dace 100644 } static DEVICE_ATTR_RO(flags); +-- +2.37.0.rc0.15.g3b9a5a33c2 +