From ecc5f1adb3c5689024530426228d4cdb9fe9876b Mon Sep 17 00:00:00 2001 From: ptr1337 Date: Sat, 10 Sep 2022 20:01:02 +0200 Subject: [PATCH] update bcachefs for 5.19 (#590) Signed-off-by: Peter Jung Signed-off-by: Peter Jung --- PKGBUILD | 2 +- .../5.19/0008-5.19-bcachefs.patch | 6237 +++++++++-------- 2 files changed, 3329 insertions(+), 2910 deletions(-) diff --git a/PKGBUILD b/PKGBUILD index 0442e56..985e72b 100644 --- a/PKGBUILD +++ b/PKGBUILD @@ -835,7 +835,7 @@ case $_basever in '19661ec0d39f9663452b34433214c755179894528bf73a42f6ba52ccf572832a' '9df628fd530950e37d31da854cb314d536f33c83935adf5c47e71266a55f7004' 'd2255d8f60d90d1c1d76ab7808d4a04844b6a1b3c83390ac44de0e4b721c3577' - '829631f803f11579972aa19f3f7f2ae11b0e380c01745a05776dd02b8e6c8855' + '3aaf65ddf916bfda626ea441514cbebc3d5597caf1a203180aad5319005c4259' '9fad4a40449e09522899955762c8928ae17f4cdaa16e01239fd12592e9d58177' 'a557b342111849a5f920bbe1c129f3ff1fc1eff62c6bd6685e0972fc88e39911' 'fbf28a5bcf0ce0443ac2a621796ff2c2b1ade06b263d8f01356fae9a3035c585' diff --git a/linux-tkg-patches/5.19/0008-5.19-bcachefs.patch b/linux-tkg-patches/5.19/0008-5.19-bcachefs.patch index f1f6223..ca05f3b 100644 --- a/linux-tkg-patches/5.19/0008-5.19-bcachefs.patch +++ b/linux-tkg-patches/5.19/0008-5.19-bcachefs.patch @@ -1,6 +1,6 @@ -From bb574d84674f3be67f0cf87aaa6bf99033c7db33 Mon Sep 17 00:00:00 2001 +From 3a641d387d7678759fe5b1359c363145ead24964 Mon Sep 17 00:00:00 2001 From: Peter Jung -Date: Fri, 5 Aug 2022 21:43:09 +0200 +Date: Sat, 10 Sep 2022 16:26:37 +0200 Subject: [PATCH] bcachefs Signed-off-by: Peter Jung @@ -27,21 +27,21 @@ Signed-off-by: Peter Jung fs/Kconfig | 1 + fs/Makefile | 1 + fs/bcachefs/Kconfig | 59 + - fs/bcachefs/Makefile | 69 + + fs/bcachefs/Makefile | 70 + fs/bcachefs/acl.c | 406 ++ fs/bcachefs/acl.h | 58 + - fs/bcachefs/alloc_background.c | 1552 ++++++++ + fs/bcachefs/alloc_background.c | 1551 ++++++++ fs/bcachefs/alloc_background.h | 183 + - fs/bcachefs/alloc_foreground.c | 1305 ++++++ - fs/bcachefs/alloc_foreground.h | 173 + + fs/bcachefs/alloc_foreground.c | 1382 +++++++ + fs/bcachefs/alloc_foreground.h | 181 + fs/bcachefs/alloc_types.h | 87 + - fs/bcachefs/backpointers.c | 875 ++++ + fs/bcachefs/backpointers.c | 890 +++++ fs/bcachefs/backpointers.h | 38 + - fs/bcachefs/bcachefs.h | 1000 +++++ - fs/bcachefs/bcachefs_format.h | 2052 ++++++++++ + fs/bcachefs/bcachefs.h | 1001 +++++ + fs/bcachefs/bcachefs_format.h | 2121 ++++++++++ fs/bcachefs/bcachefs_ioctl.h | 368 ++ - fs/bcachefs/bkey.c | 1175 ++++++ - fs/bcachefs/bkey.h | 566 +++ + fs/bcachefs/bkey.c | 1203 ++++++ + fs/bcachefs/bkey.h | 571 +++ fs/bcachefs/bkey_buf.h | 60 + fs/bcachefs/bkey_methods.c | 503 +++ fs/bcachefs/bkey_methods.h | 175 + @@ -49,22 +49,23 @@ Signed-off-by: Peter Jung fs/bcachefs/bkey_sort.h | 44 + fs/bcachefs/bset.c | 1598 ++++++++ fs/bcachefs/bset.h | 615 +++ - fs/bcachefs/btree_cache.c | 1170 ++++++ - fs/bcachefs/btree_cache.h | 107 + - fs/bcachefs/btree_gc.c | 2098 ++++++++++ + fs/bcachefs/btree_cache.c | 1169 ++++++ + fs/bcachefs/btree_cache.h | 105 + + fs/bcachefs/btree_gc.c | 2106 ++++++++++ fs/bcachefs/btree_gc.h | 112 + - fs/bcachefs/btree_io.c | 2150 ++++++++++ + fs/bcachefs/btree_io.c | 2153 ++++++++++ fs/bcachefs/btree_io.h | 222 ++ - fs/bcachefs/btree_iter.c | 3513 +++++++++++++++++ - fs/bcachefs/btree_iter.h | 550 +++ - fs/bcachefs/btree_key_cache.c | 855 ++++ + fs/bcachefs/btree_iter.c | 3109 +++++++++++++++ + fs/bcachefs/btree_iter.h | 541 +++ + fs/bcachefs/btree_key_cache.c | 943 +++++ fs/bcachefs/btree_key_cache.h | 47 + - 
fs/bcachefs/btree_locking.h | 289 ++ - fs/bcachefs/btree_types.h | 697 ++++ + fs/bcachefs/btree_locking.c | 466 +++ + fs/bcachefs/btree_locking.h | 401 ++ + fs/bcachefs/btree_types.h | 695 ++++ fs/bcachefs/btree_update.h | 158 + - fs/bcachefs/btree_update_interior.c | 2252 +++++++++++ - fs/bcachefs/btree_update_interior.h | 321 ++ - fs/bcachefs/btree_update_leaf.c | 1800 +++++++++ + fs/bcachefs/btree_update_interior.c | 2271 +++++++++++ + fs/bcachefs/btree_update_interior.h | 322 ++ + fs/bcachefs/btree_update_leaf.c | 1823 +++++++++ fs/bcachefs/buckets.c | 2113 ++++++++++ fs/bcachefs/buckets.h | 300 ++ fs/bcachefs/buckets_types.h | 103 + @@ -83,19 +84,19 @@ Signed-off-by: Peter Jung fs/bcachefs/counters.c | 107 + fs/bcachefs/counters.h | 17 + fs/bcachefs/darray.h | 77 + - fs/bcachefs/data_update.c | 376 ++ + fs/bcachefs/data_update.c | 373 ++ fs/bcachefs/data_update.h | 38 + - fs/bcachefs/debug.c | 764 ++++ + fs/bcachefs/debug.c | 781 ++++ fs/bcachefs/debug.h | 30 + fs/bcachefs/dirent.c | 565 +++ fs/bcachefs/dirent.h | 67 + - fs/bcachefs/disk_groups.c | 506 +++ - fs/bcachefs/disk_groups.h | 90 + + fs/bcachefs/disk_groups.c | 508 +++ + fs/bcachefs/disk_groups.h | 91 + fs/bcachefs/ec.c | 1673 ++++++++ fs/bcachefs/ec.h | 230 ++ fs/bcachefs/ec_types.h | 46 + fs/bcachefs/errcode.c | 51 + - fs/bcachefs/errcode.h | 65 + + fs/bcachefs/errcode.h | 75 + fs/bcachefs/error.c | 184 + fs/bcachefs/error.h | 223 ++ fs/bcachefs/extent_update.c | 178 + @@ -107,24 +108,24 @@ Signed-off-by: Peter Jung fs/bcachefs/fifo.h | 127 + fs/bcachefs/fs-common.c | 496 +++ fs/bcachefs/fs-common.h | 43 + - fs/bcachefs/fs-io.c | 3492 ++++++++++++++++ + fs/bcachefs/fs-io.c | 3492 +++++++++++++++++ fs/bcachefs/fs-io.h | 56 + fs/bcachefs/fs-ioctl.c | 523 +++ fs/bcachefs/fs-ioctl.h | 81 + fs/bcachefs/fs.c | 1939 +++++++++ fs/bcachefs/fs.h | 208 + - fs/bcachefs/fsck.c | 2390 +++++++++++ + fs/bcachefs/fsck.c | 2395 +++++++++++ fs/bcachefs/fsck.h | 8 + fs/bcachefs/inode.c | 771 ++++ fs/bcachefs/inode.h | 189 + fs/bcachefs/io.c | 2423 ++++++++++++ fs/bcachefs/io.h | 189 + fs/bcachefs/io_types.h | 161 + - fs/bcachefs/journal.c | 1432 +++++++ + fs/bcachefs/journal.c | 1433 +++++++ fs/bcachefs/journal.h | 521 +++ fs/bcachefs/journal_io.c | 1735 ++++++++ fs/bcachefs/journal_io.h | 59 + - fs/bcachefs/journal_reclaim.c | 852 ++++ + fs/bcachefs/journal_reclaim.c | 853 ++++ fs/bcachefs/journal_reclaim.h | 86 + fs/bcachefs/journal_sb.c | 220 ++ fs/bcachefs/journal_sb.h | 24 + @@ -138,7 +139,7 @@ Signed-off-by: Peter Jung fs/bcachefs/lru.h | 19 + fs/bcachefs/migrate.c | 186 + fs/bcachefs/migrate.h | 7 + - fs/bcachefs/move.c | 952 +++++ + fs/bcachefs/move.c | 954 +++++ fs/bcachefs/move.h | 67 + fs/bcachefs/move_types.h | 19 + fs/bcachefs/movinggc.c | 285 ++ @@ -148,7 +149,7 @@ Signed-off-by: Peter Jung fs/bcachefs/quota.c | 823 ++++ fs/bcachefs/quota.h | 71 + fs/bcachefs/quota_types.h | 43 + - fs/bcachefs/rebalance.c | 361 ++ + fs/bcachefs/rebalance.c | 362 ++ fs/bcachefs/rebalance.h | 28 + fs/bcachefs/rebalance_types.h | 26 + fs/bcachefs/recovery.c | 1597 ++++++++ @@ -160,22 +161,22 @@ Signed-off-by: Peter Jung fs/bcachefs/replicas_types.h | 10 + fs/bcachefs/siphash.c | 173 + fs/bcachefs/siphash.h | 87 + - fs/bcachefs/str_hash.h | 351 ++ - fs/bcachefs/subvolume.c | 1108 ++++++ + fs/bcachefs/str_hash.h | 368 ++ + fs/bcachefs/subvolume.c | 1110 ++++++ fs/bcachefs/subvolume.h | 137 + fs/bcachefs/subvolume_types.h | 9 + fs/bcachefs/super-io.c | 1605 ++++++++ fs/bcachefs/super-io.h | 126 + - fs/bcachefs/super.c | 1950 +++++++++ + 
fs/bcachefs/super.c | 1968 ++++++++++ fs/bcachefs/super.h | 264 ++ fs/bcachefs/super_types.h | 51 + - fs/bcachefs/sysfs.c | 943 +++++ + fs/bcachefs/sysfs.c | 925 +++++ fs/bcachefs/sysfs.h | 48 + fs/bcachefs/tests.c | 976 +++++ fs/bcachefs/tests.h | 15 + - fs/bcachefs/trace.c | 12 + - fs/bcachefs/util.c | 964 +++++ - fs/bcachefs/util.h | 783 ++++ + fs/bcachefs/trace.c | 14 + + fs/bcachefs/util.c | 971 +++++ + fs/bcachefs/util.h | 785 ++++ fs/bcachefs/varint.c | 121 + fs/bcachefs/varint.h | 11 + fs/bcachefs/vstructs.h | 63 + @@ -196,29 +197,28 @@ Signed-off-by: Peter Jung include/linux/list_bl.h | 22 + include/linux/lockdep.h | 4 + include/linux/pretty-printers.h | 10 + - include/linux/printbuf.h | 283 ++ + include/linux/printbuf.h | 306 ++ include/linux/sched.h | 1 + include/linux/seq_buf.h | 162 - include/linux/shrinker.h | 8 + - include/linux/six.h | 203 + + include/linux/six.h | 220 ++ include/linux/string.h | 5 + include/linux/string_helpers.h | 8 +- include/linux/trace_events.h | 2 +- include/linux/trace_seq.h | 17 +- include/linux/vmalloc.h | 1 + - include/net/9p/9p.h | 2 +- - include/net/9p/client.h | 20 +- - include/trace/events/bcachefs.h | 1045 +++++ + include/trace/events/bcachefs.h | 1100 ++++++ init/init_task.c | 1 + kernel/Kconfig.locks | 3 + kernel/locking/Makefile | 1 + kernel/locking/lockdep.c | 20 + - kernel/locking/six.c | 759 ++++ + kernel/locking/six.c | 748 ++++ kernel/module/main.c | 4 +- + kernel/stacktrace.c | 2 + kernel/trace/trace.c | 45 +- kernel/trace/trace_dynevent.c | 34 +- kernel/trace/trace_events_filter.c | 2 +- - kernel/trace/trace_events_synth.c | 32 +- + kernel/trace/trace_events_synth.c | 53 +- kernel/trace/trace_functions_graph.c | 6 +- kernel/trace/trace_kprobe.c | 2 +- kernel/trace/trace_seq.c | 111 +- @@ -226,10 +226,11 @@ Signed-off-by: Peter Jung lib/Kconfig.debug | 9 + lib/Makefile | 8 +- {drivers/md/bcache => lib}/closure.c | 35 +- + lib/errname.c | 1 + lib/generic-radix-tree.c | 76 +- lib/hexdump.c | 246 +- lib/pretty-printers.c | 60 + - lib/printbuf.c | 258 ++ + lib/printbuf.c | 368 ++ lib/seq_buf.c | 397 -- lib/string_helpers.c | 224 +- lib/test_hexdump.c | 30 +- @@ -245,13 +246,8 @@ Signed-off-by: Peter Jung mm/slab_common.c | 53 +- mm/vmalloc.c | 21 + mm/vmscan.c | 88 + - net/9p/client.c | 97 +- - net/9p/trans_fd.c | 12 +- - net/9p/trans_rdma.c | 4 +- - net/9p/trans_virtio.c | 4 +- - net/9p/trans_xen.c | 2 +- tools/testing/nvdimm/test/ndtest.c | 22 +- - 246 files changed, 84261 insertions(+), 2219 deletions(-) + 242 files changed, 84952 insertions(+), 2177 deletions(-) create mode 100644 .github/ISSUE_TEMPLATE/bug_report.md create mode 100644 fs/bcachefs/Kconfig create mode 100644 fs/bcachefs/Makefile @@ -286,6 +282,7 @@ Signed-off-by: Peter Jung create mode 100644 fs/bcachefs/btree_iter.h create mode 100644 fs/bcachefs/btree_key_cache.c create mode 100644 fs/bcachefs/btree_key_cache.h + create mode 100644 fs/bcachefs/btree_locking.c create mode 100644 fs/bcachefs/btree_locking.h create mode 100644 fs/bcachefs/btree_types.h create mode 100644 fs/bcachefs/btree_update.h @@ -423,7 +420,7 @@ Signed-off-by: Peter Jung diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 -index 0000000000000..8af34357dd989 +index 000000000000..8af34357dd98 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,61 @@ @@ -489,7 +486,7 @@ index 0000000000000..8af34357dd989 +* provide the output of `bcachefs list_journal -a | zstd -f -T0 -o ../journal.log.zst` +*compress & upload all the 
`metdata.dump.*` files from: bcachefs dump -o metadata.dump diff --git a/Documentation/core-api/printk-formats.rst b/Documentation/core-api/printk-formats.rst -index 5e89497ba314e..4f4a35b3aadc2 100644 +index 5e89497ba314..4f4a35b3aadc 100644 --- a/Documentation/core-api/printk-formats.rst +++ b/Documentation/core-api/printk-formats.rst @@ -625,6 +625,28 @@ Examples:: @@ -522,7 +519,7 @@ index 5e89497ba314e..4f4a35b3aadc2 100644 ====== diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c -index 0fbda89cd1bb5..05654dbeb2c44 100644 +index 0fbda89cd1bb..05654dbeb2c4 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -37,7 +37,7 @@ @@ -574,7 +571,7 @@ index 0fbda89cd1bb5..05654dbeb2c44 100644 } diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c -index d96fd14bd7c9c..b34de62e65ceb 100644 +index d96fd14bd7c9..b34de62e65ce 100644 --- a/arch/powerpc/kernel/security.c +++ b/arch/powerpc/kernel/security.c @@ -10,7 +10,7 @@ @@ -723,7 +720,7 @@ index d96fd14bd7c9c..b34de62e65ceb 100644 #ifdef CONFIG_PPC_BOOK3S_64 diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c -index 82cae08976bcd..fe2b41858b5fe 100644 +index 92074a6c49d4..30965af0b93e 100644 --- a/arch/powerpc/platforms/pseries/papr_scm.c +++ b/arch/powerpc/platforms/pseries/papr_scm.c @@ -12,7 +12,7 @@ @@ -735,7 +732,7 @@ index 82cae08976bcd..fe2b41858b5fe 100644 #include #include -@@ -1142,7 +1142,7 @@ static ssize_t perf_stats_show(struct device *dev, +@@ -1111,7 +1111,7 @@ static ssize_t perf_stats_show(struct device *dev, { int index; ssize_t rc; @@ -744,7 +741,7 @@ index 82cae08976bcd..fe2b41858b5fe 100644 struct papr_scm_perf_stat *stat; struct papr_scm_perf_stats *stats; struct nvdimm *dimm = to_nvdimm(dev); -@@ -1165,18 +1165,17 @@ static ssize_t perf_stats_show(struct device *dev, +@@ -1134,18 +1134,17 @@ static ssize_t perf_stats_show(struct device *dev, * values. Since stat_id is essentially a char string of * 8 bytes, simply use the string format specifier to print it. 
*/ @@ -767,7 +764,7 @@ index 82cae08976bcd..fe2b41858b5fe 100644 } static DEVICE_ATTR_ADMIN_RO(perf_stats); -@@ -1185,7 +1184,7 @@ static ssize_t flags_show(struct device *dev, +@@ -1154,7 +1153,7 @@ static ssize_t flags_show(struct device *dev, { struct nvdimm *dimm = to_nvdimm(dev); struct papr_scm_priv *p = nvdimm_provider_data(dimm); @@ -776,7 +773,7 @@ index 82cae08976bcd..fe2b41858b5fe 100644 u64 health; int rc; -@@ -1196,29 +1195,28 @@ static ssize_t flags_show(struct device *dev, +@@ -1165,29 +1164,28 @@ static ssize_t flags_show(struct device *dev, /* Copy health_bitmap locally, check masks & update out buffer */ health = READ_ONCE(p->health_bitmap); @@ -816,7 +813,7 @@ index 82cae08976bcd..fe2b41858b5fe 100644 DEVICE_ATTR_RO(flags); diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c -index f276aff521e8b..50c12711a249b 100644 +index f276aff521e8..50c12711a249 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -19,7 +19,7 @@ @@ -883,7 +880,7 @@ index f276aff521e8b..50c12711a249b 100644 ret = rdtgroup_setup_root(); if (ret) diff --git a/block/bio.c b/block/bio.c -index 51c99f2c5c908..480ac5a73232e 100644 +index eb7cc591ee93..03fedd67888a 100644 --- a/block/bio.c +++ b/block/bio.c @@ -582,15 +582,15 @@ struct bio *bio_kmalloc(unsigned short nr_vecs, gfp_t gfp_mask) @@ -905,7 +902,7 @@ index 51c99f2c5c908..480ac5a73232e 100644 /** * bio_truncate - truncate the bio to small size of @new_size -@@ -1447,6 +1447,7 @@ void bio_set_pages_dirty(struct bio *bio) +@@ -1426,6 +1426,7 @@ void bio_set_pages_dirty(struct bio *bio) set_page_dirty_lock(bvec->bv_page); } } @@ -913,7 +910,7 @@ index 51c99f2c5c908..480ac5a73232e 100644 /* * bio_check_pages_dirty() will check that all the BIO's pages are still dirty. 
-@@ -1506,6 +1507,7 @@ void bio_check_pages_dirty(struct bio *bio) +@@ -1485,6 +1486,7 @@ void bio_check_pages_dirty(struct bio *bio) spin_unlock_irqrestore(&bio_dirty_lock, flags); schedule_work(&bio_dirty_work); } @@ -922,7 +919,7 @@ index 51c99f2c5c908..480ac5a73232e 100644 static inline bool bio_remaining_done(struct bio *bio) { diff --git a/block/blk-core.c b/block/blk-core.c -index 27fb1357ad4b8..7697abda9fadc 100644 +index 27fb1357ad4b..7697abda9fad 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -207,6 +207,7 @@ const char *blk_status_to_str(blk_status_t status) @@ -934,7 +931,7 @@ index 27fb1357ad4b8..7697abda9fadc 100644 /** * blk_sync_queue - cancel any pending callbacks on a queue diff --git a/block/blk.h b/block/blk.h -index 434017701403f..066fd89c916ba 100644 +index 434017701403..066fd89c916b 100644 --- a/block/blk.h +++ b/block/blk.h @@ -240,7 +240,6 @@ static inline void blk_integrity_del(struct gendisk *disk) @@ -946,7 +943,7 @@ index 434017701403f..066fd89c916ba 100644 bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio, unsigned int nr_segs); diff --git a/drivers/acpi/apei/erst-dbg.c b/drivers/acpi/apei/erst-dbg.c -index 8bc71cdc2270a..370993c9c3816 100644 +index 8bc71cdc2270..370993c9c381 100644 --- a/drivers/acpi/apei/erst-dbg.c +++ b/drivers/acpi/apei/erst-dbg.c @@ -11,6 +11,7 @@ @@ -958,10 +955,10 @@ index 8bc71cdc2270a..370993c9c3816 100644 #include #include diff --git a/drivers/block/loop.c b/drivers/block/loop.c -index 084f9b8a0ba3c..7a420623ac384 100644 +index a59910ef948e..d82b3288227c 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c -@@ -1166,8 +1166,6 @@ static void __loop_clr_fd(struct loop_device *lo, bool release) +@@ -1171,8 +1171,6 @@ static void __loop_clr_fd(struct loop_device *lo, bool release) if (!release) blk_mq_unfreeze_queue(lo->lo_queue); @@ -971,7 +968,7 @@ index 084f9b8a0ba3c..7a420623ac384 100644 int err; diff --git a/drivers/clk/tegra/clk-bpmp.c b/drivers/clk/tegra/clk-bpmp.c -index 3748a39dae7cb..7e3b48ed9d455 100644 +index 3748a39dae7c..7e3b48ed9d45 100644 --- a/drivers/clk/tegra/clk-bpmp.c +++ b/drivers/clk/tegra/clk-bpmp.c @@ -5,7 +5,7 @@ @@ -1033,7 +1030,7 @@ index 3748a39dae7cb..7e3b48ed9d455 100644 static int tegra_bpmp_probe_clocks(struct tegra_bpmp *bpmp, diff --git a/drivers/input/joystick/analog.c b/drivers/input/joystick/analog.c -index 3088c5b829f07..a8c5f90e8208b 100644 +index 3088c5b829f0..a8c5f90e8208 100644 --- a/drivers/input/joystick/analog.c +++ b/drivers/input/joystick/analog.c @@ -19,7 +19,7 @@ @@ -1080,7 +1077,7 @@ index 3088c5b829f07..a8c5f90e8208b 100644 /* diff --git a/drivers/md/bcache/Kconfig b/drivers/md/bcache/Kconfig -index cf3e8096942a0..f1a1f0c4a0eaf 100644 +index cf3e8096942a..f1a1f0c4a0ea 100644 --- a/drivers/md/bcache/Kconfig +++ b/drivers/md/bcache/Kconfig @@ -4,6 +4,7 @@ config BCACHE @@ -1108,7 +1105,7 @@ index cf3e8096942a0..f1a1f0c4a0eaf 100644 bool "Asynchronous device registration (EXPERIMENTAL)" depends on BCACHE diff --git a/drivers/md/bcache/Makefile b/drivers/md/bcache/Makefile -index 5b87e59676b86..054e8a33a7ab1 100644 +index 5b87e59676b8..054e8a33a7ab 100644 --- a/drivers/md/bcache/Makefile +++ b/drivers/md/bcache/Makefile @@ -2,6 +2,6 @@ @@ -1121,7 +1118,7 @@ index 5b87e59676b86..054e8a33a7ab1 100644 + journal.o movinggc.o request.o stats.o super.o sysfs.o trace.o\ util.o writeback.o features.o diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h -index 2acda9cea0f90..bf96b3e6b6eb8 100644 +index 2acda9cea0f9..bf96b3e6b6eb 100644 --- 
a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -179,6 +179,7 @@ @@ -1141,7 +1138,7 @@ index 2acda9cea0f90..bf96b3e6b6eb8 100644 struct bucket { atomic_t pin; diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c -index 3563d15dbaf27..9249aba333bcd 100644 +index 3563d15dbaf2..9249aba333bc 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -2913,7 +2913,6 @@ static int __init bcache_init(void) @@ -1153,7 +1150,7 @@ index 3563d15dbaf27..9249aba333bcd 100644 bcache_is_reboot = false; diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h -index 6f3cb7c921303..f61ab1bada6cf 100644 +index 6f3cb7c92130..f61ab1bada6c 100644 --- a/drivers/md/bcache/util.h +++ b/drivers/md/bcache/util.h @@ -4,6 +4,7 @@ @@ -1174,7 +1171,7 @@ index 6f3cb7c921303..f61ab1bada6cf 100644 #ifdef CONFIG_BCACHE_DEBUG diff --git a/drivers/pci/p2pdma.c b/drivers/pci/p2pdma.c -index 462b429ad2434..f06328035b9c6 100644 +index 462b429ad243..f06328035b9c 100644 --- a/drivers/pci/p2pdma.c +++ b/drivers/pci/p2pdma.c @@ -17,7 +17,7 @@ @@ -1249,7 +1246,7 @@ index 462b429ad2434..f06328035b9c6 100644 acs_redirects = true; diff --git a/fs/Kconfig b/fs/Kconfig -index 5976eb33535ff..6d2c4231494a4 100644 +index 5976eb33535f..6d2c4231494a 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -40,6 +40,7 @@ source "fs/ocfs2/Kconfig" @@ -1261,10 +1258,10 @@ index 5976eb33535ff..6d2c4231494a4 100644 endif # BLOCK diff --git a/fs/Makefile b/fs/Makefile -index 208a74e0b00e1..5d5c8c7920584 100644 +index 93b80529f8e8..2b8d04016a20 100644 --- a/fs/Makefile +++ b/fs/Makefile -@@ -134,6 +134,7 @@ obj-$(CONFIG_OCFS2_FS) += ocfs2/ +@@ -132,6 +132,7 @@ obj-$(CONFIG_OCFS2_FS) += ocfs2/ obj-$(CONFIG_BTRFS_FS) += btrfs/ obj-$(CONFIG_GFS2_FS) += gfs2/ obj-$(CONFIG_F2FS_FS) += f2fs/ @@ -1274,7 +1271,7 @@ index 208a74e0b00e1..5d5c8c7920584 100644 obj-$(CONFIG_EFIVAR_FS) += efivarfs/ diff --git a/fs/bcachefs/Kconfig b/fs/bcachefs/Kconfig new file mode 100644 -index 0000000000000..0088869678419 +index 000000000000..008886967841 --- /dev/null +++ b/fs/bcachefs/Kconfig @@ -0,0 +1,59 @@ @@ -1339,10 +1336,10 @@ index 0000000000000..0088869678419 + Expose statistics for how long we held a lock in debugfs diff --git a/fs/bcachefs/Makefile b/fs/bcachefs/Makefile new file mode 100644 -index 0000000000000..5dad8ed03a20a +index 000000000000..8124d356baa1 --- /dev/null +++ b/fs/bcachefs/Makefile -@@ -0,0 +1,69 @@ +@@ -0,0 +1,70 @@ + +obj-$(CONFIG_BCACHEFS_FS) += bcachefs.o + @@ -1359,6 +1356,7 @@ index 0000000000000..5dad8ed03a20a + btree_io.o \ + btree_iter.o \ + btree_key_cache.o \ ++ btree_locking.o \ + btree_update_interior.o \ + btree_update_leaf.o \ + buckets.o \ @@ -1414,7 +1412,7 @@ index 0000000000000..5dad8ed03a20a +bcachefs-$(CONFIG_BCACHEFS_POSIX_ACL) += acl.o diff --git a/fs/bcachefs/acl.c b/fs/bcachefs/acl.c new file mode 100644 -index 0000000000000..5c6ccf6850940 +index 000000000000..5c6ccf685094 --- /dev/null +++ b/fs/bcachefs/acl.c @@ -0,0 +1,406 @@ @@ -1826,7 +1824,7 @@ index 0000000000000..5c6ccf6850940 +#endif /* CONFIG_BCACHEFS_POSIX_ACL */ diff --git a/fs/bcachefs/acl.h b/fs/bcachefs/acl.h new file mode 100644 -index 0000000000000..2d76a4897ba89 +index 000000000000..2d76a4897ba8 --- /dev/null +++ b/fs/bcachefs/acl.h @@ -0,0 +1,58 @@ @@ -1890,10 +1888,10 @@ index 0000000000000..2d76a4897ba89 +#endif /* _BCACHEFS_ACL_H */ diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c new file mode 100644 -index 0000000000000..2281b8d45982a +index 000000000000..d0d7690a4940 --- /dev/null 
+++ b/fs/bcachefs/alloc_background.c -@@ -0,0 +1,1552 @@ +@@ -0,0 +1,1551 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "bcachefs.h" +#include "alloc_background.h" @@ -3113,8 +3111,7 @@ index 0000000000000..2281b8d45982a + if (ret) + goto out; + -+ trace_invalidate_bucket(c, bucket.inode, bucket.offset, cached_sectors); -+ this_cpu_inc(c->counters[BCH_COUNTER_bucket_invalidate]); ++ trace_and_count(c, bucket_invalidate, c, bucket.inode, bucket.offset, cached_sectors); + --*nr_to_invalidate; +out: + bch2_trans_iter_exit(trans, &alloc_iter); @@ -3448,7 +3445,7 @@ index 0000000000000..2281b8d45982a +} diff --git a/fs/bcachefs/alloc_background.h b/fs/bcachefs/alloc_background.h new file mode 100644 -index 0000000000000..044bc72992d41 +index 000000000000..044bc72992d4 --- /dev/null +++ b/fs/bcachefs/alloc_background.h @@ -0,0 +1,183 @@ @@ -3637,10 +3634,10 @@ index 0000000000000..044bc72992d41 +#endif /* _BCACHEFS_ALLOC_BACKGROUND_H */ diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c new file mode 100644 -index 0000000000000..0a9f1313414b7 +index 000000000000..dce227c54a7e --- /dev/null +++ b/fs/bcachefs/alloc_foreground.c -@@ -0,0 +1,1305 @@ +@@ -0,0 +1,1382 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright 2012 Google, Inc. @@ -3911,7 +3908,7 @@ index 0000000000000..0a9f1313414b7 + + spin_unlock(&c->freelist_lock); + -+ trace_bucket_alloc(ca, bch2_alloc_reserves[reserve]); ++ trace_and_count(c, bucket_alloc, ca, bch2_alloc_reserves[reserve]); + return ob; +} + @@ -3982,6 +3979,8 @@ index 0000000000000..0a9f1313414b7 + skipped_need_journal_commit, + skipped_nouse, + cl); ++ if (!ob) ++ iter.path->preserve = false; +err: + set_btree_iter_dontneed(&iter); + bch2_trans_iter_exit(trans, &iter); @@ -4022,15 +4021,15 @@ index 0000000000000..0a9f1313414b7 + * journal buckets - journal buckets will be < ca->new_fs_bucket_idx + */ +static noinline struct open_bucket * -+bch2_bucket_alloc_trans_early(struct btree_trans *trans, -+ struct bch_dev *ca, -+ enum alloc_reserve reserve, -+ u64 *cur_bucket, -+ u64 *buckets_seen, -+ u64 *skipped_open, -+ u64 *skipped_need_journal_commit, -+ u64 *skipped_nouse, -+ struct closure *cl) ++bch2_bucket_alloc_early(struct btree_trans *trans, ++ struct bch_dev *ca, ++ enum alloc_reserve reserve, ++ u64 *cur_bucket, ++ u64 *buckets_seen, ++ u64 *skipped_open, ++ u64 *skipped_need_journal_commit, ++ u64 *skipped_nouse, ++ struct closure *cl) +{ + struct btree_iter iter; + struct bkey_s_c k; @@ -4073,7 +4072,7 @@ index 0000000000000..0a9f1313414b7 + return ob ?: ERR_PTR(ret ?: -BCH_ERR_no_buckets_found); +} + -+static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans, ++static struct open_bucket *bch2_bucket_alloc_freelist(struct btree_trans *trans, + struct bch_dev *ca, + enum alloc_reserve reserve, + u64 *cur_bucket, @@ -4088,15 +4087,6 @@ index 0000000000000..0a9f1313414b7 + struct open_bucket *ob = NULL; + int ret; + -+ if (unlikely(!ca->mi.freespace_initialized)) -+ return bch2_bucket_alloc_trans_early(trans, ca, reserve, -+ cur_bucket, -+ buckets_seen, -+ skipped_open, -+ skipped_need_journal_commit, -+ skipped_nouse, -+ cl); -+ + BUG_ON(ca->new_fs_bucket_idx); + + /* @@ -4110,7 +4100,7 @@ index 0000000000000..0a9f1313414b7 + break; + + for (*cur_bucket = max(*cur_bucket, bkey_start_offset(k.k)); -+ *cur_bucket < k.k->p.offset && !ob; ++ *cur_bucket < k.k->p.offset; + (*cur_bucket)++) { + ret = btree_trans_too_many_iters(trans); + if (ret) @@ -4124,6 +4114,8 @@ index 0000000000000..0a9f1313414b7 + 
skipped_need_journal_commit, + skipped_nouse, + k, cl); ++ if (ob) ++ break; + } + + if (ob || ret) @@ -4139,11 +4131,13 @@ index 0000000000000..0a9f1313414b7 + * + * Returns index of bucket on success, 0 on failure + * */ -+struct open_bucket *bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca, ++static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans, ++ struct bch_dev *ca, + enum alloc_reserve reserve, + bool may_alloc_partial, + struct closure *cl) +{ ++ struct bch_fs *c = trans->c; + struct open_bucket *ob = NULL; + struct bch_dev_usage usage; + bool freespace_initialized = READ_ONCE(ca->mi.freespace_initialized); @@ -4155,7 +4149,6 @@ index 0000000000000..0a9f1313414b7 + u64 skipped_need_journal_commit = 0; + u64 skipped_nouse = 0; + bool waiting = false; -+ int ret; +again: + usage = bch2_dev_usage_read(ca); + avail = dev_buckets_free(ca, usage, reserve); @@ -4192,19 +4185,26 @@ index 0000000000000..0a9f1313414b7 + return ob; + } + -+ ret = bch2_trans_do(c, NULL, NULL, 0, -+ PTR_ERR_OR_ZERO(ob = bch2_bucket_alloc_trans(&trans, ca, reserve, -+ &cur_bucket, -+ &buckets_seen, -+ &skipped_open, -+ &skipped_need_journal_commit, -+ &skipped_nouse, -+ cl))); ++ ob = likely(ca->mi.freespace_initialized) ++ ? bch2_bucket_alloc_freelist(trans, ca, reserve, ++ &cur_bucket, ++ &buckets_seen, ++ &skipped_open, ++ &skipped_need_journal_commit, ++ &skipped_nouse, ++ cl) ++ : bch2_bucket_alloc_early(trans, ca, reserve, ++ &cur_bucket, ++ &buckets_seen, ++ &skipped_open, ++ &skipped_need_journal_commit, ++ &skipped_nouse, ++ cl); + + if (skipped_need_journal_commit * 2 > avail) + bch2_journal_flush_async(&c->journal, NULL); + -+ if (!ob && !ret && !freespace_initialized && start) { ++ if (!ob && !freespace_initialized && start) { + start = cur_bucket = 0; + goto again; + } @@ -4213,26 +4213,38 @@ index 0000000000000..0a9f1313414b7 + ca->bucket_alloc_trans_early_cursor = cur_bucket; +err: + if (!ob) -+ ob = ERR_PTR(ret ?: -BCH_ERR_no_buckets_found); ++ ob = ERR_PTR(-BCH_ERR_no_buckets_found); + -+ if (IS_ERR(ob)) { -+ trace_bucket_alloc_fail(ca, bch2_alloc_reserves[reserve], -+ usage.d[BCH_DATA_free].buckets, -+ avail, -+ bch2_copygc_wait_amount(c), -+ c->copygc_wait - atomic64_read(&c->io_clock[WRITE].now), -+ buckets_seen, -+ skipped_open, -+ skipped_need_journal_commit, -+ skipped_nouse, -+ cl == NULL, -+ bch2_err_str(PTR_ERR(ob))); -+ atomic_long_inc(&c->bucket_alloc_fail); -+ } ++ if (IS_ERR(ob)) ++ trace_and_count(c, bucket_alloc_fail, ++ ca, bch2_alloc_reserves[reserve], ++ usage.d[BCH_DATA_free].buckets, ++ avail, ++ bch2_copygc_wait_amount(c), ++ c->copygc_wait - atomic64_read(&c->io_clock[WRITE].now), ++ buckets_seen, ++ skipped_open, ++ skipped_need_journal_commit, ++ skipped_nouse, ++ cl == NULL, ++ bch2_err_str(PTR_ERR(ob))); + + return ob; +} + ++struct open_bucket *bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca, ++ enum alloc_reserve reserve, ++ bool may_alloc_partial, ++ struct closure *cl) ++{ ++ struct open_bucket *ob; ++ ++ bch2_trans_do(c, NULL, NULL, 0, ++ PTR_ERR_OR_ZERO(ob = bch2_bucket_alloc_trans(&trans, ca, reserve, ++ may_alloc_partial, cl))); ++ return ob; ++} ++ +static int __dev_stripe_cmp(struct dev_stripe_state *stripe, + unsigned l, unsigned r) +{ @@ -4298,7 +4310,7 @@ index 0000000000000..0a9f1313414b7 + ob_push(c, ptrs, ob); +} + -+int bch2_bucket_alloc_set(struct bch_fs *c, ++static int bch2_bucket_alloc_set_trans(struct btree_trans *trans, + struct open_buckets *ptrs, + struct dev_stripe_state *stripe, + struct bch_devs_mask 
*devs_may_alloc, @@ -4309,11 +4321,12 @@ index 0000000000000..0a9f1313414b7 + unsigned flags, + struct closure *cl) +{ ++ struct bch_fs *c = trans->c; + struct dev_alloc_list devs_sorted = + bch2_dev_alloc_list(c, stripe, devs_may_alloc); + unsigned dev; + struct bch_dev *ca; -+ int ret = -BCH_ERR_insufficient_devices; ++ int ret = 0; + unsigned i; + + BUG_ON(*nr_effective >= nr_replicas); @@ -4337,16 +4350,15 @@ index 0000000000000..0a9f1313414b7 + continue; + } + -+ ob = bch2_bucket_alloc(c, ca, reserve, ++ ob = bch2_bucket_alloc_trans(trans, ca, reserve, + flags & BUCKET_MAY_ALLOC_PARTIAL, cl); + if (!IS_ERR(ob)) + bch2_dev_stripe_increment(ca, stripe); + percpu_ref_put(&ca->ref); + -+ if (IS_ERR(ob)) { -+ ret = PTR_ERR(ob); -+ -+ if (cl) ++ ret = PTR_ERR_OR_ZERO(ob); ++ if (ret) { ++ if (bch2_err_matches(ret, BCH_ERR_transaction_restart) || cl) + break; + continue; + } @@ -4354,15 +4366,36 @@ index 0000000000000..0a9f1313414b7 + add_new_bucket(c, ptrs, devs_may_alloc, + nr_effective, have_cache, flags, ob); + -+ if (*nr_effective >= nr_replicas) { -+ ret = 0; ++ if (*nr_effective >= nr_replicas) + break; -+ } + } + ++ if (*nr_effective >= nr_replicas) ++ ret = 0; ++ else if (!ret) ++ ret = -BCH_ERR_insufficient_devices; ++ + return ret; +} + ++int bch2_bucket_alloc_set(struct bch_fs *c, ++ struct open_buckets *ptrs, ++ struct dev_stripe_state *stripe, ++ struct bch_devs_mask *devs_may_alloc, ++ unsigned nr_replicas, ++ unsigned *nr_effective, ++ bool *have_cache, ++ enum alloc_reserve reserve, ++ unsigned flags, ++ struct closure *cl) ++{ ++ return bch2_trans_do(c, NULL, NULL, 0, ++ bch2_bucket_alloc_set_trans(&trans, ptrs, stripe, ++ devs_may_alloc, nr_replicas, ++ nr_effective, have_cache, reserve, ++ flags, cl)); ++} ++ +/* Allocate from stripes: */ + +/* @@ -4467,7 +4500,7 @@ index 0000000000000..0a9f1313414b7 + wp->ptrs = ptrs_skip; +} + -+static int open_bucket_add_buckets(struct bch_fs *c, ++static int open_bucket_add_buckets(struct btree_trans *trans, + struct open_buckets *ptrs, + struct write_point *wp, + struct bch_devs_list *devs_have, @@ -4480,6 +4513,7 @@ index 0000000000000..0a9f1313414b7 + unsigned flags, + struct closure *_cl) +{ ++ struct bch_fs *c = trans->c; + struct bch_devs_mask devs; + struct open_bucket *ob; + struct closure *cl = NULL; @@ -4511,7 +4545,8 @@ index 0000000000000..0a9f1313414b7 + target, erasure_code, + nr_replicas, nr_effective, + have_cache, flags, _cl); -+ if (bch2_err_matches(ret, BCH_ERR_freelist_empty) || ++ if (bch2_err_matches(ret, BCH_ERR_transaction_restart) || ++ bch2_err_matches(ret, BCH_ERR_freelist_empty) || + bch2_err_matches(ret, BCH_ERR_open_buckets_empty)) + return ret; + if (*nr_effective >= nr_replicas) @@ -4530,10 +4565,11 @@ index 0000000000000..0a9f1313414b7 + * Try nonblocking first, so that if one device is full we'll try from + * other devices: + */ -+ ret = bch2_bucket_alloc_set(c, ptrs, &wp->stripe, &devs, ++ ret = bch2_bucket_alloc_set_trans(trans, ptrs, &wp->stripe, &devs, + nr_replicas, nr_effective, have_cache, + reserve, flags, cl); + if (ret && ++ !bch2_err_matches(ret, BCH_ERR_transaction_restart) && + !bch2_err_matches(ret, BCH_ERR_insufficient_devices) && + !cl && _cl) { + cl = _cl; @@ -4653,15 +4689,25 @@ index 0000000000000..0a9f1313414b7 + return true; +} + -+static struct write_point *writepoint_find(struct bch_fs *c, ++static void bch2_trans_mutex_lock(struct btree_trans *trans, ++ struct mutex *lock) ++{ ++ if (!mutex_trylock(lock)) { ++ bch2_trans_unlock(trans); ++ mutex_lock(lock); ++ } ++} ++ ++static 
struct write_point *writepoint_find(struct btree_trans *trans, + unsigned long write_point) +{ ++ struct bch_fs *c = trans->c; + struct write_point *wp, *oldest; + struct hlist_head *head; + + if (!(write_point & 1UL)) { + wp = (struct write_point *) write_point; -+ mutex_lock(&wp->lock); ++ bch2_trans_mutex_lock(trans, &wp->lock); + return wp; + } + @@ -4670,7 +4716,7 @@ index 0000000000000..0a9f1313414b7 + wp = __writepoint_find(head, write_point); + if (wp) { +lock_wp: -+ mutex_lock(&wp->lock); ++ bch2_trans_mutex_lock(trans, &wp->lock); + if (wp->write_point == write_point) + goto out; + mutex_unlock(&wp->lock); @@ -4683,8 +4729,8 @@ index 0000000000000..0a9f1313414b7 + if (!oldest || time_before64(wp->last_used, oldest->last_used)) + oldest = wp; + -+ mutex_lock(&oldest->lock); -+ mutex_lock(&c->write_points_hash_lock); ++ bch2_trans_mutex_lock(trans, &oldest->lock); ++ bch2_trans_mutex_lock(trans, &c->write_points_hash_lock); + if (oldest >= c->write_points + c->write_points_nr || + try_increase_writepoints(c)) { + mutex_unlock(&c->write_points_hash_lock); @@ -4712,7 +4758,7 @@ index 0000000000000..0a9f1313414b7 +/* + * Get us an open_bucket we can allocate from, return with it locked: + */ -+struct write_point *bch2_alloc_sectors_start(struct bch_fs *c, ++struct write_point *bch2_alloc_sectors_start_trans(struct btree_trans *trans, + unsigned target, + unsigned erasure_code, + struct write_point_specifier write_point, @@ -4723,6 +4769,7 @@ index 0000000000000..0a9f1313414b7 + unsigned flags, + struct closure *cl) +{ ++ struct bch_fs *c = trans->c; + struct write_point *wp; + struct open_bucket *ob; + struct open_buckets ptrs; @@ -4742,7 +4789,7 @@ index 0000000000000..0a9f1313414b7 + write_points_nr = c->write_points_nr; + have_cache = false; + -+ wp = writepoint_find(c, write_point.v); ++ wp = writepoint_find(trans, write_point.v); + + if (wp->data_type == BCH_DATA_user) + ob_flags |= BUCKET_MAY_ALLOC_PARTIAL; @@ -4752,21 +4799,22 @@ index 0000000000000..0a9f1313414b7 + have_cache = true; + + if (!target || (flags & BCH_WRITE_ONLY_SPECIFIED_DEVS)) { -+ ret = open_bucket_add_buckets(c, &ptrs, wp, devs_have, ++ ret = open_bucket_add_buckets(trans, &ptrs, wp, devs_have, + target, erasure_code, + nr_replicas, &nr_effective, + &have_cache, reserve, + ob_flags, cl); + } else { -+ ret = open_bucket_add_buckets(c, &ptrs, wp, devs_have, ++ ret = open_bucket_add_buckets(trans, &ptrs, wp, devs_have, + target, erasure_code, + nr_replicas, &nr_effective, + &have_cache, reserve, + ob_flags, NULL); -+ if (!ret) ++ if (!ret || ++ bch2_err_matches(ret, BCH_ERR_transaction_restart)) + goto alloc_done; + -+ ret = open_bucket_add_buckets(c, &ptrs, wp, devs_have, ++ ret = open_bucket_add_buckets(trans, &ptrs, wp, devs_have, + 0, erasure_code, + nr_replicas, &nr_effective, + &have_cache, reserve, @@ -4823,6 +4871,32 @@ index 0000000000000..0a9f1313414b7 + return ERR_PTR(ret); +} + ++struct write_point *bch2_alloc_sectors_start(struct bch_fs *c, ++ unsigned target, ++ unsigned erasure_code, ++ struct write_point_specifier write_point, ++ struct bch_devs_list *devs_have, ++ unsigned nr_replicas, ++ unsigned nr_replicas_required, ++ enum alloc_reserve reserve, ++ unsigned flags, ++ struct closure *cl) ++{ ++ struct write_point *wp; ++ ++ bch2_trans_do(c, NULL, NULL, 0, ++ PTR_ERR_OR_ZERO(wp = bch2_alloc_sectors_start_trans(&trans, target, ++ erasure_code, ++ write_point, ++ devs_have, ++ nr_replicas, ++ nr_replicas_required, ++ reserve, ++ flags, cl))); ++ return wp; ++ ++} ++ +struct bch_extent_ptr 
bch2_ob_ptr(struct bch_fs *c, struct open_bucket *ob) +{ + struct bch_dev *ca = bch_dev_bkey_exists(c, ob->dev); @@ -4948,10 +5022,10 @@ index 0000000000000..0a9f1313414b7 +} diff --git a/fs/bcachefs/alloc_foreground.h b/fs/bcachefs/alloc_foreground.h new file mode 100644 -index 0000000000000..8bc78877f0fc8 +index 000000000000..6de63a351fa8 --- /dev/null +++ b/fs/bcachefs/alloc_foreground.h -@@ -0,0 +1,173 @@ +@@ -0,0 +1,181 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BCACHEFS_ALLOC_FOREGROUND_H +#define _BCACHEFS_ALLOC_FOREGROUND_H @@ -5090,6 +5164,14 @@ index 0000000000000..8bc78877f0fc8 + unsigned, unsigned *, bool *, enum alloc_reserve, + unsigned, struct closure *); + ++struct write_point *bch2_alloc_sectors_start_trans(struct btree_trans *, ++ unsigned, unsigned, ++ struct write_point_specifier, ++ struct bch_devs_list *, ++ unsigned, unsigned, ++ enum alloc_reserve, ++ unsigned, ++ struct closure *); +struct write_point *bch2_alloc_sectors_start(struct bch_fs *, + unsigned, unsigned, + struct write_point_specifier, @@ -5127,7 +5209,7 @@ index 0000000000000..8bc78877f0fc8 +#endif /* _BCACHEFS_ALLOC_FOREGROUND_H */ diff --git a/fs/bcachefs/alloc_types.h b/fs/bcachefs/alloc_types.h new file mode 100644 -index 0000000000000..e078584d46f65 +index 000000000000..e078584d46f6 --- /dev/null +++ b/fs/bcachefs/alloc_types.h @@ -0,0 +1,87 @@ @@ -5220,10 +5302,10 @@ index 0000000000000..e078584d46f65 +#endif /* _BCACHEFS_ALLOC_TYPES_H */ diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c new file mode 100644 -index 0000000000000..5a46b25b0587e +index 000000000000..029b1ec14283 --- /dev/null +++ b/fs/bcachefs/backpointers.c -@@ -0,0 +1,875 @@ +@@ -0,0 +1,890 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "bcachefs.h" +#include "alloc_background.h" @@ -5718,7 +5800,7 @@ index 0000000000000..5a46b25b0587e + prt_printf(&buf, "\n "); + bch2_bkey_val_to_text(&buf, c, k); + if (!test_bit(BCH_FS_CHECK_BACKPOINTERS_DONE, &c->flags)) -+ bch_err(c, "%s", buf.buf); ++ bch_err_ratelimited(c, "%s", buf.buf); + else + bch2_trans_inconsistent(trans, "%s", buf.buf); + @@ -5752,9 +5834,21 @@ index 0000000000000..5a46b25b0587e + if (extent_matches_bp(c, bp.btree_id, bp.level, k, bucket, bp)) + return k; + -+ backpointer_not_found(trans, bucket, bp_offset, bp, k, "extent"); -+ + bch2_trans_iter_exit(trans, iter); ++ ++ if (bp.level) { ++ /* ++ * If a backpointer for a btree node wasn't found, it may be ++ * because it was overwritten by a new btree node that hasn't ++ * been written out yet - backpointer_get_node() checks for ++ * this: ++ */ ++ bch2_backpointer_get_node(trans, iter, bucket, bp_offset, bp); ++ bch2_trans_iter_exit(trans, iter); ++ return bkey_s_c_null; ++ } ++ ++ backpointer_not_found(trans, bucket, bp_offset, bp, k, "extent"); + return bkey_s_c_null; +} + @@ -5766,7 +5860,6 @@ index 0000000000000..5a46b25b0587e +{ + struct bch_fs *c = trans->c; + struct btree *b; -+ struct bkey_s_c k; + + BUG_ON(!bp.level); + @@ -5777,22 +5870,24 @@ index 0000000000000..5a46b25b0587e + bp.level - 1, + 0); + b = bch2_btree_iter_peek_node(iter); -+ if (IS_ERR(b)) { -+ bch2_trans_iter_exit(trans, iter); -+ return b; -+ } ++ if (IS_ERR(b)) ++ goto err; + + if (extent_matches_bp(c, bp.btree_id, bp.level, + bkey_i_to_s_c(&b->key), + bucket, bp)) + return b; + -+ if (!btree_node_will_make_reachable(b)) -+ backpointer_not_found(trans, bucket, bp_offset, -+ bp, k, "btree node"); -+ ++ if (btree_node_will_make_reachable(b)) { ++ b = ERR_PTR(-BCH_ERR_backpointer_to_overwritten_btree_node); ++ } 
else { ++ backpointer_not_found(trans, bucket, bp_offset, bp, ++ bkey_i_to_s_c(&b->key), "btree node"); ++ b = NULL; ++ } ++err: + bch2_trans_iter_exit(trans, iter); -+ return NULL; ++ return b; +} + +static int bch2_check_btree_backpointer(struct btree_trans *trans, struct btree_iter *bp_iter, @@ -6055,6 +6150,8 @@ index 0000000000000..5a46b25b0587e + + k = bch2_backpointer_get_key(trans, &iter, bucket, *bp_offset, bp); + ret = bkey_err(k); ++ if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node) ++ return 0; + if (ret) + return ret; + @@ -6101,7 +6198,7 @@ index 0000000000000..5a46b25b0587e +} diff --git a/fs/bcachefs/backpointers.h b/fs/bcachefs/backpointers.h new file mode 100644 -index 0000000000000..fe42af296e9ca +index 000000000000..fe42af296e9c --- /dev/null +++ b/fs/bcachefs/backpointers.h @@ -0,0 +1,38 @@ @@ -6145,10 +6242,10 @@ index 0000000000000..fe42af296e9ca +#endif /* _BCACHEFS_BACKPOINTERS_BACKGROUND_H */ diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h new file mode 100644 -index 0000000000000..8ffdb4dee47ae +index 000000000000..53e7b5a0bea9 --- /dev/null +++ b/fs/bcachefs/bcachefs.h -@@ -0,0 +1,1000 @@ +@@ -0,0 +1,1001 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BCACHEFS_H +#define _BCACHEFS_H @@ -6363,6 +6460,12 @@ index 0000000000000..8ffdb4dee47ae +#define dynamic_fault(...) 0 +#define race_fault(...) 0 + ++#define trace_and_count(_c, _name, ...) \ ++do { \ ++ this_cpu_inc((_c)->counters[BCH_COUNTER_##_name]); \ ++ trace_##_name(__VA_ARGS__); \ ++} while (0) ++ +#define bch2_fs_init_fault(name) \ + dynamic_fault("bcachefs:bch_fs_init:" name) +#define bch2_meta_read_fault(name) \ @@ -6470,8 +6573,6 @@ index 0000000000000..8ffdb4dee47ae +#undef BCH_DEBUG_PARAM +#endif + -+#define BCH_LOCK_TIME_NR 128 -+ +#define BCH_TIME_STATS() \ + x(btree_node_mem_alloc) \ + x(btree_node_split) \ @@ -6482,9 +6583,6 @@ index 0000000000000..8ffdb4dee47ae + x(btree_interior_update_foreground) \ + x(btree_interior_update_total) \ + x(btree_gc) \ -+ x(btree_lock_contended_read) \ -+ x(btree_lock_contended_intent) \ -+ x(btree_lock_contended_write) \ + x(data_write) \ + x(data_read) \ + x(data_promote) \ @@ -6682,9 +6780,14 @@ index 0000000000000..8ffdb4dee47ae + unsigned id; +}; + -+struct lock_held_stats { -+ struct time_stats times[BCH_LOCK_TIME_NR]; -+ const char *names[BCH_LOCK_TIME_NR]; ++#define BCH_TRANSACTIONS_NR 128 ++ ++struct btree_transaction_stats { ++ struct mutex lock; ++ struct time_stats lock_hold_times; ++ unsigned nr_max_paths; ++ unsigned max_mem; ++ char *max_paths_text; +}; + +struct bch_fs_pcpu { @@ -7066,12 +7169,6 @@ index 0000000000000..8ffdb4dee47ae + + u64 last_bucket_seq_cleanup; + -+ /* TODO rewrite as counters - The rest of this all shows up in sysfs */ -+ atomic_long_t read_realloc_races; -+ atomic_long_t extent_migrate_done; -+ atomic_long_t extent_migrate_raced; -+ atomic_long_t bucket_alloc_fail; -+ + u64 counters_on_mount[BCH_COUNTER_NR]; + u64 __percpu *counters; + @@ -7081,7 +7178,8 @@ index 0000000000000..8ffdb4dee47ae + + struct time_stats times[BCH_TIME_STAT_NR]; + -+ struct lock_held_stats lock_held_stats; ++ const char *btree_transaction_fns[BCH_TRANSACTIONS_NR]; ++ struct btree_transaction_stats btree_transaction_stats[BCH_TRANSACTIONS_NR]; +}; + +static inline void bch2_set_ra_pages(struct bch_fs *c, unsigned ra_pages) @@ -7151,10 +7249,10 @@ index 0000000000000..8ffdb4dee47ae +#endif /* _BCACHEFS_H */ diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h new file mode 100644 -index 
0000000000000..147fde1417b05 +index 000000000000..7730e955d8d8 --- /dev/null +++ b/fs/bcachefs/bcachefs_format.h -@@ -0,0 +1,2052 @@ +@@ -0,0 +1,2121 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BCACHEFS_FORMAT_H +#define _BCACHEFS_FORMAT_H @@ -8494,12 +8592,81 @@ index 0000000000000..147fde1417b05 + +/* BCH_SB_FIELD_counters */ + -+#define BCH_PERSISTENT_COUNTERS() \ -+ x(io_read, 0) \ -+ x(io_write, 1) \ -+ x(io_move, 2) \ -+ x(bucket_invalidate, 3) \ -+ x(bucket_discard, 4) ++#define BCH_PERSISTENT_COUNTERS() \ ++ x(io_read, 0) \ ++ x(io_write, 1) \ ++ x(io_move, 2) \ ++ x(bucket_invalidate, 3) \ ++ x(bucket_discard, 4) \ ++ x(bucket_alloc, 5) \ ++ x(bucket_alloc_fail, 6) \ ++ x(btree_cache_scan, 7) \ ++ x(btree_cache_reap, 8) \ ++ x(btree_cache_cannibalize, 9) \ ++ x(btree_cache_cannibalize_lock, 10) \ ++ x(btree_cache_cannibalize_lock_fail, 11) \ ++ x(btree_cache_cannibalize_unlock, 12) \ ++ x(btree_node_write, 13) \ ++ x(btree_node_read, 14) \ ++ x(btree_node_compact, 15) \ ++ x(btree_node_merge, 16) \ ++ x(btree_node_split, 17) \ ++ x(btree_node_rewrite, 18) \ ++ x(btree_node_alloc, 19) \ ++ x(btree_node_free, 20) \ ++ x(btree_node_set_root, 21) \ ++ x(btree_path_relock_fail, 22) \ ++ x(btree_path_upgrade_fail, 23) \ ++ x(btree_reserve_get_fail, 24) \ ++ x(journal_entry_full, 25) \ ++ x(journal_full, 26) \ ++ x(journal_reclaim_finish, 27) \ ++ x(journal_reclaim_start, 28) \ ++ x(journal_write, 29) \ ++ x(read_promote, 30) \ ++ x(read_bounce, 31) \ ++ x(read_split, 33) \ ++ x(read_retry, 32) \ ++ x(read_reuse_race, 34) \ ++ x(move_extent_read, 35) \ ++ x(move_extent_write, 36) \ ++ x(move_extent_finish, 37) \ ++ x(move_extent_race, 38) \ ++ x(move_extent_alloc_mem_fail, 39) \ ++ x(copygc, 40) \ ++ x(copygc_wait, 41) \ ++ x(gc_gens_end, 42) \ ++ x(gc_gens_start, 43) \ ++ x(trans_blocked_journal_reclaim, 44) \ ++ x(trans_restart_btree_node_reused, 45) \ ++ x(trans_restart_btree_node_split, 46) \ ++ x(trans_restart_fault_inject, 47) \ ++ x(trans_restart_iter_upgrade, 48) \ ++ x(trans_restart_journal_preres_get, 49) \ ++ x(trans_restart_journal_reclaim, 50) \ ++ x(trans_restart_journal_res_get, 51) \ ++ x(trans_restart_key_cache_key_realloced, 52) \ ++ x(trans_restart_key_cache_raced, 53) \ ++ x(trans_restart_mark_replicas, 54) \ ++ x(trans_restart_mem_realloced, 55) \ ++ x(trans_restart_memory_allocation_failure, 56) \ ++ x(trans_restart_relock, 57) \ ++ x(trans_restart_relock_after_fill, 58) \ ++ x(trans_restart_relock_key_cache_fill, 59) \ ++ x(trans_restart_relock_next_node, 60) \ ++ x(trans_restart_relock_parent_for_fill, 61) \ ++ x(trans_restart_relock_path, 62) \ ++ x(trans_restart_relock_path_intent, 63) \ ++ x(trans_restart_too_many_iters, 64) \ ++ x(trans_restart_traverse, 65) \ ++ x(trans_restart_upgrade, 66) \ ++ x(trans_restart_would_deadlock, 67) \ ++ x(trans_restart_would_deadlock_write, 68) \ ++ x(trans_restart_injected, 69) \ ++ x(trans_restart_key_cache_upgrade, 70) \ ++ x(trans_traverse_all, 71) \ ++ x(transaction_commit, 72) \ ++ x(write_super, 73) + +enum bch_persistent_counters { +#define x(t, n, ...) 
BCH_COUNTER_##t, @@ -9209,7 +9376,7 @@ index 0000000000000..147fde1417b05 +#endif /* _BCACHEFS_FORMAT_H */ diff --git a/fs/bcachefs/bcachefs_ioctl.h b/fs/bcachefs/bcachefs_ioctl.h new file mode 100644 -index 0000000000000..b2edabf58260d +index 000000000000..b2edabf58260 --- /dev/null +++ b/fs/bcachefs/bcachefs_ioctl.h @@ -0,0 +1,368 @@ @@ -9583,10 +9750,10 @@ index 0000000000000..b2edabf58260d +#endif /* _BCACHEFS_IOCTL_H */ diff --git a/fs/bcachefs/bkey.c b/fs/bcachefs/bkey.c new file mode 100644 -index 0000000000000..cc06896351640 +index 000000000000..d348175edad4 --- /dev/null +++ b/fs/bcachefs/bkey.c -@@ -0,0 +1,1175 @@ +@@ -0,0 +1,1203 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "bcachefs.h" @@ -9608,33 +9775,49 @@ index 0000000000000..cc06896351640 +struct bkey __bch2_bkey_unpack_key(const struct bkey_format *, + const struct bkey_packed *); + -+void bch2_to_binary(char *out, const u64 *p, unsigned nr_bits) ++void bch2_bkey_packed_to_binary_text(struct printbuf *out, ++ const struct bkey_format *f, ++ const struct bkey_packed *k) +{ -+ unsigned bit = high_bit_offset, done = 0; ++ const u64 *p = high_word(f, k); ++ unsigned word_bits = 64 - high_bit_offset; ++ unsigned nr_key_bits = bkey_format_key_bits(f) + high_bit_offset; ++ u64 v = *p & (~0ULL >> high_bit_offset); ++ ++ if (!nr_key_bits) { ++ prt_str(out, "(empty)"); ++ return; ++ } + + while (1) { -+ while (bit < 64) { -+ if (done && !(done % 8)) -+ *out++ = ' '; -+ *out++ = *p & (1ULL << (63 - bit)) ? '1' : '0'; -+ bit++; -+ done++; -+ if (done == nr_bits) { -+ *out++ = '\0'; -+ return; -+ } ++ unsigned next_key_bits = nr_key_bits; ++ ++ if (nr_key_bits < 64) { ++ v >>= 64 - nr_key_bits; ++ next_key_bits = 0; ++ } else { ++ next_key_bits -= 64; + } + ++ bch2_prt_u64_binary(out, v, min(word_bits, nr_key_bits)); ++ ++ if (!next_key_bits) ++ break; ++ ++ prt_char(out, ' '); ++ + p = next_word(p); -+ bit = 0; ++ v = *p; ++ word_bits = 64; ++ nr_key_bits = next_key_bits; + } +} + +#ifdef CONFIG_BCACHEFS_DEBUG + +static void bch2_bkey_pack_verify(const struct bkey_packed *packed, -+ const struct bkey *unpacked, -+ const struct bkey_format *format) ++ const struct bkey *unpacked, ++ const struct bkey_format *format) +{ + struct bkey tmp; + @@ -9646,23 +9829,35 @@ index 0000000000000..cc06896351640 + tmp = __bch2_bkey_unpack_key(format, packed); + + if (memcmp(&tmp, unpacked, sizeof(struct bkey))) { -+ struct printbuf buf1 = PRINTBUF; -+ struct printbuf buf2 = PRINTBUF; -+ char buf3[160], buf4[160]; ++ struct printbuf buf = PRINTBUF; + -+ bch2_bkey_to_text(&buf1, unpacked); -+ bch2_bkey_to_text(&buf2, &tmp); -+ bch2_to_binary(buf3, (void *) unpacked, 80); -+ bch2_to_binary(buf4, high_word(format, packed), 80); -+ -+ panic("keys differ: format u64s %u fields %u %u %u %u %u\n%s\n%s\n%s\n%s\n", ++ prt_printf(&buf, "keys differ: format u64s %u fields %u %u %u %u %u\n", + format->key_u64s, + format->bits_per_field[0], + format->bits_per_field[1], + format->bits_per_field[2], + format->bits_per_field[3], -+ format->bits_per_field[4], -+ buf1.buf, buf2.buf, buf3, buf4); ++ format->bits_per_field[4]); ++ ++ prt_printf(&buf, "compiled unpack: "); ++ bch2_bkey_to_text(&buf, unpacked); ++ prt_newline(&buf); ++ ++ prt_printf(&buf, "c unpack: "); ++ bch2_bkey_to_text(&buf, &tmp); ++ prt_newline(&buf); ++ ++ prt_printf(&buf, "compiled unpack: "); ++ bch2_bkey_packed_to_binary_text(&buf, &bch2_bkey_format_current, ++ (struct bkey_packed *) unpacked); ++ prt_newline(&buf); ++ ++ prt_printf(&buf, "c unpack: "); ++ 
bch2_bkey_packed_to_binary_text(&buf, &bch2_bkey_format_current, ++ (struct bkey_packed *) &tmp); ++ prt_newline(&buf); ++ ++ panic("%s", buf.buf); + } +} + @@ -10764,10 +10959,10 @@ index 0000000000000..cc06896351640 +#endif diff --git a/fs/bcachefs/bkey.h b/fs/bcachefs/bkey.h new file mode 100644 -index 0000000000000..7dee3d8e0a3d1 +index 000000000000..df9fb859d1db --- /dev/null +++ b/fs/bcachefs/bkey.h -@@ -0,0 +1,566 @@ +@@ -0,0 +1,571 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BCACHEFS_BKEY_H +#define _BCACHEFS_BKEY_H @@ -10782,7 +10977,9 @@ index 0000000000000..7dee3d8e0a3d1 +#define HAVE_BCACHEFS_COMPILED_UNPACK 1 +#endif + -+void bch2_to_binary(char *, const u64 *, unsigned); ++void bch2_bkey_packed_to_binary_text(struct printbuf *, ++ const struct bkey_format *, ++ const struct bkey_packed *); + +/* bkey with split value, const */ +struct bkey_s_c { @@ -10812,12 +11009,15 @@ index 0000000000000..7dee3d8e0a3d1 + +static inline void set_bkey_val_u64s(struct bkey *k, unsigned val_u64s) +{ -+ k->u64s = BKEY_U64s + val_u64s; ++ unsigned u64s = BKEY_U64s + val_u64s; ++ ++ BUG_ON(u64s > U8_MAX); ++ k->u64s = u64s; +} + +static inline void set_bkey_val_bytes(struct bkey *k, unsigned bytes) +{ -+ k->u64s = BKEY_U64s + DIV_ROUND_UP(bytes, sizeof(u64)); ++ set_bkey_val_u64s(k, DIV_ROUND_UP(bytes, sizeof(u64))); +} + +#define bkey_val_end(_k) ((void *) (((u64 *) (_k).v) + bkey_val_u64s((_k).k))) @@ -11336,7 +11536,7 @@ index 0000000000000..7dee3d8e0a3d1 +#endif /* _BCACHEFS_BKEY_H */ diff --git a/fs/bcachefs/bkey_buf.h b/fs/bcachefs/bkey_buf.h new file mode 100644 -index 0000000000000..0d7c67a959af1 +index 000000000000..0d7c67a959af --- /dev/null +++ b/fs/bcachefs/bkey_buf.h @@ -0,0 +1,60 @@ @@ -11402,7 +11602,7 @@ index 0000000000000..0d7c67a959af1 +#endif /* _BCACHEFS_BKEY_BUF_H */ diff --git a/fs/bcachefs/bkey_methods.c b/fs/bcachefs/bkey_methods.c new file mode 100644 -index 0000000000000..e0cbac8811afa +index 000000000000..e0cbac8811af --- /dev/null +++ b/fs/bcachefs/bkey_methods.c @@ -0,0 +1,503 @@ @@ -11911,7 +12111,7 @@ index 0000000000000..e0cbac8811afa +} diff --git a/fs/bcachefs/bkey_methods.h b/fs/bcachefs/bkey_methods.h new file mode 100644 -index 0000000000000..db894b40d2ca4 +index 000000000000..db894b40d2ca --- /dev/null +++ b/fs/bcachefs/bkey_methods.h @@ -0,0 +1,175 @@ @@ -12092,7 +12292,7 @@ index 0000000000000..db894b40d2ca4 +#endif /* _BCACHEFS_BKEY_METHODS_H */ diff --git a/fs/bcachefs/bkey_sort.c b/fs/bcachefs/bkey_sort.c new file mode 100644 -index 0000000000000..b1385a77da114 +index 000000000000..b1385a77da11 --- /dev/null +++ b/fs/bcachefs/bkey_sort.c @@ -0,0 +1,198 @@ @@ -12296,7 +12496,7 @@ index 0000000000000..b1385a77da114 +} diff --git a/fs/bcachefs/bkey_sort.h b/fs/bcachefs/bkey_sort.h new file mode 100644 -index 0000000000000..79cf11d1b4e7e +index 000000000000..79cf11d1b4e7 --- /dev/null +++ b/fs/bcachefs/bkey_sort.h @@ -0,0 +1,44 @@ @@ -12346,7 +12546,7 @@ index 0000000000000..79cf11d1b4e7e +#endif /* _BCACHEFS_BKEY_SORT_H */ diff --git a/fs/bcachefs/bset.c b/fs/bcachefs/bset.c new file mode 100644 -index 0000000000000..fa60ef84e4ef0 +index 000000000000..fa60ef84e4ef --- /dev/null +++ b/fs/bcachefs/bset.c @@ -0,0 +1,1598 @@ @@ -13950,7 +14150,7 @@ index 0000000000000..fa60ef84e4ef0 +} diff --git a/fs/bcachefs/bset.h b/fs/bcachefs/bset.h new file mode 100644 -index 0000000000000..0d46534c3dcd1 +index 000000000000..0d46534c3dcd --- /dev/null +++ b/fs/bcachefs/bset.h @@ -0,0 +1,615 @@ @@ -14571,10 +14771,10 @@ index 0000000000000..0d46534c3dcd1 +#endif 
/* _BCACHEFS_BSET_H */ diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c new file mode 100644 -index 0000000000000..579a8f8c6a65f +index 000000000000..dabdb25c879a --- /dev/null +++ b/fs/bcachefs/btree_cache.c -@@ -0,0 +1,1170 @@ +@@ -0,0 +1,1169 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "bcachefs.h" @@ -14591,8 +14791,6 @@ index 0000000000000..579a8f8c6a65f +#include +#include + -+struct lock_class_key bch2_btree_node_lock_key; -+ +const char * const bch2_btree_node_flags[] = { +#define x(f) #f, + BTREE_FLAGS() @@ -14831,7 +15029,7 @@ index 0000000000000..579a8f8c6a65f + } +out: + if (b->hash_val && !ret) -+ trace_btree_node_reap(c, b); ++ trace_and_count(c, btree_cache_reap, c, b); + return ret; +out_unlock: + six_unlock_write(&b->c.lock); @@ -14955,7 +15153,7 @@ index 0000000000000..579a8f8c6a65f + ret = freed; + memalloc_nofs_restore(flags); +out_norestore: -+ trace_btree_cache_scan(sc->nr_to_scan, can_free, ret); ++ trace_and_count(c, btree_cache_scan, sc->nr_to_scan, can_free, ret); + return ret; +} + @@ -15091,7 +15289,7 @@ index 0000000000000..579a8f8c6a65f + struct btree_cache *bc = &c->btree_cache; + + if (bc->alloc_lock == current) { -+ trace_btree_node_cannibalize_unlock(c); ++ trace_and_count(c, btree_cache_cannibalize_unlock, c); + bc->alloc_lock = NULL; + closure_wake_up(&bc->alloc_wait); + } @@ -15107,7 +15305,7 @@ index 0000000000000..579a8f8c6a65f + goto success; + + if (!cl) { -+ trace_btree_node_cannibalize_lock_fail(c); ++ trace_and_count(c, btree_cache_cannibalize_lock_fail, c); + return -ENOMEM; + } + @@ -15121,11 +15319,11 @@ index 0000000000000..579a8f8c6a65f + goto success; + } + -+ trace_btree_node_cannibalize_lock_fail(c); ++ trace_and_count(c, btree_cache_cannibalize_lock_fail, c); + return -EAGAIN; + +success: -+ trace_btree_node_cannibalize_lock(c); ++ trace_and_count(c, btree_cache_cannibalize_lock, c); + return 0; +} + @@ -15249,7 +15447,7 @@ index 0000000000000..579a8f8c6a65f + + mutex_unlock(&bc->lock); + -+ trace_btree_node_cannibalize(c); ++ trace_and_count(c, btree_cache_cannibalize, c); + goto out; + } + @@ -15278,8 +15476,7 @@ index 0000000000000..579a8f8c6a65f + * been freed: + */ + if (trans && !bch2_btree_node_relock(trans, path, level + 1)) { -+ trace_trans_restart_relock_parent_for_fill(trans->fn, -+ _THIS_IP_, btree_id, &path->pos); ++ trace_and_count(c, trans_restart_relock_parent_for_fill, trans, _THIS_IP_, path); + return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_fill_relock)); + } + @@ -15287,9 +15484,7 @@ index 0000000000000..579a8f8c6a65f + + if (trans && b == ERR_PTR(-ENOMEM)) { + trans->memory_allocation_failure = true; -+ trace_trans_restart_memory_allocation_failure(trans->fn, -+ _THIS_IP_, btree_id, &path->pos); -+ ++ trace_and_count(c, trans_restart_memory_allocation_failure, trans, _THIS_IP_, path); + return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_fill_mem_alloc_fail)); + } + @@ -15337,8 +15532,8 @@ index 0000000000000..579a8f8c6a65f + } + + if (!six_relock_type(&b->c.lock, lock_type, seq)) { -+ trace_trans_restart_relock_after_fill(trans->fn, _THIS_IP_, -+ btree_id, &path->pos); ++ if (trans) ++ trace_and_count(c, trans_restart_relock_after_fill, trans, _THIS_IP_, path); + return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_relock_after_fill)); + } + @@ -15476,7 +15671,7 @@ index 0000000000000..579a8f8c6a65f + if (btree_node_read_locked(path, level + 1)) + btree_node_unlock(trans, path, level + 1); + -+ ret = btree_node_lock(trans, path, b, 
k->k.p, level, lock_type, ++ ret = btree_node_lock(trans, path, &b->c, k->k.p, level, lock_type, + lock_node_check_fn, (void *) k, trace_ip); + if (unlikely(ret)) { + if (bch2_err_matches(ret, BCH_ERR_lock_fail_node_reused)) @@ -15493,10 +15688,7 @@ index 0000000000000..579a8f8c6a65f + if (bch2_btree_node_relock(trans, path, level + 1)) + goto retry; + -+ trace_trans_restart_btree_node_reused(trans->fn, -+ trace_ip, -+ path->btree_id, -+ &path->pos); ++ trace_and_count(c, trans_restart_btree_node_reused, trans, trace_ip, path); + return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_lock_node_reused)); + } + } @@ -15552,12 +15744,13 @@ index 0000000000000..579a8f8c6a65f + return b; +} + -+struct btree *bch2_btree_node_get_noiter(struct bch_fs *c, ++struct btree *bch2_btree_node_get_noiter(struct btree_trans *trans, + const struct bkey_i *k, + enum btree_id btree_id, + unsigned level, + bool nofill) +{ ++ struct bch_fs *c = trans->c; + struct btree_cache *bc = &c->btree_cache; + struct btree *b; + struct bset_tree *t; @@ -15591,9 +15784,14 @@ index 0000000000000..579a8f8c6a65f + goto out; + } else { +lock_node: -+ ret = six_lock_read(&b->c.lock, lock_node_check_fn, (void *) k); -+ if (ret) -+ goto retry; ++ ret = btree_node_lock_nopath(trans, &b->c, SIX_LOCK_read); ++ if (unlikely(ret)) { ++ if (bch2_err_matches(ret, BCH_ERR_lock_fail_node_reused)) ++ goto retry; ++ if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) ++ return ERR_PTR(ret); ++ BUG(); ++ } + + if (unlikely(b->hash_val != btree_ptr_hash_val(k) || + b->c.btree_id != btree_id || @@ -15655,8 +15853,9 @@ index 0000000000000..579a8f8c6a65f + return PTR_ERR_OR_ZERO(b); +} + -+void bch2_btree_node_evict(struct bch_fs *c, const struct bkey_i *k) ++void bch2_btree_node_evict(struct btree_trans *trans, const struct bkey_i *k) +{ ++ struct bch_fs *c = trans->c; + struct btree_cache *bc = &c->btree_cache; + struct btree *b; + @@ -15672,8 +15871,8 @@ index 0000000000000..579a8f8c6a65f + __bch2_btree_node_wait_on_read(b); + __bch2_btree_node_wait_on_write(b); + -+ six_lock_intent(&b->c.lock, NULL, NULL); -+ six_lock_write(&b->c.lock, NULL, NULL); ++ btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_intent); ++ btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_write); + + if (btree_node_dirty(b)) { + __bch2_btree_node_write(c, b, 0); @@ -15747,10 +15946,10 @@ index 0000000000000..579a8f8c6a65f +} diff --git a/fs/bcachefs/btree_cache.h b/fs/bcachefs/btree_cache.h new file mode 100644 -index 0000000000000..25906127c0231 +index 000000000000..a4df3e866bb8 --- /dev/null +++ b/fs/bcachefs/btree_cache.h -@@ -0,0 +1,107 @@ +@@ -0,0 +1,105 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BCACHEFS_BTREE_CACHE_H +#define _BCACHEFS_BTREE_CACHE_H @@ -15758,8 +15957,6 @@ index 0000000000000..25906127c0231 +#include "bcachefs.h" +#include "btree_types.h" + -+extern struct lock_class_key bch2_btree_node_lock_key; -+ +extern const char * const bch2_btree_node_flags[]; + +struct btree_iter; @@ -15781,13 +15978,13 @@ index 0000000000000..25906127c0231 + const struct bkey_i *, unsigned, + enum six_lock_type, unsigned long); + -+struct btree *bch2_btree_node_get_noiter(struct bch_fs *, const struct bkey_i *, ++struct btree *bch2_btree_node_get_noiter(struct btree_trans *, const struct bkey_i *, + enum btree_id, unsigned, bool); + +int bch2_btree_node_prefetch(struct bch_fs *, struct btree_trans *, struct btree_path *, + const struct bkey_i *, enum btree_id, unsigned); + -+void bch2_btree_node_evict(struct bch_fs *, const struct 
bkey_i *); ++void bch2_btree_node_evict(struct btree_trans *, const struct bkey_i *); + +void bch2_fs_btree_cache_exit(struct bch_fs *); +int bch2_fs_btree_cache_init(struct bch_fs *); @@ -15860,10 +16057,10 @@ index 0000000000000..25906127c0231 +#endif /* _BCACHEFS_BTREE_CACHE_H */ diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c new file mode 100644 -index 0000000000000..2f563365ea4cb +index 000000000000..663c66d060a1 --- /dev/null +++ b/fs/bcachefs/btree_gc.c -@@ -0,0 +1,2098 @@ +@@ -0,0 +1,2106 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2010 Kent Overstreet @@ -16031,10 +16228,11 @@ index 0000000000000..2f563365ea4cb + } +} + -+static void bch2_btree_node_update_key_early(struct bch_fs *c, ++static void bch2_btree_node_update_key_early(struct btree_trans *trans, + enum btree_id btree, unsigned level, + struct bkey_s_c old, struct bkey_i *new) +{ ++ struct bch_fs *c = trans->c; + struct btree *b; + struct bkey_buf tmp; + int ret; @@ -16042,7 +16240,7 @@ index 0000000000000..2f563365ea4cb + bch2_bkey_buf_init(&tmp); + bch2_bkey_buf_reassemble(&tmp, c, old); + -+ b = bch2_btree_node_get_noiter(c, tmp.k, btree, level, true); ++ b = bch2_btree_node_get_noiter(trans, tmp.k, btree, level, true); + if (!IS_ERR_OR_NULL(b)) { + mutex_lock(&c->btree_cache.lock); + @@ -16218,8 +16416,9 @@ index 0000000000000..2f563365ea4cb + return ret; +} + -+static int bch2_btree_repair_topology_recurse(struct bch_fs *c, struct btree *b) ++static int bch2_btree_repair_topology_recurse(struct btree_trans *trans, struct btree *b) +{ ++ struct bch_fs *c = trans->c; + struct btree_and_journal_iter iter; + struct bkey_s_c k; + struct bkey_buf prev_k, cur_k; @@ -16244,7 +16443,7 @@ index 0000000000000..2f563365ea4cb + bch2_btree_and_journal_iter_advance(&iter); + bch2_bkey_buf_reassemble(&cur_k, c, k); + -+ cur = bch2_btree_node_get_noiter(c, cur_k.k, ++ cur = bch2_btree_node_get_noiter(trans, cur_k.k, + b->c.btree_id, b->c.level - 1, + false); + ret = PTR_ERR_OR_ZERO(cur); @@ -16258,7 +16457,7 @@ index 0000000000000..2f563365ea4cb + bch2_btree_ids[b->c.btree_id], + b->c.level - 1, + buf.buf)) { -+ bch2_btree_node_evict(c, cur_k.k); ++ bch2_btree_node_evict(trans, cur_k.k); + ret = bch2_journal_key_delete(c, b->c.btree_id, + b->c.level, cur_k.k->k.p); + cur = NULL; @@ -16277,7 +16476,7 @@ index 0000000000000..2f563365ea4cb + + if (ret == DROP_THIS_NODE) { + six_unlock_read(&cur->c.lock); -+ bch2_btree_node_evict(c, cur_k.k); ++ bch2_btree_node_evict(trans, cur_k.k); + ret = bch2_journal_key_delete(c, b->c.btree_id, + b->c.level, cur_k.k->k.p); + cur = NULL; @@ -16291,7 +16490,7 @@ index 0000000000000..2f563365ea4cb + prev = NULL; + + if (ret == DROP_PREV_NODE) { -+ bch2_btree_node_evict(c, prev_k.k); ++ bch2_btree_node_evict(trans, prev_k.k); + ret = bch2_journal_key_delete(c, b->c.btree_id, + b->c.level, prev_k.k->k.p); + if (ret) @@ -16331,7 +16530,7 @@ index 0000000000000..2f563365ea4cb + bch2_bkey_buf_reassemble(&cur_k, c, k); + bch2_btree_and_journal_iter_advance(&iter); + -+ cur = bch2_btree_node_get_noiter(c, cur_k.k, ++ cur = bch2_btree_node_get_noiter(trans, cur_k.k, + b->c.btree_id, b->c.level - 1, + false); + ret = PTR_ERR_OR_ZERO(cur); @@ -16342,12 +16541,12 @@ index 0000000000000..2f563365ea4cb + goto err; + } + -+ ret = bch2_btree_repair_topology_recurse(c, cur); ++ ret = bch2_btree_repair_topology_recurse(trans, cur); + six_unlock_read(&cur->c.lock); + cur = NULL; + + if (ret == DROP_THIS_NODE) { -+ bch2_btree_node_evict(c, cur_k.k); ++ bch2_btree_node_evict(trans, 
cur_k.k); + ret = bch2_journal_key_delete(c, b->c.btree_id, + b->c.level, cur_k.k->k.p); + dropped_children = true; @@ -16388,18 +16587,21 @@ index 0000000000000..2f563365ea4cb + +static int bch2_repair_topology(struct bch_fs *c) +{ ++ struct btree_trans trans; + struct btree *b; + unsigned i; + int ret = 0; + ++ bch2_trans_init(&trans, c, 0, 0); ++ + for (i = 0; i < BTREE_ID_NR && !ret; i++) { + b = c->btree_roots[i].b; + if (btree_node_fake(b)) + continue; + -+ six_lock_read(&b->c.lock, NULL, NULL); -+ ret = bch2_btree_repair_topology_recurse(c, b); + six_unlock_read(&b->c.lock); ++ btree_node_lock_nopath_nofail(&trans, &b->c, SIX_LOCK_read); ++ ret = bch2_btree_repair_topology_recurse(&trans, b); + + if (ret == DROP_THIS_NODE) { + bch_err(c, "empty btree root - repair unimplemented"); @@ -16407,13 +16609,16 @@ index 0000000000000..2f563365ea4cb + } + } + ++ bch2_trans_exit(&trans); ++ + return ret; +} + -+static int bch2_check_fix_ptrs(struct bch_fs *c, enum btree_id btree_id, ++static int bch2_check_fix_ptrs(struct btree_trans *trans, enum btree_id btree_id, + unsigned level, bool is_root, + struct bkey_s_c *k) +{ ++ struct bch_fs *c = trans->c; + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(*k); + const union bch_extent_entry *entry; + struct extent_ptr_decoded p = { 0 }; @@ -16613,7 +16818,7 @@ index 0000000000000..2f563365ea4cb + } + + if (level) -+ bch2_btree_node_update_key_early(c, btree_id, level - 1, *k, new); ++ bch2_btree_node_update_key_early(trans, btree_id, level - 1, *k, new); + + if (c->opts.verbose) { + printbuf_reset(&buf); @@ -16654,7 +16859,7 @@ index 0000000000000..2f563365ea4cb + BUG_ON(bch2_journal_seq_verify && + k->k->version.lo > atomic64_read(&c->journal.seq)); + -+ ret = bch2_check_fix_ptrs(c, btree_id, level, is_root, k); ++ ret = bch2_check_fix_ptrs(trans, btree_id, level, is_root, k); + if (ret) + goto err; + @@ -16807,7 +17012,7 @@ index 0000000000000..2f563365ea4cb + bch2_bkey_buf_reassemble(&cur, c, k); + bch2_btree_and_journal_iter_advance(&iter); + -+ child = bch2_btree_node_get_noiter(c, cur.k, ++ child = bch2_btree_node_get_noiter(trans, cur.k, + b->c.btree_id, b->c.level - 1, + false); + ret = PTR_ERR_OR_ZERO(child); @@ -17800,7 +18005,7 @@ index 0000000000000..2f563365ea4cb + if (!mutex_trylock(&c->gc_gens_lock)) + return 0; + -+ trace_gc_gens_start(c); ++ trace_and_count(c, gc_gens_start, c); + down_read(&c->gc_lock); + bch2_trans_init(&trans, c, 0, 0); + @@ -17861,7 +18066,7 @@ index 0000000000000..2f563365ea4cb + c->gc_count++; + + bch2_time_stats_update(&c->times[BCH_TIME_btree_gc], start_time); -+ trace_gc_gens_end(c); ++ trace_and_count(c, gc_gens_end, c); +err: + for_each_member_device(ca, c, i) { + kvfree(ca->oldest_gen); @@ -17964,7 +18169,7 @@ index 0000000000000..2f563365ea4cb +} diff --git a/fs/bcachefs/btree_gc.h b/fs/bcachefs/btree_gc.h new file mode 100644 -index 0000000000000..95d803b5743de +index 000000000000..95d803b5743d --- /dev/null +++ b/fs/bcachefs/btree_gc.h @@ -0,0 +1,112 @@ @@ -18082,10 +18287,10 @@ index 0000000000000..95d803b5743de +#endif /* _BCACHEFS_BTREE_GC_H */ diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c new file mode 100644 -index 0000000000000..ae731b3a39084 +index 000000000000..177fd49d65dd --- /dev/null +++ b/fs/bcachefs/btree_io.c -@@ -0,0 +1,2150 @@ +@@ -0,0 +1,2153 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "bcachefs.h" @@ -18704,7 +18909,6 @@ index 0000000000000..ae731b3a39084 + (u64 *) vstruct_end(i) - (u64 *) k); + i->u64s = cpu_to_le16(le16_to_cpu(i->u64s) - shift); + 
set_btree_bset_end(b, t); -+ bch2_bset_set_no_aux_tree(b, t); + } + + for (k = i->start; k != vstruct_last(i); k = bkey_next(k)) @@ -18714,10 +18918,14 @@ index 0000000000000..ae731b3a39084 + if (k != vstruct_last(i)) { + i->u64s = cpu_to_le16((u64 *) k - (u64 *) i->start); + set_btree_bset_end(b, t); -+ bch2_bset_set_no_aux_tree(b, t); + } + } + ++ /* ++ * Always rebuild search trees: eytzinger search tree nodes directly ++ * depend on the values of min/max key: ++ */ ++ bch2_bset_set_no_aux_tree(b, b->set); + bch2_btree_build_aux_trees(b); + + for_each_btree_node_key_unpack(b, k, &iter, &unpacked) { @@ -18866,8 +19074,7 @@ index 0000000000000..ae731b3a39084 +} + +static int validate_bset_keys(struct bch_fs *c, struct btree *b, -+ struct bset *i, unsigned *whiteout_u64s, -+ int write, bool have_retry) ++ struct bset *i, int write, bool have_retry) +{ + unsigned version = le16_to_cpu(i->version); + struct bkey_packed *k, *prev = NULL; @@ -19003,7 +19210,7 @@ index 0000000000000..ae731b3a39084 + } + + while (b->written < (ptr_written ?: btree_sectors(c))) { -+ unsigned sectors, whiteout_u64s = 0; ++ unsigned sectors; + struct nonce nonce; + struct bch_csum csum; + bool first = !b->written; @@ -19072,8 +19279,7 @@ index 0000000000000..ae731b3a39084 + if (!b->written) + btree_node_set_format(b, b->data->format); + -+ ret = validate_bset_keys(c, b, i, &whiteout_u64s, -+ READ, have_retry); ++ ret = validate_bset_keys(c, b, i, READ, have_retry); + if (ret) + goto fsck_err; + @@ -19099,11 +19305,8 @@ index 0000000000000..ae731b3a39084 + if (blacklisted && !first) + continue; + -+ sort_iter_add(iter, i->start, -+ vstruct_idx(i, whiteout_u64s)); -+ + sort_iter_add(iter, -+ vstruct_idx(i, whiteout_u64s), ++ vstruct_idx(i, 0), + vstruct_last(i)); + + nonblacklisted_written = b->written; @@ -19580,7 +19783,7 @@ index 0000000000000..ae731b3a39084 + struct bio *bio; + int ret; + -+ trace_btree_read(c, b); ++ trace_and_count(c, btree_node_read, c, b); + + if (bch2_verify_all_btree_replicas && + !btree_node_read_all_replicas(c, b, sync)) @@ -19747,9 +19950,15 @@ index 0000000000000..ae731b3a39084 + +static void btree_node_write_done(struct bch_fs *c, struct btree *b) +{ -+ six_lock_read(&b->c.lock, NULL, NULL); ++ struct btree_trans trans; ++ ++ bch2_trans_init(&trans, c, 0, 0); ++ ++ btree_node_lock_nopath_nofail(&trans, &b->c, SIX_LOCK_read); + __btree_node_write_done(c, b); + six_unlock_read(&b->c.lock); ++ ++ bch2_trans_exit(&trans); +} + +static void btree_node_write_work(struct work_struct *work) @@ -19833,7 +20042,6 @@ index 0000000000000..ae731b3a39084 +static int validate_bset_for_write(struct bch_fs *c, struct btree *b, + struct bset *i, unsigned sectors) +{ -+ unsigned whiteout_u64s = 0; + struct printbuf buf = PRINTBUF; + int ret; + @@ -19846,7 +20054,7 @@ index 0000000000000..ae731b3a39084 + if (ret) + return ret; + -+ ret = validate_bset_keys(c, b, i, &whiteout_u64s, WRITE, false) ?: ++ ret = validate_bset_keys(c, b, i, WRITE, false) ?: + validate_bset(c, NULL, b, i, b->written, sectors, WRITE, false); + if (ret) { + bch2_inconsistent_error(c); @@ -20070,7 +20278,7 @@ index 0000000000000..ae731b3a39084 + c->opts.nochanges) + goto err; + -+ trace_btree_write(b, bytes_to_write, sectors_to_write); ++ trace_and_count(c, btree_node_write, b, bytes_to_write, sectors_to_write); + + wbio = container_of(bio_alloc_bioset(NULL, + buf_pages(data, sectors_to_write << 9), @@ -20238,7 +20446,7 @@ index 0000000000000..ae731b3a39084 +} diff --git a/fs/bcachefs/btree_io.h b/fs/bcachefs/btree_io.h new file 
mode 100644 -index 0000000000000..8af853642123d +index 000000000000..8af853642123 --- /dev/null +++ b/fs/bcachefs/btree_io.h @@ -0,0 +1,222 @@ @@ -20466,10 +20674,10 @@ index 0000000000000..8af853642123d +#endif /* _BCACHEFS_BTREE_IO_H */ diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c new file mode 100644 -index 0000000000000..c6c0e24003869 +index 000000000000..512c3b2b4769 --- /dev/null +++ b/fs/bcachefs/btree_iter.c -@@ -0,0 +1,3513 @@ +@@ -0,0 +1,3109 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "bcachefs.h" @@ -20572,12 +20780,6 @@ index 0000000000000..c6c0e24003869 + return p; +} + -+static inline bool is_btree_node(struct btree_path *path, unsigned l) -+{ -+ return l < BTREE_MAX_DEPTH && -+ (unsigned long) path->l[l].b >= 128; -+} -+ +static inline struct bpos btree_iter_search_key(struct btree_iter *iter) +{ + struct bpos pos = iter->pos; @@ -20608,439 +20810,6 @@ index 0000000000000..c6c0e24003869 + !btree_path_pos_after_node(path, b); +} + -+/* Btree node locking: */ -+ -+void bch2_btree_node_unlock_write(struct btree_trans *trans, -+ struct btree_path *path, struct btree *b) -+{ -+ bch2_btree_node_unlock_write_inlined(trans, path, b); -+} -+ -+void __bch2_btree_node_lock_write(struct btree_trans *trans, struct btree *b) -+{ -+ struct btree_path *linked; -+ unsigned readers = 0; -+ -+ trans_for_each_path(trans, linked) -+ if (linked->l[b->c.level].b == b && -+ btree_node_read_locked(linked, b->c.level)) -+ readers++; -+ -+ /* -+ * Must drop our read locks before calling six_lock_write() - -+ * six_unlock() won't do wakeups until the reader count -+ * goes to 0, and it's safe because we have the node intent -+ * locked: -+ */ -+ if (!b->c.lock.readers) -+ atomic64_sub(__SIX_VAL(read_lock, readers), -+ &b->c.lock.state.counter); -+ else -+ this_cpu_sub(*b->c.lock.readers, readers); -+ -+ six_lock_write(&b->c.lock, NULL, NULL); -+ -+ if (!b->c.lock.readers) -+ atomic64_add(__SIX_VAL(read_lock, readers), -+ &b->c.lock.state.counter); -+ else -+ this_cpu_add(*b->c.lock.readers, readers); -+} -+ -+bool __bch2_btree_node_relock(struct btree_trans *trans, -+ struct btree_path *path, unsigned level) -+{ -+ struct btree *b = btree_path_node(path, level); -+ int want = __btree_lock_want(path, level); -+ -+ if (!is_btree_node(path, level)) -+ goto fail; -+ -+ if (race_fault()) -+ goto fail; -+ -+ if (six_relock_type(&b->c.lock, want, path->l[level].lock_seq) || -+ (btree_node_lock_seq_matches(path, b, level) && -+ btree_node_lock_increment(trans, b, level, want))) { -+ mark_btree_node_locked(trans, path, level, want); -+ return true; -+ } -+fail: -+ if (b != BTREE_ITER_NO_NODE_CACHED && -+ b != BTREE_ITER_NO_NODE_INIT) -+ trace_btree_node_relock_fail(trans->fn, _RET_IP_, -+ path->btree_id, -+ &path->pos, -+ (unsigned long) b, -+ path->l[level].lock_seq, -+ is_btree_node(path, level) ? 
b->c.lock.state.seq : 0); -+ return false; -+} -+ -+bool bch2_btree_node_upgrade(struct btree_trans *trans, -+ struct btree_path *path, unsigned level) -+{ -+ struct btree *b = path->l[level].b; -+ -+ if (!is_btree_node(path, level)) -+ return false; -+ -+ switch (btree_lock_want(path, level)) { -+ case BTREE_NODE_UNLOCKED: -+ BUG_ON(btree_node_locked(path, level)); -+ return true; -+ case BTREE_NODE_READ_LOCKED: -+ BUG_ON(btree_node_intent_locked(path, level)); -+ return bch2_btree_node_relock(trans, path, level); -+ case BTREE_NODE_INTENT_LOCKED: -+ break; -+ } -+ -+ if (btree_node_intent_locked(path, level)) -+ return true; -+ -+ if (race_fault()) -+ return false; -+ -+ if (btree_node_locked(path, level) -+ ? six_lock_tryupgrade(&b->c.lock) -+ : six_relock_type(&b->c.lock, SIX_LOCK_intent, path->l[level].lock_seq)) -+ goto success; -+ -+ if (btree_node_lock_seq_matches(path, b, level) && -+ btree_node_lock_increment(trans, b, level, BTREE_NODE_INTENT_LOCKED)) { -+ btree_node_unlock(trans, path, level); -+ goto success; -+ } -+ -+ return false; -+success: -+ mark_btree_node_intent_locked(trans, path, level); -+ return true; -+} -+ -+static inline bool btree_path_get_locks(struct btree_trans *trans, -+ struct btree_path *path, -+ bool upgrade) -+{ -+ unsigned l = path->level; -+ int fail_idx = -1; -+ -+ do { -+ if (!btree_path_node(path, l)) -+ break; -+ -+ if (!(upgrade -+ ? bch2_btree_node_upgrade(trans, path, l) -+ : bch2_btree_node_relock(trans, path, l))) -+ fail_idx = l; -+ -+ l++; -+ } while (l < path->locks_want); -+ -+ /* -+ * When we fail to get a lock, we have to ensure that any child nodes -+ * can't be relocked so bch2_btree_path_traverse has to walk back up to -+ * the node that we failed to relock: -+ */ -+ if (fail_idx >= 0) { -+ __bch2_btree_path_unlock(trans, path); -+ btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE); -+ -+ do { -+ path->l[fail_idx].b = BTREE_ITER_NO_NODE_GET_LOCKS; -+ --fail_idx; -+ } while (fail_idx >= 0); -+ } -+ -+ if (path->uptodate == BTREE_ITER_NEED_RELOCK) -+ path->uptodate = BTREE_ITER_UPTODATE; -+ -+ bch2_trans_verify_locks(trans); -+ -+ return path->uptodate < BTREE_ITER_NEED_RELOCK; -+} -+ -+static struct bpos btree_node_pos(struct btree_bkey_cached_common *_b, -+ bool cached) -+{ -+ return !cached -+ ? 
container_of(_b, struct btree, c)->key.k.p -+ : container_of(_b, struct bkey_cached, c)->key.pos; -+} -+ -+/* Slowpath: */ -+int __bch2_btree_node_lock(struct btree_trans *trans, -+ struct btree_path *path, -+ struct btree *b, -+ struct bpos pos, unsigned level, -+ enum six_lock_type type, -+ six_lock_should_sleep_fn should_sleep_fn, void *p, -+ unsigned long ip) -+{ -+ struct btree_path *linked; -+ unsigned reason; -+ -+ /* Check if it's safe to block: */ -+ trans_for_each_path(trans, linked) { -+ if (!linked->nodes_locked) -+ continue; -+ -+ /* -+ * Can't block taking an intent lock if we have _any_ nodes read -+ * locked: -+ * -+ * - Our read lock blocks another thread with an intent lock on -+ * the same node from getting a write lock, and thus from -+ * dropping its intent lock -+ * -+ * - And the other thread may have multiple nodes intent locked: -+ * both the node we want to intent lock, and the node we -+ * already have read locked - deadlock: -+ */ -+ if (type == SIX_LOCK_intent && -+ linked->nodes_locked != linked->nodes_intent_locked) { -+ reason = 1; -+ goto deadlock; -+ } -+ -+ if (linked->btree_id != path->btree_id) { -+ if (linked->btree_id < path->btree_id) -+ continue; -+ -+ reason = 3; -+ goto deadlock; -+ } -+ -+ /* -+ * Within the same btree, non-cached paths come before cached -+ * paths: -+ */ -+ if (linked->cached != path->cached) { -+ if (!linked->cached) -+ continue; -+ -+ reason = 4; -+ goto deadlock; -+ } -+ -+ /* -+ * Interior nodes must be locked before their descendants: if -+ * another path has possible descendants locked of the node -+ * we're about to lock, it must have the ancestors locked too: -+ */ -+ if (level > __fls(linked->nodes_locked)) { -+ reason = 5; -+ goto deadlock; -+ } -+ -+ /* Must lock btree nodes in key order: */ -+ if (btree_node_locked(linked, level) && -+ bpos_cmp(pos, btree_node_pos((void *) linked->l[level].b, -+ linked->cached)) <= 0) { -+ reason = 7; -+ goto deadlock; -+ } -+ } -+ -+ return btree_node_lock_type(trans, path, b, pos, level, -+ type, should_sleep_fn, p); -+deadlock: -+ trace_trans_restart_would_deadlock(trans->fn, ip, -+ trans->in_traverse_all, reason, -+ linked->btree_id, -+ linked->cached, -+ &linked->pos, -+ path->btree_id, -+ path->cached, -+ &pos); -+ return btree_trans_restart(trans, BCH_ERR_transaction_restart_would_deadlock); -+} -+ -+/* Btree iterator locking: */ -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+ -+static void bch2_btree_path_verify_locks(struct btree_path *path) -+{ -+ unsigned l; -+ -+ if (!path->nodes_locked) { -+ BUG_ON(path->uptodate == BTREE_ITER_UPTODATE && -+ btree_path_node(path, path->level)); -+ return; -+ } -+ -+ for (l = 0; btree_path_node(path, l); l++) -+ BUG_ON(btree_lock_want(path, l) != -+ btree_node_locked_type(path, l)); -+} -+ -+void bch2_trans_verify_locks(struct btree_trans *trans) -+{ -+ struct btree_path *path; -+ -+ trans_for_each_path(trans, path) -+ bch2_btree_path_verify_locks(path); -+} -+#else -+static inline void bch2_btree_path_verify_locks(struct btree_path *path) {} -+#endif -+ -+/* Btree path locking: */ -+ -+/* -+ * Only for btree_cache.c - only relocks intent locks -+ */ -+int bch2_btree_path_relock_intent(struct btree_trans *trans, -+ struct btree_path *path) -+{ -+ unsigned l; -+ -+ for (l = path->level; -+ l < path->locks_want && btree_path_node(path, l); -+ l++) { -+ if (!bch2_btree_node_relock(trans, path, l)) { -+ __bch2_btree_path_unlock(trans, path); -+ btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE); -+ trace_trans_restart_relock_path_intent(trans->fn, 
_RET_IP_, -+ path->btree_id, &path->pos); -+ return btree_trans_restart(trans, BCH_ERR_transaction_restart_relock_path_intent); -+ } -+ } -+ -+ return 0; -+} -+ -+__flatten -+static int bch2_btree_path_relock(struct btree_trans *trans, -+ struct btree_path *path, unsigned long trace_ip) -+{ -+ bool ret = btree_path_get_locks(trans, path, false); -+ -+ if (!ret) { -+ trace_trans_restart_relock_path(trans->fn, trace_ip, -+ path->btree_id, &path->pos); -+ return btree_trans_restart(trans, BCH_ERR_transaction_restart_relock_path); -+ } -+ -+ return 0; -+} -+ -+bool __bch2_btree_path_upgrade(struct btree_trans *trans, -+ struct btree_path *path, -+ unsigned new_locks_want) -+{ -+ struct btree_path *linked; -+ -+ EBUG_ON(path->locks_want >= new_locks_want); -+ -+ path->locks_want = new_locks_want; -+ -+ if (btree_path_get_locks(trans, path, true)) -+ return true; -+ -+ /* -+ * XXX: this is ugly - we'd prefer to not be mucking with other -+ * iterators in the btree_trans here. -+ * -+ * On failure to upgrade the iterator, setting iter->locks_want and -+ * calling get_locks() is sufficient to make bch2_btree_path_traverse() -+ * get the locks we want on transaction restart. -+ * -+ * But if this iterator was a clone, on transaction restart what we did -+ * to this iterator isn't going to be preserved. -+ * -+ * Possibly we could add an iterator field for the parent iterator when -+ * an iterator is a copy - for now, we'll just upgrade any other -+ * iterators with the same btree id. -+ * -+ * The code below used to be needed to ensure ancestor nodes get locked -+ * before interior nodes - now that's handled by -+ * bch2_btree_path_traverse_all(). -+ */ -+ if (!path->cached && !trans->in_traverse_all) -+ trans_for_each_path(trans, linked) -+ if (linked != path && -+ linked->cached == path->cached && -+ linked->btree_id == path->btree_id && -+ linked->locks_want < new_locks_want) { -+ linked->locks_want = new_locks_want; -+ btree_path_get_locks(trans, linked, true); -+ } -+ -+ return false; -+} -+ -+void __bch2_btree_path_downgrade(struct btree_trans *trans, -+ struct btree_path *path, -+ unsigned new_locks_want) -+{ -+ unsigned l; -+ -+ EBUG_ON(path->locks_want < new_locks_want); -+ -+ path->locks_want = new_locks_want; -+ -+ while (path->nodes_locked && -+ (l = __fls(path->nodes_locked)) >= path->locks_want) { -+ if (l > path->level) { -+ btree_node_unlock(trans, path, l); -+ } else { -+ if (btree_node_intent_locked(path, l)) { -+ six_lock_downgrade(&path->l[l].b->c.lock); -+ path->nodes_intent_locked ^= 1 << l; -+ } -+ break; -+ } -+ } -+ -+ bch2_btree_path_verify_locks(path); -+} -+ -+void bch2_trans_downgrade(struct btree_trans *trans) -+{ -+ struct btree_path *path; -+ -+ trans_for_each_path(trans, path) -+ bch2_btree_path_downgrade(trans, path); -+} -+ -+/* Btree transaction locking: */ -+ -+int bch2_trans_relock(struct btree_trans *trans) -+{ -+ struct btree_path *path; -+ -+ if (unlikely(trans->restarted)) -+ return -BCH_ERR_transaction_restart_relock; -+ -+ trans_for_each_path(trans, path) -+ if (path->should_be_locked && -+ bch2_btree_path_relock(trans, path, _RET_IP_)) { -+ trace_trans_restart_relock(trans->fn, _RET_IP_, -+ path->btree_id, &path->pos); -+ BUG_ON(!trans->restarted); -+ return -BCH_ERR_transaction_restart_relock; -+ } -+ return 0; -+} -+ -+void bch2_trans_unlock(struct btree_trans *trans) -+{ -+ struct btree_path *path; -+ -+ trans_for_each_path(trans, path) -+ __bch2_btree_path_unlock(trans, path); -+ -+ /* -+ * bch2_gc_btree_init_recurse() doesn't use btree iterators for 
walking -+ * btree nodes, it implements its own walking: -+ */ -+ BUG_ON(!trans->is_initial_gc && -+ lock_class_is_held(&bch2_btree_node_lock_key)); -+} -+ +/* Btree iterator: */ + +#ifdef CONFIG_BCACHEFS_DEBUG @@ -21269,7 +21038,7 @@ index 0000000000000..c6c0e24003869 + if (cmp < 0) + continue; + -+ if (!(path->nodes_locked & 1) || ++ if (!btree_node_locked(path, 0) || + !path->should_be_locked) + continue; + @@ -21635,13 +21404,13 @@ index 0000000000000..c6c0e24003869 + struct btree_path *path; + + trans_for_each_path(trans, path) -+ if (!path->cached && ++ if (path->uptodate == BTREE_ITER_UPTODATE && ++ !path->cached && + btree_path_pos_in_node(path, b)) { + enum btree_node_locked_type t = + btree_lock_want(path, b->c.level); + -+ if (path->nodes_locked && -+ t != BTREE_NODE_UNLOCKED) { ++ if (t != BTREE_NODE_UNLOCKED) { + btree_node_unlock(trans, path, b->c.level); + six_lock_increment(&b->c.lock, t); + mark_btree_node_locked(trans, path, b->c.level, t); @@ -21706,7 +21475,7 @@ index 0000000000000..c6c0e24003869 + } + + lock_type = __btree_lock_want(path, path->level); -+ ret = btree_node_lock(trans, path, b, SPOS_MAX, ++ ret = btree_node_lock(trans, path, &b->c, SPOS_MAX, + path->level, lock_type, + lock_root_check_fn, rootp, + trace_ip); @@ -21722,7 +21491,7 @@ index 0000000000000..c6c0e24003869 + b->c.level == path->level && + !race_fault())) { + for (i = 0; i < path->level; i++) -+ path->l[i].b = BTREE_ITER_NO_NODE_LOCK_ROOT; ++ path->l[i].b = ERR_PTR(-BCH_ERR_no_btree_node_lock_root); + path->l[path->level].b = b; + for (i = path->level + 1; i < BTREE_MAX_DEPTH; i++) + path->l[i].b = NULL; @@ -21892,16 +21661,16 @@ index 0000000000000..c6c0e24003869 + if (unlikely(ret)) + goto err; + -+ mark_btree_node_locked(trans, path, level, lock_type); -+ btree_path_level_init(trans, path, b); -+ + if (likely(replay_done && tmp.k->k.type == KEY_TYPE_btree_ptr_v2) && + unlikely(b != btree_node_mem_ptr(tmp.k))) + btree_node_mem_ptr_set(trans, path, level + 1, b); + + if (btree_node_read_locked(path, level + 1)) + btree_node_unlock(trans, path, level + 1); ++ ++ mark_btree_node_locked(trans, path, level, lock_type); + path->level = level; ++ btree_path_level_init(trans, path, b); + + bch2_btree_path_verify_locks(path); +err: @@ -21991,7 +21760,7 @@ index 0000000000000..c6c0e24003869 + + trans->in_traverse_all = false; + -+ trace_trans_traverse_all(trans->fn, trace_ip); ++ trace_and_count(c, trans_traverse_all, trans, trace_ip); + return ret; +} + @@ -22010,15 +21779,6 @@ index 0000000000000..c6c0e24003869 + return true; +} + -+static void btree_path_set_level_up(struct btree_trans *trans, -+ struct btree_path *path) -+{ -+ btree_node_unlock(trans, path, path->level); -+ path->l[path->level].b = BTREE_ITER_NO_NODE_UP; -+ path->level++; -+ btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE); -+} -+ +static void btree_path_set_level_down(struct btree_trans *trans, + struct btree_path *path, + unsigned new_level) @@ -22042,22 +21802,16 @@ index 0000000000000..c6c0e24003869 + unsigned i, l = path->level; + + while (btree_path_node(path, l) && -+ !btree_path_good_node(trans, path, l, check_pos)) { -+ btree_node_unlock(trans, path, l); -+ path->l[l].b = BTREE_ITER_NO_NODE_UP; -+ l++; -+ } ++ !btree_path_good_node(trans, path, l, check_pos)) ++ __btree_path_set_level_up(trans, path, l++); + + /* If we need intent locks, take them too: */ + for (i = l + 1; + i < path->locks_want && btree_path_node(path, i); + i++) + if (!bch2_btree_node_relock(trans, path, i)) -+ while (l <= i) { -+ 
btree_node_unlock(trans, path, l); -+ path->l[l].b = BTREE_ITER_NO_NODE_UP; -+ l++; -+ } ++ while (l <= i) ++ __btree_path_set_level_up(trans, path, l++); + + return l; +} @@ -22123,13 +21877,7 @@ index 0000000000000..c6c0e24003869 + + __bch2_btree_path_unlock(trans, path); + path->level = depth_want; -+ -+ if (ret == -EIO) -+ path->l[path->level].b = -+ BTREE_ITER_NO_NODE_ERROR; -+ else -+ path->l[path->level].b = -+ BTREE_ITER_NO_NODE_DOWN; ++ path->l[path->level].b = ERR_PTR(ret); + goto out; + } + } @@ -22149,7 +21897,7 @@ index 0000000000000..c6c0e24003869 + u64 mask = ~(~0ULL << restart_probability_bits); + + if ((prandom_u32() & mask) == mask) { -+ trace_transaction_restart_injected(trans->fn, _RET_IP_); ++ trace_and_count(trans->c, trans_restart_injected, trans, _RET_IP_); + return btree_trans_restart(trans, BCH_ERR_transaction_restart_fault_inject); + } + } @@ -22229,7 +21977,7 @@ index 0000000000000..c6c0e24003869 + + if (unlikely(path->cached)) { + btree_node_unlock(trans, path, 0); -+ path->l[0].b = BTREE_ITER_NO_NODE_CACHED; ++ path->l[0].b = ERR_PTR(-BCH_ERR_no_btree_node_up); + btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE); + goto out; + } @@ -22262,30 +22010,30 @@ index 0000000000000..c6c0e24003869 + +static struct btree_path *have_path_at_pos(struct btree_trans *trans, struct btree_path *path) +{ -+ struct btree_path *next; ++ struct btree_path *sib; + -+ next = prev_btree_path(trans, path); -+ if (next && !btree_path_cmp(next, path)) -+ return next; ++ sib = prev_btree_path(trans, path); ++ if (sib && !btree_path_cmp(sib, path)) ++ return sib; + -+ next = next_btree_path(trans, path); -+ if (next && !btree_path_cmp(next, path)) -+ return next; ++ sib = next_btree_path(trans, path); ++ if (sib && !btree_path_cmp(sib, path)) ++ return sib; + + return NULL; +} + +static struct btree_path *have_node_at_pos(struct btree_trans *trans, struct btree_path *path) +{ -+ struct btree_path *next; ++ struct btree_path *sib; + -+ next = prev_btree_path(trans, path); -+ if (next && next->level == path->level && path_l(next)->b == path_l(path)->b) -+ return next; ++ sib = prev_btree_path(trans, path); ++ if (sib && sib->level == path->level && path_l(sib)->b == path_l(path)->b) ++ return sib; + -+ next = next_btree_path(trans, path); -+ if (next && next->level == path->level && path_l(next)->b == path_l(path)->b) -+ return next; ++ sib = next_btree_path(trans, path); ++ if (sib && sib->level == path->level && path_l(sib)->b == path_l(path)->b) ++ return sib; + + return NULL; +} @@ -22307,26 +22055,23 @@ index 0000000000000..c6c0e24003869 + if (!__btree_path_put(path, intent)) + return; + -+ /* -+ * Perhaps instead we should check for duplicate paths in traverse_all: -+ */ -+ if (path->preserve && -+ (dup = have_path_at_pos(trans, path))) { -+ dup->preserve = true; -+ path->preserve = false; -+ goto free; -+ } ++ dup = path->preserve ++ ? 
have_path_at_pos(trans, path) ++ : have_node_at_pos(trans, path); + -+ if (!path->preserve && -+ (dup = have_node_at_pos(trans, path))) -+ goto free; -+ return; -+free: -+ if (path->should_be_locked && -+ !btree_node_locked(dup, path->level)) ++ if (!dup && !(!path->preserve && !is_btree_node(path, path->level))) + return; + -+ dup->should_be_locked |= path->should_be_locked; ++ if (path->should_be_locked && ++ !trans->restarted && ++ (!dup || !bch2_btree_path_relock_norestart(trans, dup, _THIS_IP_))) ++ return; ++ ++ if (dup) { ++ dup->preserve |= path->preserve; ++ dup->should_be_locked |= path->should_be_locked; ++ } ++ + __bch2_path_free(trans, path); +} + @@ -22370,42 +22115,69 @@ index 0000000000000..c6c0e24003869 + printbuf_exit(&buf); +} + ++void bch2_btree_path_to_text(struct printbuf *out, struct btree_path *path) ++{ ++ prt_printf(out, "path: idx %2u ref %u:%u %c %c btree=%s l=%u pos ", ++ path->idx, path->ref, path->intent_ref, ++ path->preserve ? 'P' : ' ', ++ path->should_be_locked ? 'S' : ' ', ++ bch2_btree_ids[path->btree_id], ++ path->level); ++ bch2_bpos_to_text(out, path->pos); ++ ++ prt_printf(out, " locks %u", path->nodes_locked); ++#ifdef CONFIG_BCACHEFS_DEBUG ++ prt_printf(out, " %pS", (void *) path->ip_allocated); ++#endif ++ prt_newline(out); ++} ++ ++void bch2_trans_paths_to_text(struct printbuf *out, struct btree_trans *trans) ++{ ++ struct btree_path *path; ++ unsigned idx; ++ ++ trans_for_each_path_inorder(trans, path, idx) ++ bch2_btree_path_to_text(out, path); ++} ++ +noinline __cold +void bch2_dump_trans_paths_updates(struct btree_trans *trans) +{ -+ struct btree_path *path; + struct printbuf buf = PRINTBUF; -+ unsigned idx; + -+ trans_for_each_path_inorder(trans, path, idx) { -+ printbuf_reset(&buf); -+ -+ bch2_bpos_to_text(&buf, path->pos); -+ -+ printk(KERN_ERR "path: idx %u ref %u:%u%s%s btree=%s l=%u pos %s locks %u %pS\n", -+ path->idx, path->ref, path->intent_ref, -+ path->should_be_locked ? " S" : "", -+ path->preserve ? 
" P" : "", -+ bch2_btree_ids[path->btree_id], -+ path->level, -+ buf.buf, -+ path->nodes_locked, -+#ifdef CONFIG_BCACHEFS_DEBUG -+ (void *) path->ip_allocated -+#else -+ NULL -+#endif -+ ); -+ } ++ bch2_trans_paths_to_text(&buf, trans); + ++ printk(KERN_ERR "%s", buf.buf); + printbuf_exit(&buf); + + bch2_dump_trans_updates(trans); +} + ++noinline ++static void bch2_trans_update_max_paths(struct btree_trans *trans) ++{ ++ struct btree_transaction_stats *s = btree_trans_stats(trans); ++ struct printbuf buf = PRINTBUF; ++ ++ bch2_trans_paths_to_text(&buf, trans); ++ ++ if (!buf.allocation_failure) { ++ mutex_lock(&s->lock); ++ if (s->nr_max_paths < hweight64(trans->paths_allocated)) { ++ s->nr_max_paths = hweight64(trans->paths_allocated); ++ swap(s->max_paths_text, buf.buf); ++ } ++ mutex_unlock(&s->lock); ++ } ++ ++ printbuf_exit(&buf); ++} ++ +static struct btree_path *btree_path_alloc(struct btree_trans *trans, + struct btree_path *pos) +{ ++ struct btree_transaction_stats *s = btree_trans_stats(trans); + struct btree_path *path; + unsigned idx; + @@ -22418,13 +22190,15 @@ index 0000000000000..c6c0e24003869 + idx = __ffs64(~trans->paths_allocated); + trans->paths_allocated |= 1ULL << idx; + ++ if (s && unlikely(hweight64(trans->paths_allocated) > s->nr_max_paths)) ++ bch2_trans_update_max_paths(trans); ++ + path = &trans->paths[idx]; + + path->idx = idx; + path->ref = 0; + path->intent_ref = 0; + path->nodes_locked = 0; -+ path->nodes_intent_locked = 0; + + btree_path_list_add(trans, pos, path); + return path; @@ -22474,9 +22248,8 @@ index 0000000000000..c6c0e24003869 + path->level = level; + path->locks_want = locks_want; + path->nodes_locked = 0; -+ path->nodes_intent_locked = 0; + for (i = 0; i < ARRAY_SIZE(path->l); i++) -+ path->l[i].b = BTREE_ITER_NO_NODE_INIT; ++ path->l[i].b = ERR_PTR(-BCH_ERR_no_btree_node_init); +#ifdef CONFIG_BCACHEFS_DEBUG + path->ip_allocated = ip; +#endif @@ -22498,10 +22271,8 @@ index 0000000000000..c6c0e24003869 + */ + + locks_want = min(locks_want, BTREE_MAX_DEPTH); -+ if (locks_want > path->locks_want) { -+ path->locks_want = locks_want; -+ btree_path_get_locks(trans, path, true); -+ } ++ if (locks_want > path->locks_want) ++ bch2_btree_path_upgrade_noupgrade_sibs(trans, path, locks_want); + + return path; +} @@ -22511,12 +22282,13 @@ index 0000000000000..c6c0e24003869 + + struct bkey_s_c k; + ++ EBUG_ON(path->uptodate != BTREE_ITER_UPTODATE); ++ EBUG_ON(!btree_node_locked(path, path->level)); ++ + if (!path->cached) { + struct btree_path_level *l = path_l(path); + struct bkey_packed *_k; + -+ EBUG_ON(path->uptodate != BTREE_ITER_UPTODATE); -+ + _k = bch2_btree_node_iter_peek_all(&l->iter, l->b); + k = _k ? bkey_disassemble(l->b, _k, u) : bkey_s_c_null; + @@ -22530,12 +22302,7 @@ index 0000000000000..c6c0e24003869 + EBUG_ON(ck && + (path->btree_id != ck->key.btree_id || + bkey_cmp(path->pos, ck->key.pos))); -+ -+ /* BTREE_ITER_CACHED_NOFILL|BTREE_ITER_CACHED_NOCREATE? 
*/ -+ if (unlikely(!ck || !ck->valid)) -+ return bkey_s_c_null; -+ -+ EBUG_ON(path->uptodate != BTREE_ITER_UPTODATE); ++ EBUG_ON(!ck || !ck->valid); + + *u = ck->k->k; + k = bkey_i_to_s_c(ck->k); @@ -22570,7 +22337,7 @@ index 0000000000000..c6c0e24003869 + if (ret) + return ret; + -+ iter->path->should_be_locked = true; ++ btree_path_set_should_be_locked(iter->path); + return 0; +} + @@ -22601,8 +22368,7 @@ index 0000000000000..c6c0e24003869 + iter->path = bch2_btree_path_set_pos(trans, iter->path, b->key.k.p, + iter->flags & BTREE_ITER_INTENT, + btree_iter_ip_allocated(iter)); -+ iter->path->should_be_locked = true; -+ BUG_ON(iter->path->uptodate); ++ btree_path_set_should_be_locked(iter->path); +out: + bch2_btree_iter_verify_entry_exit(iter); + bch2_btree_iter_verify(iter); @@ -22636,11 +22402,10 @@ index 0000000000000..c6c0e24003869 + + if (!bch2_btree_node_relock(trans, path, path->level + 1)) { + __bch2_btree_path_unlock(trans, path); -+ path->l[path->level].b = BTREE_ITER_NO_NODE_GET_LOCKS; -+ path->l[path->level + 1].b = BTREE_ITER_NO_NODE_GET_LOCKS; ++ path->l[path->level].b = ERR_PTR(-BCH_ERR_no_btree_node_relock); ++ path->l[path->level + 1].b = ERR_PTR(-BCH_ERR_no_btree_node_relock); + btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE); -+ trace_trans_restart_relock_next_node(trans->fn, _THIS_IP_, -+ path->btree_id, &path->pos); ++ trace_and_count(trans->c, trans_restart_relock_next_node, trans, _THIS_IP_, path); + ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_relock); + goto err; + } @@ -22648,9 +22413,7 @@ index 0000000000000..c6c0e24003869 + b = btree_path_node(path, path->level + 1); + + if (!bpos_cmp(iter->pos, b->key.k.p)) { -+ btree_node_unlock(trans, path, path->level); -+ path->l[path->level].b = BTREE_ITER_NO_NODE_UP; -+ path->level++; ++ __btree_path_set_level_up(trans, path, path->level++); + } else { + /* + * Haven't gotten to the end of the parent node: go back down to @@ -22676,7 +22439,7 @@ index 0000000000000..c6c0e24003869 + iter->path = bch2_btree_path_set_pos(trans, iter->path, b->key.k.p, + iter->flags & BTREE_ITER_INTENT, + btree_iter_ip_allocated(iter)); -+ iter->path->should_be_locked = true; ++ btree_path_set_should_be_locked(iter->path); + BUG_ON(iter->path->uptodate); +out: + bch2_btree_iter_verify_entry_exit(iter); @@ -22794,7 +22557,7 @@ index 0000000000000..c6c0e24003869 + * bkey_s_c_null: + */ +static noinline -+struct bkey_s_c btree_trans_peek_key_cache(struct btree_iter *iter, struct bpos pos) ++struct bkey_s_c __btree_trans_peek_key_cache(struct btree_iter *iter, struct bpos pos) +{ + struct btree_trans *trans = iter->trans; + struct bch_fs *c = trans->c; @@ -22818,11 +22581,20 @@ index 0000000000000..c6c0e24003869 + if (unlikely(ret)) + return bkey_s_c_err(ret); + -+ iter->key_cache_path->should_be_locked = true; ++ btree_path_set_should_be_locked(iter->key_cache_path); + + return bch2_btree_path_peek_slot(iter->key_cache_path, &u); +} + ++static noinline ++struct bkey_s_c btree_trans_peek_key_cache(struct btree_iter *iter, struct bpos pos) ++{ ++ struct bkey_s_c ret = __btree_trans_peek_key_cache(iter, pos); ++ int err = bkey_err(ret) ?: bch2_btree_path_relock(iter->trans, iter->path, _THIS_IP_); ++ ++ return err ? 
bkey_s_c_err(err) : ret; ++} ++ +static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bpos search_key) +{ + struct btree_trans *trans = iter->trans; @@ -22846,22 +22618,19 @@ index 0000000000000..c6c0e24003869 + goto out; + } + -+ iter->path->should_be_locked = true; ++ btree_path_set_should_be_locked(iter->path); + + k = btree_path_level_peek_all(trans->c, &iter->path->l[0], &iter->k); + + if (unlikely(iter->flags & BTREE_ITER_WITH_KEY_CACHE) && + k.k && + (k2 = btree_trans_peek_key_cache(iter, k.k->p)).k) { -+ ret = bkey_err(k2); ++ k = k2; ++ ret = bkey_err(k); + if (ret) { -+ k = k2; + bch2_btree_iter_set_pos(iter, iter->pos); + goto out; + } -+ -+ k = k2; -+ iter->k = *k.k; + } + + if (unlikely(iter->flags & BTREE_ITER_WITH_JOURNAL)) @@ -22934,7 +22703,7 @@ index 0000000000000..c6c0e24003869 + while (1) { + k = __bch2_btree_iter_peek(iter, search_key); + if (!k.k || bkey_err(k)) -+ goto out; ++ goto out_no_locked; + + /* + * iter->pos should be mononotically increasing, and always be @@ -22951,7 +22720,7 @@ index 0000000000000..c6c0e24003869 + if (bkey_cmp(iter_pos, end) > 0) { + bch2_btree_iter_set_pos(iter, end); + k = bkey_s_c_null; -+ goto out; ++ goto out_no_locked; + } + + if (iter->update_path && @@ -23013,18 +22782,16 @@ index 0000000000000..c6c0e24003869 + iter->path = bch2_btree_path_set_pos(trans, iter->path, k.k->p, + iter->flags & BTREE_ITER_INTENT, + btree_iter_ip_allocated(iter)); -+ BUG_ON(!iter->path->nodes_locked); -+out: ++ ++ btree_path_set_should_be_locked(iter->path); ++out_no_locked: + if (iter->update_path) { + if (iter->update_path->uptodate && -+ (ret = bch2_btree_path_relock(trans, iter->update_path, _THIS_IP_))) { ++ (ret = bch2_btree_path_relock(trans, iter->update_path, _THIS_IP_))) + k = bkey_s_c_err(ret); -+ } else { -+ BUG_ON(!(iter->update_path->nodes_locked & 1)); -+ iter->update_path->should_be_locked = true; -+ } ++ else ++ btree_path_set_should_be_locked(iter->update_path); + } -+ iter->path->should_be_locked = true; + + if (!(iter->flags & BTREE_ITER_ALL_SNAPSHOTS)) + iter->pos.snapshot = iter->snapshot; @@ -23068,13 +22835,13 @@ index 0000000000000..c6c0e24003869 + /* ensure that iter->k is consistent with iter->pos: */ + bch2_btree_iter_set_pos(iter, iter->pos); + k = bkey_s_c_err(ret); -+ goto out; ++ goto out_no_locked; + } + + /* Already at end? 
*/ + if (!btree_path_node(iter->path, iter->path->level)) { + k = bkey_s_c_null; -+ goto out; ++ goto out_no_locked; + } + + k = btree_path_level_peek_all(trans->c, @@ -23127,8 +22894,8 @@ index 0000000000000..c6c0e24003869 + } + + iter->pos = k.k->p; -+out: -+ iter->path->should_be_locked = true; ++ btree_path_set_should_be_locked(iter->path); ++out_no_locked: + bch2_btree_iter_verify(iter); + + return k; @@ -23182,7 +22949,7 @@ index 0000000000000..c6c0e24003869 + /* ensure that iter->k is consistent with iter->pos: */ + bch2_btree_iter_set_pos(iter, iter->pos); + k = bkey_s_c_err(ret); -+ goto out; ++ goto out_no_locked; + } + + k = btree_path_level_peek(trans, iter->path, @@ -23248,7 +23015,7 @@ index 0000000000000..c6c0e24003869 + /* Start of btree: */ + bch2_btree_iter_set_pos(iter, POS_MIN); + k = bkey_s_c_null; -+ goto out; ++ goto out_no_locked; + } + } + @@ -23260,10 +23027,11 @@ index 0000000000000..c6c0e24003869 + + if (iter->flags & BTREE_ITER_FILTER_SNAPSHOTS) + iter->pos.snapshot = iter->snapshot; -+out: ++ ++ btree_path_set_should_be_locked(iter->path); ++out_no_locked: + if (saved_path) + bch2_path_put(trans, saved_path, iter->flags & BTREE_ITER_INTENT); -+ iter->path->should_be_locked = true; + + bch2_btree_iter_verify_entry_exit(iter); + bch2_btree_iter_verify(iter); @@ -23310,8 +23078,10 @@ index 0000000000000..c6c0e24003869 + btree_iter_ip_allocated(iter)); + + ret = bch2_btree_path_traverse(trans, iter->path, iter->flags); -+ if (unlikely(ret)) -+ return bkey_s_c_err(ret); ++ if (unlikely(ret)) { ++ k = bkey_s_c_err(ret); ++ goto out_no_locked; ++ } + + if ((iter->flags & BTREE_ITER_CACHED) || + !(iter->flags & (BTREE_ITER_IS_EXTENTS|BTREE_ITER_FILTER_SNAPSHOTS))) { @@ -23335,10 +23105,11 @@ index 0000000000000..c6c0e24003869 + } + + if (unlikely(iter->flags & BTREE_ITER_WITH_KEY_CACHE) && -+ (k = btree_trans_peek_key_cache(iter, iter->pos)).k) { ++ (k = __btree_trans_peek_key_cache(iter, iter->pos)).k) { + if (!bkey_err(k)) + iter->k = *k.k; -+ goto out; ++ /* We're not returning a key from iter->path: */ ++ goto out_no_locked; + } + + k = bch2_btree_path_peek_slot(iter->path, &iter->k); @@ -23366,11 +23137,14 @@ index 0000000000000..c6c0e24003869 + struct bpos pos = iter->pos; + + k = bch2_btree_iter_peek(iter); -+ iter->pos = pos; ++ if (unlikely(bkey_err(k))) ++ bch2_btree_iter_set_pos(iter, pos); ++ else ++ iter->pos = pos; + } + + if (unlikely(bkey_err(k))) -+ return k; ++ goto out_no_locked; + + next = k.k ? 
bkey_start_pos(k.k) : POS_MAX; + @@ -23392,8 +23166,8 @@ index 0000000000000..c6c0e24003869 + } + } +out: -+ iter->path->should_be_locked = true; -+ ++ btree_path_set_should_be_locked(iter->path); ++out_no_locked: + bch2_btree_iter_verify_entry_exit(iter); + bch2_btree_iter_verify(iter); + ret = bch2_btree_iter_verify_ret(iter, k); @@ -23650,9 +23424,11 @@ index 0000000000000..c6c0e24003869 + +void *bch2_trans_kmalloc(struct btree_trans *trans, size_t size) +{ -+ size_t new_top = trans->mem_top + size; ++ unsigned new_top = trans->mem_top + size; + void *p; + ++ trans->mem_max = max(trans->mem_max, new_top); ++ + if (new_top > trans->mem_bytes) { + size_t old_bytes = trans->mem_bytes; + size_t new_bytes = roundup_pow_of_two(new_top); @@ -23674,7 +23450,7 @@ index 0000000000000..c6c0e24003869 + trans->mem_bytes = new_bytes; + + if (old_bytes) { -+ trace_trans_restart_mem_realloced(trans->fn, _RET_IP_, new_bytes); ++ trace_and_count(trans->c, trans_restart_mem_realloced, trans, _RET_IP_, new_bytes); + return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_mem_realloced)); + } + } @@ -23699,6 +23475,7 @@ index 0000000000000..c6c0e24003869 + + bch2_trans_reset_updates(trans); + ++ trans->restart_count++; + trans->mem_top = 0; + + if (trans->fs_usage_deltas) { @@ -23749,10 +23526,10 @@ index 0000000000000..c6c0e24003869 + +void bch2_trans_verify_not_restarted(struct btree_trans *trans, u32 restart_count) +{ -+ bch2_trans_inconsistent_on(trans_was_restarted(trans, restart_count), trans, -+ "trans->restart_count %u, should be %u, last restarted by %ps\n", -+ trans->restart_count, restart_count, -+ (void *) trans->last_restarted_ip); ++ if (trans_was_restarted(trans, restart_count)) ++ panic("trans->restart_count %u, should be %u, last restarted by %pS\n", ++ trans->restart_count, restart_count, ++ (void *) trans->last_restarted_ip); +} + +static void bch2_trans_alloc_paths(struct btree_trans *trans, struct bch_fs *c) @@ -23773,12 +23550,26 @@ index 0000000000000..c6c0e24003869 + trans->updates = p; p += updates_bytes; +} + -+void __bch2_trans_init(struct btree_trans *trans, struct bch_fs *c, -+ unsigned expected_nr_iters, -+ size_t expected_mem_bytes, -+ const char *fn) ++static inline unsigned bch2_trans_get_fn_idx(struct btree_trans *trans, struct bch_fs *c, ++ const char *fn) ++{ ++ unsigned i; ++ ++ for (i = 0; i < ARRAY_SIZE(c->btree_transaction_fns); i++) ++ if (!c->btree_transaction_fns[i] || ++ c->btree_transaction_fns[i] == fn) { ++ c->btree_transaction_fns[i] = fn; ++ return i; ++ } ++ ++ pr_warn_once("BCH_TRANSACTIONS_NR not big enough!"); ++ return i; ++} ++ ++void __bch2_trans_init(struct btree_trans *trans, struct bch_fs *c, const char *fn) + __acquires(&c->btree_trans_barrier) +{ ++ struct btree_transaction_stats *s; + struct btree_trans *pos; + + BUG_ON(lock_class_is_held(&bch2_btree_node_lock_key)); @@ -23788,19 +23579,14 @@ index 0000000000000..c6c0e24003869 + trans->fn = fn; + trans->last_begin_time = ktime_get_ns(); + trans->task = current; -+ -+ while (c->lock_held_stats.names[trans->lock_name_idx] != fn -+ && c->lock_held_stats.names[trans->lock_name_idx] != 0) -+ trans->lock_name_idx++; -+ -+ if (trans->lock_name_idx >= BCH_LOCK_TIME_NR) -+ pr_warn_once("lock_times array not big enough!"); -+ else -+ c->lock_held_stats.names[trans->lock_name_idx] = fn; ++ trans->fn_idx = bch2_trans_get_fn_idx(trans, c, fn); + + bch2_trans_alloc_paths(trans, c); + -+ if (expected_mem_bytes) { ++ s = btree_trans_stats(trans); ++ if (s) { ++ unsigned expected_mem_bytes = 
s->max_mem; ++ + trans->mem_bytes = roundup_pow_of_two(expected_mem_bytes); + trans->mem = kmalloc(trans->mem_bytes, GFP_KERNEL|__GFP_NOFAIL); + @@ -23851,9 +23637,13 @@ index 0000000000000..c6c0e24003869 +{ + struct btree_insert_entry *i; + struct bch_fs *c = trans->c; ++ struct btree_transaction_stats *s = btree_trans_stats(trans); + + bch2_trans_unlock(trans); + ++ if (s) ++ s->max_mem = max(s->max_mem, trans->mem_max); ++ + trans_for_each_update(trans, i) + __btree_path_put(i->path, true); + trans->nr_updates = 0; @@ -23900,12 +23690,23 @@ index 0000000000000..c6c0e24003869 + +static void __maybe_unused +bch2_btree_path_node_to_text(struct printbuf *out, -+ struct btree_bkey_cached_common *b, -+ bool cached) ++ struct btree_bkey_cached_common *b) +{ ++ struct six_lock_count c = six_lock_counts(&b->lock); ++ struct task_struct *owner; ++ pid_t pid; ++ ++ rcu_read_lock(); ++ owner = READ_ONCE(b->lock.owner); ++ pid = owner ? owner->pid : 0;; ++ rcu_read_unlock(); ++ + prt_printf(out, " l=%u %s:", + b->level, bch2_btree_ids[b->btree_id]); -+ bch2_bpos_to_text(out, btree_node_pos(b, cached)); ++ bch2_bpos_to_text(out, btree_node_pos(b)); ++ ++ prt_printf(out, " locks %u:%u:%u held by pid %u", ++ c.n[0], c.n[1], c.n[2], pid); +} + +void bch2_btree_trans_to_text(struct printbuf *out, struct btree_trans *trans) @@ -23931,10 +23732,10 @@ index 0000000000000..c6c0e24003869 + + for (l = 0; l < BTREE_MAX_DEPTH; l++) { + if (btree_node_locked(path, l) && -+ (unsigned long) (b = (void *) READ_ONCE(path->l[l].b)) >= 128) { -+ prt_printf(out, " %s l=%u ", -+ btree_node_intent_locked(path, l) ? "i" : "r", l); -+ bch2_btree_path_node_to_text(out, b, path->cached); ++ !IS_ERR_OR_NULL(b = (void *) READ_ONCE(path->l[l].b))) { ++ prt_printf(out, " %c l=%u ", ++ lock_types[btree_node_locked_type(path, l)], l); ++ bch2_btree_path_node_to_text(out, b); + prt_printf(out, "\n"); + } + } @@ -23952,7 +23753,7 @@ index 0000000000000..c6c0e24003869 + bch2_bpos_to_text(out, trans->locking_pos); + + prt_printf(out, " node "); -+ bch2_btree_path_node_to_text(out, b, path->cached); ++ bch2_btree_path_node_to_text(out, b); + prt_printf(out, "\n"); + } +} @@ -23967,9 +23768,12 @@ index 0000000000000..c6c0e24003869 + +int bch2_fs_btree_iter_init(struct bch_fs *c) +{ -+ unsigned nr = BTREE_ITER_MAX; ++ unsigned i, nr = BTREE_ITER_MAX; + int ret; + ++ for (i = 0; i < ARRAY_SIZE(c->btree_transaction_stats); i++) ++ mutex_init(&c->btree_transaction_stats[i].lock); ++ + INIT_LIST_HEAD(&c->btree_trans_list); + mutex_init(&c->btree_trans_lock); + @@ -23985,10 +23789,10 @@ index 0000000000000..c6c0e24003869 +} diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h new file mode 100644 -index 0000000000000..6041bbfbf6d19 +index 000000000000..7b47b8802413 --- /dev/null +++ b/fs/bcachefs/btree_iter.h -@@ -0,0 +1,550 @@ +@@ -0,0 +1,541 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BCACHEFS_BTREE_ITER_H +#define _BCACHEFS_BTREE_ITER_H @@ -24136,12 +23940,10 @@ index 0000000000000..6041bbfbf6d19 + +#ifdef CONFIG_BCACHEFS_DEBUG +void bch2_trans_verify_paths(struct btree_trans *); -+void bch2_trans_verify_locks(struct btree_trans *); +void bch2_assert_pos_locked(struct btree_trans *, enum btree_id, + struct bpos, bool); +#else +static inline void bch2_trans_verify_paths(struct btree_trans *trans) {} -+static inline void bch2_trans_verify_locks(struct btree_trans *trans) {} +static inline void bch2_assert_pos_locked(struct btree_trans *trans, enum btree_id id, + struct bpos pos, bool key_cache) {} +#endif @@ -24173,7 
+23975,6 @@ index 0000000000000..6041bbfbf6d19 + BUG_ON(!bch2_err_matches(err, BCH_ERR_transaction_restart)); + + trans->restarted = err; -+ trans->restart_count++; + return -err; +} + @@ -24187,20 +23988,6 @@ index 0000000000000..6041bbfbf6d19 +bool bch2_btree_node_upgrade(struct btree_trans *, + struct btree_path *, unsigned); + -+bool __bch2_btree_path_upgrade(struct btree_trans *, -+ struct btree_path *, unsigned); -+ -+static inline bool bch2_btree_path_upgrade(struct btree_trans *trans, -+ struct btree_path *path, -+ unsigned new_locks_want) -+{ -+ new_locks_want = min(new_locks_want, BTREE_MAX_DEPTH); -+ -+ return path->locks_want < new_locks_want -+ ? __bch2_btree_path_upgrade(trans, path, new_locks_want) -+ : path->uptodate == BTREE_ITER_UPTODATE; -+} -+ +void __bch2_btree_path_downgrade(struct btree_trans *, struct btree_path *, unsigned); + +static inline void bch2_btree_path_downgrade(struct btree_trans *trans, @@ -24290,7 +24077,8 @@ index 0000000000000..6041bbfbf6d19 + +static inline void set_btree_iter_dontneed(struct btree_iter *iter) +{ -+ iter->path->preserve = false; ++ if (!iter->trans->restarted) ++ iter->path->preserve = false; +} + +void *bch2_trans_kmalloc(struct btree_trans *, size_t); @@ -24358,8 +24146,8 @@ index 0000000000000..6041bbfbf6d19 + +static inline int btree_trans_too_many_iters(struct btree_trans *trans) +{ -+ if (hweight64(trans->paths_allocated) > BTREE_ITER_MAX) { -+ trace_trans_restart_too_many_iters(trans->fn, _THIS_IP_); ++ if (hweight64(trans->paths_allocated) > BTREE_ITER_MAX - 8) { ++ trace_and_count(trans->c, trans_restart_too_many_iters, trans, _THIS_IP_); + return btree_trans_restart(trans, BCH_ERR_transaction_restart_too_many_iters); + } + @@ -24382,13 +24170,17 @@ index 0000000000000..6041bbfbf6d19 + +#define lockrestart_do(_trans, _do) \ +({ \ ++ u32 _restart_count; \ + int _ret; \ + \ + do { \ -+ bch2_trans_begin(_trans); \ ++ _restart_count = bch2_trans_begin(_trans); \ + _ret = (_do); \ + } while (bch2_err_matches(_ret, BCH_ERR_transaction_restart)); \ + \ ++ if (!_ret) \ ++ bch2_trans_verify_not_restarted(_trans, _restart_count);\ ++ \ + _ret; \ +}) + @@ -24429,7 +24221,7 @@ index 0000000000000..6041bbfbf6d19 + (_start), (_flags)); \ + \ + while (1) { \ -+ bch2_trans_begin(_trans); \ ++ u32 _restart_count = bch2_trans_begin(_trans); \ + (_k) = bch2_btree_iter_peek_type(&(_iter), (_flags)); \ + if (!(_k).k) { \ + _ret = 0; \ @@ -24441,6 +24233,7 @@ index 0000000000000..6041bbfbf6d19 + continue; \ + if (_ret) \ + break; \ ++ bch2_trans_verify_not_restarted(_trans, _restart_count);\ + if (!bch2_btree_iter_advance(&(_iter))) \ + break; \ + } \ @@ -24458,7 +24251,7 @@ index 0000000000000..6041bbfbf6d19 + (_start), (_flags)); \ + \ + while (1) { \ -+ bch2_trans_begin(_trans); \ ++ u32 _restart_count = bch2_trans_begin(_trans); \ + (_k) = bch2_btree_iter_peek_prev_type(&(_iter), (_flags));\ + if (!(_k).k) { \ + _ret = 0; \ @@ -24470,6 +24263,7 @@ index 0000000000000..6041bbfbf6d19 + continue; \ + if (_ret) \ + break; \ ++ bch2_trans_verify_not_restarted(_trans, _restart_count);\ + if (!bch2_btree_iter_rewind(&(_iter))) \ + break; \ + } \ @@ -24525,13 +24319,14 @@ index 0000000000000..6041bbfbf6d19 +/* new multiple iterator interface: */ + +void bch2_trans_updates_to_text(struct printbuf *, struct btree_trans *); ++void bch2_btree_path_to_text(struct printbuf *, struct btree_path *); ++void bch2_trans_paths_to_text(struct printbuf *, struct btree_trans *); +void bch2_dump_trans_updates(struct btree_trans *); +void 
bch2_dump_trans_paths_updates(struct btree_trans *); -+void __bch2_trans_init(struct btree_trans *, struct bch_fs *, -+ unsigned, size_t, const char *); ++void __bch2_trans_init(struct btree_trans *, struct bch_fs *, const char *); +void bch2_trans_exit(struct btree_trans *); + -+#define bch2_trans_init(...) __bch2_trans_init(__VA_ARGS__, __func__) ++#define bch2_trans_init(_trans, _c, _nr_iters, _mem) __bch2_trans_init(_trans, _c, __func__) + +void bch2_btree_trans_to_text(struct printbuf *, struct btree_trans *); + @@ -24541,10 +24336,10 @@ index 0000000000000..6041bbfbf6d19 +#endif /* _BCACHEFS_BTREE_ITER_H */ diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c new file mode 100644 -index 0000000000000..b36df9a718bd1 +index 000000000000..d900ff42437e --- /dev/null +++ b/fs/bcachefs/btree_key_cache.c -@@ -0,0 +1,855 @@ +@@ -0,0 +1,943 @@ + +#include "bcachefs.h" +#include "btree_cache.h" @@ -24560,6 +24355,11 @@ index 0000000000000..b36df9a718bd1 +#include +#include + ++static inline bool btree_uses_pcpu_readers(enum btree_id id) ++{ ++ return id == BTREE_ID_subvolumes; ++} ++ +static struct kmem_cache *bch2_key_cache; + +static int bch2_btree_key_cache_cmp_fn(struct rhashtable_compare_arg *arg, @@ -24631,7 +24431,10 @@ index 0000000000000..b36df9a718bd1 + ck->btree_trans_barrier_seq = + start_poll_synchronize_srcu(&c->btree_trans_barrier); + -+ list_move_tail(&ck->list, &bc->freed); ++ if (ck->c.lock.readers) ++ list_move_tail(&ck->list, &bc->freed_pcpu); ++ else ++ list_move_tail(&ck->list, &bc->freed_nonpcpu); + atomic_long_inc(&bc->nr_freed); + + kfree(ck->k); @@ -24642,15 +24445,51 @@ index 0000000000000..b36df9a718bd1 + six_unlock_intent(&ck->c.lock); +} + -+static void bkey_cached_free_fast(struct btree_key_cache *bc, -+ struct bkey_cached *ck) ++static void bkey_cached_move_to_freelist(struct btree_key_cache *bc, ++ struct bkey_cached *ck) +{ -+ struct bch_fs *c = container_of(bc, struct bch_fs, btree_key_cache); + struct btree_key_cache_freelist *f; + bool freed = false; + + BUG_ON(test_bit(BKEY_CACHED_DIRTY, &ck->flags)); + ++ if (!ck->c.lock.readers) { ++ preempt_disable(); ++ f = this_cpu_ptr(bc->pcpu_freed); ++ ++ if (f->nr < ARRAY_SIZE(f->objs)) { ++ f->objs[f->nr++] = ck; ++ freed = true; ++ } ++ preempt_enable(); ++ ++ if (!freed) { ++ mutex_lock(&bc->lock); ++ preempt_disable(); ++ f = this_cpu_ptr(bc->pcpu_freed); ++ ++ while (f->nr > ARRAY_SIZE(f->objs) / 2) { ++ struct bkey_cached *ck2 = f->objs[--f->nr]; ++ ++ list_move_tail(&ck2->list, &bc->freed_nonpcpu); ++ } ++ preempt_enable(); ++ ++ list_move_tail(&ck->list, &bc->freed_nonpcpu); ++ mutex_unlock(&bc->lock); ++ } ++ } else { ++ mutex_lock(&bc->lock); ++ list_move_tail(&ck->list, &bc->freed_pcpu); ++ mutex_unlock(&bc->lock); ++ } ++} ++ ++static void bkey_cached_free_fast(struct btree_key_cache *bc, ++ struct bkey_cached *ck) ++{ ++ struct bch_fs *c = container_of(bc, struct bch_fs, btree_key_cache); ++ + ck->btree_trans_barrier_seq = + start_poll_synchronize_srcu(&c->btree_trans_barrier); + @@ -24661,74 +24500,84 @@ index 0000000000000..b36df9a718bd1 + ck->k = NULL; + ck->u64s = 0; + -+ preempt_disable(); -+ f = this_cpu_ptr(bc->pcpu_freed); -+ -+ if (f->nr < ARRAY_SIZE(f->objs)) { -+ f->objs[f->nr++] = ck; -+ freed = true; -+ } -+ preempt_enable(); -+ -+ if (!freed) { -+ mutex_lock(&bc->lock); -+ preempt_disable(); -+ f = this_cpu_ptr(bc->pcpu_freed); -+ -+ while (f->nr > ARRAY_SIZE(f->objs) / 2) { -+ struct bkey_cached *ck2 = f->objs[--f->nr]; -+ -+ list_move_tail(&ck2->list, &bc->freed); 
-+ } -+ preempt_enable(); -+ -+ list_move_tail(&ck->list, &bc->freed); -+ mutex_unlock(&bc->lock); -+ } ++ bkey_cached_move_to_freelist(bc, ck); + + six_unlock_write(&ck->c.lock); + six_unlock_intent(&ck->c.lock); +} + +static struct bkey_cached * -+bkey_cached_alloc(struct btree_key_cache *c) ++bkey_cached_alloc(struct btree_trans *trans, struct btree_path *path) +{ ++ struct bch_fs *c = trans->c; ++ struct btree_key_cache *bc = &c->btree_key_cache; + struct bkey_cached *ck = NULL; + struct btree_key_cache_freelist *f; ++ bool pcpu_readers = btree_uses_pcpu_readers(path->btree_id); + -+ preempt_disable(); -+ f = this_cpu_ptr(c->pcpu_freed); -+ if (f->nr) -+ ck = f->objs[--f->nr]; -+ preempt_enable(); -+ -+ if (!ck) { -+ mutex_lock(&c->lock); ++ if (!pcpu_readers) { + preempt_disable(); -+ f = this_cpu_ptr(c->pcpu_freed); -+ -+ while (!list_empty(&c->freed) && -+ f->nr < ARRAY_SIZE(f->objs) / 2) { -+ ck = list_last_entry(&c->freed, struct bkey_cached, list); -+ list_del_init(&ck->list); -+ f->objs[f->nr++] = ck; -+ } -+ -+ ck = f->nr ? f->objs[--f->nr] : NULL; ++ f = this_cpu_ptr(bc->pcpu_freed); ++ if (f->nr) ++ ck = f->objs[--f->nr]; + preempt_enable(); -+ mutex_unlock(&c->lock); ++ ++ if (!ck) { ++ mutex_lock(&bc->lock); ++ preempt_disable(); ++ f = this_cpu_ptr(bc->pcpu_freed); ++ ++ while (!list_empty(&bc->freed_nonpcpu) && ++ f->nr < ARRAY_SIZE(f->objs) / 2) { ++ ck = list_last_entry(&bc->freed_nonpcpu, struct bkey_cached, list); ++ list_del_init(&ck->list); ++ f->objs[f->nr++] = ck; ++ } ++ ++ ck = f->nr ? f->objs[--f->nr] : NULL; ++ preempt_enable(); ++ mutex_unlock(&bc->lock); ++ } ++ } else { ++ mutex_lock(&bc->lock); ++ if (!list_empty(&bc->freed_pcpu)) { ++ ck = list_last_entry(&bc->freed_pcpu, struct bkey_cached, list); ++ list_del_init(&ck->list); ++ } ++ mutex_unlock(&bc->lock); + } + + if (ck) { -+ six_lock_intent(&ck->c.lock, NULL, NULL); -+ six_lock_write(&ck->c.lock, NULL, NULL); ++ int ret; ++ ++ ret = btree_node_lock_nopath(trans, &ck->c, SIX_LOCK_intent); ++ if (unlikely(ret)) { ++ bkey_cached_move_to_freelist(bc, ck); ++ return ERR_PTR(ret); ++ } ++ ++ path->l[0].b = (void *) ck; ++ path->l[0].lock_seq = ck->c.lock.state.seq; ++ mark_btree_node_locked(trans, path, 0, SIX_LOCK_intent); ++ ++ ret = bch2_btree_node_lock_write(trans, path, &ck->c); ++ if (unlikely(ret)) { ++ btree_node_unlock(trans, path, 0); ++ bkey_cached_move_to_freelist(bc, ck); ++ return ERR_PTR(ret); ++ } ++ + return ck; + } + + ck = kmem_cache_alloc(bch2_key_cache, GFP_NOFS|__GFP_ZERO); + if (likely(ck)) { + INIT_LIST_HEAD(&ck->list); -+ six_lock_init(&ck->c.lock); ++ __six_lock_init(&ck->c.lock, "b->c.lock", &bch2_btree_node_lock_key); ++ if (pcpu_readers) ++ six_lock_pcpu_alloc(&ck->c.lock); ++ ++ ck->c.cached = true; + BUG_ON(!six_trylock_intent(&ck->c.lock)); + BUG_ON(!six_trylock_write(&ck->c.lock)); + return ck; @@ -24762,36 +24611,36 @@ index 0000000000000..b36df9a718bd1 +} + +static struct bkey_cached * -+btree_key_cache_create(struct bch_fs *c, -+ enum btree_id btree_id, -+ struct bpos pos) ++btree_key_cache_create(struct btree_trans *trans, struct btree_path *path) +{ ++ struct bch_fs *c = trans->c; + struct btree_key_cache *bc = &c->btree_key_cache; + struct bkey_cached *ck; + bool was_new = true; + -+ ck = bkey_cached_alloc(bc); ++ ck = bkey_cached_alloc(trans, path); ++ if (unlikely(IS_ERR(ck))) ++ return ck; + + if (unlikely(!ck)) { + ck = bkey_cached_reuse(bc); + if (unlikely(!ck)) { + bch_err(c, "error allocating memory for key cache item, btree %s", -+ bch2_btree_ids[btree_id]); 
++ bch2_btree_ids[path->btree_id]); + return ERR_PTR(-ENOMEM); + } + ++ mark_btree_node_locked(trans, path, 0, SIX_LOCK_intent); + was_new = false; + } else { -+ if (btree_id == BTREE_ID_subvolumes) ++ if (path->btree_id == BTREE_ID_subvolumes) + six_lock_pcpu_alloc(&ck->c.lock); -+ else -+ six_lock_pcpu_free(&ck->c.lock); + } + + ck->c.level = 0; -+ ck->c.btree_id = btree_id; -+ ck->key.btree_id = btree_id; -+ ck->key.pos = pos; ++ ck->c.btree_id = path->btree_id; ++ ck->key.btree_id = path->btree_id; ++ ck->key.pos = path->pos; + ck->valid = false; + ck->flags = 1U << BKEY_CACHED_ACCESSED; + @@ -24803,6 +24652,7 @@ index 0000000000000..b36df9a718bd1 + if (likely(was_new)) { + six_unlock_write(&ck->c.lock); + six_unlock_intent(&ck->c.lock); ++ mark_btree_node_locked(trans, path, 0, BTREE_NODE_UNLOCKED); + kfree(ck); + } else { + bkey_cached_free_fast(bc, ck); @@ -24838,8 +24688,7 @@ index 0000000000000..b36df9a718bd1 + k = bch2_btree_path_peek_slot(path, &u); + + if (!bch2_btree_node_relock(trans, ck_path, 0)) { -+ trace_trans_restart_relock_key_cache_fill(trans->fn, -+ _THIS_IP_, ck_path->btree_id, &ck_path->pos); ++ trace_and_count(trans->c, trans_restart_relock_key_cache_fill, trans, _THIS_IP_, ck_path); + ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_key_cache_raced); + goto err; + } @@ -24868,11 +24717,12 @@ index 0000000000000..b36df9a718bd1 + } + } + -+ /* -+ * XXX: not allowed to be holding read locks when we take a write lock, -+ * currently -+ */ -+ bch2_btree_node_lock_write(trans, ck_path, ck_path->l[0].b); ++ ret = bch2_btree_node_lock_write(trans, ck_path, &ck_path->l[0].b->c); ++ if (ret) { ++ kfree(new_k); ++ goto err; ++ } ++ + if (new_k) { + kfree(ck->k); + ck->u64s = new_u64s; @@ -24920,12 +24770,7 @@ index 0000000000000..b36df9a718bd1 +retry: + ck = bch2_btree_key_cache_find(c, path->btree_id, path->pos); + if (!ck) { -+ if (flags & BTREE_ITER_CACHED_NOCREATE) { -+ path->l[0].b = NULL; -+ return 0; -+ } -+ -+ ck = btree_key_cache_create(c, path->btree_id, path->pos); ++ ck = btree_key_cache_create(trans, path); + ret = PTR_ERR_OR_ZERO(ck); + if (ret) + goto err; @@ -24960,10 +24805,14 @@ index 0000000000000..b36df9a718bd1 + path->l[0].lock_seq = ck->c.lock.state.seq; + path->l[0].b = (void *) ck; +fill: -+ if (!ck->valid && !(flags & BTREE_ITER_CACHED_NOFILL)) { ++ if (!ck->valid) { ++ /* ++ * Using the underscore version because we haven't set ++ * path->uptodate yet: ++ */ + if (!path->locks_want && + !__bch2_btree_path_upgrade(trans, path, 1)) { -+ trace_transaction_restart_ip(trans->fn, _THIS_IP_); ++ trace_and_count(trans->c, trans_restart_key_cache_upgrade, trans, _THIS_IP_); + ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_key_cache_upgrade); + goto err; + } @@ -24977,13 +24826,14 @@ index 0000000000000..b36df9a718bd1 + set_bit(BKEY_CACHED_ACCESSED, &ck->flags); + + path->uptodate = BTREE_ITER_UPTODATE; ++ BUG_ON(!ck->valid); + BUG_ON(btree_node_locked_type(path, 0) != btree_lock_want(path, 0)); + + return ret; +err: + if (!bch2_err_matches(ret, BCH_ERR_transaction_restart)) { + btree_node_unlock(trans, path, 0); -+ path->l[0].b = BTREE_ITER_NO_NODE_ERROR; ++ path->l[0].b = ERR_PTR(ret); + } + return ret; +} @@ -25006,8 +24856,6 @@ index 0000000000000..b36df9a718bd1 + BTREE_ITER_ALL_SNAPSHOTS); + bch2_trans_iter_init(trans, &c_iter, key.btree_id, key.pos, + BTREE_ITER_CACHED| -+ BTREE_ITER_CACHED_NOFILL| -+ BTREE_ITER_CACHED_NOCREATE| + BTREE_ITER_INTENT); + b_iter.flags &= ~BTREE_ITER_WITH_KEY_CACHE; + @@ -25068,21 +24916,21 @@ 
index 0000000000000..b36df9a718bd1 + atomic_long_dec(&c->btree_key_cache.nr_dirty); + } + } else { ++ struct btree_path *path2; +evict: -+ BUG_ON(!btree_node_intent_locked(c_iter.path, 0)); ++ trans_for_each_path(trans, path2) ++ if (path2 != c_iter.path) ++ __bch2_btree_path_unlock(trans, path2); + -+ mark_btree_node_unlocked(c_iter.path, 0); -+ c_iter.path->l[0].b = NULL; -+ -+ six_lock_write(&ck->c.lock, NULL, NULL); ++ bch2_btree_node_lock_write_nofail(trans, c_iter.path, &ck->c); + + if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) { + clear_bit(BKEY_CACHED_DIRTY, &ck->flags); + atomic_long_dec(&c->btree_key_cache.nr_dirty); + } + ++ mark_btree_node_locked_noreset(c_iter.path, 0, BTREE_NODE_UNLOCKED); + bkey_cached_evict(&c->btree_key_cache, ck); -+ + bkey_cached_free_fast(&c->btree_key_cache, ck); + } +out: @@ -25098,11 +24946,13 @@ index 0000000000000..b36df9a718bd1 + struct bkey_cached *ck = + container_of(pin, struct bkey_cached, journal); + struct bkey_cached_key key; ++ struct btree_trans trans; ++ int srcu_idx = srcu_read_lock(&c->btree_trans_barrier); + int ret = 0; + -+ int srcu_idx = srcu_read_lock(&c->btree_trans_barrier); ++ bch2_trans_init(&trans, c, 0, 0); + -+ six_lock_read(&ck->c.lock, NULL, NULL); ++ btree_node_lock_nopath_nofail(&trans, &ck->c, SIX_LOCK_read); + key = ck->key; + + if (ck->journal.seq != seq || @@ -25112,12 +24962,13 @@ index 0000000000000..b36df9a718bd1 + } + six_unlock_read(&ck->c.lock); + -+ ret = bch2_trans_do(c, NULL, NULL, 0, ++ ret = commit_do(&trans, NULL, NULL, 0, + btree_key_cache_flush_pos(&trans, key, seq, + BTREE_INSERT_JOURNAL_RECLAIM, false)); +unlock: + srcu_read_unlock(&c->btree_trans_barrier, srcu_idx); + ++ bch2_trans_exit(&trans); + return ret; +} + @@ -25181,11 +25032,22 @@ index 0000000000000..b36df9a718bd1 +void bch2_btree_key_cache_drop(struct btree_trans *trans, + struct btree_path *path) +{ ++ struct bch_fs *c = trans->c; + struct bkey_cached *ck = (void *) path->l[0].b; + -+ ck->valid = false; ++ BUG_ON(!ck->valid); + -+ BUG_ON(test_bit(BKEY_CACHED_DIRTY, &ck->flags)); ++ /* ++ * We just did an update to the btree, bypassing the key cache: the key ++ * cache key is now stale and must be dropped, even if dirty: ++ */ ++ if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) { ++ clear_bit(BKEY_CACHED_DIRTY, &ck->flags); ++ atomic_long_dec(&c->btree_key_cache.nr_dirty); ++ bch2_journal_pin_drop(&c->journal, &ck->journal); ++ } ++ ++ ck->valid = false; +} + +static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink, @@ -25213,12 +25075,29 @@ index 0000000000000..b36df9a718bd1 + * Newest freed entries are at the end of the list - once we hit one + * that's too new to be freed, we can bail out: + */ -+ list_for_each_entry_safe(ck, t, &bc->freed, list) { ++ list_for_each_entry_safe(ck, t, &bc->freed_nonpcpu, list) { + if (!poll_state_synchronize_srcu(&c->btree_trans_barrier, + ck->btree_trans_barrier_seq)) + break; + + list_del(&ck->list); ++ six_lock_pcpu_free(&ck->c.lock); ++ kmem_cache_free(bch2_key_cache, ck); ++ atomic_long_dec(&bc->nr_freed); ++ scanned++; ++ freed++; ++ } ++ ++ if (scanned >= nr) ++ goto out; ++ ++ list_for_each_entry_safe(ck, t, &bc->freed_pcpu, list) { ++ if (!poll_state_synchronize_srcu(&c->btree_trans_barrier, ++ ck->btree_trans_barrier_seq)) ++ break; ++ ++ list_del(&ck->list); ++ six_lock_pcpu_free(&ck->c.lock); + kmem_cache_free(bch2_key_cache, ck); + atomic_long_dec(&bc->nr_freed); + scanned++; @@ -25306,7 +25185,7 @@ index 0000000000000..b36df9a718bd1 + for (i = 0; i < tbl->size; i++) + 
rht_for_each_entry_rcu(ck, pos, tbl, i, hash) { + bkey_cached_evict(bc, ck); -+ list_add(&ck->list, &bc->freed); ++ list_add(&ck->list, &bc->freed_nonpcpu); + } + rcu_read_unlock(); + @@ -25316,11 +25195,13 @@ index 0000000000000..b36df9a718bd1 + + for (i = 0; i < f->nr; i++) { + ck = f->objs[i]; -+ list_add(&ck->list, &bc->freed); ++ list_add(&ck->list, &bc->freed_nonpcpu); + } + } + -+ list_for_each_entry_safe(ck, n, &bc->freed, list) { ++ list_splice(&bc->freed_pcpu, &bc->freed_nonpcpu); ++ ++ list_for_each_entry_safe(ck, n, &bc->freed_nonpcpu, list) { + cond_resched(); + + bch2_journal_pin_drop(&c->journal, &ck->journal); @@ -25328,6 +25209,7 @@ index 0000000000000..b36df9a718bd1 + + list_del(&ck->list); + kfree(ck->k); ++ six_lock_pcpu_free(&ck->c.lock); + kmem_cache_free(bch2_key_cache, ck); + } + @@ -25347,7 +25229,8 @@ index 0000000000000..b36df9a718bd1 +void bch2_fs_btree_key_cache_init_early(struct btree_key_cache *c) +{ + mutex_init(&c->lock); -+ INIT_LIST_HEAD(&c->freed); ++ INIT_LIST_HEAD(&c->freed_pcpu); ++ INIT_LIST_HEAD(&c->freed_nonpcpu); +} + +static void bch2_btree_key_cache_shrinker_to_text(struct printbuf *out, struct shrinker *shrink) @@ -25402,7 +25285,7 @@ index 0000000000000..b36df9a718bd1 +} diff --git a/fs/bcachefs/btree_key_cache.h b/fs/bcachefs/btree_key_cache.h new file mode 100644 -index 0000000000000..670746e72daba +index 000000000000..670746e72dab --- /dev/null +++ b/fs/bcachefs/btree_key_cache.h @@ -0,0 +1,47 @@ @@ -25453,12 +25336,484 @@ index 0000000000000..670746e72daba +int __init bch2_btree_key_cache_init(void); + +#endif /* _BCACHEFS_BTREE_KEY_CACHE_H */ +diff --git a/fs/bcachefs/btree_locking.c b/fs/bcachefs/btree_locking.c +new file mode 100644 +index 000000000000..1cdf7d4f9cc7 +--- /dev/null ++++ b/fs/bcachefs/btree_locking.c +@@ -0,0 +1,466 @@ ++// SPDX-License-Identifier: GPL-2.0 ++ ++#include "bcachefs.h" ++#include "btree_locking.h" ++#include "btree_types.h" ++ ++struct lock_class_key bch2_btree_node_lock_key; ++ ++/* Btree node locking: */ ++ ++static inline void six_lock_readers_add(struct six_lock *lock, int nr) ++{ ++ if (lock->readers) ++ this_cpu_add(*lock->readers, nr); ++ else if (nr > 0) ++ atomic64_add(__SIX_VAL(read_lock, nr), &lock->state.counter); ++ else ++ atomic64_sub(__SIX_VAL(read_lock, -nr), &lock->state.counter); ++} ++ ++struct six_lock_count bch2_btree_node_lock_counts(struct btree_trans *trans, ++ struct btree_path *skip, ++ struct btree_bkey_cached_common *b, ++ unsigned level) ++{ ++ struct btree_path *path; ++ struct six_lock_count ret; ++ ++ memset(&ret, 0, sizeof(ret)); ++ ++ if (IS_ERR_OR_NULL(b)) ++ return ret; ++ ++ trans_for_each_path(trans, path) ++ if (path != skip && &path->l[level].b->c == b) { ++ int t = btree_node_locked_type(path, level); ++ ++ if (t != BTREE_NODE_UNLOCKED) ++ ret.n[t]++; ++ } ++ ++ return ret; ++} ++ ++/* unlock */ ++ ++void bch2_btree_node_unlock_write(struct btree_trans *trans, ++ struct btree_path *path, struct btree *b) ++{ ++ bch2_btree_node_unlock_write_inlined(trans, path, b); ++} ++ ++/* lock */ ++ ++void __bch2_btree_node_lock_write(struct btree_trans *trans, ++ struct btree_bkey_cached_common *b) ++{ ++ int readers = bch2_btree_node_lock_counts(trans, NULL, b, b->level).n[SIX_LOCK_read]; ++ ++ /* ++ * Must drop our read locks before calling six_lock_write() - ++ * six_unlock() won't do wakeups until the reader count ++ * goes to 0, and it's safe because we have the node intent ++ * locked: ++ */ ++ six_lock_readers_add(&b->lock, -readers); ++ 
btree_node_lock_nopath_nofail(trans, b, SIX_LOCK_write); ++ six_lock_readers_add(&b->lock, readers); ++} ++ ++static inline bool path_has_read_locks(struct btree_path *path) ++{ ++ unsigned l; ++ ++ for (l = 0; l < BTREE_MAX_DEPTH; l++) ++ if (btree_node_read_locked(path, l)) ++ return true; ++ return false; ++} ++ ++/* Slowpath: */ ++int __bch2_btree_node_lock(struct btree_trans *trans, ++ struct btree_path *path, ++ struct btree_bkey_cached_common *b, ++ struct bpos pos, unsigned level, ++ enum six_lock_type type, ++ six_lock_should_sleep_fn should_sleep_fn, void *p, ++ unsigned long ip) ++{ ++ struct btree_path *linked; ++ unsigned reason; ++ ++ /* Check if it's safe to block: */ ++ trans_for_each_path(trans, linked) { ++ if (!linked->nodes_locked) ++ continue; ++ ++ /* ++ * Can't block taking an intent lock if we have _any_ nodes read ++ * locked: ++ * ++ * - Our read lock blocks another thread with an intent lock on ++ * the same node from getting a write lock, and thus from ++ * dropping its intent lock ++ * ++ * - And the other thread may have multiple nodes intent locked: ++ * both the node we want to intent lock, and the node we ++ * already have read locked - deadlock: ++ */ ++ if (type == SIX_LOCK_intent && ++ path_has_read_locks(linked)) { ++ reason = 1; ++ goto deadlock; ++ } ++ ++ if (linked->btree_id != path->btree_id) { ++ if (linked->btree_id < path->btree_id) ++ continue; ++ ++ reason = 3; ++ goto deadlock; ++ } ++ ++ /* ++ * Within the same btree, non-cached paths come before cached ++ * paths: ++ */ ++ if (linked->cached != path->cached) { ++ if (!linked->cached) ++ continue; ++ ++ reason = 4; ++ goto deadlock; ++ } ++ ++ /* ++ * Interior nodes must be locked before their descendants: if ++ * another path has possible descendants locked of the node ++ * we're about to lock, it must have the ancestors locked too: ++ */ ++ if (level > btree_path_highest_level_locked(linked)) { ++ reason = 5; ++ goto deadlock; ++ } ++ ++ /* Must lock btree nodes in key order: */ ++ if (btree_node_locked(linked, level) && ++ bpos_cmp(pos, btree_node_pos(&linked->l[level].b->c)) <= 0) { ++ reason = 7; ++ goto deadlock; ++ } ++ } ++ ++ return btree_node_lock_type(trans, path, b, pos, level, ++ type, should_sleep_fn, p); ++deadlock: ++ trace_and_count(trans->c, trans_restart_would_deadlock, trans, ip, reason, linked, path, &pos); ++ return btree_trans_restart(trans, BCH_ERR_transaction_restart_would_deadlock); ++} ++ ++/* relock */ ++ ++static inline bool btree_path_get_locks(struct btree_trans *trans, ++ struct btree_path *path, ++ bool upgrade) ++{ ++ unsigned l = path->level; ++ int fail_idx = -1; ++ ++ do { ++ if (!btree_path_node(path, l)) ++ break; ++ ++ if (!(upgrade ++ ? bch2_btree_node_upgrade(trans, path, l) ++ : bch2_btree_node_relock(trans, path, l))) ++ fail_idx = l; ++ ++ l++; ++ } while (l < path->locks_want); ++ ++ /* ++ * When we fail to get a lock, we have to ensure that any child nodes ++ * can't be relocked so bch2_btree_path_traverse has to walk back up to ++ * the node that we failed to relock: ++ */ ++ if (fail_idx >= 0) { ++ __bch2_btree_path_unlock(trans, path); ++ btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE); ++ ++ do { ++ path->l[fail_idx].b = upgrade ++ ? 
ERR_PTR(-BCH_ERR_no_btree_node_upgrade) ++ : ERR_PTR(-BCH_ERR_no_btree_node_relock); ++ --fail_idx; ++ } while (fail_idx >= 0); ++ } ++ ++ if (path->uptodate == BTREE_ITER_NEED_RELOCK) ++ path->uptodate = BTREE_ITER_UPTODATE; ++ ++ bch2_trans_verify_locks(trans); ++ ++ return path->uptodate < BTREE_ITER_NEED_RELOCK; ++} ++ ++bool __bch2_btree_node_relock(struct btree_trans *trans, ++ struct btree_path *path, unsigned level) ++{ ++ struct btree *b = btree_path_node(path, level); ++ int want = __btree_lock_want(path, level); ++ ++ if (race_fault()) ++ goto fail; ++ ++ if (six_relock_type(&b->c.lock, want, path->l[level].lock_seq) || ++ (btree_node_lock_seq_matches(path, b, level) && ++ btree_node_lock_increment(trans, &b->c, level, want))) { ++ mark_btree_node_locked(trans, path, level, want); ++ return true; ++ } ++fail: ++ trace_and_count(trans->c, btree_path_relock_fail, trans, _RET_IP_, path, level); ++ return false; ++} ++ ++/* upgrade */ ++ ++bool bch2_btree_node_upgrade(struct btree_trans *trans, ++ struct btree_path *path, unsigned level) ++{ ++ struct btree *b = path->l[level].b; ++ ++ if (!is_btree_node(path, level)) ++ return false; ++ ++ switch (btree_lock_want(path, level)) { ++ case BTREE_NODE_UNLOCKED: ++ BUG_ON(btree_node_locked(path, level)); ++ return true; ++ case BTREE_NODE_READ_LOCKED: ++ BUG_ON(btree_node_intent_locked(path, level)); ++ return bch2_btree_node_relock(trans, path, level); ++ case BTREE_NODE_INTENT_LOCKED: ++ break; ++ case BTREE_NODE_WRITE_LOCKED: ++ BUG(); ++ } ++ ++ if (btree_node_intent_locked(path, level)) ++ return true; ++ ++ if (race_fault()) ++ return false; ++ ++ if (btree_node_locked(path, level) ++ ? six_lock_tryupgrade(&b->c.lock) ++ : six_relock_type(&b->c.lock, SIX_LOCK_intent, path->l[level].lock_seq)) ++ goto success; ++ ++ if (btree_node_lock_seq_matches(path, b, level) && ++ btree_node_lock_increment(trans, &b->c, level, BTREE_NODE_INTENT_LOCKED)) { ++ btree_node_unlock(trans, path, level); ++ goto success; ++ } ++ ++ trace_and_count(trans->c, btree_path_upgrade_fail, trans, _RET_IP_, path, level); ++ return false; ++success: ++ mark_btree_node_locked_noreset(path, level, SIX_LOCK_intent); ++ return true; ++} ++ ++/* Btree path locking: */ ++ ++/* ++ * Only for btree_cache.c - only relocks intent locks ++ */ ++int bch2_btree_path_relock_intent(struct btree_trans *trans, ++ struct btree_path *path) ++{ ++ unsigned l; ++ ++ for (l = path->level; ++ l < path->locks_want && btree_path_node(path, l); ++ l++) { ++ if (!bch2_btree_node_relock(trans, path, l)) { ++ __bch2_btree_path_unlock(trans, path); ++ btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE); ++ trace_and_count(trans->c, trans_restart_relock_path_intent, trans, _RET_IP_, path); ++ return btree_trans_restart(trans, BCH_ERR_transaction_restart_relock_path_intent); ++ } ++ } ++ ++ return 0; ++} ++ ++__flatten ++bool bch2_btree_path_relock_norestart(struct btree_trans *trans, ++ struct btree_path *path, unsigned long trace_ip) ++{ ++ return btree_path_get_locks(trans, path, false); ++} ++ ++__flatten ++bool bch2_btree_path_upgrade_norestart(struct btree_trans *trans, ++ struct btree_path *path, unsigned long trace_ip) ++{ ++ return btree_path_get_locks(trans, path, true); ++} ++ ++bool bch2_btree_path_upgrade_noupgrade_sibs(struct btree_trans *trans, ++ struct btree_path *path, ++ unsigned new_locks_want) ++{ ++ EBUG_ON(path->locks_want >= new_locks_want); ++ ++ path->locks_want = new_locks_want; ++ ++ return btree_path_get_locks(trans, path, true); ++} ++ ++bool 
__bch2_btree_path_upgrade(struct btree_trans *trans, ++ struct btree_path *path, ++ unsigned new_locks_want) ++{ ++ struct btree_path *linked; ++ ++ if (bch2_btree_path_upgrade_noupgrade_sibs(trans, path, new_locks_want)) ++ return true; ++ ++ /* ++ * XXX: this is ugly - we'd prefer to not be mucking with other ++ * iterators in the btree_trans here. ++ * ++ * On failure to upgrade the iterator, setting iter->locks_want and ++ * calling get_locks() is sufficient to make bch2_btree_path_traverse() ++ * get the locks we want on transaction restart. ++ * ++ * But if this iterator was a clone, on transaction restart what we did ++ * to this iterator isn't going to be preserved. ++ * ++ * Possibly we could add an iterator field for the parent iterator when ++ * an iterator is a copy - for now, we'll just upgrade any other ++ * iterators with the same btree id. ++ * ++ * The code below used to be needed to ensure ancestor nodes get locked ++ * before interior nodes - now that's handled by ++ * bch2_btree_path_traverse_all(). ++ */ ++ if (!path->cached && !trans->in_traverse_all) ++ trans_for_each_path(trans, linked) ++ if (linked != path && ++ linked->cached == path->cached && ++ linked->btree_id == path->btree_id && ++ linked->locks_want < new_locks_want) { ++ linked->locks_want = new_locks_want; ++ btree_path_get_locks(trans, linked, true); ++ } ++ ++ return false; ++} ++ ++void __bch2_btree_path_downgrade(struct btree_trans *trans, ++ struct btree_path *path, ++ unsigned new_locks_want) ++{ ++ unsigned l; ++ ++ EBUG_ON(path->locks_want < new_locks_want); ++ ++ path->locks_want = new_locks_want; ++ ++ while (path->nodes_locked && ++ (l = btree_path_highest_level_locked(path)) >= path->locks_want) { ++ if (l > path->level) { ++ btree_node_unlock(trans, path, l); ++ } else { ++ if (btree_node_intent_locked(path, l)) { ++ six_lock_downgrade(&path->l[l].b->c.lock); ++ mark_btree_node_locked_noreset(path, l, SIX_LOCK_read); ++ } ++ break; ++ } ++ } ++ ++ bch2_btree_path_verify_locks(path); ++} ++ ++/* Btree transaction locking: */ ++ ++void bch2_trans_downgrade(struct btree_trans *trans) ++{ ++ struct btree_path *path; ++ ++ trans_for_each_path(trans, path) ++ bch2_btree_path_downgrade(trans, path); ++} ++ ++int bch2_trans_relock(struct btree_trans *trans) ++{ ++ struct btree_path *path; ++ ++ if (unlikely(trans->restarted)) ++ return - ((int) trans->restarted); ++ ++ trans_for_each_path(trans, path) ++ if (path->should_be_locked && ++ !bch2_btree_path_relock_norestart(trans, path, _RET_IP_)) { ++ trace_and_count(trans->c, trans_restart_relock, trans, _RET_IP_, path); ++ return btree_trans_restart(trans, BCH_ERR_transaction_restart_relock); ++ } ++ return 0; ++} ++ ++void bch2_trans_unlock(struct btree_trans *trans) ++{ ++ struct btree_path *path; ++ ++ trans_for_each_path(trans, path) ++ __bch2_btree_path_unlock(trans, path); ++ ++ /* ++ * bch2_gc_btree_init_recurse() doesn't use btree iterators for walking ++ * btree nodes, it implements its own walking: ++ */ ++ BUG_ON(!trans->is_initial_gc && ++ lock_class_is_held(&bch2_btree_node_lock_key)); ++} ++ ++/* Debug */ ++ ++#ifdef CONFIG_BCACHEFS_DEBUG ++ ++void bch2_btree_path_verify_locks(struct btree_path *path) ++{ ++ unsigned l; ++ ++ if (!path->nodes_locked) { ++ BUG_ON(path->uptodate == BTREE_ITER_UPTODATE && ++ btree_path_node(path, path->level)); ++ return; ++ } ++ ++ for (l = 0; l < BTREE_MAX_DEPTH; l++) { ++ int want = btree_lock_want(path, l); ++ int have = btree_node_locked_type(path, l); ++ ++ BUG_ON(!is_btree_node(path, l) && have != 
BTREE_NODE_UNLOCKED); ++ ++ BUG_ON(is_btree_node(path, l) && ++ (want == BTREE_NODE_UNLOCKED || ++ have != BTREE_NODE_WRITE_LOCKED) && ++ want != have); ++ } ++} ++ ++void bch2_trans_verify_locks(struct btree_trans *trans) ++{ ++ struct btree_path *path; ++ ++ trans_for_each_path(trans, path) ++ bch2_btree_path_verify_locks(path); ++} ++ ++#endif diff --git a/fs/bcachefs/btree_locking.h b/fs/bcachefs/btree_locking.h new file mode 100644 -index 0000000000000..1982b6c873aae +index 000000000000..3bc490bc5120 --- /dev/null +++ b/fs/bcachefs/btree_locking.h -@@ -0,0 +1,289 @@ +@@ -0,0 +1,401 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BCACHEFS_BTREE_LOCKING_H +#define _BCACHEFS_BTREE_LOCKING_H @@ -25475,63 +25830,71 @@ index 0000000000000..1982b6c873aae + +#include "btree_iter.h" + ++extern struct lock_class_key bch2_btree_node_lock_key; ++ ++static inline bool is_btree_node(struct btree_path *path, unsigned l) ++{ ++ return l < BTREE_MAX_DEPTH && !IS_ERR_OR_NULL(path->l[l].b); ++} ++ ++static inline struct btree_transaction_stats *btree_trans_stats(struct btree_trans *trans) ++{ ++ return trans->fn_idx < ARRAY_SIZE(trans->c->btree_transaction_stats) ++ ? &trans->c->btree_transaction_stats[trans->fn_idx] ++ : NULL; ++} ++ +/* matches six lock types */ +enum btree_node_locked_type { + BTREE_NODE_UNLOCKED = -1, + BTREE_NODE_READ_LOCKED = SIX_LOCK_read, + BTREE_NODE_INTENT_LOCKED = SIX_LOCK_intent, ++ BTREE_NODE_WRITE_LOCKED = SIX_LOCK_write, +}; + +static inline int btree_node_locked_type(struct btree_path *path, + unsigned level) +{ -+ /* -+ * We're relying on the fact that if nodes_intent_locked is set -+ * nodes_locked must be set as well, so that we can compute without -+ * branches: -+ */ -+ return BTREE_NODE_UNLOCKED + -+ ((path->nodes_locked >> level) & 1) + -+ ((path->nodes_intent_locked >> level) & 1); ++ return BTREE_NODE_UNLOCKED + ((path->nodes_locked >> (level << 1)) & 3); +} + -+static inline bool btree_node_intent_locked(struct btree_path *path, -+ unsigned level) ++static inline bool btree_node_write_locked(struct btree_path *path, unsigned l) +{ -+ return btree_node_locked_type(path, level) == BTREE_NODE_INTENT_LOCKED; ++ return btree_node_locked_type(path, l) == BTREE_NODE_WRITE_LOCKED; +} + -+static inline bool btree_node_read_locked(struct btree_path *path, -+ unsigned level) ++static inline bool btree_node_intent_locked(struct btree_path *path, unsigned l) +{ -+ return btree_node_locked_type(path, level) == BTREE_NODE_READ_LOCKED; ++ return btree_node_locked_type(path, l) == BTREE_NODE_INTENT_LOCKED; ++} ++ ++static inline bool btree_node_read_locked(struct btree_path *path, unsigned l) ++{ ++ return btree_node_locked_type(path, l) == BTREE_NODE_READ_LOCKED; +} + +static inline bool btree_node_locked(struct btree_path *path, unsigned level) +{ -+ return path->nodes_locked & (1 << level); ++ return btree_node_locked_type(path, level) != BTREE_NODE_UNLOCKED; +} + -+static inline void mark_btree_node_unlocked(struct btree_path *path, -+ unsigned level) -+{ -+ path->nodes_locked &= ~(1 << level); -+ path->nodes_intent_locked &= ~(1 << level); -+} -+ -+static inline void mark_btree_node_locked_noreset(struct btree_trans *trans, -+ struct btree_path *path, -+ unsigned level, -+ enum six_lock_type type) ++static inline void mark_btree_node_locked_noreset(struct btree_path *path, ++ unsigned level, ++ enum btree_node_locked_type type) +{ + /* relying on this to avoid a branch */ + BUILD_BUG_ON(SIX_LOCK_read != 0); + BUILD_BUG_ON(SIX_LOCK_intent != 1); + -+ 
BUG_ON(trans->in_traverse_all && path->sorted_idx > trans->traverse_all_idx); ++ path->nodes_locked &= ~(3U << (level << 1)); ++ path->nodes_locked |= (type + 1) << (level << 1); ++} + -+ path->nodes_locked |= 1 << level; -+ path->nodes_intent_locked |= type << level; ++static inline void mark_btree_node_unlocked(struct btree_path *path, ++ unsigned level) ++{ ++ EBUG_ON(btree_node_write_locked(path, level)); ++ mark_btree_node_locked_noreset(path, level, BTREE_NODE_UNLOCKED); +} + +static inline void mark_btree_node_locked(struct btree_trans *trans, @@ -25539,19 +25902,12 @@ index 0000000000000..1982b6c873aae + unsigned level, + enum six_lock_type type) +{ -+ mark_btree_node_locked_noreset(trans, path, level, type); ++ mark_btree_node_locked_noreset(path, level, type); +#ifdef CONFIG_BCACHEFS_LOCK_TIME_STATS + path->l[level].lock_taken_time = ktime_get_ns(); +#endif +} + -+static inline void mark_btree_node_intent_locked(struct btree_trans *trans, -+ struct btree_path *path, -+ unsigned level) -+{ -+ mark_btree_node_locked_noreset(trans, path, level, SIX_LOCK_intent); -+} -+ +static inline enum six_lock_type __btree_lock_want(struct btree_path *path, int level) +{ + return level < path->locks_want @@ -25571,6 +25927,21 @@ index 0000000000000..1982b6c873aae + return BTREE_NODE_UNLOCKED; +} + ++static void btree_trans_lock_hold_time_update(struct btree_trans *trans, ++ struct btree_path *path, unsigned level) ++{ ++#ifdef CONFIG_BCACHEFS_LOCK_TIME_STATS ++ struct btree_transaction_stats *s = btree_trans_stats(trans); ++ ++ if (s) ++ __bch2_time_stats_update(&s->lock_hold_times, ++ path->l[level].lock_taken_time, ++ ktime_get_ns()); ++#endif ++} ++ ++/* unlock: */ ++ +static inline void btree_node_unlock(struct btree_trans *trans, + struct btree_path *path, unsigned level) +{ @@ -25580,135 +25951,28 @@ index 0000000000000..1982b6c873aae + + if (lock_type != BTREE_NODE_UNLOCKED) { + six_unlock_type(&path->l[level].b->c.lock, lock_type); -+#ifdef CONFIG_BCACHEFS_LOCK_TIME_STATS -+ if (trans->lock_name_idx < BCH_LOCK_TIME_NR) { -+ struct bch_fs *c = trans->c; -+ -+ __bch2_time_stats_update(&c->lock_held_stats.times[trans->lock_name_idx], -+ path->l[level].lock_taken_time, -+ ktime_get_ns()); -+ } -+#endif ++ btree_trans_lock_hold_time_update(trans, path, level); + } + mark_btree_node_unlocked(path, level); +} + ++static inline int btree_path_lowest_level_locked(struct btree_path *path) ++{ ++ return __ffs(path->nodes_locked) >> 1; ++} ++ ++static inline int btree_path_highest_level_locked(struct btree_path *path) ++{ ++ return __fls(path->nodes_locked) >> 1; ++} ++ +static inline void __bch2_btree_path_unlock(struct btree_trans *trans, + struct btree_path *path) +{ + btree_path_set_dirty(path, BTREE_ITER_NEED_RELOCK); + + while (path->nodes_locked) -+ btree_node_unlock(trans, path, __ffs(path->nodes_locked)); -+} -+ -+static inline enum bch_time_stats lock_to_time_stat(enum six_lock_type type) -+{ -+ switch (type) { -+ case SIX_LOCK_read: -+ return BCH_TIME_btree_lock_contended_read; -+ case SIX_LOCK_intent: -+ return BCH_TIME_btree_lock_contended_intent; -+ case SIX_LOCK_write: -+ return BCH_TIME_btree_lock_contended_write; -+ default: -+ BUG(); -+ } -+} -+ -+static inline int btree_node_lock_type(struct btree_trans *trans, -+ struct btree_path *path, -+ struct btree *b, -+ struct bpos pos, unsigned level, -+ enum six_lock_type type, -+ six_lock_should_sleep_fn should_sleep_fn, void *p) -+{ -+ struct bch_fs *c = trans->c; -+ u64 start_time; -+ int ret; -+ -+ if (six_trylock_type(&b->c.lock, 
type)) -+ return 0; -+ -+ start_time = local_clock(); -+ -+ trans->locking_path_idx = path->idx; -+ trans->locking_pos = pos; -+ trans->locking_btree_id = path->btree_id; -+ trans->locking_level = level; -+ trans->locking_lock_type = type; -+ trans->locking = &b->c; -+ ret = six_lock_type(&b->c.lock, type, should_sleep_fn, p); -+ trans->locking = NULL; -+ -+ if (ret) -+ return ret; -+ -+ bch2_time_stats_update(&c->times[lock_to_time_stat(type)], start_time); -+ return 0; -+} -+ -+/* -+ * Lock a btree node if we already have it locked on one of our linked -+ * iterators: -+ */ -+static inline bool btree_node_lock_increment(struct btree_trans *trans, -+ struct btree *b, unsigned level, -+ enum btree_node_locked_type want) -+{ -+ struct btree_path *path; -+ -+ trans_for_each_path(trans, path) -+ if (path->l[level].b == b && -+ btree_node_locked_type(path, level) >= want) { -+ six_lock_increment(&b->c.lock, want); -+ return true; -+ } -+ -+ return false; -+} -+ -+int __bch2_btree_node_lock(struct btree_trans *, struct btree_path *, -+ struct btree *, struct bpos, unsigned, -+ enum six_lock_type, -+ six_lock_should_sleep_fn, void *, -+ unsigned long); -+ -+static inline int btree_node_lock(struct btree_trans *trans, -+ struct btree_path *path, -+ struct btree *b, struct bpos pos, unsigned level, -+ enum six_lock_type type, -+ six_lock_should_sleep_fn should_sleep_fn, void *p, -+ unsigned long ip) -+{ -+ int ret = 0; -+ -+ EBUG_ON(level >= BTREE_MAX_DEPTH); -+ EBUG_ON(!(trans->paths_allocated & (1ULL << path->idx))); -+ -+ if (likely(six_trylock_type(&b->c.lock, type)) || -+ btree_node_lock_increment(trans, b, level, type) || -+ !(ret = __bch2_btree_node_lock(trans, path, b, pos, level, type, -+ should_sleep_fn, p, ip))) { -+#ifdef CONFIG_BCACHEFS_LOCK_TIME_STATS -+ path->l[b->c.level].lock_taken_time = ktime_get_ns(); -+#endif -+ } -+ -+ return ret; -+} -+ -+bool __bch2_btree_node_relock(struct btree_trans *, struct btree_path *, unsigned); -+ -+static inline bool bch2_btree_node_relock(struct btree_trans *trans, -+ struct btree_path *path, unsigned level) -+{ -+ EBUG_ON(btree_node_locked(path, level) && -+ btree_node_locked_type(path, level) != -+ __btree_lock_want(path, level)); -+ -+ return likely(btree_node_locked(path, level)) || -+ __bch2_btree_node_relock(trans, path, level); ++ btree_node_unlock(trans, path, btree_path_lowest_level_locked(path)); +} + +/* @@ -25723,6 +25987,9 @@ index 0000000000000..1982b6c873aae + + EBUG_ON(path->l[b->c.level].b != b); + EBUG_ON(path->l[b->c.level].lock_seq + 1 != b->c.lock.state.seq); ++ EBUG_ON(btree_node_locked_type(path, b->c.level) != SIX_LOCK_write); ++ ++ mark_btree_node_locked_noreset(path, b->c.level, SIX_LOCK_intent); + + trans_for_each_path_with_node(trans, b, linked) + linked->l[b->c.level].lock_seq += 2; @@ -25733,27 +26000,227 @@ index 0000000000000..1982b6c873aae +void bch2_btree_node_unlock_write(struct btree_trans *, + struct btree_path *, struct btree *); + -+void __bch2_btree_node_lock_write(struct btree_trans *, struct btree *); ++/* lock: */ + -+static inline void bch2_btree_node_lock_write(struct btree_trans *trans, -+ struct btree_path *path, -+ struct btree *b) ++static inline int __must_check ++btree_node_lock_nopath(struct btree_trans *trans, ++ struct btree_bkey_cached_common *b, ++ enum six_lock_type type) +{ -+ EBUG_ON(path->l[b->c.level].b != b); -+ EBUG_ON(path->l[b->c.level].lock_seq != b->c.lock.state.seq); -+ EBUG_ON(!btree_node_intent_locked(path, b->c.level)); ++ six_lock_type(&b->lock, type, NULL, NULL); ++ return 
0; ++} + -+ if (unlikely(!six_trylock_write(&b->c.lock))) ++static inline void btree_node_lock_nopath_nofail(struct btree_trans *trans, ++ struct btree_bkey_cached_common *b, ++ enum six_lock_type type) ++{ ++ int ret = btree_node_lock_nopath(trans, b, type); ++ ++ BUG_ON(ret); ++} ++ ++static inline int btree_node_lock_type(struct btree_trans *trans, ++ struct btree_path *path, ++ struct btree_bkey_cached_common *b, ++ struct bpos pos, unsigned level, ++ enum six_lock_type type, ++ six_lock_should_sleep_fn should_sleep_fn, void *p) ++{ ++ int ret; ++ ++ if (six_trylock_type(&b->lock, type)) ++ return 0; ++ ++ trans->locking_path_idx = path->idx; ++ trans->locking_pos = pos; ++ trans->locking_btree_id = path->btree_id; ++ trans->locking_level = level; ++ trans->locking_lock_type = type; ++ trans->locking = b; ++ ret = six_lock_type(&b->lock, type, should_sleep_fn, p); ++ trans->locking = NULL; ++ return ret; ++} ++ ++/* ++ * Lock a btree node if we already have it locked on one of our linked ++ * iterators: ++ */ ++static inline bool btree_node_lock_increment(struct btree_trans *trans, ++ struct btree_bkey_cached_common *b, ++ unsigned level, ++ enum btree_node_locked_type want) ++{ ++ struct btree_path *path; ++ ++ trans_for_each_path(trans, path) ++ if (&path->l[level].b->c == b && ++ btree_node_locked_type(path, level) >= want) { ++ six_lock_increment(&b->lock, want); ++ return true; ++ } ++ ++ return false; ++} ++ ++int __bch2_btree_node_lock(struct btree_trans *, struct btree_path *, ++ struct btree_bkey_cached_common *, ++ struct bpos, unsigned, ++ enum six_lock_type, ++ six_lock_should_sleep_fn, void *, ++ unsigned long); ++ ++static inline int btree_node_lock(struct btree_trans *trans, ++ struct btree_path *path, ++ struct btree_bkey_cached_common *b, ++ struct bpos pos, unsigned level, ++ enum six_lock_type type, ++ six_lock_should_sleep_fn should_sleep_fn, void *p, ++ unsigned long ip) ++{ ++ int ret = 0; ++ ++ EBUG_ON(level >= BTREE_MAX_DEPTH); ++ EBUG_ON(!(trans->paths_allocated & (1ULL << path->idx))); ++ ++ if (likely(six_trylock_type(&b->lock, type)) || ++ btree_node_lock_increment(trans, b, level, type) || ++ !(ret = __bch2_btree_node_lock(trans, path, b, pos, level, type, ++ should_sleep_fn, p, ip))) { ++#ifdef CONFIG_BCACHEFS_LOCK_TIME_STATS ++ path->l[b->level].lock_taken_time = ktime_get_ns(); ++#endif ++ } ++ ++ return ret; ++} ++ ++void __bch2_btree_node_lock_write(struct btree_trans *, struct btree_bkey_cached_common *); ++ ++static inline void bch2_btree_node_lock_write_nofail(struct btree_trans *trans, ++ struct btree_path *path, ++ struct btree_bkey_cached_common *b) ++{ ++ EBUG_ON(&path->l[b->level].b->c != b); ++ EBUG_ON(path->l[b->level].lock_seq != b->lock.state.seq); ++ EBUG_ON(!btree_node_intent_locked(path, b->level)); ++ ++ /* ++ * six locks are unfair, and read locks block while a thread wants a ++ * write lock: thus, we need to tell the cycle detector we have a write ++ * lock _before_ taking the lock: ++ */ ++ mark_btree_node_locked_noreset(path, b->level, SIX_LOCK_write); ++ ++ if (unlikely(!six_trylock_write(&b->lock))) + __bch2_btree_node_lock_write(trans, b); +} + ++static inline int __must_check ++bch2_btree_node_lock_write(struct btree_trans *trans, ++ struct btree_path *path, ++ struct btree_bkey_cached_common *b) ++{ ++ bch2_btree_node_lock_write_nofail(trans, path, b); ++ return 0; ++} ++ ++/* relock: */ ++ ++bool bch2_btree_path_relock_norestart(struct btree_trans *, ++ struct btree_path *, unsigned long); ++bool 
__bch2_btree_node_relock(struct btree_trans *, struct btree_path *, unsigned); ++ ++static inline bool bch2_btree_node_relock(struct btree_trans *trans, ++ struct btree_path *path, unsigned level) ++{ ++ EBUG_ON(btree_node_locked(path, level) && ++ !btree_node_write_locked(path, level) && ++ btree_node_locked_type(path, level) != __btree_lock_want(path, level)); ++ ++ return likely(btree_node_locked(path, level)) || ++ (!IS_ERR_OR_NULL(path->l[level].b) && ++ __bch2_btree_node_relock(trans, path, level)); ++} ++ ++static inline int bch2_btree_path_relock(struct btree_trans *trans, ++ struct btree_path *path, unsigned long trace_ip) ++{ ++ if (!bch2_btree_path_relock_norestart(trans, path, trace_ip)) { ++ trace_and_count(trans->c, trans_restart_relock_path, trans, trace_ip, path); ++ return btree_trans_restart(trans, BCH_ERR_transaction_restart_relock_path); ++ } ++ ++ return 0; ++} ++ ++/* upgrade */ ++ ++bool bch2_btree_path_upgrade_noupgrade_sibs(struct btree_trans *, ++ struct btree_path *, unsigned); ++bool __bch2_btree_path_upgrade(struct btree_trans *, ++ struct btree_path *, unsigned); ++ ++static inline bool bch2_btree_path_upgrade(struct btree_trans *trans, ++ struct btree_path *path, ++ unsigned new_locks_want) ++{ ++ new_locks_want = min(new_locks_want, BTREE_MAX_DEPTH); ++ ++ return path->locks_want < new_locks_want ++ ? __bch2_btree_path_upgrade(trans, path, new_locks_want) ++ : path->uptodate == BTREE_ITER_UPTODATE; ++} ++ ++/* misc: */ ++ ++static inline void btree_path_set_should_be_locked(struct btree_path *path) ++{ ++ EBUG_ON(!btree_node_locked(path, path->level)); ++ EBUG_ON(path->uptodate); ++ ++ path->should_be_locked = true; ++} ++ ++static inline void __btree_path_set_level_up(struct btree_trans *trans, ++ struct btree_path *path, ++ unsigned l) ++{ ++ btree_node_unlock(trans, path, l); ++ path->l[l].b = ERR_PTR(-BCH_ERR_no_btree_node_up); ++} ++ ++static inline void btree_path_set_level_up(struct btree_trans *trans, ++ struct btree_path *path) ++{ ++ __btree_path_set_level_up(trans, path, path->level++); ++ btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE); ++} ++ ++/* debug */ ++ ++struct six_lock_count bch2_btree_node_lock_counts(struct btree_trans *, ++ struct btree_path *, ++ struct btree_bkey_cached_common *b, ++ unsigned); ++ ++ ++#ifdef CONFIG_BCACHEFS_DEBUG ++void bch2_btree_path_verify_locks(struct btree_path *); ++void bch2_trans_verify_locks(struct btree_trans *); ++#else ++static inline void bch2_btree_path_verify_locks(struct btree_path *path) {} ++static inline void bch2_trans_verify_locks(struct btree_trans *trans) {} ++#endif ++ +#endif /* _BCACHEFS_BTREE_LOCKING_H */ diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h new file mode 100644 -index 0000000000000..e5cc5a6f2af3b +index 000000000000..7c01663721ed --- /dev/null +++ b/fs/bcachefs/btree_types.h -@@ -0,0 +1,697 @@ +@@ -0,0 +1,695 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BCACHEFS_BTREE_TYPES_H +#define _BCACHEFS_BTREE_TYPES_H @@ -25819,6 +26286,7 @@ index 0000000000000..e5cc5a6f2af3b + struct six_lock lock; + u8 level; + u8 btree_id; ++ bool cached; +}; + +struct btree { @@ -25955,15 +26423,13 @@ index 0000000000000..e5cc5a6f2af3b +#define BTREE_ITER_IS_EXTENTS (1 << 4) +#define BTREE_ITER_NOT_EXTENTS (1 << 5) +#define BTREE_ITER_CACHED (1 << 6) -+#define BTREE_ITER_CACHED_NOFILL (1 << 7) -+#define BTREE_ITER_CACHED_NOCREATE (1 << 8) -+#define BTREE_ITER_WITH_KEY_CACHE (1 << 9) -+#define BTREE_ITER_WITH_UPDATES (1 << 10) -+#define BTREE_ITER_WITH_JOURNAL (1 << 
11) -+#define __BTREE_ITER_ALL_SNAPSHOTS (1 << 12) -+#define BTREE_ITER_ALL_SNAPSHOTS (1 << 13) -+#define BTREE_ITER_FILTER_SNAPSHOTS (1 << 14) -+#define BTREE_ITER_NOPRESERVE (1 << 15) ++#define BTREE_ITER_WITH_KEY_CACHE (1 << 7) ++#define BTREE_ITER_WITH_UPDATES (1 << 8) ++#define BTREE_ITER_WITH_JOURNAL (1 << 9) ++#define __BTREE_ITER_ALL_SNAPSHOTS (1 << 10) ++#define BTREE_ITER_ALL_SNAPSHOTS (1 << 11) ++#define BTREE_ITER_FILTER_SNAPSHOTS (1 << 12) ++#define BTREE_ITER_NOPRESERVE (1 << 13) + +enum btree_path_uptodate { + BTREE_ITER_UPTODATE = 0, @@ -25971,15 +26437,6 @@ index 0000000000000..e5cc5a6f2af3b + BTREE_ITER_NEED_TRAVERSE = 2, +}; + -+#define BTREE_ITER_NO_NODE_GET_LOCKS ((struct btree *) 1) -+#define BTREE_ITER_NO_NODE_DROP ((struct btree *) 2) -+#define BTREE_ITER_NO_NODE_LOCK_ROOT ((struct btree *) 3) -+#define BTREE_ITER_NO_NODE_UP ((struct btree *) 4) -+#define BTREE_ITER_NO_NODE_DOWN ((struct btree *) 5) -+#define BTREE_ITER_NO_NODE_INIT ((struct btree *) 6) -+#define BTREE_ITER_NO_NODE_ERROR ((struct btree *) 7) -+#define BTREE_ITER_NO_NODE_CACHED ((struct btree *) 8) -+ +struct btree_path { + u8 idx; + u8 sorted_idx; @@ -25999,9 +26456,8 @@ index 0000000000000..e5cc5a6f2af3b + */ + bool should_be_locked:1; + unsigned level:3, -+ locks_want:4, -+ nodes_locked:4, -+ nodes_intent_locked:4; ++ locks_want:4; ++ u8 nodes_locked; + + struct btree_path_level { + struct btree *b; @@ -26069,7 +26525,8 @@ index 0000000000000..e5cc5a6f2af3b + struct mutex lock; + struct rhashtable table; + bool table_init_done; -+ struct list_head freed; ++ struct list_head freed_pcpu; ++ struct list_head freed_nonpcpu; + struct shrinker shrink; + unsigned shrink_iter; + struct btree_key_cache_freelist __percpu *pcpu_freed; @@ -26105,6 +26562,13 @@ index 0000000000000..e5cc5a6f2af3b + struct bkey_i *k; +}; + ++static inline struct bpos btree_node_pos(struct btree_bkey_cached_common *b) ++{ ++ return !b->cached ++ ? 
container_of(b, struct btree, c)->key.k.p ++ : container_of(b, struct bkey_cached, c)->key.pos; ++} ++ +struct btree_insert_entry { + unsigned flags; + u8 bkey_type; @@ -26159,6 +26623,7 @@ index 0000000000000..e5cc5a6f2af3b + struct task_struct *task; + int srcu_idx; + ++ u8 fn_idx; + u8 nr_sorted; + u8 nr_updates; + u8 traverse_all_idx; @@ -26179,6 +26644,7 @@ index 0000000000000..e5cc5a6f2af3b + u64 paths_allocated; + + unsigned mem_top; ++ unsigned mem_max; + unsigned mem_bytes; + void *mem; + @@ -26199,7 +26665,6 @@ index 0000000000000..e5cc5a6f2af3b + unsigned journal_u64s; + unsigned journal_preres_u64s; + struct replicas_delta_list *fs_usage_deltas; -+ int lock_name_idx; +}; + +#define BTREE_FLAGS() \ @@ -26453,7 +26918,7 @@ index 0000000000000..e5cc5a6f2af3b +#endif /* _BCACHEFS_BTREE_TYPES_H */ diff --git a/fs/bcachefs/btree_update.h b/fs/bcachefs/btree_update.h new file mode 100644 -index 0000000000000..89941fb8caa06 +index 000000000000..89941fb8caa0 --- /dev/null +++ b/fs/bcachefs/btree_update.h @@ -0,0 +1,158 @@ @@ -26617,10 +27082,10 @@ index 0000000000000..89941fb8caa06 +#endif /* _BCACHEFS_BTREE_UPDATE_H */ diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c new file mode 100644 -index 0000000000000..9b0dedea7ed27 +index 000000000000..d31c6eeba8fc --- /dev/null +++ b/fs/bcachefs/btree_update_interior.c -@@ -0,0 +1,2252 @@ +@@ -0,0 +1,2271 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "bcachefs.h" @@ -26766,7 +27231,7 @@ index 0000000000000..9b0dedea7ed27 + +static void __btree_node_free(struct bch_fs *c, struct btree *b) +{ -+ trace_btree_node_free(c, b); ++ trace_and_count(c, btree_node_free, c, b); + + BUG_ON(btree_node_dirty(b)); + BUG_ON(btree_node_need_write(b)); @@ -26783,30 +27248,32 @@ index 0000000000000..9b0dedea7ed27 +} + +static void bch2_btree_node_free_inmem(struct btree_trans *trans, ++ struct btree_path *path, + struct btree *b) +{ + struct bch_fs *c = trans->c; -+ struct btree_path *path; -+ -+ trans_for_each_path(trans, path) -+ BUG_ON(path->l[b->c.level].b == b && -+ path->l[b->c.level].lock_seq == b->c.lock.state.seq); -+ -+ six_lock_write(&b->c.lock, NULL, NULL); ++ unsigned level = b->c.level; + ++ bch2_btree_node_lock_write_nofail(trans, path, &b->c); + bch2_btree_node_hash_remove(&c->btree_cache, b); + __btree_node_free(c, b); -+ + six_unlock_write(&b->c.lock); -+ six_unlock_intent(&b->c.lock); ++ mark_btree_node_locked_noreset(path, level, SIX_LOCK_intent); ++ ++ trans_for_each_path(trans, path) ++ if (path->l[level].b == b) { ++ btree_node_unlock(trans, path, level); ++ path->l[level].b = ERR_PTR(-BCH_ERR_no_btree_node_init); ++ } +} + -+static struct btree *__bch2_btree_node_alloc(struct bch_fs *c, ++static struct btree *__bch2_btree_node_alloc(struct btree_trans *trans, + struct disk_reservation *res, + struct closure *cl, + bool interior_node, + unsigned flags) +{ ++ struct bch_fs *c = trans->c; + struct write_point *wp; + struct btree *b; + __BKEY_PADDED(k, BKEY_BTREE_PTR_VAL_U64s_MAX) tmp; @@ -26836,7 +27303,7 @@ index 0000000000000..9b0dedea7ed27 + mutex_unlock(&c->btree_reserve_cache_lock); + +retry: -+ wp = bch2_alloc_sectors_start(c, ++ wp = bch2_alloc_sectors_start_trans(trans, + c->opts.metadata_target ?: + c->opts.foreground_target, + 0, @@ -26880,7 +27347,9 @@ index 0000000000000..9b0dedea7ed27 + return b; +} + -+static struct btree *bch2_btree_node_alloc(struct btree_update *as, unsigned level) ++static struct btree *bch2_btree_node_alloc(struct btree_update *as, ++ struct btree_trans *trans, 
++ unsigned level) +{ + struct bch_fs *c = as->c; + struct btree *b; @@ -26892,8 +27361,8 @@ index 0000000000000..9b0dedea7ed27 + + b = p->b[--p->nr]; + -+ six_lock_intent(&b->c.lock, NULL, NULL); -+ six_lock_write(&b->c.lock, NULL, NULL); ++ btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_intent); ++ btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_write); + + set_btree_node_accessed(b); + set_btree_node_dirty_acct(c, b); @@ -26926,7 +27395,7 @@ index 0000000000000..9b0dedea7ed27 + ret = bch2_btree_node_hash_insert(&c->btree_cache, b, level, as->btree_id); + BUG_ON(ret); + -+ trace_btree_node_alloc(c, b); ++ trace_and_count(c, btree_node_alloc, c, b); + return b; +} + @@ -26944,12 +27413,13 @@ index 0000000000000..9b0dedea7ed27 +} + +struct btree *__bch2_btree_node_alloc_replacement(struct btree_update *as, ++ struct btree_trans *trans, + struct btree *b, + struct bkey_format format) +{ + struct btree *n; + -+ n = bch2_btree_node_alloc(as, b->c.level); ++ n = bch2_btree_node_alloc(as, trans, b->c.level); + + SET_BTREE_NODE_SEQ(n->data, BTREE_NODE_SEQ(b->data) + 1); + @@ -26968,6 +27438,7 @@ index 0000000000000..9b0dedea7ed27 +} + +static struct btree *bch2_btree_node_alloc_replacement(struct btree_update *as, ++ struct btree_trans *trans, + struct btree *b) +{ + struct bkey_format new_f = bch2_btree_calc_format(b); @@ -26979,12 +27450,13 @@ index 0000000000000..9b0dedea7ed27 + if (!bch2_btree_node_format_fits(as->c, b, &new_f)) + new_f = b->format; + -+ return __bch2_btree_node_alloc_replacement(as, b, new_f); ++ return __bch2_btree_node_alloc_replacement(as, trans, b, new_f); +} + -+static struct btree *__btree_root_alloc(struct btree_update *as, unsigned level) ++static struct btree *__btree_root_alloc(struct btree_update *as, ++ struct btree_trans *trans, unsigned level) +{ -+ struct btree *b = bch2_btree_node_alloc(as, level); ++ struct btree *b = bch2_btree_node_alloc(as, trans, level); + + btree_set_min(b, POS_MIN); + btree_set_max(b, SPOS_MAX); @@ -26999,7 +27471,7 @@ index 0000000000000..9b0dedea7ed27 + return b; +} + -+static void bch2_btree_reserve_put(struct btree_update *as) ++static void bch2_btree_reserve_put(struct btree_update *as, struct btree_trans *trans) +{ + struct bch_fs *c = as->c; + struct prealloc_nodes *p; @@ -27026,8 +27498,8 @@ index 0000000000000..9b0dedea7ed27 + + mutex_unlock(&c->btree_reserve_cache_lock); + -+ six_lock_intent(&b->c.lock, NULL, NULL); -+ six_lock_write(&b->c.lock, NULL, NULL); ++ btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_intent); ++ btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_write); + __btree_node_free(c, b); + six_unlock_write(&b->c.lock); + six_unlock_intent(&b->c.lock); @@ -27035,18 +27507,16 @@ index 0000000000000..9b0dedea7ed27 + } +} + -+static int bch2_btree_reserve_get(struct btree_update *as, ++static int bch2_btree_reserve_get(struct btree_trans *trans, ++ struct btree_update *as, + unsigned nr_nodes[2], -+ unsigned flags) ++ unsigned flags, ++ struct closure *cl) +{ + struct bch_fs *c = as->c; -+ struct closure cl; + struct btree *b; + unsigned interior; -+ int ret; -+ -+ closure_init_stack(&cl); -+retry: ++ int ret = 0; + + BUG_ON(nr_nodes[0] + nr_nodes[1] > BTREE_RESERVE_MAX); + @@ -27057,18 +27527,17 @@ index 0000000000000..9b0dedea7ed27 + * BTREE_INSERT_NOWAIT only applies to btree node allocation, not + * blocking on this lock: + */ -+ ret = bch2_btree_cache_cannibalize_lock(c, &cl); ++ ret = bch2_btree_cache_cannibalize_lock(c, cl); + if (ret) -+ goto err; ++ return ret; + + for (interior = 0; 
interior < 2; interior++) { + struct prealloc_nodes *p = as->prealloc_nodes + interior; + + while (p->nr < nr_nodes[interior]) { -+ b = __bch2_btree_node_alloc(c, &as->disk_res, -+ flags & BTREE_INSERT_NOWAIT -+ ? NULL : &cl, -+ interior, flags); ++ b = __bch2_btree_node_alloc(trans, &as->disk_res, ++ flags & BTREE_INSERT_NOWAIT ? NULL : cl, ++ interior, flags); + if (IS_ERR(b)) { + ret = PTR_ERR(b); + goto err; @@ -27077,24 +27546,14 @@ index 0000000000000..9b0dedea7ed27 + p->b[p->nr++] = b; + } + } -+ -+ bch2_btree_cache_cannibalize_unlock(c); -+ closure_sync(&cl); -+ return 0; +err: + bch2_btree_cache_cannibalize_unlock(c); -+ closure_sync(&cl); -+ -+ if (ret == -EAGAIN) -+ goto retry; -+ -+ trace_btree_reserve_get_fail(c, nr_nodes[0] + nr_nodes[1], &cl); + return ret; +} + +/* Asynchronous interior node update machinery */ + -+static void bch2_btree_update_free(struct btree_update *as) ++static void bch2_btree_update_free(struct btree_update *as, struct btree_trans *trans) +{ + struct bch_fs *c = as->c; + @@ -27107,7 +27566,7 @@ index 0000000000000..9b0dedea7ed27 + bch2_journal_pin_drop(&c->journal, &as->journal); + bch2_journal_pin_flush(&c->journal, &as->journal); + bch2_disk_reservation_put(c, &as->disk_res); -+ bch2_btree_reserve_put(as); ++ bch2_btree_reserve_put(as, trans); + + bch2_time_stats_update(&c->times[BCH_TIME_btree_interior_update_total], + as->start_time); @@ -27185,12 +27644,13 @@ index 0000000000000..9b0dedea7ed27 +static void btree_update_nodes_written(struct btree_update *as) +{ + struct bch_fs *c = as->c; -+ struct btree *b = as->b; ++ struct btree *b; + struct btree_trans trans; + u64 journal_seq = 0; + unsigned i; + int ret; + ++ bch2_trans_init(&trans, c, 0, 512); + /* + * If we're already in an error state, it might be because a btree node + * was never written, and we might be trying to free that same btree @@ -27207,15 +27667,16 @@ index 0000000000000..9b0dedea7ed27 + * on disk: + */ + for (i = 0; i < as->nr_old_nodes; i++) { -+ struct btree *old = as->old_nodes[i]; + __le64 seq; + -+ six_lock_read(&old->c.lock, NULL, NULL); -+ seq = old->data ? old->data->keys.seq : 0; -+ six_unlock_read(&old->c.lock); ++ b = as->old_nodes[i]; ++ ++ btree_node_lock_nopath_nofail(&trans, &b->c, SIX_LOCK_read); ++ seq = b->data ? b->data->keys.seq : 0; ++ six_unlock_read(&b->c.lock); + + if (seq == as->old_nodes_seq[i]) -+ wait_on_bit_io(&old->flags, BTREE_NODE_write_in_flight_inner, ++ wait_on_bit_io(&b->flags, BTREE_NODE_write_in_flight_inner, + TASK_UNINTERRUPTIBLE); + } + @@ -27232,19 +27693,19 @@ index 0000000000000..9b0dedea7ed27 + * journal reclaim does btree updates when flushing bkey_cached entries, + * which may require allocations as well. 
+ */ -+ bch2_trans_init(&trans, c, 0, 512); + ret = commit_do(&trans, &as->disk_res, &journal_seq, -+ BTREE_INSERT_NOFAIL| -+ BTREE_INSERT_NOCHECK_RW| -+ BTREE_INSERT_JOURNAL_RECLAIM| -+ JOURNAL_WATERMARK_reserved, -+ btree_update_nodes_written_trans(&trans, as)); -+ bch2_trans_exit(&trans); ++ BTREE_INSERT_NOFAIL| ++ BTREE_INSERT_NOCHECK_RW| ++ BTREE_INSERT_JOURNAL_RECLAIM| ++ JOURNAL_WATERMARK_reserved, ++ btree_update_nodes_written_trans(&trans, as)); ++ bch2_trans_unlock(&trans); + + bch2_fs_fatal_err_on(ret && !bch2_journal_error(&c->journal), c, + "error %i in btree_update_nodes_written()", ret); +err: -+ if (b) { ++ if (as->b) { ++ b = as->b; + /* + * @b is the node we did the final insert into: + * @@ -27257,8 +27718,8 @@ index 0000000000000..9b0dedea7ed27 + * we're in journal error state: + */ + -+ six_lock_intent(&b->c.lock, NULL, NULL); -+ six_lock_write(&b->c.lock, NULL, NULL); ++ btree_node_lock_nopath_nofail(&trans, &b->c, SIX_LOCK_intent); ++ btree_node_lock_nopath_nofail(&trans, &b->c, SIX_LOCK_write); + mutex_lock(&c->btree_interior_update_lock); + + list_del(&as->write_blocked_list); @@ -27315,7 +27776,7 @@ index 0000000000000..9b0dedea7ed27 + for (i = 0; i < as->nr_new_nodes; i++) { + b = as->new_nodes[i]; + -+ six_lock_read(&b->c.lock, NULL, NULL); ++ btree_node_lock_nopath_nofail(&trans, &b->c, SIX_LOCK_read); + btree_node_write_if_need(c, b, SIX_LOCK_read); + six_unlock_read(&b->c.lock); + } @@ -27323,7 +27784,8 @@ index 0000000000000..9b0dedea7ed27 + for (i = 0; i < as->nr_open_buckets; i++) + bch2_open_bucket_put(c, c->open_buckets + as->open_buckets[i]); + -+ bch2_btree_update_free(as); ++ bch2_btree_update_free(as, &trans); ++ bch2_trans_exit(&trans); +} + +static void btree_interior_update_work(struct work_struct *work) @@ -27570,7 +28032,7 @@ index 0000000000000..9b0dedea7ed27 + as->nr_old_nodes++; +} + -+static void bch2_btree_update_done(struct btree_update *as) ++static void bch2_btree_update_done(struct btree_update *as, struct btree_trans *trans) +{ + struct bch_fs *c = as->c; + u64 start_time = as->start_time; @@ -27581,7 +28043,7 @@ index 0000000000000..9b0dedea7ed27 + up_read(&as->c->gc_lock); + as->took_gc_lock = false; + -+ bch2_btree_reserve_put(as); ++ bch2_btree_reserve_put(as, trans); + + continue_at(&as->cl, btree_update_set_nodes_written, + as->c->btree_interior_update_worker); @@ -27603,6 +28065,7 @@ index 0000000000000..9b0dedea7ed27 + unsigned update_level = level; + int journal_flags = flags & JOURNAL_WATERMARK_MASK; + int ret = 0; ++ u32 restart_count = trans->restart_count; + + BUG_ON(!path->should_be_locked); + @@ -27628,8 +28091,7 @@ index 0000000000000..9b0dedea7ed27 + nr_nodes[1] += 1; + + if (!bch2_btree_path_upgrade(trans, path, U8_MAX)) { -+ trace_trans_restart_iter_upgrade(trans->fn, _RET_IP_, -+ path->btree_id, &path->pos); ++ trace_and_count(c, trans_restart_iter_upgrade, trans, _RET_IP_, path); + ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_upgrade); + return ERR_PTR(ret); + } @@ -27677,16 +28139,29 @@ index 0000000000000..9b0dedea7ed27 + if (ret) + goto err; + -+ bch2_trans_unlock(trans); -+ + ret = bch2_journal_preres_get(&c->journal, &as->journal_preres, + BTREE_UPDATE_JOURNAL_RES, -+ journal_flags); ++ journal_flags|JOURNAL_RES_GET_NONBLOCK); + if (ret) { -+ bch2_btree_update_free(as); -+ trace_trans_restart_journal_preres_get(trans->fn, _RET_IP_); -+ ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_journal_preres_get); -+ return ERR_PTR(ret); ++ bch2_trans_unlock(trans); ++ ++ if (flags & 
BTREE_INSERT_JOURNAL_RECLAIM) { ++ ret = -BCH_ERR_journal_reclaim_would_deadlock; ++ goto err; ++ } ++ ++ ret = bch2_journal_preres_get(&c->journal, &as->journal_preres, ++ BTREE_UPDATE_JOURNAL_RES, ++ journal_flags); ++ if (ret) { ++ trace_and_count(c, trans_restart_journal_preres_get, trans, _RET_IP_, journal_flags); ++ ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_journal_preres_get); ++ goto err; ++ } ++ ++ ret = bch2_trans_relock(trans); ++ if (ret) ++ goto err; + } + + ret = bch2_disk_reservation_get(c, &as->disk_res, @@ -27696,17 +28171,34 @@ index 0000000000000..9b0dedea7ed27 + if (ret) + goto err; + -+ ret = bch2_btree_reserve_get(as, nr_nodes, flags); -+ if (ret) ++ ret = bch2_btree_reserve_get(trans, as, nr_nodes, flags, NULL); ++ if (ret == -EAGAIN || ++ ret == -ENOMEM) { ++ struct closure cl; ++ ++ closure_init_stack(&cl); ++ ++ bch2_trans_unlock(trans); ++ ++ do { ++ ret = bch2_btree_reserve_get(trans, as, nr_nodes, flags, &cl); ++ closure_sync(&cl); ++ } while (ret == -EAGAIN); ++ } ++ ++ if (ret) { ++ trace_and_count(c, btree_reserve_get_fail, trans->fn, _RET_IP_, nr_nodes[0] + nr_nodes[1]); + goto err; ++ } + + ret = bch2_trans_relock(trans); + if (ret) + goto err; + ++ bch2_trans_verify_not_restarted(trans, restart_count); + return as; +err: -+ bch2_btree_update_free(as); ++ bch2_btree_update_free(as, trans); + return ERR_PTR(ret); +} + @@ -27750,7 +28242,7 @@ index 0000000000000..9b0dedea7ed27 + struct bch_fs *c = as->c; + struct btree *old; + -+ trace_btree_set_root(c, b); ++ trace_and_count(c, btree_node_set_root, c, b); + BUG_ON(!b->written); + + old = btree_node_root(c, b); @@ -27759,7 +28251,7 @@ index 0000000000000..9b0dedea7ed27 + * Ensure no one is using the old root while we switch to the + * new root: + */ -+ bch2_btree_node_lock_write(trans, path, old); ++ bch2_btree_node_lock_write_nofail(trans, path, &old->c); + + bch2_btree_set_root_inmem(c, b); + @@ -27858,6 +28350,7 @@ index 0000000000000..9b0dedea7ed27 + * node) + */ +static struct btree *__btree_split_node(struct btree_update *as, ++ struct btree_trans *trans, + struct btree *n1) +{ + struct bkey_format_state s; @@ -27867,7 +28360,7 @@ index 0000000000000..9b0dedea7ed27 + struct bkey_packed *k, *set2_start, *set2_end, *out, *prev = NULL; + struct bpos n1_pos; + -+ n2 = bch2_btree_node_alloc(as, n1->c.level); ++ n2 = bch2_btree_node_alloc(as, trans, n1->c.level); + + n2->data->max_key = n1->data->max_key; + n2->data->format = n1->format; @@ -28031,15 +28524,15 @@ index 0000000000000..9b0dedea7ed27 + + bch2_btree_interior_update_will_free_node(as, b); + -+ n1 = bch2_btree_node_alloc_replacement(as, b); ++ n1 = bch2_btree_node_alloc_replacement(as, trans, b); + + if (keys) + btree_split_insert_keys(as, trans, path, n1, keys); + + if (bset_u64s(&n1->set[0]) > BTREE_SPLIT_THRESHOLD(c)) { -+ trace_btree_split(c, b); ++ trace_and_count(c, btree_node_split, c, b); + -+ n2 = __btree_split_node(as, n1); ++ n2 = __btree_split_node(as, trans, n1); + + bch2_btree_build_aux_trees(n2); + bch2_btree_build_aux_trees(n1); @@ -28061,7 +28554,7 @@ index 0000000000000..9b0dedea7ed27 + + if (!parent) { + /* Depth increases, make a new root */ -+ n3 = __btree_root_alloc(as, b->c.level + 1); ++ n3 = __btree_root_alloc(as, trans, b->c.level + 1); + + n3->sib_u64s[0] = U16_MAX; + n3->sib_u64s[1] = U16_MAX; @@ -28071,7 +28564,7 @@ index 0000000000000..9b0dedea7ed27 + bch2_btree_node_write(c, n3, SIX_LOCK_intent, 0); + } + } else { -+ trace_btree_compact(c, b); ++ trace_and_count(c, btree_node_compact, c, b); + + 
bch2_btree_build_aux_trees(n1); + six_unlock_write(&n1->c.lock); @@ -28102,22 +28595,19 @@ index 0000000000000..9b0dedea7ed27 + if (n3) + bch2_btree_update_get_open_buckets(as, n3); + -+ /* Successful split, update the path to point to the new nodes: */ -+ -+ six_lock_increment(&b->c.lock, SIX_LOCK_intent); -+ if (n3) -+ bch2_trans_node_add(trans, n3); -+ if (n2) -+ bch2_trans_node_add(trans, n2); -+ bch2_trans_node_add(trans, n1); -+ + /* + * The old node must be freed (in memory) _before_ unlocking the new + * nodes - else another thread could re-acquire a read lock on the old + * node after another thread has locked and updated the new node, thus + * seeing stale data: + */ -+ bch2_btree_node_free_inmem(trans, b); ++ bch2_btree_node_free_inmem(trans, path, b); ++ ++ if (n3) ++ bch2_trans_node_add(trans, n3); ++ if (n2) ++ bch2_trans_node_add(trans, n2); ++ bch2_trans_node_add(trans, n1); + + if (n3) + six_unlock_intent(&n3->c.lock); @@ -28226,7 +28716,7 @@ index 0000000000000..9b0dedea7ed27 + return PTR_ERR(as); + + btree_split(as, trans, path, b, NULL, flags); -+ bch2_btree_update_done(as); ++ bch2_btree_update_done(as, trans); + + for (l = path->level + 1; btree_path_node(path, l) && !ret; l++) + ret = bch2_foreground_maybe_merge(trans, path, l, flags); @@ -28273,7 +28763,7 @@ index 0000000000000..9b0dedea7ed27 + if (ret) + goto err; + -+ sib_path->should_be_locked = true; ++ btree_path_set_should_be_locked(sib_path); + + m = sib_path->l[level].b; + @@ -28340,12 +28830,12 @@ index 0000000000000..9b0dedea7ed27 + if (ret) + goto err; + -+ trace_btree_merge(c, b); ++ trace_and_count(c, btree_node_merge, c, b); + + bch2_btree_interior_update_will_free_node(as, b); + bch2_btree_interior_update_will_free_node(as, m); + -+ n = bch2_btree_node_alloc(as, b->c.level); ++ n = bch2_btree_node_alloc(as, trans, b->c.level); + + SET_BTREE_NODE_SEQ(n->data, + max(BTREE_NODE_SEQ(b->data), @@ -28380,19 +28870,16 @@ index 0000000000000..9b0dedea7ed27 + + bch2_btree_update_get_open_buckets(as, n); + -+ six_lock_increment(&b->c.lock, SIX_LOCK_intent); -+ six_lock_increment(&m->c.lock, SIX_LOCK_intent); ++ bch2_btree_node_free_inmem(trans, path, b); ++ bch2_btree_node_free_inmem(trans, sib_path, m); + + bch2_trans_node_add(trans, n); + + bch2_trans_verify_paths(trans); + -+ bch2_btree_node_free_inmem(trans, b); -+ bch2_btree_node_free_inmem(trans, m); -+ + six_unlock_intent(&n->c.lock); + -+ bch2_btree_update_done(as); ++ bch2_btree_update_done(as, trans); + + bch2_time_stats_update(&c->times[BCH_TIME_btree_node_merge], start_time); +out: @@ -28426,13 +28913,13 @@ index 0000000000000..9b0dedea7ed27 + + bch2_btree_interior_update_will_free_node(as, b); + -+ n = bch2_btree_node_alloc_replacement(as, b); ++ n = bch2_btree_node_alloc_replacement(as, trans, b); + bch2_btree_update_add_new_node(as, n); + + bch2_btree_build_aux_trees(n); + six_unlock_write(&n->c.lock); + -+ trace_btree_rewrite(c, b); ++ trace_and_count(c, btree_node_rewrite, c, b); + + bch2_btree_node_write(c, n, SIX_LOCK_intent, 0); + @@ -28446,12 +28933,12 @@ index 0000000000000..9b0dedea7ed27 + + bch2_btree_update_get_open_buckets(as, n); + -+ six_lock_increment(&b->c.lock, SIX_LOCK_intent); ++ bch2_btree_node_free_inmem(trans, iter->path, b); ++ + bch2_trans_node_add(trans, n); -+ bch2_btree_node_free_inmem(trans, b); + six_unlock_intent(&n->c.lock); + -+ bch2_btree_update_done(as); ++ bch2_btree_update_done(as, trans); +out: + bch2_btree_path_downgrade(trans, iter->path); + return ret; @@ -28566,10 +29053,7 @@ index 
0000000000000..9b0dedea7ed27 + BUG_ON(iter2.path->level != b->c.level); + BUG_ON(bpos_cmp(iter2.path->pos, new_key->k.p)); + -+ btree_node_unlock(trans, iter2.path, iter2.path->level); -+ path_l(iter2.path)->b = BTREE_ITER_NO_NODE_UP; -+ iter2.path->level++; -+ btree_path_set_dirty(iter2.path, BTREE_ITER_NEED_TRAVERSE); ++ btree_path_set_level_up(trans, iter2.path); + + bch2_btree_path_check_sort(trans, iter2.path, 0); + @@ -28601,7 +29085,7 @@ index 0000000000000..9b0dedea7ed27 + if (ret) + goto err; + -+ bch2_btree_node_lock_write(trans, iter->path, b); ++ bch2_btree_node_lock_write_nofail(trans, iter->path, &b->c); + + if (new_hash) { + mutex_lock(&c->btree_cache.lock); @@ -28875,10 +29359,10 @@ index 0000000000000..9b0dedea7ed27 +} diff --git a/fs/bcachefs/btree_update_interior.h b/fs/bcachefs/btree_update_interior.h new file mode 100644 -index 0000000000000..adfc6c24a7a40 +index 000000000000..7af810df8348 --- /dev/null +++ b/fs/bcachefs/btree_update_interior.h -@@ -0,0 +1,321 @@ +@@ -0,0 +1,322 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BCACHEFS_BTREE_UPDATE_INTERIOR_H +#define _BCACHEFS_BTREE_UPDATE_INTERIOR_H @@ -28998,6 +29482,7 @@ index 0000000000000..adfc6c24a7a40 +}; + +struct btree *__bch2_btree_node_alloc_replacement(struct btree_update *, ++ struct btree_trans *, + struct btree *, + struct bkey_format); + @@ -29202,10 +29687,10 @@ index 0000000000000..adfc6c24a7a40 +#endif /* _BCACHEFS_BTREE_UPDATE_INTERIOR_H */ diff --git a/fs/bcachefs/btree_update_leaf.c b/fs/bcachefs/btree_update_leaf.c new file mode 100644 -index 0000000000000..e2ecbd3bca778 +index 000000000000..e9518fbc92a4 --- /dev/null +++ b/fs/bcachefs/btree_update_leaf.c -@@ -0,0 +1,1800 @@ +@@ -0,0 +1,1823 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "bcachefs.h" @@ -29289,7 +29774,7 @@ index 0000000000000..e2ecbd3bca778 + struct btree_path *path, + struct btree *b) +{ -+ bch2_btree_node_lock_write(trans, path, b); ++ bch2_btree_node_lock_write_nofail(trans, path, &b->c); + bch2_btree_node_prep_for_write(trans, path, b); +} + @@ -29377,10 +29862,13 @@ index 0000000000000..e2ecbd3bca778 + struct bch_fs *c = container_of(j, struct bch_fs, journal); + struct btree_write *w = container_of(pin, struct btree_write, journal); + struct btree *b = container_of(w, struct btree, writes[i]); ++ struct btree_trans trans; + unsigned long old, new, v; + unsigned idx = w - b->writes; + -+ six_lock_read(&b->c.lock, NULL, NULL); ++ bch2_trans_init(&trans, c, 0, 0); ++ ++ btree_node_lock_nopath_nofail(&trans, &b->c, SIX_LOCK_read); + v = READ_ONCE(b->flags); + + do { @@ -29396,6 +29884,8 @@ index 0000000000000..e2ecbd3bca778 + + btree_node_write_if_need(c, b, SIX_LOCK_read); + six_unlock_read(&b->c.lock); ++ ++ bch2_trans_exit(&trans); + return 0; +} + @@ -29493,7 +29983,7 @@ index 0000000000000..e2ecbd3bca778 + + ret = bch2_trans_relock(trans); + if (ret) { -+ trace_trans_restart_journal_preres_get(trans->fn, trace_ip); ++ trace_and_count(c, trans_restart_journal_preres_get, trans, trace_ip, 0); + return ret; + } + @@ -29583,9 +30073,7 @@ index 0000000000000..e2ecbd3bca778 + * Keys returned by peek() are no longer valid pointers, so we need a + * transaction restart: + */ -+ trace_trans_restart_key_cache_key_realloced(trans->fn, _RET_IP_, -+ path->btree_id, &path->pos, -+ old_u64s, new_u64s); ++ trace_and_count(c, trans_restart_key_cache_key_realloced, trans, _RET_IP_, path, old_u64s, new_u64s); + return btree_trans_restart_nounlock(trans, BCH_ERR_transaction_restart_key_cache_realloced); +} + @@ -29777,7 +30265,7 @@ 
index 0000000000000..e2ecbd3bca778 + int ret; + + if (race_fault()) { -+ trace_trans_restart_fault_inject(trans->fn, trace_ip); ++ trace_and_count(c, trans_restart_fault_inject, trans, trace_ip); + return btree_trans_restart_nounlock(trans, BCH_ERR_transaction_restart_fault_inject); + } + @@ -29930,8 +30418,10 @@ index 0000000000000..e2ecbd3bca778 + btree_insert_key_leaf(trans, i); + else if (!i->key_cache_already_flushed) + bch2_btree_insert_key_cached(trans, i->path, i->k); -+ else ++ else { + bch2_btree_key_cache_drop(trans, i->path); ++ btree_path_set_dirty(i->path, BTREE_ITER_NEED_TRAVERSE); ++ } + } + + return ret; @@ -29949,11 +30439,12 @@ index 0000000000000..e2ecbd3bca778 +static inline void upgrade_readers(struct btree_trans *trans, struct btree_path *path) +{ + struct btree *b = path_l(path)->b; ++ unsigned l; + + do { -+ if (path->nodes_locked && -+ path->nodes_locked != path->nodes_intent_locked) -+ path_upgrade_readers(trans, path); ++ for (l = 0; l < BTREE_MAX_DEPTH; l++) ++ if (btree_node_read_locked(path, l)) ++ path_upgrade_readers(trans, path); + } while ((path = prev_btree_path(trans, path)) && + path_l(path)->b == b); +} @@ -29972,11 +30463,13 @@ index 0000000000000..e2ecbd3bca778 + ? trans->paths + trans->sorted[i + 1] + : NULL; + -+ if (path->nodes_locked) { -+ if (path->nodes_intent_locked) -+ nr_intent++; -+ else -+ nr_read++; ++ switch (btree_node_locked_type(path, path->level)) { ++ case BTREE_NODE_READ_LOCKED: ++ nr_read++; ++ break; ++ case BTREE_NODE_INTENT_LOCKED: ++ nr_intent++; ++ break; + } + + if (!next || path_l(path)->b != path_l(next)->b) { @@ -29999,7 +30492,7 @@ index 0000000000000..e2ecbd3bca778 + //if (path == pos) + // break; + -+ if (path->nodes_locked != path->nodes_intent_locked && ++ if (btree_node_read_locked(path, path->level) && + !bch2_btree_path_upgrade(trans, path, path->level + 1)) + return true; + } @@ -30016,12 +30509,19 @@ index 0000000000000..e2ecbd3bca778 + if (same_leaf_as_prev(trans, i)) + continue; + ++ /* ++ * six locks are unfair, and read locks block while a thread ++ * wants a write lock: thus, we need to tell the cycle detector ++ * we have a write lock _before_ taking the lock: ++ */ ++ mark_btree_node_locked_noreset(i->path, i->level, SIX_LOCK_write); ++ + if (!six_trylock_write(&insert_l(i)->b->c.lock)) { + if (have_conflicting_read_lock(trans, i->path)) + goto fail; + + ret = btree_node_lock_type(trans, i->path, -+ insert_l(i)->b, ++ &insert_l(i)->b->c, + i->path->pos, i->level, + SIX_LOCK_write, NULL, NULL); + BUG_ON(ret); @@ -30032,6 +30532,8 @@ index 0000000000000..e2ecbd3bca778 + + return 0; +fail: ++ mark_btree_node_locked_noreset(i->path, i->level, SIX_LOCK_intent); ++ + while (--i >= trans->updates) { + if (same_leaf_as_prev(trans, i)) + continue; @@ -30039,7 +30541,7 @@ index 0000000000000..e2ecbd3bca778 + bch2_btree_node_unlock_write_inlined(trans, i->path, insert_l(i)->b); + } + -+ trace_trans_restart_would_deadlock_write(trans->fn); ++ trace_and_count(trans->c, trans_restart_would_deadlock_write, trans); + return btree_trans_restart(trans, BCH_ERR_transaction_restart_would_deadlock_write); +} + @@ -30172,8 +30674,7 @@ index 0000000000000..e2ecbd3bca778 + case BTREE_INSERT_BTREE_NODE_FULL: + ret = bch2_btree_split_leaf(trans, i->path, trans->flags); + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) -+ trace_trans_restart_btree_node_split(trans->fn, trace_ip, -+ i->btree_id, &i->path->pos); ++ trace_and_count(c, trans_restart_btree_node_split, trans, trace_ip, i->path); + break; + case 
BTREE_INSERT_NEED_MARK_REPLICAS: + bch2_trans_unlock(trans); @@ -30184,7 +30685,7 @@ index 0000000000000..e2ecbd3bca778 + + ret = bch2_trans_relock(trans); + if (ret) -+ trace_trans_restart_mark_replicas(trans->fn, trace_ip); ++ trace_and_count(c, trans_restart_mark_replicas, trans, trace_ip); + break; + case BTREE_INSERT_NEED_JOURNAL_RES: + bch2_trans_unlock(trans); @@ -30201,12 +30702,12 @@ index 0000000000000..e2ecbd3bca778 + + ret = bch2_trans_relock(trans); + if (ret) -+ trace_trans_restart_journal_res_get(trans->fn, trace_ip); ++ trace_and_count(c, trans_restart_journal_res_get, trans, trace_ip); + break; + case BTREE_INSERT_NEED_JOURNAL_RECLAIM: + bch2_trans_unlock(trans); + -+ trace_trans_blocked_journal_reclaim(trans->fn, trace_ip); ++ trace_and_count(c, trans_blocked_journal_reclaim, trans, trace_ip); + + wait_event_freezable(c->journal.reclaim_wait, + (ret = journal_reclaim_wait_done(c))); @@ -30215,7 +30716,7 @@ index 0000000000000..e2ecbd3bca778 + + ret = bch2_trans_relock(trans); + if (ret) -+ trace_trans_restart_journal_reclaim(trans->fn, trace_ip); ++ trace_and_count(c, trans_restart_journal_reclaim, trans, trace_ip); + break; + default: + BUG_ON(ret >= 0); @@ -30316,8 +30817,7 @@ index 0000000000000..e2ecbd3bca778 + BUG_ON(!i->path->should_be_locked); + + if (unlikely(!bch2_btree_path_upgrade(trans, i->path, i->level + 1))) { -+ trace_trans_restart_upgrade(trans->fn, _RET_IP_, -+ i->btree_id, &i->path->pos); ++ trace_and_count(c, trans_restart_upgrade, trans, _RET_IP_, i->path); + ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_upgrade); + goto out; + } @@ -30358,7 +30858,7 @@ index 0000000000000..e2ecbd3bca778 + if (ret) + goto err; + -+ trace_transaction_commit(trans->fn, _RET_IP_); ++ trace_and_count(c, transaction_commit, trans, _RET_IP_); +out: + bch2_journal_preres_put(&c->journal, &trans->journal_preres); + @@ -30761,7 +31261,7 @@ index 0000000000000..e2ecbd3bca778 + if (ret) + goto err; + -+ btree_path->should_be_locked = true; ++ btree_path_set_should_be_locked(btree_path); + ret = bch2_trans_update_by_path_trace(trans, btree_path, k, flags, ip); +err: + bch2_path_put(trans, btree_path, true); @@ -30827,11 +31327,11 @@ index 0000000000000..e2ecbd3bca778 + ck = (void *) iter->key_cache_path->l[0].b; + + if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) { -+ trace_trans_restart_key_cache_raced(trans->fn, _RET_IP_); ++ trace_and_count(trans->c, trans_restart_key_cache_raced, trans, _RET_IP_); + return btree_trans_restart(trans, BCH_ERR_transaction_restart_key_cache_raced); + } + -+ iter->key_cache_path->should_be_locked = true; ++ btree_path_set_should_be_locked(iter->key_cache_path); + } + + path = iter->key_cache_path; @@ -30903,15 +31403,16 @@ index 0000000000000..e2ecbd3bca778 + unsigned update_flags, + u64 *journal_seq) +{ ++ u32 restart_count = trans->restart_count; + struct btree_iter iter; + struct bkey_s_c k; + int ret = 0; + + bch2_trans_iter_init(trans, &iter, id, start, BTREE_ITER_INTENT); +retry: -+ while ((bch2_trans_begin(trans), -+ (k = bch2_btree_iter_peek(&iter)).k) && -+ !(ret = bkey_err(k)) && ++ while ((k = bch2_btree_iter_peek(&iter)).k && ++ !(ret = bkey_err(k) ?: ++ btree_trans_too_many_iters(trans)) && + bkey_cmp(iter.pos, end) < 0) { + struct disk_reservation disk_res = + bch2_disk_reservation_init(trans->c, 0); @@ -30957,11 +31458,15 @@ index 0000000000000..e2ecbd3bca778 + } + + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) { ++ bch2_trans_begin(trans); + ret = 0; + goto retry; + } + + bch2_trans_iter_exit(trans, &iter); 
++ ++ if (!ret && trans_was_restarted(trans, restart_count)) ++ ret = -BCH_ERR_transaction_restart_nested; + return ret; +} + @@ -30975,9 +31480,12 @@ index 0000000000000..e2ecbd3bca778 + unsigned update_flags, + u64 *journal_seq) +{ -+ return bch2_trans_do(c, NULL, journal_seq, 0, -+ bch2_btree_delete_range_trans(&trans, id, start, end, -+ update_flags, journal_seq)); ++ int ret = bch2_trans_run(c, ++ bch2_btree_delete_range_trans(&trans, id, start, end, ++ update_flags, journal_seq)); ++ if (ret == -BCH_ERR_transaction_restart_nested) ++ ret = 0; ++ return ret; +} + +int bch2_trans_log_msg(struct btree_trans *trans, const char *msg) @@ -31008,7 +31516,7 @@ index 0000000000000..e2ecbd3bca778 +} diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c new file mode 100644 -index 0000000000000..b4be2122c2d5e +index 000000000000..b4be2122c2d5 --- /dev/null +++ b/fs/bcachefs/buckets.c @@ -0,0 +1,2113 @@ @@ -33127,7 +33635,7 @@ index 0000000000000..b4be2122c2d5e +} diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h new file mode 100644 -index 0000000000000..6881502d95f1a +index 000000000000..6881502d95f1 --- /dev/null +++ b/fs/bcachefs/buckets.h @@ -0,0 +1,300 @@ @@ -33433,7 +33941,7 @@ index 0000000000000..6881502d95f1a +#endif /* _BUCKETS_H */ diff --git a/fs/bcachefs/buckets_types.h b/fs/bcachefs/buckets_types.h new file mode 100644 -index 0000000000000..1dbba7d906dd8 +index 000000000000..1dbba7d906dd --- /dev/null +++ b/fs/bcachefs/buckets_types.h @@ -0,0 +1,103 @@ @@ -33542,7 +34050,7 @@ index 0000000000000..1dbba7d906dd8 +#endif /* _BUCKETS_TYPES_H */ diff --git a/fs/bcachefs/buckets_waiting_for_journal.c b/fs/bcachefs/buckets_waiting_for_journal.c new file mode 100644 -index 0000000000000..2e5b955080de4 +index 000000000000..2e5b955080de --- /dev/null +++ b/fs/bcachefs/buckets_waiting_for_journal.c @@ -0,0 +1,167 @@ @@ -33715,7 +34223,7 @@ index 0000000000000..2e5b955080de4 +} diff --git a/fs/bcachefs/buckets_waiting_for_journal.h b/fs/bcachefs/buckets_waiting_for_journal.h new file mode 100644 -index 0000000000000..d2ae19cbe18c4 +index 000000000000..d2ae19cbe18c --- /dev/null +++ b/fs/bcachefs/buckets_waiting_for_journal.h @@ -0,0 +1,15 @@ @@ -33736,7 +34244,7 @@ index 0000000000000..d2ae19cbe18c4 +#endif /* _BUCKETS_WAITING_FOR_JOURNAL_H */ diff --git a/fs/bcachefs/buckets_waiting_for_journal_types.h b/fs/bcachefs/buckets_waiting_for_journal_types.h new file mode 100644 -index 0000000000000..fea7f944d0ed3 +index 000000000000..fea7f944d0ed --- /dev/null +++ b/fs/bcachefs/buckets_waiting_for_journal_types.h @@ -0,0 +1,23 @@ @@ -33765,7 +34273,7 @@ index 0000000000000..fea7f944d0ed3 +#endif /* _BUCKETS_WAITING_FOR_JOURNAL_TYPES_H */ diff --git a/fs/bcachefs/chardev.c b/fs/bcachefs/chardev.c new file mode 100644 -index 0000000000000..dbb7e5e0b35b8 +index 000000000000..dbb7e5e0b35b --- /dev/null +++ b/fs/bcachefs/chardev.c @@ -0,0 +1,760 @@ @@ -34531,7 +35039,7 @@ index 0000000000000..dbb7e5e0b35b8 +#endif /* NO_BCACHEFS_CHARDEV */ diff --git a/fs/bcachefs/chardev.h b/fs/bcachefs/chardev.h new file mode 100644 -index 0000000000000..3a4890d39ff98 +index 000000000000..3a4890d39ff9 --- /dev/null +++ b/fs/bcachefs/chardev.h @@ -0,0 +1,31 @@ @@ -34568,7 +35076,7 @@ index 0000000000000..3a4890d39ff98 +#endif /* _BCACHEFS_CHARDEV_H */ diff --git a/fs/bcachefs/checksum.c b/fs/bcachefs/checksum.c new file mode 100644 -index 0000000000000..b5850a761b910 +index 000000000000..b5850a761b91 --- /dev/null +++ b/fs/bcachefs/checksum.c @@ -0,0 +1,712 @@ @@ -35286,7 +35794,7 @@ index 
0000000000000..b5850a761b910 +} diff --git a/fs/bcachefs/checksum.h b/fs/bcachefs/checksum.h new file mode 100644 -index 0000000000000..c86c3c05d6205 +index 000000000000..c86c3c05d620 --- /dev/null +++ b/fs/bcachefs/checksum.h @@ -0,0 +1,204 @@ @@ -35496,7 +36004,7 @@ index 0000000000000..c86c3c05d6205 +#endif /* _BCACHEFS_CHECKSUM_H */ diff --git a/fs/bcachefs/clock.c b/fs/bcachefs/clock.c new file mode 100644 -index 0000000000000..f3ffdbc38485b +index 000000000000..f3ffdbc38485 --- /dev/null +++ b/fs/bcachefs/clock.c @@ -0,0 +1,191 @@ @@ -35693,7 +36201,7 @@ index 0000000000000..f3ffdbc38485b +} diff --git a/fs/bcachefs/clock.h b/fs/bcachefs/clock.h new file mode 100644 -index 0000000000000..70a0f7436c844 +index 000000000000..70a0f7436c84 --- /dev/null +++ b/fs/bcachefs/clock.h @@ -0,0 +1,38 @@ @@ -35737,7 +36245,7 @@ index 0000000000000..70a0f7436c844 +#endif /* _BCACHEFS_CLOCK_H */ diff --git a/fs/bcachefs/clock_types.h b/fs/bcachefs/clock_types.h new file mode 100644 -index 0000000000000..5fae0012d808f +index 000000000000..5fae0012d808 --- /dev/null +++ b/fs/bcachefs/clock_types.h @@ -0,0 +1,37 @@ @@ -35780,7 +36288,7 @@ index 0000000000000..5fae0012d808f +#endif /* _BCACHEFS_CLOCK_TYPES_H */ diff --git a/fs/bcachefs/compress.c b/fs/bcachefs/compress.c new file mode 100644 -index 0000000000000..f692f35a6a98e +index 000000000000..f692f35a6a98 --- /dev/null +++ b/fs/bcachefs/compress.c @@ -0,0 +1,639 @@ @@ -36425,7 +36933,7 @@ index 0000000000000..f692f35a6a98e +} diff --git a/fs/bcachefs/compress.h b/fs/bcachefs/compress.h new file mode 100644 -index 0000000000000..4bab1f61b3b50 +index 000000000000..4bab1f61b3b5 --- /dev/null +++ b/fs/bcachefs/compress.h @@ -0,0 +1,18 @@ @@ -36449,7 +36957,7 @@ index 0000000000000..4bab1f61b3b50 +#endif /* _BCACHEFS_COMPRESS_H */ diff --git a/fs/bcachefs/counters.c b/fs/bcachefs/counters.c new file mode 100644 -index 0000000000000..745f856e6d3e9 +index 000000000000..745f856e6d3e --- /dev/null +++ b/fs/bcachefs/counters.c @@ -0,0 +1,107 @@ @@ -36562,7 +37070,7 @@ index 0000000000000..745f856e6d3e9 +}; diff --git a/fs/bcachefs/counters.h b/fs/bcachefs/counters.h new file mode 100644 -index 0000000000000..4778aa19bf346 +index 000000000000..4778aa19bf34 --- /dev/null +++ b/fs/bcachefs/counters.h @@ -0,0 +1,17 @@ @@ -36585,7 +37093,7 @@ index 0000000000000..4778aa19bf346 +#endif // _BCACHEFS_COUNTERS_H diff --git a/fs/bcachefs/darray.h b/fs/bcachefs/darray.h new file mode 100644 -index 0000000000000..519ab9b96e67f +index 000000000000..519ab9b96e67 --- /dev/null +++ b/fs/bcachefs/darray.h @@ -0,0 +1,77 @@ @@ -36668,10 +37176,10 @@ index 0000000000000..519ab9b96e67f +#endif /* _BCACHEFS_DARRAY_H */ diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c new file mode 100644 -index 0000000000000..3b442b01ca869 +index 000000000000..cb25efb68d3f --- /dev/null +++ b/fs/bcachefs/data_update.c -@@ -0,0 +1,376 @@ +@@ -0,0 +1,373 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "bcachefs.h" @@ -36905,9 +37413,12 @@ index 0000000000000..3b442b01ca869 + m->data_opts.btree_insert_flags); + if (!ret) { + bch2_btree_iter_set_pos(&iter, next_pos); -+ atomic_long_inc(&c->extent_migrate_done); ++ + if (ec_ob) + bch2_ob_add_backpointer(c, ec_ob, &insert->k); ++ ++ this_cpu_add(c->counters[BCH_COUNTER_move_extent_finish], new->k.size); ++ trace_move_extent_finish(&new->k); + } +err: + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) @@ -36922,22 +37433,16 @@ index 0000000000000..3b442b01ca869 + } + continue; +nomatch: -+ if 
(IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) { -+ struct printbuf buf = PRINTBUF; -+ -+ bch2_bkey_val_to_text(&buf, c, old); -+ bch_info(c, "no match for %s", buf.buf); -+ printbuf_exit(&buf); -+ } -+ + if (m->ctxt) { + BUG_ON(k.k->p.offset <= iter.pos.offset); + atomic64_inc(&m->ctxt->stats->keys_raced); + atomic64_add(k.k->p.offset - iter.pos.offset, + &m->ctxt->stats->sectors_raced); + } -+ atomic_long_inc(&c->extent_migrate_raced); -+ trace_move_race(&new->k); ++ ++ this_cpu_add(c->counters[BCH_COUNTER_move_extent_race], new->k.size); ++ trace_move_extent_race(&new->k); ++ + bch2_btree_iter_advance(&iter); + goto next; + } @@ -37050,7 +37555,7 @@ index 0000000000000..3b442b01ca869 +} diff --git a/fs/bcachefs/data_update.h b/fs/bcachefs/data_update.h new file mode 100644 -index 0000000000000..e64505453a550 +index 000000000000..e64505453a55 --- /dev/null +++ b/fs/bcachefs/data_update.h @@ -0,0 +1,38 @@ @@ -37094,10 +37599,10 @@ index 0000000000000..e64505453a550 +#endif /* _BCACHEFS_DATA_UPDATE_H */ diff --git a/fs/bcachefs/debug.c b/fs/bcachefs/debug.c new file mode 100644 -index 0000000000000..cd37a1016e259 +index 000000000000..fb518d59a134 --- /dev/null +++ b/fs/bcachefs/debug.c -@@ -0,0 +1,764 @@ +@@ -0,0 +1,781 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Assorted bcachefs debug code @@ -37299,7 +37804,7 @@ index 0000000000000..cd37a1016e259 + ssize_t ret; /* bytes read so far */ +}; + -+static int flush_buf(struct dump_iter *i) ++static ssize_t flush_buf(struct dump_iter *i) +{ + if (i->buf.pos) { + size_t bytes = min_t(size_t, i->buf.pos, i->size); @@ -37315,7 +37820,7 @@ index 0000000000000..cd37a1016e259 + memmove(i->buf.buf, i->buf.buf + bytes, i->buf.pos); + } + -+ return 0; ++ return i->size ? 0 : i->ret; +} + +static int bch2_dump_open(struct inode *inode, struct file *file) @@ -37353,7 +37858,7 @@ index 0000000000000..cd37a1016e259 + struct btree_trans trans; + struct btree_iter iter; + struct bkey_s_c k; -+ int err; ++ ssize_t ret; + + i->ubuf = buf; + i->size = size; @@ -37361,14 +37866,11 @@ index 0000000000000..cd37a1016e259 + + bch2_trans_init(&trans, i->c, 0, 0); + -+ err = for_each_btree_key2(&trans, iter, i->id, i->from, ++ ret = for_each_btree_key2(&trans, iter, i->id, i->from, + BTREE_ITER_PREFETCH| + BTREE_ITER_ALL_SNAPSHOTS, k, ({ -+ err = flush_buf(i); -+ if (err) -+ break; -+ -+ if (!i->size) ++ ret = flush_buf(i); ++ if (ret) + break; + + bch2_bkey_val_to_text(&i->buf, i->c, k); @@ -37377,12 +37879,12 @@ index 0000000000000..cd37a1016e259 + })); + i->from = iter.pos; + -+ if (!err) -+ err = flush_buf(i); ++ if (!ret) ++ ret = flush_buf(i); + + bch2_trans_exit(&trans); + -+ return err ?: i->ret; ++ return ret ?: i->ret; +} + +static const struct file_operations btree_debug_ops = { @@ -37399,43 +37901,39 @@ index 0000000000000..cd37a1016e259 + struct btree_trans trans; + struct btree_iter iter; + struct btree *b; -+ int err; ++ ssize_t ret; + + i->ubuf = buf; + i->size = size; + i->ret = 0; + -+ err = flush_buf(i); -+ if (err) -+ return err; ++ ret = flush_buf(i); ++ if (ret) ++ return ret; + -+ if (!i->size || !bpos_cmp(SPOS_MAX, i->from)) ++ if (!bpos_cmp(SPOS_MAX, i->from)) + return i->ret; + + bch2_trans_init(&trans, i->c, 0, 0); + -+ for_each_btree_node(&trans, iter, i->id, i->from, 0, b, err) { -+ bch2_btree_node_to_text(&i->buf, i->c, b); -+ err = flush_buf(i); -+ if (err) ++ for_each_btree_node(&trans, iter, i->id, i->from, 0, b, ret) { ++ ret = flush_buf(i); ++ if (ret) + break; + -+ /* -+ * can't easily correctly restart a btree node traversal across -+ * 
all nodes, meh -+ */ ++ bch2_btree_node_to_text(&i->buf, i->c, b); + i->from = bpos_cmp(SPOS_MAX, b->key.k.p) + ? bpos_successor(b->key.k.p) + : b->key.k.p; -+ -+ if (!i->size) -+ break; + } + bch2_trans_iter_exit(&trans, &iter); + + bch2_trans_exit(&trans); + -+ return err < 0 ? err : i->ret; ++ if (!ret) ++ ret = flush_buf(i); ++ ++ return ret ?: i->ret; +} + +static const struct file_operations btree_format_debug_ops = { @@ -37452,33 +37950,27 @@ index 0000000000000..cd37a1016e259 + struct btree_trans trans; + struct btree_iter iter; + struct bkey_s_c k; -+ int err; ++ ssize_t ret; + + i->ubuf = buf; + i->size = size; + i->ret = 0; + -+ err = flush_buf(i); -+ if (err) -+ return err; -+ -+ if (!i->size) -+ return i->ret; ++ ret = flush_buf(i); ++ if (ret) ++ return ret; + + bch2_trans_init(&trans, i->c, 0, 0); + -+ err = for_each_btree_key2(&trans, iter, i->id, i->from, ++ ret = for_each_btree_key2(&trans, iter, i->id, i->from, + BTREE_ITER_PREFETCH| + BTREE_ITER_ALL_SNAPSHOTS, k, ({ + struct btree_path_level *l = &iter.path->l[0]; + struct bkey_packed *_k = + bch2_btree_node_iter_peek(&l->iter, l->b); + -+ err = flush_buf(i); -+ if (err) -+ break; -+ -+ if (!i->size) ++ ret = flush_buf(i); ++ if (ret) + break; + + if (bpos_cmp(l->b->key.k.p, i->prev_node) > 0) { @@ -37491,12 +37983,12 @@ index 0000000000000..cd37a1016e259 + })); + i->from = iter.pos; + -+ if (!err) -+ err = flush_buf(i); -+ + bch2_trans_exit(&trans); + -+ return err ?: i->ret; ++ if (!ret) ++ ret = flush_buf(i); ++ ++ return ret ?: i->ret; +} + +static const struct file_operations bfloat_failed_debug_ops = { @@ -37509,7 +38001,8 @@ index 0000000000000..cd37a1016e259 +static void bch2_cached_btree_node_to_text(struct printbuf *out, struct bch_fs *c, + struct btree *b) +{ -+ out->tabstops[0] = 32; ++ if (!out->nr_tabstops) ++ printbuf_tabstop_push(out, 32); + + prt_printf(out, "%px btree=%s l=%u ", + b, @@ -37566,7 +38059,7 @@ index 0000000000000..cd37a1016e259 + struct dump_iter *i = file->private_data; + struct bch_fs *c = i->c; + bool done = false; -+ int err; ++ ssize_t ret = 0; + + i->ubuf = buf; + i->size = size; @@ -37577,12 +38070,9 @@ index 0000000000000..cd37a1016e259 + struct rhash_head *pos; + struct btree *b; + -+ err = flush_buf(i); -+ if (err) -+ return err; -+ -+ if (!i->size) -+ break; ++ ret = flush_buf(i); ++ if (ret) ++ return ret; + + rcu_read_lock(); + i->buf.atomic++; @@ -37600,9 +38090,12 @@ index 0000000000000..cd37a1016e259 + } while (!done); + + if (i->buf.allocation_failure) -+ return -ENOMEM; ++ ret = -ENOMEM; + -+ return i->ret; ++ if (!ret) ++ ret = flush_buf(i); ++ ++ return ret ?: i->ret; +} + +static const struct file_operations cached_btree_nodes_ops = { @@ -37638,7 +38131,7 @@ index 0000000000000..cd37a1016e259 + struct dump_iter *i = file->private_data; + struct bch_fs *c = i->c; + struct btree_trans *trans; -+ int err; ++ ssize_t ret = 0; + + i->ubuf = buf; + i->size = size; @@ -37649,12 +38142,9 @@ index 0000000000000..cd37a1016e259 + if (trans->task->pid <= i->iter) + continue; + -+ err = flush_buf(i); -+ if (err) -+ return err; -+ -+ if (!i->size) -+ break; ++ ret = flush_buf(i); ++ if (ret) ++ return ret; + + bch2_btree_trans_to_text(&i->buf, trans); + @@ -37670,9 +38160,12 @@ index 0000000000000..cd37a1016e259 + mutex_unlock(&c->btree_trans_lock); + + if (i->buf.allocation_failure) -+ return -ENOMEM; ++ ret = -ENOMEM; + -+ return i->ret; ++ if (!ret) ++ ret = flush_buf(i); ++ ++ return ret ?: i->ret; +} + +static const struct file_operations btree_transactions_ops = { @@ 
-37751,14 +38244,16 @@ index 0000000000000..cd37a1016e259 + size_t size, loff_t *ppos) +{ + struct dump_iter *i = file->private_data; -+ struct lock_held_stats *lhs = &i->c->lock_held_stats; ++ struct bch_fs *c = i->c; + int err; + + i->ubuf = buf; + i->size = size; + i->ret = 0; + -+ while (lhs->names[i->iter] != 0 && i->iter < BCH_LOCK_TIME_NR) { ++ while (1) { ++ struct btree_transaction_stats *s = &c->btree_transaction_stats[i->iter]; ++ + err = flush_buf(i); + if (err) + return err; @@ -37766,11 +38261,40 @@ index 0000000000000..cd37a1016e259 + if (!i->size) + break; + -+ prt_printf(&i->buf, "%s:", lhs->names[i->iter]); ++ if (i->iter == ARRAY_SIZE(c->btree_transaction_fns) || ++ !c->btree_transaction_fns[i->iter]) ++ break; ++ ++ prt_printf(&i->buf, "%s: ", c->btree_transaction_fns[i->iter]); + prt_newline(&i->buf); -+ printbuf_indent_add(&i->buf, 8); -+ bch2_time_stats_to_text(&i->buf, &lhs->times[i->iter]); -+ printbuf_indent_sub(&i->buf, 8); ++ printbuf_indent_add(&i->buf, 2); ++ ++ mutex_lock(&s->lock); ++ ++ prt_printf(&i->buf, "Max mem used: %u", s->max_mem); ++ prt_newline(&i->buf); ++ ++ if (IS_ENABLED(CONFIG_BCACHEFS_LOCK_TIME_STATS)) { ++ prt_printf(&i->buf, "Lock hold times:"); ++ prt_newline(&i->buf); ++ ++ printbuf_indent_add(&i->buf, 2); ++ bch2_time_stats_to_text(&i->buf, &s->lock_hold_times); ++ printbuf_indent_sub(&i->buf, 2); ++ } ++ ++ if (s->max_paths_text) { ++ prt_printf(&i->buf, "Maximum allocated btree paths (%u):", s->nr_max_paths); ++ prt_newline(&i->buf); ++ ++ printbuf_indent_add(&i->buf, 2); ++ prt_str_indented(&i->buf, s->max_paths_text); ++ printbuf_indent_sub(&i->buf, 2); ++ } ++ ++ mutex_unlock(&s->lock); ++ ++ printbuf_indent_sub(&i->buf, 2); + prt_newline(&i->buf); + i->iter++; + } @@ -37816,10 +38340,8 @@ index 0000000000000..cd37a1016e259 + debugfs_create_file("journal_pins", 0400, c->fs_debug_dir, + c->btree_debug, &journal_pins_ops); + -+ if (IS_ENABLED(CONFIG_BCACHEFS_LOCK_TIME_STATS)) { -+ debugfs_create_file("lock_held_stats", 0400, c->fs_debug_dir, -+ c, &lock_held_stats_op); -+ } ++ debugfs_create_file("btree_transaction_stats", 0400, c->fs_debug_dir, ++ c, &lock_held_stats_op); + + c->btree_debug_dir = debugfs_create_dir("btrees", c->fs_debug_dir); + if (IS_ERR_OR_NULL(c->btree_debug_dir)) @@ -37864,7 +38386,7 @@ index 0000000000000..cd37a1016e259 +} diff --git a/fs/bcachefs/debug.h b/fs/bcachefs/debug.h new file mode 100644 -index 0000000000000..0b86736e5e1be +index 000000000000..0b86736e5e1b --- /dev/null +++ b/fs/bcachefs/debug.h @@ -0,0 +1,30 @@ @@ -37900,7 +38422,7 @@ index 0000000000000..0b86736e5e1be +#endif /* _BCACHEFS_DEBUG_H */ diff --git a/fs/bcachefs/dirent.c b/fs/bcachefs/dirent.c new file mode 100644 -index 0000000000000..4d942d224a088 +index 000000000000..4d942d224a08 --- /dev/null +++ b/fs/bcachefs/dirent.c @@ -0,0 +1,565 @@ @@ -38471,7 +38993,7 @@ index 0000000000000..4d942d224a088 +} diff --git a/fs/bcachefs/dirent.h b/fs/bcachefs/dirent.h new file mode 100644 -index 0000000000000..b1466932c7687 +index 000000000000..b1466932c768 --- /dev/null +++ b/fs/bcachefs/dirent.h @@ -0,0 +1,67 @@ @@ -38544,10 +39066,10 @@ index 0000000000000..b1466932c7687 +#endif /* _BCACHEFS_DIRENT_H */ diff --git a/fs/bcachefs/disk_groups.c b/fs/bcachefs/disk_groups.c new file mode 100644 -index 0000000000000..7bd4413671d22 +index 000000000000..22b6b841d836 --- /dev/null +++ b/fs/bcachefs/disk_groups.c -@@ -0,0 +1,506 @@ +@@ -0,0 +1,508 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "bcachefs.h" +#include "disk_groups.h" @@ -38934,32 
+39456,34 @@ index 0000000000000..7bd4413671d22 + prt_printf(out, "invalid label %u", v); +} + -+int bch2_dev_group_set(struct bch_fs *c, struct bch_dev *ca, const char *name) ++int __bch2_dev_group_set(struct bch_fs *c, struct bch_dev *ca, const char *name) +{ + struct bch_member *mi; -+ int v = -1; -+ int ret = 0; -+ -+ mutex_lock(&c->sb_lock); ++ int ret, v = -1; + + if (!strlen(name) || !strcmp(name, "none")) -+ goto write_sb; ++ return 0; + + v = bch2_disk_path_find_or_create(&c->disk_sb, name); -+ if (v < 0) { -+ mutex_unlock(&c->sb_lock); ++ if (v < 0) + return v; -+ } + + ret = bch2_sb_disk_groups_to_cpu(c); + if (ret) -+ goto unlock; -+write_sb: ++ return ret; ++ + mi = &bch2_sb_get_members(c->disk_sb.sb)->members[ca->dev_idx]; + SET_BCH_MEMBER_GROUP(mi, v + 1); ++ return 0; ++} + -+ bch2_write_super(c); -+unlock: ++int bch2_dev_group_set(struct bch_fs *c, struct bch_dev *ca, const char *name) ++{ ++ int ret; ++ ++ mutex_lock(&c->sb_lock); ++ ret = __bch2_dev_group_set(c, ca, name) ?: ++ bch2_write_super(c); + mutex_unlock(&c->sb_lock); + + return ret; @@ -39056,10 +39580,10 @@ index 0000000000000..7bd4413671d22 +} diff --git a/fs/bcachefs/disk_groups.h b/fs/bcachefs/disk_groups.h new file mode 100644 -index 0000000000000..de915480514b1 +index 000000000000..e4470c357a66 --- /dev/null +++ b/fs/bcachefs/disk_groups.h -@@ -0,0 +1,90 @@ +@@ -0,0 +1,91 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BCACHEFS_DISK_GROUPS_H +#define _BCACHEFS_DISK_GROUPS_H @@ -39144,6 +39668,7 @@ index 0000000000000..de915480514b1 + +int bch2_sb_disk_groups_to_cpu(struct bch_fs *); + ++int __bch2_dev_group_set(struct bch_fs *, struct bch_dev *, const char *); +int bch2_dev_group_set(struct bch_fs *, struct bch_dev *, const char *); + +const char *bch2_sb_validate_disk_groups(struct bch_sb *, @@ -39152,7 +39677,7 @@ index 0000000000000..de915480514b1 +#endif /* _BCACHEFS_DISK_GROUPS_H */ diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c new file mode 100644 -index 0000000000000..f33acf1af1109 +index 000000000000..f33acf1af110 --- /dev/null +++ b/fs/bcachefs/ec.c @@ -0,0 +1,1673 @@ @@ -40831,7 +41356,7 @@ index 0000000000000..f33acf1af1109 +} diff --git a/fs/bcachefs/ec.h b/fs/bcachefs/ec.h new file mode 100644 -index 0000000000000..a4c13d61af109 +index 000000000000..a4c13d61af10 --- /dev/null +++ b/fs/bcachefs/ec.h @@ -0,0 +1,230 @@ @@ -41067,7 +41592,7 @@ index 0000000000000..a4c13d61af109 +#endif /* _BCACHEFS_EC_H */ diff --git a/fs/bcachefs/ec_types.h b/fs/bcachefs/ec_types.h new file mode 100644 -index 0000000000000..edd93da663c1f +index 000000000000..edd93da663c1 --- /dev/null +++ b/fs/bcachefs/ec_types.h @@ -0,0 +1,46 @@ @@ -41119,7 +41644,7 @@ index 0000000000000..edd93da663c1f +#endif /* _BCACHEFS_EC_TYPES_H */ diff --git a/fs/bcachefs/errcode.c b/fs/bcachefs/errcode.c new file mode 100644 -index 0000000000000..9da8a5973af06 +index 000000000000..9da8a5973af0 --- /dev/null +++ b/fs/bcachefs/errcode.c @@ -0,0 +1,51 @@ @@ -41176,10 +41701,10 @@ index 0000000000000..9da8a5973af06 +} diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h new file mode 100644 -index 0000000000000..6dd2152e782e8 +index 000000000000..232f7c7999f6 --- /dev/null +++ b/fs/bcachefs/errcode.h -@@ -0,0 +1,65 @@ +@@ -0,0 +1,75 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BCACHEFS_ERRCODE_H +#define _BCACHEFS_ERRCODE_H @@ -41210,6 +41735,16 @@ index 0000000000000..6dd2152e782e8 + x(transaction_restart, transaction_restart_key_cache_realloced)\ + x(transaction_restart, transaction_restart_journal_preres_get) \ + 
x(transaction_restart, transaction_restart_nested) \ ++ x(0, no_btree_node) \ ++ x(no_btree_node, no_btree_node_relock) \ ++ x(no_btree_node, no_btree_node_upgrade) \ ++ x(no_btree_node, no_btree_node_drop) \ ++ x(no_btree_node, no_btree_node_lock_root) \ ++ x(no_btree_node, no_btree_node_up) \ ++ x(no_btree_node, no_btree_node_down) \ ++ x(no_btree_node, no_btree_node_init) \ ++ x(no_btree_node, no_btree_node_cached) \ ++ x(0, backpointer_to_overwritten_btree_node) \ + x(0, lock_fail_node_reused) \ + x(0, lock_fail_root_changed) \ + x(0, journal_reclaim_would_deadlock) \ @@ -41247,7 +41782,7 @@ index 0000000000000..6dd2152e782e8 +#endif /* _BCACHFES_ERRCODE_H */ diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c new file mode 100644 -index 0000000000000..f6a895b2ceb7d +index 000000000000..f6a895b2ceb7 --- /dev/null +++ b/fs/bcachefs/error.c @@ -0,0 +1,184 @@ @@ -41437,7 +41972,7 @@ index 0000000000000..f6a895b2ceb7d +} diff --git a/fs/bcachefs/error.h b/fs/bcachefs/error.h new file mode 100644 -index 0000000000000..b603d738c5492 +index 000000000000..b603d738c549 --- /dev/null +++ b/fs/bcachefs/error.h @@ -0,0 +1,223 @@ @@ -41666,7 +42201,7 @@ index 0000000000000..b603d738c5492 +#endif /* _BCACHEFS_ERROR_H */ diff --git a/fs/bcachefs/extent_update.c b/fs/bcachefs/extent_update.c new file mode 100644 -index 0000000000000..2fd5d9672a442 +index 000000000000..2fd5d9672a44 --- /dev/null +++ b/fs/bcachefs/extent_update.c @@ -0,0 +1,178 @@ @@ -41850,7 +42385,7 @@ index 0000000000000..2fd5d9672a442 +} diff --git a/fs/bcachefs/extent_update.h b/fs/bcachefs/extent_update.h new file mode 100644 -index 0000000000000..6f5cf449361a7 +index 000000000000..6f5cf449361a --- /dev/null +++ b/fs/bcachefs/extent_update.h @@ -0,0 +1,12 @@ @@ -41868,7 +42403,7 @@ index 0000000000000..6f5cf449361a7 +#endif /* _BCACHEFS_EXTENT_UPDATE_H */ diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c new file mode 100644 -index 0000000000000..2ca13014b9c44 +index 000000000000..2ca13014b9c4 --- /dev/null +++ b/fs/bcachefs/extents.c @@ -0,0 +1,1324 @@ @@ -43198,7 +43733,7 @@ index 0000000000000..2ca13014b9c44 +} diff --git a/fs/bcachefs/extents.h b/fs/bcachefs/extents.h new file mode 100644 -index 0000000000000..3c17b81130bbf +index 000000000000..3c17b81130bb --- /dev/null +++ b/fs/bcachefs/extents.h @@ -0,0 +1,685 @@ @@ -43889,7 +44424,7 @@ index 0000000000000..3c17b81130bbf +#endif /* _BCACHEFS_EXTENTS_H */ diff --git a/fs/bcachefs/extents_types.h b/fs/bcachefs/extents_types.h new file mode 100644 -index 0000000000000..43d6c341eccab +index 000000000000..43d6c341ecca --- /dev/null +++ b/fs/bcachefs/extents_types.h @@ -0,0 +1,40 @@ @@ -43935,7 +44470,7 @@ index 0000000000000..43d6c341eccab +#endif /* _BCACHEFS_EXTENTS_TYPES_H */ diff --git a/fs/bcachefs/eytzinger.h b/fs/bcachefs/eytzinger.h new file mode 100644 -index 0000000000000..05429c9631cda +index 000000000000..05429c9631cd --- /dev/null +++ b/fs/bcachefs/eytzinger.h @@ -0,0 +1,281 @@ @@ -44222,7 +44757,7 @@ index 0000000000000..05429c9631cda +#endif /* _EYTZINGER_H */ diff --git a/fs/bcachefs/fifo.h b/fs/bcachefs/fifo.h new file mode 100644 -index 0000000000000..cdb272708a4bd +index 000000000000..cdb272708a4b --- /dev/null +++ b/fs/bcachefs/fifo.h @@ -0,0 +1,127 @@ @@ -44355,7 +44890,7 @@ index 0000000000000..cdb272708a4bd +#endif /* _BCACHEFS_FIFO_H */ diff --git a/fs/bcachefs/fs-common.c b/fs/bcachefs/fs-common.c new file mode 100644 -index 0000000000000..53ffc684223cf +index 000000000000..53ffc684223c --- /dev/null +++ b/fs/bcachefs/fs-common.c @@ -0,0 
+1,496 @@ @@ -44857,7 +45392,7 @@ index 0000000000000..53ffc684223cf +} diff --git a/fs/bcachefs/fs-common.h b/fs/bcachefs/fs-common.h new file mode 100644 -index 0000000000000..dde2378595143 +index 000000000000..dde237859514 --- /dev/null +++ b/fs/bcachefs/fs-common.h @@ -0,0 +1,43 @@ @@ -44906,7 +45441,7 @@ index 0000000000000..dde2378595143 +#endif /* _BCACHEFS_FS_COMMON_H */ diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c new file mode 100644 -index 0000000000000..0a7f172f11c64 +index 000000000000..0a7f172f11c6 --- /dev/null +++ b/fs/bcachefs/fs-io.c @@ -0,0 +1,3492 @@ @@ -48404,7 +48939,7 @@ index 0000000000000..0a7f172f11c64 +#endif /* NO_BCACHEFS_FS */ diff --git a/fs/bcachefs/fs-io.h b/fs/bcachefs/fs-io.h new file mode 100644 -index 0000000000000..a22a4e95731be +index 000000000000..a22a4e95731b --- /dev/null +++ b/fs/bcachefs/fs-io.h @@ -0,0 +1,56 @@ @@ -48466,7 +49001,7 @@ index 0000000000000..a22a4e95731be +#endif /* _BCACHEFS_FS_IO_H */ diff --git a/fs/bcachefs/fs-ioctl.c b/fs/bcachefs/fs-ioctl.c new file mode 100644 -index 0000000000000..9f329a624c127 +index 000000000000..9f329a624c12 --- /dev/null +++ b/fs/bcachefs/fs-ioctl.c @@ -0,0 +1,523 @@ @@ -48995,7 +49530,7 @@ index 0000000000000..9f329a624c127 +#endif /* NO_BCACHEFS_FS */ diff --git a/fs/bcachefs/fs-ioctl.h b/fs/bcachefs/fs-ioctl.h new file mode 100644 -index 0000000000000..f201980ef2c38 +index 000000000000..f201980ef2c3 --- /dev/null +++ b/fs/bcachefs/fs-ioctl.h @@ -0,0 +1,81 @@ @@ -49082,7 +49617,7 @@ index 0000000000000..f201980ef2c38 +#endif /* _BCACHEFS_FS_IOCTL_H */ diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c new file mode 100644 -index 0000000000000..3e2b6097819b1 +index 000000000000..3e2b6097819b --- /dev/null +++ b/fs/bcachefs/fs.c @@ -0,0 +1,1939 @@ @@ -51027,7 +51562,7 @@ index 0000000000000..3e2b6097819b1 +#endif /* NO_BCACHEFS_FS */ diff --git a/fs/bcachefs/fs.h b/fs/bcachefs/fs.h new file mode 100644 -index 0000000000000..9f4b57e30e2a7 +index 000000000000..9f4b57e30e2a --- /dev/null +++ b/fs/bcachefs/fs.h @@ -0,0 +1,208 @@ @@ -51241,10 +51776,10 @@ index 0000000000000..9f4b57e30e2a7 +#endif /* _BCACHEFS_FS_H */ diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c new file mode 100644 -index 0000000000000..bb8cab7cb405f +index 000000000000..12f2ef4417cb --- /dev/null +++ b/fs/bcachefs/fsck.c -@@ -0,0 +1,2390 @@ +@@ -0,0 +1,2395 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "bcachefs.h" @@ -51266,6 +51801,10 @@ index 0000000000000..bb8cab7cb405f + +#define QSTR(n) { { { .len = strlen(n) } }, .name = n } + ++/* ++ * XXX: this is handling transaction restarts without returning ++ * -BCH_ERR_transaction_restart_nested, this is not how we do things anymore: ++ */ +static s64 bch2_count_inode_sectors(struct btree_trans *trans, u64 inum, + u32 snapshot) +{ @@ -51486,18 +52025,20 @@ index 0000000000000..bb8cab7cb405f + struct bkey_s_c k; + int ret; + -+ ret = bch2_btree_delete_range_trans(trans, BTREE_ID_extents, -+ SPOS(inum, 0, snapshot), -+ SPOS(inum, U64_MAX, snapshot), -+ 0, NULL) ?: -+ bch2_btree_delete_range_trans(trans, BTREE_ID_dirents, -+ SPOS(inum, 0, snapshot), -+ SPOS(inum, U64_MAX, snapshot), -+ 0, NULL) ?: -+ bch2_btree_delete_range_trans(trans, BTREE_ID_xattrs, -+ SPOS(inum, 0, snapshot), -+ SPOS(inum, U64_MAX, snapshot), -+ 0, NULL); ++ do { ++ ret = bch2_btree_delete_range_trans(trans, BTREE_ID_extents, ++ SPOS(inum, 0, snapshot), ++ SPOS(inum, U64_MAX, snapshot), ++ 0, NULL) ?: ++ bch2_btree_delete_range_trans(trans, BTREE_ID_dirents, ++ SPOS(inum, 0, snapshot), ++ SPOS(inum, 
U64_MAX, snapshot), ++ 0, NULL) ?: ++ bch2_btree_delete_range_trans(trans, BTREE_ID_xattrs, ++ SPOS(inum, 0, snapshot), ++ SPOS(inum, U64_MAX, snapshot), ++ 0, NULL); ++ } while (ret == -BCH_ERR_transaction_restart_nested); + if (ret) + goto err; +retry: @@ -51537,7 +52078,7 @@ index 0000000000000..bb8cab7cb405f + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) + goto retry; + -+ return ret; ++ return ret ?: -BCH_ERR_transaction_restart_nested; +} + +static int __remove_dirent(struct btree_trans *trans, struct bpos pos) @@ -51766,7 +52307,7 @@ index 0000000000000..bb8cab7cb405f + .id = pos.snapshot, + .equiv = bch2_snapshot_equiv(c, pos.snapshot), + }; -+ int ret; ++ int ret = 0; + + if (bkey_cmp(s->pos, pos)) + s->ids.nr = 0; @@ -51776,14 +52317,13 @@ index 0000000000000..bb8cab7cb405f + + darray_for_each(s->ids, i) + if (i->equiv == n.equiv) { -+ if (i->id != n.id) { -+ bch_err(c, "snapshot deletion did not run correctly:\n" ++ if (fsck_err_on(i->id != n.id, c, ++ "snapshot deletion did not run correctly:\n" + " duplicate keys in btree %s at %llu:%llu snapshots %u, %u (equiv %u)\n", + bch2_btree_ids[btree_id], + pos.inode, pos.offset, -+ i->id, n.id, n.equiv); ++ i->id, n.id, n.equiv)) + return -BCH_ERR_need_snapshot_cleanup; -+ } + + return 0; + } @@ -51792,6 +52332,7 @@ index 0000000000000..bb8cab7cb405f + if (ret) + bch_err(c, "error reallocating snapshots_seen table (size %zu)", + s->ids.size); ++fsck_err: + return ret; +} + @@ -51969,7 +52510,7 @@ index 0000000000000..bb8cab7cb405f + + w->inodes.nr = 0; + -+ for_each_btree_key(trans, iter, BTREE_ID_inodes, POS(0, inum), ++ for_each_btree_key_norestart(trans, iter, BTREE_ID_inodes, POS(0, inum), + BTREE_ITER_ALL_SNAPSHOTS, k, ret) { + u32 equiv = bch2_snapshot_equiv(c, k.k->p.snapshot); + @@ -52013,9 +52554,6 @@ index 0000000000000..bb8cab7cb405f + struct bch_hash_info *hash_info, + struct btree_iter *k_iter, struct bkey_s_c k) +{ -+ bch_err(trans->c, "hash_redo_key() not implemented yet"); -+ return -EINVAL; -+#if 0 + struct bkey_i *delete; + struct bkey_i *tmp; + @@ -52033,8 +52571,14 @@ index 0000000000000..bb8cab7cb405f + delete->k.p = k_iter->pos; + return bch2_btree_iter_traverse(k_iter) ?: + bch2_trans_update(trans, k_iter, delete, 0) ?: -+ bch2_hash_set(trans, desc, hash_info, k_iter->pos.inode, tmp, 0); -+#endif ++ bch2_hash_set_snapshot(trans, desc, hash_info, ++ (subvol_inum) { 0, k.k->p.inode }, ++ k.k->p.snapshot, tmp, ++ BCH_HASH_SET_MUST_CREATE, ++ BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?: ++ bch2_trans_commit(trans, NULL, NULL, ++ BTREE_INSERT_NOFAIL| ++ BTREE_INSERT_LAZY_RW); +} + +static int hash_check_key(struct btree_trans *trans, @@ -52161,7 +52705,7 @@ index 0000000000000..bb8cab7cb405f + bch2_fs_lazy_rw(c); + + ret = fsck_inode_rm(trans, u.bi_inum, iter->pos.snapshot); -+ if (ret) ++ if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart)) + bch_err(c, "error in fsck: error while deleting inode: %s", + bch2_err_str(ret)); + return ret; @@ -52396,13 +52940,11 @@ index 0000000000000..bb8cab7cb405f + } + } +fsck_err: -+ if (ret) { ++ if (ret) + bch_err(c, "error from check_i_sectors(): %s", bch2_err_str(ret)); -+ return ret; -+ } -+ if (trans_was_restarted(trans, restart_count)) -+ return -BCH_ERR_transaction_restart_nested; -+ return 0; ++ if (!ret && trans_was_restarted(trans, restart_count)) ++ ret = -BCH_ERR_transaction_restart_nested; ++ return ret; +} + +static int check_extent(struct btree_trans *trans, struct btree_iter *iter, @@ -52615,13 +53157,11 @@ index 0000000000000..bb8cab7cb405f + } + 
} +fsck_err: -+ if (ret) { ++ if (ret) + bch_err(c, "error from check_subdir_count(): %s", bch2_err_str(ret)); -+ return ret; -+ } -+ if (trans_was_restarted(trans, restart_count)) -+ return -BCH_ERR_transaction_restart_nested; -+ return 0; ++ if (!ret && trans_was_restarted(trans, restart_count)) ++ ret = -BCH_ERR_transaction_restart_nested; ++ return ret; +} + +static int check_dirent_target(struct btree_trans *trans, @@ -53637,7 +54177,7 @@ index 0000000000000..bb8cab7cb405f +} diff --git a/fs/bcachefs/fsck.h b/fs/bcachefs/fsck.h new file mode 100644 -index 0000000000000..264f2706b12d4 +index 000000000000..264f2706b12d --- /dev/null +++ b/fs/bcachefs/fsck.h @@ -0,0 +1,8 @@ @@ -53651,7 +54191,7 @@ index 0000000000000..264f2706b12d4 +#endif /* _BCACHEFS_FSCK_H */ diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c new file mode 100644 -index 0000000000000..0831060067473 +index 000000000000..083106006747 --- /dev/null +++ b/fs/bcachefs/inode.c @@ -0,0 +1,771 @@ @@ -54428,7 +54968,7 @@ index 0000000000000..0831060067473 +} diff --git a/fs/bcachefs/inode.h b/fs/bcachefs/inode.h new file mode 100644 -index 0000000000000..2ac2fc10513bb +index 000000000000..2ac2fc10513b --- /dev/null +++ b/fs/bcachefs/inode.h @@ -0,0 +1,189 @@ @@ -54623,7 +55163,7 @@ index 0000000000000..2ac2fc10513bb +#endif /* _BCACHEFS_INODE_H */ diff --git a/fs/bcachefs/io.c b/fs/bcachefs/io.c new file mode 100644 -index 0000000000000..c4523b14293ad +index 000000000000..a683a6899873 --- /dev/null +++ b/fs/bcachefs/io.c @@ -0,0 +1,2423 @@ @@ -55220,7 +55760,7 @@ index 0000000000000..c4523b14293ad + + if (ret) { + bch_err_inum_ratelimited(c, op->pos.inode, -+ "write error %i from btree update", ret); ++ "write error while doing btree update: %s", bch2_err_str(ret)); + op->error = ret; + } + } @@ -56016,7 +56556,7 @@ index 0000000000000..c4523b14293ad + struct closure *cl = &op->cl; + struct bio *bio = &op->write.op.wbio.bio; + -+ trace_promote(&rbio->bio); ++ trace_and_count(op->write.op.c, read_promote, &rbio->bio); + + /* we now own pages: */ + BUG_ON(!rbio->bounce); @@ -56282,7 +56822,7 @@ index 0000000000000..c4523b14293ad + }; + struct bch_io_failures failed = { .nr = 0 }; + -+ trace_read_retry(&rbio->bio); ++ trace_and_count(c, read_retry, &rbio->bio); + + if (rbio->retry == READ_RETRY_AVOID) + bch2_mark_io_failure(&failed, &rbio->pick); @@ -56538,7 +57078,7 @@ index 0000000000000..c4523b14293ad + + if (((rbio->flags & BCH_READ_RETRY_IF_STALE) && race_fault()) || + ptr_stale(ca, &rbio->pick.ptr)) { -+ atomic_long_inc(&c->read_realloc_races); ++ trace_and_count(c, read_reuse_race, &rbio->bio); + + if (rbio->flags & BCH_READ_RETRY_IF_STALE) + bch2_rbio_error(rbio, READ_RETRY, BLK_STS_AGAIN); @@ -56826,7 +57366,7 @@ index 0000000000000..c4523b14293ad + rbio->bio.bi_end_io = bch2_read_endio; + + if (rbio->bounce) -+ trace_read_bounce(&rbio->bio); ++ trace_and_count(c, read_bounce, &rbio->bio); + + this_cpu_add(c->counters[BCH_COUNTER_io_read], bio_sectors(&rbio->bio)); + bch2_increment_clock(c, bio_sectors(&rbio->bio), READ); @@ -56841,7 +57381,7 @@ index 0000000000000..c4523b14293ad + + if (!(flags & (BCH_READ_IN_RETRY|BCH_READ_LAST_FRAGMENT))) { + bio_inc_remaining(&orig->bio); -+ trace_read_split(&orig->bio); ++ trace_and_count(c, read_split, &orig->bio); + } + + if (!rbio->pick.idx) { @@ -57052,7 +57592,7 @@ index 0000000000000..c4523b14293ad +} diff --git a/fs/bcachefs/io.h b/fs/bcachefs/io.h new file mode 100644 -index 0000000000000..fb5114518666c +index 000000000000..fb5114518666 --- /dev/null +++ 
b/fs/bcachefs/io.h @@ -0,0 +1,189 @@ @@ -57247,7 +57787,7 @@ index 0000000000000..fb5114518666c +#endif /* _BCACHEFS_IO_H */ diff --git a/fs/bcachefs/io_types.h b/fs/bcachefs/io_types.h new file mode 100644 -index 0000000000000..78bff13d36f27 +index 000000000000..78bff13d36f2 --- /dev/null +++ b/fs/bcachefs/io_types.h @@ -0,0 +1,161 @@ @@ -57414,10 +57954,10 @@ index 0000000000000..78bff13d36f27 +#endif /* _BCACHEFS_IO_TYPES_H */ diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c new file mode 100644 -index 0000000000000..d77092aa069ee +index 000000000000..3e8972c22b5c --- /dev/null +++ b/fs/bcachefs/journal.c -@@ -0,0 +1,1432 @@ +@@ -0,0 +1,1433 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * bcachefs journalling code, for btree insertions @@ -57811,12 +58351,12 @@ index 0000000000000..d77092aa069ee + ret = journal_entry_open(j); + + if (ret == JOURNAL_ERR_max_in_flight) -+ trace_journal_entry_full(c); ++ trace_and_count(c, journal_entry_full, c); +unlock: + if ((ret && ret != JOURNAL_ERR_insufficient_devices) && + !j->res_get_blocked_start) { + j->res_get_blocked_start = local_clock() ?: 1; -+ trace_journal_full(c); ++ trace_and_count(c, journal_full, c); + } + + can_discard = j->can_discard; @@ -58675,8 +59215,9 @@ index 0000000000000..d77092aa069ee + u64 seq; + unsigned i; + ++ if (!out->nr_tabstops) ++ printbuf_tabstop_push(out, 24); + out->atomic++; -+ out->tabstops[0] = 24; + + rcu_read_lock(); + s = READ_ONCE(j->reservations); @@ -58852,7 +59393,7 @@ index 0000000000000..d77092aa069ee +} diff --git a/fs/bcachefs/journal.h b/fs/bcachefs/journal.h new file mode 100644 -index 0000000000000..d3caa7ea7ce94 +index 000000000000..d3caa7ea7ce9 --- /dev/null +++ b/fs/bcachefs/journal.h @@ -0,0 +1,521 @@ @@ -59379,7 +59920,7 @@ index 0000000000000..d3caa7ea7ce94 +#endif /* _BCACHEFS_JOURNAL_H */ diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c new file mode 100644 -index 0000000000000..107521e10ff9f +index 000000000000..55b86cbd37c6 --- /dev/null +++ b/fs/bcachefs/journal_io.c @@ -0,0 +1,1735 @@ @@ -60936,7 +61477,7 @@ index 0000000000000..107521e10ff9f + + bch2_bio_map(bio, w->data, sectors << 9); + -+ trace_journal_write(bio); ++ trace_and_count(c, journal_write, bio); + closure_bio_submit(bio, cl); + + ca->journal.bucket_seq[ca->journal.cur_idx] = @@ -61120,7 +61661,7 @@ index 0000000000000..107521e10ff9f +} diff --git a/fs/bcachefs/journal_io.h b/fs/bcachefs/journal_io.h new file mode 100644 -index 0000000000000..30e995c81fc4d +index 000000000000..30e995c81fc4 --- /dev/null +++ b/fs/bcachefs/journal_io.h @@ -0,0 +1,59 @@ @@ -61185,10 +61726,10 @@ index 0000000000000..30e995c81fc4d +#endif /* _BCACHEFS_JOURNAL_IO_H */ diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c new file mode 100644 -index 0000000000000..9f8b63b340f71 +index 000000000000..e69595bd1359 --- /dev/null +++ b/fs/bcachefs/journal_reclaim.c -@@ -0,0 +1,852 @@ +@@ -0,0 +1,853 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "bcachefs.h" @@ -61832,7 +62373,8 @@ index 0000000000000..9f8b63b340f71 + + min_key_cache = min(bch2_nr_btree_keys_need_flush(c), (size_t) 128); + -+ trace_journal_reclaim_start(c, direct, kicked, ++ trace_and_count(c, journal_reclaim_start, c, ++ direct, kicked, + min_nr, min_key_cache, + j->prereserved.reserved, + j->prereserved.remaining, @@ -61848,7 +62390,7 @@ index 0000000000000..9f8b63b340f71 + j->nr_direct_reclaim += nr_flushed; + else + j->nr_background_reclaim += nr_flushed; -+ trace_journal_reclaim_finish(c, nr_flushed); ++ trace_and_count(c, 
journal_reclaim_finish, c, nr_flushed); + + if (nr_flushed) + wake_up(&j->reclaim_wait); @@ -62043,7 +62585,7 @@ index 0000000000000..9f8b63b340f71 +} diff --git a/fs/bcachefs/journal_reclaim.h b/fs/bcachefs/journal_reclaim.h new file mode 100644 -index 0000000000000..0fd1af120db55 +index 000000000000..0fd1af120db5 --- /dev/null +++ b/fs/bcachefs/journal_reclaim.h @@ -0,0 +1,86 @@ @@ -62135,7 +62677,7 @@ index 0000000000000..0fd1af120db55 +#endif /* _BCACHEFS_JOURNAL_RECLAIM_H */ diff --git a/fs/bcachefs/journal_sb.c b/fs/bcachefs/journal_sb.c new file mode 100644 -index 0000000000000..001cecec1291b +index 000000000000..001cecec1291 --- /dev/null +++ b/fs/bcachefs/journal_sb.c @@ -0,0 +1,220 @@ @@ -62361,7 +62903,7 @@ index 0000000000000..001cecec1291b +} diff --git a/fs/bcachefs/journal_sb.h b/fs/bcachefs/journal_sb.h new file mode 100644 -index 0000000000000..a39192e9f6f4c +index 000000000000..a39192e9f6f4 --- /dev/null +++ b/fs/bcachefs/journal_sb.h @@ -0,0 +1,24 @@ @@ -62391,7 +62933,7 @@ index 0000000000000..a39192e9f6f4c +int bch2_journal_buckets_to_sb(struct bch_fs *, struct bch_dev *); diff --git a/fs/bcachefs/journal_seq_blacklist.c b/fs/bcachefs/journal_seq_blacklist.c new file mode 100644 -index 0000000000000..5c555b3703c09 +index 000000000000..5c555b3703c0 --- /dev/null +++ b/fs/bcachefs/journal_seq_blacklist.c @@ -0,0 +1,322 @@ @@ -62719,7 +63261,7 @@ index 0000000000000..5c555b3703c09 +} diff --git a/fs/bcachefs/journal_seq_blacklist.h b/fs/bcachefs/journal_seq_blacklist.h new file mode 100644 -index 0000000000000..afb886ec8e254 +index 000000000000..afb886ec8e25 --- /dev/null +++ b/fs/bcachefs/journal_seq_blacklist.h @@ -0,0 +1,22 @@ @@ -62747,7 +63289,7 @@ index 0000000000000..afb886ec8e254 +#endif /* _BCACHEFS_JOURNAL_SEQ_BLACKLIST_H */ diff --git a/fs/bcachefs/journal_types.h b/fs/bcachefs/journal_types.h new file mode 100644 -index 0000000000000..a6cdb885ad410 +index 000000000000..a6cdb885ad41 --- /dev/null +++ b/fs/bcachefs/journal_types.h @@ -0,0 +1,340 @@ @@ -63093,7 +63635,7 @@ index 0000000000000..a6cdb885ad410 +#endif /* _BCACHEFS_JOURNAL_TYPES_H */ diff --git a/fs/bcachefs/keylist.c b/fs/bcachefs/keylist.c new file mode 100644 -index 0000000000000..cda77835b9ea6 +index 000000000000..cda77835b9ea --- /dev/null +++ b/fs/bcachefs/keylist.c @@ -0,0 +1,67 @@ @@ -63166,7 +63708,7 @@ index 0000000000000..cda77835b9ea6 +#endif diff --git a/fs/bcachefs/keylist.h b/fs/bcachefs/keylist.h new file mode 100644 -index 0000000000000..195799bb20bcb +index 000000000000..195799bb20bc --- /dev/null +++ b/fs/bcachefs/keylist.h @@ -0,0 +1,76 @@ @@ -63248,7 +63790,7 @@ index 0000000000000..195799bb20bcb +#endif /* _BCACHEFS_KEYLIST_H */ diff --git a/fs/bcachefs/keylist_types.h b/fs/bcachefs/keylist_types.h new file mode 100644 -index 0000000000000..4b3ff7d8a8756 +index 000000000000..4b3ff7d8a875 --- /dev/null +++ b/fs/bcachefs/keylist_types.h @@ -0,0 +1,16 @@ @@ -63270,7 +63812,7 @@ index 0000000000000..4b3ff7d8a8756 +#endif /* _BCACHEFS_KEYLIST_TYPES_H */ diff --git a/fs/bcachefs/lru.c b/fs/bcachefs/lru.c new file mode 100644 -index 0000000000000..53e607d72274c +index 000000000000..53e607d72274 --- /dev/null +++ b/fs/bcachefs/lru.c @@ -0,0 +1,206 @@ @@ -63482,7 +64024,7 @@ index 0000000000000..53e607d72274c +} diff --git a/fs/bcachefs/lru.h b/fs/bcachefs/lru.h new file mode 100644 -index 0000000000000..3decb7b1dde23 +index 000000000000..3decb7b1dde2 --- /dev/null +++ b/fs/bcachefs/lru.h @@ -0,0 +1,19 @@ @@ -63507,7 +64049,7 @@ index 0000000000000..3decb7b1dde23 +#endif /* 
_BCACHEFS_LRU_H */ diff --git a/fs/bcachefs/migrate.c b/fs/bcachefs/migrate.c new file mode 100644 -index 0000000000000..8b258d966d042 +index 000000000000..8b258d966d04 --- /dev/null +++ b/fs/bcachefs/migrate.c @@ -0,0 +1,186 @@ @@ -63699,7 +64241,7 @@ index 0000000000000..8b258d966d042 +} diff --git a/fs/bcachefs/migrate.h b/fs/bcachefs/migrate.h new file mode 100644 -index 0000000000000..027efaa0d575f +index 000000000000..027efaa0d575 --- /dev/null +++ b/fs/bcachefs/migrate.h @@ -0,0 +1,7 @@ @@ -63712,10 +64254,10 @@ index 0000000000000..027efaa0d575f +#endif /* _BCACHEFS_MIGRATE_H */ diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c new file mode 100644 -index 0000000000000..2fc247451390d +index 000000000000..e85c3143051c --- /dev/null +++ b/fs/bcachefs/move.c -@@ -0,0 +1,952 @@ +@@ -0,0 +1,954 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "bcachefs.h" @@ -63970,8 +64512,8 @@ index 0000000000000..2fc247451390d + atomic64_inc(&ctxt->stats->keys_moved); + atomic64_add(k.k->size, &ctxt->stats->sectors_moved); + this_cpu_add(c->counters[BCH_COUNTER_io_move], k.k->size); -+ -+ trace_move_extent(k.k); ++ this_cpu_add(c->counters[BCH_COUNTER_move_extent_read], k.k->size); ++ trace_move_extent_read(k.k); + + atomic_add(io->read_sectors, &ctxt->read_sectors); + list_add_tail(&io->list, &ctxt->reads); @@ -63993,7 +64535,7 @@ index 0000000000000..2fc247451390d + kfree(io); +err: + percpu_ref_put(&c->writes); -+ trace_move_alloc_mem_fail(k.k); ++ trace_and_count(c, move_extent_alloc_mem_fail, k.k); + return ret; +} + @@ -64354,6 +64896,8 @@ index 0000000000000..2fc247451390d + b = bch2_backpointer_get_node(&trans, &iter, + bucket, bp_offset, bp); + ret = PTR_ERR_OR_ZERO(b); ++ if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node) ++ continue; + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) + continue; + if (ret) @@ -64670,7 +65214,7 @@ index 0000000000000..2fc247451390d +} diff --git a/fs/bcachefs/move.h b/fs/bcachefs/move.h new file mode 100644 -index 0000000000000..c0fec69bbb6a1 +index 000000000000..c0fec69bbb6a --- /dev/null +++ b/fs/bcachefs/move.h @@ -0,0 +1,67 @@ @@ -64743,7 +65287,7 @@ index 0000000000000..c0fec69bbb6a1 +#endif /* _BCACHEFS_MOVE_H */ diff --git a/fs/bcachefs/move_types.h b/fs/bcachefs/move_types.h new file mode 100644 -index 0000000000000..9df6d18137a5e +index 000000000000..9df6d18137a5 --- /dev/null +++ b/fs/bcachefs/move_types.h @@ -0,0 +1,19 @@ @@ -64768,7 +65312,7 @@ index 0000000000000..9df6d18137a5e +#endif /* _BCACHEFS_MOVE_TYPES_H */ diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c new file mode 100644 -index 0000000000000..f913864eaa4f5 +index 000000000000..35958c6bb4a6 --- /dev/null +++ b/fs/bcachefs/movinggc.c @@ -0,0 +1,285 @@ @@ -64939,7 +65483,7 @@ index 0000000000000..f913864eaa4f5 + if (ret < 0) + bch_err(c, "error from bch2_move_data() in copygc: %s", bch2_err_str(ret)); + -+ trace_copygc(c, atomic64_read(&move_stats.sectors_moved), 0, 0, 0); ++ trace_and_count(c, copygc, c, atomic64_read(&move_stats.sectors_moved), 0, 0, 0); + return ret; +} + @@ -64995,7 +65539,7 @@ index 0000000000000..f913864eaa4f5 + wait = bch2_copygc_wait_amount(c); + + if (wait > clock->max_slop) { -+ trace_copygc_wait(c, wait, last + wait); ++ trace_and_count(c, copygc_wait, c, wait, last + wait); + c->copygc_wait = last + wait; + bch2_kthread_io_clock_wait(clock, last + wait, + MAX_SCHEDULE_TIMEOUT); @@ -65059,7 +65603,7 @@ index 0000000000000..f913864eaa4f5 +} diff --git a/fs/bcachefs/movinggc.h b/fs/bcachefs/movinggc.h new file mode 100644 
-index 0000000000000..e85c8136a46e9 +index 000000000000..e85c8136a46e --- /dev/null +++ b/fs/bcachefs/movinggc.h @@ -0,0 +1,10 @@ @@ -65075,7 +65619,7 @@ index 0000000000000..e85c8136a46e9 +#endif /* _BCACHEFS_MOVINGGC_H */ diff --git a/fs/bcachefs/opts.c b/fs/bcachefs/opts.c new file mode 100644 -index 0000000000000..407b221e8f6c9 +index 000000000000..407b221e8f6c --- /dev/null +++ b/fs/bcachefs/opts.c @@ -0,0 +1,578 @@ @@ -65659,7 +66203,7 @@ index 0000000000000..407b221e8f6c9 +} diff --git a/fs/bcachefs/opts.h b/fs/bcachefs/opts.h new file mode 100644 -index 0000000000000..5b8586ecb3743 +index 000000000000..5b8586ecb374 --- /dev/null +++ b/fs/bcachefs/opts.h @@ -0,0 +1,509 @@ @@ -66174,7 +66718,7 @@ index 0000000000000..5b8586ecb3743 +#endif /* _BCACHEFS_OPTS_H */ diff --git a/fs/bcachefs/quota.c b/fs/bcachefs/quota.c new file mode 100644 -index 0000000000000..454c76e03be90 +index 000000000000..454c76e03be9 --- /dev/null +++ b/fs/bcachefs/quota.c @@ -0,0 +1,823 @@ @@ -67003,7 +67547,7 @@ index 0000000000000..454c76e03be90 +#endif /* CONFIG_BCACHEFS_QUOTA */ diff --git a/fs/bcachefs/quota.h b/fs/bcachefs/quota.h new file mode 100644 -index 0000000000000..8c67ae1da7c75 +index 000000000000..8c67ae1da7c7 --- /dev/null +++ b/fs/bcachefs/quota.h @@ -0,0 +1,71 @@ @@ -67080,7 +67624,7 @@ index 0000000000000..8c67ae1da7c75 +#endif /* _BCACHEFS_QUOTA_H */ diff --git a/fs/bcachefs/quota_types.h b/fs/bcachefs/quota_types.h new file mode 100644 -index 0000000000000..6a136083d3899 +index 000000000000..6a136083d389 --- /dev/null +++ b/fs/bcachefs/quota_types.h @@ -0,0 +1,43 @@ @@ -67129,10 +67673,10 @@ index 0000000000000..6a136083d3899 +#endif /* _BCACHEFS_QUOTA_TYPES_H */ diff --git a/fs/bcachefs/rebalance.c b/fs/bcachefs/rebalance.c new file mode 100644 -index 0000000000000..ecc64dd92b050 +index 000000000000..17b289b051f2 --- /dev/null +++ b/fs/bcachefs/rebalance.c -@@ -0,0 +1,361 @@ +@@ -0,0 +1,362 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "bcachefs.h" @@ -67403,7 +67947,8 @@ index 0000000000000..ecc64dd92b050 + struct bch_fs_rebalance *r = &c->rebalance; + struct rebalance_work w = rebalance_work(c); + -+ out->tabstops[0] = 20; ++ if (!out->nr_tabstops) ++ printbuf_tabstop_push(out, 20); + + prt_printf(out, "fullest_dev (%i):", w.dev_most_full_idx); + prt_tab(out); @@ -67496,7 +68041,7 @@ index 0000000000000..ecc64dd92b050 +} diff --git a/fs/bcachefs/rebalance.h b/fs/bcachefs/rebalance.h new file mode 100644 -index 0000000000000..7ade0bb81cce8 +index 000000000000..7ade0bb81cce --- /dev/null +++ b/fs/bcachefs/rebalance.h @@ -0,0 +1,28 @@ @@ -67530,7 +68075,7 @@ index 0000000000000..7ade0bb81cce8 +#endif /* _BCACHEFS_REBALANCE_H */ diff --git a/fs/bcachefs/rebalance_types.h b/fs/bcachefs/rebalance_types.h new file mode 100644 -index 0000000000000..7462a92e95985 +index 000000000000..7462a92e9598 --- /dev/null +++ b/fs/bcachefs/rebalance_types.h @@ -0,0 +1,26 @@ @@ -67562,7 +68107,7 @@ index 0000000000000..7462a92e95985 +#endif /* _BCACHEFS_REBALANCE_TYPES_H */ diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c new file mode 100644 -index 0000000000000..b070bdf01500a +index 000000000000..b070bdf01500 --- /dev/null +++ b/fs/bcachefs/recovery.c @@ -0,0 +1,1597 @@ @@ -69165,7 +69710,7 @@ index 0000000000000..b070bdf01500a +} diff --git a/fs/bcachefs/recovery.h b/fs/bcachefs/recovery.h new file mode 100644 -index 0000000000000..8c0348e8b84cf +index 000000000000..8c0348e8b84c --- /dev/null +++ b/fs/bcachefs/recovery.h @@ -0,0 +1,58 @@ @@ -69229,7 +69774,7 @@ index 
0000000000000..8c0348e8b84cf +#endif /* _BCACHEFS_RECOVERY_H */ diff --git a/fs/bcachefs/reflink.c b/fs/bcachefs/reflink.c new file mode 100644 -index 0000000000000..d5c14bb2992d5 +index 000000000000..d5c14bb2992d --- /dev/null +++ b/fs/bcachefs/reflink.c @@ -0,0 +1,422 @@ @@ -69657,7 +70202,7 @@ index 0000000000000..d5c14bb2992d5 +} diff --git a/fs/bcachefs/reflink.h b/fs/bcachefs/reflink.h new file mode 100644 -index 0000000000000..f9848dc3eebba +index 000000000000..f9848dc3eebb --- /dev/null +++ b/fs/bcachefs/reflink.h @@ -0,0 +1,76 @@ @@ -69739,7 +70284,7 @@ index 0000000000000..f9848dc3eebba +#endif /* _BCACHEFS_REFLINK_H */ diff --git a/fs/bcachefs/replicas.c b/fs/bcachefs/replicas.c new file mode 100644 -index 0000000000000..9cb47ba62bc3c +index 000000000000..9cb47ba62bc3 --- /dev/null +++ b/fs/bcachefs/replicas.c @@ -0,0 +1,1073 @@ @@ -70818,7 +71363,7 @@ index 0000000000000..9cb47ba62bc3c +} diff --git a/fs/bcachefs/replicas.h b/fs/bcachefs/replicas.h new file mode 100644 -index 0000000000000..87820b2e1ad3e +index 000000000000..87820b2e1ad3 --- /dev/null +++ b/fs/bcachefs/replicas.h @@ -0,0 +1,106 @@ @@ -70930,7 +71475,7 @@ index 0000000000000..87820b2e1ad3e +#endif /* _BCACHEFS_REPLICAS_H */ diff --git a/fs/bcachefs/replicas_types.h b/fs/bcachefs/replicas_types.h new file mode 100644 -index 0000000000000..0535b1d3760ed +index 000000000000..0535b1d3760e --- /dev/null +++ b/fs/bcachefs/replicas_types.h @@ -0,0 +1,10 @@ @@ -70946,7 +71491,7 @@ index 0000000000000..0535b1d3760ed +#endif /* _BCACHEFS_REPLICAS_TYPES_H */ diff --git a/fs/bcachefs/siphash.c b/fs/bcachefs/siphash.c new file mode 100644 -index 0000000000000..c062edb3fbc24 +index 000000000000..c062edb3fbc2 --- /dev/null +++ b/fs/bcachefs/siphash.c @@ -0,0 +1,173 @@ @@ -71125,7 +71670,7 @@ index 0000000000000..c062edb3fbc24 +} diff --git a/fs/bcachefs/siphash.h b/fs/bcachefs/siphash.h new file mode 100644 -index 0000000000000..3dfaf34a43b28 +index 000000000000..3dfaf34a43b2 --- /dev/null +++ b/fs/bcachefs/siphash.h @@ -0,0 +1,87 @@ @@ -71218,10 +71763,10 @@ index 0000000000000..3dfaf34a43b28 +#endif /* _SIPHASH_H_ */ diff --git a/fs/bcachefs/str_hash.h b/fs/bcachefs/str_hash.h new file mode 100644 -index 0000000000000..591bbb9f8beb5 +index 000000000000..5c327b3128da --- /dev/null +++ b/fs/bcachefs/str_hash.h -@@ -0,0 +1,351 @@ +@@ -0,0 +1,368 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BCACHEFS_STR_HASH_H +#define _BCACHEFS_STR_HASH_H @@ -71463,29 +72008,26 @@ index 0000000000000..591bbb9f8beb5 +} + +static __always_inline -+int bch2_hash_set(struct btree_trans *trans, -+ const struct bch_hash_desc desc, -+ const struct bch_hash_info *info, -+ subvol_inum inum, -+ struct bkey_i *insert, int flags) ++int bch2_hash_set_snapshot(struct btree_trans *trans, ++ const struct bch_hash_desc desc, ++ const struct bch_hash_info *info, ++ subvol_inum inum, u32 snapshot, ++ struct bkey_i *insert, ++ int flags, ++ int update_flags) +{ + struct btree_iter iter, slot = { NULL }; + struct bkey_s_c k; + bool found = false; -+ u32 snapshot; + int ret; + -+ ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot); -+ if (ret) -+ return ret; -+ + for_each_btree_key_upto_norestart(trans, iter, desc.btree_id, -+ SPOS(inum.inum, ++ SPOS(insert->k.p.inode, + desc.hash_bkey(info, bkey_i_to_s_c(insert)), + snapshot), -+ POS(inum.inum, U64_MAX), ++ POS(insert->k.p.inode, U64_MAX), + BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) { -+ if (is_visible_key(desc, inum, k)) { ++ if (!inum.subvol || is_visible_key(desc, inum, k)) { + if 
(!desc.cmp_bkey(k, bkey_i_to_s_c(insert))) + goto found; + @@ -71528,6 +72070,26 @@ index 0000000000000..591bbb9f8beb5 +} + +static __always_inline ++int bch2_hash_set(struct btree_trans *trans, ++ const struct bch_hash_desc desc, ++ const struct bch_hash_info *info, ++ subvol_inum inum, ++ struct bkey_i *insert, int flags) ++{ ++ u32 snapshot; ++ int ret; ++ ++ ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot); ++ if (ret) ++ return ret; ++ ++ insert->k.p.inode = inum.inum; ++ ++ return bch2_hash_set_snapshot(trans, desc, info, inum, ++ snapshot, insert, flags, 0); ++} ++ ++static __always_inline +int bch2_hash_delete_at(struct btree_trans *trans, + const struct bch_hash_desc desc, + const struct bch_hash_info *info, @@ -71575,10 +72137,10 @@ index 0000000000000..591bbb9f8beb5 +#endif /* _BCACHEFS_STR_HASH_H */ diff --git a/fs/bcachefs/subvolume.c b/fs/bcachefs/subvolume.c new file mode 100644 -index 0000000000000..b5b0f5e39f976 +index 000000000000..fb3f8e4074c7 --- /dev/null +++ b/fs/bcachefs/subvolume.c -@@ -0,0 +1,1108 @@ +@@ -0,0 +1,1110 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "bcachefs.h" @@ -71859,8 +72421,8 @@ index 0000000000000..b5b0f5e39f976 + + bch2_trans_init(&trans, c, 0, 0); + -+ ret = for_each_btree_key_commit(&trans, iter, BTREE_ID_snapshots, -+ POS(BCACHEFS_ROOT_INO, 0), ++ ret = for_each_btree_key_commit(&trans, iter, ++ BTREE_ID_snapshots, POS_MIN, + BTREE_ITER_PREFETCH, k, + NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL, + check_snapshot(&trans, &iter, k)); @@ -72458,6 +73020,8 @@ index 0000000000000..b5b0f5e39f976 + goto err; + + ret = bch2_snapshot_node_set_deleted(trans, snapid); ++ if (ret) ++ goto err; + + h = bch2_trans_kmalloc(trans, sizeof(*h)); + ret = PTR_ERR_OR_ZERO(h); @@ -72689,7 +73253,7 @@ index 0000000000000..b5b0f5e39f976 +} diff --git a/fs/bcachefs/subvolume.h b/fs/bcachefs/subvolume.h new file mode 100644 -index 0000000000000..02a636644988a +index 000000000000..02a636644988 --- /dev/null +++ b/fs/bcachefs/subvolume.h @@ -0,0 +1,137 @@ @@ -72832,7 +73396,7 @@ index 0000000000000..02a636644988a +#endif /* _BCACHEFS_SUBVOLUME_H */ diff --git a/fs/bcachefs/subvolume_types.h b/fs/bcachefs/subvolume_types.h new file mode 100644 -index 0000000000000..f7562b5d51dff +index 000000000000..f7562b5d51df --- /dev/null +++ b/fs/bcachefs/subvolume_types.h @@ -0,0 +1,9 @@ @@ -72847,7 +73411,7 @@ index 0000000000000..f7562b5d51dff +#endif /* _BCACHEFS_SUBVOLUME_TYPES_H */ diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c new file mode 100644 -index 0000000000000..55f8c65ad725e +index 000000000000..e1e70d35fe0f --- /dev/null +++ b/fs/bcachefs/super-io.c @@ -0,0 +1,1605 @@ @@ -73649,7 +74213,7 @@ index 0000000000000..55f8c65ad725e + unsigned degraded_flags = BCH_FORCE_IF_DEGRADED; + int ret = 0; + -+ trace_write_super(c, _RET_IP_); ++ trace_and_count(c, write_super, c, _RET_IP_); + + if (c->opts.very_degraded) + degraded_flags |= BCH_FORCE_IF_LOST; @@ -74280,8 +74844,8 @@ index 0000000000000..55f8c65ad725e + const struct bch_sb_field_ops *ops = type < BCH_SB_FIELD_NR + ? 
bch2_sb_field_ops[type] : NULL; + -+ if (!out->tabstops[0]) -+ out->tabstops[0] = 32; ++ if (!out->nr_tabstops) ++ printbuf_tabstop_push(out, 32); + + if (ops) + prt_printf(out, "%s", bch2_sb_fields[type]); @@ -74329,8 +74893,8 @@ index 0000000000000..55f8c65ad725e + u64 fields_have = 0; + unsigned nr_devices = 0; + -+ if (!out->tabstops[0]) -+ out->tabstops[0] = 32; ++ if (!out->nr_tabstops) ++ printbuf_tabstop_push(out, 32); + + mi = bch2_sb_get_members(sb); + if (mi) { @@ -74458,7 +75022,7 @@ index 0000000000000..55f8c65ad725e +} diff --git a/fs/bcachefs/super-io.h b/fs/bcachefs/super-io.h new file mode 100644 -index 0000000000000..14a25f6fe29a5 +index 000000000000..14a25f6fe29a --- /dev/null +++ b/fs/bcachefs/super-io.h @@ -0,0 +1,126 @@ @@ -74590,10 +75154,10 @@ index 0000000000000..14a25f6fe29a5 +#endif /* _BCACHEFS_SUPER_IO_H */ diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c new file mode 100644 -index 0000000000000..7c6348001ae39 +index 000000000000..8b3ce780338c --- /dev/null +++ b/fs/bcachefs/super.c -@@ -0,0 +1,1950 @@ +@@ -0,0 +1,1968 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * bcachefs setup/teardown code, and some metadata io - read a superblock and @@ -76126,6 +76690,7 @@ index 0000000000000..7c6348001ae39 + struct bch_member dev_mi; + unsigned dev_idx, nr_devices, u64s; + struct printbuf errbuf = PRINTBUF; ++ struct printbuf label = PRINTBUF; + int ret; + + ret = bch2_read_super(path, &opts, &sb); @@ -76136,6 +76701,14 @@ index 0000000000000..7c6348001ae39 + + dev_mi = bch2_sb_get_members(sb.sb)->members[sb.sb->dev_idx]; + ++ if (BCH_MEMBER_GROUP(&dev_mi)) { ++ bch2_disk_path_to_text(&label, sb.sb, BCH_MEMBER_GROUP(&dev_mi) - 1); ++ if (label.allocation_failure) { ++ ret = -ENOMEM; ++ goto err; ++ } ++ } ++ + err = bch2_dev_may_add(sb.sb, c); + if (err) { + bch_err(c, "device add error: %s", err); @@ -76216,6 +76789,14 @@ index 0000000000000..7c6348001ae39 + ca->disk_sb.sb->dev_idx = dev_idx; + bch2_dev_attach(c, ca, dev_idx); + ++ if (BCH_MEMBER_GROUP(&dev_mi)) { ++ ret = __bch2_dev_group_set(c, ca, label.buf); ++ if (ret) { ++ bch_err(c, "device add error: error setting label"); ++ goto err_unlock; ++ } ++ } ++ + bch2_write_super(c); + mutex_unlock(&c->sb_lock); + @@ -76248,6 +76829,7 @@ index 0000000000000..7c6348001ae39 + if (ca) + bch2_dev_free(ca); + bch2_free_super(&sb); ++ printbuf_exit(&label); + printbuf_exit(&errbuf); + return ret; +err_late: @@ -76546,7 +77128,7 @@ index 0000000000000..7c6348001ae39 +module_init(bcachefs_init); diff --git a/fs/bcachefs/super.h b/fs/bcachefs/super.h new file mode 100644 -index 0000000000000..8501adaff4c2f +index 000000000000..8501adaff4c2 --- /dev/null +++ b/fs/bcachefs/super.h @@ -0,0 +1,264 @@ @@ -76816,7 +77398,7 @@ index 0000000000000..8501adaff4c2f +#endif /* _BCACHEFS_SUPER_H */ diff --git a/fs/bcachefs/super_types.h b/fs/bcachefs/super_types.h new file mode 100644 -index 0000000000000..89419fc7930d0 +index 000000000000..89419fc7930d --- /dev/null +++ b/fs/bcachefs/super_types.h @@ -0,0 +1,51 @@ @@ -76873,10 +77455,10 @@ index 0000000000000..89419fc7930d0 +#endif /* _BCACHEFS_SUPER_TYPES_H */ diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c new file mode 100644 -index 0000000000000..2c650055f5306 +index 000000000000..98449e428d20 --- /dev/null +++ b/fs/bcachefs/sysfs.c -@@ -0,0 +1,943 @@ +@@ -0,0 +1,925 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * bcache sysfs interfaces @@ -77069,11 +77651,6 @@ index 0000000000000..2c650055f5306 +read_attribute(has_data); +read_attribute(alloc_debug); + 
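A note on the counter rework in the sysfs.c hunks just below: the four ad-hoc atomic_long statistics (read_realloc_races, extent_migrate_done, extent_migrate_raced, bucket_alloc_fail) are dropped, and readout goes through the BCH_PERSISTENT_COUNTERS() x-macro list instead, which expands once into read_attribute() declarations and once into the percpu_u64_get() readers. A minimal userspace sketch of that x-macro shape follows; all names in it are illustrative, and the in-tree list passes an id alongside each name (x(t, n, ...)), which this sketch omits.

/*
 * Sketch: one x-macro list expands into an enum, a name table, and a
 * uniform readout loop -- the shape behind BCH_PERSISTENT_COUNTERS().
 * Counter names here are made up for illustration.
 */
#include <stdio.h>
#include <stdint.h>

#define EXAMPLE_COUNTERS()	\
	x(io_read)		\
	x(io_write)		\
	x(io_move)

enum example_counter {
#define x(n)	COUNTER_##n,
	EXAMPLE_COUNTERS()
#undef x
	COUNTER_NR
};

static const char * const counter_names[] = {
#define x(n)	#n,
	EXAMPLE_COUNTERS()
#undef x
};

static uint64_t counters[COUNTER_NR];	/* the kernel side uses percpu u64s */

int main(void)
{
	counters[COUNTER_io_move] += 512;	/* e.g. sectors moved */

	for (int i = 0; i < COUNTER_NR; i++)
		printf("%-16s %llu\n", counter_names[i],
		       (unsigned long long) counters[i]);
	return 0;
}

Adding a counter is then a one-line change to the list; the enum, the name table, and the sysfs plumbing stay in sync by construction.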
-+read_attribute(read_realloc_races); -+read_attribute(extent_migrate_done); -+read_attribute(extent_migrate_raced); -+read_attribute(bucket_alloc_fail); -+ +#define x(t, n, ...) read_attribute(t); +BCH_PERSISTENT_COUNTERS() +#undef x @@ -77257,15 +77834,6 @@ index 0000000000000..2c650055f5306 + sysfs_hprint(btree_cache_size, bch2_btree_cache_size(c)); + sysfs_hprint(btree_avg_write_size, bch2_btree_avg_write_size(c)); + -+ sysfs_print(read_realloc_races, -+ atomic_long_read(&c->read_realloc_races)); -+ sysfs_print(extent_migrate_done, -+ atomic_long_read(&c->extent_migrate_done)); -+ sysfs_print(extent_migrate_raced, -+ atomic_long_read(&c->extent_migrate_raced)); -+ sysfs_print(bucket_alloc_fail, -+ atomic_long_read(&c->bucket_alloc_fail)); -+ + sysfs_printf(btree_gc_periodic, "%u", (int) c->btree_gc_periodic); + + if (attr == &sysfs_gc_gens_pos) @@ -77439,7 +78007,8 @@ index 0000000000000..2c650055f5306 + u64 counter = 0; + u64 counter_since_mount = 0; + -+ out->tabstops[0] = 32; ++ printbuf_tabstop_push(out, 32); ++ + #define x(t, ...) \ + if (attr == &sysfs_##t) { \ + counter = percpu_u64_get(&c->counters[BCH_COUNTER_##t]);\ @@ -77503,11 +78072,6 @@ index 0000000000000..2c650055f5306 + &sysfs_trigger_invalidates, + &sysfs_prune_cache, + -+ &sysfs_read_realloc_races, -+ &sysfs_extent_migrate_done, -+ &sysfs_extent_migrate_raced, -+ &sysfs_bucket_alloc_fail, -+ + &sysfs_gc_gens_pos, + + &sysfs_copy_gc_enabled, @@ -77822,7 +78386,7 @@ index 0000000000000..2c650055f5306 +#endif /* _BCACHEFS_SYSFS_H_ */ diff --git a/fs/bcachefs/sysfs.h b/fs/bcachefs/sysfs.h new file mode 100644 -index 0000000000000..222cd5062702c +index 000000000000..222cd5062702 --- /dev/null +++ b/fs/bcachefs/sysfs.h @@ -0,0 +1,48 @@ @@ -77876,7 +78440,7 @@ index 0000000000000..222cd5062702c +#endif /* _BCACHEFS_SYSFS_H_ */ diff --git a/fs/bcachefs/tests.c b/fs/bcachefs/tests.c new file mode 100644 -index 0000000000000..56058a56f2a28 +index 000000000000..56058a56f2a2 --- /dev/null +++ b/fs/bcachefs/tests.c @@ -0,0 +1,976 @@ @@ -78858,7 +79422,7 @@ index 0000000000000..56058a56f2a28 +#endif /* CONFIG_BCACHEFS_TESTS */ diff --git a/fs/bcachefs/tests.h b/fs/bcachefs/tests.h new file mode 100644 -index 0000000000000..c73b18aea7e01 +index 000000000000..c73b18aea7e0 --- /dev/null +++ b/fs/bcachefs/tests.h @@ -0,0 +1,15 @@ @@ -78879,28 +79443,30 @@ index 0000000000000..c73b18aea7e01 +#endif /* _BCACHEFS_TEST_H */ diff --git a/fs/bcachefs/trace.c b/fs/bcachefs/trace.c new file mode 100644 -index 0000000000000..59e8dfa3d2452 +index 000000000000..70573981b87d --- /dev/null +++ b/fs/bcachefs/trace.c -@@ -0,0 +1,12 @@ +@@ -0,0 +1,14 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "bcachefs.h" +#include "alloc_types.h" +#include "buckets.h" -+#include "btree_types.h" ++#include "btree_iter.h" ++#include "btree_locking.h" +#include "keylist.h" ++#include "opts.h" + +#include -+#include "keylist.h" ++#include + +#define CREATE_TRACE_POINTS +#include diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c new file mode 100644 -index 0000000000000..ee2c7d9e70500 +index 000000000000..42da6623d815 --- /dev/null +++ b/fs/bcachefs/util.c -@@ -0,0 +1,964 @@ +@@ -0,0 +1,971 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * random utiility code, for bcache but in theory not specific to bcache @@ -79171,6 +79737,12 @@ index 0000000000000..ee2c7d9e70500 + } +} + ++void bch2_prt_u64_binary(struct printbuf *out, u64 v, unsigned nr_bits) ++{ ++ while (nr_bits) ++ prt_char(out, '0' + ((v >> --nr_bits) & 1)); ++} ++ +/* time stats: */ + +static void 
bch2_time_stats_update_one(struct time_stats *stats, @@ -79429,7 +80001,8 @@ index 0000000000000..ee2c7d9e70500 + +void bch2_pd_controller_debug_to_text(struct printbuf *out, struct bch_pd_controller *pd) +{ -+ out->tabstops[0] = 20; ++ if (!out->nr_tabstops) ++ printbuf_tabstop_push(out, 20); + + prt_printf(out, "rate:"); + prt_tab(out); @@ -79867,10 +80440,10 @@ index 0000000000000..ee2c7d9e70500 +} diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h new file mode 100644 -index 0000000000000..1fe66fd91ccc7 +index 000000000000..ab7e43d4bf8b --- /dev/null +++ b/fs/bcachefs/util.h -@@ -0,0 +1,783 @@ +@@ -0,0 +1,785 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BCACHEFS_UTIL_H +#define _BCACHEFS_UTIL_H @@ -80226,6 +80799,8 @@ index 0000000000000..1fe66fd91ccc7 + +u64 bch2_read_flag_list(char *, const char * const[]); + ++void bch2_prt_u64_binary(struct printbuf *, u64, unsigned); ++ +#define NR_QUANTILES 15 +#define QUANTILE_IDX(i) inorder_to_eytzinger0(i, NR_QUANTILES) +#define QUANTILE_FIRST eytzinger0_first(NR_QUANTILES) @@ -80656,7 +81231,7 @@ index 0000000000000..1fe66fd91ccc7 +#endif /* _BCACHEFS_UTIL_H */ diff --git a/fs/bcachefs/varint.c b/fs/bcachefs/varint.c new file mode 100644 -index 0000000000000..5143b603bf67f +index 000000000000..5143b603bf67 --- /dev/null +++ b/fs/bcachefs/varint.c @@ -0,0 +1,121 @@ @@ -80783,7 +81358,7 @@ index 0000000000000..5143b603bf67f +} diff --git a/fs/bcachefs/varint.h b/fs/bcachefs/varint.h new file mode 100644 -index 0000000000000..92a182fb3d7ae +index 000000000000..92a182fb3d7a --- /dev/null +++ b/fs/bcachefs/varint.h @@ -0,0 +1,11 @@ @@ -80800,7 +81375,7 @@ index 0000000000000..92a182fb3d7ae +#endif /* _BCACHEFS_VARINT_H */ diff --git a/fs/bcachefs/vstructs.h b/fs/bcachefs/vstructs.h new file mode 100644 -index 0000000000000..53a694d719671 +index 000000000000..53a694d71967 --- /dev/null +++ b/fs/bcachefs/vstructs.h @@ -0,0 +1,63 @@ @@ -80869,7 +81444,7 @@ index 0000000000000..53a694d719671 +#endif /* _VSTRUCTS_H */ diff --git a/fs/bcachefs/xattr.c b/fs/bcachefs/xattr.c new file mode 100644 -index 0000000000000..186ffab542d53 +index 000000000000..186ffab542d5 --- /dev/null +++ b/fs/bcachefs/xattr.c @@ -0,0 +1,648 @@ @@ -81523,7 +82098,7 @@ index 0000000000000..186ffab542d53 +} diff --git a/fs/bcachefs/xattr.h b/fs/bcachefs/xattr.h new file mode 100644 -index 0000000000000..66d7a1e30350e +index 000000000000..66d7a1e30350 --- /dev/null +++ b/fs/bcachefs/xattr.h @@ -0,0 +1,50 @@ @@ -81578,7 +82153,7 @@ index 0000000000000..66d7a1e30350e + +#endif /* _BCACHEFS_XATTR_H */ diff --git a/fs/d_path.c b/fs/d_path.c -index e4e0ebad1f153..1bd9e85f2f65a 100644 +index e4e0ebad1f15..1bd9e85f2f65 100644 --- a/fs/d_path.c +++ b/fs/d_path.c @@ -5,6 +5,7 @@ @@ -81631,7 +82206,7 @@ index e4e0ebad1f153..1bd9e85f2f65a 100644 * Helper function for dentry_operations.d_dname() members */ diff --git a/fs/dcache.c b/fs/dcache.c -index 93f4f5ee07bfd..d90ed65e2a75f 100644 +index 93f4f5ee07bf..d90ed65e2a75 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -3193,9 +3193,8 @@ void d_genocide(struct dentry *parent) @@ -81660,7 +82235,7 @@ index 93f4f5ee07bfd..d90ed65e2a75f 100644 } EXPORT_SYMBOL(d_tmpfile); diff --git a/fs/inode.c b/fs/inode.c -index bd4da9c5207ea..ac0da28a1ac6e 100644 +index bd4da9c5207e..ac0da28a1ac6 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -56,8 +56,23 @@ @@ -82147,7 +82722,7 @@ index bd4da9c5207ea..ac0da28a1ac6e 100644 14, HASH_ZERO, diff --git a/include/linux/bio.h b/include/linux/bio.h -index 992ee987f2738..6d5acc1b407fa 100644 +index 
992ee987f273..6d5acc1b407f 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -480,7 +480,12 @@ extern void bio_copy_data_iter(struct bio *dst, struct bvec_iter *dst_iter, @@ -82165,7 +82740,7 @@ index 992ee987f2738..6d5acc1b407fa 100644 static inline void bio_release_pages(struct bio *bio, bool mark_dirty) { diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h -index 2f7b43444c5f8..4ef515977abc2 100644 +index 62e3ff52ab03..0d161d5e3950 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -884,6 +884,7 @@ extern const char *blk_op_str(unsigned int op); @@ -82180,7 +82755,7 @@ diff --git a/drivers/md/bcache/closure.h b/include/linux/closure.h similarity index 94% rename from drivers/md/bcache/closure.h rename to include/linux/closure.h -index c88cdc4ae4ec5..36b4a83f9b774 100644 +index c88cdc4ae4ec..36b4a83f9b77 100644 --- a/drivers/md/bcache/closure.h +++ b/include/linux/closure.h @@ -155,7 +155,7 @@ struct closure { @@ -82288,7 +82863,7 @@ index c88cdc4ae4ec5..36b4a83f9b774 100644 + #endif /* _LINUX_CLOSURE_H */ diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h -index 445e80517cab6..57e7d0b94119e 100644 +index 445e80517cab..57e7d0b94119 100644 --- a/include/linux/compiler_attributes.h +++ b/include/linux/compiler_attributes.h @@ -371,4 +371,9 @@ @@ -82302,7 +82877,7 @@ index 445e80517cab6..57e7d0b94119e 100644 + #endif /* __LINUX_COMPILER_ATTRIBUTES_H */ diff --git a/include/linux/dcache.h b/include/linux/dcache.h -index f5bba51480b2f..6c661059a55b6 100644 +index f5bba51480b2..6c661059a55b 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -248,6 +248,7 @@ extern struct dentry * d_make_root(struct inode *); @@ -82322,7 +82897,7 @@ index f5bba51480b2f..6c661059a55b6 100644 /* Allocation counts.. 
*/ diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h -index fe848901fcc3a..5a3cc0e1da9b9 100644 +index fe848901fcc3..5a3cc0e1da9b 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -98,6 +98,12 @@ enum fid_type { @@ -82339,7 +82914,7 @@ index fe848901fcc3a..5a3cc0e1da9b9 100644 * 128 bit child FID (struct lu_fid) * 128 bit parent FID (struct lu_fid) diff --git a/include/linux/fs.h b/include/linux/fs.h -index 9ad5e3520fae5..1f7671a674e34 100644 +index 9ad5e3520fae..1f7671a674e3 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -630,7 +630,8 @@ struct inode { @@ -82380,7 +82955,7 @@ index 9ad5e3520fae5..1f7671a674e34 100644 } diff --git a/include/linux/generic-radix-tree.h b/include/linux/generic-radix-tree.h -index 107613f7d7920..c74b7376990d5 100644 +index 107613f7d792..c74b7376990d 100644 --- a/include/linux/generic-radix-tree.h +++ b/include/linux/generic-radix-tree.h @@ -38,6 +38,7 @@ @@ -82491,7 +83066,7 @@ index 107613f7d7920..c74b7376990d5 100644 /** diff --git a/include/linux/kernel.h b/include/linux/kernel.h -index fe6efb24d151a..9ba5a53c6ad55 100644 +index fe6efb24d151..9ba5a53c6ad5 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -202,11 +202,17 @@ static inline void might_fault(void) { } @@ -82526,7 +83101,7 @@ index fe6efb24d151a..9ba5a53c6ad55 100644 /* diff --git a/include/linux/list_bl.h b/include/linux/list_bl.h -index ae1b541446c90..8ee2bf5af1318 100644 +index ae1b541446c9..8ee2bf5af131 100644 --- a/include/linux/list_bl.h +++ b/include/linux/list_bl.h @@ -143,6 +143,28 @@ static inline void hlist_bl_del_init(struct hlist_bl_node *n) @@ -82559,10 +83134,10 @@ index ae1b541446c90..8ee2bf5af1318 100644 { bit_spin_lock(0, (unsigned long *)b); diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h -index b6829b9700936..5b90b2abd326c 100644 +index 1f1099dac3f0..e027c504b7d3 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h -@@ -335,6 +335,8 @@ extern void lock_unpin_lock(struct lockdep_map *lock, struct pin_cookie); +@@ -339,6 +339,8 @@ extern void lock_unpin_lock(struct lockdep_map *lock, struct pin_cookie); #define lockdep_repin_lock(l,c) lock_repin_lock(&(l)->dep_map, (c)) #define lockdep_unpin_lock(l,c) lock_unpin_lock(&(l)->dep_map, (c)) @@ -82571,7 +83146,7 @@ index b6829b9700936..5b90b2abd326c 100644 #else /* !CONFIG_LOCKDEP */ static inline void lockdep_init_task(struct task_struct *task) -@@ -423,6 +425,8 @@ extern int lockdep_is_held(const void *); +@@ -427,6 +429,8 @@ extern int lockdep_is_held(const void *); #define lockdep_repin_lock(l, c) do { (void)(l); (void)(c); } while (0) #define lockdep_unpin_lock(l, c) do { (void)(l); (void)(c); } while (0) @@ -82582,7 +83157,7 @@ index b6829b9700936..5b90b2abd326c 100644 enum xhlock_context_t { diff --git a/include/linux/pretty-printers.h b/include/linux/pretty-printers.h new file mode 100644 -index 0000000000000..f39d8edfba025 +index 000000000000..f39d8edfba02 --- /dev/null +++ b/include/linux/pretty-printers.h @@ -0,0 +1,10 @@ @@ -82598,10 +83173,10 @@ index 0000000000000..f39d8edfba025 +#endif /* _LINUX_PRETTY_PRINTERS_H */ diff --git a/include/linux/printbuf.h b/include/linux/printbuf.h new file mode 100644 -index 0000000000000..861c5d75f852a +index 000000000000..24e62e56d18c --- /dev/null +++ b/include/linux/printbuf.h -@@ -0,0 +1,283 @@ +@@ -0,0 +1,306 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +/* Copyright (C) 2022 Kent Overstreet */ + @@ -82675,6 +83250,8 @@ index 0000000000000..861c5d75f852a + PRINTBUF_UNITS_10, /* use 
powers of 10^3 (standard SI) */ +}; + ++#define PRINTBUF_INLINE_TABSTOPS 4 ++ +struct printbuf { + char *buf; + unsigned size; @@ -82690,19 +83267,34 @@ index 0000000000000..861c5d75f852a + bool heap_allocated:1; + enum printbuf_si si_units:1; + bool human_readable_units:1; -+ u8 tabstop; -+ u8 tabstops[4]; ++ bool has_indent_or_tabstops:1; ++ bool suppress_indent_tabstop_handling:1; ++ u8 nr_tabstops; ++ ++ /* ++ * Do not modify directly: use printbuf_tabstop_add(), ++ * printbuf_tabstop_get() ++ */ ++ u8 cur_tabstop; ++ u8 _tabstops[PRINTBUF_INLINE_TABSTOPS]; +}; + +int printbuf_make_room(struct printbuf *, unsigned); +const char *printbuf_str(const struct printbuf *); +void printbuf_exit(struct printbuf *); + -+void prt_newline(struct printbuf *); ++void printbuf_tabstops_reset(struct printbuf *); ++void printbuf_tabstop_pop(struct printbuf *); ++int printbuf_tabstop_push(struct printbuf *, unsigned); ++ +void printbuf_indent_add(struct printbuf *, unsigned); +void printbuf_indent_sub(struct printbuf *, unsigned); ++ ++void prt_newline(struct printbuf *); +void prt_tab(struct printbuf *); +void prt_tab_rjust(struct printbuf *); ++ ++void prt_bytes_indented(struct printbuf *, const char *, unsigned); +void prt_human_readable_u64(struct printbuf *, u64); +void prt_human_readable_s64(struct printbuf *, s64); +void prt_units_u64(struct printbuf *, u64); @@ -82815,6 +83407,11 @@ index 0000000000000..861c5d75f852a + prt_bytes(out, str, strlen(str)); +} + ++static inline void prt_str_indented(struct printbuf *out, const char *str) ++{ ++ prt_bytes_indented(out, str, strlen(str)); ++} ++ +static inline void prt_hex_byte(struct printbuf *out, u8 byte) +{ + printbuf_make_room(out, 2); @@ -82839,7 +83436,8 @@ index 0000000000000..861c5d75f852a + buf->pos = 0; + buf->allocation_failure = 0; + buf->indent = 0; -+ buf->tabstop = 0; ++ buf->nr_tabstops = 0; ++ buf->cur_tabstop = 0; +} + +/** @@ -82886,7 +83484,7 @@ index 0000000000000..861c5d75f852a + +#endif /* _LINUX_PRINTBUF_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h -index c46f3a63b758f..5038c87db7402 100644 +index 6d877c7e22ff..8bc99de02247 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -857,6 +857,7 @@ struct task_struct { @@ -82899,7 +83497,7 @@ index c46f3a63b758f..5038c87db7402 100644 struct vmacache vmacache; diff --git a/include/linux/seq_buf.h b/include/linux/seq_buf.h deleted file mode 100644 -index 5b31c51479694..0000000000000 +index 5b31c5147969..000000000000 --- a/include/linux/seq_buf.h +++ /dev/null @@ -1,162 +0,0 @@ @@ -83066,7 +83664,7 @@ index 5b31c51479694..0000000000000 - -#endif /* _LINUX_SEQ_BUF_H */ diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h -index 76fbf92b04d95..12967748f9f7b 100644 +index 76fbf92b04d9..12967748f9f7 100644 --- a/include/linux/shrinker.h +++ b/include/linux/shrinker.h @@ -2,6 +2,8 @@ @@ -83109,10 +83707,10 @@ index 76fbf92b04d95..12967748f9f7b 100644 #endif diff --git a/include/linux/six.h b/include/linux/six.h new file mode 100644 -index 0000000000000..477c33eb00d7d +index 000000000000..f336ae049df8 --- /dev/null +++ b/include/linux/six.h -@@ -0,0 +1,203 @@ +@@ -0,0 +1,220 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _LINUX_SIX_H @@ -83174,7 +83772,6 @@ index 0000000000000..477c33eb00d7d + */ + +#include -+#include +#include +#include + @@ -83220,18 +83817,23 @@ index 0000000000000..477c33eb00d7d + +struct six_lock { + union six_lock_state state; -+ unsigned intent_lock_recurse; + struct task_struct *owner; -+ struct optimistic_spin_queue 
osq; + unsigned __percpu *readers; -+ ++ unsigned intent_lock_recurse; + raw_spinlock_t wait_lock; -+ struct list_head wait_list[2]; ++ struct list_head wait_list; +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lockdep_map dep_map; +#endif +}; + ++struct six_lock_waiter { ++ struct list_head list; ++ struct task_struct *task; ++ enum six_lock_type lock_want; ++ bool lock_acquired; ++}; ++ +typedef int (*six_lock_should_sleep_fn)(struct six_lock *lock, void *); + +static __always_inline void __six_lock_init(struct six_lock *lock, @@ -83240,8 +83842,7 @@ index 0000000000000..477c33eb00d7d +{ + atomic64_set(&lock->state.counter, 0); + raw_spin_lock_init(&lock->wait_lock); -+ INIT_LIST_HEAD(&lock->wait_list[SIX_LOCK_read]); -+ INIT_LIST_HEAD(&lock->wait_list[SIX_LOCK_intent]); ++ INIT_LIST_HEAD(&lock->wait_list); +#ifdef CONFIG_DEBUG_LOCK_ALLOC + debug_check_no_locks_freed((void *) lock, sizeof(*lock)); + lockdep_init_map(&lock->dep_map, name, key, 0); @@ -83261,6 +83862,8 @@ index 0000000000000..477c33eb00d7d +bool six_trylock_##type(struct six_lock *); \ +bool six_relock_##type(struct six_lock *, u32); \ +int six_lock_##type(struct six_lock *, six_lock_should_sleep_fn, void *);\ ++int six_lock_waiter_##type(struct six_lock *, struct six_lock_waiter *, \ ++ six_lock_should_sleep_fn, void *); \ +void six_unlock_##type(struct six_lock *); + +__SIX_LOCK(read) @@ -83297,6 +83900,13 @@ index 0000000000000..477c33eb00d7d + SIX_LOCK_DISPATCH(type, six_lock, lock, should_sleep_fn, p); +} + ++static inline int six_lock_type_waiter(struct six_lock *lock, enum six_lock_type type, ++ struct six_lock_waiter *wait, ++ six_lock_should_sleep_fn should_sleep_fn, void *p) ++{ ++ SIX_LOCK_DISPATCH(type, six_lock_waiter, lock, wait, should_sleep_fn, p); ++} ++ +static inline void six_unlock_type(struct six_lock *lock, enum six_lock_type type) +{ + SIX_LOCK_DISPATCH(type, six_unlock, lock); @@ -83311,13 +83921,18 @@ index 0000000000000..477c33eb00d7d + +void six_lock_wakeup_all(struct six_lock *); + -+void six_lock_pcpu_free_rcu(struct six_lock *); +void six_lock_pcpu_free(struct six_lock *); +void six_lock_pcpu_alloc(struct six_lock *); + ++struct six_lock_count { ++ unsigned n[3]; ++}; ++ ++struct six_lock_count six_lock_counts(struct six_lock *); ++ +#endif /* _LINUX_SIX_H */ diff --git a/include/linux/string.h b/include/linux/string.h -index 61ec7e4f6311a..22a45d553fbc0 100644 +index 61ec7e4f6311..22a45d553fbc 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -195,7 +195,12 @@ int __sysfs_match_string(const char * const *array, size_t n, const char *s); @@ -83334,7 +83949,7 @@ index 61ec7e4f6311a..22a45d553fbc0 100644 int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf); int bprintf(u32 *bin_buf, size_t size, const char *fmt, ...) 
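The six.h header in the hunk above generates per-type entry points (six_lock_read(), six_lock_intent(), ...) via __SIX_LOCK() and routes runtime-typed callers to them through SIX_LOCK_DISPATCH(), a token-pasting switch; the real header generates trylock, relock, lock, lock_waiter, and unlock variants per type. A compilable userspace sketch of that dispatch shape, with hypothetical names:

#include <stdio.h>

enum lock_type { LOCK_read, LOCK_intent, LOCK_write };

/* One definition per lock type, like __SIX_LOCK() generates. */
#define DEFINE_LOCK_FN(type)				\
static int lock_##type(void)				\
{							\
	printf("took %s lock\n", #type);		\
	return 0;					\
}

DEFINE_LOCK_FN(read)
DEFINE_LOCK_FN(intent)
DEFINE_LOCK_FN(write)

/* Token-paste the runtime type into the per-type function name. */
#define LOCK_DISPATCH(type, fn)				\
	switch (type) {					\
	case LOCK_read:		return fn##_read();	\
	case LOCK_intent:	return fn##_intent();	\
	case LOCK_write:	return fn##_write();	\
	default:		return -1;		\
	}

static int take_lock(enum lock_type type)
{
	LOCK_DISPATCH(type, lock);
}

int main(void)
{
	return take_lock(LOCK_intent);
}

Keeping separate symbols per type lets hot paths call, say, six_lock_intent() directly with no branch, while generic code such as the btree locking layer can still dispatch on a runtime enum.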
__printf(3, 4); diff --git a/include/linux/string_helpers.h b/include/linux/string_helpers.h -index 4d72258d42fd9..52e0f1d283b9e 100644 +index 4d72258d42fd..52e0f1d283b9 100644 --- a/include/linux/string_helpers.h +++ b/include/linux/string_helpers.h @@ -10,6 +10,7 @@ @@ -83374,7 +83989,7 @@ index 4d72258d42fd9..52e0f1d283b9e 100644 unsigned int flags, const char *only) { diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h -index e6e95a9f07a52..48471e32f8e48 100644 +index b18759a673c6..4cb7cacb57b1 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -496,7 +496,7 @@ struct dynevent_cmd; @@ -83387,7 +84002,7 @@ index e6e95a9f07a52..48471e32f8e48 100644 unsigned int n_fields; enum dynevent_type type; diff --git a/include/linux/trace_seq.h b/include/linux/trace_seq.h -index 5a2c650d9e1c1..d2b51007b3b99 100644 +index 5a2c650d9e1c..d2b51007b3b9 100644 --- a/include/linux/trace_seq.h +++ b/include/linux/trace_seq.h @@ -2,10 +2,12 @@ @@ -83459,7 +84074,7 @@ index 5a2c650d9e1c1..d2b51007b3b99 100644 extern void trace_seq_bitmask(struct trace_seq *s, const unsigned long *maskp, diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h -index 096d48aa34373..8d11e2e4ddc8c 100644 +index 096d48aa3437..8d11e2e4ddc8 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -144,6 +144,7 @@ extern void *vzalloc(unsigned long size) __alloc_size(1); @@ -83470,92 +84085,12 @@ index 096d48aa34373..8d11e2e4ddc8c 100644 extern void *vmalloc_32(unsigned long size) __alloc_size(1); extern void *vmalloc_32_user(unsigned long size) __alloc_size(1); extern void *__vmalloc(unsigned long size, gfp_t gfp_mask) __alloc_size(1); -diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h -index 24a509f559ee2..0b20ee6854d6c 100644 ---- a/include/net/9p/9p.h -+++ b/include/net/9p/9p.h -@@ -539,12 +539,12 @@ struct p9_rstatfs { - struct p9_fcall { - u32 size; - u8 id; -+ bool used_mempool; - u16 tag; - - size_t offset; - size_t capacity; - -- struct kmem_cache *cache; - u8 *sdata; - }; - -diff --git a/include/net/9p/client.h b/include/net/9p/client.h -index ec1d1706f43c0..832dcc866a201 100644 ---- a/include/net/9p/client.h -+++ b/include/net/9p/client.h -@@ -9,6 +9,7 @@ - #ifndef NET_9P_CLIENT_H - #define NET_9P_CLIENT_H - -+#include - #include - #include - -@@ -76,7 +77,7 @@ enum p9_req_status_t { - struct p9_req_t { - int status; - int t_err; -- struct kref refcount; -+ refcount_t refcount; - wait_queue_head_t wq; - struct p9_fcall tc; - struct p9_fcall rc; -@@ -107,6 +108,14 @@ struct p9_client { - void *trans; - struct kmem_cache *fcall_cache; - -+ /* -+ * We need two identical mempools because it's not safe to allocate -+ * multiple elements from the same pool (without freeing the first); -+ * that will deadlock if multiple threads need the last element at the -+ * same time. 
-+ */ -+ mempool_t pools[2]; -+ - union { - struct { - int rfd; -@@ -222,20 +231,21 @@ int p9_client_mkdir_dotl(struct p9_fid *fid, const char *name, int mode, - kgid_t gid, struct p9_qid *qid); - int p9_client_lock_dotl(struct p9_fid *fid, struct p9_flock *flock, u8 *status); - int p9_client_getlock_dotl(struct p9_fid *fid, struct p9_getlock *fl); --void p9_fcall_fini(struct p9_fcall *fc); -+void p9_fcall_fini(struct p9_client *c, struct p9_fcall *fc, -+ int fc_idx); - struct p9_req_t *p9_tag_lookup(struct p9_client *c, u16 tag); - - static inline void p9_req_get(struct p9_req_t *r) - { -- kref_get(&r->refcount); -+ refcount_inc(&r->refcount); - } - - static inline int p9_req_try_get(struct p9_req_t *r) - { -- return kref_get_unless_zero(&r->refcount); -+ return refcount_inc_not_zero(&r->refcount); - } - --int p9_req_put(struct p9_req_t *r); -+int p9_req_put(struct p9_client *c, struct p9_req_t *r); - - void p9_client_cb(struct p9_client *c, struct p9_req_t *req, int status); - diff --git a/include/trace/events/bcachefs.h b/include/trace/events/bcachefs.h new file mode 100644 -index 0000000000000..1c7ca964748d5 +index 000000000000..ff5e6f7c914e --- /dev/null +++ b/include/trace/events/bcachefs.h -@@ -0,0 +1,1045 @@ +@@ -0,0 +1,1100 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM bcachefs @@ -83565,21 +84100,29 @@ index 0000000000000..1c7ca964748d5 + +#include + ++#define TRACE_BPOS_entries(name) \ ++ __field(u64, name##_inode ) \ ++ __field(u64, name##_offset ) \ ++ __field(u32, name##_snapshot ) ++ ++#define TRACE_BPOS_assign(dst, src) \ ++ __entry->dst##_inode = (src).inode; \ ++ __entry->dst##_offset = (src).offset; \ ++ __entry->dst##_snapshot = (src).snapshot ++ +DECLARE_EVENT_CLASS(bpos, -+ TP_PROTO(struct bpos *p), ++ TP_PROTO(const struct bpos *p), + TP_ARGS(p), + + TP_STRUCT__entry( -+ __field(u64, inode ) -+ __field(u64, offset ) ++ TRACE_BPOS_entries(p) + ), + + TP_fast_assign( -+ __entry->inode = p->inode; -+ __entry->offset = p->offset; ++ TRACE_BPOS_assign(p, *p); + ), + -+ TP_printk("%llu:%llu", __entry->inode, __entry->offset) ++ TP_printk("%llu:%llu:%u", __entry->p_inode, __entry->p_offset, __entry->p_snapshot) +); + +DECLARE_EVENT_CLASS(bkey, @@ -83602,6 +84145,31 @@ index 0000000000000..1c7ca964748d5 + __entry->offset, __entry->size) +); + ++DECLARE_EVENT_CLASS(btree_node, ++ TP_PROTO(struct bch_fs *c, struct btree *b), ++ TP_ARGS(c, b), ++ ++ TP_STRUCT__entry( ++ __field(dev_t, dev ) ++ __field(u8, level ) ++ __field(u8, btree_id ) ++ TRACE_BPOS_entries(pos) ++ ), ++ ++ TP_fast_assign( ++ __entry->dev = c->dev; ++ __entry->level = b->c.level; ++ __entry->btree_id = b->c.btree_id; ++ TRACE_BPOS_assign(pos, b->key.k.p); ++ ), ++ ++ TP_printk("%d,%d %u %s %llu:%llu:%u", ++ MAJOR(__entry->dev), MINOR(__entry->dev), ++ __entry->level, ++ bch2_btree_ids[__entry->btree_id], ++ __entry->pos_inode, __entry->pos_offset, __entry->pos_snapshot) ++); ++ +DECLARE_EVENT_CLASS(bch_fs, + TP_PROTO(struct bch_fs *c), + TP_ARGS(c), @@ -83662,7 +84230,7 @@ index 0000000000000..1c7ca964748d5 + +/* io.c: */ + -+DEFINE_EVENT(bio, read_split, ++DEFINE_EVENT(bio, read_promote, + TP_PROTO(struct bio *bio), + TP_ARGS(bio) +); @@ -83672,12 +84240,17 @@ index 0000000000000..1c7ca964748d5 + TP_ARGS(bio) +); + ++DEFINE_EVENT(bio, read_split, ++ TP_PROTO(struct bio *bio), ++ TP_ARGS(bio) ++); ++ +DEFINE_EVENT(bio, read_retry, + TP_PROTO(struct bio *bio), + TP_ARGS(bio) +); + -+DEFINE_EVENT(bio, promote, ++DEFINE_EVENT(bio, read_reuse_race, + TP_PROTO(struct bio 
*bio), + TP_ARGS(bio) +); @@ -83770,49 +84343,68 @@ index 0000000000000..1c7ca964748d5 + __entry->nr_flushed) +); + -+/* allocator: */ -+ +/* bset.c: */ + +DEFINE_EVENT(bpos, bkey_pack_pos_fail, -+ TP_PROTO(struct bpos *p), ++ TP_PROTO(const struct bpos *p), + TP_ARGS(p) +); + -+/* Btree */ ++/* Btree cache: */ + -+DECLARE_EVENT_CLASS(btree_node, -+ TP_PROTO(struct bch_fs *c, struct btree *b), -+ TP_ARGS(c, b), ++TRACE_EVENT(btree_cache_scan, ++ TP_PROTO(long nr_to_scan, long can_free, long ret), ++ TP_ARGS(nr_to_scan, can_free, ret), + + TP_STRUCT__entry( -+ __field(dev_t, dev ) -+ __field(u8, level ) -+ __field(u8, id ) -+ __field(u64, inode ) -+ __field(u64, offset ) ++ __field(long, nr_to_scan ) ++ __field(long, can_free ) ++ __field(long, ret ) + ), + + TP_fast_assign( -+ __entry->dev = c->dev; -+ __entry->level = b->c.level; -+ __entry->id = b->c.btree_id; -+ __entry->inode = b->key.k.p.inode; -+ __entry->offset = b->key.k.p.offset; ++ __entry->nr_to_scan = nr_to_scan; ++ __entry->can_free = can_free; ++ __entry->ret = ret; + ), + -+ TP_printk("%d,%d %u id %u %llu:%llu", -+ MAJOR(__entry->dev), MINOR(__entry->dev), -+ __entry->level, __entry->id, -+ __entry->inode, __entry->offset) ++ TP_printk("scanned for %li nodes, can free %li, ret %li", ++ __entry->nr_to_scan, __entry->can_free, __entry->ret) +); + -+DEFINE_EVENT(btree_node, btree_read, ++DEFINE_EVENT(btree_node, btree_cache_reap, + TP_PROTO(struct bch_fs *c, struct btree *b), + TP_ARGS(c, b) +); + -+TRACE_EVENT(btree_write, ++DEFINE_EVENT(bch_fs, btree_cache_cannibalize_lock_fail, ++ TP_PROTO(struct bch_fs *c), ++ TP_ARGS(c) ++); ++ ++DEFINE_EVENT(bch_fs, btree_cache_cannibalize_lock, ++ TP_PROTO(struct bch_fs *c), ++ TP_ARGS(c) ++); ++ ++DEFINE_EVENT(bch_fs, btree_cache_cannibalize, ++ TP_PROTO(struct bch_fs *c), ++ TP_ARGS(c) ++); ++ ++DEFINE_EVENT(bch_fs, btree_cache_cannibalize_unlock, ++ TP_PROTO(struct bch_fs *c), ++ TP_ARGS(c) ++); ++ ++/* Btree */ ++ ++DEFINE_EVENT(btree_node, btree_node_read, ++ TP_PROTO(struct bch_fs *c, struct btree *b), ++ TP_ARGS(c, b) ++); ++ ++TRACE_EVENT(btree_node_write, + TP_PROTO(struct btree *b, unsigned bytes, unsigned sectors), + TP_ARGS(b, bytes, sectors), + @@ -83842,135 +84434,91 @@ index 0000000000000..1c7ca964748d5 + TP_ARGS(c, b) +); + -+DEFINE_EVENT(btree_node, btree_node_reap, -+ TP_PROTO(struct bch_fs *c, struct btree *b), -+ TP_ARGS(c, b) -+); -+ -+DEFINE_EVENT(bch_fs, btree_node_cannibalize_lock_fail, -+ TP_PROTO(struct bch_fs *c), -+ TP_ARGS(c) -+); -+ -+DEFINE_EVENT(bch_fs, btree_node_cannibalize_lock, -+ TP_PROTO(struct bch_fs *c), -+ TP_ARGS(c) -+); -+ -+DEFINE_EVENT(bch_fs, btree_node_cannibalize, -+ TP_PROTO(struct bch_fs *c), -+ TP_ARGS(c) -+); -+ -+DEFINE_EVENT(bch_fs, btree_node_cannibalize_unlock, -+ TP_PROTO(struct bch_fs *c), -+ TP_ARGS(c) -+); -+ +TRACE_EVENT(btree_reserve_get_fail, -+ TP_PROTO(struct bch_fs *c, size_t required, struct closure *cl), -+ TP_ARGS(c, required, cl), -+ -+ TP_STRUCT__entry( -+ __field(dev_t, dev ) -+ __field(size_t, required ) -+ __field(struct closure *, cl ) -+ ), -+ -+ TP_fast_assign( -+ __entry->dev = c->dev; -+ __entry->required = required; -+ __entry->cl = cl; -+ ), -+ -+ TP_printk("%d,%d required %zu by %p", -+ MAJOR(__entry->dev), MINOR(__entry->dev), -+ __entry->required, __entry->cl) -+); -+ -+DEFINE_EVENT(btree_node, btree_split, -+ TP_PROTO(struct bch_fs *c, struct btree *b), -+ TP_ARGS(c, b) -+); -+ -+DEFINE_EVENT(btree_node, btree_compact, -+ TP_PROTO(struct bch_fs *c, struct btree *b), -+ TP_ARGS(c, b) -+); -+ 
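Two patterns recur through the rewritten tracepoints above: the repeated bpos triplet is factored into TRACE_BPOS_entries()/TRACE_BPOS_assign(), and TP_fast_assign() now copies trans_fn with strlcpy() rather than strncpy(), guaranteeing NUL termination. A self-contained sketch of the field-triplet macro pair (userspace; struct and macro names here are illustrative):

#include <stdio.h>
#include <stdint.h>

struct bpos { uint64_t inode; uint64_t offset; uint32_t snapshot; };

/* Declare the three components of a position under a common prefix. */
#define BPOS_ENTRIES(name)		\
	uint64_t name##_inode;		\
	uint64_t name##_offset;		\
	uint32_t name##_snapshot;

/* Fill all three components in one statement. */
#define BPOS_ASSIGN(e, dst, src)			\
do {							\
	(e)->dst##_inode    = (src).inode;		\
	(e)->dst##_offset   = (src).offset;		\
	(e)->dst##_snapshot = (src).snapshot;		\
} while (0)

struct trace_entry {
	BPOS_ENTRIES(pos)
};

int main(void)
{
	struct bpos p = { .inode = 4096, .offset = 8, .snapshot = 1 };
	struct trace_entry e;

	BPOS_ASSIGN(&e, pos, p);
	printf("%llu:%llu:%u\n",
	       (unsigned long long) e.pos_inode,
	       (unsigned long long) e.pos_offset,
	       (unsigned) e.pos_snapshot);
	return 0;
}

Every event that records a position declares and fills the same three fields the same way, so the TP_printk() format strings can rely on a uniform %llu:%llu:%u layout.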
-+DEFINE_EVENT(btree_node, btree_merge, -+ TP_PROTO(struct bch_fs *c, struct btree *b), -+ TP_ARGS(c, b) -+); -+ -+DEFINE_EVENT(btree_node, btree_rewrite, -+ TP_PROTO(struct bch_fs *c, struct btree *b), -+ TP_ARGS(c, b) -+); -+ -+DEFINE_EVENT(btree_node, btree_set_root, -+ TP_PROTO(struct bch_fs *c, struct btree *b), -+ TP_ARGS(c, b) -+); -+ -+TRACE_EVENT(btree_cache_scan, -+ TP_PROTO(long nr_to_scan, long can_free, long ret), -+ TP_ARGS(nr_to_scan, can_free, ret), -+ -+ TP_STRUCT__entry( -+ __field(long, nr_to_scan ) -+ __field(long, can_free ) -+ __field(long, ret ) -+ ), -+ -+ TP_fast_assign( -+ __entry->nr_to_scan = nr_to_scan; -+ __entry->can_free = can_free; -+ __entry->ret = ret; -+ ), -+ -+ TP_printk("scanned for %li nodes, can free %li, ret %li", -+ __entry->nr_to_scan, __entry->can_free, __entry->ret) -+); -+ -+TRACE_EVENT(btree_node_relock_fail, + TP_PROTO(const char *trans_fn, + unsigned long caller_ip, -+ enum btree_id btree_id, -+ struct bpos *pos, -+ unsigned long node, -+ u32 iter_lock_seq, -+ u32 node_lock_seq), -+ TP_ARGS(trans_fn, caller_ip, btree_id, pos, node, iter_lock_seq, node_lock_seq), ++ size_t required), ++ TP_ARGS(trans_fn, caller_ip, required), + + TP_STRUCT__entry( -+ __array(char, trans_fn, 24 ) ++ __array(char, trans_fn, 32 ) ++ __field(unsigned long, caller_ip ) ++ __field(size_t, required ) ++ ), ++ ++ TP_fast_assign( ++ strlcpy(__entry->trans_fn, trans_fn, sizeof(__entry->trans_fn)); ++ __entry->caller_ip = caller_ip; ++ __entry->required = required; ++ ), ++ ++ TP_printk("%s %pS required %zu", ++ __entry->trans_fn, ++ (void *) __entry->caller_ip, ++ __entry->required) ++); ++ ++DEFINE_EVENT(btree_node, btree_node_compact, ++ TP_PROTO(struct bch_fs *c, struct btree *b), ++ TP_ARGS(c, b) ++); ++ ++DEFINE_EVENT(btree_node, btree_node_merge, ++ TP_PROTO(struct bch_fs *c, struct btree *b), ++ TP_ARGS(c, b) ++); ++ ++DEFINE_EVENT(btree_node, btree_node_split, ++ TP_PROTO(struct bch_fs *c, struct btree *b), ++ TP_ARGS(c, b) ++); ++ ++DEFINE_EVENT(btree_node, btree_node_rewrite, ++ TP_PROTO(struct bch_fs *c, struct btree *b), ++ TP_ARGS(c, b) ++); ++ ++DEFINE_EVENT(btree_node, btree_node_set_root, ++ TP_PROTO(struct bch_fs *c, struct btree *b), ++ TP_ARGS(c, b) ++); ++ ++TRACE_EVENT(btree_path_relock_fail, ++ TP_PROTO(struct btree_trans *trans, ++ unsigned long caller_ip, ++ struct btree_path *path, ++ unsigned level), ++ TP_ARGS(trans, caller_ip, path, level), ++ ++ TP_STRUCT__entry( ++ __array(char, trans_fn, 32 ) + __field(unsigned long, caller_ip ) + __field(u8, btree_id ) -+ __field(u64, pos_inode ) -+ __field(u64, pos_offset ) -+ __field(u32, pos_snapshot ) -+ __field(unsigned long, node ) ++ TRACE_BPOS_entries(pos) ++ __array(char, node, 24 ) + __field(u32, iter_lock_seq ) + __field(u32, node_lock_seq ) + ), + + TP_fast_assign( -+ strncpy(__entry->trans_fn, trans_fn, sizeof(__entry->trans_fn)); ++ struct btree *b = btree_path_node(path, level); ++ ++ strlcpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn)); + __entry->caller_ip = caller_ip; -+ __entry->btree_id = btree_id; -+ __entry->pos_inode = pos->inode; -+ __entry->pos_offset = pos->offset; -+ __entry->pos_snapshot = pos->snapshot; -+ __entry->node = node; -+ __entry->iter_lock_seq = iter_lock_seq; -+ __entry->node_lock_seq = node_lock_seq; ++ __entry->btree_id = path->btree_id; ++ TRACE_BPOS_assign(pos, path->pos); ++ if (IS_ERR(b)) ++ strscpy(__entry->node, bch2_err_str(PTR_ERR(b)), sizeof(__entry->node)); ++ else ++ scnprintf(__entry->node, sizeof(__entry->node), "%px", b); ++ 
__entry->iter_lock_seq = path->l[level].lock_seq; ++ __entry->node_lock_seq = is_btree_node(path, level) ? path->l[level].b->c.lock.state.seq : 0; + ), + -+ TP_printk("%s %pS btree %u pos %llu:%llu:%u, node %lu iter seq %u lock seq %u", ++ TP_printk("%s %pS btree %s pos %llu:%llu:%u, node %s iter seq %u lock seq %u", + __entry->trans_fn, + (void *) __entry->caller_ip, -+ __entry->btree_id, ++ bch2_btree_ids[__entry->btree_id], + __entry->pos_inode, + __entry->pos_offset, + __entry->pos_snapshot, @@ -83979,6 +84527,56 @@ index 0000000000000..1c7ca964748d5 + __entry->node_lock_seq) +); + ++TRACE_EVENT(btree_path_upgrade_fail, ++ TP_PROTO(struct btree_trans *trans, ++ unsigned long caller_ip, ++ struct btree_path *path, ++ unsigned level), ++ TP_ARGS(trans, caller_ip, path, level), ++ ++ TP_STRUCT__entry( ++ __array(char, trans_fn, 32 ) ++ __field(unsigned long, caller_ip ) ++ __field(u8, btree_id ) ++ TRACE_BPOS_entries(pos) ++ __field(u8, locked ) ++ __field(u8, self_read_count ) ++ __field(u8, self_intent_count) ++ __field(u8, read_count ) ++ __field(u8, intent_count ) ++ ), ++ ++ TP_fast_assign( ++ struct six_lock_count c; ++ ++ strlcpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn)); ++ __entry->caller_ip = caller_ip; ++ __entry->btree_id = path->btree_id; ++ TRACE_BPOS_assign(pos, path->pos); ++ __entry->locked = btree_node_locked(path, level); ++ ++ c = bch2_btree_node_lock_counts(trans, NULL, &path->l[level].b->c, level), ++ __entry->self_read_count = c.n[SIX_LOCK_read]; ++ __entry->self_intent_count = c.n[SIX_LOCK_intent]; ++ c = six_lock_counts(&path->l[level].b->c.lock); ++ __entry->read_count = c.n[SIX_LOCK_read]; ++ __entry->intent_count = c.n[SIX_LOCK_read]; ++ ), ++ ++ TP_printk("%s %pS btree %s pos %llu:%llu:%u, locked %u held %u:%u lock count %u:%u", ++ __entry->trans_fn, ++ (void *) __entry->caller_ip, ++ bch2_btree_ids[__entry->btree_id], ++ __entry->pos_inode, ++ __entry->pos_offset, ++ __entry->pos_snapshot, ++ __entry->locked, ++ __entry->self_read_count, ++ __entry->self_intent_count, ++ __entry->read_count, ++ __entry->intent_count) ++); ++ +/* Garbage collection */ + +DEFINE_EVENT(bch_fs, gc_gens_start, @@ -84104,7 +84702,7 @@ index 0000000000000..1c7ca964748d5 + __entry->err) +); + -+TRACE_EVENT(invalidate_bucket, ++TRACE_EVENT(bucket_invalidate, + TP_PROTO(struct bch_fs *c, unsigned dev, u64 bucket, u32 sectors), + TP_ARGS(c, dev, bucket, sectors), + @@ -84130,17 +84728,27 @@ index 0000000000000..1c7ca964748d5 + +/* Moving IO */ + -+DEFINE_EVENT(bkey, move_extent, ++DEFINE_EVENT(bkey, move_extent_read, + TP_PROTO(const struct bkey *k), + TP_ARGS(k) +); + -+DEFINE_EVENT(bkey, move_alloc_mem_fail, ++DEFINE_EVENT(bkey, move_extent_write, + TP_PROTO(const struct bkey *k), + TP_ARGS(k) +); + -+DEFINE_EVENT(bkey, move_race, ++DEFINE_EVENT(bkey, move_extent_finish, ++ TP_PROTO(const struct bkey *k), ++ TP_ARGS(k) ++); ++ ++DEFINE_EVENT(bkey, move_extent_race, ++ TP_PROTO(const struct bkey *k), ++ TP_ARGS(k) ++); ++ ++DEFINE_EVENT(bkey, move_extent_alloc_mem_fail, + TP_PROTO(const struct bkey *k), + TP_ARGS(k) +); @@ -84219,18 +84827,20 @@ index 0000000000000..1c7ca964748d5 + __entry->wait_amount, __entry->until) +); + ++/* btree transactions: */ ++ +DECLARE_EVENT_CLASS(transaction_event, -+ TP_PROTO(const char *trans_fn, ++ TP_PROTO(struct btree_trans *trans, + unsigned long caller_ip), -+ TP_ARGS(trans_fn, caller_ip), ++ TP_ARGS(trans, caller_ip), + + TP_STRUCT__entry( -+ __array(char, trans_fn, 24 ) ++ __array(char, trans_fn, 32 ) + __field(unsigned long, 
caller_ip ) + ), + + TP_fast_assign( -+ strncpy(__entry->trans_fn, trans_fn, sizeof(__entry->trans_fn)); ++ strlcpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn)); + __entry->caller_ip = caller_ip; + ), + @@ -84238,265 +84848,249 @@ index 0000000000000..1c7ca964748d5 +); + +DEFINE_EVENT(transaction_event, transaction_commit, -+ TP_PROTO(const char *trans_fn, ++ TP_PROTO(struct btree_trans *trans, + unsigned long caller_ip), -+ TP_ARGS(trans_fn, caller_ip) ++ TP_ARGS(trans, caller_ip) +); + -+DEFINE_EVENT(transaction_event, transaction_restart_ip, -+ TP_PROTO(const char *trans_fn, ++DEFINE_EVENT(transaction_event, trans_restart_injected, ++ TP_PROTO(struct btree_trans *trans, + unsigned long caller_ip), -+ TP_ARGS(trans_fn, caller_ip) -+); -+ -+DEFINE_EVENT(transaction_event, transaction_restart_injected, -+ TP_PROTO(const char *trans_fn, -+ unsigned long caller_ip), -+ TP_ARGS(trans_fn, caller_ip) ++ TP_ARGS(trans, caller_ip) +); + +DEFINE_EVENT(transaction_event, trans_blocked_journal_reclaim, -+ TP_PROTO(const char *trans_fn, ++ TP_PROTO(struct btree_trans *trans, + unsigned long caller_ip), -+ TP_ARGS(trans_fn, caller_ip) ++ TP_ARGS(trans, caller_ip) +); + +DEFINE_EVENT(transaction_event, trans_restart_journal_res_get, -+ TP_PROTO(const char *trans_fn, ++ TP_PROTO(struct btree_trans *trans, + unsigned long caller_ip), -+ TP_ARGS(trans_fn, caller_ip) ++ TP_ARGS(trans, caller_ip) +); + -+DEFINE_EVENT(transaction_event, trans_restart_journal_preres_get, -+ TP_PROTO(const char *trans_fn, -+ unsigned long caller_ip), -+ TP_ARGS(trans_fn, caller_ip) -+); + -+DEFINE_EVENT(transaction_event, trans_restart_journal_reclaim, -+ TP_PROTO(const char *trans_fn, -+ unsigned long caller_ip), -+ TP_ARGS(trans_fn, caller_ip) -+); -+ -+DEFINE_EVENT(transaction_event, trans_restart_fault_inject, -+ TP_PROTO(const char *trans_fn, -+ unsigned long caller_ip), -+ TP_ARGS(trans_fn, caller_ip) -+); -+ -+DEFINE_EVENT(transaction_event, trans_traverse_all, -+ TP_PROTO(const char *trans_fn, -+ unsigned long caller_ip), -+ TP_ARGS(trans_fn, caller_ip) -+); -+ -+DEFINE_EVENT(transaction_event, trans_restart_mark_replicas, -+ TP_PROTO(const char *trans_fn, -+ unsigned long caller_ip), -+ TP_ARGS(trans_fn, caller_ip) -+); -+ -+DEFINE_EVENT(transaction_event, trans_restart_key_cache_raced, -+ TP_PROTO(const char *trans_fn, -+ unsigned long caller_ip), -+ TP_ARGS(trans_fn, caller_ip) -+); -+ -+DEFINE_EVENT(transaction_event, trans_restart_too_many_iters, -+ TP_PROTO(const char *trans_fn, -+ unsigned long caller_ip), -+ TP_ARGS(trans_fn, caller_ip) -+); -+ -+DECLARE_EVENT_CLASS(transaction_restart_iter, -+ TP_PROTO(const char *trans_fn, ++TRACE_EVENT(trans_restart_journal_preres_get, ++ TP_PROTO(struct btree_trans *trans, + unsigned long caller_ip, -+ enum btree_id btree_id, -+ struct bpos *pos), -+ TP_ARGS(trans_fn, caller_ip, btree_id, pos), ++ unsigned flags), ++ TP_ARGS(trans, caller_ip, flags), + + TP_STRUCT__entry( -+ __array(char, trans_fn, 24 ) ++ __array(char, trans_fn, 32 ) + __field(unsigned long, caller_ip ) -+ __field(u8, btree_id ) -+ __field(u64, pos_inode ) -+ __field(u64, pos_offset ) -+ __field(u32, pos_snapshot ) ++ __field(unsigned, flags ) + ), + + TP_fast_assign( -+ strncpy(__entry->trans_fn, trans_fn, sizeof(__entry->trans_fn)); ++ strlcpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn)); + __entry->caller_ip = caller_ip; -+ __entry->btree_id = btree_id; -+ __entry->pos_inode = pos->inode; -+ __entry->pos_offset = pos->offset; -+ __entry->pos_snapshot = pos->snapshot; ++ 
__entry->flags = flags; + ), + -+ TP_printk("%s %pS btree %u pos %llu:%llu:%u", ++ TP_printk("%s %pS %x", __entry->trans_fn, ++ (void *) __entry->caller_ip, ++ __entry->flags) ++); ++ ++DEFINE_EVENT(transaction_event, trans_restart_journal_reclaim, ++ TP_PROTO(struct btree_trans *trans, ++ unsigned long caller_ip), ++ TP_ARGS(trans, caller_ip) ++); ++ ++DEFINE_EVENT(transaction_event, trans_restart_fault_inject, ++ TP_PROTO(struct btree_trans *trans, ++ unsigned long caller_ip), ++ TP_ARGS(trans, caller_ip) ++); ++ ++DEFINE_EVENT(transaction_event, trans_traverse_all, ++ TP_PROTO(struct btree_trans *trans, ++ unsigned long caller_ip), ++ TP_ARGS(trans, caller_ip) ++); ++ ++DEFINE_EVENT(transaction_event, trans_restart_mark_replicas, ++ TP_PROTO(struct btree_trans *trans, ++ unsigned long caller_ip), ++ TP_ARGS(trans, caller_ip) ++); ++ ++DEFINE_EVENT(transaction_event, trans_restart_key_cache_raced, ++ TP_PROTO(struct btree_trans *trans, ++ unsigned long caller_ip), ++ TP_ARGS(trans, caller_ip) ++); ++ ++DEFINE_EVENT(transaction_event, trans_restart_too_many_iters, ++ TP_PROTO(struct btree_trans *trans, ++ unsigned long caller_ip), ++ TP_ARGS(trans, caller_ip) ++); ++ ++DECLARE_EVENT_CLASS(transaction_restart_iter, ++ TP_PROTO(struct btree_trans *trans, ++ unsigned long caller_ip, ++ struct btree_path *path), ++ TP_ARGS(trans, caller_ip, path), ++ ++ TP_STRUCT__entry( ++ __array(char, trans_fn, 32 ) ++ __field(unsigned long, caller_ip ) ++ __field(u8, btree_id ) ++ TRACE_BPOS_entries(pos) ++ ), ++ ++ TP_fast_assign( ++ strlcpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn)); ++ __entry->caller_ip = caller_ip; ++ __entry->btree_id = path->btree_id; ++ TRACE_BPOS_assign(pos, path->pos) ++ ), ++ ++ TP_printk("%s %pS btree %s pos %llu:%llu:%u", + __entry->trans_fn, + (void *) __entry->caller_ip, -+ __entry->btree_id, ++ bch2_btree_ids[__entry->btree_id], + __entry->pos_inode, + __entry->pos_offset, + __entry->pos_snapshot) +); + +DEFINE_EVENT(transaction_restart_iter, trans_restart_btree_node_reused, -+ TP_PROTO(const char *trans_fn, ++ TP_PROTO(struct btree_trans *trans, + unsigned long caller_ip, -+ enum btree_id btree_id, -+ struct bpos *pos), -+ TP_ARGS(trans_fn, caller_ip, btree_id, pos) ++ struct btree_path *path), ++ TP_ARGS(trans, caller_ip, path) +); + +DEFINE_EVENT(transaction_restart_iter, trans_restart_btree_node_split, -+ TP_PROTO(const char *trans_fn, ++ TP_PROTO(struct btree_trans *trans, + unsigned long caller_ip, -+ enum btree_id btree_id, -+ struct bpos *pos), -+ TP_ARGS(trans_fn, caller_ip, btree_id, pos) ++ struct btree_path *path), ++ TP_ARGS(trans, caller_ip, path) +); + +DEFINE_EVENT(transaction_restart_iter, trans_restart_upgrade, -+ TP_PROTO(const char *trans_fn, ++ TP_PROTO(struct btree_trans *trans, + unsigned long caller_ip, -+ enum btree_id btree_id, -+ struct bpos *pos), -+ TP_ARGS(trans_fn, caller_ip, btree_id, pos) ++ struct btree_path *path), ++ TP_ARGS(trans, caller_ip, path) +); + +DEFINE_EVENT(transaction_restart_iter, trans_restart_iter_upgrade, -+ TP_PROTO(const char *trans_fn, ++ TP_PROTO(struct btree_trans *trans, + unsigned long caller_ip, -+ enum btree_id btree_id, -+ struct bpos *pos), -+ TP_ARGS(trans_fn, caller_ip, btree_id, pos) ++ struct btree_path *path), ++ TP_ARGS(trans, caller_ip, path) +); + +DEFINE_EVENT(transaction_restart_iter, trans_restart_relock, -+ TP_PROTO(const char *trans_fn, ++ TP_PROTO(struct btree_trans *trans, + unsigned long caller_ip, -+ enum btree_id btree_id, -+ struct bpos *pos), -+ TP_ARGS(trans_fn, caller_ip, 
btree_id, pos) ++ struct btree_path *path), ++ TP_ARGS(trans, caller_ip, path) +); + +DEFINE_EVENT(transaction_restart_iter, trans_restart_relock_next_node, -+ TP_PROTO(const char *trans_fn, ++ TP_PROTO(struct btree_trans *trans, + unsigned long caller_ip, -+ enum btree_id btree_id, -+ struct bpos *pos), -+ TP_ARGS(trans_fn, caller_ip, btree_id, pos) ++ struct btree_path *path), ++ TP_ARGS(trans, caller_ip, path) +); + +DEFINE_EVENT(transaction_restart_iter, trans_restart_relock_parent_for_fill, -+ TP_PROTO(const char *trans_fn, ++ TP_PROTO(struct btree_trans *trans, + unsigned long caller_ip, -+ enum btree_id btree_id, -+ struct bpos *pos), -+ TP_ARGS(trans_fn, caller_ip, btree_id, pos) ++ struct btree_path *path), ++ TP_ARGS(trans, caller_ip, path) +); + +DEFINE_EVENT(transaction_restart_iter, trans_restart_relock_after_fill, -+ TP_PROTO(const char *trans_fn, ++ TP_PROTO(struct btree_trans *trans, + unsigned long caller_ip, -+ enum btree_id btree_id, -+ struct bpos *pos), -+ TP_ARGS(trans_fn, caller_ip, btree_id, pos) ++ struct btree_path *path), ++ TP_ARGS(trans, caller_ip, path) ++); ++ ++DEFINE_EVENT(transaction_event, trans_restart_key_cache_upgrade, ++ TP_PROTO(struct btree_trans *trans, ++ unsigned long caller_ip), ++ TP_ARGS(trans, caller_ip) +); + +DEFINE_EVENT(transaction_restart_iter, trans_restart_relock_key_cache_fill, -+ TP_PROTO(const char *trans_fn, ++ TP_PROTO(struct btree_trans *trans, + unsigned long caller_ip, -+ enum btree_id btree_id, -+ struct bpos *pos), -+ TP_ARGS(trans_fn, caller_ip, btree_id, pos) ++ struct btree_path *path), ++ TP_ARGS(trans, caller_ip, path) +); + +DEFINE_EVENT(transaction_restart_iter, trans_restart_relock_path, -+ TP_PROTO(const char *trans_fn, ++ TP_PROTO(struct btree_trans *trans, + unsigned long caller_ip, -+ enum btree_id btree_id, -+ struct bpos *pos), -+ TP_ARGS(trans_fn, caller_ip, btree_id, pos) ++ struct btree_path *path), ++ TP_ARGS(trans, caller_ip, path) +); + +DEFINE_EVENT(transaction_restart_iter, trans_restart_relock_path_intent, -+ TP_PROTO(const char *trans_fn, ++ TP_PROTO(struct btree_trans *trans, + unsigned long caller_ip, -+ enum btree_id btree_id, -+ struct bpos *pos), -+ TP_ARGS(trans_fn, caller_ip, btree_id, pos) ++ struct btree_path *path), ++ TP_ARGS(trans, caller_ip, path) +); + +DEFINE_EVENT(transaction_restart_iter, trans_restart_traverse, -+ TP_PROTO(const char *trans_fn, ++ TP_PROTO(struct btree_trans *trans, + unsigned long caller_ip, -+ enum btree_id btree_id, -+ struct bpos *pos), -+ TP_ARGS(trans_fn, caller_ip, btree_id, pos) ++ struct btree_path *path), ++ TP_ARGS(trans, caller_ip, path) +); + +DEFINE_EVENT(transaction_restart_iter, trans_restart_memory_allocation_failure, -+ TP_PROTO(const char *trans_fn, ++ TP_PROTO(struct btree_trans *trans, + unsigned long caller_ip, -+ enum btree_id btree_id, -+ struct bpos *pos), -+ TP_ARGS(trans_fn, caller_ip, btree_id, pos) ++ struct btree_path *path), ++ TP_ARGS(trans, caller_ip, path) +); + +TRACE_EVENT(trans_restart_would_deadlock, -+ TP_PROTO(const char *trans_fn, ++ TP_PROTO(struct btree_trans *trans, + unsigned long caller_ip, -+ bool in_traverse_all, + unsigned reason, -+ enum btree_id have_btree_id, -+ unsigned have_iter_type, -+ struct bpos *have_pos, -+ enum btree_id want_btree_id, -+ unsigned want_iter_type, ++ struct btree_path *have, ++ struct btree_path *want, + struct bpos *want_pos), -+ TP_ARGS(trans_fn, caller_ip, in_traverse_all, reason, -+ have_btree_id, have_iter_type, have_pos, -+ want_btree_id, want_iter_type, want_pos), ++ TP_ARGS(trans, 
caller_ip, reason, ++ have, want, want_pos), + + TP_STRUCT__entry( -+ __array(char, trans_fn, 24 ) ++ __array(char, trans_fn, 32 ) + __field(unsigned long, caller_ip ) + __field(u8, in_traverse_all ) + __field(u8, reason ) + __field(u8, have_btree_id ) -+ __field(u8, have_iter_type ) ++ __field(u8, have_type ) + __field(u8, want_btree_id ) -+ __field(u8, want_iter_type ) -+ -+ __field(u64, have_pos_inode ) -+ __field(u64, have_pos_offset ) -+ __field(u32, have_pos_snapshot) -+ __field(u32, want_pos_snapshot) -+ __field(u64, want_pos_inode ) -+ __field(u64, want_pos_offset ) ++ __field(u8, want_type ) ++ TRACE_BPOS_entries(have_pos) ++ TRACE_BPOS_entries(want_pos) + ), + + TP_fast_assign( -+ strncpy(__entry->trans_fn, trans_fn, sizeof(__entry->trans_fn)); ++ strlcpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn)); + __entry->caller_ip = caller_ip; -+ __entry->in_traverse_all = in_traverse_all; ++ __entry->in_traverse_all = trans->in_traverse_all; + __entry->reason = reason; -+ __entry->have_btree_id = have_btree_id; -+ __entry->have_iter_type = have_iter_type; -+ __entry->want_btree_id = want_btree_id; -+ __entry->want_iter_type = want_iter_type; -+ -+ __entry->have_pos_inode = have_pos->inode; -+ __entry->have_pos_offset = have_pos->offset; -+ __entry->have_pos_snapshot = have_pos->snapshot; -+ -+ __entry->want_pos_inode = want_pos->inode; -+ __entry->want_pos_offset = want_pos->offset; -+ __entry->want_pos_snapshot = want_pos->snapshot; ++ __entry->have_btree_id = have->btree_id; ++ __entry->have_type = have->cached; ++ __entry->want_btree_id = want->btree_id; ++ __entry->want_type = want->cached; ++ TRACE_BPOS_assign(have_pos, have->pos); ++ TRACE_BPOS_assign(want_pos, *want_pos); + ), + + TP_printk("%s %pS traverse_all %u because %u have %u:%u %llu:%llu:%u want %u:%u %llu:%llu:%u", @@ -84505,46 +85099,46 @@ index 0000000000000..1c7ca964748d5 + __entry->in_traverse_all, + __entry->reason, + __entry->have_btree_id, -+ __entry->have_iter_type, ++ __entry->have_type, + __entry->have_pos_inode, + __entry->have_pos_offset, + __entry->have_pos_snapshot, + __entry->want_btree_id, -+ __entry->want_iter_type, ++ __entry->want_type, + __entry->want_pos_inode, + __entry->want_pos_offset, + __entry->want_pos_snapshot) +); + +TRACE_EVENT(trans_restart_would_deadlock_write, -+ TP_PROTO(const char *trans_fn), -+ TP_ARGS(trans_fn), ++ TP_PROTO(struct btree_trans *trans), ++ TP_ARGS(trans), + + TP_STRUCT__entry( -+ __array(char, trans_fn, 24 ) ++ __array(char, trans_fn, 32 ) + ), + + TP_fast_assign( -+ strncpy(__entry->trans_fn, trans_fn, sizeof(__entry->trans_fn)); ++ strlcpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn)); + ), + + TP_printk("%s", __entry->trans_fn) +); + +TRACE_EVENT(trans_restart_mem_realloced, -+ TP_PROTO(const char *trans_fn, ++ TP_PROTO(struct btree_trans *trans, + unsigned long caller_ip, + unsigned long bytes), -+ TP_ARGS(trans_fn, caller_ip, bytes), ++ TP_ARGS(trans, caller_ip, bytes), + + TP_STRUCT__entry( -+ __array(char, trans_fn, 24 ) ++ __array(char, trans_fn, 32 ) + __field(unsigned long, caller_ip ) + __field(unsigned long, bytes ) + ), + + TP_fast_assign( -+ strncpy(__entry->trans_fn, trans_fn, sizeof(__entry->trans_fn)); ++ strlcpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn)); + __entry->caller_ip = caller_ip; + __entry->bytes = bytes; + ), @@ -84556,32 +85150,28 @@ index 0000000000000..1c7ca964748d5 +); + +TRACE_EVENT(trans_restart_key_cache_key_realloced, -+ TP_PROTO(const char *trans_fn, ++ TP_PROTO(struct btree_trans *trans, + unsigned 
long caller_ip, -+ enum btree_id btree_id, -+ struct bpos *pos, ++ struct btree_path *path, + unsigned old_u64s, + unsigned new_u64s), -+ TP_ARGS(trans_fn, caller_ip, btree_id, pos, old_u64s, new_u64s), ++ TP_ARGS(trans, caller_ip, path, old_u64s, new_u64s), + + TP_STRUCT__entry( -+ __array(char, trans_fn, 24 ) ++ __array(char, trans_fn, 32 ) + __field(unsigned long, caller_ip ) + __field(enum btree_id, btree_id ) -+ __field(u64, inode ) -+ __field(u64, offset ) -+ __field(u32, snapshot ) ++ TRACE_BPOS_entries(pos) + __field(u32, old_u64s ) + __field(u32, new_u64s ) + ), + + TP_fast_assign( -+ strncpy(__entry->trans_fn, trans_fn, sizeof(__entry->trans_fn)); -+ __entry->caller_ip = caller_ip; -+ __entry->btree_id = btree_id; -+ __entry->inode = pos->inode; -+ __entry->offset = pos->offset; -+ __entry->snapshot = pos->snapshot; ++ strlcpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn)); ++ __entry->caller_ip = caller_ip; ++ ++ __entry->btree_id = path->btree_id; ++ TRACE_BPOS_assign(pos, path->pos); + __entry->old_u64s = old_u64s; + __entry->new_u64s = new_u64s; + ), @@ -84590,9 +85180,9 @@ index 0000000000000..1c7ca964748d5 + __entry->trans_fn, + (void *) __entry->caller_ip, + bch2_btree_ids[__entry->btree_id], -+ __entry->inode, -+ __entry->offset, -+ __entry->snapshot, ++ __entry->pos_inode, ++ __entry->pos_offset, ++ __entry->pos_snapshot, + __entry->old_u64s, + __entry->new_u64s) +); @@ -84602,7 +85192,7 @@ index 0000000000000..1c7ca964748d5 +/* This part must be outside protection */ +#include diff --git a/init/init_task.c b/init/init_task.c -index 73cc8f03511a3..3e3aed1101539 100644 +index 73cc8f03511a..3e3aed110153 100644 --- a/init/init_task.c +++ b/init/init_task.c @@ -85,6 +85,7 @@ struct task_struct init_task @@ -84614,7 +85204,7 @@ index 73cc8f03511a3..3e3aed1101539 100644 .fn = do_no_restart_syscall, }, diff --git a/kernel/Kconfig.locks b/kernel/Kconfig.locks -index 4198f0273ecdc..b2abd9a5d9abd 100644 +index 4198f0273ecd..b2abd9a5d9ab 100644 --- a/kernel/Kconfig.locks +++ b/kernel/Kconfig.locks @@ -259,3 +259,6 @@ config ARCH_HAS_MMIOWB @@ -84625,7 +85215,7 @@ index 4198f0273ecdc..b2abd9a5d9abd 100644 +config SIXLOCKS + bool diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile -index d51cabf28f382..cadbf6520c4b4 100644 +index d51cabf28f38..cadbf6520c4b 100644 --- a/kernel/locking/Makefile +++ b/kernel/locking/Makefile @@ -32,3 +32,4 @@ obj-$(CONFIG_QUEUED_RWLOCKS) += qrwlock.o @@ -84634,10 +85224,10 @@ index d51cabf28f382..cadbf6520c4b4 100644 obj-$(CONFIG_LOCK_EVENT_COUNTS) += lock_events.o +obj-$(CONFIG_SIXLOCKS) += six.o diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c -index f06b91ca6482d..0b1a3a949b478 100644 +index e2f179491b08..317d3bd95177 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c -@@ -6483,6 +6483,26 @@ void debug_check_no_locks_held(void) +@@ -6484,6 +6484,26 @@ void debug_check_no_locks_held(void) } EXPORT_SYMBOL_GPL(debug_check_no_locks_held); @@ -84666,10 +85256,10 @@ index f06b91ca6482d..0b1a3a949b478 100644 { diff --git a/kernel/locking/six.c b/kernel/locking/six.c new file mode 100644 -index 0000000000000..fca1208720b67 +index 000000000000..d22750558847 --- /dev/null +++ b/kernel/locking/six.c -@@ -0,0 +1,759 @@ +@@ -0,0 +1,748 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include @@ -84691,6 +85281,8 @@ index 0000000000000..fca1208720b67 +#define six_acquire(l, t) lock_acquire(l, 0, t, 0, 0, NULL, _RET_IP_) +#define six_release(l) lock_release(l, _RET_IP_) + ++static void do_six_unlock_type(struct 
six_lock *lock, enum six_lock_type type); ++ +struct six_lock_vals { + /* Value we add to the lock in order to take the lock: */ + u64 lock_val; @@ -84737,14 +85329,15 @@ index 0000000000000..fca1208720b67 +} + +static inline void six_set_owner(struct six_lock *lock, enum six_lock_type type, -+ union six_lock_state old) ++ union six_lock_state old, ++ struct task_struct *owner) +{ + if (type != SIX_LOCK_intent) + return; + + if (!old.intent_lock) { + EBUG_ON(lock->owner); -+ lock->owner = current; ++ lock->owner = owner; + } else { + EBUG_ON(lock->owner != current); + } @@ -84760,64 +85353,21 @@ index 0000000000000..fca1208720b67 + return read_count; +} + -+struct six_lock_waiter { -+ struct list_head list; -+ struct task_struct *task; -+}; -+ +/* This is probably up there with the more evil things I've done */ +#define waitlist_bitnr(id) ilog2((((union six_lock_state) { .waiters = 1 << (id) }).l)) + -+static inline void six_lock_wakeup(struct six_lock *lock, -+ union six_lock_state state, -+ unsigned waitlist_id) -+{ -+ if (waitlist_id == SIX_LOCK_write) { -+ if (state.write_locking && !state.read_lock) { -+ struct task_struct *p = READ_ONCE(lock->owner); -+ if (p) -+ wake_up_process(p); -+ } -+ } else { -+ struct list_head *wait_list = &lock->wait_list[waitlist_id]; -+ struct six_lock_waiter *w, *next; -+ -+ if (!(state.waiters & (1 << waitlist_id))) -+ return; -+ -+ clear_bit(waitlist_bitnr(waitlist_id), -+ (unsigned long *) &lock->state.v); -+ -+ raw_spin_lock(&lock->wait_lock); -+ -+ list_for_each_entry_safe(w, next, wait_list, list) { -+ list_del_init(&w->list); -+ -+ if (wake_up_process(w->task) && -+ waitlist_id != SIX_LOCK_read) { -+ if (!list_empty(wait_list)) -+ set_bit(waitlist_bitnr(waitlist_id), -+ (unsigned long *) &lock->state.v); -+ break; -+ } -+ } -+ -+ raw_spin_unlock(&lock->wait_lock); -+ } -+} -+ -+static __always_inline bool do_six_trylock_type(struct six_lock *lock, -+ enum six_lock_type type, -+ bool try) ++static int __do_six_trylock_type(struct six_lock *lock, ++ enum six_lock_type type, ++ struct task_struct *task, ++ bool try) +{ + const struct six_lock_vals l[] = LOCK_VALS; + union six_lock_state old, new; -+ bool ret; ++ int ret; + u64 v; + -+ EBUG_ON(type == SIX_LOCK_write && lock->owner != current); ++ EBUG_ON(type == SIX_LOCK_write && lock->owner != task); + EBUG_ON(type == SIX_LOCK_write && (lock->state.seq & 1)); -+ + EBUG_ON(type == SIX_LOCK_write && (try != !(lock->state.write_locking))); + + /* @@ -84849,18 +85399,6 @@ index 0000000000000..fca1208720b67 + preempt_enable(); + + /* -+ * If we failed because a writer was trying to take the -+ * lock, issue a wakeup because we might have caused a -+ * spurious trylock failure: -+ */ -+ if (old.write_locking) { -+ struct task_struct *p = READ_ONCE(lock->owner); -+ -+ if (p) -+ wake_up_process(p); -+ } -+ -+ /* + * If we failed from the lock path and the waiting bit wasn't + * set, set it: + */ @@ -84880,6 +85418,14 @@ index 0000000000000..fca1208720b67 + } while ((v = atomic64_cmpxchg(&lock->state.counter, + old.v, new.v)) != old.v); + } ++ ++ /* ++ * If we failed because a writer was trying to take the ++ * lock, issue a wakeup because we might have caused a ++ * spurious trylock failure: ++ */ ++ if (old.write_locking) ++ ret = -1 - SIX_LOCK_write; + } else if (type == SIX_LOCK_write && lock->readers) { + if (try) { + atomic64_add(__SIX_VAL(write_locking, 1), @@ -84899,9 +85445,13 @@ index 0000000000000..fca1208720b67 + if (ret || try) + v -= __SIX_VAL(write_locking, 1); + ++ if (!ret && !try && 
!(lock->state.waiters & (1 << SIX_LOCK_write))) ++ v += __SIX_VAL(waiters, 1 << SIX_LOCK_write); ++ + if (try && !ret) { + old.v = atomic64_add_return(v, &lock->state.counter); -+ six_lock_wakeup(lock, old, SIX_LOCK_read); ++ if (old.waiters & (1 << SIX_LOCK_read)) ++ ret = -1 - SIX_LOCK_read; + } else { + atomic64_add(v, &lock->state.counter); + } @@ -84915,8 +85465,7 @@ index 0000000000000..fca1208720b67 + + if (type == SIX_LOCK_write) + new.write_locking = 0; -+ } else if (!try && type != SIX_LOCK_write && -+ !(new.waiters & (1 << type))) ++ } else if (!try && !(new.waiters & (1 << type))) + new.waiters |= 1 << type; + else + break; /* waiting bit already set */ @@ -84928,14 +85477,84 @@ index 0000000000000..fca1208720b67 + EBUG_ON(ret && !(lock->state.v & l[type].held_mask)); + } + -+ if (ret) -+ six_set_owner(lock, type, old); ++ if (ret > 0) ++ six_set_owner(lock, type, old, task); + -+ EBUG_ON(type == SIX_LOCK_write && (try || ret) && (lock->state.write_locking)); ++ EBUG_ON(type == SIX_LOCK_write && (try || ret > 0) && (lock->state.write_locking)); + + return ret; +} + ++static inline void __six_lock_wakeup(struct six_lock *lock, enum six_lock_type lock_type) ++{ ++ struct six_lock_waiter *w, *next; ++ struct task_struct *task; ++ bool saw_one; ++ int ret; ++again: ++ ret = 0; ++ saw_one = false; ++ raw_spin_lock(&lock->wait_lock); ++ ++ list_for_each_entry_safe(w, next, &lock->wait_list, list) { ++ if (w->lock_want != lock_type) ++ continue; ++ ++ if (saw_one && lock_type != SIX_LOCK_read) ++ goto unlock; ++ saw_one = true; ++ ++ ret = __do_six_trylock_type(lock, lock_type, w->task, false); ++ if (ret <= 0) ++ goto unlock; ++ ++ __list_del(w->list.prev, w->list.next); ++ task = w->task; ++ /* ++ * Do no writes to @w besides setting lock_acquired - otherwise ++ * we would need a memory barrier: ++ */ ++ barrier(); ++ w->lock_acquired = true; ++ wake_up_process(task); ++ } ++ ++ clear_bit(waitlist_bitnr(lock_type), (unsigned long *) &lock->state.v); ++unlock: ++ raw_spin_unlock(&lock->wait_lock); ++ ++ if (ret < 0) { ++ lock_type = -ret - 1; ++ goto again; ++ } ++} ++ ++static inline void six_lock_wakeup(struct six_lock *lock, ++ union six_lock_state state, ++ enum six_lock_type lock_type) ++{ ++ if (lock_type == SIX_LOCK_write && state.read_lock) ++ return; ++ ++ if (!(state.waiters & (1 << lock_type))) ++ return; ++ ++ __six_lock_wakeup(lock, lock_type); ++} ++ ++static bool do_six_trylock_type(struct six_lock *lock, ++ enum six_lock_type type, ++ bool try) ++{ ++ int ret; ++ ++ ret = __do_six_trylock_type(lock, type, current, try); ++ if (ret < 0) ++ __six_lock_wakeup(lock, -ret - 1); ++ ++ return ret > 0; ++} ++ +__always_inline __flatten +static bool __six_trylock_type(struct six_lock *lock, enum six_lock_type type) +{ @@ -84976,12 +85595,8 @@ index 0000000000000..fca1208720b67 + * Similar to the lock path, we may have caused a spurious write + * lock fail and need to issue a wakeup: + */ -+ if (old.write_locking) { -+ struct task_struct *p = READ_ONCE(lock->owner); -+ -+ if (p) -+ wake_up_process(p); -+ } ++ if (old.write_locking) ++ six_lock_wakeup(lock, old, SIX_LOCK_write); + + if (ret) + six_acquire(&lock->dep_map, 1); @@ -84999,7 +85614,7 @@ index 0000000000000..fca1208720b67 + old.v, + old.v + l[type].lock_val)) != old.v); + -+ six_set_owner(lock, type, old); ++ six_set_owner(lock, type, old, current); + if (type != SIX_LOCK_write) + six_acquire(&lock->dep_map, 1); + return true; @@ -85007,33 +85622,26 @@ index 0000000000000..fca1208720b67 + +#ifdef 
CONFIG_LOCK_SPIN_ON_OWNER + -+static inline int six_can_spin_on_owner(struct six_lock *lock) ++static inline bool six_optimistic_spin(struct six_lock *lock, ++ struct six_lock_waiter *wait) +{ -+ struct task_struct *owner; -+ int retval = 1; ++ struct task_struct *owner, *task = current; + -+ if (need_resched()) -+ return 0; ++ switch (wait->lock_want) { ++ case SIX_LOCK_read: ++ break; ++ case SIX_LOCK_intent: ++ if (lock->wait_list.next != &wait->list) ++ return false; ++ break; ++ case SIX_LOCK_write: ++ return false; ++ } + + rcu_read_lock(); + owner = READ_ONCE(lock->owner); -+ if (owner) -+ retval = owner->on_cpu; -+ rcu_read_unlock(); -+ /* -+ * if lock->owner is not set, the mutex owner may have just acquired -+ * it and not set the owner yet or the mutex has been released. -+ */ -+ return retval; -+} + -+static inline bool six_spin_on_owner(struct six_lock *lock, -+ struct task_struct *owner) -+{ -+ bool ret = true; -+ -+ rcu_read_lock(); -+ while (lock->owner == owner) { ++ while (owner && lock->owner == owner) { + /* + * Ensure we emit the owner->on_cpu, dereference _after_ + * checking lock->owner still matches owner. If that fails, @@ -85042,85 +85650,27 @@ index 0000000000000..fca1208720b67 + */ + barrier(); + -+ if (!owner->on_cpu || need_resched()) { -+ ret = false; ++ /* ++ * If we're an RT task that will live-lock because we won't let ++ * the owner complete. ++ */ ++ if (wait->lock_acquired || ++ !owner->on_cpu || ++ rt_task(task) || ++ need_resched()) + break; -+ } + + cpu_relax(); + } + rcu_read_unlock(); + -+ return ret; -+} -+ -+static inline bool six_optimistic_spin(struct six_lock *lock, enum six_lock_type type) -+{ -+ struct task_struct *task = current; -+ -+ if (type == SIX_LOCK_write) -+ return false; -+ -+ preempt_disable(); -+ if (!six_can_spin_on_owner(lock)) -+ goto fail; -+ -+ if (!osq_lock(&lock->osq)) -+ goto fail; -+ -+ while (1) { -+ struct task_struct *owner; -+ -+ /* -+ * If there's an owner, wait for it to either -+ * release the lock or go to sleep. -+ */ -+ owner = READ_ONCE(lock->owner); -+ if (owner && !six_spin_on_owner(lock, owner)) -+ break; -+ -+ if (do_six_trylock_type(lock, type, false)) { -+ osq_unlock(&lock->osq); -+ preempt_enable(); -+ return true; -+ } -+ -+ /* -+ * When there's no owner, we might have preempted between the -+ * owner acquiring the lock and setting the owner field. If -+ * we're an RT task that will live-lock because we won't let -+ * the owner complete. -+ */ -+ if (!owner && (need_resched() || rt_task(task))) -+ break; -+ -+ /* -+ * The cpu_relax() call is a compiler barrier which forces -+ * everything in this loop to be re-loaded. We don't need -+ * memory barriers as we'll eventually observe the right -+ * values at the cost of a few extra spins. -+ */ -+ cpu_relax(); -+ } -+ -+ osq_unlock(&lock->osq); -+fail: -+ preempt_enable(); -+ -+ /* -+ * If we fell out of the spin path because of need_resched(), -+ * reschedule now, before we try-lock again. This avoids getting -+ * scheduled out right after we obtained the lock. 
-+ */ -+ if (need_resched()) -+ schedule(); -+ -+ return false; ++ return wait->lock_acquired; +} + +#else /* CONFIG_LOCK_SPIN_ON_OWNER */ + -+static inline bool six_optimistic_spin(struct six_lock *lock, enum six_lock_type type) ++static inline bool six_optimistic_spin(struct six_lock *lock, ++ struct six_lock_waiter *wait) +{ + return false; +} @@ -85129,10 +85679,10 @@ index 0000000000000..fca1208720b67 + +noinline +static int __six_lock_type_slowpath(struct six_lock *lock, enum six_lock_type type, ++ struct six_lock_waiter *wait, + six_lock_should_sleep_fn should_sleep_fn, void *p) +{ + union six_lock_state old; -+ struct six_lock_waiter wait; + int ret = 0; + + if (type == SIX_LOCK_write) { @@ -85141,46 +85691,58 @@ index 0000000000000..fca1208720b67 + smp_mb__after_atomic(); + } + -+ ret = should_sleep_fn ? should_sleep_fn(lock, p) : 0; -+ if (ret) -+ goto out_before_sleep; -+ -+ if (six_optimistic_spin(lock, type)) -+ goto out_before_sleep; -+ + lock_contended(&lock->dep_map, _RET_IP_); + -+ INIT_LIST_HEAD(&wait.list); -+ wait.task = current; ++ wait->task = current; ++ wait->lock_want = type; ++ wait->lock_acquired = false; ++ ++ raw_spin_lock(&lock->wait_lock); ++ /* ++ * Retry taking the lock after taking waitlist lock, have raced with an ++ * unlock: ++ */ ++ ret = __do_six_trylock_type(lock, type, current, false); ++ if (ret <= 0) ++ list_add_tail(&wait->list, &lock->wait_list); ++ raw_spin_unlock(&lock->wait_lock); ++ ++ if (unlikely(ret > 0)) { ++ ret = 0; ++ goto out; ++ } ++ ++ if (unlikely(ret < 0)) { ++ __six_lock_wakeup(lock, -ret - 1); ++ ret = 0; ++ } ++ ++ if (six_optimistic_spin(lock, wait)) ++ goto out; + + while (1) { + set_current_state(TASK_UNINTERRUPTIBLE); -+ if (type == SIX_LOCK_write) -+ EBUG_ON(lock->owner != current); -+ else if (list_empty_careful(&wait.list)) { -+ raw_spin_lock(&lock->wait_lock); -+ list_add_tail(&wait.list, &lock->wait_list[type]); -+ raw_spin_unlock(&lock->wait_lock); -+ } + -+ if (do_six_trylock_type(lock, type, false)) ++ if (wait->lock_acquired) + break; + + ret = should_sleep_fn ? should_sleep_fn(lock, p) : 0; -+ if (ret) ++ if (unlikely(ret)) { ++ raw_spin_lock(&lock->wait_lock); ++ if (!wait->lock_acquired) ++ list_del(&wait->list); ++ raw_spin_unlock(&lock->wait_lock); ++ ++ if (wait->lock_acquired) ++ do_six_unlock_type(lock, type); + break; ++ } + + schedule(); + } + + __set_current_state(TASK_RUNNING); -+ -+ if (!list_empty_careful(&wait.list)) { -+ raw_spin_lock(&lock->wait_lock); -+ list_del_init(&wait.list); -+ raw_spin_unlock(&lock->wait_lock); -+ } -+out_before_sleep: ++out: + if (ret && type == SIX_LOCK_write) { + old.v = atomic64_sub_return(__SIX_VAL(write_locking, 1), + &lock->state.counter); @@ -85190,9 +85752,10 @@ index 0000000000000..fca1208720b67 + return ret; +} + -+__always_inline -+static int __six_lock_type(struct six_lock *lock, enum six_lock_type type, -+ six_lock_should_sleep_fn should_sleep_fn, void *p) ++__always_inline __flatten ++static int __six_lock_type_waiter(struct six_lock *lock, enum six_lock_type type, ++ struct six_lock_waiter *wait, ++ six_lock_should_sleep_fn should_sleep_fn, void *p) +{ + int ret; + @@ -85200,7 +85763,7 @@ index 0000000000000..fca1208720b67 + six_acquire(&lock->dep_map, 0); + + ret = do_six_trylock_type(lock, type, true) ? 
0 -+ : __six_lock_type_slowpath(lock, type, should_sleep_fn, p); ++ : __six_lock_type_slowpath(lock, type, wait, should_sleep_fn, p); + + if (ret && type != SIX_LOCK_write) + six_release(&lock->dep_map); @@ -85210,28 +85773,23 @@ index 0000000000000..fca1208720b67 + return ret; +} + ++__always_inline ++static int __six_lock_type(struct six_lock *lock, enum six_lock_type type, ++ six_lock_should_sleep_fn should_sleep_fn, void *p) ++{ ++ struct six_lock_waiter wait; ++ ++ return __six_lock_type_waiter(lock, type, &wait, should_sleep_fn, p); ++} ++ +__always_inline __flatten -+static void __six_unlock_type(struct six_lock *lock, enum six_lock_type type) ++static void do_six_unlock_type(struct six_lock *lock, enum six_lock_type type) +{ + const struct six_lock_vals l[] = LOCK_VALS; + union six_lock_state state; + -+ EBUG_ON(type == SIX_LOCK_write && -+ !(lock->state.v & __SIX_LOCK_HELD_intent)); -+ -+ if (type != SIX_LOCK_write) -+ six_release(&lock->dep_map); -+ -+ if (type == SIX_LOCK_intent) { -+ EBUG_ON(lock->owner != current); -+ -+ if (lock->intent_lock_recurse) { -+ --lock->intent_lock_recurse; -+ return; -+ } -+ ++ if (type == SIX_LOCK_intent) + lock->owner = NULL; -+ } + + if (type == SIX_LOCK_read && + lock->readers) { @@ -85248,6 +85806,27 @@ index 0000000000000..fca1208720b67 + six_lock_wakeup(lock, state, l[type].unlock_wakeup); +} + ++__always_inline __flatten ++static void __six_unlock_type(struct six_lock *lock, enum six_lock_type type) ++{ ++ EBUG_ON(type == SIX_LOCK_write && ++ !(lock->state.v & __SIX_LOCK_HELD_intent)); ++ EBUG_ON((type == SIX_LOCK_write || ++ type == SIX_LOCK_intent) && ++ lock->owner != current); ++ ++ if (type != SIX_LOCK_write) ++ six_release(&lock->dep_map); ++ ++ if (type == SIX_LOCK_intent && ++ lock->intent_lock_recurse) { ++ --lock->intent_lock_recurse; ++ return; ++ } ++ ++ do_six_unlock_type(lock, type); ++} ++ +#define __SIX_LOCK(type) \ +bool six_trylock_##type(struct six_lock *lock) \ +{ \ @@ -85268,6 +85847,14 @@ index 0000000000000..fca1208720b67 +} \ +EXPORT_SYMBOL_GPL(six_lock_##type); \ + \ ++int six_lock_waiter_##type(struct six_lock *lock, \ ++ struct six_lock_waiter *wait, \ ++ six_lock_should_sleep_fn should_sleep_fn, void *p)\ ++{ \ ++ return __six_lock_type_waiter(lock, SIX_LOCK_##type, wait, should_sleep_fn, p);\ ++} \ ++EXPORT_SYMBOL_GPL(six_lock_waiter_##type); \ ++ \ +void six_unlock_##type(struct six_lock *lock) \ +{ \ + __six_unlock_type(lock, SIX_LOCK_##type); \ @@ -85311,7 +85898,7 @@ index 0000000000000..fca1208720b67 + if (lock->readers) + this_cpu_dec(*lock->readers); + -+ six_set_owner(lock, SIX_LOCK_intent, old); ++ six_set_owner(lock, SIX_LOCK_intent, old, current); + + return true; +} @@ -85373,44 +85960,12 @@ index 0000000000000..fca1208720b67 + struct six_lock_waiter *w; + + raw_spin_lock(&lock->wait_lock); -+ -+ list_for_each_entry(w, &lock->wait_list[0], list) ++ list_for_each_entry(w, &lock->wait_list, list) + wake_up_process(w->task); -+ list_for_each_entry(w, &lock->wait_list[1], list) -+ wake_up_process(w->task); -+ + raw_spin_unlock(&lock->wait_lock); +} +EXPORT_SYMBOL_GPL(six_lock_wakeup_all); + -+struct free_pcpu_rcu { -+ struct rcu_head rcu; -+ void __percpu *p; -+}; -+ -+static void free_pcpu_rcu_fn(struct rcu_head *_rcu) -+{ -+ struct free_pcpu_rcu *rcu = -+ container_of(_rcu, struct free_pcpu_rcu, rcu); -+ -+ free_percpu(rcu->p); -+ kfree(rcu); -+} -+ -+void six_lock_pcpu_free_rcu(struct six_lock *lock) -+{ -+ struct free_pcpu_rcu *rcu = kzalloc(sizeof(*rcu), GFP_KERNEL); -+ -+ if (!rcu) -+ return; -+ -+ 
rcu->p = lock->readers; -+ lock->readers = NULL; -+ -+ call_rcu(&rcu->rcu, free_pcpu_rcu_fn); -+} -+EXPORT_SYMBOL_GPL(six_lock_pcpu_free_rcu); -+ +void six_lock_pcpu_free(struct six_lock *lock) +{ + BUG_ON(lock->readers && pcpu_read_count(lock)); @@ -85429,8 +85984,32 @@ index 0000000000000..fca1208720b67 +#endif +} +EXPORT_SYMBOL_GPL(six_lock_pcpu_alloc); ++ ++/* ++ * Returns lock held counts, for both read and intent ++ */ ++struct six_lock_count six_lock_counts(struct six_lock *lock) ++{ ++ struct six_lock_count ret; ++ ++ ret.n[SIX_LOCK_read] = 0; ++ ret.n[SIX_LOCK_intent] = lock->state.intent_lock + lock->intent_lock_recurse; ++ ret.n[SIX_LOCK_write] = lock->state.seq & 1; ++ ++ if (!lock->readers) ++ ret.n[SIX_LOCK_read] += lock->state.read_lock; ++ else { ++ int cpu; ++ ++ for_each_possible_cpu(cpu) ++ ret.n[SIX_LOCK_read] += *per_cpu_ptr(lock->readers, cpu); ++ } ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(six_lock_counts); diff --git a/kernel/module/main.c b/kernel/module/main.c -index 0548151dd9339..55ba98a99387d 100644 +index 0548151dd933..55ba98a99387 100644 --- a/kernel/module/main.c +++ b/kernel/module/main.c @@ -1608,9 +1608,7 @@ static void dynamic_debug_remove(struct module *mod, struct _ddebug *debug) @@ -85444,8 +86023,28 @@ index 0548151dd9339..55ba98a99387d 100644 } bool __weak module_init_section(const char *name) +diff --git a/kernel/stacktrace.c b/kernel/stacktrace.c +index 9ed5ce989415..4f65824879ab 100644 +--- a/kernel/stacktrace.c ++++ b/kernel/stacktrace.c +@@ -151,6 +151,7 @@ unsigned int stack_trace_save_tsk(struct task_struct *tsk, unsigned long *store, + put_task_stack(tsk); + return c.len; + } ++EXPORT_SYMBOL_GPL(stack_trace_save_tsk); + + /** + * stack_trace_save_regs - Save a stack trace based on pt_regs into a storage array +@@ -301,6 +302,7 @@ unsigned int stack_trace_save_tsk(struct task_struct *task, + save_stack_trace_tsk(task, &trace); + return trace.nr_entries; + } ++EXPORT_SYMBOL_GPL(stack_trace_save_tsk); + + /** + * stack_trace_save_regs - Save a stack trace based on pt_regs into a storage array diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c -index b8dd546270750..26cfe909f9af6 100644 +index b8dd54627075..26cfe909f9af 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1673,15 +1673,15 @@ static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt) @@ -85565,7 +86164,7 @@ index b8dd546270750..26cfe909f9af6 100644 printk(KERN_TRACE "%s", s->buffer); diff --git a/kernel/trace/trace_dynevent.c b/kernel/trace/trace_dynevent.c -index 076b447a1b889..30a106c168711 100644 +index 076b447a1b88..30a106c16871 100644 --- a/kernel/trace/trace_dynevent.c +++ b/kernel/trace/trace_dynevent.c @@ -290,21 +290,19 @@ int dynevent_arg_add(struct dynevent_cmd *cmd, @@ -85655,7 +86254,7 @@ index 076b447a1b889..30a106c168711 100644 cmd->run_command = run_command; } diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c -index 4b1057ab9d968..9d5137df1a158 100644 +index 4b1057ab9d96..9d5137df1a15 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -1059,7 +1059,7 @@ static void append_filter_err(struct trace_array *tr, @@ -85668,7 +86267,7 @@ index 4b1057ab9d968..9d5137df1a158 100644 kfree(filter->filter_string); filter->filter_string = buf; diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c -index 5e8c07aef071b..ddb2a2737b82e 100644 +index 5e8c07aef071..914b4e5e32a5 100644 --- a/kernel/trace/trace_events_synth.c +++ 
b/kernel/trace/trace_events_synth.c @@ -5,13 +5,14 @@ @@ -85694,52 +86293,75 @@ index 5e8c07aef071b..ddb2a2737b82e 100644 struct synth_field *field; int len, ret = -ENOMEM; - struct seq_buf s; -+ struct printbuf s; ++ struct printbuf buf; ssize_t size; if (!strcmp(field_type, "unsigned")) { -@@ -666,17 +667,15 @@ static struct synth_field *parse_synth_field(int argc, char **argv, - if (!field->type) +@@ -654,28 +655,16 @@ static struct synth_field *parse_synth_field(int argc, char **argv, goto free; + } +- len = strlen(field_type) + 1; +- +- if (array) +- len += strlen(array); +- +- if (prefix) +- len += strlen(prefix); +- +- field->type = kzalloc(len, GFP_KERNEL); +- if (!field->type) +- goto free; +- - seq_buf_init(&s, field->type, len); -+ s = PRINTBUF_EXTERN(field->type, len); ++ buf = PRINTBUF; if (prefix) - seq_buf_puts(&s, prefix); - seq_buf_puts(&s, field_type); -+ prt_str(&s, prefix); -+ prt_str(&s, field_type); ++ prt_str(&buf, prefix); ++ prt_str(&buf, field_type); if (array) - seq_buf_puts(&s, array); - if (WARN_ON_ONCE(!seq_buf_buffer_left(&s))) -+ prt_str(&s, array); -+ if (WARN_ON_ONCE(!printbuf_remaining(&s))) ++ prt_str(&buf, array); ++ if (buf.allocation_failure) goto free; - s.buffer[s.len] = '\0'; -- ++ field->type = buf.buf; + size = synth_field_size(field->type); if (size < 0) { - if (array) -@@ -694,13 +693,12 @@ static struct synth_field *parse_synth_field(int argc, char **argv, - if (!type) - goto free; - +@@ -687,23 +676,15 @@ static struct synth_field *parse_synth_field(int argc, char **argv, + goto free; + } else if (size == 0) { + if (synth_field_is_string(field->type)) { +- char *type; +- +- len = sizeof("__data_loc ") + strlen(field->type) + 1; +- type = kzalloc(len, GFP_KERNEL); +- if (!type) +- goto free; +- - seq_buf_init(&s, type, len); - seq_buf_puts(&s, "__data_loc "); - seq_buf_puts(&s, field->type); -+ s = PRINTBUF_EXTERN(type, len); -+ prt_str(&s, "__data_loc "); -+ prt_str(&s, field->type); ++ buf = PRINTBUF; ++ prt_str(&buf, "__data_loc "); ++ prt_str(&buf, field->type); - if (WARN_ON_ONCE(!seq_buf_buffer_left(&s))) -+ if (WARN_ON_ONCE(!printbuf_remaining(&s))) ++ if (buf.allocation_failure) goto free; - s.buffer[s.len] = '\0'; kfree(field->type); - field->type = type; -@@ -1514,7 +1512,7 @@ static int synth_event_run_command(struct dynevent_cmd *cmd) +- field->type = type; ++ field->type = buf.buf; + + field->is_dynamic = true; + size = sizeof(u64); +@@ -1514,7 +1495,7 @@ static int synth_event_run_command(struct dynevent_cmd *cmd) struct synth_event *se; int ret; @@ -85749,7 +86371,7 @@ index 5e8c07aef071b..ddb2a2737b82e 100644 return ret; diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c -index 203204cadf92f..9f270fdde99bb 100644 +index 203204cadf92..9f270fdde99b 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -1022,9 +1022,9 @@ print_graph_comment(struct trace_seq *s, struct trace_entry *ent, @@ -85766,7 +86388,7 @@ index 203204cadf92f..9f270fdde99bb 100644 trace_seq_puts(s, " */\n"); diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c -index a245ea673715d..c9f03c2d7c914 100644 +index a245ea673715..c9f03c2d7c91 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -915,7 +915,7 @@ static int create_or_delete_trace_kprobe(const char *raw_command) @@ -85779,7 +86401,7 @@ index a245ea673715d..c9f03c2d7c914 100644 /** diff --git a/kernel/trace/trace_seq.c b/kernel/trace/trace_seq.c -index 9c90b3a7dce2c..48c08f29c3424 
100644 +index 9c90b3a7dce2..48c08f29c342 100644 --- a/kernel/trace/trace_seq.c +++ b/kernel/trace/trace_seq.c @@ -25,11 +25,9 @@ @@ -86058,7 +86680,7 @@ index 9c90b3a7dce2c..48c08f29c3424 100644 return 0; } diff --git a/lib/Kconfig b/lib/Kconfig -index eaaad4d85bf24..8eb7050fb422f 100644 +index eaaad4d85bf2..8eb7050fb422 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -491,6 +491,9 @@ config ASSOCIATIVE_ARRAY @@ -86072,7 +86694,7 @@ index eaaad4d85bf24..8eb7050fb422f 100644 bool depends on !NO_IOMEM diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug -index 2e24db4bff192..1d4ed12a5355b 100644 +index 2e24db4bff19..1d4ed12a5355 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1646,6 +1646,15 @@ config DEBUG_CREDENTIALS @@ -86092,7 +86714,7 @@ index 2e24db4bff192..1d4ed12a5355b 100644 bool "Force round-robin CPU selection for unbound work items" depends on DEBUG_KERNEL diff --git a/lib/Makefile b/lib/Makefile -index f99bf61f8bbc6..d24209a59df93 100644 +index f99bf61f8bbc..d24209a59df9 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -30,11 +30,11 @@ endif @@ -86123,7 +86745,7 @@ diff --git a/drivers/md/bcache/closure.c b/lib/closure.c similarity index 88% rename from drivers/md/bcache/closure.c rename to lib/closure.c -index d8d9394a6beb1..b38ded00b9b05 100644 +index d8d9394a6beb..b38ded00b9b0 100644 --- a/drivers/md/bcache/closure.c +++ b/lib/closure.c @@ -6,13 +6,12 @@ @@ -86236,8 +86858,17 @@ index d8d9394a6beb1..b38ded00b9b05 100644 -MODULE_AUTHOR("Kent Overstreet "); -MODULE_LICENSE("GPL"); +#endif +diff --git a/lib/errname.c b/lib/errname.c +index 05cbf731545f..82ea4778f478 100644 +--- a/lib/errname.c ++++ b/lib/errname.c +@@ -222,3 +222,4 @@ const char *errname(int err) + + return err > 0 ? name + 1 : name; + } ++EXPORT_SYMBOL(errname); diff --git a/lib/generic-radix-tree.c b/lib/generic-radix-tree.c -index f25eb111c0516..41f1bcdc44886 100644 +index f25eb111c051..41f1bcdc4488 100644 --- a/lib/generic-radix-tree.c +++ b/lib/generic-radix-tree.c @@ -1,4 +1,5 @@ @@ -86344,7 +86975,7 @@ index f25eb111c0516..41f1bcdc44886 100644 { if (level) { diff --git a/lib/hexdump.c b/lib/hexdump.c -index 06833d404398d..9556f15ad2953 100644 +index 06833d404398..9556f15ad295 100644 --- a/lib/hexdump.c +++ b/lib/hexdump.c @@ -9,6 +9,7 @@ @@ -86651,7 +87282,7 @@ index 06833d404398d..9556f15ad2953 100644 unsigned char linebuf[32 * 3 + 2 + 32 + 1]; diff --git a/lib/pretty-printers.c b/lib/pretty-printers.c new file mode 100644 -index 0000000000000..addbac95e065e +index 000000000000..addbac95e065 --- /dev/null +++ b/lib/pretty-printers.c @@ -0,0 +1,60 @@ @@ -86717,26 +87348,21 @@ index 0000000000000..addbac95e065e +EXPORT_SYMBOL(prt_bitflags); diff --git a/lib/printbuf.c b/lib/printbuf.c new file mode 100644 -index 0000000000000..0474700257484 +index 000000000000..5cf79d43f5a4 --- /dev/null +++ b/lib/printbuf.c -@@ -0,0 +1,258 @@ +@@ -0,0 +1,368 @@ +// SPDX-License-Identifier: LGPL-2.1+ +/* Copyright (C) 2022 Kent Overstreet */ + -+#ifdef __KERNEL__ ++#include +#include +#include -+#else -+#define EXPORT_SYMBOL(x) -+#endif -+ -+#include ++#include +#include +#include -+#include + -+static inline size_t printbuf_linelen(struct printbuf *buf) ++static inline unsigned printbuf_linelen(struct printbuf *buf) +{ + return buf->pos - buf->last_newline; +} @@ -86804,25 +87430,43 @@ index 0000000000000..0474700257484 +} +EXPORT_SYMBOL(printbuf_exit); + -+void prt_newline(struct printbuf *buf) ++void printbuf_tabstops_reset(struct printbuf *buf) +{ -+ unsigned i; -+ -+ printbuf_make_room(buf, 1 + buf->indent); -+ 
-+ __prt_char(buf, '\n'); -+ -+ buf->last_newline = buf->pos; -+ -+ for (i = 0; i < buf->indent; i++) -+ __prt_char(buf, ' '); -+ -+ printbuf_nul_terminate(buf); -+ -+ buf->last_field = buf->pos; -+ buf->tabstop = 0; ++ buf->nr_tabstops = 0; +} -+EXPORT_SYMBOL(prt_newline); ++EXPORT_SYMBOL(printbuf_tabstops_reset); ++ ++void printbuf_tabstop_pop(struct printbuf *buf) ++{ ++ if (buf->nr_tabstops) ++ --buf->nr_tabstops; ++} ++EXPORT_SYMBOL(printbuf_tabstop_pop); ++ ++/* ++ * printbuf_tabstop_set - add a tabstop, n spaces from the previous tabstop ++ * ++ * @buf: printbuf to control ++ * @spaces: number of spaces from previous tabpstop ++ * ++ * In the future this function may allocate memory if setting more than ++ * PRINTBUF_INLINE_TABSTOPS or setting tabstops more than 255 spaces from start ++ * of line. ++ */ ++int printbuf_tabstop_push(struct printbuf *buf, unsigned spaces) ++{ ++ unsigned prev_tabstop = buf->nr_tabstops ++ ? buf->_tabstops[buf->nr_tabstops - 1] ++ : 0; ++ ++ if (WARN_ON(buf->nr_tabstops >= ARRAY_SIZE(buf->_tabstops))) ++ return -EINVAL; ++ ++ buf->_tabstops[buf->nr_tabstops++] = prev_tabstop + spaces; ++ buf->has_indent_or_tabstops = true; ++ return 0; ++} ++EXPORT_SYMBOL(printbuf_tabstop_push); + +/** + * printbuf_indent_add - add to the current indent level @@ -86839,8 +87483,9 @@ index 0000000000000..0474700257484 + spaces = 0; + + buf->indent += spaces; -+ while (spaces--) -+ prt_char(buf, ' '); ++ prt_chars(buf, ' ', spaces); ++ ++ buf->has_indent_or_tabstops = true; +} +EXPORT_SYMBOL(printbuf_indent_add); + @@ -86863,9 +87508,52 @@ index 0000000000000..0474700257484 + printbuf_nul_terminate(buf); + } + buf->indent -= spaces; ++ ++ if (!buf->indent && !buf->nr_tabstops) ++ buf->has_indent_or_tabstops = false; +} +EXPORT_SYMBOL(printbuf_indent_sub); + ++void prt_newline(struct printbuf *buf) ++{ ++ unsigned i; ++ ++ printbuf_make_room(buf, 1 + buf->indent); ++ ++ __prt_char(buf, '\n'); ++ ++ buf->last_newline = buf->pos; ++ ++ for (i = 0; i < buf->indent; i++) ++ __prt_char(buf, ' '); ++ ++ printbuf_nul_terminate(buf); ++ ++ buf->last_field = buf->pos; ++ buf->cur_tabstop = 0; ++} ++EXPORT_SYMBOL(prt_newline); ++ ++/* ++ * Returns spaces from start of line, if set, or 0 if unset: ++ */ ++static inline unsigned cur_tabstop(struct printbuf *buf) ++{ ++ return buf->cur_tabstop < buf->nr_tabstops ++ ? 
buf->_tabstops[buf->cur_tabstop] ++ : 0; ++} ++ ++static void __prt_tab(struct printbuf *out) ++{ ++ int spaces = max_t(int, 0, cur_tabstop(out) - printbuf_linelen(out)); ++ ++ prt_chars(out, ' ', spaces); ++ ++ out->last_field = out->pos; ++ out->cur_tabstop++; ++} ++ +/** + * prt_tab - Advance printbuf to the next tabstop + * @@ -86875,17 +87563,38 @@ index 0000000000000..0474700257484 + */ +void prt_tab(struct printbuf *out) +{ -+ int spaces = max_t(int, 0, out->tabstops[out->tabstop] - printbuf_linelen(out)); ++ if (WARN_ON(!cur_tabstop(out))) ++ return; + -+ BUG_ON(out->tabstop > ARRAY_SIZE(out->tabstops)); -+ -+ prt_chars(out, ' ', spaces); -+ -+ out->last_field = out->pos; -+ out->tabstop++; ++ __prt_tab(out); +} +EXPORT_SYMBOL(prt_tab); + ++static void __prt_tab_rjust(struct printbuf *buf) ++{ ++ unsigned move = buf->pos - buf->last_field; ++ int pad = (int) cur_tabstop(buf) - (int) printbuf_linelen(buf); ++ ++ if (pad > 0) { ++ printbuf_make_room(buf, pad); ++ ++ if (buf->last_field + pad < buf->size) ++ memmove(buf->buf + buf->last_field + pad, ++ buf->buf + buf->last_field, ++ min(move, buf->size - 1 - buf->last_field - pad)); ++ ++ if (buf->last_field < buf->size) ++ memset(buf->buf + buf->last_field, ' ', ++ min((unsigned) pad, buf->size - buf->last_field)); ++ ++ buf->pos += pad; ++ printbuf_nul_terminate(buf); ++ } ++ ++ buf->last_field = buf->pos; ++ buf->cur_tabstop++; ++} ++ +/** + * prt_tab_rjust - Advance printbuf to the next tabstop, right justifying + * previous output @@ -86897,34 +87606,66 @@ index 0000000000000..0474700257484 + */ +void prt_tab_rjust(struct printbuf *buf) +{ -+ BUG_ON(buf->tabstop > ARRAY_SIZE(buf->tabstops)); ++ if (WARN_ON(!cur_tabstop(buf))) ++ return; + -+ if (printbuf_linelen(buf) < buf->tabstops[buf->tabstop]) { -+ unsigned move = buf->pos - buf->last_field; -+ unsigned shift = buf->tabstops[buf->tabstop] - -+ printbuf_linelen(buf); -+ -+ printbuf_make_room(buf, shift); -+ -+ if (buf->last_field + shift < buf->size) -+ memmove(buf->buf + buf->last_field + shift, -+ buf->buf + buf->last_field, -+ min(move, buf->size - 1 - buf->last_field - shift)); -+ -+ if (buf->last_field < buf->size) -+ memset(buf->buf + buf->last_field, ' ', -+ min(shift, buf->size - buf->last_field)); -+ -+ buf->pos += shift; -+ printbuf_nul_terminate(buf); -+ } -+ -+ buf->last_field = buf->pos; -+ buf->tabstop++; ++ __prt_tab_rjust(buf); +} +EXPORT_SYMBOL(prt_tab_rjust); + +/** ++ * prt_bytes_indented - Print an array of chars, handling embedded control characters ++ * ++ * @out: printbuf to output to ++ * @str: string to print ++ * @count: number of bytes to print ++ * ++ * The following contol characters are handled as so: ++ * \n: prt_newline newline that obeys current indent level ++ * \t: prt_tab advance to next tabstop ++ * \r: prt_tab_rjust advance to next tabstop, with right justification ++ */ ++void prt_bytes_indented(struct printbuf *out, const char *str, unsigned count) ++{ ++ const char *unprinted_start = str; ++ const char *end = str + count; ++ ++ if (!out->has_indent_or_tabstops || out->suppress_indent_tabstop_handling) { ++ prt_bytes(out, str, count); ++ return; ++ } ++ ++ while (str != end) { ++ switch (*str) { ++ case '\n': ++ prt_bytes(out, unprinted_start, str - unprinted_start); ++ unprinted_start = str + 1; ++ prt_newline(out); ++ break; ++ case '\t': ++ if (likely(cur_tabstop(out))) { ++ prt_bytes(out, unprinted_start, str - unprinted_start); ++ unprinted_start = str + 1; ++ __prt_tab(out); ++ } ++ break; ++ case '\r': ++ if 
(likely(cur_tabstop(out))) { ++ prt_bytes(out, unprinted_start, str - unprinted_start); ++ unprinted_start = str + 1; ++ __prt_tab_rjust(out); ++ } ++ break; ++ } ++ ++ str++; ++ } ++ ++ prt_bytes(out, unprinted_start, str - unprinted_start); ++} ++EXPORT_SYMBOL(prt_bytes_indented); ++ ++/** + * prt_human_readable_u64 - Print out a u64 in human readable units + * + * Units of 2^10 (default) or 10^3 are controlled via @buf->si_units @@ -86981,7 +87722,7 @@ index 0000000000000..0474700257484 +EXPORT_SYMBOL(prt_units_s64); diff --git a/lib/seq_buf.c b/lib/seq_buf.c deleted file mode 100644 -index 0a68f7aa85d67..0000000000000 +index 0a68f7aa85d6..000000000000 --- a/lib/seq_buf.c +++ /dev/null @@ -1,397 +0,0 @@ @@ -87383,7 +88124,7 @@ index 0a68f7aa85d67..0000000000000 - return 0; -} diff --git a/lib/string_helpers.c b/lib/string_helpers.c -index 5ed3beb066e6d..d247bf945f162 100644 +index 5ed3beb066e6..d247bf945f16 100644 --- a/lib/string_helpers.c +++ b/lib/string_helpers.c @@ -15,6 +15,7 @@ @@ -87723,7 +88464,7 @@ index 5ed3beb066e6d..d247bf945f162 100644 EXPORT_SYMBOL(string_escape_mem); diff --git a/lib/test_hexdump.c b/lib/test_hexdump.c -index 5144899d3c6b8..f9e97879dcdf2 100644 +index 5144899d3c6b..f9e97879dcdf 100644 --- a/lib/test_hexdump.c +++ b/lib/test_hexdump.c @@ -25,36 +25,19 @@ static const char * const test_data_1[] __initconst = { @@ -87790,7 +88531,7 @@ index 5144899d3c6b8..f9e97879dcdf2 100644 result = test_data_1; diff --git a/lib/test_printf.c b/lib/test_printf.c -index 07309c45f3279..ac5f9f0eb4e0e 100644 +index 07309c45f327..ac5f9f0eb4e0 100644 --- a/lib/test_printf.c +++ b/lib/test_printf.c @@ -9,6 +9,7 @@ @@ -87855,7 +88596,7 @@ index 07309c45f3279..ac5f9f0eb4e0e 100644 kfree(alloced_buffer); } diff --git a/lib/vsprintf.c b/lib/vsprintf.c -index 3c1853a9d1c09..d92a212db2f5f 100644 +index 3c1853a9d1c0..5e78781bbca8 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -44,6 +44,7 @@ @@ -87909,7 +88650,7 @@ index 3c1853a9d1c09..d92a212db2f5f 100644 - if (len > size || width > size) - return 0; + if (width > len) -+ __prt_chars_reserved(out, '0', width - len); ++ __prt_chars_reserved(out, ' ', width - len); - if (width > len) { - width = width - len; @@ -90672,7 +91413,7 @@ index 3c1853a9d1c09..d92a212db2f5f 100644 EXPORT_SYMBOL_GPL(bstr_printf); diff --git a/mm/Makefile b/mm/Makefile -index 6f9ffa968a1a1..9731f495bbceb 100644 +index 6f9ffa968a1a..9731f495bbce 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -54,7 +54,7 @@ obj-y := filemap.o mempool.o oom_kill.o fadvise.o \ @@ -90685,7 +91426,7 @@ index 6f9ffa968a1a1..9731f495bbceb 100644 # Give 'page_alloc' its own module-parameter namespace page-alloc-y := page_alloc.o diff --git a/mm/filemap.c b/mm/filemap.c -index ffdfbc8b0e3ca..8b9e18f79f2b1 100644 +index ffdfbc8b0e3c..8b9e18f79f2b 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2223,6 +2223,7 @@ unsigned find_get_pages_range(struct address_space *mapping, pgoff_t *start, @@ -90697,7 +91438,7 @@ index ffdfbc8b0e3ca..8b9e18f79f2b1 100644 /** * find_get_pages_contig - gang contiguous pagecache lookup diff --git a/mm/memcontrol.c b/mm/memcontrol.c -index 618c366a2f074..1371a93f55de8 100644 +index 618c366a2f07..1371a93f55de 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -62,7 +62,7 @@ @@ -90817,7 +91558,7 @@ index 618c366a2f074..1371a93f55de8 100644 #define K(x) ((x) << (PAGE_SHIFT-10)) diff --git a/mm/nommu.c b/mm/nommu.c -index 9d7afc2d959e4..dd53020262d8e 100644 +index 9d7afc2d959e..dd53020262d8 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -281,6 +281,24 @@ void 
*vzalloc_node(unsigned long size, int node) @@ -90846,7 +91587,7 @@ index 9d7afc2d959e4..dd53020262d8e 100644 * vmalloc_32 - allocate virtually contiguous memory (32bit addressable) * @size: allocation size diff --git a/mm/oom_kill.c b/mm/oom_kill.c -index 3c6cf9e3cd66e..e4dca11dc54aa 100644 +index 3c6cf9e3cd66..e4dca11dc54a 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -168,27 +168,6 @@ static bool oom_unkillable_task(struct task_struct *p) @@ -90890,7 +91631,7 @@ diff --git a/lib/show_mem.c b/mm/show_mem.c similarity index 83% rename from lib/show_mem.c rename to mm/show_mem.c -index 1c26c14ffbb9b..47225158ce490 100644 +index 1c26c14ffbb9..47225158ce49 100644 --- a/lib/show_mem.c +++ b/mm/show_mem.c @@ -7,6 +7,9 @@ @@ -90914,7 +91655,7 @@ index 1c26c14ffbb9b..47225158ce490 100644 + printk("%pf()", CALL_PP(shrinkers_to_text)); } diff --git a/mm/slab.h b/mm/slab.h -index db9fb5c8dae73..502616394f7fe 100644 +index db9fb5c8dae7..502616394f7f 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -806,10 +806,12 @@ static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node) @@ -90933,7 +91674,7 @@ index db9fb5c8dae73..502616394f7fe 100644 } #endif diff --git a/mm/slab_common.c b/mm/slab_common.c -index 77c3adf40e504..2ea6585832c4e 100644 +index dbd4b6f9b0e7..675184eb3358 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -24,6 +24,7 @@ @@ -90944,7 +91685,7 @@ index 77c3adf40e504..2ea6585832c4e 100644 #include #define CREATE_TRACE_POINTS -@@ -1085,10 +1086,15 @@ static int slab_show(struct seq_file *m, void *p) +@@ -1098,10 +1099,15 @@ static int slab_show(struct seq_file *m, void *p) return 0; } @@ -90961,7 +91702,7 @@ index 77c3adf40e504..2ea6585832c4e 100644 /* * Here acquiring slab_mutex is risky since we don't prefer to get -@@ -1098,12 +1104,11 @@ void dump_unreclaimable_slab(void) +@@ -1111,12 +1117,11 @@ void dump_unreclaimable_slab(void) * without acquiring the mutex. 
*/ if (!mutex_trylock(&slab_mutex)) { @@ -90976,7 +91717,7 @@ index 77c3adf40e504..2ea6585832c4e 100644 list_for_each_entry(s, &slab_caches, list) { if (s->flags & SLAB_RECLAIM_ACCOUNT) -@@ -1111,11 +1116,43 @@ void dump_unreclaimable_slab(void) +@@ -1124,11 +1129,43 @@ void dump_unreclaimable_slab(void) get_slabinfo(s, &sinfo); @@ -91025,7 +91766,7 @@ index 77c3adf40e504..2ea6585832c4e 100644 } diff --git a/mm/vmalloc.c b/mm/vmalloc.c -index effd1ff6a4b41..ea6375c960a2b 100644 +index a1ab9b472571..536a2e51cccf 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -3361,6 +3361,27 @@ void *vzalloc_node(unsigned long size, int node) @@ -91057,7 +91798,7 @@ index effd1ff6a4b41..ea6375c960a2b 100644 #define GFP_VMALLOC32 (GFP_DMA32 | GFP_KERNEL) #elif defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA) diff --git a/mm/vmscan.c b/mm/vmscan.c -index f7d9a683e3a7d..0ea3ce8e258f1 100644 +index f7d9a683e3a7..0ea3ce8e258f 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -50,6 +50,7 @@ @@ -91175,330 +91916,8 @@ index f7d9a683e3a7d..0ea3ce8e258f1 100644 count_vm_events(SLABS_SCANNED, shrinkctl->nr_scanned); total_scan -= shrinkctl->nr_scanned; -diff --git a/net/9p/client.c b/net/9p/client.c -index 8bba0d9cf9754..e14074d031c6a 100644 ---- a/net/9p/client.c -+++ b/net/9p/client.c -@@ -218,23 +218,29 @@ static int parse_opts(char *opts, struct p9_client *clnt) - return ret; - } - --static int p9_fcall_init(struct p9_client *c, struct p9_fcall *fc, -- int alloc_msize) -+static void p9_fcall_init(struct p9_client *c, struct p9_fcall *fc, -+ int fc_idx, unsigned alloc_msize) - { -- if (likely(c->fcall_cache) && alloc_msize == c->msize) { -- fc->sdata = kmem_cache_alloc(c->fcall_cache, GFP_NOFS); -- fc->cache = c->fcall_cache; -- } else { -- fc->sdata = kmalloc(alloc_msize, GFP_NOFS); -- fc->cache = NULL; -- } -- if (!fc->sdata) -- return -ENOMEM; -+ gfp_t gfp = GFP_NOFS|__GFP_NOWARN; -+ -+ BUG_ON(alloc_msize > c->msize); -+ -+ fc->sdata = NULL; -+ fc->used_mempool = false; - fc->capacity = alloc_msize; -- return 0; -+ -+ if (alloc_msize < c->msize) -+ fc->sdata = kmalloc(alloc_msize, gfp); -+ -+ if (!fc->sdata) { -+ fc->sdata = mempool_alloc(&c->pools[fc_idx], gfp); -+ fc->used_mempool = true; -+ fc->capacity = c->msize; -+ } - } - --void p9_fcall_fini(struct p9_fcall *fc) -+void p9_fcall_fini(struct p9_client *c, struct p9_fcall *fc, -+ int fc_idx) - { - /* sdata can be NULL for interrupted requests in trans_rdma, - * and kmem_cache_free does not do NULL-check for us -@@ -242,8 +248,8 @@ void p9_fcall_fini(struct p9_fcall *fc) - if (unlikely(!fc->sdata)) - return; - -- if (fc->cache) -- kmem_cache_free(fc->cache, fc->sdata); -+ if (fc->used_mempool) -+ mempool_free(fc->sdata, &c->pools[fc_idx]); - else - kfree(fc->sdata); - } -@@ -270,10 +276,8 @@ p9_tag_alloc(struct p9_client *c, int8_t type, unsigned int max_size) - if (!req) - return ERR_PTR(-ENOMEM); - -- if (p9_fcall_init(c, &req->tc, alloc_msize)) -- goto free_req; -- if (p9_fcall_init(c, &req->rc, alloc_msize)) -- goto free; -+ p9_fcall_init(c, &req->tc, 0, alloc_msize); -+ p9_fcall_init(c, &req->rc, 1, alloc_msize); - - p9pdu_reset(&req->tc); - p9pdu_reset(&req->rc); -@@ -305,14 +309,13 @@ p9_tag_alloc(struct p9_client *c, int8_t type, unsigned int max_size) - * callback), so p9_client_cb eats the second ref there - * as the pointer is duplicated directly by virtqueue_add_sgs() - */ -- refcount_set(&req->refcount.refcount, 2); -+ refcount_set(&req->refcount, 2); - - return req; - - free: -- p9_fcall_fini(&req->tc); -- p9_fcall_fini(&req->rc); --free_req: -+ 
p9_fcall_fini(c, &req->tc, 0); -+ p9_fcall_fini(c, &req->rc, 1); - kmem_cache_free(p9_req_cache, req); - return ERR_PTR(-ENOMEM); - } -@@ -341,7 +344,7 @@ struct p9_req_t *p9_tag_lookup(struct p9_client *c, u16 tag) - if (!p9_req_try_get(req)) - goto again; - if (req->tc.tag != tag) { -- p9_req_put(req); -+ p9_req_put(c, req); - goto again; - } - } -@@ -367,21 +370,18 @@ static int p9_tag_remove(struct p9_client *c, struct p9_req_t *r) - spin_lock_irqsave(&c->lock, flags); - idr_remove(&c->reqs, tag); - spin_unlock_irqrestore(&c->lock, flags); -- return p9_req_put(r); --} -- --static void p9_req_free(struct kref *ref) --{ -- struct p9_req_t *r = container_of(ref, struct p9_req_t, refcount); -- -- p9_fcall_fini(&r->tc); -- p9_fcall_fini(&r->rc); -- kmem_cache_free(p9_req_cache, r); -+ return p9_req_put(c, r); - } - --int p9_req_put(struct p9_req_t *r) -+int p9_req_put(struct p9_client *c, struct p9_req_t *r) - { -- return kref_put(&r->refcount, p9_req_free); -+ if (refcount_dec_and_test(&r->refcount)) { -+ p9_fcall_fini(c, &r->tc, 0); -+ p9_fcall_fini(c, &r->rc, 1); -+ kmem_cache_free(p9_req_cache, r); -+ return 1; -+ } -+ return 0; - } - EXPORT_SYMBOL(p9_req_put); - -@@ -426,7 +426,7 @@ void p9_client_cb(struct p9_client *c, struct p9_req_t *req, int status) - - wake_up(&req->wq); - p9_debug(P9_DEBUG_MUX, "wakeup: %d\n", req->tc.tag); -- p9_req_put(req); -+ p9_req_put(c, req); - } - EXPORT_SYMBOL(p9_client_cb); - -@@ -709,7 +709,7 @@ static struct p9_req_t *p9_client_prepare_req(struct p9_client *c, - reterr: - p9_tag_remove(c, req); - /* We have to put also the 2nd reference as it won't be used */ -- p9_req_put(req); -+ p9_req_put(c, req); - return ERR_PTR(err); - } - -@@ -746,7 +746,7 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...) 
- err = c->trans_mod->request(c, req); - if (err < 0) { - /* write won't happen */ -- p9_req_put(req); -+ p9_req_put(c, req); - if (err != -ERESTARTSYS && err != -EFAULT) - c->status = Disconnected; - goto recalc_sigpending; -@@ -1002,7 +1002,7 @@ struct p9_client *p9_client_create(const char *dev_name, char *options) - char *client_id; - - err = 0; -- clnt = kmalloc(sizeof(*clnt), GFP_KERNEL); -+ clnt = kzalloc(sizeof(*clnt), GFP_KERNEL); - if (!clnt) - return ERR_PTR(-ENOMEM); - -@@ -1053,10 +1053,6 @@ struct p9_client *p9_client_create(const char *dev_name, char *options) - goto close_trans; - } - -- err = p9_client_version(clnt); -- if (err) -- goto close_trans; -- - /* P9_HDRSZ + 4 is the smallest packet header we can have that is - * followed by data accessed from userspace by read - */ -@@ -1066,6 +1062,15 @@ struct p9_client *p9_client_create(const char *dev_name, char *options) - clnt->msize - (P9_HDRSZ + 4), - NULL); - -+ err = mempool_init_slab_pool(&clnt->pools[0], 4, clnt->fcall_cache) ?: -+ mempool_init_slab_pool(&clnt->pools[1], 4, clnt->fcall_cache); -+ if (err) -+ goto close_trans; -+ -+ err = p9_client_version(clnt); -+ if (err) -+ goto close_trans; -+ - return clnt; - - close_trans: -@@ -1073,6 +1078,8 @@ struct p9_client *p9_client_create(const char *dev_name, char *options) - put_trans: - v9fs_put_trans(clnt->trans_mod); - free_client: -+ mempool_exit(&clnt->pools[1]); -+ mempool_exit(&clnt->pools[0]); - kfree(clnt); - return ERR_PTR(err); - } -@@ -1097,6 +1104,8 @@ void p9_client_destroy(struct p9_client *clnt) - - p9_tag_cleanup(clnt); - -+ mempool_exit(&clnt->pools[1]); -+ mempool_exit(&clnt->pools[0]); - kmem_cache_destroy(clnt->fcall_cache); - kfree(clnt); - } -diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c -index 8f8f95e39b03a..007c3f45fe052 100644 ---- a/net/9p/trans_fd.c -+++ b/net/9p/trans_fd.c -@@ -378,7 +378,7 @@ static void p9_read_work(struct work_struct *work) - m->rc.sdata = NULL; - m->rc.offset = 0; - m->rc.capacity = 0; -- p9_req_put(m->rreq); -+ p9_req_put(m->client, m->rreq); - m->rreq = NULL; - } - -@@ -492,7 +492,7 @@ static void p9_write_work(struct work_struct *work) - m->wpos += err; - if (m->wpos == m->wsize) { - m->wpos = m->wsize = 0; -- p9_req_put(m->wreq); -+ p9_req_put(m->client, m->wreq); - m->wreq = NULL; - } - -@@ -695,7 +695,7 @@ static int p9_fd_cancel(struct p9_client *client, struct p9_req_t *req) - if (req->status == REQ_STATUS_UNSENT) { - list_del(&req->req_list); - req->status = REQ_STATUS_FLSHD; -- p9_req_put(req); -+ p9_req_put(client, req); - ret = 0; - } - spin_unlock(&client->lock); -@@ -722,7 +722,7 @@ static int p9_fd_cancelled(struct p9_client *client, struct p9_req_t *req) - list_del(&req->req_list); - req->status = REQ_STATUS_FLSHD; - spin_unlock(&client->lock); -- p9_req_put(req); -+ p9_req_put(client, req); - - return 0; - } -@@ -883,12 +883,12 @@ static void p9_conn_destroy(struct p9_conn *m) - p9_mux_poll_stop(m); - cancel_work_sync(&m->rq); - if (m->rreq) { -- p9_req_put(m->rreq); -+ p9_req_put(m->client, m->rreq); - m->rreq = NULL; - } - cancel_work_sync(&m->wq); - if (m->wreq) { -- p9_req_put(m->wreq); -+ p9_req_put(m->client, m->wreq); - m->wreq = NULL; - } - -diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c -index 88e5638266743..99d878d70d56c 100644 ---- a/net/9p/trans_rdma.c -+++ b/net/9p/trans_rdma.c -@@ -350,7 +350,7 @@ send_done(struct ib_cq *cq, struct ib_wc *wc) - c->busa, c->req->tc.size, - DMA_TO_DEVICE); - up(&rdma->sq_sem); -- p9_req_put(c->req); -+ p9_req_put(client, c->req); - kfree(c); - 
} - -@@ -431,7 +431,7 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req) - if (unlikely(atomic_read(&rdma->excess_rc) > 0)) { - if ((atomic_sub_return(1, &rdma->excess_rc) >= 0)) { - /* Got one! */ -- p9_fcall_fini(&req->rc); -+ p9_fcall_fini(client, &req->rc, 1); - req->rc.sdata = NULL; - goto dont_need_post_recv; - } else { -diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c -index b24a4fb0f0a23..147972bf2e797 100644 ---- a/net/9p/trans_virtio.c -+++ b/net/9p/trans_virtio.c -@@ -199,7 +199,7 @@ static int p9_virtio_cancel(struct p9_client *client, struct p9_req_t *req) - /* Reply won't come, so drop req ref */ - static int p9_virtio_cancelled(struct p9_client *client, struct p9_req_t *req) - { -- p9_req_put(req); -+ p9_req_put(client, req); - return 0; - } - -@@ -523,7 +523,7 @@ p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req, - kvfree(out_pages); - if (!kicked) { - /* reply won't come */ -- p9_req_put(req); -+ p9_req_put(client, req); - } - return err; - } -diff --git a/net/9p/trans_xen.c b/net/9p/trans_xen.c -index 833cd3792c51c..227f89cc7237c 100644 ---- a/net/9p/trans_xen.c -+++ b/net/9p/trans_xen.c -@@ -163,7 +163,7 @@ static int p9_xen_request(struct p9_client *client, struct p9_req_t *p9_req) - ring->intf->out_prod = prod; - spin_unlock_irqrestore(&ring->lock, flags); - notify_remote_via_irq(ring->irq); -- p9_req_put(p9_req); -+ p9_req_put(client, p9_req); - - return 0; - } diff --git a/tools/testing/nvdimm/test/ndtest.c b/tools/testing/nvdimm/test/ndtest.c -index 4d1a947367f9b..a2097955dacee 100644 +index 4d1a947367f9..a2097955dace 100644 --- a/tools/testing/nvdimm/test/ndtest.c +++ b/tools/testing/nvdimm/test/ndtest.c @@ -12,7 +12,7 @@ @@ -91553,5 +91972,5 @@ index 4d1a947367f9b..a2097955dacee 100644 static DEVICE_ATTR_RO(flags); -- -2.37.1 +2.37.3
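
Usage sketch (illustrative only, not part of the patch): the six-lock rework above has __do_six_trylock_type() return a negative (-1 - lock_type) code when a wakeup is owed, and __six_lock_wakeup() now grants the lock to waiters directly through struct six_lock_waiter before waking them. The caller-visible API that results can be exercised roughly as below. The header path and the read/intent/write instantiations of __SIX_LOCK() are assumptions (they live outside the hunks shown here); everything else uses signatures visible in the patch, and passing NULL for should_sleep_fn is explicitly allowed by the slowpath.

    #include <linux/printk.h>
    #include <linux/six.h>	/* assumed path; include targets are elided in the hunks above */

    /* Sketch of the caller-visible API; takes an already-initialized lock. */
    static void six_lock_usage_sketch(struct six_lock *lock)
    {
    	struct six_lock_waiter w;
    	struct six_lock_count c;

    	/* Shared access; NULL should_sleep_fn means "never abort the wait". */
    	six_lock_read(lock, NULL, NULL);
    	c = six_lock_counts(lock);
    	pr_info("read=%u intent=%u write=%u\n",
    		(unsigned) c.n[SIX_LOCK_read],
    		(unsigned) c.n[SIX_LOCK_intent],
    		(unsigned) c.n[SIX_LOCK_write]);
    	six_unlock_read(lock);

    	/* The new _waiter variants let the caller supply the wait-list entry: */
    	six_lock_waiter_intent(lock, &w, NULL, NULL);

    	/* A write lock is taken while already holding intent: */
    	six_lock_write(lock, NULL, NULL);
    	six_unlock_write(lock);
    	six_unlock_intent(lock);
    }

The caller-owned waiter entry is what makes the later deadlock-cycle detection work in bcachefs possible: the lock code can walk wait lists without allocating, and six_optimistic_spin() can poll w.lock_acquired instead of retrying the trylock.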
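
A companion sketch for the printbuf API that the tracing changes above switch to (PRINTBUF, prt_str(), buf.allocation_failure, buf.buf) and that lib/printbuf.c implements. Only the header path and the pr_info() reporting are assumptions; every printbuf call below has its signature visible in the patch.

    #include <linux/printk.h>
    #include <linux/printbuf.h>	/* assumed path; include targets are elided above */

    static void printbuf_usage_sketch(void)
    {
    	struct printbuf buf = PRINTBUF;	/* heap-backed; grows on demand */

    	/* Tabstops are relative to the previous one: columns at 24 and 34. */
    	printbuf_tabstop_push(&buf, 24);
    	printbuf_tabstop_push(&buf, 10);

    	prt_str(&buf, "dirty buckets");
    	prt_tab(&buf);			/* left-justify: pad out to column 24 */
    	prt_str(&buf, "42");
    	prt_tab_rjust(&buf);		/* right-justify "42" against column 34 */
    	prt_newline(&buf);		/* resets cur_tabstop, applies indent */

    	if (!buf.allocation_failure)
    		pr_info("%s", buf.buf);

    	printbuf_exit(&buf);		/* frees the buffer PRINTBUF allocated */
    }

Note the ordering prt_tab_rjust() implies: the field is printed first, then right-justified after the fact by memmove()ing it up against the tabstop. prt_bytes_indented() gives the same effect via embedded \t and \r characters once tabstops have been pushed.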