linux513/514/515-tkg: Add futex_waitv() patchset from 5.16 as a toggle option (#342)
* linux513/514/515-tkg: Add futex_waitv() patchset from 5.16 as a toggle option. Not enabled by default since this makes current public Wine/Proton builds that use the old fsync interfaces break. https://github.com/ValveSoftware/wine/pull/128 https://github.com/andrealmeid/futex_waitv_patches * Minor message/comment tweaks Co-authored-by: Etienne JUVIGNY <ti3nou@gmail.com>
This commit is contained in:
6
PKGBUILD
6
PKGBUILD
@@ -500,6 +500,7 @@ case $_basever in
|
|||||||
0006-add-acs-overrides_iommu.patch
|
0006-add-acs-overrides_iommu.patch
|
||||||
0007-v5.13-fsync.patch
|
0007-v5.13-fsync.patch
|
||||||
0007-v5.13-futex2_interface.patch
|
0007-v5.13-futex2_interface.patch
|
||||||
|
0007-v5.13-futex_waitv.patch
|
||||||
0007-v5.13-winesync.patch
|
0007-v5.13-winesync.patch
|
||||||
0008-5.13-bcachefs.patch
|
0008-5.13-bcachefs.patch
|
||||||
0009-glitched-ondemand-bmq.patch
|
0009-glitched-ondemand-bmq.patch
|
||||||
@@ -530,6 +531,7 @@ case $_basever in
|
|||||||
'19661ec0d39f9663452b34433214c755179894528bf73a42f6ba52ccf572832a'
|
'19661ec0d39f9663452b34433214c755179894528bf73a42f6ba52ccf572832a'
|
||||||
'89d837bfea3515504b1c99fc881ebdc4f15e2999558127a263e795fc69408a39'
|
'89d837bfea3515504b1c99fc881ebdc4f15e2999558127a263e795fc69408a39'
|
||||||
'9ec679871cba674cf876ba836cde969296ae5034bcc10e1ec39b372e6e07aab0'
|
'9ec679871cba674cf876ba836cde969296ae5034bcc10e1ec39b372e6e07aab0'
|
||||||
|
'0e3473c19e5513bee886f03cf2476f746d8b5b2fbc0841c9d60d609b16a97c14'
|
||||||
'034d12a73b507133da2c69a34d61efd2f6b6618549650aa26d748142d22002e1'
|
'034d12a73b507133da2c69a34d61efd2f6b6618549650aa26d748142d22002e1'
|
||||||
'b0004bc559653fd8719b8adcfa1ead1075db3425d30d7d7adb8cbc6296386a8f'
|
'b0004bc559653fd8719b8adcfa1ead1075db3425d30d7d7adb8cbc6296386a8f'
|
||||||
'9fad4a40449e09522899955762c8928ae17f4cdaa16e01239fd12592e9d58177'
|
'9fad4a40449e09522899955762c8928ae17f4cdaa16e01239fd12592e9d58177'
|
||||||
@@ -560,6 +562,7 @@ case $_basever in
|
|||||||
0006-add-acs-overrides_iommu.patch
|
0006-add-acs-overrides_iommu.patch
|
||||||
0007-v5.14-fsync.patch
|
0007-v5.14-fsync.patch
|
||||||
0007-v5.14-futex2_interface.patch
|
0007-v5.14-futex2_interface.patch
|
||||||
|
0007-v5.14-futex_waitv.patch
|
||||||
0007-v5.14-winesync.patch
|
0007-v5.14-winesync.patch
|
||||||
#0008-5.14-bcachefs.patch
|
#0008-5.14-bcachefs.patch
|
||||||
0009-glitched-ondemand-bmq.patch
|
0009-glitched-ondemand-bmq.patch
|
||||||
@@ -586,6 +589,7 @@ case $_basever in
|
|||||||
'19661ec0d39f9663452b34433214c755179894528bf73a42f6ba52ccf572832a'
|
'19661ec0d39f9663452b34433214c755179894528bf73a42f6ba52ccf572832a'
|
||||||
'aa67e81a27d9062e463594acb91eca6dd13388f23cbe53ca56298f9dba61cc10'
|
'aa67e81a27d9062e463594acb91eca6dd13388f23cbe53ca56298f9dba61cc10'
|
||||||
'efe5e21706fdf64559ead866c85a5d88c5c3f743d814410df3810ca61cc5b966'
|
'efe5e21706fdf64559ead866c85a5d88c5c3f743d814410df3810ca61cc5b966'
|
||||||
|
'5742277f41f22bf29fa9742562946b8a01377f8a22adb42ceed3607541c1d5b6'
|
||||||
'034d12a73b507133da2c69a34d61efd2f6b6618549650aa26d748142d22002e1'
|
'034d12a73b507133da2c69a34d61efd2f6b6618549650aa26d748142d22002e1'
|
||||||
'9fad4a40449e09522899955762c8928ae17f4cdaa16e01239fd12592e9d58177'
|
'9fad4a40449e09522899955762c8928ae17f4cdaa16e01239fd12592e9d58177'
|
||||||
'a557b342111849a5f920bbe1c129f3ff1fc1eff62c6bd6685e0972fc88e39911'
|
'a557b342111849a5f920bbe1c129f3ff1fc1eff62c6bd6685e0972fc88e39911'
|
||||||
@@ -614,6 +618,7 @@ case $_basever in
|
|||||||
0006-add-acs-overrides_iommu.patch
|
0006-add-acs-overrides_iommu.patch
|
||||||
0007-v5.15-fsync.patch
|
0007-v5.15-fsync.patch
|
||||||
#0007-v5.15-futex2_interface.patch
|
#0007-v5.15-futex2_interface.patch
|
||||||
|
0007-v5.15-futex_waitv.patch
|
||||||
0007-v5.15-winesync.patch
|
0007-v5.15-winesync.patch
|
||||||
#0008-5.14-bcachefs.patch
|
#0008-5.14-bcachefs.patch
|
||||||
#0009-glitched-ondemand-bmq.patch
|
#0009-glitched-ondemand-bmq.patch
|
||||||
@@ -640,6 +645,7 @@ case $_basever in
|
|||||||
'19661ec0d39f9663452b34433214c755179894528bf73a42f6ba52ccf572832a'
|
'19661ec0d39f9663452b34433214c755179894528bf73a42f6ba52ccf572832a'
|
||||||
'6c4f0099896f69e56ebd8c9eac266ac8ad993acecd50945e0e84ef6f95f9ddca'
|
'6c4f0099896f69e56ebd8c9eac266ac8ad993acecd50945e0e84ef6f95f9ddca'
|
||||||
#'efe5e21706fdf64559ead866c85a5d88c5c3f743d814410df3810ca61cc5b966'
|
#'efe5e21706fdf64559ead866c85a5d88c5c3f743d814410df3810ca61cc5b966'
|
||||||
|
'c8f7c50d9b1418ba22b5ca735c47111a162be416109714d26a674162e5b2cb97'
|
||||||
'034d12a73b507133da2c69a34d61efd2f6b6618549650aa26d748142d22002e1'
|
'034d12a73b507133da2c69a34d61efd2f6b6618549650aa26d748142d22002e1'
|
||||||
#'9fad4a40449e09522899955762c8928ae17f4cdaa16e01239fd12592e9d58177'
|
#'9fad4a40449e09522899955762c8928ae17f4cdaa16e01239fd12592e9d58177'
|
||||||
#'a557b342111849a5f920bbe1c129f3ff1fc1eff62c6bd6685e0972fc88e39911'
|
#'a557b342111849a5f920bbe1c129f3ff1fc1eff62c6bd6685e0972fc88e39911'
|
||||||
|
@@ -154,6 +154,11 @@ _fsync="true"
|
|||||||
# https://gitlab.collabora.com/tonyk/linux/-/tree/futex2-dev
|
# https://gitlab.collabora.com/tonyk/linux/-/tree/futex2-dev
|
||||||
_futex2="true"
|
_futex2="true"
|
||||||
|
|
||||||
|
# Set to "true" to enable backported patches adding support for the futex_waitv() syscall, a new interface for fsync. It will appear in mainline in the Linux 5.16 release and requires a wine/proton build with built-in support for it. It's expected to be available in Valve Proton 6.3 stable soon - https://github.com/ValveSoftware/wine/pull/128
|
||||||
|
# !! Disables fsync/futex2 interfaces support !!
|
||||||
|
# https://github.com/andrealmeid/futex_waitv_patches
|
||||||
|
_futex_waitv="false"
|
||||||
|
|
||||||
# Set to "true" to enable support for winesync, an experimental replacement for esync - requires patched wine - https://repo.or.cz/linux/zf.git/shortlog/refs/heads/winesync
|
# Set to "true" to enable support for winesync, an experimental replacement for esync - requires patched wine - https://repo.or.cz/linux/zf.git/shortlog/refs/heads/winesync
|
||||||
# ! Can't be used on multiple kernels installed side-by-side, which will require https://aur.archlinux.org/packages/winesync-dkms/ instead of this option !
|
# ! Can't be used on multiple kernels installed side-by-side, which will require https://aur.archlinux.org/packages/winesync-dkms/ instead of this option !
|
||||||
_winesync="false"
|
_winesync="false"
|
||||||
|
@@ -571,7 +571,7 @@ _tkg_srcprep() {
|
|||||||
_enable "CRYPTO_LZ4" "CRYPTO_LZ4HC" "LZ4_COMPRESS" "LZ4HC_COMPRESS" "ZSWAP_COMPRESSOR_DEFAULT_LZ4" "CMDLINE_BOOL" "CONFIG_BLK_DEV_LOOP"
|
_enable "CRYPTO_LZ4" "CRYPTO_LZ4HC" "LZ4_COMPRESS" "LZ4HC_COMPRESS" "ZSWAP_COMPRESSOR_DEFAULT_LZ4" "CMDLINE_BOOL" "CONFIG_BLK_DEV_LOOP"
|
||||||
_disable "DEBUG_FORCE_FUNCTION_ALIGN_64B"
|
_disable "DEBUG_FORCE_FUNCTION_ALIGN_64B"
|
||||||
scripts/config --set-str "ZSWAP_COMPRESSOR_DEFAULT" "lz4"
|
scripts/config --set-str "ZSWAP_COMPRESSOR_DEFAULT" "lz4"
|
||||||
if [ "$_futex2" = "true" ] && [ "$_basever" != "54" ] && [ "$_basever" != "57" ] && [ "$_basever" != "58" ] && [ "$_basever" != "59" ]; then
|
if [ "$_futex2" = "true" ] && [ "$_futex_waitv" != "true" ] && [ "$_basever" != "54" ] && [ "$_basever" != "57" ] && [ "$_basever" != "58" ] && [ "$_basever" != "59" ]; then
|
||||||
sed -i -e 's/# CONFIG_EXPERT is not set/CONFIG_EXPERT=y/' ./.config
|
sed -i -e 's/# CONFIG_EXPERT is not set/CONFIG_EXPERT=y/' ./.config
|
||||||
echo -e "\r# start of config expert\r
|
echo -e "\r# start of config expert\r
|
||||||
# CONFIG_DEBUG_RSEQ is not set\r
|
# CONFIG_DEBUG_RSEQ is not set\r
|
||||||
@@ -1140,6 +1140,25 @@ CONFIG_DEBUG_INFO_BTF_MODULES=y\r
|
|||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
# futex_waitv support
|
||||||
|
tkgpatch="$srcdir/0007-v${_basekernel}-futex_waitv.patch"
|
||||||
|
if [ -e "$tkgpatch" ]; then
|
||||||
|
if [ -z "$_futex_waitv" ]; then
|
||||||
|
plain ""
|
||||||
|
plain "Enable support for futex_waitv, backported patches for fsync from 5.16 Kernel"
|
||||||
|
plain "! Will disable fsync/futex2 patchsets !"
|
||||||
|
plain "https://github.com/andrealmeid/futex_waitv_patches"
|
||||||
|
plain "https://github.com/ValveSoftware/wine/pull/128"
|
||||||
|
read -rp "`echo $' > N/y : '`" CONDITION9;
|
||||||
|
fi
|
||||||
|
if [[ "$CONDITION9" =~ [yY] ]] || [ "$_futex_waitv" = "true" ]; then
|
||||||
|
_msg="Patching futex_waitv support"
|
||||||
|
_tkg_patcher
|
||||||
|
_fsync="false"
|
||||||
|
_futex2="false"
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
# fsync support
|
# fsync support
|
||||||
tkgpatch="$srcdir/0007-v${_basekernel}-fsync.patch"
|
tkgpatch="$srcdir/0007-v${_basekernel}-fsync.patch"
|
||||||
if [ -e "$tkgpatch" ]; then
|
if [ -e "$tkgpatch" ]; then
|
||||||
@@ -1147,9 +1166,9 @@ CONFIG_DEBUG_INFO_BTF_MODULES=y\r
|
|||||||
plain ""
|
plain ""
|
||||||
plain "Enable support for fsync, an experimental replacement for esync in Valve Proton 4.11+"
|
plain "Enable support for fsync, an experimental replacement for esync in Valve Proton 4.11+"
|
||||||
plain "https://steamcommunity.com/games/221410/announcements/detail/2957094910196249305"
|
plain "https://steamcommunity.com/games/221410/announcements/detail/2957094910196249305"
|
||||||
read -rp "`echo $' > N/y : '`" CONDITION9;
|
read -rp "`echo $' > N/y : '`" CONDITION10;
|
||||||
fi
|
fi
|
||||||
if [[ "$CONDITION9" =~ [yY] ]] || [ "$_fsync" = "true" ]; then
|
if [[ "$CONDITION10" =~ [yY] ]] || [ "$_fsync" = "true" ]; then
|
||||||
_msg="Patching Fsync support"
|
_msg="Patching Fsync support"
|
||||||
_tkg_patcher
|
_tkg_patcher
|
||||||
fi
|
fi
|
||||||
@@ -1164,9 +1183,9 @@ CONFIG_DEBUG_INFO_BTF_MODULES=y\r
|
|||||||
plain "Can be enabled alongside regular fsync patchset to have a fallback option"
|
plain "Can be enabled alongside regular fsync patchset to have a fallback option"
|
||||||
plain "https://gitlab.collabora.com/tonyk/linux/-/tree/futex2-dev"
|
plain "https://gitlab.collabora.com/tonyk/linux/-/tree/futex2-dev"
|
||||||
plain "https://github.com/ValveSoftware/Proton/issues/4568"
|
plain "https://github.com/ValveSoftware/Proton/issues/4568"
|
||||||
read -rp "`echo $' > N/y : '`" CONDITION10;
|
read -rp "`echo $' > N/y : '`" CONDITION11;
|
||||||
fi
|
fi
|
||||||
if [[ "$CONDITION10" =~ [yY] ]] || [ "$_futex2" = "true" ]; then
|
if [[ "$CONDITION11" =~ [yY] ]] || [ "$_futex2" = "true" ]; then
|
||||||
_msg="Patching futex2 support"
|
_msg="Patching futex2 support"
|
||||||
_tkg_patcher
|
_tkg_patcher
|
||||||
_enable "FUTEX2"
|
_enable "FUTEX2"
|
||||||
|
536
linux-tkg-patches/5.13/0007-v5.13-futex_waitv.patch
Normal file
536
linux-tkg-patches/5.13/0007-v5.13-futex_waitv.patch
Normal file
@@ -0,0 +1,536 @@
|
|||||||
|
From 4901e29e3c0237c52eadd2c82deb9bd6e7add5ac Mon Sep 17 00:00:00 2001
|
||||||
|
From: =?UTF-8?q?Andr=C3=A9=20Almeida?= <andrealmeid@collabora.com>
|
||||||
|
Date: Thu, 23 Sep 2021 14:11:05 -0300
|
||||||
|
Subject: [PATCH 1/2] futex: Implement sys_futex_waitv()
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
Add support to wait on multiple futexes. This is the interface
|
||||||
|
implemented by this syscall:
|
||||||
|
|
||||||
|
futex_waitv(struct futex_waitv *waiters, unsigned int nr_futexes,
|
||||||
|
unsigned int flags, struct timespec *timeout, clockid_t clockid)
|
||||||
|
|
||||||
|
struct futex_waitv {
|
||||||
|
__u64 val;
|
||||||
|
__u64 uaddr;
|
||||||
|
__u32 flags;
|
||||||
|
__u32 __reserved;
|
||||||
|
};
|
||||||
|
|
||||||
|
Given an array of struct futex_waitv, wait on each uaddr. The thread
|
||||||
|
wakes if a futex_wake() is performed at any uaddr. The syscall returns
|
||||||
|
immediately if any waiter has *uaddr != val. *timeout is an optional
|
||||||
|
absolute timeout value for the operation. This syscall supports only
|
||||||
|
64bit sized timeout structs. The flags argument of the syscall should be
|
||||||
|
empty, but it can be used for future extensions. Flags for shared
|
||||||
|
futexes, sizes, etc. should be used on the individual flags of each
|
||||||
|
waiter.
|
||||||
|
|
||||||
|
__reserved is used for explicit padding and should be 0, but it might be
|
||||||
|
used for future extensions. If the userspace uses 32-bit pointers, it
|
||||||
|
should make sure to explicitly cast it when assigning to waitv::uaddr.
|
||||||
|
|
||||||
|
Returns the array index of one of the woken futexes. There’s no given
|
||||||
|
information of how many were woken, or any particular attribute of it
|
||||||
|
(if it’s the first woken, if it is of the smaller index...).
|
||||||
|
|
||||||
|
Signed-off-by: André Almeida <andrealmeid@collabora.com>
|
||||||
|
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
|
||||||
|
Link: https://lore.kernel.org/r/20210923171111.300673-17-andrealmeid@collabora.com
|
||||||
|
---
|
||||||
|
include/linux/syscalls.h | 6 +
|
||||||
|
include/uapi/asm-generic/unistd.h | 5 +-
|
||||||
|
include/uapi/linux/futex.h | 26 +++
|
||||||
|
kernel/futex.c | 334 ++++++++++++++++++++++++++++++
|
||||||
|
kernel/sys_ni.c | 1 +
|
||||||
|
5 files changed, 371 insertions(+), 1 deletion(-)
|
||||||
|
|
||||||
|
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
|
||||||
|
index 050511e8f1f8..8390482cf082 100644
|
||||||
|
--- a/include/linux/syscalls.h
|
||||||
|
+++ b/include/linux/syscalls.h
|
||||||
|
@@ -58,6 +58,7 @@ struct mq_attr;
|
||||||
|
struct compat_stat;
|
||||||
|
struct old_timeval32;
|
||||||
|
struct robust_list_head;
|
||||||
|
+struct futex_waitv;
|
||||||
|
struct getcpu_cache;
|
||||||
|
struct old_linux_dirent;
|
||||||
|
struct perf_event_attr;
|
||||||
|
@@ -623,6 +624,11 @@ asmlinkage long sys_get_robust_list(int pid,
|
||||||
|
asmlinkage long sys_set_robust_list(struct robust_list_head __user *head,
|
||||||
|
size_t len);
|
||||||
|
|
||||||
|
+asmlinkage long sys_futex_waitv(struct futex_waitv *waiters,
|
||||||
|
+ unsigned int nr_futexes, unsigned int flags,
|
||||||
|
+ struct __kernel_timespec __user *timeout, clockid_t clockid);
|
||||||
|
+
|
||||||
|
+
|
||||||
|
/* kernel/hrtimer.c */
|
||||||
|
asmlinkage long sys_nanosleep(struct __kernel_timespec __user *rqtp,
|
||||||
|
struct __kernel_timespec __user *rmtp);
|
||||||
|
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
|
||||||
|
index d2a942086fcb..3f55ac23cea9 100644
|
||||||
|
--- a/include/uapi/asm-generic/unistd.h
|
||||||
|
+++ b/include/uapi/asm-generic/unistd.h
|
||||||
|
@@ -872,8 +872,11 @@ __SYSCALL(__NR_landlock_add_rule, sys_landlock_add_rule)
|
||||||
|
#define __NR_landlock_restrict_self 446
|
||||||
|
__SYSCALL(__NR_landlock_restrict_self, sys_landlock_restrict_self)
|
||||||
|
|
||||||
|
+#define __NR_futex_waitv 449
|
||||||
|
+__SYSCALL(__NR_futex_waitv, sys_futex_waitv)
|
||||||
|
+
|
||||||
|
#undef __NR_syscalls
|
||||||
|
-#define __NR_syscalls 447
|
||||||
|
+#define __NR_syscalls 450
|
||||||
|
|
||||||
|
/*
|
||||||
|
* 32 bit systems traditionally used different
|
||||||
|
diff --git a/include/uapi/linux/futex.h b/include/uapi/linux/futex.h
|
||||||
|
index a89eb0accd5e..1666f5e4b837 100644
|
||||||
|
--- a/include/uapi/linux/futex.h
|
||||||
|
+++ b/include/uapi/linux/futex.h
|
||||||
|
@@ -41,6 +41,32 @@
|
||||||
|
#define FUTEX_CMP_REQUEUE_PI_PRIVATE (FUTEX_CMP_REQUEUE_PI | \
|
||||||
|
FUTEX_PRIVATE_FLAG)
|
||||||
|
|
||||||
|
+ /*
|
||||||
|
+ * Flags to specify the bit length of the futex word for futex2 syscalls.
|
||||||
|
+ * Currently, only 32 is supported.
|
||||||
|
+ */
|
||||||
|
+#define FUTEX_32 2
|
||||||
|
+
|
||||||
|
+/*
|
||||||
|
+ * Max numbers of elements in a futex_waitv array
|
||||||
|
+ */
|
||||||
|
+#define FUTEX_WAITV_MAX 128
|
||||||
|
+
|
||||||
|
+/**
|
||||||
|
+ * struct futex_waitv - A waiter for vectorized wait
|
||||||
|
+ * @val: Expected value at uaddr
|
||||||
|
+ * @uaddr: User address to wait on
|
||||||
|
+ * @flags: Flags for this waiter
|
||||||
|
+ * @__reserved: Reserved member to preserve data alignment. Should be 0.
|
||||||
|
+ */
|
||||||
|
+struct futex_waitv {
|
||||||
|
+ __u64 val;
|
||||||
|
+ __u64 uaddr;
|
||||||
|
+ __u32 flags;
|
||||||
|
+ __u32 __reserved;
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
+
|
||||||
|
/*
|
||||||
|
* Support for robust futexes: the kernel cleans up held futexes at
|
||||||
|
* thread exit time.
|
||||||
|
diff --git a/kernel/futex.c b/kernel/futex.c
|
||||||
|
index 408cad5e8968..d7dc0bd9379c 100644
|
||||||
|
--- a/kernel/futex.c
|
||||||
|
+++ b/kernel/futex.c
|
||||||
|
@@ -227,6 +227,18 @@ static const struct futex_q futex_q_init = {
|
||||||
|
.bitset = FUTEX_BITSET_MATCH_ANY
|
||||||
|
};
|
||||||
|
|
||||||
|
+/**
|
||||||
|
+ * struct futex_vector - Auxiliary struct for futex_waitv()
|
||||||
|
+ * @w: Userspace provided data
|
||||||
|
+ * @q: Kernel side data
|
||||||
|
+ *
|
||||||
|
+ * Struct used to build an array with all data needed for futex_waitv()
|
||||||
|
+ */
|
||||||
|
+struct futex_vector {
|
||||||
|
+ struct futex_waitv w;
|
||||||
|
+ struct futex_q q;
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
/*
|
||||||
|
* Hash buckets are shared by all the futex_keys that hash to the same
|
||||||
|
* location. Each key may have multiple futex_q structures, one for each task
|
||||||
|
@@ -3962,6 +3974,328 @@ COMPAT_SYSCALL_DEFINE3(get_robust_list, int, pid,
|
||||||
|
}
|
||||||
|
#endif /* CONFIG_COMPAT */
|
||||||
|
|
||||||
|
+/* Mask of available flags for each futex in futex_waitv list */
|
||||||
|
+#define FUTEXV_WAITER_MASK (FUTEX_32 | FUTEX_PRIVATE_FLAG)
|
||||||
|
+
|
||||||
|
+/**
|
||||||
|
+ * futex_parse_waitv - Parse a waitv array from userspace
|
||||||
|
+ * @futexv: Kernel side list of waiters to be filled
|
||||||
|
+ * @uwaitv: Userspace list to be parsed
|
||||||
|
+ * @nr_futexes: Length of futexv
|
||||||
|
+ *
|
||||||
|
+ * Return: Error code on failure, 0 on success
|
||||||
|
+ */
|
||||||
|
+static int futex_parse_waitv(struct futex_vector *futexv,
|
||||||
|
+ struct futex_waitv __user *uwaitv,
|
||||||
|
+ unsigned int nr_futexes)
|
||||||
|
+{
|
||||||
|
+ struct futex_waitv aux;
|
||||||
|
+ unsigned int i;
|
||||||
|
+
|
||||||
|
+ for (i = 0; i < nr_futexes; i++) {
|
||||||
|
+ if (copy_from_user(&aux, &uwaitv[i], sizeof(aux)))
|
||||||
|
+ return -EFAULT;
|
||||||
|
+
|
||||||
|
+ if ((aux.flags & ~FUTEXV_WAITER_MASK) || aux.__reserved)
|
||||||
|
+ return -EINVAL;
|
||||||
|
+
|
||||||
|
+ if (!(aux.flags & FUTEX_32))
|
||||||
|
+ return -EINVAL;
|
||||||
|
+
|
||||||
|
+ futexv[i].w.flags = aux.flags;
|
||||||
|
+ futexv[i].w.val = aux.val;
|
||||||
|
+ futexv[i].w.uaddr = aux.uaddr;
|
||||||
|
+ futexv[i].q = futex_q_init;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+/**
|
||||||
|
+ * unqueue_multiple - Remove various futexes from their hash bucket
|
||||||
|
+ * @v: The list of futexes to unqueue
|
||||||
|
+ * @count: Number of futexes in the list
|
||||||
|
+ *
|
||||||
|
+ * Helper to unqueue a list of futexes. This can't fail.
|
||||||
|
+ *
|
||||||
|
+ * Return:
|
||||||
|
+ * - >=0 - Index of the last futex that was awoken;
|
||||||
|
+ * - -1 - No futex was awoken
|
||||||
|
+ */
|
||||||
|
+static int unqueue_multiple(struct futex_vector *v, int count)
|
||||||
|
+{
|
||||||
|
+ int ret = -1, i;
|
||||||
|
+
|
||||||
|
+ for (i = 0; i < count; i++) {
|
||||||
|
+ if (!unqueue_me(&v[i].q))
|
||||||
|
+ ret = i;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ return ret;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+/**
|
||||||
|
+ * futex_wait_multiple_setup - Prepare to wait and enqueue multiple futexes
|
||||||
|
+ * @vs: The futex list to wait on
|
||||||
|
+ * @count: The size of the list
|
||||||
|
+ * @woken: Index of the last woken futex, if any. Used to notify the
|
||||||
|
+ * caller that it can return this index to userspace (return parameter)
|
||||||
|
+ *
|
||||||
|
+ * Prepare multiple futexes in a single step and enqueue them. This may fail if
|
||||||
|
+ * the futex list is invalid or if any futex was already awoken. On success the
|
||||||
|
+ * task is ready to interruptible sleep.
|
||||||
|
+ *
|
||||||
|
+ * Return:
|
||||||
|
+ * - 1 - One of the futexes was woken by another thread
|
||||||
|
+ * - 0 - Success
|
||||||
|
+ * - <0 - -EFAULT, -EWOULDBLOCK or -EINVAL
|
||||||
|
+ */
|
||||||
|
+static int futex_wait_multiple_setup(struct futex_vector *vs, int count, int *woken)
|
||||||
|
+{
|
||||||
|
+ struct futex_hash_bucket *hb;
|
||||||
|
+ bool retry = false;
|
||||||
|
+ int ret, i;
|
||||||
|
+ u32 uval;
|
||||||
|
+
|
||||||
|
+ /*
|
||||||
|
+ * Enqueuing multiple futexes is tricky, because we need to enqueue
|
||||||
|
+ * each futex on the list before dealing with the next one to avoid
|
||||||
|
+ * deadlocking on the hash bucket. But, before enqueuing, we need to
|
||||||
|
+ * make sure that current->state is TASK_INTERRUPTIBLE, so we don't
|
||||||
|
+ * lose any wake events, which cannot be done before the get_futex_key
|
||||||
|
+ * of the next key, because it calls get_user_pages, which can sleep.
|
||||||
|
+ * Thus, we fetch the list of futexes keys in two steps, by first
|
||||||
|
+ * pinning all the memory keys in the futex key, and only then we read
|
||||||
|
+ * each key and queue the corresponding futex.
|
||||||
|
+ *
|
||||||
|
+ * Private futexes don't need to recalculate hash in retry, so skip
|
||||||
|
+ * get_futex_key() when retrying.
|
||||||
|
+ */
|
||||||
|
+retry:
|
||||||
|
+ for (i = 0; i < count; i++) {
|
||||||
|
+ if ((vs[i].w.flags & FUTEX_PRIVATE_FLAG) && retry)
|
||||||
|
+ continue;
|
||||||
|
+
|
||||||
|
+ ret = get_futex_key(u64_to_user_ptr(vs[i].w.uaddr),
|
||||||
|
+ !(vs[i].w.flags & FUTEX_PRIVATE_FLAG),
|
||||||
|
+ &vs[i].q.key, FUTEX_READ);
|
||||||
|
+
|
||||||
|
+ if (unlikely(ret))
|
||||||
|
+ return ret;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ set_current_state(TASK_INTERRUPTIBLE);
|
||||||
|
+
|
||||||
|
+ for (i = 0; i < count; i++) {
|
||||||
|
+ u32 __user *uaddr = (u32 __user *)(unsigned long)vs[i].w.uaddr;
|
||||||
|
+ struct futex_q *q = &vs[i].q;
|
||||||
|
+ u32 val = (u32)vs[i].w.val;
|
||||||
|
+
|
||||||
|
+ hb = queue_lock(q);
|
||||||
|
+ ret = get_futex_value_locked(&uval, uaddr);
|
||||||
|
+
|
||||||
|
+ if (!ret && uval == val) {
|
||||||
|
+ /*
|
||||||
|
+ * The bucket lock can't be held while dealing with the
|
||||||
|
+ * next futex. Queue each futex at this moment so hb can
|
||||||
|
+ * be unlocked.
|
||||||
|
+ */
|
||||||
|
+ queue_me(q, hb);
|
||||||
|
+ continue;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ queue_unlock(hb);
|
||||||
|
+ __set_current_state(TASK_RUNNING);
|
||||||
|
+
|
||||||
|
+ /*
|
||||||
|
+ * Even if something went wrong, if we find out that a futex
|
||||||
|
+ * was woken, we don't return error and return this index to
|
||||||
|
+ * userspace
|
||||||
|
+ */
|
||||||
|
+ *woken = unqueue_multiple(vs, i);
|
||||||
|
+ if (*woken >= 0)
|
||||||
|
+ return 1;
|
||||||
|
+
|
||||||
|
+ if (ret) {
|
||||||
|
+ /*
|
||||||
|
+ * If we need to handle a page fault, we need to do so
|
||||||
|
+ * without any lock and any enqueued futex (otherwise
|
||||||
|
+ * we could lose some wakeup). So we do it here, after
|
||||||
|
+ * undoing all the work done so far. In success, we
|
||||||
|
+ * retry all the work.
|
||||||
|
+ */
|
||||||
|
+ if (get_user(uval, uaddr))
|
||||||
|
+ return -EFAULT;
|
||||||
|
+
|
||||||
|
+ retry = true;
|
||||||
|
+ goto retry;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ if (uval != val)
|
||||||
|
+ return -EWOULDBLOCK;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+/**
|
||||||
|
+ * futex_sleep_multiple - Check sleeping conditions and sleep
|
||||||
|
+ * @vs: List of futexes to wait for
|
||||||
|
+ * @count: Length of vs
|
||||||
|
+ * @to: Timeout
|
||||||
|
+ *
|
||||||
|
+ * Sleep if and only if the timeout hasn't expired and no futex on the list has
|
||||||
|
+ * been woken up.
|
||||||
|
+ */
|
||||||
|
+static void futex_sleep_multiple(struct futex_vector *vs, unsigned int count,
|
||||||
|
+ struct hrtimer_sleeper *to)
|
||||||
|
+{
|
||||||
|
+ if (to && !to->task)
|
||||||
|
+ return;
|
||||||
|
+
|
||||||
|
+ for (; count; count--, vs++) {
|
||||||
|
+ if (!READ_ONCE(vs->q.lock_ptr))
|
||||||
|
+ return;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ freezable_schedule();
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+/**
|
||||||
|
+ * futex_wait_multiple - Prepare to wait on and enqueue several futexes
|
||||||
|
+ * @vs: The list of futexes to wait on
|
||||||
|
+ * @count: The number of objects
|
||||||
|
+ * @to: Timeout before giving up and returning to userspace
|
||||||
|
+ *
|
||||||
|
+ * Entry point for the FUTEX_WAIT_MULTIPLE futex operation, this function
|
||||||
|
+ * sleeps on a group of futexes and returns on the first futex that is
|
||||||
|
+ * woken, or after the timeout has elapsed.
|
||||||
|
+ *
|
||||||
|
+ * Return:
|
||||||
|
+ * - >=0 - Hint to the futex that was awoken
|
||||||
|
+ * - <0 - On error
|
||||||
|
+ */
|
||||||
|
+int futex_wait_multiple(struct futex_vector *vs, unsigned int count,
|
||||||
|
+ struct hrtimer_sleeper *to)
|
||||||
|
+{
|
||||||
|
+ int ret, hint = 0;
|
||||||
|
+
|
||||||
|
+ if (to)
|
||||||
|
+ hrtimer_sleeper_start_expires(to, HRTIMER_MODE_ABS);
|
||||||
|
+
|
||||||
|
+ while (1) {
|
||||||
|
+ ret = futex_wait_multiple_setup(vs, count, &hint);
|
||||||
|
+ if (ret) {
|
||||||
|
+ if (ret > 0) {
|
||||||
|
+ /* A futex was woken during setup */
|
||||||
|
+ ret = hint;
|
||||||
|
+ }
|
||||||
|
+ return ret;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ futex_sleep_multiple(vs, count, to);
|
||||||
|
+
|
||||||
|
+ __set_current_state(TASK_RUNNING);
|
||||||
|
+
|
||||||
|
+ ret = unqueue_multiple(vs, count);
|
||||||
|
+ if (ret >= 0)
|
||||||
|
+ return ret;
|
||||||
|
+
|
||||||
|
+ if (to && !to->task)
|
||||||
|
+ return -ETIMEDOUT;
|
||||||
|
+ else if (signal_pending(current))
|
||||||
|
+ return -ERESTARTSYS;
|
||||||
|
+ /*
|
||||||
|
+ * The final case is a spurious wakeup, for
|
||||||
|
+ * which just retry.
|
||||||
|
+ */
|
||||||
|
+ }
|
||||||
|
+}
|
||||||
|
+/* Mask of available flags for each futex in futex_waitv list */
|
||||||
|
+#define FUTEXV_WAITER_MASK (FUTEX_32 | FUTEX_PRIVATE_FLAG)
|
||||||
|
+
|
||||||
|
+/**
|
||||||
|
+ * sys_futex_waitv - Wait on a list of futexes
|
||||||
|
+ * @waiters: List of futexes to wait on
|
||||||
|
+ * @nr_futexes: Length of futexv
|
||||||
|
+ * @flags: Reserved for future use; must be 0 (no flags are supported for now)
|
||||||
|
+ * @timeout: Optional absolute timeout.
|
||||||
|
+ * @clockid: Clock to be used for the timeout, realtime or monotonic.
|
||||||
|
+ *
|
||||||
|
+ * Given an array of `struct futex_waitv`, wait on each uaddr. The thread wakes
|
||||||
|
+ * if a futex_wake() is performed at any uaddr. The syscall returns immediately
|
||||||
|
+ * if any waiter has *uaddr != val. *timeout is an optional timeout value for
|
||||||
|
+ * the operation. Each waiter has individual flags. The `flags` argument for
|
||||||
|
+ * the syscall is currently unused and must be 0; the timeout clock is selected
|
||||||
|
+ * with `clockid`. Flags for private futexes, sizes, etc. should be used on the
|
||||||
|
+ * individual flags of each waiter.
|
||||||
|
+ *
|
||||||
|
+ * Returns the array index of one of the woken futexes. No further information
|
||||||
|
+ * is provided: any number of other futexes may also have been woken by the
|
||||||
|
+ * same event, and if more than one futex was woken, the returned index may
|
||||||
|
+ * refer to any one of them. (It is not necessarily the futex with the
|
||||||
|
+ * smallest index, nor the one most recently woken, nor...)
|
||||||
|
+ */
|
||||||
|
+
|
||||||
|
+SYSCALL_DEFINE5(futex_waitv, struct futex_waitv __user *, waiters,
|
||||||
|
+ unsigned int, nr_futexes, unsigned int, flags,
|
||||||
|
+ struct __kernel_timespec __user *, timeout, clockid_t, clockid)
|
||||||
|
+{
|
||||||
|
+ struct hrtimer_sleeper to;
|
||||||
|
+ struct futex_vector *futexv;
|
||||||
|
+ struct timespec64 ts;
|
||||||
|
+ ktime_t time;
|
||||||
|
+ int ret;
|
||||||
|
+
|
||||||
|
+ /* This syscall supports no flags for now */
|
||||||
|
+ if (flags)
|
||||||
|
+ return -EINVAL;
|
||||||
|
+
|
||||||
|
+ if (!nr_futexes || nr_futexes > FUTEX_WAITV_MAX || !waiters)
|
||||||
|
+ return -EINVAL;
|
||||||
|
+
|
||||||
|
+ if (timeout) {
|
||||||
|
+ int flag_clkid = 0, flag_init = 0;
|
||||||
|
+
|
||||||
|
+ if (clockid == CLOCK_REALTIME) {
|
||||||
|
+ flag_clkid = FLAGS_CLOCKRT;
|
||||||
|
+ flag_init = FUTEX_CLOCK_REALTIME;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ if (clockid != CLOCK_REALTIME && clockid != CLOCK_MONOTONIC)
|
||||||
|
+ return -EINVAL;
|
||||||
|
+
|
||||||
|
+ if (get_timespec64(&ts, timeout))
|
||||||
|
+ return -EFAULT;
|
||||||
|
+
|
||||||
|
+ /*
|
||||||
|
+ * Since there's no opcode for futex_waitv, use
|
||||||
|
+ * FUTEX_WAIT_BITSET that uses absolute timeout as well
|
||||||
|
+ */
|
||||||
|
+ ret = futex_init_timeout(FUTEX_WAIT_BITSET, flag_init, &ts, &time);
|
||||||
|
+ if (ret)
|
||||||
|
+ return ret;
|
||||||
|
+
|
||||||
|
+ futex_setup_timer(&time, &to, flag_clkid, 0);
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ futexv = kcalloc(nr_futexes, sizeof(*futexv), GFP_KERNEL);
|
||||||
|
+ if (!futexv)
|
||||||
|
+ return -ENOMEM;
|
||||||
|
+
|
||||||
|
+ ret = futex_parse_waitv(futexv, waiters, nr_futexes);
|
||||||
|
+ if (!ret)
|
||||||
|
+ ret = futex_wait_multiple(futexv, nr_futexes, timeout ? &to : NULL);
|
||||||
|
+
|
||||||
|
+ if (timeout) {
|
||||||
|
+ hrtimer_cancel(&to.timer);
|
||||||
|
+ destroy_hrtimer_on_stack(&to.timer);
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ kfree(futexv);
|
||||||
|
+ return ret;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
#ifdef CONFIG_COMPAT_32BIT_TIME
|
||||||
|
SYSCALL_DEFINE6(futex_time32, u32 __user *, uaddr, int, op, u32, val,
|
||||||
|
const struct old_timespec32 __user *, utime, u32 __user *, uaddr2,
|
||||||
|
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
|
||||||
|
index 0ea8128468c3..0979fac9414d 100644
|
||||||
|
--- a/kernel/sys_ni.c
|
||||||
|
+++ b/kernel/sys_ni.c
|
||||||
|
@@ -150,6 +150,7 @@ COND_SYSCALL(set_robust_list);
|
||||||
|
COND_SYSCALL_COMPAT(set_robust_list);
|
||||||
|
COND_SYSCALL(get_robust_list);
|
||||||
|
COND_SYSCALL_COMPAT(get_robust_list);
|
||||||
|
+COND_SYSCALL(futex_waitv);
|
||||||
|
|
||||||
|
/* kernel/hrtimer.c */
|
||||||
|
|
||||||
|
--
|
||||||
|
2.33.1
|
||||||
|
|
||||||
|
From 4e40f3886e134f33c50ca79bc8b323cea784bd78 Mon Sep 17 00:00:00 2001
|
||||||
|
From: =?UTF-8?q?Andr=C3=A9=20Almeida?= <andrealmeid@collabora.com>
|
||||||
|
Date: Thu, 23 Sep 2021 14:11:06 -0300
|
||||||
|
Subject: [PATCH 2/2] futex,x86: Wire up sys_futex_waitv()
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
Wire up syscall entry point for x86 arch, for both i386 and x86_64.
|
||||||
|
|
||||||
|
Signed-off-by: André Almeida <andrealmeid@collabora.com>
|
||||||
|
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
|
||||||
|
Link: https://lore.kernel.org/r/20210923171111.300673-18-andrealmeid@collabora.com
|
||||||
|
---
|
||||||
|
arch/x86/entry/syscalls/syscall_32.tbl | 1 +
|
||||||
|
arch/x86/entry/syscalls/syscall_64.tbl | 1 +
|
||||||
|
2 files changed, 2 insertions(+)
|
||||||
|
|
||||||
|
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
|
||||||
|
index 4bbc267fb36b..b2b9b9df1355 100644
|
||||||
|
--- a/arch/x86/entry/syscalls/syscall_32.tbl
|
||||||
|
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
|
||||||
|
@@ -451,3 +451,4 @@
|
||||||
|
444 i386 landlock_create_ruleset sys_landlock_create_ruleset
|
||||||
|
445 i386 landlock_add_rule sys_landlock_add_rule
|
||||||
|
446 i386 landlock_restrict_self sys_landlock_restrict_self
|
||||||
|
+449 i386 futex_waitv sys_futex_waitv
|
||||||
|
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
|
||||||
|
index ce18119ea0d0..bfd4e8f5be34 100644
|
||||||
|
--- a/arch/x86/entry/syscalls/syscall_64.tbl
|
||||||
|
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
|
||||||
|
@@ -368,6 +368,7 @@
|
||||||
|
444 common landlock_create_ruleset sys_landlock_create_ruleset
|
||||||
|
445 common landlock_add_rule sys_landlock_add_rule
|
||||||
|
446 common landlock_restrict_self sys_landlock_restrict_self
|
||||||
|
+449 common futex_waitv sys_futex_waitv
|
||||||
|
|
||||||
|
#
|
||||||
|
# Due to a historical design error, certain syscalls are numbered differently
|
||||||
|
--
|
||||||
|
2.33.1
|
||||||
|
|
536
linux-tkg-patches/5.14/0007-v5.14-futex_waitv.patch
Normal file
536
linux-tkg-patches/5.14/0007-v5.14-futex_waitv.patch
Normal file
@@ -0,0 +1,536 @@
|
|||||||
|
From 4901e29e3c0237c52eadd2c82deb9bd6e7add5ac Mon Sep 17 00:00:00 2001
|
||||||
|
From: =?UTF-8?q?Andr=C3=A9=20Almeida?= <andrealmeid@collabora.com>
|
||||||
|
Date: Thu, 23 Sep 2021 14:11:05 -0300
|
||||||
|
Subject: [PATCH 1/2] futex: Implement sys_futex_waitv()
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
Add support to wait on multiple futexes. This is the interface
|
||||||
|
implemented by this syscall:
|
||||||
|
|
||||||
|
futex_waitv(struct futex_waitv *waiters, unsigned int nr_futexes,
|
||||||
|
unsigned int flags, struct timespec *timeout, clockid_t clockid)
|
||||||
|
|
||||||
|
struct futex_waitv {
|
||||||
|
__u64 val;
|
||||||
|
__u64 uaddr;
|
||||||
|
__u32 flags;
|
||||||
|
__u32 __reserved;
|
||||||
|
};
|
||||||
|
|
||||||
|
Given an array of struct futex_waitv, wait on each uaddr. The thread
|
||||||
|
wakes if a futex_wake() is performed at any uaddr. The syscall returns
|
||||||
|
immediately if any waiter has *uaddr != val. *timeout is an optional
|
||||||
|
absolute timeout value for the operation. This syscall supports only
|
||||||
|
64bit sized timeout structs. The flags argument of the syscall should be
|
||||||
|
empty, but it can be used for future extensions. Flags for shared
|
||||||
|
futexes, sizes, etc. should be used on the individual flags of each
|
||||||
|
waiter.
|
||||||
|
|
||||||
|
__reserved is used for explicit padding and should be 0, but it might be
|
||||||
|
used for future extensions. If the userspace uses 32-bit pointers, it
|
||||||
|
should make sure to explicitly cast it when assigning to waitv::uaddr.
|
||||||
|
|
||||||
|
Returns the array index of one of the woken futexes. There’s no given
|
||||||
|
information of how many were woken, or any particular attribute of it
|
||||||
|
(if it’s the first woken, if it is of the smaller index...).
|
||||||
|
|
||||||
|
Signed-off-by: André Almeida <andrealmeid@collabora.com>
|
||||||
|
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
|
||||||
|
Link: https://lore.kernel.org/r/20210923171111.300673-17-andrealmeid@collabora.com
|
||||||
|
---
|
||||||
|
include/linux/syscalls.h | 6 +
|
||||||
|
include/uapi/asm-generic/unistd.h | 5 +-
|
||||||
|
include/uapi/linux/futex.h | 26 +++
|
||||||
|
kernel/futex.c | 334 ++++++++++++++++++++++++++++++
|
||||||
|
kernel/sys_ni.c | 1 +
|
||||||
|
5 files changed, 371 insertions(+), 1 deletion(-)
|
||||||
|
|
||||||
|
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
|
||||||
|
index 050511e8f1f8..8390482cf082 100644
|
||||||
|
--- a/include/linux/syscalls.h
|
||||||
|
+++ b/include/linux/syscalls.h
|
||||||
|
@@ -58,6 +58,7 @@ struct mq_attr;
|
||||||
|
struct compat_stat;
|
||||||
|
struct old_timeval32;
|
||||||
|
struct robust_list_head;
|
||||||
|
+struct futex_waitv;
|
||||||
|
struct getcpu_cache;
|
||||||
|
struct old_linux_dirent;
|
||||||
|
struct perf_event_attr;
|
||||||
|
@@ -623,6 +624,11 @@ asmlinkage long sys_get_robust_list(int pid,
|
||||||
|
asmlinkage long sys_set_robust_list(struct robust_list_head __user *head,
|
||||||
|
size_t len);
|
||||||
|
|
||||||
|
+asmlinkage long sys_futex_waitv(struct futex_waitv *waiters,
|
||||||
|
+ unsigned int nr_futexes, unsigned int flags,
|
||||||
|
+ struct __kernel_timespec __user *timeout, clockid_t clockid);
|
||||||
|
+
|
||||||
|
+
|
||||||
|
/* kernel/hrtimer.c */
|
||||||
|
asmlinkage long sys_nanosleep(struct __kernel_timespec __user *rqtp,
|
||||||
|
struct __kernel_timespec __user *rmtp);
|
||||||
|
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
|
||||||
|
index d2a942086fcb..3f55ac23cea9 100644
|
||||||
|
--- a/include/uapi/asm-generic/unistd.h
|
||||||
|
+++ b/include/uapi/asm-generic/unistd.h
|
||||||
|
@@ -878,8 +878,11 @@ __SYSCALL(__NR_landlock_add_rule, sys_landlock_add_rule)
|
||||||
|
__SYSCALL(__NR_memfd_secret, sys_memfd_secret)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
+#define __NR_futex_waitv 449
|
||||||
|
+__SYSCALL(__NR_futex_waitv, sys_futex_waitv)
|
||||||
|
+
|
||||||
|
#undef __NR_syscalls
|
||||||
|
-#define __NR_syscalls 448
|
||||||
|
+#define __NR_syscalls 450
|
||||||
|
|
||||||
|
/*
|
||||||
|
* 32 bit systems traditionally used different
|
||||||
|
diff --git a/include/uapi/linux/futex.h b/include/uapi/linux/futex.h
|
||||||
|
index a89eb0accd5e..1666f5e4b837 100644
|
||||||
|
--- a/include/uapi/linux/futex.h
|
||||||
|
+++ b/include/uapi/linux/futex.h
|
||||||
|
@@ -41,6 +41,32 @@
|
||||||
|
#define FUTEX_CMP_REQUEUE_PI_PRIVATE (FUTEX_CMP_REQUEUE_PI | \
|
||||||
|
FUTEX_PRIVATE_FLAG)
|
||||||
|
|
||||||
|
+ /*
|
||||||
|
+ * Flags to specify the bit length of the futex word for futex2 syscalls.
|
||||||
|
+ * Currently, only 32 is supported.
|
||||||
|
+ */
|
||||||
|
+#define FUTEX_32 2
|
||||||
|
+
|
||||||
|
+/*
|
||||||
|
+ * Max numbers of elements in a futex_waitv array
|
||||||
|
+ */
|
||||||
|
+#define FUTEX_WAITV_MAX 128
|
||||||
|
+
|
||||||
|
+/**
|
||||||
|
+ * struct futex_waitv - A waiter for vectorized wait
|
||||||
|
+ * @val: Expected value at uaddr
|
||||||
|
+ * @uaddr: User address to wait on
|
||||||
|
+ * @flags: Flags for this waiter
|
||||||
|
+ * @__reserved: Reserved member to preserve data alignment. Should be 0.
|
||||||
|
+ */
|
||||||
|
+struct futex_waitv {
|
||||||
|
+ __u64 val;
|
||||||
|
+ __u64 uaddr;
|
||||||
|
+ __u32 flags;
|
||||||
|
+ __u32 __reserved;
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
+
|
||||||
|
/*
|
||||||
|
* Support for robust futexes: the kernel cleans up held futexes at
|
||||||
|
* thread exit time.
|
||||||
|
diff --git a/kernel/futex.c b/kernel/futex.c
|
||||||
|
index 408cad5e8968..d7dc0bd9379c 100644
|
||||||
|
--- a/kernel/futex.c
|
||||||
|
+++ b/kernel/futex.c
|
||||||
|
@@ -227,6 +227,18 @@ static const struct futex_q futex_q_init = {
|
||||||
|
.bitset = FUTEX_BITSET_MATCH_ANY
|
||||||
|
};
|
||||||
|
|
||||||
|
+/**
|
||||||
|
+ * struct futex_vector - Auxiliary struct for futex_waitv()
|
||||||
|
+ * @w: Userspace provided data
|
||||||
|
+ * @q: Kernel side data
|
||||||
|
+ *
|
||||||
|
+ * Struct used to build an array with all data needed for futex_waitv()
|
||||||
|
+ */
|
||||||
|
+struct futex_vector {
|
||||||
|
+ struct futex_waitv w;
|
||||||
|
+ struct futex_q q;
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
/*
|
||||||
|
* Hash buckets are shared by all the futex_keys that hash to the same
|
||||||
|
* location. Each key may have multiple futex_q structures, one for each task
|
||||||
|
@@ -3962,6 +3974,328 @@ COMPAT_SYSCALL_DEFINE3(get_robust_list, int, pid,
|
||||||
|
}
|
||||||
|
#endif /* CONFIG_COMPAT */
|
||||||
|
|
||||||
|
+/* Mask of available flags for each futex in futex_waitv list */
|
||||||
|
+#define FUTEXV_WAITER_MASK (FUTEX_32 | FUTEX_PRIVATE_FLAG)
|
||||||
|
+
|
||||||
|
+/**
|
||||||
|
+ * futex_parse_waitv - Parse a waitv array from userspace
|
||||||
|
+ * @futexv: Kernel side list of waiters to be filled
|
||||||
|
+ * @uwaitv: Userspace list to be parsed
|
||||||
|
+ * @nr_futexes: Length of futexv
|
||||||
|
+ *
|
||||||
|
+ * Return: Error code on failure, 0 on success
|
||||||
|
+ */
|
||||||
|
+static int futex_parse_waitv(struct futex_vector *futexv,
|
||||||
|
+ struct futex_waitv __user *uwaitv,
|
||||||
|
+ unsigned int nr_futexes)
|
||||||
|
+{
|
||||||
|
+ struct futex_waitv aux;
|
||||||
|
+ unsigned int i;
|
||||||
|
+
|
||||||
|
+ for (i = 0; i < nr_futexes; i++) {
|
||||||
|
+ if (copy_from_user(&aux, &uwaitv[i], sizeof(aux)))
|
||||||
|
+ return -EFAULT;
|
||||||
|
+
|
||||||
|
+ if ((aux.flags & ~FUTEXV_WAITER_MASK) || aux.__reserved)
|
||||||
|
+ return -EINVAL;
|
||||||
|
+
|
||||||
|
+ if (!(aux.flags & FUTEX_32))
|
||||||
|
+ return -EINVAL;
|
||||||
|
+
|
||||||
|
+ futexv[i].w.flags = aux.flags;
|
||||||
|
+ futexv[i].w.val = aux.val;
|
||||||
|
+ futexv[i].w.uaddr = aux.uaddr;
|
||||||
|
+ futexv[i].q = futex_q_init;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+/**
|
||||||
|
+ * unqueue_multiple - Remove various futexes from their hash bucket
|
||||||
|
+ * @v: The list of futexes to unqueue
|
||||||
|
+ * @count: Number of futexes in the list
|
||||||
|
+ *
|
||||||
|
+ * Helper to unqueue a list of futexes. This can't fail.
|
||||||
|
+ *
|
||||||
|
+ * Return:
|
||||||
|
+ * - >=0 - Index of the last futex that was awoken;
|
||||||
|
+ * - -1 - No futex was awoken
|
||||||
|
+ */
|
||||||
|
+static int unqueue_multiple(struct futex_vector *v, int count)
|
||||||
|
+{
|
||||||
|
+ int ret = -1, i;
|
||||||
|
+
|
||||||
|
+ for (i = 0; i < count; i++) {
|
||||||
|
+ if (!unqueue_me(&v[i].q))
|
||||||
|
+ ret = i;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ return ret;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+/**
|
||||||
|
+ * futex_wait_multiple_setup - Prepare to wait and enqueue multiple futexes
|
||||||
|
+ * @vs: The futex list to wait on
|
||||||
|
+ * @count: The size of the list
|
||||||
|
+ * @woken: Index of the last woken futex, if any. Used to notify the
|
||||||
|
+ * caller that it can return this index to userspace (return parameter)
|
||||||
|
+ *
|
||||||
|
+ * Prepare multiple futexes in a single step and enqueue them. This may fail if
|
||||||
|
+ * the futex list is invalid or if any futex was already awoken. On success the
|
||||||
|
+ * task is ready to interruptible sleep.
|
||||||
|
+ *
|
||||||
|
+ * Return:
|
||||||
|
+ * - 1 - One of the futexes was woken by another thread
|
||||||
|
+ * - 0 - Success
|
||||||
|
+ * - <0 - -EFAULT, -EWOULDBLOCK or -EINVAL
|
||||||
|
+ */
|
||||||
|
+static int futex_wait_multiple_setup(struct futex_vector *vs, int count, int *woken)
|
||||||
|
+{
|
||||||
|
+ struct futex_hash_bucket *hb;
|
||||||
|
+ bool retry = false;
|
||||||
|
+ int ret, i;
|
||||||
|
+ u32 uval;
|
||||||
|
+
|
||||||
|
+ /*
|
||||||
|
+ * Enqueuing multiple futexes is tricky, because we need to enqueue
|
||||||
|
+ * each futex on the list before dealing with the next one to avoid
|
||||||
|
+ * deadlocking on the hash bucket. But, before enqueuing, we need to
|
||||||
|
+ * make sure that current->state is TASK_INTERRUPTIBLE, so we don't
|
||||||
|
+ * lose any wake events, which cannot be done before the get_futex_key
|
||||||
|
+ * of the next key, because it calls get_user_pages, which can sleep.
|
||||||
|
+ * Thus, we fetch the list of futexes keys in two steps, by first
|
||||||
|
+ * pinning all the memory keys in the futex key, and only then we read
|
||||||
|
+ * each key and queue the corresponding futex.
|
||||||
|
+ *
|
||||||
|
+ * Private futexes don't need to recalculate hash in retry, so skip
|
||||||
|
+ * get_futex_key() when retrying.
|
||||||
|
+ */
|
||||||
|
+retry:
|
||||||
|
+ for (i = 0; i < count; i++) {
|
||||||
|
+ if ((vs[i].w.flags & FUTEX_PRIVATE_FLAG) && retry)
|
||||||
|
+ continue;
|
||||||
|
+
|
||||||
|
+ ret = get_futex_key(u64_to_user_ptr(vs[i].w.uaddr),
|
||||||
|
+ !(vs[i].w.flags & FUTEX_PRIVATE_FLAG),
|
||||||
|
+ &vs[i].q.key, FUTEX_READ);
|
||||||
|
+
|
||||||
|
+ if (unlikely(ret))
|
||||||
|
+ return ret;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ set_current_state(TASK_INTERRUPTIBLE);
|
||||||
|
+
|
||||||
|
+ for (i = 0; i < count; i++) {
|
||||||
|
+ u32 __user *uaddr = (u32 __user *)(unsigned long)vs[i].w.uaddr;
|
||||||
|
+ struct futex_q *q = &vs[i].q;
|
||||||
|
+ u32 val = (u32)vs[i].w.val;
|
||||||
|
+
|
||||||
|
+ hb = queue_lock(q);
|
||||||
|
+ ret = get_futex_value_locked(&uval, uaddr);
|
||||||
|
+
|
||||||
|
+ if (!ret && uval == val) {
|
||||||
|
+ /*
|
||||||
|
+ * The bucket lock can't be held while dealing with the
|
||||||
|
+ * next futex. Queue each futex at this moment so hb can
|
||||||
|
+ * be unlocked.
|
||||||
|
+ */
|
||||||
|
+ queue_me(q, hb);
|
||||||
|
+ continue;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ queue_unlock(hb);
|
||||||
|
+ __set_current_state(TASK_RUNNING);
|
||||||
|
+
|
||||||
|
+ /*
|
||||||
|
+ * Even if something went wrong, if we find out that a futex
|
||||||
|
+ * was woken, we don't return error and return this index to
|
||||||
|
+ * userspace
|
||||||
|
+ */
|
||||||
|
+ *woken = unqueue_multiple(vs, i);
|
||||||
|
+ if (*woken >= 0)
|
||||||
|
+ return 1;
|
||||||
|
+
|
||||||
|
+ if (ret) {
|
||||||
|
+ /*
|
||||||
|
+ * If we need to handle a page fault, we need to do so
|
||||||
|
+ * without any lock and any enqueued futex (otherwise
|
||||||
|
+ * we could lose some wakeup). So we do it here, after
|
||||||
|
+ * undoing all the work done so far. In success, we
|
||||||
|
+ * retry all the work.
|
||||||
|
+ */
|
||||||
|
+ if (get_user(uval, uaddr))
|
||||||
|
+ return -EFAULT;
|
||||||
|
+
|
||||||
|
+ retry = true;
|
||||||
|
+ goto retry;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ if (uval != val)
|
||||||
|
+ return -EWOULDBLOCK;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+/**
|
||||||
|
+ * futex_sleep_multiple - Check sleeping conditions and sleep
|
||||||
|
+ * @vs: List of futexes to wait for
|
||||||
|
+ * @count: Length of vs
|
||||||
|
+ * @to: Timeout
|
||||||
|
+ *
|
||||||
|
+ * Sleep if and only if the timeout hasn't expired and no futex on the list has
|
||||||
|
+ * been woken up.
|
||||||
|
+ */
|
||||||
|
+static void futex_sleep_multiple(struct futex_vector *vs, unsigned int count,
|
||||||
|
+ struct hrtimer_sleeper *to)
|
||||||
|
+{
|
||||||
|
+ if (to && !to->task)
|
||||||
|
+ return;
|
||||||
|
+
|
||||||
|
+ for (; count; count--, vs++) {
|
||||||
|
+ if (!READ_ONCE(vs->q.lock_ptr))
|
||||||
|
+ return;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ freezable_schedule();
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+/**
|
||||||
|
+ * futex_wait_multiple - Prepare to wait on and enqueue several futexes
|
||||||
|
+ * @vs: The list of futexes to wait on
|
||||||
|
+ * @count: The number of objects
|
||||||
|
+ * @to: Timeout before giving up and returning to userspace
|
||||||
|
+ *
|
||||||
|
+ * Entry point for the FUTEX_WAIT_MULTIPLE futex operation, this function
|
||||||
|
+ * sleeps on a group of futexes and returns on the first futex that is
|
||||||
|
+ * woken, or after the timeout has elapsed.
|
||||||
|
+ *
|
||||||
|
+ * Return:
|
||||||
|
+ * - >=0 - Hint to the futex that was awoken
|
||||||
|
+ * - <0 - On error
|
||||||
|
+ */
|
||||||
|
+int futex_wait_multiple(struct futex_vector *vs, unsigned int count,
|
||||||
|
+ struct hrtimer_sleeper *to)
|
||||||
|
+{
|
||||||
|
+ int ret, hint = 0;
|
||||||
|
+
|
||||||
|
+ if (to)
|
||||||
|
+ hrtimer_sleeper_start_expires(to, HRTIMER_MODE_ABS);
|
||||||
|
+
|
||||||
|
+ while (1) {
|
||||||
|
+ ret = futex_wait_multiple_setup(vs, count, &hint);
|
||||||
|
+ if (ret) {
|
||||||
|
+ if (ret > 0) {
|
||||||
|
+ /* A futex was woken during setup */
|
||||||
|
+ ret = hint;
|
||||||
|
+ }
|
||||||
|
+ return ret;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ futex_sleep_multiple(vs, count, to);
|
||||||
|
+
|
||||||
|
+ __set_current_state(TASK_RUNNING);
|
||||||
|
+
|
||||||
|
+ ret = unqueue_multiple(vs, count);
|
||||||
|
+ if (ret >= 0)
|
||||||
|
+ return ret;
|
||||||
|
+
|
||||||
|
+ if (to && !to->task)
|
||||||
|
+ return -ETIMEDOUT;
|
||||||
|
+ else if (signal_pending(current))
|
||||||
|
+ return -ERESTARTSYS;
|
||||||
|
+ /*
|
||||||
|
+ * The final case is a spurious wakeup, for
|
||||||
|
+ * which just retry.
|
||||||
|
+ */
|
||||||
|
+ }
|
||||||
|
+}
|
||||||
|
+/* Mask of available flags for each futex in futex_waitv list */
|
||||||
|
+#define FUTEXV_WAITER_MASK (FUTEX_32 | FUTEX_PRIVATE_FLAG)
|
||||||
|
+
|
||||||
|
+/**
|
||||||
|
+ * sys_futex_waitv - Wait on a list of futexes
|
||||||
|
+ * @waiters: List of futexes to wait on
|
||||||
|
+ * @nr_futexes: Length of futexv
|
||||||
|
+ * @flags: Reserved for future extensions; must be 0 for now
|
||||||
|
+ * @timeout: Optional absolute timeout.
|
||||||
|
+ * @clockid: Clock to be used for the timeout, realtime or monotonic.
|
||||||
|
+ *
|
||||||
|
+ * Given an array of `struct futex_waitv`, wait on each uaddr. The thread wakes
|
||||||
|
+ * if a futex_wake() is performed at any uaddr. The syscall returns immediately
|
||||||
|
+ * if any waiter has *uaddr != val. *timeout is an optional timeout value for
|
||||||
|
+ * the operation. Each waiter has individual flags. The `flags` argument for
|
||||||
|
+ * the syscall is reserved for future extensions and must currently be 0; the
|
||||||
|
+ * timeout clock is selected with `clockid`. Flags for private futexes, sizes,
|
||||||
|
+ * etc. should be used on the individual flags of each waiter.
|
||||||
|
+ *
|
||||||
|
+ * Returns the array index of one of the woken futexes. No further information
|
||||||
|
+ * is provided: any number of other futexes may also have been woken by the
|
||||||
|
+ * same event, and if more than one futex was woken, the returned index may
|
||||||
|
+ * refer to any one of them. (It is not necessarily the futex with the
|
||||||
|
+ * smallest index, nor the one most recently woken, nor...)
|
||||||
|
+ */
|
||||||
|
+
|
||||||
|
+SYSCALL_DEFINE5(futex_waitv, struct futex_waitv __user *, waiters,
|
||||||
|
+ unsigned int, nr_futexes, unsigned int, flags,
|
||||||
|
+ struct __kernel_timespec __user *, timeout, clockid_t, clockid)
|
||||||
|
+{
|
||||||
|
+ struct hrtimer_sleeper to;
|
||||||
|
+ struct futex_vector *futexv;
|
||||||
|
+ struct timespec64 ts;
|
||||||
|
+ ktime_t time;
|
||||||
|
+ int ret;
|
||||||
|
+
|
||||||
|
+ /* This syscall supports no flags for now */
|
||||||
|
+ if (flags)
|
||||||
|
+ return -EINVAL;
|
||||||
|
+
|
||||||
|
+ if (!nr_futexes || nr_futexes > FUTEX_WAITV_MAX || !waiters)
|
||||||
|
+ return -EINVAL;
|
||||||
|
+
|
||||||
|
+ if (timeout) {
|
||||||
|
+ int flag_clkid = 0, flag_init = 0;
|
||||||
|
+
|
||||||
|
+ if (clockid == CLOCK_REALTIME) {
|
||||||
|
+ flag_clkid = FLAGS_CLOCKRT;
|
||||||
|
+ flag_init = FUTEX_CLOCK_REALTIME;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ if (clockid != CLOCK_REALTIME && clockid != CLOCK_MONOTONIC)
|
||||||
|
+ return -EINVAL;
|
||||||
|
+
|
||||||
|
+ if (get_timespec64(&ts, timeout))
|
||||||
|
+ return -EFAULT;
|
||||||
|
+
|
||||||
|
+ /*
|
||||||
|
+ * Since there's no opcode for futex_waitv, use
|
||||||
|
+ * FUTEX_WAIT_BITSET that uses absolute timeout as well
|
||||||
|
+ */
|
||||||
|
+ ret = futex_init_timeout(FUTEX_WAIT_BITSET, flag_init, &ts, &time);
|
||||||
|
+ if (ret)
|
||||||
|
+ return ret;
|
||||||
|
+
|
||||||
|
+ futex_setup_timer(&time, &to, flag_clkid, 0);
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ futexv = kcalloc(nr_futexes, sizeof(*futexv), GFP_KERNEL);
|
||||||
|
+ if (!futexv)
|
||||||
|
+ return -ENOMEM;
|
||||||
|
+
|
||||||
|
+ ret = futex_parse_waitv(futexv, waiters, nr_futexes);
|
||||||
|
+ if (!ret)
|
||||||
|
+ ret = futex_wait_multiple(futexv, nr_futexes, timeout ? &to : NULL);
|
||||||
|
+
|
||||||
|
+ if (timeout) {
|
||||||
|
+ hrtimer_cancel(&to.timer);
|
||||||
|
+ destroy_hrtimer_on_stack(&to.timer);
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ kfree(futexv);
|
||||||
|
+ return ret;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
#ifdef CONFIG_COMPAT_32BIT_TIME
|
||||||
|
SYSCALL_DEFINE6(futex_time32, u32 __user *, uaddr, int, op, u32, val,
|
||||||
|
const struct old_timespec32 __user *, utime, u32 __user *, uaddr2,
|
||||||
|
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
|
||||||
|
index 0ea8128468c3..0979fac9414d 100644
|
||||||
|
--- a/kernel/sys_ni.c
|
||||||
|
+++ b/kernel/sys_ni.c
|
||||||
|
@@ -150,6 +150,7 @@ COND_SYSCALL(set_robust_list);
|
||||||
|
COND_SYSCALL_COMPAT(set_robust_list);
|
||||||
|
COND_SYSCALL(get_robust_list);
|
||||||
|
COND_SYSCALL_COMPAT(get_robust_list);
|
||||||
|
+COND_SYSCALL(futex_waitv);
|
||||||
|
|
||||||
|
/* kernel/hrtimer.c */
|
||||||
|
|
||||||
|
--
|
||||||
|
2.33.1
|
||||||
|
|
||||||
|
From 4e40f3886e134f33c50ca79bc8b323cea784bd78 Mon Sep 17 00:00:00 2001
|
||||||
|
From: =?UTF-8?q?Andr=C3=A9=20Almeida?= <andrealmeid@collabora.com>
|
||||||
|
Date: Thu, 23 Sep 2021 14:11:06 -0300
|
||||||
|
Subject: [PATCH 2/2] futex,x86: Wire up sys_futex_waitv()
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
Wire up syscall entry point for x86 arch, for both i386 and x86_64.
|
||||||
|
|
||||||
|
Signed-off-by: André Almeida <andrealmeid@collabora.com>
|
||||||
|
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
|
||||||
|
Link: https://lore.kernel.org/r/20210923171111.300673-18-andrealmeid@collabora.com
|
||||||
|
---
|
||||||
|
arch/x86/entry/syscalls/syscall_32.tbl | 1 +
|
||||||
|
arch/x86/entry/syscalls/syscall_64.tbl | 1 +
|
||||||
|
2 files changed, 2 insertions(+)
|
||||||
|
|
||||||
|
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
|
||||||
|
index 4bbc267fb36b..b2b9b9df1355 100644
|
||||||
|
--- a/arch/x86/entry/syscalls/syscall_32.tbl
|
||||||
|
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
|
||||||
|
@@ -452,3 +452,4 @@
|
||||||
|
445 i386 landlock_add_rule sys_landlock_add_rule
|
||||||
|
446 i386 landlock_restrict_self sys_landlock_restrict_self
|
||||||
|
447 i386 memfd_secret sys_memfd_secret
|
||||||
|
+449 i386 futex_waitv sys_futex_waitv
|
||||||
|
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
|
||||||
|
index ce18119ea0d0..bfd4e8f5be34 100644
|
||||||
|
--- a/arch/x86/entry/syscalls/syscall_64.tbl
|
||||||
|
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
|
||||||
|
@@ -369,6 +369,7 @@
|
||||||
|
445 common landlock_add_rule sys_landlock_add_rule
|
||||||
|
446 common landlock_restrict_self sys_landlock_restrict_self
|
||||||
|
447 common memfd_secret sys_memfd_secret
|
||||||
|
+449 common futex_waitv sys_futex_waitv
|
||||||
|
|
||||||
|
#
|
||||||
|
# Due to a historical design error, certain syscalls are numbered differently
|
||||||
|
--
|
||||||
|
2.33.1
|
||||||
|
|
536
linux-tkg-patches/5.15/0007-v5.15-futex_waitv.patch
Normal file
536
linux-tkg-patches/5.15/0007-v5.15-futex_waitv.patch
Normal file
@@ -0,0 +1,536 @@
|
|||||||
|
From 4901e29e3c0237c52eadd2c82deb9bd6e7add5ac Mon Sep 17 00:00:00 2001
|
||||||
|
From: =?UTF-8?q?Andr=C3=A9=20Almeida?= <andrealmeid@collabora.com>
|
||||||
|
Date: Thu, 23 Sep 2021 14:11:05 -0300
|
||||||
|
Subject: [PATCH 1/2] futex: Implement sys_futex_waitv()
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
Add support to wait on multiple futexes. This is the interface
|
||||||
|
implemented by this syscall:
|
||||||
|
|
||||||
|
futex_waitv(struct futex_waitv *waiters, unsigned int nr_futexes,
|
||||||
|
unsigned int flags, struct timespec *timeout, clockid_t clockid)
|
||||||
|
|
||||||
|
struct futex_waitv {
|
||||||
|
__u64 val;
|
||||||
|
__u64 uaddr;
|
||||||
|
__u32 flags;
|
||||||
|
__u32 __reserved;
|
||||||
|
};
|
||||||
|
|
||||||
|
Given an array of struct futex_waitv, wait on each uaddr. The thread
|
||||||
|
wakes if a futex_wake() is performed at any uaddr. The syscall returns
|
||||||
|
immediately if any waiter has *uaddr != val. *timeout is an optional
|
||||||
|
absolute timeout value for the operation. This syscall supports only
|
||||||
|
64bit sized timeout structs. The flags argument of the syscall should be
|
||||||
|
empty, but it can be used for future extensions. Flags for shared
|
||||||
|
futexes, sizes, etc. should be used on the individual flags of each
|
||||||
|
waiter.
|
||||||
|
|
||||||
|
__reserved is used for explicit padding and should be 0, but it might be
|
||||||
|
used for future extensions. If the userspace uses 32-bit pointers, it
|
||||||
|
should make sure to explicitly cast it when assigning to waitv::uaddr.
|
||||||
|
|
||||||
|
Returns the array index of one of the woken futexes. There’s no given
|
||||||
|
information of how many were woken, or any particular attribute of it
|
||||||
|
(if it’s the first woken, if it is of the smaller index...).
|
||||||
|
|
||||||
|
Signed-off-by: André Almeida <andrealmeid@collabora.com>
|
||||||
|
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
|
||||||
|
Link: https://lore.kernel.org/r/20210923171111.300673-17-andrealmeid@collabora.com
|
||||||
|
---
|
||||||
|
include/linux/syscalls.h | 6 +
|
||||||
|
include/uapi/asm-generic/unistd.h | 5 +-
|
||||||
|
include/uapi/linux/futex.h | 26 +++
|
||||||
|
kernel/futex.c | 334 ++++++++++++++++++++++++++++++
|
||||||
|
kernel/sys_ni.c | 1 +
|
||||||
|
5 files changed, 371 insertions(+), 1 deletion(-)
|
||||||
|
|
||||||
|
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
|
||||||
|
index 050511e8f1f8..8390482cf082 100644
|
||||||
|
--- a/include/linux/syscalls.h
|
||||||
|
+++ b/include/linux/syscalls.h
|
||||||
|
@@ -58,6 +58,7 @@ struct mq_attr;
|
||||||
|
struct compat_stat;
|
||||||
|
struct old_timeval32;
|
||||||
|
struct robust_list_head;
|
||||||
|
+struct futex_waitv;
|
||||||
|
struct getcpu_cache;
|
||||||
|
struct old_linux_dirent;
|
||||||
|
struct perf_event_attr;
|
||||||
|
@@ -623,6 +624,11 @@ asmlinkage long sys_get_robust_list(int pid,
|
||||||
|
asmlinkage long sys_set_robust_list(struct robust_list_head __user *head,
|
||||||
|
size_t len);
|
||||||
|
|
||||||
|
+asmlinkage long sys_futex_waitv(struct futex_waitv *waiters,
|
||||||
|
+ unsigned int nr_futexes, unsigned int flags,
|
||||||
|
+ struct __kernel_timespec __user *timeout, clockid_t clockid);
|
||||||
|
+
|
||||||
|
+
|
||||||
|
/* kernel/hrtimer.c */
|
||||||
|
asmlinkage long sys_nanosleep(struct __kernel_timespec __user *rqtp,
|
||||||
|
struct __kernel_timespec __user *rmtp);
|
||||||
|
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
|
||||||
|
index d2a942086fcb..3f55ac23cea9 100644
|
||||||
|
--- a/include/uapi/asm-generic/unistd.h
|
||||||
|
+++ b/include/uapi/asm-generic/unistd.h
|
||||||
|
@@ -880,8 +880,11 @@ __SYSCALL(__NR_landlock_add_rule, sys_landlock_add_rule)
|
||||||
|
#define __NR_process_mrelease 448
|
||||||
|
__SYSCALL(__NR_process_mrelease, sys_process_mrelease)
|
||||||
|
|
||||||
|
+#define __NR_futex_waitv 449
|
||||||
|
+__SYSCALL(__NR_futex_waitv, sys_futex_waitv)
|
||||||
|
+
|
||||||
|
#undef __NR_syscalls
|
||||||
|
-#define __NR_syscalls 449
|
||||||
|
+#define __NR_syscalls 450
|
||||||
|
|
||||||
|
/*
|
||||||
|
* 32 bit systems traditionally used different
|
||||||
|
diff --git a/include/uapi/linux/futex.h b/include/uapi/linux/futex.h
|
||||||
|
index a89eb0accd5e..1666f5e4b837 100644
|
||||||
|
--- a/include/uapi/linux/futex.h
|
||||||
|
+++ b/include/uapi/linux/futex.h
|
||||||
|
@@ -41,6 +41,32 @@
|
||||||
|
#define FUTEX_CMP_REQUEUE_PI_PRIVATE (FUTEX_CMP_REQUEUE_PI | \
|
||||||
|
FUTEX_PRIVATE_FLAG)
|
||||||
|
|
||||||
|
+ /*
|
||||||
|
+ * Flags to specify the bit length of the futex word for futex2 syscalls.
|
||||||
|
+ * Currently, only 32 is supported.
|
||||||
|
+ */
|
||||||
|
+#define FUTEX_32 2
|
||||||
|
+
|
||||||
|
+/*
|
||||||
|
+ * Max numbers of elements in a futex_waitv array
|
||||||
|
+ */
|
||||||
|
+#define FUTEX_WAITV_MAX 128
|
||||||
|
+
|
||||||
|
+/**
|
||||||
|
+ * struct futex_waitv - A waiter for vectorized wait
|
||||||
|
+ * @val: Expected value at uaddr
|
||||||
|
+ * @uaddr: User address to wait on
|
||||||
|
+ * @flags: Flags for this waiter
|
||||||
|
+ * @__reserved: Reserved member to preserve data alignment. Should be 0.
|
||||||
|
+ */
|
||||||
|
+struct futex_waitv {
|
||||||
|
+ __u64 val;
|
||||||
|
+ __u64 uaddr;
|
||||||
|
+ __u32 flags;
|
||||||
|
+ __u32 __reserved;
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
+
|
||||||
|
/*
|
||||||
|
* Support for robust futexes: the kernel cleans up held futexes at
|
||||||
|
* thread exit time.
|
||||||
|
diff --git a/kernel/futex.c b/kernel/futex.c
|
||||||
|
index 408cad5e8968..d7dc0bd9379c 100644
|
||||||
|
--- a/kernel/futex.c
|
||||||
|
+++ b/kernel/futex.c
|
||||||
|
@@ -285,6 +285,18 @@ static const struct futex_q futex_q_init = {
|
||||||
|
.requeue_state = ATOMIC_INIT(Q_REQUEUE_PI_NONE),
|
||||||
|
};
|
||||||
|
|
||||||
|
+/**
|
||||||
|
+ * struct futex_vector - Auxiliary struct for futex_waitv()
|
||||||
|
+ * @w: Userspace provided data
|
||||||
|
+ * @q: Kernel side data
|
||||||
|
+ *
|
||||||
|
+ * Struct used to build an array with all data needed for futex_waitv()
|
||||||
|
+ */
|
||||||
|
+struct futex_vector {
|
||||||
|
+ struct futex_waitv w;
|
||||||
|
+ struct futex_q q;
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
/*
|
||||||
|
* Hash buckets are shared by all the futex_keys that hash to the same
|
||||||
|
* location. Each key may have multiple futex_q structures, one for each task
|
||||||
|
@@ -3962,6 +3974,328 @@ COMPAT_SYSCALL_DEFINE3(get_robust_list, int, pid,
|
||||||
|
}
|
||||||
|
#endif /* CONFIG_COMPAT */
|
||||||
|
|
||||||
|
+/* Mask of available flags for each futex in futex_waitv list */
|
||||||
|
+#define FUTEXV_WAITER_MASK (FUTEX_32 | FUTEX_PRIVATE_FLAG)
|
||||||
|
+
|
||||||
|
+/**
|
||||||
|
+ * futex_parse_waitv - Parse a waitv array from userspace
|
||||||
|
+ * @futexv: Kernel side list of waiters to be filled
|
||||||
|
+ * @uwaitv: Userspace list to be parsed
|
||||||
|
+ * @nr_futexes: Length of futexv
|
||||||
|
+ *
|
||||||
|
+ * Return: Error code on failure, 0 on success
|
||||||
|
+ */
|
||||||
|
+static int futex_parse_waitv(struct futex_vector *futexv,
|
||||||
|
+ struct futex_waitv __user *uwaitv,
|
||||||
|
+ unsigned int nr_futexes)
|
||||||
|
+{
|
||||||
|
+ struct futex_waitv aux;
|
||||||
|
+ unsigned int i;
|
||||||
|
+
|
||||||
|
+ for (i = 0; i < nr_futexes; i++) {
|
||||||
|
+ if (copy_from_user(&aux, &uwaitv[i], sizeof(aux)))
|
||||||
|
+ return -EFAULT;
|
||||||
|
+
|
||||||
|
+ if ((aux.flags & ~FUTEXV_WAITER_MASK) || aux.__reserved)
|
||||||
|
+ return -EINVAL;
|
||||||
|
+
|
||||||
|
+ if (!(aux.flags & FUTEX_32))
|
||||||
|
+ return -EINVAL;
|
||||||
|
+
|
||||||
|
+ futexv[i].w.flags = aux.flags;
|
||||||
|
+ futexv[i].w.val = aux.val;
|
||||||
|
+ futexv[i].w.uaddr = aux.uaddr;
|
||||||
|
+ futexv[i].q = futex_q_init;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+/**
|
||||||
|
+ * unqueue_multiple - Remove various futexes from their hash bucket
|
||||||
|
+ * @v: The list of futexes to unqueue
|
||||||
|
+ * @count: Number of futexes in the list
|
||||||
|
+ *
|
||||||
|
+ * Helper to unqueue a list of futexes. This can't fail.
|
||||||
|
+ *
|
||||||
|
+ * Return:
|
||||||
|
+ * - >=0 - Index of the last futex that was awoken;
|
||||||
|
+ * - -1 - No futex was awoken
|
||||||
|
+ */
|
||||||
|
+static int unqueue_multiple(struct futex_vector *v, int count)
|
||||||
|
+{
|
||||||
|
+ int ret = -1, i;
|
||||||
|
+
|
||||||
|
+ for (i = 0; i < count; i++) {
|
||||||
|
+ if (!unqueue_me(&v[i].q))
|
||||||
|
+ ret = i;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ return ret;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+/**
|
||||||
|
+ * futex_wait_multiple_setup - Prepare to wait and enqueue multiple futexes
|
||||||
|
+ * @vs: The futex list to wait on
|
||||||
|
+ * @count: The size of the list
|
||||||
|
+ * @woken: Index of the last woken futex, if any. Used to notify the
|
||||||
|
+ * caller that it can return this index to userspace (return parameter)
|
||||||
|
+ *
|
||||||
|
+ * Prepare multiple futexes in a single step and enqueue them. This may fail if
|
||||||
|
+ * the futex list is invalid or if any futex was already awoken. On success the
|
||||||
|
+ * task is ready to sleep interruptibly.
|
||||||
|
+ *
|
||||||
|
+ * Return:
|
||||||
|
+ * - 1 - One of the futexes was woken by another thread
|
||||||
|
+ * - 0 - Success
|
||||||
|
+ * - <0 - -EFAULT, -EWOULDBLOCK or -EINVAL
|
||||||
|
+ */
|
||||||
|
+static int futex_wait_multiple_setup(struct futex_vector *vs, int count, int *woken)
|
||||||
|
+{
|
||||||
|
+ struct futex_hash_bucket *hb;
|
||||||
|
+ bool retry = false;
|
||||||
|
+ int ret, i;
|
||||||
|
+ u32 uval;
|
||||||
|
+
|
||||||
|
+ /*
|
||||||
|
+ * Enqueuing multiple futexes is tricky, because we need to enqueue
|
||||||
|
+ * each futex on the list before dealing with the next one to avoid
|
||||||
|
+ * deadlocking on the hash bucket. But, before enqueuing, we need to
|
||||||
|
+ * make sure that current->state is TASK_INTERRUPTIBLE, so we don't
|
||||||
|
+ * lose any wake events, which cannot be done before the get_futex_key
|
||||||
|
+ * of the next key, because it calls get_user_pages, which can sleep.
|
||||||
|
+ * Thus, we fetch the list of futexes keys in two steps, by first
|
||||||
|
+ * pinning all the memory keys in the futex key, and only then we read
|
||||||
|
+ * each key and queue the corresponding futex.
|
||||||
|
+ *
|
||||||
|
+ * Private futexes don't need to recalculate the hash on retry, so skip
|
||||||
|
+ * get_futex_key() when retrying.
|
||||||
|
+ */
|
||||||
|
+retry:
|
||||||
|
+ for (i = 0; i < count; i++) {
|
||||||
|
+ if ((vs[i].w.flags & FUTEX_PRIVATE_FLAG) && retry)
|
||||||
|
+ continue;
|
||||||
|
+
|
||||||
|
+ ret = get_futex_key(u64_to_user_ptr(vs[i].w.uaddr),
|
||||||
|
+ !(vs[i].w.flags & FUTEX_PRIVATE_FLAG),
|
||||||
|
+ &vs[i].q.key, FUTEX_READ);
|
||||||
|
+
|
||||||
|
+ if (unlikely(ret))
|
||||||
|
+ return ret;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ set_current_state(TASK_INTERRUPTIBLE);
|
||||||
|
+
|
||||||
|
+ for (i = 0; i < count; i++) {
|
||||||
|
+ u32 __user *uaddr = (u32 __user *)(unsigned long)vs[i].w.uaddr;
|
||||||
|
+ struct futex_q *q = &vs[i].q;
|
||||||
|
+ u32 val = (u32)vs[i].w.val;
|
||||||
|
+
|
||||||
|
+ hb = queue_lock(q);
|
||||||
|
+ ret = get_futex_value_locked(&uval, uaddr);
|
||||||
|
+
|
||||||
|
+ if (!ret && uval == val) {
|
||||||
|
+ /*
|
||||||
|
+ * The bucket lock can't be held while dealing with the
|
||||||
|
+ * next futex. Queue each futex at this moment so hb can
|
||||||
|
+ * be unlocked.
|
||||||
|
+ */
|
||||||
|
+ queue_me(q, hb);
|
||||||
|
+ continue;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ queue_unlock(hb);
|
||||||
|
+ __set_current_state(TASK_RUNNING);
|
||||||
|
+
|
||||||
|
+ /*
|
||||||
|
+ * Even if something went wrong, if we find out that a futex
|
||||||
|
+ * was woken, we don't return error and return this index to
|
||||||
|
+ * userspace
|
||||||
|
+ */
|
||||||
|
+ *woken = unqueue_multiple(vs, i);
|
||||||
|
+ if (*woken >= 0)
|
||||||
|
+ return 1;
|
||||||
|
+
|
||||||
|
+ if (ret) {
|
||||||
|
+ /*
|
||||||
|
+ * If we need to handle a page fault, we need to do so
|
||||||
|
+ * without any lock and any enqueued futex (otherwise
|
||||||
|
+ * we could lose some wakeup). So we do it here, after
|
||||||
|
+ * undoing all the work done so far. On success, we
|
||||||
|
+ * retry all the work.
|
||||||
|
+ */
|
||||||
|
+ if (get_user(uval, uaddr))
|
||||||
|
+ return -EFAULT;
|
||||||
|
+
|
||||||
|
+ retry = true;
|
||||||
|
+ goto retry;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ if (uval != val)
|
||||||
|
+ return -EWOULDBLOCK;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+/**
|
||||||
|
+ * futex_sleep_multiple - Check sleeping conditions and sleep
|
||||||
|
+ * @vs: List of futexes to wait for
|
||||||
|
+ * @count: Length of vs
|
||||||
|
+ * @to: Timeout
|
||||||
|
+ *
|
||||||
|
+ * Sleep if and only if the timeout hasn't expired and no futex on the list has
|
||||||
|
+ * been woken up.
|
||||||
|
+ */
|
||||||
|
+static void futex_sleep_multiple(struct futex_vector *vs, unsigned int count,
|
||||||
|
+ struct hrtimer_sleeper *to)
|
||||||
|
+{
|
||||||
|
+ if (to && !to->task)
|
||||||
|
+ return;
|
||||||
|
+
|
||||||
|
+ for (; count; count--, vs++) {
|
||||||
|
+ if (!READ_ONCE(vs->q.lock_ptr))
|
||||||
|
+ return;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ freezable_schedule();
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+/**
|
||||||
|
+ * futex_wait_multiple - Prepare to wait on and enqueue several futexes
|
||||||
|
+ * @vs: The list of futexes to wait on
|
||||||
|
+ * @count: The number of objects
|
||||||
|
+ * @to: Timeout before giving up and returning to userspace
|
||||||
|
+ *
|
||||||
|
+ * Entry point for the FUTEX_WAIT_MULTIPLE futex operation, this function
|
||||||
|
+ * sleeps on a group of futexes and returns on the first futex that is
|
||||||
|
+ * woken, or after the timeout has elapsed.
|
||||||
|
+ *
|
||||||
|
+ * Return:
|
||||||
|
+ * - >=0 - Hint to the futex that was awoken
|
||||||
|
+ * - <0 - On error
|
||||||
|
+ */
|
||||||
|
+int futex_wait_multiple(struct futex_vector *vs, unsigned int count,
|
||||||
|
+ struct hrtimer_sleeper *to)
|
||||||
|
+{
|
||||||
|
+ int ret, hint = 0;
|
||||||
|
+
|
||||||
|
+ if (to)
|
||||||
|
+ hrtimer_sleeper_start_expires(to, HRTIMER_MODE_ABS);
|
||||||
|
+
|
||||||
|
+ while (1) {
|
||||||
|
+ ret = futex_wait_multiple_setup(vs, count, &hint);
|
||||||
|
+ if (ret) {
|
||||||
|
+ if (ret > 0) {
|
||||||
|
+ /* A futex was woken during setup */
|
||||||
|
+ ret = hint;
|
||||||
|
+ }
|
||||||
|
+ return ret;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ futex_sleep_multiple(vs, count, to);
|
||||||
|
+
|
||||||
|
+ __set_current_state(TASK_RUNNING);
|
||||||
|
+
|
||||||
|
+ ret = unqueue_multiple(vs, count);
|
||||||
|
+ if (ret >= 0)
|
||||||
|
+ return ret;
|
||||||
|
+
|
||||||
|
+ if (to && !to->task)
|
||||||
|
+ return -ETIMEDOUT;
|
||||||
|
+ else if (signal_pending(current))
|
||||||
|
+ return -ERESTARTSYS;
|
||||||
|
+ /*
|
||||||
|
+ * The final case is a spurious wakeup, for
|
||||||
|
+ * which just retry.
|
||||||
|
+ */
|
||||||
|
+ }
|
||||||
|
+}
|
||||||
|
+/* Mask of available flags for each futex in futex_waitv list */
|
||||||
|
+#define FUTEXV_WAITER_MASK (FUTEX_32 | FUTEX_PRIVATE_FLAG)
|
||||||
|
+
|
||||||
|
+/**
|
||||||
|
+ * sys_futex_waitv - Wait on a list of futexes
|
||||||
|
+ * @waiters: List of futexes to wait on
|
||||||
|
+ * @nr_futexes: Length of waiters
|
||||||
|
+ * @flags: Reserved for future use; must currently be 0
|
||||||
|
+ * @timeout: Optional absolute timeout.
|
||||||
|
+ * @clockid: Clock to be used for the timeout, realtime or monotonic.
|
||||||
|
+ *
|
||||||
|
+ * Given an array of `struct futex_waitv`, wait on each uaddr. The thread wakes
|
||||||
|
+ * if a futex_wake() is performed at any uaddr. The syscall returns immediately
|
||||||
|
+ * if any waiter has *uaddr != val. *timeout is an optional timeout value for
|
||||||
|
+ * the operation. Each waiter has individual flags. The `flags` argument of
|
||||||
|
+ * the syscall is reserved: no flags are supported yet and it must be 0; the
|
||||||
|
+ * timeout clock is chosen with `clockid`. Flags for private futexes, sizes,
|
||||||
|
+ * etc. should be set in the individual flags of each waiter.
|
||||||
|
+ *
|
||||||
|
+ * Returns the array index of one of the woken futexes. No further information
|
||||||
|
+ * is provided: any number of other futexes may also have been woken by the
|
||||||
|
+ * same event, and if more than one futex was woken, the returned index may
|
||||||
|
+ * refer to any one of them. (It is not necessarily the futex with the
|
||||||
|
+ * smallest index, nor the one most recently woken, nor...)
|
||||||
|
+ */
|
||||||
|
+
|
||||||
|
+SYSCALL_DEFINE5(futex_waitv, struct futex_waitv __user *, waiters,
|
||||||
|
+ unsigned int, nr_futexes, unsigned int, flags,
|
||||||
|
+ struct __kernel_timespec __user *, timeout, clockid_t, clockid)
|
||||||
|
+{
|
||||||
|
+ struct hrtimer_sleeper to;
|
||||||
|
+ struct futex_vector *futexv;
|
||||||
|
+ struct timespec64 ts;
|
||||||
|
+ ktime_t time;
|
||||||
|
+ int ret;
|
||||||
|
+
|
||||||
|
+ /* This syscall supports no flags for now */
|
||||||
|
+ if (flags)
|
||||||
|
+ return -EINVAL;
|
||||||
|
+
|
||||||
|
+ if (!nr_futexes || nr_futexes > FUTEX_WAITV_MAX || !waiters)
|
||||||
|
+ return -EINVAL;
|
||||||
|
+
|
||||||
|
+ if (timeout) {
|
||||||
|
+ int flag_clkid = 0, flag_init = 0;
|
||||||
|
+
|
||||||
|
+ if (clockid == CLOCK_REALTIME) {
|
||||||
|
+ flag_clkid = FLAGS_CLOCKRT;
|
||||||
|
+ flag_init = FUTEX_CLOCK_REALTIME;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ if (clockid != CLOCK_REALTIME && clockid != CLOCK_MONOTONIC)
|
||||||
|
+ return -EINVAL;
|
||||||
|
+
|
||||||
|
+ if (get_timespec64(&ts, timeout))
|
||||||
|
+ return -EFAULT;
|
||||||
|
+
|
||||||
|
+ /*
|
||||||
|
+ * Since there's no opcode for futex_waitv, use
|
||||||
|
+ * FUTEX_WAIT_BITSET that uses absolute timeout as well
|
||||||
|
+ */
|
||||||
|
+ ret = futex_init_timeout(FUTEX_WAIT_BITSET, flag_init, &ts, &time);
|
||||||
|
+ if (ret)
|
||||||
|
+ return ret;
|
||||||
|
+
|
||||||
|
+ futex_setup_timer(&time, &to, flag_clkid, 0);
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ futexv = kcalloc(nr_futexes, sizeof(*futexv), GFP_KERNEL);
|
||||||
|
+ if (!futexv)
|
||||||
|
+ return -ENOMEM;
|
||||||
|
+
|
||||||
|
+ ret = futex_parse_waitv(futexv, waiters, nr_futexes);
|
||||||
|
+ if (!ret)
|
||||||
|
+ ret = futex_wait_multiple(futexv, nr_futexes, timeout ? &to : NULL);
|
||||||
|
+
|
||||||
|
+ if (timeout) {
|
||||||
|
+ hrtimer_cancel(&to.timer);
|
||||||
|
+ destroy_hrtimer_on_stack(&to.timer);
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ kfree(futexv);
|
||||||
|
+ return ret;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
#ifdef CONFIG_COMPAT_32BIT_TIME
|
||||||
|
SYSCALL_DEFINE6(futex_time32, u32 __user *, uaddr, int, op, u32, val,
|
||||||
|
const struct old_timespec32 __user *, utime, u32 __user *, uaddr2,
|
||||||
|
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
|
||||||
|
index 0ea8128468c3..0979fac9414d 100644
|
||||||
|
--- a/kernel/sys_ni.c
|
||||||
|
+++ b/kernel/sys_ni.c
|
||||||
|
@@ -150,6 +150,7 @@ COND_SYSCALL(set_robust_list);
|
||||||
|
COND_SYSCALL_COMPAT(set_robust_list);
|
||||||
|
COND_SYSCALL(get_robust_list);
|
||||||
|
COND_SYSCALL_COMPAT(get_robust_list);
|
||||||
|
+COND_SYSCALL(futex_waitv);
|
||||||
|
|
||||||
|
/* kernel/hrtimer.c */
|
||||||
|
|
||||||
|
--
|
||||||
|
2.33.1
|
||||||
|
|
||||||
|
From 4e40f3886e134f33c50ca79bc8b323cea784bd78 Mon Sep 17 00:00:00 2001
|
||||||
|
From: =?UTF-8?q?Andr=C3=A9=20Almeida?= <andrealmeid@collabora.com>
|
||||||
|
Date: Thu, 23 Sep 2021 14:11:06 -0300
|
||||||
|
Subject: [PATCH 2/2] futex,x86: Wire up sys_futex_waitv()
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
Wire up syscall entry point for x86 arch, for both i386 and x86_64.
|
||||||
|
|
||||||
|
Signed-off-by: André Almeida <andrealmeid@collabora.com>
|
||||||
|
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
|
||||||
|
Link: https://lore.kernel.org/r/20210923171111.300673-18-andrealmeid@collabora.com
|
||||||
|
---
|
||||||
|
arch/x86/entry/syscalls/syscall_32.tbl | 1 +
|
||||||
|
arch/x86/entry/syscalls/syscall_64.tbl | 1 +
|
||||||
|
2 files changed, 2 insertions(+)
|
||||||
|
|
||||||
|
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
|
||||||
|
index 4bbc267fb36b..b2b9b9df1355 100644
|
||||||
|
--- a/arch/x86/entry/syscalls/syscall_32.tbl
|
||||||
|
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
|
||||||
|
@@ -453,3 +453,4 @@
|
||||||
|
446 i386 landlock_restrict_self sys_landlock_restrict_self
|
||||||
|
447 i386 memfd_secret sys_memfd_secret
|
||||||
|
448 i386 process_mrelease sys_process_mrelease
|
||||||
|
+449 i386 futex_waitv sys_futex_waitv
|
||||||
|
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
|
||||||
|
index ce18119ea0d0..bfd4e8f5be34 100644
|
||||||
|
--- a/arch/x86/entry/syscalls/syscall_64.tbl
|
||||||
|
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
|
||||||
|
@@ -370,6 +370,7 @@
|
||||||
|
446 common landlock_restrict_self sys_landlock_restrict_self
|
||||||
|
447 common memfd_secret sys_memfd_secret
|
||||||
|
448 common process_mrelease sys_process_mrelease
|
||||||
|
+449 common futex_waitv sys_futex_waitv
|
||||||
|
|
||||||
|
#
|
||||||
|
# Due to a historical design error, certain syscalls are numbered differently
|
||||||
|
--
|
||||||
|
2.33.1
|
||||||
|
|
Reference in New Issue
Block a user