Files
linux-tkg/linux-tkg-patches/6.6/0002-mm-Support-soft-dirty-flag-read-with-reset.patch
Kai Krakow 112adb9e50 6.6: Add patches for WRITE_WATCH support in Wine (#857)
This adds missing write watch patches I've ported over from 6.1 to 6.6.
I'm using this in my own daily-drive 6.6 LTS kernel.

See-also: https://github.com/kakra/linux/pull/30
2023-12-11 13:06:14 +01:00

354 lines
10 KiB
Diff

From 30aad6be7fa9513c818f33cf1e0e725920619145 Mon Sep 17 00:00:00 2001
From: Paul Gofman <pgofman@codeweavers.com>
Date: Thu, 7 May 2020 14:05:31 +0300
Subject: [PATCH 2/2] mm: Support soft dirty flag read with reset.
v2: ported from 6.1 to 6.6
Signed-off-by: Kai Krakow <kai@kaishome.de>
---
fs/proc/base.c | 3 +
fs/proc/internal.h | 1 +
fs/proc/task_mmu.c | 139 +++++++++++++++++++++++++++++++++++++++------
3 files changed, 127 insertions(+), 16 deletions(-)
diff --git a/fs/proc/base.c b/fs/proc/base.c
index ffd54617c3547..5683da416a891 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -3284,6 +3284,9 @@ static const struct pid_entry tgid_base_stuff[] = {
REG("smaps", S_IRUGO, proc_pid_smaps_operations),
REG("smaps_rollup", S_IRUGO, proc_pid_smaps_rollup_operations),
REG("pagemap", S_IRUSR, proc_pagemap_operations),
+#ifdef CONFIG_MEM_SOFT_DIRTY
+ REG("pagemap_reset", S_IRUSR, proc_pagemap_reset_operations),
+#endif
#endif
#ifdef CONFIG_SECURITY
DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations),
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 9a8f32f21ff56..f3a16b26dd6e4 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -303,6 +303,7 @@ extern const struct file_operations proc_pid_smaps_operations;
extern const struct file_operations proc_pid_smaps_rollup_operations;
extern const struct file_operations proc_clear_refs_operations;
extern const struct file_operations proc_pagemap_operations;
+extern const struct file_operations proc_pagemap_reset_operations;
extern unsigned long task_vsize(struct mm_struct *);
extern unsigned long task_statm(struct mm_struct *,
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index daa4fc32de9a2..291f530a8cfb2 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -1094,7 +1094,7 @@ static inline bool pte_is_pinned(struct vm_area_struct *vma, unsigned long addr,
return page_maybe_dma_pinned(page);
}
-static inline void clear_soft_dirty(struct vm_area_struct *vma,
+static inline bool clear_soft_dirty(struct vm_area_struct *vma,
unsigned long addr, pte_t *pte)
{
/*
@@ -1104,37 +1104,46 @@ static inline void clear_soft_dirty(struct vm_area_struct *vma,
* of how soft-dirty works.
*/
pte_t ptent = ptep_get(pte);
+ bool ret = false;
if (pte_present(ptent)) {
pte_t old_pte;
if (pte_is_pinned(vma, addr, ptent))
- return;
+ return ret;
old_pte = ptep_modify_prot_start(vma, addr, pte);
+ ret = pte_soft_dirty(old_pte);
ptent = pte_wrprotect(old_pte);
ptent = pte_clear_soft_dirty(ptent);
ptep_modify_prot_commit(vma, addr, pte, old_pte, ptent);
} else if (is_swap_pte(ptent)) {
+ ret = pte_swp_soft_dirty(ptent);
ptent = pte_swp_clear_soft_dirty(ptent);
set_pte_at(vma->vm_mm, addr, pte, ptent);
}
+ return ret;
}
#else
-static inline void clear_soft_dirty(struct vm_area_struct *vma,
+static inline bool clear_soft_dirty(struct vm_area_struct *vma,
unsigned long addr, pte_t *pte)
{
+ return false;
}
#endif
#if defined(CONFIG_MEM_SOFT_DIRTY) && defined(CONFIG_TRANSPARENT_HUGEPAGE)
-static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma,
+static inline bool clear_soft_dirty_pmd(struct vm_area_struct *vma,
unsigned long addr, pmd_t *pmdp)
{
pmd_t old, pmd = *pmdp;
+ bool ret = false;
if (pmd_present(pmd)) {
/* See comment in change_huge_pmd() */
old = pmdp_invalidate(vma, addr, pmdp);
+
+ ret = pmd_soft_dirty(old);
+
if (pmd_dirty(old))
pmd = pmd_mkdirty(pmd);
if (pmd_young(old))
@@ -1145,14 +1154,17 @@ static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma,
set_pmd_at(vma->vm_mm, addr, pmdp, pmd);
} else if (is_migration_entry(pmd_to_swp_entry(pmd))) {
+ ret = pmd_swp_soft_dirty(pmd);
pmd = pmd_swp_clear_soft_dirty(pmd);
set_pmd_at(vma->vm_mm, addr, pmdp, pmd);
}
+ return ret;
}
#else
-static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma,
+static inline bool clear_soft_dirty_pmd(struct vm_area_struct *vma,
unsigned long addr, pmd_t *pmdp)
{
+ return false;
}
#endif
@@ -1407,6 +1419,7 @@ struct pagemapread {
int pos, len; /* units: PM_ENTRY_BYTES, not bytes */
pagemap_entry_t *buffer;
bool show_pfn;
+ bool reset;
};
#define PAGEMAP_WALK_SIZE (PMD_SIZE)
@@ -1439,6 +1452,14 @@ static int add_to_pagemap(unsigned long addr, pagemap_entry_t *pme,
return 0;
}
+static int add_addr_to_pagemap(unsigned long addr, struct pagemapread *pm)
+{
+ ((unsigned long *)pm->buffer)[pm->pos++] = addr;
+ if (pm->pos >= pm->len)
+ return PM_END_OF_BUFFER;
+ return 0;
+}
+
static int pagemap_pte_hole(unsigned long start, unsigned long end,
__always_unused int depth, struct mm_walk *walk)
{
@@ -1446,6 +1467,9 @@ static int pagemap_pte_hole(unsigned long start, unsigned long end,
unsigned long addr = start;
int err = 0;
+ if (pm->reset)
+ goto out;
+
while (addr < end) {
struct vm_area_struct *vma = find_vma(walk->mm, addr);
pagemap_entry_t pme = make_pme(0, 0);
@@ -1550,6 +1574,20 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
pmd_t pmd = *pmdp;
struct page *page = NULL;
+ if (pm->reset)
+ {
+ if (clear_soft_dirty_pmd(vma, addr, pmdp))
+ {
+ for (; addr != end; addr += PAGE_SIZE)
+ {
+ err = add_addr_to_pagemap(addr, pm);
+ if (err)
+ break;
+ }
+ }
+ goto trans_huge_done;
+ }
+
if (vma->vm_flags & VM_SOFTDIRTY)
flags |= PM_SOFT_DIRTY;
@@ -1607,6 +1645,7 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
frame += (1 << MAX_SWAPFILES_SHIFT);
}
}
+trans_huge_done:
spin_unlock(ptl);
return err;
}
@@ -1622,10 +1661,18 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
return err;
}
for (; addr < end; pte++, addr += PAGE_SIZE) {
- pagemap_entry_t pme;
+ if (pm->reset)
+ {
+ if (clear_soft_dirty(vma, addr, pte))
+ err = add_addr_to_pagemap(addr, pm);
+ }
+ else
+ {
+ pagemap_entry_t pme;
- pme = pte_to_pagemap_entry(pm, vma, addr, ptep_get(pte));
- err = add_to_pagemap(addr, &pme, pm);
+ pme = pte_to_pagemap_entry(pm, vma, addr, ptep_get(pte));
+ err = add_to_pagemap(addr, &pme, pm);
+ }
if (err)
break;
}
@@ -1724,8 +1771,8 @@ static const struct mm_walk_ops pagemap_ops = {
* determine which areas of memory are actually mapped and llseek to
* skip over unmapped regions.
*/
-static ssize_t pagemap_read(struct file *file, char __user *buf,
- size_t count, loff_t *ppos)
+static ssize_t do_pagemap_read(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos, bool reset)
{
struct mm_struct *mm = file->private_data;
struct pagemapread pm;
@@ -1734,6 +1781,8 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
unsigned long start_vaddr;
unsigned long end_vaddr;
int ret = 0, copied = 0;
+ struct mmu_notifier_range range;
+ size_t buffer_len;
if (!mm || !mmget_not_zero(mm))
goto out;
@@ -1749,19 +1798,38 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
/* do not disclose physical addresses: attack vector */
pm.show_pfn = file_ns_capable(file, &init_user_ns, CAP_SYS_ADMIN);
+ pm.reset = reset;
- pm.len = (PAGEMAP_WALK_SIZE >> PAGE_SHIFT);
- pm.buffer = kmalloc_array(pm.len, PM_ENTRY_BYTES, GFP_KERNEL);
+ buffer_len = min(PAGEMAP_WALK_SIZE >> PAGE_SHIFT, count / PM_ENTRY_BYTES);
+
+ pm.buffer = kmalloc_array(buffer_len, PM_ENTRY_BYTES, GFP_KERNEL);
ret = -ENOMEM;
if (!pm.buffer)
goto out_mm;
src = *ppos;
svpfn = src / PM_ENTRY_BYTES;
- end_vaddr = mm->task_size;
+
+ start_vaddr = svpfn << PAGE_SHIFT;
+
+ if (reset)
+ {
+ if (count < sizeof(end_vaddr))
+ {
+ ret = -EINVAL;
+ goto out_mm;
+ }
+ if (copy_from_user(&end_vaddr, buf, sizeof(end_vaddr)))
+ return -EFAULT;
+ end_vaddr = min(end_vaddr, mm->task_size);
+ }
+ else
+ {
+ end_vaddr = mm->task_size;
+ start_vaddr = end_vaddr;
+ }
/* watch out for wraparound */
- start_vaddr = end_vaddr;
if (svpfn <= (ULONG_MAX >> PAGE_SHIFT)) {
unsigned long end;
@@ -1786,18 +1854,35 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
unsigned long end;
pm.pos = 0;
- end = (start_vaddr + PAGEMAP_WALK_SIZE) & PAGEMAP_WALK_MASK;
+ pm.len = min(buffer_len, count / PM_ENTRY_BYTES);
+
+ end = reset ? end_vaddr : (start_vaddr + (pm.len << PAGE_SHIFT));
/* overflow ? */
if (end < start_vaddr || end > end_vaddr)
end = end_vaddr;
+
ret = mmap_read_lock_killable(mm);
if (ret)
goto out_free;
+
+ if (reset)
+ {
+ inc_tlb_flush_pending(mm);
+ mmu_notifier_range_init(&range, MMU_NOTIFY_SOFT_DIRTY,
+ 0, mm, start_vaddr, end);
+ mmu_notifier_invalidate_range_start(&range);
+ }
ret = walk_page_range(mm, start_vaddr, end, &pagemap_ops, &pm);
+ if (reset)
+ {
+ mmu_notifier_invalidate_range_end(&range);
+ flush_tlb_mm(mm);
+ dec_tlb_flush_pending(mm);
+ }
mmap_read_unlock(mm);
- start_vaddr = end;
len = min(count, PM_ENTRY_BYTES * pm.pos);
+ BUG_ON(ret && ret != PM_END_OF_BUFFER);
if (copy_to_user(buf, pm.buffer, len)) {
ret = -EFAULT;
goto out_free;
@@ -1805,6 +1890,8 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
copied += len;
buf += len;
count -= len;
+
+ start_vaddr = reset && pm.pos == pm.len ? ((unsigned long *)pm.buffer)[pm.pos - 1] + PAGE_SIZE : end;
}
*ppos += copied;
if (!ret || ret == PM_END_OF_BUFFER)
@@ -1818,6 +1905,18 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
return ret;
}
+static ssize_t pagemap_read(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ return do_pagemap_read(file, buf, count, ppos, false);
+}
+
+static ssize_t pagemap_reset_read(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ return do_pagemap_read(file, buf, count, ppos, true);
+}
+
static int pagemap_open(struct inode *inode, struct file *file)
{
struct mm_struct *mm;
@@ -1844,6 +1943,14 @@ const struct file_operations proc_pagemap_operations = {
.open = pagemap_open,
.release = pagemap_release,
};
+
+const struct file_operations proc_pagemap_reset_operations = {
+ .llseek = mem_lseek, /* borrow this */
+ .read = pagemap_reset_read,
+ .open = pagemap_open,
+ .release = pagemap_release,
+};
+
#endif /* CONFIG_PROC_PAGE_MONITOR */
#ifdef CONFIG_NUMA
--
2.41.0