Note: for an updated version of the code in this answer that also covers the case where huge mappings are used for sys_call_table (not currently handled by the code here, which fails with -EINVAL), see this other answer of mine.
So, as I said in the comments above, it appears that the function change_memory_common() (which is used by set_memory_ro/rw()) does a check before applying the requested permissions. This is documented with a comment:
/*
* Kernel VA mappings are always live, and splitting live section
* mappings into page mappings may cause TLB conflicts. This means
* we have to ensure that changing the permission bits of the range
* we are operating on does not result in such splitting.
*
* Let's restrict ourselves to mappings created by vmalloc (or vmap).
* Those are guaranteed to consist entirely of page mappings, and
* splitting is never needed.
*
* So check whether the [addr, addr + size) interval is entirely
* covered by precisely one VM area that has the VM_ALLOC flag set.
*/
area = find_vm_area((void *)addr);
if (!area ||
end > (unsigned long)area->addr + area->size ||
!(area->flags & VM_ALLOC))
return -EINVAL;
The function seems to only work for mappings created through vmalloc() or vmap(), and the sys_call_table does not reside in a mapping of such kind.
The concern seems to be around TLB conflicts. It may be that this function does not split huge pages and therefore cannot set permissions with single (non-huge) page granularity. You will need to test this: it will either work or not work for your specific case.
In any case, for the purpose of your exercise on syscall hijacking, you can re-write your own version of change_memory_common() and set_memory_rw/ro() avoiding this check. An easier way would be to just get the appropriate PTE for the desired address and then change the permissions, but I didn't look through all the countless macros for that.
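Last, but not least, since the sys_call_table could end up crossing a page boundary, it's better to use the address of the specific entry being modified, syscall_table + __NR_read (where syscall_table is the pointer used in the code below), instead of just the start of sys_call_table when applying changes to the page.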
Here's a working example:
// SPDX-License-Identifier: GPL-3.0
#include <linux/init.h> // module_{init,exit}()
#include <linux/module.h> // THIS_MODULE, MODULE_VERSION, ...
#include <linux/kernel.h> // printk(), pr_*()
#include <linux/kallsyms.h> // kallsyms_lookup_name()
#include <asm/syscall.h> // syscall_fn_t, __NR_*
#include <asm/ptrace.h> // struct pt_regs
#include <asm/tlbflush.h> // flush_tlb_kernel_range()
#include <asm/pgtable.h> // {clear,set}_pte_bit(), set_pte()
#include <linux/vmalloc.h> // vm_unmap_aliases()
#include <linux/mm.h> // struct mm_struct, apply_to_page_range()
#include <linux/kconfig.h> // IS_ENABLED()
#ifdef pr_fmt
#undef pr_fmt
#endif
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
// Address of the kernel's init_mm, looked up by name in modinit() and passed
// to apply_to_page_range() in __change_memory_common().
static struct mm_struct *init_mm_ptr;
// Address of the kernel's sys_call_table, looked up by name in modinit().
static syscall_fn_t *syscall_table;
// Original __NR_read handler: saved in modinit(), called by myread(), and
// written back into the table by modexit().
static syscall_fn_t original_read;
/********** HELPERS **********/
// PTE attribute masks handed to change_page_range() through the opaque data
// pointer of apply_to_page_range(). Taken from arch/arm64/mm/pageattr.c.
struct page_change_data {
	pgprot_t set_mask;	// bits to turn on in each visited PTE
	pgprot_t clear_mask;	// bits to turn off in each visited PTE
};
// Per-PTE callback for apply_to_page_range(), adapted from
// arch/arm64/mm/pageattr.c.
//
// ptep: page-table entry to rewrite.
// addr: address associated with the entry (not used here).
// data: pointer to a struct page_change_data with the masks to apply.
//
// Always returns 0.
static int change_page_range(pte_t *ptep, unsigned long addr, void *data)
{
	const struct page_change_data *masks = data;
	pte_t entry;

	// Clear first, then set, so set_mask wins if the two masks overlap.
	entry = clear_pte_bit(READ_ONCE(*ptep), masks->clear_mask);
	entry = set_pte_bit(entry, masks->set_mask);
	set_pte(ptep, entry);

	return 0;
}
// Apply set_mask/clear_mask to every PTE covering [start, start + size) in
// the page tables of init_mm_ptr, then flush the kernel TLB for that range.
// Adapted from arch/arm64/mm/pageattr.c.
//
// Returns whatever apply_to_page_range() returned.
static int __change_memory_common(unsigned long start, unsigned long size,
				  pgprot_t set_mask, pgprot_t clear_mask)
{
	struct page_change_data masks = {
		.set_mask = set_mask,
		.clear_mask = clear_mask,
	};
	int err;

	err = apply_to_page_range(init_mm_ptr, start, size, change_page_range,
				  &masks);

	// The flush is unconditional: it runs even when the walk reported an error.
	flush_tlb_kernel_range(start, start + size);

	return err;
}
// Simplified set_memory_rw() from arch/arm64/mm/pageattr.c.
static int set_page_rw(unsigned long addr)
{
vm_unmap_aliases();
return __change_memory_common(addr, PAGE_SIZE, __pgprot(PTE_WRITE), __pgprot(PTE_RDONLY));
}
// Simplified set_memory_ro() from arch/arm64/mm/pageattr.c.
static int set_page_ro(unsigned long addr)
{
vm_unmap_aliases();
return __change_memory_common(addr, PAGE_SIZE, __pgprot(PTE_RDONLY), __pgprot(PTE_WRITE));
}
/********** ACTUAL MODULE **********/
// Replacement entry installed at syscall_table[__NR_read]: logs the call and
// then forwards the unmodified register set to the saved original handler.
//
// Returns the original handler's return value unchanged.
static long myread(const struct pt_regs *regs)
{
	long ret;

	pr_info("read() called\n");
	ret = original_read(regs);

	return ret;
}
// Module entry point: resolves init_mm and sys_call_table by name, saves the
// current __NR_read handler, and replaces it with myread().
//
// Returns 0 on success, -ENOENT if either symbol cannot be resolved, or the
// error returned by set_page_rw()/set_page_ro(). On every failure path the
// syscall table is left pointing at the original handler, so the module can
// be safely discarded.
static int __init modinit(void)
{
	unsigned long entry_page;
	int res;

	pr_info("init\n");

	// kallsyms_lookup_name() yields 0 for an unknown symbol; bail out instead
	// of dereferencing a NULL table or walking a NULL mm.
	init_mm_ptr = (struct mm_struct *)kallsyms_lookup_name("init_mm");
	if (!init_mm_ptr) {
		pr_err("init_mm symbol not found\n");
		return -ENOENT;
	}

	syscall_table = (syscall_fn_t *)kallsyms_lookup_name("sys_call_table");
	if (!syscall_table) {
		pr_err("sys_call_table symbol not found\n");
		return -ENOENT;
	}

	// Use the page holding the __NR_read slot, not the page where the table
	// starts: the table may span more than one page.
	entry_page = (unsigned long)(syscall_table + __NR_read) & PAGE_MASK;
	original_read = syscall_table[__NR_read];

	res = set_page_rw(entry_page);
	if (res != 0) {
		pr_err("set_page_rw() failed: %d\n", res);
		return res;
	}

	syscall_table[__NR_read] = myread;

	res = set_page_ro(entry_page);
	if (res != 0) {
		pr_err("set_page_ro() failed: %d\n", res);
		// Returning an error unloads the module, so the table must not keep
		// pointing at myread(). The page is presumably still writable here,
		// since re-protecting it is the step that just failed.
		syscall_table[__NR_read] = original_read;
		return res;
	}

	pr_info("init done\n");
	return 0;
}
// Module exit point: makes the page holding the __NR_read slot writable,
// puts the saved original handler back, and re-protects the page.
//
// If the page cannot be made writable the function logs the error and
// returns early, leaving the table entry untouched.
static void __exit modexit(void)
{
	const unsigned long entry_page =
		(unsigned long)(syscall_table + __NR_read) & PAGE_MASK;
	int err;

	pr_info("exit\n");

	err = set_page_rw(entry_page);
	if (err) {
		pr_err("set_page_rw() failed: %d\n", err);
		return;
	}

	syscall_table[__NR_read] = original_read;

	err = set_page_ro(entry_page);
	if (err) {
		pr_err("set_page_ro() failed: %d\n", err);
	}

	pr_info("goodbye\n");
}
// Register modinit()/modexit() as the module's load and unload handlers.
module_init(modinit);
module_exit(modexit);
// Module metadata.
MODULE_VERSION("0.1");
MODULE_DESCRIPTION("Syscall hijack on arm64.");
MODULE_AUTHOR("Marco Bonelli");
// NOTE(review): the SPDX tag at the top of this file says GPL-3.0 while this
// declares "GPL" - confirm which license is intended.
MODULE_LICENSE("GPL");
#include <linux/set_memory.h>? – Mccowan
The set_memory_rw() function is not exported, so you cannot use it directly in your module. Trying to use it in your module will cause the build to fail. You could get a pointer to it at runtime using kallsyms_lookup_name("set_memory_rw"). – Ophiolatry