As others have hinted, the whole story is a bit different now on modern kernels. I'll be covering x86-64 here; for syscall hijacking on modern arm64, refer to this other answer of mine.
Latest x86 kernels
See also this other answer of mine.
Since Linux v6.9, commit 1e3ad78334a69b36e107232e337f9d693dcc9df2 introduced a security mitigation against speculative execution on x86 that removed the use of syscall tables. This has been backported to other stable kernels too (notably v6.8.5+, v6.6.26+, v6.1.85+, v5.15.154+).
It is no longer possible to simply hijack the entries of `sys_call_table`, but much nicer ways to do the same thing still exist. One of those is kprobes.
In short, something like the following should work:
#include <linux/kprobes.h>
#include <linux/ptrace.h>
// ...
static int sys_read_kprobe_pre_handler(struct kprobe *p, struct pt_regs *regs)
{
pr_info("read(%ld, 0x%lx, 0x%lx)\n", regs->di, regs->si, regs->dx);
return 0;
}
/*
 * Kprobe descriptor: attaches sys_read_kprobe_pre_handler to the symbol
 * __x64_sys_read. Registered in my_module_init(), removed in
 * my_module_exit().
 */
struct kprobe syscall_kprobe = {
	.pre_handler = sys_read_kprobe_pre_handler,
	.symbol_name = "__x64_sys_read",
};
/*
 * Module entry point: registers syscall_kprobe.
 *
 * Returns 0 on success, or the error code reported by register_kprobe()
 * (which is also logged).
 */
static int __init my_module_init(void)
{
	const int rc = register_kprobe(&syscall_kprobe);

	if (rc == 0)
		return 0;

	pr_err("register_kprobe() failed: %d\n", rc);
	return rc;
}
/* Module exit point: unregisters syscall_kprobe. */
static void __exit my_module_exit(void)
{
unregister_kprobe(&syscall_kprobe);
}
Older x86 kernels (< v6.9)
Now this is plain and simple syscall table hijacking. Non-invasive hooking can still be done in a much nicer way using kprobes as described in the section above.
Since Linux v4.17, x86 (both 64 and 32 bit) now uses syscall wrappers that take a struct pt_regs *
as the only argument (see commit 1, commit 2). You can see arch/x86/include/asm/syscall.h
for the definitions.
Additionally, as others have already described in different answers, the simplest way to modify `sys_call_table` is to temporarily disable the CR0 WP (Write-Protect) bit, which could be done using `read_cr0()` and `write_cr0()`. However, since Linux v5.3, `[native_]write_cr0` will check sensitive bits that should never change (like WP) and refuse to change them (commit). In order to work around this, we need to write CR0 manually using inline assembly.
Here is a working kernel module (tested on Linux 5.10 and 5.18) that does syscall hijacking on modern Linux x86-64 considering the above caveats and assuming that you already know the address of sys_call_table
(if you also want to find that in the module, see Proper way of getting the address of non-exported kernel symbols in a Linux kernel module):
// SPDX-License-Identifier: (GPL-2.0 OR MIT)
/**
* Test syscall table hijacking on x86-64. This module will replace the `read`
* syscall with a simple wrapper which logs every invocation of `read` using
* printk().
*
* Tested on Linux x86-64 v5.10, v5.18.
*
* Usage:
*
* sudo cat /proc/kallsyms | grep sys_call_table # grab address
* sudo insmod syscall_hijack.ko sys_call_table_addr=0x<address_here>
*/
#include <linux/init.h> // module_{init,exit}()
#include <linux/module.h> // THIS_MODULE, MODULE_VERSION, ...
#include <linux/kernel.h> // printk(), pr_*()
#include <linux/errno.h> // EINVAL
#include <asm/special_insns.h> // {read,write}_cr0()
#include <asm/processor-flags.h> // X86_CR0_WP
#include <asm/unistd.h> // __NR_*
// Prefix every pr_*() message with the module name. Any earlier definition
// of pr_fmt is dropped first (#undef on an undefined macro is a no-op).
#undef pr_fmt
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
// Type of a syscall table entry as used by this module: a handler taking a
// pointer to the saved registers and returning a long.
typedef long (*sys_call_ptr_t)(const struct pt_regs *);
// Pointer to the syscall table; set in modinit() from sys_call_table_addr.
static sys_call_ptr_t *real_sys_call_table;
// Original __NR_read entry; saved in modinit() and restored in modexit().
static sys_call_ptr_t original_read;
// Address of sys_call_table, supplied by the user as a module parameter at
// load time (see the usage notes in the file header).
static unsigned long sys_call_table_addr;
module_param(sys_call_table_addr, ulong, 0);
MODULE_PARM_DESC(sys_call_table_addr, "Address of sys_call_table");
// Write `val` into the CR0 control register with a raw `mov` instruction.
//
// Since Linux v5.3 [native_]write_cr0 won't change "sensitive" CR0 bits (such
// as WP), so we need to re-implement the register write ourselves.
//
// The "memory" clobber keeps the compiler from reordering memory accesses
// across the CR0 write, so table writes stay inside the WP-disabled window.
static void write_cr0_unsafe(unsigned long val)
{
asm volatile("mov %0,%%cr0": "+r" (val) : : "memory");
}
/*
 * Replacement for the `read` syscall table entry.
 *
 * Logs the three syscall arguments taken from @regs (di, si, dx), then
 * forwards the call unchanged to the saved original handler and returns
 * whatever it returns.
 */
static long myread(const struct pt_regs *regs)
{
	long ret;

	pr_info("read(%ld, 0x%lx, 0x%lx)\n", regs->di, regs->si, regs->dx);

	ret = original_read(regs);
	return ret;
}
/*
 * Module entry point: installs the read() hook.
 *
 * Interprets the sys_call_table_addr module parameter as the address of the
 * syscall table, saves the current __NR_read entry in original_read and
 * replaces it with myread(). CR0.WP is cleared only around the table write
 * and restored to its previous value afterwards.
 *
 * Returns 0 on success, or -EINVAL if sys_call_table_addr was not provided.
 */
static int __init modinit(void)
{
	unsigned long old_cr0;

	// The parameter defaults to 0: without this check, loading the module
	// with no address would dereference a NULL table pointer below (with
	// write protection disabled) and oops the kernel.
	if (!sys_call_table_addr) {
		pr_err("missing sys_call_table_addr parameter\n");
		return -EINVAL;
	}

	real_sys_call_table = (typeof(real_sys_call_table))sys_call_table_addr;

	pr_info("init\n");

	// Temporarily disable CR0 WP to be able to write to read-only pages.
	// NOTE(review): CR0 is per-CPU; if this task can be preempted and
	// migrated inside this window the write below may fault -- consider
	// disabling preemption around it; confirm for your configuration.
	old_cr0 = read_cr0();
	write_cr0_unsafe(old_cr0 & ~(X86_CR0_WP));

	// Overwrite syscall and save original to be restored later
	original_read = real_sys_call_table[__NR_read];
	real_sys_call_table[__NR_read] = myread;

	// Restore CR0 WP
	write_cr0_unsafe(old_cr0);

	pr_info("init done\n");
	return 0;
}
/*
 * Module exit point: removes the read() hook.
 *
 * Puts the saved original handler back into the __NR_read slot of the
 * syscall table. As in modinit(), CR0.WP is cleared only for the duration
 * of the table write and then set back to its previous value.
 */
static void __exit modexit(void)
{
	unsigned long saved_cr0;
	unsigned long wp_cleared;

	pr_info("exit\n");

	saved_cr0 = read_cr0();
	wp_cleared = saved_cr0 & ~(X86_CR0_WP);

	/* Open the write window, restore the entry, close the window. */
	write_cr0_unsafe(wp_cleared);
	real_sys_call_table[__NR_read] = original_read;
	write_cr0_unsafe(saved_cr0);

	pr_info("goodbye\n");
}
// Register modinit()/modexit() as the module's load and unload handlers.
module_init(modinit);
module_exit(modexit);
// Module metadata.
MODULE_VERSION("0.1");
MODULE_DESCRIPTION("Test syscall table hijacking on x86-64.");
MODULE_AUTHOR("Marco Bonelli");
MODULE_LICENSE("Dual MIT/GPL");
LD_PRELOAD or ptrace? Do they not satisfy what you are trying to do? – Dispersion