Intercepting RDTSC instruction in KVM
Asked Answered
L

1

6

I am trying to debug a rootkit in a virtual environment. From reversing I know that it uses super simple CPU timing checks, that look something like this (source pafish):

/*
 * Measure how many TSC ticks elapse across one CPUID instruction.
 *
 * Reads the time-stamp counter, executes CPUID leaf 0 (the instruction
 * used here to force a VM exit when running under a hypervisor), reads
 * the counter again and returns the difference of the two 64-bit values.
 */
static inline unsigned long long rdtsc_diff_vmexit() {
    unsigned long long ret, ret2;
    unsigned eax, edx;
    unsigned ebx, ecx;

    __asm__ volatile("rdtsc" : "=a" (eax), "=d" (edx));
    ret  = ((unsigned long long)eax) | (((unsigned long long)edx) << 32);

    /*
     * vm exit forced here. it uses: eax = 0; cpuid;
     *
     * CPUID overwrites EAX, EBX, ECX and EDX, so every one of them must be
     * declared as an operand; otherwise the compiler is free to assume the
     * registers still hold whatever it had placed there.
     */
    eax = 0;
    __asm__ volatile("cpuid"
                     : "+a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx));
    (void)ebx;
    (void)ecx;

    __asm__ volatile("rdtsc" : "=a" (eax), "=d" (edx));
    ret2  = ((unsigned long long)eax) | (((unsigned long long)edx) << 32);

    return ret2 - ret;
}

/*
 * Timing-based check: take ten rdtsc_diff() samples, pausing with
 * Sleep(500) after each one, and average them.
 *
 * Returns FALSE when the average lies strictly between 0 and 750,
 * and TRUE otherwise.
 *
 * NOTE(review): rdtsc_diff() is not defined in the visible snippet;
 * it is assumed to be provided elsewhere.
 */
int cpu_rdtsc() {
    unsigned long long total = 0;
    int round = 0;

    while (round < 10) {
        total += rdtsc_diff();
        Sleep(500);
        round++;
    }

    unsigned long long mean = total / 10;

    if (mean > 0 && mean < 750)
        return FALSE;
    return TRUE;
}

It executes the rdtsc instruction twice with a cpuid in between. The issue is that this simple check detects the presence of KVM, and I want to get around that.

As suggested in this question (and on the mailing list), I cloned the Linux kernel source (5.4.0 stable) and edited these files accordingly:

arch/x86/kvm/vmx/vmx.c (around 2300, setup_vmcs_config):

/*
 * Excerpt from setup_vmcs_config(): assemble the CPU_BASED_* control
 * masks `min` and `opt` and pass both to adjust_vmx_controls() together
 * with MSR_IA32_VMX_PROCBASED_CTLS; the result is written to
 * _cpu_based_exec_control. A negative return aborts with -EIO.
 *
 * NOTE(review): presumably `min` holds the required bits and `opt` the
 * optional ones -- confirm against adjust_vmx_controls().
 */
min = CPU_BASED_HLT_EXITING |
#ifdef CONFIG_X86_64
          CPU_BASED_CR8_LOAD_EXITING |
          CPU_BASED_CR8_STORE_EXITING |
#endif
          CPU_BASED_CR3_LOAD_EXITING |
          CPU_BASED_CR3_STORE_EXITING |
          CPU_BASED_UNCOND_IO_EXITING |
          CPU_BASED_MOV_DR_EXITING |
          CPU_BASED_USE_TSC_OFFSETTING |
          CPU_BASED_MWAIT_EXITING |
          CPU_BASED_MONITOR_EXITING |
          CPU_BASED_INVLPG_EXITING |
          CPU_BASED_RDPMC_EXITING |
          CPU_BASED_RDTSC_EXITING; // <- added this

    opt = CPU_BASED_TPR_SHADOW |
          CPU_BASED_USE_MSR_BITMAPS |
          CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;

    /* Bail out if the requested control combination cannot be set up. */
    if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PROCBASED_CTLS,
                &_cpu_based_exec_control) < 0)
        return -EIO;

arch/x86/kvm/vmx/vmx.c (around 5500):

/*
 * Dispatch table indexed by VMX exit reason (EXIT_REASON_*). Each entry is
 * a handler taking the vCPU and returning int. Designated initializers are
 * used, so any index not listed here is zero-initialized (NULL).
 * The final entry routes EXIT_REASON_RDTSC to handle_rdtsc.
 */
static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
    [EXIT_REASON_EXCEPTION_NMI]           = handle_exception_nmi,
    [EXIT_REASON_EXTERNAL_INTERRUPT]      = handle_external_interrupt,
    [EXIT_REASON_TRIPLE_FAULT]            = handle_triple_fault,
    [EXIT_REASON_NMI_WINDOW]          = handle_nmi_window,
    [EXIT_REASON_IO_INSTRUCTION]          = handle_io,
    [EXIT_REASON_CR_ACCESS]               = handle_cr,
    [EXIT_REASON_DR_ACCESS]               = handle_dr,
    [EXIT_REASON_CPUID]                   = kvm_emulate_cpuid,
    [EXIT_REASON_MSR_READ]                = kvm_emulate_rdmsr,
    [EXIT_REASON_MSR_WRITE]               = kvm_emulate_wrmsr,
    [EXIT_REASON_INTERRUPT_WINDOW]        = handle_interrupt_window,
    [EXIT_REASON_HLT]                     = kvm_emulate_halt,
    [EXIT_REASON_INVD]            = handle_invd,
    [EXIT_REASON_INVLPG]              = handle_invlpg,
    [EXIT_REASON_RDPMC]                   = handle_rdpmc,
    [EXIT_REASON_VMCALL]                  = handle_vmcall,
    [EXIT_REASON_VMCLEAR]             = handle_vmx_instruction,
    [EXIT_REASON_VMLAUNCH]            = handle_vmx_instruction,
    [EXIT_REASON_VMPTRLD]             = handle_vmx_instruction,
    [EXIT_REASON_VMPTRST]             = handle_vmx_instruction,
    [EXIT_REASON_VMREAD]              = handle_vmx_instruction,
    [EXIT_REASON_VMRESUME]            = handle_vmx_instruction,
    [EXIT_REASON_VMWRITE]             = handle_vmx_instruction,
    [EXIT_REASON_VMOFF]           = handle_vmx_instruction,
    [EXIT_REASON_VMON]            = handle_vmx_instruction,
    [EXIT_REASON_TPR_BELOW_THRESHOLD]     = handle_tpr_below_threshold,
    [EXIT_REASON_APIC_ACCESS]             = handle_apic_access,
    [EXIT_REASON_APIC_WRITE]              = handle_apic_write,
    [EXIT_REASON_EOI_INDUCED]             = handle_apic_eoi_induced,
    [EXIT_REASON_WBINVD]                  = handle_wbinvd,
    [EXIT_REASON_XSETBV]                  = handle_xsetbv,
    [EXIT_REASON_TASK_SWITCH]             = handle_task_switch,
    [EXIT_REASON_MCE_DURING_VMENTRY]      = handle_machine_check,
    [EXIT_REASON_GDTR_IDTR]           = handle_desc,
    [EXIT_REASON_LDTR_TR]             = handle_desc,
    [EXIT_REASON_EPT_VIOLATION]       = handle_ept_violation,
    [EXIT_REASON_EPT_MISCONFIG]           = handle_ept_misconfig,
    [EXIT_REASON_PAUSE_INSTRUCTION]       = handle_pause,
    [EXIT_REASON_MWAIT_INSTRUCTION]       = handle_mwait,
    [EXIT_REASON_MONITOR_TRAP_FLAG]       = handle_monitor_trap,
    [EXIT_REASON_MONITOR_INSTRUCTION]     = handle_monitor,
    [EXIT_REASON_INVEPT]                  = handle_vmx_instruction,
    [EXIT_REASON_INVVPID]                 = handle_vmx_instruction,
    [EXIT_REASON_RDRAND]                  = handle_invalid_op,
    [EXIT_REASON_RDSEED]                  = handle_invalid_op,
    [EXIT_REASON_PML_FULL]            = handle_pml_full,
    [EXIT_REASON_INVPCID]                 = handle_invpcid,
    [EXIT_REASON_VMFUNC]              = handle_vmx_instruction,
    [EXIT_REASON_PREEMPTION_TIMER]        = handle_preemption_timer,
    [EXIT_REASON_ENCLS]           = handle_encls,
    [EXIT_REASON_RDTSC]             = handle_rdtsc, // <- added exit handler
};

and finally defined the exit handler itself right above it (just to test if the handling works):

/*
 * Test exit handler for RDTSC on the VMX side.
 *
 * Logs the guest CPL reported by vmx_get_cpl(), stores a fixed fake
 * counter value (123) split into its low 32 bits (guest RAX) and high
 * 32 bits (guest RDX), skips the instruction and returns 1.
 */
static int handle_rdtsc(struct kvm_vcpu *vcpu)
{
    const uint64_t fake_tsc = 123;

    printk("[vmkernel] handling fake rdtsc from cpl %i\n", vmx_get_cpl(vcpu));

    /* Low half goes to RAX, high half to RDX. */
    vcpu->arch.regs[VCPU_REGS_RAX] = (uint32_t)fake_tsc;
    vcpu->arch.regs[VCPU_REGS_RDX] = (uint32_t)(fake_tsc >> 32);

    skip_emulated_instruction(vcpu);
    return 1;
}

As I feared, after building the kernel, running the VM and inspecting dmesg, I can't see any of the handler's output. The measured difference is also exactly the same as before.

What else do I need to edit in order for the handler to be actually called and used? I am building the kernel with all options set to default (run make menuconfig -> save).

Of course I also checked that the kernel is booted and that Qemu is using KVM for the VM.

Any ideas would be appreciated. Thanks.

Lazare answered 18/7, 2020 at 15:20 Comment(1)
Would be nice if people that downvote at least wrote why...Lazare
L
7

Well... The code above that I posted works, but only on Intel CPUs. Sadly I did not notice that.

To make it work on an AMD CPU, I needed to modify arch/x86/kvm/svm/svm.c:

/*
 * Dispatch table indexed by SVM exit code (SVM_EXIT_*). Each entry is a
 * handler taking the struct vcpu_svm pointer and returning int. Designated
 * initializers are used, so any index not listed here is zero-initialized
 * (NULL). The final entry routes SVM_EXIT_RDTSC to
 * handle_rdtsc_interception.
 */
static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
    [SVM_EXIT_READ_CR0]         = cr_interception,
    [SVM_EXIT_READ_CR3]         = cr_interception,
    [SVM_EXIT_READ_CR4]         = cr_interception,
    [SVM_EXIT_READ_CR8]         = cr_interception,
    [SVM_EXIT_CR0_SEL_WRITE]        = cr_interception,
    [SVM_EXIT_WRITE_CR0]            = cr_interception,
    [SVM_EXIT_WRITE_CR3]            = cr_interception,
    [SVM_EXIT_WRITE_CR4]            = cr_interception,
    [SVM_EXIT_WRITE_CR8]            = cr8_write_interception,
    [SVM_EXIT_READ_DR0]         = dr_interception,
    [SVM_EXIT_READ_DR1]         = dr_interception,
    [SVM_EXIT_READ_DR2]         = dr_interception,
    [SVM_EXIT_READ_DR3]         = dr_interception,
    [SVM_EXIT_READ_DR4]         = dr_interception,
    [SVM_EXIT_READ_DR5]         = dr_interception,
    [SVM_EXIT_READ_DR6]         = dr_interception,
    [SVM_EXIT_READ_DR7]         = dr_interception,
    [SVM_EXIT_WRITE_DR0]            = dr_interception,
    [SVM_EXIT_WRITE_DR1]            = dr_interception,
    [SVM_EXIT_WRITE_DR2]            = dr_interception,
    [SVM_EXIT_WRITE_DR3]            = dr_interception,
    [SVM_EXIT_WRITE_DR4]            = dr_interception,
    [SVM_EXIT_WRITE_DR5]            = dr_interception,
    [SVM_EXIT_WRITE_DR6]            = dr_interception,
    [SVM_EXIT_WRITE_DR7]            = dr_interception,
    [SVM_EXIT_EXCP_BASE + DB_VECTOR]    = db_interception,
    [SVM_EXIT_EXCP_BASE + BP_VECTOR]    = bp_interception,
    [SVM_EXIT_EXCP_BASE + UD_VECTOR]    = ud_interception,
    [SVM_EXIT_EXCP_BASE + PF_VECTOR]    = pf_interception,
    [SVM_EXIT_EXCP_BASE + MC_VECTOR]    = mc_interception,
    [SVM_EXIT_EXCP_BASE + AC_VECTOR]    = ac_interception,
    [SVM_EXIT_EXCP_BASE + GP_VECTOR]    = gp_interception,
    [SVM_EXIT_INTR]             = intr_interception,
    [SVM_EXIT_NMI]              = nmi_interception,
    [SVM_EXIT_SMI]              = nop_on_interception,
    [SVM_EXIT_INIT]             = nop_on_interception,
    [SVM_EXIT_VINTR]            = interrupt_window_interception,
    [SVM_EXIT_RDPMC]            = rdpmc_interception,
    [SVM_EXIT_CPUID]            = cpuid_interception,
    [SVM_EXIT_IRET]                         = iret_interception,
    [SVM_EXIT_INVD]                         = emulate_on_interception,
    [SVM_EXIT_PAUSE]            = pause_interception,
    [SVM_EXIT_HLT]              = halt_interception,
    [SVM_EXIT_INVLPG]           = invlpg_interception,
    [SVM_EXIT_INVLPGA]          = invlpga_interception,
    [SVM_EXIT_IOIO]             = io_interception,
    [SVM_EXIT_MSR]              = msr_interception,
    [SVM_EXIT_TASK_SWITCH]          = task_switch_interception,
    [SVM_EXIT_SHUTDOWN]         = shutdown_interception,
    [SVM_EXIT_VMRUN]            = vmrun_interception,
    [SVM_EXIT_VMMCALL]          = vmmcall_interception,
    [SVM_EXIT_VMLOAD]           = vmload_interception,
    [SVM_EXIT_VMSAVE]           = vmsave_interception,
    [SVM_EXIT_STGI]             = stgi_interception,
    [SVM_EXIT_CLGI]             = clgi_interception,
    [SVM_EXIT_SKINIT]           = skinit_interception,
    [SVM_EXIT_WBINVD]                       = wbinvd_interception,
    [SVM_EXIT_MONITOR]          = monitor_interception,
    [SVM_EXIT_MWAIT]            = mwait_interception,
    [SVM_EXIT_XSETBV]           = xsetbv_interception,
    [SVM_EXIT_RDPRU]            = rdpru_interception,
    [SVM_EXIT_NPF]              = npf_interception,
    [SVM_EXIT_RSM]                          = rsm_interception,
    [SVM_EXIT_AVIC_INCOMPLETE_IPI]      = avic_incomplete_ipi_interception,
    [SVM_EXIT_AVIC_UNACCELERATED_ACCESS]    = avic_unaccelerated_access_interception,
    [SVM_EXIT_RDTSC]                = handle_rdtsc_interception, // added handler
};

in init_vmcb:

/*
 * Excerpt from init_vmcb(): enable intercepts for this vCPU. The
 * INTERCEPT_RDTSC line is the addition made for this experiment.
 */
set_intercept(svm, INTERCEPT_WBINVD);
set_intercept(svm, INTERCEPT_XSETBV);
set_intercept(svm, INTERCEPT_RDPRU);
set_intercept(svm, INTERCEPT_RSM);

set_intercept(svm, INTERCEPT_RDTSC); // added

/* MONITOR/MWAIT are intercepted only when kvm_mwait_in_guest() is false. */
if (!kvm_mwait_in_guest(svm->vcpu.kvm)) {
    set_intercept(svm, INTERCEPT_MONITOR);
    set_intercept(svm, INTERCEPT_MWAIT);
}

then defining the handler itself again:

static int handle_rdtsc_interception(struct vcpu_svm *svm) 
{
    printk("[vmkernel] handling fake rdtsc (AMD) from cpl %i\n", svm_get_cpl(&svm->vcpu));
    
    svm->vcpu.arch.regs[VCPU_REGS_RAX] = last_tick & -1u;
    svm->vcpu.arch.regs[VCPU_REGS_RDX] = (last_tick >> 32) & -1u;

    skip_emulated_instruction(&svm->vcpu);

    return 1;
}
Lazare answered 19/7, 2020 at 12:1 Comment(0)

© 2022 - 2024 — McMap. All rights reserved.