How can a user-space program configure "GS:" under 64-bit Windows (currently XP-64)?
(By "configure", I mean making GS:0 refer to an arbitrary 64-bit linear address.)
I am trying to port a "JIT" environment to X86-64 that was originally developed for Win32.
One unfortunate design aspect is that identical code needs to run on multiple user-space threads (e.g., "fibers"). The Win32 version of the code uses the GS selector for this, and generates the proper prefix to access the local data - "mov eax,GS:[offset]" points to the correct data for the current task. The code from the Win32 version would still load a value into GS on 64-bit Windows, if only there were a selector value that would work.
So far I've been able to find that 64-bit Windows doesn't support the LDT, so the method used under Win32 won't work. However, the X86-64 instruction set includes "SWAPGS", as well as a method to load GS without using the legacy segmentation - but that only works in kernel space.
According to X64 manuals, even if Win64 allowed access to descriptors -- which it doesn't -- there's no way to set the high 32-bits of the segment base. The only way to set these is through GS_BASE_MSR (and corresponding FS_BASE_MSR - the other segment bases are ignored in 64-bit mode). The WRMSR instruction is Ring0, so I can't use it directly.
I am hoping for a Zw* function that allows me to change "GS:" in user space, or some other dark corner of the Windows API. I believe Windows still uses FS: for its own TLS, so some mechanism must be available?
This sample code illustrates the problem. I apologize in advance for using byte code - VS won't do inline assembly for the 64-bit compile, and I was trying to keep this as one file for illustrative purposes.
The program displays "PASS" on XP-32, and doesn't on XP-x64.
#include <windows.h>
#include <string.h>
#include <stdio.h>
// Machine-code stub: returns the current DS selector in (e)ax.
// main() copies this same stub on both the x86 and the AMD64 path.
unsigned char GetDS32[] =
{0x8C,0xD8, // mov eax, ds   (8C /r: MOV r/m, Sreg; ModRM D8 = reg DS, rm eax)
0xC3}; // ret
// 32-bit machine-code stub: loads GS from the 16-bit selector passed as the
// first stack argument.
unsigned char SetGS32[] =
{0x8E,0x6C,0x24,0x04, // mov gs, word ptr [esp+4]   (SIB base is ESP, disp8 = 4)
0xC3 }; // ret
// 32-bit machine-code stub: treats the first stack argument as an offset and
// returns the DWORD stored at GS:[offset].
unsigned char UseGS32[] =
{ 0x8B,0x44,0x24,0x04, // mov eax, [esp+4]   ; fetch the offset argument
0x65,0x8B,0x00, // mov eax, gs:[eax]   ; 0x65 is the GS segment-override prefix
0xc3 }; // ret
// 64-bit machine-code stub: loads GS from the selector in the first argument
// register (rcx under the Microsoft x64 calling convention).
unsigned char SetGS64[] =
{0x8E,0xe9, // mov gs, ecx   ; no REX.W prefix: only the low 16 bits (cx) are used
0xC3 }; // ret
// 64-bit machine-code stub: returns the DWORD stored at GS:[rcx], where rcx
// holds the first argument.
unsigned char UseGS64[] =
{ 0x65,0x8B,0x01, // mov eax, gs:[rcx]   ; 0x65 is the GS segment-override prefix
0xc3 }; // ret
typedef WORD(*fcnGetDS)(void);
typedef void(*fcnSetGS)(WORD);
typedef DWORD(*fcnUseGS)(LPVOID);
int (*NtSetLdtEntries)(DWORD, DWORD, DWORD, DWORD, DWORD, DWORD);
int main( void )
{
SYSTEM_INFO si;
GetSystemInfo(&si);
LPVOID p = VirtualAlloc(NULL, 1024, MEM_COMMIT|MEM_TOP_DOWN,PAGE_EXECUTE_READWRITE);
fcnGetDS GetDS = (fcnGetDS)((LPBYTE)p+16);
fcnUseGS UseGS = (fcnUseGS)((LPBYTE)p+32);
fcnSetGS SetGS = (fcnSetGS)((LPBYTE)p+48);
*(DWORD *)p = 0x12345678;
if (si.wProcessorArchitecture == PROCESSOR_ARCHITECTURE_AMD64)
{
memcpy( GetDS, &GetDS32, sizeof(GetDS32));
memcpy( UseGS, &UseGS64, sizeof(UseGS64));
memcpy( SetGS, &SetGS64, sizeof(SetGS64));
}
else
{
memcpy( GetDS, &GetDS32, sizeof(GetDS32));
memcpy( UseGS, &UseGS32, sizeof(UseGS32));
memcpy( SetGS, &SetGS32, sizeof(SetGS32));
}
SetGS(GetDS());
if (UseGS(p) != 0x12345678) exit(-1);
if (si.wProcessorArchitecture == PROCESSOR_ARCHITECTURE_AMD64)
{
// The gist of the question - What is the 64-bit equivalent of the following code
}
else
{
DWORD base = (DWORD)p;
LDT_ENTRY ll;
int ret;
*(FARPROC*)(&NtSetLdtEntries) = GetProcAddress(LoadLibrary("ntdll.dll"), "NtSetLdtEntries");
ll.BaseLow = base & 0xFFFF;
ll.HighWord.Bytes.BaseMid = base >> 16;
ll.HighWord.Bytes.BaseHi = base >> 24;
ll.LimitLow = 400;
ll.HighWord.Bits.LimitHi = 0;
ll.HighWord.Bits.Granularity = 0;
ll.HighWord.Bits.Default_Big = 1;
ll.HighWord.Bits.Reserved_0 = 0;
ll.HighWord.Bits.Sys = 0;
ll.HighWord.Bits.Pres = 1;
ll.HighWord.Bits.Dpl = 3;
ll.HighWord.Bits.Type = 0x13;
ret = NtSetLdtEntries(0x80, *(DWORD*)&ll, *((DWORD*)(&ll)+1),0,0,0);
if (ret < 0) { exit(-1);}
SetGS(0x84);
}
if (UseGS(0) != 0x12345678) exit(-1);
printf("PASS\n");
}