Just for the lulz, another solution without ptracing your own process or touching a single line of assembly or playing around with /proc
. You only have to load the library in the context of the process and let the magic happen.
The solution I propose is to use the constructor feature (brought from C++ to C by gcc) to run some code when a library is loaded. Then this library just patch the GOT (Global Offset Table) entry for malloc
. The GOT stores the real addresses for the library functions so that the name resolution happen only once. To patch the GOT you have to play around with the ELF structures (see man 5 elf
). And Linux is kind enough to give you the aux
vector (see man 3 getauxval
) that tells you where to find in memory the program headers of the current program. However, better interface is provided by dl_iterate_phdr
, which is used below.
Here is an example code of library that does exactly this when the init
function is called. Although the same could probably be achieved with a gdb script.
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <dlfcn.h>
#include <sys/auxv.h>
#include <elf.h>
#include <link.h>
#include <sys/mman.h>
struct strtab {
char *tab;
ElfW(Xword) size;
};
struct jmpreltab {
ElfW(Rela) *tab;
ElfW(Xword) size;
};
struct symtab {
ElfW(Sym) *tab;
ElfW(Xword) entsz;
};
/* Backup of the real malloc function */
static void *(*realmalloc)(size_t) = NULL;
/* My local versions of the malloc functions */
static void *mymalloc(size_t size);
/*************/
/* ELF stuff */
/*************/
static const ElfW(Phdr) *get_phdr_dynamic(const ElfW(Phdr) *phdr,
uint16_t phnum, uint16_t phentsize) {
int i;
for (i = 0; i < phnum; i++) {
if (phdr->p_type == PT_DYNAMIC)
return phdr;
phdr = (ElfW(Phdr) *)((char *)phdr + phentsize);
}
return NULL;
}
static const ElfW(Dyn) *get_dynentry(ElfW(Addr) base, const ElfW(Phdr) *pdyn,
uint32_t type) {
ElfW(Dyn) *dyn;
for (dyn = (ElfW(Dyn) *)(base + pdyn->p_vaddr); dyn->d_tag; dyn++) {
if (dyn->d_tag == type)
return dyn;
}
return NULL;
}
static struct jmpreltab get_jmprel(ElfW(Addr) base, const ElfW(Phdr) *pdyn) {
struct jmpreltab table;
const ElfW(Dyn) *dyn;
dyn = get_dynentry(base, pdyn, DT_JMPREL);
table.tab = (dyn == NULL) ? NULL : (ElfW(Rela) *)dyn->d_un.d_ptr;
dyn = get_dynentry(base, pdyn, DT_PLTRELSZ);
table.size = (dyn == NULL) ? 0 : dyn->d_un.d_val;
return table;
}
static struct symtab get_symtab(ElfW(Addr) base, const ElfW(Phdr) *pdyn) {
struct symtab table;
const ElfW(Dyn) *dyn;
dyn = get_dynentry(base, pdyn, DT_SYMTAB);
table.tab = (dyn == NULL) ? NULL : (ElfW(Sym) *)dyn->d_un.d_ptr;
dyn = get_dynentry(base, pdyn, DT_SYMENT);
table.entsz = (dyn == NULL) ? 0 : dyn->d_un.d_val;
return table;
}
static struct strtab get_strtab(ElfW(Addr) base, const ElfW(Phdr) *pdyn) {
struct strtab table;
const ElfW(Dyn) *dyn;
dyn = get_dynentry(base, pdyn, DT_STRTAB);
table.tab = (dyn == NULL) ? NULL : (char *)dyn->d_un.d_ptr;
dyn = get_dynentry(base, pdyn, DT_STRSZ);
table.size = (dyn == NULL) ? 0 : dyn->d_un.d_val;
return table;
}
static void *get_got_entry(ElfW(Addr) base, struct jmpreltab jmprel,
struct symtab symtab, struct strtab strtab, const char *symname) {
ElfW(Rela) *rela;
ElfW(Rela) *relaend;
relaend = (ElfW(Rela) *)((char *)jmprel.tab + jmprel.size);
for (rela = jmprel.tab; rela < relaend; rela++) {
uint32_t relsymidx;
char *relsymname;
relsymidx = ELF64_R_SYM(rela->r_info);
relsymname = strtab.tab + symtab.tab[relsymidx].st_name;
if (strcmp(symname, relsymname) == 0)
return (void *)(base + rela->r_offset);
}
return NULL;
}
static void patch_got(ElfW(Addr) base, const ElfW(Phdr) *phdr, int16_t phnum,
int16_t phentsize) {
const ElfW(Phdr) *dphdr;
struct jmpreltab jmprel;
struct symtab symtab;
struct strtab strtab;
void *(**mallocgot)(size_t);
dphdr = get_phdr_dynamic(phdr, phnum, phentsize);
jmprel = get_jmprel(base, dphdr);
symtab = get_symtab(base, dphdr);
strtab = get_strtab(base, dphdr);
mallocgot = get_got_entry(base, jmprel, symtab, strtab, "malloc");
/* Replace the pointer with our version. */
if (mallocgot != NULL) {
/* Quick & dirty hack for some programs that need it. */
/* Should check the returned value. */
void *page = (void *)((intptr_t)mallocgot & ~(0x1000 - 1));
mprotect(page, 0x1000, PROT_READ | PROT_WRITE);
*mallocgot = mymalloc;
}
}
static int callback(struct dl_phdr_info *info, size_t size, void *data) {
uint16_t phentsize;
data = data;
size = size;
printf("Patching GOT entry of \"%s\"\n", info->dlpi_name);
phentsize = getauxval(AT_PHENT);
patch_got(info->dlpi_addr, info->dlpi_phdr, info->dlpi_phnum, phentsize);
return 0;
}
/*****************/
/* Init function */
/*****************/
__attribute__((constructor)) static void init(void) {
realmalloc = malloc;
dl_iterate_phdr(callback, NULL);
}
/*********************************************/
/* Here come the malloc function and sisters */
/*********************************************/
static void *mymalloc(size_t size) {
printf("hello from my malloc\n");
return realmalloc(size);
}
And an example program that just loads the library between two malloc
calls.
#include <stdio.h>
#include <stdlib.h>
#include <dlfcn.h>
void loadmymalloc(void) {
/* Should check return value. */
dlopen("./mymalloc.so", RTLD_LAZY);
}
int main(void) {
void *ptr;
ptr = malloc(42);
printf("malloc returned: %p\n", ptr);
loadmymalloc();
ptr = malloc(42);
printf("malloc returned: %p\n", ptr);
return EXIT_SUCCESS;
}
The call to mprotect
is usually useless. However I found that gvim (which is compiled as a shared object) needs it. If you also want to catch the references to malloc
as pointers (which may allow to later call the real function and bypass yours), you can apply the very same process to the symbol table pointed to by the DT_RELA
dynamic entry.
If the constructor feature is not available for you, all you have to do is resolve the init
symbol from the newly loaded library and call it.
Note that you may also want to replace dlopen
so that libraries loaded after yours gets patched as well. Which may happen if you load your library quite early or if the application has dynamically loaded plugins.
ltrace -e 'malloc+free' -p xxxxx
seems to work just fine here (ltrace 0.7.3 running on linux 3.13.0 / x86_64). – Edessamalloc
/free
? Or must the entire backtrace be examined? It is my view that your tool will be fastest (and strategy, different) if you adopt the strategy that amasses on-the-fly the minimum amount of data required to reconstitute the events of interest. Currently I envision patchingmalloc
's first instruction as ajmp
to an injected page of code, accompanied with a large buffer for call records. – Aggressivemalloc()
,memalign()
,posix_memalign()
,free()
et al. as the way to go. Using ptrace to attach to the target process, and anonymously mapping writable pages, then copying position-independent executable code to that page, is not hard at all. The attaching process can use elf tools and/proc/PID/maps
to locate the target addresses. This should work for even static binaries (no libdl). Difficult part is to disassemble/duplicate the asm op(s) under the jump instruction -- unless it is a jump instruction itself, of course. – Apiarianptrace()
single-step all threads until they leave the prologues of all functions being injected (this should take no time at all), and then the process is patched. For the second, some primitive binary analysis and relocation will be required. – Aggressive%rip
, 13 bytes if an arbitrary 64-bitpushq %rax; movabs $constant, %rax ; jmp *%rax
sequence is needed. Instruction analysis (those 5-13 bytes) is nasty. I'd prefer to mmap complete replacement functions instead. Would that be an acceptable option? – Apiarianmalloc
to where it expected certain instructions, but there it will find either the unexpected, or no instruction at all, and crash. More broadly, how can I be sure that the program will never attempt to execute anything in the replaced prologue ever again? To solve this problem in full generality would require solving the Halting Problem, and is indeed very nasty in its full generality. – Aggressivemmap
-ing complete replacements, I must be sure that a thread has exited the replaced function and will not come back into it other than through the entry point. – Aggressive