I would like to use ld's --build-id option in order to add build information to my binary. However, I'm not sure how to make this information available inside the program. Assume I want to write a program that writes a backtrace every time an exception occurs, and a script that parses this information. The script reads the symbol table of the program and searches for the addresses printed in the backtrace (I'm forced to use such a script because the program is statically linked and backtrace_symbols is not working). In order for the script to work correctly I need to match build version of the program with the build version of the program which created the backtrace. How can I print the build version of the program (located in the .note.gnu.build-id elf section) from the program itself?
How can I print the build version of the program (located in the .note.gnu.build-id elf section) from the program itself?
You need to read the
ElfW(Ehdr)
(at the beginning of the file) to find the program headers in your binary (.e_phoff
and .e_phnum
will tell you where the program headers are and how many of them to read). You then read the program headers until you find the
PT_NOTE
segment of your program. That segment will tell you the offset to the beginning of all the notes in your binary. You then need to read the
ElfW(Nhdr)
and skip the rest of the note (the total size of the note is sizeof(Nhdr) + .n_namesz + .n_descsz
, properly aligned), until you find a note with .n_type == NT_GNU_BUILD_ID
. Once you find the
NT_GNU_BUILD_ID
note, skip past its .n_namesz
, and read the .n_descsz
bytes to get the actual build-id.
You can verify that you are reading the right data by comparing what you read with the output of readelf -n a.out
.
P.S.
If you are going to go through the trouble to decode build-id as above, and if your executable is not stripped, it may be better for you to just decode and print symbol names instead (i.e. to replicate what backtrace_symbols
does) -- it's actually easier to do than decoding ELF notes, because the symbol table contains fixed-sized entries.
Yes, a program can read its own .note.gnu.build-id
. The important piece is the dl_iterate_phdr
function.
I've used this technique in Mesa (the OpenGL/Vulkan implementation) to read its own build-id for use with the on-disk shader cache.
I've extracted those bits into a separate project[1] for easy use by others.
Basically, this is the code I've written based on the answers given to my question. In order to compile the code I had to make some changes, and I hope it will work on as many types of platforms as possible. However, it was tested on only one build machine. One of the assumptions I made was that the program is built on the same machine that runs it, so there is no point in checking endianness compatibility between the program and the machine.
user@:~/$ uname -s -r -m -o
Linux 3.2.0-45-generic x86_64 GNU/Linux
user@:~/$ g++ test.cpp -o test
user@:~/$ readelf -n test | grep Build
Build ID: dc5c4682e0282e2bd8bc2d3b61cfe35826aa34fc
user@:~/$ ./test
Build ID: dc5c4682e0282e2bd8bc2d3b61cfe35826aa34fc
#include <elf.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/stat.h>
/*
 * ElfW(type) expands to the ELF structure name whose class matches the
 * word size of the program being compiled, e.g. ElfW(Ehdr) becomes
 * Elf64_Ehdr or Elf32_Ehdr.
 *
 * The selection is keyed on the LP64 data model rather than on one specific
 * architecture macro, so that every 64-bit target (not only x86-64) gets the
 * Elf64_* structures and ILP32 targets get the Elf32_* ones.
 */
#if defined(__LP64__) || defined(_LP64)
# define ElfW(type) Elf64_##type
#else
# define ElfW(type) Elf32_##type
#endif
/*
detecting build id of a program from its note section
https://mcmap.net/q/1178609/-can-a-program-read-its-own-elf-section
http://www.scs.stanford.edu/histar/src/pkg/uclibc/utils/readelf.c
http://www.sco.com/developers/gabi/2000-07-17/ch5.pheader.html#note_section
*/
/* Round `value` up to the next multiple of `align`; `align` must be a power of two. */
static size_t note_align_up(size_t value, size_t align)
{
    return (value + align - 1) & ~(align - 1);
}

/*
 * Locate the GNU build-ID note inside an ELF image held in memory.
 *
 * image   start of the ELF file contents
 * size    number of readable bytes at `image`
 * id_len  receives the length of the build ID in bytes on success
 *
 * Returns a pointer (into `image`) to the build-ID bytes, or NULL when the
 * image is not a usable ELF file of the expected class or carries no
 * NT_GNU_BUILD_ID note owned by "GNU".
 *
 * Every offset and length taken from the file is checked against `size`
 * before it is used, and headers are copied out with memcpy so that no
 * alignment of the file contents is assumed.
 */
static const unsigned char *find_build_id(const unsigned char *image, size_t size, size_t *id_len)
{
    ElfW(Ehdr) ehdr;
    size_t i;

    if (size < sizeof(ehdr))
        return NULL;
    memcpy(&ehdr, image, sizeof(ehdr));

    if (memcmp(ehdr.e_ident, ELFMAG, SELFMAG) != 0)
        return NULL;
    /* A mismatching entry size means the file is not of the class ElfW() selects. */
    if (ehdr.e_phentsize != sizeof(ElfW(Phdr)))
        return NULL;
    if (ehdr.e_phoff > size || ehdr.e_phnum > (size - ehdr.e_phoff) / sizeof(ElfW(Phdr)))
        return NULL;

    /* Examine every PT_NOTE segment: a binary may carry more than one. */
    for (i = 0; i < ehdr.e_phnum; ++i) {
        ElfW(Phdr) phdr;
        const unsigned char *cursor;
        size_t remaining;
        size_t align;

        memcpy(&phdr, image + ehdr.e_phoff + i * sizeof(phdr), sizeof(phdr));
        if (phdr.p_type != PT_NOTE)
            continue;
        if (phdr.p_offset > size || phdr.p_filesz > size - phdr.p_offset)
            continue;

        cursor = image + phdr.p_offset;
        remaining = (size_t)phdr.p_filesz;
        /* Note fields are padded to 4 bytes, or to 8 in segments aligned to 8. */
        align = (phdr.p_align == 8) ? 8 : 4;

        while (remaining >= sizeof(ElfW(Nhdr))) {
            ElfW(Nhdr) nhdr;
            size_t name_sz;
            size_t desc_sz;

            memcpy(&nhdr, cursor, sizeof(nhdr));
            cursor += sizeof(nhdr);
            remaining -= sizeof(nhdr);

            /* Check the raw sizes first so that rounding up cannot overflow. */
            if (nhdr.n_namesz > remaining)
                break;
            name_sz = note_align_up(nhdr.n_namesz, align);
            if (name_sz > remaining || nhdr.n_descsz > remaining - name_sz)
                break;

            if (nhdr.n_type == NT_GNU_BUILD_ID &&
                nhdr.n_namesz == sizeof("GNU") &&
                memcmp(cursor, "GNU", sizeof("GNU")) == 0) {
                *id_len = nhdr.n_descsz;
                return cursor + name_sz;
            }

            desc_sz = note_align_up(nhdr.n_descsz, align);
            if (desc_sz > remaining - name_sz)
                break;
            cursor += name_sz + desc_sz;
            remaining -= name_sz + desc_sz;
        }
    }
    return NULL;
}

/*
 * Print the GNU build ID of this executable as lowercase hex.
 *
 * The executable is opened through argv[0], mapped read-only and searched
 * for its NT_GNU_BUILD_ID note. argv[0] must therefore name a path that can
 * be opened from the current directory; a program started through a PATH
 * lookup may fail at the fopen() step.
 *
 * Returns EXIT_SUCCESS when a build ID was printed, EXIT_FAILURE otherwise
 * (a diagnostic is written to stderr).
 */
int main (int argc, char* argv[])
{
    const char *thefilename = argv[0];
    FILE *thefile;
    struct stat statbuf;
    void *mapping;
    size_t mapping_size;
    const unsigned char *build_id;
    size_t build_id_len = 0;
    size_t i;
    int status = EXIT_FAILURE;

    (void)argc;

    if (!(thefile = fopen(thefilename, "rb"))) {
        perror(thefilename);
        return EXIT_FAILURE;
    }
    if (fstat(fileno(thefile), &statbuf) < 0) {
        perror(thefilename);
        fclose(thefile);
        return EXIT_FAILURE;
    }

    mapping_size = (size_t)statbuf.st_size;
    /* The file is only read, so a read-only private mapping is sufficient. */
    mapping = mmap(0, mapping_size, PROT_READ, MAP_PRIVATE, fileno(thefile), 0);
    if (mapping == MAP_FAILED) {
        perror(thefilename);
        fclose(thefile);
        return EXIT_FAILURE;
    }

    build_id = find_build_id((const unsigned char *)mapping, mapping_size, &build_id_len);
    if (build_id) {
        printf(" Build ID: ");
        for (i = 0; i < build_id_len; ++i)
        {
            printf("%02x", build_id[i]);
        }
        printf("\n");
        status = EXIT_SUCCESS;
    } else {
        fprintf(stderr, "%s: no GNU build ID note found\n", thefilename);
    }

    munmap(mapping, mapping_size);
    fclose(thefile);
    return status;
}
I just wrote my own code for this ...
#include <fcntl.h> // open
#include <unistd.h> // close
#include <stdio.h> // printf
#include <string.h> // strncmp
#include <libelf.h> // elf_*
#include <gelf.h> // gelf_*
// Compile as: gcc -o read_build_id read_build_id.c -lelf
/*
 * Print the GNU build ID(s) found in the note sections of an ELF file.
 *
 * argv[1] names the ELF object to inspect. Every SHT_NOTE section is
 * scanned with libelf, and each note of type NT_GNU_BUILD_ID owned by
 * "GNU" is printed as lowercase hex.
 *
 * Returns 0 on success (including the "not an ELF file" case, which is only
 * reported) and 1 on usage errors or when the file cannot be opened or read
 * by libelf. The file descriptor and the ELF handle are released on every
 * path after they have been acquired.
 */
int main(int argc, char **argv)
{
    if (argc < 2)
    {
        printf("Usage: %s <elf-file>\n", argv[0]);
        return 1;
    }

    // Determine the working version (the ELF version supported by both, the libelf library and this program).
    if (elf_version(EV_CURRENT) == EV_NONE)
    {
        printf("Warning: libelf is out of date. Can't read build-id of \"%s\" (or any object file).\n", argv[1]);
        return 1;
    }

    // Open the ELF object file; report the system error instead of handing libelf a bad descriptor.
    int fd = open(argv[1], O_RDONLY);
    if (fd < 0)
    {
        perror(argv[1]);
        return 1;
    }

    // Open an ELF descriptor for reading.
    Elf *e = elf_begin(fd, ELF_C_READ, NULL);
    if (!e)
    {
        printf("Warning: elf_begin returned NULL for \"%s\": %s\n", argv[1], elf_errmsg(-1));
        close(fd);
        return 1;
    }

    if (elf_kind(e) != ELF_K_ELF)
    {
        printf("%s: skipping, not an ELF file.\n", argv[1]);
    }
    else
    {
        // Run over all sections in the ELF file.
        Elf_Scn *scn = NULL;
        while ((scn = elf_nextscn(e, scn)) != NULL)
        {
            // Get the section header; skip the section if libelf cannot provide it,
            // so that an uninitialised header is never inspected.
            GElf_Shdr shdr;
            if (gelf_getshdr(scn, &shdr) == NULL)
            {
                fprintf(stderr, "gelf_getshdr() failed: %s\n", elf_errmsg(-1));
                continue;
            }
            if (shdr.sh_type != SHT_NOTE)
                continue;

            Elf_Data *data = elf_getdata(scn, NULL);
            if (data == NULL || data->d_buf == NULL)
                continue;

            GElf_Nhdr nhdr;
            size_t name_offset, desc_offset;
            size_t offset = 0;
            // gelf_getnote() returns the offset of the next note, or 0 when there is none.
            while ((offset = gelf_getnote(data, offset, &nhdr, &name_offset, &desc_offset)) > 0)
            {
                // The owner name is "GNU" including its terminating NUL, hence a name size of 4.
                if (nhdr.n_type == NT_GNU_BUILD_ID && nhdr.n_namesz == 4 &&
                    strncmp((char *)data->d_buf + name_offset, "GNU", 4) == 0)
                {
                    printf("Found build ID: ");
                    unsigned char *desc = (unsigned char *)data->d_buf + desc_offset;
                    for (size_t i = 0; i < nhdr.n_descsz; ++i)
                        printf("%02x", desc[i]);
                    printf("\n");
                }
            }
        }
    }

    elf_end(e);
    close(fd);
    return 0;
}
© 2022 - 2024 — McMap. All rights reserved.