I am not 100% sure I understand your requirements correctly, but I'll give it a try.
There is an interesting function posted here by user Ciro Santilli, pagemap_get_entry. It uses the /proc/[pid]/pagemap interface to get the page table entry (pte) that corresponds to the virtual address you give as input. From the pte, you get the pfn (physical frame number) to which the virtual address is mapped. With this function, we can use the following logic to find out whether a virtual address is mapped to a 4K, 2M or 1G physical page:
First, get the address of the 1G virtual page to which the virtual address of interest belongs. Call pagemap_get_entry with that virtual address and, if the returned pfn is $2^{18}$-aligned, assume we are on a 1G physical page ($2^{18}$ is used because we assume the size of a physical frame to be 4K $= 2^{12}$ bytes, and $2^{18} \cdot 2^{12} = 2^{30}$ bytes $=$ 1 GiB).
Else, get the address of the 2M virtual page inside which the virtual address falls. Call pagemap_get_entry with that and, if the returned pfn is $2^{9}$-aligned, assume we are inside a 2M physical page (again, $2^{9} \cdot 2^{12} = 2^{21}$ bytes $=$ 2 MiB).
Else, assume that the virtual address is mapped in RAM with a 4K physical page.
In code, I hope it would be something like this (part of the linked post is reposted here for completeness):
#define _XOPEN_SOURCE 700
#include <fcntl.h> /* open */
#include <stdint.h> /* uint64_t */
#include <stdio.h> /* printf */
#include <stdlib.h> /* size_t, malloc */
#include <unistd.h> /* pread, sysconf, getpid */
#include <sys/types.h> /* getpid */
#include <string.h> /* memset */
/* Decoded form of one 64-bit word read from a pagemap file.
 *
 * Each member holds the bit range that pagemap_get_entry() extracts from
 * the raw word.
 */
typedef struct {
    uint64_t pfn : 55;            /* bits 0-54 of the raw word */
    unsigned int soft_dirty : 1;  /* bit 55 */
    unsigned int file_page : 1;   /* bit 61 */
    unsigned int swapped : 1;     /* bit 62 */
    unsigned int present : 1;     /* bit 63 */
} PagemapEntry;

/* Read and decode the pagemap word that describes one virtual address.
 *
 * The file is indexed by virtual page number: the word for a page lives at
 * byte offset (page number * 8), where the page number is the address
 * divided by the system page size.
 *
 * @param[out] entry      receives the decoded fields
 * @param[in]  pagemap_fd descriptor of an open /proc/pid/pagemap file
 * @param[in]  vaddr      virtual address whose entry is wanted
 * @return 0 for success, 1 if the word could not be read completely
 */
int pagemap_get_entry(PagemapEntry *entry, int pagemap_fd, uintptr_t vaddr)
{
    uint64_t raw;
    uint8_t *cursor = (uint8_t *)&raw;
    const uintptr_t page_index = vaddr / sysconf(_SC_PAGE_SIZE);
    size_t done = 0;

    /* pread() may deliver fewer bytes than asked for, so keep reading until
     * the whole 8-byte word has arrived; an error or end-of-file aborts. */
    do {
        ssize_t got = pread(pagemap_fd, cursor + done, sizeof(raw) - done,
                            page_index * sizeof(raw) + done);
        if (got <= 0) {
            return 1;
        }
        done += got;
    } while (done < sizeof(raw));

    /* Flags sit in the top bits, the frame number in the low 55 bits. */
    entry->present = (raw >> 63) & 1;
    entry->swapped = (raw >> 62) & 1;
    entry->file_page = (raw >> 61) & 1;
    entry->soft_dirty = (raw >> 55) & 1;
    entry->pfn = raw & (((uint64_t)1 << 55) - 1);

    return 0;
}
int main()
{
unsigned long long PAGE_SIZE_1G = 1024*1024*1024;
unsigned long long PAGE_SIZE_2M = 2*1024*1024;
unsigned long long PAGE_SIZE_4K = 4*1024;
uint64_t pfn_1g, pfn_2m, pfn_4k, pfn_original;
char * arr = (char *)malloc(4*PAGE_SIZE_1G * sizeof(char));
if (arr == NULL) {
printf("malloc\n");
return 1;
}
memset(arr, 1, 4*PAGE_SIZE_1G);
uintptr_t vaddr = (uintptr_t)arr + 1024*1025*1026; // get a random virtual address
PagemapEntry entry;
uintptr_t vaddr_1g_aligned = vaddr & ~(PAGE_SIZE_1G - 1);
uintptr_t vaddr_2m_aligned = vaddr & ~(PAGE_SIZE_2M - 1);
uintptr_t vaddr_4k_aligned = vaddr & ~(PAGE_SIZE_4K - 1);
printf("Virtual address of interest %jx\n", (uintmax_t) vaddr);
printf("1G-aligned virtual address %jx\n", (uintmax_t) vaddr_1g_aligned);
printf("2M-aligned virtual address %jx\n", (uintmax_t) vaddr_2m_aligned);
printf("4K-aligned virtual address %jx\n", (uintmax_t) vaddr_4k_aligned);
char pagemap_file[BUFSIZ];
int pagemap_fd;
pid_t pid = getpid();
snprintf(pagemap_file, sizeof(pagemap_file), "/proc/%ju/pagemap", (uintmax_t)pid);
pagemap_fd = open(pagemap_file, O_RDONLY);
if (pagemap_fd < 0) {
return 1;
}
if (pagemap_get_entry(&entry, pagemap_fd, vaddr_1g_aligned)) {
printf("pagemap_get_entry\n");
return 1;
}
pfn_1g = entry.pfn;
if (pagemap_get_entry(&entry, pagemap_fd, vaddr_2m_aligned)) {
printf("pagemap_get_entry\n");
return 1;
}
pfn_2m = entry.pfn;
if (pagemap_get_entry(&entry, pagemap_fd, vaddr_4k_aligned)) {
printf("pagemap_get_entry\n");
return 1;
}
pfn_4k = entry.pfn;
if (pagemap_get_entry(&entry, pagemap_fd, vaddr)) {
printf("pagemap_get_entry\n");
return 1;
}
pfn_original = entry.pfn;
printf("pfn of 1G-alignment: %jx\n", (uintmax_t) pfn_1g);
printf("pfn of 2M-alignment: %jx\n", (uintmax_t) pfn_2m);
printf("pfn of 4K-alignment: %jx\n", (uintmax_t) pfn_4k);
printf("pfn of original address: %jx\n", (uintmax_t) pfn_original);
if ((pfn_1g != 0) && (pfn_1g % (1 << 18) == 0)) {
printf("Virtual address is mapped to 1G physical page\n");
}
else if ((pfn_2m != 0) && (pfn_2m % (1 << 9) == 0)) {
printf("Virtual address is mapped to 2M physical page\n");
}
else {
printf("Virtual address is mapped to 4K physical page\n");
}
return 0;
}
As the original poster explains, you have to run this program with sudo, because it needs read access to /proc/<pid>/pagemap.
On my system, which supports only 2M and 4K page sizes, I get the following:
root@debian # cat /sys/kernel/mm/transparent_hugepage/enabled
always madvise [never]
root@debian # ./physical_page_size
Virtual address of interest 7f4f9d01a810
1G-aligned virtual address 7f4f80000000
2M-aligned virtual address 7f4f9d000000
4K-aligned virtual address 7f4f9d01a000
pfn of 1G-alignment: 1809fa
pfn of 2M-alignment: 1639fa
pfn of 4K-alignment: 163a14
pfn of original address: 163a14
Virtual address is mapped to 4K physical page
root@debian # echo "always" > /sys/kernel/mm/transparent_hugepage/enabled
root@debian # ./physical_page_size
Virtual address of interest 7f978d0d2810
1G-aligned virtual address 7f9780000000
2M-aligned virtual address 7f978d000000
4K-aligned virtual address 7f978d0d2000
pfn of 1G-alignment: 137a00
pfn of 2M-alignment: 145a00
pfn of 4K-alignment: 145ad2
pfn of original address: 145ad2
Virtual address is mapped to 2M physical page
Also, I have to mention that when the program reports a 1G or 2M physical page size, this is not guaranteed to be the case; it is, however, very likely.
Finally, I see that your problem is with mbind. Again, I am not sure I understand it correctly, or whether this is a valid suggestion, but maybe you could try all possible page sizes, starting from the smallest, until the call succeeds.
/* Try mbind() on the region [start, start + size) at 4K, then 2M, then 1G
 * granularity, stopping at the first call that succeeds.
 *
 * For each granularity the start address is rounded down to a page boundary.
 * The length is then measured from that rounded-down address to the end of
 * the caller's region and rounded up to a whole number of pages, so the
 * aligned range always covers the entire region.
 *
 * The remaining mbind() arguments are left as the "....." placeholder of the
 * sketch and must be filled in before this compiles.
 *
 * @param[in] start first byte of the region
 * @param[in] size  length of the region in bytes
 * @return 0 if one of the mbind() calls succeeded, 1 if all three failed
 */
int wrapper(void *start, unsigned long size)
{
    const unsigned long PAGE_SIZE_4K = 4UL*1024;
    const unsigned long PAGE_SIZE_2M = 2UL*1024*1024;
    const unsigned long PAGE_SIZE_1G = 1024UL*1024*1024;
    const unsigned long addr = (unsigned long) start;

    void *start_4k = (void *)(addr & ~(PAGE_SIZE_4K-1));
    void *start_2m = (void *)(addr & ~(PAGE_SIZE_2M-1));
    void *start_1g = (void *)(addr & ~(PAGE_SIZE_1G-1));

    /* Bytes from each rounded-down start to the end of the caller's region:
     * the part skipped by rounding down must be added back to the length. */
    const unsigned long span_4k = (addr & (PAGE_SIZE_4K-1)) + size;
    const unsigned long span_2m = (addr & (PAGE_SIZE_2M-1)) + size;
    const unsigned long span_1g = (addr & (PAGE_SIZE_1G-1)) + size;

    /* Round up unconditionally so every length has a defined value, even
     * when the span is already an exact multiple of the page size. */
    const unsigned long size_4k = (span_4k + PAGE_SIZE_4K-1) & ~(PAGE_SIZE_4K-1);
    const unsigned long size_2m = (span_2m + PAGE_SIZE_2M-1) & ~(PAGE_SIZE_2M-1);
    const unsigned long size_1g = (span_1g + PAGE_SIZE_1G-1) & ~(PAGE_SIZE_1G-1);

    if (mbind(start_4k, size_4k, .....) == 0) {
        return 0;
    }
    if (mbind(start_2m, size_2m, .....) == 0) {
        return 0;
    }
    if (mbind(start_1g, size_1g, .....) == 0) {
        return 0;
    }
    return 1;
}
khugepaged that chugs away in the background and converts consecutive chunks into 2M huge pages. Almost the only programs that request huge pages explicitly are VMs like qemu/kvm... So is it the explicit huge pages you are running into or the randomly created transparent ones? – Gallows
madvise for a specific memory region. – Gallows