The idea here is to use `strace -p <your pid>` to observe the running process.
An extension to gawk is created so that `lseek` can be called from an awk script.
`lseek` reports the file descriptor offset, as you have requested.
You will need to modify the awk script to call the extension.
Without modifying the awk script, `strace -p <your pid>` will report lots of `read`
calls, which usually indicates progress through the input file.
Below is a Makefile to create the lseek
extension and run a quick test.
# Build the gawk "lseek" extension (awklseek.so) and run a quick test.
CC = gcc
# -fPIC: the object ends up in a shared library.
# -c is carried in CFLAGS, so the compile rule below does not repeat it.
CFLAGS = -Wall -fPIC -c -O2 -DHAVE_STRING_H -DHAVE_SNPRINTF -DHAVE_STDARG_H -DHAVE_VPRINTF -DDYNAMIC
LDFLAGS = -shared

# Default target: build the extension, then run gawk over the file "data".
# "$$0" is written with two dollar signs so make hands a literal "$0" to awk.
all: awklseek.so
	gawk -l ./awklseek.so 'BEGIN{ print lseek() } { print $$0 ; lseek() ; } ' data

# Link the shared object that gawk loads with -l.
awklseek.so: lseek.o
	${CC} $^ -o $@ ${LDFLAGS}

# Compile each .c file into its .o (pattern rule, so $< is the source file).
%.o: %.c
	${CC} ${CFLAGS} $< -o $@

# Neither target names a real file.
.PHONY: all clean
clean:
	rm -f awklseek.so lseek.o
Here is the extension:
/*
* lseek.c - Provide an interface to lseek(2) routine
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/stat.h>
#include <sys/types.h>
#include "gawkapi.h"
/*
 * File-scope state for the extension.
 * NOTE(review): nothing in this file assigns `api` or `ext_id`; presumably the
 * dl_load_func boilerplate from gawkapi.h fills them in at load time and
 * expects exactly these names -- confirm against gawkapi.h before renaming.
 */
/* gawk API table; do_lseek calls api->api_get_file through it. */
static const gawk_api_t *api;
/* Extension identifier handed back to gawk on API calls (see do_lseek). */
static awk_ext_id_t ext_id;
/* Human-readable version string; not referenced elsewhere in this file. */
static const char *ext_version = "lseek extension: version 1.0";
/* Forward declaration so init_func below can point at it. */
static awk_bool_t init_lseek(void);
/* Initialization hook for this extension; points at init_lseek. */
static
awk_bool_t(*init_func) (void)= init_lseek;
/*
 * NOTE(review): presumably the marker symbol gawk looks for when loading an
 * extension -- confirm in the gawk manual. It is never read in this file.
 */
int plugin_is_GPL_compatible;
/*
 * do_lseek --- awk-callable lseek(): report the offset of the current
 * input file as the operating system sees it.
 *
 * nargs and unused are not consulted. The awk-visible return value,
 * stored in *result, is:
 *   -1      if gawk could not supply the current input file,
 *   offset  otherwise, i.e. whatever lseek(fd, 0, SEEK_CUR) returns
 *           (which is itself -1 when lseek fails).
 *
 * Returns result.
 */
static awk_value_t *
do_lseek (int nargs, awk_value_t * result, struct awk_ext_func *unused)
{
	const awk_input_buf_t *in_buf;
	const awk_output_buf_t *out_buf;
	off_t offset;

	/* A NULL name with zero length requests the current input file. */
	if (api->api_get_file(ext_id, NULL, 0, "<", 0, &in_buf, &out_buf) == awk_false) {
		make_number(-1.0, result);
		return result;
	}

	/* Seeking zero bytes from the current position only queries the offset. */
	offset = lseek(in_buf->fd, 0, SEEK_CUR);
	make_number(offset, result);
	return result;
}
/*
 * init_lseek --- initialization hook for the extension.
 *
 * There is nothing to set up, so no failure can be recorded and the
 * function always reports success.
 */
static awk_bool_t
init_lseek(void)
{
	const int failure_count = 0;	/* no setup steps, hence no failures */

	return !failure_count;
}
/*
 * Table of functions this extension exports to awk: a single entry that
 * binds the awk-level name "lseek" to do_lseek.
 * NOTE(review): the remaining initializers (0, 0, awk_false, NULL) are
 * positional; presumably max/min argument counts, a lint-suppression flag
 * and a user-data pointer -- confirm against awk_ext_func_t in gawkapi.h.
 */
static awk_ext_func_t func_table[] = {
{"lseek", do_lseek, 0, 0, awk_false, NULL},
};
/*
 * Define the dl_load entry point using the boilerplate macro from gawkapi.h.
 * Arguments: the function table, the extension name (lseek), and an empty
 * string. NOTE(review): the third argument is presumably a name-space
 * prefix for the exported functions -- confirm against gawkapi.h.
 */
dl_load_func(func_table, lseek, "")