[DTrace-devel] [PATCH] Implement the ppid builtin D variable

Eugene Loh eugene.loh at oracle.com
Wed Sep 16 14:42:37 PDT 2020


On 09/16/2020 02:02 PM, Kris Van Hees wrote:

> Comments below...
>
> On Wed, Sep 16, 2020 at 02:16:33PM -0400, eugene.loh at oracle.com wrote:
>> From: Eugene Loh <eugene.loh at oracle.com>
>>
>> ---
>>   bpf/get_bvar.c                           | 11 ++++++
>>   libdtrace/dt_bpf.c                       | 49 +++++++++++++++++++++---
>>   libdtrace/dt_dctx.h                      |  4 ++
>>   test/unittest/variables/bvar/tst.ppid.d  |  1 -
>>   test/unittest/variables/bvar/tst.ppid2.d | 21 ++++++++++
>>   5 files changed, 80 insertions(+), 6 deletions(-)
>>   create mode 100644 test/unittest/variables/bvar/tst.ppid2.d
>>
>> diff --git a/bpf/get_bvar.c b/bpf/get_bvar.c
>> index 9f091080..2f6a59cc 100644
>> --- a/bpf/get_bvar.c
>> +++ b/bpf/get_bvar.c
>> @@ -46,6 +46,17 @@ noinline uint64_t dt_get_bvar(dt_mstate_t *mst, uint32_t id)
>>   
>>   		return val & 0x00000000ffffffffUL;
>>   	}
>> +	case DIF_VAR_PPID: {
>> +		uint64_t	ptr = bpf_get_current_task();
>> +		int32_t		val = -1;
>> +
>> +		if (ptr == 0)
>> +			return -1;
>> +
>> +		bpf_probe_read((void *) &ptr, 8, (const void *) (ptr + mst->offparent));
>> +		bpf_probe_read((void *) &val, 4, (const void *) (ptr + mst->offtgid));
> We should be checking the return values of the bpf_probe_read() calls.  It
> returns 0 upon success, and a negative value (I think just -EFAULT) when it
> fails.
>
> I suggested storing the offsets in the 'state' map rather than the 'mem' map.
> By placing it in mst (struct mstate in the 'mem' per-cpu map value) we end up
> with a copy per CPU which wastes (some) unnecessary space.  It is also a better
> fit for the 'state' map (as suggested) than the mstate (which is the machine
> state during the execution of a clause, populated in the prologue of each and
> every clause).
>
> The penalty of doing two bpf_map_lookup_elem() calls to get the offset values
> isn't a significant overhead.
>
>> +		return (uint64_t) val;
>> +	}
>>   	case DIF_VAR_UID: {
>>   		uint64_t	val = bpf_get_current_uid_gid();
>>   
>> diff --git a/libdtrace/dt_bpf.c b/libdtrace/dt_bpf.c
>> index 556acb22..3cc55213 100644
>> --- a/libdtrace/dt_bpf.c
>> +++ b/libdtrace/dt_bpf.c
>> @@ -82,6 +82,28 @@ create_gmap(dtrace_hdl_t *dtp, const char *name, enum bpf_map_type type,
>>   	return fd;
>>   }
>>   
>> +static int
>> +get_task_offsets(dtrace_hdl_t *dtp, int *offparent, int *offtgid)
>> +{
>> +	ctf_id_t type;
>> +	ctf_membinfo_t ctm;
>> +
>> +	type = ctf_lookup_by_name(dtp->dt_shared_ctf, "struct task_struct");
>> +	if (type == CTF_ERR)
>> +		return -1;
>> +
>> +	if (ctf_member_info(dtp->dt_shared_ctf, type, "real_parent", &ctm)
>> +	    == CTF_ERR)
>> +		return -1;
>> +	*offparent = ctm.ctm_offset / NBBY;
>> +
>> +	if (ctf_member_info(dtp->dt_shared_ctf, type, "tgid", &ctm) == CTF_ERR)
>> +		return -1;
>> +	*offtgid = ctm.ctm_offset / NBBY;
>> +
>> +	return 0;
>> +}
> Whenwe store these in the state map, this function can simply be passed the
> state map fd and populate it directly.
>
>>   /*
>>    * Create the global BPF maps that are shared between all BPF programs in a
>>    * single tracing session:
>> @@ -134,8 +156,13 @@ int
>>   dt_bpf_gmap_create(dtrace_hdl_t *dtp)
>>   {
>>   	int		gvarc, tvarc;
>> -	int		ci_mapfd;
>> +	int		ci_mapfd, ci_memfd;
> Not that it matters in view of the comments above, but "ci_" stands for
> cpu info, sp ci_memfd wouldn't really be the right name to use here.  Anyway,
> I do think we should store this in the 'state' map so this is not an issue
> in that case.
>
>>   	uint32_t	key = 0;
>> +	size_t		memsz = roundup(sizeof(dt_mstate_t), 8) + 8 +
>> +				roundup(dtp->dt_maxreclen, 8);
> I rather keep it in the actual map creation call so it is obvious what the size
> is as you see the call to create the map.
>
>> +	int		icpu, ncpus = dtp->dt_conf.num_online_cpus;
>> +	int 		offparent = -1, offtgid = -1;
>> +	char		*p;
> Not needed when storing the data in the 'state' map.
>
>>   	/* If we already created the global maps, return success. */
>>   	if (dt_gmap_done)
>> @@ -159,10 +186,9 @@ dt_bpf_gmap_create(dtrace_hdl_t *dtp)
>>   	if (ci_mapfd == -1)
>>   		return -1;	/* dt_errno is set for us */
>>   
>> -	if (create_gmap(dtp, "mem", BPF_MAP_TYPE_PERCPU_ARRAY,
>> -			sizeof(uint32_t),
>> -			roundup(sizeof(dt_mstate_t), 8) + 8 +
>> -				roundup(dtp->dt_maxreclen, 8), 1) == -1)
> Retained.
>
>> +	ci_memfd = create_gmap(dtp, "mem", BPF_MAP_TYPE_PERCPU_ARRAY,
>> +			sizeof(uint32_t), memsz, 1);
>> +	if (ci_memfd == -1)
> Not needed.
>
>>   		return -1;	/* dt_errno is set for us */
>>   
>>   	if (create_gmap(dtp, "strtab", BPF_MAP_TYPE_ARRAY,
>> @@ -182,6 +208,19 @@ dt_bpf_gmap_create(dtrace_hdl_t *dtp)
>>   	/* Populate the 'cpuinfo' map. */
>>   	dt_bpf_map_update(ci_mapfd, &key, dtp->dt_conf.cpus);
>>   
>> +	/* Write some task_struct offsets into mstate. */
>> +	if (get_task_offsets(dtp, &offparent, &offtgid))
>> +		return -1;   /* FIXME: set dt_errno */
>> +	p = alloca(memsz * ncpus);
>> +	memset(p, 0, memsz * ncpus);
>> +	for (icpu = 0; icpu < ncpus; icpu++) {
>> +		dt_mstate_t *q = (dt_mstate_t *) (p + icpu * memsz);
>> +		q->offparent = offparent;
>> +		q->offtgid = offtgid;
>> +	}
>> +	/* FIXME; is this how to use dt_bpf_map_update for a per-CPU array? */
>> +	dt_bpf_map_update(ci_memfd, &key, p);
>> +
> Yes indeed, but not needed when we store the offsets in the 'state' map.
>
>>   	return 0;
>>   }
>>   
>> diff --git a/libdtrace/dt_dctx.h b/libdtrace/dt_dctx.h
>> index 79ef7105..b9b1c460 100644
>> --- a/libdtrace/dt_dctx.h
>> +++ b/libdtrace/dt_dctx.h
>> @@ -18,6 +18,8 @@ typedef struct dt_mstate {
>>   	uint32_t	tag;		/* Tag (for future use) */
>>   	uint64_t	fault;		/* DTrace fault flags */
>>   	uint64_t	tstamp;		/* cached timestamp value */
>> +	uint32_t	offparent;	/* offset in task_struct to real_parent */
>> +	uint32_t	offtgid;	/* offset in task_struct to tgid */
> Not needed.
>
>>   #if 0
>>   	dt_pt_regs	regs;		/* CPU registers */
>>   #endif
>> @@ -48,6 +50,8 @@ typedef struct dt_dctx {
>>   #define DMST_TAG	offsetof(dt_mstate_t, tag)
>>   #define DMST_FAULT	offsetof(dt_mstate_t, fault)
>>   #define DMST_TSTAMP	offsetof(dt_mstate_t, tstamp)
>> +#define DMST_OFFPARENT	offsetof(dt_mstate_t, offparent)
>> +#define DMST_OFFTGID	offsetof(dt_mstate_t, offtgid)
> Not needed - instead add new entries to the 'state' map in dt_state.h.
>
>>   #define DMST_REGS	offsetof(dt_mstate_t, regs)
>>   #define DMST_ARG(n)	offsetof(dt_mstate_t, argv[n])
>>   
>> diff --git a/test/unittest/variables/bvar/tst.ppid.d b/test/unittest/variables/bvar/tst.ppid.d
>> index e406b36f..87bc52da 100644
>> --- a/test/unittest/variables/bvar/tst.ppid.d
>> +++ b/test/unittest/variables/bvar/tst.ppid.d
>> @@ -4,7 +4,6 @@
>>    * Licensed under the Universal Permissive License v 1.0 as shown at
>>    * http://oss.oracle.com/licenses/upl.
>>    */
>> -/* @@xfail: dtv2 */
>>   
>>   /*
>>    * ASSERTION: The 'ppid' variable can be accessed and is not -1.
>> diff --git a/test/unittest/variables/bvar/tst.ppid2.d b/test/unittest/variables/bvar/tst.ppid2.d
>> new file mode 100644
>> index 00000000..be95d033
>> --- /dev/null
>> +++ b/test/unittest/variables/bvar/tst.ppid2.d
>> @@ -0,0 +1,21 @@
>> +/*
>> + * Oracle Linux DTrace.
>> + * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
>> + * Licensed under the Universal Permissive License v 1.0 as shown at
>> + * http://oss.oracle.com/licenses/upl.
>> + */
>> +
>> +/*
>> + * ASSERTION: The 'ppid' variable matches the scripting variable for BEGIN.
>> + *
>> + * SECTION: Variables/Built-in Variables/ppid
>> + */
>> +
>> +#pragma D option quiet
>> +
>> +BEGIN {
>> +	trace(ppid);
>> +	trace($ppid);
>> +	exit(ppid == $ppid ? 0 : 1);
>> +}
>> +
>> -- 
>> 2.18.4
>>
>>
>> _______________________________________________
>> DTrace-devel mailing list
>> DTrace-devel at oss.oracle.com
>> https://oss.oracle.com/mailman/listinfo/dtrace-devel




More information about the DTrace-devel mailing list