[DTrace-devel] [PATCH] Implement the ppid builtin D variable

Kris Van Hees kris.van.hees at oracle.com
Wed Sep 16 14:02:18 PDT 2020


Comments below...

On Wed, Sep 16, 2020 at 02:16:33PM -0400, eugene.loh at oracle.com wrote:
> From: Eugene Loh <eugene.loh at oracle.com>
> 
> ---
>  bpf/get_bvar.c                           | 11 ++++++
>  libdtrace/dt_bpf.c                       | 49 +++++++++++++++++++++---
>  libdtrace/dt_dctx.h                      |  4 ++
>  test/unittest/variables/bvar/tst.ppid.d  |  1 -
>  test/unittest/variables/bvar/tst.ppid2.d | 21 ++++++++++
>  5 files changed, 80 insertions(+), 6 deletions(-)
>  create mode 100644 test/unittest/variables/bvar/tst.ppid2.d
> 
> diff --git a/bpf/get_bvar.c b/bpf/get_bvar.c
> index 9f091080..2f6a59cc 100644
> --- a/bpf/get_bvar.c
> +++ b/bpf/get_bvar.c
> @@ -46,6 +46,17 @@ noinline uint64_t dt_get_bvar(dt_mstate_t *mst, uint32_t id)
>  
>  		return val & 0x00000000ffffffffUL;
>  	}
> +	case DIF_VAR_PPID: {
> +		uint64_t	ptr = bpf_get_current_task();
> +		int32_t		val = -1;
> +
> +		if (ptr == 0)
> +			return -1;
> +
> +		bpf_probe_read((void *) &ptr, 8, (const void *) (ptr + mst->offparent));
> +		bpf_probe_read((void *) &val, 4, (const void *) (ptr + mst->offtgid));

We should be checking the return values of the bpf_probe_read() calls.  It
returns 0 upon success, and a negative value (I think just -EFAULT) when it
fails.

I suggested storing the offsets in the 'state' map rather than the 'mem' map.
By placing it in mst (struct mstate in the 'mem' per-cpu map value) we end up
with a copy per CPU which wastes (some) unnecessary space.  It is also a better
fit for the 'state' map (as suggested) than the mstate (which is the machine
state during the execution of a clause, populated in the prologue of each and
every clause).

The penalty of doing two bpf_map_lookup_elem() calls to get the offset values
isn't a significant overhead.

> +		return (uint64_t) val;
> +	}
>  	case DIF_VAR_UID: {
>  		uint64_t	val = bpf_get_current_uid_gid();
>  
> diff --git a/libdtrace/dt_bpf.c b/libdtrace/dt_bpf.c
> index 556acb22..3cc55213 100644
> --- a/libdtrace/dt_bpf.c
> +++ b/libdtrace/dt_bpf.c
> @@ -82,6 +82,28 @@ create_gmap(dtrace_hdl_t *dtp, const char *name, enum bpf_map_type type,
>  	return fd;
>  }
>  
> +static int
> +get_task_offsets(dtrace_hdl_t *dtp, int *offparent, int *offtgid)
> +{
> +	ctf_id_t type;
> +	ctf_membinfo_t ctm;
> +
> +	type = ctf_lookup_by_name(dtp->dt_shared_ctf, "struct task_struct");
> +	if (type == CTF_ERR)
> +		return -1;
> +
> +	if (ctf_member_info(dtp->dt_shared_ctf, type, "real_parent", &ctm)
> +	    == CTF_ERR)
> +		return -1;
> +	*offparent = ctm.ctm_offset / NBBY;
> +
> +	if (ctf_member_info(dtp->dt_shared_ctf, type, "tgid", &ctm) == CTF_ERR)
> +		return -1;
> +	*offtgid = ctm.ctm_offset / NBBY;
> +
> +	return 0;
> +}

Whenwe store these in the state map, this function can simply be passed the
state map fd and populate it directly.

>  /*
>   * Create the global BPF maps that are shared between all BPF programs in a
>   * single tracing session:
> @@ -134,8 +156,13 @@ int
>  dt_bpf_gmap_create(dtrace_hdl_t *dtp)
>  {
>  	int		gvarc, tvarc;
> -	int		ci_mapfd;
> +	int		ci_mapfd, ci_memfd;

Not that it matters in view of the comments above, but "ci_" stands for
cpu info, sp ci_memfd wouldn't really be the right name to use here.  Anyway,
I do think we should store this in the 'state' map so this is not an issue
in that case.

>  	uint32_t	key = 0;
> +	size_t		memsz = roundup(sizeof(dt_mstate_t), 8) + 8 +
> +				roundup(dtp->dt_maxreclen, 8);

I rather keep it in the actual map creation call so it is obvious what the size
is as you see the call to create the map.

> +	int		icpu, ncpus = dtp->dt_conf.num_online_cpus;
> +	int 		offparent = -1, offtgid = -1;
> +	char		*p;

Not needed when storing the data in the 'state' map.

>  	/* If we already created the global maps, return success. */
>  	if (dt_gmap_done)
> @@ -159,10 +186,9 @@ dt_bpf_gmap_create(dtrace_hdl_t *dtp)
>  	if (ci_mapfd == -1)
>  		return -1;	/* dt_errno is set for us */
>  
> -	if (create_gmap(dtp, "mem", BPF_MAP_TYPE_PERCPU_ARRAY,
> -			sizeof(uint32_t),
> -			roundup(sizeof(dt_mstate_t), 8) + 8 +
> -				roundup(dtp->dt_maxreclen, 8), 1) == -1)

Retained.

> +	ci_memfd = create_gmap(dtp, "mem", BPF_MAP_TYPE_PERCPU_ARRAY,
> +			sizeof(uint32_t), memsz, 1);
> +	if (ci_memfd == -1)

Not needed.

>  		return -1;	/* dt_errno is set for us */
>  
>  	if (create_gmap(dtp, "strtab", BPF_MAP_TYPE_ARRAY,
> @@ -182,6 +208,19 @@ dt_bpf_gmap_create(dtrace_hdl_t *dtp)
>  	/* Populate the 'cpuinfo' map. */
>  	dt_bpf_map_update(ci_mapfd, &key, dtp->dt_conf.cpus);
>  
> +	/* Write some task_struct offsets into mstate. */
> +	if (get_task_offsets(dtp, &offparent, &offtgid))
> +		return -1;   /* FIXME: set dt_errno */
> +	p = alloca(memsz * ncpus);
> +	memset(p, 0, memsz * ncpus);
> +	for (icpu = 0; icpu < ncpus; icpu++) {
> +		dt_mstate_t *q = (dt_mstate_t *) (p + icpu * memsz);
> +		q->offparent = offparent;
> +		q->offtgid = offtgid;
> +	}
> +	/* FIXME; is this how to use dt_bpf_map_update for a per-CPU array? */
> +	dt_bpf_map_update(ci_memfd, &key, p);
> +

Yes indeed, but not needed when we store the offsets in the 'state' map.

>  	return 0;
>  }
>  
> diff --git a/libdtrace/dt_dctx.h b/libdtrace/dt_dctx.h
> index 79ef7105..b9b1c460 100644
> --- a/libdtrace/dt_dctx.h
> +++ b/libdtrace/dt_dctx.h
> @@ -18,6 +18,8 @@ typedef struct dt_mstate {
>  	uint32_t	tag;		/* Tag (for future use) */
>  	uint64_t	fault;		/* DTrace fault flags */
>  	uint64_t	tstamp;		/* cached timestamp value */
> +	uint32_t	offparent;	/* offset in task_struct to real_parent */
> +	uint32_t	offtgid;	/* offset in task_struct to tgid */

Not needed.

>  #if 0
>  	dt_pt_regs	regs;		/* CPU registers */
>  #endif
> @@ -48,6 +50,8 @@ typedef struct dt_dctx {
>  #define DMST_TAG	offsetof(dt_mstate_t, tag)
>  #define DMST_FAULT	offsetof(dt_mstate_t, fault)
>  #define DMST_TSTAMP	offsetof(dt_mstate_t, tstamp)
> +#define DMST_OFFPARENT	offsetof(dt_mstate_t, offparent)
> +#define DMST_OFFTGID	offsetof(dt_mstate_t, offtgid)

Not needed - instead add new entries to the 'state' map in dt_state.h.

>  #define DMST_REGS	offsetof(dt_mstate_t, regs)
>  #define DMST_ARG(n)	offsetof(dt_mstate_t, argv[n])
>  
> diff --git a/test/unittest/variables/bvar/tst.ppid.d b/test/unittest/variables/bvar/tst.ppid.d
> index e406b36f..87bc52da 100644
> --- a/test/unittest/variables/bvar/tst.ppid.d
> +++ b/test/unittest/variables/bvar/tst.ppid.d
> @@ -4,7 +4,6 @@
>   * Licensed under the Universal Permissive License v 1.0 as shown at
>   * http://oss.oracle.com/licenses/upl.
>   */
> -/* @@xfail: dtv2 */
>  
>  /*
>   * ASSERTION: The 'ppid' variable can be accessed and is not -1.
> diff --git a/test/unittest/variables/bvar/tst.ppid2.d b/test/unittest/variables/bvar/tst.ppid2.d
> new file mode 100644
> index 00000000..be95d033
> --- /dev/null
> +++ b/test/unittest/variables/bvar/tst.ppid2.d
> @@ -0,0 +1,21 @@
> +/*
> + * Oracle Linux DTrace.
> + * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
> + * Licensed under the Universal Permissive License v 1.0 as shown at
> + * http://oss.oracle.com/licenses/upl.
> + */
> +
> +/*
> + * ASSERTION: The 'ppid' variable matches the scripting variable for BEGIN.
> + *
> + * SECTION: Variables/Built-in Variables/ppid
> + */
> +
> +#pragma D option quiet
> +
> +BEGIN {
> +	trace(ppid);
> +	trace($ppid);
> +	exit(ppid == $ppid ? 0 : 1);
> +}
> +
> -- 
> 2.18.4
> 
> 
> _______________________________________________
> DTrace-devel mailing list
> DTrace-devel at oss.oracle.com
> https://oss.oracle.com/mailman/listinfo/dtrace-devel



More information about the DTrace-devel mailing list