[DTrace-devel] [PATCH] Implement the ppid builtin D variable
Kris Van Hees
kris.van.hees at oracle.com
Wed Sep 16 14:02:18 PDT 2020
Comments below...
On Wed, Sep 16, 2020 at 02:16:33PM -0400, eugene.loh at oracle.com wrote:
> From: Eugene Loh <eugene.loh at oracle.com>
>
> ---
> bpf/get_bvar.c | 11 ++++++
> libdtrace/dt_bpf.c | 49 +++++++++++++++++++++---
> libdtrace/dt_dctx.h | 4 ++
> test/unittest/variables/bvar/tst.ppid.d | 1 -
> test/unittest/variables/bvar/tst.ppid2.d | 21 ++++++++++
> 5 files changed, 80 insertions(+), 6 deletions(-)
> create mode 100644 test/unittest/variables/bvar/tst.ppid2.d
>
> diff --git a/bpf/get_bvar.c b/bpf/get_bvar.c
> index 9f091080..2f6a59cc 100644
> --- a/bpf/get_bvar.c
> +++ b/bpf/get_bvar.c
> @@ -46,6 +46,17 @@ noinline uint64_t dt_get_bvar(dt_mstate_t *mst, uint32_t id)
>
> return val & 0x00000000ffffffffUL;
> }
> + case DIF_VAR_PPID: {
> + uint64_t ptr = bpf_get_current_task();
> + int32_t val = -1;
> +
> + if (ptr == 0)
> + return -1;
> +
> + bpf_probe_read((void *) &ptr, 8, (const void *) (ptr + mst->offparent));
> + bpf_probe_read((void *) &val, 4, (const void *) (ptr + mst->offtgid));
We should be checking the return values of the bpf_probe_read() calls. It
returns 0 upon success, and a negative value (I think just -EFAULT) when it
fails.
I suggested storing the offsets in the 'state' map rather than the 'mem' map.
By placing it in mst (struct mstate in the 'mem' per-cpu map value) we end up
with a copy per CPU which wastes (some) unnecessary space. It is also a better
fit for the 'state' map (as suggested) than the mstate (which is the machine
state during the execution of a clause, populated in the prologue of each and
every clause).
The penalty of doing two bpf_map_lookup_elem() calls to get the offset values
isn't a significant overhead.
> + return (uint64_t) val;
> + }
> case DIF_VAR_UID: {
> uint64_t val = bpf_get_current_uid_gid();
>
> diff --git a/libdtrace/dt_bpf.c b/libdtrace/dt_bpf.c
> index 556acb22..3cc55213 100644
> --- a/libdtrace/dt_bpf.c
> +++ b/libdtrace/dt_bpf.c
> @@ -82,6 +82,28 @@ create_gmap(dtrace_hdl_t *dtp, const char *name, enum bpf_map_type type,
> return fd;
> }
>
> +static int
> +get_task_offsets(dtrace_hdl_t *dtp, int *offparent, int *offtgid)
> +{
> + ctf_id_t type;
> + ctf_membinfo_t ctm;
> +
> + type = ctf_lookup_by_name(dtp->dt_shared_ctf, "struct task_struct");
> + if (type == CTF_ERR)
> + return -1;
> +
> + if (ctf_member_info(dtp->dt_shared_ctf, type, "real_parent", &ctm)
> + == CTF_ERR)
> + return -1;
> + *offparent = ctm.ctm_offset / NBBY;
> +
> + if (ctf_member_info(dtp->dt_shared_ctf, type, "tgid", &ctm) == CTF_ERR)
> + return -1;
> + *offtgid = ctm.ctm_offset / NBBY;
> +
> + return 0;
> +}
Whenwe store these in the state map, this function can simply be passed the
state map fd and populate it directly.
> /*
> * Create the global BPF maps that are shared between all BPF programs in a
> * single tracing session:
> @@ -134,8 +156,13 @@ int
> dt_bpf_gmap_create(dtrace_hdl_t *dtp)
> {
> int gvarc, tvarc;
> - int ci_mapfd;
> + int ci_mapfd, ci_memfd;
Not that it matters in view of the comments above, but "ci_" stands for
cpu info, sp ci_memfd wouldn't really be the right name to use here. Anyway,
I do think we should store this in the 'state' map so this is not an issue
in that case.
> uint32_t key = 0;
> + size_t memsz = roundup(sizeof(dt_mstate_t), 8) + 8 +
> + roundup(dtp->dt_maxreclen, 8);
I rather keep it in the actual map creation call so it is obvious what the size
is as you see the call to create the map.
> + int icpu, ncpus = dtp->dt_conf.num_online_cpus;
> + int offparent = -1, offtgid = -1;
> + char *p;
Not needed when storing the data in the 'state' map.
> /* If we already created the global maps, return success. */
> if (dt_gmap_done)
> @@ -159,10 +186,9 @@ dt_bpf_gmap_create(dtrace_hdl_t *dtp)
> if (ci_mapfd == -1)
> return -1; /* dt_errno is set for us */
>
> - if (create_gmap(dtp, "mem", BPF_MAP_TYPE_PERCPU_ARRAY,
> - sizeof(uint32_t),
> - roundup(sizeof(dt_mstate_t), 8) + 8 +
> - roundup(dtp->dt_maxreclen, 8), 1) == -1)
Retained.
> + ci_memfd = create_gmap(dtp, "mem", BPF_MAP_TYPE_PERCPU_ARRAY,
> + sizeof(uint32_t), memsz, 1);
> + if (ci_memfd == -1)
Not needed.
> return -1; /* dt_errno is set for us */
>
> if (create_gmap(dtp, "strtab", BPF_MAP_TYPE_ARRAY,
> @@ -182,6 +208,19 @@ dt_bpf_gmap_create(dtrace_hdl_t *dtp)
> /* Populate the 'cpuinfo' map. */
> dt_bpf_map_update(ci_mapfd, &key, dtp->dt_conf.cpus);
>
> + /* Write some task_struct offsets into mstate. */
> + if (get_task_offsets(dtp, &offparent, &offtgid))
> + return -1; /* FIXME: set dt_errno */
> + p = alloca(memsz * ncpus);
> + memset(p, 0, memsz * ncpus);
> + for (icpu = 0; icpu < ncpus; icpu++) {
> + dt_mstate_t *q = (dt_mstate_t *) (p + icpu * memsz);
> + q->offparent = offparent;
> + q->offtgid = offtgid;
> + }
> + /* FIXME; is this how to use dt_bpf_map_update for a per-CPU array? */
> + dt_bpf_map_update(ci_memfd, &key, p);
> +
Yes indeed, but not needed when we store the offsets in the 'state' map.
> return 0;
> }
>
> diff --git a/libdtrace/dt_dctx.h b/libdtrace/dt_dctx.h
> index 79ef7105..b9b1c460 100644
> --- a/libdtrace/dt_dctx.h
> +++ b/libdtrace/dt_dctx.h
> @@ -18,6 +18,8 @@ typedef struct dt_mstate {
> uint32_t tag; /* Tag (for future use) */
> uint64_t fault; /* DTrace fault flags */
> uint64_t tstamp; /* cached timestamp value */
> + uint32_t offparent; /* offset in task_struct to real_parent */
> + uint32_t offtgid; /* offset in task_struct to tgid */
Not needed.
> #if 0
> dt_pt_regs regs; /* CPU registers */
> #endif
> @@ -48,6 +50,8 @@ typedef struct dt_dctx {
> #define DMST_TAG offsetof(dt_mstate_t, tag)
> #define DMST_FAULT offsetof(dt_mstate_t, fault)
> #define DMST_TSTAMP offsetof(dt_mstate_t, tstamp)
> +#define DMST_OFFPARENT offsetof(dt_mstate_t, offparent)
> +#define DMST_OFFTGID offsetof(dt_mstate_t, offtgid)
Not needed - instead add new entries to the 'state' map in dt_state.h.
> #define DMST_REGS offsetof(dt_mstate_t, regs)
> #define DMST_ARG(n) offsetof(dt_mstate_t, argv[n])
>
> diff --git a/test/unittest/variables/bvar/tst.ppid.d b/test/unittest/variables/bvar/tst.ppid.d
> index e406b36f..87bc52da 100644
> --- a/test/unittest/variables/bvar/tst.ppid.d
> +++ b/test/unittest/variables/bvar/tst.ppid.d
> @@ -4,7 +4,6 @@
> * Licensed under the Universal Permissive License v 1.0 as shown at
> * http://oss.oracle.com/licenses/upl.
> */
> -/* @@xfail: dtv2 */
>
> /*
> * ASSERTION: The 'ppid' variable can be accessed and is not -1.
> diff --git a/test/unittest/variables/bvar/tst.ppid2.d b/test/unittest/variables/bvar/tst.ppid2.d
> new file mode 100644
> index 00000000..be95d033
> --- /dev/null
> +++ b/test/unittest/variables/bvar/tst.ppid2.d
> @@ -0,0 +1,21 @@
> +/*
> + * Oracle Linux DTrace.
> + * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
> + * Licensed under the Universal Permissive License v 1.0 as shown at
> + * http://oss.oracle.com/licenses/upl.
> + */
> +
> +/*
> + * ASSERTION: The 'ppid' variable matches the scripting variable for BEGIN.
> + *
> + * SECTION: Variables/Built-in Variables/ppid
> + */
> +
> +#pragma D option quiet
> +
> +BEGIN {
> + trace(ppid);
> + trace($ppid);
> + exit(ppid == $ppid ? 0 : 1);
> +}
> +
> --
> 2.18.4
>
>
> _______________________________________________
> DTrace-devel mailing list
> DTrace-devel at oss.oracle.com
> https://oss.oracle.com/mailman/listinfo/dtrace-devel
More information about the DTrace-devel
mailing list