[DTrace-devel] [PATCH] Implement the ppid builtin D variable
Eugene Loh
eugene.loh at oracle.com
Wed Sep 16 14:42:37 PDT 2020
On 09/16/2020 02:02 PM, Kris Van Hees wrote:
> Comments below...
>
> On Wed, Sep 16, 2020 at 02:16:33PM -0400, eugene.loh at oracle.com wrote:
>> From: Eugene Loh <eugene.loh at oracle.com>
>>
>> ---
>> bpf/get_bvar.c | 11 ++++++
>> libdtrace/dt_bpf.c | 49 +++++++++++++++++++++---
>> libdtrace/dt_dctx.h | 4 ++
>> test/unittest/variables/bvar/tst.ppid.d | 1 -
>> test/unittest/variables/bvar/tst.ppid2.d | 21 ++++++++++
>> 5 files changed, 80 insertions(+), 6 deletions(-)
>> create mode 100644 test/unittest/variables/bvar/tst.ppid2.d
>>
>> diff --git a/bpf/get_bvar.c b/bpf/get_bvar.c
>> index 9f091080..2f6a59cc 100644
>> --- a/bpf/get_bvar.c
>> +++ b/bpf/get_bvar.c
>> @@ -46,6 +46,17 @@ noinline uint64_t dt_get_bvar(dt_mstate_t *mst, uint32_t id)
>>
>> return val & 0x00000000ffffffffUL;
>> }
>> + case DIF_VAR_PPID: {
>> + uint64_t ptr = bpf_get_current_task();
>> + int32_t val = -1;
>> +
>> + if (ptr == 0)
>> + return -1;
>> +
>> + bpf_probe_read((void *) &ptr, 8, (const void *) (ptr + mst->offparent));
>> + bpf_probe_read((void *) &val, 4, (const void *) (ptr + mst->offtgid));
> We should be checking the return values of the bpf_probe_read() calls. It
> returns 0 upon success, and a negative value (I think just -EFAULT) when it
> fails.
>
> I suggested storing the offsets in the 'state' map rather than the 'mem' map.
> By placing it in mst (struct mstate in the 'mem' per-cpu map value) we end up
> with a copy per CPU which wastes (some) unnecessary space. It is also a better
> fit for the 'state' map (as suggested) than the mstate (which is the machine
> state during the execution of a clause, populated in the prologue of each and
> every clause).
>
> The penalty of doing two bpf_map_lookup_elem() calls to get the offset values
> isn't a significant overhead.
>
>> + return (uint64_t) val;
>> + }
>> case DIF_VAR_UID: {
>> uint64_t val = bpf_get_current_uid_gid();
>>
>> diff --git a/libdtrace/dt_bpf.c b/libdtrace/dt_bpf.c
>> index 556acb22..3cc55213 100644
>> --- a/libdtrace/dt_bpf.c
>> +++ b/libdtrace/dt_bpf.c
>> @@ -82,6 +82,28 @@ create_gmap(dtrace_hdl_t *dtp, const char *name, enum bpf_map_type type,
>> return fd;
>> }
>>
>> +static int
>> +get_task_offsets(dtrace_hdl_t *dtp, int *offparent, int *offtgid)
>> +{
>> + ctf_id_t type;
>> + ctf_membinfo_t ctm;
>> +
>> + type = ctf_lookup_by_name(dtp->dt_shared_ctf, "struct task_struct");
>> + if (type == CTF_ERR)
>> + return -1;
>> +
>> + if (ctf_member_info(dtp->dt_shared_ctf, type, "real_parent", &ctm)
>> + == CTF_ERR)
>> + return -1;
>> + *offparent = ctm.ctm_offset / NBBY;
>> +
>> + if (ctf_member_info(dtp->dt_shared_ctf, type, "tgid", &ctm) == CTF_ERR)
>> + return -1;
>> + *offtgid = ctm.ctm_offset / NBBY;
>> +
>> + return 0;
>> +}
> Whenwe store these in the state map, this function can simply be passed the
> state map fd and populate it directly.
>
>> /*
>> * Create the global BPF maps that are shared between all BPF programs in a
>> * single tracing session:
>> @@ -134,8 +156,13 @@ int
>> dt_bpf_gmap_create(dtrace_hdl_t *dtp)
>> {
>> int gvarc, tvarc;
>> - int ci_mapfd;
>> + int ci_mapfd, ci_memfd;
> Not that it matters in view of the comments above, but "ci_" stands for
> cpu info, sp ci_memfd wouldn't really be the right name to use here. Anyway,
> I do think we should store this in the 'state' map so this is not an issue
> in that case.
>
>> uint32_t key = 0;
>> + size_t memsz = roundup(sizeof(dt_mstate_t), 8) + 8 +
>> + roundup(dtp->dt_maxreclen, 8);
> I rather keep it in the actual map creation call so it is obvious what the size
> is as you see the call to create the map.
>
>> + int icpu, ncpus = dtp->dt_conf.num_online_cpus;
>> + int offparent = -1, offtgid = -1;
>> + char *p;
> Not needed when storing the data in the 'state' map.
>
>> /* If we already created the global maps, return success. */
>> if (dt_gmap_done)
>> @@ -159,10 +186,9 @@ dt_bpf_gmap_create(dtrace_hdl_t *dtp)
>> if (ci_mapfd == -1)
>> return -1; /* dt_errno is set for us */
>>
>> - if (create_gmap(dtp, "mem", BPF_MAP_TYPE_PERCPU_ARRAY,
>> - sizeof(uint32_t),
>> - roundup(sizeof(dt_mstate_t), 8) + 8 +
>> - roundup(dtp->dt_maxreclen, 8), 1) == -1)
> Retained.
>
>> + ci_memfd = create_gmap(dtp, "mem", BPF_MAP_TYPE_PERCPU_ARRAY,
>> + sizeof(uint32_t), memsz, 1);
>> + if (ci_memfd == -1)
> Not needed.
>
>> return -1; /* dt_errno is set for us */
>>
>> if (create_gmap(dtp, "strtab", BPF_MAP_TYPE_ARRAY,
>> @@ -182,6 +208,19 @@ dt_bpf_gmap_create(dtrace_hdl_t *dtp)
>> /* Populate the 'cpuinfo' map. */
>> dt_bpf_map_update(ci_mapfd, &key, dtp->dt_conf.cpus);
>>
>> + /* Write some task_struct offsets into mstate. */
>> + if (get_task_offsets(dtp, &offparent, &offtgid))
>> + return -1; /* FIXME: set dt_errno */
>> + p = alloca(memsz * ncpus);
>> + memset(p, 0, memsz * ncpus);
>> + for (icpu = 0; icpu < ncpus; icpu++) {
>> + dt_mstate_t *q = (dt_mstate_t *) (p + icpu * memsz);
>> + q->offparent = offparent;
>> + q->offtgid = offtgid;
>> + }
>> + /* FIXME; is this how to use dt_bpf_map_update for a per-CPU array? */
>> + dt_bpf_map_update(ci_memfd, &key, p);
>> +
> Yes indeed, but not needed when we store the offsets in the 'state' map.
>
>> return 0;
>> }
>>
>> diff --git a/libdtrace/dt_dctx.h b/libdtrace/dt_dctx.h
>> index 79ef7105..b9b1c460 100644
>> --- a/libdtrace/dt_dctx.h
>> +++ b/libdtrace/dt_dctx.h
>> @@ -18,6 +18,8 @@ typedef struct dt_mstate {
>> uint32_t tag; /* Tag (for future use) */
>> uint64_t fault; /* DTrace fault flags */
>> uint64_t tstamp; /* cached timestamp value */
>> + uint32_t offparent; /* offset in task_struct to real_parent */
>> + uint32_t offtgid; /* offset in task_struct to tgid */
> Not needed.
>
>> #if 0
>> dt_pt_regs regs; /* CPU registers */
>> #endif
>> @@ -48,6 +50,8 @@ typedef struct dt_dctx {
>> #define DMST_TAG offsetof(dt_mstate_t, tag)
>> #define DMST_FAULT offsetof(dt_mstate_t, fault)
>> #define DMST_TSTAMP offsetof(dt_mstate_t, tstamp)
>> +#define DMST_OFFPARENT offsetof(dt_mstate_t, offparent)
>> +#define DMST_OFFTGID offsetof(dt_mstate_t, offtgid)
> Not needed - instead add new entries to the 'state' map in dt_state.h.
>
>> #define DMST_REGS offsetof(dt_mstate_t, regs)
>> #define DMST_ARG(n) offsetof(dt_mstate_t, argv[n])
>>
>> diff --git a/test/unittest/variables/bvar/tst.ppid.d b/test/unittest/variables/bvar/tst.ppid.d
>> index e406b36f..87bc52da 100644
>> --- a/test/unittest/variables/bvar/tst.ppid.d
>> +++ b/test/unittest/variables/bvar/tst.ppid.d
>> @@ -4,7 +4,6 @@
>> * Licensed under the Universal Permissive License v 1.0 as shown at
>> * http://oss.oracle.com/licenses/upl.
>> */
>> -/* @@xfail: dtv2 */
>>
>> /*
>> * ASSERTION: The 'ppid' variable can be accessed and is not -1.
>> diff --git a/test/unittest/variables/bvar/tst.ppid2.d b/test/unittest/variables/bvar/tst.ppid2.d
>> new file mode 100644
>> index 00000000..be95d033
>> --- /dev/null
>> +++ b/test/unittest/variables/bvar/tst.ppid2.d
>> @@ -0,0 +1,21 @@
>> +/*
>> + * Oracle Linux DTrace.
>> + * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
>> + * Licensed under the Universal Permissive License v 1.0 as shown at
>> + * http://oss.oracle.com/licenses/upl.
>> + */
>> +
>> +/*
>> + * ASSERTION: The 'ppid' variable matches the scripting variable for BEGIN.
>> + *
>> + * SECTION: Variables/Built-in Variables/ppid
>> + */
>> +
>> +#pragma D option quiet
>> +
>> +BEGIN {
>> + trace(ppid);
>> + trace($ppid);
>> + exit(ppid == $ppid ? 0 : 1);
>> +}
>> +
>> --
>> 2.18.4
>>
>>
>> _______________________________________________
>> DTrace-devel mailing list
>> DTrace-devel at oss.oracle.com
>> https://oss.oracle.com/mailman/listinfo/dtrace-devel
More information about the DTrace-devel
mailing list