[DTrace-devel] [PATCH 2/3] Introduce temporary string space for string manipulation functions
Eugene Loh
eugene.loh at oracle.com
Thu Sep 2 11:22:19 PDT 2021
Reviewed-by: Eugene Loh <eugene.loh at oracle.com>
albeit with more than the usual amount of comments.
Minor, but "temporary" in the subject line sounds like perhaps this is
being done temporarily. Maybe hyphenate "temporary-string" so that it's
clear what temporary is being attached to.
Incidentally, "tstring" is clearly a string but it's unclear what the
"t" is. (Looking at this patch it's clear, but I mean someday when
someone is reading this code.) How would you be with replacing
"tstring" and "TSTRING" with "tmpstr" and "TMPSTR" everywhere?
On 9/2/21 2:41 AM, Kris Van Hees wrote:
> String functions that have a string as return value need to store that
> string in a temporary location so it can be used in following code.
> We need at most 4 temporary strings to support nesting string functions
> in an expression.
How do you know 4?
Also, what about subroutines that simply *want* to use temporary space
for their own, internal purposes, regardless of what is needed to store
a result? Perhaps the commit message can describe that usage as simply:
dt_cg_tstring_alloc(void)
dt_cg_tstring_free(uint64_t offset)
without having to worry about the dt_node stuff.
> Functions must request a temporary string slot and have it associated
> with their return value using a statement like:
>
> dt_node_tstring(dnp, dt_cg_tstring_alloc());
>
> The address of the string location should be obtained using:
>
> BPF_LOAD(BPF_DW, dnp->dn_reg, BPF_REG_FP, DT_STK_DCTX)
> BPF_LOAD(BPF_DW, dnp->dn_reg, dnp->dn_reg, DCTX_MEM)
> BPF_ALU64_IMM(BPF_ADD, dnp->dn_reg, dnp->dn_tstring->dn_value)
>
> Functions that may be receiving a temporary string as argument (which
> is any function or action that accept string arguments) must free the
> temporary string once it is no longer needed. This should be done
> using code like:
>
> if (n->dn_tstring)
> dt_cg_tstring_free(n->dn_tstring->dn_value);
Are there tests for this sort of thing? Maybe I'm using stale bits, but
stuff like
BEGIN {
strjoin("hi", "there");
strjoin("hi", "there");
strjoin("hi", "there");
strjoin("hi", "there");
strjoin("hi", "there");
strjoin("hi", "there");
strjoin("hi", "there");
exit(0)
}
and
BEGIN {
x = strjoin("hi", "there");
x = strjoin("hi", "there");
x = strjoin("hi", "there");
x = strjoin("hi", "there");
x = strjoin("hi", "there");
x = strjoin("hi", "there");
x = strjoin("hi", "there");
exit(0)
}
do not work for me.
> Signed-off-by: Kris Van Hees <kris.van.hees at oracle.com>
> ---
> libdtrace/dt_bpf.c | 24 +++++++--------
> libdtrace/dt_cg.c | 68 +++++++++++++++++++++++++++++++++++++++++++
> libdtrace/dt_impl.h | 2 ++
> libdtrace/dt_parser.c | 12 ++++++++
> libdtrace/dt_parser.h | 5 +++-
> 5 files changed, 96 insertions(+), 15 deletions(-)
>
> diff --git a/libdtrace/dt_bpf.c b/libdtrace/dt_bpf.c
> index 8195cd07..b6e44a6e 100644
> --- a/libdtrace/dt_bpf.c
> +++ b/libdtrace/dt_bpf.c
> @@ -17,6 +17,7 @@
> #include <dt_dctx.h>
> #include <dt_probe.h>
> #include <dt_state.h>
> +#include <dt_string.h>
> #include <dt_strtab.h>
> #include <dt_bpf.h>
> #include <dt_bpf_maps.h>
> @@ -178,17 +179,9 @@ populate_probes_map(dtrace_hdl_t *dtp, int fd)
> * with a singleton element (key 0). This means that every CPU
> * will see its own copy of this singleton element, and can use it
> * without interference from other CPUs. The scratch memory is
> - * used to store the DTrace context, the temporary output buffer,
> - * and temporary storage for stack traces, string manipulation,
> - * etc.
> - * The size of the map value (a byte array) is the sum of:
> - * - size of the DTrace context, rounded up to the nearest
> - * multiple of 8
> - * - 8 bytes padding for trace buffer alignment purposes
> - * - maximum trace buffer record size, rounded up to the
> - * multiple of 8
> - * - the greater of the maximum stack trace size and three
> - * times the maximum string size
> + * used to store the DTrace machine state, the temporary output
> + * buffer, and temporary storage for stack traces, string
> + * manipulation, etc.
> * - strtab: String table map. This is a global map with a singleton
> * element (key 0) that contains the entire string table as a
> * concatenation of all unique strings (each terminated with a
> @@ -276,20 +269,23 @@ dt_bpf_gmap_create(dtrace_hdl_t *dtp)
>
> /*
> * The size of the map value (a byte array) is the sum of:
> - * - size of the DTrace context, rounded up to the nearest
> + * - size of the DTrace machine state, rounded up to the nearest
> * multiple of 8
> * - 8 bytes padding for trace buffer alignment purposes
> * - maximum trace buffer record size, rounded up to the
> * multiple of 8
> * - the greater of:
> * + the maximum stack trace size
> - * + three times the maximum string size
> + * + three times the maximum string size (incl. length)
"three" looks wrong.
> */
> memsz = roundup(sizeof(dt_mstate_t), 8) +
> 8 +
> roundup(dtp->dt_maxreclen, 8) +
> MAX(sizeof(uint64_t) * dtp->dt_options[DTRACEOPT_MAXFRAMES],
> - 3 * dtp->dt_options[DTRACEOPT_STRSIZE]);
> + DT_TSTRING_SLOTS *
> + (DT_STRLEN_BYTES + dtp->dt_options[DTRACEOPT_STRSIZE] +
> + dtp->dt_options[DTRACEOPT_STRSIZE]
> + ));
Looks like STRSIZE is in there twice? Do you want space for a
terminating NULL?
> if (create_gmap(dtp, "mem", BPF_MAP_TYPE_PERCPU_ARRAY,
> sizeof(uint32_t), memsz, 1) == -1)
> return -1; /* dt_errno is set for us */
> diff --git a/libdtrace/dt_cg.c b/libdtrace/dt_cg.c
> index 67621be6..d8ffa479 100644
> --- a/libdtrace/dt_cg.c
> +++ b/libdtrace/dt_cg.c
> @@ -759,6 +759,7 @@ dt_cg_strlen(dt_irlist_t *dlp, dt_regset_t *drp, int dst, int src)
>
> emit(dlp, BPF_MOV_REG(BPF_REG_1, src));
> dt_regset_xalloc(drp, BPF_REG_0);
> +
> emite(dlp, BPF_CALL_FUNC(idp->di_id), idp);
> dt_regset_free_args(drp);
> emit(dlp, BPF_BRANCH_IMM(BPF_JLE, BPF_REG_0, size, lbl_ok));
> @@ -784,6 +785,70 @@ dt_cg_spill_load(int reg)
> emit(dlp, BPF_LOAD(BPF_DW, reg, BPF_REG_FP, DT_STK_SPILL(reg)));
> }
>
> +typedef struct dt_tstring {
> + uint64_t offset; /* Offset from dctx->mem */
> + int in_use; /* In use (1) or not (0) */
> +} dt_tstring_t;
> +
> +static dt_tstring_t dt_cg_tstrings[DT_TSTRING_SLOTS];
I don't understand this stuff, but other "similar" things are pointers
within dtp. So why is dt_cg_tstrings global?
> +
> +/*
> + * Initialize the temporary string offsets and mark all not in use.
> + */
> +static void
> +dt_cg_tstring_init(dtrace_hdl_t *dtp)
> +{
> + int i;
> + dt_tstring_t *ts = dt_cg_tstrings;
> +
> + for (i = 0; i < ARRAY_SIZE(dt_cg_tstrings); i++, ts++) {
> + ts->offset = i * (DT_STRLEN_BYTES +
> + dtp->dt_options[DTRACEOPT_STRSIZE]);
> + ts->in_use = 0;
> + }
> +}
Here and elsewhere, it would seem that DT_TSTRING_SLOTS would be
preferable over ARRAY_SIZE(dt_cg_tstrings). The former is more concise
and direct. Also, it will work even if one were to allocate
dt_cg_tstrings dynamically.
Should the offsets be computed with space for terminating NULL chars?
> +
> +/*
> + * Allocate a temporary string node to hold the result of a string subroutine.
> + * The first available slot will be used (out of 3 available slots).
> + */
Not 3? Might as well get rid of the parenthetical remark, or even that
entire sentence. Or even the comment. The function is reasonably
self-descriptive.
> +static uint64_t
> +dt_cg_tstring_alloc(void)
> +{
> + int i;
> + dt_tstring_t *ts = dt_cg_tstrings;
> +
> + for (i = 0; i < ARRAY_SIZE(dt_cg_tstrings); i++, ts++) {
> + if (!ts->in_use)
> + break;
> + }
> +
> + assert(i >= 0 && i < ARRAY_SIZE(dt_cg_tstrings));
> + ts->in_use = 1;
> +
> + return ts->offset;
> +}
Why assert(i>=0)? How could i<0?
To me, the loop would be clearer so:
for (i = 0; i < DT_TSTRING_SLOTS; i++, ts++) {
if (!ts->in_use) {
ts->in_use = 1;
return ts->offset;
}
}
assert(0);
> +
> +/*
> + * Mark a temprorary string node as no longer needed. This will make the
> + * temporary string slot available for re-use.
> + */
temprorary -> temporary; but again the function is short and clear
enough that the comment might not even be needed
> +static void
> +dt_cg_tstring_free(uint64_t offset)
> +{
> + int i;
> + dt_tstring_t *ts = dt_cg_tstrings;
> +
> + for (i = 0; i < ARRAY_SIZE(dt_cg_tstrings); i++, ts++) {
> + if (ts->offset == offset)
> + break;
> + }
> +
> + assert(i >= 0 && i < ARRAY_SIZE(dt_cg_tstrings));
> +
> + ts->in_use = 0;
> +}
Same comments about ARRAY_SIZE(), assert(i>=0), and loop rewrite.
> +
> static const uint_t ldstw[] = {
> 0,
> BPF_B, BPF_H, 0, BPF_W,
> @@ -880,6 +945,8 @@ dt_cg_store_val(dt_pcb_t *pcb, dt_node_t *dnp, dtrace_actkind_t kind,
> emitl(dlp, size_ok,
> BPF_MOV_REG(BPF_REG_3, dnp->dn_reg));
> dt_regset_free(drp, dnp->dn_reg);
> + if (dnp->dn_tstring)
> + dt_cg_tstring_free(dnp->dn_tstring->dn_value);
> dt_regset_xalloc(drp, BPF_REG_0);
> emit(dlp, BPF_CALL_HELPER(BPF_FUNC_probe_read));
> dt_regset_free_args(drp);
> @@ -4855,6 +4922,7 @@ dt_cg(dt_pcb_t *pcb, dt_node_t *dnp)
> }
>
> dt_regset_reset(pcb->pcb_regs);
> + dt_cg_tstring_init(pcb->pcb_hdl);
>
> dt_irlist_destroy(&pcb->pcb_ir);
> dt_irlist_create(&pcb->pcb_ir);
> diff --git a/libdtrace/dt_impl.h b/libdtrace/dt_impl.h
> index bd8d9943..da58cfb8 100644
> --- a/libdtrace/dt_impl.h
> +++ b/libdtrace/dt_impl.h
> @@ -188,6 +188,8 @@ typedef struct dt_ahash {
> size_t dtah_size; /* size of hash table */
> } dt_ahash_t;
>
> +#define DT_TSTRING_SLOTS 4
> +
> /*
> * To provide a lock-free aggregation write mechanism for the producer,
> * two copies of each aggregation can be used. A latch sequence number
> diff --git a/libdtrace/dt_parser.c b/libdtrace/dt_parser.c
> index bac895a2..65509f08 100644
> --- a/libdtrace/dt_parser.c
> +++ b/libdtrace/dt_parser.c
> @@ -2582,6 +2582,18 @@ dt_node_trampoline(dt_probe_t *prp)
> return dnp;
> }
>
> +dt_node_t *
> +dt_node_tstring(dt_node_t *fnp, uintmax_t val)
> +{
> + dt_node_t *dnp = dt_node_alloc(DT_NODE_TSTRING);
> +
> + dnp->dn_value = val;
> + dnp->dn_reg = fnp->dn_reg;
> + fnp->dn_tstring = dnp;
> +
> + return dnp;
> +}
> +
> /*
> * This function provides the underlying implementation of cooking an
> * identifier given its node, a hash of dynamic identifiers, an identifier
> diff --git a/libdtrace/dt_parser.h b/libdtrace/dt_parser.h
> index 26e00aec..aec2a410 100644
> --- a/libdtrace/dt_parser.h
> +++ b/libdtrace/dt_parser.h
> @@ -1,6 +1,6 @@
> /*
> * Oracle Linux DTrace.
> - * Copyright (c) 2007, 2019, Oracle and/or its affiliates. All rights reserved.
> + * Copyright (c) 2007, 2021, Oracle and/or its affiliates. All rights reserved.
> * Licensed under the Universal Permissive License v 1.0 as shown at
> * http://oss.oracle.com/licenses/upl.
> */
> @@ -99,6 +99,7 @@ typedef struct dt_node {
> #define dn_string dn_u._const._string /* STRING, IDENT, TYPE */
> #define dn_ident dn_u._nodes._ident /* VAR,SYM,FUN,AGG,INL,PROBE */
> #define dn_args dn_u._nodes._links[0] /* DT_NODE_VAR, FUNC */
> +#define dn_tstring dn_u._nodes._links[1] /* DT_NODE_FUNC */
> #define dn_child dn_u._nodes._links[0] /* DT_NODE_OP1 */
> #define dn_left dn_u._nodes._links[0] /* DT_NODE_OP2, OP3 */
> #define dn_right dn_u._nodes._links[1] /* DT_NODE_OP2, OP3 */
> @@ -148,6 +149,7 @@ typedef struct dt_node {
> #define DT_NODE_PROVIDER 20 /* provider definition */
> #define DT_NODE_PROG 21 /* program translation unit */
> #define DT_NODE_TRAMPOLINE 22 /* probe trampoline */
> +#define DT_NODE_TSTRING 23 /* temnporary string slot */
>
> #define DT_NF_SIGNED 0x01 /* data is a signed quantity (else unsigned) */
> #define DT_NF_COOKED 0x02 /* data is a known type (else still cooking) */
> @@ -200,6 +202,7 @@ extern dt_node_t *dt_node_probe(char *, int, dt_node_t *, dt_node_t *);
> extern dt_node_t *dt_node_provider(char *, dt_node_t *);
> extern dt_node_t *dt_node_program(dt_node_t *);
> extern dt_node_t *dt_node_trampoline(struct dt_probe *);
> +extern dt_node_t *dt_node_tstring(dt_node_t *, uintmax_t);
>
> extern dt_node_t *dt_node_link(dt_node_t *, dt_node_t *);
> extern dt_node_t *dt_node_cook(dt_node_t *, uint_t);
More information about the DTrace-devel
mailing list