[DTrace-devel] [PATCH 40/61] Fix records for aggregation descriptions
eugene.loh at oracle.com
eugene.loh at oracle.com
Fri Jul 8 14:45:24 UTC 2022
From: Eugene Loh <eugene.loh at oracle.com>
There is an array of aggregation descriptions, dtp->dt_adesc.
In legacy DTrace, an aggregation description's records, in order, describe:
*) The 8-byte, dtrace_aggvarid_t, aggregation variable ID.
*) The aggregation's keys, if any -- one record per key.
*) The aggregation's data.
In the port to BPF, the records were changed to be one per each 64-bit
word of aggregation data. Problems include:
*) This breaks some dependent code.
*) There is no description of the keys, if any.
*) There is no use for having a different record for each word of
data... and there are potentially very many such words for
*quantize() aggregations.
Change the aggregation description records back to legacy style.
One exception is that the legacy variable ID has changed from an 8-byte
dtrace_aggvarid_t to a 4-byte dtrace_aggid_t, to match the 4-byte
prefix at the beginning of "tuples". E.g., see dt_cg_arglist().
In legacy DTrace, aggregation descriptions are requested from the
kernel. In the BPF port, key and data sizes are discovered during
code generation. Therefore, add function dt_aggid_add_rec_data()
to complement dt_aggid_add(). (Support for aggregation keys will
come in later patches.)
Signed-off-by: Eugene Loh <eugene.loh at oracle.com>
---
libdtrace/dt_cg.c | 20 ++++++++++++-
libdtrace/dt_consume.c | 6 ++--
libdtrace/dt_impl.h | 3 +-
libdtrace/dt_map.c | 68 ++++++++++++++++++++++++++++--------------
libdtrace/dt_printf.c | 4 +--
5 files changed, 71 insertions(+), 30 deletions(-)
diff --git a/libdtrace/dt_cg.c b/libdtrace/dt_cg.c
index 063ec113..61611c1d 100644
--- a/libdtrace/dt_cg.c
+++ b/libdtrace/dt_cg.c
@@ -6586,8 +6586,10 @@ static dt_cg_aggfunc_f *_dt_cg_agg[DT_AGG_NUM] = {
static void
dt_cg_agg(dt_pcb_t *pcb, dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp)
{
+ dtrace_hdl_t *dtp = pcb->pcb_hdl;
dt_ident_t *aid, *fid;
dt_cg_aggfunc_f *aggfp;
+ int nrecs, off;
/*
* If the aggregation has no aggregating function applied to it, then
@@ -6618,7 +6620,23 @@ dt_cg_agg(dt_pcb_t *pcb, dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp)
assert(aggfp != NULL);
(*aggfp)(pcb, aid, dnp, dlp, drp);
- dt_aggid_add(pcb->pcb_hdl, aid);
+
+ /* add this agg id if we are seeing it for the first time */
+ /* we do this after the BPF code generation since aggfp() sets aid->di_size */
+ if (dt_aggid_lookup(dtp, aid->di_id, NULL) == -1) {
+
+ /* count the number of records */
+ nrecs = 1; /* variable ID */
+ nrecs++; /* data */
+
+ /* add the aid */
+ dt_aggid_add(dtp, aid, nrecs);
+
+ off = sizeof(uint32_t);
+
+ /* add record for aggregation data */
+ off = dt_aggid_add_rec_data(dtp, aid->di_id, off);
+ }
}
void
diff --git a/libdtrace/dt_consume.c b/libdtrace/dt_consume.c
index 41f99970..28fed161 100644
--- a/libdtrace/dt_consume.c
+++ b/libdtrace/dt_consume.c
@@ -1712,7 +1712,7 @@ dt_print_aggs(const dtrace_aggdata_t **aggsdata, int naggvars, void *arg)
* Iterate over each record description in the key, printing the traced
* data.
*/
- for (i = 0; i < agg->dtagd_nrecs; i++) {
+ for (i = 1; i < agg->dtagd_nrecs; i++) {
rec = &agg->dtagd_recs[i];
addr = aggdata->dtada_data + rec->dtrd_offset;
size = rec->dtrd_size;
@@ -1736,8 +1736,8 @@ dt_print_aggs(const dtrace_aggdata_t **aggsdata, int naggvars, void *arg)
aggdata = aggsdata[i];
agg = aggdata->dtada_desc;
rec = &agg->dtagd_recs[aggact];
- addr = aggdata->dtada_data + rec->dtrd_offset;
- size = agg->dtagd_size;
+ addr = aggdata->dtada_data;
+ size = rec->dtrd_size;
assert(DTRACEACT_ISAGG(rec->dtrd_action));
normal = aggdata->dtada_desc->dtagd_normal;
diff --git a/libdtrace/dt_impl.h b/libdtrace/dt_impl.h
index b0af2451..581f568f 100644
--- a/libdtrace/dt_impl.h
+++ b/libdtrace/dt_impl.h
@@ -779,7 +779,8 @@ extern void dt_epid_destroy(dtrace_hdl_t *);
typedef void (*dt_cg_gap_f)(dt_pcb_t *, int);
extern uint32_t dt_rec_add(dtrace_hdl_t *, dt_cg_gap_f, dtrace_actkind_t,
uint32_t, uint16_t, dt_pfargv_t *, uint64_t);
-extern int dt_aggid_add(dtrace_hdl_t *, const dt_ident_t *);
+extern int dt_aggid_add(dtrace_hdl_t *, const dt_ident_t *, int);
+extern int dt_aggid_add_rec_data(dtrace_hdl_t *, dtrace_aggid_t, int);
extern int dt_aggid_lookup(dtrace_hdl_t *, dtrace_aggid_t, dtrace_aggdesc_t **);
extern void dt_aggid_destroy(dtrace_hdl_t *);
diff --git a/libdtrace/dt_map.c b/libdtrace/dt_map.c
index 96660fd0..d7e8a8c5 100644
--- a/libdtrace/dt_map.c
+++ b/libdtrace/dt_map.c
@@ -237,15 +237,12 @@ dt_rec_add(dtrace_hdl_t *dtp, dt_cg_gap_f gapf, dtrace_actkind_t kind,
}
int
-dt_aggid_add(dtrace_hdl_t *dtp, const dt_ident_t *aid)
+dt_aggid_add(dtrace_hdl_t *dtp, const dt_ident_t *aid, int nrecs)
{
dtrace_id_t max;
dtrace_aggdesc_t *agg;
dtrace_recdesc_t *recs;
dtrace_aggid_t id = aid->di_id;
- dt_ident_t *fid = aid->di_iarg;
- int i;
- uint_t off = 0;
while (id >= (max = dtp->dt_maxagg) || dtp->dt_adesc == NULL) {
dtrace_id_t nmax = max ? (max << 1) : 1;
@@ -274,18 +271,13 @@ dt_aggid_add(dtrace_hdl_t *dtp, const dt_ident_t *aid)
if (agg == NULL)
return dt_set_errno(dtp, EDT_NOMEM);
- /*
- * Note the relationship between the aggregation storage
- * size (di_size) and the aggregation data size (dtagd_size):
- * di_size = dtagd_size * (num copies) + (size of latch seq #)
- */
agg->dtagd_id = id;
agg->dtagd_name = aid->di_name;
agg->dtagd_sig = ((dt_idsig_t *)aid->di_data)->dis_auxinfo;
agg->dtagd_normal = 1;
agg->dtagd_varid = aid->di_id;
- agg->dtagd_size = (aid->di_size - sizeof(uint64_t)) / DT_AGG_NUM_COPIES;
- agg->dtagd_nrecs = agg->dtagd_size / sizeof(uint64_t);
+ agg->dtagd_size = -1; /* to be set in dt_aggid_add_rec_data() */
+ agg->dtagd_nrecs = nrecs;
recs = dt_calloc(dtp, agg->dtagd_nrecs, sizeof(dtrace_recdesc_t));
if (recs == NULL) {
@@ -295,22 +287,52 @@ dt_aggid_add(dtrace_hdl_t *dtp, const dt_ident_t *aid)
agg->dtagd_recs = recs;
- for (i = 0; i < agg->dtagd_nrecs; i++) {
- dtrace_recdesc_t *rec = &recs[i];
+ recs[0].dtrd_action = DTRACEACT_DIFEXPR; /* does this matter? */
+ recs[0].dtrd_size = sizeof(uint32_t);
+ recs[0].dtrd_offset = 0;
+ recs[0].dtrd_alignment = sizeof(uint64_t);
+ recs[0].dtrd_format = NULL;
+ recs[0].dtrd_arg = 1;
+
+ /* prepopulate some data fields */
+ recs[nrecs - 1].dtrd_action = ((dt_ident_t *) aid->di_iarg)->di_id;
+ recs[nrecs - 1].dtrd_size = (aid->di_size - sizeof(uint64_t)) / DT_AGG_NUM_COPIES;
+
+ dtp->dt_adesc[id] = agg;
- rec->dtrd_action = fid->di_id;
- rec->dtrd_size = sizeof(uint64_t);
- rec->dtrd_offset = off;
- rec->dtrd_alignment = sizeof(uint64_t);
- rec->dtrd_format = NULL;
- rec->dtrd_arg = 1;
+ return 0;
+}
- off += sizeof(uint64_t);
- }
+int
+dt_aggid_add_rec_data(dtrace_hdl_t *dtp, dtrace_aggid_t id, int off)
+{
+ int nrecs;
+ dtrace_recdesc_t *rec;
- dtp->dt_adesc[id] = agg;
+ assert(id >= 0 && id < dtp->dt_maxagg && dtp->dt_adesc != NULL && dtp->dt_adesc[id] != NULL);
- return 0;
+ nrecs = dtp->dt_adesc[id]->dtagd_nrecs;
+ rec = &dtp->dt_adesc[id]->dtagd_recs[nrecs - 1];
+
+ /* account for the optional TLS key used in tuples */
+ off += sizeof(uint64_t);
+
+ /* align for data */
+ off = (off + 7) & (-8); // FIXME: write more elegantly
+
+ /* set last record (data) */
+ rec->dtrd_offset = off;
+ rec->dtrd_alignment = sizeof(uint64_t);
+ rec->dtrd_format = NULL;
+ rec->dtrd_arg = 1; /* does this matter? */
+
+ /* overall size */
+ /* determine and set the overall size */
+ off += rec->dtrd_size;
+ off = (off + 7) & (-8); // FIXME: write more elegantly
+ dtp->dt_adesc[id]->dtagd_size = off;
+
+ return off;
}
int
diff --git a/libdtrace/dt_printf.c b/libdtrace/dt_printf.c
index e7171dcc..cfbf394c 100644
--- a/libdtrace/dt_printf.c
+++ b/libdtrace/dt_printf.c
@@ -1384,7 +1384,6 @@ dt_printf_format(dtrace_hdl_t *dtp, FILE *fp, const dt_pfargv_t *pfv,
addr = aggdata->dtada_data;
limit = addr + aggdata->dtada_size;
normal = agg->dtagd_normal;
- size = agg->dtagd_size;
sig = agg->dtagd_sig;
flags = DTRACE_BUFDATA_AGGVAL;
} else {
@@ -1409,10 +1408,11 @@ dt_printf_format(dtrace_hdl_t *dtp, FILE *fp, const dt_pfargv_t *pfv,
addr = (caddr_t)buf + rec->dtrd_offset;
limit = lim;
normal = 1;
- size = rec->dtrd_size;
sig = 0;
}
+ size = rec->dtrd_size;
+
if (addr + size > limit) {
dt_dprintf("bad size: addr=%p size=0x%x lim=%p\n",
(void *)addr, rec->dtrd_size, (void *)lim);
--
2.18.4
More information about the DTrace-devel
mailing list