[DTrace-devel] [PATCH v4 14/25] libcommon: move uprobes creation into the common library

Kris Van Hees kris.van.hees at oracle.com
Sat Oct 8 05:38:00 UTC 2022


On Fri, Oct 07, 2022 at 11:25:13AM +0100, Nick Alcock via DTrace-devel wrote:
> This commit adds functions in the libcommon library to create uprobes
> given a pid and address, create uprobes given more detailed info (a
> (dev, ino, addr) triplet, a spec for the uprobe_events file, and a
> DTrace probe name), and compute a spec. We also provide
> encoding/decoding functions to allow the uprobe to contain the DTrace
> probe name as part of its args even though DTrace probes can contain
> characters that are not allowed in uprobe names or arg names, and even
> though probe arg names must be unique while DTrace probe name components
> need not be.
> 
> The DTrace provider is rejigged to use this code to generate the
> BEGIN/END/ERROR probes, to make sure this code works.
> 
> Signed-off-by: Nick Alcock <nick.alcock at oracle.com>
> ---
>  libcommon/Build            |   2 +-
>  libcommon/uprobes.c        | 325 +++++++++++++++++++++++++++++++++++++
>  libcommon/uprobes.h        |  30 ++++
>  libdtrace/dt_prov_dtrace.c |  44 ++---
>  4 files changed, 366 insertions(+), 35 deletions(-)
>  create mode 100644 libcommon/uprobes.c
>  create mode 100644 libcommon/uprobes.h
> 
> diff --git a/libcommon/Build b/libcommon/Build
> index f4cf3ba5cb88..30d8be708d13 100644
> --- a/libcommon/Build
> +++ b/libcommon/Build
> @@ -9,5 +9,5 @@ LIBS += libcommon
>  libcommon_TARGET = libcommon
>  libcommon_DIR := $(current-dir)
>  libcommon_CPPFLAGS := -Ilibcommon -Ilibproc
> -libcommon_SOURCES = dt_list.c
> +libcommon_SOURCES = uprobes.c dt_list.c
>  libcommon_LIBSOURCES = libcommon
> diff --git a/libcommon/uprobes.c b/libcommon/uprobes.c
> new file mode 100644
> index 000000000000..65ddd2e6faa1
> --- /dev/null
> +++ b/libcommon/uprobes.c
> @@ -0,0 +1,325 @@
> +/*
> + * Oracle Linux DTrace.
> + * Copyright (c) 2019, 2022, Oracle and/or its affiliates. All rights reserved.
> + * Licensed under the Universal Permissive License v 1.0 as shown at
> + * http://oss.oracle.com/licenses/upl.
> + */
> +
> +#include <ctype.h>
> +#include <errno.h>
> +#include <inttypes.h>
> +#include <stdio.h>
> +#include <string.h>
> +#include <unistd.h>
> +#include <libproc.h>
> +#include <assert.h>
> +
> +#define TRACEFS		"/sys/kernel/debug/tracing/"
> +
> +/*
> + * Return a uprobe spec for a given address in a given PID (or
> + * process handle, to use an already-grabbed process).
> + */
> +char *
> +uprobe_spec_by_addr(pid_t pid, ps_prochandle *P, uint64_t addr,
> +		    prmap_t *mapp_)
> +{
> +	int			free_p = 0;
> +	int			perr = 0;
> +	char			*spec = NULL;
> +	const prmap_t		*mapp, *first_mapp;
> +
> +	if (!P) {
> +		P = Pgrab(pid, 2, 0, NULL, &perr);
> +		if (P == NULL)
> +			return NULL;
> +		free_p = 1;
> +	}
> +
> +	mapp = Paddr_to_map(P, addr);
> +	if (mapp == NULL)
> +		goto out;
> +
> +	first_mapp = mapp->pr_file->first_segment;
> +
> +	/*
> +	 * No need for error-checking here: we do the same on error
> +	 * and success.
> +	 */
> +	asprintf(&spec, "%s:0x%lx", mapp->pr_file->prf_mapname,
> +	    addr - first_mapp->pr_vaddr);
> +
> +	if (mapp_)
> +		memcpy(mapp_, mapp, sizeof(prmap_t));
> +
> +out:
> +	if (free_p) {
> +		/*
> +		 * Some things in the prmap aren't valid once the prochandle is
> +		 * freed.
> +		 */
> +		if (mapp_) {
> +			mapp_->pr_mapaddrname = NULL;
> +			mapp_->pr_file = NULL;
> +		}
> +
> +		Prelease(P, PS_RELEASE_NORMAL);
> +		Pfree(P);
> +	}
> +
> +	return spec;
> +}
> +
> +static const char hexdigits[] = "0123456789abcdef";
> +
> +/*
> + * Encode a NAME suitably for representation in a uprobe.  All non-alphanumeric,
> + * non-_ characters are replaced with __XX where XX is the hex encoding of the
> + * ASCII code of the byte. __ itself is replaced with ___.  The first letter
> + * gets a similar transformation applied even to digits, because anything
> + * resembling consistency in naming rules is obviously just wrong.

This is not really needed.  DTrace uses identifiers for the provider, module,
and function names (i.e. [A-Za-z_][0-9A-Za-z_]+) and an identifier that is allowed to contain - as probe name (and that - is rendered as __ in places where it
has to be an identifier, such as in the USDT data).  So, the only encoding that
would need to be done is turning - into __.  But, even that is probably not
needed because the code that would need to call this function (directly or
indirectly) won't be dealing with probe names with a - in them.  This should
only be used in the process of uprobe based probe creation.  Which is used by
the dtrace provider (BEGIN and END probes) and pid/usdt.  The pid probes are
all either entry, return, or an offset in a function, and the usdt probes are
created based on the DOF which has probe names that already had - translated
into __.

> + *
> + * The DIFFERENTIATOR byte must be different for every encoded string in the uprobe.
> + * (It is otherwise ignored, and discarded at decoding time.)

It might be worth actually explaining why the differentiator character is
needed, i.e. to handle the case where two probe description components are
the same identifier.

On the other hand, I do not think you should do that here.  Since it is a
constant, and you already have a similar (actually, identical) value associated
with the 4 argument strings, you can simply hardcode it where the argument
string for the uprobe is constructed.

So with no encoding needed, and no differentiator needed, you can get rid of
this.

> + */
> +char *
> +uprobe_encode_name(const char *name, char differentiator)
> +{
> +	const char *p = name;
> +	char *out_p;
> +	char *encoded;
> +	size_t sz = strlen(name);
> +
> +	/*
> +	 * Compute size changes needed.
> +	 */
> +
> +	while ((p = strstr(p, "__")) != NULL) {
> +		sz++;
> +		p += 2;
> +	}
> +
> +	for (p = name; *p != '\0'; p++) {
> +		if (!isalpha(*p) && !isdigit(*p) && *p != '_')
> +			sz += 3;
> +		if (p == name && isdigit(*p))
> +			sz += 3;
> +	}
> +
> +	encoded = malloc(sz + 2);		/* \0 and differentiator */
> +	if (!encoded)
> +		return NULL;
> +	out_p = encoded;
> +
> +	/* Apply translations.  */
> +
> +	for (p = name; *p != '\0'; p++) {
> +		int hexencode = 0, underencode = 0;
> +
> +		if (!isalpha(*p) && !isdigit(*p) && *p != '_')
> +			hexencode = 1;
> +		if (p == name && isdigit(*p))
> +			hexencode = 1;
> +		if (p[0] == '_' && p[1] == '_' && p[2] != '\0')
> +			underencode = 1;
> +
> +		if (underencode) {
> +			*out_p++ = '_';
> +			*out_p++ = '_';
> +			*out_p++ = '_';
> +			p++;
> +			continue;
> +		}
> +
> +		if (hexencode) {
> +			*out_p++ = '_';
> +			*out_p++ = '_';
> +			*out_p++ = hexdigits[*p >> 4];
> +			*out_p++ = hexdigits[*p & 0xf];
> +		}
> +		else
> +			*out_p++ = *p;
> +	}
> +	*out_p++ = differentiator;
> +	*out_p = '\0';
> +
> +	return encoded;
> +}
> +
> +/*
> + * Decode a NAME: the converse of uprobe_encode_name.
> + */

We already have strhyphenate() in dt_string.c which does all that is needed
as far as I can see.  (See my comment above.)  But of course, you still want
to get rid of the differentiator, although that could as easily be done in the
code that reads the argument list from the format file.  I'd prefer that (and
it pairs well that way with adding the differentiator character when the
argument string is constructed.)

> +char *
> +uprobe_decode_name(const char *name)
> +{
> +	const char *p = name;
> +	char *new_p, *out_p;
> +	char *decoded;
> +	size_t sz = strlen(name);
> +
> +	/*
> +	 * Compute size changes needed.
> +	 */
> +
> +	while ((p = strstr(p, "__")) != NULL) {
> +		if (p[3] == '_') {
> +			sz--;
> +			p += 3;
> +		}
> +		else if (strspn(&p[2], hexdigits) >= 2) {
> +			sz -= 3;
> +			p += 4;
> +		}
> +	}
> +
> +	decoded = malloc(sz + 1);
> +	if (!decoded)
> +		return NULL;
> +	out_p = decoded;
> +
> +	/* Apply translations.  */
> +
> +	p = name;
> +	while ((new_p = strstr(p, "__")) != NULL) {
> +
> +		/*
> +		 * Copy unchanged bytes.
> +		 */
> +		memcpy(out_p, p, new_p - p);
> +		out_p += new_p - p;
> +		p = new_p;
> +
> +		if (p[3] == '_') {
> +			*out_p++ = '_';
> +			*out_p++ = '_';
> +			p += 3;
> +		} else if (strspn(&p[2], hexdigits) >= 2) {
> +			if (isdigit(p[2]))
> +				*out_p = (p[2] - '0') << 4;
> +			else
> +				*out_p = (p[2] - 'a' + 10) << 4;
> +			if (isdigit(p[3]))
> +				*out_p += p[3] - '0';
> +			else
> +				*out_p += p[3] - 'a' + 10;
> +			p += 4;
> +			out_p++;
> +		}
> +		else {
> +			*out_p++ = '_';
> +			*out_p++ = '_';
> +			p += 2;
> +		}
> +	}
> +	/*
> +	 * Copy the remainder.  Clip off the last char, which is the
> +	 * differentiator.
> +	 */
> +	strcpy(out_p, p);
> +
> +	assert(strlen(out_p) > 0);
> +	out_p[strlen(out_p) - 1] = 0;
> +
> +	return decoded;
> +}
> +
> +/*
> + * Create a uprobe for a given mapping, address, and spec: the uprobe may be a
> + * uretprobe.  Return the probe's name as a new dynamically-allocated string, or
> + * NULL on error.  If prv/mod/fun/prb are all set, they are passed down as the
> + * name of the corresponding DTrace probe.
> + */
> +char *
> +uprobe_create_named(dev_t dev, ino_t ino, uint64_t addr, const char *spec, int isret,
> +		    const char *prv, const char *mod, const char *fun,
> +		    const char *prb)
> +{
> +	int fd;
> +	int rc;
> +	char *name, *args = "";
> +
> +	if (asprintf(&name, "dt_pid/dt_%s%llx_%llx_%lx",
> +		isret ? "ret_" : "",

What is the point in adding a "ret_" prefix?  The code that will parse the
uprobes list reads uprobe_events which specifies 'p' or 'r' as the first
character on each line listing a probe, indicating whether it is a regular
probe or a uretprobe.  That is sufficient so no additional "ret_" prefix is
needed.

> +		(unsigned long long) dev,
> +		(unsigned long long) ino,
> +		(unsigned long) addr) < 0)
> +		return NULL;
> +
> +	if (prv && mod && fun && prb) {
> +		char *eprv, *emod, *efun, *eprb;
> +		int failed = 0;
> +
> +		eprv = uprobe_encode_name(prv, '1');
> +		emod = uprobe_encode_name(mod, '2');
> +		efun = uprobe_encode_name(fun, '3');
> +		eprb = uprobe_encode_name(prb, '4');

Should not be needed.

> +
> +		if (eprv && emod && efun && eprb) {
> +			if (asprintf(&args, "%s=\\1 %s=\\2 %s=\\3 %s=\\4",
> +				     eprv, emod, efun, eprb) < 0)
> +				failed = 1;
> +		}
> +		else
> +			failed = 1;
> +
> +		free(eprv);
> +		free(emod);
> +		free(efun);
> +		free(eprb);
> +
> +		if (failed)
> +			return NULL;

This can simply become:

		if (asprintf(&args, "%s1=\\1 %s2=\\2 %s3=\\3 %s4=\\4",
			     prv, mod, fun, prb) < 0)
			return NULL;

> +	}
> +
> +	/* Add the uprobe. */
> +	fd = open(TRACEFS "uprobe_events", O_WRONLY | O_APPEND);
> +	if (fd != -1) {
> +		rc = dprintf(fd, "%c:%s %s %s\n", isret ? 'r' : 'p',
> +			     name, spec, args);
> +
> +		/* TODO: error reporting, probably via a callback */
> +		close(fd);
> +	}
> +
> +	if (fd == -1 || rc == -1) {
> +		return NULL;
> +	}
> +
> +	return name;
> +}
> +
> +/*
> + * Like uprobe_create, but do not specify the name of a corresponding DTrace
> + * probe.  (Used when the caller already knows what probe will be needed, and
> + * there is no possibility of another DTrace having to pick it up from the
> + * systemwide uprobe list.)
> + */
> +char *
> +uprobe_create(dev_t dev, ino_t ino, uint64_t addr, const char *spec, int isret)
> +{
> +    return uprobe_create_named(dev, ino, addr, spec, isret,
> +			       NULL, NULL, NULL, NULL);
> +}
> +
> +/*
> + * Create a uprobe given a particular pid and address.  Return the probe's name
> + * as a new dynamically-allocated string, or NULL on error.  If prv/mod/fun/prb
> + * are set, they are passed down as the name of the corresponding DTrace probe.
> + */
> +char *
> +uprobe_create_from_addr(pid_t pid, uint64_t addr, const char *prv,
> +			const char *mod, const char *fun, const char *prb)
> +{
> +	char *spec;
> +	char *name;
> +	prmap_t mapp;
> +
> +	spec = uprobe_spec_by_addr(pid, NULL, addr, &mapp);
> +	if (!spec)
> +		return NULL;
> +
> +	name = uprobe_create_named(mapp.pr_dev, mapp.pr_inum, addr, spec, 0,
> +				   prv, mod, fun, prb);
> +	free(spec);
> +	return name;
> +}
> diff --git a/libcommon/uprobes.h b/libcommon/uprobes.h
> new file mode 100644
> index 000000000000..b36c94a3c647
> --- /dev/null
> +++ b/libcommon/uprobes.h
> @@ -0,0 +1,30 @@
> +/*
> + * Oracle Linux DTrace; simple uprobe helper functions
> + * Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved.
> + * Licensed under the Universal Permissive License v 1.0 as shown at
> + * http://oss.oracle.com/licenses/upl.
> + */
> +
> +#ifndef	_UPROBES_H
> +#define	_UPROBES_H
> +
> +#include <sys/types.h>
> +#include <inttypes.h>
> +#include <libproc.h>
> +#include <unistd.h>
> +
> +extern char *uprobe_spec_by_addr(pid_t pid, ps_prochandle *P, uint64_t addr,
> +				 prmap_t *mapp);
> +extern char *uprobe_create_named(dev_t dev, ino_t ino, uint64_t addr,
> +				 const char *spec, int isret, const char *prv,
> +				 const char *mod, const char *fun,
> +				 const char *prb);
> +extern char *uprobe_create(dev_t dev, ino_t ino, uint64_t addr, const char *spec,
> +			   int isret);
> +extern char *uprobe_create_from_addr(pid_t pid, uint64_t addr, const char *prv,
> +				     const char *mod, const char *fun,
> +				     const char *prb);
> +extern char *uprobe_encode_name(const char *, char differentiator);
> +extern char *uprobe_decode_name(const char *);
> +
> +#endif /* _UPROBES_H */
> diff --git a/libdtrace/dt_prov_dtrace.c b/libdtrace/dt_prov_dtrace.c
> index 82f806809fc4..286e0d45c231 100644
> --- a/libdtrace/dt_prov_dtrace.c
> +++ b/libdtrace/dt_prov_dtrace.c
> @@ -16,6 +16,7 @@
>  #include "dt_cg.h"
>  #include "dt_provider.h"
>  #include "dt_probe.h"
> +#include "uprobes.h"
>  
>  static const char		prvname[] = "dtrace";
>  static const char		modname[] = "";
> @@ -164,54 +165,29 @@ static void trampoline(dt_pcb_t *pcb)
>  	dt_cg_tramp_epilogue_advance(pcb, act);
>  }
>  
> -static char *uprobe_spec(dtrace_hdl_t *dtp, const char *prb)
> +static char *uprobe_spec(const char *prb)
>  {
>  	struct ps_prochandle	*P;
>  	int			perr = 0;
>  	char			*fun;
> -	GElf_Sym		sym;
> -	prsyminfo_t		si;
>  	char			*spec = NULL;
> +	GElf_Sym		sym;
>  
> -	fun = dt_alloc(dtp, strlen(prb) + strlen(PROBE_FUNC_SUFFIX) + 1);
> -	if (fun == NULL)
> +	if (asprintf(&fun, "%s%s", prb, PROBE_FUNC_SUFFIX) < 0)
>  		return NULL;
>  
> -	strcpy(fun, prb);
> -	strcat(fun, PROBE_FUNC_SUFFIX);
> -
>  	/* grab our process */
>  	P = Pgrab(getpid(), 2, 0, NULL, &perr);
>  	if (P == NULL) {
> -		dt_free(dtp, fun);
> +		free(fun);
>  		return NULL;
>  	}
>  
> -	/* look up function, get the map, and record */
> -	if (Pxlookup_by_name(P, -1, PR_OBJ_EVERY, fun, &sym, &si) == 0) {
> -		const prmap_t	*mapp;
> -		size_t		len;
> -
> -		mapp = Paddr_to_map(P, sym.st_value);
> -		if (mapp == NULL)
> -			goto out;
> -
> -		if (mapp->pr_file->first_segment != mapp)
> -			mapp = mapp->pr_file->first_segment;
> -
> -		len = snprintf(NULL, 0, "%s:0x%lx",
> -			       mapp->pr_file->prf_mapname,
> -			       sym.st_value - mapp->pr_vaddr) + 1;
> -		spec = dt_alloc(dtp, len);
> -		if (spec == NULL)
> -			goto out;
> -
> -		snprintf(spec, len, "%s:0x%lx", mapp->pr_file->prf_mapname,
> -			 sym.st_value - mapp->pr_vaddr);
> -	}
> +	/* look up function and thus addr */
> +	if (Pxlookup_by_name(P, -1, PR_OBJ_EVERY, fun, &sym, NULL) == 0)
> +		spec = uprobe_spec_by_addr(getpid(), P, sym.st_value, NULL);
>  
> -out:
> -	dt_free(dtp, fun);
> +	free(fun);
>  	Prelease(P, PS_RELEASE_NORMAL);
>  	Pfree(P);
>  
> @@ -230,7 +206,7 @@ static int attach(dtrace_hdl_t *dtp, const dt_probe_t *prp, int bpf_fd)
>  		int	fd, rc = -1;
>  
>  		/* get a uprobe specification for this probe */
> -		spec = uprobe_spec(dtp, prp->desc->prb);
> +		spec = uprobe_spec(prp->desc->prb);
>  		if (spec == NULL)
>  			return -ENOENT;
>  
> -- 
> 2.35.1
> 
> 
> _______________________________________________
> DTrace-devel mailing list
> DTrace-devel at oss.oracle.com
> https://oss.oracle.com/mailman/listinfo/dtrace-devel



More information about the DTrace-devel mailing list