[DTrace-devel] [PATCH 4/7] ctf: add BTF-to-CTF convertor

Thu Nov 2 14:18:58 UTC 2023

On 31 Oct 2023, Kris Van Hees via DTrace-devel spake thusly:

> For kernels that do not provide CTF data, kernel type information can be
> obtained at runtime as BTF data.  We pull in that data and generate
> equivalent CTF data from it.

Nicely done!

> Signed-off-by: Kris Van Hees <kris.van.hees at oracle.com>

Almost r-b-ready, but not quite: there's one definite bug in
BTF_KIND_FWD handling.

> diff --git a/libdtrace/dt_btf.c b/libdtrace/dt_btf.c
> new file mode 100644
> index 00000000..df521516
> --- /dev/null
> +++ b/libdtrace/dt_btf.c
> @@ -0,0 +1,876 @@
> +/*
> + * Oracle Linux DTrace.
> + * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
> + * Licensed under the Universal Permissive License v 1.0 as shown at
> + * http://oss.oracle.com/licenses/upl.
> + */
> +
> +#include <assert.h>
> +#include <byteswap.h>
> +#include <ctf.h>
> +#include <errno.h>
> +#include <stdio.h>
> +#include <sys/mman.h>
> +#include <linux/btf.h>
> +
> +#include <dt_impl.h>
> +#include <dt_btf.h>
> +
> +typedef struct btf_header	btf_header_t;
> +typedef struct btf_type		btf_type_t;
> +typedef struct btf_enum		btf_enum_t;
> +typedef struct btf_array	btf_array_t;
> +typedef struct btf_member	btf_member_t;
> +typedef struct btf_param	btf_param_t;
> +typedef struct btf_var		btf_var_t;
> +typedef struct btf_var_secinfo	btf_var_secinfo_t;
> +typedef struct btf_decl_tag	btf_decl_tag_t;
> +typedef struct btf_enum64	btf_enum64_t;
> +
> +struct dt_btf {
> +	void		*data;			/* raw BTF data */
> +	size_t		size;			/* raw BTF data size */
> +	btf_header_t	*hdr;			/* BTF header */
> +	char		*sdata;			/* string data */
> +	uint32_t	ssize;			/* string data size */
> +	int32_t		type_cnt;		/* number of types */
> +	btf_type_t	**types;		/* type table */
> +	ctf_id_t	*ctfids;		/* matching CTF type ids */
> +};

I'm amazed that this isn't already provided for us somewhere.

> +static const char *
> +dt_btf_errmsg(int err)
> +{
> +	return strerror(err);
> +}
> +
> +static int
> +dt_btf_set_errno(dtrace_hdl_t *dtp, int err)
> +{
> +	dtp->dt_btferr = err;
> +	return -1;
> +}
> +
> +static void *
> +dt_btf_set_load_errno(dtrace_hdl_t *dtp, int err)
> +{
> +	dtp->dt_btferr = err;
> +	return NULL;
> +}
> +
> +static void
> +dt_btf_error(dtrace_hdl_t *dtp, int eid, const char *format, ...)
> +{
> +	va_list	ap;
> +
> +	va_start(ap, format);
> +	dt_set_errmsg(dtp, dt_errtag(eid), NULL, NULL, 0, format, ap);
> +	dt_set_errno(dtp, EDT_BPF);
> +	va_end(ap);
> +}
> +
> +static ctf_id_t
> +dt_ctf_set_errno(dtrace_hdl_t *dtp, int err)
> +{
> +	dtp->dt_ctferr = err;
> +	dt_set_errno(dtp, EDT_CTF);
> +	return CTF_ERR;
> +}
> +
> +static ctf_id_t
> +dt_ctf_error(dtrace_hdl_t *dtp, ctf_dict_t *ctf)
> +{
> +	return dt_ctf_set_errno(dtp, ctf_errno(ctf));
> +}

I don't really see the point of these abstractions, unless you're
trying to make the look of BTF in DTrace parallel to what CTF looks like
(which is not a bad reason to do it).

> +/*
> + * Try to load BTF data from the given file.  We use fopen() & Co. instead of
> + * open() & Co. because the BTF data is usually provided by sysfs pseudo-files

Capitalized 'Co.' is not normal English practice ("& co" instead).

> + * and the fd-based IO primitives do not seem to work with that.

They certainly should, given that fopen() et al are implemented in terms
of them. You're probably seeing the effect of stdio buffering: most
things like this need a single big read() syscall (i.e. they are more
syscall-by-syscall messaging interfaces than files), you can't read()
bit by bit and have things work.

But using fopen() is hardly a problem. If its buffering is useful, use
it!

> +static dt_btf_t *
> +dt_btf_load(dtrace_hdl_t *dtp, const char *fn)
> +{
> +	FILE		*fp;
> +	dt_btf_t	*btf = NULL;
> +	btf_header_t	hdr;
> +	void		*data = NULL;
> +	Elf		*elf;
> +	size_t		shstrs;
> +	Elf_Scn		*sp = NULL;
> +	int		err = 0;
> +
> +	btf = dt_zalloc(dtp, sizeof(dt_btf_t));
> +	if (btf == NULL)
> +		return dt_btf_set_load_errno(dtp, ENOMEM);
> +
> +	fp = fopen(fn, "rb");
> +	if (fp == NULL)
> +		return dt_btf_set_load_errno(dtp,  errno);
> +
> +	/* First see whether this might be a file with raw BTF data. */
> +	if (fread(&hdr, 1, sizeof(hdr), fp) < sizeof(hdr))
> +		goto fail;
> +
> +	rewind(fp);
> +	if (hdr.magic == BTF_MAGIC || bswap_16(hdr.magic) == BTF_MAGIC) {
> +		struct stat	st;

If it's byteswapped, aren't we in trouble unless we byteswap the rest of
the file too? Maybe we shouldn't be checking for that case :)

> +	} else {
> +		/* Next see whether this might be an ELF file with BTF data. */
> +		elf = elf_begin(fileno(fp), ELF_C_READ_MMAP, NULL);
> +		if (elf == NULL)
> +			goto elf_fail_no_end;
> +		if (elf_getshdrstrndx(elf, &shstrs) == -1)
> +			goto elf_fail;
> +
> +		while ((sp = elf_nextscn(elf, sp)) != NULL) {

... is that really the only way to get a specific section out of libelf?

It does appear to be. Ew, how clunky. Oh well.

> +static ctf_id_t
> +dt_btf_add_to_ctf(dtrace_hdl_t *dtp, dt_btf_t *btf, ctf_dict_t *ctf,
> +		  int32_t type_id)
> +{
> +	btf_type_t	*type;
> +	int		vlen;
> +	const char	*name;
> +	ctf_id_t	ctfid;
> +	ctf_encoding_t	enc;
> +
> +	/*
> +	 * If we are not constructing shared_ctf, we may be looking for a type
> +	 * in shared_ctf.
> +	 */
> +	if (dtp->dt_btf && btf != dtp->dt_btf) {
> +		if (type_id < dtp->dt_btf->type_cnt)
> +			return dtp->dt_btf->ctfids[type_id];
> +
> +		type_id -= dtp->dt_btf->type_cnt - 1;
> +	}

So we're doing this by shoving all vmlinux types into shared_ctf and
then all the per-module types into child dicts? I... guess that works.

> +	assert(type_id < btf->type_cnt);
> +
> +	if (btf->ctfids[type_id] != CTF_ERR)
> +		return btf->ctfids[type_id];
> +
> +	assert(type_id > 0);
> +
> +	type = btf->types[type_id];
> +	vlen = BTF_INFO_VLEN(type->info);
> +	name = dt_btf_get_string(dtp, btf, type->name_off);
> +
> +	if (name && name[0]) {
> +		char	n[DT_TYPE_NAMELEN];
> +
> +		/* Do we already have this type? */
> +		switch (BTF_INFO_KIND(type->info)) {
> +		case BTF_KIND_ENUM:
> +		case BTF_KIND_ENUM64:
> +			if (snprintf(n, sizeof(n), "enum %s",
> +				     name == NULL ? "(anon)" : name) < 0)
> +				return dt_ctf_set_errno(dtp, ECTF_NAMELEN);
> +
> +			ctfid = ctf_lookup_by_name(ctf, n);
> +			break;
> +		case BTF_KIND_UNION:
> +			if (snprintf(n, sizeof(n), "union %s",
> +				     name == NULL ? "(anon)" : name) < 0)
> +				return dt_ctf_set_errno(dtp, ECTF_NAMELEN);
> +	
> +			ctfid = ctf_lookup_by_name(ctf, n);
> +			break;
> +		case BTF_KIND_STRUCT:
> +			if (snprintf(n, sizeof(n), "struct %s",
> +				     name == NULL ? "(anon)" : name) < 0)
> +				return dt_ctf_set_errno(dtp, ECTF_NAMELEN);
> +	
> +			ctfid = ctf_lookup_by_name(ctf, n);

Just in case, you probably want to make sure that the type kind of these
types is not CTF_K_FORWARD as well. (Forwards to enums/structs/unions
are in the enum/struct/union namespace and are returned when you look
those up by name: they are auto-promoted to the real type when you add
an actual struct/union/enum with the same name.)

> +	switch (BTF_INFO_KIND(type->info)) {
> +	case BTF_KIND_INT: {
> +		uint32_t	data = *(uint32_t *)(type + 1);
> +		uint32_t	benc = BTF_INT_ENCODING(data);
> +
> +		enc.cte_format = 0;
> +		enc.cte_offset = BTF_INT_OFFSET(data);
> +		enc.cte_bits = BTF_INT_BITS(data);
> +
> +		if (benc & BTF_INT_SIGNED)
> +			enc.cte_format |= CTF_INT_SIGNED;
> +		if (benc & BTF_INT_CHAR)
> +			enc.cte_format |= CTF_INT_CHAR;
> +		if (benc & BTF_INT_BOOL)
> +			enc.cte_format |= CTF_INT_BOOL;
> +
> +		/* Some special case handling. */
> +		if (enc.cte_bits == 8) {
> +			if (strcmp(name, "_Bool") == 0)
> +				enc.cte_format = CTF_INT_BOOL;
> +			else
> +				enc.cte_format = CTF_INT_CHAR;
> +		}
> +
> +		ctfid = ctf_add_integer(ctf, CTF_ADD_ROOT, name, &enc);
> +		btf->ctfids[type_id] = ctfid;
> +
> +		return ctfid == CTF_ERR ? dt_ctf_error(dtp, ctf) : ctfid;

I guess this means we can't drop the old-style non-slice bitfield
representations after all :P (not that I had any plans to, and the API
would always accept this form in any case, though it might end up
translating it into silently-created slices for you in future.)

> +	}
> +	case BTF_KIND_FLOAT: {
> +		switch (type->size) {
> +		case 4:
> +			enc.cte_format = CTF_FP_SINGLE;
> +			break;
> +		case 8:
> +			enc.cte_format = CTF_FP_DOUBLE;
> +			break;
> +		case 16:
> +			enc.cte_format = CTF_FP_LDOUBLE;
> +			break;
> +		}
> +
> +		enc.cte_offset = 0;
> +		enc.cte_bits = type->size * 8;
> +
> +		ctfid = ctf_add_float(ctf, CTF_ADD_ROOT, name, &enc);
> +		btf->ctfids[type_id] = ctfid;
> +
> +		return ctfid == CTF_ERR ? dt_ctf_error(dtp, ctf) : ctfid;
> +	}
> +	case BTF_KIND_ENUM: {
> +		int		i;
> +		btf_enum_t	*item = (btf_enum_t *)(type + 1);
> +
> +		ctfid = ctf_add_enum(ctf, CTF_ADD_ROOT, name);
> +		if (ctfid == CTF_ERR)
> +			return dt_ctf_error(dtp, ctf);
> +
> +		for (i = 0; i < vlen; i++, item++) {
> +			int	err;
> +			const char	*iname;
> +
> +			iname = dt_btf_get_string(dtp, btf, item->name_off);
> +			err = ctf_add_enumerator(ctf, ctfid, iname, item->val);
> +			if (err == CTF_ERR)
> +				return dt_ctf_error(dtp, ctf);
> +		}
> +
> +		btf->ctfids[type_id] = ctfid;
> +
> +		return ctfid == CTF_ERR ? dt_ctf_error(dtp, ctf) : ctfid;
> +	}
> +	case BTF_KIND_PTR: {
> +		ctfid = dt_btf_add_to_ctf(dtp, btf, ctf, type->type);
> +		if (ctfid == CTF_ERR)
> +			return CTF_ERR;		/* errno already set */
> +
> +		ctfid = ctf_add_pointer(ctf, CTF_ADD_ROOT, ctfid);
> +		btf->ctfids[type_id] = ctfid;
> +
> +		return ctfid == CTF_ERR ? dt_ctf_error(dtp, ctf) : ctfid;
> +	}
> +	case BTF_KIND_CONST: {
> +		ctfid = dt_btf_add_to_ctf(dtp, btf, ctf, type->type);
> +		if (ctfid == CTF_ERR)
> +			return CTF_ERR;		/* errno already set */
> +
> +		ctfid = ctf_add_const(ctf, CTF_ADD_ROOT, ctfid);
> +		btf->ctfids[type_id] = ctfid;
> +
> +		return ctfid == CTF_ERR ? dt_ctf_error(dtp, ctf) : ctfid;
> +	}
> +	case BTF_KIND_VOLATILE: {
> +		ctfid = dt_btf_add_to_ctf(dtp, btf, ctf, type->type);
> +		if (ctfid == CTF_ERR)
> +			return CTF_ERR;		/* errno already set */
> +
> +		ctfid = ctf_add_volatile(ctf, CTF_ADD_ROOT, ctfid);
> +		btf->ctfids[type_id] = ctfid;
> +
> +		return ctfid == CTF_ERR ? dt_ctf_error(dtp, ctf) : ctfid;
> +	}
> +	case BTF_KIND_RESTRICT: {
> +		ctfid = dt_btf_add_to_ctf(dtp, btf, ctf, type->type);
> +		if (ctfid == CTF_ERR)
> +			return CTF_ERR;		/* errno already set */
> +
> +		ctfid = ctf_add_restrict(ctf, CTF_ADD_ROOT, ctfid);
> +		btf->ctfids[type_id] = ctfid;
> +
> +		return ctfid == CTF_ERR ? dt_ctf_error(dtp, ctf) : ctfid;
> +	}

It seems to me that all type chains (e.g. member foo -> const ->
restrict -> int) will end up duplicating all the unnamed (const,
restrict) components over and over again. Again, maybe this is harmless,
but it will lead to a *lot* of duplication (dwarf2ctf did this at one
point and we ended up with about 80% of our types being needlessly
duplicated unnamed intermediate things like cv-quals and pointers.)

But fixing this would probably make things much more complex: you'd need
to track the relationship between the type at the end of the chain and
all the types in all the chains that point to it, by which point you
probably want a hash-based deduplicator like the one in ctf-dedup, and
you should probably just be using CTF anyway :P

(So... probably this is right call? I doubt DTrace cares about the
duplication anyway.)

> +	case BTF_KIND_STRUCT: {
> +		int		i;
> +		btf_member_t	*item = (btf_member_t *)(type + 1);

I seem to recall jemarch complaining about various horrible subtle
differences between CTF and BTF structures. You might want to ask about
that, to see if any of them affect us here.

> +			if (BTF_INFO_KFLAG(type->info)) {
> +				enc.cte_format = 0;
> +				enc.cte_offset = 0;
> +				enc.cte_bits = BTF_MEMBER_BITFIELD_SIZE(item->offset);
> +				item->offset = BTF_MEMBER_BIT_OFFSET(item->offset);
> +
> +				if (enc.cte_bits > 0) {
> +					int	kind;
> +
> +					/* Resolve member type for CTF. */
> +					mbid = ctf_type_resolve(ctf, mbid);
> +					kind = ctf_type_kind(ctf, mbid);
> +
> +					if (kind != CTF_K_INTEGER &&
> +					    kind != CTF_K_FLOAT &&
> +					    kind != CTF_K_ENUM)
> +						return dt_ctf_set_errno(
> +							dtp, ECTF_NOTINTFP);

There is no such thing as a floating-point bit field. (Though CTF *can*
represent them, because the Solaris folks seemingly forgot that.)

The use of ctf_type_resolve() should handle bitfields of typedef or
cv-qualified type, good.

> +					mbid = ctf_add_slice(ctf,
> +							     CTF_ADD_NONROOT,
> +							     mbid, &enc);

It seems to me that this will end up adding both bitfield-qualified base
types *and* slices simultaneously. There's nothing wrong with that, but
you can probably simply ignore all bitfield-qualified base types, since
if they matter you'll spot them here too and represent them as slices.
(But don't do that if it would overcomplicate anything: this is all very
marginal space-wise.)

> +	case BTF_KIND_UNION: {

... why can't we handle this with exactly the same code as the
BTF_KIND_STRUCT case above, just calling ctf_add_struct or ctf_add_union
as appropriate?

> +	case BTF_KIND_FUNC_PROTO: {
> +		int		i;
> +		ctf_id_t	argv[vlen];
> +		btf_param_t	*item = (btf_param_t *)(type + 1);
> +		ctf_funcinfo_t	ctc;
> +
> +		ctc.ctc_flags = 0;
> +		ctc.ctc_argc = vlen;
> +		ctc.ctc_return = dt_btf_add_to_ctf(dtp, btf, ctf, type->type);
> +		if (ctc.ctc_return == CTF_ERR)
> +			return CTF_ERR;
> +
> +		for (i = 0; i < vlen; i++) {
> +			argv[i] = dt_btf_add_to_ctf(dtp, btf, ctf, item->type);
> +			if (argv[i] == CTF_ERR)
> +				return CTF_ERR;
> +		}
> +
> +		ctfid = ctf_add_function(ctf, CTF_ADD_ROOT, &ctc, argv);

Best you can do. Not really implemented in CTFv3... but then DTrace
wouldn't use the function prototype names anyway.

> +		btf->ctfids[type_id] = ctfid;
> +
> +		return ctfid == CTF_ERR ? dt_ctf_error(dtp, ctf) : ctfid;
> +	}
> +	case BTF_KIND_FWD: {
> +		uint32_t	kind;
> +
> +		kind = BTF_INFO_KFLAG(type->info) ? CTF_K_STRUCT : CTF_K_STRUCT;

Shouldn't one of those be CTF_K_UNION? (And what about enums? But maybe
BTF doesn't encode forwards to enums... it looks like it doesn't.)

> +	case BTF_KIND_VAR:
> +	case BTF_KIND_DATASEC:
> +	case BTF_KIND_DECL_TAG:
> +	case BTF_KIND_TYPE_TAG:
> +	case BTF_KIND_FUNC:
> +	case BTF_KIND_ENUM64:
> +		return btf->ctfids[0];		/* Ignored for CTF */

BTF_KIND_VAR could possibly turn into a variable-section entry, maybe?

> +	/*
> +	 * Any module other than 'vmlinux' inherits the types from 'vmlinux'.
> +	 * The shared types are 1 through (base = dtp->dt_btf->type_cnt - 1).
> +	 * A omdule's types are base through (base + btf->type_cnt - 1), but

'omdule'.

> +	 * the types are stored in the BTF types array with indexes 1 through
> +	 * (btf->type_cnt - 1).
> +	 */
> +	if (dtp->dt_btf)
> +		base = dtp->dt_btf->type_cnt - 1;
> +
> +	for (i = 1; i < btf->type_cnt; i++) {
> +		int	type_id = i + base;
> +
> +		if (dt_btf_add_to_ctf(dtp, btf, ctf, type_id) == CTF_ERR) {
> +			dt_dprintf("BTF-to-CTF error: %s\n",
> +				   ctf_errmsg(dtp->dt_ctferr));
> +			break;
> +		}
> +	}
> +
> +	/*
> +	 * If we are constructing 'shared_ctf' (no module), we need to ensure
> +	 * that we have typedefs for __kernel_caddr_t and caddr_t.
> +	 */

Is this really the right place to do this? Don't you have to do this for
real CTF too?

> +	if (ctf_update(ctf) == CTF_ERR)
> +		return NULL;

As a note, this does more or less nothing in modern CTF (but is
harmless). In particular it doesn't make access any faster or make it
'like a ctf_open()ed dict' like old Solaris libctf used to (but it's
just a hash lookup anyway, so that doesn't matter).

-- 
NULL && (void)