v1.12 August 2018
- Add a BTF encoder (Martin KaFai Lau)
BTF (BPF Type Format) is the metadata format which describes
the data types of BPF programs/maps. Hence, it basically focuses on the C
programming language, which modern BPF primarily uses. The first
use case is to provide a generic pretty print capability for a BPF map.
BTF has its roots in CTF (Compact C-Type Format).
- Add Documentation on how to use the BTF encoder: (Arnaldo Carvalho de Melo)
Using the Linux 'perf' tools integration with BPF/llvm/clang to
show how to generate an object file that then gets its DWARF info used
to create a .BTF ELF section with this new BTF format. That augmented
eBPF ELF object file is then loaded while 'perf ftrace -g *bpf*' is used
to show the kernel BTF validation process.
- Initial support for DW_TAG_partial_unit (Arnaldo Carvalho de Melo)
Just by treating these sections as DW_TAG_compile_unit, which is
enough for the structs that don't contain cross-section type references
to be correctly loaded and pretty-printed with pahole.
This doesn't affect the kernel or modules, where such DWARF
compression techniques are not used so far. (Arnaldo Carvalho de Melo)
- Print cacheline boundaries in multiple union members (Arnaldo Carvalho de Melo)
We were showing it just on the first inner union member,
as if it was a struct; now we restart the cacheline boundaries when
moving to print the next inner struct.
As an example, look at 'struct audit_context', where the only
cacheline boundary printed for the following unnamed union was the first
one, for the 'socketcall' struct member. Now that cacheline boundary
appears in each of the union member inner structs:
struct audit_context {
<SNIP>
union {
struct {
int nargs; /* 824 4 */
/* XXX 4 bytes hole, try to pack */
/* --- cacheline 13 boundary (832 bytes) --- */
long int args[6]; /* 832 48 */
} socketcall; /* 824 56 */
struct {
kuid_t uid; /* 824 4 */
kgid_t gid; /* 828 4 */
/* --- cacheline 13 boundary (832 bytes) --- */
umode_t mode; /* 832 2 */
/* XXX 2 bytes hole, try to pack */
u32 osid; /* 836 4 */
int has_perm; /* 840 4 */
uid_t perm_uid; /* 844 4 */
gid_t perm_gid; /* 848 4 */
umode_t perm_mode; /* 852 2 */
/* XXX 2 bytes hole, try to pack */
long unsigned int qbytes; /* 856 8 */
} ipc; /* 824 40 */
struct {
mqd_t mqdes; /* 824 4 */
/* XXX 4 bytes hole, try to pack */
/* --- cacheline 13 boundary (832 bytes) --- */
struct mq_attr mqstat; /* 832 64 */
} mq_getsetattr; /* 824 72 */
struct {
mqd_t mqdes; /* 824 4 */
int sigev_signo; /* 828 4 */
} mq_notify; /* 824 8 */
struct {
mqd_t mqdes; /* 824 4 */
/* XXX 4 bytes hole, try to pack */
/* --- cacheline 13 boundary (832 bytes) --- */
size_t msg_len; /* 832 8 */
unsigned int msg_prio; /* 840 4 */
/* XXX 4 bytes hole, try to pack */
struct timespec64 abs_timeout; /* 848 16 */
} mq_sendrecv; /* 824 40 */
struct {
int oflag; /* 824 4 */
umode_t mode; /* 828 2 */
/* XXX 2 bytes hole, try to pack */
/* --- cacheline 13 boundary (832 bytes) --- */
struct mq_attr attr; /* 832 64 */
} mq_open; /* 824 72 */
struct {
pid_t pid; /* 824 4 */
struct audit_cap_data cap; /* 828 32 */
} capset; /* 824 36 */
struct {
int fd; /* 824 4 */
int flags; /* 828 4 */
} mmap; /* 824 8 */
struct {
int argc; /* 824 4 */
} execve; /* 824 4 */
struct {
char * name; /* 824 8 */
} module; /* 824 8 */
}; /* 824 72 */
/* --- cacheline 14 boundary (896 bytes) --- */
int fds[2]; /* 896 8 */
struct audit_proctitle proctitle; /* 904 16 */
/* size: 920, cachelines: 15, members: 46 */
/* sum members: 912, holes: 2, sum holes: 8 */
/* last cacheline: 24 bytes */
};
- Show where a struct was used, e.g.
$ pahole -I vmlinux
<SNIP>
/* Used at: /home/acme/git/perf/init/main.c */
/* <1f4a5> /home/acme/git/perf/arch/x86/include/asm/orc_types.h:85 */
struct orc_entry {
s16 sp_offset; /* 0 2 */
s16 bp_offset; /* 2 2 */
<SNIP>
- Show offsets at union members (Arnaldo Carvalho de Melo, suggested by Matthew Wilcox):
In complex structs with multiple complex unions, figuring out the
offset for a given union member is difficult, as one needs to find
the union and go to the end of it to see the offset.
This way, for instance, the Linux kernel's 'struct page' shows now as:
struct page {
long unsigned int flags; /* 0 8 */
union {
struct address_space * mapping; /* 8 8 */
void * s_mem; /* 8 8 */
atomic_t compound_mapcount; /* 8 4 */
}; /* 8 8 */
union {
long unsigned int index; /* 16 8 */
void * freelist; /* 16 8 */
}; /* 16 8 */
union {
long unsigned int counters; /* 24 8 */
struct {
union {
atomic_t _mapcount; /* 24 4 */
unsigned int active; /* 24 4 */
struct {
unsigned int inuse:16; /* 24:16 4 */
unsigned int objects:15; /* 24: 1 4 */
unsigned int frozen:1; /* 24: 0 4 */
}; /* 24 4 */
int units; /* 24 4 */
}; /* 24 4 */
atomic_t _refcount; /* 28 4 */
}; /* 24 8 */
}; /* 24 8 */
union {
struct list_head lru; /* 32 16 */
struct dev_pagemap * pgmap; /* 32 8 */
struct {
struct page * next; /* 32 8 */
int pages; /* 40 4 */
int pobjects; /* 44 4 */
}; /* 32 16 */
struct callback_head callback_head; /* 32 16 */
struct {
long unsigned int compound_head; /* 32 8 */
unsigned int compound_dtor; /* 40 4 */
unsigned int compound_order; /* 44 4 */
}; /* 32 16 */
struct {
long unsigned int __pad; /* 32 8 */
pgtable_t pmd_huge_pte; /* 40 8 */
}; /* 32 16 */
}; /* 32 16 */
union {
long unsigned int private; /* 48 8 */
spinlock_t ptl; /* 48 4 */
struct kmem_cache * slab_cache; /* 48 8 */
}; /* 48 8 */
struct mem_cgroup * mem_cgroup; /* 56 8 */
/* size: 64, cachelines: 1, members: 7 */
};
- Search and use running kernel vmlinux when no file is passed (Arnaldo Carvalho de Melo)
Now it is possible to use it just as:
$ pahole -C sk_buff_head
struct sk_buff_head {
struct sk_buff * next; /* 0 8 */
struct sk_buff * prev; /* 8 8 */
__u32 qlen; /* 16 4 */
spinlock_t lock; /* 20 4 */
/* size: 24, cachelines: 1, members: 4 */
/* last cacheline: 24 bytes */
};
$
This will look at /sys/kernel/notes, find the running kernel
build-id, and then search the usual locations (vmlinux,
/lib/modules/`uname -r`/build/vmlinux, the debuginfo package paths, etc)
to find the matching vmlinux with the DWARF info to use. Build-ids are
now ubiquitous, so this shortens the command line for the most commonly used binary.
- Document 'pahole --hex' in the man page (Arnaldo Carvalho de Melo)
This option shows offsets and sizes in hexadecimal, helping to
correlate with reports using that notation.
E.g.:
$ pahole --hex -C sk_buff_head
struct sk_buff_head {
struct sk_buff * next; /* 0 0x8 */
struct sk_buff * prev; /* 0x8 0x8 */
__u32 qlen; /* 0x10 0x4 */
spinlock_t lock; /* 0x14 0x4 */
/* size: 24, cachelines: 1, members: 4 */
/* last cacheline: 24 bytes */
};
$
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
-----BEGIN PGP SIGNATURE-----
iQIzBAABCAAdFiEELb9bqkb7Te0zijNb1lAW81NSqkAFAlt1zXcACgkQ1lAW81NS
qkCMtQ//XS7kB09L+4ua3xOvYyoo948fHAPHOkkzRPT/5CovadneMpoImGbDipta
OsnhMa74gPr/2TCDEeeJnksgHDOePEQceFIHh+H/KxGt7QovtxEQyEnnUcwRVElx
bqCk8ANILMs2/pyTUn5/ctE1vSmImw7zsKMLZvkSXCx7YvPgQc5itAq+WxmFxqgm
cU4p48hw3oaItadrVg8LLSORrTgIc9L3neTS435VRj8Gz0BdayR+yGmRABCtPCSo
FzLUd6DrFdze6SPDu3JbXbanaA9SlE0qwWxTAhzpNZOya3ORwEXsTlh1ZS73U2rZ
1cYMYjD43D5HjxoiCttx6BkhIxfI5KTxmpDC7AFfN6vzflrnM9HkvgyyWw47oyT4
z8JZddzutDZPfYKev7SjAnEwTIVwhK/aZXk7UBDbWga3P62etWc4qV4RiOUSxoVN
T79OAt5xdg6V7QJYzMcbaP8QIUxlnsiXgoJYupTc72GG5blNXDokHtNUu6IVFlOs
IlFhBB6gNLNhaEgfbaTsLTz1d3VBvL7Pg9IdckRsRg+eDcYzvsexVzlsmS9IkaSz
T8UE6q/oxpBRxZY7qFvETR5w3bjm0XFzyyNwGvPjRCE+IwS4bNEUALJPld9eWoJP
mn0yDoi13kNzLZCdWO8bam+GIDINkzkeJz5Zos3MtZB7uoGr720=
=lXDj
-----END PGP SIGNATURE-----