The CTF file or section itself has the following structure:
- +--------+--------+---------+----------+----------+-------+--------+
- | file | type | data | function | variable | data | string |
- | header | labels | objects | info | info | types | table |
- +--------+--------+---------+----------+----------+-------+--------+
+ +--------+--------+---------+----------+--------+----------+...
+ | file | type | data | function | object | function |...
+ | header | labels | objects | info | index | index |...
+ +--------+--------+---------+----------+--------+----------+...
+
+ ...+----------+-------+--------+
+ ...| variable | data | string |
+ ...| info | types | table |
+ +----------+-------+--------+
The file header stores a magic number and version information, encoding
flags, and the byte offset of each of the sections relative to the end of the
For each data object, the type ID (a small integer) is recorded. For each
function, the type ID of the return type and argument types is recorded.
+ For situations in which the order of the symbols in the symtab is not known,
+ a pair of optional indexes follows the data object and function info
+ sections. Each index is an array of strtab indexes, mapped 1:1 to the
+ entries of the corresponding data object / function info section. This
+ gives every entry in those sections a name, so that the linker can correlate
+ the entries with final symtab entries and reorder them accordingly (dropping
+ the indexes in the process).
+
Variable records (as distinct from data objects) provide a modicum of support
for non-ELF systems, mapping a variable name to a CTF type ID. The variable
- names are sorted into ASCIIbetical order, permitting binary searching.
+ names are sorted into ASCIIbetical order, permitting binary searching. We do
+ not define how the consumer maps these variable names to addresses or
+ anything else, or indeed what these names represent: they might be names
+ looked up at runtime via dlsym() or names extracted at runtime by a debugger
+ or anything else the consumer likes.
The data types section is a list of variable size records that represent each
type, in order by their ID. The types themselves form a directed graph,
where each node may contain one or more outgoing edges to other type nodes,
- denoted by their ID.
+ denoted by their ID. Most type nodes are standalone or point backwards to
+ earlier nodes, but this is not required: nodes can point to later nodes,
+ as structure and union members in particular may do.
Strings are recorded as a string table ID (0 or 1) and a byte offset into the
string table. String table 0 is the internal CTF string table. String table
unsigned char ctp_flags; /* Flags (see below). */
} ctf_preamble_t;
+typedef struct ctf_header_v2 /* Header layout lacking the cth_cuname,
+ cth_objtidxoff and cth_funcidxoff fields of ctf_header; presumably the
+ version 2 on-disk header -- confirm against the reader. */
+{
+ ctf_preamble_t cth_preamble;
+ uint32_t cth_parlabel; /* Ref to name of the parent label uniquified against. */
+ uint32_t cth_parname; /* Ref to basename of parent. */
+ uint32_t cth_lbloff; /* Offset of label section. */
+ uint32_t cth_objtoff; /* Offset of object section. */
+ uint32_t cth_funcoff; /* Offset of function section. */
+ uint32_t cth_varoff; /* Offset of variable section. */
+ uint32_t cth_typeoff; /* Offset of type section. */
+ uint32_t cth_stroff; /* Offset of string section. */
+ uint32_t cth_strlen; /* Length of string section in bytes. */
+} ctf_header_v2_t;
+
typedef struct ctf_header
{
ctf_preamble_t cth_preamble;
uint32_t cth_parlabel; /* Ref to name of parent lbl uniq'd against. */
uint32_t cth_parname; /* Ref to basename of parent. */
+ uint32_t cth_cuname; /* Ref to CU name (may be 0). */
uint32_t cth_lbloff; /* Offset of label section. */
uint32_t cth_objtoff; /* Offset of object section. */
uint32_t cth_funcoff; /* Offset of function section. */
+ uint32_t cth_objtidxoff; /* Offset of object index section. */
+ uint32_t cth_funcidxoff; /* Offset of function index section. */
uint32_t cth_varoff; /* Offset of variable section. */
uint32_t cth_typeoff; /* Offset of type section. */
uint32_t cth_stroff; /* Offset of string section. */
/* Data format version number. */
-/* v1 upgraded to v2 is not quite the same as native v2 (the boundary between
- parent and child types is different), and you can write it out again via
- ctf_compress_write(), so we must track whether the thing was originally v1 or
- not. If we were writing the header from scratch, we would add a *pair* of
- version number fields to allow for this, but this will do for now. (A flag
- will not do, because we need to encode both the version we came from and the
- version we went to, not just "we were upgraded".) */
+/* v1 upgraded to a later version is not quite the same as the native form,
+ because the boundary between parent and child types is different but not
+ recorded anywhere, and you can write it out again via ctf_compress_write(),
+ so we must track whether the thing was originally v1 or not. If we were
+ writing the header from scratch, we would add a *pair* of version number
+ fields to allow for this, but this will do for now. (A flag will not do,
+ because we need to encode both the version we came from and the version we
+ went to, not just "we were upgraded".) */
# define CTF_VERSION_1 1
# define CTF_VERSION_1_UPGRADED_3 2
#define CTF_NAME_STID(name) ((name) >> 31)
#define CTF_NAME_OFFSET(name) ((name) & CTF_MAX_NAME)
+/* Set the string table ID (bit 31, as read back by CTF_NAME_STID) of NAME
+ to STID. STID is converted to uint32_t before shifting: left-shifting a
+ signed int 1 by 31 bits is undefined behaviour. */
+#define CTF_SET_STID(name, stid) ((name) | ((uint32_t) (stid) << 31))
/* V2 only. */
#define CTF_TYPE_INFO(kind, isroot, vlen) \
ctt_type, which must be a type which has an encoding (fp, int, or enum). We
also store the referenced type in here, because it is easier to keep the
ctt_size correct for the slice than to shuffle the size into here and keep
- the ctt_type where it is for other types. */
+ the ctt_type where it is for other types.
+
+ In a future version, where we loosen requirements on alignment in the CTF
+ file, the cts_offset and cts_bits will be chars: but for now they must be
+ shorts or everything after a slice will become unaligned. */
typedef struct ctf_slice
{
uint32_t cts_type; /* The referenced type (see the comment above). */
- unsigned char cts_offset;
- unsigned char cts_bits;
+ unsigned short cts_offset; /* A short, not a char, to keep later data aligned. */
+ unsigned short cts_bits; /* A short, not a char, to keep later data aligned. */
} ctf_slice_t;
typedef struct ctf_array_v1
typedef struct ctf_enum /* A name and the value associated with it. */
{
uint32_t cte_name; /* Reference to name in string table. */
- int cte_value; /* Value associated with this name. */
+ int32_t cte_value; /* Value associated with this name (fixed 32-bit width). */
} ctf_enum_t;
+/* The ctf_archive is a collection of ctf_file_t's stored together. The format
+ is suitable for mmap()ing: this control structure merely describes the
+ mmap()ed archive (and overlaps the first few bytes of it), hence the
+ greater care taken with integral types. All CTF files in an archive
+ must have the same data model. (This is not validated.)
+
+ All integers in this structure are stored in little-endian byte order.
+
+ The code relies on the fact that everything in this header is a uint64_t
+ and thus the header needs no padding (in particular, that no padding is
+ needed between ctfa_ctfs and the unnamed ctf_archive_modent array
+ that follows it).
+
+ This is *not* the same as the data structure returned by the ctf_arc_*()
+ functions: this is the low-level on-disk representation. */
+
+#define CTFA_MAGIC 0x8b47f2a4d7623eeb /* Random. */
+struct ctf_archive
+{
+ /* Magic number. (In loaded files, overwritten with the file size
+ so ctf_arc_close() knows how much to munmap()). */
+ uint64_t ctfa_magic;
+
+ /* CTF data model. All CTF files in the archive must share it. */
+ uint64_t ctfa_model;
+
+ /* Number of CTF files in the archive. */
+ uint64_t ctfa_nfiles;
+
+ /* Offset of the name table. */
+ uint64_t ctfa_names;
+
+ /* Offset of the CTF table. Each element starts with a size (a uint64_t
+ in network byte order) then a ctf_file_t of that size.
+ NOTE(review): the comment preceding this struct says the integers in
+ the header are little-endian; confirm that the per-element size really
+ is stored in network byte order as stated here. */
+ uint64_t ctfa_ctfs;
+};
+
+/* An array of ctfa_nfiles of this structure immediately follows the struct
+ ctf_archive header (with no intervening padding) and gives the
+ ctfa_names-relative offset of each name and the ctfa_ctfs-relative offset
+ of the corresponding ctf_file_t. */
+
+typedef struct ctf_archive_modent
+{
+ uint64_t name_offset; /* Offset of this entry's name, relative to ctfa_names. */
+ uint64_t ctf_offset; /* Offset of this entry's ctf_file_t, relative to ctfa_ctfs. */
+} ctf_archive_modent_t;
+
#ifdef __cplusplus
}
#endif