Cleanup: update alignment of TOC

[ctf.git] / common-trace-format-specification.txt
diff --git a/common-trace-format-specification.txt b/common-trace-format-specification.txt

index 90a669b849a4fb45f7b47e66723f64e1c3217f12..4af2244ab37adfa5a513cff029811cbdcc48bcea 100644 (file)
--- a/common-trace-format-specification.txt
+++ b/common-trace-format-specification.txt
@@ -1,4 +1,4 @@
-Common Trace Format (CTF) Specification (pre-v1.8)
+Common Trace Format (CTF) Specification (v1.8.2)
  
  Mathieu Desnoyers, EfficiOS Inc.
  
@@ -41,12 +41,12 @@ Table of Contents
         4.1.6 GNU/C bitfields
         4.1.7 Floating point
         4.1.8 Enumerations
-4.2 Compound types
-    4.2.1 Structures
-    4.2.2 Variants (Discriminated/Tagged Unions)
-    4.2.3 Arrays
-    4.2.4 Sequences
-    4.2.5 Strings
+   4.2 Compound types
+       4.2.1 Structures
+       4.2.2 Variants (Discriminated/Tagged Unions)
+       4.2.3 Arrays
+       4.2.4 Sequences
+       4.2.5 Strings
  5. Event Packet Header
     5.1 Event Packet Header Description
     5.2 Event Packet Context Description
@@ -54,7 +54,7 @@ Table of Contents
     6.1 Event Header
         6.1.1 Type 1 - Few event IDs
         6.1.2 Type 2 - Many event IDs
-   6.2 Event Context
+   6.2 Stream Event Context and Event Context
     6.3 Event Payload
         6.3.1 Padding
         6.3.2 Alignment
@@ -118,11 +118,10 @@ trace event types expressed in the Trace Stream Description Language
  3. Event stream
  
  An event stream can be divided into contiguous event packets of variable
-size. These subdivisions have a variable size. An event packet can
-contain a certain amount of padding at the end. The stream header is
-repeated at the beginning of each event packet. The rationale for the
-event stream design choices is explained in Appendix B. Stream Header
-Rationale.
+size. An event packet can contain a certain amount of padding at the
+end. The stream header is repeated at the beginning of each event
+packet. The rationale for the event stream design choices is explained
+in Appendix B. Stream Header Rationale.
  
  The event stream header will therefore be referred to as the "event packet
  header" throughout the rest of this document.
@@ -166,22 +165,35 @@ by default. It is however recommended to always specify the alignment
  explicitly. Alignment values must be a power of two. Compound types are
  aligned as specified in their individual specification.
  
+The base offset used for field alignment is the start of the packet
+containing the field. For instance, a field aligned on 32 bits needs to
+be at an offset that is a multiple of 32 bits from the start of the
+packet that contains it.
+
  TSDL meta-data attribute representation of a specific alignment:
  
    align = value;                                /* value in bits */
  
  4.1.3 Byte order
  
-By default, the native endianness of the source architecture the trace is used.
-Byte order can be overridden for a basic type by specifying a "byte_order"
-attribute. Typical use-case is to specify the network byte order (big endian:
-"be") to save data captured from the network into the trace without conversion.
-If not specified, the byte order is native.
+By default, byte order of a basic type is the byte order described in
+the trace description.  It can be overridden by specifying a
+"byte_order" attribute for a basic type.  Typical use-case is to specify
+the network byte order (big endian: "be") to save data captured from the
+network into the trace without conversion.
  
  TSDL meta-data representation:
  
    byte_order = native OR network OR be OR le;  /* network and be are aliases */
  
+The "native" keyword selects the byte order described in the trace
+description. The "network" byte order is an alias for big endian.
+
+Even though the trace description section is not per se a type, for the
+sake of clarity, it should be noted that "native" and "network" byte orders
+are only allowed within type declarations. The byte_order specified in
+the trace description section only accepts "be" or "le" values.
+
  4.1.4 Size
  
  Type size, in bits, for integers and floats is that returned by "sizeof()" in C
@@ -236,7 +248,7 @@ TSDL meta-data representation:
      size = value;                               /* value in bits, no default */
      align = value;                              /* value in bits */
      /* base used for pretty-printing output, default: decimal. */
-    base = decimal OR dec OR OR d OR i OR u OR 10 OR hexadecimal OR hex OR x OR X OR p OR 16
+    base = decimal OR dec OR d OR i OR u OR 10 OR hexadecimal OR hex OR x OR X OR p OR 16
             OR octal OR oct OR o OR 8 OR binary OR b OR 2;
      /* character encoding, default: none */
      encoding = none or UTF8 or ASCII;
@@ -358,7 +370,8 @@ enum name : integer_type {
  };
  
  If the values are omitted, the enumeration starts at 0 and increments by 1 for
-each entry:
+each entry. An entry with omitted value that follows a range entry takes
+as value the end_value of the previous range + 1:
  
  enum name : unsigned int {
    ZERO,
@@ -380,7 +393,7 @@ Enumerations omitting the container type ": integer_type" use the "int"
  type (for compatibility with C99). The "int" type must be previously
  declared. E.g.:
  
-typealias integer { size = 32; align = 32; signed = true } := int;
+typealias integer { size = 32; align = 32; signed = true; } := int;
  
  enum {
    ...
@@ -420,7 +433,9 @@ The fields are placed in a sequence next to each other. They each
  possess a field name, which is a unique identifier within the structure.
  The identifier is not allowed to use any reserved keyword
  (see Section C.1.2). Replacing reserved keywords with
-underscore-prefixed field names is recommended.
+underscore-prefixed field names is recommended. Fields starting with an
+underscore should have their leading underscore removed by the CTF trace
+readers.
  
  A nameless structure can be declared as a field type or as part of a typedef:
  
@@ -452,16 +467,36 @@ type selector. The field to use as tag is specified by the "tag_field",
  specified between "< >" after the "variant" keyword for unnamed
  variants, and after "variant name" for named variants.
  
-The alignment of the variant is the alignment of the type as selected by the tag
-value for the specific instance of the variant. The alignment of the type
-containing the variant is independent of the variant alignment.  The size of the
-variant is the size as selected by the tag value for the specific instance of
-the variant.
+The alignment of the variant is the alignment of the type as selected by
+the tag value for the specific instance of the variant.  The size of the
+variant is the size as selected by the tag value for the specific
+instance of the variant.
+
+The alignment of the type containing the variant is independent of the
+variant alignment.  For instance, if a structure contains two fields, a
+32-bit integer, aligned on 32 bits, and a variant, which contains two
+choices: either a 32-bit field, aligned on 32 bits, or a 64-bit field,
+aligned on 64 bits, the alignment of the outermost structure will be
+32-bit (the alignment of its largest field, disregarding the alignment
+of the variant). The alignment of the variant will depend on the
+selector: if the variant's 32-bit field is selected, its alignment will
+be 32-bit, or 64-bit otherwise. It is important to note that variants
+are specifically tailored for compactness in a stream. Therefore, the
+relative offsets of compound type fields can vary depending on
+the offset at which the compound type starts if it contains a variant
+that itself contains a type with alignment larger than the largest field
+contained within the compound type. This is caused by the fact that the
+compound type may contain the enumeration that selects the variant's
+choice, and therefore the alignment to be applied to the compound type
+cannot be determined before encountering the enumeration.
  
  Each variant type selector possesses a field name, which is a unique
  identifier within the variant. The identifier is not allowed to use any
  reserved keyword (see Section C.1.2). Replacing reserved keywords with
-underscore-prefixed field names is recommended.
+underscore-prefixed field names is recommended. Fields starting with an
+underscore should have their leading underscore removed by the CTF trace
+readers.
+
  
  A named variant declaration followed by its definition within a structure
  declaration:
@@ -582,7 +617,7 @@ Arrays are always aligned on their element alignment requirement.
  
  4.2.4 Sequences
  
-Sequences are dynamically-sized arrays. They refer to a a "length"
+Sequences are dynamically-sized arrays. They refer to a "length"
  unsigned integer field, which must appear in either the same static scope,
  prior to the sequence field (in field declaration order), in an upper
  static scope, or in an upper dynamic scope (see Section 7.3.2). This
@@ -652,8 +687,7 @@ Strings are always aligned on byte size.
  The event packet header consists of two parts: the "event packet header"
  is the same for all streams of a trace. The second part, the "event
  packet context", is described on a per-stream basis. Both are described
-in the TSDL meta-data. The packets are aligned on architecture-page-sized
-addresses.
+in the TSDL meta-data.
  
  Event packet header (all fields are optional, specified by TSDL meta-data):
  
@@ -683,13 +717,22 @@ Event packet context (all fields are optional, specified by TSDL meta-data):
    while (or before) writing the first event and while (or after) writing the
    last event in the packet. The inclusive range between these timestamps should
    include all event timestamps assigned to events contained within the packet.
+  The timestamp at the beginning of an event packet is guaranteed to be
+  below or equal to the timestamp at the end of that event packet.
+  The timestamp at the end of an event packet is guaranteed to be below
+  or equal to the timestamps at the end of any following packet within the
+  same stream. See Section 8. Clocks for more detail.
  - Events discarded count
    - Snapshot of a per-stream free-running counter, counting the number of
-    events discarded that were supposed to be written in the stream prior to
-    the first event in the event packet.
-    * Note: producer-consumer buffer full condition should fill the current
+    events discarded that were supposed to be written in the stream after
+    the last event in the event packet.
+    * Note: producer-consumer buffer full condition can fill the current
              event packet with padding so we know exactly where events have been
-            discarded.
+           discarded. However, if the buffer full condition chooses not
+           to fill the current event packet with padding, all we know
+           about the timestamp range in which the events have been
+           discarded is that it is somewhere between the beginning and
+            the end of the packet.
  - Lossless compression scheme used for the event packet content. Applied
    directly to raw data. New types of compression can be added in following
    versions of the format.
@@ -751,8 +794,8 @@ struct event_packet_context {
    uint32_t stream_packet_count;
    uint32_t events_discarded;
    uint32_t cpu_id;
-  uint32_t/uint16_t content_size;
-  uint32_t/uint16_t packet_size;
+  uint64_t/uint32_t/uint16_t content_size;
+  uint64_t/uint32_t/uint16_t packet_size;
    uint8_t  compression_scheme;
    uint8_t  encryption_scheme;
    uint8_t  checksum_scheme;
@@ -763,17 +806,22 @@ struct event_packet_context {
  
  The overall structure of an event is:
  
-1 - Stream Packet Context (as specified by the stream meta-data)
- 2 - Event Header (as specified by the stream meta-data)
-  3 - Stream Event Context (as specified by the stream meta-data)
-   4 - Event Context (as specified by the event meta-data)
-    5 - Event Payload (as specified by the event meta-data)
+1 - Event Header (as specified by the stream meta-data)
+ 2 - Stream Event Context (as specified by the stream meta-data)
+  3 - Event Context (as specified by the event meta-data)
+   4 - Event Payload (as specified by the event meta-data)
  
  This structure defines an implicit dynamic scoping, where variants
  located in inner structures (those with a higher number in the listing
  above) can refer to the fields of outer structures (with lower number in
  the listing above). See Section 7.3 TSDL Scopes for more detail.
  
+The total length of an event is defined as the difference between the
+end of its Event Payload and the end of the previous event's Event
+Payload. Therefore, it includes the event header alignment padding, and
+all its fields and their respective alignment padding. Events of length
+0 are forbidden.
+
  6.1 Event Header
  
  Event headers can be described within the meta-data. We hereby propose, as an
@@ -796,11 +844,13 @@ array is then set to 1.
  Types uintX_t represent an X-bit unsigned integer, as declared with
  either:
  
-  typealias integer { size = X; align = X; signed = false } := uintX_t;
+  typealias integer { size = X; align = X; signed = false; } := uintX_t;
  
      or
  
-  typealias integer { size = X; align = 1; signed = false } := uintX_t;
+  typealias integer { size = X; align = 1; signed = false; } := uintX_t;
+
+For more information about timestamp fields, see Section 8. Clocks.
  
  6.1.1 Type 1 - Few event IDs
  
@@ -858,7 +908,7 @@ struct event_header_2 {
  } align(16);   /* or align(8) */
  
  
-6.2 Event Context
+6.2 Stream Event Context and Event Context
  
  The event context contains information relative to the current event.
  The choice and meaning of this information is specified by the TSDL
@@ -964,12 +1014,13 @@ beginning of the file. This magic number is also used to detect the
  endianness of the architecture by trying to read the CTF magic number
  and its counterpart in reversed endianness. The events within the
  meta-data stream have no event header nor event context. Each event only
-contains a "sequence" payload, which is a sequence of bits using the
-"trace.packet.header.content_size" field as a placeholder for its length
-(the packet header size should be substracted). The formatting of this
-sequence of bits is a plain-text representation of the TSDL description.
-Each meta-data packet start with a special packet header, specific to
-the meta-data stream, which contains, exactly:
+contains a special "sequence" payload, which is a sequence of bits whose
+length is implicitly calculated by using the
+"trace.packet.header.content_size" field, minus the packet header size.
+The formatting of this sequence of bits is a plain-text representation
+of the TSDL description.  Each meta-data packet starts with a special
+packet header, specific to the meta-data stream, which contains,
+exactly:
  
  struct metadata_packet_header {
    uint32_t magic;                      /* 0x75D11D57 */
@@ -985,7 +1036,7 @@ struct metadata_packet_header {
  };
  
  The packet-based meta-data can be converted to a text-only meta-data by
-concatenating all the strings in contains.
+concatenating all the strings it contains.
  
  In the textual representation of the meta-data, the text contained
  within "/*" and "*/", as well as within "//" and end of line, are
@@ -1024,14 +1075,14 @@ path lookups) and for sequence references to length fields.
  
  7.3.1 Lexical Scope
  
-Each of "trace", "stream", "event", "struct" and "variant" have their own
-nestable declaration scope, within which types can be declared using "typedef"
-and "typealias". A root declaration scope also contains all declarations
-located outside of any of the aforementioned declarations. An inner
-declaration scope can refer to type declared within its container
-lexical scope prior to the inner declaration scope. Redefinition of a
-typedef or typealias is not valid, although hiding an upper scope
-typedef or typealias is allowed within a sub-scope.
+Each of "trace", "env", "stream", "event", "struct" and "variant" has
+its own nestable declaration scope, within which types can be declared
+using "typedef" and "typealias". A root declaration scope also contains
+all declarations located outside of any of the aforementioned
+declarations. An inner declaration scope can refer to types declared
+within its container lexical scope prior to the inner declaration scope.
+Redefinition of a typedef or typealias is not valid, although hiding an
+upper scope typedef or typealias is allowed within a sub-scope.
  
  7.3.2 Static and Dynamic Scopes
  
@@ -1070,6 +1121,7 @@ header as selector.
  
  The dynamic scope prefixes are thus:
  
+ - Trace Environment: <env. >,
   - Trace Packet Header: <trace.packet.header. >,
   - Stream Packet Context: <stream.packet.context. >,
   - Event Header: <stream.event.header. >,
@@ -1085,7 +1137,9 @@ keywords "trace", "stream", and "event" are reserved, and thus
  not permitted as field names. It is recommended that field names
  clashing with CTF and C99 reserved keywords use an underscore prefix to
  eliminate the risk of generating a description containing an invalid
-field name.
+field name. Consequently, fields starting with an underscore should have
+their leading underscore removed by the CTF trace readers.
+
  
  The information available in the dynamic scopes can be thought of as the
  current tracing context. At trace production, information about the
@@ -1105,8 +1159,8 @@ trace. The event "id" field can be left out if there is only one event
  in a stream.
  
  trace {
-  major = value;                               /* Trace format version */
-  minor = value;
+  major = value;                       /* CTF spec version major number */
+  minor = value;                       /* CTF spec version minor number */
    uuid = "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa";       /* Trace UUID */
    byte_order = be OR le;                       /* Endianness (required) */
    packet.header := struct {
@@ -1116,6 +1170,16 @@ trace {
    };
  };
  
+/*
+ * The "env" (environment) scope contains assignment expressions. The
+ * field names and content are implementation-defined.
+ */
+env {
+  pid = value;                 /* example */
+  proc_name = "name";          /* example */
+  ...
+};
+
  stream {
    id = stream_id;
    /* Type 1 - Few event IDs; Type 2 - Many event IDs. See section 6.1. */
@@ -1132,8 +1196,8 @@ event {
    name = "event_name";
    id = value;                  /* Numeric identifier within the stream */
    stream_id = stream_id;
-  loglevel.identifier = "loglevel_identifier";
-  loglevel.value = value;
+  loglevel = value;
+  model.emf.uri = "string";
    context := struct {
      ...
    };
@@ -1142,6 +1206,14 @@ event {
    };
  };
  
+callsite {
+  name = "event_name";
+  func = "func_name";
+  file = "myfile.c";
+  line = 39;
+  ip = 0x40096c;
+};
+
  /* More detail on types in section 4. Types */
  
  /*
@@ -1256,40 +1328,47 @@ Describing a clock and how it is used by streams is threefold: first,
  the clock and clock topology should be described in a "clock"
  description block, e.g.:
  
-typealias integer { size = 32; align = 32; signed = true } := uint32_t;
-
-enum clocks : uint32_t {
-       cycle_counter,
-};
-
-clock[cycle_counter] {
+clock {
+       name = cycle_counter_sync;
         uuid = "62189bee-96dc-11e0-91a8-cfa3d89f3923";
-       description = "Local CPU cycle counter";
+       description = "Cycle counter synchronized across CPUs";
         freq = 1000000000;             /* frequency, in Hz */
         /* precision in seconds is: 1000 * (1/freq) */
         precision = 1000;
-       /* clock value offset from Epoch is: offset * (1/freq) */
-       offset = 1326476837897235420;
+       /*
+        * clock value offset from Epoch is:
+        * offset_s + (offset * (1/freq))
+        */
+       offset_s = 1326476837;
+       offset = 897235420;
+       absolute = FALSE;
  };
  
-The optional field "uuid" is the unique identifier of the clock. It can
-be used to correlate different traces that use the same clock. An
-optional textual description string can be added with the "description"
-field. The "freq" field is the initial frequency of the clock, in Hz. If
-the "freq" field is not present, the frequency is assumed to be
-1000000000 (providing clock increment of 1 ns). The optional "precision"
-field details the uncertainty on the clock measurements, in (1/freq)
-units. The "offset" field indicates the offset from POSIX.1 Epoch,
-1970-01-01 00:00:00 +0000 (UTC), to the zero of value of the clock, in
-(1/freq) units. If the "offset" field is not present, it is assigned the
-0 value.
+The mandatory "name" field specifies the name of the clock identifier,
+which can later be used as a reference. The optional field "uuid" is the
+unique identifier of the clock. It can be used to correlate different
+traces that use the same clock. An optional textual description string
+can be added with the "description" field. The "freq" field is the
+initial frequency of the clock, in Hz. If the "freq" field is not
+present, the frequency is assumed to be 1000000000 (providing clock
+increment of 1 ns). The optional "precision" field details the
+uncertainty on the clock measurements, in (1/freq) units. The "offset_s"
+and "offset" fields indicate the offset from POSIX.1 Epoch, 1970-01-01
+00:00:00 +0000 (UTC), to the zero value of the clock. The "offset_s"
+field is in seconds. The "offset" field is in (1/freq) units. If either
+the "offset_s" or the "offset" field is not present, it is assigned the 0
+value. The field "absolute" is TRUE if the clock is a global reference
+across different clock uuid (e.g. NTP time). Otherwise, "absolute" is
+FALSE, and the clock can be considered as synchronized only with other
+clocks that have the same uuid.
+
  
  Secondly, a reference to this clock should be added within an integer
  type:
  
  typealias integer {
         size = 64; align = 1; signed = false;
-       map = clock[cycle_counter].value;
+       map = clock.cycle_counter_sync.value;
  } := uint64_ccnt_t;
  
  Thirdly, stream declarations can reference the clock they use as a
@@ -1311,10 +1390,11 @@ stream {
  };
  
  For an N-bit integer type referring to a clock, if the integer overflows
-compared to the N low order bits of the clock prior value, then it is
-assumed that one, and only one, overflow occurred. It is therefore
-important that events encoding time on a small number of bits happen
-frequently enough to detect when more than one N-bit overflow occurs.
+compared to the N low order bits of the clock prior value found in the
+same stream, then it is assumed that one, and only one, overflow
+occurred. It is therefore important that events encoding time on a small
+number of bits happen frequently enough to detect when more than one
+N-bit overflow occurs.
  
  In a packet context, clock field names ending with "_begin" and "_end"
  have a special meaning: this refers to the time-stamps at, respectively,
@@ -1390,11 +1470,13 @@ token:
  keyword: is one of
  
  align
+callsite
  const
  char
  clock
  double
  enum
+env
  event
  floating_point
  float
@@ -1707,9 +1789,12 @@ typedef-name:
  2.3) CTF-specific declarations
  
  ctf-specifier:
+       clock { ctf-assignment-expression-list-opt }
         event { ctf-assignment-expression-list-opt }
         stream { ctf-assignment-expression-list-opt }
+       env { ctf-assignment-expression-list-opt }
         trace { ctf-assignment-expression-list-opt }
+       callsite { ctf-assignment-expression-list-opt }
         typealias declaration-specifiers abstract-declarator-list type-assignment-operator declaration-specifiers abstract-declarator-list
         typealias declaration-specifiers abstract-declarator-list type-assignment-operator declarator-list