Add table of contents

[ctf.git] / common-trace-format-proposal.txt
diff --git a/common-trace-format-proposal.txt b/common-trace-format-proposal.txt

index 7d67d2770647b961b1e8258a2ba80ffb25e50e10..31037223237fbe0510831b9911da98d8ae731049 100644 (file)
--- a/common-trace-format-proposal.txt
+++ b/common-trace-format-proposal.txt
@@ -24,6 +24,47 @@ formats. The development tree is available at:
    gitweb:     http://git.efficios.com/?p=babeltrace.git
  
  
+Table of Contents
+
+1. Preliminary definitions
+2. High-level representation of a trace
+3. Event stream
+4. Types
+   4.1 Basic types
+       4.1.1 Type inheritance
+       4.1.2 Alignment
+       4.1.3 Byte order
+       4.1.4 Size
+       4.1.5 Integers
+       4.1.6 GNU/C bitfields
+       4.1.7 Floating point
+       4.1.8 Enumerations
+   4.2 Compound types
+       4.2.1 Structures
+       4.2.2 Variants (Discriminated/Tagged Unions)
+       4.2.3 Arrays
+       4.2.4 Sequences
+       4.2.5 Strings
+5. Event Packet Header
+   5.1 Event Packet Header Description
+   5.2 Event Packet Context Description
+6. Event Structure
+   6.1 Event Header
+       6.1.1 Type 1 - Few event IDs
+       6.1.2 Type 2 - Many event IDs
+   6.2 Event Context
+   6.3 Event Payload
+       6.3.1 Padding
+       6.3.2 Alignment
+7. Trace Stream Description Language (TSDL)
+   7.1 Meta-data
+   7.2 Declaration vs Definition
+   7.3 TSDL Scopes
+       7.3.1 Lexical Scope
+       7.3.2 Dynamic Scope
+   7.4 TSDL Examples
+
+
  1. Preliminary definitions
  
    - Event Trace: An ordered sequence of events.
@@ -108,16 +149,18 @@ We define "bit-packed" types as following on the next bit, as defined by the
  "Integers" section.
  
  Each basic type must specify its alignment, in bits. Examples of
-possible alignments are: bit-packed, byte-packed, or word-aligned. The
-choice depends on the architecture preference and compactness vs
-performance trade-offs of the implementation.  Architectures providing
-fast unaligned write byte-packed basic types to save space, aligning
-each type on byte boundaries (8-bit). Architectures with slow unaligned
-writes align types on specific alignment values. If no specific
-alignment is declared for a type, it is assumed to be bit-packed for
-integers with size not multiple of 8 bits and for gcc bitfields. All
-other types are byte-packed. It is however recommended to always specify
-the alignment explicitly.
+possible alignments are: bit-packed (align = 1), byte-packed (align =
+8), or word-aligned (e.g. align = 32 or align = 64). The choice depends
+on the architecture preference and compactness vs performance trade-offs
+of the implementation.  Architectures providing fast unaligned writes
+byte-pack basic types to save space, aligning each type on byte
+boundaries (8-bit). Architectures with slow unaligned writes align types
+on specific alignment values. If no specific alignment is declared for a
+type, it is assumed to be bit-packed for integers with size not multiple
+of 8 bits and for gcc bitfields. All other basic types are byte-packed
+by default. It is however recommended to always specify the alignment
+explicitly. Alignment values must be a power of two. Compound types are
+aligned as specified in their individual specification.
  
  TSDL meta-data attribute representation of a specific alignment:
  
@@ -188,6 +231,11 @@ TSDL meta-data representation:
      byte_order = native OR network OR be OR le; /* default native */
      size = value;                               /* value in bits, no default */
      align = value;                              /* value in bits */
+    /* base used for pretty-printing output, default: decimal. */
+    base = decimal OR dec OR d OR i OR u OR 10 OR hexadecimal OR hex OR x OR X OR p OR 16
+           OR octal OR oct OR o OR 8 OR binary OR b OR 2;
+    /* character encoding, default: none */
+    encoding = none OR UTF8 OR ASCII;
    }
  
  Example of type inheritance (creation of a uint32_t named type):
@@ -206,6 +254,17 @@ typealias integer {
    align = 1;
  } := int5_t;
  
+The character encoding field can be used to specify that the integer
+must be printed as a text character when read. e.g.:
+
+typealias integer {
+  size = 8;
+  align = 8;
+  signed = false;
+  encoding = UTF8;
+} := utf_char;
+
+
  4.1.6 GNU/C bitfields
  
  The GNU/C bitfields follow closely the integer representation, with a
@@ -215,7 +274,7 @@ defined by the size of the type "unit_type".
  
  TSDL meta-data representation:
  
-  unit_type name:size:
+  unit_type name:size;
  
  As an example, the following structure declared in C compiled by GCC:
  
@@ -252,9 +311,10 @@ in bits. Some requirements are imposed on the floating point values:
  TSDL meta-data representation:
  
  floating_point {
-   exp_dig = value;
-   mant_dig = value;
-   byte_order = native OR network OR be OR le;
+  exp_dig = value;
+  mant_dig = value;
+  byte_order = native OR network OR be OR le;
+  align = value;
  }
  
  Example of type inheritance:
@@ -263,10 +323,14 @@ typealias floating_point {
    exp_dig = 8;         /* sizeof(float) * CHAR_BIT - FLT_MANT_DIG */
    mant_dig = 24;       /* FLT_MANT_DIG */
    byte_order = native;
+  align = 32;
  } := float;
  
  TODO: define NaN, +inf, -inf behavior.
  
+Bit-packed, byte-packed or larger alignments can be used for floating
+point values, similarly to integers.
+
  4.1.8 Enumerations
  
  Enumerations are a mapping between an integer type and a table of strings. The
@@ -357,17 +421,28 @@ struct {
    ...
  }
  
+Alignment for a structure compound type can be forced to a minimum value
+by adding an "align" specifier after the declaration of a structure
+body. This attribute is read as: align(value). The value is specified in
+bits. The structure will be aligned on the maximum value between this
+attribute and the alignment required by the basic types contained within
+the structure. e.g.
+
+struct {
+  ...
+} align(32)
+
  4.2.2 Variants (Discriminated/Tagged Unions)
  
  A CTF variant is a selection between different types. A CTF variant must
  always be defined within the scope of a structure or within fields
  contained within a structure (defined recursively). A "tag" enumeration
  field must appear in either the same lexical scope, prior to the variant
-field (in field declaration order), in an uppermost lexical scope (see
-Section 7.3.1), or in an uppermost dynamic scope (see Section 7.3.2).
-The type selection is indicated by the mapping from the enumeration
-value to the string used as variant type selector. The field to use as
-tag is specified by the "tag_field", specified between "< >" after the
+field (in field declaration order), in an upper lexical scope (see
+Section 7.3.1), or in an upper dynamic scope (see Section 7.3.2). The
+type selection is indicated by the mapping from the enumeration value to
+the string used as variant type selector. The field to use as tag is
+specified by the "tag_field", specified between "< >" after the
  "variant" keyword for unnamed variants, and after "variant name" for
  named variants.
  
@@ -417,7 +492,8 @@ variant example {
  
  struct {
    enum : uint2_t { a, b, c } choice;
-  variant example <choice> v[unsigned int];
+  unsigned int seqlen;
+  variant example <choice> v[seqlen];
  }
  
  Example of an unnamed variant:
@@ -491,23 +567,55 @@ A nameless array can be declared as a field type within a structure, e.g.:
  
    uint8_t field_name[10];
  
+Arrays are always aligned on their element alignment requirement.
  
  4.2.4 Sequences
  
-Sequences are dynamically-sized arrays. They start with an integer that specify
-the length of the sequence, followed by an array of "inner type" elements.
-The length is the number of elements in the sequence.
+Sequences are dynamically-sized arrays. They refer to a "length"
+unsigned integer field, which must appear in either the same lexical scope,
+prior to the sequence field (in field declaration order), in an upper
+lexical scope (see Section 7.3.1), or in an upper dynamic scope (see
+Section 7.3.2). This length field represents the number of elements in
+the sequence. The sequence per se is an array of "inner type" elements.
+
+TSDL meta-data representation for a sequence type definition:
+
+struct {
+  unsigned int length_field;
+  typedef elem_type typename[length_field];
+  typename seq_field_name;
+}
+
+A sequence can also be declared as a field type, e.g.:
+
+struct {
+  unsigned int length_field;
+  long seq_field_name[length_field];
+}
  
-TSDL meta-data representation for a named sequence:
+Multiple sequences can refer to the same length field, and these length
+fields can be in a different upper dynamic scope:
  
-typedef elem_type name[length_type];
+e.g., assuming the stream.event.header defines:
  
-A nameless sequence can be declared as a field type, e.g.:
+stream {
+  ...
+  id = 1;
+  event.header := struct {
+    uint16_t seq_len;
+  };
+};
  
-long field_name[int];
+event {
+  ...
+  stream_id = 1;
+  fields := struct {
+    long seq_a[stream.event.header.seq_len];
+    char seq_b[stream.event.header.seq_len];
+  };
+};
  
-The length type follows the integer types specifications, and the sequence
-elements follow the "array" specifications.
+The sequence elements follow the "array" specifications.
  
  4.2.5 Strings
  
@@ -526,6 +634,8 @@ A nameless string type can be declared as a field type:
  
  string field_name;     /* Use default UTF8 encoding */
  
+Strings are always aligned on byte size.
+
  5. Event Packet Header
  
  The event packet header consists of two parts: the "event packet header"
@@ -593,7 +703,7 @@ the fields typically expected (although these fields are each optional):
  
  struct event_packet_header {
    uint32_t magic;
-  uint8_t  trace_uuid[16];
+  uint8_t  uuid[16];
    uint32_t stream_id;
  };
  
@@ -605,8 +715,8 @@ trace {
  If the magic number is not present, tools such as "file" will have no
  mean to discover the file type.
  
-If the trace_uuid is not present, no validation that the meta-data
-actually corresponds to the stream is performed.
+If the uuid is not present, no validation that the meta-data actually
+corresponds to the stream is performed.
  
  If the stream_id packet header field is missing, the trace can only
  contain a single stream. Its "id" field can be left out, and its events
@@ -707,7 +817,7 @@ struct event_header_1 {
        uint64_t timestamp;               /* 64-bit timestamps */
      } extended;
    } v;
-};
+} align(32);   /* or align(8) */
  
  
  6.1.2 Type 2 - Many event IDs
@@ -735,7 +845,7 @@ struct event_header_2 {
        uint64_t timestamp;               /* 64-bit timestamps */ 
      } extended;
    } v;
-};
+} align(16);   /* or align(8) */
  
  
  6.2 Event Context
@@ -830,13 +940,16 @@ beginning of the file. This magic number is also used to detect the
  endianness of the architecture by trying to read the CTF magic number
  and its counterpart in reversed endianness. The events within the
  meta-data stream have no event header nor event context. Each event only
-contains a "string" payload. Each meta-data packet start with a special
-packet header, specific to the meta-data stream, which contains,
-exactly:
+contains a "sequence" payload, which is a sequence of bits using the
+"trace.packet.header.content_size" field as a placeholder for its length
+(the packet header size should be subtracted). The formatting of this
+sequence of bits is a plain-text representation of the TSDL description.
+Each meta-data packet starts with a special packet header, specific to
+the meta-data stream, which contains, exactly:
  
  struct metadata_packet_header {
    uint32_t magic;                      /* 0x75D11D57 */
-  uint8_t  trace_uuid[16];             /* Unique Universal Identifier */
+  uint8_t  uuid[16];                   /* Unique Universal Identifier */
    uint32_t checksum;                   /* 0 if unused */
    uint32_t content_size;               /* in bits */
    uint32_t packet_size;                        /* in bits */
@@ -880,7 +993,8 @@ in Section 7.3.
  
  TSDL uses two different types of scoping: a lexical scope is used for
  declarations and type definitions, and a dynamic scope is used for
-variants references to tag fields.
+variant references to tag fields and for sequence references to length
+fields.
  
  7.3.1 Lexical Scope
  
@@ -897,16 +1011,17 @@ typedef or typealias is allowed within a sub-scope.
  
  A dynamic scope consists in the lexical scope augmented with the
  implicit event structure definition hierarchy presented at Section 6.
-The dynamic scope is only used for variant tag definitions. It is used
-at definition time to look up the location of the tag field associated
-with a variant.
-
-Therefore, variants in lower levels in the dynamic scope (e.g. event
-context) can refer to a tag field located in upper levels (e.g. in the
-event header) by specifying, in this case, the associated tag with
-<header.field_name>. This allows, for instance, the event context to
-define a variant referring to the "id" field of the event header as
-selector.
+The dynamic scope is used for variant tag and sequence length
+definitions. It is used at definition time to look up the location of
+the tag field associated with a variant, and to look up the location
+of the length field associated with a sequence.
+
+Therefore, variants (or sequences) in lower levels in the dynamic scope
+(e.g. event context) can refer to a tag (or length) field located in
+upper levels (e.g. in the event header) by specifying, in this case, the
+associated tag with <header.field_name>. This allows, for instance, the
+event context to define a variant referring to the "id" field of the
+event header as selector.
  
  The target dynamic scope must be specified explicitly when referring to
  a field outside of the local static scope. The dynamic scope prefixes
@@ -949,7 +1064,7 @@ trace {
    byte_order = be OR le;                       /* Endianness (required) */
    packet.header := struct {
      uint32_t magic;
-    uint8_t  trace_uuid[16];
+    uint8_t  uuid[16];
      uint32_t stream_id;
    };
  };
@@ -969,7 +1084,7 @@ stream {
  event {
    name = event_name;
    id = value;                  /* Numeric identifier within the stream */
-  stream = stream_id;
+  stream_id = stream_id;
    context := struct {
      ...
    };
@@ -1040,6 +1155,10 @@ struct {
    ...
  }
  
+struct {
+  ...
+} align(value)
+
  variant {
    ...
  }
@@ -1145,6 +1264,7 @@ token:
  
  keyword: is one of
  
+align
  const
  char
  double
@@ -1331,11 +1451,8 @@ assignment-operator:
  type-assignment-operator:
         :=
  
-constant-expression:
-       unary-expression
-
  constant-expression-range:
-       constant-expression ... constant-expression
+       unary-expression ... unary-expression
  
  2.2) Declarations:
  
@@ -1378,9 +1495,12 @@ type-specifier:
         typedef-name
         ctf-type-specifier
  
+align-attribute:
+       align ( unary-expression )
+
  struct-specifier:
-       struct identifier-opt { struct-or-variant-declaration-list-opt }
-       struct identifier
+       struct identifier-opt { struct-or-variant-declaration-list-opt } align-attribute-opt
+       struct identifier align-attribute-opt
  
  struct-or-variant-declaration-list:
         struct-or-variant-declaration
@@ -1388,9 +1508,9 @@ struct-or-variant-declaration-list:
  
  struct-or-variant-declaration:
         specifier-qualifier-list struct-or-variant-declarator-list ;
-       declaration-specifiers storage-class-specifier declaration-specifiers declarator-list ;
-       typealias declaration-specifiers abstract-declarator-list := declaration-specifiers abstract-declarator-list ;
-       typealias declaration-specifiers abstract-declarator-list := declarator-list ;
+       declaration-specifiers-opt storage-class-specifier declaration-specifiers-opt declarator-list ;
+       typealias declaration-specifiers abstract-declarator-list type-assignment-operator declaration-specifiers abstract-declarator-list ;
+       typealias declaration-specifiers abstract-declarator-list type-assignment-operator declarator-list ;
  
  specifier-qualifier-list:
         type-specifier specifier-qualifier-list-opt
@@ -1402,7 +1522,7 @@ struct-or-variant-declarator-list:
  
  struct-or-variant-declarator:
         declarator
-       declarator-opt : constant-expression
+       declarator-opt : unary-expression
  
  variant-specifier:
         variant identifier-opt variant-tag-opt { struct-or-variant-declaration-list }
@@ -1424,8 +1544,8 @@ enumerator-list:
  
  enumerator:
         enumeration-constant
-       enumeration-constant = constant-expression
-       enumeration-constant = constant-expression-range
+       enumeration-constant assignment-operator unary-expression
+       enumeration-constant assignment-operator constant-expression-range
  
  type-qualifier:
         const
@@ -1436,8 +1556,7 @@ declarator:
  direct-declarator:
         identifier
         ( declarator )
-       direct-declarator [ type-specifier ]
-       direct-declarator [ constant-expression ]
+       direct-declarator [ unary-expression ]
  
  abstract-declarator:
         pointer-opt direct-abstract-declarator
@@ -1445,8 +1564,7 @@ abstract-declarator:
  direct-abstract-declarator:
         identifier-opt
         ( abstract-declarator )
-       direct-abstract-declarator [ type-specifier ]
-       direct-abstract-declarator [ constant-expression ]
+       direct-abstract-declarator [ unary-expression ]
         direct-abstract-declarator [ ]
  
  pointer:
@@ -1466,21 +1584,22 @@ ctf-specifier:
         event { ctf-assignment-expression-list-opt }
         stream { ctf-assignment-expression-list-opt }
         trace { ctf-assignment-expression-list-opt }
-       typealias declaration-specifiers abstract-declarator-list := declaration-specifiers abstract-declarator-list ;
-       typealias declaration-specifiers abstract-declarator-list := declarator-list ;
+       typealias declaration-specifiers abstract-declarator-list type-assignment-operator declaration-specifiers abstract-declarator-list
+       typealias declaration-specifiers abstract-declarator-list type-assignment-operator declarator-list
  
  ctf-type-specifier:
         floating_point { ctf-assignment-expression-list-opt }
         integer { ctf-assignment-expression-list-opt }
         string { ctf-assignment-expression-list-opt }
+       string
  
  ctf-assignment-expression-list:
-       ctf-assignment-expression
-       ctf-assignment-expression-list ; ctf-assignment-expression
+       ctf-assignment-expression ;
+       ctf-assignment-expression-list ctf-assignment-expression ;
  
  ctf-assignment-expression:
         unary-expression assignment-operator unary-expression
         unary-expression type-assignment-operator type-specifier
-       declaration-specifiers storage-class-specifier declaration-specifiers declarator-list
-       typealias declaration-specifiers abstract-declarator-list := declaration-specifiers abstract-declarator-list
-       typealias declaration-specifiers abstract-declarator-list := declarator-list
+       declaration-specifiers-opt storage-class-specifier declaration-specifiers-opt declarator-list
+       typealias declaration-specifiers abstract-declarator-list type-assignment-operator declaration-specifiers abstract-declarator-list
+       typealias declaration-specifiers abstract-declarator-list type-assignment-operator declarator-list