From e7f1ad36b82bb8651c91c6c6faf8b92f145f32bc Mon Sep 17 00:00:00 2001
From: =?utf8?q?J=C3=A9r=C3=A9mie=20Galarneau?=
 <jeremie.galarneau@efficios.com>
Date: Thu, 4 Apr 2019 10:19:54 -0400
Subject: [PATCH] Fix: array and sequence field's 'elems' members can be left
 NULL
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

Issue
---

The behaviour of a number of "rw" functions associated with array and
sequence field types differ when their element's declaration meets the
following criteria:
  - is an integer,
  - is byte-aligned,
  - is byte-sized,
  - is UTF-8 or ASCII encoded.

Those criteria are used to determine if the elements of either arrays
or sequences should be interpreted as characters.

1) The implementation of sequence and array definitions creation
   functions do not initialize their 'elems' member (a g_ptr_array),
   instead initializing a 'string' member (a g_string).

2) The 'ctf' format plug-in does not initialize the 'elems' array with
   the decoded integer definitions, instead only initializing the
   'string' member with the field's contents.

3) The 'ctf-text' format plug-in uses the internal headers to
   access the 'string' member of those definitions directly.

The 'string' member of both sequence and array definitions is meant as
a helper to allow the access to their contents in textual form.

However, while an array's content is made available under that form
through the public bt_ctf_get_char_array() function, there is no
equivalent accessor for the sequence type, as reported by a number of
users [1][2]. The 'ctf-text' format implementation works around this
limitation by making use of the internal headers to access the string
member directly.

Moreover, bypassing the creation and initialization of the 'elems'
member of both array and sequence definitions results in a crash when
bt_ctf_get_field_list() is used with these types when they contain
character elements, as reported on the mailing list [1].

Solution
---

This fix eliminates the bypass used by the definition creation
functions and 'ctf' format plug-in, ensuring that both the 'string'
and 'elems' members are initialized even if the elements fit the
"character" criteria.

This fixes the crash on a unchecked NULL pointer in
bt_ctf_get_field_list() reported in [1] when trying to access the
sequence's contents.

The checks for the various criteria that make an integer a
character have been moved to an internal function, bt_int_is_char()
since their (incorrect) duplication obscured the underlying problem.

For instance, a sequence's 'string' member is allocated if the
elements are ASCII or UTF-8 encoded integers, but only used when
the elements are byte-sized and byte-aligned (as opposed to the
intended behaviour in types/array.c).

With this fix applied, sequence elements can be accessed normally
through the existing bt_ctf_get_field_list() or bt_ctf_get_index()
functions.

Example:
```
/*
 * Print a sequence's content if it is text.
 * Error handling omitted.
 */
void print_sequence(const struct bt_ctf_event *event,
                const struct bt_definition *sequence_definition)
{
        int signedness, integer_len;
        unsigned int i, element_count;
        const struct bt_definition * const *elements;
        const struct bt_definition *element;
        const struct bt_declaration *element_declaration;
        enum ctf_string_encoding encoding;

        bt_ctf_get_field_list(event, sequence_definition, &elements,
                &element_count);
        if (element_count == 0) {
                return;
        }

        /* Is this a text sequence? */
        element = elements[0];
        element_declaration = bt_ctf_get_decl_from_def(element);
        if (bt_ctf_field_type(element_declaration) != CTF_TYPE_INTEGER) {
                /* Not a text sequence. */
                return;
        }

        signedness = bt_ctf_get_int_signedness(element_declaration);
        encoding = bt_ctf_get_encoding(element_declaration);
        integer_len = bt_ctf_get_int_len(element_declaration);
        if (integer_len != 8 ||
                (encoding != CTF_STRING_UTF8 && encoding != CTF_STRING_ASCII)) {
                /* Not a text sequence. */
                return;
        }

        putchar('"');
        for (i = 0; i < element_count; i++) {
                int val = signedness ?
                        bt_ctf_get_int64(elements[i]) :
                        bt_ctf_get_uint64(elements[i]);

                putchar(val);
        }
        putchar('"');
}
```

Notes
---

Since it is not possible for a user of the public API to determine the
alignment of a field in the ctf trace, it is not possible to check for
the criteria that make an array a "character array".

An array's element declaration could indicate that it is a byte-sized
and UTF8/ASCII encoded integer. Yet, the 'string' member could be left
NULL if the element's alignment is not '8'.

Hence, while it is possible to use the bt_ctf_get_char_array()
function on array definitions that look like "character arrays", users
should be careful in doing so.

bt_ctf_get_char_array() returning NULL should not be assumed to
indicate that an array is empty. Under such circumstances, reader code
should fall-back to using bt_ctf_get_field_list() to access the
array's contents, as shown in the example above.

[1] https://lists.lttng.org/pipermail/lttng-dev/2019-April/028704.html
[2] https://github.com/efficios/babeltrace/pull/98

Reported-by: romendmsft <romend@microsoft.com>
Reported-by: Milian Wolff <milian.wolff@kdab.com>
Signed-off-by: JÃ©rÃ©mie Galarneau <jeremie.galarneau@efficios.com>
---
 formats/ctf/types/array.c    | 62 +++++++++++++++---------------------
 formats/ctf/types/sequence.c | 53 ++++++++++++------------------
 include/babeltrace/types.h   |  1 +
 types/array.c                | 27 +++-------------
 types/integer.c              | 16 ++++++++++
 types/sequence.c             | 17 ++--------
 6 files changed, 70 insertions(+), 106 deletions(-)

diff --git a/formats/ctf/types/array.c b/formats/ctf/types/array.c
index 979f7a3d..23413dd1 100644
--- a/formats/ctf/types/array.c
+++ b/formats/ctf/types/array.c
@@ -42,27 +42,21 @@ int ctf_array_read(struct bt_stream_pos *ppos, struct bt_definition *definition)
 		struct declaration_integer *integer_declaration =
 			container_of(elem, struct declaration_integer, p);
 
-		if (integer_declaration->encoding == CTF_STRING_UTF8
-		      || integer_declaration->encoding == CTF_STRING_ASCII) {
+		if (bt_int_is_char(elem)) {
+			if (!ctf_align_pos(pos, integer_declaration->p.alignment))
+				return -EFAULT;
+			if (!ctf_pos_access_ok(pos, array_declaration->len * CHAR_BIT))
+				return -EFAULT;
 
-			if (integer_declaration->len == CHAR_BIT
-			    && integer_declaration->p.alignment == CHAR_BIT) {
-
-				if (!ctf_align_pos(pos, integer_declaration->p.alignment))
-					return -EFAULT;
-				if (!ctf_pos_access_ok(pos, array_declaration->len * CHAR_BIT))
-					return -EFAULT;
-
-				g_string_assign(array_definition->string, "");
-				g_string_insert_len(array_definition->string,
-					0, (char *) ctf_get_pos_addr(pos),
-					array_declaration->len);
-				/*
-				 * We want to populate both the string
-				 * and the underlying values, so carry
-				 * on calling bt_array_rw().
-				 */
-			}
+			g_string_assign(array_definition->string, "");
+			g_string_insert_len(array_definition->string,
+				0, (char *) ctf_get_pos_addr(pos),
+				array_declaration->len);
+			/*
+			 * We want to populate both the string
+			 * and the underlying values, so carry
+			 * on calling bt_array_rw().
+			 */
 		}
 	}
 	return bt_array_rw(ppos, definition);
@@ -82,24 +76,18 @@ int ctf_array_write(struct bt_stream_pos *ppos, struct bt_definition *definition
 		struct declaration_integer *integer_declaration =
 			container_of(elem, struct declaration_integer, p);
 
-		if (integer_declaration->encoding == CTF_STRING_UTF8
-		      || integer_declaration->encoding == CTF_STRING_ASCII) {
-
-			if (integer_declaration->len == CHAR_BIT
-			    && integer_declaration->p.alignment == CHAR_BIT) {
-
-				if (!ctf_align_pos(pos, integer_declaration->p.alignment))
-					return -EFAULT;
-				if (!ctf_pos_access_ok(pos, array_declaration->len * CHAR_BIT))
-					return -EFAULT;
+		if (bt_int_is_char(elem)) {
+			if (!ctf_align_pos(pos, integer_declaration->p.alignment))
+				return -EFAULT;
+			if (!ctf_pos_access_ok(pos, array_declaration->len * CHAR_BIT))
+				return -EFAULT;
 
-				memcpy((char *) ctf_get_pos_addr(pos),
-					array_definition->string->str,
-					array_declaration->len);
-				if (!ctf_move_pos(pos, array_declaration->len * CHAR_BIT))
-					return -EFAULT;
-				return 0;
-			}
+			memcpy((char *) ctf_get_pos_addr(pos),
+				array_definition->string->str,
+				array_declaration->len);
+			if (!ctf_move_pos(pos, array_declaration->len * CHAR_BIT))
+				return -EFAULT;
+			return 0;
 		}
 	}
 	return bt_array_rw(ppos, definition);
diff --git a/formats/ctf/types/sequence.c b/formats/ctf/types/sequence.c
index 5dae7d02..df91a533 100644
--- a/formats/ctf/types/sequence.c
+++ b/formats/ctf/types/sequence.c
@@ -41,25 +41,17 @@ int ctf_sequence_read(struct bt_stream_pos *ppos, struct bt_definition *definiti
 		struct declaration_integer *integer_declaration =
 			container_of(elem, struct declaration_integer, p);
 
-		if (integer_declaration->encoding == CTF_STRING_UTF8
-		      || integer_declaration->encoding == CTF_STRING_ASCII) {
+		if (bt_int_is_char(elem)) {
+			uint64_t len = bt_sequence_len(sequence_definition);
 
-			if (integer_declaration->len == CHAR_BIT
-			    && integer_declaration->p.alignment == CHAR_BIT) {
-				uint64_t len = bt_sequence_len(sequence_definition);
+			if (!ctf_align_pos(pos, integer_declaration->p.alignment))
+				return -EFAULT;
+			if (!ctf_pos_access_ok(pos, len * CHAR_BIT))
+				return -EFAULT;
 
-				if (!ctf_align_pos(pos, integer_declaration->p.alignment))
-					return -EFAULT;
-				if (!ctf_pos_access_ok(pos, len * CHAR_BIT))
-					return -EFAULT;
-
-				g_string_assign(sequence_definition->string, "");
-				g_string_insert_len(sequence_definition->string,
-					0, (char *) ctf_get_pos_addr(pos), len);
-				if (!ctf_move_pos(pos, len * CHAR_BIT))
-					return -EFAULT;
-				return 0;
-			}
+			g_string_assign(sequence_definition->string, "");
+			g_string_insert_len(sequence_definition->string,
+				0, (char *) ctf_get_pos_addr(pos), len);
 		}
 	}
 	return bt_sequence_rw(ppos, definition);
@@ -78,24 +70,19 @@ int ctf_sequence_write(struct bt_stream_pos *ppos, struct bt_definition *definit
 		struct declaration_integer *integer_declaration =
 			container_of(elem, struct declaration_integer, p);
 
-		if (integer_declaration->encoding == CTF_STRING_UTF8
-		      || integer_declaration->encoding == CTF_STRING_ASCII) {
-
-			if (integer_declaration->len == CHAR_BIT
-			    && integer_declaration->p.alignment == CHAR_BIT) {
-				uint64_t len = bt_sequence_len(sequence_definition);
+		if (bt_int_is_char(elem)) {
+			uint64_t len = bt_sequence_len(sequence_definition);
 
-				if (!ctf_align_pos(pos, integer_declaration->p.alignment))
-					return -EFAULT;
-				if (!ctf_pos_access_ok(pos, len * CHAR_BIT))
-					return -EFAULT;
+			if (!ctf_align_pos(pos, integer_declaration->p.alignment))
+				return -EFAULT;
+			if (!ctf_pos_access_ok(pos, len * CHAR_BIT))
+				return -EFAULT;
 
-				memcpy((char *) ctf_get_pos_addr(pos),
-					sequence_definition->string->str, len);
-				if (!ctf_move_pos(pos, len * CHAR_BIT))
-					return -EFAULT;
-				return 0;
-			}
+			memcpy((char *) ctf_get_pos_addr(pos),
+				sequence_definition->string->str, len);
+			if (!ctf_move_pos(pos, len * CHAR_BIT))
+				return -EFAULT;
+			return 0;
 		}
 	}
 	return bt_sequence_rw(ppos, definition);
diff --git a/include/babeltrace/types.h b/include/babeltrace/types.h
index e92fb842..b89e3a8e 100644
--- a/include/babeltrace/types.h
+++ b/include/babeltrace/types.h
@@ -400,6 +400,7 @@ int bt_get_int_signedness(const struct bt_definition *field);
 int bt_get_int_byte_order(const struct bt_definition *field);
 int bt_get_int_base(const struct bt_definition *field);
 size_t bt_get_int_len(const struct bt_definition *field);	/* in bits */
+bool bt_int_is_char(const struct bt_declaration *field);
 enum ctf_string_encoding bt_get_int_encoding(const struct bt_definition *field);
 
 /*
diff --git a/types/array.c b/types/array.c
index e4b43d8b..001d0540 100644
--- a/types/array.c
+++ b/types/array.c
@@ -123,15 +123,9 @@ struct bt_definition *
 	array->string = NULL;
 	array->elems = NULL;
 
-	if (array_declaration->elem->id == CTF_TYPE_INTEGER) {
-		struct declaration_integer *integer_declaration =
-			container_of(array_declaration->elem, struct declaration_integer, p);
-
-		if (integer_declaration->encoding == CTF_STRING_UTF8
-		      || integer_declaration->encoding == CTF_STRING_ASCII) {
-
-			array->string = g_string_new("");
-		}
+	if (array_declaration->elem->id == CTF_TYPE_INTEGER &&
+			bt_int_is_char(array_declaration->elem)) {
+		array->string = g_string_new("");
 	}
 
 	array->elems = g_ptr_array_sized_new(array_declaration->len);
@@ -229,19 +223,8 @@ GString *bt_get_char_array(const struct bt_definition *field)
 	array_definition = container_of(field, struct definition_array, p);
 	array_declaration = array_definition->declaration;
 	elem = array_declaration->elem;
-	if (elem->id == CTF_TYPE_INTEGER) {
-		struct declaration_integer *integer_declaration =
-			container_of(elem, struct declaration_integer, p);
-
-		if (integer_declaration->encoding == CTF_STRING_UTF8
-				|| integer_declaration->encoding == CTF_STRING_ASCII) {
-
-			if (integer_declaration->len == CHAR_BIT
-					&& integer_declaration->p.alignment == CHAR_BIT) {
-
-				return array_definition->string;
-			}
-		}
+	if (elem->id == CTF_TYPE_INTEGER && bt_int_is_char(elem)) {
+		return array_definition->string;
 	}
 	fprintf(stderr, "[warning] Extracting string\n");
 	return NULL;
diff --git a/types/integer.c b/types/integer.c
index 52cc6bb6..90525f5d 100644
--- a/types/integer.c
+++ b/types/integer.c
@@ -201,3 +201,19 @@ int64_t bt_get_signed_int(const struct bt_definition *field)
 		g_quark_to_string(field->name));
 	return (int64_t)integer_definition->value._unsigned;
 }
+
+bool bt_int_is_char(const struct bt_declaration *field)
+{
+	bool ret;
+	struct declaration_integer *integer_declaration =
+		container_of(field, struct declaration_integer, p);
+
+	/* Integer must be ASCII or encoded as UTF-8. */
+	ret = integer_declaration->encoding == CTF_STRING_UTF8 ||
+			integer_declaration->encoding == CTF_STRING_ASCII;
+	/* Integer must be aligned on a byte boundary and be byte-sized. */
+	ret &= integer_declaration->len == CHAR_BIT &&
+			integer_declaration->p.alignment == CHAR_BIT;
+
+	return ret;
+}
diff --git a/types/sequence.c b/types/sequence.c
index 6c61ef7a..dcbcf19a 100644
--- a/types/sequence.c
+++ b/types/sequence.c
@@ -171,20 +171,9 @@ struct bt_definition *_sequence_definition_new(struct bt_declaration *declaratio
 	sequence->string = NULL;
 	sequence->elems = NULL;
 
-	if (sequence_declaration->elem->id == CTF_TYPE_INTEGER) {
-		struct declaration_integer *integer_declaration =
-			container_of(sequence_declaration->elem, struct declaration_integer, p);
-
-		if (integer_declaration->encoding == CTF_STRING_UTF8
-		      || integer_declaration->encoding == CTF_STRING_ASCII) {
-
-			sequence->string = g_string_new("");
-
-			if (integer_declaration->len == CHAR_BIT
-			    && integer_declaration->p.alignment == CHAR_BIT) {
-				return &sequence->p;
-			}
-		}
+	if (sequence_declaration->elem->id == CTF_TYPE_INTEGER &&
+			bt_int_is_char(sequence_declaration->elem)) {
+		sequence->string = g_string_new("");
 	}
 
 	sequence->elems = g_ptr_array_new();
-- 
2.34.1