Fix: trimmer: use regexes to parse dates and times
authorSimon Marchi <simon.marchi@efficios.com>
Thu, 6 Jun 2019 20:59:44 +0000 (16:59 -0400)
committerPhilippe Proulx <eeppeliteloop@gmail.com>
Wed, 3 Jul 2019 20:36:15 +0000 (16:36 -0400)
This patch fixes a few issues related to how the trimmer component
parses timestamps.

1. Less than 9 digits in the nanoseconds position is not interpreted
   correctly.

   Passing "2019-01-02 12:34:56.444", we expect the nanosecond portion
   to mean 444 ms, or 444000000 ns.  Currently, it is interpreted as 444
   ns.

2. We wrongfully accept missing digits in the various fields

   It is currently possible to pass a date as "2019-1-2" (instead of
   "2019-01-02") or a time as "1:2:3" (instead of "01:02:03").  This
   patch makes the parsing stricter, in order to only accept the right
   number of digits when parsing dates (in the form "YYYY-MM-DD") and
   times (in the form "HH:MM:SS").  The nanosecond portion, if present,
   can contain anywhere from 1 to 9 digits.

Change-Id: Ie315cec0bc7387ecdaa999d6c5f82f70035412f3
Signed-off-by: Simon Marchi <simon.marchi@efficios.com>
Reviewed-on: https://review.lttng.org/c/babeltrace/+/1389
CI-Build: Philippe Proulx <eeppeliteloop@gmail.com>
Tested-by: jenkins <jenkins@lttng.org>
Reviewed-by: Philippe Proulx <eeppeliteloop@gmail.com>
src/plugins/utils/trimmer/trimmer.c
tests/cli/test_trimmer

index d771ec277d0104d4be7e45eac21c1c5d89e5d043..56ed9683f4617e965a204c8053ea3c699c3391fe 100644 (file)
@@ -174,6 +174,99 @@ void trimmer_finalize(bt_self_component_filter *self_comp)
        }
 }
 
+/*
+ * Compile regex in `pattern`, and try to match `string`.  If there's a match,
+ * return true and set `*match_info` to the list of matches.  The list of
+ * matches must be freed by the caller. If there's no match, return false and
+ * set `*match_info` to NULL;
+ */
+static
+bool compile_and_match(const char *pattern, const char *string, GMatchInfo **match_info) {
+       bool matches = false;
+       GError *regex_error = NULL;
+       GRegex *regex;
+
+       regex = g_regex_new(pattern, 0, 0, &regex_error);
+       if (!regex) {
+               goto end;
+       }
+
+       matches = g_regex_match(regex, string, 0, match_info);
+       if (!matches) {
+               /*
+                * g_regex_match allocates `*match_info` even if it returns
+                * FALSE.  If there's no match, we have no use for it, so free
+                * it immediatly and don't return it to the caller.
+                */
+               g_match_info_unref(*match_info);
+               *match_info = NULL;
+       }
+
+       g_regex_unref(regex);
+
+end:
+
+       if (regex_error) {
+               g_error_free(regex_error);
+       }
+
+       return matches;
+}
+
+/*
+ * Convert the captured text in match number `match_num` in `match_info`
+ * to an unsigned integer.
+ */
+static
+guint64 match_to_uint(const GMatchInfo *match_info, gint match_num) {
+       gchar *text, *endptr;
+       guint64 result;
+
+       text = g_match_info_fetch(match_info, match_num);
+       BT_ASSERT(text);
+
+       /*
+        * Because the input is carefully sanitized with regexes by the caller,
+        * we assume that g_ascii_strtoull cannot fail.
+        */
+       errno = 0;
+       result = g_ascii_strtoull(text, &endptr, 10);
+       BT_ASSERT(endptr > text);
+       BT_ASSERT(errno == 0);
+
+       g_free(text);
+
+       return result;
+}
+
+/*
+ * When parsing the nanoseconds part, .512 means .512000000, not .000000512.
+ * This function is like match_to_uint, but multiplies the parsed number to get
+ * the expected result.
+ */
+static
+guint64 match_to_uint_ns(const GMatchInfo *match_info, gint match_num) {
+       guint64 nanoseconds;
+       gboolean ret;
+       gint start_pos, end_pos, power;
+       static int pow10[] = {
+               1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000,
+       };
+
+       nanoseconds = match_to_uint(match_info, match_num);
+
+       /* Multiply by 10 as many times as there are omitted digits. */
+       ret = g_match_info_fetch_pos(match_info, match_num, &start_pos, &end_pos);
+       BT_ASSERT(ret);
+
+       power = 9 - (end_pos - start_pos);
+       BT_ASSERT(power >= 0 && power <= 8);
+
+       nanoseconds *= pow10[power];
+
+       return nanoseconds;
+}
+
 /*
  * Sets the time (in ns from origin) of a trimmer bound from date and
  * time components.
@@ -236,98 +329,104 @@ static
 int set_bound_from_str(struct trimmer_comp *trimmer_comp,
                const char *str, struct trimmer_bound *bound, bool is_gmt)
 {
+/* Matches YYYY-MM-DD */
+#define DATE_RE "([0-9]{4})-([0-9]{2})-([0-9]{2})"
+
+/* Matches HH:MM[:SS[.NS]] */
+#define TIME_RE "([0-9]{2}):([0-9]{2})(?::([0-9]{2})(?:\\.([0-9]{1,9}))?)?"
+
+/* Matches [-]SS[.NS] */
+#define S_NS_RE "^(-?)([0-9]+)(?:\\.([0-9]{1,9}))?$"
+
+       GMatchInfo *match_info;
        int ret = 0;
-       int s_ret;
-       unsigned int year, month, day, hour, minute, second, ns;
-       char dummy;
-
-       /* Try `YYYY-MM-DD hh:mm:ss.ns` format */
-       s_ret = sscanf(str, "%u-%u-%u %u:%u:%u.%u%c", &year, &month, &day,
-               &hour, &minute, &second, &ns, &dummy);
-       if (s_ret == 7) {
-               ret = set_bound_ns_from_origin(bound, year, month, day,
-                       hour, minute, second, ns, is_gmt);
-               goto end;
-       }
 
-       /* Try `YYYY-MM-DD hh:mm:ss` format */
-       s_ret = sscanf(str, "%u-%u-%u %u:%u:%u%c", &year, &month, &day,
-               &hour, &minute, &second, &dummy);
-       if (s_ret == 6) {
-               ret = set_bound_ns_from_origin(bound, year, month, day,
-                       hour, minute, second, 0, is_gmt);
-               goto end;
-       }
+       /* Try `YYYY-MM-DD hh:mm[:ss[.ns]]` format */
+       if (compile_and_match("^" DATE_RE " " TIME_RE "$", str, &match_info)) {
+               unsigned int year = 0, month = 0, day = 0, hours = 0, minutes = 0, seconds = 0, nanoseconds = 0;
+               gint match_count = g_match_info_get_match_count(match_info);
 
-       /* Try `YYYY-MM-DD hh:mm` format */
-       s_ret = sscanf(str, "%u-%u-%u %u:%u%c", &year, &month, &day,
-               &hour, &minute, &dummy);
-       if (s_ret == 5) {
-               ret = set_bound_ns_from_origin(bound, year, month, day,
-                       hour, minute, 0, 0, is_gmt);
-               goto end;
-       }
+               BT_ASSERT(match_count >= 6 && match_count <= 8);
 
-       /* Try `YYYY-MM-DD` format */
-       s_ret = sscanf(str, "%u-%u-%u%c", &year, &month, &day, &dummy);
-       if (s_ret == 3) {
-               ret = set_bound_ns_from_origin(bound, year, month, day,
-                       0, 0, 0, 0, is_gmt);
-               goto end;
-       }
+               year = match_to_uint(match_info, 1);
+               month = match_to_uint(match_info, 2);
+               day = match_to_uint(match_info, 3);
+               hours = match_to_uint(match_info, 4);
+               minutes = match_to_uint(match_info, 5);
 
-       /* Try `hh:mm:ss.ns` format */
-       s_ret = sscanf(str, "%u:%u:%u.%u%c", &hour, &minute, &second, &ns,
-               &dummy);
-       if (s_ret == 4) {
-               bound->time.hour = hour;
-               bound->time.minute = minute;
-               bound->time.second = second;
-               bound->time.ns = ns;
-               goto end;
-       }
+               if (match_count >= 7) {
+                       seconds = match_to_uint(match_info, 6);
+               }
 
-       /* Try `hh:mm:ss` format */
-       s_ret = sscanf(str, "%u:%u:%u%c", &hour, &minute, &second, &dummy);
-       if (s_ret == 3) {
-               bound->time.hour = hour;
-               bound->time.minute = minute;
-               bound->time.second = second;
-               bound->time.ns = 0;
-               goto end;
-       }
+               if (match_count >= 8) {
+                       nanoseconds = match_to_uint_ns(match_info, 7);
+               }
+
+               set_bound_ns_from_origin(bound, year, month, day, hours, minutes, seconds, nanoseconds, is_gmt);
 
-       /* Try `-s.ns` format */
-       s_ret = sscanf(str, "-%u.%u%c", &second, &ns, &dummy);
-       if (s_ret == 2) {
-               bound->ns_from_origin = -((int64_t) second) * NS_PER_S;
-               bound->ns_from_origin -= (int64_t) ns;
-               bound->is_set = true;
                goto end;
        }
 
-       /* Try `s.ns` format */
-       s_ret = sscanf(str, "%u.%u%c", &second, &ns, &dummy);
-       if (s_ret == 2) {
-               bound->ns_from_origin = ((int64_t) second) * NS_PER_S;
-               bound->ns_from_origin += (int64_t) ns;
-               bound->is_set = true;
+       if (compile_and_match("^" DATE_RE "$", str, &match_info)) {
+               unsigned int year = 0, month = 0, day = 0;
+
+               BT_ASSERT(g_match_info_get_match_count(match_info) == 4);
+
+               year = match_to_uint(match_info, 1);
+               month = match_to_uint(match_info, 2);
+               day = match_to_uint(match_info, 3);
+
+               set_bound_ns_from_origin(bound, year, month, day, 0, 0, 0, 0, is_gmt);
+
                goto end;
        }
 
-       /* Try `-s` format */
-       s_ret = sscanf(str, "-%u%c", &second, &dummy);
-       if (s_ret == 1) {
-               bound->ns_from_origin = -((int64_t) second) * NS_PER_S;
-               bound->is_set = true;
+       /* Try `hh:mm[:ss[.ns]]` format */
+       if (compile_and_match("^" TIME_RE "$", str, &match_info)) {
+               gint match_count = g_match_info_get_match_count(match_info);
+               BT_ASSERT(match_count >= 3 && match_count <= 5);
+               bound->time.hour = match_to_uint(match_info, 1);
+               bound->time.minute = match_to_uint(match_info, 2);
+
+               if (match_count >= 4) {
+                       bound->time.second = match_to_uint(match_info, 3);
+               }
+
+               if (match_count >= 5) {
+                       bound->time.ns = match_to_uint_ns(match_info, 4);
+               }
+
                goto end;
        }
 
-       /* Try `s` format */
-       s_ret = sscanf(str, "%u%c", &second, &dummy);
-       if (s_ret == 1) {
-               bound->ns_from_origin = (int64_t) second * NS_PER_S;
+       /* Try `[-]s[.ns]` format */
+       if (compile_and_match("^" S_NS_RE "$", str, &match_info)) {
+               gboolean is_neg, fetch_pos_ret;
+               gint start_pos, end_pos, match_count;
+               guint64 seconds, nanoseconds = 0;
+
+               match_count = g_match_info_get_match_count(match_info);
+               BT_ASSERT(match_count >= 3 && match_count <= 4);
+
+               /* Check for presence of negation sign. */
+               fetch_pos_ret = g_match_info_fetch_pos(match_info, 1, &start_pos, &end_pos);
+               BT_ASSERT(fetch_pos_ret);
+               is_neg = (end_pos - start_pos) > 0;
+
+               seconds = match_to_uint(match_info, 2);
+
+               if (match_count >= 4) {
+                       nanoseconds = match_to_uint_ns(match_info, 3);
+               }
+
+               bound->ns_from_origin = seconds * NS_PER_S + nanoseconds;
+
+               if (is_neg) {
+                       bound->ns_from_origin = -bound->ns_from_origin;
+               }
+
                bound->is_set = true;
+
                goto end;
        }
 
index c1525024dc63a8ab65710e276154c229813c7b10..a3246b2cbeb5436e975ce480e01f2e7ea77c5943 100755 (executable)
@@ -28,11 +28,13 @@ source "$UTILSSH"
 
 TRACE_PATH="${BT_CTF_TRACES_PATH}/succeed/wk-heartbeat-u/"
 
-NUM_TESTS=40
+NUM_TESTS=118
 
 plan_tests $NUM_TESTS
 
 tmp_out=$(mktemp)
+tmp_err=$(mktemp)
+
 
 # Run Babeltrace with some command line arguments, verify exit status and
 # number of output events (i.e. number of output lines)
@@ -46,17 +48,51 @@ tmp_out=$(mktemp)
 function expect_success()
 {
        local expected_num_events="$1"
-       shift
-       local msg="$1"
-       shift
+       local msg="$2"
+       shift 2
 
        "${BT_TESTS_BT2_BIN}" "${TRACE_PATH}" "$@" 2>/dev/null > "${tmp_out}"
        ok $? "trimmer: ${msg}: exit status"
+
        num_events=$(wc -l < "${tmp_out}")
        # Use bash parameter expansion to strip spaces added by BSD 'wc' on macOs and Solaris
        is "${num_events// /}" "${expected_num_events}" "trimmer: ${msg}: number of events (${expected_num_events})"
 }
 
+# Run Babeltrace with some command line arguments, verify that the exit status
+# is not 0 and that the error message contains a given string.
+#
+# Arguments:
+#
+#   $1: a string expected to be found in the error message
+#   $2: test description
+#   remaining arguments: command-line arguments to pass to Babeltrace
+
+function expect_failure()
+{
+       local expected_err_string="$1"
+       local msg="$2"
+       shift 2
+
+       # We check the error message logged by the trimmer plugin, set the env
+       # var necessary for it to log errors.
+       BABELTRACE_FLT_UTILS_TRIMMER_LOG_LEVEL=E "${BT_TESTS_BT2_BIN}" "${TRACE_PATH}" "$@" 2> "${tmp_err}" > "${tmp_out}"
+       isnt $? 0 "trimmer: ${msg}: exit status"
+
+       num_events=$(wc -l < "${tmp_out}")
+       is "${num_events}" 0 "trimmer: ${msg}: number of events (0)"
+
+       stderr="$(cat "${tmp_err}")"
+       # "like" doesn't like when the passed text is empty.
+       if [ -n "${stderr}" ]; then
+               like "${stderr}" "${expected_err_string}" "trimmer: ${msg}: error message"
+       else
+               fail "trimmer: ${msg}: error message"
+               diag "Nothing was output on stderr".
+       fi
+
+}
+
 expect_success 18 "--begin, GMT relative timestamps" \
        --clock-gmt --begin 17:48:17.587029529
 expect_success 9 "--end, GMT relative timestamps" \
@@ -103,4 +139,69 @@ expect_success 0 "--begin, out of range, EST absolute timestamps" \
 expect_success 0 "--end, out of range, EST absolute timestamps" \
        --end "2012-10-29 11:48:17.588680018"
 
-rm "${tmp_out}"
+# Check various formats.
+#
+# We sometimes apply a clock offset to make the events of the trace span two
+# different seconds or minutes.
+
+expect_success 13 "date time format: partial nanosecond precision" \
+       --begin="2012-10-29 12:48:17.588"
+expect_success 11 "date time format: second precision" \
+       --clock-offset-ns=411282268 --begin="2012-10-29 12:48:18"
+expect_success 11 "date time format: minute precision" \
+       --clock-offset=42 --clock-offset-ns=411282268 --begin="2012-10-29 12:49"
+
+expect_success 11 "seconds from origin format: nanosecond precision" \
+       --begin="1351532897.588717732"
+expect_success 11 "seconds from origin format: partial nanosecond precision" \
+       --begin="1351532897.58871773"
+expect_success 11 "seconds from origin format: second precision" \
+       --clock-offset-ns=411282268 --begin="1351532898"
+
+expect_failure "Invalid date/time format" "date time format: too many nanosecond digits" \
+       --begin="2012-10-29 12:48:17.1231231231"
+expect_failure "Invalid date/time format" "date time format: missing nanoseconds" \
+       --begin="2012-10-29 12:48:17."
+expect_failure "Invalid date/time format" "date time format: seconds with too many digit" \
+       --begin="2012-10-29 12:48:123"
+expect_failure "Invalid date/time format" "date time format: seconds with missing digit" \
+       --begin="2012-10-29 12:48:1"
+expect_failure "Invalid date/time format" "date time format: minutes with too many digit" \
+       --begin="2012-10-29 12:489:17"
+expect_failure "Invalid date/time format" "date time format: minutes with missing digit" \
+       --begin="2012-10-29 12:4:17"
+expect_failure "Invalid date/time format" "date time format: hours with too many digit" \
+       --begin="2012-10-29 123:48:17"
+expect_failure "Invalid date/time format" "date time format: hours with missing digit" \
+       --begin="2012-10-29 2:48:17"
+expect_failure "Invalid date/time format" "date time format: missing seconds" \
+       --begin="2012-10-29 12:48:"
+expect_failure "Invalid date/time format" "date time format: missing minutes 1" \
+       --begin="2012-10-29 12:"
+expect_failure "Invalid date/time format" "date time format: missing minutes 2" \
+       --begin="2012-10-29 12"
+expect_failure "Invalid date/time format" "date time format: missing time" \
+       --begin="2012-10-29 "
+expect_failure "Invalid date/time format" "date time format: day with too many digit" \
+       --begin="2012-10-291"
+expect_failure "Invalid date/time format" "date time format: day with missing digit" \
+       --begin="2012-10-2"
+expect_failure "Invalid date/time format" "date time format: month with too many digit" \
+       --begin="2012-101-29"
+expect_failure "Invalid date/time format" "date time format: month with missing digit" \
+       --begin="2012-1-29"
+expect_failure "Invalid date/time format" "date time format: year with too many digits" \
+       --begin="20121-10-29"
+expect_failure "Invalid date/time format" "date time format: year with missing digits" \
+       --begin="12-10-29"
+expect_failure "Invalid date/time format" "date time format: missing day 1" \
+       --begin="2012-10-"
+expect_failure "Invalid date/time format" "date time format: missing day 2" \
+       --begin="2012-10"
+
+expect_failure "Invalid date/time format" "seconds from origin format: too many nanosecond digits" \
+       --begin="1351532898.1231231231"
+expect_failure "Invalid date/time format" "seconds from origin format: missing nanseconds" \
+       --begin="1351532898."
+
+rm "${tmp_out}" "${tmp_err}"
This page took 0.030295 seconds and 4 git commands to generate.