Fix: overly restrictive datetime regexp rejects valid dates
[lttng-tools.git] / src / bin / lttng-relayd / backward-compatibility-group-by.c
1 /*
2 * Copyright (C) 2019 - Jonathan Rajotte <jonathan.rajotte-julien@efficios.com>
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of the GNU General Public License, version 2 only, as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License along with
14 * this program; if not, write to the Free Software Foundation, Inc., 51
15 * Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
16 */
17
18 #include <assert.h>
19 #include <regex.h>
20 #include <stdio.h>
21 #include <stdlib.h>
22 #include <string.h>
23
24 #include <common/common.h>
25 #include <common/defaults.h>
26 #include <common/utils.h>
27
28 #include "backward-compatibility-group-by.h"
29
/*
 * Length, in characters, of a "-<date>-<time>" suffix ("-YYYYMMDD-HHMMSS"):
 * the leading dash is counted, the terminating NUL is not.
 */
#define DATETIME_STRING_SIZE 16

/*
 * Regular expression matching any string that ends with a
 * "-YYYYMMDD-HHMMSS" suffix. It is written as adjacent string literals, one
 * per field (year, month, day, hour, minute, second), which the compiler
 * concatenates into a single pattern.
 */
#define DATETIME_REGEX \
	".*" \
	"-[1-2][0-9][0-9][0-9]" \
	"[0-1][0-9]" \
	"[0-3][0-9]" \
	"-[0-2][0-9]" \
	"[0-5][0-9]" \
	"[0-5][0-9]$"
33
34 /*
35 * Provide support for --group-output-by-session for producer >= 2.4 and < 2.11.
36 * Take the stream path, extract all available information, craft a new path to
37 * the best of our ability enforcing the group by session.
38 *
39 * Return the allocated string containing the new stream path or else NULL.
40 */
41 char *backward_compat_group_by_session(
42 const char *path, const char *local_session_name)
43 {
44 int ret;
45 size_t len;
46 char *leftover_ptr;
47 char *local_copy = NULL;
48 char *datetime = NULL;
49 char *partial_base_path = NULL;
50 char *filepath_per_session = NULL;
51 const char *second_token_ptr;
52 const char *leftover_second_token_ptr;
53 const char *hostname_ptr;
54 regex_t regex;
55
56 assert(path);
57 assert(local_session_name);
58 assert(local_session_name[0] != '\0');
59
60 DBG("Parsing path \"%s\" of session \"%s\" to create a new path that is grouped by session",
61 path, local_session_name);
62
63 /* Get a local copy for strtok */
64 local_copy = strdup(path);
65 if (!local_copy) {
66 PERROR("Failed to parse session path: couldn't copy input path");
67 goto error;
68 }
69
70 /*
71 * The use of strtok with '/' as delimiter is valid since we refuse '/'
72 * in session name and '/' is not a valid hostname character based on
73 * RFC-952 [1], RFC-921 [2] and refined in RFC-1123 [2].
74 * [1] https://tools.ietf.org/html/rfc952
75 * [2] https://tools.ietf.org/html/rfc921
76 * [3] https://tools.ietf.org/html/rfc1123#page-13
77 */
78
79 /*
80 * Get the hostname and possible session_name.
81 * Note that we can get the hostname and session name from the
82 * relay_session object we already have. Still, it is easier to
83 * tokenized the passed path to obtain the start of the path leftover.
84 */
85 hostname_ptr = strtok_r(local_copy, "/", &leftover_ptr);
86 if (!hostname_ptr) {
87 ERR("Failed to parse session path \"%s\": couldn't identify hostname",
88 path);
89 goto error;
90 }
91
92 second_token_ptr = strtok_r(NULL, "/", &leftover_ptr);
93 if (!second_token_ptr) {
94 ERR("Failed to parse session path \"%s\": couldn't identify session name",
95 path);
96 goto error;
97 }
98
99 /*
100 * Check if the second token is a base path set at url level. This is
101 * legal in streaming, live and snapshot [1]. Otherwise it is the
102 * session name with possibly a datetime attached [2]. Note that when
103 * "adding" snapshot output (lttng snapshot add-output), no session name
104 * is present in the path by default. The handling for "base path" take
105 * care of this case as well.
106 * [1] e.g --set-url net://localhost/my_marvellous_path
107 * [2] Can be:
108 * <session_name>
109 * When using --snapshot on session create.
110 * <session_name>-<date>-<time>
111 * <auto>-<date>-<time>
112 */
113 if (strncmp(second_token_ptr, local_session_name,
114 strlen(local_session_name)) != 0) {
115 /*
116 * Token does not start with session name.
117 * This mean this is an extra path scenario.
118 * Duplicate the current token since it is part of an
119 * base_path.
120 * Set secDuplicate the current token since it is part of an
121 * base_path. The rest is the leftover.
122 * Set second_token_ptr to the local_session_name for further
123 * processing.
124 */
125 partial_base_path = strdup(second_token_ptr);
126 if (!partial_base_path) {
127 PERROR("Failed to parse session path: couldn't copy partial base path");
128 goto error;
129 }
130
131 second_token_ptr = local_session_name;
132 }
133
134 /*
135 * Based on the previous test, we can move inside the token ptr to
136 * remove the "local_session_name" and inspect the rest of the token.
137 * We are looking into extracting the creation datetime from either the
138 * session_name or the token. We need to to all this gymnastic because
139 * an extra path could decide to append a datetime to its first
140 * subdirectory.
141 * Possible scenario:
142 * <session_name>
143 * <session_name>-<date>-<time>
144 * <auto>-<date>-<time>
145 * <session_name>_base_path_foo_bar
146 * <session_name>-<false date>-<false-time> (via a base path)
147 *
148 * We have no way to discern from the basic scenario of:
149 * <session_name>-<date>-<time>
150 * and one done using a base path with the exact format we normally
151 * expect.
152 *
153 * e.g:
154 * lttng create my_session -U
155 * net://localhost/my_session-19910319-120000/
156 */
157 ret = regcomp(&regex, DATETIME_REGEX, 0);
158 if (ret) {
159 ERR("Failed to parse session path: regex compilation failed with code %d", ret);
160 goto error;
161 }
162
163 leftover_second_token_ptr =
164 second_token_ptr + strlen(local_session_name);
165 len = strlen(leftover_second_token_ptr);
166 if (len == 0) {
167 /*
168 * We are either dealing with an auto session name or only the
169 * session_name. If this is a auto session name, we need to
170 * fetch the creation datetime.
171 */
172 ret = regexec(&regex, local_session_name, 0, NULL, 0);
173 if (ret == 0) {
174 const ssize_t local_session_name_offset =
175 strlen(local_session_name) - DATETIME_STRING_SIZE + 1;
176
177 assert(local_session_name_offset >= 0);
178 datetime = strdup(local_session_name +
179 local_session_name_offset);
180 if (!datetime) {
181 PERROR("Failed to parse session path: couldn't copy datetime on regex match");
182 goto error_regex;
183 }
184 }
185 } else if (len == DATETIME_STRING_SIZE &&
186 !regexec(&regex, leftover_second_token_ptr, 0, NULL,
187 0)) {
188 /*
189 * The leftover from the second token is of format
190 * "-<datetime>", use it as the creation time.
191 * Ignore leading "-".
192 */
193 datetime = strdup(&leftover_second_token_ptr[1]);
194 if (!datetime) {
195 PERROR("Failed to parse session path: couldn't copy datetime on regex match");
196 goto error_regex;
197 }
198 } else {
199 /*
200 * Base path scenario.
201 * We cannot try to extract the datetime from the session name
202 * since nothing prevent a user to name a session in the
203 * "name-<datetime>" format. Using the datetime from such a
204 * session would be invalid.
205 * */
206 assert(partial_base_path == NULL);
207 assert(datetime == NULL);
208
209 partial_base_path = strdup(second_token_ptr);
210 if (!partial_base_path) {
211 PERROR("Failed to parse session path: couldn't copy partial base path");
212 goto error_regex;
213 }
214 }
215
216 ret = asprintf(&filepath_per_session, "%s/%s%s%s/%s%s%s",
217 local_session_name, hostname_ptr, datetime ? "-" : "",
218 datetime ? datetime : "",
219 partial_base_path ? partial_base_path : "",
220 partial_base_path ? "/" : "", leftover_ptr);
221 if (ret < 0) {
222 filepath_per_session = NULL;
223 goto error;
224 }
225 error_regex:
226 regfree(&regex);
227 error:
228 free(local_copy);
229 free(partial_base_path);
230 free(datetime);
231 return filepath_per_session;
232 }
This page took 0.033873 seconds and 5 git commands to generate.