Add string-utils convenience library
[lttng-tools.git] / src / common / string-utils / string-utils.c
1 /*
2 * Copyright (C) 2017 - Philippe Proulx <pproulx@efficios.com>
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of the GNU General Public License, version 2 only, as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License along with
14 * this program; if not, write to the Free Software Foundation, Inc., 51
15 * Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
16 */
17
18 #define _LGPL_SOURCE
19 #include <stdlib.h>
20 #include <string.h>
21 #include <stdbool.h>
22 #include <assert.h>
23
24 #include "string-utils.h"
25 #include "../macros.h"
26
/*
 * Bit flags returned by strutils_test_glob_pattern() to describe a
 * pattern; they can be OR'ed together.
 */
enum star_glob_pattern_type_flags {
	/* No non-escaped `*` was found. */
	STAR_GLOB_PATTERN_TYPE_FLAG_NONE = 0,

	/* A non-escaped `*` was found. */
	STAR_GLOB_PATTERN_TYPE_FLAG_PATTERN = 1 << 0,

	/* The first non-escaped `*` is the very last character. */
	STAR_GLOB_PATTERN_TYPE_FLAG_END_ONLY = 1 << 1,
};
32
33 /*
34 * Normalizes the star-only globbing pattern `pattern`, that is, crushes
35 * consecutive `*` characters into a single `*`, avoiding `\*`.
36 */
37 LTTNG_HIDDEN
38 void strutils_normalize_star_glob_pattern(char *pattern)
39 {
40 const char *p;
41 char *np;
42 bool got_star = false;
43
44 assert(pattern);
45
46 for (p = pattern, np = pattern; *p != '\0'; p++) {
47 switch (*p) {
48 case '*':
49 if (got_star) {
50 /* Avoid consecutive stars. */
51 continue;
52 }
53
54 got_star = true;
55 break;
56 case '\\':
57 /* Copy backslash character. */
58 *np = *p;
59 np++;
60 p++;
61
62 if (*p == '\0') {
63 goto end;
64 }
65
66 /* Fall through default case. */
67 default:
68 got_star = false;
69 break;
70 }
71
72 /* Copy single character. */
73 *np = *p;
74 np++;
75 }
76
77 end:
78 *np = '\0';
79 }
80
81 static
82 enum star_glob_pattern_type_flags strutils_test_glob_pattern(const char *pattern)
83 {
84 enum star_glob_pattern_type_flags ret =
85 STAR_GLOB_PATTERN_TYPE_FLAG_NONE;
86 const char *p;
87
88 assert(pattern);
89
90 for (p = pattern; *p != '\0'; p++) {
91 switch (*p) {
92 case '*':
93 ret = STAR_GLOB_PATTERN_TYPE_FLAG_PATTERN;
94
95 if (p[1] == '\0') {
96 ret |= STAR_GLOB_PATTERN_TYPE_FLAG_END_ONLY;
97 }
98
99 goto end;
100 case '\\':
101 p++;
102
103 if (*p == '\0') {
104 goto end;
105 }
106 break;
107 default:
108 break;
109 }
110 }
111
112 end:
113 return ret;
114 }
115
116 /*
117 * Returns true if `pattern` is a star-only globbing pattern, that is,
118 * it contains at least one non-escaped `*`.
119 */
120 bool strutils_is_star_glob_pattern(const char *pattern)
121 {
122 return strutils_test_glob_pattern(pattern) &
123 STAR_GLOB_PATTERN_TYPE_FLAG_PATTERN;
124 }
125
126 /*
127 * Returns true if `pattern` is a globbing pattern with a globbing,
128 * non-escaped star only at its very end.
129 */
130 bool strutils_is_star_at_the_end_only_glob_pattern(const char *pattern)
131 {
132 return strutils_test_glob_pattern(pattern) &
133 STAR_GLOB_PATTERN_TYPE_FLAG_END_ONLY;
134 }
135
136 /*
137 * Unescapes the input string `input`, that is, in a `\x` sequence,
138 * removes `\`. If `only_char` is not 0, only this character is
139 * escaped.
140 */
141 LTTNG_HIDDEN
142 char *strutils_unescape_string(const char *input, char only_char)
143 {
144 char *output;
145 char *o;
146 const char *i;
147
148 assert(input);
149 output = zmalloc(strlen(input) + 1);
150 if (!output) {
151 goto end;
152 }
153
154 for (i = input, o = output; *i != '\0'; i++) {
155 switch (*i) {
156 case '\\':
157 if (only_char && i[1] != only_char) {
158 break;
159 }
160
161 i++;
162
163 if (*i == '\0') {
164 /* Copy last `\`. */
165 *o = '\\';
166 o++;
167 goto end;
168 }
169 default:
170 break;
171 }
172
173 /* Copy single character. */
174 *o = *i;
175 o++;
176 }
177
178 end:
179 return output;
180 }
181
182 /*
183 * Frees a null-terminated array of strings, including each contained
184 * string.
185 */
186 LTTNG_HIDDEN
187 void strutils_free_null_terminated_array_of_strings(char **array)
188 {
189 char **item;
190
191 if (!array) {
192 return;
193 }
194
195 for (item = array; *item; item++) {
196 free(*item);
197 }
198
199 free(array);
200 }
201
202 /*
203 * Splits the input string `input` using the given delimiter `delim`.
204 *
205 * The return value is an allocated null-terminated array of the
206 * resulting substrings (also allocated). You can free this array and
207 * its content with strutils_free_null_terminated_array_of_strings(). You
208 * can get the number of substrings in it with
209 * strutils_array_of_strings_len().
210 *
211 * Empty substrings are part of the result. For example:
212 *
213 * Input: ,hello,,there,
214 * Result:
215 * ``
216 * `hello`
217 * ``
218 * `there`
219 * ``
220 *
221 * If `escape_delim` is true, then `\,`, where `,` is the delimiter,
222 * escapes the delimiter and is copied as `,` only in the resulting
223 * substring. For example:
224 *
225 * Input: hello\,world,zoom,\,hi
226 * Result:
227 * `hello,world`
228 * `zoom`
229 * `,hi`
230 *
231 * Other characters are not escaped (this is the caller's job if
232 * needed). However they are considering during the parsing, that is,
233 * `\x`, where `x` is any character, is copied as is to the resulting
234 * substring, e.g.:
235 *
236 * Input: hello\,wo\rld\\,zoom\,
237 * Result:
238 * `hello,wo\rld\\`
239 * `zoom,`
240 *
241 * If `escape_delim` is false, nothing at all is escaped, and `delim`,
242 * when found in `input`, is always a delimiter, e.g.:
243 *
244 * Input: hello\,world,zoom,\,hi
245 * Result:
246 * `hello\`
247 * `world`
248 * `zoom`
249 * `\`
250 * `hi`
251 *
252 * Returns NULL if there's an error.
253 */
254 LTTNG_HIDDEN
255 char **strutils_split(const char *input, char delim, bool escape_delim)
256 {
257 size_t at;
258 size_t number_of_substrings = 1;
259 size_t longest_substring_len = 0;
260 const char *s;
261 const char *last;
262 char **substrings = NULL;
263
264 assert(input);
265 assert(!(escape_delim && delim == '\\'));
266 assert(delim != '\0');
267
268 /* First pass: count the number of substrings. */
269 for (s = input, last = input - 1; *s != '\0'; s++) {
270 if (escape_delim && *s == '\\') {
271 /* Ignore following (escaped) character. */
272 s++;
273
274 if (*s == '\0') {
275 break;
276 }
277
278 continue;
279 }
280
281 if (*s == delim) {
282 size_t last_len = s - last - 1;
283 last = s;
284 number_of_substrings++;
285
286 if (last_len > longest_substring_len) {
287 longest_substring_len = last_len;
288 }
289 }
290 }
291
292 if ((s - last - 1) > longest_substring_len) {
293 longest_substring_len = s - last - 1;
294 }
295
296 substrings = calloc(number_of_substrings + 1, sizeof(*substrings));
297 if (!substrings) {
298 goto error;
299 }
300
301 /* Second pass: actually split and copy substrings. */
302 for (at = 0, s = input; at < number_of_substrings; at++) {
303 const char *ss;
304 char *d;
305
306 substrings[at] = zmalloc(longest_substring_len + 1);
307 if (!substrings[at]) {
308 goto error;
309 }
310
311 /*
312 * Copy characters to substring until we find the next
313 * delimiter or the end of the input string.
314 */
315 for (ss = s, d = substrings[at]; *ss != '\0'; ss++) {
316 if (escape_delim && *ss == '\\') {
317 if (ss[1] == delim) {
318 /*
319 * '\' followed by delimiter and
320 * we need to escape this ('\'
321 * won't be part of the
322 * resulting substring).
323 */
324 ss++;
325 *d = *ss;
326 d++;
327 continue;
328 } else {
329 /*
330 * Copy '\' and the following
331 * character.
332 */
333 *d = *ss;
334 d++;
335 ss++;
336
337 if (*ss == '\0') {
338 break;
339 }
340 }
341 } else if (*ss == delim) {
342 /* We're done with this substring. */
343 break;
344 }
345
346 *d = *ss;
347 d++;
348 }
349
350 /* Next substring starts after the last delimiter. */
351 s = ss + 1;
352 }
353
354 goto end;
355
356 error:
357 strutils_free_null_terminated_array_of_strings(substrings);
358
359 end:
360 return substrings;
361 }
362
363 LTTNG_HIDDEN
364 size_t strutils_array_of_strings_len(char * const *array)
365 {
366 char * const *item;
367 size_t count = 0;
368
369 assert(array);
370
371 for (item = array; *item; item++) {
372 count++;
373 }
374
375 return count;
376 }
This page took 0.037698 seconds and 5 git commands to generate.