be1c6e79c15b21bf9985868d52cca7fdef609c38
[deliverable/lttng-analyses.git] / lttnganalyses / common / parse_utils.py
1 # The MIT License (MIT)
2 #
3 # Copyright (C) 2016 - Antoine Busque <abusque@efficios.com>
4 #
5 # Permission is hereby granted, free of charge, to any person obtaining a copy
6 # of this software and associated documentation files (the "Software"), to deal
7 # in the Software without restriction, including without limitation the rights
8 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 # copies of the Software, and to permit persons to whom the Software is
10 # furnished to do so, subject to the following conditions:
11 #
12 # The above copyright notice and this permission notice shall be included in
13 # all copies or substantial portions of the Software.
14 #
15 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 # SOFTWARE.
22
23 import datetime
24 import re
25 from time import timezone
26 from . import trace_utils
27 from .time_utils import NSEC_PER_SEC
28
29
def _split_value_units(raw_str):
    """Separate the numerical part of a string from its trailing units.

    The split happens at the first alphabetic character: everything
    before it is the value, everything from it onwards is the units.
    Neither part is stripped or otherwise validated.

    Args:
        raw_str (str): the string to parse, with numerical value and
        (optionally) units.

    Returns:
        A tuple (value, units), where value is a string and units is
        either a string or `None` if the string holds no alphabetic
        character at all.
    """
    for position, char in enumerate(raw_str):
        if char.isalpha():
            # First letter found: the units start here.
            return (raw_str[:position], raw_str[position:])

    # No letter anywhere, so there are no units.
    return (raw_str, None)
49
50
def parse_size(size_str):
    """Convert a human-readable size string to an integral number of
    bytes.

    The numerical part is handled with decimal arithmetic instead of
    binary floating point. With floats, a value such as ``1.001KB``
    is computed as 1000.9999999999999 and truncated to 1000 bytes
    instead of 1001, and plain byte counts above 2**53 are silently
    rounded.

    Args:
        size_str (str): the formatted string comprised of the size and
        units.

    Returns:
        A number of bytes (int). Any fractional byte left after the
        unit conversion is truncated towards zero.

    Raises:
        ValueError: if units are unrecognised or the size is not a
        real number.
    """
    # Local import: keeps this function self-contained without
    # touching the module-level import block.
    import decimal

    binary_units = ['B', 'KiB', 'MiB', 'GiB', 'TiB',
                    'PiB', 'EiB', 'ZiB', 'YiB']
    # units as printed by GNU coreutils (e.g. ls or du), using base
    # 1024 as well
    coreutils_units = ['B', 'K', 'M', 'G', 'T', 'P', 'E', 'Z', 'Y']
    si_units = ['B', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB', 'YB']

    # Each family of units paired with the base its prefixes use; the
    # position of a unit in its list is the exponent of that base.
    unit_systems = (
        (binary_units, 1024),
        (coreutils_units, 1024),
        (si_units, 1000),
    )

    size, units = _split_value_units(size_str)

    try:
        size = decimal.Decimal(size)
    except decimal.InvalidOperation:
        # Callers expect ValueError, as float() used to raise.
        raise ValueError('invalid size: {}'.format(size)) from None

    # If no units have been found, assume bytes
    if units is not None:
        for known_units, base in unit_systems:
            if units in known_units:
                size *= base ** known_units.index(units)
                break
        else:
            raise ValueError('unrecognised units: {}'.format(units))

    return int(size)
97
98
def parse_duration(duration_str):
    """Convert a human-readable duration string to an integral number of
    nanoseconds.

    The numerical part is handled with decimal arithmetic instead of
    binary floating point. With floats, ``1.001us`` is computed as
    1000.9999999999999 and truncated to 1000 ns instead of 1001 ns.

    Args:
        duration_str (str): the formatted string comprised of the
        duration and units. Recognised units are ``s``, ``ms``,
        ``us`` (also spelled with the micro sign or the Greek letter
        mu) and ``ns``. A string without units is taken to be in
        seconds.

    Returns:
        A number of nanoseconds (int). Any fraction of a nanosecond
        is truncated towards zero.

    Raises:
        ValueError: if units are unrecognised or the duration is not
        a real number.
    """
    # Local import: keeps this function self-contained without
    # touching the module-level import block.
    import decimal

    base = 1000
    # Power of `base` that converts one of each unit to nanoseconds.
    exponents = {
        's': 3,
        'ms': 2,
        'us': 1,
        '\u00b5s': 1,  # MICRO SIGN
        '\u03bcs': 1,  # GREEK SMALL LETTER MU, visually identical
        'ns': 0,
    }

    duration, units = _split_value_units(duration_str)

    try:
        duration = decimal.Decimal(duration)
    except decimal.InvalidOperation:
        # Callers expect ValueError, as float() used to raise.
        raise ValueError('invalid duration: {}'.format(duration)) from None

    if units is None:
        # no units defaults to seconds
        exponent = 3
    elif units in exponents:
        exponent = exponents[units]
    else:
        raise ValueError('unrecognised units: {}'.format(units))

    return int(duration * base ** exponent)
140
141
def _parse_date_full_with_nsec(date):
    """Parse a complete date and time with a nanosecond fraction.

    Accepted layouts are 2014-12-12 17:29:43.802588035 and
    2014-12-12T17:29:43.802588035 (the separator may be whitespace,
    `T` or `t`; the fraction must have exactly nine digits).

    Args:
        date (str): the date string to be parsed.

    Returns:
        A tuple of the format (date_time, nsec), where date_time is a
        datetime.datetime object and nsec is an int of the remaining
        nanoseconds.

    Raises:
        ValueError: if the date format does not match, or if a field
        is out of range for datetime.datetime.
    """
    full_nsec_re = re.compile(
        r'^(?P<year>\d{4})-(?P<mon>[01]\d)-(?P<day>[0-3]\d)[\sTt]'
        r'(?P<hour>\d{2}):(?P<min>\d{2}):(?P<sec>\d{2})\.(?P<nsec>\d{9})$'
    )

    found = full_nsec_re.match(date)
    if found is None:
        raise ValueError('Wrong date format: {}'.format(date))

    # Every named group only ever captures digits, so int() cannot fail.
    fields = {name: int(text) for name, text in found.groupdict().items()}

    moment = datetime.datetime(
        fields['year'], fields['mon'], fields['day'],
        fields['hour'], fields['min'], fields['sec']
    )

    return moment, fields['nsec']
181
182
def _parse_date_full(date):
    """Parse a complete date and time given to the second.

    Accepted layouts are 2014-12-12 17:29:43 and 2014-12-12T17:29:43
    (the separator may be whitespace, `T` or `t`).

    Args:
        date (str): the date string to be parsed.

    Returns:
        A tuple of the format (date_time, nsec), where date_time is a
        datetime.datetime object and nsec is 0.

    Raises:
        ValueError: if the date format does not match, or if a field
        is out of range for datetime.datetime.
    """
    full_re = re.compile(
        r'^(?P<year>\d{4})-(?P<mon>[01]\d)-(?P<day>[0-3]\d)[\sTt]'
        r'(?P<hour>\d{2}):(?P<min>\d{2}):(?P<sec>\d{2})$'
    )

    found = full_re.match(date)
    if found is None:
        raise ValueError('Wrong date format: {}'.format(date))

    # Group names listed in the positional order datetime expects.
    ordered_groups = ('year', 'mon', 'day', 'hour', 'min', 'sec')
    moment = datetime.datetime(
        *[int(found.group(name)) for name in ordered_groups]
    )

    # This format has no sub-second information.
    return moment, 0
220
221
def _parse_date_time_with_nsec(date):
    """Parse a time of day carrying a nanosecond fraction.

    The only accepted layout is 17:29:43.802588035 (the fraction must
    have exactly nine digits).

    Args:
        date (str): the date string to be parsed.

    Returns:
        A tuple of the format (date_time, nsec), where date_time is a
        datetime.time object and nsec is an int of the remaining
        nanoseconds.

    Raises:
        ValueError: if the date format does not match, or if a field
        is out of range for datetime.time.
    """
    time_nsec_re = re.compile(
        r'^(?P<hour>\d{2}):(?P<min>\d{2}):(?P<sec>\d{2})\.(?P<nsec>\d{9})$'
    )

    found = time_nsec_re.match(date)
    if found is None:
        raise ValueError('Wrong date format: {}'.format(date))

    hours, minutes, seconds, nanoseconds = (
        int(found.group(name)) for name in ('hour', 'min', 'sec', 'nsec')
    )

    return datetime.time(hours, minutes, seconds), nanoseconds
253
254
def _parse_date_time(date):
    """Parse a time of day given to the second.

    The only accepted layout is 17:29:43.

    Args:
        date (str): the date string to be parsed.

    Returns:
        A tuple of the format (date_time, nsec), where date_time is a
        datetime.time object and nsec is 0.

    Raises:
        ValueError: if the date format does not match, or if a field
        is out of range for datetime.time.
    """
    time_re = re.compile(
        r'^(?P<hour>\d{2}):(?P<min>\d{2}):(?P<sec>\d{2})$'
    )

    found = time_re.match(date)
    if found is None:
        raise ValueError('Wrong date format: {}'.format(date))

    parts = found.groupdict()
    time_of_day = datetime.time(
        int(parts['hour']), int(parts['min']), int(parts['sec'])
    )

    # This format has no sub-second information.
    return time_of_day, 0
285
286
def _parse_date_timestamp(date):
    """Parse timestamp string in nanoseconds from epoch.

    This matches 1418423383802588035.

    The timestamp is split into whole seconds and leftover
    nanoseconds with integer arithmetic. Dividing into a float
    instead rounds a timestamp such as 1418423383999999999 up to the
    next whole second while nsec still holds 999999999, which yields
    a result one second too late.

    Args:
        date (str): the date string to be parsed.

    Returns:
        A tuple of the format (date_time, nsec), where date_time is a
        datetime.datetime object (as built by
        datetime.datetime.fromtimestamp, with no sub-second part) and
        nsec is an int of the remaining nanoseconds.

    Raises:
        ValueError: if the date format does not match, or if the
        timestamp is too large to be converted to a datetime.
    """
    pattern = re.compile(r'^\d+$')

    if not pattern.match(date):
        raise ValueError('Wrong date format: {}'.format(date))

    # NSEC_PER_SEC is expected to be an int, keeping both results exact.
    seconds, nsec = divmod(int(date), NSEC_PER_SEC)

    try:
        date_time = datetime.datetime.fromtimestamp(seconds)
    except (OverflowError, OSError):
        # fromtimestamp() reports out-of-range values with these
        # exception types; callers of the parsers only handle
        # ValueError.
        raise ValueError('Timestamp out of range: {}'.format(date)) from None

    return date_time, nsec
319
320
def parse_date(date):
    """Parse a date string by trying each supported format in turn.

    The formats are tried from the most specific to the least
    specific: full date with nanoseconds, full date, time of day with
    nanoseconds, time of day, and finally a raw timestamp. The first
    parser that does not raise ValueError decides the result.

    Args:
        date (str): the date string to be parsed.

    Returns:
        A tuple of the format (date_time, nsec), where date_time is
        one of either datetime.datetime or datetime.time, depending on
        whether the date string contains full date information or only
        the time of day. The latter case can still be useful when used
        in conjunction with a trace collection's date to provide the
        missing information. The nsec element of the tuple is an int
        and corresponds to the nanoseconds for the given
        date/timestamp. This is due to datetime objects only
        supporting a resolution down to the microsecond.

    Raises:
        ValueError: if the date does not correspond to any of the
        supported formats.
    """
    candidates = (
        _parse_date_full_with_nsec,
        _parse_date_full,
        _parse_date_time_with_nsec,
        _parse_date_time,
        _parse_date_timestamp,
    )

    # Stays (None, None) unless one of the parsers accepts the string.
    outcome = (None, None)

    for attempt in candidates:
        try:
            when, remainder_ns = attempt(date)
        except ValueError:
            # Not this format, move on to the next one.
            continue

        outcome = (when, remainder_ns)
        break

    if any(part is None for part in outcome):
        # None of the parsers were a match
        raise ValueError('Unrecognised date format: {}'.format(date))

    return outcome
366
367
def parse_trace_collection_date(collection, date, gmt=False):
    """Parse a date string, using a trace collection to disambiguate
    incomplete dates.

    Args:
        collection (TraceCollection): a babeltrace TraceCollection
        instance.

        date (string): the date string to be parsed.

        gmt (bool, optional): flag indicating whether the timestamp is
        in the local timezone or gmt (default: False).

    Returns:
        A timestamp (int) in nanoseconds since epoch, corresponding to
        the parsed date.

    Raises:
        ValueError: if the date format is unrecognised, or if the date
        format does not specify the date and the trace collection spans
        multiple days.
    """
    # A ValueError raised here for an invalid format is deliberately
    # left to propagate to the caller.
    date_time, nsec = parse_date(date)

    # date_time will either be an actual datetime.datetime object, or
    # just a datetime.time object, depending on the format. In the
    # latter case, try and fill out the missing date information from
    # the trace collection's date.
    if isinstance(date_time, datetime.time):
        try:
            collection_date = trace_utils.get_trace_collection_date(collection)
        except ValueError:
            raise ValueError(
                'Invalid date format for multi-day trace: {}'.format(date)
            )

        date_time = datetime.datetime.combine(collection_date, date_time)

    if gmt:
        # `timezone` is time.timezone: the local non-DST offset, in
        # seconds west of UTC.
        # NOTE(review): daylight saving time is not taken into
        # account here -- confirm this is intended.
        date_time = date_time + datetime.timedelta(seconds=timezone)

    # Convert the whole seconds to int *before* scaling. Multiplying
    # the float returned by timestamp() would make the result a float,
    # and a float cannot hold a nanosecond timestamp (about 1.4e18)
    # exactly, so the nsec part would be rounded away. The date
    # parsers in this module leave no sub-second part in date_time
    # (it is carried by nsec), so int() discards nothing.
    timestamp_ns = int(date_time.timestamp()) * NSEC_PER_SEC + nsec

    return timestamp_ns
418
419
def parse_trace_collection_time_range(collection, time_range, gmt=False):
    """Parse a `[begin,end]` time range string, using a trace
    collection to disambiguate incomplete dates.

    The range must be wrapped in square brackets and hold at least
    one comma; the text after the last comma is the end date and
    everything before it is the begin date. Both are stripped of
    surrounding whitespace before being parsed.

    Args:
        collection (TraceCollection): a babeltrace TraceCollection
        instance.

        time_range (string): the time range string to be parsed.

        gmt (bool, optional): flag indicating whether the timestamps are
        in the local timezone or gmt (default: False).

    Returns:
        A tuple (begin, end) of the two timestamps in nanoseconds
        since epoch, as returned by parse_trace_collection_date for
        each bound.

    Raises:
        ValueError: if the time range or date format is unrecognised,
        or if the date format does not specify the date and the trace
        collection spans multiple days.
    """
    range_re = re.compile(r'^\[(?P<begin>.*),(?P<end>.*)\]$')

    found = range_re.match(time_range)
    if found is None:
        raise ValueError('Invalid time range format: {}'.format(time_range))

    begin_text = found.group('begin').strip()
    end_text = found.group('end').strip()

    # A ValueError from either bound simply propagates to the caller.
    range_start = parse_trace_collection_date(collection, begin_text, gmt)
    range_stop = parse_trace_collection_date(collection, end_text, gmt)

    return range_start, range_stop
This page took 0.038504 seconds and 4 git commands to generate.