Fix erroneous comments in parse_utils
[deliverable/lttng-analyses.git] / lttnganalyses / common / parse_utils.py
1 # The MIT License (MIT)
2 #
3 # Copyright (C) 2016 - Antoine Busque <abusque@efficios.com>
4 #
5 # Permission is hereby granted, free of charge, to any person obtaining a copy
6 # of this software and associated documentation files (the "Software"), to deal
7 # in the Software without restriction, including without limitation the rights
8 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 # copies of the Software, and to permit persons to whom the Software is
10 # furnished to do so, subject to the following conditions:
11 #
12 # The above copyright notice and this permission notice shall be included in
13 # all copies or substantial portions of the Software.
14 #
15 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 # SOFTWARE.
22
23 import datetime
24 import math
25 import re
26 from time import timezone
27 from . import trace_utils
28 from .time_utils import NSEC_PER_SEC
29
30
def _split_value_units(raw_str):
    """Separate a raw string into its numerical value and its units.

    The units are taken to start at the first alphabetic character of
    the string; everything before it is the value.

    Args:
        raw_str (str): the string to split, made up of a numerical
        value (optionally) followed by units.

    Returns:
        A tuple (value, units), where value is a string and units is
        either a string or `None` if no units were found.
    """
    for position, char in enumerate(raw_str):
        if char.isalpha():
            # Everything from the first letter onwards is the units
            return (raw_str[:position], raw_str[position:])

    # No alphabetic character at all, hence no units
    return (raw_str, None)
50
51
def parse_size(size_str):
    """Turn a human-readable size string into a whole number of bytes.

    Args:
        size_str (str): the formatted string comprised of the size and
        (optionally) units. Without units, the size is taken to be in
        bytes.

    Returns:
        A number of bytes, as an int.

    Raises:
        ValueError: if units are unrecognised or the size is not a
        real number.
    """
    # Each family of units is paired with the base it is expressed in;
    # the position of a unit within its family is the exponent applied
    # to that base. Families are tried in order.
    unit_families = (
        # IEC binary units
        (['B', 'KiB', 'MiB', 'GiB', 'TiB',
          'PiB', 'EiB', 'ZiB', 'YiB'], 1024),
        # units as printed by GNU coreutils (e.g. ls or du), using
        # base 1024 as well
        (['B', 'K', 'M', 'G', 'T', 'P', 'E', 'Z', 'Y'], 1024),
        # SI decimal units
        (['B', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB', 'YB'], 1000),
    )

    raw_size, units = _split_value_units(size_str)

    try:
        size = float(raw_size)
    except ValueError:
        raise ValueError('invalid size: {}'.format(raw_size))

    if units is None:
        # No units given, the value is already in bytes
        return int(size)

    for family, base in unit_families:
        if units in family:
            return int(size * math.pow(base, family.index(units)))

    raise ValueError('unrecognised units: {}'.format(units))
98
99
def parse_duration(duration_str):
    """Convert a human-readable duration string to an integral number of
    nanoseconds.

    Units are matched case-insensitively and may be one of s, ms, us
    or ns. The microsecond prefix may also be written with either the
    micro sign (U+00B5) or the Greek small letter mu (U+03BC).

    Args:
        duration_str (str): the formatted string comprised of the
        duration and (optionally) units. Without units, the duration
        is taken to be in seconds.

    Returns:
        A number of nanoseconds.

    Raises:
        ValueError: if units are unrecognised or the duration is not
        a real number.
    """
    base = 1000
    # Power of `base` converting each unit to nanoseconds. The two
    # spellings of the micro prefix are visually identical but are
    # distinct code points, so both are listed explicitly.
    exponents = {
        's': 3,
        'ms': 2,
        'us': 1,
        '\u00b5s': 1,
        '\u03bcs': 1,
        'ns': 0,
    }

    # Units start at the first alphabetic character, if any
    units_index = next(
        (i for i, c in enumerate(duration_str) if c.isalpha()), None
    )

    if units_index is None:
        duration = duration_str
        units = None
    else:
        duration = duration_str[:units_index]
        units = duration_str[units_index:].lower()

    try:
        duration = float(duration)
    except ValueError:
        raise ValueError('invalid duration: {}'.format(duration))

    if units is None:
        # no units defaults to seconds
        exponent = exponents['s']
    elif units in exponents:
        exponent = exponents[units]
    else:
        raise ValueError('unrecognised units: {}'.format(units))

    duration *= math.pow(base, exponent)

    return int(duration)
154
155
def _parse_date_full_with_nsec(date):
    """Parse full date string with nanosecond resolution.

    This matches either 2014-12-12 17:29:43.802588035 or
    2014-12-12T17:29:43.802588035.

    Args:
        date (str): the date string to be parsed.

    Returns:
        A tuple of the format (date_time, nsec), where date_time is a
        datetime.datetime object and nsec is an int of the remaining
        nanoseconds.

    Raises:
        ValueError: if the date format does not match, or if a field
        is out of range for datetime.datetime.
    """
    pattern = re.compile(
        r'^(?P<year>\d{4})-(?P<mon>[01]\d)-(?P<day>[0-3]\d)[\sTt]'
        r'(?P<hour>\d{2}):(?P<min>\d{2}):(?P<sec>\d{2})\.(?P<nsec>\d{9})$'
    )

    # Run the regex once and read every field off the same match
    match = pattern.match(date)
    if match is None:
        raise ValueError('Wrong date format: {}'.format(date))

    date_time = datetime.datetime(
        int(match.group('year')), int(match.group('mon')),
        int(match.group('day')), int(match.group('hour')),
        int(match.group('min')), int(match.group('sec'))
    )

    return date_time, int(match.group('nsec'))
195
196
def _parse_date_full(date):
    """Parse full date string.

    This matches either 2014-12-12 17:29:43 or 2014-12-12T17:29:43.

    Args:
        date (str): the date string to be parsed.

    Returns:
        A tuple of the format (date_time, nsec), where date_time is a
        datetime.datetime object and nsec is 0.

    Raises:
        ValueError: if the date format does not match, or if a field
        is out of range for datetime.datetime.
    """
    pattern = re.compile(
        r'^(?P<year>\d{4})-(?P<mon>[01]\d)-(?P<day>[0-3]\d)[\sTt]'
        r'(?P<hour>\d{2}):(?P<min>\d{2}):(?P<sec>\d{2})$'
    )

    # Run the regex once and read every field off the same match
    match = pattern.match(date)
    if match is None:
        raise ValueError('Wrong date format: {}'.format(date))

    date_time = datetime.datetime(
        int(match.group('year')), int(match.group('mon')),
        int(match.group('day')), int(match.group('hour')),
        int(match.group('min')), int(match.group('sec'))
    )

    # This format has no sub-second information
    return date_time, 0
234
235
def _parse_date_time_with_nsec(date):
    """Parse time string with nanosecond resolution.

    This matches 17:29:43.802588035.

    Args:
        date (str): the date string to be parsed.

    Returns:
        A tuple of the format (date_time, nsec), where date_time is a
        datetime.time object and nsec is an int of the remaining
        nanoseconds.

    Raises:
        ValueError: if the date format does not match, or if a field
        is out of range for datetime.time.
    """
    pattern = re.compile(
        r'^(?P<hour>\d{2}):(?P<min>\d{2}):(?P<sec>\d{2})\.(?P<nsec>\d{9})$'
    )

    # Run the regex once and read every field off the same match
    match = pattern.match(date)
    if match is None:
        raise ValueError('Wrong date format: {}'.format(date))

    time_of_day = datetime.time(
        int(match.group('hour')), int(match.group('min')),
        int(match.group('sec'))
    )

    return time_of_day, int(match.group('nsec'))
267
268
def _parse_date_time(date):
    """Parse time string.

    This matches 17:29:43.

    Args:
        date (str): the date string to be parsed.

    Returns:
        A tuple of the format (date_time, nsec), where date_time is a
        datetime.time object and nsec is 0.

    Raises:
        ValueError: if the date format does not match, or if a field
        is out of range for datetime.time.
    """
    pattern = re.compile(
        r'^(?P<hour>\d{2}):(?P<min>\d{2}):(?P<sec>\d{2})$'
    )

    # Run the regex once and read every field off the same match
    match = pattern.match(date)
    if match is None:
        raise ValueError('Wrong date format: {}'.format(date))

    time_of_day = datetime.time(
        int(match.group('hour')), int(match.group('min')),
        int(match.group('sec'))
    )

    # This format has no sub-second information
    return time_of_day, 0
299
300
def _parse_date_timestamp(date):
    """Parse timestamp string in nanoseconds from epoch.

    This matches 1418423383802588035.

    Args:
        date (str): the date string to be parsed.

    Returns:
        A tuple of the format (date_time, nsec), where date_time is a
        datetime.datetime object (in local time, truncated to the
        second) and nsec is an int of the remaining nanoseconds.

    Raises:
        ValueError: if the date format does not match, or if the
        timestamp cannot be represented as a datetime.datetime.
    """
    pattern = re.compile(r'^\d+$')

    if not pattern.match(date):
        raise ValueError('Wrong date format: {}'.format(date))

    timestamp_ns = int(date)

    # Split with integer arithmetic so that the whole sub-second part
    # ends up in nsec only. A float division would both leak the
    # microseconds into date_time (counting them twice once nsec is
    # added back) and risk rounding up to the next second.
    seconds, nsec = divmod(timestamp_ns, NSEC_PER_SEC)

    try:
        date_time = datetime.datetime.fromtimestamp(seconds)
    except (OverflowError, OSError):
        # Out of the range supported by the platform: report it the
        # same way as any other unparsable date.
        raise ValueError('Wrong date format: {}'.format(date))

    return date_time, nsec
330
331
def parse_date(date):
    """Parse a date string by trying each supported format in turn.

    Args:
        date (str): the date string to be parsed.

    Returns:
        A tuple of the format (date_time, nsec), where date_time is
        one of either datetime.datetime or datetime.time, depending on
        whether the date string contains full date information or only
        the time of day. The latter case can still be useful when used
        in conjunction with a trace collection's date to provide the
        missing information. The nsec element of the tuple is an int
        and corresponds to the nanoseconds for the given
        date/timestamp. This is due to datetime objects only
        supporting a resolution down to the microsecond.

    Raises:
        ValueError: if the date does not correspond to any of the
        supported formats.
    """
    # Ordered from the most to the least specific format
    candidates = (
        _parse_date_full_with_nsec, _parse_date_full,
        _parse_date_time_with_nsec, _parse_date_time,
        _parse_date_timestamp,
    )

    for candidate in candidates:
        try:
            date_time, nsec = candidate(date)
        except ValueError:
            # Not this format, move on to the next one
            continue

        # The first parser to accept the string wins
        return date_time, nsec

    # None of the parsers were a match
    raise ValueError('Unrecognised date format: {}'.format(date))
377
378
def parse_trace_collection_date(collection, date, gmt=False):
    """Parse a date string, using a trace collection to disambiguate
    incomplete dates.

    Args:
        collection (TraceCollection): a babeltrace TraceCollection
        instance.

        date (string): the date string to be parsed.

        gmt (bool, optional): flag indicating whether the timestamp is
        in the local timezone or gmt (default: False).

    Returns:
        A timestamp (int) in nanoseconds since epoch, corresponding to
        the parsed date.

    Raises:
        ValueError: if the date format is unrecognised, or if the date
        format does not specify the date and the trace collection spans
        multiple days.
    """
    # parse_date raises ValueError on an unrecognised format; let it
    # propagate to inform the caller of the problem.
    date_time, nsec = parse_date(date)

    # date_time will either be an actual datetime.datetime object, or
    # just a datetime.time object, depending on the format. In the
    # latter case, try and fill out the missing date information from
    # the trace collection's date.
    if isinstance(date_time, datetime.time):
        try:
            collection_date = trace_utils.get_trace_collection_date(collection)
        except ValueError:
            raise ValueError(
                'Invalid date format for multi-day trace: {}'.format(date)
            )

        date_time = datetime.datetime.combine(collection_date, date_time)

    # The sub-second part is carried by nsec; make sure it is not also
    # present in date_time, otherwise it would be counted twice.
    date_time = date_time.replace(microsecond=0)

    if gmt:
        # Interpret the wall-clock value as UTC. A naive datetime is
        # read as local time by timestamp(), so shifting it by
        # time.timezone is both sign-sensitive and blind to DST; an
        # aware datetime converts exactly.
        date_time = date_time.replace(tzinfo=datetime.timezone.utc)

    # Stay in integer arithmetic: a float cannot hold a nanosecond
    # epoch timestamp without losing the low-order digits.
    timestamp_ns = int(date_time.timestamp()) * NSEC_PER_SEC + nsec

    return timestamp_ns
429
430
def parse_trace_collection_time_range(collection, time_range, gmt=False):
    """Parse a time range string, using a trace collection to
    disambiguate incomplete dates.

    The expected format is `[begin,end]`, where begin and end are
    date strings; whitespace around each of them is ignored.

    Args:
        collection (TraceCollection): a babeltrace TraceCollection
        instance.

        time_range (string): the time range string to be parsed.

        gmt (bool, optional): flag indicating whether the timestamps are
        in the local timezone or gmt (default: False).

    Returns:
        A tuple (begin, end) of the two timestamps (int) in nanoseconds
        since epoch, corresponding to the parsed dates.

    Raises:
        ValueError: if the time range or date format is unrecognised,
        or if the date format does not specify the date and the trace
        collection spans multiple days.
    """
    pattern = re.compile(r'^\[(?P<begin>.*),(?P<end>.*)\]$')

    # Run the regex once and read both bounds off the same match
    match = pattern.match(time_range)
    if match is None:
        raise ValueError('Invalid time range format: {}'.format(time_range))

    begin_str = match.group('begin').strip()
    end_str = match.group('end').strip()

    # If either of the dates is in the wrong format, the ValueError
    # raised here propagates to the caller.
    begin = parse_trace_collection_date(collection, begin_str, gmt)
    end = parse_trace_collection_date(collection, end_str, gmt)

    return begin, end
This page took 0.040136 seconds and 5 git commands to generate.