Replace superfluous math.pow uses by ** operator
[deliverable/lttng-analyses.git] / lttnganalyses / common / parse_utils.py
1 # The MIT License (MIT)
2 #
3 # Copyright (C) 2016 - Antoine Busque <abusque@efficios.com>
4 #
5 # Permission is hereby granted, free of charge, to any person obtaining a copy
6 # of this software and associated documentation files (the "Software"), to deal
7 # in the Software without restriction, including without limitation the rights
8 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 # copies of the Software, and to permit persons to whom the Software is
10 # furnished to do so, subject to the following conditions:
11 #
12 # The above copyright notice and this permission notice shall be included in
13 # all copies or substantial portions of the Software.
14 #
15 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 # SOFTWARE.
22
23 import datetime
24 import re
25 from time import timezone
26 from . import trace_utils
27 from .time_utils import NSEC_PER_SEC
28
29
def _split_value_units(raw_str):
    """Split a string into its leading value and its trailing units.

    The units are considered to start at the first alphabetic
    character of the string.

    Args:
        raw_str (str): the string to parse, with numerical value and
        (optionally) units.

    Returns:
        A tuple (value, units), where value is a string and units is
        either a string or `None` if no units were found.
    """
    for position, char in enumerate(raw_str):
        if char.isalpha():
            # Everything from the first letter onwards is the units
            return (raw_str[:position], raw_str[position:])

    # No alphabetic character at all: the whole string is the value
    return (raw_str, None)
49
50
def parse_size(size_str):
    """Convert a human-readable size string to an integral number of
    bytes.

    Args:
        size_str (str): the formatted string comprised of the size and
        units.

    Returns:
        A number of bytes (int). A fractional result is truncated
        toward zero.

    Raises:
        ValueError: if units are unrecognised or the size is not a
        real number.
    """
    binary_units = ['B', 'KiB', 'MiB', 'GiB', 'TiB',
                    'PiB', 'EiB', 'ZiB', 'YiB']
    # units as printed by GNU coreutils (e.g. ls or du), using base
    # 1024 as well
    coreutils_units = ['B', 'K', 'M', 'G', 'T', 'P', 'E', 'Z', 'Y']
    si_units = ['B', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB', 'YB']

    size, units = _split_value_units(size_str)

    try:
        # Keep integral sizes as exact ints: a float cannot represent
        # every integer above 2**53, nor multipliers such as
        # 1000 ** 8, so going through float would silently alter
        # large byte counts.
        size = int(size)
    except ValueError:
        try:
            size = float(size)
        except ValueError:
            # `size` is still the unparsed string at this point
            raise ValueError('invalid size: {}'.format(size)) from None

    # If no units have been found, assume bytes
    if units is not None:
        if units in binary_units:
            base = 1024
            exponent = binary_units.index(units)
        elif units in coreutils_units:
            base = 1024
            exponent = coreutils_units.index(units)
        elif units in si_units:
            base = 1000
            exponent = si_units.index(units)
        else:
            raise ValueError('unrecognised units: {}'.format(units))

        size *= base ** exponent

    return int(size)
97
98
def parse_duration(duration_str):
    """Convert a human-readable duration string to an integral number of
    nanoseconds.

    Recognised units (case-insensitive) are s, ms, us and ns; the
    microsecond prefix may also be written with the micro sign
    (U+00B5) or the Greek small letter mu (U+03BC). A string without
    units is interpreted as seconds.

    Args:
        duration_str (str): the formatted string comprised of the
        duration and units.

    Returns:
        A number of nanoseconds (int). A fractional result is
        truncated toward zero.

    Raises:
        ValueError: if units are unrecognised or the duration is not a
        real number.
    """
    base = 1000
    # Power of `base` converting each unit to nanoseconds
    unit_exponents = {
        's': 3,
        'ms': 2,
        'us': 1,
        '\u00b5s': 1,  # micro sign
        '\u03bcs': 1,  # Greek small letter mu, visually identical
        'ns': 0,
    }

    # The units start at the first alphabetic character, if any
    units_index = next(
        (i for i, c in enumerate(duration_str) if c.isalpha()), None
    )

    if units_index is None:
        duration = duration_str
        units = None
    else:
        duration = duration_str[:units_index]
        units = duration_str[units_index:].lower()

    try:
        # Keep integral durations as exact ints: a float cannot
        # represent every integer above 2**53, which large nanosecond
        # counts exceed.
        duration = int(duration)
    except ValueError:
        try:
            duration = float(duration)
        except ValueError:
            # `duration` is still the unparsed string at this point
            raise ValueError(
                'invalid duration: {}'.format(duration)
            ) from None

    if units is None:
        # no units defaults to seconds
        exponent = unit_exponents['s']
    elif units in unit_exponents:
        exponent = unit_exponents[units]
    else:
        raise ValueError('unrecognised units: {}'.format(units))

    return int(duration * base ** exponent)
153
154
def _parse_date_full_with_nsec(date):
    """Parse a complete date and time carrying nine fractional digits.

    Accepted inputs look like 2014-12-12 17:29:43.802588035 or
    2014-12-12T17:29:43.802588035.

    Args:
        date (str): the date string to be parsed.

    Returns:
        A tuple of the format (date_time, nsec), where date_time is a
        datetime.datetime object with second resolution and nsec is
        the fractional part as an int number of nanoseconds.

    Raises:
        ValueError: if the date format does not match, or if the
        matched fields do not form a valid date.
    """
    match = re.match(
        r'^(?P<year>\d{4})-(?P<mon>[01]\d)-(?P<day>[0-3]\d)[\sTt]'
        r'(?P<hour>\d{2}):(?P<min>\d{2}):(?P<sec>\d{2})\.(?P<nsec>\d{9})$',
        date
    )

    if match is None:
        raise ValueError('Wrong date format: {}'.format(date))

    # Every named group is made of digits only, convert them all at once
    fields = {name: int(text) for name, text in match.groupdict().items()}

    date_time = datetime.datetime(
        fields['year'], fields['mon'], fields['day'],
        fields['hour'], fields['min'], fields['sec']
    )

    return date_time, fields['nsec']
194
195
def _parse_date_full(date):
    """Parse a complete date and time with second resolution.

    Accepted inputs look like 2014-12-12 17:29:43 or
    2014-12-12T17:29:43.

    Args:
        date (str): the date string to be parsed.

    Returns:
        A tuple of the format (date_time, nsec), where date_time is a
        datetime.datetime object and nsec is always 0.

    Raises:
        ValueError: if the date format does not match, or if the
        matched fields do not form a valid date.
    """
    match = re.match(
        r'^(?P<year>\d{4})-(?P<mon>[01]\d)-(?P<day>[0-3]\d)[\sTt]'
        r'(?P<hour>\d{2}):(?P<min>\d{2}):(?P<sec>\d{2})$',
        date
    )

    if match is None:
        raise ValueError('Wrong date format: {}'.format(date))

    field_names = ('year', 'mon', 'day', 'hour', 'min', 'sec')
    # The names above are listed in the positional order expected by
    # the datetime constructor
    date_time = datetime.datetime(
        *(int(match.group(name)) for name in field_names)
    )

    return date_time, 0
233
234
def _parse_date_time_with_nsec(date):
    """Parse a time of day carrying nine fractional digits.

    Accepted inputs look like 17:29:43.802588035.

    Args:
        date (str): the date string to be parsed.

    Returns:
        A tuple of the format (date_time, nsec), where date_time is a
        datetime.time object with second resolution and nsec is the
        fractional part as an int number of nanoseconds.

    Raises:
        ValueError: if the date format does not match, or if the
        matched fields do not form a valid time.
    """
    match = re.match(
        r'^(?P<hour>\d{2}):(?P<min>\d{2}):(?P<sec>\d{2})\.(?P<nsec>\d{9})$',
        date
    )

    if match is None:
        raise ValueError('Wrong date format: {}'.format(date))

    hour, minute, sec, nsec = (
        int(match.group(name)) for name in ('hour', 'min', 'sec', 'nsec')
    )

    return datetime.time(hour, minute, sec), nsec
266
267
def _parse_date_time(date):
    """Parse a time of day with second resolution.

    Accepted inputs look like 17:29:43.

    Args:
        date (str): the date string to be parsed.

    Returns:
        A tuple of the format (date_time, nsec), where date_time is a
        datetime.time object and nsec is always 0.

    Raises:
        ValueError: if the date format does not match, or if the
        matched fields do not form a valid time.
    """
    match = re.match(
        r'^(?P<hour>\d{2}):(?P<min>\d{2}):(?P<sec>\d{2})$',
        date
    )

    if match is None:
        raise ValueError('Wrong date format: {}'.format(date))

    hour, minute, sec = (
        int(match.group(name)) for name in ('hour', 'min', 'sec')
    )

    return datetime.time(hour, minute, sec), 0
298
299
def _parse_date_timestamp(date):
    """Parse timestamp string in nanoseconds from epoch.

    This matches 1418423383802588035.

    Args:
        date (str): the date string to be parsed.

    Returns:
        A tuple of the format (date_time, nsec), where date_time is a
        datetime.datetime object holding the whole seconds of the
        timestamp (as local time) and nsec is an int of the remaining
        nanoseconds.

    Raises:
        ValueError: if the date format does not match, or if the
        timestamp cannot be converted to a date.
    """
    if re.match(r'^\d+$', date) is None:
        raise ValueError('Wrong date format: {}'.format(date))

    # Split with integer arithmetic so that the sub-second part lives
    # only in nsec. A true division would also leak it into
    # date_time's microseconds, and callers adding nsec back to
    # date_time would then count it twice.
    seconds, nsec = divmod(int(date), NSEC_PER_SEC)

    try:
        date_time = datetime.datetime.fromtimestamp(seconds)
    except (OverflowError, OSError):
        # Out-of-range values are reported like any other unparsable
        # date, so that callers only have ValueError to handle.
        raise ValueError('Timestamp out of range: {}'.format(date))

    return date_time, nsec
329
330
def parse_date(date):
    """Parse a date string by trying each supported format in turn.

    The formats are tried in this order: full date with nanoseconds,
    full date, time of day with nanoseconds, time of day, and
    nanosecond timestamp. The first parser which does not raise
    ValueError provides the result.

    Args:
        date (str): the date string to be parsed.

    Returns:
        A tuple of the format (date_time, nsec), where date_time is
        one of either datetime.datetime or datetime.time, depending on
        whether the date string contains full date information or only
        the time of day. The latter case can still be useful when used
        in conjunction with a trace collection's date to provide the
        missing information. The nsec element of the tuple is an int and
        corresponds to the nanoseconds for the given date/timestamp.
        This is due to datetime objects only supporting a resolution
        down to the microsecond.

    Raises:
        ValueError: if the date does not correspond to any of the
        supported formats.
    """
    candidates = (
        _parse_date_full_with_nsec,
        _parse_date_full,
        _parse_date_time_with_nsec,
        _parse_date_time,
        _parse_date_timestamp,
    )

    for candidate in candidates:
        try:
            date_time, nsec = candidate(date)
        except ValueError:
            # Not this format, move on to the next one
            pass
        else:
            if date_time is not None and nsec is not None:
                return date_time, nsec
            # A parser handing back an incomplete result counts as the
            # final attempt without a usable match
            break

    # None of the parsers were a match
    raise ValueError('Unrecognised date format: {}'.format(date))
376
377
def parse_trace_collection_date(collection, date, gmt=False):
    """Parse a date string, using a trace collection to disambiguate
    incomplete dates.

    Args:
        collection (TraceCollection): a babeltrace TraceCollection
        instance.

        date (string): the date string to be parsed.

        gmt (bool, optional): flag indicating whether the timestamp is
        in the local timezone or gmt (default: False).

    Returns:
        A timestamp (int) in nanoseconds since epoch, corresponding to
        the parsed date.

    Raises:
        ValueError: if the date format is unrecognised, or if the date
        format does not specify the date and the trace collection spans
        multiple days.
    """
    # This raises ValueError if the date is in an invalid format; the
    # exception is deliberately left to propagate to inform the caller
    # of the problem.
    date_time, nsec = parse_date(date)

    # date_time will either be an actual datetime.datetime object, or
    # just a datetime.time object, depending on the format. In the
    # latter case, try and fill out the missing date information from
    # the trace collection's date.
    if isinstance(date_time, datetime.time):
        try:
            collection_date = trace_utils.get_trace_collection_date(collection)
        except ValueError:
            raise ValueError(
                'Invalid date format for multi-day trace: {}'.format(date)
            )

        date_time = datetime.datetime.combine(collection_date, date_time)

    if gmt:
        # NOTE(review): time.timezone is the local non-DST offset in
        # seconds *west* of UTC, and timestamp() below interprets the
        # naive datetime as local time, so mapping a GMT wall-clock
        # value would seem to call for subtracting this offset rather
        # than adding it; DST is also ignored. Behaviour is left
        # unchanged here -- confirm against callers.
        date_time = date_time + datetime.timedelta(seconds=timezone)

    # Convert only the whole seconds through timestamp(), then finish
    # in integer arithmetic: multiplying the float timestamp by
    # NSEC_PER_SEC would give a float too coarse to hold nanoseconds
    # at epoch magnitudes. Any sub-second part is carried by nsec, so
    # microseconds are dropped from date_time to avoid counting it
    # twice. (Assumes NSEC_PER_SEC is an int.)
    whole_seconds = int(date_time.replace(microsecond=0).timestamp())

    return whole_seconds * NSEC_PER_SEC + nsec
428
429
def parse_trace_collection_time_range(collection, time_range, gmt=False):
    """Parse a "[begin,end]" time range string into two timestamps.

    Both bounds are parsed as dates, using the trace collection to
    disambiguate incomplete ones. Whitespace around each bound is
    ignored.

    Args:
        collection (TraceCollection): a babeltrace TraceCollection
        instance.

        time_range (string): the time range string to be parsed.

        gmt (bool, optional): flag indicating whether the timestamps are
        in the local timezone or gmt (default: False).

    Returns:
        A tuple (begin, end) of the two timestamps in nanoseconds
        since epoch, corresponding to the parsed dates.

    Raises:
        ValueError: if the time range or date format is unrecognised,
        or if the date format does not specify the date and the trace
        collection spans multiple days.
    """
    match = re.match(r'^\[(?P<begin>.*),(?P<end>.*)\]$', time_range)

    if match is None:
        raise ValueError('Invalid time range format: {}'.format(time_range))

    begin_str, end_str = (
        match.group(bound).strip() for bound in ('begin', 'end')
    )

    # A ValueError raised for either bound is left to propagate to the
    # caller; the begin bound is always parsed first.
    begin = parse_trace_collection_date(collection, begin_str, gmt)
    end = parse_trace_collection_date(collection, end_str, gmt)

    return begin, end
This page took 0.040448 seconds and 5 git commands to generate.