From e21e74d3d22701fa56664af9ddc050e717e88663 Mon Sep 17 00:00:00 2001 From: Simon Marchi Date: Mon, 30 Oct 2023 14:38:57 -0400 Subject: [PATCH] tests: retry os.rename on PermissionError failure in lttng_live_server.py On the Windows CI jobs, we get random failures like: # plugins/src.ctf.lttng-live/test-live.sh: python3 /c/Users/jenkins/workspace/dev_review_babeltrace_master_winbuild/build/std/conf/std/platform/msys2-mingw64/src/babeltrace/tests/data/plugins/src.ctf.lttng-live/lttng_live_server.py /c/Users/jenkins/workspace/dev_review_babeltrace_master_winbuild/build/std/conf/std/platform/msys2-mingw64/src/babeltrace/tests/data/plugins/src.ctf.lttng-live/inactivity-discarded-packet.json --port-file /c/Users/jenkins/workspace/dev_review_babeltrace_master_winbuild/build/std/conf/std/platform/msys2-mingw64/tmp/test-live-server-port.Rn2dyS --trace-path-prefix C:\Users\jenkins\workspace\dev_review_babeltrace_master_winbuild\build\std\conf\std\platform\msys2-mingw64\src\babeltrace\tests\data\ctf-traces Traceback (most recent call last): File "C:/Users/jenkins/workspace/dev_review_babeltrace_master_winbuild/build/std/conf/std/platform/msys2-mingw64/src/babeltrace/tests/data/plugins/src.ctf.lttng-live/lttng_live_server.py", line 1951, in <module> LttngLiveServer(port, port_filename, sessions, max_query_data_response_size) File "C:/Users/jenkins/workspace/dev_review_babeltrace_master_winbuild/build/std/conf/std/platform/msys2-mingw64/src/babeltrace/tests/data/plugins/src.ctf.lttng-live/lttng_live_server.py", line 1667, in __init__ self._write_port_to_file(port_filename) File "C:/Users/jenkins/workspace/dev_review_babeltrace_master_winbuild/build/std/conf/std/platform/msys2-mingw64/src/babeltrace/tests/data/plugins/src.ctf.lttng-live/lttng_live_server.py", line 1792, in _write_port_to_file os.replace(tmp_port_file.name, port_filename) PermissionError: [WinError 5] Access is denied: 
'C:/Users/jenkins/workspace/dev_review_babeltrace_master_winbuild/build/std/conf/std/platform/msys2-mingw64/tmp/tmpt13jh6sp' -> 'C:/Users/jenkins/workspace/dev_review_babeltrace_master_winbuild/build/std/conf/std/platform/msys2-mingw64/tmp/test-live-server-port.Rn2dyS' The PermissionError exception is raised when trying to move the port file from its temporary location to its final location, where the bash script expects it to appear. I don't understand the root cause of the issue. When exiting the `with` scope, the temporary file is supposed to be closed, and it should be fine to move it. I suppose it's possible that something in the Windows kernel hasn't completely finished using the file when we try to move it. Implement a wait-and-retry scheme as a (bad) workaround. Change-Id: Ia8dcefca9538aa5e58438bf84a3fa67e5e05a49a Signed-off-by: Simon Marchi Reviewed-on: https://review.lttng.org/c/babeltrace/+/11174 Reviewed-by: Philippe Proulx (cherry picked from commit 6362d281d2fce21fbb610bae0cabaee4f73c3415) Reviewed-on: https://review.lttng.org/c/babeltrace/+/11224 Tested-by: jenkins CI-Build: Michael Jeanson --- .../src.ctf.lttng-live/lttng_live_server.py | 43 ++++++++++++++++--- 1 file changed, 36 insertions(+), 7 deletions(-) diff --git a/tests/data/plugins/src.ctf.lttng-live/lttng_live_server.py b/tests/data/plugins/src.ctf.lttng-live/lttng_live_server.py index 91e1cc8a..573c47e3 100644 --- a/tests/data/plugins/src.ctf.lttng-live/lttng_live_server.py +++ b/tests/data/plugins/src.ctf.lttng-live/lttng_live_server.py @@ -26,6 +26,7 @@ import logging import os import os.path import re +import time import socket import struct import sys @@ -1344,13 +1345,41 @@ class LttngLiveServer: with tempfile.NamedTemporaryFile(mode='w', delete=False) as tmp_port_file: print(self._server_port, end='', file=tmp_port_file) - # Rename temporary file to real file - os.replace(tmp_port_file.name, port_filename) - logging.info( - 'Renamed port file: src-path="{}", dst-path="{}"'.format( - 
tmp_port_file.name, port_filename - ) - ) + # Rename temporary file to real file. + # + # For unknown reasons, on Windows, moving the port file from its + # temporary location to its final location (where the user of + # the server expects it to appear) may raise a `PermissionError` + # exception. + # + # We suppose it's possible that something in the Windows kernel + # hasn't completely finished using the file when we try to move + # it. + # + # Use a wait-and-retry scheme as a (bad) workaround. + num_attempts = 5 + retry_delay_s = 1 + + for attempt in reversed(range(num_attempts)): + try: + os.replace(tmp_port_file.name, port_filename) + logging.info( + 'Renamed port file: src-path="{}", dst-path="{}"'.format( + tmp_port_file.name, port_filename + ) + ) + return + except PermissionError: + logging.info( + 'Permission error while attempting to rename port file; retrying in {} second: src-path="{}", dst-path="{}"'.format( + retry_delay_s, tmp_port_file.name, port_filename + ) + ) + + if attempt == 0: + raise + + time.sleep(retry_delay_s) # A tracing session descriptor. -- 2.34.1