diff --git a/package/debian13/control b/package/debian13/control index 1d4b2a2441..6e1032de26 100644 --- a/package/debian13/control +++ b/package/debian13/control @@ -36,6 +36,7 @@ Build-Depends: cython3 (>= 0.23.2), python3-sphinx, python3-sphinx-copybutton, python3-sphinxcontrib.programoutput, + python3-filelock, xauth, xvfb Standards-Version: 4.1.3 @@ -74,7 +75,10 @@ Description: Toolbox for X-Ray data analysis - Executables Package: python3-silx Architecture: any Section: python -Depends: ${misc:Depends}, ${python3:Depends}, ${shlibs:Depends} +Depends: ${misc:Depends}, + ${python3:Depends}, + ${shlibs:Depends}, + python3-filelock Description: Toolbox for X-Ray data analysis - Python3 The silx project aims at providing a collection of Python packages to support the development of data assessment, reduction and analysis diff --git a/pyproject.toml b/pyproject.toml index 4b57e54503..45de5d6beb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,6 +36,7 @@ dependencies = [ 'h5py >= 3', 'fabio', 'pydantic >= 2', + 'filelock', ] [build-system] diff --git a/src/silx/_version.py b/src/silx/_version.py index 6492dc9820..652e9025d6 100755 --- a/src/silx/_version.py +++ b/src/silx/_version.py @@ -71,8 +71,8 @@ MAJOR = 3 MINOR = 0 -MICRO = 1 -RELEV = "final" # <16 +MICRO = 2 +RELEV = "dev" # <16 SERIAL = 0 # <16 date = __date__ diff --git a/src/silx/utils/ExternalResources.py b/src/silx/utils/ExternalResources.py index 258306f113..6a93ec04e5 100644 --- a/src/silx/utils/ExternalResources.py +++ b/src/silx/utils/ExternalResources.py @@ -25,7 +25,7 @@ __authors__ = ["Thomas Vincent", "J. Kieffer"] __license__ = "MIT" -__date__ = "21/12/2021" +__date__ = "12/05/2026" import hashlib @@ -34,12 +34,14 @@ import os import sys import tarfile -import threading import tempfile +import threading +import time import unittest import urllib.request import urllib.error import zipfile +import filelock logger = logging.getLogger(__name__) @@ -74,6 +76,13 @@ def __init__(self, project, url_base, env_key=None, timeout=60, data_home=None): self.all_data = {} self.timeout = timeout self._data_home = data_home + self.testdata = "" + self.lock = None + + @property + def lockfile(self): + """Returns the lockfile path.""" + return self.testdata + ".lock" @property def data_home(self): @@ -101,8 +110,14 @@ def data_home(self): basename = f"{self.project}_testdata_{name}" data_home = os.path.join(tempfile.gettempdir(), basename) if not os.path.exists(data_home): - os.makedirs(data_home) + try: + os.makedirs(data_home) + except OSError as exc: + raise RuntimeError( + f"Unable to create data directory {data_home} ! ({exc})" + ) self._data_home = data_home + return data_home def get_hash(self, filename=None, data=None): @@ -126,15 +141,8 @@ def _initialize_data(self): with self.sem: if not self._initialized: self.testdata = os.path.join(self.data_home, "all_testdata.json") - if os.path.exists(self.testdata): - with open(self.testdata) as f: - jdata = json.load(f) - if isinstance(jdata, dict): - self.all_data = jdata - else: - # recalculate the hash only if the data was stored as a list - self.all_data = {k: self.get_hash(k) for k in jdata} - self.save_json() + self.lock = filelock.FileLock(self.lockfile, timeout=self.timeout) + self.all_data = self.load_json() self._initialized = True def getfile(self, filename): @@ -151,7 +159,34 @@ def getfile(self, filename): fullfilename = os.path.abspath(os.path.join(self.data_home, filename)) - if not os.path.isfile(fullfilename): + if os.path.isfile(fullfilename): + if filename not in self.all_data: + """File already exists but is not in the list of known files""" + time_out = time.perf_counter() + self.timeout + while time.perf_counter() < time_out: + dico = self.load_json() + if filename in dico: + dico.update(self.all_data) + self.all_data = dico + break + time.sleep(1) + else: + logger.warning( + f"Timeout! Filename {filename} not present in all_data:{os.linesep}{json.dumps(self.all_data, indent=2)}" + ) + os.remove(fullfilename) + return self.getfile(filename) + + h = self.hash() + with open(fullfilename, mode="rb") as fd: + h.update(fd.read()) + + if h.hexdigest() != self.all_data[filename]: + logger.warning(f"Detected corrupted file {fullfilename} !") + self.all_data.pop(filename) + os.unlink(fullfilename) + return self.getfile(filename) + else: logger.debug( "Trying to download file %s, timeout set to %ss", filename, @@ -174,53 +209,68 @@ def getfile(self, filename): data = opener( f"{self.url_base}/{filename}", data=None, timeout=self.timeout ).read() - logger.info("File %s successfully downloaded.", filename) + logger.info(f"File {filename} successfully downloaded.") except urllib.error.URLError: raise unittest.SkipTest("network unreachable.") - if not os.path.isdir(os.path.dirname(fullfilename)): - # Create sub-directory if needed - os.makedirs(os.path.dirname(fullfilename)) + dirname = os.path.dirname(fullfilename) + if not os.path.isdir(dirname): + """Create sub-directory if needed""" + os.makedirs(dirname) try: with open(fullfilename, mode="wb") as outfile: outfile.write(data) except OSError: - raise OSError("unable to write downloaded \ - data to disk at %s" % self.data_home) + raise OSError(f"unable to write downloaded \ + data to disk at {fullfilename}") - if not os.path.isfile(fullfilename): + if os.path.isfile(fullfilename): + self.all_data[filename] = self.get_hash(data=data) + self.save_json() + else: raise RuntimeError("""Could not automatically download test files %s! If you are behind a firewall, please set both environment variable http_proxy and https_proxy. This even works under windows ! Otherwise please try to download the files manually from %s/%s""" % (filename, self.url_base, filename)) - else: - self.all_data[filename] = self.get_hash(data=data) - self.save_json() - - else: - h = self.hash() - with open(fullfilename, mode="rb") as fd: - h.update(fd.read()) - if h.hexdigest() != self.all_data[filename]: - logger.warning(f"Detected corruped file {fullfilename}") - self.all_data.pop(filename) - os.unlink(fullfilename) - return self.getfile(filename) - return fullfilename + def load_json(self) -> dict: + """Loads the JSON file containing the list of files and their hashes""" + all_data = {} + if self.testdata and os.path.exists(self.testdata): + try: + with self.lock: + with open(self.testdata) as f: + jdata = json.load(f) + except filelock.Timeout: + logger.error("Unable to lock JSON file") + jdata = {} + if isinstance(jdata, dict): + all_data = jdata + else: + # recalculate the hash only if the data was stored as a list + all_data = {k: self.get_hash(k) for k in jdata} + return all_data + def save_json(self): - file_list = list(self.all_data.keys()) + """Saves the JSON file containing the list of files and their hashes""" + dico = self.load_json() + dico.update(self.all_data) + file_list = list(dico.keys()) file_list.sort() - dico = {i: self.all_data[i] for i in file_list} + dico = {i: dico[i] for i in file_list} # reorder items + try: - with open(self.testdata, "w") as fp: - json.dump(dico, fp, indent=4) + with self.lock: + with open(self.testdata, "w") as fp: + json.dump(dico, fp, indent=4) + except filelock.Timeout: + logger.error("Unable to lock JSON file") except OSError: - logger.info("Unable to save JSON dict") + logger.error("Unable to save JSON dict") def getdir(self, dirname): """Downloads the requested tarball from the server