Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion package/debian13/control
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ Build-Depends: cython3 (>= 0.23.2),
python3-sphinx,
python3-sphinx-copybutton,
python3-sphinxcontrib.programoutput,
python3-filelock,
xauth,
xvfb
Standards-Version: 4.1.3
Expand Down Expand Up @@ -74,7 +75,10 @@ Description: Toolbox for X-Ray data analysis - Executables
Package: python3-silx
Architecture: any
Section: python
Depends: ${misc:Depends}, ${python3:Depends}, ${shlibs:Depends}
Depends: ${misc:Depends},
${python3:Depends},
${shlibs:Depends},
python3-filelock
Description: Toolbox for X-Ray data analysis - Python3
The silx project aims at providing a collection of Python packages to
support the development of data assessment, reduction and analysis
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ dependencies = [
'h5py >= 3',
'fabio',
'pydantic >= 2',
'filelock',
]

[build-system]
Expand Down
4 changes: 2 additions & 2 deletions src/silx/_version.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,8 @@

MAJOR = 3
MINOR = 0
MICRO = 1
RELEV = "final" # <16
MICRO = 2
RELEV = "dev" # <16
SERIAL = 0 # <16

date = __date__
Expand Down
128 changes: 89 additions & 39 deletions src/silx/utils/ExternalResources.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@

__authors__ = ["Thomas Vincent", "J. Kieffer"]
__license__ = "MIT"
__date__ = "21/12/2021"
__date__ = "12/05/2026"


import hashlib
Expand All @@ -34,12 +34,14 @@
import os
import sys
import tarfile
import threading
import tempfile
import threading
import time
import unittest
import urllib.request
import urllib.error
import zipfile
import filelock

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -74,6 +76,13 @@ def __init__(self, project, url_base, env_key=None, timeout=60, data_home=None):
self.all_data = {}
self.timeout = timeout
self._data_home = data_home
self.testdata = ""
self.lock = None

@property
def lockfile(self) -> str:
    """Returns the lockfile path.

    The path is ``self.testdata`` with ``.lock`` appended. ``testdata`` is an
    empty string until ``_initialize_data`` sets it to the JSON index path, so
    before that point this evaluates to just ``".lock"``.
    """
    return self.testdata + ".lock"

@property
def data_home(self):
Expand Down Expand Up @@ -101,8 +110,14 @@ def data_home(self):
basename = f"{self.project}_testdata_{name}"
data_home = os.path.join(tempfile.gettempdir(), basename)
if not os.path.exists(data_home):
os.makedirs(data_home)
try:
os.makedirs(data_home)
except OSError as exc:
raise RuntimeError(
f"Unable to create data directory {data_home} ! ({exc})"
)
self._data_home = data_home

return data_home

def get_hash(self, filename=None, data=None):
Expand All @@ -126,15 +141,8 @@ def _initialize_data(self):
with self.sem:
if not self._initialized:
self.testdata = os.path.join(self.data_home, "all_testdata.json")
if os.path.exists(self.testdata):
with open(self.testdata) as f:
jdata = json.load(f)
if isinstance(jdata, dict):
self.all_data = jdata
else:
# recalculate the hash only if the data was stored as a list
self.all_data = {k: self.get_hash(k) for k in jdata}
self.save_json()
self.lock = filelock.FileLock(self.lockfile, timeout=self.timeout)
self.all_data = self.load_json()
self._initialized = True

def getfile(self, filename):
Expand All @@ -151,7 +159,34 @@ def getfile(self, filename):

fullfilename = os.path.abspath(os.path.join(self.data_home, filename))

if not os.path.isfile(fullfilename):
if os.path.isfile(fullfilename):
if filename not in self.all_data:
"""File already exists but is not in the list of known files"""
time_out = time.perf_counter() + self.timeout
while time.perf_counter() < time_out:
dico = self.load_json()
if filename in dico:
dico.update(self.all_data)
self.all_data = dico
break
time.sleep(1)
else:
logger.warning(
f"Timeout! Filename {filename} not present in all_data:{os.linesep}{json.dumps(self.all_data, indent=2)}"
)
os.remove(fullfilename)
return self.getfile(filename)

h = self.hash()
with open(fullfilename, mode="rb") as fd:
h.update(fd.read())

if h.hexdigest() != self.all_data[filename]:
logger.warning(f"Detected corrupted file {fullfilename} !")
self.all_data.pop(filename)
os.unlink(fullfilename)
return self.getfile(filename)
else:
logger.debug(
"Trying to download file %s, timeout set to %ss",
filename,
Expand All @@ -174,53 +209,68 @@ def getfile(self, filename):
data = opener(
f"{self.url_base}/{filename}", data=None, timeout=self.timeout
).read()
logger.info("File %s successfully downloaded.", filename)
logger.info(f"File {filename} successfully downloaded.")
except urllib.error.URLError:
raise unittest.SkipTest("network unreachable.")

if not os.path.isdir(os.path.dirname(fullfilename)):
# Create sub-directory if needed
os.makedirs(os.path.dirname(fullfilename))
dirname = os.path.dirname(fullfilename)
if not os.path.isdir(dirname):
"""Create sub-directory if needed"""
os.makedirs(dirname)

try:
with open(fullfilename, mode="wb") as outfile:
outfile.write(data)
except OSError:
raise OSError("unable to write downloaded \
data to disk at %s" % self.data_home)
raise OSError(f"unable to write downloaded \
data to disk at {fullfilename}")

if not os.path.isfile(fullfilename):
if os.path.isfile(fullfilename):
self.all_data[filename] = self.get_hash(data=data)
self.save_json()
else:
raise RuntimeError("""Could not automatically download test files %s!
If you are behind a firewall, please set both environment variable
http_proxy and https_proxy.
This even works under windows !
Otherwise please try to download the files manually from
%s/%s""" % (filename, self.url_base, filename))
else:
self.all_data[filename] = self.get_hash(data=data)
self.save_json()

else:
h = self.hash()
with open(fullfilename, mode="rb") as fd:
h.update(fd.read())
if h.hexdigest() != self.all_data[filename]:
logger.warning(f"Detected corruped file {fullfilename}")
self.all_data.pop(filename)
os.unlink(fullfilename)
return self.getfile(filename)

return fullfilename

def load_json(self) -> dict:
    """Load the JSON index mapping each known file name to its hash.

    The index file (``self.testdata``) is read while holding ``self.lock``.
    An index stored in the legacy list format (file names only) is converted
    to a mapping by recomputing each hash with ``self.get_hash``.

    :return: The mapping read from disk. An empty dict is returned when the
        index path is unset, the file is missing, the lock cannot be
        acquired in time, or the file does not contain valid JSON.
    """
    if not (self.testdata and os.path.exists(self.testdata)):
        return {}

    try:
        with self.lock:
            try:
                with open(self.testdata) as f:
                    jdata = json.load(f)
            except FileNotFoundError:
                # The file was removed between the existence check and the
                # read: same outcome as a missing index.
                return {}
            except ValueError as exc:
                # json.JSONDecodeError is a ValueError. A truncated index
                # (e.g. a writer killed mid-write) must not make every
                # later call fail: report it and start from an empty index.
                logger.error("Invalid JSON file %s: %s", self.testdata, exc)
                return {}
    except filelock.Timeout:
        logger.error("Unable to lock JSON file")
        return {}

    if isinstance(jdata, dict):
        return jdata
    # recalculate the hash only if the data was stored as a list
    return {k: self.get_hash(k) for k in jdata}

def save_json(self):
    """Merge the in-memory hashes into the JSON index file and write it back.

    The on-disk index is re-read first so that entries stored there by other
    users of the same data directory are kept; entries from ``self.all_data``
    take precedence. Keys are written in sorted order.

    Nothing is raised on failure: a lock timeout or an ``OSError`` while
    writing is logged and the index file is left as it was.
    """
    try:
        # Hold the lock across the whole read-merge-write cycle. Releasing it
        # between the read and the write would let a concurrent writer add
        # entries that this write then erases. load_json() takes the same
        # lock again; filelock.FileLock objects are re-entrant, so the nested
        # acquisition does not block.
        with self.lock:
            merged = self.load_json()
            merged.update(self.all_data)
            ordered = {name: merged[name] for name in sorted(merged)}
            with open(self.testdata, "w") as fp:
                json.dump(ordered, fp, indent=4)
    except filelock.Timeout:
        # Must come before OSError: filelock.Timeout derives from it.
        logger.error("Unable to lock JSON file")
    except OSError:
        logger.error("Unable to save JSON dict")

def getdir(self, dirname):
"""Downloads the requested tarball from the server
Expand Down
Loading