Source code for pooch.utils

"""
Misc utilities
"""
from pathlib import Path
import hashlib

import appdirs
from packaging.version import Version


[docs]def os_cache(project): r""" Default cache location based on the operating system. The folder locations are defined by the ``appdirs`` package. Usually, the locations will be following (see the `appdirs documentation <https://github.com/ActiveState/appdirs>`__): * Mac: ``~/Library/Application Support/<project>`` * Unix: ``~/.local/share/<project>`` or the value of the ``XDG_DATA_HOME`` environment variable, if defined. * Windows: ``C:\Users\<user>\AppData\Roaming\<project>\<project>`` Parameters ---------- project : str The project name. Returns ------- cache_path : :class:`pathlib.Path` The default location for the data cache. User directories (``'~'``) are not expanded. """ return Path(appdirs.user_cache_dir(project))
[docs]def file_hash(fname): """ Calculate the SHA256 hash of a given file. Useful for checking if a file has changed or been corrupted. Parameters ---------- fname : str The name of the file. Returns ------- hash : str The hash of the file. Examples -------- >>> fname = "test-file-for-hash.txt" >>> with open(fname, "w") as f: ... __ = f.write("content of the file") >>> print(file_hash(fname)) 0fc74468e6a9a829f103d069aeb2bb4f8646bad58bf146bb0e3379b759ec4a00 >>> import os >>> os.remove(fname) """ # Calculate the hash in chunks to avoid overloading the memory chunksize = 65536 hasher = hashlib.sha256() with open(fname, "rb") as fin: buff = fin.read(chunksize) while buff: hasher.update(buff) buff = fin.read(chunksize) return hasher.hexdigest()
[docs]def check_version(version, fallback="master"): """ Check that a version string is PEP440 compliant and there are no unreleased changes. For example, ``version = "0.1"`` will be returned as is but ``version = "0.1+10.8dl8dh9"`` will return the fallback. This is the convention used by `versioneer <https://github.com/warner/python-versioneer>`__ to mark that this version is 10 commits ahead of the last release. Parameters ---------- version : str A version string. fallback : str What to return if the version string has unreleased changes. Returns ------- version : str If *version* is PEP440 compliant and there are unreleased changes, then return *version*. Otherwise, return *fallback*. Raises ------ InvalidVersion If *version* is not PEP440 compliant. Examples -------- >>> check_version("0.1") '0.1' >>> check_version("0.1a10") '0.1a10' >>> check_version("0.1+111.9hdg36") 'master' >>> check_version("0.1+111.9hdg36", fallback="dev") 'dev' """ parse = Version(version) if parse.local is not None: return fallback return version
[docs]def make_registry(directory, output, recursive=True): """ Make a registry of files and hashes for the given directory. This is helpful if you have many files in your test dataset as it keeps you from needing to manually update the registry. Parameters ---------- directory : str Directory of the test data to put in the registry. All file names in the registry will be relative to this directory. output : str Name of the output registry file. recursive : bool If True, will recursively look for files in subdirectories of *directory*. """ directory = Path(directory) if recursive: pattern = "**/*" else: pattern = "*" files = sorted( [ str(path.relative_to(directory)) for path in directory.glob(pattern) if path.is_file() ] ) hashes = [file_hash(str(directory / fname)) for fname in files] with open(output, "w") as outfile: for fname, fhash in zip(files, hashes): # Only use Unix separators for the registry so that we don't go insane # dealing with file paths. outfile.write("{} {}\n".format(fname.replace("\\", "/"), fhash))