gnu: python-bed-reader: Fix build and enable tests.

* gnu/packages/bioinformatics.scm (python-bed-reader): Fix build and enable tests.
[source]{snippet}: Delete bundled website-related javascript.
{patches}: Use the store-cached instead of pooch-cached samples.
[arguments]{tests?}: Enable them.
{cargo-test-flags}: Skip doc tests. Skip failing tests.
{cargo-inputs}: Improve style.
{cargo-development-inputs}: Improve style.
{phases}: Add phases 'set-data-path, 'patch-data-path to use
store-cached samples for library and tests.  Rewrite phase
'prepare-python-module to rely more on the existing info in
pyproject.toml.  Rewrite phase 'check-python entirely, and
marginaly rewrite phase 'install-python-library to match 'check-python
phase style.
{modules}: Adapt accordingly.
{propagated-inputs}: Remove python-pooch. Add python-scipy.

(bed-sample-files): Add origin, used in python-bed-reader.

* gnu/packages/patches/python-bed-reader-use-store-samples.patch: Add
patch.
* gnu/local.mk: Record patch.

Signed-off-by: Sharlatan Hellseher <sharlatanus@gmail.com>
This commit is contained in:
Nicolas Graves 2025-04-04 21:16:55 +02:00 committed by Andreas Enge
parent e8e46bbeb5
commit db14ce5c4d
No known key found for this signature in database
GPG key ID: F7D5C9BF765C61E3
3 changed files with 284 additions and 59 deletions

View file

@ -2009,6 +2009,7 @@ dist_patch_DATA = \
%D%/packages/patches/python-3.12-fix-tests.patch \
%D%/packages/patches/python-accupy-use-matplotx.patch \
%D%/packages/patches/python-accupy-fix-use-of-perfplot.patch \
%D%/packages/patches/python-bed-reader-use-store-samples.patch \
%D%/packages/patches/python-chai-drop-python2.patch \
%D%/packages/patches/python-clarabel-blas.patch \
%D%/packages/patches/python-docrepr-fix-tests.patch \

View file

@ -2185,6 +2185,20 @@ Format (GFF) with Biopython integration.")
(modify-inputs (package-propagated-inputs python-bcbio-gff)
(replace "python-biopython" python-biopython-1.73))))))
(define bed-sample-files
(let* ((name "bed-sample-files")
(commit "a06dc0450e484090f15656ffd5d317813a5e1e01")
(revision "0")
(version (git-version "0.0.0" revision commit)))
(origin
(method git-fetch)
(uri (git-reference
(url "https://github.com/fastlmm/bed-sample-files")
(commit commit)))
(file-name (git-file-name name version))
(sha256
(base32 "1ldr2lvgbcykxa9i2s2298mhfh0sz96aaxs5dx217aipa9vsrjwk")))))
(define-public python-bed-reader
(package
(name "python-bed-reader")
@ -2194,80 +2208,124 @@ Format (GFF) with Biopython integration.")
(method url-fetch)
(uri (pypi-uri "bed_reader" version))
(sha256
(base32 "1c8ibwvz3b069w7ffh9aasz16lfkmx4z0249c2v909a21mrkkd6n"))))
(base32 "1c8ibwvz3b069w7ffh9aasz16lfkmx4z0249c2v909a21mrkkd6n"))
(modules '((guix build utils)))
;; Bundled unused javascript & co.
(snippet #~(delete-file-recursively "_static"))
(patches
(search-patches "python-bed-reader-use-store-samples.patch"))))
(build-system cargo-build-system)
(arguments
(list
;; Many of the tests (both the Rust tests and the Python tests) require
;; Internet access to fetch samples.
#:tests? #false
#:install-source? #false
#:features '(list "extension-module")
#:cargo-test-flags '(list "--features=extension-module")
#:cargo-test-flags
'(list "--features=extension-module"
;; Skip doc tests.
"--lib" "--bins" "--tests" "--"
;; This test is the only one not matched by our regexp.
"--skip=http_one"
;; These test require a 84 GB file.
"--skip=http_two"
"--skip=http_cloud_urls_md_3")
#:cargo-inputs
`(("rust-anyinput" ,rust-anyinput-0.1)
("rust-bytecount" ,rust-bytecount-0.6)
("rust-byteorder" ,rust-byteorder-1)
("rust-bytes" ,rust-bytes-1)
("rust-cloud-file" ,rust-cloud-file-0.2)
("rust-derive-builder" ,rust-derive-builder-0.20)
("rust-dpc-pariter" ,rust-dpc-pariter-0.4)
("rust-fetch-data" ,rust-fetch-data-0.2)
("rust-futures-util" ,rust-futures-util-0.3)
("rust-itertools" ,rust-itertools-0.13)
("rust-ndarray" ,rust-ndarray-0.16)
("rust-ndarray-npy" ,rust-ndarray-npy-0.9)
("rust-num-traits" ,rust-num-traits-0.2)
("rust-numpy" ,rust-numpy-0.22)
("rust-pyo3" ,rust-pyo3-0.22)
("rust-pyo3-build-config" ,rust-pyo3-build-config-0.22)
("rust-rayon" ,rust-rayon-1)
("rust-statrs" ,rust-statrs-0.17)
("rust-thiserror" ,rust-thiserror-1)
("rust-tokio" ,rust-tokio-1))
(list rust-anyinput-0.1
rust-bytecount-0.6
rust-byteorder-1
rust-bytes-1
rust-cloud-file-0.2
rust-derive-builder-0.20
rust-dpc-pariter-0.4
rust-fetch-data-0.2
rust-futures-util-0.3
rust-itertools-0.13
rust-ndarray-0.16
rust-ndarray-npy-0.9
rust-num-traits-0.2
rust-numpy-0.22
rust-pyo3-0.22
rust-pyo3-build-config-0.22
rust-rayon-1
rust-statrs-0.17
rust-thiserror-1
rust-tokio-1)
#:cargo-development-inputs
`(("rust-anyhow" ,rust-anyhow-1)
("rust-ndarray-rand" ,rust-ndarray-rand-0.15)
("rust-rusoto-credential" ,rust-rusoto-credential-0.48)
("rust-temp-testdir" ,rust-temp-testdir-0.2)
("rust-thousands" ,rust-thousands-0.2))
(list rust-anyhow-1
rust-ndarray-rand-0.15
rust-rusoto-credential-0.48
rust-temp-testdir-0.2
rust-thousands-0.2)
#:imported-modules
(append %cargo-build-system-modules
%pyproject-build-system-modules)
#:modules
'((guix build cargo-build-system)
((guix build pyproject-build-system) #:prefix py:)
(guix build utils))
(guix build utils)
(ice-9 match)
(ice-9 rdelim))
#:phases
#~(modify-phases %standard-phases
(add-after 'configure 'set-data-path
(lambda _
;; This var is still necessary despite the patch-data-path phase.
;; Otherwise more tests fail with a read-only filesystem error.
(setenv "BED_READER_DATA_DIR" #+bed-sample-files)))
(add-after 'unpack 'patch-data-path
(lambda _
;; If BED_READER_DATA_DIR is unset, default to bed-sample-files.
(substitute* "bed_reader/_sample_data.py"
(("os\\.environ\\.get\\(\"BED_READER_DATA_DIR\"" all)
(format #f "~a, ~s" all #+bed-sample-files)))
;; XXX: More work is necessary to use another
;; version of sample files with BED_READER_DATA_DIR
;; Currently, only the hardcoded Guix version is working.
(substitute* '("bed_reader/tests/test_open_bed_cloud.py"
"src/bed_cloud.rs"
"src/lib.rs"
"src/supplemental_documents/cloud_urls_etc.md"
"tests/tests_api_cloud.rs")
(("\
https://raw\\.githubusercontent\\.com/fastlmm/bed-sample-files/main")
(string-append "file://" #+bed-sample-files)))
(substitute* "src/tests.rs"
(("bed_reader/tests/data")
#+bed-sample-files))))
(add-after 'install 'prepare-python-module
(lambda _
;; We don't use maturin.
(delete-file "pyproject.toml")
(call-with-output-file "pyproject.toml"
(lambda (port)
(format port "\
;; We don't use maturin. Conveniently, what we want to drop
;; from pyproject.toml is at the end of the file.
(rename-file "pyproject.toml" "pyproject.toml.bak")
(call-with-input-file "pyproject.toml.bak"
(lambda (in)
(call-with-output-file "pyproject.toml"
(lambda (out)
(let loop ()
(match (read-line in)
((? eof-object? eof)
eof)
("[build-system]"
(and (format out "\
[build-system]
build-backend = 'setuptools.build_meta'
requires = ['setuptools']
[tool.setuptools.packages.find]
where = [\".\"]
exclude = [\"src\", \"docs\", \"tests\", \"Cargo.toml\"]
")))
(call-with-output-file "setup.cfg"
(lambda (port)
(format port "\
[metadata]
name = bed-reader
version = ~a
[options]
packages = find:
[options.packages.find]
exclude =
src
docs
tests
Cargo.toml
" #$version)))))
("samples = [\"pooch>=1.5.0\"]"
(and (format out "samples = []~%")
(loop)))
("[project]"
(and (format out "\
[project]
version = ~s
" #$version)
(loop)))
(line
(and (format out "~a~%" line)
(loop)))))))))))
(add-after 'prepare-python-module 'enable-bytecode-determinism
(assoc-ref py:%standard-phases 'enable-bytecode-determinism))
(add-after 'enable-bytecode-determinism 'build-python-module
@ -2279,16 +2337,35 @@ exclude =
(let ((site (string-append #$output "/lib/python"
#$(version-major+minor
(package-version python))
"/site-packages")))
(mkdir-p site)
"/site-packages/")))
(mkdir-p (string-append site "bed_reader"))
(copy-file "target/release/libbed_reader.so"
(string-append site "/bed_reader/bed_reader.so")))))
(string-append site "bed_reader/bed_reader.so")))))
(add-after 'install-python-library 'add-install-to-pythonpath
(assoc-ref py:%standard-phases 'add-install-to-pythonpath))
(add-after 'add-install-to-pythonpath 'check-python
(lambda* (#:key tests? test-flags #:allow-other-keys)
(lambda* (#:key tests? #:allow-other-keys)
(when tests?
(apply invoke "pytest" "-v" #$output test-flags)))))))
(let ((site (string-append #$output "/lib/python"
#$(version-major+minor
(package-version python))
"/site-packages/"))
(data-dir "bed_reader/tests/data"))
(symlink (canonicalize-path data-dir)
(string-append site data-dir))
(invoke "pytest" "-v" #$output
;; These test require a 84 GB file.
"-k" (string-join
(list "not test_http_two"
"test_http_cloud_urls_rst_3"
"test_http_cloud_urls_rst_4"
;; XXX: python-pooch dependency removed
"test_optional_dependencies")
" and not "))
(delete-file-recursively
(string-append site "bed_reader/tests"))
(delete-file-recursively
(string-append #$output "/.pytest_cache")))))))))
(native-inputs (list python-pytest
python-pytest-cov
python-pytest-datadir
@ -2296,7 +2373,7 @@ exclude =
python-recommonmark
python-sphinx))
(inputs (list python-wrapper))
(propagated-inputs (list python-numpy python-pandas python-pooch))
(propagated-inputs (list python-numpy python-pandas python-scipy))
(home-page "https://fastlmm.github.io/")
(synopsis "Read and write the PLINK BED format, simply and efficiently")
(description

View file

@ -0,0 +1,147 @@
From 7e6bcdfeed54500ca533d2f0eb12078248c43c77 Mon Sep 17 00:00:00 2001
Message-ID: <7e6bcdfeed54500ca533d2f0eb12078248c43c77.1743682382.git.ngraves@ngraves.fr>
From: Nicolas Graves <ngraves@ngraves.fr>
Date: Thu, 3 Apr 2025 11:33:58 +0200
Subject: [PATCH] samples: Use deterministic samples in Guix.
---
bed_reader/_sample_data.py | 86 +++++++++-----------------------------
1 file changed, 19 insertions(+), 67 deletions(-)
diff --git a/bed_reader/_sample_data.py b/bed_reader/_sample_data.py
index 6ca4cc0..6a1146e 100644
--- a/bed_reader/_sample_data.py
+++ b/bed_reader/_sample_data.py
@@ -1,33 +1,8 @@
+import os
import tempfile
from pathlib import Path, PurePath
from typing import Union
-try:
- import pooch
-
- """
- Load sample data.
- """
-
- POOCH = pooch.create(
- # Use the default cache folder for the OS
- path=pooch.os_cache("bed_reader"),
- # The remote data is on Github
- base_url="https://raw.githubusercontent.com/"
- + "fastlmm/bed-sample-files/main/",
- # If this is a development version, get the data from the master branch
- version_dev="main",
- # The registry specifies the files that can be fetched
- env="BED_READER_DATA_DIR",
- )
-
- # Get registry file from package_data
- registry_file = Path(__file__).parent / "tests/registry.txt"
- # Load this registry file
- POOCH.load_registry(registry_file)
-except ImportError:
- pooch = None
-
def sample_file(filepath: Union[str, Path]) -> str:
"""Retrieve a sample .bed file. (Also retrieves associated .fam and .bim files).
@@ -40,50 +15,41 @@ def sample_file(filepath: Union[str, Path]) -> str:
Returns
-------
str
- Local name of sample .bed file.
-
-
- .. note::
- This function requires the :mod:`pooch` package. Install `pooch` with:
-
- .. code-block:: bash
-
- pip install --upgrade bed-reader[samples]
-
-
- By default this function puts files under the user's cache directory.
- Override this by setting
- the `BED_READER_DATA_DIR` environment variable.
+ Local path of sample .bed file.
Example
--------
.. doctest::
- >>> # pip install bed-reader[samples] # if needed
>>> from bed_reader import sample_file
>>>
>>> file_name = sample_file("small.bed")
>>> print(f"The local file name is '{file_name}'")
The local file name is '...small.bed'
-
"""
- if pooch is None:
- raise ImportError(
- "The function sample_file() requires pooch. "
- + "Install it with 'pip install --upgrade bed-reader[samples]'.",
+ filepath = Path(filepath)
+ sample_dir = os.environ.get("BED_READER_DATA_DIR")
+ if sample_dir is None:
+ raise EnvironmentError(
+ "BED_READER_DATA_DIR environment variable is not set. "
+ "This should point to the directory containing the sample files."
)
- filepath = Path(filepath)
- file_string = str(filepath)
- if file_string.lower().endswith(".bed"):
- POOCH.fetch(file_string[:-4] + ".fam")
- POOCH.fetch(file_string[:-4] + ".bim")
- return POOCH.fetch(file_string)
+ file_path = Path(sample_dir) / filepath
+
+ # Check if file exists
+ if not file_path.exists():
+ raise FileNotFoundError(
+ f"Sample file '{filepath}' not found in {sample_dir}. "
+ f"Make sure you're using the latest samples in BED_READER_DATA_DIR."
+ )
+
+ return str(file_path)
def sample_url(filepath: Union[str, Path]) -> str:
- """Retrieve a URL to a sample .bed file. (Also makes ready associated .fam and .bim files).
+ """Retrieve a URL to a sample .bed file.
Parameters
----------
@@ -95,25 +61,11 @@ def sample_url(filepath: Union[str, Path]) -> str:
str
URL to sample .bed file.
-
- .. note::
- This function requires the :mod:`pooch` package. Install `pooch` with:
-
- .. code-block:: bash
-
- pip install --upgrade bed-reader[samples]
-
-
- By default this function puts files under the user's cache directory.
- Override this by setting
- the `BED_READER_DATA_DIR` environment variable.
-
Example
--------
.. doctest::
- >>> # pip install bed-reader[samples] # if needed
>>> from bed_reader import sample_url
>>>
>>> url = sample_url("small.bed")
--
2.49.0