import: go: Improve synopsis and description parsing.

* guix/import/go.scm (%strict-tokenizer?): Set parameter to #t.
(go-path-escape): Redefine to prevent inlining.
(http-get*): Replace by ...
(http-fetch*): this ...
(json-fetch*): New procedure.
(go.pkg.dev-info): Use http-fetch*.
(go-package-licenses): Rewrite in terms of go.pkg.dev-info.
(go-package-description): Likewise.
(go-package-synopsis): Likewise.
(fetch-go.mod): Use the memoized http-fetch*.
(parse-go.mod): Adjust to receive content as a string.
(fetch-module-meta-data): Adjust to use http-fetch*.
(go-module->guix-package): Adjust to the modified fetch-go.mod return value.
[inputs]: Use propagated inputs, which is the most common situations for Go
libraries.
[description]: Beautify description.
[licenses]: Do no check for #f.  The result of the license parsing is always a
list.
* tests/go.scm: Adjust following above changes.
This commit is contained in:
Maxim Cournoyer 2021-03-21 23:53:21 -04:00
parent 2446a112df
commit 6aee902eaf
No known key found for this signature in database
GPG key ID: 1260E46482E63562
3 changed files with 169 additions and 121 deletions

View file

@ -33,7 +33,7 @@
#:use-module (guix http-client) #:use-module (guix http-client)
#:use-module ((guix licenses) #:prefix license:) #:use-module ((guix licenses) #:prefix license:)
#:use-module (guix memoization) #:use-module (guix memoization)
#:autoload (htmlprag) (html->sxml) ;from Guile-Lib #:use-module (htmlprag) ;from Guile-Lib
#:autoload (guix git) (update-cached-checkout) #:autoload (guix git) (update-cached-checkout)
#:autoload (gcrypt hash) (open-hash-port hash-algorithm sha256) #:autoload (gcrypt hash) (open-hash-port hash-algorithm sha256)
#:autoload (guix serialization) (write-file) #:autoload (guix serialization) (write-file)
@ -43,20 +43,28 @@
#:use-module (ice-9 rdelim) #:use-module (ice-9 rdelim)
#:use-module (ice-9 receive) #:use-module (ice-9 receive)
#:use-module (ice-9 regex) #:use-module (ice-9 regex)
#:use-module (ice-9 textual-ports)
#:use-module ((rnrs io ports) #:select (call-with-port)) #:use-module ((rnrs io ports) #:select (call-with-port))
#:use-module (srfi srfi-1) #:use-module (srfi srfi-1)
#:use-module (srfi srfi-2)
#:use-module (srfi srfi-9) #:use-module (srfi srfi-9)
#:use-module (srfi srfi-11) #:use-module (srfi srfi-11)
#:use-module (srfi srfi-26) #:use-module (srfi srfi-26)
#:use-module (sxml xpath) #:use-module (sxml match)
#:use-module ((sxml xpath) #:renamer (lambda (s)
(if (eq? 'filter s)
'xfilter
s)))
#:use-module (web client) #:use-module (web client)
#:use-module (web response) #:use-module (web response)
#:use-module (web uri) #:use-module (web uri)
#:export (go-path-escape #:export (go-module->guix-package
go-module->guix-package
go-module-recursive-import)) go-module-recursive-import))
;;; Parameterize htmlprag to parse valid HTML more reliably.
(%strict-tokenizer? #t)
;;; Commentary: ;;; Commentary:
;;; ;;;
;;; (guix import go) attempts to make it easier to create Guix package ;;; (guix import go) attempts to make it easier to create Guix package
@ -90,6 +98,14 @@
;;; Code: ;;; Code:
(define http-fetch*
;; Like http-fetch, but memoized and returning the body as a string.
(memoize (lambda args
(call-with-port (apply http-fetch args) get-string-all))))
(define json-fetch*
(memoize json-fetch))
(define (go-path-escape path) (define (go-path-escape path)
"Escape a module path by replacing every uppercase letter with an "Escape a module path by replacing every uppercase letter with an
exclamation mark followed with its lowercase equivalent, as per the module exclamation mark followed with its lowercase equivalent, as per the module
@ -99,54 +115,73 @@ https://godoc.org/golang.org/x/mod/module#hdr-Escaped_Paths)."
(string-append "!" (string-downcase (match:substring occurrence)))) (string-append "!" (string-downcase (match:substring occurrence))))
(regexp-substitute/global #f "[A-Z]" path 'pre escape 'post)) (regexp-substitute/global #f "[A-Z]" path 'pre escape 'post))
;; Prevent inlining of this procedure, which is accessed by unit tests.
(set! go-path-escape go-path-escape)
(define (go.pkg.dev-info name)
(http-fetch* (string-append "https://pkg.go.dev/" name)))
(define (go-module-latest-version goproxy-url module-path) (define (go-module-latest-version goproxy-url module-path)
"Fetch the version number of the latest version for MODULE-PATH from the "Fetch the version number of the latest version for MODULE-PATH from the
given GOPROXY-URL server." given GOPROXY-URL server."
(assoc-ref (json-fetch (format #f "~a/~a/@latest" goproxy-url (assoc-ref (json-fetch* (format #f "~a/~a/@latest" goproxy-url
(go-path-escape module-path))) (go-path-escape module-path)))
"Version")) "Version"))
(define (go-package-licenses name) (define (go-package-licenses name)
"Retrieve the list of licenses that apply to NAME, a Go package or module "Retrieve the list of licenses that apply to NAME, a Go package or module
name (e.g. \"github.com/golang/protobuf/proto\"). The data is scraped from name (e.g. \"github.com/golang/protobuf/proto\")."
the https://pkg.go.dev/ web site." (let* ((body (go.pkg.dev-info (string-append name "?tab=licenses")))
(let*-values (((url) (string-append "https://pkg.go.dev/" name ;; Extract the text contained in a h2 child node of any
"?tab=licenses")) ;; element marked with a "License" class attribute.
((response body) (http-get url)) (select (sxpath `(// (* (@ (equal? (class "License"))))
;; Extract the text contained in a h2 child node of any h2 // *text*))))
;; element marked with a "License" class attribute. (select (html->sxml body))))
((select) (sxpath `(// (* (@ (equal? (class "License"))))
h2 // *text*))))
(and (eq? (response-code response) 200)
(match (select (html->sxml body))
(() #f) ;nothing selected
(licenses licenses)))))
(define (go.pkg.dev-info name) (define (sxml->texi sxml-node)
(http-get (string-append "https://pkg.go.dev/" name))) "A very basic SXML to Texinfo converter which attempts to preserve HTML
(define go.pkg.dev-info* formatting and links as text."
(memoize go.pkg.dev-info)) (sxml-match sxml-node
((strong ,text)
(format #f "@strong{~a}" text))
((a (@ (href ,url)) ,text)
(format #f "@url{~a,~a}" url text))
((code ,text)
(format #f "@code{~a}" text))
(,something-else something-else)))
(define (go-package-description name) (define (go-package-description name)
"Retrieve a short description for NAME, a Go package name, "Retrieve a short description for NAME, a Go package name,
e.g. \"google.golang.org/protobuf/proto\". The data is scraped from the e.g. \"google.golang.org/protobuf/proto\"."
https://pkg.go.dev/ web site." (let* ((body (go.pkg.dev-info name))
(let*-values (((response body) (go.pkg.dev-info* name)) (sxml (html->sxml body))
;; Extract the text contained in a h2 child node of any (overview ((sxpath
;; element marked with a "License" class attribute. `(//
((select) (sxpath (* (@ (equal? (class "Documentation-overview"))))
`(// (section (p 1))) sxml))
(@ (equal? (class "Documentation-overview")))) ;; Sometimes, the first paragraph just contains images/links that
(p 1))))) ;; has only "\n" for text. The following filter is designed to
(and (eq? (response-code response) 200) ;; omit it.
(match (select (html->sxml body)) (contains-text? (lambda (node)
(() #f) ;nothing selected (remove string-null?
(((p . strings)) (map string-trim-both
;; The paragraph text is returned as a list of strings embedding (filter (node-typeof? '*text*)
;; newline characters. Join them and strip the newline (cdr node))))))
;; characters. (select-content (sxpath
(string-delete #\newline (string-join strings))))))) `(//
(* (@ (equal? (class "UnitReadme-content"))))
div // p ,(xfilter contains-text?))))
;; Fall-back to use content; this is less desirable as it is more
;; verbose, but not every page has an overview.
(description (if (not (null? overview))
overview
(select-content sxml)))
(description* (and (not (null? description))
(first description))))
(match description*
(() #f) ;nothing selected
((p elements ...)
(apply string-append (filter string? (map sxml->texi elements)))))))
(define (go-package-synopsis module-name) (define (go-package-synopsis module-name)
"Retrieve a short synopsis for a Go module named MODULE-NAME, "Retrieve a short synopsis for a Go module named MODULE-NAME,
@ -154,17 +189,17 @@ e.g. \"google.golang.org/protobuf\". The data is scraped from
the https://pkg.go.dev/ web site." the https://pkg.go.dev/ web site."
;; Note: Only the *module* (rather than package) page has the README title ;; Note: Only the *module* (rather than package) page has the README title
;; used as a synopsis on the https://pkg.go.dev web site. ;; used as a synopsis on the https://pkg.go.dev web site.
(let*-values (((response body) (go.pkg.dev-info* module-name)) (let* ((url (string-append "https://pkg.go.dev/" module-name))
;; Extract the text contained in a h2 child node of any (body (http-fetch* url))
;; element marked with a "License" class attribute. ;; Extract the text contained in a h2 child node of any
((select) (sxpath ;; element marked with a "License" class attribute.
`(// (div (@ (equal? (class "UnitReadme-content")))) (select-title (sxpath
// h3 *text*)))) `(// (div (@ (equal? (class "UnitReadme-content"))))
(and (eq? (response-code response) 200) // h3 *text*))))
(match (select (html->sxml body)) (match (select-title (html->sxml body))
(() #f) ;nothing selected (() #f) ;nothing selected
((title more ...) ;title is the first string of the list ((title more ...) ;title is the first string of the list
(string-trim-both title)))))) (string-trim-both title)))))
(define (list->licenses licenses) (define (list->licenses licenses)
"Given a list of LICENSES mostly following the SPDX conventions, return the "Given a list of LICENSES mostly following the SPDX conventions, return the
@ -189,13 +224,13 @@ corresponding Guix license or 'unknown-license!"
'unknown-license!))) 'unknown-license!)))
licenses)) licenses))
(define (fetch-go.mod goproxy-url module-path version) (define (fetch-go.mod goproxy module-path version)
"Fetches go.mod from the given GOPROXY-URL server for the given MODULE-PATH "Fetch go.mod from the given GOPROXY server for the given MODULE-PATH
and VERSION." and VERSION and return an input port."
(let ((url (format #f "~a/~a/@v/~a.mod" goproxy-url (let ((url (format #f "~a/~a/@v/~a.mod" goproxy
(go-path-escape module-path) (go-path-escape module-path)
(go-path-escape version)))) (go-path-escape version))))
(http-fetch url))) (http-fetch* url)))
(define %go.mod-require-directive-rx (define %go.mod-require-directive-rx
;; A line in a require directive is composed of a module path and ;; A line in a require directive is composed of a module path and
@ -216,9 +251,8 @@ and VERSION."
"[[:blank:]]+" "=>" "[[:blank:]]+" "[[:blank:]]+" "=>" "[[:blank:]]+"
"([^[:blank:]]+)([[:blank:]]+([^[:blank:]]+))?"))) "([^[:blank:]]+)([[:blank:]]+([^[:blank:]]+))?")))
(define (parse-go.mod port) (define (parse-go.mod content)
"Parse the go.mod file accessible via the input PORT, returning a list of "Parse the go.mod file CONTENT, returning a list of requirements."
requirements."
(define-record-type <results> (define-record-type <results>
(make-results requirements replacements) (make-results requirements replacements)
results? results?
@ -229,7 +263,7 @@ requirements."
(define (toplevel results) (define (toplevel results)
"Main parser, RESULTS is a pair of alist serving as accumulator for "Main parser, RESULTS is a pair of alist serving as accumulator for
all encountered requirements and replacements." all encountered requirements and replacements."
(let ((line (read-line port))) (let ((line (read-line)))
(cond (cond
((eof-object? line) ((eof-object? line)
;; parsing ended, give back the result ;; parsing ended, give back the result
@ -255,7 +289,7 @@ requirements."
(toplevel results))))) (toplevel results)))))
(define (in-require results) (define (in-require results)
(let ((line (read-line port))) (let ((line (read-line)))
(cond (cond
((eof-object? line) ((eof-object? line)
;; this should never happen here but we ignore silently ;; this should never happen here but we ignore silently
@ -267,7 +301,7 @@ requirements."
(in-require (require-directive results line)))))) (in-require (require-directive results line))))))
(define (in-replace results) (define (in-replace results)
(let ((line (read-line port))) (let ((line (read-line)))
(cond (cond
((eof-object? line) ((eof-object? line)
;; this should never happen here but we ignore silently ;; this should never happen here but we ignore silently
@ -306,7 +340,9 @@ requirements."
(($ <results> requirements replaced) (($ <results> requirements replaced)
(make-results (alist-cons module-path version requirements) replaced))))) (make-results (alist-cons module-path version requirements) replaced)))))
(let ((results (toplevel (make-results '() '())))) (let ((results (with-input-from-string content
(lambda _
(toplevel (make-results '() '()))))))
(match results (match results
(($ <results> requirements replaced) (($ <results> requirements replaced)
;; At last we remove replaced modules from the requirements list ;; At last we remove replaced modules from the requirements list
@ -325,8 +361,10 @@ requirements."
(url-prefix vcs-url-prefix) (url-prefix vcs-url-prefix)
(root-regex vcs-root-regex) (root-regex vcs-root-regex)
(type vcs-type)) (type vcs-type))
(define (make-vcs prefix regexp type) (define (make-vcs prefix regexp type)
(%make-vcs prefix (make-regexp regexp) type)) (%make-vcs prefix (make-regexp regexp) type))
(define known-vcs (define known-vcs
;; See the following URL for the official Go equivalent: ;; See the following URL for the official Go equivalent:
;; https://github.com/golang/go/blob/846dce9d05f19a1f53465e62a304dea21b99f910/src/cmd/go/internal/vcs/vcs.go#L1026-L1087 ;; https://github.com/golang/go/blob/846dce9d05f19a1f53465e62a304dea21b99f910/src/cmd/go/internal/vcs/vcs.go#L1026-L1087
@ -387,6 +425,14 @@ hence the need to derive this information."
"/" "-") "/" "-")
"_" "-")))) "_" "-"))))
(define (strip-.git-suffix/maybe repo-url)
"Strip a repository URL '.git' suffix from REPO-URL if hosted at GitHub."
(match repo-url
((and (? (cut string-prefix? "https://github.com" <>))
(? (cut string-suffix? ".git" <>)))
(string-drop-right repo-url 4))
(_ repo-url)))
(define-record-type <module-meta> (define-record-type <module-meta>
(make-module-meta import-prefix vcs repo-root) (make-module-meta import-prefix vcs repo-root)
module-meta? module-meta?
@ -399,21 +445,22 @@ hence the need to derive this information."
because goproxy servers don't currently provide all the information needed to because goproxy servers don't currently provide all the information needed to
build a package." build a package."
;; <meta name="go-import" content="import-prefix vcs repo-root"> ;; <meta name="go-import" content="import-prefix vcs repo-root">
(let* ((port (http-fetch (format #f "https://~a?go-get=1" module-path))) (let* ((meta-data (http-fetch* (format #f "https://~a?go-get=1" module-path)))
(select (sxpath `(// head (meta (@ (equal? (name "go-import")))) (select (sxpath `(// head (meta (@ (equal? (name "go-import"))))
// content)))) // content))))
(match (select (call-with-port port html->sxml)) (match (select (html->sxml meta-data))
(() #f) ;nothing selected (() #f) ;nothing selected
(((content content-text)) (((content content-text))
(match (string-split content-text #\space) (match (string-split content-text #\space)
((root-path vcs repo-url) ((root-path vcs repo-url)
(make-module-meta root-path (string->symbol vcs) repo-url))))))) (make-module-meta root-path (string->symbol vcs)
(strip-.git-suffix/maybe repo-url))))))))
(define (module-meta-data-repo-url meta-data goproxy-url) (define (module-meta-data-repo-url meta-data goproxy)
"Return the URL where the fetcher which will be used can download the "Return the URL where the fetcher which will be used can download the
source." source."
(if (member (module-meta-vcs meta-data) '(fossil mod)) (if (member (module-meta-vcs meta-data) '(fossil mod))
goproxy-url goproxy
(module-meta-repo-root meta-data))) (module-meta-repo-root meta-data)))
;; XXX: Copied from (guix scripts hash). ;; XXX: Copied from (guix scripts hash).
@ -466,6 +513,9 @@ control system is being used."
(method git-fetch) (method git-fetch)
(uri (git-reference (uri (git-reference
(url ,vcs-repo-url) (url ,vcs-repo-url)
;; This is done because the version field of the package,
;; which the generated quoted expression refers to, has been
;; stripped of any 'v' prefixed.
(commit ,(if (and plain-version? v-prefixed?) (commit ,(if (and plain-version? v-prefixed?)
'(string-append "v" version) '(string-append "v" version)
'(go-version->git-ref version))))) '(go-version->git-ref version)))))
@ -505,8 +555,8 @@ control system is being used."
(define* (go-module->guix-package module-path #:key (define* (go-module->guix-package module-path #:key
(goproxy-url "https://proxy.golang.org")) (goproxy-url "https://proxy.golang.org"))
(let* ((latest-version (go-module-latest-version goproxy-url module-path)) (let* ((latest-version (go-module-latest-version goproxy-url module-path))
(port (fetch-go.mod goproxy-url module-path latest-version)) (content (fetch-go.mod goproxy-url module-path latest-version))
(dependencies (map car (call-with-port port parse-go.mod))) (dependencies (map car (parse-go.mod content)))
(guix-name (go-module->guix-package-name module-path)) (guix-name (go-module->guix-package-name module-path))
(root-module-path (module-path->repository-root module-path)) (root-module-path (module-path->repository-root module-path))
;; The VCS type and URL are not included in goproxy information. For ;; The VCS type and URL are not included in goproxy information. For
@ -527,14 +577,17 @@ control system is being used."
(build-system go-build-system) (build-system go-build-system)
(arguments (arguments
'(#:import-path ,root-module-path)) '(#:import-path ,root-module-path))
,@(maybe-inputs (map go-module->guix-package-name dependencies)) ,@(maybe-propagated-inputs
(map go-module->guix-package-name dependencies))
(home-page ,(format #f "https://~a" root-module-path)) (home-page ,(format #f "https://~a" root-module-path))
(synopsis ,synopsis) (synopsis ,synopsis)
(description ,description) (description ,(and=> description beautify-description))
(license ,(match (and=> licenses list->licenses) (license ,(match (list->licenses licenses)
((license) license) (() #f) ;unknown license
((licenses ...) `(list ,@licenses)) ((license) ;a single license
(x x)))) license)
((license ...) ;a list of licenses
`(list ,@license)))))
dependencies))) dependencies)))
(define go-module->guix-package* (memoize go-module->guix-package)) (define go-module->guix-package* (memoize go-module->guix-package))

View file

@ -446,8 +446,8 @@ obtain a node's uniquely identifying \"key\"."
"Return a list of package expressions for PACKAGE-NAME and all its "Return a list of package expressions for PACKAGE-NAME and all its
dependencies, sorted in topological order. For each package, dependencies, sorted in topological order. For each package,
call (REPO->GUIX-PACKAGE NAME :KEYS version repo), which should return a call (REPO->GUIX-PACKAGE NAME :KEYS version repo), which should return a
package expression and a list of dependencies; call (GUIX-NAME NAME) to package expression and a list of dependencies; call (GUIX-NAME PACKAGE-NAME)
obtain the Guix package name corresponding to the upstream name." to obtain the Guix package name corresponding to the upstream name."
(define-record-type <node> (define-record-type <node>
(make-node name version package dependencies) (make-node name version package dependencies)
node? node?

View file

@ -180,13 +180,9 @@ require github.com/kr/pretty v0.2.1
(define (testing-parse-mod name expected input) (define (testing-parse-mod name expected input)
(define (inf? p1 p2) (define (inf? p1 p2)
(string<? (car p1) (car p2))) (string<? (car p1) (car p2)))
(let ((input-port (open-input-string input))) (test-equal name
(test-equal name (sort expected inf?)
(sort expected inf?) (sort ((@@ (guix import go) parse-go.mod) input) inf?)))
(sort
( (@@ (guix import go) parse-go.mod)
input-port)
inf?))))
(testing-parse-mod "parse-go.mod-simple" (testing-parse-mod "parse-go.mod-simple"
'(("good/thing" . "v1.4.5") '(("good/thing" . "v1.4.5")
@ -249,44 +245,43 @@ require github.com/kr/pretty v0.2.1
(test-equal "go-module->guix-package" (test-equal "go-module->guix-package"
'(package '(package
(name "go-github-com-go-check-check") (name "go-github-com-go-check-check")
(version "0.0.0-20201130134442-10cb98267c6c") (version "0.0.0-20201130134442-10cb98267c6c")
(source (source
(origin (origin
(method git-fetch) (method git-fetch)
(uri (git-reference (uri (git-reference
(url "https://github.com/go-check/check.git") (url "https://github.com/go-check/check")
(commit (go-version->git-ref version)))) (commit (go-version->git-ref version))))
(file-name (git-file-name name version)) (file-name (git-file-name name version))
(sha256 (sha256
(base32 (base32
"0sjjj9z1dhilhpc8pq4154czrb79z9cm044jvn75kxcjv6v5l2m5")))) "0sjjj9z1dhilhpc8pq4154czrb79z9cm044jvn75kxcjv6v5l2m5"))))
(build-system go-build-system) (build-system go-build-system)
(arguments (arguments
(quote (#:import-path "github.com/go-check/check"))) '(#:import-path "github.com/go-check/check"))
(inputs (propagated-inputs
(quasiquote (("go-github-com-kr-pretty" `(("go-github-com-kr-pretty" ,go-github-com-kr-pretty)))
(unquote go-github-com-kr-pretty))))) (home-page "https://github.com/go-check/check")
(home-page "https://github.com/go-check/check") (synopsis "Instructions")
(synopsis "Instructions") (description "Package check is a rich testing extension for Go's testing \
(description #f) package.")
(license license:bsd-2)) (license license:bsd-2))
;; Replace network resources with sample data. ;; Replace network resources with sample data.
(call-with-temporary-directory (call-with-temporary-directory
(lambda (checkout) (lambda (checkout)
(mock ((web client) http-get (mock ((web client) http-get
(mock-http-get fixtures-go-check-test)) (mock-http-get fixtures-go-check-test))
(mock ((guix http-client) http-fetch (mock ((guix http-client) http-fetch
(mock-http-fetch fixtures-go-check-test)) (mock-http-fetch fixtures-go-check-test))
(mock ((guix git) update-cached-checkout (mock ((guix git) update-cached-checkout
(lambda* (url #:key ref) (lambda* (url #:key ref)
;; Return an empty directory and its hash. ;; Return an empty directory and its hash.
(values checkout (values checkout
(nix-base32-string->bytevector (nix-base32-string->bytevector
"0sjjj9z1dhilhpc8pq4154czrb79z9cm044jvn75kxcjv6v5l2m5") "0sjjj9z1dhilhpc8pq4154czrb79z9cm044jvn75kxcjv6v5l2m5")
#f))) #f)))
(go-module->guix-package "github.com/go-check/check"))))))) (go-module->guix-package "github.com/go-check/check")))))))
(test-end "go") (test-end "go")