nls: Minify translation PO files.

To save a lot of disk space, keep only actually translated messages in
Gettext PO files.  Ignore the guix domain, which is more complicated
and is tiny.

* build-aux/keep-only-translated.scm: New file.
* Makefile.am (download-po): Run it.

Change-Id: I6442ce0ef8d62f7e48e667c766b86d0ebf9c5415
This commit is contained in:
Florian Pelz 2025-09-01 13:51:04 +02:00
parent 0509bc4cba
commit b5054a85c1
No known key found for this signature in database
GPG key ID: 300888CB39C63817
2 changed files with 91 additions and 4 deletions

View file

@ -1299,8 +1299,10 @@ WEBLATE_REPO = https://codeberg.org/guix/translations
# Shallow clone the Git repository behind Weblate and copy files from it if
# they contain at least one translation, and they are well-formed (Scheme
# format only), warn otherwise. Copied files are converted to a canonical
# form.
download-po:
# form. Note: The files are minified to reduce file size, except those of
# the guix domain, because they are comparatively small and minifying them
# would require more comprehensive PO file parsing for plural forms.
download-po: guix/build/po.go
dir=$$(mktemp -d); \
git clone --depth 1 "$(WEBLATE_REPO)" "$$dir/translations" && \
for domain in po/doc po/guix po/packages; do \
@ -1313,8 +1315,15 @@ download-po:
target="$$domain/$$target"; \
msgfmt -c "$$po"; \
if msgfmt -c "$$po" && [ "$$translated" != "0" ] && ([ "$$domain" != "po/doc" ] || [ "$$translated" -gt $$(($$total/10)) ] || [ -f $$target ]); then \
msgconv --no-wrap -o "$$po".tmp "$$po"; \
mv "$$po".tmp "$$target"; \
if [ "$$domain" != "po/guix" ]; then \
$(GUILE) -L "$(top_builddir)" -L "$(top_srcdir)" \
--no-auto-compile \
-s "$(top_srcdir)"/build-aux/keep-only-translated.scm \
"$$po" > "$$po".tmp; \
else ln -s "$$po" "$$po".tmp; fi; \
msgconv --no-wrap -o "$$po".tmp2 "$$po".tmp; \
rm "$$po".tmp; \
mv "$$po".tmp2 "$$target"; \
echo "copied $$target."; \
else \
echo "WARN: $$target ($$translated translated messages ($$((translated/total*100))%)) was not added/updated."; \

View file

@ -0,0 +1,78 @@
;;; GNU Guix --- Functional package management for GNU
;;; Copyright © 2025 Florian Pelz <pelzflorian@pelzflorian.de>
;;;
;;; This file is part of GNU Guix.
;;;
;;; GNU Guix is free software; you can redistribute it and/or modify it
;;; under the terms of the GNU General Public License as published by
;;; the Free Software Foundation; either version 3 of the License, or (at
;;; your option) any later version.
;;;
;;; GNU Guix is distributed in the hope that it will be useful, but
;;; WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;;; GNU General Public License for more details.
;;;
;;; You should have received a copy of the GNU General Public License
;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
;; Minify Gettext PO files when synced from
;; <https://codeberg.org/guix/translations/>,
;; keeping only actually translated messages.
;; Note: This does not work for PO files of the guix domain, which would need
;; support for plural forms in (@ (guix build po) read-po-file). The guix
;; domain's files are comparatively small and the read-po-file API would
;; have to be expanded to use records or such; it is not worth it.
(use-modules (guix build po)
(ice-9 match)
(ice-9 textual-ports))
(define (escape str)
  "Write STR, a msgid or msgstr, to the current output port, replacing double
quotes, backslashes, linefeeds, carriage returns and tabs by their C-style
escape sequences.  All other characters are written unchanged.  The return
value is unspecified."
  ;; Stream the characters straight from STR: there is no need to wrap it in
  ;; a string port or to build an intermediate list of fragments first.
  (string-for-each
   (lambda (char)
     (display (case char
                ((#\") "\\\"")
                ((#\\) "\\\\")
                ((#\linefeed) "\\n")
                ((#\return) "\\r")
                ((#\tab) "\\t")
                ;; 'display' of a character writes the character itself.
                (else char))))
   str))
;; Entry point.  Expects exactly one command-line argument, the PO file to
;; minify, and writes the minified PO file to the current output port.
(match (command-line)
  ((program pofile)
   ;; 'call-with-input-file' closes the port once the procedure returns.
   (call-with-input-file pofile
     (lambda (input)
       ;; Copy lines verbatim up to and including the first empty line.
       ;; Return #t once that empty line has been copied, or #f when the
       ;; file ends before any empty line is seen.
       (define (copy-header)
         (let ((next-line (get-line input)))
           (cond ((eof-object? next-line)
                  ;; Do not print the EOF object or take its length.
                  #f)
                 (else
                  (display next-line)
                  (newline)
                  (or (string-null? next-line)
                      (copy-header))))))

       ;; Then print the messages found in the rest of the file, one
       ;; msgid/msgstr pair per entry, each followed by an empty line.
       ;; NOTE(review): 'read-po-file' is presumably what restricts the
       ;; output to translated messages; confirm in (guix build po).
       (when (copy-header)
         (for-each
          (lambda (msg)
            (match msg
              ((msgid . msgstr)
               (display "msgid \"")
               (escape msgid)
               (display "\"")
               (newline)
               (display "msgstr \"")
               (escape msgstr)
               (display "\"")
               (newline)
               (newline))))
          (read-po-file input))))))
  (_
   ;; Standard output is meant to receive the PO file, so report misuse on
   ;; the error port and exit with a failure status.
   (format (current-error-port) "Usage: ~a PO-FILE~%"
           (car (command-line)))
   (exit 1)))