From b5054a85c1bff91ca8a55b2ca218fb5f2b9518cd Mon Sep 17 00:00:00 2001 From: Florian Pelz Date: Mon, 1 Sep 2025 13:51:04 +0200 Subject: [PATCH] nls: Minify translation PO files. To save a lot of disk space, keep only actually translated messages in Gettext PO files. Ignore the guix domain, which is more complicated and is tiny. * build-aux/keep-only-translated.scm: New file. * Makefile.am (download-po): Run it. Change-Id: I6442ce0ef8d62f7e48e667c766b86d0ebf9c5415 --- Makefile.am | 17 +++++-- build-aux/keep-only-translated.scm | 78 ++++++++++++++++++++++++++++++ 2 files changed, 91 insertions(+), 4 deletions(-) create mode 100644 build-aux/keep-only-translated.scm diff --git a/Makefile.am b/Makefile.am index 19aad0042e3..b29f2dfa84a 100644 --- a/Makefile.am +++ b/Makefile.am @@ -1299,8 +1299,10 @@ WEBLATE_REPO = https://codeberg.org/guix/translations # Shallow clone the Git repository behind Weblate and copy files from it if # they contain at least one translation, and they are well-formed (Scheme # format only), warn otherwise. Copied files are converted to a canonical -# form. -download-po: +# form. Note: The files will be minified to reduce file size, except the +# guix domain, because they are comparatively small and it would need more +# comprehensive PO file parsing abilities for plural forms. 
+download-po: guix/build/po.go dir=$$(mktemp -d); \ git clone --depth 1 "$(WEBLATE_REPO)" "$$dir/translations" && \ for domain in po/doc po/guix po/packages; do \ @@ -1313,8 +1315,15 @@ download-po: target="$$domain/$$target"; \ msgfmt -c "$$po"; \ if msgfmt -c "$$po" && [ "$$translated" != "0" ] && ([ "$$domain" != "po/doc" ] || [ "$$translated" -gt $$(($$total/10)) ] || [ -f $$target ]); then \ - msgconv --no-wrap -o "$$po".tmp "$$po"; \ - mv "$$po".tmp "$$target"; \ + if [ "$$domain" != "po/guix" ]; then \ + $(GUILE) -L "$(top_builddir)" -L "$(top_srcdir)" \ + --no-auto-compile \ + -s "$(top_srcdir)"/build-aux/keep-only-translated.scm \ + "$$po" > "$$po".tmp; \ + else ln -s "$$po" "$$po".tmp; fi; \ + msgconv --no-wrap -o "$$po".tmp2 "$$po".tmp; \ + rm "$$po".tmp; \ + mv "$$po".tmp2 "$$target"; \ echo "copied $$target."; \ else \ echo "WARN: $$target ($$translated translated messages ($$((translated/total*100))%)) was not added/updated."; \ diff --git a/build-aux/keep-only-translated.scm b/build-aux/keep-only-translated.scm new file mode 100644 index 00000000000..c6b0905c4b4 --- /dev/null +++ b/build-aux/keep-only-translated.scm @@ -0,0 +1,78 @@ +;;; GNU Guix --- Functional package management for GNU +;;; Copyright © 2025 Florian Pelz +;;; +;;; This file is part of GNU Guix. +;;; +;;; GNU Guix is free software; you can redistribute it and/or modify it +;;; under the terms of the GNU General Public License as published by +;;; the Free Software Foundation; either version 3 of the License, or (at +;;; your option) any later version. +;;; +;;; GNU Guix is distributed in the hope that it will be useful, but +;;; WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;;; GNU General Public License for more details. +;;; +;;; You should have received a copy of the GNU General Public License +;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>. 
+
+;; Minify Gettext PO files when synced from
+;; <https://codeberg.org/guix/translations>,
+;; keeping only actually translated messages.
+
+;; Note: This does not work for PO files of the guix domain, which would need
+;; support for plural forms in (@ (guix build po) read-po-file).  The guix
+;; domain's files are comparatively small and the read-po-file API would
+;; have to be expanded to use records or such; it is not worth it.
+
+(use-modules (guix build po)
+             (ice-9 match)
+             (ice-9 textual-ports))
+
+(define (escape str)
+  "Write STR, a msgid or msgstr, to the current output port, replacing double
+quotes, backslashes, linefeeds, carriage returns and tabs by C-style escape
+sequences."
+  (string-for-each
+   (lambda (char)
+     (display (case char
+                ((#\") "\\\"")
+                ((#\\) "\\\\")
+                ((#\linefeed) "\\n")
+                ((#\return) "\\r")
+                ((#\tab) "\\t")
+                (else char))))
+   str))
+
+(define (copy-header input)
+  "Copy lines from INPUT to the current output port, up to and including the
+first empty line.  Stop at end of file if INPUT contains no empty line."
+  (let loop ((line (get-line input)))
+    ;; 'get-line' returns the EOF object, not a string, at end of file.
+    (unless (eof-object? line)
+      (display line)
+      (newline)
+      (unless (string-null? line)
+        (loop (get-line input))))))
+
+(define (write-message msgid msgstr)
+  "Write the PO entry for MSGID and MSGSTR, followed by an empty line."
+  (display "msgid \"")
+  (escape msgid)
+  (display "\"\n")
+  (display "msgstr \"")
+  (escape msgstr)
+  (display "\"\n\n"))
+
+(match (command-line)
+  ((program pofile)
+   ;; 'call-with-input-file' closes the port once the procedure returns.
+   (call-with-input-file pofile
+     (lambda (input)
+       ;; Just copy until an empty line.
+       (copy-header input)
+       ;; Then print only translated messages.
+       (for-each (match-lambda
+                   ((msgid . msgstr)
+                    (write-message msgid msgstr)))
+                 (read-po-file input))))))