po: Add procedures to handle '\"', '\t' and '\\' escape sequences.

These escape sequences were previously handled by the PEG grammar.

With this change, `make download-po` produces the same result as it did
before PO files were minified.

* guix/build/po.scm (interpret-newline-escape): Replace with ...
(interpret-escape): ... this more general procedure.
(replace-escaped-backslashes): New procedure, essentially restored from
the previous implementation of 'interpret-newline-escape', but
for '\\' escapes.
(interpret-escape-sequences): New procedure to call them all.
(parse-tree->assoc): Use it.

Change-Id: I03226281019fa39ef7bca524278dbc434df95f2e
This commit is contained in:
Florian Pelz 2025-09-16 03:05:06 +02:00
parent e16cfd61f9
commit aff572c9ac
No known key found for this signature in database
GPG key ID: 300888CB39C63817

View file

@ -48,6 +48,17 @@
(and (ignore "\"") (* str-chr) (ignore "\"") (and (ignore "\"") (* str-chr) (ignore "\"")
(? (and (ignore (* whitespace)) content)))) (? (and (ignore (* whitespace)) content))))
(define (replace-escaped-backslashes str)
  "Return a copy of STR in which every '\\\\' escape sequence, scanned from
left to right, is collapsed into a single '\\'."
  (let scan ((start 0)
             (pieces '()))
    (let ((found (string-contains str "\\\\" start)))
      (if found
          ;; Keep the text before the pair plus one backslash, then resume
          ;; right after the pair so that no backslash is matched twice.
          (scan (+ found 2)
                (cons* "\\" (substring str start found) pieces))
          ;; PIECES is accumulated in reverse order; append the remaining
          ;; tail and join everything back in reading order.
          (string-concatenate-reverse
           (cons (substring str start) pieces))))))
(define (final-character-escapes? str last-index) (define (final-character-escapes? str last-index)
"Check if STR ends in an incomplete escape sequence, that is ends in an uneven "Check if STR ends in an incomplete escape sequence, that is ends in an uneven
number of backslashes. LAST-INDEX is the index of its last character." number of backslashes. LAST-INDEX is the index of its last character."
@ -55,19 +66,29 @@ number of backslashes. LAST-INDEX is the index of its last character."
(eqv? (string-ref str last-index) #\\) (eqv? (string-ref str last-index) #\\)
(not (final-character-escapes? str (- last-index 1))))) (not (final-character-escapes? str (- last-index 1)))))
(define (interpret-escape sequence replacement str)
  "Replace backslash escape sequence SEQUENCE in STR with REPLACEMENT (a string)
when SEQUENCE is not escaped itself.  For example, SEQUENCE '\\n' with a
newline string as REPLACEMENT.  An occurrence whose leading backslash is
itself escaped, that is preceded by an uneven number of backslashes, is copied
to the result unchanged.  An empty SEQUENCE leaves STR as is."
  (let ((len (string-length sequence)))
    (if (zero? len)
        ;; An empty SEQUENCE matches everywhere without consuming input;
        ;; there is nothing to interpret.
        str
        (let loop ((str str)
                   (result '()))
          (match (string-contains str sequence)
            (#f (string-concatenate-reverse (cons str result)))
            (index
             ;; Skip exactly the matched SEQUENCE, whatever its length.
             (loop (string-drop str (+ len index))
                   ;; Only add REPLACEMENT when the backslash is not escaped
                   ;; itself.
                   (if (final-character-escapes? str (- index 1))
                       (cons (string-take str (+ len index)) result)
                       (cons* replacement (string-take str index)
                              result)))))))))
(define (interpret-escape-sequences str)
  "Return STR with its '\\t', '\\\"', '\\n' and '\\\\' escape sequences
interpreted, in that order."
  (let* ((str (interpret-escape "\\t" "\t" str))
         (str (interpret-escape "\\\"" "\"" str))
         (str (interpret-escape "\\n" "\n" str)))
    ;; Escaped backslashes are collapsed last: the passes above still need
    ;; them to tell an escaped sequence such as '\\\\n' from a real one.
    (replace-escaped-backslashes str)))
(define (parse-tree->assoc parse-tree) (define (parse-tree->assoc parse-tree)
"Converts a po PARSE-TREE to an association list, where the key is the msgid "Converts a po PARSE-TREE to an association list, where the key is the msgid
@ -103,18 +124,14 @@ and the value is the msgstr. The result only contains non fuzzy strings."
(('entry _ ('msgid msgid) 'msgstr) (('entry _ ('msgid msgid) 'msgstr)
(parse-tree->assoc parse-tree)) (parse-tree->assoc parse-tree))
(('entry ('msgid msgid) ('msgstr msgstr)) (('entry ('msgid msgid) ('msgstr msgstr))
(acons (interpret-newline-escape msgid) (acons (interpret-escape-sequences msgid)
(interpret-newline-escape msgstr) (interpret-escape-sequences msgstr)
(parse-tree->assoc parse-tree)))
(('entry ('msgid msgid) ('msgstr msgstr))
(acons (interpret-newline-escape msgid)
(interpret-newline-escape msgstr)
(parse-tree->assoc parse-tree))) (parse-tree->assoc parse-tree)))
(('entry comments ('msgid msgid) ('msgstr msgstr)) (('entry comments ('msgid msgid) ('msgstr msgstr))
(if (member 'fuzzy (comments->flags comments)) (if (member 'fuzzy (comments->flags comments))
(parse-tree->assoc parse-tree) (parse-tree->assoc parse-tree)
(acons (interpret-newline-escape msgid) (acons (interpret-escape-sequences msgid)
(interpret-newline-escape msgstr) (interpret-escape-sequences msgstr)
(parse-tree->assoc parse-tree)))))))) (parse-tree->assoc parse-tree))))))))
(define (read-po-file port) (define (read-po-file port)