po: Add procedures to handle '\"', '\t' and '\\' escape sequences.

They were handled with PEG before.

Now `make download-po` has the same result as before PO files were
minified.

* guix/build/po.scm (interpret-newline-escape): Replace with ...
(interpret-escape): ... this more general procedure.
(replace-escaped-backslashes): New procedure, basically restored from
the past implementation of 'interpret-newline-escape' but
for '\\' escapes.
(interpret-escape-sequences): New procedure to call them all.
(parse-tree->assoc): Use it.

Change-Id: I03226281019fa39ef7bca524278dbc434df95f2e
This commit is contained in:
Florian Pelz 2025-09-16 03:05:06 +02:00
parent e16cfd61f9
commit aff572c9ac
No known key found for this signature in database
GPG key ID: 300888CB39C63817

View file

@ -48,6 +48,17 @@
(and (ignore "\"") (* str-chr) (ignore "\"")
(? (and (ignore (* whitespace)) content))))
(define (replace-escaped-backslashes str)
  "Return STR with every '\\\\' escape sequence collapsed into a single '\\'.
The string is scanned left to right, so each pair of backslashes is consumed
once and the backslash it produces is never re-examined."
  (let loop ((rest str)
             (pieces '()))
    (let ((index (string-contains rest "\\\\")))
      (if index
          ;; Keep the text before the escape, emit one backslash, and resume
          ;; scanning right after the two-character escape.  PIECES is
          ;; accumulated in reverse order.
          (loop (string-drop rest (+ index 2))
                (cons* "\\" (string-take rest index) pieces))
          ;; No escape left: REST is the tail of the result.
          (string-concatenate-reverse (cons rest pieces))))))
(define (final-character-escapes? str last-index)
"Check if STR ends in an incomplete escape sequence, that is ends in an uneven
number of backslashes. LAST-INDEX is the index of its last character."
@ -55,19 +66,29 @@ number of backslashes. LAST-INDEX is the index of its last character."
(eqv? (string-ref str last-index) #\\)
(not (final-character-escapes? str (- last-index 1)))))
(define (interpret-escape sequence replacement str)
  "Replace backslash escape sequence SEQUENCE in STR with REPLACEMENT (a string)
when SEQUENCE is not escaped itself.  For example, SEQUENCE '\\n' with a
newline string as REPLACEMENT.  An occurrence whose leading backslash is itself
escaped is copied to the result unchanged.  SEQUENCE must be a non-empty
string; its length is not limited to two characters."
  ;; Skip exactly as many characters as SEQUENCE has instead of assuming 2.
  (define sequence-length (string-length sequence))
  (let loop ((str str)
             (result '()))
    (match (string-contains str sequence)
      ;; No further occurrence: the remainder is the tail of the result.
      ;; RESULT is accumulated in reverse order.
      (#f (string-concatenate-reverse (cons str result)))
      (index
       (let ((end (+ index sequence-length)))
         (loop (string-drop str end)
               ;; Only add REPLACEMENT when the backslash is not escaped
               ;; itself.  At index 0 nothing precedes the backslash, so it
               ;; cannot be escaped and no lookup before the string start is
               ;; attempted.
               (if (and (> index 0)
                        (final-character-escapes? str (- index 1)))
                   (cons (string-take str end) result)
                   (cons* replacement (string-take str index) result))))))))
(define (interpret-escape-sequences str)
  "Return STR with its '\\t', '\\\"', '\\n' and '\\\\' escape sequences
unescaped."
  (let* ((tabs     (interpret-escape "\\t" "\t" str))
         (quotes   (interpret-escape "\\\"" "\"" tabs))
         (newlines (interpret-escape "\\n" "\n" quotes)))
    ;; Collapse escaped backslashes last: the passes above inspect the
    ;; backslashes preceding a sequence to decide whether it is escaped.
    (replace-escaped-backslashes newlines)))
(define (parse-tree->assoc parse-tree)
"Converts a po PARSE-TREE to an association list, where the key is the msgid
@ -103,18 +124,14 @@ and the value is the msgstr. The result only contains non fuzzy strings."
(('entry _ ('msgid msgid) 'msgstr)
(parse-tree->assoc parse-tree))
(('entry ('msgid msgid) ('msgstr msgstr))
(acons (interpret-newline-escape msgid)
(interpret-newline-escape msgstr)
(parse-tree->assoc parse-tree)))
(('entry ('msgid msgid) ('msgstr msgstr))
(acons (interpret-newline-escape msgid)
(interpret-newline-escape msgstr)
(acons (interpret-escape-sequences msgid)
(interpret-escape-sequences msgstr)
(parse-tree->assoc parse-tree)))
(('entry comments ('msgid msgid) ('msgstr msgstr))
(if (member 'fuzzy (comments->flags comments))
(parse-tree->assoc parse-tree)
(acons (interpret-newline-escape msgid)
(interpret-newline-escape msgstr)
(acons (interpret-escape-sequences msgid)
(interpret-escape-sequences msgstr)
(parse-tree->assoc parse-tree))))))))
(define (read-po-file port)