Make table of transliterations more maintainable.
* tools/gen-translit-def-part.lisp: New file.
* Makefile.in (SOURCE_FILES): Add it.
diff --git a/ChangeLog b/ChangeLog
index 80579ad..2489183 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,11 @@
2023-03-31 Bruno Haible <bruno@clisp.org>
+ Make table of transliterations more maintainable.
+ * tools/gen-translit-def-part.lisp: New file.
+ * Makefile.in (SOURCE_FILES): Add it.
+
+2023-03-31 Bruno Haible <bruno@clisp.org>
+
Fix table of transliterations.
* lib/translit.def: Sort according to Unicode code point.
diff --git a/Makefile.in b/Makefile.in
index 475a93b..5ddabe1 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -172,7 +172,8 @@
extras/ChangeLog extras/iconv_string.h extras/iconv_string.c \
tools/Makefile \
tools/8bit_tab_to_h.c tools/cjk_tab_to_h.c tools/cjk_variants.c \
- tools/CP50221-0208-EXT.TXT tools/CP50221-0212-EXT.TXT tools/JISX0213.TXT
+ tools/CP50221-0208-EXT.TXT tools/CP50221-0212-EXT.TXT tools/JISX0213.TXT \
+ tools/gen-translit-def-part.lisp
# List of distributed files imported from other packages.
LIBTOOL_IMPORTED_FILES = \
build-aux/ltmain.sh \
diff --git a/tools/gen-translit-def-part.lisp b/tools/gen-translit-def-part.lisp
new file mode 100644
index 0000000..5d293dd
--- /dev/null
+++ b/tools/gen-translit-def-part.lisp
@@ -0,0 +1,83 @@
+;;; Copyright (C) 2023 Free Software Foundation, Inc.
+;;; This file is part of the GNU LIBICONV Tools.
+;;;
+;;; This program is free software: you can redistribute it and/or modify
+;;; it under the terms of the GNU General Public License as published by
+;;; the Free Software Foundation; either version 3 of the License, or
+;;; (at your option) any later version.
+;;;
+;;; This program is distributed in the hope that it will be useful,
+;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;;; GNU General Public License for more details.
+;;;
+;;; You should have received a copy of the GNU General Public License
+;;; along with this program; if not, see <https://www.gnu.org/licenses/>.
+
+;;; Transform UnicodeData.txt to a part of translit.def.
+;;; translit.def consists of lines of the form
+;;; Unicode <tab> utf-8 replacement <tab> # comment
+;;; This program produces the lines for the code points >= U+3200
+;;; and reasonable approximations for the code points < U+3200.
+
+(defun starts-with (s prefix)
+ (locally (declare (compile)))
+ (and (>= (length s) (length prefix))
+ (equal (substring s 0 (length prefix)) prefix)))
+
+(defun generate-part (infilename outfilename)
+ (locally (declare (compile)))
+ (with-open-file (in infilename)
+ (with-open-file (out outfilename :direction :output
+ :external-format CHARSET:UTF-8)
+ (loop
+ (let ((line (read-line in nil nil)))
+ (unless line (return))
+ (let* ((semicolon1 (position #\; line))
+ (semicolon2 (position #\; line :start (+ semicolon1 1)))
+ (semicolon3 (position #\; line :start (+ semicolon2 1)))
+ (semicolon4 (position #\; line :start (+ semicolon3 1)))
+ (semicolon5 (position #\; line :start (+ semicolon4 1)))
+ (semicolon6 (position #\; line :start (+ semicolon5 1)))
+ (code (let ((*read-base* 16)) (read-from-string (substring line 0 semicolon1))))
+ (name (substring line (+ semicolon1 1) semicolon2))
+ (category (substring line (+ semicolon2 1) semicolon3))
+ (combining (substring line (+ semicolon3 1) semicolon4))
+ (bidi (substring line (+ semicolon4 1) semicolon5))
+ (decomposition (substring line (+ semicolon5 1) semicolon6)))
+ (declare (ignore category combining bidi))
+ (when (or (starts-with decomposition "<circle> ")
+ (starts-with decomposition "<compat> ")
+ (starts-with decomposition "<font> ")
+ (starts-with decomposition "<narrow> ")
+ (starts-with decomposition "<small> ")
+ (starts-with decomposition "<square> ")
+ (starts-with decomposition "<wide> ")
+ (and (starts-with name "CJK COMPATIBILITY IDEOGRAPH-")
+ (> (length decomposition) 0))
+ )
+ (let ((replacement '()))
+ (let ((i (+ (or (position #\> decomposition) -2) 2)))
+ (loop
+ (let* ((space (position #\Space decomposition :start i))
+ (hexcode (substring decomposition i (or space (length decomposition)))))
+ (push
+ (int-char (let ((*read-base* 16)) (read-from-string hexcode)))
+ replacement)
+ (unless space
+ (return))
+ (setq i (+ space 1)))))
+ (setq replacement (nreverse replacement))
+ (when (starts-with decomposition "<circle> ")
+ (setq replacement (concatenate 'list '(#\() replacement '(#\)))))
+ (setq replacement (mapcan (lambda (c)
+ (cond ((eql c #\DIVISION_SLASH) (list #\/))
+ ((eql c #\SUPERSCRIPT_TWO) (list #\^ #\2))
+ ((eql c #\SUPERSCRIPT_THREE) (list #\^ #\3))
+ ((eql c #\SCRIPT_SMALL_L) (list #\l))
+ (t (list c)))) replacement))
+ (format out "~4,'0X~A~{~A~}~A# ~A~%" code #\Tab replacement #\Tab name)
+ ) )
+) ) ) ) ) )
+
+; (generate-part "/home/bruno/notes/UnicodeData-4.0.0.txt" "translit-part-4.0.0.def")