On z/OS, set a charset tag on iconv's output file.
For the concept of charset tags as external metadata on z/OS files, see
<https://lists.gnu.org/archive/html/bug-gnu-libiconv/2023-04/msg00021.html>.
* src/zos-tag.h: New file.
* src/iconv.c: Include zos-tag.h.
(convert): Add a 'tocode' parameter. On z/OS, turn off auto-conversion
and tag the output file.
(main): Update callers.
* tests/check-ebcdic: On z/OS, make all test files initially untagged.
* tests/check-tag: New file.
* tests/Makefile.in (check): Pass the host_os to check-ebcdic. Invoke
check-tag.
diff --git a/ChangeLog b/ChangeLog
index d76dff0..389f771 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,18 @@
+2023-05-12 Mike Fulton <mikefultonpersonal@gmail.com>
+
+ On z/OS, set a charset tag on iconv's output file.
+ For the concept of charset tags as external metadata on z/OS files, see
+ <https://lists.gnu.org/archive/html/bug-gnu-libiconv/2023-04/msg00021.html>.
+ * src/zos-tag.h: New file.
+ * src/iconv.c: Include zos-tag.h.
+ (convert): Add a 'tocode' parameter. On z/OS, turn off auto-conversion
+ and tag the output file.
+ (main): Update callers.
+ * tests/check-ebcdic: On z/OS, make all test files initially untagged.
+ * tests/check-tag: New file.
+ * tests/Makefile.in (check): Pass the host_os to check-ebcdic. Invoke
+ check-tag.
+
2023-04-03 Bruno Haible <bruno@clisp.org>
Fix genflags compilation error.
diff --git a/src/iconv.c b/src/iconv.c
index ec4caa1..7c75dfd 100644
--- a/src/iconv.c
+++ b/src/iconv.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2000-2022 Free Software Foundation, Inc.
+/* Copyright (C) 2000-2023 Free Software Foundation, Inc.
This file is part of the GNU LIBICONV Library.
This program is free software: you can redistribute it and/or modify
@@ -43,6 +43,10 @@
#include "uniwidth.h"
#include "uniwidth/cjk.h"
+#ifdef __MVS__
+#include "zos-tag.h"
+#endif
+
/* Ensure that iconv_no_i18n does not depend on libintl. */
#ifdef NO_I18N
#include <stdarg.h>
@@ -674,7 +678,7 @@
/* Convert the input given in infile. */
-static int convert (iconv_t cd, int infile, const char* infilename)
+static int convert (iconv_t cd, int infile, const char* infilename, _GL_UNUSED const char* tocode)
{
char inbuf[4096+4096];
size_t inbufrest = 0;
@@ -687,6 +691,11 @@
#if O_BINARY
SET_BINARY(infile);
#endif
+#ifdef __MVS__
+ /* Turn off z/OS auto-conversion. */
+ struct f_cnvrt req = {SETCVTOFF, 0, 0};
+ fcntl(infile, F_CONTROL_CVT, &req);
+#endif
line = 1; column = 0;
iconv(cd,NULL,NULL,NULL,NULL);
for (;;) {
@@ -835,6 +844,11 @@
goto done;
}
done:
+#ifdef __MVS__
+ if (!status) {
+ status = tagfile(fileno(stdout), tocode);
+ }
+#endif
if (outbuf != initial_outbuf)
free(outbuf);
return status;
@@ -1113,7 +1127,8 @@
if (i == argc)
status = convert(cd,fileno(stdin),
/* TRANSLATORS: A filename substitute denoting standard input. */
- _("(stdin)"));
+ _("(stdin)"),
+ tocode);
else {
status = 0;
for (; i < argc; i++) {
@@ -1129,7 +1144,7 @@
infilename);
status = 1;
} else {
- status |= convert(cd,fileno(infile),infilename);
+ status |= convert(cd,fileno(infile),infilename,tocode);
fclose(infile);
}
}
diff --git a/src/zos-tag.h b/src/zos-tag.h
new file mode 100644
index 0000000..e3b2bc4
--- /dev/null
+++ b/src/zos-tag.h
@@ -0,0 +1,96 @@
+/* Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU LIBICONV Library.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>. */
+
+/* Written by Mike Fulton. */
+
+#ifndef __ZOS_TAG__
+#define __ZOS_TAG__ 1
+
+#include <_Ccsid.h>
+
+#ifdef __MVS__
+/* See: https://www.ibm.com/docs/en/zos/latest?topic=lf-toccsid-convert-codeset-name-coded-character-set-id */
+
+/* Replace the first PFXLEN bytes of the NUL-terminated string ENCODING (of length
+   ENCLEN) in place by NORMPFX.  Requires enclen >= pfxlen >= normpfxlen.  */
+static void chgpfx (char* encoding, size_t enclen, size_t pfxlen, const char* normpfx, size_t normpfxlen)
+{
+  memcpy(encoding, normpfx, normpfxlen); /* a zero-length copy is a no-op */
+  /* The tail (including its NUL) is shifted left within the same buffer, so the regions can overlap: use memmove. */
+  memmove(&encoding[normpfxlen], &encoding[pfxlen], enclen-pfxlen+1);
+}
+
+/* Return the CCSID that __toCcsid reports for ENCODING, after rewriting
+   prefixes that GNU iconv accepts into the spelling __toCcsid expects.  */
+static __ccsid_t map_encoding_to_ccsid (const char* encoding)
+{
+  size_t enclen = strlen(encoding);
+  char* updtenc = (char*) xmalloc(enclen+1);
+  memcpy(updtenc, encoding, enclen+1);
+
+  /*
+   * Some strings are known to gnu iconv but not z/OS __toCcsid.
+   * Examples are ISO-8859-1, ISO_8859-2, CP819 (which map to ISO8859-1, ISO8859-2, 819)
+   *
+   * The following are supported encodings and corresponding output CCSIDs
+   *
+   * CCSID Encoding        CCSID Encoding
+   *   819 ISO8859-1         912 ISO8859-2
+   *   914 ISO8859-4         915 ISO8859-5
+   *  1089 ISO8859-6         813 ISO8859-7
+   *   916 ISO8859-8         920 ISO8859-9
+   *   921 ISO8859-13        923 ISO8859-15
+   */
+  #define ISO8859 "ISO8859"
+  #define ISO8859_LEN (sizeof(ISO8859)-1)
+  #define ISO8859_DASH "ISO-8859"
+  #define ISO8859_DASH_LEN (sizeof(ISO8859_DASH)-1)
+  #define ISO8859_UL "ISO_8859"
+  #define ISO8859_UL_LEN (sizeof(ISO8859_UL)-1)
+  #define CP "CP"
+  #define CP_LEN (sizeof(CP)-1)
+  #define NO_PFX ""
+  #define NO_PFX_LEN (0)
+
+  if (enclen > ISO8859_DASH_LEN && !memcmp(ISO8859_DASH, encoding, ISO8859_DASH_LEN)) {
+    chgpfx(updtenc, enclen, ISO8859_DASH_LEN, ISO8859, ISO8859_LEN);
+  } else if (enclen > ISO8859_UL_LEN && !memcmp(ISO8859_UL, encoding, ISO8859_UL_LEN)) {
+    chgpfx(updtenc, enclen, ISO8859_UL_LEN, ISO8859, ISO8859_LEN);
+  } else if (enclen > CP_LEN && !memcmp(CP, encoding, CP_LEN)) {
+    chgpfx(updtenc, enclen, CP_LEN, NO_PFX, NO_PFX_LEN);
+  }
+  /* The normalized copy is only needed for the lookup; free it so it does not leak on every call. */
+  __ccsid_t ccsid = __toCcsid(updtenc);
+  free(updtenc);
+  return ccsid;
+}
+
+/* Tag the file open on FILEDES as text with the CCSID that corresponds to TOCODE.
+   Returns 0 if no CCSID was found for TOCODE, otherwise the result of __fchattr.  */
+static int tagfile(int filedes, const char* tocode)
+{
+  __ccsid_t ccsid = map_encoding_to_ccsid(tocode);
+  attrib_t attr = {0};
+
+  if (!ccsid)
+    return 0; /* nothing to tag with: leave the file's attributes alone */
+  attr.att_filetagchg = 1;
+  attr.att_filetag.ft_ccsid = ccsid;
+  attr.att_filetag.ft_txtflag = 1;
+  return __fchattr(filedes, &attr, sizeof(attr));
+}
+#endif
+#endif
diff --git a/tests/Makefile.in b/tests/Makefile.in
index 794a379..b94c92e 100644
--- a/tests/Makefile.in
+++ b/tests/Makefile.in
@@ -161,7 +161,9 @@
# /* substitution */
$(SHELL) $(srcdir)/check-subst
# /* EBCDIC specific functionality */
- $(SHELL) $(srcdir)/check-ebcdic
+ $(SHELL) $(srcdir)/check-ebcdic '@host_os@'
+# /* test z/OS file tagging */
+ $(SHELL) $(srcdir)/check-tag '@host_os@'
# /* shift sequence before invalid multibyte character */
./test-shiftseq
# /* conversion to wchar_t */
diff --git a/tests/check-ebcdic b/tests/check-ebcdic
index 62dfd61..1bad48d 100755
--- a/tests/check-ebcdic
+++ b/tests/check-ebcdic
@@ -1,13 +1,27 @@
#!/bin/sh
# Check of ICONV_EBCDIC_ZOS_UNIX environment variable.
set -e
+host_os="$1"
iconv=../src/iconv_no_i18n
# This test is only meaningful when the EBCDIC encodings are included.
if $iconv -l | grep IBM-1047 > /dev/null; then
- printf 'hello\n' > tmp-ok-lf
- printf 'hello\302\205' > tmp-ok-nel
+ if test "${host_os}" = 'openedition' ; then
+ # On z/OS, make all the files 'untagged'
+ rm -f tmp-ok-lf tmp-ok-nel tmp-ok-x15 tmp-ok-x25
+ touch tmp-ok-lf
+ touch tmp-ok-nel
+ touch tmp-ok-x15
+ touch tmp-ok-x25
+ chtag -r tmp-ok-lf
+ chtag -r tmp-ok-nel
+ chtag -r tmp-ok-x15
+ chtag -r tmp-ok-x25
+ fi
+
+ printf '\150\145\154\154\157\012' > tmp-ok-lf
+ printf '\150\145\154\154\157\302\205' > tmp-ok-nel
printf '\210\205\223\223\226\025' > tmp-ok-x15
printf '\210\205\223\223\226\045' > tmp-ok-x25
diff --git a/tests/check-tag b/tests/check-tag
new file mode 100644
index 0000000..426f316
--- /dev/null
+++ b/tests/check-tag
@@ -0,0 +1,44 @@
+#!/bin/sh
+# Check that iconv's output files on z/OS are tagged with the CCSID of the -t encoding.
+set -e
+host_os="$1" # host OS name, passed as the first argument by the caller
+iconv=../src/iconv_no_i18n
+
+# This test is only meaningful on z/OS (previously called OS/390); elsewhere it exits 0.
+rc=0
+if test "${host_os}" = 'openedition' ; then
+
+ printf 'hello' | $iconv -f ISO8859-1 -t IBM-1047 > tmp-tag-1
+ if ls -T tmp-tag-1 | grep -v 'IBM-1047' ; then # grep -v succeeds when a line lacks the expected tag; presumably 'ls -T' prints the file tag
+ echo "tmp-tag-1 is not correctly tagged as IBM-1047." >&2
+ rc=1
+ fi
+ printf 'hello' | $iconv -f ISO8859-1 -t IBM-1047 >tmp-tag-ebcdic
+
+ $iconv -f IBM-1047 -t ISO-8859-2 <tmp-tag-ebcdic > tmp-tag-2
+ if ls -T tmp-tag-2 | grep -v 'ISO8859-2' ; then # 'ISO-8859-2' spelling is expected to be tagged as ISO8859-2
+ echo "tmp-tag-2 is not correctly tagged as ISO8859-2." >&2
+ rc=1
+ fi
+ $iconv -f IBM-1047 -t CP037 <tmp-tag-ebcdic > tmp-tag-7
+ if ls -T tmp-tag-7 | grep -v 'IBM-037' ; then # 'CP037' spelling is expected to be tagged as IBM-037
+ echo "tmp-tag-7 is not correctly tagged as IBM-037." >&2
+ rc=1
+ fi
+ $iconv -f IBM-1047 -t UTF-8 <tmp-tag-ebcdic > tmp-tag-8
+ if ls -T tmp-tag-8 | grep -v 'UTF-8' ; then
+ echo "tmp-tag-8 is not correctly tagged as UTF-8." >&2
+ rc=1
+ fi
+ $iconv -f IBM-1047 -t ISO_8859-9 <tmp-tag-ebcdic > tmp-tag-9
+ if ls -T tmp-tag-9 | grep -v 'ISO8859-9' ; then # 'ISO_8859-9' spelling is expected to be tagged as ISO8859-9
+ echo "tmp-tag-9 is not correctly tagged as ISO8859-9." >&2
+ rc=1
+ fi
+
+ if [ $rc -eq 0 ] ; then # keep the temporary files around when a check failed
+ rm -f tmp-tag-*
+ fi
+fi
+
+exit $rc