Bruno Haible | 65068c4 | 2001-03-27 13:35:32 +0000 | [diff] [blame] | 1 | /* |
Bruno Haible | 19b6af5 | 2023-04-03 04:12:01 +0200 | [diff] [blame] | 2 | * Copyright (C) 1999-2008, 2011, 2016, 2018, 2020, 2022-2023 Free Software Foundation, Inc. |
Bruno Haible | 65068c4 | 2001-03-27 13:35:32 +0000 | [diff] [blame] | 3 | * This file is part of the GNU LIBICONV Library. |
| 4 | * |
| 5 | * The GNU LIBICONV Library is free software; you can redistribute it |
Bruno Haible | 3acb117 | 2020-04-04 14:58:34 +0200 | [diff] [blame] | 6 | * and/or modify it under the terms of the GNU Lesser General Public |
Bruno Haible | 91f96be | 2021-06-06 11:51:12 +0200 | [diff] [blame] | 7 | * License as published by the Free Software Foundation; either version 2.1 |
Bruno Haible | 65068c4 | 2001-03-27 13:35:32 +0000 | [diff] [blame] | 8 | * of the License, or (at your option) any later version. |
| 9 | * |
| 10 | * The GNU LIBICONV Library is distributed in the hope that it will be |
| 11 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
Bruno Haible | 3acb117 | 2020-04-04 14:58:34 +0200 | [diff] [blame] | 13 | * Lesser General Public License for more details. |
Bruno Haible | 65068c4 | 2001-03-27 13:35:32 +0000 | [diff] [blame] | 14 | * |
Bruno Haible | 3acb117 | 2020-04-04 14:58:34 +0200 | [diff] [blame] | 15 | * You should have received a copy of the GNU Lesser General Public |
Bruno Haible | 65068c4 | 2001-03-27 13:35:32 +0000 | [diff] [blame] | 16 | * License along with the GNU LIBICONV Library; see the file COPYING.LIB. |
Bruno Haible | e54fc9c | 2018-09-17 18:28:56 +0200 | [diff] [blame] | 17 | * If not, see <https://www.gnu.org/licenses/>. |
Bruno Haible | 65068c4 | 2001-03-27 13:35:32 +0000 | [diff] [blame] | 18 | */ |
Bruno Haible | 3af9cd3 | 1999-12-31 12:50:49 +0000 | [diff] [blame] | 19 | |
#include <iconv.h>

#include <limits.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include "config.h"
#include "localcharset.h"
Bruno Haible | 3af9cd3 | 1999-12-31 12:50:49 +0000 | [diff] [blame] | 27 | |
Bruno Haible | 3cdff14 | 2011-01-29 18:34:14 +0100 | [diff] [blame] | 28 | #ifdef __CYGWIN__ |
| 29 | #include <cygwin/version.h> |
| 30 | #endif |
| 31 | |
/*
 * Select the groups of system dependent encodings to compile in.
 * Each USE_xxx macro defined here enables the matching "encodings_xxx.def"
 * and "canonical_xxx.h" includes further down in this file.
 */
#if ENABLE_EXTRA
/*
 * Consider all system dependent encodings, for any system,
 * and the extra encodings.
 */
#define USE_AIX
#define USE_OSF1
#define USE_DOS
#define USE_ZOS
#define USE_EXTRA
#else
/*
 * Consider those system dependent encodings that are needed for the
 * current system.
 */
#ifdef _AIX
#define USE_AIX
#endif
#if defined(__osf__) || defined(VMS)
#define USE_OSF1
#endif
#if defined(__DJGPP__) || (defined(_WIN32) && (defined(_MSC_VER) || defined(__MINGW32__)))
#define USE_DOS
#endif
/* Enable the EBCDIC encodings not only on z/OS but also on Linux/s390, for
   easier interoperability between z/OS and Linux/s390. */
#if defined(__MVS__) || (defined(__linux__) && (defined(__s390__) || defined(__s390x__)))
#define USE_ZOS
#endif
#endif /* ENABLE_EXTRA */
Bruno Haible | c5e6259 | 2000-03-13 14:46:54 +0000 | [diff] [blame] | 62 | |
/*
 * Data type for general conversion loop.
 * Holds the two entry points that iconv() dispatches to through
 * cd->lfuncs.
 */
struct loop_funcs {
  /* Called by iconv() when an input buffer is supplied. */
  size_t (*loop_convert) (iconv_t icd,
                          const char* * inbuf, size_t *inbytesleft,
                          char* * outbuf, size_t *outbytesleft);
  /* Called by iconv() when inbuf or *inbuf is NULL. */
  size_t (*loop_reset) (iconv_t icd,
                        char* * outbuf, size_t *outbytesleft);
};
| 73 | |
| 74 | /* |
Bruno Haible | 00e960f | 2000-01-04 21:56:56 +0000 | [diff] [blame] | 75 | * Converters. |
| 76 | */ |
| 77 | #include "converters.h" |
Bruno Haible | 3af9cd3 | 1999-12-31 12:50:49 +0000 | [diff] [blame] | 78 | |
| 79 | /* |
Bruno Haible | 00e960f | 2000-01-04 21:56:56 +0000 | [diff] [blame] | 80 | * Transliteration tables. |
Bruno Haible | 3af9cd3 | 1999-12-31 12:50:49 +0000 | [diff] [blame] | 81 | */ |
Bruno Haible | 00e960f | 2000-01-04 21:56:56 +0000 | [diff] [blame] | 82 | #include "cjk_variants.h" |
| 83 | #include "translit.h" |
Bruno Haible | 3af9cd3 | 1999-12-31 12:50:49 +0000 | [diff] [blame] | 84 | |
/*
 * Table of all supported encodings.
 * The members are filled positionally by the DEFENCODING expansions that
 * build all_encodings[], so their order must not change.
 */
struct encoding {
  struct mbtowc_funcs ifuncs;  /* conversion multibyte -> unicode */
  struct wctomb_funcs ofuncs;  /* conversion unicode -> multibyte */
  int oflags;                  /* flags for unicode -> multibyte conversion */
};
/* The .def files also contain DEFALIAS entries; they are ignored here. */
#define DEFALIAS(xxx_alias,xxx) /* nothing */
/*
 * Encoding indices: one enumerator ei_<xxx> per DEFENCODING entry, in the
 * order in which the .def files are included.  The same include order is
 * used for all_encodings[] below.
 */
enum {
#define DEFENCODING(xxx_names,xxx,xxx_ifuncs1,xxx_ifuncs2,xxx_ofuncs1,xxx_ofuncs2) \
  ei_##xxx ,
#include "encodings.def"
#ifdef USE_AIX
# include "encodings_aix.def"
#endif
#ifdef USE_OSF1
# include "encodings_osf1.def"
#endif
#ifdef USE_DOS
# include "encodings_dos.def"
#endif
#ifdef USE_ZOS
# include "encodings_zos.def"
#endif
#ifdef USE_EXTRA
# include "encodings_extra.def"
#endif
#include "encodings_local.def"
#undef DEFENCODING
/* Terminating enumerator, so that the list does not end in a comma. */
ei_for_broken_compilers_that_dont_like_trailing_commas
};
/* NOTE(review): presumably flags.h provides the ei_<xxx>_oflags constants
   referenced by the first DEFENCODING expansion below -- confirm. */
#include "flags.h"
/*
 * One 'struct encoding' initializer per DEFENCODING entry, built from the
 * same .def files in the same order as the enum of ei_<xxx> indices.
 */
static struct encoding const all_encodings[] = {
#define DEFENCODING(xxx_names,xxx,xxx_ifuncs1,xxx_ifuncs2,xxx_ofuncs1,xxx_ofuncs2) \
  { xxx_ifuncs1,xxx_ifuncs2, xxx_ofuncs1,xxx_ofuncs2, ei_##xxx##_oflags },
#include "encodings.def"
#ifdef USE_AIX
# include "encodings_aix.def"
#endif
#ifdef USE_OSF1
# include "encodings_osf1.def"
#endif
#ifdef USE_DOS
# include "encodings_dos.def"
#endif
#ifdef USE_ZOS
# include "encodings_zos.def"
#endif
#ifdef USE_EXTRA
# include "encodings_extra.def"
#endif
#undef DEFENCODING
/* The entries of encodings_local.def get an oflags value of 0. */
#define DEFENCODING(xxx_names,xxx,xxx_ifuncs1,xxx_ifuncs2,xxx_ofuncs1,xxx_ofuncs2) \
  { xxx_ifuncs1,xxx_ifuncs2, xxx_ofuncs1,xxx_ofuncs2, 0 },
#include "encodings_local.def"
#undef DEFENCODING
};
#undef DEFALIAS
Bruno Haible | 3af9cd3 | 1999-12-31 12:50:49 +0000 | [diff] [blame] | 144 | |
| 145 | /* |
Bruno Haible | d51d26b | 2000-11-20 18:33:47 +0000 | [diff] [blame] | 146 | * Conversion loops. |
| 147 | */ |
| 148 | #include "loops.h" |
| 149 | |
| 150 | /* |
Bruno Haible | 3af9cd3 | 1999-12-31 12:50:49 +0000 | [diff] [blame] | 151 | * Alias lookup function. |
| 152 | * Defines |
Bruno Haible | 294602d | 2003-05-22 19:17:48 +0000 | [diff] [blame] | 153 | * struct alias { int name; unsigned int encoding_index; }; |
Bruno Haible | 3af9cd3 | 1999-12-31 12:50:49 +0000 | [diff] [blame] | 154 | * const struct alias * aliases_lookup (const char *str, unsigned int len); |
| 155 | * #define MAX_WORD_LENGTH ... |
| 156 | */ |
Bruno Haible | bcb1081 | 2008-04-06 07:38:02 +0000 | [diff] [blame] | 157 | #if defined _AIX |
| 158 | # include "aliases_sysaix.h" |
| 159 | #elif defined hpux || defined __hpux |
| 160 | # include "aliases_syshpux.h" |
| 161 | #elif defined __osf__ |
| 162 | # include "aliases_sysosf1.h" |
| 163 | #elif defined __sun |
| 164 | # include "aliases_syssolaris.h" |
| 165 | #else |
| 166 | # include "aliases.h" |
| 167 | #endif |
Bruno Haible | 3af9cd3 | 1999-12-31 12:50:49 +0000 | [diff] [blame] | 168 | |
/*
 * System dependent alias lookup function.
 * Defines
 *   const struct alias * aliases2_lookup (const char *str);
 *   stringpool2, the string pool that the 'name' offsets of the system
 *   dependent aliases are relative to.
 * When no system dependent encodings are enabled, both are macros that
 * expand to NULL.
 */
#if defined(USE_AIX) || defined(USE_OSF1) || defined(USE_DOS) || defined(USE_ZOS) || defined(USE_EXTRA) /* || ... */
/* One char array member per S(tag,name,encoding_index) entry of aliases2.h,
   sized to hold that entry's name including the terminating NUL. */
struct stringpool2_t {
#define S(tag,name,encoding_index) char stringpool_##tag[sizeof(name)];
#include "aliases2.h"
#undef S
};
static const struct stringpool2_t stringpool2_contents = {
#define S(tag,name,encoding_index) name,
#include "aliases2.h"
#undef S
};
#define stringpool2 ((const char *) &stringpool2_contents)
/* Each alias stores the byte offset of its name inside stringpool2.
   offsetof yields that offset as an integer constant expression, without
   forming a member address from a null pointer and without squeezing a
   pointer through 'long' (which is narrower than a pointer on LLP64). */
static const struct alias sysdep_aliases[] = {
#define S(tag,name,encoding_index) { (int) offsetof(struct stringpool2_t, stringpool_##tag), encoding_index },
#include "aliases2.h"
#undef S
};
#ifdef __GNUC__
__inline
#else
#ifdef __cplusplus
inline
#endif
#endif
/* Linear search of sysdep_aliases for an entry whose name is exactly str
   (case sensitive).  Returns the entry, or NULL if there is none. */
static const struct alias *
aliases2_lookup (const char *str)
{
  const struct alias * ptr;
  unsigned int count;
  for (ptr = sysdep_aliases, count = sizeof(sysdep_aliases)/sizeof(sysdep_aliases[0]); count > 0; ptr++, count--)
    if (!strcmp(str, stringpool2 + ptr->name))
      return ptr;
  return NULL;
}
#else
/* No system dependent aliases: every lookup misses.  iconvlist() tests
   '#ifndef aliases2_lookup' to detect this configuration. */
#define aliases2_lookup(str)  NULL
#define stringpool2  NULL
#endif
Bruno Haible | aaebc78 | 2000-03-07 14:16:40 +0000 | [diff] [blame] | 212 | |
Bruno Haible | 3af9cd3 | 1999-12-31 12:50:49 +0000 | [diff] [blame] | 213 | #if 0 |
/* Case-insensitive equality test for two ASCII strings, like !strcasecmp.
   str1 is expected to be in upper case already; only the characters of
   str2 are folded to upper case before comparing.
   Returns 1 if the strings match, 0 otherwise. */
static int strequal (const char* str1, const char* str2)
{
  const unsigned char * upper = (const unsigned char *) str1;
  const unsigned char * other = (const unsigned char *) str2;

  for (; *upper != 0; upper++, other++) {
    unsigned char folded = *other;
    if (folded >= 'a' && folded <= 'z')
      folded -= 'a'-'A';
    if (*upper != folded)
      return 0;
  }
  /* str1 is exhausted; the strings match only if str2 ends here too. */
  return (*other == 0);
}
| 232 | #endif |
| 233 | |
/*
 * Allocates a conversion descriptor for converting from the encoding named
 * by fromcode to the encoding named by tocode.
 * Returns the descriptor on success.  Returns (iconv_t)(-1) with errno set
 * to ENOMEM if the allocation fails, or to EINVAL if control reaches the
 * 'invalid' label.
 */
iconv_t iconv_open (const char* tocode, const char* fromcode)
{
  struct conv_struct * cd;
  unsigned int from_index;
  int from_wchar;
  unsigned int from_surface;
  unsigned int to_index;
  int to_wchar;
  unsigned int to_surface;
  int transliterate;
  int discard_ilseq;

  /* NOTE(review): none of the locals above is assigned in the visible code;
     presumably iconv_open1.h fills them in from tocode/fromcode and jumps
     to 'invalid' for unknown names -- confirm against that header. */
#include "iconv_open1.h"

  /* The larger wchar_conv_struct is allocated when from_wchar and to_wchar
     differ. */
  cd = (struct conv_struct *) malloc(from_wchar != to_wchar
                                     ? sizeof(struct wchar_conv_struct)
                                     : sizeof(struct conv_struct));
  if (cd == NULL) {
    errno = ENOMEM;
    return (iconv_t)(-1);
  }

  /* NOTE(review): presumably iconv_open2.h initializes *cd from the locals
     above -- confirm against that header. */
#include "iconv_open2.h"

  return (iconv_t)cd;
invalid:
  errno = EINVAL;
  return (iconv_t)(-1);
}
| 263 | |
| 264 | size_t iconv (iconv_t icd, |
| 265 | ICONV_CONST char* * inbuf, size_t *inbytesleft, |
| 266 | char* * outbuf, size_t *outbytesleft) |
| 267 | { |
| 268 | conv_t cd = (conv_t) icd; |
Bruno Haible | d51d26b | 2000-11-20 18:33:47 +0000 | [diff] [blame] | 269 | if (inbuf == NULL || *inbuf == NULL) |
| 270 | return cd->lfuncs.loop_reset(icd,outbuf,outbytesleft); |
| 271 | else |
| 272 | return cd->lfuncs.loop_convert(icd, |
| 273 | (const char* *)inbuf,inbytesleft, |
| 274 | outbuf,outbytesleft); |
Bruno Haible | 3af9cd3 | 1999-12-31 12:50:49 +0000 | [diff] [blame] | 275 | } |
| 276 | |
| 277 | int iconv_close (iconv_t icd) |
| 278 | { |
| 279 | conv_t cd = (conv_t) icd; |
| 280 | free(cd); |
| 281 | return 0; |
| 282 | } |
Bruno Haible | c37afd4 | 2000-01-24 13:06:57 +0000 | [diff] [blame] | 283 | |
| 284 | #ifndef LIBICONV_PLUG |
| 285 | |
/*
 * Verify that a 'struct conv_struct' and a 'struct wchar_conv_struct' each
 * fit in an iconv_allocation_t.
 * If this verification fails, iconv_allocation_t must be made larger and
 * the major version in LIBICONV_VERSION_INFO must be bumped.
 * Currently 'struct conv_struct' has 23 integer/pointer fields, and
 * 'struct wchar_conv_struct' additionally has an 'mbstate_t' field.
 *
 * How the check works: the array size evaluates to 1 when the comparison
 * holds and to -1 when it does not, and an array of negative size is a
 * compile-time error.
 */
typedef int verify_size_1[2 * (sizeof (struct conv_struct) <= sizeof (iconv_allocation_t)) - 1];
typedef int verify_size_2[2 * (sizeof (struct wchar_conv_struct) <= sizeof (iconv_allocation_t)) - 1];
| 296 | |
/*
 * Variant of iconv_open() that performs no allocation: the conversion
 * descriptor is built in the caller-provided storage *resultp.
 * Returns 0 on success, or -1 with errno set to EINVAL if control reaches
 * the 'invalid' label.
 */
int iconv_open_into (const char* tocode, const char* fromcode,
                     iconv_allocation_t* resultp)
{
  struct conv_struct * cd;
  unsigned int from_index;
  int from_wchar;
  unsigned int from_surface;
  unsigned int to_index;
  int to_wchar;
  unsigned int to_surface;
  int transliterate;
  int discard_ilseq;

  /* NOTE(review): none of the locals above is assigned in the visible code;
     presumably iconv_open1.h fills them in from tocode/fromcode and jumps
     to 'invalid' for unknown names -- confirm against that header. */
#include "iconv_open1.h"

  /* Use the caller's buffer as the descriptor; the verify_size_* typedefs
     above check at compile time that it is large enough. */
  cd = (struct conv_struct *) resultp;

  /* NOTE(review): presumably iconv_open2.h initializes *cd from the locals
     above -- confirm against that header. */
#include "iconv_open2.h"

  return 0;
invalid:
  errno = EINVAL;
  return -1;
}
| 321 | |
Bruno Haible | 19b6af5 | 2023-04-03 04:12:01 +0200 | [diff] [blame] | 322 | /* Bit mask of all valid surfaces. */ |
| 323 | #define ALL_SURFACES (ICONV_SURFACE_EBCDIC_ZOS_UNIX) |
| 324 | |
Bruno Haible | c37afd4 | 2000-01-24 13:06:57 +0000 | [diff] [blame] | 325 | int iconvctl (iconv_t icd, int request, void* argument) |
| 326 | { |
| 327 | conv_t cd = (conv_t) icd; |
| 328 | switch (request) { |
| 329 | case ICONV_TRIVIALP: |
Bruno Haible | d51d26b | 2000-11-20 18:33:47 +0000 | [diff] [blame] | 330 | *(int *)argument = |
| 331 | ((cd->lfuncs.loop_convert == unicode_loop_convert |
Bruno Haible | 19b6af5 | 2023-04-03 04:12:01 +0200 | [diff] [blame] | 332 | && cd->iindex == cd->oindex |
| 333 | && cd->isurface == cd->osurface) |
Bruno Haible | d51d26b | 2000-11-20 18:33:47 +0000 | [diff] [blame] | 334 | || cd->lfuncs.loop_convert == wchar_id_loop_convert |
| 335 | ? 1 : 0); |
Bruno Haible | c37afd4 | 2000-01-24 13:06:57 +0000 | [diff] [blame] | 336 | return 0; |
| 337 | case ICONV_GET_TRANSLITERATE: |
| 338 | *(int *)argument = cd->transliterate; |
| 339 | return 0; |
| 340 | case ICONV_SET_TRANSLITERATE: |
| 341 | cd->transliterate = (*(const int *)argument ? 1 : 0); |
| 342 | return 0; |
Bruno Haible | 4f99b68 | 2002-01-15 12:47:34 +0000 | [diff] [blame] | 343 | case ICONV_GET_DISCARD_ILSEQ: |
| 344 | *(int *)argument = cd->discard_ilseq; |
| 345 | return 0; |
| 346 | case ICONV_SET_DISCARD_ILSEQ: |
| 347 | cd->discard_ilseq = (*(const int *)argument ? 1 : 0); |
| 348 | return 0; |
Bruno Haible | 719d85b | 2005-03-14 11:24:40 +0000 | [diff] [blame] | 349 | case ICONV_SET_HOOKS: |
| 350 | if (argument != NULL) { |
| 351 | cd->hooks = *(const struct iconv_hooks *)argument; |
| 352 | } else { |
| 353 | cd->hooks.uc_hook = NULL; |
| 354 | cd->hooks.wc_hook = NULL; |
| 355 | cd->hooks.data = NULL; |
| 356 | } |
| 357 | return 0; |
Bruno Haible | 422b3b1 | 2006-01-23 13:25:49 +0000 | [diff] [blame] | 358 | case ICONV_SET_FALLBACKS: |
| 359 | if (argument != NULL) { |
| 360 | cd->fallbacks = *(const struct iconv_fallbacks *)argument; |
| 361 | } else { |
| 362 | cd->fallbacks.mb_to_uc_fallback = NULL; |
| 363 | cd->fallbacks.uc_to_mb_fallback = NULL; |
| 364 | cd->fallbacks.mb_to_wc_fallback = NULL; |
| 365 | cd->fallbacks.wc_to_mb_fallback = NULL; |
| 366 | cd->fallbacks.data = NULL; |
| 367 | } |
| 368 | return 0; |
Bruno Haible | 19b6af5 | 2023-04-03 04:12:01 +0200 | [diff] [blame] | 369 | case ICONV_GET_FROM_SURFACE: |
| 370 | *(unsigned int *)argument = cd->isurface; |
| 371 | return 0; |
| 372 | case ICONV_SET_FROM_SURFACE: |
| 373 | if ((*(const unsigned int *)argument & ~ALL_SURFACES) == 0) { |
| 374 | cd->isurface = *(const unsigned int *)argument; |
| 375 | return 0; |
| 376 | } else { |
| 377 | errno = EINVAL; |
| 378 | return -1; |
| 379 | } |
| 380 | case ICONV_GET_TO_SURFACE: |
| 381 | *(unsigned int *)argument = cd->osurface; |
| 382 | return 0; |
| 383 | case ICONV_SET_TO_SURFACE: |
| 384 | if ((*(const unsigned int *)argument & ~ALL_SURFACES) == 0) { |
| 385 | cd->osurface = *(const unsigned int *)argument; |
| 386 | return 0; |
| 387 | } else { |
| 388 | errno = EINVAL; |
| 389 | return -1; |
| 390 | } |
Bruno Haible | c37afd4 | 2000-01-24 13:06:57 +0000 | [diff] [blame] | 391 | default: |
| 392 | errno = EINVAL; |
| 393 | return -1; |
| 394 | } |
| 395 | } |
| 396 | |
/* An alias after its name has been converted from 'int' to 'const char*'. */
struct nalias { const char* name; unsigned int encoding_index; };

/*
 * qsort() comparison function that orders 'struct nalias' elements by
 * ascending encoding_index.
 * Returns -1, 0 or 1.  The unsigned indices are compared directly rather
 * than subtracted as 'int': a subtraction yields the wrong sign (or
 * overflows) once an index exceeds INT_MAX.
 */
static int compare_by_index (const void * arg1, const void * arg2)
{
  const struct nalias * alias1 = (const struct nalias *) arg1;
  const struct nalias * alias2 = (const struct nalias *) arg2;
  unsigned int index1 = alias1->encoding_index;
  unsigned int index2 = alias2->encoding_index;
  return (index1 > index2) - (index1 < index2);
}
| 406 | |
/*
 * qsort() comparison function for an array of 'const char *' names.
 * Orders the names alphabetically (strcmp order), except that names
 * starting with "CS" sort after all other names.
 * Returns 0 for equal names; otherwise the "CS" difference weighted by 4
 * plus +1 or -1 for the alphabetical order.
 */
static int compare_by_name (const void * arg1, const void * arg2)
{
  const char * const * slot1 = (const char * const *) arg1;
  const char * const * slot2 = (const char * const *) arg2;
  const char * lhs = *slot1;
  const char * rhs = *slot2;
  int order = strcmp(lhs,rhs);
  int lhs_is_cs;
  int rhs_is_cs;

  if (order == 0)
    return 0;

  lhs_is_cs = (lhs[0]=='C' && lhs[1]=='S');
  rhs_is_cs = (rhs[0]=='C' && rhs[1]=='S');
  /* The "CS" prefix dominates; the alphabetical order breaks ties. */
  return 4 * (lhs_is_cs - rhs_is_cs) + (order > 0 ? 1 : -1);
}
| 419 | |
| 420 | void iconvlist (int (*do_one) (unsigned int namescount, |
| 421 | const char * const * names, |
| 422 | void* data), |
| 423 | void* data) |
| 424 | { |
| 425 | #define aliascount1 sizeof(aliases)/sizeof(aliases[0]) |
| 426 | #ifndef aliases2_lookup |
| 427 | #define aliascount2 sizeof(sysdep_aliases)/sizeof(sysdep_aliases[0]) |
| 428 | #else |
| 429 | #define aliascount2 0 |
| 430 | #endif |
| 431 | #define aliascount (aliascount1+aliascount2) |
Bruno Haible | 294602d | 2003-05-22 19:17:48 +0000 | [diff] [blame] | 432 | struct nalias aliasbuf[aliascount]; |
Bruno Haible | d82b82d | 2002-01-14 12:05:04 +0000 | [diff] [blame] | 433 | const char * namesbuf[aliascount]; |
| 434 | size_t num_aliases; |
| 435 | { |
| 436 | /* Put all existing aliases into a buffer. */ |
| 437 | size_t i; |
| 438 | size_t j; |
| 439 | j = 0; |
| 440 | for (i = 0; i < aliascount1; i++) { |
| 441 | const struct alias * p = &aliases[i]; |
Bruno Haible | 294602d | 2003-05-22 19:17:48 +0000 | [diff] [blame] | 442 | if (p->name >= 0 |
Bruno Haible | d82b82d | 2002-01-14 12:05:04 +0000 | [diff] [blame] | 443 | && p->encoding_index != ei_local_char |
Bruno Haible | 294602d | 2003-05-22 19:17:48 +0000 | [diff] [blame] | 444 | && p->encoding_index != ei_local_wchar_t) { |
| 445 | aliasbuf[j].name = stringpool + p->name; |
| 446 | aliasbuf[j].encoding_index = p->encoding_index; |
| 447 | j++; |
| 448 | } |
Bruno Haible | d82b82d | 2002-01-14 12:05:04 +0000 | [diff] [blame] | 449 | } |
| 450 | #ifndef aliases2_lookup |
Bruno Haible | 294602d | 2003-05-22 19:17:48 +0000 | [diff] [blame] | 451 | for (i = 0; i < aliascount2; i++) { |
| 452 | aliasbuf[j].name = stringpool2 + sysdep_aliases[i].name; |
| 453 | aliasbuf[j].encoding_index = sysdep_aliases[i].encoding_index; |
| 454 | j++; |
| 455 | } |
Bruno Haible | d82b82d | 2002-01-14 12:05:04 +0000 | [diff] [blame] | 456 | #endif |
| 457 | num_aliases = j; |
| 458 | } |
| 459 | /* Sort by encoding_index. */ |
| 460 | if (num_aliases > 1) |
Bruno Haible | 294602d | 2003-05-22 19:17:48 +0000 | [diff] [blame] | 461 | qsort(aliasbuf, num_aliases, sizeof(struct nalias), compare_by_index); |
Bruno Haible | d82b82d | 2002-01-14 12:05:04 +0000 | [diff] [blame] | 462 | { |
| 463 | /* Process all aliases with the same encoding_index together. */ |
| 464 | size_t j; |
| 465 | j = 0; |
| 466 | while (j < num_aliases) { |
| 467 | unsigned int ei = aliasbuf[j].encoding_index; |
| 468 | size_t i = 0; |
| 469 | do |
| 470 | namesbuf[i++] = aliasbuf[j++].name; |
| 471 | while (j < num_aliases && aliasbuf[j].encoding_index == ei); |
| 472 | if (i > 1) |
| 473 | qsort(namesbuf, i, sizeof(const char *), compare_by_name); |
| 474 | /* Call the callback. */ |
| 475 | if (do_one(i,namesbuf,data)) |
| 476 | break; |
| 477 | } |
| 478 | } |
| 479 | #undef aliascount |
| 480 | #undef aliascount2 |
| 481 | #undef aliascount1 |
| 482 | } |
| 483 | |
/*
 * Table of canonical names of encodings.
 * Instead of strings, it contains offsets into stringpool and stringpool2.
 * iconv_canonicalize() indexes it by encoding index, so the groups below
 * are included in the same order as the .def files in the enum of
 * ei_<xxx> indices: generic, AIX, OSF/1, DOS, z/OS, extra, local.
 * For some groups a platform specific variant of the header is selected.
 */
static const unsigned short all_canonical[] = {
#if defined _AIX
# include "canonical_sysaix.h"
#elif defined hpux || defined __hpux
# include "canonical_syshpux.h"
#elif defined __osf__
# include "canonical_sysosf1.h"
#elif defined __sun
# include "canonical_syssolaris.h"
#else
# include "canonical.h"
#endif
#ifdef USE_AIX
# if defined _AIX
#  include "canonical_aix_sysaix.h"
# else
#  include "canonical_aix.h"
# endif
#endif
#ifdef USE_OSF1
# if defined __osf__
#  include "canonical_osf1_sysosf1.h"
# else
#  include "canonical_osf1.h"
# endif
#endif
#ifdef USE_DOS
# include "canonical_dos.h"
#endif
#ifdef USE_ZOS
# include "canonical_zos.h"
#endif
#ifdef USE_EXTRA
# include "canonical_extra.h"
#endif
#if defined _AIX
# include "canonical_local_sysaix.h"
#elif defined hpux || defined __hpux
# include "canonical_local_syshpux.h"
#elif defined __osf__
# include "canonical_local_sysosf1.h"
#elif defined __sun
# include "canonical_local_syssolaris.h"
#else
# include "canonical_local.h"
#endif
};
| 535 | |
/*
 * Returns the canonical name of the encoding called name.
 * The lookup is case insensitive and ignores trailing "//TRANSLIT" and
 * "//IGNORE" suffixes.  An empty name, or an alias of the local char
 * encoding, is resolved through locale_charset().
 * If name is not a known encoding name (non-ASCII, too long, or not found
 * in the alias tables), name itself is returned.
 */
const char * iconv_canonicalize (const char * name)
{
  const char* code;
  char buf[MAX_WORD_LENGTH+10+1];
  const char* cp;
  char* bp;
  const struct alias * ap;
  unsigned int count;
  unsigned int index;
  const char* pool;

  /* Before calling aliases_lookup, convert the input string to upper case,
   * and check whether it's entirely ASCII (we call gperf with option "-7"
   * to achieve a smaller table) and non-empty. If it's not entirely ASCII,
   * or if it's too long, it is not a valid encoding name.
   */
  /* Each 'continue' of this loop restarts the lookup with a new 'code';
     each 'break' leaves it with 'index' and 'pool' set. */
  for (code = name;;) {
    /* Search code in the table. */
    /* Copy code into buf in upper case; count bounds the copy to the size
       of buf, including the terminating NUL. */
    for (cp = code, bp = buf, count = MAX_WORD_LENGTH+10+1; ; cp++, bp++) {
      unsigned char c = (unsigned char) *cp;
      if (c >= 0x80)
        goto invalid;
      if (c >= 'a' && c <= 'z')
        c -= 'a'-'A';
      *bp = c;
      if (c == '\0')
        break;
      if (--count == 0)
        goto invalid;
    }
    /* Strip the suffixes from the end of buf, in any order and any number
       of times; bp keeps pointing at the terminating NUL. */
    for (;;) {
      if (bp-buf >= 10 && memcmp(bp-10,"//TRANSLIT",10)==0) {
        bp -= 10;
        *bp = '\0';
        continue;
      }
      if (bp-buf >= 8 && memcmp(bp-8,"//IGNORE",8)==0) {
        bp -= 8;
        *bp = '\0';
        continue;
      }
      break;
    }
    /* An empty name stands for the encoding of the current locale. */
    if (buf[0] == '\0') {
      code = locale_charset();
      /* Avoid an endless loop that could occur when using an older version
         of localcharset.c. */
      if (code[0] == '\0')
        goto invalid;
      continue;
    }
    /* Look in the generic aliases first, then in the system dependent
       ones; pool records the string pool that belongs to the table in
       which the alias was found. */
    pool = stringpool;
    ap = aliases_lookup(buf,bp-buf);
    if (ap == NULL) {
      pool = stringpool2;
      ap = aliases2_lookup(buf);
      if (ap == NULL)
        goto invalid;
    }
    if (ap->encoding_index == ei_local_char) {
      code = locale_charset();
      /* Avoid an endless loop that could occur when using an older version
         of localcharset.c. */
      if (code[0] == '\0')
        goto invalid;
      continue;
    }
    if (ap->encoding_index == ei_local_wchar_t) {
      /* On systems which define __STDC_ISO_10646__, wchar_t is Unicode.
         This is also the case on native Woe32 systems and Cygwin >= 1.7, where
         we know that it is UTF-16.  */
      /* NOTE(review): the 'break's below keep 'pool' as set by the alias
         lookup above, so the substituted encoding's canonical name offset
         is taken relative to that pool -- confirm both live in the same
         pool. */
#if (defined _WIN32 && !defined __CYGWIN__) || (defined __CYGWIN__ && CYGWIN_VERSION_DLL_MAJOR >= 1007)
      if (sizeof(wchar_t) == 4) {
        index = ei_ucs4internal;
        break;
      }
      if (sizeof(wchar_t) == 2) {
# if WORDS_LITTLEENDIAN
        index = ei_utf16le;
# else
        index = ei_utf16be;
# endif
        break;
      }
#elif __STDC_ISO_10646__
      if (sizeof(wchar_t) == 4) {
        index = ei_ucs4internal;
        break;
      }
      if (sizeof(wchar_t) == 2) {
        index = ei_ucs2internal;
        break;
      }
      if (sizeof(wchar_t) == 1) {
        index = ei_iso8859_1;
        break;
      }
#endif
    }
    /* Ordinary encoding, or a local wchar_t that was not mapped above. */
    index = ap->encoding_index;
    break;
  }
  /* all_canonical[] holds the offset of the canonical name within pool. */
  return all_canonical[index] + pool;
 invalid:
  return name;
}
| 642 | |
/* Library version as a variable with external linkage, initialized from
   the _LIBICONV_VERSION macro. */
int _libiconv_version = _LIBICONV_VERSION;
| 644 | |
#if defined __FreeBSD__ && !defined __gnu_freebsd__
/* GNU libiconv is the native FreeBSD iconv implementation since 2002.
   It wants to define the symbols 'iconv_open', 'iconv', 'iconv_close'.  */
/* The two-level macro expands 'name' before it is stringified for the
   alias attribute. */
#define strong_alias(name, aliasname) _strong_alias(name, aliasname)
#define _strong_alias(name, aliasname) \
  extern __typeof (name) aliasname __attribute__ ((alias (#name)));
/* Drop any macro definitions of these names, so that the alias
   declarations below use the plain identifiers. */
#undef iconv_open
#undef iconv
#undef iconv_close
strong_alias (libiconv_open, iconv_open)
strong_alias (libiconv, iconv)
strong_alias (libiconv_close, iconv_close)
#endif
| 658 | |
Bruno Haible | c37afd4 | 2000-01-24 13:06:57 +0000 | [diff] [blame] | 659 | #endif |