| /* |
| * Copyright (C) 1999-2008, 2011, 2018, 2020, 2023 Free Software Foundation, Inc. |
| * This file is part of the GNU LIBICONV Library. |
| * |
| * The GNU LIBICONV Library is free software; you can redistribute it |
| * and/or modify it under the terms of the GNU Lesser General Public |
| * License as published by the Free Software Foundation; either version 2.1 |
| * of the License, or (at your option) any later version. |
| * |
| * The GNU LIBICONV Library is distributed in the hope that it will be |
| * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of |
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| * Lesser General Public License for more details. |
| * |
| * You should have received a copy of the GNU Lesser General Public |
| * License along with the GNU LIBICONV Library; see the file COPYING.LIB. |
| * If not, see <https://www.gnu.org/licenses/>. |
| */ |
| |
| /* Part 1 of iconv_open. |
| Input: const char* tocode, const char* fromcode. |
| Output: |
| unsigned int from_index; |
| int from_wchar; |
| unsigned int from_surface; |
| unsigned int to_index; |
| int to_wchar; |
| unsigned int to_surface; |
| int transliterate; |
| int discard_ilseq; |
| Jumps to 'invalid' in case of error. |
| */ |
| { /* Resolves tocode, then fromcode, to an encoding index. Each name is |
| copied to buf in upper case, recognized trailing suffixes are cut off, |
| and the rest is looked up with aliases_lookup / aliases2_lookup. |
| Every failure path jumps to 'invalid'. */ |
| char buf[MAX_WORD_LENGTH+9+9+1]; /* upper-cased copy of the name being resolved */ |
| const char* cp; /* read position in tocode / fromcode */ |
| char* bp; /* write position in buf; afterwards the end of the bare name */ |
| const struct alias * ap; /* result of the table lookup */ |
| unsigned int count; /* room left in buf while copying */ |
| |
| from_surface = ICONV_SURFACE_NONE; /* defaults; overwritten below when a suffix is recognized */ |
| to_surface = ICONV_SURFACE_NONE; |
| transliterate = 0; |
| discard_ilseq = 0; |
| |
| /* Before calling aliases_lookup, convert the input string to upper case, |
| * and check whether it's entirely ASCII (we call gperf with option "-7" |
| * to achieve a smaller table) and non-empty. If it's not entirely ASCII, |
| * or if it's too long, it is not a valid encoding name. |
| */ |
| for (to_wchar = 0;;) { /* restarted by 'continue' each time tocode is replaced by locale_charset() */ |
| /* Search tocode in the table. */ |
| for (cp = tocode, bp = buf, count = MAX_WORD_LENGTH+9+9+1; ; cp++, bp++) { |
| unsigned char c = (unsigned char) *cp; |
| if (c >= 0x80) /* non-ASCII byte */ |
| goto invalid; |
| if (c >= 'a' && c <= 'z') /* fold ASCII lower case to upper case */ |
| c -= 'a'-'A'; |
| *bp = c; |
| if (c == '\0') /* terminator copied; bp now points at it */ |
| break; |
| if (--count == 0) /* buf is full and no room is left for the terminator */ |
| goto invalid; |
| } |
| for (;;) { /* Suffix parsing. Runs at most once: every path below ends in |
| 'break'. Accepts an optional "/TRANSLIT" or "/IGNORE" option |
| preceded by either a lone "/" or the surface "/ZOS_UNIX". */ |
| char *sp = bp; /* end of the name once a trailing option has been skipped */ |
| int parsed_translit = 0; |
| int parsed_ignore = 0; |
| if (sp-buf > 9 && memcmp(sp-9,"/TRANSLIT",9)==0) { /* '>' : at least one character must precede the option */ |
| sp = sp - 9; |
| parsed_translit = 1; |
| } else if (sp-buf > 7 && memcmp(sp-7,"/IGNORE",7)==0) { |
| sp = sp - 7; |
| parsed_ignore = 1; |
| } |
| if (sp > buf && memcmp(sp-1,"/",1) == 0) { /* a single '/' directly before sp: drop it too */ |
| bp = sp - 1; |
| } else if (sp-buf >= 9 && memcmp(sp-9,"/ZOS_UNIX",9)==0) { /* surface name directly before sp */ |
| bp = sp - 9; |
| to_surface = ICONV_SURFACE_EBCDIC_ZOS_UNIX; |
| } else |
| break; /* neither form matched: buf, bp and the two flags stay untouched */ |
| *bp = '\0'; /* cut the recognized suffix off the name */ |
| if (parsed_translit) /* the option only takes effect once the suffix was accepted */ |
| transliterate = 1; |
| if (parsed_ignore) |
| discard_ilseq = 1; |
| break; |
| } |
| if (buf[0] == '\0') { /* empty name: retry with the name returned by locale_charset() */ |
| tocode = locale_charset(); |
| /* Avoid an endless loop that could occur when using an older version |
| of localcharset.c. */ |
| if (tocode[0] == '\0') |
| goto invalid; |
| continue; |
| } |
| ap = aliases_lookup(buf,bp-buf); /* *bp is the terminator here, so bp-buf is the name length */ |
| if (ap == NULL) { |
| ap = aliases2_lookup(buf); /* second lookup, tried only when the first one fails */ |
| if (ap == NULL) |
| goto invalid; |
| } |
| if (ap->encoding_index == ei_local_char) { /* alias for the locale's charset: resolve that name instead */ |
| tocode = locale_charset(); |
| /* Avoid an endless loop that could occur when using an older version |
| of localcharset.c. */ |
| if (tocode[0] == '\0') |
| goto invalid; |
| continue; |
| } |
| if (ap->encoding_index == ei_local_wchar_t) { /* alias for wchar_t: pick an encoding from sizeof(wchar_t) where possible */ |
| /* On systems which define __STDC_ISO_10646__, wchar_t is Unicode. |
| This is also the case on native Woe32 systems and Cygwin >= 1.7, where |
| we know that it is UTF-16. */ |
| #if (defined _WIN32 && !defined __CYGWIN__) || (defined __CYGWIN__ && CYGWIN_VERSION_DLL_MAJOR >= 1007) |
| if (sizeof(wchar_t) == 4) { |
| to_index = ei_ucs4internal; |
| break; |
| } |
| if (sizeof(wchar_t) == 2) { |
| # if WORDS_LITTLEENDIAN |
| to_index = ei_utf16le; |
| # else |
| to_index = ei_utf16be; |
| # endif |
| break; |
| } |
| #elif __STDC_ISO_10646__ |
| if (sizeof(wchar_t) == 4) { |
| to_index = ei_ucs4internal; |
| break; |
| } |
| if (sizeof(wchar_t) == 2) { |
| to_index = ei_ucs2internal; |
| break; |
| } |
| if (sizeof(wchar_t) == 1) { |
| to_index = ei_iso8859_1; |
| break; |
| } |
| #endif |
| #if HAVE_MBRTOWC |
| to_wchar = 1; /* flag the target as wchar_t, then resolve the locale charset name instead */ |
| tocode = locale_charset(); |
| continue; |
| #endif |
| goto invalid; /* not reached when the HAVE_MBRTOWC branch above is compiled in */ |
| } |
| to_index = ap->encoding_index; /* ordinary encoding: done with tocode */ |
| break; |
| } |
| for (from_wchar = 0;;) { /* same procedure for fromcode; restarted by 'continue' when fromcode is replaced */ |
| /* Search fromcode in the table. */ |
| for (cp = fromcode, bp = buf, count = MAX_WORD_LENGTH+9+9+1; ; cp++, bp++) { |
| unsigned char c = (unsigned char) *cp; |
| if (c >= 0x80) /* non-ASCII byte */ |
| goto invalid; |
| if (c >= 'a' && c <= 'z') /* fold ASCII lower case to upper case */ |
| c -= 'a'-'A'; |
| *bp = c; |
| if (c == '\0') /* terminator copied; bp now points at it */ |
| break; |
| if (--count == 0) /* buf is full and no room is left for the terminator */ |
| goto invalid; |
| } |
| for (;;) { /* Suffix parsing, at most one pass (every path ends in 'break'). |
| Note that a "/TRANSLIT" or "/IGNORE" option on fromcode sets the |
| same transliterate / discard_ilseq outputs as one on tocode. */ |
| char *sp = bp; /* end of the name once a trailing option has been skipped */ |
| int parsed_translit = 0; |
| int parsed_ignore = 0; |
| if (sp-buf > 9 && memcmp(sp-9,"/TRANSLIT",9)==0) { /* '>' : at least one character must precede the option */ |
| sp = sp - 9; |
| parsed_translit = 1; |
| } else if (sp-buf > 7 && memcmp(sp-7,"/IGNORE",7)==0) { |
| sp = sp - 7; |
| parsed_ignore = 1; |
| } |
| if (sp > buf && memcmp(sp-1,"/",1) == 0) { /* a single '/' directly before sp: drop it too */ |
| bp = sp - 1; |
| } else if (sp-buf >= 9 && memcmp(sp-9,"/ZOS_UNIX",9)==0) { /* surface name directly before sp */ |
| bp = sp - 9; |
| from_surface = ICONV_SURFACE_EBCDIC_ZOS_UNIX; |
| } else |
| break; /* neither form matched: buf, bp and the two flags stay untouched */ |
| *bp = '\0'; /* cut the recognized suffix off the name */ |
| if (parsed_translit) /* the option only takes effect once the suffix was accepted */ |
| transliterate = 1; |
| if (parsed_ignore) |
| discard_ilseq = 1; |
| break; |
| } |
| if (buf[0] == '\0') { /* empty name: retry with the name returned by locale_charset() */ |
| fromcode = locale_charset(); |
| /* Avoid an endless loop that could occur when using an older version |
| of localcharset.c. */ |
| if (fromcode[0] == '\0') |
| goto invalid; |
| continue; |
| } |
| ap = aliases_lookup(buf,bp-buf); /* *bp is the terminator here, so bp-buf is the name length */ |
| if (ap == NULL) { |
| ap = aliases2_lookup(buf); /* second lookup, tried only when the first one fails */ |
| if (ap == NULL) |
| goto invalid; |
| } |
| if (ap->encoding_index == ei_local_char) { /* alias for the locale's charset: resolve that name instead */ |
| fromcode = locale_charset(); |
| /* Avoid an endless loop that could occur when using an older version |
| of localcharset.c. */ |
| if (fromcode[0] == '\0') |
| goto invalid; |
| continue; |
| } |
| if (ap->encoding_index == ei_local_wchar_t) { /* alias for wchar_t: pick an encoding from sizeof(wchar_t) where possible */ |
| /* On systems which define __STDC_ISO_10646__, wchar_t is Unicode. |
| This is also the case on native Woe32 systems and Cygwin >= 1.7, where |
| we know that it is UTF-16. */ |
| #if (defined _WIN32 && !defined __CYGWIN__) || (defined __CYGWIN__ && CYGWIN_VERSION_DLL_MAJOR >= 1007) |
| if (sizeof(wchar_t) == 4) { |
| from_index = ei_ucs4internal; |
| break; |
| } |
| if (sizeof(wchar_t) == 2) { |
| # if WORDS_LITTLEENDIAN |
| from_index = ei_utf16le; |
| # else |
| from_index = ei_utf16be; |
| # endif |
| break; |
| } |
| #elif __STDC_ISO_10646__ |
| if (sizeof(wchar_t) == 4) { |
| from_index = ei_ucs4internal; |
| break; |
| } |
| if (sizeof(wchar_t) == 2) { |
| from_index = ei_ucs2internal; |
| break; |
| } |
| if (sizeof(wchar_t) == 1) { |
| from_index = ei_iso8859_1; |
| break; |
| } |
| #endif |
| #if HAVE_WCRTOMB |
| from_wchar = 1; /* flag the source as wchar_t, then resolve the locale charset name instead */ |
| fromcode = locale_charset(); |
| continue; |
| #endif |
| goto invalid; /* not reached when the HAVE_WCRTOMB branch above is compiled in */ |
| } |
| from_index = ap->encoding_index; /* ordinary encoding: done with fromcode */ |
| break; |
| } |
| } |