blob: 16a190dbcf40f2493854cab171e708f8e2c14852 [file] [log] [blame]
/* Copyright (C) 1999-2000 Free Software Foundation, Inc.
This file is part of the GNU ICONV Library.
The GNU ICONV Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.
The GNU ICONV Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Library General Public License for more details.
You should have received a copy of the GNU Library General Public
License along with the GNU ICONV Library; see the file COPYING.LIB. If not,
write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA. */
#include <iconv.h>
#include <stdlib.h>
#include <string.h>
#include "config.h"
/*
* Consider those system dependent encodings that are needed for the
* current system.
*/
#ifdef _AIX
#define USE_AIX
#endif
/*
* Converters.
*/
#include "converters.h"
/*
* Transliteration tables.
*/
#include "cjk_variants.h"
#include "translit.h"
/*
* Table of all supported encodings.
*/
/*
 * One entry of the all_encodings[] table: the conversion routines and
 * output flags belonging to a single encoding. iconv_open() copies these
 * members into the conversion descriptor it creates.
 * The member order must match the positional initializers used for
 * all_encodings[].
 */
struct encoding {
struct mbtowc_funcs ifuncs; /* conversion multibyte -> unicode */
struct wctomb_funcs ofuncs; /* conversion unicode -> multibyte */
int oflags; /* flags for unicode -> multibyte conversion */
};
/*
 * Encoding indices. Every DEFENCODING entry in encodings.def (and, when
 * USE_AIX is defined, in encodings_aix.def) expands to one enumerator
 * ei_<name>. The same files are expanded in the same order to build
 * all_encodings[], so enumerator ei_<name> is the array index of that
 * encoding's entry.
 * The final enumerator only exists so that the list does not end with a
 * comma.
 */
enum {
#define DEFENCODING(xxx_names,xxx,xxx_ifuncs,xxx_ofuncs1,xxx_ofuncs2) \
ei_##xxx ,
#include "encodings.def"
#ifdef USE_AIX
#include "encodings_aix.def"
#endif
#undef DEFENCODING
ei_for_broken_compilers_that_dont_like_trailing_commas
};
#include "flags.h"
/*
 * The table of encodings, indexed by encoding index. Every DEFENCODING
 * entry contributes one struct encoding initializer, in the same order in
 * which the ei_<name> enumerators were generated.
 * xxx_ofuncs1 and xxx_ofuncs2 together initialize the ofuncs member
 * (presumably its xxx_wctomb and xxx_reset function pointers -- confirm
 * against the definition of struct wctomb_funcs).
 * ei_<name>_oflags is presumably provided by flags.h -- confirm.
 */
static struct encoding const all_encodings[] = {
#define DEFENCODING(xxx_names,xxx,xxx_ifuncs,xxx_ofuncs1,xxx_ofuncs2) \
{ xxx_ifuncs, xxx_ofuncs1,xxx_ofuncs2, ei_##xxx##_oflags },
#include "encodings.def"
#ifdef USE_AIX
#include "encodings_aix.def"
#endif
#undef DEFENCODING
};
/*
* Alias lookup function.
* Defines
* struct alias { const char* name; unsigned int encoding_index; };
* const struct alias * aliases_lookup (const char *str, unsigned int len);
* #define MAX_WORD_LENGTH ...
*/
#include "aliases.h"
/*
* System dependent alias lookup function.
* Defines
* const struct alias * aliases2_lookup (const char *str);
*/
#if defined(USE_AIX) /* || ... */
/* Aliases that are only compiled in on particular systems. */
static struct alias sysdep_aliases[] = {
#ifdef USE_AIX
#include "aliases_aix.h"
#endif
};
/*
 * Linear search of sysdep_aliases[] for an entry whose name is exactly
 * equal to str (strcmp comparison).
 * Returns the matching entry, or NULL if there is none.
 */
#ifdef __GNUC__
__inline
#endif
const struct alias *
aliases2_lookup (register const char *str)
{
  unsigned int total = sizeof(sysdep_aliases)/sizeof(sysdep_aliases[0]);
  unsigned int i;
  for (i = 0; i < total; i++) {
    struct alias * entry = &sysdep_aliases[i];
    if (strcmp(str,entry->name) == 0)
      return entry;
  }
  return NULL;
}
#else
/* No system dependent aliases on this system: the lookup never succeeds. */
#define aliases2_lookup(str) NULL
#endif
#if 0
/* Equality test in the spirit of !strcasecmp, for ASCII strings only.
   str1 is expected to be in upper case already; only the characters of
   str2 are folded to upper case before being compared.
   Returns 1 if the strings are equal under this rule, 0 otherwise. */
static int strequal (const char* str1, const char* str2)
{
  const unsigned char* p1 = (const unsigned char*) str1;
  const unsigned char* p2 = (const unsigned char*) str2;
  unsigned char a = *p1;
  unsigned char b = *p2;
  while (a != 0) {
    /* Fold the second string's character to upper case. */
    if (b >= 'a' && b <= 'z')
      b = (unsigned char) (b - ('a'-'A'));
    if (a != b)
      return 0;
    a = *++p1;
    b = *++p2;
  }
  /* str1 is exhausted: equal only if str2 ends here as well. */
  return (b == 0);
}
#endif
iconv_t iconv_open (const char* tocode, const char* fromcode)
{
struct conv_struct * cd = (struct conv_struct *) malloc(sizeof(struct conv_struct));
char buf[MAX_WORD_LENGTH+1];
const char* cp;
char* bp;
const struct alias * ap;
unsigned int count;
if (cd == NULL) {
errno = ENOMEM;
return (iconv_t)(-1);
}
/* Before calling aliases_lookup, convert the input string to upper case,
* and check whether it's entirely ASCII (we call gperf with option "-7"
* to achieve a smaller table) and non-empty. If it's not entirely ASCII,
* or if it's too long, it is not a valid encoding name.
*/
/* Search tocode in the table. */
for (cp = tocode, bp = buf, count = MAX_WORD_LENGTH+1; ; cp++, bp++) {
unsigned char c = * (unsigned char *) cp;
if (c >= 0x80)
goto invalid;
if (c >= 'a' && c <= 'z')
c -= 'a'-'A';
*bp = c;
if (c == '\0')
break;
if (--count == 0)
goto invalid;
}
ap = aliases_lookup(buf,bp-buf);
if (ap == NULL) {
ap = aliases2_lookup(buf);
if (ap == NULL)
goto invalid;
}
cd->oindex = ap->encoding_index;
cd->ofuncs = all_encodings[ap->encoding_index].ofuncs;
cd->oflags = all_encodings[ap->encoding_index].oflags;
/* Search fromcode in the table. */
for (cp = fromcode, bp = buf, count = MAX_WORD_LENGTH+1; ; cp++, bp++) {
unsigned char c = * (unsigned char *) cp;
if (c >= 0x80)
goto invalid;
if (c >= 'a' && c <= 'z')
c -= 'a'-'A';
*bp = c;
if (c == '\0')
break;
if (--count == 0)
goto invalid;
}
ap = aliases_lookup(buf,bp-buf);
if (ap == NULL) {
ap = aliases2_lookup(buf);
if (ap == NULL)
goto invalid;
}
cd->iindex = ap->encoding_index;
cd->ifuncs = all_encodings[ap->encoding_index].ifuncs;
/* Initialize the states. */
memset(&cd->istate,'\0',sizeof(state_t));
memset(&cd->ostate,'\0',sizeof(state_t));
/* Initialize the operation flags. */
cd->transliterate = 1;
/* Done. */
return (iconv_t)cd;
invalid:
errno = EINVAL;
return (iconv_t)(-1);
}
/*
 * Converts the bytes at *inbuf (*inbytesleft of them) from the input
 * encoding of the descriptor icd to its output encoding, storing the
 * result at *outbuf (at most *outbytesleft bytes).
 *
 * If inbuf or *inbuf is NULL, no input is converted:
 *   - if outbuf or *outbuf is NULL too, the conversion states are reset;
 *   - otherwise the output encoding's reset function, if it has one, is
 *     called to write to *outbuf, and then the states are reset.
 *
 * Otherwise characters are converted one at a time until the input is
 * used up or an error occurs. A character for which the output converter
 * returns 0 is, if transliteration is enabled, tried in turn as decomposed
 * Hangul, as a CJK variant followed by U+303E, as a substitute quotation
 * mark, and through the transliteration table; finally U+FFFD is tried.
 * On return *inbuf, *inbytesleft, *outbuf and *outbytesleft describe what
 * is left; a character that caused an error is not consumed and none of
 * its output is kept.
 *
 * Returns the number of characters that could not be converted directly
 * (and were therefore transliterated or replaced), or (size_t)(-1) with
 * errno set to
 *   EILSEQ  invalid input, or a character with no representation at all,
 *   EINVAL  incomplete input sequence at the end of the input,
 *   E2BIG   not enough room in the output buffer.
 */
size_t iconv (iconv_t icd,
              ICONV_CONST char* * inbuf, size_t *inbytesleft,
              char* * outbuf, size_t *outbytesleft)
{
  conv_t cd = (conv_t) icd;
  if (inbuf == NULL || *inbuf == NULL) {
    if (outbuf == NULL || *outbuf == NULL) {
      /* Reset the states. */
      memset(&cd->istate,'\0',sizeof(state_t));
      memset(&cd->ostate,'\0',sizeof(state_t));
      return 0;
    } else {
      if (cd->ofuncs.xxx_reset) {
        int outcount =
          cd->ofuncs.xxx_reset(cd, (unsigned char *) *outbuf, *outbytesleft);
        if (outcount < 0) {
          errno = E2BIG;
          return -1;
        }
        *outbuf += outcount; *outbytesleft -= outcount;
      }
      memset(&cd->istate,'\0',sizeof(state_t));
      memset(&cd->ostate,'\0',sizeof(state_t));
      return 0;
    }
  } else {
    size_t result = 0;
    const unsigned char* inptr = (const unsigned char*) *inbuf;
    size_t inleft = *inbytesleft;
    unsigned char* outptr = (unsigned char*) *outbuf;
    size_t outleft = *outbytesleft;
    while (inleft > 0) {
      wchar_t wc;
      int incount;
      int outcount;
      incount = cd->ifuncs.xxx_mbtowc(cd,&wc,inptr,inleft);
      if (incount <= 0) {
        if (incount == 0) {
          /* Case 1: invalid input */
          errno = EILSEQ;
          result = -1;
          break;
        }
        if (incount == -1) {
          /* Case 2: not enough bytes available to detect anything */
          errno = EINVAL;
          result = -1;
          break;
        }
        /* Case 3: k bytes read, but only a shift sequence */
        incount = -1-incount;
      } else {
        /* Case 4: k bytes read, making up a wide character */
        if (outleft == 0) {
          errno = E2BIG;
          result = -1;
          break;
        }
        outcount = cd->ofuncs.xxx_wctomb(cd,outptr,wc,outleft);
        if (outcount != 0)
          goto outcount_ok;
        /* Try transliteration. */
        result++;
        if (cd->transliterate) {
          if (cd->oflags & HAVE_HANGUL_JAMO) {
            /* Decompose Hangul into Jamo. Use double-width Jamo (contained
               in all Korean encodings and ISO-2022-JP-2), not half-width Jamo
               (contained in Unicode only). */
            wchar_t buf[3];
            int ret = johab_hangul_decompose(cd,buf,wc);
            if (ret != RET_ILSEQ) {
              /* we know 1 <= ret <= 3 */
              state_t backup_state = cd->ostate;
              unsigned char* backup_outptr = outptr;
              size_t backup_outleft = outleft;
              int i, sub_outcount;
              for (i = 0; i < ret; i++) {
                if (outleft == 0) {
                  sub_outcount = RET_TOOSMALL;
                  goto johab_hangul_failed;
                }
                sub_outcount = cd->ofuncs.xxx_wctomb(cd,outptr,buf[i],outleft);
                if (sub_outcount <= 0)
                  goto johab_hangul_failed;
                if (!(sub_outcount <= outleft)) abort();
                outptr += sub_outcount; outleft -= sub_outcount;
              }
              goto char_done;
             johab_hangul_failed:
              /* Undo the partial output of this attempt. */
              cd->ostate = backup_state;
              outptr = backup_outptr;
              outleft = backup_outleft;
              if (sub_outcount < 0) {
                errno = E2BIG;
                result = -1;
                break;
              }
            }
          }
          {
            /* Try to use a variant, but postfix it with
               U+303E IDEOGRAPHIC VARIATION INDICATOR
               (cf. Ken Lunde's "CJKV information processing", p. 188). */
            int indx = -1;
            if (wc == 0x3006)
              indx = 0;
            else if (wc == 0x30f6)
              indx = 1;
            else if (wc >= 0x4e00 && wc < 0xa000)
              indx = cjk_variants_indx[wc-0x4e00];
            if (indx >= 0) {
              for (;; indx++) {
                wchar_t buf[2];
                unsigned short variant = cjk_variants[indx];
                /* The high bit marks the last variant of this character. */
                unsigned short last = variant & 0x8000;
                variant &= 0x7fff;
                variant += 0x3000;
                buf[0] = variant; buf[1] = 0x303e;
                {
                  state_t backup_state = cd->ostate;
                  unsigned char* backup_outptr = outptr;
                  size_t backup_outleft = outleft;
                  int i, sub_outcount;
                  for (i = 0; i < 2; i++) {
                    if (outleft == 0) {
                      sub_outcount = RET_TOOSMALL;
                      goto variant_failed;
                    }
                    sub_outcount = cd->ofuncs.xxx_wctomb(cd,outptr,buf[i],outleft);
                    if (sub_outcount <= 0)
                      goto variant_failed;
                    if (!(sub_outcount <= outleft)) abort();
                    outptr += sub_outcount; outleft -= sub_outcount;
                  }
                  goto char_done;
                 variant_failed:
                  /* Undo the partial output of this attempt. */
                  cd->ostate = backup_state;
                  outptr = backup_outptr;
                  outleft = backup_outleft;
                  if (sub_outcount < 0) {
                    errno = E2BIG;
                    result = -1;
                    /* A plain `break' would only leave the variants loop
                       and fall through to the other fallbacks with the
                       error pending; leave the conversion loop instead. */
                    goto finish;
                  }
                }
                if (last)
                  break;
              }
            }
          }
          if (wc >= 0x2018 && wc <= 0x201a) {
            /* Special case for quotation marks 0x2018, 0x2019, 0x201a */
            wchar_t substitute =
              (cd->oflags & HAVE_QUOTATION_MARKS
               ? (wc == 0x201a ? 0x2018 : wc)
               : (cd->oflags & HAVE_ACCENTS
                  ? (wc==0x2019 ? 0x00b4 : 0x0060) /* use accents */
                  : 0x0027 /* use apostrophe */
              )  );
            outcount = cd->ofuncs.xxx_wctomb(cd,outptr,substitute,outleft);
            if (outcount != 0)
              goto outcount_ok;
          }
          {
            /* Use the transliteration table. */
            int indx = translit_index(wc);
            if (indx >= 0) {
              /* The entry is a length byte followed by that many
                 replacement characters. */
              const unsigned char * cp = &translit_data[indx];
              unsigned int num = *cp++;
              state_t backup_state = cd->ostate;
              unsigned char* backup_outptr = outptr;
              size_t backup_outleft = outleft;
              unsigned int i;
              int sub_outcount;
              for (i = 0; i < num; i++) {
                if (outleft == 0) {
                  sub_outcount = RET_TOOSMALL;
                  goto translit_failed;
                }
                sub_outcount = cd->ofuncs.xxx_wctomb(cd,outptr,cp[i],outleft);
                if (sub_outcount <= 0)
                  goto translit_failed;
                if (!(sub_outcount <= outleft)) abort();
                outptr += sub_outcount; outleft -= sub_outcount;
              }
              goto char_done;
             translit_failed:
              /* Undo the partial output of this attempt. */
              cd->ostate = backup_state;
              outptr = backup_outptr;
              outleft = backup_outleft;
              if (sub_outcount < 0) {
                errno = E2BIG;
                result = -1;
                break;
              }
            }
          }
        }
        /* Last resort: U+FFFD REPLACEMENT CHARACTER. */
        outcount = cd->ofuncs.xxx_wctomb(cd,outptr,0xFFFD,outleft);
        if (outcount != 0)
          goto outcount_ok;
        errno = EILSEQ;
        result = -1;
        break;
       outcount_ok:
        if (outcount < 0) {
          errno = E2BIG;
          result = -1;
          break;
        }
        if (!(outcount <= outleft)) abort();
        outptr += outcount; outleft -= outcount;
       char_done:
        ;
      }
      if (!(incount <= inleft)) abort();
      inptr += incount; inleft -= incount;
    }
   finish:
    /* Report back how far the conversion got. */
    *inbuf = (ICONV_CONST char*) inptr;
    *inbytesleft = inleft;
    *outbuf = (char*) outptr;
    *outbytesleft = outleft;
    return result;
  }
}
/*
 * Releases the conversion descriptor icd.
 * Always returns 0.
 */
int iconv_close (iconv_t icd)
{
  /* The descriptor is a single heap block, so one free() releases it. */
  free((conv_t) icd);
  return 0;
}
#ifndef LIBICONV_PLUG
/*
 * Queries or changes a property of the conversion descriptor icd.
 *
 *   ICONV_TRIVIALP           stores 1 in *(int *)argument if the input and
 *                            output encoding indices are the same, else 0.
 *   ICONV_GET_TRANSLITERATE  stores the transliteration flag in
 *                            *(int *)argument.
 *   ICONV_SET_TRANSLITERATE  sets the transliteration flag to 1 if
 *                            *(const int *)argument is nonzero, else to 0.
 *
 * Returns 0 on success. For any other request, returns -1 with errno set
 * to EINVAL.
 */
int iconvctl (iconv_t icd, int request, void* argument)
{
  conv_t cd = (conv_t) icd;
  if (request == ICONV_TRIVIALP) {
    int * answer = (int *) argument;
    *answer = (cd->iindex == cd->oindex);
    return 0;
  }
  if (request == ICONV_GET_TRANSLITERATE) {
    int * answer = (int *) argument;
    *answer = cd->transliterate;
    return 0;
  }
  if (request == ICONV_SET_TRANSLITERATE) {
    const int * wanted = (const int *) argument;
    cd->transliterate = (*wanted != 0);
    return 0;
  }
  errno = EINVAL;
  return -1;
}
#endif