/* blob: fa36f436fd0f63194b4cdcc7267f90459f7b50d2 [file] [log] [blame] */
/* Copyright (C) 2024 Free Software Foundation, Inc.
This file is part of the GNU LIBICONV Library.
The GNU LIBICONV Library is free software; you can redistribute it
and/or modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either version 2.1
of the License, or (at your option) any later version.
The GNU LIBICONV Library is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU LIBICONV Library; see the file COPYING.LIB.
If not, see <https://www.gnu.org/licenses/>. */
#include "config.h"
#include <stdlib.h>
#include <string.h>
#include <iconv.h>
#include <errno.h>
/* This test checks that iconv(cd,NULL,NULL,...) does not forget about
the byte-order state in conversions from UCS-2, UCS-4, UTF-16, UTF-32.
The POSIX specification
<https://pubs.opengroup.org/onlinepubs/9799919799/functions/iconv.html>
is clear that iconv(cd,NULL,NULL,...) has an effect for state-dependent
encodings only. The manual page
<https://www.kernel.org/doc/man-pages/online/pages/man3/iconv.3.html>
is not so clear about it. But Ulrich Drepper states it correctly in
<https://bugzilla.redhat.com/show_bug.cgi?id=165368>:
"Flushing using iconv() only resets the shift state. This is needed
for stateful encodings with states where the caller wants a converted
string to end in the initial state. The BOM recognition has nothing
to do with shift states. Once the byte order is determined this is
a property which stays with the iconv_t descriptor for its lifetime."
Based on a bug report from Tomas Kalibera <tomas.kalibera@gmail.com> in
<https://lists.gnu.org/archive/html/bug-gnu-libiconv/2024-12/msg00000.html>.
*/
/* Converts INPUT (INPUT_SIZE bytes, encoded in FROMCODE) to UTF-8 in two
   steps, with an iconv (cd, NULL, NULL, NULL, NULL) call in between, and
   verifies that the second step still decodes with the same byte order as
   the first one.
   The expected output is U+2526 from the first step and U+2629 from the
   second step.
   Calls abort () as soon as any step deviates from the expected outcome.  */
static void test_one_input (const char *fromcode,
                            const char *input, size_t input_size)
{
  iconv_t cd = iconv_open ("UTF-8", fromcode);
  if (cd == (iconv_t)(-1))
    abort ();

  /* iconv () wants a non-const input pointer, hence the cast.  */
  char *src = (char *) input;
  size_t src_left = input_size;

  /* Step 1: the output buffer has room for a single 3-byte UTF-8 sequence
     only, so the conversion has to stop with E2BIG once that buffer has
     been filled completely.  */
  char first[3];
  char *dst = first;
  size_t dst_left = sizeof (first);
  size_t rc = iconv (cd, &src, &src_left, &dst, &dst_left);
  if (rc != (size_t)(-1) || errno != E2BIG || dst_left != 0)
    abort ();
  if (memcmp (first, "\xe2\x94\xa6", 3) != 0) /* expected: U+2526 */
    abort ();

  /* Step 2: reset the shift state.  This call must succeed.  */
  rc = iconv (cd, NULL, NULL, NULL, NULL);
  if (rc != 0)
    abort ();

  /* Step 3: continue with the remaining input, into a fresh 3-byte
     buffer.  This time the call has to succeed and fill the buffer.  */
  char second[3];
  dst = second;
  dst_left = sizeof (second);
  rc = iconv (cd, &src, &src_left, &dst, &dst_left);
  if (rc != 0 || dst_left != 0)
    abort ();
  if (memcmp (second, "\xe2\x98\xa9", 3) != 0) /* expected: U+2629 */
    abort ();

  iconv_close (cd);
}
/* Runs test_one_input for FROMCODE twice: first on BE_INPUT, then on
   LE_INPUT.  Both inputs are INPUT_SIZE bytes long.  */
static void test_both_inputs (const char *fromcode,
                              const char *be_input, const char *le_input,
                              size_t input_size)
{
  /* Big-endian variant first, little-endian variant second.  */
  const char *variants[2] = { be_input, le_input };

  for (size_t i = 0; i < 2; i++)
    test_one_input (fromcode, variants[i], input_size);
}
/* Test driver: feeds a big-endian and a little-endian input to
   test_both_inputs, once with 2-byte code units and once with 4-byte code
   units.
   The UCS-2 and UCS-4 runs are compiled in only when _LIBICONV_VERSION is
   defined (presumably by GNU libiconv's <iconv.h>); UTF-16 and UTF-32 are
   always tested.
   Returns 0 when every check has passed; a failing check ends the program
   through abort () inside test_one_input.  */
int main (void)
{
  {
    static const char be_input[] = "\xfe\xff\x25\x26\x26\x29";
    static const char le_input[] = "\xff\xfe\x26\x25\x29\x26";
    /* Byte count of the input, without the literal's terminating NUL.
       Both inputs have the same length.  */
    const size_t input_size = sizeof (be_input) - 1;

#ifdef _LIBICONV_VERSION
    test_both_inputs ("UCS-2", be_input, le_input, input_size);
#endif
    test_both_inputs ("UTF-16", be_input, le_input, input_size);
  }

  {
    static const char be_input[] =
      "\x00\x00\xfe\xff\x00\x00\x25\x26\x00\x00\x26\x29";
    static const char le_input[] =
      "\xff\xfe\x00\x00\x26\x25\x00\x00\x29\x26\x00\x00";
    /* Byte count of the input, without the literal's terminating NUL.
       Both inputs have the same length.  */
    const size_t input_size = sizeof (be_input) - 1;

#ifdef _LIBICONV_VERSION
    test_both_inputs ("UCS-4", be_input, le_input, input_size);
#endif
    test_both_inputs ("UTF-32", be_input, le_input, input_size);
  }

  return 0;
}