| /* |
| ********************************************************************** |
| * Copyright (C) 1999-2001, International Business Machines |
| * Corporation and others. All Rights Reserved. |
| ********************************************************************** |
| * |
| * |
| * ucnv_err.h: |
| */ |
| /** |
| * \file |
| * \brief C UConverter predefined error callbacks |
| * |
| * <h2> Error Behaviour Fnctions </h2> |
| * Defines some error behaviour functions called by ucnv_{from,to}Unicode |
| * These are provided as part of ICU and many are stable, but they |
| * can also be considered only as an example of what can be done with |
| * callbacks. You may of course write your own. |
| * |
| * These Functions, although public, should NEVER be called directly, they should be used as parameters to |
| * the ucnv_setFromUCallback and ucnv_setToUCallback functions, to |
| * set the behaviour of a converter |
| * when it encounters ILLEGAL/UNMAPPED/INVALID sequences. |
| * |
| * usage example: 'STOP' doesn't need any context, but newContext |
| * could be set to something other than 'NULL' if needed. |
| * |
| * \code |
| * UErrorCode err = U_ZERO_ERROR; |
| * UConverter* myConverter = ucnv_open("ibm-949", &err); |
| * const void *newContext = NULL; |
| * const void *oldContext; |
| * UConverterFromUCallback oldAction; |
| * |
| * |
| * if (U_SUCCESS(err)) |
| * { |
| * ucnv_setFromUCallBack(myConverter, |
| * UCNV_FROM_U_CALLBACK_STOP, |
| * newContext, |
| * &oldAction, |
| * &oldContext, |
| * &status); |
| * } |
| * \endcode |
| * |
| * The code above tells "myConverter" to stop when it encounters a ILLEGAL/TRUNCATED/INVALID sequences when it is used to |
| * convert from Unicode -> Codepage. |
| * The behavior from Codepage to Unicode is not changed. |
| */ |
| |
| /* This file isn't designed to be included all by itself. */ |
| #ifndef UCNV_H |
| # include "unicode/ucnv.h" |
| /* and the rest of this file will be ignored. */ |
| #endif |
| |
| #ifndef UCNV_ERR_H |
| #define UCNV_ERR_H |
| |
| #include "unicode/utypes.h" |
| |
| |
| /** |
| * FROM_U, TO_U options for sub and skip callbacks |
| * @draft ICU 1.8 |
| */ |
| #define UCNV_SUB_STOP_ON_ILLEGAL "i" |
| #define UCNV_SKIP_STOP_ON_ILLEGAL "i" |
| |
| /** |
| * FROM_U_CALLBACK_ESCAPE options |
| * @draft ICU 1.8 |
| */ |
| #define UCNV_ESCAPE_ICU NULL |
| #define UCNV_ESCAPE_JAVA "J" |
| #define UCNV_ESCAPE_C "C" |
| #define UCNV_ESCAPE_XML_DEC "D" |
| #define UCNV_ESCAPE_XML_HEX "X" |
| |
| /** |
| * The process condition code to be used with the callbacks. |
| * @stable |
| */ |
| typedef enum { |
| UCNV_UNASSIGNED = 0, /**< The code point is unassigned. |
| The error code U_INVALID_CHAR_FOUND will be set. */ |
| UCNV_ILLEGAL = 1, /**< The code point is illegal. For example, |
| \x81\x2E is illegal in SJIS because \x2E |
| is not a valid trail byte for the \x81 |
| lead byte. |
| Also, starting with Unicode 3.0.1, non-shortest byte sequences |
| in UTF-8 (like \xC1\xA1 instead of \x61 for U+0061) |
| are also illegal, not just irregular. |
| The error code U_ILLEGAL_CHAR_FOUND will be set. */ |
| UCNV_IRREGULAR = 2, /**< The codepoint is not a regular sequence in |
| the encoding. For example, \xED\xA0\x80..\xED\xBF\xBF |
| are irregular UTF-8 byte sequences for single surrogate |
| code points. |
| The error code U_INVALID_CHAR_FOUND will be set. */ |
| UCNV_RESET = 3, /**< The callback is called with this reason when a |
| 'reset' has occured. Callback should reset all |
| state. */ |
| UCNV_CLOSE = 4 /**< Called when the converter is closed. The |
| callback should release any allocated memory.*/ |
| } UConverterCallbackReason; |
| |
| |
| /** |
| * The structure for the fromUnicode callback function parameter. |
| * @stable |
| */ |
| typedef struct { |
| uint16_t size; |
| UBool flush; |
| UConverter *converter; |
| const UChar *source; |
| const UChar *sourceLimit; |
| char *target; |
| const char *targetLimit; |
| int32_t *offsets; /* *offset = blah ; offset++; */ |
| } UConverterFromUnicodeArgs; |
| |
| |
| /** |
| * The structure for the toUnicode callback function parameter. |
| * @stable |
| */ |
| typedef struct { |
| uint16_t size; |
| UBool flush; |
| UConverter *converter; |
| const char *source; |
| const char *sourceLimit; |
| UChar *target; |
| const UChar *targetLimit; |
| int32_t *offsets; |
| } UConverterToUnicodeArgs; |
| |
| |
| /** |
| * DO NOT CALL THIS FUNCTION DIRECTLY! |
| * This From Unicode callback STOPS at the ILLEGAL_SEQUENCE, |
| * returning the error code back to the caller immediately. |
| * @stable |
| */ |
| U_CAPI void U_EXPORT2 UCNV_FROM_U_CALLBACK_STOP ( |
| const void *context, |
| UConverterFromUnicodeArgs *fromUArgs, |
| const UChar* codeUnits, |
| int32_t length, |
| UChar32 codePoint, |
| UConverterCallbackReason reason, |
| UErrorCode * err); |
| |
| |
| |
| /** |
| * DO NOT CALL THIS FUNCTION DIRECTLY! |
| * This To Unicode callback STOPS at the ILLEGAL_SEQUENCE, |
| * returning the error code back to the caller immediately. |
| * |
| * @stable |
| */ |
| U_CAPI void U_EXPORT2 UCNV_TO_U_CALLBACK_STOP ( |
| const void *context, |
| UConverterToUnicodeArgs *fromUArgs, |
| const char* codeUnits, |
| int32_t length, |
| UConverterCallbackReason reason, |
| UErrorCode * err); |
| |
| /** |
| * DO NOT CALL THIS FUNCTION DIRECTLY! |
| * This From Unicode callback skips any ILLEGAL_SEQUENCE, or |
| * skips only UNASSINGED_SEQUENCE depending on the context parameter |
| * simply ignoring those characters. |
| * @param context: the function currently recognizes the callback options: |
| * UCNV_SKIP_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE, |
| * returning the error code back to the caller immediately. |
| * NULL: Skips any ILLEGAL_SEQUENCE |
| * |
| * @stable |
| */ |
| U_CAPI void U_EXPORT2 UCNV_FROM_U_CALLBACK_SKIP ( |
| const void *context, |
| UConverterFromUnicodeArgs *fromUArgs, |
| const UChar* codeUnits, |
| int32_t length, |
| UChar32 codePoint, |
| UConverterCallbackReason reason, |
| UErrorCode * err); |
| |
| /** |
| * DO NOT CALL THIS FUNCTION DIRECTLY! |
| * This From Unicode callback will Substitute the ILLEGAL SEQUENCE, or |
| * UNASSIGNED_SEQUENCE depending on context parameter, with the |
| * current substitution string for the converter. This is the default |
| * callback. |
| * @param context: the function currently recognizes the callback options: |
| * UCNV_SUB_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE, |
| * returning the error code back to the caller immediately. |
| * NULL: Substitutes any ILLEGAL_SEQUENCE |
| * @see ucnv_setSubstChars |
| * @stable |
| */ |
| |
| U_CAPI void U_EXPORT2 UCNV_FROM_U_CALLBACK_SUBSTITUTE ( |
| const void *context, |
| UConverterFromUnicodeArgs *fromUArgs, |
| const UChar* codeUnits, |
| int32_t length, |
| UChar32 codePoint, |
| UConverterCallbackReason reason, |
| UErrorCode * err); |
| |
| /** |
| * DO NOT CALL THIS FUNCTION DIRECTLY! |
| * This From Unicode callback will Substitute the ILLEGAL SEQUENCE with the |
| * hexadecimal representation of the illegal codepoints |
| |
| * @param context: the function currently recognizes the callback options: |
| * |
| * UCNV_ESCAPE_ICU: Substitues the ILLEGAL SEQUENCE with the hexadecimal |
| * representation in the format %UXXXX, e.g. "%uFFFE%u00AC%uC8FE"). |
| * In the Event the converter doesn't support the characters {u,%}[A-F][0-9], |
| * it will substitute the illegal sequence with the substitution characters. |
| * Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as |
| * %UD84D%UDC56 |
| * UCNV_ESCAPE_JAVA: Substitues the ILLEGAL SEQUENCE with the hexadecimal |
| * representation in the format \uXXXX, e.g. "\uFFFE\u00AC\uC8FE"). |
| * In the Event the converter doesn't support the characters {u,\}[A-F][0-9], |
| * it will substitute the illegal sequence with the substitution characters. |
| * Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as |
| * \uD84D\uDC56 |
| * UCNV_ESCAPE_C: Substitues the ILLEGAL SEQUENCE with the hexadecimal |
| * representation in the format \uXXXX, e.g. "\uFFFE\u00AC\uC8FE"). |
| * In the Event the converter doesn't support the characters {u,U,\}[A-F][0-9], |
| * it will substitute the illegal sequence with the substitution characters. |
| * Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as |
| * \U00023456 |
| * UCNV_ESCAPE_XML_DEC: Substitues the ILLEGAL SEQUENCE with the decimal |
| * representation in the format &#DDDDDDDD, e.g. "¬죾"). |
| * In the Event the converter doesn't support the characters {&,#}[0-9], |
| * it will substitute the illegal sequence with the substitution characters. |
| * Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as |
| * 𣑖 and Zero padding is ignored. |
| * UCNV_ESCAPE_XML_HEX:Substitues the ILLEGAL SEQUENCE with the decimal |
| * representation in the format &#xXXXX, e.g. "¬죾"). |
| * In the Event the converter doesn't support the characters {&,#,x}[0-9], |
| * it will substitute the illegal sequence with the substitution characters. |
| * Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as |
| * 𣑖 |
| * @stable |
| */ |
| |
| U_CAPI void U_EXPORT2 UCNV_FROM_U_CALLBACK_ESCAPE ( |
| const void *context, |
| UConverterFromUnicodeArgs *fromUArgs, |
| const UChar* codeUnits, |
| int32_t length, |
| UChar32 codePoint, |
| UConverterCallbackReason reason, |
| UErrorCode * err); |
| |
| |
| /** |
| * DO NOT CALL THIS FUNCTION DIRECTLY! |
| * This To Unicode callback skips any ILLEGAL_SEQUENCE, or |
| * skips only UNASSINGED_SEQUENCE depending on the context parameter |
| * simply ignoring those characters. |
| * @param context: the function currently recognizes the callback options: |
| * UCNV_SKIP_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE, |
| * returning the error code back to the caller immediately. |
| * NULL: Skips any ILLEGAL_SEQUENCE |
| * |
| * @stable |
| */ |
| U_CAPI void U_EXPORT2 UCNV_TO_U_CALLBACK_SKIP ( |
| const void *context, |
| UConverterToUnicodeArgs *fromUArgs, |
| const char* codeUnits, |
| int32_t length, |
| UConverterCallbackReason reason, |
| UErrorCode * err); |
| |
| /** |
| * DO NOT CALL THIS FUNCTION DIRECTLY! |
| * This To Unicode callback will Substitute the ILLEGAL SEQUENCE,or |
| * UNASSIGNED_SEQUENCE depending on context parameter, with the |
| * Unicode substitution character, U+FFFD. |
| * @param context: the function currently recognizes the callback options: |
| * UCNV_SUB_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE, |
| * returning the error code back to the caller immediately. |
| * NULL: Substitutes any ILLEGAL_SEQUENCE |
| * @stable |
| */ |
| U_CAPI void U_EXPORT2 UCNV_TO_U_CALLBACK_SUBSTITUTE ( |
| const void *context, |
| UConverterToUnicodeArgs *fromUArgs, |
| const char* codeUnits, |
| int32_t length, |
| UConverterCallbackReason reason, |
| UErrorCode * err); |
| |
| /** |
| * DO NOT CALL THIS FUNCTION DIRECTLY! |
| * This To Unicode callback will Substitute the ILLEGAL SEQUENCE with the |
| * hexadecimal representation of the illegal bytes |
| * (in the format %XNN, e.g. "%XFF%X0A%XC8%X03"). |
| * @stable |
| */ |
| |
| U_CAPI void U_EXPORT2 UCNV_TO_U_CALLBACK_ESCAPE ( |
| const void *context, |
| UConverterToUnicodeArgs *fromUArgs, |
| const char* codeUnits, |
| int32_t length, |
| UConverterCallbackReason reason, |
| UErrorCode * err); |
| |
| #endif |
| |
| /*UCNV_ERR_H*/ |