source/common/ucnv_cnv.h - external/github.com/unicode-org/icu - Git at Google

 /*
 **********************************************************************
 *   Copyright (C) 1999, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 **********************************************************************
 *
 *   ucnv_cnv.h:
 *   defines all the low level conversion functions
 *   T_UnicodeConverter_{to,from}Unicode_$ConversionType
 *
 * Modification History:
 *
 *   Date        Name        Description
 *   05/09/00    helena      Added implementation to handle fallback mappings.
 *   06/29/2000  helena      Major rewrite of the callback APIs.
 */

 #ifndef UCNV_CNV_H
 #define UCNV_CNV_H

 #include "unicode/utypes.h"
 #include "unicode/ucnv_err.h"
 #include "ucnv_bld.h"
 #include "ucnvmbcs.h"
 #include "ucmp8.h"
 #include "ucmp16.h"

 /*Table Node Definitions */
 /**
  * Table node for the "sbcs" member of union UConverterTable.
  * Holds a regular pair of mapping tables plus a parallel pair used for
  * fallback mappings.
  */
 typedef struct {
     UChar *toUnicode;                     /* UChar table; original note: [256] */
     CompactByteArray fromUnicode;         /* compact byte array, Unicode -> codepage */
     UChar *toUnicodeFallback;             /* fallback counterpart of toUnicode */
     CompactByteArray fromUnicodeFallback; /* fallback counterpart of fromUnicode */
 } UConverterSBCSTable;

 /**
  * Table node for the "dbcs" member of union UConverterTable.
  * All four tables are CompactShortArray objects: one pair for the regular
  * mappings and one pair for fallback mappings.
  */
 typedef struct {
     CompactShortArray toUnicode;           /* codepage -> Unicode */
     CompactShortArray fromUnicode;         /* Unicode -> codepage */
     CompactShortArray toUnicodeFallback;   /* fallback counterpart of toUnicode */
     CompactShortArray fromUnicodeFallback; /* fallback counterpart of fromUnicode */
 } UConverterDBCSTable;

 /**
  * Overlay of the table layouts; exactly one member is meaningful for a given
  * converter. NOTE(review): which member applies is presumably selected by
  * the converter type - confirm against the code that fills the union.
  */
 union UConverterTable {
     UConverterSBCSTable sbcs; /* SBCS table node */
     UConverterDBCSTable dbcs; /* DBCS table node */
     UConverterMBCSTable mbcs; /* MBCS table node (type comes from ucnvmbcs.h, presumably) */
 };


 U_CDECL_BEGIN

 /* this is used in fromUnicode DBCS tables as an "unassigned" marker */
 #define missingCharMarker 0xFFFF

 /*
  * #define missingUCharMarker 0xfffe
  *
  * there are actually two values used in toUnicode tables:
  * U+fffe "unassigned"
  * U+ffff "illegal"
  */

 /**
  * Calls the converter's fromUCharErrorBehaviour callback, then recomputes the
  * invoking function's local indexes from the positions left in args.
  *
  * Expands to a brace-enclosed block (a statement, not an expression). The
  * invoking function must have these locals in scope:
  *   myTarget, mySource           - base pointers the indexes are measured from
  *   myTargetIndex, mySourceIndex - overwritten with args->target - myTarget
  *                                  and args->source - mySource
  *
  * args is parenthesized wherever it is dereferenced, so any pointer-valued
  * expression (for example &localArgs) can be passed. It is expanded several
  * times; do not pass an expression with side effects.
  *
  * All other parameters are forwarded to the callback unchanged, in the order
  * (context, args, codeUnits, length, codePoint, reason, err).
  */
 #define FromU_CALLBACK_MACRO(context, args, codeUnits, length, codePoint, reason, err) \
                 { \
                   /* call the error callback */ \
                   (args)->converter->fromUCharErrorBehaviour ( context, \
                                                   args, \
                                                   codeUnits, \
                                                   length, \
                                                   codePoint, \
                                                   reason, \
                                                   err); \
                   /* pick up the positions now stored in args */ \
                   myTargetIndex = (args)->target - (char*)myTarget; \
                   mySourceIndex = (args)->source - mySource; \
                 }
 /*
 */
 /**
  * Calls the converter's fromCharErrorBehaviour callback, then recomputes the
  * invoking function's local indexes from the positions left in args.
  *
  * Expands to a brace-enclosed block (a statement, not an expression). The
  * invoking function must have these locals in scope:
  *   myTarget, mySource           - base pointers the indexes are measured from
  *                                  (mySource is cast to const char* here)
  *   myTargetIndex, mySourceIndex - overwritten with args->target - myTarget
  *                                  and args->source - mySource
  *
  * args is parenthesized wherever it is dereferenced, so any pointer-valued
  * expression (for example &localArgs) can be passed. It is expanded several
  * times; do not pass an expression with side effects.
  *
  * All other parameters are forwarded to the callback unchanged, in the order
  * (context, args, codePoints, length, reason, err).
  */
 #define ToU_CALLBACK_MACRO(context, args, codePoints, length, reason, err) \
                 { \
                   /* call the error callback */ \
                   (args)->converter->fromCharErrorBehaviour ( \
                                                  context, \
                                                  args, \
                                                  codePoints, \
                                                  length, \
                                                  reason, \
                                                  err); \
                   /* pick up the positions now stored in args */ \
                   myTargetIndex = (args)->target - myTarget; \
                   mySourceIndex = (args)->source - (const char*)mySource; \
                 }
 /*
 */
 /**
  * Offset-tracking variant of the from-Unicode callback invocation.
  *
  * Steps, in order:
  *   1. remember the current myTargetIndex in a block-local My_i;
  *   2. call the converter's fromUCharErrorBehaviour callback;
  *   3. recompute myTargetIndex / mySourceIndex from args->target / args->source;
  *   4. reset args->offsets to saveOffsets;
  *   5. add currentOffset to every args->offsets[] entry in the index range
  *      [old myTargetIndex, new myTargetIndex).
  *
  * Expands to a brace-enclosed block (a statement, not an expression). The
  * invoking function must have these locals in scope: myTarget, mySource,
  * myTargetIndex, mySourceIndex, saveOffsets, currentOffset.
  *
  * args is parenthesized wherever it is dereferenced, so any pointer-valued
  * expression (for example &localArgs) can be passed. It is expanded several
  * times; do not pass an expression with side effects.
  */
 #define FromU_CALLBACK_OFFSETS_LOGIC_MACRO(context, args, codeUnits, length, codePoint, reason, err) \
                 { \
                   /* first target index the callback may write to */ \
                   int32_t My_i = myTargetIndex; \
                   /* call the error callback */ \
                   (args)->converter->fromUCharErrorBehaviour ( \
                                                  context, \
                                                  args, \
                                                  codeUnits, \
                                                  length, \
                                                  codePoint, \
                                                  reason, \
                                                  err); \
                   /* update the local indexes so that the conversion can restart at the right points */ \
                   myTargetIndex = (args)->target - (char*)myTarget; \
                   mySourceIndex = (args)->source - mySource; \
                   (args)->offsets = saveOffsets; \
                   /* shift the offsets of everything written during the callback */ \
                   for (; My_i < myTargetIndex; My_i++) { (args)->offsets[My_i] += currentOffset; } \
                 }
 /*
 */
 /**
  * Offset-tracking variant of the to-Unicode callback invocation.
  *
  * Steps, in order:
  *   1. call the converter's fromCharErrorBehaviour callback;
  *   2. recompute myTargetIndex / mySourceIndex from args->target / args->source;
  *   3. reset args->offsets to saveOffsets;
  *   4. add currentOffset to every args->offsets[] entry in the index range
  *      [My_i, new myTargetIndex).
  *
  * Expands to a brace-enclosed block (a statement, not an expression). The
  * invoking function must have these locals in scope: myTarget, mySource,
  * myTargetIndex, mySourceIndex, saveOffsets, currentOffset and My_i.
  * Unlike FromU_CALLBACK_OFFSETS_LOGIC_MACRO, this macro does NOT declare
  * My_i itself: the invoking function supplies its starting value, and the
  * variable is left equal to the new myTargetIndex (or unchanged if it was
  * already past it).
  *
  * args is parenthesized wherever it is dereferenced, so any pointer-valued
  * expression (for example &localArgs) can be passed. It is expanded several
  * times; do not pass an expression with side effects.
  */
 #define ToU_CALLBACK_OFFSETS_LOGIC_MACRO(context, args, codePoints, length, reason, err) \
                 { \
                   /* call the error callback */ \
                   (args)->converter->fromCharErrorBehaviour ( \
                                                  context, \
                                                  args, \
                                                  codePoints, \
                                                  length, \
                                                  reason, \
                                                  err); \
                   /* update the local indexes so that the conversion can restart at the right points */ \
                   myTargetIndex = (args)->target - myTarget; \
                   mySourceIndex = (args)->source - (const char*)mySource; \
                   (args)->offsets = saveOffsets; \
                   /* shift the offsets of everything written during the callback */ \
                   for (; My_i < myTargetIndex; My_i++) { (args)->offsets[My_i] += currentOffset; } \
                 }


 /*
  * Function-pointer types for the entry points stored in struct UConverterImpl.
  * Each typedef names the signature of the like-named slot(s) in that struct.
  */

 /* Signatures of the load / unload slots. */
 typedef void (*UConverterLoad)(UConverterSharedData *sharedData,
                                const uint8_t *raw,
                                UErrorCode *pErrorCode);
 typedef void (*UConverterUnload)(UConverterSharedData *sharedData);

 /* Signatures of the open / close / reset slots. */
 typedef void (*UConverterOpen)(UConverter *cnv,
                                const char *name,
                                const char *locale,
                                UErrorCode *pErrorCode);
 typedef void (*UConverterClose)(UConverter *cnv);
 typedef void (*UConverterReset)(UConverter *cnv);

 /* Signature of the toUnicode and toUnicodeWithOffsets slots. */
 typedef void (*T_ToUnicodeFunction)(UConverterToUnicodeArgs *args,
                                     UErrorCode *pErrorCode);

 /* Signature of the fromUnicode and fromUnicodeWithOffsets slots. */
 typedef void (*T_FromUnicodeFunction)(UConverterFromUnicodeArgs *args,
                                       UErrorCode *pErrorCode);

 /* Signature of the getNextUChar slot; returns a UChar32. */
 typedef UChar32 (*T_GetNextUCharFunction)(UConverterToUnicodeArgs *args,
                                           UErrorCode *pErrorCode);

 /* Signature of the getStarters slot; starters has one UBool per byte value. */
 typedef void (*UConverterGetStarters)(const UConverter *converter,
                                       UBool starters[256],
                                       UErrorCode *pErrorCode);

 /*
  * Helper routines declared here and defined elsewhere.
  * NOTE(review): their exact contracts are not visible in this header; the
  * notes below are read off the names and parameter lists only - confirm
  * against the implementation.
  */

 /* Presumably reports whether err still allows conversion to continue. */
 UBool CONVERSION_U_SUCCESS(UErrorCode err);

 /*
  * UChar-target flush helper. myTargetIndex and offsets are passed by address,
  * so the function can update the caller's values.
  */
 void flushInternalUnicodeBuffer(UConverter *_this,
                                 UChar *myTarget,
                                 int32_t *myTargetIndex,
                                 int32_t targetLength,
                                 int32_t **offsets,
                                 UErrorCode *err);

 /* char-target counterpart of flushInternalUnicodeBuffer; same parameter shape. */
 void flushInternalCharBuffer(UConverter *_this,
                              char *myTarget,
                              int32_t *myTargetIndex,
                              int32_t targetLength,
                              int32_t **offsets,
                              UErrorCode *err);

 /**
  * UConverterImpl contains all the data and functions for a converter type.
  * Its function pointers work much like a C++ vtable.
  * Many converter types need to define only a subset of the functions;
  * when a function pointer is NULL, then a default action will be performed.
  *
  * Every converter type must implement toUnicode, fromUnicode, and getNextUChar,
  * otherwise the converter may crash.
  * Every converter type that has variable-length codepage sequences should
  * also implement toUnicodeWithOffsets and fromUnicodeWithOffsets for
  * correct offset handling.
  * All other functions may or may not be implemented - it depends only on
  * whether the converter type needs them.
  *
  * When open() fails, then close() will be called, if present.
  */
 struct UConverterImpl {
     /* the converter type this set of functions belongs to */
     UConverterType type;

     /* optional hooks taking the UConverterSharedData (see UConverterLoad/Unload) */
     UConverterLoad load;
     UConverterUnload unload;

     /* optional per-converter hooks; close() is also called when open() fails */
     UConverterOpen open;
     UConverterClose close;
     UConverterReset reset;

     /*
      * Conversion entry points. toUnicode, fromUnicode and getNextUChar are
      * mandatory; the *WithOffsets variants should be provided by types with
      * variable-length codepage sequences.
      */
     T_ToUnicodeFunction toUnicode;
     T_ToUnicodeFunction toUnicodeWithOffsets;
     T_FromUnicodeFunction fromUnicode;
     T_FromUnicodeFunction fromUnicodeWithOffsets;
     T_GetNextUCharFunction getNextUChar;

     /* optional; fills a UBool[256] array (see UConverterGetStarters) */
     UConverterGetStarters getStarters;
 };

 /*
  * Constant UConverterSharedData objects, one per built-in converter
  * implementation; they are only declared here and are defined elsewhere.
  * NOTE(review): identifiers beginning with an underscore followed by an
  * uppercase letter are reserved in C; they are kept as-is because other
  * files refer to them.
  */
 extern const UConverterSharedData _SBCSData, _DBCSData, _MBCSData, _Latin1Data;
 extern const UConverterSharedData _UTF8Data, _UTF16BEData, _UTF16LEData;
 extern const UConverterSharedData _EBCDICStatefulData, _ISO2022Data;
 extern const UConverterSharedData _LMBCSData1, _LMBCSData2, _LMBCSData3,
                                   _LMBCSData4, _LMBCSData5, _LMBCSData6;
 extern const UConverterSharedData _LMBCSData8, _LMBCSData11, _LMBCSData16,
                                   _LMBCSData17, _LMBCSData18, _LMBCSData19;

 U_CDECL_END

 /**
  * Returns the first code point of a UChar buffer and keeps the rest as
  * overflow in the converter.
  *
  * This function is useful for implementations of getNextUChar().
  * After a call to a callback function or to toUnicode(), an output buffer
  * begins with a Unicode code point that needs to be returned as UChar32,
  * and all following code units must be prepended to the - potentially
  * prefilled - overflow buffer in the UConverter.
  * The buffer should have a capacity of at least UTF_MAX_CHAR_LENGTH so that
  * all UChars of a complete UChar32 fit into it.
  *
  * @param cnv    The converter that will get remaining UChars copied to its overflow area.
  * @param buffer An array of UChars that was passed into a callback function
  *               or a toUnicode() function.
  * @param length The number of code units (UChars) that are actually in the buffer.
  *               This must be >0.
  * @return The code point from the first UChars in the buffer.
  */
 U_CFUNC UChar32
 ucnv_getUChar32KeepOverflow(UConverter *cnv, const UChar *buffer, int32_t length);

 #endif /* UCNV_CNV_H */
	/*
	**********************************************************************
	* Copyright (C) 1999, International Business Machines
	* Corporation and others. All Rights Reserved.
	**********************************************************************
	*
	* ucnv_cnv.h:
	* defines all the low level conversion functions
	* T_UnicodeConverter_{to,from}Unicode_$ConversionType
	*
	* Modification History:
	*
	* Date Name Description
	* 05/09/00 helena Added implementation to handle fallback mappings.
	* 06/29/2000 helena Major rewrite of the callback APIs.
	*/

	#ifndef UCNV_CNV_H
	#define UCNV_CNV_H

	#include "unicode/utypes.h"
	#include "unicode/ucnv_err.h"
	#include "ucnv_bld.h"
	#include "ucnvmbcs.h"
	#include "ucmp8.h"
	#include "ucmp16.h"

	/Table Node Definitions /
	typedef struct
	{
	UChar toUnicode; / [256]; */
	CompactByteArray fromUnicode;
	UChar *toUnicodeFallback;
	CompactByteArray fromUnicodeFallback;
	}
	UConverterSBCSTable;

	/**
	 * Table node for the "dbcs" member of union UConverterTable.
	 * All four tables are CompactShortArray objects: one pair for the regular
	 * mappings and one pair for fallback mappings.
	 */
	typedef struct {
	    CompactShortArray toUnicode;           /* codepage -> Unicode */
	    CompactShortArray fromUnicode;         /* Unicode -> codepage */
	    CompactShortArray toUnicodeFallback;   /* fallback counterpart of toUnicode */
	    CompactShortArray fromUnicodeFallback; /* fallback counterpart of fromUnicode */
	} UConverterDBCSTable;

	/**
	 * Overlay of the table layouts; exactly one member is meaningful for a given
	 * converter. NOTE(review): which member applies is presumably selected by
	 * the converter type - confirm against the code that fills the union.
	 */
	union UConverterTable {
	    UConverterSBCSTable sbcs; /* SBCS table node */
	    UConverterDBCSTable dbcs; /* DBCS table node */
	    UConverterMBCSTable mbcs; /* MBCS table node (type comes from ucnvmbcs.h, presumably) */
	};


	U_CDECL_BEGIN

	/* this is used in fromUnicode DBCS tables as an "unassigned" marker */
	#define missingCharMarker 0xFFFF

	/*
	* #define missingUCharMarker 0xfffe
	*
	* there are actually two values used in toUnicode tables:
	* U+fffe "unassigned"
	* U+ffff "illegal"
	*/

	/**
	 * Calls the converter's fromUCharErrorBehaviour callback, then recomputes the
	 * invoking function's local indexes from the positions left in args.
	 *
	 * Expands to a brace-enclosed block (a statement, not an expression). The
	 * invoking function must have these locals in scope:
	 *   myTarget, mySource           - base pointers the indexes are measured from
	 *   myTargetIndex, mySourceIndex - overwritten with args->target - myTarget
	 *                                  and args->source - mySource
	 *
	 * args is parenthesized wherever it is dereferenced, so any pointer-valued
	 * expression (for example &localArgs) can be passed. It is expanded several
	 * times; do not pass an expression with side effects.
	 *
	 * All other parameters are forwarded to the callback unchanged, in the order
	 * (context, args, codeUnits, length, codePoint, reason, err).
	 */
	#define FromU_CALLBACK_MACRO(context, args, codeUnits, length, codePoint, reason, err) \
	{ \
	    /* call the error callback */ \
	    (args)->converter->fromUCharErrorBehaviour ( context, \
	        args, \
	        codeUnits, \
	        length, \
	        codePoint, \
	        reason, \
	        err); \
	    /* pick up the positions now stored in args */ \
	    myTargetIndex = (args)->target - (char*)myTarget; \
	    mySourceIndex = (args)->source - mySource; \
	}
	/*
	*/
	/**
	 * Calls the converter's fromCharErrorBehaviour callback, then recomputes the
	 * invoking function's local indexes from the positions left in args.
	 *
	 * Expands to a brace-enclosed block (a statement, not an expression). The
	 * invoking function must have these locals in scope:
	 *   myTarget, mySource           - base pointers the indexes are measured from
	 *                                  (mySource is cast to const char* here)
	 *   myTargetIndex, mySourceIndex - overwritten with args->target - myTarget
	 *                                  and args->source - mySource
	 *
	 * args is parenthesized wherever it is dereferenced, so any pointer-valued
	 * expression (for example &localArgs) can be passed. It is expanded several
	 * times; do not pass an expression with side effects.
	 *
	 * All other parameters are forwarded to the callback unchanged, in the order
	 * (context, args, codePoints, length, reason, err).
	 */
	#define ToU_CALLBACK_MACRO(context, args, codePoints, length, reason, err) \
	{ \
	    /* call the error callback */ \
	    (args)->converter->fromCharErrorBehaviour ( \
	        context, \
	        args, \
	        codePoints, \
	        length, \
	        reason, \
	        err); \
	    /* pick up the positions now stored in args */ \
	    myTargetIndex = (args)->target - myTarget; \
	    mySourceIndex = (args)->source - (const char*)mySource; \
	}
	/*
	*/
	/**
	 * Offset-tracking variant of the from-Unicode callback invocation.
	 *
	 * Steps, in order:
	 *   1. remember the current myTargetIndex in a block-local My_i;
	 *   2. call the converter's fromUCharErrorBehaviour callback;
	 *   3. recompute myTargetIndex / mySourceIndex from args->target / args->source;
	 *   4. reset args->offsets to saveOffsets;
	 *   5. add currentOffset to every args->offsets[] entry in the index range
	 *      [old myTargetIndex, new myTargetIndex).
	 *
	 * Expands to a brace-enclosed block (a statement, not an expression). The
	 * invoking function must have these locals in scope: myTarget, mySource,
	 * myTargetIndex, mySourceIndex, saveOffsets, currentOffset.
	 *
	 * args is parenthesized wherever it is dereferenced, so any pointer-valued
	 * expression (for example &localArgs) can be passed. It is expanded several
	 * times; do not pass an expression with side effects.
	 */
	#define FromU_CALLBACK_OFFSETS_LOGIC_MACRO(context, args, codeUnits, length, codePoint, reason, err) \
	{ \
	    /* first target index the callback may write to */ \
	    int32_t My_i = myTargetIndex; \
	    /* call the error callback */ \
	    (args)->converter->fromUCharErrorBehaviour ( \
	        context, \
	        args, \
	        codeUnits, \
	        length, \
	        codePoint, \
	        reason, \
	        err); \
	    /* update the local indexes so that the conversion can restart at the right points */ \
	    myTargetIndex = (args)->target - (char*)myTarget; \
	    mySourceIndex = (args)->source - mySource; \
	    (args)->offsets = saveOffsets; \
	    /* shift the offsets of everything written during the callback */ \
	    for (; My_i < myTargetIndex; My_i++) { (args)->offsets[My_i] += currentOffset; } \
	}
	/*
	*/
	/**
	 * Offset-tracking variant of the to-Unicode callback invocation.
	 *
	 * Steps, in order:
	 *   1. call the converter's fromCharErrorBehaviour callback;
	 *   2. recompute myTargetIndex / mySourceIndex from args->target / args->source;
	 *   3. reset args->offsets to saveOffsets;
	 *   4. add currentOffset to every args->offsets[] entry in the index range
	 *      [My_i, new myTargetIndex).
	 *
	 * Expands to a brace-enclosed block (a statement, not an expression). The
	 * invoking function must have these locals in scope: myTarget, mySource,
	 * myTargetIndex, mySourceIndex, saveOffsets, currentOffset and My_i.
	 * Unlike FromU_CALLBACK_OFFSETS_LOGIC_MACRO, this macro does NOT declare
	 * My_i itself: the invoking function supplies its starting value, and the
	 * variable is left equal to the new myTargetIndex (or unchanged if it was
	 * already past it).
	 *
	 * args is parenthesized wherever it is dereferenced, so any pointer-valued
	 * expression (for example &localArgs) can be passed. It is expanded several
	 * times; do not pass an expression with side effects.
	 */
	#define ToU_CALLBACK_OFFSETS_LOGIC_MACRO(context, args, codePoints, length, reason, err) \
	{ \
	    /* call the error callback */ \
	    (args)->converter->fromCharErrorBehaviour ( \
	        context, \
	        args, \
	        codePoints, \
	        length, \
	        reason, \
	        err); \
	    /* update the local indexes so that the conversion can restart at the right points */ \
	    myTargetIndex = (args)->target - myTarget; \
	    mySourceIndex = (args)->source - (const char*)mySource; \
	    (args)->offsets = saveOffsets; \
	    /* shift the offsets of everything written during the callback */ \
	    for (; My_i < myTargetIndex; My_i++) { (args)->offsets[My_i] += currentOffset; } \
	}


	typedef void (UConverterLoad) (UConverterSharedData sharedData, const uint8_t raw, UErrorCode pErrorCode);
	typedef void (UConverterUnload) (UConverterSharedData sharedData);

	typedef void (UConverterOpen) (UConverter cnv, const char name, const char locale, UErrorCode *pErrorCode);
	typedef void (UConverterClose) (UConverter cnv);

	typedef void (UConverterReset) (UConverter cnv);

	typedef void (T_ToUnicodeFunction) (UConverterToUnicodeArgs , UErrorCode *);

	typedef void (T_FromUnicodeFunction) (UConverterFromUnicodeArgs , UErrorCode *);

	typedef UChar32 (T_GetNextUCharFunction) (UConverterToUnicodeArgs , UErrorCode *);

	typedef void (UConverterGetStarters)(const UConverter converter,
	UBool starters[256],
	UErrorCode *pErrorCode);

	/*
	 * Helper routines declared here and defined elsewhere.
	 * NOTE(review): their exact contracts are not visible in this header; the
	 * notes below are read off the names and parameter lists only - confirm
	 * against the implementation.
	 */

	/* Presumably reports whether err still allows conversion to continue. */
	UBool CONVERSION_U_SUCCESS(UErrorCode err);

	/*
	 * UChar-target flush helper. myTargetIndex and offsets are passed by address,
	 * so the function can update the caller's values.
	 */
	void flushInternalUnicodeBuffer(UConverter *_this,
	                                UChar *myTarget,
	                                int32_t *myTargetIndex,
	                                int32_t targetLength,
	                                int32_t **offsets,
	                                UErrorCode *err);

	/* char-target counterpart of flushInternalUnicodeBuffer; same parameter shape. */
	void flushInternalCharBuffer(UConverter *_this,
	                             char *myTarget,
	                             int32_t *myTargetIndex,
	                             int32_t targetLength,
	                             int32_t **offsets,
	                             UErrorCode *err);

	/**
	* UConverterImpl contains all the data and functions for a converter type.
	* Its function pointers work much like a C++ vtable.
	* Many converter types need to define only a subset of the functions;
	* when a function pointer is NULL, then a default action will be performed.
	*
	* Every converter type must implement toUnicode, fromUnicode, and getNextUChar,
	* otherwise the converter may crash.
	* Every converter type that has variable-length codepage sequences should
	* also implement toUnicodeWithOffsets and fromUnicodeWithOffsets for
	* correct offset handling.
	* All other functions may or may not be implemented - it depends only on
	* whether the converter type needs them.
	*
	* When open() fails, then close() will be called, if present.
	*/
	struct UConverterImpl {
	/* the converter type this set of functions belongs to */
	UConverterType type;

	/* optional hooks taking the UConverterSharedData (see UConverterLoad/Unload) */
	UConverterLoad load;
	UConverterUnload unload;

	/* optional per-converter hooks; close() is also called when open() fails */
	UConverterOpen open;
	UConverterClose close;
	UConverterReset reset;

	/*
	 * Conversion entry points. toUnicode, fromUnicode and getNextUChar are
	 * mandatory; the *WithOffsets variants should be provided by types with
	 * variable-length codepage sequences.
	 */
	T_ToUnicodeFunction toUnicode;
	T_ToUnicodeFunction toUnicodeWithOffsets;
	T_FromUnicodeFunction fromUnicode;
	T_FromUnicodeFunction fromUnicodeWithOffsets;
	T_GetNextUCharFunction getNextUChar;

	/* optional; fills a UBool[256] array (see UConverterGetStarters) */
	UConverterGetStarters getStarters;
	};

	/*
	 * Constant UConverterSharedData objects, one per built-in converter
	 * implementation; they are only declared here and are defined elsewhere.
	 * NOTE(review): identifiers beginning with an underscore followed by an
	 * uppercase letter are reserved in C; they are kept as-is because other
	 * files refer to them.
	 */
	extern const UConverterSharedData _SBCSData, _DBCSData, _MBCSData, _Latin1Data;
	extern const UConverterSharedData _UTF8Data, _UTF16BEData, _UTF16LEData;
	extern const UConverterSharedData _EBCDICStatefulData, _ISO2022Data;
	extern const UConverterSharedData _LMBCSData1, _LMBCSData2, _LMBCSData3,
	                                  _LMBCSData4, _LMBCSData5, _LMBCSData6;
	extern const UConverterSharedData _LMBCSData8, _LMBCSData11, _LMBCSData16,
	                                  _LMBCSData17, _LMBCSData18, _LMBCSData19;

	U_CDECL_END

	/**
	 * Returns the first code point of a UChar buffer and keeps the rest as
	 * overflow in the converter.
	 *
	 * This function is useful for implementations of getNextUChar().
	 * After a call to a callback function or to toUnicode(), an output buffer
	 * begins with a Unicode code point that needs to be returned as UChar32,
	 * and all following code units must be prepended to the - potentially
	 * prefilled - overflow buffer in the UConverter.
	 * The buffer should have a capacity of at least UTF_MAX_CHAR_LENGTH so that
	 * all UChars of a complete UChar32 fit into it.
	 *
	 * cnv and buffer are pointers; the "*" declarators had been lost from the
	 * prototype.
	 *
	 * @param cnv    The converter that will get remaining UChars copied to its overflow area.
	 * @param buffer An array of UChars that was passed into a callback function
	 *               or a toUnicode() function.
	 * @param length The number of code units (UChars) that are actually in the buffer.
	 *               This must be >0.
	 * @return The code point from the first UChars in the buffer.
	 */
	U_CFUNC UChar32
	ucnv_getUChar32KeepOverflow(UConverter *cnv, const UChar *buffer, int32_t length);

	#endif /* UCNV_CNV_H */