blob: f25e8def04d3e69ee39ceb89e45ab41ea374465d [file] [log] [blame]
/*
*******************************************************************************
* Copyright (C) 2001, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
* file name: genjp.h
* encoding: US-ASCII
*
* Modification history
* Date Name Comments
* 10/13/2001 weiv created
*
* The GenJP class generates various data related to the Japanese language.
* Right now, it uses ICU to generate rules for JIS X 4061 compliant collation.
* It is also useful for getting the compatibility versions of characters.
*/
#ifndef ICU_GENJP
#define ICU_GENJP
#include <stdio.h>
#include "unicode/utypes.h"
#include "unicode/uchar.h"
#include "unicode/unorm.h"
#include "ucmpe32.h"
#include "cmemory.h"
// File-scope constants shared by the GenJP implementation.
// All of them have internal linkage, so every translation unit that includes
// this header gets its own copy.

// Buffer size in elements.
// NOTE(review): presumably the size of the scratch/name buffers allocated by
// the implementation -- the users of this constant are not visible here.
static const uint32_t _bufferSize = 256;

// Zero-terminated list of the small katakana vowels:
// U+30A1 SMALL A, U+30A3 SMALL I, U+30A5 SMALL U, U+30A7 SMALL E, U+30A9 SMALL O.
static const UChar _vowels[] = { 0x30A1, 0x30A3, 0x30A5, 0x30A7, 0x30A9, 0 };

// U+30FC KATAKANA-HIRAGANA PROLONGED SOUND MARK (the length mark).
static const UChar _prolongedSoundMark = 0x30FC;
// U+3099 COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK.
static const UChar _voicedMark = 0x3099;

// Iteration marks, plain and voiced, for each of the two kana scripts.
static const UChar _hiraganaIterationMark = 0x309D;        // U+309D HIRAGANA ITERATION MARK
static const UChar _hiraganaVoicedIterationMark = 0x309E;  // U+309E HIRAGANA VOICED ITERATION MARK
static const UChar _katakanaIterationMark = 0x30FD;        // U+30FD KATAKANA ITERATION MARK
static const UChar _katakanaVoicedIterationMark = 0x30FE;  // U+30FE KATAKANA VOICED ITERATION MARK

// Bounds of the kana letter ranges.
// NOTE(review): whether the end values are treated as inclusive is decided by
// the loops in the implementation, which are not visible here.
static const UChar _hiraganaStart = 0x3041;  // U+3041 HIRAGANA LETTER SMALL A
static const UChar _hiraganaEnd = 0x3094;    // U+3094 HIRAGANA LETTER VU
static const UChar _katakanaStart = 0x30A1;  // U+30A1 KATAKANA LETTER SMALL A
static const UChar _katakanaEnd = 0x30FA;    // U+30FA KATAKANA LETTER VO

// Text fragments for the generated rules: a double quote followed by the
// "<<<" operator, and a space, a double quote and the "=" operator.
// The pointers themselves are const so that no translation unit can re-point
// its private copy, and so that units which include this header without using
// them do not get a mutable unused variable.
static const char *const _tertiaryLess = "\"<<<";
static const char *const _equal = " \"=";
// GenJP declares the steps of the Japanese collation rule generator: a
// header/footer pair, one public process* step per family of rules, and the
// private per-character helpers and state behind them.
// Every step takes an ICU UErrorCode by reference.
// NOTE(review): the member definitions are not in this header; descriptions
// below that go beyond the declarations are marked as assumptions.
class GenJP {
public:
GenJP();
~GenJP();
// Gets the compatibility version of a UChar.
UChar getHalf(UChar u);
// The structure holds halfwidth and fullwidth compatibility characters.
// NOTE(review): "the structure" presumably refers to kanaToHalf below -- confirm.
// Is the code point semivoiced?
UBool isSemivoiced(UChar ch, UErrorCode &status);
// Is the code point voiced?
UBool isVoiced(UChar ch, UErrorCode &status);
void writeHeader(UErrorCode &status);
// Handles the small vowels and generates the rules for the length mark.
void processLengthMark(UErrorCode &status);
// Generates the rules for the iteration mark.
void processIterationMark(UErrorCode &status);
// Generates the rules that make compatibility characters equal to their
// normal counterparts (only halfwidth and fullwidth).
void processCompatibility(UErrorCode &status);
// Generates the rules &K=K=hK=H.
void equalKatakanaToHiragana(UErrorCode &status);
// Just prints out the Kanji ordering.
void printOutKanji(UErrorCode &status);
void writeFooter(UErrorCode &status);
private:
// Returns a relation string for the rule being written.
// NOTE(review): presumably chooses between _tertiaryLess and _equal based on
// wasReset -- confirm in the implementation.
const char *getRelation();
// Maps a katakana code unit to hiragana; defined inline after the class.
UChar getHiragana(UChar katakana);
// Returns a name for ch.
// NOTE(review): presumably the Unicode character name, stored in nameBuff -- confirm.
const char *getName(const UChar ch, UErrorCode &status);
// Formats zTStuff into resBuf and returns a char pointer.
// NOTE(review): the parameter name suggests a zero-terminated UChar string,
// and the result is presumably resBuf -- confirm.
char *printUnicodeStuff(UChar *zTStuff, char *resBuf);
// Per-character helpers, each taking a single katakana code unit.
void processIterationMark(UChar katakana, UErrorCode &status);
void processVoicedIterationMark(UChar katakana, UErrorCode &status);
void processVoicedKana(UChar katakana, UErrorCode &status);
void processSemivoicedKana(UChar katakana, UErrorCode &status);
// NOTE(review): presumably the lookup table behind getHalf() -- confirm.
CompactEIntArray *kanaToHalf;
// Output stream.
// NOTE(review): presumably where the generated rules are written -- confirm.
FILE *out;
// NOTE(review): presumably the scratch buffer used by getName() -- confirm.
char *nameBuff;
// NOTE(review): presumably records that a rule reset was just emitted, for
// use by getRelation() -- confirm.
UBool wasReset;
};
// Converts a katakana code unit to the hiragana code unit at the same
// position within its block, by subtracting the distance between the two
// block starts (_katakanaStart - _hiraganaStart).
// The input is not range-checked: any value is shifted by that distance.
inline UChar GenJP::getHiragana(UChar katakana) {
    const int kanaBlockDistance = _katakanaStart - _hiraganaStart;
    return static_cast<UChar>(katakana - kanaBlockDistance);
}
#endif