// icu4c/source/tools/escapesrc/tblgen.cpp - external/github.com/unicode-org/icu - Git at Google

 // © 2016 and later: Unicode, Inc. and others.
 // License & terms of use: http://www.unicode.org/copyright.html

 #include "unicode/utypes.h"
 #include "unicode/ucnv.h"
 #include "unicode/uniset.h"
 #include <stdio.h>

 // Name of the converter that main() opens with ucnv_open() and uses to decode
 // each byte value when generating the cp1047_8859_1 table.
 static const char *kConverter = "ibm-1047";

 /**
  * Emits, on stdout, C source for two 256-entry lookup tables:
  *   - cp1047_8859_1[b]: the UChar obtained by decoding the single byte b with
  *     the converter named by kConverter;
  *   - oldIllegal[c]: whether code point c (U+0000..U+00FF) is a member of the
  *     oldIllegal UnicodeSet built below.
  * Diagnostics are written to stderr.
  *
  * @param argc unused
  * @param argv unused
  * @return 0 on success; 1 if the converter cannot be opened; 2 if a byte
  *         cannot be converted to exactly one code unit; 3 if the oldIllegal
  *         set pattern cannot be parsed.
  */
 int main(int argc, const char *argv[]) {
   printf("// %s\n", U_COPYRIGHT_STRING);
   printf("// generated by tblgen. You weren't going to edit it by hand, were you?\n");
   printf("\n");

   UErrorCode status = U_ZERO_ERROR;
   LocalUConverterPointer cnv(ucnv_open(kConverter, &status));

   if(U_FAILURE(status)) {
     fprintf(stderr, "Failed to open %s: %s\n", kConverter, u_errorName(status));
     return 1;
   }

   printf("static const char cp1047_8859_1[256] = { \n");
   for(int i=0x00; i<0x100; i++) {
     // Decode one byte per call; flush=true marks it as the end of the input.
     char cp1047[1];
     cp1047[0] = static_cast<char>(i);
     UChar u[1] = { 0 };
     UChar *target = u;
     const char *source = cp1047;
     ucnv_toUnicode(cnv.getAlias(), &target, u+1, &source, cp1047+1, nullptr, true, &status);
     if(U_FAILURE(status)) {
       fprintf(stderr, "Conversion failure at #%X: %s\n", (unsigned int)i, u_errorName(status));
       return 2;
     }
     if(target != u+1) {
       // The converter wrote nothing to u[0]; refuse to emit a bogus entry.
       fprintf(stderr, "Conversion failure at #%X: no output produced\n", (unsigned int)i);
       return 2;
     }
     printf(" (char)0x%02X, /* %02X */\n", (unsigned int)u[0], (unsigned int)i);
   }
   printf("};\n\n");

   // Set of characters reported in the oldIllegal table; see the quoted
   // standard text below for the motivation.
   // NOTE(review): the pattern ends with a backslash followed by a space
   // ("\\ "); confirm whether that is meant to add a literal backslash or an
   // escaped space to the set.
   UnicodeSet oldIllegal("[0-9 a-z A-Z "
                         "_ \\{ \\} \\[ \\] # \\( \\) < > % \\: ; . "
                         "? * + \\- / \\^ \\& | ~ ! = , \\ \" ' ]", status);
   if(U_FAILURE(status)) {
     // Without this check a bad pattern would silently produce a wrong table.
     fprintf(stderr, "Failed to build oldIllegal set: %s\n", u_errorName(status));
     return 3;
   }

   /*

  http://www.lirmm.fr/~ducour/Doc-objets/ISO+IEC+14882-1998.pdf ( note: 1998 )   page 10, section 2.2 says:

  1 The basic source character set consists of 96 characters: the space
    character, the control characters representing horizontal tab, vertical
    tab, form feed, and new-line, plus the following 91 graphical characters:
      a b c d e f g h i j k l m n o p q r s t u v w x y z
      A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
      0 1 2 3 4 5 6 7 8 9
      _ { } [ ] # ( ) < > % : ; . ? * + - / ^ & | ~ ! = , \ " '
  2 The universal-character-name construct provides a way to name other
    characters.
      hex-quad:
        hexadecimal-digit hexadecimal-digit hexadecimal-digit hexadecimal-digit
      universal-character-name:
        \u hex-quad
        \U hex-quad hex-quad
    The character designated by the universal-character-name \UNNNNNNNN is
    that character whose character short name in ISO/IEC 10646 is NNNNNNNN;
    the character designated by the universal-character-name \uNNNN is that
    character whose character short name in ISO/IEC 10646 is 0000NNNN. If the
    hexadecimal value for a universal character name is less than 0x20 or in
    the range 0x7F-0x9F (inclusive), or if the universal character name
    designates a character in the basic source character set, then the
    program is ill-formed.


  So basically:  printable ASCII plus  0x00-0x1F,  0x7F-0x9F, was all illegal.

  Some discussion at http://unicode.org/mail-arch/unicode-ml/y2003-m10/0471.html

     */


   printf("static const bool oldIllegal[256] = { \n");
   for(UChar i=0x00; i<0x100;i++) {
     printf(" %s, /* U+%04X */\n",
            (oldIllegal.contains(i))?" true":"false",
            (unsigned int)i);
   }
   printf("};\n\n");

   return 0;
 }
	// © 2016 and later: Unicode, Inc. and others.
	// License & terms of use: http://www.unicode.org/copyright.html

	#include "unicode/utypes.h"
	#include "unicode/ucnv.h"
	#include "unicode/uniset.h"
	#include <stdio.h>

	// Name of the converter that main() opens with ucnv_open() and uses to decode
	// each byte value when generating the cp1047_8859_1 table.
	static const char *kConverter = "ibm-1047";

	/**
	 * Emits, on stdout, C source for two 256-entry lookup tables:
	 *   - cp1047_8859_1[b]: the UChar obtained by decoding the single byte b with
	 *     the converter named by kConverter;
	 *   - oldIllegal[c]: whether code point c (U+0000..U+00FF) is a member of the
	 *     oldIllegal UnicodeSet built below.
	 * Diagnostics are written to stderr.
	 *
	 * @param argc unused
	 * @param argv unused
	 * @return 0 on success; 1 if the converter cannot be opened; 2 if a byte
	 *         cannot be converted to exactly one code unit; 3 if the oldIllegal
	 *         set pattern cannot be parsed.
	 */
	int main(int argc, const char *argv[]) {
	  printf("// %s\n", U_COPYRIGHT_STRING);
	  printf("// generated by tblgen. You weren't going to edit it by hand, were you?\n");
	  printf("\n");

	  UErrorCode status = U_ZERO_ERROR;
	  LocalUConverterPointer cnv(ucnv_open(kConverter, &status));

	  if(U_FAILURE(status)) {
	    fprintf(stderr, "Failed to open %s: %s\n", kConverter, u_errorName(status));
	    return 1;
	  }

	  printf("static const char cp1047_8859_1[256] = { \n");
	  for(int i=0x00; i<0x100; i++) {
	    // Decode one byte per call; flush=true marks it as the end of the input.
	    char cp1047[1];
	    cp1047[0] = static_cast<char>(i);
	    UChar u[1] = { 0 };
	    UChar *target = u;
	    const char *source = cp1047;
	    ucnv_toUnicode(cnv.getAlias(), &target, u+1, &source, cp1047+1, nullptr, true, &status);
	    if(U_FAILURE(status)) {
	      fprintf(stderr, "Conversion failure at #%X: %s\n", (unsigned int)i, u_errorName(status));
	      return 2;
	    }
	    if(target != u+1) {
	      // The converter wrote nothing to u[0]; refuse to emit a bogus entry.
	      fprintf(stderr, "Conversion failure at #%X: no output produced\n", (unsigned int)i);
	      return 2;
	    }
	    printf(" (char)0x%02X, /* %02X */\n", (unsigned int)u[0], (unsigned int)i);
	  }
	  printf("};\n\n");

	  // Set of characters reported in the oldIllegal table; see the quoted
	  // standard text below for the motivation.
	  // NOTE(review): the pattern ends with a backslash followed by a space
	  // ("\\ "); confirm whether that is meant to add a literal backslash or an
	  // escaped space to the set.
	  UnicodeSet oldIllegal("[0-9 a-z A-Z "
	                        "_ \\{ \\} \\[ \\] # \\( \\) < > % \\: ; . "
	                        "? * + \\- / \\^ \\& | ~ ! = , \\ \" ' ]", status);
	  if(U_FAILURE(status)) {
	    // Without this check a bad pattern would silently produce a wrong table.
	    fprintf(stderr, "Failed to build oldIllegal set: %s\n", u_errorName(status));
	    return 3;
	  }

	  /*

	  http://www.lirmm.fr/~ducour/Doc-objets/ISO+IEC+14882-1998.pdf ( note: 1998 ) page 10, section 2.2 says:

	  1 The basic source character set consists of 96 characters: the space
	    character, the control characters representing horizontal tab, vertical
	    tab, form feed, and new-line, plus the following 91 graphical characters:
	      a b c d e f g h i j k l m n o p q r s t u v w x y z
	      A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
	      0 1 2 3 4 5 6 7 8 9
	      _ { } [ ] # ( ) < > % : ; . ? * + - / ^ & | ~ ! = , \ " '
	  2 The universal-character-name construct provides a way to name other
	    characters.
	      hex-quad:
	        hexadecimal-digit hexadecimal-digit hexadecimal-digit hexadecimal-digit
	      universal-character-name:
	        \u hex-quad
	        \U hex-quad hex-quad
	    The character designated by the universal-character-name \UNNNNNNNN is
	    that character whose character short name in ISO/IEC 10646 is NNNNNNNN;
	    the character designated by the universal-character-name \uNNNN is that
	    character whose character short name in ISO/IEC 10646 is 0000NNNN. If the
	    hexadecimal value for a universal character name is less than 0x20 or in
	    the range 0x7F-0x9F (inclusive), or if the universal character name
	    designates a character in the basic source character set, then the
	    program is ill-formed.


	  So basically: printable ASCII plus 0x00-0x1F, 0x7F-0x9F, was all illegal.

	  Some discussion at http://unicode.org/mail-arch/unicode-ml/y2003-m10/0471.html

	  */



	  printf("static const bool oldIllegal[256] = { \n");
	  for(UChar i=0x00; i<0x100;i++) {
	    printf(" %s, /* U+%04X */\n",
	           (oldIllegal.contains(i))?" true":"false",
	           (unsigned int)i);
	  }
	  printf("};\n\n");

	  return 0;
	}