| #!/usr/bin/perl -w |
| |
| # Simple DirectMedia Layer |
| # Copyright (C) 1997-2025 Sam Lantinga <slouken@libsdl.org> |
| # |
| # This software is provided 'as-is', without any express or implied |
| # warranty. In no event will the authors be held liable for any damages |
| # arising from the use of this software. |
| # |
| # Permission is granted to anyone to use this software for any purpose, |
| # including commercial applications, and to alter it and redistribute it |
| # freely, subject to the following restrictions: |
| # |
| # 1. The origin of this software must not be misrepresented; you must not |
| # claim that you wrote the original software. If you use this software |
| # in a product, an acknowledgment in the product documentation would be |
| # appreciated but is not required. |
| # 2. Altered source versions must be plainly marked as such, and must not be |
| # misrepresented as being the original software. |
| # 3. This notice may not be removed or altered from any source distribution. |
| |
# This script was originally written by Ryan C. Gordon for PhysicsFS
# ( https://icculus.org/physfs/ ), under the zlib license (the same license
# that SDL itself uses).
| |
| use warnings; |
| use strict; |
| |
# Number of buckets in each generated hash table.  The name suffix is
# "<codepoints folded to>_<codepoint width in bits>".  Each count is turned
# into a bit mask (count - 1) when a codepoint is hashed, so every value
# here has to stay a power of two.
my ($HASHBUCKETS1_16, $HASHBUCKETS1_32, $HASHBUCKETS2_16, $HASHBUCKETS3_16)
    = (256, 16, 16, 4);

# Running estimate, in bytes, of the size of the generated tables; it is
# reported on stderr once the header has been written.
my $mem_used = 0;
| |
# Emit the fixed preamble of the generated C header on stdout: license text,
# "do not edit" notice, include guard, an explanation of the tables, and the
# struct typedefs the tables are built from.  The heredoc interpolates, which
# is why the '@' in the e-mail address is escaped.
print <<__EOF__;
/*
Simple DirectMedia Layer
Copyright (C) 1997-2025 Sam Lantinga <slouken\@libsdl.org>

This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any damages
arising from the use of this software.

Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it
freely, subject to the following restrictions:

1. The origin of this software must not be misrepresented; you must not
claim that you wrote the original software. If you use this software
in a product, an acknowledgment in the product documentation would be
appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be
misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/

/*
* This data was generated by SDL/build-scripts/makecasefoldhashtable.pl
*
* Do not manually edit this file!
*/

#ifndef SDL_casefolding_h_
#define SDL_casefolding_h_

/* We build four simple hashmaps here, each mapping a Unicode codepoint to
one, two, or three lowercase codepoints. To retrieve this info: look at
case_fold_hash1_16, case_fold_hash1_32, case_fold_hash2_16, or
case_fold_hash3_16, where the first number is how many codepoints an entry
folds to and the second is the codepoint width in bits. Most foldable
codepoints fold to one, a few dozen fold to two, and a handful fold to three.
If the codepoint isn't in any of these hashes, it doesn't fold (no separate
upper and lowercase).

Almost all these codepoints fit into 16 bits, so we hash them as such to save
memory. If a codepoint is > 0xFFFF, we have separate hashes for them,
since there are (currently) only about 120 of them and (currently) all of them
map to a single lowercase codepoint. */

typedef struct CaseFoldMapping1_32
{
Uint32 from;
Uint32 to0;
} CaseFoldMapping1_32;

typedef struct CaseFoldMapping1_16
{
Uint16 from;
Uint16 to0;
} CaseFoldMapping1_16;

typedef struct CaseFoldMapping2_16
{
Uint16 from;
Uint16 to0;
Uint16 to1;
} CaseFoldMapping2_16;

typedef struct CaseFoldMapping3_16
{
Uint16 from;
Uint16 to0;
Uint16 to1;
Uint16 to2;
} CaseFoldMapping3_16;

typedef struct CaseFoldHashBucket1_16
{
const CaseFoldMapping1_16 *list;
const Uint8 count;
} CaseFoldHashBucket1_16;

typedef struct CaseFoldHashBucket1_32
{
const CaseFoldMapping1_32 *list;
const Uint8 count;
} CaseFoldHashBucket1_32;

typedef struct CaseFoldHashBucket2_16
{
const CaseFoldMapping2_16 *list;
const Uint8 count;
} CaseFoldHashBucket2_16;

typedef struct CaseFoldHashBucket3_16
{
const CaseFoldMapping3_16 *list;
const Uint8 count;
} CaseFoldHashBucket3_16;

__EOF__
| |
| |
# One accumulator string per hash bucket, for each of the four tables.  Every
# slot starts out as the empty string so later code can append C initializer
# lines with .= and can recognise an unused bucket by comparing against ''.
my @foldPairs1_16 = ('') x $HASHBUCKETS1_16;
my @foldPairs2_16 = ('') x $HASHBUCKETS2_16;
my @foldPairs3_16 = ('') x $HASHBUCKETS3_16;
my @foldPairs1_32 = ('') x $HASHBUCKETS1_32;
| |
# Read casefolding.txt and sort every usable mapping into the per-bucket
# accumulator strings.
#
# A data line has the shape "<code>; <status>; <mapping>; # comment", where
# <code> is a hex codepoint, <status> is a single character and <mapping> is
# one to three whitespace-separated hex codepoints.  Only lines whose status
# is 'C' or 'F' are used; anything that doesn't match the shape is skipped.
#
# Dies if the file can't be opened, if a mapping has more than three
# codepoints or can't be parsed, if a codepoint above 0xFFFF folds to more
# than one codepoint, or if a 16-bit codepoint folds to something that would
# not fit in the Uint16 fields of the 16-bit tables.
open(my $casefold_fh, '<', 'casefolding.txt') or die("failed to open casefolding.txt: $!\n");
while (my $line = <$casefold_fh>) {
    chomp($line);

    # strip comments from textfile...
    $line =~ s/\#.*\Z//;

    # strip whitespace...
    $line =~ s/\A\s+//;
    $line =~ s/\s+\Z//;

    next if $line !~ /\A([a-fA-F0-9]+)\;\s*(.)\;\s*(.+)\;/;
    my ($code, $status, $mapping) = ($1, $2, $3);

    next if ($status ne 'C') and ($status ne 'F');

    # Peel up to three hex codepoints off the front of the mapping; whatever
    # is left over afterwards means the line had more than we can store.
    my ($map1, $map2, $map3);
    $map1 = $1 if $mapping =~ s/\A([a-fA-F0-9]+)(\s*|\Z)//;
    $map2 = $1 if $mapping =~ s/\A([a-fA-F0-9]+)(\s*|\Z)//;
    $map3 = $1 if $mapping =~ s/\A([a-fA-F0-9]+)(\s*|\Z)//;
    die("mapping space too small for '$code'\n") if ($mapping ne '');
    die("problem parsing mapping for '$code'\n") if (not defined($map1));

    my $hexxed = hex($code);

    # Just ignore these, we'll handle the low-ASCII ones ourselves.
    next if $hexxed < 128;

    # Fold the second byte into the low byte; each table then masks this
    # down to its own (power-of-two) bucket count.
    my $mixed = $hexxed ^ ($hexxed >> 8);

    if ($hexxed > 0xFFFF) {
        # We just need to add the 32-bit 2 and/or 3 codepoint maps if this die()'s here.
        die("Uhoh, a codepoint > 0xFFFF that folds to multiple codepoints! Fixme.") if defined($map2);
        $foldPairs1_32[$mixed & ($HASHBUCKETS1_32-1)] .= " { 0x$code, 0x$map1 },\n";
        $mem_used += 8;
        next;
    }

    # Everything below lands in a table whose fields are Uint16, so refuse
    # to silently truncate a fold target that needs more than 16 bits.
    for my $target (grep { defined } ($map1, $map2, $map3)) {
        die("Uhoh, 16-bit codepoint '$code' folds to '$target', which is > 0xFFFF! Fixme.\n")
            if hex($target) > 0xFFFF;
    }

    if (not defined($map2)) {
        $foldPairs1_16[$mixed & ($HASHBUCKETS1_16-1)] .= " { 0x$code, 0x$map1 },\n";
        $mem_used += 4;
    } elsif (not defined($map3)) {
        $foldPairs2_16[$mixed & ($HASHBUCKETS2_16-1)] .= " { 0x$code, 0x$map1, 0x$map2 },\n";
        $mem_used += 6;
    } else {
        $foldPairs3_16[$mixed & ($HASHBUCKETS3_16-1)] .= " { 0x$code, 0x$map1, 0x$map2, 0x$map3 },\n";
        $mem_used += 8;
    }
}
close($casefold_fh);
| |
# Write the collected mappings out as C: first one static array per
# non-empty bucket, then one bucket table per hash that points at those
# arrays.  The four tables differ only in their name suffix, bucket count
# and accumulator array, so they are described once here and emitted by the
# same two loops, in the same order as before (1_16, 1_32, 2_16, 3_16).
my @fold_tables = (
    { suffix => '1_16', buckets => $HASHBUCKETS1_16, pairs => \@foldPairs1_16 },
    { suffix => '1_32', buckets => $HASHBUCKETS1_32, pairs => \@foldPairs1_32 },
    { suffix => '2_16', buckets => $HASHBUCKETS2_16, pairs => \@foldPairs2_16 },
    { suffix => '3_16', buckets => $HASHBUCKETS3_16, pairs => \@foldPairs3_16 },
);

# Build the C symbol for one bucket's array, e.g. ('1_16', 5) gives
# "case_fold1_16_005".  The index is zero-padded to at least three digits
# and never truncated, so bucket counts of 1000 or more cannot produce two
# buckets with the same symbol.
sub _bucket_symbol {
    my ($suffix, $index) = @_;
    return sprintf('case_fold%s_%03d', $suffix, $index);
}

# Pass 1: the per-bucket mapping arrays.  Empty buckets get no array.
for my $table (@fold_tables) {
    my ($suffix, $pairs) = @{$table}{qw(suffix pairs)};
    for my $i (0 .. $table->{buckets} - 1) {
        # Drop the trailing ",\n" of the last initializer, in place, so the
        # second pass sees the same trimmed text.
        $pairs->[$i] =~ s/,\n\Z//;
        my $entries = $pairs->[$i];
        next if $entries eq '';
        my $sym = _bucket_symbol($suffix, $i);
        print("static const CaseFoldMapping${suffix} ${sym}[] = {\n$entries\n};\n\n");
    }
}

# Pass 2: the bucket tables.  Every bucket gets a row, empty or not, and
# every row is counted as 12 bytes in the memory estimate.
for my $table (@fold_tables) {
    my ($suffix, $pairs) = @{$table}{qw(suffix pairs)};
    print("static const CaseFoldHashBucket${suffix} case_fold_hash${suffix}[] = {\n");
    for my $i (0 .. $table->{buckets} - 1) {
        if ($pairs->[$i] eq '') {
            print(" { NULL, 0 },\n");
        } else {
            my $sym = _bucket_symbol($suffix, $i);
            print(" { $sym, SDL_arraysize($sym) },\n");
        }
        $mem_used += 12;
    }
    print("};\n\n");
}
| |
# Close the include guard that the generated header opened with
# "#ifndef SDL_casefolding_h_", followed by one blank line.
print("#endif /* SDL_casefolding_h_ */\n\n");

# The size estimate goes to stderr so it never ends up in the header that
# is being written to stdout.
print STDERR 'Memory required for case-folding hashtable: ' . $mem_used . " bytes\n";

exit(0);
| |
# end of makecasefoldhashtable.pl ...
| |