| <!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML//EN"> |
| <html xmlns:v="urn:schemas-microsoft-com:vml" |
| xmlns:o="urn:schemas-microsoft-com:office:office" |
| xmlns:w="urn:schemas-microsoft-com:office:word" |
| xmlns="http://www.w3.org/TR/REC-html40"> |
| |
| <head> |
| <meta http-equiv=Content-Type content="text/html; charset=iso-8859-1"> |
| <meta name=ProgId content=Word.Document> |
| <meta name=Generator content="Microsoft Word 9"> |
| <meta name=Originator content="Microsoft Word 9"> |
| <link rel=File-List href="./Collate_files/filelist.xml"> |
| <link rel=Edit-Time-Data href="./Collate_files/editdata.mso"> |
| <!--[if !mso]> |
| <style> |
| v\:* {behavior:url(#default#VML);} |
| o\:* {behavior:url(#default#VML);} |
| w\:* {behavior:url(#default#VML);} |
| .shape {behavior:url(#default#VML);} |
| </style> |
| <![endif]--> |
| <title>International Classes for Unicode - Collation</title> |
| <!--[if gte mso 9]><xml> |
| <o:DocumentProperties> |
| <o:Author>Helena Shih</o:Author> |
| <o:Template>Normal</o:Template> |
| <o:LastAuthor>Helena Shih</o:LastAuthor> |
| <o:Revision>2</o:Revision> |
| <o:TotalTime>0</o:TotalTime> |
| <o:Created>2000-01-15T02:20:00Z</o:Created> |
| <o:LastSaved>2000-01-15T02:20:00Z</o:LastSaved> |
| <o:Pages>4</o:Pages> |
| <o:Words>982</o:Words> |
| <o:Characters>5599</o:Characters> |
| <o:Company>IBM</o:Company> |
| <o:Lines>46</o:Lines> |
| <o:Paragraphs>11</o:Paragraphs> |
| <o:CharactersWithSpaces>6875</o:CharactersWithSpaces> |
| <o:Version>9.2720</o:Version> |
| </o:DocumentProperties> |
| </xml><![endif]--> |
| <style> |
| <!-- |
| /* Style Definitions */ |
| p.MsoNormal, li.MsoNormal, div.MsoNormal |
| {mso-style-parent:""; |
| margin:0in; |
| margin-bottom:.0001pt; |
| mso-pagination:widow-orphan; |
| font-size:12.0pt; |
| font-family:"Times New Roman"; |
| mso-fareast-font-family:"Times New Roman";} |
| p |
| {font-size:12.0pt; |
| font-family:"Times New Roman"; |
| mso-fareast-font-family:"Times New Roman";} |
| @page Section1 |
| {size:8.5in 11.0in; |
| margin:1.0in 1.25in 1.0in 1.25in; |
| mso-header-margin:.5in; |
| mso-footer-margin:.5in; |
| mso-paper-source:0;} |
| div.Section1 |
| {page:Section1;} |
| /* List Definitions */ |
| @list l0 |
| {mso-list-id:56786128; |
| mso-list-type:hybrid; |
| mso-list-template-ids:316935058 799580840 -2129604566 1894698424 -1886861812 1076558752 -1316478726 -1694838522 -1962102214 -432647774;} |
| @list l0:level1 |
| {mso-level-number-format:bullet; |
| mso-level-text:\F0B7; |
| mso-level-tab-stop:.5in; |
| mso-level-number-position:left; |
| text-indent:-.25in; |
| mso-ansi-font-size:10.0pt; |
| font-family:Symbol;} |
| @list l1 |
| {mso-list-id:218128614; |
| mso-list-type:hybrid; |
| mso-list-template-ids:726427572 -220806470 -535028662 -1139008068 926857880 19293176 -1336270008 -629910652 1961381030 -112671298;} |
| @list l1:level1 |
| {mso-level-number-format:bullet; |
| mso-level-text:\F0B7; |
| mso-level-tab-stop:.5in; |
| mso-level-number-position:left; |
| text-indent:-.25in; |
| mso-ansi-font-size:10.0pt; |
| font-family:Symbol;} |
| @list l2 |
| {mso-list-id:398596625; |
| mso-list-type:hybrid; |
| mso-list-template-ids:1581174674 -1457777898 -2089911548 -358428948 -724424496 486305342 -803443362 102692998 -557393154 -905049134;} |
| @list l3 |
| {mso-list-id:399836585; |
| mso-list-type:hybrid; |
| mso-list-template-ids:-1362098886 105021812 -1203221970 402953214 116268298 1957075642 -2103935390 135310026 2024683000 -1150503632;} |
| @list l4 |
| {mso-list-id:512963647; |
| mso-list-type:hybrid; |
| mso-list-template-ids:870891966 718416036 1606166768 -734915736 -457166330 1502492706 -472354846 -1617501690 -1298739844 -1263368320;} |
| @list l4:level1 |
| {mso-level-number-format:bullet; |
| mso-level-text:\F0B7; |
| mso-level-tab-stop:.5in; |
| mso-level-number-position:left; |
| text-indent:-.25in; |
| mso-ansi-font-size:10.0pt; |
| font-family:Symbol;} |
| @list l4:level2 |
| {mso-level-number-format:bullet; |
| mso-level-text:o; |
| mso-level-tab-stop:1.0in; |
| mso-level-number-position:left; |
| text-indent:-.25in; |
| mso-ansi-font-size:10.0pt; |
| font-family:"Courier New"; |
| mso-bidi-font-family:"Times New Roman";} |
| @list l5 |
| {mso-list-id:1228296673; |
| mso-list-type:hybrid; |
| mso-list-template-ids:-1126373434 672300220 -1390479766 -247709302 -1005183176 586437888 1683491832 2053804606 -1192436542 -1064926734;} |
| @list l5:level1 |
| {mso-level-number-format:bullet; |
| mso-level-text:\F0B7; |
| mso-level-tab-stop:.5in; |
| mso-level-number-position:left; |
| text-indent:-.25in; |
| mso-ansi-font-size:10.0pt; |
| font-family:Symbol;} |
| @list l6 |
| {mso-list-id:1292789779; |
| mso-list-type:hybrid; |
| mso-list-template-ids:-517678870 758258566 1452305524 223270358 -53846700 -513747388 -2068313458 1590586680 -1770905550 -1947821216;} |
| @list l6:level1 |
| {mso-level-number-format:bullet; |
| mso-level-text:\F0B7; |
| mso-level-tab-stop:.5in; |
| mso-level-number-position:left; |
| text-indent:-.25in; |
| mso-ansi-font-size:10.0pt; |
| font-family:Symbol;} |
| @list l7 |
| {mso-list-id:1316833148; |
| mso-list-type:hybrid; |
| mso-list-template-ids:1332113200 -424870104 1051208176 892489996 -1233066968 -1810460500 -1022461362 2044245910 -1736145250 -1083136974;} |
| @list l7:level1 |
| {mso-level-number-format:bullet; |
| mso-level-text:\F0B7; |
| mso-level-tab-stop:.5in; |
| mso-level-number-position:left; |
| text-indent:-.25in; |
| mso-ansi-font-size:10.0pt; |
| font-family:Symbol;} |
| @list l8 |
| {mso-list-id:1327392266; |
| mso-list-type:hybrid; |
| mso-list-template-ids:-1403883808 -326201746 -1498098978 273696058 1220037092 978733230 -1686724236 -2129999476 1994688346 -162764280;} |
| @list l8:level1 |
| {mso-level-number-format:bullet; |
| mso-level-text:\F0B7; |
| mso-level-tab-stop:.5in; |
| mso-level-number-position:left; |
| text-indent:-.25in; |
| mso-ansi-font-size:10.0pt; |
| font-family:Symbol;} |
| @list l9 |
| {mso-list-id:1662541158; |
| mso-list-type:hybrid; |
| mso-list-template-ids:-1664995996 -1721883820 -1345845536 314467622 -225125196 2134916302 970646716 2097594674 1329790124 -20539764;} |
| @list l9:level1 |
| {mso-level-number-format:bullet; |
| mso-level-text:\F0B7; |
| mso-level-tab-stop:.5in; |
| mso-level-number-position:left; |
| text-indent:-.25in; |
| mso-ansi-font-size:10.0pt; |
| font-family:Symbol;} |
| @list l10 |
| {mso-list-id:1800419391; |
| mso-list-type:hybrid; |
| mso-list-template-ids:-160151480 1490450160 1649709476 -912377276 -843295976 955542454 -958090392 1090130474 897190372 -1064773102;} |
| @list l10:level1 |
| {mso-level-number-format:bullet; |
| mso-level-text:\F0B7; |
| mso-level-tab-stop:.5in; |
| mso-level-number-position:left; |
| text-indent:-.25in; |
| mso-ansi-font-size:10.0pt; |
| font-family:Symbol;} |
| @list l11 |
| {mso-list-id:1804076982; |
| mso-list-type:hybrid; |
| mso-list-template-ids:26769352 -1273701986 819625222 -1355785140 1125281256 -561239646 -1265353568 721578390 882827536 -1507570576;} |
| ol |
| {margin-bottom:0in;} |
| ul |
| {margin-bottom:0in;} |
| --> |
| </style> |
| <!--[if gte mso 9]><xml> |
| <o:shapedefaults v:ext="edit" spidmax="1027"/> |
| </xml><![endif]--><!--[if gte mso 9]><xml> |
| <o:shapelayout v:ext="edit"> |
| <o:idmap v:ext="edit" data="1"/> |
| </o:shapelayout></xml><![endif]--> |
| </head> |
| |
| <body bgcolor=white lang=EN-US link=blue vlink=blue style='tab-interval:.5in'> |
| |
| <div class=Section1> |
| |
| <h1>International Components for Unicode</h1> |
| |
| <h2>Collation Framework</h2> |
| |
| |
| <div class=MsoNormal align=center style='text-align:center'> |
| |
| <hr size=2 width="100%" align=center> |
| |
| </div> |
| |
| |
| <h3><u>Contents</u></h3> |
| |
| <ul type=disc> |
| <li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto; |
| mso-list:l4 level1 lfo1;tab-stops:list .5in'>What is collation?</li> |
| <li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto; |
| mso-list:l4 level1 lfo1;tab-stops:list .5in'>The rule symbols and their |
| usage</li> |
| <li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto; |
| mso-list:l4 level1 lfo1;tab-stops:list .5in'>Interesting Examples</li> |
| <li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto; |
| mso-list:l4 level1 lfo1;tab-stops:list .5in'>Implementation Details</li> |
| <ul type=circle> |
| <li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt: |
| auto;mso-list:l4 level2 lfo1;tab-stops:list 1.0in'>Building the Collation |
| Table</li> |
| <li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt: |
| auto;mso-list:l4 level2 lfo1;tab-stops:list 1.0in'>Incremental Comparison |
| Diagram</li> |
| <li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt: |
| auto;mso-list:l4 level2 lfo1;tab-stops:list 1.0in'>Generating a Collation |
| Key</li> |
| </ul> |
| <li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto; |
| mso-list:l4 level1 lfo1;tab-stops:list .5in'>Q and A</li> |
| </ul> |
| |
| <h3><u>What is collation?</u></h3> |
| |
| <p>The collation framework performs locale-sensitive string comparison. Users of |
| these classes can build searching and sorting routines for natural language |
| text, build tables of contents for large documentation, or create efficient |
| index lookups for database entries.<br> |
| <br> |
| The ICU Collator classes provide services to allow: </p> |
| |
| <ul type=disc> |
| <li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto; |
| mso-list:l10 level1 lfo2;tab-stops:list .5in'>Simple, data-driven, table |
| based collation.</li> |
| <li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto; |
| mso-list:l10 level1 lfo2;tab-stops:list .5in'>Easily customizable for your |
| needs.</li> |
| <li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto; |
| mso-list:l10 level1 lfo2;tab-stops:list .5in'>Merging different resources |
| made possible.</li> |
| <li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto; |
| mso-list:l10 level1 lfo2;tab-stops:list .5in'>Behind the scene |
| transforming the ASCII data file into a binary file for efficiency.</li> |
| <li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto; |
| mso-list:l10 level1 lfo2;tab-stops:list .5in'>Offering both incremental |
| comparison for simple comparison and collation keys for batch processes.</li> |
| </ul> |
| |
| <p>There are 4 comparison levels in the Collator classes to allow different |
| levels of difference to be considered significant: </p> |
| |
| <ul type=disc> |
| <li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto; |
| mso-list:l6 level1 lfo3;tab-stops:list .5in'>Primary: a letter difference. |
| For example, 'a' and 'b'.</li> |
| <li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto; |
| mso-list:l6 level1 lfo3;tab-stops:list .5in'>Secondary: an accent |
| difference. For example, 'ä' and 'å'.</li> |
| <li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto; |
| mso-list:l6 level1 lfo3;tab-stops:list .5in'>Tertiary: a case difference. |
| For example, 'a' and 'A'.</li> |
| <li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto; |
| mso-list:l6 level1 lfo3;tab-stops:list .5in'>Identical: no difference. For |
| example, 'a' and 'a'.</li> |
| </ul> |
| |
| <h3><u>The rule symbols and their usage</u></h3> |
| |
| <p>A string is decomposed into one or more collation elements when used with |
| the collation classes. The collation rules specify the order of these collation |
| elements. The collation table is composed of a list of collation rules, where |
| each rule takes one of three forms: </p> |
| |
| <ol start=1 type=1> |
| <li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto; |
| mso-list:l3 level1 lfo4;tab-stops:list .5in'>&lt;modifier&gt;</li> |
| <li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto; |
| mso-list:l3 level1 lfo4;tab-stops:list .5in'>&lt;relation&gt; |
| &lt;text-argument&gt;</li> |
| <li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto; |
| mso-list:l3 level1 lfo4;tab-stops:list .5in'>&lt;reset&gt; |
| &lt;text-argument1&gt; &lt;relation&gt; &lt;text-argument2&gt;</li> |
| </ol> |
| |
| <h4>&lt;modifier&gt;</h4> |
| |
| <ul type=disc> |
| <li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto; |
| mso-list:l7 level1 lfo5;tab-stops:list .5in'>'@': French secondary, accent |
| weights sorted backwards.</li> |
| </ul> |
| |
| <h4>&lt;text-argument&gt;</h4> |
| |
| <p>A text-argument is any sequence of characters, excluding special characters |
| (that is, common whitespace characters [0009-000D, 0020] and rule syntax |
| characters [0021-002F, 003A-0040, 005B-0060, 007B-007E]). If those characters |
| are desired, you can put them in single quotes (e.g. ampersand =&gt; '&amp;'). |
| Note that unquoted white space characters are ignored; e.g. "b c" is |
| treated as "bc".</p> |
| |
| <h4>&lt;relation&gt;</h4> |
| |
| <ul type=disc> |
| <li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto; |
| mso-list:l9 level1 lfo6;tab-stops:list .5in'>'&lt;' : Greater, as a letter |
| difference (primary)</li> |
| <li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto; |
| mso-list:l9 level1 lfo6;tab-stops:list .5in'>';' : Greater, as an accent |
| difference (secondary)</li> |
| <li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto; |
| mso-list:l9 level1 lfo6;tab-stops:list .5in'>',' : Greater, as a case |
| difference (tertiary)</li> |
| <li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto; |
| mso-list:l9 level1 lfo6;tab-stops:list .5in'>'=' : Equal</li> |
| </ul> |
| |
| <h4>&lt;reset&gt;</h4> |
| |
| <ul type=disc> |
| <li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto; |
| mso-list:l0 level1 lfo7;tab-stops:list .5in'>'&amp;': Indicates that |
| text-argument2 follows the position to where the reset text-argument1 |
| would be sorted.</li> |
| </ul> |
| |
| <h3><u>Interesting Examples</u></h3> |
| |
| <p>The following is a list of interesting examples of the rules and some string |
| comparison results using those rules. The comparison relation will be denoted |
| as "&lt;" for a primary difference of less than, "&lt;&lt;" for a |
| secondary difference of less than, "&lt;&lt;&lt;" for a tertiary |
| difference of less than and "==" for an equal-to relationship: </p> |
| |
| <ul type=disc> |
| <li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto; |
| mso-list:l1 level1 lfo8;tab-stops:list .5in'>Rule " a, A &lt; b, B |
| &lt; c, C &lt; ch, cH, Ch, CH &lt; d, D &lt; e, E": this rule simply |
| sorts the letters 'a', 'b', 'c', 'd' and 'e' in that order with primary |
| weights. 'ch' is sorted as a significant letter between 'c' and 'd' with |
| primary weights and upper cased letters sort after lower cased letters |
| with tertiary weights. For example, "abc" &lt;&lt;&lt; |
| "ABC" and "achb" &lt; "adb".</li> |
| <li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto; |
| mso-list:l1 level1 lfo8;tab-stops:list .5in'>Rule " a, A &lt; b, B |
| &lt; c, C &lt; d, D &lt; e, E &amp; AE; ä ": this will sort letters |
| 'a', 'b', 'c', 'd' and 'e' in that order with primary weights. 'ä' will |
| sort after the sequence of 'A' followed by 'E' with a secondary difference. |
| For example, "aeb" &lt;&lt; "äb" and "acb" |
| &lt; "äb".</li> |
| <li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto; |
| mso-list:l1 level1 lfo8;tab-stops:list .5in'>Rule ".... q, Q &amp; |
| Question'-'mark = '?' ....": the rule shows how to sort symbols to be |
| equivalent to the corresponding text. In this example, "?" == |
| "Question-mark". Note that the special symbols need to be quoted |
| in the rule.</li> |
| <li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto; |
| mso-list:l1 level1 lfo8;tab-stops:list .5in'>Rule ".... &amp; aa ; a- |
| &amp; ee ; e- &amp; ii ; i- &amp; oo ; o- &amp; uu ; u- ....": this |
| rule demonstrates how to specify prolonged vowels in Japanese. In this |
| case, "aa" is sorted with a secondary difference before |
| "a-". For example, "baab" &lt;&lt; "ba-b".</li> |
| </ul> |
| |
| <h3><u>Implementation Details</u></h3> |
| |
| <p>Three parts of the code will be carefully examined here: </p> |
| |
| <ul type=disc> |
| <li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto; |
| mso-list:l5 level1 lfo9;tab-stops:list .5in'>Building the collation rule |
| table. (see mergecol.cpp, ptnentry.cpp and tblcoll.cpp)</li> |
| <li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto; |
| mso-list:l5 level1 lfo9;tab-stops:list .5in'>Incremental comparison |
| algorithm for simple string comparison. (RuleBasedCollator.compare() in |
| tblcoll.cpp)</li> |
| <li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto; |
| mso-list:l5 level1 lfo9;tab-stops:list .5in'>Collation key generation and |
| its format. (RuleBasedCollator.getCollationKey() in tblcoll.cpp)</li> |
| </ul> |
| |
| <h3><u>Building the Collation Table</u></h3> |
| |
| <p>The process of building a collation table is as follows: </p> |
| |
| <ul type=disc> |
| <li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto; |
| mso-list:l8 level1 lfo10;tab-stops:list .5in'>Parse the rule text into a |
| list of pattern entries. Each pattern has the content of current core |
| characters, extension character and the strength relation. (In |
| ptnentry.cpp)</li> |
| <li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto; |
| mso-list:l8 level1 lfo10;tab-stops:list .5in'>Insert each entry at the |
| correct position based on the &lt;reset&gt; arguments. (In mergecol.cpp)</li> |
| <li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto; |
| mso-list:l8 level1 lfo10;tab-stops:list .5in'>Build the compacted, highly |
| efficient look-up table based on the list of pattern entries. (In |
| tblcoll.cpp)</li> |
| </ul> |
| |
| <p> </p> |
| |
| <h3><u>Incremental Comparison Diagram</u></h3> |
| |
| <p> </p> |
| |
| <p><img width=468 height=800 id="_x0000_i1026" src="collflow.gif" alt="Flow diagram of the incremental comparison algorithm"></p> |
| |
| <h3><u>Generating a Collation Key</u></h3> |
| |
| <p>The control flow of generating a collation key is as follows: </p> |
| |
| <ol start=1 type=1> |
| <li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto; |
| mso-list:l2 level1 lfo11;tab-stops:list .5in'>Retrieve the next collation |
| element of the source string. Go to step 5 when the end of the string is reached.</li> |
| <li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto; |
| mso-list:l2 level1 lfo11;tab-stops:list .5in'>Append the primary weight of |
| element to the primary weight buffer.</li> |
| <li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto; |
| mso-list:l2 level1 lfo11;tab-stops:list .5in'>Checks if it's necessary to |
| process secondary weights. If so, append the secondary weights to the |
| secondary weight buffer. If the collator is marked to process French |
| secondary, reverse the order of all the secondary weights before encountering |
| the next primary weight.</li> |
| <li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto; |
| mso-list:l2 level1 lfo11;tab-stops:list .5in'>Checks if it's necessary to |
| process tertiary weights. If so, append the tertiary weights to the |
| tertiary weight buffer. </li> |
| <li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto; |
| mso-list:l2 level1 lfo11;tab-stops:list .5in'>Concatenate the primary |
| weight buffer, secondary weight buffer and tertiary weight buffer and add |
| a null delimiter among the weights. Return the concatenated buffer as the |
| collation key.</li> |
| </ol> |
| |
| <h3><u>Q &amp; A</u></h3> |
| |
| <ol start=1 type=1> |
| <li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto; |
| mso-list:l11 level1 lfo12;tab-stops:list .5in'>How do I customize the |
| collation sequence?<br> |
| A: Using the RuleBasedCollator constructor, the user of the collation |
| framework can then create his/her own Collator with a customized rule.</li> |
| <li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto; |
| mso-list:l11 level1 lfo12;tab-stops:list .5in'>Will the collation framework |
| support the surrogate and private use characters?<br> |
| A: It's part of our future work items. However, no firm schedule has |
| been set for this yet.</li> |
| <li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto; |
| mso-list:l11 level1 lfo12;tab-stops:list .5in'>How does the French |
| secondary turn-on affect the generation of collation key?<br> |
| A: In French, the secondary differences are sorted backwards so this will |
| invoke the collation key to reverse the secondary weights in the keys.</li> |
| <li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto; |
| mso-list:l11 level1 lfo12;tab-stops:list .5in'>Is there any support for |
| composing characters? If so, how does it work?<br> |
| A: Yes, it is based on the Normalizer interface. When an expanding |
| character is detected, the rule builder will construct collation entries |
| for the precomposed version internally to handle the composed characters |
| correctly.</li> |
| <li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto; |
| mso-list:l11 level1 lfo12;tab-stops:list .5in'>Is there any plan for |
| performance improvement, for instance, contracting/expanding character |
| lookup?<br> |
| A: Yes, the performance enhancement is an ongoing work item.</li> |
| </ol> |
| |
| <p> </p> |
| |
| <p><a href="../readme.html">ReadMe for </a><a href="../readme.html#API">International |
| Components for Unicode</a></p> |
| |
| |
| <div class=MsoNormal align=center style='text-align:center'> |
| |
| <hr size=2 width="100%" align=center> |
| |
| </div> |
| |
| </div> |
| |
| </body> |
| |
| </html> |