| // -*- Coding: utf-8; -*- |
| //-------------------------------------------------------------------- |
| // Copyright (c) 1999-2002, International Business Machines |
| // Corporation and others. All Rights Reserved. |
| //-------------------------------------------------------------------- |
| // THIS IS A MACHINE-GENERATED FILE |
| // Tool: dumpicurules.bat |
| // Source: ../../../impl/data/Transliterator_Arabic_Latin.txt |
| // Date: Sat Jul 27 10:31:01 2002 |
| //-------------------------------------------------------------------- |
| |
| // Arabic_Latin |
| |
| t_Arab_Latn { |
| Rule { |
| //-------------------------------------------------------------------- |
| // Transliteration rules for the t_Arab_Latn resource: Arabic script <-> Latin. |
| //-------------------------------------------------------------------- |
| |
| // Generally follows UNGEGN <http://www.eki.ee/wgrs/rom1_ar.pdf> |
| // Occasionally deviates in the direction of ISO 233 <http://homepage.mac.com/sirbinks/pdf/Arabic.pdf> |
| // a) where required for disambiguation. |
| // b) with underdot instead of cedilla for letters like SAD, since |
| // those are explicitly in Unicode for transliteration. |
| // c) with extra non-Arabic-language letters, like PEH |
| |
| // Does *not* do assimilation of "al", nor hyphenation. |
| // While it could be done, we need to determine whether a prefix "al" could |
| // occur other than as the definite article (since no space is used). |
| |
| ":: [[:Arabic:] [ⁿ\u060C\u061B\u061F\u0640\u064B-\u0655\u0660-\u066C\u06F0-\u06F9\uFDFC]] ;" |
| ":: NFKD (NFC);" |
| "$disambig = ̱ ;" |
| "$disambig2 = ̰ ;" |
| "$under = ̣ ;" |
| |
| "$notAbove = [[:^ccc=0:]&[:^ccc=230:]];" |
| |
| // non-letters: separators, punctuation, the percent sign, and digits |
| |
| "٫ <> '.' $disambig ;" // ARABIC DECIMAL SEPARATOR |
| "٬ <> ',' $disambig ;" // ARABIC THOUSANDS SEPARATOR |
| // ٭ <> ; # ARABIC FIVE POINTED STAR // no need to transliterate |
| |
| "، <> ',' ;" // ARABIC COMMA |
| "؛ <> ';' ;" // ARABIC SEMICOLON |
| "؟ <> '?' ;" // ARABIC QUESTION MARK |
| "٪ <> '%' ;" // ARABIC PERCENT SIGN |
| |
| "۰ <> 0 $disambig ;" // EXTENDED ARABIC-INDIC DIGIT ZERO (extended digits carry $disambig; the plain ARABIC-INDIC digits below map to the bare digit) |
| "۱ <> 1 $disambig ;" // EXTENDED ARABIC-INDIC DIGIT ONE |
| "۲ <> 2 $disambig ;" // EXTENDED ARABIC-INDIC DIGIT TWO |
| "۳ <> 3 $disambig ;" // EXTENDED ARABIC-INDIC DIGIT THREE |
| "۴ <> 4 $disambig ;" // EXTENDED ARABIC-INDIC DIGIT FOUR |
| "۵ <> 5 $disambig ;" // EXTENDED ARABIC-INDIC DIGIT FIVE |
| "۶ <> 6 $disambig ;" // EXTENDED ARABIC-INDIC DIGIT SIX |
| "۷ <> 7 $disambig ;" // EXTENDED ARABIC-INDIC DIGIT SEVEN |
| "۸ <> 8 $disambig ;" // EXTENDED ARABIC-INDIC DIGIT EIGHT |
| "۹ <> 9 $disambig ;" // EXTENDED ARABIC-INDIC DIGIT NINE |
| |
| "٠ <> 0 ;" // ARABIC-INDIC DIGIT ZERO |
| "١ <> 1 ;" // ARABIC-INDIC DIGIT ONE |
| "٢ <> 2 ;" // ARABIC-INDIC DIGIT TWO |
| "٣ <> 3 ;" // ARABIC-INDIC DIGIT THREE |
| "٤ <> 4 ;" // ARABIC-INDIC DIGIT FOUR |
| "٥ <> 5 ;" // ARABIC-INDIC DIGIT FIVE |
| "٦ <> 6 ;" // ARABIC-INDIC DIGIT SIX |
| "٧ <> 7 ;" // ARABIC-INDIC DIGIT SEVEN |
| "٨ <> 8 ;" // ARABIC-INDIC DIGIT EIGHT |
| "٩ <> 9 ;" // ARABIC-INDIC DIGIT NINE |
| |
| // letters |
| |
| // long vowels: a short-vowel mark followed by its matching letter maps to a single macron vowel |
| "َا<> ā ;" // ARABIC FATHA, ARABIC LETTER ALEF |
| "ُو <> ū ;" // ARABIC DAMMA, ARABIC LETTER WAW |
| "ِي <> ī ;" // ARABIC KASRA, ARABIC LETTER YEH |
| |
| // longer Latin sequences are listed here, ahead of the single-letter rules further down (e.g. plain t, d, s, z), to prevent masking |
| "ث <> t h $disambig ;" // ARABIC LETTER THEH |
| "ذ <> d h $disambig ;" // ARABIC LETTER THAL |
| "ش <> s h $disambig ;" // ARABIC LETTER SHEEN |
| "ص <> s $under ;" // ARABIC LETTER SAD |
| "ض <> d $under ;" // ARABIC LETTER DAD |
| "ط <> t $under ;" // ARABIC LETTER TAH |
| "ظ <> z $under ;" // ARABIC LETTER ZAH |
| "غ <> g h $disambig ;" // ARABIC LETTER GHAIN |
| |
| // WARNING: special case |
| // <t, umlaut, half-ring below> will be canonically ordered as <t, half-ring below, umlaut> |
| // so on the return, we have to skip over (but preserve) the half-ring below (or others like it) |
| // ةٕ < ẗ̹ ; # LATIN SMALL LETTER T, COMBINING RIGHT HALF RING BELOW, COMBINING DIAERESIS |
| |
| "ة <> t \u0308 ;" // ARABIC LETTER TEH MARBUTA |
| "ة | $1 < t ($notAbove+) \u0308 ;" // ARABIC LETTER TEH MARBUTA (reverse only: marks matching $notAbove, i.e. ccc neither 0 nor 230, between t and U+0308 are captured and re-emitted as $1) |
| |
| // letters not used in the Arabic language (JEH, NG, VE, FARSI YEH) |
| "ژ <> z h $disambig ;" // ARABIC LETTER JEH |
| "ڭ <> n $disambig g ;" // ARABIC LETTER NG |
| "ۋ <> v $disambig ;" // ARABIC LETTER VE |
| "ی <> y $disambig2 ;" // ARABIC LETTER FARSI YEH |
| |
| // letters and vowel marks of the Arabic language |
| |
| "ء <> ʾ ;" // ARABIC LETTER HAMZA |
| "ا <> a $under;" // ARABIC LETTER ALEF |
| "ب <> b ;" // ARABIC LETTER BEH |
| "ت <> t ;" // ARABIC LETTER TEH |
| "ج <> j ;" // ARABIC LETTER JEEM |
| "ح <> h $under ;" // ARABIC LETTER HAH |
| "خ <> k h $disambig ;" // ARABIC LETTER KHAH |
| "د <> d ;" // ARABIC LETTER DAL |
| "ر <> r ;" // ARABIC LETTER REH |
| "ز <> z ;" // ARABIC LETTER ZAIN |
| "س <> s ;" // ARABIC LETTER SEEN |
| "ع <> ʿ ;" // ARABIC LETTER AIN |
| "ـ > ;" // ARABIC TATWEEL (forward only: replaced by nothing, so it has no Latin counterpart) |
| "ف <> f ;" // ARABIC LETTER FEH |
| "ق <> q ;" // ARABIC LETTER QAF |
| "ك <> k ;" // ARABIC LETTER KAF |
| "ل <> l ;" // ARABIC LETTER LAM |
| "م <> m ;" // ARABIC LETTER MEEM |
| "ن <> n ;" // ARABIC LETTER NOON |
| "ه <> h ;" // ARABIC LETTER HEH |
| "و <> w ;" // ARABIC LETTER WAW |
| "ى <> y $disambig ;" // ARABIC LETTER ALEF MAKSURA |
| "ي <> y ;" // ARABIC LETTER YEH |
| "ً <> aⁿ ;" // ARABIC FATHATAN |
| "ٌ <> uⁿ ;" // ARABIC DAMMATAN |
| "ٍ <> iⁿ ;" // ARABIC KASRATAN |
| "َ <> a ;" // ARABIC FATHA |
| "ُ <> u ;" // ARABIC DAMMA |
| "ِ <> i ;" // ARABIC KASRA |
| "ّ <> ̃ ;" // ARABIC SHADDA |
| "ْ <> ̊ ;" // ARABIC SUKUN |
| |
| // special combining marks (each maps to a Latin combining mark) |
| "ٓ <> ̂ ;" // ARABIC MADDAH ABOVE |
| "ٔ <> ̉ ;" // ARABIC HAMZA ABOVE |
| "ٕ <> ̹ ;" // ARABIC HAMZA BELOW |
| |
| // Some non-Arabic language (not in UNGEGN) |
| "پ <> p ;" // ARABIC LETTER PEH |
| "چ <> c h $disambig ;" // ARABIC LETTER TCHEH |
| "ڤ <> v ;" // ARABIC LETTER VEH |
| // ڥ <> v $disambig ; # ARABIC LETTER FEH WITH THREE DOTS BELOW |
| // ڢ <> f $disambig ; # ARABIC LETTER FEH WITH DOT MOVED BELOW |
| "گ <> g ;" // ARABIC LETTER GAF |
| |
| // fallbacks: reverse-only rewrites of Latin c, e, o, x and ⁿ into letters that have rules above (the leading '|' presumably lets the replacement be matched again -- confirm against the ICU rule syntax) |
| "| s < c } [eiy];" |
| "| k < c ;" |
| "| i < e ;" |
| "| u < o ;" |
| "| ks < x ;" |
| "| n < ⁿ;" |
| |
| ":: (lower) ;" |
| "::NFC (NFD);" |
| ":: ( [[:Latin:] [%,.0-9;?\u02BE-\u02BF\u0302-\u0304\u0308-\u030A\u0323\u0330-\u0331\u0339\u037E]] );" |
| } |
| } |