# blob: 4e0635cc0a0d0f34e45b748ca2d59dcf5d29bd2b [file] [log] [blame]
#--------------------------------------------------------------------
# Copyright (c) 1999-2004, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# Transliteration table for Hebrew
# Based on the UNGEGN table at:
# http://www.eki.ee/wgrs/rom1_he.pdf
#
# Exceptions:
# - Accents are added to disambiguate letters
# - Combinations of dagesh, shin/sin dot that produce different
# letters are not yet encoded.
#
# To test, open:
# http://oss.software.ibm.com/cgi-bin/icu/tr
# Click Edit, paste in this file, Save As hebrew-latin/XXX
# (where XXX is a username)
# Now go back to the main window, and try it out.
# Use hebrew-latin/XXX for Output 1, and (Inverse) for Output 2
# Paste in hebrew text in Input, and hit Transliterate.
#
# For more information, see:
# http://oss.software.ibm.com/icu/userguide/Transliteration.html
# Global filter for the forward (Hebrew -> Latin) direction: only characters
# in this set are processed. The set is Hebrew-script characters, every
# character with a non-zero canonical combining class, and the explicitly
# listed points, marks and U+2135..U+2138, with U+05BD removed so that it is
# passed through untouched.
:: [[:Hebrew:] [:^ccc=0:] [\u05B0-\u05B9\u05BB-\u05BC\u05C1-\u05C2\u2135-\u2138̄\u05BF] - [\u05BD]] ;
# Normalization step: NFKD when running forward; the parenthesized NFC is the
# step used when the rules are run in the reverse (Latin -> Hebrew) direction.
:: nfkd (nfc) ;
# Context pattern: zero or more marks followed by a letter, i.e. "another
# letter follows". Used as an after-context (`}`) by the final-form rules.
$letterAfter = [:M:]* [:L:] ;
# Consonants. `<>` rules apply in both directions. Rules are tried in file
# order, so order matters wherever two rules share a Latin side.
# move longer items here to avoid masking
ח <> ẖ ;
# Final-form pairs (here and below): both Hebrew forms produce the same Latin
# letter. The `} $letterAfter` context sits on the Latin side, so it only
# constrains the reverse direction: the first rule of a pair is taken when
# another letter follows, otherwise the next rule supplies the other form.
# NOTE(review): relies on ICU dropping target-side context in the forward
# direction -- confirm against the ICU rule documentation.
צ <> ẕ } $letterAfter;
ץ <> ẕ ;
ש <> ş ;
ת <> ţ ;
א <> ʼ ;
ב <> b ;
ג <> g ;
ד <> d ;
ה <> h ;
ו <> w ;
ז <> z ;
ט <> t ;
י <> y ;
כ <> k } $letterAfter;
ך <> k ;
ל <> l ;
מ <> m } $letterAfter;
ם <> m ;
נ <> n } $letterAfter;
ן <> n ;
ס <> s ;
ע <> ʻ ;
פ <> p } $letterAfter;
ף <> p ;
ק <> q ;
ר <> r ;
# Forward-only (`>`): expand each Yiddish ligature into its two component
# letters. The `|` places the cursor before the replacement text, so the
# inserted letters are then transliterated by the letter rules themselves.
װ > | וו; # HEBREW LIGATURE YIDDISH DOUBLE VAV
ױ > | וי; # HEBREW LIGATURE YIDDISH VAV YOD
ײ > | יי ; # HEBREW LIGATURE YIDDISH DOUBLE YOD
# Bidirectional mark-to-mark rules: each Hebrew dot is exchanged for a single
# Latin combining mark; no letter-specific combinations are handled here.
ּ <> ̇ ; # dagesh just goes to overdot for now
ׁ <> ̌ ; # shin dot -> sh
ׂ <> ̂ ; # sin dot -> s
# points
# $above matches a (possibly empty) run of characters whose combining class
# is neither 0 nor 230. The reverse rules below capture such a run as $1.
$above = [^[:ccc=0:][:ccc=230:]]*;
# Each point in this group has two rules:
#   point > vowel+accent        forward only
#   point $1 < vowel ($above) accent   reverse only: the Latin vowel, any
#       captured marks, then the accent become the point followed by the
#       captured marks, so marks between vowel and accent are not lost.
# NOTE(review): the Hebrew points on these lines appear to be wrapped in
# invisible left-to-right marks; keep them intact when editing.
‎ֲ‎ > à ;
‎ֲ‎ $1< a ($above) ̀;
‎ָ‎ > á ;
‎ָ‎ $1 < a ($above) ́;
‎ֱ‎ > è ;
‎ֱ‎ $1 < e ($above) ̀;
‎ֵ‎ > é ;
‎ֵ‎ $1 < e ($above) ́;
‎ְ‎ > e ̆ ;
‎ְ‎ $1 < e ($above) ̆;
‎ֹ‎ > ò ;
‎ֹ‎ $1 < o ($above) ̀;
# Points that map to a plain, unaccented vowel in both directions. These come
# after the accented rules above, which share the same Latin base letters.
ִ <> i ;
ֻ <> u ;
ַ <> a ;
ֶ <> e ;
ֳ <> o ;
# U+05BF is exchanged for a combining macron in both directions.
\u05BF <> ̄ ;
# fallbacks
# Reverse-only (`<`) rules for Latin letters that none of the forward rules
# in this file produce, so that Latin input containing them still maps to
# Hebrew. They have no effect on Hebrew -> Latin output.
ק < c ;
# Same final-form pattern as the consonant rules: the first rule applies when
# another letter follows, the second otherwise.
פ < f } $letterAfter;
ף < f ;
ז < j ;
ו < v ;
# A single Latin letter mapped to a two-letter Hebrew sequence.
כס < x ;
# Reverse direction only (nothing before the parentheses): lowercase the
# Latin input, so the lowercase-only rules above can match it.
:: (lower);
# Normalization step: NFC when running forward, NFD (in parentheses) when
# running in reverse, so accented Latin letters are seen as base + marks.
:: nfc (nfd) ;
# Global filter for the reverse (Latin -> Hebrew) direction: Latin-script
# characters, every character with a non-zero combining class, and the listed
# modifier letters and combining marks.
# NOTE(review): per ICU semantics the `::` steps are applied in the opposite
# order when the transform is inverted, making this the first step -- confirm.
:: ([[:Latin:] [:^ccc=0:] [\u02BB-\u02BC\u0300-\u0302\u0307\u030C\u0327\u0331\u0340-\u0341 ̄ ]]);