source/data/translit/sat_Olck_sat_FONIPA.txt - external/github.com/unicode-org/icu - Git at Google

 # © 2016 and later: Unicode, Inc. and others.
 # License & terms of use: http://www.unicode.org/copyright.html#License
 #
 # File: sat_Olck_sat_FONIPA.txt
 # Generated from CLDR
 #

 # Santali (Ol Chiki) → Santali (International Phonetic Alphabet)
 # Output
 # ------
 # m mː n nː ɳ ɳː ɲ ɲː ŋ ŋː
 # p pʰ pʼ b bʰ t tʰ tʼ d dʰ ʈ ʈʰ ɖ ɖʰ c cʰ cʼ k kʰ kʼ ɡ ʔ
 # s sː h
 # d\u0361ʒ
 # ɽ r
 # l lː
 # w wː w\u0303 w\u0303ː
 #
 # i iː ĩ ĩː u uː ũ ũː
 # e eː ẽ ẽː ə əː ə\u0303 ə\u0303ː o oː õ õː
 # ɛ ɛː ɛ\u0303 ɛ\u0303ː ɔ ɔː ɔ\u0303 ɔ\u0303ː
 # a aː ã ãː
 # References
 # ----------
 # [1] Michael Everson: Final proposal to encode the Ol Chiki script
 #     in the UCS.  ISO/IEC JTC1/SC2/WG2 Working Group Document N2984R,
 #     September 21, 2005.  http://std.dkuug.dk/jtc1/sc2/wg2/docs/n2984.pdf
 #
 # [2] George L. Campbell: Compendium of the World's Languages.
 #     Volume 2: Ladakhi to Zuni. ISBN 0-415-20297-3.  Taylor & Francis, 2000.
 #     Pages 1454 to 1458.
 # Notes
 # -----
 # According to [1] (page 3), ᱽ can only follow the four ejective
 # consonants ᱵ /pʼ/, ᱡ /cʼ/, ᱫ /tʼ/, and ᱜ /kʼ/; these become
 # ᱵᱽ /b/, ᱫᱽ /d/, ᱡᱽ /d\u0361ʒ/, and ᱜᱽ /ɡ/.  In online texts, however,
 # we have occasionally encountered ᱽ following non-ejective plosives,
 # for example after ᱯ /p/. These might possibly be typos.  Our rules
 # try to be resilient and handle ᱯᱽ as /b/.
 #
 # According to [1] (page 2), U+1C7C PHAARKAA follows the four “glottal”
 # consonants ᱵ /pʼ/, ᱡ /cʼ/, ᱫ /tʼ/, and ᱜ /kʼ/ (these are actually
 # ejective, not glottal).  In online texts, however, we have frequently
 # encountered ᱼ following non-ejective consonants.
 # $inword matches any character with General_Category Letter (L) or Mark (M).
 # The main mapping pass uses it as left context ("$inword {X}") so that a
 # consonant can be treated differently when it is preceded by a letter or mark.
 $inword = [[:L:][:M:]];
 # Some online texts use a decomposed form of U+1C7A MU-GAAHLAA TTUDDAG.
 # Either order of the two component marks is recomposed into the single mark ᱺ.
 ᱹᱸ → ᱺ ;
 ᱸᱹ → ᱺ ;
 # Pass boundary: the rules above are applied to the text before the rules
 # below are considered.
 ::null();
 # To simplify the rules below, enforce a uniform ordering of marks:
 # ᱻ and ᱼ are moved after any of ᱹ, ᱸ, ᱺ, so later rules only have to match
 # the order <ᱹ|ᱸ|ᱺ> followed by <ᱻ|ᱼ>.
 ᱻᱹ → ᱹᱻ ;
 ᱻᱸ → ᱸᱻ ;
 ᱻᱺ → ᱺᱻ ;
 ᱼᱹ → ᱹᱼ ;
 ᱼᱸ → ᱸᱼ ;
 ᱼᱺ → ᱺᱼ ;
 # Pass boundary (see above).
 ::null();
 # Some online texts use U+1C7C PHAARKAA instead of U+1C7B RELAA for indicating
 # long phonemes, presumably because the graphemes look similar in some fonts.
 # Since phaarkaa is used for voicing ejectives and plosives (which cannot
 # be lengthened), we rewrite phaarkaa to relaa.
 # NOTE(review): in the main pass ᱼ never yields a voiced sound (e.g. ᱜᱼ → kʼ,
 # ᱯᱼ → p); voicing comes from ᱽ.  The word "voicing" above may need rechecking.
 #
 # The rule only rewrites ᱼ itself (the part in braces).  The preceding letter,
 # one of the vowels ᱚ ᱟ ᱤ ᱩ ᱮ ᱳ or the sonorants ᱶ ᱢ ᱝ ᱞ ᱱ, and any
 # intervening ᱹ, ᱸ or ᱺ are context only and are left unchanged.
 [ᱚᱟᱤᱩᱮᱳᱶᱢᱝᱞᱱ] [ᱹᱸᱺ]* {ᱼ} → ᱻ ;
 # Pass boundary: the rewrite above is finished before the main mapping starts.
 ::null();
 # Main pass: map each Ol Chiki letter, together with the marks that follow it,
 # to IPA.  Rules are tried in the order written, so for every letter the longer
 # letter+mark sequences are listed before the bare-letter fallback; do not
 # reorder rules within a letter group.
 #
 # Effect of the marks, as encoded by the rules in this pass:
 #   ᱹ  changes the vowel quality of ᱟ (a → ə) and ᱮ (e → ɛ); no effect on ᱚ, ᱤ, ᱩ
 #   ᱸ  nasalizes (also applies to ᱣ)
 #   ᱺ  combines the effects of ᱹ and ᱸ
 #   ᱻ  lengthens (ː), except after ᱨ and ᱲ where it is consumed without output
 #   ᱷ  aspirates (ʰ)
 #   ᱽ  selects the voiced stop/affricate
 #   ᱼ  keeps the unvoiced value and pre-empts the "$inword" voicing rule
 # A rule written "$inword {X}" matches X only when it is preceded by a letter
 # or mark; only X is replaced.

 # ᱚ: ɔ, optionally nasalized and/or long.
 ᱚᱹᱻ → ɔː ;
 ᱚᱹ → ɔ ;
 ᱚᱸᱻ → ɔ\u0303ː ;
 ᱚᱸ → ɔ\u0303 ;
 ᱚᱺᱻ → ɔ\u0303ː ;
 ᱚᱺ → ɔ\u0303 ;
 ᱚᱻ → ɔː ;
 ᱚ → ɔ ;
 # ᱛ: t, but d when preceded by a letter or mark (unless followed by ᱼ or ᱷ).
 ᱛᱼ → t ;
 ᱛᱷ → tʰ ;
 ᱛᱽ → d ;
 $inword {ᱛ} → d ;
 ᱛ → t ;
 # ᱜ: ejective kʼ, but ɡ when preceded by a letter or mark or followed by ᱽ.
 ᱜᱼ → kʼ ;
 ᱜᱷ → kʰ ;
 ᱜᱽ → ɡ ;
 $inword {ᱜ} → ɡ ;
 ᱜ → kʼ ;
 ᱝᱻ → ŋː ;
 ᱝ → ŋ ;
 ᱞᱻ → lː ;
 ᱞ → l ;
 # ᱟ: a; with ᱹ it becomes ə, with ᱸ it is nasalized, with ᱺ both.
 ᱟᱹᱻ → əː ;
 ᱟᱹ → ə ;
 ᱟᱸᱻ → ãː ;
 ᱟᱸ → ã ;
 ᱟᱺᱻ → ə\u0303ː ;
 ᱟᱺ → ə\u0303 ;
 ᱟᱻ → aː ;
 ᱟ → a ;
 # ᱠ: k; no "$inword" voicing rule for this letter.
 ᱠᱼ → k ;
 ᱠᱷ → kʰ ;
 ᱠᱽ → ɡ ;
 ᱠ → k ;
 # ᱡ: ejective cʼ, but d\u0361ʒ when preceded by a letter or mark or followed by ᱽ.
 ᱡᱼ → cʼ ;
 ᱡᱷ → cʰ ;
 ᱡᱽ →  d\u0361ʒ ;
 $inword {ᱡ} →  d\u0361ʒ ;
 ᱡ → cʼ ;
 ᱢᱻ → mː ;
 ᱢ → m ;
 # According to [1], ᱣ is sometimes /v/ and sometimes /w/.
 # TODO: Find out if there is a rule for this.
 # NOTE(review): the Output list in the file header names wː, but no rule in
 # this file produces it (there is no ᱣᱻ rule) — confirm whether one is missing.
 ᱣᱸ → w\u0303 ;
 ᱣ → w ;
 # ᱤ: i; ᱹ has no effect, ᱸ and ᱺ both nasalize.
 ᱤᱹᱻ → iː ;
 ᱤᱹ → i ;
 ᱤᱸᱻ → ĩː ;
 ᱤᱸ → ĩ ;
 ᱤᱺᱻ → ĩː ;
 ᱤᱺ → ĩ ;
 ᱤᱻ → iː ;
 ᱤ → i ;
 ᱥᱻ → sː ;
 ᱥ → s ;
 # According to [1], ᱦ is sometimes /h/ and sometimes /ʔ/.
 # TODO: Find out if there is a rule for this.
 ᱦ → h ;
 ᱧᱻ → ɲː ;
 ᱧ → ɲ ;
 # ᱨ: the length mark ᱻ is consumed but not reflected in the output.
 ᱨᱻ → r ;
 ᱨ → r ;
 # ᱩ: u; ᱹ has no effect, ᱸ and ᱺ both nasalize.
 ᱩᱹᱻ → uː ;
 ᱩᱹ → u ;
 ᱩᱸᱻ → ũː ;
 ᱩᱸ → ũ ;
 ᱩᱺᱻ → ũː ;
 ᱩᱺ → ũ ;
 ᱩᱻ → uː ;
 ᱩ → u ;
 # ᱪ: c; no "$inword" voicing rule for this letter.
 ᱪᱼ → c ;
 ᱪᱷ → cʰ ;
 ᱪᱽ →  d\u0361ʒ ;
 ᱪ → c ;
 # ᱫ: ejective tʼ, but d when preceded by a letter or mark or followed by ᱽ.
 ᱫᱼ → tʼ ;
 ᱫᱷ → tʰ ;
 ᱫᱽ → d ;
 $inword {ᱫ} → d ;
 ᱫ → tʼ ;
 ᱬᱻ → ɳː ;
 ᱬ → ɳ ;
 # TODO: ᱵᱷᱭᱨᱚᱵ → bʰhrɔb seems unlikely; would be good to verify.
 # NOTE(review): this gives ᱭ the same output as ᱦ (h).  ᱭ appears to be
 # U+1C6D OL CHIKI LETTER UY, which is presumably a palatal glide (/j/) —
 # verify against [1] before changing.
 ᱭ → h ;
 # ᱮ: e; with ᱹ it becomes ɛ, with ᱸ it is nasalized, with ᱺ both.
 ᱮᱹᱻ → ɛː ;
 ᱮᱹ → ɛ ;
 ᱮᱺᱻ → ɛ\u0303ː ;
 ᱮᱺ → ɛ\u0303 ;
 ᱮᱸᱻ → ẽː ;
 ᱮᱸ → ẽ ;
 ᱮᱻ → eː ;
 ᱮ → e ;
 # ᱯ: p; ᱯᱽ → b is the resilience case described in the header notes.
 ᱯᱼ → p ;
 ᱯᱷ → pʰ ;
 ᱯᱽ → b ;
 ᱯ → p ;
 ᱰᱷ → ɖʰ ;
 ᱰ → ɖ ;
 ᱱᱻ → nː ;
 ᱱ → n ;
 # ᱲ: the length mark ᱻ is consumed but not reflected in the output.
 ᱲᱻ → ɽ ;
 ᱲ → ɽ ;
 # ᱳ: o; only ᱸ and ᱻ are handled.  There are no rules for ᱳ followed by ᱹ or
 # ᱺ, so those marks are presumably left untransformed — TODO confirm intent.
 ᱳᱸᱻ → õː ;
 ᱳᱸ → õ ;
 ᱳᱻ → oː ;
 ᱳ → o ;
 ᱴᱼ → ʈ ;
 ᱴᱷ → ʈʰ ;
 ᱴᱽ → ɖ ;
 ᱴ → ʈ ;
 # ᱵ: ejective pʼ, but b when preceded by a letter or mark or followed by ᱽ.
 # NOTE(review): ᱵᱷ yields voiced bʰ, whereas ᱜᱷ, ᱡᱷ and ᱫᱷ yield voiceless
 # kʰ, cʰ and tʰ — confirm this asymmetry is intended.
 ᱵᱼ → pʼ ;
 ᱵᱷ → bʰ ;
 ᱵᱽ → b ;
 $inword {ᱵ} → b ;
 ᱵ → pʼ ;
 # ᱶ: nasalized w, optionally long.
 ᱶᱻ → w\u0303ː ;
 ᱶ → w\u0303 ;
	# © 2016 and later: Unicode, Inc. and others.
	# License & terms of use: http://www.unicode.org/copyright.html#License
	#
	# File: sat_Olck_sat_FONIPA.txt
	# Generated from CLDR
	#

	# Santali (Ol Chiki) → Santali (International Phonetic Alphabet)
	# Output
	# ------
	# m mː n nː ɳ ɳː ɲ ɲː ŋ ŋː
	# p pʰ pʼ b bʰ t tʰ tʼ d dʰ ʈ ʈʰ ɖ ɖʰ c cʰ cʼ k kʰ kʼ ɡ ʔ
	# s sː h
	# d\u0361ʒ
	# ɽ r
	# l lː
	# w wː w\u0303 w\u0303ː
	#
	# i iː ĩ ĩː u uː ũ ũː
	# e eː ẽ ẽː ə əː ə\u0303 ə\u0303ː o oː õ õː
	# ɛ ɛː ɛ\u0303 ɛ\u0303ː ɔ ɔː ɔ\u0303 ɔ\u0303ː
	# a aː ã ãː
	# References
	# ----------
	# [1] Michael Everson: Final proposal to encode the Ol Chiki script
	# in the UCS. ISO/IEC JTC1/SC2/WG2 Working Group Document N2984R,
	# September 21, 2005. http://std.dkuug.dk/jtc1/sc2/wg2/docs/n2984.pdf
	#
	# [2] George L. Campbell: Compendium of the World's Languages.
	# Volume 2: Ladakhi to Zuni. ISBN 0-415-20297-3. Taylor & Francis, 2000.
	# Pages 1454 to 1458.
	# Notes
	# -----
	# According to [1] (page 3), ᱽ can only follow the four ejective
	# consonants ᱵ /pʼ/, ᱡ /cʼ/, ᱫ /tʼ/, and ᱜ /kʼ/; these become
	# ᱵᱽ /b/, ᱫᱽ /d/, ᱡᱽ /d\u0361ʒ/, and ᱜᱽ /ɡ/. In online texts, however,
	# we have occasionally encountered ᱽ following non-ejective plosives,
	# for example after ᱯ /p/. These might possibly be typos. Our rules
	# try to be resilient and handle ᱯᱽ as /b/.
	#
	# According to [1] (page 2), U+1C7C PHAARKAA follows the four “glottal”
	# consonants ᱵ /pʼ/, ᱡ /cʼ/, ᱫ /tʼ/, and ᱜ /kʼ/ (these are actually
	# ejective, not glottal). In online texts, however, we have frequently
	# encountered ᱼ following non-ejective consonants.
	# $inword matches any character with General_Category Letter (L) or Mark (M).
	# The main mapping pass uses it as left context ("$inword {X}") so that a
	# consonant can be treated differently when it is preceded by a letter or mark.
	$inword = [[:L:][:M:]];
	# Some online texts use a decomposed form of U+1C7A MU-GAAHLAA TTUDDAG.
	# Either order of the two component marks is recomposed into the single mark ᱺ.
	ᱹᱸ → ᱺ ;
	ᱸᱹ → ᱺ ;
	# Pass boundary: the rules above are applied to the text before the rules
	# below are considered.
	::null();
	# To simplify the rules below, enforce a uniform ordering of marks:
	# ᱻ and ᱼ are moved after any of ᱹ, ᱸ, ᱺ, so later rules only have to match
	# the order <ᱹ|ᱸ|ᱺ> followed by <ᱻ|ᱼ>.
	ᱻᱹ → ᱹᱻ ;
	ᱻᱸ → ᱸᱻ ;
	ᱻᱺ → ᱺᱻ ;
	ᱼᱹ → ᱹᱼ ;
	ᱼᱸ → ᱸᱼ ;
	ᱼᱺ → ᱺᱼ ;
	# Pass boundary (see above).
	::null();
	# Some online texts use U+1C7C PHAARKAA instead of U+1C7B RELAA for indicating
	# long phonemes, presumably because the graphemes look similar in some fonts.
	# Since phaarkaa is used for voicing ejectives and plosives (which cannot
	# be lengthened), we rewrite phaarkaa to relaa.
	# NOTE(review): in the main pass ᱼ never yields a voiced sound (e.g. ᱜᱼ → kʼ,
	# ᱯᱼ → p); voicing comes from ᱽ.  The word "voicing" above may need rechecking.
	#
	# The rule only rewrites ᱼ itself (the part in braces).  The preceding letter,
	# one of the vowels ᱚ ᱟ ᱤ ᱩ ᱮ ᱳ or the sonorants ᱶ ᱢ ᱝ ᱞ ᱱ, and any
	# intervening ᱹ, ᱸ or ᱺ are context only and are left unchanged.
	[ᱚᱟᱤᱩᱮᱳᱶᱢᱝᱞᱱ] [ᱹᱸᱺ]* {ᱼ} → ᱻ ;
	# Pass boundary: the rewrite above is finished before the main mapping starts.
	::null();
	# Main pass: map each Ol Chiki letter, together with the marks that follow it,
	# to IPA.  Rules are tried in the order written, so for every letter the longer
	# letter+mark sequences are listed before the bare-letter fallback; do not
	# reorder rules within a letter group.
	#
	# Effect of the marks, as encoded by the rules in this pass:
	#   ᱹ  changes the vowel quality of ᱟ (a → ə) and ᱮ (e → ɛ); no effect on ᱚ, ᱤ, ᱩ
	#   ᱸ  nasalizes (also applies to ᱣ)
	#   ᱺ  combines the effects of ᱹ and ᱸ
	#   ᱻ  lengthens (ː), except after ᱨ and ᱲ where it is consumed without output
	#   ᱷ  aspirates (ʰ)
	#   ᱽ  selects the voiced stop/affricate
	#   ᱼ  keeps the unvoiced value and pre-empts the "$inword" voicing rule
	# A rule written "$inword {X}" matches X only when it is preceded by a letter
	# or mark; only X is replaced.

	# ᱚ: ɔ, optionally nasalized and/or long.
	ᱚᱹᱻ → ɔː ;
	ᱚᱹ → ɔ ;
	ᱚᱸᱻ → ɔ\u0303ː ;
	ᱚᱸ → ɔ\u0303 ;
	ᱚᱺᱻ → ɔ\u0303ː ;
	ᱚᱺ → ɔ\u0303 ;
	ᱚᱻ → ɔː ;
	ᱚ → ɔ ;
	# ᱛ: t, but d when preceded by a letter or mark (unless followed by ᱼ or ᱷ).
	ᱛᱼ → t ;
	ᱛᱷ → tʰ ;
	ᱛᱽ → d ;
	$inword {ᱛ} → d ;
	ᱛ → t ;
	# ᱜ: ejective kʼ, but ɡ when preceded by a letter or mark or followed by ᱽ.
	ᱜᱼ → kʼ ;
	ᱜᱷ → kʰ ;
	ᱜᱽ → ɡ ;
	$inword {ᱜ} → ɡ ;
	ᱜ → kʼ ;
	ᱝᱻ → ŋː ;
	ᱝ → ŋ ;
	ᱞᱻ → lː ;
	ᱞ → l ;
	# ᱟ: a; with ᱹ it becomes ə, with ᱸ it is nasalized, with ᱺ both.
	ᱟᱹᱻ → əː ;
	ᱟᱹ → ə ;
	ᱟᱸᱻ → ãː ;
	ᱟᱸ → ã ;
	ᱟᱺᱻ → ə\u0303ː ;
	ᱟᱺ → ə\u0303 ;
	ᱟᱻ → aː ;
	ᱟ → a ;
	# ᱠ: k; no "$inword" voicing rule for this letter.
	ᱠᱼ → k ;
	ᱠᱷ → kʰ ;
	ᱠᱽ → ɡ ;
	ᱠ → k ;
	# ᱡ: ejective cʼ, but d\u0361ʒ when preceded by a letter or mark or followed by ᱽ.
	ᱡᱼ → cʼ ;
	ᱡᱷ → cʰ ;
	ᱡᱽ → d\u0361ʒ ;
	$inword {ᱡ} → d\u0361ʒ ;
	ᱡ → cʼ ;
	ᱢᱻ → mː ;
	ᱢ → m ;
	# According to [1], ᱣ is sometimes /v/ and sometimes /w/.
	# TODO: Find out if there is a rule for this.
	# NOTE(review): the Output list in the file header names wː, but no rule in
	# this file produces it (there is no ᱣᱻ rule) — confirm whether one is missing.
	ᱣᱸ → w\u0303 ;
	ᱣ → w ;
	# ᱤ: i; ᱹ has no effect, ᱸ and ᱺ both nasalize.
	ᱤᱹᱻ → iː ;
	ᱤᱹ → i ;
	ᱤᱸᱻ → ĩː ;
	ᱤᱸ → ĩ ;
	ᱤᱺᱻ → ĩː ;
	ᱤᱺ → ĩ ;
	ᱤᱻ → iː ;
	ᱤ → i ;
	ᱥᱻ → sː ;
	ᱥ → s ;
	# According to [1], ᱦ is sometimes /h/ and sometimes /ʔ/.
	# TODO: Find out if there is a rule for this.
	ᱦ → h ;
	ᱧᱻ → ɲː ;
	ᱧ → ɲ ;
	# ᱨ: the length mark ᱻ is consumed but not reflected in the output.
	ᱨᱻ → r ;
	ᱨ → r ;
	# ᱩ: u; ᱹ has no effect, ᱸ and ᱺ both nasalize.
	ᱩᱹᱻ → uː ;
	ᱩᱹ → u ;
	ᱩᱸᱻ → ũː ;
	ᱩᱸ → ũ ;
	ᱩᱺᱻ → ũː ;
	ᱩᱺ → ũ ;
	ᱩᱻ → uː ;
	ᱩ → u ;
	# ᱪ: c; no "$inword" voicing rule for this letter.
	ᱪᱼ → c ;
	ᱪᱷ → cʰ ;
	ᱪᱽ → d\u0361ʒ ;
	ᱪ → c ;
	# ᱫ: ejective tʼ, but d when preceded by a letter or mark or followed by ᱽ.
	ᱫᱼ → tʼ ;
	ᱫᱷ → tʰ ;
	ᱫᱽ → d ;
	$inword {ᱫ} → d ;
	ᱫ → tʼ ;
	ᱬᱻ → ɳː ;
	ᱬ → ɳ ;
	# TODO: ᱵᱷᱭᱨᱚᱵ → bʰhrɔb seems unlikely; would be good to verify.
	# NOTE(review): this gives ᱭ the same output as ᱦ (h).  ᱭ appears to be
	# U+1C6D OL CHIKI LETTER UY, which is presumably a palatal glide (/j/) —
	# verify against [1] before changing.
	ᱭ → h ;
	# ᱮ: e; with ᱹ it becomes ɛ, with ᱸ it is nasalized, with ᱺ both.
	ᱮᱹᱻ → ɛː ;
	ᱮᱹ → ɛ ;
	ᱮᱺᱻ → ɛ\u0303ː ;
	ᱮᱺ → ɛ\u0303 ;
	ᱮᱸᱻ → ẽː ;
	ᱮᱸ → ẽ ;
	ᱮᱻ → eː ;
	ᱮ → e ;
	# ᱯ: p; ᱯᱽ → b is the resilience case described in the header notes.
	ᱯᱼ → p ;
	ᱯᱷ → pʰ ;
	ᱯᱽ → b ;
	ᱯ → p ;
	ᱰᱷ → ɖʰ ;
	ᱰ → ɖ ;
	ᱱᱻ → nː ;
	ᱱ → n ;
	# ᱲ: the length mark ᱻ is consumed but not reflected in the output.
	ᱲᱻ → ɽ ;
	ᱲ → ɽ ;
	# ᱳ: o; only ᱸ and ᱻ are handled.  There are no rules for ᱳ followed by ᱹ or
	# ᱺ, so those marks are presumably left untransformed — TODO confirm intent.
	ᱳᱸᱻ → õː ;
	ᱳᱸ → õ ;
	ᱳᱻ → oː ;
	ᱳ → o ;
	ᱴᱼ → ʈ ;
	ᱴᱷ → ʈʰ ;
	ᱴᱽ → ɖ ;
	ᱴ → ʈ ;
	# ᱵ: ejective pʼ, but b when preceded by a letter or mark or followed by ᱽ.
	# NOTE(review): ᱵᱷ yields voiced bʰ, whereas ᱜᱷ, ᱡᱷ and ᱫᱷ yield voiceless
	# kʰ, cʰ and tʰ — confirm this asymmetry is intended.
	ᱵᱼ → pʼ ;
	ᱵᱷ → bʰ ;
	ᱵᱽ → b ;
	$inword {ᱵ} → b ;
	ᱵ → pʼ ;
	# ᱶ: nasalized w, optionally long.
	ᱶᱻ → w\u0303ː ;
	ᱶ → w\u0303 ;