229 lines
		
	
	
		
			4.8 KiB
		
	
	
	
		
			XML
		
	
	
	
			
		
		
	
	
			229 lines
		
	
	
		
			4.8 KiB
		
	
	
	
		
			XML
		
	
	
	
| <?xml version="1.0" encoding="UTF-8" ?>
 | ||
| <!DOCTYPE supplementalData SYSTEM "../../common/dtd/ldmlSupplemental.dtd">
 | ||
| <!--
 | ||
| Copyright © 1991-2015 Unicode, Inc.
 | ||
| CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)
 | ||
| For terms of use, see http://www.unicode.org/copyright.html
 | ||
| -->
 | ||
| <supplementalData>
 | ||
| 	<version number="$Revision$"/>
 | ||
| 	<transforms>
 | ||
| 		<transform source="sat_Olck" target="sat_FONIPA" direction="forward" alias="sat-fonipa-t-sat-olck">
 | ||
| 			<tRule><![CDATA[
 | ||
| # Santali (Ol Chiki) → Santali (International Phonetic Alphabet)
 | ||
| 
 | ||
| 
 | ||
| # Output
 | ||
| # ------
 | ||
| # m mː n nː ɳ ɳː ɲ ɲː ŋ ŋː
 | ||
| # p pʰ pʼ b bʰ t tʰ tʼ d dʰ ʈ ʈʰ ɖ ɖʰ c cʰ cʼ k kʰ kʼ ɡ ʔ
 | ||
| # s sː h
 | ||
| # d͡ʒ
 | ||
| # ɽ r
 | ||
| # l lː
 | ||
| # w wː w̃ w̃ː
 | ||
| #
 | ||
| # i iː ĩ ĩː u uː ũ ũː
 | ||
| # e eː ẽ ẽː ə əː ə̃ ə̃ː o oː õ õː
 | ||
| # ɛ ɛː ɛ̃ ɛ̃ː ɔ ɔː ɔ̃ ɔ̃ː
 | ||
| # a aː ã ãː
 | ||
| 
 | ||
| 
 | ||
| # References
 | ||
| # ----------
 | ||
| # [1] Michael Everson: Final proposal to encode the Ol Chiki script
 | ||
| #     in the UCS.  ISO/IEC JTC1/SC2/WG2 Working Group Document N2984R,
 | ||
| #     September 21, 2005.  http://std.dkuug.dk/jtc1/sc2/wg2/docs/n2984.pdf
 | ||
| #
 | ||
| # [2] George L. Campbell: Compendium of the World's Languages.
 | ||
| #     Volume 2: Ladakhi to Zuni. ISBN 0-415-20297-3.  Taylor & Francis, 2000.
 | ||
| #     Pages 1454 to 1458.
 | ||
| 
 | ||
| 
 | ||
| # Notes
 | ||
| # -----
 | ||
| # According to [1] (page 3), ᱽ can only follow the four ejective
 | ||
| # consonants ᱵ /pʼ/, ᱡ /cʼ/, ᱫ /tʼ/, and ᱜ /kʼ/; these become
 | ||
| # ᱵᱽ /b/, ᱫᱽ /d/, ᱡᱽ /d͡ʒ/, and ᱜᱽ /ɡ/.  In online texts, however,
 | ||
| # we have occasionally encountered ᱽ following non-ejective plosives,
 | ||
| # for example after ᱯ /p/.  These may be typos.  Our rules
 | ||
| # try to be resilient and handle ᱯᱽ as /b/.
 | ||
| #
 | ||
| # According to [1] (page 2), U+1C7C PHAARKAA follows the four “glottal”
 | ||
| # consonants ᱵ /pʼ/, ᱡ /cʼ/, ᱫ /tʼ/, and ᱜ /kʼ/ (these are actually
 | ||
| # ejective, not glottal).  In online texts, however, we have frequently
 | ||
| # encountered ᱼ following non-ejective consonants.
 | ||
| 
 | ||
| # $inword matches any letter (L) or combining mark (M).  It is used below as
 | ||
| # left context, e.g. “$inword {ᱜ}”, to detect that a consonant is preceded
 | ||
| # by another letter or mark rather than standing at the start of a word.
 | ||
| $inword = [[:L:][:M:]];
 | ||
| 
 | ||
| # Some online texts use a decomposed form of U+1C7A MU-GAAHLAA TTUDDAG.
 | ||
| # Either order of the two component marks (ᱹ and ᱸ) is folded into the
 | ||
| # single character ᱺ, so later rules only have to deal with the composed form.
 | ||
| ᱹᱸ → ᱺ ;
 | ||
| ᱸᱹ → ᱺ ;
 | ||
| # End of this normalization step; the rules that follow see the folded text.
 | ||
| ::null();
 | ||
| 
 | ||
| # To simplify the rules below, enforce a uniform ordering of marks:
 | ||
| # whenever ᱻ or ᱼ is written before one of ᱹ, ᱸ or ᱺ, the two are swapped
 | ||
| # so that ᱹ/ᱸ/ᱺ always comes first and ᱻ/ᱼ always comes last.
 | ||
| ᱻᱹ → ᱹᱻ ;
 | ||
| ᱻᱸ → ᱸᱻ ;
 | ||
| ᱻᱺ → ᱺᱻ ;
 | ||
| ᱼᱹ → ᱹᱼ ;
 | ||
| ᱼᱸ → ᱸᱼ ;
 | ||
| ᱼᱺ → ᱺᱼ ;
 | ||
| # End of this normalization step; the rules that follow see the reordered text.
 | ||
| ::null();
 | ||
| 
 | ||
| # Some online texts use U+1C7C PHAARKAA instead of U+1C7B RELAA for indicating
 | ||
| # long phonemes, presumably because the graphemes look similar in some fonts.
 | ||
| # In the rules below, phaarkaa is only meaningful after ejectives and plosives
 | ||
| # (which cannot be lengthened), so a phaarkaa that follows one of the vowels
 | ||
| # or sonorants listed here, optionally with ᱹ, ᱸ or ᱺ in between, is rewritten
 | ||
| # to relaa.  Only the phaarkaa itself (the part in braces) is replaced.
 | ||
| # NOTE(review): ᱧ, ᱬ, ᱥ, ᱨ and ᱲ also have relaa rules further down but are
 | ||
| # not part of this set; confirm whether that omission is intentional.
 | ||
| [ᱚᱟᱤᱩᱮᱳᱶᱢᱝᱞᱱ] [ᱹᱸᱺ]* {ᱼ} → ᱻ ;
 | ||
| # End of this normalization step; the rules that follow see relaa, not phaarkaa.
 | ||
| ::null();
 | ||
| 
 | ||
| # ᱚ → /ɔ/.  ᱹ leaves the vowel unchanged, ᱸ and ᱺ both nasalize it, and a
 | ||
| # trailing ᱻ lengthens it.  Sequences with more marks are listed first so
 | ||
| # that they are tried before the bare-letter fallback at the end.
 | ||
| ᱚᱹᱻ → ɔː ;
 | ||
| ᱚᱹ → ɔ ;
 | ||
| ᱚᱸᱻ → ɔ̃ː ;
 | ||
| ᱚᱸ → ɔ̃ ;
 | ||
| ᱚᱺᱻ → ɔ̃ː ;
 | ||
| ᱚᱺ → ɔ̃ ;
 | ||
| ᱚᱻ → ɔː ;
 | ||
| ᱚ → ɔ ;
 | ||
| 
 | ||
| # ᱛ is a plain (non-ejective) /t/, like ᱠ /k/, ᱪ /c/, ᱯ /p/ and ᱴ /ʈ/.
 | ||
| # It is voiced only by an explicit ᱽ (kept for resilience against typos,
 | ||
| # see Notes).  It is not voiced merely for standing inside a word: that
 | ||
| # treatment is reserved for the four ejectives ᱵ, ᱡ, ᱫ and ᱜ.
 | ||
| ᱛᱼ → t ;
 | ||
| ᱛᱷ → tʰ ;
 | ||
| ᱛᱽ → d ;
 | ||
| ᱛ → t ;
 | ||
| 
 | ||
| # ᱜ is ejective /kʼ/.  It stays ejective before ᱼ, becomes /kʰ/ before ᱷ,
 | ||
| # and is voiced to /ɡ/ before ᱽ or when preceded by a letter or mark.
 | ||
| # A bare ᱜ that is not preceded by a letter or mark stays /kʼ/.
 | ||
| ᱜᱼ → kʼ ;
 | ||
| ᱜᱷ → kʰ ;
 | ||
| ᱜᱽ → ɡ ;
 | ||
| $inword {ᱜ} → ɡ ;
 | ||
| ᱜ → kʼ ;
 | ||
| 
 | ||
| # ᱝ → /ŋ/, long when followed by ᱻ.
 | ||
| ᱝᱻ → ŋː ;
 | ||
| ᱝ → ŋ ;
 | ||
| 
 | ||
| # ᱞ → /l/, long when followed by ᱻ.
 | ||
| ᱞᱻ → lː ;
 | ||
| ᱞ → l ;
 | ||
| 
 | ||
| # ᱟ → /a/.  Unlike for the other vowels, the marks change the quality here:
 | ||
| # ᱹ gives /ə/, ᱸ gives nasalized /ã/, and ᱺ gives nasalized /ə̃/.
 | ||
| # A trailing ᱻ lengthens each of these.
 | ||
| ᱟᱹᱻ → əː ;
 | ||
| ᱟᱹ → ə ;
 | ||
| ᱟᱸᱻ → ãː ;
 | ||
| ᱟᱸ → ã ;
 | ||
| ᱟᱺᱻ → ə̃ː ;
 | ||
| ᱟᱺ → ə̃ ;
 | ||
| ᱟᱻ → aː ;
 | ||
| ᱟ → a ;
 | ||
| 
 | ||
| # ᱠ is plain /k/: ᱼ has no effect, ᱷ aspirates, and ᱽ voices it to /ɡ/
 | ||
| # (the typo-resilience case described in Notes).
 | ||
| ᱠᱼ → k ;
 | ||
| ᱠᱷ → kʰ ;
 | ||
| ᱠᱽ → ɡ ;
 | ||
| ᱠ → k ;
 | ||
| 
 | ||
| # ᱡ is ejective /cʼ/.  It stays ejective before ᱼ, becomes /cʰ/ before ᱷ,
 | ||
| # and is voiced to /d͡ʒ/ before ᱽ or when preceded by a letter or mark.
 | ||
| ᱡᱼ → cʼ ;
 | ||
| ᱡᱷ → cʰ ;
 | ||
| ᱡᱽ →  d͡ʒ ;
 | ||
| $inword {ᱡ} →  d͡ʒ ;
 | ||
| ᱡ → cʼ ;
 | ||
| 
 | ||
| # ᱢ → /m/, long when followed by ᱻ.
 | ||
| ᱢᱻ → mː ;
 | ||
| ᱢ → m ;
 | ||
| 
 | ||
| # According to [1], ᱣ is sometimes /v/ and sometimes /w/.
 | ||
| # TODO: Find out if there is a rule for this.
 | ||
| # For now ᱣ is always /w/, nasalized to /w̃/ when followed by ᱸ.
 | ||
| # NOTE(review): the Output list names wː, but there is no ᱣᱻ rule here;
 | ||
| # confirm whether a long /wː/ should be produced.
 | ||
| ᱣᱸ → w̃ ;
 | ||
| ᱣ → w ;
 | ||
| 
 | ||
| # ᱤ → /i/.  ᱹ leaves the vowel unchanged, ᱸ and ᱺ both nasalize it, and a
 | ||
| # trailing ᱻ lengthens it.
 | ||
| ᱤᱹᱻ → iː ;
 | ||
| ᱤᱹ → i ;
 | ||
| ᱤᱸᱻ → ĩː ;
 | ||
| ᱤᱸ → ĩ ;
 | ||
| ᱤᱺᱻ → ĩː ;
 | ||
| ᱤᱺ → ĩ ;
 | ||
| ᱤᱻ → iː ;
 | ||
| ᱤ → i ;
 | ||
| 
 | ||
| # ᱥ → /s/, long when followed by ᱻ.
 | ||
| ᱥᱻ → sː ;
 | ||
| ᱥ → s ;
 | ||
| 
 | ||
| # According to [1], ᱦ is sometimes /h/ and sometimes /ʔ/.
 | ||
| # TODO: Find out if there is a rule for this.
 | ||
| # For now ᱦ is always /h/; no rule in this file produces /ʔ/.
 | ||
| ᱦ → h ;
 | ||
| 
 | ||
| # ᱧ → /ɲ/, long when followed by ᱻ.
 | ||
| ᱧᱻ → ɲː ;
 | ||
| ᱧ → ɲ ;
 | ||
| 
 | ||
| # ᱨ → /r/.  A following ᱻ is consumed without lengthening; the Output list
 | ||
| # has no long /r/.
 | ||
| ᱨᱻ → r ;
 | ||
| ᱨ → r ;
 | ||
| 
 | ||
| # ᱩ → /u/.  ᱹ leaves the vowel unchanged, ᱸ and ᱺ both nasalize it, and a
 | ||
| # trailing ᱻ lengthens it.
 | ||
| ᱩᱹᱻ → uː ;
 | ||
| ᱩᱹ → u ;
 | ||
| ᱩᱸᱻ → ũː ;
 | ||
| ᱩᱸ → ũ ;
 | ||
| ᱩᱺᱻ → ũː ;
 | ||
| ᱩᱺ → ũ ;
 | ||
| ᱩᱻ → uː ;
 | ||
| ᱩ → u ;
 | ||
| 
 | ||
| # ᱪ is plain /c/: ᱼ has no effect, ᱷ aspirates, and ᱽ voices it to /d͡ʒ/
 | ||
| # (the typo-resilience case described in Notes).
 | ||
| ᱪᱼ → c ;
 | ||
| ᱪᱷ → cʰ ;
 | ||
| ᱪᱽ →  d͡ʒ ;
 | ||
| ᱪ → c ;
 | ||
| 
 | ||
| # ᱫ is ejective /tʼ/.  It stays ejective before ᱼ and is voiced to /d/
 | ||
| # before ᱽ or when preceded by a letter or mark.  With ᱷ it yields the
 | ||
| # voiced aspirate /dʰ/ named in the Output list, parallel to ᱵᱷ → /bʰ/;
 | ||
| # voiceless /tʰ/ is already written ᱛᱷ.
 | ||
| ᱫᱼ → tʼ ;
 | ||
| ᱫᱷ → dʰ ;
 | ||
| ᱫᱽ → d ;
 | ||
| $inword {ᱫ} → d ;
 | ||
| ᱫ → tʼ ;
 | ||
| 
 | ||
| # ᱬ → /ɳ/, long when followed by ᱻ.
 | ||
| ᱬᱻ → ɳː ;
 | ||
| ᱬ → ɳ ;
 | ||
| 
 | ||
| # TODO: ᱵᱷᱭᱨᱚᱵ → bʰhrɔb seems unlikely; would be good to verify.
 | ||
| # ᱭ is currently mapped to /h/, the same output as ᱦ.
 | ||
| ᱭ → h ;
 | ||
| 
 | ||
| # ᱮ → /e/.  The marks change the quality here: ᱹ gives /ɛ/, ᱺ gives
 | ||
| # nasalized /ɛ̃/, and ᱸ gives nasalized /ẽ/.  A trailing ᱻ lengthens each.
 | ||
| ᱮᱹᱻ → ɛː ;
 | ||
| ᱮᱹ → ɛ ;
 | ||
| ᱮᱺᱻ → ɛ̃ː ;
 | ||
| ᱮᱺ → ɛ̃ ;
 | ||
| ᱮᱸᱻ → ẽː ;
 | ||
| ᱮᱸ → ẽ ;
 | ||
| ᱮᱻ → eː ;
 | ||
| ᱮ → e ;
 | ||
| 
 | ||
| # ᱯ is plain /p/: ᱼ has no effect, ᱷ aspirates, and ᱽ voices it to /b/
 | ||
| # (the typo-resilience case described in Notes).
 | ||
| ᱯᱼ → p ;
 | ||
| ᱯᱷ → pʰ ;
 | ||
| ᱯᱽ → b ;
 | ||
| ᱯ → p ;
 | ||
| 
 | ||
| # ᱰ → /ɖ/, aspirated to /ɖʰ/ when followed by ᱷ.
 | ||
| ᱰᱷ → ɖʰ ;
 | ||
| ᱰ → ɖ ;
 | ||
| 
 | ||
| # ᱱ → /n/, long when followed by ᱻ.
 | ||
| ᱱᱻ → nː ;
 | ||
| ᱱ → n ;
 | ||
| 
 | ||
| # ᱲ → /ɽ/.  A following ᱻ is consumed without lengthening; the Output list
 | ||
| # has no long /ɽ/.
 | ||
| ᱲᱻ → ɽ ;
 | ||
| ᱲ → ɽ ;
 | ||
| 
 | ||
| # ᱳ → /o/, nasalized by ᱸ and lengthened by a trailing ᱻ.  There are no
 | ||
| # rules for ᱳ followed by ᱹ or ᱺ.
 | ||
| ᱳᱸᱻ → õː ;
 | ||
| ᱳᱸ → õ ;
 | ||
| ᱳᱻ → oː ;
 | ||
| ᱳ → o ;
 | ||
| 
 | ||
| # ᱴ is plain /ʈ/: ᱼ has no effect, ᱷ aspirates, and ᱽ voices it to /ɖ/
 | ||
| # (the typo-resilience case described in Notes).
 | ||
| ᱴᱼ → ʈ ;
 | ||
| ᱴᱷ → ʈʰ ;
 | ||
| ᱴᱽ → ɖ ;
 | ||
| ᱴ → ʈ ;
 | ||
| 
 | ||
| # ᱵ is ejective /pʼ/.  It stays ejective before ᱼ, becomes voiced aspirated
 | ||
| # /bʰ/ before ᱷ, and is voiced to /b/ before ᱽ or when preceded by a letter
 | ||
| # or mark.
 | ||
| ᱵᱼ → pʼ ;
 | ||
| ᱵᱷ → bʰ ;
 | ||
| ᱵᱽ → b ;
 | ||
| $inword {ᱵ} → b ;
 | ||
| ᱵ → pʼ ;
 | ||
| 
 | ||
| # ᱶ → nasalized /w̃/, long when followed by ᱻ.
 | ||
| ᱶᱻ → w̃ː ;
 | ||
| ᱶ → w̃ ;
 | ||
| 
 | ||
| 			]]></tRule>
 | ||
| 		</transform>
 | ||
| 	</transforms>
 | ||
| </supplementalData>
 |