152 lines
		
	
	
		
			3.7 KiB
		
	
	
	
		
			XML
		
	
	
	
			
		
		
	
	
			152 lines
		
	
	
		
			3.7 KiB
		
	
	
	
		
			XML
		
	
	
	
| <?xml version="1.0" encoding="UTF-8" ?>
 | ||
| <!DOCTYPE supplementalData SYSTEM "../../common/dtd/ldmlSupplemental.dtd">
 | ||
| <!--
 | ||
| Copyright © 1991-2016 Unicode, Inc.
 | ||
| CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)
 | ||
| For terms of use, see http://www.unicode.org/copyright.html
 | ||
| -->
 | ||
| <supplementalData>
 | ||
| 	<version number="$Revision$"/>
 | ||
| 	<transforms>
 | ||
| 		<transform source="und_FONIPA" target="ar" direction="forward" alias="ar-t-und-fonipa">
 | ||
| 		  <tRule><![CDATA[
 | ||
| # Vowels
 | ||
| # ------
 | ||
| # In these rules, we produce ي و ا both for short and for long vowels.
 | ||
| # This would be wrong for writing Arabic, but when transliterating
 | ||
| # foreign words and names, it is strongly preferred to vowel marks.
 | ||
| # However, we omit short schwa [ə] and a few other schwa-like vowels, except word-initially.
 | ||
| 
 | ||
| $IVowel = [i ɪ e {e̞}];
 | ||
| $UVowel = [y {ɨ} {ʉ} ɯ u ʏ {ɪ̈} {ʊ̈} {ɯ̽} {ʊ} ø ɤ o {ø̞} {ɤ̞} {o̞} ɞ ɔ w {w̥} ʍ ʷ];
 | ||
| $AVowel = [ɛ œ ɜ ʌ æ ɐ a ɶ {ä} {ɒ̈} ɑ ɒ];
 | ||
| $SchwaVowel = [ɘ ɵ ə {ɵ̞}];
 | ||
| $Vowel = [$IVowel $UVowel $AVowel $SchwaVowel];
 | ||
| $Click = [ʘ ɋ ǀ ʇ ǃ ʗ ǂ ʄ ǁ ʖ];
 | ||
| $Boundary =  [^[:L:][:M:][:N:]];
 | ||
| 
 | ||
| ::NFD;
 | ||
| [ʰ ʱ ʼ  ̃  ̰  ̋  ́  ̄  ̀  ̏  ̌  ̂ ˥ ˦ ˧ ˨ ˩ ꜜ ꜛ ↗ ↘  ͡  ͜  ̯] → ;
 | ||
| ʲ → j;
 | ||
| ᵐ → m;
 | ||
| ⁿ → n;
 | ||
| ᵑ → ŋ;
 | ||
| ::NFC;
 | ||
| 
 | ||
| # TODO: Diphthongs probably need more work.
 | ||
| 
 | ||
| # Romanian [sekujesk] → [sekuiask], for emitting سيكوياسك not سيكويسك
 | ||
| $UVowel [j $IVowel] [e {e̞} $SchwaVowel] → uia;
 | ||
| 
 | ||
| # Kazakh Аягөз [ɑjɑɡy̯ʉz] → [ɑjɑɡiuz]; TODO: document the intended Arabic output.
 | ||
| yʉ → iu;
 | ||
| 
 | ||
| ::NULL;
 | ||
| 
 | ||
| # Vowels
 | ||
| $Boundary {ʔ? $IVowel ː} → إِي;
 | ||
| $Boundary {ʔ? $IVowel} → إِ;
 | ||
| {$IVowel ʔ} $Boundary → ئ;
 | ||
| {$IVowel ː ʔ} $Boundary → يء;
 | ||
| {$IVowel ː ʔ} [$Vowel] → ئ;
 | ||
| $IVowel ː? → ي;
 | ||
| 
 | ||
| $Boundary {ʔ? $UVowel ː} → أو;
 | ||
| $Boundary {ʔ? $UVowel} → أ;
 | ||
| {$UVowel ʔ} $Boundary → ؤ;
 | ||
| {$UVowel ː ʔ} $Boundary → وء;
 | ||
| $UVowel ː? → و;
 | ||
| 
 | ||
| $Boundary {ʔ? $AVowel ː} → آ;
 | ||
| $Boundary {ʔ? $AVowel} → أ;
 | ||
| {$AVowel ʔ} $Boundary → أ;
 | ||
| {$AVowel ː ʔ} $Boundary → اء;
 | ||
| $AVowel ː? ʔ $AVowel ː? → اءا;
 | ||
| $AVowel ː? → ا;
 | ||
| 
 | ||
| $Boundary {ʔ? $SchwaVowel ː} → إِي;
 | ||
| $Boundary {ʔ? $SchwaVowel} → أ;
 | ||
| $SchwaVowel ː → ي;
 | ||
| $SchwaVowel → ;
 | ||
| 
 | ||
| # TODO: Handle glottal stop.
 | ||
| ʔ → ;
 | ||
| 
 | ||
| # Shadda for long (geminated) consonants
 | ||
| ː → ّ;
 | ||
| 
 | ||
| # Affricates
 | ||
| # The tie bars (U+0361, U+035C) are deleted by the first pass, so t͡ʃ reaches
 | ||
| # this pass as plain tʃ. Match that form as well, so the affricate gets the
 | ||
| # sukun (تْش) like ʧ does instead of falling through to t → ت, ʃ → ش.
 | ||
| [{t͡ʃ} {tʃ} ʧ] → تْش;
 | ||
| 
 | ||
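| # Clicks. NOTE(review): $Click also contains ʄ, so the ʄ in the later [ʃ ʂ ɕ ʄ] rule is never reached; confirm which mapping is intended.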
 | ||
| [ɡ g ɠ k] $Click → كْش;
 | ||
| $Click → تْش;
 | ||
| 
 | ||
| # Nasal stops
 | ||
| [{m̥} m ɱ] → م;
 | ||
| [{n̼̊} {n̼} {n̥} n {ɳ̊} ɳ {ɲ̊} {ɲ̥} ɲ] → ن;
 | ||
| [{ŋ̊} ŋ {ɴ̥} ɴ] k → نك;
 | ||
| [{ŋ̊} ŋ {ɴ̥} ɴ] [ɡ g ɠ]? → نْغ;
 | ||
| 
 | ||
| # Non-nasal stops
 | ||
| [p b {p̪} {b̪} ɓ] → ب;
 | ||
| [{d̼} d ɗ ᶑ] → د;
 | ||
| [{t̼} t] → ت;
 | ||
| [ʈ] → ط;
 | ||
| [ɖ] → ض;
 | ||
| c → تْش;
 | ||
| ɟ → دج;
 | ||
| k → ك;
 | ||
| [ɡ g ɠ] → غ;
 | ||
| [q ɢ ʡ ʛ] → ق;
 | ||
| 
 | ||
| # Sibilant fricatives
 | ||
| s → س;
 | ||
| z → ز;
 | ||
| [ʃ ʂ ɕ ʄ] → ش;
 | ||
| [ʒ ʐ ʑ] → ج;
 | ||
| 
 | ||
| # Non-sibilant fricatives
 | ||
| [ɸ f v] → ف;
 | ||
| β → ب;
 | ||
| [{θ̼} θ {θ̱}] → ث;
 | ||
| [{ð̼} ð {ð̠}] → ذ;
 | ||
| ç → ش;
 | ||
| ʝ $IVowel? ː? → ي;
 | ||
| [x χ] → خ;
 | ||
| [ɣ ʁ] → غ;
 | ||
| ħ → ح;
 | ||
| ʕ → ع;
 | ||
| [h ɦ {ʔ̞}] → ه;
 | ||
| 
 | ||
| # Approximants, trills, flaps
 | ||
| ʋ → و;
 | ||
| ʙ → بر;
 | ||
| {r̝} → رش;
 | ||
| [{ɹ̥} {ɹ} {ɻ̊} {ɻ} {ɾ̥} ɾ {ɽ̊} ɽ {r̼} {r̥} r] → ر;
 | ||
| [{ʀ̥} ʀ] → غ;
 | ||
| ʜ → ح;
 | ||
| ʢ → ع;
 | ||
| j $IVowel? ː? → ي;
 | ||
| 
 | ||
| # Laterals
 | ||
| ɬ → شْل;
 | ||
| ɮ → جْل;
 | ||
| {[{ʎ̥} ʎ]} [^ $IVowel j ʝ] → لي;
 | ||
| [{l̼} {l̥} l {ɭ̊} ɭ {ʎ̥} ʎ] → ل;
 | ||
| [ʟ {ʟ̠}] → غ;
 | ||
| 
 | ||
| # Independent pass for misc cleanup.
 | ||
| ::NULL;
 | ||
| 
 | ||
| # Strip off syllable markers
 | ||
| \. → ;  
 | ||
| 
 | ||
| # Sequences of three or more ووو look very confusing; we shorten them.
 | ||
| # Polish Darłowo [darwɔvɔ] → داروو → داروووو
 | ||
| ووو+ → وو;
 | ||
| 			]]></tRule>
 | ||
| 		</transform>
 | ||
| 	</transforms>
 | ||
| </supplementalData>
 |