<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE supplementalData SYSTEM "../../common/dtd/ldmlSupplemental.dtd">
<!--
Copyright © 1991-2016 Unicode, Inc.
CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)
For terms of use, see http://www.unicode.org/copyright.html
-->
<supplementalData>
<version number="$Revision$"/>
<transforms>
<transform source="und_FONIPA" target="fa" direction="forward" alias="fa-t-und-fonipa">
<tRule><![CDATA[
# Vowels
# ------
# In these rules, we produce ی و ا both for short and for long vowels.
# This would be wrong for writing Farsi or Arabic, but when transliterating
# foreign words and names, it is strongly preferred to vowel marks.
# Short schwa [ə] and a few other, schwa-like vowels get omitted entirely
# unless at the end of the word, in which case we emit ه whose Farsi
# word-final pronunciation comes close to [ə]. At the beginning of words,
# Farsi speakers prefer to see آ for [ɑ] and a few other similar-sounding
# dark vowels; note that this use of آ is quite different from Arabic.

# Vowel classes. Entries in {braces} are multi-character strings (a base
# letter plus a combining mark) that are matched as one unit.

# Vowels written ی (ای after $Boundary).
$IVowel = [i ɪ e {e̞}];
# Vowels, plus the w-like letters w {w̥} ʍ ʷ, written و (او after $Boundary).
$UVowel = [y {ɨ} {ʉ} ɯ u ʏ {ɪ̈} {ʊ̈} {ɯ̽} {ʊ} ø ɤ o {ø̞} {ɤ̞} {o̞} ɔ w {w̥} ʍ ʷ];
# Vowels written ا in every position.
$AVowel = [ɛ œ ɜ æ ɶ];
$DarkAVowel = [ʌ a ɑ ɒ ɐ ɞ {ä} {ɒ̈}]; # آ instead of ا at beginning of words
# Schwa-like vowels: ای after $Boundary, ی when followed by ː, ه when
# word-final, otherwise dropped.
$SchwaVowel = [ɘ ɵ ə {ɵ̞}];
# Letters handled by the click rules, which emit کچ or نچ.
$Click = [ʘ ɋ ǀ ʇ ǃ ʗ ǂ ʄ ǁ ʖ];
# Any character that is not a letter, a mark or a number.
$Boundary = [^[:L:][:M:][:N:]];

# First pass: simplify the IPA input. The text is decomposed (NFD) so that
# combining marks can be matched as separate characters, and recomposed
# (NFC) at the end of the pass.
::NFD;
# Fold a tied affricate t͡ʃ / t͜ʃ (t + U+0361 or U+035C + ʃ) into the single
# letter ʧ BEFORE the tie bars are deleted by the next rule. Without this,
# the tie bar is stripped first, the sequence reaches the main pass as plain
# t + ʃ, and it is written تش instead of چ.
t [\u0361 \u035C] ʃ → ʧ;
# Delete these modifier letters, combining marks, tone letters/arrows and
# tie bars outright.
[ʰ ʱ ʼ ̃ ̰ ̋ ́ ̄ ̀ ̏ ̌ ̂ ˥ ˦ ˧ ˨ ˩ ꜜ ꜛ ↗ ↘ ͡ ͜ ̯] → ;
# Superscript letters are replaced by their full-size counterparts so the
# later rules can treat them as ordinary segments.
ʲ → j;
ᵐ → m;
ⁿ → n;
ᵑ → ŋ;
::NFC;

# Second pass: rewrite a few vowel sequences, still in IPA, so that the main
# pass below produces the preferred spelling.
# TODO: Diphthongs probably need more work.

# Romanian [sekujesk] → [sekujask], for emitting سیکویاسک not سیکویسک
# The whole three-segment match is replaced by the literal IPA string "uia".
$UVowel [j $IVowel] [e {e̞} $SchwaVowel] → uia;

# Kazakh Аягөз [ɑjɑɡy̯ʉz] → [ɑjɑɡiuz], to emit آیاگیوز not آیاگووز
# (the non-syllabic mark under y has already been deleted by the first pass)
yʉ → iu;

# End of this pass; the rules that follow run as a separate pass over the
# rewritten text.
::NULL;

# Vowels
# In each pair below the first rule handles the vowel right after $Boundary
# (only the part in {braces} is replaced; $Boundary is context) and the
# second handles it everywhere else. A following length mark ː is consumed
# together with the vowel. Rule order matters: the first matching rule wins.

# Schwa-like vowels: ای word-initially, ی when long, ه when followed by a
# non-letter/mark/number other than the syllable marker ".", else dropped.
$Boundary {$SchwaVowel ː?} → ای;
$SchwaVowel ː → ی;
{[$SchwaVowel e {e̞}]} [^[:L:][:M:][:N:][\.]] → ه;
$SchwaVowel → ;

# ɪ̈ is listed in $UVowel, but its base letter ɪ is also in $IVowel, whose
# rules come first and would turn the ɪ into ی, leaving the diaeresis
# behind. Handle ɪ̈ explicitly here so it is written like the other
# $UVowel members.
$Boundary {ɪ̈ ː?} → او;
ɪ̈ ː? → و;

# A j directly after an i-like vowel is absorbed into the same ی.
$Boundary {$IVowel ː?} → ای;
$IVowel ː? j? → ی;

$Boundary {$UVowel ː?} → او;
$UVowel ː? → و;

$Boundary {$AVowel ː?} → ا;
$AVowel ː? → ا;

$Boundary {$DarkAVowel ː?} → آ;
$DarkAVowel ː? → ا;

# Shadda for long (geminated) consonants
# The vowel rules earlier in this pass consume a ː that follows a vowel, so
# a ː reaching this rule follows some other segment.
ː → ّ;

# Affricates
# NOTE(review): tie bars are removed by the first pass of this transform, so
# by the time this rule runs the {t͡ʃ} alternative cannot match; only ʧ can.
[{t͡ʃ} ʧ] → چ;

# Clicks
# A click preceded by ɡ/g/ɠ/k is written کچ (the preceding letter is
# consumed); any other click, with or without a preceding n/ɲ, is written نچ.
[ɡ g ɠ k] $Click → کچ;
[n ɲ]? $Click → نچ;

# Nasal stops
[{m̥} m ɱ] → م;
[{n̼̊} {n̼} {n̥} n {ɳ̊} ɳ {ɲ̊} {ɲ̥} ɲ] → ن;
# ŋ/ɴ followed by k is written نک (the k is consumed). Otherwise it is
# written نگ, also consuming a following ɡ/g when present so that the گ is
# not doubled.
[{ŋ̊} ŋ {ɴ̥} ɴ] k → نک;
[{ŋ̊} ŋ {ɴ̥} ɴ] [ɡ g]? → نگ;

# Non-nasal stops
[p {p̪}] → پ;
[b {b̪} ɓ] → ب;
[{d̼} d ɗ ᶑ] → د;
[{t̼} t] → ت;
[ʈ] → ط;
[ɖ] → ض;
c → چ;
ɟ → دج;
k → ک;
# Both U+0261 (ɡ) and ASCII g are accepted.
[ɡ g ɠ] → گ;
[q ɢ ʡ ʛ] → ق;
# ʔ̞ must be handled before the bare ʔ is deleted. The h-like fricative rule
# further down lists {ʔ̞} → ه, but it comes after "ʔ → ;", which would
# otherwise remove the base letter first and leave the combining mark behind.
ʔ̞ → ه;
# A plain glottal stop is dropped.
ʔ → ;

# Sibilant fricatives
s → س;
z → ز;
# NOTE(review): ʄ is also a member of $Click, and the click rules appear
# earlier in this pass, so ʄ presumably never reaches this rule. Confirm
# which of the two treatments is intended.
[ʃ ʂ ɕ ʄ] → ش;
[ʒ ʐ ʑ] → ژ;

# Non-sibilant fricatives
[ɸ f] → ف;
[β v] → و;
[{θ̼} θ {θ̱}] → ث;
[{ð̼} ð {ð̠}] → ذ;
ç → ش;
# ʝ plus an optional following i-like vowel (and length mark) collapses into
# a single ی.
ʝ $IVowel? ː? → ی;
[x χ] → خ;
[ɣ ʁ] → غ;
ħ → ح;
ʕ → ع;
[h ɦ {ʔ̞}] → ه;

# Approximants, trills, flaps
ʋ → و;
ʙ → بر;
# r̝ is listed before the general r rule so that it wins and yields رژ.
{r̝} → رژ;
[{ɹ̥} {ɹ} {ɻ̊} {ɻ} {ɾ̥} ɾ {ɽ̊} ɽ {r̼} {r̥} r] → ر;
[{ʀ̥} ʀ] → غ;
ʜ → ح;
ʢ → ع;
# j plus an optional following i-like vowel (and length mark) collapses into
# a single ی.
j $IVowel? ː? → ی;

# Laterals
ɬ → شل;
ɮ → ژل;
# ʎ is written لی unless the next segment is an i-like vowel, j or ʝ. Those
# segments produce a ی themselves, so in that case the general rule below
# writes a bare ل instead.
{[{ʎ̥} ʎ]} [^ $IVowel j ʝ] → لی;
[{l̼} {l̥} l {ɭ̊} ɭ {ʎ̥} ʎ] → ل;
[ʟ {ʟ̠}] → غ;

# Independent pass for misc cleanup.
::NULL;

# Strip off syllable markers
\. → ;

# Sequences of three or more ووو look very confusing; we shorten them.
# Example: Polish Darłowo [darwɔvɔ] would otherwise come out with four و in
# a row (داروووو); after this rule it is written with two (داروو).
ووو+ → وو;

]]></tRule>
</transform>
</transforms>
</supplementalData>