android13/external/cldr/common/transforms/und_FONIPA-fa.xml

148 lines
3.7 KiB
XML
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE supplementalData SYSTEM "../../common/dtd/ldmlSupplemental.dtd">
<!--
Copyright © 1991-2016 Unicode, Inc.
CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)
For terms of use, see http://www.unicode.org/copyright.html
-->
<supplementalData>
<version number="$Revision$"/>
<transforms>
<transform source="und_FONIPA" target="fa" direction="forward" alias="fa-t-und-fonipa">
<tRule><![CDATA[
# Vowels
# ------
# In these rules, we produce ی و ا both for short and for long vowels.
# This would be wrong for writing Farsi or Arabic, but when transliterating
# foreign words and names, it is strongly preferred to vowel marks.
# Short schwa [ə] and a few other, schwa-like vowels get omitted entirely
# unless at the end of the word, in which case we emit ه whose Farsi
# word-final pronunciation comes close to [ə]. At the beginning of words,
# Farsi speakers prefer to see آ for [ɑ] and a few other similar-sounding
# dark vowels; note that this use of آ is quite different from Arabic.
# A schwa-like vowel at the beginning of a word is not omitted either:
# the vowel rules write it as ای.
#
# Set members written in {braces} are multi-code-point strings (a base
# letter plus a combining diacritic) that must match as a unit.

# i-like and e-like vowels; written ی (ای after a $Boundary).
$IVowel = [i ɪ e {e̞}];
# u-like and o-like vowels together with w-like glides; written و
# (او after a $Boundary).
$UVowel = [y {ɨ} {ʉ} ɯ u ʏ {ɪ̈} {ʊ̈} {ɯ̽} {ʊ} ø ɤ o {ø̞} {ɤ̞} {o̞} ɔ w {w̥} ʍ ʷ];
# Vowels written ا in every position.
$AVowel = [ɛ œ ɜ æ ɶ];
$DarkAVowel = [ʌ a ɑ ɒ ɐ ɞ {ä} {ɒ̈}]; # آ instead of ا at beginning of words
# Schwa-like vowels: ای after a $Boundary, ی when long, ه at the end of a
# word, otherwise dropped.
$SchwaVowel = [ɘ ɵ ə {ɵ̞}];
# Symbols handled by the click rules, which write them as کچ or نچ.
$Click = [ʘ ɋ ǀ ʇ ǃ ʗ ǂ ʄ ǁ ʖ];
# Any character that is not a letter, mark or number. Used as the preceding
# context that identifies a word-initial vowel.
# NOTE(review): the syllable marker "." is none of these either and is only
# stripped in the final pass, so a vowel right after "." is also treated as
# word-initial. Confirm this is intended.
$Boundary = [^[:L:][:M:][:N:]];
# Normalization pass
# ------------------
# Decompose first so that combining diacritics are separate code points and
# can be removed one at a time.
::NFD;
# Drop marks that have no counterpart in the output: aspiration (ʰ ʱ),
# ejective (ʼ), the combining tilde above and below, tone accents, tone
# letters, downstep/upstep, global rise/fall arrows, tie bars and the
# non-syllabic mark. Because tie bars are removed here, every later rule
# sees affricates and other tied sequences WITHOUT a tie bar.
[ʰ ʱ ʼ ̃ ̰ ̋ ́ ̄ ̀ ̏ ̌ ̂ ˥ ˦ ˧ ˨ ˩ ꜜ ꜛ ↗ ↘ ͡ ͜ ̯] → ;
# Superscript modifier letters become the corresponding full letters, which
# the consonant rules then handle.
ʲ → j;
ᵐ → m;
ⁿ → n;
ᵑ → ŋ;
# Recompose whatever is left.
::NFC;
# TODO: Diphthongs probably need more work.
# The rewrites in this pass produce IPA/Latin letters, not Arabic script;
# the later passes turn those letters into the final output.
#
# Romanian [sekujesk] → [sekujask], for emitting سیکویاسک not سیکویسک
# (The replacement text is literally "uia": a u-like vowel, then j or an
# i-like vowel, then e or a schwa-like vowel, all collapse to these three
# letters.)
$UVowel [j $IVowel] [e {e̞} $SchwaVowel] → uia;
# Kazakh Аягөз [ɑjɑɡy̯ʉz] → [ɑjɑɡiuz], to emit آیاگیوز not آیاگووز
# (The non-syllabic mark on y has already been stripped by the normalization
# pass, so the source here is plain yʉ.)
yʉ → iu;
# End of this pass; the following rules run over its complete output.
::NULL;
# Vowels
# In a rule of the form  context {source} → result  only the part in braces
# is replaced. Each word-initial rule ($Boundary context) must come before
# the general rule for the same vowel class, because the first matching rule
# wins. "ː?" lets a rule absorb an optional length mark.

# Schwa-like vowels: ای at the beginning of a word, ی when long.
$Boundary {$SchwaVowel ː?} → ای;
$SchwaVowel ː → ی;
# A schwa-like vowel or e directly followed by a character that is not a
# letter, mark, number or syllable marker "." is written ه.
{[$SchwaVowel e {e̞}]} [^[:L:][:M:][:N:][\.]] → ه;
# Any other schwa-like vowel is dropped.
$SchwaVowel → ;
# i-like vowels; the general rule also absorbs a following j so that the
# pair yields a single ی.
$Boundary {$IVowel ː?} → ای;
$IVowel ː? j? → ی;
# u-like vowels and w-like glides.
$Boundary {$UVowel ː?} → او;
$UVowel ː? → و;
# a-like vowels. NOTE(review): both rules produce ا, so the $Boundary
# variant looks redundant; presumably kept for symmetry with the other classes.
$Boundary {$AVowel ː?} → ا;
$AVowel ː? → ا;
# Dark a-like vowels: آ at the beginning of a word, ا elsewhere.
$Boundary {$DarkAVowel ː?} → آ;
$DarkAVowel ː? → ا;
# Shadda for long (geminated) consonants
# (Every length mark after a vowel was consumed by the rules above, so a ː
# that reaches this rule was not preceded by a vowel.)
ː → ّ;
# Affricates
# The tie bars (͡ and ͜) are deleted by the stripping rule in the earlier
# normalization pass, so [t͡ʃ] reaches this pass as the plain sequence tʃ.
# Match that form as well; otherwise the {t͡ʃ} alternative can never fire and
# the affricate falls through to the separate t and ʃ rules, giving تش
# instead of چ. A syllable marker between the two letters (t.ʃ) is still
# present in this pass and keeps such a sequence from being merged.
[{t͡ʃ} {tʃ} ʧ] → چ;
# Clicks
# A click preceded by one of ɡ g ɠ k is written کچ together with that
# consonant. This rule has to come first, because the next one matches any
# click.
[ɡ g ɠ k] $Click → کچ;
# Every other click, with or without a preceding n or ɲ, is written نچ.
[n ɲ]? $Click → نچ;
# Nasal stops
# Members in {braces} are a base letter plus combining diacritic(s), listed
# together with the plain letter so that both get the same output.
[{m̥} m ɱ] → م;
[{n̼̊} {n̼} {n̥} n {ɳ̊} ɳ {ɲ̊} {ɲ̥} ɲ] → ن;
# ŋ/ɴ followed by k is written نک, consuming the k.
[{ŋ̊} ŋ {ɴ̥} ɴ] k → نک;
# Otherwise ŋ/ɴ is written نگ; a following ɡ or g is consumed as well so
# that the گ is not written twice.
[{ŋ̊} ŋ {ɴ̥} ɴ] [ɡ g]? → نگ;
# Non-nasal stops
# Each rule maps a group of similar stops to one letter.
[p {p̪}] → پ;
[b {b̪} ɓ] → ب;
[{d̼} d ɗ ᶑ] → د;
[{t̼} t] → ت;
# ʈ and ɖ get the letters ط and ض rather than ت and د.
[ʈ] → ط;
[ɖ] → ض;
c → چ;
# ɟ is written as the two-letter sequence دج.
ɟ → دج;
k → ک;
# Both U+0261 ɡ and ASCII g are accepted.
[ɡ g ɠ] → گ;
[q ɢ ʡ ʛ] → ق;
# ʔ is dropped.
ʔ → ;
# Sibilant fricatives
s → س;
z → ز;
# NOTE(review): ʄ is also a member of $Click, and the click rules come
# earlier in this pass, so ʄ appears to be consumed there and never to reach
# this rule. Confirm which treatment is intended.
[ʃ ʂ ɕ ʄ] → ش;
[ʒ ʐ ʑ] → ژ;
# Non-sibilant fricatives
[ɸ f] → ف;
# β and v are written و, the same letter the u-like vowels use.
[β v] → و;
[{θ̼} θ {θ̱}] → ث;
[{ð̼} ð {ð̠}] → ذ;
ç → ش;
# ʝ absorbs a following i-like vowel and length mark, so the whole group
# yields a single ی.
ʝ $IVowel? ː? → ی;
[x χ] → خ;
[ɣ ʁ] → غ;
ħ → ح;
ʕ → ع;
[h ɦ {ʔ̞}] → ه;
# Approximants, trills, flaps
ʋ → و;
# ʙ is written as the two-letter sequence بر.
ʙ → بر;
# r with the raising diacritic is written رژ. It has to come before the
# general r rule below, which would otherwise consume the bare r.
{r̝} → رژ;
[{ɹ̥} {ɹ} {ɻ̊} {ɻ} {ɾ̥} ɾ {ɽ̊} ɽ {r̼} {r̥} r] → ر;
# ʀ gets غ, the same letter as ɣ and ʁ.
[{ʀ̥} ʀ] → غ;
ʜ → ح;
ʢ → ع;
# j absorbs a following i-like vowel and length mark, so the whole group
# yields a single ی.
j $IVowel? ː? → ی;
# Laterals
# Lateral fricatives are written as a fricative letter followed by ل.
ɬ → شل;
ɮ → ژل;
# ʎ is written لی when the next character is not an i-like vowel, j or ʝ.
# Only the part in braces is replaced; the following character is context.
{[{ʎ̥} ʎ]} [^ $IVowel j ʝ] → لی;
# Every other lateral approximant is plain ل. That includes ʎ before an
# i-like vowel, j or ʝ, each of which is written ی by its own rule.
[{l̼} {l̥} l {ɭ̊} ɭ {ʎ̥} ʎ] → ل;
[ʟ {ʟ̠}] → غ;
# Independent pass for misc cleanup.
# It runs over the complete output of the passes above.
::NULL;
# Strip off syllable markers
# (They were kept until now because earlier rules use "." as context.)
\. → ;
# Sequences of three or more ووو look very confusing; we shorten them.
# Polish Darłowo [darwɔvɔ] would otherwise come out as داروووو (four و);
# it is shortened to داروو (two و).
ووو+ → وو;
]]></tRule>
</transform>
</transforms>
</supplementalData>