242 lines
7.7 KiB
XML
242 lines
7.7 KiB
XML
|
<?xml version="1.0" encoding="UTF-8" ?>
|
|||
|
<!DOCTYPE supplementalData SYSTEM "../../common/dtd/ldmlSupplemental.dtd">
|
|||
|
<!--
|
|||
|
Copyright © 1991-2013 Unicode, Inc.
|
|||
|
CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)
|
|||
|
For terms of use, see http://www.unicode.org/copyright.html
|
|||
|
-->
|
|||
|
<supplementalData>
|
|||
|
<version number="$Revision$"/>
|
|||
|
<transforms>
|
|||
|
<transform source="fa" target="fa_Latn" variant="BGN" direction="forward" draft="contributed" alias="Persian-Latin/BGN fa-Latn-t-fa-m0-bgn">
|
|||
|
<tRule><![CDATA[
|
|||
|
#
|
|||
|
########################################################################
|
|||
|
# BGN/PCGN 1958 System
|
|||
|
#
|
|||
|
# This system was adopted by the BGN in 1946 and by the PCGN in 1958.
|
|||
|
# It is used for the romanization of geographic names in Iran and
|
|||
|
# for Persian-language names in Afghanistan.
|
|||
|
#
|
|||
|
# Originally prepared by Michael Everson <everson@evertype.com>
|
|||
|
########################################################################
|
|||
|
#
|
|||
|
# MINIMAL FILTER: Persian-Latin
|
|||
|
#
|
|||
|
|
|||
|
:: [[:arabic:][:block=ARABIC:][ءآابةتثجحخدذرزسشصضطظعغفقكلمنهویيَُِّْ٠١٢٣٤٥٦٧٨٩پچژگی]] ;
|
|||
|
:: NFKD (NFC) ;
|
|||
|
#
|
|||
|
#
|
|||
|
########################################################################
|
|||
|
|
|||
|
|
|||
|
#
|
|||
|
########################################################################
|
|||
|
#
|
|||
|
# Define All Transformation Variables
|
|||
|
#
|
|||
|
########################################################################
|
|||
|
#
|
|||
|
|
|||
|
$alef = ’;
|
|||
|
$ayin = ‘;
|
|||
|
$disambig = ̱ ;
|
|||
|
#
|
|||
|
#
|
|||
|
# Use this $wordBoundary until bug 2034 is fixed in ICU:
|
|||
|
# http://bugs.icu-project.org/cgi-bin/icu-bugs/transliterate?id=2034;expression=boundary;user=guest
|
|||
|
#
|
|||
|
|
|||
|
$wordBoundary = [^[:L:][:M:][:N:]] ;
|
|||
|
#
|
|||
|
#
|
|||
|
########################################################################
|
|||
|
|
|||
|
# non-letters
|
|||
|
[:Nd:]{٫}[:Nd:] ↔ [:Nd:]{','}[:Nd:] ; # ARABIC DECIMAL SEPARATOR
|
|||
|
[:Nd:]{٬}[:Nd:] ↔ [:Nd:]{'.'}[:Nd:] ; # ARABIC THOUSANDS SEPARATOR
|
|||
|
٫ ↔ ',' $disambig ; # ARABIC DECIMAL SEPARATOR
|
|||
|
٬ ↔ '.' $disambig ; # ARABIC THOUSANDS SEPARATOR
|
|||
|
# ٭ ↔ ; # ARABIC FIVE POINTED STAR // no need to transliterate
|
|||
|
، ↔ ',' ; # ARABIC COMMA
|
|||
|
؛ ↔ ';' ; # ARABIC SEMICOLON
|
|||
|
؟ ↔ '?' ; # ARABIC QUESTION MARK
|
|||
|
٪ ↔ '%' ; # ARABIC PERCENT SIGN
|
|||
|
٠ ↔ 0 $disambig ; # ARABIC-INDIC DIGIT ZERO
|
|||
|
١ ↔ 1 $disambig ; # ARABIC-INDIC DIGIT ONE
|
|||
|
٢ ↔ 2 $disambig ; # ARABIC-INDIC DIGIT TWO
|
|||
|
٣ ↔ 3 $disambig ; # ARABIC-INDIC DIGIT THREE
|
|||
|
٤ ↔ 4 $disambig ; # ARABIC-INDIC DIGIT FOUR
|
|||
|
٥ ↔ 5 $disambig ; # ARABIC-INDIC DIGIT FIVE
|
|||
|
٦ ↔ 6 $disambig ; # ARABIC-INDIC DIGIT SIX
|
|||
|
٧ ↔ 7 $disambig ; # ARABIC-INDIC DIGIT SEVEN
|
|||
|
٨ ↔ 8 $disambig ; # ARABIC-INDIC DIGIT EIGHT
|
|||
|
٩ ↔ 9 $disambig ; # ARABIC-INDIC DIGIT NINE
|
|||
|
۰ ↔ 0 ; # EXTENDED ARABIC-INDIC DIGIT ZERO
|
|||
|
۱ ↔ 1 ; # EXTENDED ARABIC-INDIC DIGIT ONE
|
|||
|
۲ ↔ 2 ; # EXTENDED ARABIC-INDIC DIGIT TWO
|
|||
|
۳ ↔ 3 ; # EXTENDED ARABIC-INDIC DIGIT THREE
|
|||
|
۴ ↔ 4 ; # EXTENDED ARABIC-INDIC DIGIT FOUR
|
|||
|
۵ ↔ 5 ; # EXTENDED ARABIC-INDIC DIGIT FIVE
|
|||
|
۶ ↔ 6 ; # EXTENDED ARABIC-INDIC DIGIT SIX
|
|||
|
۷ ↔ 7 ; # EXTENDED ARABIC-INDIC DIGIT SEVEN
|
|||
|
۸ ↔ 8 ; # EXTENDED ARABIC-INDIC DIGIT EIGHT
|
|||
|
۹ ↔ 9 ; # EXTENDED ARABIC-INDIC DIGIT NINE
|
|||
|
|
|||
|
#
|
|||
|
########################################################################
|
|||
|
#
|
|||
|
# Rules moved to front to avoid masking
|
|||
|
#
|
|||
|
########################################################################
|
|||
|
|
|||
|
|
|||
|
#
|
|||
|
########################################################################
|
|||
|
#
|
|||
|
# BGN Page 89 Rule 4
|
|||
|
#
|
|||
|
# The character sequences كه , زه , سه , and گه may be romanized k·h, z·h,
|
|||
|
# s·h, and g·h in order to differentiate those romanizations from the
|
|||
|
# digraphs kh, zh, sh, and gh.
|
|||
|
#
|
|||
|
########################################################################
|
|||
|
#
|
|||
|
|
|||
|
كه → k·h ; # ARABIC LETTER KAF + HEH
|
|||
|
زه → z·h ; # ARABIC LETTER ZAIN + HEH
|
|||
|
سه → s·h ; # ARABIC LETTER SEEN + HEH
|
|||
|
گه → g·h ; # ARABIC LETTER GAF + HEH
|
|||
|
#
|
|||
|
#
|
|||
|
########################################################################
|
|||
|
#
|
|||
|
# End Rule 4
|
|||
|
#
|
|||
|
########################################################################
|
|||
|
|
|||
|
|
|||
|
|
|||
|
#
|
|||
|
########################################################################
|
|||
|
#
|
|||
|
# BGN Page 91 Rule 7
|
|||
|
#
|
|||
|
# Doubled consonant sounds are represented in Arabic script by
|
|||
|
# placing a shaddah ( ّ ) over a consonant character. In romanization
|
|||
|
# the letter should be doubled. [The remainder of this rule deals with
|
|||
|
# the definite article and is lexical.]
|
|||
|
#
|
|||
|
########################################################################
|
|||
|
#
|
|||
|
|
|||
|
بّ → bb ; # ARABIC LETTER BEH + SHADDA
|
|||
|
پّ → pp ; # ARABIC LETTER PEH + SHADDA
|
|||
|
تّ → tt ; # ARABIC LETTER TEH + SHADDA
|
|||
|
ثّ → s̄s̄ ; # ARABIC LETTER THEH + SHADDA
|
|||
|
جّ → jj ; # ARABIC LETTER JEEM + SHADDA
|
|||
|
چّ → chch ; # ARABIC LETTER TCHEH + SHADDA
|
|||
|
حّ → ḥḥ ; # ARABIC LETTER HAH + SHADDA
|
|||
|
خّ → khkh ; # ARABIC LETTER KHAH + SHADDA
|
|||
|
دّ → dd ; # ARABIC LETTER DAL + SHADDA
|
|||
|
ذّ → z̄z̄ ; # ARABIC LETTER THAL + SHADDA
|
|||
|
رّ → rr ; # ARABIC LETTER REH + SHADDA
|
|||
|
زّ → zz ; # ARABIC LETTER ZAIN + SHADDA
|
|||
|
ژّ → zhzh ; # ARABIC LETTER JEH + SHADDA
|
|||
|
سّ → ss ; # ARABIC LETTER SEEN + SHADDA
|
|||
|
شّ → shsh ; # ARABIC LETTER SHEEN + SHADDA
|
|||
|
صّ → ṣṣ ; # ARABIC LETTER SAD + SHADDA
|
|||
|
ضّ → ẕẕ ; # ARABIC LETTER DAD + SHADDA (doubles the ẕ used for single DAD)
|
|||
|
طّ → ṭṭ ; # ARABIC LETTER TAH + SHADDA
|
|||
|
ظّ → ẓẓ ; # ARABIC LETTER ZAH + SHADDA
|
|||
|
عّ → $ayin $ayin ; # ARABIC LETTER AIN + SHADDA
|
|||
|
غّ → ghgh ; # ARABIC LETTER GHAIN + SHADDA
|
|||
|
فّ → ff ; # ARABIC LETTER FEH + SHADDA
|
|||
|
قّ → qq ; # ARABIC LETTER QAF + SHADDA
|
|||
|
كّ → kk ; # ARABIC LETTER KAF + SHADDA
|
|||
|
لّ → ll ; # ARABIC LETTER LAM + SHADDA
|
|||
|
مّ → mm ; # ARABIC LETTER MEEM + SHADDA
|
|||
|
نّ → nn ; # ARABIC LETTER NOON + SHADDA
|
|||
|
هّ → hh ; # ARABIC LETTER HEH + SHADDA
|
|||
|
وّ → vv ; # ARABIC LETTER WAW + SHADDA (doubles the v used for single WAW)
|
|||
|
یّ → yy ; # ARABIC LETTER FARSI YEH + SHADDA
|
|||
|
#
|
|||
|
#
|
|||
|
########################################################################
|
|||
|
#
|
|||
|
# End Rule 7
|
|||
|
#
|
|||
|
########################################################################
|
|||
|
|
|||
|
|
|||
|
|
|||
|
#
|
|||
|
########################################################################
|
|||
|
#
|
|||
|
# Start of Transformations
|
|||
|
#
|
|||
|
########################################################################
|
|||
|
#
|
|||
|
|
|||
|
$wordBoundary{ء → ; # ARABIC LETTER HAMZA
|
|||
|
ء → $alef ; # ARABIC LETTER HAMZA
|
|||
|
$wordBoundary{ا → ; # ARABIC LETTER ALEF
|
|||
|
آ → $alef ā ; # ARABIC LETTER ALEF WITH MADDA ABOVE
|
|||
|
ب → b ; # ARABIC LETTER BEH
|
|||
|
پ → p ; # ARABIC LETTER PEH
|
|||
|
ت → t ; # ARABIC LETTER TEH
|
|||
|
ة → h ; # ARABIC LETTER TEH MARBUTA
|
|||
|
ث → s̄ ; # ARABIC LETTER THEH
|
|||
|
ج → j ; # ARABIC LETTER JEEM
|
|||
|
چ → ch ; # ARABIC LETTER TCHEH
|
|||
|
ح → ḥ ; # ARABIC LETTER HAH
|
|||
|
خ → kh ; # ARABIC LETTER KHAH
|
|||
|
د → d ; # ARABIC LETTER DAL
|
|||
|
ذ → z̄ ; # ARABIC LETTER THAL
|
|||
|
ر → r ; # ARABIC LETTER REH
|
|||
|
ز → z ; # ARABIC LETTER ZAIN
|
|||
|
ژ → zh ; # ARABIC LETTER JEH
|
|||
|
س → s ; # ARABIC LETTER SEEN
|
|||
|
ش → sh ; # ARABIC LETTER SHEEN
|
|||
|
ص → ṣ ; # ARABIC LETTER SAD
|
|||
|
ض → ẕ ; # ARABIC LETTER DAD
|
|||
|
ط → ṭ ; # ARABIC LETTER TAH
|
|||
|
ظ → ẓ ; # ARABIC LETTER ZAH
|
|||
|
ع → $ayin ; # ARABIC LETTER AIN
|
|||
|
غ → gh ; # ARABIC LETTER GHAIN
|
|||
|
ف → f ; # ARABIC LETTER FEH
|
|||
|
ق → q ; # ARABIC LETTER QAF
|
|||
|
ک ↔ k ; # ARABIC LETTER KEHEH
|
|||
|
ك ↔ k $disambig ; # ARABIC LETTER KAF
|
|||
|
گ → g ; # ARABIC LETTER GAF
|
|||
|
ل → l ; # ARABIC LETTER LAM
|
|||
|
م → m ; # ARABIC LETTER MEEM
|
|||
|
ن → n ; # ARABIC LETTER NOON
|
|||
|
ه → h ; # ARABIC LETTER HEH
|
|||
|
و → v ; # ARABIC LETTER WAW
|
|||
|
ی → y ; # ARABIC LETTER FARSI YEH
|
|||
|
|
|||
|
َا → ā ; # ARABIC FATHA + ALEF
|
|||
|
َی → á ; # ARABIC FATHA + FARSI YEH
|
|||
|
َوْ → ow ; # ARABIC FATHA + WAW + SUKUN
|
|||
|
َ → a ; # ARABIC FATHA
|
|||
|
|
|||
|
ِي → ī ; # ARABIC KASRA + YEH
|
|||
|
ِ → e ; # ARABIC KASRA
|
|||
|
|
|||
|
ُو → ū ; # ARABIC DAMMA + WAW
|
|||
|
ُ → o ; # ARABIC DAMMA
|
|||
|
|
|||
|
ْ → ; # ARABIC SUKUN
|
|||
|
::NFC (NFD) ;
|
|||
|
|
|||
|
#
|
|||
|
#
|
|||
|
########################################################################
|
|||
|
|
|||
|
]]></tRule>
|
|||
|
</transform>
|
|||
|
</transforms>
|
|||
|
</supplementalData>
|