154 lines
		
	
	
		
			5.5 KiB
		
	
	
	
		
			XML
		
	
	
	
			
		
		
	
	
			154 lines
		
	
	
		
			5.5 KiB
		
	
	
	
		
			XML
		
	
	
	
| <?xml version="1.0" encoding="UTF-8" ?>
 | ||
| <!DOCTYPE supplementalData SYSTEM "../../common/dtd/ldmlSupplemental.dtd">
 | ||
| <!--
 | ||
| Copyright © 1991-2013 Unicode, Inc.
 | ||
| CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)
 | ||
| For terms of use, see http://www.unicode.org/copyright.html
 | ||
| -->
 | ||
| <supplementalData>
 | ||
| 	<version number="$Revision$"/>
 | ||
| 	<transforms>
 | ||
| 		<transform source="Arab" target="Latn" direction="both" alias="Arabic-Latin und-Latn-t-und-arab" backwardAlias="Latin-Arabic und-Arab-t-und-latn">
 | ||
| 			<tRule><![CDATA[
 | ||
| # Generally follows UNGEGN
 | ||
| #     http://www.eki.ee/wgrs/rom1_ar.pdf
 | ||
| # Occasionally deviates in the direction of ISO 233
 | ||
| #     http://homepage.mac.com/sirbinks/pdf/Arabic.pdf
 | ||
| # a) where required for disambiguation.
 | ||
| # b) with underdot instead of cedilla for letters like SAD,
 | ||
| #    since those are explicitly in Unicode for transliteration.
 | ||
| # c) with extra non-Arabic-language letters, like PEH
 | ||
| #
 | ||
| # Does *not* do assimilation of "al", nor hyphenation.
 | ||
| # While it could be done, we need to determine whether a prefix "al" could
 | ||
| # occur other than as the definite article (since no space is used).
 | ||
| :: [[:Arabic:][:block=ARABIC:][ⁿ،؛؟ـً-ٕ٠-٬۰-۹﷼ښ]] ;
 | ||
| :: NFKD (NFC);
 | ||
| $disambig =  ̱ ;
 | ||
| $disambig2 =  ̰ ;
 | ||
| $under =  ̣ ;
 | ||
| $descender = ˌ; # low vertical line modifier (looks like U+02CC MODIFIER LETTER LOW VERTICAL LINE - confirm); Latin-side suffix used by the rule that pairs an Arabic letter with 's $descender'
 | ||
| $notAbove = [[:^ccc=0:] & [:^ccc=230:]]; # characters whose canonical combining class is neither 0 nor 230; matched (and preserved via $1) between t and the diaeresis in the TEH MARBUTA reverse rule
 | ||
| 
 | ||
| # non-letters
 | ||
| [:Nd:]{٫}[:Nd:] ↔ [:Nd:]{','}[:Nd:] ; # ARABIC DECIMAL SEPARATOR
 | ||
| [:Nd:]{٬}[:Nd:] ↔ [:Nd:]{'.'}[:Nd:] ; # ARABIC THOUSANDS SEPARATOR
 | ||
| ٫ ↔ ',' $disambig ; # ARABIC DECIMAL SEPARATOR
 | ||
| ٬ ↔ '.' $disambig ; # ARABIC THOUSANDS SEPARATOR
 | ||
| #  ٭ ↔ ; # ARABIC FIVE POINTED STAR // no need to transliterate
 | ||
| ، ↔ ',' ; # ARABIC COMMA
 | ||
| ؛ ↔ ';' ; # ARABIC SEMICOLON
 | ||
| ؟ ↔ '?' ; # ARABIC QUESTION MARK
 | ||
| ٪ ↔ '%' ; # ARABIC PERCENT SIGN
 | ||
| ۰ ↔ 0 $disambig ; # EXTENDED ARABIC-INDIC DIGIT ZERO
 | ||
| ۱ ↔ 1 $disambig ; # EXTENDED ARABIC-INDIC DIGIT ONE
 | ||
| ۲ ↔ 2 $disambig ; # EXTENDED ARABIC-INDIC DIGIT TWO
 | ||
| ۳ ↔ 3 $disambig ; # EXTENDED ARABIC-INDIC DIGIT THREE
 | ||
| ۴ ↔ 4 $disambig ; # EXTENDED ARABIC-INDIC DIGIT FOUR
 | ||
| ۵ ↔ 5 $disambig ; # EXTENDED ARABIC-INDIC DIGIT FIVE
 | ||
| ۶ ↔ 6 $disambig ; # EXTENDED ARABIC-INDIC DIGIT SIX
 | ||
| ۷ ↔ 7 $disambig ; # EXTENDED ARABIC-INDIC DIGIT SEVEN
 | ||
| ۸ ↔ 8 $disambig ; # EXTENDED ARABIC-INDIC DIGIT EIGHT
 | ||
| ۹ ↔ 9 $disambig ; # EXTENDED ARABIC-INDIC DIGIT NINE
 | ||
| ٠ ↔ 0 ; # ARABIC-INDIC DIGIT ZERO
 | ||
| ١ ↔ 1 ; # ARABIC-INDIC DIGIT ONE
 | ||
| ٢ ↔ 2 ; # ARABIC-INDIC DIGIT TWO
 | ||
| ٣ ↔ 3 ; # ARABIC-INDIC DIGIT THREE
 | ||
| ٤ ↔ 4 ; # ARABIC-INDIC DIGIT FOUR
 | ||
| ٥ ↔ 5 ; # ARABIC-INDIC DIGIT FIVE
 | ||
| ٦ ↔ 6 ; # ARABIC-INDIC DIGIT SIX
 | ||
| ٧ ↔ 7 ; # ARABIC-INDIC DIGIT SEVEN
 | ||
| ٨ ↔ 8 ; # ARABIC-INDIC DIGIT EIGHT
 | ||
| ٩ ↔ 9 ; # ARABIC-INDIC DIGIT NINE
 | ||
| 
 | ||
| # letters
 | ||
| # long vowels
 | ||
| َا↔ ā ; # ARABIC FATHA, ARABIC LETTER ALEF
 | ||
| ُو ↔ ū ; # ARABIC DAMMA, ARABIC LETTER WAW
 | ||
| ِي ↔ ī ; # ARABIC KASRA, ARABIC LETTER YEH
 | ||
| # longer items moved here to prevent masking
 | ||
| ث ↔ t h $disambig ; # ARABIC LETTER THEH
 | ||
| ذ ↔ d h $disambig ; # ARABIC LETTER THAL
 | ||
| ش ↔ s h $disambig ; # ARABIC LETTER SHEEN
 | ||
| ص ↔ s $under ; # ARABIC LETTER SAD
 | ||
| ض ↔ d $under ; # ARABIC LETTER DAD
 | ||
| ط ↔ t $under ; # ARABIC LETTER TAH
 | ||
| ظ ↔ z $under ; # ARABIC LETTER ZAH
 | ||
| غ ↔ g h $disambig ; # ARABIC LETTER GHAIN
 | ||
| 
 | ||
| # WARNING: special case
 | ||
| # ←t, umlaut, half-ring below→ will be canonically ordered as ←t, half-ring below, umlaut→
 | ||
| # so on the return, we have to skip over (but preserve) the half-ring below (or others like it)
 | ||
| # ةٕ ← ẗ̹ ; # LATIN SMALL LETTER T, COMBINING RIGHT HALF RING BELOW, COMBINING DIAERESIS
 | ||
| ة ↔ t ̈ ; # ARABIC LETTER TEH MARBUTA
 | ||
| ة | $1 ← t ($notAbove+) ̈ ; # ARABIC LETTER TEH MARBUTA
 | ||
| 
 | ||
| # non-Arabic language
 | ||
| ژ ↔ z h $disambig ; # ARABIC LETTER JEH
 | ||
| ڭ ↔ n $disambig g ; # ARABIC LETTER NG
 | ||
| ۋ ↔ v $disambig ; # ARABIC LETTER VE
 | ||
| ی ↔ y $disambig2 ; # ARABIC LETTER FARSI YEH
 | ||
| ښ ↔ s $descender; # ARABIC LETTER SEEN WITH DOT BELOW AND DOT ABOVE
 | ||
| 
 | ||
| # Arabic language
 | ||
| ء ↔ ʾ ; # ARABIC LETTER HAMZA
 | ||
| ا ↔ a $under; # ARABIC LETTER ALEF
 | ||
| ب ↔ b ; # ARABIC LETTER BEH
 | ||
| ت ↔ t ; # ARABIC LETTER TEH
 | ||
| ج ↔ j ; # ARABIC LETTER JEEM
 | ||
| ح ↔ h $under ; # ARABIC LETTER HAH
 | ||
| خ ↔ k h $disambig ; # ARABIC LETTER KHAH
 | ||
| د ↔ d ; # ARABIC LETTER DAL
 | ||
| ر ↔ r ; # ARABIC LETTER REH
 | ||
| ز ↔ z ; # ARABIC LETTER ZAIN
 | ||
| س ↔ s ; # ARABIC LETTER SEEN
 | ||
| ع ↔ ʿ ; # ARABIC LETTER AIN
 | ||
| ـ → ; # ARABIC TATWEEL
 | ||
| ف ↔ f ; # ARABIC LETTER FEH
 | ||
| ق ↔ q ; # ARABIC LETTER QAF
 | ||
| ک ↔ k $disambig ; # ARABIC LETTER KEHEH
 | ||
| ك ↔ k ; # ARABIC LETTER KAF
 | ||
| ل ↔ l ; # ARABIC LETTER LAM
 | ||
| م ↔ m ; # ARABIC LETTER MEEM
 | ||
| ن ↔ n ; # ARABIC LETTER NOON
 | ||
| ه ↔ h ; # ARABIC LETTER HEH
 | ||
| و ↔ w ; # ARABIC LETTER WAW
 | ||
| ى ↔ y $disambig ; # ARABIC LETTER ALEF MAKSURA
 | ||
| ي ↔ y ; # ARABIC LETTER YEH
 | ||
| ً ↔ aⁿ ; # ARABIC FATHATAN
 | ||
| ٌ ↔ uⁿ ; # ARABIC DAMMATAN
 | ||
| ٍ ↔ iⁿ ; # ARABIC KASRATAN
 | ||
| َ ↔ a ; # ARABIC FATHA
 | ||
| ُ ↔ u ; # ARABIC DAMMA
 | ||
| ِ ↔ i ; # ARABIC KASRA
 | ||
| ّ ↔   ̃ ; # ARABIC SHADDA
 | ||
| ْ ↔   ̊ ; # ARABIC SUKUN
 | ||
| 
 | ||
| # special combining marks
 | ||
| ٓ ↔  ̂ ; # ARABIC MADDAH ABOVE
 | ||
| ٔ ↔  ̉ ; # ARABIC HAMZA ABOVE
 | ||
| ٕ ↔  ̹ ; # ARABIC HAMZA BELOW
 | ||
| 
 | ||
| # Some non-Arabic-language letters (not in UNGEGN)
 | ||
| پ ↔ p ; # ARABIC LETTER PEH
 | ||
| چ ↔ c h $disambig ; # ARABIC LETTER TCHEH
 | ||
| ڤ ↔ v ; # ARABIC LETTER VEH
 | ||
| # ڥ ↔ v $disambig ; # ARABIC LETTER FEH WITH THREE DOTS BELOW
 | ||
| # ڢ ↔ f $disambig ; # ARABIC LETTER FEH WITH DOT MOVED BELOW
 | ||
| گ ↔ g ; # ARABIC LETTER GAF
 | ||
| 
 | ||
| # fallbacks
 | ||
| | s ← c } [eiy];
 | ||
| | k ← c ;
 | ||
| | i ← e ;
 | ||
| | u ← o ;
 | ||
| | ks ← x ;
 | ||
| | n ← ⁿ;
 | ||
| :: (lower) ;
 | ||
| ::NFC (NFD);
 | ||
| :: ( [[:Latin:] [%,.0-9;?ʾ-ʿ̂-̄̈-̣̰̊-̱̹;ˌ]] );
 | ||
| 			]]></tRule>
 | ||
| 		</transform>
 | ||
| 	</transforms>
 | ||
| </supplementalData>
 |