265 lines
		
	
	
		
			8.6 KiB
		
	
	
	
		
			XML
		
	
	
	
			
		
		
	
	
			265 lines
		
	
	
		
			8.6 KiB
		
	
	
	
		
			XML
		
	
	
	
| <?xml version="1.0" encoding="UTF-8" ?>
 | ||
| <!DOCTYPE supplementalData SYSTEM "../../common/dtd/ldmlSupplemental.dtd">
 | ||
| <!--
 | ||
| Copyright © 1991-2013 Unicode, Inc.
 | ||
| CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)
 | ||
| For terms of use, see http://www.unicode.org/copyright.html
 | ||
| -->
 | ||
| <supplementalData>
 | ||
| 	<version number="$Revision$"/>
 | ||
| 	<transforms>
 | ||
| 		<transform source="Grek" target="Latn" direction="both" alias="Greek-Latin und-Latn-t-und-grek" backwardAlias="Latin-Greek und-Grek-t-und-latn">
 | ||
| 			<tRule><![CDATA[
 | ||
| # Rules are predicated on running NFD first, and NFC afterwards
 | ||
| # :: [\u0000-\u007F \u0370-Ͽ [:Greek:] [:nonspacing mark:]] ;
 | ||
| # MINIMAL FILTER GENERATED FOR: Greek-Latin
 | ||
| :: [;µ·ÄËÏÖÜäëïöüÿ-āĒ-ēĪ-īŌ-ōŪ-ūŸǕ-ǜǞ-ǣǬ-ǭȪ-ȭȰ-ȳ̄̈̓-̔͂-ͅͺ;Ά-ΊΌΎ-ΡΣ-ώϐ-ϗϛϝϟϡϣϥϧϩϫϭϯ-ϵϷ-\u07FBЁЇёїӒ-ӓӚ-ӟӢ-ӧӪ-ӱӴ-ӵӸ-ӹḔ-ḗḠ-ḡḦ-ḧḮ-ḯḸ-ḹṎ-ṓṜ-ṝṺ-ṻẄ-ẅẌ-ẍẗἀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼι῁-ῄῆ-ῌ῏-ΐῖ-Ί῟-Ῥῲ-ῴῶ-ῼΩϹ] ;
 | ||
| :: NFD (NFC) ;
 | ||
| # TEST CASES
 | ||
| # Ὀλίγοι ἔμφονες πολλῶν ἀφρόνων φοβερώτεροι — Πλάτωνος
 | ||
| # ᾂ ᾒ ᾢ ᾃ ᾓ ᾣ
 | ||
| # ᾳ ῃ ῳ ὃ ὄ
 | ||
| # ὠς ὡς ὢς ὣς
 | ||
| # Ὠς Ὡς Ὢς Ὣς
 | ||
| # ὨΣ ὩΣ ὪΣ ὫΣ
 | ||
| # Ạ, ạ, Ẹ, ẹ, Ọ, ọ
 | ||
| # Useful variables
 | ||
| $lower = [[:latin:][:greek:] & [:Ll:]];
 | ||
| $glower = [[:greek:] & [:Ll:]];
 | ||
| $upper = [[:latin:][:greek:] & [:Lu:]] ;
 | ||
| $accent = [:M:] ;
 | ||
| # NOTE: restrict to just the Greek & Latin accents that we care about
 | ||
| # TODO: broaden out once interation is fixed
 | ||
| $accentMinus = [ [̀-ͅ] & [:M:] - [̸]] ;
 | ||
| $macron = ̄ ;
 | ||
| $ddot = ̈ ;
 | ||
| $ddotmac = [$ddot$macron];
 | ||
| $lcgvowel = [αεηιουω] ;
 | ||
| $ucgvowel = [ΑΕΗΙΟΥΩ] ;
 | ||
| $gvowel = [$lcgvowel $ucgvowel] ;
 | ||
| $lcgvowelC = [$lcgvowel $accent] ;
 | ||
| $evowel = [aeiouyAEIOUY];
 | ||
| $evowel2 = [iuyIUY];
 | ||
| $vowel = [ $evowel $gvowel] ;
 | ||
| $gammaLike = [ΓΚΞΧγκξχϰ] ;
 | ||
| $egammaLike = [GKXCgkxc] ;
 | ||
| $smooth = ̓ ;
 | ||
| $rough = ̔ ;
 | ||
| $iotasub = ͅ ;
 | ||
| $evowel_i = [$evowel-[iI]] ;
 | ||
| $evowel2_i = [uyUY];
 | ||
| $underbar = ̱;
 | ||
| $afterLetter = [:L:] [[:M:]\']* ;
 | ||
| $beforeLetter = [[:M:]\']* [:L:] ;
 | ||
| $beforeLower = $accent * $lower ;
 | ||
| $notLetter = [^[:L:][:M:]] ;
 | ||
| $under = ̱;
 | ||
| # Fix punctuation: the Greek-side ";" is written as "?" on the Latin side, and "·" as ":".
 | ||
| # preserve original: a literal ":" or "?" in the source is tagged with $under so the reverse direction can tell it apart and restore it
 | ||
| \: ↔ \: $under ; # literal colon ↔ colon followed by $under
 | ||
| \? ↔ \? $under ; # literal question mark ↔ question mark followed by $under
 | ||
| \; ↔ \? ; # semicolon ↔ question mark
 | ||
| · ↔ \: ; # raised dot ↔ colon
 | ||
| # CIRCUMFLEX: convert greek circumflex to normal one. Could use tilde or inverted breve
 | ||
| ͂ ↔ ̂ ;
 | ||
| # IOTA: convert iota subscript to iota
 | ||
| # first make previous alpha long!
 | ||
| $accent_minus = [[$accent]-[$iotasub$macron]];
 | ||
| Α } $accent_minus * $iotasub → | Α $macron ;
 | ||
| α } $accent_minus * $iotasub → | α $macron ;
 | ||
| # now convert to uppercase if after uppercase, otherwise to lowercase
 | ||
| $upper $accent * { $iotasub → I ;
 | ||
| $iotasub → i ;
 | ||
| | $1 $iotasub ← ($evowel $macron $accentMinus *) i ;
 | ||
| | $1 $iotasub ← ($evowel $macron $accentMinus *) I ;
 | ||
| # BREATHING
 | ||
| # Convert rough breathing to h, and move before letters.
 | ||
| # Titlecase: capital vowel with rough breathing before a lowercase letter (A ` x) → H a x
 | ||
| Α ($macron?) $rough } $beforeLower → H | α $1;
 | ||
| Ε $rough } $beforeLower → H | ε;
 | ||
| Η $rough } $beforeLower → H | η ;
 | ||
| Ι ($ddot?) $rough } $beforeLower → H | ι  $1;
 | ||
| Ο $rough } $beforeLower → H | ο ;
 | ||
| Υ $rough } $beforeLower → H | υ ;
 | ||
| Ω ($ddot?) $rough } $beforeLower → H | ω $1;
 | ||
| # Titlecase: capital vowel, then a Greek lowercase letter carrying the rough breathing (A x `) → H a x
 | ||
| Α ($glower $macron?) $rough → H | α $1 ;
 | ||
| Ε ($glower) $rough → H | ε $1 ;
 | ||
| Η ($glower) $rough → H | η $1 ;
 | ||
| Ι ($glower $ddot?) $rough → H | ι $1 ;
 | ||
| Ο ($glower) $rough → H | ο $1 ;
 | ||
| Υ ($glower) $rough → H | υ $1 ;
 | ||
| Ω ($glower  $ddot?) $rough → H | ω $1 ;
 | ||
| #Otherwise, make x ` into h x and X ` into H X
 | ||
| ($lcgvowel + $ddotmac? ) $rough → h | $1 ;
 | ||
| ($gvowel + $ddotmac? ) $rough → H | $1 ;
 | ||
| # Go backwards with H
 | ||
| | $1 $rough ← h ($evowel $macron $ddot? $evowel2_i $macron?) ;
 | ||
| | $1 $rough ← h ($evowel $ddot? $evowel2 $macron?) ;
 | ||
| | $1 $rough ← h ($evowel $macron? $ddot?) ;
 | ||
| | $1 $rough ← H ([AEIOUY] $macron $ddot? $evowel2_i $macron?) ;
 | ||
| | $1 $rough ← H ([AEIOUY] $ddot? $evowel2 $macron?) ;
 | ||
| | $1 $rough ← H ([AEIOUY] $macron? $ddot?) ;
 | ||
| # titlecase, have to fix individually
 | ||
| # in the future, we should add &uppercase() to make this easier
 | ||
| | A $1 $rough ← H a ($macron  $ddot? $evowel2_i $macron?) ;
 | ||
| | E $1 $rough ← H e ($macron  $ddot? $evowel2_i $macron?) ;
 | ||
| | I $1 $rough ← H i ($macron  $ddot? $evowel2_i $macron?) ;
 | ||
| | O $1 $rough ← H o ($macron  $ddot? $evowel2_i $macron?) ;
 | ||
| | U $1 $rough ← H u ($macron $ddot? $evowel2_i $macron?) ;
 | ||
| | Y $1 $rough ← H y ($macron $ddot? $evowel2_i $macron?) ;
 | ||
| | A $1 $rough ← H a ($ddot? $evowel2 $macron?) ;
 | ||
| | E $1 $rough ← H e ($ddot? $evowel2 $macron?) ;
 | ||
| | I $1 $rough ← H i ($ddot? $evowel2 $macron?) ;
 | ||
| | O $1 $rough ← H o ($ddot? $evowel2 $macron?) ;
 | ||
| | U $1 $rough ← H u ($ddot? $evowel2 $macron?) ;
 | ||
| | Y $1 $rough ← H y ($ddot? $evowel2 $macron?) ;
 | ||
| | A $1 $rough ← H a ($macron? $ddot? ) ;
 | ||
| | E $1 $rough ← H e ($macron? $ddot? ) ;
 | ||
| | I $1 $rough ← H i ($macron? $ddot? ) ;
 | ||
| | O $1 $rough ← H o ($macron? $ddot? ) ;
 | ||
| | U $1 $rough ← H u ($macron? $ddot? ) ;
 | ||
| | Y $1 $rough ← H y ($macron? $ddot? ) ;
 | ||
| # Now do smooth
 | ||
| #delete smooth breathing for Latin
 | ||
| $smooth → ;
 | ||
| # insert in Greek
 | ||
| # the assumption is that all Marks are on letters.
 | ||
| | $1 $smooth ← $notLetter { ([rR]) } [^hH$smooth$rough] ;
 | ||
| | $1 $smooth ← $notLetter { ($evowel $macron? $evowel2 $macron?) } [^$smooth$rough] ;
 | ||
| | $1 $smooth ← $notLetter { ($evowel $macron?) } [^$evowel2$smooth$rough] ;
 | ||
| # TODO: preserve smooth/rough breathing if not
 | ||
| # on initial vowel sequence
 | ||
| # need to have these up here so the rules don't mask
 | ||
| # remove now superfluous macron when returning
 | ||
| Α ← A $macron ;
 | ||
| α ← a $macron ;
 | ||
| η ↔ e $macron ;
 | ||
| Η ↔ E $macron ;
 | ||
| φ ↔ ph ;
 | ||
| Ψ } $beforeLower ↔ Ps ;
 | ||
| Ψ ↔ PS ;
 | ||
| Φ } $beforeLower ↔ Ph ;
 | ||
| Φ ↔ PH ;
 | ||
| ψ ↔ ps ;
 | ||
| ω ↔ o $macron ;
 | ||
| Ω ↔  O $macron;
 | ||
| # NORMAL
 | ||
| α ↔ a ;
 | ||
| Α ↔ A ;
 | ||
| β ↔ b ;
 | ||
| Β ↔ B ;
 | ||
| γ } $gammaLike ↔ n } $egammaLike ;
 | ||
| γ ↔ g ;
 | ||
| Γ } $gammaLike ↔ N } $egammaLike ;
 | ||
| Γ ↔ G ;
 | ||
| δ ↔ d ;
 | ||
| Δ ↔ D ;
 | ||
| ε ↔ e ;
 | ||
| Ε ↔ E ;
 | ||
| ζ ↔ z ;
 | ||
| Ζ ↔ Z ;
 | ||
| θ ↔ th ;
 | ||
| Θ } $beforeLower ↔ Th ;
 | ||
| Θ ↔ TH ;
 | ||
| ι ↔ i ;
 | ||
| Ι ↔ I ;
 | ||
| κ ↔ k ;
 | ||
| Κ ↔ K ;
 | ||
| λ ↔ l ;
 | ||
| Λ ↔ L ;
 | ||
| μ ↔ m ;
 | ||
| Μ ↔ M ;
 | ||
| ν } $gammaLike → n\' ; # forward only: nu before a $gammaLike letter gets a trailing ' so the result is not read back as gamma (see the gamma } $gammaLike ↔ n rule)
 | ||
| ν ↔ n ; # plain nu ↔ n
 | ||
| Ν } $gammaLike ↔ N\' ; # NOTE(review): bidirectional (↔) while the lowercase rule is forward-only (→) — confirm the asymmetry is intended
 | ||
| Ν ↔ N ; # plain capital nu ↔ N
 | ||
| ξ ↔ x ;
 | ||
| Ξ ↔ X ;
 | ||
| ο ↔ o ;
 | ||
| Ο ↔ O ;
 | ||
| π ↔ p ;
 | ||
| Π ↔ P ;
 | ||
| ρ $rough ↔ rh;
 | ||
| Ρ $rough } $beforeLower ↔ Rh ;
 | ||
| Ρ $rough ↔ RH ;
 | ||
| ρ ↔ r ;
 | ||
| Ρ ↔ R ;
 | ||
| # insert separator before things that turn into s
 | ||
| [Pp] { } [ςσΣϷϸϺϻ] → \' ;
 | ||
| # special S variants
 | ||
| Ϸ ↔ Š ; # Ϸ GREEK CAPITAL LETTER SHO Uppercase_Letter Grek - L
 | ||
| ϸ ↔ š ; #ϸ GREEK SMALL LETTER SHO Lowercase_Letter Grek - L
 | ||
| Ϻ ↔ Ŝ ; # Ϻ GREEK CAPITAL LETTER SAN Uppercase_Letter Grek - L
 | ||
| ϻ ↔ ŝ ; # ϻ GREEK SMALL LETTER SAN Lowercase_Letter Grek - L
 | ||
| # underbar means exception
 | ||
| # before a letter, initial
 | ||
| ς } $beforeLetter ↔ s $underbar } $beforeLetter;
 | ||
| σ } $beforeLetter ↔ s } $beforeLetter;
 | ||
| # otherwise, after a letter = final
 | ||
| $afterLetter { σ ↔ $afterLetter { s $underbar;
 | ||
| $afterLetter { ς ↔ $afterLetter { s ;
 | ||
| # otherwise (isolated) = initial
 | ||
| ς ↔ s $underbar;
 | ||
| σ ↔ s ;
 | ||
| # [Pp] { Σ ↔ \'S ;
 | ||
| Σ ↔ S ;
 | ||
| τ ↔ t ;
 | ||
| Τ ↔ T ;
 | ||
| $vowel {υ } ↔ u ;
 | ||
| υ ↔ y ;
 | ||
| $vowel { Υ ↔ U ;
 | ||
| Υ ↔ Y ;
 | ||
| χ ↔ ch ;
 | ||
| Χ } $beforeLower ↔ Ch ;
 | ||
| Χ ↔ CH ;
 | ||
| # Completeness for ASCII: reverse-only (←) rules that rewrite Latin letters with no Greek counterpart above (c f j q v w and their capitals) into a spelling the other rules handle; the leading | places the cursor before the replacement so the replacement is itself transliterated
 | ||
| $ignore = [[:Mark:]''] * ; # zero or more marks (presumably '' is a literal apostrophe — confirm against the rule syntax)
 | ||
| | k  ← c ; # c is treated as k
 | ||
| | ph ← f ; # f is treated as ph
 | ||
| | i  ← j ; # j is treated as i
 | ||
| | k ← q ; # q is treated as k
 | ||
| | b ← v } $vowel ; # v before a vowel is treated as b
 | ||
| | b ← w } $vowel; # w before a vowel is treated as b
 | ||
| | u ← v ; # any other v is treated as u
 | ||
| | u ← w; # any other w is treated as u
 | ||
| | K ← C ; # uppercase counterparts of the rules above
 | ||
| | Ph ← F ;
 | ||
| | I ← J ;
 | ||
| | K ← Q ;
 | ||
| | B ← V  } $vowel ;
 | ||
| | B ← W  } $vowel ;
 | ||
| | U ← V ;
 | ||
| | U ← W ;
 | ||
| $rough } $ignore [:UppercaseLetter:] → H ; # forward only: leftover rough breathing followed by $ignore and an uppercase letter becomes H
 | ||
| $ignore [:UppercaseLetter:] { $rough → H ; # forward only: leftover rough breathing whose before-context is $ignore plus an uppercase letter becomes H
 | ||
| $rough ← H ; # reverse only: a remaining H becomes a rough breathing
 | ||
| $rough ↔ h ; # fallback in both directions: rough breathing ↔ h
 | ||
| # Completeness for Greek
 | ||
| ϐ → | β ;
 | ||
| ϑ → | θ ;
 | ||
| ϒ → | Υ ;
 | ||
| ϕ → | φ ;
 | ||
| ϖ → | π ;
 | ||
| ϰ → | κ ;
 | ||
| ϱ → | ρ ;
 | ||
| ϲ → | σ ;
 | ||
| Ϲ → | Σ; #U+03F9 GREEK CAPITAL LUNATE SIGMA SYMBOL
 | ||
| ϳ → j ;
 | ||
| ϴ → | Θ ;
 | ||
| ϵ → | ε ;
 | ||
| µ → | μ ;
 | ||
| ͺ → i;
 | ||
| # delete any trailing ' marks used for roundtripping (reverse direction only; the replacement side is empty, so the matched ' is removed)
 | ||
| ← [Ππ] { \' } [Ss] ; # drop the ' between an already-converted Π/π and a following S/s (inserted by the "separator before things that turn into s" rule)
 | ||
| ← [Νν] { \' } $egammaLike ; # drop the ' between an already-converted Ν/ν and a following $egammaLike letter (inserted by the nu } $gammaLike rules)
 | ||
| ::NFC (NFD) ;
 | ||
| # ([\u0000-\u007F [:Latin:] [:Greek:] [:nonspacing mark:]]) ;
 | ||
| # ([\u0000-\u007F · [:Latin:] [:nonspacing mark:]]) ;
 | ||
| # MINIMAL FILTER GENERATED FOR: Latin-Greek BACKWARD
 | ||
| :: ( [':?A-Za-zÀ-ÅÇ-ÏÑ-ÖÙ-Ýà-åç-ïñ-öù-ýÿ-ďĒ-ĥĨ-İĴ-ķĹ-ľŃ-ňŌ-őŔ-ťŨ-žƠ-ơƯ-ưǍ-ǜǞ-ǣǦ-ǰǴ-ǵǸ-țȞ-ȟȦ-ȳ̀-̷̹-ͅ΅-ΆΈ-ΊΌΎ-ΐΪ-ΰϊ-ώϓ-ϔЀ-ЁЃЇЌ-ЎЙйѐ-ёѓїќ-ўѶ-ѷӁ-ӂӐ-ӓӖ-ӗӚ-ӟӢ-ӧӪ-ӵӸ-ӹḀ-ẙẛẠ-ỹἀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼ῁-ῄῆ-ΐῖ-Ί῝-΅ῲ-ῴῶ-ῼK-Å] ) ;
 | ||
| 			]]></tRule>
 | ||
| 		</transform>
 | ||
| 	</transforms>
 | ||
| </supplementalData>
 |