397 lines
		
	
	
		
			10 KiB
		
	
	
	
		
			XML
		
	
	
	
			
		
		
	
	
			397 lines
		
	
	
		
			10 KiB
		
	
	
	
		
			XML
		
	
	
	
| <?xml version="1.0" encoding="UTF-8" ?>
 | ||
| <!DOCTYPE supplementalData SYSTEM "../../common/dtd/ldmlSupplemental.dtd">
 | ||
| <!--
 | ||
| Copyright © 1991-2013 Unicode, Inc.
 | ||
| CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)
 | ||
| For terms of use, see http://www.unicode.org/copyright.html
 | ||
| -->
 | ||
| <supplementalData>
 | ||
| 	<version number="$Revision$"/>
 | ||
| 	<transforms>
 | ||
| 		<transform source="Latn" target="Kana" direction="both" alias="Latin-Katakana und-Kana-t-und-latn" backwardAlias="Katakana-Latin und-Latn-t-und-kana">
 | ||
| 			<tRule>
 | ||
| # note: a global filter is more efficient, but MUST include all source chars
 | ||
| #:: [\u0000-\u007E 、。 ゙-゜ ァ-ー 。-゚ [:Latin:][:Katakana:] [:nonspacing mark:]] ;
 | ||
| # MINIMAL FILTER GENERATED FOR: Latin-Katakana
 | ||
| ### WARNING -- must add width filter, both here and below!!! ###
 | ||
| :: [[ᄀ-ᄒᄚᄡ\u1160-ᅵᆪᆬ-ᆭᆰ-ᆵ←-↓│■○\u3000-。「-」゙-゚ァ-ロワヲ-ヴヷヺ-ー!-~¢-₩][',.A-Za-z~À-ÖØ-öø-ďĒ-ĥĨ-İĴ-ķĹ-ľŃ-ňŌ-őŔ-ťŨ-žƠ-ơƯ-ưǍ-ǜǞ-ǣǦ-ǭǰǴ-ǵǸ-țȞ-ȟȦ-ȳ̄Ӣ-ӣӮ-ӯḀ-ẙẠ-ỹᾱᾹῑῙῡῩK-Å]] ;
 | ||
| :: [:Latin:] fullwidth-halfwidth ();
 | ||
| :: NFD (NFC);
 | ||
| :: Lower ();    # whenever transliterating from cased to uncased script, include this
 | ||
| # :: NFD () ;   # this would catch the odd cases where a lowercase is not in NFD, but none are important for Japanese
 | ||
| # Uses modified Hepburn, with small changes to make it unambiguous.
 | ||
| # | Kunrei-shiki: Hepburn/MHepburn
 | ||
| # | ------------------------------
 | ||
| # | si: shi
 | ||
| # | si ~ya: sha
 | ||
| # | si ~yu: shu
 | ||
| # | si ~yo: sho
 | ||
| # | zi: ji
 | ||
| # | zi ~ya: ja
 | ||
| # | zi ~yu: ju
 | ||
| # | zi ~yo: jo
 | ||
| # | ti: chi
 | ||
| # | ti ~ya: cha
 | ||
| # | ti ~yu: chu
 | ||
| # | ti ~yo: cho
 | ||
| # | tu: tsu
 | ||
| # | di: ji/dji
 | ||
| # | du: zu/dzu
 | ||
| # | hu: fu
 | ||
| # | For foreign words:
 | ||
| # | -----------------
 | ||
| # | se ~i si
 | ||
| # | si ~e she
 | ||
| # |
 | ||
| # | ze ~i zi
 | ||
| # | zi ~e je
 | ||
| # |
 | ||
| # | te ~i ti
 | ||
| # | ti ~e che
 | ||
| # | te ~u tu
 | ||
| # |
 | ||
| # | de ~i di
 | ||
| # | de ~u du
 | ||
| # | de ~i di
 | ||
| # |
 | ||
| # | he ~u: hu
 | ||
| # | hu ~a fa
 | ||
| # | hu ~i fi
 | ||
| # | hu ~e fe
 | ||
| # | hu ~o fo
 | ||
| # Most small forms are generated, but if necessary
 | ||
| # explicit small forms are given with ~a, ~ya, etc.
 | ||
| #------------------------------------------------------
 | ||
| # Variables
 | ||
| $vowel = [aeiou] ;
 | ||
| $consonant = [bcdfghjklmnpqrstvwxyz] ;
 | ||
| $macron = ̄ ;
 | ||
| # Variables used for doubled-consonants with tsu
 | ||
| $kana = [ぁ-ゔ] ;
 | ||
| $voice = [゙゛];
 | ||
| $semivoice = [゚゜];
 | ||
| $k_start = [カキクケコかきくけこ] ;
 | ||
| $s_start = [サシスセソさしすせそ] ;
 | ||
| $j_start = [シし] $voice ;
 | ||
| $t_start = [タチツテトたちつてと] ;
 | ||
| $n_start = [ナニヌネノンなにぬねの] ;
 | ||
| $h_start = [ハヒヘホはひへほ] ;
 | ||
| $f_start = [フふ] ;
 | ||
| $m_start = [マミムメモまみむめも] ;
 | ||
| $y_start = [ヤユヨやゆよ] ;
 | ||
| $r_start = [ラリルレロらりるれろ] ;
 | ||
| $w_start = [ワヰヱヲわゐゑを] ;
 | ||
| $v_start = [ワヰヱヲ]゙ ;
 | ||
| $voweled_basekana = [ァ-オカキクケコサシスセソタチッツテトナ-ノハヒフヘホマ-ヲヵヶ] ;
 | ||
| # if ン is followed by $n_quoter, then it needs an
 | ||
| # apostrophe after its romaji form to disambiguate it.
 | ||
| # e.g., ン ア != ナ, so represent as "n'a", not "na".
 | ||
| $n_quoter  =  [ア イ ウ エ オ ナ ニ ヌ ネ ノ ヤ ユ ヨ ン] ;
 | ||
| $small_y = [ャィュェョ] ;
 | ||
| $iteration = ゝ ;
 | ||
| #------------------------------------------------------
 | ||
| # katakana rules
 | ||
| # Punctuation
 | ||
| '.' ↔ 。;
 | ||
| ',' ↔ 、;
 | ||
| # ' ' } [a-z] → ; # delete spaces before latin
 | ||
| # ' ' ← [^' '゠-ヿ] {} ['゠-ヿ] ; #insert spaces before katakana
 | ||
| # Iteration Mark
 | ||
| # Copy the previous letter and its marks
 | ||
| # TODO
 | ||
| # | $1 $1 ← ($kana [[:M:]$voice$semivoice]?) $iteration
 | ||
| # Specials for katakana -- not shared with hiragana
 | ||
| va ↔ ヷ ;
 | ||
| vi ↔ ヸ ;
 | ||
| ve ↔ ヹ ;
 | ||
| vo ↔ ヺ ;
 | ||
| '~ka' ↔ ヵ ;
 | ||
| '~ke' ↔ ヶ ;
 | ||
| # ~~~ begin shared rules ~~~
 | ||
| #special
 | ||
| ya ← '~'ャ;
 | ||
| yi ← '~'ィ ;
 | ||
| yu ← '~'ュ;
 | ||
| ye ← '~'ェ;
 | ||
| yo ← '~'ョ;
 | ||
| #normal
 | ||
| a ↔ ア ;
 | ||
| b | '~' ← ヒ ゙} $small_y ;
 | ||
| by } $vowel → ビ | '~y' ;
 | ||
| ba ↔ バ ;
 | ||
| bi ↔ ビ ;
 | ||
| bu ↔ ブ ;
 | ||
| be ↔ ベ ;
 | ||
| bo ↔ ボ ;
 | ||
| c } i → | s ;
 | ||
| c } e → | s ;
 | ||
| da ↔ ダ ;
 | ||
| di ↔ ディ ;
 | ||
| du ↔ デゥ ;
 | ||
| de ↔ デ ;
 | ||
| do ↔ ド ;
 | ||
| dzu ↔ ヅ ;
 | ||
| dja ← ヂャ ;
 | ||
| dji'~i' ← ヂィ ; # liu
 | ||
| dju ← ヂュ ;
 | ||
| dje ← ヂェ ;
 | ||
| djo ← ヂョ ;
 | ||
| dji ↔ ヂ ;
 | ||
| dj  } $vowel → ヂ | '~y' ;
 | ||
| # TODO: QUESTION: use ĵĴżŻ instead of dj, dz
 | ||
| cha ← チャ ;
 | ||
| chi'~i' ← チィ ; # liu
 | ||
| chu ← チュ ;
 | ||
| che ← チェ ;
 | ||
| cho ← チョ ;
 | ||
| chi ↔ チ ;
 | ||
| ch } $vowel → チ | '~y' ;
 | ||
| e ↔ エ ;
 | ||
| g | '~' ← ギ} $small_y ;
 | ||
| gy  } $vowel → ギ | '~y' ;
 | ||
| ga ↔ ガ ;
 | ||
| gi ↔ ギ ;
 | ||
| gu ↔ グ ;
 | ||
| ge ↔ ゲ ;
 | ||
| go ↔ ゴ ;
 | ||
| i ↔ イ ;
 | ||
| # j  } $vowel → ジ | '~y' ;
 | ||
| ja ↔ ジャ ;
 | ||
| ji'~i' ← ジィ ; # liu
 | ||
| ju ↔ ジュ ;
 | ||
| je ↔ ジェ ;
 | ||
| jo ↔ ジョ ;
 | ||
| ji ↔ ジ ;
 | ||
| k | '~' ← キ} $small_y ;
 | ||
| ky  } $vowel → キ | '~y' ;
 | ||
| ka ↔ カ ;
 | ||
| ki ↔ キ ;
 | ||
| ku ↔ ク ;
 | ||
| ke ↔ ケ ;
 | ||
| ko ↔ コ ;
 | ||
| m | '~' ← ミ} $small_y ;
 | ||
| my  } $vowel → ミ | '~y' ;
 | ||
| ma ↔ マ ;
 | ||
| mi ↔ ミ ;
 | ||
| mu ↔ ム ;
 | ||
| me ↔ メ ;
 | ||
| mo ↔ モ ;
 | ||
| m } [pbfv] → ン ;
 | ||
| n | '~' ← ニ } $small_y ;
 | ||
| ny  } $vowel → ニ | '~y' ;
 | ||
| na ↔ ナ ;
 | ||
| ni ↔ ニ ;
 | ||
| nu ↔ ヌ ;
 | ||
| ne ↔ ネ ;
 | ||
| no ↔ ノ ;
 | ||
| o ↔ オ ;
 | ||
| p | '~' ← ピ } $small_y ;
 | ||
| py  } $vowel → ピ | '~y' ;
 | ||
| pa ↔ パ ;
 | ||
| pi ↔ ピ ;
 | ||
| pu ↔ プ ;
 | ||
| pe ↔ ペ ;
 | ||
| po ↔ ポ ;
 | ||
| h | '~' ← ヒ } $small_y ;
 | ||
| hy  } $vowel → ヒ | '~y' ;
 | ||
| ha ↔ ハ ;
 | ||
| hi ↔ ヒ ;
 | ||
| hu ↔ ヘゥ ;
 | ||
| he ↔ ヘ ;
 | ||
| ho ↔ ホ ;
 | ||
| # f | '~' ← フ } $small_y ;
 | ||
| # f } $vowel → フ | '~' ;
 | ||
| fa ↔ ファ ;
 | ||
| fi ↔ フィ ;
 | ||
| fe ↔ フェ ;
 | ||
| fo ↔ フォ ;
 | ||
| fu ↔ フ ;
 | ||
| r | '~' ← リ } $small_y ;
 | ||
| ry  } $vowel → リ | '~y' ;
 | ||
| ra ↔ ラ ;
 | ||
| ri ↔ リ ;
 | ||
| ru ↔ ル ;
 | ||
| re ↔ レ ;
 | ||
| ro ↔ ロ ;
 | ||
| za ↔ ザ ;
 | ||
| zi ↔ ゼィ ;
 | ||
| zu ↔ ズ ;
 | ||
| ze ↔ ゼ ;
 | ||
| zo ↔ ゾ ;
 | ||
| sa ↔ サ ;
 | ||
| si ↔ セィ ;
 | ||
| su ↔ ス ;
 | ||
| se ↔ セ ;
 | ||
| so ↔ ソ ;
 | ||
| sha ← シャ ;
 | ||
| shi'~i' ← シィ ; # liu
 | ||
| shu ← シュ ;
 | ||
| she ← シェ ;
 | ||
| sho ← ショ ;
 | ||
| shi ↔ シ ;
 | ||
| sh } $vowel → シ | '~y' ;
 | ||
| ta ↔ タ ;
 | ||
| ti ↔ ティ ;
 | ||
| tu ↔ テゥ ;
 | ||
| te ↔ テ ;
 | ||
| to ↔ ト ;
 | ||
| tsu ↔ ツ ;
 | ||
| # v  } $vowel → ヴ | '~' ;
 | ||
| #'v~a' ← ヴァ ; # liu
 | ||
| #'v~i' ← ヴィ ; # liu
 | ||
| #'v~e' ← ヴェ ; # liu
 | ||
| #'v~o' ← ヴォ ; # liu
 | ||
| vu ↔ ヴ ;
 | ||
| u ↔ ウ ;
 | ||
| # w  } $vowel → ウ | '~' ;
 | ||
| wa ↔ ワ ;
 | ||
| wi ↔ ヰ ;
 | ||
| wu → ウ ;
 | ||
| we ↔ ヱ ;
 | ||
| wo ↔ ヲ ;
 | ||
| ya ↔ ヤ ;
 | ||
| yi → イ ;
 | ||
| yu ↔ ユ ;
 | ||
| ye → エ ;
 | ||
| yo ↔ ヨ ;
 | ||
| # double consonants
 | ||
| # A doubled consonant is written with small tsu (ッ).
 | ||
| # Latin → Katakana: "X } Y" matches X only when Y follows, so the first
 | ||
| # letter of the pair becomes ッ and the second is left for the syllable rules.
 | ||
| # Katakana → Latin (↔ rules only): ッ becomes the consonant of the kana row
 | ||
| # ($..._start, plus a voicing mark where given) that follows it.
 | ||
| #specials
 | ||
| s } sh → ッ ;
 | ||
| t } ch → ッ ;
 | ||
| #voiced
 | ||
| # NOTE(review): these come before the plain rows below, presumably so that
 | ||
| # ッ + kana + voicing mark is claimed here before the unvoiced rule for the
 | ||
| # same base kana can match - confirm before reordering.
 | ||
| j } j ↔ ッ } $j_start ;
 | ||
| b } b ↔ ッ } [$h_start$f_start] $voice;
 | ||
| d } d ↔ ッ } $t_start $voice;
 | ||
| g } g ↔ ッ } $k_start $voice;
 | ||
| p } p ↔ ッ } [$h_start$f_start] $semivoice;
 | ||
| # v } v ↔ ッ } [ワヰウヱヲう]  $voice ;
 | ||
| z } z ↔ ッ } $s_start $voice;
 | ||
| v } v ↔ ッ } $v_start;
 | ||
| # normal
 | ||
| k } k ↔ ッ } $k_start ;
 | ||
| m } m ↔ ッ } $m_start ;
 | ||
| n } n ↔ ッ } $n_start ;
 | ||
| h } h ↔ ッ } $h_start ;
 | ||
| f } f ↔ ッ } $f_start ;
 | ||
| r } r ↔ ッ } $r_start ;
 | ||
| t } t ↔ ッ } $t_start ;
 | ||
| s } s ↔ ッ } $s_start ;
 | ||
| w } w  ↔ ッ } $w_start;
 | ||
| y } y ↔ ッ } $y_start;
 | ||
| # completeness
 | ||
| # One-way (Latin → Katakana) only: these pairs have no reverse mapping here.
 | ||
| x } x → ッ ;
 | ||
| c } k → ッ ;
 | ||
| c } c → ッ ;
 | ||
| c } q → ッ ;
 | ||
| l } l → ッ ;
 | ||
| q } q → ッ ;
 | ||
| # y } y → ッ ;
 | ||
| # w } w → ッ ;
 | ||
| # prolonged vowel mark. this indicates a doubling of
 | ||
| # the preceding vowel sound
 | ||
| #a ← a { ー ; # liu
 | ||
| #e ← e { ー ; # liu
 | ||
| #i ← i { ー ; # liu
 | ||
| #o ← o { ー ; # liu
 | ||
| #u ← u { ー ; # liu
 | ||
| $macron ↔ ー ;
 | ||
| # small forms
 | ||
| '~a' ↔ ァ ;
 | ||
| '~i' ↔ ィ ;
 | ||
| '~u' ↔ ゥ ;
 | ||
| '~e' ↔ ェ ;
 | ||
| '~o' ↔ ォ ;
 | ||
| '~tsu' ↔ ッ ;
 | ||
| '~wa' ↔ ヮ ;
 | ||
| '~ya' ↔ ャ ;
 | ||
| '~yi' → ィ ;
 | ||
| '~yu' ↔ ュ ;
 | ||
| '~ye' → ェ ;
 | ||
| '~yo' ↔ ョ ;
 | ||
| # iteration marks
 | ||
| # Reverse-only (Katakana → Latin) rules for the iteration mark ヽ.
 | ||
| # The Latin text before "{" is left context (the syllable just romanized);
 | ||
| # $1 is its captured "y* vowel" tail. When ヽ carries a voicing mark ($voice)
 | ||
| # the syllable is repeated with the voiced consonant, otherwise unchanged.
 | ||
| # Order matters: the sh/ch/ts rules must stay ahead of the single-letter
 | ||
| # s/h/t rules so that the longer consonant spelling is matched first.
 | ||
| # TODO: make more accurate
 | ||
| j $1 ← sh (y* $vowel) {ヽ$voice ;
 | ||
| dj $1 ← ch (y* $vowel) {ヽ$voice ;
 | ||
| dz $1 ← ts (y* $vowel) {ヽ$voice ;
 | ||
| g $1 ← k (y* $vowel) {ヽ$voice ;
 | ||
| z $1 ← s (y* $vowel) {ヽ$voice ;
 | ||
| d $1 ← t (y* $vowel) {ヽ$voice ;
 | ||
| # h-row voices to b-row (b-row kana are h-row kana plus a voicing mark).
 | ||
| b $1 ← h (y* $vowel) {ヽ$voice ;
 | ||
| v $1 ← w (y* $vowel) {ヽ$voice ;
 | ||
| sh $1 ← sh (y* $vowel) {ヽ$voice ;
 | ||
| j $1 ← j (y* $vowel) {ヽ$voice ;
 | ||
| ch $1 ← ch (y* $vowel) {ヽ$voice ;
 | ||
| dj $1 ← dj(y* $vowel) {ヽ$voice ;
 | ||
| ts $1 ← ts (y* $vowel) {ヽ$voice ;
 | ||
| dz $1 ← dz (y* $vowel) {ヽ$voice ;
 | ||
| # any other syllable (including already-voiced ones): repeat it as-is
 | ||
| $1 ← ($consonant y* $vowel) {ヽ$voice? ;
 | ||
| $1 ← (.) {ヽ $voice? ; # otherwise repeat last character
 | ||
| ← ヽ $voice? ; # delete if no characters found
 | ||
| # h- rule: lengthens vowel if not followed by a vowel.
 | ||
| # At the point this is applied, latin [cons]?vowel sequences
 | ||
| # have been converted to katakana in NFD form.
 | ||
| $voweled_basekana [\u3099 \u309A]? { h → ー ;
 | ||
| # one-way latin- → kana rules. these do not occur in
 | ||
| # well-formed romaji representing actual japanese text.
 | ||
| # their purpose is to make all romaji map to kana of
 | ||
| # some sort.
 | ||
| # the following are not really necessary, but produce
 | ||
| # slightly more natural results.
 | ||
| cy → セィ ;
 | ||
| dy → ディ ;
 | ||
| hy → ヒ ;
 | ||
| sy → セィ ;
 | ||
| ty → ティ ;
 | ||
| zy → ゼィ ;
 | ||
| h → ヘ ;
 | ||
| # isolated consonants listed here so as not to mask
 | ||
| # longer rules above.
 | ||
| ch → チ;
 | ||
| sh → シ ;
 | ||
| dz → ヅ ;
 | ||
| dj → ヂ;
 | ||
| b → ブ ;
 | ||
| d → デ ;
 | ||
| g → グ ;
 | ||
| k → ク ;
 | ||
| m → ム ;
 | ||
| n'' ← ン } $n_quoter ;
 | ||
| n ↔ ン ;
 | ||
| p → プ ;
 | ||
| r → ル ;
 | ||
| s → ス ;
 | ||
| t → テ ;
 | ||
| y → イ ;
 | ||
| z → ズ ;
 | ||
| v → ヴ ;
 | ||
| f → フ;
 | ||
| j  → ジ;
 | ||
| w → ウ;
 | ||
| ß → | ss ;
 | ||
| æ → | e ;
 | ||
| ð → | d ;
 | ||
| ø → | u ;
 | ||
| þ → | th ;
 | ||
| # simple substitutions using backup
 | ||
| c → | k ;
 | ||
| l → | r ;
 | ||
| q → | k ;
 | ||
| x → | ks ;
 | ||
| # ~~~ END shared rules ~~~
 | ||
| #------------------------------------------------------
 | ||
| # Final cleanup
 | ||
| '~' → ; # delete stray tildes between letters
 | ||
| [:Katakana:] { '' } [:Latin:] → ; # delete stray quotes between letters
 | ||
| # [ʾ[:Nonspacing Mark:]-[゙-゜]] → ; # delete any non-spacing marks that we didn't use
 | ||
| :: NFC (NFD) ;
 | ||
| :: ([[:Katakana:][\u309B\u309C\u30A0\u30FC\uFF70\uFF9E\uFF9F]] halfwidth-fullwidth);
 | ||
| # note: a global filter is more efficient, but MUST include all source chars!!
 | ||
| #:: ([\u0000-\u007E 、。 ゙-゜ ァ-ー 。-゚ [:Latin:][:Katakana:] [:nonspacing mark:]]);
 | ||
| # MINIMAL FILTER GENERATED FOR: Latin-Katakana BACKWARD
 | ||
| :: ( [[\ -~¢-£¥-¦¬̄₩。-하-ᅦᅧ-ᅬᅭ-ᅲᅳ-ᅵ│-○][~、-。がぎぐげござじずぜぞだぢづでどば-ぱび-ぴぶ-ぷべ-ぺぼ-ぽゔ゙-゛ゞァ-ヺー-ヾ][\u309B\u309C\u30A0\u30FC\uFF70\uFF9E\uFF9F]] ) ;
 | ||
| # eof
 | ||
| 			</tRule>
 | ||
| 		</transform>
 | ||
| 	</transforms>
 | ||
| </supplementalData>
 |