194 lines
		
	
	
		
			4.8 KiB
		
	
	
	
		
			XML
		
	
	
	
			
		
		
	
	
			194 lines
		
	
	
		
			4.8 KiB
		
	
	
	
		
			XML
		
	
	
	
| <?xml version="1.0" encoding="UTF-8" ?>
 | ||
| <!DOCTYPE supplementalData SYSTEM "../../common/dtd/ldmlSupplemental.dtd">
 | ||
| <!--
 | ||
| Copyright © 1991-2013 Unicode, Inc.
 | ||
| CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)
 | ||
| For terms of use, see http://www.unicode.org/copyright.html
 | ||
| -->
 | ||
| <supplementalData>
 | ||
| 	<version number="$Revision$"/>
 | ||
| 	<transforms>
 | ||
| 		<transform source="Hira" target="Kana" direction="both" alias="Hiragana-Katakana und-Kana-t-und-hira" backwardAlias="Katakana-Hiragana und-Hira-t-und-kana">
 | ||
| 			<tRule>
 | ||
| # note: a global filter is more efficient, but MUST include all source chars
 | ||
| :: [[\u0000-\u007E 、。 ゙-゜ ァ-ー 。-゚ー[:Hiragana:] [:Katakana:] [:nonspacing mark:]]-[\u309B \u309C]];
 | ||
| :: NFKC (NFC);
 | ||
| # Hiragana-Katakana
 | ||
| # This is largely a one-to-one mapping, but it has a
 | ||
| # few kinks:
 | ||
| # 1. The Katakana va/vi/ve/vo (30F7-30FA) have no
 | ||
| # Hiragana equivalents.  We use Hiragana wa/wi/we/wo
 | ||
| # (308F-3092) with a voicing mark (3099), which is
 | ||
| # semantically equivalent.  However, this is a non-
 | ||
| # roundtripping transformation.
 | ||
| # 2. The Katakana small ka/ke (30F5,30F6) have no
 | ||
| # Hiragana equivalents.  We convert them to normal
 | ||
| # Hiragana ka/ke (304B,3051).  This is a one-way
 | ||
| # information-losing transformation and precludes
 | ||
| # round-tripping of 30F5 and 30F6.
 | ||
| # 3. The combining marks 3099-309C are in the Hiragana
 | ||
| # block, but they apply to Katakana as well, so we
 | ||
| # leave them untouched.
 | ||
| # 4. The Katakana prolonged sound mark 30FC doubles the
 | ||
| # preceding vowel.  This is a one-way information-
 | ||
| # losing transformation from Katakana to Hiragana.
 | ||
| # 5. The Katakana middle dot separates words in foreign
 | ||
| # expressions; we leave this unmodified.
 | ||
| # The above points preclude successful round-trip
 | ||
| # transformations of arbitrary input text.  However,
 | ||
| # they provide naturalistic results that should conform
 | ||
| # to user expectations.
 | ||
| # Combining equivalents va/vi/ve/vo
 | ||
| わ゙ ↔ ヷ;
 | ||
| ゐ゙ ↔ ヸ;
 | ||
| ゑ゙ ↔ ヹ;
 | ||
| を゙ ↔ ヺ;
 | ||
| # One-to-one mappings, main block
 | ||
| # 3041:3094 ↔ 30A1:30F4
 | ||
| # 309D,E ↔ 30FD,E
 | ||
| ぁ ↔ ァ;
 | ||
| あ ↔ ア;
 | ||
| ぃ ↔ ィ;
 | ||
| い ↔ イ;
 | ||
| ぅ ↔ ゥ;
 | ||
| う ↔ ウ;
 | ||
| ぇ ↔ ェ;
 | ||
| え ↔ エ;
 | ||
| ぉ ↔ ォ;
 | ||
| お ↔ オ;
 | ||
| か ↔ カ;
 | ||
| が ↔ ガ;
 | ||
| き ↔ キ;
 | ||
| ぎ ↔ ギ;
 | ||
| く ↔ ク;
 | ||
| ぐ ↔ グ;
 | ||
| け ↔ ケ;
 | ||
| げ ↔ ゲ;
 | ||
| こ ↔ コ;
 | ||
| ご ↔ ゴ;
 | ||
| さ ↔ サ;
 | ||
| ざ ↔ ザ;
 | ||
| し ↔ シ;
 | ||
| じ ↔ ジ;
 | ||
| す ↔ ス;
 | ||
| ず ↔ ズ;
 | ||
| せ ↔ セ;
 | ||
| ぜ ↔ ゼ;
 | ||
| そ ↔ ソ;
 | ||
| ぞ ↔ ゾ;
 | ||
| た ↔ タ;
 | ||
| だ ↔ ダ;
 | ||
| ち ↔ チ;
 | ||
| ぢ ↔ ヂ;
 | ||
| っ ↔ ッ;
 | ||
| つ ↔ ツ;
 | ||
| づ ↔ ヅ;
 | ||
| て ↔ テ;
 | ||
| で ↔ デ;
 | ||
| と ↔ ト;
 | ||
| ど ↔ ド;
 | ||
| な ↔ ナ;
 | ||
| に ↔ ニ;
 | ||
| ぬ ↔ ヌ;
 | ||
| ね ↔ ネ;
 | ||
| の ↔ ノ;
 | ||
| は ↔ ハ;
 | ||
| ば ↔ バ;
 | ||
| ぱ ↔ パ;
 | ||
| ひ ↔ ヒ;
 | ||
| び ↔ ビ;
 | ||
| ぴ ↔ ピ;
 | ||
| ふ ↔ フ;
 | ||
| ぶ ↔ ブ;
 | ||
| ぷ ↔ プ;
 | ||
| へ ↔ ヘ;
 | ||
| べ ↔ ベ;
 | ||
| ぺ ↔ ペ;
 | ||
| ほ ↔ ホ;
 | ||
| ぼ ↔ ボ;
 | ||
| ぽ ↔ ポ;
 | ||
| ま ↔ マ;
 | ||
| み ↔ ミ;
 | ||
| む ↔ ム;
 | ||
| め ↔ メ;
 | ||
| も ↔ モ;
 | ||
| ゃ ↔ ャ;
 | ||
| や ↔ ヤ;
 | ||
| ゅ ↔ ュ;
 | ||
| ゆ ↔ ユ;
 | ||
| ょ ↔ ョ;
 | ||
| よ ↔ ヨ;
 | ||
| ら ↔ ラ;
 | ||
| り ↔ リ;
 | ||
| る ↔ ル;
 | ||
| れ ↔ レ;
 | ||
| ろ ↔ ロ;
 | ||
| ゎ ↔ ヮ;
 | ||
| わ ↔ ワ;
 | ||
| ゐ ↔ ヰ;
 | ||
| ゑ ↔ ヱ;
 | ||
| を ↔ ヲ;
 | ||
| ん ↔ ン;
 | ||
| ゔ ↔ ヴ;
 | ||
| ゝ ↔ ヽ;
 | ||
| ゞ ↔ ヾ;
 | ||
| # One-way Katakana-Hiragana transform of small Katakana ka/ke to
 | ||
| # normal Hiragana ka/ke.
 | ||
| か ← ヵ;
 | ||
| け ← ヶ;
 | ||
| # Katakana followed by a prolonged sound mark 30FC has
 | ||
| # its final vowel doubled.  This is a Katakana-Hiragana
 | ||
| # one-way information-losing transformation.  We
 | ||
| # include the small Katakana (e.g., small A 30A1, by then Hiragana 3041) and
 | ||
| # do not distinguish them from their large
 | ||
| # counterparts.  It doesn't make sense to double a
 | ||
| # small counterpart vowel as a small Hiragana vowel, so
 | ||
| # we don't do so.  In natural text this should never
 | ||
| # occur anyway.  If a 30FC is seen without a preceding
 | ||
| # vowel sound (e.g., after n 30F3) we do not change it.
 | ||
| ### $long = ー;
 | ||
| # The following categories are Hiragana, not Katakana
 | ||
| # as might be expected, since by the time we get to the
 | ||
| # 30FC, the preceding character will have already been
 | ||
| # transformed to Hiragana.
 | ||
| # {The following was mechanically generated from the
 | ||
| # Unicode 3.0 data:}
 | ||
| $xa = [ \
 | ||
| ぁ あ か が さ ざ \
 | ||
| た だ な は ば ぱ \
 | ||
| ま ゃ や ら ゎ わ \
 | ||
| ];
 | ||
| $xi = [ \
 | ||
| ぃ い き ぎ し じ \
 | ||
| ち ぢ に ひ び ぴ \
 | ||
| み り ゐ \
 | ||
| ];
 | ||
| $xu = [ \
 | ||
| ぅ う く ぐ す ず \
 | ||
| っ つ づ ぬ ふ ぶ \
 | ||
| ぷ む ゅ ゆ る ゔ \
 | ||
| ];
 | ||
| $xe = [ \
 | ||
| ぇ え け げ せ ぜ \
 | ||
| て で ね へ べ ぺ \
 | ||
| め れ ゑ \
 | ||
| ];
 | ||
| $xo = [ \
 | ||
| ぉ お こ ご そ ぞ \
 | ||
| と ど の ほ ぼ ぽ \
 | ||
| も ょ よ ろ を \
 | ||
| ];
 | ||
| あ ← $xa {ー};
 | ||
| い ← $xi {ー};
 | ||
| う ← $xu {ー};
 | ||
| え ← $xe {ー};
 | ||
| お ← $xo {ー};
 | ||
| :: NFC (NFKC) ;
 | ||
| # note: a global filter is more efficient, but MUST include all source chars!!
 | ||
| :: ([[\u0000-\u007E 、。 ゙-゜ ァ-ー 。-゚ー[:Hiragana:] [:Katakana:] [:nonspacing mark:]]-[\u309B \u309C]]);
 | ||
| # eof
 | ||
| 			</tRule>
 | ||
| 		</transform>
 | ||
| 	</transforms>
 | ||
| </supplementalData>
 |