// -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2004, International Business Machines
// Corporation and others.  All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: ../../../impl/data/Transliterator_Latin_InterIndic.txt
// Date: Tue May 18 17:24:49 2004
//--------------------------------------------------------------------

// Latin_InterIndic
//
// Overview of the table below:
//   1. Variable definitions binding symbolic names to code points in the
//      range \ue001-\ue081 (the InterIndic intermediate encoding).
//   2. Character-class variables built from those names.
//   3. Conversion rules from romanized Latin text to InterIndic.
// NOTE(review): rule order looks significant (longer patterns are listed
// before their prefixes, e.g. kh before k) -- do not reorder or sort.
// NOTE(review): since this file is generated, lasting changes presumably
// belong in the Source file named above -- confirm before hand-editing.

t_Latn_InterIndic {
  Rule {
    //--------------------------------------------------------------------
    //--------------------------------------------------------------------
    //--------------------------------------------------------------------
    // Latin-InterIndic

    //:: NFD;

    // ---- Variable definitions -----------------------------------------

    // \ue000 reserved
    // signs: chandrabindu, anusvara, visarga
    "$chandrabindu=\ue001;"
    "$anusvara=\ue002;"
    "$visarga=\ue003;"
    // \ue004 reserved

    // A leading w in a variable name marks the stand-alone (independent)
    // vowel form.
    "$wa=\ue005;"
    "$waa=\ue006;"
    "$wi=\ue007;"
    "$wii=\ue008;"
    "$wu=\ue009;"
    "$wuu=\ue00a;"
    "$wr=\ue00b;"
    "$wl=\ue00c;"
    "$wce=\ue00d;" // LETTER CANDRA E
    "$wse=\ue00e;" // LETTER SHORT E
    "$we=\ue00f;"  // \u090f LETTER E
    "$wai=\ue010;"
    "$wco=\ue011;" // LETTER CANDRA O
    "$wso=\ue012;" // LETTER SHORT O
    "$wo=\ue013;"  // \u0913 LETTER O
    "$wau=\ue014;"

    // consonants
    "$ka=\ue015;"
    "$kha=\ue016;"
    "$ga=\ue017;"
    "$gha=\ue018;"
    "$nga=\ue019;"
    "$ca=\ue01a;"
    "$cha=\ue01b;"
    "$ja=\ue01c;"
    "$jha=\ue01d;"
    "$nya=\ue01e;"
    "$tta=\ue01f;"
    "$ttha=\ue020;"
    "$dda=\ue021;"
    "$ddha=\ue022;"
    "$nna=\ue023;"
    "$ta=\ue024;"
    "$tha=\ue025;"
    "$da=\ue026;"
    "$dha=\ue027;"
    "$na=\ue028;"
    "$ena=\ue029;" // compatibility
    "$pa=\ue02a;"
    "$pha=\ue02b;"
    "$ba=\ue02c;"
    "$bha=\ue02d;"
    "$ma=\ue02e;"
    "$ya=\ue02f;"
    "$ra=\ue030;"
    "$rra=\ue031;"
    "$la=\ue032;"
    "$lla=\ue033;"
    "$ela=\ue034;" // compatibility
    "$va=\ue035;"
    // $vva is the only name defined outside the contiguous block
    // (\ue081); it is declared here, between $va and $sha.
    "$vva=\ue081;"
    "$sha=\ue036;"
    "$ssa=\ue037;"
    "$sa=\ue038;"
    "$ha=\ue039;"
    // \u093a Reserved
    // \u093b Reserved
    "$nukta=\ue03c;"
    "$avagraha=\ue03d;" // SIGN AVAGRAHA

    // Vowel names without the w prefix represent the dependent form
    // (vowel signs).
    "$aa=\ue03e;"
    "$i=\ue03f;"
    "$ii=\ue040;"
    "$u=\ue041;"
    "$uu=\ue042;"
    "$rh=\ue043;"
    "$lh=\ue044;"
    "$ce=\ue045;" // VOWEL SIGN CANDRA E
    "$se=\ue046;" // VOWEL SIGN SHORT E
    "$e=\ue047;"
    "$ai=\ue048;"
    "$co=\ue049;" // VOWEL SIGN CANDRA O
    "$so=\ue04a;" // VOWEL SIGN SHORT O
    "$o=\ue04b;"  // \u094b
    "$au=\ue04c;"
    "$virama=\ue04d;"
    // \u094e Reserved
    // \u094f Reserved
    "$om = \ue050;" // OM
    // \u0951>; # UNMAPPED STRESS SIGN UDATTA
    // \u0952>; # UNMAPPED STRESS SIGN ANUDATTA
    // \u0953>; # UNMAPPED GRAVE ACCENT
    // \u0954>; # UNMAPPED ACUTE ACCENT
    "$lm = \ue055;" // Telugu Length Mark
    "$ailm=\ue056;" // AI Length Mark
    "$aulm=\ue057;" // AU Length Mark

    // Urdu compatibility forms
    "$uka=\ue058;"
    "$ukha=\ue059;"
    "$ugha=\ue05a;"
    "$ujha=\ue05b;"
    "$uddha=\ue05c;"
    "$udha=\ue05d;"
    "$ufa=\ue05e;"
    "$uya=\ue05f;"

    // additional independent (w-prefixed) and dependent vowel forms
    "$wrr=\ue060;"
    "$wll=\ue061;"
    "$rrh=\ue062;"
    "$llh=\ue063;"

    // punctuation
    "$danda=\ue064;"
    "$doubleDanda=\ue065;"

    // digits
    "$zero=\ue066;"  // DIGIT ZERO
    "$one=\ue067;"   // DIGIT ONE
    "$two=\ue068;"   // DIGIT TWO
    "$three=\ue069;" // DIGIT THREE
    "$four=\ue06a;"  // DIGIT FOUR
    "$five=\ue06b;"  // DIGIT FIVE
    "$six=\ue06c;"   // DIGIT SIX
    "$seven=\ue06d;" // DIGIT SEVEN
    "$eight=\ue06e;" // DIGIT EIGHT
    "$nine=\ue06f;"  // DIGIT NINE

    // For all other scripts
    // (the $ecp0-$ecpF names are defined here but not referenced by any
    // rule in this table)
    "$ecp0=\ue070;"
    "$ecp1=\ue071;"
    "$ecp2=\ue072;"
    "$ecp3=\ue073;"
    "$ecp4=\ue074;"
    "$ecp5=\ue075;"
    "$ecp6=\ue076;"
    "$ecp7=\ue077;"
    "$ecp8=\ue078;"
    "$ecp9=\ue079;"
    "$ecpA=\ue07a;"
    "$ecpB=\ue07b;"
    "$ecpC=\ue07c;"
    "$ecpD=\ue07d;"
    "$ecpE=\ue07e;"
    "$ecpF=\ue07f;"
    // \u0970>; # UNMAPPED ABBREVIATION SIGN

    // ---- Character classes --------------------------------------------

    // Dependent vowels \ue03e-\ue040 and \ue045-\ue04c ($aa..$ii, $ce..$au).
    "$depVowelAbove=[\ue03e-\ue040\ue045-\ue04c];"
    // Dependent vowels \ue041-\ue044 ($u, $uu, $rh, $lh).
    "$depVowelBelow=[\ue041-\ue044];"
    "$endThing=[$danda$doubleDanda];"

    // $x was originally called '&'; $z was '%'
    // ($x is defined here but not referenced by any rule in this table.)
    "$x=[$virama$aa$ai$au$ii$i$uu$u$rrh$rh$lh$e$o$se$ce$so$co];"
    // Latin consonant letters (a-z without the vowels a e i o u).
    "$z=[bcdfghjklmnpqrstvwxyz];"
    // InterIndic consonants, Latin consonant letters, and nine literal
    // \uXX15-\uXX39 / \uXX95-\uXXb9 ranges.
    // NOTE(review): those ranges look like the consonant rows of the
    // individual Indic script blocks -- confirm against the code charts.
    "$consonants=[[$ka-$ha]$z[\u0915-\u0939][\u0995-\u09b9][\u0a15-\u0a39][\u0a95-\u0ab9][\u0b15-\u0b39][\u0b95-\u0bb9][\u0c15-\u0c39][\u0c95-\u0cb9][\u0d15-\u0d39]];"

    // ---- Conversion rules ---------------------------------------------

    // signs and the letter x
    "\u0315 > $avagraha;"
    "\u0303>$chandrabindu$anusvara;"
    "m\u0310>$chandrabindu;"
    "h\u0323>$visarga;"
    "x>$ka$virama$sa;"

    // convert to independent forms at start of word or syllable:

    // dependent forms for roundtrip
    // (each pattern here starts with \u0314 and yields a dependent vowel;
    // a bare a after \u0314 yields nothing)
    "\u0314a\u0304>$aa;"
    "\u0314ai>$ai;"
    "\u0314au>$au;"
    "\u0314ii>$ii;"
    "\u0314i\u0304>$ii;"
    "\u0314i>$i;"
    "\u0314u\u0304>$uu;"
    "\u0314u>$u;"
    "\u0314r\u0325\u0304>$rrh;"
    "\u0314r\u0325>$rh;"
    "\u0314l\u0325\u0304>$llh;"
    "\u0314lh>$lh;"
    "\u0314l\u0325>$lh;"
    "\u0314e\u0304>$e;"
    "\u0314o\u0304>$o;"
    "\u0314a>;"
    "\u0314e\u0306>$ce;"
    "\u0314o\u0306>$co;"
    "\u0314e>$se;"
    "\u0314o>$so;"

    // preceded by consonants
    // NOTE(review): the text before { is presumably a before-context that
    // must match but is not replaced -- confirm against ICU rule syntax.
    "$consonants{ a\u0304>$aa;"
    "$consonants{ ai>$ai;"
    "$consonants{ au>$au;"
    "$consonants{ ii>$ii;"
    "$consonants{ i\u0304>$ii;"
    "$consonants{ i>$i;"
    "$consonants{ u\u0304>$uu;"
    "$consonants{ u>$u;"
    "$consonants{ r\u0325\u0304>$rrh;"
    "$consonants{ r\u0325a>$rh;"
    "$consonants{ r\u0325>$rh;"
    "$consonants{ l\u0325\u0304>$llh;"
    "$consonants{ lh>$lh;"
    "$consonants{ l\u0325>$lh;"
    "$consonants{ e\u0304>$e;"
    "$consonants{ o\u0304>$o;"
    "$consonants{ e\u0306>$ce;"
    "$consonants{ o\u0306>$co;"
    "$consonants{ e>$se;"
    "$consonants{ o>$so;"

    // stand-alone vowels become independent (w-prefixed) forms
    // e.g. keai -> {ka}{e}{wai}; k'ai -> {ka}{wai}; (ai) -> ({wai})
    "a\u0304>$waa;"
    "ai>$wai;"
    "au>$wau;"
    "i\u0304>$wii;"
    "i>$wi;"
    "u\u0304>$wuu;"
    "u>$wu;"
    "r\u0325\u0304>$wrr;"
    "r\u0325>$wr;"
    "l\u0325\u0304>$wll;"
    "lh>$wl;"
    "l\u0325>$wl;"
    "e\u0304>$we;"
    "o\u0304>$wo;"
    "a>$wa;"
    "e\u0306>$wce;"
    "o\u0306>$wco;"
    "e>$wse;"
    "''om>$om;"
    "o>$wso;"

    // rules for anusvara
    // NOTE(review): the text after } is presumably an after-context that
    // must follow but is not consumed -- confirm against ICU rule syntax.
    "n}r\u0325 > $na|$virama;"
    "n}l\u0325 > $na|$virama;"
    "n}na > $na|$virama;"
    "n\u0307}[kg] > $anusvara;"
    "n\u0307}n\u0307 > $anusvara;"
    "n\u0304}[cj] > $anusvara;"
    "n\u0304}n\u0303 > $anusvara;"
    "n\u0323}[tdn]\u0323 > $anusvara;"
    "n}[tdn] > $anusvara;"
    "m}[pbm] > $anusvara;"
    "n}[ylvshr] > $anusvara;"
    "m\u0307 > $anusvara;"

    // Urdu compatibility
    // NOTE(review): the | in the output presumably leaves the cursor in
    // front of $virama so the later $virama+vowel rules can still match
    // (cf. the example kai -> {ka}{virama}ai -> {ka}{ai} further down) --
    // confirm against ICU rule syntax.
    "q>$uka|$virama;"
    "k\u0331h\u0331>$ukha |$virama;"
    "g\u0307> $ugha | $virama;"
    "z > $ujha |$virama;"
    "f > $ufa|$virama;"

    // dev
    // Consonants: every rule emits the consonant followed by $virama.
    "y\u0307>$uya|$virama;"
    "l\u0331>$ela|$virama;"
    "n\u0331>$ena|$virama;"
    "n\u0307>$nga|$virama;"
    "n\u0303>$nya|$virama;"
    "n\u0323>$nna|$virama;"
    "t\u0323h>$ttha|$virama;"
    "t\u0323>$tta|$virama;"
    "r\u0323h>$udha|$virama;"
    "r\u0323>$uddha|$virama;"
    "d\u0323h>$ddha|$virama;"
    "d\u0323>$dda|$virama;"
    "kh>$kha|$virama;"
    "k>$ka|$virama;"
    "gh>$gha|$virama;"
    "g>$ga|$virama;"
    "ch>$cha|$virama;"
    "c>$ca|$virama;"
    "jh>$jha|$virama;"
    "j>$ja|$virama;"
    "ny>$nya|$virama;"
    "tth>$ttha|$virama;"
    "ddh>$ddha|$virama;"
    "th>$tha|$virama;"
    "t>$ta|$virama;"
    "dh>$dha|$virama;"
    "d>$da|$virama;"
    "n>$na|$virama;"
    "ph>$pha|$virama;"
    "p>$pa|$virama;"
    "bh>$bha|$virama;"
    "b>$ba|$virama;"
    "m>$ma|$virama;"
    "y>$ya|$virama;"
    "r\u0331>$rra|$virama;"
    "r>$ra|$virama;"
    "l\u0323>$lla|$virama;"
    "l>$la|$virama;"
    "v>$va|$virama;"
    "w\u0307>$vva|$virama;"
    "w>$va|$virama;"
    "sh>$sha|$virama;"
    "ss>$ssa|$virama;"
    "s\u0323>$ssa|$virama;"
    "s\u0301>$sha|$virama;"
    "s>$sa|$virama;"
    "h>$ha|$virama;"

    // punctuation: one period is a danda, a second one makes a double danda
    "'.'>$danda;"
    "$danda'.'>$doubleDanda;"

    // a tilde after a dependent vowel
    "$depVowelAbove{'~'>$anusvara;"
    "$depVowelBelow{'~'>$chandrabindu;"

    // convert to dependent forms after consonant with no vowel:
    // e.g. kai -> {ka}{virama}ai -> {ka}{ai}
    // (rules left commented out below are disabled in the generated table)
    //$virama aa>$aa;
    "$virama a\u0304>$aa;"
    "$virama ai>$ai;"
    "$virama au>$au;"
    "$virama ii>$ii;"
    "$virama i\u0304>$ii;"
    "$virama i>$i;"
    //$virama uu>$uu;
    "$virama u\u0304>$uu;"
    "$virama u>$u;"
    //$virama rrh>$rrh;
    "$virama r\u0325\u0304>$rrh;"
    //$virama rh>$rh;
    "$virama r\u0325a>$rh;"
    "$virama r\u0325>$rh;"
    "$virama l\u0325\u0304>$llh;"
    "$virama lh>$lh;"
    "$virama l\u0325>$lh;"
    "$virama e\u0304>$e;"
    "$virama o\u0304>$o;"
    // a plain a after $virama removes the virama and emits nothing
    "$virama a>;"
    "$virama e\u0306>$ce;"
    "$virama o\u0306>$co;"
    "$virama e>$se;"
    "$virama o>$so;"

    // otherwise convert independent forms when separated by ': k'ai -> {ka}{virama}{wai}
    //$virama''aa>$waa;
    "$virama''a\u0304>$waa;"
    "$virama''ai>$wai;"
    "$virama''au>$wau;"
    //$virama''ii>$wii;
    "$virama''i\u0304>$wii;"
    "$virama''i>$wi;"
    //$virama''uu>$wuu;
    "$virama''u\u0304>$wuu;"
    "$virama''u>$wu;"
    //$virama''rrh>$wrr;
    "$virama''r\u0325\u0304>$wrr;"
    //$virama''rh>$wr;
    "$virama''r\u0325>$wr;"
    "$virama''l\u0325\u0304>$wll;"
    //$virama''lh>$wl;
    "$virama''l\u0325>$wl;"
    "$virama''e\u0304>$we;"
    "$virama''o\u0304>$wo;"
    "$virama''a>$wa;"
    "$virama''e\u0306>$wce;"
    "$virama''o\u0306>$wco;"
    "$virama''e>$wse;"
    "$virama''o>$wso;"

    // no virama
    "''a\u0304>$waa;"
    "''ai>$wai;"
    "''au>$wau;"
    "''i\u0304>$wii;"
    "''i>$wi;"
    "''u\u0304>$wuu;"
    "''u>$wu;"
    "''r\u0325\u0304>$wrr;"
    "''r\u0325>$wr;"
    "''l\u0325\u0304>$wll;"
    "''l\u0325>$wl;"
    "''e\u0304>$we;"
    "''o\u0304>$wo;"
    "''a>$wa;"
    "''e\u0306>$wce;"
    "''o\u0306>$wco;"
    "''e>$wse;"
    "''o>$wso;"

    // virama handling: kept before a Latin consonant letter or a space,
    // dropped before a danda or double danda
    "$virama } [$z] > $virama;"
    "$virama } ' ' > $virama ;"
    "$virama}$endThing>;"

    // digits
    "0>$zero;"
    "1>$one;"
    "2>$two;"
    "3>$three;"
    "4>$four;"
    "5>$five;"
    "6>$six;"
    "7>$seven;"
    "8>$eight;"
    "9>$nine;"

    // any remaining separator quote is removed
    "''>;"

    //:: NFC (NFD) ;
  }
}