#!/usr/bin/perl
##################################################################
# Copyright (C) 1998-2001 Stefan Mashkevich
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License.
# Disclaimer: The program is provided "as is" and there is no
# warranty of any kind whatsoever. Your using it is at your own risk.
##################################################################
# converting transliterated text into KOI-8
#
# v0.8 S.Mashkevich 13-Dec-1998
# v0.9 S.Mashkevich 19-Jan-1999
#      revision 5   20-Nov-1999
#      revision 7   17-Feb-2000
#      revision 8   03-May-2000
# v1.0 S.Mashkevich 24-Feb-2001
#      different symbols for switching translit on/off allowed
# text on standard input, recoded on standard output
#
# Optional first argument "-sXY": X switches transliteration off,
# Y switches it back on.  Both default to '|'; when X and Y are the
# same symbol, that symbol toggles transliteration.
##################################################################

use strict;
use warnings;

##################################################################
# command line

my ($transoff, $transon);
my $firstarg = shift @ARGV;
if (defined $firstarg && $firstarg =~ /-s(.)(.)/) {
    ($transoff, $transon) = ($1, $2);
}

# Test definedness rather than truth so that '0' is a usable switch symbol.
my $TRANSOFF = defined $transoff ? $transoff : '|';
my $TRANSON  = defined $transon  ? $transon  : '|';

# $TRANS is defined only when one symbol serves as an on/off toggle.
my $TRANS = $TRANSON eq $TRANSOFF ? $TRANSON : undef;

# extra symbols that count as letters (that figure in translit patterns)
my $extrasyms = q{'`^~};
my $nonletter = qr/[^\w${extrasyms}]/;

##################################################################
# read in translit patterns
#
# The table lives after __END__ and is read through the DATA handle.
# Each row holds any number of "pattern replacement" pairs; rows that
# begin with '#' are skipped.  Patterns are stored by length so that the
# converter can try the longest candidates first:
#     $trans[length]{pattern} = replacement
#
# NOTE(review): replacements are matched as runs of bytes \200-\377, i.e.
# the table is presumably meant to be stored in a single-byte KOI-8
# encoding -- verify the table if this file has ever been re-encoded.
# NOTE(review): a '#' only disables a row when it is the first character
# of that row's line.

my $maxlen = 0;
my @trans;
while (defined(my $row = readline(*DATA))) {
    next if $row =~ /^#/;
    my @pairs = $row =~ /([\w${extrasyms}:]+)\s+([\200-\377]+)/g;
    while (@pairs) {
        my ($orig, $repl) = splice @pairs, 0, 2;
        my $len = length $orig;
        $maxlen = $len if $len > $maxlen;
        $trans[$len]{$orig} = $repl;
    }
}

#for (my $len = $maxlen; $len > 0; $len--) {
#    print "$len :\n";
#    for my $orig (keys %{$trans[$len]}) {print "$orig->$trans[$len]{$orig}\n"}
#}

##################################################################
# convert

# Slurp all of standard input; $/ is only undefined inside the do block.
my $in = do { local $/; readline(*STDIN) };
$in = '' unless defined $in;

# A ':' sentinel is prepended so that patterns anchored on a leading
# non-letter (written with ':' in the table) can match at the very start
# of the text.  It is removed again from the output below.
$in = ':' . $in;

my $inlength     = length $in;
my $offset       = 0;
my $out          = '';
my $translitflag = 1;

LOOP: while ($offset < $inlength) {
    my $onesymbol = substr($in, $offset, 1);

    # translit on/off switch?
    # Offset 0 is the sentinel, never user text, so it must not be taken
    # for a switch symbol even when ':' was chosen as one.
    if ($offset > 0) {
        if (defined $TRANS) {
            if ($onesymbol eq $TRANS) {
                $translitflag = 1 - $translitflag;
                $offset++;
                next LOOP;
            }
        }
        else {
            if ($onesymbol eq $TRANSOFF) {
                $translitflag = 0;
                $offset++;
                next LOOP;
            }
            if ($onesymbol eq $TRANSON) {
                $translitflag = 1;
                $offset++;
                next LOOP;
            }
        }
    }

    # transliteration itself: longest pattern first
    if ($translitflag) {
        for (my $len = $maxlen; $len > 0; $len--) {
            my $pattern    = substr($in, $offset, $len);
            my $patwopunct = $pattern;

            if ($patwopunct =~ s/$nonletter/:/g) {
                # there are non-letters: look the candidate up with every
                # non-letter written as ':'
                my $repl = $trans[$len]{$patwopunct};
                next unless defined $repl;

                my $translit = $pattern;
                my $pattorep = $pattern;
                $pattorep =~ s/$nonletter//g;       # delete non-letters

                # if last is non-letter, we may need it for the next
                # transliteration: leave it in the input (\z anchors at the
                # true end, so a final newline is handled like any other
                # non-letter)
                $len-- if $translit =~ s/$nonletter\z//;

                # \Q...\E: the letters may include '^', which must be
                # matched literally
                $translit =~ s/\Q$pattorep\E/$repl/;

                $out    .= $translit;
                $offset += $len;
                next LOOP;
            }
            else {
                my $translit = $trans[$len]{$pattern};
                if (defined $translit) {
                    $out    .= $translit;
                    $offset += $len;
                    next LOOP;
                }
            }
        }
    }

    # If we did not find pattern or translitflag was off
    $out .= $onesymbol;
    $offset++;
}

##################################################################
# output

# drop the sentinel that was prepended to the input
print substr($out, 1);

##################################################################
__END__
#
# lat_chars cyr_chars, any number of pairs in line
# colon means noncharacter (space, comma etc)
a Á b  c à d Ä e Å f Æ g Ç h È i É j Ê k Ë l Ì m Í n Î o Ï p Ð q Ñ r Ò s Ó t Ô u Õ v × w × x È y Ù z Ú
A á B â C ã D ä E å F æ G ç H è I é J ê K ë L ì M í N î O ï P ð Q ñ R ò S ó T ô U õ V ÷ W ÷ X è Y ù Z ú
`e Ü `E ü ^e Ü ^E ü
:ye Å je Å :YE å JE å :Ye å Je å
yo £ jo £ YO ³ JO ³ Yo ³ Jo ³
yu À ju À YU à JU à Yu à Ju à
ya Ñ ja Ñ YA ñ JA ñ Ya ñ Ja ñ
ay ÁÊ ey ÅÊ iy ÉÊ oy ÏÊ uy ÕÊ yy ÙÊ
AY áê EY åê IY éê OY ïê UY õê YY ùê
Ay áÊ Ey åÊ Iy éÊ Oy ïÊ Uy õÊ Yy ùÊ
yey ÅÊ YEY åê Yey åÊ
aya ÁÑ aye ÁÅ ayo Á£ ayu ÁÀ
AYA áñ AYE áå AYO á³ AYU áà
Aya áÑ Aye áÅ Ayo ᣠAyu áÀ
eya ÅÑ eye ÅÅ eyo Å£ eyu ÅÀ
EYA åñ EYE åå EYO å³ EYU åà
Eya åÑ Eye åÅ Eyo å£ Eyu åÀ
iya ÉÑ iye 
ÉÅ iyo É£ iyu ÉÀ IYA éñ IYE éå IYO é³ IYU éà Iya éÑ Iye éÅ Iyo é£ Iyu éÀ oya ÏÑ oye ÏÅ oyo Ï£ oyu ÏÀ OYA ïñ OYE ïå OYO ï³ OYU ïà Oya ïÑ Oye ïÅ Oyo ï£ Oyu ïÀ uya ÕÑ uye ÕÅ uyo Õ£ uyu ÕÀ UYA õñ UYE õå UYO õ³ UYU õà Uya õÑ Uye õÅ Uyo õ£ Uyu õÀ yya ÙÑ yye ÙÅ yyo Ù£ yyu ÙÀ YYA ùñ YYE ùå YYO ù³ YYU ùà Yya ùÑ Yye ùÅ Yyo ù£ Yyu ùÀ ajon ÁÊÏÎ AJON áêïî ajor ÁÊÏÒ AJOR áêïò ayon ÁÊÏÎ AYON áêïî ayor ÁÊÏÒ AYOR áêïò jork ÊÏÒË JORK êïòë Jork êÏÒË york ÊÏÒË YORK êïòë York êÏÒË zh Ö ZH ö Zh ö z~h ÚÈ Z~H úè Z~h úÈ kh È KH è Kh è k~h ËÈ K~H ëè K~h ëÈ ts à TS ã Ts ã t~s ÔÓ T~S ôó T~s ôÓ t~sh ÔÛ T~SH ôû T~sh ôÛ t~sch ÔÝ T~SCH ôý T~sch ôÝ tsya ÔÓÑ TSYA ôóñ tsja ÔÓÑ TSJA ôóñ tsyuda ÔÓÀÄÁ TSYUDA ôóàäá tst ÔÓÔ TST ôóô dets ÄÅÔÓ DETS äåôó Dets äÅÔÓ sovetsk ÓÏ×ÅÔÓË SOVETSK óï÷åôóë Sovetsk óÏ×ÅÔÓË piratsk ÐÉÒÁÔÓË PIRATSK ðéòáôóë Piratsk ðÉÒÁÔÓË plotsk ÐÌÏÔÓË PLOTSK ðìïôóë Plotsk ðÌÏÔÓË idiotsk ÉÄÉÏÔÓË IDIOTSK éäéïôóë Idiotsk éÄÉÏÔÓË potsdam ÐÏÔÓÄÁÍ POTSDAM ðïôóäáí Potsdam ðÏÔÓÄÁÍ :otse ÏÔÓÅ :OTSE ïôóå :Otse ïÔÓÅ :otsen ÏÃÅÎ :OTSEN ïãåî :Otsen ïÃÅÎ :otsep ÏÃÅÐ :OTSEP ïãåð :Otsep ïÃÅÐ :otsk ÏÔÓË :OTSK ïôóë :Otsk ïÔÓË :otst ÏÔÓÔ :OTST ïôóô :Otst ïÔÓÔ :otsyu ÏÔÓÀ :OTSYU ïôóà :Otsyu ïÔÓÀ :otsut ÏÔÓÕÔ :OTSUT ïôóõô :Otsut ïÔÓÕÔ :otsch ÏÔÓÞ :OTSCH ïôóþ :Otsch ïÔÓÞ odets ÏÄÅà ODETS ïäåã tsh ÔÛ TSH ôû Tsh ôÛ tsch ÔÝ TSCH ôý Tsch ôÝ tshch ÔÝ TSHCH ôý Tshch ôÝ ch Þ CH þ Ch þ c~h ÃÈ C~H ãè C~h ãÈ sh Û SH û Sh û s~h ÓÈ S~H óè S~h óÈ :shem ÓÈÅÍ :SHEM óèåí :Shem óÈÅÍ shod ÓÈÏÄ SHOD óèïä Shod óÈÏÄ shozh ÓÈÏÖ SHOZH óèïö Shozh óÈÏÖ shola ÓÈÏÌÁ SHOLA óèïìá Shola óÈÏÌÁ voshit ×ÏÓÈÉÔ Voshit ÷ÏÓÈÉÔ VOSHIT ÷ïóèéô voshisch ×ÏÓÈÉÝ Voshisch ÷ÏÓÈÉÝ VOSHISCH ÷ïóèéý shch Ý SHCH ý Shch ý sch Ý SCH ý Sch ý s~ch ÓÞ S~CH óþ S~ch óÞ ischerp ÉÓÞÅÒÐ ISCHERP éóþåòð Ischerp éÓÞÅÒÐ ischet ÉÝÅÔ ISCHET éýåô Ischet éÝÅÔ schast ÓÞÁÓÔ SCHAST óþáóô Schast óÞÁÓÔ schita ÓÞÉÔÁ SCHITA óþéôá Schita óÞÉÔÁ schityv ÓÞÉÔÙ× SCHITYV óþéôù÷ Schityv óÞÉÔÙ× schita: ÝÉÔÁ SCHITA: ýéôá schet ÓÞÅÔ SCHET óþåô Schet óÞÅÔ schyot ÓÞ£Ô SCHYOT óþ³ô Schyot óÞ£Ô 
schetk ÝÅÔË SCHETK ýåôë Schetk ýÅÔË schyotk Ý£ÔË SCHYOTK ý³ôë Schyotk ý£ÔË schetochn ÝÅÔÏÞÎ SCHETOCHN ýåôïþî Schetochn ýÅÔÏÞÎ schyotochn Ý£ÔÏÞÎ SCHYOTOCHN ý³ôïþî Schyotochn ý£ÔÏÞÎ schetin ÝÅÔÉÎ SCHETIN ýåôéî Schetin ýÅÔÉÎ ischez ÉÓÞÅÚ ISCHEZ éóþåú éÓÞÅÚ Ischez plesch ÐÌÅÝ PLESCH ðìåý Plesch ðÌÅÝ ###:ek ÜË :EK üë :Ek üË ###eli ÅÌÉ ELI åìé Eli åÌÉ :eta ÜÔÁ :ETA üôá :Eta üÔÁ :eti ÜÔÉ :ETI üôé :Eti üÔÉ :eto ÜÔÏ :ETO üôï :Eto üÔÏ :elekt ÜÌÅËÔ :ELEKT üìåëô :Elekt üÌÅËÔ ' Ø `' ø ^' ø '' ß `'' ÿ ^'' ÿ 'u ØÀ 'U øà 'a ØÑ 'A øñ 'ye ØÅ 'YE øå :v'e ×ßÅ :V'E ÷ÿå :V'e ÷ßÅ z'e ÚßÅ Z'E úÿå z'yo Úߣ Z'YO úÿ³ ### :ob' ÏÂß :OB' ïâÿ :Ob' ïÂß ### :pod' ÐÏÄß :POD' ðïäÿ :Pod' ðÏÄß ### :raz' ÒÁÚß :RAZ' òáúÿ :Raz' òÁÚß ### :iz' ÉÚß :IZ' éúÿ :Iz' éÚß ### z'ya ÚßÑ Z'YA úÿñ ### vyed ×ÙÅÄ VYED ÷ùåä Vyed ÷ÙÅÄ ### vyez ×ÙÅÚ VYEZ ÷ùåú Vyez ÷ÙÅÚ ### vyem ×ÙÅÍ VYEM ÷ùåí Vyem ÷ÙÅÍ ### vyes ×ÙÅÓ VYES ÷ùåó Vyes ÷ÙÅÓ ### vyeh ×ÙÅÈ VYEH ÷ùåè Vyeh ÷ÙÅÈ ### vyekh ×ÙÅÈ VYEKH ÷ùåè Vyekh ÷ÙÅÈ vyud ×ÙÕÄ VYUD ÷ùõä Vyud ÷ÙÕÄ vyuch ×ÙÕÞ VYUCH ÷ùõþ Vyuch ÷ÙÕÞ vyuzh ×ÙÕÖ VYUZH ÷ùõö Vyuzh ÷ÙÕÖ ##################################################################