Удзельнік:Alexey/канвэртар/LanguageBe tarask.php
Выгляд
<?php
/** Belarusian language, classic spelling
* (Беларуская, клясычны)
*
* @package MediaWiki
* @subpackage Language
*/
require_once( dirname(__FILE__).'/../LanguageConverter.php' );
require_once( dirname(__FILE__).'/LanguageBe_tarask_cyrl.php' );
// Alphabet constants. In this file they are interpolated into PCRE character
// classes (conversion tables of BeTaraskConverter) and concatenated in
// BeTaraskConverter::translate() to build the set of letters that make up a
// convertible word.
// NOTE(review): the two Cyrillic constants list Ж and З twice (once after Д,
// once after Ё). That is harmless inside a character class; confirm no other
// consumer relies on the exact string before de-duplicating.

// Upper-case letters of the Belarusian Cyrillic alphabet.
define('BE_CYR_UPPERCASE', 'АБВГҐДЖЗЕЁЖЗІЙКЛМНОПРСТУЎФХЦЧШЫЬЭЮЯ');
// Lower-case letters of the Belarusian Cyrillic alphabet.
define('BE_CYR_LOWERCASE', 'абвгґджзеёжзійклмнопрстуўфхцчшыьэюя');
// Upper-case letters of the Belarusian Latin alphabet.
define('BE_LAT_UPPERCASE', 'ABCĆČDEFGHIJKLŁMNŃOPRSŚŠTUŬVYZŹŽ');
// Lower-case letters of the Belarusian Latin alphabet.
define('BE_LAT_LOWERCASE', 'abcćčdefghijklłmnńoprsśštuŭvyzźž');
/**
 * Script converter between the Cyrillic ('be-tarask') and Latin ('be-latn')
 * variants of Belarusian classic spelling.
 *
 * The conversion is driven by two ordered tables of PCRE pattern => replacement
 * pairs. translate() cuts the text into runs of letters of the source script
 * and feeds every run separately to regsConverter(), so '^' and '$' inside the
 * table patterns anchor to the boundaries of a single word, not of the text.
 * The rules are applied strictly in array order; later rules rely on what the
 * earlier ones have already replaced.
 */
class BeTaraskConverter extends LanguageConverter {
    /** Ordered rules (PCRE pattern => replacement) for Cyrillic -> Latin. */
    var $cyr2lat = array();
    /** Ordered rules (PCRE pattern => replacement) for Latin -> Cyrillic. */
    var $lat2cyr = array();

    /**
     * Fills both regex rule tables and sets $this->mTables to two
     * ReplacementArray objects built from empty arrays.
     */
    function loadDefaultTables() {
        $this->cyr2lat = array(
            ## Iotated vowels ЕЁЮЯ following a vowel
            '/([АЕЁІОУЫЭЮЯ])Е/u' => '$1JE', '/([аеёіоуыэюя])е/u' => '$1je',
            '/([АЕЁІОУЫЭЮЯ])Ё/u' => '$1JO', '/([аеёіоуыэюя])ё/u' => '$1jo',
            '/([АЕЁІОУЫЭЮЯ])Ю/u' => '$1JU', '/([аеёіоуыэюя])ю/u' => '$1ju',
            '/([АЕЁІОУЫЭЮЯ])Я/u' => '$1JA', '/([аеёіоуыэюя])я/u' => '$1ja',
            ## ЕЁЮЯ after an apostrophe or the soft sign
            '/([\'’ь])е/u' => '$1je', '/([\'’ь])ё/u' => '$1jo',
            '/([\'’ь])ю/u' => '$1ju', '/([\'’ь])я/u' => '$1ja',
            '/ЬЕ/u' => 'ЬJE', '/ЬЁ/u' => 'ЬJO', '/ЬЮ/u' => 'ЬJU', '/ЬЯ/u' => 'ЬJA',
            ## ЕЁЮЯ at the start of a word // TODO: needs refinement
            '/^Е(['.BE_CYR_LOWERCASE.']|$)/u' => 'Je$1',
            '/^Е(['.BE_CYR_UPPERCASE.']|$)/u' => 'JE$1',
            '/^Ё(['.BE_CYR_LOWERCASE.']|$)/u' => 'Jo$1',
            '/^Ё(['.BE_CYR_UPPERCASE.']|$)/u' => 'JO$1',
            '/^Ю(['.BE_CYR_LOWERCASE.']|$)/u' => 'Ju$1',
            '/^Ю(['.BE_CYR_UPPERCASE.']|$)/u' => 'JU$1',
            '/^Я(['.BE_CYR_LOWERCASE.']|$)/u' => 'Ja$1',
            '/^Я(['.BE_CYR_UPPERCASE.']|$)/u' => 'JA$1',
            '/^е/u' => 'je', '/^ё/u' => 'jo', '/^ю/u' => 'ju', '/^я/u' => 'ja',
            '/^Е$/u' => 'Je', '/^Ё$/u' => 'Jo', '/^Ю$/u' => 'Ju', '/^Я$/u' => 'Ja',
            ## ЕЁЮЯ following Л
            '/Ля/u' => 'La', '/ля/u' => 'la', '/ЛЯ/u' => 'LA',
            '/Лю/u' => 'Lu', '/ЛЮ/u' => 'LU', '/лю/u' => 'lu',
            '/Лё/u' => 'Lo', '/ЛЁ/u' => 'LO', '/лё/u' => 'lo',
            '/Ле/u' => 'Le', '/ЛЕ/u' => 'LE', '/ле/u' => 'le',
            ## ЕЁЮЯ following any consonant other than Л
            '/е/u' => 'ie', '/ё/u' => 'io', '/ю/u' => 'iu', '/я/u' => 'ia',
            '/Е/u' => 'IE', '/Ё/u' => 'IO', '/Ю/u' => 'IU', '/Я/u' => 'IA',
            ## Л
            ## The second class lists Latin I as well as Cyrillic І/і so that a
            ## capital "ЛІ" is treated as soft L, exactly like "Лі".
            '/Л([АОУЫЭаоуыэ])/u' => 'Ł$1', '/Л([IІі])/u' => 'L$1', '/Л[Ьь]/u' => 'L',
            '/л([аоуыэ])/u' => 'ł$1', '/лі/u' => 'li', '/ль/u' => 'l',
            ## Soft sign after З Н С Ц
            '/З[Ьь]/u' => 'Ź', '/Н[Ьь]/u' => 'Ń', '/С[Ьь]/u' => 'Ś', '/Ц[Ьь]/u' => 'Ć',
            '/зь/u' => 'ź', '/нь/u' => 'ń', '/сь/u' => 'ś', '/ць/u' => 'ć',
            ## Х
            '/Х(['.BE_CYR_UPPERCASE.'])/u' => 'CH$1', '/Х(['.BE_CYR_LOWERCASE.'])/u' => 'Ch$1',
            '/Х$/u' => 'CH', '/х/u' => 'ch',
            ## By now a following ЕЁЮЯ has already been turned into Latin
            ## letters, so a capital Х in front of it is not caught by the
            ## Cyrillic look-ahead rules above and must be handled here,
            ## otherwise it would stay Cyrillic in the output.
            '/Х(['.BE_LAT_UPPERCASE.'])/u' => 'CH$1', '/Х/u' => 'Ch',
            ## Remaining vowels (АІОУЫЭ)
            '/а/u' => 'a', '/А/u' => 'A', '/і/u' => 'i', '/І/u' => 'I',
            '/о/u' => 'o', '/О/u' => 'O', '/у/u' => 'u', '/У/u' => 'U',
            '/ы/u' => 'y', '/Ы/u' => 'Y', '/э/u' => 'e', '/Э/u' => 'E',
            ## Remaining consonants
            '/Б/u' => 'B', '/б/u' => 'b', '/В/u' => 'V', '/в/u' => 'v',
            '/Г/u' => 'H', '/г/u' => 'h', '/Ґ/u' => 'G', '/ґ/u' => 'g',
            '/Д/u' => 'D', '/д/u' => 'd', '/Ж/u' => 'Ž', '/ж/u' => 'ž',
            '/З/u' => 'Z', '/з/u' => 'z', '/Й/u' => 'J', '/й/u' => 'j',
            '/К/u' => 'K', '/к/u' => 'k', '/Л/u' => 'Ł', '/л/u' => 'ł',
            '/М/u' => 'M', '/м/u' => 'm', '/Н/u' => 'N', '/н/u' => 'n',
            '/П/u' => 'P', '/п/u' => 'p', '/Р/u' => 'R', '/р/u' => 'r',
            '/С/u' => 'S', '/с/u' => 's', '/Т/u' => 'T', '/т/u' => 't',
            '/Ў/u' => 'Ŭ', '/ў/u' => 'ŭ', '/Ф/u' => 'F', '/ф/u' => 'f',
            '/Ц/u' => 'C', '/ц/u' => 'c', '/Ч/u' => 'Č', '/ч/u' => 'č',
            '/Ш/u' => 'Š', '/ш/u' => 'š',
            ## Drop the apostrophes and soft signs that are still left
            '/[ьЬ\'’]/u' => '',
        );
        $this->lat2cyr = array(
            ## Apostrophe // TODO: verify!
            '/([bBdDvVmMpPfFtTrRgGhHkKžŽčČšŠCcNnSsZz])([Jj][eEoOuUaA])/u' => '$1\'$2',
            ## ĆŃŚŹ // TODO: needs refinement
            ## A word consisting of the single capital letter keeps a
            ## lower-case soft sign. These rules must run before the
            ## unconditional ones below, which replace every occurrence.
            '/^Ć$/u' => 'Ць', '/^Ń$/u' => 'Нь', '/^Ś$/u' => 'Сь', '/^Ź$/u' => 'Зь',
            '/Ć(['.BE_LAT_LOWERCASE.'])/u' => 'Ць$1', '/Ć/u' => 'ЦЬ',
            '/Ń(['.BE_LAT_LOWERCASE.'])/u' => 'Нь$1', '/Ń/u' => 'НЬ',
            '/Ś(['.BE_LAT_LOWERCASE.'])/u' => 'Сь$1', '/Ś/u' => 'СЬ',
            '/Ź(['.BE_LAT_LOWERCASE.'])/u' => 'Зь$1', '/Ź/u' => 'ЗЬ',
            '/ć/u' => 'ць', '/ń/u' => 'нь', '/ś/u' => 'сь', '/ź/u' => 'зь',
            ## ŁL // TODO: needs refinement
            '/Ł/u' => 'Л', '/La/u' => 'Ля', '/LA/u' => 'ЛЯ',
            '/Le/u' => 'Ле', '/LE/u' => 'ЛЕ', '/Li/u' => 'Лі', '/LI/u' => 'ЛІ',
            '/Lo/u' => 'Лё', '/LO/u' => 'ЛЁ', '/Lu/u' => 'Лю', '/LU/u' => 'ЛЮ',
            '/L(['.BE_LAT_LOWERCASE.'])/u' => 'Ль$1', '/L/u' => 'ЛЬ',
            '/ł/u' => 'л', '/la/u' => 'ля', '/le/u' => 'ле',
            '/li/u' => 'лі', '/lo/u' => 'лё', '/lu/u' => 'лю', '/l/u' => 'ль',
            ## Ch
            '/C[Hh]/u' => 'Х', '/ch/u' => 'х',
            ## Vowels written as Je Jo Ju Ja / ie io iu ia
            '/[ij]e/u' => 'е', '/[ij]o/u' => 'ё',
            '/[ij]u/u' => 'ю', '/[ij]a/u' => 'я',
            '/[IJ][Ee]/u' => 'Е', '/[IJ][Oo]/u' => 'Ё',
            '/[IJ][Uu]/u' => 'Ю', '/[IJ][Aa]/u' => 'Я',
            ## Vowels AEIOUY
            '/A/u' => 'А', '/E/u' => 'Э', '/I/u' => 'І', '/O/u' => 'О',
            '/U/u' => 'У', '/Y/u' => 'Ы', '/a/u' => 'а', '/e/u' => 'э',
            '/i/u' => 'і', '/o/u' => 'о', '/u/u' => 'у', '/y/u' => 'ы',
            ## Remaining consonants
            '/B/u' => 'Б', '/b/u' => 'б', '/C/u' => 'Ц', '/c/u' => 'ц',
            '/Č/u' => 'Ч', '/č/u' => 'ч', '/D/u' => 'Д', '/d/u' => 'д',
            '/F/u' => 'Ф', '/f/u' => 'ф', '/G/u' => 'Ґ', '/g/u' => 'ґ',
            '/H/u' => 'Г', '/h/u' => 'г', '/J/u' => 'Й', '/j/u' => 'й',
            '/K/u' => 'К', '/k/u' => 'к', '/M/u' => 'М', '/m/u' => 'м',
            '/N/u' => 'Н', '/n/u' => 'н', '/P/u' => 'П', '/p/u' => 'п',
            '/R/u' => 'Р', '/r/u' => 'р', '/S/u' => 'С', '/s/u' => 'с',
            '/Š/u' => 'Ш', '/š/u' => 'ш', '/T/u' => 'Т', '/t/u' => 'т',
            '/Ŭ/u' => 'Ў', '/ŭ/u' => 'ў', '/V/u' => 'В', '/v/u' => 'в',
            '/Z/u' => 'З', '/z/u' => 'з', '/Ž/u' => 'Ж', '/ž/u' => 'ж',
        );
        $BeTarask2Cyrl = array();
        $BeTarask2Latn = array();
        $this->mTables = array(
            'be-latn' => new ReplacementArray( $BeTarask2Latn ),
            'be-tarask' => new ReplacementArray( $BeTarask2Cyrl )
        );
    }

    /**
     * Runs one word through the ordered regex rules of the requested variant.
     *
     * @param string $text      a single word (run of letters of the source script)
     * @param string $toVariant 'be-tarask' (Latin -> Cyrillic) or
     *                          'be-latn' (Cyrillic -> Latin)
     * @return string the converted word; $text unchanged for any other variant
     */
    function regsConverter( $text, $toVariant ) {
        if ( $text == '' ) {
            return $text;
        }
        if ( $toVariant == 'be-tarask' ) {
            $rules = $this->lat2cyr;
        } elseif ( $toVariant == 'be-latn' ) {
            $rules = $this->cyr2lat;
        } else {
            return $text;
        }
        foreach ( $rules as $pattern => $replacement ) {
            $converted = preg_replace( $pattern, $replacement, $text );
            // preg_replace() yields null on failure (e.g. malformed UTF-8);
            // keep what we have instead of wiping the word.
            if ( $converted !== null ) {
                $text = $converted;
            }
        }
        return $text;
    }

    /**
     * Do not convert content on talk pages: switches mDoContentConvert off
     * when the parser's title is a talk page and on otherwise, then delegates
     * to the parent implementation.
     */
    function parserConvert( $text, &$parser ) {
        $title = $parser->getTitle();
        $this->mDoContentConvert = !( is_object( $title ) && $title->isTalkPage() );
        return parent::parserConvert( $text, $parser );
    }

    /*
     * A function wrapper:
     * - if there is no selected variant, leave the link
     *   names as they were
     * - do not try to find variants for usernames
     */
    function findVariantLink( &$link, &$nt ) {
        // Links into the user namespaces are never looked up in variants.
        if ( is_object( $nt ) ) {
            $ns = $nt->getNamespace();
            if ( $ns == NS_USER || $ns == NS_USER_TALK ) {
                return;
            }
        }
        $oldlink = $link;
        parent::findVariantLink( $link, $nt );
        // With the main language code as preferred variant the link text
        // is restored to what the caller passed in.
        if ( $this->getPreferredVariant() == $this->mMainLanguageCode ) {
            $link = $oldlink;
        }
    }

    /*
     * We want our external link captions to be converted in variants,
     * so we return the original text instead -{$text}-, except for URLs
     */
    function markNoConversion( $text, $noParse = false ) {
        // The scheme alternatives are grouped so that '^' applies to all of
        // them; ungrouped, "ftp://" or "irc://" anywhere inside a caption
        // would have been enough to suppress its conversion.
        if ( $noParse || preg_match( '/^(?:https?|ftp|irc):\/\//', $text ) ) {
            return parent::markNoConversion( $text );
        }
        return $text;
    }

    /*
     * An ugly function wrapper for parsing Image titles
     * (to prevent image name conversion)
     */
    function autoConvert( $text, $toVariant = false ) {
        global $wgTitle;
        // $wgTitle is only dereferenced when it is actually an object.
        if ( is_object( $wgTitle ) && $wgTitle->getNamespace() == NS_IMAGE ) {
            $imagename = $wgTitle->getNsText();
            // The namespace name is data, not a pattern: quote it.
            if ( preg_match( '/^' . preg_quote( $imagename, '/' ) . ':/', $text ) ) {
                return $text;
            }
        }
        if ( $this->getPreferredVariant() == $this->mMainLanguageCode ) {
            return $text; // ???
        }
        return parent::autoConvert( $text, $toVariant );
    }

    /**
     * It translates text into variant.
     *
     * After the parent translation the text is split on everything that is
     * not a letter of the source script; each run of letters goes through
     * regsConverter() and the separators are copied over untouched.
     *
     * @param string $text
     * @param string $toVariant 'be-latn' or 'be-tarask'; any other value
     *                          returns $text unchanged
     * @return string
     */
    function translate( $text, $toVariant ) {
        switch ( $toVariant ) {
            case 'be-latn':
                $letters = BE_CYR_UPPERCASE . BE_CYR_LOWERCASE . "'’";
                break;
            case 'be-tarask':
                $letters = BE_LAT_UPPERCASE . BE_LAT_LOWERCASE;
                break;
            default:
                return $text;
        }
        $text = parent::translate( $text, $toVariant );
        $matches = preg_split( '/[^' . $letters . ']+/u', $text, -1, PREG_SPLIT_OFFSET_CAPTURE );
        if ( $matches === false ) {
            // Splitting failed (e.g. malformed UTF-8): leave the text alone.
            return $text;
        }
        $ret = '';
        $mstart = 0;
        foreach ( $matches as $m ) {
            list( $word, $offset ) = $m;
            // Offsets reported by PREG_SPLIT_OFFSET_CAPTURE are byte offsets,
            // hence the byte-wise substr()/strlen() here.
            $ret .= substr( $text, $mstart, $offset - $mstart );
            $ret .= $this->regsConverter( $word, $toVariant );
            $mstart = $offset + strlen( $word );
        }
        return $ret;
    }
}
/**
 * Belarusian (classic spelling) language class with two script variants,
 * 'be-tarask' and 'be-latn', handled by a BeTaraskConverter.
 */
class LanguageBe_tarask extends LanguageBe_tarask_cyrl {
    /**
     * Creates the variant converter, stores it in mConverter and appends it
     * to the 'ArticleSaveComplete' entry of $wgHooks.
     */
    function __construct() {
        global $wgHooks;
        parent::__construct();
        $supported = array( 'be-tarask', 'be-latn' );
        // Each variant falls back to the other one.
        $fallbacks = array(
            'be-tarask' => 'be-latn',
            'be-latn' => 'be-tarask',
        );
        $converter = new BeTaraskConverter( $this, 'be-tarask', $supported, $fallbacks );
        $this->mConverter = $converter;
        $wgHooks['ArticleSaveComplete'][] = $converter;
    }

    /**
     * Applies the parent grammar rules to the 'be-tarask' form of the word.
     *
     * Both arguments are auto-converted to 'be-tarask' first. If that
     * conversion changed the word, the result is auto-converted to 'be-latn'
     * before it is returned.
     */
    function convertGrammar( $word, $case ) {
        $profileSection = "LanguageBeTarask::convertGrammar";
        wfProfileIn( $profileSection );

        $converter = $this->mConverter;
        $taraskWord = $converter->autoConvert( $word, 'be-tarask' );
        $taraskCase = $converter->autoConvert( $case, 'be-tarask' );
        $result = parent::convertGrammar( $taraskWord, $taraskCase );

        // The input was altered by the conversion: convert the result back.
        if ( $word != $taraskWord ) {
            $result = $converter->autoConvert( $result, 'be-latn' );
        }

        wfProfileOut( $profileSection );
        return $result;
    }

    /**
     * Auto-converts the string to 'be-tarask' and hands it to the parent
     * stripForSearch().
     */
    function stripForSearch( $string ) {
        return parent::stripForSearch(
            $this->mConverter->autoConvert( $string, 'be-tarask' )
        );
    }

    /**
     * Joins the terms with '|', auto-converts the joined string to all
     * variants, then splits the joined results on '|' again.
     *
     * @return array the resulting terms with duplicates removed
     */
    function convertForSearchResult( $termsArray ) {
        $joined = implode( '|', $termsArray );
        $perVariant = $this->mConverter->autoConvertToAllVariants( $joined );
        $allTerms = explode( '|', implode( '|', $perVariant ) );
        return array_unique( $allTerms );
    }
}
?>