* @link http://www.yiiframework.com/ * @copyright Copyright © 2008-2011 Yii Software LLC * @license http://www.yiiframework.com/license/ * @version $Id$ */ /** * CldrCommand converts the locale data from the {@link http://www.unicode.org/cldr/ CLDR project} * to PHP scripts so that they can be more easily used in PHP programming. * * The script respects locale inheritance so that the PHP data for a child locale * will contain all its parents' locale data if they are not specified in the child locale. * Therefore, to import the data for a locale, only the PHP script for that particular locale * needs to be included. * * Note, only the data relevant to number and date formatting are extracted. * Each PHP script file is named as the corresponding locale ID in lower case. * * @author Qiang Xue * @version $Id$ * @package system.build * @since 1.0 */ class CldrCommand extends CConsoleCommand { protected $pluralRules = array(); public function getHelp() { return << DESCRIPTION This command converts the locale data from the CLDR project to PHP scripts so that they can be more easily used in PHP programming. The script respects locale inheritance so that the PHP data for a child locale will contain all its parent locale data if they are not specified in the child locale. Therefore, to import the data for a locale, only the PHP script for that particular locale needs to be included. Note, only the data relevant to number and date formatting are extracted. Each PHP script file is named as the corresponding locale ID in lower case. The resulting PHP scripts are created under the same directory that contains the original CLDR data. PARAMETERS * data-path: required, the original CLDR data directory. This directory should contain "main" subdirectory with hundreds of XML files and "supplemental" subdirectory with "plurals.xml". EOD; } /** * Execute the action. * @param array command line parameters specific for this command */ public function run($args) { if(!isset($args[0])) $this->usageError('the CLDR data directory is not specified.'); if(!is_dir($basePath=$args[0])) $this->usageError("Directory '$basePath' does not exist."); if(!is_dir($path=$basePath.DIRECTORY_SEPARATOR.'main')) $this->usageError("directory '$path' does not exist."); if(!is_file($pluralFile=$basePath.DIRECTORY_SEPARATOR.'supplemental'.DIRECTORY_SEPARATOR.'plurals.xml')) $this->usageError("File '$pluralFile' does not exist."); // parse plural.xml before locale files $pluralXml = simplexml_load_file($pluralFile); $this->parsePluralRules($pluralXml); // collect XML files to be processed $options=array( 'exclude'=>array('.svn'), 'fileTypes'=>array('xml'), 'level'=>0, ); $files=CFileHelper::findFiles(realpath($path),$options); $sourceFiles=array(); foreach($files as $file) $sourceFiles[basename($file)]=$file; // sort by file name so that inheritances can be processed properly ksort($sourceFiles); // process root first because it is inherited by all if(isset($sourceFiles['root.xml'])) { $this->process($sourceFiles['root.xml']); unset($sourceFiles['root.xml']); foreach($sourceFiles as $sourceFile) $this->process($sourceFile); } else die('Unable to find the required root.xml under CLDR "main" data directory.'); } protected function process($path) { $source=basename($path); echo "processing $source..."; $dir=dirname($path); $locale=substr($source,0,-4); $target=$locale.'.php'; // retrieve parent data first if(($pos=strrpos($locale,'_'))!==false) $data=require($dir.DIRECTORY_SEPARATOR.strtolower(substr($locale,0,$pos)).'.php'); else if($locale!=='root') $data=require($dir.DIRECTORY_SEPARATOR.'root.php'); else $data=array(); $xml=simplexml_load_file($path); $this->parseVersion($xml,$data); $this->parseNumberSymbols($xml,$data); $this->parseNumberFormats($xml,$data); $this->parseCurrencySymbols($xml,$data); $this->parseMonthNames($xml,$data); $this->parseWeekDayNames($xml,$data); $this->parseEraNames($xml,$data); $this->parseDateFormats($xml,$data); $this->parseTimeFormats($xml,$data); $this->parseDateTimeFormat($xml,$data); $this->parsePeriodNames($xml,$data); $this->parseOrientation($xml,$data); $this->addPluralRules($data, $locale); $data=str_replace("\r",'',var_export($data,true)); $locale=substr(basename($path),0,-4); $content=<<identity->version['number'],$matches); $data['version']=$matches[0]; } protected function parseNumberSymbols($xml,&$data) { foreach($xml->xpath('/ldml/numbers/symbols/*') as $symbol) { $name=$symbol->getName(); if(!isset($data['numberSymbols'][$name]) || (string)$symbol['draft']==='') $data['numberSymbols'][$name]=(string)$symbol; } } protected function parseNumberFormats($xml,&$data) { $pattern=$xml->xpath('/ldml/numbers/decimalFormats/decimalFormatLength/decimalFormat/pattern'); if(isset($pattern[0])) $data['decimalFormat']=(string)$pattern[0]; $pattern=$xml->xpath('/ldml/numbers/scientificFormats/scientificFormatLength/scientificFormat/pattern'); if(isset($pattern[0])) $data['scientificFormat']=(string)$pattern[0]; $pattern=$xml->xpath('/ldml/numbers/percentFormats/percentFormatLength/percentFormat/pattern'); if(isset($pattern[0])) $data['percentFormat']=(string)$pattern[0]; $pattern=$xml->xpath('/ldml/numbers/currencyFormats/currencyFormatLength/currencyFormat/pattern'); if(isset($pattern[0])) $data['currencyFormat']=(string)$pattern[0]; } protected function parseCurrencySymbols($xml,&$data) { $currencies=$xml->xpath('/ldml/numbers/currencies/currency'); foreach($currencies as $currency) { if((string)$currency->symbol!='') $data['currencySymbols'][(string)$currency['type']]=(string)$currency->symbol; } } protected function parseMonthNames($xml,&$data) { $monthTypes=$xml->xpath('/ldml/dates/calendars/calendar[@type=\'gregorian\']/months/monthContext[@type=\'format\']/monthWidth'); if(is_array($monthTypes)) { foreach($monthTypes as $monthType) { $names=array(); foreach($monthType->xpath('month') as $month) $names[(string)$month['type']]=(string)$month; if($names!==array()) $data['monthNames'][(string)$monthType['type']]=$names; } } if(!isset($data['monthNames']['abbreviated'])) $data['monthNames']['abbreviated']=$data['monthNames']['wide']; $monthTypes=$xml->xpath('/ldml/dates/calendars/calendar[@type=\'gregorian\']/months/monthContext[@type=\'stand-alone\']/monthWidth'); if(is_array($monthTypes)) { foreach($monthTypes as $monthType) { $names=array(); foreach($monthType->xpath('month') as $month) $names[(string)$month['type']]=(string)$month; if($names!==array()) $data['monthNamesSA'][(string)$monthType['type']]=$names; } } } protected function parseWeekDayNames($xml,&$data) { static $mapping=array( 'sun'=>0, 'mon'=>1, 'tue'=>2, 'wed'=>3, 'thu'=>4, 'fri'=>5, 'sat'=>6, ); $dayTypes=$xml->xpath('/ldml/dates/calendars/calendar[@type=\'gregorian\']/days/dayContext[@type=\'format\']/dayWidth'); if(is_array($dayTypes)) { foreach($dayTypes as $dayType) { $names=array(); foreach($dayType->xpath('day') as $day) $names[$mapping[(string)$day['type']]]=(string)$day; if($names!==array()) $data['weekDayNames'][(string)$dayType['type']]=$names; } } if(!isset($data['weekDayNames']['abbreviated'])) $data['weekDayNames']['abbreviated']=$data['weekDayNames']['wide']; $dayTypes=$xml->xpath('/ldml/dates/calendars/calendar[@type=\'gregorian\']/days/dayContext[@type=\'stand-alone\']/dayWidth'); if(is_array($dayTypes)) { foreach($dayTypes as $dayType) { $names=array(); foreach($dayType->xpath('day') as $day) $names[$mapping[(string)$day['type']]]=(string)$day; if($names!==array()) $data['weekDayNamesSA'][(string)$dayType['type']]=$names; } } } protected function parsePeriodNames($xml,&$data) { $am=$xml->xpath('/ldml/dates/calendars/calendar[@type=\'gregorian\']/dayPeriods/dayPeriodContext[@type=\'format\']/dayPeriodWidth[@type=\'wide\']/dayPeriod[@type=\'am\']'); if(is_array($am) && isset($am[0])) $data['amName']=(string)$am[0]; $pm=$xml->xpath('/ldml/dates/calendars/calendar[@type=\'gregorian\']/dayPeriods/dayPeriodContext[@type=\'format\']/dayPeriodWidth[@type=\'wide\']/dayPeriod[@type=\'pm\']'); if(is_array($pm) && isset($pm[0])) $data['pmName']=(string)$pm[0]; } protected function parseEraNames($xml,&$data) { $era=$xml->xpath('/ldml/dates/calendars/calendar[@type=\'gregorian\']/eras/eraAbbr'); if(is_array($era) && isset($era[0])) { foreach($era[0]->xpath('era') as $e) $data['eraNames']['abbreviated'][(string)$e['type']]=(string)$e; } $era=$xml->xpath('/ldml/dates/calendars/calendar[@type=\'gregorian\']/eras/eraNames'); if(is_array($era) && isset($era[0])) { foreach($era[0]->xpath('era') as $e) $data['eraNames']['wide'][(string)$e['type']]=(string)$e; } else if(!isset($data['eraNames']['wide'])) $data['eraNames']['wide']=$data['eraNames']['abbreviated']; $era=$xml->xpath('/ldml/dates/calendars/calendar[@type=\'gregorian\']/eras/eraNarrow'); if(is_array($era) && isset($era[0])) { foreach($era[0]->xpath('era') as $e) $data['eraNames']['narrow'][(string)$e['type']]=(string)$e; } else if(!isset($data['eraNames']['narrow'])) $data['eraNames']['narrow']=$data['eraNames']['abbreviated']; } protected function parseDateFormats($xml,&$data) { $types=$xml->xpath('/ldml/dates/calendars/calendar[@type=\'gregorian\']/dateFormats/dateFormatLength'); if(is_array($types)) { foreach($types as $type) { $pattern=$type->xpath('dateFormat/pattern'); $data['dateFormats'][(string)$type['type']]=(string)$pattern[0]; } } } protected function parseTimeFormats($xml,&$data) { $types=$xml->xpath('/ldml/dates/calendars/calendar[@type=\'gregorian\']/timeFormats/timeFormatLength'); if(is_array($types)) { foreach($types as $type) { $pattern=$type->xpath('timeFormat/pattern'); $data['timeFormats'][(string)$type['type']]=(string)$pattern[0]; } } } protected function parseDateTimeFormat($xml,&$data) { $types=$xml->xpath('/ldml/dates/calendars/calendar[@type=\'gregorian\']/dateTimeFormats/dateTimeFormatLength'); if(is_array($types) && isset($types[0])) { $pattern=$types[0]->xpath('dateTimeFormat/pattern'); $data['dateTimeFormat']=(string)$pattern[0]; } } protected function parseOrientation($xml,&$data) { $orientation=$xml->xpath('/ldml/layout/orientation[@characters=\'right-to-left\']'); if(!empty($orientation)) $data['orientation']='rtl'; else if(!isset($data['orientation'])) $data['orientation']='ltr'; } /** * @see http://cldr.unicode.org/index/cldr-spec/plural-rules */ protected function parsePluralRules($xml) { echo "Processing plural.xml..."; $patterns = array( '/\s+is\s+not\s+/i'=>'!=', //is not '/\s+is\s+/i'=>'==', //is '/n\s+mod\s+(\d+)/i'=>'fmod(n,$1)', //mod (CLDR's "mod" is "fmod()", not "%") '/^(.*?)\s+not\s+(?:in|within)\s+(\d+)\.\.(\d+)/i'=>'($1<$2||$1>$3)', //not in, not within '/^(.*?)\s+within\s+(\d+)\.\.(\d+)/i'=>'($1>=$2&&$1<=$3)', //within '/^(.*?)\s+in\s+(\d+)\.\.(\d+)/i'=>'($1>=$2&&$1<=$3&&fmod($1,1)==0)', //in ); foreach($xml->plurals->pluralRules as $node) { $attributes=$node->attributes(); $locales=explode(' ',$attributes['locales']); $rules=array(); if(!empty($node->pluralRule)) { foreach($node->pluralRule as $rule) { $expr_or=preg_split('/\s+or\s+/i', $rule); foreach ($expr_or as $key_or => $val_or) { $expr_and=preg_split('/\s+and\s+/i', $val_or); $expr_and=preg_replace(array_keys($patterns), array_values($patterns), $expr_and); $expr_or[$key_or]=implode('&&', $expr_and); } $rules[]=implode('||', $expr_or); } //append last rule to match "other" $rules[] = 'true'; foreach ($locales as $locale) { $this->pluralRules[$locale] = $rules; } } } echo "Done.\n"; } protected function addPluralRules(&$data, $locale) { if (!empty($this->pluralRules[$locale])) $data['pluralRules']=$this->pluralRules[$locale]; } }