<?php
/**
Specialized parser for OpenOffice 2.3 sCalc saved files as HTML
Copyright (C) 2007 Johan Barbier <johan.barbier@gmail.com>
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
/**
* @desc Specialized parser for OpenOffice 2.3 sCalc saved files as HTML
* @author Johan Barbier <johan.barbier@gmail.com>
* @version 20071101
*
*/
class openoffice23htmlparser extends genparser {

    /**
    * @desc Imports the translations contained in an OpenOffice 2.3 Calc sheet saved as HTML.
    *
    * If the file still contains the raw OpenOffice markup (detected by the presence of an
    * upper-case <TBODY> tag), it is first rewritten IN PLACE so that it can be read as XML :
    * Tag names are lower-cased.
    * Everything located before <tbody> is replaced by an XML prolog and an XHTML header.
    * Attributes are stripped from every tag.
    * <br>, <font> and </font> tags are removed.
    * The contents of each <td> are wrapped in a CDATA section.
    *
    * The file is then loaded as an XML document. The first <tr> gives one language per column
    * (the first column holds the constant names), every other <tr> gives a constant name
    * followed by its translation for each language. Modules, locale directories, constants
    * and translations are created from these data. The module of a constant is the part of its
    * name located before the first underscore.
    *
    * @param string $sFile : file path
    * @throws fileUploadExceptions if the file does not exist or cannot be read
    * @throws RuntimeException if the file cannot be loaded as an XML document
    */
    final protected function parseUploadedTranslation($sFile) {
        if(!file_exists($sFile)) {
            throw new fileUploadExceptions(fileUploadExceptions::_UPLOAD_ERR_NO_FILE_);
        }
        $sFileContents = file_get_contents($sFile);
        if(false === $sFileContents) {
            throw new fileUploadExceptions(fileUploadExceptions::_UPLOAD_ERR_NO_FILE_);
        }
        if(false !== strpos($sFileContents, '<TBODY>')) {
            // The /e modifier formerly used here was removed in PHP 7; a callback does the same job.
            $sFileContents = preg_replace_callback('/(<\/?)(\w+)([^>]*>)/', array($this, 'lowercaseTagName'), $sFileContents);
            // '<TBODY>' was found above, so its lower-cased form is guaranteed to be present now.
            $sFileContents = substr_replace($sFileContents,'<?xml version="1.0" encoding="'.$this->sEncoding.'"?><!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"><html xmlns="http://www.w3.org/1999/xhtml"><body><table>', 0, strpos($sFileContents, '<tbody>'));
            // Drop every attribute so that only bare tags remain.
            $sFileContents = preg_replace('`(<\w+)\s([^>]*)(>)`', '$1$3', $sFileContents);
            $sFileContents = str_replace(array('<br>', '<font>', '</font>'), '', $sFileContents);
            // CDATA keeps the XML parser from choking on html entities found in the cells.
            $sFileContents = str_replace(array('<td>', '</td>'), array('<td><![CDATA[', ']]></td>'), $sFileContents);
            file_put_contents($sFile, $sFileContents);
        }

        $oXml = new DOMDocument();
        if(false === $oXml->load($sFile)) {
            throw new RuntimeException('Unable to load "'.$sFile.'" as an XML document');
        }
        $oNodeList = $oXml->getElementsByTagName('tr');
        if(0 === $oNodeList->length) {
            // No row at all : nothing to import.
            return;
        }

        // First row : one entry per column, indexed like the columns of the sheet.
        $aStored = array();
        foreach($oNodeList->item(0)->getElementsByTagName('td') as $oTd) {
            $aStored[] = array(
                'LNG' => (string)preg_replace('`\s`', '', $oTd->nodeValue),
                'MOD' => array()
            );
        }

        // Other rows : column 0 is the constant name, column $j its value for language $j.
        for($i = 1; $i < $oNodeList->length; $i++) {
            $oRowList = $oNodeList->item($i)->getElementsByTagName('td');
            if(0 === $oRowList->length) {
                continue;
            }
            $sRowConst = (string)preg_replace('`\s`', '', $oRowList->item(0)->nodeValue);
            for($j = 1; $j < $oRowList->length; $j++) {
                if(!isset($aStored[$j])) {
                    // Cell without any language header : it could never be imported.
                    break;
                }
                $aStored[$j]['MOD'][$sRowConst] = (string)preg_replace('`\s$`', '', $oRowList->item($j)->nodeValue);
            }
        }

        foreach($aStored as $aV) {
            if(empty($aV['LNG'])) {
                continue;
            }
            // NOTE(review): column 0 goes through this too when its header cell is not empty,
            // so a locale directory gets created for it although it never holds translations.
            $sLng = trim(html_entity_decode(preg_replace('`\s`', '', $aV['LNG'])));
            $aPays = getCodes($sLng);
            if(!empty($aPays['ALPHA3'])) {
                $sLng = $aPays['ALPHA3'];
            }
            $sLocaleDir = $this->subject->LOCALE_PATH.$sLng;
            if(!is_dir($sLocaleDir)) {
                mkdir($sLocaleDir, 0755);
            }
            foreach($aV['MOD'] as $sConst => $sVal) {
                if(empty($sConst)) {
                    continue;
                }
                // Array keys made of digits come back as integers, hence the cast.
                $sConst = trim(preg_replace('`\s`', '', (string)$sConst));
                $sVal = trim($sVal);
                // Without any underscore the module name is an empty string, as it has always been.
                $iSep = strpos($sConst, '_');
                $sMod = (false === $iSep) ? '' : substr($sConst, 0, $iSep);
                if(!file_exists($this->subject->LOCALE_PATH.'default/'.$sMod.'.xml')) {
                    $this->subject->addModule($sMod, $this->sEncoding);
                }
                $this->subject->addNewConstant($sConst, $sMod);
                $this->subject->updateValue($sConst, $sVal, false, $sLng, $sMod);
            }
        }
    }

    /**
    * @desc preg_replace_callback() handler : lower-cases the name of a matched tag and leaves
    * the rest of the tag (opening bracket, attributes, closing bracket) untouched.
    *
    * @param array $aMatch : 1 => '<' or '</', 2 => tag name, 3 => remainder of the tag up to '>'
    * @return string
    */
    private function lowercaseTagName($aMatch) {
        return $aMatch[1].strtolower($aMatch[2]).$aMatch[3];
    }
}
?>
|