PHP Classes

File: class/class.openoffice23htmlparser.php

Recommend this page to a friend!
  Classes of Johan Barbier   oLoc   class/class.openoffice23htmlparser.php   Download  
File: class/class.openoffice23htmlparser.php
Role: Class source
Content type: text/plain
Description: Specific class for parsing HTML files from sCalc in OpenOffice
Class: oLoc
Retrieve and edit internationalized texts
Author: By
Last change: Bugfixing
Date: 17 years ago
Size: 4,104 bytes
 

Contents

Class file image Download
<?php
/**
Specialized parser for OpenOffice 2.3 sCalc saved files as HTML
Copyright (C) 2007 Johan Barbier <johan.barbier@gmail.com>

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 */
/**
 * @desc Specialized parser for OpenOffice 2.3 sCalc saved files as HTML
 * @author Johan Barbier <johan.barbier@gmail.com>
 * @version 20071101
 *
 */
class openoffice23htmlparser extends genparser {

    /**
     * @desc Imports translations from a spreadsheet saved as HTML by OpenOffice 2.3 sCalc.
     *
     * If the file still contains an upper-case <TBODY> tag it is first rewritten in place
     * as XML (see convertHtmlToXml()). The file is then loaded with DOMDocument and read
     * as a grid:
     *  - the cells of the first row are the language names, one per column;
     *  - in every following row, the first cell is the constant name and cell N is the
     *    translation for the language found in column N of the first row.
     * The module name is the part of the constant name located before its first underscore.
     *
     * For each language, a directory is created under LOCALE_PATH when missing, then each
     * constant is registered through addModule() / addNewConstant() / updateValue() on
     * $this->subject.
     *
     * @param string $sFile : file path
     * @throws fileUploadExceptions if the file does not exist
     * @throws RuntimeException if the file cannot be read, rewritten, converted or parsed as XML
     */
    final protected function parseUploadedTranslation($sFile) {
        if (!file_exists($sFile)) {
            throw new fileUploadExceptions(fileUploadExceptions::_UPLOAD_ERR_NO_FILE_);
        }

        $sFileContents = file_get_contents($sFile);
        if (false === $sFileContents) {
            throw new RuntimeException('Unable to read translation file: '.$sFile);
        }

        // An upper-case <TBODY> means the file has not been converted yet.
        if (false !== strpos($sFileContents, '<TBODY>')) {
            if (false === file_put_contents($sFile, $this->convertHtmlToXml($sFileContents))) {
                throw new RuntimeException('Unable to rewrite translation file: '.$sFile);
            }
        }

        $oXml = new DOMDocument();
        if (!$oXml->load($sFile)) {
            throw new RuntimeException('Unable to parse translation file as XML: '.$sFile);
        }

        $oNodeList = $oXml->getElementsByTagName('tr');
        if (0 === $oNodeList->length) {
            // No row at all: nothing to import.
            return;
        }

        // $aStored[column] = array('LNG' => language name, 'MOD' => array(constant => translation))
        $aStored = array();

        // First row: one language name per column.
        $iCpt = 0;
        foreach ($oNodeList->item(0)->getElementsByTagName('td') as $oTd) {
            $aStored[$iCpt]['LNG'] = (string)preg_replace('`\s`', '', $oTd->nodeValue);
            $iCpt++;
        }

        // Other rows: constant name in column 0, translations in columns 1..n.
        for ($i = 1; $i < $oNodeList->length; $i++) {
            $oRowList = $oNodeList->item($i)->getElementsByTagName('td');
            if ($oRowList->length < 2) {
                continue;
            }
            $sKey = (string)preg_replace('`\s`', '', $oRowList->item(0)->nodeValue);
            for ($j = 1; $j < $oRowList->length; $j++) {
                // Only one trailing whitespace character is removed here; the value is trimmed again below.
                $aStored[$j]['MOD'][$sKey] = (string)preg_replace('`\s$`', '', $oRowList->item($j)->nodeValue);
            }
        }

        foreach ($aStored as $aV) {
            if (empty($aV['LNG'])) {
                continue;
            }

            $sLang = trim(html_entity_decode(preg_replace('`\s`', '', $aV['LNG'])));
            $aPays = getCodes($sLang);
            if (!empty($aPays['ALPHA3'])) {
                $sLang = $aPays['ALPHA3'];
            }

            // The language name comes from the uploaded file and is used as a directory
            // name: refuse anything that could escape LOCALE_PATH.
            if (!$this->isSafePathSegment($sLang)) {
                continue;
            }

            $sLangDir = $this->subject->LOCALE_PATH.$sLang;
            if (!is_dir($sLangDir)) {
                mkdir($sLangDir, 0755, true);
            }

            // A language column may have a header but no data row.
            if (empty($aV['MOD'])) {
                continue;
            }

            foreach ($aV['MOD'] as $sConst => $sVal) {
                if (empty($sConst)) {
                    continue;
                }
                $sConst = trim(preg_replace('`\s`', '', $sConst));
                $sVal = trim($sVal);

                // NOTE(review): a constant without underscore yields an empty module name,
                // which is passed on as is (same as before) - confirm this is intended.
                $iSep = strpos($sConst, '_');
                $sMod = (false === $iSep) ? '' : substr($sConst, 0, $iSep);

                // The module name is used to build a file path too.
                if ('' !== $sMod && !$this->isSafePathSegment($sMod)) {
                    continue;
                }

                if (!file_exists($this->subject->LOCALE_PATH.'default/'.$sMod.'.xml')) {
                    $this->subject->addModule($sMod, $this->sEncoding);
                }
                $this->subject->addNewConstant($sConst, $sMod);
                $this->subject->updateValue($sConst, $sVal, false, $sLang, $sMod);
            }
        }
    }

    /**
     * @desc Rewrites the HTML saved by sCalc so that it can be loaded as XML :
     * Lower-cases every tag name.
     * Replaces everything located before <tbody> by an XML prolog, an XHTML doctype and opening html/body/table tags.
     * Strips the attributes of every tag.
     * Removes <br>, <font> and </font>.
     * Wraps the content of every <td> in a CDATA section.
     *
     * @param string $sHtml : HTML contents
     * @return string : XML contents
     * @throws RuntimeException if a regular expression fails
     */
    private function convertHtmlToXml($sHtml) {
        // preg_replace_callback replaces the former /e modifier, which was removed in PHP 7.
        $sXml = preg_replace_callback(
            '/(<\/?)(\w+)([^>]*>)/',
            function ($aMatch) {
                return $aMatch[1].strtolower($aMatch[2]).$aMatch[3];
            },
            $sHtml
        );
        if (null === $sXml) {
            throw new RuntimeException('Unable to normalize the tags of the translation file');
        }

        $sHeader = '<?xml version="1.0" encoding="'.$this->sEncoding.'"?><!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"><html xmlns="http://www.w3.org/1999/xhtml"><body><table>';
        $iBodyPos = strpos($sXml, '<tbody>');
        // When <tbody> is not found, the header is simply inserted at the beginning.
        $sXml = substr_replace($sXml, $sHeader, 0, (false === $iBodyPos) ? 0 : $iBodyPos);

        $sXml = preg_replace('`(<\w+)\s([^>]*)(>)`', '$1$3', $sXml);
        if (null === $sXml) {
            throw new RuntimeException('Unable to strip the attributes of the translation file');
        }

        $sXml = str_replace(array('<br>', '<font>', '</font>'), '', $sXml);

        return str_replace(array('<td>', '</td>'), array('<td><![CDATA[', ']]></td>'), $sXml);
    }

    /**
     * @desc Tells whether a name can safely be used as a single directory or file name :
     * it must not be empty, must not be "." or "..", and must not contain a slash,
     * a backslash or a NUL byte.
     *
     * @param string $sName : name to check
     * @return boolean
     */
    private function isSafePathSegment($sName) {
        $sName = (string)$sName;
        return '' !== $sName
            && '.' !== $sName
            && '..' !== $sName
            && false === strpbrk($sName, "/\\\0");
    }
}
?>