Login   Register  
PHP Classes
elePHPant
Icontem

File: jarg.php

Recommend this page to a friend!
Stumble It! Stumble It! Bookmark in del.icio.us Bookmark in del.icio.us
  Classes of tobozo  >  Jargon File Processor  >  jarg.php  >  Download  
File: jarg.php
Role: Application script
Content type: text/plain
Description: Program to search/display word(s) from the Jargon File (v 1.2.5)
Class: Jargon File Processor
Just a script to extract definitions from the jarg
Author: By tobozo
Last change: - Code optimizations and cleanup
- New jargon format compliance
Date: 2007-02-07 00:15
Size: 18,882 bytes
 

Contents

Class file image Download
<?php

/*
 *  Program to search/display word(s) from the Jargon File (v 1.2.5)
 *  still doesn't make coffee...
 *
 *  Copyleft (c+) 2007 tobozo <tobozo@users.sourceforge.net>
 *
 *  You may copy and use this program freely as long as this
 *  notice is left intact. This program is provided "AS IS"
 *  without warranty of any kind. The copyright owner may not
 *  be held liable for any damages, direct or consequential,
 *  which may result from the the use of this program.
 */



/**
 * Search/display tool for the flat-text Jargon File.
 *
 * The class reads its request from $_GET ('op' = search term or command,
 * 'l' = letter for listings), looks words up in a pre-built index file and
 * extracts the matching definition(s) from the jargon text file.
 * Call out() to print the result for the current request.
 */
class jargon {


  var $VER       = "1.2.5";       // version of this script
  var $JAR       = "4.4.7";       // version of the jargon text file (retrieved from $this->JARG_SRC)
  var $JARG_FILE = "jarg447.txt"; // jargon file (you know where to get it eh?)
  var $JARG_IDX  = "jarg447.idx"; // the index file (must be writable by the web server when building)
  var $JARG_SRC  = "http://www.catb.org/jargon/oldversions/";
  var $lock      = "yes";         // "no" enables ?op=RebuildIndex; set back to "yes" once the index is rebuilt
  var $showrand  = true;          // show random word in search Form
  var $showindex = true;          // show list of letters in search Form
  var $showlinks = true;          // show internal hyperlinks for {definitions}
  var $showExtra = false;         // display [?] link for every keyword (similar term search)

  var $self      = "";            // file name of this script, used to build links
  var $mode      = "";            // "", "search" or "list" (set by the constructor)
  var $index     = false;         // list of keywords once loaded, false before
  var $jargon    = false;         // lines of the jargon file once loaded, false before
  var $footer    = "";
  var $output    = "";            // HTML accumulated for the current request
  var $result    = array();       // lowercased keywords matched by ExtractWord()
  var $op        = "";            // search term (or command) of the current request
  var $l         = "";            // letter requested in list mode
  var $strict    = "exact";       // "exact" or "no" (similar-term search)
  var $definition= "";            // text of all matched definitions, with {links} expanded
  var $word      = array();
                         // array(
                         //   $keyword =>
                         //      "Keyword"=>$keyword, // the keyword itself
                         //      "Def"    =>$text, // the full text definition
                         //      "Html"   =>$html  // the full html definition
                         // )


  /**
   * Constructor: validates the data files, reads the request from $_GET and
   * runs the immediate commands (show_source, RebuildIndex).
   *
   * @param mixed $strict  false/""/"no" selects similar-term search,
   *                       anything else selects exact search
   */
  function __construct($strict = false) {

    // name of this document (basename() also copes with Windows paths)
    if($this->self=="") {
      $this->self=basename(__FILE__);
    }

    // check for Jargon file
    if(!file_exists($this->JARG_FILE)) {
      die("  File $this->JARG_FILE does not exist...\n\n".
                     "  Try to edit the file '$this->self' (this file) and\n".
                     "  modify the  value of '\$this->JARG_FILE' to match\n".
                     "  the filename of your Jargon File.\n\n".
                     "  Aborting ...\n\n<br>");
    }

    // read the request BEFORE checking the index: the check below must know
    // whether we are being asked to rebuild the (possibly missing) index
    $op     = $this->_param('op');
    $letter = $this->_param('l');

    if($op!='') {
      $this->mode = 'search';
      $this->op   = $op;
    }

    // check for Index file
    if(!file_exists($this->JARG_IDX) && ($this->op!="RebuildIndex")) {
      die("  Index File $this->JARG_IDX does not exist...\n\n".
                     "  Try to create a file to store the index and\n".
                     "  modify the  value of '\$this->JARG_IDX' to match\n".
                     "  the name of the created file.\n\n".
                     "  Also you may try to edit this script and set the \n".
                     "  value of \$lock to 'no', then <a href='$this->self?op=RebuildIndex'>Rebuild the index</a> \n\n".
                     "  Aborting ...\n\n<br>");
    }

    // search method
    if(!$strict || $strict=="no") {
      $this->strict = "no";
    } else {
      $this->strict = 'exact';
    }

    // footer and link to search page
    if($this->showlinks) { // insert html links
      $this->footer = "\n\n<a href='$this->self' title=\"Back to search screen\">Search</a>";
    }

    // $op = "List" : are we listing keywords for a specific letter ?
    if($op=="List" && $letter!="") {
      $this->mode = 'list';
      $this->l    = substr(strtolower($letter), 0, 1);
    }

    if($op=="show_source") {
      show_source(__FILE__);
      exit(0);
    }

    // $op = rebuild index ? (only while the lock is released)
    if($op=="RebuildIndex" && $this->lock=="no") {
      $this->RebuildIndex();
    }

    $this->getIndex();
  }


  /**
   * PHP 4-style constructor name, kept so existing callers of ->jargon()
   * keep working; simply delegates to __construct().
   */
  function jargon($strict = false) {
    $this->__construct($strict);
  }


  /**
   * Returns the trimmed GET parameter $name, or "" when it is absent or not
   * a string. Slashes are stripped only when magic quotes actually added
   * them (PHP < 5.4), so terms containing a backslash survive elsewhere.
   */
  function _param($name) {
    if(!isset($_GET[$name]) || !is_string($_GET[$name])) {
      return '';
    }
    $value = $_GET[$name];
    if(version_compare(PHP_VERSION, '5.4.0', '<')
       && function_exists('get_magic_quotes_gpc')
       && get_magic_quotes_gpc()) {
      $value = stripslashes($value);
    }
    return trim($value);
  }


  /**
   * Builds the HTML replacement for a {term} reference.
   */
  function _linkFor($term) {
    $link = sprintf('<a href="%s?op=%s&strict=exact" title="%s">%s</a>',
              $this->self,
              urlencode($term),
              "Exact Term Search",
              htmlentities($term)
    );
    if($this->showExtra) {
      $link.=sprintf('<a href="%s?op=%s&strict=no" title="%s"><font size="-1"><u><sup>[?]</sup></u></font></a>',
          $this->self,
          urlencode($term),
          "Similar Terms"
      );
    }
    return $link;
  }


  /**
   * Extracts the definition(s) matching $this->op from the jargon file.
   *
   * In "exact" mode a keyword must equal the term (case-insensitive); in
   * any other mode the term only has to occur inside the keyword.
   * Fills $this->result, $this->word and $this->definition.
   *
   * @return string|false  the accumulated definition text, or false when
   *                       $this->op is empty
   */
  function ExtractWord() {

    if($this->op=="") {
      return false;
    }

    $this->LoadJargon();
    $this->getIndex();

    $needle       = strtolower($this->op);
    $exact        = ($this->strict=='exact');
    $fullJargon   = count($this->jargon);

    $keyword      = '';    // keyword of the definition being read
    $key          = '';    // lowercased $keyword, used as array key
    $match        = false; // does the current keyword match the term ?
    $inLexicon    = false; // have we passed the "The Jargon Lexicon" line ?
    $pointerState = false; // true once the first keyword line was seen
    $subwords     = array(); // '{term}' tokens already known
    $sublinks     = array(); // their '{<a ...>}' replacements
    $subwordsHTML = array(); // the same tokens, html-escaped

    for($lineNum=0;$lineNum<$fullJargon;$lineNum++) {

      $bfr = $this->jargon[$lineNum];

      // jargon definitions start at this heading; skip everything before it
      if(!$inLexicon) {
        if(trim($bfr)!="The Jargon Lexicon") {
          continue;
        }
        $inLexicon = true;
      }

      // ... and end at this one: nothing after it is ever used
      if($bfr=="Appendices") {
        break;
      }

      // since Jarg4.4.5, index is also inside the jargon.txt
      // (signature is <letter><lf><lf><keyword>)
      if($pointerState && $match && strlen($bfr)==1 && trim($bfr)!=''
         && $lineNum+2<$fullJargon
         && $this->IsInTheIndex($this->jargon[$lineNum+2])) {

        $start = $lineNum+3;
        while($start<$fullJargon && $this->IsInTheIndex($this->jargon[$start])) { // skip index junk after that
          $start++;
        }

        if($exact) {
          return $this->definition;
        }

        // resume after the junk; as before, the loop increment also steps
        // over the first non-index line at $start
        $lineNum = $start;
        continue;
      }

      // maybe keyword line ? (":keyword: definition ...")
      if(strncmp($bfr, ':', 1)===0 && preg_match("/:([^:]+):/", $bfr, $matches)) {
        $keyword      = $matches[1];
        $key          = strtolower($keyword);
        $pointerState = true; // parser in acquire mode
      }

      // check if matching
      if($exact) {
        $match = ($needle===$key);
      } else {
        $match = ($key!=='' && strpos($key, $needle)!==false);
      }

      if(!$pointerState || !$match) {
        continue; // not inside a wanted definition
      }

      $this->result[$key] = $key;

      // get the content of the {linked keywords} if any exist
      if(preg_match_all("/{([^}]+)}/", $bfr, $found, PREG_SET_ORDER)) {
        foreach($found as $ar) {
          $term  = $ar[1];
          $token = '{'.$term.'}';
          if(trim($term)=='' || !in_array($term, $this->index, true)) {
            continue; // not a known keyword, leave the text alone
          }
          if(in_array($token, $subwords, true)) {
            continue; // replacement already registered
          }
          $subwords[]     = $token;
          $sublinks[]     = '{'.$this->_linkFor($term).'}';
          $subwordsHTML[] = '{'.htmlentities($term).'}';
        }
      }

      $bfr = $bfr."\n";
      if(!isset($this->word[$key])) {
        $this->word[$key] = array('Keyword'=>$keyword, 'Def'=>'', 'Html'=>'');
      }
      $this->word[$key]['Keyword'] = $keyword;
      $this->word[$key]['Def']    .= $bfr;
      $this->word[$key]['Html']   .= str_replace($subwordsHTML, $sublinks, htmlentities($bfr));
      $this->definition           .= str_replace($subwords, $sublinks, $bfr);
    }

    return $this->definition;

  }



  /**
   * Appends the search form, the letter index and (optionally) a random
   * term to $this->output.
   * Side effect: when a random term is shown, $this->op and $this->strict
   * are overwritten with that term / "exact".
   *
   * @return string  the accumulated output
   */
  function getForm() {
    $this->output .= "<form action='$this->self' method='get'>".
            "<div align='center'><pre>".
            "/----------------------------------------------------------------------------\\"."\n".
            "|                      Flat Text File Version $this->JAR                          |\n".
            "|               Jargon File search tool $this->VER  by  tobozo                    |\n".
            "|                copyleft (c+) 07-feb-2007 <a href='$this->self?op=show_source'>View source</a>                       |\n".
            "\----------------------------------------------------------------------------/\n\n".
            "</pre></div>".
            "<table border='1' cellspacing='1' cellpadding='5' align='center'><tr>".
            "<Td align='center'> Search : <input type='text' name='op' size=5>".
            "<input type='submit' value='go!'> \n<br /> Exact Term :     ".
            "<input type='checkbox' name='strict' CHECKED value='exact'>".
            "</td></tr><tr><Td align=center>";

    /* display list of existing letters in index*/
    if($this->showindex) {
      $this->ShowLettersFromIndex();
    }

    $this->output.= "</td></tr></table>".
          "</form>";

    /* display random term */
    if($this->showrand) {
      $this->getIndex();
      $max = count($this->index);
      $this->output.="<table border='0' cellspacing='0' cellpadding='0' align='center'>".
               "<tr><td><pre><br />\nFound $max Terms \n\nRandom Term : \n\n";
      if($max>0) {
        // valid offsets are 0 .. $max-1
        $this->op     = $this->index[mt_rand(0, $max-1)];
        $this->strict = "exact";
        $this->output.= $this->ExtractWord();
      }
      $this->output.= "</pre></td></tr></table>";
    }
    return $this->output;
  }



  /**
   * Appends links to all indexed words starting with the first character
   * of $l (case-insensitive) to $this->output.
   *
   * @param string $l
   * @return string|false  the accumulated output, or false when $l is empty
   */
  function GetWordsFrom($l) {
    $l = strtolower((string)substr(trim($l), 0, 1));

    if($l==="") {
      return false;
    }

    $this->getIndex();

    $let = 0; // number of words listed

    foreach($this->index as $word) {
      if($l===strtolower((string)substr($word,0,1))) {
        $this->output.=sprintf('<a href="%s?op=%s&strict=exact">%s</a>'."\n",
               $this->self,
               urlencode($word),
               htmlentities($word)
        );
        $let++;
      }
    }

    if($let==0) {
      $this->output.=sprintf("Sorry, None of the words in the file %s starts with the character '<b>%s</b>'",
           htmlentities($this->JARG_IDX),
           htmlentities($l)
      );
    }
    return $this->output;
  }


  /**
   * Appends one link per distinct first letter found in the index to
   * $this->output.
   *
   * @return string  the accumulated output
   */
  function ShowLettersFromIndex() {

    $this->getIndex();

    $links = array();

    foreach($this->index as $keyword) {
      $letter=strtolower((string)substr($keyword,0,1));
      if($letter==='') {
        continue;
      }
      $links[$letter]=sprintf('<a class=letter href="%s?op=List&l=%s">%s</a>',
              $this->self,
              urlencode($letter),
              htmlentities($letter)
      );
    }

    sort($links);
    $this->output.=implode("", $links);
    return $this->output;
  }




  /**
   * Tells whether $op is a keyword of the index (case-insensitive,
   * surrounding whitespace ignored).
   *
   * @param string $op
   * @return bool
   */
  function IsInTheIndex($op) {
    $op=trim(strtolower((string)$op));
    if($op==='') {
      return false; // a blank line is never a keyword
    }

    $this->getIndex();

    foreach($this->index as $word) {
      if($op===strtolower($word)) {
        return true;
      }
    }
    return false;
  }



  /**
   * Tells whether $op occurs inside at least one keyword of the index
   * (case-insensitive). The term is matched literally, not as a pattern.
   *
   * @param string $op
   * @return bool
   */
  function MatchesSimilarTerms($op) {
    $op=trim(strtolower((string)$op));
    if($op==='') {
      return false;
    }

    $this->getIndex();

    foreach($this->index as $keyword) {
      if(strpos(strtolower($keyword), $op)!==false) {
        return true;
      }
    }
    return false;
  }


  /**
   * Loads the index file into $this->index (one keyword per line, empty
   * lines dropped). Does nothing when the index is already loaded; aborts
   * the script when the file is unreadable or empty.
   */
  function GetIndex() {
    if(is_array($this->index)) {
      return;
    }

    $j = is_readable($this->JARG_IDX) ? file_get_contents($this->JARG_IDX) : false;

    if($j===false || trim($j)==='') {
      die("Unable to open $this->JARG_IDX file\n".
          "You may try edit this script and \n".
          "set the value of \$lock to 'no', \n".
          "then <a href='$this->self?op=RebuildIndex'>Rebuild the index</a>");
    }

    // tolerate CRLF files and trailing newlines
    $lines       = explode("\n", str_replace("\r\n", "\n", $j));
    $this->index = array_values(array_filter($lines, 'strlen'));
  }



  /**
   * Rebuilds the index file from the keyword lines (":keyword:") of the
   * jargon file, reports progress with echo and then ends the script.
   */
  function RebuildIndex() {
    echo "Rebuilding Index ...<br>";

    $j=file_get_contents($this->JARG_FILE);
    if(!$j) {
      die ("Unable to open file $this->JARG_FILE");
    }

    echo "Reading $this->JARG_FILE content....<br>";

    preg_match_all("/\n:([^\n|:]+):/", $j, $matches, PREG_SET_ORDER);

    $words = array();

    foreach($matches as $word) {
      // keywords carrying leading/trailing whitespace are not indexed
      if($word[1]==trim($word[1])) {
        $words[]=$word[1];
        echo "Inserting word <b>".htmlentities($word[1])."</b><br>";
      }
    }

    if(count($words)==0) {
      die("I was unable to create an index from your file.\n".
          "Please use any text version from your favourite source or from ".$this->JARG_SRC.".\n");
    }

    $this->index = $words;
    $content     = implode("\n", $words);

    $q = @fopen($this->JARG_IDX, "w");
    if(!$q) {
      die("Unable to create $this->JARG_IDX file\n".
          "You may try to chmod the file as r/w and ".
          "<a href='$this->self?op=RebuildIndex'>try again</a><br>".
          "Alternatively you can create manually the file and paste ".
          "the following index : <br><br>".htmlentities($content));
    }

    $written = fwrite($q, $content);
    fclose($q);

    if($written===false) {
      die("Unable to write to $this->JARG_IDX file\n");
    }

    echo "Index file rebuilt as $this->JARG_IDX";

    exit(0);
  }


  /**
   * Loads the jargon file into $this->jargon, one array entry per line.
   * Does nothing when it is already loaded; aborts the script when the
   * file cannot be read.
   */
  function LoadJargon() {
    if($this->jargon==false) {
      $j = file_get_contents($this->JARG_FILE) or die ("Unable to open file $this->JARG_FILE");
      // normalise CRLF so the line tests in ExtractWord() see bare lines
      $this->jargon = explode("\n", str_replace("\r\n", "\n", $j));
    }
  }


  /**
   * Runs a search for $this->op and appends the outcome to $this->output:
   * the definition(s), a "not found" notice, or a prompt offering a
   * similar-term search.
   */
  function _search() {

    if(!$this->IsInTheIndex($this->op)) { // no exact keyword

      if(!$this->MatchesSimilarTerms($this->op)) { // no similar term either, give up
        $this->output.="\n  ...  Term'<b>".htmlentities($this->op)."</b>' not found.\n\n";
        $this->output.=$this->footer;
        return;
      }

      if($this->strict=="exact") { // found something but do not search yet, prompt user instead
        $this->output.="\n  Term'<b>".htmlentities($this->op)."</b>' not found.\n";
        $this->output.="  <a href='$this->self?op=".urlencode($this->op)."&strict=no'>Search similar terms</a>.\n\n";
        $this->output.=$this->footer;
        return;
      }

      // found something, similar match asked, process
      $this->output.="\n ... Fetching similar terms ..\n";
      $this->strict="no";
    }

    $this->ExtractWord();

    if(count($this->result)>1) {
      $this->output.=" ... Found ".count($this->result)." match(es) ...\n\n";
    }

    foreach($this->result as $keyword) {
      if(!isset($this->word[$keyword])) {
        continue;
      }
      if($this->showlinks) {
        $this->output.='<br>'.$this->word[$keyword]['Html'];
      } else {
        $this->output.="\n".$this->word[$keyword]['Def'];
      }
    }
  }


  /**
   * Handles the current request and echoes the result: a letter listing
   * in "list" mode, search results when a term was given, the search form
   * otherwise.
   */
  function out() {

    if($this->mode=="list") {
      if ($this->l!="") {
        echo "Alphabetical Listing\n\n";
        $this->output =$this->GetWordsFrom($this->l);
      }
    } elseif($this->op!="") {
      $this->_search();
    } else { // $this->op is empty -> display form
      $this->getForm();
    }

    echo $this->output;
  }


}; // end class



?><html>
<head>
<title>Jargon File Processor ... Copyleft(c+) 2001 tobozo@users.sourceforge.net</title>
<style>
A { text-decoration:none; }
A:link { text-decoration:none; }
A:visited { text-decoration:none; }
A:active { text-decoration:none; }
A:hover { text-decoration:underline; background:black; color:white;}

A.letter { text-decoration:none; font-size:12px; }
A.letter:link { text-decoration:none; }
A.letter:visited { text-decoration:none; }
A.letter:active { text-decoration:none; }
A.letter:hover { text-decoration:underline; background:black; color:white; }
</style>
</head>
<body>
<pre>
<?php

// The "Exact Term" checkbox arrives as ?strict=exact. Read it explicitly:
// register_globals no longer creates $strict from the query string.
$strict = isset($_GET['strict']) ? $_GET['strict'] : '';

$j = new jargon($strict);
$j->out();

?>

</pre>
</body>
</html>