<?
/*
File name: news_parser_4.php - the 4 indicates that php 4 or better is required. php3 port?
Classes: article and news_xml_parser.
Purpose: These two classes are intended to be used with the moreover.com news feed site.
article is a simple object that represents a single article from the news feed.
news_xml_parser is an xml parser that creates article objects from the moreover.com xml news feed.
ToDo: Well, there is a mess of stuff that I could add to this, but for now I am just going to leave it. You can add to it.
It would be nice to have more configuration stuff for the url that gets passed in.
moreover offers a ton of options, it would be nice if I handled it better.
check out http://w.moreover.com/dev/custom/
for details on building feed urls to pass the parser. They have category support in addition to the keyword
stuff I used in my example code (show_news.php) which should be with this file.
Author: Carter Comunale (carter@brasscity.com) comments and suggestions are welcome.
Date: 07/04/2001 (the 4th of July!)
Modified Last By: <your name here>
Modified Last Date: <the date you changed it>
Note: Feel free to do whatever you want with this code; however, if you change it and make it better, send me a note.
I would like to know what you did :)
So you want to see it work? Copy and paste this URL:
http://agn3.dhs.org/~carter/show_news.php?search_str=linux&action=search&search=search
*/
// Plain data holder for one article taken from the moreover.com news feed.
// Every property is public and starts out as null; the class has no behaviour
// of its own. The property names mirror the lower-cased element names that
// the feed parser in this file looks for.
class article {
    // Identifier of the article (presumably the feed's article id -- confirm
    // against the feed before relying on it).
    var $article_id;
    // Text of the URL element.
    var $url;
    // Text of the HEADLINE_TEXT element.
    var $headline_text;
    // Text of the SOURCE element.
    var $source;
    // Text of the MEDIA_TYPE element.
    var $media_type;
    // Text of the CLUSTER element.
    var $cluster;
    // Text of the TAGLINE element.
    var $tagline;
    // Text of the DOCUMENT_URL element.
    var $document_url;
    // Text of the HARVEST_TIME element.
    var $harvest_time;
    // Text of the ACCESS_REGISTRATION element.
    var $access_registration;
    // Text of the ACCESS_STATUS element.
    var $access_status;

    // Constructor. Intentionally empty: there is nothing to initialise.
    // Declared as __construct() (and before article()) because a method named
    // after its class is deprecated as a constructor in PHP 7 and is no longer
    // treated as one in PHP 8.
    function __construct() {
    }

    // PHP 4-style constructor name, kept so existing code that calls
    // article() explicitly (e.g. from a subclass) keeps working. Does nothing.
    function article() {
    }
}
// Event-driven (SAX style) parser that reads a moreover.com XML news feed and
// builds one article object per ARTICLE element.
//
// Usage: construct with the feed location, call parse(), read $news_objects,
// then call free_parser().
class news_xml_parser {
    // Path or URL that parse() hands to fopen().
    var $xml_file;
    // Encoding name passed to xml_parser_create().
    var $type;
    // Declared by the original code but never assigned; kept so the property
    // still exists for any code that touches it.
    var $xml_parser;
    // Handle returned by xml_parser_create(); null after free_parser().
    var $parser;
    // article objects collected so far, in document order.
    var $news_objects = array();
    // Upper-cased name of the element that was opened last and is not yet
    // closed, or null when no element is being read.
    var $current_tag;
    // article currently being filled in, or null outside an ARTICLE element.
    var $current_article;

    // Constructor.
    //
    // $xml_file - path or URL of the feed to read.
    //
    // Creates the XML parser with case folding enabled (so the handlers see
    // upper-case element and attribute names) and registers this object's
    // handler methods as callables. Registering array($this, 'method')
    // callables removes the need for xml_set_object() and for call-time
    // pass-by-reference, which is a fatal error on PHP 5.4 and later.
    function __construct($xml_file) {
        $this->xml_file = $xml_file;
        $this->type = 'UTF-8';
        $this->parser = xml_parser_create($this->type);
        xml_parser_set_option($this->parser, XML_OPTION_CASE_FOLDING, 1);
        xml_parser_set_option($this->parser, XML_OPTION_TARGET_ENCODING, 'UTF-8');
        xml_set_element_handler(
            $this->parser,
            array($this, 'tag_open'),
            array($this, 'tag_close')
        );
        xml_set_character_data_handler($this->parser, array($this, 'cdata'));
    }

    // PHP 4-style constructor name. PHP 8 no longer runs a method named after
    // the class on "new", so the real work lives in __construct(); this method
    // stays for code that calls it explicitly.
    function news_xml_parser($xml_file) {
        $this->__construct($xml_file);
    }

    // Reads $this->xml_file in 4096-byte chunks and feeds them to the parser.
    //
    // Returns true when the input was read to its end, false when the file
    // could not be opened (a message is echoed in that case). On an XML error
    // the script is terminated with die(), as before.
    function parse() {
        $fp = fopen($this->xml_file, 'r');
        if (!$fp) {
            echo "Could not open {$this->xml_file} for parsing!\n";
            return false;
        }

        $fed_any = false;
        $finished = false;
        while (!$finished) {
            $data = fread($fp, 4096);
            if ($data === false) {
                $data = '';
            }
            // An empty read or end-of-file marks the last chunk. Checking both
            // guarantees the parser always receives a final call, even when
            // feof() only turns true after one more (empty) read.
            $finished = ($data === '' || feof($fp));

            // Completely empty input: nothing to parse, stay silent.
            if ($data === '' && !$fed_any) {
                break;
            }
            $fed_any = true;

            if (!xml_parse($this->parser, $this->latin1_to_utf8($data), $finished)) {
                $message = sprintf( "XML error: %s at line %d\n\n",
                    xml_error_string(xml_get_error_code($this->parser)),
                    xml_get_current_line_number($this->parser));
                fclose($fp);
                die($message);
            }
        }

        fclose($fp);
        return true;
    }

    // Converts a chunk of ISO-8859-1 bytes to UTF-8, which is what the
    // original utf8_encode() call did. utf8_encode() is deprecated as of
    // PHP 8.2, so mb_convert_encoding() or iconv() is used when available.
    // If no converter exists or conversion fails, the chunk is returned as is.
    //
    // NOTE(review): this treats the feed as ISO-8859-1. A feed that is already
    // UTF-8 with non-ASCII characters would be double-encoded -- confirm the
    // feed's encoding.
    function latin1_to_utf8($data) {
        if ($data === '') {
            return $data;
        }
        if (function_exists('mb_convert_encoding')) {
            $converted = mb_convert_encoding($data, 'UTF-8', 'ISO-8859-1');
        } elseif (function_exists('iconv')) {
            $converted = iconv('ISO-8859-1', 'UTF-8', $data);
        } elseif (function_exists('utf8_encode')) {
            $converted = utf8_encode($data);
        } else {
            $converted = $data;
        }
        return is_string($converted) ? $converted : $data;
    }

    // Start-element handler.
    //
    // $parser     - the XML parser handle (unused).
    // $tag        - upper-cased element name.
    // $attributes - upper-cased attribute name => value.
    function tag_open($parser, $tag, $attributes) {
        $this->current_tag = $tag;
        if ($tag === 'MOREOVERNEWS') {
            // Root of a feed document: start a fresh result list.
            $this->news_objects = array();
        } elseif ($tag === 'ARTICLE') {
            $this->current_article = new article();
            // Store the element's ID attribute when it has one.
            if (isset($attributes['ID'])) {
                $this->current_article->article_id = $attributes['ID'];
            }
        }
    }

    // Character-data handler.
    //
    // $parser - the XML parser handle (unused).
    // $cdata  - one piece of text from inside the current element.
    //
    // The text of a single element can arrive in several calls (for example
    // around an entity such as &amp;, or when the element straddles two read
    // chunks), so every piece is appended instead of keeping only the first.
    function cdata($parser, $cdata) {
        // Element name => article property that receives its text.
        static $fields = array(
            'URL'                 => 'url',
            'HEADLINE_TEXT'       => 'headline_text',
            'SOURCE'              => 'source',
            'MEDIA_TYPE'          => 'media_type',
            'CLUSTER'             => 'cluster',
            'TAGLINE'             => 'tagline',
            'DOCUMENT_URL'        => 'document_url',
            'HARVEST_TIME'        => 'harvest_time',
            'ACCESS_REGISTRATION' => 'access_registration',
            'ACCESS_STATUS'       => 'access_status',
        );

        // Ignore text outside an article or outside a known field element.
        if (!is_object($this->current_article)) {
            return;
        }
        if (!is_string($this->current_tag) || !isset($fields[$this->current_tag])) {
            return;
        }

        $field = $fields[$this->current_tag];
        $this->current_article->$field .= $cdata;
    }

    // End-element handler.
    //
    // $parser - the XML parser handle (unused).
    // $tag    - upper-cased element name.
    function tag_close($parser, $tag) {
        if ($tag === 'ARTICLE' && is_object($this->current_article)) {
            // The article is complete: add it to the result list.
            $this->news_objects[] = $this->current_article;
            $this->current_article = null;
        }
        // No element is being read any more, so whitespace that follows the
        // closing tag is not mistaken for that element's text.
        $this->current_tag = null;
    }

    // Releases the XML parser. Safe to call more than once.
    function free_parser() {
        if ($this->parser) {
            // xml_parser_free() does nothing from PHP 8.0 on, where dropping
            // the last reference is what releases the parser.
            if (version_compare(PHP_VERSION, '8.0.0', '<')) {
                xml_parser_free($this->parser);
            }
            $this->parser = null;
        }
    }
}
?>
|