Parse RSS Into Array

Description: Parses an RSS feed into a nested array. Quick, simple, and nasty, but it does the job.

<?PHP
// Serve the dump as plain text so the print_r() output keeps its line breaks in a browser.
header('content-type: text/plain');
 
// Procedural hooks that forward XML parser events to the rss_parser instance held in the global $rss.
/**
 * Start-tag hook: hands the event to the parser object stored in the global $rss.
 *
 * @param mixed  $parser     XML parser handle supplied by the XML extension.
 * @param string $name       Name of the element that was opened.
 * @param array  $attributes Attributes of the element.
 */
function rss_start_element($parser, $name, $attributes)
{
    $GLOBALS['rss']->start_element($parser, $name, $attributes);
}
 
/**
 * End-tag hook: hands the event to the parser object stored in the global $rss.
 *
 * @param mixed  $parser XML parser handle supplied by the XML extension.
 * @param string $name   Name of the element that was closed.
 */
function rss_end_element($parser, $name)
{
    $GLOBALS['rss']->end_element($parser, $name);
}
 
/**
 * Text hook: hands a chunk of character data to the parser object stored in the global $rss.
 *
 * @param mixed  $parser XML parser handle supplied by the XML extension.
 * @param string $data   Chunk of text found between tags.
 */
function rss_character_data($parser, $data)
{
    $GLOBALS['rss']->character_data($parser, $data);
}
 
 
/**
 * Minimal event-driven RSS reader that flattens a feed into a nested array.
 *
 * Usage: set $file to a path or URL, call parse(), then read $channel.
 * Element names arrive upper-cased (the XML parser's default case folding),
 * so the result looks like $channel['TITLE'], $channel['ITEM'][0]['LINK'], ...
 * On failure parse() returns false and $error holds a description.
 *
 * The instance registers its own methods as XML handlers, so it works no
 * matter which variable it is stored in.
 */
class rss_parser {

  /** Description of the last failure, or '' when none occurred. */
  public $error = '';

  /** Path or URL of the feed; must be set before calling parse(). */
  public $file = '';

  /** Parsed feed, keyed by upper-cased element name. */
  public $channel = array();

  /** Text collected for the element currently being read. */
  public $data = '';

  /** Path of array keys from the root of $channel to the current element. */
  public $stack = array();

  /** Number of <item> elements opened so far; also the next item index. */
  public $num_items = 0;

  /** One empty array per opened <item>; parsed values are written to $channel, not here. */
  public $item = array();

  /** Underlying XML parser, or null once parse() has released it. */
  public $xml_parser = null;

  /**
   * Sets up empty state and a ready-to-use XML parser.
   *
   * A method named after the class is no longer treated as a constructor on
   * PHP 8, so the real constructor has to be __construct().
   */
  function __construct() {
    $this->error = '';
    $this->file = '';
    $this->reset_state();
  }

  /** PHP 4 style constructor name, kept so existing explicit calls keep working. */
  function rss_parser() {
    $this->__construct();
  }

  /** Clears all parse state and creates a fresh XML parser bound to this object. */
  private function reset_state() {
    $this->channel = array();
    $this->data = '';
    $this->stack = array();
    $this->num_items = 0;
    $this->item = array();

    $this->xml_parser = xml_parser_create();
    xml_set_element_handler(
      $this->xml_parser,
      array($this, 'start_element'),
      array($this, 'end_element')
    );
    xml_set_character_data_handler($this->xml_parser, array($this, 'character_data'));
  }

  /**
   * Character-data handler: appends a chunk of text to the current buffer.
   *
   * The parser may deliver one text node in several pieces (for example around
   * an entity such as &amp;), so chunks are joined verbatim and whitespace is
   * only trimmed once the element is complete.
   */
  function character_data($parser, $data) {
    $this->data .= $data;
  }

  /**
   * Start-tag handler: records where the element lives inside $channel.
   *
   * RSS and CHANNEL are transparent wrappers. Every other element pushes its
   * name on the stack; ITEM additionally pushes a numeric index so each item
   * gets its own slot.
   */
  function start_element($parser, $name, $attrs) {
    // Text seen before a child's start tag belongs to the parent, not the child.
    $this->data = '';

    if ($name === 'RSS' || $name === 'CHANNEL') {
      return;
    }

    $this->stack[] = $name;

    if ($name === 'ITEM') {
      $this->stack[] = $this->num_items; // item index.
      $this->item[$this->num_items] = array();
      $this->num_items++;
    }
  }

  /**
   * End-tag handler: stores the collected text for ordinary elements and
   * unwinds the stack by as many entries as start_element() pushed.
   */
  function end_element($parser, $name) {
    switch ($name) {
      case 'RSS':
      case 'CHANNEL':
        break;

      case 'ITEM':
        array_pop($this->stack); // item index.
        array_pop($this->stack); // 'ITEM'.
        break;

      case 'IMAGE':
      case 'TEXTINPUT':
        array_pop($this->stack);
        break;

      default: // child element.
        $this->store_value($this->stack, trim($this->data));
        array_pop($this->stack);
        break;
    }

    $this->data = '';
  }

  /**
   * Writes $value into $channel at the nested position described by $path.
   *
   * Walks the array by reference instead of building PHP source for eval().
   * If the target already holds child elements and $value is empty, the
   * children are kept: this is the closing tag of a container element.
   */
  private function store_value($path, $value) {
    if (count($path) === 0) {
      return;
    }

    $leaf = array_pop($path);
    $node =& $this->channel;

    foreach ($path as $key) {
      if (!isset($node[$key]) || !is_array($node[$key])) {
        $node[$key] = array();
      }
      $node =& $node[$key];
    }

    if ($value === '' && isset($node[$leaf]) && is_array($node[$leaf])) {
      return;
    }

    $node[$leaf] = $value;
  }

  /**
   * Reads $file in 4096-byte chunks and feeds them to the XML parser.
   *
   * @return bool true on success; false on failure, with $error describing
   *              the problem. An empty source is reported as an XML error.
   */
  function parse() {
    $this->error = '';

    // The parser is released after each run; build a new one when called again.
    if ($this->xml_parser === null) {
      $this->reset_state();
    }

    // Warnings are suppressed on purpose: the failure is reported via $error.
    $fp = @fopen($this->file, "r");
    if (!$fp) {
      $this->error = "Could not open RSS source \"$this->file\".";
      return false;
    }

    $ok = true;
    do {
      $chunk = fread($fp, 4096);
      if ($chunk === false) {
        $chunk = '';
      }

      // An empty read also counts as the end, so the parser always receives a
      // final call and can report a truncated document.
      $is_final = ($chunk === '') || feof($fp);

      if (!xml_parse($this->xml_parser, $chunk, $is_final)) {
        $this->error = sprintf("XML error: %s at line %d.",
          xml_error_string(xml_get_error_code($this->xml_parser)),
          xml_get_current_line_number($this->xml_parser));
        $ok = false;
        break;
      }
    } while (!$is_final);

    fclose($fp);

    // Before PHP 8 the parser is a resource that must be freed explicitly;
    // from PHP 8 it is an object that is released with its last reference.
    if (is_resource($this->xml_parser)) {
      xml_parser_free($this->xml_parser);
    }
    $this->xml_parser = null;

    return $ok;
  }

} // class rss_parser.
 
// try it out...
// Demo run: fetch a live feed and dump whatever was parsed.
// The rss_* hook functions look the parser up through the global $rss,
// so the variable name matters for them.
$rss = new rss_parser();
$rss->file = 'http://freebsddiary.org/news.php3';

if (!$rss->parse()) {
  die($rss->error);
}
if ($rss->error) {
  print $rss->error;
}

print_r($rss->channel); // print_r() requires PHP 4 or later.
?>

Enjoyed this post? Share it!

 

Leave a comment

Your email address will not be published.