| 1 |
<?php |
|---|
| 2 |
|
|---|
| 3 |
/** |
|---|
| 4 |
* Project: MagpieRSS: a simple RSS integration tool |
|---|
| 5 |
* File: rss_parse.inc - parse an RSS or Atom feed |
|---|
| 6 |
* return as a simple object. |
|---|
| 7 |
* |
|---|
| 8 |
* Handles RSS 0.9x, RSS 2.0, RSS 1.0, Atom 0.3, and Atom 1.0 |
|---|
| 9 |
* |
|---|
| 10 |
* The latest version of MagpieRSS can be obtained from: |
|---|
| 11 |
* http://magpierss.sourceforge.net |
|---|
| 12 |
* |
|---|
| 13 |
* For questions, help, comments, discussion, etc., please join the |
|---|
| 14 |
* Magpie mailing list: |
|---|
| 15 |
* magpierss-general@lists.sourceforge.net |
|---|
| 16 |
* |
|---|
| 17 |
* @author Kellan Elliott-McCrea <kellan@protest.net> |
|---|
| 18 |
* @version 0.8 |
|---|
| 19 |
* @license GPL |
|---|
| 20 |
* |
|---|
| 21 |
*/ |
|---|
| 22 |
|
|---|
| 23 |
// Feed family identifiers. MagpieRSS::$feed_type is set to one of these
// constants once the root element of the feed has been seen.
define('RSS', 'RSS'); |
|---|
| 24 |
define('ATOM', 'Atom'); |
|---|
| 25 |
|
|---|
| 26 |
/**
 * Rewrite named HTML Latin-1 entities as numeric character references.
 *
 * An XML parser only knows the five predefined XML entities, so a feed that
 * uses HTML names such as "eacute" fails to parse. Each named entity for the
 * code points 160-255 is replaced with its decimal numeric reference, which
 * any XML parser accepts. Matching is case-sensitive because entity names
 * are ("Eacute" and "eacute" are different characters).
 *
 * @param string $string text that may contain named HTML entities
 * @return string the text with those entities rewritten numerically
 */
function _convert_entities ($string) {
    # Source: http://www.w3.org/TR/REC-html40/sgml/entities.html
    # Entity names in code point order; the first one is code point 160.
    static $names = array(
        'nbsp',   'iexcl',  'cent',   'pound',  'curren', 'yen',    'brvbar', 'sect',   'uml',    'copy',
        'ordf',   'laquo',  'not',    'shy',    'reg',    'macr',   'deg',    'plusmn', 'sup2',   'sup3',
        'acute',  'micro',  'para',   'middot', 'cedil',  'sup1',   'ordm',   'raquo',  'frac14', 'frac12',
        'frac34', 'iquest', 'Agrave', 'Aacute', 'Acirc',  'Atilde', 'Auml',   'Aring',  'AElig',  'Ccedil',
        'Egrave', 'Eacute', 'Ecirc',  'Euml',   'Igrave', 'Iacute', 'Icirc',  'Iuml',   'ETH',    'Ntilde',
        'Ograve', 'Oacute', 'Ocirc',  'Otilde', 'Ouml',   'times',  'Oslash', 'Ugrave', 'Uacute', 'Ucirc',
        'Uuml',   'Yacute', 'THORN',  'szlig',  'agrave', 'aacute', 'acirc',  'atilde', 'auml',   'aring',
        'aelig',  'ccedil', 'egrave', 'eacute', 'ecirc',  'euml',   'igrave', 'iacute', 'icirc',  'iuml',
        'eth',    'ntilde', 'ograve', 'oacute', 'ocirc',  'otilde', 'ouml',   'divide', 'oslash', 'ugrave',
        'uacute', 'ucirc',  'uuml',   'yacute', 'thorn',  'yuml',
    );
    static $html_entities = array();
    static $numeric_entities = array();

    // Build the two parallel lookup tables once per request.
    if (!$html_entities) {
        foreach ($names as $offset => $name) {
            $html_entities[]    = '&' . $name . ';';
            $numeric_entities[] = '&#' . (160 + $offset) . ';';
        }
    }

    return str_replace($html_entities, $numeric_entities, $string);
}
|---|
| 52 |
|
|---|
| 53 |
|
|---|
| 54 |
|
|---|
| 55 |
require_once (MAGPIE_DIR . 'rss_utils.inc'); |
|---|
| 56 |
|
|---|
| 57 |
/** |
|---|
| 58 |
* Hybrid parser, and object, takes RSS as a string and returns a simple object. |
|---|
| 59 |
* |
|---|
| 60 |
* see: rss_fetch.inc for a simpler interface with integrated caching support |
|---|
| 61 |
* |
|---|
| 62 |
*/ |
|---|
| 63 |
class MagpieRSS { |
|---|
| 64 |
var $parser; |
|---|
| 65 |
|
|---|
| 66 |
var $current_item = array(); // item currently being parsed |
|---|
| 67 |
var $items = array(); // collection of parsed items |
|---|
| 68 |
var $channel = array(); // hash of channel fields |
|---|
| 69 |
var $textinput = array(); |
|---|
| 70 |
var $image = array(); |
|---|
| 71 |
var $feed_type; |
|---|
| 72 |
var $feed_version; |
|---|
| 73 |
var $encoding = ''; // output encoding of parsed rss |
|---|
| 74 |
|
|---|
| 75 |
var $_source_encoding = ''; // only set if we have to parse xml prolog |
|---|
| 76 |
|
|---|
| 77 |
var $ERROR = ""; |
|---|
| 78 |
var $WARNING = ""; |
|---|
| 79 |
|
|---|
| 80 |
// define some constants |
|---|
| 81 |
|
|---|
| 82 |
var $_ATOM_CONTENT_CONSTRUCTS = array( |
|---|
| 83 |
'content', 'summary', 'title', /* common */ |
|---|
| 84 |
'info', 'tagline', 'copyright', /* Atom 0.3 */ |
|---|
| 85 |
'rights', 'subtitle', /* Atom 1.0 */ |
|---|
| 86 |
); |
|---|
| 87 |
var $_XHTML_CONTENT_CONSTRUCTS = array('body', 'div'); |
|---|
| 88 |
var $_KNOWN_ENCODINGS = array('UTF-8', 'US-ASCII', 'ISO-8859-1'); |
|---|
| 89 |
|
|---|
| 90 |
// parser variables, useless if you're not a parser, treat as private |
|---|
| 91 |
var $stack = array(); // parser stack |
|---|
| 92 |
var $inchannel = false; |
|---|
| 93 |
var $initem = false; |
|---|
| 94 |
|
|---|
| 95 |
var $incontent = array(); // non-empty if in namespaced XML content field |
|---|
| 96 |
var $exclude_top = false; // true when Atom 1.0 type="xhtml" |
|---|
| 97 |
|
|---|
| 98 |
var $intextinput = false; |
|---|
| 99 |
var $inimage = false; |
|---|
| 100 |
var $current_namespace = false; |
|---|
| 101 |
|
|---|
| 102 |
/**
 * Set up an XML parser, parse $source, and populate this object.
 *
 * Up to three parse attempts are made:
 *   1. the source as handed to create_parser();
 *   2. the source force-converted to UTF-8 (encoding taken from the XML
 *      prolog, or guessed with mb_detect_encoding());
 *   3. the UTF-8 source with named HTML entities rewritten as numeric
 *      character references.
 * An error is reported through $this->error() only when the last attempt
 * made still fails. normalize() is always called at the end.
 *
 * NOTE(review): nothing in this method clears state collected by a failed
 * attempt before retrying, so unless create_parser() does that, a feed
 * that fails partway may contribute elements twice -- confirm.
 *
 * NOTE: Probably a good idea to leave the encoding options alone unless
 *       you know what you're doing as PHP's character set support is
 *       a little weird.
 *
 * NOTE: A lot of this is unnecessary but harmless with PHP5
 *
 * @param string $source           string containing the RSS to be parsed
 *
 * @param string $output_encoding  output the parsed RSS in this character
 *                                 set; defaults to ISO-8859-1 as this is
 *                                 PHP's default.
 *
 *                                 NOTE: might be changed to UTF-8 in future
 *                                 versions.
 *
 * @param string $input_encoding   the character set of the incoming RSS
 *                                 source. Leave blank and Magpie will try
 *                                 to figure it out.
 *
 * @param bool   $detect_encoding  if false Magpie won't attempt to detect
 *                                 source encoding. (caveat emptor)
 */
function MagpieRSS ($source, $output_encoding='ISO-8859-1',
                    $input_encoding=null, $detect_encoding=true)
{
    # if PHP xml isn't compiled in, die
    #
    if (!function_exists('xml_parser_create')) {
        $this->error( "Failed to load PHP's XML Extension. " .
                      "http://www.php.net/manual/en/ref.xml.php",
                      E_USER_ERROR );
    }

    list($parser, $source) = $this->create_parser($source,
        $output_encoding, $input_encoding, $detect_encoding);

    # PHP < 8 hands back a resource, PHP >= 8 an XMLParser object
    if (!is_resource($parser) and !is_object($parser)) {
        $this->error( "Failed to create an instance of PHP's XML parser. " .
                      "http://www.php.net/manual/en/ref.xml.php",
                      E_USER_ERROR );
    }

    $this->parser = $parser;
    $status = $this->_magpie_run_parser($source);

    # try to force convert everything to UTF-8 and parse again
    # to salvage at least some data from the feed
    if (! $status and xml_get_error_code( $this->parser ) != XML_ERROR_NONE) {

        xml_parser_free( $this->parser );

        # the XML prolog may quote the encoding with either " or '
        if (preg_match('/<\?xml.*?encoding=["\']([^"\' ]+)["\'].*?\?>/',
                       $source, $matches)) {
            $enc = $matches[1];
        } elseif (function_exists('mb_detect_encoding')) {
            $enc = mb_detect_encoding($source);
        } else {
            $enc = false;
        }

        # try fix XML, pass 1
        # (skip the conversion when no encoding could be determined)
        if ($enc and function_exists('mb_convert_encoding')) {
            $source = mb_convert_encoding($source, "UTF-8", $enc);
        }

        list($parser, $source) = $this->create_parser($source,
            $output_encoding, $input_encoding, $detect_encoding);

        $this->parser = $parser;
        $status = $this->_magpie_run_parser($source);

        # try to fix XML, pass 2
        if (! $status and xml_get_error_code( $this->parser ) != XML_ERROR_NONE) {

            # release the pass 1 parser before replacing it
            xml_parser_free( $this->parser );

            $source = _convert_entities($source);

            list($parser, $source) = $this->create_parser($source,
                $output_encoding, $input_encoding, $detect_encoding);

            $this->parser = $parser;
            $status = $this->_magpie_run_parser($source);
        }
    }

    # report whatever error the last attempt left behind
    if (! $status ) {
        $errorcode = xml_get_error_code( $this->parser );
        if ( $errorcode != XML_ERROR_NONE ) {
            $xml_error  = xml_error_string( $errorcode );
            $error_line = xml_get_current_line_number($this->parser);
            $error_col  = xml_get_current_column_number($this->parser);
            $errormsg   = "$xml_error at line $error_line, column $error_col";

            $this->error( $errormsg );
        }
    }

    xml_parser_free( $this->parser );

    $this->normalize();
}

/**
 * Attach this object's handlers to $this->parser and parse $source with it.
 *
 * Treat as private; used by the constructor for every parse attempt.
 *
 * @param string $source XML text to hand to xml_parse()
 * @return mixed the status returned by xml_parse() (false-y on failure)
 */
function _magpie_run_parser ($source)
{
    # pass in parser, and a reference to this object
    # setup handlers
    #
    xml_set_object( $this->parser, $this );
    xml_set_element_handler($this->parser,
        'feed_start_element', 'feed_end_element' );

    xml_set_character_data_handler( $this->parser, 'feed_cdata' );

    return xml_parse( $this->parser, $source );
}
|---|
| 240 |
|
|---|
| 241 |
/**
 * Start-tag handler registered with xml_set_element_handler().
 *
 * The first element seen identifies the feed type and version. After that,
 * an element either switches one of the in-channel / in-item / in-textinput /
 * in-image flags, is flattened back into markup text (when inside a content
 * construct), or is pushed on the parser stack and has its attributes
 * recorded under "<path>@" and "<path>@<attribute>".
 *
 * @param mixed  $p       the XML parser invoking the handler (unused)
 * @param string $element element name as reported by the parser
 * @param array  $attrs   attributes of the element; keys are lower-cased in place
 */
function feed_start_element($p, $element, &$attrs) {
    $el = $element = strtolower($element);
    $attrs = array_change_key_case($attrs, CASE_LOWER);

    // check for a namespace, and split if found
    // Don't munge content tags
    if ( empty($this->incontent) ) {
        $ns = false;
        if ( strpos( $element, ':' ) ) {
            // explode() replaces split(), which was removed in PHP 7
            list($ns, $el) = explode( ':', $element, 2);
        }
        if ( $ns and $ns != 'rdf' ) {
            $this->current_namespace = $ns;
        }
    }

    # if feed type isn't set, then this is first element of feed
    # identify feed from root element
    #
    if (!isset($this->feed_type) ) {
        if ( $el == 'rdf' ) {
            $this->feed_type = RSS;
            $this->feed_version = '1.0';
        }
        elseif ( $el == 'rss' ) {
            $this->feed_type = RSS;
            // a missing version attribute leaves the version null
            $this->feed_version = isset($attrs['version']) ? $attrs['version'] : null;
        }
        elseif ( $el == 'feed' ) {
            $this->feed_type = ATOM;
            if (isset($attrs['xmlns']) and $attrs['xmlns'] == 'http://www.w3.org/2005/Atom') { // Atom 1.0
                $this->feed_version = '1.0';
            }
            else { // Atom 0.3, probably.
                $this->feed_version = isset($attrs['version']) ? $attrs['version'] : null;
            }
            $this->inchannel = true;
        }
        return;
    }

    // if we're inside a namespaced content construct, treat tags as text
    if ( !empty($this->incontent) )
    {
        if ((count($this->incontent) > 1) or !$this->exclude_top) {
            // if tags are inlined, then flatten
            $attrs_str = join(' ',
                array_map('map_attrs',
                    array_keys($attrs),
                    array_values($attrs) )
            );

            if (strlen($attrs_str) > 0) { $attrs_str = ' '.$attrs_str; }

            $this->append_content( "<{$element}{$attrs_str}>" );
        }
        array_push($this->incontent, $el); // stack for parsing content XML
    }

    elseif ( $el == 'channel' ) {
        $this->inchannel = true;
    }

    elseif ($el == 'item' or $el == 'entry' )
    {
        $this->initem = true;
        if ( isset($attrs['rdf:about']) ) {
            $this->current_item['about'] = $attrs['rdf:about'];
        }
    }

    // if we're in the default namespace of an RSS feed,
    // record textinput or image fields
    elseif (
        $this->feed_type == RSS and
        $this->current_namespace == '' and
        $el == 'textinput' )
    {
        $this->intextinput = true;
    }

    elseif (
        $this->feed_type == RSS and
        $this->current_namespace == '' and
        $el == 'image' )
    {
        $this->inimage = true;
    }

    // set stack[0] to current element
    else {
        // Atom support many links per containing element.
        // Magpie treats link elements of type rel='alternate'
        // as being equivalent to RSS's simple link element.

        $atom_link = false;
        if ($this->feed_type == ATOM and $el == 'link') {
            $atom_link = true;
            if (isset($attrs['rel']) and $attrs['rel'] != 'alternate') {
                $el = $el . "_" . $attrs['rel'];  // pseudo-element names for Atom link elements
            }
        }
        # handle atom content constructs
        elseif ( $this->feed_type == ATOM and in_array($el, $this->_ATOM_CONTENT_CONSTRUCTS) )
        {
            // avoid clashing w/ RSS mod_content
            if ($el == 'content' ) {
                $el = 'atom_content';
            }

            // assume that everything accepts namespaced XML
            // (that will pass through some non-validating feeds;
            // but so what? this isn't a validating parser)
            $this->incontent = array();
            array_push($this->incontent, $el); // start a stack

            if ( isset($attrs['type']) and trim(strtolower($attrs['type']))=='xhtml') {
                $this->exclude_top = true;
            } else {
                $this->exclude_top = false;
            }
        }
        # Handle inline XHTML body elements --CWJ
        elseif (($this->current_namespace=='xhtml' or
                 (isset($attrs['xmlns']) and $attrs['xmlns'] == 'http://www.w3.org/1999/xhtml'))
                and in_array($el, $this->_XHTML_CONTENT_CONSTRUCTS) )
        {
            $this->current_namespace = 'xhtml';
            $this->incontent = array();
            array_push($this->incontent, $el); // start a stack
            $this->exclude_top = false;
        }

        array_unshift($this->stack, $el);
        $elpath = join('_', array_reverse($this->stack));

        // count repeated elements; the 2nd and later get a "#n" suffix
        $n = $this->element_count($elpath);
        $this->element_count($elpath, $n+1);

        if ($n > 0) {
            array_shift($this->stack);
            array_unshift($this->stack, $el.'#'.($n+1));
            $elpath = join('_', array_reverse($this->stack));
        }

        // this makes the baby Jesus cry, but we can't do it in normalize()
        // because we've made the element name for Atom links unpredictable
        // by tacking on the relation to the end. -CWJ
        if ($atom_link and isset($attrs['href'])) {
            $this->append($elpath, $attrs['href']);
        }

        // add attributes
        if (count($attrs) > 0) {
            $this->append($elpath.'@', join(',', array_keys($attrs)));
            foreach ($attrs as $attr => $value) {
                $this->append($elpath.'@'.$attr, $value);
            }
        }
    }
}
|---|
| 402 |
|
|---|
| 403 |
|
|---|
| 404 |
|
|---|
| 405 |
/**
 * Character-data handler registered with xml_set_character_data_handler().
 *
 * Text inside a content construct is added to that construct; any other
 * text is added to the field named by the current parser stack.
 *
 * @param mixed  $p    the XML parser invoking the handler (unused)
 * @param string $text the chunk of character data
 */
function feed_cdata ($p, $text) {
    if (!$this->incontent) {
        // field name is the stack joined outermost-first with "_"
        $path = implode('_', array_reverse($this->stack));
        $this->append($path, $text);
        return;
    }

    $this->append_content( $text );
}
|---|
| 415 |
|
|---|
| 416 |
/**
 * End-tag handler registered with xml_set_element_handler().
 *
 * Inside a content construct the closing tag is either flattened back into
 * markup text or, when it closes the construct itself, pops the construct
 * off the parser stack. Outside one, it closes the current item, clears the
 * matching in-textinput / in-image / in-channel flag, or pops the stack.
 *
 * @param mixed  $p  the XML parser invoking the handler (unused)
 * @param string $el element name as reported by the parser
 */
function feed_end_element ($p, $el) {
    $el = strtolower($el);

    if ( $this->incontent ) {
        $opener = array_pop($this->incontent);

        // Don't get bamboozled by namespace voodoo
        // (explode() replaces split(), which was removed in PHP 7)
        if (strpos($el, ':')) {
            $parts = explode(':', $el);
            $closer = $parts[1];
        }
        else { $closer = $el; }

        // Don't get bamboozled by our munging of <atom:content>, either
        if ($this->feed_type == ATOM and $closer == 'content') {
            $closer = 'atom_content';
        }

        // balance tags properly
        // note: i don't think this is actually neccessary
        if ($opener != $closer) {
            array_push($this->incontent, $opener);
            $this->append_content("<$el />");
        } elseif ($this->incontent) { // are we in the content construct still?
            if ((count($this->incontent) > 1) or !$this->exclude_top) {
                $this->append_content("</$el>");
            }
        } else { // shift the opening of the content construct off the normal stack
            array_shift( $this->stack );
        }
    }
    elseif ( $el == 'item' or $el == 'entry' )
    {
        $this->items[] = $this->current_item;
        $this->current_item = array();
        $this->initem = false;

        $this->current_category = 0;
    }
    elseif ($this->feed_type == RSS and $this->current_namespace == '' and $el == 'textinput' )
    {
        $this->intextinput = false;
    }
    elseif ($this->feed_type == RSS and $this->current_namespace == '' and $el == 'image' )
    {
        $this->inimage = false;
    }
    elseif ($el == 'channel' or $el == 'feed' )
    {
        $this->inchannel = false;
    }
    else {
        array_shift( $this->stack );
    }

    if ( !$this->incontent ) { // Don't munge the namespace after finishing with elements in namespaced content constructs -CWJ
        $this->current_namespace = false;
    }
}
|---|
| 472 |
|
|---|
| 473 |
/**
 * Append $str2 to $str1 in place, treating an unset $str1 as "".
 *
 * @param mixed  $str1 variable to extend (passed by reference; may be unset)
 * @param string $str2 text to add to the end of $str1
 */
function concat (&$str1, $str2="") {
    $str1 = (isset($str1) ? $str1 : "") . $str2;
}
|---|
| 479 |
|
|---|
| 480 |
/**
 * Add text to the content construct currently being collected.
 *
 * The field is named by the first entry of $this->incontent and lives in
 * the current item when inside an item, otherwise in the channel. When a
 * namespace is active the field is nested under that namespace. Outside
 * both item and channel the text is dropped.
 *
 * @param string $text text (or flattened markup) to add
 */
function append_content($text) {
    if ( $this->initem ) {
        $bucket =& $this->current_item;
    }
    elseif ( $this->inchannel ) {
        $bucket =& $this->channel;
    }
    else {
        return;
    }

    if ($this->current_namespace) {
        $this->concat( $bucket[$this->current_namespace][ reset($this->incontent) ], $text );
        return;
    }

    $this->concat( $bucket[ reset($this->incontent) ], $text );
}
|---|
| 496 |
|
|---|
| 497 |
/**
 * Smart append - field and namespace aware.
 *
 * Adds $text to field $el of whichever structure is currently being
 * parsed. An item always wins. Otherwise, with a namespace active the
 * order is channel, textinput, image; without one it is textinput, image,
 * channel. Nothing happens for an empty $el or outside all of them.
 *
 * @param string $el   field name
 * @param string $text text to add to the field
 */
function append($el, $text) {
    if (!$el) {
        return;
    }

    $ns = $this->current_namespace;

    if ( $this->initem ) {
        $bucket =& $this->current_item;
    }
    elseif ( $ns ) {
        if ($this->inchannel)       { $bucket =& $this->channel; }
        elseif ($this->intextinput) { $bucket =& $this->textinput; }
        elseif ($this->inimage)     { $bucket =& $this->image; }
        else                        { return; }
    }
    else {
        if ($this->intextinput)     { $bucket =& $this->textinput; }
        elseif ($this->inimage)     { $bucket =& $this->image; }
        elseif ($this->inchannel)   { $bucket =& $this->channel; }
        else                        { return; }
    }

    if ( $ns ) {
        $this->concat( $bucket[ $ns ][ $el ], $text );
    }
    else {
        $this->concat( $bucket[ $el ], $text );
    }
}
|---|
| 541 |
|
|---|
| 542 |
/**
 * Smart count - field and namespace aware.
 *
 * Reads, and optionally first sets, the occurrence counter kept under the
 * key "$el#" in the current item (when inside an item) or the channel.
 * When a namespace is active the counter is nested under that namespace.
 *
 * @param string   $el  element path whose counter is wanted
 * @param int|null $set when not NULL, store this value before reading
 * @return int|null the counter, 0 when none is stored or when outside both
 *                  item and channel; NULL when $el is empty
 */
function element_count ($el, $set = NULL) {
    if (!$el) {
        return;
    }

    if ( $this->initem ) {
        $bucket =& $this->current_item;
    }
    elseif ( $this->inchannel ) {
        $bucket =& $this->channel;
    }
    else {
        // nowhere to keep a counter; previously this returned an
        // undefined variable
        return 0;
    }

    $key = $el.'#';
    $ns = $this->current_namespace;

    if ( $ns ) {
        if (!is_null($set)) { $bucket[ $ns ][ $key ] = $set; }
        return (isset($bucket[ $ns ][ $key ]) ? $bucket[ $ns ][ $key ] : 0);
    }

    if (!is_null($set)) { $bucket[ $key ] = $set; }
    return (isset($bucket[ $key ]) ? $bucket[ $key ] : 0);
}
|---|
| 574 |
|
|---|
| 575 |
/**
 * normalize_element() callback: copy the attributes of one enclosure.
 *
 * The attribute list is read from "<from-id>@". An Atom link_enclosure's
 * href is stored as url and an RSS enclosure's url is stored as href; in
 * both cases the value also becomes the value of the target element. All
 * other attributes are copied under their own name.
 *
 * @param array  $source array to read from
 * @param string $from   source element name
 * @param array  $dest   array to write to
 * @param string $to     target element name
 * @param int    $i      1-based occurrence of the element
 */
function normalize_enclosure (&$source, $from, &$dest, $to, $i) {
    $id_from = $this->element_id($from, $i);
    $id_to = $this->element_id($to, $i);

    if (!isset($source["{$id_from}@"])) {
        return;
    }

    foreach (explode(',', $source["{$id_from}@"]) as $attr) {
        $atom_href = ($from=='link_enclosure' and $attr=='href'); // from Atom
        $rss_url = (!$atom_href and $from=='enclosure' and $attr=='url'); // from RSS

        if ($atom_href) { $mapped = 'url'; }
        elseif ($rss_url) { $mapped = 'href'; }
        else { $mapped = $attr; }

        $dest["{$id_to}@{$mapped}"] = $source["{$id_from}@{$attr}"];

        // the location doubles as the value of the enclosure element
        if ($atom_href or $rss_url) {
            $dest["{$id_to}"] = $source["{$id_from}@{$attr}"];
        }
    }
}
|---|
| 594 |
|
|---|
| 595 |
/**
 * normalize_element() callback: normalize one Atom person construct.
 *
 * Copies the person's uri (feed version >= 1.0) or url (older) to the
 * other spelling on the target, then builds a single author string of
 * the form "name <email>", "name" or "email" and stores it on both the
 * source element and the target element.
 *
 * @param array  $source array holding the person's "_name", "_email", ... fields
 * @param string $person source element name
 * @param array  $dest   array to write to
 * @param string $to     target element name
 * @param int    $i      1-based occurrence of the element
 */
function normalize_atom_person (&$source, $person, &$dest, $to, $i) {
    $id = $this->element_id($person, $i);
    $id_to = $this->element_id($to, $i);

    // Atom 0.3 <=> Atom 1.0
    $is_v1 = ($this->feed_version >= 1.0);
    $used = $is_v1 ? 'uri' : 'url';
    $norm = $is_v1 ? 'url' : 'uri';

    if (isset($source["{$id}_{$used}"])) {
        $dest["{$id_to}_{$norm}"] = $source["{$id}_{$used}"];
    }

    // Atom to RSS 2.0 and Dublin Core
    // RSS 2.0 person strings should be valid e-mail addresses if possible.
    $has_email = isset($source["{$id}_email"]);
    $has_name = isset($source["{$id}_name"]);

    if (!$has_email and !$has_name) {
        return;
    }

    if ($has_name) {
        $rss_author = $source["{$id}_name"]
            . ($has_email ? " <" . $source["{$id}_email"] . ">" : '');
    }
    else {
        $rss_author = $source["{$id}_email"];
    }

    $source[$id] = $rss_author;  // goes to top-level author or contributor
    $dest[$id_to] = $rss_author; // goes to dc:creator or dc:contributor
}
|---|
| 621 |
|
|---|
| 622 |
/**
 * normalize_element() callback: normalize Atom 1.0 and RSS 2.0 categories
 * to each other and to Dublin Core.
 *
 * The element value and its "@term" attribute are made to agree, as are
 * the "@scheme" and "@domain" attributes, and the category value is then
 * copied to the target element.
 *
 * @param array  $source array holding the category
 * @param string $from   source element name
 * @param array  $dest   array to write to
 * @param string $to     target element name
 * @param int    $i      1-based occurrence of the element
 */
function normalize_category (&$source, $from, &$dest, $to, $i) {
    $category = $this->element_id($from, $i);
    $subject = $this->element_id($to, $i);

    $term = "{$category}@term";
    $scheme = "{$category}@scheme";
    $domain = "{$category}@domain";

    // first normalize category elements: Atom 1.0 <=> RSS 2.0
    if ( isset($source[$term]) ) { // category identifier
        $source[$category] = $source[$term];
    }
    elseif ( $this->feed_type == RSS ) {
        $source[$term] = $source[$category];
    }

    if ( isset($source[$scheme]) ) { // URI to taxonomy
        $source[$domain] = $source[$scheme];
    }
    elseif ( isset($source[$domain]) ) {
        $source[$scheme] = $source[$domain];
    }

    // Now put the identifier into dc:subject
    $dest[$subject] = $source[$category];
}
|---|
| 643 |
|
|---|
| 644 |
/**
 * normalize_element() callback: copy one dc:subject value to a category.
 *
 * The value is stored both as the category element itself (RSS 2.0) and
 * as its "@term" attribute (Atom 1.0).
 *
 * @param array  $source array holding the subject
 * @param string $from   source element name
 * @param array  $dest   array to write to
 * @param string $to     target element name
 * @param int    $i      1-based occurrence of the element
 */
function normalize_dc_subject (&$source, $from, &$dest, $to, $i) {
    $subject_id = $this->element_id($from, $i);
    $category_id = $this->element_id($to, $i);

    $subject = $source[$subject_id];

    $dest[$category_id] = $subject;            // RSS 2.0
    $dest["{$category_id}@term"] = $subject;   // Atom 1.0
}
|---|
| 652 |
|
|---|
| 653 |
/**
 * Copy every occurrence of element $from in $source to element $to in
 * $dest, including the "#" occurrence counter when there is one.
 *
 * Keeps normalize() simple. If attributes need handling, or the format
 * changes on the way, pass the name of a method of this object in $via;
 * it is called as $via($source, $from, $dest, $to, $i) for each
 * occurrence instead of the plain copy.
 *
 * @param array       $source array to read from
 * @param string      $from   source element name
 * @param array       $dest   array to write to
 * @param string      $to     target element name
 * @param string|null $via    optional callback method name
 */
function normalize_element (&$source, $from, &$dest, $to, $via = NULL) {
    $counter = "{$from}#";
    $counted = isset($source[$counter]);

    if (!$counted and !isset($source[$from])) {
        return;
    }

    if ($counted) {
        $n = $source[$counter];
        $dest["{$to}#"] = $source[$counter];
    }
    else {
        $n = 1; // a lone element carries no counter
    }

    $i = 1;
    while ($i <= $n) {
        if (isset($via)) { // custom callback for ninja attacks
            $this->{$via}($source, $from, $dest, $to, $i);
        }
        else { // just make it the same
            $from_id = $this->element_id($from, $i);
            $to_id = $this->element_id($to, $i);
            $dest[$to_id] = $source[$from_id];
        }
        $i++;
    }
}
|---|
| 677 |
|
|---|
| 678 |
function normalize () { |
|---|
| 679 |
// if atom populate rss fields and normalize 0.3 and 1.0 feeds |
|---|
| 680 |
if ( $this->is_atom() ) { |
|---|
| 681 |
// Atom 1.0 elements <=> Atom 0.3 elements (Thanks, o brilliant wordsmiths of the Atom 1.0 standard!) |
|---|
| 682 |
if ($this->feed_version < 1.0) { |
|---|
| 683 |
$this->normalize_element($this->channel, 'tagline', $this->channel, 'subtitle'); |
|---|
| 684 |
$this->normalize_element($this->channel, 'copyright', $this->channel, 'rights'); |
|---|
| 685 |
$this->normalize_element($this->channel, 'modified', $this->channel, 'updated'); |
|---|
| 686 |
} else { |
|---|
| 687 |
$this->normalize_element($this->channel, 'subtitle', $this->channel, 'tagline'); |
|---|
| 688 |
$this->normalize_element($this->channel, 'rights', $this->channel, 'copyright'); |
|---|
| 689 |
$this->normalize_element($this->channel, 'updated', $this->channel, 'modified'); |
|---|
| 690 |
} |
|---|
| 691 |
$this->normalize_element($this->channel, 'author', $this->channel['dc'], 'creator', 'normalize_atom_person'); |
|---|
| 692 |
$this->normalize_element($this->channel, 'contributor', $this->channel['dc'], 'contributor', 'normalize_atom_person'); |
|---|
| 693 |
|
|---|
| 694 |
// Atom elements to RSS elements |
|---|
| 695 |
$this->normalize_element($this->channel, 'subtitle', $this->channel, 'description'); |
|---|
| 696 |
|
|---|
| 697 |
if ( isset($this->channel['logo']) ) { |
|---|
| 698 |
$this->normalize_element($this->channel, 'logo', $this->image, 'url'); |
|---|
| 699 |
$this->normalize_element($this->channel, 'link', $this->image, 'link'); |
|---|
| 700 |
$this->normalize_element($this->channel, 'title', $this->image, 'title'); |
|---|
| 701 |
} |
|---|
| 702 |
|
|---|
| 703 |
for ( $i = 0; $i < count($this->items); $i++) { |
|---|
| 704 |
$item = $this->items[$i]; |
|---|
| 705 |
|
|---|
| 706 |
// Atom 1.0 elements <=> Atom 0.3 elements |
|---|
| 707 |
if ($this->feed_version < 1.0) { |
|---|
| 708 |
$this->normalize_element($item, 'modified', $item, 'updated'); |
|---|
| 709 |
$this->normalize_element($item, 'issued', $item, 'published'); |
|---|
| 710 |
} else { |
|---|
| 711 |
$this->normalize_element($item, 'updated', $item, 'modified'); |
|---|
| 712 |
$this->normalize_element($item, 'published', $item, 'issued'); |
|---|
| 713 |
} |
|---|
| 714 |
|
|---|
| 715 |
// "If an atom:entry element does not contain |
|---|
| 716 |
// atom:author elements, then the atom:author elements |
|---|
| 717 |
// of the contained atom:source element are considered |
|---|
| 718 |
// to apply. In an Atom Feed Document, the atom:author |
|---|
| 719 |
// elements of the containing atom:feed element are |
|---|
| 720 |
// considered to apply to the entry if there are no |
|---|
| 721 |
// atom:author elements in the locations described |
|---|
| 722 |
// above." <http://atompub.org/2005/08/17/draft-ietf-atompub-format-11.html#rfc.section.4.2.1> |
|---|
| 723 |
if (!isset($item["author#"])) { |
|---|
| 724 |
if (isset($item["source_author#"])) { // from aggregation source |
|---|
| 725 |
$source = $item; |
|---|
| 726 |
$author = "source_author"; |
|---|
| 727 |
} elseif (isset($this->channel["author#"])) { // from containing feed |
|---|
| 728 |
$source = $this->channel; |
|---|
| 729 |
$author = "author"; |
|---|
| 730 |
} |
|---|
| 731 |
|
|---|
| 732 |
$item["author#"] = $source["{$author}#"]; |
|---|
| 733 |
for ($au = 1; $au <= $item["author#"]; $au++) { |
|---|
| 734 |
$id_to = $this->element_id('author', $au); |
|---|
| 735 |
$id_from = $this->element_id($author, $au); |
|---|
| 736 |
|
|---|
| 737 |
$item[$id_to] = $source[$id_from]; |
|---|
| 738 |
foreach (array('name', 'email', 'uri', 'url') as $what) { |
|---|
| 739 |
if (isset($source["{$id_from}_{$what}"])) { |
|---|
| 740 |
$item["{$id_to}_{$what}"] = $source["{$id_from}_{$what}"]; |
|---|
| 741 |
} |
|---|
| 742 |
} |
|---|
| 743 |
} |
|---|
| 744 |
} |
|---|
| 745 |
|
|---|
| 746 |
// Atom elements to RSS elements |
|---|
| 747 |
$this->normalize_element($item, 'author', $item['dc'], 'creator', 'normalize_atom_person'); |
|---|
| 748 |
$this->normalize_element($item, 'contributor', $item['dc'], 'contributor', 'normalize_atom_person'); |
|---|
| 749 |
$this->normalize_element($item, 'summary', $item, 'description'); |
|---|
| 750 |
$this->normalize_element($item, 'atom_content', $item['content'], 'encoded'); |
|---|
| 751 |
$this->normalize_element($item, 'link_enclosure', $item, 'enclosure', 'normalize_enclosure'); |
|---|
| 752 |
|
|---|
| 753 |
// Categories |
|---|
| 754 |
if ( isset($item['category#']) ) { // Atom 1.0 categories to dc:subject and RSS 2.0 categories |
|---|
| 755 |
$this->normalize_element($item, 'category', $item['dc'], 'subject', 'normalize_category'); |
|---|
| 756 |
} |
|---|
| 757 |
elseif ( isset($item['dc']['subject#']) ) { // dc:subject to Atom 1.0 and RSS 2.0 categories |
|---|
| 758 |
$this->normalize_element($item['dc'], 'subject', $item, 'category', 'normalize_dc_subject'); |
|---|
| 759 |
} |
|---|
| 760 |
|
|---|
| 761 |
// Normalized item timestamp |
|---|
| 762 |
$atom_date = (isset($item['published']) ) ? $item['published'] : $item['updated']; |
|---|
| 763 |
if ( $atom_date ) { |
|---|
| 764 |
$epoch = @parse_w3cdtf($atom_date); |
|---|
| 765 |
if ($epoch and $epoch > 0) { |
|---|
| 766 |
$item['date_timestamp'] = $epoch; |
|---|
| 767 |
} |
|---|
| 768 |
} |
|---|
| 769 |
|
|---|
| 770 |
$this->items[$i] = $item; |
|---|
| 771 |
} |
|---|
| 772 |
} |
|---|
| 773 |
elseif ( $this->is_rss() ) { |
|---|
| 774 |
// RSS elements to Atom elements |
|---|
| 775 |
$this->normalize_element($this->channel, 'description', $this->channel, 'tagline'); // Atom 0.3 |
|---|
| 776 |
$this->normalize_element($this->channel, 'description', $this->channel, 'subtitle'); // Atom 1.0 (yay wordsmithing!) |
|---|
| 777 |
$this->normalize_element($this->image, 'url', $this->channel, 'logo'); |
|---|
| 778 |
|
|---|
| 779 |
for ( $i = 0; $i < count($this->items); $i++) { |
|---|
| 780 |
$item = $this->items[$i]; |
|---|
| 781 |
|
|---|
| 782 |
// RSS elements to Atom elements |
|---|
| 783 |
$this->normalize_element($item, 'description', $item, 'summary'); |
|---|
| 784 |
$this->normalize_element($item['content'], 'encoded', $item, 'atom_content'); |
|---|
| 785 |
$this->normalize_element($item, 'enclosure', $item, 'link_enclosure', 'normalize_enclosure'); |
|---|
| 786 |
|
|---|
| 787 |
// Categories |
|---|
| 788 |
if ( isset($item['category#']) ) { // RSS 2.0 categories to dc:subject and Atom 1.0 categories |
|---|
| 789 |
$this->normalize_element($item, 'category', $item['dc'], 'subject', 'normalize_category'); |
|---|
| 790 |
} |
|---|
| 791 |
elseif ( isset($item['dc']['subject#']) ) { // dc:subject to Atom 1.0 and RSS 2.0 categories |
|---|
| 792 |
$this->normalize_element($item['dc'], 'subject', $item, 'category', 'normalize_dc_subject'); |
|---|
| 793 |
} |
|---|
| 794 |
|
|---|
| 795 |
// Normalized item timestamp |
|---|
| 796 |
if ( $this->is_rss() == '1.0' and isset($item['dc']['date']) ) { |
|---|
| 797 |
$epoch = @parse_w3cdtf($item['dc']['date']); |
|---|
| 798 |
if ($epoch and $epoch > 0) { |
|---|
| 799 |
$item['date_timestamp'] = $epoch; |
|---|
| 800 |
} |
|---|
| 801 |
} |
|---|
| 802 |
elseif ( isset($item['pubdate']) ) { |
|---|
| 803 |
$epoch = @strtotime($item['pubdate']); |
|---|
| 804 |
if ($epoch > 0) { |
|---|
| 805 |
$item['date_timestamp'] = $epoch; |
|---|
| 806 |
} |
|---|
| 807 |
} |
|---|
| 808 |
|
|---|
| 809 |
$this->items[$i] = $item; |
|---|
| 810 |
} |
|---|
| 811 |
} |
|---|
| 812 |
} |
|---|
| 813 |
|
|---|
| 814 |
|
|---|
| 815 |
/**
 * Tell whether the parsed feed is an RSS feed.
 *
 * @return mixed the value of $this->feed_version when $this->feed_type
 *               loosely equals the RSS constant, false otherwise
 */
function is_rss () {
    // Loose (==) comparison is kept on purpose so the result matches the
    // historical behaviour callers rely on.
    return ( $this->feed_type == RSS ) ? $this->feed_version : false;
}
|---|
| 823 |
|
|---|
| 824 |
/**
 * Tell whether the parsed feed is an Atom feed.
 *
 * @return mixed the value of $this->feed_version when $this->feed_type
 *               loosely equals the ATOM constant, false otherwise
 */
function is_atom() {
    // Loose (==) comparison is kept on purpose so the result matches the
    // historical behaviour callers rely on.
    return ( $this->feed_type == ATOM ) ? $this->feed_version : false;
}
|---|
| 832 |
|
|---|
| 833 |
/** |
|---|
| 834 |
* return XML parser, and possibly re-encoded source |
|---|
| 835 |
* |
|---|
| 836 |
*/ |
|---|
| 837 |
function create_parser($source, $out_enc, $in_enc, $detect) {
    // $source  - raw feed text; handed to the PHP 4 helper, which may return
    //            a re-encoded copy
    // $out_enc - target encoding for parsed data; when non-empty it is stored
    //            in $this->encoding and set on the parser
    // $in_enc  - input encoding hint forwarded to the version-specific helper
    // $detect  - flag forwarded to the helper to request encoding detection
    //
    // Returns array($parser, $source).

    // Every PHP release from 5.0 onwards takes the native-detection path.
    // The previous check compared only the first character of the version
    // string with 5, so PHP 7/8 (and any two-digit major version) dropped
    // into the PHP 4 workaround by mistake.
    if ( version_compare(phpversion(), '5.0.0', '>=') ) {
        $parser = $this->php5_create_parser($in_enc, $detect);
    }
    else {
        list($parser, $source) = $this->php4_create_parser($source, $in_enc, $detect);
    }

    if ($out_enc) {
        $this->encoding = $out_enc;
        xml_parser_set_option($parser, XML_OPTION_TARGET_ENCODING, $out_enc);
    }

    return array($parser, $source);
}
|---|
| 851 |
|
|---|
| 852 |
/** |
|---|
| 853 |
* Instantiate an XML parser under PHP5 |
|---|
| 854 |
* |
|---|
| 855 |
* PHP5 will do a fine job of detecting input encoding |
|---|
| 856 |
* if passed an empty string as the encoding. |
|---|
| 857 |
* |
|---|
| 858 |
* All hail libxml2! |
|---|
| 859 |
* |
|---|
| 860 |
*/ |
|---|
| 861 |
function php5_create_parser($in_enc, $detect) {
    // Start from the empty string, which asks the XML extension to work out
    // the input encoding by itself.
    $encoding = '';

    // Only honour the caller's encoding when one was supplied and detection
    // has been switched off.
    if ( $in_enc && !$detect ) {
        $encoding = $in_enc;
    }

    return xml_parser_create($encoding);
}
|---|
| 870 |
|
|---|
| 871 |
/** |
|---|
| 872 |
* Instantiate an XML parser under PHP4 |
|---|
| 873 |
* |
|---|
| 874 |
* Unfortunately PHP4's support for character encodings |
|---|
| 875 |
* and especially XML and character encodings sucks. As |
|---|
| 876 |
* long as the documents you parse only contain characters |
|---|
| 877 |
* from the ISO-8859-1 character set (a superset of ASCII, |
|---|
| 878 |
* and a subset of UTF-8) you're fine. However once you |
|---|
| 879 |
* step out of that comfy little world things get mad, bad, |
|---|
| 880 |
* and dangerous to know. |
|---|
| 881 |
* |
|---|
| 882 |
* The following code is based on SJM's work with FoF |
|---|
| 883 |
* @see http://minutillo.com/steve/weblog/2004/6/17/php-xml-and-character-encodings-a-tale-of-sadness-rage-and-data-loss |
|---|
| 884 |
* |
|---|
| 885 |
*/ |
|---|
| 886 |
function php4_create_parser($source, $in_enc, $detect) { |
|---|
| 887 |
if ( !$detect ) { |
|---|
| 888 |
return array(xml_parser_create($in_enc), $source); |
|---|
| 889 |
} |
|---|
| 890 |
|
|---|
| 891 |
if (!$in_enc) { |
|---|
| 892 |
if (preg_match('/<?xml.*encoding=[\'"](.*?)[\'"].*?>/m', $source, $m)) { |
|---|
| 893 |
$in_enc = strtoupper($m[1]); |
|---|
| 894 |
$this->source_encoding = $in_enc; |
|---|
| 895 |
} |
|---|
| 896 |
else { |
|---|
| 897 |
$in_enc = 'UTF-8'; |
|---|
| 898 |
} |
|---|
| 899 |
} |
|---|
| 900 |
|
|---|
| 901 |
if ($this->known_encoding($in_enc)) { |
|---|
| 902 |
return array(xml_parser_create($in_enc), $source); |
|---|
| 903 |
} |
|---|
| 904 |
|
|---|
| 905 |
// the detected encoding is not one of the simple encodings PHP knows |
|---|
| 906 |
|
|---|
| 907 |
// attempt to use the iconv extension to |
|---|
| 908 |
// cast the XML to a known encoding |
|---|
| 909 |
// @see http://php.net/iconv |
|---|
| 910 |
|
|---|
| 911 |
if (function_exists('iconv')) { |
|---|
| 912 |
$encoded_source = iconv($in_enc,'UTF-8', $source); |
|---|
| 913 |
if ($encoded_source) { |
|---|
| 914 |
return array(xml_parser_create('UTF-8'), $encoded_source); |
|---|
| 915 |
} |
|---|
| 916 |
} |
|---|
| 917 |
|
|---|
| 918 |
// iconv didn't work, try mb_convert_encoding |
|---|
| 919 |
// @see http://php.net/mbstring |
|---|
| 920 |
if(function_exists('mb_convert_encoding')) { |
|---|
| 921 |
$encoded_source = mb_convert_encoding($source, 'UTF-8', $in_enc ); |
|---|
| 922 |
if ($encoded_source) { |
|---|
| 923 |
return array(xml_parser_create('UTF-8'), $encoded_source); |
|---|
| 924 |
} |
|---|
| 925 |
} |
|---|
| 926 |
|
|---|
| 927 |
// else |
|---|
| 928 |
$this->error("Feed is in an unsupported character encoding. ($in_enc) " . |
|---|
| 929 |
"You may see strange artifacts, and mangled characters.", |
|---|
| 930 |
E_USER_NOTICE); |
|---|
| 931 |
  |
|---|