Moving the invalid-HTML cleanup code out of scrape_feed() and into a central place, HTML5_Parser::parse().
This commit is contained in:
parent
620ee6be0b
commit
04106ff6f5
2 changed files with 6 additions and 5 deletions
|
@@ -263,11 +263,6 @@ function scrape_feed($url) {
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
// Cleanup invalid HTML
|
|
||||||
$doc = new DOMDocument();
|
|
||||||
@$doc->loadHTML($s);
|
|
||||||
$s = $doc->saveHTML();
|
|
||||||
|
|
||||||
$dom = HTML5_Parser::parse($s);
|
$dom = HTML5_Parser::parse($s);
|
||||||
} catch (DOMException $e) {
|
} catch (DOMException $e) {
|
||||||
logger('scrape_feed: parse error: ' . $e);
|
logger('scrape_feed: parse error: ' . $e);
|
||||||
|
|
|
@@ -17,6 +17,12 @@ class HTML5_Parser
|
||||||
* @return Parsed HTML as DOMDocument
|
* @return Parsed HTML as DOMDocument
|
||||||
*/
|
*/
|
||||||
static public function parse($text, $builder = null) {
|
static public function parse($text, $builder = null) {
|
||||||
|
|
||||||
|
// Cleanup invalid HTML
|
||||||
|
$doc = new DOMDocument();
|
||||||
|
@$doc->loadHTML($text);
|
||||||
|
$text = $doc->saveHTML();
|
||||||
|
|
||||||
$tokenizer = new HTML5_Tokenizer($text, $builder);
|
$tokenizer = new HTML5_Tokenizer($text, $builder);
|
||||||
$tokenizer->parse();
|
$tokenizer->parse();
|
||||||
return $tokenizer->save();
|
return $tokenizer->save();
|
||||||
|
|
Loading…
Reference in a new issue