Merge pull request #13095 from annando/atom03
Improved ATOM distinction, added OPML
This commit is contained in:
commit
b270771f0f
2 changed files with 108 additions and 53 deletions
|
@ -41,6 +41,8 @@ class Conversation
|
|||
const PARCEL_RDF = 12;
|
||||
const PARCEL_RSS = 13;
|
||||
const PARCEL_ATOM = 14;
|
||||
const PARCEL_ATOM03 = 15;
|
||||
const PARCEL_OPML = 16;
|
||||
const PARCEL_TWITTER = 67;
|
||||
const PARCEL_UNKNOWN = 255;
|
||||
|
||||
|
|
|
@ -93,11 +93,8 @@ class Feed
|
|||
@$doc->loadXML($xml);
|
||||
$xpath = new DOMXPath($doc);
|
||||
|
||||
if (strpos($xml, ActivityNamespace::ATOM03) && !strpos($xml, ActivityNamespace::ATOM1)) {
|
||||
$xpath->registerNamespace('atom', ActivityNamespace::ATOM03);
|
||||
} else {
|
||||
$xpath->registerNamespace('atom', ActivityNamespace::ATOM1);
|
||||
}
|
||||
$xpath->registerNamespace('atom', ActivityNamespace::ATOM1);
|
||||
$xpath->registerNamespace('atom03', ActivityNamespace::ATOM03);
|
||||
$xpath->registerNamespace('dc', 'http://purl.org/dc/elements/1.1/');
|
||||
$xpath->registerNamespace('content', 'http://purl.org/rss/1.0/modules/content/');
|
||||
$xpath->registerNamespace('rdf', 'http://www.w3.org/1999/02/22-rdf-syntax-ns#');
|
||||
|
@ -106,6 +103,7 @@ class Feed
|
|||
$xpath->registerNamespace('poco', ActivityNamespace::POCO);
|
||||
|
||||
$author = [];
|
||||
$atomns = '';
|
||||
$entries = null;
|
||||
$protocol = Conversation::PARCEL_UNKNOWN;
|
||||
|
||||
|
@ -121,10 +119,23 @@ class Feed
|
|||
$entries = $xpath->query('/rdf:RDF/rss:item');
|
||||
}
|
||||
|
||||
if ($xpath->query('/opml')->length > 0) {
|
||||
$protocol = Conversation::PARCEL_OPML;
|
||||
$author['author-name'] = XML::getFirstNodeValue($xpath, '/opml/head/title/text()');
|
||||
$entries = $xpath->query('/opml/body/outline');
|
||||
}
|
||||
|
||||
// Is it Atom?
|
||||
if ($xpath->query('/atom:feed')->length > 0) {
|
||||
$protocol = Conversation::PARCEL_ATOM;
|
||||
$alternate = XML::getFirstAttributes($xpath, "atom:link[@rel='alternate']");
|
||||
$atomns = 'atom';
|
||||
} elseif ($xpath->query('/atom03:feed')->length > 0) {
|
||||
$protocol = Conversation::PARCEL_ATOM03;
|
||||
$atomns = 'atom03';
|
||||
}
|
||||
|
||||
if (!empty($atomns)) {
|
||||
$alternate = XML::getFirstAttributes($xpath, $atomns . ":link[@rel='alternate']");
|
||||
if (is_object($alternate)) {
|
||||
foreach ($alternate as $attribute) {
|
||||
if ($attribute->name == 'href') {
|
||||
|
@ -134,7 +145,7 @@ class Feed
|
|||
}
|
||||
|
||||
if (empty($author['author-link'])) {
|
||||
$self = XML::getFirstAttributes($xpath, "atom:link[@rel='self']");
|
||||
$self = XML::getFirstAttributes($xpath, $atomns . ":link[@rel='self']");
|
||||
if (is_object($self)) {
|
||||
foreach ($self as $attribute) {
|
||||
if ($attribute->name == 'href') {
|
||||
|
@ -145,50 +156,50 @@ class Feed
|
|||
}
|
||||
|
||||
if (empty($author['author-link'])) {
|
||||
$author['author-link'] = XML::getFirstNodeValue($xpath, '/atom:feed/atom:id/text()');
|
||||
$author['author-link'] = XML::getFirstNodeValue($xpath, '/' . $atomns . ':feed/' . $atomns . ':id/text()');
|
||||
}
|
||||
$author['author-avatar'] = XML::getFirstNodeValue($xpath, '/atom:feed/atom:logo/text()');
|
||||
$author['author-avatar'] = XML::getFirstNodeValue($xpath, '/' . $atomns . ':feed/' . $atomns . ':logo/text()');
|
||||
|
||||
$author['author-name'] = XML::getFirstNodeValue($xpath, '/atom:feed/atom:title/text()');
|
||||
$author['author-name'] = XML::getFirstNodeValue($xpath, '/' . $atomns . ':feed/' . $atomns . ':title/text()');
|
||||
|
||||
if (empty($author['author-name'])) {
|
||||
$author['author-name'] = XML::getFirstNodeValue($xpath, '/atom:feed/atom:subtitle/text()');
|
||||
$author['author-name'] = XML::getFirstNodeValue($xpath, '/' . $atomns . ':feed/' . $atomns . ':subtitle/text()');
|
||||
}
|
||||
|
||||
if (empty($author['author-name'])) {
|
||||
$author['author-name'] = XML::getFirstNodeValue($xpath, '/atom:feed/atom:author/atom:name/text()');
|
||||
$author['author-name'] = XML::getFirstNodeValue($xpath, '/' . $atomns . ':feed/' . $atomns . ':author/' . $atomns . ':name/text()');
|
||||
}
|
||||
|
||||
$value = XML::getFirstNodeValue($xpath, 'atom:author/poco:displayName/text()');
|
||||
$value = XML::getFirstNodeValue($xpath, '' . $atomns . ':author/poco:displayName/text()');
|
||||
if ($value != '') {
|
||||
$author['author-name'] = $value;
|
||||
}
|
||||
|
||||
if ($dryRun) {
|
||||
$author['author-id'] = XML::getFirstNodeValue($xpath, '/atom:feed/atom:author/atom:id/text()');
|
||||
$author['author-id'] = XML::getFirstNodeValue($xpath, '/' . $atomns . ':feed/' . $atomns . ':author/' . $atomns . ':id/text()');
|
||||
|
||||
// See https://tools.ietf.org/html/rfc4287#section-3.2.2
|
||||
$value = XML::getFirstNodeValue($xpath, 'atom:author/atom:uri/text()');
|
||||
$value = XML::getFirstNodeValue($xpath, $atomns . ':author/' . $atomns . ':uri/text()');
|
||||
if ($value != '') {
|
||||
$author['author-link'] = $value;
|
||||
}
|
||||
|
||||
$value = XML::getFirstNodeValue($xpath, 'atom:author/poco:preferredUsername/text()');
|
||||
$value = XML::getFirstNodeValue($xpath, $atomns . ':author/poco:preferredUsername/text()');
|
||||
if ($value != '') {
|
||||
$author['author-nick'] = $value;
|
||||
}
|
||||
|
||||
$value = XML::getFirstNodeValue($xpath, 'atom:author/poco:address/poco:formatted/text()');
|
||||
$value = XML::getFirstNodeValue($xpath, $atomns . ':author/poco:address/poco:formatted/text()');
|
||||
if ($value != '') {
|
||||
$author['author-location'] = $value;
|
||||
}
|
||||
|
||||
$value = XML::getFirstNodeValue($xpath, 'atom:author/poco:note/text()');
|
||||
$value = XML::getFirstNodeValue($xpath, $atomns . ':author/poco:note/text()');
|
||||
if ($value != '') {
|
||||
$author['author-about'] = $value;
|
||||
}
|
||||
|
||||
$avatar = XML::getFirstAttributes($xpath, "atom:author/atom:link[@rel='avatar']");
|
||||
$avatar = XML::getFirstAttributes($xpath, $atomns . ":author/" . $atomns . ":link[@rel='avatar']");
|
||||
if (is_object($avatar)) {
|
||||
foreach ($avatar as $attribute) {
|
||||
if ($attribute->name == 'href') {
|
||||
|
@ -198,11 +209,11 @@ class Feed
|
|||
}
|
||||
}
|
||||
|
||||
$author['edited'] = $author['created'] = XML::getFirstNodeValue($xpath, '/atom:feed/atom:updated/text()');
|
||||
$author['edited'] = $author['created'] = XML::getFirstNodeValue($xpath, '/' . $atomns . ':feed/' . $atomns . ':updated/text()');
|
||||
|
||||
$author['app'] = XML::getFirstNodeValue($xpath, '/atom:feed/atom:generator/text()');
|
||||
$author['app'] = XML::getFirstNodeValue($xpath, '/' . $atomns . ':feed/' . $atomns . ':generator/text()');
|
||||
|
||||
$entries = $xpath->query('/atom:feed/atom:entry');
|
||||
$entries = $xpath->query('/' . $atomns . ':feed/' . $atomns . ':entry');
|
||||
}
|
||||
|
||||
// Is it RSS?
|
||||
|
@ -299,9 +310,9 @@ class Feed
|
|||
|
||||
$item = array_merge($header, $author);
|
||||
|
||||
$alternate = XML::getFirstAttributes($xpath, "atom:link[@rel='alternate']", $entry);
|
||||
$alternate = XML::getFirstAttributes($xpath, $atomns . ":link[@rel='alternate']", $entry);
|
||||
if (!is_object($alternate)) {
|
||||
$alternate = XML::getFirstAttributes($xpath, 'atom:link', $entry);
|
||||
$alternate = XML::getFirstAttributes($xpath, $atomns . ':link', $entry);
|
||||
}
|
||||
if (is_object($alternate)) {
|
||||
foreach ($alternate as $attribute) {
|
||||
|
@ -311,6 +322,40 @@ class Feed
|
|||
}
|
||||
}
|
||||
|
||||
if ($entry->nodeName == 'outline') {
|
||||
$isrss = false;
|
||||
$plink = '';
|
||||
$uri = '';
|
||||
foreach ($entry->attributes as $attribute) {
|
||||
switch ($attribute->nodeName) {
|
||||
case 'title':
|
||||
$item['title'] = $attribute->nodeValue;
|
||||
break;
|
||||
|
||||
case 'text':
|
||||
$body = $attribute->nodeValue;
|
||||
break;
|
||||
|
||||
case 'htmlUrl':
|
||||
$plink = $attribute->nodeValue;
|
||||
break;
|
||||
|
||||
case 'xmlUrl':
|
||||
$uri = $attribute->nodeValue;
|
||||
break;
|
||||
|
||||
case 'type':
|
||||
$isrss = $attribute->nodeValue == 'rss';
|
||||
break;
|
||||
}
|
||||
}
|
||||
$item['plink'] = $plink ?: $uri;
|
||||
$item['uri'] = $uri ?: $plink;
|
||||
if (!$isrss || empty($item['uri'])) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
if (empty($item['plink'])) {
|
||||
$item['plink'] = XML::getFirstNodeValue($xpath, 'link/text()', $entry);
|
||||
}
|
||||
|
@ -322,7 +367,9 @@ class Feed
|
|||
// Add the base path if missing
|
||||
$item['plink'] = Network::addBasePath($item['plink'], $basepath);
|
||||
|
||||
$item['uri'] = XML::getFirstNodeValue($xpath, 'atom:id/text()', $entry);
|
||||
if (empty($item['uri'])) {
|
||||
$item['uri'] = XML::getFirstNodeValue($xpath, $atomns . ':id/text()', $entry);
|
||||
}
|
||||
|
||||
$guid = XML::getFirstNodeValue($xpath, 'guid/text()', $entry);
|
||||
if (!empty($guid)) {
|
||||
|
@ -344,7 +391,9 @@ class Feed
|
|||
Logger::notice('Item URL couldn\'t get expanded', ['url' => $item['plink'], 'exception' => $exception]);
|
||||
}
|
||||
|
||||
$item['title'] = XML::getFirstNodeValue($xpath, 'atom:title/text()', $entry);
|
||||
if (empty($item['title'])) {
|
||||
$item['title'] = XML::getFirstNodeValue($xpath, $atomns . ':title/text()', $entry);
|
||||
}
|
||||
|
||||
if (empty($item['title'])) {
|
||||
$item['title'] = XML::getFirstNodeValue($xpath, 'title/text()', $entry);
|
||||
|
@ -360,7 +409,7 @@ class Feed
|
|||
|
||||
$item['title'] = html_entity_decode($item['title'], ENT_QUOTES, 'UTF-8');
|
||||
|
||||
$published = XML::getFirstNodeValue($xpath, 'atom:published/text()', $entry);
|
||||
$published = XML::getFirstNodeValue($xpath, $atomns . ':published/text()', $entry);
|
||||
|
||||
if (empty($published)) {
|
||||
$published = XML::getFirstNodeValue($xpath, 'pubDate/text()', $entry);
|
||||
|
@ -370,7 +419,7 @@ class Feed
|
|||
$published = XML::getFirstNodeValue($xpath, 'dc:date/text()', $entry);
|
||||
}
|
||||
|
||||
$updated = XML::getFirstNodeValue($xpath, 'atom:updated/text()', $entry);
|
||||
$updated = XML::getFirstNodeValue($xpath, $atomns . ':updated/text()', $entry);
|
||||
|
||||
if (empty($updated) && !empty($published)) {
|
||||
$updated = $published;
|
||||
|
@ -406,7 +455,7 @@ class Feed
|
|||
$creator = XML::getFirstNodeValue($xpath, 'author/text()', $entry);
|
||||
|
||||
if (empty($creator)) {
|
||||
$creator = XML::getFirstNodeValue($xpath, 'atom:author/atom:name/text()', $entry);
|
||||
$creator = XML::getFirstNodeValue($xpath, $atomns . ':author/' . $atomns . ':name/text()', $entry);
|
||||
}
|
||||
|
||||
if (empty($creator)) {
|
||||
|
@ -429,33 +478,35 @@ class Feed
|
|||
|
||||
$attachments = [];
|
||||
|
||||
$enclosures = $xpath->query("enclosure|atom:link[@rel='enclosure']", $entry);
|
||||
foreach ($enclosures as $enclosure) {
|
||||
$href = '';
|
||||
$length = null;
|
||||
$type = null;
|
||||
$enclosures = $xpath->query("enclosure|" . $atomns . ":link[@rel='enclosure']", $entry);
|
||||
if (!empty($enclosures)) {
|
||||
foreach ($enclosures as $enclosure) {
|
||||
$href = '';
|
||||
$length = null;
|
||||
$type = null;
|
||||
|
||||
foreach ($enclosure->attributes as $attribute) {
|
||||
if (in_array($attribute->name, ['url', 'href'])) {
|
||||
$href = $attribute->textContent;
|
||||
} elseif ($attribute->name == 'length') {
|
||||
$length = (int)$attribute->textContent;
|
||||
} elseif ($attribute->name == 'type') {
|
||||
$type = $attribute->textContent;
|
||||
foreach ($enclosure->attributes as $attribute) {
|
||||
if (in_array($attribute->name, ['url', 'href'])) {
|
||||
$href = $attribute->textContent;
|
||||
} elseif ($attribute->name == 'length') {
|
||||
$length = (int)$attribute->textContent;
|
||||
} elseif ($attribute->name == 'type') {
|
||||
$type = $attribute->textContent;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!empty($href)) {
|
||||
$attachment = ['uri-id' => -1, 'type' => Post\Media::UNKNOWN, 'url' => $href, 'mimetype' => $type, 'size' => $length];
|
||||
if (!empty($href)) {
|
||||
$attachment = ['uri-id' => -1, 'type' => Post\Media::UNKNOWN, 'url' => $href, 'mimetype' => $type, 'size' => $length];
|
||||
|
||||
$attachment = Post\Media::fetchAdditionalData($attachment);
|
||||
$attachment = Post\Media::fetchAdditionalData($attachment);
|
||||
|
||||
// By now we separate the visible media types (audio, video, image) from the rest
|
||||
// In the future we should try to avoid the DOCUMENT type and only use the real one - but not in the RC phase.
|
||||
if (!in_array($attachment['type'], [Post\Media::AUDIO, Post\Media::IMAGE, Post\Media::VIDEO])) {
|
||||
$attachment['type'] = Post\Media::DOCUMENT;
|
||||
}
|
||||
$attachments[] = $attachment;
|
||||
// By now we separate the visible media types (audio, video, image) from the rest
|
||||
// In the future we should try to avoid the DOCUMENT type and only use the real one - but not in the RC phase.
|
||||
if (!in_array($attachment['type'], [Post\Media::AUDIO, Post\Media::IMAGE, Post\Media::VIDEO])) {
|
||||
$attachment['type'] = Post\Media::DOCUMENT;
|
||||
}
|
||||
$attachments[] = $attachment;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -465,13 +516,15 @@ class Feed
|
|||
$taglist[] = $category->nodeValue;
|
||||
}
|
||||
|
||||
$body = trim(XML::getFirstNodeValue($xpath, 'atom:content/text()', $entry));
|
||||
if (empty($body)) {
|
||||
$body = trim(XML::getFirstNodeValue($xpath, $atomns . ':content/text()', $entry));
|
||||
}
|
||||
|
||||
if (empty($body)) {
|
||||
$body = trim(XML::getFirstNodeValue($xpath, 'content:encoded/text()', $entry));
|
||||
}
|
||||
|
||||
$summary = trim(XML::getFirstNodeValue($xpath, 'atom:summary/text()', $entry));
|
||||
$summary = trim(XML::getFirstNodeValue($xpath, $atomns . ':summary/text()', $entry));
|
||||
|
||||
if (empty($summary)) {
|
||||
$summary = trim(XML::getFirstNodeValue($xpath, 'description/text()', $entry));
|
||||
|
|
Loading…
Reference in a new issue