Merge pull request #12884 from annando/no-attachData

The function "getAttachedData" is replaced by a simplified implementation
2023-03-13 08:29:07 -04:00 · 2023-03-13 08:29:07 -04:00 · 84dfda1ccd
commit 84dfda1ccd
parent afa040dacd 2a752b37b1
3 changed files with 143 additions and 237 deletions
--- a/src/Content/Text/BBCode.php
+++ b/src/Content/Text/BBCode.php
@ -209,183 +209,6 @@ class BBCode
 		);
 	}

-	public static function getAttachedData(string $body, array $item = []): array
-	{
-		/*
-		- text:
-		- type: link, video, photo
-		- title:
-		- url:
-		- image:
-		- description:
-		- (thumbnail)
-		*/
-
-		DI::profiler()->startRecording('rendering');
-		$has_title = !empty($item['title']);
-		$plink = $item['plink'] ?? '';
-		$post = self::getAttachmentData($body);
-
-		// Get all linked images with alternative image description
-		if (preg_match_all("/\[img=(http[^\[\]]*)\]([^\[\]]*)\[\/img\]/Usi", $body, $pictures, PREG_SET_ORDER)) {
-			foreach ($pictures as $picture) {
-				if ($id = Photo::getIdForName($picture[1])) {
-					$post['images'][] = ['url' => str_replace('-1.', '-0.', $picture[1]), 'description' => $picture[2], 'id' => $id];
-				} else {
-					$post['remote_images'][] = ['url' => $picture[1], 'description' => $picture[2]];
-				}
-			}
-			if (!empty($post['images']) && !empty($post['images'][0]['description'])) {
-				$post['image_description'] = $post['images'][0]['description'];
-			}
-		}
-
-		if (preg_match_all("/\[img\]([^\[\]]*)\[\/img\]/Usi", $body, $pictures, PREG_SET_ORDER)) {
-			foreach ($pictures as $picture) {
-				if ($id = Photo::getIdForName($picture[1])) {
-					$post['images'][] = ['url' => str_replace('-1.', '-0.', $picture[1]), 'description' => '', 'id' => $id];
-				} else {
-					$post['remote_images'][] = ['url' => $picture[1], 'description' => ''];
-				}
-			}
-		}
-
-		if (!isset($post['type'])) {
-			$post['text'] = $body;
-		}
-
-		// Simplify image codes
-		$post['text'] = preg_replace("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", '[img]$3[/img]', $post['text']);
-		$post['text'] = preg_replace("/\[img\=(.*?)\](.*?)\[\/img\]/ism", '[img]$1[/img]', $post['text']);
-
-		// if nothing is found, it maybe having an image.
-		if (!isset($post['type'])) {
-			if (preg_match_all("#\[url=([^\]]+?)\]\s*\[img\]([^\[]+?)\[/img\]\s*\[/url\]#ism", $post['text'], $pictures, PREG_SET_ORDER)) {
-				if ((count($pictures) == 1) && !$has_title) {
-					if (!empty($item['object-type']) && ($item['object-type'] == Activity\ObjectType::IMAGE)) {
-						// Replace the preview picture with the real picture
-						$url = str_replace('-1.', '-0.', $pictures[0][2]);
-						$data = ['url' => $url, 'type' => 'photo'];
-					} else {
-						// Checking, if the link goes to a picture
-						$data = ParseUrl::getSiteinfoCached($pictures[0][1]);
-					}
-
-					// Workaround:
-					// Sometimes photo posts to the own album are not detected at the start.
-					// So we seem to cannot use the cache for these cases. That's strange.
-					if (($data['type'] != 'photo') && strstr($pictures[0][1], '/photos/')) {
-						$data = ParseUrl::getSiteinfo($pictures[0][1]);
-					}
-
-					if ($data['type'] == 'photo') {
-						$post['type'] = 'photo';
-						if (isset($data['images'][0])) {
-							$post['image'] = $data['images'][0]['src'];
-							$post['url'] = $data['url'];
-						} else {
-							$post['image'] = $data['url'];
-						}
-
-						$post['preview'] = $pictures[0][2];
-						$post['text'] = trim(str_replace($pictures[0][0], '', $post['text']));
-					} else {
-						$imgdata = Images::getInfoFromURLCached($pictures[0][1]);
-						if (($imgdata) && substr($imgdata['mime'], 0, 6) == 'image/') {
-							$post['type'] = 'photo';
-							$post['image'] = $pictures[0][1];
-							$post['preview'] = $pictures[0][2];
-							$post['text'] = trim(str_replace($pictures[0][0], '', $post['text']));
-						}
-					}
-				} elseif (count($pictures) > 0) {
-					if (count($pictures) > 4) {
-						$post['type'] = 'link';
-						$post['url'] = $plink;
-					} else {
-						$post['type'] = 'photo';
-					}
-
-					$post['image'] = $pictures[0][2];
-
-					foreach ($pictures as $picture) {
-						$post['text'] = trim(str_replace($picture[0], '', $post['text']));
-					}
-				}
-			} elseif (preg_match_all("(\[img\](.*?)\[\/img\])ism", $post['text'], $pictures, PREG_SET_ORDER)) {
-				if ($has_title) {
-					$post['type'] = 'link';
-					$post['url'] = $plink;
-				} else {
-					$post['type'] = 'photo';
-				}
-
-				$post['image'] = $pictures[0][1];
-				foreach ($pictures as $picture) {
-					$post['text'] = trim(str_replace($picture[0], '', $post['text']));
-				}
-			}
-
-			// Test for the external links
-			preg_match_all("(\[url\](.*?)\[\/url\])ism", $post['text'], $links1, PREG_SET_ORDER);
-			preg_match_all("(\[url\=(.*?)\].*?\[\/url\])ism", $post['text'], $links2, PREG_SET_ORDER);
-
-			$links = array_merge($links1, $links2);
-
-			// If there is only a single one, then use it.
-			// This should cover link posts via API.
-			if ((count($links) == 1) && !isset($post['preview']) && !$has_title) {
-				$post['type'] = 'link';
-				$post['url'] = $links[0][1];
-			}
-
-			// Simplify "video" element
-			$post['text'] = preg_replace('(\[video.*?\ssrc\s?=\s?([^\s\]]+).*?\].*?\[/video\])ism', '[video]$1[/video]', $post['text']);
-
-			// Now count the number of external media links
-			preg_match_all("(\[vimeo\](.*?)\[\/vimeo\])ism", $post['text'], $links1, PREG_SET_ORDER);
-			preg_match_all("(\[youtube\\](.*?)\[\/youtube\\])ism", $post['text'], $links2, PREG_SET_ORDER);
-			preg_match_all("(\[video\\](.*?)\[\/video\\])ism", $post['text'], $links3, PREG_SET_ORDER);
-			preg_match_all("(\[audio\\](.*?)\[\/audio\\])ism", $post['text'], $links4, PREG_SET_ORDER);
-
-			// Add them to the other external links
-			$links = array_merge($links, $links1, $links2, $links3, $links4);
-
-			// Are there more than one?
-			if (count($links) > 1) {
-				// The post will be the type "text", which means a blog post
-				unset($post['type']);
-				$post['url'] = $plink;
-			}
-
-			if (!isset($post['type'])) {
-				$post['type'] = 'text';
-			}
-
-			if (($post['type'] == 'photo') && empty($post['images']) && !empty($post['remote_images'])) {
-				$post['images'] = $post['remote_images'];
-				$post['image'] = $post['images'][0]['url'];
-				if (!empty($post['images']) && !empty($post['images'][0]['description'])) {
-					$post['image_description'] = $post['images'][0]['description'];
-				}
-			}
-			unset($post['remote_images']);
-		} elseif (isset($post['url']) && ($post['type'] == 'video')) {
-			$data = ParseUrl::getSiteinfoCached($post['url']);
-
-			if (isset($data['images'][0])) {
-				$post['image'] = $data['images'][0]['src'];
-			}
-		} elseif (preg_match_all("#\[url=([^\]]+?)\]\s*\[img\]([^\[]+?)\[/img\]\s*\[/url\]#ism", $post['text'], $pictures, PREG_SET_ORDER)) {
-			foreach ($pictures as $picture) {
-				$post['text'] = trim(str_replace($picture[0], '', $post['text']));
-			}
-		}
-
-		DI::profiler()->stopRecording();
-		return $post;
-	}
-
 	/**
 	 * Remove [attachment] BBCode
 	 *
--- a/src/Content/Text/Plaintext.php
+++ b/src/Content/Text/Plaintext.php
@ -23,7 +23,10 @@ namespace Friendica\Content\Text;

 use Friendica\Core\Protocol;
 use Friendica\DI;
+use Friendica\Model\Photo;
+use Friendica\Model\Post;
 use Friendica\Util\Network;
+use Friendica\Util\Strings;

 class Plaintext
 {
@ -109,30 +112,15 @@ class Plaintext
 	 * @param int    $limit          The maximum number of characters when posting to that network
 	 * @param bool   $includedlinks  Has an attached link to be included into the message?
 	 * @param int    $htmlmode       This controls the behavior of the BBCode conversion
-	 * @param string $target_network Name of the network where the post should go to.
 	 *
 	 * @return array Same array structure than \Friendica\Content\Text\BBCode::getAttachedData
 	 * @throws \Friendica\Network\HTTPException\InternalServerErrorException
 	 * @see   \Friendica\Content\Text\BBCode::getAttachedData
 	 */
-	public static function getPost(array $item, int $limit = 0, bool $includedlinks = false, int $htmlmode = BBCode::MASTODON_API, string $target_network = '')
+	public static function getPost(array $item, int $limit = 0, bool $includedlinks = false, int $htmlmode = BBCode::MASTODON_API)
 	{
-		// Remove hashtags
-		$URLSearchString = '^\[\]';
-		$body = preg_replace("/([#@])\[url\=([$URLSearchString]*)\](.*?)\[\/url\]/ism", '$1$3', $item['body']);
-
-		// Add an URL element if the text contains a raw link
-		$body = preg_replace(
-			'/([^\]\=\'"]|^)(https?\:\/\/[a-zA-Z0-9\:\/\-\?\&\;\.\=\_\~\#\%\$\!\+\,]+)/ism',
-			'$1[url]$2[/url]',
-			$body
-		);
-
-		// Remove the abstract
-		$body = BBCode::stripAbstract($body);
-
-		// At first look at data that is attached via "type-..." stuff
-		$post = BBCode::getAttachedData($body, $item);
+		// Fetch attached media information
+		$post = self::getPostMedia($item);

 		if (($item['title'] != '') && ($post['text'] != '')) {
 			$post['text'] = trim($item['title'] . "\n\n" . $post['text']);
@ -140,19 +128,7 @@ class Plaintext
 			$post['text'] = trim($item['title']);
 		}

-		$abstract = '';
-
 		// Fetch the abstract from the given target network
-		if ($target_network != '') {
-			$default_abstract = BBCode::getAbstract($item['body']);
-			$abstract = BBCode::getAbstract($item['body'], $target_network);
-
-			// If we post to a network with no limit we only fetch
-			// an abstract exactly for this network
-			if (($limit == 0) && ($abstract == $default_abstract)) {
-				$abstract = '';
-			}
-		} else { // Try to guess the correct target network
 		switch ($htmlmode) {
 			case BBCode::TWITTER:
 				$abstract = BBCode::getAbstract($item['body'], Protocol::TWITTER);
@ -168,7 +144,6 @@ class Plaintext
 					$abstract = BBCode::getAbstract($item['body']);
 				}
 		}
-		}

 		if ($abstract != '') {
 			$post['text'] = $abstract;
@ -323,4 +298,87 @@ class Plaintext

 		return $parts;
 	}
+
+	/**
+	 * Collect the media attached to a post and reduce its body to plain text.
+	 *
+	 * The body is read from $item['body']. Linked mentions and hashtags are
+	 * replaced by their prefix character followed by the link label; afterwards
+	 * the abstract, the attachment BBCode and media elements are stripped.
+	 * Media are looked up for $item['uri-id'] and, when it is set, for
+	 * $item['quote-uri-id'].
+	 *
+	 * @param array $item Item array; reads 'body', 'uri-id' and optionally 'quote-uri-id'
+	 * @return array Always contains 'type' ('text', 'photo' or 'link') and 'text';
+	 *               depending on the media found also 'images', 'remote_images',
+	 *               'image', 'image_description', 'url', 'description' and 'title'
+	 */
+	private static function getPostMedia(array $item): array
+	{
+		$post = ['type' => 'text', 'images' => [], 'remote_images' => []];
+
+		// Replace linked mentions and hashtags by their prefix and label
+		$URLSearchString = '^\[\]';
+		$text = preg_replace("/([#!@])\[url\=([$URLSearchString]*)\](.*?)\[\/url\]/ism", '$1$3', $item['body']);
+
+		// Strip the abstract first, then the attachment BBCode, then the media elements
+		$post['text'] = Post\Media::removeFromBody(BBCode::removeAttachment(BBCode::stripAbstract($text)));
+
+		$uriId      = $item['uri-id'];
+		$quoteUriId = $item['quote-uri-id'] ?? null;
+		$hasQuote   = !empty($quoteUriId);
+
+		// Images of the post itself come first, followed by those of the quoted post
+		$images = Post\Media::getByURIId($uriId, [Post\Media::IMAGE]);
+		if ($hasQuote) {
+			$images = array_merge($images, Post\Media::getByURIId($quoteUriId, [Post\Media::IMAGE]));
+		}
+
+		foreach ($images as $image) {
+			$id    = Photo::getIdForName($image['url']);
+			$entry = ['url' => $image['url'], 'description' => $image['description']];
+
+			if ($id) {
+				// A photo id was found for this URL
+				$entry['id']      = $id;
+				$post['images'][] = $entry;
+			} else {
+				$post['remote_images'][] = $entry;
+			}
+		}
+
+		// Lists that stayed empty are dropped from the result
+		foreach (['images', 'remote_images'] as $key) {
+			if (empty($post[$key])) {
+				unset($post[$key]);
+			}
+		}
+
+		// The first entry of "images" is preferred over the first of "remote_images"
+		$lead = $post['images'][0] ?? $post['remote_images'][0] ?? null;
+		if ($lead !== null) {
+			$post['type']              = 'photo';
+			$post['image']             = $lead['url'];
+			$post['image_description'] = $lead['description'];
+		}
+
+		// Look for audio or video links; the last matching entry determines the URL
+		$playable = [Post\Media::AUDIO, Post\Media::VIDEO];
+		$media    = Post\Media::getByURIId($uriId, $playable);
+		if ($hasQuote) {
+			$media = array_merge($media, Post\Media::getByURIId($quoteUriId, $playable));
+		}
+
+		foreach ($media as $medium) {
+			if (!in_array($medium['type'], $playable)) {
+				continue;
+			}
+
+			$post['type'] = 'link';
+			$post['url']  = $medium['url'];
+		}
+
+		// Look for an attached link; the quoted post is only consulted as a fallback
+		$pages = Post\Media::getByURIId($uriId, [Post\Media::HTML]);
+		if (empty($pages) && $hasQuote) {
+			$pages = Post\Media::getByURIId($quoteUriId, [Post\Media::HTML]);
+		}
+
+		if (empty($pages)) {
+			return $post;
+		}
+
+		$post['type']        = 'link';
+		$post['url']         = $pages[0]['url'];
+		$post['description'] = $pages[0]['description'];
+		$post['title']       = $pages[0]['name'];
+
+		// The preview of the page is only used when no image has been picked so far
+		if (empty($post['image']) && !empty($pages[0]['preview'])) {
+			$post['image'] = $pages[0]['preview'];
+		}
+
+		return $post;
+	}
 }
--- a/src/Model/Post/Media.php
+++ b/src/Model/Post/Media.php
@ -463,7 +463,7 @@ class Media
 	 */
 	private static function isPictureLink(string $page, string $preview): bool
 	{
-		return preg_match('#/photos/.*/image/#ism', $page) && preg_match('#/photo/.*-1\.#ism', $preview);
+		return (preg_match('#/photo/.*-0\.#ism', $page) || preg_match('#/photos/.*/image/#ism', $page)) && preg_match('#/photo/.*-[01]\.#ism', $preview);
 	}

 	/**
@ -482,15 +482,20 @@ class Media
 		$attachments = [];
 		if (preg_match_all("#\[url=([^\]]+?)\]\s*\[img=([^\[\]]*)\]([^\[\]]*)\[\/img\]\s*\[/url\]$endmatchpattern#ism", $body, $pictures, PREG_SET_ORDER)) {
 			foreach ($pictures as $picture) {
-				if (!self::isPictureLink($picture[1], $picture[2])) {
-					continue;
-				}
+				if (self::isPictureLink($picture[1], $picture[2])) {
 					$body = str_replace($picture[0], '', $body);
 					$image = str_replace('-1.', '-0.', $picture[2]);
 					$attachments[$image] = [
 						'uri-id' => $uriid, 'type' => self::IMAGE, 'url' => $image,
 						'preview' => $picture[2], 'description' => $picture[3]
 					];
+				} else {
+					$body = str_replace($picture[0], '', $body);
+					$attachments[$picture[1]] = [
+						'uri-id' => $uriid, 'type' => self::UNKNOWN, 'url' => $picture[1],
+						'preview' => $picture[2], 'description' => $picture[3]
+					];
+				}
 			}
 		}

@ -503,15 +508,20 @@ class Media

 		if (preg_match_all("#\[url=([^\]]+?)\]\s*\[img\]([^\[]+?)\[/img\]\s*\[/url\]$endmatchpattern#ism", $body, $pictures, PREG_SET_ORDER)) {
 			foreach ($pictures as $picture) {
-				if (!self::isPictureLink($picture[1], $picture[2])) {
-					continue;
-				}
+				if (self::isPictureLink($picture[1], $picture[2])) {
 					$body = str_replace($picture[0], '', $body);
 					$image = str_replace('-1.', '-0.', $picture[2]);
 					$attachments[$image] = [
 						'uri-id' => $uriid, 'type' => self::IMAGE, 'url' => $image,
 						'preview' => $picture[2], 'description' => null
 					];
+				} else {
+					$body = str_replace($picture[0], '', $body);
+					$attachments[$picture[1]] = [
+						'uri-id' => $uriid, 'type' => self::UNKNOWN, 'url' => $picture[1],
+						'preview' => $picture[2], 'description' => null
+					];
+				}
 			}
 		}

@ -567,6 +577,21 @@ class Media
 		return $body;
 	}

+	/**
+	 * Remove media from the body
+	 *
+	 * The body is passed through insertFromBody() with a uri-id of 0 over and
+	 * over again until a pass leaves it unchanged.
+	 *
+	 * NOTE(review): presumably the uri-id of 0 keeps insertFromBody() from
+	 * storing the attachments it detects - confirm against that method.
+	 *
+	 * @param string $body
+	 * @return string The body as returned by the last pass
+	 */
+	public static function removeFromBody(string $body): string
+	{
+		// The comparison is strict on purpose: "!=" would regard two different
+		// numeric strings (e.g. "1e3" and "1000") as equal and stop too early.
+		do {
+			$prebody = $body;
+			$body = self::insertFromBody(0, $body);
+		} while ($prebody !== $body);
+		return $body;
+	}
+
 	/**
 	 * Add media links from a relevant url in the body
 	 *