From 5ae03319936f70d12eaf4a09dfcda020de429380 Mon Sep 17 00:00:00 2001 From: Michael Date: Sun, 5 Nov 2023 19:18:10 +0000 Subject: [PATCH] Use the post language for the language detection / config for quality --- src/Model/Item.php | 6 +++- src/Protocol/ActivityPub/Processor.php | 34 ++++++++++++++++++++- src/Protocol/ActivityPub/Transmitter.php | 21 ++++++++++--- src/Protocol/Relay.php | 39 +++++++++++++++--------- static/defaults.config.php | 4 +++ 5 files changed, 84 insertions(+), 20 deletions(-) diff --git a/src/Model/Item.php b/src/Model/Item.php index 518edecb4e..18b56215ce 100644 --- a/src/Model/Item.php +++ b/src/Model/Item.php @@ -117,7 +117,7 @@ class Item const DELIVER_FIELDLIST = [ 'uid', 'id', 'parent', 'uri-id', 'uri', 'thr-parent', 'parent-uri', 'guid', 'parent-guid', 'conversation', 'received', 'created', 'edited', 'verb', 'object-type', 'object', 'target', - 'private', 'title', 'body', 'raw-body', 'location', 'coord', 'app', + 'private', 'title', 'body', 'raw-body', 'language', 'location', 'coord', 'app', 'inform', 'deleted', 'extid', 'post-type', 'post-reason', 'gravity', 'allow_cid', 'allow_gid', 'deny_cid', 'deny_gid', 'author-id', 'author-addr', 'author-link', 'author-name', 'author-avatar', 'owner-id', 'owner-link', 'contact-uid', @@ -1484,6 +1484,10 @@ class Item */ private static function setOwnerforResharedItem(array $item) { + if ($item['uid'] == 0) { + return; + } + $parent = Post::selectFirst( ['id', 'causer-id', 'owner-id', 'author-id', 'author-link', 'origin', 'post-reason'], ['uri-id' => $item['thr-parent-id'], 'uid' => $item['uid']] diff --git a/src/Protocol/ActivityPub/Processor.php b/src/Protocol/ActivityPub/Processor.php index 81b15f90ee..516ec38ac8 100644 --- a/src/Protocol/ActivityPub/Processor.php +++ b/src/Protocol/ActivityPub/Processor.php @@ -1673,7 +1673,39 @@ class Processor } } - return Relay::isSolicitedPost($messageTags, $content, $authorid, $id, Protocol::ACTIVITYPUB, $activity['thread-completion'] ?? 
0); + $languages = self::getPostLanguages($activity); + + return Relay::isSolicitedPost($messageTags, $content, $authorid, $id, Protocol::ACTIVITYPUB, $activity['thread-completion'] ?? 0, $languages); + } + + /** + * Fetch the post language from the content + * + * @param array $activity + * @return array + */ + private static function getPostLanguages(array $activity): array + { + $content = JsonLD::fetchElement($activity['as:object'], 'as:content') ?? ''; + $languages = JsonLD::fetchElementArray($activity['as:object'], 'as:content', '@language') ?? []; + if (empty($languages)) { + return []; + } + + $iso639 = new \Matriphe\ISO639\ISO639; + + $result = []; + foreach ($languages as $language) { + if ($language == $content) { + continue; + } + $language = DI::l10n()->toISO6391($language); + if (!in_array($language, array_column($iso639->allLanguages(), 0))) { + continue; + } + $result[] = $language; + } + return $result; } /** diff --git a/src/Protocol/ActivityPub/Transmitter.php b/src/Protocol/ActivityPub/Transmitter.php index f1ced5b429..b18e247eac 100644 --- a/src/Protocol/ActivityPub/Transmitter.php +++ b/src/Protocol/ActivityPub/Transmitter.php @@ -895,6 +895,19 @@ class Transmitter */ public static function getReceiversForUriId(int $uri_id, bool $blindcopy) { + $tags = Tag::getByURIId($uri_id, [Tag::TO, Tag::CC, Tag::BCC, Tag::AUDIENCE]); + if (empty($tags)) { + Logger::debug('No receivers found', ['uri-id' => $uri_id]); + $post = Post::selectFirst([Item::DELIVER_FIELDLIST], ['uri-id' => $uri_id, 'origin' => true]); + if (!empty($post)) { + ActivityPub\Transmitter::storeReceiversForItem($post); + $tags = Tag::getByURIId($uri_id, [Tag::TO, Tag::CC, Tag::BCC, Tag::AUDIENCE]); + Logger::debug('Receivers are created', ['uri-id' => $uri_id, 'receivers' => count($tags)]); + } else { + Logger::debug('Origin item not found', ['uri-id' => $uri_id]); + } + } + $receivers = [ 'to' => [], 'cc' => [], @@ -902,7 +915,7 @@ class Transmitter 'audience' => [], ]; - foreach 
(Tag::getByURIId($uri_id, [Tag::TO, Tag::CC, Tag::BCC, Tag::AUDIENCE]) as $receiver) { + foreach ($tags as $receiver) { switch ($receiver['type']) { case Tag::TO: $receivers['to'][] = $receiver['url']; @@ -1884,7 +1897,7 @@ class Transmitter if (!empty($item['language'])) { $languages = array_keys(json_decode($item['language'], true)); if (!empty($languages[0])) { - return $languages[0]; + return DI::l10n()->toISO6391($languages[0]); } } @@ -1892,12 +1905,12 @@ class Transmitter if (!empty($item['uid'])) { $user = DBA::selectFirst('user', ['language'], ['uid' => $item['uid']]); if (!empty($user['language'])) { - return $user['language']; + return DI::l10n()->toISO6391($user['language']); } } // And finally just use the system language - return DI::config()->get('system', 'language'); + return DI::l10n()->toISO6391(DI::config()->get('system', 'language')); } /** diff --git a/src/Protocol/Relay.php b/src/Protocol/Relay.php index d2e5af0b73..a88c470e05 100644 --- a/src/Protocol/Relay.php +++ b/src/Protocol/Relay.php @@ -53,13 +53,16 @@ class Relay /** * Check if a post is wanted * - * @param array $tags + * @param array $tags * @param string $body - * @param int $authorid + * @param int $authorid * @param string $url + * @param string $network + * @param int $causerid + * @param array $languages * @return boolean "true" is the post is wanted by the system */ - public static function isSolicitedPost(array $tags, string $body, int $authorid, string $url, string $network = '', int $causerid = 0): bool + public static function isSolicitedPost(array $tags, string $body, int $authorid, string $url, string $network = '', int $causerid = 0, array $languages = []): bool { $config = DI::config(); @@ -128,7 +131,7 @@ class Relay } } - if (!self::isWantedLanguage($body, 0, $authorid)) { + if (!self::isWantedLanguage($body, 0, $authorid, $languages)) { Logger::info('Unwanted or Undetected language found - rejected', ['network' => $network, 'url' => $url, 'causer' => $causer, 
'tags' => $tags]); return false; } @@ -171,37 +174,45 @@ class Relay * @param string $body * @param int $uri_id * @param int $author_id + * @param array $languages * @return boolean */ - public static function isWantedLanguage(string $body, int $uri_id = 0, int $author_id = 0) + public static function isWantedLanguage(string $body, int $uri_id = 0, int $author_id = 0, array $languages = []) { - if (empty($body) || Smilies::isEmojiPost($body)) { + if (empty($languages) && (empty($body) || Smilies::isEmojiPost($body))) { Logger::debug('Empty body or only emojis', ['body' => $body]); return true; } - $languages = []; + $detected = []; + $quality = DI::config()->get('system', 'relay_language_quality'); foreach (Item::getLanguageArray($body, 10, $uri_id, $author_id) as $language => $reliability) { - if ($reliability > 0) { - $languages[] = $language; + if (($reliability >= $quality) && ($quality > 0)) { + $detected[] = $language; } } - if (!empty($languages)) { + if (!empty($languages) || !empty($detected)) { $cachekey = 'relay:isWantedLanguage'; $user_languages = DI::cache()->get($cachekey); if (is_null($user_languages)) { $user_languages = User::getLanguages(); - DI::cache()->set($cachekey, $user_languages, Duration::HALF_HOUR); + DI::cache()->set($cachekey, $user_languages); } - foreach ($languages as $language) { + foreach ($detected as $language) { if (in_array($language, $user_languages)) { - Logger::debug('Wanted language found', ['language' => $language, 'languages' => $languages, 'userlang' => $user_languages, 'body' => $body]); + Logger::debug('Wanted language found in detected languages', ['language' => $language, 'detected' => $detected, 'userlang' => $user_languages, 'body' => $body]); return true; } } - Logger::debug('No wanted language found', ['languages' => $languages, 'userlang' => $user_languages, 'body' => $body]); + foreach ($languages as $language) { + if (in_array($language, $user_languages)) { + Logger::debug('Wanted language found in defined 
languages', ['language' => $language, 'languages' => $languages, 'detected' => $detected, 'userlang' => $user_languages, 'body' => $body]); + return true; + } + } + Logger::debug('No wanted language found', ['languages' => $languages, 'detected' => $detected, 'userlang' => $user_languages, 'body' => $body]); return false; } elseif (DI::config()->get('system', 'relay_deny_undetected_language')) { Logger::info('Undetected language found', ['body' => $body]); diff --git a/static/defaults.config.php b/static/defaults.config.php index 57a073e376..56f1d20f8b 100644 --- a/static/defaults.config.php +++ b/static/defaults.config.php @@ -562,6 +562,10 @@ return [ // Deny undetected languages 'relay_deny_undetected_language' => false, + // relay_language_quality (Float) + // Minimum value for the language detection quality for relay posts. The value must be between 0 and 1. + 'relay_language_quality' => 0, + // session_handler (database|cache|native) // Whether to use Cache to store session data or to use PHP native session storage. 'session_handler' => 'database',