From 1191024609ac5262dc5d0acd96b1311df4311c45 Mon Sep 17 00:00:00 2001 From: Hypolite Petovan Date: Tue, 16 Jan 2024 22:24:52 -0500 Subject: [PATCH 1/6] Add throwaway full-text search feature in a dedicated Database class - Add explicit return type to UserDefinedChannels->current() to help IDE auto-completion --- .../Collection/UserDefinedChannels.php | 6 ++ src/Database/DisposableFullTextSearch.php | 59 +++++++++++++++++++ 2 files changed, 65 insertions(+) create mode 100644 src/Database/DisposableFullTextSearch.php diff --git a/src/Content/Conversation/Collection/UserDefinedChannels.php b/src/Content/Conversation/Collection/UserDefinedChannels.php index ea5a68f48a..203a67dafe 100644 --- a/src/Content/Conversation/Collection/UserDefinedChannels.php +++ b/src/Content/Conversation/Collection/UserDefinedChannels.php @@ -21,6 +21,12 @@ namespace Friendica\Content\Conversation\Collection; +use Friendica\Content\Conversation\Entity; + class UserDefinedChannels extends Timelines { + public function current(): Entity\UserDefinedChannel + { + return parent::current(); + } } diff --git a/src/Database/DisposableFullTextSearch.php b/src/Database/DisposableFullTextSearch.php new file mode 100644 index 0000000000..9cf6337936 --- /dev/null +++ b/src/Database/DisposableFullTextSearch.php @@ -0,0 +1,59 @@ +. + * + */ + +namespace Friendica\Database; + +/** + * Full-text search on a haystack string that isn't present in the database. + * The haystack is inserted in a temporary table with a FULLTEXT index, then any number of + * matches can be performed on it before the row is deleted when the class instance is destroyed, + * either manually or at the end of the script at the latest. + */ +class DisposableFullTextSearch +{ + private Database $db; + private int $identifier; + + public function __construct(Database $database, string $haystack) + { + $this->db = $database; + + // Maximum value is indicated by the INT UNSIGNED type of the check-full-text-search.pid field + $this->identifier = random_int(0, pow(2, 32) - 1); + + $this->db->insert('check-full-text-search', ['pid' => $this->identifier, 'searchtext' => $haystack], Database::INSERT_UPDATE); + } + + public function __destruct() + { + $this->db->delete('check-full-text-search', ['pid' => $this->identifier]); + } + + /** + * @param string $needle Boolean mode search string + * @return bool + * @throws \Exception + */ + public function match(string $needle): bool + { + return $this->db->exists('check-full-text-search', ["`pid` = ? AND MATCH (`searchtext`) AGAINST (? IN BOOLEAN MODE)", $this->identifier, $needle]); + } +} From 0ee87b811696a8c655d7efe30eeaad615ac6f6df Mon Sep 17 00:00:00 2001 From: Hypolite Petovan Date: Tue, 16 Jan 2024 22:29:43 -0500 Subject: [PATCH 2/6] Refactor user-defined channel match - Remove disposable full-text search feature from UserDefinedChannels repository - Fix return value of Repository\UserDefinedChannel->match --- .../Repository/UserDefinedChannel.php | 120 ++++++++---------- 1 file changed, 52 insertions(+), 68 deletions(-) diff --git a/src/Content/Conversation/Repository/UserDefinedChannel.php b/src/Content/Conversation/Repository/UserDefinedChannel.php index 0fb282099d..c47991ac4e 100644 --- a/src/Content/Conversation/Repository/UserDefinedChannel.php +++ b/src/Content/Conversation/Repository/UserDefinedChannel.php @@ -28,6 +28,7 @@ use Friendica\Content\Conversation\Factory; use Friendica\Core\Config\Capability\IManageConfigValues; use Friendica\Database\Database; use Friendica\Database\DBA; +use Friendica\Database\DisposableFullTextSearch; use Friendica\Model\Contact; use Friendica\Model\Post\Engagement; use Friendica\Model\User; @@ -38,8 +39,7 @@ class UserDefinedChannel extends \Friendica\BaseRepository { protected static $table_name = 'channel'; - /** @var IManageConfigValues */ - private $config; + private IManageConfigValues $config; public function __construct(Database $database, LoggerInterface $logger, Factory\UserDefinedChannel $factory, IManageConfigValues $config) { @@ -160,17 +160,18 @@ class UserDefinedChannel extends \Friendica\BaseRepository } /** - * Checks, if one of the user defined channels matches with the given search text or languages + * Checks if one of the user-defined channels matches the given language or item text via full-text search * - * @param string $searchtext + * @param string $haystack * @param string $language * @return boolean + * @throws \Exception */ - public function match(string $searchtext, string $language): bool + public function match(string $haystack, string $language): bool { $users = $this->db->selectToArray('user', ['uid'], $this->getUserCondition()); if (empty($users)) { - return []; + return false; } $uids = array_column($users, 'uid'); @@ -189,15 +190,11 @@ class UserDefinedChannel extends \Friendica\BaseRepository $search .= '(' . $channel->fullTextSearch . ') '; } - $this->insertCheckFullTextSearch($searchtext); - $result = $this->inFulltext($search); - $this->deleteCheckFullTextSearch(); - - return $result; + return (new DisposableFullTextSearch($this->db, $haystack))->match(Engagement::escapeKeywords($search)); } /** - * Fetch the channel users that have got matching channels + * List the IDs of the relay/group users that have matching user-defined channels based on an item details * * @param string $searchtext * @param string $language @@ -206,6 +203,7 @@ class UserDefinedChannel extends \Friendica\BaseRepository * @param int $owner_id * @param int $reshare_id * @return array + * @throws \Exception */ public function getMatchingChannelUsers(string $searchtext, string $language, array $tags, int $media_type, int $owner_id, int $reshare_id): array { @@ -221,62 +219,53 @@ class UserDefinedChannel extends \Friendica\BaseRepository return []; } - $this->insertCheckFullTextSearch($searchtext); + $disposableFullTextSearch = new DisposableFullTextSearch($this->db, $searchtext); - $uids = []; + $filteredChannels = $this->select(['uid' => array_column($users, 'uid'), 'publish' => true, 'valid' => true])->filter( + function (Entity\UserDefinedChannel $channel) use ($owner_id, $reshare_id, $language, $tags, $media_type, $disposableFullTextSearch, $searchtext) { + static $uids = []; - foreach ($this->select(['uid' => array_column($users, 'uid'), 'publish' => true, 'valid' => true]) as $channel) { - if (in_array($channel->uid, $uids)) { - continue; - } - if (!empty($channel->circle) && ($channel->circle > 0) && !in_array($channel->uid, $uids)) { - if (!$this->inCircle($channel->circle, $channel->uid, $owner_id) && !$this->inCircle($channel->circle, $channel->uid, $reshare_id)) { - continue; + // Filter out channels from already picked users + if (in_array($channel->uid, $uids)) { + return false; } - } - if (!empty($channel->languages) && !in_array($channel->uid, $uids)) { - if (!in_array($language, $channel->languages)) { - continue; - } - } elseif (!in_array($language, User::getWantedLanguages($channel->uid))) { - continue; - } - if (!empty($channel->includeTags) && !in_array($channel->uid, $uids)) { - if (!$this->inTaglist($channel->includeTags, $tags)) { - continue; - } - } - if (!empty($channel->excludeTags) && !in_array($channel->uid, $uids)) { - if ($this->inTaglist($channel->excludeTags, $tags)) { - continue; - } - } - if (!empty($channel->mediaType) && !in_array($channel->uid, $uids)) { - if (!($channel->mediaType & $media_type)) { - continue; - } - } - if (!empty($channel->fullTextSearch) && !in_array($channel->uid, $uids)) { - if (!$this->inFulltext($channel->fullTextSearch)) { - continue; - } - } - $uids[] = $channel->uid; - $this->logger->debug('Matching channel found.', ['uid' => $channel->uid, 'label' => $channel->label, 'language' => $language, 'tags' => $tags, 'media_type' => $media_type, 'searchtext' => $searchtext]); - } - $this->deleteCheckFullTextSearch(); - return $uids; - } + if ( + ($channel->circle ?? 0) + && !$this->inCircle($channel->circle, $channel->uid, $owner_id) + && !$this->inCircle($channel->circle, $channel->uid, $reshare_id) + ) { + return false; + } - private function insertCheckFullTextSearch(string $searchtext) - { - $this->db->insert('check-full-text-search', ['pid' => getmypid(), 'searchtext' => $searchtext], Database::INSERT_UPDATE); - } + if (!in_array($language, $channel->languages ?: User::getWantedLanguages($channel->uid))) { + return false; + } - private function deleteCheckFullTextSearch() - { - $this->db->delete('check-full-text-search', ['pid' => getmypid()]); + if ($channel->includeTags && !$this->inTaglist($channel->includeTags, $tags)) { + return false; + } + + if ($channel->excludeTags && $this->inTaglist($channel->excludeTags, $tags)) { + return false; + } + + if ($channel->mediaType && !($channel->mediaType & $media_type)) { + return false; + } + + if ($channel->fullTextSearch && !$disposableFullTextSearch->match(Engagement::escapeKeywords($channel->fullTextSearch))) { + return false; + } + + $uids[] = $channel->uid; + $this->logger->debug('Matching channel found.', ['uid' => $channel->uid, 'label' => $channel->label, 'language' => $language, 'tags' => $tags, 'media_type' => $media_type, 'searchtext' => $searchtext]); + + return true; + } + ); + + return $filteredChannels->column('uid'); } private function inCircle(int $circleId, int $uid, int $cid): bool @@ -308,12 +297,7 @@ class UserDefinedChannel extends \Friendica\BaseRepository return false; } - private function inFulltext(string $fullTextSearch): bool - { - return $this->db->exists('check-full-text-search', ["`pid` = ? AND MATCH (`searchtext`) AGAINST (? IN BOOLEAN MODE)", getmypid(), Engagement::escapeKeywords($fullTextSearch)]); - } - - private function getUserCondition() + private function getUserCondition(): array { $condition = ["`verified` AND NOT `blocked` AND NOT `account_removed` AND NOT `account_expired` AND `user`.`uid` > ?", 0]; From abd5768044fe91e5e7c04fa26f2de85f96899bec Mon Sep 17 00:00:00 2001 From: Hypolite Petovan Date: Fri, 26 Jan 2024 16:48:55 -0500 Subject: [PATCH 3/6] Add documentation to DisposableFullTextSearch->idenfier field --- src/Database/DisposableFullTextSearch.php | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Database/DisposableFullTextSearch.php b/src/Database/DisposableFullTextSearch.php index 9cf6337936..3516080f76 100644 --- a/src/Database/DisposableFullTextSearch.php +++ b/src/Database/DisposableFullTextSearch.php @@ -30,12 +30,15 @@ namespace Friendica\Database; class DisposableFullTextSearch { private Database $db; + /** @var int Unique identifier of the haystack in the database. */ private int $identifier; public function __construct(Database $database, string $haystack) { $this->db = $database; + // Unique identifier generation. Two DisposableFullTextSearch object should never have the same as the first object destruction + // would delete both check-full-text-search rows, before the second object destruction is called, leading to unexpected behavior. // Maximum value is indicated by the INT UNSIGNED type of the check-full-text-search.pid field $this->identifier = random_int(0, pow(2, 32) - 1); From 7397b3876379dcc607181f329a99534a56694fef Mon Sep 17 00:00:00 2001 From: Hypolite Petovan Date: Sat, 27 Jan 2024 11:33:28 -0500 Subject: [PATCH 4/6] Ensure identifier uniqueness in Disposable FullTextSearch --- src/Database/DisposableFullTextSearch.php | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/Database/DisposableFullTextSearch.php b/src/Database/DisposableFullTextSearch.php index 3516080f76..fb8ce900d5 100644 --- a/src/Database/DisposableFullTextSearch.php +++ b/src/Database/DisposableFullTextSearch.php @@ -37,12 +37,12 @@ class DisposableFullTextSearch { $this->db = $database; - // Unique identifier generation. Two DisposableFullTextSearch object should never have the same as the first object destruction - // would delete both check-full-text-search rows, before the second object destruction is called, leading to unexpected behavior. - // Maximum value is indicated by the INT UNSIGNED type of the check-full-text-search.pid field - $this->identifier = random_int(0, pow(2, 32) - 1); - - $this->db->insert('check-full-text-search', ['pid' => $this->identifier, 'searchtext' => $haystack], Database::INSERT_UPDATE); + do { + // Unique identifier generation. Two DisposableFullTextSearch object should never have the same as the first object destruction + // would delete both check-full-text-search rows before the second object destruction is called, leading to unexpected behavior. + // Maximum value is indicated by the INT UNSIGNED type of the check-full-text-search.pid field + $this->identifier = random_int(0, pow(2, 32) - 1); + } while($this->db->insert('check-full-text-search', ['pid' => $this->identifier, 'searchtext' => $haystack])); } public function __destruct() From 50cc2be3d1f6de8be94e46e58614e0ec50ae56d9 Mon Sep 17 00:00:00 2001 From: Hypolite Petovan Date: Mon, 29 Jan 2024 06:56:30 -0500 Subject: [PATCH 5/6] Improve uniqueness loop by adding an exists() call in DisposableFullTextSearch --- src/Database/DisposableFullTextSearch.php | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/Database/DisposableFullTextSearch.php b/src/Database/DisposableFullTextSearch.php index fb8ce900d5..5ef8adebd2 100644 --- a/src/Database/DisposableFullTextSearch.php +++ b/src/Database/DisposableFullTextSearch.php @@ -37,12 +37,17 @@ class DisposableFullTextSearch { $this->db = $database; + // Unique identifier generation. Two DisposableFullTextSearch object should never have the same as the first object destruction + // would delete both check-full-text-search rows before the second object destruction is called, leading to unexpected behavior. do { - // Unique identifier generation. Two DisposableFullTextSearch object should never have the same as the first object destruction - // would delete both check-full-text-search rows before the second object destruction is called, leading to unexpected behavior. // Maximum value is indicated by the INT UNSIGNED type of the check-full-text-search.pid field $this->identifier = random_int(0, pow(2, 32) - 1); - } while($this->db->insert('check-full-text-search', ['pid' => $this->identifier, 'searchtext' => $haystack])); + } while($this->db->exists('check-full-text-search', ['pid' => $this->identifier, 'searchtext' => $haystack])); + + // If the `exists()` call fails and return false because the database is unavailable, the `insert()` call will likely fail as well, which means + // all subsequent calls to `match()` will return false because the haystack won't have been inserted. + // However, at this point there may be bigger problems to worry about. + $this->db->insert('check-full-text-search', ['pid' => $this->identifier, 'searchtext' => $haystack]); } public function __destruct() From 391e41d6d6319ae9ebf18b514db357484292b0a5 Mon Sep 17 00:00:00 2001 From: Hypolite Petovan Date: Mon, 29 Jan 2024 07:37:40 -0500 Subject: [PATCH 6/6] Fix exists() condition in DisposableFullTextSearch --- src/Database/DisposableFullTextSearch.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Database/DisposableFullTextSearch.php b/src/Database/DisposableFullTextSearch.php index 5ef8adebd2..f6aa9dc4ba 100644 --- a/src/Database/DisposableFullTextSearch.php +++ b/src/Database/DisposableFullTextSearch.php @@ -42,7 +42,7 @@ class DisposableFullTextSearch do { // Maximum value is indicated by the INT UNSIGNED type of the check-full-text-search.pid field $this->identifier = random_int(0, pow(2, 32) - 1); - } while($this->db->exists('check-full-text-search', ['pid' => $this->identifier, 'searchtext' => $haystack])); + } while ($this->db->exists('check-full-text-search', ['pid' => $this->identifier])); // If the `exists()` call fails and return false because the database is unavailable, the `insert()` call will likely fail as well, which means // all subsequent calls to `match()` will return false because the haystack won't have been inserted.