Merge pull request #13838 from MrPetovan/task/refactor-throwaway-fulltext-search

Refactor user-defined channel match
This commit is contained in:
Michael Vogel 2024-01-29 16:53:41 +01:00 committed by GitHub
commit 25f2ad1b97
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 125 additions and 68 deletions

View file

@ -21,6 +21,12 @@
namespace Friendica\Content\Conversation\Collection;
use Friendica\Content\Conversation\Entity;
/**
 * Collection of user-defined channel entities, built on top of the Timelines collection.
 */
class UserDefinedChannels extends Timelines
{
    /**
     * Returns whatever the parent collection's current() yields, with the declared
     * return type narrowed to a user-defined channel entity.
     *
     * @return Entity\UserDefinedChannel
     */
    public function current(): Entity\UserDefinedChannel
    {
        $channel = parent::current();

        return $channel;
    }
}

View file

@ -28,6 +28,7 @@ use Friendica\Content\Conversation\Factory;
use Friendica\Core\Config\Capability\IManageConfigValues;
use Friendica\Database\Database;
use Friendica\Database\DBA;
use Friendica\Database\DisposableFullTextSearch;
use Friendica\Model\Contact;
use Friendica\Model\Post\Engagement;
use Friendica\Model\User;
@ -38,8 +39,7 @@ class UserDefinedChannel extends \Friendica\BaseRepository
{
protected static $table_name = 'channel';
/** @var IManageConfigValues */
private $config;
private IManageConfigValues $config;
public function __construct(Database $database, LoggerInterface $logger, Factory\UserDefinedChannel $factory, IManageConfigValues $config)
{
@ -160,17 +160,18 @@ class UserDefinedChannel extends \Friendica\BaseRepository
}
/**
* Checks, if one of the user defined channels matches with the given search text or languages
* Checks if one of the user-defined channels matches the given language or item text via full-text search
*
* @param string $searchtext
* @param string $haystack
* @param string $language
* @return boolean
* @throws \Exception
*/
public function match(string $searchtext, string $language): bool
public function match(string $haystack, string $language): bool
{
$users = $this->db->selectToArray('user', ['uid'], $this->getUserCondition());
if (empty($users)) {
return [];
return false;
}
$uids = array_column($users, 'uid');
@ -189,15 +190,11 @@ class UserDefinedChannel extends \Friendica\BaseRepository
$search .= '(' . $channel->fullTextSearch . ') ';
}
$this->insertCheckFullTextSearch($searchtext);
$result = $this->inFulltext($search);
$this->deleteCheckFullTextSearch();
return $result;
return (new DisposableFullTextSearch($this->db, $haystack))->match(Engagement::escapeKeywords($search));
}
/**
* Fetch the channel users that have got matching channels
* List the IDs of the relay/group users that have matching user-defined channels based on an item's details
*
* @param string $searchtext
* @param string $language
@ -206,6 +203,7 @@ class UserDefinedChannel extends \Friendica\BaseRepository
* @param int $owner_id
* @param int $reshare_id
* @return array
* @throws \Exception
*/
public function getMatchingChannelUsers(string $searchtext, string $language, array $tags, int $media_type, int $owner_id, int $reshare_id): array
{
@ -221,62 +219,53 @@ class UserDefinedChannel extends \Friendica\BaseRepository
return [];
}
$this->insertCheckFullTextSearch($searchtext);
$disposableFullTextSearch = new DisposableFullTextSearch($this->db, $searchtext);
$uids = [];
$filteredChannels = $this->select(['uid' => array_column($users, 'uid'), 'publish' => true, 'valid' => true])->filter(
function (Entity\UserDefinedChannel $channel) use ($owner_id, $reshare_id, $language, $tags, $media_type, $disposableFullTextSearch, $searchtext) {
static $uids = [];
foreach ($this->select(['uid' => array_column($users, 'uid'), 'publish' => true, 'valid' => true]) as $channel) {
if (in_array($channel->uid, $uids)) {
continue;
}
if (!empty($channel->circle) && ($channel->circle > 0) && !in_array($channel->uid, $uids)) {
if (!$this->inCircle($channel->circle, $channel->uid, $owner_id) && !$this->inCircle($channel->circle, $channel->uid, $reshare_id)) {
continue;
// Filter out channels from already picked users
if (in_array($channel->uid, $uids)) {
return false;
}
}
if (!empty($channel->languages) && !in_array($channel->uid, $uids)) {
if (!in_array($language, $channel->languages)) {
continue;
}
} elseif (!in_array($language, User::getWantedLanguages($channel->uid))) {
continue;
}
if (!empty($channel->includeTags) && !in_array($channel->uid, $uids)) {
if (!$this->inTaglist($channel->includeTags, $tags)) {
continue;
}
}
if (!empty($channel->excludeTags) && !in_array($channel->uid, $uids)) {
if ($this->inTaglist($channel->excludeTags, $tags)) {
continue;
}
}
if (!empty($channel->mediaType) && !in_array($channel->uid, $uids)) {
if (!($channel->mediaType & $media_type)) {
continue;
}
}
if (!empty($channel->fullTextSearch) && !in_array($channel->uid, $uids)) {
if (!$this->inFulltext($channel->fullTextSearch)) {
continue;
}
}
$uids[] = $channel->uid;
$this->logger->debug('Matching channel found.', ['uid' => $channel->uid, 'label' => $channel->label, 'language' => $language, 'tags' => $tags, 'media_type' => $media_type, 'searchtext' => $searchtext]);
}
$this->deleteCheckFullTextSearch();
return $uids;
}
if (
($channel->circle ?? 0)
&& !$this->inCircle($channel->circle, $channel->uid, $owner_id)
&& !$this->inCircle($channel->circle, $channel->uid, $reshare_id)
) {
return false;
}
private function insertCheckFullTextSearch(string $searchtext)
{
$this->db->insert('check-full-text-search', ['pid' => getmypid(), 'searchtext' => $searchtext], Database::INSERT_UPDATE);
}
if (!in_array($language, $channel->languages ?: User::getWantedLanguages($channel->uid))) {
return false;
}
private function deleteCheckFullTextSearch()
{
$this->db->delete('check-full-text-search', ['pid' => getmypid()]);
if ($channel->includeTags && !$this->inTaglist($channel->includeTags, $tags)) {
return false;
}
if ($channel->excludeTags && $this->inTaglist($channel->excludeTags, $tags)) {
return false;
}
if ($channel->mediaType && !($channel->mediaType & $media_type)) {
return false;
}
if ($channel->fullTextSearch && !$disposableFullTextSearch->match(Engagement::escapeKeywords($channel->fullTextSearch))) {
return false;
}
$uids[] = $channel->uid;
$this->logger->debug('Matching channel found.', ['uid' => $channel->uid, 'label' => $channel->label, 'language' => $language, 'tags' => $tags, 'media_type' => $media_type, 'searchtext' => $searchtext]);
return true;
}
);
return $filteredChannels->column('uid');
}
private function inCircle(int $circleId, int $uid, int $cid): bool
@ -308,12 +297,7 @@ class UserDefinedChannel extends \Friendica\BaseRepository
return false;
}
private function inFulltext(string $fullTextSearch): bool
{
return $this->db->exists('check-full-text-search', ["`pid` = ? AND MATCH (`searchtext`) AGAINST (? IN BOOLEAN MODE)", getmypid(), Engagement::escapeKeywords($fullTextSearch)]);
}
private function getUserCondition()
private function getUserCondition(): array
{
$condition = ["`verified` AND NOT `blocked` AND NOT `account_removed` AND NOT `account_expired` AND `user`.`uid` > ?", 0];

View file

@ -0,0 +1,67 @@
<?php
/**
* @copyright Copyright (C) 2010-2024, the Friendica project
*
* @license GNU AGPL version 3 or any later version
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
*/
namespace Friendica\Database;
/**
 * Runs full-text searches against a haystack string that isn't stored in the database.
 *
 * On construction the haystack is written to a row of the `check-full-text-search` table (a
 * temporary table with a FULLTEXT index) under a randomly chosen identifier. Any number of
 * matches can then be performed against that row. The row is deleted when the class instance
 * is destroyed, either manually or at the end of the script at the latest.
 */
class DisposableFullTextSearch
{
    /** Name of the table holding the disposable haystack rows. */
    private const TABLE = 'check-full-text-search';

    private Database $db;
    /** @var int Unique identifier of the haystack in the database. */
    private int $identifier;

    /**
     * Stores the haystack in the database under a fresh, currently unused identifier.
     *
     * @param Database $database Database connection used for every query of this instance
     * @param string   $haystack Text the later match() calls are evaluated against
     */
    public function __construct(Database $database, string $haystack)
    {
        $this->db = $database;

        // The upper bound is dictated by the INT UNSIGNED type of the check-full-text-search.pid field.
        $maxIdentifier = 2 ** 32 - 1;

        // Two DisposableFullTextSearch objects must never share an identifier: destroying the first
        // one would delete the row of the second one as well, before the second object is itself
        // destroyed, leading to unexpected behavior. Keep drawing until an unused value is found.
        while (true) {
            $this->identifier = random_int(0, $maxIdentifier);

            $alreadyTaken = $this->db->exists(self::TABLE, ['pid' => $this->identifier]);
            if (!$alreadyTaken) {
                break;
            }
        }

        // If the `exists()` call fails and returns false because the database is unavailable, the
        // `insert()` call will likely fail as well, which means every subsequent `match()` call
        // will return false because the haystack won't have been inserted.
        // However, at this point there may be bigger problems to worry about.
        $row = ['pid' => $this->identifier, 'searchtext' => $haystack];
        $this->db->insert(self::TABLE, $row);
    }

    /**
     * Removes the haystack row belonging to this instance.
     */
    public function __destruct()
    {
        $ownRow = ['pid' => $this->identifier];

        $this->db->delete(self::TABLE, $ownRow);
    }

    /**
     * Tells whether the stored haystack matches the given boolean mode search string.
     *
     * @param string $needle Boolean mode search string
     * @return bool
     * @throws \Exception
     */
    public function match(string $needle): bool
    {
        // Restrict the full-text match to the single row owned by this instance.
        $condition = [
            "`pid` = ? AND MATCH (`searchtext`) AGAINST (? IN BOOLEAN MODE)",
            $this->identifier,
            $needle,
        ];

        return $this->db->exists(self::TABLE, $condition);
    }
}