Merge pull request #13846 from annando/search
Improved search results and performance by adding a separate search index table
This commit is contained in:
commit
2e5046f8c3
22 changed files with 672 additions and 370 deletions
19
database.sql
19
database.sql
|
@ -1,6 +1,6 @@
|
|||
-- ------------------------------------------
|
||||
-- Friendica 2024.03-dev (Yellow Archangel)
|
||||
-- DB_UPDATE_VERSION 1546
|
||||
-- DB_UPDATE_VERSION 1547
|
||||
-- ------------------------------------------
|
||||
|
||||
|
||||
|
@ -1293,7 +1293,6 @@ CREATE TABLE IF NOT EXISTS `post-content` (
|
|||
PRIMARY KEY(`uri-id`),
|
||||
INDEX `plink` (`plink`(191)),
|
||||
INDEX `resource-id` (`resource-id`),
|
||||
FULLTEXT INDEX `title-content-warning-body` (`title`,`content-warning`,`body`),
|
||||
INDEX `quote-uri-id` (`quote-uri-id`),
|
||||
FOREIGN KEY (`uri-id`) REFERENCES `item-uri` (`id`) ON UPDATE RESTRICT ON DELETE CASCADE,
|
||||
FOREIGN KEY (`quote-uri-id`) REFERENCES `item-uri` (`id`) ON UPDATE RESTRICT ON DELETE CASCADE
|
||||
|
@ -1460,6 +1459,21 @@ CREATE TABLE IF NOT EXISTS `post-question-option` (
|
|||
FOREIGN KEY (`uri-id`) REFERENCES `item-uri` (`id`) ON UPDATE RESTRICT ON DELETE CASCADE
|
||||
) DEFAULT COLLATE utf8mb4_general_ci COMMENT='Question option';
|
||||
|
||||
--
|
||||
-- TABLE post-searchindex
|
||||
--
|
||||
-- Search index with one row per `uri-id` (primary key).
-- `searchtext` carries the FULLTEXT index used for text matching; `created` has its own regular index.
-- Rows are removed automatically when the referenced `item-uri` row is deleted (ON DELETE CASCADE).
CREATE TABLE IF NOT EXISTS `post-searchindex` (
|
||||
`uri-id` int unsigned NOT NULL COMMENT 'Id of the item-uri table entry that contains the item uri',
|
||||
`network` char(4) COMMENT '',
|
||||
`private` tinyint unsigned COMMENT '0=public, 1=private, 2=unlisted',
|
||||
`searchtext` mediumtext COMMENT 'Simplified text for the full text search',
|
||||
`created` datetime COMMENT '',
|
||||
PRIMARY KEY(`uri-id`),
|
||||
INDEX `created` (`created`),
|
||||
FULLTEXT INDEX `searchtext` (`searchtext`),
|
||||
FOREIGN KEY (`uri-id`) REFERENCES `item-uri` (`id`) ON UPDATE RESTRICT ON DELETE CASCADE
|
||||
) DEFAULT COLLATE utf8mb4_general_ci COMMENT='Content for all posts';
|
||||
|
||||
--
|
||||
-- TABLE post-tag
|
||||
--
|
||||
|
@ -1711,7 +1725,6 @@ CREATE TABLE IF NOT EXISTS `profile` (
|
|||
`net-publish` boolean NOT NULL DEFAULT '0' COMMENT 'publish profile in global directory',
|
||||
PRIMARY KEY(`id`),
|
||||
INDEX `uid_is-default` (`uid`,`is-default`),
|
||||
FULLTEXT INDEX `pub_keywords` (`pub_keywords`),
|
||||
FOREIGN KEY (`uid`) REFERENCES `user` (`uid`) ON UPDATE RESTRICT ON DELETE CASCADE
|
||||
) DEFAULT COLLATE utf8mb4_general_ci COMMENT='user profiles data';
|
||||
|
||||
|
|
|
@ -80,6 +80,7 @@ Additionally to the search for content, there are additional keywords that can b
|
|||
* visibility:public
|
||||
* visibility:unlisted
|
||||
* visibility:private
|
||||
* language - Use "language:code" to search for posts with the given language in the [ISO 639-1](https://en.wikipedia.org/wiki/ISO_639-1) format.
|
||||
|
||||
Remember that you can combine these keywords.
|
||||
So for example you can create a channel with all posts that talk about the Fediverse - that aren't posted in the Fediverse with the search terms: "fediverse -network:apub -network:dfrn"
|
|
@ -70,6 +70,7 @@ Database Tables
|
|||
| [post-media](help/database/db_post-media) | Attached media |
|
||||
| [post-question](help/database/db_post-question) | Question |
|
||||
| [post-question-option](help/database/db_post-question-option) | Question option |
|
||||
| [post-searchindex](help/database/db_post-searchindex) | Content for all posts |
|
||||
| [post-tag](help/database/db_post-tag) | post relation to tags |
|
||||
| [post-thread](help/database/db_post-thread) | Thread related data |
|
||||
| [post-thread-user](help/database/db_post-thread-user) | Thread related data per user |
|
||||
|
|
|
@ -30,13 +30,12 @@ Fields
|
|||
Indexes
|
||||
------------
|
||||
|
||||
| Name | Fields |
|
||||
| -------------------------- | -------------------------------------- |
|
||||
| PRIMARY | uri-id |
|
||||
| plink | plink(191) |
|
||||
| resource-id | resource-id |
|
||||
| title-content-warning-body | FULLTEXT, title, content-warning, body |
|
||||
| quote-uri-id | quote-uri-id |
|
||||
| Name | Fields |
|
||||
| ------------ | ------------ |
|
||||
| PRIMARY | uri-id |
|
||||
| plink | plink(191) |
|
||||
| resource-id | resource-id |
|
||||
| quote-uri-id | quote-uri-id |
|
||||
|
||||
Foreign Keys
|
||||
------------
|
||||
|
|
33
doc/database/db_post-searchindex.md
Normal file
33
doc/database/db_post-searchindex.md
Normal file
|
@ -0,0 +1,33 @@
|
|||
Table post-searchindex
|
||||
===========
|
||||
|
||||
Content for all posts
|
||||
|
||||
Fields
|
||||
------
|
||||
|
||||
| Field | Description | Type | Null | Key | Default | Extra |
|
||||
| ---------- | --------------------------------------------------------- | ---------------- | ---- | --- | ------- | ----- |
|
||||
| uri-id | Id of the item-uri table entry that contains the item uri | int unsigned | NO | PRI | NULL | |
|
||||
| network | | char(4) | YES | | NULL | |
|
||||
| private | 0=public, 1=private, 2=unlisted | tinyint unsigned | YES | | NULL | |
|
||||
| searchtext | Simplified text for the full text search | mediumtext | YES | | NULL | |
|
||||
| created | | datetime | YES | | NULL | |
|
||||
|
||||
Indexes
|
||||
------------
|
||||
|
||||
| Name | Fields |
|
||||
| ---------- | -------------------- |
|
||||
| PRIMARY | uri-id |
|
||||
| created | created |
|
||||
| searchtext | FULLTEXT, searchtext |
|
||||
|
||||
Foreign Keys
|
||||
------------
|
||||
|
||||
| Field | Target Table | Target Field |
|
||||
|-------|--------------|--------------|
|
||||
| uri-id | [item-uri](help/database/db_item-uri) | id |
|
||||
|
||||
Return to [database documentation](help/database)
|
|
@ -56,11 +56,10 @@ Fields
|
|||
Indexes
|
||||
------------
|
||||
|
||||
| Name | Fields |
|
||||
| -------------- | ---------------------- |
|
||||
| PRIMARY | id |
|
||||
| uid_is-default | uid, is-default |
|
||||
| pub_keywords | FULLTEXT, pub_keywords |
|
||||
| Name | Fields |
|
||||
| -------------- | --------------- |
|
||||
| PRIMARY | id |
|
||||
| uid_is-default | uid, is-default |
|
||||
|
||||
Foreign Keys
|
||||
------------
|
||||
|
|
|
@ -156,7 +156,7 @@ class UserDefinedChannel extends \Friendica\BaseRepository
|
|||
return true;
|
||||
}
|
||||
|
||||
return $this->db->select('check-full-text-search', [], ["`pid` = ? AND MATCH (`searchtext`) AGAINST (? IN BOOLEAN MODE)", getmypid(), $this->escapeKeywords($searchtext)]) !== false;
|
||||
return $this->db->select('check-full-text-search', [], ["`pid` = ? AND MATCH (`searchtext`) AGAINST (? IN BOOLEAN MODE)", getmypid(), Engagement::escapeKeywords($searchtext)]) !== false;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -310,15 +310,7 @@ class UserDefinedChannel extends \Friendica\BaseRepository
|
|||
|
||||
private function inFulltext(string $fullTextSearch): bool
|
||||
{
|
||||
return $this->db->exists('check-full-text-search', ["`pid` = ? AND MATCH (`searchtext`) AGAINST (? IN BOOLEAN MODE)", getmypid(), $this->escapeKeywords($fullTextSearch)]);
|
||||
}
|
||||
|
||||
private function escapeKeywords(string $fullTextSearch): string
|
||||
{
|
||||
foreach (Engagement::KEYWORDS as $keyword) {
|
||||
$fullTextSearch = preg_replace('~(' . $keyword . ':.[\w@\.-]+)~', '"$1"', $fullTextSearch);
|
||||
}
|
||||
return $fullTextSearch;
|
||||
return $this->db->exists('check-full-text-search', ["`pid` = ? AND MATCH (`searchtext`) AGAINST (? IN BOOLEAN MODE)", getmypid(), Engagement::escapeKeywords($fullTextSearch)]);
|
||||
}
|
||||
|
||||
private function getUserCondition()
|
||||
|
|
|
@ -255,12 +255,19 @@ class BBCode
|
|||
// Removes attachments
|
||||
$text = self::removeAttachment($text);
|
||||
|
||||
// Add images because of possible alt texts
|
||||
// Add text from attached media
|
||||
if (!empty($uri_id)) {
|
||||
$text = Post\Media::addAttachmentsToBody($uri_id, $text, [Post\Media::IMAGE]);
|
||||
|
||||
foreach (Post\Media::getByURIId($uri_id, [Post\Media::HTML]) as $media) {
|
||||
$text .= ' ' . $media['name'] . ' ' . $media['description'];
|
||||
// Append the description of every attached medium and, for HTML and ACTIVITY
// media, its name, author name and publisher name. A value is only appended
// when the text does not already contain it (case-insensitive check).
foreach (Post\Media::getByURIId($uri_id) as $media) {
	if (!empty($media['description']) && (stripos($text, $media['description']) === false)) {
		$text .= ' ' . $media['description'];
	}

	if (!in_array($media['type'], [Post\Media::HTML, Post\Media::ACTIVITY])) {
		continue;
	}

	foreach (['name', 'author-name', 'publisher-name'] as $key) {
		// empty() has to wrap only the array access so that missing keys stay
		// guarded; the containment check is a separate operand of "&&".
		if (!empty($media[$key]) && (stripos($text, $media[$key]) === false)) {
			$text .= ' ' . $media[$key];
		}
	}
}
|
||||
}
|
||||
|
||||
|
|
|
@ -52,7 +52,7 @@ class PostUpdate
|
|||
// Needed for the helper function to read from the legacy term table
|
||||
const OBJECT_TYPE_POST = 1;
|
||||
|
||||
const VERSION = 1544;
|
||||
const VERSION = 1547;
|
||||
|
||||
/**
|
||||
* Calls the post update functions
|
||||
|
@ -128,6 +128,9 @@ class PostUpdate
|
|||
if (!self::update1544()) {
|
||||
return false;
|
||||
}
|
||||
if (!self::update1547()) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -1358,4 +1361,62 @@ class PostUpdate
|
|||
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
 * Create "post-searchindex" entries for old entries.
 *
 * Each call processes at most 1000 posts (comments and parents only) whose
 * "uri-id" is below the stored position. The position is kept under the key
 * "post_update_version_1547_id" so that the next call continues from there.
 * When a maximum search age is configured, only posts created after that
 * date are selected.
 *
 * @return bool "true" when the job is done
 * @throws \Friendica\Network\HTTPException\InternalServerErrorException
 * @throws \ImagickException
 */
private static function update1547()
{
	// Was the script completed?
	if (DI::keyValue()->get('post_update_version') >= 1547) {
		return true;
	}

	// Resume from the stored position; on the first run start at the first
	// post returned when ordering by "uri-id".
	// NOTE(review): the bound below is exclusive, so the starting post of the
	// very first run is itself not selected - confirm this is acceptable.
	$id = (int)(DI::keyValue()->get('post_update_version_1547_id') ?? 0);
	if ($id == 0) {
		$post = Post::selectFirstPost(['uri-id'], [], ['order' => ['uri-id' => true]]);
		$id = (int)($post['uri-id'] ?? 0);
	}

	Logger::info('Start', ['uri-id' => $id]);

	$rows = 0;

	$condition = ["`uri-id` < ? AND `gravity` IN (?, ?)", $id, Item::GRAVITY_COMMENT, Item::GRAVITY_PARENT];

	$limit = Post\SearchIndex::searchAgeDateLimit();
	if (!empty($limit)) {
		// mergeConditions() returns the merged condition instead of modifying
		// its arguments, so the result has to be assigned for the age filter
		// to reach the query.
		$condition = DBA::mergeConditions($condition, ["`created` > ?", $limit]);
	}

	$posts = Post::selectPosts(['uri-id', 'network', 'private', 'created'], $condition, ['order' => ['uri-id' => true], 'limit' => 1000]);

	if (DBA::errorNo() != 0) {
		Logger::error('Database error', ['no' => DBA::errorNo(), 'message' => DBA::errorMessage()]);
		return false;
	}

	while ($post = Post::fetch($posts)) {
		// Remember the last processed post as the new position
		$id = $post['uri-id'];
		Post\SearchIndex::insert($post['uri-id'], $post['network'], $post['private'], $post['created'], true);
		++$rows;
	}
	DBA::close($posts);

	DI::keyValue()->set('post_update_version_1547_id', $id);

	Logger::info('Processed', ['rows' => $rows, 'last' => $id]);

	// A batch of 100 rows or fewer is treated as the end of the job
	if ($rows <= 100) {
		DI::keyValue()->set('post_update_version', 1547);
		Logger::info('Done');
		return true;
	}

	return false;
}
|
||||
}
|
||||
|
|
|
@ -34,6 +34,7 @@ use Friendica\Core\Protocol;
|
|||
use Friendica\Core\Renderer;
|
||||
use Friendica\Core\System;
|
||||
use Friendica\Core\Worker;
|
||||
use Friendica\Database\Database;
|
||||
use Friendica\Database\DBA;
|
||||
use Friendica\DI;
|
||||
use Friendica\Model\Post\Category;
|
||||
|
@ -243,6 +244,10 @@ class Item
|
|||
$content_fields['raw-body'] = BBCode::removeAttachment($content_fields['raw-body']);
|
||||
|
||||
Post\Content::update($item['uri-id'], $content_fields);
|
||||
|
||||
$searchtext = Post\Engagement::getSearchTextForUriId($item['uri-id'], true);
|
||||
DBA::update('post-engagement', ['searchtext' => $searchtext], ['uri-id' => $item['uri-id']]);
|
||||
Post\SearchIndex::update($item['uri-id']);
|
||||
}
|
||||
|
||||
if (!empty($fields['file'])) {
|
||||
|
@ -1444,6 +1449,10 @@ class Item
|
|||
|
||||
$engagement_uri_id = Post\Engagement::storeFromItem($posted_item);
|
||||
|
||||
if (in_array($posted_item['gravity'], [self::GRAVITY_PARENT, self::GRAVITY_COMMENT])) {
|
||||
Post\SearchIndex::insert($posted_item['uri-id'], $posted_item['network'], $posted_item['private'], $posted_item['created']);
|
||||
}
|
||||
|
||||
if (($posted_item['gravity'] == self::GRAVITY_ACTIVITY) && ($posted_item['verb'] == Activity::ANNOUNCE) && ($posted_item['parent-uri-id'] == $posted_item['thr-parent-id'])) {
|
||||
self::reshareChannelPost($posted_item['thr-parent-id'], $posted_item['author-id']);
|
||||
} elseif ($engagement_uri_id) {
|
||||
|
|
|
@ -22,11 +22,10 @@
|
|||
namespace Friendica\Model\Post;
|
||||
|
||||
use \BadMethodCallException;
|
||||
use Friendica\Core\Protocol;
|
||||
use Friendica\Database\Database;
|
||||
use Friendica\Database\DBA;
|
||||
use Friendica\Database\DBStructure;
|
||||
use Friendica\DI;
|
||||
use Friendica\Model\Item;
|
||||
use Friendica\Model\Post;
|
||||
|
||||
class Content
|
||||
|
@ -109,9 +108,12 @@ class Content
|
|||
*/
|
||||
public static function getURIIdListBySearch(string $search, int $uid = 0, int $start = 0, int $limit = 100, int $last_uriid = 0)
|
||||
{
|
||||
$condition = ["`uri-id` IN (SELECT `uri-id` FROM `post-content` WHERE MATCH (`title`, `content-warning`, `body`) AGAINST (? IN BOOLEAN MODE))
|
||||
AND (`uid` = ? OR (`uid` = ? AND NOT `global`)) AND (`network` IN (?, ?, ?, ?) OR (`uid` = ? AND `uid` != ?))",
|
||||
str_replace('@', ' ', $search), 0, $uid, Protocol::ACTIVITYPUB, Protocol::DFRN, Protocol::DIASPORA, Protocol::OSTATUS, $uid, 0];
|
||||
$search = Post\Engagement::escapeKeywords($search);
|
||||
if ($uid != 0) {
|
||||
$condition = ["MATCH (`searchtext`) AGAINST (? IN BOOLEAN MODE) and (private = ? OR `uri-id` in (SELECT `uri-id` FROM `post-user` where `uid` = ?))", $search, Item::PUBLIC, $uid];
|
||||
} else {
|
||||
$condition = ["MATCH (`searchtext`) AGAINST (? IN BOOLEAN MODE) and private = ?", $search, Item::PUBLIC];
|
||||
}
|
||||
|
||||
if (!empty($last_uriid)) {
|
||||
$condition = DBA::mergeConditions($condition, ["`uri-id` < ?", $last_uriid]);
|
||||
|
@ -122,7 +124,7 @@ class Content
|
|||
'limit' => [$start, $limit]
|
||||
];
|
||||
|
||||
$tags = Post::select(['uri-id'], $condition, $params);
|
||||
$tags = DBA::select('post-searchindex', ['uri-id'], $condition, $params);
|
||||
|
||||
$uriids = [];
|
||||
while ($tag = DBA::fetch($tags)) {
|
||||
|
@ -135,9 +137,12 @@ class Content
|
|||
|
||||
public static function countBySearch(string $search, int $uid = 0)
|
||||
{
|
||||
$condition = ["`uri-id` IN (SELECT `uri-id` FROM `post-content` WHERE MATCH (`title`, `content-warning`, `body`) AGAINST (? IN BOOLEAN MODE))
|
||||
AND (`uid` = ? OR (`uid` = ? AND NOT `global`)) AND (`network` IN (?, ?, ?, ?) OR (`uid` = ? AND `uid` != ?))",
|
||||
str_replace('@', ' ', $search), 0, $uid, Protocol::ACTIVITYPUB, Protocol::DFRN, Protocol::DIASPORA, Protocol::OSTATUS, $uid, 0];
|
||||
return Post::count($condition);
|
||||
$search = Post\Engagement::escapeKeywords($search);
|
||||
if ($uid != 0) {
|
||||
$condition = ["MATCH (`searchtext`) AGAINST (? IN BOOLEAN MODE) and (private = ? OR `uri-id` in (SELECT `uri-id` FROM `post-user` where `uid` = ?))", $search, Item::PUBLIC, $uid];
|
||||
} else {
|
||||
$condition = ["MATCH (`searchtext`) AGAINST (? IN BOOLEAN MODE) and private = ?", $search, Item::PUBLIC];
|
||||
}
|
||||
return DBA::count('post-searchindex', $condition);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -24,7 +24,6 @@ namespace Friendica\Model\Post;
|
|||
use Friendica\Content\Text\BBCode;
|
||||
use Friendica\Core\Logger;
|
||||
use Friendica\Core\Protocol;
|
||||
use Friendica\Database\Database;
|
||||
use Friendica\Database\DBA;
|
||||
use Friendica\DI;
|
||||
use Friendica\Model\Contact;
|
||||
|
@ -39,7 +38,7 @@ use Friendica\Util\DateTimeFormat;
|
|||
|
||||
class Engagement
|
||||
{
|
||||
const KEYWORDS = ['source', 'server', 'from', 'to', 'group', 'tag', 'network', 'platform', 'visibility'];
|
||||
const KEYWORDS = ['source', 'server', 'from', 'to', 'group', 'tag', 'network', 'platform', 'visibility', 'language'];
|
||||
|
||||
/**
|
||||
* Store engagement data from an item array
|
||||
|
@ -146,7 +145,7 @@ class Engagement
|
|||
'owner-contact-type' => $author['contact-type'],
|
||||
'owner-nick' => $author['nick'],
|
||||
'owner-addr' => $author['addr'],
|
||||
'author-gsid' => $author['gsid'],
|
||||
'owner-gsid' => $author['gsid'],
|
||||
];
|
||||
|
||||
foreach ($receivers as $receiver) {
|
||||
|
@ -158,6 +157,24 @@ class Engagement
|
|||
return self::getSearchText($item, $receivers, $tags);
|
||||
}
|
||||
|
||||
/**
 * Fetch the search text for the given uri-id
 *
 * Unless a refresh is requested, a non-empty "searchtext" stored in
 * "post-engagement" is returned as is. Otherwise the text is built from the
 * post data via getSearchTextForItem().
 *
 * @param int  $uri_id  uri-id of the post
 * @param bool $refresh "true" skips the lookup in "post-engagement"
 * @return string Search text, or an empty string when no post was found
 */
public static function getSearchTextForUriId(int $uri_id, bool $refresh = false): string
{
	if (!$refresh) {
		$stored = DBA::selectFirst('post-engagement', ['searchtext'], ['uri-id' => $uri_id]);
		if (!empty($stored['searchtext'])) {
			return $stored['searchtext'];
		}
	}

	// NOTE(review): "language" is not part of this field list, while
	// getSearchText() reads $item['language'] - confirm whether it should be.
	$fields = [
		'uri-id', 'network', 'title', 'content-warning', 'body', 'private',
		'author-id', 'author-contact-type', 'author-nick', 'author-addr', 'author-gsid',
		'owner-id', 'owner-contact-type', 'owner-nick', 'owner-addr', 'owner-gsid',
	];

	$post = Post::selectFirstPost($fields, ['uri-id' => $uri_id]);

	return empty($post['uri-id']) ? '' : self::getSearchTextForItem($post);
}
|
||||
|
||||
private static function getSearchTextForItem(array $item): string
|
||||
{
|
||||
$receivers = array_column(Tag::getByURIId($item['uri-id'], [Tag::MENTION, Tag::IMPLICIT_MENTION, Tag::EXCLUSIVE_MENTION, Tag::AUDIENCE]), 'url');
|
||||
|
@ -167,61 +184,61 @@ class Engagement
|
|||
|
||||
private static function getSearchText(array $item, array $receivers, array $tags): string
|
||||
{
|
||||
$body = '[nosmile]network:' . $item['network'];
|
||||
$body = '[nosmile]network_' . $item['network'];
|
||||
|
||||
if (!empty($item['author-gsid'])) {
|
||||
$gserver = DBA::selectFirst('gserver', ['platform', 'nurl'], ['id' => $item['author-gsid']]);
|
||||
$platform = preg_replace( '/[\W]/', '', $gserver['platform'] ?? '');
|
||||
if (!empty($platform)) {
|
||||
$body .= ' platform:' . $platform;
|
||||
$body .= ' platform_' . $platform;
|
||||
}
|
||||
$body .= ' server:' . parse_url($gserver['nurl'], PHP_URL_HOST);
|
||||
$body .= ' server_' . parse_url($gserver['nurl'], PHP_URL_HOST);
|
||||
}
|
||||
|
||||
if (($item['owner-contact-type'] == Contact::TYPE_COMMUNITY) && !empty($item['owner-gsid']) && ($item['owner-gsid'] != ($item['author-gsid'] ?? 0))) {
|
||||
$gserver = DBA::selectFirst('gserver', ['platform', 'nurl'], ['id' => $item['owner-gsid']]);
|
||||
$platform = preg_replace( '/[\W]/', '', $gserver['platform'] ?? '');
|
||||
if (!empty($platform) && !strpos($body, 'platform:' . $platform)) {
|
||||
$body .= ' platform:' . $platform;
|
||||
if (!empty($platform) && !strpos($body, 'platform_' . $platform)) {
|
||||
$body .= ' platform_' . $platform;
|
||||
}
|
||||
$body .= ' server:' . parse_url($gserver['nurl'], PHP_URL_HOST);
|
||||
$body .= ' server_' . parse_url($gserver['nurl'], PHP_URL_HOST);
|
||||
}
|
||||
|
||||
switch ($item['private']) {
|
||||
case Item::PUBLIC:
|
||||
$body .= ' visibility:public';
|
||||
$body .= ' visibility_public';
|
||||
break;
|
||||
case Item::UNLISTED:
|
||||
$body .= ' visibility:unlisted';
|
||||
$body .= ' visibility_unlisted';
|
||||
break;
|
||||
case Item::PRIVATE:
|
||||
$body .= ' visibility:private';
|
||||
$body .= ' visibility_private';
|
||||
break;
|
||||
}
|
||||
|
||||
if (in_array(Contact::TYPE_COMMUNITY, [$item['author-contact-type'], $item['owner-contact-type']])) {
|
||||
$body .= ' source:group';
|
||||
$body .= ' source_group';
|
||||
} elseif ($item['author-contact-type'] == Contact::TYPE_PERSON) {
|
||||
$body .= ' source:person';
|
||||
$body .= ' source_person';
|
||||
} elseif ($item['author-contact-type'] == Contact::TYPE_NEWS) {
|
||||
$body .= ' source:service';
|
||||
$body .= ' source_service';
|
||||
} elseif ($item['author-contact-type'] == Contact::TYPE_ORGANISATION) {
|
||||
$body .= ' source:organization';
|
||||
$body .= ' source_organization';
|
||||
} elseif ($item['author-contact-type'] == Contact::TYPE_RELAY) {
|
||||
$body .= ' source:application';
|
||||
$body .= ' source_application';
|
||||
}
|
||||
|
||||
if ($item['author-contact-type'] == Contact::TYPE_COMMUNITY) {
|
||||
$body .= ' group:' . $item['author-nick'] . ' group:' . $item['author-addr'];
|
||||
$body .= ' group_' . $item['author-nick'] . ' group_' . $item['author-addr'];
|
||||
} elseif (in_array($item['author-contact-type'], [Contact::TYPE_PERSON, Contact::TYPE_NEWS, Contact::TYPE_ORGANISATION])) {
|
||||
$body .= ' from:' . $item['author-nick'] . ' from:' . $item['author-addr'];
|
||||
$body .= ' from_' . $item['author-nick'] . ' from_' . $item['author-addr'];
|
||||
}
|
||||
|
||||
if ($item['author-id'] != $item['owner-id']) {
|
||||
if ($item['owner-contact-type'] == Contact::TYPE_COMMUNITY) {
|
||||
$body .= ' group:' . $item['owner-nick'] . ' group:' . $item['owner-addr'];
|
||||
$body .= ' group_' . $item['owner-nick'] . ' group_' . $item['owner-addr'];
|
||||
} elseif (in_array($item['owner-contact-type'], [Contact::TYPE_PERSON, Contact::TYPE_NEWS, Contact::TYPE_ORGANISATION])) {
|
||||
$body .= ' from:' . $item['owner-nick'] . ' from:' . $item['owner-addr'];
|
||||
$body .= ' from_' . $item['owner-nick'] . ' from_' . $item['owner-addr'];
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -231,15 +248,20 @@ class Engagement
|
|||
continue;
|
||||
}
|
||||
|
||||
if (($contact['contact-type'] == Contact::TYPE_COMMUNITY) && !strpos($body, 'group:' . $contact['addr'])) {
|
||||
$body .= ' group:' . $contact['nick'] . ' group:' . $contact['addr'];
|
||||
if (($contact['contact-type'] == Contact::TYPE_COMMUNITY) && !strpos($body, 'group_' . $contact['addr'])) {
|
||||
$body .= ' group_' . $contact['nick'] . ' group_' . $contact['addr'];
|
||||
} elseif (in_array($contact['contact-type'], [Contact::TYPE_PERSON, Contact::TYPE_NEWS, Contact::TYPE_ORGANISATION])) {
|
||||
$body .= ' to:' . $contact['nick'] . ' to:' . $contact['addr'];
|
||||
$body .= ' to_' . $contact['nick'] . ' to_' . $contact['addr'];
|
||||
}
|
||||
}
|
||||
|
||||
foreach ($tags as $tag) {
|
||||
$body .= ' tag:' . $tag;
|
||||
$body .= ' tag_' . $tag;
|
||||
}
|
||||
|
||||
if (!empty($item['language'])) {
|
||||
$languages = json_decode($item['language'], true);
|
||||
$body .= ' language_' . array_key_first($languages);
|
||||
}
|
||||
|
||||
$body .= ' ' . $item['title'] . ' ' . $item['content-warning'] . ' ' . $item['body'];
|
||||
|
@ -293,4 +315,12 @@ class Engagement
|
|||
|
||||
return DateTimeFormat::utc('now - ' . DI::config()->get('channel', 'engagement_hours') . ' hour');
|
||||
}
|
||||
|
||||
/**
 * Rewrite "keyword:value" search terms into the quoted "keyword_value" form
 *
 * Applied for every entry of Engagement::KEYWORDS, so that the terms match
 * the "keyword_value" tokens written into the search text.
 *
 * @param string $fullTextSearch Search string
 * @return string Search string with the keyword terms rewritten
 */
public static function escapeKeywords(string $fullTextSearch): string
{
	foreach (Engagement::KEYWORDS as $keyword) {
		// The leading \b keeps a keyword from matching the tail of a longer
		// word, e.g. "to" inside "photo:x".
		$rewritten = preg_replace('~\b(' . $keyword . '):(.[\w\*@\.-]+)~', '"$1_$2"', $fullTextSearch);

		// preg_replace() returns null on failure; keep the previous value
		// then, since this method is declared to return a string.
		if ($rewritten !== null) {
			$fullTextSearch = $rewritten;
		}
	}

	return $fullTextSearch;
}
|
||||
}
|
||||
|
|
93
src/Model/Post/SearchIndex.php
Normal file
93
src/Model/Post/SearchIndex.php
Normal file
|
@ -0,0 +1,93 @@
|
|||
<?php
|
||||
/**
|
||||
* @copyright Copyright (C) 2010-2024, the Friendica project
|
||||
*
|
||||
* @license GNU AGPL version 3 or any later version
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as
|
||||
* published by the Free Software Foundation, either version 3 of the
|
||||
* License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Affero General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*
|
||||
*/
|
||||
|
||||
namespace Friendica\Model\Post;
|
||||
|
||||
use Friendica\Core\Logger;
|
||||
use Friendica\Database\Database;
|
||||
use Friendica\Database\DBA;
|
||||
use Friendica\DI;
|
||||
use Friendica\Model\Post;
|
||||
use Friendica\Util\DateTimeFormat;
|
||||
|
||||
class SearchIndex
{
	/**
	 * Store the post-searchindex row of a post
	 *
	 * Nothing is written when a maximum search age is configured and
	 * $created lies before it. Otherwise the row is written through
	 * DBA::insert() with Database::INSERT_UPDATE.
	 *
	 * @param int    $uri_id  Value for the "uri-id" column
	 * @param string $network Value for the "network" column
	 * @param int    $private Value for the "private" column
	 * @param string $created Value for the "created" column
	 * @param bool   $refresh Passed on to Post\Engagement::getSearchTextForUriId()
	 */
	public static function insert(int $uri_id, string $network, int $private, string $created, bool $refresh = false)
	{
		$cutoff = self::searchAgeDateLimit();
		$tooOld = !empty($cutoff) && (strtotime($created) < strtotime($cutoff));
		if ($tooOld) {
			return;
		}

		$searchtext = Post\Engagement::getSearchTextForUriId($uri_id, $refresh);

		return DBA::insert('post-searchindex', [
			'uri-id'     => $uri_id,
			'network'    => $network,
			'private'    => $private,
			'created'    => $created,
			'searchtext' => $searchtext,
		], Database::INSERT_UPDATE);
	}

	/**
	 * Rebuild the search text of an existing post-searchindex row
	 *
	 * @param int $uri_id uri-id of the row to update
	 */
	public static function update(int $uri_id)
	{
		$fields = ['searchtext' => Post\Engagement::getSearchTextForUriId($uri_id, true)];

		return DBA::update('post-searchindex', $fields, ['uri-id' => $uri_id]);
	}

	/**
	 * Delete the rows that are older than the configured maximum search age
	 *
	 * Does nothing when no maximum age is configured.
	 *
	 * @return void
	 */
	public static function expire()
	{
		$cutoff = self::searchAgeDateLimit();
		if (!empty($cutoff)) {
			DBA::delete('post-searchindex', ["`created` < ?", $cutoff]);
			Logger::notice('Cleared expired searchindex entries', ['limit' => $cutoff, 'rows' => DBA::affectedRows()]);
		}
	}

	/**
	 * Date before which posts are not kept in the search index
	 *
	 * Derived from the config value "system" / "search_age_days".
	 *
	 * @return string Result of DateTimeFormat::utc(), or an empty string when no age is configured
	 */
	public static function searchAgeDateLimit(): string
	{
		$days = DI::config()->get('system', 'search_age_days');

		return empty($days) ? '' : DateTimeFormat::utc('now - ' . $days . ' day');
	}
}
|
|
@ -140,6 +140,7 @@ class Site extends BaseAdmin
|
|||
$temppath = (!empty($_POST['temppath']) ? trim($_POST['temppath']) : '');
|
||||
$singleuser = (!empty($_POST['singleuser']) ? trim($_POST['singleuser']) : '');
|
||||
$only_tag_search = !empty($_POST['only_tag_search']);
|
||||
$search_age_days = (!empty($_POST['search_age_days']) ? intval($_POST['search_age_days']) : 0);
|
||||
$compute_circle_counts = !empty($_POST['compute_circle_counts']);
|
||||
$process_view = !empty($_POST['process_view']);
|
||||
$archival_days = (!empty($_POST['archival_days']) ? intval($_POST['archival_days']) : 0);
|
||||
|
@ -314,7 +315,8 @@ class Site extends BaseAdmin
|
|||
|
||||
$transactionConfig->set('system', 'temppath', $temppath);
|
||||
|
||||
$transactionConfig->set('system', 'only_tag_search' , $only_tag_search);
|
||||
$transactionConfig->set('system', 'only_tag_search', $only_tag_search);
|
||||
$transactionConfig->set('system', 'search_age_days', $search_age_days);
|
||||
$transactionConfig->set('system', 'compute_circle_counts', $compute_circle_counts);
|
||||
$transactionConfig->set('system', 'process_view', $process_view);
|
||||
$transactionConfig->set('system', 'archival_days', $archival_days);
|
||||
|
@ -567,6 +569,7 @@ class Site extends BaseAdmin
|
|||
'$itemspage_network_mobile' => ['itemspage_network_mobile', DI::l10n()->t('Items per page for mobile devices'), DI::config()->get('system', 'itemspage_network_mobile'), DI::l10n()->t('Number of items per page in stream pages (network, community, profile/contact statuses, search) for mobile devices.')],
|
||||
'$temppath' => ['temppath', DI::l10n()->t('Temp path'), DI::config()->get('system', 'temppath'), DI::l10n()->t('If you have a restricted system where the webserver can\'t access the system temp path, enter another path here.')],
|
||||
'$only_tag_search' => ['only_tag_search', DI::l10n()->t('Only search in tags'), DI::config()->get('system', 'only_tag_search'), DI::l10n()->t('On large systems the text search can slow down the system extremely.')],
|
||||
'$search_age_days' => ['search_age_days', DI::l10n()->t('Maximum age of items in the search table'), DI::config()->get('system', 'search_age_days'), DI::l10n()->t('Maximum age of items in the search table in days. Lower values will increase the performance and reduce disk usage. 0 means no age restriction.')],
|
||||
'$compute_circle_counts' => ['compute_circle_counts', DI::l10n()->t('Generate counts per contact circle when calculating network count'), DI::config()->get('system', 'compute_circle_counts'), DI::l10n()->t('On systems with users that heavily use contact circles the query can be very expensive.')],
|
||||
'$process_view' => ['process_view', DI::l10n()->t('Process "view" activities'), DI::config()->get('system', 'process_view'), DI::l10n()->t('"view" activities are mostly geberated by Peertube systems. Per default they are not processed for performance reasons. Only activate this option on performant system.')],
|
||||
'$archival_days' => ['archival_days', DI::l10n()->t('Days, after which a contact is archived'), DI::config()->get('system', 'archival_days'), DI::l10n()->t('Number of days that we try to deliver content or to update the contact data before we archive a contact.')],
|
||||
|
|
|
@ -23,7 +23,6 @@ namespace Friendica\Module\Api\Mastodon;
|
|||
|
||||
use Friendica\Core\Logger;
|
||||
use Friendica\Core\Protocol;
|
||||
use Friendica\Core\System;
|
||||
use Friendica\Database\DBA;
|
||||
use Friendica\DI;
|
||||
use Friendica\Model\Contact;
|
||||
|
@ -154,10 +153,9 @@ class Search extends BaseApi
|
|||
substr($q, 1), 0, $uid, Protocol::ACTIVITYPUB, Protocol::DFRN, Protocol::DIASPORA, Protocol::OSTATUS, $uid, 0];
|
||||
$table = 'tag-search-view';
|
||||
} else {
|
||||
$condition = ["`uri-id` IN (SELECT `uri-id` FROM `post-content` WHERE MATCH (`title`, `content-warning`, `body`) AGAINST (? IN BOOLEAN MODE))
|
||||
AND (`uid` = ? OR (`uid` = ? AND NOT `global`)) AND (`network` IN (?, ?, ?, ?) OR (`uid` = ? AND `uid` != ?))",
|
||||
str_replace('@', ' ', $q), 0, $uid, Protocol::ACTIVITYPUB, Protocol::DFRN, Protocol::DIASPORA, Protocol::OSTATUS, $uid, 0];
|
||||
$table = 'post-user-view';
|
||||
$q = Post\Engagement::escapeKeywords($q);
|
||||
$condition = ["MATCH (`searchtext`) AGAINST (? IN BOOLEAN MODE) and (private = ? OR `uri-id` in (SELECT `uri-id` FROM `post-user` where `uid` = ?))", $q, Item::PUBLIC, $uid];
|
||||
$table = 'post-searchindex';
|
||||
}
|
||||
|
||||
if (!empty($max_id)) {
|
||||
|
|
|
@ -398,11 +398,7 @@ class Timeline extends BaseModule
|
|||
}
|
||||
|
||||
if (!empty($channel->fullTextSearch)) {
|
||||
$search = $channel->fullTextSearch;
|
||||
foreach (Engagement::KEYWORDS as $keyword) {
|
||||
$search = preg_replace('~(' . $keyword . ':.[\w@\.-]+)~', '"$1"', $search);
|
||||
}
|
||||
$condition = DBA::mergeConditions($condition, ["MATCH (`searchtext`) AGAINST (? IN BOOLEAN MODE)", $search]);
|
||||
$condition = DBA::mergeConditions($condition, ["MATCH (`searchtext`) AGAINST (? IN BOOLEAN MODE)", Engagement::escapeKeywords($channel->fullTextSearch)]);
|
||||
}
|
||||
|
||||
if (!empty($channel->includeTags)) {
|
||||
|
|
|
@ -123,6 +123,8 @@ class Cron
|
|||
|
||||
Worker::add(Worker::PRIORITY_LOW, 'ExpireActivities');
|
||||
|
||||
Worker::add(Worker::PRIORITY_LOW, 'ExpireSearchIndex');
|
||||
|
||||
Worker::add(Worker::PRIORITY_LOW, 'RemoveUnusedTags');
|
||||
|
||||
Worker::add(Worker::PRIORITY_LOW, 'RemoveUnusedContacts');
|
||||
|
|
35
src/Worker/ExpireSearchIndex.php
Normal file
35
src/Worker/ExpireSearchIndex.php
Normal file
|
@ -0,0 +1,35 @@
|
|||
<?php
|
||||
/**
|
||||
* @copyright Copyright (C) 2010-2024, the Friendica project
|
||||
*
|
||||
* @license GNU AGPL version 3 or any later version
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as
|
||||
* published by the Free Software Foundation, either version 3 of the
|
||||
* License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Affero General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*
|
||||
*/
|
||||
|
||||
namespace Friendica\Worker;
|
||||
|
||||
use Friendica\Model\Post;
|
||||
|
||||
/**
|
||||
* Expire old search index entries
|
||||
*/
|
||||
class ExpireSearchIndex
|
||||
{
|
||||
/**
 * Run the search index expiry by delegating to Post\SearchIndex::expire().
 *
 * Both parameters are accepted but never read inside this method body.
 * NOTE(review): they presumably exist to match a common worker/hook call
 * signature - confirm against the worker dispatcher.
 *
 * @param mixed $param         Unused here; defaults to an empty string.
 * @param mixed $hook_function Unused here; defaults to an empty string.
 *
 * @return void No value is returned.
 */
public static function execute($param = '', $hook_function = '')
|
||||
{
|
||||
// All expiry logic lives in the model class; this method only forwards the call.
Post\SearchIndex::expire();
|
||||
}
|
||||
}
|
|
@ -56,7 +56,7 @@ use Friendica\Database\DBA;
|
|||
|
||||
// This file is required several times during the test in DbaDefinition which justifies this condition
|
||||
if (!defined('DB_UPDATE_VERSION')) {
|
||||
define('DB_UPDATE_VERSION', 1546);
|
||||
define('DB_UPDATE_VERSION', 1547);
|
||||
}
|
||||
|
||||
return [
|
||||
|
@ -1319,7 +1319,6 @@ return [
|
|||
"PRIMARY" => ["uri-id"],
|
||||
"plink" => ["plink(191)"],
|
||||
"resource-id" => ["resource-id"],
|
||||
"title-content-warning-body" => ["FULLTEXT", "title", "content-warning", "body"],
|
||||
"quote-uri-id" => ["quote-uri-id"],
|
||||
]
|
||||
],
|
||||
|
@ -1480,6 +1479,21 @@ return [
|
|||
"PRIMARY" => ["uri-id", "id"],
|
||||
]
|
||||
],
|
||||
"post-searchindex" => [
|
||||
"comment" => "Content for all posts",
|
||||
"fields" => [
|
||||
"uri-id" => ["type" => "int unsigned", "not null" => "1", "primary" => "1", "foreign" => ["item-uri" => "id"], "comment" => "Id of the item-uri table entry that contains the item uri"],
|
||||
"network" => ["type" => "char(4)", "comment" => ""],
|
||||
"private" => ["type" => "tinyint unsigned", "comment" => "0=public, 1=private, 2=unlisted"],
|
||||
"searchtext" => ["type" => "mediumtext", "comment" => "Simplified text for the full text search"],
|
||||
"created" => ["type" => "datetime", "comment" => ""],
|
||||
],
|
||||
"indexes" => [
|
||||
"PRIMARY" => ["uri-id"],
|
||||
"created" => ["created"],
|
||||
"searchtext" => ["FULLTEXT", "searchtext"],
|
||||
]
|
||||
],
|
||||
"post-tag" => [
|
||||
"comment" => "post relation to tags",
|
||||
"fields" => [
|
||||
|
@ -1708,7 +1722,6 @@ return [
|
|||
"indexes" => [
|
||||
"PRIMARY" => ["id"],
|
||||
"uid_is-default" => ["uid", "is-default"],
|
||||
"pub_keywords" => ["FULLTEXT", "pub_keywords"],
|
||||
]
|
||||
],
|
||||
"profile_field" => [
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -116,6 +116,7 @@
|
|||
<h2>{{$performance}}</h2>
|
||||
{{include file="field_checkbox.tpl" field=$compute_circle_counts}}
|
||||
{{include file="field_checkbox.tpl" field=$only_tag_search}}
|
||||
{{include file="field_input.tpl" field=$search_age_days}}
|
||||
{{include file="field_input.tpl" field=$max_comments}}
|
||||
{{include file="field_input.tpl" field=$max_display_comments}}
|
||||
{{include file="field_input.tpl" field=$itemspage_network}}
|
||||
|
|
|
@ -250,6 +250,7 @@
|
|||
<div class="panel-body">
|
||||
{{include file="field_checkbox.tpl" field=$compute_circle_counts}}
|
||||
{{include file="field_checkbox.tpl" field=$only_tag_search}}
|
||||
{{include file="field_input.tpl" field=$search_age_days}}
|
||||
{{include file="field_input.tpl" field=$max_comments}}
|
||||
{{include file="field_input.tpl" field=$max_display_comments}}
|
||||
{{include file="field_input.tpl" field=$itemspage_network}}
|
||||
|
|
Loading…
Reference in a new issue