Expire post search index entries
This commit is contained in:
parent
919f97c9a0
commit
6389133575
12 changed files with 397 additions and 299 deletions
|
@ -261,8 +261,12 @@ class BBCode
|
||||||
if (!empty($media['description']) && (stripos($text, $media['description']) === false)) {
|
if (!empty($media['description']) && (stripos($text, $media['description']) === false)) {
|
||||||
$text .= ' ' . $media['description'];
|
$text .= ' ' . $media['description'];
|
||||||
}
|
}
|
||||||
if (in_array($media['type'], [Post\Media::HTML, Post\Media::ACTIVITY]) && !empty($media['name']) && (stripos($text, $media['name']) === false)) {
|
if (in_array($media['type'], [Post\Media::HTML, Post\Media::ACTIVITY])) {
|
||||||
$text .= ' ' . $media['name'];
|
foreach (['name', 'author-name', 'publisher-name'] as $key) {
|
||||||
|
// empty() must wrap only the array lookup; wrapping the whole condition reads a possibly missing key directly and hands it to stripos().
if (!empty($media[$key]) && stripos($text, $media[$key]) === false) {
|
||||||
|
$text .= ' ' . $media[$key];
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1386,7 +1386,14 @@ class PostUpdate
|
||||||
|
|
||||||
$rows = 0;
|
$rows = 0;
|
||||||
|
|
||||||
$posts = Post::selectPosts(['uri-id', 'network', 'private'], ["`uri-id` < ? AND `gravity` IN (?, ?)", $id, Item::GRAVITY_COMMENT, Item::GRAVITY_PARENT], ['order' => ['uri-id' => true], 'limit' => 1000]);
|
$condition = ["`uri-id` < ? AND `gravity` IN (?, ?)", $id, Item::GRAVITY_COMMENT, Item::GRAVITY_PARENT];
|
||||||
|
|
||||||
|
$limit = Post\SearchIndex::searchAgeDateLimit();
|
||||||
|
if (!empty($limit)) {
|
||||||
|
DBA::mergeConditions($condition, ["`created` > ?", $limit]);
|
||||||
|
}
|
||||||
|
|
||||||
|
$posts = Post::selectPosts(['uri-id', 'network', 'private', 'created'], $condition, ['order' => ['uri-id' => true], 'limit' => 1000]);
|
||||||
|
|
||||||
if (DBA::errorNo() != 0) {
|
if (DBA::errorNo() != 0) {
|
||||||
Logger::error('Database error', ['no' => DBA::errorNo(), 'message' => DBA::errorMessage()]);
|
Logger::error('Database error', ['no' => DBA::errorNo(), 'message' => DBA::errorMessage()]);
|
||||||
|
@ -1395,7 +1402,7 @@ class PostUpdate
|
||||||
|
|
||||||
while ($post = Post::fetch($posts)) {
|
while ($post = Post::fetch($posts)) {
|
||||||
$id = $post['uri-id'];
|
$id = $post['uri-id'];
|
||||||
Post\SearchIndex::insert($post['uri-id'], $post['network'], $post['private']);
|
Post\SearchIndex::insert($post['uri-id'], $post['network'], $post['private'], $post['created'], true);
|
||||||
++$rows;
|
++$rows;
|
||||||
}
|
}
|
||||||
DBA::close($posts);
|
DBA::close($posts);
|
||||||
|
|
|
@ -1450,7 +1450,7 @@ class Item
|
||||||
$engagement_uri_id = Post\Engagement::storeFromItem($posted_item);
|
$engagement_uri_id = Post\Engagement::storeFromItem($posted_item);
|
||||||
|
|
||||||
if (in_array($posted_item['gravity'], [self::GRAVITY_PARENT, self::GRAVITY_COMMENT])) {
|
if (in_array($posted_item['gravity'], [self::GRAVITY_PARENT, self::GRAVITY_COMMENT])) {
|
||||||
Post\SearchIndex::insert($posted_item['uri-id'], $posted_item['network'], $posted_item['private']);
|
Post\SearchIndex::insert($posted_item['uri-id'], $posted_item['network'], $posted_item['private'], $posted_item['created']);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (($posted_item['gravity'] == self::GRAVITY_ACTIVITY) && ($posted_item['verb'] == Activity::ANNOUNCE) && ($posted_item['parent-uri-id'] == $posted_item['thr-parent-id'])) {
|
if (($posted_item['gravity'] == self::GRAVITY_ACTIVITY) && ($posted_item['verb'] == Activity::ANNOUNCE) && ($posted_item['parent-uri-id'] == $posted_item['thr-parent-id'])) {
|
||||||
|
|
|
@ -169,6 +169,9 @@ class Engagement
|
||||||
$post = Post::selectFirstPost(['uri-id', 'network', 'title', 'content-warning', 'body', 'private',
|
$post = Post::selectFirstPost(['uri-id', 'network', 'title', 'content-warning', 'body', 'private',
|
||||||
'author-id', 'author-contact-type', 'author-nick', 'author-addr', 'author-gsid',
|
'author-id', 'author-contact-type', 'author-nick', 'author-addr', 'author-gsid',
|
||||||
'owner-id', 'owner-contact-type', 'owner-nick', 'owner-addr', 'owner-gsid'], ['uri-id' => $uri_id]);
|
'owner-id', 'owner-contact-type', 'owner-nick', 'owner-addr', 'owner-gsid'], ['uri-id' => $uri_id]);
|
||||||
|
if (empty($post['uri-id'])) {
|
||||||
|
return '';
|
||||||
|
}
|
||||||
return self::getSearchTextForItem($post);
|
return self::getSearchTextForItem($post);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -316,7 +319,7 @@ class Engagement
|
||||||
public static function escapeKeywords(string $fullTextSearch): string
|
public static function escapeKeywords(string $fullTextSearch): string
|
||||||
{
|
{
|
||||||
foreach (Engagement::KEYWORDS as $keyword) {
|
foreach (Engagement::KEYWORDS as $keyword) {
|
||||||
$fullTextSearch = preg_replace('~(' . $keyword . '):(.[\w\*@\.-]+)~', '$1_$2', $fullTextSearch);
|
$fullTextSearch = preg_replace('~(' . $keyword . '):(.[\w\*@\.-]+)~', '"$1_$2"', $fullTextSearch);
|
||||||
}
|
}
|
||||||
return $fullTextSearch;
|
return $fullTextSearch;
|
||||||
}
|
}
|
||||||
|
|
|
@ -21,9 +21,12 @@
|
||||||
|
|
||||||
namespace Friendica\Model\Post;
|
namespace Friendica\Model\Post;
|
||||||
|
|
||||||
|
use Friendica\Core\Logger;
|
||||||
use Friendica\Database\Database;
|
use Friendica\Database\Database;
|
||||||
use Friendica\Database\DBA;
|
use Friendica\Database\DBA;
|
||||||
|
use Friendica\DI;
|
||||||
use Friendica\Model\Post;
|
use Friendica\Model\Post;
|
||||||
|
use Friendica\Util\DateTimeFormat;
|
||||||
|
|
||||||
class SearchIndex
|
class SearchIndex
|
||||||
{
|
{
|
||||||
|
@ -33,14 +36,22 @@ class SearchIndex
|
||||||
* @param int $uri_id
|
* @param int $uri_id
|
||||||
* @param string $network
|
* @param string $network
|
||||||
* @param int $private
|
* @param int $private
|
||||||
|
* @param string $created
|
||||||
|
* @param bool $refresh
|
||||||
*/
|
*/
|
||||||
public static function insert(int $uri_id, string $network, int $private)
|
public static function insert(int $uri_id, string $network, int $private, string $created, bool $refresh = false)
|
||||||
{
|
{
|
||||||
|
$limit = self::searchAgeDateLimit();
|
||||||
|
if (!empty($limit) && (strtotime($created) < strtotime($limit))) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
$search = [
|
$search = [
|
||||||
'uri-id' => $uri_id,
|
'uri-id' => $uri_id,
|
||||||
'network' => $network,
|
'network' => $network,
|
||||||
'private' => $private,
|
'private' => $private,
|
||||||
'searchtext' => Post\Engagement::getSearchTextForUriId($uri_id),
|
'created' => $created,
|
||||||
|
'searchtext' => Post\Engagement::getSearchTextForUriId($uri_id, $refresh),
|
||||||
];
|
];
|
||||||
return DBA::insert('post-searchindex', $search, Database::INSERT_UPDATE);
|
return DBA::insert('post-searchindex', $search, Database::INSERT_UPDATE);
|
||||||
}
|
}
|
||||||
|
@ -55,4 +66,23 @@ class SearchIndex
|
||||||
$searchtext = Post\Engagement::getSearchTextForUriId($uri_id, true);
|
$searchtext = Post\Engagement::getSearchTextForUriId($uri_id, true);
|
||||||
return DBA::update('post-searchindex', ['searchtext' => $searchtext], ['uri-id' => $uri_id]);
|
return DBA::update('post-searchindex', ['searchtext' => $searchtext], ['uri-id' => $uri_id]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Removes search index entries whose `created` date lies before the configured age limit.
 *
 * Nothing is deleted when searchAgeDateLimit() returns an empty value.
 * The number of affected rows is written to the log afterwards.
 */
public static function expire()
{
    $cutoff = self::searchAgeDateLimit();
    if (!empty($cutoff)) {
        DBA::delete('post-searchindex', ["`created` < ?", $cutoff]);
        Logger::notice('Cleared expired searchindex entries', ['limit' => $cutoff, 'rows' => DBA::affectedRows()]);
    }
}
|
||||||
|
|
||||||
|
/**
 * Returns the date before which posts are considered too old for the search index.
 *
 * The maximum age in days is read from the "system" / "search_age_days" config value.
 *
 * @return string Result of DateTimeFormat::utc() for "now - <days> day", or an empty
 *                string when no positive number of days is configured
 */
public static function searchAgeDateLimit(): string
{
    // Normalise to an integer so that only a plain number ends up in the date expression below.
    $days = intval(DI::config()->get('system', 'search_age_days'));

    // Zero or a negative value cannot describe a maximum age; treat it as "no limit"
    // instead of building a cutoff from it (the cutoff is used for deleting entries).
    if ($days <= 0) {
        return '';
    }

    return DateTimeFormat::utc('now - ' . $days . ' day');
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -140,6 +140,7 @@ class Site extends BaseAdmin
|
||||||
$temppath = (!empty($_POST['temppath']) ? trim($_POST['temppath']) : '');
|
$temppath = (!empty($_POST['temppath']) ? trim($_POST['temppath']) : '');
|
||||||
$singleuser = (!empty($_POST['singleuser']) ? trim($_POST['singleuser']) : '');
|
$singleuser = (!empty($_POST['singleuser']) ? trim($_POST['singleuser']) : '');
|
||||||
$only_tag_search = !empty($_POST['only_tag_search']);
|
$only_tag_search = !empty($_POST['only_tag_search']);
|
||||||
|
$search_age_days = (!empty($_POST['search_age_days']) ? intval($_POST['search_age_days']) : 0);
|
||||||
$compute_circle_counts = !empty($_POST['compute_circle_counts']);
|
$compute_circle_counts = !empty($_POST['compute_circle_counts']);
|
||||||
$process_view = !empty($_POST['process_view']);
|
$process_view = !empty($_POST['process_view']);
|
||||||
$archival_days = (!empty($_POST['archival_days']) ? intval($_POST['archival_days']) : 0);
|
$archival_days = (!empty($_POST['archival_days']) ? intval($_POST['archival_days']) : 0);
|
||||||
|
@ -315,6 +316,7 @@ class Site extends BaseAdmin
|
||||||
$transactionConfig->set('system', 'temppath', $temppath);
|
$transactionConfig->set('system', 'temppath', $temppath);
|
||||||
|
|
||||||
$transactionConfig->set('system', 'only_tag_search', $only_tag_search);
|
$transactionConfig->set('system', 'only_tag_search', $only_tag_search);
|
||||||
|
$transactionConfig->set('system', 'search_age_days', $search_age_days);
|
||||||
$transactionConfig->set('system', 'compute_circle_counts', $compute_circle_counts);
|
$transactionConfig->set('system', 'compute_circle_counts', $compute_circle_counts);
|
||||||
$transactionConfig->set('system', 'process_view', $process_view);
|
$transactionConfig->set('system', 'process_view', $process_view);
|
||||||
$transactionConfig->set('system', 'archival_days', $archival_days);
|
$transactionConfig->set('system', 'archival_days', $archival_days);
|
||||||
|
@ -567,6 +569,7 @@ class Site extends BaseAdmin
|
||||||
'$itemspage_network_mobile' => ['itemspage_network_mobile', DI::l10n()->t('Items per page for mobile devices'), DI::config()->get('system', 'itemspage_network_mobile'), DI::l10n()->t('Number of items per page in stream pages (network, community, profile/contact statuses, search) for mobile devices.')],
|
'$itemspage_network_mobile' => ['itemspage_network_mobile', DI::l10n()->t('Items per page for mobile devices'), DI::config()->get('system', 'itemspage_network_mobile'), DI::l10n()->t('Number of items per page in stream pages (network, community, profile/contact statuses, search) for mobile devices.')],
|
||||||
'$temppath' => ['temppath', DI::l10n()->t('Temp path'), DI::config()->get('system', 'temppath'), DI::l10n()->t('If you have a restricted system where the webserver can\'t access the system temp path, enter another path here.')],
|
'$temppath' => ['temppath', DI::l10n()->t('Temp path'), DI::config()->get('system', 'temppath'), DI::l10n()->t('If you have a restricted system where the webserver can\'t access the system temp path, enter another path here.')],
|
||||||
'$only_tag_search' => ['only_tag_search', DI::l10n()->t('Only search in tags'), DI::config()->get('system', 'only_tag_search'), DI::l10n()->t('On large systems the text search can slow down the system extremely.')],
|
'$only_tag_search' => ['only_tag_search', DI::l10n()->t('Only search in tags'), DI::config()->get('system', 'only_tag_search'), DI::l10n()->t('On large systems the text search can slow down the system extremely.')],
|
||||||
|
'$search_age_days' => ['search_age_days', DI::l10n()->t('Maximum age of items in the search table'), DI::config()->get('system', 'search_age_days'), DI::l10n()->t('Maximum age of items in the search table in days. Lower values will increase the performance and reduce disk usage. 0 means no age restriction.')],
|
||||||
'$compute_circle_counts' => ['compute_circle_counts', DI::l10n()->t('Generate counts per contact circle when calculating network count'), DI::config()->get('system', 'compute_circle_counts'), DI::l10n()->t('On systems with users that heavily use contact circles the query can be very expensive.')],
|
'$compute_circle_counts' => ['compute_circle_counts', DI::l10n()->t('Generate counts per contact circle when calculating network count'), DI::config()->get('system', 'compute_circle_counts'), DI::l10n()->t('On systems with users that heavily use contact circles the query can be very expensive.')],
|
||||||
'$process_view' => ['process_view', DI::l10n()->t('Process "view" activities'), DI::config()->get('system', 'process_view'), DI::l10n()->t('"view" activities are mostly geberated by Peertube systems. Per default they are not processed for performance reasons. Only activate this option on performant system.')],
|
'$process_view' => ['process_view', DI::l10n()->t('Process "view" activities'), DI::config()->get('system', 'process_view'), DI::l10n()->t('"view" activities are mostly geberated by Peertube systems. Per default they are not processed for performance reasons. Only activate this option on performant system.')],
|
||||||
'$archival_days' => ['archival_days', DI::l10n()->t('Days, after which a contact is archived'), DI::config()->get('system', 'archival_days'), DI::l10n()->t('Number of days that we try to deliver content or to update the contact data before we archive a contact.')],
|
'$archival_days' => ['archival_days', DI::l10n()->t('Days, after which a contact is archived'), DI::config()->get('system', 'archival_days'), DI::l10n()->t('Number of days that we try to deliver content or to update the contact data before we archive a contact.')],
|
||||||
|
|
|
@ -123,6 +123,8 @@ class Cron
|
||||||
|
|
||||||
Worker::add(Worker::PRIORITY_LOW, 'ExpireActivities');
|
Worker::add(Worker::PRIORITY_LOW, 'ExpireActivities');
|
||||||
|
|
||||||
|
Worker::add(Worker::PRIORITY_LOW, 'ExpireSearchIndex');
|
||||||
|
|
||||||
Worker::add(Worker::PRIORITY_LOW, 'RemoveUnusedTags');
|
Worker::add(Worker::PRIORITY_LOW, 'RemoveUnusedTags');
|
||||||
|
|
||||||
Worker::add(Worker::PRIORITY_LOW, 'RemoveUnusedContacts');
|
Worker::add(Worker::PRIORITY_LOW, 'RemoveUnusedContacts');
|
||||||
|
|
35
src/Worker/ExpireSearchIndex.php
Normal file
35
src/Worker/ExpireSearchIndex.php
Normal file
|
@ -0,0 +1,35 @@
|
||||||
|
<?php
|
||||||
|
/**
|
||||||
|
* @copyright Copyright (C) 2010-2024, the Friendica project
|
||||||
|
*
|
||||||
|
* @license GNU AGPL version 3 or any later version
|
||||||
|
*
|
||||||
|
* This program is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU Affero General Public License as
|
||||||
|
* published by the Free Software Foundation, either version 3 of the
|
||||||
|
* License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU Affero General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Affero General Public License
|
||||||
|
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
namespace Friendica\Worker;
|
||||||
|
|
||||||
|
use Friendica\Model\Post;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Expire old search index entries
|
||||||
|
*/
|
||||||
|
class ExpireSearchIndex
{
    /**
     * Worker entry point: delegates to Post\SearchIndex::expire().
     *
     * @param mixed $param         Not used in this method
     * @param mixed $hook_function Not used in this method
     */
    public static function execute($param = '', $hook_function = '')
    {
        Post\SearchIndex::expire();
    }
}
|
|
@ -1487,9 +1487,11 @@ return [
|
||||||
"network" => ["type" => "char(4)", "comment" => ""],
|
"network" => ["type" => "char(4)", "comment" => ""],
|
||||||
"private" => ["type" => "tinyint unsigned", "comment" => "0=public, 1=private, 2=unlisted"],
|
"private" => ["type" => "tinyint unsigned", "comment" => "0=public, 1=private, 2=unlisted"],
|
||||||
"searchtext" => ["type" => "mediumtext", "comment" => "Simplified text for the full text search"],
|
"searchtext" => ["type" => "mediumtext", "comment" => "Simplified text for the full text search"],
|
||||||
|
"created" => ["type" => "datetime", "comment" => ""],
|
||||||
],
|
],
|
||||||
"indexes" => [
|
"indexes" => [
|
||||||
"PRIMARY" => ["uri-id"],
|
"PRIMARY" => ["uri-id"],
|
||||||
|
"created" => ["created"],
|
||||||
"searchtext" => ["FULLTEXT", "searchtext"],
|
"searchtext" => ["FULLTEXT", "searchtext"],
|
||||||
]
|
]
|
||||||
],
|
],
|
||||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -116,6 +116,7 @@
|
||||||
<h2>{{$performance}}</h2>
|
<h2>{{$performance}}</h2>
|
||||||
{{include file="field_checkbox.tpl" field=$compute_circle_counts}}
|
{{include file="field_checkbox.tpl" field=$compute_circle_counts}}
|
||||||
{{include file="field_checkbox.tpl" field=$only_tag_search}}
|
{{include file="field_checkbox.tpl" field=$only_tag_search}}
|
||||||
|
{{include file="field_input.tpl" field=$search_age_days}}
|
||||||
{{include file="field_input.tpl" field=$max_comments}}
|
{{include file="field_input.tpl" field=$max_comments}}
|
||||||
{{include file="field_input.tpl" field=$max_display_comments}}
|
{{include file="field_input.tpl" field=$max_display_comments}}
|
||||||
{{include file="field_input.tpl" field=$itemspage_network}}
|
{{include file="field_input.tpl" field=$itemspage_network}}
|
||||||
|
|
|
@ -250,6 +250,7 @@
|
||||||
<div class="panel-body">
|
<div class="panel-body">
|
||||||
{{include file="field_checkbox.tpl" field=$compute_circle_counts}}
|
{{include file="field_checkbox.tpl" field=$compute_circle_counts}}
|
||||||
{{include file="field_checkbox.tpl" field=$only_tag_search}}
|
{{include file="field_checkbox.tpl" field=$only_tag_search}}
|
||||||
|
{{include file="field_input.tpl" field=$search_age_days}}
|
||||||
{{include file="field_input.tpl" field=$max_comments}}
|
{{include file="field_input.tpl" field=$max_comments}}
|
||||||
{{include file="field_input.tpl" field=$max_display_comments}}
|
{{include file="field_input.tpl" field=$max_display_comments}}
|
||||||
{{include file="field_input.tpl" field=$itemspage_network}}
|
{{include file="field_input.tpl" field=$itemspage_network}}
|
||||||
|
|
Loading…
Reference in a new issue