Compare commits

...

39 commits

Author SHA1 Message Date
Michael Vogel
25f2ad1b97
Merge pull request #13838 from MrPetovan/task/refactor-throwaway-fulltext-search
Refactor user-defined channel match
2024-01-29 16:53:41 +01:00
Hypolite Petovan
391e41d6d6 Fix exists() condition in DisposableFullTextSearch 2024-01-29 07:37:40 -05:00
Hypolite Petovan
50cc2be3d1 Improve uniqueness loop by adding an exists() call in DisposableFullTextSearch 2024-01-29 06:56:30 -05:00
Hypolite Petovan
9af5abaf0b
Merge pull request #13858 from annando/tags
Use unified functions for tag string handling
2024-01-28 23:14:09 -05:00
Michael
25f45cf116 Logging improved 2024-01-28 12:22:58 +00:00
Michael
b948b2c7cb Added tests 2024-01-28 06:56:25 +00:00
Michael
d2702dfe9c Use centralized functions for tag string handling 2024-01-28 06:32:55 +00:00
Hypolite Petovan
7397b38763 Ensure identifier uniqueness in Disposable FullTextSearch 2024-01-27 11:33:28 -05:00
Hypolite Petovan
9e738253e7
Merge pull request #13856 from annando/spam2
Compare lengths of hashtags with the content length for improved spam detection
2024-01-26 18:59:38 -05:00
Michael Vogel
120044f3da
Update src/Protocol/Relay.php
Co-authored-by: Hypolite Petovan <hypolite@mrpetovan.com>
2024-01-27 00:13:14 +01:00
Hypolite Petovan
142a563a80
Merge pull request #13857 from annando/discover
New channel option "discover"
2024-01-26 17:39:44 -05:00
Hypolite Petovan
abd5768044 Add documentation to DisposableFullTextSearch->idenfier field 2024-01-26 16:48:55 -05:00
Michael
f1173853f3 Merge remote-tracking branch 'upstream/develop' into discover 2024-01-26 13:54:25 +00:00
Michael
ef8461733b The "nosharer" widget is added to the network 2024-01-26 13:51:20 +00:00
Michael
b8f80a8d25 Compare lengths of hashtags with the content length for improved spam detection 2024-01-26 11:22:01 +00:00
Hypolite Petovan
c19af39d8a
Merge pull request #13853 from annando/error
Fixes "Counts::update(): Argument #3 ($vid) must be of type int, null given"
2024-01-25 18:12:59 -05:00
Michael Vogel
e1381cfc5c
Update src/Model/Post/Counts.php
Co-authored-by: Hypolite Petovan <hypolite@mrpetovan.com>
2024-01-25 22:16:38 +01:00
Hypolite Petovan
efe1ea5c3c
Merge pull request #13850 from annando/reaction
Fix "Data too long for column 'reaction' at row 1"
2024-01-25 15:12:50 -05:00
Michael Vogel
09edf251ee
Anti spam measures against hashtag spam (#13855) 2024-01-25 19:41:07 +01:00
Michael
dcb6fa32a1 Fixes "Counts::update(): Argument #3 ($vid) must be of type int, null given" 2024-01-25 13:20:50 +00:00
Michael
08ee1e1f63 New channel option "discover" 2024-01-25 10:50:28 +00:00
Hypolite Petovan
b3e4b9675a
Merge pull request #13851 from annando/account-overview
The account overview is now grouped by the account type
2024-01-24 14:49:14 -05:00
Hypolite Petovan
0ee87b8116 Refactor user-defined channel match
- Remove disposable full-text search feature from UserDefinedChannels repository
- Fix return value of Repository\UserDefinedChannel->match
2024-01-24 14:46:07 -05:00
Hypolite Petovan
1191024609 Add throwaway full-text search feature in a dedicated Database class
- Add explicit return type to UserDefinedChannels->current() to help IDE auto-completion
2024-01-24 14:42:45 -05:00
Michael
8f9de98c35 Updated messages.po 2024-01-23 22:11:34 +00:00
Michael
dc8b87e008 The account overview is now grouped by the account type 2024-01-23 22:05:14 +00:00
Michael
03e831b607 Fix "Data too long for column 'reaction' at row 1" 2024-01-23 09:59:47 +00:00
Hypolite Petovan
2e5046f8c3
Merge pull request #13846 from annando/search
Improved search results and performance by adding a separate search index table
2024-01-22 22:31:20 -05:00
Michael
7b638617f1 Merge remote-tracking branch 'upstream/develop' into search 2024-01-22 14:09:29 +00:00
Hypolite Petovan
ec6ff7380e
Merge pull request #13849 from annando/title
Import the image attribut "title"
2024-01-22 08:21:26 -05:00
Hypolite Petovan
a0f7625c34
Merge pull request #13848 from annando/getbodystring
Changed leftover "getBody" to "getBodyString"
2024-01-22 08:20:37 -05:00
Michael
7e88b703d8 Import the image attribut "title" 2024-01-22 07:47:38 +00:00
Michael
ad6976a407 Changed leftover "getBody" to "getBodyString" 2024-01-22 07:38:05 +00:00
Michael
39e1f2c0fc Code standards 2024-01-21 17:05:18 +00:00
Michael
5a33a494e8 Updated full text indexes 2024-01-21 16:39:28 +00:00
Michael
6389133575 Expiry post search index entries 2024-01-21 16:24:59 +00:00
Michael
919f97c9a0 Postupdate added 2024-01-17 21:10:33 +00:00
Michael
e9f7ea0afa New search keyword "language" 2024-01-17 20:25:49 +00:00
Michael
ee9a68e40c New table "post-searchindex" 2024-01-17 19:46:22 +00:00
40 changed files with 970 additions and 547 deletions

View file

@ -1,6 +1,6 @@
-- ------------------------------------------
-- Friendica 2024.03-dev (Yellow Archangel)
-- DB_UPDATE_VERSION 1546
-- DB_UPDATE_VERSION 1548
-- ------------------------------------------
@ -1243,7 +1243,7 @@ CREATE TABLE IF NOT EXISTS `post-category` (
CREATE TABLE IF NOT EXISTS `post-counts` (
`uri-id` int unsigned NOT NULL COMMENT 'Id of the item-uri table entry that contains the item uri',
`vid` smallint unsigned NOT NULL COMMENT 'Id of the verb table entry that contains the activity verbs',
`reaction` varchar(1) NOT NULL COMMENT 'Emoji Reaction',
`reaction` varchar(4) NOT NULL COMMENT 'Emoji Reaction',
`parent-uri-id` int unsigned COMMENT 'Id of the item-uri table that contains the parent uri',
`count` int unsigned DEFAULT 0 COMMENT 'Number of activities',
PRIMARY KEY(`uri-id`,`vid`,`reaction`),
@ -1293,7 +1293,6 @@ CREATE TABLE IF NOT EXISTS `post-content` (
PRIMARY KEY(`uri-id`),
INDEX `plink` (`plink`(191)),
INDEX `resource-id` (`resource-id`),
FULLTEXT INDEX `title-content-warning-body` (`title`,`content-warning`,`body`),
INDEX `quote-uri-id` (`quote-uri-id`),
FOREIGN KEY (`uri-id`) REFERENCES `item-uri` (`id`) ON UPDATE RESTRICT ON DELETE CASCADE,
FOREIGN KEY (`quote-uri-id`) REFERENCES `item-uri` (`id`) ON UPDATE RESTRICT ON DELETE CASCADE
@ -1460,6 +1459,21 @@ CREATE TABLE IF NOT EXISTS `post-question-option` (
FOREIGN KEY (`uri-id`) REFERENCES `item-uri` (`id`) ON UPDATE RESTRICT ON DELETE CASCADE
) DEFAULT COLLATE utf8mb4_general_ci COMMENT='Question option';
--
-- TABLE post-searchindex
--
-- Search index holding at most one row per item-uri entry (primary key `uri-id`).
-- Full-text lookups run against `searchtext`; rows are removed together with their item-uri row.
CREATE TABLE IF NOT EXISTS `post-searchindex` (
`uri-id` int unsigned NOT NULL COMMENT 'Id of the item-uri table entry that contains the item uri',
-- NOTE(review): column comment is empty; presumably the network identifier of the indexed post - confirm
`network` char(4) COMMENT '',
`private` tinyint unsigned COMMENT '0=public, 1=private, 2=unlisted',
`searchtext` mediumtext COMMENT 'Simplified text for the full text search',
-- NOTE(review): column comment is empty; presumably the creation date of the indexed post - confirm
`created` datetime COMMENT '',
PRIMARY KEY(`uri-id`),
INDEX `created` (`created`),
FULLTEXT INDEX `searchtext` (`searchtext`),
FOREIGN KEY (`uri-id`) REFERENCES `item-uri` (`id`) ON UPDATE RESTRICT ON DELETE CASCADE
) DEFAULT COLLATE utf8mb4_general_ci COMMENT='Content for all posts';
--
-- TABLE post-tag
--
@ -1711,7 +1725,6 @@ CREATE TABLE IF NOT EXISTS `profile` (
`net-publish` boolean NOT NULL DEFAULT '0' COMMENT 'publish profile in global directory',
PRIMARY KEY(`id`),
INDEX `uid_is-default` (`uid`,`is-default`),
FULLTEXT INDEX `pub_keywords` (`pub_keywords`),
FOREIGN KEY (`uid`) REFERENCES `user` (`uid`) ON UPDATE RESTRICT ON DELETE CASCADE
) DEFAULT COLLATE utf8mb4_general_ci COMMENT='user profiles data';

View file

@ -25,6 +25,10 @@ Predefined Channels
* Posts from people you interact with on a more than average level.
* Posts from the accounts that you follow with a more than average number of interactions.
* Posts from accounts where you activated "notify on new posts" or where you have set the channel frequency accordingly.
* Discover: Posts from contacts you don't follow, but that might be of interest for you to follow. In detail, it consists of:
* Posts from people you don't follow but you interact with on a more than average level.
* Posts from people you don't follow but that interact with you on a more than average level.
* Popular posts from people you don't follow but you interacted with or who interacted with you on any level.
* What's Hot: Posts with a more than average number of interactions.
* Language: Posts in your language.
* Followers: Posts from your followers that you don't follow.
@ -80,6 +84,7 @@ Additionally to the search for content, there are additional keywords that can b
* visibility:public
* visibility:unlisted
* visibility:private
* language - Use "language:code" to search for posts with the given language in the [ISO 639-1](https://en.wikipedia.org/wiki/ISO_639-1) format.
Remember that you can combine these keywords.
So for example you can create a channel with all posts that talk about the Fediverse - that aren't posted in the Fediverse with the search terms: "fediverse -network:apub -network:dfrn"

View file

@ -70,6 +70,7 @@ Database Tables
| [post-media](help/database/db_post-media) | Attached media |
| [post-question](help/database/db_post-question) | Question |
| [post-question-option](help/database/db_post-question-option) | Question option |
| [post-searchindex](help/database/db_post-searchindex) | Content for all posts |
| [post-tag](help/database/db_post-tag) | post relation to tags |
| [post-thread](help/database/db_post-thread) | Thread related data |
| [post-thread-user](help/database/db_post-thread-user) | Thread related data per user |

View file

@ -31,11 +31,10 @@ Indexes
------------
| Name | Fields |
| -------------------------- | -------------------------------------- |
| ------------ | ------------ |
| PRIMARY | uri-id |
| plink | plink(191) |
| resource-id | resource-id |
| title-content-warning-body | FULLTEXT, title, content-warning, body |
| quote-uri-id | quote-uri-id |
Foreign Keys

View file

@ -10,7 +10,7 @@ Fields
| ------------- | ----------------------------------------------------------- | ----------------- | ---- | --- | ------- | ----- |
| uri-id | Id of the item-uri table entry that contains the item uri | int unsigned | NO | PRI | NULL | |
| vid | Id of the verb table entry that contains the activity verbs | smallint unsigned | NO | PRI | NULL | |
| reaction | Emoji Reaction | varchar(1) | NO | PRI | NULL | |
| reaction | Emoji Reaction | varchar(4) | NO | PRI | NULL | |
| parent-uri-id | Id of the item-uri table that contains the parent uri | int unsigned | YES | | NULL | |
| count | Number of activities | int unsigned | YES | | 0 | |

View file

@ -0,0 +1,33 @@
Table post-searchindex
===========
Content for all posts
Fields
------
| Field | Description | Type | Null | Key | Default | Extra |
| ---------- | --------------------------------------------------------- | ---------------- | ---- | --- | ------- | ----- |
| uri-id | Id of the item-uri table entry that contains the item uri | int unsigned | NO | PRI | NULL | |
| network | | char(4) | YES | | NULL | |
| private | 0=public, 1=private, 2=unlisted | tinyint unsigned | YES | | NULL | |
| searchtext | Simplified text for the full text search | mediumtext | YES | | NULL | |
| created | | datetime | YES | | NULL | |
Indexes
------------
| Name | Fields |
| ---------- | -------------------- |
| PRIMARY | uri-id |
| created | created |
| searchtext | FULLTEXT, searchtext |
Foreign Keys
------------
| Field | Target Table | Target Field |
|-------|--------------|--------------|
| uri-id | [item-uri](help/database/db_item-uri) | id |
Return to [database documentation](help/database)

View file

@ -57,10 +57,9 @@ Indexes
------------
| Name | Fields |
| -------------- | ---------------------- |
| -------------- | --------------- |
| PRIMARY | id |
| uid_is-default | uid, is-default |
| pub_keywords | FULLTEXT, pub_keywords |
Foreign Keys
------------

View file

@ -21,6 +21,12 @@
namespace Friendica\Content\Conversation\Collection;
use Friendica\Content\Conversation\Entity;
/**
 * Collection of user-defined channel entities.
 */
class UserDefinedChannels extends Timelines
{
	/**
	 * Returns the element at the current iterator position.
	 *
	 * Only the declared return type differs from the parent implementation; it is narrowed so that
	 * IDEs and static analysis know the collection yields user-defined channel entities.
	 *
	 * @return Entity\UserDefinedChannel
	 */
	public function current(): Entity\UserDefinedChannel
	{
		$channel = parent::current();

		return $channel;
	}
}

View file

@ -25,6 +25,7 @@ class Channel extends Timeline
{
const WHATSHOT = 'whatshot';
const FORYOU = 'foryou';
const DISCOVER = 'discover';
const FOLLOWERS = 'followers';
const SHARERSOFSHARERS = 'sharersofsharers';
const IMAGE = 'image';

View file

@ -40,6 +40,7 @@ final class Channel extends Timeline
$tabs = [
new ChannelEntity(ChannelEntity::FORYOU, $this->l10n->t('For you'), $this->l10n->t('Posts from contacts you interact with and who interact with you'), 'y'),
new ChannelEntity(ChannelEntity::DISCOVER, $this->l10n->t('Discover'), $this->l10n->t('Posts from accounts that you don\'t follow, but that you might like.'), 'o'),
new ChannelEntity(ChannelEntity::WHATSHOT, $this->l10n->t('What\'s Hot'), $this->l10n->t('Posts with a lot of interactions'), 'h'),
new ChannelEntity(ChannelEntity::LANGUAGE, $native, $this->l10n->t('Posts in %s', $native), 'g'),
new ChannelEntity(ChannelEntity::FOLLOWERS, $this->l10n->t('Followers'), $this->l10n->t('Posts from your followers that you don\'t follow'), 'f'),
@ -54,6 +55,6 @@ final class Channel extends Timeline
public function isTimeline(string $selectedTab): bool
{
return in_array($selectedTab, [ChannelEntity::WHATSHOT, ChannelEntity::FORYOU, ChannelEntity::FOLLOWERS, ChannelEntity::SHARERSOFSHARERS, ChannelEntity::IMAGE, ChannelEntity::VIDEO, ChannelEntity::AUDIO, ChannelEntity::LANGUAGE]);
return in_array($selectedTab, [ChannelEntity::WHATSHOT, ChannelEntity::FORYOU, ChannelEntity::DISCOVER, ChannelEntity::FOLLOWERS, ChannelEntity::SHARERSOFSHARERS, ChannelEntity::IMAGE, ChannelEntity::VIDEO, ChannelEntity::AUDIO, ChannelEntity::LANGUAGE]);
}
}

View file

@ -28,6 +28,7 @@ use Friendica\Content\Conversation\Factory;
use Friendica\Core\Config\Capability\IManageConfigValues;
use Friendica\Database\Database;
use Friendica\Database\DBA;
use Friendica\Database\DisposableFullTextSearch;
use Friendica\Model\Contact;
use Friendica\Model\Post\Engagement;
use Friendica\Model\User;
@ -38,8 +39,7 @@ class UserDefinedChannel extends \Friendica\BaseRepository
{
protected static $table_name = 'channel';
/** @var IManageConfigValues */
private $config;
private IManageConfigValues $config;
public function __construct(Database $database, LoggerInterface $logger, Factory\UserDefinedChannel $factory, IManageConfigValues $config)
{
@ -156,21 +156,22 @@ class UserDefinedChannel extends \Friendica\BaseRepository
return true;
}
return $this->db->select('check-full-text-search', [], ["`pid` = ? AND MATCH (`searchtext`) AGAINST (? IN BOOLEAN MODE)", getmypid(), $this->escapeKeywords($searchtext)]) !== false;
return $this->db->select('check-full-text-search', [], ["`pid` = ? AND MATCH (`searchtext`) AGAINST (? IN BOOLEAN MODE)", getmypid(), Engagement::escapeKeywords($searchtext)]) !== false;
}
/**
* Checks, if one of the user defined channels matches with the given search text or languages
* Checks if one of the user-defined channels matches the given language or item text via full-text search
*
* @param string $searchtext
* @param string $haystack
* @param string $language
* @return boolean
* @throws \Exception
*/
public function match(string $searchtext, string $language): bool
public function match(string $haystack, string $language): bool
{
$users = $this->db->selectToArray('user', ['uid'], $this->getUserCondition());
if (empty($users)) {
return [];
return false;
}
$uids = array_column($users, 'uid');
@ -189,15 +190,11 @@ class UserDefinedChannel extends \Friendica\BaseRepository
$search .= '(' . $channel->fullTextSearch . ') ';
}
$this->insertCheckFullTextSearch($searchtext);
$result = $this->inFulltext($search);
$this->deleteCheckFullTextSearch();
return $result;
return (new DisposableFullTextSearch($this->db, $haystack))->match(Engagement::escapeKeywords($search));
}
/**
* Fetch the channel users that have got matching channels
* List the IDs of the relay/group users that have matching user-defined channels based on an item details
*
* @param string $searchtext
* @param string $language
@ -206,6 +203,7 @@ class UserDefinedChannel extends \Friendica\BaseRepository
* @param int $owner_id
* @param int $reshare_id
* @return array
* @throws \Exception
*/
public function getMatchingChannelUsers(string $searchtext, string $language, array $tags, int $media_type, int $owner_id, int $reshare_id): array
{
@ -221,62 +219,53 @@ class UserDefinedChannel extends \Friendica\BaseRepository
return [];
}
$this->insertCheckFullTextSearch($searchtext);
$disposableFullTextSearch = new DisposableFullTextSearch($this->db, $searchtext);
$uids = [];
$filteredChannels = $this->select(['uid' => array_column($users, 'uid'), 'publish' => true, 'valid' => true])->filter(
function (Entity\UserDefinedChannel $channel) use ($owner_id, $reshare_id, $language, $tags, $media_type, $disposableFullTextSearch, $searchtext) {
static $uids = [];
foreach ($this->select(['uid' => array_column($users, 'uid'), 'publish' => true, 'valid' => true]) as $channel) {
// Filter out channels from already picked users
if (in_array($channel->uid, $uids)) {
continue;
return false;
}
if (!empty($channel->circle) && ($channel->circle > 0) && !in_array($channel->uid, $uids)) {
if (!$this->inCircle($channel->circle, $channel->uid, $owner_id) && !$this->inCircle($channel->circle, $channel->uid, $reshare_id)) {
continue;
if (
($channel->circle ?? 0)
&& !$this->inCircle($channel->circle, $channel->uid, $owner_id)
&& !$this->inCircle($channel->circle, $channel->uid, $reshare_id)
) {
return false;
}
if (!in_array($language, $channel->languages ?: User::getWantedLanguages($channel->uid))) {
return false;
}
if (!empty($channel->languages) && !in_array($channel->uid, $uids)) {
if (!in_array($language, $channel->languages)) {
continue;
if ($channel->includeTags && !$this->inTaglist($channel->includeTags, $tags)) {
return false;
}
} elseif (!in_array($language, User::getWantedLanguages($channel->uid))) {
continue;
if ($channel->excludeTags && $this->inTaglist($channel->excludeTags, $tags)) {
return false;
}
if (!empty($channel->includeTags) && !in_array($channel->uid, $uids)) {
if (!$this->inTaglist($channel->includeTags, $tags)) {
continue;
}
}
if (!empty($channel->excludeTags) && !in_array($channel->uid, $uids)) {
if ($this->inTaglist($channel->excludeTags, $tags)) {
continue;
}
}
if (!empty($channel->mediaType) && !in_array($channel->uid, $uids)) {
if (!($channel->mediaType & $media_type)) {
continue;
}
}
if (!empty($channel->fullTextSearch) && !in_array($channel->uid, $uids)) {
if (!$this->inFulltext($channel->fullTextSearch)) {
continue;
if ($channel->mediaType && !($channel->mediaType & $media_type)) {
return false;
}
if ($channel->fullTextSearch && !$disposableFullTextSearch->match(Engagement::escapeKeywords($channel->fullTextSearch))) {
return false;
}
$uids[] = $channel->uid;
$this->logger->debug('Matching channel found.', ['uid' => $channel->uid, 'label' => $channel->label, 'language' => $language, 'tags' => $tags, 'media_type' => $media_type, 'searchtext' => $searchtext]);
}
$this->deleteCheckFullTextSearch();
return $uids;
return true;
}
);
private function insertCheckFullTextSearch(string $searchtext)
{
$this->db->insert('check-full-text-search', ['pid' => getmypid(), 'searchtext' => $searchtext], Database::INSERT_UPDATE);
}
private function deleteCheckFullTextSearch()
{
$this->db->delete('check-full-text-search', ['pid' => getmypid()]);
return $filteredChannels->column('uid');
}
private function inCircle(int $circleId, int $uid, int $cid): bool
@ -308,20 +297,7 @@ class UserDefinedChannel extends \Friendica\BaseRepository
return false;
}
private function inFulltext(string $fullTextSearch): bool
{
return $this->db->exists('check-full-text-search', ["`pid` = ? AND MATCH (`searchtext`) AGAINST (? IN BOOLEAN MODE)", getmypid(), $this->escapeKeywords($fullTextSearch)]);
}
private function escapeKeywords(string $fullTextSearch): string
{
foreach (Engagement::KEYWORDS as $keyword) {
$fullTextSearch = preg_replace('~(' . $keyword . ':.[\w@\.-]+)~', '"$1"', $fullTextSearch);
}
return $fullTextSearch;
}
private function getUserCondition()
private function getUserCondition(): array
{
$condition = ["`verified` AND NOT `blocked` AND NOT `account_removed` AND NOT `account_expired` AND `user`.`uid` > ?", 0];

View file

@ -255,12 +255,19 @@ class BBCode
// Removes attachments
$text = self::removeAttachment($text);
// Add images because of possible alt texts
// Add text from attached media
if (!empty($uri_id)) {
$text = Post\Media::addAttachmentsToBody($uri_id, $text, [Post\Media::IMAGE]);
foreach (Post\Media::getByURIId($uri_id, [Post\Media::HTML]) as $media) {
$text .= ' ' . $media['name'] . ' ' . $media['description'];
foreach (Post\Media::getByURIId($uri_id) as $media) {
if (!empty($media['description']) && (stripos($text, $media['description']) === false)) {
$text .= ' ' . $media['description'];
}
if (in_array($media['type'], [Post\Media::HTML, Post\Media::ACTIVITY])) {
foreach (['name', 'author-name', 'publisher-name'] as $key) {
if (!empty($media[$key] && stripos($text, $media[$key]) === false)) {
$text .= ' ' . $media[$key];
}
}
}
}
}

View file

@ -302,6 +302,7 @@ class HTML
self::tagToBBCode($doc, 'a', ['href' => '/(.+)/'], '[url=$1]', '[/url]');
self::tagToBBCode($doc, 'img', ['src' => '/(.+)/', 'alt' => '/(.+)/'], '[img=$1]$2', '[/img]', true);
self::tagToBBCode($doc, 'img', ['src' => '/(.+)/', 'title' => '/(.+)/'], '[img=$1]$2', '[/img]', true);
self::tagToBBCode($doc, 'img', ['src' => '/(.+)/', 'width' => '/(\d+)/', 'height' => '/(\d+)/'], '[img=$2x$3]$1', '[/img]', true);
self::tagToBBCode($doc, 'img', ['src' => '/(.+)/'], '[img]$1', '[/img]', true);

View file

@ -0,0 +1,67 @@
<?php
/**
* @copyright Copyright (C) 2010-2024, the Friendica project
*
* @license GNU AGPL version 3 or any later version
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
*/
namespace Friendica\Database;
/**
 * Runs full-text searches against a haystack string that isn't stored in the database.
 *
 * On construction the haystack is written as a single row to the check-full-text-search table,
 * which carries a FULLTEXT index. Any number of match() calls can then be made against that row.
 * The row is deleted again when the instance is destroyed, either explicitly or at the end of the
 * script at the latest.
 */
class DisposableFullTextSearch
{
	/** Table that holds the disposable haystack rows. */
	private const TABLE = 'check-full-text-search';

	private Database $db;

	/** @var int Unique identifier of the haystack row in the database. */
	private int $identifier;

	/**
	 * @param Database $database Database connection used for every query of this instance
	 * @param string   $haystack Text the later match() calls are run against
	 */
	public function __construct(Database $database, string $haystack)
	{
		$this->db = $database;

		$this->assignUnusedIdentifier();

		// Should exists() have returned false only because the database is unavailable, this insert() will
		// most likely fail too. Every later match() call then returns false since no haystack row exists.
		// At that point there are bigger problems to worry about.
		$this->db->insert(self::TABLE, ['pid' => $this->identifier, 'searchtext' => $haystack]);
	}

	/**
	 * Removes the haystack row belonging to this instance.
	 */
	public function __destruct()
	{
		$this->db->delete(self::TABLE, ['pid' => $this->identifier]);
	}

	/**
	 * Checks whether the stored haystack matches the given boolean mode search string.
	 *
	 * @param string $needle Boolean mode search string
	 * @return bool
	 * @throws \Exception
	 */
	public function match(string $needle): bool
	{
		$condition = ["`pid` = ? AND MATCH (`searchtext`) AGAINST (? IN BOOLEAN MODE)", $this->identifier, $needle];

		return $this->db->exists(self::TABLE, $condition);
	}

	/**
	 * Draws random identifiers until one is found that no existing row uses.
	 *
	 * Two instances must never share an identifier: destroying the first one would delete the row of
	 * the second as well, which would then keep running its matches against a missing haystack.
	 */
	private function assignUnusedIdentifier(): void
	{
		while (true) {
			// The upper bound is given by the INT UNSIGNED type of the check-full-text-search.pid field
			$this->identifier = random_int(0, 2 ** 32 - 1);

			if (!$this->db->exists(self::TABLE, ['pid' => $this->identifier])) {
				return;
			}
		}
	}
}

View file

@ -52,7 +52,7 @@ class PostUpdate
// Needed for the helper function to read from the legacy term table
const OBJECT_TYPE_POST = 1;
const VERSION = 1544;
const VERSION = 1547;
/**
* Calls the post update functions
@ -128,6 +128,9 @@ class PostUpdate
if (!self::update1544()) {
return false;
}
if (!self::update1547()) {
return false;
}
return true;
}
@ -1358,4 +1361,62 @@ class PostUpdate
return false;
}
/**
 * Create "post-searchindex" entries for old entries.
 *
 * Each call processes one batch of at most 1000 posts (parents and comments only) with a uri-id below
 * the last processed one. The last processed uri-id is stored in the key-value storage under
 * "post_update_version_1547_id", so the next call resumes where this one stopped.
 * The job counts as finished once a batch contains 100 rows or fewer.
 *
 * @return bool "true" when the job is done
 * @throws \Friendica\Network\HTTPException\InternalServerErrorException
 * @throws \ImagickException
 */
private static function update1547()
{
	// Was the script completed?
	if (DI::keyValue()->get('post_update_version') >= 1547) {
		return true;
	}

	$id = (int)(DI::keyValue()->get('post_update_version_1547_id') ?? 0);
	if ($id == 0) {
		// No progress stored yet: take the starting uri-id from the first post in the given order.
		// NOTE(review): the condition below is a strict "<", so this starting post itself is not indexed here - confirm this is intended
		$post = Post::selectFirstPost(['uri-id'], [], ['order' => ['uri-id' => true]]);
		$id = (int)($post['uri-id'] ?? 0);
	}

	Logger::info('Start', ['uri-id' => $id]);

	$rows = 0;

	$condition = ["`uri-id` < ? AND `gravity` IN (?, ?)", $id, Item::GRAVITY_COMMENT, Item::GRAVITY_PARENT];

	$limit = Post\SearchIndex::searchAgeDateLimit();
	if (!empty($limit)) {
		// The merged condition is the return value of mergeConditions(). Without the assignment the
		// age limit would never be applied and posts older than the limit would be indexed as well.
		$condition = DBA::mergeConditions($condition, ["`created` > ?", $limit]);
	}

	$posts = Post::selectPosts(['uri-id', 'network', 'private', 'created'], $condition, ['order' => ['uri-id' => true], 'limit' => 1000]);

	if (DBA::errorNo() != 0) {
		Logger::error('Database error', ['no' => DBA::errorNo(), 'message' => DBA::errorMessage()]);
		return false;
	}

	while ($post = Post::fetch($posts)) {
		// Remember the last processed uri-id so that the next batch continues below it
		$id = $post['uri-id'];
		Post\SearchIndex::insert($post['uri-id'], $post['network'], $post['private'], $post['created'], true);
		++$rows;
	}
	DBA::close($posts);

	DI::keyValue()->set('post_update_version_1547_id', $id);

	Logger::info('Processed', ['rows' => $rows, 'last' => $id]);

	// A (nearly) empty batch means that there is nothing left to process
	if ($rows <= 100) {
		DI::keyValue()->set('post_update_version', 1547);
		Logger::info('Done');
		return true;
	}

	return false;
}
}

View file

@ -2207,7 +2207,7 @@ class GServer
*/
private static function analyseRootBody($curlResult, array $serverdata): array
{
if (empty($curlResult->getBody())) {
if (empty($curlResult->getBodyString())) {
return $serverdata;
}
@ -2220,7 +2220,7 @@ class GServer
$platforms = array_merge($ap_platforms, $dfrn_platforms, $zap_platforms, $platforms);
$doc = new DOMDocument();
@$doc->loadHTML($curlResult->getBody());
@$doc->loadHTML($curlResult->getBodyString());
$xpath = new DOMXPath($doc);
$assigned = false;

View file

@ -34,6 +34,7 @@ use Friendica\Core\Protocol;
use Friendica\Core\Renderer;
use Friendica\Core\System;
use Friendica\Core\Worker;
use Friendica\Database\Database;
use Friendica\Database\DBA;
use Friendica\DI;
use Friendica\Model\Post\Category;
@ -243,6 +244,10 @@ class Item
$content_fields['raw-body'] = BBCode::removeAttachment($content_fields['raw-body']);
Post\Content::update($item['uri-id'], $content_fields);
$searchtext = Post\Engagement::getSearchTextForUriId($item['uri-id'], true);
DBA::update('post-engagement', ['searchtext' => $searchtext], ['uri-id' => $item['uri-id']]);
Post\SearchIndex::update($item['uri-id']);
}
if (!empty($fields['file'])) {
@ -1444,6 +1449,10 @@ class Item
$engagement_uri_id = Post\Engagement::storeFromItem($posted_item);
if (in_array($posted_item['gravity'], [self::GRAVITY_PARENT, self::GRAVITY_COMMENT])) {
Post\SearchIndex::insert($posted_item['uri-id'], $posted_item['network'], $posted_item['private'], $posted_item['created']);
}
if (($posted_item['gravity'] == self::GRAVITY_ACTIVITY) && ($posted_item['verb'] == Activity::ANNOUNCE) && ($posted_item['parent-uri-id'] == $posted_item['thr-parent-id'])) {
self::reshareChannelPost($posted_item['thr-parent-id'], $posted_item['author-id']);
} elseif ($engagement_uri_id) {

View file

@ -22,11 +22,10 @@
namespace Friendica\Model\Post;
use \BadMethodCallException;
use Friendica\Core\Protocol;
use Friendica\Database\Database;
use Friendica\Database\DBA;
use Friendica\Database\DBStructure;
use Friendica\DI;
use Friendica\Model\Item;
use Friendica\Model\Post;
class Content
@ -109,9 +108,12 @@ class Content
*/
public static function getURIIdListBySearch(string $search, int $uid = 0, int $start = 0, int $limit = 100, int $last_uriid = 0)
{
$condition = ["`uri-id` IN (SELECT `uri-id` FROM `post-content` WHERE MATCH (`title`, `content-warning`, `body`) AGAINST (? IN BOOLEAN MODE))
AND (`uid` = ? OR (`uid` = ? AND NOT `global`)) AND (`network` IN (?, ?, ?, ?) OR (`uid` = ? AND `uid` != ?))",
str_replace('@', ' ', $search), 0, $uid, Protocol::ACTIVITYPUB, Protocol::DFRN, Protocol::DIASPORA, Protocol::OSTATUS, $uid, 0];
$search = Post\Engagement::escapeKeywords($search);
if ($uid != 0) {
$condition = ["MATCH (`searchtext`) AGAINST (? IN BOOLEAN MODE) and (private = ? OR `uri-id` in (SELECT `uri-id` FROM `post-user` where `uid` = ?))", $search, Item::PUBLIC, $uid];
} else {
$condition = ["MATCH (`searchtext`) AGAINST (? IN BOOLEAN MODE) and private = ?", $search, Item::PUBLIC];
}
if (!empty($last_uriid)) {
$condition = DBA::mergeConditions($condition, ["`uri-id` < ?", $last_uriid]);
@ -122,7 +124,7 @@ class Content
'limit' => [$start, $limit]
];
$tags = Post::select(['uri-id'], $condition, $params);
$tags = DBA::select('post-searchindex', ['uri-id'], $condition, $params);
$uriids = [];
while ($tag = DBA::fetch($tags)) {
@ -135,9 +137,12 @@ class Content
public static function countBySearch(string $search, int $uid = 0)
{
$condition = ["`uri-id` IN (SELECT `uri-id` FROM `post-content` WHERE MATCH (`title`, `content-warning`, `body`) AGAINST (? IN BOOLEAN MODE))
AND (`uid` = ? OR (`uid` = ? AND NOT `global`)) AND (`network` IN (?, ?, ?, ?) OR (`uid` = ? AND `uid` != ?))",
str_replace('@', ' ', $search), 0, $uid, Protocol::ACTIVITYPUB, Protocol::DFRN, Protocol::DIASPORA, Protocol::OSTATUS, $uid, 0];
return Post::count($condition);
$search = Post\Engagement::escapeKeywords($search);
if ($uid != 0) {
$condition = ["MATCH (`searchtext`) AGAINST (? IN BOOLEAN MODE) and (private = ? OR `uri-id` in (SELECT `uri-id` FROM `post-user` where `uid` = ?))", $search, Item::PUBLIC, $uid];
} else {
$condition = ["MATCH (`searchtext`) AGAINST (? IN BOOLEAN MODE) and private = ?", $search, Item::PUBLIC];
}
return DBA::count('post-searchindex', $condition);
}
}

View file

@ -78,7 +78,7 @@ class Counts
{
self::update($uri_id, $parent_uri_id, Verb::getID(Activity::POST), Activity::POST);
$activities = DBA::p("SELECT `parent-uri-id`, `vid`, `verb`, `body` FROM `post-view` WHERE `thr-parent-id` = ? AND `gravity` = ? GROUP BY `parent-uri-id`, `vid`, `verb`, `body`", $uri_id, Item::GRAVITY_ACTIVITY);
$activities = DBA::p("SELECT `parent-uri-id`, `vid`, `verb`, `body` FROM `post-view` WHERE `thr-parent-id` = ? AND `gravity` = ? AND `vid` IS NOT NULL GROUP BY `parent-uri-id`, `vid`, `verb`, `body`", $uri_id, Item::GRAVITY_ACTIVITY);
while ($activity = DBA::fetch($activities)) {
self::update($uri_id, $activity['parent-uri-id'], $activity['vid'], $activity['verb'], $activity['body']);
}

View file

@ -24,7 +24,6 @@ namespace Friendica\Model\Post;
use Friendica\Content\Text\BBCode;
use Friendica\Core\Logger;
use Friendica\Core\Protocol;
use Friendica\Database\Database;
use Friendica\Database\DBA;
use Friendica\DI;
use Friendica\Model\Contact;
@ -39,7 +38,7 @@ use Friendica\Util\DateTimeFormat;
class Engagement
{
const KEYWORDS = ['source', 'server', 'from', 'to', 'group', 'tag', 'network', 'platform', 'visibility'];
const KEYWORDS = ['source', 'server', 'from', 'to', 'group', 'tag', 'network', 'platform', 'visibility', 'language'];
/**
* Store engagement data from an item array
@ -146,7 +145,7 @@ class Engagement
'owner-contact-type' => $author['contact-type'],
'owner-nick' => $author['nick'],
'owner-addr' => $author['addr'],
'author-gsid' => $author['gsid'],
'owner-gsid' => $author['gsid'],
];
foreach ($receivers as $receiver) {
@ -158,6 +157,24 @@ class Engagement
return self::getSearchText($item, $receivers, $tags);
}
/**
 * Return the search text for the post with the given uri-id
 *
 * Unless a refresh is requested, a non-empty "searchtext" that is already
 * stored in the "post-engagement" table is returned unchanged. Otherwise the
 * post is loaded and the search text is generated from it.
 *
 * @param int  $uri_id  uri-id of the post
 * @param bool $refresh true to skip the stored search text and rebuild it from the post
 * @return string The search text, or an empty string when no post was found
 */
public static function getSearchTextForUriId(int $uri_id, bool $refresh = false): string
{
	if (!$refresh) {
		$engagement = DBA::selectFirst('post-engagement', ['searchtext'], ['uri-id' => $uri_id]);
		if (!empty($engagement['searchtext'])) {
			return $engagement['searchtext'];
		}
	}

	// "language" has to be fetched as well: getSearchText() reads $item['language'] to add the
	// "language_..." term, which would otherwise never be generated on this code path.
	// NOTE(review): assumes the view queried by Post::selectFirstPost() exposes a "language" field - confirm.
	$post = Post::selectFirstPost(['uri-id', 'network', 'title', 'content-warning', 'body', 'private', 'language',
		'author-id', 'author-contact-type', 'author-nick', 'author-addr', 'author-gsid',
		'owner-id', 'owner-contact-type', 'owner-nick', 'owner-addr', 'owner-gsid'], ['uri-id' => $uri_id]);
	if (empty($post['uri-id'])) {
		return '';
	}

	return self::getSearchTextForItem($post);
}
private static function getSearchTextForItem(array $item): string
{
$receivers = array_column(Tag::getByURIId($item['uri-id'], [Tag::MENTION, Tag::IMPLICIT_MENTION, Tag::EXCLUSIVE_MENTION, Tag::AUDIENCE]), 'url');
@ -167,61 +184,61 @@ class Engagement
private static function getSearchText(array $item, array $receivers, array $tags): string
{
$body = '[nosmile]network:' . $item['network'];
$body = '[nosmile]network_' . $item['network'];
if (!empty($item['author-gsid'])) {
$gserver = DBA::selectFirst('gserver', ['platform', 'nurl'], ['id' => $item['author-gsid']]);
$platform = preg_replace( '/[\W]/', '', $gserver['platform'] ?? '');
if (!empty($platform)) {
$body .= ' platform:' . $platform;
$body .= ' platform_' . $platform;
}
$body .= ' server:' . parse_url($gserver['nurl'], PHP_URL_HOST);
$body .= ' server_' . parse_url($gserver['nurl'], PHP_URL_HOST);
}
if (($item['owner-contact-type'] == Contact::TYPE_COMMUNITY) && !empty($item['owner-gsid']) && ($item['owner-gsid'] != ($item['author-gsid'] ?? 0))) {
$gserver = DBA::selectFirst('gserver', ['platform', 'nurl'], ['id' => $item['owner-gsid']]);
$platform = preg_replace( '/[\W]/', '', $gserver['platform'] ?? '');
if (!empty($platform) && !strpos($body, 'platform:' . $platform)) {
$body .= ' platform:' . $platform;
if (!empty($platform) && !strpos($body, 'platform_' . $platform)) {
$body .= ' platform_' . $platform;
}
$body .= ' server:' . parse_url($gserver['nurl'], PHP_URL_HOST);
$body .= ' server_' . parse_url($gserver['nurl'], PHP_URL_HOST);
}
switch ($item['private']) {
case Item::PUBLIC:
$body .= ' visibility:public';
$body .= ' visibility_public';
break;
case Item::UNLISTED:
$body .= ' visibility:unlisted';
$body .= ' visibility_unlisted';
break;
case Item::PRIVATE:
$body .= ' visibility:private';
$body .= ' visibility_private';
break;
}
if (in_array(Contact::TYPE_COMMUNITY, [$item['author-contact-type'], $item['owner-contact-type']])) {
$body .= ' source:group';
$body .= ' source_group';
} elseif ($item['author-contact-type'] == Contact::TYPE_PERSON) {
$body .= ' source:person';
$body .= ' source_person';
} elseif ($item['author-contact-type'] == Contact::TYPE_NEWS) {
$body .= ' source:service';
$body .= ' source_service';
} elseif ($item['author-contact-type'] == Contact::TYPE_ORGANISATION) {
$body .= ' source:organization';
$body .= ' source_organization';
} elseif ($item['author-contact-type'] == Contact::TYPE_RELAY) {
$body .= ' source:application';
$body .= ' source_application';
}
if ($item['author-contact-type'] == Contact::TYPE_COMMUNITY) {
$body .= ' group:' . $item['author-nick'] . ' group:' . $item['author-addr'];
$body .= ' group_' . $item['author-nick'] . ' group_' . $item['author-addr'];
} elseif (in_array($item['author-contact-type'], [Contact::TYPE_PERSON, Contact::TYPE_NEWS, Contact::TYPE_ORGANISATION])) {
$body .= ' from:' . $item['author-nick'] . ' from:' . $item['author-addr'];
$body .= ' from_' . $item['author-nick'] . ' from_' . $item['author-addr'];
}
if ($item['author-id'] != $item['owner-id']) {
if ($item['owner-contact-type'] == Contact::TYPE_COMMUNITY) {
$body .= ' group:' . $item['owner-nick'] . ' group:' . $item['owner-addr'];
$body .= ' group_' . $item['owner-nick'] . ' group_' . $item['owner-addr'];
} elseif (in_array($item['owner-contact-type'], [Contact::TYPE_PERSON, Contact::TYPE_NEWS, Contact::TYPE_ORGANISATION])) {
$body .= ' from:' . $item['owner-nick'] . ' from:' . $item['owner-addr'];
$body .= ' from_' . $item['owner-nick'] . ' from_' . $item['owner-addr'];
}
}
@ -231,15 +248,20 @@ class Engagement
continue;
}
if (($contact['contact-type'] == Contact::TYPE_COMMUNITY) && !strpos($body, 'group:' . $contact['addr'])) {
$body .= ' group:' . $contact['nick'] . ' group:' . $contact['addr'];
if (($contact['contact-type'] == Contact::TYPE_COMMUNITY) && !strpos($body, 'group_' . $contact['addr'])) {
$body .= ' group_' . $contact['nick'] . ' group_' . $contact['addr'];
} elseif (in_array($contact['contact-type'], [Contact::TYPE_PERSON, Contact::TYPE_NEWS, Contact::TYPE_ORGANISATION])) {
$body .= ' to:' . $contact['nick'] . ' to:' . $contact['addr'];
$body .= ' to_' . $contact['nick'] . ' to_' . $contact['addr'];
}
}
foreach ($tags as $tag) {
$body .= ' tag:' . $tag;
$body .= ' tag_' . $tag;
}
if (!empty($item['language'])) {
$languages = json_decode($item['language'], true);
$body .= ' language_' . array_key_first($languages);
}
$body .= ' ' . $item['title'] . ' ' . $item['content-warning'] . ' ' . $item['body'];
@ -293,4 +315,12 @@ class Engagement
return DateTimeFormat::utc('now - ' . DI::config()->get('channel', 'engagement_hours') . ' hour');
}
/**
 * Rewrite "keyword:value" terms of a search string into the quoted "keyword_value" form
 *
 * Every keyword listed in Engagement::KEYWORDS is processed. The underscore form
 * is the one that getSearchText() writes into the search text.
 *
 * @param string $fullTextSearch Search string
 * @return string Search string with the keyword terms rewritten
 */
public static function escapeKeywords(string $fullTextSearch): string
{
	foreach (Engagement::KEYWORDS as $keyword) {
		// The leading \b makes sure that a keyword is only matched as a whole word.
		// Without it "to:" would also match the tail of "photo:" and mangle the term.
		$fullTextSearch = preg_replace('~\b(' . $keyword . '):(.[\w\*@\.-]+)~', '"$1_$2"', $fullTextSearch);
	}

	return $fullTextSearch;
}
}

View file

@ -0,0 +1,93 @@
<?php
/**
* @copyright Copyright (C) 2010-2024, the Friendica project
*
* @license GNU AGPL version 3 or any later version
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
*/
namespace Friendica\Model\Post;
use Friendica\Core\Logger;
use Friendica\Database\Database;
use Friendica\Database\DBA;
use Friendica\DI;
use Friendica\Model\Post;
use Friendica\Util\DateTimeFormat;
class SearchIndex
{
	/**
	 * Create or overwrite the post-searchindex row of a post
	 *
	 * Nothing is stored when a search age limit is configured and the post
	 * was created before that limit.
	 *
	 * @param int    $uri_id  uri-id of the post
	 * @param string $network Value for the "network" column
	 * @param int    $private Value for the "private" column
	 * @param string $created Creation date of the post
	 * @param bool   $refresh Passed on to Engagement::getSearchTextForUriId()
	 * @return mixed Result of DBA::insert(), or null when the post is older than the limit
	 */
	public static function insert(int $uri_id, string $network, int $private, string $created, bool $refresh = false)
	{
		$oldest = self::searchAgeDateLimit();

		$isTooOld = !empty($oldest) && (strtotime($created) < strtotime($oldest));
		if ($isTooOld) {
			return;
		}

		$searchtext = Post\Engagement::getSearchTextForUriId($uri_id, $refresh);

		$fields = [
			'uri-id'     => $uri_id,
			'network'    => $network,
			'private'    => $private,
			'created'    => $created,
			'searchtext' => $searchtext,
		];

		return DBA::insert('post-searchindex', $fields, Database::INSERT_UPDATE);
	}

	/**
	 * Regenerate the search text of an existing post-searchindex row
	 *
	 * @param int $uri_id uri-id of the post
	 * @return mixed Result of DBA::update()
	 */
	public static function update(int $uri_id)
	{
		return DBA::update(
			'post-searchindex',
			['searchtext' => Post\Engagement::getSearchTextForUriId($uri_id, true)],
			['uri-id' => $uri_id]
		);
	}

	/**
	 * Delete the post-searchindex rows that are older than the configured age limit
	 *
	 * Does nothing when no age limit is configured.
	 *
	 * @return void
	 */
	public static function expire()
	{
		$oldest = self::searchAgeDateLimit();
		if (!empty($oldest)) {
			DBA::delete('post-searchindex', ["`created` < ?", $oldest]);
			Logger::notice('Cleared expired searchindex entries', ['limit' => $oldest, 'rows' => DBA::affectedRows()]);
		}
	}

	/**
	 * Return the date before which posts are not kept in the search index
	 *
	 * The limit is derived from the "system.search_age_days" config value.
	 *
	 * @return string Date limit, or an empty string when the config value is empty
	 */
	public static function searchAgeDateLimit(): string
	{
		$maxAgeDays = DI::config()->get('system', 'search_age_days');

		return empty($maxAgeDays) ? '' : DateTimeFormat::utc('now - ' . $maxAgeDays . ' day');
	}
}

View file

@ -667,12 +667,11 @@ class Tag
*/
private static function getBlockedSQL(): string
{
$blocked_txt = DI::config()->get('system', 'blocked_tags');
if (empty($blocked_txt)) {
$blocked = Strings::getTagArrayByString(DI::config()->get('system', 'blocked_tags'));
if (empty($blocked)) {
return '';
}
$blocked = explode(',', $blocked_txt);
array_walk($blocked, function (&$value) {
$value = "'" . DBA::escape(trim($value)) . "'";
});

View file

@ -140,6 +140,7 @@ class Site extends BaseAdmin
$temppath = (!empty($_POST['temppath']) ? trim($_POST['temppath']) : '');
$singleuser = (!empty($_POST['singleuser']) ? trim($_POST['singleuser']) : '');
$only_tag_search = !empty($_POST['only_tag_search']);
$search_age_days = (!empty($_POST['search_age_days']) ? intval($_POST['search_age_days']) : 0);
$compute_circle_counts = !empty($_POST['compute_circle_counts']);
$process_view = !empty($_POST['process_view']);
$archival_days = (!empty($_POST['archival_days']) ? intval($_POST['archival_days']) : 0);
@ -158,6 +159,7 @@ class Site extends BaseAdmin
$relay_scope = (!empty($_POST['relay_scope']) ? trim($_POST['relay_scope']) : '');
$relay_server_tags = (!empty($_POST['relay_server_tags']) ? trim($_POST['relay_server_tags']) : '');
$relay_deny_tags = (!empty($_POST['relay_deny_tags']) ? trim($_POST['relay_deny_tags']) : '');
$relay_max_tags = (!empty($_POST['relay_max_tags']) ? intval($_POST['relay_max_tags']) : 0);
$relay_user_tags = !empty($_POST['relay_user_tags']);
$relay_deny_undetected_language = !empty($_POST['relay_deny_undetected_language']);
@ -269,7 +271,7 @@ class Site extends BaseAdmin
$transactionConfig->set('system', 'explicit_content' , $explicit_content);
$transactionConfig->set('system', 'proxify_content' , $proxify_content);
$transactionConfig->set('system', 'local_search' , $local_search);
$transactionConfig->set('system', 'blocked_tags' , $blocked_tags);
$transactionConfig->set('system', 'blocked_tags' , Strings::cleanTags($blocked_tags));
$transactionConfig->set('system', 'cache_contact_avatar' , $cache_contact_avatar);
$transactionConfig->set('system', 'check_new_version_url' , $check_new_version_url);
@ -314,7 +316,8 @@ class Site extends BaseAdmin
$transactionConfig->set('system', 'temppath', $temppath);
$transactionConfig->set('system', 'only_tag_search' , $only_tag_search);
$transactionConfig->set('system', 'only_tag_search', $only_tag_search);
$transactionConfig->set('system', 'search_age_days', $search_age_days);
$transactionConfig->set('system', 'compute_circle_counts', $compute_circle_counts);
$transactionConfig->set('system', 'process_view', $process_view);
$transactionConfig->set('system', 'archival_days', $archival_days);
@ -329,8 +332,9 @@ class Site extends BaseAdmin
$transactionConfig->set('system', 'relay_directly' , $relay_directly);
$transactionConfig->set('system', 'relay_scope' , $relay_scope);
$transactionConfig->set('system', 'relay_server_tags' , $relay_server_tags);
$transactionConfig->set('system', 'relay_deny_tags' , $relay_deny_tags);
$transactionConfig->set('system', 'relay_server_tags' , Strings::cleanTags($relay_server_tags));
$transactionConfig->set('system', 'relay_deny_tags' , Strings::cleanTags($relay_deny_tags));
$transactionConfig->set('system', 'relay_max_tags' , $relay_max_tags);
$transactionConfig->set('system', 'relay_user_tags' , $relay_user_tags);
$transactionConfig->set('system', 'relay_deny_undetected_language', $relay_deny_undetected_language);
$transactionConfig->set('system', 'relay_language_quality' , $relay_language_quality);
@ -567,6 +571,7 @@ class Site extends BaseAdmin
'$itemspage_network_mobile' => ['itemspage_network_mobile', DI::l10n()->t('Items per page for mobile devices'), DI::config()->get('system', 'itemspage_network_mobile'), DI::l10n()->t('Number of items per page in stream pages (network, community, profile/contact statuses, search) for mobile devices.')],
'$temppath' => ['temppath', DI::l10n()->t('Temp path'), DI::config()->get('system', 'temppath'), DI::l10n()->t('If you have a restricted system where the webserver can\'t access the system temp path, enter another path here.')],
'$only_tag_search' => ['only_tag_search', DI::l10n()->t('Only search in tags'), DI::config()->get('system', 'only_tag_search'), DI::l10n()->t('On large systems the text search can slow down the system extremely.')],
'$search_age_days' => ['search_age_days', DI::l10n()->t('Maximum age of items in the search table'), DI::config()->get('system', 'search_age_days'), DI::l10n()->t('Maximum age of items in the search table in days. Lower values will increase the performance and reduce disk usage. 0 means no age restriction.')],
'$compute_circle_counts' => ['compute_circle_counts', DI::l10n()->t('Generate counts per contact circle when calculating network count'), DI::config()->get('system', 'compute_circle_counts'), DI::l10n()->t('On systems with users that heavily use contact circles the query can be very expensive.')],
'$process_view' => ['process_view', DI::l10n()->t('Process "view" activities'), DI::config()->get('system', 'process_view'), DI::l10n()->t('"view" activities are mostly geberated by Peertube systems. Per default they are not processed for performance reasons. Only activate this option on performant system.')],
'$archival_days' => ['archival_days', DI::l10n()->t('Days, after which a contact is archived'), DI::config()->get('system', 'archival_days'), DI::l10n()->t('Number of days that we try to deliver content or to update the contact data before we archive a contact.')],
@ -583,6 +588,7 @@ class Site extends BaseAdmin
'$relay_scope' => ['relay_scope', DI::l10n()->t('Relay scope'), DI::config()->get('system', 'relay_scope'), DI::l10n()->t('Can be "all" or "tags". "all" means that every public post should be received. "tags" means that only posts with selected tags should be received.'), [Relay::SCOPE_NONE => DI::l10n()->t('Disabled'), Relay::SCOPE_ALL => DI::l10n()->t('all'), Relay::SCOPE_TAGS => DI::l10n()->t('tags')]],
'$relay_server_tags' => ['relay_server_tags', DI::l10n()->t('Server tags'), DI::config()->get('system', 'relay_server_tags'), DI::l10n()->t('Comma separated list of tags for the "tags" subscription.')],
'$relay_deny_tags' => ['relay_deny_tags', DI::l10n()->t('Deny Server tags'), DI::config()->get('system', 'relay_deny_tags'), DI::l10n()->t('Comma separated list of tags that are rejected.')],
'$relay_max_tags' => ['relay_max_tags', DI::l10n()->t('Maximum amount of tags'), DI::config()->get('system', 'relay_max_tags'), DI::l10n()->t('Maximum amount of tags in a post before it is rejected as spam. The post has to contain at least one link. Posts from subscribed accounts will not be rejected.')],
'$relay_user_tags' => ['relay_user_tags', DI::l10n()->t('Allow user tags'), DI::config()->get('system', 'relay_user_tags'), DI::l10n()->t('If enabled, the tags from the saved searches will used for the "tags" subscription in addition to the "relay_server_tags".')],
'$relay_deny_undetected_language' => ['relay_deny_undetected_language', DI::l10n()->t('Deny undetected languages'), DI::config()->get('system', 'relay_deny_undetected_language'), DI::l10n()->t('If enabled, posts with undetected languages will be rejected.')],
'$relay_language_quality' => ['relay_language_quality', DI::l10n()->t('Language Quality'), DI::config()->get('system', 'relay_language_quality'), DI::l10n()->t('The minimum language quality that is required to accept the post.')],

View file

@ -23,7 +23,6 @@ namespace Friendica\Module\Api\Mastodon;
use Friendica\Core\Logger;
use Friendica\Core\Protocol;
use Friendica\Core\System;
use Friendica\Database\DBA;
use Friendica\DI;
use Friendica\Model\Contact;
@ -154,10 +153,9 @@ class Search extends BaseApi
substr($q, 1), 0, $uid, Protocol::ACTIVITYPUB, Protocol::DFRN, Protocol::DIASPORA, Protocol::OSTATUS, $uid, 0];
$table = 'tag-search-view';
} else {
$condition = ["`uri-id` IN (SELECT `uri-id` FROM `post-content` WHERE MATCH (`title`, `content-warning`, `body`) AGAINST (? IN BOOLEAN MODE))
AND (`uid` = ? OR (`uid` = ? AND NOT `global`)) AND (`network` IN (?, ?, ?, ?) OR (`uid` = ? AND `uid` != ?))",
str_replace('@', ' ', $q), 0, $uid, Protocol::ACTIVITYPUB, Protocol::DFRN, Protocol::DIASPORA, Protocol::OSTATUS, $uid, 0];
$table = 'post-user-view';
$q = Post\Engagement::escapeKeywords($q);
$condition = ["MATCH (`searchtext`) AGAINST (? IN BOOLEAN MODE) and (private = ? OR `uri-id` in (SELECT `uri-id` FROM `post-user` where `uid` = ?))", $q, Item::PUBLIC, $uid];
$table = 'post-searchindex';
}
if (!empty($max_id)) {

View file

@ -115,7 +115,7 @@ class Channel extends Timeline
$this->page['aside'] .= Widget::accountTypes('channel/' . $this->selectedTab, $this->accountTypeString);
if (!in_array($this->selectedTab, [ChannelEntity::FOLLOWERS, ChannelEntity::FORYOU])) {
if (!in_array($this->selectedTab, [ChannelEntity::FOLLOWERS, ChannelEntity::FORYOU, ChannelEntity::DISCOVER])) {
$this->page['aside'] .= $this->getNoSharerWidget('channel');
}

View file

@ -25,6 +25,7 @@ use Friendica\App;
use Friendica\App\Mode;
use Friendica\Content\BoundariesPager;
use Friendica\Content\Conversation;
use Friendica\Content\Conversation\Entity\Channel;
use Friendica\Content\Conversation\Entity\Network as NetworkEntity;
use Friendica\Content\Conversation\Factory\Timeline as TimelineFactory;
use Friendica\Content\Conversation\Repository\UserDefinedChannel;
@ -142,6 +143,11 @@ class Network extends Timeline
$this->page['aside'] .= Widget\SavedSearches::getHTML($this->args->getQueryString());
$this->page['aside'] .= Widget::fileAs('filed', '');
if (($this->channel->isTimeline($this->selectedTab) || $this->userDefinedChannel->isTimeline($this->selectedTab, $this->session->getLocalUserId())) &&
!in_array($this->selectedTab, [Channel::FOLLOWERS, Channel::FORYOU, Channel::DISCOVER])) {
$this->page['aside'] .= $this->getNoSharerWidget('network');
}
if (Feature::isEnabled($this->session->getLocalUserId(), 'trending_tags')) {
$this->page['aside'] .= TrendingTags::getHTML($this->selectedTab);
}

View file

@ -291,6 +291,20 @@ class Timeline extends BaseModule
$cid, $this->getMedianRelationThreadScore($cid, 4), $this->getMedianComments($uid, 4), $this->getMedianActivities($uid, 4), $cid,
$uid, Contact\User::FREQUENCY_ALWAYS
];
} elseif ($this->selectedTab == ChannelEntity::DISCOVER) {
$cid = Contact::getPublicIdByUserId($uid);
$condition = [
"`owner-id` IN (SELECT `cid` FROM `contact-relation` WHERE `relation-cid` = ? AND NOT `follows`) AND
(`owner-id` IN (SELECT `cid` FROM `contact-relation` WHERE `relation-cid` = ? AND NOT `follows` AND `relation-thread-score` > ?) OR
`owner-id` IN (SELECT `cid` FROM `contact-relation` WHERE `cid` = ? AND `relation-thread-score` > ?) OR
((`comments` >= ? OR `activities` >= ?) AND
(`owner-id` IN (SELECT `cid` FROM `contact-relation` WHERE `cid` = ? AND `relation-thread-score` > ?)) OR
(`owner-id` IN (SELECT `cid` FROM `contact-relation` WHERE `relation-cid` = ? AND `relation-thread-score` > ?))))",
$cid, $cid, $this->getMedianRelationThreadScore($cid, 4), $cid, $this->getMedianRelationThreadScore($cid, 4),
$this->getMedianComments($uid, 4), $this->getMedianActivities($uid, 4), $cid, 0, $cid, 0
];
} elseif ($this->selectedTab == ChannelEntity::FOLLOWERS) {
$condition = ["`owner-id` IN (SELECT `pid` FROM `account-user-view` WHERE `uid` = ? AND `rel` = ?)", $uid, Contact::FOLLOWER];
} elseif ($this->selectedTab == ChannelEntity::SHARERSOFSHARERS) {
@ -398,11 +412,7 @@ class Timeline extends BaseModule
}
if (!empty($channel->fullTextSearch)) {
$search = $channel->fullTextSearch;
foreach (Engagement::KEYWORDS as $keyword) {
$search = preg_replace('~(' . $keyword . ':.[\w@\.-]+)~', '"$1"', $search);
}
$condition = DBA::mergeConditions($condition, ["MATCH (`searchtext`) AGAINST (? IN BOOLEAN MODE)", $search]);
$condition = DBA::mergeConditions($condition, ["MATCH (`searchtext`) AGAINST (? IN BOOLEAN MODE)", Engagement::escapeKeywords($channel->fullTextSearch)]);
}
if (!empty($channel->includeTags)) {

View file

@ -50,22 +50,21 @@ class Summary extends BaseModeration
parent::content();
$accounts = [
[$this->t('Normal Account'), 0],
[$this->t('Automatic Follower Account'), 0],
[$this->t('Public Group Account'), 0],
[$this->t('Automatic Friend Account'), 0],
[$this->t('Blog Account'), 0],
[$this->t('Private Group Account'), 0]
[$this->t('Personal Page'), 0],
[$this->t('Organisation Page'), 0],
[$this->t('News Page'), 0],
[$this->t('Community Group'), 0],
[$this->t('Channel Relay'), 0],
];
$users = 0;
$pageFlagsCountStmt = $this->database->p('SELECT `page-flags`, COUNT(`uid`) AS `count` FROM `user` WHERE `uid` != ? GROUP BY `page-flags`', 0);
while ($pageFlagsCount = $this->database->fetch($pageFlagsCountStmt)) {
$accounts[$pageFlagsCount['page-flags']][1] = $pageFlagsCount['count'];
$users += $pageFlagsCount['count'];
$accountTypeCountStmt = $this->database->p('SELECT `account-type`, COUNT(`uid`) AS `count` FROM `user` WHERE `uid` != ? GROUP BY `account-type`', 0);
while ($AccountTypeCount = $this->database->fetch($accountTypeCountStmt)) {
$accounts[$AccountTypeCount['account-type']][1] = $AccountTypeCount['count'];
$users += $AccountTypeCount['count'];
}
$this->database->close($pageFlagsCountStmt);
$this->database->close($accountTypeCountStmt);
$this->logger->debug('accounts', ['accounts' => $accounts]);

View file

@ -34,6 +34,7 @@ use Friendica\Module\BaseSettings;
use Friendica\Module\Response;
use Friendica\Network\HTTPException;
use Friendica\Util\Profiler;
use Friendica\Util\Strings;
use Psr\Log\LoggerInterface;
class Channels extends BaseSettings
@ -80,8 +81,8 @@ class Channels extends BaseSettings
'access-key' => substr(mb_strtolower($request['new_access_key']), 0, 1),
'uid' => $uid,
'circle' => (int)$request['new_circle'],
'include-tags' => $this->cleanTags($request['new_include_tags']),
'exclude-tags' => $this->cleanTags($request['new_exclude_tags']),
'include-tags' => Strings::cleanTags($request['new_include_tags']),
'exclude-tags' => Strings::cleanTags($request['new_exclude_tags']),
'full-text-search' => $request['new_text_search'],
'media-type' => ($request['new_image'] ? 1 : 0) | ($request['new_video'] ? 2 : 0) | ($request['new_audio'] ? 4 : 0),
'languages' => $request['new_languages'],
@ -109,8 +110,8 @@ class Channels extends BaseSettings
'access-key' => substr(mb_strtolower($request['access_key'][$id]), 0, 1),
'uid' => $uid,
'circle' => (int)$request['circle'][$id],
'include-tags' => $this->cleanTags($request['include_tags'][$id]),
'exclude-tags' => $this->cleanTags($request['exclude_tags'][$id]),
'include-tags' => Strings::cleanTags($request['include_tags'][$id]),
'exclude-tags' => Strings::cleanTags($request['exclude_tags'][$id]),
'full-text-search' => $request['text_search'][$id],
'media-type' => ($request['image'][$id] ? 1 : 0) | ($request['video'][$id] ? 2 : 0) | ($request['audio'][$id] ? 4 : 0),
'languages' => $request['languages'][$id],
@ -222,18 +223,4 @@ class Channels extends BaseSettings
'$form_security_token' => self::getFormSecurityToken('settings_channels'),
]);
}
private function cleanTags(string $tag_list): string
{
$tags = [];
$tagitems = explode(',', mb_strtolower($tag_list));
foreach ($tagitems as $tag) {
$tag = trim($tag, '# ');
if (!empty($tag)) {
$tags[] = preg_replace('#\s#u', '', $tag);
}
}
return implode(',', $tags);
}
}

View file

@ -22,10 +22,10 @@
namespace Friendica\Module\WellKnown;
use Friendica\BaseModule;
use Friendica\Core\System;
use Friendica\DI;
use Friendica\Model\Search;
use Friendica\Protocol\Relay;
use Friendica\Util\Strings;
/**
* Node subscription preferences for social relay systems
@ -43,13 +43,7 @@ class XSocialRelay extends BaseModule
$userTags = [];
if ($scope == Relay::SCOPE_TAGS) {
$server_tags = $config->get('system', 'relay_server_tags');
$tagitems = explode(',', $server_tags);
/// @todo Check if it was better to use "strtolower" on the tags
foreach ($tagitems as $tag) {
$systemTags[] = trim($tag, '# ');
}
$systemTags = Strings::getTagArrayByString($config->get('system', 'relay_server_tags'));
if ($config->get('system', 'relay_user_tags')) {
$userTags = Search::getUserTags();

View file

@ -97,24 +97,28 @@ class Relay
$body = ActivityPub\Processor::normalizeMentionLinks($body);
$denyTags = [];
if ($scope == self::SCOPE_TAGS) {
$tagList = self::getSubscribedTags();
} else {
$tagList = [];
}
$deny_tags = $config->get('system', 'relay_deny_tags');
$tagitems = explode(',', mb_strtolower($deny_tags));
foreach ($tagitems as $tag) {
$tag = trim($tag, '# ');
$denyTags[] = $tag;
}
$denyTags = Strings::getTagArrayByString($config->get('system', 'relay_deny_tags'));
if (!empty($tagList) || !empty($denyTags)) {
$content = mb_strtolower(BBCode::toPlaintext($body, false));
$max_tags = $config->get('system', 'relay_max_tags');
if ($max_tags && (count($tags) > $max_tags) && preg_match('/[^@!#]\[url\=.*?\].*?\[\/url\]/ism', $body)) {
$cleaned = preg_replace('/[@!#]\[url\=.*?\].*?\[\/url\]/ism', '', $body);
$content_cleaned = mb_strtolower(BBCode::toPlaintext($cleaned, false));
if (strlen($content_cleaned) < strlen($content) / 2) {
Logger::info('Possible hashtag spam detected - rejected', ['hashtags' => $tags, 'network' => $network, 'url' => $url, 'causer' => $causer, 'content' => $content]);
return false;
}
}
foreach ($tags as $tag) {
$tag = mb_strtolower($tag);
if (in_array($tag, $denyTags)) {
@ -157,10 +161,7 @@ class Relay
*/
public static function getSubscribedTags(): array
{
$tags = [];
foreach (explode(',', mb_strtolower(DI::config()->get('system', 'relay_server_tags'))) as $tag) {
$tags[] = trim($tag, '# ');
}
$tags = Strings::getTagArrayByString(DI::config()->get('system', 'relay_server_tags'));
if (DI::config()->get('system', 'relay_user_tags')) {
$tags = array_merge($tags, Search::getUserTags());

View file

@ -578,4 +578,36 @@ class Strings
return $styled_url;
}
/**
 * Normalize a list of hashtags
 *
 * The list is converted to lowercase and split at commas, semicolons, spaces
 * and "#" characters. Any remaining whitespace is removed from each tag, empty
 * entries and duplicates are dropped and the result is sorted.
 *
 * @param string $tag_list List of tags
 * @return string Comma separated list of the cleaned tags, empty string when no tag is left
 */
public static function cleanTags(string $tag_list): string
{
	$tags = [];

	$tagitems = explode(',', str_replace([' ', ';', '#'], ',', mb_strtolower($tag_list)));
	foreach ($tagitems as $tag) {
		// Strip the whitespace before testing for emptiness, so that items that only
		// consist of tabs or line breaks don't end up as empty tags in the result.
		$tag = (string)preg_replace('#\s#u', '', $tag);

		// Compared against '' on purpose: empty() would also discard the valid tag "0"
		if ($tag !== '') {
			$tags[] = $tag;
		}
	}

	$tags = array_unique($tags);
	asort($tags);

	return implode(',', $tags);
}
/**
 * Get a tag array out of a comma separated list of tags
 *
 * The list is normalized via cleanTags() first. An empty list results in an
 * empty array, so that callers can check the result with empty().
 *
 * @param string $tag_list List of tags
 * @return array List of the cleaned tags without empty entries
 */
public static function getTagArrayByString(string $tag_list): array
{
	$tags = explode(',', self::cleanTags($tag_list));

	// explode() returns [''] for an empty string - such empty entries must not be handed to the callers
	return array_values(array_filter($tags, function (string $tag): bool {
		return $tag !== '';
	}));
}
}

View file

@ -123,6 +123,8 @@ class Cron
Worker::add(Worker::PRIORITY_LOW, 'ExpireActivities');
Worker::add(Worker::PRIORITY_LOW, 'ExpireSearchIndex');
Worker::add(Worker::PRIORITY_LOW, 'RemoveUnusedTags');
Worker::add(Worker::PRIORITY_LOW, 'RemoveUnusedContacts');

View file

@ -0,0 +1,35 @@
<?php
/**
* @copyright Copyright (C) 2010-2024, the Friendica project
*
* @license GNU AGPL version 3 or any later version
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
*/
namespace Friendica\Worker;
use Friendica\Model\Post;
/**
* Expire old search index entries
*/
class ExpireSearchIndex
{
/**
 * Run the expiry of the search index by calling Post\SearchIndex::expire()
 *
 * Neither of the two parameters is used by this method.
 *
 * @param mixed $param         Unused
 * @param mixed $hook_function Unused
 */
public static function execute($param = '', $hook_function = '')
{
Post\SearchIndex::expire();
}
}

View file

@ -56,7 +56,7 @@ use Friendica\Database\DBA;
// This file is required several times during the test in DbaDefinition which justifies this condition
if (!defined('DB_UPDATE_VERSION')) {
define('DB_UPDATE_VERSION', 1546);
define('DB_UPDATE_VERSION', 1548);
}
return [
@ -1270,7 +1270,7 @@ return [
"fields" => [
"uri-id" => ["type" => "int unsigned", "not null" => "1", "primary" => "1", "foreign" => ["item-uri" => "id"], "comment" => "Id of the item-uri table entry that contains the item uri"],
"vid" => ["type" => "smallint unsigned", "not null" => "1", "primary" => "1", "foreign" => ["verb" => "id", "on delete" => "restrict"], "comment" => "Id of the verb table entry that contains the activity verbs"],
"reaction" => ["type" => "varchar(1)", "not null" => "1", "primary" => "1", "comment" => "Emoji Reaction"],
"reaction" => ["type" => "varchar(4)", "not null" => "1", "primary" => "1", "comment" => "Emoji Reaction"],
"parent-uri-id" => ["type" => "int unsigned", "foreign" => ["item-uri" => "id"], "comment" => "Id of the item-uri table that contains the parent uri"],
"count" => ["type" => "int unsigned", "default" => 0, "comment" => "Number of activities"],
],
@ -1319,7 +1319,6 @@ return [
"PRIMARY" => ["uri-id"],
"plink" => ["plink(191)"],
"resource-id" => ["resource-id"],
"title-content-warning-body" => ["FULLTEXT", "title", "content-warning", "body"],
"quote-uri-id" => ["quote-uri-id"],
]
],
@ -1480,6 +1479,21 @@ return [
"PRIMARY" => ["uri-id", "id"],
]
],
"post-searchindex" => [
"comment" => "Content for all posts",
"fields" => [
"uri-id" => ["type" => "int unsigned", "not null" => "1", "primary" => "1", "foreign" => ["item-uri" => "id"], "comment" => "Id of the item-uri table entry that contains the item uri"],
"network" => ["type" => "char(4)", "comment" => ""],
"private" => ["type" => "tinyint unsigned", "comment" => "0=public, 1=private, 2=unlisted"],
"searchtext" => ["type" => "mediumtext", "comment" => "Simplified text for the full text search"],
"created" => ["type" => "datetime", "comment" => ""],
],
"indexes" => [
"PRIMARY" => ["uri-id"],
"created" => ["created"],
"searchtext" => ["FULLTEXT", "searchtext"],
]
],
"post-tag" => [
"comment" => "post relation to tags",
"fields" => [
@ -1708,7 +1722,6 @@ return [
"indexes" => [
"PRIMARY" => ["id"],
"uid_is-default" => ["uid", "is-default"],
"pub_keywords" => ["FULLTEXT", "pub_keywords"],
]
],
"profile_field" => [

View file

@ -237,6 +237,10 @@ return [
// Minimum value for the language detection quality for relay posts. The value must be between 0 and 1.
'relay_language_quality' => 0,
// relay_max_tags (Integer)
// Maximum amount of tags in a post before it is rejected as spam.
'relay_max_tags' => 20,
// proxify_content (Boolean)
// Use the proxy functionality for fetching external content
'proxify_content' => true,

View file

@ -214,4 +214,21 @@ class StringsTest extends TestCase
self::assertEquals($originalText, $text);
}
/**
 * A tag list with mixed separators and mixed case is returned as a sorted,
 * lowercase, comma separated list
 */
public function testCleanTags()
{
	$input    = 'Open, #Source, Friendica Software; Federation #Fediverse';
	$expected = 'federation,fediverse,friendica,open,software,source';

	$actual = Strings::cleanTags($input);

	self::assertEquals($expected, $actual);
}
/**
 * A tag list with mixed separators and mixed case is returned as a sorted
 * array of lowercase tags
 */
public function testgetTagArrayByString()
{
	$input    = 'Open, #Source, Friendica Software; Federation #Fediverse';
	$expected = ['federation', 'fediverse', 'friendica', 'open', 'software', 'source'];

	$actual = Strings::getTagArrayByString($input);

	self::assertEquals($expected, $actual);
}
}

File diff suppressed because it is too large Load diff

View file

@ -116,6 +116,7 @@
<h2>{{$performance}}</h2>
{{include file="field_checkbox.tpl" field=$compute_circle_counts}}
{{include file="field_checkbox.tpl" field=$only_tag_search}}
{{include file="field_input.tpl" field=$search_age_days}}
{{include file="field_input.tpl" field=$max_comments}}
{{include file="field_input.tpl" field=$max_display_comments}}
{{include file="field_input.tpl" field=$itemspage_network}}
@ -159,6 +160,7 @@
{{include file="field_select.tpl" field=$relay_scope}}
{{include file="field_input.tpl" field=$relay_server_tags}}
{{include file="field_input.tpl" field=$relay_deny_tags}}
{{include file="field_input.tpl" field=$relay_max_tags}}
{{include file="field_checkbox.tpl" field=$relay_user_tags}}
{{include file="field_checkbox.tpl" field=$relay_directly}}
{{include file="field_checkbox.tpl" field=$relay_deny_undetected_language}}

View file

@ -250,6 +250,7 @@
<div class="panel-body">
{{include file="field_checkbox.tpl" field=$compute_circle_counts}}
{{include file="field_checkbox.tpl" field=$only_tag_search}}
{{include file="field_input.tpl" field=$search_age_days}}
{{include file="field_input.tpl" field=$max_comments}}
{{include file="field_input.tpl" field=$max_display_comments}}
{{include file="field_input.tpl" field=$itemspage_network}}
@ -328,6 +329,7 @@
{{include file="field_select.tpl" field=$relay_scope}}
{{include file="field_input.tpl" field=$relay_server_tags}}
{{include file="field_input.tpl" field=$relay_deny_tags}}
{{include file="field_input.tpl" field=$relay_max_tags}}
{{include file="field_checkbox.tpl" field=$relay_user_tags}}
{{include file="field_checkbox.tpl" field=$relay_directly}}
{{include file="field_checkbox.tpl" field=$relay_deny_undetected_language}}