From 35995633ae6f5737a324f9023b0648f01883d1d1 Mon Sep 17 00:00:00 2001 From: Michael Date: Wed, 30 Aug 2023 19:17:42 +0000 Subject: [PATCH] Calculate the interaction score --- database.sql | 6 ++- doc/database/db_contact-relation.md | 18 ++++--- src/Model/Contact/Relation.php | 75 +++++++++++++++++++++++++++++ src/Worker/Cron.php | 3 ++ src/Worker/UpdateAllSuggestions.php | 2 +- src/Worker/UpdateScores.php | 46 ++++++++++++++++++ static/dbstructure.config.php | 6 ++- static/defaults.config.php | 4 ++ 8 files changed, 150 insertions(+), 10 deletions(-) create mode 100644 src/Worker/UpdateScores.php diff --git a/database.sql b/database.sql index 11fbd514e..c66de5bdd 100644 --- a/database.sql +++ b/database.sql @@ -1,6 +1,6 @@ -- ------------------------------------------ -- Friendica 2023.09-dev (Giant Rhubarb) --- DB_UPDATE_VERSION 1529 +-- DB_UPDATE_VERSION 1530 -- ------------------------------------------ @@ -513,6 +513,10 @@ CREATE TABLE IF NOT EXISTS `contact-relation` ( `last-interaction` datetime NOT NULL DEFAULT '0001-01-01 00:00:00' COMMENT 'Date of the last interaction', `follow-updated` datetime NOT NULL DEFAULT '0001-01-01 00:00:00' COMMENT 'Date of the last update of the contact relationship', `follows` boolean NOT NULL DEFAULT '0' COMMENT '', + `score` smallint unsigned COMMENT 'score for interactions of cid on relation-cid', + `relation-score` smallint unsigned COMMENT 'score for interactions of relation-cid on cid', + `thread-score` smallint unsigned COMMENT 'score for interactions of cid on threads of relation-cid', + `relation-thread-score` smallint unsigned COMMENT 'score for interactions of relation-cid on threads of cid', PRIMARY KEY(`cid`,`relation-cid`), INDEX `relation-cid` (`relation-cid`), FOREIGN KEY (`cid`) REFERENCES `contact` (`id`) ON UPDATE RESTRICT ON DELETE CASCADE, diff --git a/doc/database/db_contact-relation.md b/doc/database/db_contact-relation.md index 8628b1ef6..1e9a2c41c 100644 --- a/doc/database/db_contact-relation.md +++ 
b/doc/database/db_contact-relation.md @@ -6,13 +6,17 @@ Contact relations Fields ------ -| Field | Description | Type | Null | Key | Default | Extra | -| ---------------- | --------------------------------------------------- | ------------ | ---- | --- | ------------------- | ----- | -| cid | contact the related contact had interacted with | int unsigned | NO | PRI | 0 | | -| relation-cid | related contact who had interacted with the contact | int unsigned | NO | PRI | 0 | | -| last-interaction | Date of the last interaction | datetime | NO | | 0001-01-01 00:00:00 | | -| follow-updated | Date of the last update of the contact relationship | datetime | NO | | 0001-01-01 00:00:00 | | -| follows | | boolean | NO | | 0 | | +| Field | Description | Type | Null | Key | Default | Extra | +| --------------------- | -------------------------------------------------------- | ----------------- | ---- | --- | ------------------- | ----- | +| cid | contact the related contact had interacted with | int unsigned | NO | PRI | 0 | | +| relation-cid | related contact who had interacted with the contact | int unsigned | NO | PRI | 0 | | +| last-interaction | Date of the last interaction | datetime | NO | | 0001-01-01 00:00:00 | | +| follow-updated | Date of the last update of the contact relationship | datetime | NO | | 0001-01-01 00:00:00 | | +| follows | | boolean | NO | | 0 | | +| score | score for interactions of cid on relation-cid | smallint unsigned | YES | | NULL | | +| relation-score | score for interactions of relation-cid on cid | smallint unsigned | YES | | NULL | | +| thread-score | score for interactions of cid on threads of relation-cid | smallint unsigned | YES | | NULL | | +| relation-thread-score | score for interactions of relation-cid on threads of cid | smallint unsigned | YES | | NULL | | Indexes ------------ diff --git a/src/Model/Contact/Relation.php b/src/Model/Contact/Relation.php index d0be33cca..94b5f63ff 100644 --- a/src/Model/Contact/Relation.php +++ 
b/src/Model/Contact/Relation.php @@ -31,6 +31,8 @@ use Friendica\Model\APContact; use Friendica\Model\Contact; use Friendica\Model\Profile; use Friendica\Model\User; +use Friendica\Model\Verb; +use Friendica\Protocol\Activity; use Friendica\Protocol\ActivityPub; use Friendica\Util\DateTimeFormat; use Friendica\Util\Strings; @@ -770,4 +772,85 @@ class Relation ['limit' => [$offset, $count], 'order' => [$shuffle ? 'RAND()' : 'name']] ); }
+
+	/**
+	 * Calculate the interaction scores for the given user
+	 *
+	 * All four score fields of the `contact-relation` rows whose `cid` is the
+	 * user's public contact are reset to 0. Each field is then set, per other
+	 * author, to that author's share of the counted posts, scaled to 0..65535.
+	 * The counted time span comes from the config value
+	 * `system.interaction_score_days`.
+	 *
+	 * @param integer $uid
+	 * @return void
+	 */
+	public static function calculateInteractionScore(int $uid)
+	{
+		$days       = DI::config()->get('system', 'interaction_score_days');
+		$contact_id = Contact::getPublicIdByUserId($uid);
+
+		Logger::debug('Calculation - start', ['uid' => $uid, 'cid' => $contact_id, 'days' => $days]);
+
+		// Posts with one of these verbs are not counted
+		$verbs = [Verb::getID(Activity::FOLLOW), Verb::getID(Activity::VIEW), Verb::getID(Activity::READ)];
+
+		// Calculate the cut-off once so that all queries of this run use the same time span
+		$since = DateTimeFormat::utc('now - ' . $days . ' day');
+
+		DBA::update('contact-relation', ['score' => 0, 'relation-score' => 0, 'thread-score' => 0, 'relation-thread-score' => 0], ['cid' => $contact_id]);
+
+		// Score field => join condition between the user's `post-user` rows and the `post` rows of other authors
+		$joins = [
+			'score'                 => "`post`.`uri-id` = `post-user`.`thr-parent-id`",
+			'thread-score'          => "`post`.`uri-id` = `post-user`.`parent-uri-id`",
+			'relation-score'        => "`post-user`.`uri-id` = `post`.`thr-parent-id`",
+			'relation-thread-score' => "`post-user`.`uri-id` = `post`.`parent-uri-id`",
+		];
+
+		foreach ($joins as $field => $join) {
+			self::updateInteractionScoreField($field, $join, $uid, $contact_id, $since, $verbs);
+		}
+
+		Logger::debug('Calculation - end', ['uid' => $uid]);
+	}
+
+	/**
+	 * Update a single score field for all authors that show up in the joined posts
+	 *
+	 * The join condition is inserted verbatim into the SQL statement, so it must
+	 * only ever be one of the fixed strings from calculateInteractionScore().
+	 *
+	 * @param string  $field      Field of `contact-relation` that receives the score
+	 * @param string  $join       ON condition between `post-user` and `post`
+	 * @param integer $uid        User id that the `post-user` rows belong to
+	 * @param integer $contact_id Public contact id of the user
+	 * @param string  $since      Oldest `received` date that is counted
+	 * @param array   $verbs      The three verb ids that are excluded from the count
+	 * @return void
+	 */
+	private static function updateInteractionScoreField(string $field, string $join, int $uid, $contact_id, $since, array $verbs)
+	{
+		$from_where = " FROM `post-user` INNER JOIN `post` ON " . $join . " WHERE `post-user`.`author-id` = ? AND `post-user`.`received` >= ? AND `post-user`.`uid` = ? AND `post`.`author-id` != ? AND NOT `post`.`vid` IN (?, ?, ?)";
+		$params     = array_merge([$contact_id, $since, $uid, $contact_id], $verbs);
+
+		$total = DBA::fetchFirst("SELECT count(*) AS `activity`" . $from_where, ...$params);
+		// NOTE(review): fetchFirst() presumably returns no array when the query fails - handled like "no activity"
+		$total_activity = (int)($total['activity'] ?? 0);
+
+		Logger::debug('Calculate ' . $field, ['uid' => $uid, 'total' => $total_activity]);
+
+		if ($total_activity <= 0) {
+			// Nothing to distribute; this also avoids a division by zero below
+			return;
+		}
+
+		$interactions = DBA::p("SELECT `post`.`author-id`, count(*) AS `activity`" . $from_where . " GROUP BY `post`.`author-id`", ...$params);
+		while ($interaction = DBA::fetch($interactions)) {
+			// min() keeps the value inside the range of the "smallint unsigned" field
+			$score = min((int)(($interaction['activity'] / $total_activity) * 65535), 65535);
+			DBA::update('contact-relation', [$field => $score], ['cid' => $contact_id, 'relation-cid' => $interaction['author-id']]);
+		}
+		DBA::close($interactions);
+	}
 }
diff --git a/src/Worker/Cron.php b/src/Worker/Cron.php index 18ede945c..607de5c0d 100644 --- a/src/Worker/Cron.php +++ b/src/Worker/Cron.php @@ -104,6 +104,9 @@ class Cron // Clear cache entries Worker::add(Worker::PRIORITY_LOW, 'ClearCache'); + // Update interaction scores + Worker::add(Worker::PRIORITY_LOW, 'UpdateScores'); + DI::keyValue()->set('last_cron_hourly', time()); } diff --git a/src/Worker/UpdateAllSuggestions.php b/src/Worker/UpdateAllSuggestions.php index 285cfe5bb..329fe25cf 100644 --- a/src/Worker/UpdateAllSuggestions.php +++ b/src/Worker/UpdateAllSuggestions.php @@ -32,7 +32,7 @@ class UpdateAllSuggestions { public static function execute() { - $users = DBA::select('user', ['uid'], ["`last-activity` > ?", DateTimeFormat::utc('now - 3 days', 'Y-m-d')]); + $users = DBA::select('user', ['uid'], ["`last-activity` > ? AND `uid` > ?", DateTimeFormat::utc('now - 3 days', 'Y-m-d'), 0]); while ($user = DBA::fetch($users)) { Contact\Relation::updateCachedSuggestions($user['uid']); } diff --git a/src/Worker/UpdateScores.php b/src/Worker/UpdateScores.php new file mode 100644 index 000000000..66f776ad7 --- /dev/null +++ b/src/Worker/UpdateScores.php @@ -0,0 +1,46 @@ +.
+ *
+ */
+
+namespace Friendica\Worker;
+
+use Friendica\Core\Logger;
+use Friendica\Database\DBA;
+use Friendica\Model\Contact\Relation;
+
+/**
+ * Update the interaction scores of every user that is neither expired nor removed
+ */
+class UpdateScores
+{
+	public static function execute($param = '', $hook_function = '')
+	{
+		Logger::notice('Start score update');
+		// One score calculation per selected account; `uid` > 0 presumably skips a system user - TODO confirm
+		$users = DBA::select('user', ['uid'], ["NOT `account_expired` AND NOT `account_removed` AND `uid` > ?", 0]);
+		while ($user = DBA::fetch($users)) {
+			Relation::calculateInteractionScore($user['uid']);
+		}
+		DBA::close($users);
+		// Neither $param nor $hook_function is read by this worker
+		Logger::notice('Score update done');
+		return;
+	}
+}
diff --git a/static/dbstructure.config.php b/static/dbstructure.config.php index 71c7d5cb1..7520a1109 100644 --- a/static/dbstructure.config.php +++ b/static/dbstructure.config.php @@ -56,7 +56,7 @@ use Friendica\Database\DBA; // This file is required several times during the test in DbaDefinition which justifies this condition if (!defined('DB_UPDATE_VERSION')) { - define('DB_UPDATE_VERSION', 1529); + define('DB_UPDATE_VERSION', 1530); } return [ @@ -571,6 +571,10 @@ return [ "last-interaction" => ["type" => "datetime", "not null" => "1", "default" => DBA::NULL_DATETIME, "comment" => "Date of the last interaction"], "follow-updated" => ["type" => "datetime", "not null" => "1", "default" => DBA::NULL_DATETIME, "comment" => "Date of the last update of the contact relationship"], "follows" => ["type" => "boolean", "not null" => "1", "default" => "0", "comment" => ""], + "score" => ["type" => "smallint unsigned", "comment" => "score for interactions of cid on relation-cid"], + "relation-score" => ["type" => "smallint unsigned", "comment" => "score for interactions of relation-cid on cid"], + "thread-score" => ["type" => "smallint unsigned", "comment" => "score for interactions of cid on threads of relation-cid"], + "relation-thread-score" => ["type" => "smallint unsigned", "comment" => "score for interactions of relation-cid on threads of cid"], ], "indexes" => [ "PRIMARY"
=> ["cid", "relation-cid"], diff --git a/static/defaults.config.php b/static/defaults.config.php index 33ed8211b..b13f3fd24 100644 --- a/static/defaults.config.php +++ b/static/defaults.config.php @@ -349,6 +349,10 @@ return [ // This has to be quite large to deal with embedded private photos. False to use the system value. 'ini_pcre_backtrack_limit' => 500000, + // interaction_score_days (Integer) + // Number of days that are used to calculate the interaction score. + 'interaction_score_days' => 30, + // invitation_only (Boolean) // If set true registration is only possible after a current member of the node has sent an invitation. 'invitation_only' => false,