forked from leftypol/leftypol
Merge pull request 'Refactor the search subsystem' (#127) from rework-search into config
Reviewed-on: leftypol/leftypol#127
This commit is contained in:
commit
6e9d0a4e77
13 changed files with 1093 additions and 173 deletions
|
|
@ -1,5 +1,7 @@
|
||||||
[www]
|
[www]
|
||||||
access.log = /proc/self/fd/2
|
access.log = /proc/self/fd/2
|
||||||
|
php_admin_value[error_log] = /proc/self/fd/2
|
||||||
|
php_admin_flag[log_errors] = on
|
||||||
|
|
||||||
; Ensure worker stdout and stderr are sent to the main error log.
|
; Ensure worker stdout and stderr are sent to the main error log.
|
||||||
catch_workers_output = yes
|
catch_workers_output = yes
|
||||||
|
|
|
||||||
13
inc/Data/FiltersParseResult.php
Normal file
13
inc/Data/FiltersParseResult.php
Normal file
|
|
@ -0,0 +1,13 @@
|
||||||
|
<?php
|
||||||
|
namespace Vichan\Data;
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Plain data holder for the raw, not yet sanitized filters extracted from a user search query.
 * Every filter is optional; an unset filter keeps its default value.
 */
class FiltersParseResult {
	/** @var array<string> Free-text phrases to look for in the post body, one entry per phrase. */
	public array $body = [];

	/** @var ?string Raw subject filter. */
	public ?string $subject = null;

	/** @var ?string Raw name filter. */
	public ?string $name = null;

	/** @var ?string Board filter. */
	public ?string $board = null;

	/** @var ?string Raw flag filter. */
	public ?string $flag = null;

	/** @var ?int Post id filter. */
	public ?int $id = null;

	/** @var ?int Thread id filter. */
	public ?int $thread = null;
}
|
||||||
285
inc/Data/Flags.php
Normal file
285
inc/Data/Flags.php
Normal file
|
|
@ -0,0 +1,285 @@
|
||||||
|
<?php
|
||||||
|
namespace Vichan\Data;
|
||||||
|
|
||||||
|
|
||||||
|
class Flags {
	/**
	 * Short names of the flags embedded with vichan.
	 * The list is kept in its original order, one initial letter per row.
	 */
	public const EMBEDDED_FLAGS = [
		'a1', 'a2', 'ac', 'ad', 'ae', 'af', 'ag', 'ai', 'al', 'am', 'an', 'ao', 'ap', 'aq', 'ar', 'as', 'at', 'au', 'aw', 'ax', 'az',
		'ba', 'bb', 'bd', 'be', 'bf', 'bg', 'bh', 'bi', 'bj', 'bl', 'bm', 'bn', 'bo', 'bq', 'br', 'bs', 'bt', 'bu', 'bv', 'bw', 'by', 'bz',
		'ca', 'cat', 'cc', 'cd', 'cf', 'cg', 'ch', 'ci', 'ck', 'cl', 'cm', 'cn', 'co', 'cp', 'cr', 'cs', 'cu', 'cv', 'cw', 'cx', 'cy', 'cz',
		'de', 'dg', 'dj', 'dk', 'dm', 'do', 'dz',
		'ea', 'ec', 'ee', 'eg', 'eh', 'er', 'es', 'et', 'eu',
		'fi', 'fj', 'fk', 'fm', 'fo', 'fr', 'fx',
		'ga', 'gb', 'gd', 'ge', 'gf', 'gg', 'gh', 'gi', 'gl', 'gm', 'gn', 'gp', 'gq', 'gr', 'gs', 'gt', 'gu', 'gw', 'gy',
		'hk', 'hm', 'hn', 'hr', 'ht', 'hu',
		'ic', 'id', 'ie', 'il', 'im', 'in', 'io', 'iq', 'ir', 'is', 'it',
		'je', 'jm', 'jo', 'jp',
		'ke', 'kg', 'kh', 'ki', 'km', 'kn', 'kp', 'kr', 'kw', 'ky', 'kz',
		'la', 'lb', 'lc', 'li', 'lk', 'lr', 'ls', 'lt', 'lu', 'lv', 'ly',
		'ma', 'mc', 'md', 'me', 'mf', 'mg', 'mh', 'mk', 'ml', 'mm', 'mn', 'mo', 'mp', 'mq', 'mr', 'ms', 'mt', 'mu', 'mv', 'mw', 'mx', 'my', 'mz',
		'na', 'nc', 'ne', 'nf', 'ng', 'ni', 'nl', 'no', 'np', 'nr', 'nt', 'nu', 'nz',
		'o1', 'om',
		'pa', 'pe', 'pf', 'pg', 'ph', 'pk', 'pl', 'pm', 'pn', 'pr', 'ps', 'pt', 'pw', 'py',
		'qa',
		're', 'ro', 'rs', 'ru', 'rw',
		'sa', 'sb', 'sc', 'sd', 'se', 'sf', 'sg', 'sh', 'si', 'sj', 'sk', 'sl', 'sm', 'sn', 'so', 'sr', 'ss', 'st', 'su', 'sv', 'sx', 'sy', 'sz',
		'ta', 'tc', 'td', 'tf', 'tg', 'th', 'ti', 'tj', 'tk', 'tl', 'tm', 'tn', 'to', 'tp', 'tr', 'tt', 'tv', 'tw', 'tz',
		'ua', 'ug', 'uk', 'um', 'us', 'uy', 'uz',
		'va', 'vc', 've', 'vg', 'vi', 'vn', 'vu',
		'wf', 'ws',
		'xx',
		'ye', 'yt', 'yu',
		'za', 'zm', 'zr', 'zw',
	];
}
|
||||||
32
inc/Data/SearchFilters.php
Normal file
32
inc/Data/SearchFilters.php
Normal file
|
|
@ -0,0 +1,32 @@
|
||||||
|
<?php
|
||||||
|
namespace Vichan\Data;
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Plain data holder with the fragments of each search filter, plus the accumulated weight of the whole query.
 */
class SearchFilters {
	/** @var array<array<string>> One list of fragments per body filter. */
	public array $body = [];

	/** @var array<string> Fragments of the subject filter. */
	public array $subject = [];

	/** @var array<string> Fragments of the name filter. */
	public array $name = [];

	/** @var ?string Board filter, null when none was given. */
	public ?string $board = null;

	/** @var array<string> Fragments of the flag filter. */
	public array $flag = [];

	/** @var ?int Post id filter. */
	public ?int $id = null;

	/** @var ?int Thread id filter. */
	public ?int $thread = null;

	/** @var float Total weight assigned to the filters above. */
	public float $weight = 0;
}
|
||||||
98
inc/Data/SearchQueries.php
Normal file
98
inc/Data/SearchQueries.php
Normal file
|
|
@ -0,0 +1,98 @@
|
||||||
|
<?php
|
||||||
|
namespace Vichan\Data;
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Implements flood control for search queries.
 *
 * Every accepted query is recorded in the `search_queries` table; a new query is rejected when too many
 * queries were recorded recently, either from the same IP or from all IPs together.
 */
class SearchQueries {
	private \PDO $pdo;
	private int $queries_for_single;
	private int $range_for_single;
	private int $queries_for_all;
	private int $range_for_all;
	private bool $auto_gc;


	/**
	 * Performs the actual flood check and, if the query passes, records it.
	 * Must run inside the transaction opened by {@see self::checkFlood()}.
	 *
	 * @param string $ip Source IP.
	 * @param string $phrase The search query.
	 * @return bool True if the request goes over one of the limits (in which case nothing is recorded).
	 */
	private function checkFloodImpl(string $ip, string $phrase): bool {
		$now = \time();

		// Per-IP limit.
		$query = $this->pdo->prepare("SELECT COUNT(2) FROM `search_queries` WHERE `ip` = :ip AND `time` > :time");
		$query->bindValue(':ip', $ip);
		$query->bindValue(':time', $now - $this->range_for_single, \PDO::PARAM_INT);
		$query->execute();
		// fetchColumn() yields a string (or false); compare as an integer.
		if ((int)$query->fetchColumn() > $this->queries_for_single) {
			return true;
		}

		// Global limit.
		$query = $this->pdo->prepare("SELECT COUNT(2) FROM `search_queries` WHERE `time` > :time");
		$query->bindValue(':time', $now - $this->range_for_all, \PDO::PARAM_INT);
		$query->execute();
		if ((int)$query->fetchColumn() > $this->queries_for_all) {
			return true;
		}

		// The query is allowed: record it so that it counts towards the next checks.
		$query = $this->pdo->prepare("INSERT INTO `search_queries` VALUES (:ip, :time, :query)");
		$query->bindValue(':ip', $ip);
		$query->bindValue(':time', $now, \PDO::PARAM_INT);
		$query->bindValue(':query', $phrase);
		$query->execute();

		if ($this->auto_gc) {
			$this->purgeExpired();
		}

		return false;
	}

	/**
	 * @param \PDO $pdo PDO to access the DB.
	 * @param int $queries_for_single Maximum number of queries for a single IP.
	 * @param int $range_for_single Maximum age of the oldest query to consider from a single IP, in seconds.
	 * @param int $queries_for_all Maximum number of queries for all IPs.
	 * @param int $range_for_all Maximum age of the oldest query to consider from all IPs, in seconds.
	 * @param bool $auto_gc If to run the cleanup at every check. Must be invoked from the outside otherwise.
	 */
	public function __construct(
		\PDO $pdo,
		int $queries_for_single,
		int $range_for_single,
		int $queries_for_all,
		int $range_for_all,
		bool $auto_gc
	) {
		$this->pdo = $pdo;
		$this->queries_for_single = $queries_for_single;
		$this->range_for_single = $range_for_single;
		$this->queries_for_all = $queries_for_all;
		$this->range_for_all = $range_for_all;
		$this->auto_gc = $auto_gc;
	}

	/**
	 * Check if the IP-query pair overflows the limit. A query that passes is recorded.
	 *
	 * @param string $ip Source IP.
	 * @param string $phrase The search query.
	 * @return bool True if the request goes over the limit.
	 * @throws \Throwable Whatever the underlying queries throw, after rolling the transaction back.
	 */
	public function checkFlood(string $ip, string $phrase): bool {
		$this->pdo->beginTransaction();
		try {
			$ret = $this->checkFloodImpl($ip, $phrase);
			$this->pdo->commit();
			return $ret;
		} catch (\Throwable $e) {
			// Catch \Throwable rather than \Exception so that an \Error does not leave the transaction open.
			if ($this->pdo->inTransaction()) {
				$this->pdo->rollBack();
			}
			throw $e;
		}
	}

	/**
	 * Deletes the recorded queries that are too old to matter for any of the limits.
	 *
	 * @return int Number of deleted rows.
	 */
	public function purgeExpired(): int {
		// A row is still needed as long as it falls inside the larger of the two windows.
		$max_range = \max($this->range_for_single, $this->range_for_all);

		$query = $this->pdo->prepare("DELETE FROM `search_queries` WHERE `time` <= :expiry_limit");
		$query->bindValue(':expiry_limit', \time() - $max_range, \PDO::PARAM_INT);
		$query->execute();
		return $query->rowCount();
	}
}
|
||||||
|
|
@ -13,6 +13,36 @@ class UserPostQueries {
|
||||||
|
|
||||||
private \PDO $pdo;
|
private \PDO $pdo;
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Escapes the LIKE wildcards (% and _) and the escape character itself, using the default
 * escape character (backslash).
 *
 * @param string $str Text to be matched literally inside a LIKE pattern.
 * @return string The escaped text.
 */
private static function escapeLike(string $str): string {
	// Single pass; equivalent to escaping the backslashes first and the wildcards afterwards.
	$replacements = [
		'\\' => '\\\\',
		'%' => '\\%',
		'_' => '\\_',
	];
	return \strtr($str, $replacements);
}
|
||||||
|
|
||||||
|
/**
 * Builds the argument list for the CONCAT sql function that joins the fragments of a filter with wildcards.
 * Given prefix = cat and fragments_count = 3, the result is
 * [ "'%'", ":cat0", "'%'", ":cat1", "'%'", ":cat2", "'%'" ].
 *
 * @param string $prefix The prefix for the parameter binding
 * @param int $fragments_count MUST BE >= 1.
 * @return array<string> Alternating '%' literals and named placeholders, starting and ending with a '%' literal.
 */
private static function arrayOfFragments(string $prefix, int $fragments_count): array {
	$args = [ "'%'" ];
	$index = 0;
	while ($index < $fragments_count) {
		// One placeholder per fragment, each followed by a wildcard.
		\array_push($args, ":$prefix$index", "'%'");
		$index++;
	}
	return $args;
}
|
||||||
|
|
||||||
public function __construct(\PDO $pdo) {
|
public function __construct(\PDO $pdo) {
|
||||||
$this->pdo = $pdo;
|
$this->pdo = $pdo;
|
||||||
}
|
}
|
||||||
|
|
@ -156,4 +186,89 @@ class UserPostQueries {
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Search among the user posts with the given filters.
 * The subject, name and elements of the bodies filters are fragments which are joined together with wildcards, to
 * allow for more flexible filtering. All the filters must match (AND); among the flags, any one may match (OR).
 *
 * @param string $board The board where to search in. The caller is expected to have validated it.
 * @param array<string> $subject Fragments of the subject filter.
 * @param array<string> $name Fragments of the name filter.
 * @param array<string> $flags An array of the flag names to search among the HTML. Keys are ignored.
 * @param ?int $id Post id filter.
 * @param ?int $thread Thread id filter.
 * @param array<array<string>> $bodies An array whose element are arrays containing the fragments of multiple body filters, each
 *                                     searched independently from the others
 * @param integer $limit The maximum number of results.
 * @throws PDOException On error.
 * @return array<array> The matching rows as associative arrays, or an empty array if no filter was given.
 */
public function searchPosts(string $board, array $subject, array $name, array $flags, ?int $id, ?int $thread, array $bodies, int $limit): array {
	// The placeholders are numbered 0..n-1, but callers may hand over arrays with gaps or string keys
	// (e.g. the result of array_filter() on a key-value map), so reindex everything first.
	$subject = \array_values($subject);
	$name = \array_values($name);
	$flags = \array_values($flags);
	$bodies = \array_map('array_values', \array_values($bodies));

	$where_acc = [];
	// Placeholder => already escaped string value.
	$str_params = [];

	if (!empty($subject)) {
		$like_arg = self::arrayOfFragments('subj', \count($subject));
		$where_acc[] = 'subject LIKE CONCAT(' . \implode(', ', $like_arg) . ')';
		foreach ($subject as $i => $fragment) {
			$str_params[":subj$i"] = self::escapeLike($fragment);
		}
	}
	if (!empty($name)) {
		$like_arg = self::arrayOfFragments('name', \count($name));
		$where_acc[] = 'name LIKE CONCAT(' . \implode(', ', $like_arg) . ')';
		foreach ($name as $i => $fragment) {
			$str_params[":name$i"] = self::escapeLike($fragment);
		}
	}
	if (!empty($flags)) {
		$flag_acc = [];
		foreach ($flags as $i => $flag) {
			// vichan stores the flag inside the post markup, so it has to be searched with LIKE.
			// Each alternative needs its own LIKE: "col LIKE (a OR b)" would compare against a boolean.
			$flag_acc[] = "body_nomarkup LIKE CONCAT('%<tinyboard>', :flag$i, '</tinyboard>%')";
			$str_params[":flag$i"] = self::escapeLike($flag);
		}
		$where_acc[] = '(' . \implode(' OR ', $flag_acc) . ')';
	}
	if ($id !== null) {
		$where_acc[] = 'id = :id';
	}
	if ($thread !== null) {
		$where_acc[] = 'thread = :thread';
	}
	foreach ($bodies as $body_i => $body) {
		$like_arg = self::arrayOfFragments("body_{$body_i}_", \count($body));
		$where_acc[] = 'body_nomarkup LIKE CONCAT(' . \implode(', ', $like_arg) . ')';
		foreach ($body as $i => $fragment) {
			$str_params[":body_{$body_i}_{$i}"] = self::escapeLike($fragment);
		}
	}

	if (empty($where_acc)) {
		return [];
	}

	// The table name cannot be bound; double any backtick so it cannot terminate the quoted identifier.
	$table = 'posts_' . \str_replace('`', '``', $board);
	$sql = "SELECT * FROM `$table` WHERE " . \implode(' AND ', $where_acc) . ' LIMIT :limit';
	$query = $this->pdo->prepare($sql);

	foreach ($str_params as $placeholder => $value) {
		$query->bindValue($placeholder, $value);
	}
	if ($id !== null) {
		$query->bindValue(':id', $id, \PDO::PARAM_INT);
	}
	if ($thread !== null) {
		$query->bindValue(':thread', $thread, \PDO::PARAM_INT);
	}
	$query->bindValue(':limit', $limit, \PDO::PARAM_INT);

	$query->execute();
	return $query->fetchAll(\PDO::FETCH_ASSOC);
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
417
inc/Service/SearchService.php
Normal file
417
inc/Service/SearchService.php
Normal file
|
|
@ -0,0 +1,417 @@
|
||||||
|
<?php
|
||||||
|
namespace Vichan\Service;
|
||||||
|
|
||||||
|
use Vichan\Data\Driver\LogDriver;
|
||||||
|
use Vichan\Data\{FiltersParseResult, UserPostQueries, SearchFilters, SearchQueries};
|
||||||
|
|
||||||
|
|
||||||
|
class SearchService {
|
||||||
|
private const COMMON_WORDS = [
|
||||||
|
'anon', 'thread', 'board', 'post', 'reply', 'image', 'topic', 'bump', 'sage', 'tripcode', 'groyper',
|
||||||
|
'mod', 'admin', 'ban', 'rules', 'sticky', 'archive', 'catalog', 'report', 'captcha', 'proxy', 'the',
|
||||||
|
'vpn', 'tor', 'doxx', 'spam', 'troll', 'bait', 'flame', 'greentext', 'copypasta', 'meme', 'this',
|
||||||
|
'shitpost', 'shitposting', 'edgy', 'kek', 'lulz', 'rekt', 'smug', 'lewd', 'nsfw', 'anonymous', 'glowie',
|
||||||
|
'cringe', 'normie', 'boomer', 'zoomer', 'incel', 'chad', 'stacy', 'simp', 'based', 'redpill', 'color',
|
||||||
|
'blackpill', 'whitepill', 'bluepill', 'clownworld', 'coomer', 'doomer', 'wojak', 'soyjak', 'pepe',
|
||||||
|
'style', 'weight', 'size', 'freedom', 'speech', 'censorship', 'moderation', 'community', 'anonymous',
|
||||||
|
'reply', 'search', 'group', 'merge', 'flatten', 'lock', 'unlock', 'hide', 'uyghur', 'soyshit', 'glow',
|
||||||
|
'also', 'only', 'just', 'even', 'very', 'than', 'then', 'that', 'this', 'with',
|
||||||
|
'from', 'into', 'onto', 'over', 'under', 'about', 'after', 'before', 'since', 'while',
|
||||||
|
'because', 'although', 'though', 'unless', 'until', 'where', 'which', 'whose', 'there', 'their',
|
||||||
|
'these', 'those', 'being', 'having', 'doing', 'going', 'would', 'could', 'should', 'shall', 'everything',
|
||||||
|
'might', 'must', 'will', 'have', 'been', 'were', 'wasn', 'aren', 'isn', 'does', 'isn’t', 'mustn’t',
|
||||||
|
'didn', 'hadn', 'hasn', 'don’t', 'can’t', 'won’t', 'cannot', 'haven', 'weren', 'didnt', 'since',
|
||||||
|
'mustn', 'mightn', 'shouldn', 'wouldn', 'might’ve', 'would’ve', 'should’ve', 'could’ve', 'must’ve',
|
||||||
|
'wasn’t', 'weren’t', 'hasn’t', 'hadn’t', 'won’t', 'wouldn’t', 'shouldn’t', 'couldn’t', 'mightn’t',
|
||||||
|
'each', 'such', 'some', 'most', 'many', 'more', 'much', 'less', 'few', 'none', 'although', 'because',
|
||||||
|
'both', 'either', 'neither', 'every', 'anyone', 'someone', 'everyone', 'nobody', 'nothing', 'so',
|
||||||
|
'above', 'below', 'along', 'across', 'among', 'until', 'and', 'but', 'or', 'nor', 'for', 'yet',
|
||||||
|
];
|
||||||
|
|
||||||
|
private const MAX_LENGTH_SUBJECT = 100; // posts.sql
|
||||||
|
private const MAX_LENGTH_NAME = 35; // posts.sql
|
||||||
|
|
||||||
|
private LogDriver $log;
|
||||||
|
private UserPostQueries $user_queries;
|
||||||
|
private SearchQueries $search_queries;
|
||||||
|
private ?array $flag_map;
|
||||||
|
private float $max_weight;
|
||||||
|
private int $max_query_length;
|
||||||
|
private int $post_limit;
|
||||||
|
private array $searchable_board_uris;
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Shortens the text to at most $byteLimit bytes, preferring to cut at a space or at a word boundary.
 *
 * @param string $text The text to shorten.
 * @param int $byteLimit Maximum length in bytes.
 * @return ?string The text itself if already short enough, the shortened text otherwise, or null if it
 *                 is too long and no suitable cut point was found.
 */
private static function truncateQuery(string $text, int $byteLimit): ?string {
	if (\strlen($text) <= $byteLimit) {
		return $text;
	}

	// Hard cut at the byte limit, then re-encode so a multibyte character split by the cut
	// does not leave an incomplete sequence at the end.
	$head = \substr($text, 0, $byteLimit);
	$head = \mb_convert_encoding($head, 'UTF-8', 'UTF-8');

	// Preferred cut point: the last space.
	$last_space = \strrpos($head, ' ');
	if ($last_space !== false) {
		return \substr($head, 0, $last_space);
	}

	// Otherwise fall back to the last word boundary; give up if there is none.
	$found = \preg_match('/^(.+)\b/u', $head, $groups);
	return $found ? $groups[1] : null;
}
|
||||||
|
|
||||||
|
/**
 * Strips wildcards (*), whitespace and NUL bytes from both ends of the string.
 *
 * @param string $str The string to clean up.
 * @return string The trimmed string.
 */
private static function trim(string $str): string {
	$stripped_chars = "* \n\r\t\v\0";
	return \trim($str, $stripped_chars);
}
|
||||||
|
|
||||||
|
/**
 * Resolves the escape sequences of the query syntax: \\ becomes \, \* becomes * and \" becomes ".
 *
 * @param string $str The escaped string.
 * @return string The string with the escape sequences replaced by the characters they stand for.
 */
private static function unescape(string $str): string {
	$sequences = [
		'\\\\' => '\\',
		'\\*' => '*',
		'\\"' => '"'
	];
	return \strtr($str, $sequences);
}
|
||||||
|
|
||||||
|
/**
 * Split the filter into fragments along the wildcards, handling escaping.
 * Only unescaped asterisks act as wildcards: an escaped character (a backslash followed by any character,
 * e.g. \* or \\) always stays inside its fragment, still escaped. A run of consecutive wildcards counts as one.
 *
 * @param string $str The full filter.
 * @return array<string> The fragments, possibly empty strings. Always contains at least one element.
 */
private static function split(string $str): array {
	// First alternative: consume an escaped character and discard the match, so that the star of "\*" is
	// never seen as a delimiter and the backslashes of "\\" are never swallowed.
	// Second alternative: a run of unescaped stars is the actual delimiter.
	$fragments = \preg_split('/\\\\.(*SKIP)(*FAIL)|\*+/s', $str);

	// preg_split() returns false on an internal PCRE error; fall back to a single fragment.
	return $fragments === false ? [ $str ] : $fragments;
}
|
||||||
|
|
||||||
|
/**
 * Weights the fragments of a filter by their content: short fragments and very common words are
 * penalized.
 *
 * @param array<string> $fragments The fragments of a single filter.
 * @return float The sum of the penalties: 16 for a common word shorter than 4 bytes, 6 for any other
 *               common word or any other fragment shorter than 4 bytes, 0 otherwise.
 */
private static function weightByContent(array $fragments): float {
	$w = 0.0;

	foreach ($fragments as $fragment) {
		$fragment = (string)$fragment;
		$short = \strlen($fragment) < 4;
		// The word list is lowercase, so fold the case first: "The" is as common as "the".
		// Strict comparison avoids PHP's numeric-string juggling.
		$common = \in_array(\mb_strtolower($fragment, 'UTF-8'), self::COMMON_WORDS, true);

		if ($common) {
			$w += $short ? 16 : 6;
		} elseif ($short) {
			$w += 6;
		}
	}

	return $w;
}
|
||||||
|
|
||||||
|
/**
 * Splits a raw filter into its cleaned-up fragments and weights its use of wildcards.
 *
 * @param string $filter The raw filter, with wildcards and escape sequences.
 * @return array A triple [ fragments, total length, wildcard weight ]:
 *               - array<string> the unescaped, trimmed, non-empty fragments;
 *               - int the sum of the byte lengths of those fragments;
 *               - int|float the weight, i.e. the ratio (in percent) of wildcards to the total length, plus 2
 *                 for each fragment the filter was split into (empty ones included).
 */
private static function filterAndWeight(string $filter): array {
	$fragments = self::split($filter);
	$acc = [];
	$total_len = 0;

	foreach ($fragments as $fragment) {
		$fragment = self::trim(self::unescape($fragment));

		// Compare against '' explicitly: empty() would also discard the legitimate fragment "0".
		if ($fragment !== '') {
			$total_len += \strlen($fragment);
			$acc[] = $fragment;
		}
	}

	// Number of wildcards: the number of split fragments minus 1, never negative.
	$interword = \max(\count($fragments) - 1, 0);
	// Wildcards over the total length of the word. A filter made only of wildcards has no length to
	// relate to (and would otherwise divide by zero), so it gets no percentage.
	$perc = $total_len > 0 ? $interword / $total_len * 100 : 0;
	$wildcard_weight = $perc + \count($fragments) * 2;

	return [ $acc, $total_len, $wildcard_weight ];
}
|
||||||
|
|
||||||
|
/**
 * Gets a subset of the given strings which match every fragment.
 * A string matches when it contains all the fragments, case-insensitively, in the given order and
 * without overlaps. With no fragments, every string matches.
 *
 * @param array<string> $strings An array of strings.
 * @param array<string> $fragments User provided fragments to search in the strings.
 * @return array<string> An array of strings, subset of $strings. The original keys are preserved.
 */
private static function matchStrings(array $strings, array $fragments): array {
	return \array_filter($strings, static function ($str) use ($fragments) {
		// Where the search for the next fragment starts. It begins at the very start of the string and
		// moves past each match, which ensures the fragments are found one after the other.
		$offset = 0;
		foreach ($fragments as $fragment) {
			if ($fragment === '') {
				// Nothing to look for.
				continue;
			}

			$pos = \stripos($str, $fragment, $offset);
			if ($pos === false) {
				// Exclude the strings that don't match even a single fragment.
				return false;
			}
			// Never exceeds the length of $str, so it stays a valid offset.
			$offset = $pos + \strlen($fragment);
		}
		return true;
	});
}
|
||||||
|
|
||||||
|
/**
 * Parses a raw search query.
 *
 * @param string $raw_query Raw user query. Phrases are searched in the post bodies. The user can specify also
 *                          additional filters in the <key>:<value> format.
 *                          Available filters:
 *                           - board: the board, value can be quoted
 *                           - subject: post subject, value can be quoted, supports wildcards
 *                           - name: post name, value can be quoted, supports wildcards
 *                           - flag: post flag, value can be quoted, supports wildcards
 *                           - id: post id, must be numeric
 *                           - thread: thread id, must be numeric
 *                          The remaining text is split into chunks and searched in the post body.
 *                          When a filter is given more than once, the last occurrence wins.
 * @return FiltersParseResult
 * @throws \RuntimeException If the query is too long and cannot be truncated, or if it cannot be decoded.
 */
public function parse(string $raw_query): FiltersParseResult{
	$tres = self::truncateQuery($raw_query, $this->max_query_length);
	if ($tres === null) {
		throw new \RuntimeException('Could not truncate query');
	}

	$pres = \preg_match_all(
		'/(?:
			\b(board):
			(?:
				"([^"]+)"											# [2] board: "quoted"
				|
				([^\s"]+)											# [3] board: unquoted
			)
			|
			\b(subject|name|flag):
			(?:
				"((?:\\\\\\\\|\\\\\"|\\\\\*|[^"\\\\])*)"		# [5] quoted with wildcards
				|
				((?:\\\\\\\\|\\\\\*|[^\s\\\\])++)				# [6] unquoted with wildcards
			)
			|
			\b(id|thread):
			(\d+)													# [8] numeric only
			|
			"((?:\\\\\\\\|\\\\\"|\\\\\*|[^"\\\\])*)"			# [9] quoted free text
			|
			([^"\s]++)												# [10] unquoted free text block
		)/iux',
		$tres,
		$matches,
		\PREG_SET_ORDER
	);
	if ($pres === false) {
		throw new \RuntimeException('Could not decode the query');
	}

	// Returns the first of the given groups that took part in the match with a non-empty value, or null.
	// With PREG_SET_ORDER the trailing unmatched groups are missing from the array and the other unmatched
	// ones are empty strings; empty() is avoided because it would also reject the valid value "0".
	$pick = static function (array $m, int ...$groups): ?string {
		foreach ($groups as $group) {
			if (isset($m[$group]) && $m[$group] !== '') {
				return $m[$group];
			}
		}
		return null;
	};

	$filters = new FiltersParseResult();

	foreach ($matches as $m) {
		if ($pick($m, 1) !== null) {
			// board (no wildcards).
			$filters->board = \trim((string)$pick($m, 2, 3), '/');
		} elseif (($key = $pick($m, 4)) !== null) {
			// subject, name, flag (with wildcards). An empty quoted value resets the filter to null.
			$key = \strtolower($key);
			$value = $pick($m, 5, 6);

			if ($key === 'name') {
				$filters->name = $value;
			} elseif ($key === 'subject') {
				$filters->subject = $value;
			} else {
				$filters->flag = $value;
			}
		} elseif (($key = $pick($m, 7)) !== null) {
			// id, thread (numeric).
			$value = (int)$m[8];

			if (\strtolower($key) === 'id') {
				$filters->id = $value;
			} else {
				$filters->thread = $value;
			}
		} else {
			// Free text, quoted or not. Empty quoted phrases are ignored.
			$value = $pick($m, 9, 10);
			if ($value !== null) {
				$filters->body[] = $value;
			}
		}
	}

	return $filters;
}
|
||||||
|
|
||||||
|
/**
 * @param LogDriver $log Log driver.
 * @param UserPostQueries $user_queries User posts queries.
 * @param SearchQueries $search_queries Search queries for flood detection.
 * @param ?array $flag_map The key-value map of user flags, or null to disable flag search.
 * @param float $max_weight The maximum weight of the parsed user query. Body filters that go beyond this limit are discarded.
 * @param int $max_query_length Maximum length of the raw input query before it's truncated.
 * @param int $post_limit Maximum number of results.
 * @param ?array $searchable_board_uris The uris of the board that can be searched. Null to search all the boards.
 */
public function __construct(
	LogDriver $log,
	UserPostQueries $user_queries,
	SearchQueries $search_queries,
	?array $flag_map,
	float $max_weight,
	int $max_query_length,
	int $post_limit,
	?array $searchable_board_uris
) {
	// Collaborators.
	$this->log = $log;
	$this->user_queries = $user_queries;
	$this->search_queries = $search_queries;

	// Limits and settings.
	$this->flag_map = $flag_map;
	$this->max_weight = $max_weight;
	$this->max_query_length = $max_query_length;
	$this->post_limit = $post_limit;

	// Without an explicit list, fall back to whatever listBoards(true) returns.
	if ($searchable_board_uris === null) {
		$searchable_board_uris = listBoards(true);
	}
	$this->searchable_board_uris = $searchable_board_uris;
}
|
||||||
|
|
||||||
|
/**
 * Reduces the user provided filters into fragments and assigns them a total weight.
 * The subject, name and flag filters are dropped when they are longer than what could possibly match;
 * the body filters are dropped, one by one, when they would bring the total weight over the maximum.
 *
 * @param FiltersParseResult $filters The filters to sanitize, reduce and weight.
 * @return SearchFilters
 */
public function reduceAndWeight(FiltersParseResult $filters): SearchFilters {
	$result = new SearchFilters();

	$raw_subject = $filters->subject;
	if ($raw_subject !== null) {
		[ $parts, $length, $cost ] = self::filterAndWeight($raw_subject);

		if ($length <= self::MAX_LENGTH_SUBJECT) {
			$result->subject = $parts;
			// First contribution to the weight, hence the plain assignment.
			$result->weight = $cost;
		}
	}

	$raw_name = $filters->name;
	if ($raw_name !== null) {
		[ $parts, $length, $cost ] = self::filterAndWeight($raw_name);

		if ($length <= self::MAX_LENGTH_NAME) {
			$result->name = $parts;
			$result->weight += $cost;
		}
	}

	// No wildcard support, and obligatory anyway so it weights 0.
	$result->board = $filters->board;

	$raw_flag = $filters->flag;
	if ($raw_flag !== null) {
		$result->flag = [];

		// The flag filter is only considered when there are flags to match it against.
		if (!empty($this->flag_map)) {
			$longest_flag = \max(0, ...\array_values(\array_map('strlen', $this->flag_map)));

			[ $parts, $length, $cost ] = self::filterAndWeight($raw_flag);

			// Add 2 to account for possible wildcards on the ends.
			if ($length <= $longest_flag + 2) {
				$result->flag = $parts;
				$result->weight += $cost;
			}
		}
	}

	$result->id = $filters->id;
	$result->thread = $filters->thread;

	foreach ($filters->body as $phrase) {
		[ $parts, $length, $cost ] = self::filterAndWeight($phrase);
		$phrase_weight = self::weightByContent($parts) + $cost;

		// Keep the phrase only while the running total stays within the budget.
		if ($phrase_weight + $result->weight <= $this->max_weight) {
			$result->weight += $phrase_weight;
			$result->body[] = $parts;
		}
	}

	return $result;
}
|
||||||
|
|
||||||
|
/**
 * Run a search on user posts with the given filters.
 *
 * @param string $ip Source IP, only used for logging.
 * @param string $raw_query The raw user query, only used for logging.
 * @param SearchFilters $filters The filters made by {@see self::reduceAndWeight()}.
 * @param ?string $fallback_board Fallback board if there isn't a board filter.
 * @return ?array Data array straight from the PDO, with all the fields in posts.sql, or null if the query was too broad
 *                (no filter besides the board). An empty array is returned when there is no board to search in,
 *                the board is not searchable or the flag filter matches no known flag.
 */
public function search(string $ip, string $raw_query, SearchFilters $filters, ?string $fallback_board): ?array {
	$board = !empty($filters->board) ? $filters->board : $fallback_board;
	if ($board === null) {
		return [];
	}

	// Only board is specified.
	if (empty($filters->subject) &&
		empty($filters->name) &&
		empty($filters->flag) &&
		$filters->id === null &&
		$filters->thread === null &&
		empty($filters->body)
	) {
		return null;
	}

	if (!\in_array($board, $this->searchable_board_uris)) {
		return [];
	}

	// Guard against a zero maximum: any weight is then over the limit.
	if ($this->max_weight > 0) {
		$weight_perc = ($filters->weight / $this->max_weight) * 100;
	} else {
		$weight_perc = $filters->weight > 0 ? 100.0 : 0.0;
	}
	// Over 85 of the weight: make the query stand out in the log.
	$level = $weight_perc > 85 ? LogDriver::NOTICE : LogDriver::INFO;
	$this->log->log($level, "$ip search: weight $weight_perc ({$filters->weight}) query '$raw_query'");

	$flags = [];
	// SearchFilters::$flag is never null: an empty array means that no flag filter was given, in which
	// case the search must not be restricted by flag at all.
	if (!empty($filters->flag) && !empty($this->flag_map)) {
		// array_values(): the flag names are bound positionally, the keys of the map are irrelevant.
		$flags = \array_values(self::matchStrings($this->flag_map, $filters->flag));
		if (empty($flags)) {
			// The query doesn't match any flags so it will always fail anyway.
			return [];
		}
	}

	return $this->user_queries->searchPosts(
		$board,
		$filters->subject,
		$filters->name,
		$flags,
		$filters->id,
		$filters->thread,
		$filters->body,
		$this->post_limit
	);
}
|
||||||
|
|
||||||
|
/**
 * Check if the IP-query pair passes the limit.
 *
 * Delegates the check to the search queries data layer.
 *
 * @param string $ip Source IP.
 * @param string $raw_query The search query, as sent by the user.
 * @return bool True if the request goes over the limit.
 */
public function checkFlood(string $ip, string $raw_query): bool {
	// The cast keeps the declared return type valid whatever truthy/falsy value the data layer hands back.
	return (bool)$this->search_queries->checkFlood($ip, $raw_query);
}
|
||||||
|
|
||||||
|
/**
 * List the boards that this service may search.
 *
 * @return array The uris of the searchable boards.
 */
public function getSearchableBoards(): array {
	$uris = $this->searchable_board_uris;
	return $uris;
}
|
||||||
|
|
||||||
|
/**
 * Tell whether posts can be filtered by flag.
 *
 * @return bool True if the flag filter is enabled, that is, if the flag map is not empty.
 */
public function isFlagFilterEnabled(): bool {
	if (empty($this->flag_map)) {
		return false;
	}
	return true;
}
|
||||||
|
}
|
||||||
|
|
@ -1856,7 +1856,15 @@
|
||||||
// Limit of search results
|
// Limit of search results
|
||||||
$config['search']['search_limit'] = 100;
|
$config['search']['search_limit'] = 100;
|
||||||
|
|
||||||
// Boards for searching
|
// Maximum weight of the search query.
|
||||||
|
// Body search filters are discarded if they make the query heavier than this.
|
||||||
|
$config['search']['max_weight'] = 80;
|
||||||
|
|
||||||
|
// Maximum length of the user sent search query.
|
||||||
|
// Characters beyond the limit are truncated and ignored.
|
||||||
|
$config['search']['max_length'] = 768;
|
||||||
|
|
||||||
|
// Uncomment to limit the search feature to the given boards by uri.
|
||||||
//$config['search']['boards'] = array('a', 'b', 'c', 'd', 'e');
|
//$config['search']['boards'] = array('a', 'b', 'c', 'd', 'e');
|
||||||
|
|
||||||
// Enable public logs? 0: NO, 1: YES, 2: YES, but drop names
|
// Enable public logs? 0: NO, 1: YES, 2: YES, but drop names
|
||||||
|
|
|
||||||
|
|
@ -1,8 +1,9 @@
|
||||||
<?php
|
<?php
|
||||||
namespace Vichan;
|
namespace Vichan;
|
||||||
|
|
||||||
use Vichan\Data\{IpNoteQueries, ReportQueries, UserPostQueries};
|
use Vichan\Data\{IpNoteQueries, ReportQueries, SearchQueries, UserPostQueries, Flags};
|
||||||
use Vichan\Data\Driver\{CacheDriver, ErrorLogLogDriver, FileLogDriver, LogDriver, StderrLogDriver, SyslogLogDriver};
|
use Vichan\Data\Driver\{CacheDriver, ErrorLogLogDriver, FileLogDriver, LogDriver, StderrLogDriver, SyslogLogDriver};
|
||||||
|
use Vichan\Service\SearchService;
|
||||||
|
|
||||||
defined('TINYBOARD') or exit;
|
defined('TINYBOARD') or exit;
|
||||||
|
|
||||||
|
|
@ -69,6 +70,29 @@ function build_context(array $config): Context {
|
||||||
sql_open();
|
sql_open();
|
||||||
return $pdo;
|
return $pdo;
|
||||||
},
|
},
|
||||||
|
// Factory for the search service: wires the log driver, the query classes and the search configuration.
SearchService::class => function($c) {
	$config = $c->get('config');

	// Custom user flags take precedence over the embedded country flags.
	// No flag set at all is passed as null.
	$flags = null;
	if ($config['user_flag']) {
		$flags = $config['user_flags'];
	} elseif ($config['country_flags']) {
		$flags = Flags::EMBEDDED_FLAGS;
	}

	// Optional setting: null when the searchable boards are not restricted by the configuration.
	$board_uris = $config['search']['boards'] ?? null;

	$log = $c->get(LogDriver::class);
	$user_post_queries = $c->get(UserPostQueries::class);
	$search_queries = $c->get(SearchQueries::class);

	return new SearchService(
		$log,
		$user_post_queries,
		$search_queries,
		$flags,
		$config['search']['max_weight'],
		$config['search']['max_length'],
		$config['search']['search_limit'],
		$board_uris
	);
},
|
||||||
ReportQueries::class => function($c) {
|
ReportQueries::class => function($c) {
|
||||||
$auto_maintenance = (bool)$c->get('config')['auto_maintenance'];
|
$auto_maintenance = (bool)$c->get('config')['auto_maintenance'];
|
||||||
$pdo = $c->get(\PDO::class);
|
$pdo = $c->get(\PDO::class);
|
||||||
|
|
@ -78,5 +102,19 @@ function build_context(array $config): Context {
|
||||||
return new UserPostQueries($c->get(\PDO::class));
|
return new UserPostQueries($c->get(\PDO::class));
|
||||||
},
|
},
|
||||||
IpNoteQueries::class => fn($c) => new IpNoteQueries($c->get(\PDO::class), $c->get(CacheDriver::class)),
|
IpNoteQueries::class => fn($c) => new IpNoteQueries($c->get(\PDO::class), $c->get(CacheDriver::class)),
|
||||||
|
// Factory for the search queries: the flood limits are configured as [query count, range in minutes] pairs.
SearchQueries::class => function($c) {
	$config = $c->get('config');
	[$single_limit, $single_range_min] = $config['search']['queries_per_minutes'];
	[$all_limit, $all_range_min] = $config['search']['queries_per_minutes_all'];

	$auto_maintenance = (bool)$config['auto_maintenance'];

	return new SearchQueries(
		$c->get(\PDO::class),
		$single_limit,
		// The ranges are configured in minutes and handed over multiplied by 60.
		$single_range_min * 60,
		$all_limit,
		$all_range_min * 60,
		$auto_maintenance
	);
}
|
||||||
]);
|
]);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
209
search.php
209
search.php
|
|
@ -1,178 +1,77 @@
|
||||||
<?php
|
<?php
|
||||||
|
|
||||||
|
use Vichan\Service\SearchService;
|
||||||
|
|
||||||
require 'inc/bootstrap.php';
|
require 'inc/bootstrap.php';
|
||||||
|
|
||||||
if (!$config['search']['enable']) {
|
if (!$config['search']['enable']) {
|
||||||
die(_("Post search is disabled"));
|
die(_("Post search is disabled"));
|
||||||
}
|
}
|
||||||
|
|
||||||
$queries_per_minutes = $config['search']['queries_per_minutes'];
|
$ctx = Vichan\build_context($config);
|
||||||
$queries_per_minutes_all = $config['search']['queries_per_minutes_all'];
|
$search_service = $ctx->get(SearchService::class);
|
||||||
$search_limit = $config['search']['search_limit'];
|
|
||||||
|
|
||||||
if (isset($config['search']['boards'])) {
|
if (isset($_GET['search']) && !empty($_GET['search'])) {
|
||||||
$boards = $config['search']['boards'];
|
$raw_search = $_GET['search'];
|
||||||
} else {
|
$ip = $_SERVER['REMOTE_ADDR'];
|
||||||
$boards = listBoards(TRUE);
|
$fallback_board = (isset($_GET['board']) && !empty($_GET['board'])) ? $_GET['board'] : null;
|
||||||
}
|
|
||||||
|
|
||||||
$body = Element('search_form.html', Array('boards' => $boards, 'board' => isset($_GET['board']) ? $_GET['board'] : false, 'search' => isset($_GET['search']) ? str_replace('"', '"', utf8tohtml($_GET['search'])) : false));
|
|
||||||
|
|
||||||
if (isset($_GET['search']) && !empty($_GET['search']) && isset($_GET['board']) && in_array($_GET['board'], $boards)) {
|
if ($search_service->checkFlood($ip, $raw_search)) {
|
||||||
$phrase = $_GET['search'];
|
|
||||||
$_body = '';
|
|
||||||
|
|
||||||
$query = prepare("SELECT COUNT(*) FROM ``search_queries`` WHERE `ip` = :ip AND `time` > :time");
|
|
||||||
$query->bindValue(':ip', $_SERVER['REMOTE_ADDR']);
|
|
||||||
$query->bindValue(':time', time() - ($queries_per_minutes[1] * 60));
|
|
||||||
$query->execute() or error(db_error($query));
|
|
||||||
if ($query->fetchColumn() > $queries_per_minutes[0])
|
|
||||||
error(_('Wait a while before searching again, please.'));
|
error(_('Wait a while before searching again, please.'));
|
||||||
|
|
||||||
$query = prepare("SELECT COUNT(*) FROM ``search_queries`` WHERE `time` > :time");
|
|
||||||
$query->bindValue(':time', time() - ($queries_per_minutes_all[1] * 60));
|
|
||||||
$query->execute() or error(db_error($query));
|
|
||||||
if ($query->fetchColumn() > $queries_per_minutes_all[0])
|
|
||||||
error(_('Wait a while before searching again, please.'));
|
|
||||||
|
|
||||||
|
|
||||||
$query = prepare("INSERT INTO ``search_queries`` VALUES (:ip, :time, :query)");
|
|
||||||
$query->bindValue(':ip', $_SERVER['REMOTE_ADDR']);
|
|
||||||
$query->bindValue(':time', time());
|
|
||||||
$query->bindValue(':query', $phrase);
|
|
||||||
$query->execute() or error(db_error($query));
|
|
||||||
|
|
||||||
_syslog(LOG_NOTICE, 'Searched /' . $_GET['board'] . '/ for "' . $phrase . '"');
|
|
||||||
|
|
||||||
// Cleanup search queries table
|
|
||||||
$query = prepare("DELETE FROM ``search_queries`` WHERE `time` <= :time");
|
|
||||||
$query->bindValue(':time', time() - ($queries_per_minutes_all[1] * 60));
|
|
||||||
$query->execute() or error(db_error($query));
|
|
||||||
|
|
||||||
openBoard($_GET['board']);
|
|
||||||
|
|
||||||
$filters = Array();
|
|
||||||
|
|
||||||
function search_filters($m) {
|
|
||||||
global $filters;
|
|
||||||
$name = $m[2];
|
|
||||||
$value = isset($m[4]) ? $m[4] : $m[3];
|
|
||||||
|
|
||||||
if (!in_array($name, array('id', 'thread', 'subject', 'name'))) {
|
|
||||||
// unknown filter
|
|
||||||
return $m[0];
|
|
||||||
}
|
|
||||||
|
|
||||||
$filters[$name] = $value;
|
|
||||||
|
|
||||||
return $m[1];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
$phrase = trim(preg_replace_callback('/(^|\s)(\w+):("(.*)?"|[^\s]*)/', 'search_filters', $phrase));
|
// Actually do the search.
|
||||||
|
$parse_res = $search_service->parse($raw_search);
|
||||||
|
$filters = $search_service->reduceAndWeight($parse_res);
|
||||||
|
$search_res = $search_service->search($ip, $raw_search, $filters, $fallback_board);
|
||||||
|
|
||||||
if (!preg_match('/[^*^\s]/', $phrase) && empty($filters)) {
|
// Needed to set a global variable further down the stack, plus the template.
|
||||||
_syslog(LOG_WARNING, 'Query too broad.');
|
$actual_board = $filters->board ?? $fallback_board;
|
||||||
$body .= '<p class="unimportant" style="text-align:center">(Query too broad.)</p>';
|
|
||||||
echo Element('page.html', Array(
|
|
||||||
'config'=>$config,
|
|
||||||
'title'=>'Search',
|
|
||||||
'body'=>$body,
|
|
||||||
));
|
|
||||||
exit;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Escape escape character
|
$body = Element('search_form.html', [
|
||||||
$phrase = str_replace('!', '!!', $phrase);
|
'boards' => $search_service->getSearchableBoards(),
|
||||||
|
'board' => $actual_board,
|
||||||
|
'search' => \str_replace('"', '"', utf8tohtml($_GET['search'])),
|
||||||
|
'flags_enabled' => $search_service->isFlagFilterEnabled()
|
||||||
|
]);
|
||||||
|
|
||||||
// Remove SQL wildcard
|
if ($search_res === null) {
|
||||||
$phrase = str_replace('%', '!%', $phrase);
|
$body .= '<hr/><p style="text-align:center" class="unimportant">(' . _('Query too broad.') . ')</p>';
|
||||||
|
} elseif (empty($search_res)) {
|
||||||
// Use asterisk as wildcard to suit convention
|
$body .= '<hr/><p style="text-align:center" class="unimportant">(' . _('No results.') . ')</p>';
|
||||||
$phrase = str_replace('*', '%', $phrase);
|
|
||||||
|
|
||||||
// Remove `, it's used by table prefix magic
|
|
||||||
$phrase = str_replace('`', '!`', $phrase);
|
|
||||||
|
|
||||||
$like = '';
|
|
||||||
$match = Array();
|
|
||||||
|
|
||||||
// Find exact phrases
|
|
||||||
if (preg_match_all('/"(.+?)"/', $phrase, $m)) {
|
|
||||||
foreach($m[1] as &$quote) {
|
|
||||||
$phrase = str_replace("\"{$quote}\"", '', $phrase);
|
|
||||||
$match[] = $pdo->quote($quote);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
$words = explode(' ', $phrase);
|
|
||||||
foreach($words as &$word) {
|
|
||||||
if (empty($word)) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
$match[] = $pdo->quote($word);
|
|
||||||
}
|
|
||||||
|
|
||||||
$like = '';
|
|
||||||
foreach($match as &$phrase) {
|
|
||||||
if (!empty($like)) {
|
|
||||||
$like .= ' AND ';
|
|
||||||
}
|
|
||||||
$phrase = preg_replace('/^\'(.+)\'$/', '\'%$1%\'', $phrase);
|
|
||||||
$like .= '`body` LIKE ' . $phrase . ' ESCAPE \'!\'';
|
|
||||||
}
|
|
||||||
|
|
||||||
foreach($filters as $name => $value) {
|
|
||||||
if (!empty($like)) {
|
|
||||||
$like .= ' AND ';
|
|
||||||
}
|
|
||||||
$like .= '`' . $name . '` = '. $pdo->quote($value);
|
|
||||||
}
|
|
||||||
|
|
||||||
$like = str_replace('%', '%%', $like);
|
|
||||||
|
|
||||||
$query = prepare(sprintf("SELECT * FROM ``posts_%s`` WHERE " . $like . " ORDER BY `time` DESC LIMIT :limit", $board['uri']));
|
|
||||||
$query->bindValue(':limit', $search_limit, PDO::PARAM_INT);
|
|
||||||
$query->execute() or error(db_error($query));
|
|
||||||
|
|
||||||
if ($query->rowCount() == $search_limit) {
|
|
||||||
_syslog(LOG_WARNING, 'Query too broad.');
|
|
||||||
$body .= '<p class="unimportant" style="text-align:center">('._('Query too broad.').')</p>';
|
|
||||||
echo Element('page.html', Array(
|
|
||||||
'config'=>$config,
|
|
||||||
'title'=>'Search',
|
|
||||||
'body'=>$body,
|
|
||||||
));
|
|
||||||
exit;
|
|
||||||
}
|
|
||||||
|
|
||||||
$temp = '';
|
|
||||||
while ($post = $query->fetch()) {
|
|
||||||
if (!$post['thread']) {
|
|
||||||
$po = new Thread($post);
|
|
||||||
} else {
|
|
||||||
$po = new Post($post);
|
|
||||||
}
|
|
||||||
$temp .= $po->build(true) . '<hr/>';
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!empty($temp))
|
|
||||||
$_body .= '<fieldset><legend>' .
|
|
||||||
sprintf(ngettext('%d result in', '%d results in', $query->rowCount()),
|
|
||||||
$query->rowCount()) . ' <a href="/' .
|
|
||||||
sprintf($config['board_path'], $board['uri']) . $config['file_index'] .
|
|
||||||
'">' .
|
|
||||||
sprintf($config['board_abbreviation'], $board['uri']) . ' - ' . $board['title'] .
|
|
||||||
'</a></legend>' . $temp . '</fieldset>';
|
|
||||||
|
|
||||||
$body .= '<hr/>';
|
|
||||||
if (!empty($_body)) {
|
|
||||||
$body .= $_body;
|
|
||||||
} else {
|
} else {
|
||||||
$body .= '<p style="text-align:center" class="unimportant">('._('No results.').')</p>';
|
$body .= '<hr/>';
|
||||||
|
|
||||||
|
openBoard($actual_board);
|
||||||
|
|
||||||
|
$posts_html = '';
|
||||||
|
foreach ($search_res as $post) {
|
||||||
|
if (!$post['thread']) {
|
||||||
|
$po = new Thread($post);
|
||||||
|
} else {
|
||||||
|
$po = new Post($post);
|
||||||
|
}
|
||||||
|
$posts_html .= $po->build(true) . '<hr/>';
|
||||||
|
}
|
||||||
|
|
||||||
|
$body .= '<fieldset><legend>' .
|
||||||
|
sprintf(ngettext('%d result in', '%d results in', \count($search_res)), \count($search_res)) . ' <a href="/' .
|
||||||
|
sprintf($config['board_path'], $board['uri']) . $config['file_index'] . '">' .
|
||||||
|
sprintf($config['board_abbreviation'], $board['uri']) . ' - ' . $board['title'] .
|
||||||
|
'</a></legend>' . $posts_html . '</fieldset>';
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
$body = Element('search_form.html', [
|
||||||
|
'boards' => $search_service->getSearchableBoards(),
|
||||||
|
'board' => false,
|
||||||
|
'search' => false,
|
||||||
|
'flags_enabled' => $search_service->isFlagFilterEnabled()
|
||||||
|
]);
|
||||||
}
|
}
|
||||||
|
|
||||||
echo Element('page.html', Array(
|
echo Element('page.html', Array(
|
||||||
'config'=>$config,
|
'config'=>$config,
|
||||||
'title'=>_('Search'),
|
'title'=> _('Search'),
|
||||||
'body'=>'' . $body
|
'body'=> $body
|
||||||
));
|
));
|
||||||
|
|
|
||||||
|
|
@ -753,10 +753,6 @@ table.test td img {
|
||||||
margin: 0;
|
margin: 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
fieldset label {
|
|
||||||
display: block;
|
|
||||||
}
|
|
||||||
|
|
||||||
div.pages {
|
div.pages {
|
||||||
/*! color: #89A; */
|
/*! color: #89A; */
|
||||||
/*! background: #D6DAF0; */
|
/*! background: #D6DAF0; */
|
||||||
|
|
|
||||||
|
|
@ -1,24 +1,33 @@
|
||||||
<div class="ban">
|
<div class="ban">
|
||||||
|
<style>
|
||||||
|
form > p {
|
||||||
|
align-content: center;
|
||||||
|
}
|
||||||
|
</style>
|
||||||
<h2>{% trans %}Search{% endtrans %}</h2>
|
<h2>{% trans %}Search{% endtrans %}</h2>
|
||||||
<form style="display:inline" action="" method="get">
|
<form style="width:100%;display:flex;flex-wrap:wrap;justify-content:space-between;text-align:start" action="" method="get">
|
||||||
|
<p style="padding-right:0"><label for="search">{% trans %}Phrase:{% endtrans %}</label></p>
|
||||||
|
<p style="flex-grow:1"><input style="width:100%" id="search" name="search" type="text" size="40" value="{{ search }}"></p>
|
||||||
<p>
|
<p>
|
||||||
<label style="display:inline" for="search">{% trans %}Phrase:{% endtrans %}</label>
|
|
||||||
<input id="search" name="search" type="text" size="40" value="{{ search }}">
|
|
||||||
<select name="board">
|
<select name="board">
|
||||||
<option value="none">{% trans %}Select board{% endtrans %}…</option>
|
<option value="none">{% trans %}Select board{% endtrans %}…</option>
|
||||||
|
|
||||||
{% for b2 in boards %}
|
{% for b2 in boards %}
|
||||||
{% if b2 == b %}
|
{% if b2 == b %}
|
||||||
<option value="{{ b2 }}" selected>/{{ b2 }}/</option>
|
<option value="{{ b2 }}" selected>/{{ b2 }}/</option>
|
||||||
{% else %}
|
{% else %}
|
||||||
<option value="{{ b2 }}">/{{ b2 }}/</option>
|
<option value="{{ b2 }}">/{{ b2 }}/</option>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
</select>
|
</select>
|
||||||
<input type="submit" value="{% trans %}Search{% endtrans %}" />
|
<input type="submit" value="{% trans %}Search{% endtrans %}" />
|
||||||
</p>
|
</p>
|
||||||
</form>
|
</form>
|
||||||
<p style="font-size:8pt;margin:5px">
|
<p style="font-size:8pt;margin:5px">
|
||||||
{% trans %}Search is case-insensitive and based on keywords. To match exact phrases, use "quotes". Use an asterisk (*) for wildcard.</p><p style="font-size:8pt;margin:5px">You may apply the following filters to your searches: <strong>id</strong>, <strong>thread</strong>, <strong>subject</strong>, and <strong>name</strong>. To apply a filter, simply add to your query, for example, <em>name:Anonymous</em> or <em>subject:"Some Thread"</em>. Wildcards cannot be used in filters.{% endtrans %}
|
{% if flags_enabled %}
|
||||||
|
{% trans %}Search is case-insensitive and based on keywords. To match exact phrases, use "quotes". Use an asterisk (*) for wildcard.</p><p style="font-size:8pt;margin:5px">You may apply the following filters to your searches: <strong>id</strong>, <strong>thread</strong>, <strong>subject</strong>, <strong>name</strong>, <strong>flag</strong> and <strong>board</strong> (as an alternative syntax). To apply a filter, simply add to your query, for example, <em>name:Anonymous</em> or <em>subject:"Some Thread"</em>. The <strong>id</strong>, <strong>thread</strong> and <strong>board</strong> filters do not support wildcards.{% endtrans %}
|
||||||
|
{% else %}
|
||||||
|
{% trans %}Search is case-insensitive and based on keywords. To match exact phrases, use "quotes". Use an asterisk (*) for wildcard.</p><p style="font-size:8pt;margin:5px">You may apply the following filters to your searches: <strong>id</strong>, <strong>thread</strong>, <strong>subject</strong>, <strong>name</strong> and <strong>board</strong> (as an alternative syntax). To apply a filter, simply add to your query, for example, <em>name:Anonymous</em> or <em>subject:"Some Thread"</em>. The <strong>id</strong>, <strong>thread</strong> and <strong>board</strong> filters do not support wildcards.{% endtrans %}
|
||||||
|
{% endif %}
|
||||||
</p>
|
</p>
|
||||||
</div>
|
</div>
|
||||||
|
|
|
||||||
|
|
@ -3,7 +3,7 @@
|
||||||
* Performs maintenance tasks. Invoke this periodically if the auto_maintenance configuration option is turned off.
|
* Performs maintenance tasks. Invoke this periodically if the auto_maintenance configuration option is turned off.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
use Vichan\Data\ReportQueries;
|
use Vichan\Data\{ReportQueries, SearchQueries};
|
||||||
|
|
||||||
require dirname(__FILE__) . '/inc/cli.php';
|
require dirname(__FILE__) . '/inc/cli.php';
|
||||||
|
|
||||||
|
|
@ -45,9 +45,17 @@ if ($config['cache']['enabled'] === 'fs') {
|
||||||
$fs_cache->collect();
|
$fs_cache->collect();
|
||||||
$delta = microtime(true) - $start;
|
$delta = microtime(true) - $start;
|
||||||
echo "Deleted $deleted_count expired filesystem cache items in $delta seconds!\n";
|
echo "Deleted $deleted_count expired filesystem cache items in $delta seconds!\n";
|
||||||
$time_tot = $delta;
|
$time_tot += $delta;
|
||||||
$deleted_tot = $deleted_count;
|
$deleted_tot = $deleted_count;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Purge the expired entries of the search log and add the outcome to the running totals.
echo "Clearing old search log...\n";
$search_queries = $ctx->get(SearchQueries::class);
$start = microtime(true);
// NOTE(review): purgeExpired() is assumed to return the number of deleted rows - confirm against SearchQueries.
$deleted_count = $search_queries->purgeExpired();
$delta = microtime(true) - $start;
$time_tot += $delta;
// Accumulate rather than overwrite: the total is reported by the modLog() call at the end of the tool.
$deleted_tot += $deleted_count;
|
||||||
|
|
||||||
$time_tot = number_format((float)$time_tot, 4, '.', '');
|
$time_tot = number_format((float)$time_tot, 4, '.', '');
|
||||||
modLog("Deleted $deleted_tot expired entries in {$time_tot}s with maintenance tool");
|
modLog("Deleted $deleted_tot expired entries in {$time_tot}s with maintenance tool");
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue