Compare commits
19 commits
3caa94eeea
...
946f836cb5
Author | SHA1 | Date | |
---|---|---|---|
946f836cb5 | |||
e32dfcb51a | |||
92e24dac67 | |||
9b60540e6b | |||
e02fd2a3c4 | |||
4b5ce63f4b | |||
a0d218dbc2 | |||
3238319e26 | |||
ad62e81752 | |||
923d4ef2b9 | |||
47bd9fa127 | |||
6b01a4ad6f | |||
e6d0681d0f | |||
19efb78e92 | |||
ce7be3e0aa | |||
aa7aa4b205 | |||
b4d5f23e78 | |||
a99cc34f7e | |||
48c5f6a4e0 |
10 changed files with 666 additions and 279 deletions
13
inc/Data/FiltersParseResult.php
Normal file
13
inc/Data/FiltersParseResult.php
Normal file
|
@ -0,0 +1,13 @@
|
|||
<?php
|
||||
namespace Vichan\Data;
|
||||
|
||||
|
||||
/**
 * Plain data holder for the filters extracted from a raw user search query.
 *
 * A fresh instance represents "nothing parsed yet": the body chunk list is
 * empty and every optional filter is null.
 */
class FiltersParseResult {
	/** @var array Chunks of free text to look for in the post body. */
	public array $body = [];

	/** @var ?string Subject filter, or null when not given. */
	public ?string $subject = null;

	/** @var ?string Name filter, or null when not given. */
	public ?string $name = null;

	/** @var ?string Board filter, or null when not given. */
	public ?string $board = null;

	/** @var ?string Flag filter, or null when not given. */
	public ?string $flag = null;

	/** @var ?int Post id filter, or null when not given. */
	public ?int $id = null;

	/** @var ?int Thread id filter, or null when not given. */
	public ?int $thread = null;
}
|
283
inc/Data/Flags.php
Normal file
283
inc/Data/Flags.php
Normal file
|
@ -0,0 +1,283 @@
|
|||
<?php
|
||||
|
||||
/**
 * Holds the list of flags that ship with vichan.
 */
class Flags {
	/**
	 * Short names of the flags embedded with vichan.
	 *
	 * Flat list; entries are laid out one row per leading letter purely for
	 * readability, the element order is unchanged.
	 */
	public const EMBEDDED_FLAGS = [
		'a1', 'a2', 'ac', 'ad', 'ae', 'af', 'ag', 'ai', 'al', 'am', 'an', 'ao', 'ap', 'aq', 'ar', 'as', 'at', 'au', 'aw', 'ax', 'az',
		'ba', 'bb', 'bd', 'be', 'bf', 'bg', 'bh', 'bi', 'bj', 'bl', 'bm', 'bn', 'bo', 'bq', 'br', 'bs', 'bt', 'bu', 'bv', 'bw', 'by', 'bz',
		'ca', 'cat', 'cc', 'cd', 'cf', 'cg', 'ch', 'ci', 'ck', 'cl', 'cm', 'cn', 'co', 'cp', 'cr', 'cs', 'cu', 'cv', 'cw', 'cx', 'cy', 'cz',
		'de', 'dg', 'dj', 'dk', 'dm', 'do', 'dz',
		'ea', 'ec', 'ee', 'eg', 'eh', 'er', 'es', 'et', 'eu',
		'fi', 'fj', 'fk', 'fm', 'fo', 'fr', 'fx',
		'ga', 'gb', 'gd', 'ge', 'gf', 'gg', 'gh', 'gi', 'gl', 'gm', 'gn', 'gp', 'gq', 'gr', 'gs', 'gt', 'gu', 'gw', 'gy',
		'hk', 'hm', 'hn', 'hr', 'ht', 'hu',
		'ic', 'id', 'ie', 'il', 'im', 'in', 'io', 'iq', 'ir', 'is', 'it',
		'je', 'jm', 'jo', 'jp',
		'ke', 'kg', 'kh', 'ki', 'km', 'kn', 'kp', 'kr', 'kw', 'ky', 'kz',
		'la', 'lb', 'lc', 'li', 'lk', 'lr', 'ls', 'lt', 'lu', 'lv', 'ly',
		'ma', 'mc', 'md', 'me', 'mf', 'mg', 'mh', 'mk', 'ml', 'mm', 'mn', 'mo', 'mp', 'mq', 'mr', 'ms', 'mt', 'mu', 'mv', 'mw', 'mx', 'my', 'mz',
		'na', 'nc', 'ne', 'nf', 'ng', 'ni', 'nl', 'no', 'np', 'nr', 'nt', 'nu', 'nz',
		'o1', 'om',
		'pa', 'pe', 'pf', 'pg', 'ph', 'pk', 'pl', 'pm', 'pn', 'pr', 'ps', 'pt', 'pw', 'py',
		'qa',
		're', 'ro', 'rs', 'ru', 'rw',
		'sa', 'sb', 'sc', 'sd', 'se', 'sf', 'sg', 'sh', 'si', 'sj', 'sk', 'sl', 'sm', 'sn', 'so', 'sr', 'ss', 'st', 'su', 'sv', 'sx', 'sy', 'sz',
		'ta', 'tc', 'td', 'tf', 'tg', 'th', 'ti', 'tj', 'tk', 'tl', 'tm', 'tn', 'to', 'tp', 'tr', 'tt', 'tv', 'tw', 'tz',
		'ua', 'ug', 'uk', 'um', 'us', 'uy', 'uz',
		'va', 'vc', 've', 'vg', 'vi', 'vn', 'vu',
		'wf', 'ws',
		'xx',
		'ye', 'yt', 'yu',
		'za', 'zm', 'zr', 'zw',
	];
}
|
|
@ -2,12 +2,31 @@
|
|||
namespace Vichan\Data;
|
||||
|
||||
|
||||
/**
|
||||
* POD with the fragments of each filter.
|
||||
*/
|
||||
class SearchFilters {
|
||||
public array $body = [];
|
||||
public ?string $subject = null;
|
||||
public ?string $name = null;
|
||||
public ?string $board = null;
|
||||
public ?string $flag = null;
|
||||
public ?int $id = null;
|
||||
public ?int $thread = null;
|
||||
/**
|
||||
* @var array<array<string>>
|
||||
*/
|
||||
public array $body;
|
||||
/**
|
||||
* @var array<string>
|
||||
*/
|
||||
public array $subject;
|
||||
/**
|
||||
* @var array<string>
|
||||
*/
|
||||
public array $name;
|
||||
/**
|
||||
* @var array<string>
|
||||
*/
|
||||
public array $board;
|
||||
/**
|
||||
* @var array<string>
|
||||
*/
|
||||
public array $flag;
|
||||
public ?int $id;
|
||||
public ?int $thread;
|
||||
public float $weight;
|
||||
}
|
||||
|
|
|
@ -6,20 +6,25 @@ class SearchQueries {
|
|||
private \PDO $pdo;
|
||||
private int $queries_per_minutes_single;
|
||||
private int $queries_per_minutes_all;
|
||||
private bool $auto_gc;
|
||||
|
||||
|
||||
private function checkFloodImpl(string $ip, string $phrase): bool {
|
||||
$now = time();
|
||||
$now = \time();
|
||||
$expiry_limit = \time() - ($this->queries_per_minutes_all * 60);
|
||||
|
||||
$query = $this->pdo->prepare("SELECT COUNT(*) FROM `search_queries` WHERE `ip` = :ip AND `time` > :time");
|
||||
$query = $this->pdo->prepare("SELECT COUNT(*) FROM `search_queries` WHERE `ip` = :ip AND `time` > :time AND `time` <= :expiry_limit");
|
||||
$query->bindValue(':ip', $ip);
|
||||
$query->bindValue(':time', $now - ($this->queries_per_minutes_single * 60));
|
||||
$query->bindValue(':time', $now - ($this->queries_per_minutes_single * 60), \PDO::PARAM_INT);
|
||||
$query->bindValue(':expiry_limit', $expiry_limit, \PDO::PARAM_INT);
|
||||
$query->execute();
|
||||
if ($query->fetchColumn() > $this->queries_per_minutes_single) {
|
||||
return false;
|
||||
}
|
||||
|
||||
$query = $this->pdo->prepare("SELECT COUNT(*) FROM `search_queries` WHERE `time` > :time");
|
||||
$query->bindValue(':time', $now - ($this->queries_per_minutes_all * 60));
|
||||
$query = $this->pdo->prepare("SELECT COUNT(*) FROM `search_queries` WHERE `time` > :time AND `time` <= :expiry_limit");
|
||||
$query->bindValue(':time', $now - ($this->queries_per_minutes_all * 60), \PDO::PARAM_INT);
|
||||
$query->bindValue(':expiry_limit', $expiry_limit, \PDO::PARAM_INT);
|
||||
$query->execute();
|
||||
if ($query->fetchColumn() > $this->queries_per_minutes_all) {
|
||||
return false;
|
||||
|
@ -27,24 +32,31 @@ class SearchQueries {
|
|||
|
||||
$query = $this->pdo->prepare("INSERT INTO `search_queries` VALUES (:ip, :time, :query)");
|
||||
$query->bindValue(':ip', $ip);
|
||||
$query->bindValue(':time', $now);
|
||||
$query->bindValue(':time', $now, \PDO::PARAM_INT);
|
||||
$query->bindValue(':query', $phrase);
|
||||
$query->execute();
|
||||
|
||||
// Cleanup search queries table
|
||||
$query = prepare("DELETE FROM `search_queries` WHERE `time` <= :time");
|
||||
$query->bindValue(':time', time() - ($this->queries_per_minutes_all * 60));
|
||||
$query->execute();
|
||||
if ($this->auto_gc) {
|
||||
$this->purgeExpired();
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
public function __construct(\PDO $pdo, int $queries_per_minutes_single, int $queries_per_minutes_all) {
|
||||
public function __construct(\PDO $pdo, int $queries_per_minutes_single, int $queries_per_minutes_all, bool $auto_gc) {
|
||||
$this->pdo = $pdo;
|
||||
$this->queries_per_minutes_single = $queries_per_minutes_single;
|
||||
$this->queries_per_minutes_all = $queries_per_minutes_all;
|
||||
$this->auto_gc = $auto_gc;
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if the IP-query pair overflows the limit.
|
||||
*
|
||||
* @param string $ip Source IP.
|
||||
* @param string $phrase The search query.
|
||||
* @return bool True if the request goes over the limit
|
||||
*/
|
||||
public function checkFlood(string $ip, string $phrase): bool {
|
||||
$this->pdo->beginTransaction();
|
||||
try {
|
||||
|
@ -56,4 +68,12 @@ class SearchQueries {
|
|||
throw $e;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Deletes the search query records that fell out of the global flood window.
 *
 * A record counts as expired once its `time` is at or before the current time
 * minus `queries_per_minutes_all` minutes.
 *
 * @return int Number of rows removed from `search_queries`.
 */
public function purgeExpired(): int {
	$expiry_limit = \time() - ($this->queries_per_minutes_all * 60);

	// Use the injected connection, like every other statement of this class, rather than
	// the unqualified prepare() helper: this keeps the delete on the same PDO handle
	// (and therefore inside any transaction opened on it).
	$query = $this->pdo->prepare("DELETE FROM `search_queries` WHERE `time` <= :expiry_limit");
	$query->bindValue(':expiry_limit', $expiry_limit, \PDO::PARAM_INT);
	$query->execute();

	return $query->rowCount();
}
|
||||
}
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
namespace Vichan\Service;
|
||||
|
||||
use Vichan\Data\Driver\LogDriver;
|
||||
use Vichan\Data\{UserPostQueries, SearchFilters, SearchFiltersWeighted};
|
||||
use Vichan\Data\{FiltersParseResult, UserPostQueries, SearchFilters, SearchQueries};
|
||||
|
||||
|
||||
class SearchService {
|
||||
|
@ -33,10 +33,12 @@ class SearchService {
|
|||
|
||||
private LogDriver $log;
|
||||
private UserPostQueries $user_queries;
|
||||
private SearchQueries $search_queries;
|
||||
private ?array $flag_map;
|
||||
private float $max_weight;
|
||||
private int $max_query_length;
|
||||
private int $post_limit;
|
||||
private array $searchable_board_uris;
|
||||
|
||||
|
||||
private static function truncateQuery(string $text, int $byteLimit): ?string {
|
||||
|
@ -62,42 +64,35 @@ class SearchService {
|
|||
return null;
|
||||
}
|
||||
|
||||
private static function trimEnd(string $str): string {
|
||||
return \rtrim($str, "* \n\r\t\v\0");
|
||||
private static function trim(string $str): string {
|
||||
return \trim($str, "* \n\r\t\v\0");
|
||||
}
|
||||
|
||||
private function sanitizeAndTransform(string $str): array {
|
||||
// Escape UserQueries's wildcards.
|
||||
$str = $this->user_queries->escapeSearchPosts($str);
|
||||
// Coalesce multiple wildcards.
|
||||
$wildcard_count = 0;
|
||||
$str = \preg_replace_callback('/(?:\\\\\\\\)*\\\\\*|(?:\\\\\\\\)*\*+/', function($match) use (&$wildcard_count) {
|
||||
$wildcard_count++;
|
||||
return UserPostQueries::SEARCH_POSTS_WILDCARD;
|
||||
}, $str);
|
||||
// Query is too broad.
|
||||
if ($str === UserPostQueries::SEARCH_POSTS_WILDCARD) {
|
||||
return [ null, 0 ];
|
||||
}
|
||||
// Unescape.
|
||||
$str = \strtr($str, [
|
||||
private static function unescape(string $str): string {
|
||||
return \strtr($str, [
|
||||
'\\\\' => '\\',
|
||||
'\\*' => '*',
|
||||
'\\"' => '"'
|
||||
]);
|
||||
|
||||
return [ $str, $wildcard_count ];
|
||||
}
|
||||
|
||||
private static function weightByContent(string $str): float {
|
||||
$w = 1;
|
||||
/**
|
||||
* Split the filter into fragments along the wildcards, handling escaping.
|
||||
*
|
||||
* @param string $str The full filter.
|
||||
* @return array<string>
|
||||
*/
|
||||
private static function split(string $str): array {
|
||||
// Split the fragments
|
||||
return \preg_split('/(?:\\\\\\\\)*\\\\\*|(?:\\\\\\\\)*\*+/', $str);
|
||||
}
|
||||
|
||||
// Count common and short words.
|
||||
$trim = \trim($str, UserPostQueries::SEARCH_POSTS_WILDCARD . " \n\r\t\v\0");
|
||||
$words = \explode(' ', $trim);
|
||||
foreach ($words as $word) {
|
||||
$short = \strlen($word) < 4;
|
||||
if (\in_array($word, self::COMMON_WORDS)) {
|
||||
private static function weightByContent(array $fragments): float {
|
||||
$w = 0;
|
||||
|
||||
foreach ($fragments as $fragment) {
|
||||
$short = \strlen($fragment) < 4;
|
||||
if (\in_array($fragment, self::COMMON_WORDS)) {
|
||||
$w += $short ? 16 : 6;
|
||||
} elseif ($short) {
|
||||
$w += 6;
|
||||
|
@ -107,24 +102,54 @@ class SearchService {
|
|||
return $w;
|
||||
}
|
||||
|
||||
private static function weightByWildcards(string $str, int $wildcards): float {
|
||||
// Wildcards over the total length of the word.
|
||||
$perc = $wildcards / \strlen($str) * 100;
|
||||
return $perc + $wildcards * 2;
|
||||
}
|
||||
|
||||
private function matchFlag(string $query): array {
|
||||
$query = \preg_quote($query);
|
||||
$query = \str_replace('\\*', '.*', $query);
|
||||
$regex = "/^*$query*$/i";
|
||||
|
||||
/**
 * Splits a filter into cleaned fragments and computes its wildcard weight.
 *
 * The filter is split along its wildcards, each piece is unescaped and trimmed,
 * and empty pieces are discarded.
 *
 * @param string $filter The full, user provided filter.
 * @return array A triple: the list of non-empty fragments, the sum of their
 *               byte lengths, and the weight attributed to the wildcards.
 */
private static function filterAndWeight(string $filter): array {
	$fragments = self::split($filter);
	$acc = [];
	$total_len = 0;

	foreach ($fragments as $fragment) {
		$fragment = self::trim(self::unescape($fragment));

		// Compare against '' instead of using empty(), which would also discard the fragment "0".
		if ($fragment !== '') {
			$total_len += \strlen($fragment);
			$acc[] = $fragment;
		}
	}

	// Interword wildcards: the number of split pieces minus 1, never negative.
	$interword = \max(\count($fragments) - 1, 0);
	// Wildcards over the total length of the kept text.
	// Guarded because a filter made only of wildcards/whitespace leaves no text to divide by.
	$perc = $total_len > 0 ? $interword / $total_len * 100 : 0.0;
	$wildcard_weight = $perc + \count($fragments) * 2;

	return [ $acc, $total_len, $wildcard_weight ];
}
|
||||
|
||||
/**
 * Gets the subset of the flags which contain every fragment, in order.
 *
 * A flag matches when each fragment is found (case-insensitively) after the end
 * of the previous fragment's match. Keys of $flags are preserved in the result.
 *
 * @param array<string> $flags An array of flags.
 * @param array<string> $fragments User provided fragments to search in the flags.
 * @return array<string> The flags that match all the fragments.
 */
private static function matchFlags(array $flags, array $fragments): array {
	return \array_filter($flags, static function ($str) use ($fragments): bool {
		$str = (string)$str;
		// Where the next fragment may start. Ensures the fragments are found one after the other.
		$offset = 0;

		foreach ($fragments as $fragment) {
			$fragment = (string)$fragment;
			if ($fragment === '') {
				// Nothing to look for.
				continue;
			}

			// $offset never exceeds strlen($str), so stripos() cannot be handed an out-of-range offset.
			$pos = \stripos($str, $fragment, $offset);
			if ($pos === false) {
				// Exclude flags that don't match even a single fragment.
				return false;
			}

			// Continue right after this match so fragments cannot overlap or go backwards.
			$offset = $pos + \strlen($fragment);
		}

		return true;
	});
}
|
||||
|
||||
/**
|
||||
|
@ -140,9 +165,9 @@ class SearchService {
|
|||
* - id: post id, must be numeric
|
||||
* - thread: thread id, must be numeric
|
||||
* The remaining text is split into chunks and searched in the post body.
|
||||
* @return SearchFilters
|
||||
* @return FiltersParseResult
|
||||
*/
|
||||
public function parse(string $raw_query): SearchFilters {
|
||||
public function parse(string $raw_query): FiltersParseResult{
|
||||
$tres = self::truncateQuery($raw_query, $this->max_query_length);
|
||||
if ($tres === null) {
|
||||
throw new \RuntimeException('Could not truncate query');
|
||||
|
@ -179,7 +204,7 @@ class SearchService {
|
|||
throw new \RuntimeException('Could not decode the query');
|
||||
}
|
||||
|
||||
$filters = new SearchFilters();
|
||||
$filters = new FiltersParseResult();
|
||||
|
||||
foreach ($matches as $m) {
|
||||
if (!empty($m[1])) {
|
||||
|
@ -219,90 +244,90 @@ class SearchService {
|
|||
}
|
||||
|
||||
/**
|
||||
* @param LogDriver $log Log driver.
|
||||
* @param UserPostQueries $user_queries User posts queries.
|
||||
* @param ?flag_map $max_flag_length The key-value map of user flags, or null to disable flag search.
|
||||
* @param SearchQueries $search_queries Search queries for flood detection.
|
||||
* @param ?array $flag_map The key-value map of user flags, or null to disable flag search.
|
||||
* @param float $max_weight The maximum weight of the parsed user query. Body filters that go beyond this limit are discarded.
|
||||
* @param int $max_query_length Maximum length of the raw input query before it's truncated.
|
||||
* @param int $post_limit Maximum number of results.
|
||||
* @param ?array $searchable_board_uris The uris of the board that can be searched. Null to search all the boards.
|
||||
*/
|
||||
public function __construct(LogDriver $log, UserPostQueries $user_queries, ?array $flag_map, float $max_weight, int $max_query_length, int $post_limit) {
|
||||
public function __construct(
|
||||
LogDriver $log,
|
||||
UserPostQueries $user_queries,
|
||||
?array $flag_map,
|
||||
float $max_weight,
|
||||
int $max_query_length,
|
||||
int $post_limit,
|
||||
?array $searchable_board_uris
|
||||
) {
|
||||
$this->log = $log;
|
||||
$this->user_queries = $user_queries;
|
||||
$this->flag_map = $flag_map;
|
||||
$this->max_weight = $max_weight;
|
||||
$this->max_query_length = $max_query_length;
|
||||
$this->post_limit = $post_limit;
|
||||
$this->searchable_board_uris = $searchable_board_uris ?? listBoards(true);
|
||||
}
|
||||
|
||||
/**
|
||||
* Reduces the user provided filters and assigns them a total weight.
|
||||
*
|
||||
* @param SearchFilters $filters The filters to sanitize, reduce and weight.
|
||||
* @return SearchFiltersWeighted
|
||||
* @param FiltersParseResult $filters The filters to sanitize, reduce and weight.
|
||||
* @return SearchFilters
|
||||
*/
|
||||
public function reduceAndWeight(SearchFilters $filters): SearchFiltersWeighted {
|
||||
$weighted = new SearchFiltersWeighted();
|
||||
public function reduceAndWeight(FiltersParseResult $filters): SearchFilters {
|
||||
$weighted = new SearchFilters();
|
||||
|
||||
if ($filters->subject !== null) {
|
||||
if (\strlen($filters->subject) > self::MAX_LENGTH_SUBJECT) {
|
||||
$filters->subject = null;
|
||||
list($fragments, $total_len, $wildcard_weight) = self::filterAndWeight($filters->subject);
|
||||
|
||||
if ($total_len > self::MAX_LENGTH_SUBJECT) {
|
||||
$weighted->subject = [];
|
||||
} else {
|
||||
list($str, $wildcards) = $this->sanitizeAndTransform($filters->subject);
|
||||
if ($str === null) {
|
||||
$filters->subject = null;
|
||||
} else {
|
||||
$str = self::trimEnd($str);
|
||||
$weighted->weight += self::weightByWildcards($str, $wildcards);
|
||||
$filters->subject = $str;
|
||||
}
|
||||
$weighted->subject = $fragments;
|
||||
$weighted->weight = $wildcard_weight;
|
||||
}
|
||||
}
|
||||
if ($filters->name !== null) {
|
||||
if (\strlen($filters->name) > self::MAX_LENGTH_NAME) {
|
||||
$filters->name = null;
|
||||
list($fragments, $total_len, $wildcard_weight) = self::filterAndWeight($filters->name);
|
||||
|
||||
if ($total_len > self::MAX_LENGTH_NAME) {
|
||||
$weighted->name = [];
|
||||
} else {
|
||||
list($str, $wildcards) = $this->sanitizeAndTransform($filters->name);
|
||||
if ($str === null) {
|
||||
$filters->name = null;
|
||||
} else {
|
||||
$str = self::trimEnd($str);
|
||||
$weighted->weight += self::weightByWildcards($str, $wildcards);
|
||||
$filters->name = $str;
|
||||
}
|
||||
$weighted->name = $fragments;
|
||||
$weighted->weight += $wildcard_weight;
|
||||
}
|
||||
}
|
||||
if ($filters->flag !== null) {
|
||||
$max_flag_length = \array_reduce($this->flag_map, function($current_max, $str) {
|
||||
return \max($current_max, \strlen($str));
|
||||
}, 0);
|
||||
$weighted->flag = [];
|
||||
|
||||
if ($this->flag_map !== null && !empty($this->flag_map)) {
|
||||
$max_flag_length = \array_reduce($this->flag_map, fn($max, $str) => \max($max, \strlen($str)), 0);
|
||||
|
||||
list($fragments, $total_len, $wildcard_weight) = self::filterAndWeight($filters->flag);
|
||||
|
||||
if ($this->flag_map === null
|
||||
|| empty($this->flag_map)
|
||||
// Add 2 to account for possible wildcards on the ends.
|
||||
|| \strlen($filters->flag) > $max_flag_length + 2) {
|
||||
$filters->flag = null;
|
||||
} else {
|
||||
$str = \trim($str);
|
||||
$weighted->weight += self::weightByWildcards($str, $wildcards);
|
||||
$filters->flag = $str;
|
||||
}
|
||||
}
|
||||
if ($filters->body !== null) {
|
||||
$acc = [];
|
||||
foreach ($filters->body as $str) {
|
||||
$str = self::trimEnd($str);
|
||||
list($str, $wildcards) = $this->sanitizeAndTransform($str);
|
||||
|
||||
if ($str !== null && !empty($str)) {
|
||||
$w_content = self::weightByContent($str);
|
||||
$w_wildcards = self::weightByWildcards($str, $wildcards);
|
||||
|
||||
$w = $w_content + $w_wildcards;
|
||||
if ($w + $weighted->weight <= $this->max_weight) {
|
||||
$weighted->weight += $w;
|
||||
$acc[] = $str;
|
||||
}
|
||||
if ($total_len <= $max_flag_length + 2) {
|
||||
$weighted->flag = $fragments;
|
||||
$weighted->weight += $wildcard_weight;
|
||||
}
|
||||
}
|
||||
}
|
||||
$weighted->id = $filters->id;
|
||||
$weighted->thread = $filters->thread;
|
||||
if ($filters->body !== null) {
|
||||
foreach ($filters->body as $str) {
|
||||
list($fragments, $total_len, $wildcard_weight) = self::filterAndWeight($str);
|
||||
$content_weight = self::weightByContent($fragments);
|
||||
$str_weight = $content_weight + $wildcard_weight;
|
||||
|
||||
$filters->body = $acc;
|
||||
if ($str_weight + $weighted->weight <= $this->max_weight) {
|
||||
$weighted->weight += $str_weight;
|
||||
$filters->body[] = $fragments;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return $weighted;
|
||||
|
@ -311,18 +336,17 @@ class SearchService {
|
|||
/**
|
||||
* Run a search on user posts with the given filters.
|
||||
*
|
||||
* @param SearchFiltersWeighted $filters An array of filters made by {@see self::parse()}.
|
||||
* @param SearchFilters $filters The weighted filters made by {@see self::reduceAndWeight()}.
|
||||
* @param ?string $fallback_board Fallback board if there isn't a board filter.
|
||||
* @return array Data array straight from the PDO, with all the fields in posts.sql
|
||||
*/
|
||||
public function search(string $ip, string $raw_query, SearchFiltersWeighted $filters, ?string $fallback_board): array {
|
||||
public function search(string $ip, string $raw_query, SearchFilters $filters, ?string $fallback_board): array {
|
||||
$board = $filters->board ?? $fallback_board;
|
||||
if ($board === null) {
|
||||
return [];
|
||||
}
|
||||
|
||||
$valid_uris = listBoards(true);
|
||||
if (!\in_array($board, $valid_uris)) {
|
||||
if (!\in_array($board, $this->searchable_board_uris)) {
|
||||
return [];
|
||||
}
|
||||
|
||||
|
@ -334,7 +358,7 @@ class SearchService {
|
|||
$this->log->log(LogDriver::INFO, "$ip search: weight $weight_perc ({$filters->weight}) query '$raw_query'");
|
||||
}
|
||||
|
||||
$flags = $filters->flag !== null ? $this->matchFlag($filters->flag) : null;
|
||||
$flags = $filters->flag !== null ? $this->matchFlags($this->flag_map, $filters->flag) : null;
|
||||
|
||||
return $this->user_queries->searchPosts(
|
||||
$board,
|
||||
|
@ -347,4 +371,22 @@ class SearchService {
|
|||
$this->post_limit
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * Check if the IP-query pair passes the limit.
 *
 * Thin wrapper that forwards the check to the injected SearchQueries instance.
 *
 * @param string $ip Source IP.
 * @param string $raw_query The search query, as sent by the user.
 * @return bool True if the request goes over the limit.
 */
public function checkFlood(string $ip, string $raw_query): bool {
	return $this->search_queries->checkFlood($ip, $raw_query);
}
|
||||
|
||||
/**
 * Returns the uris of the boards that may be searched.
 *
 * @return array The board uris held by this service.
 */
public function getSearchableBoards(): array {
	$uris = $this->searchable_board_uris;
	return $uris;
}
|
||||
}
|
||||
|
|
|
@ -1856,7 +1856,15 @@
|
|||
// Limit of search results
|
||||
$config['search']['search_limit'] = 100;
|
||||
|
||||
// Boards for searching
|
||||
// Maximum weight of the search query.
|
||||
// Body search filters are discarded if they make the query heavier than this.
|
||||
$config['search']['max_weight'] = 80;
|
||||
|
||||
// Maximum length of the user sent search query.
|
||||
// Characters beyond the limit are truncated and ignored.
|
||||
$config['search']['max_length'] = 768;
|
||||
|
||||
// Uncomment to limit the search feature to the given boards by uri.
|
||||
//$config['search']['boards'] = array('a', 'b', 'c', 'd', 'e');
|
||||
|
||||
// Enable public logs? 0: NO, 1: YES, 2: YES, but drop names
|
||||
|
|
|
@ -1,8 +1,10 @@
|
|||
<?php
|
||||
namespace Vichan;
|
||||
|
||||
use Vichan\Data\{IpNoteQueries, ReportQueries, UserPostQueries};
|
||||
use Flags;
|
||||
use Vichan\Data\{IpNoteQueries, ReportQueries, SearchQueries, UserPostQueries};
|
||||
use Vichan\Data\Driver\{CacheDriver, ErrorLogLogDriver, FileLogDriver, LogDriver, StderrLogDriver, SyslogLogDriver};
|
||||
use Vichan\Service\SearchService;
|
||||
|
||||
defined('TINYBOARD') or exit;
|
||||
|
||||
|
@ -69,6 +71,28 @@ function build_context(array $config): Context {
|
|||
sql_open();
|
||||
return $pdo;
|
||||
},
|
||||
SearchService::class => function($c) {
|
||||
$config = $c->get('config');
|
||||
if ($config['user_flag']) {
|
||||
$flags = $config['user_flags'];
|
||||
} elseif ($config['country_flags']) {
|
||||
$flags = Flags::EMBEDDED_FLAGS;
|
||||
} else {
|
||||
$flags = null;
|
||||
}
|
||||
|
||||
$board_uris = $config['search']['boards'] ?? null;
|
||||
|
||||
return new SearchService(
|
||||
$c->get(LogDriver::class),
|
||||
$c->get(UserPostQueries::class),
|
||||
$flags,
|
||||
$config['search']['max_weight'],
|
||||
$config['search']['max_length'],
|
||||
$config['search']['search_limit'],
|
||||
$board_uris
|
||||
);
|
||||
},
|
||||
ReportQueries::class => function($c) {
|
||||
$auto_maintenance = (bool)$c->get('config')['auto_maintenance'];
|
||||
$pdo = $c->get(\PDO::class);
|
||||
|
@ -78,5 +102,14 @@ function build_context(array $config): Context {
|
|||
return new UserPostQueries($c->get(\PDO::class));
|
||||
},
|
||||
IpNoteQueries::class => fn($c) => new IpNoteQueries($c->get(\PDO::class), $c->get(CacheDriver::class)),
|
||||
SearchQueries::class => function($c) {
|
||||
$config = $c->get('config');
|
||||
return new SearchQueries(
|
||||
$c->get(\PDO::class),
|
||||
$config['search']['queries_per_minutes'],
|
||||
$config['search']['queries_per_minutes_all'],
|
||||
$config['auto_maintenance']
|
||||
);
|
||||
}
|
||||
]);
|
||||
}
|
||||
|
|
200
search.php
200
search.php
|
@ -1,174 +1,70 @@
|
|||
<?php
|
||||
|
||||
use Vichan\Service\SearchService;
|
||||
|
||||
require 'inc/bootstrap.php';
|
||||
|
||||
if (!$config['search']['enable']) {
|
||||
die(_("Post search is disabled"));
|
||||
}
|
||||
|
||||
$queries_per_minutes = $config['search']['queries_per_minutes'];
|
||||
$queries_per_minutes_all = $config['search']['queries_per_minutes_all'];
|
||||
$search_limit = $config['search']['search_limit'];
|
||||
$ctx = Vichan\build_context($config);
|
||||
$search_service = $ctx->get(SearchService::class);
|
||||
|
||||
if (isset($config['search']['boards'])) {
|
||||
$boards = $config['search']['boards'];
|
||||
} else {
|
||||
$boards = listBoards(TRUE);
|
||||
}
|
||||
if (isset($_GET['search']) && !empty($_GET['search'])) {
|
||||
$raw_search = $_GET['search'];
|
||||
$ip = $_SERVER['REMOTE_ADDR'];
|
||||
$fallback_board = (isset($_GET['board']) && !empty($_GET['board'])) ? $_GET['board'] : null;
|
||||
|
||||
$body = Element('search_form.html', Array('boards' => $boards, 'board' => isset($_GET['board']) ? $_GET['board'] : false, 'search' => isset($_GET['search']) ? str_replace('"', '"', utf8tohtml($_GET['search'])) : false));
|
||||
|
||||
if (isset($_GET['search']) && !empty($_GET['search']) && isset($_GET['board']) && in_array($_GET['board'], $boards)) {
|
||||
$phrase = $_GET['search'];
|
||||
$_body = '';
|
||||
|
||||
$query = prepare("SELECT COUNT(*) FROM ``search_queries`` WHERE `ip` = :ip AND `time` > :time");
|
||||
$query->bindValue(':ip', $_SERVER['REMOTE_ADDR']);
|
||||
$query->bindValue(':time', time() - ($queries_per_minutes[1] * 60));
|
||||
$query->execute() or error(db_error($query));
|
||||
if ($query->fetchColumn() > $queries_per_minutes[0])
|
||||
if ($search_service->checkFlood($ip, $raw_search)) {
|
||||
error(_('Wait a while before searching again, please.'));
|
||||
|
||||
$query = prepare("SELECT COUNT(*) FROM ``search_queries`` WHERE `time` > :time");
|
||||
$query->bindValue(':time', time() - ($queries_per_minutes_all[1] * 60));
|
||||
$query->execute() or error(db_error($query));
|
||||
if ($query->fetchColumn() > $queries_per_minutes_all[0])
|
||||
error(_('Wait a while before searching again, please.'));
|
||||
|
||||
|
||||
$query = prepare("INSERT INTO ``search_queries`` VALUES (:ip, :time, :query)");
|
||||
$query->bindValue(':ip', $_SERVER['REMOTE_ADDR']);
|
||||
$query->bindValue(':time', time());
|
||||
$query->bindValue(':query', $phrase);
|
||||
$query->execute() or error(db_error($query));
|
||||
|
||||
_syslog(LOG_NOTICE, 'Searched /' . $_GET['board'] . '/ for "' . $phrase . '"');
|
||||
|
||||
// Cleanup search queries table
|
||||
$query = prepare("DELETE FROM ``search_queries`` WHERE `time` <= :time");
|
||||
$query->bindValue(':time', time() - ($queries_per_minutes_all[1] * 60));
|
||||
$query->execute() or error(db_error($query));
|
||||
|
||||
openBoard($_GET['board']);
|
||||
|
||||
$filters = Array();
|
||||
|
||||
function search_filters($m) {
|
||||
global $filters;
|
||||
$name = $m[2];
|
||||
$value = isset($m[4]) ? $m[4] : $m[3];
|
||||
|
||||
if (!in_array($name, array('id', 'thread', 'subject', 'name'))) {
|
||||
// unknown filter
|
||||
return $m[0];
|
||||
}
|
||||
|
||||
$filters[$name] = $value;
|
||||
|
||||
return $m[1];
|
||||
}
|
||||
|
||||
$phrase = trim(preg_replace_callback('/(^|\s)(\w+):("(.*)?"|[^\s]*)/', 'search_filters', $phrase));
|
||||
// Actually do the search.
|
||||
$parse_res = $search_service->parse($raw_search);
|
||||
$filters = $search_service->reduceAndWeight($parse_res);
|
||||
$search_res = $search_service->search($ip, $raw_search, $filters, $fallback_board);
|
||||
|
||||
if (!preg_match('/[^*^\s]/', $phrase) && empty($filters)) {
|
||||
_syslog(LOG_WARNING, 'Query too broad.');
|
||||
$body .= '<p class="unimportant" style="text-align:center">(Query too broad.)</p>';
|
||||
echo Element('page.html', Array(
|
||||
'config'=>$config,
|
||||
'title'=>'Search',
|
||||
'body'=>$body,
|
||||
));
|
||||
exit;
|
||||
}
|
||||
|
||||
// Escape escape character
|
||||
$phrase = str_replace('!', '!!', $phrase);
|
||||
// Needed to set a global variable further down the stack, plus the template.
|
||||
$actual_board = $filter->board ?? $fallback_board;
|
||||
|
||||
// Remove SQL wildcard
|
||||
$phrase = str_replace('%', '!%', $phrase);
|
||||
$body = Element('search_form.html', [
|
||||
'boards' => $search_service->getSearchableBoards(),
|
||||
'board' => $_GET['board'],
|
||||
'search' => \str_replace('"', '"', utf8tohtml($_GET['search']))
|
||||
]);
|
||||
|
||||
// Use asterisk as wildcard to suit convention
|
||||
$phrase = str_replace('*', '%', $phrase);
|
||||
|
||||
// Remove `, it's used by table prefix magic
|
||||
$phrase = str_replace('`', '!`', $phrase);
|
||||
|
||||
$like = '';
|
||||
$match = Array();
|
||||
|
||||
// Find exact phrases
|
||||
if (preg_match_all('/"(.+?)"/', $phrase, $m)) {
|
||||
foreach($m[1] as &$quote) {
|
||||
$phrase = str_replace("\"{$quote}\"", '', $phrase);
|
||||
$match[] = $pdo->quote($quote);
|
||||
}
|
||||
}
|
||||
|
||||
$words = explode(' ', $phrase);
|
||||
foreach($words as &$word) {
|
||||
if (empty($word)) {
|
||||
continue;
|
||||
}
|
||||
$match[] = $pdo->quote($word);
|
||||
}
|
||||
|
||||
$like = '';
|
||||
foreach($match as &$phrase) {
|
||||
if (!empty($like)) {
|
||||
$like .= ' AND ';
|
||||
}
|
||||
$phrase = preg_replace('/^\'(.+)\'$/', '\'%$1%\'', $phrase);
|
||||
$like .= '`body` LIKE ' . $phrase . ' ESCAPE \'!\'';
|
||||
}
|
||||
|
||||
foreach($filters as $name => $value) {
|
||||
if (!empty($like)) {
|
||||
$like .= ' AND ';
|
||||
}
|
||||
$like .= '`' . $name . '` = '. $pdo->quote($value);
|
||||
}
|
||||
|
||||
$like = str_replace('%', '%%', $like);
|
||||
|
||||
$query = prepare(sprintf("SELECT * FROM ``posts_%s`` WHERE " . $like . " ORDER BY `time` DESC LIMIT :limit", $board['uri']));
|
||||
$query->bindValue(':limit', $search_limit, PDO::PARAM_INT);
|
||||
$query->execute() or error(db_error($query));
|
||||
|
||||
if ($query->rowCount() == $search_limit) {
|
||||
_syslog(LOG_WARNING, 'Query too broad.');
|
||||
$body .= '<p class="unimportant" style="text-align:center">('._('Query too broad.').')</p>';
|
||||
echo Element('page.html', Array(
|
||||
'config'=>$config,
|
||||
'title'=>'Search',
|
||||
'body'=>$body,
|
||||
));
|
||||
exit;
|
||||
}
|
||||
|
||||
$temp = '';
|
||||
while ($post = $query->fetch()) {
|
||||
if (!$post['thread']) {
|
||||
$po = new Thread($post);
|
||||
} else {
|
||||
$po = new Post($post);
|
||||
}
|
||||
$temp .= $po->build(true) . '<hr/>';
|
||||
}
|
||||
|
||||
if (!empty($temp))
|
||||
$_body .= '<fieldset><legend>' .
|
||||
sprintf(ngettext('%d result in', '%d results in', $query->rowCount()),
|
||||
$query->rowCount()) . ' <a href="/' .
|
||||
sprintf($config['board_path'], $board['uri']) . $config['file_index'] .
|
||||
'">' .
|
||||
sprintf($config['board_abbreviation'], $board['uri']) . ' - ' . $board['title'] .
|
||||
'</a></legend>' . $temp . '</fieldset>';
|
||||
|
||||
$body .= '<hr/>';
|
||||
if (!empty($_body)) {
|
||||
$body .= $_body;
|
||||
if (empty($search_res)) {
|
||||
$body .= '<hr/><p style="text-align:center" class="unimportant">(' . _('No results.') . ')</p>';
|
||||
} else {
|
||||
$body .= '<p style="text-align:center" class="unimportant">('._('No results.').')</p>';
|
||||
$body .= '<hr/>';
|
||||
|
||||
openBoard($actual_board);
|
||||
|
||||
$posts_html = '';
|
||||
foreach ($search_res as $post) {
|
||||
if (!$post['thread']) {
|
||||
$po = new Thread($post);
|
||||
} else {
|
||||
$po = new Post($post);
|
||||
}
|
||||
$posts_html .= $po->build(true) . '<hr/>';
|
||||
}
|
||||
|
||||
$body .= '<fieldset><legend>' .
|
||||
sprintf(ngettext('%d result in', '%d results in', \count($search_res)), \count($search_res)) . ' <a href="/' .
|
||||
sprintf($config['board_path'], $board['uri']) . $config['file_index'] . '">' .
|
||||
sprintf($config['board_abbreviation'], $board['uri']) . ' - ' . $board['title'] .
|
||||
'</a></legend>' . $posts_html . '</fieldset>';
|
||||
}
|
||||
} else {
|
||||
$body = Element('search_form.html', [
|
||||
'boards' => $search_service->getSearchableBoards(),
|
||||
'board' => false,
|
||||
'search' => false
|
||||
]);
|
||||
}
|
||||
|
||||
echo Element('page.html', Array(
|
||||
|
|
64
tests/SearchServiceTest.php
Normal file
64
tests/SearchServiceTest.php
Normal file
|
@ -0,0 +1,64 @@
|
|||
<?php
|
||||
use PHPUnit\Framework\TestCase;
|
||||
use PHPUnit\Framework;
|
||||
use Vichan\Data\Driver\{LogDriver, StderrLogDriver};
|
||||
use Vichan\Data\UserPostQueries;
|
||||
use Vichan\Service\SearchService;
|
||||
|
||||
|
||||
/**
 * Tests for SearchService query parsing (parse) and query weighting
 * (reduceAndWeight).
 */
class SearchServiceTest extends TestCase {
	/**
	 * parse() must separate free-text body fragments from the
	 * `subject:`, `name:`, `flag:`, `id:` and `thread:` filters, stripping
	 * quotes and converting the numeric filters to integers.
	 */
	public function testBasicSearch(): void {
		$srv = new SearchService(
			$this->createMock(LogDriver::class),
			$this->createMock(UserPostQueries::class),
			null,
			100,
			250,
			100,
		);

		$filters = $srv->parse("free world all large board:kino board:\"poly\" name:coolie maybe subject:\"subj\" flag:\"pirate\" id:76 thread:8 but not so much");

		// assertSame keeps the strict (===) comparison but reports the
		// expected and actual values when the assertion fails.
		Framework\assertSame([ 'free world all large', 'maybe', 'but not so much' ], $filters->body);
		Framework\assertSame('subj', $filters->subject);
		Framework\assertSame('coolie', $filters->name);
		Framework\assertSame('pirate', $filters->flag);
		Framework\assertSame(76, $filters->id);
		Framework\assertSame(8, $filters->thread);
	}

	/**
	 * The weight produced by reduceAndWeight() must grow strictly in the
	 * order: no wildcard < trailing wildcard < inner wildcard < inner and
	 * trailing wildcards.
	 */
	public function testWeight(): void {
		$user_queries = $this->createMock(UserPostQueries::class);
		$user_queries->method('escapeSearchPosts')
			->willReturnMap([
				[ 'abcd', 'abcd' ],
				[ 'abc', 'abc' ],
				[ 'a*cd', 'a\\*cd' ],
				[ 'a*c', 'a\\*c' ],
			]);

		$srv = new SearchService(
			new StderrLogDriver('test', LogDriver::DEBUG),
			$user_queries,
			null,
			100,
			250,
			100,
		);

		$f = $srv->parse('abcd');
		$no_wildcards = $srv->reduceAndWeight($f)->weight;

		$f = $srv->parse('abc*');
		$end_wildcard = $srv->reduceAndWeight($f)->weight;

		$f = $srv->parse('a*cd');
		$middle_wildcard = $srv->reduceAndWeight($f)->weight;

		$f = $srv->parse('a*c*');
		$wildcards = $srv->reduceAndWeight($f)->weight;

		// assertLessThan($expected, $actual) passes when $actual < $expected.
		Framework\assertLessThan($end_wildcard, $no_wildcards);
		Framework\assertLessThan($middle_wildcard, $end_wildcard);
		Framework\assertLessThan($wildcards, $middle_wildcard);
	}
}
|
|
@ -4,6 +4,7 @@
|
|||
*/
|
||||
|
||||
use Vichan\Data\ReportQueries;
|
||||
use Vichan\Data\SearchQueries;
|
||||
|
||||
require dirname(__FILE__) . '/inc/cli.php';
|
||||
|
||||
|
@ -45,9 +46,17 @@ if ($config['cache']['enabled'] === 'fs') {
|
|||
$fs_cache->collect();
|
||||
$delta = microtime(true) - $start;
|
||||
echo "Deleted $deleted_count expired filesystem cache items in $delta seconds!\n";
|
||||
$time_tot = $delta;
|
||||
$time_tot += $delta;
|
||||
$deleted_tot = $deleted_count;
|
||||
}
|
||||
|
||||
// Purge expired search-log entries and fold this step's elapsed time and
// deleted count into the totals reported at the end of the run.
echo "Clearing old search log...\n";
$search_queries = $ctx->get(SearchQueries::class);
$start = microtime(true);
$deleted_count = $search_queries->purgeExpired();
$delta = microtime(true) - $start;
$time_tot += $delta;
// Accumulate instead of overwriting: the summary below reports the total
// across every maintenance step, mirroring how $time_tot is summed.
$deleted_tot += $deleted_count;

// Format the total duration to four decimals for the moderation log entry.
$time_tot = number_format((float)$time_tot, 4, '.', '');
modLog("Deleted $deleted_tot expired entries in {$time_tot}s with maintenance tool");
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue