Compare commits
19 commits
3caa94eeea
...
946f836cb5
Author | SHA1 | Date | |
---|---|---|---|
946f836cb5 | |||
e32dfcb51a | |||
92e24dac67 | |||
9b60540e6b | |||
e02fd2a3c4 | |||
4b5ce63f4b | |||
a0d218dbc2 | |||
3238319e26 | |||
ad62e81752 | |||
923d4ef2b9 | |||
47bd9fa127 | |||
6b01a4ad6f | |||
e6d0681d0f | |||
19efb78e92 | |||
ce7be3e0aa | |||
aa7aa4b205 | |||
b4d5f23e78 | |||
a99cc34f7e | |||
48c5f6a4e0 |
10 changed files with 666 additions and 279 deletions
13
inc/Data/FiltersParseResult.php
Normal file
13
inc/Data/FiltersParseResult.php
Normal file
|
@ -0,0 +1,13 @@
|
||||||
|
<?php
|
||||||
|
namespace Vichan\Data;
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Plain data holder for the filters extracted from a raw search query.
 *
 * Every field starts out empty: an empty array for the body and null for
 * all the other filters.
 */
class FiltersParseResult {
|
||||||
|
// Body filters; presumably the free-text chunks of the query that are searched in the post body.
public array $body = [];
|
||||||
|
// Subject filter, null by default.
public ?string $subject = null;
|
||||||
|
// Name filter, null by default.
public ?string $name = null;
|
||||||
|
// Board filter, null by default.
public ?string $board = null;
|
||||||
|
// Flag filter, null by default.
public ?string $flag = null;
|
||||||
|
// Post id filter, null by default.
public ?int $id = null;
|
||||||
|
// Thread id filter, null by default.
public ?int $thread = null;
|
||||||
|
}
|
283
inc/Data/Flags.php
Normal file
283
inc/Data/Flags.php
Normal file
|
@ -0,0 +1,283 @@
|
||||||
|
<?php
|
||||||
|
|
||||||
|
class Flags {
|
||||||
|
/**
|
||||||
|
* Short names of the flags embedded with vichan.
|
||||||
|
*/
|
||||||
|
public const EMBEDDED_FLAGS = [
|
||||||
|
'a1',
|
||||||
|
'a2',
|
||||||
|
'ac',
|
||||||
|
'ad',
|
||||||
|
'ae',
|
||||||
|
'af',
|
||||||
|
'ag',
|
||||||
|
'ai',
|
||||||
|
'al',
|
||||||
|
'am',
|
||||||
|
'an',
|
||||||
|
'ao',
|
||||||
|
'ap',
|
||||||
|
'aq',
|
||||||
|
'ar',
|
||||||
|
'as',
|
||||||
|
'at',
|
||||||
|
'au',
|
||||||
|
'aw',
|
||||||
|
'ax',
|
||||||
|
'az',
|
||||||
|
'ba',
|
||||||
|
'bb',
|
||||||
|
'bd',
|
||||||
|
'be',
|
||||||
|
'bf',
|
||||||
|
'bg',
|
||||||
|
'bh',
|
||||||
|
'bi',
|
||||||
|
'bj',
|
||||||
|
'bl',
|
||||||
|
'bm',
|
||||||
|
'bn',
|
||||||
|
'bo',
|
||||||
|
'bq',
|
||||||
|
'br',
|
||||||
|
'bs',
|
||||||
|
'bt',
|
||||||
|
'bu',
|
||||||
|
'bv',
|
||||||
|
'bw',
|
||||||
|
'by',
|
||||||
|
'bz',
|
||||||
|
'ca',
|
||||||
|
'cat',
|
||||||
|
'cc',
|
||||||
|
'cd',
|
||||||
|
'cf',
|
||||||
|
'cg',
|
||||||
|
'ch',
|
||||||
|
'ci',
|
||||||
|
'ck',
|
||||||
|
'cl',
|
||||||
|
'cm',
|
||||||
|
'cn',
|
||||||
|
'co',
|
||||||
|
'cp',
|
||||||
|
'cr',
|
||||||
|
'cs',
|
||||||
|
'cu',
|
||||||
|
'cv',
|
||||||
|
'cw',
|
||||||
|
'cx',
|
||||||
|
'cy',
|
||||||
|
'cz',
|
||||||
|
'de',
|
||||||
|
'dg',
|
||||||
|
'dj',
|
||||||
|
'dk',
|
||||||
|
'dm',
|
||||||
|
'do',
|
||||||
|
'dz',
|
||||||
|
'ea',
|
||||||
|
'ec',
|
||||||
|
'ee',
|
||||||
|
'eg',
|
||||||
|
'eh',
|
||||||
|
'er',
|
||||||
|
'es',
|
||||||
|
'et',
|
||||||
|
'eu',
|
||||||
|
'fi',
|
||||||
|
'fj',
|
||||||
|
'fk',
|
||||||
|
'fm',
|
||||||
|
'fo',
|
||||||
|
'fr',
|
||||||
|
'fx',
|
||||||
|
'ga',
|
||||||
|
'gb',
|
||||||
|
'gd',
|
||||||
|
'ge',
|
||||||
|
'gf',
|
||||||
|
'gg',
|
||||||
|
'gh',
|
||||||
|
'gi',
|
||||||
|
'gl',
|
||||||
|
'gm',
|
||||||
|
'gn',
|
||||||
|
'gp',
|
||||||
|
'gq',
|
||||||
|
'gr',
|
||||||
|
'gs',
|
||||||
|
'gt',
|
||||||
|
'gu',
|
||||||
|
'gw',
|
||||||
|
'gy',
|
||||||
|
'hk',
|
||||||
|
'hm',
|
||||||
|
'hn',
|
||||||
|
'hr',
|
||||||
|
'ht',
|
||||||
|
'hu',
|
||||||
|
'ic',
|
||||||
|
'id',
|
||||||
|
'ie',
|
||||||
|
'il',
|
||||||
|
'im',
|
||||||
|
'in',
|
||||||
|
'io',
|
||||||
|
'iq',
|
||||||
|
'ir',
|
||||||
|
'is',
|
||||||
|
'it',
|
||||||
|
'je',
|
||||||
|
'jm',
|
||||||
|
'jo',
|
||||||
|
'jp',
|
||||||
|
'ke',
|
||||||
|
'kg',
|
||||||
|
'kh',
|
||||||
|
'ki',
|
||||||
|
'km',
|
||||||
|
'kn',
|
||||||
|
'kp',
|
||||||
|
'kr',
|
||||||
|
'kw',
|
||||||
|
'ky',
|
||||||
|
'kz',
|
||||||
|
'la',
|
||||||
|
'lb',
|
||||||
|
'lc',
|
||||||
|
'li',
|
||||||
|
'lk',
|
||||||
|
'lr',
|
||||||
|
'ls',
|
||||||
|
'lt',
|
||||||
|
'lu',
|
||||||
|
'lv',
|
||||||
|
'ly',
|
||||||
|
'ma',
|
||||||
|
'mc',
|
||||||
|
'md',
|
||||||
|
'me',
|
||||||
|
'mf',
|
||||||
|
'mg',
|
||||||
|
'mh',
|
||||||
|
'mk',
|
||||||
|
'ml',
|
||||||
|
'mm',
|
||||||
|
'mn',
|
||||||
|
'mo',
|
||||||
|
'mp',
|
||||||
|
'mq',
|
||||||
|
'mr',
|
||||||
|
'ms',
|
||||||
|
'mt',
|
||||||
|
'mu',
|
||||||
|
'mv',
|
||||||
|
'mw',
|
||||||
|
'mx',
|
||||||
|
'my',
|
||||||
|
'mz',
|
||||||
|
'na',
|
||||||
|
'nc',
|
||||||
|
'ne',
|
||||||
|
'nf',
|
||||||
|
'ng',
|
||||||
|
'ni',
|
||||||
|
'nl',
|
||||||
|
'no',
|
||||||
|
'np',
|
||||||
|
'nr',
|
||||||
|
'nt',
|
||||||
|
'nu',
|
||||||
|
'nz',
|
||||||
|
'o1',
|
||||||
|
'om',
|
||||||
|
'pa',
|
||||||
|
'pe',
|
||||||
|
'pf',
|
||||||
|
'pg',
|
||||||
|
'ph',
|
||||||
|
'pk',
|
||||||
|
'pl',
|
||||||
|
'pm',
|
||||||
|
'pn',
|
||||||
|
'pr',
|
||||||
|
'ps',
|
||||||
|
'pt',
|
||||||
|
'pw',
|
||||||
|
'py',
|
||||||
|
'qa',
|
||||||
|
're',
|
||||||
|
'ro',
|
||||||
|
'rs',
|
||||||
|
'ru',
|
||||||
|
'rw',
|
||||||
|
'sa',
|
||||||
|
'sb',
|
||||||
|
'sc',
|
||||||
|
'sd',
|
||||||
|
'se',
|
||||||
|
'sf',
|
||||||
|
'sg',
|
||||||
|
'sh',
|
||||||
|
'si',
|
||||||
|
'sj',
|
||||||
|
'sk',
|
||||||
|
'sl',
|
||||||
|
'sm',
|
||||||
|
'sn',
|
||||||
|
'so',
|
||||||
|
'sr',
|
||||||
|
'ss',
|
||||||
|
'st',
|
||||||
|
'su',
|
||||||
|
'sv',
|
||||||
|
'sx',
|
||||||
|
'sy',
|
||||||
|
'sz',
|
||||||
|
'ta',
|
||||||
|
'tc',
|
||||||
|
'td',
|
||||||
|
'tf',
|
||||||
|
'tg',
|
||||||
|
'th',
|
||||||
|
'ti',
|
||||||
|
'tj',
|
||||||
|
'tk',
|
||||||
|
'tl',
|
||||||
|
'tm',
|
||||||
|
'tn',
|
||||||
|
'to',
|
||||||
|
'tp',
|
||||||
|
'tr',
|
||||||
|
'tt',
|
||||||
|
'tv',
|
||||||
|
'tw',
|
||||||
|
'tz',
|
||||||
|
'ua',
|
||||||
|
'ug',
|
||||||
|
'uk',
|
||||||
|
'um',
|
||||||
|
'us',
|
||||||
|
'uy',
|
||||||
|
'uz',
|
||||||
|
'va',
|
||||||
|
'vc',
|
||||||
|
've',
|
||||||
|
'vg',
|
||||||
|
'vi',
|
||||||
|
'vn',
|
||||||
|
'vu',
|
||||||
|
'wf',
|
||||||
|
'ws',
|
||||||
|
'xx',
|
||||||
|
'ye',
|
||||||
|
'yt',
|
||||||
|
'yu',
|
||||||
|
'za',
|
||||||
|
'zm',
|
||||||
|
'zr',
|
||||||
|
'zw',
|
||||||
|
];
|
||||||
|
}
|
|
@ -2,12 +2,31 @@
|
||||||
namespace Vichan\Data;
|
namespace Vichan\Data;
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* POD with the fragments of each filter.
|
||||||
|
*/
|
||||||
class SearchFilters {
|
class SearchFilters {
|
||||||
public array $body = [];
|
/**
|
||||||
public ?string $subject = null;
|
* @var array<array<string>>
|
||||||
public ?string $name = null;
|
*/
|
||||||
public ?string $board = null;
|
public array $body;
|
||||||
public ?string $flag = null;
|
/**
|
||||||
public ?int $id = null;
|
* @var array<string>
|
||||||
public ?int $thread = null;
|
*/
|
||||||
|
public array $subject;
|
||||||
|
/**
|
||||||
|
* @var array<string>
|
||||||
|
*/
|
||||||
|
public array $name;
|
||||||
|
/**
|
||||||
|
* @var array<string>
|
||||||
|
*/
|
||||||
|
public array $board;
|
||||||
|
/**
|
||||||
|
* @var array<string>
|
||||||
|
*/
|
||||||
|
public array $flag;
|
||||||
|
public ?int $id;
|
||||||
|
public ?int $thread;
|
||||||
|
public float $weight;
|
||||||
}
|
}
|
||||||
|
|
|
@ -6,20 +6,25 @@ class SearchQueries {
|
||||||
private \PDO $pdo;
|
private \PDO $pdo;
|
||||||
private int $queries_per_minutes_single;
|
private int $queries_per_minutes_single;
|
||||||
private int $queries_per_minutes_all;
|
private int $queries_per_minutes_all;
|
||||||
|
private bool $auto_gc;
|
||||||
|
|
||||||
|
|
||||||
private function checkFloodImpl(string $ip, string $phrase): bool {
|
private function checkFloodImpl(string $ip, string $phrase): bool {
|
||||||
$now = time();
|
$now = \time();
|
||||||
|
$expiry_limit = \time() - ($this->queries_per_minutes_all * 60);
|
||||||
|
|
||||||
$query = $this->pdo->prepare("SELECT COUNT(*) FROM `search_queries` WHERE `ip` = :ip AND `time` > :time");
|
$query = $this->pdo->prepare("SELECT COUNT(*) FROM `search_queries` WHERE `ip` = :ip AND `time` > :time AND `time` <= :expiry_limit");
|
||||||
$query->bindValue(':ip', $ip);
|
$query->bindValue(':ip', $ip);
|
||||||
$query->bindValue(':time', $now - ($this->queries_per_minutes_single * 60));
|
$query->bindValue(':time', $now - ($this->queries_per_minutes_single * 60), \PDO::PARAM_INT);
|
||||||
|
$query->bindValue(':expiry_limit', $expiry_limit, \PDO::PARAM_INT);
|
||||||
$query->execute();
|
$query->execute();
|
||||||
if ($query->fetchColumn() > $this->queries_per_minutes_single) {
|
if ($query->fetchColumn() > $this->queries_per_minutes_single) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
$query = $this->pdo->prepare("SELECT COUNT(*) FROM `search_queries` WHERE `time` > :time");
|
$query = $this->pdo->prepare("SELECT COUNT(*) FROM `search_queries` WHERE `time` > :time AND `time` <= :expiry_limit");
|
||||||
$query->bindValue(':time', $now - ($this->queries_per_minutes_all * 60));
|
$query->bindValue(':time', $now - ($this->queries_per_minutes_all * 60), \PDO::PARAM_INT);
|
||||||
|
$query->bindValue(':expiry_limit', $expiry_limit, \PDO::PARAM_INT);
|
||||||
$query->execute();
|
$query->execute();
|
||||||
if ($query->fetchColumn() > $this->queries_per_minutes_all) {
|
if ($query->fetchColumn() > $this->queries_per_minutes_all) {
|
||||||
return false;
|
return false;
|
||||||
|
@ -27,24 +32,31 @@ class SearchQueries {
|
||||||
|
|
||||||
$query = $this->pdo->prepare("INSERT INTO `search_queries` VALUES (:ip, :time, :query)");
|
$query = $this->pdo->prepare("INSERT INTO `search_queries` VALUES (:ip, :time, :query)");
|
||||||
$query->bindValue(':ip', $ip);
|
$query->bindValue(':ip', $ip);
|
||||||
$query->bindValue(':time', $now);
|
$query->bindValue(':time', $now, \PDO::PARAM_INT);
|
||||||
$query->bindValue(':query', $phrase);
|
$query->bindValue(':query', $phrase);
|
||||||
$query->execute();
|
$query->execute();
|
||||||
|
|
||||||
// Cleanup search queries table
|
if ($this->auto_gc) {
|
||||||
$query = prepare("DELETE FROM `search_queries` WHERE `time` <= :time");
|
$this->purgeExpired();
|
||||||
$query->bindValue(':time', time() - ($this->queries_per_minutes_all * 60));
|
}
|
||||||
$query->execute();
|
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
public function __construct(\PDO $pdo, int $queries_per_minutes_single, int $queries_per_minutes_all) {
|
public function __construct(\PDO $pdo, int $queries_per_minutes_single, int $queries_per_minutes_all, bool $auto_gc) {
|
||||||
$this->pdo = $pdo;
|
$this->pdo = $pdo;
|
||||||
$this->queries_per_minutes_single = $queries_per_minutes_single;
|
$this->queries_per_minutes_single = $queries_per_minutes_single;
|
||||||
$this->queries_per_minutes_all = $queries_per_minutes_all;
|
$this->queries_per_minutes_all = $queries_per_minutes_all;
|
||||||
|
$this->auto_gc = $auto_gc;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Check if the IP-query pair overflows the limit.
|
||||||
|
*
|
||||||
|
* @param string $ip Source IP.
|
||||||
|
* @param string $phrase The search query.
|
||||||
|
* @return bool True if the request goes over the limit
|
||||||
|
*/
|
||||||
public function checkFlood(string $ip, string $phrase): bool {
|
public function checkFlood(string $ip, string $phrase): bool {
|
||||||
$this->pdo->beginTransaction();
|
$this->pdo->beginTransaction();
|
||||||
try {
|
try {
|
||||||
|
@ -56,4 +68,12 @@ class SearchQueries {
|
||||||
throw $e;
|
throw $e;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Delete the search query records that fell out of the flood detection window.
 *
 * A record is expired when its `time` is at or before the current time minus
 * `queries_per_minutes_all` minutes.
 *
 * @return int Number of deleted rows.
 */
public function purgeExpired(): int {
	// Cleanup search queries table. Go through the injected connection, like every other
	// query of this class, rather than through an unqualified prepare() call.
	$query = $this->pdo->prepare("DELETE FROM `search_queries` WHERE `time` <= :expiry_limit");
	$query->bindValue(':expiry_limit', \time() - ($this->queries_per_minutes_all * 60), \PDO::PARAM_INT);
	$query->execute();
	return $query->rowCount();
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -2,7 +2,7 @@
|
||||||
namespace Vichan\Service;
|
namespace Vichan\Service;
|
||||||
|
|
||||||
use Vichan\Data\Driver\LogDriver;
|
use Vichan\Data\Driver\LogDriver;
|
||||||
use Vichan\Data\{UserPostQueries, SearchFilters, SearchFiltersWeighted};
|
use Vichan\Data\{FiltersParseResult, UserPostQueries, SearchFilters, SearchQueries};
|
||||||
|
|
||||||
|
|
||||||
class SearchService {
|
class SearchService {
|
||||||
|
@ -33,10 +33,12 @@ class SearchService {
|
||||||
|
|
||||||
private LogDriver $log;
|
private LogDriver $log;
|
||||||
private UserPostQueries $user_queries;
|
private UserPostQueries $user_queries;
|
||||||
|
private SearchQueries $search_queries;
|
||||||
private ?array $flag_map;
|
private ?array $flag_map;
|
||||||
private float $max_weight;
|
private float $max_weight;
|
||||||
private int $max_query_length;
|
private int $max_query_length;
|
||||||
private int $post_limit;
|
private int $post_limit;
|
||||||
|
private array $searchable_board_uris;
|
||||||
|
|
||||||
|
|
||||||
private static function truncateQuery(string $text, int $byteLimit): ?string {
|
private static function truncateQuery(string $text, int $byteLimit): ?string {
|
||||||
|
@ -62,42 +64,35 @@ class SearchService {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static function trimEnd(string $str): string {
|
private static function trim(string $str): string {
|
||||||
return \rtrim($str, "* \n\r\t\v\0");
|
return \trim($str, "* \n\r\t\v\0");
|
||||||
}
|
}
|
||||||
|
|
||||||
private function sanitizeAndTransform(string $str): array {
|
private static function unescape(string $str): string {
|
||||||
// Escape UserQueries's wildcards.
|
return \strtr($str, [
|
||||||
$str = $this->user_queries->escapeSearchPosts($str);
|
|
||||||
// Coalesce multiple wildcards.
|
|
||||||
$wildcard_count = 0;
|
|
||||||
$str = \preg_replace_callback('/(?:\\\\\\\\)*\\\\\*|(?:\\\\\\\\)*\*+/', function($match) use (&$wildcard_count) {
|
|
||||||
$wildcard_count++;
|
|
||||||
return UserPostQueries::SEARCH_POSTS_WILDCARD;
|
|
||||||
}, $str);
|
|
||||||
// Query is too broad.
|
|
||||||
if ($str === UserPostQueries::SEARCH_POSTS_WILDCARD) {
|
|
||||||
return [ null, 0 ];
|
|
||||||
}
|
|
||||||
// Unescape.
|
|
||||||
$str = \strtr($str, [
|
|
||||||
'\\\\' => '\\',
|
'\\\\' => '\\',
|
||||||
'\\*' => '*',
|
'\\*' => '*',
|
||||||
'\\"' => '"'
|
'\\"' => '"'
|
||||||
]);
|
]);
|
||||||
|
|
||||||
return [ $str, $wildcard_count ];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private static function weightByContent(string $str): float {
|
/**
|
||||||
$w = 1;
|
* Split the filter into fragments along the wildcards, handling escaping.
|
||||||
|
*
|
||||||
|
* @param string $str The full filter.
|
||||||
|
* @return array<string>
|
||||||
|
*/
|
||||||
|
private static function split(string $str): array {
|
||||||
|
// Split the fragments
|
||||||
|
return \preg_split('/(?:\\\\\\\\)*\\\\\*|(?:\\\\\\\\)*\*+/', $str);
|
||||||
|
}
|
||||||
|
|
||||||
// Count common and short words.
|
private static function weightByContent(array $fragments): float {
|
||||||
$trim = \trim($str, UserPostQueries::SEARCH_POSTS_WILDCARD . " \n\r\t\v\0");
|
$w = 0;
|
||||||
$words = \explode(' ', $trim);
|
|
||||||
foreach ($words as $word) {
|
foreach ($fragments as $fragment) {
|
||||||
$short = \strlen($word) < 4;
|
$short = \strlen($fragment) < 4;
|
||||||
if (\in_array($word, self::COMMON_WORDS)) {
|
if (\in_array($fragment, self::COMMON_WORDS)) {
|
||||||
$w += $short ? 16 : 6;
|
$w += $short ? 16 : 6;
|
||||||
} elseif ($short) {
|
} elseif ($short) {
|
||||||
$w += 6;
|
$w += 6;
|
||||||
|
@ -107,24 +102,54 @@ class SearchService {
|
||||||
return $w;
|
return $w;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static function weightByWildcards(string $str, int $wildcards): float {
|
/**
 * Split a filter into its searchable fragments and compute its wildcard weight.
 *
 * @param string $filter The full filter, possibly containing wildcards.
 * @return array A triple made of: the cleaned, non-empty fragments (array<string>),
 *               the total byte length of those fragments (int) and the wildcard weight (float).
 */
private static function filterAndWeight(string $filter): array {
	$fragments = self::split($filter);

	$acc = [];
	$total_len = 0;

	foreach ($fragments as $fragment) {
		$fragment = self::trim(self::unescape($fragment));

		// Compare against '' instead of using empty(), which would also discard the fragment "0".
		if ($fragment !== '') {
			$total_len += \strlen($fragment);
			$acc[] = $fragment;
		}
	}

	// Interword wildcards: the number of raw fragments minus 1, never negative.
	$interword = \max(\count($fragments) - 1, 0);
	// Wildcards over the total length of the fragments. When nothing is left to search for
	// (e.g. the filter is a lone wildcard) the ratio is skipped rather than dividing by zero.
	$perc = $total_len > 0 ? $interword / $total_len * 100 : 0;
	$wildcard_weight = $perc + \count($fragments) * 2;

	return [ $acc, $total_len, $wildcard_weight ];
}
|
||||||
|
|
||||||
|
/**
 * Gets the subset of the flags which contain every fragment, one after the other.
 *
 * The comparison is case-insensitive. Empty fragments are ignored.
 *
 * @param array<string> $flags An array of flags.
 * @param array<string> $fragments User provided fragments to search in the flags.
 * @return array<string> The matching flags, with the keys of $flags preserved.
 */
private static function matchFlags(array $flags, array $fragments): array {
	return \array_filter($flags, function ($str) use ($fragments) {
		// Position where the next fragment may start. It begins at 0 so that a match at the very
		// start of the flag is found, and moves past each match to keep the fragments in order.
		$offset = 0;
		foreach ($fragments as $fragment) {
			if ($fragment === '') {
				// An empty fragment matches anywhere.
				continue;
			}

			if ($offset + \strlen($fragment) > \strlen($str)) {
				// Not enough of the flag is left: cannot possibly match.
				// This also keeps the offset passed to stripos() within the string.
				return false;
			}

			$pos = \stripos($str, $fragment, $offset);
			if ($pos === false) {
				// Exclude flags that don't match even a single fragment.
				return false;
			}

			$offset = $pos + \strlen($fragment);
		}
		return true;
	});
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -140,9 +165,9 @@ class SearchService {
|
||||||
* - id: post id, must be numeric
|
* - id: post id, must be numeric
|
||||||
* - thread: thread id, must be numeric
|
* - thread: thread id, must be numeric
|
||||||
* The remaining text is split into chunks and searched in the post body.
|
* The remaining text is split into chunks and searched in the post body.
|
||||||
* @return SearchFilters
|
* @return FiltersParseResult
|
||||||
*/
|
*/
|
||||||
public function parse(string $raw_query): SearchFilters {
|
public function parse(string $raw_query): FiltersParseResult{
|
||||||
$tres = self::truncateQuery($raw_query, $this->max_query_length);
|
$tres = self::truncateQuery($raw_query, $this->max_query_length);
|
||||||
if ($tres === null) {
|
if ($tres === null) {
|
||||||
throw new \RuntimeException('Could not truncate query');
|
throw new \RuntimeException('Could not truncate query');
|
||||||
|
@ -179,7 +204,7 @@ class SearchService {
|
||||||
throw new \RuntimeException('Could not decode the query');
|
throw new \RuntimeException('Could not decode the query');
|
||||||
}
|
}
|
||||||
|
|
||||||
$filters = new SearchFilters();
|
$filters = new FiltersParseResult();
|
||||||
|
|
||||||
foreach ($matches as $m) {
|
foreach ($matches as $m) {
|
||||||
if (!empty($m[1])) {
|
if (!empty($m[1])) {
|
||||||
|
@ -219,90 +244,90 @@ class SearchService {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
* @param LogDriver $log Log driver.
|
||||||
* @param UserPostQueries $user_queries User posts queries.
|
* @param UserPostQueries $user_queries User posts queries.
|
||||||
* @param ?flag_map $max_flag_length The key-value map of user flags, or null to disable flag search.
|
* @param SearchQueries $search_queries Search queries for flood detection.
|
||||||
|
* @param ?array $flag_map The key-value map of user flags, or null to disable flag search.
|
||||||
|
* @param float $max_weight The maximum weight of the parsed user query. Body filters that go beyond this limit are discarded.
|
||||||
|
* @param int $max_query_length Maximum length of the raw input query before it's truncated.
|
||||||
|
* @param int $post_limit Maximum number of results.
|
||||||
|
* @param ?array $searchable_board_uris The uris of the board that can be searched. Null to search all the boards.
|
||||||
*/
|
*/
|
||||||
public function __construct(LogDriver $log, UserPostQueries $user_queries, ?array $flag_map, float $max_weight, int $max_query_length, int $post_limit) {
|
public function __construct(
|
||||||
|
LogDriver $log,
|
||||||
|
UserPostQueries $user_queries,
|
||||||
|
?array $flag_map,
|
||||||
|
float $max_weight,
|
||||||
|
int $max_query_length,
|
||||||
|
int $post_limit,
|
||||||
|
?array $searchable_board_uris
|
||||||
|
) {
|
||||||
$this->log = $log;
|
$this->log = $log;
|
||||||
$this->user_queries = $user_queries;
|
$this->user_queries = $user_queries;
|
||||||
$this->flag_map = $flag_map;
|
$this->flag_map = $flag_map;
|
||||||
$this->max_weight = $max_weight;
|
$this->max_weight = $max_weight;
|
||||||
$this->max_query_length = $max_query_length;
|
$this->max_query_length = $max_query_length;
|
||||||
$this->post_limit = $post_limit;
|
$this->post_limit = $post_limit;
|
||||||
|
$this->searchable_board_uris = $searchable_board_uris ?? listBoards(true);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Reduces the user provided filters and assigns them a total weight.
|
* Reduces the user provided filters and assigns them a total weight.
|
||||||
*
|
*
|
||||||
* @param SearchFilters $filters The filters to sanitize, reduce and weight.
|
* @param FiltersParseResult $filters The filters to sanitize, reduce and weight.
|
||||||
* @return SearchFiltersWeighted
|
* @return SearchFilters
|
||||||
*/
|
*/
|
||||||
public function reduceAndWeight(SearchFilters $filters): SearchFiltersWeighted {
|
public function reduceAndWeight(FiltersParseResult $filters): SearchFilters {
|
||||||
$weighted = new SearchFiltersWeighted();
|
$weighted = new SearchFilters();
|
||||||
|
|
||||||
if ($filters->subject !== null) {
|
if ($filters->subject !== null) {
|
||||||
if (\strlen($filters->subject) > self::MAX_LENGTH_SUBJECT) {
|
list($fragments, $total_len, $wildcard_weight) = self::filterAndWeight($filters->subject);
|
||||||
$filters->subject = null;
|
|
||||||
|
if ($total_len > self::MAX_LENGTH_SUBJECT) {
|
||||||
|
$weighted->subject = [];
|
||||||
} else {
|
} else {
|
||||||
list($str, $wildcards) = $this->sanitizeAndTransform($filters->subject);
|
$weighted->subject = $fragments;
|
||||||
if ($str === null) {
|
$weighted->weight = $wildcard_weight;
|
||||||
$filters->subject = null;
|
|
||||||
} else {
|
|
||||||
$str = self::trimEnd($str);
|
|
||||||
$weighted->weight += self::weightByWildcards($str, $wildcards);
|
|
||||||
$filters->subject = $str;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if ($filters->name !== null) {
|
if ($filters->name !== null) {
|
||||||
if (\strlen($filters->name) > self::MAX_LENGTH_NAME) {
|
list($fragments, $total_len, $wildcard_weight) = self::filterAndWeight($filters->name);
|
||||||
$filters->name = null;
|
|
||||||
|
if ($total_len > self::MAX_LENGTH_NAME) {
|
||||||
|
$weighted->name = [];
|
||||||
} else {
|
} else {
|
||||||
list($str, $wildcards) = $this->sanitizeAndTransform($filters->name);
|
$weighted->name = $fragments;
|
||||||
if ($str === null) {
|
$weighted->weight += $wildcard_weight;
|
||||||
$filters->name = null;
|
|
||||||
} else {
|
|
||||||
$str = self::trimEnd($str);
|
|
||||||
$weighted->weight += self::weightByWildcards($str, $wildcards);
|
|
||||||
$filters->name = $str;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if ($filters->flag !== null) {
|
if ($filters->flag !== null) {
|
||||||
$max_flag_length = \array_reduce($this->flag_map, function($current_max, $str) {
|
$weighted->flag = [];
|
||||||
return \max($current_max, \strlen($str));
|
|
||||||
}, 0);
|
if ($this->flag_map !== null && !empty($this->flag_map)) {
|
||||||
|
$max_flag_length = \array_reduce($this->flag_map, fn($max, $str) => \max($max, \strlen($str)), 0);
|
||||||
|
|
||||||
|
list($fragments, $total_len, $wildcard_weight) = self::filterAndWeight($filters->flag);
|
||||||
|
|
||||||
if ($this->flag_map === null
|
|
||||||
|| empty($this->flag_map)
|
|
||||||
// Add 2 to account for possible wildcards on the ends.
|
// Add 2 to account for possible wildcards on the ends.
|
||||||
|| \strlen($filters->flag) > $max_flag_length + 2) {
|
if ($total_len <= $max_flag_length + 2) {
|
||||||
$filters->flag = null;
|
$weighted->flag = $fragments;
|
||||||
} else {
|
$weighted->weight += $wildcard_weight;
|
||||||
$str = \trim($str);
|
|
||||||
$weighted->weight += self::weightByWildcards($str, $wildcards);
|
|
||||||
$filters->flag = $str;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if ($filters->body !== null) {
|
|
||||||
$acc = [];
|
|
||||||
foreach ($filters->body as $str) {
|
|
||||||
$str = self::trimEnd($str);
|
|
||||||
list($str, $wildcards) = $this->sanitizeAndTransform($str);
|
|
||||||
|
|
||||||
if ($str !== null && !empty($str)) {
|
|
||||||
$w_content = self::weightByContent($str);
|
|
||||||
$w_wildcards = self::weightByWildcards($str, $wildcards);
|
|
||||||
|
|
||||||
$w = $w_content + $w_wildcards;
|
|
||||||
if ($w + $weighted->weight <= $this->max_weight) {
|
|
||||||
$weighted->weight += $w;
|
|
||||||
$acc[] = $str;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
$weighted->id = $filters->id;
|
||||||
|
$weighted->thread = $filters->thread;
|
||||||
|
if ($filters->body !== null) {
|
||||||
|
foreach ($filters->body as $str) {
|
||||||
|
list($fragments, $total_len, $wildcard_weight) = self::filterAndWeight($str);
|
||||||
|
$content_weight = self::weightByContent($fragments);
|
||||||
|
$str_weight = $content_weight + $wildcard_weight;
|
||||||
|
|
||||||
$filters->body = $acc;
|
if ($str_weight + $weighted->weight <= $this->max_weight) {
|
||||||
|
$weighted->weight += $str_weight;
|
||||||
|
$filters->body[] = $fragments;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return $weighted;
|
return $weighted;
|
||||||
|
@ -311,18 +336,17 @@ class SearchService {
|
||||||
/**
|
/**
|
||||||
* Run a search on user posts with the given filters.
|
* Run a search on user posts with the given filters.
|
||||||
*
|
*
|
||||||
* @param SearchFiltersWeighted $filters An array of filters made by {@see self::parse()}.
|
* @param SearchFilters $filters An array of filters made by {@see self::parse()}.
|
||||||
* @param ?string $fallback_board Fallback board if there isn't a board filter.
|
* @param ?string $fallback_board Fallback board if there isn't a board filter.
|
||||||
* @return array Data array straight from the PDO, with all the fields in posts.sql
|
* @return array Data array straight from the PDO, with all the fields in posts.sql
|
||||||
*/
|
*/
|
||||||
public function search(string $ip, string $raw_query, SearchFiltersWeighted $filters, ?string $fallback_board): array {
|
public function search(string $ip, string $raw_query, SearchFilters $filters, ?string $fallback_board): array {
|
||||||
$board = $filters->board ?? $fallback_board;
|
$board = $filters->board ?? $fallback_board;
|
||||||
if ($board === null) {
|
if ($board === null) {
|
||||||
return [];
|
return [];
|
||||||
}
|
}
|
||||||
|
|
||||||
$valid_uris = listBoards(true);
|
if (!\in_array($board, $this->searchable_board_uris)) {
|
||||||
if (!\in_array($board, $valid_uris)) {
|
|
||||||
return [];
|
return [];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -334,7 +358,7 @@ class SearchService {
|
||||||
$this->log->log(LogDriver::INFO, "$ip search: weight $weight_perc ({$filters->weight}) query '$raw_query'");
|
$this->log->log(LogDriver::INFO, "$ip search: weight $weight_perc ({$filters->weight}) query '$raw_query'");
|
||||||
}
|
}
|
||||||
|
|
||||||
$flags = $filters->flag !== null ? $this->matchFlag($filters->flag) : null;
|
$flags = $filters->flag !== null ? $this->matchFlags($this->flag_map, $filters->flag) : null;
|
||||||
|
|
||||||
return $this->user_queries->searchPosts(
|
return $this->user_queries->searchPosts(
|
||||||
$board,
|
$board,
|
||||||
|
@ -347,4 +371,22 @@ class SearchService {
|
||||||
$this->post_limit
|
$this->post_limit
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Check if the IP-query pair passes the limit.
|
||||||
|
*
|
||||||
|
* @param string $ip Source IP.
|
||||||
|
* @param string $phrase The search query.
|
||||||
|
* @return bool True if the request goes over the limit.
|
||||||
|
*/
|
||||||
|
public function checkFlood(string $ip, string $raw_query) {
|
||||||
|
return $this->search_queries->checkFlood($ip, $raw_query);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the uris of the boards that may be searched.
|
||||||
|
*/
|
||||||
|
public function getSearchableBoards(): array {
|
||||||
|
return $this->searchable_board_uris;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1856,7 +1856,15 @@
|
||||||
// Limit of search results
|
// Limit of search results
|
||||||
$config['search']['search_limit'] = 100;
|
$config['search']['search_limit'] = 100;
|
||||||
|
|
||||||
// Boards for searching
|
// Maximum weight of the search query.
|
||||||
|
// Body search filters are discarded if they make the query heavier than this.
|
||||||
|
$config['search']['max_weight'] = 80;
|
||||||
|
|
||||||
|
// Maximum length of the user sent search query.
|
||||||
|
// Characters beyond the limit are truncated and ignored.
|
||||||
|
$config['search']['max_length'] = 768;
|
||||||
|
|
||||||
|
// Uncomment to limit the search feature to the given boards by uri.
|
||||||
//$config['search']['boards'] = array('a', 'b', 'c', 'd', 'e');
|
//$config['search']['boards'] = array('a', 'b', 'c', 'd', 'e');
|
||||||
|
|
||||||
// Enable public logs? 0: NO, 1: YES, 2: YES, but drop names
|
// Enable public logs? 0: NO, 1: YES, 2: YES, but drop names
|
||||||
|
|
|
@ -1,8 +1,10 @@
|
||||||
<?php
|
<?php
|
||||||
namespace Vichan;
|
namespace Vichan;
|
||||||
|
|
||||||
use Vichan\Data\{IpNoteQueries, ReportQueries, UserPostQueries};
|
use Flags;
|
||||||
|
use Vichan\Data\{IpNoteQueries, ReportQueries, SearchQueries, UserPostQueries};
|
||||||
use Vichan\Data\Driver\{CacheDriver, ErrorLogLogDriver, FileLogDriver, LogDriver, StderrLogDriver, SyslogLogDriver};
|
use Vichan\Data\Driver\{CacheDriver, ErrorLogLogDriver, FileLogDriver, LogDriver, StderrLogDriver, SyslogLogDriver};
|
||||||
|
use Vichan\Service\SearchService;
|
||||||
|
|
||||||
defined('TINYBOARD') or exit;
|
defined('TINYBOARD') or exit;
|
||||||
|
|
||||||
|
@ -69,6 +71,28 @@ function build_context(array $config): Context {
|
||||||
sql_open();
|
sql_open();
|
||||||
return $pdo;
|
return $pdo;
|
||||||
},
|
},
|
||||||
|
SearchService::class => function($c) {
|
||||||
|
$config = $c->get('config');
|
||||||
|
if ($config['user_flag']) {
|
||||||
|
$flags = $config['user_flags'];
|
||||||
|
} elseif ($config['country_flags']) {
|
||||||
|
$flags = Flags::EMBEDDED_FLAGS;
|
||||||
|
} else {
|
||||||
|
$flags = null;
|
||||||
|
}
|
||||||
|
|
||||||
|
$board_uris = $config['search']['boards'] ?? null;
|
||||||
|
|
||||||
|
return new SearchService(
|
||||||
|
$c->get(LogDriver::class),
|
||||||
|
$c->get(UserPostQueries::class),
|
||||||
|
$flags,
|
||||||
|
$config['search']['max_weight'],
|
||||||
|
$config['search']['max_length'],
|
||||||
|
$config['search']['search_limit'],
|
||||||
|
$board_uris
|
||||||
|
);
|
||||||
|
},
|
||||||
ReportQueries::class => function($c) {
|
ReportQueries::class => function($c) {
|
||||||
$auto_maintenance = (bool)$c->get('config')['auto_maintenance'];
|
$auto_maintenance = (bool)$c->get('config')['auto_maintenance'];
|
||||||
$pdo = $c->get(\PDO::class);
|
$pdo = $c->get(\PDO::class);
|
||||||
|
@ -78,5 +102,14 @@ function build_context(array $config): Context {
|
||||||
return new UserPostQueries($c->get(\PDO::class));
|
return new UserPostQueries($c->get(\PDO::class));
|
||||||
},
|
},
|
||||||
IpNoteQueries::class => fn($c) => new IpNoteQueries($c->get(\PDO::class), $c->get(CacheDriver::class)),
|
IpNoteQueries::class => fn($c) => new IpNoteQueries($c->get(\PDO::class), $c->get(CacheDriver::class)),
|
||||||
|
SearchQueries::class => function($c) {
|
||||||
|
$config = $c->get('config');
|
||||||
|
return new SearchQueries(
|
||||||
|
$c->get(\PDO::class),
|
||||||
|
$config['search']['queries_per_minutes'],
|
||||||
|
$config['search']['queries_per_minutes_all'],
|
||||||
|
$config['auto_maintenance']
|
||||||
|
);
|
||||||
|
}
|
||||||
]);
|
]);
|
||||||
}
|
}
|
||||||
|
|
200
search.php
200
search.php
|
@ -1,174 +1,70 @@
|
||||||
<?php
|
<?php
|
||||||
|
|
||||||
|
use Vichan\Service\SearchService;
|
||||||
|
|
||||||
require 'inc/bootstrap.php';
|
require 'inc/bootstrap.php';
|
||||||
|
|
||||||
if (!$config['search']['enable']) {
|
if (!$config['search']['enable']) {
|
||||||
die(_("Post search is disabled"));
|
die(_("Post search is disabled"));
|
||||||
}
|
}
|
||||||
|
|
||||||
$queries_per_minutes = $config['search']['queries_per_minutes'];
|
$ctx = Vichan\build_context($config);
|
||||||
$queries_per_minutes_all = $config['search']['queries_per_minutes_all'];
|
$search_service = $ctx->get(SearchService::class);
|
||||||
$search_limit = $config['search']['search_limit'];
|
|
||||||
|
|
||||||
if (isset($config['search']['boards'])) {
|
if (isset($_GET['search']) && !empty($_GET['search'])) {
|
||||||
$boards = $config['search']['boards'];
|
$raw_search = $_GET['search'];
|
||||||
} else {
|
$ip = $_SERVER['REMOTE_ADDR'];
|
||||||
$boards = listBoards(TRUE);
|
$fallback_board = (isset($_GET['board']) && !empty($_GET['board'])) ? $_GET['board'] : null;
|
||||||
}
|
|
||||||
|
|
||||||
$body = Element('search_form.html', Array('boards' => $boards, 'board' => isset($_GET['board']) ? $_GET['board'] : false, 'search' => isset($_GET['search']) ? str_replace('"', '&quot;', utf8tohtml($_GET['search'])) : false));
|
|
||||||
|
|
||||||
if (isset($_GET['search']) && !empty($_GET['search']) && isset($_GET['board']) && in_array($_GET['board'], $boards)) {
|
if ($search_service->checkFlood($ip, $raw_search)) {
|
||||||
$phrase = $_GET['search'];
|
|
||||||
$_body = '';
|
|
||||||
|
|
||||||
$query = prepare("SELECT COUNT(*) FROM ``search_queries`` WHERE `ip` = :ip AND `time` > :time");
|
|
||||||
$query->bindValue(':ip', $_SERVER['REMOTE_ADDR']);
|
|
||||||
$query->bindValue(':time', time() - ($queries_per_minutes[1] * 60));
|
|
||||||
$query->execute() or error(db_error($query));
|
|
||||||
if ($query->fetchColumn() > $queries_per_minutes[0])
|
|
||||||
error(_('Wait a while before searching again, please.'));
|
error(_('Wait a while before searching again, please.'));
|
||||||
|
|
||||||
$query = prepare("SELECT COUNT(*) FROM ``search_queries`` WHERE `time` > :time");
|
|
||||||
$query->bindValue(':time', time() - ($queries_per_minutes_all[1] * 60));
|
|
||||||
$query->execute() or error(db_error($query));
|
|
||||||
if ($query->fetchColumn() > $queries_per_minutes_all[0])
|
|
||||||
error(_('Wait a while before searching again, please.'));
|
|
||||||
|
|
||||||
|
|
||||||
$query = prepare("INSERT INTO ``search_queries`` VALUES (:ip, :time, :query)");
|
|
||||||
$query->bindValue(':ip', $_SERVER['REMOTE_ADDR']);
|
|
||||||
$query->bindValue(':time', time());
|
|
||||||
$query->bindValue(':query', $phrase);
|
|
||||||
$query->execute() or error(db_error($query));
|
|
||||||
|
|
||||||
_syslog(LOG_NOTICE, 'Searched /' . $_GET['board'] . '/ for "' . $phrase . '"');
|
|
||||||
|
|
||||||
// Cleanup search queries table
|
|
||||||
$query = prepare("DELETE FROM ``search_queries`` WHERE `time` <= :time");
|
|
||||||
$query->bindValue(':time', time() - ($queries_per_minutes_all[1] * 60));
|
|
||||||
$query->execute() or error(db_error($query));
|
|
||||||
|
|
||||||
openBoard($_GET['board']);
|
|
||||||
|
|
||||||
$filters = Array();
|
|
||||||
|
|
||||||
function search_filters($m) {
|
|
||||||
global $filters;
|
|
||||||
$name = $m[2];
|
|
||||||
$value = isset($m[4]) ? $m[4] : $m[3];
|
|
||||||
|
|
||||||
if (!in_array($name, array('id', 'thread', 'subject', 'name'))) {
|
|
||||||
// unknown filter
|
|
||||||
return $m[0];
|
|
||||||
}
|
|
||||||
|
|
||||||
$filters[$name] = $value;
|
|
||||||
|
|
||||||
return $m[1];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
$phrase = trim(preg_replace_callback('/(^|\s)(\w+):("(.*)?"|[^\s]*)/', 'search_filters', $phrase));
|
// Actually do the search.
|
||||||
|
$parse_res = $search_service->parse($raw_search);
|
||||||
|
$filters = $search_service->reduceAndWeight($parse_res);
|
||||||
|
$search_res = $search_service->search($ip, $raw_search, $filters, $fallback_board);
|
||||||
|
|
||||||
if (!preg_match('/[^*^\s]/', $phrase) && empty($filters)) {
|
|
||||||
_syslog(LOG_WARNING, 'Query too broad.');
|
|
||||||
$body .= '<p class="unimportant" style="text-align:center">(Query too broad.)</p>';
|
|
||||||
echo Element('page.html', Array(
|
|
||||||
'config'=>$config,
|
|
||||||
'title'=>'Search',
|
|
||||||
'body'=>$body,
|
|
||||||
));
|
|
||||||
exit;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Escape escape character
|
// Needed to set a global variable further down the stack, plus the template.
|
||||||
$phrase = str_replace('!', '!!', $phrase);
|
$actual_board = $filter->board ?? $fallback_board;
|
||||||
|
|
||||||
// Remove SQL wildcard
|
$body = Element('search_form.html', [
|
||||||
$phrase = str_replace('%', '!%', $phrase);
|
'boards' => $search_service->getSearchableBoards(),
|
||||||
|
'board' => $_GET['board'],
|
||||||
|
'search' => \str_replace('"', '&quot;', utf8tohtml($_GET['search']))
|
||||||
|
]);
|
||||||
|
|
||||||
// Use asterisk as wildcard to suit convention
|
if (empty($search_res)) {
|
||||||
$phrase = str_replace('*', '%', $phrase);
|
$body .= '<hr/><p style="text-align:center" class="unimportant">(' . _('No results.') . ')</p>';
|
||||||
|
|
||||||
// Remove `, it's used by table prefix magic
|
|
||||||
$phrase = str_replace('`', '!`', $phrase);
|
|
||||||
|
|
||||||
$like = '';
|
|
||||||
$match = Array();
|
|
||||||
|
|
||||||
// Find exact phrases
|
|
||||||
if (preg_match_all('/"(.+?)"/', $phrase, $m)) {
|
|
||||||
foreach($m[1] as &$quote) {
|
|
||||||
$phrase = str_replace("\"{$quote}\"", '', $phrase);
|
|
||||||
$match[] = $pdo->quote($quote);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
$words = explode(' ', $phrase);
|
|
||||||
foreach($words as &$word) {
|
|
||||||
if (empty($word)) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
$match[] = $pdo->quote($word);
|
|
||||||
}
|
|
||||||
|
|
||||||
$like = '';
|
|
||||||
foreach($match as &$phrase) {
|
|
||||||
if (!empty($like)) {
|
|
||||||
$like .= ' AND ';
|
|
||||||
}
|
|
||||||
$phrase = preg_replace('/^\'(.+)\'$/', '\'%$1%\'', $phrase);
|
|
||||||
$like .= '`body` LIKE ' . $phrase . ' ESCAPE \'!\'';
|
|
||||||
}
|
|
||||||
|
|
||||||
foreach($filters as $name => $value) {
|
|
||||||
if (!empty($like)) {
|
|
||||||
$like .= ' AND ';
|
|
||||||
}
|
|
||||||
$like .= '`' . $name . '` = '. $pdo->quote($value);
|
|
||||||
}
|
|
||||||
|
|
||||||
$like = str_replace('%', '%%', $like);
|
|
||||||
|
|
||||||
$query = prepare(sprintf("SELECT * FROM ``posts_%s`` WHERE " . $like . " ORDER BY `time` DESC LIMIT :limit", $board['uri']));
|
|
||||||
$query->bindValue(':limit', $search_limit, PDO::PARAM_INT);
|
|
||||||
$query->execute() or error(db_error($query));
|
|
||||||
|
|
||||||
if ($query->rowCount() == $search_limit) {
|
|
||||||
_syslog(LOG_WARNING, 'Query too broad.');
|
|
||||||
$body .= '<p class="unimportant" style="text-align:center">('._('Query too broad.').')</p>';
|
|
||||||
echo Element('page.html', Array(
|
|
||||||
'config'=>$config,
|
|
||||||
'title'=>'Search',
|
|
||||||
'body'=>$body,
|
|
||||||
));
|
|
||||||
exit;
|
|
||||||
}
|
|
||||||
|
|
||||||
$temp = '';
|
|
||||||
while ($post = $query->fetch()) {
|
|
||||||
if (!$post['thread']) {
|
|
||||||
$po = new Thread($post);
|
|
||||||
} else {
|
|
||||||
$po = new Post($post);
|
|
||||||
}
|
|
||||||
$temp .= $po->build(true) . '<hr/>';
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!empty($temp))
|
|
||||||
$_body .= '<fieldset><legend>' .
|
|
||||||
sprintf(ngettext('%d result in', '%d results in', $query->rowCount()),
|
|
||||||
$query->rowCount()) . ' <a href="/' .
|
|
||||||
sprintf($config['board_path'], $board['uri']) . $config['file_index'] .
|
|
||||||
'">' .
|
|
||||||
sprintf($config['board_abbreviation'], $board['uri']) . ' - ' . $board['title'] .
|
|
||||||
'</a></legend>' . $temp . '</fieldset>';
|
|
||||||
|
|
||||||
$body .= '<hr/>';
|
|
||||||
if (!empty($_body)) {
|
|
||||||
$body .= $_body;
|
|
||||||
} else {
|
} else {
|
||||||
$body .= '<p style="text-align:center" class="unimportant">('._('No results.').')</p>';
|
$body .= '<hr/>';
|
||||||
|
|
||||||
|
openBoard($actual_board);
|
||||||
|
|
||||||
|
$posts_html = '';
|
||||||
|
foreach ($search_res as $post) {
|
||||||
|
if (!$post['thread']) {
|
||||||
|
$po = new Thread($post);
|
||||||
|
} else {
|
||||||
|
$po = new Post($post);
|
||||||
|
}
|
||||||
|
$posts_html .= $po->build(true) . '<hr/>';
|
||||||
|
}
|
||||||
|
|
||||||
|
$body .= '<fieldset><legend>' .
|
||||||
|
sprintf(ngettext('%d result in', '%d results in', \count($search_res)), \count($search_res)) . ' <a href="/' .
|
||||||
|
sprintf($config['board_path'], $board['uri']) . $config['file_index'] . '">' .
|
||||||
|
sprintf($config['board_abbreviation'], $board['uri']) . ' - ' . $board['title'] .
|
||||||
|
'</a></legend>' . $posts_html . '</fieldset>';
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
$body = Element('search_form.html', [
|
||||||
|
'boards' => $search_service->getSearchableBoards(),
|
||||||
|
'board' => false,
|
||||||
|
'search' => false
|
||||||
|
]);
|
||||||
}
|
}
|
||||||
|
|
||||||
echo Element('page.html', Array(
|
echo Element('page.html', Array(
|
||||||
|
|
64
tests/SearchServiceTest.php
Normal file
64
tests/SearchServiceTest.php
Normal file
|
@ -0,0 +1,64 @@
|
||||||
|
<?php
|
||||||
|
use PHPUnit\Framework\TestCase;
|
||||||
|
use PHPUnit\Framework;
|
||||||
|
use Vichan\Data\Driver\{LogDriver, StderrLogDriver};
|
||||||
|
use Vichan\Data\UserPostQueries;
|
||||||
|
use Vichan\Service\SearchService;
|
||||||
|
|
||||||
|
|
||||||
|
class SearchServiceTest extends TestCase {
|
||||||
|
public function testBasicSearch(): void {
|
||||||
|
$srv = new SearchService(
|
||||||
|
$this->createMock(LogDriver::class),
|
||||||
|
$this->createMock(UserPostQueries::class),
|
||||||
|
null,
|
||||||
|
100,
|
||||||
|
250,
|
||||||
|
100,
|
||||||
|
);
|
||||||
|
|
||||||
|
$filters = $srv->parse("free world all large board:kino board:\"poly\" name:coolie maybe subject:\"subj\" flag:\"pirate\" id:76 thread:8 but not so much");
|
||||||
|
Framework\assertTrue($filters->body === [ 'free world all large', 'maybe', 'but not so much' ]);
|
||||||
|
Framework\assertTrue($filters->subject === 'subj');
|
||||||
|
Framework\assertTrue($filters->name === 'coolie');
|
||||||
|
Framework\assertTrue($filters->flag === 'pirate');
|
||||||
|
Framework\assertTrue($filters->id === 76);
|
||||||
|
Framework\assertTrue($filters->thread === 8);
|
||||||
|
}
|
||||||
|
|
||||||
|
public function testWeight() {
|
||||||
|
$user_queries = $this->createMock(UserPostQueries::class);
|
||||||
|
$user_queries->method('escapeSearchPosts')
|
||||||
|
->willReturnMap([
|
||||||
|
[ 'abcd', 'abcd' ],
|
||||||
|
[ 'abc', 'abc' ],
|
||||||
|
[ 'a*cd', 'a\\*cd' ],
|
||||||
|
[ 'a*c', 'a\\*c' ],
|
||||||
|
]);
|
||||||
|
|
||||||
|
$srv = new SearchService(
|
||||||
|
new StderrLogDriver('test', LogDriver::DEBUG),
|
||||||
|
$user_queries,
|
||||||
|
null,
|
||||||
|
100,
|
||||||
|
250,
|
||||||
|
100,
|
||||||
|
);
|
||||||
|
|
||||||
|
$f = $srv->parse('abcd');
|
||||||
|
$no_wildcards = $srv->reduceAndWeight($f)->weight;
|
||||||
|
|
||||||
|
$f = $srv->parse('abc*');
|
||||||
|
$end_wildcard = $srv->reduceAndWeight($f)->weight;
|
||||||
|
|
||||||
|
$f = $srv->parse('a*cd');
|
||||||
|
$middle_wildcard = $srv->reduceAndWeight($f)->weight;
|
||||||
|
|
||||||
|
$f = $srv->parse('a*c*');
|
||||||
|
$wildcards = $srv->reduceAndWeight($f)->weight;
|
||||||
|
|
||||||
|
Framework\assertTrue($no_wildcards < $end_wildcard);
|
||||||
|
Framework\assertTrue($end_wildcard < $middle_wildcard);
|
||||||
|
Framework\assertTrue($middle_wildcard < $wildcards);
|
||||||
|
}
|
||||||
|
}
|
|
@ -4,6 +4,7 @@
|
||||||
*/
|
*/
|
||||||
|
|
||||||
use Vichan\Data\ReportQueries;
|
use Vichan\Data\ReportQueries;
|
||||||
|
use Vichan\Data\SearchQueries;
|
||||||
|
|
||||||
require dirname(__FILE__) . '/inc/cli.php';
|
require dirname(__FILE__) . '/inc/cli.php';
|
||||||
|
|
||||||
|
@ -45,9 +46,17 @@ if ($config['cache']['enabled'] === 'fs') {
|
||||||
$fs_cache->collect();
|
$fs_cache->collect();
|
||||||
$delta = microtime(true) - $start;
|
$delta = microtime(true) - $start;
|
||||||
echo "Deleted $deleted_count expired filesystem cache items in $delta seconds!\n";
|
echo "Deleted $deleted_count expired filesystem cache items in $delta seconds!\n";
|
||||||
$time_tot = $delta;
|
$time_tot += $delta;
|
||||||
$deleted_tot = $deleted_count;
|
$deleted_tot = $deleted_count;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
echo "Clearing old search log...\n";
|
||||||
|
$search_queries = $ctx->get(SearchQueries::class);
|
||||||
|
$start = microtime(true);
|
||||||
|
$deleted_count = $search_queries->purgeExpired();
|
||||||
|
$delta = microtime(true) - $start;
|
||||||
|
$time_tot += $delta;
|
||||||
|
$deleted_tot = $deleted_count;
|
||||||
|
|
||||||
$time_tot = number_format((float)$time_tot, 4, '.', '');
|
$time_tot = number_format((float)$time_tot, 4, '.', '');
|
||||||
modLog("Deleted $deleted_tot expired entries in {$time_tot}s with maintenance tool");
|
modLog("Deleted $deleted_tot expired entries in {$time_tot}s with maintenance tool");
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue