Compare commits

...

19 commits

Author SHA1 Message Date
946f836cb5 search.php: (untested) refactor, use SearchService 2025-06-02 00:38:23 +02:00
e32dfcb51a context.php: add SearchQueries 2025-06-02 00:38:23 +02:00
92e24dac67 context.php: add SearchService 2025-06-02 00:38:23 +02:00
9b60540e6b Flags.php: add the flags that come embedded with php 2025-06-02 00:38:23 +02:00
e02fd2a3c4 config.php: add search.max_length 2025-06-02 00:38:23 +02:00
4b5ce63f4b config.php: better documentation for search.boards 2025-06-02 00:38:23 +02:00
a0d218dbc2 config.php: add search.max_weight to the configuration options 2025-06-02 00:38:23 +02:00
3238319e26 context.php: add UserPostQueries 2025-06-02 00:38:23 +02:00
ad62e81752 maintenance.php: add SearchQueries cleanup 2025-06-02 00:38:23 +02:00
923d4ef2b9 SearchQueries.php: extract garbage cleanup 2025-06-02 00:38:23 +02:00
47bd9fa127 SearchQueries.php: add doc 2025-06-02 00:38:23 +02:00
6b01a4ad6f FiltersParseResult.php: add 2025-06-02 00:38:23 +02:00
e6d0681d0f SearchServiceTest.php: add basic testign for the SearchService 2025-06-02 00:38:23 +02:00
19efb78e92 SearchService.php: expose searchable boards 2025-06-02 00:38:23 +02:00
ce7be3e0aa SearchService.php: add checkFlood 2025-06-02 00:38:23 +02:00
aa7aa4b205 SearchService.php: default to all boards 2025-06-02 00:38:23 +02:00
b4d5f23e78 SearchService.php: limit the searchable boards 2025-06-02 00:38:23 +02:00
a99cc34f7e SearchService.php: fix flag matching 2025-06-02 00:38:23 +02:00
48c5f6a4e0 SearchService.php: rework but untested 2025-06-02 00:38:23 +02:00
10 changed files with 666 additions and 279 deletions

View file

@ -0,0 +1,13 @@
<?php
namespace Vichan\Data;
/**
 * Plain data holder for the raw outcome of parsing a user search query.
 *
 * SearchService::parse() returns an instance of this class and
 * SearchService::reduceAndWeight() consumes it. Every filter defaults to
 * "absent": null for the scalar filters, an empty array for the body.
 */
class FiltersParseResult {
/**
 * Chunks of free text to look for in the post body.
 *
 * @var array<string>
 */
public array $body = [];
/** Text given to the `subject:` filter, or null if the filter is absent. */
public ?string $subject = null;
/** Text given to the `name:` filter, or null if the filter is absent. */
public ?string $name = null;
/** Text given to the `board:` filter, or null if the filter is absent. */
public ?string $board = null;
/** Text given to the `flag:` filter, or null if the filter is absent. */
public ?string $flag = null;
/** Post id given to the `id:` filter, or null if the filter is absent. */
public ?int $id = null;
/** Thread id given to the `thread:` filter, or null if the filter is absent. */
public ?int $thread = null;
}

283
inc/Data/Flags.php Normal file
View file

@ -0,0 +1,283 @@
<?php
class Flags {
    /**
     * Short names of the flags that ship embedded with vichan.
     *
     * The list is laid out one row per initial character to keep it scannable.
     * The order of the entries is significant for anyone indexing into the list.
     *
     * @var array<string>
     */
    public const EMBEDDED_FLAGS = [
        // a
        'a1', 'a2', 'ac', 'ad', 'ae', 'af', 'ag', 'ai', 'al', 'am', 'an', 'ao', 'ap', 'aq', 'ar', 'as', 'at', 'au', 'aw', 'ax', 'az',
        // b
        'ba', 'bb', 'bd', 'be', 'bf', 'bg', 'bh', 'bi', 'bj', 'bl', 'bm', 'bn', 'bo', 'bq', 'br', 'bs', 'bt', 'bu', 'bv', 'bw', 'by', 'bz',
        // c
        'ca', 'cat', 'cc', 'cd', 'cf', 'cg', 'ch', 'ci', 'ck', 'cl', 'cm', 'cn', 'co', 'cp', 'cr', 'cs', 'cu', 'cv', 'cw', 'cx', 'cy', 'cz',
        // d
        'de', 'dg', 'dj', 'dk', 'dm', 'do', 'dz',
        // e
        'ea', 'ec', 'ee', 'eg', 'eh', 'er', 'es', 'et', 'eu',
        // f
        'fi', 'fj', 'fk', 'fm', 'fo', 'fr', 'fx',
        // g
        'ga', 'gb', 'gd', 'ge', 'gf', 'gg', 'gh', 'gi', 'gl', 'gm', 'gn', 'gp', 'gq', 'gr', 'gs', 'gt', 'gu', 'gw', 'gy',
        // h
        'hk', 'hm', 'hn', 'hr', 'ht', 'hu',
        // i
        'ic', 'id', 'ie', 'il', 'im', 'in', 'io', 'iq', 'ir', 'is', 'it',
        // j
        'je', 'jm', 'jo', 'jp',
        // k
        'ke', 'kg', 'kh', 'ki', 'km', 'kn', 'kp', 'kr', 'kw', 'ky', 'kz',
        // l
        'la', 'lb', 'lc', 'li', 'lk', 'lr', 'ls', 'lt', 'lu', 'lv', 'ly',
        // m
        'ma', 'mc', 'md', 'me', 'mf', 'mg', 'mh', 'mk', 'ml', 'mm', 'mn', 'mo', 'mp', 'mq', 'mr', 'ms', 'mt', 'mu', 'mv', 'mw', 'mx', 'my', 'mz',
        // n
        'na', 'nc', 'ne', 'nf', 'ng', 'ni', 'nl', 'no', 'np', 'nr', 'nt', 'nu', 'nz',
        // o
        'o1', 'om',
        // p
        'pa', 'pe', 'pf', 'pg', 'ph', 'pk', 'pl', 'pm', 'pn', 'pr', 'ps', 'pt', 'pw', 'py',
        // q
        'qa',
        // r
        're', 'ro', 'rs', 'ru', 'rw',
        // s
        'sa', 'sb', 'sc', 'sd', 'se', 'sf', 'sg', 'sh', 'si', 'sj', 'sk', 'sl', 'sm', 'sn', 'so', 'sr', 'ss', 'st', 'su', 'sv', 'sx', 'sy', 'sz',
        // t
        'ta', 'tc', 'td', 'tf', 'tg', 'th', 'ti', 'tj', 'tk', 'tl', 'tm', 'tn', 'to', 'tp', 'tr', 'tt', 'tv', 'tw', 'tz',
        // u
        'ua', 'ug', 'uk', 'um', 'us', 'uy', 'uz',
        // v
        'va', 'vc', 've', 'vg', 'vi', 'vn', 'vu',
        // w
        'wf', 'ws',
        // x
        'xx',
        // y
        'ye', 'yt', 'yu',
        // z
        'za', 'zm', 'zr', 'zw',
    ];
}

View file

@ -2,12 +2,31 @@
namespace Vichan\Data;
/**
 * POD with the fragments of each filter.
 *
 * Produced by SearchService::reduceAndWeight() and consumed by
 * SearchService::search(). The text filters are stored as lists of fragments,
 * that is the pieces of the user filter found between the wildcards.
 *
 * NOTE(review): the array properties are deliberately left without a default.
 * SearchService::search() reads `$filters->board ?? $fallback_board`, which only
 * falls back while the property is unset; confirm before giving them `[]`.
 */
class SearchFilters {
    /**
     * One list of fragments per body filter.
     *
     * @var array<array<string>>
     */
    public array $body;
    /**
     * Fragments of the subject filter.
     *
     * @var array<string>
     */
    public array $subject;
    /**
     * Fragments of the name filter.
     *
     * @var array<string>
     */
    public array $name;
    /**
     * Fragments of the board filter.
     *
     * @var array<string>
     */
    public array $board;
    /**
     * Fragments of the flag filter.
     *
     * @var array<string>
     */
    public array $flag;
    /** Post id to match, or null if there is no id filter. */
    public ?int $id = null;
    /** Thread id to match, or null if there is no thread filter. */
    public ?int $thread = null;
    /**
     * Accumulated weight of all the filters.
     *
     * Starts at zero so that it can be safely increased with `+=` on a fresh instance.
     */
    public float $weight = 0.0;
}

View file

@ -6,20 +6,25 @@ class SearchQueries {
private \PDO $pdo;
private int $queries_per_minutes_single;
private int $queries_per_minutes_all;
private bool $auto_gc;
private function checkFloodImpl(string $ip, string $phrase): bool {
$now = time();
$now = \time();
$expiry_limit = \time() - ($this->queries_per_minutes_all * 60);
$query = $this->pdo->prepare("SELECT COUNT(*) FROM `search_queries` WHERE `ip` = :ip AND `time` > :time");
$query = $this->pdo->prepare("SELECT COUNT(*) FROM `search_queries` WHERE `ip` = :ip AND `time` > :time AND `time` <= :expiry_limit");
$query->bindValue(':ip', $ip);
$query->bindValue(':time', $now - ($this->queries_per_minutes_single * 60));
$query->bindValue(':time', $now - ($this->queries_per_minutes_single * 60), \PDO::PARAM_INT);
$query->bindValue(':expiry_limit', $expiry_limit, \PDO::PARAM_INT);
$query->execute();
if ($query->fetchColumn() > $this->queries_per_minutes_single) {
return false;
}
$query = $this->pdo->prepare("SELECT COUNT(*) FROM `search_queries` WHERE `time` > :time");
$query->bindValue(':time', $now - ($this->queries_per_minutes_all * 60));
$query = $this->pdo->prepare("SELECT COUNT(*) FROM `search_queries` WHERE `time` > :time AND `time` <= :expiry_limit");
$query->bindValue(':time', $now - ($this->queries_per_minutes_all * 60), \PDO::PARAM_INT);
$query->bindValue(':expiry_limit', $expiry_limit, \PDO::PARAM_INT);
$query->execute();
if ($query->fetchColumn() > $this->queries_per_minutes_all) {
return false;
@ -27,24 +32,31 @@ class SearchQueries {
$query = $this->pdo->prepare("INSERT INTO `search_queries` VALUES (:ip, :time, :query)");
$query->bindValue(':ip', $ip);
$query->bindValue(':time', $now);
$query->bindValue(':time', $now, \PDO::PARAM_INT);
$query->bindValue(':query', $phrase);
$query->execute();
// Cleanup search queries table
$query = prepare("DELETE FROM `search_queries` WHERE `time` <= :time");
$query->bindValue(':time', time() - ($this->queries_per_minutes_all * 60));
$query->execute();
if ($this->auto_gc) {
$this->purgeExpired();
}
return true;
}
public function __construct(\PDO $pdo, int $queries_per_minutes_single, int $queries_per_minutes_all) {
public function __construct(\PDO $pdo, int $queries_per_minutes_single, int $queries_per_minutes_all, bool $auto_gc) {
$this->pdo = $pdo;
$this->queries_per_minutes_single = $queries_per_minutes_single;
$this->queries_per_minutes_all = $queries_per_minutes_all;
$this->auto_gc = $auto_gc;
}
/**
* Check if the IP-query pair overflows the limit.
*
* @param string $ip Source IP.
* @param string $phrase The search query.
* @return bool True if the request goes over the limit
*/
public function checkFlood(string $ip, string $phrase): bool {
$this->pdo->beginTransaction();
try {
@ -56,4 +68,12 @@ class SearchQueries {
throw $e;
}
}
/**
 * Deletes the logged search queries that fell out of the global flood window.
 *
 * A row is expired when its `time` is at most `queries_per_minutes_all * 60`
 * seconds in the past.
 *
 * @return int The number of deleted rows.
 */
public function purgeExpired(): int {
    $expiry_limit = \time() - ($this->queries_per_minutes_all * 60);

    // Go through the injected connection, like every other query of this class,
    // rather than through the global prepare() helper.
    $query = $this->pdo->prepare("DELETE FROM `search_queries` WHERE `time` <= :expiry_limit");
    $query->bindValue(':expiry_limit', $expiry_limit, \PDO::PARAM_INT);
    $query->execute();
    return $query->rowCount();
}
}

View file

@ -2,7 +2,7 @@
namespace Vichan\Service;
use Vichan\Data\Driver\LogDriver;
use Vichan\Data\{UserPostQueries, SearchFilters, SearchFiltersWeighted};
use Vichan\Data\{FiltersParseResult, UserPostQueries, SearchFilters, SearchQueries};
class SearchService {
@ -33,10 +33,12 @@ class SearchService {
private LogDriver $log;
private UserPostQueries $user_queries;
private SearchQueries $search_queries;
private ?array $flag_map;
private float $max_weight;
private int $max_query_length;
private int $post_limit;
private array $searchable_board_uris;
private static function truncateQuery(string $text, int $byteLimit): ?string {
@ -62,42 +64,35 @@ class SearchService {
return null;
}
private static function trimEnd(string $str): string {
return \rtrim($str, "* \n\r\t\v\0");
private static function trim(string $str): string {
return \trim($str, "* \n\r\t\v\0");
}
private function sanitizeAndTransform(string $str): array {
// Escape UserQueries's wildcards.
$str = $this->user_queries->escapeSearchPosts($str);
// Coalesce multiple wildcards.
$wildcard_count = 0;
$str = \preg_replace_callback('/(?:\\\\\\\\)*\\\\\*|(?:\\\\\\\\)*\*+/', function($match) use (&$wildcard_count) {
$wildcard_count++;
return UserPostQueries::SEARCH_POSTS_WILDCARD;
}, $str);
// Query is too broad.
if ($str === UserPostQueries::SEARCH_POSTS_WILDCARD) {
return [ null, 0 ];
}
// Unescape.
$str = \strtr($str, [
private static function unescape(string $str): string {
return \strtr($str, [
'\\\\' => '\\',
'\\*' => '*',
'\\"' => '"'
]);
return [ $str, $wildcard_count ];
}
private static function weightByContent(string $str): float {
$w = 1;
/**
* Split the filter into fragments along the wildcards, handling escaping.
*
* @param string $str The full filter.
* @return array<string>
*/
private static function split(string $str): array {
// Split the fragments
return \preg_split('/(?:\\\\\\\\)*\\\\\*|(?:\\\\\\\\)*\*+/', $str);
}
// Count common and short words.
$trim = \trim($str, UserPostQueries::SEARCH_POSTS_WILDCARD . " \n\r\t\v\0");
$words = \explode(' ', $trim);
foreach ($words as $word) {
$short = \strlen($word) < 4;
if (\in_array($word, self::COMMON_WORDS)) {
private static function weightByContent(array $fragments): float {
$w = 0;
foreach ($fragments as $fragment) {
$short = \strlen($fragment) < 4;
if (\in_array($fragment, self::COMMON_WORDS)) {
$w += $short ? 16 : 6;
} elseif ($short) {
$w += 6;
@ -107,24 +102,54 @@ class SearchService {
return $w;
}
/**
 * Splits a filter along its wildcards, cleans up the fragments and weights the wildcard usage.
 *
 * @param string $filter The user provided filter text.
 * @return array{0: array<string>, 1: int, 2: float} The non-empty fragments, the sum of
 *         their byte lengths, and the weight due to the wildcards.
 */
private static function filterAndWeight(string $filter): array {
    $fragments = self::split($filter);

    $acc = [];
    $total_len = 0;
    foreach ($fragments as $fragment) {
        $fragment = self::trim(self::unescape($fragment));
        // Compare against the empty string: empty() would also throw away the fragment "0".
        if ($fragment !== '') {
            $total_len += \strlen($fragment);
            $acc[] = $fragment;
        }
    }

    // Every wildcard splits the filter once, so N fragments mean N - 1 wildcards.
    // Clamp from below: the count must never become negative.
    $interword = \max(\count($fragments) - 1, 0);
    // Wildcards over the total length of the text. With no text left there is no ratio
    // to compute, and dividing by zero would throw.
    $perc = $total_len > 0 ? $interword / $total_len * 100.0 : 0.0;
    $wildcard_weight = $perc + \count($fragments) * 2;

    return [ $acc, $total_len, $wildcard_weight ];
}
/**
 * Gets the subset of the flags which contain every fragment, in the given order.
 *
 * The match is case-insensitive. Each fragment must be found after the end of the
 * previous one, so the fragments cannot overlap nor appear out of order.
 *
 * @param array<string> $flags An array of flags.
 * @param array<string> $fragments User provided fragments to search in the flags.
 * @return array<string> The matching entries of $flags, with their original keys.
 */
private static function matchFlags(array $flags, array $fragments): array {
    return \array_filter($flags, static function ($str) use ($fragments) {
        // Where the search for the next fragment starts. It begins at the very start of
        // the flag and always stays within the string, since it only ever moves to the
        // end of a successful match.
        $offset = 0;
        foreach ($fragments as $fragment) {
            if ($fragment === '') {
                // Nothing to look for.
                continue;
            }

            $pos = \stripos($str, $fragment, $offset);
            if ($pos === false) {
                // Exclude flags that don't match even a single fragment.
                return false;
            }
            // Resume right after the match to ensure the fragments are one after the other.
            $offset = $pos + \strlen($fragment);
        }
        return true;
    });
}
/**
@ -140,9 +165,9 @@ class SearchService {
* - id: post id, must be numeric
* - thread: thread id, must be numeric
* The remaining text is split into chunks and searched in the post body.
* @return SearchFilters
* @return FiltersParseResult
*/
public function parse(string $raw_query): SearchFilters {
public function parse(string $raw_query): FiltersParseResult{
$tres = self::truncateQuery($raw_query, $this->max_query_length);
if ($tres === null) {
throw new \RuntimeException('Could not truncate query');
@ -179,7 +204,7 @@ class SearchService {
throw new \RuntimeException('Could not decode the query');
}
$filters = new SearchFilters();
$filters = new FiltersParseResult();
foreach ($matches as $m) {
if (!empty($m[1])) {
@ -219,90 +244,90 @@ class SearchService {
}
/**
* @param LogDriver $log Log river.
* @param UserPostQueries $user_queries User posts queries.
* @param ?flag_map $max_flag_length The key-value map of user flags, or null to disable flag search.
* @param SearchQueries $search_queries Search queries for flood detection.
* @param ?array $flag_map The key-value map of user flags, or null to disable flag search.
* @param float $max_weight The maximum weight of the parsed user query. Body filters that go beyond this limit are discarded.
* @param int $max_query_length Maximum length of the raw input query before it's truncated.
* @param int $post_limit Maximum number of results.
* @param ?array $searchable_board_uris The uris of the board that can be searched. Null to search all the boards.
*/
public function __construct(LogDriver $log, UserPostQueries $user_queries, ?array $flag_map, float $max_weight, int $max_query_length, int $post_limit) {
public function __construct(
LogDriver $log,
UserPostQueries $user_queries,
?array $flag_map,
float $max_weight,
int $max_query_length,
int $post_limit,
?array $searchable_board_uris
) {
$this->log = $log;
$this->user_queries = $user_queries;
$this->flag_map = $flag_map;
$this->max_weight = $max_weight;
$this->max_query_length = $max_query_length;
$this->post_limit = $post_limit;
$this->searchable_board_uris = $searchable_board_uris ?? listBoards(true);
}
/**
* Reduces the user provided filters and assigns them a total weight.
*
* @param SearchFilters $filters The filters to sanitize, reduce and weight.
* @return SearchFiltersWeighted
* @param FiltersParseResult $filters The filters to sanitize, reduce and weight.
* @return SearchFilters
*/
public function reduceAndWeight(SearchFilters $filters): SearchFiltersWeighted {
$weighted = new SearchFiltersWeighted();
public function reduceAndWeight(FiltersParseResult $filters): SearchFilters {
$weighted = new SearchFilters();
if ($filters->subject !== null) {
if (\strlen($filters->subject) > self::MAX_LENGTH_SUBJECT) {
$filters->subject = null;
list($fragments, $total_len, $wildcard_weight) = self::filterAndWeight($filters->subject);
if ($total_len > self::MAX_LENGTH_SUBJECT) {
$weighted->subject = [];
} else {
list($str, $wildcards) = $this->sanitizeAndTransform($filters->subject);
if ($str === null) {
$filters->subject = null;
} else {
$str = self::trimEnd($str);
$weighted->weight += self::weightByWildcards($str, $wildcards);
$filters->subject = $str;
}
$weighted->subject = $fragments;
$weighted->weight = $wildcard_weight;
}
}
if ($filters->name !== null) {
if (\strlen($filters->name) > self::MAX_LENGTH_NAME) {
$filters->name = null;
list($fragments, $total_len, $wildcard_weight) = self::filterAndWeight($filters->name);
if ($total_len > self::MAX_LENGTH_NAME) {
$weighted->name = [];
} else {
list($str, $wildcards) = $this->sanitizeAndTransform($filters->name);
if ($str === null) {
$filters->name = null;
} else {
$str = self::trimEnd($str);
$weighted->weight += self::weightByWildcards($str, $wildcards);
$filters->name = $str;
}
$weighted->name = $fragments;
$weighted->weight += $wildcard_weight;
}
}
if ($filters->flag !== null) {
$max_flag_length = \array_reduce($this->flag_map, function($current_max, $str) {
return \max($current_max, \strlen($str));
}, 0);
$weighted->flag = [];
if ($this->flag_map !== null && !empty($this->flag_map)) {
$max_flag_length = \array_reduce($this->flag_map, fn($max, $str) => \max($max, \strlen($str)), 0);
list($fragments, $total_len, $wildcard_weight) = self::filterAndWeight($filters->flag);
if ($this->flag_map === null
|| empty($this->flag_map)
// Add 2 to account for possible wildcards on the ends.
|| \strlen($filters->flag) > $max_flag_length + 2) {
$filters->flag = null;
} else {
$str = \trim($str);
$weighted->weight += self::weightByWildcards($str, $wildcards);
$filters->flag = $str;
}
}
if ($filters->body !== null) {
$acc = [];
foreach ($filters->body as $str) {
$str = self::trimEnd($str);
list($str, $wildcards) = $this->sanitizeAndTransform($str);
if ($str !== null && !empty($str)) {
$w_content = self::weightByContent($str);
$w_wildcards = self::weightByWildcards($str, $wildcards);
$w = $w_content + $w_wildcards;
if ($w + $weighted->weight <= $this->max_weight) {
$weighted->weight += $w;
$acc[] = $str;
}
if ($total_len <= $max_flag_length + 2) {
$weighted->flag = $fragments;
$weighted->weight += $wildcard_weight;
}
}
}
$weighted->id = $filters->id;
$weighted->thread = $filters->thread;
if ($filters->body !== null) {
foreach ($filters->body as $str) {
list($fragments, $total_len, $wildcard_weight) = self::filterAndWeight($str);
$content_weight = self::weightByContent($fragments);
$str_weight = $content_weight + $wildcard_weight;
$filters->body = $acc;
if ($str_weight + $weighted->weight <= $this->max_weight) {
$weighted->weight += $str_weight;
$filters->body[] = $fragments;
}
}
}
return $weighted;
@ -311,18 +336,17 @@ class SearchService {
/**
* Run a search on user posts with the given filters.
*
* @param SearchFiltersWeighted $filters An array of filters made by {@see self::parse()}.
* @param SearchFilters $filters An array of filters made by {@see self::parse()}.
* @param ?string $fallback_board Fallback board if there isn't a board filter.
* @return array Data array straight from the PDO, with all the fields in posts.sql
*/
public function search(string $ip, string $raw_query, SearchFiltersWeighted $filters, ?string $fallback_board): array {
public function search(string $ip, string $raw_query, SearchFilters $filters, ?string $fallback_board): array {
$board = $filters->board ?? $fallback_board;
if ($board === null) {
return [];
}
$valid_uris = listBoards(true);
if (!\in_array($board, $valid_uris)) {
if (!\in_array($board, $this->searchable_board_uris)) {
return [];
}
@ -334,7 +358,7 @@ class SearchService {
$this->log->log(LogDriver::INFO, "$ip search: weight $weight_perc ({$filters->weight}) query '$raw_query'");
}
$flags = $filters->flag !== null ? $this->matchFlag($filters->flag) : null;
$flags = $filters->flag !== null ? $this->matchFlags($this->flag_map, $filters->flag) : null;
return $this->user_queries->searchPosts(
$board,
@ -347,4 +371,22 @@ class SearchService {
$this->post_limit
);
}
/**
 * Runs the flood check for the IP-query pair.
 *
 * Thin wrapper that forwards to SearchQueries::checkFlood().
 *
 * NOTE(review): SearchQueries::checkFloodImpl() returns false when a limit is exceeded,
 * while the documentation promises true in that case. Confirm which polarity
 * SearchQueries::checkFlood() actually exposes.
 *
 * @param string $ip Source IP.
 * @param string $raw_query The search query, as sent by the user.
 * @return bool The result of SearchQueries::checkFlood(), documented as true if the request goes over the limit.
 */
public function checkFlood(string $ip, string $raw_query): bool {
    return $this->search_queries->checkFlood($ip, $raw_query);
}
/**
 * Returns the uris of the boards that may be searched.
 *
 * This is the list the service was built with: either the uris given to the
 * constructor or, when null was given, the result of listBoards(true).
 *
 * @return array The searchable board uris.
 */
public function getSearchableBoards(): array {
return $this->searchable_board_uris;
}
}

View file

@ -1856,7 +1856,15 @@
// Limit of search results
$config['search']['search_limit'] = 100;
// Boards for searching
// Maximum weight of the search query.
// Body search filters are discarded if they make the query heavier than this.
$config['search']['max_weight'] = 80;
// Maximum length of the user sent search query.
// Characters beyond the limit are truncated and ignored.
$config['search']['max_length'] = 768;
// Uncomment to limit the search feature to the given boards by uri.
//$config['search']['boards'] = array('a', 'b', 'c', 'd', 'e');
// Enable public logs? 0: NO, 1: YES, 2: YES, but drop names

View file

@ -1,8 +1,10 @@
<?php
namespace Vichan;
use Vichan\Data\{IpNoteQueries, ReportQueries, UserPostQueries};
use Flags;
use Vichan\Data\{IpNoteQueries, ReportQueries, SearchQueries, UserPostQueries};
use Vichan\Data\Driver\{CacheDriver, ErrorLogLogDriver, FileLogDriver, LogDriver, StderrLogDriver, SyslogLogDriver};
use Vichan\Service\SearchService;
defined('TINYBOARD') or exit;
@ -69,6 +71,28 @@ function build_context(array $config): Context {
sql_open();
return $pdo;
},
SearchService::class => function($c) {
$config = $c->get('config');
if ($config['user_flag']) {
$flags = $config['user_flags'];
} elseif ($config['country_flags']) {
$flags = Flags::EMBEDDED_FLAGS;
} else {
$flags = null;
}
$board_uris = $config['search']['boards'] ?? null;
return new SearchService(
$c->get(LogDriver::class),
$c->get(UserPostQueries::class),
$flags,
$config['search']['max_weight'],
$config['search']['max_length'],
$config['search']['search_limit'],
$board_uris
);
},
ReportQueries::class => function($c) {
$auto_maintenance = (bool)$c->get('config')['auto_maintenance'];
$pdo = $c->get(\PDO::class);
@ -78,5 +102,14 @@ function build_context(array $config): Context {
return new UserPostQueries($c->get(\PDO::class));
},
IpNoteQueries::class => fn($c) => new IpNoteQueries($c->get(\PDO::class), $c->get(CacheDriver::class)),
SearchQueries::class => function($c) {
$config = $c->get('config');
return new SearchQueries(
$c->get(\PDO::class),
$config['search']['queries_per_minutes'],
$config['search']['queries_per_minutes_all'],
$config['auto_maintenance']
);
}
]);
}

View file

@ -1,174 +1,70 @@
<?php
use Vichan\Service\SearchService;
require 'inc/bootstrap.php';
if (!$config['search']['enable']) {
die(_("Post search is disabled"));
}
$queries_per_minutes = $config['search']['queries_per_minutes'];
$queries_per_minutes_all = $config['search']['queries_per_minutes_all'];
$search_limit = $config['search']['search_limit'];
$ctx = Vichan\build_context($config);
$search_service = $ctx->get(SearchService::class);
if (isset($config['search']['boards'])) {
$boards = $config['search']['boards'];
} else {
$boards = listBoards(TRUE);
}
if (isset($_GET['search']) && !empty($_GET['search'])) {
$raw_search = $_GET['search'];
$ip = $_SERVER['REMOTE_ADDR'];
$fallback_board = (isset($_GET['board']) && !empty($_GET['board'])) ? $_GET['board'] : null;
$body = Element('search_form.html', Array('boards' => $boards, 'board' => isset($_GET['board']) ? $_GET['board'] : false, 'search' => isset($_GET['search']) ? str_replace('"', '&quot;', utf8tohtml($_GET['search'])) : false));
if (isset($_GET['search']) && !empty($_GET['search']) && isset($_GET['board']) && in_array($_GET['board'], $boards)) {
$phrase = $_GET['search'];
$_body = '';
$query = prepare("SELECT COUNT(*) FROM ``search_queries`` WHERE `ip` = :ip AND `time` > :time");
$query->bindValue(':ip', $_SERVER['REMOTE_ADDR']);
$query->bindValue(':time', time() - ($queries_per_minutes[1] * 60));
$query->execute() or error(db_error($query));
if ($query->fetchColumn() > $queries_per_minutes[0])
if ($search_service->checkFlood($ip, $raw_search)) {
error(_('Wait a while before searching again, please.'));
$query = prepare("SELECT COUNT(*) FROM ``search_queries`` WHERE `time` > :time");
$query->bindValue(':time', time() - ($queries_per_minutes_all[1] * 60));
$query->execute() or error(db_error($query));
if ($query->fetchColumn() > $queries_per_minutes_all[0])
error(_('Wait a while before searching again, please.'));
$query = prepare("INSERT INTO ``search_queries`` VALUES (:ip, :time, :query)");
$query->bindValue(':ip', $_SERVER['REMOTE_ADDR']);
$query->bindValue(':time', time());
$query->bindValue(':query', $phrase);
$query->execute() or error(db_error($query));
_syslog(LOG_NOTICE, 'Searched /' . $_GET['board'] . '/ for "' . $phrase . '"');
// Cleanup search queries table
$query = prepare("DELETE FROM ``search_queries`` WHERE `time` <= :time");
$query->bindValue(':time', time() - ($queries_per_minutes_all[1] * 60));
$query->execute() or error(db_error($query));
openBoard($_GET['board']);
$filters = Array();
function search_filters($m) {
global $filters;
$name = $m[2];
$value = isset($m[4]) ? $m[4] : $m[3];
if (!in_array($name, array('id', 'thread', 'subject', 'name'))) {
// unknown filter
return $m[0];
}
$filters[$name] = $value;
return $m[1];
}
$phrase = trim(preg_replace_callback('/(^|\s)(\w+):("(.*)?"|[^\s]*)/', 'search_filters', $phrase));
// Actually do the search.
$parse_res = $search_service->parse($raw_search);
$filters = $search_service->reduceAndWeight($parse_res);
$search_res = $search_service->search($ip, $raw_search, $filters, $fallback_board);
if (!preg_match('/[^*^\s]/', $phrase) && empty($filters)) {
_syslog(LOG_WARNING, 'Query too broad.');
$body .= '<p class="unimportant" style="text-align:center">(Query too broad.)</p>';
echo Element('page.html', Array(
'config'=>$config,
'title'=>'Search',
'body'=>$body,
));
exit;
}
// Escape escape character
$phrase = str_replace('!', '!!', $phrase);
// Needed to set a global variable further down the stack, plus the template.
$actual_board = $filter->board ?? $fallback_board;
// Remove SQL wildcard
$phrase = str_replace('%', '!%', $phrase);
$body = Element('search_form.html', [
'boards' => $search_service->getSearchableBoards(),
'board' => $_GET['board'],
'search' => \str_replace('"', '&quot;', utf8tohtml($_GET['search']))
]);
// Use asterisk as wildcard to suit convention
$phrase = str_replace('*', '%', $phrase);
// Remove `, it's used by table prefix magic
$phrase = str_replace('`', '!`', $phrase);
$like = '';
$match = Array();
// Find exact phrases
if (preg_match_all('/"(.+?)"/', $phrase, $m)) {
foreach($m[1] as &$quote) {
$phrase = str_replace("\"{$quote}\"", '', $phrase);
$match[] = $pdo->quote($quote);
}
}
$words = explode(' ', $phrase);
foreach($words as &$word) {
if (empty($word)) {
continue;
}
$match[] = $pdo->quote($word);
}
$like = '';
foreach($match as &$phrase) {
if (!empty($like)) {
$like .= ' AND ';
}
$phrase = preg_replace('/^\'(.+)\'$/', '\'%$1%\'', $phrase);
$like .= '`body` LIKE ' . $phrase . ' ESCAPE \'!\'';
}
foreach($filters as $name => $value) {
if (!empty($like)) {
$like .= ' AND ';
}
$like .= '`' . $name . '` = '. $pdo->quote($value);
}
$like = str_replace('%', '%%', $like);
$query = prepare(sprintf("SELECT * FROM ``posts_%s`` WHERE " . $like . " ORDER BY `time` DESC LIMIT :limit", $board['uri']));
$query->bindValue(':limit', $search_limit, PDO::PARAM_INT);
$query->execute() or error(db_error($query));
if ($query->rowCount() == $search_limit) {
_syslog(LOG_WARNING, 'Query too broad.');
$body .= '<p class="unimportant" style="text-align:center">('._('Query too broad.').')</p>';
echo Element('page.html', Array(
'config'=>$config,
'title'=>'Search',
'body'=>$body,
));
exit;
}
$temp = '';
while ($post = $query->fetch()) {
if (!$post['thread']) {
$po = new Thread($post);
} else {
$po = new Post($post);
}
$temp .= $po->build(true) . '<hr/>';
}
if (!empty($temp))
$_body .= '<fieldset><legend>' .
sprintf(ngettext('%d result in', '%d results in', $query->rowCount()),
$query->rowCount()) . ' <a href="/' .
sprintf($config['board_path'], $board['uri']) . $config['file_index'] .
'">' .
sprintf($config['board_abbreviation'], $board['uri']) . ' - ' . $board['title'] .
'</a></legend>' . $temp . '</fieldset>';
$body .= '<hr/>';
if (!empty($_body)) {
$body .= $_body;
if (empty($search_res)) {
$body .= '<hr/><p style="text-align:center" class="unimportant">(' . _('No results.') . ')</p>';
} else {
$body .= '<p style="text-align:center" class="unimportant">('._('No results.').')</p>';
$body .= '<hr/>';
openBoard($actual_board);
$posts_html = '';
foreach ($search_res as $post) {
if (!$post['thread']) {
$po = new Thread($post);
} else {
$po = new Post($post);
}
$posts_html .= $po->build(true) . '<hr/>';
}
$body .= '<fieldset><legend>' .
sprintf(ngettext('%d result in', '%d results in', \count($search_res)), \count($search_res)) . ' <a href="/' .
sprintf($config['board_path'], $board['uri']) . $config['file_index'] . '">' .
sprintf($config['board_abbreviation'], $board['uri']) . ' - ' . $board['title'] .
'</a></legend>' . $posts_html . '</fieldset>';
}
} else {
$body = Element('search_form.html', [
'boards' => $search_service->getSearchableBoards(),
'board' => false,
'search' => false
]);
}
echo Element('page.html', Array(

View file

@ -0,0 +1,64 @@
<?php
use PHPUnit\Framework\TestCase;
use PHPUnit\Framework;
use Vichan\Data\Driver\{LogDriver, StderrLogDriver};
use Vichan\Data\UserPostQueries;
use Vichan\Service\SearchService;
/**
 * Basic tests for the query parsing and the weighting done by the SearchService.
 */
class SearchServiceTest extends TestCase {
    /**
     * Every kind of filter is extracted from the raw query, and the remaining text
     * ends up split in the body chunks.
     */
    public function testBasicSearch(): void {
        $srv = new SearchService(
            $this->createMock(LogDriver::class),
            $this->createMock(UserPostQueries::class),
            null,
            100,
            250,
            100,
            // Explicit list of searchable boards: null would make the service call listBoards().
            []
        );

        $filters = $srv->parse("free world all large board:kino board:\"poly\" name:coolie maybe subject:\"subj\" flag:\"pirate\" id:76 thread:8 but not so much");

        // assertSame reports both values on failure, unlike assertTrue on a comparison.
        Framework\assertSame([ 'free world all large', 'maybe', 'but not so much' ], $filters->body);
        Framework\assertSame('subj', $filters->subject);
        Framework\assertSame('coolie', $filters->name);
        Framework\assertSame('pirate', $filters->flag);
        Framework\assertSame(76, $filters->id);
        Framework\assertSame(8, $filters->thread);
    }

    /**
     * The weight of a query grows with the number of wildcards, and a wildcard in the
     * middle of a word weights more than one at the end.
     */
    public function testWeight(): void {
        $user_queries = $this->createMock(UserPostQueries::class);
        $user_queries->method('escapeSearchPosts')
            ->willReturnMap([
                [ 'abcd', 'abcd' ],
                [ 'abc', 'abc' ],
                [ 'a*cd', 'a\\*cd' ],
                [ 'a*c', 'a\\*c' ],
            ]);

        $srv = new SearchService(
            new StderrLogDriver('test', LogDriver::DEBUG),
            $user_queries,
            null,
            100,
            250,
            100,
            // Explicit list of searchable boards: null would make the service call listBoards().
            []
        );

        $f = $srv->parse('abcd');
        $no_wildcards = $srv->reduceAndWeight($f)->weight;
        $f = $srv->parse('abc*');
        $end_wildcard = $srv->reduceAndWeight($f)->weight;
        $f = $srv->parse('a*cd');
        $middle_wildcard = $srv->reduceAndWeight($f)->weight;
        $f = $srv->parse('a*c*');
        $wildcards = $srv->reduceAndWeight($f)->weight;

        Framework\assertTrue($no_wildcards < $end_wildcard);
        Framework\assertTrue($end_wildcard < $middle_wildcard);
        Framework\assertTrue($middle_wildcard < $wildcards);
    }
}

View file

@ -4,6 +4,7 @@
*/
use Vichan\Data\ReportQueries;
use Vichan\Data\SearchQueries;
require dirname(__FILE__) . '/inc/cli.php';
@ -45,9 +46,17 @@ if ($config['cache']['enabled'] === 'fs') {
$fs_cache->collect();
$delta = microtime(true) - $start;
echo "Deleted $deleted_count expired filesystem cache items in $delta seconds!\n";
$time_tot = $delta;
$time_tot += $delta;
$deleted_tot = $deleted_count;
}
// Purge the search queries that are too old to matter for the flood detection.
echo "Clearing old search log...\n";
$search_queries = $ctx->get(SearchQueries::class);
$start = microtime(true);
$deleted_count = $search_queries->purgeExpired();
$delta = microtime(true) - $start;
echo "Deleted $deleted_count expired search queries in $delta seconds!\n";
$time_tot += $delta;
// Add to the running total instead of overwriting it: the final log line reports
// the entries deleted by all the maintenance steps.
$deleted_tot += $deleted_count;

$time_tot = number_format((float)$time_tot, 4, '.', '');
modLog("Deleted $deleted_tot expired entries in {$time_tot}s with maintenance tool");