diff --git a/inc/Data/Driver/HttpDriver.php b/inc/Data/Driver/HttpDriver.php
new file mode 100644
index 00000000..2e379f27
--- /dev/null
+++ b/inc/Data/Driver/HttpDriver.php
@@ -0,0 +1,135 @@
+inner);
+        \curl_setopt_array($this->inner, [
+            \CURLOPT_URL => $url,
+            \CURLOPT_TIMEOUT => $timeout,
+            \CURLOPT_USERAGENT => 'Tinyboard',
+            \CURLOPT_PROTOCOLS => \CURLPROTO_HTTP | \CURLPROTO_HTTPS,
+        ]);
+    }
+
+    public function __construct(int $timeout, int $max_file_size) {
+        $this->inner = \curl_init();
+        $this->timeout = $timeout;
+        $this->max_file_size = $max_file_size;
+    }
+
+    public function __destruct() {
+        \curl_close($this->inner);
+    }
+
+    /**
+     * Execute a GET request and return the response body.
+     *
+     * @param string $endpoint Uri endpoint.
+     * @param ?array $data Optional GET parameters, appended to the endpoint as a query string.
+     * @param ?array $headers Optional HTTP headers, passed to curl as CURLOPT_HTTPHEADER.
+     * @param int $timeout Optional request timeout in seconds. Use the default timeout if 0.
+     * @return string Returns the body of the response.
+     * @throws \RuntimeException Throws on IO error, with the curl error as message.
+     */
+    public function requestGet(string $endpoint, ?array $data, ?array $headers = null, int $timeout = 0): string {
+        if (!empty($data)) {
+            $endpoint .= '?' . \http_build_query($data);
+        }
+        if ($timeout == 0) {
+            $timeout = $this->timeout;
+        }
+
+        $this->resetTowards($endpoint, $timeout);
+        if (!empty($headers)) {
+            \curl_setopt($this->inner, \CURLOPT_HTTPHEADER, $headers);
+        }
+        \curl_setopt($this->inner, \CURLOPT_RETURNTRANSFER, true);
+        $ret = \curl_exec($this->inner);
+
+        if ($ret === false) {
+            throw new \RuntimeException(\curl_error($this->inner));
+        }
+        return $ret;
+    }
+
+    /**
+     * Execute a POST request and return the response body.
+     *
+     * @param string $endpoint Uri endpoint.
+     * @param ?array $data Optional POST parameters, sent url-encoded.
+     * @param int $timeout Optional request timeout in seconds. Use the default timeout if 0.
+     * @return string Returns the body of the response.
+     * @throws \RuntimeException Throws on IO error, with the curl error as message.
+     */
+    public function requestPost(string $endpoint, ?array $data, int $timeout = 0): string {
+        if ($timeout == 0) {
+            $timeout = $this->timeout;
+        }
+
+        $this->resetTowards($endpoint, $timeout);
+        \curl_setopt($this->inner, \CURLOPT_POST, true);
+        if (!empty($data)) {
+            \curl_setopt($this->inner, \CURLOPT_POSTFIELDS, \http_build_query($data));
+        }
+        \curl_setopt($this->inner, \CURLOPT_RETURNTRANSFER, true);
+        $ret = \curl_exec($this->inner);
+
+        if ($ret === false) {
+            throw new \RuntimeException(\curl_error($this->inner));
+        }
+        return $ret;
+    }
+
+    /**
+     * Download the endpoint's target with curl, writing the body into an already opened file.
+     *
+     * @param string $endpoint Url to the file to download.
+     * @param ?array $data Optional GET parameters, appended to the endpoint as a query string.
+     * @param resource $fd File descriptor to save the content to. It is NOT closed by this method.
+     * @param int $timeout Optional request timeout in seconds. Use the default timeout if 0.
+     * @return bool Returns true on success, false if the file was larger than the configured maximum size.
+     * @throws \RuntimeException Throws on IO error (HTTP error statuses included, see CURLOPT_FAILONERROR).
+     */
+    public function requestGetInto(string $endpoint, ?array $data, mixed $fd, int $timeout = 0): bool {
+        if (!empty($data)) {
+            $endpoint .= '?' . \http_build_query($data);
+        }
+        if ($timeout == 0) {
+            $timeout = $this->timeout;
+        }
+
+        $this->resetTowards($endpoint, $timeout);
+        // Adapted from: https://stackoverflow.com/a/17642638 - a non-zero return from the callback aborts the transfer.
+        $opt = (\PHP_VERSION_ID >= 80200) ? \CURLOPT_XFERINFOFUNCTION : \CURLOPT_PROGRESSFUNCTION;
+        \curl_setopt_array($this->inner, [
+            \CURLOPT_NOPROGRESS => false,
+            $opt => fn($res, $next_dl, $dl, $next_up, $up) => (int)($dl > $this->max_file_size),
+            \CURLOPT_FAILONERROR => true,
+            \CURLOPT_FOLLOWLOCATION => false,
+            \CURLOPT_FILE => $fd,
+            \CURLOPT_IPRESOLVE => \CURL_IPRESOLVE_V4,
+        ]);
+        $ret = \curl_exec($this->inner);
+
+        if ($ret === false) {
+            if (\curl_errno($this->inner) === \CURLE_ABORTED_BY_CALLBACK) {
+                return false;
+            }
+
+            throw new \RuntimeException(\curl_error($this->inner));
+        }
+        return true;
+    }
+}
diff --git a/inc/Data/OembedResponse.php b/inc/Data/OembedResponse.php
new file mode 100644
index 00000000..0e99f5ff
--- /dev/null
+++ b/inc/Data/OembedResponse.php
@@ -0,0 +1,11 @@
+log = $log;
+        $this->oembed_extractor = $oembed_extractor;
+        $this->embed_entries = $embed_entries;
+        $this->thumb_download_timeout = $thumb_download_timeout;
+    }
+
+    /**
+     * Creates an empty temporary file and schedules its removal at script shutdown.
+     *
+     * @return string Path of the created temporary file.
+     * @throws \RuntimeException If the temporary file cannot be created.
+     */
+    private function make_tmp_file(): string {
+        $ret = \tempnam($this->tmp_dir, self::TMP_FILE_PREFIX);
+        if ($ret === false) {
+            throw new \RuntimeException("Could not create temporary file in {$this->tmp_dir}");
+        }
+        \register_shutdown_function(fn() => @unlink($ret));
+        return $ret;
+    }
+
+    /**
+     * Downloads the thumbnail into a temporary file.
+     *
+     * NOTE(review): the constructor lines visible in this patch never assign $this->http or
+     * $this->tmp_dir, and build_context() passes no HttpDriver to EmbedService - confirm both
+     * properties are initialised somewhere.
+     *
+     * @param string $thumbnail_url Url of the thumbnail to download.
+     * @return ?string The path to the temporary file, null if the file was too large.
+     * @throws \RuntimeException If the temporary file cannot be created or opened, or on download IO error.
+     */
+    private function fetchThumbnail(string $thumbnail_url): ?string {
+        $tmp_file = $this->make_tmp_file();
+        $fd = \fopen($tmp_file, 'w+b');
+        if ($fd === false) {
+            throw new \RuntimeException("Could not open temporary file $tmp_file for read/write");
+        }
+
+        try {
+            $ret = $this->http->requestGetInto($thumbnail_url, null, $fd, $this->thumb_download_timeout);
+        } finally {
+            // Always release the handle, so the data is flushed before the path is handed out
+            // and the descriptor is not leaked when the download throws.
+            \fclose($fd);
+        }
+        return $ret ? $tmp_file : null;
+    }
+
+    /**
+     * Resolves the thumbnail url and the static fallback path for a matched url.
+     *
+     * @param string $url The url to embed.
+     * @param int $entry_index The index of the embedding entry.
+     * @return array Two elements: the url to the thumbnail (or null) and the path to the fallback (or null).
+     */
+    private function extractThumb(string $url, int $entry_index) {
+        $embed_entry = $this->embed_entries[$entry_index];
+        $match_regex = $embed_entry['match_regex'];
+        $type = $embed_entry['type'];
+
+        if ($type === 'oembed') {
+            $thumbnail_url_fallback = $embed_entry['thumbnail_url_fallback'] ?? null;
+            $provider = $embed_entry['provider_url'];
+            $oembed_resp = $this->oembed_extractor->fetch($provider, $url);
+
+            return [ $oembed_resp->thumbnail_url, $thumbnail_url_fallback ];
+        } elseif ($type === 'regex') {
+            $thumbnail_url_regex = $embed_entry['thumbnail_url'];
+            // Builds the thumbnail url by substituting the match_regex captures into the entry's template. NOTE(review): the author asked for a review of this.
+            return [ \preg_replace($match_regex, $thumbnail_url_regex, $url), null ];
+        } else {
+            $this->log->log(LogDriver::ERROR, "Unknown embed type '$type' in embed entry $entry_index, ignoring the entry");
+            return [ null, null ];
+        }
+    }
+
+    /**
+     * Find the first embed entry whose 'match_regex' matches the url, if any.
+     *
+     * @param string $url Url to embed. MUST BE ALREADY VALIDATED.
+     * @return ?int The index of the matched embed entry, or null if no entry matches.
+     */
+    public function matchEmbed(string $url): ?int {
+        for ($i = 0; $i < \count($this->embed_entries); $i++) {
+            $match_regex = $this->embed_entries[$i]['match_regex'];
+            if (\preg_match($match_regex, $url, $matches)) {
+                return $i;
+            }
+        }
+
+        return null;
+    }
+
+    /**
+     * Get the embed's thumbnail if possible. May download it from the network into a temporary file, or use a static file.
+     *
+     * @param string $url Url to embed. MUST BE ALREADY VALIDATED.
+     * @param int $entry_index The index of the matched embed entry.
+     * @return ?array Null if no thumbnail can be selected, otherwise an array with the local file path to the thumbnail
+     *                and whether the file is a temporary one (true) or a static one (false).
+     */
+    public function getEmbedThumb(string $url, int $entry_index): ?array {
+        $ret = $this->extractThumb($url, $entry_index);
+        list($thumbnail_url, $thumbnail_url_fallback) = $ret;
+        // Give up only when there is neither a remote thumbnail nor a static fallback. The previous
+        // isset() on both values bailed out as soon as either one was null, which is always the case
+        // for 'regex' entries (they never have a fallback).
+        if ($thumbnail_url === null && $thumbnail_url_fallback === null) {
+            return null;
+        }
+
+        if ($thumbnail_url !== null) {
+            if (\filter_var($thumbnail_url, \FILTER_VALIDATE_URL) === false) {
+                $this->log->log(LogDriver::ERROR, "Thumbnail URL '$thumbnail_url' from embed entry $entry_index is not a valid URL, trying fallback");
+            } else {
+                $tmp_file = $this->fetchThumbnail($thumbnail_url);
+                if ($tmp_file !== null) {
+                    return [ $tmp_file, true ];
+                }
+                $this->log->log(LogDriver::NOTICE, "Thumbnail at '$thumbnail_url' was too large, trying fallback");
+            }
+        }
+
+        if ($thumbnail_url_fallback === null) {
+            return null;
+        }
+        return [ $thumbnail_url_fallback, false ];
+    }
+
+    /**
+     * Renders the html of an embed entry for the given url.
+     *
+     * The entry's 'html' template is expanded with the captures of 'match_regex', then the
+     * %%embed_url%% and %%thumbnail_path%% placeholders are substituted.
+     *
+     * @param string $url Url to embed.
+     * @param int $entry_index The index of the matched embed entry.
+     * @param string $thumbnail_path Path substituted for the %%thumbnail_path%% placeholder.
+     * @return string The rendered html.
+     * @throws \RuntimeException If the regex replacement fails.
+     */
+    public function renderEmbed(string $url, int $entry_index, string $thumbnail_path): string {
+        $embed_entry = $this->embed_entries[$entry_index];
+        $match_regex = $embed_entry['match_regex'];
+        $html = $embed_entry['html'];
+
+        $ret = \preg_replace($match_regex, $html, $url);
+        if (!\is_string($ret)) {
+            throw new \RuntimeException("Error while applying regex replacement for embed entry $entry_index");
+        }
+
+        // The substitution result must be kept: str_replace()/strtr() return a new string and the
+        // previous code discarded it. strtr() is used so a placeholder-looking sequence inside the
+        // url is not substituted again, and both values are escaped because they land in html.
+        return \strtr($ret, [
+            '%%embed_url%%' => \htmlspecialchars($url, \ENT_QUOTES | \ENT_SUBSTITUTE, 'UTF-8'),
+            '%%thumbnail_path%%' => \htmlspecialchars($thumbnail_path, \ENT_QUOTES | \ENT_SUBSTITUTE, 'UTF-8'),
+        ]);
+    }
+}
diff --git a/inc/Service/Embed/OembedExtractor.php b/inc/Service/Embed/OembedExtractor.php
new file mode 100644
index 00000000..f86b2013
--- /dev/null
+++ b/inc/Service/Embed/OembedExtractor.php
@@ -0,0 +1,66 @@
+cache = $cache;
+        $this->http = $http;
+        $this->provider_timeout = $provider_timeout;
+    }
+
+    /**
+     * Fetch the oembed data from the given provider with the given url.
+     * @param string $provider_url Url of the provider's oembed endpoint.
+     * @param string $url Url of the content to embed. Together with $provider_url it forms the cache key.
+     * @return OembedResponse The deserialized remote response. May be cached.
+     */
+    public function fetch(string $provider_url, string $url): OembedResponse {
+        $ret = $this->cache->get("oembed_embedder_$provider_url$url");
+        if ($ret === null) {
+            $body = $this->http->requestGet(
+                $provider_url,
+                [
+                    'url' => $url,
+                    'format' => 'json'
+                ],
+                [
+                    'Content-Type: application/json'
+                ],
+                $this->provider_timeout
+            );
+            $json = \json_decode($body, true, 512, \JSON_THROW_ON_ERROR);
+
+            $ret = [
+                'title' => $json['title'] ?? null,
+                'thumbnail_url' => $json['thumbnail_url'] ?? null,
+            ];
+
+            $cache_timeout = self::DEFAULT_CACHE_TIMEOUT;
+            if (isset($json['cache_age'])) {
+                $cache_age = \intval($json['cache_age']);
+                if ($cache_age > 0) {
+                    $cache_timeout = \max($cache_age, self::MIN_CACHE_TIMEOUT); // honour the provider's cache_age, never below the minimum
+                }
+            }
+
+            $this->cache->set("oembed_embedder_$provider_url$url", $ret, $cache_timeout);
+        }
+
+        $resp = new OembedResponse();
+        $resp->title = $ret['title'];
+        $resp->thumbnail_url = $ret['thumbnail_url'];
+        return $resp;
+    }
+}
diff --git a/inc/config.php b/inc/config.php
index 25031bfb..8afb699f 100644
--- a/inc/config.php
+++ b/inc/config.php
@@ -1265,6 +1265,37 @@
     $config['embed_width'] = 300;
     $config['embed_height'] = 246;
 
+    // Download timeout for the remote embed thumbnails in seconds.
+    $config['embed_thumb_timeout'] = 2;
+
+    /**
+     * Replacement parameters:
+     * - $1-$N: matched arguments from 'match_regex'.
+     * - %%thumbnail_path%%: Path to the downloaded thumbnail.
+     */
+    $config['embedding_2'] = [
+        [
+            'match_regex' => '/^(?:(?:https?:)?\/\/)?((?:www|m)\.)?(?:(?:youtube(?:-nocookie)?\.com|youtu\.be))(?:\/(?:[\w\-]+\?v=|embed\/|live\/|v\/)?)([\w\-]{11})((?:\?|\&)\S+)?$/i',
+            'type' => 'regex',
+            'thumbnail_url' => 'https://img.youtube.com/vi/$2/0.jpg',
+            'html' => '
+ + + +
' + ], + [ + 'match_regex' => '/^https?:\/\/(\w+\.)?tiktok\.com\/@([a-z0-9\-_]+)\/video\/([0-9]+)\?.*$/i', + 'type' => 'oembed', + 'provider_url' => 'https://www.tiktok.com/oembed', + 'html' => '
+ + + +
' + ] + ]; + /* * ==================== * Error messages diff --git a/inc/context.php b/inc/context.php index 11a153ec..c5375d89 100644 --- a/inc/context.php +++ b/inc/context.php @@ -2,7 +2,9 @@ namespace Vichan; use Vichan\Data\{IpNoteQueries, ReportQueries, UserPostQueries}; -use Vichan\Data\Driver\{CacheDriver, ErrorLogLogDriver, FileLogDriver, LogDriver, StderrLogDriver, SyslogLogDriver}; +use Vichan\Data\Driver\{CacheDriver, HttpDriver, ErrorLogLogDriver, FileLogDriver, LogDriver, StderrLogDriver, SyslogLogDriver}; +use Vichan\Service\Embed\EmbedService; +use Vichan\Service\Embed\OembedExtractor; defined('TINYBOARD') or exit; @@ -63,6 +65,10 @@ function build_context(array $config): Context { // Use the global for backwards compatibility. return \cache::getCache(); }, + HttpDriver::class => function($c) { + $config = $c->get('config'); + return new HttpDriver($config['upload_by_url_timeout'], $config['max_filesize']); + }, \PDO::class => function($c) { global $pdo; // Ensure the PDO is initialized. 
@@ -78,5 +84,19 @@ function build_context(array $config): Context {
             return new UserPostQueries($c->get(\PDO::class));
         },
         IpNoteQueries::class => fn($c) => new IpNoteQueries($c->get(\PDO::class), $c->get(CacheDriver::class)),
+        OembedExtractor::class => fn($c) => new OembedExtractor(
+            $c->get(CacheDriver::class),
+            $c->get(HttpDriver::class),
+            $c->get('config')['embed_thumb_timeout']
+        ),
+        EmbedService::class => function($c) {
+            $config = $c->get('config');
+            return new EmbedService(
+                $c->get(LogDriver::class),
+                $c->get(OembedExtractor::class),
+                $config['embedding_2'],
+                $config['embed_thumb_timeout']
+            );
+        }
     ]);
 }
diff --git a/post.php b/post.php
index 27a45413..b8d2073f 100644
--- a/post.php
+++ b/post.php
@@ -262,6 +262,26 @@ function send_matrix_report(
     }
 }
 
+/**
+ * Converts a grouped multi-file upload entry into one array per uploaded file.
+ *
+ * @param array $file_array Upload entry holding parallel 'name', 'tmp_name', 'type', 'error' and 'size' lists.
+ * @return array One array per file with those five keys, keyed by strval() of the 1-based position.
+ *               Empty when the first 'tmp_name' is empty.
+ */
+function normalize_files(array $file_array) {
+    $out_files = [];
+    // If more than 0 files were uploaded
+    if (!empty($file_array['tmp_name'][0])) {
+        $i = 0;
+        $n = count($file_array['tmp_name']);
+        while ($i < $n) {
+            $out_files[strval($i + 1)] = array(
+                'name' => $file_array['name'][$i],
+                'tmp_name' => $file_array['tmp_name'][$i],
+                'type' => $file_array['type'][$i],
+                'error' => $file_array['error'][$i],
+                'size' => $file_array['size'][$i]
+            );
+            $i++;
+        }
+    }
+    return $out_files;
+}
+
 /**
  * Deletes the (single) captcha associated with the ip and code.
  *
@@ -924,7 +944,6 @@ function handle_post(Context $ctx)
                 isset($post['thread']) ? $post['thread'] : ($config['try_smarter'] && isset($_POST['page']) ? 
0 - (int) $_POST['page'] : null) ) ); - //$post['antispam_hash'] = checkSpam(); if ($post['antispam_hash'] === true) { error($config['error']['spam']); @@ -953,7 +972,11 @@ function handle_post(Context $ctx) // Check for an embed field if ($config['enable_embedding'] && isset($_POST['embed']) && !empty($_POST['embed'])) { // yep; validate it - $value = $_POST['embed']; + $value = \trim($_POST['embed']); + if (\filter_var($value, \FILTER_VALIDATE_URL) === false) { + error($config['error']['invalid_embed']); + } + foreach ($config['embedding'] as &$embed) { if (preg_match($embed[0], $value)) { // Valid link @@ -990,23 +1013,7 @@ function handle_post(Context $ctx) // Convert multiple upload format to array of files. This makes the following code // work the same whether we used the JS or HTML multiple file upload techniques. if (array_key_exists('file_multiple', $_FILES)) { - $file_array = $_FILES['file_multiple']; - $_FILES = []; - // If more than 0 files were uploaded - if (!empty($file_array['tmp_name'][0])) { - $i = 0; - $n = count($file_array['tmp_name']); - while ($i < $n) { - $_FILES[strval($i + 1)] = array( - 'name' => $file_array['name'][$i], - 'tmp_name' => $file_array['tmp_name'][$i], - 'type' => $file_array['type'][$i], - 'error' => $file_array['error'][$i], - 'size' => $file_array['size'][$i] - ); - $i++; - } - } + $_FILES = normalize_files($_FILES['file_multiple']); } // We must do this check now before the passowrd is hashed and overwritten.