Compare commits

...
Sign in to create a new pull request.

20 commits

Author SHA1 Message Date
fe5368f096 HttpDriver.php: set requestGet header to default to null 2025-03-28 11:09:12 +01:00
fbf0c051f0 OembedExtractor.php: fix 2025-03-28 11:08:01 +01:00
83e04c639a context.php: update 2025-03-18 09:59:35 +01:00
3924b41c46 OembedExtractor.php: move to right namespace 2025-03-18 09:59:16 +01:00
28a747b335 EmbedService.php: move to right namespace 2025-03-18 09:58:45 +01:00
811698d9ef post.php: extract normalize _FILES 2025-03-17 15:37:37 +01:00
388fc2c05d context.php: add OembedExtractor and EmbedService 2025-03-17 15:18:02 +01:00
ffe7a44635 config.php: add embed thumb download timeout 2025-03-17 15:13:29 +01:00
b71d53c1a8 post.php: validate embed url 2025-03-17 15:10:19 +01:00
01811cb50f EmbedService.php: refactor 2025-03-17 15:07:02 +01:00
cd8d0e060f config.php: update embedding_2 2025-03-17 15:07:02 +01:00
256a9682fa EmbedService.php: download and handle thumbnails 2025-03-17 12:56:35 +01:00
8ac67e9e85 OembedResponse.php: trim 2025-03-17 12:56:35 +01:00
5a8c661257 OembedExtractor.php: finalize 2025-03-17 12:56:35 +01:00
4850a8ddd3 config.php: add WIP embedding_2 2025-03-17 12:56:35 +01:00
698451a6d5 EmbedService.php: add WIP 2025-03-17 12:56:35 +01:00
231fcb9ca9 OembedExtractor.php: add extractor 2025-03-17 12:56:35 +01:00
0e9c9de5c6 OembedResponse.php: add oembed POD 2025-03-17 12:56:35 +01:00
bf42570d5d HttpDriver.php: add headers to requestGet 2025-03-17 12:56:33 +01:00
a5cc1c2b42 HttpDriver.php: backport from upstream 2025-03-16 22:30:02 +01:00
7 changed files with 431 additions and 20 deletions

View file

@ -0,0 +1,135 @@
<?php
namespace Vichan\Data\Driver;
defined('TINYBOARD') or exit;
/**
 * Thin object-oriented wrapper around a single, reusable cURL handle.
 *
 * It exists so HTTP access can be mocked and used through an OOP API. Every request resets the handle and
 * restricts it to the HTTP and HTTPS protocols.
 */
class HttpDriver {
	/** @var mixed The cURL handle returned by curl_init(); reset and reused for every request. */
	private mixed $inner;
	/** Default request timeout in seconds, used when a request passes 0 as its timeout. */
	private int $timeout;
	/** Maximum number of downloaded bytes requestGetInto() tolerates before aborting the transfer. */
	private int $max_file_size;

	/**
	 * Reset the shared handle and point it at the given url.
	 *
	 * @param string $url Full url of the request.
	 * @param int $timeout Request timeout in seconds.
	 */
	private function resetTowards(string $url, int $timeout): void {
		\curl_reset($this->inner);
		\curl_setopt_array($this->inner, [
			\CURLOPT_URL => $url,
			\CURLOPT_TIMEOUT => $timeout,
			\CURLOPT_USERAGENT => 'Tinyboard',
			\CURLOPT_PROTOCOLS => \CURLPROTO_HTTP | \CURLPROTO_HTTPS,
		]);
	}

	/**
	 * Append the GET parameters to the endpoint, honoring a query string that may already be present.
	 *
	 * @param string $endpoint Uri endpoint.
	 * @param ?array $data Optional GET parameters.
	 * @return string The endpoint, with the encoded parameters appended if there were any.
	 */
	private static function withQuery(string $endpoint, ?array $data): string {
		if (empty($data)) {
			return $endpoint;
		}

		if (\strpos($endpoint, '?') === false) {
			$separator = '?';
		} else {
			// The endpoint already carries a query string: extend it rather than starting a second one.
			$last = \substr($endpoint, -1);
			$separator = ($last === '?' || $last === '&') ? '' : '&';
		}
		return $endpoint . $separator . \http_build_query($data);
	}

	/**
	 * Resolve the timeout of a request: 0 selects the default given to the constructor.
	 */
	private function resolveTimeout(int $timeout): int {
		return $timeout === 0 ? $this->timeout : $timeout;
	}

	/**
	 * @param int $timeout Default request timeout in seconds.
	 * @param int $max_file_size Maximum size in bytes of a file downloaded with requestGetInto().
	 */
	public function __construct(int $timeout, int $max_file_size) {
		$this->inner = \curl_init();
		$this->timeout = $timeout;
		$this->max_file_size = $max_file_size;
	}

	public function __destruct() {
		\curl_close($this->inner);
	}

	/**
	 * Execute a GET request.
	 *
	 * @param string $endpoint Uri endpoint.
	 * @param ?array $data Optional GET parameters.
	 * @param ?array $headers Optional HTTP headers, as a list of "Name: value" strings.
	 * @param int $timeout Optional request timeout in seconds. Use the default timeout if 0.
	 * @return string Returns the body of the response.
	 * @throws \RuntimeException Throws on IO error.
	 */
	public function requestGet(string $endpoint, ?array $data, ?array $headers = null, int $timeout = 0): string {
		$this->resetTowards(self::withQuery($endpoint, $data), $this->resolveTimeout($timeout));

		if (!empty($headers)) {
			\curl_setopt($this->inner, \CURLOPT_HTTPHEADER, $headers);
		}
		\curl_setopt($this->inner, \CURLOPT_RETURNTRANSFER, true);

		$ret = \curl_exec($this->inner);
		if ($ret === false) {
			throw new \RuntimeException(\curl_error($this->inner));
		}
		return $ret;
	}

	/**
	 * Execute a POST request.
	 *
	 * @param string $endpoint Uri endpoint.
	 * @param ?array $data Optional POST parameters.
	 * @param int $timeout Optional request timeout in seconds. Use the default timeout if 0.
	 * @return string Returns the body of the response.
	 * @throws \RuntimeException Throws on IO error.
	 */
	public function requestPost(string $endpoint, ?array $data, int $timeout = 0): string {
		$this->resetTowards($endpoint, $this->resolveTimeout($timeout));

		\curl_setopt($this->inner, \CURLOPT_POST, true);
		if (!empty($data)) {
			\curl_setopt($this->inner, \CURLOPT_POSTFIELDS, \http_build_query($data));
		}
		\curl_setopt($this->inner, \CURLOPT_RETURNTRANSFER, true);

		$ret = \curl_exec($this->inner);
		if ($ret === false) {
			throw new \RuntimeException(\curl_error($this->inner));
		}
		return $ret;
	}

	/**
	 * Download the url's target with curl.
	 *
	 * Redirects are not followed and HTTP error statuses are reported as failures.
	 *
	 * @param string $endpoint Url to the file to download.
	 * @param ?array $data Optional GET parameters.
	 * @param resource $fd File descriptor to save the content to.
	 * @param int $timeout Optional request timeout in seconds. Use the default timeout if 0.
	 * @return bool Returns true on success, false if the file was too large.
	 * @throws \RuntimeException Throws on IO error.
	 */
	public function requestGetInto(string $endpoint, ?array $data, mixed $fd, int $timeout = 0): bool {
		$this->resetTowards(self::withQuery($endpoint, $data), $this->resolveTimeout($timeout));

		// Adapted from: https://stackoverflow.com/a/17642638
		// Prefer the newer transfer-info callback whenever this PHP/cURL build exposes it.
		$opt = \defined('CURLOPT_XFERINFOFUNCTION') ? \CURLOPT_XFERINFOFUNCTION : \CURLOPT_PROGRESSFUNCTION;
		\curl_setopt_array($this->inner, [
			\CURLOPT_NOPROGRESS => false,
			// A non-zero return value aborts the transfer, so only abort once the limit has been exceeded.
			$opt => fn($res, $next_dl, $dl, $next_up, $up) => (int)($dl > $this->max_file_size),
			\CURLOPT_FAILONERROR => true,
			\CURLOPT_FOLLOWLOCATION => false,
			\CURLOPT_FILE => $fd,
			\CURLOPT_IPRESOLVE => \CURL_IPRESOLVE_V4,
		]);

		$ret = \curl_exec($this->inner);
		if ($ret === false) {
			// An abort requested by the progress callback means the size limit was hit, not an IO failure.
			if (\curl_errno($this->inner) === \CURLE_ABORTED_BY_CALLBACK) {
				return false;
			}
			throw new \RuntimeException(\curl_error($this->inner));
		}
		return true;
	}
}

View file

@ -0,0 +1,11 @@
<?php
namespace Vichan\Data;
/**
 * Plain data holder for the fields read from an oEmbed provider response.
 *
 * Raw return values: they are not validated beyond being non-null and of the declared type.
 * Both fields default to null so that a freshly created instance can be read safely.
 */
class OembedResponse {
	/** Title reported by the provider, null if absent. */
	public ?string $title = null;
	/** Url of the thumbnail reported by the provider, null if absent. */
	public ?string $thumbnail_url = null;
}

View file

@ -0,0 +1,141 @@
<?php
namespace Vichan\Service\Embed;
use Vichan\Service\Embed\OembedExtractor;
use Vichan\Data\Driver\{HttpDriver, LogDriver};
/**
 * Matches urls against the configured embed entries, resolves their thumbnails and renders their html.
 */
class EmbedService {
	private const TMP_FILE_PREFIX = 'oembed-thumb-';

	private LogDriver $log;
	private OembedExtractor $oembed_extractor;
	/** Driver used to download thumbnails, null if none was provided. */
	private ?HttpDriver $http;
	/** Directory in which the temporary thumbnail files are created. */
	private string $tmp_dir;
	/** List of embed entries, each an array with at least the 'match_regex', 'type' and 'html' keys. */
	private array $embed_entries;
	/** Timeout in seconds of a thumbnail download. */
	private int $thumb_download_timeout;

	/**
	 * @param LogDriver $log Logger.
	 * @param OembedExtractor $oembed_extractor Extractor queried for entries of type 'oembed'.
	 * @param array $embed_entries List of embed entries.
	 * @param int $thumb_download_timeout Timeout in seconds of a thumbnail download.
	 * @param ?HttpDriver $http Driver used to download the thumbnails. Downloading fails without it.
	 * @param ?string $tmp_dir Directory for the temporary files, defaults to the system temporary directory.
	 */
	public function __construct(
		LogDriver $log,
		OembedExtractor $oembed_extractor,
		array $embed_entries,
		int $thumb_download_timeout,
		?HttpDriver $http = null,
		?string $tmp_dir = null
	) {
		$this->log = $log;
		$this->oembed_extractor = $oembed_extractor;
		$this->embed_entries = $embed_entries;
		$this->thumb_download_timeout = $thumb_download_timeout;
		$this->http = $http;
		$this->tmp_dir = $tmp_dir ?? \sys_get_temp_dir();
	}

	/**
	 * Create an empty temporary file that is deleted when the script shuts down.
	 *
	 * @return string The path to the temporary file.
	 * @throws \RuntimeException If the file could not be created.
	 */
	private function makeTmpFile(): string {
		$ret = \tempnam($this->tmp_dir, self::TMP_FILE_PREFIX);
		if ($ret === false) {
			throw new \RuntimeException("Could not create temporary file in {$this->tmp_dir}");
		}
		\register_shutdown_function(fn() => @unlink($ret));
		return $ret;
	}

	/**
	 * Downloads the thumbnail into a temporary file.
	 *
	 * @param string $thumbnail_url Url of the thumbnail to download.
	 * @return ?string The path to the temporary file, null if the file was too large.
	 * @throws \RuntimeException If no http driver is available, on file errors or on download errors.
	 */
	private function fetchThumbnail(string $thumbnail_url): ?string {
		if ($this->http === null) {
			throw new \RuntimeException('Cannot download embed thumbnails: no HttpDriver was provided');
		}

		$tmp_file = $this->makeTmpFile();
		$fd = \fopen($tmp_file, 'w+b');
		if ($fd === false) {
			throw new \RuntimeException("Could not open temporary file $tmp_file for read/write");
		}

		try {
			$ret = $this->http->requestGetInto($thumbnail_url, null, $fd, $this->thumb_download_timeout);
		} finally {
			// Always release the handle: callers only receive the path.
			\fclose($fd);
		}
		return $ret ? $tmp_file : null;
	}

	/**
	 * Returns the url to the thumbnail of a matched url, if any.
	 *
	 * @param string $url The url to embed.
	 * @param int $entry_index The index of the embedding entry.
	 * @return array A pair with the url to the thumbnail and the path to the fallback. Either may be null.
	 */
	private function extractThumb(string $url, int $entry_index): array {
		$embed_entry = $this->embed_entries[$entry_index];
		$type = $embed_entry['type'];

		if ($type === 'oembed') {
			$thumbnail_url_fallback = $embed_entry['thumbnail_url_fallback'] ?? null;
			$oembed_resp = $this->oembed_extractor->fetch($embed_entry['provider_url'], $url);
			return [ $oembed_resp->thumbnail_url, $thumbnail_url_fallback ];
		} elseif ($type === 'regex') {
			// The thumbnail url is a replacement template filled with the groups captured by 'match_regex'.
			// preg_replace yields null on error, which is then handled as a missing thumbnail.
			return [ \preg_replace($embed_entry['match_regex'], $embed_entry['thumbnail_url'], $url), null ];
		} else {
			$this->log->log(LogDriver::ERROR, "Unknown embed type '$type' in embed entry $entry_index, ignoring the entry");
			return [ null, null ];
		}
	}

	/**
	 * Find the embed entry matching with the url, if any.
	 *
	 * @param string $url Url to embed. MUST BE ALREADY VALIDATED.
	 * @return ?int The index of the first matching embed entry, or null if none matches.
	 */
	public function matchEmbed(string $url): ?int {
		$count = \count($this->embed_entries);
		for ($i = 0; $i < $count; $i++) {
			if (\preg_match($this->embed_entries[$i]['match_regex'], $url) === 1) {
				return $i;
			}
		}
		return null;
	}

	/**
	 * Get the embed's thumbnail if possible. May download it from the network into a temporary file, or use a static file.
	 *
	 * @param string $url Url to embed. MUST BE ALREADY VALIDATED.
	 * @param int $entry_index The index of the matched embed entry.
	 * @return ?array Null if no thumbnail can be selected, otherwise an array with the local file path to the thumbnail
	 *                and a boolean telling if the file is a temporary one (true) or a static one (false).
	 * @throws \RuntimeException On download or temporary file errors.
	 */
	public function getEmbedThumb(string $url, int $entry_index): ?array {
		[$thumbnail_url, $thumbnail_url_fallback] = $this->extractThumb($url, $entry_index);

		// A missing thumbnail url is not an error by itself: go straight to the fallback.
		if ($thumbnail_url !== null) {
			if (\filter_var($thumbnail_url, \FILTER_VALIDATE_URL) === false) {
				$this->log->log(LogDriver::ERROR, "Thumbnail URL '$thumbnail_url' from embed entry $entry_index is not a valid URL, trying fallback");
			} else {
				$tmp_file = $this->fetchThumbnail($thumbnail_url);
				if ($tmp_file !== null) {
					return [ $tmp_file, true ];
				}
				$this->log->log(LogDriver::NOTICE, "Thumbnail at '$thumbnail_url' was too large, trying fallback");
			}
		}

		if ($thumbnail_url_fallback === null) {
			return null;
		}
		return [ $thumbnail_url_fallback, false ];
	}

	/**
	 * Render the html of an embed.
	 *
	 * The groups captured by the entry's 'match_regex' are substituted in its 'html' template, then the
	 * %%embed_url%% and %%thumbnail_path%% placeholders are replaced by their html-escaped values.
	 *
	 * @param string $url Url to embed.
	 * @param int $entry_index The index of the matched embed entry.
	 * @param string $thumbnail_path Path to the thumbnail.
	 * @return string The rendered html.
	 * @throws \RuntimeException If the regex replacement fails.
	 */
	public function renderEmbed(string $url, int $entry_index, string $thumbnail_path): string {
		$embed_entry = $this->embed_entries[$entry_index];

		$ret = \preg_replace($embed_entry['match_regex'], $embed_entry['html'], $url);
		if (!\is_string($ret)) {
			throw new \RuntimeException("Error while applying regex replacement for embed entry $entry_index");
		}

		// strtr replaces both placeholders in one pass, so a replaced value is never scanned again.
		$flags = \ENT_QUOTES | \ENT_SUBSTITUTE;
		return \strtr($ret, [
			'%%embed_url%%' => \htmlspecialchars($url, $flags, 'UTF-8'),
			'%%thumbnail_path%%' => \htmlspecialchars($thumbnail_path, $flags, 'UTF-8'),
		]);
	}
}

View file

@ -0,0 +1,66 @@
<?php
namespace Vichan\Service\Embed;
use Vichan\Data\Driver\{CacheDriver, HttpDriver};
use Vichan\Data\OembedResponse;
/**
 * Queries oEmbed providers and caches the fields of their responses that are of interest.
 */
class OembedExtractor {
	private const DEFAULT_CACHE_TIMEOUT = 3600; // 1 hour.
	private const MIN_CACHE_TIMEOUT = 900; // 15 minutes.

	private CacheDriver $cache;
	private HttpDriver $http;
	/** Timeout in seconds of a request to a provider. */
	private int $provider_timeout;

	/**
	 * Keep the value only if it is a string, as the response fields are declared as nullable strings.
	 *
	 * @param mixed $value Raw value decoded from the provider's json.
	 * @return ?string The value if it is a string, null otherwise.
	 */
	private static function stringOrNull($value): ?string {
		return \is_string($value) ? $value : null;
	}

	/**
	 * @param CacheDriver $cache Cache for the provider responses.
	 * @param HttpDriver $http Driver used to query the providers.
	 * @param int $provider_timeout Timeout in seconds of a request to a provider.
	 */
	public function __construct(CacheDriver $cache, HttpDriver $http, int $provider_timeout) {
		$this->cache = $cache;
		$this->http = $http;
		$this->provider_timeout = $provider_timeout;
	}

	/**
	 * Fetch the oembed data from the given provider with the given url.
	 *
	 * The result is cached for the 'cache_age' reported by the provider (never less than MIN_CACHE_TIMEOUT),
	 * or for DEFAULT_CACHE_TIMEOUT if the provider reports no usable value.
	 *
	 * @param string $provider_url Url of the provider's oembed endpoint.
	 * @param string $url Url of the resource to embed.
	 * @return OembedResponse The deserialized remote response. May be cached.
	 * @throws \JsonException If the provider's response is not valid json.
	 * @throws \RuntimeException On IO error, or if the provider's response is not a json object.
	 */
	public function fetch(string $provider_url, string $url): OembedResponse {
		$cache_key = "oembed_embedder_$provider_url$url";

		$ret = $this->cache->get($cache_key);
		if ($ret === null) {
			// NOTE(review): a GET request has no body, 'Accept' may be the header that was intended here.
			$body = $this->http->requestGet(
				$provider_url,
				[
					'url' => $url,
					'format' => 'json'
				],
				[
					'Content-Type: application/json'
				],
				$this->provider_timeout
			);

			$json = \json_decode($body, true, 512, \JSON_THROW_ON_ERROR);
			if (!\is_array($json)) {
				throw new \RuntimeException("The oembed provider '$provider_url' did not return a json object");
			}

			$ret = [
				'title' => self::stringOrNull($json['title'] ?? null),
				'thumbnail_url' => self::stringOrNull($json['thumbnail_url'] ?? null),
			];

			$cache_timeout = self::DEFAULT_CACHE_TIMEOUT;
			if (isset($json['cache_age']) && \is_numeric($json['cache_age'])) {
				$cache_age = \intval($json['cache_age']);
				if ($cache_age > 0) {
					// Honor the provider's cache lifetime, but never hit it more often than the minimum allows.
					$cache_timeout = \max($cache_age, self::MIN_CACHE_TIMEOUT);
				}
			}
			$this->cache->set($cache_key, $ret, $cache_timeout);
		}

		$resp = new OembedResponse();
		$resp->title = $ret['title'] ?? null;
		$resp->thumbnail_url = $ret['thumbnail_url'] ?? null;
		return $resp;
	}
}

View file

@ -1265,6 +1265,37 @@
$config['embed_width'] = 300;
$config['embed_height'] = 246;
// Download timeout for the remote embed thumbnails in seconds.
// build_context() also passes this value to OembedExtractor as the oEmbed provider request timeout.
$config['embed_thumb_timeout'] = 2;
/**
* List of embed entries, tried in order: the first entry whose 'match_regex' matches the url is used.
*
* Keys of an entry:
* - 'match_regex': Regex matched against the url to embed.
* - 'type': Either 'regex' or 'oembed', selects how the thumbnail url is obtained.
* - 'thumbnail_url': ('regex' type) Replacement template for the thumbnail url, applied with 'match_regex'.
* - 'provider_url': ('oembed' type) Url of the oEmbed endpoint queried for the thumbnail url.
* - 'thumbnail_url_fallback': ('oembed' type, optional) Static thumbnail used if the remote one is unusable.
* - 'html': Replacement template for the html of the embed, applied with 'match_regex'.
*
* Replacement parameters:
* - $1-$N: matched arguments from 'match_regex'.
* - %%thumbnail_path%%: Path to the downloaded thumbnail.
*/
$config['embedding_2'] = [
// YouTube: the thumbnail url is built from the video id captured as $2.
[
'match_regex' => '/^(?:(?:https?:)?\/\/)?((?:www|m)\.)?(?:(?:youtube(?:-nocookie)?\.com|youtu\.be))(?:\/(?:[\w\-]+\?v=|embed\/|live\/|v\/)?)([\w\-]{11})((?:\?|\&)\S+)?$/i',
'type' => 'regex',
'thumbnail_url' => 'https://img.youtube.com/vi/$2/0.jpg',
'html' => '<div class="video-container" data-video-id="$2" data-iframe-width="360" data-iframe-height="202">
<a href="https://youtu.be/$2" target="_blank" class="file">
<img style="width:360px;height:202px;object-fit:cover" src="%%thumbnail_path%%" class="post-image"/>
</a>
</div>'
],
// TikTok: the thumbnail url comes from the oEmbed provider; $2 is the author and $3 the video id.
[
'match_regex' => '/^https?:\/\/(\w+\.)?tiktok\.com\/@([a-z0-9\-_]+)\/video\/([0-9]+)\?.*$/i',
'type' => 'oembed',
'provider_url' => 'https://www.tiktok.com/oembed',
'html' => '<div class="tiktok-embed" data-video-author="$2" data-video-id="$3" data-iframe-width="168" data-iframe-height="300">
<a href="https://www.tiktok.com/@$2/video/$3" target="_blank" class="file">
<img style="width:168px;height:300px;object-fit:cover" src="%%thumbnail_path%%" class="post-image"/>
</a>
</div>'
]
];
/*
* ====================
* Error messages

View file

@ -2,7 +2,9 @@
namespace Vichan;
use Vichan\Data\{IpNoteQueries, ReportQueries, UserPostQueries};
use Vichan\Data\Driver\{CacheDriver, ErrorLogLogDriver, FileLogDriver, LogDriver, StderrLogDriver, SyslogLogDriver};
use Vichan\Data\Driver\{CacheDriver, HttpDriver, ErrorLogLogDriver, FileLogDriver, LogDriver, StderrLogDriver, SyslogLogDriver};
use Vichan\Service\Embed\EmbedService;
use Vichan\Service\Embed\OembedExtractor;
defined('TINYBOARD') or exit;
@ -63,6 +65,10 @@ function build_context(array $config): Context {
// Use the global for backwards compatibility.
return \cache::getCache();
},
HttpDriver::class => function($c) {
$config = $c->get('config');
return new HttpDriver($config['upload_by_url_timeout'], $config['max_filesize']);
},
\PDO::class => function($c) {
global $pdo;
// Ensure the PDO is initialized.
@ -78,5 +84,19 @@ function build_context(array $config): Context {
return new UserPostQueries($c->get(\PDO::class));
},
IpNoteQueries::class => fn($c) => new IpNoteQueries($c->get(\PDO::class), $c->get(CacheDriver::class)),
OembedExtractor::class => fn($c) => new OembedExtractor(
$c->get(CacheDriver::class),
$c->get(HttpDriver::class),
$c->get('config')['embed_thumb_timeout']
),
EmbedService::class => function($c) {
$config = $c->get('config');
return new EmbedService(
$c->get(LogDriver::class),
$c->get(OembedExtractor::class),
$config['embedding_2'],
$config['embed_thumb_timeout']
);
}
]);
}

View file

@ -262,6 +262,26 @@ function send_matrix_report(
}
}
/**
 * Convert a multiple-upload entry of $_FILES into a list of single-file entries.
 *
 * The input groups the values by field ('name', 'tmp_name', 'type', 'error', 'size'), each holding one value
 * per uploaded file. The output holds one array per file, keyed from 1 upwards.
 *
 * @param array $file_array A multiple-upload entry, like $_FILES['file_multiple'].
 * @return array The per-file entries, empty if the first upload slot is empty.
 */
function normalize_files(array $file_array) {
	// Nothing to do unless at least the first slot holds an uploaded file.
	if (empty($file_array['tmp_name'][0])) {
		return [];
	}

	$normalized = [];
	$total = count($file_array['tmp_name']);
	for ($idx = 0; $idx < $total; $idx++) {
		$entry = [];
		foreach (['name', 'tmp_name', 'type', 'error', 'size'] as $field) {
			$entry[$field] = $file_array[$field][$idx];
		}
		$normalized[strval($idx + 1)] = $entry;
	}
	return $normalized;
}
/**
* Deletes the (single) captcha associated with the ip and code.
*
@ -924,7 +944,6 @@ function handle_post(Context $ctx)
isset($post['thread']) ? $post['thread'] : ($config['try_smarter'] && isset($_POST['page']) ? 0 - (int) $_POST['page'] : null)
)
);
//$post['antispam_hash'] = checkSpam();
if ($post['antispam_hash'] === true) {
error($config['error']['spam']);
@ -953,7 +972,11 @@ function handle_post(Context $ctx)
// Check for an embed field
if ($config['enable_embedding'] && isset($_POST['embed']) && !empty($_POST['embed'])) {
// yep; validate it
$value = $_POST['embed'];
$value = \trim($_POST['embed']);
if (\filter_var($value, \FILTER_VALIDATE_URL) === false) {
error($config['error']['invalid_embed']);
}
foreach ($config['embedding'] as &$embed) {
if (preg_match($embed[0], $value)) {
// Valid link
@ -990,23 +1013,7 @@ function handle_post(Context $ctx)
// Convert multiple upload format to array of files. This makes the following code
// work the same whether we used the JS or HTML multiple file upload techniques.
if (array_key_exists('file_multiple', $_FILES)) {
$file_array = $_FILES['file_multiple'];
$_FILES = [];
// If more than 0 files were uploaded
if (!empty($file_array['tmp_name'][0])) {
$i = 0;
$n = count($file_array['tmp_name']);
while ($i < $n) {
$_FILES[strval($i + 1)] = array(
'name' => $file_array['name'][$i],
'tmp_name' => $file_array['tmp_name'][$i],
'type' => $file_array['type'][$i],
'error' => $file_array['error'][$i],
'size' => $file_array['size'][$i]
);
$i++;
}
}
$_FILES = normalize_files($_FILES['file_multiple']);
}
// We must do this check now before the password is hashed and overwritten.