EmbedService.php: download and handle thumbnails

This commit is contained in:
Zankaria 2025-03-17 12:47:42 +01:00
parent 8ac67e9e85
commit 256a9682fa

View file

@ -1,64 +1,130 @@
<?php
namespace Vichan\Services\Embed;
use RuntimeException;
use Vichan\Context;
use Vichan\Data\Driver\HttpDriver;
use Vichan\Data\Driver\LogDriver;
class EmbedService {
private array $tuples;
private LogDriver $log;
public function __construct(LogDriver $log) {
class EmbedService {
// Prefix handed to tempnam() when creating the temporary thumbnail files.
private const TMP_FILE_PREFIX = 'oembed-thumb-';
/**
* The input embed text isn't a valid URL.
*/
public const MATCH_EMBED_ERR_NOT_AN_URL = 0;
/**
* The embed url doesn't match any supported url.
*/
public const MATCH_EMBED_ERR_NO_MATCH = 1;
/**
* The embed url doesn't have a thumbnail.
*/
public const MATCH_EMBED_ERR_NO_THUMBNAIL = 2;
// Logger used to report unknown embed types and thumbnail problems.
private LogDriver $log;
// Queried with (provider, url) for 'oembed' entries; the response's thumbnail_url is read.
private OembedExtractor $oembed_extractor;
// HTTP client used to download a thumbnail into an open file handle.
private HttpDriver $http;
// Directory in which the temporary thumbnail files are created.
private string $tmp_dir;
// Configured embed entries. Each entry is read for 'match_regex' and 'type', plus
// 'provider' and optionally 'thumbnail_url_fallback' (type 'oembed') or 'thumbnail_url' (type 'regex').
private array $embed_entries;
// Timeout forwarded to the HTTP driver when downloading a thumbnail (presumably seconds - TODO confirm).
private int $thumb_download_timeout;
/**
 * Builds the service from its collaborators and configuration.
 *
 * @param LogDriver $log Logger used to report unknown embed types and thumbnail problems.
 * @param OembedExtractor $oembed_extractor Extractor queried for 'oembed' embed entries.
 * @param array $embed_entries Configured embed entries (see 'match_regex', 'type', 'provider',
 *                             'thumbnail_url' and 'thumbnail_url_fallback' keys).
 * @param int $thumb_download_timeout Timeout forwarded to the HTTP driver for thumbnail downloads.
 * @param ?HttpDriver $http HTTP client used to download thumbnails. It is optional only to keep existing
 *                          call sites working; without it the download step cannot run.
 * @param ?string $tmp_dir Directory for the temporary thumbnail files. Defaults to the system temporary directory.
 */
public function __construct(LogDriver $log, OembedExtractor $oembed_extractor, array $embed_entries, int $thumb_download_timeout, ?HttpDriver $http = null, ?string $tmp_dir = null) {
    $this->log = $log;
    $this->oembed_extractor = $oembed_extractor;
    $this->embed_entries = $embed_entries;
    $this->thumb_download_timeout = $thumb_download_timeout;
    // Both properties below are typed and read when a thumbnail is downloaded, but were never
    // assigned before, so every download attempt failed on an uninitialized property.
    $this->tmp_dir = $tmp_dir ?? \sys_get_temp_dir();
    if ($http !== null) {
        $this->http = $http;
    }
}
/**
 * Creates an empty temporary file in the configured temporary directory.
 *
 * The file is registered for deletion when the script shuts down, so callers that
 * want to keep it must move it elsewhere before then.
 *
 * @return string Path of the newly created temporary file.
 * @throws \RuntimeException If the temporary file could not be created.
 */
private function make_tmp_file(): string {
    $path = \tempnam($this->tmp_dir, self::TMP_FILE_PREFIX);
    if ($path !== false) {
        // Best-effort cleanup: the file may already have been moved or removed by then.
        \register_shutdown_function(function () use ($path) {
            return @unlink($path);
        });
        return $path;
    }
    throw new \RuntimeException("Could not create temporary file in {$this->tmp_dir}");
}
/**
* Undocumented function
* Downloads the thumbnail into a temporary file.
*
* @param Context $ctx
* @param string $rawText
* @return void
* @return ?string The path to the temporary file, null if the file was too large.
*/
public function matchEmbed(Context $ctx, string $rawText) {
if (\filter_var($rawText, \FILTER_VALIDATE_URL) === false) {
return null;
/**
* Downloads the thumbnail at the given URL into a freshly created temporary file.
*
* @param string $thumbnail_url URL of the thumbnail to download.
* @return ?string Path of the temporary file when the HTTP driver reports success, null otherwise
*                 (the surrounding documentation describes the null case as "file was too large").
* @throws \RuntimeException If the temporary file cannot be created or opened.
*/
private function fetchThumbnail(string $thumbnail_url): ?string {
$tmp_file = $this->make_tmp_file();
// Opened for reading and writing in binary mode; the handle is what the HTTP driver writes into.
$fd = \fopen($tmp_file, 'w+b');
if ($fd === false) {
throw new \RuntimeException("Could not open temporary file $tmp_file for read/write");
}
// NOTE(review): the next six lines use $rawText, $this->tuples and $cfg, none of which exist in
// this method or class. They look like removed lines of the previous matchEmbed() body that were
// merged into this listing (they also open a foreach that is never closed) - confirm and drop.
$rawText = \trim($rawText);
foreach ($this->tuples as $cfg) {
if (!isset($cfg['match_regex'])) {
throw new \RuntimeException('Missing \'match_regex\' field');
}
$match_regex = $cfg['match_regex'];
// Streams the response body into $fd, bounded by the configured download timeout.
// NOTE(review): $fd is never explicitly closed in this method.
$ret = $this->http->requestGetInto($thumbnail_url, null, $fd, $this->thumb_download_timeout);
// A falsy result from the driver is reported to the caller as null.
return $ret ? $tmp_file : null;
}
/**
* Matches an alleged embed url against the configured embed entries and extracts its thumbnail location.
*
* The first entry whose 'match_regex' matches and whose 'type' is known decides the result:
* - 'oembed': the thumbnail url comes from the oEmbed response of the entry's 'provider', and the
*   entry's optional 'thumbnail_url_fallback' is returned alongside it;
* - 'regex': the thumbnail url is built by applying the entry's 'thumbnail_url' as the replacement
*   of 'match_regex' on the input; there is no fallback.
* Entries with an unknown type are logged and skipped.
*
* @param string $rawText The embed url to match.
* @return ?array Returns the url to the thumbnail and the path to the fallback if any embedding matches, null otherwise.
*/
private function matchAndExtract(string $rawText) {
foreach ($this->embed_entries as $embed_entry) {
$match_regex = $embed_entry['match_regex'];
if (\preg_match($match_regex, $rawText, $matches)) {
// NOTE(review): $cfg is not defined anywhere in this method. Every line that uses $cfg (and $ctx
// further down) looks like a removed line of the previous implementation merged into this
// listing; the $embed_entry lines right after them are the live equivalents - confirm and drop.
if (!isset($cfg['type'])) {
throw new \RuntimeException('Missing \'type\' field');
}
$type = $cfg['type'];
$type = $embed_entry['type'];
if ($type === 'oembed') {
if (!isset($cfg['provider'])) {
throw new \RuntimeException('Missing \'provider\' field');
}
$provider = $cfg['provider'];
$extractor = $ctx->get(OembedExtractor::class);
$oembed_resp = $extractor->fetch($provider, $rawText);
// The fallback is optional; a missing key yields null.
$thumbnail_url_fallback = $embed_entry['thumbnail_url_fallback'] ?? null;
$provider = $embed_entry['provider'];
$oembed_resp = $this->oembed_extractor->fetch($provider, $rawText);
return [ $oembed_resp->thumbnail_url, $thumbnail_url_fallback ];
} elseif ($type === 'regex') {
if (!isset($cfg['thumbnail_url'])) {
throw new \RuntimeException('Missing \'thumbnail_url\' field');
}
$thumbnail_url_regex = $cfg['thumbnail_url'];
$thumbnail_url_regex = $embed_entry['thumbnail_url'];
// Review requested by the original author: 'thumbnail_url' is used as the preg_replace() replacement,
// so it can reference the capture groups of 'match_regex'. preg_replace() returns null on error.
$thumbnail_url = \preg_replace($match_regex, $thumbnail_url_regex, $rawText);
return [ \preg_replace($match_regex, $thumbnail_url_regex, $rawText), null ];
} else {
// Unknown types do not abort the search: the loop moves on to the next entry.
$this->log->log(LogDriver::ERROR, "Unknown embed type '$type', ignoring");
$this->log->log(LogDriver::ERROR, "Unknown embed type '$type', ignoring the embed entry");
}
}
}
// No entry matched, or every matching entry had an unknown type.
return null;
}
/**
 * Resolves an alleged embed url to a thumbnail file.
 *
 * The text is trimmed and validated as a URL, then matched against the configured embed entries.
 * When the matching entry yields a valid thumbnail url, the thumbnail is downloaded into a temporary
 * file. If the thumbnail url is missing or invalid, or the download is rejected, the entry's fallback
 * thumbnail is used instead, if there is one.
 *
 * @param string $rawText The user supplied embed text.
 * @return array|int On success a two element array: the path to the thumbnail, and whether it should be
 *                   moved (true for a downloaded temporary file, false for the configured fallback).
 *                   Otherwise it returns a MATCH_EMBED_ERR_* constant.
 * @throws \RuntimeException If the temporary file for the download cannot be created or opened.
 */
public function matchEmbed(string $rawText) {
    $rawText = \trim($rawText);
    if (\filter_var($rawText, \FILTER_VALIDATE_URL) === false) {
        return self::MATCH_EMBED_ERR_NOT_AN_URL;
    }

    $ret = $this->matchAndExtract($rawText);
    if ($ret === null) {
        return self::MATCH_EMBED_ERR_NO_MATCH;
    }

    [ $thumbnail_url, $thumbnail_url_fallback ] = $ret;
    // Give up only when there is neither a thumbnail nor a fallback. The previous
    // `!isset($a, $b)` check was true as soon as either was null, which rejected every
    // entry without a fallback before attempting any download.
    if ($thumbnail_url === null && $thumbnail_url_fallback === null) {
        return self::MATCH_EMBED_ERR_NO_THUMBNAIL;
    }

    // A missing thumbnail url goes straight to the fallback; it is not a malformed url worth logging.
    if ($thumbnail_url !== null) {
        if (\filter_var($thumbnail_url, \FILTER_VALIDATE_URL) === false) {
            $this->log->log(LogDriver::ERROR, "Thumbnail URL '$thumbnail_url' is not a valid URL, trying fallback");
        } else {
            $tmp_file = $this->fetchThumbnail($thumbnail_url);
            if ($tmp_file !== null) {
                return [ $tmp_file, true ];
            }
            $this->log->log(LogDriver::NOTICE, "Thumbnail at '$thumbnail_url' was too large, trying fallback");
        }
    }

    if ($thumbnail_url_fallback === null) {
        return self::MATCH_EMBED_ERR_NO_THUMBNAIL;
    }
    return [ $thumbnail_url_fallback, false ];
}
}