EmbedService.php: download and handle thumbnails

This commit is contained in:
Zankaria 2025-03-17 12:47:42 +01:00
parent 8ac67e9e85
commit 256a9682fa

View file

@@ -1,64 +1,130 @@
<?php <?php
namespace Vichan\Services\Embed; namespace Vichan\Services\Embed;
use RuntimeException; use Vichan\Data\Driver\HttpDriver;
use Vichan\Context;
use Vichan\Data\Driver\LogDriver; use Vichan\Data\Driver\LogDriver;
/**
 * Matches a user-supplied embed URL against the configured embed entries and
 * resolves a thumbnail for it: either a downloaded temporary file or the
 * fallback value configured on the matching entry.
 */
class EmbedService {
    private const TMP_FILE_PREFIX = 'oembed-thumb-';

    /**
     * The input embed text isn't a valid URL.
     */
    public const MATCH_EMBED_ERR_NOT_AN_URL = 0;

    /**
     * The embed url doesn't match any supported url.
     */
    public const MATCH_EMBED_ERR_NO_MATCH = 1;

    /**
     * The embed url doesn't have a thumbnail.
     */
    public const MATCH_EMBED_ERR_NO_THUMBNAIL = 2;

    private LogDriver $log;
    private OembedExtractor $oembed_extractor;
    // Nullable with a default so the property is never left in the
    // "uninitialized typed property" state.
    private ?HttpDriver $http = null;
    private string $tmp_dir;
    private array $embed_entries;
    private int $thumb_download_timeout;

    /**
     * @param LogDriver $log Logger used for non-fatal problems.
     * @param OembedExtractor $oembed_extractor Used for entries of type 'oembed'.
     * @param array $embed_entries Embed entries. Each entry is read for 'match_regex' and 'type', plus
     *                             'provider' and optional 'thumbnail_url_fallback' (type 'oembed')
     *                             or 'thumbnail_url' (type 'regex').
     * @param int $thumb_download_timeout Timeout forwarded to the HTTP driver when downloading a thumbnail.
     * @param ?HttpDriver $http Driver used to download thumbnails. Optional only to keep the previous
     *                          constructor signature working; without it thumbnails cannot be downloaded.
     * @param ?string $tmp_dir Directory for temporary thumbnail files, defaults to the system temporary directory.
     */
    public function __construct(
        LogDriver $log,
        OembedExtractor $oembed_extractor,
        array $embed_entries,
        int $thumb_download_timeout,
        ?HttpDriver $http = null,
        ?string $tmp_dir = null
    ) {
        $this->log = $log;
        $this->oembed_extractor = $oembed_extractor;
        $this->embed_entries = $embed_entries;
        $this->thumb_download_timeout = $thumb_download_timeout;
        $this->http = $http;
        $this->tmp_dir = $tmp_dir ?? \sys_get_temp_dir();
    }

    /**
     * Creates an empty temporary file and schedules its removal at shutdown.
     *
     * @return string The path to the temporary file.
     * @throws \RuntimeException If the temporary file could not be created.
     */
    private function make_tmp_file(): string {
        $ret = \tempnam($this->tmp_dir, self::TMP_FILE_PREFIX);
        if ($ret === false) {
            throw new \RuntimeException("Could not create temporary file in {$this->tmp_dir}");
        }
        // Best-effort cleanup: the file may already have been moved away by the caller,
        // in which case the failed unlink is deliberately silenced.
        \register_shutdown_function(fn() => @\unlink($ret));
        return $ret;
    }

    /**
     * Downloads the thumbnail into a temporary file.
     *
     * @return ?string The path to the temporary file, null if the HTTP driver reported a failure
     *                 (matchEmbed treats that as the file being too large).
     * @throws \RuntimeException If no HTTP driver is available or the temporary file cannot be created or opened.
     */
    private function fetchThumbnail(string $thumbnail_url): ?string {
        if ($this->http === null) {
            throw new \RuntimeException('Cannot download thumbnails: no HTTP driver was provided');
        }

        $tmp_file = $this->make_tmp_file();
        $fd = \fopen($tmp_file, 'w+b');
        if ($fd === false) {
            throw new \RuntimeException("Could not open temporary file $tmp_file for read/write");
        }

        try {
            $ret = $this->http->requestGetInto($thumbnail_url, null, $fd, $this->thumb_download_timeout);
        } finally {
            // Always release the handle, even if the driver throws. The is_resource()
            // guard covers a driver that already closed it.
            if (\is_resource($fd)) {
                \fclose($fd);
            }
        }

        return $ret ? $tmp_file : null;
    }

    /**
     * Matches an alleged embed url against the embed entries and extracts its thumbnail information.
     *
     * @param string $rawText
     * @return ?array Returns the url to the thumbnail and the fallback (either may be null) if any
     *                embedding matches, null otherwise.
     */
    private function matchAndExtract(string $rawText): ?array {
        foreach ($this->embed_entries as $embed_entry) {
            $match_regex = $embed_entry['match_regex'];
            if (!\preg_match($match_regex, $rawText)) {
                continue;
            }

            $type = $embed_entry['type'];
            if ($type === 'oembed') {
                $thumbnail_url_fallback = $embed_entry['thumbnail_url_fallback'] ?? null;
                $oembed_resp = $this->oembed_extractor->fetch($embed_entry['provider'], $rawText);
                // NOTE(review): the shape of the oembed response is not visible here. The null
                // coalescing covers a null response or one without a thumbnail_url.
                return [ $oembed_resp->thumbnail_url ?? null, $thumbnail_url_fallback ];
            }
            if ($type === 'regex') {
                // The 'thumbnail_url' field is a preg_replace replacement template applied to
                // the embed url. preg_replace yields null on a regex error.
                return [ \preg_replace($match_regex, $embed_entry['thumbnail_url'], $rawText), null ];
            }

            // Misconfigured entry: report it and keep looking at the remaining entries.
            $this->log->log(LogDriver::ERROR, "Unknown embed type '$type', ignoring the embed entry");
        }

        return null;
    }

    /**
     * Resolves the thumbnail of an embed url.
     *
     * @param string $rawText The user supplied embed url, surrounding whitespace is ignored.
     * @return array|int On success a two element array: the thumbnail and whether it should be moved.
     *                   `[ <temporary file path>, true ]` when the thumbnail was downloaded,
     *                   `[ <fallback>, false ]` when the entry's fallback is used instead.
     *                   Otherwise it returns a MATCH_EMBED_ERR_* constant.
     * @throws \RuntimeException If a thumbnail has to be downloaded but the temporary file or the
     *                           HTTP driver is unavailable.
     */
    public function matchEmbed(string $rawText) {
        $rawText = \trim($rawText);
        if (\filter_var($rawText, \FILTER_VALIDATE_URL) === false) {
            return self::MATCH_EMBED_ERR_NOT_AN_URL;
        }

        $ret = $this->matchAndExtract($rawText);
        if ($ret === null) {
            return self::MATCH_EMBED_ERR_NO_MATCH;
        }

        [ $thumbnail_url, $thumbnail_url_fallback ] = $ret;

        // A missing fallback alone must not abort: only give up once neither the
        // thumbnail url nor the fallback can provide a thumbnail.
        if ($thumbnail_url !== null) {
            if (\filter_var($thumbnail_url, \FILTER_VALIDATE_URL) === false) {
                $this->log->log(LogDriver::ERROR, "Thumbnail URL '$thumbnail_url' is not a valid URL, trying fallback");
            } else {
                $tmp_file = $this->fetchThumbnail($thumbnail_url);
                if ($tmp_file !== null) {
                    return [ $tmp_file, true ];
                }
                $this->log->log(LogDriver::NOTICE, "Thumbnail at '$thumbnail_url' was too large, trying fallback");
            }
        }

        if ($thumbnail_url_fallback === null) {
            return self::MATCH_EMBED_ERR_NO_THUMBNAIL;
        }
        return [ $thumbnail_url_fallback, false ];
    }
}