From 256a9682fa26dee67c80807ec7cdc13f38fb1b6e Mon Sep 17 00:00:00 2001 From: Zankaria Date: Mon, 17 Mar 2025 12:47:42 +0100 Subject: [PATCH] EmbedService.php: download and handle thumbnails --- inc/Services/Embed/EmbedService.php | 140 ++++++++++++++++++++-------- 1 file changed, 103 insertions(+), 37 deletions(-) diff --git a/inc/Services/Embed/EmbedService.php b/inc/Services/Embed/EmbedService.php index eb3ba21b..9db84d4a 100644 --- a/inc/Services/Embed/EmbedService.php +++ b/inc/Services/Embed/EmbedService.php @@ -1,64 +1,130 @@ log = $log; + $this->oembed_extractor = $oembed_extractor; + $this->embed_entries = $embed_entries; + $this->thumb_download_timeout = $thumb_download_timeout; + } + + private function make_tmp_file(): string { + $ret = \tempnam($this->tmp_dir, self::TMP_FILE_PREFIX); + if ($ret === false) { + throw new \RuntimeException("Could not create temporary file in {$this->tmp_dir}"); + } + \register_shutdown_function(fn() => @unlink($ret)); + return $ret; } /** - * Undocumented function + * Downloads the thumbnail into a temporary file. * - * @param Context $ctx - * @param string $rawText - * @return void + * @return ?string The path to the temporary file, null if the file was too large. */ - public function matchEmbed(Context $ctx, string $rawText) { - if (\filter_var($rawText, \FILTER_VALIDATE_URL) === false) { - return null; + private function fetchThumbnail(string $thumbnail_url): ?string { + $tmp_file = $this->make_tmp_file(); + $fd = \fopen($tmp_file, 'w+b'); + if ($fd === false) { + throw new \RuntimeException("Could not open temporary file $tmp_file for read/write"); } - $rawText = \trim($rawText); - foreach ($this->tuples as $cfg) { - if (!isset($cfg['match_regex'])) { - throw new \RuntimeException('Missing \'match_regex\' field'); - } - $match_regex = $cfg['match_regex']; + $ret = $this->http->requestGetInto($thumbnail_url, null, $fd, $this->thumb_download_timeout); + return $ret ? 
$tmp_file : null; + } + + /** + * Matches an alleged embed url and returns the path to the thumbnail, if any. + * + * @param string $rawText + * @return ?array Returns the url to the thumbnail and the path to the fallback if any embedding matches, null otherwise. + */ + private function matchAndExtract(string $rawText) { + foreach ($this->embed_entries as $embed_entry) { + $match_regex = $embed_entry['match_regex']; if (\preg_match($match_regex, $rawText, $matches)) { - if (!isset($cfg['type'])) { - throw new \RuntimeException('Missing \'type\' field'); - } - $type = $cfg['type']; + $type = $embed_entry['type']; if ($type === 'oembed') { - if (!isset($cfg['provider'])) { - throw new \RuntimeException('Missing \'provider\' field'); - } - $provider = $cfg['provider']; - - $extractor = $ctx->get(OembedExtractor::class); - $oembed_resp = $extractor->fetch($provider, $rawText); - + $thumbnail_url_fallback = $embed_entry['thumbnail_url_fallback'] ?? null; + $provider = $embed_entry['provider']; + $oembed_resp = $this->oembed_extractor->fetch($provider, $rawText); + return [ $oembed_resp->thumbnail_url, $thumbnail_url_fallback ]; } elseif ($type === 'regex') { - if (!isset($cfg['thumbnail_url'])) { - throw new \RuntimeException('Missing \'thumbnail_url\' field'); - } - $thumbnail_url_regex = $cfg['thumbnail_url']; + $thumbnail_url_regex = $embed_entry['thumbnail_url']; // Plz somebody review this. - $thumbnail_url = \preg_replace($match_regex, $thumbnail_url_regex, $rawText); + return [ \preg_replace($match_regex, $thumbnail_url_regex, $rawText), null ]; } else { - $this->log->log(LogDriver::ERROR, "Unknown embed type '$type', ignoring"); + $this->log->log(LogDriver::ERROR, "Unknown embed type '$type', ignoring the embed entry"); } } } + + return null; + } + + /** + * @return array|int Returns the url to the thumbnail and if it should be moved if any embedding matches, + * otherwise it returns a MATCH_EMBED_ERR_* constant. 
+ */ + public function matchEmbed(string $rawText) { + $rawText = \trim($rawText); + if (\filter_var($rawText, \FILTER_VALIDATE_URL) === false) { + return self::MATCH_EMBED_ERR_NOT_AN_URL; + } + + $ret = $this->matchAndExtract($rawText); + if ($ret === null) { + return self::MATCH_EMBED_ERR_NO_MATCH; + } + list($thumbnail_url, $thumbnail_url_fallback) = $ret; + if (!isset($thumbnail_url) && !isset($thumbnail_url_fallback)) { + return self::MATCH_EMBED_ERR_NO_THUMBNAIL; + } + + if (\filter_var($thumbnail_url, \FILTER_VALIDATE_URL) === false) { + $this->log->log(LogDriver::ERROR, "Thumbnail URL '$thumbnail_url' is not a valid URL, trying fallback"); + } else { + $tmp_file = $this->fetchThumbnail($thumbnail_url); + if ($tmp_file !== null) { + return [ $tmp_file, true ]; + } + $this->log->log(LogDriver::NOTICE, "Thumbnail at '$thumbnail_url' was too large, trying fallback"); + } + + if ($thumbnail_url_fallback === null) { + return self::MATCH_EMBED_ERR_NO_THUMBNAIL; + } + return [ $thumbnail_url_fallback, false ]; } }