forked from leftypol/leftypol
EmbedService.php: download and handle thumbnails
This commit is contained in:
parent
8ac67e9e85
commit
256a9682fa
1 changed files with 103 additions and 37 deletions
|
@ -1,64 +1,130 @@
|
||||||
<?php
|
<?php
|
||||||
namespace Vichan\Services\Embed;
|
namespace Vichan\Services\Embed;
|
||||||
|
|
||||||
use RuntimeException;
|
use Vichan\Data\Driver\HttpDriver;
|
||||||
use Vichan\Context;
|
|
||||||
use Vichan\Data\Driver\LogDriver;
|
use Vichan\Data\Driver\LogDriver;
|
||||||
|
|
||||||
/**
 * Matches user-supplied embed URLs against the configured embed entries and
 * resolves a thumbnail for them, downloading it into a temporary file when possible.
 */
class EmbedService {
	/**
	 * Prefix given to the temporary files that hold downloaded thumbnails.
	 */
	private const TMP_FILE_PREFIX = 'oembed-thumb-';

	/**
	 * The input embed text isn't a valid URL.
	 */
	public const MATCH_EMBED_ERR_NOT_AN_URL = 0;
	/**
	 * The embed url doesn't match any supported url.
	 */
	public const MATCH_EMBED_ERR_NO_MATCH = 1;
	/**
	 * The embed url doesn't have a thumbnail.
	 */
	public const MATCH_EMBED_ERR_NO_THUMBNAIL = 2;

	private LogDriver $log;
	private OembedExtractor $oembed_extractor;
	private ?HttpDriver $http;
	private string $tmp_dir;
	private array $embed_entries;
	private int $thumb_download_timeout;

	/**
	 * @param LogDriver $log Logger used to report unusable thumbnails and unknown embed types.
	 * @param OembedExtractor $oembed_extractor Used to query the provider of 'oembed' entries.
	 * @param array $embed_entries Embed configuration. Each entry is read for 'match_regex' and 'type', plus
	 *                             'provider' and optionally 'thumbnail_url_fallback' for the 'oembed' type,
	 *                             or 'thumbnail_url' (a preg_replace replacement) for the 'regex' type.
	 * @param int $thumb_download_timeout Timeout handed to the HTTP driver when downloading a thumbnail.
	 * @param ?HttpDriver $http Driver used to download thumbnails. When omitted, any attempt to download
	 *                          a thumbnail throws a RuntimeException.
	 * @param ?string $tmp_dir Directory for the temporary thumbnail files, defaults to the system temporary directory.
	 */
	public function __construct(
		LogDriver $log,
		OembedExtractor $oembed_extractor,
		array $embed_entries,
		int $thumb_download_timeout,
		?HttpDriver $http = null,
		?string $tmp_dir = null
	) {
		$this->log = $log;
		$this->oembed_extractor = $oembed_extractor;
		$this->embed_entries = $embed_entries;
		$this->thumb_download_timeout = $thumb_download_timeout;
		// Both properties are typed: leaving them unassigned makes every later read throw an Error.
		$this->http = $http;
		$this->tmp_dir = $tmp_dir ?? \sys_get_temp_dir();
	}

	/**
	 * Creates an empty temporary file that is removed again when the script shuts down.
	 *
	 * @return string The path to the temporary file.
	 * @throws \RuntimeException If the file could not be created.
	 */
	private function make_tmp_file(): string {
		$ret = \tempnam($this->tmp_dir, self::TMP_FILE_PREFIX);
		if ($ret === false) {
			throw new \RuntimeException("Could not create temporary file in {$this->tmp_dir}");
		}
		// Best-effort cleanup: the file may already have been moved away by the caller, hence the suppression.
		\register_shutdown_function(static fn() => @unlink($ret));
		return $ret;
	}

	/**
	 * Downloads the thumbnail into a temporary file.
	 *
	 * @param string $thumbnail_url The URL of the thumbnail to download.
	 * @return ?string The path to the temporary file, null if the file was too large.
	 * @throws \RuntimeException If no HTTP driver is available or the temporary file cannot be created or opened.
	 */
	private function fetchThumbnail(string $thumbnail_url): ?string {
		if ($this->http === null) {
			throw new \RuntimeException('Cannot download embed thumbnails: no HTTP driver was provided');
		}

		$tmp_file = $this->make_tmp_file();
		$fd = \fopen($tmp_file, 'w+b');
		if ($fd === false) {
			throw new \RuntimeException("Could not open temporary file $tmp_file for read/write");
		}

		try {
			$ret = $this->http->requestGetInto($thumbnail_url, null, $fd, $this->thumb_download_timeout);
		} finally {
			// Always release the handle, so the descriptor does not leak and the data is flushed
			// before the caller reads the file back.
			\fclose($fd);
		}
		return $ret ? $tmp_file : null;
	}

	/**
	 * Matches an alleged embed url and returns the url of the thumbnail, if any.
	 *
	 * Entries are tried in order and the first one whose 'match_regex' matches and whose 'type' is known wins.
	 * Entries with an unknown type are logged and skipped.
	 *
	 * @param string $rawText
	 * @return ?array Returns the url to the thumbnail and the path to the fallback if any embedding matches, null otherwise.
	 *                Either element may be null.
	 */
	private function matchAndExtract(string $rawText): ?array {
		foreach ($this->embed_entries as $embed_entry) {
			$match_regex = $embed_entry['match_regex'];

			if (!\preg_match($match_regex, $rawText)) {
				continue;
			}

			$type = $embed_entry['type'];

			if ($type === 'oembed') {
				$thumbnail_url_fallback = $embed_entry['thumbnail_url_fallback'] ?? null;
				$provider = $embed_entry['provider'];
				$oembed_resp = $this->oembed_extractor->fetch($provider, $rawText);

				// NOTE(review): presumably the response may lack a thumbnail; read it null-tolerantly.
				return [ $oembed_resp->thumbnail_url ?? null, $thumbnail_url_fallback ];
			} elseif ($type === 'regex') {
				$thumbnail_url_regex = $embed_entry['thumbnail_url'];

				// The thumbnail url is derived from the embed url by substituting the groups captured by
				// 'match_regex' into the configured replacement. preg_replace yields null on a PCRE error.
				return [ \preg_replace($match_regex, $thumbnail_url_regex, $rawText), null ];
			} else {
				$this->log->log(LogDriver::ERROR, "Unknown embed type '$type', ignoring the embed entry");
			}
		}

		return null;
	}

	/**
	 * Resolves the thumbnail of an embed url.
	 *
	 * The thumbnail is downloaded into a temporary file when the matching entry yields a valid url.
	 * If there is no usable url, or the download is rejected, the entry's fallback is used instead.
	 *
	 * @param string $rawText The alleged embed url, surrounding whitespace is ignored.
	 * @return array|int On success a pair [ path, bool ]: the path of the downloaded temporary file and true,
	 *                   or the configured fallback and false. The flag tells if the file should be moved.
	 *                   Otherwise it returns a MATCH_EMBED_ERR_* constant.
	 * @throws \RuntimeException If the thumbnail has to be downloaded but the temporary file or the HTTP driver is unavailable.
	 */
	public function matchEmbed(string $rawText) {
		$rawText = \trim($rawText);
		if (\filter_var($rawText, \FILTER_VALIDATE_URL) === false) {
			return self::MATCH_EMBED_ERR_NOT_AN_URL;
		}

		$ret = $this->matchAndExtract($rawText);
		if ($ret === null) {
			return self::MATCH_EMBED_ERR_NO_MATCH;
		}
		[ $thumbnail_url, $thumbnail_url_fallback ] = $ret;

		// A missing fallback must not prevent the download: only give up when neither value is usable.
		if ($thumbnail_url !== null) {
			if (\filter_var($thumbnail_url, \FILTER_VALIDATE_URL) === false) {
				$this->log->log(LogDriver::ERROR, "Thumbnail URL '$thumbnail_url' is not a valid URL, trying fallback");
			} else {
				$tmp_file = $this->fetchThumbnail($thumbnail_url);
				if ($tmp_file !== null) {
					return [ $tmp_file, true ];
				}
				$this->log->log(LogDriver::NOTICE, "Thumbnail at '$thumbnail_url' was too large, trying fallback");
			}
		}

		if ($thumbnail_url_fallback === null) {
			return self::MATCH_EMBED_ERR_NO_THUMBNAIL;
		}
		return [ $thumbnail_url_fallback, false ];
	}
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue