forked from leftypol/leftypol
tesseract OCR support for spamfilters
This commit is contained in:
parent
36d762514c
commit
8a46c7a0d5
4 changed files with 52 additions and 6 deletions
43
post.php
43
post.php
|
@ -652,14 +652,14 @@ if (isset($_POST['delete'])) {
|
|||
$post['filehash'] = md5($allhashes);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (!hasPermission($config['mod']['bypass_filters'], $board['uri'])) {
|
||||
require_once 'inc/filters.php';
|
||||
|
||||
require_once 'inc/filters.php';
|
||||
|
||||
do_filters($post);
|
||||
}
|
||||
|
||||
if ($post['has_file']) {
|
||||
|
||||
if ($post['has_file']) {
|
||||
foreach ($post['files'] as $key => &$file) {
|
||||
if ($file['is_an_image']) {
|
||||
if ($config['ie_mime_type_detection'] !== false) {
|
||||
|
@ -787,6 +787,34 @@ if (isset($_POST['delete'])) {
|
|||
$file['thumbwidth'] = $size[0];
|
||||
$file['thumbheight'] = $size[1];
|
||||
}
|
||||
|
||||
if ($config['tesseract_ocr']) { // Let's OCR it!
|
||||
$fname = $file['tmp_name'];
|
||||
|
||||
if ($file['height'] > 500 || $file['width'] > 500) {
|
||||
$fname = $file['thumb'];
|
||||
}
|
||||
|
||||
if ($fname == 'spoiler') { // We don't have that much CPU time, do we?
|
||||
}
|
||||
else {
|
||||
$tmpname = "tmp/tesseract/".rand(0,10000000);
|
||||
|
||||
// Preprocess command is an ImageMagick b/w quantization
|
||||
$error = shell_exec_error(sprintf($config['tesseract_preprocess_command'], escapeshellarg($fname)) . " | " .
|
||||
'tesseract stdin '.escapeshellarg($tmpname).' '.$config['tesseract_params']);
|
||||
$tmpname .= ".txt";
|
||||
|
||||
$value = @file_get_contents($tmpname);
|
||||
@unlink($tmpname);
|
||||
|
||||
if ($value && trim($value)) {
|
||||
// This one has an effect, that the body is appended to a post body. So you can write a correct
|
||||
// spamfilter.
|
||||
$post['body_nomarkup'] .= "<tinyboard ocr image $key>".htmlspecialchars($value)."</tinyboard>";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!isset($dont_copy_file) || !$dont_copy_file) {
|
||||
if (isset($file['file_tmp'])) {
|
||||
|
@ -827,6 +855,11 @@ if (isset($_POST['delete'])) {
|
|||
}
|
||||
}
|
||||
|
||||
// Do filters again if OCRing
|
||||
if ($config['tesseract_ocr'] && !hasPermission($config['mod']['bypass_filters'], $board['uri'])) {
|
||||
do_filters($post);
|
||||
}
|
||||
|
||||
if (!hasPermission($config['mod']['postunoriginal'], $board['uri']) && $config['robot_enable'] && checkRobot($post['body_nomarkup'])) {
|
||||
undoImage($post);
|
||||
if ($config['robot_mute']) {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue