<?php

// (c) Copyright by authors of the Tiki Wiki CMS Groupware Project
//
// All Rights Reserved. See copyright.txt for details and a complete list of authors.
// Licensed under the GNU LESSER GENERAL PUBLIC LICENSE. See license.txt for details.
// $Id$

namespace Tiki\File;

use Tiki\TikiInit;
use Tiki\Process\Process;
use Tiki\FileGallery\File;
use Tiki\Lib\Alchemy\AlchemyLib;
use Tiki\Lib\Unoconv\UnoconvLib;
use Unoconv\Exception\RuntimeException;

class OcrHelper
{
    /**
     * Extract text from multiple file types for OCR purposes
     * @param $fileId
     * @return bool|string
     * @throws \Exception
     */
    public static function extractText($fileId)
    {
        $file = File::id($fileId);

        if (empty($file)) {
            return false;
        }

        if (substr(PHP_OS, 0, 3) == 'WIN') {
            return false;
        }

        global $mimetypes, $tikidomain;
        include_once('lib/mime/mimetypes.php');

        $tempReadableFilePath = $file->getWrapper()->getReadableFile();
        $pdfTempFile = null;
        $tempFileName = implode(DIRECTORY_SEPARATOR, ['temp', 'cache', $tikidomain, uniqid()]);
        $mimetypeMatched = false;

        if (FileHelper::isOfficeDocument($file->filetype)) {
            $mimetypeMatched = true;
            try {
                $unoconv = new UnoconvLib();
                $unoconv->convertFile($tempReadableFilePath, $tempFileName, 'txt');
            } catch (RuntimeException $e) {
                // Unoconv was unable to extract text without converting to PDF first
                $pdfTempFile = PDFHelper::convertToPDF($fileId);
            }
        }

        if ($file->filetype == $mimetypes['pdf'] || (! empty($pdfTempFile) && file_exists($pdfTempFile))) {
            $mimetypeMatched = true;
            $tempReadableFilePath = $pdfTempFile ?: $tempReadableFilePath;
            $process = new Process(['which', 'pdftotext']);
            $process->run();
            $pdfToTextPath = preg_replace('/\s+/', ' ', trim($process->getOutput()));

            if (empty($pdfToTextPath) || TikiInit::isWindows()) {
                throw new \Exception('Text cannot be extracted because pdftotext library is unavailable or is not supported.');
            }

            $process = new Process([$pdfToTextPath, $tempReadableFilePath, $tempFileName]);
            $process->run();
        }

        if (! $mimetypeMatched) {
            throw new \Exception('Mime-type not supported for OCR text extraction');
        }

        if ($pdfTempFile) {
            unlink($pdfTempFile);
        }

        $ocrContent = file_get_contents($tempFileName);
        unlink($tempFileName);

        return $ocrContent;
    }

    /**
     * Convert an image to another image file format
     * @param $fileId
     * @param $newPathWithExtension
     * @return bool|string|null
     * @TODO - This function should probably be moved into a more generic place.
     */
    public static function convertImage($fileId, $newPathWithExtension)
    {
        $file = File::id($fileId);

        if (empty($file)) {
            return false;
        }

        $alchemy = new AlchemyLib();
        $temporaryFile = $file->getWrapper()->getReadableFile();

        return $alchemy->convertToImage($temporaryFile, $newPathWithExtension);
    }
}