<?php

// (c) Copyright by authors of the Tiki Wiki CMS Groupware Project
//
// All Rights Reserved. See copyright.txt for details and a complete list of authors.
// Licensed under the GNU LESSER GENERAL PUBLIC LICENSE. See license.txt for details.
// $Id$

require_once('tikiimporter_wiki.php');

/**
 * Parses a MediaWiki-style XML dump to import it into TikiWiki.
 * Requires PHP5 DOM extension.
 * Based on the work done on http://dev.tiki.org/MediaWiki+to+TikiWiki+converter
 *
 * @package tikiimporter
 */
class TikiImporter_Wiki_Mediawiki extends TikiImporter_Wiki
{
    /**
     * Identifier of the source software handled by this importer.
     * @var string
     */
    public $softwareName = 'Mediawiki';

    /**
     * The DOM representation of the Mediawiki XML dump.
     * Starts as an empty string and is replaced by a DOMDocument
     * instance in $this->import().
     * @var DOMDocument|string
     */
    public $dom = '';

    /**
     * Array of the valid mime types for the
     * input file (compared with $_FILES['importFile']['type'] in $this->import())
     * @var string[]
     */
    public $validTypes = ['application/xml', 'text/xml', 'text/html'];

    /**
     * The directory used to save the attachments.
     * It is set by $this->checkRequirementsForAttachments().
     * @var string
     */
    public $attachmentsDestDir = '';

    /**
     * Text_Wiki object to handle Mediawiki
     * syntax parsing. Starts as an empty string and is created
     * in $this->configureParser().
     */
    public $parser = '';

    /**
     * Describe the extra options offered by this importer.
     *
     * Every option is an array with the keys 'name', 'type' and 'label';
     * the label is passed through tra().
     *
     * @see lib/importer/TikiImporter#importOptions()
     *
     * @return array list of option definitions
     */
    public static function importOptions()
    {
        $attachmentsOption = [
            'name' => 'importAttachments',
            'type' => 'checkbox',
            'label' => tra('Import images and attachments (see documentation for more information)'),
        ];

        $maketocOption = [
            'name' => 'maketoc',
            'type' => 'checkbox',
            'label' => tra('Add a maketoc at the top of each page'),
        ];

        return [$attachmentsOption, $maketocOption];
    }

    /**
     * Make sure the DOMDocument class is available, since the whole
     * importer relies on it to read the XML dump.
     *
     * @see lib/importer/TikiImporter#checkRequirements()
     *
     * @return void
     * @throws Exception if DOMDocument not available
     */
    public function checkRequirements()
    {
        if (class_exists('DOMDocument')) {
            return;
        }

        $message = tra('Class DOMDocument not available, check your PHP installation. For more information see http://php.net/manual/en/book.dom.php');

        throw new Exception($message);
    }

    /**
     * Start the importing process by loading the XML file.
     *
     * Steps, in order: check the uploaded file MIME type (when the file came
     * through $_FILES['importFile']), check the attachment requirements when
     * the 'importAttachments' option is on, load the XML into $this->dom,
     * configure the syntax parser, optionally download the attachments and
     * finally hand over to the parent implementation.
     *
     * @see lib/importer/TikiImporter_Wiki#import()
     *
     * @param string $filePath path to the XML file
     * @return void
     * @throws UnexpectedValueException if invalid file mime type
     * @throws DOMException if the XML file cannot be loaded
     */
    public function import($filePath = null)
    {
        if ($filePath == null) {
            die("This particular implementation of the method requires an explicity file path.");
        }

        if (isset($_FILES['importFile']) && ! in_array($_FILES['importFile']['type'], $this->validTypes, true)) {
            throw new UnexpectedValueException(tra('Invalid file MIME type'));
        }

        // Evaluated once: the same option drives both the requirements check and the download step.
        $importAttachments = ! empty($_POST['importAttachments']) && $_POST['importAttachments'] == 'on';

        if ($importAttachments) {
            $this->checkRequirementsForAttachments();
        }

        $this->saveAndDisplayLog("Loading and validating the XML file\n");

        $this->dom = new DOMDocument();

        // load() returns false on a missing or malformed file. Stop here instead of
        // configuring the parser and scanning for attachments on an empty document.
        if ($this->dom->load($filePath) === false) {
            throw new DOMException(tra('Unable to load the XML file. Check that the file exists and is well-formed XML.'));
        }

        $this->configureParser();

        if ($importAttachments) {
            $this->downloadAttachments();
        }

        parent::import();
    }

    /**
     * Build the Text_Wiki parser used to read Mediawiki syntax and
     * store it in $this->parser.
     *
     * Two settings are applied: spaces in wikilinks are kept as they are,
     * and the list of prefixes recognised for images/files is extended with
     * the namespace names declared in the dump under the keys -2 and 6.
     */
    public function configureParser()
    {
        $parser = Text_Wiki::factory('Mediawiki');
        $this->parser = $parser;

        // keep spaces in wikilinks instead of turning them into underscores
        $parser->setParseConf('Wikilink', 'spaceUnderscore', false);

        // collect the (possibly localized) namespace names used for images and files
        $imagePrefixes = ['Image', 'image'];
        foreach ($this->dom->getElementsByTagName('namespace') as $namespaceNode) {
            $key = $namespaceNode->getAttribute('key');
            if ($key == '-2' || $key == '6') {
                $imagePrefixes[] = $namespaceNode->nodeValue;
            }
        }

        $parser->setParseConf('Image', 'prefix', $imagePrefixes);
    }

    /**
     * Validate the loaded Mediawiki XML dump against the XML Schema (XSD)
     * file stored next to this class for the version declared in the dump.
     * Mediawiki XML versions from 0.3 till 0.10 are supported.
     *
     * Note: schemaValidate() is used instead of validate() because,
     * for some unknown reason, validate() is unable to automatically
     * retrieve the Mediawiki XML DTD and dies with a "no DTD found" error.
     *
     * @see lib/importer/TikiImporter#validateInput()
     *
     * @return bool true when the document validates (an exception is thrown otherwise)
     * @throws DOMException if the dump version is not supported or the XML
     *                      file does not validate against the schema
     */
    public function validateInput()
    {
        $supportedVersions = ['0.3', '0.4', '0.5', '0.6', '0.7', '0.8', '0.9', '0.10'];

        $roots = $this->dom->getElementsByTagName('mediawiki');

        if ($roots->length > 0) {
            $xmlVersion = $roots->item(0)->getAttribute('version');

            // Strict comparison on purpose: with a loose comparison the numeric strings
            // "0.1" and "0.10" (or "0.3" and "0.30") are equal, so an unsupported version
            // would be accepted here and then fail later with a misleading message.
            if (! in_array($xmlVersion, $supportedVersions, true)) {
                throw new DOMException(tr("MediaWiki XML file version %0 is not supported.", $xmlVersion));
            }

            $xmlSchemaFile = __DIR__ . "/mediawiki_dump_v$xmlVersion.xsd";

            // Warnings are silenced: a failed validation is reported through the exception below.
            if (@$this->dom->schemaValidate($xmlSchemaFile)) {
                return true;
            }
        }

        throw new DOMException(tra('The XML file does not validate against the MediaWiki XML schema'));
    }

    /**
     * Check for all the requirements to import attachments
     * and also set the $this->attachmentsDestDir.
     * If one of them is not satisfied the script will die.
     *
     * Requirements: the PHP setting 'allow_url_fopen' is enabled and the
     * destination directory exists and is writable.
     *
     * @return void
     */
    public function checkRequirementsForAttachments()
    {
        global $tikidomain;

        $this->attachmentsDestDir = __DIR__ . '/../../img/wiki_up/';
        if ($tikidomain) {
            // Keep a trailing slash: the file name is appended directly to this path later on.
            $this->attachmentsDestDir .= rtrim($tikidomain, '/') . '/';
        }

        // ini_get() returns a string ("1", "", "0", "On", "Off", ...) for an existing
        // setting, so it has to be interpreted as a boolean instead of compared with false.
        if (! filter_var(ini_get('allow_url_fopen'), FILTER_VALIDATE_BOOLEAN)) {
            $this->saveAndDisplayLog(
                tra(
                    "Aborting: you need to enable the PHP setting 'allow_url_fopen' to be able to import attachments. Fix the problem or try to import without the attachments."
                ) . "\n"
            );
            die;
        }

        if (! file_exists($this->attachmentsDestDir)) {
            $this->saveAndDisplayLog(
                tr(
                    'Aborting: the destination directory for attachments (%0) does not exist. Correct this problem or try to import without the attachments.',
                    $this->attachmentsDestDir
                ) . "\n"
            );
            die;
        } elseif (! is_writable($this->attachmentsDestDir)) {
            $this->saveAndDisplayLog(
                tr(
                    'Aborting: the destination directory for attachments (%0) is not writable. Correct this problem or try to import without attachments.',
                    $this->attachmentsDestDir
                ) . "\n"
            );
            die;
        }
    }

    /**
     * Walk through every <page> element of the dump and collect the data
     * of the wiki pages.
     *
     * A page that contains an <upload> element is an attachment and is
     * skipped here. For every other page the value returned by
     * $this->extractInfo() is appended to the result; when extractInfo()
     * throws an ImporterParserException its message is logged and the page
     * is left out.
     *
     * @return array $parsedData
     */
    public function parseData()
    {
        $pages = $this->dom->getElementsByTagName('page');

        $this->saveAndDisplayLog("\n" . tra("Parsing pages:") . "\n");

        $parsedData = [];
        foreach ($pages as $page) {
            // attachments are not wiki pages
            if ($page->getElementsByTagName('upload')->length != 0) {
                continue;
            }

            try {
                $parsedData[] = $this->extractInfo($page);
            } catch (ImporterParserException $e) {
                $this->saveAndDisplayLog($e->getMessage(), true);
            }
        }

        return $parsedData;
    }

    /**
     * Searches for the last version of each attachment in the XML file
     * and tries to download it to $this->attachmentsDestDir
     * (the img/wiki_up/ directory).
     *
     * An attachment is skipped (and the reason logged) when its <filename>
     * or <src> element is missing, when its name is not a plain file name,
     * when a file with the same name already exists in the destination
     * directory, or when it cannot be downloaded or saved.
     *
     * Note: it is not possible to generate the Mediawiki
     * XML file with the <upload> tag through the web interface
     * (Special:Export). This is only possible through the Mediawiki
     * script maintenance/dumpBackup.php with the experimental option
     * --uploads
     *
     * @return void
     */
    public function downloadAttachments()
    {
        $pages = $this->dom->getElementsByTagName('page');

        if ($this->dom->getElementsByTagName('upload')->length == 0) {
            $this->saveAndDisplayLog(
                "\n\n" .
                tra("No attachments were found to import. Be sure to create the XML file with the dumpBackup.php script and with the option --uploads. This is the only way to import attachments.") .
                "\n",
                true
            );
            return;
        }

        $this->saveAndDisplayLog("\n\n" . tra("Importing attachments:") . "\n");

        foreach ($pages as $page) {
            $attachments = $page->getElementsByTagName('upload');

            if ($attachments->length == 0) {
                continue;
            }

            // only the last <upload> of the page is imported
            $lastVersion = $attachments->item($attachments->length - 1);

            $fileNameNode = $lastVersion->getElementsByTagName('filename')->item(0);
            $fileUrlNode = $lastVersion->getElementsByTagName('src')->item(0);

            if ($fileNameNode === null || $fileUrlNode === null) {
                $this->saveAndDisplayLog(
                    tra('Skipping an attachment because its filename or src element is missing.') . "\n",
                    true
                );
                continue;
            }

            $fileName = $fileNameNode->nodeValue;
            $fileUrl = $fileUrlNode->nodeValue;

            // The name comes from the XML file and is appended to the destination directory:
            // refuse anything that could point outside of it.
            $isPlainName = $fileName !== ''
                && $fileName !== '.'
                && $fileName !== '..'
                && strpbrk($fileName, "/\\\0") === false;

            if (! $isPlainName) {
                $this->saveAndDisplayLog(
                    tr('File %0 is not being imported because its name is not a valid file name.', $fileName) . "\n",
                    true
                );
                continue;
            }

            $destination = $this->attachmentsDestDir . $fileName;

            if (file_exists($destination)) {
                $this->saveAndDisplayLog(
                    tr(
                        'File %0 is not being imported because there is already a file with the same name in the destination directory (%1)',
                        $fileName,
                        $this->attachmentsDestDir
                    ) . "\n",
                    true
                );
                continue;
            }

            // NOTE(review): $fileUrl is taken as-is from the XML file and may use any stream
            // wrapper PHP supports; consider restricting it to http(s) if dumps are untrusted.
            // Single request: no separate fopen() probe, so no handle is left open.
            $attachmentContent = @file_get_contents($fileUrl);

            if ($attachmentContent === false) {
                $this->saveAndDisplayLog(tr('Unable to download file %0. File not found.', $fileName) . "\n", true);
                continue;
            }

            if (@file_put_contents($destination, $attachmentContent) === false) {
                $this->saveAndDisplayLog(
                    tr(
                        'Unable to save file %0 in the destination directory (%1).',
                        $fileName,
                        $this->attachmentsDestDir
                    ) . "\n",
                    true
                );
                continue;
            }

            $this->saveAndDisplayLog(tr('File %0 successfully imported!', $fileName) . "\n");
        }
    }

    /**
     * Read the DOM representation of one Mediawiki page and return the values
     * that will be imported: the page name and the data of its revisions.
     *
     * When $this->revisionsNumber is not 0 and the page has more revisions
     * than that, only the last $this->revisionsNumber revisions are parsed.
     * A revision that fails to parse is logged and skipped.
     *
     * Note: the names of the keys are changed to reflect the names used by
     * Tiki builtin functions (i.e. 'title' is changed to 'name' as used in
     * TikiLib::create_page() which will be called by TikiImporter_Wiki::insertPage())
     *
     * @param DOMElement $page
     * @return array $data information for one wiki page ('revisions' and 'name')
     * @throws ImporterParserException if none of the page revisions could be parsed
     */
    public function extractInfo(DOMElement $page)
    {
        $data = ['revisions' => []];

        $totalRevisions = $page->getElementsByTagName('revision')->length;

        // true when only the most recent revisions have to be imported
        $limitRevisions = $this->revisionsNumber != 0 && $totalRevisions > $this->revisionsNumber;

        $revisionIndex = 0;
        foreach ($page->childNodes as $child) {
            if (! ($child instanceof DOMElement)) {
                continue;
            }

            $tag = $child->tagName;

            if ($tag === 'id') {
                continue;
            }

            if ($tag === 'title') {
                $data['name'] = (string) $child->textContent;
                continue;
            }

            if ($tag !== 'revision') {
                print "Unknown tag : {$tag}\n";
                continue;
            }

            $revisionIndex++;

            // skip the oldest revisions when a limit applies
            if ($limitRevisions && $revisionIndex <= ($totalRevisions - $this->revisionsNumber)) {
                continue;
            }

            try {
                $data['revisions'][] = $this->extractRevision($child);
            } catch (ImporterParserException $e) {
                $this->saveAndDisplayLog(
                    tr(
                        'Error while parsing revision %0 of the page "%1". There could be a problem in the page syntax or in the Text_Wiki parser used by the importer.',
                        $revisionIndex,
                        $data['name']
                    ) . "\n",
                    true
                );
            }
        }

        $countRevisions = count($data['revisions']);

        if ($countRevisions == 0) {
            throw new ImporterParserException(tr('Page "%0" is NOT going to be imported. It was not possible to parse any of the page revisions.', $data['name']) . "\n", true);
        }

        $this->saveAndDisplayLog(
            tr(
                'Page "%0" successfully parsed with %1 revisions (from a total of %2 revisions).',
                $data['name'],
                $countRevisions,
                $totalRevisions
            ) . "\n"
        );

        return $data;
    }

    /**
     * Read the DOM representation of one Mediawiki page revision and return
     * the values that will be imported (page content converted to Tiki syntax,
     * lastModif, minor, comment, user and ip address).
     *
     * 'minor' defaults to false and 'comment' to an empty string; the other
     * keys are only present when the matching element exists in the revision.
     * Categories are extracted from the converted text when the preference
     * 'feature_categories' is 'y'.
     *
     * Note: the names of the keys are changed to reflect the names used by
     * Tiki builtin functions (i.e. 'text' is changed to 'data' as used in TikiLib::create_page())
     *
     * @param DOMElement $revision
     * @return array $data information for one wiki page revision
     * @throws ImporterParserException if unable to parse revision content
     */
    public function extractRevision(DOMElement $revision)
    {
        global $prefs;

        $data = [
            'minor' => false,
            'comment' => '',
        ];

        foreach ($revision->childNodes as $child) {
            if (! ($child instanceof DOMElement)) {
                continue;
            }

            $tag = $child->tagName;

            // 'id' and any tag not listed below are ignored
            if ($tag === 'comment') {
                $data['comment'] = $child->textContent;
            } elseif ($tag === 'text') {
                $converted = $this->convertMarkup($child->textContent);
                if ($converted instanceof PEAR_Error) {
                    throw new ImporterParserException($converted->message);
                }

                $data['data'] = $converted;
                if ($prefs['feature_categories'] == 'y') {
                    $this->extractCategories($data);
                }
            } elseif ($tag === 'timestamp') {
                $data['lastModif'] = strtotime($child->textContent);
            } elseif ($tag === 'minor') {
                $data['minor'] = true;
            } elseif ($tag === 'contributor') {
                $data = array_merge($data, $this->extractContributor($child));
            }
        }

        return $data;
    }

    /**
     * Extracts the categories from the page data.
     *
     * Every "((Category:Name))" marker (with the whitespace that follows it)
     * is removed from $data['data'] and the names are stored, trimmed and
     * without empty entries, in $data['categories']. Nothing is changed when
     * $data['data'] is missing, is not a string or has no category marker.
     *
     * @param array $data revision data, modified in place
     * @return void
     **/
    public function extractCategories(&$data)
    {
        // convertMarkup() gives no text for an empty page: nothing to look at
        if (! isset($data['data']) || ! is_string($data['data'])) {
            return;
        }

        $pattern = '/(\(\(Category:(\s*[^\)]+\s*)\)\)\s*)/';

        if (! preg_match_all($pattern, $data['data'], $matches)) {
            return;
        }

        $data['data'] = str_replace($matches[1], '', $data['data']);

        // "((Category: Foo ))" must give the category "Foo", not " Foo "
        $names = array_map('trim', $matches[2]);
        $names = array_filter(
            $names,
            function ($name) {
                return $name !== '';
            }
        );

        $data['categories'] = array_values($names);
    }

    /**
     * Read the DOM representation of the contributor of a Mediawiki page
     * revision and return the user name and the ip address.
     *
     * Missing values are replaced by 'anonymous' (user) and '0.0.0.0' (ip).
     * The <id> element is ignored and any other unexpected element is
     * reported with print.
     *
     * @param DOMElement $contributor
     * @return array $data with the keys 'user' and 'ip'
     */
    public function extractContributor(DOMElement $contributor)
    {
        $data = [];

        foreach ($contributor->childNodes as $child) {
            if (! ($child instanceof DOMElement)) {
                continue;
            }

            $tag = $child->tagName;

            if ($tag === 'id') {
                continue;
            }

            if ($tag === 'ip') {
                $data['ip'] = (string) $child->textContent;
            } elseif ($tag === 'username') {
                $data['user'] = (string) $child->textContent;
            } else {
                print "Unknown tag in contributor: {$tag}\n";
            }
        }

        // array union only adds the keys that are still missing
        return $data + ['user' => 'anonymous', 'ip' => '0.0.0.0'];
    }

    /**
     * Utility for converting MediaWiki markup to TikiWiki markup
     * Uses Text_Wiki PEAR library for heavy lifting
     *
     * Empty input is not sent to the parser and gives null. The text "0"
     * is valid page content and is converted like any other text.
     *
     * @param string $mediawikiText
     * @return string|PEAR_Error|null the value returned by the parser
     *         (callers check it for PEAR_Error), or null for empty input
     */
    public function convertMarkup($mediawikiText)
    {
        // empty() alone would also discard the string "0"
        if (empty($mediawikiText) && $mediawikiText !== '0') {
            return null;
        }

        return $this->parser->transform($mediawikiText, 'Tiki');
    }
}