<?php
|
|
|
|
// (c) Copyright by authors of the Tiki Wiki CMS Groupware Project
|
|
//
|
|
// All Rights Reserved. See copyright.txt for details and a complete list of authors.
|
|
// Licensed under the GNU LESSER GENERAL PUBLIC LICENSE. See license.txt for details.
|
|
// $Id$
|
|
|
|
require_once('tikiimporter_wiki.php');
|
|
|
|
/**
|
|
* Parses a MediaWiki-style XML dump to import it into TikiWiki.
|
|
* Requires PHP5 DOM extension.
|
|
* Based on the work done on http://dev.tiki.org/MediaWiki+to+TikiWiki+converter
|
|
*
|
|
* @package tikiimporter
|
|
*/
|
|
class TikiImporter_Wiki_Mediawiki extends TikiImporter_Wiki
{
    /**
     * Name of the software handled by this importer.
     */
    public $softwareName = 'Mediawiki';

    /**
     * The DOM representation of the Mediawiki XML dump.
     * Holds an empty string until import() loads the file.
     *
     * @var DOMDocument|string
     */
    public $dom = '';

    /**
     * Array of the valid MIME types for the input file.
     */
    public $validTypes = ['application/xml', 'text/xml', 'text/html'];

    /**
     * The directory used to save the attachments (ends with a slash).
     * It is set by checkRequirementsForAttachments(), which import()
     * calls when attachments are requested.
     */
    public $attachmentsDestDir = '';

    /**
     * Text_Wiki object used to parse Mediawiki syntax.
     * Holds an empty string until configureParser() runs.
     */
    public $parser = '';

    /**
     * Describe the options offered to the user for this importer.
     *
     * @see lib/importer/TikiImporter#importOptions()
     *
     * @return array list of option descriptors (name, type, label)
     */
    public static function importOptions()
    {
        $options = [
            [
                'name' => 'importAttachments',
                'type' => 'checkbox',
                'label' => tra('Import images and attachments (see documentation for more information)')
            ],
            [
                'name' => 'maketoc',
                'type' => 'checkbox',
                'label' => tra('Add a maketoc at the top of each page')
            ],
        ];

        return $options;
    }

    /**
     * Check for DOMDocument.
     *
     * @see lib/importer/TikiImporter#checkRequirements()
     *
     * @return void
     * @throws Exception if DOMDocument not available
     */
    public function checkRequirements()
    {
        if (! class_exists('DOMDocument')) {
            throw new Exception(tra('Class DOMDocument not available, check your PHP installation. For more information see http://php.net/manual/en/book.dom.php'));
        }
    }

    /**
     * Start the importing process by loading the XML file.
     *
     * Steps, in order: check the uploaded file MIME type, check the
     * attachment requirements (when requested), load the XML into
     * $this->dom, configure the parser, download the attachments (when
     * requested) and finally hand over to the parent import().
     *
     * @see lib/importer/TikiImporter_Wiki#import()
     *
     * @param string $filePath path to the XML file
     * @return void
     * @throws UnexpectedValueException if invalid file mime type
     */
    public function import($filePath = null)
    {
        if ($filePath == null) {
            die("This particular implementation of the method requires an explicity file path.");
        }

        if (isset($_FILES['importFile']) && ! in_array($_FILES['importFile']['type'], $this->validTypes, true)) {
            throw new UnexpectedValueException(tra('Invalid file MIME type'));
        }

        // Evaluate the checkbox once; it drives both the pre-check and the download.
        $importAttachments = ! empty($_POST['importAttachments']) && $_POST['importAttachments'] == 'on';

        if ($importAttachments) {
            $this->checkRequirementsForAttachments();
        }

        $this->saveAndDisplayLog("Loading and validating the XML file\n");

        $this->dom = new DOMDocument();
        $this->dom->load($filePath);

        $this->configureParser();

        if ($importAttachments) {
            $this->downloadAttachments();
        }

        parent::import();
    }

    /**
     * Create a Text_Wiki object to handle the parsing
     * of Mediawiki syntax and define some configuration
     * options.
     *
     * Requires $this->dom to be loaded, because the localized image/file
     * namespace names are read from the dump itself.
     *
     * @return void
     */
    public function configureParser()
    {
        $this->parser = Text_Wiki::factory('Mediawiki');

        // do not replace space by underscore in wikilinks
        $this->parser->setParseConf('Wikilink', 'spaceUnderscore', false);

        // define possible localized namespace for image and files in the wikilink syntax
        // (namespace keys -2 and 6; presumably MediaWiki's media and file namespaces - confirm)
        $prefix = ['Image', 'image'];
        foreach ($this->dom->getElementsByTagName('namespace') as $namespace) {
            $key = $namespace->getAttribute('key');
            if ($key == '-2' || $key == '6') {
                $prefix[] = $namespace->nodeValue;
            }
        }
        $this->parser->setParseConf('Image', 'prefix', $prefix);
    }

    /**
     * At present this method only validates the Mediawiki XML
     * against its XML schema. Mediawiki XML versions from 0.3
     * till 0.10 are supported.
     *
     * Note: we use schemaValidate() instead of validate() because
     * for some unknown reason the latter method is unable to automatically
     * retrieve Mediawiki XML DTD and dies with "no DTD found" error.
     *
     * @see lib/importer/TikiImporter#validateInput()
     *
     * @return bool true when the document validates
     * @throws DOMException if the version is unsupported or the XML file
     *                      does not validate against the schema
     */
    public function validateInput()
    {
        $supportedVersions = ['0.3', '0.4', '0.5', '0.6', '0.7', '0.8', '0.9', '0.10'];

        $mediawiki = $this->dom->getElementsByTagName('mediawiki');

        if ($mediawiki->length > 0) {
            $xmlVersion = $mediawiki->item(0)->getAttribute('version');

            // Strict match on purpose: a loose comparison treats numeric strings
            // such as "0.1" and "0.10" (or "0.3" and "0.30") as equal, which would
            // accept a version for which no schema file exists.
            if (! in_array($xmlVersion, $supportedVersions, true)) {
                throw new DOMException(tr("MediaWiki XML file version %0 is not supported.", $xmlVersion));
            }

            $xmlDtdFile = __DIR__ . "/mediawiki_dump_v$xmlVersion.xsd";

            if (@$this->dom->schemaValidate($xmlDtdFile)) {
                return true;
            }
        }

        throw new DOMException(tra('The XML file does not validate against the MediaWiki XML schema'));
    }

    /**
     * Check for all the requirements to import attachments
     * and also set the $this->attachmentsDestDir.
     * If one of them is not satisfied the script will die.
     *
     * @return void
     */
    public function checkRequirementsForAttachments()
    {
        global $tikidomain;

        $this->attachmentsDestDir = __DIR__ . '/../../img/wiki_up/';
        if ($tikidomain) {
            // Keep the trailing slash: file names are appended directly to this path.
            $this->attachmentsDestDir .= rtrim($tikidomain, '/') . '/';
        }

        // ini_get() returns '' or '0' (not false) when the setting is off, so the
        // value has to be interpreted as a boolean instead of compared to false.
        if (! filter_var(ini_get('allow_url_fopen'), FILTER_VALIDATE_BOOLEAN)) {
            $this->saveAndDisplayLog(
                tra(
                    "Aborting: you need to enable the PHP setting 'allow_url_fopen' to be able to import attachments. Fix the problem or try to import without the attachments."
                ) . "\n"
            );
            die;
        }

        if (! file_exists($this->attachmentsDestDir)) {
            $this->saveAndDisplayLog(
                tr(
                    'Aborting: the destination directory for attachments (%0) does not exist. Correct this problem or try to import without the attachments.',
                    $this->attachmentsDestDir
                ) . "\n"
            );
            die;
        } elseif (! is_writable($this->attachmentsDestDir)) {
            $this->saveAndDisplayLog(
                tr(
                    'Aborting: the destination directory for attachments (%0) is not writable. Correct this problem or try to import without attachments.',
                    $this->attachmentsDestDir
                ) . "\n"
            );
            die;
        }
    }

    /**
     * Walk over every <page> of the dump and collect the parsed wiki pages.
     *
     * Pages containing an <upload> element are attachments and are skipped
     * here. For every other page the value returned by $this->extractInfo()
     * is appended to the result; a page whose parsing raises an
     * ImporterParserException is logged and left out.
     *
     * @return array $parsedData
     */
    public function parseData()
    {
        $parsedData = [];
        $pages = $this->dom->getElementsByTagName('page');

        $this->saveAndDisplayLog("\n" . tra("Parsing pages:") . "\n");

        foreach ($pages as $page) {
            // is a wiki page and not an attachment
            if ($page->getElementsByTagName('upload')->length == 0) {
                try {
                    $parsedData[] = $this->extractInfo($page);
                } catch (ImporterParserException $e) {
                    $this->saveAndDisplayLog($e->getMessage(), true);
                }
            }
        }

        return $parsedData;
    }

    /**
     * Searches for the last version of each attachment in the XML file
     * and tries to download it to $this->attachmentsDestDir.
     *
     * Existing files are never overwritten. Every outcome (skipped,
     * failed, imported) is written to the import log.
     *
     * Note: it is not possible to generate the Mediawiki
     * XML file with the <upload> tag through the web interface
     * (Special:Export). This is only possible through the Mediawiki
     * script maintenance/dumpBackup.php with the experimental option
     * --uploads
     *
     * @return void
     */
    public function downloadAttachments()
    {
        $pages = $this->dom->getElementsByTagName('page');

        if ($this->dom->getElementsByTagName('upload')->length == 0) {
            // NOTE(review): the message names "dumpDump.php" although the script is
            // documented above as dumpBackup.php. The text is left untouched because
            // it is presumably used as a translation lookup key - confirm before changing.
            $this->saveAndDisplayLog(
                "\n\n" .
                tra("No attachments were found to import. Be sure to create the XML file with the dumpDump.php script and with the option --uploads. This is the only way to import attachments.") .
                "\n",
                true
            );
            return;
        }

        $this->saveAndDisplayLog("\n\n" . tra("Importing attachments:") . "\n");

        foreach ($pages as $page) {
            $attachments = $page->getElementsByTagName('upload');
            if ($attachments->length == 0) {
                continue;
            }

            // Only the last <upload> of the page is imported.
            $lastVersion = $attachments->item($attachments->length - 1);
            $fileNameNode = $lastVersion->getElementsByTagName('filename')->item(0);
            $fileUrlNode = $lastVersion->getElementsByTagName('src')->item(0);

            if ($fileNameNode === null || $fileUrlNode === null) {
                $this->saveAndDisplayLog(
                    tra('Skipping an attachment without a file name or a source URL.') . "\n",
                    true
                );
                continue;
            }

            // The name comes from the XML file: never let it point outside the destination directory.
            $fileName = $this->sanitizeAttachmentName($fileNameNode->nodeValue);
            $fileUrl = $fileUrlNode->nodeValue;

            if ($fileName === '') {
                $this->saveAndDisplayLog(
                    tra('Skipping an attachment without a file name or a source URL.') . "\n",
                    true
                );
                continue;
            }

            $destination = $this->attachmentsDestDir . $fileName;

            if (file_exists($destination)) {
                $this->saveAndDisplayLog(
                    tr(
                        'File %0 is not being imported because there is already a file with the same name in the destination directory (%1)',
                        $fileName,
                        $this->attachmentsDestDir
                    ) . "\n",
                    true
                );
                continue;
            }

            // NOTE(review): $fileUrl is taken from the dump as is; consider restricting
            // it to http(s) URLs if dumps from untrusted sources must be supported.
            // A single fetch replaces the former fopen() probe, which leaked a stream
            // and made the remote file be requested twice.
            $attachmentContent = @file_get_contents($fileUrl);
            if ($attachmentContent === false) {
                $this->saveAndDisplayLog(tr('Unable to download file %0. File not found.', $fileName) . "\n", true);
                continue;
            }

            // file_put_contents() opens, writes and closes the file in one call.
            if (@file_put_contents($destination, $attachmentContent) === false) {
                $this->saveAndDisplayLog(
                    tr(
                        'Unable to save file %0 to the destination directory (%1)',
                        $fileName,
                        $this->attachmentsDestDir
                    ) . "\n",
                    true
                );
                continue;
            }

            $this->saveAndDisplayLog(tr('File %0 successfully imported!', $fileName) . "\n");
        }
    }

    /**
     * Reduce an attachment name read from the dump to a bare file name.
     *
     * Any directory part (using / or \ as separator) is dropped. Byte-wise
     * string functions are used on purpose so that multibyte file names are
     * not affected by the current locale.
     *
     * @param string $rawName file name as found in the <filename> element
     * @return string the bare file name, or '' when nothing usable is left
     */
    private function sanitizeAttachmentName($rawName)
    {
        $name = str_replace('\\', '/', (string) $rawName);

        $lastSlash = strrpos($name, '/');
        if ($lastSlash !== false) {
            $name = (string) substr($name, $lastSlash + 1);
        }

        if ($name === '.' || $name === '..') {
            return '';
        }

        return $name;
    }

    /**
     * Parse a DOM representation of a Mediawiki page and return all the values
     * that will be imported (page name, page content for all revisions). The
     * property TikiImporter_Wiki::revisionsNumber defines how many wiki page
     * revisions are parsed: when it is not 0 and the page has more revisions
     * than that, only the last revisionsNumber revisions are parsed.
     *
     * Note: the names of the keys are changed to reflect the names used by
     * Tiki builtin function (i.e. 'title' is changed to 'name' as used in
     * TikiLib::create_page() which will be called by TikiImporter_Wiki::insertPage())
     *
     * @param DOMElement $page
     * @return array $data information for one wiki page
     * @throws ImporterParserException if fail to parse all revisions of a page
     */
    public function extractInfo(DOMElement $page)
    {
        $data = [];
        $data['revisions'] = [];

        // Used in log messages only, so that a page without <title> does not
        // trigger an undefined index while reporting.
        $pageName = '';

        $totalRevisions = $page->getElementsByTagName('revision')->length;

        // Number of leading (oldest) revisions to skip; 0 means every revision is parsed.
        $skippedRevisions = 0;
        if ($this->revisionsNumber != 0 && $totalRevisions > $this->revisionsNumber) {
            $skippedRevisions = $totalRevisions - $this->revisionsNumber;
        }

        $i = 0;
        foreach ($page->childNodes as $node) {
            if (! ($node instanceof DOMElement)) {
                continue;
            }

            switch ($node->tagName) {
                case 'id':
                    break;

                case 'title':
                    $data['name'] = (string) $node->textContent;
                    $pageName = $data['name'];
                    break;

                case 'revision':
                    $i++;
                    if ($i > $skippedRevisions) {
                        try {
                            $data['revisions'][] = $this->extractRevision($node);
                        } catch (ImporterParserException $e) {
                            // A broken revision is reported and skipped; the others are still imported.
                            $this->saveAndDisplayLog(
                                tr(
                                    'Error while parsing revision %0 of the page "%1". There could be a problem in the page syntax or in the Text_Wiki parser used by the importer.',
                                    $i,
                                    $pageName
                                ) . "\n",
                                true
                            );
                        }
                    }
                    break;

                default:
                    print "Unknown tag : {$node->tagName}\n";
            }
        }

        $countRevisions = count($data['revisions']);
        if ($countRevisions == 0) {
            throw new ImporterParserException(tr('Page "%0" is NOT going to be imported. It was not possible to parse any of the page revisions.', $pageName) . "\n", true);
        }

        $msg = tr(
            'Page "%0" successfully parsed with %1 revisions (from a total of %2 revisions).',
            $pageName,
            $countRevisions,
            $totalRevisions
        ) . "\n";
        $this->saveAndDisplayLog($msg);

        return $data;
    }

    /**
     * Parse a DOM representation of a Mediawiki page revision and return all the values
     * that will be imported (page content converted to Tiki syntax, lastModif, minor,
     * comment, user and ip address).
     *
     * Note: the names of the keys are changed to reflect the names used by
     * Tiki builtin function (i.e. 'text' is changed to 'data' as used in TikiLib::create_page())
     *
     * @param DOMElement $revision
     * @return array $data information for one wiki page revision
     * @throws ImporterParserException if unable to parse revision content
     */
    public function extractRevision(DOMElement $revision)
    {
        global $prefs;

        $data = [];
        $data['minor'] = false;
        $data['comment'] = '';

        foreach ($revision->childNodes as $node) {
            if (! ($node instanceof DOMElement)) {
                continue;
            }

            switch ($node->tagName) {
                case 'id':
                    break;

                case 'comment':
                    $data['comment'] = $node->textContent;
                    break;

                case 'text':
                    $text = $this->convertMarkup($node->textContent);
                    if ($text instanceof PEAR_Error) {
                        throw new ImporterParserException($text->message);
                    }

                    $data['data'] = $text;
                    // Categories are only pulled out of the text when the feature is enabled.
                    if (isset($prefs['feature_categories']) && $prefs['feature_categories'] == 'y') {
                        $this->extractCategories($data);
                    }
                    break;

                case 'timestamp':
                    $data['lastModif'] = strtotime($node->textContent);
                    break;

                case 'minor':
                    // The mere presence of the element marks the revision as minor.
                    $data['minor'] = true;
                    break;

                case 'contributor':
                    $data = array_merge($data, $this->extractContributor($node));
                    break;
            }
        }

        return $data;
    }

    /**
     * Extracts the categories from the page data.
     *
     * Every "((Category:...))" link (with its trailing whitespace) is removed
     * from $data['data'] and the captured names are stored, in order of
     * appearance, in $data['categories']. Nothing is changed when no such
     * link is found.
     *
     * @param array $data revision data, modified in place
     * @return void
     */
    public function extractCategories(&$data)
    {
        if (! isset($data['data']) || ! is_string($data['data'])) {
            return;
        }

        if (preg_match_all('/(\(\(Category:(\s*[^\)]+\s*)\)\)\s*)/', $data['data'], $matches)) {
            // $matches[1] holds the full links to strip, $matches[2] the category names.
            $data['data'] = str_replace($matches[1], '', $data['data']);
            $data['categories'] = $matches[2];
        }
    }

    /**
     * Parse a DOM representation of a Mediawiki page revision contributor and return
     * the username and ip address.
     *
     * Missing values default to 'anonymous' (user) and '0.0.0.0' (ip).
     *
     * @param DOMElement $contributor
     * @return array $data with the keys 'user' and 'ip'
     */
    public function extractContributor(DOMElement $contributor)
    {
        $data = [];

        foreach ($contributor->childNodes as $node) {
            if (! ($node instanceof DOMElement)) {
                continue;
            }

            switch ($node->tagName) {
                case 'id':
                    break;
                case 'ip':
                    $data['ip'] = (string) $node->textContent;
                    break;
                case 'username':
                    $data['user'] = (string) $node->textContent;
                    break;
                default:
                    print "Unknown tag in contributor: {$node->tagName}\n";
            }
        }

        if (! isset($data['user'])) {
            $data['user'] = 'anonymous';
        }

        if (! isset($data['ip'])) {
            $data['ip'] = '0.0.0.0';
        }

        return $data;
    }

    /**
     * Utility for converting MediaWiki markup to TikiWiki markup.
     * Uses Text_Wiki PEAR library for heavy lifting.
     *
     * Empty input is returned as an empty string without calling the parser.
     * The literal text "0" is not considered empty and is converted normally.
     *
     * @param string $mediawikiText
     * @return string $tikiText the converted text; whatever the parser returns
     *                is passed through unchanged (extractRevision() checks it
     *                for a PEAR_Error)
     */
    public function convertMarkup($mediawikiText)
    {
        if (empty($mediawikiText) && $mediawikiText !== '0') {
            return '';
        }

        return $this->parser->transform($mediawikiText, 'Tiki');
    }
}
|