You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

524 lines
18 KiB

<?php
// (c) Copyright by authors of the Tiki Wiki CMS Groupware Project
//
// All Rights Reserved. See copyright.txt for details and a complete list of authors.
// Licensed under the GNU LESSER GENERAL PUBLIC LICENSE. See license.txt for details.
// $Id$
require_once('tikiimporter_wiki.php');
/**
* Parses a MediaWiki-style XML dump to import it into TikiWiki.
* Requires PHP5 DOM extension.
* Based on the work done on http://dev.tiki.org/MediaWiki+to+TikiWiki+converter
*
* @package tikiimporter
*/
class TikiImporter_Wiki_Mediawiki extends TikiImporter_Wiki
{
public $softwareName = 'Mediawiki';
/**
* The DOM representation of the Mediawiki XML dump
* @var DOMDocument object
*/
public $dom = '';
/**
* Array of the valid mime types for the
* input file
*/
public $validTypes = ['application/xml', 'text/xml', 'text/html'];
/**
* The directory used to save the attachments.
* It is defined on $this->import()
*/
public $attachmentsDestDir = '';
/**
* Text_Wiki object to handle Mediawiki
* syntax parsing
*/
public $parser = '';
/**
 * Describe the additional options offered by this importer.
 *
 * Every option is an array with the keys 'name', 'type' and 'label';
 * the labels go through tra() so they can be translated.
 *
 * @see lib/importer/TikiImporter#importOptions()
 *
 * @return array list of option definitions
 */
public static function importOptions()
{
    $attachmentsOption = [
        'name' => 'importAttachments',
        'type' => 'checkbox',
        'label' => tra('Import images and attachments (see documentation for more information)'),
    ];

    $maketocOption = [
        'name' => 'maketoc',
        'type' => 'checkbox',
        'label' => tra('Add a maketoc at the top of each page'),
    ];

    return [$attachmentsOption, $maketocOption];
}
/**
 * Make sure the DOMDocument class is available, since the whole
 * importer relies on it to read the XML dump.
 *
 * @see lib/importer/TikiImporter#checkRequirements()
 *
 * @return void
 * @throws Exception if the DOMDocument class does not exist
 */
public function checkRequirements()
{
    if (class_exists('DOMDocument')) {
        return;
    }

    $message = tra('Class DOMDocument not available, check your PHP installation. For more information see http://php.net/manual/en/book.dom.php');

    throw new Exception($message);
}
/**
 * Entry point of the import: load the XML dump into $this->dom, set up
 * the markup parser, optionally fetch the attachments and then hand over
 * to the parent implementation.
 *
 * Attachments are handled only when the 'importAttachments' POST field
 * is set to 'on'. If an 'importFile' upload is present its MIME type
 * must be one of $this->validTypes.
 *
 * Note: a missing file path terminates the script instead of throwing.
 *
 * @see lib/importer/TikiImporter_Wiki#import()
 *
 * @param string $filePath path to the XML file
 * @return void
 * @throws UnexpectedValueException if invalid file mime type
 */
public function import($filePath = null)
{
    if (null == $filePath) {
        exit("This particular implementation of the method requires an explicity file path.");
    }

    $hasUpload = isset($_FILES['importFile']);
    if ($hasUpload && ! in_array($_FILES['importFile']['type'], $this->validTypes)) {
        throw new UnexpectedValueException(tra('Invalid file MIME type'));
    }

    // The form checkbox arrives as the string 'on' when ticked.
    $withAttachments = ! empty($_POST['importAttachments']) && $_POST['importAttachments'] == 'on';

    if ($withAttachments) {
        $this->checkRequirementsForAttachments();
    }

    $this->saveAndDisplayLog("Loading and validating the XML file\n");

    $this->dom = new DOMDocument();
    $this->dom->load($filePath);

    // The parser configuration reads the namespaces from the loaded DOM.
    $this->configureParser();

    if ($withAttachments) {
        $this->downloadAttachments();
    }

    parent::import();
}
/**
 * Build the Text_Wiki parser for Mediawiki syntax, store it in
 * $this->parser and apply the importer specific settings.
 *
 * Requires $this->dom to be loaded: the localized names of the image/file
 * namespaces are read from the <namespace> elements of the dump.
 *
 * @return void
 */
public function configureParser()
{
    $this->parser = Text_Wiki::factory('Mediawiki');
    $parser = $this->parser;

    // Keep spaces in wikilinks instead of turning them into underscores.
    $parser->setParseConf('Wikilink', 'spaceUnderscore', false);

    // Collect every prefix that may introduce an image or file link:
    // the defaults plus the names of the namespaces with key -2 or 6.
    $imagePrefixes = ['Image', 'image'];
    foreach ($this->dom->getElementsByTagName('namespace') as $namespaceNode) {
        $key = $namespaceNode->getAttribute('key');
        if (in_array($key, ['-2', '6'])) {
            $imagePrefixes[] = $namespaceNode->nodeValue;
        }
    }

    $parser->setParseConf('Image', 'prefix', $imagePrefixes);
}
/**
 * Validate the loaded Mediawiki XML dump against the XML Schema (XSD)
 * matching its version. Mediawiki XML versions from 0.3 till 0.10 are
 * supported; the schema files are expected next to this file as
 * mediawiki_dump_v<version>.xsd.
 *
 * Note: we use schemaValidate() instead of validate() because
 * for some unknown reason the latter method is unable to automatically
 * retrieve the Mediawiki XML DTD and dies with "no DTD found" error.
 *
 * @see lib/importer/TikiImporter#validateInput()
 *
 * @return bool true when the document validates
 * @throws DOMException if the version is not supported or the XML file
 *                      does not validate against the schema
 */
public function validateInput()
{
    $supportedVersions = ['0.3', '0.4', '0.5', '0.6', '0.7', '0.8', '0.9', '0.10'];

    $mediawiki = $this->dom->getElementsByTagName('mediawiki');

    if ($mediawiki->length > 0) {
        $xmlVersion = $mediawiki->item(0)->getAttribute('version');

        // Strict comparison is required here: with loose comparison the
        // numeric strings '0.1' and '0.10' are equal, so an unsupported
        // 0.1 dump would be accepted and a missing schema file looked up.
        if (! in_array($xmlVersion, $supportedVersions, true)) {
            throw new DOMException(tr("MediaWiki XML file version %0 is not supported.", $xmlVersion));
        }

        $xmlSchemaFile = __DIR__ . "/mediawiki_dump_v$xmlVersion.xsd";

        // Warnings are suppressed on purpose: a failed validation is
        // reported through the exception below.
        if (@$this->dom->schemaValidate($xmlSchemaFile)) {
            return true;
        }
    }

    throw new DOMException(tra('The XML file does not validate against the MediaWiki XML schema'));
}
/**
 * Check for all the requirements to import attachments
 * and also set the $this->attachmentsDestDir (always with a trailing
 * slash, so a file name can be appended directly).
 * If one of them is not satisfied the script will die.
 *
 * Requirements checked:
 *  - the PHP setting 'allow_url_fopen' is enabled;
 *  - the destination directory exists;
 *  - the destination directory is writable.
 *
 * @return void
 */
public function checkRequirementsForAttachments()
{
    global $tikidomain;

    $this->attachmentsDestDir = __DIR__ . '/../../img/wiki_up/';

    if ($tikidomain) {
        // Keep the trailing slash: other methods build the file path with
        // $this->attachmentsDestDir . $fileName.
        $this->attachmentsDestDir .= rtrim($tikidomain, '/') . '/';
    }

    // ini_get() returns '' or '0' (not false) for a disabled boolean
    // directive, and may return the literal ini value such as 'Off'.
    if (! filter_var(ini_get('allow_url_fopen'), FILTER_VALIDATE_BOOLEAN)) {
        $this->saveAndDisplayLog(
            tra(
                "Aborting: you need to enable the PHP setting 'allow_url_fopen' to be able to import attachments. Fix the problem or try to import without the attachments."
            ) . "\n"
        );
        die;
    }

    if (! file_exists($this->attachmentsDestDir)) {
        $this->saveAndDisplayLog(
            tr(
                'Aborting: the destination directory for attachments (%0) does not exist. Correct this problem or try to import without the attachments.',
                $this->attachmentsDestDir
            ) . "\n"
        );
        die;
    } elseif (! is_writable($this->attachmentsDestDir)) {
        $this->saveAndDisplayLog(
            tr(
                'Aborting: the destination directory for attachments (%0) is not writable. Correct this problem or try to import without attachments.',
                $this->attachmentsDestDir
            ) . "\n"
        );
        die;
    }
}
/**
 * Walk over every <page> element of the dump and collect the parsed
 * information of the wiki pages.
 *
 * Pages containing an <upload> element are attachments and are skipped
 * here. For every other page the value returned by $this->extractInfo()
 * is appended to the result; a page that cannot be parsed is logged and
 * left out.
 *
 * @return array $parsedData one entry per successfully parsed page
 */
public function parseData()
{
    $collected = [];
    $pageNodes = $this->dom->getElementsByTagName('page');

    $this->saveAndDisplayLog("\n" . tra("Parsing pages:") . "\n");

    foreach ($pageNodes as $pageNode) {
        // Attachment pages carry at least one <upload> element.
        if ($pageNode->getElementsByTagName('upload')->length != 0) {
            continue;
        }

        try {
            $collected[] = $this->extractInfo($pageNode);
        } catch (ImporterParserException $error) {
            $this->saveAndDisplayLog($error->getMessage(), true);
        }
    }

    return $collected;
}
/**
 * Searches for the last version of each attachment in the XML file
 * and tries to download it to $this->attachmentsDestDir
 * (the img/wiki_up/ directory).
 *
 * An attachment is skipped (and the reason logged) when its <filename> or
 * <src> element is missing, when its name contains a directory separator,
 * when a file with the same name already exists in the destination
 * directory, or when the download or the write fails.
 *
 * Note: it is not possible to generate the Mediawiki
 * XML file with the <upload> tag through the web interface
 * (Special:Export). This is only possible through the Mediawiki
 * script maintenance/dumpBackup.php with the experimental option
 * --uploads
 *
 * @return void
 */
public function downloadAttachments()
{
    $pages = $this->dom->getElementsByTagName('page');

    if ($this->dom->getElementsByTagName('upload')->length == 0) {
        $this->saveAndDisplayLog(
            "\n\n" .
            tra("No attachments were found to import. Be sure to create the XML file with the dumpBackup.php script and with the option --uploads. This is the only way to import attachments.") .
            "\n",
            true
        );
        return;
    }

    $this->saveAndDisplayLog("\n\n" . tra("Importing attachments:") . "\n");

    foreach ($pages as $page) {
        $attachments = $page->getElementsByTagName('upload');
        if ($attachments->length == 0) {
            continue;
        }

        // Only the last <upload> element (the latest version) is imported.
        $lastVersion = $attachments->item($attachments->length - 1);
        $fileNameNode = $lastVersion->getElementsByTagName('filename')->item(0);
        $fileUrlNode = $lastVersion->getElementsByTagName('src')->item(0);

        if ($fileNameNode === null || $fileUrlNode === null) {
            $this->saveAndDisplayLog(
                tra('Skipping an attachment without file name or source URL.') . "\n",
                true
            );
            continue;
        }

        $fileName = $fileNameNode->nodeValue;
        $fileUrl = $fileUrlNode->nodeValue;

        // The file name comes from the (untrusted) XML dump: never let it
        // point outside of the destination directory.
        if ($fileName === '' || strpbrk($fileName, '/\\') !== false) {
            $this->saveAndDisplayLog(
                tr('File %0 is not being imported because its name is empty or contains a directory separator.', $fileName) . "\n",
                true
            );
            continue;
        }

        $destination = $this->attachmentsDestDir . $fileName;

        if (file_exists($destination)) {
            $this->saveAndDisplayLog(
                tr(
                    'File %0 is not being imported because there is already a file with the same name in the destination directory (%1)',
                    $fileName,
                    $this->attachmentsDestDir
                ) . "\n",
                true
            );
            continue;
        }

        // NOTE(review): $fileUrl is taken as-is from the dump, so any stream
        // wrapper or local path readable by PHP is accepted - confirm whether
        // it should be restricted to http(s).
        // Warnings are suppressed because failures are reported in the log.
        $attachmentContent = @file_get_contents($fileUrl);
        if ($attachmentContent === false) {
            $this->saveAndDisplayLog(tr('Unable to download file %0. File not found.', $fileName) . "\n", true);
            continue;
        }

        if (@file_put_contents($destination, $attachmentContent) === false) {
            $this->saveAndDisplayLog(
                tr('Unable to save file %0 in the destination directory (%1).', $fileName, $this->attachmentsDestDir) . "\n",
                true
            );
            continue;
        }

        $this->saveAndDisplayLog(tr('File %0 successfully imported!', $fileName) . "\n");
    }
}
/**
 * Turn the DOM representation of one Mediawiki page into the array that
 * will be imported: the page name and the parsed revisions.
 *
 * When TikiImporter_Wiki::revisionsNumber is not 0 and the page has more
 * revisions than that, only the last revisionsNumber revisions are parsed.
 * A revision that fails to parse is logged and skipped. Child elements
 * other than <id>, <title> and <revision> are reported with print.
 *
 * Note: the key names follow the names used by the Tiki builtin functions
 * (i.e. 'title' becomes 'name' as used in TikiLib::create_page(), which
 * will be called by TikiImporter_Wiki::insertPage()).
 *
 * @param DOMElement $page
 * @return array $data information for one wiki page ('name', 'revisions')
 * @throws ImporterParserException if none of the revisions could be parsed
 */
public function extractInfo(DOMElement $page)
{
    $data = ['revisions' => []];

    $totalRevisions = $page->getElementsByTagName('revision')->length;
    $limitRevisions = $this->revisionsNumber != 0 && $totalRevisions > $this->revisionsNumber;
    $revisionIndex = 0;

    foreach ($page->childNodes as $child) {
        if (! ($child instanceof DOMElement)) {
            continue;
        }

        $tag = $child->tagName;

        if ($tag === 'id') {
            continue;
        }

        if ($tag === 'title') {
            $data['name'] = (string) $child->textContent;
            continue;
        }

        if ($tag !== 'revision') {
            print "Unknown tag : {$child->tagName}\n";
            continue;
        }

        $revisionIndex++;

        // With a limit in place, skip the oldest revisions so that only the
        // last revisionsNumber ones are parsed.
        if ($limitRevisions && $revisionIndex <= ($totalRevisions - $this->revisionsNumber)) {
            continue;
        }

        try {
            $data['revisions'][] = $this->extractRevision($child);
        } catch (ImporterParserException $error) {
            $this->saveAndDisplayLog(
                tr(
                    'Error while parsing revision %0 of the page "%1". There could be a problem in the page syntax or in the Text_Wiki parser used by the importer.',
                    $revisionIndex,
                    $data['name']
                ) . "\n",
                true
            );
        }
    }

    $parsedRevisions = count($data['revisions']);

    if ($parsedRevisions === 0) {
        throw new ImporterParserException(tr('Page "%0" is NOT going to be imported. It was not possible to parse any of the page revisions.', $data['name']) . "\n", true);
    }

    $this->saveAndDisplayLog(
        tr(
            'Page "%0" successfully parsed with %1 revisions (from a total of %2 revisions).',
            $data['name'],
            $parsedRevisions,
            $totalRevisions
        ) . "\n"
    );

    return $data;
}
/**
 * Turn the DOM representation of one Mediawiki page revision into the
 * array that will be imported: the content converted to Tiki syntax
 * ('data'), 'lastModif', 'minor', 'comment' and the contributor
 * information ('user' and 'ip').
 *
 * When the preference feature_categories is 'y' the categories are also
 * extracted from the converted content.
 *
 * Note: the key names follow the names used by the Tiki builtin functions
 * (i.e. 'text' becomes 'data' as used in TikiLib::create_page()).
 *
 * @param DOMElement $revision
 * @return array $data information for one wiki page revision
 * @throws ImporterParserException if unable to parse revision content
 */
public function extractRevision(DOMElement $revision)
{
    global $prefs;

    // Defaults for the elements that may be absent from a revision.
    $data = [
        'minor' => false,
        'comment' => '',
    ];

    foreach ($revision->childNodes as $child) {
        if (! ($child instanceof DOMElement)) {
            continue;
        }

        $tag = $child->tagName;

        if ($tag === 'comment') {
            $data['comment'] = $child->textContent;
        } elseif ($tag === 'text') {
            $converted = $this->convertMarkup($child->textContent);

            // The parser reports failures by returning a PEAR_Error.
            if ($converted instanceof PEAR_Error) {
                throw new ImporterParserException($converted->message);
            }

            $data['data'] = $converted;

            if ($prefs['feature_categories'] == 'y') {
                $this->extractCategories($data);
            }
        } elseif ($tag === 'timestamp') {
            $data['lastModif'] = strtotime($child->textContent);
        } elseif ($tag === 'minor') {
            // The mere presence of the element marks a minor edit.
            $data['minor'] = true;
        } elseif ($tag === 'contributor') {
            $data = array_merge($data, $this->extractContributor($child));
        }
        // <id> and any other element are ignored.
    }

    return $data;
}
/**
 * Extracts the categories from the page data.
 *
 * Every "((Category:Name))" link found in $data['data'] (together with
 * the whitespace following it) is removed from the text, and the category
 * names - trimmed, empty names dropped - are stored in
 * $data['categories']. $data is left untouched when it has no text or
 * the text contains no category link.
 *
 * @param array $data revision data, modified in place
 * @return void
 **/
public function extractCategories(&$data)
{
    // The converted text may be missing or null (empty revision text);
    // there is nothing to extract then.
    if (! isset($data['data']) || ! is_string($data['data']) || $data['data'] === '') {
        return;
    }

    if (! preg_match_all('/(\(\(Category:(\s*[^\)]+\s*)\)\)\s*)/', $data['data'], $matches)) {
        return;
    }

    // Remove every full category link from the page text.
    $data['data'] = str_replace($matches[1], '', $data['data']);

    // The pattern captures the whitespace around the name: strip it and
    // ignore names consisting of whitespace only.
    $categories = [];
    foreach ($matches[2] as $rawName) {
        $name = trim($rawName);
        if ($name !== '') {
            $categories[] = $name;
        }
    }

    $data['categories'] = $categories;
}
/**
 * Read the contributor of a Mediawiki page revision from its DOM
 * representation and return the user name and the ip address.
 *
 * Missing values default to the user 'anonymous' and the ip '0.0.0.0'.
 * Child elements other than <id>, <ip> and <username> are reported
 * with print.
 *
 * @param DOMElement $contributor
 * @return array $data with the keys 'user' and 'ip'
 */
public function extractContributor(DOMElement $contributor)
{
    $data = [];

    foreach ($contributor->childNodes as $child) {
        if (! ($child instanceof DOMElement)) {
            continue;
        }

        $tag = $child->tagName;

        if ($tag === 'id') {
            continue;
        }

        if ($tag === 'ip') {
            $data['ip'] = (string) $child->textContent;
        } elseif ($tag === 'username') {
            $data['user'] = (string) $child->textContent;
        } else {
            print "Unknown tag in contributor: {$tag}\n";
        }
    }

    // Array union only adds the keys that are still missing.
    return $data + ['user' => 'anonymous', 'ip' => '0.0.0.0'];
}
/**
 * Utility for converting MediaWiki markup to TikiWiki markup
 * Uses Text_Wiki PEAR library for heavy lifting
 *
 * Requires $this->parser to be set (see configureParser()).
 *
 * @param string $mediawikiText
 * @return string|PEAR_Error|null $tikiText the value returned by the
 *         parser (the caller in this class checks it for PEAR_Error),
 *         or null when there is no text to convert
 */
public function convertMarkup($mediawikiText)
{
    // Do not use empty() on strings: the page text "0" is valid content
    // and must be converted, only the empty string means "no text".
    $hasNoText = is_string($mediawikiText) ? $mediawikiText === '' : empty($mediawikiText);

    if ($hasNoText) {
        return null;
    }

    return $this->parser->transform($mediawikiText, 'Tiki');
}
}