import() */
    public $attachmentsDestDir = '';

    /**
     * Text_Wiki object to handle Mediawiki
     * syntax parsing
     */
    public $parser = '';

    /**
     * Options shown to the user before the import starts.
     *
     * @see lib/importer/TikiImporter#importOptions()
     *
     * @return array list of option definitions (name, type, label)
     */
    public static function importOptions()
    {
        $options = [
            [
                'name' => 'importAttachments',
                'type' => 'checkbox',
                'label' => tra('Import images and attachments (see documentation for more information)')
            ],
            [
                'name' => 'maketoc',
                'type' => 'checkbox',
                'label' => tra('Add a maketoc at the top of each page')
            ],
        ];

        return $options;
    }

    /**
     * Check for DOMDocument.
     *
     * @see lib/importer/TikiImporter#checkRequirements()
     *
     * @return void
     * @throws Exception if DOMDocument not available
     */
    public function checkRequirements()
    {
        if (! class_exists('DOMDocument')) {
            throw new Exception(tra('Class DOMDocument not available, check your PHP installation. For more information see http://php.net/manual/en/book.dom.php'));
        }
    }

    /**
     * Start the importing process by loading the XML file.
     *
     * @see lib/importer/TikiImporter_Wiki#import()
     *
     * @param string $filePath path to the XML file
     * @return void
     * @throws UnexpectedValueException if invalid file mime type
     * @throws DOMException if the XML file cannot be loaded
     */
    public function import($filePath = null)
    {
        if ($filePath == null) {
            die("This particular implementation of the method requires an explicit file path.");
        }

        if (isset($_FILES['importFile']) && ! in_array($_FILES['importFile']['type'], $this->validTypes)) {
            throw new UnexpectedValueException(tra('Invalid file MIME type'));
        }

        // The checkbox is evaluated once; it drives both the pre-flight check and the download step.
        $importAttachments = ! empty($_POST['importAttachments']) && $_POST['importAttachments'] == 'on';

        if ($importAttachments) {
            $this->checkRequirementsForAttachments();
        }

        $this->saveAndDisplayLog("Loading and validating the XML file\n");

        $this->dom = new DOMDocument();
        // load() returns false on unreadable or malformed XML; carrying on with an
        // empty document would silently import nothing.
        if (! $this->dom->load($filePath)) {
            throw new DOMException(tra('Unable to load the XML file'));
        }

        $this->configureParser();

        if ($importAttachments) {
            $this->downloadAttachments();
        }

        parent::import();
    }

    /**
     * Create a Text_Wiki object to handle the parsing
     * of Mediawiki syntax and define some configuration
     * option
     *
     * @return void
     */
    public function configureParser()
    {
        $this->parser = Text_Wiki::factory('Mediawiki');

        // do not replace space by underscore in wikilinks
        $this->parser->setParseConf('Wikilink', 'spaceUnderscore', false);

        // define possible localized namespace for image and files in the wikilink syntax
        $prefix = ['Image', 'image'];
        foreach ($this->dom->getElementsByTagName('namespace') as $namespace) {
            // namespace keys -2 and 6 are the ones whose (possibly localized) names are accepted as prefixes
            if (in_array($namespace->getAttribute('key'), ['-2', '6'], true)) {
                $prefix[] = $namespace->nodeValue;
            }
        }

        $this->parser->setParseConf('Image', 'prefix', $prefix);
    }

    /**
     * At present this method only validates the Mediawiki XML
     * against its schema. Mediawiki XML
     * versions from 0.3 till 0.10 are supported.
     *
     * Note: we use schemaValidate() instead of validate() because
     * for some unknown reason the latter method is unable to automatically
     * retrieve Mediawiki XML DTD and dies with "no DTD found" error.
     *
     * @see lib/importer/TikiImporter#validateInput()
     *
     * @return bool true when the document validates (otherwise an exception is thrown)
     * @throws DOMException if XML file does not validate against schema
     */
    public function validateInput()
    {
        $supportedVersions = ['0.3', '0.4', '0.5', '0.6', '0.7', '0.8', '0.9', '0.10'];

        $mediawiki = $this->dom->getElementsByTagName('mediawiki');

        if ($mediawiki->length > 0) {
            $xmlVersion = $mediawiki->item(0)->getAttribute('version');

            // Strict comparison on purpose: a loose numeric-string comparison treats
            // '0.1' as equal to '0.10' and would select a schema file that does not exist.
            if (! in_array($xmlVersion, $supportedVersions, true)) {
                throw new DOMException(tr("MediaWiki XML file version %0 is not supported.", $xmlVersion));
            }

            $xmlDtdFile = __DIR__ . "/mediawiki_dump_v$xmlVersion.xsd";

            // libxml warnings are suppressed; a failed validation is reported by the exception below
            if (@$this->dom->schemaValidate($xmlDtdFile)) {
                return true;
            }
        }

        throw new DOMException(tra('The XML file does not validate against the MediaWiki XML schema'));
    }

    /**
     * Check for all the requirements to import attachments
     * and also set the $this->attachmentsDestDir.
     * If one of them is not satisfied the script will die.
     *
     * @return void
     */
    public function checkRequirementsForAttachments()
    {
        global $tikidomain;

        $this->attachmentsDestDir = __DIR__ . '/../../img/wiki_up/';
        if ($tikidomain) {
            // keep a trailing slash: file names are appended directly to this path
            $this->attachmentsDestDir .= rtrim($tikidomain, '/') . '/';
        }

        // ini_get() returns a string such as "1", "" or "0" for an existing setting, and
        // false only when the setting does not exist, so it is interpreted as a boolean here.
        if (! filter_var(ini_get('allow_url_fopen'), FILTER_VALIDATE_BOOLEAN)) {
            $this->saveAndDisplayLog(
                tra(
                    "Aborting: you need to enable the PHP setting 'allow_url_fopen' to be able to import attachments. Fix the problem or try to import without the attachments."
                ) . "\n"
            );
            die;
        }

        if (! file_exists($this->attachmentsDestDir)) {
            $this->saveAndDisplayLog(
                tr(
                    'Aborting: the destination directory for attachments (%0) does not exist. Correct this problem or try to import without the attachments.',
                    $this->attachmentsDestDir
                ) . "\n"
            );
            die;
        } elseif (! is_writable($this->attachmentsDestDir)) {
            $this->saveAndDisplayLog(
                tr(
                    'Aborting: the destination directory for attachments (%0) is not writable. Correct this problem or try to import without attachments.',
                    $this->attachmentsDestDir
                ) . "\n"
            );
            die;
        }
    }

    /**
     * Foreach page check if it is a wiki page or a wiki page
     * attachment. Wiki pages are passed to $this->extractInfo()
     * and the returned value is appended to the $parsedData array.
     * Pages containing an <upload> element are skipped here
     * (see $this->downloadAttachments()).
     *
     * Pages that fail to parse are logged and left out of the result.
     *
     * @return array $parsedData
     */
    public function parseData()
    {
        $parsedData = [];
        $pages = $this->dom->getElementsByTagName('page');

        $this->saveAndDisplayLog("\n" . tra("Parsing pages:") . "\n");

        foreach ($pages as $page) {
            // pages carrying an <upload> element are attachments, not wiki pages
            if ($page->getElementsByTagName('upload')->length != 0) {
                continue;
            }

            try {
                $parsedData[] = $this->extractInfo($page);
            } catch (ImporterParserException $e) {
                $this->saveAndDisplayLog($e->getMessage(), true);
            }
        }

        return $parsedData;
    }

    /**
     * Searches for the last version of each attachments in the XML file
     * and try to download it to the img/wiki_up/ directory
     *
     * Note: it is not possible to generate the Mediawiki
     * XML file with the <upload> tag through the web interface
     * (Special:Export). This is only possible through the Mediawiki
     * script maintenance/dumpBackup.php with the experimental option
     * --uploads
     *
     * @return void
     */
    public function downloadAttachments()
    {
        if ($this->dom->getElementsByTagName('upload')->length == 0) {
            $this->saveAndDisplayLog(
                "\n\n" . tra("No attachments were found to import. Be sure to create the XML file with the dumpBackup.php script and with the option --uploads. This is the only way to import attachments.") . "\n",
                true
            );
            return;
        }

        $this->saveAndDisplayLog("\n\n" . tra("Importing attachments:") . "\n");

        foreach ($this->dom->getElementsByTagName('page') as $page) {
            $attachments = $page->getElementsByTagName('upload');
            if ($attachments->length == 0) {
                continue;
            }

            // the last <upload> element of a page is used as the latest version of the file
            $lastVersion = $attachments->item($attachments->length - 1);
            $fileNameNode = $lastVersion->getElementsByTagName('filename')->item(0);
            $fileUrlNode = $lastVersion->getElementsByTagName('src')->item(0);

            // This method runs before parent::import(), so the document may not have been
            // schema-validated yet: do not assume both child elements are present.
            if ($fileNameNode === null || $fileUrlNode === null) {
                $this->saveAndDisplayLog(
                    tra('Skipping an attachment without file name or source URL.') . "\n",
                    true
                );
                continue;
            }

            $fileName = $fileNameNode->nodeValue;
            $fileUrl = $fileUrlNode->nodeValue;

            // The file name comes from the imported XML; refuse anything that could
            // escape the destination directory.
            if ($fileName === '' || strpbrk($fileName, '/\\') !== false) {
                $this->saveAndDisplayLog(
                    tr('File %0 is not being imported because its name is not valid.', $fileName) . "\n",
                    true
                );
                continue;
            }

            $destination = $this->attachmentsDestDir . $fileName;

            if (file_exists($destination)) {
                $this->saveAndDisplayLog(
                    tr(
                        'File %0 is not being imported because there is already a file with the same name in the destination directory (%1)',
                        $fileName,
                        $this->attachmentsDestDir
                    ) . "\n",
                    true
                );
                continue;
            }

            // Single download; warnings are suppressed because failure is reported through the log.
            $attachmentContent = @file_get_contents($fileUrl);
            if ($attachmentContent === false) {
                $this->saveAndDisplayLog(tr('Unable to download file %0. File not found.', $fileName) . "\n", true);
                continue;
            }

            // file_put_contents() opens, writes and closes the file, so no handle is left open
            if (@file_put_contents($destination, $attachmentContent) === false) {
                $this->saveAndDisplayLog(
                    tr('Unable to save file %0 to the destination directory (%1)', $fileName, $this->attachmentsDestDir) . "\n",
                    true
                );
                continue;
            }

            $this->saveAndDisplayLog(tr('File %0 successfully imported!', $fileName) . "\n");
        }
    }

    /**
     * Parse an DOM representation of a Mediawiki page and return all the values
     * that will be imported (page name, page content for all revisions). The
     * property TikiImporter_Wiki::revisionsNumber define how many wiki page
     * revisions are parsed.
     *
     * Note: the names of the keys are changed to reflected the names used by
     * Tiki builtin function (i.e. 'title' is changed to 'name' as used in
     * TikiLib::create_page() which will be called by TikiImporter_Wiki::insertPage())
     *
     * @param DOMElement $page
     * @return array $data information for one wiki page
     * @throws ImporterParserException if fail to parse all revisions of a page
     */
    public function extractInfo(DOMElement $page)
    {
        $data = [];
        $data['revisions'] = [];

        $totalRevisions = $page->getElementsByTagName('revision')->length;

        // When a limit is set and the page has more revisions than that, only the
        // last $this->revisionsNumber revisions are parsed; otherwise all of them are.
        $skipCount = 0;
        if ($this->revisionsNumber != 0 && $totalRevisions > $this->revisionsNumber) {
            $skipCount = $totalRevisions - $this->revisionsNumber;
        }

        // used in log messages only, so a page without <title> does not raise an undefined index notice
        $pageName = '';
        $revisionIndex = 0;

        foreach ($page->childNodes as $node) {
            if (! ($node instanceof DOMElement)) {
                continue;
            }

            switch ($node->tagName) {
                case 'id':
                    break;
                case 'title':
                    $data['name'] = (string) $node->textContent;
                    $pageName = $data['name'];
                    break;
                case 'revision':
                    $revisionIndex++;
                    if ($revisionIndex > $skipCount) {
                        try {
                            $data['revisions'][] = $this->extractRevision($node);
                        } catch (ImporterParserException $e) {
                            $this->saveAndDisplayLog(
                                tr(
                                    'Error while parsing revision %0 of the page "%1". There could be a problem in the page syntax or in the Text_Wiki parser used by the importer.',
                                    $revisionIndex,
                                    $pageName
                                ) . "\n",
                                true
                            );
                        }
                    }
                    break;
                default:
                    print "Unknown tag : {$node->tagName}\n";
            }
        }

        $countRevisions = count($data['revisions']);

        if ($countRevisions > 0) {
            $msg = tr(
                'Page "%0" successfully parsed with %1 revisions (from a total of %2 revisions).',
                $pageName,
                $countRevisions,
                $totalRevisions
            ) . "\n";

            $this->saveAndDisplayLog($msg);

            return $data;
        }

        // NOTE(review): the second constructor argument `true` is kept as in the original call;
        // it looks like it may have been meant for saveAndDisplayLog() - confirm against
        // ImporterParserException's constructor.
        throw new ImporterParserException(tr('Page "%0" is NOT going to be imported. It was not possible to parse any of the page revisions.', $pageName) . "\n", true);
    }

    /**
     * Parse an DOM representation of a Mediawiki page revisions and return all the values
     * that will be imported (page content converted to Tiki syntax, lastModif, minor, user and ip address)
     *
     * Note: the names of the keys are changed to reflected the names used by
     * Tiki builtin function (i.e. 'text' is changed to 'data' as used in TikiLib::create_page())
     *
     * @param DOMElement $revision
     * @return array $data information for one wiki page revision
     * @throws ImporterParserException if unable to parse revision content
     */
    public function extractRevision(DOMElement $revision)
    {
        global $prefs;

        $data = [];
        $data['minor'] = false;
        $data['comment'] = '';

        // isset() guard avoids an undefined index notice when the preference is not defined
        $categoriesEnabled = isset($prefs['feature_categories']) && $prefs['feature_categories'] == 'y';

        foreach ($revision->childNodes as $node) {
            if (! ($node instanceof DOMElement)) {
                continue;
            }

            switch ($node->tagName) {
                case 'id':
                    break;
                case 'comment':
                    $data['comment'] = $node->textContent;
                    break;
                case 'text':
                    $text = $this->convertMarkup($node->textContent);
                    if ($text instanceof PEAR_Error) {
                        throw new ImporterParserException($text->message);
                    }
                    $data['data'] = $text;
                    if ($categoriesEnabled) {
                        $this->extractCategories($data);
                    }
                    break;
                case 'timestamp':
                    $data['lastModif'] = strtotime($node->textContent);
                    break;
                case 'minor':
                    // the mere presence of the element marks the revision as minor
                    $data['minor'] = true;
                    break;
                case 'contributor':
                    $data = array_merge($data, $this->extractContributor($node));
                    break;
            }
        }

        return $data;
    }

    /**
     * Extracts the categories from the page data.
     *
     * Every "((Category:...))" link found in $data['data'] is removed from the
     * text and the captured category names are stored in $data['categories'].
     * $data is left untouched when no category link is found.
     *
     * @param array $data revision data, modified in place
     * @return void
     **/
    public function extractCategories(&$data)
    {
        if (preg_match_all('/(\(\(Category:(\s*[^\)]+\s*)\)\)\s*)/', $data['data'], $matches)) {
            // $matches[1] holds the full links (with trailing whitespace), $matches[2] the names
            $data['data'] = str_replace($matches[1], '', $data['data']);
            $data['categories'] = $matches[2];
        }
    }

    /**
     * Parse an DOM representation of a Mediawiki page revision contributor and return
     * the username and ip address
     *
     * @param DOMElement $contributor
     * @return array $data with the keys 'user' (default 'anonymous') and 'ip' (default '0.0.0.0')
     */
    public function extractContributor(DOMElement $contributor)
    {
        $data = [];

        foreach ($contributor->childNodes as $node) {
            if (! ($node instanceof DOMElement)) {
                continue;
            }

            switch ($node->tagName) {
                case 'id':
                    break;
                case 'ip':
                    $data['ip'] = (string) $node->textContent;
                    break;
                case 'username':
                    $data['user'] = (string) $node->textContent;
                    break;
                default:
                    print "Unknown tag in contributor: {$node->tagName}\n";
            }
        }

        if (! isset($data['user'])) {
            $data['user'] = 'anonymous';
        }

        if (! isset($data['ip'])) {
            $data['ip'] = '0.0.0.0';
        }

        return $data;
    }

    /**
     * Utility for converting MediaWiki markup to TikiWiki markup
     * Uses Text_Wiki PEAR library for heavy lifting
     *
     * @param string $mediawikiText
     * @return string|PEAR_Error $tikiText the converted text, or an empty string for empty input.
     *                           extractRevision() checks the result for a PEAR_Error instance.
     */
    public function convertMarkup($mediawikiText)
    {
        // Only null/false/'' count as "no content": empty() would also discard a page
        // whose whole text is "0".
        if ((string) $mediawikiText === '') {
            return '';
        }

        return $this->parser->transform($mediawikiText, 'Tiki');
    }
}