You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

540 lines
20 KiB

<?php
// (c) Copyright by authors of the Tiki Wiki CMS Groupware Project
//
// All Rights Reserved. See copyright.txt for details and a complete list of authors.
// Licensed under the GNU LESSER GENERAL PUBLIC LICENSE. See license.txt for details.
// $Id$
if (! defined("_ECHOSERVER_HTML_GRAMMARPARSER")) {
    define("_ECHOSERVER_HTML_GRAMMARPARSER", 1);

    /**
     * Parser for grammar description files.
     *
     * A grammar file is a sequence of entries of the form
     *
     *     <name="value" name=value ... [ name="value" ... ] [ ... ] >
     *
     * optionally mixed with C-style comments. Attributes that appear
     * directly inside "<" ... ">" are stored under the "tag" key of the
     * entry, every "[" ... "]" group is stored as one element of the "pars"
     * list. Parsing is done by two cooperating state machines: ParseFirst()
     * tracks the brackets, ParseSecond() tracks name/value pairs.
     *
     * After Parse() succeeded, $pg is keyed by the value of each entry's
     * "tag" attribute and additionally holds "EDIT_TAGS_AFTER_TABLE".
     */
    class HtmlGrammarParser
    {
        /** @var int Current line in the input, used for error messages. */
        public $line;
        /** @var int Current column in the input, used for error messages. */
        public $column;
        /** @var int Offset of the next character to read from $data. */
        public $pos;
        /** @var int Length of $data in bytes. */
        public $length;
        /** @var string Raw grammar text read from the file. */
        public $data;
        /** @var array Parsed grammar. */
        public $pg;
        /** @var int Index in $pg of the entry being parsed (-1 before the first "<"). */
        public $pgpos;
        /** @var int Index of the current "[" ... "]" group inside the entry. */
        public $parpos;
        /** @var int 1 while inside a comment, -1 otherwise. */
        public $incomment;
        /** @var string Name of the grammar file ("" when built from an array). */
        public $name;
        /** @var int 1 when the object was built from an already parsed grammar. */
        public $allreadyparsed;
        /** @var array List of recorded errors, each with "type", "code" and "str". */
        public $errors;
        /** @var int Index of the last entry in $errors (-1 when empty). */
        public $errpos;
        /** @var int Code of the last recorded error. */
        public $err;
        /** @var string HTML text of the last recorded error. */
        public $errstr;
        /** @var int 1 while inside a double-quoted value, -1 otherwise. */
        public $quotstate;
        /** @var array Previous state and word of the first automaton. */
        public $firstprev;
        /** @var array Previous state and word of the second automaton. */
        public $secondprev;
        /** @var int Current state of the first (bracket) automaton. */
        public $firststate;
        /** @var int Current state of the second (name/value) automaton. */
        public $secondstate;
        /** @var bool True once the tokenizer ran out of input. */
        public $iseof;
        /** @var int 1: attributes go to "tag"; 2: attributes go to "pars". */
        public $mode;
        /** @var string|null Value of the last "tag" attribute seen. */
        public $tagname;
        /** @var string|null Name of the attribute currently being parsed. */
        public $parname;

        /**
         * Class constructor.
         *
         * @param string|array $data Either the path of a grammar file to
         *                           load, or an already parsed grammar array
         *                           (in which case Parse() is a no-op).
         *
         * Problems while loading the file are recorded through SetError();
         * the constructor itself never fails.
         */
        public function __construct($data)
        {
            $this->firstprev = ["state" => 0, "word" => ""];
            $this->secondprev = ["state" => 0, "word" => ""];
            $this->line = 0;
            $this->column = 0;
            $this->pos = 0;
            $this->length = 0;
            $this->data = "";
            $this->name = "";
            $this->errors = [];
            $this->errpos = -1;
            $this->err = 0;
            $this->errstr = "";
            $this->incomment = -1;
            $this->allreadyparsed = 0;
            $this->pg = [];
            $this->pgpos = -1;
            $this->parpos = -1;
            $this->quotstate = -1;
            $this->iseof = false;
            $this->firststate = 0;
            $this->secondstate = 0;
            $this->mode = 1;
            $this->tagname = null;
            $this->parname = null;

            // The argument itself decides the mode. (The previous code
            // inspected $this->data, which is never an array at this point.)
            if (is_array($data)) {
                $this->pg = $data;
                $this->allreadyparsed = 1;
                return;
            }

            clearstatcache();
            $this->name = $data;
            if (! file_exists($this->name)) {
                $this->SetError(1, "File $this->name not exists.", 0, 0, "Error");
                return;
            }
            $fp = fopen($this->name, "r");
            if (! $fp) {
                $this->SetError(1, "Can't open file $this->name.", 0, 0, "Error");
                return;
            }
            flock($fp, LOCK_SH);
            // stream_get_contents() copes with empty files, unlike
            // fread($fp, 0) which rejects a zero length.
            $contents = stream_get_contents($fp);
            flock($fp, LOCK_UN);
            fclose($fp);
            if ($contents === false) {
                $this->SetError(1, "Can't read file $this->name.", 0, 0, "Error");
                return;
            }
            $this->data = $contents;
            $this->length = strlen($this->data);
        }

        /**
         * Store a parser error or warning.
         *
         * @param int    $e       Error code.
         * @param string $str     Error description.
         * @param int    $line    Line number; location info is only added when non-zero.
         * @param int    $column  Column number.
         * @param string $errtype Label such as "Warning" or "Error".
         */
        public function SetError($e, $str, $line = 0, $column = 0, $errtype = "Warning")
        {
            $this->err = $e;
            $this->errstr = "<b>$errtype:</b> $e, $str";
            if ($line) {
                if (strlen($this->name)) {
                    $this->errstr .= "object <font color=\"red\">$this->name</font>";
                }
                $this->errstr .= " Line <b>$line</b>, Column <b>$column</b>";
            }
            $this->errors[++$this->errpos] = [
                "type" => $errtype,
                "code" => $e,
                "str"  => $this->errstr . "<br>\r\n",
            ];
        }

        /**
         * Print all recorded errors and warnings.
         */
        public function PrintErrors()
        {
            for ($i = 0; $i <= $this->errpos; $i++) {
                print $this->errors[$i]["str"];
            }
        }

        /**
         * Read the next word from the input.
         *
         * Words are separated by whitespace. "<", ">", "=" and the double
         * quote are returned as words of their own. The comment markers are
         * recognised when they start a word. Inside double quotes every
         * character up to the next unescaped quote belongs to one word.
         * "[" and "]" get no special treatment here, so they are only seen
         * as separate words when delimited by whitespace or one of the
         * characters above.
         *
         * @param string $word Receives the word that was read ("" if none).
         *
         * @return bool True if a word was read, false at end of input.
         */
        public function GetWord(&$word)
        {
            $word = "";
            $found = 0;
            if ($this->pos > $this->length) {
                return false;
            }
            while (! $found) {
                if ($this->pos == $this->length) {
                    // Step past the end so that the next call reports EOF.
                    $this->pos++;
                    return $word !== "";
                }
                $char = $this->data[$this->pos];
                switch ($char) {
                    case "*":
                    case "/":
                        $this->pos++;
                        $this->column++;
                        if ($this->quotstate != 1) {
                            // A word starting with the other marker character
                            // ends here, which yields the two comment tokens.
                            $partner = ($char === "*") ? "/" : "*";
                            if ($word !== "" && $word[0] === $partner) {
                                $found = 1;
                            }
                        }
                        $word .= $char;
                        break;
                    case " ":
                    case "\r":
                    case "\t":
                        $this->pos++;
                        $this->column++;
                        if ($this->quotstate == 1) {
                            $word .= $char;
                        } elseif (strlen($word)) {
                            $found = 1;
                        }
                        break;
                    case "\n":
                        $this->pos++;
                        // Keep the line counter right even for quoted values
                        // that span several lines.
                        $this->column = 0;
                        $this->line++;
                        if ($this->quotstate == 1) {
                            $word .= $char;
                        } elseif (strlen($word)) {
                            $found = 1;
                        }
                        break;
                    case ">":
                    case "<":
                    case "=":
                        if ($this->quotstate == 1) {
                            $word .= $char;
                            $this->pos++;
                            $this->column++;
                        } else {
                            // Only consume the delimiter when it is the word
                            // itself; otherwise it ends the pending word and
                            // is read again by the next call.
                            if (! strlen($word)) {
                                $word = $char;
                                $this->pos++;
                                $this->column++;
                            }
                            $found = 1;
                        }
                        break;
                    case "\"":
                        $escaped = $this->pos > 0 && $this->data[$this->pos - 1] == "\\";
                        if ($escaped) {
                            $word .= $char;
                            $this->pos++;
                            $this->column++;
                        } else {
                            if (! strlen($word)) {
                                $this->quotstate *= -1;
                                $word = $char;
                                $this->pos++;
                                $this->column++;
                            }
                            $found = 1;
                        }
                        break;
                    default:
                        $word .= $char;
                        $this->pos++;
                        $this->column++;
                }
            }
            return true;
        }

        /**
         * Parse grammar, first step: bracket structure.
         *
         * Transition table (rows: input, columns: current state):
         *
         *              0    1    2    3
         *     <        1   -1   -1    1
         *     [       -1    2   -1   -1
         *     ]       -1   -1    1   -1
         *     >       -1    3   -1   -1
         *     word    -1    1    2   -1
         *     EOF     -1   -1   -1   -2
         *
         *     -2 end of parsing
         *     -1 syntax error
         *      0 start, waiting for '<'
         *      1 got '<' (or ']'): parse attributes, or wait for '[' or '>'
         *      2 got '[': parse attributes of a parameter group
         *      3 got '>': waiting for EOF or '<'
         *
         * @param string $word Word delivered by GetWord().
         *
         * @return bool False on a syntax error, true otherwise.
         */
        public function ParseFirst($word)
        {
            if ($this->iseof) {
                $this->firstprev["state"] = 0;
                $this->firstprev["word"] = "";
                return true;
            }
            $automat = [
                0 => [ 1, -1, -1,  1], // '<'
                1 => [-1,  2, -1, -1], // '['
                2 => [-1, -1,  1, -1], // ']'
                3 => [-1,  3, -1, -1], // '>'
                4 => [-1,  1,  2, -1], // any other word
                5 => [-1, -1, -1, -2], // EOF row of the table; never selected below
            ];
            switch ($word) {
                case "<":
                    $instate = 0;
                    // A new entry starts: next slot in $pg, no group yet.
                    $this->pgpos++;
                    $this->parpos = -1;
                    break;
                case "[":
                    $this->parpos++;
                    $instate = 1;
                    break;
                case "]":
                    $instate = 2;
                    break;
                case ">":
                    $instate = 3;
                    break;
                default:
                    $instate = 4;
                    break;
            }
            $this->firststate = $automat[$instate][$this->firststate];
            if ($this->firststate == -1) {
                return false;
            }
            $prev = $this->firstprev["state"];
            switch ($this->firststate) {
                case 1:
                    $this->mode = 1;
                    // Only words that follow state 1 are attribute tokens;
                    // the '<' or ']' that led here is not.
                    if ($prev == 1 && ! $this->ParseSecond($word)) {
                        return false;
                    }
                    break;
                case 2:
                    if ($prev == 1 || $prev == 2) {
                        $this->mode = 2;
                    } elseif ($prev == 3) {
                        $this->mode = 1;
                    }
                    // The '[' itself is not an attribute token.
                    if ($prev == 2 && ! $this->ParseSecond($word)) {
                        return false;
                    }
                    break;
                case 3:
                    if (isset($this->pg[$this->pgpos]["tag"]["nohavesametag"])) {
                        $this->pg[$this->pgpos]["tag"]["closeon"]["in"][] = $this->tagname;
                    }
                    break;
            }
            $this->firstprev["state"] = $this->firststate;
            $this->firstprev["word"] = $word;
            return true;
        }

        /**
         * Parse grammar, second step: name/value pairs such as
         * par1="value" par2=value.
         *
         * Transition table (rows: input, columns: current state):
         *
         *              0    1    2    3    4
         *     =       -1    2   -1    3   -1
         *     "       -1   -1    3    4   -1
         *     word     1    1    4    3    1
         *     EOF     -1   -1   -1   -1   -1
         *
         *     -1 syntax error
         *      0 start, waiting for a name
         *      1 got a name, waiting for '=' or a new name
         *      2 got '=', waiting for a bare value or the opening '"'
         *      3 inside quotes, collecting the value up to the closing '"'
         *      4 got the value, waiting for a new name
         *
         * @param string $word Word delivered by GetWord().
         *
         * @return bool False on a syntax error or at EOF, true otherwise.
         */
        public function ParseSecond($word)
        {
            if ($this->iseof) {
                return false;
            }
            $automat = [
                0 => [-1,  2, -1,  3, -1], // '='
                1 => [-1, -1,  3,  4, -1], // '"'
                2 => [ 1,  1,  4,  3,  1], // any other word
                3 => [-1, -1, -1, -1, -1], // EOF row of the table; never selected below
            ];
            switch ($word) {
                case "=":
                    $instate = 0;
                    break;
                case "\"":
                    $instate = 1;
                    break;
                default:
                    $instate = 2;
                    break;
            }
            $this->secondstate = $automat[$instate][$this->secondstate];
            if ($this->secondstate == -1) {
                return false;
            }
            switch ($this->secondstate) {
                case 1:
                    $this->parname = $word;
                    // Unanchored match, same pattern as the former ereg() call.
                    if (! preg_match('/[a-zA-Z_-]+([0-9]+)?/', $word)) {
                        $this->SetError(1, "Fatal error.", $this->line, $this->column, "Error");
                        return false;
                    }
                    if ($this->mode == 1) {
                        $this->pg[$this->pgpos]["tag"][$this->parname] = "";
                    } elseif ($this->mode == 2) {
                        $this->pg[$this->pgpos]["pars"][$this->parpos][$this->parname] = "";
                    }
                    break;
                case 4:
                    if ($this->secondprev["state"] == 3) {
                        // Quoted value: the previous word is the content. If it
                        // is still the opening quote, the value was empty.
                        $value = $this->secondprev["word"];
                        if ($value === "\"") {
                            $value = "";
                        }
                    } else {
                        $value = $word;
                    }
                    if ($this->mode == 1) {
                        $this->pg[$this->pgpos]["tag"][$this->parname] = $value;
                        if ($this->parname == "closeon") {
                            $notexists = [];
                            $exists = [];
                            $this->ParseCloseOn($value, $notexists, $exists);
                            $this->pg[$this->pgpos]["tag"][$this->parname] = [
                                "notin" => $notexists,
                                "in"    => $exists,
                            ];
                        } elseif ($this->parname == "tag") {
                            $this->tagname = $value;
                        }
                    } elseif ($this->mode == 2) {
                        $this->pg[$this->pgpos]["pars"][$this->parpos][$this->parname] = $value;
                    }
                    break;
            }
            $this->secondprev["state"] = $this->secondstate;
            $this->secondprev["word"] = $word;
            return true;
        }

        /**
         * Parse a "closeon" value: a "|"-separated list of names where a
         * leading "!" marks a negated name.
         *
         * @param string $str       The raw attribute value.
         * @param array  $notexists Receives the names that were prefixed with "!".
         * @param array  $exists    Receives all other names.
         *
         * Names are appended to both arrays; empty list items are ignored.
         */
        public function ParseCloseOn($str, &$notexists, &$exists)
        {
            foreach (explode("|", $str) as $item) {
                if ($item === "") {
                    continue;
                }
                if ($item[0] === "!") {
                    $notexists[] = substr($item, 1);
                } else {
                    $exists[] = $item;
                }
            }
        }

        /**
         * Parse the loaded grammar text into $pg.
         *
         * @return bool True on success (or when the grammar was supplied
         *              already parsed), false on a syntax error; details are
         *              available through $errors / PrintErrors().
         */
        public function Parse()
        {
            if ($this->allreadyparsed) {
                return true;
            }
            $this->line = 1;
            $word = "";
            do {
                if (! $this->GetWord($word)) {
                    $this->iseof = true;
                }
                switch (strtolower($word)) {
                    case "/*":
                        // Set rather than toggle, so that a second opening
                        // marker inside a comment does not end the comment.
                        $this->incomment = 1;
                        break;
                    case "*/":
                        if ($this->incomment != 1) {
                            $this->SetError(1, "Not found begin of comment operator.", $this->line, $this->column, "Error");
                            return false;
                        }
                        $this->incomment = -1;
                        break;
                    default:
                        if ($this->incomment == 1) {
                            break;
                        }
                        if (! $this->ParseFirst($word)) {
                            $this->SetError(1, "Fatal error", $this->line, $this->column, "Error");
                            return false;
                        }
                        break;
                }
            } while (! $this->iseof);

            if ($this->incomment == 1) {
                $this->SetError(1, "Not found end of comment operator.", $this->line, $this->column, "Error");
                return false;
            }
            $this->PrepareGrammar();
            return true;
        }

        /**
         * Re-key the parsed grammar for later use.
         *
         * Every numerically indexed entry of $pg is replaced by an entry
         * keyed by the value of its "tag" attribute. The entry holds the tag
         * attributes plus a "pars" map keyed by the "par" attribute of each
         * parameter group. The table built by ScanGrammar() is stored under
         * "EDIT_TAGS_AFTER_TABLE".
         */
        public function PrepareGrammar()
        {
            $edittagsaftertable = $this->ScanGrammar();
            $count = sizeof($this->pg);
            for ($i = 0; $i < $count; $i++) {
                if (! isset($this->pg[$i])) {
                    continue;
                }
                $entry = $this->pg[$i];
                $definition = isset($entry["tag"]) ? $entry["tag"] : [];
                $tagname = isset($definition["tag"]) ? $definition["tag"] : "";
                $definition["pars"] = [];
                if (isset($entry["pars"])) {
                    foreach ($entry["pars"] as $par) {
                        $parkey = isset($par["par"]) ? $par["par"] : "";
                        $definition["pars"][$parkey] = $par;
                    }
                }
                $this->pg[$tagname] = $definition;
                unset($this->pg[$i]);
            }
            $this->pg["EDIT_TAGS_AFTER_TABLE"] = $edittagsaftertable;
        }

        /**
         * Collect the distinct "edittagsafter" values of all entries.
         *
         * Must run before PrepareGrammar() re-keys $pg, because it walks the
         * numeric indexes.
         *
         * @return array List of distinct "edittagsafter" values.
         */
        public function ScanGrammar()
        {
            $edittagsaftertable = [];
            $count = sizeof($this->pg);
            for ($i = 0; $i < $count; $i++) {
                if (! isset($this->pg[$i]["tag"]["edittagsafter"])) {
                    continue;
                }
                $value = $this->pg[$i]["tag"]["edittagsafter"];
                if (! in_array($value, $edittagsaftertable, true)) {
                    $edittagsaftertable[] = $value;
                }
            }
            return $edittagsaftertable;
        }

        /**
         * Save the precompiled grammar ($pg, serialized) in a file.
         *
         * @param string $name Path of the file to write.
         *
         * @return bool True if the grammar was written, false otherwise. A
         *              message is printed when the file cannot be created.
         */
        public function SaveGrammar($name)
        {
            $str = serialize($this->pg);
            $fp = fopen($name, "w");
            if (! $fp) {
                print "<br>Error: Can't create file $name. Unable to save grammar.<br>";
                // Nothing to lock, write or close without a handle.
                return false;
            }
            flock($fp, LOCK_EX);
            $written = fwrite($fp, $str);
            flock($fp, LOCK_UN);
            fclose($fp);
            return $written !== false;
        }
    }
}