For a detailed introduction and usage instructions, please see the original source.
- /**
- * HTML Tag Parsing package
- *
- * @category Tagparse
- * @package Tagparse
- * @author Kun
- * @copyright Kun
- * @license http://www.php.net/license/3_01.txt PHP license 3.01
- * @version 1.0
- * @link http://www.blogkun.com
- * @since 1.0
- */
- namespace Tagparse;
- /**
- * Tagdomroot
- *
- * @category Tagparse
- * @package Tagparse
- * @author Kun
- * @copyright Kun
- * @license http://www.php.net/license/3_01.txt PHP license 3.01
- * @version 1.0
- * @link http://www.blogkun.com
- * @since 1.0
- */
class Tagdomroot
{
    /** @var string Tag name of this node; the document root is always 'root'. */
    public $tag = 'root';

    /** @var string|null Text content of this node with all markup removed. */
    public $plaintext;

    /** @var array Direct child nodes (Tagdomnode instances) in document order. */
    public $child = array();

    /** @var int Depth of this node in the tree; the root is level 0. */
    public $level = 0;

    /** @var bool Set to true as soon as any malformed markup is encountered. */
    public static $TagParseError = false;

    /** @var array Global index: tag name => raw attribute string => list of nodes. */
    protected static $TagSet = array();

    /** @var array Accumulator for the nodes matched by the running query. */
    protected static $FoundNode = array();

    /** @var array Raw text of the unmatched opening/closing tags found so far. */
    public static $ErrorTag = array();

    /**
     * Reset all shared (static) parser state.
     *
     * The tag index, the error flag and the error list are static and thus
     * shared by every document parsed in the same process, so this should be
     * called before parsing a new, unrelated document. It is declared static
     * so it can be called before any root exists; calling it on an instance
     * ($root->initProperty()) keeps working.
     *
     * @access public
     *
     * @return null
     */
    public static function initProperty()
    {
        self::$TagParseError = false;
        self::$TagSet = array();
        self::$FoundNode = array();
        self::$ErrorTag = array();
    }

    /**
     * Parse a markup string into a tree rooted at this object.
     *
     * @param string $str the tag string to be parsed.
     *
     * @access public
     *
     * @return Tagdomroot
     */
    public function __construct($str)
    {
        $this->_removeNoise($str);
        if ($str === null) {
            // preg_replace() failed while stripping noise; nothing to parse.
            self::$TagParseError = true;
            return;
        }

        // Everything that is not inside <...> is text. This also covers input
        // that contains no tags at all.
        $this->plaintext = preg_replace('~<[^>]*>~s', '', $str);

        $tagCollect = array();
        $attrCollect = array();
        $innerContentCollect = array();
        if ($this->parseTag($str, $tagCollect, $attrCollect, $innerContentCollect) === false) {
            self::$TagParseError = true;
        }
        foreach ($tagCollect as $index => $tag) {
            $this->child[] = new Tagdomnode(
                $tag,
                $this,
                $attrCollect[$index],
                $innerContentCollect[$index],
                $this->level + 1
            );
        }
    }

    /**
     * Split a markup string into its top-level elements.
     *
     * For every top-level element one entry is appended to each of the three
     * output arrays, so they always stay index-aligned. Nested elements are
     * left inside the inner content and are parsed when the child node is
     * constructed.
     *
     * @param string $str                  markup to scan.
     * @param array  &$tagCollect          receives the tag names.
     * @param array  &$attrCollect         receives the raw attribute strings.
     * @param array  &$innerContentCollect receives the inner markup.
     *
     * @access protected
     *
     * @return boolean false when unmatched opening/closing tags were found.
     */
    protected function parseTag($str, array &$tagCollect, array &$attrCollect, array &$innerContentCollect)
    {
        // Elements that never have a closing tag. The first nine are the
        // original list; the rest are the remaining HTML void elements.
        static $selfClosingTags = array(
            'img' => 1, 'br' => 1, 'input' => 1, 'meta' => 1, 'link' => 1,
            'hr' => 1, 'base' => 1, 'embed' => 1, 'spacer' => 1,
            'area' => 1, 'col' => 1, 'param' => 1, 'source' => 1,
            'track' => 1, 'wbr' => 1,
        );

        $str = (string) $str;
        $length = strlen($str);
        $offset = 0;      // position where the search for the next '<' resumes
        $error = false;

        while ($offset < $length) {
            $l = strpos($str, '<', $offset);
            if ($l === false) {
                break; // parse end
            }
            $r = strpos($str, '>', $l);
            if ($r === false) {
                break; // a stray '<' in trailing text, not a tag
            }

            if (substr($str, $l + 1, 1) === '/') {
                // Surplus closing tag: record it and discard.
                $error = true;
                self::$ErrorTag[] = substr($str, $l, $r - $l + 1);
                $offset = $r + 1;
                continue;
            }

            $tagBody = substr($str, $l + 1, $r - $l - 1);
            if ($tagBody === '' || !ctype_alpha($tagBody[0]) || strpos($tagBody, '<') !== false) {
                // Not an element (doctype, "a < b", ...). Resume right after
                // this '<' so a real tag that follows is not skipped.
                $offset = $l + 1;
                continue;
            }

            // The tag name ends at the first whitespace or '/', so "<br/>" is
            // recognised as "br"; whatever follows is the raw attribute string.
            $tagBody = preg_replace("~\n+~", ' ', $tagBody);
            preg_match('~^([^\s/]+)(.*)$~s', $tagBody, $parts);
            $tag = $parts[1];

            $tagCollect[] = $tag;
            $attrCollect[] = ltrim($parts[2]);

            if (isset($selfClosingTags[strtolower($tag)])) {
                $innerContentCollect[] = '';
                $offset = $r + 1;
                continue;
            }

            $closeTag = '</' . $tag . '>';
            if (strpos($str, $closeTag, $r + 1) === false) {
                // Surplus opening tag: the rest of the input becomes its content.
                $innerContentCollect[] = (string) substr($str, $r + 1);
                $error = true;
                self::$ErrorTag[] = '<' . $tag . '>';
                break;
            }

            $close = $this->_findMatchingClose($str, $tag, $r + 1);
            if ($close === false) {
                // Closing tags exist, but not enough to pair every nested open.
                $innerContentCollect[] = (string) substr($str, $r + 1);
                $error = true;
                break;
            }

            $innerContentCollect[] = substr($str, $r + 1, $close - $r - 1);
            $offset = $close + strlen($closeTag);
        }

        return !$error;
    }

    /**
     * Locate the closing tag that pairs with an already opened element.
     *
     * Walks forward from $from keeping a nesting depth: every further opening
     * tag of the same name raises it, every closing tag lowers it. Opening
     * tags are only counted when the name is followed by whitespace, '/' or
     * '>', so "<b" never matches "<br>" or "<body>".
     *
     * @param string $str  markup to scan.
     * @param string $tag  tag name of the opened element.
     * @param int    $from offset just behind the opening tag.
     *
     * @access private
     *
     * @return int|false offset of the matching closing tag, false if none.
     */
    private function _findMatchingClose($str, $tag, $from)
    {
        $closeTag = '</' . $tag . '>';
        $closeLength = strlen($closeTag);
        $openPattern = '~<' . preg_quote($tag, '~') . '(?=[\s/>])~';

        $depth = 1;
        $cursor = $from;
        $open = -1; // offset of the next nested open; -1 = stale, false = none left

        while (true) {
            $close = strpos($str, $closeTag, $cursor);
            if ($close === false) {
                return false;
            }
            if ($open !== false && $open < $cursor) {
                // Cached position is behind the cursor; look up the next one.
                $open = preg_match($openPattern, $str, $found, PREG_OFFSET_CAPTURE, $cursor) === 1
                    ? $found[0][1]
                    : false;
            }
            if ($open !== false && $open < $close) {
                $depth++;
                $cursor = $open + 1;
                continue;
            }
            $depth--;
            if ($depth === 0) {
                return $close;
            }
            $cursor = $close + $closeLength;
        }
    }

    /**
     * Strip markup that must not be parsed as elements.
     *
     * NOTE(review): the three original patterns were lost when this source
     * was extracted from a web page. They are reconstructed here as HTML
     * comments, <script> blocks and <style> blocks - confirm against the
     * original package. On a PCRE failure $str is set to null, which the
     * constructor reports as a parse error.
     *
     * @param string &$str the tag string to be parsed; modified in place.
     *
     * @access private
     *
     * @return null
     */
    private function _removeNoise(&$str)
    {
        if ($str === null) {
            return;
        }
        $noisePatterns = array(
            '~<!--.*?-->~is',
            '~<script[^>]*>.*?</script>~is',
            '~<style[^>]*>.*?</style>~is',
        );
        $str = preg_replace($noisePatterns, ' ', $str);
    }

    /**
     * Split a selector chain such as 'div[class="a"] span' into tags and
     * attribute conditions.
     *
     * @param string $selectors      user's select condition.
     * @param array  &$selectorsTag  receives the tag name of each step.
     * @param array  &$selectorsAttr receives, per step, attribute => value.
     *
     * @access protected
     *
     * @return null
     */
    protected function parseSelectors($selectors, array &$selectorsTag, array &$selectorsAttr)
    {
        preg_match_all('~(\w+)(\[[\w\-="._/ ]+\])?~', (string) $selectors, $matches);
        $selectorsTag = $matches[1];
        foreach ($matches[2] as $key => $value) {
            $selectorsAttr[$key] = array();
            if ($value === '') {
                continue; // step without attribute conditions
            }
            preg_match_all('~([\w\-]+)="([\w\-. _/]+)"~', $value, $pairs);
            foreach ($pairs[1] as $index => $attr) {
                $selectorsAttr[$key][$attr] = $pairs[2][$index];
            }
        }
    }

    /**
     * Find all descendants of this node that match a selector chain.
     *
     * Each step of the chain must match a direct child of the previous one.
     * When called with $selectors === null (the internal recursive form) the
     * already parsed $selectorsTag/$selectorsAttr are used, matches are only
     * accumulated in the shared result list and nothing is returned.
     *
     * @param mixed $selectors     user's select condition, or null.
     * @param array $selectorsTag  tags.
     * @param array $selectorsAttr attributes.
     *
     * @access public
     *
     * @return array|null matched nodes, or null in the recursive form.
     */
    public function find($selectors, $selectorsTag = array(), $selectorsAttr = array())
    {
        if ($selectors !== null) {
            $this->parseSelectors($selectors, $selectorsTag, $selectorsAttr);
        }
        if (!empty($selectorsTag)) {
            $this->seek($selectorsTag, $selectorsAttr);
            foreach ($this->child as $node) {
                $node->find(null, $selectorsTag, $selectorsAttr);
            }
        }
        if ($selectors === null) {
            return null;
        }
        $res = self::$FoundNode;
        self::$FoundNode = array();
        return $res;
    }

    /**
     * Find nodes through the global tag index instead of walking the tree.
     *
     * The first step of the selector is looked up in the index; any steps
     * after the first space that follows the first ']' are then resolved
     * relative to those nodes.
     *
     * @param string $selectors user's select condition.
     *
     * @access public
     *
     * @return array
     */
    public function findGlobal($selectors)
    {
        $bracket = strpos($selectors, ']');
        $space = strpos($selectors, ' ', $bracket === false ? 0 : $bracket);
        if ($space === false) {
            return $this->findOneGlobal($selectors);
        }

        $selectorsTag = array();
        $selectorsAttr = array();
        $this->findOneGlobal(substr($selectors, 0, $space), false);
        $this->parseSelectors(substr($selectors, $space + 1), $selectorsTag, $selectorsAttr);
        if (!empty(self::$FoundNode) && !empty($selectorsTag)) {
            $nodes = self::$FoundNode;
            self::$FoundNode = array();
            foreach ($nodes as $node) {
                $node->seek($selectorsTag, $selectorsAttr);
            }
        }

        $res = self::$FoundNode;
        self::$FoundNode = array();
        return $res;
    }

    /**
     * Match the direct children of this node against the first selector step
     * and continue with the remaining steps below every match.
     *
     * An attribute condition is satisfied when the child's attribute value
     * contains the requested value. Matches are appended to the shared
     * result list.
     *
     * @param array $selectorsTag  tags.
     * @param array $selectorsAttr attributes.
     *
     * @access protected
     *
     * @return null
     */
    protected function seek($selectorsTag, $selectorsAttr)
    {
        if (empty($selectorsTag)) {
            return;
        }
        $wantedTag = $selectorsTag[0];
        $wantedAttr = isset($selectorsAttr[0]) ? $selectorsAttr[0] : array();
        $isLastStep = count($selectorsTag) === 1;

        foreach ($this->child as $node) {
            if ($node->tag !== $wantedTag) {
                continue;
            }
            $isFind = true;
            foreach ($wantedAttr as $attrName => $value) {
                $value = (string) $value;
                // Plain substring test: the value is data, not a regex.
                if (!isset($node->attr[$attrName])
                    || ($value !== '' && strpos((string) $node->attr[$attrName], $value) === false)
                ) {
                    $isFind = false;
                    break;
                }
            }
            if (!$isFind) {
                continue;
            }
            if ($isLastStep) {
                self::$FoundNode[] = $node;
            } else {
                $node->seek(
                    array_slice($selectorsTag, 1),
                    array_slice($selectorsAttr, 1)
                );
            }
        }
    }

    /**
     * Look up a single selector step (tag plus optional attributes) in the
     * global tag index.
     *
     * @param string $selector user's select condition.
     * @param bool   $isReturn whether to return (and reset) the result list;
     *                         when false, matches stay in the shared list.
     *
     * @access public
     *
     * @return array|null matched nodes, or null when $isReturn is false.
     */
    public function findOneGlobal($selector, $isReturn = true)
    {
        $tags = array();
        $attrs = array();
        $this->parseSelectors($selector, $tags, $attrs);

        // Only the first step is used; an unparsable selector matches nothing.
        if (isset($tags[0]) && isset(self::$TagSet[$tags[0]])) {
            $attr = isset($attrs[0]) ? $attrs[0] : array();
            foreach (self::$TagSet[$tags[0]] as $attrValue => $nodeArray) {
                $isFind = true;
                foreach ($attr as $attrName => $value) {
                    // name="...value..." inside the raw attribute string. Both
                    // parts are quoted, and [^"]* keeps the match inside the
                    // value of this one attribute.
                    $pattern = '~(?<![\w-])' . preg_quote($attrName, '~')
                        . '\s*=\s*"[^"]*' . preg_quote($value, '~') . '[^"]*"~';
                    if (preg_match($pattern, (string) $attrValue) !== 1) {
                        $isFind = false;
                        break;
                    }
                }
                if ($isFind) {
                    foreach ((array) $nodeArray as $node) {
                        self::$FoundNode[] = $node;
                    }
                }
            }
        }

        if ($isReturn) {
            $res = self::$FoundNode;
            self::$FoundNode = array();
            return $res;
        }
        return null;
    }
}
- /**
- * Tagdomnode
- *
- * @uses Tagdomroot
- *
- * @category Tagparse
- * @package Tagparse
- * @author Kun
- * @copyright Kun
- * @license http://www.php.net/license/3_01.txt PHP license 3.01
- * @version 1.0
- * @link http://www.blogkun.com
- * @since 1.0
- */
class Tagdomnode extends Tagdomroot
{
    /** @var array Parsed attributes of this element: name => value. */
    public $attr = array();

    /** @var Tagdomroot|null The node this element is a direct child of. */
    public $parent = null;

    /**
     * Build an element node and, recursively, all of its descendants.
     *
     * The parent constructor is intentionally not called: the markup has
     * already been cleaned by the root, and this constructor sets up every
     * property itself.
     *
     * @param mixed $tag          tag name.
     * @param mixed $parent       parent node.
     * @param mixed $attr         raw attribute string of the opening tag.
     * @param mixed $innerContent markup between the opening and closing tag.
     * @param mixed $level        node level.
     *
     * @access public
     *
     * @return Tagdomnode
     */
    public function __construct($tag, $parent, $attr, $innerContent, $level)
    {
        $attr = (string) $attr;
        $innerContent = (string) $innerContent;

        $this->tag = $tag;
        $this->parent = $parent;
        $this->level = $level;
        $this->_parseAttr($attr);

        // Everything outside <...> is text. Stripping the tags in one pass
        // avoids duplicating content or leaking markup into the plain text.
        $this->plaintext = preg_replace('~<[^>]*>~s', '', $innerContent);

        $tagCollect = array();
        $attrCollect = array();
        $innerContentCollect = array();
        if ($this->parseTag($innerContent, $tagCollect, $attrCollect, $innerContentCollect) === false) {
            self::$TagParseError = true;
        }
        foreach ($tagCollect as $index => $childTag) {
            $this->child[] = new Tagdomnode(
                $childTag,
                $this,
                $attrCollect[$index],
                $innerContentCollect[$index],
                $this->level + 1
            );
        }

        // Register this node in the shared index used by the global finders.
        // Each bucket is a LIST of nodes, so append rather than overwrite:
        // several elements can share the same tag and attribute string.
        if (!isset(self::$TagSet[$this->tag])) {
            self::$TagSet[$this->tag] = array();
        }
        if (!isset(self::$TagSet[$this->tag][$attr])) {
            self::$TagSet[$this->tag][$attr] = array();
        }
        self::$TagSet[$this->tag][$attr][] = $this;
    }

    /**
     * Parse a raw attribute string (name="value" pairs) into $this->attr.
     *
     * Only double-quoted values are recognised; whitespace around '=' is
     * tolerated.
     *
     * @param string $str attribute string.
     *
     * @access private
     *
     * @return null
     */
    private function _parseAttr($str)
    {
        $count = preg_match_all(
            '~(?<attrName>[\w-]+)\s*=\s*"(?<attrValue>.*?)"~s',
            (string) $str,
            $matches
        );
        if ($count === false || $count === 0) {
            return;
        }
        foreach ($matches['attrName'] as $key => $name) {
            $this->attr[$name] = $matches['attrValue'][$key];
        }
    }
}
Copy Code |