Because the content is HTML-formatted, simply cutting off the first few characters is clearly inappropriate: the cut can land inside a tag or leave tags unclosed. Stripping all HTML first and then truncating does not give the desired result either, because the formatting is lost. After searching the Internet for a while, I wrote the following code, which should meet the basic requirements. (It is written in JavaScript because that is easy to debug.)
The code is as follows:
/**
 * Namespace for the HTML-aware excerpt helpers.
 */
var br = {};

/*
 * Tags that do not need to appear in pairs (HTML void elements), so no
 * closing tag is ever generated for them. Entries must be lower case:
 * tag names are lower-cased before they are looked up here.
 */
br.sptags = [
    "img", "br", "hr",
    "area", "base", "col", "embed", "input", "link", "meta", "source", "track", "wbr"
];

/**
 * Tells whether a value occurs in an array.
 *
 * @param {Array} arr - List to search.
 * @param {*} it - Value to look for.
 * @returns {boolean} true when some element of arr equals it.
 */
br.contain = function (arr, it) {
    for (var i = 0, len = arr.length; i < len; i++) {
        // Loose equality is kept on purpose so existing callers that rely on
        // coercion (e.g. 1 vs "1") keep getting the same answer.
        if (arr[i] == it) {
            return true;
        }
    }
    return false;
};

/**
 * Returns the beginning of an HTML fragment, cut after `worldnum` visible
 * characters, with closing tags appended for every element the cut left open.
 *
 * Characters between "<" and the next ">" are treated as markup: they are
 * copied to the output but not counted. HTML entities are counted character
 * by character, not as a single visible character.
 *
 * @param {string} article - HTML fragment to shorten.
 * @param {number} worldnum - Maximum number of visible (non-markup)
 *     characters to keep. Zero or a negative number yields an empty string;
 *     a value that is not a number disables the limit.
 * @returns {string} The shortened fragment with balanced tags.
 */
br.subArtc = function (article, worldnum) {
    if (article === null || article === undefined) {
        return "";
    }
    var html = String(article);
    var limit = Number(worldnum);
    if (limit <= 0) {
        return "";
    }

    /* Pass 1: copy characters until enough visible ones have been seen. */
    var result = [];
    var visible = 0;
    var inTag = false;
    for (var i = 0, len = html.length; i < len; i++) {
        var ch = html.charAt(i);
        result.push(ch);
        if (ch === "<") {
            inTag = true;
        }
        if (!inTag) {
            visible++;
            // The cut only ever happens outside markup, so a tag is never split.
            if (visible >= limit) {
                break;
            }
        }
        if (ch === ">") {
            inTag = false;
        }
    }

    /* Pass 2: replay the tags of the excerpt against a stack of open elements. */
    var text = result.join("");
    var total = text.length;
    var open = [];
    var j = 0;
    while (j < total) {
        if (text.charAt(j) !== "<") {
            j++;
            continue;
        }
        j++;
        var closing = text.charAt(j) === "/";
        if (closing) {
            j++;
        }
        // The tag name runs up to the first whitespace, "/" or ">".
        var name = "";
        while (j < total) {
            var c = text.charAt(j);
            if (c === ">" || c === "/" || /\s/.test(c)) {
                break;
            }
            name += c;
            j++;
        }
        // Skip attributes so that a "<" inside them is not taken for a new tag,
        // matching pass 1, which treats everything up to ">" as markup.
        while (j < total && text.charAt(j) !== ">") {
            j++;
        }
        if (j >= total) {
            // The tag is never terminated: do not try to close it.
            break;
        }
        var first = name.charAt(0);
        if (name === "" || first === "!" || first === "?") {
            // Comments, doctypes and processing instructions open nothing.
            continue;
        }
        var lower = name.toLowerCase();
        if (closing) {
            // Close the nearest matching open element together with anything
            // still open inside it; a stray end tag is ignored.
            for (var k = open.length - 1; k >= 0; k--) {
                if (open[k].toLowerCase() === lower) {
                    open.length = k;
                    break;
                }
            }
        } else if (!br.contain(br.sptags, lower)) {
            open.push(name);
        }
    }

    /* Append the missing closing tags, innermost element first. */
    for (var t = open.length - 1; t >= 0; t--) {
        result.push("</" + open[t] + ">");
    }
    return result.join("");
};

/* Alias for the all-caps spelling of the entry point. */
br.SUBARTC = br.subArtc;
Basic ideas:
1. Skip over the markup to count the actual number of characters. For example, to display the first 100 characters of the content, walk through the text ignoring tags until the index of the 100th visible character is found, then cut the string at that index.
2. Collect the start tags and end tags that occur in the string obtained in step 1. Note: a start tag here is one that opens with "<" where the next character is not "/".
3. Remove from the start tags collected in step 2 those that do not need to appear in pairs, such as br, img, hr and so on.
4. Compare the start tags remaining after step 3 with the end tags collected in step 2, and append the missing closing tags at the end of the string.
This function has not been rigorously tested. If you are interested, you are welcome to help test it, and if you have a better idea, please reply so we can discuss it.
Author: cnblogs bravfing