Aspose.Words: generating simple Word documents (tags: Aspose.Words, Word)
Aspose.Words: generate Word documents
// Builds a small Word document with Aspose.Words: two heading lines, the plain
// text of an HTML fragment, and every image the fragment references that exists
// on disk. The result is written to D:\1.docx.
Aspose.Words.Document doc = new Aspose.Words.Document();
Aspose.Words.DocumentBuilder builder = new Aspose.Words.DocumentBuilder(doc);

builder.Writeln("Exam 1-Title");
builder.Writeln("Exam 1-Des");

string subject = @"<p>111-1</p>";

// Only the text content of the fragment is written; the markup is stripped.
builder.Writeln(TextNoHTML(subject));

// Images referenced by the fragment are inserted separately, after the text.
string[] imgurls = GetHtmlImageUrlList(subject);
string imgtargeturl = "";
foreach (string imgurl in imgurls)
{
    // The src values are site-relative URLs; map them to physical paths first.
    imgtargeturl = Server.MapPath(imgurl);

    // Skip references whose file is missing instead of failing the whole export.
    if (File.Exists(imgtargeturl))
    {
        builder.InsertImage(imgtargeturl, 400, 300);
    }
}

// builder.InsertImage(@"C:\Users\Sale\Pictures\1366-768 wallpaper\1357640083366.jpg", 400, 300);
doc.Save(@"D:\1.docx");
C#: retrieve plain text by removing HTML tags
/// <summary>
/// Converts an HTML fragment into plain text: script blocks, tags and comment
/// markers are removed and a small set of character entities is decoded.
/// </summary>
/// <param name="Htmlstring">HTML text value. Null or empty input yields an empty string.</param>
/// <returns>The content of <paramref name="Htmlstring"/> with the HTML markup removed.</returns>
public string TextNoHTML(string Htmlstring)
{
    if (string.IsNullOrEmpty(Htmlstring))
    {
        return string.Empty;
    }

    const RegexOptions options = RegexOptions.IgnoreCase;

    // Delete script blocks. Singleline lets ".*?" cross line breaks, so the body
    // of a multi-line script is removed together with its tags.
    Htmlstring = Regex.Replace(Htmlstring, @"<script[^>]*?>.*?</script>", "", options | RegexOptions.Singleline);

    // Delete the remaining HTML tags.
    Htmlstring = Regex.Replace(Htmlstring, @"<(.[^>]*)>", "", options);

    // Drop line breaks together with the whitespace that follows them.
    Htmlstring = Regex.Replace(Htmlstring, @"([\r\n])[\s]+", "", options);

    // Remove leftovers of comments that the tag pattern could not match.
    Htmlstring = Regex.Replace(Htmlstring, @"-->", "", options);
    Htmlstring = Regex.Replace(Htmlstring, @"<!--.*", "", options);

    // Decode the supported named/numeric entities.
    Htmlstring = Regex.Replace(Htmlstring, @"&(quot|#34);", "\"", options);
    Htmlstring = Regex.Replace(Htmlstring, @"&(lt|#60);", "<", options);
    Htmlstring = Regex.Replace(Htmlstring, @"&(gt|#62);", ">", options);
    // A non-breaking space becomes an ordinary space so adjacent words stay apart.
    Htmlstring = Regex.Replace(Htmlstring, @"&(nbsp|#160);", " ", options);
    Htmlstring = Regex.Replace(Htmlstring, @"&(iexcl|#161);", "\u00A1", options);
    Htmlstring = Regex.Replace(Htmlstring, @"&(cent|#162);", "\u00A2", options);
    Htmlstring = Regex.Replace(Htmlstring, @"&(pound|#163);", "\u00A3", options);
    Htmlstring = Regex.Replace(Htmlstring, @"&(copy|#169);", "\u00A9", options);

    // Any other numeric entity is dropped.
    Htmlstring = Regex.Replace(Htmlstring, @"&#(\d+);", "", options);

    // "&amp;" is decoded last: decoding it earlier would turn an escaped entity
    // such as "&amp;lt;" into "&lt;" and then decode it a second time.
    Htmlstring = Regex.Replace(Htmlstring, @"&(amp|#38);", "&", options);

    // The former trailing string.Replace calls discarded their results (strings
    // are immutable), so they never changed the output and have been removed;
    // "\r\n" sequences are already consumed by the line-break pattern above.
    return Htmlstring;
}
C#: obtain the src values of img tags
/// <summary>
/// Extracts the <c>src</c> value of every <c>img</c> tag in an HTML string.
/// </summary>
/// <param name="htmlText">HTML string text. Null or empty input yields an empty array.</param>
/// <returns>The image paths in document order, one entry per matched img tag.</returns>
public static string[] GetHtmlImageUrlList(string htmlText)
{
    if (string.IsNullOrEmpty(htmlText))
    {
        return new string[0];
    }

    // Matches <img ... src = "url" ...> with optional single/double quotes around
    // the value and an optional self-closing slash; the value itself is captured
    // in the named group "imgUrl".
    Regex regImg = new Regex(
        @"<img\b[^<>]*?\bsrc[\s\t\r\n]*=[\s\t\r\n]*[""']?[\s\t\r\n]*(?<imgUrl>[^\s\t\r\n""'<>]*)[^<>]*?/?[\s\t\r\n]*>",
        RegexOptions.IgnoreCase);

    // One match per img tag found in the text.
    MatchCollection matches = regImg.Matches(htmlText);

    string[] sUrlList = new string[matches.Count];
    for (int i = 0; i < matches.Count; i++)
    {
        // Keep only the captured src path of each tag.
        sUrlList[i] = matches[i].Groups["imgUrl"].Value;
    }

    return sUrlList;
}