Converting DOC, PDF, and PPT files to TXT:
The components used here read a file's content as text; simply changing the file-name extension converts nothing, so the text that is read must be written out to a TXT file.
Adding the Office references
When programming against Word and PPT files, make sure the Word and PowerPoint programmability components are selected when you install Office (they can be viewed in a custom installation), or install the "Microsoft Office 2003 Primary Interop Assemblies".
After installation, add the references in your project:
Add Reference -> COM -> Microsoft PowerPoint 11.0 Object Library / Microsoft Word 11.0 Object Library;
You also have to import the Office components in the code:
using System;
using System.IO;
using System.Text;
using Microsoft.Office.Interop.PowerPoint;
using Microsoft.Office.Interop.Word;
using org.pdfbox.pdmodel;
using org.pdfbox.util;
/// <summary>
/// Extracts the text of a PDF file with PDFBox and writes it to a text file.
/// Despite its name, this is the PDF converter.
/// </summary>
/// <param name="file">PDF file to read.</param>
/// <param name="txtfile">Destination text file; it is overwritten and encoded as GB2312.</param>
public void cmd2txt(FileInfo file, FileInfo txtfile)
{
    string text;
    PDDocument doc = PDDocument.load(file.FullName);
    try
    {
        PDFTextStripper pdfStripper = new PDFTextStripper();
        text = pdfStripper.getText(doc);
    }
    finally
    {
        // Release the PDF even when text extraction fails.
        doc.close();
    }

    // "using" flushes and closes the writer on every path, including exceptions.
    using (StreamWriter writer = new StreamWriter(txtfile.FullName, false, Encoding.GetEncoding("gb2312")))
    {
        writer.Write(text);
    }
}
For tables in a DOC file, the text is read row by row with the gridlines removed.
/// <summary>
/// Opens a Word document read-only through Word automation and writes its
/// full text content to a text file.
/// </summary>
/// <param name="file">Word document to read.</param>
/// <param name="txtfile">Destination text file; it is overwritten and encoded as GB2312.</param>
public void word2text(FileInfo file, FileInfo txtfile)
{
    object readOnly = true;
    object missing = System.Reflection.Missing.Value;
    object fileName = file.FullName;
    string text;

    Microsoft.Office.Interop.Word.ApplicationClass wordApp =
        new Microsoft.Office.Interop.Word.ApplicationClass();
    try
    {
        // The Word 11.0 interop signature of Documents.Open takes 16 by-ref
        // arguments; every optional one is passed as Missing.Value.
        Microsoft.Office.Interop.Word.Document doc = wordApp.Documents.Open(
            ref fileName, ref missing, ref readOnly, ref missing,
            ref missing, ref missing, ref missing, ref missing,
            ref missing, ref missing, ref missing, ref missing,
            ref missing, ref missing, ref missing, ref missing);
        try
        {
            text = doc.Content.Text;
        }
        finally
        {
            doc.Close(ref missing, ref missing, ref missing);
        }
    }
    finally
    {
        // Always shut Word down, otherwise a failed conversion leaves a
        // WINWORD.EXE process running.
        wordApp.Quit(ref missing, ref missing, ref missing);
    }

    // "using" flushes and closes the writer on every path, including exceptions.
    using (StreamWriter writer = new StreamWriter(txtfile.FullName, false, Encoding.GetEncoding("gb2312")))
    {
        writer.Write(text);
    }
}
/// <summary>
/// Opens a PowerPoint presentation read-only and without a window,
/// concatenates the text of every shape on every slide, and writes the
/// result to a text file.
/// </summary>
/// <param name="file">Presentation to read.</param>
/// <param name="txtfile">Destination text file; it is overwritten and encoded as GB2312.</param>
public void ppt2txt(FileInfo file, FileInfo txtfile)
{
    StringBuilder slideText = new StringBuilder();

    Microsoft.Office.Interop.PowerPoint.Application pa =
        new Microsoft.Office.Interop.PowerPoint.ApplicationClass();
    try
    {
        Microsoft.Office.Interop.PowerPoint.Presentation pp = pa.Presentations.Open(
            file.FullName,
            Microsoft.Office.Core.MsoTriState.msoTrue,    // ReadOnly
            Microsoft.Office.Core.MsoTriState.msoFalse,   // Untitled
            Microsoft.Office.Core.MsoTriState.msoFalse);  // WithWindow
        try
        {
            foreach (Microsoft.Office.Interop.PowerPoint.Slide slide in pp.Slides)
            {
                foreach (Microsoft.Office.Interop.PowerPoint.Shape shape in slide.Shapes)
                {
                    // Pictures, lines and similar shapes have no text frame;
                    // reading TextFrame on them throws, so skip them.
                    if (shape.HasTextFrame != Microsoft.Office.Core.MsoTriState.msoTrue)
                    {
                        continue;
                    }
                    slideText.Append(shape.TextFrame.TextRange.Text);
                }
            }
        }
        finally
        {
            pp.Close();
        }
    }
    finally
    {
        // Always shut PowerPoint down, otherwise a POWERPNT.EXE process is
        // left running after each conversion.
        pa.Quit();
    }

    // "using" flushes and closes the writer on every path, including exceptions.
    using (StreamWriter writer = new StreamWriter(txtfile.FullName, false, Encoding.GetEncoding("gb2312")))
    {
        writer.Write(slideText.ToString());
    }
}
Reading different types of files:
/// <summary>
/// Returns a GB2312 text reader over the content of a file, chosen by its
/// extension. A .txt file is opened directly; .doc, .pdf and .ppt files are
/// first converted to an intermediate text file, which is then opened.
/// </summary>
/// <param name="file">File to read; the extension is matched case-insensitively.</param>
/// <returns>
/// An open <see cref="StreamReader"/> that the caller must dispose, or
/// <c>null</c> when the extension is not one of the supported types.
/// </returns>
public StreamReader text2reader(FileInfo file)
{
    Encoding gb2312 = Encoding.GetEncoding("gb2312");

    // Intermediate files go to the system temp directory instead of a
    // machine-specific absolute path, so the code runs wherever it is deployed.
    string scratchDir = Path.GetTempPath();

    switch (file.Extension.ToLowerInvariant())
    {
        case ".txt":
            return new StreamReader(file.FullName, gb2312);

        case ".doc":
        {
            FileInfo wordfile = new FileInfo(Path.Combine(scratchDir, "word2txt.txt"));
            word2text(file, wordfile);
            return new StreamReader(wordfile.FullName, gb2312);
        }

        case ".pdf":
        {
            FileInfo pdffile = new FileInfo(Path.Combine(scratchDir, "pdf2txt.txt"));
            // cmd2txt is the PDF-to-text converter defined in this file.
            cmd2txt(file, pdffile);
            return new StreamReader(pdffile.FullName, gb2312);
        }

        case ".ppt":
        {
            FileInfo pptfile = new FileInfo(Path.Combine(scratchDir, "ppt2txt.txt"));
            ppt2txt(file, pptfile);
            return new StreamReader(pptfile.FullName, gb2312);
        }

        default:
            // Unsupported extension: same result as before, no reader.
            return null;
    }
}