First, Lucene needs additional libraries if you want to index Office documents.
There are a couple of ways to do this:
The first is to use Apache POI.
The second is to use JACOB (the Java-COM bridge).
The code here uses JACOB to process Word documents.
Code:
package jacob;

import com.jacob.activeX.ActiveXComponent;
import com.jacob.com.Dispatch;
import com.jacob.com.Variant;
public class Wordreader {
Public Wordreader () {
TODO auto-generated Constructor stub
}
public static void Extractdoc (String inputfile, String outputFile) {
Boolean flag = false;
Open the Word application
Activexcomponent app = new Activexcomponent ("Word.Application");
try{
Set Word not visible
App.setproperty ("Visible", new Variant (false));
Open Word File
Dispatch Doc1 = App.getproperty ("Document"). Todispatch ();
Dispatch doc2 = Dispatch.invoke (Doc1, "Open", Dispatch.method, New Object[]{inputfile, new variant (FALSE), new variant (tr UE)}, new Int[1]). Todispatch ();
Save to temp file as txt format
Dispatch.invoke (DOC2, "SaveAs", Dispatch.method, New Object[]{outputfile, New Variant (7)}, new int[1]);
Close Word
Variant F = new Variant (FALSE);
Dispatch.call (DOC2, "Close", f);
Flag = true;
}catch (Exception e) {
E.printstacktrace ();
}finally{
App.invoke ("Quit", New variant[]{});
}
if (flag = = True) {
System.out.println ("transformed successfully");
}else{
System.out.println ("Transform Failed");
}
}
public static void Main (string[] args) {
TODO auto-generated Method Stub
Wordreader.extractdoc ("E:\\hadoop.doc", "e:\\jacob.txt");
}
}
Using Lucene with Office Word and Excel documents