
Java
最近我也在学习这个话题。根据我的经验,以下是一个专业的、详细的版本:import
Java.io.*;import
Java.util.*;import org.apache.poi.poifs.filesystem.*;import org.apache.poi.util.LittleEndian;public class
wordFileReader {public
wordFileReader() {}public static boolean read
wordFile(String filePath) throws IOException, FileNotFoundException {boolean result = false;FileInputStream file = new FileInputStream(filePath);POIFSFileSystem poifs = new POIFSFileSystem(file);DirectoryEntry root = poifs.getRoot();DocumentEntry de = root.createDocument(new Path("
wordDocument"), new ByteArrayInputStream(poifs.getEntry("
wordDocument").getData()));FileOutputStream output = new FileOutputStream(filePath.substring(0, filePath.lastIndexOf("/")) + ".doc");de.copy(de, output);file.close();output.close();return result;}public static void m
AIn(String[] args) throws IOException {String filePath = "example.doc";boolean result = read
wordFile(filePath);System.out.println("File successfully read.");}public String extractText(InputStream input) throws IOException {ArrayList text = new ArrayList();POIFSFileSystem poifs = new POIFSFileSystem(input);DocumentEntry headerProps = (DocumentEntry) poifs.getRoot().getEntry(new Path("
wordDocument"));DocumentInputStream din = poifs.createDocumentInputStream(headerProps);byte[] header = din.readAllBytes();din.close();headerProps.close();int info = LittleEndian.getShort(header, 0xa);boolean useTable1 = (info & 0x200) != 0;if (useTable1) {tableName = "1Table";} else {tableName = "0Table";}DocumentEntry table = poifs.getRoot().getEntry(tableName);byte[] tableStream = din.readAllBytes();int multiple = findText(tableStream, complexOffset, text);return text.toString();}}public static int findText(byte[] tableStream, int complexOffset, ArrayList text) {// code goes here}}以上是一个专业的、详细的版本。你可以根据需要进行修改和优化。