欢迎您访问程序员文章站!本站旨在为大家提供并分享程序员计算机编程知识。
您现在的位置是: 首页

POI读取Word文件(支持HWPF和XWPF两种方式,分别对应.doc和.docx)

程序员文章站 2022-07-13 12:59:03
...

POI读取Word文件(支持HWPF和XWPF两种方式,分别对应.doc和.docx)

参考:HSSF,XSSF,SXSSF三种方式

1. 引入Maven依赖(poi、poi-ooxml、poi-scratchpad 三个依赖的版本必须一致)

   <!-- Apache POI dependencies. All three artifacts are pinned to the same version (4.1.1). -->
   <!-- NOTE(review): poi-ooxml presumably supplies the XWPF (.docx) classes and poi-scratchpad
        the HWPF (.doc) classes used by the reader code; confirm against the POI component map. -->
   <dependency>
      <groupId>org.apache.poi</groupId>
      <artifactId>poi</artifactId>
      <version>4.1.1</version>
    </dependency>
    <dependency>
      <groupId>org.apache.poi</groupId>
      <artifactId>poi-ooxml</artifactId>
      <version>4.1.1</version>
    </dependency>
    <dependency>
      <groupId>org.apache.poi</groupId>
      <artifactId>poi-scratchpad</artifactId>
      <version>4.1.1</version>
    </dependency>

2.读取word

/**
 * Utility for extracting plain text from Microsoft Word files with Apache POI.
 *
 * <p>Both the OOXML format ({@code .docx}, read through XWPF) and the legacy binary format
 * ({@code .doc}, read through HWPF) are supported.
 */
public class POIUtil {

  /**
   * Extracts the text content of a Word document.
   *
   * <p>The file is first parsed as an OOXML ({@code .docx}) document. If POI reports that the
   * file is really an OLE2 container (the older binary Word format), it is reopened and parsed
   * with the HWPF reader instead. Only text is extracted; embedded pictures are not read.
   *
   * <p>Every stream, document and extractor opened here is closed before the method returns,
   * including the stream used for the failed OOXML attempt when the fallback is taken.
   *
   * <p>Originally created 2019-07-27.
   *
   * @param path file-system path of the Word document; must not be {@code null}
   * @return the text of the document, or {@code null} if {@code path} does not refer to an
   *     existing regular file
   * @throws Exception if the file cannot be read or cannot be parsed as a Word document; I/O
   *     failures are reported to the caller rather than being silently turned into {@code null}
   */
  public static String readWord(String path) throws Exception {
    File file = new File(path);
    // isFile() is false for missing paths as well as for directories.
    if (!file.isFile()) {
      return null;
    }
    try {
      return readDocx(file);
    } catch (OLE2NotOfficeXmlFileException legacyFormat) {
      // Expected for files in the older binary Word format: retry with the HWPF reader.
      // The stream from the OOXML attempt has already been closed by try-with-resources.
      return readDoc(file);
    }
  }

  /** Reads the text of an OOXML (.docx) file through XWPF, closing all resources afterwards. */
  private static String readDocx(File file) throws IOException {
    try (InputStream in = new FileInputStream(file);
        XWPFDocument document = new XWPFDocument(in);
        POIXMLTextExtractor extractor = new XWPFWordExtractor(document)) {
      return extractor.getText();
    }
  }

  /** Reads the text of a binary (.doc) file through HWPF, closing all resources afterwards. */
  private static String readDoc(File file) throws IOException {
    try (InputStream in = new FileInputStream(file);
        HWPFDocument document = new HWPFDocument(in);
        WordExtractor extractor = new WordExtractor(document)) {
      return extractor.getText();
    }
  }

  /**
   * Prints the text of a Word document to standard output.
   *
   * @param args optional; {@code args[0]} is the path of the document to read. When no argument
   *     is given, the sample path used by the original example is read.
   */
  public static void main(String[] args) {
    String path = args.length > 0 ? args[0] : "/Users/jj/Desktop/胜多负少的范德萨.doc";

    try {
      System.out.println(readWord(path));
    } catch (Exception e) {
      e.printStackTrace();
    }
  }


}
相关标签: java