java读取doc,pdf问题。_框架

PDFBox是一个开源的对pdf文件进行操作的库。需要将 PDFBox-0.7.3.jar 加入classpath，同时将 FontBox1.0.jar 也加入classpath，否则会报错。

import javaioFileInputStream;

import javaioFileNotFoundException;

import javaioIOException;

import orgpdfboxpdfparserPDFParser;

import orgpdfboxpdmodelPDDocument;

import orgpdfboxutilPDFTextStripper;

public class PdfReader {

simply reader all the text from a pdf file

You have to deal with the format of the output text by yourself

2008-2-25

@param pdfFilePath file path

@return all text in the pdf file

public static String getTextFromPDF(String pdfFilePath)

{

String result = null;

FileInputStream is = null;

PDDocument document = null;

try {

is = new FileInputStream(pdfFilePath);

PDFParser parser = new PDFParser(is);

parserparse();

document = parsergetPDDocument();

PDFTextStripper stripper = new PDFTextStripper();

result = strippergetText(document);

} catch (FileNotFoundException e) {

// TODO Auto-generated catch block

eprintStackTrace();

} catch (IOException e) {

// TODO Auto-generated catch block

eprintStackTrace();

} finally {

if (is != null) {

try {

isclose();

} catch (IOException e) {

// TODO Auto-generated catch block

eprintStackTrace();

}

if (document != null) {

try {

documentclose();

} catch (IOException e) {

// TODO Auto-generated catch block

eprintStackTrace();

}

return result;

}

public static void main(String[] args)

{

String str=PdfReadergetTextFromPDF("C:\\Readpdf");

Systemoutprintln(str);

}

代码2：

import javaioFile;

import javaioFileOutputStream;

import javaioOutputStreamWriter;

import javaioWriter;

import javanetMalformedURLException;

import javanetURL;

import orgpdfboxpdmodelPDDocument;

import orgpdfboxutilPDFTextStripper;

public class PDFReader {

public void readFdf(String file) throws Exception {

boolean sort = false;

String pdfFile = file;

String textFile = null;

String encoding = "UTF-8";

int startPage = 1;

int endPage = IntegerMAX_VALUE;

Writer output = null;

PDDocument document = null;

try {

// 首先当作一个URL来装载文件，如果得到异常再从本地文件系统//去装载文件

URL url = new URL(pdfFile);

//注意参数已不是以前版本中的URL而是File。

document = PDDocumentload(pdfFile);

// 获取PDF的文件名

String fileName = urlgetFile();

// 以原来PDF的名称来命名新产生的txt文件

if (fileNamelength() > 4) {

File outputFile = new File(fileNamesubstring(0, fileName

length() - 4)

+ "txt");

textFile = outputFilegetName();

}

} catch (MalformedURLException e) {

// 如果作为URL装载得到异常则从文件系统装载

//注意参数已不是以前版本中的URL而是File。

document = PDDocumentload(pdfFile);

if (pdfFilelength() > 4) {

textFile = pdfFilesubstring(0, pdfFilelength() - 4)

+ "txt";

}

output = new OutputStreamWriter(new FileOutputStream(textFile),

encoding);

PDFTextStripper stripper = null;

stripper = new PDFTextStripper();

// 设置是否排序

strippersetSortByPosition(sort);

// 设置起始页

strippersetStartPage(startPage);

// 设置结束页

strippersetEndPage(endPage);

// 调用PDFTextStripper的writeText提取并输出文本

stripperwriteText(document, output);

} finally {

if (output != null) {

// 关闭输出流

outputclose();

}

if (document != null) {

// 关闭PDF Document

documentclose();

}

@param args

public static void main(String[] args) {

// TODO Auto-generated method stub

PDFReader pdfReader = new PDFReader();

try {

// 取得E盘下的SpringGuidepdf的内容

pdfReaderreadFdf("C:\\Readpdf");

} catch (Exception e) {

eprintStackTrace();

}

2、抽取支持中文的pdf文件－xpdf

xpdf是一个开源项目，我们可以调用它的本地可执行程序（pdftotext）来抽取中文pdf文件中的文本。

import java.io.*;

/**
 * Title: pdf extraction
 *
 * <p>Description: email:chris@matrix.org.cn
 *
 * <p>Company: Matrix.org.cn
 *
 * @author chris
 * @version 1.0, who use this example pls remain the declare
 */
public class PdfWin {

    public PdfWin() {
    }

    /**
     * Runs the xpdf {@code pdftotext} executable on a sample PDF, reads the
     * text it writes to standard output as UTF-8 and prints it.
     *
     * @param args unused
     * @throws Exception if the process cannot be started or its output cannot be read
     */
    public static void main(String[] args) throws Exception {
        String PATH_TO_XPDF = "C:\\Program Files\\xpdf\\pdftotext.exe";
        String filename = "c:\\a.pdf";
        String[] cmd = new String[] {PATH_TO_XPDF, "-enc", "UTF-8", "-q", filename, "-"};
        Process p = Runtime.getRuntime().exec(cmd);
        BufferedInputStream bis = new BufferedInputStream(p.getInputStream());
        InputStreamReader reader = new InputStreamReader(bis, "UTF-8");
        StringWriter out = new StringWriter();
        char[] buf = new char[10000];
        int len;
        try {
            while ((len = reader.read(buf)) >= 0) {
                // Accumulate every chunk. Building the result from buf after the
                // loop would keep only the last chunk plus stale characters.
                out.write(buf, 0, len);
                System.out.println("the length is" + len);
            }
        } finally {
            reader.close();
        }
        String ts = out.toString();
        System.out.println("the str is" + ts);
    }
}

引入poi的jar包，大致如下：

读取代码如下，应该能看得明白吧

import javaioFileInputStream;

import javaioFileNotFoundException;

import javaioIOException;

import javaioInputStream;

import javamathBigDecimal;

import javatextDecimalFormat;

import javatextSimpleDateFormat;

import orgapachepoixssfusermodelXSSFCell;

import orgapachepoixssfusermodelXSSFRow;

import orgapachepoixssfusermodelXSSFSheet;

import orgapachepoixssfusermodelXSSFWorkbook;

public class ExcelUtil2007 {

/读取excel文件流的指定索引的sheet

@param inputStream excel文件流

@param sheetIndex 要读取的sheet的索引

@return

@throws FileNotFoundException

@throws IOException

public static XSSFSheet readExcel(InputStream inputStream,int sheetIndex) throws FileNotFoundException, IOException

{

return readExcel(inputStream)getSheetAt(sheetIndex);

}

/读取excel文件的指定索引的sheet

@param filePath excel文件路径

@param sheetIndex 要读取的sheet的索引

@return

@throws IOException

@throws FileNotFoundException

public static XSSFSheet readExcel(String filePath,int sheetIndex) throws FileNotFoundException, IOException

{

return readExcel(filePath)getSheetAt(sheetIndex);

}

/读取excel文件的指定索引的sheet

@param filePath excel文件路径

@param sheetName 要读取的sheet的名称

@return

@throws IOException

@throws FileNotFoundException

public static XSSFSheet readExcel(String filePath,String sheetName) throws FileNotFoundException, IOException

{

return readExcel(filePath)getSheet(sheetName);

}

/读取excel文件，返回XSSFWorkbook对象

@param filePath excel文件路径

@return

@throws FileNotFoundException

@throws IOException

public static XSSFWorkbook readExcel(String filePath) throws FileNotFoundException, IOException

{

XSSFWorkbook wb=new XSSFWorkbook(new FileInputStream(filePath));

return wb;

}

/读取excel文件流，返回XSSFWorkbook对象

@param inputStream excel文件流

@return

@throws FileNotFoundException

@throws IOException

public static XSSFWorkbook readExcel(InputStream inputStream) throws FileNotFoundException, IOException

{

XSSFWorkbook wb=new XSSFWorkbook(inputStream);

return wb;

}

/读取excel中指定的单元格，并返回字符串形式的值

1数字

2字符

3公式（返回的为公式内容，非单元格的值）

4空

@param st 要读取的sheet对象

@param rowIndex 行索引

@param colIndex 列索引

@param isDate 是否要取的是日期（是则返回yyyy-MM-dd格式的字符串）

@return

public static String getCellString(XSSFSheet st,int rowIndex,int colIndex,boolean isDate){

String s="";

XSSFRow row=stgetRow(rowIndex);

if(row == null) return "";

XSSFCell cell=rowgetCell(colIndex);

if(cell == null) return "";

if (cellgetCellType() == 0) {//数字

if(isDate)s=new SimpleDateFormat("yyyy-MM-dd")format(cellgetDateCellValue());

else s = trimPointO(StringvalueOf(getStringValue(cell))trim());

}else if (cellgetCellType() == 1){//字符（excel中的空格，不是全角，也不是半角，不知道是神马,反正就是" "这个）

s=cellgetRichStringCellValue()getString()replaceAll(" ", " ")trim();

// s=cellgetStringCellValue();//07API新增，好像跟上一句一致

}

else if (cellgetCellType() == 2){//公式

s=cellgetCellFormula();

}

else if (cellgetCellType() == 3){//空

s="";

}

return s;

}

/如果数字以 0 结尾，则去掉0

@param s

@return

public static String trimPointO(String s) {

if (sendsWith("0"))

return ssubstring(0, slength() - 2);

else

return s;

}

/处理科学计数法和百分比模式的数字单元格

@param cell

@return

public static String getStringValue(XSSFCell cell) {

String sValue = null;

short dataFormat = cellgetCellStyle()getDataFormat();

double d = cellgetNumericCellValue();

BigDecimal b = new BigDecimal(DoubletoString(d));

//百分比样式的

if (dataFormat == 0xa || dataFormat == 9) {

b=bmultiply(new BigDecimal(100));

//String temp=btoPlainString();

DecimalFormat df=new DecimalFormat("000");//保留两位小数的百分比格式

sValue = dfformat(b) + "%";

}else{

sValue = btoPlainString();

}

return sValue;

}

这个不好办。

你可以先在Excel中看看下拉列表取的是哪些行列的数据，然后在程序中读取这些行列的数据。

或者试试下面的代码（基于POI）：

// Requires org.apache.poi.ss.util.CellRangeAddress in addition to the XSSF classes.
XSSFWorkbook excel = new XSSFWorkbook(in);
XSSFSheet sheet = excel.getSheetAt(0);
XSSFRow r = sheet.getRow(row); // row = row index
XSSFCell c = r.getCell(col); // col = column index
CellRangeAddress array = c.getArrayFormulaRange();
System.out.println(array.formatAsString()); // check whether the range is the expected one
// Call the other methods on array to get the values.

以上就是关于java读取doc,pdf问题。全部的内容，包括:java读取doc,pdf问题。、java怎么读取excel数据、java 读取Excel 下拉列表里面所有的选项等相关内容解答，如果想了解更多相关内容，可以关注我们，你们的支持是我们更新的动力！

欢迎分享，转载请注明来源：内存溢出

原文地址:https://54852.com/web/9863787.html

java读取doc,pdf问题。

发表评论

评论列表（0条）