Exception reading XLSB File Apache POI java.io.CharConversionException - java

I'm developing a Java application that reads an Excel .xlsb file using Apache POI, but I get an exception while reading it. My code is as follows:
import java.io.IOException;
import java.io.InputStream;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.Package;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.helpers.XMLReaderFactory;
import java.util.Iterator;
/**
 * Prints the reference and value of every cell of every sheet in a workbook,
 * using POI's event (SAX) based XSSF reader.
 */
public class Prueba {

    /**
     * Runs the dump against a hard-coded workbook path and prints the stack
     * trace of any failure.
     */
    public static void main(String[] args) {
        String direccion = "C:/Documents and Settings/RSalasL/My Documents/New Folder/masstigeoct12.xlsb";
        try {
            printAllSheets(direccion);
        } catch (InvalidFormatException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } catch (OpenXML4JException e) {
            e.printStackTrace();
        } catch (SAXException e) {
            e.printStackTrace();
        }
    }

    /**
     * Prints every sheet of the given workbook.
     *
     * @param filename path of the workbook to read
     * @throws Exception if the package cannot be opened or a sheet cannot be parsed
     */
    public void processAllSheets(String filename) throws Exception {
        printAllSheets(filename);
    }

    /**
     * Shared implementation behind {@link #main} and {@link #processAllSheets}:
     * opens the package, parses each sheet stream in turn and always releases
     * the sheet stream and the package afterwards.
     */
    private static void printAllSheets(String filename)
            throws IOException, OpenXML4JException, SAXException {
        Package pkg = Package.open(filename);
        try {
            XSSFReader r = new XSSFReader(pkg);
            SharedStringsTable sst = r.getSharedStringsTable();
            XMLReader parser = fetchSheetParser(sst);
            Iterator<InputStream> sheets = r.getSheetsData();
            while (sheets.hasNext()) {
                System.out.println("Processing new sheet:\n");
                InputStream sheet = sheets.next();
                try {
                    parser.parse(new InputSource(sheet));
                } finally {
                    sheet.close();
                }
                System.out.println("");
            }
        } finally {
            // The file is only read, so release it without writing anything back.
            pkg.revert();
        }
    }

    /**
     * Creates a Xerces SAX parser whose content handler prints cell values,
     * resolving shared-string indexes through {@code sst}.
     *
     * @param sst shared strings table of the workbook being read
     * @return a parser ready to be fed sheet XML
     * @throws SAXException if the parser cannot be created
     */
    public static XMLReader fetchSheetParser(SharedStringsTable sst) throws SAXException {
        XMLReader parser =
                XMLReaderFactory.createXMLReader(
                        "org.apache.xerces.parsers.SAXParser"
                );
        ContentHandler handler = new SheetHandler(sst);
        parser.setContentHandler(handler);
        return parser;
    }

    /** SAX handler that prints "reference - value" for each cell it sees. */
    private static class SheetHandler extends DefaultHandler {
        private SharedStringsTable sst;
        private String lastContents;
        private boolean nextIsString;

        private SheetHandler(SharedStringsTable sst) {
            this.sst = sst;
        }

        @Override
        public void startElement(String uri, String localName, String name,
                Attributes attributes) throws SAXException {
            // c => cell
            if (name.equals("c")) {
                // Print the cell reference
                System.out.print(attributes.getValue("r") + " - ");
                // t="s" means the cell value is an index into the shared strings table
                String cellType = attributes.getValue("t");
                nextIsString = cellType != null && cellType.equals("s");
            }
            // Clear contents cache
            lastContents = "";
        }

        @Override
        public void endElement(String uri, String localName, String name)
                throws SAXException {
            // v => contents of a cell.
            // Resolve here rather than in characters(), which may be called
            // more than once for a single value. The lookup is restricted to
            // the closing <v> so that a string-typed cell without a value
            // does not try to parse an empty index.
            if (name.equals("v")) {
                if (nextIsString) {
                    int idx = Integer.parseInt(lastContents);
                    lastContents = new XSSFRichTextString(sst.getEntryAt(idx)).toString();
                    nextIsString = false;
                }
                System.out.println(lastContents);
            }
        }

        @Override
        public void characters(char[] ch, int start, int length)
                throws SAXException {
            lastContents += new String(ch, start, length);
        }
    }
}
And the exception is this:
java.io.CharConversionException: Characters larger than 4 bytes are not supported: byte 0x83 implies a length of more than 4 bytes
at org.apache.xmlbeans.impl.piccolo.xml.UTF8XMLDecoder.decode(UTF8XMLDecoder.java:162)
at org.apache.xmlbeans.impl.piccolo.xml.XMLStreamReader$FastStreamDecoder.read(XMLStreamReader.java:762)
at org.apache.xmlbeans.impl.piccolo.xml.XMLStreamReader.read(XMLStreamReader.java:162)
at org.apache.xmlbeans.impl.piccolo.xml.PiccoloLexer.yy_refill(PiccoloLexer.java:3474)
at org.apache.xmlbeans.impl.piccolo.xml.PiccoloLexer.yylex(PiccoloLexer.java:3958)
at org.apache.xmlbeans.impl.piccolo.xml.Piccolo.yylex(Piccolo.java:1290)
at org.apache.xmlbeans.impl.piccolo.xml.Piccolo.yyparse(Piccolo.java:1400)
at org.apache.xmlbeans.impl.piccolo.xml.Piccolo.parse(Piccolo.java:714)
at org.apache.xmlbeans.impl.store.Locale$SaxLoader.load(Locale.java:3439)
at org.apache.xmlbeans.impl.store.Locale.parseToXmlObject(Locale.java:1270)
at org.apache.xmlbeans.impl.store.Locale.parseToXmlObject(Locale.java:1257)
at org.apache.xmlbeans.impl.schema.SchemaTypeLoaderBase.parse(SchemaTypeLoaderBase.java:345)
at org.openxmlformats.schemas.spreadsheetml.x2006.main.WorkbookDocument$Factory.parse(Unknown Source)
at org.apache.poi.xssf.eventusermodel.XSSFReader$SheetIterator.<init>(XSSFReader.java:207)
at org.apache.poi.xssf.eventusermodel.XSSFReader$SheetIterator.<init>(XSSFReader.java:166)
at org.apache.poi.xssf.eventusermodel.XSSFReader.getSheetsData(XSSFReader.java:160)
at EDManager.Prueba.main(Prueba.java:36)
The file has 2 sheets, one with 329 rows and 3 columns and the other with 566 rows and 3 columns, I just want to read the file to find if a value is in the second sheet.

Apache POI doesn't support the .xlsb file format for anything other than text extraction. Apache POI will happily provide full read or write support .xls files (via HSSF) and .xlsx files (via XSSF), or both (via the common SS UserModel interface).
However, the .xlsb format is not supported for general operations - it's a very odd hybrid between the two, and the large amount of work involved has meant no-one has been willing to volunteer/sponsor the work required.
What Apache POI does offer for .xlsb, as of Apache POI 3.15 beta3 / 3.16, is a text extractor for .xlsb files - XSSFBEventBasedExcelExtractor. You can use that to get the text out of your file, or with a few tweaks convert it to something like CSV
For full read/write support, you'll need to convert your file to either .xls (if it doesn't have very large numbers of rows/columns), or .xlsx (if it does). If you're really really keen to help though, you could review the source code for XSSFBEventBasedExcelExtractor, then have a go at contributing patches to add full support to POI for it!
(Additionally, I think from the exception that your particular .xlsb file is partly corrupt, but even if it wasn't it still wouldn't be supported by Apache POI for anything other than text extraction, sorry)

I have tried reading XLSB file using Apache POI and it is successful. Below is the code snippet I have used.
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.ss.usermodel.DataFormatter;
import org.apache.poi.xssf.binary.XSSFBSharedStringsTable;
import org.apache.poi.xssf.binary.XSSFBSheetHandler;
import org.apache.poi.xssf.binary.XSSFBStylesTable;
import org.apache.poi.xssf.eventusermodel.XSSFBReader;
import org.xml.sax.SAXException;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
/**
 * Reads every sheet of an .xlsb workbook with POI's XSSFB event reader and
 * prints the text collected by {@code TestSheetHandler} for each sheet.
 */
public class ApachePoiXLSB {

    public static void main(String[] args) {
        String xlsbFileName = "test.xlsb";
        try {
            OPCPackage pkg = OPCPackage.open(xlsbFileName);
            try {
                XSSFBReader r = new XSSFBReader(pkg);
                XSSFBSharedStringsTable sst = new XSSFBSharedStringsTable(pkg);
                XSSFBStylesTable xssfbStylesTable = r.getXSSFBStylesTable();
                XSSFBReader.SheetIterator it = (XSSFBReader.SheetIterator) r.getSheetsData();
                List<String> sheetTexts = new ArrayList<>();
                while (it.hasNext()) {
                    // Each sheet stream is closed as soon as it has been parsed.
                    try (InputStream is = it.next()) {
                        String name = it.getSheetName();
                        TestSheetHandler testSheetHandler = new TestSheetHandler();
                        testSheetHandler.startSheet(name);
                        XSSFBSheetHandler sheetHandler = new XSSFBSheetHandler(is,
                                xssfbStylesTable,
                                it.getXSSFBSheetComments(),
                                sst, testSheetHandler,
                                new DataFormatter(),
                                false);
                        sheetHandler.parse();
                        testSheetHandler.endSheet();
                        sheetTexts.add(testSheetHandler.toString());
                    }
                }
                System.out.println("output text:"+sheetTexts);
            } finally {
                // The workbook is only read: release the file without saving.
                pkg.revert();
            }
        } catch (IOException | OpenXML4JException | SAXException e) {
            // InvalidFormatException is an OpenXML4JException, so it is covered here.
            e.printStackTrace();
        }
    }
}
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler;
import org.apache.poi.xssf.usermodel.XSSFComment;
class TestSheetHandler implements XSSFSheetXMLHandler.SheetContentsHandler {
private final StringBuilder sb = new StringBuilder();
public void startSheet(String sheetName) {
sb.append("<sheet name=\"").append(sheetName).append(">");
}
public void endSheet() {
sb.append("</sheet>");
}
#Override
public void startRow(int rowNum) {
sb.append("\n<tr num=\"").append(rowNum).append(">");
}
#Override
public void endRow(int rowNum) {
sb.append("\n</tr num=\"").append(rowNum).append(">");
}
#Override
public void cell(String cellReference, String formattedValue, XSSFComment comment) {
formattedValue = (formattedValue == null) ? "" : formattedValue;
if (comment == null) {
sb.append("\n\t<td ref=\"").append(cellReference).append("\">").append(formattedValue).append("</td>");
} else {
sb.append("\n\t<td ref=\"").append(cellReference).append("\">")
.append(formattedValue)
.append("<span type=\"comment\" author=\"")
.append(comment.getAuthor()).append("\">")
.append(comment.getString().toString().trim()).append("</span>")
.append("</td>");
}
}
#Override
public void headerFooter(String text, boolean isHeader, String tagName) {
if (isHeader) {
sb.append("<header tagName=\"").append(tagName).append("\">").append(text).append("</header>");
} else {
sb.append("<footer tagName=\"").append(tagName).append("\">").append(text).append("</footer>");
}
}
#Override
public String toString() {
return sb.toString();
}
}

I have an implementation using SmartXLS: my code first converts the xlsb to xlsx, and after that Apache POI can be used. The following method receives a java.io.File, checks whether its extension is xlsb, converts it to xlsx and replaces the file with the new one. This works for me.
/**
 * Converts an .xlsb file to .xlsx next to the original, using SmartXLS.
 * Files whose extension is not {@code Static.XLSB_EXT} (case-insensitive) are
 * left alone. Failures are logged and reported through
 * {@code MensajesJSFUtil}; nothing is thrown to the caller.
 *
 * @param file the file to inspect and, if it is an .xlsb, convert
 */
private void processXLSBFile(File file) {
    WorkBook workBook = new WorkBook();
    String filePath = file.getAbsolutePath();
    if (FilenameUtils.getExtension(filePath).equalsIgnoreCase((Static.XLSB_EXT))) {
        try {
            // Close the source stream as soon as the workbook has been read.
            java.io.InputStream in = new java.io.FileInputStream(filePath);
            try {
                workBook.readXLSB(in);
            } finally {
                in.close();
            }
            filePath = filePath.replaceAll("(?i)".concat(Static.XLSB),
                    Static.XLSX_EXT.toLowerCase());
            // Close the target stream so the converted file is released.
            java.io.OutputStream out = new java.io.FileOutputStream(filePath);
            try {
                workBook.writeXLSX(out);
            } finally {
                out.close();
            }
            // NOTE(review): reassigning the parameter only changes the local
            // reference; the caller's File still points at the .xlsb path.
            final File xlsb = new File(filePath);
            file = xlsb;
        } catch (Exception e) {
            logger.error(e.getMessage(), e);
            MensajesJSFUtil
                    .mostrarMensajeNegocio(new GTMException(e, ClaveMensaje.COMANDAS_ADJUNTAR_XLSBFILE_READERROR));
        }
    }
}

Related

How I can create TOC using apache poi

I have the code which create table of content like this:
image screenshot ,
but I want a result like this:
image screenshot ,
and this is my code
package com.example;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import org.apache.commons.compress.archivers.dump.InvalidFormatException;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
import org.apache.poi.xwpf.usermodel.XWPFRun;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSimpleField;
These are libraries which I'm using for this project
and I'm also using the maven package manager
public class toc {
/**
 * Loads the template document, inserts the table of contents field and
 * saves the result. I/O failures while loading, generating or saving are
 * printed; the document is always closed at the end.
 */
public static void main(String[] args) throws IOException, OpenXML4JException {
    XWPFDocument docTemplate = null;
    try {
        File file = new File(
                "outfile02.docx"); // "C:\\Reports\\Template.docx";
        FileInputStream fis = new FileInputStream(file);
        try {
            docTemplate = new XWPFDocument(fis);
        } finally {
            // The stream is only needed while the document is being loaded.
            fis.close();
        }
        generateTOC(docTemplate);
        saveDocument(docTemplate);
    } catch (FileNotFoundException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        if (docTemplate != null) {
            docTemplate.close();
        }
    }
}
save function
/**
 * Writes the document to {@code outfile03.docx}, closing the output file
 * whether or not the write succeeds.
 *
 * @param docTemplate the document to save
 * @throws FileNotFoundException if the output file cannot be opened
 * @throws IOException if writing or closing fails
 */
private static void saveDocument(XWPFDocument docTemplate) throws FileNotFoundException, IOException {
    // If the constructor throws there is nothing to close, so the stream can
    // be opened before entering the guarded region.
    final FileOutputStream target = new FileOutputStream("outfile03.docx");
    try {
        docTemplate.write(target);
    } finally {
        target.close();
    }
}
TOC generator function
/**
 * Replaces the {@code ${TOC}} placeholder with a table-of-contents field.
 * For each paragraph, the first run whose text contains the placeholder has
 * it removed and a TOC field is appended to that paragraph.
 *
 * @param document the document to scan and modify
 */
public static void generateTOC(XWPFDocument document)
        throws InvalidFormatException, FileNotFoundException, IOException {
    String findText = "${TOC}";
    String replaceText = "";
    for (XWPFParagraph p : document.getParagraphs()) {
        for (XWPFRun r : p.getRuns()) {
            // Read the run's first text element. getTextPosition() is the
            // vertical raise/lower offset of the run, not an index into its text.
            String text = r.getText(0);
            if (text != null && text.contains(findText)) {
                text = text.replace(findText, replaceText);
                r.setText(text, 0);
                addField(p, "TOC \\o \"1-3\" \\h \\z \\u");
                // Stop scanning this paragraph once the field has been added.
                break;
            }
        }
    }
}
and this is last function
/**
 * Appends a simple field to the paragraph, using {@code fieldName} as the
 * field instruction and "outfile03.docx" as the text of its single run.
 *
 * @param paragraph the paragraph that receives the field
 * @param fieldName the field instruction to set
 */
private static void addField(XWPFParagraph paragraph, String fieldName) {
    CTSimpleField field = paragraph.getCTP().addNewFldSimple();
    field.setInstr(fieldName);
    field.addNewR()
            .addNewT()
            .setStringValue("outfile03.docx");
}
}

Java FileInputStream out of memory issue

I am trying to read an Excel file and then write it to a CSV file using XSSF. I am getting an out-of-memory error (Java heap space). I have read that FileInputStream is good for memory management, but I still see the issue.
package xlsxtocsv;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.Locale;
import org.apache.poi.ss.usermodel.DataFormatter;
import org.apache.poi.xssf.usermodel.XSSFRow;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
public class xlsxtocsv
{
private static final String NEW_LINE_CHARACTER="\r\n";
/**
* Write the string into a text file
* #param csvFileName
* #param csvData
* #throws Exception
*/
private static void writeCSV(String csvFileName,String csvData) throws Exception{
FileOutputStream writer = new FileOutputStream(csvFileName);
writer.write(csvData.getBytes());
writer.close();
System.out.println("Sucessfully written data to "+csvFileName);
}
public static void excelXToCSVfile(String excelFileName,String csvFileName,String Field_Delimiter,int Sheet_Number) {
checkValidFile(excelFileName);
XSSFWorkbook myWorkBook;
try {
myWorkBook = new XSSFWorkbook(new FileInputStream(excelFileName));
XSSFSheet mySheet = myWorkBook.getSheetAt(Sheet_Number);
String csvData="";
DataFormatter formatter = new DataFormatter(Locale.US);
checkValidFile(excelFileName);
int rows = mySheet.getPhysicalNumberOfRows();
String prefix="\"";
for (int eachRow = 0;eachRow<rows;eachRow++) {
XSSFRow myRow = (XSSFRow) mySheet.getRow(eachRow);
for ( int i=0;i<myRow.getLastCellNum();i++){
if(i==0)
{
csvData += prefix+formatter.formatCellValue(myRow.getCell(i))+prefix;
}
else
{
csvData += Field_Delimiter+prefix+formatter.formatCellValue(myRow.getCell(i))+prefix;
}
}
csvData+=NEW_LINE_CHARACTER;
}
try {
writeCSV(csvFileName, csvData);
} catch (Exception e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
} catch (FileNotFoundException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
/**
* get Cell value from XLSX file column
* #param myCell
* #return
* #throws Exception
*/
private static void checkValidFile(String fileName){
boolean valid=true;
try{
File f = new File(fileName);
if ( !f.exists() || f.isDirectory() ){
valid=false;
}
}catch(Exception e){
valid=false;
}
if ( !valid){
System.out.println("File doesn't exist: " + fileName);
System.exit(0);
}
}
public static void main(String[] args) throws Exception
{
String inp_file_name="";
String Output_file_name="";
String delimiter=",";
//inp_file_name=args[0];
//Output_file_name=args[1];
enter code here
//delimiter=args[2];
inp_file_name="C:/Users/xxx/Desktop/cloudera_shared/test_data.xlsx";
Output_file_name="C:/Users/xxx/Desktop/cloudera_shared/test_data.csv";
delimiter="|";
if(args.length==4 && (args[3].equals("") == false))
{
int Sheet_Number=Integer.parseInt(args[3]);
excelXToCSVfile(inp_file_name,Output_file_name,delimiter,Sheet_Number);
}
else
{
excelXToCSVfile(inp_file_name,Output_file_name,delimiter,0);
}
}
}
You can set the max size of the heap memory available to your Java process like this (here I increase it to 1024 MB).
java -Xmx1024m -jar myProgram.jar
If you run java -X you can see the different options available.
Try running your program in a profiler to get a better idea of which parts are memory intensive.
I can suggest you the problems beyond your code:
String csvData should be replaced with StringBuffer csvData.
You can declare FileOutputStream(nameFile, true) (set append is true)
You can use the multithread to execute 2 tasks:
First: read content from your file excel.
Two: write that content which is has just read.

How can I go back to Main method in my code, And depending on Condition?

In this program I am reading an .xlsx file and adding the cell data to a vector. If the vector size is less than 12, there is no need to read the remaining data, and I need to go back to the main method.
How can I do in my program ?
This is my Code :
package com.read;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Vector;
import org.apache.poi.openxml4j.opc.OPCPackage;
import java.io.InputStream;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.helpers.XMLReaderFactory;
/**
 * Reads one sheet of an .xlsx workbook with POI's event (SAX) API and prints
 * the cell values collected so far each time a row ends.
 */
public class SendDataToDb {

    public static void main(String[] args) {
        SendDataToDb sd = new SendDataToDb();
        try {
            sd.processOneSheet("C:/Users/User/Desktop/New folder/Untitled 2.xlsx");
            System.out.println("in Main method");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Parses the sheet whose relationship id is {@code rId2}.
     *
     * @param filename path of the workbook to read
     * @throws Exception if the package cannot be opened or the sheet cannot be parsed
     */
    public void processOneSheet(String filename) throws Exception {
        System.out.println("executing Process Method");
        OPCPackage pkg = OPCPackage.open(filename);
        try {
            XSSFReader r = new XSSFReader( pkg );
            SharedStringsTable sst = r.getSharedStringsTable();
            System.out.println("count "+sst.getCount());
            XMLReader parser = fetchSheetParser(sst);
            // To look up the Sheet Name / Sheet Order / rID,
            // you need to process the core Workbook stream.
            // Normally it's of the form rId# or rSheet#
            InputStream sheet2 = r.getSheet("rId2");
            try {
                System.out.println("Sheet2");
                InputSource sheetSource = new InputSource(sheet2);
                parser.parse(sheetSource);
            } finally {
                // Close the sheet stream even when parsing is aborted by an exception.
                sheet2.close();
            }
        } finally {
            // The workbook is only read: release the file without saving.
            pkg.revert();
        }
    }

    /**
     * Creates a Xerces SAX parser whose content handler collects cell values,
     * resolving shared-string indexes through {@code sst}.
     *
     * @param sst shared strings table of the workbook being read
     * @return a parser ready to be fed sheet XML
     * @throws SAXException if the parser cannot be created
     */
    public XMLReader fetchSheetParser(SharedStringsTable sst) throws SAXException {
        XMLReader parser = XMLReaderFactory.createXMLReader("org.apache.xerces.parsers.SAXParser");
        ContentHandler handler = new SheetHandler(sst);
        parser.setContentHandler(handler);
        System.out.println("Method :fetchSheetParser");
        return parser;
    }

    /**
     * See org.xml.sax.helpers.DefaultHandler javadocs
     */
    private class SheetHandler extends DefaultHandler {
        private SharedStringsTable sst;
        private String lastContents;
        private boolean nextIsString;
        // Number of <c> elements seen so far; it is never reset.
        private int columnNum;
        Vector<String> values = new Vector<String>(20);

        private SheetHandler(SharedStringsTable sst) {
            this.sst = sst;
        }

        @Override
        public void startElement(String uri, String localName, String name,
                Attributes attributes) throws SAXException {
            // c => cell
            if (name.equals("c")) {
                columnNum++;
                // t="s" means the cell value is an index into the shared strings table
                String cellType = attributes.getValue("t");
                nextIsString = cellType != null && cellType.equals("s");
            }
            // Clear contents cache
            lastContents = "";
        }

        @Override
        public void endElement(String uri, String localName, String name)
                throws SAXException {
            // v => contents of a cell.
            // Resolve here rather than in characters(), which may be called
            // more than once for a single value. The lookup is restricted to
            // the closing <v> so that a string-typed cell without a value
            // does not try to parse an empty index.
            if (name.equals("v")) {
                if (nextIsString) {
                    int idx = Integer.parseInt(lastContents);
                    lastContents = new XSSFRichTextString(sst.getEntryAt(idx)).toString();
                    nextIsString = false;
                }
                values.add(lastContents);
            }
            if (name.equals("row")) {
                System.out.println(values);
                System.out.println(values.size()+" "+values.capacity());
                //********************************************************
                //I AM CHECKING CONDITION HERE, IF CONDITION IS TRUE I NEED STOP THE REMAINING PROCESS AND GO TO MAIN METHOD.
                if (values.size() < 12) {
                    values.removeAllElements();
                }
                //WHAT CODE I NEED TO WRITE HERE TO STOP THE EXECUTION OF REMAINING PROCESS AND GO TO MAIN
                //***************************************************************
            }
        }

        @Override
        public void characters(char[] ch, int start, int length)
                throws SAXException {
            lastContents += new String(ch, start, length);
        }
    }
}
check the code in between lines of //******************************
and
//******************************************
You can throw a SAXException wherever you want the parsing to stop:
throw new SAXException("<Your message>")
and handle it in the main method.
After your checking, you should throw the Exception to get out from there and get it back to the main method.
throw new Exception("vector size has to be less than 12");

how to use At4J or 7-Zip-JBinding to get an InputStream of a file?

I looked into at4j and 7-Zip-JBinding (their javadoc and documentation), but they don't seem to be able to read an archived file without extracting it (that is, to give me an InputStream for a file inside the archive).
Is there any method I'm missing or haven't found ?
a solution other than extracting to a temporary folder to read it
I'm expecting an answer in how to do it in at4j or 7-Zip-JBinding
in other words I want to know how to utilize below mentioned function in at4j or 7-Zip-JBinding
I know java's built in one has getInputStream I'm currently using it this way
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;
import java.io.ByteArrayInputStream;
import java.io.InputStream;
/**
 * Get the input stream of a file inside the archive.
 *
 * @param path path inside zip
 * @return the entry's stream; a stream over the bytes of "Not Found" when
 *         the archive has no such entry; {@code null} when the lookup or
 *         the opening of the entry throws
 */
public InputStream getInputStream(String path){
    try {
        ZipEntry found = zipFile.getEntry(path);
        if (found == null) {
            return new ByteArrayInputStream("Not Found".getBytes());
        }
        return zipFile.getInputStream(found);
    } catch (Exception ex) {
        //handle exception
        return null;
    }
}
(^^ zipFile is a ZipFile object)
found the solution using 7-Zip-JBinding
just need to use ByteArrayInputStream ,this so far worked for a small file
pass a archive as argument to get all files inside printed
file ExtractItemsSimple.java
import java.io.IOException;
import java.io.RandomAccessFile;
import net.sf.sevenzipjbinding.ISevenZipInArchive;
import net.sf.sevenzipjbinding.SevenZip;
import net.sf.sevenzipjbinding.SevenZipException;
import net.sf.sevenzipjbinding.impl.RandomAccessFileInStream;
import net.sf.sevenzipjbinding.simple.ISimpleInArchive;
import net.sf.sevenzipjbinding.simple.ISimpleInArchiveItem;
/**
 * Opens the archive named by the first command-line argument and prints the
 * content of every non-folder item as text.
 */
public class ExtractItemsSimple {

    public static void main(String[] args) {
        RandomAccessFile randomAccessFile = null;
        ISevenZipInArchive inArchive = null;
        try {
            randomAccessFile = new RandomAccessFile(args[0], "r");
            inArchive = SevenZip.openInArchive(null, // autodetect archive type
                    new RandomAccessFileInStream(randomAccessFile));
            ISimpleInArchive simpleInArchive = inArchive.getSimpleInterface();
            for (ISimpleInArchiveItem item : simpleInArchive.getArchiveItems()) {
                if (!item.isFolder()) {
                    System.out.println(ArchieveInputStreamHandler.slurp(new ArchieveInputStreamHandler(item).getInputStream(),1000));
                }
            }
        } catch (Exception e) {
            // Also reached when no archive path was given on the command line.
            System.err.println("Error occurs: " + e);
            System.exit(1);
        } finally {
            // Close the archive before the file it reads from.
            if (inArchive != null) {
                try {
                    inArchive.close();
                } catch (SevenZipException e) {
                    System.err.println("Error closing archive: " + e);
                }
            }
            if (randomAccessFile != null) {
                try {
                    randomAccessFile.close();
                } catch (IOException e) {
                    System.err.println("Error closing file: " + e);
                }
            }
        }
    }
}
file ArchieveInputStreamHandler.java
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.UnsupportedEncodingException;
import net.sf.sevenzipjbinding.ISequentialOutStream;
import net.sf.sevenzipjbinding.SevenZipException;
import net.sf.sevenzipjbinding.simple.ISimpleInArchiveItem;
public class ArchieveInputStreamHandler {
private ISimpleInArchiveItem item;
private ByteArrayInputStream arrayInputStream;
public ArchieveInputStreamHandler(ISimpleInArchiveItem item) {
this.item = item;
}
public InputStream getInputStream() throws SevenZipException{
item.extractSlow(new ISequentialOutStream() {
#Override
public int write(byte[] data) throws SevenZipException {
arrayInputStream = new ByteArrayInputStream(data);
return data.length; // Return amount of consumed data
}
});
return arrayInputStream;
}
//got from http://stackoverflow.com/questions/309424/read-convert-an-inputstream-to-a-string
public static String slurp(final InputStream is, final int bufferSize){
final char[] buffer = new char[bufferSize];
final StringBuilder out = new StringBuilder();
try {
final Reader in = new InputStreamReader(is, "UTF-8");
try {
for (;;) {
int rsz = in.read(buffer, 0, buffer.length);
if (rsz < 0)
break;
out.append(buffer, 0, rsz);
}
}
finally {
in.close();
}
}
catch (UnsupportedEncodingException ex) {
/* ... */
}
catch (IOException ex) {
/* ... */
}
return out.toString();
}
}
Are you looking for http://docs.oracle.com/javase/6/docs/api/java/util/zip/ZipInputStream.html which can extract entries in zip file without extracting it completely.

Parsing XML usin SAX in java

I have this code to parse XML data ..
But when the startElement and endElement functions are called, their parameters have no values (for example, the name parameter is empty) when I try to print them. Why don't they have any value?
I call updateArticle function in the following code
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
/**
 * SAX handler that reads an RSS document either to fill in a feed's title
 * ({@link #createFeed}) or to collect the feed's articles
 * ({@link #updateArticles}).
 */
public class RSSHandler extends DefaultHandler {
    // Used to define what elements we are currently in
    private boolean inItem = false;
    private boolean inTitle = false;
    private boolean inLink = false;
    // Feed and Article objects to use for temporary storage
    private Article currentArticle = new Article();
    private Feed currentFeed = new Feed();
    // Number of articles added so far
    private int articlesAdded = 0;
    private ArrayList<Article> articles = new ArrayList<Article>();
    private ArrayList<Feed> feeds = new ArrayList<Feed>();
    // Number of articles to download
    private static final int ARTICLES_LIMIT = 15;
    // The possible values for targetFlag
    private static final int TARGET_FEED = 0;
    private static final int TARGET_ARTICLES = 1;
    // A flag to know if looking for Articles or Feed name
    private int targetFlag;

    public RSSHandler(){ }

    @Override
    public void startElement(String uri, String name, String qName,Attributes atts) {
        String element = elementName(name, qName);
        if (element.equals("title"))
            inTitle = true;
        else if (element.equals("item"))
            inItem = true;
        else if (element.equals("link"))
            inLink = true;
        System.out.println(element);
    }

    @Override
    public void endElement(String uri, String name, String qName)throws SAXException {
        String element = elementName(name, qName);
        if (element.equals("title"))
            inTitle = false;
        else if (element.equals("item"))
            inItem = false;
        else if (element.equals("link"))
            inLink = false;
        // Check if looking for feed, and if feed is complete
        if (targetFlag == TARGET_FEED && currentFeed.url != null && currentFeed.title != null) {
            // We know everything we need to know, so insert feed and exit
            System.out.println("add current feed");
            feeds.add(currentFeed);
            // throw new SAXException();
        }
        // Check if looking for article, and if article is complete
        if (targetFlag == TARGET_ARTICLES && currentArticle.url != null && currentArticle.title != null) {
            Article article = new Article();
            article.feedId = currentFeed.id;
            article.title = currentArticle.title;
            article.url = currentArticle.url;
            System.out.print(article.title);
            articles.add(article);
            //store articles in database
            currentArticle.title = null;
            currentArticle.url = null;
            // Lets check if we've hit our limit on number of articles
            articlesAdded++;
            if (articlesAdded >= ARTICLES_LIMIT)
                // Thrown only to make the parser stop early.
                throw new SAXException();
        }
    }

    public ArrayList<Article> getArticles(){
        return this.articles;
    }

    public ArrayList<Feed> getFeeds(){
        return this.feeds;
    }

    @Override
    public void characters(char ch[], int start, int length) {
        // Copy only the reported slice instead of the parser's whole buffer.
        String chars = new String(ch, start, length);
        System.out.println(chars);
        try {
            // If not in item, then title/link refers to feed
            if (!inItem) {
                if (inTitle)
                    currentFeed.title = chars;
            } else {
                if (inLink)
                    currentArticle.url = new URL(chars);
                if (inTitle)
                    currentArticle.title = chars;
            }
        } catch (MalformedURLException e) {
            // Text that is not a valid URL is skipped; the article link stays unset.
        }
    }

    /**
     * Parses the document at {@code url} looking for the feed's title.
     *
     * @param url location of the RSS document
     */
    public void createFeed(URL url) {
        targetFlag = TARGET_FEED;
        currentFeed.url = url;
        parseQuietly(url);
    }

    /**
     * Parses the document at the feed's URL, collecting its articles.
     *
     * @param feed the feed whose articles should be read
     */
    public void updateArticles(Feed feed) {
        targetFlag = TARGET_ARTICLES;
        currentFeed = feed;
        System.out.println(feed.url.toString());
        parseQuietly(currentFeed.url);
    }

    /** Picks the local name when the parser supplies one, else the qualified name. */
    private static String elementName(String name, String qName) {
        String chosen = (name == null || name.length() == 0) ? qName : name;
        return chosen == null ? "" : chosen.trim();
    }

    /** Runs this handler over the document at {@code source} without throwing. */
    private void parseQuietly(URL source) {
        try {
            SAXParserFactory spf = SAXParserFactory.newInstance();
            // Without this the parser reports only qualified names and leaves
            // the local-name argument of startElement/endElement empty.
            spf.setNamespaceAware(true);
            SAXParser sp = spf.newSAXParser();
            XMLReader xr = sp.getXMLReader();
            xr.setContentHandler(this);
            java.io.InputStream in = source.openStream();
            try {
                xr.parse(new InputSource(in));
            } finally {
                // Release the connection even when parsing stops early.
                in.close();
            }
        } catch (IOException e) {
            System.err.println("Could not read " + source + ": " + e);
        } catch (SAXException ignored) {
            // endElement throws SAXException to stop once enough articles
            // were read, so it is not reported as a failure here.
        } catch (ParserConfigurationException e) {
            System.err.println("Could not create SAX parser: " + e);
        }
    }
}
One of the most appalling design decisions in JAXP (and there were many) was that SAXParserFactory by default creates a parser that is not namespace-aware. Always call setNamespaceAware(true) on the factory before creating the parser. Otherwise, the XMLReader will call startElement using the options defined for a non-namespace-aware parser, which means it will supply the lexical QName, but not the local-name and URI.
The qName parameter contains the element name.
Here's an example although it's hard to read because of formatting.
Namespacing etc. makes a difference in where/how to get at element names.

Categories