I am reading an Excel file using POI's XSSF and SAX (Event API). The Excel sheet has thousands of rows, so this is the only way I have found to get good performance. Now I would like to start reading the Excel file from one particular row, for example row 6: these Excel files are updated every day, and I store the index of the last row I have already processed so that I can read only the new rows.
How can I start from one row?
/**
 * Opens the workbook referenced by the acquisition form in read-only mode and
 * streams its first sheet through {@code processSheet}.
 *
 * <p>Only the first sheet is processed. The package and the sheet stream are
 * released exactly once via try-with-resources, whether or not parsing succeeds.
 *
 * @param acquisitionForm supplies the workbook location via {@code getDatatablePath()}
 *                        and is handed on to {@code processSheet}
 * @throws ExcelReadException wrapping any failure raised while opening, parsing or
 *                            closing the workbook, including a workbook without sheets
 * @throws IOException declared for backward compatibility with existing callers
 */
private void getExcelField(AcquisitionForm acquisitionForm) throws ExcelReadException, IOException {
    try (OPCPackage pkg = OPCPackage.open(acquisitionForm.getDatatablePath(), PackageAccess.READ)) {
        ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(pkg);
        XSSFReader xssfReader = new XSSFReader(pkg);
        StylesTable styles = xssfReader.getStylesTable();
        XSSFReader.SheetIterator sheets = (XSSFReader.SheetIterator) xssfReader.getSheetsData();
        // Fail with an explicit message instead of relying on next() blowing up.
        if (!sheets.hasNext()) {
            throw new IllegalStateException("Workbook contains no sheets");
        }
        try (InputStream stream = sheets.next()) {
            processSheet(styles, strings, stream, acquisitionForm);
        }
    } catch (Exception e) {
        throw new ExcelReadException("An error occured during excel file reading ", e);
    }
}
/**
 * Parses one sheet with a SAX parser, feeding every event to a
 * {@code MyXSSFSheetHandler} built from the given styles and shared-strings tables.
 *
 * @param styles           the styles table handed to the handler
 * @param strings          the shared-strings table handed to the handler
 * @param sheetInputStream the stream to read the sheet XML from; not closed here
 * @param acquisitionForm  the acquisition form handed to the handler
 * @throws Exception if the parser cannot be created or configured, or if reading or
 *                   parsing the sheet fails
 */
private void processSheet(StylesTable styles, ReadOnlySharedStringsTable strings, InputStream sheetInputStream, AcquisitionForm acquisitionForm) throws Exception {
    SAXParserFactory saxFactory = SAXParserFactory.newInstance();
    // The workbook comes from outside the application, so ask the parser to apply
    // its secure-processing limits before any sheet XML is read.
    saxFactory.setFeature(javax.xml.XMLConstants.FEATURE_SECURE_PROCESSING, true);
    XMLReader sheetParser = saxFactory.newSAXParser().getXMLReader();
    // NOTE(review): the only constructor visible in this file also takes a sheetIndex;
    // confirm that a four-argument overload exists.
    MyXSSFSheetHandler handler = new MyXSSFSheetHandler(styles, strings, databaseAcquisitionServices, acquisitionForm);
    sheetParser.setContentHandler(handler);
    sheetParser.parse(new InputSource(sheetInputStream));
}
MyXSSFSheetHandler
/**
 * Creates a handler for a single sheet.
 *
 * @param styles                      table stored in {@code stylesTable}
 * @param strings                     table stored in {@code sharedStringsTable}
 * @param databaseAcquisitionServices service stored for later use by the handler
 * @param acquisitionForm             form stored for later use by the handler
 * @param sheetIndex                  index stored in {@code sheetIndex}
 */
public MyXSSFSheetHandler(StylesTable styles, ReadOnlySharedStringsTable strings,DatabaseAcquisitionServices databaseAcquisitionServices, AcquisitionForm acquisitionForm, int sheetIndex) {
    // Collaborators supplied by the caller.
    this.acquisitionForm = acquisitionForm;
    this.databaseAcquisitionServices = databaseAcquisitionServices;
    this.sheetIndex = sheetIndex;
    sharedStringsTable = strings;
    stylesTable = styles;
    // Fresh per-parse working state; cells default to the NUMBER type.
    nextDataType = XssfDataType.NUMBER;
    value = new StringBuffer();
    formatter = new DataFormatter();
}
/**
 * Handles an opening tag of the sheet XML.
 *
 * <p>For {@code v} (and {@code inlineStr}) elements the text buffer is cleared and
 * character capture is switched on. For {@code c} (cell) elements the cell
 * reference, column index, data type and number format are recorded for use when
 * the value is emitted.
 *
 * @param uri        unused
 * @param localName  unused
 * @param name       qualified element name
 * @param attributes attributes of the element; {@code r}, {@code t} and {@code s}
 *                   are read for cells
 * @throws SAXException if a cell has no {@code r} attribute or its reference
 *                      contains no row digits
 */
public void startElement(String uri, String localName, String name,
        Attributes attributes) throws SAXException {
    if ("inlineStr".equals(name) || "v".equals(name)) {
        vIsOpen = true;
        // Clear contents cache
        value.setLength(0);
        return;
    }
    // c => cell
    if (!"c".equals(name)) {
        return;
    }

    String reference = attributes.getValue("r");
    if (reference == null) {
        // Previously this surfaced as a bare NullPointerException.
        throw new SAXException("Cell element is missing its \"r\" (reference) attribute");
    }
    int firstDigit = -1;
    for (int c = 0; c < reference.length(); ++c) {
        if (Character.isDigit(reference.charAt(c))) {
            firstDigit = c;
            break;
        }
    }
    if (firstDigit < 0) {
        // Previously this surfaced as a StringIndexOutOfBoundsException from substring().
        throw new SAXException("Malformed cell reference '" + reference + "': no row number");
    }
    cellCoordinate = reference;
    thisColumn = nameToColumn(reference.substring(0, firstDigit));

    // Set up defaults.
    this.nextDataType = XssfDataType.NUMBER;
    this.formatIndex = -1;
    this.formatString = null;

    String cellType = attributes.getValue("t");
    String cellStyleStr = attributes.getValue("s");
    if ("b".equals(cellType)) {
        nextDataType = XssfDataType.BOOL;
    } else if ("e".equals(cellType)) {
        nextDataType = XssfDataType.ERROR;
    } else if ("inlineStr".equals(cellType)) {
        nextDataType = XssfDataType.INLINESTR;
    } else if ("s".equals(cellType)) {
        nextDataType = XssfDataType.SSTINDEX;
    } else if ("str".equals(cellType)) {
        nextDataType = XssfDataType.FORMULA;
    } else if (cellStyleStr != null) {
        // It's a number, but almost certainly one with a special style or format
        int styleIndex = Integer.parseInt(cellStyleStr);
        XSSFCellStyle style = stylesTable.getStyleAt(styleIndex);
        this.formatIndex = style.getDataFormat();
        this.formatString = style.getDataFormatString();
        if (this.formatString == null) {
            this.formatString = BuiltinFormats.getBuiltinFormat(this.formatIndex);
        }
    }
}
/*
 * (non-Javadoc)
 * @see org.xml.sax.helpers.DefaultHandler#endElement(java.lang.String, java.lang.String, java.lang.String)
 *
 * On </v> the buffered text is decoded according to the current cell type and
 * stored in rowValues under the cell coordinate. On </row> a copy of the collected
 * row is handed to archiveAcquisition and the per-row state is reset.
 */
public void endElement(String uri, String localName, String name)
        throws SAXException {
    if ("v".equals(name)) {
        // Decode only now: characters() may have been called more than once.
        String decoded = decodeBufferedValue();
        if (lastColumnNumber == -1) {
            lastColumnNumber = 0;
        }
        // The decoded text may be empty, or null when a shared-string index was unparseable.
        rowValues.put(cellCoordinate, decoded);
        if (thisColumn > -1) {
            lastColumnNumber = thisColumn;
        }
        return;
    }
    if ("row".equals(name)) {
        // Pass a copy: rowValues is cleared and reused for the next row.
        databaseAcquisitionServices.archiveAcquisition(new TreeMap<>(rowValues), rowCounter, acquisitionForm, sheetIndex);
        rowValues.clear();
        rowCounter++;
        lastColumnNumber = -1;
    }
}

/**
 * Converts the text buffered for the current cell into its string form,
 * according to {@code nextDataType}.
 *
 * @return the cell text; {@code null} if a shared-string index could not be parsed
 */
private String decodeBufferedValue() {
    switch (nextDataType) {
    case BOOL:
        return value.charAt(0) == '0' ? "false" : "true";
    case ERROR:
    case FORMULA:
        return value.toString();
    case INLINESTR:
        return new XSSFRichTextString(value.toString()).toString();
    case SSTINDEX: {
        String sstIndex = value.toString();
        try {
            int idx = Integer.parseInt(sstIndex);
            return new XSSFRichTextString(sharedStringsTable.getEntryAt(idx)).toString();
        } catch (NumberFormatException ex) {
            System.out.println("Failed to parse SST index '" + sstIndex + "': " + ex.toString());
            return null;
        }
    }
    case NUMBER: {
        String raw = value.toString();
        if (this.formatString != null && raw.length() > 0) {
            return formatter.formatRawCellContents(Double.parseDouble(raw), this.formatIndex, this.formatString);
        }
        return raw;
    }
    default:
        return "";
    }
}
/**
 * Appends character data to the value buffer, but only while {@code vIsOpen} is set.
 */
public void characters(char[] ch, int start, int length)
        throws SAXException {
    if (!vIsOpen) {
        return;
    }
    value.append(ch, start, length);
}
/**
 * Converts an Excel column name such as "C" to a zero-based index by reading
 * the letters as base-26 digits relative to 'A'.
 *
 * @param name column letters of a cell reference
 * @return index corresponding to the specified name; -1 for an empty name
 */
private int nameToColumn(String name) {
    int column = -1;
    for (char letter : name.toCharArray()) {
        column = (column + 1) * 26 + (letter - 'A');
    }
    return column;
}
You will likely need to count the rows that you see in a class derived from SheetContentsHandler. I don't think you need a separate XSSFSheetHandler; I would rather use the default one and only have a derived SheetContentsHandler, which has callback methods for all the things that you are interested in, e.g. startRow, endRow, cell.
You can take a look at the XLSX2CSV sample for an example of how streaming reading of XLSX files can be done, especially how the class SheetToCSV is used to get calls for each row/cell, you could e.g. use something like if (currentRow < startRow) { return } in the method cell() to skip rows.
The cell-method is called with a parameter String cellReference which can be used to retrieve the row/cell coordinates via new CellReference(cellReference).getCol() and new CellReference(cellReference).getRow().
Related
EDIT: I tried this using a multiple file upload, and I got 3 of 4 uploaded files, so it seems that the upload mechanism is only setting the offset too high for the first file.
I found a method for uploading files using the com.sun.net.httpserver library here, but it is not actually detecting any files from the form input. What it does is it finds the "starting point" of uploaded files, represented by List<Integer> offsets and then looks through the file to get bytes from it. The problem is that it sets each part of offsets as a number very close to the end of the form data, so nothing actually gets parsed. Here is my HttpHandler:
package app;
import com.sun.net.httpserver.*;
import java.io.*;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
 * {@link HttpHandler} that decodes {@code multipart/form-data} request bodies into
 * {@link MultiPart} objects and passes them to {@link #handle(HttpExchange, List)}.
 */
public class FormDataHandler implements HttpHandler {

    /** Charset used for the boundary, the part headers and text field values. */
    private static final Charset UTF8 = Charset.forName("UTF-8");
    /** Blank line separating a part's headers from its content. */
    private static final byte[] HEADER_TERMINATOR = "\r\n\r\n".getBytes(UTF8);
    private static final String BOUNDARY_PARAM = "boundary=";
    private static final String DISPOSITION_PREFIX = "\r\nContent-Disposition: form-data; name=";
    private static final String FILENAME_PREFIX = "; filename=";
    private static final String CONTENT_TYPE_PREFIX = "\r\nContent-Type:";
    /** Length of the CRLF that precedes every boundary except possibly the first one. */
    private static final int CRLF_LENGTH = 2;

    /**
     * Reads the request body and, for {@code multipart/form-data} requests, parses it
     * into parts before delegating to {@link #handle(HttpExchange, List)}. Any other
     * request (including one without a Content-Type header) is delegated with
     * {@code null} parts.
     *
     * @param httpExchange the exchange being served
     * @throws IOException if the request body cannot be read or the response cannot be written
     */
    @Override
    public void handle(HttpExchange httpExchange) throws IOException {
        String contentType = httpExchange.getRequestHeaders().getFirst("Content-Type");
        if (contentType == null || !contentType.startsWith("multipart/form-data")) {
            // if no form data is present, still call handle method
            handle(httpExchange, null);
            return;
        }
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        copyAndClose(httpExchange.getRequestBody(), buffer);
        handle(httpExchange, parseMultipart(buffer.toByteArray(), extractBoundary(contentType)));
    }

    /**
     * Default response: reports the number of decoded parts as an HTML heading.
     *
     * @param he    the exchange to answer
     * @param parts decoded parts, or {@code null} when the request was not multipart
     * @throws IOException if the response cannot be written
     */
    public void handle(HttpExchange he, List<MultiPart> parts) throws IOException {
        int count = parts == null ? 0 : parts.size();
        byte[] response = ("<h1>" + count + "</h1>").getBytes(UTF8);
        he.sendResponseHeaders(200, response.length);
        try (OutputStream os = he.getResponseBody()) {
            os.write(response);
        }
    }

    /**
     * Splits a multipart body into its parts.
     *
     * <p>Every boundary line is preceded by CRLF except the very first one when the
     * body starts directly with {@code --boundary}; that opening boundary is detected
     * separately so the first part is not lost. Parsing stops at the closing
     * {@code --boundary--} delimiter. Parts without a
     * {@code Content-Disposition: form-data; name=} header are skipped.
     *
     * @param payload  raw request body; {@code null} yields an empty list
     * @param boundary boundary token from the Content-Type header, without the
     *                 leading dashes; {@code null} or empty yields an empty list
     * @return the decoded parts in body order, never {@code null}
     */
    public List<MultiPart> parseMultipart(byte[] payload, String boundary) {
        List<MultiPart> parts = new ArrayList<>();
        if (payload == null || boundary == null || boundary.isEmpty()) {
            return parts;
        }
        // as of rfc7578 - prepend "\r\n--"
        byte[] delimiter = ("\r\n--" + boundary).getBytes(UTF8);

        List<Integer> offsets = new ArrayList<>();
        if (startsWithOpeningBoundary(payload, delimiter)) {
            // Virtual offset: as if a CRLF stood in front of the body.
            offsets.add(-CRLF_LENGTH);
        }
        offsets.addAll(searchBytes(payload, delimiter, 0, payload.length - 1));

        for (int idx = 0; idx < offsets.size(); idx++) {
            int start = offsets.get(idx) + delimiter.length;
            int end = idx < offsets.size() - 1 ? offsets.get(idx + 1) : payload.length;
            if (start + 1 < payload.length && payload[start] == '-' && payload[start + 1] == '-') {
                // "--boundary--" closes the body; anything after it is epilogue.
                break;
            }
            int headerEnd = indexOf(payload, HEADER_TERMINATOR, start, end - 1);
            if (headerEnd < 0) {
                continue;
            }
            // The header text starts with the CRLF that ends the boundary line, so every
            // header line (including the first) is preceded by "\r\n".
            MultiPart part = parseHeaders(new String(payload, start, headerEnd - start, UTF8));
            if (part == null) {
                // skip entry if no name is found
                continue;
            }
            byte[] content = Arrays.copyOfRange(payload, headerEnd + HEADER_TERMINATOR.length, end);
            if (part.type == PartType.TEXT) {
                part.value = new String(content, UTF8);
            } else {
                part.bytes = content;
            }
            parts.add(part);
        }
        return parts;
    }

    /**
     * Reads the whole stream into memory. Read errors are swallowed and whatever was
     * read up to that point is returned; the stream is closed in every case.
     *
     * @param requestStream stream to drain
     * @return the bytes read, possibly truncated if reading failed
     */
    public static byte[] getInputAsBinary(InputStream requestStream) {
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        try {
            copyAndClose(requestStream, bos);
        } catch (IOException ignored) {
            // Kept for backward compatibility: existing callers expect a best-effort
            // result instead of an exception.
        }
        return bos.toByteArray();
    }

    /**
     * Search bytes in byte array returns indexes within this byte-array of all
     * occurrences of the specified(search bytes) byte array in the specified
     * range
     * borrowed from https://github.com/riversun/finbin/blob/master/src/main/java/org/riversun/finbin/BinarySearcher.java
     *
     * @param srcBytes         bytes to search in
     * @param searchBytes      byte sequence to look for
     * @param searchStartIndex first index to consider
     * @param searchEndIndex   last index (inclusive) a match may occupy
     * @return start indexes of all non-overlapping matches, in ascending order
     */
    public List<Integer> searchBytes(byte[] srcBytes, byte[] searchBytes, int searchStartIndex, int searchEndIndex) {
        final List<Integer> positionIndexList = new ArrayList<Integer>();
        int cursor = searchStartIndex;
        while (cursor <= searchEndIndex) {
            int index = indexOf(srcBytes, searchBytes, cursor, searchEndIndex);
            if (index < 0) {
                // No match from here on means no match from any later position either.
                break;
            }
            positionIndexList.add(index);
            cursor = index + searchBytes.length;
        }
        return positionIndexList;
    }

    /**
     * Returns the index within this byte-array of the first occurrence of the
     * specified(search bytes) byte array.<br>
     * Starting the search at the specified index, and end at the specified
     * index. A match must lie completely within that range.
     * borrowed from https://github.com/riversun/finbin/blob/master/src/main/java/org/riversun/finbin/BinarySearcher.java
     *
     * @param srcBytes    bytes to search in
     * @param searchBytes byte sequence to look for
     * @param startIndex  first index to consider
     * @param endIndex    last index (inclusive) a match may occupy
     * @return start index of the first match, or -1 if there is none
     */
    public int indexOf(byte[] srcBytes, byte[] searchBytes, int startIndex, int endIndex) {
        if (searchBytes.length == 0 || (endIndex - startIndex + 1) < searchBytes.length) {
            return -1;
        }
        int lastStart = Math.min(endIndex, srcBytes.length - 1) - searchBytes.length + 1;
        scan:
        for (int i = startIndex; i <= lastStart; i++) {
            for (int j = 0; j < searchBytes.length; j++) {
                if (srcBytes[i + j] != searchBytes[j]) {
                    continue scan;
                }
            }
            return i;
        }
        return -1;
    }

    /** Copies everything from {@code in} to {@code out} and closes {@code in}. */
    private static void copyAndClose(InputStream in, ByteArrayOutputStream out) throws IOException {
        try (InputStream body = in) {
            byte[] buf = new byte[8192];
            int bytesRead;
            while ((bytesRead = body.read(buf)) != -1) {
                out.write(buf, 0, bytesRead);
            }
        }
    }

    /** Extracts the boundary token from a Content-Type value, dropping quotes and trailing parameters. */
    private static String extractBoundary(String contentType) {
        int at = contentType.indexOf(BOUNDARY_PARAM);
        if (at < 0) {
            return null;
        }
        String token = contentType.substring(at + BOUNDARY_PARAM.length());
        int semicolon = token.indexOf(';');
        if (semicolon >= 0) {
            token = token.substring(0, semicolon);
        }
        token = token.trim();
        if (token.length() >= 2 && token.startsWith("\"") && token.endsWith("\"")) {
            token = token.substring(1, token.length() - 1);
        }
        return token.isEmpty() ? null : token;
    }

    /** Tells whether the payload begins with the delimiter minus its leading CRLF. */
    private static boolean startsWithOpeningBoundary(byte[] payload, byte[] delimiter) {
        int length = delimiter.length - CRLF_LENGTH;
        if (payload.length < length) {
            return false;
        }
        for (int i = 0; i < length; i++) {
            if (payload[i] != delimiter[i + CRLF_LENGTH]) {
                return false;
            }
        }
        return true;
    }

    /**
     * Builds a part from its header text (each header line preceded by CRLF).
     * Returns {@code null} when no form-data name is present.
     */
    private static MultiPart parseHeaders(String header) {
        int nameIndex = header.indexOf(DISPOSITION_PREFIX);
        if (nameIndex < 0) {
            return null;
        }
        MultiPart part = new MultiPart();
        int nameStart = nameIndex + DISPOSITION_PREFIX.length();
        int lineEnd = header.indexOf("\r\n", nameStart);
        if (lineEnd < 0) {
            // Content-Disposition is the last header line.
            lineEnd = header.length();
        }
        //check for extra filename field
        int fileNameStart = header.indexOf(FILENAME_PREFIX, nameStart);
        if (fileNameStart >= 0 && fileNameStart < lineEnd) {
            part.filename = stripQuotes(header.substring(fileNameStart + FILENAME_PREFIX.length(), lineEnd));
            part.name = stripQuotes(header.substring(nameStart, fileNameStart));
            part.type = PartType.FILE;
        } else {
            part.name = stripQuotes(header.substring(nameStart, lineEnd));
            part.type = PartType.TEXT;
        }
        // extract content type from header
        int typeIndex = header.indexOf(CONTENT_TYPE_PREFIX);
        if (typeIndex >= 0) {
            int valueStart = typeIndex + CONTENT_TYPE_PREFIX.length();
            int valueEnd = header.indexOf("\r\n", valueStart);
            if (valueEnd < 0) {
                valueEnd = header.length();
            }
            part.contentType = header.substring(valueStart, valueEnd).trim();
        }
        return part;
    }

    /** Replaces single and double quotes with spaces and trims the result. */
    private static String stripQuotes(String raw) {
        return raw.replace('"', ' ').replace('\'', ' ').trim();
    }

    /** One decoded part of a multipart body. */
    public static class MultiPart {
        public PartType type;
        public String contentType;
        public String name;
        public String filename;
        /** Text content; set for {@link PartType#TEXT} parts only. */
        public String value;
        /** Raw content; set for {@link PartType#FILE} parts only. */
        public byte[] bytes;
    }

    /** Distinguishes plain form fields from file uploads. */
    public enum PartType{
        TEXT,FILE
    }
}
And here is my form:
<form action="http://localhost:8080/upl" method="post" enctype="multipart/form-data">
<input type="file" name="file">
<input type="submit" value="Submit">
</form>
Does anybody know why this won't work? If not, is there a better option for HttpServer file uploads? I tried the Apache Commons API, but that didn't work either.
I'm trying to load an excel file(xlsx) into a Workbook Object using apache POI 3.10.
I'm receiving a java.lang.OutofMemoryError.
I'm using Java 8 with the -Xmx2g argument on the JVM.
All 4 cores(64bit System) and my RAM(4gb) are maxed out when I run the program.
The excel sheet has 43 columns and 166,961 Rows which equal 7,179,323 Cells.
I'm using Apache POIs WorkBookFactory.create(new File) because it uses less memory than using InputFileStream.
Does anyone have any ideas how to optimize memory usage or another way to create the Workbook?
Below is my test Reader class, don't judge, it's rough and includes debugging statements:
import java.io.File;
import java.io.IOException;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.usermodel.WorkbookFactory;
/**
 * Test harness that loads a workbook through {@code WorkbookFactory.create(File)}
 * and prints progress markers to standard output.
 */
public class Reader {
    /** The loaded workbook, or {@code null} when loading failed. */
    private Workbook wb;

    /**
     * Tries to load the given file; failures are reported on standard output and
     * leave the reader without a workbook.
     *
     * @param excel the workbook file to load
     */
    public Reader(File excel) {
        System.out.println("CONSTRUCTOR");
        Workbook loaded = null;
        try {
            loaded = WorkbookFactory.create(excel);
        } catch (IOException ioFailure) {
            System.out.println("IO Exception");
            System.out.println(ioFailure.getMessage());
        } catch (InvalidFormatException formatFailure) {
            System.out.println("Invalid Format");
            System.out.println(formatFailure.getMessage());
        }
        wb = loaded;
    }

    /** @return {@code true} if a workbook was loaded */
    public boolean exists() {
        return wb != null;
    }

    /** Placeholder; currently prints nothing. */
    public void print() {
    }

    /**
     * Loads {@code filename.xlsx} from the working directory, exiting with status 0
     * when the file is missing.
     */
    public static void main(String[] args) {
        System.out.println("START PRG");
        File workbookFile = new File("filename.xlsx");
        System.out.println("PATH:" + workbookFile.getAbsoluteFile());
        boolean present = workbookFile.exists();
        if (!present) {
            System.out.println("File does not exist.");
            System.exit(0);
        }
        System.out.println("FILE");
        Reader reader = new Reader(workbookFile);
        System.out.println("Reader");
        reader.print();
        System.out.println("PRG DONE");
    }
}
apparently loading a 24mb file shouldn't be causing OOM...
At first glance it appears to me that, although Xmx is set to 2G, there is actually not that much free memory in the system. In other words, the OS and other processes may have taken more than 2G out of the 4G of physical memory. Check the available physical memory first; if it is below what is expected, try closing some other running apps/processes.
if that's not the case and there's indeed enough memory left, without profiling it's really hard to identify the real cause. use a profile tool to check JVM status, related to memory first. you may simply use jconsole (as it comes with JDK). #see this on how to activate JMX
once you are connected, check readings related to memory, specifically below memory spaces:
old gen
young gen
perm gen
monitor these spaces and see where it's struggling. I assume this is a standalone application. in case this is deployed on server (as web or services), you may consider '-XX:NewRatio' option for distributing heap spaces effectively and efficiently. #see tuning related details here.
Please confirm these before proceeding,
Is there any infinite execution in looping(for/while)
Ensure your physical storage size
Maximize buffer memory
Note
As per my understanding Apache POI will not consume that much amount of memory.
I am just a beginner, but may I ask you some questions.
Why not use XSSFWorkbook class to open XLSX file. I mean, I always use it to handle XLSX files, and this time I tried with a file(7 MB; that was the largest I could find in my computer), and it worked perfectly.
Why not use newer File API(NIO, Java 7). Again, I do not know if this will make any difference or not. But, it worked for me.
Windows 7 Ultimate | 64 bit | Intel 2nd Gen Core i3|Eclipse Juno|JDK 1.7.45|Apache POI 3.9
Path file = Paths.get("XYZABC.xlsx");
// Open the stream in try-with-resources so it is released even when the
// workbook constructor throws.
try (java.io.InputStream in = Files.newInputStream(file, StandardOpenOption.READ)) {
    XSSFWorkbook wb = new XSSFWorkbook(in);
} catch (IOException e) {
    System.out.println("Some IO Error!!!!");
}
Do, tell if it works for you or not.
Did you try using SXSSFWorkbook? We also used Apache POI to handle relatively big XLSX files, and we also had memory problems when using plain XSSFWorkbook. We didn't have to read the files in, though; we were just writing tens of thousands of lines of information. Using SXSSFWorkbook solved our memory problems. You can pass an XSSFWorkbook to its constructor, together with the number of rows you want to keep in memory.
Java 1.8
based on HSSF and XSSF Limitations
my poi version is 3.17 POI Examples
This class launches my code:
/**
 * Drives the stress-test import: loads the workbook into an {@link EX}, writes a
 * log file when validation errors exist, and dumps the mapped entities.
 */
public class Controller {
    /** Result of the last successful {@link #fineFile()} call; {@code null} before that. */
    EX stressTest;

    /**
     * Loads {@code C:\Stresstest.xlsx} into {@link #stressTest}. The input stream is
     * always closed; a read failure is reported on standard error and leaves
     * {@link #stressTest} unchanged.
     */
    public void fineFile() {
        String stresstest = "C:\\Stresstest.xlsx";
        HashMap<String, String[]> stressTestMap = new HashMap<>();
        stressTestMap.put("aaaa", new String[]{"myField", "The field"});
        stressTestMap.put("bbbb", new String[]{"other", "Other value"});
        try (InputStream stressTestIS = new FileInputStream(stresstest)) {
            stressTest = new EX(stresstest, stressTestIS, stressTestMap);
        } catch (IOException exp) {
            // Previously swallowed silently, which surfaced later as a NullPointerException.
            System.err.println("Could not read " + stresstest + ": " + exp);
        }
    }

    /**
     * Writes the validation errors to a timestamped log file. Does nothing when no
     * workbook has been loaded or when there are no errors.
     */
    public void printErr() {
        if (stressTest == null || !stressTest.thereAreErrors()) {
            return;
        }
        String logPath = "C:\\logErrorsStressTest"
                + (new SimpleDateFormat("ddMMyyyyHHmmss")).format(new Date())
                + ".txt";
        // try-with-resources closes the writer even if write() fails.
        try (FileWriter myWriter = new FileWriter(logPath)) {
            myWriter.write(stressTest.getBodyFileErrors());
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Maps the workbook rows to {@code OneObjectWhatever} instances and prints one
     * line per entity listing every declared field and its value. Does nothing when
     * no workbook has been loaded or when validation errors exist.
     */
    public void createBD() {
        if (stressTest == null || stressTest.thereAreErrors()) {
            return;
        }
        // toListCustomerObject instantiates the class passed in, so the cast is safe.
        @SuppressWarnings("unchecked")
        List<OneObjectWhatever> entitiesList =
                (List<OneObjectWhatever>) stressTest.toListCustomerObject(OneObjectWhatever.class);
        for (OneObjectWhatever entity : entitiesList) {
            StringBuilder valueString = new StringBuilder();
            for (Field attr : entity.getClass().getDeclaredFields()) {
                try {
                    attr.setAccessible(true);
                    // Read from the entity itself; the old code passed the Field[] array
                    // as the target object, so every read failed.
                    valueString.append(" StressTest:").append(attr.getName())
                            .append(": -").append(attr.get(entity)).append("- ");
                } catch (Exception reflectionError) {
                    System.out.println(reflectionError);
                }
            }
            // The summary used to be built and then discarded.
            System.out.println(valueString);
        }
    }
}
MY CODE
/**
 * In-memory view of the first sheet of an Excel workbook: the lowest-numbered row is
 * treated as the header, the remaining rows are validated for empty cells and can be
 * mapped onto objects via reflection.
 */
public class EX {
    /** Optional minus sign followed by digits. */
    private static final Pattern INTEGER_TEXT = Pattern.compile("-?\\d+");
    /** Optional minus sign, digits and an optional fractional part. */
    private static final Pattern DECIMAL_TEXT = Pattern.compile("-?\\d+(\\.\\d+)?");

    /** Row index -> (column index -> cell text); the header row is removed after loading. */
    private HashMap<Integer, HashMap<Integer, String> > rows;
    /** First entry is a heading line; every further entry is one reported problem. */
    private List<String> errors;
    private int maxColOfHeader, minColOfHeader;
    /** Column index -> header text. */
    private HashMap<Integer, String> header;
    /** Header text -> {property name, description}. */
    private HashMap<String,String[]> relationHeaderClassPropertyDescription;

    /**
     * Loads the workbook at path {@code name}, picks the header row and determines
     * the header's column range.
     *
     * @param name path of the workbook; also used as label in error messages
     * @param file stream used only to detect the file format; it is closed here
     */
    private void initVariables(String name, InputStream file) {
        this.rows = new HashMap<>();
        this.header = new HashMap<>();
        this.errors = new ArrayList<>();
        this.errors.add("["+name+"] empty cells in position -> ");
        try{
            FileMagic fm;
            try (InputStream is = FileMagic.prepareToCheckMagic(file)) {
                fm = FileMagic.valueOf(is);
            }
            switch (fm) {
                case OLE2: {
                    XLS2CSVmra xls2csv = new XLS2CSVmra(name, 50, rows);
                    xls2csv.process();
                    System.out.println("OLE2");
                    break;
                }
                case OOXML: {
                    // try-with-resources releases the package even when processing fails.
                    try (OPCPackage p = OPCPackage.open(new File(name), PackageAccess.READ)) {
                        XLSX2CSV xlsx2csv = new XLSX2CSV(p, System.out, 50, this.rows);
                        xlsx2csv.process();
                    }
                    System.out.println("OOXML");
                    break;
                }
                default:
                    System.out.println("Your InputStream was neither an OLE2 stream, nor an OOXML stream");
                    break;
            }
        } catch (IOException | EncryptedDocumentException | SAXException | OpenXML4JException exp){
            System.out.println(exp);
            exp.printStackTrace();
        }
        if (rows.isEmpty()) {
            // Nothing could be read: report it instead of failing with NoSuchElementException.
            errors.add(" ["+name+"] no rows could be read from the workbook");
            this.minColOfHeader = 0;
            this.maxColOfHeader = -1;
            return;
        }
        // HashMap iteration order is unspecified, so choose the header row and the first
        // header column by value rather than by "first key seen".
        int rowHeader = rows.keySet().stream().mapToInt(Integer::intValue).min().getAsInt();
        this.header.putAll(rows.get(rowHeader));
        this.rows.remove(rowHeader);
        this.minColOfHeader = this.header.keySet().stream().mapToInt(Integer::intValue).min().orElse(0);
        this.maxColOfHeader = this.header.keySet().stream().mapToInt(Integer::intValue).max().orElse(-1);
    }

    /**
     * Loads and validates the workbook.
     *
     * @param name path of the workbook file
     * @param file stream on the same file, used for format detection and closed afterwards
     * @param relationHeaderClassPropertyDescription_ header text -> {property name, description}
     */
    public EX(String name, InputStream file, HashMap<String,String[]> relationHeaderClassPropertyDescription_) {
        this.relationHeaderClassPropertyDescription = relationHeaderClassPropertyDescription_;
        initVariables(name, file);
        validate();
    }

    /**
     * Records an error for every blank cell that sits under a non-blank header, in
     * ascending row order, then prints one mapping template line per header column.
     */
    private void validate(){
        rows.keySet().stream().sorted().forEach(inx -> {
            HashMap<Integer, String> row = rows.get(inx);
            for(int i = minColOfHeader; i <= maxColOfHeader; i++) {
                if (isBlank(row.get(i)) && !isBlank(header.get(i))) {
                    String description = getRelationHeaders(i,1);
                    errors.add(" ["+header.get(i)+"]{"+description+"} = fila: "+(inx+1)+" - columna: "+ CellReference.convertNumToColString(i));
                }
            }
        });
        header.forEach((i,v)->{System.out.println("stressTestMap.put(\""+v+"\", new String[]{\"{"+i+"}\",\"Mi descripcion XD\"});");});
    }

    /** @return all error lines joined with the platform line separator */
    public String getBodyFileErrors()
    {
        return String.join(System.lineSeparator(), errors);
    }

    /** @return {@code true} if anything beyond the heading line was recorded */
    public boolean thereAreErrors() {
        return errors.size() > 1;
    }

    /**
     * Creates one instance of {@code type} per data row, in ascending row order, and
     * fills the fields that the header mapping points to. A cell that cannot be
     * converted to the field type yields that type's default value. A row whose
     * object cannot be created, or whose mapped field does not exist, is reported on
     * standard error and left out of the result.
     *
     * @param type class to instantiate; needs an accessible no-argument constructor
     * @return the created objects
     */
    public<T extends Class> List<? extends Object> toListCustomerObject(T type) {
        List<Object> list = new ArrayList<>();
        rows.keySet().stream().sorted().forEach(inx -> {
            HashMap<Integer, String> row = rows.get(inx);
            try {
                Object obj = type.getDeclaredConstructor().newInstance();
                for(int i = minColOfHeader; i <= maxColOfHeader; i++) {
                    String cell = row.get(i);
                    String title = header.get(i);
                    if (isBlank(cell) || isBlank(title) || !isMapped(title)) {
                        continue;
                    }
                    Field field = type.getDeclaredField(getRelationHeaders(i,0));
                    try{
                        field.setAccessible(true);
                        field.set(obj, convert(field.getType(), cell));
                        field.setAccessible(false);
                    }catch (IllegalAccessException | RuntimeException ignored) {
                        // Best effort: a field that cannot be assigned keeps its initial value.
                    }
                }
                list.add(obj);
            } catch (ReflectiveOperationException | RuntimeException ex) {
                System.err.println("Row " + (inx + 1) + " could not be mapped to " + type.getName() + ": " + ex);
            }
        });
        return list;
    }

    /** Converts the cell text to the field type, falling back to the type's default value. */
    private Object convert(Class<?> clazz, String value) {
        if (!isConvertibleTo(clazz, value)) {
            return defaultValue(clazz);
        }
        try {
            return toObject(clazz, value);
        } catch (NumberFormatException outOfRange) {
            // Well-formed but too large for the target type, e.g. "300" for a byte.
            return defaultValue(clazz);
        }
    }

    /** Parses the text for primitive/wrapper targets; any other target receives the text unchanged. */
    private Object toObject( Class<?> clazz, String value ) {
        String text = value.trim();
        if( Boolean.class == clazz || Boolean.TYPE == clazz) return Boolean.parseBoolean( text );
        if( Byte.class == clazz || Byte.TYPE == clazz) return Byte.parseByte( text );
        if( Short.class == clazz || Short.TYPE == clazz) return Short.parseShort( text );
        if( Integer.class == clazz || Integer.TYPE == clazz) return Integer.parseInt( text );
        if( Long.class == clazz || Long.TYPE == clazz) return Long.parseLong( text );
        if( Float.class == clazz || Float.TYPE == clazz) return Float.parseFloat( text );
        if( Double.class == clazz || Double.TYPE == clazz) return Double.parseDouble( text );
        return value;
    }

    /** Checks the text against the syntax expected for the target type; negative numbers are accepted. */
    private boolean isConvertibleTo( Class<?> clazz, String value ) {
        String text = value.trim();
        if (isIntegral(clazz)) {
            return INTEGER_TEXT.matcher(text).matches();
        }
        if (Float.class == clazz || Float.TYPE == clazz || Double.class == clazz || Double.TYPE == clazz) {
            return DECIMAL_TEXT.matcher(text).matches();
        }
        // Booleans and all non-numeric targets accept any text.
        return true;
    }

    private static boolean isIntegral(Class<?> clazz) {
        return Byte.class == clazz || Byte.TYPE == clazz
                || Short.class == clazz || Short.TYPE == clazz
                || Integer.class == clazz || Integer.TYPE == clazz
                || Long.class == clazz || Long.TYPE == clazz;
    }

    /** Zero/false for primitive and wrapper targets, the empty string otherwise. */
    private Object defaultValue( Class<?> clazz) {
        if( Boolean.class == clazz || Boolean.TYPE == clazz) return Boolean.FALSE;
        if( Byte.class == clazz || Byte.TYPE == clazz) return Byte.valueOf( (byte) 0 );
        if( Short.class == clazz || Short.TYPE == clazz) return Short.valueOf( (short) 0 );
        if( Integer.class == clazz || Integer.TYPE == clazz) return Integer.valueOf( 0 );
        if( Long.class == clazz || Long.TYPE == clazz) return Long.valueOf( 0L );
        if( Float.class == clazz || Float.TYPE == clazz) return Float.valueOf( 0.0f );
        if( Double.class == clazz || Double.TYPE == clazz) return Double.valueOf( 0.0d );
        return "";
    }

    /**
     * Looks up the mapping entry for a header column.
     *
     * @param columnIndexHeader column index of the header cell
     * @param TypeOrDescription 0 - Type, 1 - Description
     * @return the requested mapping element, or the header text when there is no mapping
     */
    private String getRelationHeaders(Integer columnIndexHeader, Integer TypeOrDescription /*0 - Type, 1 - Description*/) {
        String title = header.get(columnIndexHeader);
        String[] relation = relationHeaderClassPropertyDescription == null
                ? null
                : relationHeaderClassPropertyDescription.get(title);
        if (relation != null && TypeOrDescription != null
                && TypeOrDescription >= 0 && TypeOrDescription < relation.length) {
            return relation[TypeOrDescription];
        }
        return title;
    }

    private boolean isMapped(String title) {
        return relationHeaderClassPropertyDescription != null
                && relationHeaderClassPropertyDescription.containsKey(title);
    }

    private static boolean isBlank(String text) {
        return text == null || text.trim().isEmpty();
    }
}
these are the modifications I made to the examples:
XLSX2CSV
public class XLSX2CSV {
/**
* Uses the XSSF Event SAX helpers to do most of the work
* of parsing the Sheet XML, and outputs the contents
* as a (basic) CSV.
*/
private class SheetToCSV implements SheetContentsHandler {
private boolean firstCellOfRow = false;
private int currentRow = -1;
private int currentCol = -1;
HashMap<Integer, String> valuesCell;
private void outputMissingRows(int number) {
for (int i=0; i<number; i++) {
for (int j=0; j<minColumns; j++) {
output.append(',');
}
output.append('\n');
}
}
#Override
public void startRow(int rowNum) {
// If there were gaps, output the missing rows
outputMissingRows(rowNum-currentRow-1);
// Prepare for this row
firstCellOfRow = true;
currentRow = rowNum;
currentCol = -1;
valuesCell = new HashMap<>();
}
#Override
public void endRow(int rowNum) {
// Ensure the minimum number of columns
for (int i = currentCol; i < minColumns; i++) {
output.append(',');
}
output.append('\n');
if (!valuesCell.isEmpty())
_rows.put(rowNum, valuesCell);
}
#Override
public void cell(String cellReference, String formattedValue,
XSSFComment comment) {
if (firstCellOfRow) {
firstCellOfRow = false;
} else {
output.append(',');
}
// gracefully handle missing CellRef here in a similar way as XSSFCell does
if (cellReference == null) {
cellReference = new CellAddress(currentRow, currentCol).formatAsString();
}
// Did we miss any cells?
int thisCol = (new CellReference(cellReference)).getCol();
int missedCols = thisCol - currentCol - 1;
for (int i = 0; i < missedCols; i++) {
output.append(',');
}
currentCol = thisCol;
if (!formattedValue.isEmpty())
valuesCell.put(thisCol, formattedValue);
// Number or string?
output.append(formattedValue);
/*try {
//noinspection ResultOfMethodCallIgnored
Double.parseDouble(formattedValue);
output.append(formattedValue);
} catch (NumberFormatException e) {
output.append('"');
output.append(formattedValue);
output.append('"');
}*/
}
#Override
public void headerFooter(String text, boolean isHeader, String tagName) {
// Skip, no headers or footers in CSV
}
}
///////////////////////////////////////
private final OPCPackage xlsxPackage;
/**
* Number of columns to read starting with leftmost
*/
private final int minColumns;
/**
* Destination for data
*/
private final PrintStream output;
public HashMap<Integer, HashMap<Integer, String>> _rows;
/**
* Creates a new XLSX -> CSV converter
*
* #param pkg The XLSX package to process
* #param output The PrintStream to output the CSV to
* #param minColumns The minimum number of columns to output, or -1 for no minimum
*/
public XLSX2CSV(OPCPackage pkg, PrintStream output, int minColumns, HashMap<Integer, HashMap<Integer, String> > __rows) {
this.xlsxPackage = pkg;
this.output = output;
this.minColumns = minColumns;
this._rows = __rows;
}
/**
* Parses and shows the content of one sheet
* using the specified styles and shared-strings tables.
*
* #param styles The table of styles that may be referenced by cells in the sheet
* #param strings The table of strings that may be referenced by cells in the sheet
* #param sheetInputStream The stream to read the sheet-data from.
* #exception java.io.IOException An IO exception from the parser,
* possibly from a byte stream or character stream
* supplied by the application.
* #throws SAXException if parsing the XML data fails.
*/
public void processSheet(
StylesTable styles,
ReadOnlySharedStringsTable strings,
SheetContentsHandler sheetHandler,
InputStream sheetInputStream) throws IOException, SAXException {
DataFormatter formatter = new DataFormatter();
InputSource sheetSource = new InputSource(sheetInputStream);
try {
XMLReader sheetParser = SAXHelper.newXMLReader();
ContentHandler handler = new XSSFSheetXMLHandler(
styles, null, strings, sheetHandler, formatter, false);
sheetParser.setContentHandler(handler);
sheetParser.parse(sheetSource);
} catch(ParserConfigurationException e) {
throw new RuntimeException("SAX parser appears to be broken - " + e.getMessage());
}
}
/**
* Initiates the processing of the XLS workbook file to CSV.
*
* #throws IOException If reading the data from the package fails.
* #throws SAXException if parsing the XML data fails.
*/
public void process() throws IOException, OpenXML4JException, SAXException {
ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(this.xlsxPackage);
XSSFReader xssfReader = new XSSFReader(this.xlsxPackage);
StylesTable styles = xssfReader.getStylesTable();
XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader.getSheetsData();
int index = 0;
while (iter.hasNext()) {
InputStream stream = iter.next();
String sheetName = iter.getSheetName();
this.output.println();
this.output.println(sheetName + " [index=" + index + "]:");
processSheet(styles, strings, new SheetToCSV(), stream);
stream.close();
++index;
break;
}
}
}
XLS2CSVmra
public class XLS2CSVmra implements HSSFListener {
/** Minimum number of columns to output per row, or -1 for no minimum. */
private int minColumns;
/** The file system the workbook records are read from. */
private POIFSFileSystem fs;
/** Destination the CSV text is printed to. */
private PrintStream output;
/** Parsed rows: row number -> (column number -> cell text); filled at the end of each row. */
public HashMap<Integer, HashMap<Integer, String>> _rows;
/** Non-empty cell texts of the row currently being read, keyed by column number. */
private HashMap<Integer, String> valuesCell;
/** Row number of the last cell record seen. */
private int lastRowNumber;
/** Column number of the last cell record seen, or -1 at the start of a row. */
private int lastColumnNumber;
/** Should we output the formula, or the value it has? (true = the value, false = the formula text) */
private boolean outputFormulaValues = false;
/** For parsing Formulas */
private SheetRecordCollectingListener workbookBuildingListener;
/** Stub workbook obtained from workbookBuildingListener; used to render formula strings. */
private HSSFWorkbook stubWorkbook;
// Records we pick up as we process
private SSTRecord sstRecord;
private FormatTrackingHSSFListener formatListener;
/** So we know which sheet we're on */
private int sheetIndex = -1;
/** Bound sheet records ordered by the position of their BOF records; built lazily. */
private BoundSheetRecord[] orderedBSRs;
/** Bound sheet records collected before the first worksheet starts. */
private List<BoundSheetRecord> boundSheetRecords = new ArrayList<BoundSheetRecord>();
// For handling formulas with string results
private int nextRow;
private int nextColumn;
private boolean outputNextStringRecord;
/**
 * Builds an XLS -> CSV converter that reads from an already opened file system.
 *
 * @param fs The POIFSFileSystem to process
 * @param output The PrintStream to output the CSV to
 * @param minColumns The minimum number of columns to output, or -1 for no minimum
 * @param __rows Map that receives the parsed rows (row number -> column number -> text)
 */
public XLS2CSVmra(POIFSFileSystem fs, PrintStream output, int minColumns, HashMap<Integer, HashMap<Integer, String>> __rows) {
    // Start with an empty buffer for the first row.
    this.valuesCell = new HashMap<>();
    this._rows = __rows;
    this.minColumns = minColumns;
    this.output = output;
    this.fs = fs;
}
/**
 * Builds an XLS -> CSV converter for a file on disk, printing the CSV to
 * {@code System.out}.
 *
 * @param filename The file to process
 * @param minColumns The minimum number of columns to output, or -1 for no minimum
 * @param __rows Map that receives the parsed rows (row number -> column number -> text)
 * @throws IOException if the file cannot be read as a POIFS file system
 * @throws FileNotFoundException if the file does not exist
 */
public XLS2CSVmra(String filename, int minColumns, HashMap<Integer, HashMap<Integer, String>> __rows) throws IOException, FileNotFoundException {
    this(new POIFSFileSystem(new FileInputStream(filename)), System.out, minColumns, __rows);
}
/**
 * Starts the event-based read of the XLS file; every record is delivered
 * to {@code processRecord}.
 *
 * @throws IOException if the workbook events cannot be processed
 */
public void process() throws IOException {
    formatListener = new FormatTrackingHSSFListener(new MissingRecordAwareHSSFListener(this));
    HSSFEventFactory eventFactory = new HSSFEventFactory();
    HSSFRequest hssfRequest = new HSSFRequest();
    if (!outputFormulaValues) {
        // Formula text is wanted: collect sheet records so a stub workbook can be built.
        workbookBuildingListener = new SheetRecordCollectingListener(formatListener);
        hssfRequest.addListenerForAllRecords(workbookBuildingListener);
    } else {
        hssfRequest.addListenerForAllRecords(formatListener);
    }
    eventFactory.processWorkbookEvents(hssfRequest, fs);
}
/**
* Main HSSFListener method, processes events, and outputs the
* CSV as the file is processed.
*/
#Override
public void processRecord(Record record) {
if(sheetIndex>0)
return;
int thisRow = -1;
int thisColumn = -1;
String thisStr = null;
switch (record.getSid())
{
case BoundSheetRecord.sid:
if(sheetIndex==-1)
boundSheetRecords.add((BoundSheetRecord)record);
break;
case BOFRecord.sid:
BOFRecord br = (BOFRecord)record;
if(br.getType() == BOFRecord.TYPE_WORKSHEET && sheetIndex==-1) {
// Create sub workbook if required
if(workbookBuildingListener != null && stubWorkbook == null) {
stubWorkbook = workbookBuildingListener.getStubHSSFWorkbook();
}
// Output the worksheet name
// Works by ordering the BSRs by the location of
// their BOFRecords, and then knowing that we
// process BOFRecords in byte offset order
sheetIndex++;
if(orderedBSRs == null) {
orderedBSRs = BoundSheetRecord.orderByBofPosition(boundSheetRecords);
}
output.println();
output.println(
orderedBSRs[sheetIndex].getSheetname() +
" [" + (sheetIndex+1) + "]:"
);
}
break;
case SSTRecord.sid:
sstRecord = (SSTRecord) record;
break;
case BlankRecord.sid:
BlankRecord brec = (BlankRecord) record;
thisRow = brec.getRow();
thisColumn = brec.getColumn();
thisStr = "";
break;
case BoolErrRecord.sid:
BoolErrRecord berec = (BoolErrRecord) record;
thisRow = berec.getRow();
thisColumn = berec.getColumn();
thisStr = "";
break;
case FormulaRecord.sid:
FormulaRecord frec = (FormulaRecord) record;
thisRow = frec.getRow();
thisColumn = frec.getColumn();
if(outputFormulaValues) {
if(Double.isNaN( frec.getValue() )) {
// Formula result is a string
// This is stored in the next record
outputNextStringRecord = true;
nextRow = frec.getRow();
nextColumn = frec.getColumn();
} else {
thisStr = formatListener.formatNumberDateCell(frec);
}
} else {
thisStr = '"' +
HSSFFormulaParser.toFormulaString(stubWorkbook, frec.getParsedExpression()) + '"';
}
break;
case StringRecord.sid:
if(outputNextStringRecord) {
// String for formula
StringRecord srec = (StringRecord)record;
thisStr = srec.getString();
thisRow = nextRow;
thisColumn = nextColumn;
outputNextStringRecord = false;
}
break;
case LabelRecord.sid:
LabelRecord lrec = (LabelRecord) record;
thisRow = lrec.getRow();
thisColumn = lrec.getColumn();
thisStr = '"' + lrec.getValue() + '"';
break;
case LabelSSTRecord.sid:
LabelSSTRecord lsrec = (LabelSSTRecord) record;
thisRow = lsrec.getRow();
thisColumn = lsrec.getColumn();
if(sstRecord == null) {
thisStr = '"' + "(No SST Record, can't identify string)" + '"';
} else {
thisStr = '"' + sstRecord.getString(lsrec.getSSTIndex()).toString() + '"';
}
break;
case NoteRecord.sid:
NoteRecord nrec = (NoteRecord) record;
thisRow = nrec.getRow();
thisColumn = nrec.getColumn();
// TODO: Find object to match nrec.getShapeId()
thisStr = '"' + "(TODO)" + '"';
break;
case NumberRecord.sid:
NumberRecord numrec = (NumberRecord) record;
thisRow = numrec.getRow();
thisColumn = numrec.getColumn();
// Format
thisStr = formatListener.formatNumberDateCell(numrec);
break;
case RKRecord.sid:
RKRecord rkrec = (RKRecord) record;
thisRow = rkrec.getRow();
thisColumn = rkrec.getColumn();
thisStr = '"' + "(TODO)" + '"';
break;
default:
break;
}
// Handle new row
if(thisRow != -1 && thisRow != lastRowNumber) {
lastColumnNumber = -1;
}
// Handle missing column
if(record instanceof MissingCellDummyRecord) {
MissingCellDummyRecord mc = (MissingCellDummyRecord)record;
thisRow = mc.getRow();
thisColumn = mc.getColumn();
thisStr = "";
}
// If we got something to print out, do so
if(thisStr != null) {
if (thisColumn > 0) {
output.print(',');
}
if (!thisStr.isEmpty())
valuesCell.put(thisColumn, thisStr);
output.print(thisStr);
}
// Update column and row count
if(thisRow > -1)
lastRowNumber = thisRow;
if(thisColumn > -1)
lastColumnNumber = thisColumn;
// Handle end of row
if(record instanceof LastCellOfRowDummyRecord) {
// Print out any missing commas if needed
if(minColumns > 0) {
// Columns are 0 based
if(lastColumnNumber == -1) { lastColumnNumber = 0; }
for(int i=lastColumnNumber; i<(minColumns); i++) {
output.print(',');
}
}
// We're onto a new row
lastColumnNumber = -1;
// End the row
output.println();
if(!valuesCell.isEmpty()) {
HashMap<Integer, String> newRow = new HashMap<>();
valuesCell.forEach((inx,vStr) -> {
newRow.put(inx, vStr);
});
_rows.put(lastRowNumber, newRow);
valuesCell = new HashMap<>();
}
}
}
}
I am getting java.lang.IllegalStateException: Zip File is closed when I try to read an xlsx file. The code is called from a Spring-based app. I am using Apache POI 3.9; with Apache POI 3.8 I was getting "Can't read file". The same code works fine when run locally from Eclipse, but I get the exception when the app is deployed to my WebSphere server and accessed there. Can anyone please tell me what the problem is? The source code is below.
import java.io.IOException;
import java.io.InputStream;
import java.math.BigDecimal;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.apache.commons.validator.GenericValidator;
import org.apache.log4j.Logger;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.ss.usermodel.BuiltinFormats;
import org.apache.poi.ss.usermodel.DataFormatter;
import org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.StylesTable;
import org.apache.poi.xssf.usermodel.XSSFCellStyle;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import org.apache.struts.action.ActionMessage;
import org.apache.struts.action.ActionMessages;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
public class XSSFListenerUtil {
// One comma-joined string per non-blank sheet row; assigned by the constructor and filled by the sheet handler.
// NOTE(review): static, so every instance (and every concurrent request) shares this list - confirm single-threaded use.
private static List<String> allColValList = null;
// Set to true as soon as a header or cell fails validation.
private static boolean invalidTemplate = false;
// NOTE(review): the logger is named after HSSFListenerUtil, not this class - looks like a copy/paste; confirm.
private final static Logger log = Logger.getLogger(HSSFListenerUtil.class.getName()); // logger for the process.
// Session bean that receives the extracted records; assigned by the constructor.
private static BaseViewBean baseViewBean$Session;
/**
 * Kinds of cell content the sheet handler distinguishes.
 * The kind is derived from an attribute on the cell element; the value itself
 * is usually found in a "v" element inside the cell.
 */
enum xssfDataType {
    BOOL,
    ERROR,
    FORMULA,
    INLINESTR,
    SSTINDEX,
    NUMBER
}
class MyXSSFSheetHandler extends DefaultHandler {
/**
* Table with styles
*/
private StylesTable stylesTable;
/**
* Table with unique strings
*/
private ReadOnlySharedStringsTable sharedStringsTable;
/**
* Destination for data
*/
//private final PrintStream output;
/**
* Number of columns to read starting with leftmost
*/
private final int minColumnCount;
// Set when V start element is seen
private boolean vIsOpen;
// Set when cell start element is seen;
// used when cell close element is seen.
private xssfDataType nextDataType;
// Used to format numeric cell values.
private short formatIndex;
private String formatString;
private final DataFormatter formatter;
private int thisColumn = -1;
// The last column printed to the output stream
private int lastColumnNumber = -1;
// Gathers characters as they are seen.
private StringBuffer value;
/**
 * Creates a handler with the lookup tables it needs while parsing a sheet.
 *
 * @param styles Table of styles
 * @param strings Table of shared strings
 * @param cols Minimum number of columns to show
 */
public MyXSSFSheetHandler(
        StylesTable styles,
        ReadOnlySharedStringsTable strings,
        int cols) {
    // Parsing state starts out empty, with NUMBER as the assumed cell type.
    this.formatter = new DataFormatter();
    this.nextDataType = xssfDataType.NUMBER;
    this.value = new StringBuffer();
    this.minColumnCount = cols;
    this.sharedStringsTable = strings;
    this.stylesTable = styles;
}
/*
 * (non-Javadoc)
 * @see org.xml.sax.helpers.DefaultHandler#startElement(java.lang.String, java.lang.String, java.lang.String, org.xml.sax.Attributes)
 *
 * Opens value capture for "v"/"inlineStr" elements and, for a cell ("c")
 * element, records its column plus the data type and number format that
 * the following value must be interpreted with.
 */
public void startElement(String uri, String localName, String name,
        Attributes attributes) throws SAXException {
    if ("inlineStr".equals(name) || "v".equals(name)) {
        vIsOpen = true;
        // Start from an empty buffer for this value
        value.setLength(0);
        return;
    }
    // Only cell elements need further work
    if (!"c".equals(name)) {
        return;
    }

    // The reference looks like "AB12": letters give the column, digits the row
    String cellRef = attributes.getValue("r");
    int digitPos = -1;
    int scan = 0;
    while (digitPos == -1 && scan < cellRef.length()) {
        if (Character.isDigit(cellRef.charAt(scan))) {
            digitPos = scan;
        }
        scan++;
    }
    thisColumn = nameToColumn(cellRef.substring(0, digitPos));

    // Defaults until the attributes say otherwise
    this.nextDataType = xssfDataType.NUMBER;
    this.formatIndex = -1;
    this.formatString = null;

    String typeAttr = attributes.getValue("t");
    String styleAttr = attributes.getValue("s");
    if ("b".equals(typeAttr)) {
        nextDataType = xssfDataType.BOOL;
    } else if ("e".equals(typeAttr)) {
        nextDataType = xssfDataType.ERROR;
    } else if ("inlineStr".equals(typeAttr)) {
        nextDataType = xssfDataType.INLINESTR;
    } else if ("s".equals(typeAttr)) {
        nextDataType = xssfDataType.SSTINDEX;
    } else if ("str".equals(typeAttr)) {
        nextDataType = xssfDataType.FORMULA;
    } else if (styleAttr != null) {
        // A number, most likely carrying a dedicated style or format
        XSSFCellStyle cellStyle = stylesTable.getStyleAt(Integer.parseInt(styleAttr));
        this.formatIndex = cellStyle.getDataFormat();
        String styleFormat = cellStyle.getDataFormatString();
        this.formatString = (styleFormat != null)
                ? styleFormat
                : BuiltinFormats.getBuiltinFormat(this.formatIndex);
    }
}
StringBuffer columnvalue = new StringBuffer();
/*
 * (non-Javadoc)
 * @see org.xml.sax.helpers.DefaultHandler#endElement(java.lang.String, java.lang.String, java.lang.String)
 *
 * On the end of a "v" element the collected text is converted according to
 * the cell type and appended (comma-separated) to the current row buffer.
 * On the end of a "row" element the buffered row, with all double quotes
 * removed, is added to allColValList unless it is blank.
 */
public void endElement(String uri, String localName, String name)
        throws SAXException {
    String thisStr = null;
    // v => contents of a cell
    if ("v".equals(name)) {
        // Process the value contents as required.
        // Do now, as characters() may be called more than once
        switch (nextDataType) {
            case BOOL:
                char first = value.charAt(0);
                thisStr = first == '0' ? "FALSE" : "TRUE";
                break;
            case ERROR:
                thisStr = "\"ERROR:" + value.toString() + '"';
                break;
            case FORMULA:
                // A formula could result in a string value, so always add double-quote characters.
                thisStr = '"' + value.toString() + '"';
                break;
            case INLINESTR:
                XSSFRichTextString rtsi = new XSSFRichTextString(value.toString());
                thisStr = '"' + rtsi.toString() + '"';
                break;
            case SSTINDEX:
                String sstIndex = value.toString();
                try {
                    int idx = Integer.parseInt(sstIndex);
                    XSSFRichTextString rtss = new XSSFRichTextString(sharedStringsTable.getEntryAt(idx));
                    thisStr = '"' + rtss.toString() + '"';
                } catch (NumberFormatException ex) {
                    log.error("Failed to parse SST index '" + sstIndex + "': " + ex.toString());
                    // Chain the parse failure so the root cause survives in the stack trace.
                    throw new SAXException("Failed to parse SST index '" + sstIndex + "': " + ex.toString(), ex);
                }
                break;
            case NUMBER:
                String n = value.toString();
                if (this.formatString != null) {
                    thisStr = formatter.formatRawCellContents(Double.parseDouble(n), this.formatIndex, this.formatString);
                } else {
                    thisStr = n;
                }
                break;
            default:
                thisStr = "(TODO: Unexpected type: " + nextDataType + ")";
                break;
        }
        // Output after we've seen the string contents
        // Emit commas for any fields that were missing on this row
        if (lastColumnNumber == -1) {
            lastColumnNumber = 0;
        }
        for (int i = lastColumnNumber; i < thisColumn; ++i) {
            columnvalue.append(",");
        }
        // Might be an empty string.
        columnvalue.append(thisStr);
        // Update column
        if (thisColumn > -1) {
            lastColumnNumber = thisColumn;
        }
    } else if ("row".equals(name)) {
        // Print out any missing commas if needed
        if (minColumns > 0) {
            // Columns are 0 based
            if (lastColumnNumber == -1) {
                lastColumnNumber = 0;
            }
            for (int i = lastColumnNumber; i < (this.minColumnCount); i++) {
                columnvalue.append(",");
            }
        }
        // We're onto a new row
        if (!GenericValidator.isBlankOrNull(columnvalue.toString())) {
            // Literal replace: there is no need to compile a regex to strip quotes.
            String completeVal = columnvalue.toString().replace("\"", "");
            allColValList.add(completeVal);
        }
        columnvalue = new StringBuffer("");
        lastColumnNumber = -1;
    }
}
/**
 * Collects element text, but only once a "v" or "inlineStr" element has
 * switched capturing on.
 */
public void characters(char[] ch, int start, int length)
        throws SAXException {
    if (!vIsOpen) {
        return;
    }
    value.append(ch, start, length);
}
/**
 * Translates an Excel column name such as "C" or "AB" into a zero-based
 * column index ("A" is 0, "Z" is 25, "AA" is 26).
 *
 * @param name column letters of a cell reference
 * @return Index corresponding to the specified name; -1 for an empty name
 */
private int nameToColumn(String name) {
    int index = -1;
    for (char letter : name.toCharArray()) {
        // Base-26 accumulation where every digit is offset by one
        index = (index + 1) * 26 + (letter - 'A');
    }
    return index;
}
}
private OPCPackage xlsxPackage;
private int minColumns;
/**
 * Creates a processor for one XLSX package.
 * Note that the row list and the session bean are stored in static fields,
 * so they are shared by all instances.
 *
 * @param pkg The XLSX package to process
 * @param minCol The minimum number of columns to output, or -1 for no minimum
 * @param colList List that receives one comma-joined string per sheet row
 * @param baseViewBean$Session Session bean that later receives the extracted records
 */
public XSSFListenerUtil(OPCPackage pkg, int minCol, ArrayList<String> colList, BaseViewBean baseViewBean$Session) {
    // Shared (static) state first, then the per-instance settings.
    XSSFListenerUtil.baseViewBean$Session = baseViewBean$Session;
    XSSFListenerUtil.allColValList = colList;
    this.minColumns = minCol;
    this.xlsxPackage = pkg;
}
/**
 * Parses the content of one sheet using the specified styles and
 * shared-strings tables; the rows found are collected by the sheet handler.
 *
 * @param styles Table of styles referenced by cells of the sheet
 * @param strings Table of shared strings referenced by cells of the sheet
 * @param sheetInputStream The stream to read the sheet XML from
 * @throws IOException if reading the stream fails
 * @throws ParserConfigurationException if no SAX parser can be created
 * @throws SAXException if the sheet XML cannot be parsed
 */
public void processSheet(
        StylesTable styles,
        ReadOnlySharedStringsTable strings,
        InputStream sheetInputStream)
        throws IOException, ParserConfigurationException, SAXException {
    InputSource sheetSource = new InputSource(sheetInputStream);
    SAXParserFactory saxFactory = SAXParserFactory.newInstance();
    // The workbook is user-supplied: ask the parser to apply its secure-processing
    // limits. Every JAXP implementation is required to support this feature.
    saxFactory.setFeature(javax.xml.XMLConstants.FEATURE_SECURE_PROCESSING, true);
    SAXParser saxParser = saxFactory.newSAXParser();
    XMLReader sheetParser = saxParser.getXMLReader();
    ContentHandler handler = new MyXSSFSheetHandler(styles, strings, this.minColumns);
    sheetParser.setContentHandler(handler);
    sheetParser.parse(sheetSource);
}
/**
 * Reads every sheet of the workbook package, in iterator order, and feeds
 * it through {@code processSheet}.
 *
 * @throws IOException if reading the package or a sheet stream fails
 * @throws OpenXML4JException if the package parts cannot be accessed
 * @throws ParserConfigurationException if no SAX parser can be created
 * @throws SAXException if a sheet's XML cannot be parsed
 */
public void process()
        throws IOException, OpenXML4JException, ParserConfigurationException, SAXException {
    ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(xlsxPackage);
    XSSFReader xssfReader = new XSSFReader(xlsxPackage);
    StylesTable styles = xssfReader.getStylesTable();
    XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader.getSheetsData();
    while (iter.hasNext()) {
        InputStream stream = iter.next();
        try {
            processSheet(styles, strings, stream);
        } finally {
            // Always release the sheet stream, also when parsing fails.
            stream.close();
        }
    }
}
/**
 * Opens the workbook at {@code filePath}, reads all sheet rows, validates
 * them and stores the extracted records on the session bean. When the data
 * does not match the expected template, an error message is added to
 * {@code errors}.
 *
 * @param filePath path of the XLSX file to read
 * @param investmentDao not used by this method
 * @param baseViewBean$Session session bean that receives the extracted records
 * @param errors collector for validation messages
 * @param invalidTemplateIdentifier initial value of the "invalid template" flag
 * @throws Exception if the file cannot be opened, parsed or validated
 */
public static void parseAndRetrieveExcelDetails(String filePath, InvestmentDimDao investmentDao,
        BaseViewBean baseViewBean$Session, ActionMessages errors, boolean invalidTemplateIdentifier) throws Exception {
    int minColumns = -1;
    invalidTemplate = invalidTemplateIdentifier;
    ArrayList<String> initialList = new ArrayList<String>();
    //OPCPackage pkg = OPCPackage.open(filePath, PackageAccess.READ);
    // NOTE(review): open(String) requests read/write access per the POI docs;
    // read-only access would be enough for parsing - confirm before switching.
    OPCPackage pkg = OPCPackage.open(filePath);
    boolean finished = false;
    try {
        XSSFListenerUtil xssfSheetProcessor = new XSSFListenerUtil(pkg, minColumns, initialList, baseViewBean$Session);
        xssfSheetProcessor.process();
        validateAndExtractExcelData();
        log.info("Finished processing all records");
        if (invalidTemplate) {
            errors.add("failure", new ActionMessage("secaccess.file.invaliddata"));
        } else {
            log.info("All records parsed successfully");
        }
        finished = true;
    } finally {
        // Drop the shared row buffer whether or not processing succeeded.
        if (allColValList != null) {
            allColValList.clear();
            allColValList = null;
        }
        if (finished) {
            pkg.close();
        } else {
            // On failure release the file handle without writing anything back.
            pkg.revert();
        }
    }
    // Retained from the original. NOTE(review): likely unnecessary now that the package is always released.
    System.gc();
}
/**
 * Validates the collected sheet rows and turns the data rows into
 * {@code ExcelListenerBean}s, which are stored on the session bean.
 * <p>
 * Rows 1-3 must equal the three expected header strings. Every later row is
 * split on commas and its first 16 cells are validated and copied into a
 * bean; beans are keyed by short name, and a later row with the same short
 * name replaces the earlier one after adding that one's gain/loss amount to
 * its own. The first invalid header or cell sets {@code invalidTemplate} and
 * stops the extraction; the offending row is not stored.
 *
 * @throws NumberFormatException if a stored gain/loss amount is not numeric
 */
private static void validateAndExtractExcelData() throws NumberFormatException {
    int rowCnt = 1;
    Map<String, ExcelListenerBean> excelRecords = new TreeMap<String, ExcelListenerBean>();
    for (String colVal : allColValList) {
        if (rowCnt == 1 && !AppGlobalConstants.HardCodedValues.G_L_CALC_HEADER1.equals(colVal)) {
            invalidTemplate = true;
        }
        if (rowCnt == 2 && !AppGlobalConstants.HardCodedValues.G_L_CALC_HEADER2.equals(colVal)) {
            invalidTemplate = true;
        }
        if (rowCnt == 3 && !AppGlobalConstants.HardCodedValues.G_L_CALC_HEADER3.equals(colVal)) {
            invalidTemplate = true;
        }
        if (rowCnt > 3 && !invalidTemplate) {
            ExcelListenerBean listenerBean = new ExcelListenerBean(AppGlobalConstants.HardCodedValues.SQL_TYPE);
            // NOTE(review): split(",") drops trailing empty cells, so a row that is
            // short at the end is not reported as invalid - confirm this is intended.
            String[] allValues = colVal.split(",");
            for (int colCnt = 0; colCnt < allValues.length; colCnt++) {
                if (!applyCell(listenerBean, colCnt, allValues[colCnt], rowCnt)) {
                    invalidTemplate = true;
                }
            }
            if (invalidTemplate) {
                // Do not merge a half-filled bean: its short name may be unset, and the
                // TreeMap lookup below cannot handle a null key. No later row would be
                // processed anyway once the flag is set.
                break;
            }
            ExcelListenerBean bean = excelRecords.get(listenerBean.getShortName());
            if (bean != null) {
                listenerBean.setAcctGainLossAmt(new BigDecimal(Double.valueOf(bean.getAcctGainLossAmt()) + listenerBean.getCorrectionTrdNet() + listenerBean.getIncorrectTrdNet()).toPlainString());
            } else {
                listenerBean.setAcctGainLossAmt(new BigDecimal(listenerBean.getCorrectionTrdNet() + listenerBean.getIncorrectTrdNet()).toPlainString());
            }
            excelRecords.put(listenerBean.getShortName(), listenerBean);
        }
        rowCnt++;
    }
    baseViewBean$Session.setExcelRecLst(new ArrayList<ExcelListenerBean>(excelRecords.values()));
    log.info("Number of records for update is "+excelRecords.size());
}

/**
 * Validates one cell and, when valid, copies it into the matching bean property.
 * Columns 0, 1, 4 and 11 are text and must be non-blank; columns 2-15 otherwise
 * must be non-blank and parse as a double. Columns beyond 15 are ignored.
 *
 * @param bean bean being filled for the current row
 * @param column zero-based column index of the cell
 * @param cell raw cell text
 * @param rowId row counter stored as the bean id together with the short name
 * @return false if the cell is invalid, true otherwise
 */
private static boolean applyCell(ExcelListenerBean bean, int column, String cell, int rowId) {
    if (column > 15) {
        return true;
    }
    if (GenericValidator.isBlankOrNull(cell)) {
        return false;
    }
    switch (column) {
        case 0:
            bean.setId(rowId);
            bean.setShortName(cell);
            return true;
        case 1:
            bean.setFmrCusip(cell);
            return true;
        case 4:
            bean.setIncorrectTrdBuySell(cell);
            return true;
        case 11:
            bean.setCorrectionTrdBuySell(cell);
            return true;
        default:
            break;
    }
    if (!GenericValidator.isDouble(cell)) {
        return false;
    }
    Double number = Double.valueOf(cell);
    switch (column) {
        case 2:
            bean.setIncorrectTrdShares(number);
            break;
        case 3:
            bean.setIncorrectTrdPrice(number);
            break;
        case 5:
            bean.setIncorrectTrdCommRate(number);
            break;
        case 6:
            bean.setIncorrectTrdCommission(number);
            break;
        case 7:
            bean.setIncorrectTrdFees(number);
            break;
        case 8:
            bean.setIncorrectTrdNet(number);
            break;
        case 9:
            bean.setCorrectionTrdShares(number);
            break;
        case 10:
            bean.setCorrectionTrdPrice(number);
            break;
        case 12:
            bean.setCorrectionTrdCommRate(number);
            break;
        case 13:
            bean.setCorrectionTrdCommission(number);
            break;
        case 14:
            bean.setCorrectionTrdFees(number);
            break;
        default:
            // column 15
            bean.setCorrectionTrdNet(number);
            break;
    }
    return true;
}
}
I was running into the same problem, but then I realized that my file was named TestData.xlsx while my code was referring to TestData.csv.
/**
 * Reads an XSSF sheet row by row, returning each row as an array of strings.
 */
public class seventhma {
    XSSFSheet m_sheet;
    int m_iNbRows;
    int m_iCurrentRow = 0;
    private static final String JAVA_TOSTRING = "EEE MMM dd HH:mm:ss zzz yyyy";

    /**
     * @param sheet the sheet whose rows are returned by {@link #splitLine()}
     */
    public seventhma(XSSFSheet sheet) {
        m_sheet = sheet;
        m_iNbRows = sheet.getPhysicalNumberOfRows();
    }

    /*
     * Returns the contents of an Excel row in the form of a String array.
     *
     * The array is indexed by column number and has one slot per column up to
     * the last cell of the row. Missing cells yield "", blank and unsupported
     * cells yield null, date-formatted numbers are rendered with the
     * java.util.Date#toString() pattern. Returns null as soon as the current
     * row does not exist, which callers use as the end-of-data signal.
     *
     * @see com.ibm.ccd.common.parsing.Parser#splitLine()
     */
    public String[] splitLine() throws Exception {
        XSSFRow row = m_sheet.getRow(m_iCurrentRow);
        if (row == null) {
            return null;
        }
        short firstCellNum = row.getFirstCellNum();
        short lastCellNum = row.getLastCellNum();
        // Size by the last column index, not by the number of physically present
        // cells: a row with gaps has fewer physical cells than columns, which made
        // values[column] run out of bounds.
        String[] values = new String[Math.max(lastCellNum, 0)];
        if (firstCellNum >= 0 && lastCellNum >= 0) {
            for (int iCurrent = firstCellNum; iCurrent < lastCellNum; iCurrent++) {
                XSSFCell cell = (XSSFCell) row.getCell(iCurrent);
                if (cell == null) {
                    values[iCurrent] = "";
                    continue;
                }
                switch (cell.getCellType()) {
                    case XSSFCell.CELL_TYPE_NUMERIC:
                        double value = cell.getNumericCellValue();
                        if (DateUtil.isCellDateFormatted(cell)) {
                            // An invalid Excel date leaves the slot null.
                            if (DateUtil.isValidExcelDate(value)) {
                                Date date = DateUtil.getJavaDate(value);
                                SimpleDateFormat dateFormat = new SimpleDateFormat(JAVA_TOSTRING);
                                values[iCurrent] = dateFormat.format(date);
                            }
                        } else {
                            values[iCurrent] = value + "";
                        }
                        break;
                    case XSSFCell.CELL_TYPE_STRING:
                        values[iCurrent] = cell.getStringCellValue();
                        break;
                    case XSSFCell.CELL_TYPE_BLANK:
                        values[iCurrent] = null;
                        break;
                    default:
                        values[iCurrent] = null;
                }
            }
        }
        m_iCurrentRow++;
        return values;
    }

    /**
     * Demo entry point: prints every cell of the first sheet of a fixed workbook.
     */
    public static void main(String args[]) {
        File file = new File("E:\\Local\\Local2.xlsx");
        InputStream excelDocumentStream = null;
        try {
            excelDocumentStream = new FileInputStream(file);
            BufferedInputStream bfs = new BufferedInputStream(excelDocumentStream);
            XSSFWorkbook workBook = new XSSFWorkbook(bfs);
            seventhma parser = new seventhma(workBook.getSheetAt(0));
            String[] res = null;
            while ((res = parser.splitLine()) != null) {
                for (int i = 0; i < res.length; i++) {
                    System.out.println("[" + res[i] + "]" + "\t");
                }
                System.out.println(res.length);
            }
        } catch (Exception e) {
            System.out.println(e);
            e.printStackTrace();
        } finally {
            // Close the file even when reading the workbook failed.
            if (excelDocumentStream != null) {
                try {
                    excelDocumentStream.close();
                } catch (java.io.IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }
}
This program fails with a Java heap space OutOfMemoryError, and when an Excel sheet containing 16 columns is uploaded it throws an ArrayIndexOutOfBoundsException. I increased Eclipse's memory up to -Xmx1600m, but that did not help either.
You get the ArrayIndexOutOfBoundException on the values array because you use the row.getPhysicalNumberOfCells() to determine its size. But row.getPhysicalNumberOfCells() will only count the cells that are actually filled in the file.
For example if you create an Excel sheet and only fill columns A, C and F and don't touch the other cells at all row.getPhysicalNumberOfCells() will return 3.
But you are iterating over all the cells by getting row.getFirstCellNum() and row.getLastCellNum(). So values[iCurrent] will sure be out of bounds once you reach cell F.
Regarding the OutOfMemory issue:
XSSF uses a LOT of memory. Try pushing your VM to as much memory as is possible for your machine. Or if you are just reading the files then try to go with the eventmodel API instead of the usermodel (think SAX vs. DOM).
(source: apache.org)
I am reading an Excel sheet using POI's XSSF and SAX (Event API). The Excel sheet has thousands of rows of user information like user name, email, address, age, department etc.
I need to read each row from Excel, convert it into a User object and add this User object to a List of User objects.
I can read the Excel sheet successfully, but I am not sure at what point while reading I should create an instance of the User object and populate it with the data from the Excel sheet.
Below is my entire working code.
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.List;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackageAccess;
import org.apache.poi.ss.usermodel.BuiltinFormats;
import org.apache.poi.ss.usermodel.DataFormatter;
import org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.StylesTable;
import org.apache.poi.xssf.usermodel.XSSFCellStyle;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
public class ExcelSheetParser {
/** Kinds of cell content the sheet handler distinguishes while parsing. */
enum xssfDataType {
    BOOL,
    ERROR,
    FORMULA,
    INLINESTR,
    SSTINDEX,
    NUMBER
}
int countrows = 0;
class XSSFSheetHandler extends DefaultHandler {
/**
* Table with styles
*/
private StylesTable stylesTable;
/**
* Table with unique strings
*/
private ReadOnlySharedStringsTable sharedStringsTable;
/**
* Destination for data
*/
private final PrintStream output;
private List<?> list = new ArrayList();
private Class clazz;
/**
* Number of columns to read starting with leftmost
*/
private final int minColumnCount;
// Set when V start element is seen
private boolean vIsOpen;
// Set when cell start element is seen;
// used when cell close element is seen.
private xssfDataType nextDataType;
// Used to format numeric cell values.
private short formatIndex;
private String formatString;
private final DataFormatter formatter;
private int thisColumn = -1;
// The last column printed to the output stream
private int lastColumnNumber = -1;
// Gathers characters as they are seen.
private StringBuffer value;
/**
 * Creates a handler with everything it needs while parsing a sheet.
 *
 * @param styles Table of styles
 * @param strings Table of shared strings
 * @param cols Minimum number of columns to show
 * @param target Sink for output
 * @param clazz Class object kept by the handler
 */
public XSSFSheetHandler(StylesTable styles,
        ReadOnlySharedStringsTable strings, int cols, PrintStream target, Class clazz) {
    // Collaborators supplied by the caller
    this.clazz = clazz;
    this.output = target;
    this.minColumnCount = cols;
    this.sharedStringsTable = strings;
    this.stylesTable = styles;
    // Fresh parsing state
    this.formatter = new DataFormatter();
    this.nextDataType = xssfDataType.NUMBER;
    this.value = new StringBuffer();
}
/**
 * Opens value capture for "v"/"inlineStr" elements and, for a cell ("c")
 * element, works out the column and how the upcoming value is to be read.
 */
public void startElement(String uri, String localName, String name,
        Attributes attributes) throws SAXException {
    boolean opensValue = "inlineStr".equals(name) || "v".equals(name);
    if (opensValue) {
        vIsOpen = true;
        // Clear contents cache
        value.setLength(0);
    } else if ("c".equals(name)) {
        // c => cell; its reference is column letters followed by the row number
        String reference = attributes.getValue("r");
        int lettersEnd = -1;
        for (int pos = 0; pos < reference.length(); pos++) {
            if (Character.isDigit(reference.charAt(pos))) {
                lettersEnd = pos;
                break;
            }
        }
        thisColumn = nameToColumn(reference.substring(0, lettersEnd));

        // Set up defaults.
        this.nextDataType = xssfDataType.NUMBER;
        this.formatIndex = -1;
        this.formatString = null;

        String cellType = attributes.getValue("t");
        String cellStyleStr = attributes.getValue("s");

        // An explicit type attribute wins over any style information
        xssfDataType explicitType = null;
        if ("b".equals(cellType)) {
            explicitType = xssfDataType.BOOL;
        } else if ("e".equals(cellType)) {
            explicitType = xssfDataType.ERROR;
        } else if ("inlineStr".equals(cellType)) {
            explicitType = xssfDataType.INLINESTR;
        } else if ("s".equals(cellType)) {
            explicitType = xssfDataType.SSTINDEX;
        } else if ("str".equals(cellType)) {
            explicitType = xssfDataType.FORMULA;
        }

        if (explicitType != null) {
            nextDataType = explicitType;
        } else if (cellStyleStr != null) {
            // It's a number, but almost certainly one
            // with a special style or format
            int styleIndex = Integer.parseInt(cellStyleStr);
            XSSFCellStyle style = stylesTable.getStyleAt(styleIndex);
            this.formatIndex = style.getDataFormat();
            this.formatString = style.getDataFormatString();
            if (this.formatString == null) {
                this.formatString = BuiltinFormats.getBuiltinFormat(this.formatIndex);
            }
        }
    }
}
/**
 * Prints a cell when its "v" element closes and finishes the output line
 * (followed by the running row counter) when a "row" element closes.
 */
public void endElement(String uri, String localName, String name)
        throws SAXException {
    if ("v".equals(name)) {
        // v => contents of a cell; convert before touching the column state
        String rendered = renderCurrentValue();
        // Emit commas for any fields that were missing on this row
        if (lastColumnNumber == -1) {
            lastColumnNumber = 0;
        }
        int gap = lastColumnNumber;
        while (gap < thisColumn) {
            output.print(',');
            gap++;
        }
        // Might be the empty string.
        output.print(thisColumn +" : "+rendered);
        // Update column
        if (thisColumn > -1) {
            lastColumnNumber = thisColumn;
        }
        return;
    }
    if (!"row".equals(name)) {
        return;
    }
    // Print out any missing commas if needed
    if (minColumns > 0) {
        // Columns are 0 based
        if (lastColumnNumber == -1) {
            lastColumnNumber = 0;
        }
        int pad = lastColumnNumber;
        while (pad < this.minColumnCount) {
            output.print(',');
            pad++;
        }
    }
    // We're onto a new row
    output.println();
    output.println(countrows++);
    lastColumnNumber = -1;
}

/**
 * Converts the text collected for the current cell according to the data
 * type announced by the cell element. Done at element end because
 * characters() may be called more than once per value.
 *
 * @return the printable cell text; null when a shared-string index could not be parsed
 */
private String renderCurrentValue() {
    switch (nextDataType) {
        case BOOL:
            return value.charAt(0) == '0' ? "FALSE" : "TRUE";
        case ERROR:
            return "\"ERROR:" + value.toString() + '"';
        case FORMULA:
            // A formula could result in a string value,
            // so always add double-quote characters.
            return '"' + value.toString() + '"';
        case INLINESTR:
            // TODO: have seen an example of this, so it's untested.
            XSSFRichTextString inlineText = new XSSFRichTextString(value.toString());
            return '"' + inlineText.toString() + '"';
        case SSTINDEX:
            String sstIndex = value.toString();
            try {
                int idx = Integer.parseInt(sstIndex);
                XSSFRichTextString sharedText = new XSSFRichTextString(
                        sharedStringsTable.getEntryAt(idx));
                return '"' + sharedText.toString() + '"';
            } catch (NumberFormatException ex) {
                output.println("Failed to parse SST index '" + sstIndex
                        + "': " + ex.toString());
                return null;
            }
        case NUMBER:
            String rawNumber = value.toString();
            if (this.formatString == null) {
                return rawNumber;
            }
            return formatter.formatRawCellContents(Double.parseDouble(rawNumber),
                    this.formatIndex, this.formatString);
        default:
            return "(TODO: Unexpected type: " + nextDataType + ")";
    }
}
/**
 * Appends incoming character data to the value buffer, but only while the
 * {@code vIsOpen} flag is set; text delivered at any other time is dropped.
 * The original note says the flag first covered only "v" elements and was
 * later extended to inlineStr as well.
 *
 * @param ch     buffer holding the characters
 * @param start  offset of the first character to take from {@code ch}
 * @param length number of characters to take
 * @throws SAXException declared in the signature
 */
public void characters(char[] ch, int start, int length)
        throws SAXException {
    if (!vIsOpen) {
        return;
    }
    value.append(ch, start, length);
}
/**
 * Translates an Excel column name into its zero-based index by reading the
 * letters as bijective base-26 digits ("A" is 0, "Z" is 25, "AA" is 26).
 * An empty name yields -1.
 *
 * @param name the column letters, e.g. "C"
 * @return index corresponding to the specified name
 */
private int nameToColumn(String name) {
    int index = -1;
    for (char letter : name.toCharArray()) {
        int digit = letter - 'A';
        index = (index + 1) * 26 + digit;
    }
    return index;
}
}
// /////////////////////////////////////
// The XLSX package whose shared strings, styles and sheets are read by process()
private OPCPackage xlsxPackage;
// Minimum number of columns to output per row (-1 for no minimum); handed to the sheet handler
private int minColumns;
// Stream that receives the sheet headers and is handed to the sheet handler for cell output
private PrintStream output;
// Class token forwarded unchanged to XSSFSheetHandler; how the handler uses it is not visible here
private Class clazz;
/**
 * Builds an XLSX -> CSV converter bound to a single package. The arguments
 * are only stored; no parsing happens until {@code process()} is called.
 *
 * @param pkg
 *            the XLSX package to process
 * @param output
 *            the PrintStream to output the CSV to
 * @param minColumns
 *            the minimum number of columns to output, or -1 for no minimum
 * @param clazz
 *            class token kept as-is and later passed to the sheet handler
 */
public ExcelSheetParser(OPCPackage pkg, PrintStream output, int minColumns, Class clazz) {
    this.clazz = clazz;
    this.minColumns = minColumns;
    this.output = output;
    this.xlsxPackage = pkg;
}
/**
 * Parses the content of one sheet with a SAX parser, delivering the events
 * to an {@code XSSFSheetHandler} built from the given styles and
 * shared-strings tables plus this parser's column minimum, output stream
 * and class token.
 * <p>
 * The parser is hardened before use because the workbook XML comes from a
 * file that may not be trusted: JAXP secure processing is switched on and,
 * where the parser supports it, DOCTYPE declarations are rejected.
 *
 * @param styles
 *            styles table of the workbook
 * @param strings
 *            shared-strings table of the workbook
 * @param sheetInputStream
 *            raw XML stream of the sheet; it is not closed by this method
 * @throws IOException
 *             if reading the stream fails
 * @throws ParserConfigurationException
 *             if no SAX parser can be created with the requested settings
 * @throws SAXException
 *             if the sheet XML cannot be parsed
 */
public void processSheet(StylesTable styles,
        ReadOnlySharedStringsTable strings, InputStream sheetInputStream)
        throws IOException, ParserConfigurationException, SAXException {
    SAXParserFactory saxFactory = SAXParserFactory.newInstance();
    // Every JAXP implementation is required to support this feature.
    // Fully qualified so that no additional import is needed in the file.
    saxFactory.setFeature(javax.xml.XMLConstants.FEATURE_SECURE_PROCESSING, true);
    try {
        // Sheet parts are not expected to carry a DOCTYPE, so refusing one
        // closes the usual external-entity (XXE) route.
        saxFactory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
    } catch (ParserConfigurationException ignored) {
        // Best effort: this is a Xerces-specific feature. Secure processing
        // enabled above still applies when the parser does not know it.
    } catch (SAXException ignored) {
        // Same as above (feature not recognised or not supported).
    }

    XMLReader sheetParser = saxFactory.newSAXParser().getXMLReader();
    ContentHandler handler = new XSSFSheetHandler(styles, strings,
            this.minColumns, this.output, this.clazz);
    sheetParser.setContentHandler(handler);
    sheetParser.parse(new InputSource(sheetInputStream));
}
/**
 * Processes every sheet of the XLSX package in turn: prints a header line
 * with the sheet name and its running index to the output stream, then
 * hands the sheet's XML stream to {@code processSheet}.
 * <p>
 * Each sheet stream is closed even when parsing that sheet fails, so a bad
 * sheet does not leak the underlying stream.
 *
 * @throws IOException
 *             if reading or closing a sheet stream fails
 * @throws OpenXML4JException
 *             if the package parts cannot be accessed
 * @throws ParserConfigurationException
 *             if no SAX parser can be created
 * @throws SAXException
 *             if a sheet's XML cannot be parsed
 */
public void process() throws IOException, OpenXML4JException,
        ParserConfigurationException, SAXException {
    ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(
            this.xlsxPackage);
    XSSFReader xssfReader = new XSSFReader(this.xlsxPackage);
    StylesTable styles = xssfReader.getStylesTable();
    XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader
            .getSheetsData();
    int index = 0;
    while (iter.hasNext()) {
        InputStream stream = iter.next();
        try {
            String sheetName = iter.getSheetName();
            this.output.println(sheetName + " [index=" + index + "]:");
            processSheet(styles, strings, stream);
        } finally {
            // Release the stream whether or not the sheet parsed cleanly
            stream.close();
        }
        ++index;
    }
}
}
What I'd probably do is start building the User object when the row starts. As you hit the cells in the row, you populate your User object. When the row ends, validate the User object, and if it's fine, add it then. Because you're doing SAX parsing, you'll get the start and end events for all of these, so you can attach your logic there.
I'd suggest you take a look at XLSX2CSV in the Apache POI Examples. It shows how to go about handling the different kinds of cell contents (which you'll need for populating your user object), how to do something when you reach the end of the row, as well as handling missing cells etc.
I think you can create a user object at the following location in your code:
// We're onto a new row
output.println();
// Convert output to a new user object
// ....
// ....
First of all, at the point where you store the value in the thisStr variable, put it into a Map if it is a valid value.
You should create the User object in the endElement() method, in this branch:
else if ("row".equals(name)) {
// use map create USER object here
}
You can then add each User object to a global list, and if you want to persist the data you can do so either sheet by sheet or all at once.
while (iter.hasNext()) {
InputStream stream = iter.next();
String sheetName = iter.getSheetName();
this.output.println(sheetName + " [index=" + index + "]:");
processSheet(styles, strings, stream);
stream.close();
++index;
//for persisting USERS data sheet by sheet write your code here.........
}
// for persisting complete data of all sheets write your code here...
This is working for me.