Parsing XML usin SAX in java - java

I have this code to parse XML data ..
But when startelement and endelement functions are called they don't get the parameters values ( as name parameter don't have any data ) when tying to print the parameters values. it dont't have any value, why ?
I call updateArticle function in the following code
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
public class RSSHandler extends DefaultHandler {
// Used to define what elements we are currently in
private boolean inItem = false;
private boolean inTitle = false;
private boolean inLink = false;
// Feed and Article objects to use for temporary storage
private Article currentArticle = new Article();
private Feed currentFeed = new Feed();
// Number of articles added so far
private int articlesAdded = 0;
private ArrayList<Article> articles = new ArrayList<Article>();
private ArrayList<Feed> feeds = new ArrayList<Feed>();
// Number of articles to download
private static final int ARTICLES_LIMIT = 15;
// The possible values for targetFlag
private static final int TARGET_FEED = 0;
private static final int TARGET_ARTICLES = 1;
// A flag to know if looking for Articles or Feed name
private int targetFlag;
public RSSHandler(){ }
public void startElement(String uri, String name, String qName,Attributes atts) {
if (name.trim().equals("title"))
inTitle = true;
else if (name.trim().equals("item"))
inItem = true;
else if (name.trim().equals("link"))
inLink = true;
System.out.println(name.trim());
}
public void endElement(String uri, String name, String qName)throws SAXException {
if (name.trim().equals("title"))
inTitle = false;
else if (name.trim().equals("item"))
inItem = false;
else if (name.trim().equals("link"))
inLink = false;
// Check if looking for feed, and if feed is complete
if (targetFlag == TARGET_FEED && currentFeed.url != null && currentFeed.title != null) {
// We know everything we need to know, so insert feed and exit
System.out.println("add current feed");
feeds.add(currentFeed);
// throw new SAXException();
}
// Check if looking for article, and if article is complete
if (targetFlag == TARGET_ARTICLES && currentArticle.url != null && currentArticle.title != null) {
Article article = new Article();
article.feedId = currentFeed.id;
article.title = currentArticle.title;
article.url = currentArticle.url;
System.out.print(article.title);
articles.add(article);
//store articles in database
currentArticle.title = null;
currentArticle.url = null;
// Lets check if we've hit our limit on number of articles
articlesAdded++;
if (articlesAdded >= ARTICLES_LIMIT)
throw new SAXException();
}
}
public ArrayList<Article> getArticles(){
return this.articles;
}
public ArrayList<Feed> getFeeds(){
return this.feeds;
}
public void characters(char ch[], int start, int length) {
String chars = (new String(ch).substring(start, start + length));
System.out.println(chars);
try {
// If not in item, then title/link refers to feed
if (!inItem) {
if (inTitle)
currentFeed.title = chars;
} else {
if (inLink)
currentArticle.url = new URL(chars);
if (inTitle)
currentArticle.title = chars;
}
} catch (MalformedURLException e) {
}
}
public void createFeed(URL url) {
try {
targetFlag = TARGET_FEED;
currentFeed.url = url;
SAXParserFactory spf = SAXParserFactory.newInstance();
SAXParser sp = spf.newSAXParser();
XMLReader xr = sp.getXMLReader();
xr.setContentHandler(this);
xr.parse(new InputSource(url.openStream()));
} catch (IOException e) {}
catch (SAXException e) {}
catch (ParserConfigurationException e) {}
}
public void updateArticles(Feed feed) {
try {
targetFlag = TARGET_ARTICLES;
currentFeed = feed;
System.out.println(feed.url.toString());
SAXParserFactory spf = SAXParserFactory.newInstance();
SAXParser sp = spf.newSAXParser();
XMLReader xr = sp.getXMLReader();
xr.setContentHandler(this);
xr.parse(new InputSource(currentFeed.url.openStream()));
} catch (IOException e) {}
catch (SAXException e) {}
catch (ParserConfigurationException e) {}
}
}

One of the most appalling design decisions in JAXP (and there were many) was that SAXParserFactory by default creates a parser that is not namespace-aware. Always call setNamespaceAware(true) on the returned parser. Otherwise, the XMLReader will call startElement using the options defined for a non-namespace-aware parser, which means it will supply the lexical QName, but not the local-name and URI.

The qName parameter contains the element name.
Here's an example although it's hard to read because of formatting.
Namespacing etc. makes a difference in where/how to get at element names.

Related

I am trying to use a url xml parse but it looks like i keep getting an empty xml

I am trying to get info from a weather API called even though when i am making the request i am getting a response, but when i am trying to get only a specific part of the response i get null response every time can someone help? here is the code for my handler :
package weathercalls;
import java.util.ArrayList;
import org.xml.sax.Attributes;
import org.xml.sax.helpers.DefaultHandler;
public class Handler extends DefaultHandler
{
// Create three array lists to store the data
public ArrayList<Integer> lows = new ArrayList<Integer>();
public ArrayList<Integer> highs = new ArrayList<Integer>();
public ArrayList<String> regions = new ArrayList<String>();
// Make sure that the code in DefaultHandler's
// constructor is called:
public Handler()
{
super();
}
/*** Below are the three methods that we are extending ***/
#Override
public void startDocument()
{
System.out.println("Start document");
}
#Override
public void endDocument()
{
System.out.println("End document");
}
// This is where all the work is happening:
#Override
public void startElement(String uri, String name, String qName, Attributes atts)
{
if(qName.compareTo("region") == 0)
{
String region = atts.getLocalName(0);
System.out.println("Day: " + region);
this.regions.add(region);
}
if(qName.compareToIgnoreCase("wind_degree") == 0)
{
int low = atts.getLength();
System.out.println("Low: " + low);
this.lows.add(low);
}
if(qName.compareToIgnoreCase("high") == 0)
{
int high = Integer.parseInt(atts.getValue(0));
System.out.println("High: " + high);
this.highs.add(high);
}
}
}
and here is my main file code :
package weathercalls;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import org.xml.sax.InputSource;
import org.xml.sax.XMLReader;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.xml.sax.SAXException;
public class weatherCalls {
public static void main(String[] args) throws Exception {
//Main url
String main_url = "http://api.weatherapi.com/v1/";
//Live or Weekly forecast
String live_weather = "current.xml?key=";
//String sevendays_weather = "orecast.xml?key=";
//API Key + q
String API_Key = "c2e285e55db74def97f151114201701&q=";
//Location Setters
String location = "London";
InputSource inSource = null;
InputStream in = null;
XMLReader xr = null;
/**
URL weather = new URL(main_url + live_weather + API_Key + location);
URLConnection yc = weather.openConnection();
BufferedReader in1 = new BufferedReader(
new InputStreamReader(
yc.getInputStream()));
String inputLine;
while ((inputLine = in1.readLine()) != null)
System.out.println(inputLine);
in1.close();**/
try
{
// Turn the string into a URL object
String complete_url = main_url + live_weather + API_Key + location;
URL urlObject = new URL(complete_url);
// Open the stream (which returns an InputStream):
in = urlObject.openStream();
/** Now parse the data (the stream) that we received back ***/
// Create an XML reader
SAXParserFactory parserFactory = SAXParserFactory.newInstance();
SAXParser parser = parserFactory.newSAXParser();
xr = parser.getXMLReader();
// Tell that XML reader to use our special Google Handler
Handler ourSpecialHandler = new Handler();
xr.setContentHandler(ourSpecialHandler);
// We have an InputStream, but let's just wrap it in
// an InputSource (the SAX parser likes it that way)
inSource = new InputSource(in);
// And parse it!
xr.parse(inSource);
System.out.println(complete_url);
System.out.println(urlObject);
System.out.println(in);
System.out.println(xr);
System.out.println(inSource);
System.out.println(parser);
}
catch(IOException ioe)
{
ioe.printStackTrace();
}
catch(SAXException se)
{
se.printStackTrace();
}
}
}
and this is my console print:
Start document
Day: null
Low: 0
End document
http://api.weatherapi.com/v1/current.xml?key=c2e285e55db74def97f151114201701&q=London
http://api.weatherapi.com/v1/current.xml?key=c2e285e55db74def97f151114201701&q=London
sun.net.www.protocol.http.HttpURLConnection$HttpInputStream#2471cca7
com.sun.org.apache.xerces.internal.jaxp.SAXParserImpl$JAXPSAXParser#5fe5c6f
org.xml.sax.InputSource#6979e8cb
com.sun.org.apache.xerces.internal.jaxp.SAXParserImpl#763d9750
I think you are trying to extract the values from the XML tags and if it is the case then you are doing it wrong. Attributes object contains the attributes of a particular tag and to get the value you have to do some extra work. Similar to the start of a tag, there are separate events for the contents and the end of a tag. current_tag variable will keep track of the current tag being processed. Below is a sample code:
class Handler extends DefaultHandler {
// Create three array lists to store the data
public ArrayList<Integer> lows = new ArrayList<Integer>();
public ArrayList<Integer> highs = new ArrayList<Integer>();
public ArrayList<String> regions = new ArrayList<String>();
// Make sure that the code in DefaultHandler's
// constructor is called:
public Handler() {
super();
}
/*** Below are the three methods that we are extending ***/
#Override
public void startDocument() {
System.out.println("Start document");
}
#Override
public void endDocument() {
System.out.println("End document");
}
//Keeps track of the current tag;
String currentTag = "";
// This is where all the work is happening:
#Override
public void startElement(String uri, String name, String qName, Attributes atts) {
//Save the current tag being handled
currentTag = qName;
}
//Detect end tag
#Override
public void endElement(String uri, String localName, String qName) throws SAXException {
//Reset it
currentTag = "";
}
#Override
public void characters(char[] ch, int start, int length) throws SAXException {
//Rules based on current tag
switch (currentTag) {
case "region":
String region = String.valueOf(ch, start, length);
this.regions.add(region);
System.out.println("Day: " + region);
break;
case "wind_degree":
int low = Integer.parseInt(String.valueOf(ch, start, length));
System.out.println("Low: " + low);
this.lows.add(low);
break;
case "high":
int high = Integer.parseInt(String.valueOf(ch, start, length));
System.out.println("High: " + high);
this.highs.add(high);
break;
}
}}
NOTE: Please refrain from sharing your API keys or passwords on the internet.

How can I go back to Main method in my code, And depending on Condition?

In this program I am Reading .xlsx file. And adding cell data to vector, if vector size is less-than 12 no need to read remaining data, and i need to go main method.
How can I do in my program ?
This is my Code :
package com.read;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Vector;
import org.apache.poi.openxml4j.opc.OPCPackage;
import java.io.InputStream;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.helpers.XMLReaderFactory;
public class SendDataToDb {
public static void main(String[] args) {
SendDataToDb sd = new SendDataToDb();
try {
sd.processOneSheet("C:/Users/User/Desktop/New folder/Untitled 2.xlsx");
System.out.println("in Main method");
} catch (Exception e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
public void processOneSheet(String filename) throws Exception {
System.out.println("executing Process Method");
OPCPackage pkg = OPCPackage.open(filename);
XSSFReader r = new XSSFReader( pkg );
SharedStringsTable sst = r.getSharedStringsTable();
System.out.println("count "+sst.getCount());
XMLReader parser = fetchSheetParser(sst);
// To look up the Sheet Name / Sheet Order / rID,
// you need to process the core Workbook stream.
// Normally it's of the form rId# or rSheet#
InputStream sheet2 = r.getSheet("rId2");
System.out.println("Sheet2");
InputSource sheetSource = new InputSource(sheet2);
parser.parse(sheetSource);
sheet2.close();
}
public XMLReader fetchSheetParser(SharedStringsTable sst) throws SAXException {
//System.out.println("EXECUTING fetchSheetParser METHOD");
XMLReader parser = XMLReaderFactory.createXMLReader("org.apache.xerces.parsers.SAXParser");
ContentHandler handler = new SheetHandler(sst);
parser.setContentHandler(handler);
System.out.println("Method :fetchSheetParser");
return parser;
}
/**
* See org.xml.sax.helpers.DefaultHandler javadocs
*/
private class SheetHandler extends DefaultHandler {
private SharedStringsTable sst;
private String lastContents;
private boolean nextIsString;
Vector values = new Vector(20);
private SheetHandler(SharedStringsTable sst) {
this.sst = sst;
}
public void startElement(String uri, String localName, String name,
Attributes attributes) throws SAXException {
// c => cell
//long l = Long.valueOf(attributes.getValue("r"));
if(name.equals("c")){
columnNum++;
}
if(name.equals("c")) {
// Print the cell reference
// Figure out if the value is an index in the SST
String cellType = attributes.getValue("t");
if(cellType != null && cellType.equals("s")) {
nextIsString = true;
} else {
nextIsString = false;
}
}
// Clear contents cache
lastContents = "";
}
public void endElement(String uri, String localName, String name)
throws SAXException {
//System.out.println("Method :222222222");
// Process the last contents as required.
// Do now, as characters() may be called more than once
if(nextIsString) {
int idx = Integer.parseInt(lastContents);
lastContents = new XSSFRichTextString(sst.getEntryAt(idx)).toString();
nextIsString = false;
}
// v => contents of a cell
// Output after we've seen the string contents
if(name.equals("v")) {
values.add(lastContents);
}
if(name.equals("row")) {
System.out.println(values);
//values.setSize(50);
System.out.println(values.size()+" "+values.capacity());
//********************************************************
//I AM CHECKING CONDITION HERE, IF CONDITION IS TRUE I NEED STOP THE REMAINING PROCESS AND GO TO MAIN METHOD.
if(values.size() < 12)
values.removeAllElements();
//WHAT CODE I NEED TO WRITE HERE TO STOP THE EXECUTION OF REMAINING PROCESS AND GO TO MAIN
//***************************************************************
}
}
public void characters(char[] ch, int start, int length)
throws SAXException {
//System.out.println("method : 333333333333");
lastContents += new String(ch, start, length);
}
}
}
check the code in between lines of //******************************
and
//******************************************
You can throw a SAXException wherever you want the parsing to stop:
throw new SAXException("<Your message>")
and handle it in the main method.
After your checking, you should throw the Exception to get out from there and get it back to the main method.
throw new Exception("vector size has to be less than 12");

Getting Parent Child Hierarchy in Sax XML parser

I'm using SAX (Simple API for XML) to parse an XML document. I'm getting output for all the tags the file have, but i want it to show the tags in parent child hierarchy.
For Example:
This is my output
<dblp>
<www>
<author>
</author><title>
</title><url>
</url><year>
</year></www><inproceedings>
<month>
</month><pages>
</pages><booktitle>
</booktitle><note>
</note><cdrom>
</cdrom></inproceedings><article>
<journal>
</journal><volume>
</volume></article><ee>
</ee><book>
<publisher>
</publisher><isbn>
</isbn></book><incollection>
<crossref>
</crossref></incollection><editor>
</editor><series>
</series></dblp>
But i want it to display the output like this (it displays the children with extra spacing (that's how i want it to be))
<dblp>
<www>
<author>
</author>
<title>
</title>
<url>
</url>
<year>
</year>
</www>
<inproceedings>
<month>
</month>
<pages>
</pages>
<booktitle>
</booktitle>
<note>
</note>
<cdrom>
</cdrom>
</inproceedings>
<article>
<journal>
</journal>
<volume>
</volume>
</article>
<ee>
</ee>
<book>
<publisher>
</publisher>
<isbn>
</isbn>
</book>
<incollection>
<crossref>
</crossref>
</incollection>
<editor>
</editor>
<series>
</series>
</dblp>
But i can't figure out how can i detect that parser is parsing a parent tag or a children.
here is my code:
package com.teamincredibles.sax;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
public class Parser extends DefaultHandler {
public void getXml() {
try {
SAXParserFactory saxParserFactory = SAXParserFactory.newInstance();
SAXParser saxParser = saxParserFactory.newSAXParser();
final MySet openingTagList = new MySet();
final MySet closingTagList = new MySet();
DefaultHandler defaultHandler = new DefaultHandler() {
public void startDocument() throws SAXException {
System.out.println("Starting Parsing...\n");
}
public void endDocument() throws SAXException {
System.out.print("\n\nDone Parsing!");
}
public void startElement(String uri, String localName, String qName,
Attributes attributes) throws SAXException {
if (!openingTagList.contains(qName)) {
openingTagList.add(qName);
System.out.print("<" + qName + ">\n");
}
}
public void characters(char ch[], int start, int length)
throws SAXException {
/*for(int i=start; i<(start+length);i++){
System.out.print(ch[i]);
}*/
}
public void endElement(String uri, String localName, String qName)
throws SAXException {
if (!closingTagList.contains(qName)) {
closingTagList.add(qName);
System.out.print("</" + qName + ">");
}
}
};
saxParser.parse("xml/sample.xml", defaultHandler);
} catch (Exception e) {
e.printStackTrace();
}
}
public static void main(String args[]) {
Parser readXml = new Parser();
readXml.getXml();
}
}
You can consider a StAX implementation:
package be.duo.stax;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamConstants;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;
public class StaxExample {
public void getXml() {
InputStream is = null;
try {
is = new FileInputStream("c:\\dev\\sample.xml");
XMLInputFactory inputFactory = XMLInputFactory.newInstance();
XMLStreamReader reader = inputFactory.createXMLStreamReader(is);
parse(reader, 0);
} catch(Exception ex) {
System.out.println(ex.getMessage());
} finally {
if(is != null) {
try {
is.close();
} catch(IOException ioe) {
System.out.println(ioe.getMessage());
}
}
}
}
private void parse(XMLStreamReader reader, int depth) throws XMLStreamException {
while(true) {
if(reader.hasNext()) {
switch(reader.next()) {
case XMLStreamConstants.START_ELEMENT:
writeBeginTag(reader.getLocalName(), depth);
parse(reader, depth+1);
break;
case XMLStreamConstants.END_ELEMENT:
writeEndTag(reader.getLocalName(), depth-1);
return;
}
}
}
}
private void writeBeginTag(String tag, int depth) {
for(int i = 0; i < depth; i++) {
System.out.print(" ");
}
System.out.println("<" + tag + ">");
}
private void writeEndTag(String tag, int depth) {
for(int i = 0; i < depth; i++) {
System.out.print(" ");
}
System.out.println("</" + tag + ">");
}
public static void main(String[] args) {
StaxExample app = new StaxExample();
app.getXml();
}
}
There is an idiom for StAX with a loop like this for every tag in the XML:
private MyTagObject parseMyTag(XMLStreamReader reader, String myTag) throws XMLStreamException {
MyTagObject myTagObject = new MyTagObject();
while (true) {
switch (reader.next()) {
case XMLStreamConstants.START_ELEMENT:
String localName = reader.getLocalName();
if(localName.equals("myOtherTag1")) {
myTagObject.setMyOtherTag1(parseMyOtherTag1(reader, localName));
} else if(localName.equals("myOtherTag2")) {
myTagObject.setMyOtherTag2(parseMyOtherTag2(reader, localName));
}
// and so on
break;
case XMLStreamConstants.END_ELEMENT:
if(reader.getLocalName().equals(myTag) {
return myTagObject;
}
break;
}
}
well what have you tried? you should use a transformer found here: How to pretty print XML from Java?
Transformer transformer = TransformerFactory.newInstance().newTransformer();
transformer.setOutputProperty(OutputKeys.INDENT, "yes");
//initialize StreamResult with File object to save to file
StreamResult result = new StreamResult(new StringWriter());
DOMSource source = new DOMSource(doc);
transformer.transform(source, result);
String xmlString = result.getWriter().toString();
System.out.println(xmlString);
Almost any useful SAX application needs to maintain a stack. When startElement is called, you push information to the stack, when endElement is called, you pop the stack. Exactly what you put on the stack depends on the application; it's often the element name. For your application, you don't actually need a full stack, you only need to know its depth. You could get by with maintaining this using depth++ in startElement and depth-- in endElement(). Then you just output depth spaces before the element name.

Parsing issue in Java StAX XML

I've been searching for a solution to this problem and found nothing. I'm creating a simple chat over Java TCP/IP sockets as an assignment and I've encountered a problem creating an XML string containing the letter åäö.
Lets say I pass an XMLObject with the fields
name="Server", color="#FF0000" text="åäö"
the generated xmlstring that is returned by the parser will be
<?xml version="1.0"?><message sender="Server"><text color="#FF0000”>???</text></message>
åäö turns into ???. Any idea? Thanks!
** UPDATE**
Ok, so I just realised the problem arises when getting some text from a JTextArea.
If I run this code:
package self.edu.javaprojekt.view;
import self.edu.javaprojekt.model.XMLObject;
import self.edu.javaprojekt.model.XMLParser;
public class Test {
/**
* #param args
*/
public static void main(String[] args) {
XMLObject object = new XMLObject();
object.setColor("#FF0000");
object.setSender("Server");
object.setText("åäö");
String parsed = XMLParser.parse(object);
System.out.println(parsed);
System.out.println(XMLParser.parse(parsed));
}
}
I get the proper result:
<?xml version="1.0" encoding="UTF-8"?><message sender="Server"><text color="#FF0000">åäö</text></message>
XMLObject [sender=Server, text=åäö, color=#FF0000]
But with a text area. The text that is returned in the following method is added to an XMLObject and then sent for parsing.
public String readAndClearMessageArea() {
String text = messageTextArea.getText();
appendToPane("\n" + controller.getName() + ": " + text, controller.getColor());
messageTextArea.setText("");
return text;
}
I get the weird result. Any new ideas? Thanks!!
This is the parser
package self.edu.javaprojekt.model;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.UnsupportedEncodingException;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.util.Iterator;
import javax.xml.stream.XMLEventFactory;
import javax.xml.stream.XMLEventReader;
import javax.xml.stream.XMLEventWriter;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLOutputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.events.Attribute;
import javax.xml.stream.events.Characters;
import javax.xml.stream.events.EndElement;
import javax.xml.stream.events.StartDocument;
import javax.xml.stream.events.StartElement;
import javax.xml.stream.events.XMLEvent;
public class XMLParser {
public static final String MESSAGE_TAG = "message";
public static final String SENDER_TAG = "sender";
public static final String TEXT_TAG = "text";
public static final String COLOR_TAG = "color";
public static final String DISCONNECT_TAG = "disconnect";
public static final String ENCRYPTED_TAG = "encrypted";
public static final String ENCRYPTED_TYPE_TAG = "type";
public static final String KEY_REQUEST_TAG = "keyrequest";
public static String parse(XMLObject object) {
// create an XMLOutputFactory
XMLOutputFactory outputFactory = XMLOutputFactory.newInstance();
// create XMLEventWriter
XMLEventWriter eventWriter;
ByteArrayOutputStream out = new ByteArrayOutputStream();
String xmlString = "";
try {
eventWriter = outputFactory.createXMLEventWriter(out);
// create an EventFactory
XMLEventFactory eventFactory = XMLEventFactory.newInstance();
//XMLEvent end = eventFactory.createDTD("\n");
//XMLEvent tab = eventFactory.createDTD("\t");
// create and write Start Tag
StartDocument startDocument = eventFactory.createStartDocument("UTF-8");
eventWriter.add(startDocument);
// create message open tag
//eventWriter.add(end);
StartElement messageStartElement = eventFactory.createStartElement(
"", "", MESSAGE_TAG);
eventWriter.add(messageStartElement);
eventWriter.add(eventFactory.createAttribute(SENDER_TAG,
object.getSender()));
//eventWriter.add(end);
// create the text
//eventWriter.add(tab);
StartElement textStartElement = eventFactory.createStartElement("",
"", TEXT_TAG);
eventWriter.add(textStartElement);
eventWriter.add(eventFactory.createAttribute(COLOR_TAG,
object.getColor()));
Characters characters = eventFactory.createCharacters(object.getText());
eventWriter.add(characters);
//eventWriter.add(tab);
eventWriter.add(eventFactory.createEndElement("", "", TEXT_TAG));
//eventWriter.add(end);
eventWriter.add(eventFactory.createEndElement("", "", MESSAGE_TAG));
//eventWriter.add(end);
eventWriter.add(eventFactory.createEndDocument());
eventWriter.close();
xmlString = out.toString("UTF-8");
} catch (XMLStreamException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (UnsupportedEncodingException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
return xmlString;
}
#SuppressWarnings("unchecked")
public static XMLObject parse(String string) {
XMLObject object = null;
try {
// First, create a new XMLInputFactory
XMLInputFactory inputFactory = XMLInputFactory.newInstance();
// Setup a new eventReader
InputStream in;
if (!string.equals("")) {
in = new ByteArrayInputStream(Charset.forName("UTF-8")
.encode(string).array());
} else {
in = new FileInputStream("xmltest.txt");
}
XMLEventReader eventReader = inputFactory.createXMLEventReader(in);
// read the XML document
while (eventReader.hasNext()) {
XMLEvent event = eventReader.nextEvent();
if (event.isStartElement()) {
StartElement startElement = event.asStartElement();
// If we have a message
if (startElement.getName().getLocalPart()
.equals(MESSAGE_TAG)) {
object = new XMLObject();
// We read the attributes from this tag and add the
// sender
// attribute to our object
Iterator<Attribute> attributes = startElement
.getAttributes();
while (attributes.hasNext()) {
Attribute attribute = attributes.next();
if (attribute.getName().getLocalPart()
.equals(SENDER_TAG)) {
object.setSender(attribute.getValue());
}
}
}
if (startElement.getName().getLocalPart().equals(TEXT_TAG)) {
// We read the attributes from this tag and add the
// color
// attribute to our object
Iterator<Attribute> attributes = startElement
.getAttributes();
while (attributes.hasNext()) {
Attribute attribute = attributes.next();
if (attribute.getName().getLocalPart()
.equals(COLOR_TAG)) {
object.setColor(attribute.getValue());
}
}
// And then get the text
event = eventReader.nextEvent();
object.setText(event.asCharacters().getData());
continue;
}
}
// If we reach the end of the message, return the object
if (event.isEndElement()) {
EndElement endElement = event.asEndElement();
if (endElement.getName().getLocalPart().equals(MESSAGE_TAG)) {
return object;
}
}
}
} catch (XMLStreamException | FileNotFoundException e) {
e.printStackTrace();
}
return null;
}
}
This is the class holding the xml values
package self.edu.javaprojekt.model;
public class XMLObject {
private String sender, text, color;
public String getSender() {
return sender;
}
public String getText() {
return text;
}
public String getColor() {
return color;
}
public void setSender(String sender) {
this.sender = sender;
}
public void setText(String text) {
this.text = text;
}
public void setColor(String color) {
this.color = color;
}
#Override
public String toString() {
return "XMLObject [sender=" + sender + ", text=" + text + ", color="
+ color + "]";
}
}
I have seen this before, you can use the different encoding to see which one work for you. But try ISO-8859-1 or UTF-16. Not sure which encoding you need.

Exception reading XLSB File Apache POI java.io.CharConversionException

Im developing a Java aplication that reads an excel xlsb file using Apache POI, but I got an exception while reading it, my code is as follows:
import java.io.IOException;
import java.io.InputStream;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.Package;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.helpers.XMLReaderFactory;
import java.util.Iterator;
public class Prueba {
public static void main (String [] args){
String direccion = "C:/Documents and Settings/RSalasL/My Documents/New Folder/masstigeoct12.xlsb";
Package pkg;
try {
pkg = Package.open(direccion);
XSSFReader r = new XSSFReader(pkg);
SharedStringsTable sst = r.getSharedStringsTable();
XMLReader parser = fetchSheetParser(sst);
Iterator<InputStream> sheets = r.getSheetsData();
while(sheets.hasNext()) {
System.out.println("Processing new sheet:\n");
InputStream sheet = sheets.next();
InputSource sheetSource = new InputSource(sheet);
parser.parse(sheetSource);
sheet.close();
System.out.println("");
}
} catch (InvalidFormatException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (OpenXML4JException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (SAXException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
public void processAllSheets(String filename) throws Exception {
Package pkg = Package.open(filename);
XSSFReader r = new XSSFReader( pkg );
SharedStringsTable sst = r.getSharedStringsTable();
XMLReader parser = fetchSheetParser(sst);
Iterator<InputStream> sheets = r.getSheetsData();
while(sheets.hasNext()) {
System.out.println("Processing new sheet:\n");
InputStream sheet = sheets.next();
InputSource sheetSource = new InputSource(sheet);
parser.parse(sheetSource);
sheet.close();
System.out.println("");
}
}
public static XMLReader fetchSheetParser(SharedStringsTable sst) throws SAXException {
XMLReader parser =
XMLReaderFactory.createXMLReader(
"org.apache.xerces.parsers.SAXParser"
);
ContentHandler handler = new SheetHandler(sst);
parser.setContentHandler(handler);
return parser;
}
private static class SheetHandler extends DefaultHandler {
private SharedStringsTable sst;
private String lastContents;
private boolean nextIsString;
private SheetHandler(SharedStringsTable sst) {
this.sst = sst;
}
public void startElement(String uri, String localName, String name,
Attributes attributes) throws SAXException {
// c => cell
if(name.equals("c")) {
// Print the cell reference
System.out.print(attributes.getValue("r") + " - ");
// Figure out if the value is an index in the SST
String cellType = attributes.getValue("t");
if(cellType != null && cellType.equals("s")) {
nextIsString = true;
} else {
nextIsString = false;
}
}
// Clear contents cache
lastContents = "";
}
public void endElement(String uri, String localName, String name)
throws SAXException {
// Process the last contents as required.
// Do now, as characters() may be called more than once
if(nextIsString) {
int idx = Integer.parseInt(lastContents);
lastContents = new XSSFRichTextString(sst.getEntryAt(idx)).toString();
nextIsString = false;
}
// v => contents of a cell
// Output after we've seen the string contents
if(name.equals("v")) {
System.out.println(lastContents);
}
}
public void characters(char[] ch, int start, int length)
throws SAXException {
lastContents += new String(ch, start, length);
}
}
}
And the exception is this:
java.io.CharConversionException: Characters larger than 4 bytes are not supported: byte 0x83 implies a length of more than 4 bytes
at org.apache.xmlbeans.impl.piccolo.xml.UTF8XMLDecoder.decode(UTF8XMLDecoder.java:162)
at org.apache.xmlbeans.impl.piccolo.xml.XMLStreamReader$FastStreamDecoder.read(XMLStreamReader.java:762)
at org.apache.xmlbeans.impl.piccolo.xml.XMLStreamReader.read(XMLStreamReader.java:162)
at org.apache.xmlbeans.impl.piccolo.xml.PiccoloLexer.yy_refill(PiccoloLexer.java:3474)
at org.apache.xmlbeans.impl.piccolo.xml.PiccoloLexer.yylex(PiccoloLexer.java:3958)
at org.apache.xmlbeans.impl.piccolo.xml.Piccolo.yylex(Piccolo.java:1290)
at org.apache.xmlbeans.impl.piccolo.xml.Piccolo.yyparse(Piccolo.java:1400)
at org.apache.xmlbeans.impl.piccolo.xml.Piccolo.parse(Piccolo.java:714)
at org.apache.xmlbeans.impl.store.Locale$SaxLoader.load(Locale.java:3439)
at org.apache.xmlbeans.impl.store.Locale.parseToXmlObject(Locale.java:1270)
at org.apache.xmlbeans.impl.store.Locale.parseToXmlObject(Locale.java:1257)
at org.apache.xmlbeans.impl.schema.SchemaTypeLoaderBase.parse(SchemaTypeLoaderBase.java:345)
at org.openxmlformats.schemas.spreadsheetml.x2006.main.WorkbookDocument$Factory.parse(Unknown Source)
at org.apache.poi.xssf.eventusermodel.XSSFReader$SheetIterator.<init>(XSSFReader.java:207)
at org.apache.poi.xssf.eventusermodel.XSSFReader$SheetIterator.<init>(XSSFReader.java:166)
at org.apache.poi.xssf.eventusermodel.XSSFReader.getSheetsData(XSSFReader.java:160)
at EDManager.Prueba.main(Prueba.java:36)
The file has 2 sheets, one with 329 rows and 3 columns and the other with 566 rows and 3 columns, I just want to read the file to find if a value is in the second sheet.
Apache POI doesn't support the .xlsb file format for anything other than text extraction. Apache POI will happily provide full read or write support .xls files (via HSSF) and .xlsx files (via XSSF), or both (via the common SS UserModel interface).
However, the .xlsb format is not supported for generatl operations - it's a very odd hybrid between the two, and the large amount of work involved has meant no-one has been willing to volunteer/sponsor the work required.
What Apache POI does offer for .xlsb, as of Apache POI 3.15 beta3 / 3.16, is a text extractor for .xlsb files - XSSFBEventBasedExcelExtractor. You can use that to get the text out of your file, or with a few tweaks convert it to something like CSV
For full read/write support, you'll need to convert your file to either .xls (if it doesn't have very large numbers of rows/columns), or .xlsx (if it does). If you're really really keen to help though, you could review the source code for XSSFBEventBasedExcelExtractor, then have a go at contributing patches to add full support to POI for it!
(Additionally, I think from the exception that your particular .xlsb file is partly corrupt, but even if it wasn't it still wouldn't be supported by Apache POI for anything other than text extraction, sorry)
I have tried reading XLSB file using Apache POI and it is successful. Below is the code snippet I have used.
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.ss.usermodel.DataFormatter;
import org.apache.poi.xssf.binary.XSSFBSharedStringsTable;
import org.apache.poi.xssf.binary.XSSFBSheetHandler;
import org.apache.poi.xssf.binary.XSSFBStylesTable;
import org.apache.poi.xssf.eventusermodel.XSSFBReader;
import org.xml.sax.SAXException;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
public class ApachePoiXLSB {
public static void main (String [] args){
String xlsbFileName = "test.xlsb";
OPCPackage pkg;
try {
pkg = OPCPackage.open(xlsbFileName);
XSSFBReader r = new XSSFBReader(pkg);
XSSFBSharedStringsTable sst = new XSSFBSharedStringsTable(pkg);
XSSFBStylesTable xssfbStylesTable = r.getXSSFBStylesTable();
XSSFBReader.SheetIterator it = (XSSFBReader.SheetIterator) r.getSheetsData();
List<String> sheetTexts = new ArrayList<>();
while (it.hasNext()) {
InputStream is = it.next();
String name = it.getSheetName();
TestSheetHandler testSheetHandler = new TestSheetHandler();
testSheetHandler.startSheet(name);
XSSFBSheetHandler sheetHandler = new XSSFBSheetHandler(is,
xssfbStylesTable,
it.getXSSFBSheetComments(),
sst, testSheetHandler,
new DataFormatter(),
false);
sheetHandler.parse();
testSheetHandler.endSheet();
sheetTexts.add(testSheetHandler.toString());
}
System.out.println("output text:"+sheetTexts);
} catch (InvalidFormatException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (OpenXML4JException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (SAXException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
}
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler;
import org.apache.poi.xssf.usermodel.XSSFComment;
class TestSheetHandler implements XSSFSheetXMLHandler.SheetContentsHandler {
private final StringBuilder sb = new StringBuilder();
public void startSheet(String sheetName) {
sb.append("<sheet name=\"").append(sheetName).append(">");
}
public void endSheet() {
sb.append("</sheet>");
}
#Override
public void startRow(int rowNum) {
sb.append("\n<tr num=\"").append(rowNum).append(">");
}
#Override
public void endRow(int rowNum) {
sb.append("\n</tr num=\"").append(rowNum).append(">");
}
#Override
public void cell(String cellReference, String formattedValue, XSSFComment comment) {
formattedValue = (formattedValue == null) ? "" : formattedValue;
if (comment == null) {
sb.append("\n\t<td ref=\"").append(cellReference).append("\">").append(formattedValue).append("</td>");
} else {
sb.append("\n\t<td ref=\"").append(cellReference).append("\">")
.append(formattedValue)
.append("<span type=\"comment\" author=\"")
.append(comment.getAuthor()).append("\">")
.append(comment.getString().toString().trim()).append("</span>")
.append("</td>");
}
}
#Override
public void headerFooter(String text, boolean isHeader, String tagName) {
if (isHeader) {
sb.append("<header tagName=\"").append(tagName).append("\">").append(text).append("</header>");
} else {
sb.append("<footer tagName=\"").append(tagName).append("\">").append(text).append("</footer>");
}
}
#Override
public String toString() {
return sb.toString();
}
}
I have a implementation using the smartxls, and my code firts convert the xlsb to xlsx and after can use ApachePoi. The next method receive a java.io.File and verify if its extension is xlsb and convert this to xlsx and replace file whit the new. This works for me.
private void processXLSBFile(File file) {
WorkBook workBook = new WorkBook();
String filePath = file.getAbsolutePath();
if (FilenameUtils.getExtension(filePath).equalsIgnoreCase((Static.XLSB_EXT))) {
try {
workBook.readXLSB(new java.io.FileInputStream(filePath));
filePath = filePath.replaceAll("(?i)".concat(Static.XLSB),
Static.XLSX_EXT.toLowerCase());
workBook.writeXLSX(new java.io.FileOutputStream(filePath));
final File xlsb = new File(filePath);
file = xlsb;
} catch (Exception e) {
logger.error(e.getMessage(), e);
MensajesJSFUtil
.mostrarMensajeNegocio(new GTMException(e, ClaveMensaje.COMANDAS_ADJUNTAR_XLSBFILE_READERROR));
}
}
}

Categories