XML parsing fails on Blackberry - java

I am using the following code for parsing an XML file. But I don't get any response. Can anyone help?
I am also getting a warning when I open a connection:
"Warning!: Invocation of questionable method: java.lang.String.<init>() found". What does it mean?
/**
 * Application entry point: constructs the application object and hands
 * control to its event dispatcher.
 */
public static void main(String arg[]){
    // Construct the application, then start dispatching events on this thread.
    new XML_Parsing_Sample().enterEventDispatcher();
}
public XML_Parsing_Sample() {
_screen.setTitle("XML Parsing");//setting title
_screen.add(new RichTextField("Requesting....."));
_screen.add(new SeparatorField());
pushScreen(_screen); // creating a screen
//creating a connection thread to run in the background
_connectionthread = new Connection();
_connectionthread.start();//starting the thread operation
}
/**
 * Displays one parsed node/value pair on the screen, followed by a separator
 * when the node is named "Title".
 *
 * This method is called from the Connection worker thread, but fields may only
 * be added to a displayed screen from the event thread. The UI work is
 * therefore queued with invokeLater() instead of being executed directly.
 *
 * @param node    name of the parsed element
 * @param element text value of the parsed element
 */
public void updateField(final String node, final String element){
    invokeLater(new Runnable() {
        public void run() {
            _screen.add(new RichTextField(node + " : " + element));
            // Constant-first comparison so a null node name cannot throw.
            if ("Title".equals(node)) {
                _screen.add(new SeparatorField());
            }
        }
    });
}
/**
 * Background thread that downloads an XML document, parses it into a DOM and
 * reports every element (name plus the value of its first child node) to
 * updateField().
 */
private class Connection extends Thread{
    public Connection(){
        super();
    }

    /**
     * Opens the connection, parses the response and publishes each element.
     * Any failure is printed to standard output; the connection and its input
     * stream are always closed afterwards.
     */
    public void run(){
        StreamConnection conn = null;
        java.io.InputStream in = null;
        try{
            // Location of the XML file; your address might be different.
            conn = (StreamConnection) Connector.open(
                    "http://www.w3schools.com/xml/note.xml", Connector.READ);

            DocumentBuilderFactory docBuilderFactory = DocumentBuilderFactory.newInstance();
            DocumentBuilder docBuilder = docBuilderFactory.newDocumentBuilder();

            in = conn.openInputStream();
            Document doc = docBuilder.parse(in);
            doc.getDocumentElement().normalize();

            // "*" matches every element in the document.
            NodeList list = doc.getElementsByTagName("*");

            // String literals instead of new String(): same value, and avoids
            // the "questionable method: java.lang.String.<init>" warning.
            _node = "";
            _element = "";

            for (int i = 0; i < list.getLength(); i++){
                Node current = list.item(i);
                // An empty element has no first child; an element whose first
                // child is another element has a null node value.
                Node value = current.getFirstChild();
                String text = (value == null) ? null : value.getNodeValue();

                _node = current.getNodeName();
                _element = (text == null) ? "" : text;
                updateField(_node, _element);
            }
        }
        // Catches anything thrown while connecting or parsing.
        catch (Exception e){
            System.out.println(e.toString());
        }
        finally {
            // Release the stream and the connection on every path.
            if (in != null) {
                try {
                    in.close();
                } catch (java.io.IOException ignored) {
                    // Nothing useful can be done if close fails.
                }
            }
            if (conn != null) {
                try {
                    conn.close();
                } catch (java.io.IOException ignored) {
                    // Nothing useful can be done if close fails.
                }
            }
        }
    }
}

You are probably timing out.
Try opening the connection as HTTP connection instead, and use the new ConnectionFactory class to get rid of annoying suffixes.

/**
 * Downloads the body of the given URL and returns it as an in-memory stream.
 *
 * The whole response is buffered, so the HTTP connection and its stream are
 * closed before this method returns, on every path.
 *
 * @param url address to fetch
 * @return a stream over the downloaded bytes, or null when no connection could
 *         be obtained, the response code is not HTTP_OK, or an I/O error
 *         occurs (a partially read body is never returned)
 */
public InputStream getResult(String url) {
    System.out.println("in get result");
    HttpConnection httpConn = (HttpConnection) getHTTPConnection(url);
    if (httpConn == null) {
        // Previously this fell through to httpConn.close() and threw a NullPointerException.
        return null;
    }
    try {
        final int iResponseCode = httpConn.getResponseCode();
        if (iResponseCode != HttpConnection.HTTP_OK) {
            return null;
        }
        _inputStream = httpConn.openInputStream();
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        byte[] data = new byte[1024];
        int len;
        while (-1 != (len = _inputStream.read(data))) {
            baos.write(data, 0, len);
        }
        return new ByteArrayInputStream(baos.toByteArray());
    } catch (IOException e) {
        System.err.println("Caught IOException: " + e.getMessage());
        return null;
    } finally {
        if (_inputStream != null) {
            try {
                _inputStream.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        try {
            httpConn.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
The method above downloads the XML.
The method below parses the downloaded XML.
/**
 * Parses the given stream as UTF-8 XML into a DOM document.
 *
 * @param xml stream containing the XML to parse
 * @return the parsed document, or null when the parser cannot be created, the
 *         stream cannot be read, or the XML is malformed (the failure is
 *         reported on standard output / as a stack trace)
 */
public Document XMLfromInputStream(InputStream xml) {
    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
    factory.setAllowUndefinedNamespaces(true);
    factory.setCoalescing(true);
    factory.setExpandEntityReferences(true);

    // Stays null unless parsing completes.
    Document parsed = null;
    try {
        DocumentBuilder builder = factory.newDocumentBuilder();

        InputSource source = new InputSource();
        source.setEncoding("UTF-8");
        source.setByteStream(xml);

        builder.setAllowUndefinedNamespaces(true);
        parsed = builder.parse(source);
    } catch (SAXException e) {
        System.out.println("Wrong XML file structure: " + e.getMessage());
    } catch (IOException e) {
        System.out.println("I/O exeption: " + e.getMessage());
    } catch (net.rim.device.api.xml.parsers.ParserConfigurationException e) {
        e.printStackTrace();
    }
    return parsed;
}
Then walk the parsed document, starting from its root element:
// Take the root element of the parsed document.
Element rootElement = document.getDocumentElement();
// Normalize the subtree before walking it.
rootElement.normalize();
// Display the tree, with the root at depth 0.
displayNode( rootElement, 0 );
/**
 * Recursively adds a textual view of a DOM subtree to the screen, one
 * RichTextField per line, indented according to depth.
 *
 * An element with a single text child is shown as {@code name = "text"}; any
 * other element is shown by name and its children are visited one level
 * deeper. Non-element nodes are shown as quoted text unless their value is
 * missing or consists only of whitespace.
 *
 * @param node  the node to display
 * @param depth nesting level of the node, used for indentation
 */
private void displayNode( Node node, int depth )
{
    if ( node.getNodeType() == Node.ELEMENT_NODE )
    {
        StringBuffer buffer = new StringBuffer();
        indentStringBuffer( buffer, depth );
        NodeList childNodes = node.getChildNodes();
        int numChildren = childNodes.getLength();
        Node firstChild = childNodes.item( 0 );

        // If the node has only one child and that child is a Text node, then it's of
        // the form <Element>Text</Element>, so print 'Element = "Text"'.
        if ( numChildren == 1 && firstChild.getNodeType() == Node.TEXT_NODE )
        {
            buffer.append( node.getNodeName() ).append( " = \"" ).append( firstChild.getNodeValue() ).append( '"' );
            add( new RichTextField( buffer.toString() ) );
        }
        else
        {
            // The node has no children, more than one child, or a single non-text
            // child. Print the name of the element and visit any children.
            buffer.append( node.getNodeName() );
            add( new RichTextField( buffer.toString() ) );

            // Recursively visit all this node's children.
            for ( int i = 0; i < numChildren; ++i )
            {
                displayNode( childNodes.item( i ), depth + 1 );
            }
        }
    }
    else
    {
        // Node is not an Element node. It is usually a Text node, but other node
        // types (for example entity references) have a null value, so guard
        // against that before trimming. Whitespace-only text is skipped because
        // normalize() does not remove it.
        String nodeValue = node.getNodeValue();
        if ( nodeValue != null && nodeValue.trim().length() != 0 )
        {
            StringBuffer buffer = new StringBuffer();
            indentStringBuffer( buffer, depth );
            buffer.append( '"' ).append( nodeValue ).append( '"' );
            add( new RichTextField( buffer.toString() ) );
        }
    }
}
/**
 * Appends leading spaces to the given buffer: {@code _tab} spaces for every
 * level of depth.
 *
 * @param buffer The string buffer to add leading spaces to.
 * @param depth The depth of the node the string buffer represents.
 */
private static void indentStringBuffer( StringBuffer buffer, int depth )
{
    // Count down the number of spaces still to be written.
    int remaining = depth * _tab;
    while ( remaining > 0 )
    {
        buffer.append( ' ' );
        remaining--;
    }
}
You can download and parse the xml using above sample.

// Print the name and value of every attribute on the node. getAttributes()
// returns null for nodes that are not elements, so check before iterating.
NamedNodeMap attributes = value.getAttributes();
if (attributes != null) {
    for (int g = 0; g < attributes.getLength(); g++) {
        Attr attribute = (Attr) attributes.item(g);
        System.out.println(" Attribute: " + attribute.getName() +
                " with value " + attribute.getValue());
    }
}
The code above fetches each attribute and its value.

Related

org.xml.sax.SAXParseException The markup in the document following the root element must be well-formed

I know there are some similar threads out there but none of them are any help to me, I'm trying to create XML file via Java but I'm seeing this error constantly on this line
doc = db.parse(is);
, can someone please tell me where am I going wrong ?
Error:-
Severe: [Fatal Error] :2:2: The markup in the document following the root element must be well-formed.
Severe: org.xml.sax.SAXParseException; lineNumber: 2; columnNumber: 2; The markup in the document following the root element must be well-formed.
at com.sun.org.apache.xerces.internal.parsers.DOMParser.parse(DOMParser.java:257)
at com.sun.org.apache.xerces.internal.jaxp.DocumentBuilderImpl.parse(DocumentBuilderImpl.java:348)
at com.digivote.digivote.XmlServlet.doPost(XmlServlet.java:194)
My code:
#Override
protected void doPost(HttpServletRequest request, HttpServletResponse response)
throws ServletException, IOException {
response.setContentType("text/html");
String level = request.getParameter("level");
String add = request.getParameter("add");
String addCat = request.getParameter("addCat");
String addQuestions = request.getParameter("addQuestion");
String add_multiChoice = request.getParameter("add_multiChoice");
try
{
DocumentBuilder db = DocumentBuilderFactory.newInstance().newDocumentBuilder();
// root elements
Document doc = db.newDocument();
Element rootElement = doc.createElement("XML");
doc.appendChild(rootElement);
/***** GeneratedSet ****/
Element genSet = doc.createElement("GeneratedSet");
rootElement.appendChild(genSet);
Attr genDate = doc.createAttribute("genDate");
genDate.setValue("11/01/2018");
genSet.setAttributeNode(genDate);
Attr genBy = doc.createAttribute("genBy");
genBy.setValue("Tom Jones");
genSet.setAttributeNode(genBy);
Attr lev = doc.createAttribute("level");
lev.setValue(level);
genSet.setAttributeNode(lev);
/****** Questions *****/
Element questions = doc.createElement("Questions");
genSet.appendChild(questions);
Attr noq = doc.createAttribute("Questions");
noq.setValue(addQuestions);
questions.setAttributeNode(noq);
Attr type = doc.createAttribute("Type");
type.setValue(add);
questions.setAttributeNode(type);
/****** Question *****/
int numOfQues = Integer.parseInt(addQuestions) ;
int leve = Integer.parseInt(level) ;
String retXML = null;
if(add != null){
InputSource is = new InputSource();
genMaths gm = new genMaths();
retXML = gm.additionQ(leve, 2, numOfQues, 0);
is.setCharacterStream(new StringReader(retXML));
doc = db.parse(is);
//doc = db.parse(new InputSource(new StringReader(is.toString())));
}
TransformerFactory transformerFactory = TransformerFactory.newInstance();
Transformer transformer = transformerFactory.newTransformer();
DOMSource source = new DOMSource(doc);
StreamResult result = new StreamResult(new File("C:\\Users\\xxxx\\Documents\\XML Files\\RandomQuestions.xml"));
transformer.transform(source, result);
System.out.println("File saved!");
} catch (ParserConfigurationException | TransformerException pce) {} catch (SAXException ex) {
Logger.getLogger(XmlServlet.class.getName()).log(Level.SEVERE, null, ex);
}
}
}
retXML:-
/**
 * Generates {@code questions} random arithmetic questions as a sequence of
 * {@code <Question>} elements, one per line.
 *
 * Each question combines {@code operands} random values, drawn between the
 * minimum and maximum for the given level, by addition when
 * {@code operation == 1} and by multiplication otherwise. When
 * {@code type == 1} the answer is written as an {@code Answer} attribute;
 * otherwise the output of createMultiChoice(answer) is appended instead.
 *
 * @return the concatenated Question elements (there is no enclosing root element)
 */
public String forwardQ ( int level, int operands, int operation, int questions, int type )
{
    final boolean isAddition = ( operation == 1 );
    // Text placed between consecutive operands.
    final String separator = isAddition ? " + " : " x ";

    StringBuffer xml = new StringBuffer ();
    final int lowest = (int)getLevelMin(level);
    final int highest = (int)getLevelMax(level);

    for ( int q = 0; q < questions; q++ )
    {
        xml.append("<Question Text='");

        // Start from the identity of the chosen operation.
        float answer = isAddition ? 0 : 1;
        for ( int i = 0; i < operands; i++ )
        {
            float operand = randNum(lowest, highest);
            answer = isAddition ? ( answer + operand ) : ( answer * operand );
            // No separator in front of the first operand.
            xml.append( ( i == 0 ? "" : separator ) + operand );
        }

        if ( type == 1 )
        {
            xml.append("' Answer='"+ answer + "'>");
        }
        else
        {
            xml.append(createMultiChoice(answer));
        }
        xml.append("</Question>" + System.getProperty("line.separator"));
    }
    return xml.toString();
}
/**
 * Generates addition questions by delegating to forwardQ with operation code 1.
 */
public String additionQ ( int level, int operands, int questions, int type )
{
    // forwardQ treats operation 1 as addition.
    final int addition = 1;
    return forwardQ ( level, operands, addition, questions, type );
}
From your code, it looks like your XML will end up as
<Question ...></Question>
<Question ...></Question>
You cannot have multiple root elements in an XML document. You could try wrapping all your Question elements in a root element, like
<Questions>
<Question ...></Question>
<Question ...></Question>
</Questions>
Just ensure you only have one element as the root of the contents you are converting

Replace Data to word Document In Alfresco using java code excluding junk characters

I am doing Bulk Upload Task in Alfresco.
Before this i created custom action to call java code, i also successfully read data from excel sheet, and i found node reference of target document as well as source Document. Using that node reference i am also able to create new multiple Documents.
Now my requirement is to replace text in the newly created document with the Excel data. I tried to do this, but it replaces the string only in the first line of the document and also deletes the rest of the existing content in the newly created document. I have written the code below for this.
In below code i am first simply trying to replace some hard coded data to the Document.
But My requirement is i want to replace the data inside document which i already read from excel file.
Java Code:
public class MoveReplacedActionExecuter extends ActionExecuterAbstractBase {
InputStream is;
Cell cell = null;
public static final String NAME = "move-replaced";
private FileFolderService fileFolderService;
private NodeService nodeService;
private ContentService contentService;
private SearchService searchService;
#Override
protected void addParameterDefinitions(List < ParameterDefinition > paramList) {
}
public void executeImpl(Action ruleAction, NodeRef actionedUponNodeRef) {
try {
ContentReader contentReader = contentService.getReader(actionedUponNodeRef, ContentModel.PROP_CONTENT);
is = contentReader.getContentInputStream();
} catch (NullPointerException ne) {
System.out.println("Null Pointer Exception" + ne);
}
try {
Workbook workbook = new XSSFWorkbook(is);
Sheet firstSheet = workbook.getSheetAt(0);
Iterator < Row > iterator = firstSheet.rowIterator();
while (iterator.hasNext()) {
ArrayList < String > al = new ArrayList < > ();
System.out.println("");
Row nextRow = iterator.next();
Iterator < Cell > cellIterator = nextRow.cellIterator();
while (cellIterator.hasNext()) {
cell = cellIterator.next();
switch (cell.getCellType()) {
case Cell.CELL_TYPE_STRING:
System.out.print("\t" + cell.getStringCellValue());
al.add(cell.getStringCellValue());
break;
case Cell.CELL_TYPE_BOOLEAN:
System.out.print("\t" + cell.getBooleanCellValue());
al.add(String.valueOf(cell.getBooleanCellValue()));
break;
case Cell.CELL_TYPE_NUMERIC:
System.out.print("\t" + cell.getNumericCellValue());
al.add(String.valueOf(cell.getNumericCellValue()));
break;
}
}
}
is.close();
} catch (Exception e) {
e.printStackTrace();
}
String query = "PATH:\"/app:company_home/cm:Dipak/cm:OfferLetterTemplate.doc\"";
SearchParameters sp = new SearchParameters();
StoreRef storeRef = new StoreRef(StoreRef.PROTOCOL_WORKSPACE, "SpacesStore");
sp.addStore(storeRef);
sp.setLanguage(SearchService.LANGUAGE_LUCENE);
sp.setQuery(query);
ResultSet resultSet = searchService.query(sp);
System.out.println("Result Set" + resultSet.length());
NodeRef sourceNodeRef = null;
for (ResultSetRow row: resultSet) {
NodeRef currentNodeRef = row.getNodeRef();
sourceNodeRef = currentNodeRef;
System.out.println(currentNodeRef.toString());
}
NodeRef n = new NodeRef("workspace://SpacesStore/78342318-37b8-4b42-aadc-bb0ed5d413d9");
try {
org.alfresco.service.cmr.model.FileInfo fi = fileFolderService.copy(sourceNodeRef, n, "JustCreated" + Math.random() + ".doc");
NodeRef newNode = fi.getNodeRef();
QName TYPE_AUTHORTY = QName.createQName("sunpharma.hr.model", "hrdoctype");
nodeService.setType(newNode, TYPE_AUTHORTY);
ContentReader contentReader1 = contentService.getReader(newNode, ContentModel.PROP_CONTENT);
InputStream is2 = contentReader1.getContentInputStream();
POIFSFileSystem fs = new POIFSFileSystem(is2);
HWPFDocument doc = new HWPFDocument(fs);
doc = replaceText1(doc, "Company", "Datamatics");
ContentWriter writerDoc = contentService.getWriter(newNode, ContentModel.PROP_CONTENT, true);
writerDoc.putContent(doc.getDocumentText());
} catch (FileExistsException | FileNotFoundException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
}
private static HWPFDocument replaceText1(HWPFDocument doc, String findText, String replaceText) {
System.out.println("In the method replacetext" + replaceText);
Range r1 = doc.getRange();
System.out.println("Range of Doc : " + r1);
for (int i = 0; i < r1.numSections(); ++i) {
Section s = r1.getSection(i);
for (int x = 0; x < s.numParagraphs(); x++) {
Paragraph p = s.getParagraph(x);
for (int z = 0; z < p.numCharacterRuns(); z++) {
CharacterRun run = p.getCharacterRun(z);
String text = run.text();
if (text.contains(findText)) {
run.replaceText(findText, replaceText);
} else {
System.out.println("NO text found");
}
}
}
}
return doc;
}
public void setFileFolderService(FileFolderService fileFolderService) {
this.fileFolderService = fileFolderService;
}
public void setNodeService(NodeService nodeService) {
this.nodeService = nodeService;
}
public void setContentService(ContentService contentService) {
this.contentService = contentService;
}
public void setSearchService(SearchService searchService) {
this.searchService = searchService;
}
}
It is not possible to use a file stream object directly in Alfresco.
So I created a file on the local drive and performed all the replacement operations on it in the background. After that I read the data back using a file input stream, and then used that stream with the node.
That gave me my desired output. :)

Get text from xml - Android

I have this xml online http://64.182.231.116/~spencerf/test.xml
And I am trying to get the two text values Assorted Cereal and Yogurt Parfait (2). Here is how I am currently parsing it, and I get the values I want as well as all the values under then, all the numbers and such, but I just want to get the names, and I am struggling how to just do that, any help or guidance would be great. Here is my code:
// State tracked while walking the XML: the day, meal and counter currently
// being read, plus the icon1 attribute of the most recent <name> tag.
String currentDay = "";
String currentMeal = "";
String counter = "";
String icon1 = "";
// Collected text items grouped by counter name, in insertion order.
LinkedHashMap<String, List<String>> itemsByCounter = new LinkedHashMap<String , List<String>>();
// Items gathered for the counter currently being read.
List<String> items = new ArrayList<String>();
// Pull events until the end of the document.
while (eventType != XmlResourceParser.END_DOCUMENT) {
String tagName = xmlData.getName();
switch (eventType) {
case XmlResourceParser.START_TAG:
// Remember which day/meal we are inside.
if (tagName.equalsIgnoreCase("day")) {
currentDay = xmlData.getAttributeValue(null, "name");
}
if (tagName.equalsIgnoreCase("meal")) {
currentMeal = xmlData.getAttributeValue(null, "name");
}
// Only counters of the requested day and meal are recorded.
if (tagName.equalsIgnoreCase("counter") && currentDay.equalsIgnoreCase(day) && currentMeal.equalsIgnoreCase(meal)) {
counter = xmlData.getAttributeValue(null, "name");
}
if (tagName.equalsIgnoreCase("name") && counter != null && currentDay.equalsIgnoreCase(day) && currentMeal.equalsIgnoreCase(meal)) {
icon1 = xmlData.getAttributeValue(null, "icon1");
Log.i(TAG, "icon1: " + icon1);
}
break;
case XmlResourceParser.TEXT:
// NOTE(review): this branch does not check which tag the text belongs to, so
// every non-blank text node under the matching day/meal is added — including
// text of elements nested below <name>.
if (currentDay.equalsIgnoreCase(day) && currentMeal.equalsIgnoreCase(meal) && counter !=(null)) {
if (xmlData.getText().trim().length() > 0) {
//Here gets everything but I just want 2 names
Log.i(TAG, "data: " + xmlData.getText());
// Store the text trimmed, with runs of spaces collapsed to one.
items.add(xmlData.getText().trim().replaceAll(" +", " "));
}
}
break;
case XmlPullParser.END_TAG:
// When a counter closes, file its items under the counter name and start
// a fresh list for the next counter.
if (tagName.equalsIgnoreCase("counter")) {
if (items.size() > 0) {
itemsByCounter.put(counter, items);
items = new ArrayList<String>();
recordsFound++;
}
}
break;
}
eventType = xmlData.next();
So as you can see in the comment in my code I am getting everything under the name tag, back but I just want the value of the name, and not all the other stuff.
You will need to store the name in its own child element (meaning put an end tag before the nutritional facts). Under each dish, you could have this:
<name>Assorted Cereal</name>
<nutrition_facts> ... </nutrition_facts>
Not tested but could do it along these lines:
List<Nutrition_Facts> nutrition_facts = new ArrayList<Nutrition_Facts>();
XMLDOMParser parser = new XMLDOMParser();
AssetManager manager = context.getAssets();

// Declared outside the try block so the document is still in scope afterwards.
Document doc = null;
InputStream stream = null;
try {
    stream = manager.open("test.xml"); //need full path to your file here - mine is stored in assets folder
    doc = parser.getDocument(stream);
} catch (IOException ex) {
    System.out.printf("Error reading map %s\n", ex.getMessage());
} finally {
    // Release the asset stream whether or not parsing succeeded.
    if (stream != null) {
        try {
            stream.close();
        } catch (IOException ignored) {
            // Nothing useful can be done if close fails.
        }
    }
}

// getDocument() returns null when the XML could not be parsed, and doc stays
// null when the asset could not be opened.
if (doc != null) {
    NodeList nodeList = doc.getElementsByTagName("nutrition_facts");
    for (int i = 0; i < nodeList.getLength(); i++) {
        Element e = (Element) nodeList.item(i);
        String name;
        if (elementName.equals(e.getAttribute("Assorted Cereal"))){
            name = e.getAttribute("name");
            //do some stuff
        }
    }
}
//XMLDOMParser Class
public class XMLDOMParser {
//Returns the entire XML document
public Document getDocument(InputStream inputStream) {
Document document = null;
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
try {
DocumentBuilder db = factory.newDocumentBuilder();
InputSource inputSource = new InputSource(inputStream);
document = db.parse(inputSource);
} catch (ParserConfigurationException e) {
Log.e("Error: ", e.getMessage());
return null;
} catch (SAXException e) {
Log.e("Error: ", e.getMessage());
return null;
} catch (IOException e) {
Log.e("Error: ", e.getMessage());
return null;
}
return document;
}
/*
* I take a XML element and the tag name, look for the tag and get
* the text content i.e for <employee><name>Kumar</name></employee>
* XML snippet if the Element points to employee node and tagName
* is name I will return Kumar. Calls the private method
* getTextNodeValue(node) which returns the text value, say in our
* example Kumar. */
public String getValue(Element item, String name) {
NodeList nodes = item.getElementsByTagName(name);
return this.getTextNodeValue(nodes.item(0));
}
private final String getTextNodeValue(Node node) {
Node child;
if (node != null) {
if (node.hasChildNodes()) {
child = node.getFirstChild();
while(child != null) {
if (child.getNodeType() == Node.TEXT_NODE) {
return child.getNodeValue();
}
child = child.getNextSibling();
}
}
}
return "";
}
}

Java - Print any detail of HTML element

I am fairly new to Java, at least regarding interacting with web. Anyway, I am making an app that has to grab HTML out of a webpage, and parse it.
By parsing I mean finding out what the element has in the 'class="" ' attribute, or in any attribute available in the element. Also finding out what is inside the element. This is where I have searched so far: http://www.java2s.com/Code/Java/Development-Class/HTMLDocumentElementIteratorExample.htm
I found very little regarding this.
I know there are lots of Java parsers out there. I have tried JTidy, and the default Swing parser. I would prefer to use the built-in-to-java parser.
Here is what i have so far (this is just method for testing how it works, proper code will come when i know what & how. Also connection is a URLConnection variable, and connection has been established before this method gets called. < just to clarify):
/**
 * Downloads the page behind {@code connection}, echoes it to standard output,
 * parses it with the Swing HTML parser and prints, for every element, each
 * attribute name with its value. The text of H1/H2/H3 headings is printed too.
 *
 * The page is buffered before parsing: reading the stream line by line
 * consumes it, so handing the same reader to the parser afterwards would give
 * the parser an empty document.
 */
public void parse() {
    BufferedReader br = null;
    try {
        InputStream is = connection.getInputStream();
        InputStreamReader isr = new InputStreamReader(is);
        br = new BufferedReader(isr);

        // Echo the page while keeping a copy to parse.
        StringBuffer page = new StringBuffer();
        String line;
        while ((line = br.readLine()) != null) {
            System.out.println(line);
            page.append(line).append('\n');
        }

        // copied from http://www.java2s.com/Code/Java/Development-Class/HTMLDocumentElementIteratorExample.htm
        HTMLEditorKit htmlKit = new HTMLEditorKit();
        HTMLDocument htmlDoc = (HTMLDocument) htmlKit.createDefaultDocument();
        HTMLEditorKit.Parser parser = new ParserDelegator();
        HTMLEditorKit.ParserCallback callback = htmlDoc.getReader(0);
        parser.parse(new java.io.StringReader(page.toString()), callback, true);
        // Push any changes still pending in the reader into the document.
        callback.flush();

        // Walk every element of the parsed document.
        ElementIterator iterator = new ElementIterator(htmlDoc);
        Element element;
        while ((element = iterator.next()) != null) {
            AttributeSet attributes = element.getAttributes();
            Object name = attributes.getAttribute(StyleConstants.NameAttribute);

            // Print each attribute as "name = value".
            System.out.println ("All attrs of " + name + ":");
            Enumeration e = attributes.getAttributeNames();
            while (e.hasMoreElements()) {
                Object key = e.nextElement();
                System.out.println ("  " + key + " = " + attributes.getAttribute(key));
            }
            // Attribute keys are HTML.Attribute constants, not plain strings.
            System.out.println ("attribute of class = " + attributes.containsAttribute(HTML.Attribute.CLASS, "login"));

            if ((name instanceof HTML.Tag)
                    && ((name == HTML.Tag.H1) || (name == HTML.Tag.H2) || (name == HTML.Tag.H3))) {
                // Build up content text as it may be within multiple elements
                StringBuffer text = new StringBuffer();
                int count = element.getElementCount();
                for (int i = 0; i < count; i++) {
                    Element child = element.getElement(i);
                    AttributeSet childAttributes = child.getAttributes();
                    if (childAttributes.getAttribute(StyleConstants.NameAttribute) == HTML.Tag.CONTENT) {
                        int startOffset = child.getStartOffset();
                        int endOffset = child.getEndOffset();
                        int length = endOffset - startOffset;
                        text.append(htmlDoc.getText(startOffset, length));
                    }
                }
                System.out.println(name + ": " + text.toString());
            }
        }
    } catch (IOException e) {
        System.out.println ("Exception?1 " + e.getMessage() );
    } catch (Exception e) {
        System.out.println ("Exception? " + e.getMessage());
    } finally {
        // Closing the reader also closes the underlying connection stream.
        if (br != null) {
            try {
                br.close();
            } catch (IOException ignored) {
                // Nothing useful can be done if close fails.
            }
        }
    }
}
The question is: How do I get any element's attributes and print them out?
This code is needlessly verbose. I would suggest using a better library like Jsoup. Here's some code to find out all the attributes of all divs on this page.
// Fetch the page and list every attribute of every <div> on it.
String url = "http://stackoverflow.com/questions/7311269"
        + "/java-print-any-detail-of-html-element";
Document doc = Jsoup.connect(url).get();
Elements divs = doc.select("div");
// 1-based position of the div currently being printed.
int divNumber = 1;
for (Element div : divs) {
    System.out.format("Div #%d:\n", divNumber);
    divNumber++;
    for (Attribute attr : div.attributes()) {
        System.out.format("%s = %s\n", attr.getKey(), attr.getValue());
    }
}
Follow the Jsoup Cookbook for a gentle introduction to this powerful library.

XML and DOM getting #text output

I'm trying to read a Collada XML file by iterating through a child node list, and every other output reads #text. What's this about? My code:
/**
 * Parses "test.dae" and prints the name of the root element followed by the
 * node name of each of its direct children. Parser, I/O and XML errors are
 * reported on standard output and end the test.
 */
public void runTest() {
    File file = new File( "test.dae" );
    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();

    Document document;
    try {
        DocumentBuilder builder = factory.newDocumentBuilder();
        document = builder.parse( file );
    }
    catch( ParserConfigurationException error ) {
        System.out.println( "--ParserConfigurationException: " + error.getMessage() );
        return;
    }
    catch( IOException error ) {
        System.out.println( "--IOException: " + error.getMessage() );
        return;
    }
    catch( SAXException error ) {
        System.out.println( "--SAXException: " + error.getMessage() );
        return;
    }

    // Root element first, then one line per direct child node.
    Node root = document.getDocumentElement();
    System.out.println( root.getNodeName() );

    NodeList children = root.getChildNodes();
    int childCount = children.getLength();
    int index = 0;
    while( index < childCount ) {
        System.out.println( "-- " + children.item( index ).getNodeName() );
        index++;
        // Re-read the length each pass, as the original loop condition did.
        childCount = children.getLength();
    }
}
"#text" is simply the result of calling the getNodeName() method on a Text node. (As you will see if you look at the API documentation for org.w3c.dom.Node.) If you want the actual text contents of the node, you would use the getNodeValue() method.
And if you weren't expecting there to be any Text nodes, don't forget that even little bits of whitespace, like new-line characters, are treated as text.

Categories