XML and DOM getting #text output - java

I'm trying to read a Collada XML file by iterating through a child node list, and every other output reads #text. What's this about? My code:
public void runTest() {
File file = new File( "test.dae" );
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
DocumentBuilder builder = null;
try {
builder = factory.newDocumentBuilder();
}
catch( ParserConfigurationException error ) {
System.out.println( "--ParserConfigurationException: " + error.getMessage() ); return;
}
Document document = null;
try {
document = builder.parse( file );
}
catch( IOException error ) {
System.out.println( "--IOException: " + error.getMessage() ); return;
}
catch( SAXException error ) {
System.out.println( "--SAXException: " + error.getMessage() ); return;
}
Node node = document.getDocumentElement();
String node_name = node.getNodeName();
System.out.println( node_name );
NodeList node_list = node.getChildNodes();
for( int iterator = 0; iterator < node_list.getLength(); iterator++ ) {
Node child_node = node_list.item( iterator );
String child_node_name = child_node.getNodeName();
System.out.println( "-- " + child_node_name );
}
}

"#text" is simply the result of calling the getNodeName() method on a Text node. (As you will see if you look at the API documentation for org.w3c.dom.Node.) If you want the actual text contents of the node, you would use the getNodeValue() method.
And if you weren't expecting there to be any Text nodes, don't forget that even little bits of whitespace, like new-line characters, are treated as text.

Related

org.xml.sax.SAXParseException The markup in the document following the root element must be well-formed

I know there are some similar threads out there but none of them are any help to me, I'm trying to create XML file via Java but I'm seeing this error constantly on this line
doc = db.parse(is);
, can someone please tell me where am I going wrong ?
Error:-
Severe: [Fatal Error] :2:2: The markup in the document following the root element must be well-formed.
Severe: org.xml.sax.SAXParseException; lineNumber: 2; columnNumber: 2; The markup in the document following the root element must be well-formed.
at com.sun.org.apache.xerces.internal.parsers.DOMParser.parse(DOMParser.java:257)
at com.sun.org.apache.xerces.internal.jaxp.DocumentBuilderImpl.parse(DocumentBuilderImpl.java:348)
at com.digivote.digivote.XmlServlet.doPost(XmlServlet.java:194)
My code:
#Override
protected void doPost(HttpServletRequest request, HttpServletResponse response)
throws ServletException, IOException {
response.setContentType("text/html");
String level = request.getParameter("level");
String add = request.getParameter("add");
String addCat = request.getParameter("addCat");
String addQuestions = request.getParameter("addQuestion");
String add_multiChoice = request.getParameter("add_multiChoice");
try
{
DocumentBuilder db = DocumentBuilderFactory.newInstance().newDocumentBuilder();
// root elements
Document doc = db.newDocument();
Element rootElement = doc.createElement("XML");
doc.appendChild(rootElement);
/***** GeneratedSet ****/
Element genSet = doc.createElement("GeneratedSet");
rootElement.appendChild(genSet);
Attr genDate = doc.createAttribute("genDate");
genDate.setValue("11/01/2018");
genSet.setAttributeNode(genDate);
Attr genBy = doc.createAttribute("genBy");
genBy.setValue("Tom Jones");
genSet.setAttributeNode(genBy);
Attr lev = doc.createAttribute("level");
lev.setValue(level);
genSet.setAttributeNode(lev);
/****** Questions *****/
Element questions = doc.createElement("Questions");
genSet.appendChild(questions);
Attr noq = doc.createAttribute("Questions");
noq.setValue(addQuestions);
questions.setAttributeNode(noq);
Attr type = doc.createAttribute("Type");
type.setValue(add);
questions.setAttributeNode(type);
/****** Question *****/
int numOfQues = Integer.parseInt(addQuestions) ;
int leve = Integer.parseInt(level) ;
String retXML = null;
if(add != null){
InputSource is = new InputSource();
genMaths gm = new genMaths();
retXML = gm.additionQ(leve, 2, numOfQues, 0);
is.setCharacterStream(new StringReader(retXML));
doc = db.parse(is);
//doc = db.parse(new InputSource(new StringReader(is.toString())));
}
TransformerFactory transformerFactory = TransformerFactory.newInstance();
Transformer transformer = transformerFactory.newTransformer();
DOMSource source = new DOMSource(doc);
StreamResult result = new StreamResult(new File("C:\\Users\\xxxx\\Documents\\XML Files\\RandomQuestions.xml"));
transformer.transform(source, result);
System.out.println("File saved!");
} catch (ParserConfigurationException | TransformerException pce) {} catch (SAXException ex) {
Logger.getLogger(XmlServlet.class.getName()).log(Level.SEVERE, null, ex);
}
}
}
retXML:-
public String forwardQ ( int level, int operands, int operation, int questions, int type )
{
StringBuffer retXML = new StringBuffer ();
float operand = 0;
float answer = 0;
int minVal = (int)getLevelMin(level);
int maxVal = (int)getLevelMax(level);
String addSign = "";
for ( int j = 0; j < questions; j++ )
{
retXML.append("<Question Text='");
addSign = "";
if ( operation == 1 )
{
answer = 0;
}
else
{
answer = 1;
}
for ( int i = 0; i < operands; i++ )
{
operand = randNum(minVal, maxVal);
if ( operation == 1 )
{
answer = answer + operand;
}
else
{
answer = answer * operand;
}
retXML.append( addSign + operand );
if ( operation == 1 )
{
addSign = " + ";
}
else
{
addSign = " x ";
}
}
if ( type == 1 )
{
retXML.append("' Answer='"+ answer + "'>");
}
else
{
retXML.append(createMultiChoice(answer));
}
retXML.append("</Question>" + System.getProperty("line.separator"));
}
return ( retXML.toString() );
}
public String additionQ ( int level, int operands, int questions, int type )
{
return ( forwardQ ( level, operands, 1, questions, type ) );
}
From your code, it looks like your XML will end up as
<Question ...></Question>
<Question ...></Question>
You cannot have multiple root elements in an XML document. You could try wrapping all your Question elements in a root element, like
<Questions>
<Question ...></Question>
<Question ...></Question>
</Questions>
Just ensure you only have one element as the root of the contents you are converting

Java editor HTML kit class listener for check boxes in public page

I am using the java editor HTML kit class to show a public html page that contains checkboxes. I want to build a listener to these checkboxes that arise in the public page that is shown in my panel. Does anyone have any idea what of stuff I should look for? I really appreciate any help!
Here is some really old code that might help get your started.
Note how this code is checking for a DefaultComboBoxModel? Well, I"m guessing that for a check box a ButtonModel will also be found in one of your elements. Once you have access to the model you should be able to add a listener to it.
No guarantee this will work. I suggest you create some simple test HTML to see what kind of output you get.
import java.io.*;
import java.net.*;
import java.util.*;
import javax.swing.*;
import javax.swing.text.*;
import javax.swing.text.html.*;
class GetHTML
{
public static void main(String[] args)
{
EditorKit kit = new HTMLEditorKit();
Document doc = kit.createDefaultDocument();
// The Document class does not yet handle charset's properly.
doc.putProperty("IgnoreCharsetDirective", Boolean.TRUE);
try
{
// Create a reader on the HTML content.
Reader rd = getReader(args[0]);
// Parse the HTML.
kit.read(rd, doc, 0);
System.out.println( doc.getText(0, doc.getLength()) );
System.out.println("----");
// Iterate through the elements of the HTML document.
ElementIterator it = new ElementIterator(doc);
Element elem = null;
while ( (elem = it.next()) != null )
{
AttributeSet as = elem.getAttributes();
System.out.println( "\n" + elem.getName() + " : " + as.getAttributeCount() );
/*
if ( elem.getName().equals( HTML.Tag.IMG.toString() ) )
{
Object o = elem.getAttributes().getAttribute( HTML.Attribute.SRC );
System.out.println( o );
}
*/
Enumeration enum1 = as.getAttributeNames();
while( enum1.hasMoreElements() )
{
Object name = enum1.nextElement();
Object value = as.getAttribute( name );
System.out.println( "\t" + name + " : " + value );
if (value instanceof DefaultComboBoxModel)
{
DefaultComboBoxModel model = (DefaultComboBoxModel)value;
for (int j = 0; j < model.getSize(); j++)
{
Object o = model.getElementAt(j);
Object selected = model.getSelectedItem();
if ( o.equals( selected ) )
System.out.println( o + " : selected" );
else
System.out.println( o );
}
}
}
//
if ( elem.getName().equals( HTML.Tag.SELECT.toString() ) )
{
Object o = as.getAttribute( HTML.Attribute.ID );
System.out.println( o );
}
// Wierd, the text for each tag is stored in a 'content' element
if (elem.getElementCount() == 0)
{
int start = elem.getStartOffset();
int end = elem.getEndOffset();
System.out.println( "\t" + doc.getText(start, end - start) );
}
}
}
catch (Exception e)
{
e.printStackTrace();
}
System.exit(1);
}
// Returns a reader on the HTML data. If 'uri' begins
// with "http:", it's treated as a URL; otherwise,
// it's assumed to be a local filename.
static Reader getReader(String uri)
throws IOException
{
// Retrieve from Internet.
if (uri.startsWith("http:"))
{
URLConnection conn = new URL(uri).openConnection();
return new InputStreamReader(conn.getInputStream());
}
// Retrieve from file.
else
{
return new FileReader(uri);
}
}
}

XML parsing fails on Blackberry

I am using the following code for parsing an XML file. But I don't get any response. Can anyone help?
I am also getting a warning when I open a connection:
"Warning!: Invocation of questionable method: java.lang.String.() found"`?
public static void main(String arg[]){
XML_Parsing_Sample application = new XML_Parsing_Sample();
//create a new instance of the application
//and start the application on the event thread
application.enterEventDispatcher();
}
public XML_Parsing_Sample() {
_screen.setTitle("XML Parsing");//setting title
_screen.add(new RichTextField("Requesting....."));
_screen.add(new SeparatorField());
pushScreen(_screen); // creating a screen
//creating a connection thread to run in the background
_connectionthread = new Connection();
_connectionthread.start();//starting the thread operation
}
public void updateField(String node, String element){
//receiving the parsed node and its value from the thread
//and updating it here
//so it can be displayed on the screen
String title="Title";
_screen.add(new RichTextField(node+" : "+element));
if(node.equals(title)){
_screen.add(new SeparatorField());
}
}
private class Connection extends Thread{
public Connection(){
super();
}
public void run(){
// define variables later used for parsing
Document doc;
StreamConnection conn;
try{
//providing the location of the XML file,
//your address might be different
conn=(StreamConnection)Connector.open
("http://www.w3schools.com/xml/note.xml",Connector.READ);
//next few lines creates variables to open a
//stream, parse it, collect XML data and
//extract the data which is required.
//In this case they are elements,
//node and the values of an element
DocumentBuilderFactory docBuilderFactory
= DocumentBuilderFactory. newInstance();
DocumentBuilder docBuilder
= docBuilderFactory.newDocumentBuilder();
docBuilder.isValidating();
doc = docBuilder.parse(conn.openInputStream());
doc.getDocumentElement ().normalize ();
NodeList list=doc.getElementsByTagName("*");
_node=new String();
_element = new String();
//this "for" loop is used to parse through the
//XML document and extract all elements and their
//value, so they can be displayed on the device
for (int i=0;i<list.getLength();i++){
Node value=list.item(i).
getChildNodes().item(0);
_node=list.item(i).getNodeName();
_element=value.getNodeValue();
updateField(_node,_element);
}//end for
}//end try
//will catch any exception thrown by the XML parser
catch (Exception e){
System.out.println(e.toString());
}
}//end connection function
}// end connection class
You are probably timing out.
Try opening the connection as HTTP connection instead, and use the new ConnectionFactory class to get rid of annoying suffixes.
public InputStream getResult(String url) {
System.out.println("in get result");
HttpConnection httpConn;
httpConn = (HttpConnection) getHTTPConnection(url);
try {
if (httpConn != null) {
final int iResponseCode = httpConn.getResponseCode();
if (iResponseCode == httpConn.HTTP_OK) {
_inputStream = httpConn.openInputStream();
byte[] data = new byte[20];
int len = 0;
int size = 0;
ByteArrayOutputStream baos = new ByteArrayOutputStream();
try {
while (-1 != (len = _inputStream.read(data))) {
baos.write(data, 0, len);
size += len;
}
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
InputStream is2 = new ByteArrayInputStream(
baos.toByteArray());
return is2;
} else {
return null;
}
}
} catch (IOException e) {
System.err.println("Caught IOException: " + e.getMessage());
}
try {
httpConn.close();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
return null;
}
To download xml
To parse the downloaded xml.
public Document XMLfromInputStream(InputStream xml) {
Document doc = null;
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
dbf.setAllowUndefinedNamespaces(true);
dbf.setCoalescing(true);
dbf.setExpandEntityReferences(true);
try {
DocumentBuilder db;
db = dbf.newDocumentBuilder();
InputSource _source = new InputSource();
_source.setEncoding("UTF-8");
_source.setByteStream(xml);
db.setAllowUndefinedNamespaces(true);
doc = db.parse(_source);
} catch (SAXException e) {
System.out.println("Wrong XML file structure: " + e.getMessage());
return null;
} catch (IOException e) {
System.out.println("I/O exeption: " + e.getMessage());
return null;
} catch (net.rim.device.api.xml.parsers.ParserConfigurationException e) {
// TODO Auto-generated catch block
e.printStackTrace();
return null;
} finally {
}
return doc;
}
Then parse this document in to element
Element rootElement = document.getDocumentElement();
rootElement.normalize();
displayNode( rootElement, 0 );
private void displayNode( Node node, int depth )
{
if ( node.getNodeType() == Node.ELEMENT_NODE )
{
StringBuffer buffer = new StringBuffer();
indentStringBuffer( buffer, depth );
NodeList childNodes = node.getChildNodes();
int numChildren = childNodes.getLength();
Node firstChild = childNodes.item( 0 );
// If the node has only one child and that child is a Text node, then it's of
// the form <Element>Text</Element>, so print 'Element = "Text"'.
if ( numChildren == 1 && firstChild.getNodeType() == Node.TEXT_NODE )
{
buffer.append( node.getNodeName() ).append( " = \"" ).append( firstChild.getNodeValue() ).append( '"' );
add( new RichTextField( buffer.toString() ) );
}
else
{
// The node either has > 1 children, or it has at least one Element node child.
// Either way, its children have to be visited. Print the name of the element
// and recurse.
buffer.append( node.getNodeName() );
add( new RichTextField( buffer.toString() ) );
// Recursively visit all this node's children.
for ( int i = 0; i < numChildren; ++i )
{
displayNode( childNodes.item( i ), depth + 1 );
}
}
}
else
{
// Node is not an Element node, so we know it is a Text node. Make sure it is
// not an "empty" Text node (normalize() doesn't consider a Text node consisting
// of only newlines and spaces to be "empty"). If it is not empty, print it.
String nodeValue = node.getNodeValue();
if ( nodeValue.trim().length() != 0 )
{
StringBuffer buffer = new StringBuffer();
indentStringBuffer( buffer, depth );
buffer.append( '"' ).append( nodeValue ).append( '"' );
add( new RichTextField( buffer.toString() ) );
}
}
}
/**
* Adds leading spaces to the provided string buffer according to the depth of
* the node it represents.
*
* #param buffer The string buffer to add leading spaces to.
* #param depth The depth of the node the string buffer represents.
*/
private static void indentStringBuffer( StringBuffer buffer, int depth )
{
int indent = depth * _tab;
for ( int i = 0; i < indent; ++i )
{
buffer.append( ' ' );
}
}
You can download and parse the xml using above sample.
NamedNodeMap attributes = (NamedNodeMap)value.getAttributes();
for (int g = 0; g < attributes.getLength(); g++) {
Attr attribute = (Attr)attributes.item(g);
System.out.println(" Attribute: " + attribute.getName() +
" with value " +attribute.getValue());
Above code fetches attributes and its values.. cheers

Java - Print any detail of HTML element

I am fairly new to Java, at least regarding interacting with web. Anyway, I am making an app that has to grab HTML out of a webpage, and parse it.
By parsing I mean finding out what the element has in the 'class="" ' attribute, or in any attribute available in the element. Also finding out what is inside the element. This is where I have searched so far: http://www.java2s.com/Code/Java/Development-Class/HTMLDocumentElementIteratorExample.htm
I found very little regarding this.
I know there are lots of Java parsers out there. I have tried JTidy, and the default Swing parser. I would prefer to use the built-in-to-java parser.
Here is what i have so far (this is just method for testing how it works, proper code will come when i know what & how. Also connection is a URLConnection variable, and connection has been established before this method gets called. < just to clarify):
public void parse() {
try {
InputStream is = connection.getInputStream();
InputStreamReader isr = new InputStreamReader(is);
BufferedReader br = new BufferedReader(isr);
String line;
while ((line = br.readLine()) != null) {
System.out.println(line);
}
// copied from http://www.java2s.com/Code/Java/Development-Class/HTMLDocumentElementIteratorExample.htm
HTMLEditorKit htmlKit = new HTMLEditorKit();
HTMLDocument htmlDoc = (HTMLDocument) htmlKit.createDefaultDocument();
HTMLEditorKit.Parser parser = new ParserDelegator();
HTMLEditorKit.ParserCallback callback = htmlDoc.getReader(0);
parser.parse(br, callback, true);
// Parse
ElementIterator iterator = new ElementIterator(htmlDoc);
Element element;
while ((element = iterator.next()) != null) {
AttributeSet attributes = element.getAttributes();
Object name = attributes.getAttribute(StyleConstants.NameAttribute);
System.out.println ("All attrs of " + name + ": " + attributes.getAttributeNames().toString());
Enumeration e = attributes.getAttributeNames();
Object obj;
while (e.hasMoreElements()) {
obj = e.nextElement();
System.out.println (obj.toString());
System.out.println ("attribute of class = " + attributes.containsAttribute("class", "login"));
}
if ((name instanceof HTML.Tag)
&& ((name == HTML.Tag.H1) || (name == HTML.Tag.H2) || (name == HTML.Tag.H3))) {
// Build up content text as it may be within multiple elements
StringBuffer text = new StringBuffer();
int count = element.getElementCount();
for (int i = 0; i < count; i++) {
Element child = element.getElement(i);
AttributeSet childAttributes = child.getAttributes();
if (childAttributes.getAttribute(StyleConstants.NameAttribute) == HTML.Tag.CONTENT) {
int startOffset = child.getStartOffset();
int endOffset = child.getEndOffset();
int length = endOffset - startOffset;
text.append(htmlDoc.getText(startOffset, length));
}
}
System.out.println(name + ": " + text.toString());
}
}
} catch (IOException e) {
System.out.println ("Exception?1 " + e.getMessage() );
} catch (Exception e) {
System.out.println ("Exception? " + e.getMessage());
}
}
The question is: How do I get any element's attributes and print them out?
This code is needlessly verbose. I would suggest using a better library like Jsoup. Here's some code to find out all the attributes of all divs on this page.
String url = "http://stackoverflow.com/questions/7311269"
+ "/java-print-any-detail-of-html-element";
Document doc = Jsoup.connect(url).get();
Elements divs = doc.select("div");
int i = 0;
for (Element div : divs) {
System.out.format("Div #%d:\n", ++i);
for(Attribute attr : div.attributes()) {
System.out.format("%s = %s\n", attr.getKey(), attr.getValue());
}
}
Follow the Jsoup Cookbook for a gentle introduction to the this powerful library.

reading from XMl file in java

I have a simple XML file
<requirements>
<requirement>
<name> SwitchON</name>
<id>1</id>
<text>The Light shall turn on when the Switch is on.</text>
</requirement>
<requirement>
<name>SwitchOFF</name>
<id>2</id>
<text>The Light shall turn off when the Switch is off.</text>
</requirement>
<requirement>
<name>Lightbulb</name>
<id>3</id>
<text>The Light bulb shall be connected </text>
</requirement>
<requirement>
<name>Power</name>
<id>4</id>
<text>The Light shall have the power supply</text>
</requirement>
</requirements>
I am trying to show the information in this file in a table model.
I have a method (readFromXMl) that reads the XML file and returns a table model.
public static RequirementTable readFromXMl(String fileName) {
RequirementTable T = new RequirementTable();
Requirement R = new Requirement();
try {
DocumentBuilderFactory docBuilderFactory = DocumentBuilderFactory.newInstance();
DocumentBuilder docBuilder = docBuilderFactory.newDocumentBuilder();
Document doc = docBuilder.parse(new File(fileName));
doc.getDocumentElement().normalize();
NodeList listOfRequirements = doc.getElementsByTagName("requirement");
int test = listOfRequirements.getLength();
System.out.println("Total no of people : " + test);
for (int i = 0; i < listOfRequirements.getLength(); i++) {
Node RequirementNode = listOfRequirements.item(i);
if (RequirementNode.getNodeType() == Node.ELEMENT_NODE) {
Element RequirementElement = (Element) RequirementNode;
NodeList IdList = RequirementElement.getElementsByTagName("id");
Element IdElement = (Element) IdList.item(0);
NodeList textIdList = IdElement.getChildNodes();
R.setId(Integer.parseInt(textIdList.item(0).getNodeValue()));
NodeList DescriptionList = RequirementElement.getElementsByTagName("text");
Element DescriptionElement = (Element) DescriptionList.item(0);
NodeList textDescriptionList = DescriptionElement.getChildNodes();
R.setText(textDescriptionList.item(0).toString());
NodeList NameList = RequirementElement.getElementsByTagName("name");
Element NameElement = (Element) NameList;
NodeList textNameList = NameElement.getChildNodes();
if (textNameList.item(0).toString().equals("SwitchON")) {
T.addRequirement((SwitchOnReq)R);
} else if (textNameList.item(0).toString().equals("SwitchOFF")) {
T.addRequirement((SwitchOFFReq)R);
} else if (textNameList.item(0).toString().equals("LightBulb")) {
T.addRequirement((BulbRequirement)R);
} else if (textNameList.item(0).toString().equals("Power")) {
T.addRequirement((PowerRequirement)R);
}
}
}
} catch (SAXParseException err) {
System.out.println("** Parsing error" + ", line " + err.getLineNumber() + ", uri " + err.getSystemId());
System.out.println(" " + err.getMessage());
} catch (SAXException e) {
Exception x = e.getException();
((x == null) ? e : x).printStackTrace();
} catch (Throwable t) {
t.printStackTrace();
}
return T;
}
However in this line I am getting an error which says the the pointer is null
Element IdElement = (Element) IdList.item(0); IdElement is null!!
Instead of all the looping and other xml ugliness, let me suggest a little helper method:
private static String getNodeValue(Node n, String path)
throws XPathExpressionException {
XPath xpath = XPathFactory.newInstance().newXPath();
return (String) xpath.evaluate(path, n, XPathConstants.STRING);
}
Use like this:
for (int i = 0; i < listOfRequirements.getLength(); i++) {
Node RequirementNode = listOfRequirements.item(i);
System.out.println("name:" + getNodeValue(RequirementNode, "name"));
System.out.println("id:" + getNodeValue(RequirementNode, "id"));
System.out.println("text:" + getNodeValue(RequirementNode, "text"));
...
to get all the values and set your requirements.

Categories