sorting elements from xml file in java - java

i want to sort elements in xml file according to chosen algorithm type by user in java.i have merged xml file which contains 10 xml files. so it will be like first user select type of algorithm and then program sort elements as user wants.how can i do this? pls help me
here is how i merged xml files
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
public class Merge2 {
public static void main(String[] args) {
String sourceFile1Path = "C:\\Users\\project\\Numbers1.xml";
String sourceFile2Path = "C:\\Users\\project\\Numbers2.xml";
String sourceFile3Path = "C:\\Users\\project\\Numbers3.xml";
String sourceFile4Path = "C:\\Users\\project\\Numbers4.xml";
String sourceFile5Path = "C:\\Users\\project\\Numbers5.xml";
String sourceFile6Path = "C:\\Users\\project\\Numbers6.xml";
String sourceFile7Path = "C:\\Users\\project\\Numbers7.xml";
String sourceFile8Path = "C:\\Users\\project\\Numbers8.xml";
String sourceFile9Path = "C:\\Users\\project\\Numbers9.xml";
String sourceFile10Path = "C:\\Users\\project\\Numbers10.xml";
String mergedFilePath = "C:\\Users\\project\\mergedXML.xml";
File[] files = new File[10];
files[0] = new File(sourceFile1Path);
files[1] = new File(sourceFile2Path);
files[2] = new File(sourceFile3Path);
files[3] = new File(sourceFile4Path);
files[4] = new File(sourceFile5Path);
files[5] = new File(sourceFile6Path);
files[6] = new File(sourceFile7Path);
files[7] = new File(sourceFile8Path);
files[8] = new File(sourceFile9Path);
files[9] = new File(sourceFile10Path);
File mergedFile = new File(mergedFilePath);
mergeFiles(files, mergedFile);
}
public static void mergeFiles(File[] files, File mergedFile) {
FileWriter fstream = null;
BufferedWriter out = null;
try {
fstream = new FileWriter(mergedFile, true);
out = new BufferedWriter(fstream);
} catch (IOException e1) {
e1.printStackTrace();
}
for (File f : files) {
System.out.println("merging: " + f.getName());
FileInputStream fis;
try {
fis = new FileInputStream(f);
BufferedReader in = new BufferedReader(new InputStreamReader(fis));
String aLine;
while ((aLine = in.readLine()) != null) {
out.write(aLine);
out.newLine();
}
in.close();
} catch (IOException e) {
e.printStackTrace();
}
}
try {
out.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}
and my merged xml file starts like below
<numberList>
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<number index="1">770820</number>
<number index="2">640903</number>
<number index="3">997132</number>
<number index="4">193504</number>
i want to show it like
<numberList>
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<number index="1">193504</number>
<number index="2">640903</number>
<number index="3">770820</number>
<number index="4">997132</number>

I don't understand the sort thing but just for you to know, your code could be far more concise :
String pathTemplate = "C:\\Users\\project\\Numbers%d.xml";
String mergedFilePath = "C:\\Users\\project\\mergedXML.xml";
int nFiles = 10;
File[] files = new File[nFiles];
for (int i=0 ; i<nFiles ; i++)
files[i] = new File(String.format(pathTemplate,i + 1));
//...
If you explain clearer your problem about sorting the merge file I'll complete my answer.

Related

how to use relative file path to read a file in java?

file path is not working, input1.txt is located in the same directory as library.java.
What should i do to correct it ?
How should i give path so that it read thr text file ?
package SimpleLibrarySystem;
import java.time.LocalDateTime;
import java.util.*;
import java.io.*;
import java.util.ArrayList;
import java.util.HashMap;
public class Library
{
ArrayList <Book> var = new ArrayList<Book>();
HashMap<Book, LocalDateTime> var1 = new HashMap<Book, LocalDateTime>();
public Library(String person, LocalDateTime time)
{
try{
File myfile = new File("input1.txt") ;
Scanner br = new Scanner(myfile);
String line = br.nextLine();
while ((line != null))
{
String a = line;
line = br.nextLine();
String b = line;
Book a1 = new Book(a,b,person);
Book a2 = new Book (a,b, "");
var.add(a2);
var1.put(a1,time);
//System.out.println(a + " "+ b);
line = br.nextLine();
}
br.close();
}
catch (Exception e)
{
System.out.println("not working");
}
}
}
with code:
public class Main{
public static void main(String[] args) {
int counter = 0;
try (FileReader fileReader = new FileReader("src/file.txt"); Scanner sc = new Scanner(fileReader)) {
while (sc.hasNextLine()) {
++counter;
sc.nextLine();
}
} catch (Exception e) {
throw new IllegalStateException(e.getMessage());
}
System.out.println(counter);
}
}
checkout: how to read file in Java

Problem with Parsing CSV and Marshalling to XML

I am trying to solve an issue with Parsing data from CSV file and Marshalling to XML. The code I wrote works almost flawless, but output is not ok, since program outputs only last line into the XML. Can please someone help me where I went wrong?
I have tried to put whole new Object into the While loop, but then it outputs last line into the XML for n-times (if CSV file has 24 lines, it will output the last line 24 times into XML)....
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.math.BigDecimal;
import java.util.regex.Pattern;
import generated.*;
import javax.xml.bind.JAXBContext;
import javax.xml.bind.JAXBElement;
import javax.xml.bind.Marshaller;
public class eSpremnicaHandel {
public void eSpremnicaHandel(String csvFile) {
String line = "";
String cvsSplitBy = "|";
RegistriranaPosiljka regpos = new RegistriranaPosiljka();
Naslovnik nasl = new Naslovnik();
Posiljka pos = new Posiljka();
Storitev sto = new Storitev();
Oddaja oddaja = new Oddaja();
ArrayOfRegPosiljka regp = new ArrayOfRegPosiljka();
oddaja.setWPID("CertSubject");
oddaja.setKOMID(21553);
oddaja.setSTODD(0);
oddaja.setPOGID(2427);
oddaja.setPODID(0);
oddaja.setPOSID("7733");
oddaja.setRegistriranePosiljke(regp);
ObjectFactory fc = new ObjectFactory();
Oddaja odd = fc.createOddaja();
odd.setWPID(oddaja.getWPID());
odd.setSTODD(oddaja.getSTODD());
odd.setKOMID(oddaja.getKOMID());
odd.setPOGID(oddaja.getPOGID());
odd.setPODID(oddaja.getPODID());
odd.setPOSID(oddaja.getPOSID());
regp.getREGPOS().add(regpos);
odd.getRegistriranePosiljke();
try (BufferedReader br = new BufferedReader(new FileReader(csvFile))) {
while ((line = br.readLine()) != null) {
// use delimiter as separator
String[] podatki = line.split(Pattern.quote(cvsSplitBy));
regpos.setZAPST(Integer.parseInt(podatki[0]));
regpos.setREG(podatki[1]);
nasl.setNAZ(podatki[2]);
nasl.setPOSTST(podatki[3]);
pos.setMAS(new BigDecimal(podatki[4]));
sto.setNAZ(podatki[7]);
regpos.setNAS(nasl);
}
} catch (IOException e) {
e.printStackTrace();
}
try{
JAXBContext jc = JAXBContext.newInstance("generated");
JAXBElement<Oddaja> element = fc.createOddaja(oddaja);
Marshaller ms = jc.createMarshaller();
ms.setProperty(Marshaller.JAXB_FORMATTED_OUTPUT, true);
ms.marshal(element, System.out);
ms.marshal(element, new File("src/eSpremnicaXML/test2.xml"));
} catch(Exception e) {
e.printStackTrace();
}
}
}
Please try this version of method. Main change is creating new object of RegistriranaPosiljka is moved under loop
public static void eSpremnicaHandel(String csvFile) {
String line = "";
String cvsSplitBy = "|";
Oddaja oddaja = new Oddaja();
ArrayOfRegPosiljka regp = new ArrayOfRegPosiljka();
oddaja.setWPID("CertSubject");
oddaja.setKOMID(21553);
oddaja.setSTODD(0);
oddaja.setPOGID(2427);
oddaja.setPODID(0);
oddaja.setPOSID("7733");
oddaja.setRegistriranePosiljke(regp);
ObjectFactory fc = new ObjectFactory();
Oddaja odd = fc.createOddaja();
odd.setWPID(oddaja.getWPID());
odd.setSTODD(oddaja.getSTODD());
odd.setKOMID(oddaja.getKOMID());
odd.setPOGID(oddaja.getPOGID());
odd.setPODID(oddaja.getPODID());
odd.setPOSID(oddaja.getPOSID());
odd.setRegistriranePosiljke(oddaja.getRegistriranePosiljke());
try (BufferedReader br = new BufferedReader(new FileReader(csvFile))) {
while ((line = br.readLine()) != null) {
RegistriranaPosiljka regpos = new RegistriranaPosiljka();
// use delimiter as separator
String[] podatki = line.split(Pattern.quote(cvsSplitBy));
regpos.setZAPST(Integer.parseInt(podatki[0]));
regpos.setREG(podatki[1]);
Naslovnik nasl = new Naslovnik();
nasl.setNAZ(podatki[2]);
nasl.setPOSTST(podatki[3]);
Posiljka pos = new Posiljka();
pos.setMAS(new BigDecimal(podatki[4]));
Storitev sto = new Storitev();
sto.setNAZ(podatki[7]);
regpos.setNAS(nasl);
regp.getREGPOS().add(regpos);
}
} catch (IOException e) {
e.printStackTrace();
}
try{
JAXBContext jc = JAXBContext.newInstance("generated");
JAXBElement<Oddaja> element = fc.createOddaja(oddaja);
Marshaller ms = jc.createMarshaller();
ms.setProperty(Marshaller.JAXB_FORMATTED_OUTPUT, true);
ms.marshal(element, System.out);
ms.marshal(element, new File("src/eSpremnicaXML/test2.xml"));
} catch(Exception e) {
e.printStackTrace();
}
}

Java and Weka: Creating a string attribute

I'm trying to create a Java program that converts a text file to an ARFF file for Weka. Somehow my name attribute is set to numerical, but it should be set to a string. I tried everything, I tried fixing it fixing
attr.add(new Attribute("name"));
to
attr.add(new Attribute("name",true));
But when I run it, it prints the names as number (which is in the 2nd column)
1,0,?,?,?
1000,1,?,?,?
1002,2,?,?,?
2,3,?,?,?
3000,4,?,?,?
What am I doing wrong?
import java.util.ArrayList;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.io.*;
import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instance;
import java.util.*;
import weka.core.Instances;
import weka.core.converters.ArffSaver;
public class WekaCreateARFF {
private static final String FILENAME = "Some File";
public static void main(String[] args) throws IOException {
ArrayList<String> input = new ArrayList<String>();
ArrayList<Attribute> attr = new ArrayList<Attribute>();
Instances dataset;
double [] values;
BufferedReader br = null;
FileReader fr = null;
String date = null;
double id;
String n = null;
Instance inst = new DenseInstance(5);
List nominal_state = new ArrayList(5);
nominal_state.add("CA");
nominal_state.add("NC");
nominal_state.add("TX");
nominal_state.add("SC");
nominal_state.add("NY");
List nominal_party = new ArrayList(2);
nominal_party.add("republican");
nominal_party.add("democrat");
attr.add(new Attribute("id"));
attr.add(new Attribute("name",true));
attr.add(new Attribute("political party", nominal_party));
attr.add(new Attribute("state", nominal_state));
attr.add(new Attribute("birth date", date));
try {
fr = new FileReader(FILENAME);
br = new BufferedReader(fr);
String entry;
dataset = new Instances("SimpleARFF",attr,0);
values = new double[dataset.numAttributes()];
while ((entry = br.readLine()) != null) {
//System.out.println(entry);
input.add(entry);
for (int i = 0; i<5; i++ ) {
String[] parts = entry.split(",");
String part1 = parts[0];
String name = parts[1];
id = Double.parseDouble(part1);
inst.setValue(attr.get(0), id);
inst.setValue(attr.get(1), name);
}
System.out.println(inst);
dataset.add(new DenseInstance(1.0, values));
}
//System.out.println(dataset);
//ArffSaver arff = new ArffSaver();
//arff.setInstances(dataset);
//arff.setFile(new File("Simple.arff"));
//arff.writeBatch();
} catch (IOException e) {
e.printStackTrace();
} finally {
try {
if (br != null)
br.close();
if (fr != null)
fr.close();
} catch (IOException ex) {
ex.printStackTrace();
}
}
}
}
You probably want this constructor:
http://weka.sourceforge.net/doc.dev/weka/core/Attribute.html#Attribute-java.lang.String-boolean-
That is, you essentially have to add a boolean flag to tell Weka that you want a String attribute, and not a numeric attribute (the default):
new Attribute("blah", true)
should give you a String-attribute.

Java I/O writing, mixingcase and getExtension problems

I am changing names of all files in directory and if it's text file I am changing the content but it doesn't seem to work the name of the file is changed right but content is blank/gone heres is my code:
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import org.apache.commons.io.FilenameUtils;
public class FileOps {
public static File folder = new File(
"C:\\Users\\N\\Desktop\\New folder\\RenamingFiles\\src\\renaming\\Files");
public static File[] listOfFiles = folder.listFiles();
public static void main(String[] argv) throws IOException {
toUpperCase();
}
public static void toUpperCase() throws FileNotFoundException {
for (int i = 0; i < listOfFiles.length; i++) {
if (listOfFiles[i].isFile()) {
String newname = mixCase(listOfFiles[i].getName());
if (listOfFiles[i].renameTo(new File(folder, newname))) {
String extension = FilenameUtils
.getExtension(listOfFiles[i].getName());
if (extension.equals("txt") || extension.equals("pdf")
|| extension.equals("docx")
|| extension.equals("log")) {
rewrite(listOfFiles[i]);
System.out.println("Done");
}
}
} else {
System.out.println("Nope");
}
}
}
public static String mixCase(String in) {
StringBuilder sb = new StringBuilder();
if (in != null) {
char[] arr = in.toCharArray();
if (arr.length > 0) {
char f = arr[0];
boolean first = Character.isUpperCase(f);
for (int i = 0; i < arr.length; i++) {
sb.append((first) ? Character.toLowerCase(arr[i])
: Character.toUpperCase(arr[i]));
first = !first;
}
}
}
return sb.toString();
}
public static void rewrite(File file) throws FileNotFoundException {
FileReader reader = new FileReader(file.getAbsolutePath());
BufferedReader inFile = new BufferedReader(reader);
try {
FileWriter fwriter = new FileWriter(file.getAbsolutePath());
BufferedWriter outw = new BufferedWriter(fwriter);
while (inFile.readLine() != null) {
String line = mixCase(inFile.readLine());
outw.write(line);
}
inFile.close();
outw.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}
There is several issue with your code:
Your rewrite function is perform on old name File. It should be done on the renamed File:
String newname = mixCase(listOfFiles[i].getName());
File renamedFile = new File(folder, newname);
if (listOfFiles[i].renameTo(renamedFile )) {
String extension = FilenameUtils
.getExtension(listOfFiles[i].getName());
if (extension.equals("txt") || extension.equals("pdf")
|| extension.equals("docx")
|| extension.equals("log")) {
rewrite(renamedFile);
System.out.println("Done");
}
}
you are trying to read docx and pdf file like regular text file. This cannot work. You will have to use external library like POI and pdf Box
Your rewrite function is not safe. You must unsure to close the ressources:
FileReader reader = null;
BufferedReader inFile = null;
try {
reader = new FileReader(file.getAbsolutePath());
inFile = new BufferedReader(reader);
FileWriter fwriter = new FileWriter(file.getAbsolutePath());
BufferedWriter outw = new BufferedWriter(fwriter);
while (inFile.readLine() != null) {
String line = mixCase(inFile.readLine());
outw.write(line);
}
} catch (IOException e) {
e.printStackTrace();
}finally
{
if(infile != null)
inFile.close();
if(reader != null)
reader .close();
}
You can't re-write a file on top of itself. you need to write the new content to a new file, then rename again.

How to parse HTML with java properly?

Scenario/Requirement:
Download html page from some URL
Download images that were mentioned in html tags.
Change tags for images in my file, so I can open it with my browser offline and see them.
I made first 2 points, but am having difficulties with the third one.Tags do not change.What am I doing wrong?
The job is to open a file, find img src tag and replace it by another tag! Can you give me an example?
Code:
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.*;
import javax.swing.text.html.HTML;
import javax.swing.text.html.HTMLEditorKit;
import javax.swing.text.html.parser.ParserDelegator;
import java.awt.image.BufferedImage;
import java.net.URL;
import java.net.URLConnection;
import javax.imageio.ImageIO;
import javax.swing.text.AttributeSet;
import javax.swing.text.html.HTMLDocument;
public class ExtractAllImages {
static String result_doc = "/home/foo/index.html";
static String home_folder = "/home/foo/";
static String start_webURL = "http://www.oracle.com/";
public static void main(String args[]) throws Exception {
String webUrl = start_webURL;
URL url = new URL(webUrl);
URLConnection connection = url.openConnection();
InputStream is = connection.getInputStream();
InputStreamReader isr = new InputStreamReader(is);
BufferedReader br = new BufferedReader(isr);
HTMLEditorKit htmlKit = new HTMLEditorKit();
HTMLDocument htmlDoc = (HTMLDocument) htmlKit.createDefaultDocument();
HTMLEditorKit.Parser parser = new ParserDelegator();
HTMLEditorKit.ParserCallback callback = htmlDoc.getReader(0);
parser.parse(br, callback, true);
FileWriter writer = new FileWriter(result_doc);
htmlKit.write(writer, htmlDoc, 0, htmlDoc.getLength());
writer.close();
int number_or_images = 0;
String[] array = new String[4096];
for (HTMLDocument.Iterator iterator = htmlDoc.getIterator(HTML.Tag.IMG); iterator.isValid(); iterator.next()) {
AttributeSet attributes = iterator.getAttributes();
String imgSrc = (String) attributes.getAttribute(HTML.Attribute.SRC);
System.out.println("img_src = " + imgSrc);
if (imgSrc != null && (imgSrc.endsWith(".jpg") || (imgSrc.endsWith(".png")) || (imgSrc.endsWith(".jpeg")) || (imgSrc.endsWith(".bmp")) || (imgSrc.endsWith(".ico")))) {
try {
downloadImage(webUrl, imgSrc);
} catch (IOException ex) {
System.out.println(ex.getMessage());
}
}
array[number_or_images] = imgSrc;
number_or_images++;
///TODO change
}
for(int i =0; i < number_or_images; i++)
{
System.out.println("before = "+array[i]);
while(true)
{
int count = array[i].indexOf('/');
if(count == -1) break;
array[i] = array[i].substring(count+1);
}
System.out.println("after = " + array[i]);
}
//TODO open file and replace tags
int i =0;
File input = new File(result_doc);
Document doc = Jsoup.parse(input, "UTF-8");
System.out.println( input.canWrite());
for( Element img : doc.select("img[src]") )
{
String s = img.attr("src");
System.out.println(s);
img.attr("src", "/home/foo/"+array[i]); // set attribute 'src' to 'your-source-here'
s = img.attr("src");
System.out.println(s);
++i;
}
}
private static void downloadImage(String url, String imgSrc) throws IOException {
BufferedImage image = null;
try {
if (!(imgSrc.startsWith("http"))) {
url = url + imgSrc;
} else {
url = imgSrc;
}
imgSrc = imgSrc.substring(imgSrc.lastIndexOf("/") + 1);
String imageFormat = null;
imageFormat = imgSrc.substring(imgSrc.lastIndexOf(".") + 1);
String imgPath = null;
imgPath = home_folder + imgSrc + "";
URL imageUrl = new URL(url);
image = ImageIO.read(imageUrl);
if (image != null) {
File file = new File(imgPath);
ImageIO.write(image, imageFormat, file);
}
} catch (Exception ex) {
ex.printStackTrace();
}
}
}
Solved.
I didn't save changes. Need to add code befire "downloadImage()"
int i = 0;
File input = new File(result_doc);
Document doc = Jsoup.parse(input, "UTF-8");
for( Element img : doc.select("img[src]") ) {
img.attr("src",array[i]); // set attribute 'src' to 'your-source-here'
++i;
}
try {
String strmb = doc.outerHtml();
bw = new BufferedWriter(new FileWriter(result_doc));
bw.write(strmb);
bw.close();
}
catch (Exception ex) {
System.out.println("Program stopped. The problem is " + "\"" +
ex.getMessage()+"\"");
}
You can go with JSOUP
Try something like below
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
public static void getAllTags(){
try {
File input=new File("H:\\html pages\\index1.html");
Document document=Jsoup.parse(input, "UTF-8");
Document parse=Jsoup.parse(document.html());
Elements body=parse.select("body");
Elements bodyTags=body.select("*");
for (Element element : bodyTags) {
//Do what you want with tag
System.out.println(element.tagName());
}
} catch (Exception e) {
e.printStackTrace();
}
If you want to parse html then try this
public static void parseHTML(){
try {
File input = new File("H:\\html\\index1.html");
Document document = Jsoup.parse(input, "UTF-8");
Document parse = Jsoup.parse(document.html());
Elements bodyElements = parse.select("div");
Elements elements = bodyElements.select("*");
for (Element element : elements) {
FilterHtml.setHtmlTAG(element.tagName());
FilterHtml.ParseXml();
Elements body = bodyElements.select(FilterHtml.getXmlTAG());
if (body.is(FilterHtml.getXmlTAG())) {
Elements tag = parse.select(FilterHtml.getXmlTAG());
//Do something meaning full with tag
System.out.println(tag.text());
}
}
} catch (Exception e) {
e.printStackTrace();
}
}
Hope this would help. if yes please mark it green.

Categories