Want to find content difference between two text files with java - java

I have two text files,
a.txt
b.txt
Each text files contains some file paths. b.txt contains some more file paths than a.txt. I would like to determine which paths are added and which are removed from a.txt so that it corresponds to paths in b.txt.
For example,
abc.txt contains
E:\Users\Documents\hello\a.properties
E:\Users\Documents\hello\b.properties
E:\Users\Documents\hello\c.properties
and xyz.txt contains
E:\Users\Documents\hello\a.properties
E:\Users\Documents\hello\c.properties
E:\Users\Documents\hello\g.properties
E:\Users\Documents\hello\h.properties
Now how to find that g.prop and h.prop are added and b.prop is removed?
Could anyone explain how it is done? I could only find how to check for identical contents.

The below code will serve your purpose irrespective of the content of the file.
import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;
public class Test {
public Test(){
System.out.println("Test.Test()");
}
public static void main(String[] args) throws Exception {
BufferedReader br1 = null;
BufferedReader br2 = null;
String sCurrentLine;
List<String> list1 = new ArrayList<String>();
List<String> list2 = new ArrayList<String>();
br1 = new BufferedReader(new FileReader("test.txt"));
br2 = new BufferedReader(new FileReader("test2.txt"));
while ((sCurrentLine = br1.readLine()) != null) {
list1.add(sCurrentLine);
}
while ((sCurrentLine = br2.readLine()) != null) {
list2.add(sCurrentLine);
}
List<String> tmpList = new ArrayList<String>(list1);
tmpList.removeAll(list2);
System.out.println("content from test.txt which is not there in test2.txt");
for(int i=0;i<tmpList.size();i++){
System.out.println(tmpList.get(i)); //content from test.txt which is not there in test2.txt
}
System.out.println("content from test2.txt which is not there in test.txt");
tmpList = list2;
tmpList.removeAll(list1);
for(int i=0;i<tmpList.size();i++){
System.out.println(tmpList.get(i)); //content from test2.txt which is not there in test.txt
}
}
}

The memory will be a problem as you need to load both files into the program.
I am using HashSet to ignore duplicates.Try this:
import java.io.BufferedReader;
import java.io.FileReader;
import java.util.HashSet;
public class FileReader1 {
public static void main(String args[]) {
String filename = "abc.txt";
String filename2 = "xyz.txt";
HashSet <String> al = new HashSet<String>();
HashSet <String> al1 = new HashSet<String>();
HashSet <String> diff1 = new HashSet<String>();
HashSet <String> diff2 = new HashSet<String>();
String str = null;
String str2 = null;
try {
BufferedReader in = new BufferedReader(new FileReader(filename));
while ((str = in.readLine()) != null) {
al.add(str);
}
in.close();
} catch (Exception e) {
e.printStackTrace();
}
try {
BufferedReader in = new BufferedReader(new FileReader(filename2));
while ((str2 = in.readLine()) != null) {
al1.add(str2);
}
in.close();
} catch (Exception e) {
e.printStackTrace();
}
for (String str3 : al) {
if (!al1.contains(str3)) {
diff1.add(str3);
}
}
for (String str5 : al1) {
if (!al.contains(str5)) {
diff2.add(str5);
}
}
for (String str4 : diff1) {
System.out.println("Removed Path: "+str4);
}
for (String str4 : diff2) {
System.out.println("Added Path: "+str4);
}
}
}
Output:
Removed Path: E:\Users\Documents\hello\b.properties
Added Path: E:\Users\Documents\hello\h.properties
Added Path: E:\Users\Documents\hello\g.properties

You can simple do follow
import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;
public class Test {
public static void main(final String[] args) throws IOException {
final Path firstFile = Paths.get("/home/src/main/resources/a.txt");
final Path secondFile = Paths.get("/home/src/main/resources/b.txt");
final List<String> firstFileContent = Files.readAllLines(firstFile,
Charset.defaultCharset());
final List<String> secondFileContent = Files.readAllLines(secondFile,
Charset.defaultCharset());
System.out.println(diffFiles(firstFileContent, secondFileContent));
System.out.println(diffFiles(secondFileContent, firstFileContent));
}
private static List<String> diffFiles(final List<String> firstFileContent,
final List<String> secondFileContent) {
final List<String> diff = new ArrayList<String>();
for (final String line : firstFileContent) {
if (!secondFileContent.contains(line)) {
diff.add(line);
}
}
return diff;
}
}

Compare files [Scanner and ArrayList]:
protected static void compareFiles(String firstFile, String secondFile)
throws Exception {
Scanner x = new Scanner(new File(firstFile));
List<String> list1 = getScannerList(x);
x = new Scanner(new File(secondFile));
List<String> list2 = getScannerList(x);
x.close();
System.out.println("File Extras");
printLnList(listExtras(list1, new ArrayList<String>(list2)));
System.out.println("File Removals");
printLnList(listExtras(list2, list1));
}
protected static List<String> listExtras(List<String> list1,
List<String> list2) throws Exception {
list2.removeAll(list1);
return list2;
}
protected static List<String> getScannerList(Scanner sc) throws Exception {
List<String> scannerList = new ArrayList<String>();
while (sc.hasNext())
scannerList.add(sc.nextLine());
return scannerList;
}
protected static void printLnList(List<String> list) {
for (String string : list)
System.out.println(string);
}
Program output:
File Extras
E:\Users\Documents\hello\g.properties
E:\Users\Documents\hello\h.properties
File Removals
E:\Users\Documents\hello\b.properties

Related

StringTokenizer doesn't read the firs line of the file.txt

I'm trying to take every single words from a text file and put them into a ArrayList but the StringTokenizer doesn't read the first line of the text file... What's wrong?
public class BufferReader {
public static void main(String[] args) throws FileNotFoundException, IOException {
BufferedReader reader = new BufferedReader(new FileReader("C://Java-projects//EsameJava//prova.txt"));
String line = reader.readLine();
List<String> str = new ArrayList<>();
while ((line = reader.readLine()) != null) {
StringTokenizer token = new StringTokenizer(line);
while (token.hasMoreTokens()) {
str.add(token.nextToken());
}
}
System.out.println(str);
The only solution I found is to start the text file from the second line but it's not what I want...
This is how you could marry the (very) old and the new(er) to provide a collection of words:
import java.text.BreakIterator;
import java.util.ArrayList;
import java.util.List;
import java.util.stream.Stream;
import java.nio.file.Files;
import java.nio.file.Paths;
public class WordCollector {
public static void main(String[] args) {
try {
List<String> words = WordCollector.getWords(Files.lines(Paths.get(args[0])));
System.out.println(words);
} catch (Throwable t) {
t.printStackTrace();
}
}
public static List<String> getWords(Stream<String> lines) {
List<String> result = new ArrayList<>();
BreakIterator boundary = BreakIterator.getWordInstance();
lines.forEach(line -> {
boundary.setText(line);
int start = boundary.first();
for (int end = boundary.next(); end != BreakIterator.DONE; start = end, end = boundary.next()) {
String candidate = line.substring(start, end).replaceAll("\\p{Punct}", "").trim();
if (candidate.length() > 0) {
result.add(candidate);
}
}
});
return result;
}
}

Java and Weka: Creating a string attribute

I'm trying to create a Java program that converts a text file to an ARFF file for Weka. Somehow my name attribute is set to numerical, but it should be set to a string. I tried everything, I tried fixing it fixing
attr.add(new Attribute("name"));
to
attr.add(new Attribute("name",true));
But when I run it, it prints the names as number (which is in the 2nd column)
1,0,?,?,?
1000,1,?,?,?
1002,2,?,?,?
2,3,?,?,?
3000,4,?,?,?
What am I doing wrong?
import java.util.ArrayList;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.io.*;
import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instance;
import java.util.*;
import weka.core.Instances;
import weka.core.converters.ArffSaver;
public class WekaCreateARFF {
private static final String FILENAME = "Some File";
public static void main(String[] args) throws IOException {
ArrayList<String> input = new ArrayList<String>();
ArrayList<Attribute> attr = new ArrayList<Attribute>();
Instances dataset;
double [] values;
BufferedReader br = null;
FileReader fr = null;
String date = null;
double id;
String n = null;
Instance inst = new DenseInstance(5);
List nominal_state = new ArrayList(5);
nominal_state.add("CA");
nominal_state.add("NC");
nominal_state.add("TX");
nominal_state.add("SC");
nominal_state.add("NY");
List nominal_party = new ArrayList(2);
nominal_party.add("republican");
nominal_party.add("democrat");
attr.add(new Attribute("id"));
attr.add(new Attribute("name",true));
attr.add(new Attribute("political party", nominal_party));
attr.add(new Attribute("state", nominal_state));
attr.add(new Attribute("birth date", date));
try {
fr = new FileReader(FILENAME);
br = new BufferedReader(fr);
String entry;
dataset = new Instances("SimpleARFF",attr,0);
values = new double[dataset.numAttributes()];
while ((entry = br.readLine()) != null) {
//System.out.println(entry);
input.add(entry);
for (int i = 0; i<5; i++ ) {
String[] parts = entry.split(",");
String part1 = parts[0];
String name = parts[1];
id = Double.parseDouble(part1);
inst.setValue(attr.get(0), id);
inst.setValue(attr.get(1), name);
}
System.out.println(inst);
dataset.add(new DenseInstance(1.0, values));
}
//System.out.println(dataset);
//ArffSaver arff = new ArffSaver();
//arff.setInstances(dataset);
//arff.setFile(new File("Simple.arff"));
//arff.writeBatch();
} catch (IOException e) {
e.printStackTrace();
} finally {
try {
if (br != null)
br.close();
if (fr != null)
fr.close();
} catch (IOException ex) {
ex.printStackTrace();
}
}
}
}
You probably want this constructor:
http://weka.sourceforge.net/doc.dev/weka/core/Attribute.html#Attribute-java.lang.String-boolean-
That is, you essentially have to add a boolean flag to tell Weka that you want a String attribute, and not a numeric attribute (the default):
new Attribute("blah", true)
should give you a String-attribute.

Run two classes one after the other

How to run two classes in which one gives some data in a textfile & the other should take that file and process it?
I have two Java files. File1 processes something and outputs a text file. File2 should take that text file and process it to create a final output.
My requirement is to have two independent java files that work together.
File1
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.HashSet;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Set;
import java.util.Map;
import java.util.TreeMap;
import java.util.Iterator;
import java.util.List;
import java.util.ArrayList;
public class FlatFileParser
{
public static void main(String[] args)
{
try
{
// The stream we're reading from
BufferedReader in;
List<String> ls = new ArrayList<String>();
BufferedWriter out1 = new BufferedWriter(new FileWriter("inValues.txt" , true ));
BufferedReader out11 = new BufferedReader(new FileReader("inValues.txt"));
// Return value of next call to next()
String nextline;
String line="";
if (args[0].equals("1"))
{
in = new BufferedReader(new FileReader(args[1]));
nextline = in.readLine();
while(nextline != null)
{
nextline = nextline.replaceAll("\\<packet","\n<packet");
System.out.println(nextline);
nextline = in.readLine();
}
in.close();
}
else
{
in = new BufferedReader(new FileReader(args[1]));
nextline = in.readLine();
HashMap<String,String> inout = new HashMap<String,String>();
while(nextline != null)
{
try
{
if (nextline.indexOf("timetracker")>0)
{
String from = "";
String indate = "";
if (nextline.indexOf("of in")>0)
{
int posfrom = nextline.indexOf("from");
int posnextAt = nextline.indexOf("#", posfrom);
int posts = nextline.indexOf("timestamp");
from = nextline.substring(posfrom+5,posnextAt);
indate = nextline.substring(posts+11, posts+23);
String dd = indate.split(" ")[1];
String key = dd+"-"+from+"-"+indate;
//String key = from+"-"+indate;
String intime = "-in-"+nextline.substring(posts+24, posts+35);
inout.put(key, intime);
}
else if (nextline.indexOf("of out")>0)
{
int posfrom = nextline.indexOf("from");
int posnextAt = nextline.indexOf("#", posfrom);
int posts = nextline.indexOf("timestamp");
from = nextline.substring(posfrom+5,posnextAt);
indate = nextline.substring(posts+11, posts+23);
String dd = indate.split(" ")[1];
String key = dd+"-"+from+"-"+indate;
String outtime = "-out-"+nextline.substring(posts+24, posts+35);
if (inout.containsKey(key))
{
String val = inout.get(key);
if (!(val.indexOf("out")>0))
inout.put(key, val+outtime);
}
else
{
inout.put(key, outtime);
}
}
}
}
catch(Exception e)
{
System.err.println(nextline);
System.err.println(e.getMessage());
}
nextline = in.readLine();
}
in.close();
for(String key: inout.keySet())
{
String val = inout.get(key);
out1.write(key+" , "+val+"\n");
}
out1.close();
}
}
catch (IOException e)
{
throw new IllegalArgumentException(e);
}
}
File2
import java.io.BufferedReader;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;
import java.io.File;
import java.io.FileReader;
public class RecordParser
{
private static BufferedReader reader;
private List<Person> resource;
private List<String> finalRecords;
public RecordParser(BufferedReader reader)
{
this.reader = reader;
this.resource = new ArrayList<Person>();
this.finalRecords = new ArrayList<String>();
}
public void execute() throws IOException
{
String line = null;
while ((line = reader.readLine()) != null)
{
String[] parts = line.split(" , ");
addPerson(new Person(parts[0]));
if ((parts[1].contains("-in-")) && (parts[1].contains("-out-")))
{
String[] inout = parts[1].split("-out-");
Person person = getPerson(parts[0]);
person.setInTime(inout[0]);
person.setOutTime("-out-" + inout[1]);
}
else if (parts[1].contains("-in-"))
{
Person person = getPerson(parts[0]);
person.setInTime(parts[1]);
}
else
{
Person person = getPerson(parts[0]);
person.setOutTime(parts[1]);
}
}
// finalRecords the resource to the String list
for (Person p : resource)
{
finalRecords.add(p.getPerson());
}
}
private void addPerson(Person person)
{
for (Person p : resource)
{
if (p.getNameDate().equals(person.getNameDate()))
{
return;
}
}
resource.add(person);
}
private Person getPerson(String nameDate)
{
for (Person p : resource)
{
if (p.getNameDate().equals(nameDate))
{
return p;
}
}
return null;
}
public List<String> getfinalRecords()
{
return finalRecords;
}
public static void main(String[] args)
{
try {
BufferedReader reader = new BufferedReader(new FileReader("sample.txt"));
RecordParser recordParser = new RecordParser(reader);
recordParser.execute();
for (String s : recordParser.getfinalRecords())
{
System.out.println(s);
}
reader.close();
} catch (IOException e)
{
e.printStackTrace();
}
}
public class Person
{
private String nameDate;
private String inTime;
private String outTime;
public Person (String nameDate)
{
this.nameDate = nameDate;
this.inTime = "missing in";
this.outTime = "missing out";
}
public void setInTime(String inTime)
{
this.inTime = inTime;
}
public void setOutTime(String outTime)
{
this.outTime = outTime;
}
public String getNameDate()
{
return nameDate;
}
public String getPerson()
{
StringBuilder builder = new StringBuilder();
builder.append(nameDate);
builder.append(" , ");
builder.append(inTime);
builder.append(" , ");
builder.append(outTime);
return builder.toString();
}
}
}
I want to be able to import the values from inValues.txt (created in File1) and process them in File2.
Create a batch/sh file and run one java program after the other. If you want to pass the file details to the second program you can do that by providing a run time argument.
on windows:
java -classpath .;yourjars FlatFileParser
java -classpath .;yourjars RecordParser {optionalfiledetails}
on linux
java -classpath .:yourjars FlatFileParser
java -classpath .:yourjars RecordParser {optionalfiledetails}

How to create a regex who verify the existence of a number into an array in java

i want to verify if a number for example 701234567 is an element of my array in java. For this, my code search if my number who is begening with 7 and have 9 digits is a element of my array "numbercall.txt" who have 5 elements. This is my text file:
numbercall.txt [ 702345678, 714326578, 701234567, 791234567,751234567]
This is my code:
import java.io.BufferedReader;
import java.io.DataInputStream;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class TestNumberLt {
static String[] arr= null;
String filename = "fichiers/numbercall.txt";
static String a = null ;
static List<String> list = new ArrayList<String>();
public static void main(String [] args) throws IOException{
FileInputStream fstream_school = new FileInputStream(filename);
DataInputStream data_input = new DataInputStream(fstream_school);
BufferedReader buffer = new BufferedReader(new InputStreamReader(data_input));
String str_line;
while ((str_line = buffer.readLine()) != null)
{
str_line = str_line.trim();
if ((str_line.length()!=0))
{
list.add(str_line);
}
}
int b = 773214576;
//convert the arraylist to a array
arr = (String[])list.toArray(new String[list.size()]);
Pattern p = Pattern.compile("^7[0|6|7][0-9]{7}$");
Matcher m ;
//a loop for verify if a number exist in this array
for (int j = 0; j < list.size();)
{
System.out.print(" "+list.get(j)+ " ");
m = p.matcher(list.get(j));
/*while(m.find())
System.out.println(m.group());*/
if(list.get(j).equals(b))
{
System.out.println("Trouvé "+list.get(j));
break;
}
else
{
System.out.println("ce numéro ("+b+") n'existe pas!");
}
break;
}
}
}
Do it simply like this
String str_line= "702345678,714326578,701234567,791234567,751234567";
String[] strArray = str_line.split(",");
String key = "702345678";
for(String v:strArray) {
if(v.equals(key)) {
System.out.println("found");
}
}
I'm not realy sure of what you want, but if you just need the index of b in your array just do this:
public static void main(String [] args) throws IOException{
...
int b = 773214576;
int tmp = list.indexOf(b+"");
if(tmp!=-1) {
System.out.println("Trouvé "+ b + " à l'index " + tmp);
} else {
System.out.println("Ce numéro ("+b+") n'existe pas!");
}
...
}
Another answer, using Guava :
(in this case, there really is no need, you could simply use split() method from String object, but like Guava readibility and returns)
package stackoverflow;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import com.google.common.base.Splitter;
public class RegexExample {
String filename = "numbercall.txt";
public boolean isInList(String numberToCheck) throws IOException {
BufferedReader file = loadFile();
for (String number : extractNumberListFrom(file)) {
if (number.trim().equals(numberToCheck)) {
return true;
}
}
return false;
}
private Iterable<String> extractNumberListFrom(BufferedReader buffer) throws IOException {
StringBuilder numberList = new StringBuilder();
String line;
while ((line = buffer.readLine()) != null) {
numberList.append(line);
}
return Splitter.on(",").split(numberList.toString());
}
private BufferedReader loadFile() {
InputStream fstream_school = RegexExample.class.getResourceAsStream(filename);
BufferedReader buffer = new BufferedReader(new InputStreamReader(fstream_school));
return buffer;
}
}

read 7 columns from .csv file in java?

I am new to Java. Can anybody suggest to me how to read a CSV file with 7 columns?
Here is my code:
import java.io.BufferedReader;
import java.io.DataInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
public class main {
public static Map<String,String> map1 = null;
public static Map<String,String> map2 = null;
public static void main(String[] args) {
try {
readFileandPopulate();
for (Map.Entry<String, String> entry : map1.entrySet()) {
System.out.println("Key : " + entry.getKey() + " Value : "
+ entry.getValue()+" address :"+map2.get(entry.getKey()));
//insert into DB
}
} catch (Exception e) {//Catch exception if any
e.printStackTrace();
System.err.println("Error: " + e.getMessage());
}
}
public static void readFileandPopulate() throws Exception {
FileInputStream fstream = new FileInputStream("/Users/Desktop/Pattern Recognition/DataSetR/1a19.csv");
// Use DataInputStream to read binary NOT text.
BufferedReader br = new BufferedReader(new InputStreamReader(fstream));
String strLine;
map1 = new HashMap<String,String>();
while ((strLine = br.readLine()) != null) {
System.out.println(strLine);
String temp[] = strLine.split(",");
map1.put(temp[0], temp[1]);
}
in.close();
System.out.println("done 1");
}
}
Is there any method to just read few columns and delete remaining data or can I store all values in array and use index of array to calculate one equation using values?
I would recommend using a CSV parser, such as http://opencsv.sourceforge.net/. Then you could do the following:
CSVReader reader = new CSVReader(new FileReader(filePath), ',');
List<String[]> rows= reader.readAll();
for (String[] row : rows)
{
for (int i = 0; i < numColumnsToParse; i++)
{
//do something with row[i]
}
}
import java.io.FileReader;
import java.io.IOException;
import java.util.List;
import au.com.bytecode.opencsv.CSVReader;
public class ReadCSV
{
public static void main(String[] args) throws IOException
{
String[] row = null;
String csvFilename = "C:/Users/Hussain/Desktop/data.csv";
CSVReader csvReader = new CSVReader(new FileReader(csvFilename));
List content = csvReader.readAll();
for (Object object : content)
{
row = (String[]) object;
System.out.println("value in row 7 ===>>>"+row[6]);
}
csvReader.close();
}
}
you can add the jar from here

Categories