Comparing two text files and displaying unique words in Java - java

I have two text files. I have to develop a Java program which compares the two files and finds the unique words. I have tried a few methods, but they didn't work. Example:
test1.txt:
I am a robot. My name is Sofia.
test2.txt:
Hello I am a man. My name is Alex
Output:
Hello robot man Sofia Alex
My approach was like this:
import java.io.*;
import java.util.*;
public class Main {
    public static void main(String[] args)
            throws FileNotFoundException {
        Scanner input = new Scanner(new File("test1.txt"));
        Scanner scan = new Scanner(new File("test2.txt"));
        ArrayList<String> al = new ArrayList<String>();
        ArrayList<String> a2 = new ArrayList<String>();
        String test;
        while (input.hasNext()) {
            String next = input.next();
        }
        System.out.println("arraylist" + al);
        while (scan.hasNext()) {
            test = scan.next();
            a2.add(test);
        }
        System.out.println("arraylist2" + a2);
        for (int i = 0; i < al.size(); i++) {
            for (int j = 0; j < a2.size(); j++) {
                if (al.get(i).equals(a2.get(j))) {
                    break;
                } else {
                    System.out.println(al.get(i));
                    break;
                }
            }
        }
    }
}

Note that this is a quick and dirty approach and pretty inefficient. Furthermore, I don't know your exact requirements (full stops? upper/lowercase?).
Also take into account that this program doesn't check which list is longer, but it should give you a good hint:
import java.io.File;
import java.io.FileNotFoundException;
import java.util.ArrayList;
import java.util.Scanner;
public class Main {
    public static void main(String[] args) throws FileNotFoundException {
        Scanner input = new Scanner(new File("test1.txt"));
        Scanner scan = new Scanner(new File("test2.txt"));
        ArrayList<String> list1 = new ArrayList<String>();
        ArrayList<String> list2 = new ArrayList<String>();
        while (input.hasNext()) {
            list1.add(input.next());
        }
        while (scan.hasNext()) {
            list2.add(scan.next());
        }
        // iterate over list 1
        for (int i = list1.size() - 1; i >= 0; i--) {
            // if there is an occurrence of two identical strings
            if (list2.contains(list1.get(i))) {
                // remove the String from list 2
                list2.remove(list2.indexOf(list1.get(i)));
                // remove the String from list 1
                list1.remove(i);
            }
        }
        // merge the lists
        list1.addAll(list2);
        // remove full stops
        for (int i = 0; i < list1.size(); i++) {
            list1.set(i, list1.get(i).replace(".", ""));
        }
        System.out.println("Unique Values: " + list1);
    }
}
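If case and punctuation should be ignored (the "full stops? upper/lowercase?" question above), a minimal sketch using sets could look like the following; the normalization rules and the class name UniqueWords are assumptions, so adjust them to your actual requirements:
import java.io.File;
import java.io.FileNotFoundException;
import java.util.HashSet;
import java.util.Scanner;
import java.util.Set;

public class UniqueWords {
    // reads all words of a file into a set, lower-cased and stripped of full stops
    static Set<String> readWords(String fileName) throws FileNotFoundException {
        Set<String> words = new HashSet<String>();
        Scanner in = new Scanner(new File(fileName));
        while (in.hasNext()) {
            words.add(in.next().toLowerCase().replace(".", ""));
        }
        in.close();
        return words;
    }

    public static void main(String[] args) throws FileNotFoundException {
        Set<String> words1 = readWords("test1.txt");
        Set<String> words2 = readWords("test2.txt");
        // symmetric difference: words that occur in exactly one of the files
        Set<String> onlyIn1 = new HashSet<String>(words1);
        onlyIn1.removeAll(words2);
        Set<String> onlyIn2 = new HashSet<String>(words2);
        onlyIn2.removeAll(words1);
        onlyIn1.addAll(onlyIn2);
        System.out.println("Unique Values: " + onlyIn1);
    }
}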

This assumes the text files contain only the full stop (.) as a sentence terminator.
public static void main(String[] args) throws Exception
{
    // Skipping reading from file and storing in string
    String stringFromFileOne = "I am a robot. My name is Sofia.";
    String stringFromFileTwo = "Hello I am a man. My name is Alex";
    Set<String> set1 = Arrays.asList(stringFromFileOne.split(" "))
            .stream()
            .map(s -> s.toLowerCase())
            .map(m -> m.contains(".") ? m.replace(".", "") : m)
            .sorted()
            .collect(Collectors.toSet());
    Set<String> set2 = Arrays.asList(stringFromFileTwo.split(" "))
            .stream()
            .map(s -> s.toLowerCase())
            .map(m -> m.contains(".") ? m.replace(".", "") : m)
            .sorted()
            .collect(Collectors.toSet());
    List<String> uniqueWords;
    if (set1.size() > set2.size()) {
        uniqueWords = getUniqueWords(set2, set1);
    } else {
        uniqueWords = getUniqueWords(set1, set2);
    }
    System.out.println("uniqueWords:" + uniqueWords);
}

private static List<String> getUniqueWords(Set<String> removeFromSet, Set<String> iterateOverSet) {
    List<String> uniqueWords;
    Set<String> tempSet = new HashSet<String>(removeFromSet);
    removeFromSet.removeAll(iterateOverSet);
    uniqueWords = iterateOverSet.stream().filter(f -> !tempSet.contains(f) && !f.isEmpty())
            .collect(Collectors.toList());
    uniqueWords.addAll(removeFromSet);
    return uniqueWords;
}

You can use the Guava library, which gives you the difference between two sets.
import java.io.BufferedReader;
import java.io.DataInputStream;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.HashSet;
import java.util.Set;
import java.util.StringTokenizer;
import com.google.common.collect.Sets;
public class WordTest {
    public static void main(String[] args) {
        WordTest wordTest = new WordTest();
        Set<String> firstFileWords = wordTest.getAllWords("E:\\testing1.txt");
        Set<String> secondFileWords = wordTest.getAllWords("E:\\testing2.txt");
        Set<String> diff = Sets.difference(firstFileWords, secondFileWords);
        Set<String> diff2 = Sets.difference(secondFileWords, firstFileWords);
        System.out.println("Set 1: " + firstFileWords);
        System.out.println("Set 2: " + secondFileWords);
        System.out.println("Difference between " + "Set 1 and Set 2: " + diff);
        System.out.println("Difference between " + "Set 2 and Set 1: " + diff2);
    }

    public Set<String> getAllWords(String path) {
        FileInputStream fis = null;
        DataInputStream dis = null;
        BufferedReader br = null;
        Set<String> wordList = new HashSet<>();
        try {
            fis = new FileInputStream(path);
            dis = new DataInputStream(fis);
            br = new BufferedReader(new InputStreamReader(dis));
            String line = null;
            while ((line = br.readLine()) != null) {
                StringTokenizer st = new StringTokenizer(line, " ,.;:\"");
                while (st.hasMoreTokens()) {
                    wordList.add(st.nextToken());
                }
            }
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            try {
                if (br != null)
                    br.close();
            } catch (Exception ex) {
            }
        }
        return wordList;
    }
}
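Since the expected output in the question is the set of words that appear in only one of the two files, Guava also offers Sets.symmetricDifference, which gives that in a single call; assuming the getAllWords method above, you could add:
// words that occur in exactly one of the two files
Set<String> unique = Sets.symmetricDifference(firstFileWords, secondFileWords);
System.out.println("Unique words: " + unique);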

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.Arrays;
import java.util.HashSet;
public class FileComparision {
    public static void main(String[] args) throws IOException {
        HashSet<String> uniqueSet = new HashSet<String>();
        // split the lines based on the delimiter and add the words to the set
        BufferedReader reader = new BufferedReader(new FileReader("test1.txt"));
        String line;
        while ((line = reader.readLine()) != null) {
            Arrays.asList(line.split(" ")).forEach(word -> uniqueSet.add(word));
        }
        reader.close();
        reader = new BufferedReader(new FileReader("test2.txt"));
        while ((line = reader.readLine()) != null) {
            Arrays.asList(line.split(" ")).forEach(word -> {
                if (!uniqueSet.contains(word)) {
                    uniqueSet.add(word);
                } else {
                    uniqueSet.remove(word);
                }
            });
        }
        reader.close();
        // to remove unnecessary characters
        // uniqueSet.remove(".");
        System.out.println(uniqueSet);
    }
}
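One caveat: because the split above is on a single space, "robot." and "robot" count as different words. If that matters for your requirements, splitting on whitespace plus punctuation is one option (the exact character class is an assumption), and you may also want to skip empty tokens:
// split on whitespace and common punctuation instead of a single space
Arrays.asList(line.split("[\\s.,!?;:]+")).forEach(word -> {
    if (!word.isEmpty()) {
        uniqueSet.add(word);
    }
});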

public static String readFile(String fileName) throws Exception
{
    String data = "";
    data = new String(Files.readAllBytes(Paths.get(fileName)));
    return data;
}

public static void main(String[] args) throws Exception
{
    String data = readFile("C:\\Users\\pb\\Desktop\\text1.txt");
    String data1 = readFile("C:\\Users\\pb\\Desktop\\text2.txt");
    String array[] = data.split(" ");
    String array1[] = data1.split(" ");
    for (int i = 0; i <= array1.length - 1; i++) {
        if (data.contains(array1[i])) {
        } else {
            System.out.println(array1[i]);
        }
    }
    for (int i = 0; i <= array.length - 1; i++) {
        if (data1.contains(array[i])) {
        } else {
            System.out.println(array[i]);
        }
    }
}

Related

How to print text file left to right, and then upside down?

My goal for this program is to read a text file, print it normally, then print it flipped from left to right, and then flipped upside down. I can print the original, however I'm unsure of how to read the file so it will print in the other two formats, and how to print in these formats. I can only import the file once.
Here is an example output, if my description is inadequate.
The code as it is now:
import java.io.*;
import java.util.*;
public class Problem2
{
    public static void main(String[] args) throws IOException
    {
        File marge = new File("marge.txt");
        Scanner fileScan = new Scanner(marge);
        String original;
        while (fileScan.hasNext())
        {
            original = fileScan.nextLine();
            System.out.println(original);
        }
        String lefttoright;
        while (fileScan.hasNext())
        {
            lefttoright = fileScan.nextLine();
            System.out.println(lefttoright);
        }
        String upsidedown;
        while (fileScan.hasNext())
        {
            upsidedown = fileScan.nextLine();
            System.out.println(upsidedown);
        }
        fileScan.close();
    }
}
Try to use new StringBuilder(element).reverse().toString(), where element is a String.
Example of working code:
package test;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
public class test {
    public static void main(String[] args) throws IOException {
        File file = new File("C:\\Users\\xxx\\Documents\\test.txt");
        List<String> listString = new ArrayList<>();
        BufferedReader br = new BufferedReader(new FileReader(file));
        String line;
        // write as is
        while ((line = br.readLine()) != null) {
            System.out.println(line);
        }
        System.out.println("");
        br = new BufferedReader(new FileReader(file));
        // write in reverse (each line mirrored left to right)
        while ((line = br.readLine()) != null) {
            String result = new StringBuilder(line).reverse().toString();
            System.out.println(result);
        }
        System.out.println("");
        br = new BufferedReader(new FileReader(file));
        while ((line = br.readLine()) != null) {
            listString.add(line);
        }
        br.close();
        // write upside down (lines in reverse order, each mirrored)
        Collections.reverse(listString);
        for (String element : listString) {
            String result = new StringBuilder(element).reverse().toString();
            System.out.println(result);
        }
    }
}
Test example:
test.txt file content:
alpha
tree
123
Output:
alpha
tree
123
ahpla
eert
321
321
eert
ahpla
You might consider the approach below; it will save you the hassle of reading the file three times.
import java.io.*;
import java.util.*;
public class Problem2 {
    public static void main(String[] args) throws IOException {
        File marge = new File("marge.txt");
        Scanner fileScan = new Scanner(marge);
        // read the whole file once, keeping the line breaks
        StringBuilder content = new StringBuilder();
        while (fileScan.hasNext()) {
            content.append(fileScan.nextLine()).append("\n");
        }
        fileScan.close();
        String original = content.toString();
        System.out.print(original);
        System.out.println();
        System.out.print(flip(original));
        System.out.println();
        System.out.print(upsideDown(original));
    }

    public static String flip(String input) {
        StringBuffer output = new StringBuffer();
        String[] intermInput = input.split("\n");
        for (int i = 0; i < intermInput.length; i++) {
            StringBuffer strBuff = new StringBuffer(intermInput[i]);
            output.append(strBuff.reverse());
            output.append("\n");
        }
        return output.toString();
    }

    public static String upsideDown(String input) {
        StringBuffer output = new StringBuffer();
        String[] intermInput = input.split("\n");
        for (int i = intermInput.length - 1; i >= 0; i--) {
            output.append(intermInput[i]);
            output.append("\n");
        }
        return output.toString();
    }
}
Either use the suggestion from YCF_L or use the solution below.
import java.io.*;
import java.util.*;
public class Problem2 {
    public static void main(String[] args) throws IOException {
        File marge = new File("marge.txt");
        Scanner fileScan = new Scanner(marge);
        String original;
        while (fileScan.hasNext()) {
            original = fileScan.nextLine();
            System.out.println(original);
        }
        fileScan = new Scanner(marge);
        String lefttoright;
        while (fileScan.hasNext()) {
            lefttoright = fileScan.nextLine();
            StringBuffer sb = new StringBuffer(lefttoright);
            System.out.println(sb.reverse());
        }
        fileScan = new Scanner(marge);
        String upsidedown;
        Stack<String> list = new Stack<String>();
        while (fileScan.hasNext()) {
            upsidedown = fileScan.nextLine();
            list.push(upsidedown);
        }
        // pop until the stack is empty so every line is printed
        while (!list.isEmpty()) {
            System.out.println(list.pop());
        }
        fileScan.close();
    }
}

Read a file into Map<Integer, ArrayList<Double>>

I saw some similar questions, but mine is a little different.
I define a
Map<Integer, ArrayList<Double>> fl;
My input .txt file:
1 0.56 0.57 0.73 ..
2 2.3 3.50 ...
9 4.98 0.99 ..
How to read the file into the map fl?
Thanks!
Use a Scanner: first call Scanner.nextInt(), which will give you the leading integer.
Then call Scanner.nextLine(), which will give you all the remaining doubles in the line as a String. Split it and parse everything to double.
Repeat the same till the end of the file.
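A minimal sketch of that approach (the file name input.txt and the whitespace separator are assumptions):
import java.io.File;
import java.io.FileNotFoundException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;
import java.util.Scanner;

public class ReadIntoMap {
    public static void main(String[] args) throws FileNotFoundException {
        Map<Integer, ArrayList<Double>> fl = new HashMap<>();
        Scanner in = new Scanner(new File("input.txt"));
        while (in.hasNextInt()) {
            int key = in.nextInt();              // leading integer of the line
            String rest = in.nextLine().trim();  // remaining doubles as one String
            ArrayList<Double> values = new ArrayList<>();
            if (!rest.isEmpty()) {
                for (String token : rest.split("\\s+")) {
                    values.add(Double.parseDouble(token));
                }
            }
            fl.put(key, values);
        }
        in.close();
        System.out.println(fl);
    }
}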
Here's a try.
I've compiled and run the code.
Make sure the input file is in the same directory as your project if you use an IDE -- this only applies if you do not modify the path below.
package fileread;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
public class FileRead {
    private static HashMap<Integer, ArrayList<Double>> map = new HashMap<>();
    private static BufferedReader reader;

    public static void main(String[] args) {
        try {
            reader = new BufferedReader(new FileReader("input"));
            // or reader = new BufferedReader(new FileReader("C:\\full-path-to-your-file"));
            String line;
            while ((line = reader.readLine()) != null) {
                String[] tokens = line.split(" ");
                ArrayList<Double> list = new ArrayList<>();
                Integer i = Integer.valueOf(tokens[0]);
                for (int j = 1; j < tokens.length; j++)
                    list.add(Double.valueOf(tokens[j]));
                map.put(i, list);
            }
        } catch (IOException ex) {
            // break execution
        } finally {
            if (reader != null)
                try {
                    reader.close();
                } catch (IOException ex) {
                    // don't break :)
                }
        }
        for (Integer i : map.keySet()) {
            ArrayList<Double> l = map.get(i);
            System.out.print("Line " + i + ": ");
            for (Double d : l)
                System.out.print(d + " ");
            System.out.println();
        }
    }
}
The code for parsing the file and populating the map should be like below
try {
    BufferedReader bReader = new BufferedReader(new FileReader(new File("c:/input .txt")));
    String line = "";
    Map<Integer, ArrayList<Double>> fl = new HashMap<Integer, ArrayList<Double>>();
    while ((line = bReader.readLine()) != null) {
        String[] strArray = line.split(" ");
        // first token is the key, the remaining tokens are the values
        int key = Integer.valueOf(strArray[0]);
        ArrayList<Double> value = new ArrayList<Double>();
        for (int i = 1; i < strArray.length; i++) {
            value.add(Double.valueOf(strArray[i]));
        }
        fl.put(key, value);
    }
    bReader.close();
} catch (Exception e) {
    e.printStackTrace();
}

I am trying to count characters from within a file

What I am trying to do here is read a file and count each character. Each character should add +1 to the "int count" and then print out the value of "int count".
I hope that what I am trying to do is clear.
import java.io.*;
import java.util.Scanner;
public class ScanXan {
    public static void main(String[] args) throws IOException {
        int count = 0;
        Scanner scan = null;
        Scanner cCount = null;
        try {
            scan = new Scanner(new BufferedReader(new FileReader("greeting")));
            while (scan.hasNextLine()) {
                System.out.println(scan.nextLine());
            }
        } finally {
            if (scan != null) {
                scan.close();
            }
        }
        try {
            cCount = new Scanner(new BufferedReader(new FileReader("greeting")));
            while (cCount.hasNext("")) {
                count++;
            }
        } finally {
            if (cCount != null) {
                scan.close();
            }
        }
        System.out.println(count);
    }
}
Add a catch block to check for exceptions.
Remove the parameter from hasNext("").
Move to the next token:
cCount = new Scanner(new BufferedReader(new FileReader("greeting")));
while (cCount.hasNext()) {
    count = count + (cCount.next()).length();
}
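Putting those three changes together, a rough sketch of the counting program might look like this (keeping the file name "greeting" from the question; the error message is just illustrative):
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.Scanner;

public class CountChars {
    public static void main(String[] args) {
        int count = 0;
        Scanner cCount = null;
        try {
            cCount = new Scanner(new BufferedReader(new FileReader("greeting")));
            // count the characters of every whitespace-separated token
            while (cCount.hasNext()) {
                count += cCount.next().length();
            }
        } catch (IOException e) {
            // catch block as suggested above
            System.out.println("Could not read file: " + e.getMessage());
        } finally {
            if (cCount != null) {
                cCount.close();
            }
        }
        System.out.println(count);
    }
}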
Using the Java 8 Stream API, you can do it as follows:
package example;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.stream.Collectors;
import java.util.stream.Stream;
public class CountCharacter {
    private static int count = 0;

    public static void main(String[] args) throws IOException {
        Path path = Paths.get("greeting");
        try (Stream<String> lines = Files.lines(path, StandardCharsets.UTF_8)) {
            count = lines.collect(Collectors.summingInt(String::length));
        }
        System.out.println("The number of characters is " + count);
    }
}
Well, if you're looking for a way to count all characters and digits without any blank spaces and things like tab, enter, etc., then you could first remove that whitespace using:
st.replaceAll("\\s+","")
and then just take the string's length:
String str = "a string";
int length = str.length( );
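Combined with reading the file, a minimal sketch of that idea (the file name "greeting" is taken from the question):
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;

public class CountNonWhitespace {
    public static void main(String[] args) throws IOException {
        // read the whole file into one String
        String text = new String(Files.readAllBytes(Paths.get("greeting")));
        // drop all whitespace (spaces, tabs, line breaks), then count what is left
        int length = text.replaceAll("\\s+", "").length();
        System.out.println(length);
    }
}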
First of all, why would you use try { } without catch (Exception e)?
BufferedReader reader = null;
try {
    reader = new BufferedReader(new FileReader("greetings.txt"));
    String line = null;
    String text = "";
    while ((line = reader.readLine()) != null) {
        text += line;
    }
    int c = 0; // count of any character except whitespace
    // or you can use what #Alex wrote:
    // c = text.replaceAll("\\s+", "").length();
    for (int i = 0; i < text.length(); i++) {
        if (!Character.isWhitespace(text.charAt(i))) {
            c++;
        }
    }
    System.out.println("Number of characters: " + c);
} catch (IOException e) {
    System.out.println("File Not Found");
} finally {
    if (reader != null) {
        try {
            reader.close();
        } catch (IOException ignored) {
        }
    }
}

Word frequency count in 2 files

I have written Java code to count word occurrences. It uses two .txt files as input and gives words and frequencies as output.
I would also like to print how many times each file contains a given word. Do you have any idea how to do this?
public class JavaApplication2
{
    public static void main(String[] args) throws IOException
    {
        Path filePath1 = Paths.get("test.txt");
        Path filePath2 = Paths.get("test2.txt");
        Scanner readerL = new Scanner(filePath1);
        Scanner readerR = new Scanner(filePath2);
        String line1 = readerL.nextLine();
        String line2 = readerR.nextLine();
        String text = new String();
        text = text.concat(line1).concat(line2);
        String[] keys = text.split("[!.?:;\\s]");
        String[] uniqueKeys;
        int count = 0;
        System.out.println(text);
        uniqueKeys = getUniqueKeys(keys);
        for (String key : uniqueKeys)
        {
            if (null == key)
            {
                break;
            }
            for (String s : keys)
            {
                if (key.equals(s))
                {
                    count++;
                }
            }
            System.out.println("[" + key + "] frequency : " + count);
            count = 0;
        }
    }

    private static String[] getUniqueKeys(String[] keys)
    {
        String[] uniqueKeys = new String[keys.length];
        uniqueKeys[0] = keys[0];
        int uniqueKeyIndex = 1;
        boolean keyAlreadyExists = false;
        for (int i = 1; i < keys.length; i++)
        {
            for (int j = 0; j <= uniqueKeyIndex; j++)
            {
                if (keys[i].equals(uniqueKeys[j]))
                {
                    keyAlreadyExists = true;
                }
            }
            if (!keyAlreadyExists)
            {
                uniqueKeys[uniqueKeyIndex] = keys[i];
                uniqueKeyIndex++;
            }
            keyAlreadyExists = false;
        }
        return uniqueKeys;
    }
}
Firstly, instead of using an array for unique keys, use a HashMap<String, Integer>. It's a lot more efficient.
Your best option is to run your processing over each line/file separately, and store these counts separately. Then merge the two counts to get the overall frequencies.
More Detail:
String[] keys = text.split("[!.?:;\\s]");
HashMap<String, Integer> uniqueKeys = new HashMap<>();
for (String key : keys) {
    if (uniqueKeys.containsKey(key)) {
        // if the key is already in the map, increment its count
        uniqueKeys.put(key, uniqueKeys.get(key) + 1);
    } else {
        // if it isn't in it, add it
        uniqueKeys.put(key, 1);
    }
}
// You now have the count of all unique keys in a given text.
// To print them to the console:
for (Map.Entry<String, Integer> keyCount : uniqueKeys.entrySet()) {
    System.out.println(keyCount.getKey() + ": " + keyCount.getValue());
}
// To merge, if you're using Java 8:
for (Map.Entry<String, Integer> keyEntry : uniqueKeys1.entrySet()) {
    uniqueKeys2.merge(keyEntry.getKey(), keyEntry.getValue(), Integer::sum);
}
// To merge, otherwise:
for (Map.Entry<String, Integer> keyEntry : uniqueKeys1.entrySet()) {
    if (uniqueKeys2.containsKey(keyEntry.getKey())) {
        uniqueKeys2.put(keyEntry.getKey(),
                uniqueKeys2.get(keyEntry.getKey()) + keyEntry.getValue());
    } else {
        uniqueKeys2.put(keyEntry.getKey(), keyEntry.getValue());
    }
}
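To also get the per-file output the question asks for, you could build one such map per file and print both before merging. A rough snippet in the same style as above (the helper buildCounts is hypothetical, and it assumes the usual java.nio.file and java.util imports):
// hypothetical helper: counts word frequencies for a single file
static HashMap<String, Integer> buildCounts(Path filePath) throws IOException {
    HashMap<String, Integer> counts = new HashMap<>();
    for (String line : Files.readAllLines(filePath)) {
        for (String key : line.split("[!.?:;\\s]+")) {
            if (!key.isEmpty()) {
                counts.merge(key, 1, Integer::sum);
            }
        }
    }
    return counts;
}

// usage: per-file frequencies, then an overall merge
HashMap<String, Integer> uniqueKeys1 = buildCounts(Paths.get("test.txt"));
HashMap<String, Integer> uniqueKeys2 = buildCounts(Paths.get("test2.txt"));
System.out.println("test.txt:  " + uniqueKeys1);
System.out.println("test2.txt: " + uniqueKeys2);
for (Map.Entry<String, Integer> e : uniqueKeys1.entrySet()) {
    uniqueKeys2.merge(e.getKey(), e.getValue(), Integer::sum);
}
System.out.println("combined:  " + uniqueKeys2);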
UPDATE: code for word occurrences (thanks #George)
This example is for one file; you can use it for multiple files:
import java.io.BufferedReader;
import java.io.FileReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class MyTest {
    Map<String, Integer> mapTable;

    public MyTest(List<String> wordList) {
        // initialize map
        makeMap(wordList);
    }

    public void makeMap(List<String> wordList) {
        mapTable = new HashMap<>();
        for (int i = 0; i < wordList.size(); i++) {
            // fill the map up
            mapTable.put(wordList.get(i), 0);
        }
    }

    // update occurrences in the map
    public void updateMap(String[] _words) {
        for (int i = 0; i < _words.length; i++) {
            updateWordCount(_words[i]);
        }
    }

    public void updateWordCount(String _word) {
        int value = 0;
        // check if the word is present
        if (mapTable.containsKey(_word)) {
            value = mapTable.get(_word);
            value++;
            mapTable.put(_word, value);
        }
    }

    public void DisplayCounts() {
        for (String key : mapTable.keySet()) {
            System.out.println("Word : " + key + "\t Occurrence(s) :" + mapTable.get(key));
        }
    }

    public void getWordCount() {
        String filePath = "C:\\Users\\Jyo\\Desktop\\help.txt";
        String line = "";
        try {
            // FileReader reads text files in the default encoding.
            FileReader fileReader = new FileReader(filePath);
            // Always wrap FileReader in BufferedReader.
            BufferedReader bufferedReader = new BufferedReader(fileReader);
            String _words[] = null;
            while ((line = bufferedReader.readLine()) != null) {
                System.out.println(line);
                _words = line.split(" ");
                updateMap(_words);
            }
            // Always close files.
            bufferedReader.close();
        } catch (Exception e) {
            System.out.println("Error :" + e.getMessage());
        }
    }

    /**
     * @param args the command line arguments
     */
    public static void main(String[] args) {
        List<String> wordList = new ArrayList<>();
        wordList.add("data");
        wordList.add("select");
        MyTest mt = new MyTest(wordList);
        mt.getWordCount();
        mt.DisplayCounts();
    }
}
import java.io.*;
import java.util.*;

public class file1 {
    public static void main(String[] args) throws Exception {
        HashMap<String, Integer> words_fre = new HashMap<String, Integer>();
        HashSet<String> words = new HashSet<String>();
        try {
            File folder = new File("/home/jsrathore/Dropbox/Semester 6th/IR_Lab/lab_01/one");
            File[] listOfFiles = folder.listFiles();
            BufferedReader bufferedReader = null;
            FileInputStream inputfilename = null;
            BufferedWriter out = new BufferedWriter(new OutputStreamWriter(
                    new FileOutputStream("outfilename.txt", false), "UTF-8"));
            for (File file : listOfFiles) {
                inputfilename = new FileInputStream(file);
                /*System.out.println(file);*/
                bufferedReader = new BufferedReader(new InputStreamReader(inputfilename, "UTF-8"));
                String s;
                while ((s = bufferedReader.readLine()) != null) {
                    /*System.out.println(line);*/
                    s = s.replaceAll("\\<.*?>", " ");
                    if (s.contains("॥") || s.contains(":") || s.contains("।") ||
                            s.contains(",") || s.contains("!") || s.contains("?")) {
                        s = s.replace("॥", " ");
                        s = s.replace(":", " ");
                        s = s.replace("।", " ");
                        s = s.replace(",", " ");
                        s = s.replace("!", " ");
                        s = s.replace("?", " ");
                    }
                    StringTokenizer st = new StringTokenizer(s, " ");
                    while (st.hasMoreTokens()) {
                        /*out.write(st.nextToken()+"\n");*/
                        String str = (st.nextToken()).toString();
                        words.add(str);
                    }
                    for (String str : words) {
                        if (words_fre.containsKey(str)) {
                            int a = words_fre.get(str);
                            words_fre.put(str, a + 1);
                        } else {
                            words_fre.put(str, 1); /*uwords++; //unique words count*/
                        }
                    }
                    words.clear();
                    /*out.write("\n");
                    out.close();*/
                }
                Object[] key = words_fre.keySet().toArray();
                Arrays.sort(key);
                for (int i = 0; i < key.length; i++) {
                    //System.out.println(key[i]+"= "+words_fre.get(key[i]));
                    out.write(key[i] + " : " + words_fre.get(key[i]) + "\n");
                }
            }
            out.close();
            bufferedReader.close();
        } catch (FileNotFoundException ex) {
            System.out.println("Error in reading line");
        } catch (IOException ex) {
            /*System.out.println("Error in reading line"+fileReader);*/
            ex.printStackTrace();
        }
    }
}
Late answer; however, the code below will count word frequency efficiently if there are multiple files:
import java.io.IOException;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Scanner;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.atomic.AtomicLong;
public class WordCounter implements Runnable {
    private final Scanner scanner;
    private Map<String, AtomicLong> sharedCounter;

    public WordCounter(Scanner scanner, Map<String, AtomicLong> sharedCounter) {
        this.scanner = scanner;
        this.sharedCounter = sharedCounter;
    }

    public void run() {
        if (scanner == null) {
            return;
        }
        while (scanner.hasNext()) {
            String word = scanner.next().toLowerCase();
            sharedCounter.putIfAbsent(word, new AtomicLong(0));
            sharedCounter.get(word).incrementAndGet();
        }
    }

    public static void main(String[] args) throws IOException {
        // Number of parallel threads to run
        int THREAD_COUNT = 10;
        List<Path> paths = new ArrayList<>();
        // Add paths
        paths.add(Paths.get("test1.txt"));
        paths.add(Paths.get("test2.txt"));
        // Shared word counter
        Map<String, AtomicLong> sharedCounter = new ConcurrentHashMap<>();
        ExecutorService executor = Executors.newFixedThreadPool(THREAD_COUNT);
        for (Path path : paths) {
            executor.execute(new WordCounter(new Scanner(path), sharedCounter));
        }
        executor.shutdown();
        // Wait until all threads are finished
        while (!executor.isTerminated()) {
        }
        System.out.println(sharedCounter);
    }
}
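One note on the wait at the end: instead of spinning in an empty loop until isTerminated() returns true, ExecutorService.awaitTermination lets the main thread block without burning CPU. A possible replacement for that loop (this assumes an extra import of java.util.concurrent.TimeUnit; the one-minute timeout is arbitrary):
// instead of: while (!executor.isTerminated()) { }
try {
    // block up to one minute for the worker threads to finish
    executor.awaitTermination(1, TimeUnit.MINUTES);
} catch (InterruptedException e) {
    Thread.currentThread().interrupt();
}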

Code not printing anything

I am writing code that reads in a text file through the command-line arguments in the main method and prints out each word in it on its own line without printing any word more than once, but it will not print anything. Can anyone help?
import java.util.*;
import java.io.*;
public class Tokenization {
    public static void main(String[] args) throws Exception {
        String x = "";
        String y = "";
        File file = new File(args[0]);
        Scanner s = new Scanner(file);
        String[] words = null;
        while (s.hasNext()) {
            x = s.nextLine();
        }
        words = x.split("\\p{Punct}");
        String[] moreWords = null;
        for (int i = 0; i < words.length; i++) {
            y = y + " " + words[i];
        }
        moreWords = y.split("\\s+");
        String[] unique = unique(moreWords);
        for (int i = 0; i < unique.length; i++) {
            System.out.println(unique[i]);
        }
        s.close();
    }

    public static String[] unique(String[] s) {
        String[] uniques = new String[s.length];
        for (int i = 0; i < s.length; i++) {
            for (int j = i + 1; j < s.length; j++) {
                if (!s[i].equalsIgnoreCase(s[j])) {
                    uniques[i] = s[i];
                }
            }
        }
        return uniques;
    }
}
You have several problems:
you're reading the whole file line by line, but assigning only the last line to the variable x
you're doing 2 splits, both on a regexp; 1 is enough
in unique you're filling only some parts of the array; the other parts stay null
Here is a shorter version of what you need:
import java.io.File;
import java.util.HashSet;
import java.util.Scanner;
import java.util.Set;
public class Tokenization {
    public static void main(String[] args) throws Exception {
        Set<String> words = new HashSet<String>();
        try {
            File file = new File(args[0]);
            Scanner scanner = new Scanner(file);
            while (scanner.hasNext()) {
                String[] lineWords = scanner.nextLine().split("[\\p{Punct}\\s]+");
                for (String s : lineWords)
                    words.add(s.toLowerCase());
            }
            scanner.close();
        } catch (Exception e) {
            System.out.println("Cannot read file [" + e.getMessage() + "]");
            System.exit(1);
        }
        for (String s : words)
            System.out.println(s);
    }
}
