I've implement Tarjan's algorithm following the Wikipedia's article however I'm experiencing a problem. What I'm trying to do is to find all strongly connected components with size greater than 1.
With smaller sized inputs everything works fine however, when tested with input.txt the algorithm starts to behave strangely - on changing the initial value of n (for example 1, -10, 3) the program gives different results. And that's what I cannot understand. The algorithm should not depend on the initial value since n is used only as time stamp. If have any idea please share it it would help me so much, thank you.
Here's the code:
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Stack;
public class GraphAlgorithms {
HashMap<String, HashSet<String>> graph;
int n;
public GraphAlgorithms(String fileName) {
graph = new HashMap<String, HashSet<String>>();
try (BufferedReader reader = new BufferedReader(new FileReader(fileName))) {
String line;
while ((line = reader.readLine()) != null) {
String[] tokens = line.split(" +");
HashSet<String> to = new HashSet<String>();
for (int i = 1; i < tokens.length; i++)
to.add(tokens[i]);
graph.put(tokens[0], to);
}
} catch (IOException e) {
e.printStackTrace();
}
}
public GraphAlgorithms(HashMap<String, HashSet<String>> graph) {
this.graph = graph;
}
private void strongConnect(String v, HashMap<String, Integer> index,
HashMap<String, Integer> lowlink, Stack<String> stack,
HashSet<String> inStack, ArrayList<HashSet<String>> ans)
{
index.put(v, n);
lowlink.put(v, n);
++n;
stack.push(v);
inStack.add(v);
HashSet<String> successors = graph.get(v);
for(String w : successors) {
if (!index.containsKey(w)) {
strongConnect(w, index, lowlink, stack, inStack, ans);
if (lowlink.get(w) < lowlink.get(v))
lowlink.put(v, lowlink.get(w));
}
else if (inStack.contains(w)) {
if (index.get(w) < lowlink.get(v))
lowlink.put(v, index.get(w));
}
}
if (index.get(v) == lowlink.get(v)) {
HashSet<String> connectedComponent = new HashSet<String>();
String w;
do {
w = stack.pop();
inStack.remove(w);
connectedComponent.add(w);
}
while (!w.equals(v));
ans.add(connectedComponent);
}
}
public ArrayList<HashSet<String>> getStronglyConnectedComponents() {
HashMap<String, Integer> index = new HashMap<String, Integer>();
HashMap<String, Integer> lowlink = new HashMap<String, Integer>();
Stack <String> stack = new Stack<String>();
HashSet<String> inStack = new HashSet<String>();
ArrayList<HashSet<String>> ans = new ArrayList<HashSet<String>>();
n = 0;
for (String v : graph.keySet()) {
if (!index.containsKey(v))
strongConnect(v, index, lowlink, stack, inStack, ans);
}
return ans;
}
public void printComponentsLargerThanOne() {
for (HashSet<String> component : getStronglyConnectedComponents()) {
if (component.size() > 1) {
for (String element : component) {
System.out.print(element + " ");
}
System.out.println();
}
}
}
}
And the main class
public class Main {
public static void main(String[] args) {
GraphAlgorithms ga = new GraphAlgorithms("input.txt");
ga.printComponentsLargerThanOne();
}
}
Since Java apparently doesn't always intern objects of type java.lang.Integer, the line
if (index.get(v) == lowlink.get(v)) {
should be
if (index.get(v).equals(lowlink.get(v))) {
Related
I am trying to create a hashtable to get an ArrayList from my text file read it and then count it into an another text file. I should tokenize each word and get the keys and values by counting them. So far I am still at the beginning and I don't get what is wrong with my code, it seems there is no error but it doesn't connect to the text and get the ArrayList or simply my code is wrong. I would appreciate any help. Thanks.
This is the Map file
public class Map {
public static String fileName= "C:Users\\ruken\\OneDrive\\Desktop\\workshop.txt";
private ArrayList<String> arr = new ArrayList<String>();
public ArrayList <String>getList () {
return this.arr;
}
private Hashtable<String, Integer> map = new Hashtable<String, Integer>();
public void load(String path) {
try{
FileReader f2 = new FileReader("C:Users\\ruken\\OneDrive\\Desktop\\workshop.txt");
Scanner s = new Scanner(f2);
while (s.hasNextLine()) {
String line = s.nextLine();
String[] words = line.split("\\s");
for (int i=0;i<words.length; i++){
String word = words[i];
if (! word.isEmpty()){
System.out.println(word);
arr.add(word);
}
}
}
f2.close();
System.out.println("An error occurred");
}
catch(IOException ex1)
{
Collections.sort(arr);
System.out.println("An error occurred.");
for (String counter: arr) {
System.out.println(counter);
}
ex1.printStackTrace();
}
}
public static void main(String[] args) {
Map m =new Map();
m.load("C:Users\\ruken\\OneDrive\\Desktop\\out.txt");
}
public Object get(String word) {
return null;
}
public void put(String word, int i) {
}
}
This is the Reduce file
package com.company;
import java.io.*;
import java.util.*;
public class Reduce {
private Hashtable<String, Integer> map=new Hashtable< String, Integer>();
public Hashtable < String, Integer> getHashTable () {
return map;
}
public void setHashTable ( Hashtable < String, Integer> map){
this.map =map;
}
public void findMin () {
}
public void findMax() {
}
public void sort (ArrayList<String> arr) throws IOException {
Collections.sort(arr);
Iterator it1 = arr.iterator();
while (it1.hasNext()) {
String word = it1.next().toString();
System.out.println(word);
}
}
//constructors
public void reduce (ArrayList<String> words) {
Iterator<String> it1 =words.iterator();
while (it1.hasNext()) {
String word=it1.next();
System.out.println (word);
if (map.containsKey(word)) {
map.put(word, 1);
}
else {
int count = map.get(word);
map.put(word, count+1);
}
System.out.println( map.containsValue(word));
}
}
}
Here is a part of workshop.txt. It is s basic simple text
"
Acknowledgements
I would like to thank Carl Fleischhauer and Prosser Gifford for the
opportunity to learn about areas of human activity unknown to me a scant
ten months ago, and the David and Lucile Packard Foundation for
supporting that opportunity. The help given by others is acknowledged on
a separate page.
19 October 1992
*** *** *** ****** *** *** ***
INTRODUCTION
The Workshop on Electronic Texts (1) drew together representatives of
various projects and interest groups to compare ideas, beliefs,
experiences, and, in particular, methods of placing and presenting
historical textual materials in computerized form. Most attendees gained
much in insight and outlook from the event. But the assembly did not
form a new nation, or, to put it another way, the diversity of projects
and interests was too great to draw the representatives into a cohesive,
action-oriented body.(2)"
Counting word frequency in text can be accomplished using the java stream API
Here is my implementation, followed by explanatory notes.
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Arrays;
import java.util.Hashtable;
import java.util.Map;
import java.util.function.BiConsumer;
import java.util.function.BinaryOperator;
import java.util.function.Function;
import java.util.function.Supplier;
import java.util.stream.Collectors;
public class WordFreq {
public static void main(String[] args) {
Path path = Paths.get("workshop.txt");
Function<String, String> keyMapper = Function.identity();
Function<String, Integer> valueMapper = (word) -> Integer.valueOf(1);
BinaryOperator<Integer> mergeFunction = (a, b) -> Integer.valueOf(a.intValue() + b.intValue());
Supplier<Hashtable<String, Integer>> mapSupplier = () -> new Hashtable<>();
try {
Map<String, Integer> map = Files.lines(path)
.flatMap(line -> Arrays.stream(line.split("\\b")))
.filter(word -> word.matches("^\\w+$"))
.map(word -> word.toLowerCase())
.collect(Collectors.toMap(keyMapper, valueMapper, mergeFunction, mapSupplier));
BiConsumer<String, Integer> action = (k, v) -> System.out.printf("%3d %s%n", v, k);
map.forEach(action);
}
catch (IOException xIo) {
xIo.printStackTrace();
}
}
}
Method lines() in class java.nio.file.Files creates a stream of the lines of text in the file. In this case the file is your workshop.txt file.
For each line of the file that is read, I split it into words using method split() in class java.lang.String and convert the array returned by method split() into another stream.
Actually each line of text is split at every word boundary so the array of words that method split() returns may contain strings that aren't really words. Therefore I filter the "words" in order to extract only real words.
Then I convert each word to lower case so that my final map will be case-insensitive. In other words, the word The and the word the will be considered the same word.
Finally I create a Map where the map key is a distinct word in the text of file workshop.txt and the map value is an Integer which is the number of occurrences of that word in the text.
Since you stipulated that the Map must be a Hashtable, I explicitly created a Hashtable to store the results of the collect operation on the stream.
The last part of the above code displays the contents of the Hashtable.
I sorted out the first part, "Map" as below, now I have an alphabetically sorted array.
as follows..now I should count the tokenized key values.
"..
yet
yet
yet
yet
yet
yielded
you
young
zeal
zero.
zooming
..."
package com.company;
import java.io.FileReader;
import java.io.IOException;
import java.util.*;
import java.util.Collections;
public class Map {
public static String fileName= "C:\\Users\\ruken\\OneDrive\\Desktop\\workshop.txt";
private ArrayList<String> arr = new ArrayList<String>();
public ArrayList <String>getList () {
return this.arr;
}
private Hashtable<String, Integer> map = new Hashtable<String, Integer>();
public void load() {
try{
FileReader f2 = new FileReader("C:\\Users\\ruken\\OneDrive\\Desktop\\workshop.txt");
Scanner s = new Scanner(f2);
while (s.hasNextLine()) {
String line = s.nextLine();
String[] words = line.split("\\s");
for (int i=0;i<words.length; i++){
String word = words[i];
if (! word.isEmpty()){
System.out.println(word);
arr.add(word);
}
}
}
f2.close();
System.out.println();
}
catch(IOException ex1){
System.out.println("An error occurred.");
ex1.printStackTrace(); }
{
Collections.sort(arr);
System.out.println("Sorted.");
for (String counter: arr) {
System.out.println(counter);
}
}
}
public static void main(String[] args) {
Map m =new Map();
m.load();
}
}
The second part which is doing the reducing is:
package com.company;
import java.io.*;
import java.util.*;
import java.io.FileWriter;
import java.io.IOException;
public class Reduce {
private Hashtable<String, Integer> map = new Hashtable<String, Integer>();
public Hashtable<String, Integer> getHashTable() {
return map;
}
public void setHashTable(Hashtable<String, Integer> map) {
this.map = map;
}
//constructors
public void reduce (ArrayList<String> arr) {
Iterator<String> it1 = arr.iterator();
while (it1.hasNext()) {
String word = it1.next();
System.out.println(word);
if (map.containsKey(word)) {
int a = (int) map.get(word);
a++;
map.put(word, a);
} else {
map.put(word, 1);
}
}
}
public void write () {
try {
FileWriter f1 = new FileWriter("C:\\Users\\ruken\\OneDrive\\Desktop\\output.txt");
Iterator<String> it1 = map.keySet().iterator();
while (it1.hasNext()) {
String word = it1.next().toString();
f1.write(word + "" + ":" + "" + map.get(word) + "\n" );
}
f1.close();
} catch (IOException e) {
e.printStackTrace();
}
}
public static void main(String[] args) {
Map m =new Map();
m.load();
Reduce r = new Reduce ();
ArrayList<String> arr= m.getList();
r.reduce(arr);
r.write();
}
}
I have a working word occurrence program that took me a while to code (still new at Java) and I was wondering if I could get a little assistance. Here is my code that I have so far:
import java.io.*;
import java.util.ArrayList;
import java.util.List;
public class TestWordOccurenceProgram {
public static void main(String[] args) {
String thisLine = null;
try {
FileReader fr = new FileReader("myTextDocument.txt");
BufferedReader br = new BufferedReader(fr);
//List<String> wordList = new ArrayList<>();
List<String> words = new ArrayList<>();
// make ArrayList of integers
List<Integer> counts = new ArrayList<>();
String word = "";
while ((thisLine = br.readLine()) != null ) {
word = word.concat(thisLine);
word = word.concat(" ");
}
String[] wordList = word.split("\\s");
for (int i = 0; i < wordList.length; i++) {
String temp = wordList[i];
if(words.contains(temp)) {
int x = words.indexOf(temp);
int value = counts.get(x);
value++;
counts.set(x, value);
}
else {
words.add(temp);
counts.add(1);
}
}
for (int i = 0; i < words.size(); i++) {
System.out.println(words.get(i) + ": " + counts.get(i));
}
br.close();
} catch (FileNotFoundException e) {
e.printStackTrace();
System.exit(1);
} catch (IOException e) {
e.printStackTrace();
System.exit(1);
}
}
}
Here is what "myTextDocument.txt" has:
i am a rabbit
a happy rabbit am
yay i am a rabbit
a rabbit i am yay
Here is my output:
i: 3
am: 4
a: 4
rabbit: 4
happy: 1
yay: 2
Does anyone know if I could arrange these items from the highest number of word occurrences to the lowest number of word occurrences? Any help would be great!
You can use Map instead of List. and use compare method to sort map via its value.
refer this code :
import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Map;
import java.util.TreeMap;
public class PQ {
public static void main(String[] args) {
String thisLine = null;
try {
FileReader fr = new FileReader("D:\\test.txt");
BufferedReader br = new BufferedReader(fr);
HashMap<String,Integer> map = new HashMap<String,Integer>();
ValueComparator comparator = new ValueComparator(map);
TreeMap<String, Integer> treemap = new TreeMap<String, Integer>(comparator);
while((thisLine = br.readLine()) != null){
String[] str = thisLine.split("\\s+");
for(String s:str){
if(map.containsKey(s)){
Integer i = map.get(s);
i++;
map.put(s,i);
}else{
map.put(s, 1);
}
}
}
treemap.putAll(map);
System.out.println(treemap);
br.close();
} catch (FileNotFoundException e) {
e.printStackTrace();
System.exit(1);
} catch (IOException e) {
e.printStackTrace();
System.exit(1);
}
}
}
class ValueComparator implements Comparator<String>{
Map<String, Integer> base;
public ValueComparator(Map<String, Integer> base) {
this.base = base;
}
public int compare(String a, String b) {
if (base.get(a) >= base.get(b)) {
return -1;
} else {
return 1;
}
}
}
Rather than using two separate lists (one with words, one with counts), why not create a WordAndCount object that has something like getWord and getCount methods? This WordAndCount class can implement Comparable, where you do comparisons based on count. Then, you can store a single List<WordAndCount>, and just sort the single list using Collections.sort.
Roughly, the outline could look like this:
public class WordAndCount implements Comparable<WordAndCount> {
private String word;
private int count;
public WordAndCount(String word) {...}
public void incrementCount() {...}
public int compareTo(WordAndCount other) {...}
}
Wrapping up the combination into a single class makes this much easier to solve, as it provides the easy link between word and its count.
I would recommend using Collections in Java for this, but instead you can use temp variables.
So the idea is to sort by counts. Pseudo-code before outputting:
int tempCount;
String tempWord;
for (int i = 1; i < counts.size(); i++) {
if (counts.get(i) < counts.get(i-1)) {
tempCount = counts.get(i-1);
tempWord = words.get(i-1);
counts.set(i-1, i);
counts.set(i, tempCount);
words.set(i-1, i);
words.set(i, tempWord);
}
You'd need an extra loop around that to correctly order them but hopefully gives you the right idea.
My program is working fine on all parts except one. I am attempting to post as little code as possible. Please let me know if more is needed.
How do I find the name that occurs the most in a String, or StringBuilder? The "getWinner" method is where I am having trouble. I want to find the name (or winner) that occurs the most in a string. If their is a tie, the name that appears first is sufficient. Thanks in advance!
import java.util.ArrayList;
public class BallotBox
{
private ArrayList<String> ballots;
public BallotBox()
{
ballots = new ArrayList<String>();
}
public void addVote(String candidate)
{
ballots.add(candidate);
}
//****below is the method that's presenting a problem.****
public String getWinner()
{
StringBuilder candidates = new StringBuilder();
String winner = "";
for(int i = 0; i < ballots.size(); i++)
{
}
return winner;
}
public int getVoteCount(String candidate)
{
int count = 0;
for(int i = 0; i < ballots.size(); i++)
{
if(ballots.get(i).equals(candidate))
{
count++;
}
}
return count;
}
public String getResults()
{
StringBuilder resultTable = new StringBuilder();
ArrayList<String> printed = new ArrayList<String>();
for (String candidate : ballots)
{
if (!printed.contains(candidate))
{
resultTable.append(String.format("%s (%d)\n", candidate, getVoteCount(candidate)));
printed.add(candidate);
}
}
return resultTable.toString();
}
}
You can try to convert the list to a Set and use the Collections.frequency method.
Set<String> uniqueSet = new HashSet<String>(list);
for (String temp : uniqueSet)
{
System.out.println(temp + ": " + Collections.frequency(list, temp));
}
You'll get the output as shown below.
d: 1
b: 2
c: 2
a: 4
Check the link for more details
http://www.mkyong.com/java/how-to-count-duplicated-items-in-java-list/
You can use a HashMap to keep the votes for every candidate, and update the winner as soon as you find a new winner (more votes than the current winner):
public String getWinner()
{
final Map<String, Integer> votesCount = new HashMap<String, Integer>();
String winner = ballots.get(0);
int winnerVotes = 1;
for(final String ballot : ballots)
{
if (!votesCount.containsKey(ballot))
votesCount.put(ballot, 0);
votesCount.put(ballot, votesCount.get(ballot)+1);
if (votesCount.get(ballot)>winnerVotes)
{
winner = ballot;
winnerVotes = votesCount.get(ballot);
}
}
return winner;
}
Here is a working example. Hope this explains how the above code can be used in your application.
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.Set;
public class BallotBox
{
private ArrayList<String> ballots;
public BallotBox()
{
ballots = new ArrayList<String>();
ballots.add("John");
ballots.add("Eric");
ballots.add("Mary");
ballots.add("Eric");
ballots.add("Mary");
ballots.add("Mary");
ballots.add("John");
ballots.add("Mary");
}
public void addVote(String candidate)
{
ballots.add(candidate);
}
// ****below is the method that's presenting a problem.****
public String getWinner()
{
String winner = "";
// To check who has the highest votes.
int highestVotes = 0;
Set<String> uniqueSet = new HashSet<String>(ballots);
for (String temp : uniqueSet)
{
// The count of each Candidate's votes.
int count = Collections.frequency(ballots, temp);
// The winner is the one with the highest votes.
if(count > highestVotes)
{
highestVotes = count;
winner = temp;
}
}
return winner;
}
public static void main(String[] args)
{
BallotBox ballotBox = new BallotBox();
System.out.println(ballotBox.getWinner());
}
}
My code is:
public class Main{
public static void main(String[] args){
WordGroup wordgroupOne= new WordGroup ("You can discover more about a person in an hour of play than in a year of conversation");
WordGroup wordgroupTwo= new WordGroup ( "When you play play hard when you work dont play at all");
String[] quoteOne = wordgroupOne.getWordArray();
String[] quoteTwo = wordgroupTwo.getWordArray();
for (String words : quoteOne){
System.out.println(words);
}
for (String words : quoteTwo){
System.out.println(words);
}
}
}
WordGroup class:
import java.util.HashSet;
import java.util.HashMap;
public class WordGroup {
public String words;
public WordGroup (String getWords){
words = getWords.toLowerCase();
}
public String[] getWordArray(){
return words.split(" ");
}
public HashSet<String> getWordSet(){
HashSet<String> set = new HashSet<String>();
String[] p = getWordArray();
for (String items : p){
set.add(items);
}
System.out.println(set);
return set;
}
public HashMap<String, Integer> getWordCounts() {
HashMap<String, Integer> map = new HashMap<String, Integer>();
String[] q = getWordArray();
for (String stuff : q) {
Integer oldVal = map.get(stuff);
if (oldVal == null){
oldVal = 0;
}
map.put(stuff, oldVal+1);
}
System.out.println(map);
return map;
}
}
What I am trying to do is use the getWordSet() method using the two WordGroups and
iterate or loop over the HashSet returned and print the words from it.
Call getWordCounts() on the two WordGroups. Use keySet() to retrieve the set of keys. Loop over this set and print out the word and its count for both WordGroups.
Use the getWordSet() method to make complete set of all the words from both WordGroups.
Loop over the new HashSet to print a complete list of all words with the sum counts from each of the hashmaps.
I am struggling with all of these. Any help is much appreciated!!
If you want to create a combined list or set, you will have to merge the lists together and the maps together. I leave that exercise to you.
public static void main(String[] args)
{
WordGroup wg1 = new WordGroup(
"You can discover more about a person in an hour of play than in a year of conversation");
WordGroup wg2 = new WordGroup(
"When you play play hard when you work dont play at all");
wg1.processWord();
// iterate through all the distinct words
Set<String> dw1 = wg1.getDistinctWords();
for (String s : dw1)
{
System.out.println(s);
}
// use map entry to iterate through the entry set
Map<String, Integer> wc1 = wg1.getWordCounts();
for (Map.Entry<String, Integer> entry : wc1.entrySet())
{
if (entry != null)
{
// use stringbuilder to build a temp string
// instead of using +
StringBuilder sb = new StringBuilder();
sb.append(entry.getKey());
sb.append(": ");
sb.append(entry.getValue());
System.out.println(sb);
}
}
}
public class WordGroup
{
// as a class, made the results of the process private
private String originalWord;
// we declare generic versions of the Collections, instead of the specific
// implementation
private Set<String> distinctWords;
private Map<String, Integer> wordCounts;
public WordGroup(String s)
{
this.originalWord = s;
// here we declare and initialize the specific implementation
this.distinctWords = new HashSet<String>();
this.wordCounts = new HashMap<String, Integer>();
}
public void processWord()
{
List<String> toProcess = getWordList();
if (toProcess != null && !toProcess.isEmpty())
{
for (String s : toProcess)
{
// the set will automatically figure out if it should be in the
// set or not.
this.distinctWords.add(s);
// call the update or insert method
upsertString(s);
}
}
}
// this splits the string into a list
// you could probably use a utility class from guava or something to do this
// but i have coded a naive version
private List<String> getWordList()
{
List<String> splitList = new ArrayList<String>();
// check to see if there is anything there
if (this.originalWord != null && !this.originalWord.isEmpty())
{
String lowered = this.originalWord.toLowerCase();
String[] splits = lowered.split(" ");
if (splits != null)
{
int iSize = splits.length;
if (iSize > 0)
{
// basically create a string
for (int i = 0; i < iSize; i++)
{
splitList.add(splits[i]);
}
}
}
}
return splitList;
}
// helper method to see if we need to add to the count
private void upsertString(String s)
{
if (s != null && !s.isEmpty())
{
if (this.wordCounts != null)
{
// default to 1, if its an insert
Integer newCount = 1;
// if it already exists we want to update
if (this.wordCounts.containsKey(s))
{
Integer currentCount = this.wordCounts.get(s);
if (currentCount != null)
{
// update the count by 1
newCount += currentCount;
}
}
// insert the new item
// or overwrite, because it is the same key to the new count
this.wordCounts.put(s, newCount);
}
}
}
public String getOriginalWord()
{
return this.originalWord;
}
public void setOriginalWord(String originalWord)
{
this.originalWord = originalWord;
}
public Set<String> getDistinctWords()
{
return this.distinctWords;
}
public void setDistinctWords(Set<String> distinctWords)
{
this.distinctWords = distinctWords;
}
public Map<String, Integer> getWordCounts()
{
return this.wordCounts;
}
public void setWordCounts(Map<String, Integer> wordCounts)
{
this.wordCounts = wordCounts;
}
}
I am trying to work out how to scan a text file of a conversation find how many positive words and negative words there are. The positive and negative words are contained within two separate text files which are used to 'scan' the conversation text file.
After it finds the number of positive and negative words I am trying to get it to tally each up and then tell me if there are more positive or negative words found.
I have the code below so far, it only gives me a count on the positive words. I am not looking at something like NLP at this stage just something on a much more basic level.
I think I have the second part looking for the negative words in the wrong location. And I think I need to use a boolean to tell me if there are more positive or negative words found, but I can't work out how to do it.
I am pretty stuck as I am new to Java, and programing in general.
Any help would be greatly appreciated.
package omgilisearch;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.util.HashSet;
import java.util.Map;
import java.util.Scanner;
import java.util.Set;
import java.util.TreeMap;
public class SentimentTest {
public static void main(String[] args) throws Exception {
printAllCounts(
readWordFile("ConversationTest.txt", loadKeywords("PositiveWords.txt")));
}
public static void main1(String[] args) throws Exception {
printAllCounts(
readWordFile("ConversationTest.txt", loadKeywords("NegativeWords.txt")));
}
private static Map<String, Integer> readWordFile(
String fname, Set<String> keywords) throws FileNotFoundException
{
final Map<String, Integer> frequencyData = new TreeMap<String, Integer>();
for (Scanner wordFile = new Scanner(new FileReader(fname));
wordFile.hasNext();)
{
final String word = wordFile.next();
if (keywords.contains(word))
frequencyData.put(word, getCount(word, frequencyData) + 1);
}
return frequencyData;
}
private static void printAllCounts(Map<String, Integer> frequencyData) {
System.out.println("-----------------------------------------------");
System.out.println(" Occurrences Word");
for(Map.Entry<String, Integer> e : frequencyData.entrySet())
System.out.printf("%15d %s\n", e.getValue(), e.getKey());
System.out.println("-----------------------------------------------");
}
private static int getCount(String word, Map<String, Integer> frequencyData) {
return frequencyData.containsKey(word)? frequencyData.get(word) : 0;
}
private static Set<String> loadKeywords(String fname)
throws FileNotFoundException
{
final Set<String> result = new HashSet<String>();
for (Scanner s = new Scanner(new FileReader(fname)); s.hasNext();)
result.add(s.next());
return result;
}
}
You would have to have some array of so called "bad" words (wich are hard coded) and then iterate through the whole text file and compare every word in the array with the word you currently inspecting. If the word matches with one of the words in the array, then increase some variable that is holding the amount of badwords eg. badWords++;. I believe this approach should work.
package omgilisearch;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.util.HashSet;
import java.util.Map;
import java.util.Scanner;
import java.util.Set;
import java.util.TreeMap;
public class SentimentTest {
public static void main(String[] args) throws Exception {
printAllCounts(
readWordFile("ConversationTest.txt"));
}
private static Map<String, Integer> readWordFile(String string) {
return null;
}
String[] goodWordsHolder = new String[3];{
goodWordsHolder[0] = "good"; goodWordsHolder[1] = "great";goodWordsHolder[2] = "excellent";
for(int iteration = 0; iteration < goodWordsHolder.length; iteration++) { String currentWordInText;
if(goodWordsHolder[iteration] == currentWordInText) { }// The word is a bad word } }
private static void printAllCounts(Map<String, Integer> frequencyData) {
System.out.println("-----------------------------------------------");
System.out.println(" Occurrences Word");
for(Map.Entry<String, Integer> e : frequencyData.entrySet())
System.out.printf("%15d %s\n", e.getValue(), e.getKey());
System.out.println("-----------------------------------------------");
}
}
package omgilisearch;
import java.io.*;
public class SentimentTest {
public static void main(String[] args) {
String[] lines = new String[0];
String path = "ConversationTest.txt";
BufferedReader br = null;
try {
File file = new File(path);
br = new BufferedReader(
new InputStreamReader(
new FileInputStream(file)));
String line;
while( (line = br.readLine()) != null ) {
lines = add(line, lines);
}
br.close();
} catch(IOException e) {
System.out.println("read error: " + e.getMessage());
}
print(lines);
}
private static String[] add(String s, String[] array) {
String[] goodWordsHolder = new String[3];{
}goodWordsHolder[0] = "good"; goodWordsHolder[1] = "great";goodWordsHolder[2] = "excellent";
for(int iteration = 0; iteration < goodWordsHolder.length; iteration++) { String currentWordInText = null; if(goodWordsHolder[iteration] == currentWordInText) { }}
return goodWordsHolder; }
private static void print(String[] data) {
for(int i = 0; i < data.length; i++)
System.out.println(data[i]);
}
}
Arrays store multiple items of the same information type eg. String[] badWords;. I believe you should use this, since I'm sure you will have more than 1 bad word that you would like to find in the conversation text, if not, then simple use 1 String eg. String badWord;.
I'm not going to write out all the code that will make it work, I'll just give you an algorithm.
public class test {
// The process of picking out all the good and bad words
public static void main(String[] args) {
// Setting up all the needed variables
// Set up all the good words
String[] goodWordsHolder = new String[2];
goodWordsHolder[0] = "firstGoodWord";
goodWordsHolder[1] = "secondGoodWord";
// Set up all the bad words
String[] badWordsHolder = new String[2];
badWordsHolder[0] = "firstBadWord";
badWordsHolder[1] = "secondBadWord";
// Set up the counters
int amountOfGoodWords = 0;
int amountOfBadWords = 0;
int currentWordInText = 0;
// boolean that will exit the loop
boolean ConversationEnded = false;
while(!ConversationEnded) {
// Compare the currentWord from the conversation with the hard coded words
for(int iteration = 0; iteration < goodWordsHolder.length; iteration++) {
if(goodWordsHolder[iteration] == getWordInText(currentWordInText)) {
amountOfGoodWords++;
}
}
for(int iteration = 0; iteration < badWordsHolder.length; iteration++) {
if(badWordsHolder[iteration] == getWordInText(currentWordInText)) {
amountOfBadWords++;
}
}
// Increase the current word value so the next time we compare the next word in the conversation will be compared
currentWordInText++;
// Check that we haven't reached the end of the conversation
if(endOfTheConversationHasBeenReached()) {
// This will exit the while loop
ConversationEnded = true;
}
}
// Now print all the information to the console
System.out.println("Amount of good Words: " + amountOfGoodWords);
System.out.println("Amount of bad Words: " + amountOfBadWords);
if(amountOfGoodWords > amountOfBadWords) {
System.out.println("There are more good words than bad words.");
}
else {
System.out.println("There are more bad words than good words.");
}
}
// The method(s) you'll have to code out yourself. I suggest you read up on the web and so on to assist you with this.
private static String getWordInText(int currentWordInText) {
// TODO Auto-generated method stub
return null;
}
private static boolean endOfTheConversationHasBeenReached() {
// TODO Auto-generated method stub
return false;
}
}
Excuse me if there are any logical errors. The code hasn't been debugged yet. ;) Hopefully this will guide you into the right direction.