Removing values from edges - java

I have data in following format
String [] data = new String[]{"-166444026 0 file 20130801",
"-166444026 0 file 20130802",
"-166444027 0 file 20130802"};
These are tab seperated strings.
Now..
So we have
key, value,type,date.
What I want is to keep latest unique keys in the list.
So for example...
The output of this operation should be...
["-166444026 0 file 20130802",
"-166444027 0 file 20130802"];
Because -166444026 is present twice but the one i removed has date of 20130801 and this one has a later date?
I wrote the code.. but this just returns everything.(I thought it was suppose to remove.. but it isnt..)??
Any clues?
package org.random_scripts;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.json.simple.JSONObject;
public class Regex {
private static boolean alreadyExists(Map<String, Long> dict, String key) {
if(dict.containsKey(key))
return true;
return false;
}
private static void removeEdge(ArrayList<JSONObject>edgeList, String key) {
for(JSONObject edge:edgeList) {
if (edge.get("destination").equals(key))
edge.remove(key);
}
}
public static void main(String[] args) {
String [] data = new String[]{"-166444026 0 file 20130801",
"-166444026 0 file 20130802",
"-166444026 0 file 20130802"};
try {
String key = "185479485";
JSONObject jsn = new JSONObject();
jsn.put("source", key.toString());
ArrayList<JSONObject> edges = new ArrayList<JSONObject>();
Map<String, Long> tracker = new HashMap<String, Long>();
for (int i=0; i < data.length; i++) {
String value = data[i];
//edgeString+= value.toString()+",";
String[] typeChunks = value.toString().split("\t");
String destination = typeChunks[0];
Double reputation = Double.parseDouble(String.valueOf(typeChunks[1]));
System.out.println(tracker.values().toString());
JSONObject edgeJson = new JSONObject();
Long date = Long.valueOf(typeChunks[3]);
if(alreadyExists(tracker,destination)) {
Long prev_date = tracker.get(destination);
System.out.println(true);
if (date > prev_date) {
//remove edge
removeEdge(edges,destination);
System.out.println("edges are" + edges.toString());
}
}
else {
tracker.put(destination, date);
}
edgeJson.put("destination", destination);
edgeJson.put("reputation", reputation);
edgeJson.put("type", typeChunks[2]);
edges.add(edgeJson);
}
jsn.put("edgelist", edges);
System.out.println(jsn.toJSONString());
//context.write(NullWritable.get(), new Text(jsn.toJSONString()));
//edgeString = edgeString.substring(0,edgeString.length()-1);
//edgeString+="]";
}
catch (Exception e) {
System.out.println("Exceptiom");
//context.write(NullWritable.get(),new Text(s) );
}
}
}

Use a HashMap, then assign each line to a key in the HashMap.
The HashMap will retain only the latest assigned value, so you can decide if you want to reassign it or not depending on its date.
If you need to preserve the original order of the lines, you can use a LinkedHashMap which preserves insertion order.
Hope it helps.

Search a last word with a regexp and then just iterate the array?
String key = data[data.length - 1].replaceAll(".*\\s", "");
for (int i = 0; i < data.length; i++) {
if (data[i].endsWith(key)) {
System.out.println(data[i]);
}
}
Out:
-166444026 0 file 20130802
-166444027 0 file 20130802

Related

Trying to create a hashtable to get an arraylist from the text file - Java

I am trying to create a hashtable to get an ArrayList from my text file read it and then count it into an another text file. I should tokenize each word and get the keys and values by counting them. So far I am still at the beginning and I don't get what is wrong with my code, it seems there is no error but it doesn't connect to the text and get the ArrayList or simply my code is wrong. I would appreciate any help. Thanks.
This is the Map file
public class Map {
public static String fileName= "C:Users\\ruken\\OneDrive\\Desktop\\workshop.txt";
private ArrayList<String> arr = new ArrayList<String>();
public ArrayList <String>getList () {
return this.arr;
}
private Hashtable<String, Integer> map = new Hashtable<String, Integer>();
public void load(String path) {
try{
FileReader f2 = new FileReader("C:Users\\ruken\\OneDrive\\Desktop\\workshop.txt");
Scanner s = new Scanner(f2);
while (s.hasNextLine()) {
String line = s.nextLine();
String[] words = line.split("\\s");
for (int i=0;i<words.length; i++){
String word = words[i];
if (! word.isEmpty()){
System.out.println(word);
arr.add(word);
}
}
}
f2.close();
System.out.println("An error occurred");
}
catch(IOException ex1)
{
Collections.sort(arr);
System.out.println("An error occurred.");
for (String counter: arr) {
System.out.println(counter);
}
ex1.printStackTrace();
}
}
public static void main(String[] args) {
Map m =new Map();
m.load("C:Users\\ruken\\OneDrive\\Desktop\\out.txt");
}
public Object get(String word) {
return null;
}
public void put(String word, int i) {
}
}
This is the Reduce file
package com.company;
import java.io.*;
import java.util.*;
public class Reduce {
private Hashtable<String, Integer> map=new Hashtable< String, Integer>();
public Hashtable < String, Integer> getHashTable () {
return map;
}
public void setHashTable ( Hashtable < String, Integer> map){
this.map =map;
}
public void findMin () {
}
public void findMax() {
}
public void sort (ArrayList<String> arr) throws IOException {
Collections.sort(arr);
Iterator it1 = arr.iterator();
while (it1.hasNext()) {
String word = it1.next().toString();
System.out.println(word);
}
}
//constructors
public void reduce (ArrayList<String> words) {
Iterator<String> it1 =words.iterator();
while (it1.hasNext()) {
String word=it1.next();
System.out.println (word);
if (map.containsKey(word)) {
map.put(word, 1);
}
else {
int count = map.get(word);
map.put(word, count+1);
}
System.out.println( map.containsValue(word));
}
}
}
Here is a part of workshop.txt. It is s basic simple text
"
Acknowledgements
I would like to thank Carl Fleischhauer and Prosser Gifford for the
opportunity to learn about areas of human activity unknown to me a scant
ten months ago, and the David and Lucile Packard Foundation for
supporting that opportunity. The help given by others is acknowledged on
a separate page.
19 October 1992
*** *** *** ****** *** *** ***
INTRODUCTION
The Workshop on Electronic Texts (1) drew together representatives of
various projects and interest groups to compare ideas, beliefs,
experiences, and, in particular, methods of placing and presenting
historical textual materials in computerized form. Most attendees gained
much in insight and outlook from the event. But the assembly did not
form a new nation, or, to put it another way, the diversity of projects
and interests was too great to draw the representatives into a cohesive,
action-oriented body.(2)"
Counting word frequency in text can be accomplished using the java stream API
Here is my implementation, followed by explanatory notes.
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Arrays;
import java.util.Hashtable;
import java.util.Map;
import java.util.function.BiConsumer;
import java.util.function.BinaryOperator;
import java.util.function.Function;
import java.util.function.Supplier;
import java.util.stream.Collectors;
public class WordFreq {
public static void main(String[] args) {
Path path = Paths.get("workshop.txt");
Function<String, String> keyMapper = Function.identity();
Function<String, Integer> valueMapper = (word) -> Integer.valueOf(1);
BinaryOperator<Integer> mergeFunction = (a, b) -> Integer.valueOf(a.intValue() + b.intValue());
Supplier<Hashtable<String, Integer>> mapSupplier = () -> new Hashtable<>();
try {
Map<String, Integer> map = Files.lines(path)
.flatMap(line -> Arrays.stream(line.split("\\b")))
.filter(word -> word.matches("^\\w+$"))
.map(word -> word.toLowerCase())
.collect(Collectors.toMap(keyMapper, valueMapper, mergeFunction, mapSupplier));
BiConsumer<String, Integer> action = (k, v) -> System.out.printf("%3d %s%n", v, k);
map.forEach(action);
}
catch (IOException xIo) {
xIo.printStackTrace();
}
}
}
Method lines() in class java.nio.file.Files creates a stream of the lines of text in the file. In this case the file is your workshop.txt file.
For each line of the file that is read, I split it into words using method split() in class java.lang.String and convert the array returned by method split() into another stream.
Actually each line of text is split at every word boundary so the array of words that method split() returns may contain strings that aren't really words. Therefore I filter the "words" in order to extract only real words.
Then I convert each word to lower case so that my final map will be case-insensitive. In other words, the word The and the word the will be considered the same word.
Finally I create a Map where the map key is a distinct word in the text of file workshop.txt and the map value is an Integer which is the number of occurrences of that word in the text.
Since you stipulated that the Map must be a Hashtable, I explicitly created a Hashtable to store the results of the collect operation on the stream.
The last part of the above code displays the contents of the Hashtable.
I sorted out the first part, "Map" as below, now I have an alphabetically sorted array.
as follows..now I should count the tokenized key values.
"..
yet
yet
yet
yet
yet
yielded
you
young
zeal
zero.
zooming
..."
package com.company;
import java.io.FileReader;
import java.io.IOException;
import java.util.*;
import java.util.Collections;
public class Map {
public static String fileName= "C:\\Users\\ruken\\OneDrive\\Desktop\\workshop.txt";
private ArrayList<String> arr = new ArrayList<String>();
public ArrayList <String>getList () {
return this.arr;
}
private Hashtable<String, Integer> map = new Hashtable<String, Integer>();
public void load() {
try{
FileReader f2 = new FileReader("C:\\Users\\ruken\\OneDrive\\Desktop\\workshop.txt");
Scanner s = new Scanner(f2);
while (s.hasNextLine()) {
String line = s.nextLine();
String[] words = line.split("\\s");
for (int i=0;i<words.length; i++){
String word = words[i];
if (! word.isEmpty()){
System.out.println(word);
arr.add(word);
}
}
}
f2.close();
System.out.println();
}
catch(IOException ex1){
System.out.println("An error occurred.");
ex1.printStackTrace(); }
{
Collections.sort(arr);
System.out.println("Sorted.");
for (String counter: arr) {
System.out.println(counter);
}
}
}
public static void main(String[] args) {
Map m =new Map();
m.load();
}
}
The second part which is doing the reducing is:
package com.company;
import java.io.*;
import java.util.*;
import java.io.FileWriter;
import java.io.IOException;
public class Reduce {
private Hashtable<String, Integer> map = new Hashtable<String, Integer>();
public Hashtable<String, Integer> getHashTable() {
return map;
}
public void setHashTable(Hashtable<String, Integer> map) {
this.map = map;
}
//constructors
public void reduce (ArrayList<String> arr) {
Iterator<String> it1 = arr.iterator();
while (it1.hasNext()) {
String word = it1.next();
System.out.println(word);
if (map.containsKey(word)) {
int a = (int) map.get(word);
a++;
map.put(word, a);
} else {
map.put(word, 1);
}
}
}
public void write () {
try {
FileWriter f1 = new FileWriter("C:\\Users\\ruken\\OneDrive\\Desktop\\output.txt");
Iterator<String> it1 = map.keySet().iterator();
while (it1.hasNext()) {
String word = it1.next().toString();
f1.write(word + "" + ":" + "" + map.get(word) + "\n" );
}
f1.close();
} catch (IOException e) {
e.printStackTrace();
}
}
public static void main(String[] args) {
Map m =new Map();
m.load();
Reduce r = new Reduce ();
ArrayList<String> arr= m.getList();
r.reduce(arr);
r.write();
}
}

How to add all values from collections.frequency for getting duplicate words using java

for(String temp : uniqueSet) {
if((Collections.frequency(list, temp)) >= 2) {
System.out.println(temp + "=" + (Collections.frequency(list, temp) -1));
}
}
I just want to add my repeated words count.But i cant find it.
In my code snippet,I want to get the frequently occurred words from an text file.
The problem is i can get the values of repeated words like ram=4 sam = 4 man =2,From the text file.
Now,
I want to add 4+4+2 and get total repeated word count as 10.
Any suggestions Welcomed.
I am a beginner to java
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FilenameFilter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.commons.io.FileUtils;
public class testsrepeatedwords {
public static void main(String[] args) throws FileNotFoundException, IOException {
FilenameFilter filter = new FilenameFilter() {
public boolean accept(File dir, String name) {
return name.endsWith(".txt");
}
};
File folder = new File("E:\\testfolder\\");
File[] listOfFiles = folder.listFiles(filter);
for (int i = 0; i < listOfFiles.length; i++) {
File file1 = listOfFiles[i];
try {
String content = FileUtils.readFileToString(file1);
} catch (IOException e) {
e.printStackTrace();
}
BufferedReader ins = null;
try {
ins = new BufferedReader ( new InputStreamReader(new FileInputStream(file1)));
} catch (FileNotFoundException e) { e.printStackTrace(); }
String message = org.apache.commons.io.IOUtils.toString(ins);
String[] stringarray = message.split(" ");
List<String> list = new ArrayList<String>(Arrays.asList(stringarray));
list.removeAll(Arrays.asList("", null));
Set<String> uniqueSet = new HashSet<String>(list);
for (String temp : uniqueSet) {
if ( (Collections.frequency(list, temp) ) >= 2 ){
System.out.println(temp+"="+(Collections.frequency(list, temp) -1) ); //after subtraction
int oc = (Collections.frequency(list, temp) -1) ;
// System.out.println(oc);
// System.out.print(oc+" ");
}
}
}}}
This is my full code. :)
is 'uniqueSet' is really a Set? In set elements appeared only once. You should check your uniqueSet implementation first. If this is really a Set then Collections.frequency(list, temp)) >= 2 is always false.
Why not use a map to store the current count? Something like this:
public static void getRepeatCount(String[] c) {
HashMap<String, Integer> wordCount = new HashMap<>();
for(String currStr : c) {
if(wordCount.containsKey(currStr)) {
wordCount.put(currStr, wordCount.get(currStr) + 1);
} else {
wordCount.put(currStr,1);
}
}
int repeatedWords = 0;
for (String currKey : wordCount.keySet()) {
int currRepeatCount = wordCount.get(currKey);
repeatedWords += currRepeatCount;
System.out.println(currKey+" => "+currRepeatCount);
}
System.out.println("Total reapeated words: "+repeatedWords);
}
Test:
public static void main(String[] args) {
String[] ar = {"abc","abc","aa","aa","b"};
getRepeatCount(ar);
}
Output:
aa => 2
b => 1
abc => 2
Total reapeated words: 5
Java 8's streaming API provides a pretty elegant way of doing this. You could stream the list of words, collect it to a frequency map and then stream that map's values and reduce them to a sum:
int countThreshold = 2;
long sum =
words.stream()
.collect(Collectors.groupingBy(Function.identity(),
Collectors.counting()))
.values()
.stream()
.filter(x -> x >= countThreshold)
.reduce(0L, Long::sum);

Adding a value to a sorted array in Java 8

You are provided the following list that contains (semi-random) years from modern history. Save the list to a text file named “events.txt” Write a program that:
Reads in the file “events.txt”
Sorts it with the latest events first
Determines whether the founding of CMU in 1892 was considered a world historic event
If not so yet, adds the event to the list of events
Writes the new list of events to a file named “sorted_events.txt
import java.io.*;
import java.util.*;
public class EventSorter {
public static void main(String[] args) throws FileNotFoundException{
File file =new File("events.txt");
FileReader read = new FileReader(file);
LineNumberReader lines = new LineNumberReader(read);
Scanner readIn = new Scanner(file);
PrintWriter output = new PrintWriter("sorted_events.txt");
try{
//call for the file
//make sure it exists
if(file.exists()){
{
//first write this to determine the number of lines
int lineNumber = 0;
//gets the number of lines
while (lines.readLine() != null){
lineNumber++;
}
int[] event = new int[lineNumber];
int j = 0;
while(readIn.hasNext()){
event[j]=readIn.nextInt();
j++;
}
//sort the array
Arrays.sort(event);
boolean found;
for(int i = 0; i < event.length; i++){
if (event[i] == 1892){
//see if 1892 is on the list
System.out.println("CMU is a historic event");
found = true;
}
else{
addElement(event, 1892);
}
}
int[] sortedEvent = new int[lineNumber];
for(int k = 0; k < event.length; k++){
sortedEvent[k] = event[(event.length-1) - k];
System.out.println(sortedEvent[k]);
}
for(int print = 0 ; print < event.length; print++){
output.println(sortedEvent[print]);
}
}
readIn.close();
output.close();
lines.close();
}else{
System.out.println("File does not exist!");
}
}
catch(IOException e){
e.printStackTrace();
}
}
static int[] addElement(int[] a, int e) {
a = Arrays.copyOf(a, a.length + 1);
a[a.length - 1] = e;
return a;
}
}
I'm going to answer strictly to the title "Adding a value to a sorted array in Java 8". Two options can be:
Create a TreeSet or any SortedSet (or a structure based on a Binary Search Tree) from your array. This collection is sorted by default and any new items you add to it will keep it sorted.
Use something similar to binary search to find where in the array the new element should be placed.
This program does exactly what you want and answers the question of the teacher perfectly.
However, it's using several advanced techniques of Java and I strongly doubt that your teacher will give you any credit for it. So use it at your own risk.
The best you can do with this code is to read it, understand its concept and apply them in conjunction to what you know.
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.stream.Collectors;
public class EventSorter {
public static void main(String[] args) {
try {
Path inPath = Paths.get("events.txt");
Path outPath = Paths.get("sorted_events.txt");
int event = 1892;
Comparator<Integer> lastFirst = Comparator.<Integer>naturalOrder().reversed();
List<Integer> events = Files.lines(inPath).map(Integer::valueOf).sorted(lastFirst).collect(Collectors.toCollection(ArrayList::new));
int pos = Collections.binarySearch(events, event, lastFirst);
if (pos < 0) {
events.add(~pos, event);
}
Files.write(outPath, events.stream().map(Object::toString).collect(Collectors.toList()));
} catch (IOException ex) {
System.out.println(ex.getMessage());
}
}
}
events.txt (input)
1982
1821
1934
1809
sorted_events.txt (output)
1982
1934
1892
1821
1809

How to arrange items in list from highest to lowest (word occurence using I/O)?

I have a working word occurrence program that took me a while to code (still new at Java) and I was wondering if I could get a little assistance. Here is my code that I have so far:
import java.io.*;
import java.util.ArrayList;
import java.util.List;
public class TestWordOccurenceProgram {
public static void main(String[] args) {
String thisLine = null;
try {
FileReader fr = new FileReader("myTextDocument.txt");
BufferedReader br = new BufferedReader(fr);
//List<String> wordList = new ArrayList<>();
List<String> words = new ArrayList<>();
// make ArrayList of integers
List<Integer> counts = new ArrayList<>();
String word = "";
while ((thisLine = br.readLine()) != null ) {
word = word.concat(thisLine);
word = word.concat(" ");
}
String[] wordList = word.split("\\s");
for (int i = 0; i < wordList.length; i++) {
String temp = wordList[i];
if(words.contains(temp)) {
int x = words.indexOf(temp);
int value = counts.get(x);
value++;
counts.set(x, value);
}
else {
words.add(temp);
counts.add(1);
}
}
for (int i = 0; i < words.size(); i++) {
System.out.println(words.get(i) + ": " + counts.get(i));
}
br.close();
} catch (FileNotFoundException e) {
e.printStackTrace();
System.exit(1);
} catch (IOException e) {
e.printStackTrace();
System.exit(1);
}
}
}
Here is what "myTextDocument.txt" has:
i am a rabbit
a happy rabbit am
yay i am a rabbit
a rabbit i am yay
Here is my output:
i: 3
am: 4
a: 4
rabbit: 4
happy: 1
yay: 2
Does anyone know if I could arrange these items from the highest number of word occurrences to the lowest number of word occurrences? Any help would be great!
You can use Map instead of List. and use compare method to sort map via its value.
refer this code :
import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Map;
import java.util.TreeMap;
public class PQ {
public static void main(String[] args) {
String thisLine = null;
try {
FileReader fr = new FileReader("D:\\test.txt");
BufferedReader br = new BufferedReader(fr);
HashMap<String,Integer> map = new HashMap<String,Integer>();
ValueComparator comparator = new ValueComparator(map);
TreeMap<String, Integer> treemap = new TreeMap<String, Integer>(comparator);
while((thisLine = br.readLine()) != null){
String[] str = thisLine.split("\\s+");
for(String s:str){
if(map.containsKey(s)){
Integer i = map.get(s);
i++;
map.put(s,i);
}else{
map.put(s, 1);
}
}
}
treemap.putAll(map);
System.out.println(treemap);
br.close();
} catch (FileNotFoundException e) {
e.printStackTrace();
System.exit(1);
} catch (IOException e) {
e.printStackTrace();
System.exit(1);
}
}
}
class ValueComparator implements Comparator<String>{
Map<String, Integer> base;
public ValueComparator(Map<String, Integer> base) {
this.base = base;
}
public int compare(String a, String b) {
if (base.get(a) >= base.get(b)) {
return -1;
} else {
return 1;
}
}
}
Rather than using two separate lists (one with words, one with counts), why not create a WordAndCount object that has something like getWord and getCount methods? This WordAndCount class can implement Comparable, where you do comparisons based on count. Then, you can store a single List<WordAndCount>, and just sort the single list using Collections.sort.
Roughly, the outline could look like this:
public class WordAndCount implements Comparable<WordAndCount> {
private String word;
private int count;
public WordAndCount(String word) {...}
public void incrementCount() {...}
public int compareTo(WordAndCount other) {...}
}
Wrapping up the combination into a single class makes this much easier to solve, as it provides the easy link between word and its count.
I would recommend using Collections in Java for this, but instead you can use temp variables.
So the idea is to sort by counts. Pseudo-code before outputting:
int tempCount;
String tempWord;
for (int i = 1; i < counts.size(); i++) {
if (counts.get(i) < counts.get(i-1)) {
tempCount = counts.get(i-1);
tempWord = words.get(i-1);
counts.set(i-1, i);
counts.set(i, tempCount);
words.set(i-1, i);
words.set(i, tempWord);
}
You'd need an extra loop around that to correctly order them but hopefully gives you the right idea.

Identifying positive and negative words in text

I am trying to work out how to scan a text file of a conversation find how many positive words and negative words there are. The positive and negative words are contained within two separate text files which are used to 'scan' the conversation text file.
After it finds the number of positive and negative words I am trying to get it to tally each up and then tell me if there are more positive or negative words found.
I have the code below so far, it only gives me a count on the positive words. I am not looking at something like NLP at this stage just something on a much more basic level.
I think I have the second part looking for the negative words in the wrong location. And I think I need to use a boolean to tell me if there are more positive or negative words found, but I can't work out how to do it.
I am pretty stuck as I am new to Java, and programing in general.
Any help would be greatly appreciated.
package omgilisearch;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.util.HashSet;
import java.util.Map;
import java.util.Scanner;
import java.util.Set;
import java.util.TreeMap;
public class SentimentTest {
public static void main(String[] args) throws Exception {
printAllCounts(
readWordFile("ConversationTest.txt", loadKeywords("PositiveWords.txt")));
}
public static void main1(String[] args) throws Exception {
printAllCounts(
readWordFile("ConversationTest.txt", loadKeywords("NegativeWords.txt")));
}
private static Map<String, Integer> readWordFile(
String fname, Set<String> keywords) throws FileNotFoundException
{
final Map<String, Integer> frequencyData = new TreeMap<String, Integer>();
for (Scanner wordFile = new Scanner(new FileReader(fname));
wordFile.hasNext();)
{
final String word = wordFile.next();
if (keywords.contains(word))
frequencyData.put(word, getCount(word, frequencyData) + 1);
}
return frequencyData;
}
private static void printAllCounts(Map<String, Integer> frequencyData) {
System.out.println("-----------------------------------------------");
System.out.println(" Occurrences Word");
for(Map.Entry<String, Integer> e : frequencyData.entrySet())
System.out.printf("%15d %s\n", e.getValue(), e.getKey());
System.out.println("-----------------------------------------------");
}
private static int getCount(String word, Map<String, Integer> frequencyData) {
return frequencyData.containsKey(word)? frequencyData.get(word) : 0;
}
private static Set<String> loadKeywords(String fname)
throws FileNotFoundException
{
final Set<String> result = new HashSet<String>();
for (Scanner s = new Scanner(new FileReader(fname)); s.hasNext();)
result.add(s.next());
return result;
}
}
You would have to have some array of so called "bad" words (wich are hard coded) and then iterate through the whole text file and compare every word in the array with the word you currently inspecting. If the word matches with one of the words in the array, then increase some variable that is holding the amount of badwords eg. badWords++;. I believe this approach should work.
package omgilisearch;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.util.HashSet;
import java.util.Map;
import java.util.Scanner;
import java.util.Set;
import java.util.TreeMap;
public class SentimentTest {
public static void main(String[] args) throws Exception {
printAllCounts(
readWordFile("ConversationTest.txt"));
}
private static Map<String, Integer> readWordFile(String string) {
return null;
}
String[] goodWordsHolder = new String[3];{
goodWordsHolder[0] = "good"; goodWordsHolder[1] = "great";goodWordsHolder[2] = "excellent";
for(int iteration = 0; iteration < goodWordsHolder.length; iteration++) { String currentWordInText;
if(goodWordsHolder[iteration] == currentWordInText) { }// The word is a bad word } }
private static void printAllCounts(Map<String, Integer> frequencyData) {
System.out.println("-----------------------------------------------");
System.out.println(" Occurrences Word");
for(Map.Entry<String, Integer> e : frequencyData.entrySet())
System.out.printf("%15d %s\n", e.getValue(), e.getKey());
System.out.println("-----------------------------------------------");
}
}
package omgilisearch;
import java.io.*;
public class SentimentTest {
public static void main(String[] args) {
String[] lines = new String[0];
String path = "ConversationTest.txt";
BufferedReader br = null;
try {
File file = new File(path);
br = new BufferedReader(
new InputStreamReader(
new FileInputStream(file)));
String line;
while( (line = br.readLine()) != null ) {
lines = add(line, lines);
}
br.close();
} catch(IOException e) {
System.out.println("read error: " + e.getMessage());
}
print(lines);
}
private static String[] add(String s, String[] array) {
String[] goodWordsHolder = new String[3];{
}goodWordsHolder[0] = "good"; goodWordsHolder[1] = "great";goodWordsHolder[2] = "excellent";
for(int iteration = 0; iteration < goodWordsHolder.length; iteration++) { String currentWordInText = null; if(goodWordsHolder[iteration] == currentWordInText) { }}
return goodWordsHolder; }
private static void print(String[] data) {
for(int i = 0; i < data.length; i++)
System.out.println(data[i]);
}
}
Arrays store multiple items of the same information type eg. String[] badWords;. I believe you should use this, since I'm sure you will have more than 1 bad word that you would like to find in the conversation text, if not, then simple use 1 String eg. String badWord;.
I'm not going to write out all the code that will make it work, I'll just give you an algorithm.
public class test {
// The process of picking out all the good and bad words
public static void main(String[] args) {
// Setting up all the needed variables
// Set up all the good words
String[] goodWordsHolder = new String[2];
goodWordsHolder[0] = "firstGoodWord";
goodWordsHolder[1] = "secondGoodWord";
// Set up all the bad words
String[] badWordsHolder = new String[2];
badWordsHolder[0] = "firstBadWord";
badWordsHolder[1] = "secondBadWord";
// Set up the counters
int amountOfGoodWords = 0;
int amountOfBadWords = 0;
int currentWordInText = 0;
// boolean that will exit the loop
boolean ConversationEnded = false;
while(!ConversationEnded) {
// Compare the currentWord from the conversation with the hard coded words
for(int iteration = 0; iteration < goodWordsHolder.length; iteration++) {
if(goodWordsHolder[iteration] == getWordInText(currentWordInText)) {
amountOfGoodWords++;
}
}
for(int iteration = 0; iteration < badWordsHolder.length; iteration++) {
if(badWordsHolder[iteration] == getWordInText(currentWordInText)) {
amountOfBadWords++;
}
}
// Increase the current word value so the next time we compare the next word in the conversation will be compared
currentWordInText++;
// Check that we haven't reached the end of the conversation
if(endOfTheConversationHasBeenReached()) {
// This will exit the while loop
ConversationEnded = true;
}
}
// Now print all the information to the console
System.out.println("Amount of good Words: " + amountOfGoodWords);
System.out.println("Amount of bad Words: " + amountOfBadWords);
if(amountOfGoodWords > amountOfBadWords) {
System.out.println("There are more good words than bad words.");
}
else {
System.out.println("There are more bad words than good words.");
}
}
// The method(s) you'll have to code out yourself. I suggest you read up on the web and so on to assist you with this.
private static String getWordInText(int currentWordInText) {
// TODO Auto-generated method stub
return null;
}
private static boolean endOfTheConversationHasBeenReached() {
// TODO Auto-generated method stub
return false;
}
}
Excuse me if there are any logical errors. The code hasn't been debugged yet. ;) Hopefully this will guide you into the right direction.

Categories