I am new to Java.
Can anyone help me with code to tell how much two text files match each other?
Suppose I have two files, 'a.txt' and 'b.txt';
I need to know the percentage of match.
Thanks.
Read the two files into two Strings str1, str2.
Iterate through each, counting matching chars. Divide the number of matches by the number of comparisons, and multiply by 100 to get a percentage.
Scanner sca = new Scanner(new File("a.txt"));
Scanner scb = new Scanner(new File("b.txt"));
StringBuilder sba = new StringBuilder();
StringBuilder sbb = new StringBuilder();
while (sca.hasNext()) {
    sba.append(sca.next());
}
while (scb.hasNext()) {
    sbb.append(scb.next());
}
String a = sba.toString();
String b = sbb.toString();
int maxlen = Math.max(a.length(), b.length());
int matches = 0;
for (int i = 0; i < maxlen; i++) {
    if (a.length() <= i || b.length() <= i) {
        break;
    }
    if (a.charAt(i) == b.charAt(i)) {
        matches++;
    }
}
return (((double) matches / (double) maxlen) * 100.0);
import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Scanner;
import java.util.StringTokenizer;
class File_meta_Data // class to store the metadata of file so that scoring can be done
{
String FileName;
long lineNumber;
long Position_In_Line;
long Position_In_Document;
File_meta_Data()
{
FileName = null;
lineNumber = 0;
Position_In_Line = 0;
Position_In_Document = 0;
}
}
public class bluestackv1 {
static int getNumberofInputFiles() // seeks number of resource files from user
{
System.out.println("enter the number of files");
Scanner scan = new Scanner(System.in);
return(scan.nextInt());
}
static List getFiles(int Number_of_input_files) // seeks full path of resource files from user
{
Scanner scan = new Scanner(System.in);
List filename = new ArrayList();
int i;
for(i=0;i<Number_of_input_files;i++)
{
System.out.println("enter the filename");
filename.add(scan.next());
}
return(filename);
}
static String getfile() // seeks the full pathname of the file which has to be matched with resource files
{
System.out.println("enter the name of file to be matched");
Scanner scan = new Scanner(System.in);
return(scan.next());
}
static Map MakeIndex(List filename) // output the index in the map.
{
BufferedReader reader = null; //buffered reader to read file
int count;
Map index = new HashMap();
for(count=0;count<filename.size();count++) // for all files mentioned in the resource list create index of its contents
{
try {
reader = new BufferedReader(new FileReader((String) filename.get(count)));
long lineNumber;
lineNumber=0;
int Count_of_words_in_document;
Count_of_words_in_document = 0;
String line = reader.readLine(); // data is read line by line
while(line!=null)
{
StringTokenizer tokens = new StringTokenizer(line, " ");// here the delimiter is <space> but it can be changed to <\n>, <\t>, <\r> etc. depending on the problem statement
lineNumber++;
long Count_of_words_in_line;
Count_of_words_in_line = 0;
while(tokens.hasMoreTokens())
{
List<File_meta_Data> temp = new ArrayList<File_meta_Data>();
String word = tokens.nextToken();
File_meta_Data metadata = new File_meta_Data();
Count_of_words_in_document++; // contains the word number in the document
Count_of_words_in_line++; // contains the word number in line. used for scoring
metadata.FileName = filename.get(count).toString();
metadata.lineNumber = lineNumber;
metadata.Position_In_Document = Count_of_words_in_document;
metadata.Position_In_Line = Count_of_words_in_line;
int occurence;
occurence=0;
if(index.containsKey(word)) //if the word has occurred already then update the entry, concatenating the older and new entries
{
Map temp7 = new HashMap();
temp7 = (Map) index.get(word);
if(temp7.containsKey(metadata.FileName)) // entry of child Map is changed
{
List<File_meta_Data> temp8 = new ArrayList<File_meta_Data>();
temp8 = (List<File_meta_Data>)temp7.get(metadata.FileName); //outputs files which contain the word along with its location
temp7.remove(metadata.FileName);
temp8.add(metadata);
temp7.put(metadata.FileName, temp8); // updated entry is added
}
else // if the word has occurred in this file for the first time and there is no entry in the child HashMap
{
temp.add(metadata);
temp7.put(metadata.FileName, temp);
temp=null;
}
Map temp9 = new HashMap();
temp9 = (Map) index.get(word);
index.remove(word);
temp9.putAll(temp7);
index.put(word, temp9);
}
else // the same is done for the parent map
{
Map temp6 = new HashMap();
temp.add(metadata);
temp6.put(metadata.FileName, temp);
index.put(word,temp6);
}
}
line = reader.readLine();
}
index.put("#words_in_file:"+(String)filename.get(count),Count_of_words_in_document);
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
return(index);
}
static String search(Map index,List filename) throws IOException //scores each resource file by comparing with each word in input file
{
double[] overlap = new double[filename.size()]; //stores overlap/coord scores
double[] sigma = new double[filename.size()]; // stores ∑ over t in q of ( tf(t in d) · idf(t)^2 ) for each resource file
int i;
double max, maxid; // stores file info with max score
max=0;
maxid= -1;
for(i=0;i<filename.size();i++)
{
overlap[i] = 0;
sigma[i] = 0;
}
String bestfile = new String();
double maxscore;
maxscore = -1;
double total;
double cord;
total=0;
File File_to_be_matched = new File(getfile());
BufferedReader reader = new BufferedReader(new FileReader(File_to_be_matched));
String line = reader.readLine();
while(line!=null) //similar to index function
{
StringTokenizer tokens = new StringTokenizer(line, " ");
while(tokens.hasMoreTokens())
{
String word = tokens.nextToken();
double tf,idf;
tf = 0;
idf = 0;
total=total+1;
if(index.containsKey(word))
{
Map temp = new HashMap();
for(i=0;i<filename.size();i++) // for each file a score is calculated for corresponding word which afterwards added
{
int j,count,docFreq;
count=0;
docFreq=0;
temp = (Map) index.get(word);
if(temp.containsKey(filename.get(i)))
{
List l2= (List) temp.get(filename.get(i));
tf = (int) Math.pow((long) l2.size(),0.5); //calculate the term frequency
docFreq = temp.size(); // tells in how many files the word occurs
overlap[i]++;
}
else
{
tf=0;
}
idf = (int) (1 + Math.log((long)(filename.size())/(1+docFreq)));// the more files the word occurs in, the lower its weight
sigma[i] = sigma[i] + (int)(Math.pow((long)idf,2) * tf);
}
}
}
line = reader.readLine();
}
double subsetRatio;
for(i=0;i<filename.size();i++) // all scores are added
{
int x = (int)index.get("#words_in_file:"+(String)filename.get(i));
subsetRatio = overlap[i]/x;
overlap[i] = overlap[i]/total;
overlap[i] = overlap[i] * sigma[i];
overlap[i] = overlap[i] * subsetRatio; // files which are subset of some have higher priority
if(max<overlap[i]) // maximum score is calculated
{
max=overlap[i];
maxid = i;
}
}
if(maxid!=-1)
return (String) (filename.get((int) maxid));
else
return("error: Matching does not took place");
}
public static void main(String[] args) throws IOException
{
List filename = new ArrayList();
int Number_of_input_files = getNumberofInputFiles();
filename = getFiles(Number_of_input_files);
Map index = new HashMap();
index = MakeIndex(filename);
//match(index);
while(1==1) //infinite loop
{
String Most_similar_file = search(index,filename);
System.out.println("the most similar file is : "+Most_similar_file);
}
}
}
The problem is to find the most similar file among several resource files.
There are two sub-problems to this question.
First, as the question states, how to find the most similar file. This is done by assigning each file a score that considers different aspects of the file's content.
Second, how to compare each and every word of the input file against comparatively large resource files.
To solve the second problem, reverse indexing has been used with HashMaps in Java. Since the problem is simple and nothing is modified, nested Maps were used instead of a comparator-based MapReduce.
While searching, the computational complexity is O(RESOURCE_FILES * TOTAL_WORDS_IN_INPUT_FILE).
The first problem has been solved with the following formula (a rough worked example follows the component descriptions below):
score(q,d) = coord(q,d) · ( ∑ over t in q of tf(t in d) · idf(t)^2 ) · subsetRatio
1) coord(q,d) = overlap / maxOverlap
Implication: of the terms in the query, a document that contains more of them will have a higher score.
Rationale: score factor based on how many of the query terms are found in the specified document.
2) tf(t in d) = sqrt(freq)
Term frequency factor for the term (t) in the document (d).
Implication: the more frequently a term occurs in a document, the greater its score.
Rationale: documents which contain more of a term are generally more relevant.
3) idf(t) = log(numDocs/(docFreq+1)) + 1
Implication: the more documents a term occurs in, the lower its score.
Rationale: common terms are less important than uncommon ones.
4) subsetRatio = number of occurring words / total words
Implication: suppose two files are both supersets of the input file; the one with less excess data will have higher similarity.
Rationale: files with similar content must have higher priority.
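A rough worked example of the formula (all numbers invented purely for illustration): suppose the input file has total = 10 words, 4 of which occur in resource file d, so overlap = 4 and coord = 4/10 = 0.4. If one of those words occurs 9 times in d, then tf = sqrt(9) = 3, and if it appears in 2 of the 4 resource files, idf = log(4/(2+1)) + 1 ≈ 1.29, so that term contributes tf · idf^2 ≈ 3 · 1.66 ≈ 5.0 to the sum. If d contains 50 words in total, subsetRatio = 4/50 = 0.08. Were that the only matching term, the score would be roughly 0.4 · 5.0 · 0.08 = 0.16; with more matching terms, their tf · idf^2 contributions are summed before the multiplication.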
****************test cases************************
1) input file has no words in common with the resource files
2) input file is similar in content to one of the resource files
3) input file is similar in content but different in metadata (meaning the position of words is not similar)
4) input file is a subset of the resource files
5) input file contains only very common words like 'a' or 'and'
6) input file is not at the given location
7) input file cannot be read
Look into opening files, reading them as characters. You actually just need to get a char from each, then check if they match. If they match, then increment the total counter and the match counter. If they don't, only the total counter.
Read more on handling files and streams here: http://docs.oracle.com/javase/tutorial/essential/io/charstreams.html
An example would be this:
BufferedReader br1 = null;
BufferedReader br2 = null;
try
{
br1 = new BufferedReader(new InputStreamReader(new FileInputStream(new File("a.txt")), "UTF-8"));
br2 = new BufferedReader(new InputStreamReader(new FileInputStream(new File("b.txt")), "UTF-8"));
//add logic here
}
catch (Exception e)
{
e.printStackTrace();
}
finally
{
if (br1 != null)
{
try
{
br1.close();
}
catch (Exception e)
{
}
}
if (br2 != null)
{
try
{
br2.close();
}
catch (Exception e)
{
}
}
}
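For the "add logic here" part, a minimal sketch of the character-by-character comparison could look like the following (it reuses the br1 and br2 readers from the example above; the counting scheme here is just one reasonable choice, not the only one):
// Read both readers one char at a time; read() returns -1 at end of stream.
int total = 0;
int same = 0;
int c1 = br1.read();
int c2 = br2.read();
while (c1 != -1 || c2 != -1) {
    total++;        // one comparison made
    if (c1 == c2) {
        same++;     // characters at this position match
    }
    if (c1 != -1) c1 = br1.read();
    if (c2 != -1) c2 = br2.read();
}
double percentage = (total == 0) ? 100.0 : (100.0 * same) / total;
System.out.println("Match: " + percentage + "%");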
Related
For my Java class, I'm working on a project that is essentially a database for MTG cards. I have to read from a file as part of the project, so I am reading the card information from a file, and then splitting the lines to put each different type of information together to form different object classes for the different types of cards. The main nitpicky issue I'm running into right now is that I need the card text to be on one line in the text file so I can read it line by line, but I'd prefer if it weren't all on one line when I print it to the console. Is there any way to add a character combination into the text of the file itself that will tell my compiler, "line break here," when it reads that, or am I out of luck? I know I could just use \n in the code to achieve this, but as I am looping through the file, there is no way to do so properly that I know of, as not every card's text needs line breaks inserted. If it matters, this is the chunk of my code that deals with that:
import java.io.BufferedReader;
import java.io.FileReader;
import java.util.Scanner;
public class MTG {
public static void main(String[] args) {
int creatureLength = 4;
//Prompt User
Scanner sc = new Scanner(System.in);
System.out.println("Welcome to the Magic: the Gathering card database. This tool currently supports Rare and Mythic Rare cards from the Throne of Eldraine Expansion.");
try {
System.out.println("\nSelect the card type you'd like to view.");
System.out.println(""
+ "(1)Creatures\n"
);
int choice = Integer.parseInt(sc.next());
//Choose type
//Creatures
if(choice == 1){
Creature[] creatures = creatureGen("textfiles/Creatures.txt", creatureLength);
System.out.println("\nViewing creatures. Which card would you like to view?: \n");
for(int k = 0; k < creatureLength; k++) {
System.out.println(
"(" + (k + 1) + ") " + creatures[k].getName());
}
int creatureChoice = Integer.parseInt(sc.next());
try {
System.out.println("\n" + creatures[(creatureChoice - 1)]);}
catch(Exception e) {
System.out.println("Input was not a specified number. Exiting...");
}
}
}
catch(NumberFormatException ex){
System.out.println("Input was not a specified number. Exiting...");
}
sc.close();
}
//Read Creature text file
public static Creature[] creatureGen(String path, int length) {
Creature[] creatures = new Creature[length];
try {
FileReader file = new FileReader(path);
BufferedReader reader = new BufferedReader(file);
String name[] = new String[length];
String cost[] = new String[length];
String color[] = new String[length];
String type[] = new String[length];
String cTypes[] = new String[length];
String tags[] = new String[length];
String text[] = new String[length];
int power[] = new int[length];
int toughness[] = new int[length];
for (int i = 0; i < length; i++) {
String line = reader.readLine();
if(line != null) {
name[i] = line.split("\\|")[0];
cost[i] = line.split("\\|")[1];
color[i] = line.split("\\|")[2];
type[i] = line.split("\\|")[3];
cTypes[i] = line.split("\\|")[4];
tags[i] = line.split("\\|")[5];
text[i] = line.split("\\|")[6];
power[i] = Integer.parseInt(line.split("\\|")[7]);
toughness[i] = Integer.parseInt(line.split("\\|")[8]);
creatures[i] = new Creature(name[i], cost[i], color[i], type[i], cTypes[i], tags[i], text[i], power[i], toughness[i]);
}
}
reader.close();
}
catch (Exception e) {
System.out.println("Error reading file: " + path);
}
return creatures;
}
}
The Creature object class essentially just stores the data that I am putting into it with the creatureGen method. A sample line from the text file I am reading from looks something like this:
Charming Prince|1W|White|Creature|Human Noble||When Charming Prince enters the battlefield, choose one — • Scry 2. • You gain 3 life. • Exile another target creature you own. Return it to the battlefield under your control at the beginning of the next end step.|2|2
It would be ideal to be able to insert line breaks after each of the bullet points in this card, for example, but as I said earlier, I need the text to be in one line for my loop to read it. Is there any way around this when I print this back to the console? I appreciate any help.
Just replace those bullet points with line breaks :
text[i] = line.split("\\|")[6].replaceAll("•","\n");
Also, you should not split the line each time you need an element; put the result of line.split("\\|") in a String[] variable and use it afterwards.
for (int i = 0; i < length; i++) {
String line = reader.readLine();
if(line != null) {
String[] elements = line.split("\\|");
name[i] = elements[0];
cost[i] = elements[1];
color[i] = elements[2];
type[i] = elements[3];
cTypes[i] = elements[4];
tags[i] = elements[5];
text[i] = elements[6].replaceAll("•","\n");
power[i] = Integer.parseInt(elements[7]);
toughness[i] = Integer.parseInt(elements[8]);
creatures[i] = new Creature(name[i], cost[i], color[i], type[i], cTypes[i], tags[i], text[i], power[i], toughness[i]);
}
}
Finally, about vocabulary, the compiler is not reading your file. The compiler translates your code into binary instructions for the processor (to summarize).
Your file is read at runtime.
I have a simple .txt file ("theFile.txt") with the following format where the left column are the lineNumber and the right column are the word:
5 today
2 It's
1 "
4 sunny
3 a
6 "
For this txt file, I'm making two separate methods to each get only the number and only the string, plus another method to scan the file and put each lineNumber and word in a double-linked list DLL:
String fileName = "theFile.txt";
public int getNumberOnly() {
int lineNumber;
//code to only get the lineNumber but NOT the words
//This is as far as I got and I need help on this part
return lineNumber;
}
public String getWordsOnly() {
String words;
//code to only get the words but NOT the lineNumber
//This is as far as I got and I need help on this part
return words;
}
public void readAndPrintWholeFile(String fileName){
String fileContents = new String();
File file = new File("theFile.txt");
Scanner scanner = new Scanner(new FileInputStream(fileName));
DLL<T> list = new DLL<T>();
//Print each lineNumber and corresponding words for example
// 5 Today
// 2 It's
while (scanner.hasNextLine())
{
fileContents = scanner.nextLine();
System.out.println(list.getNumbersOnly() + " " + list.getWordOnly());
//prints the lineNumber then space then the word
}
}
//I already have all DLL accessors and mutators such as get & set next/previous nodes here, etc.
I'm stuck on how to code the method bodies for both getNumbersOnly() and getWordOnly()
I've tried my best to get to this point. Thanks for your help.
public static void readAndPrintWholeFile(String filename) throws FileNotFoundException {
String fileContents;
File file = new File(filename);
Scanner scanner = new Scanner(new FileInputStream(file));
Map<String, String> map = new HashMap<>();
while (scanner.hasNextLine()) {
try {
fileContents = scanner.nextLine();
String[] as = fileContents.split(" +");
map.put(as[0], as[1]);
System.out.println(as[0] + " " + as[1]);
} catch (ArrayIndexOutOfBoundsException e) {
//If there is some problem with the file format, e.g. a number without a word
}
}
}
You can do this in many ways, and one of them is here.
Here I have not implemented the getNumberOnly() and getWordsOnly() methods, and I don't have the DLL implementation, so I put the data in a Map (HashMap).
You need to pass "fileContents" as the parameter to the functions.
public int getNumberOnly(String fileContents) {
int lineNumber;
//Get position of space
int spacePos = fileContents.indexOf(" ");
//Get substring from start till first space is encountered. Also trim off any leading or trailing spaces. Convert string to int via parseInt
lineNumber = Integer.parseInt(fileContents.substring(0, spacePos).trim());
return lineNumber;
}
public String getWordsOnly(String fileContents) {
String words;
int spacePos = fileContents.indexOf(" ");
//Get substring from first space till the end
words = fileContents.substring(spacePos).trim();
return words;
}
Good day!
I have created code using NetBeans and it executes the processes just fine.
Now I want the input to be given and the output to be displayed through a user interface. I have therefore created two JFrames, one to collect the user's input and the other to display the results after the code executes.
But I am unable to link the interface to the main class (called NgramBetaE), as I am not aware of how I can do so.
I highly welcome suggestions.
The main class in its entirety is;
package ngrambetae;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.*;
/**
*
* @author 201102144
*/
public class NgramBetaE {
static LinkedList<String> allWords = new LinkedList<String>();
static LinkedList<String> distinctWords = new LinkedList<String>();
static String[] hashmapWord = null;
static int wordCount;
public static HashMap<String,HashMap<String, Integer>> hashmap = new HashMap<>();
public static HashMap<String,HashMap<String, Integer>> bigramMap = new HashMap<>();
/**
* @param args the command line arguments
*/
public static void main(String[] args) {
//prompt user input
Scanner input = new Scanner(System.in);
//read words from collected corpus; a number of .txt files
File directory = new File("Corpus");
File[] listOfFiles = directory.listFiles();//To read from all listed files in the "directory"
int lineNumber = 0;
String line;
String files;
String delimiters = "[()?!:;,.\\s]+";
//reading from a list of text files
for (File file : listOfFiles) {
if (file.isFile()) {
files = file.getName();
try {
if (files.endsWith(".txt") || files.endsWith(".TXT")) { //ensures a file being read is a text file
BufferedReader br = new BufferedReader(new FileReader(file));
while ((line = br.readLine()) != null) {
line = line.toLowerCase();
hashmapWord = line.split(delimiters);
//CALCULATING UNIGRAMS
for(int s = 0; s < hashmapWord.length; s++){
String read = hashmapWord[s];
allWords.add(read);
//count the total number of words in all the text files combined
//TEST
wordCount = 0;
for (int i = 0; i < allWords.size(); i++){
wordCount ++;
}
}
//CALCULATING BIGRAM FREQUENCIES
for(int s = 0; s < hashmapWord.length -1; s++){
String read = hashmapWord[s];
final String read1 = hashmapWord[s + 1];
HashMap<String, Integer> counter = bigramMap.get(read);
if (null == counter) {
counter = new HashMap<String, Integer>();
bigramMap.put(read, counter);
}
Integer count = counter.get(read1);
counter.put(read1, count == null ? 1 : count + 1);
}
//CALCULATING TRIGRAM FREQUENCIES
for(int s = 0; s < hashmapWord.length - 2; s++){
String read = hashmapWord[s];
String read1 = hashmapWord[s + 1];
final String read2 = hashmapWord[s + 2];
String readTrigrams = read + " " + read1;
HashMap<String, Integer> counter = hashmap.get(readTrigrams);
if (null == counter) {
counter = new HashMap<String, Integer>();
hashmap.put(readTrigrams, counter);
}
Integer count = counter.get(read2);
counter.put(read2, count == null ? 1 : count + 1);
}
}
br.close();
}
} catch (NullPointerException | IOException e) {
e.printStackTrace();
System.out.println("Unable to read files: " + e);
}
}
}
//COMPUTING THE TOTAL NUMBER OF WORDS FROM ALL THE TEXT FILES COMBINED
System.out.println("THE TOTAL NUMBER OF WORDS IN COLLECTED CORPUS IS : \t" + wordCount + "\n");
for(int i = 0, size = allWords.size(); i < size; i++){
String distinctWord = allWords.get(i);
//adding a word into the 'distinctWords' list if it doesn't already occur
if(!distinctWords.contains(distinctWord)){
distinctWords.add(distinctWord);
}
}
//PRINTING THE DISTINCT WORDS
System.out.println("THE DISTINCT WORDS IN TOTAL ARE :\t " + distinctWords.size() + "\n");
System.out.println("PRINTING CONTENTS OF THE BIGRAMS HASHMAP... ");
System.out.println(bigramMap);
System.out.println("================================================================================================================================================================================================================================================================================================================\n");
System.out.println("PRINTING CONTENTS OF THE TRIGRAMS HASHMAP... ");
System.out.println(hashmap);
System.out.println("================================================================================================================================================================================================================================================================================================================\n");
//QUITTING APPLICATION
String userInput = null;
while(true) {
System.out.println("\n**********************************************************************************************************************************************************************************************************************************");
System.out.println("\n\n\t\tPLEASE ENTER A WORD OR PHRASE YOU WOULD LIKE A PREDICTION OF THE NEXT WORD FROM:");
System.out.println("\t\t\t\t(OR TYPE IN 'Q' OR 'q' TO QUIT)");
userInput = input.nextLine();
if (userInput.equalsIgnoreCase("Q")) break;
//FORMAT USER INPUT
String[] users = userInput.toLowerCase().split("[?!,.\\s]+");
if (users.length < 2) {
userInput = users[0];
//System.out.println("\nENTRY '" + userInput + "' IS TOO SHORT TO PREDICT NEXT WORD. PLEASE ENTER 2 OR MORE WORDS");
//CALCULATING BIGRAM PROBABILITY
int sum = 0;
try {
for(String s : bigramMap.get(userInput).keySet()) {
sum += bigramMap.get(userInput).get(s);
}
String stringHolder = null;
double numHolder = 0.0;
for(String s : bigramMap.get(userInput).keySet()) {
//System.out.println("TWO");
double x = Math.round(bigramMap.get(userInput).put(s, bigramMap.get(userInput).get(s))/ (double)sum *100 );
if(s != null){
if(numHolder < x ){
stringHolder = s;
numHolder = x;
}
}
}
System.out.println("\nNEXT WORD PREDICTED IS '" + stringHolder + "'");
System.out.println("ITS PROBABILITY OF OCCURRENCE IS " + numHolder + "%");
} catch (Exception NullPointerException) {
System.out.println("\nSORRY. MATCH NOT FOUND.");
}
} else {
userInput = users[users.length - 2] + " " + users[users.length - 1];
// System.out.println("FROM USER WE GET....");
// System.out.println(bigrams.get(userInput).keySet());
/* CALCULATING TRIGRAM PROBABILITY*/
int sum = 0;
try {
for(String s : hashmap.get(userInput).keySet()) {
sum += hashmap.get(userInput).get(s);
}
String stringHolder = null;
double numHolder = 0.0;
for(String s : hashmap.get(userInput).keySet()) {
//System.out.println("TWO");
double x = Math.round(hashmap.get(userInput).put(s, hashmap.get(userInput).get(s))/ (double)sum *100 );
if(s != null){
if(numHolder < x ){
stringHolder = s;
numHolder = x;
}
}
}
System.out.println("\nNEXT WORD PREDICTED IS '" + stringHolder + "'");
System.out.println("ITS PROBABILITY OF OCCURRENCE IS " + numHolder + "%");
} catch (Exception NullPointerException) {
System.out.println("\nSORRY. MATCH NOT FOUND.");
}
}
}
input.close();
}
}
My first JFrame, which I would like to appear upon running the project, has a single text box and a single button;
private void jButton1ActionPerformed(java.awt.event.ActionEvent evt) {
String usersInput = jTextField1.getText();
Interface1 s = new Interface1();
s.setVisible(true);
dispose();
}
I would like for the user to enter data in the text box, and when they click on the button 'predict next word', the output from the code execution should be displayed on the second JFrame, which has three labels and corresponding text areas.
NOTE: I couldn't paste the screenshots, but if you run the NgramBetaE class you will get an idea of how the interfaces should look, as I tried to explain them.
Thank you
Don't even try to link your GUI code to your NgramBetaE code as it stands; you have more work to do, since NgramBetaE is little more than one huge static main method that gets user input from the console with a Scanner and outputs to the console via printlns. Melding these two is like trying to put a square peg into a round hole.
Instead re-write the whole thing with an eye towards object-oriented coding, including creation of an OOP-compliant model class with instance fields and methods, and a single GUI that gets the input and displays it, that holds an instance of the model class and that calls instance methods on this instance.
Consider creating non-GUI classes and methods for the following (a rough sketch of such a model class follows these lists) --
Reading in data from your text files
Analyzing and hashing the data held in the text files, including calculating word frequencies etc...
Returning needed data after analysis in whatever form it may be needed.
A method that allows input of a String/phrase for testing and returns its predicted probability
Then create GUI code for:
Getting selected text file from the user. A JFileChooser and supporting code works well here.
Button to start analysis
JTextField to allow entering of phrase
JTextArea or perhaps JTable to display results of analysis
Note that you should avoid having more than one JFrame in your GUI. For more on this, please have a look at The Use of Multiple JFrames, Good/Bad Practice?
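As a minimal sketch only (the class name, method names, and fields below are hypothetical, not taken from the original code), the model side of that split might look something like this:
import java.io.*;
import java.util.*;
// Hypothetical model class: all analysis lives here, with no System.out or Scanner.
public class NgramModel {
    private final Map<String, Map<String, Integer>> bigramMap = new HashMap<>();
    // Index every .txt file in the given corpus directory, much as the existing main method does.
    public void loadCorpus(File directory) throws IOException {
        File[] textFiles = directory.listFiles((dir, name) -> name.toLowerCase().endsWith(".txt"));
        if (textFiles == null) {
            return; // not a directory, or an I/O error occurred
        }
        for (File file : textFiles) {
            try (BufferedReader br = new BufferedReader(new FileReader(file))) {
                String line;
                while ((line = br.readLine()) != null) {
                    String[] words = line.toLowerCase().split("[()?!:;,.\\s]+");
                    for (int i = 0; i < words.length - 1; i++) {
                        bigramMap.computeIfAbsent(words[i], k -> new HashMap<>())
                                 .merge(words[i + 1], 1, Integer::sum);
                    }
                }
            }
        }
    }
    // Return the most likely next word for the last word of the phrase, or null if unknown.
    public String predictNextWord(String phrase) {
        String[] words = phrase.toLowerCase().trim().split("\\s+");
        Map<String, Integer> successors = bigramMap.get(words[words.length - 1]);
        if (successors == null || successors.isEmpty()) {
            return null;
        }
        return Collections.max(successors.entrySet(), Map.Entry.comparingByValue()).getKey();
    }
}
The GUI would then hold an NgramModel instance in a field, call loadCorpus(...) once the user has chosen a directory, and in the button handler call predictNextWord(jTextField1.getText()) and write the result into a JTextArea instead of printing to the console.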
I want to write code that stores strings in a HashMap as they are read from text files.
I have written the code below and it works with no errors, but the frequency of every occurrence of a string combination does not change; it is always 1.
I am asking for assistance on how I can ensure that if a string combination appears more than once in the text files, its frequency also increases.
This is my code:
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.*;
public class NgramBetaC {
static String[] hashmapWord = null;
public static Map<String,Map<String, Integer>> bigrams = new HashMap<>();
public static void main(String[] args) {
//prompt user input
Scanner input = new Scanner(System.in);
//read words from collected corpus; a number of .txt files
File directory = new File("Corpus4");
File[] listOfFiles = directory.listFiles();//To read from all listed files in the "directory"
//String bWord[] = null;
int lineNumber = 0;
String line;
String files;
String delimiters = "[\\s+,?!:;.]";
int wordTracker = 0;
//reading from a list of text files
for (File file : listOfFiles) {
if (file.isFile()) {
files = file.getName();
try {
if (files.endsWith(".txt") || files.endsWith(".TXT")) { //ensures a file being read is a text file
BufferedReader br = new BufferedReader(new FileReader(file));
while ((line = br.readLine()) != null) {
line = line.toLowerCase();
hashmapWord = line.split(delimiters);
for(int s = 0; s < hashmapWord.length - 2; s++){
String read = hashmapWord[s];
String read1 = hashmapWord[s + 1];
final String read2 = hashmapWord[s + 2];
String readBigrams = read + " " + read1;
final Integer count = null;
//bigrams.put(readBigrams, new HashMap() {{ put (read2, (count == null)? 1 : count + 1);}});
bigrams.put(readBigrams, new HashMap<String, Integer>());
bigrams.get(readBigrams).put(read2, (count == null) ? 1 : count+1);
} br.close();
}
}
} catch (NullPointerException | IOException e) {
e.printStackTrace();
System.out.println("Unable to read files: " + e);
}
}
}
}
THE LINES CONTAINED IN THE TEXT FILES ARE::
1.i would like some ice cream.
2.i would like to be in dubai this december.
3.i love to eat pasta.
4.i love to prepare pasta myself.
5.who will be coming to see me today?
THE OUTPUT I GET WHEN PRINTING CONTENTS OF THE HASHMAP IS:
{coming to={see=1}, would like={to=1}, in dubai={this=1}, prepare pasta={myself=1}, to eat={pasta=1}, like to={be=1}, to prepare={pasta=1}, will be={coming=1}, love to={prepare=1}, some ice={cream=1}, be in={dubai=1}, be coming={to=1}, dubai this={december=1}, to be={in=1}, i love={to=1}, to see={me=1}, who will={be=1}, like some={ice=1}, i would={like=1}, see me={today=1}}
Please assist! Some string combinations are not even appearing.
THE OUTPUT I EXPECT AS I READ FROM THE FILES IS:
{coming to={see=1}, would like={to=1}, in dubai={this=1}, prepare pasta={myself=1}, to eat={pasta=1}, like to={be=1}, to prepare={pasta=1}, will be={coming=1}, love to={prepare=1}, some ice={cream=1}, be in={dubai=1}, be coming={to=1}, dubai this={december=1}, to be={in=1}, i love={to=1}, to see={me=1}, who will={be=1}, like some={ice=1}, i would={like=2}, see me={today=1}, love to {eat=1}, would like {some=1}, i would {love=1}, would love {to=1}}
Tentatively update the current structure without overwriting the original content.
Replace
bigrams.put(readBigrams, new HashMap<String, Integer>());
bigrams.get(readBigrams).put(read2, (count == null) ? 1 : count+1);
With
HashMap<String, Integer> counter = bigrams.get(readBigrams);
if (null == counter) {
counter = new HashMap<String, Integer>();
bigrams.put(readBigrams, counter);
}
Integer count = counter.get(read2);
counter.put(read2, count == null ? 1 : count + 1);
I am still learning Java and have been trying to find a solution for my program for a few days, but I haven't gotten it fixed yet.
I have many text files (my program saves). The files look like this:
text (tab) number (tab) number (tab)...
text (tab) number (tab) number (tab)...
(tab) means that there is tabulation mark,
text means that is text (string),
number means that there is number (integer).
The number of files can be from 1 up to 32, with names like january1, january2, january3...
I need to read all of those files (ignoring the strings) and sum only the numbers, like so:
while ((line = br.readLine()) != null) {
counter=counter+1;
String[] info = line.split("\\s+");
for(int j = 2; j < 8; j++) {
int num = Integer.parseInt(info[j]);
data[j][counter]=data[j][counter]+num;
}
};
Simply put, I want to sum all those "tables" into an array of arrays (or any similar kind of variable) and then display it as a table. If someone knows any solution or can link any similar calculation, that would be awesome!
So, as I see it, you have four questions you need answered. This goes against the site etiquette of asking a single question, but I will give it a shot:
How to list a series of files, presumably using some kind of filter
How to read a file and process the data in some meaningful way
How to manage the data in data structure
Show the data in a JTable.
Listing files
Probably the simplest way to list files is to use File#listFiles and pass a FileFilter which meets your needs:
File[] files = new File(".").listFiles(new FileFilter() {
@Override
public boolean accept(File pathname) {
return pathname.getName().toLowerCase().startsWith("january");
}
});
Now, I'd write a method which took a File object representing the directory you want to list and a FileFilter to use to search it...
public File[] listFiles(File dir, FileFilter filter) throws IOException {
if (dir.exists()) {
if (dir.isDirectory()) {
return dir.listFiles(filter);
} else {
throw new IOException(dir + " is not a valid directory");
}
} else {
throw new IOException(dir + " does not exist");
}
}
This way you could search for a number of different sets of files based on different FileFilters.
Of course, you could also use the newer Paths/Files API to find files as well
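For instance, as a small sketch only (java.nio.file imports assumed), a DirectoryStream with a glob pattern could do the same filtering:
// Collect every file in the directory whose name matches the glob "january*.txt".
Path dir = Paths.get(".");
List<Path> januaryFiles = new ArrayList<>();
try (DirectoryStream<Path> stream = Files.newDirectoryStream(dir, "january*.txt")) {
    for (Path path : stream) {
        januaryFiles.add(path);
    }
}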
Reading files...
Reading multiple files comes down to the same thing, reading a single file...
// BufferedReader has a nice readline method which makes
// it easier to read text with. You could use a Scanner
// but I prefer BufferedReader, but that's me...
try (BufferedReader br = new BufferedReader(new FileReader(new File("...")))) {
String line = null;
// Read each line
while ((line = br.readLine()) != null) {
// Split the line into individual parts, on the <tab> character
String parts[] = line.split("\t");
int sum = 0;
// Staring from the first number, sum the line...
for (int index = 1; index < parts.length; index++) {
sum += Integer.parseInt(parts[index].trim());
}
// Store the key/value pairs together somehow
}
}
Now, we need some way to store the results of the calculations...
Have a look at Basic I/O for more details
Managing the data
Now, there are any number of ways you could do this, but since the amount of data is variable, you want a data structure that can grow dynamically.
My first thought would be to use a Map, but this assumes you want to combine rows with the same name; otherwise you should just use a List within a List, where the outer List represents the rows and the inner List represents the column values...
Map<String, Integer> data = new HashMap<>(25);
File[] files = listFiles(someDir, januraryFilter);
for (File file : files) {
readData(file, data);
}
Where readData is basically the code from before
protected void readData(File file, Map<String, Integer> data) throws IOException {
try (BufferedReader br = new BufferedReader(new FileReader(file))) {
String line = null;
// Read each line
while ((line = br.readLine()) != null) {
//...
// Store the key/value pairs together somehow
String name = parts[0];
if (data.containsKey(name)) {
int previous = data.get(name);
sum += previous;
}
data.put(name, sum);
}
}
}
Have a look at the Collections Trail for more details
Showing the data
And finally, we need to show the data. You could simply use a DefaultTableModel, but since you already have the data in a structure, why not re-use it with a custom TableModel?
public class SummaryTableModel extends AbstractTableModel {
private Map<String, Integer> data;
private List<String> keyMap;
public SummaryTableModel(Map<String, Integer> data) {
this.data = new HashMap<>(data);
keyMap = new ArrayList<>(data.keySet());
}
@Override
public int getRowCount() {
return data.size();
}
@Override
public int getColumnCount() {
return 2;
}
@Override
public Class<?> getColumnClass(int columnIndex) {
Class type = Object.class;
switch (columnIndex) {
case 0:
type = String.class;
break;
case 1:
type = Integer.class;
break;
}
return type;
}
@Override
public Object getValueAt(int rowIndex, int columnIndex) {
Object value = null;
switch (columnIndex) {
case 0:
value = keyMap.get(rowIndex);
break;
case 1:
String key = keyMap.get(rowIndex);
value = data.get(key);
break;
}
return value;
}
}
Then you would simply apply it to a JTable...
add(new JScrollPane(new JTable(new SummaryTableModel(data)));
Take a look at How to Use Tables for more details
Conclusion
There are a lot of assumptions that have to be made which are missing from the context of the question; does the order of the files matter? Do you care about duplicate entries?
So it becomes nearly impossible to provide a single "answer" which will solve all of your problems.
I took all the january1 january2... files from the location and used your same function to calculate the value to be stored.
Then I created a table with two headers, Day and Number. Then just added rows according to the values generated.
DefaultTableModel model = new DefaultTableModel();
JTable table = new JTable(model);
String line;
model.addColumn("Day");
model.addColumn("Number");
BufferedReader br = null;
model.addRow(new Object[]{"a","b"});
for(int i = 1; i < 32; i++)
{
try {
String sCurrentLine;
String filename = "january"+i;
br = new BufferedReader(new FileReader("C:\\january"+i+".txt"));
int counter = 0;
while ((sCurrentLine = br.readLine()) != null) {
counter=counter+1;
String[] info = sCurrentLine.split("\\s+");
int sum = 0;
for(int j = 2; j < 8; j++) {
int num = Integer.parseInt(info[j]);
sum += num;
}
model.addRow(new Object[]{filename, sum+""});
}
} catch (IOException e) {
e.printStackTrace();
} finally {
try {
if (br != null)br.close();
} catch (IOException ex) {
ex.printStackTrace();
}
}
}
JFrame f = new JFrame();
f.setSize(300, 300);
f.add(new JScrollPane(table));
f.setVisible(true);
Use a labeled loop and try-catch. The piece below adds all the numbers in a line.
You could get some hint from here:
String line = "text 1 2 3 4 del";
String splitLine[] = line.split("\t");
int sumLine = 0;
int i = 0;
contSum: for (; i < splitLine.length; i++) {
try {
sumLine += Integer.parseInt(splitLine[i]);
} catch (Exception e) {
continue contSum;
}
}
System.out.println(sumLine);
Here is another example using Vectors. In this example, directories will be searched for ".txt" files and the results added to the JTable.
The doIt method takes in the folder where your text files are located.
It will then, with recursion, look for files in folders.
Each file found will be split and summed, following your example file.
public class FileFolderReader
{
private Vector<Vector> rows = new Vector<Vector>();
public static void main(String[] args)
{
FileFolderReader fileFolderReader = new FileFolderReader();
fileFolderReader.doIt("D:\\folderoffiles");
}
private void doIt(String path)
{
System.out.println(findFile(new File(path)) + " in total");
JFrame frame = new JFrame();
frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
Vector<String> columnNames = new Vector<String>();
columnNames.addElement("File Name");
columnNames.addElement("Size");
JTable table = new JTable(rows, columnNames);
JScrollPane scrollPane = new JScrollPane(table);
frame.add(scrollPane, BorderLayout.CENTER);
frame.setSize(300, 150);
frame.setVisible(true);
}
private int findFile(File file)
{
int totalPerFile = 0;
int total = 0;
File[] list = file.listFiles(new FilenameFilter()
{
public boolean accept(File dir, String fileName)
{
return fileName.endsWith(".txt");
}
});
if (list != null)
for (File textFile : list)
{
if (textFile.isDirectory())
{
total = findFile(textFile);
}
else
{
totalPerFile = scanFile(textFile);
System.out.println(totalPerFile + " in " + textFile.getName());
Vector<String> rowItem = new Vector<String>();
rowItem.addElement(textFile.getName());
rowItem.addElement(Integer.toString(totalPerFile));
rows.addElement(rowItem);
total = total + totalPerFile;
}
}
return total;
}
public int scanFile(File file)
{
int sum = 0;
Scanner scanner = null;
try
{
scanner = new Scanner(file);
while (scanner.hasNextLine())
{
String line = scanner.nextLine();
String[] info = line.split("\\s+");
int count = 1;
for (String stingInt : info)
{
if (count != 1)
{
sum = sum + Integer.parseInt(stingInt);
}
count++;
}
}
scanner.close();
}
catch (FileNotFoundException e)
{
// you will need to handle this
// don't do this !
e.printStackTrace();
}
return sum;
}
}