Why does my program not read full files? - java

I have written Java code to read the contents of a file, but it only works for small files and not for files with more than 1000 lines. Please tell me what error I have made in the program below.
program:
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.nio.charset.StandardCharsets;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Reads {@code E:\parser\parse3.txt} line by line, strips punctuation, splits
 * each line into sentences on '.', and collects every pair of adjacent words of
 * a sentence as {@code first,second} (one pair per output line). The collected
 * text is echoed to standard output and written to {@code E:\parser\new.txt}.
 */
public class aaru
{
    /** Single punctuation characters that are deleted from every input line. */
    private static final Pattern PUNCTUATION = Pattern.compile("[;&^#!/:\\]?*']");

    /** A leading "www." is replaced by one blank before sentences are split. */
    private static final Pattern WWW_PREFIX = Pattern.compile("^www\\.");

    /**
     * Runs the conversion on the hard-coded source and destination files.
     *
     * @param args ignored
     * @throws FileNotFoundException if the source file cannot be opened or the
     *         destination file cannot be created
     */
    public static void main(String args[]) throws FileNotFoundException
    {
        File sourceFile = new File("E:\\parser\\parse3.txt");
        File destinationFile = new File("E:\\parser\\new.txt");
        // A StringBuilder keeps accumulation linear. Repeated String concatenation
        // copies the whole text on every append and gets very slow on large files.
        StringBuilder word = new StringBuilder();
        // BufferedReader replaces the deprecated DataInputStream.readLine(), and
        // try-with-resources closes both files even when reading fails.
        try (BufferedReader reader = new BufferedReader(
                     new InputStreamReader(new FileInputStream(sourceFile), StandardCharsets.UTF_8));
             BufferedWriter writer = new BufferedWriter(
                     new OutputStreamWriter(new FileOutputStream(destinationFile), StandardCharsets.UTF_8)))
        {
            String contents;
            while ((contents = reader.readLine()) != null)
            {
                appendWordPairs(contents, word);
            }
            System.out.println(word);
            writer.write(word.toString());
            writer.write("\r\n");
        }
        catch (FileNotFoundException e)
        {
            // Missing files are still reported to the caller, as declared.
            throw e;
        }
        catch (IOException e)
        {
            // Print the full trace so the failing call is visible.
            e.printStackTrace();
        }
    }

    /** Removes the unwanted punctuation from one input line. */
    private static String stripPunctuation(String line)
    {
        // Order matters: commas go first so that "-,-" collapses to "--" and is
        // then removed as well; the remaining characters are independent.
        String cleaned = line.replace(",", "").replace("--", "");
        return PUNCTUATION.matcher(cleaned).replaceAll("");
    }

    /** Appends the adjacent-word pairs of every sentence in {@code line} to {@code out}. */
    private static void appendWordPairs(String line, StringBuilder out)
    {
        String cleaned = WWW_PREFIX.matcher(stripPunctuation(line)).replaceAll(" ");
        for (String sentence : cleaned.split("\\."))
        {
            String trimmed = sentence.trim();
            String[] words = trimmed.split(" ");
            if (words.length > 1)
            {
                for (int j = 0; j < words.length - 1; j++)
                {
                    out.append(words[j]).append(',').append(words[j + 1]).append("\r\n");
                }
            }
            else
            {
                // A sentence with a single word is written unchanged.
                out.append(trimmed).append("\r\n");
            }
        }
    }
}

It's not immediately obvious why your code doesn't read full files, but here are two hints:
First: Don't use a DataInputStream for reading full lines. Instead, wrap your FileInputStream in an InputStreamReader (ideally providing an encoding) and a BufferedReader, as recommended by the JavaDoc of DataInputStream.readLine():
Like this:
BufferedReader reader = new BufferedReader(new InputStreamReader(fileIn, "UTF-8"));
Second: when you don't know how to handle an exception at least print its stack trace like this:
catch(Exception e)
{
e.printStackTrace();
}

Related

Appending column at the end of a csv file java

I have a csv file and I have some data in it and I want to append a column in it. e.g:
Name,Age,Marks
Joe,15,1
Smith,20,2
I want to append that Marks Column through code. The problem I'm getting is
Name,Age,Marks
Joe,15
1
2
Smith,20
1
2
The data is written twice, and it also ends up in the first column (except for the header row). How can I prevent this? I have been stuck on this problem for the past week.
My code:
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
/**
 * Copies {@code D:\text1.csv} to {@code D:\temp.csv} and appends one extra
 * column: the first row receives the header "Marks", and each following row
 * receives the next value of the column.
 */
public class appendCol {
    /** Header followed by one value per data row of the appended column. */
    private static final String[] COLUMN = {"Marks", "1", "2"};

    /**
     * Returns all cells of the extra column joined without a separator
     * ("Marks12"). Kept unchanged for existing callers; {@link #main} no longer
     * uses it, because appending the whole column to every row was the defect.
     *
     * @return the concatenated column cells
     */
    public static String appendingCol() {
        StringBuilder sb = new StringBuilder();
        for (String cell : COLUMN) {
            sb.append(cell);
        }
        return sb.toString();
    }

    /**
     * @param args ignored
     * @throws IOException if the input cannot be read or the output cannot be written
     */
    public static void main(String[] args) throws IOException {
        // Both files are closed automatically, even if reading fails half-way.
        try (BufferedReader br = new BufferedReader(new FileReader("D:\\text1.csv"));
             BufferedWriter writer = new BufferedWriter(new FileWriter("D:\\temp.csv"))) {
            String line;
            int row = 0;
            while ((line = br.readLine()) != null) {
                // Rows beyond the end of the column get an empty cell instead of failing.
                String cell = row < COLUMN.length ? COLUMN[row] : "";
                writer.write(line + "," + cell);
                writer.newLine();
                row++;
            }
        }
    }
}
With this as input in text1.csv:
Name,Age
Joe,15
Smith,20
I ran (very tightly adapted from your code):
/**
 * Reproduction of the question's code: copies text1.csv to temp.csv and appends
 * "," plus the result of appendingCol() to every line. The per-line output is
 * intentionally unchanged so that it still demonstrates the reported problem.
 */
static void tryStackOverflow () {
    String line = "";
    // try-with-resources closes reader and writer, also when an I/O error occurs.
    try (BufferedWriter writer = new BufferedWriter (new FileWriter ("temp.csv"));
         BufferedReader br = new BufferedReader (new FileReader ("text1.csv"))) {
        while ((line = br.readLine ()) != null) {
            String newFileLine = line + "," + appendingCol ();
            writer.write (newFileLine);
            writer.newLine ();
        }
    } catch (IOException excep) {
        System.err.println ("Exception " + excep);
    }
}
/**
 * Joins the cells of the extra column into one string without any separator.
 *
 * @return the concatenation of "Marks", "1" and "2"
 */
public static String appendingCol () {
    final String[] cells = { "Marks", "1", "2" };
    return String.join ("", cells);
}
and that produced:
Name,Age,Marks12
Joe,15,Marks12
Smith,20,Marks12
From that output it is clear that stringArray should live in the other method (your main method) and be added to the file line by line. Your code also assumes that the file has exactly as many lines as the array has elements. Disregarding that, I moved the array, eliminated the appendingCol method, and ran this:
/**
 * Copies text1.csv to temp.csv and appends one cell of the extra column to each
 * line: the header "Marks" to the first line, then "1", "2" to the next lines.
 * Lines beyond the end of the column receive an empty cell.
 */
static void tryStackOverflow () {
    String line = "";
    String stringArray[] = { "Marks", "1", "2" };
    int lineNum = 0;
    // try-with-resources closes reader and writer, also when an I/O error occurs.
    try (BufferedWriter writer = new BufferedWriter (new FileWriter ("temp.csv"));
         BufferedReader br = new BufferedReader (new FileReader ("text1.csv"))) {
        while ((line = br.readLine ()) != null) {
            // Guard against files with more lines than column cells.
            String cell = lineNum < stringArray.length ? stringArray [lineNum] : "";
            lineNum++;
            writer.write (line + "," + cell);
            writer.newLine ();
        }
    } catch (IOException excep) {
        System.err.println ("Exception " + excep);
    }
}
which produced this:
Name,Age,Marks
Joe,15,1
Smith,20,2
The header needs to be handled separately from the values.
public static void main(String[] args) throws IOException {
Map<String, String[]> csvCol = new HashMap<String, String[]>();
String stringArray[] = { "1", "2" };
csvCol.put('Marks', stringArray);
String line = "";
BufferedWriter writer = new BufferedWriter(new FileWriter("D:\\temp.csv"));
try (BufferedReader br = new BufferedReader(new FileReader("D:\\text1.csv"))) {
String headers = "";
String values = "";
String newFileLine ="";
bool isHeader=true;
while ((line = br.readLine()) != null) {
if(isHeader){
csvCol.forEach((k,v) -> {
headers = "," + k ;
});
newFileLine = line + headers ;
}else{
csvCol.forEach((k,v) -> {
values = "," + v ;
});
newFileLine = line + values;
isHeader = false;
}
writer.write(newFileLine);
writer.newLine();
}
}
writer.close();
}
This problem requires appending one more column to the original CSV. A pure Java implementation takes a fair amount of code, but with the open-source SPL package, which runs on the JVM, it can be written as a single statement:
+
A
1
=file("temp.csv").export#wc(file("test1.csv").import#wc().(~|="Marks,1,2".split#c()(#)))
SPL provides JDBC for JAVA to call, save the above script as append.splx, and call the script file as a stored procedure in JAVA:
…
Class.forName("com.esproc.jdbc.InternalDriver");
con= DriverManager.getConnection("jdbc:esproc:local://");
st=con.prepareCall("call append()");
st.execute();
…
Or directly execute the SPL string in SQL in JAVA:
…
st = con.prepareStatement("==file(\"temp.csv\").export#wc(file(\"test1.csv\")
.import#wc().(~|=\"Marks,1,2\".split#c()(#)))");
st.execute();
…

BufferedReader ( scanner )

This is what I have so far. I want to know how many times a given word occurs in a .txt document. I am trying to use BufferedReader, but I have not managed to get it working well enough. I guess there is an easier way to solve this, but I don't know it.
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.InputStream;
import java.io.InputStreamReader;
/** Prints every line of test.txt (read as UTF-8) prefixed with "Reading: ". */
public class TekstiAnalüsaator {
    /**
     * @param args ignored
     * @throws Exception if the file cannot be opened or read
     */
    public static void main(String[] args) throws Exception {
        // try-with-resources closes the reader even when readLine() throws.
        try (BufferedReader puhverdab = new BufferedReader(
                new InputStreamReader(new FileInputStream("test.txt"), "UTF-8"))) {
            String rida;
            while ((rida = puhverdab.readLine()) != null) {
                System.out.println("Reading: " + rida);
            }
        }
    }
}
I want to search words using this structure. What file, then what word i need to find, (return) how many times, this word is in the file.
TekstiAnalüsaator analüsaator = new TekstiAnalüsaator("kiri.txt");
int esinemisteArv = analüsaator.sõneEsinemisteArv("kala");
Please see the code example below. This should solve the issue you are facing.
import java.io.*;
/** Counts the lines of a text file that contain a given search string. */
public class CountWords {
    public static void main(String args[]) throws IOException {
        System.out.println(count("Test.java", "static"));
    }

    /**
     * Counts how many lines of the file contain {@code wordToSearch} as a
     * substring. A line that contains the string several times is counted once.
     *
     * @param filename path of the file to scan
     * @param wordToSearch text to look for in each line
     * @return number of lines containing the text
     * @throws IOException if the file cannot be opened or read
     */
    public static int count(String filename, String wordToSearch) throws IOException {
        int tokencount = 0;
        // try-with-resources closes the file, which the earlier version never did.
        try (BufferedReader br = new BufferedReader(new FileReader(filename))) {
            String s;
            while ((s = br.readLine()) != null) {
                if (s.contains(wordToSearch)) {
                    tokencount++;
                }
            }
        }
        return tokencount;
    }
}
This is a slightly tricky question, because counting words in a string is not such a simple task. Your approach of reading the file line by line is fine, so the remaining problem is how to count the word matches.
For example you can do the simple check for matches like that:
/**
 * Counts how often {@code test} occurs in {@code line} as a plain substring.
 * Overlapping occurrences are counted ("aa" occurs twice in "aaa").
 *
 * @param line text to search in
 * @param test text to search for
 * @return number of occurrences; 0 when {@code test} is empty
 */
public static int getCountOFWordsInLine(String line, String test){
    // An empty search string matches at every index and would never terminate.
    if (test.isEmpty()) {
        return 0;
    }
    int count = 0;
    int index = line.indexOf(test);
    while (index != -1) {
        count++;
        // Continue one character after the start of the previous match.
        index = line.indexOf(test, index + 1);
    }
    return count;
}
The problem with that approach is that if your word is "test" and your string is "Next word matches asdfatestsdf" it will count it as a match. So you can try using some more advanced regex:
/**
 * Counts how often {@code word} occurs in {@code line} delimited by word
 * boundaries ({@code \b}) on both sides. The search is case sensitive.
 *
 * @param line text to search in
 * @param word literal word to search for
 * @return number of whole-word occurrences
 */
public static int getCountOFWordsInLine(String line, String word) {
    int count = 0;
    // Pattern.quote makes the word a literal, so characters such as '.' or '('
    // in the search word are not interpreted as regular-expression syntax.
    Pattern pattern = Pattern.compile("\\b" + Pattern.quote(word) + "\\b");
    Matcher matcher = pattern.matcher(line);
    while (matcher.find()) {
        count++;
    }
    return count;
}
It checks for the word surrounded by \b, which matches a word boundary.
It still won't find the word if it starts with an uppercase letter, though. If you want the search to be case insensitive, you can modify the previous method by converting both the line and the word to lowercase before searching. In the end it depends on your definition of a word.
The whole program will become:
/** Counts the whole-word occurrences of "test" in c:\test.txt and prints the total. */
public class MainClass {
    public static void main(String[] args) throws InterruptedException {
        String word = "test";
        int count = 0;
        // try-with-resources closes the reader even when reading fails.
        try (BufferedReader puhverdab = new BufferedReader(
                new InputStreamReader(new FileInputStream("c:\\test.txt"), "UTF-8"))) {
            String rida;
            while ((rida = puhverdab.readLine()) != null) {
                System.out.println("Reading: " + rida);
                count += getCountOFWordsInLine(rida, word);
            }
            System.out.println("count:" + count);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Counts how often {@code test} occurs in {@code line} delimited by word
     * boundaries on both sides (case sensitive).
     *
     * @param line text to search in
     * @param test literal word to search for
     * @return number of whole-word occurrences
     */
    public static int getCountOFWordsInLine(String line, String test) {
        int count = 0;
        // Quote the word so that regex metacharacters in it are matched literally.
        Pattern pattern = Pattern.compile("\\b" + Pattern.quote(test) + "\\b");
        Matcher matcher = pattern.matcher(line);
        while (matcher.find()) {
            count++;
        }
        return count;
    }
}
import java.io.*;
import java.util.regex.*;
/**
 * Reads test.txt (UTF-8) and counts the search word given as the first
 * command-line argument in four different ways, then prints all four totals.
 */
public class TA
{
    public static void main(String[] args) throws Exception
    {
        // An empty word would make the indexOf loop below run forever.
        if (args.length == 0 || args[0].isEmpty())
        {
            System.err.println("Usage: java TA <word>");
            return;
        }
        String word = args[0]; // search word passed via command line
        int count1=0, count2=0, count3=0, count4=0;
        // The word is quoted so that regex metacharacters in it are matched literally.
        Pattern P1 = Pattern.compile("\\b" + Pattern.quote(word) + "\\b");
        Pattern P2 = Pattern.compile("\\b" + Pattern.quote(word) + "\\b", Pattern.CASE_INSENSITIVE);
        // try-with-resources closes the reader even when reading fails.
        try (BufferedReader puhverdab = new BufferedReader(
                new InputStreamReader(new FileInputStream("test.txt"), "UTF-8")))
        {
            String rida;
            while ((rida = puhverdab.readLine()) != null)
            {
                System.out.println("Reading: " + rida);
                // Version 1: counts lines containing [word]
                if (rida.contains(word)) count1++;
                // Version 2: counts every instance of [word], including inside longer words
                int pos = 0;
                while ((pos = rida.indexOf(word, pos)) != -1) { count2++; pos++; }
                // Version 3: counts [word] delimited by word boundaries (\b)
                Matcher m1 = P1.matcher(rida);
                while (m1.find()) count3++;
                // Version 4: same as version 3, but case insensitive
                Matcher m2 = P2.matcher(rida);
                while (m2.find()) count4++;
            }
        }
        System.out.println("Found exactly " + count1 + " line(s) containing word: \"" + word + "\"");
        System.out.println("Found word \"" + word + "\" exactly " + count2 + " time(s)");
        System.out.println("Found word \"" + word + "\" surrounded by whitespace " + count3 + " time(s).");
        System.out.println("Found, case insensitive search, word \"" + word + "\" surrounded by whitespace " + count4 + " time(s).");
    }
}
This reads line-by-line as you've already done, splits a line by whitespace to obtain individual words, and checks each word for a match.
/**
 * Counts the whitespace-separated tokens of a UTF-8 text file that are exactly
 * equal to {@code word} (case sensitive; attached punctuation prevents a match).
 *
 * @param filename path of the file to scan
 * @param word token to count
 * @return number of matching tokens
 * @throws Exception if the file cannot be opened or read
 */
int countWords(String filename, String word) throws Exception {
    int count = 0;
    // try-with-resources closes the reader even when reading fails.
    try (BufferedReader reader = new BufferedReader(
            new InputStreamReader(new FileInputStream(filename), "UTF-8"))) {
        String line;
        while ((line = reader.readLine()) != null) {
            for (String w : line.split("\\s+")) {
                if (w.equals(word)) {
                    count++;
                }
            }
        }
    }
    return count;
}

Replace a substring by a changing replacement string

I'm trying to write a small program that detects comments in a code file and wraps each of them in an indexed tag, that is, a tag whose number increases with every comment.
For example this input:
method int foo (int y) {
int temp; // FIRST COMMENT
temp = 63; // SECOND COMMENT
// THIRD COMMENT
}
should be changed to:
method int foo (int y) {
int temp; <TAG_0>// FIRST COMMENT</TAG>
temp = 63; <TAG_1>// SECOND COMMENT</TAG>
<TAG_2>// THIRD COMMENT</TAG>
}
I tried the following code:
// Attempt from the question: wrap every "//" comment found in fileText in an indexed tag.
String prefix, suffix;
String pattern = "(//.*)";
Pattern r = Pattern.compile(pattern);
Matcher m = r.matcher(fileText);
int i = 0;
suffix = "</TAG>";
while (m.find()) {
prefix = "<TAG_" + i + ">";
// NOTE(review): replaceAll rewrites every match in the text in one pass using the
// current prefix, so all comments receive the same tag number.
System.out.println(m.replaceAll(prefix + m.group() + suffix));
i++;
}
The output for the above code is:
method int foo (int y) {
int temp; <TAG_0>// FIRST COMMENT</TAG>
temp = 63; <TAG_0>// SECOND COMMENT</TAG>
<TAG_0>// THIRD COMMENT</TAG>
}
To replace occurrences of detected patterns, you should use the Matcher#appendReplacement method which fills a StringBuffer:
// Rebuild the text match by match so that each comment gets its own tag number.
StringBuffer sb = new StringBuffer();
while (m.find()) {
prefix = "<TAG_" + i + ">";
// quoteReplacement keeps '$' and '\' inside the comment text literal; unquoted,
// appendReplacement would treat them as group references or escapes.
m.appendReplacement(sb, Matcher.quoteReplacement(prefix + m.group() + suffix));
i++;
}
m.appendTail(sb); // append the rest of the contents
The reason replaceAll will do the wrong replacement is that it will have the Matcher scan the whole string to replace every matched pattern with <TAG_0>...</TAG>. In effect, the loop would only execute once.
Have you tried reading the file per line, like:
// Reads the file line by line and wraps the "//" comment of each line in an indexed tag.
String prefix, suffix;
suffix = " </TAG>";
try (BufferedReader br = new BufferedReader(new FileReader(file))) {
int i = 0;
for (String line; (line = br.readLine()) != null;) {
// Locate the comment with indexOf: the regex "//*" also matches a single '/',
// so splitting on it breaks lines that contain a division or a path.
int commentStart = line.indexOf("//");
if (commentStart >= 0) {
prefix = "<TAG_" + i + ">";
System.out.println(line.substring(0, commentStart) + prefix + line.substring(commentStart) + suffix);
i++;
} else {
// Lines without a comment are passed through unchanged instead of being dropped.
System.out.println(line);
}
}
} catch (IOException e) {
// Report the failure instead of silently ignoring it.
e.printStackTrace();
}
fichiertexte.txt :
method int foo (int y) {
int temp; // FIRST COMMENT
temp = 63; // SECOND COMMENT
// THIRD COMMENT
}
App.java :
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Reads fichiertexte.txt and prints its contents with every "//" line comment
 * wrapped as {@code <TAG_n>// comment</TAG>}, where n increases per comment.
 */
public class App {
    /** Matches a line comment from "//" to the end of the line. */
    private static final Pattern LINE_COMMENT = Pattern.compile("(//.*)");

    public static void main(String[] args) {
        String fichier = "fichiertexte.txt";
        StringBuilder fileText = new StringBuilder();
        // Read the text file; try-with-resources closes it even when reading fails.
        try (BufferedReader br = new BufferedReader(
                new InputStreamReader(new FileInputStream(fichier)))) {
            String ligne;
            while ((ligne = br.readLine()) != null) {
                fileText.append(ligne).append("\n");
            }
        } catch (Exception e) {
            System.err.println(e.toString());
        }
        System.out.println(tagComments(fileText));
    }

    /** Returns {@code text} with each line comment wrapped in an indexed tag. */
    private static String tagComments(CharSequence text) {
        Matcher m = LINE_COMMENT.matcher(text);
        String suffix = "</TAG>";
        StringBuffer sb = new StringBuffer();
        int i = 0;
        while (m.find()) {
            String prefix = "<TAG_" + i + ">";
            // quoteReplacement keeps '$' and '\' in the comment text literal.
            m.appendReplacement(sb, Matcher.quoteReplacement(prefix + m.group() + suffix));
            i++;
        }
        // Without appendTail everything after the last comment would be lost.
        m.appendTail(sb);
        return sb.toString();
    }
}
System.out :
method int foo (int y) {
int temp; <TAG_0>// FIRST COMMENT</TAG>
temp = 63; <TAG_1>// SECOND COMMENT</TAG>
<TAG_2>// THIRD COMMENT</TAG>
}

Removing back to back dashes and asterisks in a string

I am having some trouble reading in a file and removing all of the punctuation from the file.
Below is what I currently have and I can not figure out why "----" and "*****" would still occur.
Can anyone point me in a direction to figure out how I need to adjust my replaceAll() in order to make sure repeated occurrences of punctuation can be removed?
/**
 * Reads the file line by line, adds the number of space-separated tokens of
 * every non-empty line to {@code wordCount}, and stores each token (reduced to
 * letters, digits and whitespace) in {@code mapInterface} with the value 1.
 * Tokens that are empty after cleaning are not stored.
 *
 * @param filepath file to analyze
 */
public void analyzeFile(File filepath) {
    String regex = "[a-zA-Z0-9\\s]";
    // try-with-resources closes the reader (and the underlying stream) even
    // when reading fails; the DataInputStream wrapper was not needed.
    try (BufferedReader br = new BufferedReader(
            new InputStreamReader(new FileInputStream(filepath)))) {
        wordCount = 0;
        String textFile;
        while ((textFile = br.readLine()) != null) {
            if (textFile.isEmpty()) {
                continue;
            }
            String[] words = textFile.split(" ");
            wordCount += words.length;
            for (String word : words) {
                String putString = cleanString(regex, word);
                if (putString.length() > 0) {
                    mapInterface.put(putString, 1);
                }
            }
        }
    } catch (Exception e) {
        System.out.println("Error while attempting to read file: "
                + filepath + " " + e.getMessage());
    }
}

/**
 * Returns the concatenation of all substrings of {@code str} matched by
 * {@code regex}, in order of appearance.
 *
 * @param regex pattern describing the characters to keep
 * @param str text to filter
 * @return the kept characters; empty when nothing matches
 */
private String cleanString(String regex, String str){
    StringBuilder newString = new StringBuilder();
    Matcher regexMatcher = Pattern.compile(regex).matcher(str);
    while (regexMatcher.find()) {
        newString.append(regexMatcher.group());
    }
    return newString.toString();
}
Surely you can use the \w escaped alphanumeric character? This will recognise all letters and numbers, but not punctuation.
putString = words[i].replaceAll("[^\w]+", "");
This replaces any non-word character with an empty string.

HTML tag parsing extracting title and others

I am trying to extract the title from a website, but when printing the title I get a "stream closed" error. I want to extract the text between the title tags. I am not familiar with parsing, so please be thorough when explaining. Thanks.
import java.lang.*;
import java.util.Scanner;
import java.net.*;
import java.io.*;
public class Allrecipes{
public static void main(String[] args) throws Exception{
System.out.println("Colby Mehmen");
Scanner input = new Scanner(System.in);
String str1 = "";
str1 = compare();
if (str1.contains("http://allrecipes.com")){
URL oracle = new URL(str1);
BufferedReader in = new BufferedReader(
new InputStreamReader(oracle.openStream()));
String html;
while ((html = in.readLine()) != null)
in.close();
String page = html;
int start = page.indexOf("<title>");
int end = page.indexOf("</title>");
String title = page.substring(start+"<title>".length(),end);
System.out.println(title);
}//end program
}
JSoup
Try JSOUP API it is really easy to use
Document doc = Jsoup.connect(YOUR_WEBSITE).get();
Elements tt = doc.select("title");
System.out.println(tt.text());
Your code
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.URL;
import java.util.Scanner;
/** Asks for an allrecipes.com recipe URL, downloads the page and prints its title. */
public class Allrecipes {
    public static void main(String[] args) throws Exception {
        System.out.println("Colby Mehmen");
        // http://allrecipes.com/Recipe/Cardamom-Maple-Salmon/Detail.aspx?soid=carousel_0_rotd&prop24=rotd
        String str1 = compare();
        if (str1.contains("http://allrecipes.com")) {
            URL oracle = new URL(str1);
            // Start from an empty builder: appending to a null String would
            // prefix the page text with the word "null".
            StringBuilder html = new StringBuilder();
            try (BufferedReader in = new BufferedReader(new InputStreamReader(
                    oracle.openStream()))) {
                String line;
                while ((line = in.readLine()) != null) {
                    html.append(line);
                }
            }
            String page = html.toString();
            int start = page.indexOf("<title>");
            int end = page.indexOf("</title>");
            // Without this check a page lacking a title makes substring throw.
            if (start < 0 || end < start) {
                System.out.println("ERROR: no <title> element found");
                return;
            }
            String title = page.substring(start + "<title>".length(), end);
            System.out.println(title);
        }// end program
    }

    /**
     * Reads a recipe URL from standard input and prefixes it with "http://"
     * when it mentions allrecipes.com without that scheme. Prints "ERROR" when
     * the URL is only the bare site address, then echoes and returns the URL.
     *
     * @return the possibly prefixed URL
     */
    public static String compare() {
        // The scanner is left open on purpose: closing it would close System.in.
        Scanner input = new Scanner(System.in);
        System.out.println("Enter recipe URL: ");
        String str1 = input.next();
        String str2 = "allrecipes.com";
        String str3 = "http://";
        if (str1.contains(str2) && !str1.contains(str3 + str2)) {
            str1 = str3 + str1;
        }
        // Strings must be compared with equals(); != only compares references,
        // so this branch could never be taken.
        if (str1.equals(str3 + str2)) {
            System.out.println("ERROR");
        }
        /* cOUT */System.out.println(str1);
        return str1;
    }// end compare
}

Categories