Related
I wrote a big chunk of code that downloads a CSV file from a URL, bulk-inserts it into a SQL database, then reads the data back from SQL Server and displays it on the Java console. Finally, it selects the columns I want to keep and exports them as a new CSV file.
But all of that code is in a single class right now. How can I separate it into different classes? I want one class that only downloads the file, another that only does the bulk insert, and another that only runs the SELECT query.
Thanks for helping me out
below is my code in one class now
/**
 * Downloads the virus CSV, loads it into SQL Server, prints a preview of the table on the console
 * and exports a subset of the columns to a new CSV file.
 *
 * <p>The SQL text ({@code CreateTable}, {@code InsertData}, {@code selectQuery}) and the
 * per-column buffers ({@code coList1} .. {@code coList17}) are not declared in this class;
 * presumably they are inherited from {@code CreateTable} - confirm against that class.
 */
public class ProjectTest extends CreateTable {

    /** Upper bound on the number of rows echoed to the console. */
    private static final int PREVIEW_ROWS = 20;

    /** 1-based result-set columns written to the export file, in the order of the CSV header. */
    private static final int[] EXPORT_COLUMNS = { 1, 2, 3, 5, 7, 10, 11, 12, 13, 14, 15, 16, 17 };

    /** Text printed after each of the first 16 column names in the console header. */
    private static final String[] HEADER_SUFFIXES = { " |", " |", "|", "|", " |", " |", " |", " |", "|", " |",
            " |", "|", " |", " |", " |", "" };

    public static void main(String[] args) throws MalformedURLException {
        URL url = new URL(
                "https://quality.data.gov.tw/dq_download_csv.php?nid=43983&md5_url=9d38afbca8243a24b5b89d03a8070aff");
        try (InputStream inputStream = url.openStream();
                FileOutputStream fos = new FileOutputStream(
                        "C:\\Users\\ALICE\\Desktop\\Java\\Dropbox\\Java\\virus.csv");
                Connection connection = DriverManager
                        .getConnection("jdbc:sqlserver://localhost:1433;databaseName=JDBCDB", "andy3", "andy");
                Statement stmt = connection.createStatement(ResultSet.TYPE_SCROLL_SENSITIVE,
                        ResultSet.CONCUR_UPDATABLE);
                FileOutputStream fos2 = new FileOutputStream(
                        "C:\\Users\\ALICE\\Desktop\\Java\\Dropbox\\Java\\NEWvirus.csv");
                OutputStreamWriter osw = new OutputStreamWriter(fos2, "MS950");
                BufferedWriter bw = new BufferedWriter(osw)) {

            // Step 1: download the CSV to disk.
            byte[] buffer = new byte[1024];
            int length;
            while ((length = inputStream.read(buffer)) != -1) {
                fos.write(buffer, 0, length);
            }
            // Closed explicitly so the file is released before the database statements run
            // (presumably InsertData loads it - confirm); the second close performed by
            // try-with-resources is a no-op.
            fos.close();

            // Step 2: recreate the table and load the data.
            stmt.executeUpdate("DROP TABLE Virus");
            stmt.execute(CreateTable);
            System.out.println("Database Created");
            try (PreparedStatement pstmt = connection.prepareStatement(InsertData)) {
                if (pstmt.executeUpdate() > 0) {
                    System.out.println("Data Inserted");
                } else {
                    System.out.println("Insert ERROR");
                }
            }

            // The cursor starts before the first row, so next() is required before getInt().
            try (ResultSet countRs = stmt.executeQuery("SELECT Count(*) FROM Virus")) {
                if (countRs.next()) {
                    System.out.println(countRs.getInt(1));
                }
            }

            // Step 3: query the table and print the column names.
            ResultSet rs3 = stmt.executeQuery(selectQuery);
            DatabaseMetaData dbMetaData = connection.getMetaData();
            String[] types = { "TABLE" };
            List<String> colNameList = new ArrayList<String>();
            try (ResultSet tables = dbMetaData.getTables(null, null, "Virus", types)) {
                while (tables.next()) {
                    String tableName = tables.getString("TABLE_NAME");
                    try (ResultSet columnRs = dbMetaData.getColumns(null, null, tableName, null)) {
                        while (columnRs.next()) {
                            colNameList.add(columnRs.getString("COLUMN_NAME"));
                        }
                    }
                    StringBuilder header = new StringBuilder("|");
                    for (int i = 0; i < HEADER_SUFFIXES.length && i < colNameList.size(); i++) {
                        header.append(colNameList.get(i)).append(HEADER_SUFFIXES[i]);
                    }
                    System.out.print(header);
                }
            }
            System.out.println();

            // Step 4: buffer every row column by column.
            while (rs3.next()) {
                coList1.add(rs3.getString(1));
                coList2.add(rs3.getString(2));
                coList3.add(rs3.getString(3));
                coList4.add(rs3.getString(4));
                coList5.add(rs3.getString(5));
                coList6.add(rs3.getString(6));
                coList7.add(rs3.getString(7));
                coList8.add(rs3.getString(8));
                coList9.add(rs3.getString(9));
                coList10.add(rs3.getString(10));
                coList11.add(rs3.getString(11));
                coList12.add(rs3.getString(12));
                coList13.add(rs3.getString(13));
                coList14.add(rs3.getString(14));
                coList15.add(rs3.getString(15));
                coList16.add(rs3.getString(16));
                coList17.add(rs3.getString(17));
            }

            // Step 5: print a preview, never reading past the rows that actually exist.
            int previewRows = Math.min(PREVIEW_ROWS, coList1.size());
            for (int p = 0; p < previewRows; p++) {
                System.out.print("|" + coList1.get(p) + "|");
                System.out.print(padLeft(coList2.get(p), 3, 3) + "|");
                System.out.print(" " + coList3.get(p) + "|");
                System.out.print(coList4.get(p) + "|");
                System.out.print(coList5.get(p) + "|");
                System.out.print(coList6.get(p) + "|");
                System.out.print(coList7.get(p) + "|");
                System.out.print(coList8.get(p) + "|");
                System.out.print(padLeft(coList9.get(p), 5, 5) + "|");
                System.out.print(coList10.get(p) + "|");
                System.out.print(coList11.get(p) + "|");
                System.out.print(padLeft(coList12.get(p), 5, 6) + "|");
                // Long values of column 13 used to be replaced by column 12 here.
                System.out.print(padLeft(coList13.get(p), 5, 4) + "|");
                System.out.print(coList14.get(p) + "|");
                System.out.print(coList15.get(p) + "|");
                System.out.print(coList16.get(p) + "|");
                System.out.print(coList17.get(p) + "|");
                System.out.println();
            }

            // Step 6: export the selected columns. The cursor is rewound and then advanced only
            // by the loop condition, so the first row is exported as well.
            rs3.beforeFirst();
            StringBuilder builder = new StringBuilder();
            builder.append("CaseID,Age,Gender,City,SampleDate,VirusType,SubType,Locus,Primer,GeneDirection,")
                    .append("TypingMethod,DNASeq,AminoAcidSeq");
            while (rs3.next()) {
                builder.append(System.lineSeparator());
                for (int i = 0; i < EXPORT_COLUMNS.length; i++) {
                    if (i > 0) {
                        builder.append(",");
                    }
                    builder.append(rs3.getString(EXPORT_COLUMNS[i]));
                }
            }
            bw.write(builder.toString());
        } catch (IOException e) {
            e.printStackTrace();
        } catch (SQLException e1) {
            e1.printStackTrace();
        }
    }

    /**
     * Right-aligns {@code value} for the console preview.
     *
     * @param value     cell text; {@code null} is rendered as {@code "null"}
     * @param threshold values at least this long are returned unchanged
     * @param width     shorter values are left-padded with spaces up to this length
     * @return the padded text
     */
    private static String padLeft(String value, int threshold, int width) {
        String text = String.valueOf(value);
        if (text.length() >= threshold) {
            return text;
        }
        StringBuilder padded = new StringBuilder();
        for (int i = text.length(); i < width; i++) {
            padded.append(' ');
        }
        return padded.append(text).toString();
    }
}
Start refactoring your code in small steps and then iteratively improve your design if needed.
You have already described the algorithm as a sequence of steps, so use those steps as your starting point:
CSV file from url,
then i bulk inserted into sql database,
then call data from SQL server and
display on Java console.
Finally select the column I want to keep
and export as a new CSV file.
Write a small helper function (and later, if needed, a small class) for each of these steps.
Read more about SOLID design principles if that helps in improving your solution.
I am trying to write 2 different arrays to a csv. The first one I want in the first column, and second array in the second column, like so:
array1val1 array2val1
array1val2 array2val2
I am using the following code:
// Builds the path <user.home>/Desktop/<fileName text>.csv and writes one line per entry of
// compSource / compSource2 to it through a PrintWriter.
String userHomeFolder2 = System.getProperty("user.home") + "/Desktop";
String csvFile = (userHomeFolder2 + "/" + fileName.getText() + ".csv");
// NOTE(review): this writer is opened on csvFile but is neither used nor closed in this snippet.
FileWriter writer = new FileWriter(csvFile);
final String NEW_LINE_SEPARATOR = "\n";
FileWriter fileWriter;
CSVPrinter csvFilePrinter;
CSVFormat csvFileFormat = CSVFormat.DEFAULT.withRecordSeparator(NEW_LINE_SEPARATOR);
// NOTE(review): opened on fileName.getText() (no folder, no ".csv"), i.e. a different path than
// csvFile; nothing below prints through csvFilePrinter.
fileWriter = new FileWriter(fileName.getText());
csvFilePrinter = new CSVPrinter(fileWriter, csvFileFormat);
try (PrintWriter pw = new PrintWriter(csvFile)) {
pw.printf("%s\n", FILE_HEADER);
for(int z = 0; z < compSource.size(); z+=1) {
//below forces the result to get stored in below variable as a String type
String newStr=compSource.get(z);
String newStr2 = compSource2.get(z);
// String.replaceAll returns a new String; the results are discarded here, so newStr and
// newStr2 still contain their spaces.
newStr.replaceAll(" ", "");
newStr2.replaceAll(" ", "");
String[] explode = newStr.split(",");
String[] explode2 = newStr2.split(",");
// The format string has a single %s, so only the first argument (explode) is formatted and
// explode2 is ignored; an array passed to %s is rendered via toString(), not element by element.
pw.printf("%s\n", explode, explode2);
}
}
catch (Exception e) {
System.out.println("Error in csvFileWriter");
e.printStackTrace();
} finally {
// Closes the second writer and the printer wrapped around it; the PrintWriter above is
// closed by its try-with-resources.
try {
fileWriter.flush();
fileWriter.close();
csvFilePrinter.close();
} catch (IOException e ) {
System.out.println("Error while flushing/closing");
}
}
However I am getting a strange output into the csv file:
[Ljava.lang.String;#17183ab4
I can run
pw.printf("%s\n", explode);
pw.printf("%s\n", explode2);
Instead of : pw.printf("%s\n", explode, explode2);
and it prints the actual strings but all in one same column.
Does anyone know how to solve this?
1. Your explode and explode2 are actually String arrays. You are printing the arrays themselves and not their values, so what ends up in the file is the array's default toString() (type name plus hash code), which looks like an address.
You should go through the arrays with a loop and print their elements.
for(int i = 0; i<explode.length;++i) {
pw.printf("%s%s\n", explode[i], explode2[i]);
}
2. Also, the format string passed to printf needs one format specifier per argument, something like
pw.printf("%s%s\n", explode, explode2);
because you are passing two arguments, but the format string in ("%s\n", explode, explode2) has only one specifier, so only the first argument is printed.
Try it out and let me know if it worked.
After these lines:
newStr.replaceAll(" ", "");
newStr2.replaceAll(" ", "");
String[] explode = newStr.split(",");
String[] explode2 = newStr2.split(",");
Use this code:
int maxLength = Math.max(explode.length, explode2.length);
for (int i = 0; i < maxLength; i++) {
String token1 = (i < explode.length) ? explode[i] : "";
String token2 = (i < explode2.length) ? explode2[i] : "";
pw.printf("%s %s\n", token1, token2);
}
This also covers the case where the arrays are of different lengths.
I have removed all unused variables and made some assumptions about content of compSource.
Moreover, don't forget String is immutable. If you just do "newStr.replaceAll(" ", "");", the replacement will be lost.
public class Tester {
#Test
public void test() throws IOException {
// I assumed compSource and compSource2 are like bellow
List<String> compSource = Arrays.asList("array1val1,array1val2");
List<String> compSource2 = Arrays.asList("array2val1,array2val2");
String userHomeFolder2 = System.getProperty("user.home") + "/Desktop";
String csvFile = (userHomeFolder2 + "/test.csv");
try (PrintWriter pw = new PrintWriter(csvFile)) {
pw.printf("%s\n", "val1,val2");
for (int z = 0; z < compSource.size(); z++) {
String newStr = compSource.get(z);
String newStr2 = compSource2.get(z);
// String is immutable --> store the result otherwise it will be lost
newStr = newStr.replaceAll(" ", "");
newStr2 = newStr2.replaceAll(" ", "");
String[] explode = newStr.split(",");
String[] explode2 = newStr2.split(",");
for (int k = 0; k < explode.length; k++) {
pw.println(explode[k] + "\t" + explode2[k]);
}
}
}
}
}
// Copies every record of the input CSV to a new file and counts the loop iterations.
FileReader fr = new FileReader(inp);
CSVReader reader = new CSVReader(fr, ',', '"');
// writer
File writtenFromWhile = new File(dliRootPath + writtenFromWhilePath);
writtenFromWhile.createNewFile();
CSVWriter writeFromWhile = new CSVWriter(new FileWriter(writtenFromWhile), ',', '"');
// Incremented once per record returned by readNext().
int insideWhile = 0;
String[] currRow = null;
while ((currRow = reader.readNext()) != null) {
insideWhile++;
writeFromWhile.writeNext(currRow);
}
// NOTE(review): neither reader nor writeFromWhile is flushed or closed in this snippet.
System.out.println("inside While: " + insideWhile);
// NOTE(review): getLinesRead() reports physical lines, which can exceed the number of records
// when quoted fields span several lines - confirm against the opencsv documentation.
System.out.println("lines read (acc.to CSV reader): " + reader.getLinesRead());
The output is:
inside While: 162199
lines read (acc.to CSV reader): 256865
Even though all lines are written to the output CSV (as seen in a text editor; Excel shows a much smaller number of rows), the while loop does not iterate as many times as there are lines in the input CSV. My main objective is to implement some other logic inside the while loop for each line.
I have been trying to debug this for two whole days (in a bigger program) without any result.
Please explain how I can make the while loop iterate 256865 times.
Reference data, complete picture:
Here is the CSV I am reading in the above snippet.
My complete program tries to separate out those records from this CSV which are not present in this CSV, based on the fields title and author (i.e if author and title is the same in 2 records, even if other fields are different, they are counted as duplicate and should not be written to the output file). Here is my complete code (the difference should be around 300000, but i get only ~210000 in the output file with my code):
//TODO ask id
/*(*
* id also there in fields getting matched (thisRow[0] is id)
* u can replace it by thisRow[fielAnd Column.get(0)] to eliminate id
*/
package mainOne;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import com.opencsv.CSVReader;
import com.opencsv.CSVWriter;
public class Diff_V3 {
static String dliRootPath = "/home/gurnoor/Incoming/Untitled Folder 2/";
static String dli = "new-dli-IITG.csv";
static String oldDli = "dli-iisc.csv";
static String newFile = "newSampleFile.csv";// not used
static String unqFile = "UniqueFileFinal.csv";
static String log = "Diff_V3_log.txt";
static String splittedNewDliDir = "/home/gurnoor/Incoming/Untitled Folder 2/splitted new file";
static String splittedOldDliDir = "/home/gurnoor/Incoming/Untitled Folder 2/splitted old file";
// debug
static String testFilePath = "testFile.csv";
static int insidepopulateMapFromSplittedCSV = 0;
/**
 * Entry point: makes sure the log file and the debug dump file exist, then compares the new and
 * old split CSV directories on the author and title columns.
 *
 * <p>Ad-hoc helpers for manual checks: {@code _readSample(dliRootPath + dli, dliRootPath + newFile)}
 * and {@code areIDsunique(dliRootPath + dli, totalLines)}.
 */
public static void main(String[] args) throws IOException, CustomException {
    // TODO implement separate function to check equals
    File logFile = new File(dliRootPath + log);
    logFile.createNewFile();
    File debugDump = new File(dliRootPath + testFilePath);
    debugDump.createNewFile();

    List<String> fieldsToBeMatched = new ArrayList<>(
            Arrays.asList("dc.contributor.author[]", "dc.title[]"));

    File newDir = new File(splittedNewDliDir);
    File oldDir = new File(splittedOldDliDir);
    filterUniqueFileds(newDir, oldDir, fieldsToBeMatched);
}
/**
 * Writes to {@code dliRootPath + unqFile} every record of the new CSV
 * ({@code dliRootPath + dli}) that has no counterpart in the old files.
 *
 * <p>A new record counts as already present when the old data contains a row with the same
 * first column (id) whose values in all {@code fieldsToBeMatched} columns are equal.
 *
 * <p>NOTE: might remove the row where fieldToBeMatched is null
 *
 * @param newDir            currently unused; the new records are read from
 *                          {@code dliRootPath + dli}
 * @param oldDir            directory containing the split old CSV files
 * @param fieldsToBeMatched heading names of the columns that must agree
 * @throws IOException     if a file cannot be read or written
 * @throws CustomException if the input is empty, the old data has no heading row, or the
 *                         fields to be matched cannot be located
 */
private static void filterUniqueFileds(File newDir, File oldDir, List<String> fieldsToBeMatched)
        throws IOException, CustomException {
    File logFile = new File(dliRootPath + log);
    // Start every run with an empty log. It is reopened in append mode further down so that
    // entries written while the old files are loaded are not overwritten.
    new FileWriter(logFile).close();

    File unqFileOp = new File(dliRootPath + unqFile);
    unqFileOp.createNewFile();

    try (CSVReader reader = new CSVReader(new FileReader(new File(dliRootPath + dli)), '|');
            CSVWriter writer = new CSVWriter(new FileWriter(unqFileOp), '|')) {
        String[] headingRow = reader.readNext();
        if (headingRow == null) {
            throw new CustomException("input CSV is empty: " + dliRootPath + dli);
        }
        writer.writeNext(headingRow);
        int headingLen = headingRow.length;

        // old List
        System.out.println("[INFO] reading old list...");
        Map<String, List<String>> oldMap = populateMapFromSplittedCSV(new HashMap<String, List<String>>(),
                oldDir);
        System.out.println("[INFO] Read old List. Size = " + oldMap.size());
        printMapToCSV(oldMap, dliRootPath + testFilePath);

        // The heading row of the old data is stored under its own first cell, "id".
        List<String> oldHeading = oldMap.get("id");
        if (oldHeading == null) {
            throw new CustomException("heading row (key \"id\") not found in old files");
        }
        // map of fieldName, ColumnNo
        Map<String, Integer> fieldAndColumnNoInNew = getColumnNo(fieldsToBeMatched, headingRow);
        // toArray(new String[0]) yields a real String[]; casting the result of the no-arg
        // toArray() fails with ClassCastException on Java 9 and later.
        Map<String, Integer> fieldAndColumnNoInOld = getColumnNo(fieldsToBeMatched,
                oldHeading.toArray(new String[0]));
        // error check: did columnNo get populated?
        if (fieldAndColumnNoInNew.isEmpty()) {
            throw new CustomException("field to be matched not present in input CSV");
        }

        // TODO implement own array compare using areEqual()
        int noOfRecordsInOldList = 0, noOfRecordsWritten = 0, checkManually = 0;
        try (BufferedWriter logWriter = new BufferedWriter(new FileWriter(logFile, true))) {
            String[] thisRow;
            while ((thisRow = reader.readNext()) != null) {
                // error check
                if (thisRow.length != headingLen) {
                    String error = "Line no: " + reader.getLinesRead() + " in file: " + dliRootPath + dli
                            + " not read. Check manually";
                    System.err.println(error);
                    logWriter.append(error + "\n");
                    logWriter.flush();
                    checkManually++;
                    continue;
                }
                // write if not present in oldMap
                if (!oldMap.containsKey(thisRow[0])) {
                    writer.writeNext(thisRow);
                    writer.flush();
                    noOfRecordsWritten++;
                    continue;
                }
                // Both files must expose exactly the same set of fields to compare.
                if (!fieldAndColumnNoInNew.keySet().equals(fieldAndColumnNoInOld.keySet())) {
                    throw new CustomException("something is really wrong");
                }
                // check if all reqd fields match; columns are paired by field name rather than
                // by the iteration order of two independent maps
                List<String> twinRow = oldMap.get(thisRow[0]);
                boolean differs = false;
                for (Map.Entry<String, Integer> field : fieldAndColumnNoInNew.entrySet()) {
                    int columnInNew = field.getValue().intValue();
                    int columnInOld = fieldAndColumnNoInOld.get(field.getKey()).intValue();
                    // An old row too short to hold the column cannot match.
                    if (columnInOld >= twinRow.size()
                            || !areEqual(thisRow[columnInNew], twinRow.get(columnInOld))) {
                        differs = true;
                        break;
                    }
                }
                if (differs) {
                    writer.writeNext(thisRow);
                    writer.flush();
                    noOfRecordsWritten++;
                    System.out.println(noOfRecordsWritten);
                } else {
                    noOfRecordsInOldList++;
                }
            }
        }

        System.out.println("--------------------------------------------------------\nDebug info");
        System.out.println("old File: " + oldMap.size());
        System.out.println("new File:" + reader.getLinesRead());
        System.out.println("no of records in old list (present in both old and new) = " + noOfRecordsInOldList);
        System.out.println("checkManually: " + checkManually);
        System.out.println("noOfRecordsInOldList+checkManually = " + (noOfRecordsInOldList + checkManually));
        System.out.println("no of records written = " + noOfRecordsWritten);
        System.out.println();
        System.out.println("inside populateMapFromSplittedCSV() " + insidepopulateMapFromSplittedCSV + "times");
    }
}
/**
 * Dumps every row stored in {@code oldMap} to a '|'-separated CSV file (debug aid).
 *
 * @param oldMap        rows to write; only the values are written
 * @param testFilePath2 path of the file to create or overwrite
 * @throws IOException if the file cannot be written
 */
private static void printMapToCSV(Map<String, List<String>> oldMap, String testFilePath2) throws IOException {
    int i = 0;
    // try-with-resources closes (and thereby flushes) the writer even when a write fails.
    try (CSVWriter writer = new CSVWriter(new FileWriter(new File(testFilePath2)), '|')) {
        for (List<String> row : oldMap.values()) {
            writer.writeNext(row.toArray(new String[0]));
            writer.flush();
            i++;
        }
    }
    System.out.println("[hello from line 210 ( inside printMapToCSV() ) of ur code] wrote " + i + " lines");
}
/**
 * Loads every CSV file found directly inside {@code oldDir} into {@code oldMap}.
 *
 * @param oldMap map to fill, keyed by the first column of each row
 * @param oldDir directory containing the split old CSV files
 * @return a copy of the filled map
 * @throws IOException if {@code oldDir} cannot be listed or a file cannot be read
 */
private static Map<String, List<String>> populateMapFromSplittedCSV(Map<String, List<String>> oldMap, File oldDir)
        throws IOException {
    // listFiles() returns null when oldDir is not a directory or cannot be read.
    File[] oldFiles = oldDir.listFiles();
    if (oldFiles == null) {
        throw new IOException("Cannot list old CSV directory: " + oldDir);
    }
    File defective = new File(dliRootPath + "defectiveOldFiles.csv");
    defective.createNewFile();
    try (CSVWriter defectWriter = new CSVWriter(new FileWriter(defective))) {
        for (File oldFile : oldFiles) {
            insidepopulateMapFromSplittedCSV++;
            // Each reader is closed as soon as its file has been consumed, even on failure.
            try (CSVReader reader = new CSVReader(new FileReader(oldFile), ',', '"')) {
                oldMap = populateMapFromCSV(oldMap, reader, defectWriter);
            }
            System.out.println(oldMap.size());
        }
    }
    System.out.println("inside populateMapFromSplittedCSV() " + insidepopulateMapFromSplittedCSV + "times");
    return new HashMap<String, List<String>>(oldMap);
}
/**
 * Looks up the column index of each requested field in a heading row.
 *
 * @param fieldsToBeMatched heading names to locate
 * @param headingRow        cells of the heading row
 * @return field name mapped to the index of the first heading equal to it (per
 *         {@code areEqual}); fields without a matching heading are left out
 */
private static Map<String, Integer> getColumnNo(List<String> fieldsToBeMatched, String[] headingRow) {
    Map<String, Integer> columnByField = new HashMap<>();
    for (String wanted : fieldsToBeMatched) {
        // Advance to the first heading that matches, or past the end if none does.
        int column = 0;
        while (column < headingRow.length && !areEqual(wanted, headingRow[column])) {
            column++;
        }
        if (column < headingRow.length) {
            columnByField.put(wanted, Integer.valueOf(column));
        }
    }
    return columnByField;
}
/**
 * Adds every row delivered by {@code oldReader} to {@code oldMap}, keyed by its first column.
 *
 * <p>A row whose key is already present is never stored. If it also differs from the stored
 * row (different length or any unequal field), it is reported on stderr and appended to the
 * log file as a skipped record.
 *
 * @param oldMap       map to fill; modified in place
 * @param oldReader    reader positioned at the first row to load
 * @param defectWriter currently unused
 * @return a copy of the filled map
 * @throws IOException if reading a row or writing the log fails
 */
private static Map<String, List<String>> populateMapFromCSV(Map<String, List<String>> oldMap, CSVReader oldReader,
        CSVWriter defectWriter) throws IOException {
    String[] thisRow;
    int insideWhile = 0, addedInMap = 0, doesNotContainKey = 0, containsKey = 0;
    while ((thisRow = oldReader.readNext()) != null) {
        insideWhile++;
        if (!oldMap.containsKey(thisRow[0])) {
            doesNotContainKey++;
            oldMap.put(thisRow[0], Arrays.asList(thisRow));
            addedInMap++;
            continue;
        }
        // Compare against the stored row itself. The heading row cannot be used as the
        // yardstick: a freshly opened reader reports zero lines read, so it was never loaded.
        List<String> twinRow = oldMap.get(thisRow[0]);
        boolean differs = twinRow.size() != thisRow.length;
        for (int m = 0; !differs && m < thisRow.length; m++) {
            differs = !areEqual(thisRow[m], twinRow.get(m));
        }
        if (differs) {
            System.err.println("do something!!!!!! DUPLICATE ID in old file");
            containsKey++;
            String message = "[Skipped record] in old file. Row no: " + oldReader.getLinesRead()
                    + "\nRecord: " + Arrays.toString(thisRow);
            System.err.println(message);
            // Append mode: earlier log entries must survive later skipped records.
            try (FileWriter logWriter = new FileWriter(new File(dliRootPath + log), true)) {
                logWriter.append(message + "\n");
            }
        }
    }
    System.out.println("inside while: " + insideWhile);
    System.out.println("oldMap size = " + oldMap.size());
    System.out.println("addedInMap: " + addedInMap);
    System.out.println("doesNotContainKey: " + doesNotContainKey);
    System.out.println("containsKey: " + containsKey);
    return new HashMap<String, List<String>>(oldMap);
}
/**
 * Compares two cell values, ignoring leading and trailing whitespace.
 *
 * @param field   first value, may be {@code null}
 * @param heading second value, may be {@code null}
 * @return {@code true} if both are {@code null} or their trimmed texts are equal
 */
private static boolean areEqual(String field, String heading) {
    // TODO implement, askSubhayan
    if (field == null || heading == null) {
        // A missing value equals only another missing value.
        return field == heading;
    }
    return field.trim().equals(heading.trim());
}
/**
 * Checks whether the first column of a '|'-separated CSV holds unique, non-empty ids.
 *
 * @param inpCSV          path of the CSV file to check
 * @param totalLinesInCSV expected number of rows (and therefore of distinct ids)
 * @return the first id that is null, empty or already seen; otherwise the string
 *         {@code "unique"} when the number of distinct ids equals {@code totalLinesInCSV},
 *         or {@code "totalLinesInCSV != totaluniqueIDs"} when it does not
 * @throws IOException if the file cannot be read
 */
private static String areIDsunique(String inpCSV, int totalLinesInCSV) throws IOException {
    Set<String> id = new HashSet<>();
    try (CSVReader reader = new CSVReader(new FileReader(new File(inpCSV)), '|')) {
        String[] thisRow;
        while ((thisRow = reader.readNext()) != null) {
            // Set.add returns false for a repeated id; the null check comes first so that
            // isEmpty() is never called on null.
            if (thisRow[0] == null || thisRow[0].isEmpty() || !id.add(thisRow[0])) {
                return thisRow[0];
            }
        }
    }
    if (id.size() == totalLinesInCSV) {
        return "unique";
    } else {
        return "totalLinesInCSV != totaluniqueIDs";
    }
}
/**
 * Writes the first 20 rows of the input CSV (fewer if the file is shorter) into the output
 * file.
 *
 * @param input  path of the '|'-separated CSV file to sample
 * @param output path of the file to create or overwrite
 * @throws IOException if reading or writing fails
 */
public static void _readSample(String input, String output) throws IOException {
    File opFile = new File(output);
    opFile.createNewFile();
    // Resources are closed in reverse order, so the writer is flushed before the reader closes.
    try (CSVReader reader = new CSVReader(new FileReader(new File(input)), '|');
            CSVWriter writer = new CSVWriter(new FileWriter(opFile))) {
        String[] row;
        for (int i = 0; i < 20 && (row = reader.readNext()) != null; i++) {
            writer.writeNext(row);
        }
    }
}
}
RC's comment nailed it!
If you check the java docs you will see that there are two methods in the CSVReader: getLinesRead and getRecordsRead. And they both do exactly what they say. getLinesRead returns the number of lines that was read using the FileReader. getRecordsRead returns the number of records that the CSVReader read. Keep in mind that if you have embedded new lines in the records of your file then it will take multiple line reads to get one record. So it is very conceivable to have a csv file with 100 records but taking 200 line reads to read them all.
Unescaped quotes inside a CSV cell can mess up your whole data. This might happen in a CSV if the data you are working with has been created manually. Below is a function I wrote a while back for this situation. Let me know if this is not the right place to share it.
/**
 * Removes stray quotes inside the cells of every file in {@code curateDir} and puts the curated
 * copies in {@code "../CuratedFiles"} (a sibling of {@code curateDir}).
 *
 * <p>The first line of each file is copied unchanged as the heading. Lines that contain no
 * quote-plus-delimiter sequence are copied unchanged and recorded in {@code CurationLogs.txt}.
 *
 * @param curateDir directory containing the CSV files to curate
 * @param del       CSV column delimiter
 * @throws IOException if a directory cannot be listed or a file cannot be read or written
 */
public static void curateCsvRowQuotes(File curateDir, String del) throws IOException {
    File parent = curateDir.getAbsoluteFile().getParentFile();
    File[] inputs = curateDir.listFiles();
    if (parent == null || inputs == null) {
        throw new IOException("Cannot curate directory: " + curateDir);
    }
    File curatedDir = new File(parent.getAbsolutePath() + "/CuratedFiles");
    curatedDir.mkdir();
    // logs
    File logFile = new File(curatedDir.getAbsolutePath() + "/CurationLogs.txt");
    logFile.createNewFile();
    try (BufferedWriter logWriter = new BufferedWriter(new FileWriter(logFile))) {
        for (File file : inputs) {
            // output
            File fOp = new File(curatedDir.getAbsolutePath() + "/" + file.getName());
            fOp.createNewFile();
            int lineCount = 0;
            try (BufferedReader bufRead = new BufferedReader(new FileReader(file));
                    BufferedWriter bufW = new BufferedWriter(new FileWriter(fOp))) {
                String heading = bufRead.readLine();
                if (heading != null) { // an empty file has no heading to copy
                    bufW.append(heading + "\n");
                }
                String thisLine;
                while ((thisLine = bufRead.readLine()) != null) {
                    String opLine = curateCsvLine(thisLine, del);
                    if (opLine == null) {
                        logWriter.append(file.getName() + ": line copied unchanged: " + thisLine + "\n");
                        opLine = thisLine;
                    }
                    System.out.println(opLine);
                    bufW.append(opLine + "\n");
                    lineCount++;
                }
            }
            System.out.println(lineCount + " no of lines in " + file.getName());
        }
    }
}

/**
 * Rebuilds one CSV line: in every cell after the first, the first quote is kept and all further
 * quotes are dropped before the closing quote and delimiter are re-appended.
 *
 * @return the curated line, or {@code null} if the line contains no quote-plus-delimiter
 *         sequence
 */
private static String curateCsvLine(String thisLine, String del) {
    String marker = "\"" + del;
    int skip = marker.length();
    int endIndex = thisLine.indexOf(marker);
    if (endIndex == -1) {
        return null;
    }
    StringBuilder opLine = new StringBuilder();
    opLine.append(thisLine, 0, endIndex).append(marker);
    while (true) {
        // leave out first " in a cell
        int tempIndex = thisLine.indexOf(marker, endIndex + skip);
        if (tempIndex == -1) {
            break;
        }
        String str = thisLine.substring(endIndex + skip, tempIndex);
        int indexOfQuote = str.indexOf("\"");
        opLine.append(str, 0, indexOfQuote + 1);
        // remove all "
        opLine.append(str.substring(indexOfQuote + 1).replace("\"", "")).append(marker);
        endIndex = tempIndex;
    }
    // NOTE(review): whatever follows the last marker is kept only when it consists solely of
    // delimiter characters, so a final cell that is not followed by a delimiter is dropped -
    // confirm this is intended for the data being curated.
    String tail = thisLine.substring(thisLine.lastIndexOf(marker) + skip);
    if (tail.matches("[" + del + "]+")) {
        opLine.append(tail);
    }
    return opLine.toString();
}
In my case, I've used csvReader.readAll() before the readNext().
Like
List<String[]> myData =csvReader.readAll();
while ((nextRecord = csvReader.readNext()) != null) {
}
So my csvReader.readNext() always returned null, since all the values had already been read into myData.
Please be cautious when combining the readNext() and readAll() functions on the same reader.
// Reads India.txt line by line, collects the values of the "name", "Location", "Id" and
// "Details" lines into four lists, then writes them to output.csv.
try {
BufferedReader sc = new BufferedReader(new FileReader("/home/aravind/Desktop/India.txt"));
ArrayList<String> name = new ArrayList<>();
ArrayList<String> Location = new ArrayList<>();
ArrayList<String> Id = new ArrayList<>();
ArrayList<String> Details = new ArrayList<>();
String line = " ";
while ((line = sc.readLine()) != null) {
if (!line.trim().equals("")) {
System.out.println(line);
// split(":")[1] throws ArrayIndexOutOfBoundsException when the line contains no ':';
// the empty catch at the bottom swallows that exception silently.
if (line.toLowerCase().contains("name")) {
name.add(line.split(":")[1].trim());
}
// NOTE(review): a lower-cased string can never contain the capitalised "Location", "Id" or
// "Details", so the next three conditions are always false.
if (line.toLowerCase().contains("Location")) {
Location.add(line.split(":")[1].trim());
}
if (line.toLowerCase().contains("Id")) {
Id.add(line.split(":")[1].trim());
}
if (line.toLowerCase().contains("Details")) {
Details.add(line.split(":")[1].trim());
}
}
}
for (int i = 0; i < name.size(); i++) {
// NOTE(review): "newFileWriter" is presumably a typo for "new FileWriter" - confirm. The writer
// is created inside the loop, so output.csv is reopened on every iteration.
PrintWriter out = new PrintWriter(newFileWriter("output.csv"));
out.println("name;Location;Id;Details;");
out.println(name.get(i) + ";"
+ Location.get(i) + ";"
+ Id.get(i) + ";"
+ Details.get(i) + ";");
out.close();
}
sc.close();
// Every exception is discarded here, so a failure above produces no message at all.
} catch (Exception e) {
}
and my input file looks like
name = abc
id = 123
Place = xyz
Details = some texts with two line
name = aaa
id = 54657
Place = dfd
Details = some texts with some lines
What could be the problem? Why is nothing written to the CSV file, while the output is only printed on the console? Kindly help me.
In your file, the title and the value are always separated by "=", whereas your code splits each line on ":". Replace ":" with "=" so that the split result has an element at index 1:
name.add(line.split("=")[1].trim());
I want to read specific columns from a multi column csv file and print those columns in other csv file using Java. Any help please? Following is my code to print each token line by line..But I am looking to print only few columns out of the multi column csv.
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.FileWriter;
import java.util.StringTokenizer;
public class ParseCSV {
public static void main(String[] args) {
try
{
//csv file containing data
String strFile = "C:\\Users\\rsaluja\\CMS_Evaluation\\Drupal_12_08_27.csv";
//create BufferedReader to read csv file
BufferedReader br = new BufferedReader( new FileReader(strFile));
String strLine = "";
StringTokenizer st = null;
int lineNumber = 0, tokenNumber = 0;
//read comma separated file line by line
while( (strLine = br.readLine()) != null)
{
lineNumber++;
//break comma separated line using ","
st = new StringTokenizer(strLine, ",");
while(st.hasMoreTokens())
{
//display csv values
tokenNumber++;
System.out.println("Line # " + lineNumber +
", Token # " + tokenNumber
+ ", Token : "+ st.nextToken());
System.out.println(cols[4]);
You should use the excellent OpenCSV for reading and writing CSV files. To adapt your example to use the library it would look like this:
public class ParseCSV {
public static void main(String[] args) {
try {
//csv file containing data
String strFile = "C:/Users/rsaluja/CMS_Evaluation/Drupal_12_08_27.csv";
CSVReader reader = new CSVReader(new FileReader(strFile));
String [] nextLine;
int lineNumber = 0;
while ((nextLine = reader.readNext()) != null) {
lineNumber++;
System.out.println("Line # " + lineNumber);
// nextLine[] is an array of values from the line
System.out.println(nextLine[4] + "etc...");
}
}
}
}
Reading a CSV file is very simple and common in Java. You don't actually need to load any extra third-party library to do this. A CSV (comma-separated values) file is just a normal plain-text file that stores data column by column, with the columns split by a separator (e.g. a comma ",").
In order to read specific columns from the CSV file, there are several ways. Simplest of all is as below:
Code to read CSV without any 3rd party library
// Reads the file line by line; try-with-resources closes the reader even if reading fails.
try (BufferedReader br = new BufferedReader(new FileReader(csvFile))) {
    while ((line = br.readLine()) != null) {
        // use comma as separator
        String[] cols = line.split(cvsSplitBy);
        System.out.println("Column 4= " + cols[4] + " , Column 5=" + cols[5]);
    }
}
If you notice, nothing special is performed here. It is just reading a text file and splitting it by a separator – ",".
Consider an extract from legacy country CSV data at GeoLite Free Downloadable Databases
"1.0.0.0","1.0.0.255","16777216","16777471","AU","Australia"
"1.0.1.0","1.0.3.255","16777472","16778239","CN","China"
"1.0.4.0","1.0.7.255","16778240","16779263","AU","Australia"
"1.0.8.0","1.0.15.255","16779264","16781311","CN","China"
"1.0.16.0","1.0.31.255","16781312","16785407","JP","Japan"
"1.0.32.0","1.0.63.255","16785408","16793599","CN","China"
"1.0.64.0","1.0.127.255","16793600","16809983","JP","Japan"
"1.0.128.0","1.0.255.255","16809984","16842751","TH","Thailand"
Above code will output as below:
Column 4= "AU" , Column 5="Australia"
Column 4= "CN" , Column 5="China"
Column 4= "AU" , Column 5="Australia"
Column 4= "CN" , Column 5="China"
Column 4= "JP" , Column 5="Japan"
Column 4= "CN" , Column 5="China"
Column 4= "JP" , Column 5="Japan"
Column 4= "TH" , Column 5="Thailand"
You can, in fact, put the columns in a Map and then get the values simply by using the key.
Shishir
I am sorry, but none of these answers provide an optimal solution. If you use a library such as OpenCSV you will have to write a lot of code to handle special cases to extract information from specific columns.
For example, if you have rows with fewer columns than the ones you're after, you'll have to write a lot of code to handle it. Using the OpenCSV example:
// try-with-resources closes the reader once all rows have been processed
try (CSVReader reader = new CSVReader(new FileReader(strFile))) {
    String[] nextLine;
    while ((nextLine = reader.readNext()) != null) {
        //let's say you are interested in getting columns 20, 30, and 40
        String[] outputRow = new String[3];
        // index 40 only exists when the row holds at least 41 values
        if (nextLine.length <= 40) {
            outputRow[2] = null;
        } else {
            outputRow[2] = nextLine[40];
        }
        // index 30 only exists when the row holds at least 31 values
        if (nextLine.length <= 30) {
            outputRow[1] = null;
        } else {
            outputRow[1] = nextLine[30];
        }
        // index 20 only exists when the row holds at least 21 values
        if (nextLine.length <= 20) {
            outputRow[0] = null;
        } else {
            outputRow[0] = nextLine[20];
        }
    }
}
This is a lot of code for a simple requirement. It gets worse if you are trying to get values of columns by name. You should use a more modern parser such as the one provided by uniVocity-parsers.
To reliably and easily get the columns you want, simply write:
CsvParserSettings settings = new CsvParserSettings();
// configure the same settings object that is handed to the parser below
settings.selectIndexes(20, 30, 40);
CsvParser parser = new CsvParser(settings);
// parse the whole file; the parser is built with the index selection above
List<String[]> allRows = parser.parseAll(new FileReader(yourFile));
Disclosure: I am the author of this library. It's open-source and free (Apache V2.0 license).
To read some specific column
I did something like this:
dpkcs.csv content:
FN,LN,EMAIL,CC
Name1,Lname1,email1@gmail.com,CC1
Nmae2,Lname2,email2r@gmail.com,CC2
The function to read it:
/**
 * Reads {@code dpkcs.csv}, turns every data row into an {@link EmailRecepientData}
 * and prints the resulting list.
 *
 * <p>The first row is treated as the header; its upper-cased column names decide
 * which position feeds the EMAIL, FN, LN and CC fields. A field whose column is
 * absent from the header, or missing from a short row, is left {@code null}.
 * If the file cannot be read, the failure is reported on standard error and the
 * rows collected so far are still printed.
 */
private void getEMailRecepientList() {
    List<EmailRecepientData> emailList = new ArrayList<EmailRecepientData>();
    // try-with-resources closes the reader on every path, including failures
    try (BufferedReader reader = new BufferedReader(new FileReader("dpkcs.csv"))) {
        // Get header from 1st row of csv: upper-cased column name -> position
        Map<String, Integer> mailHeader = new HashMap<String, Integer>();
        String line = reader.readLine();
        if (line != null) {
            String[] headerKeys = line.split(",", -1);
            for (int i = 0; i < headerKeys.length; i++) {
                mailHeader.put(headerKeys[i].toUpperCase(), i);
            }
        }
        while ((line = reader.readLine()) != null) {
            // limit -1 keeps trailing empty values so positions match the header
            String[] values = line.split(",", -1);
            // POJO class for getter and setters
            EmailRecepientData email = new EmailRecepientData();
            email.setEmailId(columnValue(mailHeader, "EMAIL", values));
            email.setFirstName(columnValue(mailHeader, "FN", values));
            email.setLastName(columnValue(mailHeader, "LN", values));
            email.setCouponCode(columnValue(mailHeader, "CC", values));
            emailList.add(email);
        }
    } catch (java.io.IOException e) {
        // report the failure instead of silently discarding it
        System.err.println("Could not read dpkcs.csv: " + e);
    }
    System.out.println("list--" + emailList);
}

/** Returns the value stored under the named header column, or null when the column or value is missing. */
private static String columnValue(Map<String, Integer> header, String key, String[] values) {
    Integer position = header.get(key);
    if (position == null || position >= values.length) {
        return null;
    }
    return values[position];
}
The POJO Class:
/**
 * Plain data holder for one e-mail recipient: e-mail address, first name,
 * last name and coupon code. All fields start out {@code null}.
 */
public class EmailRecepientData {
    private String emailId;
    private String firstName;
    private String lastName;
    private String couponCode;

    /** @return the e-mail address, or null if not set */
    public String getEmailId() {
        return emailId;
    }

    /** @param emailId the e-mail address to store */
    public void setEmailId(String emailId) {
        this.emailId = emailId;
    }

    /** @return the first name, or null if not set */
    public String getFirstName() {
        return firstName;
    }

    /** @param firstName the first name to store */
    public void setFirstName(String firstName) {
        this.firstName = firstName;
    }

    /** @return the last name, or null if not set */
    public String getLastName() {
        return lastName;
    }

    /** @param lastName the last name to store */
    public void setLastName(String lastName) {
        this.lastName = lastName;
    }

    /** @return the coupon code, or null if not set */
    public String getCouponCode() {
        return couponCode;
    }

    /** @param couponCode the coupon code to store */
    public void setCouponCode(String couponCode) {
        this.couponCode = couponCode;
    }

    /** @return all four fields as one labelled, comma-separated line */
    @Override
    public String toString() {
        return "Email Id=" + emailId + ", First Name=" + firstName + " ,"
                + " Last Name=" + lastName + ", Coupon Code=" + couponCode;
    }
}
I suggest using Apache Commons CSV: https://commons.apache.org/proper/commons-csv/
Here is one example:
Path currentRelativePath = Paths.get("");
String currentPath = currentRelativePath.toAbsolutePath().toString();
String csvFile = currentPath + "/pathInYourProject/test.csv";
// try-with-resources keeps the reader open while the records are iterated and
// closes it afterwards; nothing is iterated if the file could not be opened
try (Reader in = new FileReader(csvFile)) {
    // withHeader(): the first record is taken as the header, so it is not part of the loop below
    Iterable<CSVRecord> records = CSVFormat.EXCEL.withHeader().parse(in);
    for (CSVRecord record : records) {
        StringBuilder line = new StringBuilder();
        for (int i = 0; i < record.size(); i++) {
            // separate by position, so an empty first value still gets its comma
            if (i > 0) {
                line.append(',');
            }
            line.append(record.get(i));
        }
        System.out.println("read line: " + line);
    }
} catch (IOException e) {
    e.printStackTrace();
}
It automatically recognizes , and " but not ; (maybe it can be configured...).
My example file is:
col1,col2,col3
val1,"val2",val3
"val4",val5
val6;val7;"val8"
And output is:
read line: val1,val2,val3
read line: val4,val5
read line: val6;val7;"val8"
Last line is considered like one value.
We can use core Java alone to read the CSV file column by column. Here is the sample code I wrote for my requirement. I believe it will help someone.
String line = EMPTY;
int lineNumber = 0;
// column positions of the four URI columns; -1 until found in the header line
int productURIIndex = -1;
int marketURIIndex = -1;
int ingredientURIIndex = -1;
int companyURIIndex = -1;
// try-with-resources closes the reader even if reading fails part-way
try (BufferedReader br = new BufferedReader(new FileReader(csvFile))) {
    // read comma separated file line by line
    while ((line = br.readLine()) != null) {
        lineNumber++;
        // use comma as line separator
        String[] splitStr = line.split(COMMA);
        int splittedStringLen = splitStr.length;
        if (lineNumber == 1) {
            // get the product title and uri column index by reading csv header line
            for (int i = 0; i < splittedStringLen; i++) {
                if (splitStr[i].equals(PRODUCTURI_TITLE)) {
                    productURIIndex = i;
                    System.out.println("product_uri index:" + productURIIndex);
                }
                if (splitStr[i].equals(MARKETURI_TITLE)) {
                    marketURIIndex = i;
                    System.out.println("marketURIIndex:" + marketURIIndex);
                }
                if (splitStr[i].equals(COMPANYURI_TITLE)) {
                    companyURIIndex = i;
                    System.out.println("companyURIIndex:" + companyURIIndex);
                }
                if (splitStr[i].equals(INGREDIENTURI_TITLE)) {
                    ingredientURIIndex = i;
                    System.out.println("ingredientURIIndex:" + ingredientURIIndex);
                }
            }
        } else {
            // Skip rows whose last value is EMPTY (or that have no values at all),
            // e.g. a line containing only ,,,,,,,,,,,,,
            String lastValue = splittedStringLen == 0 ? EMPTY : splitStr[splittedStringLen - 1];
            if (!lastValue.equals(EMPTY)) {
                // Each index is also checked against the row length, so a row that is
                // shorter than the header no longer throws ArrayIndexOutOfBoundsException.
                // NOTE(review): a short row adds nothing to the affected list, so the
                // four lists can differ in length - confirm callers do not rely on alignment.
                if (productURIIndex != -1 && productURIIndex < splittedStringLen) {
                    productCVSUriList.add(splitStr[productURIIndex]);
                }
                if (companyURIIndex != -1 && companyURIIndex < splittedStringLen) {
                    companyCVSUriList.add(splitStr[companyURIIndex]);
                }
                if (marketURIIndex != -1 && marketURIIndex < splittedStringLen) {
                    marketCVSUriList.add(splitStr[marketURIIndex]);
                }
                if (ingredientURIIndex != -1 && ingredientURIIndex < splittedStringLen) {
                    ingredientCVSUriList.add(splitStr[ingredientURIIndex]);
                }
            }
        }
    }
}
Finds all files in a folder and writes their data to the ArrayList row.
Initialize
// All rows read so far; each inner list holds the values of one row.
ArrayList<ArrayList<String>> row = new ArrayList<>();
// Reader for the file currently being read; starts out unset.
BufferedReader br = null;
For Accessing row
// Visit every stored row and read one value from it.
// NOTE(review): "col no" is presumably a placeholder for the index of the wanted column.
for(ArrayList<String> data:row){
data.get(col no);
}
or row.get(0).get(0) // getting first row first col
Functions that read all files from a folder and append their rows to row.
static void readData(){
String path="C:\\Users\\Galaxy Computers\\Desktop\\Java project\\Nasdaq\\";
File files=new File(path);
String[] list=files.list();
try {
String sCurrentLine;
char check;
for(String filename:list){
br = new BufferedReader(new FileReader(path+filename));
br.readLine();//If file contains uneccessary first line.
while ((sCurrentLine = br.readLine()) != null) {
row.add(splitLine(sCurrentLine));
}
}
}
} catch (IOException e) {
e.printStackTrace();
}
try {
if (br != null)br.close();
} catch (IOException ex) {
ex.printStackTrace();
}
}
/**
 * Splits one line on commas and returns the pieces, in order, as a new list.
 *
 * @param line the text to split
 * @return a mutable list containing every piece produced by {@code line.split(",")}
 */
static ArrayList<String> splitLine(String line){
    final String[] pieces = line.split(",");
    final ArrayList<String> values = new ArrayList<String>(pieces.length);
    for (int i = 0; i < pieces.length; i++) {
        values.add(pieces[i]);
    }
    return values;
}
Well, how about this !!
This code calculates both row and column count in a csv file. Try this out !!
/**
 * Counts the rows and columns of {@code filename.csv} in the working directory.
 *
 * <p>Every line counts as a row. The column count is the largest number of
 * comma-separated values found on any line, with trailing empty values included,
 * so a short or blank last line does not shrink the result.
 *
 * @return a two-element array {@code {rows, cols}}; {@code {0, 0}} when the file
 *         cannot be opened (the failure is printed)
 */
static int[] getRowsColsNo() {
    int rows = 0;
    int cols = 0;
    // try-with-resources closes the scanner and, through it, the underlying file
    try (Scanner scanIn = new Scanner(new BufferedReader(
            new FileReader("filename.csv")))) {
        while (scanIn.hasNextLine()) {
            String inputLine = scanIn.nextLine();
            rows++;
            // limit -1 keeps trailing empty values so "4,5," counts three columns
            cols = Math.max(cols, inputLine.split(",", -1).length);
        }
    } catch (java.io.FileNotFoundException e) {
        System.out.println(e);
    }
    return new int[] { rows, cols };
}