Open CSV Performance to write data - java

I came through a link: https://github.com/hyee/OpenCSV which drastically improves the writing time of the JDBC ResultSet to CSV due to setAsyncMode, RESULT_FETCH_SIZE
//Extract ResultSet to CSV file, auto-compress if the fileName extension is ".zip" or ".gz"
//Returns number of records extracted
public int ResultSet2CSV(final ResultSet rs, final String fileName, final String header, final boolean aync) throws Exception {
try (CSVWriter writer = new CSVWriter(fileName)) {
//Define fetch size(default as 30000 rows), higher to be faster performance but takes more memory
ResultSetHelperService.RESULT_FETCH_SIZE=10000;
//Define MAX extract rows, -1 means unlimited.
ResultSetHelperService.MAX_FETCH_ROWS=20000;
writer.setAsyncMode(aync);
int result = writer.writeAll(rs, true);
return result - 1;
}
}
But the problem is I don't know how I can merge above into my requirement. As the link has many other classes involved which I am not sure what they do and if I even need it for my requirement. Still, I tried but it fails to compile whenever I enable 2 commented line code. Below is my code.
Any help on how I can achieve this will be greatly appreciated.
package test;
import java.io.BufferedWriter;
import java.io.FileWriter;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;
import java.util.Date;
import com.opencsv.CSVWriter;
import com.opencsv.ResultSetHelperService;
public class OpenCSVTest1
{
static Connection con =null;
static Statement stmt = null;
static ResultSet rs = null;
public static void main(String args[]) throws Exception
{
connection ();
retrieveData(con);
}
private static void connection() throws Exception
{
try
{
Class.forName("<jdbcdriver>");
con = DriverManager.getConnection("jdbc:","<username>","<pass>");
System.out.println("Connection successful");
}
catch (Exception e)
{
System.out.println("Exception while establishing sql connection");
throw e;
}
}
private static void retrieveData(Connection con) throws Exception
{
try
{
stmt=con.createStatement();
stmt = con.createStatement(ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY);
String query = "SELECT * FROM dbo.tablename";
rs=stmt.executeQuery(query);
CSVWriter writer = new CSVWriter(new BufferedWriter(new FileWriter("C:\\Data\\File1.csv")));
ResultSetHelperService service = new ResultSetHelperService();
/*** ResultSetHelperService.RESULT_FETCH_SIZE=10000; ***/ // to add
service.setDateTimeFormat("yyyy-MM-dd HH:mm:ss.SSS");
System.out.println("**** Started writing Data to CSV **** " + new Date());
writer.setResultService(service);
/*** writer.setAsyncMode(aync); ***/ // to add
int lines = writer.writeAll(rs, true, true, false);
writer.flush();
writer.close();
System.out.println("** OpenCSV -Completed writing the resultSet at " + new Date() + " Number of lines written to the file " + lines);
}
catch (Exception e)
{
System.out.println("Exception while retrieving data" );
e.printStackTrace();
throw e;
}
finally
{
rs.close();
stmt.close();
con.close();
}
}
}
UPDATE
I have updated my code. Right now code is writing complete resultset in CSV at once using writeAll method which is resulting in time consumption.
Now what I want to do is write resultset to CSV in batches as resultset's first column will always have dynamically generated via SELECT query Auto Increment column (Sqno) with values as (1,2,3..) So not sure how I can read result sets first column and split it accoridngly to write in CSV. may be HashMap might help, so I have also added resultset-tohashmap conversion code if required.
import com.opencsv.CSVWriter;
import com.opencsv.ResultSetHelperService;
import java.io.BufferedWriter;
import java.io.FileWriter;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
public class OpenCSVTest1
{
static int fetchlimit_src = 100;
static Connection con =null;
static Statement stmt = null;
static ResultSet rs = null;
static String filename = "C:\\Data\\filename.csv";
static CSVWriter writer;
public static void main(String args[])
{
try
{
connection();
retrieveData(con);
}
catch(Exception e)
{
System.out.println(e);
}
}
private static void connection() throws Exception
{
try
{
Class.forName("<jdbcdriver>");
con = DriverManager.getConnection("jdbc:","<username>","<pass>");
System.out.println("Connection successful");
}
catch (Exception e)
{
System.out.println("Exception while establishing sql connection");
throw e;
}
}
private static void retrieveData(Connection con) throws Exception
{
try
{
stmt=con.createStatement();
String query = "SELECT ROWNUM AS Sqno, * FROM dbo.tablename "; // Oracle
// String query = "SELECT ROW_NUMBER() OVER(ORDER BY Id ASC) AS Sqno, * FROM dbo.tablename "; // SQLServer
System.out.println(query);
stmt = con.createStatement(ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY);
stmt.setFetchSize(fetchlimit_src);
System.out.println("**** Started querying src **** " + new Date());
rs=stmt.executeQuery(query);
System.out.println("**** Completing querying src **** " + new Date());
// resultset_List(rs); // If required store resultset(rs) to HashMap
writetoCSV(rs,filename);
/** How to write resultset to CSV in batches instead of writing all at once to speed up write performance ?
* Hint: resultset first column is Autoincrement [Sqno] (1,2,3...) which might help to split result in batches.
*
**/
}
catch (Exception e)
{
System.out.println("Exception while retrieving data" );
e.printStackTrace();
throw e;
}
finally
{
rs.close();
stmt.close();
con.close();
}
}
private static List<Map<String, Object>> resultset_List(ResultSet rs) throws SQLException
{
ResultSetMetaData md = rs.getMetaData();
int columns = md.getColumnCount();
List<Map<String, Object>> rows = new ArrayList<Map<String, Object>>();
while (rs.next())
{
Map<String, Object> row = new HashMap<String, Object>(columns);
for(int i = 1; i <= columns; ++i)
{
row.put(md.getColumnName(i), rs.getObject(i));
}
rows.add(row);
}
// System.out.println(rows.toString());
return rows;
}
private static void writetoCSV(ResultSet rs, String filename) throws Exception
{
try
{
writer = new CSVWriter(new BufferedWriter(new FileWriter(filename)));
ResultSetHelperService service = new ResultSetHelperService();
service.setDateTimeFormat("yyyy-MM-dd HH:mm:ss.SSS");
long batchlimit = 1000;
long Sqno = 1;
ResultSetMetaData rsmd = rs.getMetaData();
String columnname = rsmd.getColumnLabel(1); // To retrieve columns with labels (for example SELECT ROWNUM AS Sqno)
System.out.println("**** Started writing Data to CSV **** " + new Date());
writer.setResultService(service);
int lines = writer.writeAll(rs, true, true, false);
System.out.println("** OpenCSV -Completed writing the resultSet at " + new Date() + " Number of lines written to the file " + lines);
}
catch (Exception e)
{
System.out.println("Exception while writing data" );
e.printStackTrace();
throw e;
}
finally
{
writer.flush();
writer.close();
}
}
}

You should be able to use the OpenCSV sample, pretty much exactly as it is provided in the documentation. So, there should be no need for you to write any of your own batching logic.
I was able to write a 6 million record result set to a CSV file in about 10 seconds. To be clear -that was just the file-write time, not the DB data-fetch time - but I think that should be fast enough for your needs.
Here is your code, with adaptations for using OpenCSV based on its documented approach... But please see the warning at the end of my notes!
import com.opencsv.CSVWriter;
import com.opencsv.ResultSetHelperService;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;
import java.util.Date;
import java.text.SimpleDateFormat;
public class OpenCSVDemo {
static int fetchlimit_src = 100;
static Connection con = null;
static Statement stmt = null;
static ResultSet rs = null;
static String filename = "C:\\Data\\filename.csv";
public static void main(String args[]) {
try {
connection();
retrieveData(con);
} catch (Exception e) {
System.out.println(e);
}
}
private static void connection() throws Exception {
try {
final String jdbcDriver = "YOURS GOES HERE";
final String dbUrl = "YOURS GOES HERE";
final String user = "YOURS GOES HERE";
final String pass = "YOURS GOES HERE";
Class.forName(jdbcDriver);
con = DriverManager.getConnection(dbUrl, user, pass);
System.out.println("Connection successful");
} catch (Exception e) {
System.out.println("Exception while establishing sql connection");
throw e;
}
}
private static void retrieveData(Connection con) throws Exception {
try {
stmt = con.createStatement();
String query = "select title_id, primary_title from imdb.title";
System.out.println(query);
stmt = con.createStatement(ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY);
stmt.setFetchSize(fetchlimit_src);
System.out.println("**** Started querying src **** " + new Date());
rs = stmt.executeQuery(query);
System.out.println("**** Completing querying src **** " + new Date());
// resultset_List(rs); // If required store resultset(rs) to HashMap
System.out.println();
String timeStamp = new SimpleDateFormat("yyyy.MM.dd.HH.mm.ss").format(new Date());
System.out.println("Started writing CSV: " + timeStamp);
writeToCsv(rs, filename, null, Boolean.FALSE);
timeStamp = new SimpleDateFormat("yyyy.MM.dd.HH.mm.ss").format(new Date());
System.out.println("Finished writing CSV: " + timeStamp);
System.out.println();
} catch (Exception e) {
System.out.println("Exception while retrieving data");
e.printStackTrace();
throw e;
} finally {
rs.close();
stmt.close();
con.close();
}
}
public static int writeToCsv(final ResultSet rs, final String fileName,
final String header, final boolean aync) throws Exception {
try (CSVWriter writer = new CSVWriter(fileName)) {
//Define fetch size(default as 30000 rows), higher to be faster performance but takes more memory
ResultSetHelperService.RESULT_FETCH_SIZE = 1000;
//Define MAX extract rows, -1 means unlimited.
ResultSetHelperService.MAX_FETCH_ROWS = 2000;
writer.setAsyncMode(aync);
int result = writer.writeAll(rs, true);
return result - 1;
}
}
}
Points to note:
1) I used "async" set to false:
writeToCsv(rs, filename, null, Boolean.FALSE);
You may want to experiment with this and the other settings to see if they make any significant difference for you.
2) Regarding your comment "the link has many other classes involved": The OpenCSV library's entire JAR file needs to be included in your project, as does the related disruptor JAR:
opencsv.jar
disruptor-3.3.6.jar
To get the JAR files, go to the GitHub page, click on the green button, select the zip download, unzip the zip file, and look in the "OpenCSV-master\release" folder.
Add these two JARs to your project in the usual way (depends on how you build your project).
3) WARNING: This code runs OK when you use Oracle's Java 8 JDK/JRE. If you try to use OpenJDK (e.g. for Java 13 or similar) it will not run. This is because of some changes behind the scenes to hidden classes. If you are interested, there are more details here.
If you need to use an OpenJDK version of Java, you may therefore have better luck with the library on which this CSV library is based: see here.

Related

Apache DBCP and Oracle Transparent Application continuity

We have an older application that can't failover when one node of our Oracle RAC goes down. It seems it uses an older version of org.apache.commons.dbcp.BasicDataSource. I can make this work when I use UCP from Oracle but when I use the apache version the app dies as soon as I shut down the node of the RAC it is connected to. Am I missing something or does it not work with Apache DBCP? Thanks
Here is my code.
import org.apache.commons.dbcp.BasicDataSource;
import java.io.PrintStream;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
public class BasicDB{
final static String DB_URL ="jdbc:oracle:thin:user/password#pdb_tac";
final static String driverClassName = "oracle.jdbc.replay.OracleDataSourceImpl";
private void pressAnyKeyToContinue()
{
System.out.print("Press any key to continue...");
try { System.in.read(); }
catch(Exception e) { e.printStackTrace(); }
}
public String getInstanceName(Connection conn) throws SQLException {
PreparedStatement pstmt = conn.prepareStatement("select instance_name from v$instance");
String r = new String();
for(ResultSet result = pstmt.executeQuery(); result.next(); r = result.getString("instance_name")) {
}
pstmt.close();
return r;
}
private void doTx(Connection c, int numValue) throws SQLException {
String updsql = "UPDATE test SET v=UPPER(v) WHERE id=?";
PreparedStatement pstmt = null;
pstmt = c.prepareStatement(updsql);
c.setAutoCommit(false);
for(int i = 0; i < numValue; ++i) {
pstmt.setInt(1, i);
pstmt.executeUpdate();
}
c.commit();
pstmt.close();
}
public static void main(String[] args) throws SQLException {
Connection conn = null;
int numValue = 5000;
;
try {
BasicDataSource bods = new BasicDataSource();
bods.setUrl(DB_URL);
bods.setDriverClassName(driverClassName);
bods.setDefaultAutoCommit(false);
BasicDB self = new BasicDB();
conn = bods.getConnection();
String var10001 = self.getInstanceName(conn);
var10000.println("Instance Name = " + var10001);
System.out.println("Performing transactions");
self.pressAnyKeyToContinue();
self.doTx(conn, numValue);
var10001 = self.getInstanceName(conn);
var10000.println("Instance Name = " + var10001);
} catch (Exception var8) {
var8.printStackTrace();
}
}
}
Ok, so it has to do with using the DataSource instead of the DataDriver class. I have run into another error so will create a new question for that.

How to make semicolon (;) as delimiter in java

I made a program that can parse .csv file to database,
I want to make semicolon as delimiter but I got some trouble here ~
This is my code CSVLoader.java
package id.co.lolo.coreservice;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.util.Date;
import org.apache.commons.lang.StringUtils;
import au.com.bytecode.opencsv.CSVReader;
public class CSVLoader {
private static final String SQL_INSERT = "INSERT INTO ${table}(${keys}) VALUES(${values})";
private static final String TABLE_REGEX = "\\$\\{table\\}";
private static final String KEYS_REGEX = "\\$\\{keys\\}";
private static final String VALUES_REGEX = "\\$\\{values\\}";
private Connection connection;
private char seprator;
/**
* Public constructor to build CSVLoader object with Connection details. The
* connection is closed on success or failure.
*
* #param connection
*/
public CSVLoader(Connection connection) {
this.connection = connection;
// Set default separator
this.seprator = ',';
}
/**
* Parse CSV file using OpenCSV library and load in given database table.
*
* #param csvFile
* Input CSV file
* #param tableName
* Database table name to import data
* #param truncateBeforeLoad
* Truncate the table before inserting new records.
* #throws Exception
*/
#SuppressWarnings("resource")
public void loadCSV(String csvFile, String tableName,
boolean truncateBeforeLoad) throws Exception {
CSVReader csvReader = null;
if (null == this.connection) {
throw new Exception("Not a valid connection.");
}
try {
csvReader = new CSVReader(new FileReader(csvFile), this.seprator);
} catch (Exception e) {
e.printStackTrace();
throw new Exception("Error occured while executing file. "
+ e.getMessage());
}
String[] headerRow = csvReader.readNext();
if (null == headerRow) {
throw new FileNotFoundException(
"No columns defined in given CSV file."
+ "Please check the CSV file format.");
}
String questionmarks = StringUtils.repeat("?,", headerRow.length);
questionmarks = (String) questionmarks.subSequence(0,
questionmarks.length() - 1);
String query = SQL_INSERT.replaceFirst(TABLE_REGEX, tableName);
query = query
.replaceFirst(KEYS_REGEX, StringUtils.join(headerRow, ","));
query = query.replaceFirst(VALUES_REGEX, questionmarks);
System.out.println("Query: " + query);
String[] nextLine;
Connection con = null;
PreparedStatement ps = null;
try {
con = this.connection;
con.setAutoCommit(false);
ps = con.prepareStatement(query);
if (truncateBeforeLoad) {
// delete data from table before loading csv
con.createStatement().execute("DELETE FROM " + tableName);
}
final int batchSize = 1000;
int count = 0;
Date date = null;
while ((nextLine = csvReader.readNext()) != null) {
if (null != nextLine) {
int index = 1;
for (String string : nextLine) {
date = DateUtil.convertToDate(string);
if (null != date) {
ps.setDate(index++,
new java.sql.Date(date.getTime()));
} else {
ps.setString(index++, string);
}
}
ps.addBatch();
}
if (++count % batchSize == 0) {
ps.executeBatch();
}
}
ps.executeBatch(); // insert remaining records
con.commit();
} catch (Exception e) {
con.rollback();
e.printStackTrace();
throw new Exception(
"Error occured while loading data from file to database."
+ e.getMessage());
} finally {
if (null != ps)
ps.close();
if (null != con)
con.close();
csvReader.close();
}
}
public char getSeprator() {
return seprator;
}
public void setSeprator(char seprator) {
this.seprator = seprator;
}
}
this is Main.java
package id.co.lolo.coreservice;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.SQLException;
public class Main {
#SuppressWarnings("unused")
private static String JDBC_CONNECTION_URL =
"jdbc:mysql://localhost:3306/bni";
public static void main(String[] args) {
try {
CSVLoader loader = new CSVLoader(getCon());
loader.setSeprator(';');
loader.loadCSV("C:\\Log\\Logtima.csv", "coreservice", true);
} catch (Exception e) {
e.printStackTrace();
}
}
private static Connection getCon() {
Connection connection = null;
try {
Class.forName("com.mysql.jdbc.Driver");
connection = DriverManager.getConnection("jdbc:mysql://localhost:3306/bni","root","shikamaru");
} catch (ClassNotFoundException e) {
e.printStackTrace();
} catch (SQLException e) {
e.printStackTrace();
}
return connection;
}
}
I got this Error,
Query: INSERT INTO coreservice(MODULE_NAME,SERVICE_NAME,COUNTER,LOG_DATE,UPDATE_DATE) VALUES(?)
java.sql.BatchUpdateException: Column count doesn't match value count at row 1
at com.mysql.jdbc.PreparedStatement.executeBatchSerially(PreparedStatement.java:1693)
at com.mysql.jdbc.PreparedStatement.executeBatch(PreparedStatement.java:1108)
at id.co.bni.coreservice.CSVLoader.loadCSV(CSVLoader.java:118)
at id.co.bni.coreservice.Main.main(Main.java:20)
java.lang.Exception: Error occured while loading data from file to database.Column count doesn't match value count at row 1
at id.co.bni.coreservice.CSVLoader.loadCSV(CSVLoader.java:123)
at id.co.bni.coreservice.Main.main(Main.java:20)

Why the INSERT query doesn't work in this HttpServer code

Im trying to make a little server for my homework.This is very simple project yet i cant insert some variables (which i took from the client ,in an object form ,through serialization ) into the database .
It shows no errors! That's what i find strange and also the client receive the response without problems.
my Server class is as the following :
package server;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.OutputStream;
import java.net.InetSocketAddress;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.SQLException;
import java.sql.Statement;
import org.ietf.jgss.Oid;
import com.sun.net.httpserver.HttpExchange;
import com.sun.net.httpserver.HttpHandler;
import com.sun.net.httpserver.HttpServer;
public class Server {
public static void main(String[] args) throws Exception {
HttpServer server = HttpServer.create(new InetSocketAddress(3333), 0);
server.createContext("/", new MyHandler());
server.setExecutor(null); // creates a default executor
server.start();
}
static class MyHandler implements HttpHandler {
public void handle(HttpExchange t) throws IOException {
ObjectInputStream ios = new ObjectInputStream(t.getRequestBody());
final String url = "jdbc:mysql://localhost/httpServer";
final String user = "root";
final String password = "";
try {
Send oin = (Send) ios.readObject();
String response = "Kjo eshte nje pergjigje nga serveri! \n"
+ "Clienti me id "
+ oin.getId()
+ " dhe me emer "
+ oin.getName()
+ " ka pasur "
+ oin.getAmount()
+ "$ ne llogarine e tij ,por me pas ka terhequr "
+ oin.getPaid()
+ "$ nga llogaria \n"
+ "Kjo terheqe eshte ruajtur ne database dhe tani gjendja e re eshte "
+ (oin.getAmount() - oin.getPaid()) + "$ \n";
t.sendResponseHeaders(200, response.length());
OutputStream os = t.getResponseBody();
os.write(response.getBytes());
os.close();
int id = oin.getId();
String emri = oin.getName();
int amount = oin.getAmount();
int paid = oin.getPaid();
try {
Class.forName("com.mysql.jdbc.Driver");
Connection con = DriverManager.getConnection(url, user,
password);
try {
Statement s = con.createStatement();
s.executeUpdate("INSERT INTO person VALUES ('" + id
+ "','" + emri + "','" + amount + "','" + paid
+ "')");
} catch (SQLException s) {
System.out
.println("Tabel or column or data type is not found!");
}
} catch (Exception e) {
e.printStackTrace();
}
} catch (ClassNotFoundException e) {
e.printStackTrace();
}
}
}
}
can you please help me ?
Or have any idea what the problem may is ?
Edit:
Maybe i am doing something wrong in the Client:
package server;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.ObjectOutputStream;
import java.io.Serializable;
import java.net.HttpURLConnection;
import java.net.InetAddress;
import java.net.Socket;
import java.net.URL;
class Send implements Serializable {
// duhet te implementoje interfacin serizable ne menyre qe tja dergoj
// serverit
private static final long serialVersionUID = 1L;
public int getId() {
return id;
}
public int getAmount() {
return amount;
}
public int getPaid() {
return paid;
}
int id = 1;
int amount = 2000;
int paid = 800;
String name = "Andi Domi";
public String getName() {
return name;
}
}
public class Client {
public static void main(String[] args) throws Exception {
try {
URL url = new URL("http://localhost:3333");
HttpURLConnection s = (HttpURLConnection) url.openConnection();
s.setDoOutput(true);
s.setDoInput(true);
s.setRequestMethod("POST");
s.setUseCaches(false);
Send obj = new Send();
ObjectOutputStream objOut = new ObjectOutputStream(
s.getOutputStream());
objOut.writeObject(obj);
InputStream in = s.getInputStream();
InputStreamReader isr = new InputStreamReader(in);
BufferedReader br = new BufferedReader(isr);
int c;
while ((c = br.read()) != -1) {
System.out.print((char) c);
}
objOut.close();
s.disconnect();
} catch (IOException ex) {
System.err.println(ex);
System.err.print("gabimi eshte ketu");
}
}
}
After your executeUpdate statement you need to do.
con.commit();
to save the transaction.
EDIT: Based on the chat discussion, we learned that the column named emri is actually Emri in the table and was throwing:
com.mysql.jdbc.exceptions.jdbc4.MySQLSyntaxErrorException: Unknown column 'emri' in 'field list'
Changing the name resolves the issue.
Now unrelated to your problem, you should be using a PreparedStatement instead and should be closing your connection and statement
try {
PreparedStatement s = con.prepareStatement("INSERT INTO person(id, emri, amount, paid) VALUES (?,?,?,?)");
s.setInt(1,id);
s.setString(2,emri);
s.setInt(3,amount);
s.setInt(4,paid);
int count = s.executeUpdate();
con.commit();
} catch(Exception e){
e.printStackTrace();
//something bad happened rollback
//any uncommitted changes
con.rollback();
} finally {
if (con != null) {
con.close();
}
}
first, use prepared statement[docs] to avoid from SQL INJECTION
String sql = "INSERT INTO person VALUES (?,?,?,?)";
PreparedStatement prest = con.prepareStatement(sql);
prest.setString(1,id);
prest.setString(2,emri); // or use setInt for integer
prest.setString(3,amount); // or use setInt for integer
prest.setString(4,paid);
prest.executeUpdate()
second, if the the number of values does not match the total number of columns in your table, it will also fail because you are using the implicit type of INSERT statement. To solve it, just supply the column names where you want the values should be stored, eg
String sql = "INSERT INTO person (col1, col2, col3, col4) VALUES (?,?,?,?)";

How do I use Java to read a .csv file and insert its data into SQL Server?

I am very new to Java. I have a task with two steps.
I want to read all data from a .csv file.
After reading that data I have to put it into a SQL Server database.
I have done step one. I'm able to read the .csv file data, but I don't know that how to insert it into a database.
Here's my code for fetching the .csv file data:
import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.StringTokenizer;
public class DBcvsdataextractor {
/**
* #param args
*/
public static void main(String[] args) {
// TODO Auto-generated method stub
String fileName="D:/USPresident Wikipedia URLs Thumbs HS.csv";
try {
BufferedReader br = new BufferedReader( new FileReader(fileName));
StringTokenizer st = null;
int lineNumber = 0, tokenNumber = 0;
while( (fileName = br.readLine()) != null)
{
if(lineNumber++ == 0)
continue;
//break comma separated line using ","
st = new StringTokenizer(fileName, ",");
while(st.hasMoreTokens())
{
//display csv values
tokenNumber++;
System.out.print(st.nextToken() + '\t');
}
//new line
System.out.println(" ");
//reset token number
tokenNumber = 0;
}
} catch (FileNotFoundException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
}
Now, how do I insert that data into SQL Server?
The SQL Server has a tool for this.
Like this:
BULK INSERT CSVTest
FROM 'c:\csvtest.txt'
WITH
(
FIELDTERMINATOR = ',',
ROWTERMINATOR = '\n'
)
Use this command in your java, using JDBC connection, for example.
Hope this helps.
SQL Server file CSV
If you really want to do this with Java, and really want to write your own CSV parser as you currently did, you can
Instead of printing out each 'CSV-file value', you will have to store them. You could for example use an ArrayList for each column in the CSV file, and populate those while reading the CSV file
Once the file is read, you can loop over those ArrayList instances again to construct one big INSERT statement for all data, or one INSERT statement for each row you encountered in the CSV file. If you would opt for the last option, it is not even necessary to use those ArrayList instances. In that case you could construct an indiviual INSERT statement while reading the CSV file, and submit it to the DB after each time a row has been read.
I know the approach of constructing your query while reading the CSV file would be possible as well if you want to go for one big INSERT statement, but separating the INSERT from the reading of the CSV file has the big advantage you can replace your own CSV parser later on by a standard one without too much trouble.
You can customize below class to insert data into sql server.
File ImportCsv.java
package com.example.demo;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.UnsupportedEncodingException;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import com.opencsv.CSVReader;
public class ImportCsv
{
public static void main(String[] args) throws UnsupportedEncodingException, FileNotFoundException
{
readCsv();
}
private static void readCsv() throws UnsupportedEncodingException, FileNotFoundException
{
Reader readerstream = new InputStreamReader(new FileInputStream("D:\\file.csv"), "Unicode");
try (CSVReader reader = new CSVReader(readerstream, ',');
Connection connection = DBConnection.getConnection();)
{
String insertQuery = "Insert into [dbo].[tableName] ([Column1],[Column2],[Column3], [Column4],...[Column8]") values (?,?,?,?,?,?,?,?)";
PreparedStatement pstmt = connection.prepareStatement(insertQuery);
String[] rowData = null;
int i = 0;
while((rowData = reader.readNext()) != null){
for (String data : rowData)
{
//System.out.println(new String(data.replace(" ","")));
String strLine = new String(data.replace(" ",""));
String[] splited = strLine.split("\\s+");
pstmt.setNString(1, splited[0]);
pstmt.setNString(2, splited[1]);
pstmt.setNString(3, splited[2]);
pstmt.setNString(4, splited[3]);
pstmt.setNString(5, splited[4]);
pstmt.setNString(6, splited[5]);
pstmt.setNString(7, splited[6]);
try {
pstmt.setNString(8, splited[7]);
}catch(Exception e) {
}
if (++i % 8 == 0) {
pstmt.addBatch();// add batch
}
if (i % 80 == 0) {// insert when the batch size is 10
pstmt.executeBatch();
}
}}
System.out.println("Data Successfully Uploaded");
}
catch (Exception e)
{
e.printStackTrace();
}
}
}
DBConnection.java
package com.example.demo;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.SQLException;
public class DBConnection {
static {
try {
Class.forName("com.microsoft.sqlserver.jdbc.SQLServerDriver");
} catch (Exception e) {
e.printStackTrace();
}
}
public static Connection getConnection() throws SQLException {
String url = "jdbc:sqlserver://localhost:1433;" +
"databasename=myDBName;user=user;password=pwd;sendStringParametersAsUnicode=true;";
Connection con =DriverManager.getConnection(url);
return con;
}
}

Problems in Database Management - sqlite with Java (IDE: NetBeans)

I have some problems in managing the storage of data via query (NetBeans-> Java-> Sqlite):
1) I have a folder with some txt file, containing several lines of text (the files do not exceed 2 Kb)
2) The program opens the files in sequence, and stores each word in a table
3)When the program comes to analyze too much data (some time more than 40 files ore more then 82) returns the following error
Exception in thread "main" java.sql.SQLException: unable to open database file
at org.sqlite.DB.throwex (DB.java: 288)
at org.sqlite.DB.executeBatch (DB.java: 236)
at org.sqlite.PrepStmt.executeBatch (PrepStmt.java: 83)
The error is in int [] upCountsb = prepb.executeBatch();
Here the code:
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.SQLException;
import java.sql.Statement;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
public static void main(String[] args) throws ClassNotFoundException, SQLException, IOException, InterruptedException {
Class.forName("org.sqlite.JDBC");
Connection conn = DriverManager.getConnection("jdbc:sqlite:C:/Users/.../test.db");
Statement stmt;
stmt = conn.createStatement();
stmt.executeUpdate("DROP TABLE IF EXISTS words");
stmt.executeUpdate("CREATE TABLE words (words)");
String path_dir ="C:/Users/.../good";
File currentDIR = new File("C:/Users/.../good");
File files[]=currentDIR.listFiles();
String tmp="";
ArrayList app = new ArrayList();
//Search in DIR for Files
for( File f1 : files ){
String nameFile = f1.getName();
FileReader f = null;
BufferedReader fIN = null;
String s;
//Open the file xxx.txt
try{
f = new FileReader(path_dir+"/"+nameFile);
fIN = new BufferedReader(f);
s = fIN.readLine();
while(s != null) {
StringTokenizer st = new StringTokenizer(s);
while(st.hasMoreTokens()) {
String str = st.nextToken().toString().toLowerCase();
Pattern pattern =Pattern.compile("\\W", Pattern.MULTILINE);
String newAll = pattern.matcher(str).replaceAll("").trim();
tmp=newAll;
app.add(tmp); //Add all data in the ArrayList app
} // Close While 'hasMoreTokens'
s = fIN.readLine();
} //Close While on File
} //Close TRAY
catch (FileNotFoundException ex) {
ex.printStackTrace();
} catch (IOException ex) {
ex.printStackTrace();
}
f.close(); //Close FileReader
} //Close Scan DIR for FILE
//Add all data in the Tbl od Database
PreparedStatement prep = conn.prepareStatement("insert into words values (?);");
for (int z=0; z<app.size();z++){
prep.setString(1,app.get(z).toString().toLowerCase());
prep.addBatch();
conn.setAutoCommit(false);
prep.executeBatch(); ***//Here I get the error although i use int [] Count =prep.executeBatch();***
conn.setAutoCommit(true);
}
prep.close();
} //Close MAIN
You need to release the resources you are using once you are done with them. E.g. prepb.close() after executing the statement.
The same thing goes for your file handles.
Also, the point of batching is lost if you execute the statement for every insert.
Since your files are very small you might as well prepare all the data in memory before you persist it to a database.
package stackoverflow.wordanalysis;
import java.io.*;
import java.sql.*;
import java.util.*;
import java.util.regex.*;
public class WordFrequencyImporter {
public static void main(String[] args) throws Exception {
List<String> words = readWords("the-directory-from-which-to-read-the-files");
Class.forName("org.sqlite.JDBC");
Connection conn = DriverManager.getConnection("jdbc:sqlite:test.db");
try {
createWordsTable(conn);
persistWords(words, conn);
} finally {
conn.close();
}
}
private static void persistWords(List<String> words, Connection conn)
throws SQLException {
System.out.println("Persisting " + words.size() + " words");
PreparedStatement prep = conn
.prepareStatement("insert into words values (?);");
try {
for (String word : words) {
prep.setString(1, word);
prep.addBatch();
}
conn.setAutoCommit(false);
prep.executeBatch();
conn.setAutoCommit(true);
} finally {
prep.close();
}
}
private static void createWordsTable(Connection conn) throws SQLException {
Statement stmt = conn.createStatement();
try {
stmt.executeUpdate("DROP TABLE IF EXISTS words");
stmt.executeUpdate("CREATE TABLE words (words)");
} finally {
stmt.close();
}
}
private static List<String> readWords(String path_dir) throws IOException {
Pattern pattern = Pattern.compile("\\W", Pattern.MULTILINE);
List<String> words = new ArrayList<String>();
for (File file : new File(path_dir).listFiles()) {
BufferedReader reader = new BufferedReader(new FileReader(file));
System.out.println("Reading " + file);
try {
String s;
while ((s = reader.readLine()) != null) {
StringTokenizer st = new StringTokenizer(s);
while (st.hasMoreTokens()) {
String token = st.nextToken().toString().toLowerCase();
String word = pattern.matcher(token).replaceAll("")
.trim();
words.add(word);
}
}
} finally {
reader.close();
}
}
return words;
}
}

Categories