Splitting huge CSV by custom filter? - java

I have a huge (>5 GB) CSV file in the format:
username,transaction
I want as output a separate CSV file for each user, containing all of that user's transactions in the same format. I have a few ideas in mind, but I want to hear other ideas for an effective (fast and memory-efficient) implementation.
Here is what I have done so far. The first test reads, processes, and writes in a single thread; the second test uses many threads. Performance is not that good, so I think I'm doing something wrong. Please correct me.
public class BatchFileReader {
private ICsvBeanReader beanReader;
private double total;
private String[] header;
private CellProcessor[] processors;
private DataTransformer<HashMap<String, List<LoginDto>>> processor;
private boolean hasMoreRecords = true;
public BatchFileReader(String file, DataTransformer<HashMap<String, List<LoginDto>>> processor) {
try {
this.processor = processor;
this.beanReader = new CsvBeanReader(new FileReader(file), CsvPreference.STANDARD_PREFERENCE);
header = CSVUtils.getHeader(beanReader.getHeader(true));
processors = CSVUtils.getProcessors();
} catch (IOException e) {
e.printStackTrace();
}
}
public void read() {
try {
readFile();
} catch (IOException e) {
e.printStackTrace();
} finally {
if (beanReader != null) {
try {
beanReader.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}
}
private void readFile() throws IOException {
while (hasMoreRecords) {
long start = System.currentTimeMillis();
HashMap<String, List<LoginDto>> usersBatch = readBatch();
long end = System.currentTimeMillis();
System.out.println("Reading batch for " + ((end - start) / 1000f) + " seconds.");
total +=((end - start)/ 1000f);
if (processor != null && !usersBatch.isEmpty()) {
processor.transform(usersBatch);
}
}
System.out.println("total = " + total);
}
private HashMap<String, List<LoginDto>> readBatch() throws IOException {
HashMap<String, List<LoginDto>> users = new HashMap<String, List<LoginDto>>();
int readLoginCount = 0;
while (readLoginCount < CONFIG.READ_BATCH_SIZE) {
LoginDto login = beanReader.read(LoginDto.class, header, processors);
if (login != null) {
if (!users.containsKey(login.getUsername())) {
List<LoginDto> logins = new LinkedList<LoginDto>();
users.put(login.getUsername(), logins);
}
users.get(login.getUsername()).add(login);
readLoginCount++;
} else {
hasMoreRecords = false;
break;
}
}
return users;
}
}
public class BatchFileWriter<T> {
private final String file;
private final List<T> processedData;
public BatchFileWriter(final String file, List<T> processedData) {
this.file = file;
this.processedData = processedData;
}
public void write() {
try {
writeFile(file, processedData);
} catch (IOException e) {
e.printStackTrace();
} finally {
}
}
private void writeFile(final String file, final List<T> processedData) throws IOException {
System.out.println("START WRITE " + " " + file);
FileWriter writer = new FileWriter(file, true);
long start = System.currentTimeMillis();
for (T record : processedData) {
writer.write(record.toString());
writer.write("\n");
}
writer.flush();
writer.close();
long end = System.currentTimeMillis();
System.out.println("Writing in file " + file + " complete for " + ((end - start) / 1000f) + " seconds.");
}
}
public class LoginsTest {
private static final ExecutorService executor = Executors.newSingleThreadExecutor();
private static final ExecutorService procExec = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors() + 1);
@Test
public void testSingleThreadCSVtoCSVSplit() throws InterruptedException, ExecutionException {
long start = System.currentTimeMillis();
DataTransformer<HashMap<String, List<LoginDto>>> simpleSplitProcessor = new DataTransformer<HashMap<String, List<LoginDto>>>() {
@Override
public void transform(HashMap<String, List<LoginDto>> data) {
for (String field : data.keySet()) {
new BatchFileWriter<LoginDto>(field + ".csv", data.get(field)).write();
}
}
};
BatchFileReader reader = new BatchFileReader("loadData.csv", simpleSplitProcessor);
reader.read();
long end = System.currentTimeMillis();
System.out.println("TOTAL " + ((end - start)/ 1000f) + " seconds.");
}
@Test
public void testMultiThreadCSVtoCSVSplit() throws InterruptedException, ExecutionException {
long start = System.currentTimeMillis();
System.out.println(start);
final DataTransformer<HashMap<String, List<LoginDto>>> simpleSplitProcessor = new DataTransformer<HashMap<String, List<LoginDto>>>() {
@Override
public void transform(HashMap<String, List<LoginDto>> data) {
System.out.println("transform");
processAsync(data);
}
};
final CountDownLatch readLatch = new CountDownLatch(1);
executor.execute(new Runnable() {
@Override
public void run() {
BatchFileReader reader = new BatchFileReader("loadData.csv", simpleSplitProcessor);
reader.read();
System.out.println("read latch count down");
readLatch.countDown();
}});
System.out.println("read latch before await");
readLatch.await();
System.out.println("read latch after await");
procExec.shutdown();
executor.shutdown();
long end = System.currentTimeMillis();
System.out.println("TOTAL " + ((end - start)/ 1000f) + " seconds.");
}
private void processAsync(final HashMap<String, List<LoginDto>> data) {
procExec.execute(new Runnable() {
@Override
public void run() {
for (String field : data.keySet()) {
writeASync(field, data.get(field));
}
}
});
}
private void writeASync(final String field, final List<LoginDto> data) {
procExec.execute(new Runnable() {
@Override
public void run() {
new BatchFileWriter<LoginDto>(field + ".csv", data).write();
}
});
}
}

Would it not be better to use Unix commands to sort and then split the original file?
Something like: sort txn.csv > txn-sorted.csv
From there, get a listing of the unique usernames (for example with cut -d, -f1 txn-sorted.csv | uniq) and then grep the sorted file for each username.

If you know Camel already, I'd write a simple Camel route to:
Read line from file
Parse the line
Write to the correct output file
It's a very simple route, but if you want it as fast as possible it is trivially easy to make it multithreaded.
E.g. your route would look something like:
from("file:/myfile.csv")
.beanRef("lineParser")
.to("seda:internal-queue");
from("seda:internal-queue")
.concurrentConsumers(5)
.to("fileWriter");
If you don't know Camel then it's probably not worth learning it just for this one task. However, you are probably going to need to make it multithreaded to get the maximum performance. You'll have to experiment with where best to put the threading, as it will depend on which parts of the operation are slowest.
The multithreading will use up more memory so you'll need to balance memory efficiency against performance.

I would open/append a new output file for each user. If you wanted to minimize memory usage and incur more I/O overhead, you could do something like the following, though you'd probably want to use a real CSV parser like Super CSV (http://supercsv.sourceforge.net/index.html):
Scanner s = new Scanner(new File("/my/dir/users-and-transactions.txt"));
while (s.hasNextLine()) {
String line = s.nextLine();
String[] tokens = line.split(",");
String user = tokens[0];
String transaction = tokens[1];
PrintStream out = new PrintStream(new FileOutputStream("/my/dir/" + user, true));
out.println(transaction);
out.close();
}
s.close();
If you've got a reasonable amount of memory, you could create a Map of user name to OutputStream. Each time you see a user string, you could get the existing OutputStream for that user name or create a new one if none exists.
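For illustration, here is a minimal sketch of that idea, assuming the two-column username,transaction format from the question; the class name and paths are illustrative and error handling is kept deliberately simple:
import java.io.*;
import java.util.*;

public class SplitByUser {
    public static void main(String[] args) throws IOException {
        // one writer per user, opened lazily in append mode and kept for the whole run
        Map<String, PrintWriter> writers = new HashMap<String, PrintWriter>();
        Scanner s = new Scanner(new File("/my/dir/users-and-transactions.txt"));
        try {
            while (s.hasNextLine()) {
                String[] tokens = s.nextLine().split(",", 2);
                if (tokens.length < 2) {
                    continue; // skip malformed lines
                }
                PrintWriter out = writers.get(tokens[0]);
                if (out == null) {
                    out = new PrintWriter(new BufferedWriter(
                            new FileWriter("/my/dir/" + tokens[0] + ".csv", true)));
                    writers.put(tokens[0], out);
                }
                out.println(tokens[1]);
            }
        } finally {
            s.close();
            for (PrintWriter w : writers.values()) {
                w.close(); // close() also flushes the buffered output
            }
        }
    }
}
One caveat: this keeps one open file handle per distinct user, so with very many users you may hit the operating system's file descriptor limit; closing the least recently used writers and reopening them in append mode is a common compromise.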

Related

How to apply multithreading in Java to find a word in a directory (main directory and the word given) and recursively in its subdirectories

I am quite new on Stack Overflow and a beginner in Java so please forgive me if I have asked this question in an improper way.
PROBLEM
I have an assignment which tells me to make use of multithreading to search files for a given word, which might be present in any file of type .txt or .html, at any level in the given directory (so basically the entire directory). The absolute file path of the file has to be displayed on the console if the file contains the given word.
WHAT HAVE I TRIED
So I thought of dividing the task into 2 sections, Searching and Multithreading respectively,
I was able to get the searching part working (File_search.java). It has given satisfactory results, searching through the directory and finding all the files in it that contain the given word.
File_search.java
public class File_search{
String fin_output = "";
public String searchInTextFiles(File dir,String search_word) {
File[] a = dir.listFiles();
for(File f : a){
if(f.isDirectory()) {
searchInTextFiles(f,search_word);
}
else if(f.getName().endsWith(".txt") || f.getName().endsWith(".html") || f.getName().endsWith(".htm") ) {
try {
searchInFile(f,search_word);
} catch (FileNotFoundException e) {
e.printStackTrace();
}
}
}
return fin_output;
}
public void searchInFile(File f,String search_word) throws FileNotFoundException {
final Scanner sc = new Scanner(f);
while(sc.hasNextLine()) {
final String lineFromFile = sc.nextLine();
if(lineFromFile.contains(search_word)) {
fin_output += "FILE : "+f.getAbsolutePath().toString()+"\n";
}
}
}
}
Now, I want to be able to execute the task in File_search.java using multiple threads via the ThreadPoolExecutor service. I'm not sure if I can do it using Runnable, Callable, the Thread class, or some other method.
Can you please help me with the code to do the multi-threading part? Thanks :)
I agree with the comment of @chrylis -cautiouslyoptimistic, but for the purpose of understanding, the below will help you.
One simpler approach could be to do the traversal of directories in the main thread (the logic you have in the function searchInTextFiles) and submit the per-file searching logic (your function searchInFile) to a thread pool of size, let's say, 10.
Below sample code will help you to understand it better.
public class Traverser {
private List<Future<String>> futureList = new ArrayList<Future<String>>();
private ExecutorService executorService;
public Traverser() {
executorService = Executors.newFixedThreadPool(10);
}
public static void main(String[] args) throws InterruptedException, ExecutionException {
System.out.println("Started");
long start = System.currentTimeMillis();
Traverser traverser = new Traverser();
traverser.searchInTextFiles(new File("Some Directory Path"), "Some Text");
for (Future<String> future : traverser.futureList) {
System.out.println(future.get());
}
traverser.executorService.shutdown();
while(!traverser.executorService.isTerminated()) {
System.out.println("Not terminated yet, sleeping");
Thread.sleep(1000);
}
long end = System.currentTimeMillis();
System.out.println("Time taken :" + (end - start));
}
public void searchInTextFiles(File dir,String searchWord) {
File[] filesList = dir.listFiles();
for(File file : filesList){
if(file.isDirectory()) {
searchInTextFiles(file,searchWord);
}
else if(file.getName().endsWith(".txt") || file.getName().endsWith(".html") || file.getName().endsWith(".htm") ) {
try {
futureList.add(executorService.submit(new SearcherTask(file,searchWord)));
} catch (Exception e) {
e.printStackTrace();
}
}
}
}}
public class SearcherTask implements Callable<String> {
private File inputFile;
private String searchWord;
public SearcherTask(File inputFile, String searchWord) {
this.inputFile = inputFile;
this.searchWord = searchWord;
}
@Override
public String call() throws Exception {
StringBuilder result = new StringBuilder();
Scanner sc = null;
try {
sc = new Scanner(inputFile);
while (sc.hasNextLine()) {
final String lineFromFile = sc.nextLine();
if (lineFromFile.contains(searchWord)) {
result.append("FILE : " + inputFile.getAbsolutePath().toString() + "\n");
}
}
} catch (Exception e) {
//log error
throw e;
} finally {
if (sc != null) {
sc.close();
}
}
return result.toString();
}}

How to perform addition from multiple files using an executor service and give the final output

Below is the piece of code I have tried. Each file contains an integer, and I want to add all the integers and display the output.
@Override
public void run() {
BlockingQueue<Integer> d;
try {
d = readFile(file);
//System.out.println("adding the integers ..."+d.take());
i = (int) d.take();
System.out.println("i = "+i);
sum = sum + i;
//System.out.println("ai = "+ai.incrementAndGet());
System.out.println("sum = "+sum );
} catch (IOException | InterruptedException e) {
e.printStackTrace();
}
// ProcessedData p = d.process();
// writeFile(file.getAbsolutePath(), "C:/test");
}
private BlockingQueue<Integer> readFile(File file2) throws IOException, InterruptedException {
FileReader fr = new FileReader(file2);
BufferedReader in = new BufferedReader(new java.io.FileReader(file2));
int content = Integer.parseInt(in.readLine());
System.out.println("content = "+content);
System.out.println("reading and writing to blocking queue...");
blockingQueue.put(content);
return blockingQueue;
}
Here is the solution to the problem:
When I was adding the integers from the queue using an atomic integer, each thread had a different copy of the atomic variable, so the addAndGet calls were not accumulating into one shared value.
I segregated this into a singleton class which returns the same AtomicInteger object to every thread that requests it.
Below is the snippet of code, and this solved my problem:
import java.util.concurrent.atomic.AtomicInteger;
public class AtomicState {
private static final AtomicState as = new AtomicState();
public AtomicInteger ai = new AtomicInteger();
private AtomicState() {
}
public AtomicInteger getAtomicIntegerObj() {
return ai;
}
public static AtomicState getAtomicState() {
return as;
}
}
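With that in place, every worker updates the same counter. For completeness, here is a hypothetical sketch of a worker's run() using it (the blockingQueue field is assumed from the question's code):
@Override
public void run() {
    try {
        // every thread gets the same AtomicInteger, so the sum accumulates correctly
        AtomicInteger total = AtomicState.getAtomicState().getAtomicIntegerObj();
        int value = blockingQueue.take(); // the integer read from one file
        System.out.println("running sum = " + total.addAndGet(value));
    } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
    }
}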

How to give a message when ThreadPoolExecutor is completed?

I am trying to show a pop-up alert message when my ThreadPoolExecutor has finished executing. It is searching email addresses from websites; once it is done I want an alert message saying "Completed". Here is my thread pool:
public class Constant
{
public static final int NUM_OF_THREAD = 60;
public static final int TIME_OUT = 10000;
}
ThreadPoolExecutor poolMainExecutor = (ThreadPoolExecutor) Executors.newFixedThreadPool
(Constant.NUM_OF_THREAD);
Here is my SearchingOperation class:
class SearchingOperation implements Runnable {
URL urldata;
int i;
Set<String> emailAddresses;
int level;
SearchingOperation(URL urldata, int i, Set<String> emailAddresses, int level) {
this.urldata = urldata;
this.i = i;
this.emailAddresses = emailAddresses;
this.level = level;
if (level != 1)
model.setValueAt(urldata.getProtocol() + "://" + urldata.getHost() + "/contacts", i, 3);
}
public void run() {
BufferedReader bufferreader1 = null;
InputStreamReader emailReader = null;
System.out.println(this.i + ":" + poolMainExecutor.getActiveCount() + ":" + level + ";" + urldata.toString());
try {
if (level < 1) {
String httpPatternString = "https?:\\/\\/(www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{2,256}\\.[a-z]{2,6}\\b([-a-zA-Z0-9@:%_\\+.~#?&//=]*)";
String httpString = "";
BufferedReader bufferreaderHTTP = null;
InputStreamReader httpReader = null;
try {
httpReader = new InputStreamReader(urldata.openStream());
bufferreaderHTTP = new BufferedReader(httpReader
);
StringBuilder rawhttp = new StringBuilder();
while ((httpString = bufferreaderHTTP.readLine()) != null) {
rawhttp.append(httpString);
}
if (rawhttp.toString().isEmpty()) {
return;
}
List<String> urls = getURL(rawhttp.toString());
for (String url : urls) {
String fullUrl = getMatchRegex(url, httpPatternString);
if (fullUrl.isEmpty()) {
if (!url.startsWith("/")) {
url = "/" + url;
}
String address = urldata.getProtocol() + "://" + urldata.getHost() + url;
fullUrl = getMatchRegex(address, httpPatternString);
}
if (!addressWorked.contains(fullUrl) && fullUrl.contains(urldata.getHost())) {
addressWorked.add(fullUrl);
sendToSearch(fullUrl);
}
}
} catch (Exception e) {
//System.out.println("652" + e.getMessage());
//e.printStackTrace();
return;
} finally {
try {
if (bufferreaderHTTP != null)
bufferreaderHTTP.close();
} catch (Exception e) {
// e.printStackTrace();
}
try {
if (httpReader != null)
httpReader.close();
} catch (Exception e) {
e.printStackTrace();
}
}
}
String someString = "";
emailReader = new InputStreamReader(urldata.openStream());
bufferreader1 = new BufferedReader(
emailReader);
StringBuilder emailRaw = new StringBuilder();
while ((someString = bufferreader1.readLine()) != null) {
if (someString.contains("@")) {
emailRaw.append(someString).append(";");
}
}
//Set<String> emailAddresses = new HashSet<String>();
String emailAddress;
//Pattern pattern = Pattern
//.compile("\\b[a-zA-Z0-9.-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z0-9.-]+\\b");
Pattern pattern = Pattern
.compile("\\b[a-zA-Z0-9.-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z0-9.-]+\\b");
Matcher matchs = pattern.matcher(emailRaw);
while (matchs.find()) {
emailAddress = (emailRaw.substring(matchs.start(),
matchs.end()));
// //System.out.println(emailAddress);
if (!emailAddresses.contains(emailAddress)) {
emailAddresses.add(emailAddress);
// //System.out.println(emailAddress);
if (!foundItem.get(i)) {
table.setValueAt("Found", i, 4);
foundItem.set(i, true);
}
String emails = !emailAddresses.isEmpty() ? emailAddresses.toString() : "";
model.setValueAt(emails, i, 2);
model.setValueAt("", i, 3);
}
}
} catch (Exception e) {
//System.out.println("687" + e.getMessage());
} finally {
try {
if (bufferreader1 != null)
bufferreader1.close();
} catch (Exception e) {
e.printStackTrace();
}
try {
if (emailReader != null)
emailReader.close();
} catch (Exception e) {
e.printStackTrace();
}
Thread.currentThread().interrupt();
return;
}
}
After this, the final snippet:
private void sendToSearch(String address) throws Throwable {
SearchingOperation operation = new SearchingOperation(new URL(address), i,
emailAddresses, level + 1);
//operation.run();
try {
final Future handler = poolMainExecutor.submit(operation);
try {
handler.get(Constant.TIME_OUT, TimeUnit.MILLISECONDS);
} catch (TimeoutException e) {
e.printStackTrace();
handler.cancel(false);
}
} catch (Exception e) {
//System.out.println("Time out for:" + address);
} catch (Error error) {
//System.out.println("Time out for:" + address);
} finally {
}
}
Implement Callable<Void> instead of Runnable and wait for all the tasks to terminate by calling Future<Void>.get():
class SearchingOperation implements Callable<Void>
{
public Void call() throws Exception
{
//same code as in run()
}
}
//submit and wait until the task completes
Future<Void> future = poolMainExecutor.submit(new SearchingOperation());
future.get();
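To wait for many tasks, the same idea extends naturally. Here is a minimal sketch, assuming you collect one Future per submitted SearchingOperation (the operations collection is illustrative):
List<Future<Void>> futures = new ArrayList<Future<Void>>();
for (SearchingOperation operation : operations) {
    futures.add(poolMainExecutor.submit(operation));
}
for (Future<Void> future : futures) {
    future.get(); // blocks until that task completes (or rethrows its exception)
}
// every task has finished here, so it is safe to show the "Completed" alert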
Use ThreadPoolExecutor.awaitTermination():
Blocks until all tasks have completed execution after a shutdown request, or the timeout occurs, or the current thread is interrupted, whichever happens first.
As in your code, you create your ThreadPoolExecutor first
ThreadPoolExecutor poolMainExecutor = (ThreadPoolExecutor) Executors.newFixedThreadPool(Constant.NUM_OF_THREAD);
Then, you need to add Tasks to it:
poolMainExecutor.execute(myTask);
poolMainExecutor.submit(myTask);
execute returns nothing, while submit returns a Future object. Tasks must implement Runnable or Callable; an object of SearchingOperation is a task, for example. The thread pool will execute the tasks in parallel, but each task will be executed by a single thread. That means that to effectively use NUM_OF_THREAD threads you need to add at least NUM_OF_THREAD tasks.
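For instance, a minimal sketch of the difference (myTask as above; get() throws checked exceptions you must declare or catch):
poolMainExecutor.execute(myTask);                   // fire-and-forget, no handle back
Future<?> handle = poolMainExecutor.submit(myTask); // returns a handle to wait on
handle.get(); // blocks until that submitted task has finished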
(Optional) Once you have submitted all your jobs, shut down your pool. This will prevent new tasks from being submitted; it won't affect running tasks.
poolMainExecutor.shutdown();
At the end, you need to wait for all Tasks to complete. The easiest way is by calling
poolMainExecutor.awaitTermination(Integer.MAX_VALUE, TimeUnit.DAYS);
You should adjust the amount of time you want to wait for the tasks to finish before throwing an exception.
Now that the work is done, notify the user. A simple way is to call one of the Dialog presets from JOptionPane, like:
JOptionPane.showMessageDialog(null, "message", "title", JOptionPane.INFORMATION_MESSAGE);
It will popup a little window with title "title", the message "message", an "information" icon and a button to close it.
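Since Swing components should only be touched on the event dispatch thread, and the waiting happens on another thread, a safer sketch wraps the call:
SwingUtilities.invokeLater(new Runnable() {
    @Override
    public void run() {
        JOptionPane.showMessageDialog(null, "message", "title", JOptionPane.INFORMATION_MESSAGE);
    }
});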
This code can also be used; it will check every 2.5 seconds whether the execution has completed:
do {
System.out.println("In Progress");
try {
Thread.sleep(2500);
} catch (InterruptedException e) {
e.printStackTrace();
}
} while (poolMainExecutor.getActiveCount() != 0);
System.out.println("Completed");

Locking files while using Java Logger

I am creating a logger that will log things throughout my program. It seems to work fine. This is what the class looks like.
public class TESTLogger {
protected static File file;
protected static Logger logger = Logger.getLogger("");
public TESTLogger(String logName) {
setupLogger(logName);
}
private static void setupLogger(String logName) {
String basePath = Utils.getBasePath();
File logsDir = new File(basePath);
if(logsDir.exists() == false) {
logsDir.mkdir();
}
String filePath = basePath + File.separator + logName + ".%g.log";
file = new File(filePath);
try {
FileHandler fileHandler = new FileHandler(filePath, 5242880, 5, true);
fileHandler.setFormatter(new java.util.logging.Formatter() {
@Override
public String format(LogRecord logRecord) {
if(logRecord.getLevel() == Level.INFO) {
return "[INFO " + createDateTimeLog() + "] " + logRecord.getMessage() + "\r\n";
} else if(logRecord.getLevel() == Level.WARNING) {
return "[WARN " + createDateTimeLog() + "] " + logRecord.getMessage() + "\r\n";
} else if(logRecord.getLevel() == Level.SEVERE) {
return "[ERROR " + createDateTimeLog() + "] " + logRecord.getMessage() + "\r\n";
} else {
return "[OTHER " + createDateTimeLog() + "] " + logRecord.getMessage() + "\r\n";
}
}
});
logger.addHandler(fileHandler);
} catch (IOException e) {
}
}
private static void writeToFile(Level level, String writeThisToFile) {
logger.log(level, writeThisToFile);
}
private static String createDateTimeLog() {
String dateTime = "";
Date date = new Date();
SimpleDateFormat simpleDateFormat = new SimpleDateFormat("yyyy-MM-dd H:mm:ss");
dateTime = simpleDateFormat.format(date);
return dateTime;
}
public void error(String message) {
writeToFile(Level.SEVERE, message);
}
public void warn(String message) {
writeToFile(Level.WARNING, message);
}
public void info(String message) {
writeToFile(Level.INFO, message);
}
}
When my application starts it creates the TESTLogger object. Then whenever I log, I run logger.info / logger.warn / logger.error with my log message. That is working great. However, multiple instances of my jar can be running at the same time. When that happens, a new instance of the log file is created. I.e., I could have myLog.0.log; when the second instance of the jar logs something, it will go to myLog.0.log.1, then myLog.0.log.2, and so on.
I don't want to create all these different instances of my log file. I thought I might use a File Lock (from java.nio.channels package). However, I have not been able to figure out how to do that with the Java Logger class I am using (java.util.logging).
Any ideas how to prevent this from happening would be great. Thanks in advance.
EDIT:
Ok, so I have rewritten writeToFile and it seems to work a little better. However, every now and again I still get a .1 log. It doesn't happen as much as it used to, and it NEVER gets to .2 (it used to get all the way up to .100). I would still like to prevent this .1, though.
This is what my code looks like now:
private static void writeToFile(Level level, String writeThisToFile) {
try {
File file = new File("FileLock");
FileChannel channel = new RandomAccessFile(file, "rw").getChannel();
FileLock lock = null;
try {
lock = channel.tryLock(0, Long.MAX_VALUE, true);
if(lock != null) {
logger.log(level, writeThisToFile);
}
} catch (OverlappingFileLockException e) {
}
finally {
if(lock != null) {
lock.release();
}
channel.close();
}
} catch (IOException e) {}
}
EDIT #2: What it currently looks like.
Entrance point into my JAR:
public class StartingPoint {
public static void main(String[] args) {
MyLogger logger = new MyLogger("myFirstLogger");
logger.info("Info test message");
logger.warn("Warning test message");
logger.error("Error test message");
}
}
MyLogger class:
public class MyLogger {
protected static File file;
protected static Logger logger = Logger.getLogger("");
public MyLogger(String loggerName) {
setupLogger(loggerName);
}
private void setupLogger(String loggerName) {
String filePath = loggerName + "_%g" + ".log";
file = new File(filePath);
try {
FileHandler fileHandler = new FileHandler(filePath, 5242880, 5, true);
fileHandler.setFormatter(new java.util.logging.Formatter() {
@Override
public String format(LogRecord logRecord) {
if(logRecord.getLevel() == Level.INFO) {
return "[INFO " + createDateTimeLog() + "] " + logRecord.getMessage() + "\r\n";
} else if(logRecord.getLevel() == Level.WARNING) {
return "[WARN " + createDateTimeLog() + "] " + logRecord.getMessage() + "\r\n";
} else if(logRecord.getLevel() == Level.SEVERE) {
return "[ERROR " + createDateTimeLog() + "] " + logRecord.getMessage() + "\r\n";
} else {
return "[OTHER " + createDateTimeLog() + "] " + logRecord.getMessage() + "\r\n";
}
}
});
logger.addHandler(fileHandler);
logger.addHandler(new SharedFileHandler()); // <--- SharedFileHandler added
} catch (IOException e) {}
}
private void writeToFile(Level level, String writeThisToFile) {
logger.log(level, writeThisToFile);
}
private static String createDateTimeLog() {
String dateTime = "";
Date date = new Date();
SimpleDateFormat simpleDateFormat = new SimpleDateFormat("yyyy-MM-dd H:mm:ss");
dateTime = simpleDateFormat.format(date);
return dateTime;
}
public void error(String message) {
writeToFile(Level.SEVERE, message);
}
public void warn(String message) {
writeToFile(Level.WARNING, message);
}
public void info(String message) {
writeToFile(Level.INFO, message);
}
}
And finally... SharedFileHandler:
public class SharedFileHandler extends Handler {
private final FileChannel mutex;
private final String pattern;
public SharedFileHandler() throws IOException {
this("loggerLockFile");
}
public SharedFileHandler(String pattern) throws IOException {
setFormatter(new SimpleFormatter());
this.pattern = pattern;
mutex = new RandomAccessFile(pattern, "rw").getChannel();
}
@Override
public void publish(LogRecord record) {
if (isLoggable(record)) {
record.getSourceMethodName(); //Infer caller.
try {
FileLock ticket = mutex.lock();
try {
doPublish(record);
} finally {
ticket.release();
}
} catch (IOException e) {}
catch (OverlappingFileLockException e) {}
catch (NullPointerException e) {}
}
}
private void doPublish(LogRecord record) throws IOException {
final FileHandler h = new FileHandler(pattern, 5242880, 5, true);
try {
h.setEncoding(getEncoding());
h.setErrorManager(getErrorManager());
h.setFilter(getFilter());
h.setFormatter(getFormatter());
h.setLevel(getLevel());
h.publish(record);
h.flush();
} finally {
h.close();
}
}
@Override
public void flush() {}
@Override
public synchronized void close() throws SecurityException {
super.setLevel(Level.OFF);
try {
mutex.close();
} catch (IOException ioe) {}
}
}
The FileHandler does everything it can to prevent two concurrently running JVMs from writing to the same log file. If this behavior was allowed the log file would be almost impossible to read and understand.
If you really want to write everything to one log file then you have to do one of the following:
Prevent concurrent JVM processes from starting by changing how it is launched.
Have your code detect if another JVM is running your code and exit before creating a FileHandler.
Have each JVM write to a distinct log file and create code to safely merge the files into one.
Create a proxy Handler that creates and closes a FileHandler for each log record. The proxy handler would use a predefined file name (different from the log file) and a FileLock to serialize access to the log file from different JVMs.
Use a dedicated process to write to the log file and have all the JVMs send log messages to that process.
Here is an untested example of a proxy handler:
import java.io.File;
import java.io.IOException;
import java.nio.channels.FileChannel;
import java.nio.channels.FileLock;
import java.nio.channels.OverlappingFileLockException;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.logging.*;
import static java.nio.file.StandardOpenOption.*;
public class SharedFileHandler extends Handler {
private final FileChannel mutex;
private final String pattern;
public SharedFileHandler() throws IOException {
this("%hjava%g.log");
}
public SharedFileHandler(String pattern) throws IOException {
setFormatter(new SimpleFormatter());
this.pattern = pattern;
Path p = Paths.get(new File(".").getCanonicalPath(),
pattern.replace("%", "") + ".lck");
mutex = FileChannel.open(p, CREATE, WRITE, DELETE_ON_CLOSE);
}
@Override
public void publish(LogRecord record) {
if (isLoggable(record)) {
record.getSourceMethodName(); //Infer caller.
try {
FileLock ticket = mutex.lock();
try {
doPublish(ticket, record);
} finally {
ticket.release();
}
} catch (IOException | OverlappingFileLockException ex) {
reportError(null, ex, ErrorManager.WRITE_FAILURE);
}
}
}
private synchronized void doPublish(FileLock ticket, LogRecord record) throws IOException {
if (!ticket.isValid()) {
return;
}
final FileHandler h = new FileHandler(pattern, 5242880, 5, true);
try {
h.setEncoding(getEncoding());
h.setErrorManager(getErrorManager());
h.setFilter((Filter) null);
h.setFormatter(getFormatter());
h.setLevel(getLevel());
h.publish(record);
h.flush();
} finally {
h.close();
}
}
@Override
public void flush() {
}
@Override
public synchronized void close() throws SecurityException {
super.setLevel(Level.OFF);
try {
mutex.close();
} catch (IOException ioe) {
this.reportError(null, ioe, ErrorManager.CLOSE_FAILURE);
}
}
}
Here is a simple test case
public static void main(String[] args) throws Exception {
Random rnd = new Random();
logger.addHandler(new SharedFileHandler());
String id = ManagementFactory.getRuntimeMXBean().getName();
for (int i = 0; i < 600; i++) {
logger.log(Level.INFO, id);
Thread.sleep(rnd.nextInt(100));
}
}

How do we know a ThreadPoolExecutor has finished execution?

I have a parent thread that sends messages to MQ, and it manages a ThreadPoolExecutor for worker threads which listen to MQ and write messages to output files. I manage a thread pool of size 5, so when I run my program I have 5 files with messages. Everything works fine up to this point. I now need to merge these 5 files in my parent thread.
How do I know the ThreadPoolExecutor has finished processing so I can start merging files?
public class ParentThread {
private MessageSender messageSender;
private MessageReciever messageReciever;
private Queue jmsQueue;
private Queue jmsReplyQueue;
ExecutorService exec = Executors.newFixedThreadPool(5);
public void sendMessages() {
System.out.println("Sending");
File xmlFile = new File("c:/filename.txt");
List<String> lines = null;
try {
lines = FileUtils.readLines(xmlFile, null);
} catch (IOException e) {
e.printStackTrace();
}
for (String line : lines){
messageSender.sendMessage(line, this.jmsQueue, this.jmsReplyQueue);
}
int count = 0;
while (count < 5) {
messageSender.sendMessage("STOP", this.jmsQueue, this.jmsReplyQueue);
count++;
}
}
public void listenMessages() {
long finishDate = new Date().getTime();
for (int i = 0; i < 5; i++) {
Worker worker = new Worker(i, this.messageReciever, this.jmsReplyQueue);
exec.execute(worker);
}
exec.shutdown();
if(exec.isTerminated()){ //PROBLEM is HERE. Control Never gets here.
long currenttime = new Date().getTime() - finishDate;
System.out.println("time taken: "+currenttime);
mergeFiles();
}
}
}
This is my worker class
public class Worker implements Runnable {
private boolean stop = false;
private MessageReciever messageReciever;
private Queue jmsReplyQueue;
private int processId;
private int count = 0;
private String message;
private File outputFile;
private FileWriter outputFileWriter;
public Worker(int processId, MessageReciever messageReciever,
Queue jmsReplyQueue) {
this.processId = processId;
this.messageReciever = messageReciever;
this.jmsReplyQueue = jmsReplyQueue;
}
public void run() {
openOutputFile();
listenMessages();
}
private void listenMessages() {
while (!stop) {
String message = messageReciever.receiveMessage(null,this.jmsReplyQueue);
count++;
String s = "message: " + message + " Recieved by: "
+ processId + " Total recieved: " + count;
System.out.println(s);
writeOutputFile(s);
if (StringUtils.isNotEmpty(message) && message.equals("STOP")) {
stop = true;
}
}
}
private void openOutputFile() {
try {
outputFile = new File("C:/mahi/Test", "file." + processId);
outputFileWriter = new FileWriter(outputFile);
} catch (IOException e) {
System.out.println("Exception while opening file");
stop = true;
}
}
private void writeOutputFile(String message) {
try {
outputFileWriter.write(message);
outputFileWriter.flush();
} catch (IOException e) {
System.out.println("Exception while writing to file");
stop = true;
}
}
}
How will I know when the ThreadPool has finished processing so I can do my other clean up work?
Thanks
If your Worker class implements Callable instead of Runnable, then you'd be able to see when your threads complete by using a Future object to check whether the thread has returned some result (e.g. a boolean which would tell you whether it has finished execution or not).
Take a look at section "8. Futures and Callables" at the website below; it has exactly what you need, IMO:
http://www.vogella.com/articles/JavaConcurrency/article.html
Edit: So after all of the Futures indicate that their respective Callable's execution is complete, it's safe to assume your executor has finished execution and can be shut down/terminated manually.
Something like this:
exec.shutdown();
// waiting for executors to finish their jobs
while (!exec.awaitTermination(50, TimeUnit.MILLISECONDS));
// perform clean up work
You can use a thread to monitor the ThreadPoolExecutor, like this:
import java.util.concurrent.ThreadPoolExecutor;
public class MyMonitorThread implements Runnable {
private ThreadPoolExecutor executor;
private int seconds;
private volatile boolean run = true; // volatile so a shutdown() from another thread is visible here
public MyMonitorThread(ThreadPoolExecutor executor, int delay)
{
this.executor = executor;
this.seconds=delay;
}
public void shutdown(){
this.run=false;
}
@Override
public void run()
{
while(run){
System.out.println(
String.format("[monitor] [%d/%d] Active: %d, Completed: %d, Task: %d, isShutdown: %s, isTerminated: %s",
this.executor.getPoolSize(),
this.executor.getCorePoolSize(),
this.executor.getActiveCount(),
this.executor.getCompletedTaskCount(),
this.executor.getTaskCount(),
this.executor.isShutdown(),
this.executor.isTerminated()));
try {
Thread.sleep(seconds*1000);
} catch (InterruptedException e) {
e.printStackTrace();
}
}
}
}
And add
MyMonitorThread monitor = new MyMonitorThread(executorPool, 3);
Thread monitorThread = new Thread(monitor);
monitorThread.start();
to the class where your ThreadPoolExecutor is located.
It will print your ThreadPoolExecutor's state every 3 seconds.
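A typical way to tie it together, as a sketch (executorPool and monitor as above; awaitTermination throws InterruptedException, which you must declare or catch):
executorPool.shutdown(); // stop accepting new tasks
executorPool.awaitTermination(Integer.MAX_VALUE, TimeUnit.DAYS); // wait for the workers to finish
monitor.shutdown(); // stop the monitoring loop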
