Is there a better way to generate 5 million csv files quickly - java

I would like to create 5 million CSV files. I have been waiting for almost 3 hours, but the program is still running. Can somebody give me some advice on how to speed up the file generation?
After the generation of these 5 million files completes, I have to upload them to an S3 bucket.
It would be even better if someone knows how to generate these files on AWS directly, so we could move them into the S3 bucket without worrying about network speed. (I have just started learning AWS, so there is a lot to learn.)
The following is my code.
public class ParallelCsvGenerate implements Runnable {
private static AtomicLong baseID = new AtomicLong(8160123456L);
private static ThreadLocalRandom random = ThreadLocalRandom.current();
private static ThreadLocalRandom random2 = ThreadLocalRandom.current();
private static String filePath = "C:\\5millionfiles\\";
private static List<String> headList = null;
private static String csvHeader = null;
public ParallelCsvGenerate() {
headList = generateHeadList();
csvHeader = String.join(",", headList);
}
@Override
public void run() {
for(int i = 0; i < 1000000; i++) {
generateCSV();
}
}
private void generateCSV() {
StringBuilder builder = new StringBuilder();
builder.append(csvHeader).append(System.lineSeparator());
for (int i = 0; i < headList.size(); i++) {
if(i < headList.size() - 1) {
builder.append(i % 2 == 0 ? generateRandomInteger() : generateRandomStr()).append(",");
} else {
builder.append(i % 2 == 0 ? generateRandomInteger() : generateRandomStr());
}
}
String fileName = String.valueOf(baseID.addAndGet(1));
File csvFile = new File(filePath + fileName + ".csv");
FileWriter fileWriter = null;
try {
fileWriter = new FileWriter(csvFile);
fileWriter.write(builder.toString());
fileWriter.flush();
} catch (Exception e) {
System.err.println(e);
} finally {
try {
if(fileWriter != null) {
fileWriter.close();
}
} catch (IOException e) {
e.printStackTrace();
}
}
}
private static List<String> generateHeadList() {
List<String> headList = new ArrayList<>(20);
String baseFiledName = "Field";
for(int i = 1; i <=20; i++) {
headList.add(baseFiledName + i);
}
return headList;
}
/**
* generate a number in range of 0-50000
* @return
*/
private Integer generateRandomInteger() {
return random.nextInt(0,50000);
}
/**
* generate a string length is 5 - 8
* @return
*/
private String generateRandomStr() {
int strLength = random2.nextInt(5, 8);
String str="abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
int length = str.length();
StringBuilder builder = new StringBuilder();
for (int i = 0; i < strLength; i++) {
builder.append(str.charAt(random.nextInt(length)));
}
return builder.toString();
}
Main
ParallelCsvGenerate generate = new ParallelCsvGenerate();
Thread a = new Thread(generate, "A");
Thread b = new Thread(generate, "B");
Thread c = new Thread(generate, "C");
Thread d = new Thread(generate, "D");
Thread e = new Thread(generate, "E");
a.run();
b.run();
c.run();
d.run();
e.run();
Thanks for your advice, guys. I just refactored the code, and it now generates 3.8 million files in 2.8 hours, which is much better.
Refactored code:
public class ParallelCsvGenerate implements Callable<Integer> {
private static String filePath = "C:\\5millionfiles\\";
private static String[] header = new String[]{
"FIELD1","FIELD2","FIELD3","FIELD4","FIELD5",
"FIELD6","FIELD7","FIELD8","FIELD9","FIELD10",
"FIELD11","FIELD12","FIELD13","FIELD14","FIELD15",
"FIELD16","FIELD17","FIELD18","FIELD19","FIELD20",
};
private String fileName;
public ParallelCsvGenerate(String fileName) {
this.fileName = fileName;
}
@Override
public Integer call() throws Exception {
try {
generateCSV();
} catch (IOException e) {
e.printStackTrace();
}
return 0;
}
private void generateCSV() throws IOException {
CSVWriter writer = new CSVWriter(new FileWriter(filePath + fileName + ".csv"), CSVWriter.DEFAULT_SEPARATOR, CSVWriter.NO_QUOTE_CHARACTER);
String[] content = new String[]{
RandomGenerator.generateRandomInteger(),
RandomGenerator.generateRandomStr(),
RandomGenerator.generateRandomInteger(),
RandomGenerator.generateRandomStr(),
RandomGenerator.generateRandomInteger(),
RandomGenerator.generateRandomStr(),
RandomGenerator.generateRandomInteger(),
RandomGenerator.generateRandomStr(),
RandomGenerator.generateRandomInteger(),
RandomGenerator.generateRandomStr(),
RandomGenerator.generateRandomInteger(),
RandomGenerator.generateRandomStr(),
RandomGenerator.generateRandomInteger(),
RandomGenerator.generateRandomStr(),
RandomGenerator.generateRandomInteger(),
RandomGenerator.generateRandomStr(),
RandomGenerator.generateRandomInteger(),
RandomGenerator.generateRandomStr(),
RandomGenerator.generateRandomInteger(),
RandomGenerator.generateRandomStr()
};
writer.writeNext(header);
writer.writeNext(content);
writer.close();
}
}
Main
public static void main(String[] args) {
System.out.println("Start generate");
long start = System.currentTimeMillis();
ThreadPoolExecutor threadPoolExecutor = new ThreadPoolExecutor(8, 8,
0L, TimeUnit.MILLISECONDS,
new LinkedBlockingQueue<Runnable>());
List<ParallelCsvGenerate> taskList = new ArrayList<>(3800000);
for(int i = 0; i < 3800000; i++) {
taskList.add(new ParallelCsvGenerate(i+""));
}
try {
List<Future<Integer>> futures = threadPoolExecutor.invokeAll(taskList);
} catch (InterruptedException e) {
e.printStackTrace();
}
threadPoolExecutor.shutdown(); // allow the pool threads to exit so the JVM can terminate
System.out.println("Success");
long end = System.currentTimeMillis();
System.out.println("Using time: " + (end-start));
}

You could write directly into the file instead of building the whole file content in one StringBuilder first. (I think this is the biggest time and memory bottleneck here: builder.toString().)
You could generate each file in parallel.
(Little tweaks:) Omit the ifs inside the loop.
The if(i < headList.size() - 1) check is not needed when you restructure the loop and handle the first (or last) field outside of it.
The i % 2 == 0 test can be eliminated by a better iteration (i += 2) and a bit more work inside the loop (index i -> int, index i + 1 -> string).
If applicable, prefer append(char) to append(String): append(',') is better than append(",").
... (see the sketch below)
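Putting the first few of these points together, a minimal sketch of what generateCSV could look like (it still uses the fields and helper methods from the question's class, the 8 KB buffer size is an arbitrary choice, and it assumes headList has an even number of columns alternating int and string):
private void generateCSV() {
    String fileName = String.valueOf(baseID.incrementAndGet());
    File csvFile = new File(filePath + fileName + ".csv");
    // Stream straight into a buffered writer instead of building the whole file in a StringBuilder first.
    try (BufferedWriter writer = new BufferedWriter(new FileWriter(csvFile), 8 * 1024)) {
        writer.write(csvHeader);
        writer.newLine();
        // Columns alternate int, string, int, string, ... so write them in pairs;
        // the first pair goes outside the loop, which removes the separator check from the loop body.
        writer.write(String.valueOf(generateRandomInteger()));
        writer.write(',');
        writer.write(generateRandomStr());
        for (int i = 2; i < headList.size(); i += 2) {
            writer.write(',');
            writer.write(String.valueOf(generateRandomInteger()));
            writer.write(',');
            writer.write(generateRandomStr());
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}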

You can use the Fork/Join framework (Java 7 and above) to run your processing in parallel and use multiple cores of your CPU.
Here is an example:
import java.util.concurrent.ForkJoinPool;
import java.util.concurrent.ForkJoinTask;
import java.util.concurrent.RecursiveTask;
import java.util.stream.LongStream;
public class ForkJoinAdd extends RecursiveTask<Long> {
private final long[] numbers;
private final int start;
private final int end;
public static final long threshold = 10_000;
public ForkJoinAdd(long[] numbers) {
this(numbers, 0, numbers.length);
}
private ForkJoinAdd(long[] numbers, int start, int end) {
this.numbers = numbers;
this.start = start;
this.end = end;
}
@Override
protected Long compute() {
int length = end - start;
if (length <= threshold) {
return add();
}
ForkJoinAdd firstTask = new ForkJoinAdd(numbers, start, start + length / 2);
firstTask.fork(); //start asynchronously
ForkJoinAdd secondTask = new ForkJoinAdd(numbers, start + length / 2, end);
Long secondTaskResult = secondTask.compute();
Long firstTaskResult = firstTask.join();
return firstTaskResult + secondTaskResult;
}
private long add() {
long result = 0;
for (int i = start; i < end; i++) {
result += numbers[i];
}
return result;
}
public static long startForkJoinSum(long n) {
long[] numbers = LongStream.rangeClosed(1, n).toArray();
ForkJoinTask<Long> task = new ForkJoinAdd(numbers);
return new ForkJoinPool().invoke(task);
}
}
You can use this example as a starting point.
If you want to read more about it, Guide to the Fork/Join Framework in Java | Baeldung
and Fork/Join (The Java™ Tutorials)
can help you to better understand and better design your app.
Good luck.
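The summing task above is only an illustration of the pattern, not the poster's CSV job. As a rough sketch (not a tested implementation), the same divide-and-conquer idea applied to the file-generation problem from the question might look like this, where generateCSV(int) stands in for the per-file writing logic:
import java.util.concurrent.ForkJoinPool;
import java.util.concurrent.RecursiveAction;

public class CsvGenerateTask extends RecursiveAction {
    private static final int THRESHOLD = 10_000; // files per leaf task; tune for your machine
    private final int start;
    private final int end;

    public CsvGenerateTask(int start, int end) {
        this.start = start;
        this.end = end;
    }

    @Override
    protected void compute() {
        if (end - start <= THRESHOLD) {
            for (int i = start; i < end; i++) {
                generateCSV(i); // write file number i, e.g. with the logic from the question
            }
            return;
        }
        int mid = start + (end - start) / 2;
        // Fork both halves and wait for them; the pool work-steals across all cores.
        invokeAll(new CsvGenerateTask(start, mid), new CsvGenerateTask(mid, end));
    }

    private void generateCSV(int fileIndex) {
        // placeholder for the actual CSV-writing code
    }

    public static void main(String[] args) {
        new ForkJoinPool().invoke(new CsvGenerateTask(0, 5_000_000));
    }
}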

Remove the for(int i = 0; i < 1000000; i++) loop from the run method (leave a single generateCSV() call).
Create 5 million ParallelCsvGenerate objects.
Submit them to a ThreadPoolExecutor.
Converted main:
public static void main(String[] args) {
ThreadPoolExecutor ex = (ThreadPoolExecutor) Executors.newFixedThreadPool(8);
for(int i = 0; i < 5000000; i++) {
ParallelCsvGenerate generate = new ParallelCsvGenerate();
ex.submit(generate);
}
ex.shutdown();
}
It takes roughly 5 minutes to complete on my laptop (4 physical cores with hyperthreading, SSD drive).
EDIT:
I've replaced FileWriter with AsynchronousFileChannel using the following code:
Path file = Paths.get(filePath + fileName + ".csv");
try(AsynchronousFileChannel asyncFile = AsynchronousFileChannel.open(file,
StandardOpenOption.WRITE,
StandardOpenOption.CREATE)) {
asyncFile.write(ByteBuffer.wrap(builder.toString().getBytes()), 0);
} catch (IOException e) {
e.printStackTrace();
}
to achieve a 30% speedup.
I believe that the main bottleneck is the hard drive and filesystem itself. Not much more can be achieved here.

Related

How to speed up code with Multithreading?

I have created a password cracker in Java that cracks passwords from a text file list. It then generates a dictionary that contains the following pairs: the hashed word and the original word. I am looking for a way to speed up the program by having it read all of the words from the file and then use multithreading to generate the hashes. How can I break the list of words into four separate partitions that multiple threads can then operate on in the createDictionary method? Here is what I have so far:
public class Main {
private static final String FNAME = "words.txt";
private final static String PASSWDFNAME = "passwd.txt";
private static Map<String, String> dictionary = new HashMap<>();
public static void main(String[] args) {
// Create dictionary of plain / hashed passwords from list of words
System.out.println("Generating dictionary ...");
long start = System.currentTimeMillis();
createDictionary(FNAME);
System.out.println("Generated " + dictionary.size() + " hashed passwords in dictionary");
long stop = System.currentTimeMillis();
System.out.println("Elapsed time: " + (stop - start) + " milliseconds");
// Read password file, hash plaintext passwords and lookup in dictionary
System.out.println("\nCracking password file ...");
start = System.currentTimeMillis();
crackPasswords(PASSWDFNAME);
stop = System.currentTimeMillis();
System.out.println("Elapsed time: " + (stop - start) + " milliseconds");
}
private static void createDictionary(String fname) {
// Read in list of words
List<String> words = new ArrayList<>();
try (Scanner input = new Scanner(new File(fname));) {
while (input.hasNext()) {
String s = input.nextLine();
if (s != null && s.length() >= 4) {
words.add(s);
}
}
} catch (FileNotFoundException e) {
System.out.println("File " + FNAME + " not found");
e.printStackTrace();
System.exit(-1);
}
// Generate dictionary from word list
for (String word : words) {
generateHashes(word);
}
}
private static void crackPasswords(String fname) {
File pfile = new File(fname);
try (Scanner input = new Scanner(pfile);) {
while (input.hasNext()) {
String s = input.nextLine();
String[] t = s.split(",");
String userid = t[0];
String hashedPassword = t[1];
String password = dictionary.get(hashedPassword);
if (password != null) {
System.out.println("CRACKED - user: "+userid+" has password: "+password);
}
}
} catch (FileNotFoundException ex) {
System.out.println(ex.getMessage());
ex.printStackTrace();
System.exit(-1);
}
}
private static void generateHashes(String word) {
// Convert word to lower case, generate hash, store dictionary entry
String s = word.toLowerCase();
String hashedStr = HashUtils.hashPassword(s);
dictionary.put(hashedStr, s);
// Capitalize word, generate hash, store dictionary entry
s = s.substring(0, 1).toUpperCase() + s.substring(1);
hashedStr = HashUtils.hashPassword(s);
dictionary.put(hashedStr, s);
}
}
It's very simple, check this out:
public static void main(String[] args) {
List<String> words = new ArrayList<>();
List<Thread> threads = new ArrayList<>();
int numThreads = 4;
int threadsSlice = words.size() / numThreads;
for(int i = 0; i < numThreads; i++) {
Thread t = new Thread(new WorkerThread(i * threadsSlice, (i + 1) * threadsSlice, words));
t.start();
threads.add(t);
}
threads.forEach(t -> {
try {
t.join();
} catch (InterruptedException e) {
e.printStackTrace();
}
});
}
static class WorkerThread implements Runnable {
private final int left;
private final int right;
private final List<String> words;
public WorkerThread(int left, int right, List<String> words) {
this.left = left;
this.right = right;
this.words = words;
}
@Override
public void run() {
for (int i = left; i < right; i++) {
generateHashes(words.get(i));
}
}
}
This code creates 4 threads, each one scanning one partition of your list and applying the generateHashes method to all the words in that partition.
You can keep the words list in a shared field to avoid passing it to each thread via a constructor parameter.
Also make sure to use a ConcurrentMap for your dictionary in the generateHashes method, since all the worker threads write to it concurrently.
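A minimal sketch of that last change, reusing the field and method names from the question (HashUtils is the poster's helper class; the import lines go at the top of Main):
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

// Thread-safe map: multiple worker threads can call dictionary.put(...) at the same time.
private static final Map<String, String> dictionary = new ConcurrentHashMap<>();

private static void generateHashes(String word) {
    // Lower-case variant.
    String s = word.toLowerCase();
    dictionary.put(HashUtils.hashPassword(s), s);
    // Capitalized variant.
    String capitalized = s.substring(0, 1).toUpperCase() + s.substring(1);
    dictionary.put(HashUtils.hashPassword(capitalized), capitalized);
}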

How can I improve the execution time? And is there any better way to read this file?

I am trying to split a text file with multiple threads. The file is 1 GB. I am reading the file char by char. The execution time is 24 min 54 seconds. Instead of reading the file by char, is there any better way to reduce the execution time?
I'm having a hard time figuring out an approach that will reduce the execution time. Please also suggest if there is any other, better way to split a file with multiple threads. I am very new to Java.
Any help will be appreciated. :)
public static void main(String[] args) throws Exception {
RandomAccessFile raf = new RandomAccessFile("D:\\sample\\file.txt", "r");
long numSplits = 10;
long sourceSize = raf.length();
System.out.println("file length:" + sourceSize);
long bytesPerSplit = sourceSize / numSplits;
long remainingBytes = sourceSize % numSplits;
int maxReadBufferSize = 9 * 1024;
List<String> filePositionList = new ArrayList<String>();
long startPosition = 0;
long endPosition = bytesPerSplit;
for (int i = 0; i < numSplits; i++) {
raf.seek(endPosition);
String strData = raf.readLine();
if (strData != null) {
endPosition = endPosition + strData.length();
}
String str = startPosition + "|" + endPosition;
if (sourceSize > endPosition) {
startPosition = endPosition;
endPosition = startPosition + bytesPerSplit;
} else {
break;
}
filePositionList.add(str);
}
for (int i = 0; i < filePositionList.size(); i++) {
String str = filePositionList.get(i);
String[] strArr = str.split("\\|");
String strStartPosition = strArr[0];
String strEndPosition = strArr[1];
long startPositionFile = Long.parseLong(strStartPosition);
long endPositionFile = Long.parseLong(strEndPosition);
MultithreadedSplit objMultithreadedSplit = new MultithreadedSplit(startPositionFile, endPositionFile);
objMultithreadedSplit.start();
}
long endTime = System.currentTimeMillis();
System.out.println("It took " + (endTime - startTime) + " milliseconds");
}
}
public class MultithreadedSplit extends Thread {
public static String filePath = "D:\\tenlakh\\file.txt";
private int localCounter = 0;
private long start;
private long end;
public static String outPath;
List<String> result = new ArrayList<String>();
public MultithreadedSplit(long startPos, long endPos) {
start = startPos;
end = endPos;
}
@Override
public void run() {
try {
String threadName = Thread.currentThread().getName();
long currentTime = System.currentTimeMillis();
RandomAccessFile file = new RandomAccessFile("D:\\sample\\file.txt", "r");
String outFile = "out_" + threadName + ".txt";
System.out.println("Thread Reading started for start:" + start + ";End:" + end+";threadname:"+threadName);
FileOutputStream out2 = new FileOutputStream("D:\\sample\\" + outFile);
file.seek(start);
int nRecordCount = 0;
char c = (char) file.read();
StringBuilder objBuilder = new StringBuilder();
int nCounter = 1;
while (c != -1) {
objBuilder.append(c);
// System.out.println("char-->" + c);
if (c == '\n') {
nRecordCount++;
out2.write(objBuilder.toString().getBytes());
objBuilder.delete(0, objBuilder.length());
//System.out.println("--->" + nRecordCount);
// break;
}
c = (char) file.read();
nCounter++;
if (nCounter > end) {
break;
}
}
} catch (Exception ex) {
ex.printStackTrace();
}
}
}
The fastest way would be to map the file into memory segment by segment (mapping a large file as a whole may cause undesired side effects). It skips a few relatively expensive copy operations: the operating system loads the file into RAM, and the JRE exposes it to your application as a view into an off-heap memory area in the form of a ByteBuffer. It would usually allow you to squeeze out the last 2x-3x of performance.
The memory-mapped way requires quite a bit of helper code (see the fragment at the bottom), so it's not always the best tactical choice. Instead, if your input is line-based and you just need reasonable performance (what you have now is probably not), just do something like:
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
...
Files.lines(Paths.get("/path/to/the/file"), StandardCharsets.ISO_8859_1)
// .parallel() // parallel processing is still possible
.forEach(line -> { /* your code goes here */ });
For contrast, a working example of code that processes the file via memory mapping looks something like the fragment below. In the case of fixed-size records (when segments can be chosen to match record boundaries precisely), subsequent segments can be processed in parallel.
static ByteBuffer mapFileSegment(FileChannel fileChannel, long fileSize, long regionOffset, long segmentSize) throws IOException {
long regionSize = Math.min(segmentSize, fileSize - regionOffset);
// small last region prevention
final long remainingSize = fileSize - (regionOffset + regionSize);
if (remainingSize < segmentSize / 2) {
regionSize += remainingSize;
}
return fileChannel.map(FileChannel.MapMode.READ_ONLY, regionOffset, regionSize);
}
...
final ToIntFunction<ByteBuffer> consumer = ...
try (FileChannel fileChannel = FileChannel.open(Paths.get("/path/to/file"), StandardOpenOption.READ)) {
final long fileSize = fileChannel.size();
long regionOffset = 0;
while (regionOffset < fileSize) {
final ByteBuffer regionBuffer = mapFileSegment(fileChannel, fileSize, regionOffset, segmentSize);
while (regionBuffer.hasRemaining()) {
final int usedBytes = consumer.applyAsInt(regionBuffer);
if (usedBytes == 0)
break;
}
regionOffset += regionBuffer.position();
}
} catch (IOException ex) {
throw new UncheckedIOException(ex);
}
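The consumer above is left abstract. For illustration only, a consumer that handles one '\n'-terminated line per call (and returns 0 so the outer loop remaps the remaining bytes once only a partial line is left in the segment) could look roughly like this:
final ToIntFunction<ByteBuffer> consumer = buffer -> {
    int start = buffer.position();
    for (int i = start; i < buffer.limit(); i++) {
        if (buffer.get(i) == '\n') {          // absolute get, does not move the position
            int length = i + 1 - start;
            byte[] line = new byte[length];
            buffer.get(line);                 // relative bulk get, advances the position past the line
            // process the line, e.g. new String(line, StandardCharsets.ISO_8859_1)
            return length;
        }
    }
    return 0; // no complete line left in this segment
};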

Not able to get lock on synchronized method when accessed using more than one thread

I have the code below, where I am testing thread synchronization for Elasticsearch, but somehow I am not succeeding with it. Can anyone let me know where I am going wrong?
If I enable the thread sleep inside the 'startThreadProcess' method then everything works fine, because it sleeps for a certain amount of time, which I don't want. I want to get a proper lock for the thread without using thread sleep.
What is happening in the code: I have used an Executor for pooling threads. I run a for loop with count 4 to launch 4 threads from the pool. Each submitted thread calls a synchronized method, and inside that synchronized method I call another synchronized method which returns the total document count in a particular node; I then proceed with that count, incrementing it to insert new documents. While my first thread has not yet completed, the second thread enters and tries to get the total count from the method called by the synchronized method, so Thread 2 gets the wrong count. My first thread inserts 10000 JSON documents into the node, so I expect Thread 2 to see a count of 10000 and only then start inserting; instead Thread 2 enters in between, reads some random number, and starts inserting by incrementing from that number, which is not the expected scenario.
package com.acn.adt.main;
public class ESTest {
private static final String dataPath = "C:\\Elastic Search\\Data.json";
static ESTest esTest = new ESTest();
private static TransportClient client = null;
private Properties elasticPro = null;
private InputStream input = null;
ElasticSearchCrud esCRUD = null;
private final Object lock = new Object();
public static void main(String[] args) {
String strArray[] = new String[] {"1"};
esTest.startProcess(strArray);
}
public void startProcess(String strArray[]) {
try {
input = new FileInputStream(ElasticSearchConstants.ELASTIC_PROPERTIES);
elasticPro = new Properties();
//elasticPro.load(ElasticSearchClient.class.getResourceAsStream(ElasticSearchConstants.ELASTIC_PROPERTIES));
elasticPro.load(input);
System.out.println(elasticPro.getProperty("homeDir"));
long startTime = System.currentTimeMillis();
Settings setting = Settings.builder()
//.put("client.transport.ping_timeout", "100s")
.put("cluster.name", elasticPro.getProperty("cluster"))
//.put("node.name", elasticPro.getProperty("node"))
//.put("client.transport.sniff", Boolean.valueOf(elasticPro.getProperty("transport.sniff")))
.put("client.transport.sniff", false)
.put("cluster.routing.allocation.enable", "all")
.put("cluster.routing.allocation.allow_rebalance", "always")
//.put("client.transport.ignore_cluster_name", true)
.build();
client = new PreBuiltTransportClient(setting)
.addTransportAddress(new TransportAddress(InetAddress.getByName("localhost"),
Integer.valueOf("9300")));
long endTime = System.currentTimeMillis();
System.out.println("Time taken for connecting " + TimeUnit.MILLISECONDS.toSeconds((endTime - startTime)));
ExecutorService executorService = Executors.newFixedThreadPool(10);
for(int i = 1; i <=4; i++) {
if(i==1) {
strArray = new String [] {"1"};
}else if(i == 2) {
strArray = new String [] {"1"};
}else if(i == 3) {
strArray = new String [] {"1"};
}else if(i == 4) {
strArray = new String [] {"1"};
}
executorService.execute(new ESThread(esTest,strArray,i));
}
}catch(Exception e) {
}
}
public class ESThread implements Runnable {
private final Object lock = new Object();
ESTest esTester = null;
String strArr [] = null;
int i =0;
public ESThread(ESTest esTester,String[] strArr,int i) {
this.esTester = esTester;
this.strArr = strArr;
this.i = i;
}
@Override
public void run() {
System.out.println("Name of Current thread is Thread_"+i);
synchronized(lock) {
esTester.startCRUDProcess(strArr);
}
System.out.println("Thread_"+i+" done.");
}
}
public void startCRUDProcess(String [] strArr) {
SearchAPI esSearch = new SearchAPIImpl();
boolean caseFlg = false;
String _indexName = "gcindex";
String _indexType = "gctype";
String _ids = "501,602,702###1,10000,10001";
String _id = "10000";
String[] _strIds = new String[] {"10000","9999"};
System.out.println("Insert Multiple Process is started...");
System.out.println("--------------------------------------");
try {
caseFlg = insertMultipleDocument(dataPath,client,_indexName,_indexType);
} catch (IOException | ParseException e) {
e.printStackTrace();
caseFlg = false;
}
}
public synchronized boolean insertMultipleDocument(String dataPath,TransportClient client,String _indexName,String _indexType) throws FileNotFoundException, ParseException {
try {
JSONParser parser = new JSONParser();
// we know we get an array from the example data
JSONArray jsonArray = (JSONArray) parser.parse( new FileReader( dataPath ) );
BulkRequestBuilder bulkDocument = client.prepareBulk();
@SuppressWarnings("unchecked")
Iterator<JSONObject> it = jsonArray.iterator();
int i = 0;
i = _getTotalHits(client,_indexName,_indexType);
System.out.println("Total number of hits inside index = "+_indexName+" of type = "+_indexType+" are : "+i);
System.out.println("-------------------------------------------------------------------------------------");
while( it.hasNext() ) {
i++;
JSONObject json = it.next();
System.out.println("Insert document for "+i+": " + json.toJSONString() );
// either use client#prepare, or use Requests# to directly build index/delete requests
bulkDocument.add(client.prepareIndex(_indexName, _indexType, i+"")
.setSource(json.toJSONString(), XContentType.JSON )
);
}
BulkResponse bulkResponse = bulkDocument.get();
if (bulkResponse.hasFailures()) {
System.out.println("process failures by iterating through each bulk response item : "+bulkResponse.buildFailureMessage());
return false;
} else {
System.out.println("All Documents inserted successfully...");
/*if(bulkResponse.getItems()!=null) {
for(BulkItemResponse response:bulkResponse.getItems()) {
System.out.println(response.toString());
System.out.println(response.getResponse());
}
}*/
return true;
}
} catch (IOException ex) {
System.out.println("Exception occurred while get Multiple Document : " + ex/*, ex*/);
return false;
}
}
public synchronized int _getTotalHits(TransportClient client,String _indexName,String _indexType){
SearchHits hits = null;
int recCount = 0;
long totalCount = 0;
try {
SearchResponse seacrhResponse = client.prepareSearch(_indexName)
.setTypes(_indexType)
.setSearchType(SearchType.QUERY_THEN_FETCH)
.get();
if (seacrhResponse != null) {
hits = seacrhResponse.getHits();
totalCount = hits.getTotalHits();
System.out.println("count = "+totalCount);
}
recCount = Integer.parseInt(totalCount+"");
}catch(Exception ex) {
System.out.println("Exception occurred while search Index : " + ex/*, ex*/);
}
return recCount;
}
}

Fails every time I try to run. Issue with text file creation?

I need help with writing to and reading from the text files.
It seems to go almost all the way, but then it says that no file exists; at that point it should create one and then start writing to it. Instead it says that it failed to find one and then it just ends. I don't know why.
package sorting;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.Random;
public class Sorting {
private static int[] oneToFiftyThou = new int[50000];
private static int[] fiftyThouToOne = new int[50000];
private static int[] randomFiftyThou = new int[50000];
public static void main(String[] args) {
if(args.length>0) {
if(args[0].equalsIgnoreCase("init")) {
// initialize the 3 files
// 1-50000 file1
// 50000-1 file2
// random 50000 file3
initializeFiles();
writeFiles();
}
} else {
readFilestoArray();
System.out.println(""+oneToFiftyThou[0] + " - " +
oneToFiftyThou[oneToFiftyThou.length-1]);
System.out.println(""+fiftyThouToOne[0] + " - " +
fiftyThouToOne[fiftyThouToOne.length-1]);
System.out.println(""+randomFiftyThou[0] + " - " +
randomFiftyThou[randomFiftyThou.length-1]);
intInsertionSort(oneToFiftyThou);
intInsertionSort(fiftyThouToOne);
intInsertionSort(randomFiftyThou);
}
}
private static void initializeFiles() {
//Array one
for(int i=1; i<oneToFiftyThou.length+1; i++) {
oneToFiftyThou[i-1] = i;
}
//Array two
for(int i=50000; i>0; i--) {
fiftyThouToOne[fiftyThouToOne.length-(i)] = i;
}
//Array Three Random. Copy Array one into a new Array and shuffle.
System.arraycopy(oneToFiftyThou, 0, randomFiftyThou, 0,
randomFiftyThou.length);
Random random = new Random();
for(int i=randomFiftyThou.length-1; i>0; i--) {
int index = random.nextInt(i+1);
//Swap the values
int value = randomFiftyThou[index];
randomFiftyThou[index] = randomFiftyThou[i];
randomFiftyThou[i] = value;
}
}
public static void writeFiles() {
ArrayList<int[]> arrayList = new ArrayList<int[]>();
arrayList.add(oneToFiftyThou);
arrayList.add(fiftyThouToOne);
arrayList.add(randomFiftyThou);
int fileIter = 1;
for(Iterator<int[]> iter = arrayList.iterator();
iter.hasNext(); ) {
int[] array = iter.next();
try {
File file = new File("file"+fileIter+".txt");
//check for file, create it if it doesn't exist
if(!file.exists()) {
file.createNewFile();
}
FileWriter fileWriter = new FileWriter(file);
BufferedWriter bufferWriter = new BufferedWriter
(fileWriter);
for(int i = 0; i<array.length; i++) {
bufferWriter.write(""+array[i]);
if(i!=array.length-1) {
bufferWriter.newLine();
}
}
bufferWriter.close();
fileIter++;
}catch(IOException ioe) {
ioe.printStackTrace();
System.exit(-1);
}
}
}
public static void readFilestoArray() {
ArrayList<int[]> arrayList = new ArrayList<int[]>();
arrayList.add(oneToFiftyThou);
arrayList.add(fiftyThouToOne);
arrayList.add(randomFiftyThou);
int fileIter = 1;
for(Iterator<int[]> iter = arrayList.iterator();
iter.hasNext(); ) {
int[] array = iter.next();
try {
File file = new File("file"+fileIter+".txt");
//check for file, exit with error if file doesn't exist
if(!file.exists()) {
System.out.println("file doesn't exist "
+ file.getName());
System.exit(-1);
}
FileReader fileReader = new FileReader(file);
BufferedReader bufferReader = new BufferedReader
(fileReader);
for(int i = 0; i<array.length; i++) {
array[i] = Integer.parseInt
(bufferReader.readLine());
}
bufferReader.close();
fileIter++;
}catch(IOException ioe) {
ioe.printStackTrace();
System.exit(-1);
}
}
}
private static void intInsertionSort(int[] intArray) {
int comparisonCount = 0;
long startTime = System.currentTimeMillis();
for(int i=1; i<intArray.length;i++) {
int tempValue = intArray[i];
int j = 0;
for(j=i-1; j>=0 && tempValue<intArray[j];j--){
comparisonCount++;
intArray[j+1] = intArray[j];
}
intArray[j+1] = tempValue;
}
long endTime=System.currentTimeMillis();
System.out.println("Comparison Count = " + comparisonCount
+ " running time (in millis) = " +
(endTime-startTime) );
}
}
Well, it works for me. Execute it in the console like this:
java Sorting init
Then execute it another time:
java Sorting
It works perfectly. If you are in Eclipse, go to Run Configuration > Arguments and put init there.
The point is that in your main method you check whether the program was invoked with the init parameter: if yes, you create those files and write to them; if not, you read from them. You are probably invoking it without init while the files are not there yet, and that's why it doesn't work.

Multithreading in java having array of threads [duplicate]

This question already has answers here:
How to use an ExecutorCompletionService
(2 answers)
Closed 7 years ago.
public static void getTestData() {
try {
filename = "InventoryData_" + form_id;
PrintWriter writer = new PrintWriter("/Users/pnroy/Documents/" +filename + ".txt");
pids = new ArrayList<ProductId>();
GetData productList = new GetData();
System.out.println("Getting productId");
pids = productList.GetProductIds(form_id);
int perThreadSize = pids.size() / numberOfCrawlers;
ArrayList<ArrayList<ProductId>> perThreadData = new
ArrayList<ArrayList<ProductId>>(numberOfCrawlers);
for (int i = 1; i <= numberOfCrawlers; i++) {
perThreadData.add(new ArrayList<ProductId>(perThreadSize));
for (int j = 0; j < perThreadSize; j++) {
ProductId ids = new ProductId();
ids.setEbProductID((pids.get(((i - 1) * perThreadSize + j))).getEbProductID());
ids.setECProductID((pids.get(((i - 1) * perThreadSize + j))).getECProductID());
perThreadData.get(i - 1).add(ids);
}
}
BlockingQueue<String> q = new LinkedBlockingQueue<String>();
Consumer c1 = new Consumer(q);
Thread[] thread = new Thread[numberOfCrawlers];
for (int k = 0; k <= numberOfCrawlers; k++) {
// System.out.println(k);
GetCombinedData data = new GetCombinedData();
thread[k] = new Thread(data);
thread[k].setDaemon(true);
data.setVal(perThreadData.get(k), filename, q);
thread[k].start();
// writer.println(data.getResult());
}
new Thread(c1).start();
for (int l = 0; l <= numberOfCrawlers; l++) {
thread[l].join();
}
} catch (Exception e) {
}
}
Here the number of crawlers is the number of threads.
The run method of the GetCombinedData class has the following code.
The pids list is passed as perThreadData.get(k-1) from the main method.
The CassController class queries an API, and I get a string result after some processing.
public void run(){
try{
for(int i=0;i<pids.size();i++){
//System.out.println("before cassini");
CassController cass = new CassController();
String result=cass.getPaginationDetails(pids.get(i));
queue.put(result);
// System.out.println(result);
Thread.sleep(1000);
}
writer.close();
}catch(Exception ex){
}
Consumer.java has the following code:
public class Consumer implements Runnable{
private final BlockingQueue queue;
Consumer(BlockingQueue q) { queue = q; }
public void run(){
try {
while (queue.size()>0)
{
consume(queue.take());
}
} catch (InterruptedException ex)
{
}
}
void consume(Object x) {
try{
PrintWriter writer = new PrintWriter(new FileWriter("/Users/pnroy/Documents/Inventory", true));
writer.println(x.toString());
writer.close();
}catch(IOException ex){
}
}
So if I set the number of crawlers to 10 and there are 500 records, each thread will process 50 records. I need to write the results into a file. I am confused about how I can achieve this, since it is an array of threads and each thread is doing a bunch of operations.
I tried using a blocking queue, but that is printing repetitive results. I am new to multithreading and not sure how I can handle this case.
Can you please suggest something?
With the introduction of many useful high-level concurrency classes, it is now recommended not to use the Thread class directly anymore. Even the BlockingQueue class is rather low-level.
Instead, this is a nice application for a CompletionService, which builds upon ExecutorService. The example below shows how to use it.
You want to replace the code in PartialResultTask (that's where the main processing happens) and the System.out.println call (that's where you probably want to write your result to a file).
public class ParallelProcessing {
public static void main(String[] args) {
ExecutorService executionService = Executors.newFixedThreadPool(10);
CompletionService<String> completionService = new ExecutorCompletionService<>(executionService);
// submit tasks
for (int i = 0; i < 500; i++) {
completionService.submit(new PartialResultTask(i));
}
// collect result
for (int i = 0; i < 500; i++) {
String result = getNextResult(completionService);
if (result != null)
System.out.println(result);
}
executionService.shutdown();
}
private static String getNextResult(CompletionService<String> completionService) {
Future<String> result = null;
while (result == null) {
try {
result = completionService.take();
} catch (InterruptedException e) {
// ignore and retry
}
}
try {
return result.get();
} catch (ExecutionException e) {
e.printStackTrace();
return null;
} catch (InterruptedException e) {
e.printStackTrace();
return null;
}
}
static class PartialResultTask implements Callable<String> {
private int n;
public PartialResultTask(int n) {
this.n = n;
}
@Override
public String call() {
return String.format("Partial result %d", n);
}
}
}
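For the file-writing part, a minimal sketch of how the result-collection loop in main could be changed to append each result to a file (the output path is just a placeholder, and java.io.FileWriter/PrintWriter/IOException imports are assumed):
// Collect the results and append them to one output file instead of printing them.
try (PrintWriter writer = new PrintWriter(new FileWriter("/path/to/output.txt", true))) {
    for (int i = 0; i < 500; i++) {
        String result = getNextResult(completionService);
        if (result != null)
            writer.println(result);
    }
} catch (IOException e) {
    e.printStackTrace();
}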
