How to optimize this simulation in Java? - java

I am trying to do a multi threading simulation in Java and I have managed to do it with a queue but the execution time is high, any ideas on how I could optimize this? Can using recursion save time?
The input has to be like this:
2 5 It means that there are two threads(workers) for 5 jobs
1 2 3 4 5 This is the jobs that are an integer which means the time cost of processing that job so the output will be this:
0 0 The two threads try to simultaneously take jobs from the list, so thread with index 0 actually
1 0 takes the first job and starts working on it at the moment 0
0 1 After 1 second, thread 0 is done with the first job and takes the third job from the list, and starts processing it immediately at time 1.
1 2 One second later, thread 1 is done with the second job and takes the fourth job from the list, and starts processing it immediately at time 2
0 4 Finally, after 2 more seconds, thread 0 is done with the third job and takes the fifth job from the list, and starts processing it immediately at time 4
This is the code:
import java.io.*;
import java.util.HashMap;
import java.util.HashSet;
import java.util.PriorityQueue;
import java.util.Set;
import java.util.StringTokenizer;
public class JobQueue {
private int numWorkers;
private int[] jobs;
private int[] assignedWorker;
private long[] startTime;
private FastScanner in;
private PrintWriter out;
public static void main(String[] args) throws IOException {
new JobQueue().solve();
}
private void readData() throws IOException {
numWorkers = in.nextInt();
int m = in.nextInt();
jobs = new int[m];
for (int i = 0; i < m; ++i) {
jobs[i] = in.nextInt();
}
}
private void writeResponse() {
for (int i = 0; i < jobs.length; ++i) {
out.println(assignedWorker[i] + " " + startTime[i]);
}
}
private void assignJobs() {
// TODO: replace this code with a faster algorithm.
assignedWorker = new int[jobs.length];
startTime = new long[jobs.length];
PriorityQueue<Integer> nextTimesQueue = new PriorityQueue<Integer>();
HashMap<Integer, Set<Integer>> workersReadyAtTimeT = new HashMap<Integer,Set<Integer>>();
long[] nextFreeTime = new long[numWorkers];
int duration = 0;
int bestWorker = 0;
for (int i = 0; i < jobs.length; i++) {
duration = jobs[i];
if(i<numWorkers) {
bestWorker = i;
nextTimesQueue.add(duration);
addToSet(workersReadyAtTimeT, duration, i,0);
}else {
int currentTime = nextTimesQueue.poll();
Set<Integer> workersReady = workersReadyAtTimeT.get(currentTime);
if (workersReady.size()>1) {
bestWorker = workersReady.iterator().next();
workersReady.remove(bestWorker);
workersReadyAtTimeT.remove(currentTime);
workersReadyAtTimeT.put(currentTime,workersReady);
nextTimesQueue.add(currentTime);
} else {
bestWorker = workersReady.iterator().next();
workersReadyAtTimeT.remove(currentTime);
nextTimesQueue.add(currentTime+duration);
addToSet(workersReadyAtTimeT, duration, bestWorker, currentTime);
}
}
assignedWorker[i] = bestWorker;
startTime[i] = nextFreeTime[bestWorker];
nextFreeTime[bestWorker] += duration;
}
}
private void addToSet(HashMap<Integer, Set<Integer>> workersReadyAtTimeT, int duration, int worker, int current) {
if(workersReadyAtTimeT.get(current+duration)==null) {
HashSet<Integer> s = new HashSet<Integer>();
s.add(worker);
workersReadyAtTimeT.put(current+duration, s);
}else {
Set<Integer> s = workersReadyAtTimeT.get(current+duration);
s.add(worker);
workersReadyAtTimeT.put(current+duration,s);
}
}
public void solve() throws IOException {
in = new FastScanner();
out = new PrintWriter(new BufferedOutputStream(System.out));
readData();
assignJobs();
writeResponse();
out.close();
}
static class FastScanner {
private BufferedReader reader;
private StringTokenizer tokenizer;
public FastScanner() {
reader = new BufferedReader(new InputStreamReader(System.in));
tokenizer = null;
}
public String next() throws IOException {
while (tokenizer == null || !tokenizer.hasMoreTokens()) {
tokenizer = new StringTokenizer(reader.readLine());
}
return tokenizer.nextToken();
}
public int nextInt() throws IOException {
return Integer.parseInt(next());
}
}
}

It seems to me that your jobsList object is completely redundant, everything it contains is also in the jobs array and when you take the front element you get the item at jobs[i]. To speed up a little you could take the constructors of the ints out of the loop and just assign new numbers to them. Another optimization would be to not search during the first numWorkers jobs because you know you still have idle workers until you have exausted your pool. Once you have found one good worker you dont have to keep looking so you can continue out of your for-loop.
public class JobQueue {
private int numWorkers;
private int[] jobs;
private int[] assignedWorker;
private long[] startTime;
private void readData() throws IOException {
numWorkers = in.nextInt();
int m = in.nextInt();
jobs = new int[m];
for (int i = 0; i < m; ++i) {
jobs[i] = in.nextInt();
}
}
private void assignJobs() {
assignedWorker = new int[jobs.length];
startTime = new long[jobs.length];
long[] nextFreeTime = new long[numWorkers];
int duration = 0;
int bestWorker = 0;
for (int i = 0; i < jobs.length; i++) {
duration = jobs[i];
bestWorker = 0;
if (i< numWorkers){
bestWorker= i;
} else{
for (int j = 0; j < numWorkers; ++j) {
if (nextFreeTime[j] < nextFreeTime[bestWorker])
bestWorker = j;
continue;
}
}
assignedWorker[i] = bestWorker;
startTime[i] = nextFreeTime[bestWorker];
nextFreeTime[bestWorker] += duration;
}
}
However, both your solution and this slightly trimmed down one take 2 milliseconds to run. I also looked at having HashMap to maintain a NextWorker marker but at some point you catch up with it and end up looking everytime for the next one and don't win much.
You could try having an ordered List/Queue, but then you have expensive inserts instead of expensive searches, and you have to kee track of the timeslice. But a version like that could look like this:
private void assignJobs() {
assignedWorker = new int[jobs.length];
startTime = new long[jobs.length];
PriorityQueue<Integer> nextTimesQueue = new PriorityQueue<Integer>();
HashMap<Integer, Set<Integer>> workersReadyAtTimeT = new HashMap<Integer,Set<Integer>>();
long[] nextFreeTime = new long[numWorkers];
int duration = 0;
int bestWorker = 0;
for (int i = 0; i < jobs.length; i++) {
duration = jobs[i];
if(i<numWorkers) {
bestWorker = i;
nextTimesQueue.add(duration);
addToSet(workersReadyAtTimeT, duration, i,0);
}else {
int currentTime = nextTimesQueue.poll();
Set<Integer> workersReady = workersReadyAtTimeT.get(currentTime);
if (workersReady.size()>1) {
bestWorker = workersReady.iterator().next();
workersReady.remove(bestWorker);
workersReadyAtTimeT.remove(currentTime);
workersReadyAtTimeT.put(currentTime,workersReady);
nextTimesQueue.add(currentTime);
} else {
bestWorker = workersReady.iterator().next();
workersReadyAtTimeT.remove(currentTime);
nextTimesQueue.add(currentTime+duration);
addToSet(workersReadyAtTimeT, duration, bestWorker, currentTime);
}
}
assignedWorker[i] = bestWorker;
startTime[i] = nextFreeTime[bestWorker];
nextFreeTime[bestWorker] += duration;
}
}
private void addToSet(HashMap<Integer, Set<Integer>> workersReadyAtTimeT, int duration, int worker, int current) {
if(workersReadyAtTimeT.get(current+duration)==null) {
HashSet<Integer> s = new HashSet<Integer>();
s.add(worker);
workersReadyAtTimeT.put(current+duration, s);
}else {
Set<Integer> s = workersReadyAtTimeT.get(current+duration);
s.add(worker);
workersReadyAtTimeT.put(current+duration,s);
}
}

Related

GC overhead limit exceeded while training OpenNLP's NameFinderME

I want to get probability score for the extracted names using NameFinderME, but using the provided model gives very bad probabilities using the probs function.
For example, "Scott F. Fitzgerald" gets a score around 0.5 (averaging log probabilities, and taking an exponent), while "North Japan" and "Executive Vice President, Corporate Relations and Chief Philanthropy Officer" both get a score higher than 0.9...
I have more than 2 million first names, and another 2 million last names (with their frequency counts) And I want to synthetically create a huge dataset from outer multiplication of the first names X middle names (using the first names pool) X last names.
The problem is, I don't even get to go over all the last names once (even when discarding freq counts and only using each name only once) before I get a GC overhead limit exceeded exception...
I'm implementing a ObjectStream and give it to the train function:
public class OpenNLPNameStream implements ObjectStream<NameSample> {
private List<Map<String, Object>> firstNames = null;
private List<Map<String, Object>> lastNames = null;
private int firstNameIdx = 0;
private int firstNameCountIdx = 0;
private int middleNameIdx = 0;
private int middleNameCountIdx = 0;
private int lastNameIdx = 0;
private int lastNameCountIdx = 0;
private int firstNameMaxCount = 0;
private int middleNameMaxCount = 0;
private int lastNameMaxCount = 0;
private int firstNameKBSize = 0;
private int lastNameKBSize = 0;
Span span[] = new Span[1];
String fullName[] = new String[3];
String partialName[] = new String[2];
private void increaseFirstNameCountIdx()
{
firstNameCountIdx++;
if (firstNameCountIdx == firstNameMaxCount) {
firstNameIdx++;
if (firstNameIdx == firstNameKBSize)
return; //no need to update anything - this is the end of the run...
firstNameMaxCount = getFirstNameMaxCount(firstNameIdx);
firstNameCountIdx = 0;
}
}
private void increaseMiddleNameCountIdx()
{
lastNameCountIdx++;
if (middleNameCountIdx == middleNameMaxCount) {
if (middleNameIdx == firstNameKBSize) {
resetMiddleNameIdx();
increaseFirstNameCountIdx();
} else {
middleNameMaxCount = getMiddleNameMaxCount(middleNameIdx);
middleNameCountIdx = 0;
}
}
}
private void increaseLastNameCountIdx()
{
lastNameCountIdx++;
if (lastNameCountIdx == lastNameMaxCount) {
lastNameIdx++;
if (lastNameIdx == lastNameKBSize) {
resetLastNameIdx();
increaseMiddleNameCountIdx();
}
else {
lastNameMaxCount = getLastNameMaxCount(lastNameIdx);
lastNameCountIdx = 0;
}
}
}
private void resetLastNameIdx()
{
lastNameIdx = 0;
lastNameMaxCount = getLastNameMaxCount(0);
lastNameCountIdx = 0;
}
private void resetMiddleNameIdx()
{
middleNameIdx = 0;
middleNameMaxCount = getMiddleNameMaxCount(0);
middleNameCountIdx = 0;
}
private int getFirstNameMaxCount(int i)
{
return 1; //compromised on using just
//String occurences = (String) firstNames.get(i).get("occurences");
//return Integer.parseInt(occurences);
}
private int getMiddleNameMaxCount(int i)
{
return 3; //compromised on using just
//String occurences = (String) firstNames.get(i).get("occurences");
//return Integer.parseInt(occurences);
}
private int getLastNameMaxCount(int i)
{
return 1;
//String occurences = (String) lastNames.get(i).get("occurences");
//return Integer.parseInt(occurences);
}
#Override
public NameSample read() throws IOException {
if (firstNames == null) {
firstNames = CSVFileTools.readFileFromInputStream("namep_first_name_idf.csv", new ClassPathResource("namep_first_name_idf.csv").getInputStream());
firstNameKBSize = firstNames.size();
firstNameMaxCount = getFirstNameMaxCount(0);
middleNameMaxCount = getFirstNameMaxCount(0);
}
if (lastNames == null) {
lastNames = CSVFileTools.readFileFromInputStream("namep_last_name_idf.csv",new ClassPathResource("namep_last_name_idf.csv").getInputStream());
lastNameKBSize = lastNames.size();
lastNameMaxCount = getLastNameMaxCount(0);
}
increaseLastNameCountIdx();;
if (firstNameIdx == firstNameKBSize)
return null; //we've finished iterating over all permutations!
String [] sentence;
if (firstNameCountIdx < firstNameMaxCount / 3)
{
span[0] = new Span(0,2,"Name");
sentence = partialName;
sentence[0] = (String)firstNames.get(firstNameIdx).get("first_name");
sentence[1] = (String)lastNames.get(lastNameIdx).get("last_name");
}
else
{
span[0] = new Span(0,3,"name");
sentence = fullName;
sentence[0] = (String)firstNames.get(firstNameIdx).get("first_name");
sentence[2] = (String)lastNames.get(lastNameIdx).get("last_name");
if (firstNameCountIdx < 2*firstNameCountIdx/3) {
sentence[1] = (String)firstNames.get(middleNameIdx).get("first_name");
}
else {
sentence[1] = ((String)firstNames.get(middleNameIdx).get("first_name")).substring(0,1) + ".";
}
}
return new NameSample(sentence,span,true);
}
#Override
public void reset() throws IOException, UnsupportedOperationException {
firstNameIdx = 0;
firstNameCountIdx = 0;
middleNameIdx = 0;
middleNameCountIdx = 0;
lastNameIdx = 0;
lastNameCountIdx = 0;
firstNameMaxCount = 0;
middleNameMaxCount = 0;
lastNameMaxCount = 0;
}
#Override
public void close() throws IOException {
reset();
firstNames = null;
lastNames = null;
}
}
And
TokenNameFinderModel model = NameFinderME.train("en","person",new OpenNLPNameStream(),TrainingParameters.defaultParams(),new TokenNameFinderFactory());
model.serialize(new FileOutputStream("trainedNames.bin",false));
I get the following error after a few minutes of running:
java.lang.OutOfMemoryError: GC overhead limit exceeded
at opennlp.tools.util.featuregen.WindowFeatureGenerator.createFeatures(WindowFeatureGenerator.java:112)
at opennlp.tools.util.featuregen.AggregatedFeatureGenerator.createFeatures(AggregatedFeatureGenerator.java:79)
at opennlp.tools.util.featuregen.CachedFeatureGenerator.createFeatures(CachedFeatureGenerator.java:69)
at opennlp.tools.namefind.DefaultNameContextGenerator.getContext(DefaultNameContextGenerator.java:118)
at opennlp.tools.namefind.DefaultNameContextGenerator.getContext(DefaultNameContextGenerator.java:37)
at opennlp.tools.namefind.NameFinderEventStream.generateEvents(NameFinderEventStream.java:113)
at opennlp.tools.namefind.NameFinderEventStream.createEvents(NameFinderEventStream.java:137)
at opennlp.tools.namefind.NameFinderEventStream.createEvents(NameFinderEventStream.java:36)
at opennlp.tools.util.AbstractEventStream.read(AbstractEventStream.java:62)
at opennlp.tools.util.AbstractEventStream.read(AbstractEventStream.java:27)
at opennlp.tools.util.AbstractObjectStream.read(AbstractObjectStream.java:32)
at opennlp.tools.ml.model.HashSumEventStream.read(HashSumEventStream.java:46)
at opennlp.tools.ml.model.HashSumEventStream.read(HashSumEventStream.java:29)
at opennlp.tools.ml.model.TwoPassDataIndexer.computeEventCounts(TwoPassDataIndexer.java:130)
at opennlp.tools.ml.model.TwoPassDataIndexer.<init>(TwoPassDataIndexer.java:83)
at opennlp.tools.ml.AbstractEventTrainer.getDataIndexer(AbstractEventTrainer.java:74)
at opennlp.tools.ml.AbstractEventTrainer.train(AbstractEventTrainer.java:91)
at opennlp.tools.namefind.NameFinderME.train(NameFinderME.java:337)
Edit: After increasing the memory of the JVM to 8GB, I still don't get past the first 2 million last names, but now the Exception is:
java.lang.OutOfMemoryError: Java heap space
at java.util.HashMap.resize(HashMap.java:703)
at java.util.HashMap.putVal(HashMap.java:662)
at java.util.HashMap.put(HashMap.java:611)
at opennlp.tools.ml.model.AbstractDataIndexer.update(AbstractDataIndexer.java:141)
at opennlp.tools.ml.model.TwoPassDataIndexer.computeEventCounts(TwoPassDataIndexer.java:134)
at opennlp.tools.ml.model.TwoPassDataIndexer.<init>(TwoPassDataIndexer.java:83)
at opennlp.tools.ml.AbstractEventTrainer.getDataIndexer(AbstractEventTrainer.java:74)
at opennlp.tools.ml.AbstractEventTrainer.train(AbstractEventTrainer.java:91)
at opennlp.tools.namefind.NameFinderME.train(NameFinderME.java:337)
It seems the problem stems from the fact I'm creating a new NameSample along with new Spans and Strings at every read call... But I can't reuse Spans or NameSamples, since they're immutables.
Should I just write my own language model, is there a better Java library for doing this sort of thing (I'm only interested in getting the probability the extracted text is actually a name) are there parameters I should tweak for the model I'm training?
Any advice would be appreciated.

Java MultiThreading Primes

been working on this program for a while and I think I've made much more progress. My java skills are not very good, but I think I'm close. Everything should compile without issue except for my "public void run" in my worker class. The program prompts the user for how many threads they want and then parses through a text file of random numbers to find all the prime numbers. My issue seems to be in the algorithm for the prime numbers. How do I write the algorithm so it parses the data down and finds the prime numbers?
I have posted the entire program below, but please see the worker class towards the bottom. Any help would be greatly appreciated in solving this issue. Thank you.
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.Scanner;
public class PrimeNumbers{
public static void main(String[] args) throws IOException {
int[] numbers = new int[100000];
int count;
int index = 0;
String datafile = "dataset529.txt"; //string which contains datafile
String line; //current line of text file
try (BufferedReader br = new BufferedReader(new FileReader(datafile))) { //reads in the datafile
while ((line = br.readLine()) != null) { //reads through each line
numbers[index++] = Integer.parseInt(line); //pulls out the number of each line and puts it in numberset[]
}
}
System.out.println("How many threads would you like to use?");
Scanner scan = new Scanner(System.in);
int z = scan.nextInt();
Thread[] threads = new Thread[z]; //creates threads as per user
worker[] finder = new worker[z]; //assigns finder to each thread created
int range = numbers.length / z; //breaks up each worker into a section depending on thread count.
for (count = 0; count < z; count++) {
int startAt = count * range;
int endAt = startAt + range;
finder[count] = new worker(startAt, endAt, numbers);
}
for (count = 0; count < z; count++) { //moves to next thread
threads[count] = new Thread(finder[count]);
threads[count].start();
}
boolean processing = false;
do {
processing = false;
for (Thread t : threads) {
if (t.isAlive()) {
processing = true;
break;
}
}
} while (processing);
for (worker worker : finder) {
System.out.println("Max of thread is: " + worker.getPrime());
}
}
public static class worker implements Runnable {
private int start;
private int stop;
private int numberset[];
public worker(int start, int stop, int[] numberset) {
this.start = start;
this.stop = stop;
this.numberset = numberset;
}
#Override
public void run() {
for (int x = start; x < stop; x++) {
if (numberset[]%3 && != 1 && != 2 && !=3)
return prime
}
}
public int getPrime() {
return true
}
}
}

Round Robin Scheduler

I need to implement a "round-robin" scheduler with a job class that I cannot modify. Round-robin scheduler should process the job that has been waiting the longest first, then reset timer to zero. If two jobs have same wait time, lower id is processed first. The job class only gives three values (job id, remaining duration, and priority(which is not needed for this). each job has a start time, so only a couple of jobs may be available during first cycle, few more next cycle, etc. Since the "job array" I am calling is different every time I call it, I'm not sure how to store the wait times.
This is the job class:
public class Jobs{
private int[] stas = new int[0];
private int[] durs = new int[0];
private int[] lefs = new int[0];
private int[] pris = new int[0];
private int[] fins = new int[0];
private int clock;
public Jobs()
{
this("joblist.csv");
}
public Jobs(String filename)
{
BufferedReader fp = null;
String line = "";
String[] b = null;
int[] tmp;
try
{
fp = new BufferedReader(new FileReader(filename));
while((line = fp.readLine()) != null)
{
b = line.split(",");
if(b.length == 3)
{
try
{
int sta = Integer.parseInt(b[0]);
//System.out.println("sta: " + b[0]);
int dur = Integer.parseInt(b[1]);
//System.out.println("dur: " + b[1]);
int pri = Integer.parseInt(b[2]);
//System.out.println("pri: " + b[2]);
stas = app(stas, sta);
//System.out.println("stas: " + Arrays.toString(stas));
durs = app(durs, dur);
//System.out.println("durs: " + Arrays.toString(durs));
lefs = app(lefs, dur);
//System.out.println("lefs: " + Arrays.toString(lefs));
pris = app(pris, pri);
//System.out.println("pris: " + Arrays.toString(pris));
fins = app(fins, -1);
//System.out.println("fins: " + Arrays.toString(fins));
}
catch(NumberFormatException e) {}
}
}
fp.close();
}
catch(FileNotFoundException e) { e.printStackTrace(); }
catch(IOException e) { e.printStackTrace(); }
clock = 0;
}
public boolean done()
{
boolean done = true;
for(int i=0; done && i<lefs.length; i++)
if(lefs[i]>0) done=false;
return done;
}
public int getClock() { return clock; }
public int[][] getJobs()
{
int count = 0;
for(int i=0; i<stas.length; i++)
if(stas[i]<=clock && lefs[i]>0)
count++;
int[][] jobs = new int[count][3];
count = 0;
for(int i=0; i<stas.length; i++)
if(stas[i]<=clock && lefs[i]>0)
{
jobs[count] = new int[]{i, lefs[i], pris[i]};
count++;
}
return jobs;
}
public int cycle() { return cycle(-1); }
public int cycle(int j)
{
if(j>=0 && j<lefs.length && clock>=stas[j] && lefs[j]>0)
{
lefs[j]--;
if(lefs[j] == 0) fins[j] = clock+1;
}
clock++;
return clock;
}
private int[] app(int[] a, int b)
{
int[] tmp = new int[a.length+1];
for(int i=0; i<a.length; i++) tmp[i] = a[i];
tmp[a.length] = b;
return tmp;
}
public String report()
{
String r = "JOB,PRIORITY,START,DURATION,FINISH,DELAY,PRI*DELAY\n";
float dn=0;
float pdn=0;
for(int i=0; i<stas.length; i++)
{
if(fins[i]>=0)
{
int delay = ((fins[i]-stas[i])-durs[i]);
r+= ""+i+","+pris[i]+","+stas[i]+","+durs[i]+","+fins[i]+","+delay+","+(pris[i]*delay)+"\n";
dn+= delay;
pdn+= pris[i]*delay;
}
else
{
int delay = ((clock*10-stas[i])-durs[i]);
r+= ""+i+","+pris[i]+","+stas[i]+","+durs[i]+","+fins[i]+","+delay+","+(pris[i]*delay)+"\n";
dn+= delay;
pdn+= pris[i]*delay;
}
}
if(stas.length>0)
{
r+= "Avg,,,,,"+(dn/stas.length)+","+pdn/stas.length+"\n";
}
return r;
}
public String toString()
{
String r = "There are "+stas.length+" jobs:\n";
for(int i=0; i<stas.length; i++)
{
r+= " JOB "+i+": START="+stas[i]+" DURATION="+durs[i]+" DURATION_LEFT="+lefs[i]+" PRIORITY="+pris[i]+"\n";
}
return r;
}
I don't need full code, just an idea of how to store wait times and cycle the correct job.
While a array based solution 'may' work, I would advocate a more object oriented approach. Create 'Job' class with the desire attributes (id, start_time, wait etc). Using the csv file, create Job objects and hold them in a list. Write a comparator to sort this jobs-list (in this case based on job wait/age would be the factor).
The job executor then has to do the following:
while(jobs exist) {
iterate on the list {
if job is executable // start_time > current sys_time
consume cycles/job for executable jobs
mark completed jobs (optional)
}
remove the completed jobs
}
//\ This loop will add +1 to each job
for(int i = 0; i < jobs.length; i++)
{
waitTime[jobs[i][0]] += 1;
}
int longestWait = 0;//\ This holds value for greatest wait time
int nextJob = 0; //\ This holds value for index of job with greatest wait time
//\ this loop will check for the greatest wait time and and set variables accordingly
for(int i = 0; i < waitTime.length; i++)
{
if(waitTime[i] > longestWait)
{
longestWait = waitTime[i];
nextJob = i;
}
}
//\ this cycles the job with the highest wait time
jobsource.cycle(nextJob);
//\ this resets the wait time for processed job
waitTime[nextJob] = 0;

Need Help intrepretting the results of this program

So, I made a small program to test Multithreading in java and compare the time it takes to scale an array using a while loop and then creating multiple threads and running those threads. I'm unsure about then numbers I'm getting when the program finishes, so I was wondering if I made a boneheaded error at some point and messed something up to get very disparate numbers.
Code below:
import java.util.Scanner;
public class arrayScaling {
public static void main(String[] args) throws InterruptedException {
Scanner input = new Scanner(System.in);
System.out.println("Enter the amount of number you want the program to generate:");
int numOfNumbs = input.nextInt();
int [] arrayForNumbers = new int [numOfNumbs];
int [] newArrayForNumbers = new int [numOfNumbs];
for (int i = 0; i < arrayForNumbers.length; i++) {
arrayForNumbers[i] = (int) ((Math.random() * 25) + 1);
}
long startTime = System.nanoTime();
for (int i = 0; i < arrayForNumbers.length; i++) {
newArrayForNumbers[i] = newArrayForNumbers[i] * 3;
}
long endTime = System.nanoTime();
System.out.println();
long totalExecutionTime = endTime-startTime;
System.out.println("Time it takes execute scaling is " +
totalExecutionTime + " nanoseconds");
System.out.println();
int numOfNumLeftOver = numOfNumbs % 5;
int numOfNumDivided = numOfNumbs / 5;
int [] temp = null;
int [] temp2 = null;
int [] temp3 = null;
int [] temp4 = null;
int [] temp5 = null;
MyThread thread1 = new MyThread (numOfNumbs/5);
MyThread thread2 = new MyThread (numOfNumbs/5);
MyThread thread3 = new MyThread (numOfNumbs/5);
MyThread thread4 = new MyThread (numOfNumbs/5);
MyThread thread5;
if (numOfNumLeftOver != 0) {
numOfNumDivided = numOfNumDivided + numOfNumLeftOver;
thread5 = new MyThread (numOfNumDivided);
}
else {
thread5 = new MyThread (numOfNumbs/5);
}
int tempNum = 0;
for ( int i = 0; i < thread1.getArray().length; i ++) {
temp = thread1.getArray();
temp[tempNum] = arrayForNumbers[tempNum];
tempNum++;
}
for ( int i = 0; i < thread2.getArray().length; i ++) {
temp2 = thread2.getArray();
temp2[i] = arrayForNumbers[tempNum];
tempNum++;
}
for ( int i = 0; i < thread3.getArray().length; i ++) {
temp3 = thread3.getArray();
temp3[i] = arrayForNumbers[tempNum];
tempNum++;
}
for ( int i = 0; i < thread4.getArray().length; i ++) {
temp4 = thread4.getArray();
temp4[i] = arrayForNumbers[tempNum];
tempNum++;
}
for ( int i = 0; i < thread5.getArray().length; i ++) {
temp5 = thread5.getArray();
temp5[i] = arrayForNumbers[tempNum];
tempNum++;
}
thread1.setArray(temp);
thread2.setArray(temp2);
thread3.setArray(temp3);
thread4.setArray(temp4);
thread5.setArray(temp5);
long startTime2 = System.nanoTime();
thread1.start();
thread2.start();
thread3.start();
thread4.start();
thread5.start();
thread1.join();
thread2.join();
thread3.join();
thread4.join();
thread5.join();
long endTime2 = System.nanoTime();
long newTotalExecutionTime = endTime2 - startTime2;
System.out.println("Time it takes execute scaling w/ multiple threads is " +
newTotalExecutionTime + " nanoseconds");
if (newTotalExecutionTime < totalExecutionTime) {
System.out.println("Multithreading was more effective");
}
else if (totalExecutionTime < newTotalExecutionTime) {
System.out.println("The original algorithm was more effective");
}
else if (totalExecutionTime == newTotalExecutionTime) {
System.out.println("Both method worked at the same speed");
}
input.close();
}
}
public class MyThread extends Thread {
private int [] array;
private int [] scaleArray;
public MyThread(int size) {
array = new int [size];
scaleArray = new int [size];
}
public int[] getArray() {
return array;
}
public void setArray(int[] array) {
this.array = array;
}
public int[] getScaleArray() {
return scaleArray;
}
public void setScaleArray(int[] scaleArray) {
this.scaleArray = scaleArray;
}
public void run () {
for (int z = 0; z < array.length; z++){
scaleArray[z] = 3 * array[z];
}
}
}
And the output of this program is:
Enter the amount of number you want the program to generate:
16
Time it takes execute scaling is 893 nanoseconds
Time it takes execute scaling w/ multiple threads is 590345 nanoseconds
The original algorithm was more effective
Your results don't surprise me in the slightest. There's a lot of overhead to creating threads, starting them, waiting for them to finish and so on. Don't forget, 590345ns is still less than a millisecond; but most of that is to do with shuffling threads, not with multiplying the numbers.
If you want to see the threaded part of the program outperform the other part, try generating a whole lot more than 16 numbers.

How can I show that in Java Fork/Join framework work-stealing occurs?

I would like to improve my fork/join little example to show that during Java Fork/Join framework execution work stealing occurs.
What changes I need to do to following code? Purpose of example: just do a linear research of a value breaking up work between multiple threads.
package com.stackoverflow.questions;
import java.util.LinkedList;
import java.util.List;
import java.util.Random;
import java.util.concurrent.ForkJoinPool;
import java.util.concurrent.RecursiveTask;
public class CounterFJ<T extends Comparable<T>> extends RecursiveTask<Integer> {
private static final long serialVersionUID = 5075739389907066763L;
private List<T> _list;
private T _test;
private int _lastCount = -1;
private int _start;
private int _end;
private int _divideFactor = 4;
private static final int THRESHOLD = 20;
public CounterFJ(List<T> list, T test, int start, int end, int factor) {
_list = list;
_test = test;
_start = start;
_end = end;
_divideFactor = factor;
}
public CounterFJ(List<T> list, T test, int factor) {
this(list, test, 0, list.size(), factor);
}
#Override
protected Integer compute() {
if (_end - _start < THRESHOLD) {
int count = 0;
for (int i = _start; i < _end; i++) {
if (_list.get(i).compareTo(_test) == 0) {
count++;
}
}
_lastCount = count;
return new Integer(count);
}
LinkedList<CounterFJ<T>> taskList = new LinkedList<>();
int step = (_end - _start) / _divideFactor;
for (int j = 0; j < _divideFactor; j++) {
CounterFJ<T> task = null;
if (j == 0)
task = new CounterFJ<T>(_list, _test, _start, _start + step, _divideFactor);
else if (j == _divideFactor - 1)
task = new CounterFJ<T>(_list, _test, _start + (step * j), _end, _divideFactor);
else
task = new CounterFJ<T>(_list, _test, _start + (step * j), _start + (step * (j + 1)), _divideFactor);
// task.fork();
taskList.add(task);
}
invokeAll(taskList);
_lastCount = 0;
for (CounterFJ<T> task : taskList) {
_lastCount += task.join();
}
return new Integer(_lastCount);
}
public int getResult() {
return _lastCount;
}
public static void main(String[] args) {
LinkedList<Long> list = new LinkedList<Long>();
long range = 200;
Random r = new Random(42);
for (int i = 0; i < 1000; i++) {
list.add(new Long((long) (r.nextDouble() * range)));
}
CounterFJ<Long> counter = new CounterFJ<>(list, new Long(100), 4);
ForkJoinPool pool = new ForkJoinPool();
long time = System.currentTimeMillis();
pool.invoke(counter);
System.out.println("Fork join counter in " + (System.currentTimeMillis() - time));
System.out.println("Occurrences:" + counter.getResult());
}
}
Finally I managed how to and it's not difficult so I leave this for future readers.
In the costructor of the RecursiveTask save thread that created the instance itself. In the compute method check if executing thread is the same or not. If not work-stealing has occurred.
So I added this member variable
private long _threadId = -1;
private static int stolen_tasks = 0;
changed constructor like this:
public CounterFJ(List<T> list, T test, int start, int end, int factor) {
_list = list;
_threadId = Thread.currentThread().getId(); //added
_test = test;
_start = start;
_end = end;
_branchFactor = factor;
}
and added comparison into compute method:
#Override
protected Integer compute() {
long thisThreadId = Thread.currentThread().getId();
if (_threadId != thisThreadId){
stolen_tasks++;
}
// rest of the method

Categories