I'm working on implements a tree that represent electric circus (without any circles, as in this picture)
I use this implementation:
Binary_Oprtator
public abstract class Binary_Oprtator {
abstract int calc(int x, int y);
#Override
public String toString() {
return super.toString().substring(0, super.toString().indexOf('#'));
}
}
And gate
public class and extends Binary_Oprtator {
public int calc(int x, int y){
return (x&y);
}
}
Or gate
public class or extends Binary_Oprtator {
public int calc(int x, int y){
return (x|y);
}
}
gate_node
public class gate_node {
gate_node father_c;
gate_node right_c, left_c;
Binary_Oprtator op;
int value;
int right_v, left_v;
int array_index;
int arr_size;
boolean leaf;
boolean isRightChild;
public gate_node(Binary_Oprtator op, int array_index, int arr_size, boolean right) {
this.array_index = array_index;
this.arr_size = arr_size;
this.left_c = null;
this.right_c = null;
this.op = op;
right_v = left_v = -1;
this.leaf = false;
this.isRightChild = right;
}
void set_left_son(Binary_Oprtator op) {
this.left_c = new gate_node(op, array_index, arr_size / 2,false);
this.left_c.father_c = this;
this.left_c.leaf = false;
this.left_c.isRightChild = false;
}
void set_right_son(Binary_Oprtator op) {
this.right_c = new gate_node(op, array_index + arr_size / 2,
arr_size / 2,true);
this.right_c.father_c = this;
this.right_c.leaf = false;
this.right_c.isRightChild = true;
}
void set_left_son_as_leaf(Binary_Oprtator op) throws InterruptedException {
this.left_c = new gate_node(op, array_index, arr_size / 2,false);
this.left_c.father_c = this;
this.left_c.leaf = true;
this.left_c.left_v = main_class.arr[array_index];
this.left_c.right_v = main_class.arr[array_index + 1];
this.left_c.isRightChild = false;
main_class.queue.put(this.left_c);
}
void set_right_son_as_leaf(Binary_Oprtator op) throws InterruptedException {
this.right_c = new gate_node(op, array_index + arr_size / 2,
arr_size / 2,true);
this.right_c.father_c = this;
this.right_c.left_v = main_class.arr[array_index + 2];
this.right_c.right_v = main_class.arr[array_index + 3];
this.right_c.leaf = true;
this.right_c.isRightChild = true;
main_class.queue.put(this.right_c);
}
gate_node get_left() {
return this.left_c;
}
gate_node get_right() {
return this.right_c;
}
int compute() {
/*
* The following use of a static sInputCounter assumes that the
* static/global input array is ordered from left to right, irrespective
* of "depth".
*/
final int left, right;
if (this.left_c.leaf != true) {
left = this.left_c.compute();
} else {
left = this.left_c.op.calc(this.left_c.left_v, this.left_c.right_v);
}
if (this.right_c.leaf != true) {
right = this.right_c.compute();
} else {
right = this.right_c.op.calc(this.right_c.left_v,
this.right_c.right_v);
}
return op.calc(left, right);
}
int compute_with_print() {
/*
* The following use of a static sInputCounter assumes that the
* static/global input array is ordered from left to right, irrespective
* of "depth".
*/
final int left, right;
System.out.print(this.op + "(");
if (null != this.left_c) {
left = this.left_c.compute_with_print();
System.out.print(",");
} else {
left = main_class.arr[array_index];
System.out.print(left + ",");
}
if (null != this.right_c) {
right = this.right_c.compute_with_print();
System.out.print(")");
} else {
right = main_class.arr[array_index + 1];
System.out.print(right + ")");
}
return op.calc(left, right);
}
}
tree
public class tree {
gate_node head;
public tree(Binary_Oprtator op,int array_index,int arr_size) {
this.head = new gate_node(op,array_index,arr_size,true);
head.father_c=null;
}
void calc_head_value(){
int t_value = head.op.calc(head.left_v,head.right_v);
/* System.out.println(head.left_v+" "+head.op.toString()+" "+head.right_v+" = "+head.op.calc(head.left_v,head.right_v));
*/ head.value = t_value;
}
int compute() {
return head.compute();
}
int compute_with_print(){
return head.compute_with_print();
}
void set_left_son(Binary_Oprtator op){
head.left_c = new gate_node(op,head.array_index,head.arr_size/2,false);
head.left_c.father_c=head;
}
void set_right_son(Binary_Oprtator op){
head.right_c = new gate_node(op,head.array_index + head.arr_size/2,head.arr_size/2,true);
head.right_c.father_c=head;
}
void set_right_son_as_leaf(Binary_Oprtator op) throws InterruptedException {
head.right_c = new gate_node(op,head.array_index,head.arr_size/2,false);
head.right_c.father_c=head;
head.right_c.father_c = head;
head.right_c.left_v = main_class.arr[head.array_index + 2];
head.right_c.right_v = main_class.arr[head.array_index + 3];
head.right_c.leaf = true;
head.right_c.isRightChild = true;
main_class.queue.put(head.right_c);
}
void set_left_son_as_leaf(Binary_Oprtator op) throws InterruptedException {
head.left_c = new gate_node(op, head.array_index, head.arr_size / 2,false);
head.left_c.father_c = head;
head.left_c.leaf = true;
head.left_c.left_v = main_class.arr[head.array_index];
head.left_c.right_v = main_class.arr[head.array_index + 1];
head.left_c.isRightChild = false;
main_class.queue.put(head.left_c);
}
gate_node get_left(){
return head.left_c;
}
gate_node get_right(){
return head.right_c;
}
}
main_class
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
public class main_class {
public static int arr[] = { 1, 0, 0, 0, 1, 0, 0, 1 };
static final BlockingQueue<gate_node> queue = new ArrayBlockingQueue<>(6);
public static void main(String[] args) throws InterruptedException {
/*************************************
* compute using multi threads
************************************/
System.out.println("compute using Multi threading");
//start a consumer... wait for nodes to be insert into the queue
Consumer consumer = new Consumer();
consumer.start();
tree t = new tree(new and(), 0, arr.length);
t.set_left_son(new or());
t.get_left().set_left_son_as_leaf(new and());
t.get_left().set_right_son_as_leaf(new or());
t.set_right_son(new and());
t.get_right().set_left_son_as_leaf(new or());
t.get_right().set_right_son_as_leaf(new or());
consumer.join();
t.calc_head_value(); //calc the head
System.out.println("The result is: " + t.head.value);
System.out.println();
/******************************
* compute with a single thread
********************************/
System.out.println("compute with a single thread");
int res = t.compute();
System.out.println("The result is: " + res);
/***********************************************
* printing a arithmatic expression of the tree
*************************************************/
System.out.println();
t.compute_with_print();
}
}
Consumer
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
public class Consumer extends Thread {
Consumer() {
}
#Override
public void run() {
gate_node temp;
// the threads pool parts
ExecutorService executor = Executors.newFixedThreadPool(4);
try {
while ((temp = main_class.queue.take()).father_c != null) {
Runnable worker = new computingThread(temp);
executor.execute(worker);
}
} catch (InterruptedException e) {
e.printStackTrace();
}
executor.shutdown();
while (!executor.isTerminated()) {
}
}
}
computingThread
public class computingThread implements Runnable {
gate_node head;
int t_value;
public computingThread(gate_node head) {
this.head = head;
this.t_value = -1;
}
#Override
public void run() {
/* System.out.println("Start: "+this.hashCode()); */
t_value = head.op.calc(head.left_v,head.right_v);
/* System.out.println("thread: "+this.hashCode()+" is running ==> "+head.left_v+" "+head.op.toString()+" "+head.right_v+" = "+head.op.calc(head.left_v,head.right_v));
*/ head.value = this.t_value;
// update the father
if (head.isRightChild == true) { //update right fathers entire
head.father_c.right_v = t_value;
/*System.out.println("isRightChild");*/
} else { //update left fathers entire
head.father_c.left_v = t_value;
}
if ((head.father_c.right_v != -1) && (head.father_c.left_v != -1)){ //father is ready to compute-> to the queue!
try {
main_class.queue.put(head.father_c);
} catch (InterruptedException e) {
e.printStackTrace();
}
}
/* try {
Thread.sleep(1);
} catch (InterruptedException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}*/
/* System.out.println("thread: "+this.hashCode()+" is done!");
*/ return;
}
}
Here what I'm trying to do:
I'm trying to do a parllel comput that use multithreads to compute the finite value of the tree (each node gets two values, produce an outcome based on his opreator, pass it on the tree.. until the root is caculted). What I did is to set a queue of fixed number of spaces.
I insert the leafs to the queue as the tree is build. then I start a consumer that takes each leafs, caculate it, pass the result on the the right entrie of his father, and when both entreis are inserted into the father node, it also goes to the queue, and so on.. until the root is cacluted).
the only problem is that I cannot uses a queue that is smaller from the number of leafs in the tree, and I don't know why.
maybe becuase while I'm building the tree I'm inserting the leafs to the tree and if the queue is smaller then the leafs, I'm doing a: main_class.queue.put(this.right_c); when queue is already full, and that cause the progrem to wait until spaces on the queue will be freed which doesnt happen (cause I'm didn't start the threads yet).
Does anyone have any solution to that?
and another question? is that consider a parrlel computation? meaning if I set a queue with size 2, does that mean the I will do all the computation with only two threads (because I want to set is like the number of core CPU of a certain computer).
Thanks and sorry for my bad spelling.
I think you modelled it in a more complicated way than it was needed. I would not base my modelling on a tree. An electric circuit, is not always a tree. You could have more than one nodes acting as the circuits outputs, right?
I would base my modelling on the gate node. I would have a Gate class with two inputs and one output. Inputs and outputs, would be of type GateValue. Output would be calculated using a different way if the gate is an and or an or gate.
Then I would combine them building my circuit, like this:
gate1.Input1 = gate2.Output
gate1.Input2 = gate3.Output
etc.
Then, I would calculate the value of the last gate (output of the whole circuit) which would cause other gates to calculate their values. This way, you would not need a "parallel" calculation mechanism. As soon as you have no feedback loops in your circuit, this would work fine.
Hope I helped!
Related
Implementing the monitor example for multithreading programming in java I managed to write this:
import java.util.*;
import java.util.concurrent.*;
class Monitor{
Semaphore s_prod; // Production semaphore
Semaphore s_rec; // Recolection semaphore
int capacidad; // capacity of the queue
ArrayList<Integer> cola; // my queue
Monitor(int n){
this.capacidad = n;
this.cola = new ArrayList<Integer>(this.capacidad);
this.s_prod = new Semaphore(this.capacidad);
this.s_rec = new Semaphore(0);
}
public void Producir(int n){ // Producing
try {
this.s_prod.acquire();
} catch (InterruptedException e) {
System.out.println(e.getMessage());
}
this.cola.add(n);
System.out.println("|Prod:" + n + " | " + cola); // Printing production
this.s_rec.release();
}
public int Recolectar(){ // Recolecting
try {
this.s_rec.acquire();
} catch (InterruptedException e) {
System.out.println(e.getMessage());
}
int temp = this.cola.get(0);
this.cola.remove(0);
System.out.println("|Rec:" + temp + " | " + cola); // Printing recolection
this.s_prod.release();
return temp;
}
}
class Productor implements Runnable{ // Productor
Thread t;
Monitor m;
int a, b; // Produces number from 'a' to 'b'
boolean finish; // flag that indicates the end of production
Productor(String nombre, Monitor m, int i, int j){
this.t = new Thread(this, "Productor " + nombre);
this.a = i;
this.b = j;
this.m = m;
finish = false;
}
public void run(){
for(int i = a; i <= b; i++){
m.Producir(i);
try{
Thread.sleep(50);
} catch (InterruptedException e) {
System.out.println(e.getMessage());
}
}
this.finish = true;
}
public void empezar(){
this.t.start();
}
}
class Recolector implements Runnable{ // Consumer
Thread t;
Monitor m;
Recolector(String nombre, Monitor m){
this.t = new Thread(this, "Recolector " + nombre);
this.m = m;
}
public void run(){
/*
while all the producers are still working and, even if they finished, the queue
needs to be emptied
*/
while(!(Checking.product_finished && m.cola.size() == 0)){
m.Recolectar();
try{
Thread.sleep(2000);
} catch (InterruptedException e) {
System.out.println(e.getMessage());
}
}
}
public void empezar(){
this.t.start();
}
}
class Checking implements Runnable{ // Threads that checks if the producers finished
public volatile static boolean product_finished;
Productor[] p_array;
Checking(Productor[] prods){
p_array = prods;
new Thread(this).start();
}
public void run(){
boolean flag = true;
while(flag){
flag = false;
for(Productor p:p_array){
if(!p.finish){
flag = true;
}
}
}
System.out.println("Produccion finalizada!");
product_finished = true;
}
}
public class Test{
public static void main(String args[]){
Monitor m = new Monitor(3); // monitor for queue of capacity=3
Productor p1 = new Productor("P1", m, 10, 20);
Productor p2 = new Productor("P2", m, -50, -40);
Recolector r1 = new Recolector("R1", m);
Recolector r2 = new Recolector("R1", m);
Recolector r3 = new Recolector("R1", m);
Productor[] p_array = new Productor[2];
p_array[0] = p1;
p_array[1] = p2;
Checking ch = new Checking(p_array);
p1.empezar();
p2.empezar();
r1.empezar();
r2.empezar();
r3.empezar();
}
}
When running it the output shows the action performed and the queue status, but there are some problems, like ignoring the maximum length of the queue, producing two elements at the same time and deleting the first one by accident, or going out of bounds of the queue. I assume that a "synchronized" statement is needed but I cant make it work without blocking the whole program.
Also, in the "Checker" class, I use a thread that constatly checks if the producers finished their work, but I'd like to know if there is a better way to check that without overheating my PC.
The output for this values in the main function should only produce up to three numbers and wait for a free spot to produce new ones
I hope my answer will help you.
First of all, your code is ignoring the maximum length of the queue because you are using ArrayList, and its constructor parameter is initialCapacity, not the maximum capacity. The ArrayList will be resized once its size reaches initialCapacity. I suggest you to use LinkedBlockingQueue which has a maximum fixed capacity. This type of queue is also very suitable for your task because all producers will wait until there is free space in the queue and all consumers will wait until there is available elements. So you don't need semaphores.
To check that all producers have finished their work you could use CompletableFuture which provides many helpful methods.
The complete code would look like this:
public class Producer implements Runnable {
private BlockingQueue<Integer> queue;
private int a, b;
public Producer(BlockingQueue<Integer> queue, int a, int b) {
this.queue = queue;
this.a = a;
this.b = b;
}
#Override
public void run() {
for (int i = a; i <= b; i++){
try {
//producer will wait here if there is no space in the queue
queue.put(i);
System.out.println("Put: " + i);
Thread.sleep(50);
} catch (InterruptedException e) {
System.out.println(e.getMessage());
}
}
}
}
public class Consumer implements Runnable {
private BlockingQueue<Integer> queue;
public boolean finish = false;
public Consumer(BlockingQueue<Integer> queue) {
this.queue = queue;
}
#Override
public void run() {
try {
while (!finish || queue.size() > 0) {
// consumer will wait here if the queue is empty;
// we have to poll with timeout because several consumers may pass
// here while queue size is less than number of consumers;
// timeout should be at least equal to producing interval
Integer temp = queue.poll(3, TimeUnit.SECONDS);
if (temp != null) {
System.out.println("Took: " + temp);
Thread.sleep(2000);
}
}
} catch (InterruptedException e) {
System.out.println(e.getMessage());
}
}
}
And to test it:
BlockingQueue<Integer> queue = new LinkedBlockingQueue<>(3); //queue of capacity = 3
Producer p1 = new Producer(queue, 10, 20);
Producer p2 = new Producer(queue, -50, -40);
List<Consumer> consumers = new ArrayList<>();
CompletableFuture[] consumersFutures = new CompletableFuture[3];
for (int i = 0; i < 3; i++) {
Consumer consumer = new Consumer(queue);
consumers.add(consumer);
//this static method runs Runnable in separate thread
consumersFutures[i] = CompletableFuture.runAsync(consumer);
}
CompletableFuture[] producersFutures = new CompletableFuture[2];
producersFutures[0] = CompletableFuture.runAsync(p1);
producersFutures[1] = CompletableFuture.runAsync(p2);
// allOf returns new CompletableFuture that is completed only
// when the last given future completes
CompletableFuture.allOf(producersFutures).thenAccept(v -> {
System.out.println("Completed producing!");
for (Consumer consumer: consumers) {
consumer.finish = true;
}
});
// waiting for all consumers to complete
CompletableFuture.allOf(consumersFutures).get();
System.out.println("Completed consuming!");
Java producer-consumer program using thread & synchronized queue, the program is separated into 3 classes but it couldn't be run.
Queue.java:
public class Queue {
static final int MAXQUEUE = 3;
int[] queue = new int[MAXQUEUE];
int front, rear;
public Queue(){ front = 0; rear = 0; }
public boolean isEmpty(){ return (front==rear); }
public boolean isFull(){
int index = rear+1 < MAXQUEUE ? rear+1 : 0;
return (index == front);
}
public void enqueue(int value) {
queue[rear] = value;
rear = rear+1 < MAXQUEUE ? rear+1 : 0;
}
public int dequeue(){
int data = queue[front];
front = front+1 < MAXQUEUE ? rear+1 : 0;
return data;
}
}
SynchronizedQueue.java:
import java.util.Queue;
public class SynchronizedQueue {
Queue queue;
public SynchronizedQueue() {queue = new Queue(); }
public synchronized void enqueue(int value) {
try {
while (queue.isFull()) {
System.out.println();
System.out.println("Queue is full, please wait....");
wait();
}
}
catch (InterruptedException e) { }
((SynchronizedQueue) queue).enqueue(value);
notify();
}
public synchronized int dequeue() {
try {
while (queue.isEmpty()) {
System.out.println();
System.out.println("Queue is empty, please wait....");
wait();
}
}
catch ( InterruptedException e ) { }
int data = ((SynchronizedQueue) queue).dequeue();
notify();
return data;
}
}
Main program Ch10_3.java:
class Producer extends Thread {
public int count = 0;
public void run() {
int value;
while ( Ch10_3.isRunning ) {
value = (int)(Math.random()*100);
Ch10_3.squeue.enqueue(value);
System.out.print(">" + value + "]");
count++;
try {
Thread.sleep((int)(Math.random()*100));
}
catch( InterruptedException e) { }
}
System.out.println("\n" + Thread.currentThread() + "Producer thread end.");
}
}
class Consumer extends Thread {
public int count = 0;
public void run() {
int data;
while (Ch10_3.isRunning) {
data = Ch10_3.squeue.dequeue();
System.out.println("[" + data + ">");
count++;
try {
Thread.sleep((int)(Math.random()*100));
}
catch( InterruptedException e) { }
}
System.out.println("\n" + Thread.currentThread() + "Consumer thread end.");
}
}
public class Ch10_3 {
static final int MAXITEMS = 10;
static SynchonizedQueue squeue = new SynchronizedQueue();
static boolean isRunning = true;
public static void main(String[] args) {
Producer producer = new Producer();
Consumer consumer = new Consumer();
producer.start(); consumer.start();
while (true)
if (producer.count >= MAXITEMS && producer.count == consumer.count)
{ isRunning = false; break; }
}
}
Error message:
Exception in thread "main" java.lang.Error: Unresolved compilation
problem: at Ch10_3.main(Ch10_3.java:41)
In the catch blocks from enqueue and dequeue methods form class SynchronizedQueue you are trying to cast the queue member attribute which is of type Queue, to SynchronizedQueue.
In SynchronizedQueue.enqueue() we have:
((SynchronizedQueue) queue).enqueue(value);
Since there is no relation between Queue and SynchronizedQueue the compiler gives a compilation error. You should remove the cast.
But the best solution is to just use a java.util.concurrent.BlockingQueue implementation available in JAVA SDK, which will handle all the synchronisation part for you.
I have a problem with possible deadlock in "producer - consumer task".
Everything should be working in following way:
Producer should generate and add int[] arrays to collection
Consumer should take those arrays, put them to second collection and print in output
In debbug mode I have noticed that after a while both taks are suspended on this.wait(); method.
Could you help me and explain what is wrong with this code? :)
Thanks!
Producer task class
public class ProducerTask extends Thread{
private static final Object bufforLock = new Object();
private static LinkedList<Integer[]> buffor;
public ProducerTask(){
if(buffor == null)
buffor = new LinkedList<>();
this.setName("#ProducerTask");
}
#Override
public void run() {
synchronized (this) {
try {
for (int i = 0; i < 100; i++) {
while (isBufforFull()) {
System.err.println("ProducerTask is waiting");
this.wait();
}
Integer[] randomIntArray = getRandomIntArray();
addToBuffor(randomIntArray);
}
}
catch (InterruptedException ex) {
}
}
}
public static void removeLast(){
synchronized(bufforLock){
buffor.removeLast();
bufforLock.notifyAll();
}
}
public static Integer[] getLast(){
synchronized(bufforLock){
return buffor.getLast();
}
}
public static boolean isBufforFull(){
synchronized(bufforLock){
return buffor.size() == 10;
}
}
public static boolean isBufforEmpty(){
synchronized(bufforLock){
return buffor.isEmpty();
}
}
public static void addToBuffor(Integer[] array){
synchronized(bufforLock){
buffor.addFirst(array);
bufforLock.notifyAll();
}
}
public static LinkedList<Integer[]> getBuffor(){
synchronized(bufforLock){
return buffor;
}
}
private Integer[] getRandomIntArray(){
int maxSize = 10;
Integer[] array = new Integer[maxSize];
for(int i = 0 ; i < maxSize ; i++){
int value = (int) (Math.random() * 100);
array[i] = Integer.valueOf(value);
}
return array;
}
}
Consumer task class
public class ConsumerTask extends Thread {
private static LinkedList<Integer[]> buffor;
public ConsumerTask() {
if (buffor == null) {
buffor = new LinkedList<>();
}
this.setName("#ConsumerTask");
}
#Override
public void run() {
synchronized (this) {
try {
while (true) {
while (ProducerTask.isBufforEmpty()) {
System.err.println("ConsumerTask is waiting");
this.wait();
}
Integer[] array = ProducerTask.getLast();
this.arraySortByInserting(array);
this.buffor.addFirst(array);
ProducerTask.removeLast();
}
}
catch (InterruptedException ex) {}
}
}
private Integer[] arraySortByInserting(Integer[] aArrayToSort) {
if(aArrayToSort == null || aArrayToSort.length == 0)
return null;
this.printArray(aArrayToSort, "Array before sorting");
for (int i = 1; i < aArrayToSort.length; i++) {
int intValue = aArrayToSort[i];
int j = i;
while ((j > 0) && (aArrayToSort[j - 1] > intValue)) {
aArrayToSort[j] = aArrayToSort[j - 1];
j--;
}
aArrayToSort[j] = intValue;
}
this.printArray(aArrayToSort, "Array after sorting");
return aArrayToSort;
}
private void printArray(Integer[] aArray, String aMessage) {
System.out.print(aMessage + " [");
for (int intElement : aArray) {
System.out.print(intElement + " ");
}
System.out.print("]");
System.out.println();
}
}
You need a common object which would be used for inter thread communication.
Right now you are using this as object on which you get lock on and you notify on bufferLock in producer thread and same applies for consumer thread.
Remember both are two different instances and both successfully obtain lock on individual objects and then both enters wait state.
I have defined a filter for the termination condition by k-means.
if I run my app it always compute only one iteration.
I think the problem is here:
DataSet<GeoTimeDataCenter> finalCentroids = loop.closeWith(newCentroids, newCentroids.join(loop).where("*").equalTo("*").filter(new MyFilter()));
or maybe the filter function:
public static final class MyFilter implements FilterFunction<Tuple2<GeoTimeDataCenter, GeoTimeDataCenter>> {
private static final long serialVersionUID = 5868635346889117617L;
public boolean filter(Tuple2<GeoTimeDataCenter, GeoTimeDataCenter> tuple) throws Exception {
if(tuple.f0.equals(tuple.f1)) {
return true;
}
else {
return false;
}
}
}
best regards,
paul
my full code here:
public void run() {
//load properties
Properties pro = new Properties();
FileSystem fs = null;
try {
pro.load(FlinkMain.class.getResourceAsStream("/config.properties"));
fs = FileSystem.get(new URI(pro.getProperty("hdfs.namenode")),new org.apache.hadoop.conf.Configuration());
} catch (Exception e) {
e.printStackTrace();
}
int maxIteration = Integer.parseInt(pro.getProperty("maxiterations"));
String outputPath = fs.getHomeDirectory()+pro.getProperty("flink.output");
// set up execution environment
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
// get input points
DataSet<GeoTimeDataTupel> points = getPointDataSet(env);
DataSet<GeoTimeDataCenter> centroids = null;
try {
centroids = getCentroidDataSet(env);
} catch (Exception e1) {
e1.printStackTrace();
}
// set number of bulk iterations for KMeans algorithm
IterativeDataSet<GeoTimeDataCenter> loop = centroids.iterate(maxIteration);
DataSet<GeoTimeDataCenter> newCentroids = points
// compute closest centroid for each point
.map(new SelectNearestCenter(this.getBenchmarkCounter())).withBroadcastSet(loop, "centroids")
// count and sum point coordinates for each centroid
.groupBy(0).reduceGroup(new CentroidAccumulator())
// compute new centroids from point counts and coordinate sums
.map(new CentroidAverager(this.getBenchmarkCounter()));
// feed new centroids back into next iteration with termination condition
DataSet<GeoTimeDataCenter> finalCentroids = loop.closeWith(newCentroids, newCentroids.join(loop).where("*").equalTo("*").filter(new MyFilter()));
DataSet<Tuple2<Integer, GeoTimeDataTupel>> clusteredPoints = points
// assign points to final clusters
.map(new SelectNearestCenter(-1)).withBroadcastSet(finalCentroids, "centroids");
// emit result
clusteredPoints.writeAsCsv(outputPath+"/points", "\n", " ");
finalCentroids.writeAsText(outputPath+"/centers");//print();
// execute program
try {
env.execute("KMeans Flink");
} catch (Exception e) {
e.printStackTrace();
}
}
public static final class MyFilter implements FilterFunction<Tuple2<GeoTimeDataCenter, GeoTimeDataCenter>> {
private static final long serialVersionUID = 5868635346889117617L;
public boolean filter(Tuple2<GeoTimeDataCenter, GeoTimeDataCenter> tuple) throws Exception {
if(tuple.f0.equals(tuple.f1)) {
return true;
}
else {
return false;
}
}
}
I think the problem is the filter function (modulo the code you haven't posted). Flink's termination criterion works the following way: The termination criterion is met if the provided termination DataSet is empty. Otherwise the next iteration is started if the maximum number of iterations has not been exceeded.
Flink's filter function keeps only those elements for which the FilterFunction returns true. Thus, with your MyFilter implementation you only keep the centroids which are before and after the iteration identical. This implies that you'll obtain an empty DataSet if all centroids have changed and, thus, the iteration terminates. This is clearly the inverse of the actual termination criterion. The termination criterion should be: Continue with k-means as long as there is a centroid which has changed.
You can do this with a coGroup function where you emit elements if there is no matching centroid from the preceding centroid DataSet. This is similar to a left outer join, just that you discard non null matches.
public static void main(String[] args) throws Exception {
// set up the execution environment
final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
DataSet<Element> oldDS = env.fromElements(new Element(1, "test"), new Element(2, "test"), new Element(3, "foobar"));
DataSet<Element> newDS = env.fromElements(new Element(1, "test"), new Element(3, "foobar"), new Element(4, "test"));
DataSet<Element> filtered = newDS.coGroup(oldDS).where("*").equalTo("*").with(new FilterCoGroup());
filtered.print();
}
public static class FilterCoGroup implements CoGroupFunction<Element, Element, Element> {
#Override
public void coGroup(
Iterable<Element> newElements,
Iterable<Element> oldElements,
Collector<Element> collector) throws Exception {
List<Element> persistedElements = new ArrayList<Element>();
for(Element element: oldElements) {
persistedElements.add(element);
}
for(Element newElement: newElements) {
boolean contained = false;
for(Element oldElement: persistedElements) {
if(newElement.equals(oldElement)){
contained = true;
}
}
if(!contained) {
collector.collect(newElement);
}
}
}
}
public static class Element implements Key {
private int id;
private String name;
public Element(int id, String name) {
this.id = id;
this.name = name;
}
public Element() {
this(-1, "");
}
#Override
public int hashCode() {
return 31 + 7 * name.hashCode() + 11 * id;
}
#Override
public boolean equals(Object obj) {
if(obj instanceof Element) {
Element element = (Element) obj;
return id == element.id && name.equals(element.name);
} else {
return false;
}
}
#Override
public int compareTo(Object o) {
if(o instanceof Element) {
Element element = (Element) o;
if(id == element.id) {
return name.compareTo(element.name);
} else {
return id - element.id;
}
} else {
throw new RuntimeException("Comparing incompatible types.");
}
}
#Override
public void write(DataOutputView dataOutputView) throws IOException {
dataOutputView.writeInt(id);
dataOutputView.writeUTF(name);
}
#Override
public void read(DataInputView dataInputView) throws IOException {
id = dataInputView.readInt();
name = dataInputView.readUTF();
}
#Override
public String toString() {
return "(" + id + "; " + name + ")";
}
}
I'm having a bit of a problem with writing a multithreaded algorithm in Java. Here's what I've got:
public class NNDFS implements NDFS {
//Array of all worker threads
private Thread[] threadArray;
//Concurrent HashMap containing a mapping of graph-states and
//algorithm specific state objects (NDFSState)
private ConcurrentHashMap<State, NDFSState> stateStore;
//Whether the algorithm is done and whether a cycle is found
private volatile boolean done;
private volatile boolean cycleFound;
/**
Constructor that creates the threads, each with their own graph
#param file The file from which we can create the graph
#param stateStore Mapping between graph-states and state belonging to our algorithm
#param nrWorkers Number of working threads we need
*/
public NNDFS(File file, Map<State, NDFSState> stateStore, int nrWorkers) throws FileNotFoundException {
int i;
this.stateStore = new ConcurrentHashMap<State, NDFSState>(stateStore);
threadArray = new Thread[nrWorkers];
for(i=0;i<nrWorkers;i++){
Graph graph = GraphFactory.createGraph(file);
threadArray[i] = new Thread(new NDFSRunnable(graph, i));
}
}
/**
Class which implements a single thread running the NDFS algorithm
*/
class NDFSRunnable implements Runnable{
private Graph graph;
//Neccesary as Java apparently doesn't allow us to get this ID
private long threadId;
NDFSRunnable(Graph graph, long threadId){
this.graph = graph;
this.threadId = threadId;
}
public void run(){
try {
System.out.printf("Thread id = %d\n", threadId);
//Start by executing the blue DFS for the first graph
mcdfsBlue(graph.getInitialState(), threadId);
} catch (CycleFound e) {
//We must catch all exceptions that are thrown from within our thread
//If exceptions "exit" the thread, the thread will silently fail
//and we dont want that. We use 2 booleans instead, to indicate the status of the algorithm
cycleFound = true;
}
//Either the algorithm was aborted because of a CycleFound exception
//or we completed our Blue DFS without finding a cycle. We are done!
done = true;
}
public void mcdfsBlue(State s, long id) throws CycleFound {
if(done == true){
return;
}
//System.out.printf("Thread %d begint nu aan een dfsblue\n", id);
int i;
int counter = 0;
NDFSState state = stateStore.get(s);
if(state == null){
state = new NDFSState();
stateStore.put(s,state);
}
state.setColor(id, Color.CYAN);
List<State> children = graph.post(s);
i = state.incNextBlue();
while(counter != children.size()){
NDFSState child = stateStore.get(children.get(i%children.size()));
if(child == null){
child = new NDFSState();
stateStore.put(children.get(i % children.size()),child);
}
if(child.getLocalColor(id) == Color.WHITE && !child.isRed()){
mcdfsBlue(children.get(i % children.size()), id);
}
i++;
counter++;
}
if(s.isAccepting()){
state.incRedDFSCount();
mcdfsRed(s, id);
}
state.setColor(id, Color.BLUE);
}
public void mcdfsRed(State s, long id) throws CycleFound {
if(done == true){
return;
}
int i;
int counter = 0;
NDFSState state = stateStore.get(s);
state.setPink(id, true);
List<State> children = graph.post(s);
i = state.incNextRed();
while(counter != children.size()){
NDFSState child = stateStore.get(children.get(i%children.size()));
if(child == null){
child = new NDFSState();
stateStore.put(children.get(i%children.size()),child);
}
if(child.getLocalColor(id) == Color.CYAN){
throw new CycleFound();
}
if(!child.isPink(id) && !child.isRed()){
mcdfsRed(children.get(i%children.size()), id);
}
i++;
counter++;
}
if(s.isAccepting()){
state.decRedDFSCountAndWait();
}
state.setRed();
state.setPink(id, false);
}
}
public void init() {}
public void ndfs() throws Result {
int i;
done = false;
cycleFound = false;
for(i=0;i<threadArray.length;i++){
System.out.printf("Launch thread %d\n",i);
threadArray[i].run();
}
try {
for(i=0;i<threadArray.length;i++){
threadArray[i].join();
}
} catch (InterruptedException e) {
}
//We want to show the result by throwing an exception (weird, but yeah :-/)
if (cycleFound) {
throw new CycleFound();
} else {
throw new NoCycleFound();
}
}
}
However, when I run this, it seems like the first thread is called, completes, and then the next is called etc. What I want obviously, is that all threads are started simultaneously! Otherwise the algorithm has very little use...
Thanks for your time/help!
Regards,
Linus
Use threadArray[i].start(); to launch your thread.
If you use threadArray[i].run();, all it does is call the method normally, in the same thread as the caller.