BigQuery job status is always "PENDING" - java

I'm trying to run the following code, but the status variable is always "PENDING". Could you please tell me what I am doing wrong?
Job execute = bigquery.jobs().insert(PROJECT_ID, runJob).execute();
String status;
while (status.equalsIgnoreCase("PENDING")) {
    status = execute.getStatus().getState();
    System.out.println("Status: " + status);
    Thread.wait(1000);
}

Your code isn't making a request to BigQuery to get the updated state, it's just checking the state of the Job returned by the insert call.
Instead, you should poll for the state of the job by issuing a jobs.get request, and check that state, e.g.:
Job job = bigquery.jobs().insert(PROJECT_ID, runJob).execute();
String status = job.getStatus().getState();
while (!status.equalsIgnoreCase("DONE")) {
    status = bigquery.jobs().get(PROJECT_ID, job.getId()).execute().getStatus().getState();
    System.out.println("Status: " + status);
    Thread.sleep(1000);
}
*Edited based on Jordan Tigani's comment.

I have realized that polling until the status is "DONE" might not surface the error in all cases. Sometimes the error is only visible after the job is already in the "DONE" state, i.e. on some errors the job goes straight from "PENDING" to "DONE", skipping the "RUNNING" stage. Therefore it is a good idea to check the error fields of the job status even after the job is "DONE".
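A minimal sketch of that final check, reusing the same client and job objects as in the polling loop above:
// The job has reached "DONE"; fetch it once more and inspect the error fields
// before trusting the result.
Job finished = bigquery.jobs().get(PROJECT_ID, job.getId()).execute();
ErrorProto errorResult = finished.getStatus().getErrorResult();
if (errorResult != null) {
    System.err.println("Job failed: " + errorResult.getMessage());
}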

Rather than have a busy wait loop synchronously blocking the thread running the insert, I've gone with a scheduled thread that maintains a queue of job id's. It loops through the jobs and checks their status, logging errors when discovered.
The crucial bits here are:
Schedule a thread to monitor jobs
jobPollScheduler.scheduleAtFixedRate(new JobPoll(), SCHEDULE_SECONDS, SCHEDULE_SECONDS, TimeUnit.SECONDS);
loop through a queue of jobs and check their progress. Re-queue anything that isn't DONE
while ((job = jobs.poll()) != null) {
final Job statusJob = bigQuery.jobs().get(projectId, job.jobId).execute();
if ("DONE".equals(statusJob.getStatus().getState())) {
final ErrorProto errorResult = statusJob.getStatus().getErrorResult();
if (errorResult == null || errorResult.toString() == null) {
logger.debug("status={}, job={}", statusJob.getStatus().getState(), job);
} else {
logger.error("status={}, errorResult={}, job={}", statusJob.getStatus().getState(), errorResult, job);
}
} else {
// job isn't done, yet. Add it back to queue.
add(job.jobId);
logger.debug("will check again, status={}, job={}", statusJob.getStatus().getState(), job);
}
}
The full working set of classes
import com.google.api.services.bigquery.Bigquery;
import com.google.api.services.bigquery.model.ErrorProto;
import com.google.api.services.bigquery.model.Job;
import com.google.common.primitives.Longs;
import com.google.common.util.concurrent.ThreadFactoryBuilder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.Objects;
import java.util.Queue;
import java.util.concurrent.DelayQueue;
import java.util.concurrent.Delayed;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.function.Supplier;
import javax.annotation.Nonnull;
/**
* Monitor BigQuery inserts
*/
public class BigQueryMonitorSo21064586 {
private static final Logger logger = LoggerFactory.getLogger(BigQueryMonitorSo21064586.class);
private static final int SCHEDULE_SECONDS = 5;
private final ScheduledExecutorService jobPollScheduler =
Executors.newSingleThreadScheduledExecutor(new ThreadFactoryBuilder().setNameFormat("big-query-monitor-%d").build());
private final Queue<DelayedJobCheck> jobs = new DelayQueue<DelayedJobCheck>();
private final Supplier<Bigquery> connectionSupplier;
private final String projectId;
/**
* @param connectionSupplier gives us a connection to BigQuery
* @param projectId Google cloud project
*/
public BigQueryMonitorSo21064586(@Nonnull final Supplier<Bigquery> connectionSupplier, @Nonnull final String projectId) {
this.connectionSupplier = connectionSupplier;
this.projectId = projectId;
}
public BigQueryMonitorSo21064586 start() {
jobPollScheduler.scheduleAtFixedRate(new JobPoll(), SCHEDULE_SECONDS, SCHEDULE_SECONDS, TimeUnit.SECONDS);
return this;
}
/**
* @param jobId insert query job id
*/
public void add(final String jobId) {
final DelayedJobCheck job = new DelayedJobCheck(jobId);
try {
if (!jobs.offer(job)) {
logger.error("could not enqueue BigQuery job, job={}", job);
}
} catch (final Exception e) {
logger.error("failed to add job to queue, job={}", job, e);
}
}
public void shutdown() {
jobPollScheduler.shutdown();
}
private class JobPoll implements Runnable {
/**
* go through the queue and remove anything that is done
*/
@Override
public void run() {
try {
final Bigquery bigQuery = connectionSupplier.get();
DelayedJobCheck job;
while ((job = jobs.poll()) != null) {
final Job statusJob = bigQuery.jobs().get(projectId, job.jobId).execute();
if ("DONE".equals(statusJob.getStatus().getState())) {
final ErrorProto errorResult = statusJob.getStatus().getErrorResult();
if (errorResult == null || errorResult.toString() == null) {
logger.debug("status={}, job={}", statusJob.getStatus().getState(), job);
} else {
logger.error("status={}, errorResult={}, job={}", statusJob.getStatus().getState(), errorResult, job);
}
} else {
// job isn't done, yet. Add it back to queue.
add(job.jobId);
logger.debug("will check again, status={}, job={}", statusJob.getStatus().getState(), job);
}
}
} catch (final Exception e) {
logger.error("exception monitoring big query status, size={}", jobs.size(), e);
}
}
}
private static class DelayedJobCheck extends DelayedImpl {
private final String jobId;
DelayedJobCheck(final String jobId) {
super(SCHEDULE_SECONDS, TimeUnit.SECONDS);
this.jobId = jobId;
}
@Override
public boolean equals(final Object obj) {
if (this == obj) {
return true;
}
if (obj == null || getClass() != obj.getClass()) {
return false;
}
if (!super.equals(obj)) {
return false;
}
final DelayedJobCheck other = (DelayedJobCheck) obj;
return Objects.equals(jobId, other.jobId);
}
@Override
public int hashCode() {
return Objects.hash(super.hashCode(), jobId);
}
}
private static class DelayedImpl implements Delayed {
/**
* timestamp when delay expires
*/
private final long expiry;
/**
* @param amount how long the delay should be
* @param timeUnit units of the delay
*/
DelayedImpl(final long amount, final TimeUnit timeUnit) {
final long more = TimeUnit.MILLISECONDS.convert(amount, timeUnit);
expiry = System.currentTimeMillis() + more;
}
@Override
public long getDelay(@Nonnull final TimeUnit unit) {
final long diff = expiry - System.currentTimeMillis();
return unit.convert(diff, TimeUnit.MILLISECONDS);
}
@Override
public int compareTo(@Nonnull final Delayed o) {
return Longs.compare(expiry, ((DelayedImpl) o).expiry);
}
@Override
public boolean equals(final Object obj) {
if (this == obj) {
return true;
}
if (!(obj instanceof DelayedImpl)) {
return false;
}
final DelayedImpl delayed = (DelayedImpl) obj;
return expiry == delayed.expiry;
}
@Override
public int hashCode() {
return Objects.hash(expiry);
}
}
}

Related

SubmissionPublisher on submit not invoking onNext of subscriber

Every interval I retrieve tweets with a certain query.
These tweets have to be passed to services which calculate and manipulate those tweets.
These services are subscribed to my publisher, so publisher.hasSubscribers() returns true. But the submit or offer call does not invoke the onNext of my subscribers.
As a "fix", I cycle through my subscribers and invoke onNext myself, but that shouldn't be necessary.
This is the constructor of my publisher.
public TwitterStreamer(Executor executor, int maxBufferCapacity, long period, TimeUnit unit, String searchQuery){
super(executor, maxBufferCapacity);
this.searchQuery = searchQuery;
scheduler = new ScheduledThreadPoolExecutor(1);
this.tweetGetter = scheduler.scheduleAtFixedRate(
() -> {
List<String> tweets = getTweets(searchQuery);
/* this.lastCall = LocalDateTime.now();
for(Flow.Subscriber sub : this.getSubscribers()){
sub.onNext(tweets);
}*/
this.submit(tweets);
if(tweets.size() >= 20) this.close();
}, 0, period, unit);
}
This is my subscriber
package myFlowAPI;
import Interfaces.IProcess;
import Services.LogToFileService;
import java.util.List;
import java.util.concurrent.Flow;
import java.util.concurrent.atomic.AtomicInteger;
public class MySubscriber implements Flow.Subscriber<List<String>> {
private Flow.Subscription subscription;
private AtomicInteger count;
private IProcess processor;
private String name;
private int DEMAND = 0;
public MySubscriber(String name, IProcess processor){
this.name = name;
this.processor = processor;
}
@Override
public void onSubscribe(Flow.Subscription subscription) {
this.subscription = subscription;
}
@Override
public void onNext(List<String> item) {
Object result = this.processor.process(item);
this.readResult(result);
switch (this.processor.getClass().getSimpleName()){
case "CalculateTweetStatsService":
if((Integer) result >= 20){
this.subscription.cancel();
}
break;
}
}
@Override
public void onError(Throwable throwable) {
System.out.println("Error is thrown " + throwable.getMessage());
}
@Override
public void onComplete() {
if(this.processor instanceof LogToFileService){
((LogToFileService) processor).closeResource();
}
System.out.println("complete");
}
private void readResult(Object result){
System.out.println("Result of " + this.processor.getClass().getSimpleName() + " processor is " + result.toString());
}
}
This is the main where I subscribe to the publisher
public static void main(String[] args) {
ScheduledExecutorService executor = Executors.newScheduledThreadPool(Runtime.getRuntime().availableProcessors());
String searchQuery;
try{
searchQuery = args[0] != null ? args[0] : "#capgemini50";
}catch (ArrayIndexOutOfBoundsException ex){
searchQuery = "#capgemini50";
}
TwitterStreamer streamer = new TwitterStreamer(executor, 5, 15L, SECONDS, searchQuery);
MySubscriber subscriber1 = new MySubscriber("LogFileSubscriber", new LogToFileService("./tweetsLogger.txt"));
MySubscriber subscriber2 = new MySubscriber("TotalTweetSubscriber",new CalculateTweetStatsService());
streamer.subscribe(subscriber1);
streamer.subscribe(subscriber2);
}
You need the subscriber to explicitly request data e.g. upon subscription (see https://docs.oracle.com/javase/9/docs/api/java/util/concurrent/Flow.Subscription.html#request-long-):
@Override
public void onSubscribe(Flow.Subscription subscription) {
this.subscription = subscription;
this.subscription.request(1);
}
Same upon processing in onNext() to request the next item.
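For example, a sketch based on the MySubscriber above:
@Override
public void onNext(List<String> item) {
    Object result = this.processor.process(item);
    this.readResult(result);
    // Without this request, the publisher will never deliver another item.
    this.subscription.request(1);
}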

How to implement retry policies while sending data to another application?

I am working on my application which sends data to zeromq. Below is what my application does:
I have a class SendToZeroMQ that send data to zeromq.
Add same data to retryQueue in the same class so that it can be retried later on if acknowledgment is not received. It uses guava cache with maximumSize limit.
Have a separate thread which receives acknowledgement from the zeromq for the data that was sent earlier and if acknowledgement is not received, then SendToZeroMQ will retry sending that same piece of data. And if acknowledgement is received, then we will remove it from retryQueue so that it cannot be retried again.
The idea is very simple, and I have to make sure my retry policy works fine so that I don't lose my data in the rare case that we don't receive acknowledgements.
I am thinking of building two types of RetryPolicies but I am not able to understand how to build that here corresponding to my program:
RetryNTimes: In this it will retry N times with a particular sleep between each retry and after that, it will drop the record.
ExponentialBackoffRetry: In this it will exponentially keep retrying. We can set some max retry limit and after that it won't retry and will drop the record.
Below is my SendToZeroMQ class, which sends data to zeromq, retries every 30 seconds from a background thread, and starts the ResponsePoller runnable which keeps running forever:
public class SendToZeroMQ {
private final ScheduledExecutorService executorService = Executors.newScheduledThreadPool(5);
private final Cache<Long, byte[]> retryQueue =
CacheBuilder
.newBuilder()
.maximumSize(10000000)
.concurrencyLevel(200)
.removalListener(
RemovalListeners.asynchronous(new CustomListener(), executorService)).build();
private static class Holder {
private static final SendToZeroMQ INSTANCE = new SendToZeroMQ();
}
public static SendToZeroMQ getInstance() {
return Holder.INSTANCE;
}
private SendToZeroMQ() {
executorService.submit(new ResponsePoller());
// retry every 30 seconds for now
executorService.scheduleAtFixedRate(new Runnable() {
@Override
public void run() {
for (Entry<Long, byte[]> entry : retryQueue.asMap().entrySet()) {
sendTo(entry.getKey(), entry.getValue());
}
}
}, 0, 30, TimeUnit.SECONDS);
}
public boolean sendTo(final long address, final byte[] encodedRecords) {
Optional<ZMQSocketInfo> liveSockets = PoolManager.getInstance().getNextSocket();
if (!liveSockets.isPresent()) {
return false;
}
return sendTo(address, encodedRecords, liveSockets.get().getSocket());
}
public boolean sendTo(final long address, final byte[] encodedByteArray, final Socket socket) {
ZMsg msg = new ZMsg();
msg.add(encodedByteArray);
boolean sent = msg.send(socket);
msg.destroy();
// adding to retry queue
retryQueue.put(address, encodedByteArray);
return sent;
}
public void removeFromRetryQueue(final long address) {
retryQueue.invalidate(address);
}
}
Below is my ResponsePoller class, which polls for all the acknowledgements from zeromq. If we get an acknowledgement back from zeromq, we remove that record from the retry queue so that it doesn't get retried; otherwise it will be retried.
public class ResponsePoller implements Runnable {
private static final Random random = new Random();
@Override
public void run() {
ZContext ctx = new ZContext();
Socket client = ctx.createSocket(ZMQ.PULL);
String identity = String.format("%04X-%04X", random.nextInt(), random.nextInt());
client.setIdentity(identity.getBytes(ZMQ.CHARSET));
client.bind("tcp://" + TestUtils.getIpaddress() + ":8076");
PollItem[] items = new PollItem[] {new PollItem(client, Poller.POLLIN)};
while (!Thread.currentThread().isInterrupted()) {
// Tick once per second, pulling in arriving messages
for (int centitick = 0; centitick < 100; centitick++) {
ZMQ.poll(items, 10);
if (items[0].isReadable()) {
ZMsg msg = ZMsg.recvMsg(client);
Iterator<ZFrame> it = msg.iterator();
while (it.hasNext()) {
ZFrame frame = it.next();
try {
long address = TestUtils.getAddress(frame.getData());
// remove from retry queue since we got the acknowledgment for this record
SendToZeroMQ.getInstance().removeFromRetryQueue(address);
} catch (Exception ex) {
// log error
} finally {
frame.destroy();
}
}
msg.destroy();
}
}
}
ctx.destroy();
}
}
Question:
As you can see above, I am sending encodedRecords to zeromq using the SendToZeroMQ class, and then they get retried every 30 seconds depending on whether we got an acknowledgement back via the ResponsePoller class or not.
For each encodedRecords there is a unique key called address and that's what we will get back from zeromq as an acknowledgement.
How can I go ahead and extend this example to build the two retry policies that I mentioned above, so that I can pick which retry policy I want to use while sending data? I came up with the interface below, but I am not able to understand how I should move forward to implement those retry policies and use them in the code above.
public interface RetryPolicy {
/**
* Called when an operation has failed for some reason. This method should return
* true to make another attempt.
*/
public boolean allowRetry(int retryCount, long elapsedTimeMs);
}
Can I use guava-retrying or failsafe here, because these libraries already have many retry policies which I could use?
I am not able to work out all the details regarding how to use the relevant APIs, but as for the algorithm, you could try the following (a small sketch of such a policy follows this list):
the retry policy needs some sort of state attached to each message (at least the number of times the current message has been retried, possibly what the current delay is). You need to decide whether the RetryPolicy should keep that itself or if you want to store it inside the message.
instead of allowRetry, you could have a method calculating when the next retry should occur (in absolute time or as a number of milliseconds in the future), which will be a function of the state mentioned above
the retry queue should contain information on when each message should be retried.
instead of using scheduleAtFixedRate, find the message in the retry queue which has the lowest when_is_next_retry (possibly by sorting on the absolute retry timestamp and picking the first), and let the executorService reschedule itself using schedule and the time_to_next_retry
for each retry, pull it from the retry queue, send the message, use the RetryPolicy to calculate when the next retry should be (if it is to be retried at all) and insert it back into the retry queue with a new value for when_is_next_retry (if the RetryPolicy returns -1, it could mean that the message shall not be retried any more)
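A rough, hypothetical sketch of a policy in that shape, returning the next delay instead of a boolean (the names are made up; -1 means drop the record):
public interface DelayRetryPolicy {
    /**
     * @param retriesSoFar number of attempts already made for this message
     * @return delay in milliseconds before the next retry, or -1 to drop the record
     */
    long nextRetryDelayMs(int retriesSoFar);
}

public class ExponentialBackoffDelayPolicy implements DelayRetryPolicy {
    private final long baseDelayMs;
    private final int maxRetries;

    public ExponentialBackoffDelayPolicy(long baseDelayMs, int maxRetries) {
        this.baseDelayMs = baseDelayMs;
        this.maxRetries = maxRetries;
    }

    @Override
    public long nextRetryDelayMs(int retriesSoFar) {
        if (retriesSoFar >= maxRetries) {
            return -1; // give up and drop the record
        }
        return baseDelayMs * (1L << retriesSoFar); // doubles the delay on every attempt
    }
}
The scheduling side would then look up the smallest when_is_next_retry in the queue and call executorService.schedule(task, delay, TimeUnit.MILLISECONDS) with that delay.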
Not a perfect way, but it can also be achieved as below.
public interface RetryPolicy {
public boolean allowRetry();
public void decreaseRetryCount();
}
Create two implementations. For RetryNTimes:
public class RetryNTimes implements RetryPolicy {
private int maxRetryCount;
public RetryNTimes(int maxRetryCount) {
this.maxRetryCount = maxRetryCount;
}
public boolean allowRetry() {
return maxRetryCount > 0;
}
public void decreaseRetryCount()
{
maxRetryCount = maxRetryCount-1;
}}
For ExponentialBackoffRetry:
public class ExponentialBackoffRetry implements RetryPolicy {
private int maxRetryCount;
private final Date retryUpto;
public ExponentialBackoffRetry(int maxRetryCount, Date retryUpto) {
this.maxRetryCount = maxRetryCount;
this.retryUpto = retryUpto;
}
public boolean allowRetry() {
Date date = new Date();
if(maxRetryCount <= 0 || date.compareTo(retryUpto)>=0)
{
return false;
}
return true;
}
public void decreaseRetryCount() {
maxRetryCount = maxRetryCount-1;
}}
You need to make some changes in SendToZeroMQ class
public class SendToZeroMQ {
private final ScheduledExecutorService executorService = Executors.newScheduledThreadPool(5);
private final Cache<Long,RetryMessage> retryQueue =
CacheBuilder
.newBuilder()
.maximumSize(10000000)
.concurrencyLevel(200)
.removalListener(
RemovalListeners.asynchronous(new CustomListener(), executorService)).build();
private static class Holder {
private static final SendToZeroMQ INSTANCE = new SendToZeroMQ();
}
public static SendToZeroMQ getInstance() {
return Holder.INSTANCE;
}
private SendToZeroMQ() {
executorService.submit(new ResponsePoller());
// retry every 30 seconds for now
executorService.scheduleAtFixedRate(new Runnable() {
public void run() {
for (Map.Entry<Long, RetryMessage> entry : retryQueue.asMap().entrySet()) {
RetryMessage retryMessage = entry.getValue();
if(retryMessage.getRetryPolicy().allowRetry())
{
retryMessage.getRetryPolicy().decreaseRetryCount();
entry.setValue(retryMessage);
sendTo(entry.getKey(), retryMessage.getMessage(),retryMessage);
}else
{
retryQueue.asMap().remove(entry.getKey());
}
}
}
}, 0, 30, TimeUnit.SECONDS);
}
public boolean sendTo(final long address, final byte[] encodedRecords, RetryMessage retryMessage) {
Optional<ZMQSocketInfo> liveSockets = PoolManager.getInstance().getNextSocket();
if (!liveSockets.isPresent()) {
return false;
}
if(null==retryMessage)
{
RetryPolicy retryPolicy = new RetryNTimes(10);
retryMessage = new RetryMessage(retryPolicy,encodedRecords);
retryQueue.asMap().put(address,retryMessage);
}
return sendTo(address, encodedRecords, liveSockets.get().getSocket());
}
public boolean sendTo(final long address, final byte[] encodedByteArray, final ZMQ.Socket socket) {
ZMsg msg = new ZMsg();
msg.add(encodedByteArray);
boolean sent = msg.send(socket);
msg.destroy();
return sent;
}
public void removeFromRetryQueue(final long address) {
retryQueue.invalidate(address);
}}
Here is a working little simulation of your environment that shows how this can be done. Note the Guava cache is the wrong data structure here, since you aren't interested in eviction (I think). So I'm using a concurrent hashmap:
package experimental;
import static java.util.concurrent.TimeUnit.MILLISECONDS;
import java.util.Arrays;
import java.util.Iterator;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.ScheduledExecutorService;
class Experimental {
/** Return the desired backoff delay in millis for the given retry number, which is 1-based. */
interface RetryStrategy {
long getDelayMs(int retry);
}
enum ConstantBackoff implements RetryStrategy {
INSTANCE;
@Override
public long getDelayMs(int retry) {
return 1000L;
}
}
enum ExponentialBackoff implements RetryStrategy {
INSTANCE;
@Override
public long getDelayMs(int retry) {
return 100 + (1L << retry);
}
}
static class Sender {
private final ScheduledExecutorService executorService = Executors.newScheduledThreadPool(4);
private final ConcurrentMap<Long, Retrier> pending = new ConcurrentHashMap<>();
/** Send the given data with given address on the given socket. */
void sendTo(long addr, byte[] data, int socket) {
System.err.println("Sending " + Arrays.toString(data) + "#" + addr + " on " + socket);
}
private class Retrier implements Runnable {
private final RetryStrategy retryStrategy;
private final long addr;
private final byte[] data;
private final int socket;
private int retry;
private Future<?> future;
Retrier(RetryStrategy retryStrategy, long addr, byte[] data, int socket) {
this.retryStrategy = retryStrategy;
this.addr = addr;
this.data = data;
this.socket = socket;
this.retry = 0;
}
synchronized void start() {
if (future == null) {
future = executorService.submit(this);
pending.put(addr, this);
}
}
synchronized void cancel() {
if (future != null) {
future.cancel(true);
future = null;
}
}
private synchronized void reschedule() {
if (future != null) {
future = executorService.schedule(this, retryStrategy.getDelayMs(++retry), MILLISECONDS);
}
}
@Override
synchronized public void run() {
sendTo(addr, data, socket);
reschedule();
}
}
long getVerifiedAddr() {
System.err.println("Pending messages: " + pending.size());
Iterator<Long> i = pending.keySet().iterator();
long addr = i.hasNext() ? i.next() : 0;
return addr;
}
class CancellationPoller implements Runnable {
@Override
public void run() {
while (!Thread.currentThread().isInterrupted()) {
try {
Thread.sleep(1000);
} catch (InterruptedException ex) {
Thread.currentThread().interrupt();
}
long addr = getVerifiedAddr();
if (addr == 0) {
continue;
}
System.err.println("Verified message (to be cancelled) " + addr);
Retrier retrier = pending.remove(addr);
if (retrier != null) {
retrier.cancel();
}
}
}
}
Sender initialize() {
executorService.submit(new CancellationPoller());
return this;
}
void sendWithRetriesTo(RetryStrategy retryStrategy, long addr, byte[] data, int socket) {
new Retrier(retryStrategy, addr, data, socket).start();
}
}
public static void main(String[] args) {
Sender sender = new Sender().initialize();
for (long i = 1; i <= 10; i++) {
sender.sendWithRetriesTo(ConstantBackoff.INSTANCE, i, null, 42);
}
for (long i = -1; i >= -10; i--) {
sender.sendWithRetriesTo(ExponentialBackoff.INSTANCE, i, null, 37);
}
}
}
You can use Apache Camel. It provides a component for ZeroMQ, and tools like error handlers, redelivery policies, and dead letter channels are natively provided.
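A rough sketch of what that could look like with Camel's Java DSL (the ZeroMQ endpoint URI below is illustrative and assumes the camel-zeromq component is on the classpath):
import org.apache.camel.builder.RouteBuilder;

public class ZeroMqRetryRoute extends RouteBuilder {
    @Override
    public void configure() {
        // Retry each failed send up to 5 times with exponential backoff,
        // then park the message on a dead letter endpoint.
        errorHandler(deadLetterChannel("seda:failedRecords")
                .maximumRedeliveries(5)
                .redeliveryDelay(1000)
                .useExponentialBackOff()
                .backOffMultiplier(2));

        from("direct:sendRecord")
                .to("zeromq:tcp://localhost:8076?socketType=PUSH"); // illustrative endpoint URI
    }
}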

Dispose Child Thread As per the Id

I am generating a child thread when I receive data from user.
What are the steps if I want to dispose the previous user child thread if the same user sends data again and wants to generate a new user child thread again?
Right, so Java can't dispose of a thread; a thread simply runs until it terminates.
So:
To get rid of the thread you need to allow the thread's run method to end, and then get rid of all references to the Thread and any Runnable it's constructed with.
You want to toggle the thread into finishing, so here's a simple example:
class SimpleRunnable implements Runnable {
public volatile boolean run = true; //Volatile for thread safety.
public void run() {
while(run) {
System.out.println("WHOOOO!"); //Boy, will this be annoying
}
}
}
Creating a thread from this runnable:
SimpleRunnable run = new SimpleRunnable();
Thread thread = new Thread(run);
thread.start(); //run thread
//Stop thread
run.run=false;
//Thread will be removed when out of scope
You need to create a Runnable per user in your case, and then set the stop variable when a new thread is created.
For example, you could store each runnable in a ConcurrentHashMap by userId.
ConcurrentHashMap<String,SimpleRunnable> runnablesByUser = new ConcurrentHashMap<>();
public void startNewThreadForUser(String userId){
//Time passes, retrieve and kill old thread:
SimpleRunnable oldRunnable = runnablesByUser.get(userId);
if(oldRunnable!=null){
oldRunnable.run=false;
}
SimpleRunnable newRunnableUserOne = new SimpleRunnable();
runnablesByUser.put(userId,newRunnableUserOne);
Thread thread = new Thread(newRunnableUserOne);
thread.start();
}
Calls to the method would then kill an old thread if found, release the old one from scope by replacing it with a new one in the ConcurrentHashMap and finally start the new thread.
Like so:
public void startThreeThreads(){
startNewThreadForUser("User1");//starts Thread for User1
startNewThreadForUser("User2");//starts Thread for User2
startNewThreadForUser("User1");//Replaces Thread for User1
}
Managing running threads is typically done in a thread pool and this is rough in all sorts of ways, but hopefully it's useful.
I can elaborate that mechanism if you want.
Starting a new thread every time that you receive data from a user will lead to running out of resources, besides causing the unnecessary overhead of managing too many threads. Your computer has a limited number of threads that can run at any one time, bounded by your CPU. To find out that number you can use:
Runtime.getRuntime().availableProcessors()
On the other hand, if the jobs that you want to process require a lot of I/O, you should launch a few more threads than Runtime.getRuntime().availableProcessors(), or you will be under-using your CPU.
What I would do is use an ExecutorService, which will handle the threads for you (no need to manually start and stop threads). Just start an ExecutorService with the total number of threads that you want to execute simultaneously, and then every time that you get more work from a user, submit the new task (as a Callable) to the ExecutorService. The ExecutorService will handle the execution of that task for you, and once it is done it will become available for garbage collection.
For example, see the code below:
import java.util.Random;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
public class MultipleClientsExample {
public static final int TOTAL_THREADS_TO_PROCESS_CUSTOMER_WORK = 4;
public static final Random random = new Random();
public static int customerCounter = 0;
public static void main(String[] args) throws InterruptedException {
MultipleClientsExample multipleClientsExample = new MultipleClientsExample();
multipleClientsExample.doTheWork();
}
private void doTheWork() throws InterruptedException {
ExecutorService executorService = Executors.newFixedThreadPool(TOTAL_THREADS_TO_PROCESS_CUSTOMER_WORK);
while (customerCounter < 10) {
try {
CustomerInput customerInput = getWorkFromCustomer();
System.out.println("main program. received work from customer: " + customerInput.getCustomerId());
executorService.submit(new WorkToBeDone(customerInput.getCustomerId(), customerInput.getWorkInfo()));
} catch (InterruptedException e) {
break;
}
customerCounter++;
}
executorService.shutdown();
executorService.awaitTermination(5, TimeUnit.SECONDS);
}
private CustomerInput getWorkFromCustomer() throws InterruptedException {
while (true) {
String customerId = String.valueOf(random.nextInt(10));
CustomerInput customerInput = new CustomerInput(customerId, "work from customer: " + customerId);
return customerInput;
}
}
}
class WorkToBeDone implements Callable<Void> {
private String clientId;
private String workInfo;
public WorkToBeDone(String clientId, String workInfo) {
this.clientId = clientId;
this.workInfo = workInfo;
}
@Override
public Void call() throws Exception {
System.out.println("inside a working thread: it is going to do the work of customer: " + clientId);
try {
Thread.sleep(5000);
} catch (InterruptedException e) {
System.out.println("worker processing job from customer: " + clientId + " was interrupted. ending now");
return null;
}
System.out.println("work completed for customer: " + clientId);
return null;
}
}
class CustomerInput {
private String customerId;
private String workInfo;
public CustomerInput(String customerId, String workInfo) {
this.customerId = customerId;
this.workInfo = workInfo;
}
public String getCustomerId() {
return customerId;
}
public String getWorkInfo() {
return workInfo;
}
}
In case you want the ability to cancel a task that has already been submitted to the thread pool, you will have to keep reference of the Future values of each task, and make sure to remove the reference of the tasks that completed and that you cancelled, so they are ready to be garbage collected (otherwise you will have a memory leak).
for example
import java.util.Iterator;
import java.util.Map;
import java.util.concurrent.Callable;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
public class MultipleClientsExample {
public static final int TOTAL_THREADS_TO_PROCESS_CUSTOMER_WORK = 4;
public static int customerCounter = 0;
public static void main(String[] args) throws InterruptedException {
MultipleClientsExample multipleClientsExample = new MultipleClientsExample();
multipleClientsExample.doTheWork();
}
private void doTheWork() throws InterruptedException {
final ExecutorService executorService = Executors.newFixedThreadPool(TOTAL_THREADS_TO_PROCESS_CUSTOMER_WORK);
Map<String, Future<String>> map = new ConcurrentHashMap<>();
while (customerCounter < 11) {
try {
WorkToBeDone workToBeDone = getWorkFromCustomer();
System.out.println("main program. received work from customer: " + workToBeDone.getClientId());
Future<String> resultFuture = executorService.submit(workToBeDone);
map.put(workToBeDone.getClientId(), resultFuture);
} catch (InterruptedException e) {
break;
}
customerCounter++;
}
// cancel job of customer with id: 10
Future<String> resultFuture = map.get("10");
System.out.println("cancelling job of customerId: 10");
resultFuture.cancel(true);
// remove references of all completed jobs
Thread.sleep(2000);
System.out.println("looking for jobs that completed or were cancelled.");
Iterator<Map.Entry<String, Future<String>>> iterator = map.entrySet().iterator();
while (iterator.hasNext()) {
Map.Entry<String, Future<String>> entry = iterator.next();
if (entry.getValue().isCancelled() || entry.getValue().isDone()) {
System.out.println("removing reference of job for customer: " + entry.getKey());
iterator.remove();
}
}
// simpler way to remove entries from map (but doesn't print output of jobs removed from map)
// map.entrySet().removeIf(entry -> entry.getValue().isCancelled() || entry.getValue().isDone());
executorService.shutdown();
executorService.awaitTermination(5, TimeUnit.SECONDS);
}
private WorkToBeDone getWorkFromCustomer() throws InterruptedException {
String customerId = String.valueOf(customerCounter);
WorkToBeDone workToBeDone = new WorkToBeDone(customerId, "work from customer: " + customerId);
return workToBeDone;
}
}
class WorkToBeDone implements Callable<String> {
private String clientId;
private String workInfo;
public String getClientId() {
return clientId;
}
public WorkToBeDone(String clientId, String workInfo) {
this.clientId = clientId;
this.workInfo = workInfo;
}
@Override
public String call() throws Exception {
System.out.println("inside a working thread: it is going to do the work of customer: " + clientId);
try {
Thread.sleep(2000);
} catch (InterruptedException e) {
System.out.println("worker processing job from customer: " + clientId + " was interrupted. ending now");
return clientId;
}
System.out.println("work completed for customer: " + clientId);
return clientId;
}
}

Detecting user inactivity in a desktop application

I have an application in Eclipse RCP where I want to fire a function called 'LogOutUser()' if the user leaves his/ her application idle for, say, five minutes.
How do I go about doing this?
I don't know if the RCP framework supports this internally. However, I wrote my own "helper" class, which is a singleton client session manager. Eclipse won't know natively how you connect to your datasource. In my case I am connecting using EJB3 calls and listening to JMS queues and topics.
My class was written to detect when the datasource or "server" went down. It would also reconnect when the server came up. The server inactivity is detected by listening to heartbeat DTO's sent by the server. This feedback is useful to present to the user. I have adapted this class to cater for user interface inactivity.
The class is quite simple. It is a singleton, so it can be called simply at any point in your client-side RCP application. The heartbeat uses an observer and so you will have to add a HeartBeatEventListener to hook into this functionality. You can adapt the class to do the same for the user interface inactivity. However, I have just provided an updateUserInterfaceActivity() method which you must call when there is user activity. Perhaps this can be hooked into a global mouse and a global keyboard event handler.
I have also added a TrayItem to update the user...
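Hooking the activity update into global input events could, for example, be done with SWT display filters (a sketch; register these once at startup on the UI thread):
final Listener activityListener = new Listener() {
    public void handleEvent(Event event) {
        ClientSessionManagement.getInstance().updateUserInterfaceActivity();
    }
};
// Treat any key press or mouse click anywhere in the application as user activity.
Display.getDefault().addFilter(SWT.KeyDown, activityListener);
Display.getDefault().addFilter(SWT.MouseDown, activityListener);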
Here is the class:
package com.kingsleywebb.clientsessionmanagement;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.eclipse.swt.SWT;
import org.eclipse.swt.graphics.Image;
import org.eclipse.swt.widgets.Display;
import org.eclipse.swt.widgets.Event;
import org.eclipse.swt.widgets.Listener;
import org.eclipse.swt.widgets.Menu;
import org.eclipse.swt.widgets.MenuItem;
import org.eclipse.swt.widgets.Shell;
import org.eclipse.swt.widgets.ToolTip;
import org.eclipse.swt.widgets.TrayItem;
import com.kingsleywebb.clientsessionmanagement.entity.HeartbeatDTO;
public class ClientSessionManagement implements HeartbeatEventListener {
private static final Image IMG_CONNECTED = null; // Set this to a "connected image"
private static final Image IMG_DISCONNECTED = null; // Set this to a "disconnected image"
private static final long CONNECTION_INACTIVITY_TIME_MS = 30000L; // 30 seconds
private static final long USER_INTERFACE_INACTIVITY_TIME_MS = 300000L; // 5 minutes
private static final Log LOG = LogFactory.getLog(ClientSessionManagement.class);
private static ClientSessionManagement theInstance = null;
private static long connectionTimestamp = 0;
private static long userInterfaceActivityTimestamp = 0;
private synchronized static void createInstance() {
if (theInstance == null) {
theInstance = new ClientSessionManagement();
}
}
public static ClientSessionManagement getInstance() {
if (theInstance == null) {
createInstance();
}
return theInstance;
}
private ClientSessionManagement() {
this.connectionListenerList = new ArrayList<ConnectionListener>();
updateConnectionTimestamp();
Cron cron = new Cron();
Thread cronThread = new Thread(cron);
cronThread.start();
}
private boolean connected = true;
private ToolTip toolTipConnected;
private ToolTip toolTipDisconnected;
private TrayItem trayItem = null;
private String appName = null;
private String version = null;
private String whiteLabel = null;
private String userName = null;
private String deskName = null;
private String serverName = null;
private String userMnemonic = null;
private MenuItem miShowPopups;
private MenuItem miSoundBeep;
private List<ConnectionListener> connectionListenerList;
private void updateConnectionTimestamp() {
ClientSessionManagement.connectionTimestamp = System.currentTimeMillis();
}
private synchronized long getLastHeartbeatInMsAgo() {
return System.currentTimeMillis() - ClientSessionManagement.connectionTimestamp;
}
public synchronized void updateHeartbeat() {
updateConnectionTimestamp();
}
public synchronized void checkHeartbeatInterval() {
if (getLastHeartbeatInMsAgo() < CONNECTION_INACTIVITY_TIME_MS) {
showConnected();
}
else {
showDisconnected();
}
}
private void updateUserInterfaceActivityTimestamp() {
ClientSessionManagement.userInterfaceActivityTimestamp = System.currentTimeMillis();
}
private synchronized long getLastUserInterfaceActivityInMsAgo() {
return System.currentTimeMillis() - ClientSessionManagement.userInterfaceActivityTimestamp;
}
public synchronized void updateUserInterfaceActivity() {
updateUserInterfaceActivityTimestamp();
}
public synchronized void checkUserInterfaceActivityInterval() {
if (getLastUserInterfaceActivityInMsAgo() > USER_INTERFACE_INACTIVITY_TIME_MS) {
logoutUser();
}
}
private void logoutUser() {
// Implement logout functionality here
}
private void showConnected() {
if (!connected) {
connected = true;
Display.getDefault().asyncExec(new Runnable() {
public void run() {
// Update icon
if (trayItem != null) {
trayItem.setImage(ClientSessionManagement.IMG_CONNECTED);
trayItem.getToolTip().setVisible(false);
trayItem.setToolTip(toolTipConnected);
trayItem.getToolTip().setVisible(true);
}
// Update hover tooltip
updateHoverTooltip();
}
});
notifyConnectionListeners();
}
}
private void showDisconnected() {
if (connected) {
connected = false;
Display.getDefault().asyncExec(new Runnable() {
public void run() {
// Update icon
if (trayItem != null) {
trayItem.setImage(ClientSessionManagement.IMG_DISCONNECTED);
trayItem.getToolTip().setVisible(false);
trayItem.setToolTip(toolTipDisconnected);
trayItem.getToolTip().setVisible(true);
}
// Update hover tooltip
updateHoverTooltip();
}
});
notifyConnectionListeners();
}
}
private void updateHoverTooltip() {
if (trayItem != null) {
// Application info
String applicationInfo = null;
if (appName != null && version != null && whiteLabel != null) {
// appName* | version | whitelabel
applicationInfo = " Application: " + " " + appName + " " + version + " [" + whiteLabel + "]\r\n";
}
// User info
String userInfo = null;
if (userName != null && deskName != null && serverName != null) {
userInfo = " User: " + " " + userName + " (" + deskName + ") on " + serverName + "\r\n";
}
// Connection info
String connectionInfo = connected ? " Server Connected" : " SERVER DISCONNECTED!!!";
String status = connectionInfo + "\r\n\r\n" + (applicationInfo != null ? applicationInfo : "") +
(userInfo != null ? userInfo : "");
trayItem.setToolTipText(status);
LOG.info(status);
}
}
public void setTrayItem(Shell shell, TrayItem trayItem) {
this.trayItem = trayItem;
/*
* Property files to persist these settings - removed for simplicity
*
* final WorkstationProperties p = WorkstationProperties.getInstance();
* boolean showNotificationPopups = !"No".equalsIgnoreCase(p.getProperty("notifications.showNotificationPopups"));
* boolean soundNotificationBeep = !"No".equalsIgnoreCase(p.getProperty("notifications.soundNotificationBeep"));
*/
boolean showNotificationPopups = true;
boolean soundNotificationBeep = true;
final Menu menu = new Menu (shell, SWT.POP_UP);
miShowPopups = new MenuItem (menu, SWT.CHECK);
miShowPopups.setSelection(showNotificationPopups);
miShowPopups.setText("Show Notification Popups");
miShowPopups.addListener (SWT.Selection, new Listener () {
public void handleEvent (Event event) {
LOG.info("notifications.showNotificationPopups = " + miShowPopups.getSelection());
// Property files to persist these settings - removed for simplicity
//p.setProperty("notifications.showNotificationPopups", miShowPopups.getSelection() ? "Yes" : "No");
}
});
miSoundBeep = new MenuItem (menu, SWT.CHECK);
miSoundBeep.setSelection(soundNotificationBeep);
miSoundBeep.setText("Play Notification Beep");
miSoundBeep.addListener (SWT.Selection, new Listener () {
public void handleEvent (Event event) {
LOG.info("notifications.soundNotificationBeep = " + miSoundBeep.getSelection());
// Property files to persist these settings - removed for simplicity
//p.setProperty("notifications.soundNotificationBeep", miSoundBeep.getSelection() ? "Yes" : "No");
}
});
this.trayItem.addListener (SWT.MenuDetect, new Listener () {
public void handleEvent (Event event) {
menu.setVisible (true);
}
});
toolTipConnected = new ToolTip(shell, SWT.BALLOON);
toolTipConnected.setText((appName != null ? appName : "<Application Name>") + " Status");
toolTipConnected.setMessage("Connected to server.");
toolTipConnected.setLocation(600, 600);
toolTipConnected.setVisible(false);
toolTipDisconnected = new ToolTip(shell, SWT.ICON_WARNING);
toolTipDisconnected.setText((appName != null ? appName : "<Application Name>") + " Status");
toolTipDisconnected.setMessage("DISCONNECTED from server.");
toolTipDisconnected.setLocation(500, 500);
toolTipDisconnected.setVisible(false);
this.trayItem.setToolTip(toolTipConnected);
}
public boolean isShowPopups() {
return miShowPopups.getSelection();
}
public boolean isSoundBeep() {
return miSoundBeep.getSelection();
}
public void setAppName(String appName) {
this.appName = appName;
}
public void setVersion(String version) {
this.version = version;
}
public void setWhiteLabel(String whiteLabel) {
this.whiteLabel = whiteLabel;
}
public void setUserName(String userName) {
this.userName = userName;
}
public void setDeskName(String deskName) {
this.deskName = deskName;
}
public void setServerName(String serverName) {
this.serverName = serverName;
updateHoverTooltip();
}
public String getUserMnemonic() {
return userMnemonic;
}
public void setUserMnemonic(String userMnemonic) {
this.userMnemonic = userMnemonic;
}
public void heartbeatArrived(HeartbeatDTO heartbeatDTO) {
updateHeartbeat();
}
public boolean isConnected() {
return connected;
}
public boolean addConnectionListener(ConnectionListener connectionListener) {
return connectionListenerList.add(connectionListener);
}
public boolean removeConnectionListener(ConnectionListener connectionListener) {
return connectionListenerList.remove(connectionListener);
}
public void notifyConnectionListeners() {
for (Iterator<ConnectionListener> i = connectionListenerList.iterator(); i.hasNext();) {
ConnectionListener connectionListener = i.next();
if (connected) {
connectionListener.connected();
}
else {
connectionListener.disconnected();
}
}
}
/**
*
* @author Kingsley Webb
*
* Check heartbeat interval periodically display warning to user accordingly.
*/
class Cron implements Runnable {
public void run() {
// Wait 15s extra before 1st check
try {
Thread.sleep(15000);
} catch (InterruptedException e) {
LOG.error(e);
}
while (true) {
// Check every 5s - increase for better performance, but you get the idea...
try {
Thread.sleep(5000);
} catch (InterruptedException e) {
LOG.error(e);
}
checkHeartbeatInterval();
checkUserInterfaceActivityInterval();
}
}
}
}
Some other supporting classes:
package com.kingsleywebb.clientsessionmanagement;
public interface ConnectionListener {
public void connected();
public void disconnected();
}
package com.kingsleywebb.clientsessionmanagement;
import com.kingsleywebb.clientsessionmanagement.entity.HeartbeatDTO;
public interface HeartbeatEventListener {
public void heartbeatArrived(HeartbeatDTO heartbeatDTO);
}
If you take a look in the bundle org.eclipse.ui.ide.application, there is a class org.eclipse.ui.internal.ide.application.IDEIdleHelper which tries to perform GC after an interval of user inactivity. You can probably reuse the logic that detects the inactivity.
I usually use a Display.addFilter(eventType, listener) for the event types that should keep the session alive, combined with a Display.timerExec(milliseconds, runnable) that runs periodically and tests for the last interesting event.
I use milliseconds = 5000, so the user is logged out (or whatever...) between 5 min and 5 min 5 sec after the last activity. I listen for the SWT event types (in 3.7) KeyDown, KeyUp, MouseDown, MouseUp, MouseVerticalWheel, MouseHorizontalWheel, MouseDoubleClick, Touch, Gesture, Activate, Iconify, Deiconify, Move, and Resize.
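A minimal sketch of that pattern, run on the UI thread (logout() is a hypothetical hook for whatever should happen on timeout):
final Display display = Display.getDefault();
final long timeoutMs = 5 * 60 * 1000; // 5 minutes of inactivity
final long[] lastActivity = { System.currentTimeMillis() };

final Listener activityListener = new Listener() {
    public void handleEvent(Event event) {
        lastActivity[0] = System.currentTimeMillis();
    }
};
// Register the event types that should keep the session alive.
display.addFilter(SWT.KeyDown, activityListener);
display.addFilter(SWT.MouseDown, activityListener);

display.timerExec(5000, new Runnable() {
    public void run() {
        if (System.currentTimeMillis() - lastActivity[0] > timeoutMs) {
            logout(); // hypothetical: log the user out
        } else {
            display.timerExec(5000, this); // re-arm the periodic check
        }
    }
});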

Make an existing code in Java parallel/multithread

I have a very simple crawler. I want to make my current code run in a few threads. Could you provide me with a little tutorial or article to help me achieve this?
I'm originally a .Net developer, and in .Net I have no problem whatsoever running code in multiple threads, but unfortunately I don't know anything about threads in Java.
My crawler is a command-line software so don't worry about GUI.
Thank you in advance.
Java does multithreading through the Thread class. One of the most common ways to make existing code multithreaded is to use the Runnable interface to define what you want to call at thread start, and then start it off.
public class SomeFunctions
{
public static void FunctionA() {}
public static void FunctionB() {}
public static void FunctionC() {}
}
// ...
Thread t1 = new Thread(new Runnable() {
public void run() {
SomeFunctions.FunctionA();
}
});
t1.start();
// (rinse and repeat for the other functions)
Dry-coded, but it should at least get the general concept across. Of course, as soon as you go into multithreading land, you have concurrency issues and need to make sure everything is appropriately synchronized, etc., but any language will have those issues.
If you're worried about synchronization, you have a few tools at your disposal. The easiest is the recursive mutex functionality built into Java, the "synchronized" keyword. More classical means are also available through various classes in the java.util.concurrent and java.util.concurrent.locks packages such as Semaphore and ReadWriteLock
http://download.oracle.com/javase/6/docs/api/java/util/concurrent/package-summary.html
http://download.oracle.com/javase/6/docs/api/java/util/concurrent/locks/package-summary.html
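For example, a crawler's shared URL queue guarded with the synchronized keyword might look like this (a minimal sketch):
import java.util.ArrayDeque;
import java.util.Deque;

public class UrlQueue {
    private final Deque<String> urls = new ArrayDeque<String>();

    // Only one thread at a time may add or take a URL.
    public synchronized void add(String url) {
        urls.addLast(url);
    }

    public synchronized String poll() {
        return urls.pollFirst(); // returns null when the queue is empty
    }
}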
You can take a look at my webcrawler example. Sorry for the lengthiness.
import java.net.MalformedURLException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
/**
* A web crawler with a Worker pool
*
* @author Adriaan
*/
public class WebCrawler implements Manager {
private Set<Worker> workers = new HashSet<Worker>();
private List<String> toCrawl = new ArrayList<String>();
private Set<String> crawled = new HashSet<String>();
private Set<String> hosts = new HashSet<String>();
private Set<String> results = new HashSet<String>();
private int maxResults;
public WebCrawler(String url, int numberOfWorkers, int maxResults) {
this.maxResults = maxResults;
toCrawl.add(url);
createWorkers(numberOfWorkers);
}
public void createWorkers(int numberOfWorkers) {
for (int i = 0; i < numberOfWorkers; i++) {
workers.add(new Worker(this));
}
}
private void stopWorkers() {
for (Worker worker : workers) {
worker.terminate();
}
}
public synchronized Job getNewJob() {
while (toCrawl.size() == 0) {
try {
wait();
} catch (InterruptedException e) {
// ignore
}
}
return new EmailAddressCrawlJob().setDescription(toCrawl.remove(0));
}
public synchronized void jobCompleted(Job job) {
// System.out.println("crawled: " + job.getDescription());
crawled.add(job.getDescription());
String host = getHost(job.getDescription());
boolean knownHost = hosts.contains(host);
if (!knownHost) {
System.out.println("host: " + host);
hosts.add(host);
}
for (String url : job.getNewDescriptions()) {
if (!crawled.contains(url)) {
if (knownHost) {
toCrawl.add(toCrawl.size() - 1, url);
} else {
toCrawl.add(url);
}
}
}
for (String result : job.getResults()) {
if (results.add(result)) {
System.out.println("result: " + result);
}
}
notifyAll();
if (results.size() >= maxResults) {
stopWorkers();
System.out.println("Crawled hosts:");
for (String crawledHost : hosts) {
System.out.println(crawledHost);
}
Set<String> uncrawledHosts = new HashSet<String>();
for (String toCrawlUrl : toCrawl) {
uncrawledHosts.add(getHost(toCrawlUrl));
}
System.out.println("Uncrawled hosts:");
for (String unCrawledHost : uncrawledHosts) {
System.out.println(unCrawledHost);
}
}
if (crawled.size() % 10 == 0) {
System.out.println("crawled=" + crawled.size() + " toCrawl="
+ toCrawl.size() + " results=" + results.size() + " hosts="
+ hosts.size() + " lastHost=" + host);
}
}
public String getHost(String host) {
int hostStart = host.indexOf("://") + 3;
if (hostStart > 0) {
int hostEnd = host.indexOf("/", hostStart);
if (hostEnd < 0) {
hostEnd = host.length();
}
host = host.substring(hostStart, hostEnd);
}
return host;
}
public static void main(String[] args) throws MalformedURLException {
new WebCrawler("http://www.nu.nl/", 5, 20);
}
}
Worker
/**
* A Worker proactively gets a Job, executes it and notifies its manager that
* the Job is completed.
*
* @author Adriaan
*/
public class Worker extends Thread {
private final Manager manager;
private Job job = null;
private boolean isWorking;
public Worker(Manager manager) {
this.manager = manager;
isWorking = true;
start();
}
@Override
public void run() {
System.out.println("Worker " + Thread.currentThread().getId()
+ " starting ");
while (isWorking) {
job = manager.getNewJob();
job.execute();
manager.jobCompleted(job);
}
}
public void terminate() {
isWorking = false;
}
}
Manager interface
/**
* Manager interface for Workers
*
* @author Adriaan
*/
public interface Manager {
/**
* Gets a new job
*
* @return
*/
public Job getNewJob();
/**
* Indicates the job is completed
*
* @param job
*/
public void jobCompleted(Job job);
}
Job
import java.util.HashSet;
import java.util.Set;
/**
* A Job is a unit of work defined by a String (the description). During execution the
* job can obtain results and new job descriptions.
*
* @author Adriaan
*/
public abstract class Job {
private String description;
private Set<String> results = new HashSet<String>();
private Set<String> newDescriptions = new HashSet<String>();
/**
* Sets the job description
*
* @param description
* @return this for chaining
*/
public Job setDescription(String description) {
this.description = description;
return this;
}
/**
* Executes the job
*/
public abstract void execute();
/**
* Gets the results obtained
*
* @return
*/
public Set<String> getResults() {
return results;
}
/**
* Gets the now job descriptions obtained
*
* @return
*/
public Set<String> getNewDescriptions() {
return newDescriptions;
}
/**
* Gets the job description
*
* @return
*/
public String getDescription() {
return description;
}
/**
* Allows the implementation to add an obtained result
*
* @param result
*/
void addResult(String result) {
results.add(result);
}
/**
* Allows the implementation to add an obtained description
*
* @param newDescription
*/
void addNewDescription(String newDescription) {
newDescriptions.add(newDescription);
}
}
A Job which crawls a page for email addresses:
import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.util.StringTokenizer;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* A Job which crawls HTTP or HTTPS URL's for email adresses, collecting new
* URL's to crawl along the way.
*
* @author Adriaan
*/
public class EmailAddressCrawlJob extends Job {
@Override
public void execute() {
try {
URL url = new URL(getDescription());
if (url != null) {
String text = readText(url);
extractNewDescriptions(text, url);
extractResults(text);
}
} catch (MalformedURLException e) {
System.err.println("Bad url " + getDescription());
}
}
private String readText(URL url) {
URLConnection connection;
try {
connection = url.openConnection();
InputStream input = connection.getInputStream();
byte[] buffer = new byte[1000];
int num = input.read(buffer);
if (num > 0) {
StringBuilder builder = new StringBuilder();
builder.append(new String(buffer, 0, num));
while (num != -1) {
num = input.read(buffer);
if (num != -1) {
builder.append(new String(buffer, 0, num));
}
}
return builder.toString();
}
} catch (IOException e) {
//System.err.println("Could not read from " + url);
}
return "";
}
private void extractNewDescriptions(String text, URL url) {
// URL extracting code from Sun example
String lowerCaseContent = text.toLowerCase();
int index = 0;
while ((index = lowerCaseContent.indexOf("<a", index)) != -1) {
if ((index = lowerCaseContent.indexOf("href", index)) == -1) {
break;
}
if ((index = lowerCaseContent.indexOf("=", index)) == -1) {
break;
}
index++;
String remaining = text.substring(index);
StringTokenizer st = new StringTokenizer(remaining, "\t\n\r\">#");
String strLink = st.nextToken();
if (strLink.startsWith("javascript:")) {
continue;
}
URL urlLink;
try {
urlLink = new URL(url, strLink);
strLink = urlLink.toString();
} catch (MalformedURLException e) {
// System.err.println("Could not create url: " + target
// + " + " + strLink);
continue;
}
// only look at http links
String protocol = urlLink.getProtocol();
if (protocol.compareTo("http") != 0
&& protocol.compareTo("https") != 0) {
// System.err.println("Ignoring: " + protocol
// + " protocol in " + urlLink);
continue;
}
addNewDescription(urlLink.toString());
}
}
private void extractResults(String text) {
Pattern p = Pattern
.compile("([\\w\\-]([\\.\\w])+[\\w]+#([\\w\\-]+\\.)+[A-Za-z]{2,4})");
Matcher m = p.matcher(text);
while (m.find()) {
addResult(m.group(1));
}
}
}
I know this answer is a bit verbose, but I thought OP might be best helped with a working example and I happened to have made one not so long ago.
A very basic Java program that will give you the abstract idea of multithreading:
public class MyThread extends Thread {
String word;
public MyThread(String rm){
word = rm;
}
public void run(){
try {
for(;;){
System.out.println(word);
Thread.sleep(1000);
}
} catch(InterruptedException e) {
System.out.println("sleep interrupted");
}
}
public static void main(String[] args) {
Thread t1=new MyThread("First Thread");
Thread t2=new MyThread("Second Thread");
t1.start();
t2.start();
}
}
And the Output will be..
First Thread
Second Thread
First Thread
Second Thread
First Thread
Go through this PPT; it will help you with the basics:
Here
