High concurrent client with NIO - java

I'm writing a tool that will generate a large number of HTTP calls against a web server. At the moment I'm only interested in how many requests I can make per second; I'm not interested in the results of those requests.
I'm measuring the time spent to send 1k requests against google.com and I get 69 milliseconds :
but when I'm sniffing the traffic with WireShark I see that sending all the GET requests is taking almost 4 seconds:
start of the calls
end of the calls
Tool has been run from IntelliJ on Windows 10, I7 1.8 Ghz, 32 GB of RAM.
My question is: why is there such a difference? Sending 1k HTTP GET requests should be quick, but it takes almost 4 seconds. What am I doing wrong here?
The code below is only for testing purposes and it's quite ugly, so bear with me. Also, I'm not very familiar with NIO.
import org.apache.commons.lang3.time.StopWatch;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.concurrent.atomic.AtomicInteger;
public class UJPPHighTrafficClient {
public static final Logger logger = LoggerFactory.getLogger(UJPPHighTrafficClient.class);
public static final int iterations = 1000;
public static void main(String[] args) {
private static void doStartClient() {
logger.info("starting the client");
UJPPHighTrafficExecutor executor = new UJPPHighTrafficExecutor();
StopWatch watch = new StopWatch();
for (int i = 0; i < iterations; i++) {
logger.info("Run " + iterations + " executions in " + watch.getTime() + " milliseconds");
import org.apache.http.HttpHost;
import org.apache.http.HttpResponse;
import org.apache.http.ProtocolVersion;
import org.apache.http.concurrent.FutureCallback;
import org.apache.http.config.ConnectionConfig;
import org.apache.http.impl.nio.DefaultHttpClientIODispatch;
import org.apache.http.impl.nio.pool.BasicNIOConnPool;
import org.apache.http.impl.nio.reactor.DefaultConnectingIOReactor;
import org.apache.http.impl.nio.reactor.IOReactorConfig;
import org.apache.http.message.BasicHttpEntityEnclosingRequest;
import org.apache.http.nio.protocol.*;
import org.apache.http.nio.reactor.ConnectingIOReactor;
import org.apache.http.nio.reactor.IOEventDispatch;
import org.apache.http.nio.reactor.IOReactorException;
import org.apache.http.protocol.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.io.InterruptedIOException;
import java.util.concurrent.atomic.AtomicInteger;
public class UJPPHighTrafficExecutor {
private final Logger logger = LoggerFactory.getLogger("debug");
public static ConnectingIOReactor requestsReactor = null;
private static BasicNIOConnPool clientConnectionPool = null;
public static HttpAsyncRequester clientRequester = null;
public static Thread runnerThread = null;
private static AtomicInteger counter = null;
public static final int cores = Runtime.getRuntime().availableProcessors() * 2;
public UJPPHighTrafficExecutor() {
counter = new AtomicInteger();
public void initializeConnectionManager() {
try {
requestsReactor =
new DefaultConnectingIOReactor(IOReactorConfig.
clientConnectionPool = new BasicNIOConnPool(requestsReactor, ConnectionConfig.DEFAULT);
clientRequester = initializeHttpClient(requestsReactor);
} catch (IOReactorException ex) {
logger.error(" initializeConnectionManager " + ex.getMessage());
private HttpAsyncRequester initializeHttpClient(final ConnectingIOReactor ioReactor) {
// Create HTTP protocol processing chain
HttpProcessor httpproc = HttpProcessorBuilder.create()
// Use standard client-side protocol interceptors
.add(new RequestContent(true)).
add(new RequestTargetHost()).
add(new RequestConnControl())
.add(new RequestExpectContinue(true)).
// Create HTTP requester
HttpAsyncRequester requester = new HttpAsyncRequester(httpproc);
// Create client-side HTTP protocol handler
HttpAsyncRequestExecutor protocolHandler = new HttpAsyncRequestExecutor();
// Create client-side I/O event dispatch
final IOEventDispatch ioEventDispatch =
new DefaultHttpClientIODispatch(protocolHandler, ConnectionConfig.DEFAULT);
// Run the I/O reactor in a separate thread
runnerThread = new Thread("Client") {
public void run() {
try {
} catch (InterruptedIOException ex) {
logger.error("Interrupted", ex);
} catch (IOException e) {
logger.error("I/O error", e);
} catch (Exception e) {
logger.error("Exception encountered in Client ", e.getMessage(), e);
logger.info("Client shutdown");
return requester;
/**
 * Builds a single HTTP/1.1 "GET /" request for google.com:80 and submits it
 * through clientRequester, using connections from clientConnectionPool.
 * In the visible lines the callback only logs the outcome of the request.
 */
public void run() {
HttpHost httpHost = new HttpHost("google.com", 80, "http");
final HttpCoreContext coreContext = HttpCoreContext.create();
ProtocolVersion ver = new ProtocolVersion("HTTP", 1, 1);
// NOTE(review): an entity-enclosing request type is used for a GET with no visible entity - confirm this is intended.
BasicHttpEntityEnclosingRequest request = new BasicHttpEntityEnclosingRequest("GET", "/", ver);
clientRequester.execute(new BasicAsyncRequestProducer(httpHost, request), new BasicAsyncResponseConsumer(),
clientConnectionPool, coreContext,
// Handle HTTP response from a callback
new FutureCallback<HttpResponse>() {
// Logs the response when the exchange completes.
public void completed(final HttpResponse response) {
logger.info("Completed " + response.toString());
// Logs only the exception message (no stack trace) when the exchange fails.
public void failed(final Exception ex) {
logger.info("Failed " + ex.getMessage());
// Logs that the exchange was cancelled.
public void cancelled() {
logger.info("Cancelled ");
/**
 * Bumps the shared request counter and checks whether it has reached
 * {@code UJPPHighTrafficClient.iterations}.
 */
private void checkCounter() {
// NOTE(review): get() followed by set() is two separate operations on the AtomicInteger, not a single atomic increment.
counter.set(counter.get() + 1);
if (counter.get() == UJPPHighTrafficClient.iterations) {
// NOTE(review): the body of this try block is not visible in this excerpt.
try {
} catch (Exception ex) {

Your code is timing how long it takes to set up 1000 iterations of HTTP connections, not the time to complete those connections, many of which are still running 3-4 seconds later. To see a more accurate figure, put a field t0 into UJPPHighTrafficExecutor:
public class UJPPHighTrafficExecutor {
long t0 = System.nanoTime();
...and then checkCounter() can print a time for completing all iterations:
private void checkCounter() {
counter.set(counter.get() + 1);
if (counter.get() == UJPPHighTrafficClient.iterations) {
try {
} catch (Exception ex) {
long t1 = System.nanoTime();
System.out.println("ELAPSED MILLIS: ~"+TimeUnit.NANOSECONDS.toMillis(t1-t0));
This will print a much larger number for 1000 iterations:
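Note that counter.set(counter.get() + 1) is not a safe way to increment an AtomicInteger: the read and the write are two separate operations, so concurrent callbacks can lose updates. Remove that line and increment inside the if statement instead: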
if (counter.incrementAndGet() == UJPPHighTrafficClient.iterations)


How to get server status using multi-threads periodically

The below code works fine and it connects to a given server (host, port) and gets the connection status.
What it does is:
PollService implements the Callable interface and connects to a server(host, port) then it returns the status.
Since this should happen periodically, it iterates the Hashmap entries in a while(true) loop infinitely.
The problem: On the server side, I see that it takes 2 or 3 seconds to reach the thread, whereas if I use a Runnable with a periodic implementation it connects within 1 second. It looks like iterating over the HashMap in an infinite loop is a slow approach.
However, I cannot use Runnable, as it doesn't return the status of the connection, which I need to use later.
Below is the ServiceMonitor class (client) which connects to the server.
package org.example;
import java.time.LocalDateTime;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.stream.Collectors;
public class ServicesMonitor {
private ExecutorService scheduledExecutorService = null;
private static Logger logger = Logger.getLogger(ServicesMonitor.class.getName());
private final Map<ServiceType, List<ClientMonitorService>> clientMonitorServicesMap = new HashMap<>();
public void registerInterest(ClientMonitorService clientMonitorService) {
clientMonitorServicesMap.computeIfAbsent(clientMonitorService.getServiceToMonitor().getServiceType(), v -> new ArrayList<>()).add(clientMonitorService);
public Map<ServiceType, List<ClientMonitorService>> getClineMonitorService() {
return clientMonitorServicesMap;
public void poll(){
//Observable.interval(1, TimeUnit.SECONDS).st
public void pollServices() {
scheduledExecutorService = Executors.newFixedThreadPool(clientMonitorServicesMap.size());
try {
while (true) {
clientMonitorServicesMap.forEach((k, v) -> {
Future<Boolean> val = scheduledExecutorService.submit(new PollService(k));
try {
boolean result = val.get();
System.out.println("service " + k.getHost() + ":" + k.getPort() + "status is " + result);
if (result) {
List<ClientMonitorService> list = v.stream().filter(a -> LocalDateTime.now().getSecond() % a.getServiceToMonitor().getFreqSec() == 0)
list.stream().forEach(a -> System.out.println(a.getClientId()));
} catch (InterruptedException e) {
} catch (ExecutionException e) {
} catch (Exception e) {
logger.log(Level.SEVERE, e.getMessage());
} finally {
How to improve the performance of this code by reducing the time it takes to connect to the server?
How to improve this code?
After using get(1, TimeUnit.SECONDS), I started to see an improvement on the server side as well (the threads are reached in less than 1 second), since we no longer wait more than 1 second on the client side.
while (true) {
clientMonitorServicesMap.forEach((k, v) -> {
Future<Boolean> val = scheduledExecutorService.submit(new PollService(k));
try {
boolean result = val.get(1, TimeUnit.SECONDS);
System.out.println("service " + k.getHost() + ":" + k.getPort() + "status is " + result);
if (result) {
List<ClientMonitorService> list = v.stream()
//.filter(a -> LocalDateTime.now().getSecond() % a.getServiceToMonitor().getFreqSec() == 0)
list.stream().forEach(a -> System.out.println(a.getClientId()));
} catch (InterruptedException e) {
logger.log(Level.WARNING,"Interrupted -> " + k.getHost()+":"+k.getPort());
} catch (ExecutionException e) {
logger.log(Level.INFO,"ExecutionException exception -> "+ k.getHost()+":"+k.getPort());
} catch (TimeoutException e) {
logger.log(Level.INFO,"TimeoutException exception -> "+ k.getHost()+":"+k.getPort());

spark application does not stop when multiple threads share the same spark context

I have tried to reproduce the problem I am facing. My problem statement: multiple files are present in a folder. I need to do a word count for each file and print the result. Each file should be processed in parallel! Of course, there is a limit to the parallelism. I have written the following code to accomplish this, and it runs fine. The cluster has a MapR installation of Spark, with spark.scheduler.mode = FIFO.
Q1 - Is there a better way to accomplish the task mentioned above?
Q2 - I have observed that the application does not stop even when it
has completed the word counting of the available files. I am unable to
figure out how to deal with it.
package groupId.artifactId;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
public class Executor {
* #param args
public static void main(String[] args) {
final int threadPoolSize = 5;
SparkConf sparkConf = new SparkConf().setMaster("yarn-client").setAppName("Tracker").set("spark.ui.port","0");
JavaSparkContext jsc = new JavaSparkContext(sparkConf);
ExecutorService executor = Executors.newFixedThreadPool(threadPoolSize);
List<Future> listOfFuture = new ArrayList<Future>();
for (int i = 0; i < 20; i++) {
if (listOfFuture.size() < threadPoolSize) {
FlexiWordCount flexiWordCount = new FlexiWordCount(jsc, i);
Future future = executor.submit(flexiWordCount);
} else {
boolean allFutureDone = false;
while (!allFutureDone) {
allFutureDone = checkForAllFuture(listOfFuture);
System.out.println("Threads not completed yet!");
try {
Thread.sleep(2000);//waiting for 2 sec, before next check
} catch (InterruptedException e) {
// TODO Auto-generated catch block
System.out.println("printing of future done");
System.out.println("future list got cleared");
try {
executor.awaitTermination(5, TimeUnit.MINUTES);
} catch (InterruptedException e) {
// TODO Auto-generated catch block
/**
 * Prints the result of every future in the list to standard output,
 * calling {@code get()} on each one in list order (which waits for that
 * future to finish).
 *
 * @param listOfFuture futures whose results are printed
 */
private static void printFutureResult(List<Future> listOfFuture) {
Iterator<Future> iterateFuture = listOfFuture.iterator();
while (iterateFuture.hasNext()) {
Future tempFuture = iterateFuture.next();
try {
System.out.println("Future result " + tempFuture.get());
// NOTE(review): no handling is visible in either catch block below - confirm failures are not silently dropped.
} catch (InterruptedException e) {
// TODO Auto-generated catch block
} catch (ExecutionException e) {
// TODO Auto-generated catch block
/**
 * Reports whether every future in the list has finished.
 *
 * @param listOfFuture futures to inspect; none of them is waited on
 * @return {@code true} unless some future reports {@code isDone() == false}
 */
private static boolean checkForAllFuture(List<Future> listOfFuture) {
// Starts optimistic; an empty list therefore yields true.
boolean status = true;
Iterator<Future> iterateFuture = listOfFuture.iterator();
while (iterateFuture.hasNext()) {
Future tempFuture = iterateFuture.next();
if (!tempFuture.isDone()) {
status = false;
// NOTE(review): closing braces are missing in this excerpt; presumably this return follows the loop - confirm.
return status;
package groupId.artifactId;
import java.io.Serializable;
import java.util.Arrays;
import java.util.concurrent.Callable;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.Function2;
import org.apache.spark.api.java.function.PairFunction;
import scala.Tuple2;
public class FlexiWordCount implements Callable<Object>,Serializable {
private static final long serialVersionUID = 1L;
private JavaSparkContext jsc;
private int fileId;
public FlexiWordCount(JavaSparkContext jsc, int fileId) {
this.jsc = jsc;
this.fileId = fileId;
/** Adds two partial counts; passed to {@code reduceByKey} in {@code call()}. */
private static class Reduction implements Function2<Integer, Integer, Integer>{
public Integer call(Integer i1, Integer i2) {
return i1 + i2;
/** Maps a word to the pair (word, 1); passed to {@code mapToPair} in {@code call()}. */
private static class KVPair implements PairFunction<String, String, Integer>{
public Tuple2<String, Integer> call(String paramT)
throws Exception {
return new Tuple2<String, Integer>(paramT, 1);
/** Splits a line into words on single space characters; passed to {@code flatMap} in {@code call()}. */
private static class Flatter implements FlatMapFunction<String, String>{
public Iterable<String> call(String s) {
// split(" ") breaks on each single space, so consecutive spaces produce empty-string "words".
return Arrays.asList(s.split(" "));
/**
 * Runs a word count over the file {@code /root/folder/experiment979/<fileId>.txt}
 * using the shared {@code jsc} context.
 *
 * @return the collected (word, count) pairs
 * @throws Exception declared by {@link Callable#call()}
 */
public Object call() throws Exception {
JavaRDD<String> jrd = jsc.textFile("/root/folder/experiment979/" + fileId +".txt");
System.out.println("inside call() for fileId = " + fileId);
// lines -> words -> (word, 1) -> summed per word
JavaRDD<String> words = jrd.flatMap(new Flatter());
JavaPairRDD<String, Integer> ones = words.mapToPair(new KVPair());
JavaPairRDD<String, Integer> counts = ones.reduceByKey(new Reduction());
return counts.collect();
Why is the program not closing automatically?
Ans: You have not closed the SparkContext. Try changing the main method to this:
public static void main(String[] args) {
final int threadPoolSize = 5;
SparkConf sparkConf = new SparkConf().setMaster("yarn-client").setAppName("Tracker").set("spark.ui.port","0");
JavaSparkContext jsc = new JavaSparkContext(sparkConf);
ExecutorService executor = Executors.newFixedThreadPool(threadPoolSize);
List<Future> listOfFuture = new ArrayList<Future>();
for (int i = 0; i < 20; i++) {
if (listOfFuture.size() < threadPoolSize) {
FlexiWordCount flexiWordCount = new FlexiWordCount(jsc, i);
Future future = executor.submit(flexiWordCount);
} else {
boolean allFutureDone = false;
while (!allFutureDone) {
allFutureDone = checkForAllFuture(listOfFuture);
System.out.println("Threads not completed yet!");
try {
Thread.sleep(2000);//waiting for 2 sec, before next check
} catch (InterruptedException e) {
// TODO Auto-generated catch block
System.out.println("printing of future done");
System.out.println("future list got cleared");
try {
executor.awaitTermination(5, TimeUnit.MINUTES);
} catch (InterruptedException e) {
// TODO Auto-generated catch block
Is there a better way?
Ans: Yes. You should pass the directory containing the files to the SparkContext and use .textFile on the directory; in that case Spark will parallelize the reads from the directory across the executors. If you create threads yourself and then use the same SparkContext to re-submit a job for each file, you are adding the extra overhead of submitting an application to the YARN queue.
I think the fastest approach would be to pass the entire directory directly, create an RDD out of it, and then let Spark launch parallel tasks to process all the files on different executors. You can experiment with the .repartition() method on the RDD, as it would launch that many tasks to run in parallel.

Java (Raspberry pi) Thread

I am a student who is studying Java (especially on the Raspberry Pi). I have a question about this multithreaded code. It compiles, but it doesn't work on my kit. If you don't mind, could you check my code and help me?
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.Socket;
import java.net.SocketAddress;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.io.File;
import java.io.FileWriter;
public class RcvThread2 implements Runnable{
private static final int sizeBuf = 50;
private Socket clientSocket;
private Logger logger;
private SocketAddress clientAddress;
public RcvThread2(Socket clntSock, SocketAddress clientAddress, Logger logger) {
this.clientSocket = clntSock;
this.logger = logger;
this.clientAddress = clientAddress;
static class CloseExtends extends Thread {
static final String GPIO_OUT = "out";
static final String GPIO_ON = "1";
static final String GPIO_OFF = "0";
static final String[] GpioChannels = {"18"};
public static void main(String[] args) {
FileWriter[] commandChannels;
try {
FileWriter unexportFile = new FileWriter("sys/class/gpio/unexport");
FileWriter exportFile = new FileWriter("sys/class/gpio/gpio/export");
for(String gpioChannel : GpioChannels) {
File exportFileCheck =
new File("sys/class/gpio/gpio" +gpioChannel);
if(exportFileCheck.exists()) {
FileWriter directionFile = new FileWriter("/sys/class/gpio/gpio" + gpioChannel + "/direction");
FileWriter commandChannel = new FileWriter("sys/class/gpio/gpio" + GpioChannels[0] + "/value");
int period = 20;
int repeatLoop = 25;
int counter;
while(true) {
for(counter = 0; counter < repeatLoop; counter++) {
java.lang.Thread.sleep(2, 20000);
} catch(Exception exception) {
public void main(){
try {
InputStream ins = clientSocket.getInputStream();
OutputStream outs = clientSocket.getOutputStream();
int rcvBufSize;
byte[] rcvBuf = new byte[sizeBuf];
while ((rcvBufSize = ins.read(rcvBuf)) != -1) {
String rcvData = new String(rcvBuf, 0, rcvBufSize, "UTF-8");
if(rcvData.compareTo("MotorLock") == 0) {
CloseExtends te = new CloseExtends();
if(rcvData.compareTo("MotorOpen") == 0) {
logger.info("Received data :" + rcvData + " (" + clientAddress + ")");
outs.write(rcvBuf, 0, rcvBufSize);
logger.info(clientSocket.getRemoteSocketAddress() + "Closed");
} catch (IOException ex) {
logger.log(Level.WARNING, "Exception in RcvThread", ex);
}finally {
System.out.println("Disconnected! Client IP :" + clientAddress);
} catch (IOException e) {}
The lower main method never gets called.
If you run your program it will execute the public static void main(String[] args) { method.
I think this is the method you want to run in the second thread?!
If you declare and run your new thread using
CloseExtends te = new CloseExtends();
it will run the thread's public void run() { method.
So, if I understand your intention correctly, you should rename the main method in the CloseExtends class to the thread's run method, and change the signature of the lower main method to that of the Java program's main method, public static void main(String[] args) {.
I would not name any other method "main" if it is not really a main method.
You can see an example of creating a new thread with the Runnable interface here: https://docs.oracle.com/javase/tutorial/essential/concurrency/runthread.html

Java Tor Lib : How to setup Orchid Tor Lib with Java?

I am trying to use the Orchid Tor library from Java code; unfortunately, because of the lack of documentation, I am not able to make it work. This is what I did:
private final static String DEFAULT_SOCKS_PORT = "9050";
TorClient torClient = new TorClient();
torClient.addInitializationListener(new TorInitializationListener() {
public void initializationProgress(String string, int i) {
System.out.println(">>> [ "+ i + "% ]: "+ string);
// throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates.
public void initializationCompleted() {
try {
System.out.println("Tor is ready to go!");
System.out.println("is online "+isOnline()); //isOnilne is just function return true if connected by pinging google.com
} catch (Exception e) {
/**
 * Sets the JVM-wide proxy system properties listed below.
 *
 * @param host value written to every {@code *proxyHost} / {@code *ProxyHost} property
 * @param port value written to the generic, http and https port properties
 */
private static void setSystemProperties(String host, String port)
System.setProperty("proxyHost", host);
System.setProperty("proxyPort", port);
System.setProperty("http.proxyHost", host);
System.setProperty("http.proxyPort", port);
System.setProperty("https.proxyHost", host);
System.setProperty("https.proxyPort", port);
// NOTE(review): both SOCKS port properties use DEFAULT_SOCKS_PORT and ignore the port argument - confirm this is intended.
System.setProperty("socks.proxyHost", host);
System.setProperty("socks.proxyPort", DEFAULT_SOCKS_PORT);
System.setProperty("socksProxyHost", host);
System.setProperty("socksProxyPort", DEFAULT_SOCKS_PORT);
This seems to work using Java8.
Dependencies: orchid-1.0.0.jar, jsoup-1.8.2.jar & commons-io-2.4.jar
package orchiddemo;
import com.subgraph.orchid.TorClient;
import com.subgraph.orchid.TorInitializationListener;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.net.HttpURLConnection;
import java.net.InetSocketAddress;
import java.net.Proxy;
import java.net.Socket;
import java.net.URL;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.commons.io.IOUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
public class OrchidDemo {
private static TorClient client;
public static void main(String[] args) {
private static void startOrchid() {
//listen on (default)
client = new TorClient();
client.enableSocksListener();//or client.enableSocksListener(yourPortNum);
private static void stopOrchid() {
public static TorInitializationListener createInitalizationListner() {
return new TorInitializationListener() {
public void initializationProgress(String message, int percent) {
System.out.println(">>> [ " + percent + "% ]: " + message);
public void initializationCompleted() {
System.out.println("Tor is ready to go!");
private static void doTests() {
private static void testOrchidUsingProxyObject() {
Thread thread = new Thread() {
public void run() {
try {
//Caution: Native Java DNS lookup will occur outside of the tor network.
//Monitor traffic on port 53 using tcpdump or equivalent.
URL url = new URL("https://wtfismyip.com/");
Proxy proxy = new Proxy(Proxy.Type.SOCKS, new InetSocketAddress("localhost", 9150));
HttpURLConnection uc = (HttpURLConnection) url.openConnection(proxy);
Document document = Jsoup.parse(IOUtils.toString(uc.getInputStream()));
String result = document.select("div[id=tor").text();
System.out.println("testOrchidUsingProxyObject: " + result);
} catch (Exception ex) {
Logger.getLogger(OrchidDemo.class.getName()).log(Level.SEVERE, null, ex);
private static void testOrchidUsingSystemPropsProxy() {
Thread thread = new Thread() {
public void run() {
try {
//Caution: Native Java DNS lookup will occur outside of the tor network.
//Monitor traffic on port 53 using tcpdump or equivalent.
System.setProperty("socksProxyHost", "");
System.setProperty("socksProxyPort", "9150");
Document document = Jsoup.connect("https://wtfismyip.com/").get();
String result = document.select("div[id=tor").text();
System.out.println("testOrchidUsingSystemPropsProxy: " + result);
System.setProperty("socksProxyHost", "");
System.setProperty("socksProxyPort", "");
} catch (Exception ex) {
Logger.getLogger(OrchidDemo.class.getName()).log(Level.SEVERE, null, ex);
private static void testOrchidUsingSocket() {
Thread thread = new Thread() {
public void run() {
try {
// This does not appear to leak the DNS lookup, but requires confirmation!
Socket socket = client.getSocketFactory().createSocket("www.google.com", 80);
PrintWriter writer = new PrintWriter(socket.getOutputStream(), true);
BufferedReader reader = new BufferedReader(new InputStreamReader(socket.getInputStream()));
writer.println("GET /");
String line;
System.out.println("testOrchidUsingSocket: ");
while ((line = reader.readLine()) != null) {
} catch (Exception ex) {
Logger.getLogger(OrchidDemo.class.getName()).log(Level.SEVERE, null, ex);
The DNS leak is a drama, but silvertunnel can help: NetAddressNameService
I'm hoping someone might know of a better way....

Setting up low-latency client/server example in Netty

I have a simple ECHO server and client written using Netty. The server and client are on the same machine. I was expecting mean latency of the order of a couple of milliseconds, however, regardless of what I try I can never bring the latency down to sub-millisecond durations. Any help would be greatly appreciated.
Update: Even when using System.nanoTime I see the latency around 25-30ms.
import org.jboss.netty.bootstrap.ClientBootstrap;
import org.jboss.netty.channel.*;
import org.jboss.netty.channel.socket.nio.NioClientSocketChannelFactory;
import org.jboss.netty.handler.execution.ExecutionHandler;
import org.jboss.netty.handler.execution.OrderedMemoryAwareThreadPoolExecutor;
import java.net.InetSocketAddress;
import java.util.concurrent.Executors;
public class EchoClient {
public static void main(String[] args) {
if (args.length != 1) {
System.err.println(String.format("usage: %s <num-msgs>", EchoClient.class.getCanonicalName()));
final long NUM_MSGS = Integer.parseInt(args[0]);
final EchoClientHandler echoClientHandler = new EchoClientHandler();
final ExecutionHandler e =
new ExecutionHandler(new OrderedMemoryAwareThreadPoolExecutor(4, 128 * 1024L, 128 * 1024L));
ChannelFactory factory =
new NioClientSocketChannelFactory(Executors.newCachedThreadPool(),
ClientBootstrap bootstrap = new ClientBootstrap(factory);
bootstrap.setPipelineFactory(new ChannelPipelineFactory() {
public ChannelPipeline getPipeline() throws Exception {
return Channels.pipeline(new TestPayloadEncoder(),
new TestPayloadDecoder(),
bootstrap.setOption("tcpNoDelay", true);
bootstrap.setOption("keepAlive", false);
bootstrap.setOption("child.keepAlive", false);
bootstrap.setOption("sendBufferSize", 128 * 1024L);
bootstrap.setOption("receiveBufferSize", 128 * 1024L);
for (int i = 0; i < NUM_MSGS; i++) {
final InetSocketAddress serverAddr =
new InetSocketAddress("localhost", 8080);
bootstrap.connect(serverAddr).addListener(new ChannelFutureListener() {
public void operationComplete(ChannelFuture f) throws Exception {
if (f.isSuccess()) {
f.getChannel().write(new TestPayload());
while (echoClientHandler.numMsgs.get() < NUM_MSGS);
System.out.println(String.format("mean transfer time: %.2fms",
((float) echoClientHandler.aggTime.get()) /
import org.jboss.netty.channel.ChannelHandlerContext;
import org.jboss.netty.channel.ExceptionEvent;
import org.jboss.netty.channel.MessageEvent;
import org.jboss.netty.channel.SimpleChannelHandler;
import java.util.concurrent.atomic.AtomicLong;
public class EchoClientHandler extends SimpleChannelHandler {
public final AtomicLong numMsgs = new AtomicLong(0);
public final AtomicLong aggTime = new AtomicLong(0);
/**
 * Adds the elapsed time between the payload's timestamp and its arrival
 * here to {@code aggTime}.
 */
public void messageReceived(ChannelHandlerContext ctx, MessageEvent e) throws Exception {
// Wall-clock milliseconds, compared against the timestamp returned by m.getTime().
long recvTime = System.currentTimeMillis();
TestPayload m = (TestPayload) e.getMessage();
aggTime.addAndGet(recvTime - m.getTime());
// NOTE(review): no update of numMsgs is visible in this excerpt - confirm it is incremented here.
public void exceptionCaught(ChannelHandlerContext ctx, ExceptionEvent e) throws Exception {
import org.jboss.netty.bootstrap.ServerBootstrap;
import org.jboss.netty.channel.ChannelFactory;
import org.jboss.netty.channel.ChannelPipeline;
import org.jboss.netty.channel.ChannelPipelineFactory;
import org.jboss.netty.channel.Channels;
import org.jboss.netty.channel.socket.nio.NioServerSocketChannelFactory;
import org.jboss.netty.handler.execution.ExecutionHandler;
import org.jboss.netty.handler.execution.OrderedMemoryAwareThreadPoolExecutor;
import java.net.InetSocketAddress;
import java.util.concurrent.Executors;
public class EchoServer {
public static void main(String[] args) {
ChannelFactory factory =
new NioServerSocketChannelFactory(Executors.newFixedThreadPool(4),
ServerBootstrap bootstrap = new ServerBootstrap(factory);
final ExecutionHandler e =
new ExecutionHandler(new OrderedMemoryAwareThreadPoolExecutor(4, 128 * 1024L, 128 * 1024L));
bootstrap.setPipelineFactory(new ChannelPipelineFactory() {
public ChannelPipeline getPipeline() throws Exception {
return Channels.pipeline(e, new EchoServerHandler());
bootstrap.setOption("reuseAddr", true);
bootstrap.setOption("keepAlive", false);
bootstrap.setOption("child.reuseAddr", true);
bootstrap.setOption("child.soLinger", 0);
bootstrap.setOption("child.keepAlive", false);
bootstrap.setOption("child.tcpNoDelay", true);
bootstrap.setOption("child.sendBufferSize", 128 * 1024L);
bootstrap.setOption("child.receiveBufferSize", 128 * 1024L);
bootstrap.bind(new InetSocketAddress("localhost", 8080));
import org.jboss.netty.channel.*;
public class EchoServerHandler extends SimpleChannelHandler {
public void messageReceived(ChannelHandlerContext ctx, MessageEvent e) throws Exception {
public void exceptionCaught(ChannelHandlerContext ctx, ExceptionEvent e) throws Exception {
import org.jboss.netty.buffer.ChannelBuffer;
import java.util.Date;
import java.util.Random;
public class TestPayload {
private static final int PREAMBLE_LEN = (Long.SIZE + Integer.SIZE) / 8;
private static final Random RNG;
static {
RNG = new Random();
RNG.setSeed(new Date().getTime());
private final int paddingLen;
private final byte[] padding;
private final long time;
public TestPayload() {
public TestPayload(int sizeInBytes) {
this.paddingLen = sizeInBytes;
this.padding = new byte[this.paddingLen];
this.time = System.currentTimeMillis();
private TestPayload(long time, int paddingLen, byte[] padding) {
this.paddingLen = paddingLen;
this.padding = padding;
this.time = time;
public long getTime() {
return this.time;
public void writeTo(ChannelBuffer buf) {
/**
 * Decodes a payload from the buffer: a long timestamp, an int padding
 * length, then the padding bytes.
 *
 * @param buf buffer to read from
 * @return the decoded payload, or {@code null} when the buffer holds fewer
 *         bytes than the preamble or than the announced padding length
 */
public static TestPayload readFrom(ChannelBuffer buf) {
if (buf.readableBytes() < PREAMBLE_LEN) {
return null;
long time = buf.readLong();
int paddingLen = buf.readInt();
// NOTE(review): the preamble has already been read when this check returns null; no reader-index reset is visible here.
if (buf.readableBytes() < paddingLen) {
return null;
byte[] padding = new byte[paddingLen];
// NOTE(review): no copy from buf into padding is visible in this excerpt - confirm the bytes are read before construction.
return new TestPayload(time, paddingLen, padding);
public int getLength() {
return PREAMBLE_LEN + this.paddingLen;
Are you running your client and your server in different JVMs? If so, measuring time across JVM boundaries is not as straightforward as you would think. For example, using System.nanoTime() is not necessarily going to work, according to the Oracle Javadoc:
The values returned by this method become meaningful only when the difference between two such values, obtained within the same instance of a Java virtual machine, is computed.
Assuming you can find a reliable way to measure time across JVMs and if your goal is to isolate how long it takes a Netty client to send to a Netty server then simplify your use case to isolate this as much as possible. For example, in the above code you are counting the time to send/receive an array of 65536 bytes. Remove this from the timing experiment to help isolate where the bottlenecks are.
How many runs are you collecting timing from? Are you excluding initialization time of Netty itself (running a few messages between client/server before taking timing)?
Also how does adjusting your configuration impact performance? There are plenty of knobs to tweak (thread pool size, send/receive buff size, etc...).
What version of Netty are you using, and is there an option to force a flush after you write?
I don't see the code for EchoClient. It looks like you copy/pasted the code for EchoClientHandler where EchoClient's code should be.
