I wrote a neural network in Java, and it seemed like a good idea to move the computation to the GPU for performance reasons. The problem is that it is too slow. I used JOCL for this. I don't know whether the kernel is the problem; here is some code:
// OpenCL C source of the per-layer kernel. Each work-item (gid) handles one
// neuron of the layer: it computes the dot product of that neuron's row of
// `weights` (length[0] consecutive floats starting at gid * length[0]) with
// `input`, and stores tanh of the sum into dst[gid].
// Fixes: the loop used the undeclared identifier `gewichte` instead of the
// `weights` parameter (the program would not build), and the redundant
// `dst[gid] = 0` global store before the loop has been dropped because the
// value is unconditionally overwritten at the end.
private static String programSource = "__kernel void "
        + "sampleKernel(__constant float *input,"
        + " __global float *weights,"
        + " __constant int *length,"
        + " __global float *dst)" + " {"
        + " __private int gid = get_global_id(0);"
        + " __private int l = length[0];"
        + " __private int pos = (gid*l);"
        + " __private float tmp = 0;"
        + " for(int i = 0; i < l; i++){"
        + " tmp += weights[pos+i]*input[i];"
        + " }"
        + " dst[gid] = tanh(tmp);" + "}";
Making the weights __constant made the program even slower (maybe it has to constantly move data between global and constant memory because the weights array is too big).
it seems like the most time takes this line:
tmp += gewichte[pos+i]*input[i];
One kernel call represents the computation of one neural network layer, and for every neuron of the layer one work-item should compute tanh(weightsOnThisNeuron · outputsOfAllNeuronsOfPreviousLayer), i.e. the tanh of the weighted sum of the previous layer's outputs.
I prepare all the kernels and store them so that if I want to execute them, they dont have to be prepared again and again.
The only IO between GPU and CPU is at the beginning and at the end when I retrieve the Output
Here is the code where I initialize the network and run the kernels:
/**
 * Builds the device-side representation of a feed-forward network.
 *
 * For every layer transition i the weights gew[i][j][k] are flattened row by
 * row (j outer, k inner) into one float buffer, the input width of the layer
 * is stored in a one-element int buffer, and an uninitialised float buffer
 * with one slot per neuron of layer i + 1 is allocated for the layer output.
 *
 * @param gew          weights; gew[i][0].length is taken as the size of layer i
 *                     and gew[last].length as the size of the final layer
 * @param context      OpenCL context used for all buffer allocations
 * @param commandQueue queue stored for later kernel launches
 * @throws Exception if context or commandQueue is null
 */
public OpenClNetz(float[][][] gew, cl_context context,
        cl_command_queue commandQueue) throws Exception {
    if (context == null) {
        throw new Exception("context == null, Konstruktor schlug fehl");
    }
    if (commandQueue == null) {
        throw new Exception("commandQueue == null, Konstruktor schlug fehl");
    }

    // Layer sizes: input width of every weight matrix, then the row count of the last one.
    final int transitions = gew.length;
    this.layersize = new int[transitions + 1];
    for (int layer = 0; layer < transitions; layer++) {
        this.layersize[layer] = gew[layer][0].length;
    }
    this.layersize[transitions] = gew[transitions - 1].length;

    this.context = context;
    this.commandQueue = commandQueue;
    this.builded = false;
    this.output = new float[this.layersize[transitions]];
    this.gewichte = new cl_mem[transitions];
    this.tmp = new cl_mem[transitions];
    this.lengths = new cl_mem[transitions];
    this.input = new cl_mem();

    final int[][] widths = new int[transitions][];
    for (int layer = 0; layer < transitions; layer++) {
        final int inWidth = layersize[layer];
        final int outWidth = layersize[layer + 1];

        // Flatten the weight matrix of this transition in row-major order.
        final float[] flat = new float[inWidth * outWidth];
        for (int neuron = 0; neuron < outWidth; neuron++) {
            System.arraycopy(gew[layer][neuron], 0, flat, neuron * inWidth, inWidth);
        }
        widths[layer] = new int[] { inWidth };

        gewichte[layer] = clCreateBuffer(context,
                CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
                Sizeof.cl_float * flat.length, Pointer.to(flat), null);
        lengths[layer] = clCreateBuffer(context,
                CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
                Sizeof.cl_int, Pointer.to(widths[layer]), null);
        tmp[layer] = clCreateBuffer(context, CL_MEM_READ_WRITE,
                Sizeof.cl_float * outWidth, null, null);
    }
}
/**
 * Uploads a new input vector and binds it as argument 0 of the first layer's kernel.
 *
 * A fresh device buffer is created on every call; the buffer previously
 * referenced by {@code input} is not released here. If the array length does
 * not match the size of the input layer, a message is printed and nothing is changed.
 *
 * @param in input activations, expected to have layersize[0] elements
 */
public void setInput(float[] in) {
    final int expected = layersize[0];
    if (in.length == expected) {
        input = clCreateBuffer(context,
                CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
                Sizeof.cl_float * expected, Pointer.to(in), null);
        clSetKernelArg(kernel[0], 0, Sizeof.cl_mem, Pointer.to(input));
        return;
    }
    System.out.println("array Länge entspricht nicht der Inputsize, setInput schlug fehl");
}
/**
 * Compiles the kernel source and prepares one kernel object per layer transition.
 *
 * Kernel 0 reads from {@code input}; every later kernel reads from the output
 * buffer of the previous layer. Each kernel writes to its own {@code tmp} buffer.
 * Sets {@code builded} so that run() may be called afterwards.
 */
public void buildProgramm() {
    program = clCreateProgramWithSource(context, 1,
            new String[] { programSource }, null, null);
    clBuildProgram(program, 0, null, null, null, null);
    builded = true;

    final int layerCount = gewichte.length;
    kernel = new cl_kernel[layerCount];
    for (int layer = 0; layer < layerCount; layer++) {
        // The first layer consumes the user input, later layers the previous layer's output.
        final cl_mem source = (layer == 0) ? input : tmp[layer - 1];
        final cl_kernel layerKernel = clCreateKernel(program, "sampleKernel", null);
        kernel[layer] = layerKernel;
        clSetKernelArg(layerKernel, 0, Sizeof.cl_mem, Pointer.to(source));
        clSetKernelArg(layerKernel, 1, Sizeof.cl_mem, Pointer.to(gewichte[layer]));
        clSetKernelArg(layerKernel, 2, Sizeof.cl_mem, Pointer.to(lengths[layer]));
        clSetKernelArg(layerKernel, 3, Sizeof.cl_mem, Pointer.to(tmp[layer]));
    }
}
/**
 * Enqueues the kernels of all layers, in layer order, on the command queue.
 *
 * Each layer is launched with one work-item per neuron of the layer it
 * produces and a fixed local work size of 8.
 *
 * @throws Exception if buildProgramm() has not been called yet
 */
public void run() throws Exception {
    if (!builded) {
        throw new Exception(
                "buildProgramm muss zuerst aufgerufen werden, run schlug fehl");
    }
    this.local_work_size = new long[] { 8 };
    for (int layer = 0; layer < gewichte.length; layer++) {
        // Global size = number of neurons in the layer this kernel computes.
        final long[] globalSize = { layersize[layer + 1] };
        clEnqueueNDRangeKernel(commandQueue, kernel[layer], 1, null,
                globalSize, local_work_size, 0, null, null);
    }
}
thats the main:
// Benchmark driver: creates an OpenCL context and command queue, builds a
// 512-512-512 network, then times 10000 runs of the OpenCL implementation
// (OpenClNetz) against 10000 runs of the CPU implementation (Netz).
public class TEST{
public static void main(String args[]) throws Exception
{
// The platform, device type and device number
// that will be used
final int platformIndex = 0;
final long deviceType = CL_DEVICE_TYPE_DEFAULT;
final int deviceIndex = 0;
// Enable exceptions and subsequently omit error checks in this sample
CL.setExceptionsEnabled(true);
// Obtain the number of platforms
int numPlatformsArray[] = new int[1];
clGetPlatformIDs(0, null, numPlatformsArray);
int numPlatforms = numPlatformsArray[0];
// Obtain a platform ID
cl_platform_id platforms[] = new cl_platform_id[numPlatforms];
clGetPlatformIDs(platforms.length, platforms, null);
cl_platform_id platform = platforms[platformIndex];
// Initialize the context properties
cl_context_properties contextProperties = new cl_context_properties();
contextProperties.addProperty(CL_CONTEXT_PLATFORM, platform);
// Obtain the number of devices for the platform
int numDevicesArray[] = new int[1];
clGetDeviceIDs(platform, deviceType, 0, null, numDevicesArray);
int numDevices = numDevicesArray[0];
// Obtain a device ID
cl_device_id devices[] = new cl_device_id[numDevices];
clGetDeviceIDs(platform, deviceType, numDevices, devices, null);
cl_device_id device = devices[deviceIndex];
// Create a context for the selected device
cl_context context = clCreateContext(
contextProperties, 1, new cl_device_id[]{device},
null, null, null);
// Create a command-queue for the selected device
cl_command_queue commandQueue =
clCreateCommandQueue(context, device, 0, null);
// Network topology (three layers of 512 neurons) and a random input in [-0.7, 0.7)
int[] layersize = {512,512,512};
float[] in = new float[512];
for(int i = 0; i < 512; i++){
in[i] = (float) (Math.random()*1.4 -0.7);
}
// CPU reference network; its weights are reused for the OpenCL network
Netz net = new Netz(layersize);
net.set_Input(in);
OpenClNetz netz= new OpenClNetz(net.gewichte,context,commandQueue);
// The kernels must exist before setInput, which binds the input to kernel[0]
netz.buildProgramm();
netz.setInput(in);
// Time 10000 forward passes on the OpenCL device.
// NOTE(review): run() only enqueues kernels; presumably retrieveOutput()
// blocks until the queue has drained, so the measured interval also
// contains the read-back and the printing of the output - confirm.
double time = System.currentTimeMillis();
for(int i = 0; i < 10000; i++){
netz.run();
}
System.out.println(Arrays.toString(netz.retrieveOutput()));
System.out.println("time OpenCl: " + (System.currentTimeMillis()-time));
// Time 10000 forward passes of the CPU implementation
time = System.currentTimeMillis();
for(int i = 0; i < 10000; i++){
net.start();
}
System.out.println("time normal: " + (System.currentTimeMillis()-time));
// Print both outputs so they can be compared by eye
System.out.println(Arrays.toString(netz.retrieveOutput()));
System.out.println(Arrays.toString(net.start()));
netz.destroy();
// Release kernel, program, and memory objects
clReleaseCommandQueue(commandQueue);
clReleaseContext(context);
has somebody any idea how I can make this faster??
the output is for:
normal (running on CPU) : 6475ms
running on GPU (local worksize = 1) : 19110ms
running on GPU (local worksize = 2) : 11778ms
running on GPU (local worksize = 4) : 8985ms
running on GPU (local worksize = 8) : 6880ms
running on GPU (local worksize = 16) : 8237ms (it becomes slower ?! O.o)
running on GPU (local worksize = 32) : 9298ms (Im kinda new to Jocl)
running on GPU (local worksize = 64) : 10062ms
Related
I am trying to determine the core and delta points of a fingerprint. I'm using the Poincaré index method, but I am unable to detect these points successfully and I can't figure out why.
First I divide the image in 15x15 blocks, then I calculate the x and y gradients which i use in obtaining the orientation map. After getting the mean orientation for each block then i apply the Poincaré index method, described in the image below (credits: Handbook of Fingerprint Recognition, Davide Maltoni):
And the code is this:
public static void detectSgPoints(int blkSze, Mat src) {
utils.console2file("cons_2_file");
int windowX = 1;
int windowY = 1;
if (blkSze < src.width()) {
windowX = src.width() - blkSze;
}
if (blkSze < src.height()) {
windowY = src.height() - blkSze;
}
Map<Point, Double> map = new HashMap<>();
double[][] avg = new double[src.height() / blkSze][src.width() / blkSze];
int m = 0;
int n = 0;
for (int i = 0; i < windowY; i += blkSze) {
for (int j = 0; j < windowX; j += blkSze) {
Mat block = utils.getROI(src, new Rect(j, i, blkSze, blkSze));
Mat dx = new Mat(new Size(blkSze, blkSze), CvType.CV_64FC1);
Mat dy = new Mat(new Size(blkSze, blkSze), CvType.CV_64FC1);
Imgproc.Sobel(block, dx, CvType.CV_64FC1, 1, 0);
Imgproc.Sobel(block, dy, CvType.CV_64FC1, 0, 1);
Mat orientation = calculateOrientation(dx, dy);
int cpx = j + (blkSze / 2), cpy = i + (blkSze / 2);
avg[m][n] = avgAngle(orientation, false);
if (avg[m][n] < 0) {
avg[m][n] = 360 + avg[m][n];
}
map.put(new Point(cpx, cpy), avg[m][n]);
n++;
}
n = 0;
m++;
}
for (int mm = 1; mm < avg.length - 1; mm++) {
for (int nn = 1; nn < avg[0].length - 1; nn++) {
int j = nn * blkSze;
int i = mm * blkSze;
double psum = 0;
int cpx = j + (blkSze / 2), cpy = i + (blkSze / 2);
for (int k = 0; k < anglePos2.length - 1; k++) {
double dif = 0.0;
dif = avg[mm + anglePos2[k + 1][0]][nn + anglePos2[k + 1][1]]
- avg[mm + anglePos2[k][0]][nn + anglePos2[k][1]];
System.out.println("adding " + "(" + avg[mm + anglePos2[k +1[0]][nn + anglePos2[k + 1][1]] + "-"
+ avg[mm + anglePos2[k][0]][nn + anglePos2[k][1]] + ") = " + dif + " to " + psum);
psum = psum + dif;
}
double poincare = psum;
System.out.println("cpx = " + cpx + ", cpy = " + cpy + " poincare = " + poincare);
}
}
}
/**
 * Returns the circular mean, in degrees, of all entries of an orientation matrix.
 *
 * Every entry is converted from radians to degrees and handed to
 * {@link #getMeanAngle(List)}. The {@code toDegrees} flag is not consulted.
 *
 * @param orientation matrix of angles in radians (first channel is read)
 * @param toDegrees   unused
 * @return mean angle in degrees as computed by getMeanAngle
 */
private static double avgAngle(Mat orientation, boolean toDegrees) {
    final int rows = orientation.height();
    final int cols = orientation.width();
    final List<Double> degrees = new ArrayList<>();
    for (int row = 0; row < rows; row++) {
        for (int col = 0; col < cols; col++) {
            degrees.add(Math.toDegrees(orientation.get(row, col)[0]));
        }
    }
    return getMeanAngle(degrees);
}
/**
 * Computes the circular mean of a list of angles.
 *
 * Each angle is mapped to a unit vector; the direction of the averaged
 * vector is returned. An empty list yields NaN.
 *
 * @param sample angles in degrees
 * @return mean direction in degrees, in the range of {@link Math#atan2}
 */
public static double getMeanAngle(List<Double> sample) {
    double sumCos = 0.0;
    double sumSin = 0.0;
    for (double degrees : sample) {
        final double radians = Math.toRadians(degrees);
        sumCos += Math.cos(radians);
        sumSin += Math.sin(radians);
    }
    sumCos /= sample.size();
    sumSin /= sample.size();
    return Math.toDegrees(Math.atan2(sumSin, sumCos));
}
/**
 * Builds a per-pixel gradient direction map.
 *
 * @param dx horizontal derivative (first channel is read)
 * @param dy vertical derivative, read at the same positions as dx
 * @return CV_32F matrix of dx's size holding atan2(dy, dx) in radians
 */
public static Mat calculateOrientation(Mat dx, Mat dy) {
    final Mat angles = new Mat(dx.size(), CvType.CV_32F);
    for (int row = 0; row < dx.rows(); row++) {
        for (int col = 0; col < dx.cols(); col++) {
            final double gx = dx.get(row, col)[0];
            final double gy = dy.get(row, col)[0];
            angles.put(row, col, Math.atan2(gy, gx));
        }
    }
    return angles;
}
Where is the problem?
I am learning JCuda and studying with JCuda samples.
When I studied a KMeans algorithm code using JCuda, I got a "CUDA_ERROR_ILLEGAL_ADDRESS" when executed line cuCtxSynchronize();
It confused me a lot. How can I solve it?
Here is KMeansKernel.cu
// Element-wise vector addition: sum[i] = a[i] + b[i] for every i < n.
// One thread handles one element; threads whose index is >= n do nothing.
extern "C"
__global__ void add(int n, float *a, float *b, float *sum)
{
    const int idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx >= n)
    {
        return;
    }
    sum[idx] = a[idx] + b[idx];
}
Main method(my class named "CUDA"):
// Entry point: reads an image, flattens it into 5-int records per pixel
// (three raster samples followed by x and y), uploads the records to the GPU
// and, for every cluster count k in [startClusters, endClusters], runs
// KmeansKernel and writes the resulting image as a BMP file.
// Several names used below (picFiles, st, startClusters, endClusters,
// distanceWeight, colorWeight, outputDirectory) come from code omitted from
// this listing.
public static void main(String[] args){
// omit some code which input kinds of parameters
try {
// Open image file
BufferedImage bi = ImageIO.read(picFiles);
if (bi == null) {
System.out.println("ERROR: File input error.");
return;
}
// Read image data
int length = bi.getWidth() * bi.getHeight();
// Five ints per pixel: raster samples 0..2, then the x and y coordinates
int[] imageProperty = new int[length*5];
int[] pixel;
int count = 0;
for (int y = 0; y < bi.getHeight(); y++) {
for (int x = 0; x < bi.getWidth(); x++) {
pixel = bi.getRaster().getPixel(x, y, new int[4]);
imageProperty[count*5 ] = pixel[0];
imageProperty[count*5+1] = pixel[1];
imageProperty[count*5+2] = pixel[2];
imageProperty[count*5+3] = x;
imageProperty[count*5+4] = y;
count++;
}
}
//setup
JCudaDriver.setExceptionsEnabled(true);
// Create the PTX file
String ptxFileName;
try
{
ptxFileName = preparePtxFile("KmeansKernel.cu");
}
catch (IOException e)
{
System.out.println("Warning...");
System.out.println(e.getMessage());
System.out.println("Exiting...");
return;
}
// Initialise the driver API, create a context on device 0 and load the module
cuInit(0);
CUdevice device = new CUdevice();
cuDeviceGet(device, 0);
CUcontext context = new CUcontext();
cuCtxCreate(context, 0, device);
CUmodule module = new CUmodule();
cuModuleLoad(module, ptxFileName);
CUfunction kmeansFunction = new CUfunction();
System.out.println("x");
// The function looked up here is the kernel named "add"
cuModuleGetFunction(kmeansFunction, module, "add");
//copy host input to device
CUdeviceptr imageDevice = new CUdeviceptr();
cuMemAlloc(imageDevice, imageProperty.length * Sizeof.INT);
cuMemcpyHtoD(imageDevice, Pointer.to(imageProperty), imageProperty.length * Sizeof.INT);
// Launch geometry: enough blocks of 256 threads to cover one thread per pixel record
int blockSizeX = 256;
int gridSizeX = (int) Math.ceil((double)(imageProperty.length / 5) / blockSizeX);
long et = System.currentTimeMillis();
System.out.println(((double)(et-st)/1000.0) + "s");
for (int k = startClusters; k <= endClusters; k++) {
long startTime = System.currentTimeMillis();
int[] clusters = new int[length];
// Initial centroids: k pixel records taken at a stride of length / k
int[] c = new int[k*5];
int h = 0;
for(int i = 0; i < k; i++) {
c[i*5] = imageProperty[h*5];
c[i*5+1] = imageProperty[h*5+1];
c[i*5+2] = imageProperty[h*5+2];
c[i*5+3] = imageProperty[h*5+3];
c[i*5+4] = imageProperty[h*5+4];
h += length / k;
}
double tolerance = 1e-4;
**//got warning in following line
CUDA.KmeansKernel(kmeansFunction, imageDevice, imageProperty, clusters, c, k, tolerance, distanceWeight, colorWeight, blockSizeX, gridSizeX);**
int[] output = calculateAveragePixels(imageProperty, clusters);
BufferedImage outputImage = new BufferedImage(bi.getWidth(), bi.getHeight(), BufferedImage.TYPE_INT_RGB);
// Pack the first three ints of every output record into one RGB value
for (int i = 0; i < length; i++) {
int rgb = output[i*5];
rgb = (rgb * 256) + output[i*5+1];
rgb = (rgb * 256) + output[i*5+2];
outputImage.setRGB(i%bi.getWidth(), i/bi.getWidth(), rgb);
}
String fileName = (picFiles.getName()) + ".bmp";
File outputFile = new File("output/" + fileName);
ImageIO.write(outputImage, "BMP", outputFile);
long runTime = System.currentTimeMillis() - startTime;
System.out.println("Completed iteration k=" + k + " in " + ((double)runTime/1000.0) + "s");
}
System.out.println("Files saved to " + outputDirectory.getAbsolutePath() + "\\");
cuMemFree(imageDevice);
} catch (IOException e) {
e.printStackTrace();
}
}
Method KmeansKernel:
/**
 * Runs the k-means iteration for one value of k.
 *
 * Device buffers are allocated for the cluster assignments, the k centroid
 * records (5 ints each) and one double per entry of {@code clusters}. Each
 * pass uploads the current centroids, launches {@code kmeansFunction},
 * copies the assignments back, recomputes the centroids on the host as the
 * integer mean of the assigned pixel records, and sums the device error
 * array. The loop ends once the change in summed error is at most
 * {@code tolerance}.
 *
 * NOTE(review): eight kernel parameters are passed (int, int, double, double
 * and four device pointers), while the only kernel visible in this file,
 * add(int, float*, float*, float*), takes four, and main() loads the
 * function named "add". A mismatch of this kind would explain
 * CUDA_ERROR_ILLEGAL_ADDRESS being reported by cuCtxSynchronize - confirm
 * which kernel is meant to be launched.
 *
 * @param kmeansFunction kernel handle to launch
 * @param imageDevice    device copy of imageProperty
 * @param imageProperty  host pixel records, 5 ints per pixel
 * @param clusters       output: cluster index per pixel
 * @param c              in/out: centroid records, 5 ints per cluster
 * @param k              number of clusters
 * @param tolerance      convergence threshold on the change of the summed error
 * @param distanceWeight forwarded to the kernel
 * @param colorWeight    forwarded to the kernel
 * @param blockSizeX     threads per block
 * @param gridSizeX      number of blocks
 */
private static void KmeansKernel(CUfunction kmeansFunction, CUdeviceptr imageDevice, int[] imageProperty, int[] clusters, int[] c,
int k, double tolerance, double distanceWeight, double colorWeight,
int blockSizeX, int gridSizeX) {
CUdeviceptr clustersDevice = new CUdeviceptr();
cuMemAlloc(clustersDevice, clusters.length * Sizeof.INT);
// Alloc device output
CUdeviceptr centroidPixels = new CUdeviceptr();
cuMemAlloc(centroidPixels, k * 5 * Sizeof.INT);
CUdeviceptr errorDevice = new CUdeviceptr();
cuMemAlloc(errorDevice, Sizeof.DOUBLE * clusters.length);
// c1 accumulates the per-cluster component sums on the host
int[] c1 = new int[k*5];
cuMemcpyHtoD(centroidPixels, Pointer.to(c), Sizeof.INT * 5 * k);
// begin algorithm
int[] counts = new int[k];
double old_error, error = Double.MAX_VALUE;
int l = 0;
do {
l++;
old_error = error;
error = 0;
Arrays.fill(counts, 0);
Arrays.fill(c1, 0);
// Upload the centroids computed by the previous pass
cuMemcpyHtoD(centroidPixels, Pointer.to(c), k * 5 * Sizeof.INT);
Pointer kernelParameters = Pointer.to(
Pointer.to(new int[] {clusters.length}),
Pointer.to(new int[] {k}),
Pointer.to(new double[] {colorWeight}),
Pointer.to(new double[] {distanceWeight}),
Pointer.to(errorDevice),
Pointer.to(imageDevice),
Pointer.to(centroidPixels),
Pointer.to(clustersDevice)
);
cuLaunchKernel(kmeansFunction,
gridSizeX, 1, 1,
blockSizeX, 1, 1,
0, null,
kernelParameters, null
);
**cuCtxSynchronize(); //got warning here.why?**
cuMemcpyDtoH(Pointer.to(clusters), clustersDevice, Sizeof.INT*clusters.length);
// Sum the five components of every pixel record into its assigned cluster
for (int i = 0; i < clusters.length; i++) {
int cluster = clusters[i];
counts[cluster]++;
c1[cluster*5] += imageProperty[i*5];
c1[cluster*5+1] += imageProperty[i*5+1];
c1[cluster*5+2] += imageProperty[i*5+2];
c1[cluster*5+3] += imageProperty[i*5+3];
c1[cluster*5+4] += imageProperty[i*5+4];
}
// New centroid = integer mean of its members; a cluster without members
// takes the accumulated sums, which Arrays.fill reset to 0 above
for (int i = 0; i < k; i++) {
if (counts[i] > 0) {
c[i*5] = c1[i*5] / counts[i];
c[i*5+1] = c1[i*5+1] / counts[i];
c[i*5+2] = c1[i*5+2] / counts[i];
c[i*5+3] = c1[i*5+3] / counts[i];
c[i*5+4] = c1[i*5+4] / counts[i];
} else {
c[i*5] = c1[i*5];
c[i*5+1] = c1[i*5+1];
c[i*5+2] = c1[i*5+2];
c[i*5+3] = c1[i*5+3];
c[i*5+4] = c1[i*5+4];
}
}
// Total error of this pass is the sum of the device error array
double[] errors = new double[clusters.length];
cuMemcpyDtoH(Pointer.to(errors), errorDevice, Sizeof.DOUBLE*clusters.length);
error = sumArray(errors);
System.out.println("" + l + " iterations");
} while (Math.abs(old_error - error) > tolerance);
cuMemcpyDtoH(Pointer.to(clusters), clustersDevice, clusters.length * Sizeof.INT);
cuMemFree(errorDevice);
cuMemFree(centroidPixels);
cuMemFree(clustersDevice);
}
Stack trace:
Exception in thread "main" jcuda.CudaException: CUDA_ERROR_ILLEGAL_ADDRESS
at jcuda.driver.JCudaDriver.checkResult(JCudaDriver.java:330)
at jcuda.driver.JCudaDriver.cuCtxSynchronize(JCudaDriver.java:1938)
at com.test.CUDA.KmeansKernel(CUDA.java:269)
at com.test.CUDA.main(CUDA.java:184)
As @talonmies mentions, the kernelParameters you are passing to the cuLaunchKernel method do not match the signature of the add kernel function.
You get the error at cuCtxSynchronize because CUDA execution model is asynchronous: cuLaunchKernel returns immediately and actual execution of the kernel on the device is asynchronous. cuCtxSynchronize documentation reads:
Note that this function may also return error codes from previous, asynchronous launches.
The second kernelParameters entry is an int k, where the second parameter of add method is a pointer to float, hence most probably the illegal access error.
I use Java implemented Held-KarpTSP algorithm algo to solve a 25 cities TSP problem.
The program passes with 4 cities.
When it runs with 25 cities it won't stop for several hours. I use jVisualVM to see what's the hotspot, after some optimization now it shows
98% of time is in real computing instead in Map.contains or Map.get.
So I'd like to have your advice, and here is the code:
/**
 * Held-Karp dynamic program over subsets of nodes that contain node 0.
 *
 * For m = 2..totalNodes, every m-element set and every end point j != 0 in
 * it gets the cost min over k of (cost of set without j ending in k) +
 * distances[k][j]. Only the costs of the previous set size are kept. The
 * final closing edge back to node 0 is evaluated by calcLastStop.
 *
 * Fixes: Thread.sleep is now called statically instead of through
 * Thread.currentThread(), and an InterruptedException restores the thread's
 * interrupt flag instead of silently discarding it.
 *
 * NOTE(review): previousCosts.indexOf(key) is a linear scan executed in the
 * innermost loop; a hash-based lookup would avoid it but needs
 * BitSetEndPointID to implement hashCode consistently with equals, which is
 * not visible here.
 *
 * @throws Exception propagated from the helpers that supply the m-sets
 */
private void solve() throws Exception {
    long beginTime = System.currentTimeMillis();
    int counter = 0;
    List<BitSetEndPointID> previousCosts;
    List<BitSetEndPointID> currentCosts;
    // maximum number of elements is c(n,[n/2])
    // To calculate m-set's costs just need to keep (m-1)set's costs
    List<BitSetEndPointID> lastKeys = new ArrayList<BitSetEndPointID>();
    int m;
    if (totalNodes < 10) {
        // for test data, generate them on the fly
        SetUtil3.generateMSet(totalNodes);
    }

    // m=1: the set {0} ending in node 0 costs nothing
    BitSet beginSet = new BitSet();
    beginSet.set(0);
    previousCosts = new ArrayList<BitSetEndPointID>(1);
    BitSetEndPointID beginner = new BitSetEndPointID(beginSet, 0);
    beginner.setCost(0f);
    previousCosts.add(beginner);

    // for m=2 to totalNodes
    for (m = 2; m <= totalNodes; m++) { // sum(m=2..n 's C(n,m)*(m-1)(m-1)) ==> O(n^2 * 2^n)
        // pick m elements from total nodes, the element id is the index of nodeCoordinates
        // the first node is always present
        BitSet[] msets;
        if (totalNodes < 10) {
            msets = SetUtil3.msets[m - 1];
        } else {
            // for real data set, will read from serialized file
            msets = SetUtil3.getMsets(totalNodes, m - 1);
        }
        currentCosts = new ArrayList<BitSetEndPointID>(msets.length);
        for (BitSet mset : msets) { // C(n,m) mset
            int[] candidates = allSetBits(mset, m);
            // mset is a BitSet which makes sure begin point 0 comes first
            // so end point candidate begins with 1. candidate[0] is always begin point 0
            for (int i = 1; i < candidates.length; i++) { // m-1 bits are set
                // set the new last point as j, j must not be the same as begin point 0
                int j = candidates[i];
                // middleNodes = mset - {j}
                BitSet middleNodes = (BitSet) mset.clone();
                middleNodes.clear(j);
                // loop through all possible points which are second to the last
                // and get min(A[S-{j},k] + k->j), k!=j
                float min = Float.MAX_VALUE;
                for (int ki = 0; ki < candidates.length; ki++) { // m-1 calculation
                    int k = candidates[ki];
                    if (k == j) {
                        continue;
                    }
                    float middleCost = 0;
                    BitSetEndPointID key = new BitSetEndPointID(middleNodes, k);
                    int index = previousCosts.indexOf(key);
                    if (index != -1) {
                        middleCost = previousCosts.get(index).getCost();
                    } else if (k == 0 && !middleNodes.equals(beginSet)) {
                        // a path may end in the start node only when the set is just {0}
                        continue;
                    } else {
                        System.out.println("middleCost not found!");
                        continue;
                    }
                    float lastCost = distances[k][j];
                    float cost = middleCost + lastCost;
                    if (cost < min) {
                        min = cost;
                    }
                    counter++;
                    if (counter % 500000 == 0) {
                        // short pause every 500000 evaluations
                        try {
                            Thread.sleep(100);
                        } catch (InterruptedException iex) {
                            System.out.println("Who dares interrupt my precious sleep?!");
                            // keep the interrupt visible to callers
                            Thread.currentThread().interrupt();
                        }
                    }
                }
                // set the costs for chosen mset and last point j
                BitSetEndPointID key = new BitSetEndPointID(mset, j);
                key.setCost(min);
                currentCosts.add(key);
            }
        }
        previousCosts = currentCosts;
        System.out.println("...");
    }
    calcLastStop(lastKeys, previousCosts);
    System.out.println(" cost " + (System.currentTimeMillis() - beginTime) / 60000 + " minutes.");
}
/**
 * Closes the tour: for every stored path cost, adds the edge from the path's
 * end point back to node 0 and prints the smallest total.
 *
 * @param lastKeys not used by this method
 * @param costs    path costs for the full node set, one entry per end point
 */
private void calcLastStop(List<BitSetEndPointID> lastKeys, List<BitSetEndPointID> costs) {
    // last step, calculate the min(A[S={1..n},k] + k->1)
    float best = Float.MAX_VALUE;
    for (BitSetEndPointID entry : costs) {
        final float pathCost = entry.getCost();
        final Integer end = entry.lastPointID;
        final float total = pathCost + distances[end][0];
        best = (total < best) ? total : best;
    }
    System.out.println("final result: " + best);
}
You can speed up your code by using arrays of primitives (it's likely to have to better memory layout than a list of objects) and operating on bitmasks directly (without bitsets or other objects). Here is some code (it generates a random graph but you can easily change it so that it reads your graph):
import java.io.*;
import java.util.*;
/**
 * Bitmask dynamic-programming solver for the travelling salesman problem.
 */
class Main {
    /** Cost used for states that have not been reached. */
    final static float INF = 1e10f;

    /**
     * Returns the length of the shortest closed tour that starts at node 0,
     * visits every node exactly once and returns to node 0.
     *
     * dp[last][mask] is the cheapest path that starts at node 0, visits
     * exactly the nodes in {@code mask} and ends at {@code last}. Every
     * reachable mask contains node 0, so only odd masks are expanded, and
     * states that are still at INF are skipped.
     *
     * @param dist square matrix of non-negative edge lengths
     * @return the optimal tour length; 0 for an empty graph
     * @throws IllegalArgumentException if there are more than 30 nodes,
     *         because the subset mask is an int
     */
    static double shortestTour(float[][] dist) {
        final int n = dist.length;
        if (n == 0) {
            return 0.0;
        }
        if (n > 30) {
            throw new IllegalArgumentException("too many nodes for an int bitmask: " + n);
        }
        final int stateCount = 1 << n;
        final int fullMask = stateCount - 1;

        float[][] dp = new float[n][stateCount];
        for (float[] row : dp) {
            Arrays.fill(row, INF);
        }
        dp[0][1] = 0.0f;

        // Step by 2: masks without bit 0 can never be reached from the start state.
        for (int mask = 1; mask < stateCount; mask += 2) {
            for (int lastNode = 0; lastNode < n; lastNode++) {
                if ((mask & (1 << lastNode)) == 0) {
                    continue;
                }
                final float costSoFar = dp[lastNode][mask];
                if (costSoFar >= INF) {
                    continue; // unreachable state, nothing to extend
                }
                for (int nextNode = 0; nextNode < n; nextNode++) {
                    if ((mask & (1 << nextNode)) != 0) {
                        continue;
                    }
                    final int extended = mask | (1 << nextNode);
                    dp[nextNode][extended] = Math.min(
                            dp[nextNode][extended],
                            costSoFar + dist[lastNode][nextNode]);
                }
            }
        }

        // Close the tour by returning to node 0 from every possible last node.
        double res = INF;
        for (int lastNode = 0; lastNode < n; lastNode++) {
            res = Math.min(res, dist[lastNode][0] + dp[lastNode][fullMask]);
        }
        return res;
    }

    /** Solves a random symmetric 25-node instance and prints the tour length. */
    public static void main(String[] args) {
        final int n = 25;
        float[][] dist = new float[n][n];
        Random random = new Random();
        for (int i = 0; i < n; i++) {
            for (int j = i + 1; j < n; j++) {
                dist[i][j] = dist[j][i] = random.nextFloat();
            }
        }
        System.out.println(shortestTour(dist));
    }
}
It takes only a couple of minutes to complete on my computer:
time java Main
...
real 2m5.546s
user 2m2.264s
sys 0m1.572s
I need to design an algorithm to find the maximum value I can get from (stepping) along an int[] at predefined (step lengths).
Input is the number of times we can "use" each step length; and is given by n2, n5 and n10. n2 means that we move 2 spots in the array, n5 means 5 spots and n10 means 10 spots. We can only move forward (from left to right).
The int[] contains the values 1..5, the size of the array is (n2*2 + n5*5 + n10*10). The starting point is int[0].
Example: we start at int[0]. From here we can move to int[0+2] == 3, int[0+5] == 4 or int[0+10] == 1. Let's move to int[5] since it has the highest value. From int[5] we can move to int[5+2], int[5+5] or int[5+10] etc.
We should move along the array in step lengths of 2, 5 or 10 (and we can only use each step length n2-, n5- and n10-times) in such a manner that we step in the array to collect as high sum as possible.
The output is the maximum value possible.
/**
 * Skeleton for the pokestop problem: fills the array with random values in
 * 1..5 and prints it. The actual maximisation is still to be written.
 */
public class Main {
    private static int n2 = 5;
    private static int n5 = 3;
    private static int n10 = 2;
    private static final int[] pokestops = new int[n2 * 2 + n5 * 5 + n10 * 10];

    public static void main(String[] args) {
        final Random generator = new Random();
        int index = 0;
        while (index < pokestops.length) {
            // remainder is in -4..4, so abs(...) + 1 is in 1..5
            pokestops[index] = Math.abs(generator.nextInt() % 5) + 1;
            index++;
        }
        System.out.println(Arrays.toString(pokestops));
        //TODO: return the maximum value possible
    }
}
This is an answer in pseudocode (I didn't run it, but it should work).
fill dp with -1.
dp(int id, int 2stepcount, int 5stepcount, int 10stepcount) {
if(id > array_length - 1) return 0;
if(dp[id][2stepcount][5stepcount][10stepcount] != -1) return dp[id][2stepcount][5stepcount][10stepcount];
else dp[id][2stepcount][5stepcount][10stepcount] = 0;
int 2step = 2stepcount < max2stepcount? dp(id + 2, 2stepcount + 1, 5stepcount, 10stepcount) : 0;
int 5step = 5stepcount < max5stepcount? dp(id + 5, 2stepcount, 5stepcount + 1, 10stepcount) : 0;
int 10step = 10stepcount < max10stepcount? dp(id + 10, 2stepcount, 5stepcount, 10stepcount + 1) : 0;
dp[id][2stepcount][5stepcount][10stepcount] += array[id] + max(2step, 5step, 10step);
return dp[id][2stepcount][5stepcount][10stepcount];
}
Call dp(0,0,0,0) and the answer is in dp[0][0][0][0].
If you wanna go backwards, then you do this:
fill dp with -1.
dp(int id, int 2stepcount, int 5stepcount, int 10stepcount) {
if(id > array_length - 1 || id < 0) return 0;
if(dp[id][2stepcount][5stepcount][10stepcount] != -1) return dp[id][2stepcount][5stepcount][10stepcount];
else dp[id][2stepcount][5stepcount][10stepcount] = 0;
int 2stepForward = 2stepcount < max2stepcount? dp(id + 2, 2stepcount + 1, 5stepcount, 10stepcount) : 0;
int 5stepForward = 5stepcount < max5stepcount? dp(id + 5, 2stepcount, 5stepcount + 1, 10stepcount) : 0;
int 10stepForward = 10stepcount < max10stepcount? dp(id + 10, 2stepcount, 5stepcount, 10stepcount + 1) : 0;
int 2stepBackward = 2stepcount < max2stepcount? dp(id - 2, 2stepcount + 1, 5stepcount, 10stepcount) : 0;
int 5stepBackward = 5stepcount < max5stepcount? dp(id - 5, 2stepcount, 5stepcount + 1, 10stepcount) : 0;
int 10stepBackward = 10stepcount < max10stepcount? dp(id - 10, 2stepcount, 5stepcount, 10stepcount + 1) : 0;
dp[id][2stepcount][5stepcount][10stepcount] += array[id] + max(2stepForward, 5stepForward, 10stepForward, 2stepBackward, 5stepBackward, 10stepBackward);
return dp[id][2stepcount][5stepcount][10stepcount];
}
But your paths don't get fully explored, because we stop if the index is negative or greater than the array size - 1; you could add wrap-around functionality, I guess.
this is a solution but i am not sure how optimal it is !
i did some optimization on it but i think much more can be done
I posted it with the example written in question
import java.util.Arrays;
import java.util.Random;
/**
 * Finds the largest sum that can be collected by stepping forward through an
 * array with a limited number of steps of length 2, 5 and 10.
 */
public class FindMax {
    private static int n2 = 5;
    private static int n5 = 3;
    private static int n10 = 2;
    private static final int[] pokestops = new int[n2 * 2 + n5 * 5 + n10 * 10];

    /** Marks a memo entry that has not been computed yet. */
    private static final int UNSET = Integer.MIN_VALUE;

    /**
     * Returns the maximum sum of values collected when starting on
     * {@code pos} (whose value is included) and moving forward with at most
     * {@code n2} steps of 2, {@code n5} steps of 5 and {@code n10} steps of
     * 10. A step is only taken if it lands inside the array; not all steps
     * have to be used.
     *
     * The previous version did not compile (locals possibly unassigned),
     * returned 0 as soon as any single step type was used up, could index
     * past the end of the array and returned the largest single value
     * instead of the largest sum.
     *
     * @param n2        remaining steps of length 2 (negative is treated as 0)
     * @param n5        remaining steps of length 5 (negative is treated as 0)
     * @param n10       remaining steps of length 10 (negative is treated as 0)
     * @param pos       starting index
     * @param pokestops values to collect
     * @return the best reachable sum, or 0 if {@code pos} is outside the array
     */
    public static int findMaxValue(int n2, int n5, int n10, int pos, int[] pokestops) {
        if (pokestops == null || pos < 0 || pos >= pokestops.length) {
            return 0;
        }
        final int left2 = Math.max(n2, 0);
        final int left5 = Math.max(n5, 0);
        final int left10 = Math.max(n10, 0);
        // For a fixed start the position follows from the steps already used,
        // so the remaining counts alone identify a state.
        final int[][][] memo = new int[left2 + 1][left5 + 1][left10 + 1];
        for (int[][] plane : memo) {
            for (int[] row : plane) {
                Arrays.fill(row, UNSET);
            }
        }
        return best(left2, left5, left10, pos, pokestops, memo);
    }

    /** Memoised recursion behind findMaxValue; {@code pos} is always a valid index. */
    private static int best(int n2, int n5, int n10, int pos, int[] stops, int[][][] memo) {
        final int cached = memo[n2][n5][n10];
        if (cached != UNSET) {
            return cached;
        }
        int tail = 0; // stopping here is always allowed
        if (n2 > 0 && pos + 2 < stops.length) {
            tail = Math.max(tail, best(n2 - 1, n5, n10, pos + 2, stops, memo));
        }
        if (n5 > 0 && pos + 5 < stops.length) {
            tail = Math.max(tail, best(n2, n5 - 1, n10, pos + 5, stops, memo));
        }
        if (n10 > 0 && pos + 10 < stops.length) {
            tail = Math.max(tail, best(n2, n5, n10 - 1, pos + 10, stops, memo));
        }
        final int result = stops[pos] + tail;
        memo[n2][n5][n10] = result;
        return result;
    }

    public static void main(String[] args) {
        Random rand = new Random();
        for (int i = 0; i < pokestops.length; i++) {
            pokestops[i] = Math.abs(rand.nextInt() % 5) + 1;
        }
        System.out.println(Arrays.toString(pokestops));
        int max = findMaxValue(n2, n5, n10, 0, pokestops);
        System.out.println("");
        System.out.println("Max is :" + max);
    }
}
You need to calculate following dynamic programming dp[c2][c5][c10][id] - where c2 is number of times you've stepped by 2, c5 - by 5, c10 - by 10 and id - where is your current position. I will write example for c2 and c5 only, it can be easily extended.
// dp[c2][c5][i] = best sum collected when standing on index i after c2 steps
// of length 2 and c5 steps of length 5; Integer.MIN_VALUE marks a state that
// cannot be reached. The answer is the largest reachable entry of dp.
// Fixes: the array was declared with four dimensions but created with three,
// was filled with Integer.MAX_VALUE although the recurrence maximises, read
// past the end of pokestops at i + 2 / i + 5, and extended unreachable states.
int[][][] dp = new int[n2 + 1][n5 + 1][pokestops.length];
for (int[][] plane : dp) {
    for (int[] row : plane) {
        Arrays.fill(row, Integer.MIN_VALUE);
    }
}
dp[0][0][0] = pokestops[0];
for (int c2 = 0; c2 <= n2; c2++) {
    for (int c5 = 0; c5 <= n5; c5++) {
        for (int i = 0; i < pokestops.length; i++) {
            int here = dp[c2][c5][i];
            if (here == Integer.MIN_VALUE) {
                continue; // position i is not reachable with this many steps
            }
            if (c2 < n2 && i + 2 < pokestops.length) {
                dp[c2 + 1][c5][i + 2] = Math.max(dp[c2 + 1][c5][i + 2], here + pokestops[i + 2]);
            }
            if (c5 < n5 && i + 5 < pokestops.length) {
                dp[c2][c5 + 1][i + 5] = Math.max(dp[c2][c5 + 1][i + 5], here + pokestops[i + 5]);
            }
        }
    }
}
I know the target language is Java, but I like Python and the conversion will not be complicated.
You can define a 4-dimensional array dp where dp[i][a][b][c] is the maximum value that you can
get starting in position i when you still have a steps of length 2, b of length 5 and c of length
10 available. I use memoization to get cleaner code.
import random

# Values of the positions; replaced by the random instance generated below.
values = []
# Cache of already solved states: (pos, n2, n5, n10) -> best sum.
memo = {}


def dp(pos, n2, n5, n10):
    """Return the best sum collectable from ``pos`` onwards.

    ``n2``, ``n5`` and ``n10`` are the numbers of steps of length 2, 5 and 10
    that are still available. The value at ``pos`` is always included, and a
    step is only taken if it lands inside ``values``.
    """
    state = (pos, n2, n5, n10)
    if state in memo:
        return memo[state]
    res = values[pos]
    if pos + 2 < len(values) and n2 > 0:
        res = max(res, values[pos] + dp(pos + 2, n2 - 1, n5, n10))
    if pos + 5 < len(values) and n5 > 0:
        res = max(res, values[pos] + dp(pos + 5, n2, n5 - 1, n10))
    if pos + 10 < len(values) and n10 > 0:
        res = max(res, values[pos] + dp(pos + 10, n2, n5, n10 - 1))
    memo[state] = res
    return res


n2, n5, n10 = 5, 3, 2
values = [random.randint(1, 5) for _ in range(n2*2 + n5*5 + n10*10)]
# print() with a single argument works on both Python 2 and Python 3.
print(dp(0, n2, n5, n10))
Suspiciously like homework. Not tested:
import java.util.Arrays;
import java.util.Random;
/**
 * Brute-force search for the pokestop problem: builds the complete tree of
 * step sequences and remembers the node with the highest running total.
 */
public class Main {
    private static Step[] steps = new Step[]{
            new Step(2, 5),
            new Step(5, 3),
            new Step(10, 2)
    };
    private static final int[] pokestops = new int[calcLength(steps)];

    /** Array length needed to use every step the maximum number of times. */
    private static int calcLength(Step[] steps) {
        int length = 0;
        for (int s = 0; s < steps.length; s++) {
            length += steps[s].maxCount * steps[s].size;
        }
        return length;
    }

    public static void main(String[] args) {
        final Random rand = new Random();
        for (int i = 0; i < pokestops.length; i++) {
            pokestops[i] = Math.abs(rand.nextInt() % 5) + 1;
        }
        System.out.println(Arrays.toString(pokestops));

        // Every step type starts with its full budget.
        final int[] budget = new int[steps.length];
        for (int i = 0; i < budget.length; i++) {
            budget[i] = steps[i].maxCount;
        }
        // Constructing the root explores the whole tree and records Tree.max.
        Tree base = new Tree(0, null, new Counts(budget));
        System.out.println(Tree.max.currentTotal);
    }

    /** One visited position; the constructor recursively creates all successors. */
    static class Tree {
        final int pos;
        final Tree parent;
        private final int currentTotal;
        static Tree max = null;
        Tree[] children = new Tree[steps.length*2];

        public Tree(int pos, Tree parent, Counts counts) {
            this.pos = pos;
            this.parent = parent;

            final boolean outside = pos < 0 || pos >= pokestops.length;
            if (outside || counts.exceeded()) {
                // invalid node: off the array or a step budget was overdrawn
                this.currentTotal = -1;
                return;
            }

            final int inherited = (parent != null) ? parent.currentTotal : 0;
            this.currentTotal = inherited + pokestops[pos];
            if (max == null || max.currentTotal < this.currentTotal) {
                max = this;
            }
            for (int i = 0; i < steps.length; i++) {
                children[i] = new Tree(pos + steps[i].size, this, counts.decrement(i));
                // uncomment to allow forward-back traversal:
                //children[2*i] = new Tree(pos - steps[i].size, this, counts.decrement(i));
            }
        }
    }

    /** Remaining budget per step type; instances are never modified after creation. */
    static class Counts {
        int[] counts;

        public Counts(int[] counts) {
            this.counts = counts.clone();
        }

        /** Returns a copy with the budget of step type {@code i} reduced by one. */
        public Counts decrement(int i) {
            final Counts reduced = new Counts(this.counts);
            reduced.counts[i] -= 1;
            return reduced;
        }

        /** True if any budget has gone below zero. */
        public boolean exceeded() {
            for (int i = 0; i < counts.length; i++) {
                if (counts[i] < 0) {
                    return true;
                }
            }
            return false;
        }
    }

    /** A step length together with the number of times it may be used. */
    static class Step {
        int size;
        int maxCount;

        public Step(int size, int maxCount) {
            this.size = size;
            this.maxCount = maxCount;
        }
    }
}
There's a line you can uncomment to allow both forward and backward movement. (I was sure someone said in the comments that this was allowed, but now I see that your post says forward only.)
I want my program to randomly generate 1s and 0s, but the result should look like a queue. A 1 represents a person and a 0 means that there is no person. It should generate the random 1s and 0s like this: 0 0 0 0 1 1 1 1 1 1 if the line is on the left (and vice versa if it is on the right), not like this: 1 1 1 0 0 1 0 0 1 1.
/**
 * Draws ten independent random values, each 0 or 1, and writes them to
 * EntryFloor1 as a single space-separated line.
 */
public void randPeople(){
    StringBuilder line = new StringBuilder();
    for (int person = 1; person <= 10; person++) {
        if (person > 1) {
            line.append(" ");
        }
        // nextInt(2) yields either 0 (no person) or 1 (person).
        line.append(Integer.toString(rand.nextInt(2)));
    }
    EntryFloor1.setText(line.toString());
}
Implemented a simple random function to generate 0s and 1s
// Build a queue in which all people (1s) stand together at one end.
int[] queue = new int[10];
Random r = new Random();
// How many people are waiting: anything from an empty to a completely full line.
int people = r.nextInt(queue.length + 1);
// The side is drawn independently of the head count. Deriving it from the
// parity of the breakpoint would make half of the valid layouts unreachable.
boolean fromLeft = r.nextBoolean();
int firstPerson = fromLeft ? 0 : queue.length - people;
// Slots outside [firstPerson, firstPerson + people) keep their default 0.
Arrays.fill(queue, firstPerson, firstPerson + people, 1);
System.out.println("Queue " + Arrays.toString(queue));
output
Queue [1, 1, 1, 0, 0, 0, 0, 0, 0, 0]
With Java8 generator
// Generate `size` random bits and sort them so that equal values end up
// next to each other; a coin flip picks ascending (0s first) or
// descending (1s first) order.
final int size = 10;
final Random random = new Random();
boolean ascending = random.nextBoolean();
Object[] sortedBits = IntStream.generate(() -> random.nextInt(size) % 2)
        .limit(size)
        .boxed()
        .sorted((left, right) -> {
            int natural = Integer.compare(left, right);
            return ascending ? natural : -natural;
        })
        .toArray();
System.out.println("Arrays " + Arrays.toString(sortedBits));
output
Arrays [1, 1, 1, 1, 1, 0, 0, 0, 0, 0]
I believe this will do what you want. Just as a side note - if you only care about whether or not a person exists I would suggest using boolean values to represent the people. Since you specifically said 1 and 0 I wrote the code below to produce that output. You'll notice that I do use the nextBoolean() method of the Random class as it is simpler and more readable for what I'm doing.
import java.util.ArrayList;
import java.util.List;
import java.util.Random;
/**
 * Generates random queues of people: a 1 stands for a person, a 0 for an
 * empty slot, and all people always stand together at one end of the line.
 */
public class RandPeopleMain {
    public static void main(String[] args) {
        System.out.println(lineUpLeft(10));
        System.out.println(lineUpRight(10));
    }

    /**
     * Builds a queue whose people occupy the leading slots.
     *
     * @param numPeople number of slots; each slot holds a person with probability 1/2
     * @return a mutable list of {@code numPeople} entries, all 1s before all 0s
     *         (empty when {@code numPeople} is not positive)
     */
    public static List<Integer> lineUpLeft(int numPeople) {
        return lineUp(numPeople, true);
    }

    /**
     * Builds a queue whose people occupy the trailing slots.
     *
     * @param numPeople number of slots; each slot holds a person with probability 1/2
     * @return a mutable list of {@code numPeople} entries, all 0s before all 1s
     *         (empty when {@code numPeople} is not positive)
     */
    public static List<Integer> lineUpRight(int numPeople) {
        return lineUp(numPeople, false);
    }

    /** Flips one coin per slot, then emits the 1s and 0s already in their final order. */
    private static List<Integer> lineUp(int numPeople, boolean alignLeft) {
        Random rand = new Random();
        int present = 0;
        for (int i = 0; i < numPeople; i++) {
            if (rand.nextBoolean()) {
                present++;
            }
        }

        int slots = Math.max(numPeople, 0);
        List<Integer> list = new ArrayList<Integer>(slots);
        // Appending in final order avoids repeatedly shifting the list with add(0, 0).
        int leadingValue = alignLeft ? 1 : 0;
        int leadingCount = alignLeft ? present : slots - present;
        for (int i = 0; i < leadingCount; i++) {
            list.add(leadingValue);
        }
        for (int i = leadingCount; i < slots; i++) {
            list.add(1 - leadingValue);
        }
        return list;
    }
}
Example output:
[1, 1, 1, 1, 1, 1, 0, 0, 0, 0]
[0, 0, 0, 1, 1, 1, 1, 1, 1, 1]
I provided the above methods to illustrate how similar aligning the people left or right can be. Here is a more generic version that uses a single method with an additional parameter to specify the alignment.
/**
 * Builds a random queue in which every person (1) stands at the same end.
 *
 * @param numPeople number of slots; each slot holds a person with probability 1/2
 * @param alignLeft {@code true} to put the people first, {@code false} to put them last
 * @return a list of {@code numPeople} entries (empty when {@code numPeople} is not positive)
 */
public static List<Integer> lineUpPeople(int numPeople, boolean alignLeft){
    Random rand = new Random();

    // One coin flip per slot decides how many people show up.
    int present = 0;
    for (int slot = 0; slot < numPeople; slot++) {
        if (rand.nextBoolean()) {
            present++;
        }
    }

    int leadingValue = alignLeft ? 1 : 0;
    int leadingCount = alignLeft ? present : numPeople - present;

    List<Integer> list = new ArrayList<Integer>();
    for (int k = 0; k < leadingCount; k++) {
        list.add(leadingValue);
    }
    for (int k = 0; k < numPeople - leadingCount; k++) {
        list.add(1 - leadingValue);
    }
    return list;
}
Lastly, here is another way to approach the problem. Rather than generating a random value for each potential person simply generate the number of people and then fill in the list/queue appropriately.
/**
 * Builds a queue with a random number of people (1s) standing together at one end.
 *
 * @param maxPeople number of slots; the head count is drawn from 0..maxPeople inclusive
 * @param alignLeft {@code true} to put the people first, {@code false} to put them last
 * @return a list of {@code maxPeople} entries
 * @throws IllegalArgumentException if {@code maxPeople + 1} is not a positive bound
 */
public static List<Integer> lineUp(int maxPeople, boolean alignLeft){
    Random rand = new Random();
    int numPeople = rand.nextInt(maxPeople+1);
    int emptySlots = maxPeople - numPeople;

    List<Integer> list = new ArrayList<Integer>();
    if (!alignLeft) {
        // Right-aligned: the empty slots come first.
        for (int k = 0; k < emptySlots; k++) {
            list.add(0);
        }
    }
    for (int k = 0; k < numPeople; k++) {
        list.add(1);
    }
    if (alignLeft) {
        // Left-aligned: the empty slots trail the people.
        for (int k = 0; k < emptySlots; k++) {
            list.add(0);
        }
    }
    return list;
}
Try this:
// Pick a side and a breakpoint, then fill ten slots so that every 1 lies
// on the same side of the breakpoint.
Random r = new Random();
boolean onesFirst = r.nextBoolean(); // true: 1s before the breakpoint, false: 0s before it
int breakpoint = r.nextInt(11); // 0..10, index at which the value switches
System.out.println(onesFirst + " " + breakpoint);
int[] person = new int[10];
for (int i = 0; i < 10; i++) {
    boolean beforeBreak = i < breakpoint;
    // A slot holds a person when its side of the breakpoint is the "ones" side.
    person[i] = (beforeBreak == onesFirst) ? 1 : 0;
}
System.out.println(Arrays.toString(person));