Neural Network implementation for Number Recognition in Java - java

Firstly, i should mention that i only recently started programming (about a year ago). My main-language is Java.
to elaborate on what I've done:
to learn about Neural Networks i watched 3Blue1Brown´s series on the topic
after (mostly) understanding it i started to make the actual Implementation
I implemented a File-Reader to turn the raw numbers of the Database (i used MNIST just like 3b1b) into three arrays : 1 for the labels, 1 for the Images and one for the grayscale-values (i mapped the RGB values between 0 and 1) in one input-array
I then designed a test() and train() method, this is what i did:
public class Network {
int L;
int[] Lsize;
double[][][] weights;
double[][] biases;
public Network(int... Lsize) {
L = Lsize.length;
this.Lsize = Lsize;
weights = new double[L - 1][][];
for (int i = 0; i < L - 1; i++) {
weights[i] = new double[Lsize[i + 1]][Lsize[i]];
for (int j = 0; j < Lsize[i + 1]; j++) {
for (int k = 0; k < Lsize[i]; k++) {
weights[i][j][k] = (Math.random() * 2) - 1;
}
}
}
biases = new double[L - 1][];
for (int i = 0; i < L - 1; i++) {
biases[i] = new double[Lsize[i + 1]];
for (int j = 0; j < Lsize[i + 1]; j++) {
biases[i][j] = (Math.random() * 2) - 1;
}
}
}
public static void main(String[] args) {
Network n = new Network(28 * 28, 16, 16, 10);
Database mnist_train = new Database(60000, 28, 28, "mnist_train");
Database mnist_test = new Database(10000, 28, 28, "mnist_test");
System.out.println("accuracy= " + n.accuracy(mnist_test));
for(int i = 0; i < 50; i ++) {
n.train(mnist_train, 10, 0.1);
System.out.println("accuracy= " + n.accuracy(mnist_test));
}
}
public void train(Database data,int batchsize,double factor) {
Batch[] batches = data.dividetoBatches(batchsize);
for (int b = 0; b < data.n / batchsize; b++) {
System.out.println("Step " + b + " started!");
Batch batch = batches[b];
double[][][] averagegweights = new double[L - 1][][];
double[][] averagegbiases = new double[L - 1][];
for (int i = 0; i < L - 1; i++) {
averagegweights[i] = new double[Lsize[i + 1]][Lsize[i]];
averagegbiases[i] = new double[Lsize[i + 1]];
}
double averagecost = 0;
for (int e = 0; e < batchsize; e++) {
double[] target = batch.target[e];
double[] values = batch.values[e];
double[][] z = new double[L - 1][];
double[][] a = new double[L][];
a[0] = values;
for (int i = 0; i < L - 1; i++) {
a[i + 1] = new double[Lsize[i + 1]];
z[i] = new double[Lsize[i + 1]];
for (int j = 0; j < Lsize[i + 1]; j++) {
double sum = biases[i][j];
for (int k = 0; k < Lsize[i]; k++) {
sum += weights[i][j][k] * a[i + 1][j];
}
z[i][j] = sum;
a[i + 1][j] = sigmoid(sum);
}
}
double[][][] gweights = new double[L - 1][][];
double[][] gbiases = new double[L - 1][];
double[][] dCa = new double[L][];
double cost = 0;
dCa[L - 1] = new double[Lsize[L - 1]];
for (int i = 0; i < Lsize[L - 1]; i++) {
dCa[L-1][i] = 2 * (target[i] - a[L - 1][i]);
cost += (target[i] - a[L - 1][i]) * (target[i] - a[L - 1][i]);
}
// Backpropagation:
for (int i = L - 2; i >= 0; i--) {
dCa[i] = new double[Lsize[i]];
gweights[i] = new double[Lsize[i+1]][Lsize[i]];
gbiases[i] = new double[Lsize[i]];
for (int j = 0; j < Lsize[i + 1]; j++) {
gbiases[i][j] = dsigmoid(z[i][j]) * dCa[i+1][j];
for (int k = 0; k < Lsize[i]; k++) {
gweights[i][j][k] = a[i][k] * dsigmoid(z[i][j]) * dCa[i+1][j];
}
}
for (int k = 0; k < Lsize[i]; k++) {
dCa[i][k] = 0;
for (int j = 0; j < Lsize[i + 1]; j++) {
dCa[i][k] += weights[i][j][k] * dsigmoid(z[i][j]) * dCa[i + 1][j];
}
}
}
for (int i = 0; i < L - 1; i++) {
for (int j = 0; j < Lsize[i + 1]; j++) {
averagegbiases[i][j] += gbiases[i][j];
for (int k = 0; k < Lsize[i]; k++) {
averagegweights[i][j][k] += gweights[i][j][k];
}
}
}
averagecost += cost;
}
for (int i = 0; i < L - 1; i++) {
for (int j = 0; j < Lsize[i + 1]; j++) {
averagegbiases[i][j] = averagegbiases[i][j]/batchsize * -1;
for (int k = 0; k < Lsize[i]; k++) {
averagegweights[i][j][k] = averagegweights[i][j][k]/batchsize * -1;
}
}
}
for (int i = 0; i < L - 1; i++) {
for (int j = 0; j < Lsize[i + 1]; j++) {
biases[i][j] += averagegbiases[i][j] * factor;
for (int k = 0; k < Lsize[i]; k++) {
weights[i][j][k] += averagegweights[i][j][k] * factor;
}
}
}
averagecost = averagecost/batchsize;
System.out.println("averagecost = " + averagecost);
// System.out.println(Arrays.deepToString(batch.target));
System.out.println("Das sollte eine" + data.labels[0] + " sein!");
data.shuffle();
double[] output = test(data.values[0]);
for (int i = 0; i < output.length; i++) {
float val = (float) output[i];
System.out.print(i + ": ");
System.out.printf("%.2f", val);
System.out.println();
}
System.out.println("Step " + b + " finished!");
}
}
public double accuracy(Database data) {
int rightanswers = 0;
int answers = 0;
for(int i = 0; i < data.n; i++) {
if(maxIndex(test(data.values[i])) == data.labels[i]) {
rightanswers++;
}
answers++;
}
return (double)rightanswers/(double)answers;
}
public int maxIndex(double[] output) {
int index = 0;
for(int i = 0; i < output.length; i++) {
if(output[i] > output[index])
index = i;
}
return index;
}
public double[] test(double[] input) {
double[][] values = new double[L][];
values[0] = input;
for (int i = 0; i < L - 1; i++) {
values[i + 1] = new double[Lsize[i + 1]];
for (int j = 0; j < Lsize[i + 1]; j++) {
double sum = biases[i][j];
for (int k = 0; k < Lsize[i]; k++) {
sum += weights[i][j][k] * values[i][k];
}
values[i + 1][j] = sigmoid(sum);
}
}
double[] output = values[L - 1];
return output;
}
public double sigmoid(double x) {
return 1 / (1 + Math.pow(Math.E, x));
}
// derivative of the sigmoid-function
public double dsigmoid(double x) {
return sigmoid(x) * (1 - sigmoid(x));
}
}
my problem now is when i run the training the Cost-function decreases, but only because all of the output-values are nearing 0 and not because the network has actually found the right number to the picture.
after about one run through the Database the average Cost stagnates around 0.9
am i missing something fundamental or am i just not noticing an simple error
Thank you in advance
I`m sorry for my bad English, I´m actually German

The first thing you can do is to increase the complexity of the network. I just tried your architecture with my implementation and the network you currently have definitely isn't big enough to fit handwritten digits. I would recommend 784,128,128,10 as that had around 90% accuracy for 3 runs over the training set. If that doesn't yield better results, make sure your implementation of a neural network is functional on something less complex like the XOR problem. If XOR is learnable by your implementation, it could be the way you are altering the dataset. Debugging a neural network is tough, but you have to make sure the foundation of it is built correctly before looking at higher-level problems like learning rate and optimizers. You're using vanilla gradient descent so you should definitely implement momentum as it is easier to implement than what you've already done and is a significant improvement to your current method.

Related

How to sort 2D array by columns and by diagonal using Selection Sort in Java

Firsly, I have a Matrix which I need to sort in such way (shown on the picture) using Selection Sort:
the way to sort the Matrix
Thus, the array should be sorted in increasing way from up to down. What's more, we have to sort elements of the right diagonal of Matrix.
Here's my code, but it doesn't work properly because I take diagonal into account.
public class PAS {
public static void main(String[] args) {
int n = 5; int max = 25; int minimL = 0; int MinRow, Temp;
int[][] array = new int [n][n];
for (int i = 0; i < n; i++)
for (int j = 0; j < n; j++)
array[i][j] = (int) (Math.random() * (max - minimL + 1) + minimL);
for (int i = 0; i < n; i++) {
for (int j = 0; j < n; j++)
System.out.print(" " + array[i][j]);
System.out.println();
}
System.out.println();
for (int NumCol = n - 1; NumCol >= 0; NumCol --) {
for (int NumRow = 0; NumRow < n; NumRow++) {
if (NumCol >= n - NumRow - 1) // Getting the right diagonal of Matrix changing position of columns to rows in order to sort them
{
MinRow = NumRow;
for (int j = NumRow + 1; j < n; j++)
if (array[(n - 1) - NumCol][NumCol] > array[MinRow][(n - 1) - NumCol])
MinRow = j;
Temp = array[NumRow][(n - 1) - NumCol];
array[(n - 1) - NumCol][NumCol] = array[MinRow][(n - 1) - NumCol];
array[MinRow][(n - 1) - NumCol] = Temp;
}
}
}
for (int i = 0; i < n; i++) {
for (int j = 0; j < n; j++) {
System.out.print(" " + array[i][j]);
}
System.out.println();
}
}
}
Is it possible to sort it properly in the way shown on the picture taking O(n^3) notation?

Bin packing, how do I include multiple 'bin sizes'

The code below is working well, however it isn't quite what I am looking for. It finds an efficient way to cut variable sizes (user input) from one static stock size. I want to alter the code so it finds an efficient way to cut the variable sizes from multiple static stock sizes, 4 to be specific.
public static void binPacking(double[] FinalCuttingList, double size, int TotalCuts)
{
int StockLengthCount = 0;
double[] StockValues = new double[TotalCuts];
for (int i = 0; i < StockValues.length; i++)
StockValues[i] = size;
for (int i = 0; i < TotalCuts; i++)
for (int o = 0; o < StockValues.length; o++)
{
if (StockValues[o] - FinalCuttingList[i] >= 0)
{
StockValues[o] -= FinalCuttingList[i];
break;
}
}
for (int i = 0; i < StockValues.length; i++)
if (StockValues[i] != size)
StockLengthCount++;
System.out
.println("Number of 3400mm pieces required is :"
+ StockLengthCount);
}
static double[] sort(double[] sequence)
{
// Sort in descending order
for (int i = 0; i < sequence.length; i++)
for (int o = 0; o < sequence.length - 1; o++)
if (sequence[o] < sequence[o + 1])
{
sequence[o] = sequence[o] + sequence[o + 1];
sequence[o + 1] = sequence[o] - sequence[o + 1];
sequence[o] = sequence[o] - sequence[o + 1];
}
return sequence;
}

Why is the output always converging to 0.5?

I was trying to solve a XOR problem, but the output always converged to 0.5, so i tried a simpler problem like NOT and the same thing happened.
I really don't know what's going on, i checked the code a million times and everything seems to be right, when i debugged it saving the neural network info I saw that the either the weight values or the biases values were getting really large. To do that I followed the 3 blue 1 brown youtube series about neural network and some other videos, too.
this is my code:
PS: I put the entire code here but I think the main problem is inside the bakpropag function
class NeuralNetwork {
int inNum, hiddenLayersNum, outNum, netSize;
int[] hiddenLayerSize;
Matrix[] weights;
Matrix[] biases;
Matrix[] sums;
Matrix[] activations;
Matrix[] error;
Matrix inputs;
long samples = 0;
float learningRate;
//Constructor------------------------------------------------------------------------------------------------------
NeuralNetwork(int inNum, int hiddenLayersNum, int[] hiddenLayerSize, int outNum, float learningRate) {
this.inNum = inNum;
this.hiddenLayersNum = hiddenLayersNum;
this.hiddenLayerSize = hiddenLayerSize;
this.outNum = outNum;
this.netSize = hiddenLayersNum + 1;
this.learningRate = learningRate;
//output layer plus the hidden layer size
//Note: I'm not adding the input layer because it doesn't have weights
weights = new Matrix[netSize];
//no biases added to the output layer
biases = new Matrix[netSize - 1];
sums = new Matrix[netSize];
activations = new Matrix[netSize];
error = new Matrix[netSize];
initializeHiddenLayer();
initializeOutputLayer();
}
//Initializing Algorithms------------------------------------------------------------------------------------------
void initializeHiddenLayer() {
for (int i = 0; i < hiddenLayersNum; i++) {
if (i == 0) {//only the first hidden layer takes the inputs
weights[i] = new Matrix(hiddenLayerSize[i], inNum);
} else {
weights[i] = new Matrix(hiddenLayerSize[i], hiddenLayerSize[i - 1]);
}
biases[i] = new Matrix(hiddenLayerSize[i], 1);
sums[i] = new Matrix(hiddenLayerSize[i], 1);
activations[i] = new Matrix(hiddenLayerSize[i], 1);
error[i] = new Matrix(hiddenLayerSize[i], 1);
}
}
void initializeOutputLayer() {
//the output layer takes the last hidden layer activation values
weights[netSize - 1] = new Matrix(outNum, hiddenLayerSize[hiddenLayerSize.length - 1]);
activations[netSize - 1] = new Matrix(outNum, 1);
sums[netSize - 1] = new Matrix(outNum, 1);
error[netSize - 1] = new Matrix(outNum, 1);
for (Matrix m : weights) {
for (int i = 0; i < m.i; i++) {
for (int j = 0; j < m.j; j++) {
m.values[i][j] = random(-1, 1);
}
}
}
for (Matrix m : biases) {
for (int i = 0; i < m.i; i++) {
for (int j = 0; j < m.j; j++) {
m.values[i][j] = 1;
}
}
}
for (Matrix m : sums) {
for (int i = 0; i < m.i; i++) {
for (int j = 0; j < m.j; j++) {
m.values[i][j] = 0;
}
}
}
}
//Calculation------------------------------------------------------------------------------------------------------
void calculate(float[] inputs) {
this.inputs = new Matrix(0, 0);
this.inputs = this.inputs.arrayToCollumn(inputs);
sums[0] = (weights[0].matrixMult(this.inputs)).sum(biases[0]);
activations[0] = sigM(sums[0]);
for (int i = 1; i < netSize - 1; i++) {
sums[i] = weights[i].matrixMult(activations[i - 1]);
activations[i] = sigM(sums[i]).sum(biases[i]);
}
//there's no biases in the output layer
//And the output layer uses sigmoid function
sums[netSize - 1] = weights[netSize - 1].matrixMult(activations[netSize - 1 - 1]);
activations[netSize - 1] = sigM(sums[netSize - 1]);
}
//Sending outputs--------------------------------------------------------------------------------------------------
Matrix getOuts() {
return activations[netSize - 1];
}
//Backpropagation--------------------------------------------------------------------------------------------------
void calcError(float[] exp) {
Matrix expected = new Matrix(0, 0);
expected = expected.arrayToCollumn(exp);
//E = (output - expected)
error[netSize - 1] = this.getOuts().diff(expected);
samples++;
}
void backPropag(int layer) {
if (layer == netSize - 1) {
error[layer].scalarDiv(samples);
for (int i = layer - 1; i >= 0; i--) {
prevLayerCost(i);
}
weightError(layer);
backPropag(layer - 1);
} else {
weightError(layer);
biasError(layer);
if (layer != 0)
backPropag(layer - 1);
}
}
void weightError(int layer) {
if (layer != 0) {
for (int i = 0; i < weights[layer].i; i++) {
for (int j = 0; j < weights[layer].j; j++) {
float changeWeight = 0;
if (layer != netSize - 1)
changeWeight = activations[layer - 1].values[j][0] * deriSig(sums[layer].values[i][0]) * error[layer].values[i][0];
else
changeWeight = activations[layer - 1].values[j][0] * deriSig(sums[layer].values[i][0]) * error[layer].values[i][0];
weights[layer].values[i][j] += -learningRate * changeWeight;
}
}
} else {
for (int i = 0; i < weights[layer].i; i++) {
for (int j = 0; j < weights[layer].j; j++) {
float changeWeight = this.inputs.values[j][0] * deriSig(sums[layer].values[i][0]) * error[layer].values[i][0];
weights[layer].values[i][j] += -learningRate * changeWeight;
}
}
}
}
void biasError(int layer) {
for (int i = 0; i < biases[layer].i; i++) {
for (int j = 0; j < biases[layer].j; j++) {
float changeBias = 0;
if (layer != netSize - 1)
changeBias = deriSig(sums[layer].values[i][0]) * error[layer].values[i][0];
biases[layer].values[i][j] += -learningRate * changeBias;
}
}
}
void prevLayerCost(int layer) {
for (int i = 0; i < activations[layer].i; i++) {
for (int j = 0; j < activations[layer + 1].j; j++) {//for all conections of that neuron to the next layer
if (layer != netSize - 1)
error[layer].values[i][0] += weights[layer + 1].values[j][i] * deriSig(sums[layer + 1].values[j][0]) * error[layer + 1].values[j][0];
else
error[layer].values[i][0] += weights[layer + 1].values[j][i] * deriSig(sums[layer + 1].values[j][0]) * error[layer + 1].values[j][0];
}
}
}
//Activation Functions---------------------------------------------------------------------------------------------
Matrix reLUM(Matrix m) {
Matrix temp = m.copyM();
for (int i = 0; i < temp.i; i++) {
for (int j = 0; j < temp.j; j++) {
temp.values[i][j] = ReLU(m.values[i][j]);
}
}
return temp;
}
float ReLU(float x) {
return max(0, x);
}
float deriReLU(float x) {
if (x <= 0)
return 0;
else
return 1;
}
Matrix sigM(Matrix m) {
Matrix temp = m.copyM();
for (int i = 0; i < temp.i; i++) {
for (int j = 0; j < temp.j; j++) {
temp.values[i][j] = sig(m.values[i][j]);
}
}
return temp;
}
float sig(float x) {
return 1 / (1 + exp(-x));
}
float deriSig(float x) {
return sig(x) * (1 - sig(x));
}
//Saving Files-----------------------------------------------------------------------------------------------------
void SaveNeuNet() {
for (int i = 0; i < weights.length; i++) {
weights[i].saveM("weights\\weightLayer" + i);
}
for (int i = 0; i < biases.length; i++) {
biases[i].saveM("biases\\biasLayer" + i);
}
for (int i = 0; i < activations.length; i++) {
activations[i].saveM("activations\\activationLayer" + i);
}
for (int i = 0; i < error.length; i++) {
error[i].saveM("errors\\errorLayer" + i);
}
}
}
and this is the Matrix code:
class Matrix {
int i, j, size;
float[][] values;
Matrix(int i, int j) {
this.i = i;
this.j = j;
this.size = i * j;
values = new float[i][j];
}
Matrix sum (Matrix other) {
if (other.i == this.i && other.j == this.j) {
for (int x = 0; x < this.i; x++) {
for (int z = 0; z < this.j; z++) {
values[x][z] += other.values[x][z];
}
}
return this;
}
return null;
}
Matrix diff(Matrix other) {
if (other.i == this.i && other.j == this.j) {
for (int x = 0; x < this.i; x++) {
for (int z = 0; z < this.j; z++) {
values[x][z] -= other.values[x][z];
}
}
return this;
}
return null;
}
Matrix scalarMult(float k) {
for (int i = 0; i < this.i; i++) {
for (int j = 0; j < this.j; j++) {
values[i][j] *= k;
}
}
return this;
}
Matrix scalarDiv(float k) {
if (k != 0) {
for (int i = 0; i < this.i; i++) {
for (int j = 0; j < this.j; j++) {
values[i][j] /= k;
}
}
return this;
} else
return null;
}
Matrix matrixMult(Matrix other) {
if (this.j != other.i)
return null;
else {
Matrix temp = new Matrix(this.i, other.j);
for (int i = 0; i < temp.i; i++) {
for (int j = 0; j < temp.j; j++) {
for (int k = 0; k < this.j; k++) {
temp.values[i][j] += this.values[i][k] * other.values[k][j];
}
}
}
return temp;
}
}
Matrix squaredValues(){
for (int i = 0; i < this.i; i++){
for (int j = 0; j < this.j; j++){
values[i][j] = sq(values[i][j]);
}
}
return this;
}
void printM() {
for (int x = 0; x < this.i; x++) {
print("| ");
for (int z = 0; z < this.j; z++) {
print(values[x][z] + " | ");
}
println();
}
}
void saveM(String name) {
String out = "";
for (int x = 0; x < this.i; x++) {
out += "| ";
for (int z = 0; z < this.j; z++) {
out += values[x][z] + " | ";
}
out += "\n";
}
saveStrings("outputs\\" + name + ".txt", new String[] {out});
}
Matrix arrayToCollumn(float[] array) {
Matrix temp = new Matrix(array.length, 1);
for (int i = 0; i < array.length; i++)
temp.values[i][0] = array[i];
return temp;
}
Matrix arrayToLine(float[] array) {
Matrix temp = new Matrix(1, array.length);
for (int j = 0; j < array.length; j++)
temp.values[0][j] = array[j];
return temp;
}
Matrix copyM(){
Matrix temp = new Matrix(i, j);
for (int i = 0; i < this.i; i++){
for (int j = 0; j < this.j; j++){
temp.values[i][j] = this.values[i][j];
}
}
return temp;
}
}
As I said, the outputs are always converging to 0.5 instead of the actual value 1 or 0
I rewrote the code and it is working now! I have no idea what was wrong with the code before but this one works:
class NeuralNetwork {
int netSize;
float learningRate;
Matrix[] weights;
Matrix[] biases;
Matrix[] activations;
Matrix[] sums;
Matrix[] errors;
NeuralNetwork(int inNum, int hiddenNum, int[] hiddenLayerSize, int outNum, float learningRate) {
netSize = hiddenNum + 1;
this.learningRate = learningRate;
weights = new Matrix[netSize];
biases = new Matrix[netSize - 1];
activations = new Matrix[netSize];
sums = new Matrix[netSize];
errors = new Matrix[netSize];
initializeMatrices(inNum, hiddenNum, hiddenLayerSize, outNum);
}
//INITIALIZING MATRICES
void initializeMatrices(int inNum, int hiddenNum, int[] layerSize, int outNum) {
for (int i = 0; i < hiddenNum; i++) {
if (i == 0)
weights[i] = new Matrix(layerSize[0], inNum);
else
weights[i] = new Matrix(layerSize[i], layerSize[i - 1]);
biases[i] = new Matrix(layerSize[i], 1);
activations[i] = new Matrix(layerSize[i], 1);
errors[i] = new Matrix(layerSize[i], 1);
sums[i] = new Matrix(layerSize[i], 1);
weights[i].randomize(-1, 1);
biases[i].randomize(-1, 1);
activations[i].randomize(-1, 1);
}
weights[netSize - 1] = new Matrix(outNum, layerSize[layerSize.length - 1]);
activations[netSize - 1] = new Matrix(outNum, 1);
errors[netSize - 1] = new Matrix(outNum, 1);
sums[netSize - 1] = new Matrix(outNum, 1);
weights[netSize - 1].randomize(-1, 1);
activations[netSize - 1].randomize(-1, 1);
}
//---------------------------------------------------------------------------------------------------------------
void forwardPropag(float[] ins) {
Matrix inputs = new Matrix(0, 0);
inputs = inputs.arrayToCollumn(ins);
sums[0] = (weights[0].matrixMult(inputs)).sum(biases[0]);
activations[0] = sigM(sums[0]);
for (int i = 1; i < netSize - 1; i++) {
sums[i] = (weights[i].matrixMult(activations[i - 1])).sum(biases[i]);
activations[i] = sigM(sums[i]);
}
//output layer does not have biases
sums[netSize - 1] = weights[netSize - 1].matrixMult(activations[netSize - 2]);
activations[netSize - 1] = sigM(sums[netSize - 1]);
}
Matrix predict(float[] inputs) {
forwardPropag(inputs);
return activations[netSize - 1].copyM();
}
//SUPERVISED LEARNING - BACKPROPAGATION
void train(float[] inps, float[] expec) {
Matrix expected = new Matrix(0, 0);
expected = expected.arrayToCollumn(expec);
errors[netSize - 1] = predict(inps).diff(expected);
calcErorrPrevLayers();
adjustWeights(inps);
adjustBiases();
for (Matrix m : errors){
m.reset();
}
}
void calcErorrPrevLayers() {
for (int l = netSize - 2; l >= 0; l--) {
for (int i = 0; i < activations[l].i; i++) {
for (int j = 0; j < activations[l + 1].i; j++) {
errors[l].values[i][0] += weights[l + 1].values[j][i] * dSig(sums[l + 1].values[j][0]) * errors[l + 1].values[j][0];
}
}
}
}
void adjustWeights(float[] inputs) {
for (int l = 0; l < netSize; l++) {
if (l == 0) {
//for ervery neuron n in the first layer
for (int n = 0; n < activations[l].i; n++) {
//for every weight w of the first layer
for (int w = 0; w < inputs.length; w++) {
float weightChange = inputs[w] * dSig(sums[l].values[n][0]) * errors[l].values[n][0];
weights[l].values[n][w] += -learningRate * weightChange;
}
}
} else {
//for ervery neuron n in the first layer
for (int n = 0; n < activations[l].i; n++) {
//for every weight w of the first layer
for (int w = 0; w < activations[l - 1].i; w++) {
float weightChange = activations[l - 1].values[w][0] * dSig(sums[l].values[n][0]) * errors[l].values[n][0];
weights[l].values[n][w] += -learningRate * weightChange;
}
}
}
}
}
void adjustBiases() {
for (int l = 0; l < netSize - 1; l++) {
//for ervery neuron n in the first layer
for (int n = 0; n < activations[l].i; n++) {
float biasChange = dSig(sums[l].values[n][0]) * errors[l].values[n][0];
biases[l].values[n][0] += -learningRate * biasChange;
}
}
}
//ACTIVATION FUNCTION
float sig(float x) {
return 1 / (1 + exp(-x));
}
float dSig(float x) {
return sig(x) * (1 - sig(x));
}
Matrix sigM(Matrix m) {
Matrix temp = m.copyM();
for (int i = 0; i < m.i; i++) {
for (int j = 0; j < m.j; j++) {
temp.values[i][j] = sig(m.values[i][j]);
}
}
return temp;
}
}

Java - How to Solve this 2D Array Hour Glass?

I am working on a problem where I've to print the largest sum among all the hourglasses in the array. You can find the details about the problem here-
What I tried:
public class Solution {
public static void main(String[] args) {
Scanner in = new Scanner(System.in);
int arr[][] = new int[6][6];
for (int arr_i = 0; arr_i < 6; arr_i++) {
for (int arr_j = 0; arr_j < 6; arr_j++) {
arr[arr_i][arr_j] = in.nextInt();
}
}
int sum = 0;
int tmp_sum = 0;
for (int arr_i = 0; arr_i < 4; arr_i++) {
for (int arr_j = 0; arr_j < 4; arr_j++) {
if (arr[arr_i][arr_j] > 0) {
sum = sum + (arr[arr_i][arr_j]) + (arr[arr_i][arr_j + 1]) + (arr[arr_i][arr_j + 2]);
sum = sum + (arr[arr_i + 1][arr_j + 1]);
sum = sum + (arr[arr_i + 2][arr_j]) + (arr[arr_i + 2][arr_j + 1]) + (arr[arr_i + 2][arr_j + 2]);
if (tmp_sum < sum) {
tmp_sum = sum;
}
sum = 0;
}
}
}
System.out.println(tmp_sum);
}
}
Input:
1 1 1 0 0 0
0 1 0 0 0 0
1 1 1 0 0 0
0 9 2 -4 -4 0
0 0 0 -2 0 0
0 0 -1 -2 -4 0
Output:
12
Expected Output:
13
Screenshot:
I don't know where I'm doing wrong. I cannot understand why the expected output is 13. According to the description given in the problem it should be 10. Is this a wrong question or my understanding about this is wrong?
Remove the if (arr[arr_i][arr_j] > 0) statement. It prevents finding the answer at row 1, column 0, because that cell is 0.
Comments for other improvements to your code:
What if the best hourglass sum is -4? You should initialize tmp_sum to Integer.MIN_VALUE. And name it maxSum, to better describe it's purpose.
You shouldn't define sum outside the loop. Declare it when it is first assigned, then you don't have to reset it to 0 afterwards.
Your iterators should be just i and j. Those are standard names for integer iterators, and keeps code ... cleaner.
If you prefer longer names, use row and col, since that is what they represent.
You don't need parenthesis around the array lookups.
For clarity, I formatted the code below to show the hourglass shape in the array lookups.
Scanner in = new Scanner(System.in);
int arr[][] = new int[6][6];
for (int i = 0; i < 6; i++){
for (int j = 0; j < 6; j++){
arr[i][j] = in.nextInt();
}
}
int maxSum = Integer.MIN_VALUE;
for (int i = 0; i < 4; i++) {
for (int j = 0; j < 4; j++) {
int sum = arr[i ][j] + arr[i ][j + 1] + arr[i ][j + 2]
+ arr[i + 1][j + 1]
+ arr[i + 2][j] + arr[i + 2][j + 1] + arr[i + 2][j + 2];
if (maxSum < sum) {
maxSum = sum;
}
}
}
System.out.println(maxSum);
This was my solution. I wrapped an if statement around the code that calculates the sum, that makes sure we don't go out of bounds.
public static void main(String[] args) {
Scanner in = new Scanner(System.in);
int arr[][] = new int[6][6];
int max = Integer.MIN_VALUE;
int tempMax = 0;
for(int i=0; i < 6; i++){
for(int j=0; j < 6; j++){
arr[i][j] = in.nextInt();
}
}
for(int i=0; i < 6; i++){
for(int j=0; j < 6; j++){
if (i + 2 < 6 && j + 2 < 6) {
tempMax += arr[i][j] + arr[i][j + 1] + arr[i][j + 2];
tempMax += arr[i + 1][j + 1];
tempMax += arr[i + 2][j] + arr[i + 2][j + 1] + arr[i + 2][j + 2];
if (max < tempMax) {
max = tempMax;
}
tempMax = 0;
}
}
}
System.out.println(max);
}
Here's the simple and easy to understand C# equivalent code for your hourglass problem.
class Class1
{
static int[][] CreateHourGlassForIndex(int p, int q, int[][] arr)
{
int[][] hourGlass = new int[3][];
int x = 0, y = 0;
for (int i = p; i <= p + 2; i++)
{
hourGlass[x] = new int[3];
int[] temp = new int[3];
int k = 0;
for (int j = q; j <= q + 2; j++)
{
temp[k] = arr[i][j];
k++;
}
hourGlass[x] = temp;
x++;
}
return hourGlass;
}
static int findSumOfEachHourGlass(int[][] arr)
{
int sum = 0;
for (int i = 0; i < arr.Length; i++)
{
for (int j = 0; j < arr.Length; j++)
{
if (!((i == 1 && j == 0) || (i == 1 && j == 2)))
sum += arr[i][j];
}
}
return sum;
}
static void Main(string[] args)
{
int[][] arr = new int[6][];
for (int arr_i = 0; arr_i < 6; arr_i++)
{
string[] arr_temp = Console.ReadLine().Split(' ');
arr[arr_i] = Array.ConvertAll(arr_temp, Int32.Parse);
}
int[] sum = new int[16];
int k = 0;
for (int i = 0; i < 4; i++)
{
for (int j = 0; j < 4; j++)
{
int[][] hourGlass = CreateHourGlassForIndex(i, j, arr);
sum[k] = findSumOfEachHourGlass(hourGlass);
k++;
}
}
//max in sum array
Console.WriteLine(sum.Max());
}
}
Happy Coding.
Thanks,
Ankit Bajpai
You can try this code:
I think this will be easy to understand for beginners.
public class Solution {
public static void main(String[] args) {
Scanner in = new Scanner(System.in);
int arr[][] = new int[6][6];
for(int arr_i=0; arr_i < 6; arr_i++){
for(int arr_j=0; arr_j < 6; arr_j++){
arr[arr_i][arr_j] = in.nextInt();
}
}
int sum = 0;
int sum2 = 0;
int sum3 = 0;
int x = 0;
int max = Integer.MIN_VALUE;
for(int i = 0; i < 4; i++){
for(int j = 0; j < 4; j++){
for(int k = 0; k < 3; k++){
sum += arr[i][j+k]; //top elements of hour glass
sum2 += arr[i+2][j+k]; //bottom elements of hour glass
sum3 = arr[i+1][j+1]; //middle elements of hour glass
x = sum + sum2 + sum3; //add all elements of hour glass
}
if(max < x){
max = x;
}
sum = 0;
sum2 = 0;
sum3 = 0;
x = 0;
}
}
System.out.println(max);
}
}
Here is another easy option, hope it helps:
import java.io.*;
import java.util.*;
import java.text.*;
import java.math.*;
import java.util.regex.*;
public class Solution {
public static void main(String[] args) {
Scanner in = new Scanner(System.in);
int a[][] = new int[6][6];
for(int i=0; i < 6; i++){
for(int j=0; j < 6; j++){
a[i][j] = in.nextInt();
}
}
int hg = Integer.MIN_VALUE, sum;
for(int i=0; i<4; i++){
for(int j=0; j<4; j++){
sum = 0;
sum = sum + a[i][j] + a[i][j+1] + a[i][j+2];
sum = sum + a[i+1][j+1];
sum = sum + a[i+2][j] + a[i+2][j+1] + a[i+2][j+2];
if(sum>hg)
hg = sum;
}
}
System.out.println(hg);
in.close();
}
}
there is another opetion in case of -(minus) and zero output we can use shorted ser Treeset for the same . below is the sameple code
public class Solution {
public static void main(String[] args) {
Scanner in = new Scanner(System.in);
int arr[][] = new int[6][6];
for(int i=0; i < 6; i++){
for(int j=0; j < 6; j++){
arr[i][j] = in.nextInt();
}
}
int sum=0;int output=0;
Set<Integer> set=new TreeSet<Integer>();
for(int k=0;k<4;k++ )
{
for(int y=0;y<4;y++)
{
sum=arr[k][y]+arr[k][y+1]+arr[k][y+2]+arr[k+1][y+1]+arr[k+2][y]+arr[k+2][y+1]+arr[k+2][y+2]; set.add(sum);
}
}
int p=0;
for(int u:set)
{
p++;
if(p==set.size())
output=u;
}
System.out.println(output);
}
}
Solved in PHP, may be helpful.
<?php
$handle = fopen ("php://stdin","r");
$input = [];
while(!feof($handle))
{
$temp = fgets($handle);
$input[] = explode(" ",$temp);
}
$maxSum = PHP_INT_MIN;
for($i=0; $i<4; $i++)
{
for($j=0; $j<4; $j++)
{
$sum = $input[$i][$j] + $input[$i][$j + 1] + $input[$i][$j + 2]
+ $input[$i + 1][$j + 1] +
$input[$i + 2][$j] + $input[$i + 2][$j + 1] + $input[$i + 2][$j + 2];
if($sum > $maxSum)
{
$maxSum = $sum;
}
}
}
echo $maxSum;
?>
Passes all test cases
import java.io.*;
import java.util.*;
public class Solution {
public static void main(String[] args) {
Scanner read = new Scanner(System.in);
int rowSize = 6;
int colSize = 6;
int[][] array = new int[rowSize][colSize];
for(int row = 0; row < rowSize; row++) {
for(int col = 0; col < colSize; col++) {
array[row][col] = read.nextInt();
}
}
read.close();
int max = Integer.MIN_VALUE;
for(int row = 0; row < 4; row++) {
for(int col = 0; col < 4; col++) {
int sum = calculateHourglassSum(array, row, col);
if(sum > max) {
max = sum;
}
}
}
System.out.println(max);
}
private static int calculateHourglassSum(int[][] array, int rowIndex, int colIndex) {
int sum = 0;
for(int row = rowIndex; row < rowIndex + 3; row++) {
for(int col = colIndex; col < colIndex + 3; col++) {
if(row == rowIndex + 1 && col != colIndex + 1) {
continue;
}
sum += array[row][col];
}
}
return sum;
}
}
function galssSum(array) {
let maxGlass = 0;
if (array[0].length == 3) {
maxGlass = 1;
} else if (array[0].length > 3) {
maxGlass = array.length - 2;
}
let maxValue = -100000;
for (let i = 0; i < maxGlass; i++) {
for (let j = 0; j < maxGlass; j++) {
let a = array[i][j] + array[i][j + 1] + array[i][j + 2];
let b = array[i + 1][j + 1];
let c = array[i + 2][j] + array[i + 2][j + 1] + array[i + 2][j + 2];
let sum = a + b + c;
if (maxValue<sum) {
maxValue = sum;
}
}
}
return maxValue;
}
console.log(galssSum([[1, 1, 1, 0, 0, 0], [0, 1, 0, 0, 0, 0], [1, 1, 1, 0, 0, 0], [0, 0, 2, 4, 4, 0], [0, 0, 0, 2, 0, 0], [0, 0, 1, 2, 4, 0]]));
int hourglassSum(vector<vector<int>> vec) {
int res = 0;
int size = ((vec[0].size())-2) * ((vec.size())-2);
//cout<<size<<endl;
vector<int> res_vec(size);
int j = 0;
int itr =0 ;
int cnt = 0;
int mid = 0;
int l =0;
while((l+2) < vec.size())
{
while((j+2) < vec.size())
{
for(int i =j ;i<j+3; i+=2)
{
//cout<<i<<" :";
for(int k=l;k<l+3;k++)
{
//cout<<k<<" ";
res_vec[itr] += vec[i][k];
}
//cout<<endl;
}
res_vec[itr] += vec[j+1][l+1];
//cout<<endl;
itr++;
j++;
}
l++;
j=0;
}
int max=res_vec[0];
for(int i =1;i<res_vec.size();i++)
{
if(max < res_vec[i])
{
max = res_vec[i];
}
//cout<<res_vec[i]<< " ";
}
res = max;
//cout<<endl;
return res;
}
// Complete the hourglassSum function below.
static int hourglassSum(int[][] arr) {
int max = Integer.MIN_VALUE;
for (int i = 0; i < arr.length - 2; i++) {
for (int j = 0; j < arr.length - 2; j++) {
int hourGlassSum = (arr[i][j] + arr[i][j + 1] + arr[i][j + 2])
+ (arr[i + 1][j + 1])
+ (arr[i + 2][j] + arr[i + 2][j + 1] + arr[i + 2][j + 2]);
max = Math.max(hourGlassSum,max);
}
}
return max;
}
public static int hourglassSum(List<List<Integer>> arr) {
// Write your code here
int maxSum = Integer.MIN_VALUE;
for (int i = 0; i < 4; i++) {
for (int j = 0; j < 4; j++) {
int sum = arr.get(i).get(j) +arr.get(i).get(j+1) +
arr.get(i).get(j+2)+arr.get(i+1).get(j+1)+
arr.get(i+2).get(j)+arr.get(i+2).get(j+1)+arr.get(i+2).get(j+2);
if (maxSum < sum) {
maxSum = sum;
}
}
}
return maxSum;
}
}
Iterative way,Passing all test cases in hackerank web
public static int hourglassSum(List<List<Integer>> arr) {
// Write your code here
int rowsCount=arr.size();
int colCount=arr.get(0).size();
Integer max=Integer.MIN_VALUE;
Integer subSum=0;
for(int r=0; (r+3)<=rowsCount; r++)
{
for(int c=0; (c+3)<=colCount; c++)
{
subSum= hourglassSubSum(arr,r,c);
System.out.println("r,c,subSum "+r+" "+c+" "+" "+subSum);
if(subSum>max)
{
max=subSum;
}
}
}
return max;
}
public static int hourglassSubSum(List<List<Integer>> hourglassArray,
int rowIndex,int colIndex) {
// Write your code here
Integer subSum=0;
for(int i=rowIndex;i<(rowIndex+3);i++)
{
for(int j=colIndex;j<(colIndex+3);j++)
{
if(i==(rowIndex+1) && (j==colIndex || j==colIndex+2))
{
continue;
}
subSum=subSum+hourglassArray.get(i).get(j);
}
}
return subSum;
}
Solution for actual "2D Array - DS" challenge from HackerRank https://www.hackerrank.com/challenges/2d-array
public static int hourglassSum(List<List<Integer>> arr) {
int maxSum = Integer.MIN_VALUE;
for (int col=0; col <= 3; col++) {
for (int row=0; row <= 3; row++) {
int sum = calcHourglass(arr, col, row);
maxSum = Math.max(sum, maxSum);
}
}
return maxSum;
}
private static int calcHourglass(List<List<Integer>> arr, int col, int row) {
int sum = 0;
for (int i=0; i < 3; i++) {
sum += arr.get(row).get(col+i); // the top of the hourglass
sum += arr.get(row+2).get(col+i); // the bottom of the hourglass
}
sum += arr.get(row+1).get(col+1); // the center
return sum;
}
import java.io.*;
import java.util.Scanner;
public class Solution {
public static void main(String[] args) {
Scanner in = new Scanner(System.in);
int low = -9,high = 5;
int lh = low * high;
int sum = 0, i, j;
int max = 0;
int a[][] = new int[6][6];
for (i = 0; i < 6; i++) {
for (j = 0; j < 6; j++) {
a[i][j] = in.nextInt();
}
}
for (i = 0; i < 4; i++) {
for (j = 0; j < 4; j++) {
sum = (a[i][j] + a[i][j+1] + a[i][j+2]);
sum = sum + a[i+1][j+1];
sum = sum + (a[i+2][j] + a[i+2][j+1] + a[i+2][j+2]);
if (sum > lh) lh = sum;
}
}
System.out.print(lh);
}
}
Here you go..
public static void main(String[] args) {
Scanner in = new Scanner(System.in);
int a[][] = new int[6][6];
int max = 0;
for (int i = 0; i < 6; i++) {
for (int j = 0; j < 6; j++) {
a[i][j] = in.nextInt();
}
}
for (int i = 0; i < 4; i++) {
for (int j = 0; j < 4; j++) {
int sum = a[i][j] + a[i][j + 1] + a[i][j + 2] + a[i + 1][j + 1]
+ a[i + 2][j] + a[i + 2][j + 1] + a[i + 2][j + 2];
if (sum > max || (i == 0 && j == 0)) {
max = sum;
}
}
}
System.out.println(max);
}

having errors in my project for java

so im done with the whole thing and i compile it when i ran it it had a error for said: arrayindexexception and i try looking for my mistake and i could not find it so i need someone to see if they can help me
public class Merging
{
public static int[] merge(int[] arrA, int[] arrB)
{
int[] sum = new int[arrA.length + arrB.length];
int i = 0, j = 0, k = 0;
while ( i < arrA.length && j < arrB.length)
{
if(arrA[i] < arrB[j])
{
sum[k] = arrA[i];
i++;
k++;
}else
sum[k] = arrB[i];
j++;
k++;
}
return sum;
}
public static void main(String[] args)
{
int a = (int)(Math.random() * (50-20+1)+20);
int b = (int)(Math.random() * (50-20+1)+20);
int[] a1 = new int[a];
int[] a2 = new int[b];
int i = 0;
while(i < a1.length && 1 < a2.length)
{
a1[i] = (int) (Math.random() * (150-20+1)+20);
a2[i] = (int) (Math.random() * (150-20+1)+20);
i++;
}
for(int j = 0; j < a1.length; j++)
{
System.out.print(a1[j]);
}
System.out.println();
for(int k = 0; k < a2.length; k++)
{
System.out.print(a1[k]);
}
System.out.println();
System.out.print(merge(a1,a2));
}
}
Two errors.
1.) Change from 1 < a2.length to i < a2.length
while (i < a1.length && i < a2.length) {
a1[i] = (int) (Math.random() * (150 - 20 + 1) + 20);
a2[i] = (int) (Math.random() * (150 - 20 + 1) + 20);
i++;
}
2.) Change from System.out.print(a1[k]); to System.out.print(a2[k]);
for (int k = 0 ; k < a2.length ; k++) {
System.out.print(a2[k]);
}
In your while loop
while(i < a1.length && 1 < a2.length)
the "1" should be an "i"
One of your problems is here
for(int k = 0; k < a2.length; k++)
{
System.out.print(a1[k]);
}
You're iterating through a1, but your index goes to the length of a2.

Categories