I have implemented a simple neural network in Java that is supposed to solve the XOR problem (http://www.mind.ilstu.edu/curriculum/artificial_neural_net/xor_problem_and_solution.php).
I was wondering what you need Neuron and a NeuronLayer etc. classes for, so I decided to try to make it work using only one class.
Unfortunately, after some epochs, it reaches a state where the error is one of two values: 0.5 or -0.5.
I have tried limiting the range that the starting weights are chosen from but that didn't change anything.
Maybe, some of you can help me spot a mistake or improve the network.
Thanks in advance!
Here is my code:
int inCount = 2;
int hidCount = 3;
int outCount = 1;
float learningRate = 0.01;
float[] inputs = new float[inCount];
float[] hidden = new float[hidCount];
float[] outputs = new float[outCount];
float[][] IHweights = new float[inCount][hidCount];
float[][] HOweights = new float[hidCount][outCount];
void setup ()
{
for (int i = 0; i < IHweights.length; i++)
{
for (int e = 0; e < IHweights[i].length; e++)
{
float newWeight = random(-1,1);
while(newWeight > -0.5 && newWeight < 0.5)
{
newWeight = random(-1,1);
}
IHweights[i][e] = newWeight;
println(IHweights[i][e]+"\n");
}
}
for (int i = 0; i < HOweights.length; i++)
{
for (int e = 0; e < HOweights[i].length; e++)
{
float newWeight = random(-1,1);
while(newWeight > -0.5 && newWeight < 0.5)
{
newWeight = random(-1,1);
}
HOweights[i][e] = newWeight;
}
}
}
void draw ()
{
float[] inData = {round(random(1)),round(random(1))};
println(inData[0]+" "+inData[1]);
float[] expResult = {(int) inData[0]^(int) inData[1]};
println(" -> "+expResult[0]);
feedForward(inData,expResult);
}
public float sigmoid (float x)
{
if (x>10)
{
return 1;
}
if (x<-10)
{
return 0;
}
return 1/(1+exp(-x));
}
public void feedForward (float[] input, float[] expOut)
{
inputs = input;
for (int i = 0; i < hidCount; i++)
{
float var = 0;
for (int e = 0; e < inCount; e++)
{
var += inputs[e] * IHweights[e][i];
}
hidden[i] = sigmoid(var);
}
for (int i = 0; i < outCount; i++)
{
float var = 0;
for (int e = 0; e < hidCount; e++)
{
var += hidden[e] * HOweights[e][i];
}
outputs[i] = sigmoid(var);
}
float[] error = new float[outCount];
float[] deltaOut = new float[outCount];
for (int i = 0; i < outCount; i++)
{
error[i] = expOut[i] - outputs[i];
deltaOut[i] = outputs[i] * (1-outputs[i]) * error[i];
}
float[] deltaHid = new float[hidCount];
for (int i = 0; i < hidCount; i++)
{
deltaHid[i] = hidden[i] * (1-hidden[i]);
for (int e = 0; e < outCount; e++)
{
deltaHid[i] += HOweights[i][e] * outputs[e];
}
}
for (int i = 0; i < inCount; i++)
{
for (int e = 0; e < hidCount; e++)
{
IHweights[i][e] += inputs[i] * deltaHid[e] * learningRate;
}
}
for (int i = 0; i < hidCount; i++)
{
for (int e = 0; e < outCount; e++)
{
HOweights[i][e] += hidden[i] * deltaOut[e] * learningRate;
}
}
println(error[0]);
}
Related
Im currently trying to implement SAT in java but for some reason it doesn't work. I have rewritten my code multiple times, looked over it even more and watched many tutorials but cant find my mistake. In some cases for some edges it works partly properly but otherwise it detects collision when not colliding.
Later i'll add AABB collision detection for better performance.
Here are the relevant parts of my code:
SAT class:
public class SAT {
public static boolean checkSAT(Polygon poly1, Polygon poly2) {
Vector[] axes = new Vector[poly1.p.length + poly2.p.length];
for (int i = 0; i < poly1.p.length + poly2.p.length; i++) {
int a = i; if(i == poly1.p.length) a -= poly1.p.length;
axes[i] = poly1.getEdge(a).getNormal().getNormalized();
}
double p1_min = Double.POSITIVE_INFINITY, p1_max = Double.NEGATIVE_INFINITY,
p2_min = Double.POSITIVE_INFINITY, p2_max = Double.NEGATIVE_INFINITY;
for (int i = 0; i < axes.length; i++) {
for (int j = 0; j < poly1.p.length; j++) {
double proj = axes[i].dotProduct(poly1.p[j]);
if(proj < p1_min) p1_min = proj;
if(proj > p1_max) p1_max = proj;
}
for (int j = 0; j < poly2.p.length; j++) {
double proj = axes[i].dotProduct(poly2.p[j]);
if(proj < p2_min) p2_min = proj;
if(proj > p2_max) p2_max = proj;
}
if (p1_max < p2_min || p2_max < p1_min)
return false;
}
return true;
}
}
vector class:
public class Vector {
public final double x;
public final double y;
public Vector(double x, double y) {
this.x = x;
this.y = y;
}
public Vector getNormal() {
return new Vector(-y, x);
}
public double getLength() {
return Math.sqrt(x*x + y*y);
}
public Vector getNormalized() {
double l = getLength();
return new Vector(x/l, y/l);
}
public double dotProduct(Vector vec) {
return x * vec.x + y * vec.y;
}
}
relevant parts of the polygon class:
public class Polygon {
public Vector[] m; //"model" of the polygon
public Vector[] p; //coordinates of the corners of the polygon in space
public double posX;
public double posY;
public Polygon(Vector[] m) {
this.m = m;
p = new Vector[m.length];
transform();
}
//later i'll add rotation
public void transform() {
for (int i = 0; i < m.length; i++) {
p[i] = new Vector(m[i].x + posX, m[i].y + posY);
}
}
public void setPosition(Vector pos) {
posX = pos.x;
posY = pos.y;
transform();
}
public Vector getEdge(int i) {
if(i >= p.length) i = 0;
int j = i+1; if(j >= p.length) j = 0;
return new Vector(p[j].x - p[i].x, p[j].y - p[i].y);
}
}
Update:
I found the mistake and it's just plain stupid!! And on top of that I spend more than 5 hours finding it!!!!!!
double p1_min = Double.POSITIVE_INFINITY, p1_max = Double.NEGATIVE_INFINITY,
p2_min = Double.POSITIVE_INFINITY, p2_max = Double.NEGATIVE_INFINITY;
//those doubles should be declared inside the for loop
for (int i = 0; i < axes.length; i++) {
//right here
for (int j = 0; j < poly1.p.length; j++) {
double proj = axes[i].dotProduct(poly1.p[j]);
if(proj < p1_min) p1_min = proj;
if(proj > p1_max) p1_max = proj;
}
for (int j = 0; j < poly2.p.length; j++) {
double proj = axes[i].dotProduct(poly2.p[j]);
if(proj < p2_min) p2_min = proj;
if(proj > p2_max) p2_max = proj;
}
if (p1_max < p2_min || p2_max < p1_min)
return false;
}
I was trying to solve a XOR problem, but the output always converged to 0.5, so i tried a simpler problem like NOT and the same thing happened.
I really don't know what's going on, i checked the code a million times and everything seems to be right, when i debugged it saving the neural network info I saw that the either the weight values or the biases values were getting really large. To do that I followed the 3 blue 1 brown youtube series about neural network and some other videos, too.
this is my code:
PS: I put the entire code here but I think the main problem is inside the bakpropag function
class NeuralNetwork {
int inNum, hiddenLayersNum, outNum, netSize;
int[] hiddenLayerSize;
Matrix[] weights;
Matrix[] biases;
Matrix[] sums;
Matrix[] activations;
Matrix[] error;
Matrix inputs;
long samples = 0;
float learningRate;
//Constructor------------------------------------------------------------------------------------------------------
NeuralNetwork(int inNum, int hiddenLayersNum, int[] hiddenLayerSize, int outNum, float learningRate) {
this.inNum = inNum;
this.hiddenLayersNum = hiddenLayersNum;
this.hiddenLayerSize = hiddenLayerSize;
this.outNum = outNum;
this.netSize = hiddenLayersNum + 1;
this.learningRate = learningRate;
//output layer plus the hidden layer size
//Note: I'm not adding the input layer because it doesn't have weights
weights = new Matrix[netSize];
//no biases added to the output layer
biases = new Matrix[netSize - 1];
sums = new Matrix[netSize];
activations = new Matrix[netSize];
error = new Matrix[netSize];
initializeHiddenLayer();
initializeOutputLayer();
}
//Initializing Algorithms------------------------------------------------------------------------------------------
void initializeHiddenLayer() {
for (int i = 0; i < hiddenLayersNum; i++) {
if (i == 0) {//only the first hidden layer takes the inputs
weights[i] = new Matrix(hiddenLayerSize[i], inNum);
} else {
weights[i] = new Matrix(hiddenLayerSize[i], hiddenLayerSize[i - 1]);
}
biases[i] = new Matrix(hiddenLayerSize[i], 1);
sums[i] = new Matrix(hiddenLayerSize[i], 1);
activations[i] = new Matrix(hiddenLayerSize[i], 1);
error[i] = new Matrix(hiddenLayerSize[i], 1);
}
}
void initializeOutputLayer() {
//the output layer takes the last hidden layer activation values
weights[netSize - 1] = new Matrix(outNum, hiddenLayerSize[hiddenLayerSize.length - 1]);
activations[netSize - 1] = new Matrix(outNum, 1);
sums[netSize - 1] = new Matrix(outNum, 1);
error[netSize - 1] = new Matrix(outNum, 1);
for (Matrix m : weights) {
for (int i = 0; i < m.i; i++) {
for (int j = 0; j < m.j; j++) {
m.values[i][j] = random(-1, 1);
}
}
}
for (Matrix m : biases) {
for (int i = 0; i < m.i; i++) {
for (int j = 0; j < m.j; j++) {
m.values[i][j] = 1;
}
}
}
for (Matrix m : sums) {
for (int i = 0; i < m.i; i++) {
for (int j = 0; j < m.j; j++) {
m.values[i][j] = 0;
}
}
}
}
//Calculation------------------------------------------------------------------------------------------------------
void calculate(float[] inputs) {
this.inputs = new Matrix(0, 0);
this.inputs = this.inputs.arrayToCollumn(inputs);
sums[0] = (weights[0].matrixMult(this.inputs)).sum(biases[0]);
activations[0] = sigM(sums[0]);
for (int i = 1; i < netSize - 1; i++) {
sums[i] = weights[i].matrixMult(activations[i - 1]);
activations[i] = sigM(sums[i]).sum(biases[i]);
}
//there's no biases in the output layer
//And the output layer uses sigmoid function
sums[netSize - 1] = weights[netSize - 1].matrixMult(activations[netSize - 1 - 1]);
activations[netSize - 1] = sigM(sums[netSize - 1]);
}
//Sending outputs--------------------------------------------------------------------------------------------------
Matrix getOuts() {
return activations[netSize - 1];
}
//Backpropagation--------------------------------------------------------------------------------------------------
void calcError(float[] exp) {
Matrix expected = new Matrix(0, 0);
expected = expected.arrayToCollumn(exp);
//E = (output - expected)
error[netSize - 1] = this.getOuts().diff(expected);
samples++;
}
void backPropag(int layer) {
if (layer == netSize - 1) {
error[layer].scalarDiv(samples);
for (int i = layer - 1; i >= 0; i--) {
prevLayerCost(i);
}
weightError(layer);
backPropag(layer - 1);
} else {
weightError(layer);
biasError(layer);
if (layer != 0)
backPropag(layer - 1);
}
}
void weightError(int layer) {
if (layer != 0) {
for (int i = 0; i < weights[layer].i; i++) {
for (int j = 0; j < weights[layer].j; j++) {
float changeWeight = 0;
if (layer != netSize - 1)
changeWeight = activations[layer - 1].values[j][0] * deriSig(sums[layer].values[i][0]) * error[layer].values[i][0];
else
changeWeight = activations[layer - 1].values[j][0] * deriSig(sums[layer].values[i][0]) * error[layer].values[i][0];
weights[layer].values[i][j] += -learningRate * changeWeight;
}
}
} else {
for (int i = 0; i < weights[layer].i; i++) {
for (int j = 0; j < weights[layer].j; j++) {
float changeWeight = this.inputs.values[j][0] * deriSig(sums[layer].values[i][0]) * error[layer].values[i][0];
weights[layer].values[i][j] += -learningRate * changeWeight;
}
}
}
}
void biasError(int layer) {
for (int i = 0; i < biases[layer].i; i++) {
for (int j = 0; j < biases[layer].j; j++) {
float changeBias = 0;
if (layer != netSize - 1)
changeBias = deriSig(sums[layer].values[i][0]) * error[layer].values[i][0];
biases[layer].values[i][j] += -learningRate * changeBias;
}
}
}
void prevLayerCost(int layer) {
for (int i = 0; i < activations[layer].i; i++) {
for (int j = 0; j < activations[layer + 1].j; j++) {//for all conections of that neuron to the next layer
if (layer != netSize - 1)
error[layer].values[i][0] += weights[layer + 1].values[j][i] * deriSig(sums[layer + 1].values[j][0]) * error[layer + 1].values[j][0];
else
error[layer].values[i][0] += weights[layer + 1].values[j][i] * deriSig(sums[layer + 1].values[j][0]) * error[layer + 1].values[j][0];
}
}
}
//Activation Functions---------------------------------------------------------------------------------------------
Matrix reLUM(Matrix m) {
Matrix temp = m.copyM();
for (int i = 0; i < temp.i; i++) {
for (int j = 0; j < temp.j; j++) {
temp.values[i][j] = ReLU(m.values[i][j]);
}
}
return temp;
}
float ReLU(float x) {
return max(0, x);
}
float deriReLU(float x) {
if (x <= 0)
return 0;
else
return 1;
}
Matrix sigM(Matrix m) {
Matrix temp = m.copyM();
for (int i = 0; i < temp.i; i++) {
for (int j = 0; j < temp.j; j++) {
temp.values[i][j] = sig(m.values[i][j]);
}
}
return temp;
}
float sig(float x) {
return 1 / (1 + exp(-x));
}
float deriSig(float x) {
return sig(x) * (1 - sig(x));
}
//Saving Files-----------------------------------------------------------------------------------------------------
void SaveNeuNet() {
for (int i = 0; i < weights.length; i++) {
weights[i].saveM("weights\\weightLayer" + i);
}
for (int i = 0; i < biases.length; i++) {
biases[i].saveM("biases\\biasLayer" + i);
}
for (int i = 0; i < activations.length; i++) {
activations[i].saveM("activations\\activationLayer" + i);
}
for (int i = 0; i < error.length; i++) {
error[i].saveM("errors\\errorLayer" + i);
}
}
}
and this is the Matrix code:
class Matrix {
int i, j, size;
float[][] values;
Matrix(int i, int j) {
this.i = i;
this.j = j;
this.size = i * j;
values = new float[i][j];
}
Matrix sum (Matrix other) {
if (other.i == this.i && other.j == this.j) {
for (int x = 0; x < this.i; x++) {
for (int z = 0; z < this.j; z++) {
values[x][z] += other.values[x][z];
}
}
return this;
}
return null;
}
Matrix diff(Matrix other) {
if (other.i == this.i && other.j == this.j) {
for (int x = 0; x < this.i; x++) {
for (int z = 0; z < this.j; z++) {
values[x][z] -= other.values[x][z];
}
}
return this;
}
return null;
}
Matrix scalarMult(float k) {
for (int i = 0; i < this.i; i++) {
for (int j = 0; j < this.j; j++) {
values[i][j] *= k;
}
}
return this;
}
Matrix scalarDiv(float k) {
if (k != 0) {
for (int i = 0; i < this.i; i++) {
for (int j = 0; j < this.j; j++) {
values[i][j] /= k;
}
}
return this;
} else
return null;
}
Matrix matrixMult(Matrix other) {
if (this.j != other.i)
return null;
else {
Matrix temp = new Matrix(this.i, other.j);
for (int i = 0; i < temp.i; i++) {
for (int j = 0; j < temp.j; j++) {
for (int k = 0; k < this.j; k++) {
temp.values[i][j] += this.values[i][k] * other.values[k][j];
}
}
}
return temp;
}
}
Matrix squaredValues(){
for (int i = 0; i < this.i; i++){
for (int j = 0; j < this.j; j++){
values[i][j] = sq(values[i][j]);
}
}
return this;
}
void printM() {
for (int x = 0; x < this.i; x++) {
print("| ");
for (int z = 0; z < this.j; z++) {
print(values[x][z] + " | ");
}
println();
}
}
void saveM(String name) {
String out = "";
for (int x = 0; x < this.i; x++) {
out += "| ";
for (int z = 0; z < this.j; z++) {
out += values[x][z] + " | ";
}
out += "\n";
}
saveStrings("outputs\\" + name + ".txt", new String[] {out});
}
Matrix arrayToCollumn(float[] array) {
Matrix temp = new Matrix(array.length, 1);
for (int i = 0; i < array.length; i++)
temp.values[i][0] = array[i];
return temp;
}
Matrix arrayToLine(float[] array) {
Matrix temp = new Matrix(1, array.length);
for (int j = 0; j < array.length; j++)
temp.values[0][j] = array[j];
return temp;
}
Matrix copyM(){
Matrix temp = new Matrix(i, j);
for (int i = 0; i < this.i; i++){
for (int j = 0; j < this.j; j++){
temp.values[i][j] = this.values[i][j];
}
}
return temp;
}
}
As I said, the outputs are always converging to 0.5 instead of the actual value 1 or 0
I rewrote the code and it is working now! I have no idea what was wrong with the code before but this one works:
class NeuralNetwork {
int netSize;
float learningRate;
Matrix[] weights;
Matrix[] biases;
Matrix[] activations;
Matrix[] sums;
Matrix[] errors;
NeuralNetwork(int inNum, int hiddenNum, int[] hiddenLayerSize, int outNum, float learningRate) {
netSize = hiddenNum + 1;
this.learningRate = learningRate;
weights = new Matrix[netSize];
biases = new Matrix[netSize - 1];
activations = new Matrix[netSize];
sums = new Matrix[netSize];
errors = new Matrix[netSize];
initializeMatrices(inNum, hiddenNum, hiddenLayerSize, outNum);
}
//INITIALIZING MATRICES
void initializeMatrices(int inNum, int hiddenNum, int[] layerSize, int outNum) {
for (int i = 0; i < hiddenNum; i++) {
if (i == 0)
weights[i] = new Matrix(layerSize[0], inNum);
else
weights[i] = new Matrix(layerSize[i], layerSize[i - 1]);
biases[i] = new Matrix(layerSize[i], 1);
activations[i] = new Matrix(layerSize[i], 1);
errors[i] = new Matrix(layerSize[i], 1);
sums[i] = new Matrix(layerSize[i], 1);
weights[i].randomize(-1, 1);
biases[i].randomize(-1, 1);
activations[i].randomize(-1, 1);
}
weights[netSize - 1] = new Matrix(outNum, layerSize[layerSize.length - 1]);
activations[netSize - 1] = new Matrix(outNum, 1);
errors[netSize - 1] = new Matrix(outNum, 1);
sums[netSize - 1] = new Matrix(outNum, 1);
weights[netSize - 1].randomize(-1, 1);
activations[netSize - 1].randomize(-1, 1);
}
//---------------------------------------------------------------------------------------------------------------
void forwardPropag(float[] ins) {
Matrix inputs = new Matrix(0, 0);
inputs = inputs.arrayToCollumn(ins);
sums[0] = (weights[0].matrixMult(inputs)).sum(biases[0]);
activations[0] = sigM(sums[0]);
for (int i = 1; i < netSize - 1; i++) {
sums[i] = (weights[i].matrixMult(activations[i - 1])).sum(biases[i]);
activations[i] = sigM(sums[i]);
}
//output layer does not have biases
sums[netSize - 1] = weights[netSize - 1].matrixMult(activations[netSize - 2]);
activations[netSize - 1] = sigM(sums[netSize - 1]);
}
Matrix predict(float[] inputs) {
forwardPropag(inputs);
return activations[netSize - 1].copyM();
}
//SUPERVISED LEARNING - BACKPROPAGATION
void train(float[] inps, float[] expec) {
Matrix expected = new Matrix(0, 0);
expected = expected.arrayToCollumn(expec);
errors[netSize - 1] = predict(inps).diff(expected);
calcErorrPrevLayers();
adjustWeights(inps);
adjustBiases();
for (Matrix m : errors){
m.reset();
}
}
void calcErorrPrevLayers() {
for (int l = netSize - 2; l >= 0; l--) {
for (int i = 0; i < activations[l].i; i++) {
for (int j = 0; j < activations[l + 1].i; j++) {
errors[l].values[i][0] += weights[l + 1].values[j][i] * dSig(sums[l + 1].values[j][0]) * errors[l + 1].values[j][0];
}
}
}
}
void adjustWeights(float[] inputs) {
for (int l = 0; l < netSize; l++) {
if (l == 0) {
//for ervery neuron n in the first layer
for (int n = 0; n < activations[l].i; n++) {
//for every weight w of the first layer
for (int w = 0; w < inputs.length; w++) {
float weightChange = inputs[w] * dSig(sums[l].values[n][0]) * errors[l].values[n][0];
weights[l].values[n][w] += -learningRate * weightChange;
}
}
} else {
//for ervery neuron n in the first layer
for (int n = 0; n < activations[l].i; n++) {
//for every weight w of the first layer
for (int w = 0; w < activations[l - 1].i; w++) {
float weightChange = activations[l - 1].values[w][0] * dSig(sums[l].values[n][0]) * errors[l].values[n][0];
weights[l].values[n][w] += -learningRate * weightChange;
}
}
}
}
}
void adjustBiases() {
for (int l = 0; l < netSize - 1; l++) {
//for ervery neuron n in the first layer
for (int n = 0; n < activations[l].i; n++) {
float biasChange = dSig(sums[l].values[n][0]) * errors[l].values[n][0];
biases[l].values[n][0] += -learningRate * biasChange;
}
}
}
//ACTIVATION FUNCTION
float sig(float x) {
return 1 / (1 + exp(-x));
}
float dSig(float x) {
return sig(x) * (1 - sig(x));
}
Matrix sigM(Matrix m) {
Matrix temp = m.copyM();
for (int i = 0; i < m.i; i++) {
for (int j = 0; j < m.j; j++) {
temp.values[i][j] = sig(m.values[i][j]);
}
}
return temp;
}
}
currently I am trying to implement a CPLEX exact solution for the Asymmetric Capacitated Vehicle Routing Problem with the MTZ sub-tour elimination constraints.
My problems occurs when I try to implement Lazy Constraint Callbacks. More specifically I get a null pointer exception. There are almost no tutorials for implementing callbacks, so your help will be deeply appreciated.
This is my code:
CVRP class
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import ilog.concert.*;
import ilog.cplex.*;
public class ACVRP {
// euclidean distance method
public static double distance(int x1, int y1, int x2, int y2) {
return Math.sqrt((x2 - x1) * (x2 - x1) + (y2 - y1) * (y2 - y1));
}
public static void solveModel() {
int n = 32; // number of customers
int k = 5; // number of vehicles
int c = 100; // capacity of vehicles
int datacoords[][] = new int[n][2];
double[][] node = new double[n][n]; // dissimilarity matrix
int[] demand = new int[n]; // demand of every customer
try {
// load matrixes
FileReader frd = new FileReader("demands.txt");
FileReader frcoords = new FileReader("coords.txt");
BufferedReader brd = new BufferedReader(frd);
BufferedReader brcoords = new BufferedReader(frcoords);
String str;
int counter = 0;
while ((str = brd.readLine()) != null) {
String[] splitStr = str.trim().split("\\s+");
demand[counter] = Integer.parseInt(splitStr[1]);
counter++;
}
counter = 0;
while ((str = brcoords.readLine()) != null) {
String[] splitStr = str.trim().split("\\s+");
datacoords[counter][0] = Integer.parseInt(splitStr[1]);
datacoords[counter][1] = Integer.parseInt(splitStr[2]);
counter++;
}
for(int i = 0; i < n; i++){
for(int j = 0; j < n; j++){
node[i][j] = distance(datacoords[i][0],datacoords[i][1],datacoords[j][0],datacoords[j][1]);
// if (i == j ){
// node[i][j] = 99999999;
// }
}
}
brd.close();
brcoords.close();
IloCplex cplex = new IloCplex();
// variables
IloIntVar[][] x = new IloIntVar[n][];
for (int i = 0; i < n; i++) {
x[i] = cplex.boolVarArray(n);
for (int j = 0; j < n; j++) {
x[i][j].setName("x." + i + "." + j );
}
}
// mtz variables
IloNumVar[] u = cplex.numVarArray(n, 0, Double.MAX_VALUE);
for (int j = 0; j < n; j++) {
u[j].setName("u." + j);
}
//objective
IloLinearNumExpr conObj = cplex.linearNumExpr();
for (int i = 0; i < n; i++) {
for (int j = 0; j < n; j++) {
if ( i != j ){
conObj.addTerm(node[i][j], x[i][j]) ;
}
}
}
cplex.addMinimize(conObj);
// constraints
for (int i = 1; i < n; i++) {
IloLinearNumExpr equation1 = cplex.linearNumExpr();
for (int j = 0; j < n; j++) {
if (i!=j) {
equation1.addTerm(1.0, x[i][j]);
}
}
cplex.addEq(equation1, 1.0);
}
for (int j = 1; j < n; j++) {
IloLinearNumExpr equation2 = cplex.linearNumExpr();
for (int i = 0; i < n; i++) {
if (i!=j) {
equation2.addTerm(1.0, x[i][j]);
}
}
cplex.addEq(equation2, 1.0);
}
IloLinearNumExpr equation3 = cplex.linearNumExpr();
for (int i = 1; i < n; i++) {
equation3.addTerm(1.0, x[i][0]);
}
cplex.addEq(equation3, k);
IloLinearNumExpr equation4 = cplex.linearNumExpr();
for (int j = 1; j < n; j++) {
equation4.addTerm(1.0, x[0][j]);
}
cplex.addEq(equation4, k);
cplex.use(new LazyContstraintMTZ(n, c, demand, x, u, cplex));
//parameters
//cplex.setParam(IloCplex.Param.TimeLimit,50);
//cplex.setParam(IloCplex.Param.Preprocessing.Reduce, 0);
// cplex.setParam(IloCplex.Param.RootAlgorithm, IloCplex.Algorithm.Primal);
// solve model
cplex.solve();
cplex.exportModel("model.lp");
System.out.println(cplex.getBestObjValue());
for (int i = 0; i < n; i++) {
for (int j = 0; j < n; j++) {
if (i != j) {
if (cplex.getValue(x[i][j]) != 0) {
System.out.println("name: " + x[i][j].getName() + " value: " + cplex.getValue(x[i][j]));
}
}
}
}
// end
cplex.end();
} catch (IloException | NumberFormatException | IOException exc) {
exc.printStackTrace();
}
}
}
class for lazy constraint :
import ilog.concert.*;
import ilog.cplex.*;
public class LazyContstraintMTZ extends IloCplex.LazyConstraintCallback {
int n; // number of customers
int c; // capacity of vehicles
int[] demand; // demand of every customer
IloIntVar[][] x;
IloNumVar[] u;
IloCplex cplex;
IloRange[] rng;
//constructor
LazyContstraintMTZ(int n, int c, int[] demand, IloIntVar[][] x, IloNumVar[] u, IloCplex cplex){
this.n = n;
this.c = c;
this.demand = demand;
this.x = x;
this.u = u;
this.cplex = cplex;
}
protected void main() throws IloException {
// Get the current x solution
// double[][] sol = new double[n][n];
// for (int i = 0; i < n; i++) {
// for (int j = 0; j < n; j++) {
// sol[i][j] = cplex.getValue(x[i][j]);
// }
// }
for (int i = 1; i < n; i++) {
for (int j = 1; j < n; j++) {
if (i!=j && demand[i]+demand[j]<=c){
IloLinearNumExpr equation5 = cplex.linearNumExpr();
equation5.addTerm(1.0, u[i]);
equation5.addTerm(-1.0, u[j]);
equation5.addTerm(c, x[i][j]);
rng[i].setExpr(equation5);
rng[i].setBounds(Double.MIN_VALUE, c-demand[j]);
cplex.addLazyConstraint(rng[i]);
}
}
}
for (int i = 1; i < n; i++) {
IloLinearNumExpr equation6 = cplex.linearNumExpr();
equation6.addTerm(1.0, u[i]);
rng[i].setExpr(equation6);
rng[i].setBounds(demand[i], c);
cplex.addLazyConstraint(rng[i]);
}
}
}
As far as I can tell, rng is never initialized in your callback class. So it is always null and as soon as you attempt to set an element in it, you will get that NullPointerException.
Note that you don't even need that array. Instead of
rng[i].setExpr(equation5);
rng[i].setBounds(Double.MIN_VALUE, c-demand[j]);
cplex.addLazyConstraint(rng[i]);
you can just write
IloRange rng = cplex.range(Double.MIN_VALUE, equation5, c - demand[j]);
cplex.addLazyConstraint(rng);
(and similarly for equation6).
Also note that Double.MIN_VALUE is likely not what you want. This gives the smallest representable number larger than 0. I guess what you want is Double.NEGATIVE_INFINITY to specify a range without lower bound. In that case you could also just write
IloRange rng = cplex.le(equation5, c - demand[j]);
I have a class which is :
public class CCTest {
public double f;
public double[][][] x;
public double counter;
};
and i have assigned random number to x,
CCTest[] cls = new CCTest[5];
for (int i = 0; i < cls.length; i++) {
cls[i] = new CCTest();
}
for (int i = 0; i < (Size = 5); i++) {
cls[i].x = new double[this.c][this.D][this.Size];
for (int j = 0; j < this.D; j++) {
cls[i].x = getRandomX(this.c, this.D, this.Size);
}
}
then I tried to display the result using :
public static void display(double[][][] array) {
int rows = array.length;
int columns = array[0].length;
int depth = array[0][0].length;
for (int d = 0; d < depth; d++) {
for (int r = 0; r < rows; r++) {
for (int c = 0; c < columns; c++) {
System.out.print(array[r][c][d] + " ");
}
System.out.println();
}
System.out.println();
}
}
The Random Generation method is :
public static double[][][] getRandomX(int x, int y, int z) {
double[][][] result = new double[x][y][z];
Random r = new Random();
for (int i = 0; i < z; i++) {
for (int j = 0; j < y; j++) {
for (int k = 0; k < x; k++) {
result[k][j][i] = r.nextDouble();
}
}
}
return result;
}
but the output is empty [] , any idea please
The inner loop : for (int j = 0; j < this.D; j++) {...} is useless so you can remove this.The display and getRandomX() functions are fine. Try this in main , works in my environment:
CCTest[] cls = new CCTest[5];
for (int i = 0; i < cls.length; i++) {
cls[i] = new CCTest();
}
for (int i = 0; i < (Size = 5); i++) {
cls[i].x = new double[c][D][S];
cls[i].x = getRandomX(c, D, S);
}
for (int i = 0; i < (Size = 5); i++) {
display(cls[0].x);
}
Your display method should rather look like:
public static void display(double[][][] array) {
for (int x = 0; x < array.length; x++) {
for (int y = 0; y < array[x].length; y++) {
for (int z = 0; z < array[x][y].length; z++) {
System.out.println(array[x][y][z]);
}
}
}
}
There is another question which comes to my mind. What is getRandomX? You haven't shown us. I'd use the following:
public static double[][][] getRandom3DArray(double[][][] array) {
Random r = new Random();
for (int x = 0; x < array.length; x++) {
for (int y = 0; y < array[x].length; y++) {
for (int z = 0; z < array[x][y].length; z++) {
array[x][y][z] = r.nextDouble();
}
}
}
return array;
}
You're mistaking rows with depth in your display.
I am trying to train a two-state Hidden Markov model with a scaled Baum-Welch, but I noticed when my emission sequence is too small. My probabilities turn to NaN in java. Is this normal? I have posted my code in java below:
import java.util.ArrayList;
/*
Scaled Baum-Welch Algorithm implementation
author: Ricky Chang
*/
public class HMModeltest {
public static double[][] stateTransitionMatrix = new double[2][2]; // State Transition Matrix
public static double[][] emissionMatrix; // Emission Probability Matrix
public static double[] pi = new double[2]; // Initial State Distribution
double[] scaler; // This is used for scaling to prevent underflow
private static int emissions_id = 1; // To identify if the emissions are for the price changes or spread changes
private static int numEmissions = 0; // The amount of emissions
private static int numStates = 2; // The number of states in hmm
public static double improvementVar; // Used to assess how much the model has improved
private static double genState; // Generated state, it is used to generate observations below
// Create an ArrayList to store the emissions
public static ArrayList<Integer> eSequence = new ArrayList<Integer>();
// Initialize H, emission_id: 1 is price change, 2 are spreads; count is for the amount of different emissions
public HMModeltest(int id, int count){
emissions_id = id;
numEmissions = count;
stateTransitionMatrix = set2DValues(numStates,numStates); // Give the STM row stochastic values
emissionMatrix = new double[numStates][numEmissions];
emissionMatrix = set2DValues(numStates,numEmissions); // Give the Emission probability matrix row stochastic values
pi = set1DValues(numStates); // Give the initial matrix row stochastic values
}
// Categorize the price change emissions; I may want to put these in the Implementation.
private int identifyE1(double e){
if( e == 0) return 4;
if( e > 0){
if(e == 1) return 5;
else if(e == 3) return 6;
else if(e == 5) return 7;
else return 8;
}
else{
if(e == -1) return 3;
else if(e == -3) return 2;
else if(e == -5) return 1;
else return 0;
}
}
// Categorize the spread emissions
private int identifyE2(double e){
if(e == 1) return 0;
else if(e == 3) return 1;
else return 2;
}
public void updateE(int emission){
if(emissions_id == 1) eSequence.add( identifyE1(emission) );
else eSequence.add( identifyE2(emission) );
}
// Used to intialize random row stochastic values to vectors
private double[] set1DValues(int col){
double sum = 0;
double temp = 0;
double [] returnVector = new double[col];
for(int i = 0; i < col; i++){
temp = Math.round(Math.random() * 1000);
returnVector[i] = temp;
sum = sum + temp;
}
for(int i = 0; i < col; i++){
returnVector[i] = returnVector[i] / sum;
}
return returnVector;
}
// Used to initialize random row stochastic values to matrices
public double[][] set2DValues(int row, int col){
double sum = 0;
double temp = 0;
double[][] returnMatrix = new double[row][col];
for(int i = 0; i < row; i++){
for(int j = 0; j < col; j++){
temp = Math.round(Math.random() * 1000);
returnMatrix[i][j] = temp;
sum = sum + temp;
}
for(int j = 0; j < col; j++){
returnMatrix[i][j] = returnMatrix[i][j] / sum;
}
sum = 0;
}
return returnMatrix;
}
// Use forward algorithm to calculate alpha for all states and times
public double[][] forwardAlgo(int time){
double alpha[][] = new double[numStates][time];
scaler = new double[time];
// Intialize alpha for time 0
scaler[0] = 0; // c0 is for scaling purposes to avoid underflow
for(int i = 0; i < numStates; i ++){
alpha[i][0] = pi[i] * emissionMatrix[i][eSequence.get(0)];
scaler[0] = scaler[0] + alpha[i][0];
}
// Scale alpha_0
scaler[0] = 1 / scaler[0];
for(int i = 0; i < numStates; i++){
alpha[i][0] = scaler[0] * alpha[i][0];
}
// Use recursive method to calculate alpha
double tempAlpha = 0;
for(int t = 1; t < time; t++){
scaler[t] = 0;
for(int i = 0; i < numStates; i++){
for(int j = 0; j < numStates; j++){
tempAlpha = tempAlpha + alpha[j][t-1] * stateTransitionMatrix[j][i];
}
alpha[i][t] = tempAlpha * emissionMatrix[i][eSequence.get(t)];
scaler[t] = scaler[t] + alpha[i][t];
tempAlpha = 0;
}
scaler[t] = 1 / scaler[t];
for(int i = 0; i < numStates; i++){
alpha[i][t] = scaler[t] * alpha[i][t];
}
}
System.out.format("scaler: ");
for(int t = 0; t < time; t++){
System.out.format("%f, ", scaler[t]);
}
System.out.print('\n');
return alpha;
}
// Use backward algorithm to calculate beta for all states
public double[][] backwardAlgo(int time){
double beta[][] = new double[2][time];
// Intialize beta for current time
for(int i = 0; i < numStates; i++){
beta[i][time-1] = scaler[time-1];
}
// Use recursive method to calculate beta
double tempBeta = 0;
for(int t = time-2; t >= 0; t--){
for(int i = 0; i < numStates; i++){
for(int j = 0; j < numStates; j++){
tempBeta = tempBeta + (stateTransitionMatrix[i][j] * emissionMatrix[j][eSequence.get(t+1)] * beta[j][t+1]);
}
beta[i][t] = tempBeta;
beta[i][t] = scaler[t] * beta[i][t];
tempBeta = 0;
}
}
return beta;
}
// Calculate the probability of emission sequence given the model (it is also the denominator to calculate gamma and digamma)
public double calcP(int t, double[][] alpha, double[][] beta){
double p = 0;
for(int i = 0; i < numStates; i++){
for(int j = 0; j < numStates; j++){
p = p + (alpha[i][t] * stateTransitionMatrix[i][j] * emissionMatrix[j][eSequence.get(t+1)] * beta[j][t+1]);
}
}
return p;
}
// Calculate digamma; i and j are both states
public double calcDigamma(double p, int t, int i, int j, double[][] alpha, double[][] beta){
double digamma = (alpha[i][t] * stateTransitionMatrix[i][j] * emissionMatrix[j][eSequence.get(t+1)] * beta[j][t+1]) / p;
return digamma;
}
public void updatePi(double[][] gamma){
for(int i = 0; i < numStates; i++){
pi[i] = gamma[i][0];
}
}
public void updateAll(){
int time = eSequence.size();
double alpha[][] = forwardAlgo(time);
double beta[][] = backwardAlgo(time);
double initialp = calcLogEProb(time);
double nextState0, nextState1;
double p = 0;
double[][][] digamma = new double[numStates][numStates][time];
double[][] gamma = new double[numStates][time];
for(int t = 0; t < time-1; t++){
p = calcP(t, alpha, beta);
for(int i = 0; i < numStates; i++){
gamma[i][t] = 0;
for(int j = 0; j < numStates; j++){
digamma[i][j][t] = calcDigamma(p, t, i, j, alpha, beta);
gamma[i][t] = gamma[i][t] + digamma[i][j][t];
}
}
}
updatePi(gamma);
updateA(digamma, gamma);
updateB(gamma);
alpha = forwardAlgo(time);
double postp = calcLogEProb(time);
improvementVar = postp - initialp;
}
// Update the state transition matrix
public void updateA(double[][][] digamma, double[][] gamma){
int time = eSequence.size();
double num = 0;
double denom = 0;
for(int i = 0; i < numStates; i++){
for(int j = 0; j < numStates; j++){
for(int t = 0; t < time-1; t++){
num = num + digamma[i][j][t];
denom = denom + gamma[i][t];
}
stateTransitionMatrix[i][j] = num/denom;
num = 0;
denom = 0;
}
}
}
public void updateB(double[][] gamma){
int time = eSequence.size();
double num = 0;
double denom = 0;
// k is an observation, j is a state, t is time
for(int i = 0; i < numStates; i++){
for(int k = 0; k < numEmissions; k++){
for(int t = 0; t < time-1; t++){
if( eSequence.get(t) == k) num = num + gamma[i][t];
denom = denom + gamma[i][t];
}
emissionMatrix[i][k] = num/denom;
num = 0;
denom = 0;
}
}
}
public double calcLogEProb(int time){
double logProb = 0;
for(int t = 0; t < time; t++){
logProb = logProb + Math.log(scaler[t]);
}
return -logProb;
}
public double calcNextState(int time, int state, double[][] gamma){
double p = 0;
for(int i = 0; i < numStates; i++){
for(int j = 0; j < numStates; j++){
p = p + gamma[i][time-2] * stateTransitionMatrix[i][j] * stateTransitionMatrix[j][state];
}
}
return p;
}
// Print parameters
public void print(){
System.out.println("Pi:");
System.out.print('[');
for(int i = 0; i < 2; i++){
System.out.format("%f, ", pi[i]);
}
System.out.print(']');
System.out.print('\n');
System.out.println("A:");
for(int i = 0; i < 2; i++){
System.out.print('[');
for(int j = 0; j < 2; j++){
System.out.format("%f, ", stateTransitionMatrix[i][j]);
}
System.out.print(']');
System.out.print('\n');
}
System.out.println("B:");
for(int i = 0; i < 2; i++){
System.out.print('[');
for(int j = 0; j < 9; j++){
System.out.format("%f, ", emissionMatrix[i][j]);
}
System.out.print(']');
System.out.print('\n');
}
System.out.print('\n');
}
/* Generate sample data to test HMM training with the following params:
* [ .3, .7 ]
* [ .8, .2 ] [ .45 .1 .08 .05 .03 .02 .05 .2 .02 ]
* [ .36 .02 .06 .15 .04 .05 .2 .1 .02 ]
* With these as observations: {-10, -5, -3, -1, 0, 1, 3, 5, 10}
*/
public static int sampleDataGen(){
double rand = 0;
rand = Math.random();
if(genState == 1){
if(rand < .3) genState = 1;
else genState = 2;
rand = Math.random();
if(rand < .45) return -10;
else if(rand < .55) return -5;
else if(rand < .63) return -3;
else if(rand < .68) return -1;
else if(rand < .71) return 0;
else if(rand < .73) return 1;
else if(rand < .78) return 3;
else if(rand < .98) return 5;
else return 10;
}
else {
if(rand < .8) genState = 1;
else genState = 2;
rand = Math.random();
if(rand < .36) return -10;
else if(rand < .38) return -5;
else if(rand < .44) return -3;
else if(rand < .59) return -1;
else if(rand < .63) return 0;
else if(rand < .68) return 1;
else if(rand < .88) return 3;
else if(rand < .98) return 5;
else return 10;
}
}
public static void main(String[] args){
HMModeltest test = new HMModeltest(1,9);
test.print();
System.out.print('\n');
for(int i = 0; i < 20; i++){
test.updateE(sampleDataGen());
}
test.updateAll();
System.out.print('\n');
test.print();
System.out.print('\n');
for(int i = 0; i < 10; i++){
test.updateE(sampleDataGen());
}
test.updateAll();
System.out.print('\n');
test.print();
System.out.print('\n');
}
}
My guess is that since the sample is too small, sometimes the probabilities don't exist for some observations. But it would be nice to have some confirmation.
You could refer the "Scaling" section in Rabiner's paper, which solves the underflow problem.
You could also do the calculations in log space, that's what HTK and R do. Multiplication and division become addition and subtraction. For the other two, look at the LAdd/ LSub and logspace_add/logspace_sub functions in the respective toolkits.
The log-sum-exp trick might be helpful too.