SIMD transposition of 8x8 matrix of 32-bit values in Java

SIMD transposition of 8x8 matrix of 32-bit values in Java - java

I found the following code in C++ for fast transposition of an 8x8 matrix of 32-bit values: https://stackoverflow.com/a/51887176/1915854
inline void Transpose8x8Shuff(unsigned long *in)
{
__m256 *inI = reinterpret_cast<__m256 *>(in);
__m256 rI[8];
rI[0] = _mm256_unpacklo_ps(inI[0], inI[1]);
rI[1] = _mm256_unpackhi_ps(inI[0], inI[1]);
rI[2] = _mm256_unpacklo_ps(inI[2], inI[3]);
rI[3] = _mm256_unpackhi_ps(inI[2], inI[3]);
rI[4] = _mm256_unpacklo_ps(inI[4], inI[5]);
rI[5] = _mm256_unpackhi_ps(inI[4], inI[5]);
rI[6] = _mm256_unpacklo_ps(inI[6], inI[7]);
rI[7] = _mm256_unpackhi_ps(inI[6], inI[7]);
__m256 rrF[8];
__m256 *rF = reinterpret_cast<__m256 *>(rI);
rrF[0] = _mm256_shuffle_ps(rF[0], rF[2], _MM_SHUFFLE(1,0,1,0));
rrF[1] = _mm256_shuffle_ps(rF[0], rF[2], _MM_SHUFFLE(3,2,3,2));
rrF[2] = _mm256_shuffle_ps(rF[1], rF[3], _MM_SHUFFLE(1,0,1,0));
rrF[3] = _mm256_shuffle_ps(rF[1], rF[3], _MM_SHUFFLE(3,2,3,2));
rrF[4] = _mm256_shuffle_ps(rF[4], rF[6], _MM_SHUFFLE(1,0,1,0));
rrF[5] = _mm256_shuffle_ps(rF[4], rF[6], _MM_SHUFFLE(3,2,3,2));
rrF[6] = _mm256_shuffle_ps(rF[5], rF[7], _MM_SHUFFLE(1,0,1,0));
rrF[7] = _mm256_shuffle_ps(rF[5], rF[7], _MM_SHUFFLE(3,2,3,2));
rF = reinterpret_cast<__m256 *>(in);
rF[0] = _mm256_permute2f128_ps(rrF[0], rrF[4], 0x20);
rF[1] = _mm256_permute2f128_ps(rrF[1], rrF[5], 0x20);
rF[2] = _mm256_permute2f128_ps(rrF[2], rrF[6], 0x20);
rF[3] = _mm256_permute2f128_ps(rrF[3], rrF[7], 0x20);
rF[4] = _mm256_permute2f128_ps(rrF[0], rrF[4], 0x31);
rF[5] = _mm256_permute2f128_ps(rrF[1], rrF[5], 0x31);
rF[6] = _mm256_permute2f128_ps(rrF[2], rrF[6], 0x31);
rF[7] = _mm256_permute2f128_ps(rrF[3], rrF[7], 0x31);
}
However, converting it to Java vector API ( https://download.java.net/java/early_access/panama/docs/api/jdk.incubator.vector/jdk/incubator/vector/IntVector.html ) is not straightforward, because the Java vector API doesn't map directly to CPU instructions / C++ intrinsics.
Can you share what the equivalents of the following intrinsics/macros in Java are?
_mm256_unpacklo_ps()
_mm256_unpackhi_ps()
_mm256_shuffle_ps()
_MM_SHUFFLE()
_mm256_permute2f128_ps()
I can use the latest JDK 19.
UPDATE: following the suggestion by #Soonts , I've implemented the following, and it passes tests, but it's terribly slow:
public class SimdOps {
public static final VectorSpecies<Integer> SPECIES_INT = IntVector.SPECIES_256;
public static final VectorSpecies<Long> SPECIES_LONG = LongVector.SPECIES_256;
public static final VectorShuffle<Integer> vsUnpackLo = VectorShuffle.fromValues(SPECIES_INT, 0, -8, 1, -7, 4, -4,
5, -3);
public static final VectorShuffle<Integer> vsUnpackHi = VectorShuffle.fromValues(SPECIES_INT, 2, -6, 3, -5, 6, -2,
7, -1);
public static final VectorShuffle<Integer> vsShuffle1010 = VectorShuffle.fromValues(SPECIES_INT, 0, 1, -8, -7, 4,
5, -4, -3);
public static final VectorShuffle<Integer> vsShuffle3232 = VectorShuffle.fromValues(SPECIES_INT, 2, 3, -6, -5, 6, 7,
-2, -1);
public static final VectorShuffle<Integer> vsPermute0x20 = VectorShuffle.fromValues(SPECIES_INT, 0, 1, 2, 3, -8, -7,
-6, -5);
public static final VectorShuffle<Integer> vsPermute0x31 = VectorShuffle.fromValues(SPECIES_INT, 4, 5, 6, 7, -4, -3,
-2, -1);
// Transpose 8x8 matrix of 32-bit integers, stored in 256-bit SIMD vectors
public static final void transpose8x8(IntVector[] inpM) {
assert inpM.length == Constants.INTS_PER_SIMD;
// https://stackoverflow.com/questions/25622745/transpose-an-8x8-float-using-avx-avx2
// https://stackoverflow.com/questions/73977998/simd-transposition-of-8x8-matrix-of-32-bit-values-in-java
final IntVector rI0 = inpM[0].rearrange(vsUnpackLo, inpM[1]);
final IntVector rI1 = inpM[0].rearrange(vsUnpackHi, inpM[1]);
final IntVector rI2 = inpM[2].rearrange(vsUnpackLo, inpM[3]);
final IntVector rI3 = inpM[2].rearrange(vsUnpackHi, inpM[3]);
final IntVector rI4 = inpM[4].rearrange(vsUnpackLo, inpM[5]);
final IntVector rI5 = inpM[4].rearrange(vsUnpackHi, inpM[5]);
final IntVector rI6 = inpM[6].rearrange(vsUnpackLo, inpM[7]);
final IntVector rI7 = inpM[6].rearrange(vsUnpackHi, inpM[7]);
final IntVector rrF0 = rI0.rearrange(vsShuffle1010, rI2);
final IntVector rrF1 = rI0.rearrange(vsShuffle3232, rI2);
final IntVector rrF2 = rI1.rearrange(vsShuffle1010, rI3);
final IntVector rrF3 = rI1.rearrange(vsShuffle3232, rI3);
final IntVector rrF4 = rI4.rearrange(vsShuffle1010, rI6);
final IntVector rrF5 = rI4.rearrange(vsShuffle3232, rI6);
final IntVector rrF6 = rI5.rearrange(vsShuffle1010, rI7);
final IntVector rrF7 = rI5.rearrange(vsShuffle3232, rI7);
inpM[0] = rrF0.rearrange(vsPermute0x20, rrF4);
inpM[1] = rrF1.rearrange(vsPermute0x20, rrF5);
inpM[2] = rrF2.rearrange(vsPermute0x20, rrF6);
inpM[3] = rrF3.rearrange(vsPermute0x20, rrF7);
inpM[4] = rrF0.rearrange(vsPermute0x31, rrF4);
inpM[5] = rrF1.rearrange(vsPermute0x31, rrF5);
inpM[6] = rrF2.rearrange(vsPermute0x31, rrF6);
inpM[7] = rrF3.rearrange(vsPermute0x31, rrF7);
}
};
And the bottleneck is jdk.incubator.vector.Int256Vector.rearrange(VectorShuffle, Vector) . It's at least 10 times slower than the scalar code. Any ideas?

Disclaimer: I never wrote anything similar in Java.
Based on the documentation, the rearrange seems the only way to go.
The only issue is how to translate C intrinsics into the integers for the VectorShuffle<Float>.
Here's C++ code to find out:
void printShuffle( __m256 v, const char* name )
{
__m256i iv = _mm256_cvtps_epi32( v );
std::array<int, 8> a;
_mm256_storeu_si256( ( __m256i* )a.data(), iv );
printf( "%s: %i, %i, %i, %i, %i, %i, %i, %i\n", name,
a[ 0 ], a[ 1 ], a[ 2 ], a[ 3 ], a[ 4 ], a[ 5 ], a[ 6 ], a[ 7 ] );
}
#define TEST( expr ) printShuffle( expr, #expr )
void printJavaRearranges()
{
const __m256 a = _mm256_setr_ps( 0, 1, 2, 3, 4, 5, 6, 7 );
const __m256 b = _mm256_sub_ps( a, _mm256_set1_ps( 8 ) );
TEST( _mm256_unpacklo_ps( a, b ) );
TEST( _mm256_unpackhi_ps( a, b ) );
TEST( _mm256_shuffle_ps( a, b, _MM_SHUFFLE(1,0,1,0) ) );
TEST( _mm256_shuffle_ps( a, b, _MM_SHUFFLE(3,2,3,2) ) );
TEST( _mm256_permute2f128_ps( a, b, 0x20 ) );
TEST( _mm256_permute2f128_ps( a, b, 0x31 ) );
}
Output:
_mm256_unpacklo_ps( a, b ): 0, -8, 1, -7, 4, -4, 5, -3
_mm256_unpackhi_ps( a, b ): 2, -6, 3, -5, 6, -2, 7, -1
_mm256_shuffle_ps( a, b, _MM_SHUFFLE(1,0,1,0) ): 0, 1, -8, -7, 4, 5, -4, -3
_mm256_shuffle_ps( a, b, _MM_SHUFFLE(3,2,3,2) ): 2, 3, -6, -5, 6, 7, -2, -1
_mm256_permute2f128_ps( a, b, 0x20 ): 0, 1, 2, 3, -8, -7, -6, -5
_mm256_permute2f128_ps( a, b, 0x31 ): 4, 5, 6, 7, -4, -3, -2, -1
The _mm256_permute2f128_ps instruction can selectively zero out lanes, Java's vector API probably can't do that. Fortunately, the immediate values in your source code don't zero out any pieces.
If you're lucky, the runtime might map these values (when they are known to JIT in advance and never change) into the corresponding AVX instructions.

Related

Machine learning on a 8 x 8 matrix - Java

I am trying to implement a machine learning algorithm (k-nn for example).As it stands my Main class, which essentially builds 8×8-pixel matrices into an array to be manipulated later. (See the data description and sample dataset.) As it stands my arrays are printing as a like so:
, Class Code: 7 DataSet:[0, 0, 3, 9, 15, 8, 0, 0, 0, 1, 15, 16, 16, 7, 0, 0, 0, 0, 5, 16, 16, 10, 4, 0, 0, 0, 3, 16, 16, 16, 9, 0, 0, 0, 0, 15, 14, 4, 0, 0, 0, 0, 0, 13, 5, 0, 0, 0, 0, 0, 1, 15, 3, 0, 0, 0, 0, 0, 4, 13, 0, 0, 0, 0, 7]
Now for my starting point I'm looking to try to implement a very basic kNN algorithm as something to build from but I am having trouble manipulating the datasets that are being outputted. I have been reading up on Foundations of Machine Learning by M. Mohri but it hasn't been of any help. My Main class for building my data:
import java.util.*;
import java.io.*;
public class Main {
static class Data {
int[] dataSet;
int classCode;
public Data(int[] dataSet, int label) {
this.dataSet = dataSet;
this.classCode = label;
}
#Override
public String toString() {
return "Class Code: " + classCode + " DataSet:" + Arrays.toString(dataSet) + "\n";
}
}
ArrayList<Data> dataSetList;
int[][] dataArray = new int[2810][65];
private void readFile(String csvFile) {
int instances = 0;
dataSetList = new ArrayList<>();
try {
Scanner scan = new Scanner(new BufferedReader(new FileReader(csvFile)));
while (scan.hasNext()) {
String line = scan.next();
String[] extractedDataFromFile = line.split(",");
for (int i = 0; i < extractedDataFromFile.length; i++) {
dataArray[instances][i] = Integer.parseInt(extractedDataFromFile[i]);
}
dataSetList.add(new Data(dataArray[instances], dataArray[instances][extractedDataFromFile.length - 1]));
instances++;
}
System.out.println(dataSetList.toString());
} catch (FileNotFoundException ex) {
System.out.println(ex.getMessage());
}
}
public static void main(String[] args) {
Main main = new Main();
main.readFile("dataset1.csv");
}
}
This is my first time experimenting with machine learning so any feedback or approach to this would be hugely appreciated.
EDIT//
I'm looking at a basic kNN implementation as a starting point whether someone can redirect me to material on implementing on a similar data set or an example using my current provided code. I apologize if my initial post was a little vague

How to use SharedPreferences in Achartengine line graph?

I want to use SharedPreferences in the code for LineGraph:
public class LineGraph{
Context applicationContext;
SharedPreferences prefs = PreferenceManager.getDefaultSharedPreferences(applicationContext);
public Intent getIntent(Context context) {
// Our first data
int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; // x values!
int[] y = { 30, 34, 45, 57, 77, 89, 100, 111 ,123 ,145 }; // y values!
TimeSeries series = new TimeSeries("Line1");
for( int i = 0; i < x.length; i++)
{
series.add(x[i], y[i]);
}
// Our second data
int[] x2 = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; // x values!
int[] y2 = { 145, 123, 111, 100, 89, 77, 57, 45, 34, 30}; // y values!
TimeSeries series2 = new TimeSeries("Line2");
for( int i = 0; i < x2.length; i++)
{
series2.add(x2[i], y2[i]);
}
XYMultipleSeriesDataset dataset = new XYMultipleSeriesDataset();
dataset.addSeries(series);
dataset.addSeries(series2);
XYMultipleSeriesRenderer mRenderer = new XYMultipleSeriesRenderer(); // Holds a collection of XYSeriesRenderer and customizes the graph
mRenderer.setZoomButtonsVisible(true);
XYSeriesRenderer renderer = new XYSeriesRenderer(); // This will be used to customize line 1
XYSeriesRenderer renderer2 = new XYSeriesRenderer(); // This will be used to customize line 2
mRenderer.addSeriesRenderer(renderer);
mRenderer.addSeriesRenderer(renderer2);
// Customization time for line 1!
renderer.setColor(Color.WHITE);
renderer.setPointStyle(PointStyle.SQUARE);
renderer.setFillPoints(true);
// Customization time for line 2!
renderer2.setColor(Color.YELLOW);
renderer2.setPointStyle(PointStyle.DIAMOND);
renderer2.setFillPoints(true);
Intent intent = ChartFactory.getLineChartIntent(context, dataset, mRenderer, "Line Graph Title");
return intent;
}
}
But the SharedPreferences prefs = PreferenceManager.getDefaultSharedPreferences(applicationContext);
does not work, it stopped the application. Why?

Call this within your method, not at the class level.
Something like this:
public class LineGraph {
public Intent getIntent(Context context) {
SharedPreferences prefs = PreferenceManager.getDefaultSharedPreferences(context);
// Our first data
int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; // x values!
...
The way you had it, applicationContext was always going to be null.

Please provide your logcat-output.
Apart from this I assume that you receive a nullpointer exception when itializing the shared preferences, because the applicationContext-variable is null.
Place the initialization of your shared preferences into the getIntent()-method and pass the context-argument.

Restricting decimal places in an array

Currently, the output of this array has a too large decimal place trail. How can I restrict this to say 2 decimal places? By this I mean the array 'percentage1'. I've seen methods to do it online, but I don't understand how I would implement those methods into the code as shown below.
int[] correct1 = {20, 20, 13, 15, 22, 18, 19, 21, 23, 25};
int[] incorrect1 = {2, 1, 5, 2, 2, 5, 8, 1, 0, 0};
double[] percentage1 = new double[correct1.length];
for(int a = 0; a < correct1.length; a++ ){
percentage1[a] = (((double)correct1[a] / (correct1[a] + incorrect1[a]))*100);
}
Any help would be very much appreciated. Thanks

Please try adding a DecimalFormat object.
Add this to the beginning of the loop, it declares the format you're looking for - 2 decimal places: DecimalFormat df = new DecimalFormat("#.##");
Format it using format, then convert it back into a double. The reason why you need to restore it back is that format returns a String.
percentage1[a] = Double.valueOf(df.format((((double)correct1[a] / (correct1[a] + incorrect1[a]))*100)));
See revised code below:
public static void main(String[] args) {
// TODO Auto-generated method stub
int[] correct1 = {20, 20, 13, 15, 22, 18, 19, 21, 23, 25};
int[] incorrect1 = {2, 1, 5, 2, 2, 5, 8, 1, 0, 0};
double[] percentage1 = new double[correct1.length];
DecimalFormat df = new DecimalFormat("#.##");
for(int a = 0; a < correct1.length; a++ ){
percentage1[a] = Double.valueOf(df.format((((double)correct1[a] / (correct1[a] + incorrect1[a]))*100)));
System.out.println(percentage1[a]);
}
}
Sample result:
90.91
95.24
72.22
88.24
91.67
78.26
70.37
95.45
100.0
100.0

You can't. Doubles don't have decimal places. They have binary places. If you want decimal places you have to use a decimal radix, i.e. a String created by DecimalFormat, or a BigDecimal.
Proof here.

My table is outputting incorrect values for the percentage

public class apples {
private static String[] level1 = new String[] { "A", "B", "I", "K", "N", "O", "P", "S", "T", "W" };
public static void main(String[] args) {
int[] scores1 = { 99, 80, 56, 88, 70, 35, 67, 60, 78, 56 };
int[] correct1 = {20, 20, 13, 15, 22, 18, 19, 21, 23, 25};
int[] incorrect1 = {2, 1, 5, 2, 2, 5, 8, 1, 0, 0};
double[] percentage1 = new double[correct1.length];
for(int a = 0; a < correct1.length; a++ ){
percentage1[a] = (double)((correct1[a] / (correct1[a] + incorrect1[a]))*100);
}
System.out.println("Character \t Correct \t Incorrect \t Percentage");
for(int counter = 0; counter<scores1.length;counter++){
System.out.println(level1[counter] + "\t\t " + correct1[counter] + "\t\t " + incorrect1[counter] + "\t\t " + percentage1[counter]);
}
}
}
This outputs a table with 4 headings. The character, correct and incorrect columns show as expected. However the percentage row is not working properly. For example, character 'A', correct 20 and incorrect 2 gives a percentage of 0.0. Any 'incorrect' value > 0 outputs a percentage value of 0, and any 'incorrect' value which = 0 gives a percentage value of 100 (which is correct)... Can someone please explain where I have gone wrong?

You are dealing with integers here, and for integer division, the result is truncated. You'll need to cast the original values to double instead, or multiply one part by 1.0 to get it as a double:
percentage1[a] = ((correct1[a]*1.0 / (correct1[a] + incorrect1[a]))*100);

percentage1[a] = (double)((correct1[a] / (correct1[a] + incorrect1[a]))*100);
The above code casts to a double after the calculation is competed.
To cast as part of the calculation, use:
percentage1[a] = (( ((double)correct1[a]) / (correct1[a] + incorrect1[a]))*100);

You calculations here
percentage1[a] = (double)((correct1[a] / (correct1[a] + incorrect1[a]))*100);
perform integer division (you just cast them afterwards to double). If you want them to return the actual floating point division result, you have to cast all operands to double before the calculation.
So the fastest option would be to change this:
double[] correct1 = {20, 20, 13, 15, 22, 18, 19, 21, 23, 25};
double[] incorrect1 = {2, 1, 5, 2, 2, 5, 8, 1, 0, 0};
Another would be to change the computation to something like this
percentage1[a] = (1.0 * correct1[a] / (correct1[a] + incorrect1[a]))*100;
or to simplify a little:
percentage1[a] = 100.0 * correct1[a] / (correct1[a] + incorrect1[a]);

Using Number Randoms in Arrays that randomize

I am making a small class for a little project. It's a text based RPG game and I am trying to create a drop class for when NPC's die. I have create a couple Math.random methods (which are exactly the same, just differently named for my convenience.) to drop random amounts for an item id (name) and to get the rarity of the item dropped. It all works fine, but it only randomizes one time (on startup or run) and it won't randomize the amounts after that. I am also randomizing it between 2 numbers, for example, 25 and 50, the random not going lower then 25 or higher then 50.
My question is: How can I randomize a integer in a 2D Array or a general array after each time a NPC dies, so the random number that is first obtained changes and doesn't stay the same. Because right now, it stays at the number choose. if the number is 25, then the next npc I kill, the amount would still be 25.. and 25.. and 25.. and so on. I need it to randomize or change.
Please help, thank you.
public class DropConfig {
private static final int
ALWAYS = 0,
VERY_COMMON = rate(1, 9),
COMMON = rate(10, 20),
UNCOMMON = rate(30, 40),
RARE = rate(50, 60),
VERY_RARE = rate(70, 80),
SUPER_RARE = rate(90, 100);
public static final int[][] NPC_DROPS = {
// Normal NPC's
{1, 526, 1, ALWAYS},
{2, 526, 1, ALWAYS},
{3, 526, 1, ALWAYS},
{1, 995, drop(1, 50), ALWAYS},
{2, 995, drop(1, 50), ALWAYS},
{3, 995, drop(1, 50), ALWAYS},
// Moderate NPC's
{9, 526, 1, ALWAYS},
{9, 995, drop(250, 500), UNCOMMON},
{9, 555, drop(2, 7), VERY_COMMON},
{9, 995, drop(5, 50), VERY_COMMON},
{9, 1050, 1, RARE},
};
public static int rate(int min, int max) {
return 1 + (int)(Math.random() * ((max - min) + 1));
}
//Same as rate, different name for looks.
public static int drop(int min, int max) {
return 1 + (int)(Math.random() * ((max - min) + 1));
}
Heres where I call the drops method
public void npcDeath() {
int npc = 0;
if (npc == null)
return;
for(npc = 0; npc < DropConfig.NPC_DROPS.length; npc++) {
if(npc == DropConfig.NPC_DROPS[npc][0]) {
if(Misc.random(DropConfig.NPC_DROPS[npc][3]) == 0) { //Drops ALWAYS item
Item(DropConfig.NPC_DROPS[npc][1], DropConfig.NPC_DROPS[npc][2]);
}
}
}
}

If I understand correctly, you would like the elements of the NPC_DROPS array initialized with a call to drop() to be reinitialized each time this NPC_DROPS array is used.
Well, NPC_DROPS is a constant, so it can't change. Generate it each time it's accessed, using a method:
public static int[][] generateNpcDrops(){
return new int[][] {
// Normal NPC's
{1, 526, 1, ALWAYS},
{2, 526, 1, ALWAYS},
{3, 526, 1, ALWAYS},
{1, 995, drop(1, 50), ALWAYS},
{2, 995, drop(1, 50), ALWAYS},
{3, 995, drop(1, 50), ALWAYS},
// Moderate NPC's
{9, 526, 1, ALWAYS},
{9, 995, drop(250, 500), UNCOMMON},
{9, 555, drop(2, 7), VERY_COMMON},
{9, 995, drop(5, 50), VERY_COMMON},
{9, 1050, 1, RARE},
}
}
...
public void npcDeath() {
int npc = 0;
if (npc == null)
return;
int[][] npcDrops = DropConfig.generateNpcDrops();
for(npc = 0; npc < npcDrops.length; npc++) {
if(npc == npcDrops[npc][0]) {
if(Misc.random(npcDrops[npc][3]) == 0) { //Drops ALWAYS item
Item(c, npcDrops[npc][1], npcDrops[npc][2]);
}
}
}
}

You want to place functions as constants. You can do this in a language like Scala naturally, but in Java you have to work a little harder.
In every case, you need to call a function to get the actual value.
You can use enum and anonymous methods, but the simplest way/hack is to encode your ranges.
public static int rate(int min, int max) { // same for drop.
int range = max - min;
return (range << 16) | (min & 0xFFFF);
}
public static int eval(int minMax) {
int min = (short) minMax;
int range = (short) (minMax >> 16);
if (range == 0)
return min; // plain number.
else
return min + (int) (Math.random() * (range + 1));
}
You need to call eval() to turn your encoded range into a random number.

Creating an instance of Random with the same seed you will get the same 'random' number. Have you considered using SecureRandom? The difference between SecureRandom and Random is that SecureRandom produces non-deterministic output on each call.

We Keep Coding

Java is a programming language and computing platform first released by Sun Microsystems in 1995.

SIMD transposition of 8x8 matrix of 32-bit values in Java - java

Related

Machine learning on a 8 x 8 matrix - Java

How to use SharedPreferences in Achartengine line graph?

Restricting decimal places in an array

My table is outputting incorrect values for the percentage

Using Number Randoms in Arrays that randomize

Categories

Resources