Loading sklearn model in Java. Model created with DNNClassifier in python - java

The goal is to open in Java a model created/trained in python with tensorflow.contrib.learn.learn.DNNClassifier.
At the moment the main issue is to know the name of the "tensor" to give in java on the session runner method.
I have this test code in python :
from __future__ import division, print_function, absolute_import
import tensorflow as tf
import pandas as pd
import tensorflow.contrib.learn as learn
import numpy as np
from sklearn import metrics
from sklearn.cross_validation import train_test_split
from tensorflow.contrib import layers
from tensorflow.contrib.learn.python.learn.utils import input_fn_utils
from tensorflow.python.ops import array_ops
from tensorflow.python.framework import dtypes
from tensorflow.python.util.compat import as_text
# Train a DNNClassifier on the normalised CSV, export it as a text-format
# SavedModel (so it can be loaded from Java), then report test accuracy.
print(tf.VERSION)
df = pd.read_csv('../NNNormalizeData-out.csv')

# Zero-based position of the label column; every other column is a feature.
TARGET_COLUMN_INDEX = 35
inputs = [name for pos, name in enumerate(df.columns)
          if pos != TARGET_COLUMN_INDEX]
target = [name for pos, name in enumerate(df.columns)
          if pos == TARGET_COLUMN_INDEX]

total_inputs = df.as_matrix(inputs).astype(np.float32)
# NOTE(review): `target` is already a list, so `[target]` is a nested list --
# confirm this is what as_matrix() is meant to receive.
total_output = df.as_matrix([target]).astype(np.int32)

train_inputs, test_inputs, train_output, test_output = train_test_split(
    total_inputs, total_output, test_size=0.2, random_state=42)

# A single unnamed real-valued column spanning every input feature.
feature_columns = [
    tf.contrib.layers.real_valued_column(
        "", dimension=train_inputs.shape[1], dtype=tf.float32)
]
classifier = learn.DNNClassifier(
    hidden_units=[10, 20, 5],
    n_classes=5,
    feature_columns=feature_columns)
classifier.fit(train_inputs, train_output, steps=100)

# Save the model as saved_model.pbtxt (as_text=True) so Java can load it.
tfrecord_serving_input_fn = tf.contrib.learn.build_parsing_serving_input_fn(
    layers.create_feature_spec_for_parsing(feature_columns))
classifier.export_savedmodel(
    export_dir_base="test",
    serving_input_fn=tfrecord_serving_input_fn,
    as_text=True)

# Measure accuracy on the held-out 20 %.
pred = list(classifier.predict(test_inputs, as_iterable=True))
score = metrics.accuracy_score(test_output, pred)
print("Final score: {}".format(score))
# test individual samples
sample_1 = np.array( [[0.37671986791414125,0.28395908337619136,-0.0966095873607713,-1.0,0.06891621389763203,-0.09716678086712205,0.726029084013637,4.984689881073479E-4,-0.30296253267499107,-0.16192917054985334,0.04820256230479658,0.4951319883569152,0.5269983894210499,-0.2560313828048315,-0.3710980821053321,-0.4845867212612598,-0.8647234314469595,-0.6491591208322198,-1.0,-0.5004549422844073,-0.9880910165770813,0.5540293108747256,0.5625990251930839,0.7420121698556554,0.5445551415657979,0.4644276850235627,0.7316976292340245,0.636690006814346,0.16486621649984112,-0.0466018967678159,0.5261100063227044,0.6256168612312738,-0.544295484930702,0.379125782517193,0.6959368575211544]], dtype=float)
sample_2 = np.array( [[1.0,0.7982741870963959,1.0,-0.46270838239235024,0.040320274521029376,0.443451913224413,-1.0,1.0,1.0,-1.0,0.36689718911339564,-0.13577379160035796,-0.5162916256414466,-0.03373651520104648,1.0,1.0,1.0,1.0,0.786999801054777,-0.43856035121103853,-0.8199093927945158,1.0,-1.0,-1.0,-0.1134921695894473,-1.0,0.6420892436196663,0.7871737734493178,1.0,0.6501788845358409,1.0,1.0,1.0,-0.17586627413625022,0.8817194210401085]], dtype=float)
# Both calls score sample_2: first the predicted class index, then the
# per-class probabilities. The first label used to say "sample_1" even though
# sample_2 was the input.
pred = list(classifier.predict(sample_2, as_iterable=True))
print("Prediction for sample_2 is:{} ".format(pred))
pred = list(classifier.predict_proba(sample_2, as_iterable=True))
print("Prediction for sample_2 is:{} ".format(pred))
A saved_model.pbtxt file is created.
I try to load this model in Java with the following code :
// First attempt at loading the exported SavedModel from Java: feeds one
// 35-value sample and prints each class probability followed by the index of
// the largest one.
public class HelloTF {
public static void main(String[] args) throws Exception {
// Load the model exported from Python using the "serve" tag.
SavedModelBundle bundle=SavedModelBundle.load("/java/workspace/APIJavaSampleCode/tfModels/dnn/ModelSave","serve");
Session s = bundle.session();
double[] inputDouble = {1.0,0.7982741870963959,1.0,-0.46270838239235024,0.040320274521029376,0.443451913224413,-1.0,1.0,1.0,-1.0,0.36689718911339564,-0.13577379160035796,-0.5162916256414466,-0.03373651520104648,1.0,1.0,1.0,1.0,0.786999801054777,-0.43856035121103853,-0.8199093927945158,1.0,-1.0,-1.0,-0.1134921695894473,-1.0,0.6420892436196663,0.7871737734493178,1.0,0.6501788845358409,1.0,1.0,1.0,-0.17586627413625022,0.8817194210401085};
// Narrow the sample to float before wrapping it in a tensor.
float [] inputfloat=new float[inputDouble.length];
for(int i=0;i<inputfloat.length;i++)
{
inputfloat[i]=(float)inputDouble[i];
}
// Rank-1 float tensor of shape [35].
Tensor inputTensor = Tensor.create(new long[] {35}, FloatBuffer.wrap(inputfloat) );
// NOTE(review): run() throws here. The TensorFlowException reports that
// "input_example_tensor" is declared DT_STRING, so a float tensor cannot be
// fed to it.
Tensor result = s.runner()
.feed("input_example_tensor", inputTensor)
.fetch("dnn/multi_class_head/predictions/probabilities")
.run().get(0);
// Copy the five class probabilities out of the result tensor.
float[] m = new float[5];
float[] vector = result.copyTo(m);
// Arg-max over the probabilities; predict stays -1 if no value exceeds 0.
float maxVal = 0;
int inc = 0;
int predict = -1;
for(float val : vector)
{
System.out.println(val+" ");
if(val > maxVal) {
predict = inc;
maxVal = val;
}
inc++;
}
System.out.println(predict);
}
}
I get the error on the .run().get(0); line :
Exception in thread "main" org.tensorflow.TensorFlowException: Output 0 of type float does not match declared output type string for node _recv_input_example_tensor_0 = _Recv[_output_shapes=[[-1]], client_terminated=true, recv_device="/job:localhost/replica:0/task:0/cpu:0", send_device="/job:localhost/replica:0/task:0/cpu:0", send_device_incarnation=3663984897684684554, tensor_name="input_example_tensor:0", tensor_type=DT_STRING, _device="/job:localhost/replica:0/task:0/cpu:0"]()
at org.tensorflow.Session.run(Native Method)
at org.tensorflow.Session.access$100(Session.java:48)
at org.tensorflow.Session$Runner.runHelper(Session.java:285)
at org.tensorflow.Session$Runner.run(Session.java:235)
at tensorflow.HelloTF.main(HelloTF.java:35)

OK, I finally solved it: the main problem was the name of the input to use in Java, which is "dnn/input_from_feature_columns/input_from_feature_columns/concat" and not "input_example_tensor".
I discovered this by navigating the graph with: tensorboard --logdir=D:\python\Workspace\Autoencoder\src\dnn\ModelSave
here is the java code :
public class HelloTF {
public static void main(String[] args) throws Exception {
SavedModelBundle bundle=SavedModelBundle.load("/java/workspace/APIJavaSampleCode/tfModels/dnn/ModelSave","serve");
Session s = bundle.session();
double[] inputDouble = {1.0,0.7982741870963959,1.0,-0.46270838239235024,0.040320274521029376,0.443451913224413,-1.0,1.0,1.0,-1.0,0.36689718911339564,-0.13577379160035796,-0.5162916256414466,-0.03373651520104648,1.0,1.0,1.0,1.0,0.786999801054777,-0.43856035121103853,-0.8199093927945158,1.0,-1.0,-1.0,-0.1134921695894473,-1.0,0.6420892436196663,0.7871737734493178,1.0,0.6501788845358409,1.0,1.0,1.0,-0.17586627413625022,0.8817194210401085};
float [] inputfloat=new float[inputDouble.length];
for(int i=0;i<inputfloat.length;i++)
{
inputfloat[i]=(float)inputDouble[i];
}
FloatBuffer.wrap(inputfloat) );
float[][] data= new float[1][35];
data[0]=inputfloat;
Tensor inputTensor=Tensor.create(data);
Tensor result = s.runner()
.feed("dnn/input_from_feature_columns/input_from_feature_columns/concat", inputTensor)
//.feed("input_example_tensor", inputTensor)
//.fetch("tensorflow/serving/classify")
.fetch("dnn/multi_class_head/predictions/probabilities")
//.fetch("dnn/zero_fraction_3/Cast")
.run().get(0);
float[][] m = new float[1][5];
float[][] vector = result.copyTo(m);
float maxVal = 0;
int inc = 0;
int predict = -1;
for(float val : vector[0])
{
System.out.println(val+" ");
if(val > maxVal) {
predict = inc;
maxVal = val;
}
inc++;
}
System.out.println(predict);
}
}
I have tested the output :
Python side :
Prediction for sample_2 is:[3]
Prediction for sample_2 is:[array([ 0.17157166, 0.24475774, 0.16158019, 0.24648622, 0.17560424], dtype=float32)]
Java Side :
0.17157166
0.24475774
0.16158019
0.24648622
0.17560424
3

The error message offers a clue: the tensor named "input_example_tensor" in the model expects to have string contents, whereas you provided float values.
Judging by the name of the tensor and your code, I'd guess that the tensor you're feeding is defined in input_fn_utils.py. This tensor is passed to the tf.parse_example() op, which expects a vector of tf.train.Example protocol buffers, serialized as strings.

I got an error without feed("input_example_tensor", inputTensor) on Tensorflow 1.1.
But I found that example.proto can be fed as "input_example_tensor", although it took a lot of time to figure out how to create string tensors for serialized protocol buffer.
This is how I created inputTensor.
// Builds a string tensor of shape [1] holding one serialized Example proto,
// suitable for feeding to "input_example_tensor". Relies on `bundle` and `s`
// (the loaded SavedModelBundle and its Session) from the surrounding code.
org.tensorflow.example.Example.Builder example = org.tensorflow.example.Example.newBuilder();
/* set some features to example... */
Tensor exampleTensor = Tensor.create(example.build().toByteArray());
// Here, the shape of exampleTensor is not specified yet.
// Set the shape to feed this as "input_example_tensor"
// The reshape is done by adding three helper ops ("example", "shape",
// "output") to the loaded model's own graph.
// NOTE(review): presumably these fixed op names collide if this code runs
// twice against the same graph -- confirm.
Graph g = bundle.graph();
Output examplePlaceholder =
g.opBuilder("Placeholder", "example")
.setAttr("dtype", exampleTensor.dataType())
.build().output(0);
// Constant int tensor [1]: the target shape passed to Reshape.
Tensor shapeTensor = Tensor.create(new long[]{1}, IntBuffer.wrap(new int[]{1}));
Output shapeConst = g.opBuilder("Const", "shape")
.setAttr("dtype", shapeTensor.dataType())
.setAttr("value", shapeTensor)
.build().output(0);
Output shaped = g.opBuilder("Reshape", "output").addInput(examplePlaceholder).addInput(shapeConst).build().output(0);
// Run only the helper ops to obtain the reshaped tensor.
Tensor inputTensor = s.runner().feed(examplePlaceholder, exampleTensor).fetch(shaped).run().get(0);
// Now, inputTensor has shape of [1] and ready to feed.

Your parameters in .feed() and .fetch() should be matching with your input and output datatype.
You can look at your saved_model.pbtxt file. It contains details about your parameters and their input/output types.
For instance,
my java code
Tensor result = s.runner()
.feed("ParseExample/ParseExample", inputTensor)
.fetch("dnn/binary_logistic_head/predictions/probabilities")
.run().get(0);
my savedModel.pbtxt (part of it)
node {
name: "ParseExample/ParseExample"
op: "ParseExample"
input: "input_example_tensor"
input: "ParseExample/ParseExample/names"
input: "ParseExample/ParseExample/dense_keys_0"
input: "ParseExample/Const"
attr {
key: "Ndense"
value {
i: 1
}
}
attr {
key: "Nsparse"
value {
i: 0
}
}
attr {
key: "Tdense"
value {
list {
type: DT_FLOAT
}
}
}
attr {
key: "_output_shapes"
value {
list {
shape {
dim {
size: -1
}
dim {
size: 2
}
}
}
}
}
attr {
key: "dense_shapes"
value {
list {
shape {
dim {
size: 2
}
}
}
}
}
attr {
key: "sparse_types"
value {
list {
}
}
}
}
outputs {
key: "scores"
value {
name: "dnn/binary_logistic_head/predictions/probabilities:0"
dtype: DT_FLOAT
tensor_shape {
dim {
size: -1
}
dim {
size: 2
}
}
}
}
They are both compatible with my datatype, float.

Related

Can I write this code without using serialization?

For my project I was wondering whether there is a way I can do this assignment without using serialization. Here are the guidelines to the project and the code I already have together:
The Canadian Forest Service wants to do a simple simulation of the growth and pruning of forests. Each forest has a name and exactly 10 trees. The trees are planted when they are 1' to 5' tall, and each tree has an individual growth rate of 50%-100% per year. For the simulation new trees are constructed randomly within these bounds. A forest is reaped (by lumberjacks) on demand - all trees above a specified height are cut down and replaced with new trees.
The user interface to the simulation must allow the user to:
Display the current forest (with tree heights to 2 decimal places)
Discard the current forest and create a new forest
Simulate a year's growth in the current forest
Reap the current forest of trees over a user specified height, replacing the reaped trees with random new trees.
Save the information about the current forest to file (named after the forest)
Discard the current forest and load the information about a forest from a file.
Class1
import java.io.*;
import java.util.*;
/**
 * A named forest of exactly {@link #MAX_NUM_TREES} trees that can be
 * displayed, grown by a year, reaped, and saved to or loaded from a file
 * named after the forest.
 *
 * Implements Serializable because saveForest/loadForest use Java object
 * streams; without it writeObject throws NotSerializableException. The
 * element type Tree must be Serializable as well for a save to succeed.
 */
public class Forest implements Serializable {
    private static final long serialVersionUID = 1L;

    //constants
    private static final int MAX_NUM_TREES = 10;

    //variables
    // Formerly doubled as the shared loop counter of every method; the
    // declaration is kept so existing references still compile, but loops now
    // use their own local counters.
    int index;
    private String name;
    private Tree[] arrayOfTrees;

    /**
     * Creates a forest with the given name and fills it with new trees.
     *
     * @param forestName name of the forest; also the file name used by saveForest
     */
    public Forest(String forestName) {
        name = forestName;
        arrayOfTrees = new Tree[MAX_NUM_TREES];
        for (int i = 0; i < arrayOfTrees.length; i++) {
            arrayOfTrees[i] = new Tree();
        }
    }

    /** Prints the forest name and each tree with its 1-based position, or "No forest." if unnamed. */
    public void display() {
        if (name == null) {
            System.out.println("No forest.");
            return;
        }
        System.out.println(name);
        for (int i = 0; i < arrayOfTrees.length; i++) {
            System.out.printf("%2d : %s\n", (i + 1), arrayOfTrees[i]);
        }
    }

    /** Grows every tree in the forest by one year. */
    public void yearGrowth() {
        for (Tree tree : arrayOfTrees) {
            tree.grow();
        }
    }

    /**
     * Replaces every tree whose height is at least reapHeight with a new tree,
     * printing the cut tree and its replacement.
     *
     * @param reapHeight height threshold for cutting
     */
    public void reap(int reapHeight) {
        for (int i = 0; i < arrayOfTrees.length; i++) {
            // NOTE(review): the assignment text says trees *above* the height
            // are cut; ">=" also cuts a tree exactly at it -- confirm intent.
            if (arrayOfTrees[i].getHeight() >= reapHeight) {
                System.out.println("Cut " + (i + 1) + " : " + arrayOfTrees[i]);
                arrayOfTrees[i] = new Tree();
                System.out.println("New " + (i + 1) + " : " + arrayOfTrees[i]);
            }
        }
    }

    /**
     * Serializes the forest to a file named after the forest.
     *
     * @param forest the forest to save
     * @throws IOException if the file cannot be written
     */
    public static void saveForest(Forest forest) throws IOException {
        // try-with-resources closes the stream even when writeObject fails.
        try (ObjectOutputStream toStream =
                new ObjectOutputStream(new FileOutputStream(forest.getName()))) {
            toStream.writeObject(forest);
        }
    }

    /**
     * Loads a forest previously written by saveForest.
     *
     * @param fileName file to read
     * @return the loaded forest, or null if the stored class cannot be resolved
     * @throws IOException if the file cannot be opened, read, or closed
     */
    public static Forest loadForest(String fileName) throws IOException {
        try (ObjectInputStream fromStream =
                new ObjectInputStream(new FileInputStream(fileName))) {
            return (Forest) fromStream.readObject();
        } catch (ClassNotFoundException e) {
            System.out.println(e.getMessage());
            return null;
        }
    }

    /** @return the name of this forest */
    public String getName() {
        return name;
    }
}
Class2
import java.util.Random;
import java.util.*;
import java.io.*;
/**
 * A tree with a current height and a fixed yearly growth rate, both chosen at
 * random on construction.
 *
 * Serializable so that an object holding trees can be written with an
 * ObjectOutputStream.
 */
public class Tree implements Serializable {
    private static final long serialVersionUID = 1L;

    // Shared generator for all trees.
    private static final Random rand = new Random();

    // Bounds for a newly planted tree: height in feet, growth rate as a
    // fraction per year. Static, so they are not stored per instance.
    static final double MIN_HEIGHT = 1;
    static final double MIN_GROWTH_RATE = 0.5;
    static final double MAX_HEIGHT = 5;
    static final double MAX_GROWTH_RATE = 1.0;

    private double height;
    private double growthRate;

    /** Creates a tree with a random height and growth rate within the bounds above. */
    public Tree() {
        height = MIN_HEIGHT + rand.nextDouble() * (MAX_HEIGHT - MIN_HEIGHT);
        growthRate = MIN_GROWTH_RATE + rand.nextDouble() * (MAX_GROWTH_RATE - MIN_GROWTH_RATE);
    }

    /**
     * Grows the tree by one year at its growth rate.
     *
     * @return the new height
     */
    public double grow() {
        height = height * (1 + growthRate);
        return height;
    }

    /** @return the current height */
    public double getHeight() {
        return height;
    }

    /** @return the yearly growth rate as a fraction */
    public double getGrowthRate() {
        return growthRate;
    }

    /** Formats the tree as its height to two decimals followed by the growth rate as a whole percentage. */
    public String toString() {
        return String.format("%7.2f (%2d%% pa)", height, (int) (growthRate * 100));
    }
}
If by serialization you mean standard Java serialization with ObjectXXXStream, then yes, you can avoid it.
If you mean serialization in a broader sense, then no. Files can't directly store Java objects; you have to convert them to bytes (which is serialization by definition).
PS: If you actually ask "How?" you should include it in your question.

Converting an iterative function to recursive

I am trying to convert an iterative function to a recursive one.
But once I tried to do that, it runs continuously, like an infinite loop.
This is my iterative code
// Iteratively processes every pending split in `splitted`, starting at
// `current_index`: runs the driver job for the split, reads its entropy and
// majority label, then either evaluates gain (bestGain) or assembles a rule
// string for the split.
// Relies on `splitted`, `currentsplit` and `current_index`, which are declared
// outside this method.
// NOTE(review): the method is declared to return Node but contains no return
// statement.
private static Node buildModelTree(String[] args) {
// TODO Auto-generated method stub
String clsIndex = args[3];
splitted.add(currentsplit);
double entropy = 0;
int total_attributes = (Integer.parseInt(clsIndex));// class index
int split_size = splitted.size();
GainRatio gainObj = new GainRatio();
while (split_size > current_index) { //iterate through all distinct pair for building children
currentsplit = (SplitInfo) splitted.get(current_index);
System.out.println("After currentsplit --->" + currentsplit);
gainObj = new GainRatio();
// The driver's exit code is stored in res but never checked.
int res = 0;
res = ToolRunner.run(new Configuration(),new CopyOfFunID3Driver(), args);
gainObj.getcount(current_index);
entropy = gainObj.currNodeEntophy();
clsIndex = gainObj.majorityLabel();
currentsplit.classIndex = clsIndex;
if (entropy != 0.0 && currentsplit.attr_index.size() != total_attributes) { //calculate gain ration
bestGain(total_attributes,entropy,gainObj);
} else {
//When entropy is zero build tree
// branch and gson are created here but only referenced by the
// commented-out lines below.
Node branch = new Node();
String rule = "";
Gson gson = new Gson();
int temp_size = currentsplit.attr_index.size();
// Concatenate "<attribute index> <attribute value>" pairs into rule.
for (int val = 0; val < temp_size; val++) {
int g = 0;
g = (Integer) currentsplit.attr_index.get(val);
if (val == 0) {
rule = g + " " + currentsplit.attr_value.get(val);
//JSON
// branch.add(g, currentsplit.attr_value.get(val).toString(), new Node(currentsplit.classIndex, true));
} else {
rule = rule + " " + g + " "+ currentsplit.attr_value.get(val);
//branch.add(g, currentsplit.attr_value.get(val).toString(), buildModelTree(args));
}
}
// rule is completed with the class label but not used afterwards.
rule = rule + " " + currentsplit.classIndex;
}
// Re-read the size each pass so the loop sees entries added to splitted.
split_size = splitted.size();
current_index++;
}
}
where all should I make change?
I am trying to build a tree. So, in order to get the tree structure, I am trying to make my ID3 code recursive.
With my current code I am only getting output like this, but I want it as a tree structure.
Please suggest.
A recursive algorithm must have the following two properties:
1. Each time the function invokes itself, the problem size has to be reduced.
(I.e., if you first call the function with an array of size n, then the next call must use a size smaller than n.)
2. Base case - the condition for the return statement.
(For example, if the array size is 0, then return.)
In your code, these two are missing.
You keep calling the function with the same size of array. That's the problem.
Thanks

DEMA & TEMA Ta-lib Java Implementation

I am currently working with the Ta-lib Java implementation. I can run MA & SUM properly, but I have a problem when I try to run DEMA and TEMA: the output is all zeros.
I am calling the DEMA & TEMA method of Ta-lib as follows
import com.tictactec.ta.lib.Core;
import com.tictactec.ta.lib.MInteger;
// Small driver that runs TA-Lib's DEMA and TEMA over a fixed price series and
// prints the output buffer after each call.
public class TALibJava {
// Input series and an output buffer of the same length.
double[] array = {207.650, 205.160, 210.870, 209.350, 207.250, 209.960, 207.650, 205.160, 188.170, 186.020};
double[] output = new double[array.length];
int period = 5;
Core core = new Core();
int lookback = 0;
// Out-parameters passed to each indicator call.
MInteger begin = new MInteger();
MInteger length = new MInteger();
public void callDEMA() {
// lookback is stored but not used elsewhere in this class.
lookback = core.demaLookback(period);
// NOTE(review): the fourth argument is the literal 0 rather than `period`,
// and the value returned by dema() is ignored.
core.dema(0, array.length - 1, array, 0, begin, length, output);
System.out.println("DEMA Output: ");
print();
}
public void callTEMA() {
lookback = core.temaLookback(period);
// NOTE(review): same as callDEMA -- literal 0 instead of `period`, and the
// returned value is ignored.
core.tema(0, array.length - 1, array, 0, begin, length, output);
System.out.println("TEMA Output: ");
print();
}
// Prints every slot of output, tab-separated, regardless of begin/length.
public void print() {
for(int i=0;i<array.length;i++) {
System.out.print(output[i] + "\t ");
}
System.out.println("");
}
public static void main(String args[]) {
TALibJava obj = new TALibJava();
obj.callDEMA();
obj.callTEMA();
}
}
Perhaps the input parameters are not properly set. Please suggest me what I'm doing wrong.
According to the source code of dema(), optInTimePeriod cannot be 0:
else if( ((int)optInTimePeriod < 2) || ((int)optInTimePeriod > 100000) )
return RetCode.BadParam ;
That's why your current code returns "BadParam" and not "Success" when you call dema().
(Same thing goes for tema())

Getting PDF TextObjects with PDFBox

I have a PDF from which I extracted a page using PDFBox:
(...)
File input = new File("C:\\temp\\sample.pdf");
document = PDDocument.load(input);
List allPages = document.getDocumentCatalog().getAllPages();
PDPage page = (PDPage) allPages.get(2);
PDStream contents = page.getContents();
if (contents != null) {
System.out.println(contents.getInputStreamAsString());
(...)
This gives the following result, which looks like something you'd expect, based on the PDF spec.
q
/GS0 gs
/Fm0 Do
Q
/Span <</Lang (en-US)/MCID 88 >>BDC
BT
/CS0 cs 0 0 0 scn
/GS1 gs
/T1_0 1 Tf
8.5 0 0 8.5 70.8661 576 Tm
(This page has been intentionally left blank.)Tj
ET
EMC
1 1 1 scn
/GS0 gs
22.677 761.102 28.346 32.599 re
f
/Span <</Lang (en-US)/MCID 89 >>BDC
BT
0.531 0.53 0.528 scn
/T1_1 1 Tf
9 0 0 9 45.7136 761.1024 Tm
(2)Tj
ET
EMC
q
0 g
/Fm1 Do
Q
What I'm looking for is to extract the PDF TextObjects (as described in par 5.3 of the PDF spec) on the page as Java objects, so basically the pieces between BT and ET (two of them on this page).
They should at least contain everything between the brackets preceding 'Tj' as a String, and an x and y coördinate based on the 'Tm' (or a 'Td' operator, etc.). Other attributes would be a bonus, but are not required.
The PDFTextStripper seems to give me either each character with attributes as a TextPosition (too much noise for my purpose), or all the Text as one long String.
Does PDFBox have a feature that parses a Page and provides TextObjects like this that I missed? Or else, if I am to extend PDFBox to get what I need, where should I start? Any help is welcome.
EDIT: Found another question here, that gives inspiration on how I might build what I need. If I succeed, I'll check back. Still looking forward to any help you may have, though.
Thanks,
Phil
Based on the linked question and the hint by mkl yesterday (thanks!), I've decided to build something to parse the tokens.
Something to consider is that within a PDF Text Object, the attributes precede the operator, so I collect all attributes in a collection until I encounter the operator.
Then, when I know what operator the attributes belong to, I move them to their proper locations.
This is what I've come up with:
import java.io.File;
import java.util.List;
import org.apache.pdfbox.pdfparser.PDFStreamParser;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.common.PDStream;
import org.apache.pdfbox.util.PDFOperator;
/**
 * Parses the content stream of one page and prints every text object (the
 * operators between BT and ET) as "Text: <string>#<x>,<y>".
 *
 * Operands precede their operator in a content stream, so operands are
 * collected until an operator arrives and are then handed to that operator.
 */
public class TextExtractor {
    public static void main(String[] args) {
        PDDocument document = null;
        try {
            File input = new File("C:\\some\\file.pdf");
            document = PDDocument.load(input);
            List allPages = document.getDocumentCatalog().getAllPages();
            // Only one page is parsed, as this is a sample; index 2 is the
            // third page of the document.
            PDPage page = (PDPage) allPages.get(2);
            PDStream contents = page.getContents();
            PDFStreamParser parser = new PDFStreamParser(contents.getStream());
            parser.parse();
            List<?> tokens = parser.getTokens();

            // True while the tokens being read lie between BT and ET.
            boolean parsingTextObject = false;
            PDFTextObject textobj = new PDFTextObject();
            for (Object next : tokens) {
                if (next instanceof PDFOperator) {
                    PDFOperator op = (PDFOperator) next;
                    switch (op.getOperation()) {
                        case "BT":
                            // Begin Text: start a fresh text object.
                            parsingTextObject = true;
                            textobj = new PDFTextObject();
                            break;
                        case "ET":
                            // End Text: report what was collected.
                            parsingTextObject = false;
                            System.out.println("Text: " + textobj.getText() + "#" + textobj.getX() + "," + textobj.getY());
                            break;
                        case "Tj":
                            textobj.setText();
                            break;
                        case "Tm":
                            textobj.setMatrix();
                            break;
                        default:
                            // Other operators are ignored.
                    }
                    // Pending operands belonged to this operator; drop them.
                    textobj.clearAllAttributes();
                } else if (parsingTextObject) {
                    textobj.addAttribute(next);
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Release the document's resources whether or not parsing succeeded.
            if (document != null) {
                try {
                    document.close();
                } catch (java.io.IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }
}
In combination with:
import java.util.ArrayList;
import java.util.List;
import org.apache.pdfbox.cos.COSFloat;
import org.apache.pdfbox.cos.COSInteger;
import org.apache.pdfbox.cos.COSString;
/**
 * Accumulates the operands of one PDF text object and exposes its text and
 * the x/y position taken from the text matrix.
 */
class PDFTextObject {
    // Operands seen since the last operator.
    private List<Object> attributes = new ArrayList<Object>();
    private String text = "";
    private float x = -1;
    private float y = -1;

    /** Discards all pending operands. */
    public void clearAllAttributes() {
        attributes = new ArrayList<Object>();
    }

    /** Queues an operand until its operator is known. */
    public void addAttribute(Object anAttribute) {
        attributes.add(anAttribute);
    }

    /** Appends every pending COSString operand to the text; other operand types are reported. */
    public void setText() {
        for (Object operand : attributes) {
            if (!(operand instanceof COSString)) {
                System.out.println("Whoops! Wrong type of property...");
                continue;
            }
            text = text + ((COSString) operand).getString();
        }
    }

    public String getText() {
        return text;
    }

    /**
     * Reads x and y from the pending operands. A matrix has six operands, of
     * which the fifth and sixth are x and y; an operand that is neither a
     * COSInteger nor a COSFloat yields -1.
     */
    public void setMatrix() {
        for (int pos = 4; pos < attributes.size(); pos++) {
            Object operand = attributes.get(pos);
            float value = -1;
            if (operand instanceof COSInteger) {
                value = ((COSInteger) operand).floatValue();
            }
            if (operand instanceof COSFloat) {
                value = ((COSFloat) operand).floatValue();
            }
            if (pos == 4) {
                x = value;
            } else if (pos == 5) {
                y = value;
            }
        }
    }

    public float getX() {
        return x;
    }

    public float getY() {
        return y;
    }
}
It gives the output:
Text: This page has been intentionally left blank.#70.8661,576.0
Text: 2#45.7136,761.1024
While it does the trick, I'm sure I've broken some conventions and haven't always written the most elegant code. Improvements and alternate solutions are welcome.
I added a version of the Phil response with pdfbox-2.0.1
import java.io.File;
import java.util.ArrayList;
import java.util.List;
import org.apache.pdfbox.pdfparser.PDFStreamParser;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageTree;
import org.apache.pdfbox.contentstream.operator.Operator;
import org.apache.pdfbox.cos.COSFloat;
import org.apache.pdfbox.cos.COSInteger;
import org.apache.pdfbox.cos.COSString;
/**
 * PDFBox 2.x version: parses the content stream of one page and prints every
 * text object (the operators between BT and ET) as "Text: <string>#<x>,<y>".
 *
 * Operands precede their operator in a content stream, so operands are
 * collected until an operator arrives and are then handed to that operator.
 */
public class TextExtractor {
    public static void main(String[] args) {
        File input = new File("src\\test\\resources\\files\\file1.pdf");
        // try-with-resources closes the document even when parsing fails.
        try (PDDocument document = PDDocument.load(input)) {
            PDPageTree allPages = document.getDocumentCatalog().getPages();
            // Only the first page (index 0) is parsed, as this is a sample.
            PDPage page = allPages.get(0);
            PDFStreamParser parser = new PDFStreamParser(page);
            parser.parse();
            List<?> tokens = parser.getTokens();

            // True while the tokens being read lie between BT and ET.
            boolean parsingTextObject = false;
            PDFTextObject textobj = new PDFTextObject();
            for (Object next : tokens) {
                if (next instanceof Operator) {
                    Operator op = (Operator) next;
                    switch (op.getName()) {
                        case "BT":
                            // Begin Text: start a fresh text object.
                            parsingTextObject = true;
                            textobj = new PDFTextObject();
                            break;
                        case "ET":
                            // End Text: report what was collected.
                            parsingTextObject = false;
                            System.out.println("Text: " + textobj.getText() + "#" + textobj.getX() + "," + textobj.getY());
                            break;
                        case "Tj":
                            textobj.setText();
                            break;
                        case "Tm":
                            textobj.setMatrix();
                            break;
                        default:
                            System.out.println("unsupported operation " + op);
                    }
                    // Pending operands belonged to this operator; drop them.
                    textobj.clearAllAttributes();
                } else if (parsingTextObject) {
                    textobj.addAttribute(next);
                } else {
                    System.out.println("ignore "+next.getClass()+" -> "+next);
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Accumulates the operands of one PDF text object and exposes its text
     * and the x/y position taken from the text matrix.
     */
    static class PDFTextObject {
        // Operands seen since the last operator.
        private List<Object> attributes = new ArrayList<Object>();
        private String text = "";
        private float x = -1;
        private float y = -1;

        /** Discards all pending operands. */
        public void clearAllAttributes() {
            attributes = new ArrayList<Object>();
        }

        /** Queues an operand until its operator is known. */
        public void addAttribute(Object anAttribute) {
            attributes.add(anAttribute);
        }

        /** Appends every pending COSString operand to the text; other operand types are reported. */
        public void setText() {
            for (Object operand : attributes) {
                if (!(operand instanceof COSString)) {
                    System.out.println("Whoops! Wrong type of property...");
                    continue;
                }
                text = text + ((COSString) operand).getString();
            }
        }

        public String getText() {
            return text;
        }

        /**
         * Reads x and y from the pending operands. A matrix has six operands,
         * of which the fifth and sixth are x and y; an operand that is
         * neither a COSInteger nor a COSFloat yields -1.
         */
        public void setMatrix() {
            for (int pos = 4; pos < attributes.size(); pos++) {
                Object operand = attributes.get(pos);
                float value = -1;
                if (operand instanceof COSInteger) {
                    value = ((COSInteger) operand).floatValue();
                }
                if (operand instanceof COSFloat) {
                    value = ((COSFloat) operand).floatValue();
                }
                if (pos == 4) {
                    x = value;
                } else if (pos == 5) {
                    y = value;
                }
            }
        }

        public float getX() {
            return x;
        }

        public float getY() {
            return y;
        }
    }
}

incompatible types found : void, what is wrong?

I am trying to write a class that finds the two closest vectors and returns their sum.
I have tried hard to understand it, but I can't find the reason why I get this message; it's the only error I get:
java:93: incompatible types
found : void
required: EDU.gatech.cc.is.util.Vec2
result = one.add(two);
^
Line 93 is at the end of the code, I put some arrows to indicate it!
enter code here
package EDU.gatech.cc.is.clay;
import java.util.*;
import EDU.gatech.cc.is.clay.*;
import java.lang.*;
import EDU.gatech.cc.is.abstractrobot.*;
import EDU.gatech.cc.is.util.Vec2;
import EDU.gatech.cc.is.util.Units;
// NodeVec2 that, per its own comments, is meant to pick the two closest
// opponents in front of the robot and return the sum of their vectors.
public class MAX_go_in_between extends NodeVec2
{
public static final boolean DEBUG = /*true;*/ Node.DEBUG;
private SocSmall abstract_robot;
public MAX_go_in_between(SocSmall ar)
{
abstract_robot = ar;
}
// Timestamp of the last computation and the cached result returned by Value().
long last_spott = 0;
Vec2 result = new Vec2();
// Recomputes result when timestamp is newer than last_spott or equals -1;
// otherwise returns the cached result unchanged.
public Vec2 Value(long timestamp)
{
if (DEBUG) System.out.println("MAX_Avoid_walls: Value()");
if ((timestamp > last_spott) || (timestamp == -1))
{
if (timestamp != -1) last_spott = timestamp;
Vec2 one;
Vec2 two;
//array of Vec2 of all the opponents
Vec2[] list_opp = abstract_robot.getOpponents(timestamp);
//empty array of vec2 where will be put the opponents in front of the robot
// NOTE(review): declared without an initializer, yet add() is called on it below.
ArrayList<Vec2> list_opp_in_front;
Vec2 temp;
// find which opponents are in front and put them in the arraylist
for(int i=0; i<list_opp.length; i++)
{
temp = list_opp[i];
if(temp.x >= 0.0)
{
list_opp_in_front.add(temp);
}
}
//get closest opponent and sets it to index 0
// NOTE(review): the bound size()-1 means the last element is never examined,
// and the swap moves the element with the LARGER r to index 0.
for(int i=1; i<list_opp_in_front.size()-1; i++)
{
temp = list_opp_in_front.get(i);
if(list_opp_in_front.get(0).r<temp.r)
{
list_opp_in_front.set(i, list_opp_in_front.get(0));
list_opp_in_front.set(0, temp);
}
}
//get second closest opponent and sets it to index 1
// NOTE(review): same bound and comparison direction as the loop above.
for(int i=2; i<list_opp_in_front.size()-1; i++)
{
temp = list_opp_in_front.get(i);
if(list_opp_in_front.get(1).r<temp.r)
{
list_opp_in_front.set(i, list_opp_in_front.get(1));
list_opp_in_front.set(1, temp);
}
// sum both vectors
// NOTE(review): this sum sits inside the loop body, so it runs once per
// iteration and not at all when the loop does not execute.
one = list_opp_in_front.get(0);
two = list_opp_in_front.get(1);
=============>>>>
=============>>>> result = one.add(two);
}
}
return(result);
}
}
Here is the Vec2.add(Vec2) method:
// Adds other to this vector in place: updates x and y, recomputes the
// magnitude r, and recomputes the angle t only when r is greater than 0.
// Returns nothing.
public void add(Vec2 other)
{
x = x + other.x;
y = y + other.y;
r = Math.sqrt(x*x + y*y);
// t keeps its previous value when the resulting vector has zero length.
if (r > 0)
t = Math.atan2(y,x);
}
result = one.add (two);
public void add (Vec2 other)
// ^^^^
From this, the member function add does not return anything that you can put into result. With a line like:
x = x + other.x;
(where x is a member of "the current object" and other is the object you're adding to it), it's a dead certainty that one.add (two) is meant to modify one rather than just use it in a calculation.
So, rather than:
one = list_opp_in_front.get (0);
two = list_opp_in_front.get (1);
result = one.add (two);
you'll probably need something like:
result = list_opp_in_front.get (0);
two = list_opp_in_front.get (1);
result.add (two);
As per your method declaration public void add(Vec2 other), you are adding two into one. Thus one itself is your result, so add() has no need to return anything.
Just remove the assignment of the call's result (call one.add(two) on its own) and treat one as the result object.

Categories