OpenCV Face Recognition not accurate - Java

In my app I'm trying to do face recognition on a specific image using OpenCV. First I train on one image, and if I then run face recognition on that same image it is recognized successfully. However, when I switch to another picture of the same person, recognition fails. It only works on the trained image, so my question is: how do I fix this?
Update:
What I want to do is let the user select an image of a person from storage, train on that selected image, and then fetch all images from storage whose faces match the trained image.
Here is my activity class:
public class MainActivity extends AppCompatActivity {
private Mat rgba,gray;
private CascadeClassifier classifier;
private MatOfRect faces;
private ArrayList<Mat> images;
private ArrayList<String> imagesLabels;
private Storage local;
ImageView mimage;
Button prev,next;
ArrayList<Integer> imgs;
private int label[] = new int[1];
private double predict[] = new double[1];
Integer pos = 0;
private String[] uniqueLabels;
FaceRecognizer recognize;
private boolean trainfaces() {
if(images.isEmpty())
return false;
List<Mat> imagesMatrix = new ArrayList<>();
for (int i = 0; i < images.size(); i++)
imagesMatrix.add(images.get(i));
Set<String> uniqueLabelsSet = new HashSet<>(imagesLabels); // Get all unique labels
uniqueLabels = uniqueLabelsSet.toArray(new String[uniqueLabelsSet.size()]); // Convert to String array, so we can read the values from the indices
int[] classesNumbers = new int[uniqueLabels.length];
for (int i = 0; i < classesNumbers.length; i++)
classesNumbers[i] = i + 1; // Create incrementing list for each unique label starting at 1
int[] classes = new int[imagesLabels.size()];
for (int i = 0; i < imagesLabels.size(); i++) {
String label = imagesLabels.get(i);
for (int j = 0; j < uniqueLabels.length; j++) {
if (label.equals(uniqueLabels[j])) {
classes[i] = classesNumbers[j]; // Insert corresponding number
break;
}
}
}
Mat vectorClasses = new Mat(classes.length, 1, CvType.CV_32SC1); // CV_32S == int
vectorClasses.put(0, 0, classes); // Copy int array into a vector
recognize = LBPHFaceRecognizer.create(3,8,8,8,200);
recognize.train(imagesMatrix, vectorClasses);
if(SaveImage())
return true;
return false;
}
public void cropedImages(Mat mat) {
Rect rect_Crop=null;
for(Rect face: faces.toArray()) {
rect_Crop = new Rect(face.x, face.y, face.width, face.height);
}
Mat croped = new Mat(mat, rect_Crop);
images.add(croped);
}
public boolean SaveImage() {
File path = new File(Environment.getExternalStorageDirectory(), "TrainedData");
path.mkdirs();
String filename = "lbph_trained_data.xml";
File file = new File(path, filename);
recognize.save(file.toString());
if(file.exists())
return true;
return false;
}
private BaseLoaderCallback callbackLoader = new BaseLoaderCallback(this) {
@Override
public void onManagerConnected(int status) {
switch(status) {
case BaseLoaderCallback.SUCCESS:
faces = new MatOfRect();
//reset
images = new ArrayList<Mat>();
imagesLabels = new ArrayList<String>();
local.putListMat("images", images);
local.putListString("imagesLabels", imagesLabels);
images = local.getListMat("images");
imagesLabels = local.getListString("imagesLabels");
break;
default:
super.onManagerConnected(status);
break;
}
}
};
@Override
protected void onResume() {
super.onResume();
if(OpenCVLoader.initDebug()) {
Log.i("hmm", "System Library Loaded Successfully");
callbackLoader.onManagerConnected(BaseLoaderCallback.SUCCESS);
} else {
Log.i("hmm", "Unable To Load System Library");
OpenCVLoader.initAsync(OpenCVLoader.OPENCV_VERSION, this, callbackLoader);
}
}
@Override
protected void onCreate(Bundle savedInstanceState) {
super.onCreate(savedInstanceState);
setContentView(R.layout.activity_main);
prev = findViewById(R.id.btprev);
next = findViewById(R.id.btnext);
mimage = findViewById(R.id.mimage);
local = new Storage(this);
imgs = new ArrayList();
imgs.add(R.drawable.jonc);
imgs.add(R.drawable.jonc2);
imgs.add(R.drawable.randy1);
imgs.add(R.drawable.randy2);
imgs.add(R.drawable.imgone);
imgs.add(R.drawable.imagetwo);
mimage.setBackgroundResource(imgs.get(pos));
prev.setOnClickListener(new View.OnClickListener() {
@Override
public void onClick(View view) {
if(pos!=0){
pos--;
mimage.setBackgroundResource(imgs.get(pos));
}
}
});
next.setOnClickListener(new View.OnClickListener() {
@Override
public void onClick(View view) {
if(pos<5){
pos++;
mimage.setBackgroundResource(imgs.get(pos));
}
}
});
Button train = (Button)findViewById(R.id.btn_train);
train.setOnClickListener(new View.OnClickListener() {
@RequiresApi(api = Build.VERSION_CODES.KITKAT)
@Override
public void onClick(View view) {
rgba = new Mat();
gray = new Mat();
Mat mGrayTmp = new Mat();
Mat mRgbaTmp = new Mat();
classifier = FileUtils.loadXMLS(MainActivity.this);
Bitmap icon = BitmapFactory.decodeResource(getResources(),
imgs.get(pos));
Bitmap bmp32 = icon.copy(Bitmap.Config.ARGB_8888, true);
Utils.bitmapToMat(bmp32, mGrayTmp);
Utils.bitmapToMat(bmp32, mRgbaTmp);
Imgproc.cvtColor(mGrayTmp, mGrayTmp, Imgproc.COLOR_BGR2GRAY);
Imgproc.cvtColor(mRgbaTmp, mRgbaTmp, Imgproc.COLOR_BGRA2RGBA);
/*Core.transpose(mGrayTmp, mGrayTmp); // Rotate image
Core.flip(mGrayTmp, mGrayTmp, -1); // Flip along both*/
gray = mGrayTmp;
rgba = mRgbaTmp;
Imgproc.resize(gray, gray, new Size(200,200.0f/ ((float)gray.width()/ (float)gray.height())));
if(gray.total() == 0)
Toast.makeText(getApplicationContext(), "Can't Detect Faces", Toast.LENGTH_SHORT).show();
classifier.detectMultiScale(gray,faces,1.1,3,0|CASCADE_SCALE_IMAGE, new Size(30,30));
if(!faces.empty()) {
if(faces.toArray().length > 1)
Toast.makeText(getApplicationContext(), "Mutliple Faces Are not allowed", Toast.LENGTH_SHORT).show();
else {
if(gray.total() == 0) {
Log.i("hmm", "Empty gray image");
return;
}
cropedImages(gray);
imagesLabels.add("Baby");
Toast.makeText(getApplicationContext(), "Picture Set As Baby", Toast.LENGTH_LONG).show();
if (images != null && imagesLabels != null) {
local.putListMat("images", images);
local.putListString("imagesLabels", imagesLabels);
Log.i("hmm", "Images have been saved");
if(trainfaces()) {
images.clear();
imagesLabels.clear();
}
}
}
}else {
/* Bitmap bmp = null;
Mat tmp = new Mat(250, 250, CvType.CV_8U, new Scalar(4));
try {
//Imgproc.cvtColor(seedsImage, tmp, Imgproc.COLOR_RGB2BGRA);
Imgproc.cvtColor(gray, tmp, Imgproc.COLOR_GRAY2RGBA, 4);
bmp = Bitmap.createBitmap(tmp.cols(), tmp.rows(), Bitmap.Config.ARGB_8888);
Utils.matToBitmap(tmp, bmp);
} catch (CvException e) {
Log.d("Exception", e.getMessage());
}*/
/* mimage.setImageBitmap(bmp);*/
Toast.makeText(getApplicationContext(), "Unknown Face", Toast.LENGTH_SHORT).show();
}
}
});
Button recognize = (Button)findViewById(R.id.btn_recognize);
recognize.setOnClickListener(new View.OnClickListener() {
@Override
public void onClick(View view) {
if(loadData())
Log.i("hmm", "Trained data loaded successfully");
rgba = new Mat();
gray = new Mat();
faces = new MatOfRect();
Mat mGrayTmp = new Mat();
Mat mRgbaTmp = new Mat();
classifier = FileUtils.loadXMLS(MainActivity.this);
Bitmap icon = BitmapFactory.decodeResource(getResources(),
imgs.get(pos));
Bitmap bmp32 = icon.copy(Bitmap.Config.ARGB_8888, true);
Utils.bitmapToMat(bmp32, mGrayTmp);
Utils.bitmapToMat(bmp32, mRgbaTmp);
Imgproc.cvtColor(mGrayTmp, mGrayTmp, Imgproc.COLOR_BGR2GRAY);
Imgproc.cvtColor(mRgbaTmp, mRgbaTmp, Imgproc.COLOR_BGRA2RGBA);
/*Core.transpose(mGrayTmp, mGrayTmp); // Rotate image
Core.flip(mGrayTmp, mGrayTmp, -1); // Flip along both*/
gray = mGrayTmp;
rgba = mRgbaTmp;
Imgproc.resize(gray, gray, new Size(200,200.0f/ ((float)gray.width()/ (float)gray.height())));
if(gray.total() == 0)
Toast.makeText(getApplicationContext(), "Can't Detect Faces", Toast.LENGTH_SHORT).show();
classifier.detectMultiScale(gray,faces,1.1,3,0|CASCADE_SCALE_IMAGE, new Size(30,30));
if(!faces.empty()) {
if(faces.toArray().length > 1)
Toast.makeText(getApplicationContext(), "Mutliple Faces Are not allowed", Toast.LENGTH_SHORT).show();
else {
if(gray.total() == 0) {
Log.i("hmm", "Empty gray image");
return;
}
recognizeImage(gray);
}
}else {
Toast.makeText(getApplicationContext(), "Unknown Face", Toast.LENGTH_SHORT).show();
}
}
});
}
private void recognizeImage(Mat mat) {
Rect rect_Crop=null;
for(Rect face: faces.toArray()) {
rect_Crop = new Rect(face.x, face.y, face.width, face.height);
}
Mat croped = new Mat(mat, rect_Crop);
recognize.predict(croped, label, predict);
int indice = (int)predict[0];
Log.i("hmmcheck:",String.valueOf(label[0])+" : "+String.valueOf(indice));
if(label[0] != -1 && indice < 125)
Toast.makeText(getApplicationContext(), "Welcome "+uniqueLabels[label[0]-1]+"", Toast.LENGTH_SHORT).show();
else
Toast.makeText(getApplicationContext(), "You're not the right person", Toast.LENGTH_SHORT).show();
}
private boolean loadData() {
String filename = FileUtils.loadTrained();
if(filename.isEmpty())
return false;
else
{
recognize.read(filename);
return true;
}
}
}
My File Utils Class:
public class FileUtils {
private static String TAG = FileUtils.class.getSimpleName();
private static boolean loadFile(Context context, String cascadeName) {
InputStream inp = null;
OutputStream out = null;
boolean completed = false;
try {
inp = context.getResources().getAssets().open(cascadeName);
File outFile = new File(context.getCacheDir(), cascadeName);
out = new FileOutputStream(outFile);
byte[] buffer = new byte[4096];
int bytesread;
while((bytesread = inp.read(buffer)) != -1) {
out.write(buffer, 0, bytesread);
}
completed = true;
inp.close();
out.flush();
out.close();
} catch (IOException e) {
Log.i(TAG, "Unable to load cascade file" + e);
}
return completed;
}
public static CascadeClassifier loadXMLS(Activity activity) {
InputStream is = activity.getResources().openRawResource(R.raw.lbpcascade_frontalface);
File cascadeDir = activity.getDir("cascade", Context.MODE_PRIVATE);
File mCascadeFile = new File(cascadeDir, "lbpcascade_frontalface_improved.xml");
FileOutputStream os = null;
try {
os = new FileOutputStream(mCascadeFile);
byte[] buffer = new byte[4096];
int bytesRead;
while ((bytesRead = is.read(buffer)) != -1) {
os.write(buffer, 0, bytesRead);
}
is.close();
os.close();
} catch (FileNotFoundException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
return new CascadeClassifier(mCascadeFile.getAbsolutePath());
}
public static String loadTrained() {
File file = new File(Environment.getExternalStorageDirectory(), "TrainedData/lbph_trained_data.xml");
return file.toString();
}
}
These are the images I'm trying to compare. The face belongs to the same person, yet recognition does not match them!

Update
According to the new edit in the question, you need a way to identify new people on the fly whose photos might not have been available during the model's training phase. This task is called few-shot learning. It is similar to what intelligence/police agencies need when finding targets in CCTV footage: since there are usually not enough images of a specific target available during training, they use models such as FaceNet. I really suggest reading the paper, but I explain a few of its highlights here:
Generally, the last layer of a classifier is an n*1 vector with n-1 of the elements almost equal to zero and one element close to 1. The element that is close to 1 determines the classifier's prediction of the input's label.
The authors figured out that if you train a classifier network with a specific loss function on a huge dataset of faces, you can use the output of its penultimate layer as a representation of any face, irrespective of whether it was in the training set; the authors call this vector the Face Embedding.
The previous result means that with a very well trained FaceNet model, you can summarise any face into a vector. A very interesting property of this approach is that the vectors of a specific person's face at different angles/positions/states are close to each other in Euclidean space (this property is enforced by the loss function the authors chose).
In summary, you have a model that takes faces as input and returns vectors; vectors that are close to each other very likely belong to the same person (to check this you can use KNN or just a simple Euclidean distance).
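To make that comparison concrete, here is a minimal sketch of the distance check, assuming you already have two embeddings as equal-length float arrays (for example the 128-dimensional vectors a FaceNet-style model produces). The class name and the threshold are illustrative only and must be tuned for the model you actually use.
// Hypothetical helper: decide whether two face embeddings belong to the same
// person by thresholding their Euclidean (L2) distance.
public final class EmbeddingMatcher {

    private EmbeddingMatcher() {}

    // Euclidean distance between two embeddings of equal length.
    public static double distance(float[] a, float[] b) {
        double sum = 0.0;
        for (int i = 0; i < a.length; i++) {
            double d = a[i] - b[i];
            sum += d * d;
        }
        return Math.sqrt(sum);
    }

    // The threshold is model-dependent (values around 1.0-1.2 are often quoted
    // for FaceNet) and must be tuned on your own data.
    public static boolean samePerson(float[] a, float[] b, double threshold) {
        return distance(a, b) < threshold;
    }
}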
One implementation of FaceNet can be found here. I suggest you try to run it on your computer to get a feel for what you are actually dealing with. After that, it is probably best to do the following:
1. Convert the FaceNet model mentioned in the repository to its tflite version (this blogpost might help).
2. For each photo submitted by the user, use the Face API to extract the face(s).
3. Use the minified model in your app to get the face embeddings of the extracted faces (a sketch of this step follows after the list).
4. Process all the images in the user's gallery, getting the vectors for the faces in those photos.
5. Compare each vector found in step 4 with each vector found in step 3 to get the matches.
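As a rough illustration of step 3, the sketch below runs a converted .tflite embedding model with the TensorFlow Lite Interpreter. The 160x160 input size, the [-1, 1] normalisation and the 128-dimensional output are assumptions based on typical FaceNet conversions; adapt them to the model you actually export.
import android.graphics.Bitmap;
import org.tensorflow.lite.Interpreter;
import java.io.File;

// Sketch: turn a cropped face bitmap into an embedding vector with a
// FaceNet-style .tflite model.
public class FaceEmbedder {
    private static final int INPUT_SIZE = 160;      // assumed model input size
    private static final int EMBEDDING_SIZE = 128;  // assumed output length
    private final Interpreter interpreter;

    public FaceEmbedder(File tfliteModel) {
        interpreter = new Interpreter(tfliteModel);
    }

    public float[] embed(Bitmap faceCrop) {
        Bitmap face = Bitmap.createScaledBitmap(faceCrop, INPUT_SIZE, INPUT_SIZE, true);
        float[][][][] input = new float[1][INPUT_SIZE][INPUT_SIZE][3];
        for (int y = 0; y < INPUT_SIZE; y++) {
            for (int x = 0; x < INPUT_SIZE; x++) {
                int px = face.getPixel(x, y);
                // Simple [-1, 1] normalisation; match whatever your model expects.
                input[0][y][x][0] = (((px >> 16) & 0xFF) - 127.5f) / 127.5f;
                input[0][y][x][1] = (((px >> 8) & 0xFF) - 127.5f) / 127.5f;
                input[0][y][x][2] = ((px & 0xFF) - 127.5f) / 127.5f;
            }
        }
        float[][] output = new float[1][EMBEDDING_SIZE];
        interpreter.run(input, output);
        return output[0];
    }
}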
Original Answer
You have come across one of the most prevalent challenges of machine learning: overfitting. Face detection and recognition is a huge research area in its own right, and almost all reasonably accurate models use some kind of deep learning. Note that even detecting a face accurately is not as easy as it seems; however, since you are doing it on Android, you can use the Face API for this task (other, more advanced techniques such as MTCNN are too slow/difficult to deploy on a handset). It has been shown that simply feeding the model a face photo with a lot of background noise or multiple people in it does not work, so you really cannot skip this step.
After getting a nicely cropped face of the candidate target, free of background, you need to overcome the challenge of recognising the detected face. Again, all the competent models that I know of use some sort of deep learning/convolutional neural networks. Using them on a mobile phone is a challenge, but thanks to TensorFlow Lite you can minify them and run them within your app. A project about face recognition on Android phones that I worked on is here, if you want to check it.
Keep in mind that any good model has to be trained on numerous instances of labelled data. However, there are plenty of models already trained on large datasets of faces or other image-recognition tasks; to tweak them and reuse their existing knowledge, we can employ transfer learning. For a quick start on object detection and transfer learning, which is closely related to your case, check this blog post.
Overall, you have to collect numerous instances of the faces you want to detect plus numerous face pictures of people you don't care about, train a model based on the above-mentioned resources, and then use TensorFlow Lite to decrease its size and embed it within your app. For each frame you then call the Android Face API and feed the (probably detected) face into the model to identify the person.
Depending on your tolerance for delay, the size of your training set, and the number of targets, you can get varying results; however, 90%+ accuracy is easily achievable if you have only a few target people.
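For the face-detection step mentioned above, a hedged sketch using the Mobile Vision Face API (one of the Android Face API options) could look like this; clamping the box and taking only the first detected face are simplifications for illustration.
import android.content.Context;
import android.graphics.Bitmap;
import android.util.SparseArray;
import com.google.android.gms.vision.Frame;
import com.google.android.gms.vision.face.Face;
import com.google.android.gms.vision.face.FaceDetector;

// Sketch: detect a face in a photo and return the cropped region, ready to be
// fed into the recognition model.
public class FaceCropper {
    public static Bitmap cropFirstFace(Context context, Bitmap photo) {
        FaceDetector detector = new FaceDetector.Builder(context)
                .setTrackingEnabled(false)
                .build();
        try {
            SparseArray<Face> faces =
                    detector.detect(new Frame.Builder().setBitmap(photo).build());
            if (faces.size() == 0) return null; // no face found
            Face face = faces.valueAt(0);
            int x = Math.max(0, (int) face.getPosition().x);
            int y = Math.max(0, (int) face.getPosition().y);
            int w = Math.min((int) face.getWidth(), photo.getWidth() - x);
            int h = Math.min((int) face.getHeight(), photo.getHeight() - y);
            if (w <= 0 || h <= 0) return null;  // box fell outside the bitmap
            return Bitmap.createBitmap(photo, x, y, w, h);
        } finally {
            detector.release();
        }
    }
}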

If I understand correctly, you're training the classifier with a single image. In that case, this one specific image is all the classifier will ever be able to recognise. You would need a noticeably bigger training set of pictures showing the same person, something like 5 or 10 different images at the very least.
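For illustration, here is a minimal sketch of what training on several samples per person looks like with the OpenCV LBPH recogniser already used in the question: a handful of cropped grayscale Mats of the same person, all mapped to one numeric label. The class and method names are made up for this example.
import org.opencv.core.CvType;
import org.opencv.core.Mat;
import org.opencv.face.LBPHFaceRecognizer;
import java.util.List;

// Sketch: train LBPH on 5-10 samples of the same person instead of one image.
public class MultiSampleTrainer {
    public static LBPHFaceRecognizer train(List<Mat> faceSamples, int personLabel) {
        Mat labels = new Mat(faceSamples.size(), 1, CvType.CV_32SC1);
        for (int i = 0; i < faceSamples.size(); i++) {
            labels.put(i, 0, personLabel); // every sample maps to the same person
        }
        LBPHFaceRecognizer recognizer = LBPHFaceRecognizer.create();
        recognizer.train(faceSamples, labels);
        return recognizer;
    }
}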

1) Change the threshold value when initializing the LBPH recognizer, e.g. LBPHFaceRecognizer(1, 8, 8, 8, 100)
2) Train each face with at least 2-3 pictures, since these recognizers mainly work by comparison
3) Set an accuracy threshold while recognizing. Do something like this:
//predicting result
// LoadData is a static class that contains trained recognizer
// _result is the gray frame image captured by the camera
LBPHFaceRecognizer.PredictionResult ER = LoadData.recog.Predict(_result);
int temp_result = ER.Label;
imageBox1.SizeMode = PictureBoxSizeMode.StretchImage;
imageBox1.Image = _result.Mat;
//Displaying predicted result on screen
// LBPH returns -1 if the face is not recognized
if ((temp_result != -1) && (ER.Distance < 55)){
//I get best accuracy at 55, you should try different values to determine best results
// Do something with detected image
}
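Since the snippet above is C# (Emgu CV), here is a rough Java/OpenCV counterpart of the same idea, matching the predict(Mat, int[], double[]) call already used in the question; 55 is just the threshold quoted above and needs tuning for your own data.
import org.opencv.core.Mat;
import org.opencv.face.LBPHFaceRecognizer;

// Sketch: accept a prediction only if the returned distance stays under a
// tuned threshold; otherwise treat the face as unknown.
public class Predictor {
    public static int predictOrReject(LBPHFaceRecognizer recognizer,
                                      Mat grayFaceCrop, double maxDistance) {
        int[] label = new int[1];
        double[] distance = new double[1];
        recognizer.predict(grayFaceCrop, label, distance);
        if (label[0] != -1 && distance[0] < maxDistance) {
            return label[0]; // accepted match
        }
        return -1;           // rejected: unknown face
    }
}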

Related

ML Kit Barcode scanning: Invalid image data size

I would like to detect a barcode within a captured image. I capture an image using android's camera2. Following this, the image's metadata is retrieved and the image is saved to the device. The metadata is all passed along to the next activity, which is where the application attempts to detect a barcode.
This next activity creates a byte[] from the File saved previously. Next, the relevant FirebaseVision objects are created using the data passed with the intent. Finally, the application attempts to call the detectInImage() method, where an error is thrown:
"java.lang.IllegalArgumentException: Invalid image data size."
I suspect this is from the captured image being too large, however I cannot seem to figure out how to capture a smaller image, and I also cannot find anything in the reference documentation regarding the maximum size allowed. Information regarding this error and how to solve it would be very much appreciated. Below is what I believe to be the relevant code.
private final ImageReader.OnImageAvailableListener onImageAvailableListener
= new ImageReader.OnImageAvailableListener() {
@Override
public void onImageAvailable(ImageReader imageReader) {
try{
// Semaphore ensures data is recorded before starting next activity
storeData.acquire();
Image resultImg = imageReader.acquireNextImage(); // Image from camera
imgWidth = resultImg.getWidth();
imgHeight = resultImg.getHeight();
ByteBuffer buffer = resultImg.getPlanes()[0].getBuffer();
data = new byte[buffer.remaining()]; // Byte array with the images data
buffer.get(data);
String timeStamp = new SimpleDateFormat("yyyyMMdd_HHmmss").format(new Date());
// Note: mediaFile directs to Pictures/"ThisProject" folder
File media = new File(mediaFile.getPath() +
File.separator + "IMG_" + timeStamp + ".jpg");
// Saving the image
FileOutputStream fos = null;
try {
fos = new FileOutputStream(media);
fos.write(data);
uri = Uri.fromFile(media);
} catch (IOException e) {
Log.e(TAG, e.getMessage());
} finally {
if (fos != null) {
try {
fos.close();
} catch (IOException e) {
Log.e(TAG, e.getMessage());
}
}
}
resultImg.close();
} catch (InterruptedException e) {
Log.e(TAG, e.getMessage());
}
storeData.release();
}
};
This essentially retrieves the image height & width, then writes it to a file.
The data sent to the next activity consists of the: Image width, Image height, Image rotation, and the Uri directing to the file.
Using this, I try to detect a barcode using Firebase ML Kit:
// uri is the uri referencing the saved image
File f = new File(uri.getPath());
data = new byte[(int) f.length()];
try{
BufferedInputStream bis = new BufferedInputStream(new FileInputStream(f));
DataInputStream dis = new DataInputStream(bis);
dis.readFully(data);
} catch (IOException e) {
Log.e(TAG, e.getMessage());
}
FirebaseVisionBarcodeDetectorOptions options = new FirebaseVisionBarcodeDetectorOptions.Builder().setBarcodeFormats(
FirebaseVisionBarcode.FORMAT_QR_CODE,
FirebaseVisionBarcode.FORMAT_DATA_MATRIX
).build();
FirebaseVisionBarcodeDetector detector = FirebaseVision.getInstance().getVisionBarcodeDetector(options);
FirebaseVisionImage image;
int rotationResult;
switch (imgRotation) {
case 0: {
rotationResult = FirebaseVisionImageMetadata.ROTATION_0;
break;
}
case 90: {
rotationResult = FirebaseVisionImageMetadata.ROTATION_90;
break;
}
case 180: {
rotationResult = FirebaseVisionImageMetadata.ROTATION_180;
break;
}
case 270: {
rotationResult = FirebaseVisionImageMetadata.ROTATION_270;
break;
}
default: {
rotationResult = FirebaseVisionImageMetadata.ROTATION_0;
break;
}
}
FirebaseVisionImageMetadata metadata = new FirebaseVisionImageMetadata.Builder()
.setWidth(imgWidth)
.setHeight(imgHeight)
.setFormat(FirebaseVisionImageMetadata.IMAGE_FORMAT_NV21)
.setRotation(rotationResult)
.build();
image = FirebaseVisionImage.fromByteArray(data, metadata);
Task<List<FirebaseVisionBarcode>> result = detector.detectInImage(image);
A few things.
Your image format should not be NV21 if you use camera2. See here for all camera2 supported image formats:
https://developer.android.com/reference/android/media/Image#getFormat()
Your byte[] is not NV21, but you specified IMAGE_FORMAT_NV21, which led to the error.
The most intuitive integration with camera2 is as follows:
Specify JPEG format when you instantiate the ImageReader.
onImageAvailable will give you back an android.media.Image and you can directly use FirebaseVisionImage.fromMediaImage(...) to create a FirebaseVisionImage. (You can find how to compute the rotation info from official doc here)
If you must use two Activities, then you need to work around the fact that android.media.Image is not Parcelable. I'd suggest converting it to a Bitmap first, which is Parcelable, so you can set it directly as an Intent extra. (Up to you, but from the end user's perspective it's uncommon to see barcodes saved to the image gallery, so you might want to skip the step of saving it to a file.) Later, in your second Activity, you can use FirebaseVisionImage.fromBitmap(...).
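A hedged sketch of that single-Activity flow, using only ML Kit calls already referenced in this thread (FirebaseVisionImage.fromMediaImage and the barcode detector); the rotation constant is assumed to be computed from the device orientation as in the switch statement above.
import android.media.Image;
import com.google.firebase.ml.vision.FirebaseVision;
import com.google.firebase.ml.vision.barcode.FirebaseVisionBarcode;
import com.google.firebase.ml.vision.barcode.FirebaseVisionBarcodeDetector;
import com.google.firebase.ml.vision.common.FirebaseVisionImage;

// Sketch: hand the android.media.Image from the ImageReader straight to ML Kit
// without writing it to disk first.
public class BarcodeScanner {
    public static void scan(Image mediaImage, int firebaseRotation) {
        FirebaseVisionImage image =
                FirebaseVisionImage.fromMediaImage(mediaImage, firebaseRotation);
        FirebaseVisionBarcodeDetector detector =
                FirebaseVision.getInstance().getVisionBarcodeDetector();
        detector.detectInImage(image)
                .addOnSuccessListener(barcodes -> {
                    for (FirebaseVisionBarcode barcode : barcodes) {
                        // use barcode.getRawValue(), barcode.getFormat(), ...
                    }
                })
                .addOnFailureListener(e -> {
                    // detection failed; log and/or retry
                });
    }
}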

How to run many haarcascade xml files in the same Java program using OpenCV?

I'm new to OpenCV and I want to run a Java program for face detection using OpenCV.
Including only one haarcascade xml file doesn't give me the expected results, so I need to run two or three haarcascade files in the same program (specifically "haarcascade_frontalface_alt.xml" and "haarcascade_profileface.xml" together).
I tried to do it with the following code, but it didn't work. Please suggest how to proceed.
Thank you.
public class LiveFeed extends WatchDogBaseFrame {
private DaemonThread myThread = null;
int count = 0;
VideoCapture webSource = null;
Mat frame = new Mat();
MatOfByte mem = new MatOfByte();
CascadeClassifier faceDetector1 = new CascadeClassifier("/home/erandi/NetBeansProjects/WatchDog/src/ueg/watchdog/view/haarcascade_frontalface_alt.xml");
CascadeClassifier faceDetector2 = new CascadeClassifier("/home/erandi/NetBeansProjects/WatchDog/src/ueg/watchdog/view/haarcascade_eye.xml");
MatOfRect faceDetections = new MatOfRect();
public LiveFeed(WatchDogBaseFrame parentFrame) {
super(parentFrame);
initComponents();
super.setCloseOperation();
jButtonExit.setVisible(false);
}
//class of demon thread
public class DaemonThread implements Runnable {
protected volatile boolean runnable = false;
@Override
public void run() {
synchronized (this) {
while (runnable) {
if (webSource.grab()) {
try {
webSource.retrieve(frame);
Graphics graphics = jPanelVideo.getGraphics();
faceDetector1.detectMultiScale(frame, faceDetections);
faceDetector2.detectMultiScale(frame, faceDetections);
for (Rect rect : faceDetections.toArray()) {
// System.out.println("ttt");
Imgproc.rectangle(frame, new Point(rect.x, rect.y), new Point(rect.x + rect.width, rect.y + rect.height),
new Scalar(0, 255, 0));
}
Imgcodecs.imencode(".bmp", frame, mem);
Image im = ImageIO.read(new ByteArrayInputStream(mem.toArray()));
BufferedImage buff = (BufferedImage) im;
if (graphics.drawImage(buff, 0, 0, getWidth(), getHeight() - 150, 0, 0, buff.getWidth(), buff.getHeight(), null)) {
if (runnable == false) {
System.out.println("Paused ..... ");
this.wait();
}
}
} catch (Exception ex) {
System.out.println("Error");
}
}
}
}
}
}
Object Detection using Haar feature-based cascade classifiers is an effective object detection method proposed by Paul Viola and Michael Jones in their paper, "Rapid Object Detection using a Boosted Cascade of Simple Features" in 2001. It is a machine learning based approach where a cascade function is trained from a lot of positive and negative images. It is then used to detect objects in other images.
OpenCV already contains many pre-trained classifiers for face, eyes, smile etc. Those XML files are stored in opencv/data/haarcascades/ folder.
You can't run multiple cascade files simultaneously to increase performance, but you can apply them one after another in a loop, passing the input image through each of them.
Example code is given in this link: OpenCv sample code
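A minimal sketch of that loop, giving each classifier its own result container and merging the rectangles afterwards (note that in the code above both detectMultiScale calls write into the same MatOfRect, so the second call overwrites the first).
import java.util.ArrayList;
import java.util.List;
import org.opencv.core.Mat;
import org.opencv.core.MatOfRect;
import org.opencv.core.Rect;
import org.opencv.objdetect.CascadeClassifier;

// Sketch: run several cascades one after another over the same frame and
// collect all detections.
public class MultiCascadeDetector {
    public static List<Rect> detectAll(Mat frame, List<CascadeClassifier> cascades) {
        List<Rect> allDetections = new ArrayList<>();
        for (CascadeClassifier cascade : cascades) {
            MatOfRect found = new MatOfRect();
            cascade.detectMultiScale(frame, found);
            allDetections.addAll(found.toList());
        }
        return allDetections; // may contain overlapping boxes from different cascades
    }
}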

Android: Really bad image quality when saving bitmap to sdcard

I am making an OCR app for Android, that will take a screenshot of some text, recognise it and search a key word on Google. If you haven't already realized, I'm trying to make a "Google Now on Tap" clone.
To make the OCR work better, I first rotate the image and then filter it: I crop away the status bar and the navigation bar, convert the image to grayscale, and then sharpen it.
But the image quality after filtering is extremely pixelated, and this greatly affects OCR accuracy.
Here are the images, before and after (just of an IFTTT email I got)
As you can see, the before image is much higher quality than the filtered and rotated one.
Here is my code for rotating, filtering and saving the image:
Firstly taking screenshot, then saving the screenshot.
public void getScreenshot()
{
try
{
Process sh = Runtime.getRuntime().exec("su", null, null);
OutputStream os = sh.getOutputStream();
os.write(("/system/bin/screencap -p " + _path).getBytes("ASCII"));
os.flush();
os.close();
sh.waitFor();
onPhotoTaken();
Toast.makeText(this, "Screenshot taken", Toast.LENGTH_SHORT).show();
}
catch (IOException e)
{
System.out.println("IOException");
}
catch (InterruptedException e)
{
System.out.println("InterruptedException");
}
}
Then, rotate the image:
protected void onPhotoTaken() {
_taken = true;
BitmapFactory.Options options = new BitmapFactory.Options();
options.inSampleSize = 4;
Bitmap bitmap = BitmapFactory.decodeFile(_path, options);
try {
ExifInterface exif = new ExifInterface(_path);
int exifOrientation = exif.getAttributeInt(
ExifInterface.TAG_ORIENTATION,
ExifInterface.ORIENTATION_NORMAL);
Log.v(TAG, "Orient: " + exifOrientation);
int rotate = 0;
switch (exifOrientation) {
case ExifInterface.ORIENTATION_ROTATE_90:
rotate = 90;
break;
case ExifInterface.ORIENTATION_ROTATE_180:
rotate = 180;
break;
case ExifInterface.ORIENTATION_ROTATE_270:
rotate = 270;
break;
}
Log.v(TAG, "Rotation: " + rotate);
if (rotate != 0) {
// Getting width & height of the given image.
int w = bitmap.getWidth();
int h = bitmap.getHeight();
// Setting pre rotate
Matrix mtx = new Matrix();
mtx.preRotate(rotate);
// Rotating Bitmap
bitmap = Bitmap.createBitmap(bitmap, 0, 0, w, h, mtx, false);
}
// Convert to ARGB_8888, required by tess
bitmap = bitmap.copy(Bitmap.Config.ARGB_8888, true);
} catch (IOException e) {
Log.e(TAG, "Couldn't correct orientation: " + e.toString());
}
// _image.setImageBitmap( bitmap );
setImageFilters(bitmap);
}
Then, filter the image:
public void setImageFilters(Bitmap bmpOriginal)
{
//Start by cropping image
Bitmap croppedBitmap = ThumbnailUtils.extractThumbnail(bmpOriginal, 1080, 1420);
//Then convert to grayscale
int width, height;
height = 1420;
width = 1080;
Bitmap bmpGrayscale = Bitmap.createBitmap(width, height, Bitmap.Config.ARGB_8888);
Canvas c = new Canvas(bmpGrayscale);
Paint paint = new Paint();
ColorMatrix cm = new ColorMatrix();
cm.setSaturation(0);
ColorMatrixColorFilter f = new ColorMatrixColorFilter(cm);
paint.setColorFilter(f);
c.drawBitmap(croppedBitmap, 0, 0, paint);
//Finally, sharpen the image
double weight = 11;
double[][] sharpConfig = new double[][]
{
{ 0 , -2 , 0 },
{ -2, weight, -2 },
{ 0 , -2 , 0 }
};
ConvolutionMatrix convMatrix = new ConvolutionMatrix(3);
convMatrix.applyConfig(sharpConfig);
convMatrix.Factor = weight - 8;
Bitmap filteredBitmap = ConvolutionMatrix.computeConvolution3x3(bmpGrayscale, convMatrix);
//Start Optical Character Recognition
startOCR(filteredBitmap);
//Save filtered image
saveFiltered(filteredBitmap);
}
Then, saving the filtered and rotated image:
public void saveFiltered(Bitmap filteredBmp) {
try {
ByteArrayOutputStream bytes = new ByteArrayOutputStream();
filteredBmp.compress(Bitmap.CompressFormat.JPEG, 20, bytes);
//You can create a new file name "test.jpg" in sdcard folder.
File f = new File("/sdcard/SimpleAndroidOCR/ocrgray.jpg");
f.createNewFile();
//Write the bytes in file
FileOutputStream fo = new FileOutputStream(f);
fo.write(bytes.toByteArray());
//Remember close the FileOutput
fo.close();
} catch (Exception e) {
e.printStackTrace();
}
}
Thanks heaps for anyone taking the time to help.
It was actually in my onPhotoTaken method. After taking and saving the screenshot in getScreenshot(), I read the file back from the location it was saved to and then filter it. The fix was changing this line in the onPhotoTaken method from
options.inSampleSize = 4 to options.inSampleSize = 1
It does look like the JPEG compression is messing up the image. Try using a format better suited to images with sharp edges, such as text; I would recommend PNG or even GIF. You could also store the uncompressed BMP.
Jpeg compression works by exploiting the fact that in most pictures (nature, people, objects), sharp edges are not that visible to the human eye. This makes it really bad for storing sharp edged content, such as text.
Also, your image filter is effectively removing the anti-aliasing of the image, which further decreases the perceived image quality. That might be what you want to do, however, since it might make OCR easier.
I also missed the sampling size due to the images you uploaded being the same size here on the site. From the Android documentation:
If set to a value > 1, requests the decoder to subsample the original
image, returning a smaller image to save memory. The sample size is
the number of pixels in either dimension that correspond to a single
pixel in the decoded bitmap. For example, inSampleSize == 4 returns an
image that is 1/4 the width/height of the original, and 1/16 the
number of pixels. Any value <= 1 is treated the same as 1. Note: the
decoder uses a final value based on powers of 2, any other value will
be rounded down to the nearest power of 2.
Setting options.inSampleSize = 4; to 1 instead will increase the quality.
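A short sketch of the two fixes combined: decode the screenshot at full resolution and write the filtered bitmap as lossless PNG instead of JPEG at quality 20. The class and file names are placeholders.
import android.graphics.Bitmap;
import android.graphics.BitmapFactory;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;

public class OcrImageIo {
    // Decode the screenshot at full resolution instead of 1/4 size.
    public static Bitmap decodeFullSize(String path) {
        BitmapFactory.Options options = new BitmapFactory.Options();
        options.inSampleSize = 1; // no subsampling
        return BitmapFactory.decodeFile(path, options);
    }

    // Save the filtered bitmap losslessly; PNG ignores the quality argument.
    public static void savePng(Bitmap filtered, File target) throws IOException {
        try (FileOutputStream out = new FileOutputStream(target)) {
            filtered.compress(Bitmap.CompressFormat.PNG, 100, out);
        }
    }
}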

Takes 25+ Seconds To Save Bitmap From Camera To Phone

I am making a camera app.
There has been a host of issues getting the orientation right because some phones don't write EXIF orientation data. Because of this, I get the bitmap, save it (since I don't think I should read EXIF data from the byte[]), then rotate the bitmap and save over the original file.
It works, and the orientation issue is fixed. The problem is that it's taking 25 seconds or longer on some top-of-the-line phones. Can you tell me why my code is so slow, or advise me on how to find the problem?
Note: If I only save the image once (i.e. with the wrong orientation) it only takes a couple seconds.
Here is my image capture callback:
private Camera.PictureCallback pictureCallback = new Camera.PictureCallback()
{
@Override
public void onPictureTaken(byte[] data, Camera camera)
{
File pictureFile = getOutputMediaFile(MEDIA_TYPE_IMAGE);
if (pictureFile == null){
Log.d("EditPhotoFragment", "Error creating media file, check storage permissions");
return;
}
try
{
FileOutputStream fos = new FileOutputStream(pictureFile);
fos.write(data);
fos.flush();
fos.close();
orientPicture(pictureFile);
//TODO async
galleryAddPic(pictureFile);
} catch (FileNotFoundException e) {
Log.d("EditPhotoFragment", "File not found: " + e.getMessage());
} catch (IOException e) {
Log.d("EditPhotoFragment", "Error accessing file: " + e.getMessage());
}
}
};
And here is where I orient and resave the image:
private Bitmap orientPicture(File pictureFile)
{
Bitmap bitmap = BitmapFactory.decodeFile(pictureFile.getAbsolutePath());
Uri uri = Uri.parse(pictureFile.toString());
ExifInterface exif = null;
try{
exif = new ExifInterface(uri.getPath());
}catch (Exception e)
{
e.printStackTrace();
}
int exifOrientation = exif.getAttributeInt(ExifInterface.TAG_ORIENTATION, ExifInterface.ORIENTATION_NORMAL);
Matrix matrix = new Matrix();
int rotationInDegrees = 0;
//If the orientation tag is missing need to manually rotate it by the 'default' camera
//orientation and if its front facing need to do 360 - the camera rotation value
if(exifOrientation == ExifInterface.ORIENTATION_UNDEFINED)//All phones in this bucket can go fuck themselves
{
Camera.CameraInfo info = new Camera.CameraInfo();
if(_cameraPreview.isBackFacing())
{
Camera.getCameraInfo(Camera.CameraInfo.CAMERA_FACING_BACK, info);
}else
{
Camera.getCameraInfo(Camera.CameraInfo.CAMERA_FACING_FRONT, info);
}
rotationInDegrees = info.orientation; //set it to the default camera orientation
}else
{
rotationInDegrees = exifToDegrees(exifOrientation);
if(!_cameraPreview.isBackFacing())//handle mirroring of front camera
{
Camera.CameraInfo info = new Camera.CameraInfo();
Camera.getCameraInfo(Camera.CameraInfo.CAMERA_FACING_BACK, info);
rotationInDegrees = 360 - rotationInDegrees; //For the front camera doing 360 - gets the right orientation
}
}
matrix.preRotate(rotationInDegrees);
if(!_cameraPreview.isBackFacing())//mirror it
{
matrix.preScale(1,-1);
}
Bitmap adjustedBitmap = Bitmap.createBitmap(bitmap, 0, 0, bitmap.getWidth(), bitmap.getHeight(), matrix, true);
//This saves the proper image over top of it
try
{
FileOutputStream fos = new FileOutputStream(pictureFile);
ByteArrayOutputStream stream = new ByteArrayOutputStream();
adjustedBitmap.compress(Bitmap.CompressFormat.PNG, 100, stream);
byte[] byteArray = stream.toByteArray();
fos.write(byteArray);
fos.flush();
fos.close();
}catch(Exception e)
{
e.printStackTrace();
}
return adjustedBitmap;
}
SOLUTION
As advised, I SHOULD read the EXIF data, which I was able to do without needing an external library thanks to this:
https://stackoverflow.com/a/13581324/3324388
Can you advise why my code is so slow
Perhaps among other reasons, you are writing the image to a file, re-reading the same image from the file, doing the transform, then writing the image back out to a file. That is going to take a lot of time.
Note: If I only save the image once (i.e. with the wrong orientation) it only takes a couple seconds.
That's because you are doing a lot less work, including only ~33% of the disk I/O, and disk I/O is going to be slow.
since I don't think I should read EXIF data from the byte[]
My apologies if you were viciously attacked by a byte[] as a young child or something. However, if you want better performance, you are going to have to read the EXIF data out of the existing in-memory copy of the image.
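A sketch of reading the orientation tag straight from the in-memory JPEG bytes delivered to onPictureTaken, so the file only needs to be written once. The InputStream constructor requires API 24+ on the framework ExifInterface; the androidx.exifinterface backport offers it on older versions.
import android.media.ExifInterface; // or androidx.exifinterface.media.ExifInterface
import java.io.ByteArrayInputStream;
import java.io.IOException;

// Sketch: read EXIF orientation from the in-memory copy of the image.
public class ExifFromBytes {
    public static int orientationOf(byte[] jpegData) throws IOException {
        ExifInterface exif = new ExifInterface(new ByteArrayInputStream(jpegData));
        return exif.getAttributeInt(ExifInterface.TAG_ORIENTATION,
                ExifInterface.ORIENTATION_NORMAL);
    }
}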

Decode with Mediacodec and working with OpenCV

I'm working on app for android that using OpenCV.
I have an mp4 video file; I need to read 300 frames of 1920x1080 from it and do some image-processing manipulation on them.
After a long search, these examples are all I found in the end.
My problem is that I only need something simple: I just want to read the frames and save them to device memory, or convert them to an OpenCV Matrix.
This is my attempt (explanation at the end):
public void run() {
extractor = new MediaExtractor();
extractor.setDataSource(SAMPLE);
for (int i = 0; i < extractor.getTrackCount(); i++) {
MediaFormat format = extractor.getTrackFormat(i);
String mime = format.getString(MediaFormat.KEY_MIME);
if (mime.startsWith("video/")) {
extractor.selectTrack(i);
decoder = MediaCodec.createDecoderByType(mime);
decoder.configure(format, surface, null, 0);
break;
}
}
if (decoder == null) {
Log.e("DecodeActivity", "Can't find video info!");
return;
}
decoder.start();
ByteBuffer[] inputBuffers = decoder.getInputBuffers();
ByteBuffer[] outputBuffers = decoder.getOutputBuffers();
BufferInfo info = new BufferInfo();
boolean isEOS = false;
long startMs = System.currentTimeMillis();
while (!Thread.interrupted()) {
if (!isEOS) {
int inIndex = decoder.dequeueInputBuffer(10000);
if (inIndex >= 0) {
ByteBuffer buffer = inputBuffers[inIndex];
int sampleSize = extractor.readSampleData(buffer, 0);
if (sampleSize < 0) {
Log.d("DecodeActivity", "InputBuffer BUFFER_FLAG_END_OF_STREAM");
decoder.queueInputBuffer(inIndex, 0, 0, 0, MediaCodec.BUFFER_FLAG_END_OF_STREAM);
isEOS = true;
} else {
decoder.queueInputBuffer(inIndex, 0, sampleSize, extractor.getSampleTime(), 0);
extractor.advance();
}
}
}
int outIndex = decoder.dequeueOutputBuffer(info, 10000);
switch (outIndex) {
case MediaCodec.INFO_OUTPUT_BUFFERS_CHANGED:
Log.d("DecodeActivity", "INFO_OUTPUT_BUFFERS_CHANGED");
outputBuffers = decoder.getOutputBuffers();
break;
case MediaCodec.INFO_OUTPUT_FORMAT_CHANGED:
Log.d("DecodeActivity", "New format " + decoder.getOutputFormat());
break;
case MediaCodec.INFO_TRY_AGAIN_LATER:
Log.d("DecodeActivity", "dequeueOutputBuffer timed out!");
break;
default:
ByteBuffer buffer = outputBuffers[outIndex];
Log.v("DecodeActivity", "We can't use this buffer but render it due to the API limit, " + buffer);
byte[] b = new byte[buffer.remaining()];
// We use a very simple clock to keep the video FPS, or the video
// playback will be too fast
while (info.presentationTimeUs / 1000 > System.currentTimeMillis() - startMs) {
try {
sleep(10);
} catch (InterruptedException e) {
e.printStackTrace();
break;
}
}
decoder.releaseOutputBuffer(outIndex, true);
break;
}
// All decoded frames have been rendered, we can stop playing now
if ((info.flags & MediaCodec.BUFFER_FLAG_END_OF_STREAM) != 0) {
Log.d("DecodeActivity", "OutputBuffer BUFFER_FLAG_END_OF_STREAM");
break;
}
}
decoder.stop();
decoder.release();
extractor.release();
}
In this example I read the frames and render them to a Surface.
What do I need to change in order to save them as a Bitmap/Matrix, or save them on the device?
Thanks
I see 2 paths for your code:
For Android 4.3 and up, you can use examples from Grafika as fadden suggested. The MediaCodec decoder should use a surface for configure method, and releaseOutputBuffer with boolean render set to "true" so that rendering takes place to a surface. Then, you can perform manipulations using shaders while rendering to a surface that will be used by a MediaCodec encoder to encode it back as video. This solution is fast on most devices with some exceptions, but it is new and bugs still appear in device hardware.
If you can accept a slower solution, OpenCV has a nice port/integration on Android, with code samples and all, even encoding back to h264 using ffmpeg. For decoding you can still use MediaCodec (if you render the results to a surface, you will need to use glReadPixels, which is slow, to get the data back to cpu). This works also for previous versions of Android.
Either way, you need time, patience and energy, as it will not go without some struggle.
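If you take the second, CPU-based route and can obtain a decoded frame as an NV21-style YUV byte[] of known width and height (for example by decoding to a buffer instead of a Surface), a minimal conversion to an OpenCV Mat might look like the sketch below; the colour-conversion constant is an assumption and depends on what the decoder actually outputs.
import org.opencv.core.CvType;
import org.opencv.core.Mat;
import org.opencv.imgproc.Imgproc;

// Sketch: wrap a YUV frame in a single-channel Mat and convert it to RGB.
public class YuvToMat {
    public static Mat toRgb(byte[] yuvData, int width, int height) {
        Mat yuv = new Mat(height + height / 2, width, CvType.CV_8UC1);
        yuv.put(0, 0, yuvData);
        Mat rgb = new Mat();
        Imgproc.cvtColor(yuv, rgb, Imgproc.COLOR_YUV2RGB_NV21);
        return rgb;
    }
}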
