I am writing an FLV parser in Java and have come up against an issue. The program successfully parses and groups together tags into packets and correctly identifies and assigns a byte array for each tag's body based upon the BodyLength flag in the header. However in my test files it successfully completes this but stops before the last 4 bytes.
The byte sequence left out in the first file is :
00 00 14 C3
And in the second:
00 00 01 46
Clearly it is an issue with the final 4 bytes of both files however I cannot spot the error in my logic. I suspect it might be:
while (in.available() != 0)
However I also doubt this is the case as the program is successfully entering the loop for the final tag however it is just stopping 4 bytes short. Any help is greatly appreciated. (I know proper exception handling is as yet not taking place)
Parser.java
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.lang.reflect.Array;
import java.net.URI;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.InputMismatchException;
/**
*
* #author A
*
* Parser class for FLV files
*/
public class Parser {
private static final int HEAD_SIZE = 9;
private static final int TAG_HEAD_SIZE = 15;
private static final byte[] FLVHEAD = { 0x46, 0x4C, 0x56 };
private static final byte AUDIO = 0x08;
private static final byte VIDEO = 0x09;
private static final byte DATA = 0x12;
private static final int TYPE_INDEX = 4;
private File file;
private FileInputStream in;
private ArrayList<Packet> packets;
private byte[] header = new byte[HEAD_SIZE];
Parser() throws FileNotFoundException {
throw new FileNotFoundException();
}
Parser(URI uri) {
file = new File(uri);
init();
}
Parser(File file) {
this.file = file;
init();
}
private void init() {
packets = new ArrayList<Packet>();
}
public void parse() {
boolean test = false;
try {
test = parseHeader();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
if (test) {
System.out.println("Header Verified");
// Add header packet to beginning of list & then null packet
Packet p = new Packet(PTYPE.P_HEAD);
p.setSize(header.length);
p.setByteArr(header);
packets.add(p);
p = null;
try {
parseTags();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
} else {
try {
in.close();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
// throw FileNotFoundException because incorrect file
}
}
private boolean parseHeader() throws FileNotFoundException, IOException {
if (file == null)
throw new FileNotFoundException();
in = new FileInputStream(file);
in.read(header, 0, 9);
return Arrays.equals(FLVHEAD, Arrays.copyOf(header, FLVHEAD.length));
}
private void parseTags() throws IOException {
if (file == null)
throw new FileNotFoundException();
byte[] tagHeader = new byte[TAG_HEAD_SIZE];
Arrays.fill(tagHeader, (byte) 0x00);
byte[] body;
byte[] buf;
PTYPE pt;
int OFFSET = 0;
while (in.available() != 0) {
// Read first 5 - bytes, previous tag size + tag type
in.read(tagHeader, 0, 5);
if (tagHeader[TYPE_INDEX] == AUDIO) {
pt = PTYPE.P_AUD;
} else if (tagHeader[TYPE_INDEX] == VIDEO) {
pt = PTYPE.P_VID;
} else if (tagHeader[TYPE_INDEX] == DATA) {
pt = PTYPE.P_DAT;
} else {
// Header should've been dealt with - if previous data types not
// found then throw exception
System.out.println("Unexpected header format: ");
System.out.print(String.format("%02x\n", tagHeader[TYPE_INDEX]));
System.out.println("Last Tag");
packets.get(packets.size()-1).diag();
System.out.println("Number of tags found: " + packets.size());
throw new InputMismatchException();
}
OFFSET = TYPE_INDEX;
// Read body size - 3 bytes
in.read(tagHeader, OFFSET + 1, 3);
// Body size buffer array - padding for 1 0x00 bytes
buf = new byte[4];
Arrays.fill(buf, (byte) 0x00);
// Fill size bytes
buf[1] = tagHeader[++OFFSET];
buf[2] = tagHeader[++OFFSET];
buf[3] = tagHeader[++OFFSET];
// Calculate body size
int bSize = ByteBuffer.wrap(buf).order(ByteOrder.BIG_ENDIAN)
.getInt();
// Initialise Array
body = new byte[bSize];
// Timestamp
in.read(tagHeader, ++OFFSET, 3);
Arrays.fill(buf, (byte) 0x00);
// Fill size bytes
buf[1] = tagHeader[OFFSET++];
buf[2] = tagHeader[OFFSET++];
buf[3] = tagHeader[OFFSET++];
int milliseconds = ByteBuffer.wrap(buf).order(ByteOrder.BIG_ENDIAN)
.getInt();
// Read padding
in.read(tagHeader, OFFSET, 4);
// Read body
in.read(body, 0, bSize);
// Diagnostics
//printBytes(body);
Packet p = new Packet(pt);
p.setSize(tagHeader.length + body.length);
p.setByteArr(concat(tagHeader, body));
p.setMilli(milliseconds);
packets.add(p);
p = null;
// Zero out for next iteration
body = null;
Arrays.fill(buf, (byte)0x00);
Arrays.fill(tagHeader, (byte)0x00);
milliseconds = 0;
bSize = 0;
OFFSET = 0;
}
in.close();
}
private byte[] concat(byte[] tagHeader, byte[] body) {
int aLen = tagHeader.length;
int bLen = body.length;
byte[] C = (byte[]) Array.newInstance(tagHeader.getClass()
.getComponentType(), aLen + bLen);
System.arraycopy(tagHeader, 0, C, 0, aLen);
System.arraycopy(body, 0, C, aLen, bLen);
return C;
}
private void printBytes(byte[] b) {
System.out.println("\n--------------------");
for (int i = 0; i < b.length; i++) {
System.out.print(String.format("%02x ", b[i]));
if (((i % 8) == 0 ) && i != 0)
System.out.println();
}
}
}
Packet.java
public class Packet {
private PTYPE type = null;
byte[] buf;
int milliseconds;
Packet(PTYPE t) {
this.setType(t);
}
public void setSize(int s) {
buf = new byte[s];
}
public PTYPE getType() {
return type;
}
public void setType(PTYPE type) {
if (this.type == null)
this.type = type;
}
public void setByteArr(byte[] b) {
this.buf = b;
}
public void setMilli(int milliseconds) {
this.milliseconds = milliseconds;
}
public void diag(){
System.out.println("|-- Tag Type: " + type);
System.out.println("|-- Milliseconds: " + milliseconds);
System.out.println("|-- Size: " + buf.length);
System.out.println("|-- Bytes: ");
for(int i = 0; i < buf.length; i++){
System.out.print(String.format("%02x ", buf[i]));
if (((i % 8) == 0 ) && i != 0)
System.out.println();
}
System.out.println();
}
}
jFLV.java
import java.net.URISyntaxException;
public class jFLV {
/**
* #param args
*/
public static void main(String[] args) {
// TODO Auto-generated method stub
Parser p = null;
try {
p = new Parser(jFLV.class.getResource("sample.flv").toURI());
} catch (URISyntaxException e1) {
// TODO Auto-generated catch block
e1.printStackTrace();
}
p.parse();
}
}
PTYPE.java
public enum PTYPE {
P_HEAD,P_VID,P_AUD,P_DAT
};
Both your use of available() and your call to read are broken. Admittedly I would have somewhat expected this to be okay for a FileInputStream (until you reach the end of the stream, at which point ignoring the return value for read could still be disastrous) but I personally assume that streams can always return partial data.
available() only tells you whether there's any data available right now. It's very rarely useful - just ignore it. If you want to read until the end of the stream, you should usually keep calling read until it returns -1. It's slightly tricky to combine that with "I'm trying to read the next block", admittedly. (It would be nice if InputStream had a peek() method, but it doesn't. You can wrap it in a BufferedInputStream and use mark/reset to test that at the start of each loop... ugly, but it should work.)
Next, you're ignoring the result of InputStream.read (in multiple places). You should always use the result of this, rather than assuming it has read the amount of data you've asked for. You might want a couple of helper methods, e.g.
static byte[] readExactly(InputStream input, int size) throws IOException {
byte[] data = new byte[size];
readExactly(input, data);
return data;
}
static void readExactly(InputStream input, byte[] data) throws IOException {
int index = 0;
while (index < data.length) {
int bytesRead = input.read(data, index, data.length - index);
if (bytesRead < 0) {
throw new EOFException("Expected more data");
}
}
}
You should use one of the read methods instead of available, as available() "Returns an estimate of the number of bytes that can be read (or skipped over) from this input stream without blocking by the next invocation of a method for this input stream."
It is not designed to check how long you can read.
Related
I have a StringBuilder (~1GB size) which converts to an almost 1GB String. Then it is sent to AWSS3Client to be put as a file on s3. Now to write the String to s3, I need to further convert the string to a ByteArrayInputStream, which takes another 1GB.
InputStream dataInputStream = new ByteArrayInputStream(data.getBytes(StandardCharsets.UTF_8));
amazonS3Client.putObject(bucket, key, dataInputStream, new ObjectMetadata());
Now I end up with a JVM heap of 3GB. Is there a way to directly send the 1GB StringBuilder directly to S3 with little over 1GB heap?
The only way I could think of is to convert StringBuilder to a File instance (using FileWriter) --> S3 API which takes File as input. But this approach requires a 2-IOPS to the SSD -- penalty that I want to avoid.
Any better way to handle the problem is welcome.
[edit 1]
(as per comments) The builder size changed from 5GB to 1GB. I wanted to convey that the builderis a huge one (and hence used 5GB previously, which was my mistake)
The string-oriented classes are frustratingly locked down with final in many cases, which means that oftentimes buffers that already exist in memory have to be duplicated. This is probably to do with security but is nevertheless wasteful and frustrating. You should be able to do something with the following if you have a StringBuilder. Obviously you'll need a much bigger buffer than in the illustrative main method.
import java.io.ByteArrayInputStream;
import java.io.FilterInputStream;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.FileOutputStream;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.Charset;
import java.util.Objects;
/**
*
* #author CEHJ
* #version 1.0
*/
public class CharSequenceInputStream extends InputStream {
private CharSequence charSequence;
private int position;
private CharsetEncoder enc;
private Charset outputEncoding;
private CharBuffer cb;
private ByteBuffer bb;
public CharSequenceInputStream(CharSequence charSequence) {
this(charSequence, Charset.forName(System.getProperty("native.encoding", Charset.defaultCharset().name())));
}
public CharSequenceInputStream(CharSequence charSequence, Charset outputEncoding) {
this.charSequence = charSequence;
this.outputEncoding = outputEncoding;
position = 0;
enc = outputEncoding.newEncoder();
cb = CharBuffer.allocate(1);
bb = ByteBuffer.allocate(8);
}
// For testing only
public static void main(String[] args) throws IOException {
if (args.length < 2) {
System.err.printf("Usage: java CharSequenceInputStream <String to be stored> <Output file> [Char encoding for output]");
System.exit(1);
}
String content = args[0];
String outputPath = args[1];
StringBuilder sb = new StringBuilder(content);
CharSequenceInputStream in = null;
if (args.length > 2) {
in = new CharSequenceInputStream(sb, Charset.forName(args[2]));
} else {
in = new CharSequenceInputStream(sb);
}
try (OutputStream out = new FileOutputStream(outputPath)) {
int bytesRead = -1;
final int BUF_SIZE = 8;
byte[] buf = new byte[BUF_SIZE];
while ((bytesRead = in.read(buf, 0, BUF_SIZE)) > -1) {
out.write(buf, 0, bytesRead);
}
}
}
#Override
public int read() throws IOException {
int result = -1;
if (position < charSequence.length()) {
if (bb.remaining() == bb.capacity() || bb.position() == bb.limit()) {
// At start - nothing yet has been decoded
// OR - we've read all bytes in the buffer and need to refill it
cb.clear();
bb.clear();
char currentChar = charSequence.charAt(position++);
cb.append(currentChar);
cb.flip();
enc.reset();
enc.encode(cb, bb, false);
enc.encode(cb, bb, true);
enc.flush(bb);
bb.flip();
}
}
if (bb.position() < bb.limit()) {
result = bb.get() & 0xFF;
}
return result;
}
#Override
public int read(byte[] b) throws IOException {
return read(b, 0, b.length);
}
#Override
public int read(byte b[], int off, int len) throws IOException {
Objects.checkFromIndexSize(off, len, b.length);
if (len == 0) {
return 0;
}
int c = read();
if (c == -1) {
return -1;
}
b[off] = (byte) c;
int i = 1;
try {
for (; i < len; i++) {
c = read();
if (c == -1) {
break;
}
b[off + i] = (byte) c;
}
} catch (IOException ee) {
}
return i;
}
#Override
public void close() {
// NO OP
}
}
I want to read fast line by line big csv files (approx ~ 1gb) in UTF-8. I have created a class for it, but it doesn't work properly. UTF-8 decodes Cyrillic symbol from 2 bytes. I use byte buffer to read it, for example, it has 10 bytes length. So if symbol composed from 10 and 11 bytes in the file it wouldn't be decoded normally :(
public class MyReader extends InputStream {
private FileChannel channel;
private ByteBuffer buffer = ByteBuffer.allocate(10);
private int buffSize = 0;
private int position = 0;
private boolean EOF = false;
private CharBuffer charBuffer;
private MyReader() {}
static MyReader getFromFile(final String path) throws IOException {
MyReader myReader = new MyReader();
myReader.channel = FileChannel.open(Path.of(path),
StandardOpenOption.READ);
myReader.initNewBuffer();
return myReader;
}
private void initNewBuffer() {
try {
buffSize = channel.read(buffer);
buffer.position(0);
charBuffer = Charset.forName("UTF-8").decode(buffer);
buffer.position(0);
} catch (IOException e) {
throw new RuntimeException("Error reading file: {}", e);
}
}
#Override
public int read() throws IOException {
if (EOF) {
return -1;
}
if (position < charBuffer.length()) {
return charBuffer.array()[position++];
} else {
initNewBuffer();
if (buffSize < 1) {
EOF = true;
} else {
position = 0;
}
return read();
}
}
public char[] readLine() throws IOException {
int readResult = 0;
int startPos = position;
while (readResult != -1) {
readResult = read();
}
return Arrays.copyOfRange(charBuffer.array(), startPos, position);
}
}
Bad solution, but it works)
private void initNewBuffer() {
try {
buffSize = channel.read(buffer);
buffer.position(0);
charBuffer = StandardCharsets.UTF_8.decode(buffer);
if (buffSize > 0) {
byte edgeByte = buffer.array()[buffSize - 1];
if (edgeByte == (byte) 0xd0 ||
edgeByte == (byte) 0xd1 ||
edgeByte == (byte) 0xc2 ||
edgeByte == (byte) 0xd2 ||
edgeByte == (byte) 0xd3
) {
channel.position(channel.position() - 1);
charBuffer.limit(charBuffer.limit()-1);
}
}
buffer.position(0);
} catch (IOException e) {
throw new RuntimeException("Error reading file: {}", e);
}
}
First: the gain is questionable.
The Files class has many nice and quite production fast methods.
Bytes with high bit 1 (< 0) are part of a UTF-8 multibyte sequence.
With high bits 10 they are continuation bytes.
Sequences might be upto 6 bytes nowadays (I believe).
So the next buffer starts with some continuation bytes, they belong to the previous buffer.
The programming logic I gladly leave to you.
After running a Junit test for String serialization, it is failed and gave me the following results:
Expected: "netmodel"
Actual: "l"
The serialize method as follows
public static void serializeString(String objectToSerialize, OutputStream outputStream) {
byte[] bytesArr = objectToSerialize.getBytes();
serializeInt(bytesArr.length, outputStream);
try {
outputStream.write(bytesArr);
} catch (IOException e) {
e.printStackTrace();
}
}
And my deserialize method as follows
public static String deserializeString(InputStream inputStream) {
String deserializeObject = "";
char asciiToChar;
int stringByteArrayLength = deserializeInt(inputStream);
byte[] databytesArr = new byte[stringByteArrayLength];
try {
inputStream.read(databytesArr, 0, stringByteArrayLength);
}
catch (IOException e) {
e.printStackTrace();
}
for (int i = 0; i < databytesArr.length; i++) {
asciiToChar = (char) databytesArr[i];
deserializeObject = "" + Character.toString(asciiToChar);
}
return deserializeObject;
}
Finally, I wrote a unit test as follows
public class StringSerializerTest {
private InputStream iStream;
private ByteArrayOutputStream oStream;
#Before
public void init() {
oStream = new ByteArrayOutputStream();
}
String serialzeAndDeserializeObject(String stringValue) {
OutputStreamUtil.serializeString(stringValue, oStream);
iStream = new ByteArrayInputStream(oStream.toByteArray());
return InputStreamUtil.deserializeString(iStream);
}
#Test
public void equals_equal() {
String stringValue = "netmodel";
String deserializedStringValue = serialzeAndDeserializeObject(stringValue);
assertThat(deserializedStringValue).isEqualTo(stringValue);
}
}
what was wrong? and how to fix it?
You are reassigning the entire value of deserializeObject during each iteration of
for (int i = 0; i < databytesArr.length; i++) {
asciiToChar = (char) databytesArr[i];
deserializeObject = "" + Character.toString(asciiToChar);
}
This results in only the last character (l in this case) being stored in deserializeObject. This loop should append the next character to the deserializeObject as in the following:
for (int i = 0; i < databytesArr.length; i++) {
asciiToChar = (char) databytesArr[i];
deserializeObject += Character.toString(asciiToChar);
}
The corrected deserialization logic would be:
public static String deserializeString(InputStream inputStream) {
String deserializeObject = "";
char asciiToChar;
int stringByteArrayLength = deserializeInt(inputStream);
byte[] databytesArr = new byte[stringByteArrayLength];
try {
inputStream.read(databytesArr, 0, stringByteArrayLength);
}
catch (IOException e) {
e.printStackTrace();
}
for (int i = 0; i < databytesArr.length; i++) {
asciiToChar = (char) databytesArr[i];
deserializeObject += Character.toString(asciiToChar);
}
return deserializeObject;
}
The error was already reported by Justin Albano.
However take also care of strings with non-ASCII: like special characters.
Something like the following. Also one should close at the end to ensure flushing in case of a buffered stream. And theoretically a read could yield only a non-blocking part of the array. DataOutputStream has nice methods, though you seem to roll your own serialisation.
public static void serializeString(String objectToSerialize, OutputStream outputStream)
throws IOException {
byte[] bytesArr = objectToSerialize.getBytes(StandardCharsets.UTF_8);
serializeInt(bytesArr.length, outputStream);
outputStream.write(bytesArr);
}
public static String deserializeString(InputStream inputStream)
throws IOException {
int stringByteArrayLength = deserializeInt(inputStream);
byte[] databytesArr = new byte[stringByteArrayLength];
readFully(inputStream, databytesArr);
return new String(databytesArr, StandardCharsets.UTF_8);
}
private static void readFully(InputStream inputStream, byte[] bytes) throws IOException {
int i = 0;
while (i < bytes.length) {
int nread = inputStream.read(bytes, i, bytes.length - i);
if (nread <= 0) {
throw new IOException("Premature EOF");
}
i += nread;
}
}
Mind that StandardCharsets is not in Android SDK, only standard Java.
Im currently creating a Huffman compression program.
But Im having some trouble with writing/reading the bits.
I want to be able to write specific bits to a file.
e.g first "0100" then "0101" should be written as a byte to a new file using fileOutputStream as "01000101" :69
Class BitFileWriter - writes bits to file by saving each byte in buffer and then writing when buffer is full (contains 8 bits).
In the main function of this class I have some tests to se if all bytes will be written to file.
but opening the text file it doesn't read "AB".
/**
* writes bits to file outPutStream.
*/
public class BitFileWriter {
private BufferedOutputStream out;
private int buffer; // 8-bit buffer of bits to write out
private int n; // number of bits remaining in buffer
private String filename;
public BitFileWriter(String filename){
this.filename = filename;
}
private void addBitToBuffer(boolean bit) throws IOException {
// add bit to buffer
this.buffer <<= 1;
if (bit) this.buffer |= 1;
n++;
//if buffer is full write a whole byte.
if(n == 8){
writeByte(this.buffer);
this.n = 0;
this.buffer = 0;
}
}
private void writeByte(int b) throws IOException {
this.out = new BufferedOutputStream(new
FileOutputStream(filename));
out.write(b);
}
public void flush() throws IOException {
this.out.flush();
}
public static void main(String[] args) throws IOException {
BitFileWriter bitFileWriter = new
BitFileWriter("./src/result.txt");
// byte: 01000001, A
bitFileWriter.addBitToBuffer(true);
bitFileWriter.addBitToBuffer(true);
bitFileWriter.addBitToBuffer(false);
bitFileWriter.addBitToBuffer(false);
bitFileWriter.addBitToBuffer(false);
bitFileWriter.addBitToBuffer(false);
bitFileWriter.addBitToBuffer(true);
bitFileWriter.addBitToBuffer(false);
//byte 01000011, B
bitFileWriter.addBitToBuffer(false);
bitFileWriter.addBitToBuffer(true);
bitFileWriter.addBitToBuffer(true);
bitFileWriter.addBitToBuffer(false);
bitFileWriter.addBitToBuffer(false);
bitFileWriter.addBitToBuffer(false);
bitFileWriter.addBitToBuffer(false);
bitFileWriter.addBitToBuffer(false);
bitFileWriter.flush();
}
}
Class BitFileReader - reads bits from file.
but reading all 16 bits that I wanted to write to result.txt doesn´t give me the bits I (think) have written.
/**
* Reads one bit at a time from a file.
*
*
*/
public class BitFileReader {
private BufferedInputStream in;
private int currentByte; // -1 if no more data
private int bitPos; // position in currentByte
/**
* Creates a BitFileReader by opening a connection to an actual file,
* the file named by the File object file in the file system.
*/
public BitFileReader(File file) throws IOException {
in = new BufferedInputStream(new FileInputStream(file));
currentByte = in.read();
bitPos = 7;
}
/** Returns true if this reader has another bit in its input. */
public boolean hasNextBit() {
return currentByte != -1 && in != null;
}
/** Reads a single bit. */
public int nextBit() throws IOException {
int res = (currentByte>>bitPos) & 1;
--bitPos;
if (bitPos < 0) {
currentByte = in.read(); // -1 if end of file has been reached (read returns -1 if end of file).
bitPos = 7;
}
return res ;
}
/** Closes this reader. */
public void close() throws IOException {
if (in != null) {
in.close();
}
}
//Test
public static void main(String[] args) throws IOException {
File temp;
BitFileReader reader;
reader = new BitFileReader(new File("./src/result.txt"));
System.out.print("first byte: ");
for(int i = 0; i <8; i++){
System.out.print(reader.nextBit());
}
System.out.print(". second byte: ");
for(int i = 0; i <8; i++){
System.out.print(reader.nextBit());
}
reader.close();
}
}
Output is: first byte: 01100000. second byte: 11111111
The first thing I would do, is to move the statement:
this.out = new BufferedOutputStream(new
FileOutputStream(filename));
from writeByte to the constructor
I have a byte array which is filled by a serial port event and code is shown below:
private InputStream input = null;
......
......
public void SerialEvent(SerialEvent se){
if(se.getEventType == SerialPortEvent.DATA_AVAILABLE){
int length = input.available();
if(length > 0){
byte[] array = new byte[length];
int numBytes = input.read(array);
String text = new String(array);
}
}
}
The variable text contains the below characters,
"\033[K", "\033[m", "\033[H2J", "\033[6;1H" ,"\033[?12l", "\033[?25h", "\033[5i", "\033[4i", "\033i" and similar types..
As of now, I use String.replace to remove all these characters from the string.
I have tried new String(array , 'CharSet'); //Tried with all CharSet options but I couldn't able to remove those.
Is there any way where I can remove those characters without using replace method?
I gave a unsatisfying answer, thanks to #OlegEstekhin for pointing that out.
As noone else answered yet, and a solution is not a two-liner, here it goes.
Make a wrapping InputStream that throws away escape sequences. I have used a PushbackInputStream, where a partial sequence skipped, may still be pushed back for reading first. Here a FilterInputStream would suffice.
public class EscapeRemovingInputStream extends PushbackInputStream {
public static void main(String[] args) {
String s = "\u001B[kHello \u001B[H12JWorld!";
byte[] buf = s.getBytes(StandardCharsets.ISO_8859_1);
ByteArrayInputStream bais = new ByteArrayInputStream(buf);
EscapeRemovingInputStream bin = new EscapeRemovingInputStream(bais);
try (InputStreamReader in = new InputStreamReader(bin,
StandardCharsets.ISO_8859_1)) {
int c;
while ((c = in.read()) != -1) {
System.out.print((char) c);
}
System.out.println();
} catch (IOException ex) {
Logger.getLogger(EscapeRemovingInputStream.class.getName()).log(
Level.SEVERE, null, ex);
}
}
private static final Pattern ESCAPE_PATTERN = Pattern.compile(
"\u001B\\[(k|m|H\\d+J|\\d+:\\d+H|\\?\\d+\\w|\\d*i)");
private static final int MAX_ESCAPE_LENGTH = 20;
private final byte[] escapeSequence = new byte[MAX_ESCAPE_LENGTH];
private int escapeLength = 0;
private boolean eof = false;
public EscapeRemovingInputStream(InputStream in) {
this(in, MAX_ESCAPE_LENGTH);
}
#Override
public int read(byte[] b, int off, int len) throws IOException {
for (int i = 0; i < len; ++i) {
int c = read();
if (c == -1) {
return i == 0 ? -1 : i;
}
b[off + i] = (byte) c;
}
return len;
}
#Override
public int read() throws IOException {
int c = eof ? -1 : super.read();
if (c == -1) { // Throw away a trailing half escape sequence.
eof = true;
return c;
}
if (escapeLength == 0 && c != 0x1B) {
return c;
} else {
escapeSequence[escapeLength] = (byte) c;
++escapeLength;
String esc = new String(escapeSequence, 0, escapeLength,
StandardCharsets.ISO_8859_1);
if (ESCAPE_PATTERN.matcher(esc).matches()) {
escapeLength = 0;
} else if (escapeLength == MAX_ESCAPE_LENGTH) {
escapeLength = 0;
unread(escapeSequence);
return super.read(); // No longer registering the escape
}
return read();
}
}
}
User calls EscapeRemovingInputStream.read
this read may call some read's itself to fill an byte buffer escapeSequence
(a push-back may be done calling unread)
the original read returns.
The recognition of an escape sequence seems grammatical: command letter, numerical argument(s). Hence I use a regular expression.