Extract binaries line by line in JAVA without saving to a string - java

I have the following method, but this one has a problem and it is that in a part of the file there are some special characters where the character with hexadecimal value 81(.) when the program generates the export of the file is 3F(?) and with that it damages the binary:
public static void exportbinary() {
List<String> files = listFiles(source,1);
System.out.println("Reviewing files to extract data");
String registro = null;
String contenido = "";
for (String s : files) {
//The name of the files to be processed is adjusted
String ogName=s;
System.out.println("Renaming files to extract binary data...");
File sFile = new File(source, s);
sFile.renameTo(new File(source,s+"ext"));
s = s+"ext";
//Start binary extraction process
File sourceFile = new File(source, s);
File destFile = new File(destination, ogName);
if (destFile.exists()) {
destFile.delete();
}
if (!dctmp.exists()) {
try {
dctmp.createNewFile();
} catch (IOException e) {
e.printStackTrace();
}
}
Writer w = null;
try {
w = new BufferedWriter(new FileWriter(dctmp, true));
} catch (IOException e1) {
e1.printStackTrace();
}
System.out.println("Processing" + sourceFile + " en " + destFile);
try (BufferedReader br = new BufferedReader(new FileReader(sourceFile))) {
String data;
while ((data = br.readLine()) != null) {
String line = data;
if (line.substring(0, 6).equals("924007")) {
// Valores binarios
String binarios = line.substring(585, 586 + 82);
line = line.substring(0, 585) + (new String(new char[83]).replace('\0', '0'))
+ line.substring((586 + 82), line.length());
// TID
String TID = line.substring(1005, 1020);
registro = TID + ";" + binarios + "\n";
contenido = contenido + registro;
}
BufferedWriter wrtr = new BufferedWriter(new FileWriter(destFile, true));
wrtr.append(line + "\n");
wrtr.close();
line = null;
}
System.out.println("Data is added to the temporary");
w.append(contenido);
w.close();
br.close();
System.out.println("Successfully complete process");
} catch (Exception e) {
e.printStackTrace();
} finally {
sourceFile.delete();
}
}
}
I did an exercise treating the file with FileInputStream() and it worked and kept the value, and it is the following code:
private void testBinary(File filename) {
try {
FileInputStream FileOrigin = new FileInputStream(filename);
// Arrays
byte[] biyeFile = new byte[(int) filename.length()];
int[] intArray = new int[biyeFile.length];
char[] CharArrayBit = new char[intArray.length];
FileOrigin.read(biyeFile);
for (int i = 0; i < biyeFile.length; i++) {
// Converting each char into its byte equivalent
intArray[i] = (int) Byte.toUnsignedInt(biyeFile[i]);
}
File temp = new File("C:/MOVIMIENTO/ebcdic/ascii/", filename.getName() + ".TMP");
temp.createNewFile();
FileOutputStream fileOutputStream = new FileOutputStream(temp);
for (int j = 0; j < charArrayBit.length; ++j) {
charArrayBit[j] = (char) intArray[j];
try {
if ((intArray[j] != 10) && (intArray[j] != 13)) {
fileOutputStream.write(charArrayBit[j]);
}
} catch (IOException e) {
e.printStackTrace();
}
}
FileOrigin.close();
fileOutputStream.close();
filename.delete();
temp.renameTo(filename);
} catch (IOException e) {
e.printStackTrace();
}
}
But I can't seem to implement the logic I made with the one that is already there and I'm getting stuck :( on how I can deal line by line in bytes. Try this Java - Read line using InputStream
The mess I have is that by using String I already lose the value String data; data = br.readLine() String line = data;
Update:
The idea is that the temporary file where I am printing the TID and binary is as follows:
file dc.tmp
200232133:..3....¢...€ . ...
300232133:.....Þù...€ ....€
400232133:ù....Þù...€ ....€ ....
etc
This is some VB.net code that I'm basing on pulling out the binary:
While POS < inputFile.Length
While reg < rowlen
dato1 = inputFile.ReadByte()
POS = POS + 1
reg = reg + 1
dato11 = BitConverter.GetBytes(dato1)
If (binary = 1 And (reg > 589 And reg <= 589 + 53 + 53)) Then
' here you should read two bytes and convert it to one
byte1 = dato1
' now read the second byte
byte2 = inputFile.ReadByte()
reg = reg + 1
POS = POS + 1
final = CInt("&H" & (Chr(byte1) + Chr(byte2)))
Else
dato2 = Encoding.Convert(source, DEST, dato11)
final = dato2(0)
End If
Print(1, Chr(final))
If (reg < 7) Then 'the first positions give us the type of message
MTI = MTI + Chr(dato1)
End If
If reg = 7 Then
binary = 0
rowlen = 1400
If (MTI = "924007") Then ' binary
binary = 1
rowlen = 1400 + 53
End If
MTI = ""
End If
End While
POS = POS + 2
reg = 0
End While
Thank you.

Related

breaking the lines while chunking into multiple files

I am dividing my file into chunks but only problem i am facing is,
i have .srt file, but while doing chunks, it's cutting the characters i.e in first .srt file it's like 00:26:20,230 --> . in next file it continuing the next time stamp 00:27:40,343.
I need to check the timestamp to be complete and then next full subtitle sentence too. i.e if it's cutting the subtitle timesstamp or dialogue in in file, that tect to be append to next file. Please suggest me how can i achieve.
I am trying like below,
String FilePath = "/Users/meh/Desktop/escapeplan.srt";
FileInputStream fin = new FileInputStream(FilePath);
System.out.println("size: " +fin.getChannel().size());
long abc = 0l;
abc = (fin.getChannel().size())/3;
System.out.println("6: " +abc);
System.out.println("abc: " +abc);
//FilePath = args[1];
File filename = new File(FilePath);
long splitFileSize = 0,bytefileSize=0;
if (filename.exists()) {
try {
//bytefileSize = Long.parseLong(args[2]);
splitFileSize = abc;
Splitme spObj = new Splitme();
spObj.split(FilePath, (long) splitFileSize);
spObj = null;
} catch (Exception e) {
e.printStackTrace();
}
} else {
System.out.println("File Not Found....");
}
public void split(String FilePath, long splitlen) {
long leninfile = 0, leng = 0;
int count = 1, data;
try {
File filename = new File(FilePath);
InputStream infile = new BufferedInputStream(new FileInputStream(filename));
data = infile.read();
System.out.println("data");
System.out.println(data);
while (data != -1) {
filename = new File("/Users/meh/Documents/srt" + count + ".srt");
//RandomAccessFile outfile = new RandomAccessFile(filename, "rw");
OutputStream outfile = new BufferedOutputStream(new FileOutputStream(filename));
while (data != -1 && leng < splitlen) {
outfile.write(data);
leng++;
data = infile.read();
}
leninfile += leng;
leng = 0;
outfile.close();
changeTimeStamp(filename, count);
count++;
}
} catch (Exception e) {
e.printStackTrace();
}
}
i am trying to check the time stamp is in correct format or not. Then i need to check next line to be a dialogue and then the next line to be empty line. then it can stop chunk or else it should append the text from the previous chunk to next chunk file in the beginning of line . so that it may get in correct format.
I tried checking the format like,
while ((strLine = br.readLine()) != null) {
String[] atoms = strLine.split(" --> ");
if (atoms.length == 1) {
out.write(strLine + "\n");
} else {
String startTS = atoms[0];
String endTS = atoms[1];
System.out.print("sri atmos start" + startTS);
System.out.print("sri atmos end" + endTS);
SimpleDateFormat sdf = new SimpleDateFormat("HH:mm:ss,SSS");
sdf.setLenient(false);
try
{
sdf.parse(startTS);
sdf.parse(endTS);
System.out.println("Valid time");
System.out.println("File path" + srcFileNm);
}
catch(Exception e) {
System.out.println("Invalid time");
System.out.println("Exception start" + startTS);
System.out.println("Exception end" + endTS);
}
}
some screens of my output chunks,
Help me how can i make this possible.
I think you should change approach, and fully use basic I/O methods. I tried to encapsulate logic in a small class, that produces a triple with id, msecs and a list of subtitles (if I'm not wrong, you can have more than a line). Then I leaved the remainder externally. Chunker is a class that reads a triple (class Three) from file, so that you can manage it and write it somewhere.
This is just a "quick&dirty" idea that you can refine, but it should work.
package org.norsam.stackoverflow;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
public class Chunker
{
BufferedReader r;
int chunk = 0;
File dir;
public Chunker(File dir, String filename) throws IOException
{
File f = new File(dir, filename);
this.dir = dir;
this.r = new BufferedReader(new FileReader(f));
}
public Three readThree() throws IOException
{
Integer id = Integer.parseInt(r.readLine());
String msecs = r.readLine();
String s = null;
List<String> srt = new ArrayList<>();
while (!(s = r.readLine().trim()).isEmpty()) {
srt.add(s);
}
return new Three(id, msecs, srt);
}
class Three
{
Integer id;
String msecs;
List<String> srts;
Three(Integer id, String msecs, List<String> srts)
{
this.id = id;
this.msecs = msecs;
this.srts = srts;
}
Three doSomething() {
// here you can do something with your data,
// e.g. split msecs on "-->" and check times
return this;
}
void write(BufferedWriter r) throws IOException
{
r.write(id);
r.newLine();
r.write(msecs);
r.newLine();
for (String s : srts) {
r.write(s);
r.newLine();
}
r.newLine();
}
}
public static void main(String[] args) throws IOException
{
String baseDir = "/dir/where/resides/srt";
String filename = "filename.srt";
int elemPerChunk = 50;
int fileNum = 0;
File dir = new File(baseDir);
Chunker chunker = new Chunker(dir, filename);
boolean completed = false;
while (!completed) {
int srtCount = 0;
File f = new File(baseDir, "ch." + (fileNum++) + "." + filename);
BufferedWriter w = new BufferedWriter(new FileWriter(f));
try {
while (srtCount++ < elemPerChunk) {
chunker.readThree().doSomething().write(w);
}
} catch (NullPointerException e) {
completed = true;
}
w.close();
}
}
}

how to get a maximum values of two peaks of an array and take them away using for loops? any ideas?

how to get a maximum value of two peaks of an array and take them away using for loops? any ideas?
was thinking to use 2 for loops to find the values in the array. i am using an acceloremeter and displaying the result within a graph but now i need to find the 2 peaks and take them away to determine the outcome and display it.
SM.unregisterListener(this);
File path = getApplicationContext().getExternalFilesDir(null);
File file = new File(path, "my_file-name.txt");
// String filename = "my_file";
FileOutputStream outputStream;
try {
outputStream = new FileOutputStream(file); //openFileOutput(file, Context.MODE_APPEND);
for (double d : array) {
String s = Double.toString(d) + ",";
outputStream.write(s.getBytes());
}
String newline = "/n";
outputStream.write(newline.getBytes());
outputStream.close();
} catch (Exception e) {
e.printStackTrace();
}
this code values are stored within a file so i can then display it within a graph
Here is how you can find two peaks in one loop:
File file = new File(path, "my_file-name.txt");
// String filename = "my_file";
FileOutputStream outputStream;
try {
outputStream = new FileOutputStream(file); //openFileOutput(file, Context.MODE_APPEND);
double peak1, peak2;
if(array.length >= 2) {
peak1 = array[0];
peak2 = array[1];
} else { // not enough elements
return;
}
for (double d : array) {
// peak2 is greater, leave it;
// save new value to peak1 ?
if(peak1 < peak2 && d > peak1) {
peak1 = d;
} else if(d > peak2) { // peak1 is greater or d is less
peak2 = d;
}
String s = Double.toString(d) + ",";
outputStream.write(s.getBytes());
}
String newline = "/n";
outputStream.write(newline.getBytes());
outputStream.close();
System.out.println("Peaks: " + peak1 + " ; " + peak2);
} catch (Exception e) {
System.out.println("Error: " + e);
//e.printStackTrace();
}
See code sample here.
private void stopSensor() {
SM.unregisterListener(this);
File path = getApplicationContext().getExternalFilesDir(null);
File file = new File(path, "my_file-name.txt");
// String filename = "my_file";
FileOutputStream outputStream;
try {
outputStream = new FileOutputStream(file); //openFileOutput(file, Context.MODE_APPEND);
for (double d : array) {
String s = Double.toString(d) + ",";
outputStream.write(s.getBytes());
}
String newline = "/n";
outputStream.write(newline.getBytes());
outputStream.close();
} catch (Exception e) {
e.printStackTrace();
}
try {
double peak1, peak2;
int peaklocation1, peaklocation2;
if (array.length >= 2) {
peak1 = array[0];
peak2 = array[1];
peaklocation1 = 0;
peaklocation2 = 1;
} else { // not enough elements
return;
}
for (int i = 0; i < array.length; i++) {
double d = array[i];
// peak2 is greater, leave it;
// save new value to peak1 ?
if (peak1 < peak2 && d > peak1) {
peak1 = d;
peaklocation1 = i;
} else if (d > peak2) { // peak1 is greater or d is less
peak2 = d;
peaklocation2 = i;
}
}
int size = peaklocation1;
size = peaklocation1 - peaklocation2 ;
resultText.setText("Result:" + peaklocation1 );
resultText2.setText("Result:" + peaklocation2);
} catch (Exception e) {
// System.out.println("Error: " + e);
//e.printStackTrace();
}

Dealing with txt file via split(), says Nullpointerexception

I know there are many similar questions here, but I still can't solve it. I can get all the results that I want. However, in the end, it still shows nullpointerexception. I don't know why. can anyone help?
public class PointGenterate {
public static void main(String[] args) throws FileNotFoundException {
// TODO Auto-generated method stub
try{
File file = new File("123.txt");
double[] pointsid = new double[10];
String[] data = null;
for(int i = 0; i <10; i++){
double rn = (int)(Math.random()*120);
System.out.println(rn);
pointsid[i] = rn;
}
//read file
InputStreamReader rs = new InputStreamReader(new FileInputStream(file));//create input stream reader object
BufferedReader br = new BufferedReader(rs);
String line = "";
line = br.readLine();
//
File write = new File("output.KML");
write.createNewFile();
BufferedWriter out = new BufferedWriter(new FileWriter(write));
while(line != null){
line = br.readLine();
if(line==" "){
System.out.print("empty");
}else{
data = line.split(",|:|[|]");
}
for(int i = 0; i < data.length; i++){
data[i] = data[i].trim();
System.out.println(data[i] + "num" + i);
}
if(data.length > 15){
double id = Double.parseDouble(data[4]);
for(int i = 0; i <10; i++){
if(id == pointsid[i]){
data[10] = data[10].substring(0, data[10].length()-2);
data[15] = data[15].substring(1,data[15].length());
data[16] = data[16].substring(0, data[16].length()-6);
out.write(data[8]+" "+ data[10]+ " " + data[13] + data[15] + data[16]+ "\r\n");
out.flush();
}
}
}
//System.out.println(line);
}
out.close();
}
catch(Exception e){
e.printStackTrace();
}
}
}
the txt file format is like
{ "type": "Feature", "properties": { "id": 126.000000, "osm_id": 4851918786.000000, "name": "Moray House Library", "type": "library" }, "geometry": { "type": "Point", "coordinates": [ -3.180841771200988, 55.950622362732418 ] } },
this is one line. I have many lines, and actually this is just a test code. if it works. i want to write it as a method in a javaseverlet class. get the string coordinates and return it to my JS font-end.
There's a few issues with your code. In this section:
InputStreamReader rs = new InputStreamReader(new FileInputStream(file));//create input stream reader object
BufferedReader br = new BufferedReader(rs);
String line = "";
line = br.readLine(); // here you read the first line in the file
//
File write = new File("output.KML");
write.createNewFile();
BufferedWriter out = new BufferedWriter(new FileWriter(write));
while(line != null){ // here you check that it's not null (it's not, you read the first line OK)
line = br.readLine(); // here you read the second line (there is no second line, now line is null)
if(line==" "){ // now you check if the line is a space character (this is wrong for 2 reasons, that's not how you compare strings, and a space character is not an empty string)
System.out.print("empty");
}else{
data = line.split(",|:|[|]"); // here you call split() on line but line is null
}
When you checked if the string was empty, you did line == " " which is wrong for 2 reasons. First you cannot use == to compare strings - read this question for details on why not. Second, " " is a string that contains a space character. "" is an empty string.
When you want to check if a string is empty you can do it like this:
line.equals("")
or like this:
line.isEmpty()
Here's your code with a few small changes so that it runs without throwing an exception.
public class PointGenterate {
public static void main(String[] args) throws Exception {
try {
File file = new File("123.txt");
double[] pointsid = new double[10];
String[] data = null;
for(int i = 0; i < 10; i++){
double rn = (int)(Math.random()*120);
System.out.println(rn);
pointsid[i] = rn;
}
//read file
InputStreamReader rs = new InputStreamReader(new FileInputStream(file));//create input stream reader object
BufferedReader br = new BufferedReader(rs);
String line = "";
//
File write = new File("output.KML");
write.createNewFile();
BufferedWriter out = new BufferedWriter(new FileWriter(write));
while((line = br.readLine()) != null){ // read the line and check for null
if(line.isEmpty()) { // is the line equal to the empty string?
System.out.print("empty");
} else {
data = line.split(",|:|[|]");
}
for(int i = 0; i < data.length; i++){
data[i] = data[i].trim();
System.out.println(data[i] + "num" + i);
}
if(data.length > 15){
double id = Double.parseDouble(data[4]);
for(int i = 0; i <10; i++){
if(id == pointsid[i]){
data[10] = data[10].substring(0, data[10].length()-2);
data[15] = data[15].substring(1,data[15].length());
data[16] = data[16].substring(0, data[16].length()-6);
out.write(data[8]+" "+ data[10]+ " " + data[13] + data[15] + data[16]+ "\r\n");
out.flush();
}
}
}
//System.out.println(line);
}
out.close();
}
catch(Exception e){
e.printStackTrace();
}
}
}

Byte Array not printing to file correctly

I'm creating a program that aims to take in n number of splits and split a file into that amount of sub-files. In my SplitFile.java, I'm reading a file, passing an array of substrings that show the text that's supposed to go in each split file. I then convert the string into a byte array and write the byte array to the split file, but each file I'm creating is outputting something just slightly different.
SplitFile.java
import java.io.*;
import java.nio.charset.Charset;
import java.util.ArrayList;
public class SplitFile
{
int numberOfSplits = 1;
File file;
String[] parts = new String[5];
public SplitFile(File file,int numberOfSplits)
{
this.file = file;
this.numberOfSplits = numberOfSplits;
}
public void FileSplitter() throws IOException
{
FileInputStream fis = new FileInputStream(file);
String fileText = readFile();
if(numberOfSplits == 2)
{
int mid = fileText.length() / 2;
parts[0] = fileText.substring(0, mid);
parts[1] = fileText.substring(mid);
}
else if(numberOfSplits == 3)
{
int third = fileText.length() / 3;
int secondThird = third + third;
parts[0] = fileText.substring(0, third);
parts[1] = fileText.substring(third, secondThird);
parts[2] = fileText.substring(secondThird);
}
for(int i = 1; i <= numberOfSplits; i++)
{
BufferedInputStream bis = new BufferedInputStream(new fileInputStream(file));
FileOutputStream out;
String name = file.getName();
byte[] b = parts[i - 1].getBytes(Charset.forName("UTF-8"));
int temp = 0;
while((temp = bis.read(b)) > 0);
{
File newFile = new File(name + " " + i + ".txt");
newFile.createNewFile();
out = new FileOutputStream(newFile);
out.write(b, 0, temp); // Writes to the file
out.close();
temp = 0;
}
}
}
public String readFile() throws IOException
{
BufferedReader br = new BufferedReader(new FileReader(file));
try
{
StringBuilder sb = new StringBuilder();
String line = br.readLine();
while (line != null)
{
sb.append(line);
sb.append("\n");
line = br.readLine();
}
return sb.toString();
}
finally
{
br.close();
}
}
}
If I pass in 2 as the amount of splits I want, it is not splitting it right at the middle, with file 1 being the first half and file 2 being the second half, and instead giving the end of the text file for both files. My problem seems to be here:
while((temp = bis.read(b)) > 0);
{
File newFile = new File(name + " " + i + ".txt");
newFile.createNewFile();
out = new FileOutputStream(newFile);
out.write(b, 0, temp); // Writes to the file
out.close();
temp = 0;
}
An example file I'll use on here is this file:
myFile.txt
abcdefghijklmnopqrstuvwxyz
It splits into two files that go as follows:
myFile.txt 1
nopqrstuvqxyz
myFile.txt 2
opqrstuvqxyz
Any idea on what the problem is?
In your code, you define File newFile = new File(name + " " + i + ".txt"); and out = new FileOutputStream(newFile); in whilte loop, it's not correct.
while((temp = bis.read(b)) > 0); not semicolon here =.="
Many mistake in your code
I will change your code like:
File newFile = new File(name + " " + i + ".txt");
newFile.createNewFile();
out = new FileOutputStream(newFile);
out.write(b); // Writes to the file
out.flush();
out.close();
If you need your code run as you want, here you are
for (int i = 1; i <= numberOfSplits; i++) {
String name = file.getName();
byte[] b = parts[i - 1].getBytes(Charset.forName("UTF-8"));
ByteArrayInputStream bis = new ByteArrayInputStream(b);
int temp = 0;
File newFile = new File(name + " " + i + ".txt");
newFile.createNewFile();
FileOutputStream out = new FileOutputStream(newFile);
while ((temp = bis.read()) > 0)
{
out.write(temp); // Writes to the file
}
out.flush();
out.close();
}

Java - Read file and split into multiple files

I have a file which I would like to read in Java and split this file into n (user input) output files. Here is how I read the file:
int n = 4;
BufferedReader br = new BufferedReader(new FileReader("file.csv"));
try {
String line = br.readLine();
while (line != null) {
line = br.readLine();
}
} finally {
br.close();
}
How do I split the file - file.csv into n files?
Note - Since the number of entries in the file are of the order of 100k, I can't store the file content into an array and then split it and save into multiple files.
Since one file can be very large, each split file could be large as well.
Example:
Source File Size: 5GB
Num Splits: 5: Destination
File Size: 1GB each (5 files)
There is no way to read this large split chunk in one go, even if we have such a memory. Basically for each split we can read a fix size byte-array which we know should be feasible in terms of performance as well memory.
NumSplits: 10 MaxReadBytes: 8KB
public static void main(String[] args) throws Exception
{
RandomAccessFile raf = new RandomAccessFile("test.csv", "r");
long numSplits = 10; //from user input, extract it from args
long sourceSize = raf.length();
long bytesPerSplit = sourceSize/numSplits ;
long remainingBytes = sourceSize % numSplits;
int maxReadBufferSize = 8 * 1024; //8KB
for(int destIx=1; destIx <= numSplits; destIx++) {
BufferedOutputStream bw = new BufferedOutputStream(new FileOutputStream("split."+destIx));
if(bytesPerSplit > maxReadBufferSize) {
long numReads = bytesPerSplit/maxReadBufferSize;
long numRemainingRead = bytesPerSplit % maxReadBufferSize;
for(int i=0; i<numReads; i++) {
readWrite(raf, bw, maxReadBufferSize);
}
if(numRemainingRead > 0) {
readWrite(raf, bw, numRemainingRead);
}
}else {
readWrite(raf, bw, bytesPerSplit);
}
bw.close();
}
if(remainingBytes > 0) {
BufferedOutputStream bw = new BufferedOutputStream(new FileOutputStream("split."+(numSplits+1)));
readWrite(raf, bw, remainingBytes);
bw.close();
}
raf.close();
}
static void readWrite(RandomAccessFile raf, BufferedOutputStream bw, long numBytes) throws IOException {
byte[] buf = new byte[(int) numBytes];
int val = raf.read(buf);
if(val != -1) {
bw.write(buf);
}
}
import java.io.*;
import java.util.Scanner;
public class split {
public static void main(String args[])
{
try{
// Reading file and getting no. of files to be generated
String inputfile = "C:/test.txt"; // Source File Name.
double nol = 2000.0; // No. of lines to be split and saved in each output file.
File file = new File(inputfile);
Scanner scanner = new Scanner(file);
int count = 0;
while (scanner.hasNextLine())
{
scanner.nextLine();
count++;
}
System.out.println("Lines in the file: " + count); // Displays no. of lines in the input file.
double temp = (count/nol);
int temp1=(int)temp;
int nof=0;
if(temp1==temp)
{
nof=temp1;
}
else
{
nof=temp1+1;
}
System.out.println("No. of files to be generated :"+nof); // Displays no. of files to be generated.
//---------------------------------------------------------------------------------------------------------
// Actual splitting of file into smaller files
FileInputStream fstream = new FileInputStream(inputfile); DataInputStream in = new DataInputStream(fstream);
BufferedReader br = new BufferedReader(new InputStreamReader(in)); String strLine;
for (int j=1;j<=nof;j++)
{
FileWriter fstream1 = new FileWriter("C:/New Folder/File"+j+".txt"); // Destination File Location
BufferedWriter out = new BufferedWriter(fstream1);
for (int i=1;i<=nol;i++)
{
strLine = br.readLine();
if (strLine!= null)
{
out.write(strLine);
if(i!=nol)
{
out.newLine();
}
}
}
out.close();
}
in.close();
}catch (Exception e)
{
System.err.println("Error: " + e.getMessage());
}
}
}
Though its a old question but for reference I am listing out the code which I used to split large files to any sizes and it works with any Java versions above 1.4 .
Sample Split and Join blocks were like below:
public void join(String FilePath) {
long leninfile = 0, leng = 0;
int count = 1, data = 0;
try {
File filename = new File(FilePath);
//RandomAccessFile outfile = new RandomAccessFile(filename,"rw");
OutputStream outfile = new BufferedOutputStream(new FileOutputStream(filename));
while (true) {
filename = new File(FilePath + count + ".sp");
if (filename.exists()) {
//RandomAccessFile infile = new RandomAccessFile(filename,"r");
InputStream infile = new BufferedInputStream(new FileInputStream(filename));
data = infile.read();
while (data != -1) {
outfile.write(data);
data = infile.read();
}
leng++;
infile.close();
count++;
} else {
break;
}
}
outfile.close();
} catch (Exception e) {
e.printStackTrace();
}
}
public void split(String FilePath, long splitlen) {
long leninfile = 0, leng = 0;
int count = 1, data;
try {
File filename = new File(FilePath);
//RandomAccessFile infile = new RandomAccessFile(filename, "r");
InputStream infile = new BufferedInputStream(new FileInputStream(filename));
data = infile.read();
while (data != -1) {
filename = new File(FilePath + count + ".sp");
//RandomAccessFile outfile = new RandomAccessFile(filename, "rw");
OutputStream outfile = new BufferedOutputStream(new FileOutputStream(filename));
while (data != -1 && leng < splitlen) {
outfile.write(data);
leng++;
data = infile.read();
}
leninfile += leng;
leng = 0;
outfile.close();
count++;
}
} catch (Exception e) {
e.printStackTrace();
}
}
Complete java code available here in File Split in Java Program link.
a clean solution to edit.
this solution involves loading the entire file into memory.
set all line of a file in List<String> rowsOfFile;
edit maxSizeFile to choice max size of a single file splitted
public void splitFile(File fileToSplit) throws IOException {
long maxSizeFile = 10000000 // 10mb
StringBuilder buffer = new StringBuilder((int) maxSizeFile);
int sizeOfRows = 0;
int recurrence = 0;
String fileName;
List<String> rowsOfFile;
rowsOfFile = Files.readAllLines(fileToSplit.toPath(), Charset.defaultCharset());
for (String row : rowsOfFile) {
buffer.append(row);
numOfRow++;
sizeOfRows += row.getBytes(StandardCharsets.UTF_8).length;
if (sizeOfRows >= maxSizeFile) {
fileName = generateFileName(recurrence);
File newFile = new File(fileName);
try (PrintWriter writer = new PrintWriter(newFile)) {
writer.println(buffer.toString());
}
recurrence++;
sizeOfRows = 0;
buffer = new StringBuilder();
}
}
// last rows
if (sizeOfRows > 0) {
fileName = generateFileName(recurrence);
File newFile = createFile(fileName);
try (PrintWriter writer = new PrintWriter(newFile)) {
writer.println(buffer.toString());
}
}
Files.delete(fileToSplit.toPath());
}
method to generate Name of file:
public String generateFileName(int numFile) {
String extension = ".txt";
return "myFile" + numFile + extension;
}
Have a counter to count no of entries. Let's say one entry per line.
step1: Initially create new subfile, set counter=0;
step2: increment counter as you read each entry from source file to buffer
step3: when counter reaches limit to number of entries that you want to write in each sub file, flush contents of buffer to subfile. close the subfile
step4 : jump to step1 till you have data in source file to read from
There's no need to loop twice through the file. You could estimate the size of each chunk as the source file size divided by number of chunks needed. Then you just stop filling each cunk with data as it's size exceeds estimated.
Here is one that worked for me and I used it to split 10GB file. it also enables you to add a header and a footer. very useful when splitting document based format such as XML and JSON because you need to add document wrapper in the new split files.
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardOpenOption;
public class FileSpliter
{
public static void main(String[] args) throws IOException
{
splitTextFiles("D:\\xref.csx", 750000, "", "", null);
}
public static void splitTextFiles(String fileName, int maxRows, String header, String footer, String targetDir) throws IOException
{
File bigFile = new File(fileName);
int i = 1;
String ext = fileName.substring(fileName.lastIndexOf("."));
String fileNoExt = bigFile.getName().replace(ext, "");
File newDir = null;
if(targetDir != null)
{
newDir = new File(targetDir);
}
else
{
newDir = new File(bigFile.getParent() + "\\" + fileNoExt + "_split");
}
newDir.mkdirs();
try (BufferedReader reader = Files.newBufferedReader(Paths.get(fileName)))
{
String line = null;
int lineNum = 1;
Path splitFile = Paths.get(newDir.getPath() + "\\" + fileNoExt + "_" + String.format("%02d", i) + ext);
BufferedWriter writer = Files.newBufferedWriter(splitFile, StandardOpenOption.CREATE);
while ((line = reader.readLine()) != null)
{
if(lineNum == 1)
{
System.out.print("new file created '" + splitFile.toString());
if(header != null && header.length() > 0)
{
writer.append(header);
writer.newLine();
}
}
writer.append(line);
if (lineNum >= maxRows)
{
if(footer != null && footer.length() > 0)
{
writer.newLine();
writer.append(footer);
}
writer.close();
System.out.println(", " + lineNum + " lines written to file");
lineNum = 1;
i++;
splitFile = Paths.get(newDir.getPath() + "\\" + fileNoExt + "_" + String.format("%02d", i) + ext);
writer = Files.newBufferedWriter(splitFile, StandardOpenOption.CREATE);
}
else
{
writer.newLine();
lineNum++;
}
}
if(lineNum <= maxRows) // early exit
{
if(footer != null && footer.length() > 0)
{
writer.newLine();
lineNum++;
writer.append(footer);
}
}
writer.close();
System.out.println(", " + lineNum + " lines written to file");
}
System.out.println("file '" + bigFile.getName() + "' split into " + i + " files");
}
}
Below code used to split a big file into small files with lesser lines.
long linesWritten = 0;
int count = 1;
try {
File inputFile = new File(inputFilePath);
InputStream inputFileStream = new BufferedInputStream(new FileInputStream(inputFile));
BufferedReader reader = new BufferedReader(new InputStreamReader(inputFileStream));
String line = reader.readLine();
String fileName = inputFile.getName();
String outfileName = outputFolderPath + "\\" + fileName;
while (line != null) {
File outFile = new File(outfileName + "_" + count + ".split");
Writer writer = new OutputStreamWriter(new FileOutputStream(outFile));
while (line != null && linesWritten < linesPerSplit) {
writer.write(line);
line = reader.readLine();
linesWritten++;
}
writer.close();
linesWritten = 0;//next file
count++;//nect file count
}
reader.close();
} catch (Exception e) {
e.printStackTrace();
}
Split a file to multiple chunks (in memory operation), here I'm splitting any file to a size of 500kb(500000 bytes) :
public static List<ByteArrayOutputStream> splitFile(File f) {
List<ByteArrayOutputStream> datalist = new ArrayList<>();
try {
int sizeOfFiles = 500000;
byte[] buffer = new byte[sizeOfFiles];
try (FileInputStream fis = new FileInputStream(f); BufferedInputStream bis = new BufferedInputStream(fis)) {
int bytesAmount = 0;
while ((bytesAmount = bis.read(buffer)) > 0) {
try (OutputStream out = new ByteArrayOutputStream()) {
out.write(buffer, 0, bytesAmount);
out.flush();
datalist.add((ByteArrayOutputStream) out);
}
}
}
} catch (Exception e) {
//get the error
}
return datalist; }
I am a bit late to answer, But here's how I did it:
Approach:
First I determine how many bytes each of the individual files should contain then I split the large file by bytes. Only one file chunk worth of data is loaded into memory at a time.
Example:- if a 5 GB file is split into 10 files then only 500MB worth of bytes are loaded into memory at a time which are held in the buffer variable in the splitBySize method below.
Code Explaination:
The method splitFile first gets the number of bytes each of the individual file chunks should contain by calling the getSizeInBytes method, then it calls the splitBySize method which splits the large file by size (i..e maxChunkSize represents the number of bytes each of file chunks will contain).
public static List<File> splitFile(File largeFile, int noOfFiles) throws IOException {
return splitBySize(largeFile, getSizeInBytes(largeFile.length(), noOfFiles));
}
public static List<File> splitBySize(File largeFile, int maxChunkSize) throws IOException {
List<File> list = new ArrayList<>();
int numberOfFiles = 0;
try (InputStream in = Files.newInputStream(largeFile.toPath())) {
final byte[] buffer = new byte[maxChunkSize];
int dataRead = in.read(buffer);
while (dataRead > -1) {
list.add(stageLocally(buffer, dataRead));
numberOfFiles++;
dataRead = in.read(buffer);
}
}
System.out.println("Number of files generated: " + numberOfFiles);
return list;
}
private static int getSizeInBytes(long totalBytes, int numberOfFiles) {
if (totalBytes % numberOfFiles != 0) {
totalBytes = ((totalBytes / numberOfFiles) + 1)*numberOfFiles;
}
long x = totalBytes / numberOfFiles;
if (x > Integer.MAX_VALUE){
throw new NumberFormatException("Byte chunk too large");
}
return (int) x;
}
Full Code:
public class StackOverflow {
private static final String INPUT_FILE_PATH = "/Users/malkesingh/Downloads/5MB.zip";
private static final String TEMP_DIRECTORY = "/Users/malkesingh/temp";
public static void main(String[] args) throws IOException {
File input = new File(INPUT_FILE_PATH);
File outPut = fileJoin2(splitFile(input, 5));
try (InputStream in = Files.newInputStream(input.toPath()); InputStream out = Files.newInputStream(outPut.toPath())) {
System.out.println(IOUtils.contentEquals(in, out));
}
}
public static List<File> splitFile(File largeFile, int noOfFiles) throws IOException {
return splitBySize(largeFile, getSizeInBytes(largeFile.length(), noOfFiles));
}
public static List<File> splitBySize(File largeFile, int maxChunkSize) throws IOException {
List<File> list = new ArrayList<>();
int numberOfFiles = 0;
try (InputStream in = Files.newInputStream(largeFile.toPath())) {
final byte[] buffer = new byte[maxChunkSize];
int dataRead = in.read(buffer);
while (dataRead > -1) {
list.add(stageLocally(buffer, dataRead));
numberOfFiles++;
dataRead = in.read(buffer);
}
}
System.out.println("Number of files generated: " + numberOfFiles);
return list;
}
private static int getSizeInBytes(long totalBytes, int numberOfFiles) {
if (totalBytes % numberOfFiles != 0) {
totalBytes = ((totalBytes / numberOfFiles) + 1)*numberOfFiles;
}
long x = totalBytes / numberOfFiles;
if (x > Integer.MAX_VALUE){
throw new NumberFormatException("Byte chunk too large");
}
return (int) x;
}
private static File stageLocally(byte[] buffer, int length) throws IOException {
File outPutFile = File.createTempFile("temp-", "split", new File(TEMP_DIRECTORY));
try(FileOutputStream fos = new FileOutputStream(outPutFile)) {
fos.write(buffer, 0, length);
}
return outPutFile;
}
public static File fileJoin2(List<File> list) throws IOException {
File outPutFile = File.createTempFile("temp-", "unsplit", new File(TEMP_DIRECTORY));
FileOutputStream fos = new FileOutputStream(outPutFile);
for (File file : list) {
Files.copy(file.toPath(), fos);
}
fos.close();
return outPutFile;
}}
import java.util.*;
import java.io.*;
public class task13 {
public static void main(String[] args)throws IOException{
Scanner s =new Scanner(System.in);
System.out.print("Enter path:");
String a=s.next();
File f=new File(a+".txt");
Scanner st=new Scanner(f);
System.out.println(f.canRead()+"\n"+f.canWrite());
long l=f.length();
System.out.println("Length is:"+l);
System.out.print("Enter no.of partitions:");
int p=s.nextInt();
long x=l/p;
st.useDelimiter("\\Z");
String t=st.next();
int j=0;
System.out.println("Each File Length is:"+x);
for(int i=1;i<=p;i++){
File ft=new File(a+"-"+i+".txt");
ft.createNewFile();
int g=(j*(int)x);
int h=(j+1)*(int)x;
if(g<=l&&h<=l){
FileWriter fw=new FileWriter(a+"-"+i+".txt");
String v=t.substring(g,h);
fw.write(v);
j++;
fw.close();
}}
}}

Categories