Java method to parse strings - java

I have a input string like
{{1,2},{3,4},{5,6}}.
I want to read the numbers into an two dimensional array like
int[][] arr = {{1,2},{3,4},{5,6}}.
Is there a good way to convert this string into int[][] ? Thanks.

The basic idea is as follows, you will have to add error handling code and more logical calculation of array size yourself. I just wanted to display how to split the string and make int [][]array.
String test = "{{1,2},{3,4},{5,6}}";
test = test.replace("{", "");
test = test.replace("}", "");
String []nums = test.split(",");
int numIter = 0;
int outerlength = nums.length/2;
int [][]array = new int[outerlength][];
for(int i=0; i<outerlength; i++){
array[i] = new int[2];
for(int j=0;j<2;j++){
array[i][j] = Integer.parseInt(nums[numIter++]);
}
}
hope it helps!

I'd go for solution using dynamic storage such as List from Collections, and then convert this to fixed primitive array at the end.
public static void main(String[] args) throws Exception {
System.out.println(Arrays.deepToString(parse("{{1,2},{3,4},{5,6}}")));
}
private static int[][] parse(String input) {
List<List<Integer>> output = new ArrayList<>();
List<Integer> destination = null;
Pattern pattern = Pattern.compile("[0-9]+|\\{|\\}");
Matcher matcher = pattern.matcher(input);
int level = 0;
while (matcher.find()) {
String token = matcher.group();
if ("{".equals(token)) {
if (level == 1) {
destination = new ArrayList<Integer>();
output.add(destination);
}
level++;
} else if ("}".equals(token)) {
level--;
} else {
destination.add(Integer.parseInt(token));
}
}
int[][] array = new int[output.size()][];
for (int i = 0; i < output.size(); i++) {
List<Integer> each = output.get(i);
array[i] = new int[each.size()];
for (int k = 0; k < each.size(); k++) {
array[i][k] = each.get(k);
}
}
return array;
}
Another alternative would be translate { to [ and } to ], then you have JSON, just use your favourite JSON parser, here I used GSON.
private static int[][] parse(String string) {
JsonElement element = new JsonParser().parse(string.replace("{", "[").replace("}", "]"));
JsonArray obj = element.getAsJsonArray();
int [][] output = new int[obj.size()][];
for (int i = 0; i < obj.size(); i++) {
JsonArray each = obj.get(i).getAsJsonArray();
output[i] = new int[each.size()];
for (int k = 0; k < each.size(); k++) {
output[i][k] = each.get(k).getAsInt();
}
}
return output;
}

Related

Why is Java string array appending a string to null in first cell?

Here is my class below, that compares elements in two string arrays, and returns the word with the highest frequency in both arrays. However as visible from the output, the first index is appending none to null in spite of initializing both arrays with the String none. Can someone kindly let me know what I am doing wrong that is leading to this?
public class HelloWorld{
public String[] pro;
public String[] con;
String proSplitter;
String conSplitter;
public HelloWorld() {
this.pro = new String[9];
this.con = new String[9];
for(int i=0;i<this.pro.length;i++)
{
this.pro[i]="none";
this.con[i]="none";
}
}
public String[] getPro() {
return pro;
}
public String[] getCon() {
return con;
}
public void setPro(String pros, int proIndex) {
pro[proIndex] = pros;
}
public void setCon(String cons, int conIndex) {
con[conIndex] = cons;
}
public String[] proWord(){
for(int i=0;i<9;i++)
{
proSplitter = proSplitter + pro[i] + ",";
}
for(int i=0;i<9;i++)
{
conSplitter = conSplitter + con[i] + ",";
}
String[] values = proSplitter.split(",");
for(int i=0;i<values.length;i++)
{
values[i] = values[i].trim();
}
String[] values1 = conSplitter.split(",");
for(int i=0;i<values1.length;i++)
{
values1[i] = values1[i].trim();
}
int [] fr = new int [values.length];
int visited = -1;
for(int i = 0; i < values.length; i++){
int count = 1;
for(int j = i+1; j < values.length; j++){
if(!values[i].equalsIgnoreCase("none"))
{
if(values[i].compareTo(values[j])==0){
count++;
//To avoid counting same element again
fr[j] = visited;
}
}
}
if(fr[i] != visited)
fr[i] = count;
}
int max = fr[0];
int index = 0;
for (int i = 0; i < fr.length; i++)
{
if (max < fr[i])
{
max = fr[i];
index = i;
}
}
int [] fr1 = new int [values1.length];
int visited1 = -1;
for(int i = 0; i < values1.length; i++){
int count1 = 1;
for(int j = i+1; j < values1.length; j++){
if(!values1[i].equalsIgnoreCase("none"))
{
if(values1[i].compareTo(values1[j])==0){
count1++;
//To avoid counting same element again
fr1[j] = visited1;
}
}
}
if(fr1[i] != visited1)
fr1[i] = count1;
}
for(int i = 0;i<values.length;i++)
{
System.out.println("pro = "+values[i]);
}
for(int i = 0;i<values1.length;i++)
{
System.out.println("con = "+values1[i]);
}
int max1 = fr1[0];
int index1 = 0;
for (int i = 0; i < fr1.length; i++)
{
if (max1 < fr1[i])
{
max1 = fr1[i];
index1 = i;
}
}
String sentence[] = new String[2];
if(values[index].equalsIgnoreCase(values1[index1])) {
sentence[0] = "balanced";
}else {
sentence[0] = values[index];
sentence[1] = values1[index1];
}
return sentence;
}
public static void main(String[] args){
HelloWorld tracker = new HelloWorld();
tracker.setPro("Apple, Pear", 1);
tracker.setCon("Banana", 1);
tracker.setPro("Apple", 2);
tracker.setCon("Water Melon", 2);
tracker.setPro("Guava", 3);
tracker.setCon("Ball", 3);
tracker.setPro("Apple", 4);
tracker.setCon("Mango, Plum", 4);
String[] arr = tracker.proWord();
System.out.println("pro = "+arr[0]);
System.out.println("con = "+arr[1]);
}
}
The output being generated is :
pro = nullnone
pro = Apple
pro = Pear
pro = Apple
pro = Guava
pro = Apple
pro = none
pro = none
pro = none
pro = none
con = nullnone
con = Banana
con = Water Melon
con = Ball
con = Mango
con = Plum
con = none
con = none
con = none
con = none
pro = Apple
con = nullnone
As mentioned by Arnaud, the immediate problem is that you're leaving proSplitter uninitialized, so its value is null. Then, when you come to append a string to it with proSplitter = proSplitter + pro[i] + ",";, proSplitter will be converted (effectively) to "null", and then stuff is appended to the end. So, instead, make it "" initially.
However, you've got another problem here, which is that you're mutating a member variable each time you invoke that method - so it's not null (or empty) second time around, it still contains what was there previously.
The fix for that is straightforward: instead of using a member variable, declare these as local variables.
You've also got the problem that you're effectively duplicating the code to count the most frequent thing in an array: this is what methods are for, to allow you to run the same code over different inputs.
You can also make use of library methods. For example:
String mostFrequent(String[] array) {
int maxFreq = 0;
String maxFreqS = "";
for (String s : array) {
if (s.equalsIgnoreCase("none")) continue;
int freq = Collections.frequency(Arrays.asList(array), s);
if (freq > maxFreq) {
maxFreq = freq;
maxFreqS = s;
}
}
return maxFreqS;
}
(There are lots of inefficiencies here. The point is more about writing this as a method, to remove the duplication).
Then you can use this inside your existing method, and it will be a whole lot easier for others - and you - to read.

How can i extract trend words from given dataset (Java)? [duplicate]

How to generate an n-gram of a string like:
String Input="This is my car."
I want to generate n-gram with this input:
Input Ngram size = 3
Output should be:
This
is
my
car
This is
is my
my car
This is my
is my car
Give some idea in Java, how to implement that or if any library is available for it.
I am trying to use this NGramTokenizer but its giving n-gram's of character sequence and I want n-grams of word sequence.
I believe this would do what you want:
import java.util.*;
public class Test {
public static List<String> ngrams(int n, String str) {
List<String> ngrams = new ArrayList<String>();
String[] words = str.split(" ");
for (int i = 0; i < words.length - n + 1; i++)
ngrams.add(concat(words, i, i+n));
return ngrams;
}
public static String concat(String[] words, int start, int end) {
StringBuilder sb = new StringBuilder();
for (int i = start; i < end; i++)
sb.append((i > start ? " " : "") + words[i]);
return sb.toString();
}
public static void main(String[] args) {
for (int n = 1; n <= 3; n++) {
for (String ngram : ngrams(n, "This is my car."))
System.out.println(ngram);
System.out.println();
}
}
}
Output:
This
is
my
car.
This is
is my
my car.
This is my
is my car.
An "on-demand" solution implemented as an Iterator:
class NgramIterator implements Iterator<String> {
String[] words;
int pos = 0, n;
public NgramIterator(int n, String str) {
this.n = n;
words = str.split(" ");
}
public boolean hasNext() {
return pos < words.length - n + 1;
}
public String next() {
StringBuilder sb = new StringBuilder();
for (int i = pos; i < pos + n; i++)
sb.append((i > pos ? " " : "") + words[i]);
pos++;
return sb.toString();
}
public void remove() {
throw new UnsupportedOperationException();
}
}
You are looking for ShingleFilter.
Update: The link points to version 3.0.2. This class may be in different package in newer version of Lucene.
This code returns an array of all Strings of the given length:
public static String[] ngrams(String s, int len) {
String[] parts = s.split(" ");
String[] result = new String[parts.length - len + 1];
for(int i = 0; i < parts.length - len + 1; i++) {
StringBuilder sb = new StringBuilder();
for(int k = 0; k < len; k++) {
if(k > 0) sb.append(' ');
sb.append(parts[i+k]);
}
result[i] = sb.toString();
}
return result;
}
E.g.
System.out.println(Arrays.toString(ngrams("This is my car", 2)));
//--> [This is, is my, my car]
System.out.println(Arrays.toString(ngrams("This is my car", 3)));
//--> [This is my, is my car]
/**
*
* #param sentence should has at least one string
* #param maxGramSize should be 1 at least
* #return set of continuous word n-grams up to maxGramSize from the sentence
*/
public static List<String> generateNgramsUpto(String str, int maxGramSize) {
List<String> sentence = Arrays.asList(str.split("[\\W+]"));
List<String> ngrams = new ArrayList<String>();
int ngramSize = 0;
StringBuilder sb = null;
//sentence becomes ngrams
for (ListIterator<String> it = sentence.listIterator(); it.hasNext();) {
String word = (String) it.next();
//1- add the word itself
sb = new StringBuilder(word);
ngrams.add(word);
ngramSize=1;
it.previous();
//2- insert prevs of the word and add those too
while(it.hasPrevious() && ngramSize<maxGramSize){
sb.insert(0,' ');
sb.insert(0,it.previous());
ngrams.add(sb.toString());
ngramSize++;
}
//go back to initial position
while(ngramSize>0){
ngramSize--;
it.next();
}
}
return ngrams;
}
Call:
long startTime = System.currentTimeMillis();
ngrams = ToolSet.generateNgramsUpto("This is my car.", 3);
long stopTime = System.currentTimeMillis();
System.out.println("My time = "+(stopTime-startTime)+" ms with ngramsize = "+ngrams.size());
System.out.println(ngrams.toString());
Output:
My time = 1 ms with ngramsize = 9 [This, is, This is, my, is my, This
is my, car, my car, is my car]
public static void CreateNgram(ArrayList<String> list, int cutoff) {
try
{
NGramModel ngramModel = new NGramModel();
POSModel model = new POSModelLoader().load(new File("en-pos-maxent.bin"));
PerformanceMonitor perfMon = new PerformanceMonitor(System.err, "sent");
POSTaggerME tagger = new POSTaggerME(model);
perfMon.start();
for(int i = 0; i<list.size(); i++)
{
String inputString = list.get(i);
ObjectStream<String> lineStream = new PlainTextByLineStream(new StringReader(inputString));
String line;
while ((line = lineStream.read()) != null)
{
String whitespaceTokenizerLine[] = WhitespaceTokenizer.INSTANCE.tokenize(line);
String[] tags = tagger.tag(whitespaceTokenizerLine);
POSSample sample = new POSSample(whitespaceTokenizerLine, tags);
perfMon.incrementCounter();
String words[] = sample.getSentence();
if(words.length > 0)
{
for(int k = 2; k< 4; k++)
{
ngramModel.add(new StringList(words), k, k);
}
}
}
}
ngramModel.cutoff(cutoff, Integer.MAX_VALUE);
Iterator<StringList> it = ngramModel.iterator();
while(it.hasNext())
{
StringList strList = it.next();
System.out.println(strList.toString());
}
perfMon.stopAndPrintFinalResult();
}catch(Exception e)
{
System.out.println(e.toString());
}
}
Here is my codes to create n-gram. In this case, n = 2, 3. n-gram of words sequence which smaller than cutoff value will ignore from result set. Input is list of sentences, then it parse using a tool of OpenNLP
public static void main(String[] args) {
String[] words = "This is my car.".split(" ");
for (int n = 0; n < 3; n++) {
List<String> list = ngrams(n, words);
for (String ngram : list) {
System.out.println(ngram);
}
System.out.println();
}
}
public static List<String> ngrams(int stepSize, String[] words) {
List<String> ngrams = new ArrayList<String>();
for (int i = 0; i < words.length-stepSize; i++) {
String initialWord = "";
int internalCount = i;
int internalStepSize = i + stepSize;
while (internalCount <= internalStepSize
&& internalCount < words.length) {
initialWord = initialWord+" " + words[internalCount];
++internalCount;
}
ngrams.add(initialWord);
}
return ngrams;
}
Check this out:
public static void main(String[] args) {
NGram nGram = new NGram();
String[] tokens = "this is my car".split(" ");
int i = tokens.length;
List<String> ngrams = new ArrayList<>();
while (i >= 1){
ngrams.addAll(nGram.getNGram(tokens, i, new ArrayList<>()));
i--;
}
System.out.println(ngrams);
}
private List<String> getNGram(String[] tokens, int n, List<String> ngrams) {
StringBuilder strbldr = new StringBuilder();
if (tokens.length < n) {
return ngrams;
}else {
for (int i=0; i<n; i++){
strbldr.append(tokens[i]).append(" ");
}
ngrams.add(strbldr.toString().trim());
String[] newTokens = Arrays.copyOfRange(tokens, 1, tokens.length);
return getNGram(newTokens, n, ngrams);
}
}
Simple recursive function, better running time.

Java array I need your thought

Given an array of strings, return another array containing all of its longest strings.
For (String [] x = {"serm", "aa", "sazi", "vcd", "aba","kart"};)
output will be
{"serm", "sazi" , "kart"}.
The following code is wrong, What can I do to fix it.
public class Tester {
public static void main(String[] args) {
Tester all = new Tester();
String [] x = {"serm", "aa", "sazi", "vcd", "aba","kart"};
String [] y = all.allLongestStrings(x);
System.out.println(y);
}
String[] allLongestStrings(String[] input) {
ArrayList<String> answer = new ArrayList<String>(
Arrays.asList(input[0]));
for (int i = 1; i < input.length; i++) {
if (input[i].length() == answer.get(0).length()) {
answer.add(input[i]);
}
if (input[i].length() > answer.get(0).length()) {
answer.add(input[i]);
}
}
return answer.toArray(new String[0]);
}
}
I will give you solution, but as it homework, it will be only sudo code
problem with your solution is, you are not finging longest strings, but strings same size or bigger than size of first element
let helper = []
let maxLength = 0;
for each string in array
if (len(string) >maxLength){
maxLength = len(string);
clear(helper)
}
if (len(string) == maxLength)
helper.add(string)
}
return helper;
You can try below code
private static String[] solution(String[] inputArray) {
int longestStrSize = 0;
List<String> longestStringList = new ArrayList<>();
for (int i = 0; i < inputArray.length; i++) {
if (inputArray[i] != null) {
if (longestStrSize <= inputArray[i].length()) {
longestStrSize = inputArray[i].length();
longestStringList.add(inputArray[i]);
}
}
}
final int i = longestStrSize;
return longestStringList.stream().filter(x -> x.length() >= i).collect(Collectors.toList()).stream()
.toArray(String[]::new);
}

How do you copy the components of an ArrayList to a regular Array?

public boolean makeReservation(int id, AvilaNatalyPassenger request) {
boolean status = true;
ArrayList<Integer> inputValues = new ArrayList<Integer>();
for (int i = 0; i < 22; i++) {
for (int j = 0; j < 5; j++) {
id = seats[i][j];
if (id != -1) {
if (inputValues.contains(id)) {
status = false;
break;
}
else {
inputValues.add(id);
for(int a = 0; a < inputValues.size; a++)
seats[a] = inputValues[a];
}
}
}
}
return status;
}
This is what I have but its not correct. I need to add what I have in inputVaule arrayList into the array seats.
You can also look at the Java API: http://docs.oracle.com/javase/7/docs/api/index.html?java/util/ArrayList.html
public Object[] toArray()
Returns an array containing all of the elements in this list in proper sequence (from first to last element).
So this is what you could do:
seats[a] = inputValues.toArray();
Furthermore you cannot use inputValues[a] since it is not an array. What you probably could do is
seats[a] = (inputValues.toArray())[a];
To answer your question, here is an example:
ArrayList<String> stock_list = new ArrayList<String>();
stock_list.add("stock1");
stock_list.add("stock2");
String[] stockArr = new String[stock_list.size()];
stockArr = stock_list.toArray(stockArr);
for(String s : stockArr)
System.out.println(s);
Example is taken from here

N-gram generation from a sentence

How to generate an n-gram of a string like:
String Input="This is my car."
I want to generate n-gram with this input:
Input Ngram size = 3
Output should be:
This
is
my
car
This is
is my
my car
This is my
is my car
Give some idea in Java, how to implement that or if any library is available for it.
I am trying to use this NGramTokenizer but its giving n-gram's of character sequence and I want n-grams of word sequence.
I believe this would do what you want:
import java.util.*;
public class Test {
public static List<String> ngrams(int n, String str) {
List<String> ngrams = new ArrayList<String>();
String[] words = str.split(" ");
for (int i = 0; i < words.length - n + 1; i++)
ngrams.add(concat(words, i, i+n));
return ngrams;
}
public static String concat(String[] words, int start, int end) {
StringBuilder sb = new StringBuilder();
for (int i = start; i < end; i++)
sb.append((i > start ? " " : "") + words[i]);
return sb.toString();
}
public static void main(String[] args) {
for (int n = 1; n <= 3; n++) {
for (String ngram : ngrams(n, "This is my car."))
System.out.println(ngram);
System.out.println();
}
}
}
Output:
This
is
my
car.
This is
is my
my car.
This is my
is my car.
An "on-demand" solution implemented as an Iterator:
class NgramIterator implements Iterator<String> {
String[] words;
int pos = 0, n;
public NgramIterator(int n, String str) {
this.n = n;
words = str.split(" ");
}
public boolean hasNext() {
return pos < words.length - n + 1;
}
public String next() {
StringBuilder sb = new StringBuilder();
for (int i = pos; i < pos + n; i++)
sb.append((i > pos ? " " : "") + words[i]);
pos++;
return sb.toString();
}
public void remove() {
throw new UnsupportedOperationException();
}
}
You are looking for ShingleFilter.
Update: The link points to version 3.0.2. This class may be in different package in newer version of Lucene.
This code returns an array of all Strings of the given length:
public static String[] ngrams(String s, int len) {
String[] parts = s.split(" ");
String[] result = new String[parts.length - len + 1];
for(int i = 0; i < parts.length - len + 1; i++) {
StringBuilder sb = new StringBuilder();
for(int k = 0; k < len; k++) {
if(k > 0) sb.append(' ');
sb.append(parts[i+k]);
}
result[i] = sb.toString();
}
return result;
}
E.g.
System.out.println(Arrays.toString(ngrams("This is my car", 2)));
//--> [This is, is my, my car]
System.out.println(Arrays.toString(ngrams("This is my car", 3)));
//--> [This is my, is my car]
/**
*
* #param sentence should has at least one string
* #param maxGramSize should be 1 at least
* #return set of continuous word n-grams up to maxGramSize from the sentence
*/
public static List<String> generateNgramsUpto(String str, int maxGramSize) {
List<String> sentence = Arrays.asList(str.split("[\\W+]"));
List<String> ngrams = new ArrayList<String>();
int ngramSize = 0;
StringBuilder sb = null;
//sentence becomes ngrams
for (ListIterator<String> it = sentence.listIterator(); it.hasNext();) {
String word = (String) it.next();
//1- add the word itself
sb = new StringBuilder(word);
ngrams.add(word);
ngramSize=1;
it.previous();
//2- insert prevs of the word and add those too
while(it.hasPrevious() && ngramSize<maxGramSize){
sb.insert(0,' ');
sb.insert(0,it.previous());
ngrams.add(sb.toString());
ngramSize++;
}
//go back to initial position
while(ngramSize>0){
ngramSize--;
it.next();
}
}
return ngrams;
}
Call:
long startTime = System.currentTimeMillis();
ngrams = ToolSet.generateNgramsUpto("This is my car.", 3);
long stopTime = System.currentTimeMillis();
System.out.println("My time = "+(stopTime-startTime)+" ms with ngramsize = "+ngrams.size());
System.out.println(ngrams.toString());
Output:
My time = 1 ms with ngramsize = 9 [This, is, This is, my, is my, This
is my, car, my car, is my car]
public static void CreateNgram(ArrayList<String> list, int cutoff) {
try
{
NGramModel ngramModel = new NGramModel();
POSModel model = new POSModelLoader().load(new File("en-pos-maxent.bin"));
PerformanceMonitor perfMon = new PerformanceMonitor(System.err, "sent");
POSTaggerME tagger = new POSTaggerME(model);
perfMon.start();
for(int i = 0; i<list.size(); i++)
{
String inputString = list.get(i);
ObjectStream<String> lineStream = new PlainTextByLineStream(new StringReader(inputString));
String line;
while ((line = lineStream.read()) != null)
{
String whitespaceTokenizerLine[] = WhitespaceTokenizer.INSTANCE.tokenize(line);
String[] tags = tagger.tag(whitespaceTokenizerLine);
POSSample sample = new POSSample(whitespaceTokenizerLine, tags);
perfMon.incrementCounter();
String words[] = sample.getSentence();
if(words.length > 0)
{
for(int k = 2; k< 4; k++)
{
ngramModel.add(new StringList(words), k, k);
}
}
}
}
ngramModel.cutoff(cutoff, Integer.MAX_VALUE);
Iterator<StringList> it = ngramModel.iterator();
while(it.hasNext())
{
StringList strList = it.next();
System.out.println(strList.toString());
}
perfMon.stopAndPrintFinalResult();
}catch(Exception e)
{
System.out.println(e.toString());
}
}
Here is my codes to create n-gram. In this case, n = 2, 3. n-gram of words sequence which smaller than cutoff value will ignore from result set. Input is list of sentences, then it parse using a tool of OpenNLP
public static void main(String[] args) {
String[] words = "This is my car.".split(" ");
for (int n = 0; n < 3; n++) {
List<String> list = ngrams(n, words);
for (String ngram : list) {
System.out.println(ngram);
}
System.out.println();
}
}
public static List<String> ngrams(int stepSize, String[] words) {
List<String> ngrams = new ArrayList<String>();
for (int i = 0; i < words.length-stepSize; i++) {
String initialWord = "";
int internalCount = i;
int internalStepSize = i + stepSize;
while (internalCount <= internalStepSize
&& internalCount < words.length) {
initialWord = initialWord+" " + words[internalCount];
++internalCount;
}
ngrams.add(initialWord);
}
return ngrams;
}
Check this out:
public static void main(String[] args) {
NGram nGram = new NGram();
String[] tokens = "this is my car".split(" ");
int i = tokens.length;
List<String> ngrams = new ArrayList<>();
while (i >= 1){
ngrams.addAll(nGram.getNGram(tokens, i, new ArrayList<>()));
i--;
}
System.out.println(ngrams);
}
private List<String> getNGram(String[] tokens, int n, List<String> ngrams) {
StringBuilder strbldr = new StringBuilder();
if (tokens.length < n) {
return ngrams;
}else {
for (int i=0; i<n; i++){
strbldr.append(tokens[i]).append(" ");
}
ngrams.add(strbldr.toString().trim());
String[] newTokens = Arrays.copyOfRange(tokens, 1, tokens.length);
return getNGram(newTokens, n, ngrams);
}
}
Simple recursive function, better running time.

Categories