N-gram generation from a sentence - java

How to generate an n-gram of a string like:
String Input="This is my car."
I want to generate n-gram with this input:
Input Ngram size = 3
Output should be:
This
is
my
car
This is
is my
my car
This is my
is my car
Give some idea in Java, how to implement that or if any library is available for it.
I am trying to use this NGramTokenizer, but it gives n-grams of character sequences and I want n-grams of word sequences.

I believe this would do what you want:
import java.util.*;
public class Test {

    /**
     * Collects every run of {@code n} consecutive words of {@code str}.
     * Words are the pieces obtained by splitting on a single space, so
     * punctuation stays attached to its word.
     *
     * @param n   number of words per n-gram
     * @param str sentence to slice
     * @return the n-grams in left-to-right order (empty when the sentence
     *         has fewer than {@code n} words)
     */
    public static List<String> ngrams(int n, String str) {
        String[] tokens = str.split(" ");
        List<String> grams = new ArrayList<String>();
        int windowCount = tokens.length - n + 1;
        int first = 0;
        while (first < windowCount) {
            grams.add(concat(tokens, first, first + n));
            first++;
        }
        return grams;
    }

    /**
     * Joins {@code words[start..end)} with single spaces.
     *
     * @param words source array
     * @param start index of the first word (inclusive)
     * @param end   index after the last word (exclusive)
     * @return the joined words, or an empty string when the range is empty
     */
    public static String concat(String[] words, int start, int end) {
        StringBuilder joined = new StringBuilder();
        for (int idx = start; idx < end; idx++) {
            if (idx != start) {
                joined.append(" ");
            }
            joined.append(words[idx]);
        }
        return joined.toString();
    }

    /** Prints the 1-, 2- and 3-grams of a sample sentence, one group per paragraph. */
    public static void main(String[] args) {
        String sentence = "This is my car.";
        int size = 1;
        while (size <= 3) {
            List<String> grams = ngrams(size, sentence);
            for (String gram : grams) {
                System.out.println(gram);
            }
            System.out.println();
            size++;
        }
    }
}
Output:
This
is
my
car.
This is
is my
my car.
This is my
is my car.
An "on-demand" solution implemented as an Iterator:
/**
 * Lazily produces the word n-grams of a sentence, one per call to
 * {@link #next()}, instead of materialising them all up front.
 * Words are obtained by splitting the sentence on single spaces.
 */
class NgramIterator implements Iterator<String> {
    String[] words;
    int pos = 0, n;

    /**
     * @param n   number of words per n-gram
     * @param str sentence to iterate over
     */
    public NgramIterator(int n, String str) {
        this.n = n;
        words = str.split(" ");
    }

    /** @return {@code true} while a full window of {@code n} words still fits at the current position */
    @Override
    public boolean hasNext() {
        return pos < words.length - n + 1;
    }

    /**
     * @return the n-gram starting at the current position
     * @throws java.util.NoSuchElementException when no n-gram is left, as the
     *         {@link Iterator} contract requires (previously this surfaced as
     *         an {@code ArrayIndexOutOfBoundsException})
     */
    @Override
    public String next() {
        if (!hasNext()) {
            throw new java.util.NoSuchElementException("no " + n + "-gram left at position " + pos);
        }
        StringBuilder sb = new StringBuilder();
        for (int i = pos; i < pos + n; i++) {
            if (i > pos) {
                sb.append(' ');
            }
            sb.append(words[i]);
        }
        pos++;
        return sb.toString();
    }

    /** Removal is not supported: the iterator is a read-only view of the sentence. */
    @Override
    public void remove() {
        throw new UnsupportedOperationException();
    }
}

You are looking for ShingleFilter.
Update: The link points to version 3.0.2. This class may be in different package in newer version of Lucene.

This code returns an array of all Strings of the given length:
/**
 * Returns every run of {@code len} consecutive words of {@code s}.
 * Words are the pieces obtained by splitting on a single space.
 *
 * @param s   sentence to slice
 * @param len number of words per n-gram
 * @return the n-grams in left-to-right order; an empty array when the
 *         sentence has fewer than {@code len} words
 */
public static String[] ngrams(String s, int len) {
    String[] parts = s.split(" ");
    // Clamp at zero: a sentence shorter than len used to request a
    // negative array size and crash with NegativeArraySizeException.
    int count = Math.max(0, parts.length - len + 1);
    String[] result = new String[count];
    for (int i = 0; i < count; i++) {
        StringBuilder sb = new StringBuilder();
        for (int k = 0; k < len; k++) {
            if (k > 0) {
                sb.append(' ');
            }
            sb.append(parts[i + k]);
        }
        result[i] = sb.toString();
    }
    return result;
}
E.g.
// Example: the same sentence sliced into 2-grams and then 3-grams;
// the expected console output follows each call.
System.out.println(Arrays.toString(ngrams("This is my car", 2)));
//--> [This is, is my, my car]
System.out.println(Arrays.toString(ngrams("This is my car", 3)));
//--> [This is my, is my car]

/**
 * Generates every contiguous word n-gram of size 1 up to
 * {@code maxGramSize} from a sentence.
 *
 * <p>The sentence is split on runs of non-word characters, so
 * punctuation and repeated separators never produce empty words.
 * N-grams are emitted grouped by their last word: for each word, the
 * word itself first, then the n-grams ending at it in growing size.
 *
 * @param str         the sentence; may contain punctuation
 * @param maxGramSize largest n-gram size to emit; values below 1 behave like 1
 * @return list of contiguous word n-grams of size 1..maxGramSize
 */
public static List<String> generateNgramsUpto(String str, int maxGramSize) {
    // "\\W+" (one or more non-word chars) is the intended delimiter.  The
    // previous "[\\W+]" was a character class matching ONE non-word char
    // (or a literal '+'), so ", " produced an empty word in between.
    List<String> sentence = new ArrayList<String>();
    for (String token : str.split("\\W+")) {
        if (!token.isEmpty()) { // a leading separator still yields one empty token
            sentence.add(token);
        }
    }

    List<String> ngrams = new ArrayList<String>();
    for (int end = 0; end < sentence.size(); end++) {
        // 1- add the word itself
        String word = sentence.get(end);
        StringBuilder sb = new StringBuilder(word);
        ngrams.add(word);
        int ngramSize = 1;
        // 2- prepend the preceding words one at a time and add each result
        for (int start = end - 1; start >= 0 && ngramSize < maxGramSize; start--) {
            sb.insert(0, ' ');
            sb.insert(0, sentence.get(start));
            ngrams.add(sb.toString());
            ngramSize++;
        }
    }
    return ngrams;
}
Call:
// Times one call to generateNgramsUpto and prints the elapsed wall-clock
// milliseconds together with the number of n-grams produced.
// NOTE(review): `ngrams` is assigned but not declared in this fragment —
// presumably a List<String> declared by the surrounding code; confirm.
long startTime = System.currentTimeMillis();
ngrams = ToolSet.generateNgramsUpto("This is my car.", 3);
long stopTime = System.currentTimeMillis();
System.out.println("My time = "+(stopTime-startTime)+" ms with ngramsize = "+ngrams.size());
System.out.println(ngrams.toString());
Output:
My time = 1 ms with ngramsize = 9 [This, is, This is, my, is my, This
is my, car, my car, is my car]

/**
 * Builds 2- and 3-word n-grams from every line of every input string,
 * drops those whose count is below {@code cutoff}, and prints the
 * survivors to standard output.
 *
 * <p>Each line is tokenised on whitespace and run through the POS tagger
 * loaded from {@code en-pos-maxent.bin}; the words of the resulting sample
 * feed the n-gram model. Any exception is caught and its {@code toString()}
 * printed to standard output.
 *
 * @param list   input strings, each possibly spanning several lines
 * @param cutoff lower bound passed to {@code NGramModel.cutoff}
 */
public static void CreateNgram(ArrayList<String> list, int cutoff) {
    try {
        NGramModel grams = new NGramModel();
        POSModel posModel = new POSModelLoader().load(new File("en-pos-maxent.bin"));
        PerformanceMonitor monitor = new PerformanceMonitor(System.err, "sent");
        POSTaggerME posTagger = new POSTaggerME(posModel);
        monitor.start();

        for (String text : list) {
            ObjectStream<String> lines = new PlainTextByLineStream(new StringReader(text));
            for (String current = lines.read(); current != null; current = lines.read()) {
                String[] tokens = WhitespaceTokenizer.INSTANCE.tokenize(current);
                String[] posTags = posTagger.tag(tokens);
                POSSample tagged = new POSSample(tokens, posTags);
                monitor.incrementCounter();

                String[] sentenceWords = tagged.getSentence();
                if (sentenceWords.length == 0) {
                    continue;
                }
                // bigrams first, then trigrams
                for (int size = 2; size <= 3; size++) {
                    grams.add(new StringList(sentenceWords), size, size);
                }
            }
        }

        grams.cutoff(cutoff, Integer.MAX_VALUE);
        for (Iterator<StringList> cursor = grams.iterator(); cursor.hasNext();) {
            StringList entry = cursor.next();
            System.out.println(entry.toString());
        }
        monitor.stopAndPrintFinalResult();
    } catch (Exception e) {
        System.out.println(e.toString());
    }
}
Here is my code to create n-grams. In this case, n = 2 and 3. Any n-gram of words that occurs fewer times than the cutoff value is dropped from the result set. The input is a list of sentences, which are parsed using an OpenNLP tool.

/** Prints the 1-, 2- and 3-grams of a sample sentence, one group per paragraph. */
public static void main(String[] args) {
    String[] words = "This is my car.".split(" ");
    for (int n = 0; n < 3; n++) {
        for (String ngram : ngrams(n, words)) {
            System.out.println(ngram);
        }
        System.out.println();
    }
}

/**
 * Returns every run of {@code stepSize + 1} consecutive words, joined by
 * single spaces.
 *
 * <p>Note the offset: {@code stepSize} is the distance between the first
 * and last word of a run, so 0 yields single words, 1 yields pairs, etc.
 * The result no longer carries the leading space the previous version
 * prepended to every n-gram.
 *
 * @param stepSize n-gram size minus one
 * @param words    the tokenised sentence
 * @return the n-grams in left-to-right order (empty when the sentence is
 *         too short)
 */
public static List<String> ngrams(int stepSize, String[] words) {
    List<String> ngrams = new ArrayList<String>();
    for (int i = 0; i < words.length - stepSize; i++) {
        StringBuilder gram = new StringBuilder();
        for (int j = i; j <= i + stepSize && j < words.length; j++) {
            if (j > i) {
                gram.append(' '); // separator only BETWEEN words
            }
            gram.append(words[j]);
        }
        ngrams.add(gram.toString());
    }
    return ngrams;
}

Check this out:
/**
 * Demo: prints all n-grams of "this is my car", longest first
 * (the 4-gram, then the 3-grams, down to the single words).
 */
public static void main(String[] args) {
    NGram generator = new NGram();
    String[] tokens = "this is my car".split(" ");
    List<String> collected = new ArrayList<>();
    for (int size = tokens.length; size >= 1; size--) {
        List<String> ofThisSize = generator.getNGram(tokens, size, new ArrayList<>());
        collected.addAll(ofThisSize);
    }
    System.out.println(collected);
}
/**
 * Appends every run of {@code n} consecutive tokens, joined by single
 * spaces and trimmed, to {@code ngrams}.
 *
 * <p>Implemented as a sliding window. The earlier recursive form copied
 * the remaining token array on every step (quadratic work) and used one
 * stack frame per token, which overflows the stack on long inputs.
 *
 * @param tokens the tokenised sentence
 * @param n      number of tokens per n-gram; values below 1 add nothing
 * @param ngrams accumulator the n-grams are appended to
 * @return the same {@code ngrams} list, for chaining
 */
private List<String> getNGram(String[] tokens, int n, List<String> ngrams) {
    if (n < 1) {
        return ngrams;
    }
    // "start <= length - n" rather than "start + n <= length": no int overflow for huge n
    for (int start = 0; start <= tokens.length - n; start++) {
        StringBuilder strbldr = new StringBuilder();
        for (int i = start; i < start + n; i++) {
            strbldr.append(tokens[i]).append(" ");
        }
        ngrams.add(strbldr.toString().trim());
    }
    return ngrams;
}
Simple recursive function, better running time.

Related

Efficient/Fast way to get permutation of a String in java [duplicate]

What is an elegant way to find all the permutations of a string. E.g. permutation for ba, would be ba and ab, but what about longer string such as abcdefgh? Is there any Java implementation example?
/**
 * Prints every permutation of {@code str}, one per line.
 *
 * @param str the characters to permute
 */
public static void permutation(String str) {
    permutation("", str);
}

/**
 * Recursive worker: {@code prefix} holds the characters already placed,
 * {@code str} the ones still to be placed. Prints {@code prefix} once
 * nothing is left; otherwise tries each remaining character in turn as
 * the next one.
 */
private static void permutation(String prefix, String str) {
    if (str.isEmpty()) {
        System.out.println(prefix);
        return;
    }
    for (int pick = 0; pick < str.length(); pick++) {
        String remaining = new StringBuilder(str).deleteCharAt(pick).toString();
        permutation(prefix + str.charAt(pick), remaining);
    }
}
(via Introduction to Programming in Java)
Use recursion.
Try each of the letters in turn as the first letter and then find all the permutations of the remaining letters using a recursive call.
The base case is when the input is an empty string the only permutation is the empty string.
Here is my solution that is based on the idea of the book "Cracking the Coding Interview" (P54):
/**
 * Lists the permutations of a string.
 *
 * <p>Starts from the one-character prefix and folds in each following
 * character by inserting it at every position of every permutation built
 * so far.
 *
 * @param s the input string
 * @return all permutations (duplicates included when characters repeat);
 *         an empty list for the empty string
 */
public static ArrayList<String> permutation(String s) {
    ArrayList<String> res = new ArrayList<String>();
    if (s.length() == 0) {
        return res;
    }
    res.add(s.substring(0, 1));
    for (int next = 1; next < s.length(); next++) {
        res = merge(res, s.substring(next, next + 1));
    }
    return res;
}

/**
 * Inserts {@code c} at every possible position of every string in
 * {@code list}.
 *
 * @param list a result of permutation, e.g. {"ab", "ba"}
 * @param c    the character (as a string) to insert
 * @return a merged new list, e.g. {"cab", "acb" ... }
 */
public static ArrayList<String> merge(ArrayList<String> list, String c) {
    ArrayList<String> merged = new ArrayList<>();
    for (String base : list) {
        int slots = base.length();
        for (int at = 0; at <= slots; at++) {
            merged.add(base.substring(0, at) + c + base.substring(at));
        }
    }
    return merged;
}
Running output of string "abcd":
Step 1: Merge [a] and b:
[ba, ab]
Step 2: Merge [ba, ab] and c:
[cba, bca, bac, cab, acb, abc]
Step 3: Merge [cba, bca, bac, cab, acb, abc] and d:
[dcba, cdba, cbda, cbad, dbca, bdca, bcda, bcad, dbac, bdac, badc, bacd, dcab, cdab, cadb, cabd, dacb, adcb, acdb, acbd, dabc, adbc, abdc, abcd]
Of all the solutions given here and in other forums, I liked Mark Byers the most. That description actually made me think and code it myself.
Too bad I cannot voteup his solution as I am newbie.
Anyways here is my implementation of his description
/** Prints all permutations of a fixed string by swapping characters in place. */
public class PermTest {

    public static void main(String[] args) throws Exception {
        doPerm(new StringBuffer("abcdef"), 0);
    }

    /**
     * Prints every arrangement of {@code str} that keeps the first
     * {@code index} characters fixed.
     */
    private static void doPerm(StringBuffer str, int index) {
        if (index == str.length()) {
            System.out.println(str);
            return;
        }
        // candidate == index is the "leave it where it is" case: both swaps are no-ops
        for (int candidate = index; candidate < str.length(); candidate++) {
            swap(str, index, candidate);
            doPerm(str, index + 1);
            swap(str, candidate, index); // undo, so the next candidate sees the original order
        }
    }

    /** Exchanges the characters at the two positions. */
    private static void swap(StringBuffer str, int pos1, int pos2) {
        char held = str.charAt(pos1);
        str.setCharAt(pos1, str.charAt(pos2));
        str.setCharAt(pos2, held);
    }
}
I prefer this solution to the first one in this thread because it uses StringBuffer. I wouldn't say my solution creates no temporary strings (it actually does in System.out.println, where the StringBuffer's toString() is called), but I feel it is better than the first solution, where too many temporary String objects are created. Maybe some performance expert out there can evaluate this in terms of memory (in terms of time it already lags because of the extra swap).
A very basic solution in Java is to use recursion + Set ( to avoid repetitions ) if you want to store and return the solution strings :
/**
 * Returns the distinct permutations of {@code input}.
 *
 * <p>Recursive: permutes everything after the first character, then
 * inserts the first character at every position of each result. The
 * {@link Set} removes the repeats that duplicate characters produce.
 *
 * @param input the string to permute; may be {@code null}
 * @return the set of permutations, empty for a {@code null} or empty input
 */
public static Set<String> generatePerm(String input) {
    Set<String> set = new HashSet<String>();
    // isEmpty() compares content.  The previous `input == ""` compared
    // references, so an empty string that was not the interned literal
    // slipped through and crashed in charAt(0).
    if (input == null || input.isEmpty()) {
        return set;
    }
    char first = input.charAt(0);
    if (input.length() == 1) {
        set.add(String.valueOf(first));
        return set;
    }
    for (String rest : generatePerm(input.substring(1))) {
        for (int i = 0; i <= rest.length(); i++) {
            set.add(rest.substring(0, i) + first + rest.substring(i));
        }
    }
    return set;
}
All the previous contributors have done a great job explaining and providing the code. I thought I should share this approach too because it might help someone too. The solution is based on (heaps' algorithm )
Couple of things:
Notice the last item which is depicted in the excel is just for helping you better visualize the logic. So, the actual values in the last column would be 2,1,0 (if we were to run the code because we are dealing with arrays and arrays start with 0).
The swapping algorithm happens based on even or odd values of current position. It's very self explanatory if you look at where the swap method is getting called.You can see what's going on.
Here is what happens:
/** Demo: prints every ordering of the letters of "abc". */
public static void main(String[] args) {
    String[] letters = "abc".split("");
    permute(letters, letters.length);
}

/** Exchanges the two array slots. */
private static void swap(String[] ourarray, int right, int left) {
    String held = ourarray[left];
    ourarray[left] = ourarray[right];
    ourarray[right] = held;
}

/**
 * Prints the orderings of the first {@code currentPosition} elements of
 * {@code ourArray}, permuting the array in place.
 *
 * @param ourArray        elements to permute (modified during the run)
 * @param currentPosition number of leading elements still being permuted
 */
public static void permute(String[] ourArray, int currentPosition) {
    if (currentPosition == 1) {
        System.out.println(Arrays.toString(ourArray));
        return;
    }
    int last = currentPosition - 1;
    boolean oddSize = currentPosition % 2 == 1;
    for (int round = 0; round < currentPosition; round++) {
        // permute everything in front of the last slot...
        permute(ourArray, last);
        // ...then bring a different element into the last slot:
        // always the first one for odd sizes, the round-th one for even sizes
        swap(ourArray, oddSize ? 0 : round, last);
    }
}
Let's use input abc as an example.
Start off with just the last element (c) in a set (["c"]), then add the second last element (b) to its front, end and every possible positions in the middle, making it ["bc", "cb"] and then in the same manner it will add the next element from the back (a) to each string in the set making it:
"a" + "bc" = ["abc", "bac", "bca"] and "a" + "cb" = ["acb" ,"cab", "cba"]
Thus entire permutation:
["abc", "bac", "bca","acb" ,"cab", "cba"]
Code:
/**
 * Builds the distinct permutations of a string iteratively: starting from
 * the last character, each earlier character is inserted at every position
 * of every permutation found so far.
 */
public class Test {
    /** Permutations built so far by the current {@link #permutation} run. */
    static Set<String> permutations;
    /** Scratch set that receives the next generation inside {@code shuffle}. */
    static Set<String> result = new HashSet<String>();

    /**
     * @param string the characters to permute
     * @return the set of distinct permutations (empty for an empty string)
     */
    public static Set<String> permutation(String string) {
        permutations = new HashSet<String>();
        int index = string.length();
        while (--index >= 0) {
            shuffle(string.charAt(index));
        }
        return permutations;
    }

    /** Folds one more character into every permutation collected so far. */
    private static void shuffle(char c) {
        if (permutations.isEmpty()) {
            permutations.add(String.valueOf(c));
            return;
        }
        for (String existing : permutations) {
            for (int slot = 0; slot <= existing.length(); slot++) {
                result.add(new StringBuilder(existing).insert(slot, c).toString());
            }
        }
        permutations = result;
        // start the next round with a fresh scratch set so no stale values leak in
        result = new HashSet<String>();
    }

    public static void main(String[] args) {
        Set<String> result = permutation("abc");
        System.out.println("\nThere are total of " + result.size() + " permutations:");
        for (String entry : result) {
            System.out.println(entry);
        }
    }
}
This one is without recursion
/**
 * Prints the permutations of {@code s}, one per line, without recursion.
 *
 * <p>Starts with the first character and, for each further character,
 * replaces the working list by every string obtained from inserting that
 * character at each position of each current string. Prints nothing for
 * a {@code null} or empty input.
 *
 * @param s the string to permute
 */
public static void permute(String s) {
    if (null == s || s.isEmpty()) {
        return;
    }
    // Array-backed lists walked with for-each: the previous LinkedList was
    // indexed with get(j) and emptied with removeAll(itself), both quadratic.
    List<String> strings = new java.util.ArrayList<String>();
    strings.add(String.valueOf(s.charAt(0)));
    for (int i = 1; i < s.length(); i++) {
        List<String> next = new java.util.ArrayList<String>();
        for (String word : strings) {
            next.addAll(merge(s.charAt(i), word));
        }
        strings = next;
    }
    for (String word : strings) {
        System.out.println(word);
    }
}

/**
 * Helper that inserts the given character at each position of the given
 * string and returns the set of resulting strings. The set removes the
 * duplicates that arise when the character already occurs in the string.
 *
 * @return the modified strings; an empty set (never {@code null}) when
 *         {@code s} is {@code null}
 */
private static Set<String> merge(Character c, String s) {
    Set<String> list = new HashSet<String>();
    if (s == null) {
        return list;
    }
    int len = s.length();
    for (int i = 0; i <= len; i++) {
        list.add(s.substring(0, i) + c + s.substring(i, len));
    }
    return list;
}
Well here is an elegant, non-recursive, O(n!) solution:
/**
 * Builds all permutations of {@code s} without recursion.
 *
 * <p>Result slot {@code idx} is read as a number in the factorial number
 * system: its digit for step {@code i} is {@code (idx / i!) % (i + 1)} and
 * gives the position at which character {@code i} is inserted. Every slot
 * therefore gets a different sequence of insert positions, hence a
 * different permutation. (The previous formula used {@code idx % (i + 1)}
 * for every step; those digits are not independent, so from four
 * characters on only a fraction of the permutations appeared, each one
 * several times.)
 *
 * @param s the string to permute
 * @return one builder per permutation ({@code s.length()!} entries), or
 *         {@code null} for the empty string
 * @throws IllegalArgumentException if {@code s} has more than 12
 *         characters, because the number of permutations no longer fits
 *         in an {@code int}
 */
public static StringBuilder[] permutations(String s) {
    if (s.length() == 0)
        return null;
    if (s.length() > 12) {
        throw new IllegalArgumentException(
            "too many permutations for an array: length " + s.length());
    }
    int length = 1;
    for (int f = 2; f <= s.length(); f++) {
        length *= f;
    }
    StringBuilder[] sb = new StringBuilder[length];
    for (int i = 0; i < length; i++) {
        sb[i] = new StringBuilder(s.length());
    }
    int radix = 1; // invariant: radix == i! at the top of each iteration
    for (int i = 0; i < s.length(); i++) {
        char ch = s.charAt(i);
        for (int idx = 0; idx < length; idx++) {
            sb[idx].insert((idx / radix) % (i + 1), ch);
        }
        radix *= i + 1;
    }
    return sb;
}
One of the simple solution could be just keep swapping the characters recursively using two pointers.
/** Demo: prints all permutations of an eight-letter string. */
public static void main(String[] args) {
    perm("abcdefgh");
}

/**
 * Prints every permutation of {@code str}, one per line.
 *
 * @param str the characters to permute
 */
public static void perm(String str) {
    helper(str.toCharArray(), 0);
}

/**
 * Prints the arrangements of {@code char_arr} that keep the first
 * {@code i} characters fixed. The array is permuted in place and restored
 * before returning.
 */
public static void helper(char[] char_arr, int i) {
    if (i == char_arr.length - 1) {
        // only one free slot left: the arrangement is complete
        System.out.println(String.valueOf(char_arr));
        return;
    }
    for (int j = i; j < char_arr.length; j++) {
        char moved = char_arr[j];
        char_arr[j] = char_arr[i];
        char_arr[i] = moved;
        helper(char_arr, i + 1);
        // undo the exchange so the next candidate starts from the same order
        char_arr[i] = char_arr[j];
        char_arr[j] = moved;
    }
}
python implementation
def getPermutation(s, prefix=''):
    """Print every permutation of ``s``, each one preceded by ``prefix``.

    Each character of ``s`` is tried in turn as the next character of the
    prefix; the remaining characters are permuted recursively. A line is
    printed whenever no characters are left.
    """
    if len(s) == 0:
        # print() as a function runs on Python 2 and 3 alike; the bare
        # `print prefix` statement is a syntax error on Python 3.
        print(prefix)
        return
    for i in range(len(s)):
        getPermutation(s[:i] + s[i + 1:], prefix + s[i])


getPermutation('abcd', '')
This is what I did through basic understanding of Permutations and Recursive function calling. Takes a bit of time but it's done independently.
/** Prints the permutations of a fixed string in sorted order. */
public class LexicographicPermutations {

    public static void main(String[] args) {
        List<String> combinations = permutations("abc");
        Collections.sort(combinations);
        System.out.println(combinations);
    }

    /**
     * Returns the permutations of {@code s}: each character in turn is used
     * as the first one, followed by every permutation of the others.
     * Returns an empty list for the empty string.
     */
    private static List<String> permutations(String s) {
        List<String> found = new ArrayList<String>();
        if (s.length() == 1) {
            found.add(s);
            return found;
        }
        for (int lead = 0; lead < s.length(); lead++) {
            char head = s.charAt(lead);
            String others = s.substring(0, lead) + s.substring(lead + 1);
            for (String tail : permutations(others)) {
                found.add(head + tail);
            }
        }
        return found;
    }
}
which generates Output as [abc, acb, bac, bca, cab, cba].
Basic logic behind it is
For each character, consider it as 1st character & find the combinations of remaining characters. e.g. [abc](Combination of abc)->.
a->[bc](a x Combination of (bc))->{abc,acb}
b->[ac](b x Combination of (ac))->{bac,bca}
c->[ab](c x Combination of (ab))->{cab,cba}
And then recursively calling each [bc],[ac] & [ab] independently.
Use recursion.
when the input is an empty string the only permutation is an empty string.Try for each of the letters in the string by making it as the first letter and then find all the permutations of the remaining letters using a recursive call.
import java.util.ArrayList;
import java.util.List;
/** Collects the permutations of a string recursively and prints them. */
class Permutation {

    /**
     * Returns every string made of {@code prefix} followed by a
     * permutation of {@code str}.
     */
    private static List<String> permutation(String prefix, String str) {
        List<String> collected = new ArrayList<>();
        collect(prefix, str, collected);
        return collected;
    }

    /**
     * Appends to {@code sink} every completion of {@code placed} with the
     * characters of {@code pending}. When character {@code i} is chosen, the
     * rest is rotated (the part after it first, then the part before it),
     * which determines the output order.
     */
    private static void collect(String placed, String pending, List<String> sink) {
        int remaining = pending.length();
        if (remaining == 0) {
            sink.add(placed);
            return;
        }
        for (int i = 0; i < remaining; i++) {
            String rotated = pending.substring(i + 1, remaining) + pending.substring(0, i);
            collect(placed + pending.charAt(i), rotated, sink);
        }
    }

    public static void main(String[] args) {
        for (String perm : permutation("", "abcd")) {
            System.out.println(perm);
        }
    }
}
this worked for me..
import java.util.Arrays;
/** Prints the permutations of a fixed string using in-place swaps. */
public class StringPermutations {

    public static void main(String args[]) {
        char[] letters = "ABC".toCharArray();
        permute(letters, 0, letters.length - 1);
    }

    /**
     * Prints every arrangement of {@code ary[startIndex..endIndex]} while
     * the characters before {@code startIndex} stay fixed. The array is
     * restored before the method returns.
     *
     * @param ary        characters to permute (modified during the run)
     * @param startIndex first index that may still change
     * @param endIndex   last index of the array
     */
    public static void permute(char[] ary, int startIndex, int endIndex) {
        if (startIndex == endIndex) {
            System.out.println(new String(ary));
            return;
        }
        int candidate = startIndex;
        while (candidate <= endIndex) {
            swap(ary, startIndex, candidate);
            permute(ary, startIndex + 1, endIndex);
            swap(ary, startIndex, candidate); // undo
            candidate++;
        }
    }

    /** Exchanges the characters at indices {@code x} and {@code y}. */
    public static void swap(char[] ary, int x, int y) {
        char held = ary[y];
        ary[y] = ary[x];
        ary[x] = held;
    }
}
Java implementation without recursion
/**
 * Collects strings reachable from {@code s} by repeatedly applying, for
 * each index {@code i}, the rearrangement "everything after {@code i}, then
 * character {@code i}, then everything before {@code i}". A work queue
 * replaces recursion, and the visited set both stops the search and is the
 * result.
 *
 * @param s the starting string
 * @return every distinct string visited, {@code s} included
 */
public Set<String> permutate(String s) {
    Set<String> seen = new HashSet<String>();
    Queue<String> pending = new LinkedList<String>();
    pending.add(s);
    while (!pending.isEmpty()) {
        String current = pending.poll();
        if (!seen.add(current)) {
            continue; // already expanded
        }
        for (int i = 0; i < current.length(); i++) {
            String rearranged = current.substring(i + 1) + current.charAt(i) + current.substring(0, i);
            pending.add(rearranged);
        }
    }
    return seen;
}
Let me try to tackle this problem with Kotlin:
/**
 * Returns all orderings of this list.
 *
 * Lists of size 0, 1 and 2 are answered directly. For longer lists each
 * element in turn is taken out, the rest is permuted recursively, and the
 * element is appended to the end of every result.
 */
fun <T> List<T>.permutations(): List<List<T>> = when (size) {
    0 -> emptyList()
    1 -> listOf(this)
    2 -> listOf(listOf(first(), last()), listOf(last(), first()))
    else -> {
        val collected = mutableListOf<List<T>>()
        for (tail in this) {
            for (front in (this - tail).permutations()) {
                collected.add(front + tail)
            }
        }
        collected
    }
}
Core concept: Break down long list into smaller list + recursion
Long answer with example list [1, 2, 3, 4]:
Even for a list of 4 it already kinda get's confusing trying to list all the possible permutations in your head, and what we need to do is exactly to avoid that. It is easy for us to understand how to make all permutations of list of size 0, 1, and 2, so all we need to do is break them down to any of those sizes and combine them back up correctly. Imagine a jackpot machine: this algorithm will start spinning from the right to the left, and write down
return empty/list of 1 when list size is 0 or 1
handle when list size is 2 (e.g. [3, 4]), and generate the 2 permutations ([3, 4] & [4, 3])
For each item, mark that as the last in the last, and find all the permutations for the rest of the item in the list. (e.g. put [4] on the table, and throw [1, 2, 3] into permutation again)
Now with all permutation it's children, put itself back to the end of the list (e.g.: [1, 2, 3][,4], [1, 3, 2][,4], [2, 3, 1][, 4], ...)
import java.io.IOException;
import java.util.ArrayList;
import java.util.Scanner;
/**
 * Console program that reads one word from standard input, reports how
 * many permutations it has, and prints strings produced by repeatedly
 * swapping characters of the word in place.
 *
 * All bookkeeping lives in instance fields (fact, y, p, g, z) that are
 * shared between calls, so the exact order of statements matters.
 */
public class hello {
public static void main(String[] args) throws IOException {
hello h = new hello();
h.printcomp();
}
// Running product built up by factrec; starts at 1.
int fact=1;
/**
 * Multiplies fact by k, k+1, ... up to a (recursively), then prints the
 * resulting count. Called as factrec(n, 1) this leaves n! in fact.
 */
public void factrec(int a,int k){
if(a>=k)
{fact=fact*k;
k++;
factrec(a,k);
}
else
{System.out.println("The string will have "+fact+" permutations");
}
}
/**
 * Reads a whitespace-delimited token from standard input, computes the
 * permutation count via factrec, then calls printcomprec until the
 * printed-line counter p reaches fact.
 */
public void printcomp(){
String str;
int k;
Scanner in = new Scanner(System.in);
System.out.println("enter the string whose permutations has to b found");
str=in.next();
k=str.length();
factrec(k,1);
// arr has room for exactly fact strings
String[] arr =new String[fact];
char[] array = str.toCharArray();
// NOTE(review): p only advances inside the innermost loop of printcomprec;
// if that loop never runs (e.g. very short input) this would not
// terminate — confirm.
while(p<fact)
printcomprec(k,array,arr);
// If an array containing all the generated strings is needed, print arr here:
//for(int d=0;d<fact;d++)
//System.out.println(arr[d]);
}
// y: index swapped with position 0 after each inner pass (cycles 1..k-1, then 0).
int y=1;
// p: number of lines printed so far; also printed next to each string.
int p = 0;
// g: distance between the two positions swapped in the innermost loop.
int g=1;
// z: next free slot in the result array.
int z = 0;
/**
 * One generation pass: swaps array[i] with array[i + g] for the inner
 * indices, records and prints the resulting string each time, rotates a
 * new character into position 0, and finally advances g.
 * NOTE(review): z is never bounded against arr.length inside the loops —
 * a single pass may write past the end of arr; confirm.
 */
public void printcomprec(int k,char array[],String arr[]){
for (int l = 0; l < k; l++) {
for (int b=0;b<k-1;b++){
for (int i=1; i<k-g; i++) {
char temp;
String stri = "";
// exchange positions i and i+g
temp = array[i];
array[i] = array[i + g];
array[i + g] = temp;
// rebuild the string from the current character order
for (int j = 0; j < k; j++)
stri += array[j];
arr[z] = stri;
System.out.println(arr[z] + " " + p++);
z++;
}
}
// bring the character at index y to the front
char temp;
temp=array[0];
array[0]=array[y];
array[y]=temp;
// advance y, wrapping back by k-1 once it reaches the last index
if (y >= k-1)
y=y-(k-1);
else
y++;
}
// advance the swap distance for the next pass, wrapping back to 1
if (g >= k-1)
g=1;
else
g++;
}
}
/**
 * Returns an array list containing all permutations of the characters
 * in {@code s}.
 *
 * <p>Built iteratively: the list starts with the first character, and each
 * further character is inserted at every position of every string found
 * so far. Each generation is written to a fresh list; the earlier version
 * removed strings from the front of a single {@code ArrayList}, which
 * shifts the whole list on every removal.
 *
 * @param s the characters to permute
 * @return the permutations (duplicates included when characters repeat);
 *         empty for the empty string
 */
public static ArrayList<String> permute(String s) {
    ArrayList<String> perms = new ArrayList<>();
    int slen = s.length();
    if (slen == 0) {
        return perms;
    }
    perms.add(Character.toString(s.charAt(0)));
    for (int i = 1; i < slen; ++i) {
        char c = s.charAt(i);
        ArrayList<String> next = new ArrayList<>();
        for (String p : perms) {
            // p has plen + 1 insertion points, each yielding a new string
            int plen = p.length();
            for (int k = 0; k <= plen; ++k) {
                next.add(p.substring(0, k) + c + p.substring(k));
            }
        }
        perms = next;
    }
    return perms;
}
Here is a straightforward minimalist recursive solution in Java:
/**
 * Returns all permutations of {@code s}.
 *
 * <p>Recursive: permutes everything after the first character, then
 * inserts the first character at every position of each result.
 *
 * @param s the string to permute
 * @return the permutations (duplicates included when characters repeat);
 *         for the empty string a list holding just the empty string, its
 *         only permutation (previously this case threw
 *         {@code StringIndexOutOfBoundsException})
 */
public static ArrayList<String> permutations(String s) {
    ArrayList<String> out = new ArrayList<String>();
    if (s.length() <= 1) {
        out.add(s);
        return out;
    }
    char first = s.charAt(0);
    String rest = s.substring(1);
    for (String permutation : permutations(rest)) {
        out.addAll(insertAtAllPositions(first, permutation));
    }
    return out;
}

/**
 * Returns the {@code s.length() + 1} strings obtained by inserting
 * {@code ch} at each position of {@code s}, from front to back.
 */
public static ArrayList<String> insertAtAllPositions(char ch, String s) {
    ArrayList<String> out = new ArrayList<String>(s.length() + 1);
    for (int i = 0; i <= s.length(); ++i) {
        out.add(s.substring(0, i) + ch + s.substring(i));
    }
    return out;
}
We can use factorial to find how many strings started with particular letter.
Example: take the input abcd. (3!) == 6 strings will start with every letter of abcd.
/**
 * Computes {@code x!} with plain {@code int} arithmetic.
 *
 * @param x the argument; values of 1 or less give 1
 * @return the product 2 * 3 * ... * x (wraps around silently once it
 *         exceeds the {@code int} range)
 */
static public int facts(int x) {
    int product = 1;
    for (int factor = x; factor > 1; factor--) {
        product *= factor;
    }
    return product;
}
/**
 * Prints arrangements of the characters of str.
 *
 * Lengths 1 and 2 are printed directly. For longer strings each character
 * in turn is moved to the front, and facts(n-1) arrangements of the
 * remaining positions are then produced by a repeating cycle of swaps,
 * each printed with a running number.
 *
 * NOTE(review): the swap cycle has not been shown to visit every
 * arrangement of the tail for n >= 5 — confirm before relying on the
 * output being complete. The empty string prints nothing.
 */
public static void permutation(String str) {
char[] str2 = str.toCharArray();
int n = str2.length;
// running number printed in front of each arrangement
int permutation = 0;
if (n == 1) {
System.out.println(str2[0]);
} else if (n == 2) {
System.out.println(str2[0] + "" + str2[1]);
System.out.println(str2[1] + "" + str2[0]);
} else {
for (int i = 0; i < n; i++) {
// Always start from a fresh copy of the input and bring character i
// to the front; the `if (true)` only scopes the temporaries.
if (true) {
char[] str3 = str.toCharArray();
char temp = str3[i];
str3[i] = str3[0];
str3[0] = temp;
str2 = str3;
}
// Produce facts(n-1) arrangements of positions 1..n-1; facts(n-1) is
// re-evaluated on every pass of the loop condition.
for (int j = 1, count = 0; count < facts(n-1); j++, count++) {
if (j != n-1) {
// swap neighbours j and j+1
char temp1 = str2[j+1];
str2[j+1] = str2[j];
str2[j] = temp1;
} else {
// at the end of the array: swap last with position 1 and restart the
// cycle (the loop's j++ then continues from 2)
char temp1 = str2[n-1];
str2[n-1] = str2[1];
str2[1] = temp1;
j = 1;
} // end of else block
permutation++;
System.out.print("permutation " + permutation + " is -> ");
for (int k = 0; k < n; k++) {
System.out.print(str2[k]);
} // end of loop k
System.out.println();
} // end of loop j
} // end of loop i
}
}
// Permutations found by the most recent findPermutation call.
static ArrayList<String> al = new ArrayList<String>();

/**
 * Computes the permutations of {@code str} into {@link #al}, feeding the
 * characters in one at a time.
 *
 * <p>The result list is reset first, so calling the method again starts
 * from scratch instead of continuing from the previous call's results.
 *
 * @param str the characters to permute
 */
private static void findPermutation(String str) {
    al = new ArrayList<String>();
    for (int k = 0; k < str.length(); k++) {
        addOneChar(str.charAt(k));
    }
}

/**
 * Replaces {@link #al} by every string obtained from inserting
 * {@code ch} at each position of each current entry. The very first
 * character simply becomes the single entry; it is stored as a
 * {@code String} like every other entry.
 */
private static void addOneChar(char ch) {
    if (al.isEmpty()) {
        al.add(String.valueOf(ch));
        return;
    }
    ArrayList<String> locAl = new ArrayList<String>();
    for (String lastPerStr : al) {
        for (int j = 0; j <= lastPerStr.length(); j++) {
            locAl.add(lastPerStr.substring(0, j) + ch + lastPerStr.substring(j));
        }
    }
    al = locAl;
}
/** Prints every element of {@code al} on one line, each followed by a space. */
private static void printArrayList(ArrayList al){
    for (Object entry : al) {
        System.out.print(entry + " ");
    }
}
//Rotate and create words beginning with all letter possible and push to stack 1
//Read from stack1 and for each word create words with other letters at the next location by rotation and so on
/* eg : man
1. push1 - man, anm, nma
2. pop1 - nma , push2 - nam,nma
pop1 - anm , push2 - amn,anm
pop1 - man , push2 - mna,man
*/
/**
 * Console program that reads a line and prints strings obtained by
 * repeatedly rotating its trailing characters, shuttling the intermediate
 * words between two array-backed stacks.
 *
 * All state is static and shared by every method, so the methods are only
 * meaningful in the order main calls them.
 */
public class StringPermute {
// The line read from standard input.
static String str;
// Scratch word: rotate() reads and overwrites it; popper() loads it from a stack.
static String word;
// Index of the top element of each stack; -1 means empty.
static int top1 = -1;
static int top2 = -1;
// Backing arrays of the two stacks, sized to strlength! in main.
static String[] stringArray1;
static String[] stringArray2;
// Length of the input line.
static int strlength = 0;
/**
 * Reads the input line, sizes both stacks to (length)! entries, seeds
 * stack 1 with the input and runs the generation followed by the print-out.
 */
public static void main(String[] args) throws IOException {
System.out.println("Enter String : ");
InputStreamReader isr = new InputStreamReader(System.in);
BufferedReader bfr = new BufferedReader(isr);
str = bfr.readLine();
word = str;
strlength = str.length();
// n = strlength! (int arithmetic — overflows beyond 12 characters)
int n = 1;
for (int i = 1; i <= strlength; i++) {
n = n * i;
}
stringArray1 = new String[n];
stringArray2 = new String[n];
push(word, 1);
doPermute();
display();
}
/** Pushes word onto stack 1 when x is 1, otherwise onto stack 2. */
public static void push(String word, int x) {
if (x == 1)
stringArray1[++top1] = word;
else
stringArray2[++top2] = word;
}
/** Pops and returns the top of stack 1 when x is 1, otherwise of stack 2. */
public static String pop(int x) {
if (x == 1)
return stringArray1[top1--];
else
return stringArray2[top2--];
}
/** Runs one popper pass per suffix length, from the full length down to 2. */
public static void doPermute() {
for (int j = strlength; j >= 2; j--)
popper(j);
}
/**
 * Empties whichever stack currently holds words: each popped word is
 * rotated `length` times over its last `length` characters, and every
 * rotation is pushed onto the other stack.
 */
public static void popper(int length) {
// pop from stack1 , rotate each word n times and push to stack 2
if (top1 > -1) {
while (top1 > -1) {
word = pop(1);
for (int j = 0; j < length; j++) {
rotate(length);
push(word, 2);
}
}
}
// pop from stack2 , rotate each word n times w.r.t position and push to
// stack 1
else {
while (top2 > -1) {
word = pop(2);
for (int j = 0; j < length; j++) {
rotate(length);
push(word, 1);
}
}
}
}
/**
 * Rotates the last `position` characters of the static `word` one step to
 * the left (the first of them moves to the end) and stores the result
 * back into `word`.
 */
public static void rotate(int position) {
// Fixed 100-slot scratch buffer: a word longer than 100 characters would
// index past its end.
char[] charstring = new char[100];
for (int j = 0; j < word.length(); j++)
charstring[j] = word.charAt(j);
int startpos = strlength - position;
char temp = charstring[startpos];
for (int i = startpos; i < strlength - 1; i++) {
charstring[i] = charstring[i + 1];
}
charstring[strlength - 1] = temp;
// trim() strips the unused '\u0000' slots at the end of the buffer.
// NOTE(review): it also strips leading/trailing whitespace that belongs
// to the word itself, which would shorten it — confirm inputs never
// contain such characters.
word = new String(charstring).trim();
}
/** Prints (and pops) every word left on whichever stack is non-empty. */
public static void display() {
// unused local
int top;
if (top1 > -1) {
while (top1 > -1)
System.out.println(stringArray1[top1--]);
} else {
while (top2 > -1)
System.out.println(stringArray2[top2--]);
}
}
}
Another simple way is to loop through the string, pick the character that is not used yet and put it to a buffer, continue the loop till the buffer size equals to the string length. I like this back tracking solution better because:
Easy to understand
Easy to avoid duplication
The output is sorted
Here is the java code:
/**
 * Returns the distinct permutations of {@code str} in sorted order.
 *
 * @param str the characters to permute; may be {@code null}
 * @return the permutations, or {@code null} when {@code str} is {@code null}
 */
List<String> permute(String str) {
    if (str == null) {
        return null;
    }
    char[] sorted = str.toCharArray();
    // sorting puts equal characters next to each other, which the duplicate check in helper relies on
    Arrays.sort(sorted);
    List<String> collected = new ArrayList<String>();
    helper(sorted, new boolean[sorted.length], new StringBuilder(), collected);
    return collected;
}

/**
 * Backtracking step: extends {@code sb} with each unused character in
 * turn and records the buffer in {@code res} once it is full.
 *
 * @param chars the sorted characters
 * @param used  flags marking which positions are already in {@code sb}
 * @param sb    the permutation under construction
 * @param res   receives the finished permutations
 */
void helper(char[] chars, boolean[] used, StringBuilder sb, List<String> res) {
    if (sb.length() == chars.length) {
        res.add(sb.toString());
        return;
    }
    for (int i = 0; i < chars.length; i++) {
        if (used[i]) {
            continue;
        }
        // An equal character sits right before this one and is not in the
        // buffer: choosing this one here would repeat a result already produced.
        boolean sameAsSkippedNeighbour = i > 0 && chars[i] == chars[i - 1] && !used[i - 1];
        if (sameAsSkippedNeighbour) {
            continue;
        }
        used[i] = true;
        sb.append(chars[i]);
        helper(chars, used, sb, res);
        // back tracking
        sb.setLength(sb.length() - 1);
        used[i] = false;
    }
}
Input str: 1231
Output list: {1123, 1132, 1213, 1231, 1312, 1321, 2113, 2131, 2311, 3112, 3121, 3211}
Noticed that the output is sorted, and there is no duplicate result.
Recursion is not necessary, even you can calculate any permutation directly, this solution uses generics to permute any array.
Here is some good information about this algorithm.
For C# developers here is more useful implementation.
/**
 * Demo: prints every permutation of "12345" by asking Permutation for
 * permutation number 0, 1, 2, ... in turn.
 */
public static void main(String[] args) {
    Character[] array = ArrayUtils.toObject("12345".toCharArray());
    // one entry more than the array length, so factorials[array.length] exists
    long[] factorials = Permutation.getFactorials(array.length + 1);
    long number = 0;
    while (number < factorials[array.length]) {
        printPermutation(Permutation.<Character>getPermutation(number, array, factorials));
        number++;
    }
}
/** Prints the elements of {@code permutation} back to back, followed by a line break. */
private static void printPermutation(Character[] permutation) {
    for (Character symbol : permutation) {
        System.out.print(symbol);
    }
    System.out.println();
}
This algorithm has O(N) time and space complexity to calculate each permutation.
/**
 * Computes the k-th permutation of an array directly, without generating
 * the ones before it. The permutation number is decomposed into
 * factorial-base digits, and each digit becomes one swap.
 */
public class Permutation {

    /**
     * Returns permutation number {@code permutationNumber} of {@code array}.
     *
     * @param permutationNumber index in {@code [0, array.length!)}
     * @param array             the elements to permute; left unmodified
     * @param factorials        table from {@link #getFactorials} with at
     *                          least {@code array.length} entries
     * @return a new array holding the requested permutation (an empty
     *         array stays empty)
     */
    public static <T> T[] getPermutation(long permutationNumber, T[] array, long[] factorials) {
        // max(0, ...): an empty array has no swaps rather than a negative count
        int swaps = Math.max(0, array.length - 1);
        int[] sequence = generateSequence(permutationNumber, swaps, factorials);
        return generatePermutation(array, sequence);
    }

    /**
     * Applies a swap sequence to a copy of {@code array}: position
     * {@code i} is exchanged with position {@code i + sequence[i]}.
     *
     * @param array    the elements to permute; left unmodified
     * @param sequence one offset per position except the last
     * @return the permuted copy
     */
    public static <T> T[] generatePermutation(T[] array, int[] sequence) {
        T[] clone = array.clone();
        for (int i = 0; i < clone.length - 1; i++) {
            swap(clone, i, i + sequence[i]);
        }
        return clone;
    }

    /**
     * Splits {@code permutationNumber} into {@code size} factorial-base
     * digits, most significant first.
     */
    private static int[] generateSequence(long permutationNumber, int size, long[] factorials) {
        int[] sequence = new int[size];
        for (int j = 0; j < sequence.length; j++) {
            long factorial = factorials[sequence.length - j];
            sequence[j] = (int) (permutationNumber / factorial);
            // Keep the remainder as a long: it can exceed the int range once
            // the array has 14 or more elements, and the former (int) cast
            // silently corrupted it.
            permutationNumber %= factorial;
        }
        return sequence;
    }

    /** Exchanges two array slots. */
    private static <T> void swap(T[] array, int i, int j) {
        T t = array[i];
        array[i] = array[j];
        array[j] = t;
    }

    /**
     * Returns the table {@code [0!, 1!, ..., (length-1)!]}.
     *
     * @param length number of entries; values above 21 overflow {@code long}
     * @return the factorial table
     */
    public static long[] getFactorials(int length) {
        long[] factorials = new long[length];
        long factor = 1;
        for (int i = 0; i < length; i++) {
            factor *= i <= 1 ? 1 : i;
            factorials[i] = factor;
        }
        return factorials;
    }
}
My implementation based on Mark Byers's description above:
/**
 * Returns every distinct permutation of the characters of {@code str}.
 *
 * <p>Each character in turn is taken as the first character and prepended to
 * every permutation of the remaining characters. The empty string has exactly
 * one permutation, itself.
 *
 * @param str the characters to permute
 * @return the set of distinct permutations
 */
static Set<String> permutations(String str) {
    if (str.isEmpty()) {
        return Collections.singleton(str);
    }
    Set<String> result = new HashSet<>();
    int length = str.length();
    for (int pick = 0; pick < length; pick++) {
        char head = str.charAt(pick);
        String remaining = str.substring(0, pick) + str.substring(pick + 1);
        for (String tail : permutations(remaining)) {
            result.add(head + tail);
        }
    }
    return result;
}
Permutation of String:
public static void main(String args[]) {
permu(0,"ABCD");
}
static void permu(int fixed,String s) {
char[] chr=s.toCharArray();
if(fixed==s.length())
System.out.println(s);
for(int i=fixed;i<s.length();i++) {
char c=chr[i];
chr[i]=chr[fixed];
chr[fixed]=c;
permu(fixed+1,new String(chr));
}
}
Here is another simpler method of doing Permutation of a string.
public class Solution4 {
public static void main(String[] args) {
String a = "Protijayi";
per(a, 0);
}
static void per(String a , int start ) {
//bse case;
if(a.length() == start) {System.out.println(a);}
char[] ca = a.toCharArray();
//swap
for (int i = start; i < ca.length; i++) {
char t = ca[i];
ca[i] = ca[start];
ca[start] = t;
per(new String(ca),start+1);
}
}//per
}
A Java implementation that prints all the permutations of a given string, handles duplicate characters, and prints each distinct permutation only once is as follows:
import java.util.Set;
import java.util.HashSet;
public class PrintAllPermutations2
{
public static void main(String[] args)
{
String str = "AAC";
PrintAllPermutations2 permutation = new PrintAllPermutations2();
Set<String> uniqueStrings = new HashSet<>();
permutation.permute("", str, uniqueStrings);
}
void permute(String prefixString, String s, Set<String> set)
{
int n = s.length();
if(n == 0)
{
if(!set.contains(prefixString))
{
System.out.println(prefixString);
set.add(prefixString);
}
}
else
{
for(int i=0; i<n; i++)
{
permute(prefixString + s.charAt(i), s.substring(0,i) + s.substring(i+1,n), set);
}
}
}
}
String permutations using ES6
Using reduce() method
const permutations = str => {
if (str.length <= 2)
return str.length === 2 ? [str, str[1] + str[0]] : [str];
return str
.split('')
.reduce(
(acc, letter, index) =>
acc.concat(permutations(str.slice(0, index) + str.slice(index + 1)).map(val => letter + val)),
[]
);
};
console.log(permutations('STR'));
In case anyone wants to generate the permutations to do something with them, instead of just printing them via a void method:
/**
 * Builds the array {0, 1, ..., n-1}, permutes it in place with a local helper,
 * and returns the snapshots collected along the way.
 *
 * NOTE(review): the swap pattern looks like a variant of Heap's algorithm that
 * also swaps after the last recursive call of each level; confirm before
 * relying on a particular output order.
 *
 * @param n number of elements; for n == 0 the loop in perm() never runs and the
 *          returned list is empty
 * @return the collected int[] snapshots, each an independent copy
 */
static List<int[]> permutations(int n) {
// Local class so the recursion can accumulate results without a static field.
class Perm {
private final List<int[]> permutations = new ArrayList<>();
// Records a copy of the array when step reaches 1; otherwise recurses on the
// first (step - 1) positions and then swaps a position into slot (step - 1).
private void perm(int[] array, int step) {
if (step == 1) permutations.add(array.clone());
else for (int i = 0; i < step; i++) {
perm(array, step - 1);
// Even step: swap with the loop index; odd step: always swap with index 0.
int j = (step % 2 == 0) ? i : 0;
swap(array, step - 1, j);
}
}
// Exchanges array[i] and array[j].
private void swap(int[] array, int i, int j) {
int buffer = array[i];
array[i] = array[j];
array[j] = buffer;
}
}
// Identity arrangement 0..n-1 is the starting point.
int[] nVector = new int[n];
for (int i = 0; i < n; i++) nVector [i] = i;
Perm perm = new Perm();
perm.perm(nVector, n);
return perm.permutations;
}

How can i extract trend words from given dataset (Java)? [duplicate]

How to generate an n-gram of a string like:
String Input="This is my car."
I want to generate n-gram with this input:
Input Ngram size = 3
Output should be:
This
is
my
car
This is
is my
my car
This is my
is my car
Give some idea in Java, how to implement that or if any library is available for it.
I am trying to use this NGramTokenizer but its giving n-gram's of character sequence and I want n-grams of word sequence.
I believe this would do what you want:
import java.util.*;
public class Test {
public static List<String> ngrams(int n, String str) {
List<String> ngrams = new ArrayList<String>();
String[] words = str.split(" ");
for (int i = 0; i < words.length - n + 1; i++)
ngrams.add(concat(words, i, i+n));
return ngrams;
}
public static String concat(String[] words, int start, int end) {
StringBuilder sb = new StringBuilder();
for (int i = start; i < end; i++)
sb.append((i > start ? " " : "") + words[i]);
return sb.toString();
}
public static void main(String[] args) {
for (int n = 1; n <= 3; n++) {
for (String ngram : ngrams(n, "This is my car."))
System.out.println(ngram);
System.out.println();
}
}
}
Output:
This
is
my
car.
This is
is my
my car.
This is my
is my car.
An "on-demand" solution implemented as an Iterator:
class NgramIterator implements Iterator<String> {
String[] words;
int pos = 0, n;
public NgramIterator(int n, String str) {
this.n = n;
words = str.split(" ");
}
public boolean hasNext() {
return pos < words.length - n + 1;
}
public String next() {
StringBuilder sb = new StringBuilder();
for (int i = pos; i < pos + n; i++)
sb.append((i > pos ? " " : "") + words[i]);
pos++;
return sb.toString();
}
public void remove() {
throw new UnsupportedOperationException();
}
}
You are looking for ShingleFilter.
Update: The link points to version 3.0.2. This class may be in different package in newer version of Lucene.
This code returns an array of all Strings of the given length:
/**
 * Returns all word n-grams of exactly {@code len} words from a
 * space-separated sentence.
 *
 * @param s   sentence whose words are separated by single spaces
 * @param len number of words per n-gram
 * @return the n-grams in sentence order; an empty array when the sentence has
 *         fewer than {@code len} words
 */
public static String[] ngrams(String s, int len) {
    String[] parts = s.split(" ");
    // Clamp at zero: a sentence shorter than len has no n-grams, and a negative
    // size would otherwise raise NegativeArraySizeException.
    int count = Math.max(0, parts.length - len + 1);
    String[] result = new String[count];
    for (int i = 0; i < count; i++) {
        StringBuilder sb = new StringBuilder();
        for (int k = 0; k < len; k++) {
            if (k > 0) {
                sb.append(' ');
            }
            sb.append(parts[i + k]);
        }
        result[i] = sb.toString();
    }
    return result;
}
E.g.
System.out.println(Arrays.toString(ngrams("This is my car", 2)));
//--> [This is, is my, my car]
System.out.println(Arrays.toString(ngrams("This is my car", 3)));
//--> [This is my, is my car]
/**
 * Generates every contiguous word n-gram of size 1 up to {@code maxGramSize}.
 *
 * <p>Words are the maximal runs of word characters in {@code str}; punctuation
 * and whitespace only separate words. For each word, in sentence order, the
 * result lists the word itself and then the n-grams that end at that word,
 * from shortest to longest.
 *
 * @param str         the sentence to split into words
 * @param maxGramSize largest n-gram size to generate; single words are always included
 * @return list of contiguous word n-grams up to maxGramSize from the sentence
 */
public static List<String> generateNgramsUpto(String str, int maxGramSize) {
    // "\\W+" splits on runs of non-word characters. The previous "[\\W+]" was a
    // character class, so it split on each single separator and produced empty
    // words for inputs such as "a,  b".
    List<String> sentence = new ArrayList<String>();
    for (String token : str.split("\\W+")) {
        if (!token.isEmpty()) { // a leading separator still yields one empty token
            sentence.add(token);
        }
    }
    List<String> ngrams = new ArrayList<String>();
    for (int end = 0; end < sentence.size(); end++) {
        String word = sentence.get(end);
        // 1- add the word itself
        StringBuilder sb = new StringBuilder(word);
        ngrams.add(word);
        // 2- prepend the preceding words one at a time and add those too
        int ngramSize = 1;
        for (int start = end - 1; start >= 0 && ngramSize < maxGramSize; start--) {
            sb.insert(0, ' ');
            sb.insert(0, sentence.get(start));
            ngrams.add(sb.toString());
            ngramSize++;
        }
    }
    return ngrams;
}
Call:
long startTime = System.currentTimeMillis();
ngrams = ToolSet.generateNgramsUpto("This is my car.", 3);
long stopTime = System.currentTimeMillis();
System.out.println("My time = "+(stopTime-startTime)+" ms with ngramsize = "+ngrams.size());
System.out.println(ngrams.toString());
Output:
My time = 1 ms with ngramsize = 9 [This, is, This is, my, is my, This
is my, car, my car, is my car]
public static void CreateNgram(ArrayList<String> list, int cutoff) {
try
{
NGramModel ngramModel = new NGramModel();
POSModel model = new POSModelLoader().load(new File("en-pos-maxent.bin"));
PerformanceMonitor perfMon = new PerformanceMonitor(System.err, "sent");
POSTaggerME tagger = new POSTaggerME(model);
perfMon.start();
for(int i = 0; i<list.size(); i++)
{
String inputString = list.get(i);
ObjectStream<String> lineStream = new PlainTextByLineStream(new StringReader(inputString));
String line;
while ((line = lineStream.read()) != null)
{
String whitespaceTokenizerLine[] = WhitespaceTokenizer.INSTANCE.tokenize(line);
String[] tags = tagger.tag(whitespaceTokenizerLine);
POSSample sample = new POSSample(whitespaceTokenizerLine, tags);
perfMon.incrementCounter();
String words[] = sample.getSentence();
if(words.length > 0)
{
for(int k = 2; k< 4; k++)
{
ngramModel.add(new StringList(words), k, k);
}
}
}
}
ngramModel.cutoff(cutoff, Integer.MAX_VALUE);
Iterator<StringList> it = ngramModel.iterator();
while(it.hasNext())
{
StringList strList = it.next();
System.out.println(strList.toString());
}
perfMon.stopAndPrintFinalResult();
}catch(Exception e)
{
System.out.println(e.toString());
}
}
Here is my code to create n-grams; in this case n = 2 and 3. Word n-grams that occur fewer times than the cutoff value are dropped from the result set. The input is a list of sentences, which are parsed with OpenNLP.
public static void main(String[] args) {
String[] words = "This is my car.".split(" ");
for (int n = 0; n < 3; n++) {
List<String> list = ngrams(n, words);
for (String ngram : list) {
System.out.println(ngram);
}
System.out.println();
}
}
/**
 * Returns every run of {@code stepSize + 1} consecutive words, joined by
 * single spaces.
 *
 * @param stepSize number of words that follow the first word of each n-gram
 *                 (0 yields single words, 1 yields bigrams, and so on)
 * @param words    the tokenized sentence
 * @return the n-grams in sentence order, without leading or trailing spaces
 */
public static List<String> ngrams(int stepSize, String[] words) {
    List<String> ngrams = new ArrayList<String>();
    for (int i = 0; i < words.length - stepSize; i++) {
        StringBuilder gram = new StringBuilder();
        int last = i + stepSize;
        for (int k = i; k <= last && k < words.length; k++) {
            // Separator goes between words only; the old code put a space
            // before every word, so each n-gram began with a stray space.
            if (k > i) {
                gram.append(' ');
            }
            gram.append(words[k]);
        }
        ngrams.add(gram.toString());
    }
    return ngrams;
}
Check this out:
public static void main(String[] args) {
NGram nGram = new NGram();
String[] tokens = "this is my car".split(" ");
int i = tokens.length;
List<String> ngrams = new ArrayList<>();
while (i >= 1){
ngrams.addAll(nGram.getNGram(tokens, i, new ArrayList<>()));
i--;
}
System.out.println(ngrams);
}
private List<String> getNGram(String[] tokens, int n, List<String> ngrams) {
StringBuilder strbldr = new StringBuilder();
if (tokens.length < n) {
return ngrams;
}else {
for (int i=0; i<n; i++){
strbldr.append(tokens[i]).append(" ");
}
ngrams.add(strbldr.toString().trim());
String[] newTokens = Arrays.copyOfRange(tokens, 1, tokens.length);
return getNGram(newTokens, n, ngrams);
}
}
Simple recursive function, better running time.

Same word occurrence in a string. Java [duplicate]

I am writing a very basic java program that calculates frequency of each word in a sentence so far i managed to do this much
import java.io.*;
class Linked {
public static void main(String args[]) throws IOException {
BufferedReader br = new BufferedReader(
new InputStreamReader(System.in));
System.out.println("Enter the sentence");
String st = br.readLine();
st = st + " ";
int a = lengthx(st);
String arr[] = new String[a];
int p = 0;
int c = 0;
for (int j = 0; j < st.length(); j++) {
if (st.charAt(j) == ' ') {
arr[p++] = st.substring(c,j);
c = j + 1;
}
}
}
static int lengthx(String a) {
int p = 0;
for (int j = 0; j < a.length(); j++) {
if (a.charAt(j) == ' ') {
p++;
}
}
return p;
}
}
I have extracted each string and stored it in a array , now problem is actually how to count the no of instances where each 'word' is repeated and how to display so that repeated words not get displayed multiple times , can you help me in this one ?
Use a map with the word as the key and its count as the value, something like this:
Map<String, Integer> map = new HashMap<>();
for (String w : words) {
Integer n = map.get(w);
n = (n == null) ? 1 : ++n;
map.put(w, n);
}
If you are not allowed to use java.util, you can sort arr with some sorting algorithm and then do this:
// Collapse the SORTED array arr into parallel arrays: words[k] holds the k-th
// distinct word and counts[k] how many times it occurs. Equal words are
// adjacent after sorting, so one pass is enough. Slots past the last distinct
// word stay null / 0.
String[] words = new String[arr.length];
int[] counts = new int[arr.length];
if (arr.length > 0) {
    // Seed with the first input word. The previous words[0] = words[0] was a
    // self-assignment that left words[0] null and failed on the first equals().
    words[0] = arr[0];
    counts[0] = 1;
    for (int i = 1, j = 0; i < arr.length; i++) {
        if (words[j].equals(arr[i])) {
            counts[j]++;
        } else {
            // New distinct word: open the next slot.
            j++;
            words[j] = arr[i];
            counts[j] = 1;
        }
    }
}
An interesting solution with ConcurrentHashMap since Java 8
ConcurrentMap<String, Integer> m = new ConcurrentHashMap<>();
m.compute("x", (k, v) -> v == null ? 1 : v + 1);
In Java 8, you can write this in two simple lines! In addition you can take advantage of parallel computing.
Here's the most beautiful way to do this:
Stream<String> stream = Stream.of(text.toLowerCase().split("\\W+")).parallel();
Map<String, Long> wordFreq = stream
.collect(Collectors.groupingBy(String::toString,Collectors.counting()));
import java.util.*;
/** Counts how often each space-separated word occurs in a fixed sentence and prints the tally. */
public class WordCounter {

    public static void main(String[] args) {
        String s = "this is a this is this a this yes this is a this what it may be i do not care about this";
        Map<String, Integer> words = new HashMap<>();
        for (String token : s.split(" ")) {
            // merge() inserts 1 for a new word and adds 1 to an existing count.
            words.merge(token, 1, Integer::sum);
        }
        System.out.println(words);
    }
}
Output:
{a=3, be=1, may=1, yes=1, this=7, about=1, i=1, is=3, it=1, do=1, not=1, what=1, care=1}
Try this
public class Main
{
public static void main(String[] args)
{
String text = "the quick brown fox jumps fox fox over the lazy dog brown";
String[] keys = text.split(" ");
String[] uniqueKeys;
int count = 0;
System.out.println(text);
uniqueKeys = getUniqueKeys(keys);
for(String key: uniqueKeys)
{
if(null == key)
{
break;
}
for(String s : keys)
{
if(key.equals(s))
{
count++;
}
}
System.out.println("Count of ["+key+"] is : "+count);
count=0;
}
}
private static String[] getUniqueKeys(String[] keys)
{
String[] uniqueKeys = new String[keys.length];
uniqueKeys[0] = keys[0];
int uniqueKeyIndex = 1;
boolean keyAlreadyExists = false;
for(int i=1; i<keys.length ; i++)
{
for(int j=0; j<=uniqueKeyIndex; j++)
{
if(keys[i].equals(uniqueKeys[j]))
{
keyAlreadyExists = true;
}
}
if(!keyAlreadyExists)
{
uniqueKeys[uniqueKeyIndex] = keys[i];
uniqueKeyIndex++;
}
keyAlreadyExists = false;
}
return uniqueKeys;
}
}
Output:
the quick brown fox jumps fox fox over the lazy dog brown
Count of [the] is : 2
Count of [quick] is : 1
Count of [brown] is : 2
Count of [fox] is : 3
Count of [jumps] is : 1
Count of [over] is : 1
Count of [lazy] is : 1
Count of [dog] is : 1
From Java 10 you can use the following:
import java.util.Arrays;
import java.util.stream.Collectors;
public class StringFrequencyMap {
public static void main(String... args){
String[] wordArray = {"One", "One", "Two","Three", "Two", "two"};
var freq = Arrays.stream(wordArray)
.collect(Collectors.groupingBy(x -> x, Collectors.counting()));
System.out.println(freq);
}
}
Output:
{One=2, two=1, Two=2, Three=1}
You could try this
public static void frequency(String s) {
String trimmed = s.trim().replaceAll(" +", " ");
String[] a = trimmed.split(" ");
ArrayList<Integer> p = new ArrayList<>();
for (int i = 0; i < a.length; i++) {
if (p.contains(i)) {
continue;
}
int d = 1;
for (int j = i+1; j < a.length; j++) {
if (a[i].equals(a[j])) {
d += 1;
p.add(j);
}
}
System.out.println("Count of "+a[i]+" is:"+d);
}
}
package naresh.java;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Set;
public class StringWordDuplicates {
static void duplicate(String inputString){
HashMap<String, Integer> wordCount = new HashMap<String,Integer>();
String[] words = inputString.split(" ");
for(String word : words){
if(wordCount.containsKey(word)){
wordCount.put(word, wordCount.get(word)+1);
}
else{
wordCount.put(word, 1);
}
}
//Extracting of all keys of word count
Set<String> wordsInString = wordCount.keySet();
for(String word : wordsInString){
if(wordCount.get(word)>1){
System.out.println(word+":"+wordCount.get(word));
}
}
}
public static void main(String args[]){
duplicate("I am Java Programmer and IT Server Programmer with Java as Best Java lover");
}
}
/** Counts how many space-separated words of a sentence equal a given word. */
class find {

    /**
     * Prints the number of words in {@code nm} that are exactly equal to {@code w}.
     *
     * @param nm sentence whose words are separated by spaces
     * @param w  the word to look for
     */
    public static void main(String nm, String w) {
        int count = 0;
        StringBuilder word = new StringBuilder();
        for (int i = 0; i < nm.length(); i++) {
            char ch = nm.charAt(i);
            if (ch != ' ') {
                word.append(ch);
            } else {
                if (w.contentEquals(word)) {
                    count++;
                }
                word.setLength(0);
            }
        }
        // The last word has no trailing space to end it, so compare it here;
        // previously it was never counted.
        if (word.length() > 0 && w.contentEquals(word)) {
            count++;
        }
        System.out.println(count);
    }
}
public class wordFrequency {
private static Scanner scn;
public static void countwords(String sent) {
sent = sent.toLowerCase().replaceAll("[^a-z ]", "");
ArrayList<String> arr = new ArrayList<String>();
String[] sentarr = sent.split(" ");
Map<String, Integer> a = new HashMap<String, Integer>();
for (String word : sentarr) {
arr.add(word);
}
for (String word : arr) {
int count = Collections.frequency(arr, word);
a.put(word, count);
}
for (String key : a.keySet()) {
System.out.println(key + " = " + a.get(key));
}
}
public static void main(String[] args) {
scn = new Scanner(System.in);
System.out.println("Enter sentence:");
String inp = scn.nextLine();
countwords(inp);
}
}
Determine the frequency of words in a file.
// Tally how often each whitespace-delimited token occurs in the file.
File f = new File(fileName);
// Map is an interface and cannot be instantiated; use a concrete HashMap.
Map<String, Integer> counts = new java.util.HashMap<String, Integer>();
// try-with-resources closes the Scanner (and the underlying file) even if
// reading fails part-way through.
try (Scanner s = new Scanner(f)) {
    while (s.hasNext()) {
        String word = s.next();
        if (!counts.containsKey(word)) {
            counts.put(word, 1);
        } else {
            counts.put(word, counts.get(word) + 1);
        }
    }
}
The following program finds the frequency, sorts it accordingly, and prints it.
Below is the output grouped by frequency:
0-10:
The 2
Is 4
11-20:
Have 13
Done 15
Here is my program:
package com.company;
import java.io.*;
import java.util.*;
import java.lang.*;
/**
* Created by ayush on 12/3/17.
*/
/**
 * Reads one sentence from standard input, prints its words and their
 * frequencies, and then prints the frequencies again sorted by ascending count.
 */
public class Linked {

    public static void main(String args[]) throws IOException {
        BufferedReader br = new BufferedReader(
                new InputStreamReader(System.in));
        System.out.println("Enter the sentence");
        String st = br.readLine();
        if (st == null) {
            // End of input before any line was typed: treat it as an empty sentence.
            st = "";
        }
        st = st.trim();
        // A trailing space terminates the last word, so every word ends at a space.
        st = st + " ";
        int count = lengthx(st);
        System.out.println(count);

        // Split manually on spaces: one word per space found.
        String arr[] = new String[count];
        int p = 0;
        int c = 0;
        for (int i = 0; i < st.length(); i++) {
            if (st.charAt(i) == ' ') {
                arr[p] = st.substring(c, i);
                System.out.println(arr[p]);
                c = i + 1;
                p++;
            }
        }

        Map<String, Integer> map = new HashMap<>();
        for (String w : arr) {
            map.merge(w, 1, Integer::sum);
        }
        for (String key : map.keySet()) {
            System.out.println(key + " = " + map.get(key));
        }

        // Sort the entries by count; the sort is stable, so ties keep map order.
        List<Map.Entry<String, Integer>> listOfEntries =
                new ArrayList<Map.Entry<String, Integer>>(map.entrySet());
        Collections.sort(listOfEntries, new Comparator<Map.Entry<String, Integer>>() {
            @Override
            public int compare(Map.Entry<String, Integer> e1, Map.Entry<String, Integer> e2) {
                return e1.getValue().compareTo(e2.getValue());
            }
        });

        // LinkedHashMap keeps the sorted insertion order for printing.
        LinkedHashMap<String, Integer> sortedByValue =
                new LinkedHashMap<String, Integer>(listOfEntries.size());
        for (Map.Entry<String, Integer> entry : listOfEntries) {
            sortedByValue.put(entry.getKey(), entry.getValue());
        }

        System.out.println("HashMap after sorting entries by values ");
        for (Map.Entry<String, Integer> mapping : sortedByValue.entrySet()) {
            System.out.println(mapping.getKey() + " ==> " + mapping.getValue());
        }
    }

    /**
     * Counts the space characters in {@code a}.
     *
     * @param a the text to scan
     * @return the number of {@code ' '} characters
     */
    static int lengthx(String a) {
        int count = 0;
        for (int j = 0; j < a.length(); j++) {
            if (a.charAt(j) == ' ') {
                count++;
            }
        }
        return count;
    }
}
import java.io.*;
class Linked {
public static void main(String args[]) throws IOException {
BufferedReader br = new BufferedReader(
new InputStreamReader(System.in));
System.out.println("Enter the sentence");
String st = br.readLine();
st = st + " ";
int a = lengthx(st);
String arr[] = new String[a];
int p = 0;
int c = 0;
for (int j = 0; j < st.length(); j++) {
if (st.charAt(j) == ' ') {
arr[p++] = st.substring(c,j);
c = j + 1;
}
}
}
static int lengthx(String a) {
int p = 0;
for (int j = 0; j < a.length(); j++) {
if (a.charAt(j) == ' ') {
p++;
}
}
return p;
}
}
Simply use Java 8 Stream collectors groupby function:
import java.util.function.Function;
import java.util.stream.Collectors;
static String[] COUNTRY_NAMES
= { "China", "Australia", "India", "USA", "USSR", "UK", "China",
"France", "Poland", "Austria", "India", "USA", "Egypt", "China" };
Map<String, Long> result = Stream.of(COUNTRY_NAMES).collect(
Collectors.groupingBy(Function.identity(), Collectors.counting()));
Count frequency of elements of list in java 8
List<Integer> list = new ArrayList<Integer>();
Collections.addAll(list,3,6,3,8,4,9,3,6,9,4,8,3,7,2);
Map<Integer, Long> frequencyMap = list.stream().collect(Collectors.groupingBy(Function.identity(),Collectors.counting()));
System.out.println(frequencyMap);
Note :
For String frequency counting split the string and convert it to list and use streams for count frequency => (Map frequencyMap)*
Check below link
String s[]=st.split(" ");
String sf[]=new String[s.length];
int count[]=new int[s.length];
sf[0]=s[0];
int j=1;
count[0]=1;
for(int i=1;i<s.length;i++)
{
int t=j-1;
while(t>=0)
{
if(s[i].equals(sf[t]))
{
count[t]++;
break;
}
t--;
}
if(t<0)
{
sf[j]=s[i];
count[j]++;
j++;
}
}
I created a simple, easy-to-understand solution for this problem that covers all test cases:
import java.util.HashMap;
import java.util.Map;
/*
* Problem Statement - Count Frequency of each word in a given string, ignoring special characters and space
* Input 1 - "To be or Not to be"
* Output 1 - to(2 times), be(2 times), or(1 time), not(1 time)
*
* Input 2 -"Star 123 ### 123 star"
* Output - Star(2 times), 123(2 times)
*/
public class FrequencyofWords {
public static void main(String[] args) {
String s1="To be or not **** to be! is all i ask for";
fnFrequencyofWords(s1);
}
//-------Supporting Function-----------------
static void fnFrequencyofWords(String s1) {
//------- Convert String to proper format----
s1=s1.replaceAll("[^A-Za-z0-9\\s]","");
s1=s1.replaceAll(" +"," ");
s1=s1.toLowerCase();
//-------Create String to an array with words------
String[] s2=s1.split(" ");
System.out.println(s1);
//-------- Create a HashMap to store each word and its count--
Map <String , Integer> map=new HashMap<String, Integer>();
for(int i=0;i<s2.length;i++) {
if(map.containsKey(s2[i])) //---- Verify if Word Already Exits---
{
map.put(s2[i], 1+ map.get(s2[i])); //-- Increment value by 1 if word already exits--
}
else {
map.put(s2[i], 1); // --- Add Word to map and set value as 1 if it does not exist in map--
}
}
System.out.println(map); //--- Print the HashMap with Key, Value Pair-------
}
}
/**
 * Prints how many times each word occurs in a fixed sentence, one line per
 * distinct word, in order of first appearance.
 */
public class WordFrequencyProblem {

    public static void main(String args[]) {
        String s = "the quick brown fox jumps fox fox over the lazy dog brown";
        for (java.util.Map.Entry<String, Integer> entry : countWords(s).entrySet()) {
            System.out.println(entry.getKey() + "Present " + entry.getValue());
        }
    }

    /**
     * Counts the words of {@code s}, where words are separated by whitespace
     * characters or full stops.
     *
     * <p>Whole words are compared. The earlier version tracked processed words
     * with {@code String.contains}, so a word that was a substring of an
     * earlier one (such as "he" after "the") was silently skipped.
     *
     * @param s the sentence to analyse
     * @return word counts, iterated in order of first occurrence; empty tokens
     *         between adjacent separators are ignored
     */
    static java.util.Map<String, Integer> countWords(String s) {
        java.util.Map<String, Integer> counts = new java.util.LinkedHashMap<String, Integer>();
        for (String word : s.split("\\s|\\.")) {
            if (word.isEmpty()) {
                continue;
            }
            counts.merge(word, 1, Integer::sum);
        }
        return counts;
    }
}
public class TestSplit {
public static void main(String[] args) {
String input="Find the repeated word which is repeated in this string";
List<String> output= (List) Arrays.asList(input.split(" "));
for(String str: output) {
int occurrences = Collections.frequency(output, str);
System.out.println("Occurence of " + str+ " is "+occurrences);
}
System.out.println(output);
}
}
Please try this; it may help you.
public static void main(String[] args) {
    String str1 = "I am indian , I am proud to be indian proud.";
    Map<String, Integer> map = findFrquenciesInString(str1);
    System.out.println(map);
}

/**
 * Finds the words that occur more than once in a space-separated sentence.
 *
 * <p>Words are compared by value. The earlier version overwrote matched words
 * with a "-1" marker and tested it with {@code !=}, which only worked because
 * of String reference identity and took quadratic time.
 *
 * @param str1 sentence whose words are separated by single spaces
 * @return a map from each repeated word to its number of occurrences; words
 *         that occur only once are not included
 */
private static Map<String, Integer> findFrquenciesInString(String str1) {
    String[] strArr = str1.split(" ");
    Map<String, Integer> counts = new HashMap<>();
    for (String word : strArr) {
        counts.merge(word, 1, Integer::sum);
    }
    // Second pass keeps only repeated words, added in order of first appearance
    // as before.
    Map<String, Integer> map = new HashMap<>();
    for (String word : strArr) {
        int count = counts.get(word);
        if (count > 1) {
            map.put(word, count);
        }
    }
    return map;
}
try this
public void count()throws IOException
{
BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
System.out.println("enetr the strring");
String s = in.readLine();
int l = s.length();
int a=0,b=0,c=0,i,j,y=0;
char d;
String x;
String n[] = new String [50];
int m[] = new int [50];
for (i=0;i<50;i++)
{
m[i]=0;
}
for (i=0;i<l;i++)
{
d = s.charAt(i);
if((d==' ')||(d=='.'))
{
x = s.substring(a,i);
a= i+1;
for(j=0;j<b;j++)
{
if(x.equalsIgnoreCase(n[j]) == true)
{
m[j]++;
c = 1;
}
}
if(c==0)
{
n[b] = x;
m[b] = 1;
b++;
}
}
c=0;
}
for(i=0;i<b;i++)
{
for (j=0;j<b;j++)
{
if(y<m[j])
{
y=m[j];
}
}
if(m[i]==y)
{
System.out.println(n[i] + " : " + m[i]);
m[i]=0;
}
y=0;
}
}

What is the program for string conversion like a2b4c5 into aabbbbccccc in java language?

I got this program from someone, but I could not follow it. Could anyone please tell me how to do this in an easier way? Thank you.
/** Expands a run-length style string such as "a2b4c5" into "aabbbbccccc". */
public class Program {

    /**
     * @param args unused
     */
    public static void main(String[] args) {
        String str = "a11b4c5";
        System.out.println(getAnswerByPassingString(str));
    }

    /**
     * Repeats each non-digit character of {@code str} as many times as its
     * matching number says.
     *
     * <p>Characters and numbers are paired by order of appearance: the first
     * non-digit character is matched with the first run of digits, the second
     * with the second run, and so on. Multi-digit counts such as "a11" are
     * supported.
     *
     * @param str the encoded text, e.g. "a2b4c5"
     * @return the expanded text, e.g. "aabbbbccccc"
     * @throws IndexOutOfBoundsException if there are more non-digit characters
     *                                   than numbers
     * @throws NumberFormatException     if a run of digits does not fit in an int
     */
    public static String getAnswerByPassingString(String str) {
        // Every non-digit character is one unit to repeat.
        ArrayList<String> stringList = new ArrayList<String>();
        for (int i = 0; i < str.length(); i++) {
            char a = str.charAt(i);
            if (!Character.isDigit(a)) {
                stringList.add(String.valueOf(a));
            }
        }

        // Every maximal run of digits is one repeat count.
        ArrayList<Integer> numbersList = new ArrayList<Integer>();
        Matcher m = Pattern.compile("\\d+").matcher(str);
        while (m.find()) {
            numbersList.add(Integer.parseInt(m.group()));
        }

        // Append each unit exactly 'repeat' times. The earlier version doubled
        // the string once per count and then cut it down, which needed 2^count
        // characters and ran out of memory for counts of only a few dozen.
        StringBuilder resStr = new StringBuilder();
        for (int i = 0; i < stringList.size(); i++) {
            int repeat = numbersList.get(i);
            String unit = stringList.get(i);
            for (int k = 0; k < repeat; k++) {
                resStr.append(unit);
            }
        }
        return resStr.toString();
    }
}
Using the below code you can get numbers from the string :
/**
 * Demonstrates splitting a mixed string into its numbers and its runs of
 * letters, printing each list on its own line.
 */
public static void main(String[] args) {
    String str = "a2bc45cd5";

    // Each maximal run of digits becomes one integer.
    Matcher m = Pattern.compile("\\d+").matcher(str);
    List<Integer> numbers = new ArrayList<Integer>();
    while (m.find()) {
        numbers.add(Integer.parseInt(m.group()));
    }
    System.out.println(numbers);

    // Each maximal run of ASCII letters becomes one string. The range must be
    // spelled [A-Za-z]: [A-z] also covers the six characters between 'Z' and
    // 'a' in ASCII ([ \ ] ^ _ `).
    Matcher m1 = Pattern.compile("[A-Za-z]+").matcher(str);
    List<String> string = new ArrayList<String>();
    while (m1.find()) {
        string.add(m1.group());
    }
    System.out.println(string);
}
//Output :
[2, 45, 5] // For numbers
[a, bc, cd] // For string
After extracting the numbers, you can use a loop to build the desired output.

permutations of a string using iteration

I'm trying to find permutation of a given string, but I want to use iteration. The recursive solution I found online and I do understand it, but converting it to an iterative solution is really not working out. Below I have attached my code. I would really appreciate the help:
public static void combString(String s) {
char[] a = new char[s.length()];
//String temp = "";
for(int i = 0; i < s.length(); i++) {
a[i] = s.charAt(i);
}
for(int i = 0; i < s.length(); i++) {
String temp = "" + a[i];
for(int j = 0; j < s.length();j++) {
//int k = j;
if(i != j) {
System.out.println(j);
temp += s.substring(0,j) + s.substring(j+1,s.length());
}
}
System.out.println(temp);
}
}
Following up on my related question comment, here's a Java implementation that does what you want using the Counting QuickPerm Algorithm:
public static void combString(String s) {
// Print initial string, as only the alterations will be printed later
System.out.println(s);
char[] a = s.toCharArray();
int n = a.length;
int[] p = new int[n]; // Weight index control array initially all zeros. Of course, same size of the char array.
int i = 1; //Upper bound index. i.e: if string is "abc" then index i could be at "c"
while (i < n) {
if (p[i] < i) { //if the weight index is bigger or the same it means that we have already switched between these i,j (one iteration before).
int j = ((i % 2) == 0) ? 0 : p[i];//Lower bound index. i.e: if string is "abc" then j index will always be 0.
swap(a, i, j);
// Print current
System.out.println(join(a));
p[i]++; //Adding 1 to the specific weight that relates to the char array.
i = 1; //if i was 2 (for example), after the swap we now need to swap for i=1
}
else {
p[i] = 0;//Weight index will be zero because one iteration before, it was 1 (for example) to indicate that char array a[i] swapped.
i++;//i index will have the option to go forward in the char array for "longer swaps"
}
}
}
private static String join(char[] a) {
StringBuilder builder = new StringBuilder();
builder.append(a);
return builder.toString();
}
private static void swap(char[] a, int i, int j) {
char temp = a[i];
a[i] = a[j];
a[j] = temp;
}
List<String> results = new ArrayList<String>();
String test_str = "abcd";
char[] chars = test_str.toCharArray();
results.add(new String("" + chars[0]));
for(int j=1; j<chars.length; j++) {
char c = chars[j];
int cur_size = results.size();
//create new permutations combing char 'c' with each of the existing permutations
for(int i=cur_size-1; i>=0; i--) {
String str = results.remove(i);
for(int l=0; l<=str.length(); l++) {
results.add(str.substring(0,l) + c + str.substring(l));
}
}
}
System.out.println("Number of Permutations: " + results.size());
System.out.println(results);
Example:
If we have a 3-character string, e.g. "abc", we can form its permutations as below.
1) construct a string with first character e.g. 'a' and store that in results.
char[] chars = test_str.toCharArray();
results.add(new String("" + chars[0]));
2) Now take the next character in the string (i.e. 'b') and insert it at every possible position of the previously constructed strings in results. Since we have only one string in results ("a") at this point, doing so gives us 2 new strings, 'ba' and 'ab'. Insert these newly constructed strings in results and remove "a".
for(int i=cur_size-1; i>=0; i--) {
String str = results.remove(i);
for(int l=0; l<=str.length(); l++) {
results.add(str.substring(0,l) + c + str.substring(l));
}
}
3) Repeat 2) for every character in the given string.
for(int j=1; j<chars.length; j++) {
char c = chars[j];
....
....
}
This gives us "cba", "bca", "bac" from "ba" and "cab", "acb" and "abc" from "ab"
Work queue allows us to create an elegant iterative solution for this problem.
static List<String> permutations(String string) {
List<String> permutations = new LinkedList<>();
Deque<WorkUnit> workQueue = new LinkedList<>();
// We need to permutate the whole string and haven't done anything yet.
workQueue.add(new WorkUnit(string, ""));
while (!workQueue.isEmpty()) { // Do we still have any work?
WorkUnit work = workQueue.poll();
// Permutate each character.
for (int i = 0; i < work.todo.length(); i++) {
String permutation = work.done + work.todo.charAt(i);
// Did we already build a complete permutation?
if (permutation.length() == string.length()) {
permutations.add(permutation);
} else {
// Otherwise what characters are left?
String stillTodo = work.todo.substring(0, i) + work.todo.substring(i + 1);
workQueue.add(new WorkUnit(stillTodo, permutation));
}
}
}
return permutations;
}
A helper class to hold partial results is very simple.
/**
* Immutable unit of work
*/
class WorkUnit {
final String todo;
final String done;
WorkUnit(String todo, String done) {
this.todo = todo;
this.done = done;
}
}
You can test the above piece of code by wrapping them in this class.
import java.util.*;
public class AllPermutations {
public static void main(String... args) {
String str = args[0];
System.out.println(permutations(str));
}
static List<String> permutations(String string) {
...
}
}
class WorkUnit {
...
}
Try it by compiling and running.
$ javac AllPermutations.java; java AllPermutations abcd
The below implementation can also be easily tweaked to return a list of permutations in reverse order by using a LIFO stack of work instead of a FIFO queue.
import java.util.List;
import java.util.Set;
import java.util.ArrayList;
import java.util.HashSet;
/**
 * Generates every distinct arrangement of the characters of a string.
 *
 * <p>The arrangements are built incrementally: starting from the first
 * character, each following character is inserted at every possible position
 * of every arrangement found so far. Results are kept in a {@link Set}, so a
 * string with repeated characters yields each arrangement only once.
 */
public class Anagrams {

    public static void main(String[] args) {
        String inpString = "abcd";
        for (String comb : getAllCombs(inpString)) {
            System.out.println(comb);
        }
    }

    /**
     * Returns all distinct arrangements of the characters of {@code inpString}.
     *
     * @param inpString the string to rearrange; may be {@code null}
     * @return the set of arrangements; empty when the input is {@code null} or empty
     */
    private static Set<String> getAllCombs(String inpString) {
        Set<String> combs = new HashSet<String>();
        // Short-circuit '||' is required here: with '|' the right-hand side is
        // always evaluated and a null argument would throw NullPointerException.
        if (inpString == null || inpString.isEmpty()) {
            return combs;
        }
        combs.add(inpString.substring(0, 1));
        for (int k = 1; k < inpString.length(); k++) {
            char a = inpString.charAt(k);
            // Every arrangement of the first k characters grows by one character.
            Set<String> grown = new HashSet<String>();
            for (String comb : combs) {
                grown.addAll(getCombs(comb, a));
            }
            combs = grown;
        }
        return combs;
    }

    /**
     * Inserts {@code a} at every position of {@code comb}, including before the
     * first and after the last character.
     *
     * @param comb the string to extend
     * @param a    the character to insert
     * @return the distinct strings obtained by the insertions
     */
    private static Set<String> getCombs(String comb, char a) {
        Set<String> combs = new HashSet<String>();
        for (int i = 0; i <= comb.length(); i++) {
            combs.add(comb.substring(0, i) + a + comb.substring(i));
        }
        return combs;
    }
}
Just posting my approach to the problem:
import java.util.ArrayDeque;
import java.util.Queue;
/**
 * Prints all permutations of a string without recursion.
 *
 * <p>Permutations are grown one character at a time: each new character is
 * inserted at every position of every permutation of the preceding characters.
 */
public class PermutationIterative {

    public static void main(String[] args) {
        permutationIterative("abcd");
    }

    /**
     * Builds the permutations of {@code str} level by level and prints the
     * final collection on one line. For an empty string no collection is ever
     * created, so {@code null} is printed.
     */
    private static void permutationIterative(String str) {
        Queue<String> level = null;
        final char[] letters = str.toCharArray();
        for (int idx = 0; idx < letters.length; idx++) {
            final char letter = letters[idx];
            if (level == null) {
                // First character: the only permutation is the character itself.
                level = new ArrayDeque<>(1);
                level.add(String.valueOf(letter));
                continue;
            }
            // A string of length idx offers idx + 1 insertion points.
            final int slots = idx + 1;
            Queue<String> grown = new ArrayDeque<>(level.size() * slots);
            while (!level.isEmpty()) {
                String base = level.remove();
                for (int at = 0; at < slots; at++) {
                    grown.add(base.substring(0, at) + letter + base.substring(at));
                }
            }
            level = grown;
        }
        System.out.println(level);
    }
}
package vishal villa;
import java.util.Scanner;
/**
 * Reads a line from standard input and prints every ordering of its characters.
 */
public class Permutation {

    /**
     * Recursively prints all orderings of the characters in {@code st}, each
     * prefixed by {@code ans} and followed by a single space.
     *
     * @param st  characters that still have to be placed
     * @param ans prefix assembled so far
     */
    static void result(String st, String ans) {
        if (st.isEmpty()) {
            // Nothing left to place: the prefix is a complete ordering.
            System.out.println(ans + " ");
            return;
        }
        for (int pick = 0; pick < st.length(); pick++) {
            // Remove the chosen character and recurse on what is left.
            String rest = new StringBuilder(st).deleteCharAt(pick).toString();
            result(rest, ans + st.charAt(pick));
        }
    }

    public static void main(String[] args) {
        Scanner input = new Scanner(System.in);
        System.out.println("enter the string");
        String line = input.nextLine();
        result(line, "");
    }
}
// Java program to print all permutations of a
// given string.
public class Permutation
{
    public static void main(String[] args)
    {
        String str = "ABC";
        int n = str.length();
        Permutation permutation = new Permutation();
        permutation.permute(str, 0, n - 1);
    }

    /**
     * Prints every permutation of the characters of {@code str} located
     * between indices {@code s} and {@code e}; characters before {@code s}
     * stay in place. When {@code e < s} (for example for an empty string)
     * nothing is printed.
     *
     * @param str string to calculate permutations for
     * @param s starting index (inclusive)
     * @param e end index (inclusive)
     */
    private void permute(String str, int s, int e)
    {
        if (s == e)
        {
            System.out.println(str);
            return;
        }
        // Each character in [s, e] takes a turn at position s. The bound must
        // be e; looping only up to s would never move any character.
        for (int i = s; i <= e; i++)
        {
            // swap() returns a new string, so str itself is never modified and
            // no swap-back step is needed after the recursive call.
            permute(swap(str, s, i), s + 1, e);
        }
    }

    /**
     * Returns a copy of {@code a} with the characters at two positions exchanged.
     *
     * @param a string value
     * @param i position 1
     * @param j position 2
     * @return swapped string
     */
    public String swap(String a, int i, int j)
    {
        char[] charArray = a.toCharArray();
        char temp = charArray[i];
        charArray[i] = charArray[j];
        charArray[j] = temp;
        return String.valueOf(charArray);
    }
}

Categories