Same word occurrence in a string. Java [duplicate] - java
I am writing a very basic Java program that calculates the frequency of each word in a sentence. So far I have managed to do this much:
import java.io.*;
/**
 * Reads one sentence from standard input and cuts it into words at every
 * space character, storing the pieces in an array.
 */
class Linked {
    /**
     * Prompts for a sentence, appends a trailing space so that the last word
     * is also followed by a delimiter, and slices the text at each space.
     *
     * @param args unused
     * @throws IOException if reading from standard input fails
     */
    public static void main(String args[]) throws IOException {
        BufferedReader reader = new BufferedReader(
                new InputStreamReader(System.in));
        System.out.println("Enter the sentence");
        String st = reader.readLine() + " ";
        // One array slot per space, i.e. one per delimited piece.
        String[] arr = new String[lengthx(st)];
        int filled = 0;
        int wordStart = 0;
        int spaceAt = st.indexOf(' ', wordStart);
        while (spaceAt >= 0) {
            arr[filled++] = st.substring(wordStart, spaceAt);
            wordStart = spaceAt + 1;
            spaceAt = st.indexOf(' ', wordStart);
        }
    }

    /**
     * Counts the space characters in the given text.
     *
     * @param a text to scan
     * @return number of {@code ' '} characters in {@code a}
     */
    static int lengthx(String a) {
        int spaces = 0;
        for (char ch : a.toCharArray()) {
            if (ch == ' ') {
                spaces++;
            }
        }
        return spaces;
    }
}
I have extracted each word and stored it in an array. Now the problem is how to count the number of times each 'word' is repeated, and how to display the result so that repeated words are not displayed multiple times. Can you help me with this?
Use a map with the word as the key and the count as the value, something like this:
// Tally the words: each key is a word, each value is how often it has been seen.
Map<String, Integer> map = new HashMap<>();
for (String w : words) {
    // merge() stores 1 for a new word and adds 1 to the stored count otherwise.
    map.merge(w, 1, Integer::sum);
}
If you are not allowed to use java.util, then you can sort arr using some sorting algorithm and do this:
// Run-length count over an array that has ALREADY been sorted: equal words are
// adjacent, so every run of equal elements collapses into one (word, count) pair.
// Afterwards words[k] / counts[k] hold the distinct words and their totals for the
// leading slots; unused trailing slots stay null / 0.
String[] words = new String[arr.length];
int[] counts = new int[arr.length];
if (arr.length > 0) {
    // Seed the first run from the input array. (Assigning words[0] to itself
    // left it null and made the first equals() call below throw.)
    words[0] = arr[0];
    counts[0] = 1;
    for (int i = 1, j = 0; i < arr.length; i++) {
        if (words[j].equals(arr[i])) {
            // Same word as the current run: extend it.
            counts[j]++;
        } else {
            // A different word starts a new run in the next slot.
            j++;
            words[j] = arr[i];
            counts[j] = 1;
        }
    }
}
An interesting solution with ConcurrentHashMap since Java 8
// Counter map: key -> number of occurrences.
ConcurrentMap<String, Integer> m = new ConcurrentHashMap<>();
// The remapping function receives the current value (null when the key is absent):
// an absent key starts at 1, an existing count is incremented.
m.compute("x", (k, v) -> v == null ? 1 : v + 1);
In Java 8, you can write this in two simple lines! In addition you can take advantage of parallel computing.
Here's the most beautiful way to do this:
// Lower-case the text, split it on runs of non-word characters, and stream the tokens in parallel.
Stream<String> stream = Stream.of(text.toLowerCase().split("\\W+")).parallel();
// Group equal tokens together and count each group: token -> occurrences.
Map<String, Long> wordFreq = stream
.collect(Collectors.groupingBy(String::toString,Collectors.counting()));
import java.util.*;
/** Counts how often each space-separated word occurs in a fixed sample sentence. */
public class WordCounter {
    /**
     * Splits the sample sentence on single spaces, tallies every token in a
     * hash map and prints the map.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String s = "this is a this is this a this yes this is a this what it may be i do not care about this";
        Map<String, Integer> words = new HashMap<>();
        for (String token : s.split(" ")) {
            // First sighting stores 1; later sightings add 1 to the stored count.
            words.merge(token, 1, Integer::sum);
        }
        System.out.println(words);
    }
}
Output:
{a=3, be=1, may=1, yes=1, this=7, about=1, i=1, is=3, it=1, do=1, not=1, what=1, care=1}
Try this
/**
 * Counts word occurrences in a fixed sentence using plain arrays only:
 * first the distinct words are collected, then each one is counted.
 */
public class Main
{
    /**
     * Prints the sentence followed by one "Count of [word] is : n" line per
     * distinct word, in order of first appearance.
     *
     * @param args unused
     */
    public static void main(String[] args)
    {
        String text = "the quick brown fox jumps fox fox over the lazy dog brown";
        String[] keys = text.split(" ");

        System.out.println(text);

        for (String key : getUniqueKeys(keys))
        {
            if (key == null)
            {
                // The distinct-word array is padded with nulls; the first one ends the data.
                break;
            }
            System.out.println("Count of ["+key+"] is : "+occurrences(keys, key));
        }
    }

    /** Returns how many elements of keys are equal to key. */
    private static int occurrences(String[] keys, String key)
    {
        int total = 0;
        for (String candidate : keys)
        {
            if (key.equals(candidate))
            {
                total++;
            }
        }
        return total;
    }

    /**
     * Collects the distinct elements of keys in order of first appearance.
     *
     * @param keys words to de-duplicate; must contain at least one element
     * @return an array as long as keys whose leading slots hold the distinct
     *         words and whose remaining slots are null
     */
    private static String[] getUniqueKeys(String[] keys)
    {
        String[] uniqueKeys = new String[keys.length];
        uniqueKeys[0] = keys[0];
        int used = 1;

        for (int i = 1; i < keys.length; i++)
        {
            String candidate = keys[i];
            boolean known = false;
            for (int j = 0; j < used && !known; j++)
            {
                known = candidate.equals(uniqueKeys[j]);
            }
            if (!known)
            {
                uniqueKeys[used++] = candidate;
            }
        }
        return uniqueKeys;
    }
}
Output:
the quick brown fox jumps fox fox over the lazy dog brown
Count of [the] is : 2
Count of [quick] is : 1
Count of [brown] is : 2
Count of [fox] is : 3
Count of [jumps] is : 1
Count of [over] is : 1
Count of [lazy] is : 1
Count of [dog] is : 1
From Java 10 you can use the following:
import java.util.Arrays;
import java.util.stream.Collectors;
/** Builds and prints a case-sensitive frequency map for a fixed array of words. */
public class StringFrequencyMap {
    /**
     * Maps every word to the number of times it occurs in the sample array
     * and prints the resulting map.
     *
     * @param args unused
     */
    public static void main(String... args){
        String[] wordArray = {"One", "One", "Two","Three", "Two", "two"};
        // Each word contributes 1; contributions for equal words are summed.
        var freq = Arrays.asList(wordArray).stream()
                .collect(Collectors.toMap(word -> word, word -> 1L, Long::sum));
        System.out.println(freq);
    }
}
Output:
{One=2, two=1, Two=2, Three=1}
You could try this
/**
 * Prints, for every distinct word in {@code s}, how many times it occurs.
 * Words are separated by one or more spaces and are reported in order of
 * first appearance as {@code Count of <word> is:<n>}.
 *
 * @param s the text to analyse; {@code null} or blank text prints nothing
 */
public static void frequency(String s) {
    if (s == null) {
        return;
    }
    String trimmed = s.trim();
    if (trimmed.isEmpty()) {
        // Splitting an empty string would yield one empty "word"; there is nothing to count.
        return;
    }
    String[] a = trimmed.split(" +");
    // counted[j] is set once position j has been included in an earlier word's total.
    boolean[] counted = new boolean[a.length];
    for (int i = 0; i < a.length; i++) {
        if (counted[i]) {
            continue;
        }
        int d = 1;
        for (int j = i + 1; j < a.length; j++) {
            if (a[i].equals(a[j])) {
                d += 1;
                counted[j] = true;
            }
        }
        System.out.println("Count of "+a[i]+" is:"+d);
    }
}
package naresh.java;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Set;
/** Reports the words that occur more than once in a sentence. */
public class StringWordDuplicates {

    /**
     * Splits the input on single spaces, counts every word and prints a
     * {@code word:count} line for each word seen more than once.
     *
     * @param inputString sentence to inspect
     */
    static void duplicate(String inputString){
        HashMap<String, Integer> wordCount = new HashMap<String,Integer>();
        for(String word : inputString.split(" ")){
            // New words start at 1; known words get their count incremented.
            wordCount.merge(word, 1, Integer::sum);
        }

        // Only repeated words are reported.
        wordCount.forEach((word, times) -> {
            if(times > 1){
                System.out.println(word+":"+times);
            }
        });
    }

    /** Runs the duplicate report on a sample sentence. */
    public static void main(String args[]){
        duplicate("I am Java Programmer and IT Server Programmer with Java as Best Java lover");
    }
}
/** Counts how many times one word occurs in a space-separated text. */
class find
{
    /**
     * Prints the number of space-separated words in {@code nm} that are
     * exactly equal to {@code w}.
     *
     * @param nm the text to search
     * @param w  the word to look for
     */
    public static void main(String nm,String w)
    {
        int c=0;
        // Characters of the word currently being read.
        StringBuilder b=new StringBuilder();
        for(int i=0;i<nm.length();i++)
        {
            char d=nm.charAt(i);
            if(d!=' ')
            {
                b.append(d);
            }
            else
            {
                // A space ends the current word: compare it, then start over.
                if(b.toString().equals(w))
                {
                    c++;
                }
                b.setLength(0);
            }
        }
        // The last word has no trailing space, so it must be compared here;
        // otherwise a match at the very end of the text is never counted.
        if(b.length()>0 && b.toString().equals(w))
        {
            c++;
        }
        System.out.println(c);
    }
}
/** Reads a sentence from standard input and prints the frequency of each word. */
public class wordFrequency {
    private static Scanner scn;

    /**
     * Prints one {@code word = count} line for every distinct word in the
     * sentence. The text is lower-cased and every character other than
     * {@code a-z} and the space is removed before splitting on spaces.
     *
     * @param sent the sentence to analyse
     */
    public static void countwords(String sent) {
        String cleaned = sent.toLowerCase().replaceAll("[^a-z ]", "");
        Map<String, Integer> a = new HashMap<String, Integer>();
        for (String word : cleaned.split(" ")) {
            if (word.isEmpty()) {
                // Consecutive spaces (or stripped punctuation) leave empty tokens; they are not words.
                continue;
            }
            // Single pass: add one to the word's running total.
            a.merge(word, 1, Integer::sum);
        }
        for (Map.Entry<String, Integer> entry : a.entrySet()) {
            System.out.println(entry.getKey() + " = " + entry.getValue());
        }
    }

    /** Prompts for a sentence on standard input and prints its word counts. */
    public static void main(String[] args) {
        scn = new Scanner(System.in);
        System.out.println("Enter sentence:");
        String inp = scn.nextLine();
        countwords(inp);
    }
}
Determine the frequency of words in a file.
// Count how often each whitespace-delimited token occurs in the file.
File f = new File(fileName);
// Map is an interface and cannot be instantiated; use a concrete HashMap.
Map<String, Integer> counts =
        new java.util.HashMap<String, Integer>();
// try-with-resources closes the scanner (and the underlying file) when done.
try (Scanner s = new Scanner(f)) {
    while (s.hasNext()) {
        // First occurrence stores 1; later occurrences add 1.
        counts.merge(s.next(), 1, Integer::sum);
    }
}
The following program finds the frequency, sorts it accordingly, and prints it.
Below is the output grouped by frequency:
0-10:
The 2
Is 4
11-20:
Have 13
Done 15
Here is my program:
package com.company;
import java.io.*;
import java.util.*;
import java.lang.*;
/**
* Created by ayush on 12/3/17.
*/
/**
 * Reads a sentence from standard input, counts how often each
 * space-separated word occurs, and prints the counts twice: first in the
 * hash map's own iteration order, then sorted by ascending frequency.
 */
public class Linked {
    /**
     * Runs the word count on one line read from standard input.
     *
     * @param args unused
     * @throws IOException if reading from standard input fails
     */
    public static void main(String args[]) throws IOException {
        BufferedReader br = new BufferedReader(
                new InputStreamReader(System.in));
        System.out.println("Enter the sentence");
        String st = br.readLine();
        if (st == null) {
            // Input ended before a line was available: nothing to count.
            return;
        }
        // A trailing space guarantees the last word is also followed by a delimiter.
        st = st.trim() + " ";
        int count = lengthx(st);
        System.out.println(count);
        String arr[] = new String[count];
        int p = 0;
        int c = 0;
        for (int i = 0; i < st.length(); i++) {
            if (st.charAt(i) == ' ') {
                arr[p] = st.substring(c, i);
                System.out.println(arr[p]);
                c = i + 1;
                p++;
            }
        }

        // word -> number of occurrences
        Map<String, Integer> map = new HashMap<>();
        for (String w : arr) {
            map.merge(w, 1, Integer::sum);
        }
        for (Map.Entry<String, Integer> entry : map.entrySet()) {
            System.out.println(entry.getKey() + " = " + entry.getValue());
        }

        // Copy the entries into a list so they can be ordered by count (ascending).
        List<Map.Entry<String, Integer>> listOfEntries = new ArrayList<>(map.entrySet());
        listOfEntries.sort(Map.Entry.comparingByValue());

        System.out.println("HashMap after sorting entries by values ");
        for (Map.Entry<String, Integer> mapping : listOfEntries) {
            System.out.println(mapping.getKey() + " ==> " + mapping.getValue());
        }
    }

    /**
     * Counts the space characters in the given text.
     *
     * @param a text to scan
     * @return number of {@code ' '} characters in {@code a}
     */
    static int lengthx(String a) {
        int count = 0;
        for (int j = 0; j < a.length(); j++) {
            if (a.charAt(j) == ' ') {
                count++;
            }
        }
        return count;
    }
}
import java.io.*;
/**
 * Splits a sentence typed on standard input into its space-separated
 * pieces and keeps them in an array.
 */
class Linked {
    /**
     * Asks for a sentence, terminates it with a space and copies every
     * piece that ends at a space into the next free array slot.
     *
     * @param args unused
     * @throws IOException if reading from standard input fails
     */
    public static void main(String args[]) throws IOException {
        BufferedReader input = new BufferedReader(
                new InputStreamReader(System.in));
        System.out.println("Enter the sentence");
        String st = input.readLine() + " ";
        String[] arr = new String[lengthx(st)];
        char[] chars = st.toCharArray();
        int slot = 0;
        int begin = 0;
        for (int pos = 0; pos < chars.length; pos++) {
            if (chars[pos] != ' ') {
                continue;
            }
            arr[slot] = st.substring(begin, pos);
            slot++;
            begin = pos + 1;
        }
    }

    /**
     * Returns the number of space characters contained in {@code a}.
     *
     * @param a text to scan
     * @return how many times {@code ' '} occurs in {@code a}
     */
    static int lengthx(String a) {
        int total = 0;
        int at = a.indexOf(' ');
        while (at != -1) {
            total++;
            at = a.indexOf(' ', at + 1);
        }
        return total;
    }
}
Simply use the Java 8 Stream collector's groupingBy function:
import java.util.function.Function;
import java.util.stream.Collectors;
// Sample data containing repeated country names.
static String[] COUNTRY_NAMES
= { "China", "Australia", "India", "USA", "USSR", "UK", "China",
"France", "Poland", "Austria", "India", "USA", "Egypt", "China" };
// Group equal names together and count the members of each group: name -> occurrences.
Map<String, Long> result = Stream.of(COUNTRY_NAMES).collect(
Collectors.groupingBy(Function.identity(), Collectors.counting()));
Count frequency of elements of list in java 8
// Sample list of integers with repeated values.
List<Integer> list = new ArrayList<Integer>();
Collections.addAll(list,3,6,3,8,4,9,3,6,9,4,8,3,7,2);
// Group equal values together and count each group: value -> occurrences.
Map<Integer, Long> frequencyMap = list.stream().collect(Collectors.groupingBy(Function.identity(),Collectors.counting()));
System.out.println(frequencyMap);
Note:
To count the frequency of words in a String, split the string, convert the result to a list, and use streams to count the frequency (=> Map frequencyMap).
Check below link
// Split on single spaces. sf collects the distinct words in order of first
// appearance, count[k] is the number of occurrences of sf[k], and j is the
// number of distinct words found so far.
String s[]=st.split(" ");
String sf[]=new String[s.length];
int count[]=new int[s.length];
sf[0]=s[0];
count[0]=1;
int j=1;
for(int i=1;i<s.length;i++)
{
    // Look for the current word among the distinct words recorded so far.
    int match=-1;
    for(int t=0;t<j;t++)
    {
        if(s[i].equals(sf[t]))
        {
            match=t;
            break;
        }
    }
    if(match>=0)
    {
        count[match]++;
    }
    else
    {
        // First sighting: record the word in the next free slot.
        sf[j]=s[i];
        count[j]=1;
        j++;
    }
}
I created a simple, easy-to-understand solution for this problem that covers all test cases:
import java.util.HashMap;
import java.util.Map;
/*
* Problem Statement - Count Frequency of each word in a given string, ignoring special characters and space
* Input 1 - "To be or Not to be"
* Output 1 - to(2 times), be(2 times), or(1 time), not(1 time)
*
* Input 2 -"Star 123 ### 123 star"
* Output - Star(2 times), 123(2 times)
*/
/** Counts the frequency of each word in a string, ignoring case and special characters. */
public class FrequencyofWords {
    public static void main(String[] args) {
        String s1="To be or not **** to be! is all i ask for";
        fnFrequencyofWords(s1);
    }

    /**
     * Normalises the text, prints the normalised form, then prints a map of
     * each word to the number of times it occurs.
     *
     * <p>Normalisation removes every character that is not a letter, digit or
     * whitespace, collapses each whitespace run to a single space, trims the
     * ends and lower-cases the result.
     *
     * @param s1 the text to analyse
     */
    static void fnFrequencyofWords(String s1) {
        // trim() matters: stripping a leading symbol such as "### " leaves a
        // leading space, which would otherwise produce an empty-string "word".
        s1=s1.replaceAll("[^A-Za-z0-9\\s]","")
             .replaceAll("\\s+"," ")
             .trim()
             .toLowerCase();
        System.out.println(s1);

        Map<String, Integer> map=new HashMap<String, Integer>();
        if(!s1.isEmpty()) {
            // Splitting an empty string would yield one empty token, so skip it entirely.
            for(String word : s1.split(" ")) {
                // New words start at 1; known words are incremented.
                map.merge(word, 1, Integer::sum);
            }
        }
        System.out.println(map);
    }
}
/** Prints how often each word occurs in a fixed sentence, without using collections. */
public class WordFrequencyProblem {
    /**
     * Splits the sentence on whitespace characters and periods and prints one
     * {@code <word>Present <count>} line per distinct word, in order of first
     * appearance.
     *
     * @param args unused
     */
    public static void main(String args[]){
        String s="the quick brown fox jumps fox fox over the lazy dog brown";
        String[] splitWord = s.split("\\s|\\.");
        // counted[j] is set once position j has been included in some word's total.
        // Tracking positions (rather than searching a string of processed words)
        // avoids treating a word as "already processed" merely because it is a
        // substring of an earlier word, e.g. "he" after "the".
        boolean[] counted = new boolean[splitWord.length];
        for(int i=0;i<splitWord.length;i++){
            String word = splitWord[i];
            if(counted[i] || word.isEmpty()){
                // Empty tokens come from adjacent delimiters and are not words.
                continue;
            }
            int count = 0;
            for(int j=i;j<splitWord.length;j++){
                if(word.equals(splitWord[j])){
                    count++;
                    counted[j] = true;
                }
            }
            System.out.println(word +"Present "+ count);
        }
    }
}
/** Prints, for every word of a fixed sentence, how often that word occurs in it. */
public class TestSplit {
    /**
     * Reports the occurrence count once per token (so repeated words are
     * reported repeatedly) and finally prints the token list itself.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String input="Find the repeated word which is repeated in this string";
        String[] tokens = input.split(" ");
        List<String> output = Arrays.asList(tokens);
        for (int i = 0; i < tokens.length; i++) {
            int occurrences = Collections.frequency(output, tokens[i]);
            System.out.println("Occurence of " + tokens[i] + " is " + occurrences);
        }
        System.out.println(output);
    }
}
Please try this; it may help you:
/** Prints the words that occur more than once in a sample sentence, with their counts. */
public static void main(String[] args) {
    String str1="I am indian , I am proud to be indian proud.";
    System.out.println(findFrquenciesInString(str1));
}
/**
 * Finds the words that occur more than once in a space-separated string.
 *
 * @param str1 the text to analyse; it is split on single spaces
 * @return a map from each repeated word to its number of occurrences;
 *         words that occur only once are not included
 */
private static Map<String,Integer> findFrquenciesInString(String str1) {
    String[] strArr=str1.split(" ");

    // Pass 1: total occurrences of every word.
    Map<String,Integer> totals=new HashMap<>();
    for(String word : strArr) {
        totals.merge(word, 1, Integer::sum);
    }

    // Pass 2: keep only the repeated words. Counting in a map replaces the
    // earlier approach of overwriting tokens with a "-1" marker and testing it
    // with !=, which compared references and corrupted the token array.
    Map<String,Integer> map=new HashMap<>();
    for(String word : strArr) {
        int count=totals.get(word);
        if(count>1) {
            map.put(word, count);
        }
    }
    return map;
}
try this
/**
 * Reads one line from standard input, splits it into words at spaces and
 * periods, counts the words case-insensitively, and prints every distinct
 * word with its count, most frequent first. A word is printed using the
 * spelling of its first occurrence; ties keep first-appearance order.
 *
 * @throws IOException if reading from standard input fails
 */
public void count()throws IOException
{
    BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
    System.out.println("enetr the strring");
    String s = in.readLine();
    if (s == null)
    {
        // Input ended before a line was available: nothing to count.
        return;
    }
    int l = s.length();
    // A line of l characters cannot contain more than l + 1 words, so these
    // arrays can never overflow (the fixed size of 50 could).
    String n[] = new String [l + 1];   // distinct words, in order of first appearance
    int m[] = new int [l + 1];         // m[k] = occurrences of n[k]
    int b = 0;                         // number of distinct words stored so far
    int a = 0;                         // start index of the word being read

    // Running one step past the last character treats end-of-line as a
    // delimiter, so a final word without a trailing space or period is counted.
    for (int i = 0; i <= l; i++)
    {
        boolean delimiter = (i == l) || s.charAt(i) == ' ' || s.charAt(i) == '.';
        if (!delimiter)
        {
            continue;
        }
        String x = s.substring(a, i);
        a = i + 1;
        if (x.isEmpty())
        {
            // Adjacent delimiters (e.g. ". ") enclose no word.
            continue;
        }
        int j = 0;
        while (j < b && !x.equalsIgnoreCase(n[j]))
        {
            j++;
        }
        if (j == b)
        {
            n[b] = x;
            b++;
        }
        m[j]++;
    }

    // Print every word exactly once, highest count first: repeatedly pick the
    // largest remaining count and zero it so it is not picked again.
    for (int printed = 0; printed < b; printed++)
    {
        int best = 0;
        for (int j = 1; j < b; j++)
        {
            if (m[j] > m[best])
            {
                best = j;
            }
        }
        System.out.println(n[best] + " : " + m[best]);
        m[best] = 0;
    }
}
Related
Print two string after making them anagram by deleting characters not number of deletions?
> Here I'm able to print the number repetitions of character but i'm not able to get how to compare and remove.Here i had taken two maps for two strings and keys as charecters and value as repetitons public class NoOfDeletionsTomakeStringAnagram { #SuppressWarnings("resource") public static void main(String[] args) { System.out.println("string"); Scanner sc = new Scanner(System.in); System.out.println("enter string 1"); String s1 = sc.next(); System.out.println("enter string 2"); String s2 = sc.next(); Map<Character, Integer> m1s1 = new HashMap<>(); Map<Character, Integer> m2s2 = new HashMap<>(); for (int i = 0; i < s1.length(); i++) { if (m1s1.containsKey(s1.charAt(i))) { m1s1.put((Character) s1.charAt(i), m1s1.get((Character) s1.charAt(i)) + 1); } else { m1s1.put((Character) s1.charAt(i), 1); } } for (int i = 0; i < s2.length(); i++) { if (m2s2.containsKey(s2.charAt(i))) { m2s2.put((Character) s2.charAt(i), m2s2.get((Character) s2.charAt(i)) + 1); } else { m2s2.put((Character) s2.charAt(i), 1); } } System.out.println("m1s1....." + m1s1); System.out.println("m221...." + m2s2); } } Samlie input: s1=abc s2=cde Here we have to make anagram by deleting charecters from both the strings output: s1=c s2=c
/* package whatever; // don't place package name! */ import java.util.*; import java.lang.*; import java.io.*; class Ideone { public static void main (String[] args) throws java.lang.Exception { System.out.println("string"); Scanner sc = new Scanner(System.in); System.out.println("enter string 1"); String s1 = sc.next(); System.out.println("enter string 2"); String s2 = sc.next(); Map<Character, Integer> m1s1 = new HashMap<>(); Map<Character, Integer> m2s2 = new HashMap<>(); for (int i = 0; i < s1.length(); i++) { if (m1s1.containsKey(s1.charAt(i))) { m1s1.put((Character) s1.charAt(i), m1s1.get((Character) s1.charAt(i)) + 1); } else { m1s1.put((Character) s1.charAt(i), 1); } } for (int i = 0; i < s2.length(); i++) { if (m2s2.containsKey(s2.charAt(i))) { m2s2.put((Character) s2.charAt(i), m2s2.get((Character) s2.charAt(i)) + 1); } else { m2s2.put((Character) s2.charAt(i), 1); } } System.out.println("m1s1....." + m1s1); System.out.println("m221...." + m2s2); // ADDED MY CODE FROM HERE // TAKEN STRING 1 AND MAP OF STRING 2, CHECK CHARACTER BY CHARACTER OF STRING 1 IN MAP //OF STRING 2, IF THE CHARACTER EXISTS PRINT IT & DECREASE ITS VALUE IN MAP BY 1 , IF //AFTER DECREASING THE VALUE BECOMES 0, REMOVE IT FROM MAP, IF THE CHARACTER OF //STRING1 DOES NOT EXIST IN MAP2 DON'T DO ANYTHING, FOLLOW ABOVE RULES FOR EVERY //CHARACTER OF STRING 1 // NOW TAKE STRING2 AND MAP OF STRING1 AND FOLLOW THE EXACT PROCEDURE ABOVE int i,x; char c; for(i=0;i<s1.length();i++) { c=s1.charAt(i); if(m2s2.containsKey(c)) { System.out.print(c); x=m2s2.get((Character)c); x=x-1; if(x==0) m2s2.remove(new Character(c)); else m2s2.put((Character) c,x); } } System.out.println(); for(i=0;i<s2.length();i++) { c=s2.charAt(i); if(m1s1.containsKey(c)) { System.out.print(c); x=m1s1.get((Character)c); x=x-1; if(x==0) m1s1.remove(new Character(c)); else m1s1.put((Character) c,x); } } } }
How to add space(indent) before text except first line when print out. How to move this code to other class just call from main,
package com.Chall; import java.util.ArrayList; import java.util.List; public class Main { public static void main(String[] args) { for (String part : getParts("Indent space in every line ecept first printed line", 3, 5)) { System.out.println(part); } } private static List<String> getParts(String string, int partitionSize, int maxLine) { List<String> parts = new ArrayList<>(); int len = string.length(); int count = 0; for (int i = 0; i < len; i += partitionSize) { parts.add(string.substring(i, Math.min(len, i + partitionSize))); count++; if (maxLine == count) { break; } } return parts; } }
for (3,5) you get: Ind ent sp ace in for (7,5) you get: Indent space i n every line e cept fi With this code : public static void main(String[] args) { printList(getParts("Indent space in every line ecept first printed line", 3, 5)); } private static void printList(List<String> parts) { for (String part : parts) { System.out.println(part); } } private static List<String> getParts(String string, int partitionSize, int maxLine) { List<String> parts = new ArrayList<>(); int len = string.length(); int count = 0; String indent = ""; //<------ for (int i = 0; i < len; i += partitionSize) { parts.add(indent + string.substring(i, Math.min(len, i + partitionSize)).trim()); count++; indent = " "; //<------------- if (maxLine == count) { break; } } return parts; } use trim() to remove space, and then add one
How can i extract trend words from given dataset (Java)? [duplicate]
How to generate an n-gram of a string like: String Input="This is my car." I want to generate n-gram with this input: Input Ngram size = 3 Output should be: This is my car This is is my my car This is my is my car Give some idea in Java, how to implement that or if any library is available for it. I am trying to use this NGramTokenizer but its giving n-gram's of character sequence and I want n-grams of word sequence.
I believe this would do what you want: import java.util.*; public class Test { public static List<String> ngrams(int n, String str) { List<String> ngrams = new ArrayList<String>(); String[] words = str.split(" "); for (int i = 0; i < words.length - n + 1; i++) ngrams.add(concat(words, i, i+n)); return ngrams; } public static String concat(String[] words, int start, int end) { StringBuilder sb = new StringBuilder(); for (int i = start; i < end; i++) sb.append((i > start ? " " : "") + words[i]); return sb.toString(); } public static void main(String[] args) { for (int n = 1; n <= 3; n++) { for (String ngram : ngrams(n, "This is my car.")) System.out.println(ngram); System.out.println(); } } } Output: This is my car. This is is my my car. This is my is my car. An "on-demand" solution implemented as an Iterator: class NgramIterator implements Iterator<String> { String[] words; int pos = 0, n; public NgramIterator(int n, String str) { this.n = n; words = str.split(" "); } public boolean hasNext() { return pos < words.length - n + 1; } public String next() { StringBuilder sb = new StringBuilder(); for (int i = pos; i < pos + n; i++) sb.append((i > pos ? " " : "") + words[i]); pos++; return sb.toString(); } public void remove() { throw new UnsupportedOperationException(); } }
You are looking for ShingleFilter. Update: The link points to version 3.0.2. This class may be in different package in newer version of Lucene.
This code returns an array of all Strings of the given length: public static String[] ngrams(String s, int len) { String[] parts = s.split(" "); String[] result = new String[parts.length - len + 1]; for(int i = 0; i < parts.length - len + 1; i++) { StringBuilder sb = new StringBuilder(); for(int k = 0; k < len; k++) { if(k > 0) sb.append(' '); sb.append(parts[i+k]); } result[i] = sb.toString(); } return result; } E.g. System.out.println(Arrays.toString(ngrams("This is my car", 2))); //--> [This is, is my, my car] System.out.println(Arrays.toString(ngrams("This is my car", 3))); //--> [This is my, is my car]
/** * * #param sentence should has at least one string * #param maxGramSize should be 1 at least * #return set of continuous word n-grams up to maxGramSize from the sentence */ public static List<String> generateNgramsUpto(String str, int maxGramSize) { List<String> sentence = Arrays.asList(str.split("[\\W+]")); List<String> ngrams = new ArrayList<String>(); int ngramSize = 0; StringBuilder sb = null; //sentence becomes ngrams for (ListIterator<String> it = sentence.listIterator(); it.hasNext();) { String word = (String) it.next(); //1- add the word itself sb = new StringBuilder(word); ngrams.add(word); ngramSize=1; it.previous(); //2- insert prevs of the word and add those too while(it.hasPrevious() && ngramSize<maxGramSize){ sb.insert(0,' '); sb.insert(0,it.previous()); ngrams.add(sb.toString()); ngramSize++; } //go back to initial position while(ngramSize>0){ ngramSize--; it.next(); } } return ngrams; } Call: long startTime = System.currentTimeMillis(); ngrams = ToolSet.generateNgramsUpto("This is my car.", 3); long stopTime = System.currentTimeMillis(); System.out.println("My time = "+(stopTime-startTime)+" ms with ngramsize = "+ngrams.size()); System.out.println(ngrams.toString()); Output: My time = 1 ms with ngramsize = 9 [This, is, This is, my, is my, This is my, car, my car, is my car]
public static void CreateNgram(ArrayList<String> list, int cutoff) { try { NGramModel ngramModel = new NGramModel(); POSModel model = new POSModelLoader().load(new File("en-pos-maxent.bin")); PerformanceMonitor perfMon = new PerformanceMonitor(System.err, "sent"); POSTaggerME tagger = new POSTaggerME(model); perfMon.start(); for(int i = 0; i<list.size(); i++) { String inputString = list.get(i); ObjectStream<String> lineStream = new PlainTextByLineStream(new StringReader(inputString)); String line; while ((line = lineStream.read()) != null) { String whitespaceTokenizerLine[] = WhitespaceTokenizer.INSTANCE.tokenize(line); String[] tags = tagger.tag(whitespaceTokenizerLine); POSSample sample = new POSSample(whitespaceTokenizerLine, tags); perfMon.incrementCounter(); String words[] = sample.getSentence(); if(words.length > 0) { for(int k = 2; k< 4; k++) { ngramModel.add(new StringList(words), k, k); } } } } ngramModel.cutoff(cutoff, Integer.MAX_VALUE); Iterator<StringList> it = ngramModel.iterator(); while(it.hasNext()) { StringList strList = it.next(); System.out.println(strList.toString()); } perfMon.stopAndPrintFinalResult(); }catch(Exception e) { System.out.println(e.toString()); } } Here is my codes to create n-gram. In this case, n = 2, 3. n-gram of words sequence which smaller than cutoff value will ignore from result set. Input is list of sentences, then it parse using a tool of OpenNLP
public static void main(String[] args) { String[] words = "This is my car.".split(" "); for (int n = 0; n < 3; n++) { List<String> list = ngrams(n, words); for (String ngram : list) { System.out.println(ngram); } System.out.println(); } } public static List<String> ngrams(int stepSize, String[] words) { List<String> ngrams = new ArrayList<String>(); for (int i = 0; i < words.length-stepSize; i++) { String initialWord = ""; int internalCount = i; int internalStepSize = i + stepSize; while (internalCount <= internalStepSize && internalCount < words.length) { initialWord = initialWord+" " + words[internalCount]; ++internalCount; } ngrams.add(initialWord); } return ngrams; }
Check this out: public static void main(String[] args) { NGram nGram = new NGram(); String[] tokens = "this is my car".split(" "); int i = tokens.length; List<String> ngrams = new ArrayList<>(); while (i >= 1){ ngrams.addAll(nGram.getNGram(tokens, i, new ArrayList<>())); i--; } System.out.println(ngrams); } private List<String> getNGram(String[] tokens, int n, List<String> ngrams) { StringBuilder strbldr = new StringBuilder(); if (tokens.length < n) { return ngrams; }else { for (int i=0; i<n; i++){ strbldr.append(tokens[i]).append(" "); } ngrams.add(strbldr.toString().trim()); String[] newTokens = Arrays.copyOfRange(tokens, 1, tokens.length); return getNGram(newTokens, n, ngrams); } } Simple recursive function, better running time.
Java count occurrence of each item in an array
Is there any method for counting the occurrence of each item on an array? Lets say I have: String[] array = {"name1","name2","name3","name4", "name5"}; Here the output will be: name1 1 name2 1 name3 1 name4 1 name5 1 and if I have: String[] array = {"name1","name1","name2","name2", "name2"}; The output would be: name1 2 name2 3 The output here is just to demonstrate the expected result.
// View the array as a typed list (a raw List would make the HashSet construction below unchecked).
List<String> asList = Arrays.asList(array);
// Collapse duplicates so that each distinct value is reported once.
Set<String> mySet = new HashSet<String>(asList);
for(String s: mySet){
    System.out.println(s + " " + Collections.frequency(asList,s));
}
With java-8, you can do it like this: String[] array = {"name1","name2","name3","name4", "name5", "name2"}; Arrays.stream(array) .collect(Collectors.groupingBy(s -> s)) .forEach((k, v) -> System.out.println(k+" "+v.size())); Output: name5 1 name4 1 name3 1 name2 2 name1 1 What it does is: Create a Stream<String> from the original array Group each element by identity, resulting in a Map<String, List<String>> For each key value pair, print the key and the size of the list If you want to get a Map that contains the number of occurences for each word, it can be done doing: Map<String, Long> map = Arrays.stream(array) .collect(Collectors.groupingBy(s -> s, Collectors.counting())); For more informations: Stream Collectors Hope it helps! :)
You could use a MultiSet from Google Collections/Guava or a Bag from Apache Commons. If you have a collection instead of an array, you can use addAll() to add the entire contents to the above data structure, and then apply the count() method to each value. A SortedMultiSet or SortedBag would give you the items in a defined order. Google Collections actually has very convenient ways of going from arrays to a SortedMultiset.
I wrote a solution for this to practice myself. It doesn't seem nearly as awesome as the other answers posted, but I'm going to post it anyway, and then learn how to do this using the other methods as well. Enjoy: public static Integer[] countItems(String[] arr) { List<Integer> itemCount = new ArrayList<Integer>(); Integer counter = 0; String lastItem = arr[0]; for(int i = 0; i < arr.length; i++) { if(arr[i].equals(lastItem)) { counter++; } else { itemCount.add(counter); counter = 1; } lastItem = arr[i]; } itemCount.add(counter); return itemCount.toArray(new Integer[itemCount.size()]); } public static void main(String[] args) { String[] array = {"name1","name1","name2","name2", "name2", "name3", "name1","name1","name2","name2", "name2", "name3"}; Arrays.sort(array); Integer[] cArr = countItems(array); int num = 0; for(int i = 0; i < cArr.length; i++) { num += cArr[i]-1; System.out.println(array[num] + ": " + cArr[i].toString()); } }
Using HashMap it is walk in the park. main(){ String[] array ={"a","ab","a","abc","abc","a","ab","ab","a"}; Map<String,Integer> hm = new HashMap(); for(String x:array){ if(!hm.containsKey(x)){ hm.put(x,1); }else{ hm.put(x, hm.get(x)+1); } } System.out.println(hm); }
It can be done in a very simple way using collections please find the code below String[] array = {"name1","name1","name2","name2", "name2"}; List<String> sampleList=(List<String>) Arrays.asList(array); for(String inpt:array){ int frequency=Collections.frequency(sampleList,inpt); System.out.println(inpt+" "+frequency); } Here the output will be like name1 2 name1 2 name2 3 name2 3 name2 3 To avoid printing redundant keys use HashMap and get your desired output
I would use a hashtable with in key takes the element of the array (here string) and in value an Integer. then go through the list doing something like this : for(String s:array){ if(hash.containsKey(s)){ Integer i = hash.get(s); i++; }else{ hash.put(s, new Interger(1)); }
Count String occurence using hashmap, streams & collections import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; import java.util.stream.Collectors; public class StringOccurence { public static void main(String args[]) { String[] stringArray = { "name1", "name1", "name2", "name2", "name2" }; countStringOccurence(stringArray); countStringOccurenceUsingStream(stringArray); countStringOccurenceUsingCollections(stringArray); } private static void countStringOccurenceUsingCollections(String[] stringArray) { // TODO Auto-generated method stub List<String> asList = Arrays.asList(stringArray); Set<String> set = new HashSet<String>(asList); for (String string : set) { System.out.println(string + " --> " + Collections.frequency(asList, string)); } } private static void countStringOccurenceUsingStream(String[] stringArray) { // TODO Auto-generated method stub Arrays.stream(stringArray).collect(Collectors.groupingBy(s -> s)) .forEach((k, v) -> System.out.println(k + " --> " + v.size())); } private static void countStringOccurence(String[] stringArray) { // TODO Auto-generated method stub Map<String, Integer> map = new HashMap<String, Integer>(); for (String s : stringArray) { if (map.containsKey(s)) { map.put(s, map.get(s) + 1); } else { map.put(s, 1); } } for (Map.Entry<String, Integer> entry : map.entrySet()) { System.out.println(entry.getKey() + " --> " + entry.getValue()); } } }
Here is my solution - The method takes an array of integers(assuming the range between 0 to 100) as input and returns the number of occurrences of each element. let's say the input is [21,34,43,21,21,21,45,65,65,76,76,76]. So the output would be in a map and it is: {34=1, 21=4, 65=2, 76=3, 43=1, 45=1} public Map<Integer, Integer> countOccurrence(int[] numbersToProcess) { int[] possibleNumbers = new int[100]; Map<Integer, Integer> result = new HashMap<Integer, Integer>(); for (int i = 0; i < numbersToProcess.length; ++i) { possibleNumbers[numbersToProcess[i]] = possibleNumbers[numbersToProcess[i]] + 1; result.put(numbersToProcess[i], possibleNumbers[numbersToProcess[i]]); } return result; }
There are several methods which can help, but this is one is using for loop. import java.util.Arrays; public class one_dimensional_for { private static void count(int[] arr) { Arrays.sort(arr); int sum = 0, counter = 0; for (int i = 0; i < arr.length; i++) { if (arr[0] == arr[arr.length - 1]) { System.out.println(arr[0] + ": " + counter + " times"); break; } else { if (i == (arr.length - 1)) { sum += arr[arr.length - 1]; counter++; System.out.println((sum / counter) + " : " + counter + " times"); break; } else { if (arr[i] == arr[i + 1]) { sum += arr[i]; counter++; } else if (arr[i] != arr[i + 1]) { sum += arr[i]; counter++; System.out.println((sum / counter) + " : " + counter + " times"); sum = 0; counter = 0; } } } } } public static void main(String[] args) { int nums[] = { 1, 1, 1, 1, 2, 2, 2, 3, 3, 4, 5, 5, 5, 5, 6 }; count(nums); } }
You can do it by using Arrays.sort and Recursion. The same wine but in a different bottle.... import java.util.Arrays; public class ArrayTest { public static int mainCount=0; public static void main(String[] args) { String prevItem = ""; String[] array = {"name1","name1","name2","name2", "name2"}; Arrays.sort(array); for(String item:array){ if(! prevItem.equals(item)){ mainCount = 0; countArray(array, 0, item); prevItem = item; } } } private static void countArray(String[] arr, int currentPos, String item) { if(currentPos == arr.length){ System.out.println(item + " " + mainCount); return; } else{ if(arr[currentPos].toString().equals(item)){ mainCount += 1; } countArray(arr, currentPos+1, item); } } }
You can use Hash Map as given in the example below: import java.util.HashMap; import java.util.Set; /** * * #author Abdul Rab Khan * */ public class CounterExample { public static void main(String[] args) { String[] array = { "name1", "name1", "name2", "name2", "name2" }; countStringOccurences(array); } /** * This method process the string array to find the number of occurrences of * each string element * * #param strArray * array containing string elements */ private static void countStringOccurences(String[] strArray) { HashMap<String, Integer> countMap = new HashMap<String, Integer>(); for (String string : strArray) { if (!countMap.containsKey(string)) { countMap.put(string, 1); } else { Integer count = countMap.get(string); count = count + 1; countMap.put(string, count); } } printCount(countMap); } /** * This method will print the occurrence of each element * * #param countMap * map containg string as a key, and its count as the value */ private static void printCount(HashMap<String, Integer> countMap) { Set<String> keySet = countMap.keySet(); for (String string : keySet) { System.out.println(string + " : " + countMap.get(string)); } } }
This is a simple script I used in Python but it can be easily adapted. Nothing fancy though. def occurance(arr): results = [] for n in arr: data = {} data["point"] = n data["count"] = 0 for i in range(0, len(arr)): if n == arr[i]: data["count"] += 1 results.append(data) return results
you can find using HashMap with simple technic public class HashMapExample { public static void main(String[] args) { stringArray(); } public static void stringArray() { String[] a = {"name1","name2","name3","name4", "name5"}; Map<String, String> hm = new HashMap<String, String>(); for(int i=0;i<a.length;i++) { String bl=(String)hm.get(a[i]); if(bl==null) { hm.put(a[i],String.valueOf(1)); }else { String k=hm.get(a[i]); int j=Integer.valueOf(k); hm.put(a[i],String.valueOf(j+1)); } } //hm.entrySet(); System.out.println("map elements are "+hm.toString()); } }
You could use for (String x : array){ System.out.println(Collections.frequency(array,x)); }
You can use HashMap, where Key is your string and value - count.
// An answer without using HashSet, Map or ArrayList
public class Count {

    static String names[] = {"name1", "name1", "name2", "name2", "name2"};

    public static void main(String args[]) {
        printCount(names);
    }

    /**
     * Sorts {@code names} in place, prints the sorted names on one line
     * (each followed by a space), then prints, for every position, how many
     * times the name at that position occurs (each followed by a space).
     *
     * @param names the names to count; the array is sorted in place
     */
    public static void printCount(String[] names) {
        java.util.Arrays.sort(names);
        int n = names.length;
        for (int i = 0; i < n; i++) {
            System.out.print(names[i] + " ");
        }
        System.out.println();

        // After sorting, equal names are adjacent, so one pass over the runs
        // is enough. equals() is used because == only compares references.
        int[] result = new int[n];
        int runStart = 0;
        while (runStart < n) {
            int runEnd = runStart + 1;
            while (runEnd < n && names[runStart].equals(names[runEnd])) {
                runEnd++;
            }
            java.util.Arrays.fill(result, runStart, runEnd, runEnd - runStart);
            runStart = runEnd;
        }

        for (int i = 0; i < n; i++) {
            System.out.print(result[i] + " ");
        }
    }
}
public class Main {

    /**
     * Prints "word occurs :count" once for every distinct word of a fixed
     * sample array, in order of first appearance.
     */
    public static void main(String[] args) {
        String[] a = {"name1", "name1", "name2", "name2", "name2"};
        for (int i = 0; i < a.length; i++) {
            // A word is reported only at its first position.
            if (seenBefore(a, i)) {
                continue;
            }
            int count = 0;
            for (String other : a) {
                // equals() compares content; == would only compare references.
                if (a[i].equals(other)) {
                    count++;
                }
            }
            System.out.println(a[i] + " occurs :" + count);
        }
    }

    /** Returns true when words[index] also appears at an earlier position. */
    private static boolean seenBefore(String[] words, int index) {
        for (int j = index - 1; j >= 0; j--) {
            if (words[index].equals(words[j])) {
                return true;
            }
        }
        return false;
    }
}
import java.util.HashMap;
import java.util.Map;

public class FrequencyUsingMap {

    /**
     * Counts the occurrences of each number in a fixed sample array, prints the
     * count map, then prints the number that first reached the highest count.
     */
    public static void main(String[] args) {
        int[] a = {1, 1, 1, 1, 2, 2, 3, 4, 1};
        int maxfreq = 0;
        int maxnum = 0;
        Map<Integer, Integer> map = new HashMap<>();
        for (int num : a) {
            int frq = map.merge(num, 1, Integer::sum);
            // Checked on every occurrence, including the first one, so input
            // with only distinct values still reports a real element.
            if (frq > maxfreq) {
                maxfreq = frq;
                maxnum = num;
            }
        }
        System.out.println(map);
        System.out.println("number " + maxnum + " having max frequency " + maxfreq);
    }
}
// I wrote an easy solution for this, have a look:
public class Plus_Minus {

    /**
     * Prints, one per line, the fraction of positive, negative and zero values
     * in a fixed sample array.
     */
    public static void main(String[] args) {
        double[] x = {-4, 3, -9, -5, 4, 1};
        int positives = 0;
        int negatives = 0;
        int zeros = 0;
        for (double value : x) {
            if (value > 0) {
                positives++;
            } else if (value < 0) {
                negatives++;
            } else if (value == 0) {
                zeros++;
            }
        }
        // Cast before dividing: int / int would truncate every ratio below 1 to 0.
        double ppi = (double) positives / x.length;
        double pni = (double) negatives / x.length;
        double pzi = (double) zeros / x.length;
        System.out.println(ppi);
        System.out.println(pni);
        System.out.println(pzi);
    }
}
import java.util.ArrayList;
import java.util.List;
import java.util.Scanner;

public class test {

    /** Repeated words found by the last call to findRepetitiveWordsinString. */
    static String uniq[];

    /**
     * Lower-cases the sentence and splits it on single spaces.
     *
     * @param str the sentence to split
     * @return the lower-cased words
     */
    public static String[] convertWordArray(String str) {
        return str.toLowerCase().split(" ");
    }

    /**
     * Prints every word that occurs more than once (at its first position),
     * then the indexes of those first positions and the number of such words,
     * and stores the repeated words in {@link #uniq}.
     *
     * @param str the sentence to inspect
     */
    public static void findRepetitiveWordsinString(String str) {
        String[] words = convertWordArray(str);
        // Tracks later duplicates separately instead of overwriting them with a
        // marker string, which would be confused with a real word "0".
        boolean[] counted = new boolean[words.length];
        List<Integer> l = new ArrayList<>();
        for (int i = 0; i < words.length; i++) {
            if (counted[i]) {
                continue;
            }
            boolean repeated = false;
            for (int j = i + 1; j < words.length; j++) {
                if (words[i].equals(words[j])) {
                    counted[j] = true;
                    repeated = true;
                }
            }
            if (repeated) {
                System.out.println(words[i]);
                l.add(i);
            }
        }
        System.out.println("Repetitive words at index :" + l);
        uniq = new String[l.size()];
        for (int i = 0; i < l.size(); i++) {
            uniq[i] = words[l.get(i)];
        }
        System.out.println("Number of words that are repeated: " + uniq.length);
    }

    /**
     * For each word of {@code a} that occurs more than once in {@code b},
     * prints the word and its number of occurrences in {@code b}.
     *
     * @param a the words to look up
     * @param b the words to search in
     */
    public static void countMatches(String a[], String b[]) {
        for (String candidate : a) {
            int count = 0;
            for (String word : b) {
                if (candidate.equals(word)) {
                    count++;
                }
            }
            if (count > 1) {
                System.out.println("Repeating word is: " + candidate
                        + " and the repeating count is " + count);
            }
        }
    }

    public static void main(String[] args) {
        try (Scanner scanner = new Scanner(System.in)) {
            // An empty stdin is treated as an empty sentence instead of failing.
            String str = scanner.hasNextLine() ? scanner.nextLine() : "";
            findRepetitiveWordsinString(str);
            countMatches(uniq, convertWordArray(str));
        }
    }
}
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;

public class MultiString {

    /**
     * Counts how many times each string occurs in the array.
     *
     * <p>The array is read only; it is neither sorted nor otherwise modified.
     * An empty array yields an empty map.
     *
     * @param array the strings to count
     * @return a map from each distinct string to its number of occurrences
     */
    public HashMap<String, Integer> countIntem(String[] array) {
        HashMap<String, Integer> map = new HashMap<String, Integer>();
        for (String item : array) {
            // The map matches keys with equals(), so strings that merely share
            // a hash code (e.g. "Aa" and "BB") are counted separately.
            Integer seen = map.get(item);
            map.put(item, seen == null ? 1 : seen + 1);
        }
        return map;
    }

    /**
     * @param args unused
     */
    public static void main(String[] args) {
        String[] array = { "name1", "name1", "name2", "name2", "name2", "name3",
                "name1", "name1", "name2", "name2", "name2", "name3" };
        HashMap<String, Integer> countMap = new MultiString().countIntem(array);
        System.out.println(countMap);
    }
}
// Gives you O(n) complexity (one hash-map update per element, no sorting).
N-gram generation from a sentence
How can I generate the n-grams of a string such as: String Input="This is my car." I want to generate n-grams from this input: Input Ngram size = 3 Output should be: This is my car This is is my my car This is my is my car Please give me some idea of how to implement that in Java, or tell me whether a library is available for it. I am trying to use this NGramTokenizer, but it gives n-grams of character sequences and I want n-grams of word sequences.
// I believe this would do what you want:
import java.util.*;

public class Test {

    /**
     * Returns all n-grams of {@code n} consecutive space-separated words.
     *
     * @param n   number of words per n-gram
     * @param str sentence, split on single spaces
     * @return the n-grams in order of appearance; empty when the sentence has
     *         fewer than {@code n} words
     */
    public static List<String> ngrams(int n, String str) {
        List<String> ngrams = new ArrayList<String>();
        String[] words = str.split(" ");
        for (int i = 0; i < words.length - n + 1; i++) {
            ngrams.add(concat(words, i, i + n));
        }
        return ngrams;
    }

    /**
     * Joins {@code words[start..end)} with single spaces.
     *
     * @param words source words
     * @param start first index, inclusive
     * @param end   last index, exclusive
     * @return the joined words
     */
    public static String concat(String[] words, int start, int end) {
        StringBuilder sb = new StringBuilder();
        for (int i = start; i < end; i++) {
            if (i > start) {
                sb.append(' ');
            }
            sb.append(words[i]);
        }
        return sb.toString();
    }

    public static void main(String[] args) {
        for (int n = 1; n <= 3; n++) {
            for (String ngram : ngrams(n, "This is my car.")) {
                System.out.println(ngram);
            }
            System.out.println();
        }
    }
}

// Output:
//   This
//   is
//   my
//   car.
//
//   This is
//   is my
//   my car.
//
//   This is my
//   is my car.

// An "on-demand" solution implemented as an Iterator:
class NgramIterator implements Iterator<String> {

    String[] words;
    int pos = 0, n;

    /**
     * @param n   number of words per n-gram
     * @param str sentence, split on single spaces
     */
    public NgramIterator(int n, String str) {
        this.n = n;
        words = str.split(" ");
    }

    @Override
    public boolean hasNext() {
        return pos < words.length - n + 1;
    }

    /**
     * Returns the next n-gram.
     *
     * @throws NoSuchElementException when no n-gram is left
     */
    @Override
    public String next() {
        // The Iterator contract requires NoSuchElementException once
        // exhausted, rather than an out-of-bounds array access.
        if (!hasNext()) {
            throw new NoSuchElementException();
        }
        StringBuilder sb = new StringBuilder();
        for (int i = pos; i < pos + n; i++) {
            if (i > pos) {
                sb.append(' ');
            }
            sb.append(words[i]);
        }
        pos++;
        return sb.toString();
    }

    @Override
    public void remove() {
        throw new UnsupportedOperationException();
    }
}
You are looking for ShingleFilter. Update: The link points to version 3.0.2. This class may be in a different package in newer versions of Lucene.
This code returns an array of all Strings of the given length: public static String[] ngrams(String s, int len) { String[] parts = s.split(" "); String[] result = new String[parts.length - len + 1]; for(int i = 0; i < parts.length - len + 1; i++) { StringBuilder sb = new StringBuilder(); for(int k = 0; k < len; k++) { if(k > 0) sb.append(' '); sb.append(parts[i+k]); } result[i] = sb.toString(); } return result; } E.g. System.out.println(Arrays.toString(ngrams("This is my car", 2))); //--> [This is, is my, my car] System.out.println(Arrays.toString(ngrams("This is my car", 3))); //--> [This is my, is my car]
/** * * #param sentence should has at least one string * #param maxGramSize should be 1 at least * #return set of continuous word n-grams up to maxGramSize from the sentence */ public static List<String> generateNgramsUpto(String str, int maxGramSize) { List<String> sentence = Arrays.asList(str.split("[\\W+]")); List<String> ngrams = new ArrayList<String>(); int ngramSize = 0; StringBuilder sb = null; //sentence becomes ngrams for (ListIterator<String> it = sentence.listIterator(); it.hasNext();) { String word = (String) it.next(); //1- add the word itself sb = new StringBuilder(word); ngrams.add(word); ngramSize=1; it.previous(); //2- insert prevs of the word and add those too while(it.hasPrevious() && ngramSize<maxGramSize){ sb.insert(0,' '); sb.insert(0,it.previous()); ngrams.add(sb.toString()); ngramSize++; } //go back to initial position while(ngramSize>0){ ngramSize--; it.next(); } } return ngrams; } Call: long startTime = System.currentTimeMillis(); ngrams = ToolSet.generateNgramsUpto("This is my car.", 3); long stopTime = System.currentTimeMillis(); System.out.println("My time = "+(stopTime-startTime)+" ms with ngramsize = "+ngrams.size()); System.out.println(ngrams.toString()); Output: My time = 1 ms with ngramsize = 9 [This, is, This is, my, is my, This is my, car, my car, is my car]
public static void CreateNgram(ArrayList<String> list, int cutoff) { try { NGramModel ngramModel = new NGramModel(); POSModel model = new POSModelLoader().load(new File("en-pos-maxent.bin")); PerformanceMonitor perfMon = new PerformanceMonitor(System.err, "sent"); POSTaggerME tagger = new POSTaggerME(model); perfMon.start(); for(int i = 0; i<list.size(); i++) { String inputString = list.get(i); ObjectStream<String> lineStream = new PlainTextByLineStream(new StringReader(inputString)); String line; while ((line = lineStream.read()) != null) { String whitespaceTokenizerLine[] = WhitespaceTokenizer.INSTANCE.tokenize(line); String[] tags = tagger.tag(whitespaceTokenizerLine); POSSample sample = new POSSample(whitespaceTokenizerLine, tags); perfMon.incrementCounter(); String words[] = sample.getSentence(); if(words.length > 0) { for(int k = 2; k< 4; k++) { ngramModel.add(new StringList(words), k, k); } } } } ngramModel.cutoff(cutoff, Integer.MAX_VALUE); Iterator<StringList> it = ngramModel.iterator(); while(it.hasNext()) { StringList strList = it.next(); System.out.println(strList.toString()); } perfMon.stopAndPrintFinalResult(); }catch(Exception e) { System.out.println(e.toString()); } } Here is my codes to create n-gram. In this case, n = 2, 3. n-gram of words sequence which smaller than cutoff value will ignore from result set. Input is list of sentences, then it parse using a tool of OpenNLP
/** Prints the 1-, 2- and 3-grams of a sample sentence, one group per block. */
public static void main(String[] args) {
    String[] words = "This is my car.".split(" ");
    for (int n = 0; n < 3; n++) {
        for (String ngram : ngrams(n, words)) {
            System.out.println(ngram);
        }
        System.out.println();
    }
}

/**
 * Returns every run of {@code stepSize + 1} consecutive words, joined by
 * single spaces with no leading or trailing space.
 *
 * @param stepSize n-gram size minus one (0 gives single words, 1 gives bigrams)
 * @param words    the words of the sentence, in order
 * @return the n-grams in order of appearance; empty when there are too few words
 */
public static List<String> ngrams(int stepSize, String[] words) {
    List<String> ngrams = new ArrayList<String>();
    for (int start = 0; start < words.length - stepSize; start++) {
        StringBuilder gram = new StringBuilder();
        for (int k = start; k <= start + stepSize; k++) {
            // Separator goes between words only, so no n-gram starts with a space.
            if (k > start) {
                gram.append(' ');
            }
            gram.append(words[k]);
        }
        ngrams.add(gram.toString());
    }
    return ngrams;
}
Check this out: public static void main(String[] args) { NGram nGram = new NGram(); String[] tokens = "this is my car".split(" "); int i = tokens.length; List<String> ngrams = new ArrayList<>(); while (i >= 1){ ngrams.addAll(nGram.getNGram(tokens, i, new ArrayList<>())); i--; } System.out.println(ngrams); } private List<String> getNGram(String[] tokens, int n, List<String> ngrams) { StringBuilder strbldr = new StringBuilder(); if (tokens.length < n) { return ngrams; }else { for (int i=0; i<n; i++){ strbldr.append(tokens[i]).append(" "); } ngrams.add(strbldr.toString().trim()); String[] newTokens = Arrays.copyOfRange(tokens, 1, tokens.length); return getNGram(newTokens, n, ngrams); } } Simple recursive function, better running time.