Execute SELECT sql by randomly picking tables - java

I am working on a project in which I have two tables in a different database with different schemas. So that means I have two different connection parameters for those two tables to connect using JDBC-
Let's suppose below is the config.property file-
TABLES: table1 table2
#For Table1
table1.url: jdbc:mysql://localhost:3306/garden
table1.user: gardener
table1.password: shavel
table1.driver: jdbc-driver
table1.percentage: 80
#For Table2
table2.url: jdbc:mysql://otherhost:3306/forest
table2.user: forester
table2.password: axe
table2.driver: jdbc-driver
table2.percentage: 20
Below method will read the above config.property file and make a ReadTableConnectionInfo object for each tables.
private static HashMap<String, ReadTableConnectionInfo> tableList = new HashMap<String, ReadTableConnectionInfo>();
private static void readPropertyFile() throws IOException {
prop.load(Read.class.getClassLoader().getResourceAsStream("config.properties"));
tableNames = Arrays.asList(prop.getProperty("TABLES").split(" "));
for (String arg : tableNames) {
ReadTableConnectionInfo ci = new ReadTableConnectionInfo();
String url = prop.getProperty(arg + ".url");
String user = prop.getProperty(arg + ".user");
String password = prop.getProperty(arg + ".password");
String driver = prop.getProperty(arg + ".driver");
double percentage = Double.parseDouble(prop.getProperty(arg + ".percentage"));
ci.setUrl(url);
ci.setUser(user);
ci.setPassword(password);
ci.setDriver(driver);
ci.setPercentage(percentage);
tableList.put(arg, ci);
}
}
Below is the ReadTableConnectionInfo class that will hold all the table connection info for a particular table.
public class ReadTableConnectionInfo {
public String url;
public String user;
public String password;
public String driver;
public String percentage;
public String getUrl() {
return url;
}
public void setUrl(String url) {
this.url = url;
}
public String getUser() {
return user;
}
public void setUser(String user) {
this.user = user;
}
public String getPassword() {
return password;
}
public void setPassword(String password) {
this.password = password;
}
public String getDriver() {
return driver;
}
public void setDriver(String driver) {
this.driver = driver;
}
public double getPercentage() {
return percentage;
}
public void setPercentage(double percentage) {
this.percentage = percentage;
}
}
Now I am creating ExecutorService for specified number of threads and passing this tableList object to constructor of ReadTask class-
// create thread pool with given size
ExecutorService service = Executors.newFixedThreadPool(10);
for (int i = 0; i < 10; i++) {
service.submit(new ReadTask(tableList));
}
Below is my ReadTask that implements Runnable interface in which each thread is supposed to make a connection for each tables.
class ReadTask implements Runnable {
private final HashMap<String, XMPReadTableConnectionInfo> tableLists;
public ReadTask(HashMap<String, ReadTableConnectionInfo> tableList) {
this.tableLists = tableList;
}
#Override
public void run() {
int j = 0;
dbConnection = new Connection[tableLists.size()];
statement = new Statement[tableLists.size()];
//loop around the map values and make the connection list
for (ReadTableConnectionInfo ci : tableLists.values()) {
dbConnection[j] = getDBConnection(ci.getUrl(), ci.getUser(), ci.getPassword(), ci.getDriver());
statement[j] = dbConnection[j].createStatement();
j++;
}
while (System.currentTimeMillis() <= 60 minutes) {
/* Generate random number and check to see whether that random number
* falls between 1 and 80, if yes, then choose table1
* and then use table1 connection and statement that I made above and do a SELECT * on that table.
* If that random numbers falls between 81 and 100 then choose table2
* and then use table2 connection and statement and do a SELECT * on that table
*/
ResultSet rs = statement[what_table_statement].executeQuery(selectTableSQL);
}
}
}
Currently I have two tables, that means each thread will make two connections for each table and then use that particular table connection for doing SELECT * on that table depending on the random generation number.
Algorithm:-
Generate Random number between 1 and 100.
If that random number is less than table1.getPercentage() then choose table1
and then use table1 statement object to make a SELECT sql call to that database.
else choose table2 and then use table2 statement object to make a SELECT sql call to that database.
My Question-
I am having hard time in figuring out how should apply the above algorithm and how should I compare the random number with each tables percentage and then decide which table I need to use and after that figure out which table connection and statements I need to use to make a SELECT sql call.
So that means I need to check getPercentage() method of each table and them compare with the Random Number.
Right now I have only two tables, in future I can have three tables, with percentage distribution might be as 80 10 10.
UPDATE:-
class ReadTask implements Runnable {
private Connection[] dbConnection = null;
private ConcurrentHashMap<ReadTableConnectionInfo, Connection> tableStatement = new ConcurrentHashMap<ReadTableConnectionInfo, Connection>();
public ReadTask(LinkedHashMap<String, XMPReadTableConnectionInfo> tableList) {
this.tableLists = tableList;
}
#Override
public run() {
int j = 0;
dbConnection = new Connection[tableLists.size()];
//loop around the map values and make the connection list
for (ReadTableConnectionInfo ci : tableLists.values()) {
dbConnection[j] = getDBConnection(ci.getUrl(), ci.getUser(), ci.getPassword(), ci.getDriver());
tableStatement.putIfAbsent(ci, dbConnection[j]);
j++;
}
Random random = new SecureRandom();
while ( < 60 minutes) {
double randomNumber = random.nextDouble() * 100.0;
ReadTableConnectionInfo table = selectRandomConnection(randomNumber);
for (Map.Entry<ReadTableConnectionInfo, Connection> entry : tableStatement.entrySet()) {
if (entry.getKey().getTableName().equals(table.getTableName())) {
final String id = generateRandomId(random);
final String selectSql = generateRandomSQL(table);
preparedStatement = entry.getValue().prepareCall(selectSql);
preparedStatement.setString(1, id);
rs = preparedStatement.executeQuery();
}
}
}
}
private String generateRandomSQL(ReadTableConnectionInfo table) {
int rNumber = random.nextInt(table.getColumns().size());
List<String> shuffledColumns = new ArrayList<String>(table.getColumns());
Collections.shuffle(shuffledColumns);
String columnsList = "";
for (int i = 0; i < rNumber; i++) {
columnsList += ("," + shuffledColumns.get(i));
}
final String sql = "SELECT ID" + columnsList + " from "
+ table.getTableName() + " where id = ?";
return sql;
}
private ReadTableConnectionInfo selectRandomConnection(double randomNumber) {
double limit = 0;
for (ReadTableConnectionInfo ci : tableLists.values()) {
limit += ci.getPercentage();
if (random.nextDouble() < limit) {
return ci;
}
throw new IllegalStateException();
}
return null;
}
}

You could think of it as a loop over the available connections, something like the following:
public run() {
...
Random random = new SecureRandom();
while ( < 60 minutes) {
double randomNumber = random.nextDouble() * 100.0;
ReadTableConnectionInfo tableInfo = selectRandomConnection(randomNumber);
// do query...
}
}
private ReadTableConnectionInfo selectRandomConnection(double randomNumber) {
double limit = 0;
for (ReadTableConnectionInfo ci : tableLists.values()) {
limit += ci.getPercentage();
if (randomNumber < limit) {
return ci;
}
throw new IllegalStateException();
}
As long as randomNumber has a maximum value of less then sum(percentage), that'll do the job.
One other thing I thought of: if you're going to end up having so many possible queries that the a looping lookup becomes an issue, you could build a lookup table: create an array such that the total size of the array contains enough entries so that the relative weightings of the queries can be represented with integers.
For your example of three queries, 80:10:10, have a 10-entry array of ReadTableConnectionInfo with eight references pointing to table1, one to table2, and one to table3. Then simply scale your random number to be 0 <= rand < 10 (eg (int)(Math.random() * 10), and use it to index in to your array.

Regardless of how many tables you have, their percentages will always add up to 100. The easiest way to conceptualize how you would choose is to think of each table as representing a range of percentages.
For instance, with three tables that have the percents you mentioned (80%, 10%, 10%), you could conceptualize them as:
Random Number
From To == Table ==
0.0000 0.8000 Table_1
0.8000 0.9000 Table_2
0.9000 1.0000 Table_3
So, generate a Random # between 0.0000 and 1.0000 and then go down the ordered list and see which range fits, and therefore which table to use.
(BTW: I'm not sure why you have two connections for each table.)

You can build a lookup table which contains the table name and its weight:
class LookupTable {
private int[] weights;
private String[] tables;
private int size = 0;
public LookupTable(int n) {
this.weights = new int[n];
this.tables = new String[n];
}
public void addTable(String tableName, int r) {
this.weights[size] = r;
this.tables[size] = tableName;
size++;
}
public String lookupTable(int n) {
for (int i = 0; i < this.size; i++) {
if (this.weights[i] >= n) {
return this.tables[i];
}
}
return null;
}
}
The code to initialize the table:
LookupTable tr = new LookupTable(3);
// make sure adds the range from lower to upper!
tr.addTable("table1", 20);
tr.addTable("table2", 80);
tr.addTable("table3", 100);
The test code:
Random r = new Random(System.currentTimeMillis());
for (int i = 0; i < 10; i++) {
// r.nextInt(101) + 1 would return a number of range [1~100].
int n = r.nextInt(101) + 1;
String tableName = tr.lookupTable(n);
System.out.println(n + ":" + tableName);
}

Related

Limit the resultset and write into a file based on ram size, rows count and columns count

I am developing a system in Java which reads all the MySQL database tables, performs some operation and finally writes all data into a file (separate file for each table).
Since all the database table have different number of columns and different number of rows, there can be memory issue if the data is higher than our system can handle. Therefore, I need to write code that reads the tables values block by block and writes that data into file; and after some iterating all the data are written into that file.
I believe this approach would run in any system with any RAM size so that this system works without running into memory issues. Currently, for any table I am limiting the query result and writing that result in a one file, and iterating this process over and over until all the results are not processed. Here the value of limit size and number of iteration for all tables are dynamic, i.e. depends upon number of rows, columns and RAM size.
Following is the code written so far.
public static void main(String[] args) throws Exception {
List<String> dbList = MySqlUtils.getAllTableNames("datahouse");
for (String tableName : dbList) {
processTable(tableName);
}
}
public static void processTable(String tableName) throws Exception {
String dbname = "datahouse";
int startIndex = 0;
int limit = getMySqlQueryLimit(dbname, tableName);
int endIndex = limit;
int iteratorLength = getIteratorLength(dbname, tableName);
for (int i = 1; i <= iteratorLength; i++) {
ResultSet resultSet = getResultSet(tableName, startIndex, endIndex);
while (resultSet.next()) {
// Write into file after some operation
}
startIndex = endIndex;
endIndex += limit;
}
}
public static ResultSet getResultSet(String tableName, int startLimit, int endLimit) throws SQLException {
StringBuilder builder = new StringBuilder();
builder.append("SELECT * FROM " + tableName);
builder.append("ORDER BY id ASC limit (");
builder.append(startLimit);
builder.append(",");
builder.append(endLimit);
builder.append(")");
return MySqlUtils.getStatement().executeQuery(builder.toString());
}
public static int getMySqlQueryLimit(String dbName, String tableName) throws SQLException {
long ramSize = SystemUtils.getPhysicalMemorySize();
int columnSize = getColumnCount(dbName, tableName);
int totalRows = getRowsCount(dbName, tableName);
//TODO
return 0;
}
public static int getIteratorLength(String dbName, String tableName) {
try {
long ramSize = SystemUtils.getPhysicalMemorySize();
int columnSize = getColumnCount(dbName, tableName);
int totalRows = getRowsCount(dbName, tableName);
//TODO
return 0;
} catch (SQLException e) {
e.printStackTrace();
return 0;
}
}
In processTable() method, there is a dependency between limit and iteratorLength. Is there any algorithm (or any mathematical formula) that can calculate the values for getMySqlQueryLimit() and getIteratorLength(), so that this code can be executed in any of the system independent of RAM size i.e. without running into memory issue?

GC overhead limit exceeded while training OpenNLP's NameFinderME

I want to get probability score for the extracted names using NameFinderME, but using the provided model gives very bad probabilities using the probs function.
For example, "Scott F. Fitzgerald" gets a score around 0.5 (averaging log probabilities, and taking an exponent), while "North Japan" and "Executive Vice President, Corporate Relations and Chief Philanthropy Officer" both get a score higher than 0.9...
I have more than 2 million first names, and another 2 million last names (with their frequency counts) And I want to synthetically create a huge dataset from outer multiplication of the first names X middle names (using the first names pool) X last names.
The problem is, I don't even get to go over all the last names once (even when discarding freq counts and only using each name only once) before I get a GC overhead limit exceeded exception...
I'm implementing a ObjectStream and give it to the train function:
public class OpenNLPNameStream implements ObjectStream<NameSample> {
private List<Map<String, Object>> firstNames = null;
private List<Map<String, Object>> lastNames = null;
private int firstNameIdx = 0;
private int firstNameCountIdx = 0;
private int middleNameIdx = 0;
private int middleNameCountIdx = 0;
private int lastNameIdx = 0;
private int lastNameCountIdx = 0;
private int firstNameMaxCount = 0;
private int middleNameMaxCount = 0;
private int lastNameMaxCount = 0;
private int firstNameKBSize = 0;
private int lastNameKBSize = 0;
Span span[] = new Span[1];
String fullName[] = new String[3];
String partialName[] = new String[2];
private void increaseFirstNameCountIdx()
{
firstNameCountIdx++;
if (firstNameCountIdx == firstNameMaxCount) {
firstNameIdx++;
if (firstNameIdx == firstNameKBSize)
return; //no need to update anything - this is the end of the run...
firstNameMaxCount = getFirstNameMaxCount(firstNameIdx);
firstNameCountIdx = 0;
}
}
private void increaseMiddleNameCountIdx()
{
lastNameCountIdx++;
if (middleNameCountIdx == middleNameMaxCount) {
if (middleNameIdx == firstNameKBSize) {
resetMiddleNameIdx();
increaseFirstNameCountIdx();
} else {
middleNameMaxCount = getMiddleNameMaxCount(middleNameIdx);
middleNameCountIdx = 0;
}
}
}
private void increaseLastNameCountIdx()
{
lastNameCountIdx++;
if (lastNameCountIdx == lastNameMaxCount) {
lastNameIdx++;
if (lastNameIdx == lastNameKBSize) {
resetLastNameIdx();
increaseMiddleNameCountIdx();
}
else {
lastNameMaxCount = getLastNameMaxCount(lastNameIdx);
lastNameCountIdx = 0;
}
}
}
private void resetLastNameIdx()
{
lastNameIdx = 0;
lastNameMaxCount = getLastNameMaxCount(0);
lastNameCountIdx = 0;
}
private void resetMiddleNameIdx()
{
middleNameIdx = 0;
middleNameMaxCount = getMiddleNameMaxCount(0);
middleNameCountIdx = 0;
}
private int getFirstNameMaxCount(int i)
{
return 1; //compromised on using just
//String occurences = (String) firstNames.get(i).get("occurences");
//return Integer.parseInt(occurences);
}
private int getMiddleNameMaxCount(int i)
{
return 3; //compromised on using just
//String occurences = (String) firstNames.get(i).get("occurences");
//return Integer.parseInt(occurences);
}
private int getLastNameMaxCount(int i)
{
return 1;
//String occurences = (String) lastNames.get(i).get("occurences");
//return Integer.parseInt(occurences);
}
#Override
public NameSample read() throws IOException {
if (firstNames == null) {
firstNames = CSVFileTools.readFileFromInputStream("namep_first_name_idf.csv", new ClassPathResource("namep_first_name_idf.csv").getInputStream());
firstNameKBSize = firstNames.size();
firstNameMaxCount = getFirstNameMaxCount(0);
middleNameMaxCount = getFirstNameMaxCount(0);
}
if (lastNames == null) {
lastNames = CSVFileTools.readFileFromInputStream("namep_last_name_idf.csv",new ClassPathResource("namep_last_name_idf.csv").getInputStream());
lastNameKBSize = lastNames.size();
lastNameMaxCount = getLastNameMaxCount(0);
}
increaseLastNameCountIdx();;
if (firstNameIdx == firstNameKBSize)
return null; //we've finished iterating over all permutations!
String [] sentence;
if (firstNameCountIdx < firstNameMaxCount / 3)
{
span[0] = new Span(0,2,"Name");
sentence = partialName;
sentence[0] = (String)firstNames.get(firstNameIdx).get("first_name");
sentence[1] = (String)lastNames.get(lastNameIdx).get("last_name");
}
else
{
span[0] = new Span(0,3,"name");
sentence = fullName;
sentence[0] = (String)firstNames.get(firstNameIdx).get("first_name");
sentence[2] = (String)lastNames.get(lastNameIdx).get("last_name");
if (firstNameCountIdx < 2*firstNameCountIdx/3) {
sentence[1] = (String)firstNames.get(middleNameIdx).get("first_name");
}
else {
sentence[1] = ((String)firstNames.get(middleNameIdx).get("first_name")).substring(0,1) + ".";
}
}
return new NameSample(sentence,span,true);
}
#Override
public void reset() throws IOException, UnsupportedOperationException {
firstNameIdx = 0;
firstNameCountIdx = 0;
middleNameIdx = 0;
middleNameCountIdx = 0;
lastNameIdx = 0;
lastNameCountIdx = 0;
firstNameMaxCount = 0;
middleNameMaxCount = 0;
lastNameMaxCount = 0;
}
#Override
public void close() throws IOException {
reset();
firstNames = null;
lastNames = null;
}
}
And
TokenNameFinderModel model = NameFinderME.train("en","person",new OpenNLPNameStream(),TrainingParameters.defaultParams(),new TokenNameFinderFactory());
model.serialize(new FileOutputStream("trainedNames.bin",false));
I get the following error after a few minutes of running:
java.lang.OutOfMemoryError: GC overhead limit exceeded
at opennlp.tools.util.featuregen.WindowFeatureGenerator.createFeatures(WindowFeatureGenerator.java:112)
at opennlp.tools.util.featuregen.AggregatedFeatureGenerator.createFeatures(AggregatedFeatureGenerator.java:79)
at opennlp.tools.util.featuregen.CachedFeatureGenerator.createFeatures(CachedFeatureGenerator.java:69)
at opennlp.tools.namefind.DefaultNameContextGenerator.getContext(DefaultNameContextGenerator.java:118)
at opennlp.tools.namefind.DefaultNameContextGenerator.getContext(DefaultNameContextGenerator.java:37)
at opennlp.tools.namefind.NameFinderEventStream.generateEvents(NameFinderEventStream.java:113)
at opennlp.tools.namefind.NameFinderEventStream.createEvents(NameFinderEventStream.java:137)
at opennlp.tools.namefind.NameFinderEventStream.createEvents(NameFinderEventStream.java:36)
at opennlp.tools.util.AbstractEventStream.read(AbstractEventStream.java:62)
at opennlp.tools.util.AbstractEventStream.read(AbstractEventStream.java:27)
at opennlp.tools.util.AbstractObjectStream.read(AbstractObjectStream.java:32)
at opennlp.tools.ml.model.HashSumEventStream.read(HashSumEventStream.java:46)
at opennlp.tools.ml.model.HashSumEventStream.read(HashSumEventStream.java:29)
at opennlp.tools.ml.model.TwoPassDataIndexer.computeEventCounts(TwoPassDataIndexer.java:130)
at opennlp.tools.ml.model.TwoPassDataIndexer.<init>(TwoPassDataIndexer.java:83)
at opennlp.tools.ml.AbstractEventTrainer.getDataIndexer(AbstractEventTrainer.java:74)
at opennlp.tools.ml.AbstractEventTrainer.train(AbstractEventTrainer.java:91)
at opennlp.tools.namefind.NameFinderME.train(NameFinderME.java:337)
Edit: After increasing the memory of the JVM to 8GB, I still don't get past the first 2 million last names, but now the Exception is:
java.lang.OutOfMemoryError: Java heap space
at java.util.HashMap.resize(HashMap.java:703)
at java.util.HashMap.putVal(HashMap.java:662)
at java.util.HashMap.put(HashMap.java:611)
at opennlp.tools.ml.model.AbstractDataIndexer.update(AbstractDataIndexer.java:141)
at opennlp.tools.ml.model.TwoPassDataIndexer.computeEventCounts(TwoPassDataIndexer.java:134)
at opennlp.tools.ml.model.TwoPassDataIndexer.<init>(TwoPassDataIndexer.java:83)
at opennlp.tools.ml.AbstractEventTrainer.getDataIndexer(AbstractEventTrainer.java:74)
at opennlp.tools.ml.AbstractEventTrainer.train(AbstractEventTrainer.java:91)
at opennlp.tools.namefind.NameFinderME.train(NameFinderME.java:337)
It seems the problem stems from the fact I'm creating a new NameSample along with new Spans and Strings at every read call... But I can't reuse Spans or NameSamples, since they're immutables.
Should I just write my own language model, is there a better Java library for doing this sort of thing (I'm only interested in getting the probability the extracted text is actually a name) are there parameters I should tweak for the model I'm training?
Any advice would be appreciated.

Null pointer exception, taking data form lists and arrays

I've checked everything several time, can't get where I am wrong..
Main class:
try
{
File productData = new File("productData.txt");
Product [] consideredRange = InputFileData
.readProductDataFile(productData);
ElectronicsEquipmentSupplier management =
new ElectronicsEquipmentSupplier(1, 12, consideredRange);
File customerData = new File("CustomerData.txt");
Scanner fileScan = new Scanner(customerData);
while(fileScan.hasNext())
management.addNewCustomer(InputFileData.
readCustomerData(fileScan));
management.addNewPurchaseOrder("21/01/12", "PSC-1235", "kD/9767", 50);
}
catch(Exception e)
{
System.out.println(e);
e.printStackTrace();
}
InputFileData class works perfectly. I have created an object of ElectronicsEquipmentSupplier with a consideredRange of products. Also added customers to a customersList.
Here is ElectronicsEquipmentSupplier class:
public class ElectronicsEquipmentSupplier
{
private int currentMonth;
private int currentYear;
private Product [] productRange;
private CustomerDetailsList customersList;
private PurchaseOrderList currentYearList;
private PurchaseOrderList lastYearList;
public ElectronicsEquipmentSupplier(int currentMonth, int currentYear, Product [] range)
{
this.currentMonth = currentMonth;
this.currentYear = currentYear;
productRange = new Product[range.length];
customersList = new CustomerDetailsList();
currentYearList = new PurchaseOrderList();
lastYearList = new PurchaseOrderList();
}
public void addNewPurchaseOrder(String dateStr, String customerID,
String productCode, int qty) throws IncorrectPurchaseOrderException
{
// check for positive order quantity
if(qty < 1)
throw new IncorrectPurchaseOrderException("Order quantity must be"
+ " positive!");
// check for the product code in given range and get that product
Product foundProduct = null;
for(int i = 0; i < productRange.length; i++)
{
if(productRange[i].getProductCode().equals(productCode))
{
foundProduct = productRange[i];
break;
}
}
if(foundProduct == null)
throw new IncorrectPurchaseOrderException("Product code is not in"
+ " the product range!");
try
{
// creating OrderDate object and getting appropriate discount
OrderDate newDate = new OrderDate(dateStr);
int discount = customersList.findCustomer(customerID).
getDiscountRate();
// creating purchase order and adding it to a list
PurchaseOrder givenOrder = new PurchaseOrder(newDate, customerID,
foundProduct, qty, discount);
currentYearList.addPurchaseOrder(givenOrder);
// updating the record of purchasing customer
int priceValue = givenOrder.getFullPriceValue();
customersList.findCustomer(customerID)
.updateTotalOrdersValue(priceValue);
}
catch(Exception e)
{
throw new IncorrectPurchaseOrderException("The problem is with: "
+ "\n" + e);
}
}
It shows that I've got NullPointerException at: if(productRange[i].getProductCode().equals(productCode))
and in the main class at:
management.addNewPurchaseOrder("21/01/12", "PSC-1235", "kD/9767", 50);
Can't get why, as I have all required info..
Thank you!
Update 1:
Added this to the main method to solve first issue:
for(int i = 0; i < consideredRange.length; i++)
management.getProductRange()[i] = consideredRange[i];
But now the ID of a customer cannot be found...
That's the method in CustomerDetailsList class, which throws exception:
public CustomerDetails findCustomer(String givenID)
throws CustomerNotFoundException
{
int i = 0;
boolean match = false;
while(!match && i < listOfCustomerDetails.size())
{
match = listOfCustomerDetails.get(i).getCustomerID()
.equals(givenID);
i++;
}
if(!match)
throw new CustomerNotFoundException("The provided ID has not been"
+ " found");
else
return listOfCustomerDetails.get(i);
}
Update 2: updated .findCustomer() as SMA suggested
You are trying to initialize product in constructor like
productRange = new Product[range.length];
And then using it like:
if(productRange[i].getProductCode().equals(productCode))
Now you allocated space for your array but individual array elements i.e. products are not initialized and hence you get NullPointerException. To resolve the issue, you could do something like:
productRange[i] = new Product(..);//and then use it
Most likely because productRange[i] has not been initialized.
In your constructor you need to fully initialise the productRange array. Right now you are just creating an array of null references.
public ElectronicsEquipmentSupplier(int currentMonth,
int currentYear,
Product [] range) {
this.currentMonth = currentMonth;
this.currentYear = currentYear;
productRange = new Product[range.length];
for (int i = 0; i < productRange.length; i++) {
productRange[i] = range[i];
}
customersList = new CustomerDetailsList();
currentYearList = new PurchaseOrderList();
lastYearList = new PurchaseOrderList();
}
The above solution build a new array which reference the same objects as the range array passed to the constructor.
You may just want to reference the array without any allocation, e.g.
public ElectronicsEquipmentSupplier(int currentMonth,
int currentYear,
Product [] range) {
//...
productRange = range;
//...
}
Or do a deep copy of the range array, assuming you have either a Product#clone() method or a Product constructor that takes a Product parameter, e.g.
public ElectronicsEquipmentSupplier(int currentMonth,
int currentYear,
Product [] range) {
//...
productRange = new Product[range.length];
for (int i = 0; i < productRange.length; i++) {
productRange[i] = new Product(range[i]);
}
//...
}
The choice between these different methods depends on how the ElectronicsEquipmentSupplier class is used.

Cassandra + Hector, force compaction in a test to check that empty rows get deleted

We want to test that if a column has TTL (time-to-live) property it eventually will be removed from cassandra entirely along with the empty row which contained it.
As I understood, the algorithm for testing this behavious is
when saving an object, set TTL for a column
wait when TTL time passes, check that returned value is null
wait when GC_GRACE_SECONDS perion passes
check that the row also gets removed
And I failed to check the last item.
As I discovered (eg. here or here and in other places), I need to run compaction. Similar questions have been raised (eg. Hector (Cassandra) Delete Anomaly), but I didn't find anything that helped, and googling hasn't helped much.
So the question is, how I can force compaction from my integration test (using hector) to ensure that it behaves as expected? Or are there other ways to do this?
P.S. Truncating a column family is not an option.
Here are the details.
My tests:
private static final String KEYSPACE = "KEYSPACE";
private static final String COLUMN_FAMILY = "COLUMN_FAMILY";
private static final int GC_CRACE_SECONDS = 5;
// sut
private CassandraService cassandraService;
// dependencies
private Cluster cluster = HFactory.getOrCreateCluster("tstCltr",
"localhost:9160");
private Keyspace keyspace;
#BeforeClass
public static void setupBeforeClass() {
EmbeddedCassandraDaemon.getEmbeddedCassandraDaemon();
}
#Before
public void setUp() throws Exception {
keyspace = createKeyspace(KEYSPACE, cluster,
new QuorumAllConsistencyLevelPolicy());
cassandraService = new CassandraService(cluster, KEYSPACE,
COLUMN_FAMILY, GC_CRACE_SECONDS);
}
#Test
public void rowGetsRemovedAfterGCGraceSeconds() throws Exception {
Object obj = "OBJECT";
String rowKey = "key";
String columnName = "columnName";
logger.info("before persisting rows count is {}" + countRows());
cassandraService.persistObjectWithTtl(rowKey, columnName, obj, 5);
logger.info("after persisting rows count is {}" + countRows());
Object value = retrieve(rowKey, columnName);
assertNotNull(value);
logger.info("before TTL passes rows count is {}" + countRows());
TimeUnit.SECONDS.sleep(6);
Object nullValue = retrieve(rowKey, columnName);
assertNull(nullValue);
logger.info("after TTL passes rows count is {}" + countRows());
TimeUnit.SECONDS.sleep(10);
logger.info("wait 10 more seconds... rows count is {}" + countRows());
System.out.println("================================" + countRows());
TimeUnit.SECONDS.sleep(120);
int countRows = countRows();
logger.info("wait 2 more minutes... rows count is {}" + countRows);
assertEquals(0, countRows);
}
Code for persisting:
public void persistObjectWithTtl(Object rowKey, Object columnName,
Object obj, int ttl) {
LOGGER.debug("Persist {} / {}", rowKey, columnName);
HColumn<Object, Object> column = createColumn(columnName, obj,
SERIALIZER, SERIALIZER);
column.setTtl(ttl);
executeInsertion(rowKey, column);
}
private void executeInsertion(Object rowKey, HColumn<Object, Object> column) {
Mutator<Object> mutator = createMutator(keyspace, SERIALIZER);
mutator.addInsertion(rowKey, this.columnFamilyName, column);
mutator.execute();
}
Setting GcGraceSeconds for a column family:
private void addColumnFamily(String keySpaceName, String columnFamilyName,
int gcGraceSeconds) {
ColumnFamilyDefinition columnFamilyDefinition =
createColumnFamilyDefinition(keySpaceName, columnFamilyName);
ThriftCfDef columnFamilyWithGCGraceSeconds =
new ThriftCfDef(columnFamilyDefinition);
columnFamilyWithGCGraceSeconds.setGcGraceSeconds(gcGraceSeconds);
cluster.addColumnFamily(columnFamilyWithGCGraceSeconds);
}
And the code for counting rows, found on SO:
public int countRows() {
int rowCount = 100;
ObjectSerializer serializer = ObjectSerializer.get();
RangeSlicesQuery<Object, Object, Object> rangeSlicesQuery =
HFactory.createRangeSlicesQuery(keyspace, serializer,
serializer, serializer)
.setColumnFamily(COLUMN_FAMILY)
.setRange(null, null, false, 10)
.setRowCount(rowCount);
Object lastKey = null;
int i = 0;
while (true) {
rangeSlicesQuery.setKeys(lastKey, null);
QueryResult<OrderedRows<Object, Object, Object>> result =
rangeSlicesQuery.execute();
OrderedRows<Object, Object, Object> rows = result.get();
Iterator<Row<Object, Object, Object>> rowsIterator = rows.iterator();
if (lastKey != null && rowsIterator != null) {
rowsIterator.next();
}
while (rowsIterator.hasNext()) {
Row<Object, Object, Object> row = rowsIterator.next();
lastKey = row.getKey();
i++;
if (row.getColumnSlice().getColumns().isEmpty()) {
continue;
}
}
if (rows.getCount() < rowCount) {
break;
}
}
return i;
}
Thanks.
Update:
The reason was that the amount of data was not enoght for compaction to run, so I needed to put more data, and flush tables more frequently to disk. So I ended up with the following test case:
#Test
public void rowGetsRemovedAfterGCGraceSeconds() throws Exception {
final int expectedAmount = 50000;
logger.info("before persisting rows count is {}", countRows());
for (int i = 0; i < expectedAmount; i++) {
String rowKey = RandomStringUtils.randomAlphanumeric(128);
Object obj = RandomStringUtils.randomAlphanumeric(1000);
cassandraService.persistObjectWithTtl(rowKey, COLUMN_NAME, obj, 20);
if (i % 100 == 0) {
StorageService.instance.forceTableFlush(KEYSPACE, COLUMN_FAMILY);
}
}
logger.info("causing major compaction...");
StorageService.instance.forceTableCompaction(KEYSPACE, COLUMN_FAMILY);
logger.info("after major compaction rows count is {}", countRows());
waitAtMost(Duration.TWO_MINUTES)
.pollDelay(Duration.TWO_SECONDS)
.pollInterval(Duration.ONE_HUNDRED_MILLISECONDS)
.until(new Callable<Boolean>() {
#Override
public Boolean call() throws Exception {
int countRows = countRows();
logger.info("the rows count is {}", countRows);
return countRows < expectedAmount;
}
});
}
full code : test class and sut
Since you're working with Java, you can easily force a compaction through JMX by using the forceTableCompaction(keyspace, columnFamily) method of the org.apache.cassandra.db.StorageService MBean.

how to implement multithreaded Breadth-first search in java?

I've done the breadth-first search in a normal way.
now I'm trying to do it in a multithreaded way.
i have one queue which is shared between the threads.
i use synchronize(LockObject) when i remove a node from the queue ( FIFI queue )
so what I'm trying to do is that.
when i thread finds a solution all the other threads will stop immediately.
i assume you are traversing a tree for your BFS.
create a thread pool.
for each unexplored children in the node, retrieve a thread from the thread pool (perhaps using a Semaphore). mark the child node as 'explored' and explore the node's children in a BFS manner. when you have found a solution or done exploring all the nodes, release the semaphore.
^ i've never done this before so i might have missed out something.
Assuming you want to do this iteratively (see note at the bottom why there may be better closed solutions), this is not a great problem for exercising multi threading. The problem is that multithreading is great if you don't depend on previous results, but here you want the minimum amount of coins.
As you point out, a breadth first solution guarantees that once you reach the desired amount, you won't have any further solutions with less coins in a single threaded environment. However, in a multithreaded environment, once you start calculating a solution, you cannot guarantee that it will finish before some other solution. Let's imagine for the value 21: it can be a 20c coin and a 1c or four 5c coins and a 1c; if both are calculating simultaneously, you cannot guarantee that the first (and correct) solution will finish first. In practice, it is unlikely the situation will happen, but when you work with multithreading you want the solution to work in theory, because multithreads always fail in the demonstration, no matter if they should not have failed until the death heat of the universe.
Now you have 2 possible solutions: one is to introduce choke points at the beginning of each level; you don't start that level until the previous level is finished. The other is once you reach a solution continue doing all the calculations with a lower level than the current result (which means you cannot purge the others). Probably with all the synchronization needed you get better performance by going single threaded, but let's go on.
For the first solution, the natural form is to iterate increasing the level. You can use the solution provided by happymeal, with a Semaphore. An alternative is to use the new classes provided by java.
CoinSet getCoinSet(int desiredAmount) throws InterruptedException {
// Use whatever number of threads you prefer or another member of Executors.
final ExecutorService executor = Executors.newFixedThreadPool(10);
ResultContainer container = new ResultContainer();
container.getNext().add(new Producer(desiredAmount, new CoinSet(), container));
while (container.getResult() == null) {
executor.invokeAll(container.setNext(new Vector<Producer>()));
}
return container.getResult();
}
public class Producer implements Callable<CoinSet> {
private final int desiredAmount;
private final CoinSet data;
private final ResultContainer container;
public Producer(int desiredAmount, CoinSet data, ResultContainer container) {
this.desiredAmount = desiredAmount;
this.data = data;
this.container = container;
}
public CoinSet call() {
if (data.getSum() == desiredAmount) {
container.setResult(data);
return data;
} else {
Collection<CoinSet> nextSets = data.addCoins();
for (CoinSet nextSet : nextSets) {
container.getNext().add(new Producer(desiredAmount, nextSet, container));
}
return null;
}
}
}
// Probably it is better to split this class, but you then need to pass too many parameters
// The only really needed part is to create a wrapper around getNext, since invokeAll is
// undefined if you modify the list of tasks.
public class ResultContainer {
// I use Vector because it is synchronized.
private Vector<Producer> next = new Vector<Producer>();
private CoinSet result = null;
// Note I return the existing value.
public Vector<Producer> setNext(Vector<Producer> newValue) {
Vector<Producer> current = next;
next = newValue;
return current;
}
public Vector<Producer> getNext() {
return next;
}
public synchronized void setResult(CoinSet newValue) {
result = newValue;
}
public synchronized CoinSet getResult() {
return result;
}
}
This still has the problem that existing tasks are executed; however, it is simple to fix that; pass the thread executor into each Producer (or the container). Then, when you find a result, call executor.shutdownNow. The threads that are executing won't be interrupted, but the operation in each thread is trivial so it will finish fast; the runnables that have not started won't start.
The second option means you have to let all the current tasks finish, unless you keep track of how many tasks you have run at each level. You no longer need to keep track of the levels, though, and you don't need the while cycle. Instead, you just call
executor.submit(new Producer(new CoinSet(), desiredAmount, container)).get();
And then, the call method is pretty similar (assume you have executor in the Producer):
public CoinSet call() {
if (container.getResult() != null && data.getCount() < container.getResult().getCount()) {
if (data.getSum() == desiredAmount)) {
container.setResult(data);
return data;
} else {
Collection<CoinSet> nextSets = data.addCoins();
for (CoinSet nextSet : nextSets) {
executor.submit(new Producer(desiredAmount, nextSet, container));
}
return null;
}
}
}
and you also have to modify container.setResult, since you cannot depend that between the if and setting the value it has not been set by some other threads (threads are really annoying, aren't they?)
public synchronized void setResult(CoinSet newValue) {
if (newValue.getCount() < result.getCount()) {
result = newValue;
}
}
In all previous answers, CoinSet.getSum() returns the sum of the coins in the set, CoinSet.getCount() returns the number of coins, and CoinSet.addCoins() returns a Collection of CoinSet in which each element is the current CoinSet plus one coin of each possible different value
Note: For the problem of the coins with the values 1, 5, 10 and 20, the simplest solution is take the amount and divide it by the largest coin. Then take the modulus of that and use the next largest value and so on. That is the minimum amount of coins you are going to need. This rule applies (AFAICT) when the following property if true: if for all consecutive pairs of coin values (i.e. in this case, 1-5, 5-10, 10-20) you can reach any int multiple of the lower element in the pair with with a smaller number of coins using the larger element and whatever coins are necessary. You only need to prove it to the min common multiple of both elements in the pair (after that it repeats itself)
I gather from your comment on happymeal's anwer that you are trying to find how to reach a specific amount of money by adding coins of 1c, 5c, 10c and 20c.
Since each coin denomination divides the denomination of the next bigger coin, this can be solved in constant time as follows:
int[] coinCount(int amount) {
int[] coinValue = {20, 10, 5, 1};
int[] coinCount = new int[coinValue.length];
for (int i = 0; i < coinValue.length; i++) {
coinCount[i] = amount / coinValue[i];
amount -= coinCount[i] * coinValue[i];
}
return coinCount;
}
Take home message: Try to optimize your algorithm before resorting to multithreading, because algorithmic improvements can yield much greater improvements.
I've successfully implemented it.
what i did is that i took all the nodes in the first level, let's say 4 nodes.
then i had 2 threads. each one takes 2 nodes and generate their children. whenever a node finds a solution it has to report the level that it found the solution in and limit the searching level so other threads don't exceed the level.
only the reporting method should be synchronized.
i did the code for the coins change problem. this is my code for others to use
Main Class (CoinsProblemBFS.java)
package coinsproblembfs;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import java.util.Queue;
import java.util.Scanner;
/**
*
* #author Kassem M. Bagher
*/
public class CoinsProblemBFS
{
private static List<Item> MoneyList = new ArrayList<Item>();
private static Queue<Item> q = new LinkedList<Item>();
private static LinkedList<Item> tmpQ;
public static Object lockLevelLimitation = new Object();
public static int searchLevelLimit = 1000;
public static Item lastFoundNode = null;
private static int numberOfThreads = 2;
private static void InitializeQueu(Item Root)
{
for (int x = 0; x < MoneyList.size(); x++)
{
Item t = new Item();
t.value = MoneyList.get(x).value;
t.Totalvalue = MoneyList.get(x).Totalvalue;
t.Title = MoneyList.get(x).Title;
t.parent = Root;
t.level = 1;
q.add(t);
}
}
private static int[] calculateQueueLimit(int numberOfItems, int numberOfThreads)
{
int total = 0;
int[] queueLimit = new int[numberOfThreads];
for (int x = 0; x < numberOfItems; x++)
{
if (total < numberOfItems)
{
queueLimit[x % numberOfThreads] += 1;
total++;
}
else
{
break;
}
}
return queueLimit;
}
private static void initializeMoneyList(int numberOfItems, Item Root)
{
for (int x = 0; x < numberOfItems; x++)
{
Scanner input = new Scanner(System.in);
Item t = new Item();
System.out.print("Enter the Title and Value for item " + (x + 1) + ": ");
String tmp = input.nextLine();
t.Title = tmp.split(" ")[0];
t.value = Double.parseDouble(tmp.split(" ")[1]);
t.Totalvalue = t.value;
t.parent = Root;
MoneyList.add(t);
}
}
private static void printPath(Item item)
{
System.out.println("\nSolution Found in Thread:" + item.winnerThreadName + "\nExecution Time: " + item.searchTime + " ms, " + (item.searchTime / 1000) + " s");
while (item != null)
{
for (Item listItem : MoneyList)
{
if (listItem.Title.equals(item.Title))
{
listItem.counter++;
}
}
item = item.parent;
}
for (Item listItem : MoneyList)
{
System.out.println(listItem.Title + " x " + listItem.counter);
}
}
public static void main(String[] args) throws InterruptedException
{
Item Root = new Item();
Root.Title = "Root Node";
Scanner input = new Scanner(System.in);
System.out.print("Number of Items: ");
int numberOfItems = input.nextInt();
input.nextLine();
initializeMoneyList(numberOfItems, Root);
System.out.print("Enter the Amount of Money: ");
double searchValue = input.nextDouble();
int searchLimit = (int) Math.ceil((searchValue / MoneyList.get(MoneyList.size() - 1).value));
System.out.print("Number of Threads (Muste be less than the number of items): ");
numberOfThreads = input.nextInt();
if (numberOfThreads > numberOfItems)
{
System.exit(1);
}
InitializeQueu(Root);
int[] queueLimit = calculateQueueLimit(numberOfItems, numberOfThreads);
List<Thread> threadList = new ArrayList<Thread>();
for (int x = 0; x < numberOfThreads; x++)
{
tmpQ = new LinkedList<Item>();
for (int y = 0; y < queueLimit[x]; y++)
{
tmpQ.add(q.remove());
}
BFS tmpThreadObject = new BFS(MoneyList, searchValue, tmpQ);
Thread t = new Thread(tmpThreadObject);
t.setName((x + 1) + "");
threadList.add(t);
}
for (Thread t : threadList)
{
t.start();
}
boolean finish = false;
while (!finish)
{
Thread.sleep(250);
for (Thread t : threadList)
{
if (t.isAlive())
{
finish = false;
break;
}
else
{
finish = true;
}
}
}
printPath(lastFoundNode);
}
}
Item Class (Item.java)
package coinsproblembfs;
/**
*
* #author Kassem
*/
public class Item
{
String Title = "";
double value = 0;
int level = 0;
double Totalvalue = 0;
int counter = 0;
Item parent = null;
long searchTime = 0;
String winnerThreadName="";
}
Threads Class (BFS.java)
package coinsproblembfs;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
/**
*
* #author Kassem M. Bagher
*/
public class BFS implements Runnable
{
private LinkedList<Item> q;
private List<Item> MoneyList;
private double searchValue = 0;
private long start = 0, end = 0;
public BFS(List<Item> monyList, double searchValue, LinkedList<Item> queue)
{
q = new LinkedList<Item>();
MoneyList = new ArrayList<Item>();
this.searchValue = searchValue;
for (int x = 0; x < queue.size(); x++)
{
q.addLast(queue.get(x));
}
for (int x = 0; x < monyList.size(); x++)
{
MoneyList.add(monyList.get(x));
}
}
private synchronized void printPath(Item item)
{
while (item != null)
{
for (Item listItem : MoneyList)
{
if (listItem.Title.equals(item.Title))
{
listItem.counter++;
}
}
item = item.parent;
}
for (Item listItem : MoneyList)
{
System.out.println(listItem.Title + " x " + listItem.counter);
}
}
private void addChildren(Item node, LinkedList<Item> q, boolean initialized)
{
for (int x = 0; x < MoneyList.size(); x++)
{
Item t = new Item();
t.value = MoneyList.get(x).value;
if (initialized)
{
t.Totalvalue = 0;
t.level = 0;
}
else
{
t.parent = node;
t.Totalvalue = MoneyList.get(x).Totalvalue;
if (t.parent == null)
{
t.level = 0;
}
else
{
t.level = t.parent.level + 1;
}
}
t.Title = MoneyList.get(x).Title;
q.addLast(t);
}
}
#Override
public void run()
{
start = System.currentTimeMillis();
try
{
while (!q.isEmpty())
{
Item node = null;
node = (Item) q.removeFirst();
node.Totalvalue = node.value + node.parent.Totalvalue;
if (node.level < CoinsProblemBFS.searchLevelLimit)
{
if (node.Totalvalue == searchValue)
{
synchronized (CoinsProblemBFS.lockLevelLimitation)
{
CoinsProblemBFS.searchLevelLimit = node.level;
CoinsProblemBFS.lastFoundNode = node;
end = System.currentTimeMillis();
CoinsProblemBFS.lastFoundNode.searchTime = (end - start);
CoinsProblemBFS.lastFoundNode.winnerThreadName=Thread.currentThread().getName();
}
}
else
{
if (node.level + 1 < CoinsProblemBFS.searchLevelLimit)
{
addChildren(node, q, false);
}
}
}
}
} catch (Exception e)
{
e.printStackTrace();
}
}
}
Sample Input:
Number of Items: 4
Enter the Title and Value for item 1: one 1
Enter the Title and Value for item 2: five 5
Enter the Title and Value for item 3: ten 10
Enter the Title and Value for item 4: twenty 20
Enter the Amount of Money: 150
Number of Threads (Muste be less than the number of items): 2

Categories