Can I use the Java API to connect to HBase in standalone mode (without Hadoop)?
Here is my code, and I was wondering how to make it work. Should I set some property on the 'config' variable?
I have these installed locally: HBase 0.98.0 and Hadoop 2.2.0.
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.util.Bytes;
public class MyLittleHBaseClient {
    public static void main(String[] args) throws IOException {
        // maybe I should do some configuration here, but I don't know how
        Configuration config = HBaseConfiguration.create();
        HTable table = new HTable(config, "myLittleHBaseTable");
        Put p = new Put(Bytes.toBytes("myLittleRow"));
        p.add(Bytes.toBytes("myLittleFamily"), Bytes.toBytes("someQualifier"),
                Bytes.toBytes("Some Value"));
        table.put(p);
        Get g = new Get(Bytes.toBytes("myLittleRow"));
        Result r = table.get(g);
        byte[] value = r.getValue(Bytes.toBytes("myLittleFamily"),
                Bytes.toBytes("someQualifier"));
        String valueStr = Bytes.toString(value);
        System.out.println("GET: " + valueStr);
        Scan s = new Scan();
        s.addColumn(Bytes.toBytes("myLittleFamily"), Bytes.toBytes("someQualifier"));
        ResultScanner scanner = table.getScanner(s);
        try {
            for (Result rr = scanner.next(); rr != null; rr = scanner.next()) {
                System.out.println("Found row: " + rr);
            }
        } finally {
            scanner.close();
        }
    }
}
If your hbase-site.xml is empty in standalone mode, you don't have to set anything. If you have overridden anything in hbase-site.xml, it is better to add that hbase-site.xml as a resource instead of setting each parameter separately:
Configuration config = HBaseConfiguration.create();
config.addResource("<HBASE_CONF_DIR_PATH>/hbase-site.xml");
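If you do need to point the client somewhere explicitly (for example a non-default ZooKeeper port), here is a minimal sketch of setting the usual connection properties in code; the localhost/2181 values below are just the standalone defaults, not something read from your setup:
Configuration config = HBaseConfiguration.create();
// Standalone HBase runs its own ZooKeeper; these values assume the defaults.
config.set("hbase.zookeeper.quorum", "localhost");
config.set("hbase.zookeeper.property.clientPort", "2181");
HTable table = new HTable(config, "myLittleHBaseTable");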
I want to add a Quartz job scheduler to my project, but when I run the project I get an error. How do I get the Quartz job scheduler to read my database.properties?
Thank you very much for your answer, Mr Igor. My problem is that I already have a database.properties, but when I run my project the Quartz scheduler starts, the job's controller tries to connect to the database, and an error still appears even though I have also opened the connection.
This is my database.properties:
development.driver=com.microsoft.sqlserver.jdbc.SQLServerDriver
development.username=xxxx
development.password=xxxx
development.url=jdbc:sqlserver://10.10.5.45;databaseName=xxxxxxxxxxxxx;
This is my Quartz job class:
package app.controllers.api.prosesgaji;
import java.time.LocalDate;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Map;
import org.javalite.activejdbc.Base;
import org.javalite.common.Convert;
import org.javalite.http.Http;
import org.javalite.http.Post;
import org.quartz.Job;
import org.quartz.JobExecutionContext;
import org.quartz.JobExecutionException;
import app.controllers.api.pemeliharaandata.KomponenGajiPegawaiHitungGajiController;
import app.models.Mstpegawai;
import core.io.enums.HttpResponses;
public class QuartzJobProsesGaji implements Job {

    @Override
    public void execute(JobExecutionContext context) throws JobExecutionException {
        LocalDate localDate = LocalDate.now();
        int bulan = localDate.getMonthValue();
        int tgl = localDate.getDayOfMonth();
        try {
            String tglGaji = Convert.toString(localDate);
            String nip = "";
            String kddati1 = "";
            String kddati2 = "";
            int kdStapeg = 0;
            String tmtStop = "";
            int no = 1;
            System.out.println("\nStart Create Gaji");
            Base.open();
            Base.openTransaction();
            List<Map> dataPegawai = new ArrayList<>();
            dataPegawai = Mstpegawai.getPegawaiQuartzTestBeberapaPegawai(tglGaji);
            Mstpegawai mstPegawai = new Mstpegawai();
            for (Map map : dataPegawai) {
                System.out.println("\nPegawai ke = " + no);
                nip = Convert.toString(map.get("nip"));
                System.out.println(" - nip = " + nip);
                kddati1 = Convert.toString(map.get("kddati1"));
                System.out.println(" - kddati1 = " + kddati1);
                kddati2 = Convert.toString(map.get("kddati2"));
                System.out.println(" - kddati2 = " + kddati2);
                kdStapeg = Convert.toInteger(map.get("kdstapeg"));
                System.out.println(" - kdStapeg = " + kdStapeg);
                tmtStop = Convert.toString(map.get("tmtstop"));
                System.out.println(" - tmtStop = " + tmtStop);
                KomponenGajiPegawaiHitungGajiController hitungGaji = new KomponenGajiPegawaiHitungGajiController();
                hitungGaji.prosesGajiInduk(mstPegawai.fromMap(map), tglGaji);
                no++;
            }
            Base.close();
            System.out.println("\nDone Create Gaji\n");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
So, why didn't Base.open() work?
You need to familiarize yourself with the docs: http://javalite.io/database_configuration. Chances are you did not provide a config file http://javalite.io/database_configuration#property-file-configuration but are using a DBConnectionFilter, which has no idea where to connect.
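Also keep in mind that ActiveJDBC connections are attached to the current thread, so a connection opened by a web-layer DBConnectionFilter is not visible inside a Quartz worker thread; the job has to open and close its own. As a quick sanity check, here is a sketch that bypasses the property file and passes the values from your database.properties straight to Base.open (the masked credentials and database name are placeholders from the question):
// Sketch: open a connection explicitly inside the job instead of relying on
// database.properties being picked up. Driver and URL are copied from the
// question; user, password and database name are masked placeholders.
Base.open("com.microsoft.sqlserver.jdbc.SQLServerDriver",
        "jdbc:sqlserver://10.10.5.45;databaseName=xxxxxxxxxxxxx;",
        "xxxx", "xxxx");
try {
    // ... the job's database work goes here ...
} finally {
    Base.close();
}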
I am trying to run an algorithm in Apache Spark. I am getting an "A master URL must be set in your configuration" error even though I set the configuration:
SparkSession spark = SparkSession.builder().appName("Sp_LogistcRegression").config("spark.master", "local").getOrCreate();
This is the code I work with
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.ObjectOutputStream;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.ml.classification.LogisticRegression;
import org.apache.spark.ml.classification.LogisticRegressionModel;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.mllib.util.MLUtils;
public class Sp_LogistcRegression {

    public void trainLogisticregression(String path, String model_path) throws IOException {
        // SparkConf conf = new SparkConf().setAppName("Linear Regression Example");
        // JavaSparkContext sc = new JavaSparkContext(conf);
        SparkSession spark = SparkSession.builder().appName("Sp_LogistcRegression").config("spark.master", "local").getOrCreate();
        Dataset<Row> training = spark.read().option("header", "true").csv(path);
        System.out.print(training.count());
        LogisticRegression lr = new LogisticRegression().setMaxIter(10).setRegParam(0.3);
        // Fit the model
        LogisticRegressionModel lrModel = lr.fit(training);
        lrModel.save(model_path);
        spark.close();
    }
}
This is my test case:
import java.io.File;
import java.io.IOException;
import org.junit.Test;

public class Sp_LogistcRegressionTest {

    Sp_LogistcRegression spl = new Sp_LogistcRegression();

    @Test
    public void test() throws IOException {
        String filename = "datas/seg-large.csv";
        ClassLoader classLoader = getClass().getClassLoader();
        File file1 = new File(classLoader.getResource(filename).getFile());
        spl.trainLogisticregression(file1.getAbsolutePath(), "/tmp");
    }
}
Why am I getting this error? I checked the solutions here:
Spark - Error "A master URL must be set in your configuration" when submitting an app
It doesn't work.
Any clues?
Your
SparkSession spark = SparkSession.builder().appName("Sp_LogistcRegression").config("spark.master", "local").getOrCreate();
should be
SparkSession spark = SparkSession.builder().appName("Sp_LogistcRegression").master("local").getOrCreate();
Or, when you submit the job, pass the master on the command line:
spark-submit --class mainClass --master local yourJarFile
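One more thing worth keeping in mind: a master set in code takes precedence over --master passed to spark-submit, so hard-coding it only really makes sense for local runs and unit tests. Here is a minimal sketch of the test-friendly form (local[*] is an assumption; any local master works):
// Sketch: force a local master only where no cluster is available (e.g. in a JUnit test).
// local[*] uses all available cores; plain "local" uses a single thread.
SparkSession spark = SparkSession.builder()
        .appName("Sp_LogistcRegression")
        .master("local[*]")
        .getOrCreate();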
I am following the book Apache Mahout Cookbook by Piero Giacomelli. When I download the Maven sources using NetBeans as my IDE, I guess the sources are from Mahout version 1.0 and not 0.8, because an error is shown on the SlopeOneRecommender import alone.
Here is the complete code:
package com.packtpub.mahout.cookbook.chapter01;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.List;
import org.apache.commons.cli2.OptionException;
import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.impl.common.LongPrimitiveIterator;
import org.apache.mahout.cf.taste.impl.model.file.FileDataModel;
import org.apache.mahout.cf.taste.impl.recommender.CachingRecommender;
import org.apache.mahout.cf.taste.impl.recommender.slopeone.SlopeOneRecommender;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.recommender.RecommendedItem;
public class App {

    static final String inputFile = "/home/hadoop/ml-1m/ratings.dat";
    static final String outputFile = "/home/hadoop/ml-1m/ratings.csv";

    public static void main(String[] args) throws IOException, TasteException, OptionException
    {
        CreateCsvRatingsFile();
        // create data source (model) - from the csv file
        File ratingsFile = new File(outputFile);
        DataModel model = new FileDataModel(ratingsFile);
        // create a simple recommender on our data
        CachingRecommender cachingRecommender = new CachingRecommender(new SlopeOneRecommender(model));
        // for all users
        for (LongPrimitiveIterator it = model.getUserIDs(); it.hasNext();) {
            long userId = it.nextLong();
            // get the recommendations for the user
            List<RecommendedItem> recommendations = cachingRecommender.recommend(userId, 10);
            // if empty write something
            if (recommendations.size() == 0) {
                System.out.print("User ");
                System.out.print(userId);
                System.out.println(": no recommendations");
            }
            // print the list of recommendations for each
            for (RecommendedItem recommendedItem : recommendations) {
                System.out.print("User ");
                System.out.print(userId);
                System.out.print(": ");
                System.out.println(recommendedItem);
            }
        }
    }

    private static void CreateCsvRatingsFile() throws FileNotFoundException, IOException {
        BufferedReader br = new BufferedReader(new FileReader(inputFile));
        BufferedWriter bw = new BufferedWriter(new FileWriter(outputFile));
        String line = null;
        String line2write = null;
        String[] temp;
        int i = 0;
        while ((line = br.readLine()) != null && i < 1000) {
            i++;
            temp = line.split("::");
            line2write = temp[0] + "," + temp[1];
            bw.write(line2write);
            bw.newLine();
            bw.flush();
        }
        br.close();
        bw.close();
    }
}
The error is shown only on import org.apache.mahout.cf.taste.impl.recommender.slopeone.SlopeOneRecommender;
and hence on the line where I create an object of this class. The error shown is "package does not exist".
Please help. Is it because I am using a newer version of Mahout? I am not even certain whether I am using version 0.8 or a higher version, as I followed all the links given in the book.
The SlopeOneRecommender has been removed from Mahout since v0.8. If you want to use it, you can switch to an earlier version such as 0.7:
<dependency>
    <groupId>org.apache.mahout</groupId>
    <artifactId>mahout-core</artifactId>
    <version>0.7</version>
</dependency>
See http://permalink.gmane.org/gmane.comp.apache.mahout.user/20282
Exactly. The SlopeOneRecommender has been removed from Mahout since v0.8, so either go back to version 0.7, or, if your purpose is only to try Mahout, you can use one of the other recommenders, such as ItemAverageRecommender.
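If going back to 0.7 is not an option, here is a sketch of the swap in the App class above. Note this changes the algorithm (item-average scores instead of slope-one); ItemAverageRecommender lives in org.apache.mahout.cf.taste.impl.recommender and still exists in 0.8 and later:
import org.apache.mahout.cf.taste.impl.recommender.CachingRecommender;
import org.apache.mahout.cf.taste.impl.recommender.ItemAverageRecommender;

// Replace the SlopeOneRecommender construction; the rest of the example,
// including the CachingRecommender wrapper and the DataModel, stays the same.
CachingRecommender cachingRecommender =
        new CachingRecommender(new ItemAverageRecommender(model));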
I am trying to use HBase as a data source for Spark, so the first step turns out to be creating an RDD from an HBase table. Since Spark works with Hadoop input formats, I could find a way to use all rows by creating an RDD (http://www.vidyasource.com/blog/Programming/Scala/Java/Data/Hadoop/Analytics/2014/01/25/lighting-a-spark-with-hbase). But how do we create an RDD for a range scan?
All suggestions are welcome.
Here is an example of using Scan in Spark:
import java.io.{DataOutputStream, ByteArrayOutputStream}
import java.lang.String
import org.apache.hadoop.hbase.client.Scan
import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.client.Result
import org.apache.hadoop.hbase.mapreduce.TableInputFormat
import org.apache.hadoop.hbase.util.Base64
def convertScanToString(scan: Scan): String = {
  val out: ByteArrayOutputStream = new ByteArrayOutputStream
  val dos: DataOutputStream = new DataOutputStream(out)
  scan.write(dos)
  Base64.encodeBytes(out.toByteArray)
}
val conf = HBaseConfiguration.create()
val scan = new Scan()
scan.setCaching(500)
scan.setCacheBlocks(false)
conf.set(TableInputFormat.INPUT_TABLE, "table_name")
conf.set(TableInputFormat.SCAN, convertScanToString(scan))
val rdd = sc.newAPIHadoopRDD(conf, classOf[TableInputFormat], classOf[ImmutableBytesWritable], classOf[Result])
rdd.count
You need to add the related HBase libraries to the Spark classpath and make sure they are compatible with your Spark version. Tip: you can run the hbase classpath command to find them.
You can set the conf below:
val conf = HBaseConfiguration.create() // need to set all params for HBase
conf.set(TableInputFormat.SCAN_ROW_START, "row2");
conf.set(TableInputFormat.SCAN_ROW_STOP, "stoprowkey");
This will load the RDD only for those records.
Here is a Java example with TableMapReduceUtil.convertScanToString(Scan scan):
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableInputFormat;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;
import java.io.IOException;

public class HbaseScan {

    public static void main(String... args) throws IOException, InterruptedException {
        // Spark conf
        SparkConf sparkConf = new SparkConf().setMaster("local[4]").setAppName("My App");
        JavaSparkContext jsc = new JavaSparkContext(sparkConf);

        // Hbase conf
        Configuration conf = HBaseConfiguration.create();
        conf.set(TableInputFormat.INPUT_TABLE, "big_table_name");

        // Create scan
        Scan scan = new Scan();
        scan.setCaching(500);
        scan.setCacheBlocks(false);
        scan.setStartRow(Bytes.toBytes("a"));
        scan.setStopRow(Bytes.toBytes("d"));

        // Submit scan into hbase conf
        conf.set(TableInputFormat.SCAN, TableMapReduceUtil.convertScanToString(scan));

        // Get RDD
        JavaPairRDD<ImmutableBytesWritable, Result> source = jsc
                .newAPIHadoopRDD(conf, TableInputFormat.class,
                        ImmutableBytesWritable.class, Result.class);

        // Process RDD
        System.out.println(source.count());
    }
}
This is the entire source code for the Java file.
package gephifyer;
import java.awt.Color;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import org.gephi.data.attributes.api.AttributeColumn;
import org.gephi.data.attributes.api.AttributeController;
import org.gephi.data.attributes.api.AttributeModel;
import org.gephi.graph.api.DirectedGraph;
import org.gephi.graph.api.GraphController;
import org.gephi.graph.api.GraphModel;
import org.gephi.io.exporter.api.ExportController;
import org.gephi.io.importer.api.Container;
import org.gephi.io.importer.api.EdgeDefault;
import org.gephi.io.importer.api.ImportController;
import org.gephi.io.importer.spi.FileImporter;
import org.gephi.io.processor.plugin.DefaultProcessor;
import org.gephi.partition.api.Partition;
import org.gephi.partition.api.PartitionController;
import org.gephi.partition.plugin.NodeColorTransformer;
import org.gephi.preview.api.PreviewController;
import org.gephi.preview.api.PreviewModel;
import org.gephi.preview.api.PreviewProperty;
import org.gephi.preview.types.DependantOriginalColor;
import org.gephi.project.api.ProjectController;
import org.gephi.project.api.Workspace;
import org.gephi.ranking.api.Ranking;
import org.gephi.ranking.api.RankingController;
import org.gephi.ranking.plugin.transformer.AbstractSizeTransformer;
import org.gephi.statistics.plugin.Modularity;
import org.openide.util.Lookup;
import org.gephi.layout.plugin.force.StepDisplacement;
import org.gephi.layout.plugin.force.yifanHu.YifanHu;
import org.gephi.layout.plugin.force.yifanHu.YifanHuLayout;
import org.gephi.layout.plugin.openord.*;
public class Gephifyer {

    public void doStuff(String[] args)
    {
        String filename = new String();
        try {
            filename = args[0];
        } catch (ArrayIndexOutOfBoundsException ex) {
            System.out.println("Supply the subreddit name as the argument.");
            System.exit(0);
        }

        ProjectController pc = Lookup.getDefault().lookup(ProjectController.class);
        pc.newProject();
        Workspace workspace = pc.getCurrentWorkspace();

        ImportController importController = Lookup.getDefault().lookup(ImportController.class);
        Container container;
        try {
            File file = new File(filename + ".csv");
            //File file = new File(getClass().getResource("askscience.csv").toURI());
            container = importController.importFile(file);
            container.getLoader().setEdgeDefault(EdgeDefault.DIRECTED);
            container.setAllowAutoNode(false); // don't create missing nodes
        } catch (Exception ex) {
            ex.printStackTrace();
            return;
        }

        // Append imported data to graph api
        importController.process(container, new DefaultProcessor(), workspace);
        GraphModel graphModel = Lookup.getDefault().lookup(GraphController.class).getModel();
        DirectedGraph directedGraph = graphModel.getDirectedGraph();
        // Now let's manipulate the graph api, which stores / serves graphs
        System.out.println("Nodes: " + directedGraph.getNodeCount() + "\nEdges: " + directedGraph.getEdgeCount());

        // Run OpenOrd.
        //OpenOrdLayout layout = new OpenOrdLayout(null);
        YifanHuLayout layout = new YifanHuLayout(null, new StepDisplacement(0.95f));
        layout.setGraphModel(graphModel);
        layout.resetPropertiesValues();
        layout.initAlgo();
        layout.goAlgo();
        while (layout.canAlgo()) // This is only possible because OpenOrd has a finite number of iterations.
        {
            layout.goAlgo();
        }

        AttributeModel attributemodel = Lookup.getDefault().lookup(AttributeController.class).getModel();

        // Get modularity for coloring
        Modularity modularity = new Modularity();
        modularity.setUseWeight(true);
        modularity.setRandom(true);
        modularity.setResolution(1.0);
        modularity.execute(graphModel, attributemodel);

        // Partition with modularity
        AttributeColumn modcol = attributemodel.getNodeTable().getColumn(Modularity.MODULARITY_CLASS);
        PartitionController partitionController = Lookup.getDefault().lookup(PartitionController.class);
        Partition p = partitionController.buildPartition(modcol, directedGraph);
        NodeColorTransformer nodeColorTransformer = new NodeColorTransformer();
        nodeColorTransformer.randomizeColors(p);
        partitionController.transform(p, nodeColorTransformer);

        // Ranking
        RankingController rankingController = Lookup.getDefault().lookup(RankingController.class);
        Ranking degreeRanking = rankingController.getModel().getRanking(Ranking.NODE_ELEMENT, Ranking.INDEGREE_RANKING);
        AbstractSizeTransformer sizeTransformer = (AbstractSizeTransformer) rankingController.getModel().getTransformer(Ranking.NODE_ELEMENT, org.gephi.ranking.api.Transformer.RENDERABLE_SIZE);
        sizeTransformer.setMinSize(5.0f);
        sizeTransformer.setMaxSize(40.0f);
        rankingController.transform(degreeRanking, sizeTransformer);

        // Finally, the preview model
        PreviewController previewController = Lookup.getDefault().lookup(PreviewController.class);
        PreviewModel previewModel = previewController.getModel();
        previewModel.getProperties().putValue(PreviewProperty.SHOW_NODE_LABELS, Boolean.TRUE);
        previewModel.getProperties().putValue(PreviewProperty.NODE_LABEL_COLOR, new DependantOriginalColor(Color.BLACK));
        previewModel.getProperties().putValue(PreviewProperty.NODE_LABEL_FONT, previewModel.getProperties().getFontValue(PreviewProperty.NODE_LABEL_FONT).deriveFont(8));
        previewModel.getProperties().putValue(PreviewProperty.EDGE_CURVED, Boolean.FALSE);
        previewModel.getProperties().putValue(PreviewProperty.EDGE_OPACITY, 50);
        previewModel.getProperties().putValue(PreviewProperty.EDGE_RADIUS, 10f);
        previewModel.getProperties().putValue(PreviewProperty.BACKGROUND_COLOR, Color.TRANSLUCENT);
        previewController.refreshPreview();

        System.out.println("starting export");
        ExportController ec = Lookup.getDefault().lookup(ExportController.class);
        try {
            ec.exportFile(new File(filename + ".svg"));
        } catch (IOException ex) {
            ex.printStackTrace();
            return;
        }
        System.out.println("Done.");
    }

    public static void main(String[] args)
    {
        Gephifyer g = new Gephifyer();
        g.doStuff(args);
    }
}
At its heart, it's the various demos' code cobbled together to do what I want it to do.
I expect a graph that looks like this SVG file, but the result is this SVG file. That is, the problem is that the above code yields a graph where the arrows aren't fully connected to the nodes, making it look a bit messy. I can't for the life of me tell where in the code that is happening, though I guess it would be in the preview model part.
The line previewModel.getProperties().putValue(PreviewProperty.EDGE_RADIUS, 10f); sets the distance of the arrows from the node.
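So, if you want the arrowheads to actually reach the nodes, shrinking that gap should be enough. A sketch (0f removes the gap entirely; pick whatever spacing looks best):
// EDGE_RADIUS is the gap kept between a node's border and the start/end of its
// edges; 0f makes the edges and arrows meet the node itself.
previewModel.getProperties().putValue(PreviewProperty.EDGE_RADIUS, 0f);
previewController.refreshPreview();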