I'm storing these JSON objects in a Hazelcast IMap:
IMap<String, HazelcastJsonValue> hazelcast = instance.getMap("get");
This is the JSON that I'm storing:
{"id":"01","name":"abc","age":33}
{"id":"02","name":" data","age":37}
{"id":"03","name":"abc","age":39}
If I just want to select only the age field for entries with age above 35, the expected output is:
[37,39]
How can I do this using projections?
This works for me:
import com.hazelcast.core.*;
import com.hazelcast.projection.Projections;
import com.hazelcast.query.Predicates;
import org.junit.Test;
import java.util.Collection;
import static org.hamcrest.Matchers.containsInAnyOrder;
import static org.junit.Assert.assertThat;
[...]
@Test
public void testJsonProjection() {
    HazelcastInstance hz = Hazelcast.newHazelcastInstance();
    IMap<Integer, HazelcastJsonValue> map = hz.getMap("myMap");

    map.set(0, new HazelcastJsonValue("{\"id\":\"01\",\"name\":\"abc\",\"age\":33}"));
    map.set(1, new HazelcastJsonValue("{\"id\":\"02\",\"name\":\" data\",\"age\":37}"));
    map.set(2, new HazelcastJsonValue("{\"id\":\"03\",\"name\":\"abc\",\"age\":39}"));

    Collection<Long> projection = map.project(
            Projections.singleAttribute("age"),
            Predicates.greaterEqual("age", 35)
    );

    assertThat(projection, containsInAnyOrder(37L, 39L));
}
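If you need more than one field per matching entry, the same pattern works with a multi-attribute projection. A sketch using the same map as above, assuming your Hazelcast version provides Projections.multiAttribute:

// Each result row is an Object[] in the order of the requested attributes, e.g. ["abc", 39].
Collection<Object[]> nameAndAge = map.project(
        Projections.multiAttribute("name", "age"),
        Predicates.greaterEqual("age", 35)
);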
I followed the instructions at Structured Streaming + Kafka and built a program that receives data streams sent from Kafka as input. When I receive the data stream, I want to pass it to a SparkSession variable to do some query work with Spark SQL, so I extended the ForeachWriter class as follows:
package stream;
import java.io.FileNotFoundException;
import java.io.PrintWriter;
import org.apache.spark.sql.ForeachWriter;
import org.apache.spark.sql.SparkSession;
import org.json.simple.JSONObject;
import org.json.simple.parser.JSONParser;
import org.json.simple.parser.ParseException;
import dataservices.OrderDataServices;
import models.SuccessEvent;
public class MapEventWriter extends ForeachWriter<String> {

    private static final long serialVersionUID = 1L;

    private SparkSession spark;

    public MapEventWriter(SparkSession spark) {
        this.spark = spark;
    }

    @Override
    public void close(Throwable errorOrNull) {
        // TODO Auto-generated method stub
    }

    @Override
    public boolean open(long partitionId, long epochId) {
        // TODO Auto-generated method stub
        return true;
    }

    @Override
    public void process(String input) {
        OrderDataServices services = new OrderDataServices(this.spark);
    }
}
However, inside the process function the program throws an error whenever I use the spark variable. I pass in my SparkSession as follows:
package demo;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.concurrent.TimeoutException;
import org.apache.hadoop.fs.Path;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.streaming.StreamingQuery;
import org.apache.spark.sql.streaming.StreamingQueryException;
import org.json.simple.parser.ParseException;
import dataservices.OrderDataServices;
import models.MapperEvent;
import models.OrderEvent;
import models.SuccessEvent;
import stream.MapEventWriter;
import stream.MapEventWriter1;
public class Demo {
    public static void main(String[] args) throws TimeoutException, StreamingQueryException, ParseException, IOException {
        try (SparkSession spark = SparkSession.builder().appName("Read kafka").getOrCreate()) {
            Dataset<String> data = spark
                    .readStream()
                    .format("kafka")
                    .option("kafka.bootstrap.servers", "localhost:9092")
                    .option("subscribe", "tiki-1")
                    .load()
                    .selectExpr("CAST(value AS STRING)")
                    .as(Encoders.STRING());

            MapEventWriter eventWriter = new MapEventWriter(spark);

            StreamingQuery query = data
                    .writeStream()
                    .foreach(eventWriter)
                    .start();

            query.awaitTermination();
        }
    }
}
The error is a NullPointerException at the point where spark is used, i.e. the spark variable is never initialized.
I hope someone can help me; I really appreciate it.
Caused by: java.lang.NullPointerException
at org.apache.spark.sql.SparkSession.sessionState$lzycompute(SparkSession.scala:151)
at org.apache.spark.sql.SparkSession.sessionState(SparkSession.scala:149)
at org.apache.spark.sql.DataFrameReader.<init>(DataFrameReader.scala:998)
at org.apache.spark.sql.SparkSession.read(SparkSession.scala:655)
at dataservices.OrderDataServices.<init>(OrderDataServices.java:18)
at stream.MapEventWriter.process(MapEventWriter.java:38)
at stream.MapEventWriter.process(MapEventWriter.java:15)
"do some query work with Spark SQL"
You wouldn't use a ForeachWriter for that.
.selectExpr("CAST(value AS STRING)")
.as(Encoders.STRING()); // or parse your JSON here using a schema
data.select(...) // or move this to a method / class that takes the Dataset as a parameter
// await termination
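Putting that advice together: parse the JSON on the Dataset itself and express the query work as ordinary transformations before starting the stream, rather than inside a ForeachWriter. A minimal sketch, assuming Spark 2.x, a hypothetical event schema, and a console sink for illustration:

package demo;

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.streaming.StreamingQuery;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.StructType;

import static org.apache.spark.sql.functions.col;
import static org.apache.spark.sql.functions.from_json;

public class DemoWithoutForeachWriter {
    public static void main(String[] args) throws Exception {
        SparkSession spark = SparkSession.builder().appName("Read kafka").getOrCreate();

        // Hypothetical schema for the JSON messages; adjust the fields to match your data.
        StructType eventSchema = new StructType()
                .add("id", DataTypes.StringType)
                .add("name", DataTypes.StringType)
                .add("age", DataTypes.IntegerType);

        Dataset<Row> events = spark
                .readStream()
                .format("kafka")
                .option("kafka.bootstrap.servers", "localhost:9092")
                .option("subscribe", "tiki-1")
                .load()
                .selectExpr("CAST(value AS STRING) AS value")
                // parse the JSON payload with a schema instead of handing raw strings to a ForeachWriter
                .select(from_json(col("value"), eventSchema).as("event"))
                .select("event.*");

        // Do the Spark SQL work as Dataset transformations (or in a method that takes the Dataset),
        // then write to a sink; no ForeachWriter and no second SparkSession needed.
        StreamingQuery query = events
                .writeStream()
                .format("console")
                .outputMode("append")
                .start();

        query.awaitTermination();
    }
}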
I'm new to Spark-related work. I tried the code below.
package hdd.models;
import java.util.ArrayList;
import java.util.List;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;
import org.apache.spark.sql.SQLContext;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;
import org.apache.spark.sql.types.DataType;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.SparkSession;
/*
* Analysis of the data using Spark SQL
*
*/
public class HrtDisDataAnalyze {

    public HrtDisDataAnalyze() {
    }

    public static void main(String[] args) {
        SparkConfAndCtxBuilder ctxBuilder = new SparkConfAndCtxBuilder();
        JavaSparkContext jctx = ctxBuilder.loadSimpleSparkContext("Heart Disease Data Analysis App", "local");
        JavaRDD<String> rows = jctx.textFile("file:///C:/Users/harpr/workspace/HrtDisDetection/src/resources/full_data_cleaned.csv");

        String schemaString = "age sex cp trestbps chol fbs restecg thalach exang oldpeak slope ca thal num";
        List<StructField> fields = new ArrayList<>();
        for (String fieldName : schemaString.split(" ")) {
            fields.add(DataTypes.createStructField(fieldName, DataTypes.StringType, true));
        }
        StructType schema = DataTypes.createStructType(fields);

        JavaRDD<Row> rowRdd = rows.map(new Function<String, Row>() {
            @Override
            public Row call(String record) throws Exception {
                String[] fields = record.split(",");
                return RowFactory.create(fields[0], fields[1], fields[2], fields[3], fields[4], fields[5], fields[6],
                        fields[7], fields[8], fields[9], fields[10], fields[11], fields[12], fields[13]);
            }
        });

        SparkSession sparkSession = SparkSession.builder()
                .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
                .config("spark.kryo.registrator", "org.datasyslab.geospark.serde.GeoSparkKryoRegistrator")
                .master("local[*]")
                .appName("testGeoSpark")
                .getOrCreate();

        Dataset<Row> df = sparkSession.read().csv("usr/local/eclipse1/eclipse/hrtdisdetection/src/resources/cleveland_data_raw.csv");
        df.createOrReplaceTempView("heartDisData");
    }
}
The following error occurs at the SparkSession:
"The type org.apache.spark.sql.SparkSession$Builder cannot be resolved. It is indirectly referenced from required .class files"
Note: I'm using spark-2.1.0 with Scala 2.10. I tried the above code in Java, in Eclipse Neon.
There is no point in using a separate context builder.
Just create the SparkSession at the beginning and get the Spark context from the session:
SparkSession sparkSession = SparkSession.builder()
        .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
        .config("spark.kryo.registrator", "org.datasyslab.geospark.serde.GeoSparkKryoRegistrator")
        .master("local[*]")
        .appName("testGeoSpark")
        .getOrCreate();
sparkSession.sparkContext().textFile(yourFileOrURL);
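Applied to the code in the question, that would look roughly like this. A sketch, assuming Spark 2.x on the classpath; the file paths are copied from the question, and a JavaSparkContext wrapper is used because the Scala SparkContext's textFile is awkward to call directly from Java:

import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

public class HrtDisDataAnalyzeSingleSession {
    public static void main(String[] args) {
        // One SparkSession up front; derive the JavaSparkContext from it instead of building a separate context.
        SparkSession sparkSession = SparkSession.builder()
                .master("local[*]")
                .appName("Heart Disease Data Analysis App")
                .getOrCreate();
        JavaSparkContext jctx = new JavaSparkContext(sparkSession.sparkContext());

        JavaRDD<String> rows = jctx.textFile("file:///C:/Users/harpr/workspace/HrtDisDetection/src/resources/full_data_cleaned.csv");
        System.out.println("Lines read: " + rows.count());

        Dataset<Row> df = sparkSession.read().csv("usr/local/eclipse1/eclipse/hrtdisdetection/src/resources/cleveland_data_raw.csv");
        df.createOrReplaceTempView("heartDisData");
        sparkSession.sql("SELECT * FROM heartDisData LIMIT 5").show();
    }
}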
I added the jar file for SparkSession and the error was cleared:
https://jar-download.com/?search_box=org.apache.spark%20spark.sql
I want to make a REST call to the Bitbucket API with an SSH private key as a header. For example, the URL is http://bitbucket.com/rest/api/1.0/repos/testProject/pull-requests?state=OPEN.
Is there a way to call this URL with Spring RestTemplate, and how do I pass the SSH access key as a header?
Instead of using SSH keys use Personal Access Tokens:
https://confluence.atlassian.com/bitbucketserver/personal-access-tokens-939515499.html
(Introduced in Bitbucket 5.5)
Then you could use code like this:
package com.company.bitbucket.tools.application;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.StringJoiner;
import java.util.stream.Collectors;
import org.springframework.http.client.ClientHttpRequestInterceptor;
import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestMethod;
import org.springframework.web.client.RestTemplate;
import org.springframework.web.servlet.ModelAndView;
import com.company.bitbucket.tools.HttpRequestInterceptor;
import com.company.bitbucket.tools.ProjectValue;
import com.company.bitbucket.tools.Projects;
import com.company.bitbucket.tools.UserValue;
import com.company.bitbucket.tools.Users;
@Controller
public class ProjectController {

    public static String BITBUCKET_URL = "https://bitbucket.company.com/rest/api/latest/";
    public static String PROJECTS = "projects";
    public static String PERMISSIONS = "permissions/users?permission=PROJECT_ADMIN";
    public static String PAT = "<put your generated token in here>";

    @RequestMapping(value = {"/projects"}, method = RequestMethod.GET)
    public ModelAndView listProjects() {
        HashMap<String, String> list = getAdmins();
        ModelAndView model = new ModelAndView("projects");
        model.addObject("adminMap", list);
        return model;
    }

    private HashMap<String, String> getAdmins() {
        HashMap<String, String> projectMap = new HashMap<>();
        RestTemplate restTemplate = new RestTemplate();

        List<ClientHttpRequestInterceptor> interceptors = new ArrayList<ClientHttpRequestInterceptor>();
        interceptors.add(new HttpRequestInterceptor("Authorization", "Bearer ".concat(PAT)));
        restTemplate.setInterceptors(interceptors);

        Projects projects = restTemplate.getForObject(BITBUCKET_URL.concat("projects?limit=100"), Projects.class);
        for (ProjectValue projectValue : projects.getValues()) {
            String projectUrl = String.format("%s/%s/%s/%s", BITBUCKET_URL, PROJECTS, projectValue.getKey(), PERMISSIONS);
            Users users = restTemplate.getForObject(projectUrl, Users.class);

            List<String> names = new ArrayList<>();
            for (UserValue value : users.getValues()) {
                names.add(value.getUser().getDisplayName());
            }
            String commaSeparatedNames = String.join(", ", names);
            projectMap.put(projectValue.getName(), commaSeparatedNames);
        }
        return projectMap;
    }
}
This code gets a list of project admins using the REST API, but you could change it to make whatever REST request you wish.
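The HttpRequestInterceptor used above is not a Spring class and is not shown in the answer; a minimal sketch, assuming it only needs to set one fixed header on every outgoing request:

package com.company.bitbucket.tools;

import java.io.IOException;

import org.springframework.http.HttpRequest;
import org.springframework.http.client.ClientHttpRequestExecution;
import org.springframework.http.client.ClientHttpRequestInterceptor;
import org.springframework.http.client.ClientHttpResponse;

// Adds a fixed header (e.g. "Authorization: Bearer <token>") to every request sent by the RestTemplate.
public class HttpRequestInterceptor implements ClientHttpRequestInterceptor {

    private final String headerName;
    private final String headerValue;

    public HttpRequestInterceptor(String headerName, String headerValue) {
        this.headerName = headerName;
        this.headerValue = headerValue;
    }

    @Override
    public ClientHttpResponse intercept(HttpRequest request, byte[] body,
            ClientHttpRequestExecution execution) throws IOException {
        request.getHeaders().set(headerName, headerValue);
        return execution.execute(request, body);
    }
}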
Spec: mongo-java-driver-3.3.0.jar, JDK 1.7, MongoDB 3.0.12
Mongo shell: db.getCollection("Table-WEBSRVS-DTLS").find({"col1":"1000","col4":"EMEA"},{"col1":"1","col2":"1"})
Question: How can I express this mongo shell command in Java using the MongoDB Java 3.x API?
Thanks.
Here is the equivalent Java code for the above query. You may need to change the database and collection names accordingly in the code below.
import org.bson.Document;
import com.mongodb.MongoClient;
import com.mongodb.client.FindIterable;
import com.mongodb.client.MongoCollection;
import com.mongodb.client.MongoDatabase;
import com.mongodb.client.model.Filters;
import com.mongodb.client.model.Projections;
public class GetDataFromTableWebsrvsDtls {

    public static void main(String[] args) {
        MongoClient client = new MongoClient();
        MongoDatabase database = client.getDatabase("localhost");
        MongoCollection<Document> collection = database.getCollection("TableWebsrvsDtls");

        FindIterable<Document> collectionData = collection
                .find(Filters.and(Filters.eq("col1", "1000"), Filters.eq("col4", "EMEA")))
                .projection(Projections.include("col1", "col2"));

        for (Document doc : collectionData) {
            System.out.println(doc.toJson());
        }
        client.close();
    }
}
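Note that, like the shell projection above, include() still returns the _id field. If you want to drop it as well, you can combine projections; a small variation on the code above, using the same imports:

FindIterable<Document> collectionData = collection
        .find(Filters.and(Filters.eq("col1", "1000"), Filters.eq("col4", "EMEA")))
        .projection(Projections.fields(Projections.include("col1", "col2"), Projections.excludeId()));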
I'm having trouble working with PartitionStrategy with TitanDB and DynamoDB as the backend. I'm using DynamoDB as a local backend via the dynamodb-titan-storage-backend plugin. Below is a simple test case I've written in Java:
import com.thinkaurelius.titan.core.TitanEdge;
import com.thinkaurelius.titan.core.TitanFactory;
import com.thinkaurelius.titan.core.TitanGraph;
import com.thinkaurelius.titan.core.TitanGraphQuery;
import com.thinkaurelius.titan.core.TitanTransaction;
import com.thinkaurelius.titan.core.TitanVertex;
import org.apache.commons.configuration.BaseConfiguration;
import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversal;
import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversalSource;
import org.apache.tinkerpop.gremlin.process.traversal.strategy.decoration.PartitionStrategy;
import org.apache.tinkerpop.gremlin.structure.Direction;
import org.apache.tinkerpop.gremlin.structure.Vertex;
import org.apache.tinkerpop.gremlin.structure.VertexProperty;
import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
import java.util.Iterator;
public class TitanTest {

    TitanGraph titanGraph;

    @Before
    public void setUp() {
        BaseConfiguration conf = new BaseConfiguration();
        conf.setProperty("storage.backend", "com.amazon.titan.diskstorage.dynamodb.DynamoDBStoreManager");
        conf.setProperty("storage.dynamodb.client.endpoint", "http://localhost:4567");
        titanGraph = TitanFactory.open(conf);
    }

    @Test
    public void testAddVertexToTitanGraph() {
        titanGraph.addVertex("name", "Bob", "age", "4.6x10^9");
        titanGraph.traversal().V().forEachRemaining(it -> {
            System.out.println("Found " + it.value("name"));
        });
    }

    @Test
    public void addVertexViaPartitionStrategy() {
        PartitionStrategy partitionStrategy = PartitionStrategy.build()
                .partitionKey("_partition")
                .writePartition("a")
                .addReadPartition("a")
                .create();
        GraphTraversalSource graphTraversalSource = GraphTraversalSource.build().with(partitionStrategy).create(titanGraph);
        GraphTraversal<Vertex, Vertex> marko = graphTraversalSource.addV("name", "marko", "age", 29);
        graphTraversalSource.V().forEachRemaining(it -> {
            System.out.println("name:" + it.value("name").toString());
        });
    }
}
However, I don't seem to get anything printed out when I run the addVertexViaPartitionStrategy test case. I've followed the example here:
http://tinkerpop.apache.org/docs/3.0.1-incubating/#_partitionstrategy
I'm able to read from and write to the database (see the testAddVertexToTitanGraph test); I'm just not able to create a vertex when using the partition strategy.
I just ran into the same issue with Titan 1.0.0 and a Cassandra backend. Apparently you have to fetch the result of the traversal in order for the vertex to be added.
So this works:
GraphTraversalSource gA = GraphTraversalSource.build().with(partitionStrategy).create(g);
GraphTraversal<Vertex, Vertex> addV = gA.addV();
Vertex v = addV.next();
System.out.println("v: " + v);
GraphTraversal<Vertex, Long> count = gA.V().count();
long amt = count.next();
System.out.println("Partition vertex count: " + amt);
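Applied to the addVertexViaPartitionStrategy test from the question, the fix is the same idea. A sketch using the same imports as the test above; the commit is my addition and may not be needed depending on your transaction settings:

GraphTraversalSource gA = GraphTraversalSource.build().with(partitionStrategy).create(titanGraph);

// next() forces the addV traversal to execute, so the vertex is actually created.
Vertex marko = gA.addV("name", "marko", "age", 29).next();
titanGraph.tx().commit();

gA.V().values("name").forEachRemaining(System.out::println);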