I have a class named Child2 that I want to serialize and deserialize. The class contains a LocalDateTime attribute, for which I have to write a custom serializer. I tried the two solutions below, but both throw exceptions.
Here is my code.
Solution 1
import java.time.LocalDateTime

import net.liftweb.json._
import net.liftweb.json.Serialization.{read, write}

case class Child2(var str: String, var Num: Int, MyList: List[Int], val myDate: LocalDateTime = LocalDateTime.now()) {
  var number: Int = 555
}

class Message1SerializerDateTime extends Serializer[LocalDateTime] {
  private val LocalDateTimeClass = classOf[LocalDateTime]

  def deserialize(implicit format: Formats): PartialFunction[(TypeInfo, JValue), LocalDateTime] = {
    case (TypeInfo(LocalDateTimeClass, _), json) => json match {
      case JString(dt) => LocalDateTime.parse(dt)
      case x => throw new MappingException("Can't convert " + x + " to LocalDateTime")
    }
  }

  def serialize(implicit format: Formats): PartialFunction[Any, JValue] = {
    case x: LocalDateTime => JString(x.toString)
  }
}
object MessageTest extends App {
  implicit val formats = /*Serialization.formats(NoTypeHints)*/ DefaultFormats + new FieldSerializer[Child2] + new Message1SerializerDateTime

  var c = new Child2("Mary", 5, List(1, 2), LocalDateTime.now())
  c.number = 1
  // println("number" + c.number)

  val ser = write(c)
  println("Child class converted to string" + ser)

  var obj = read[Child2](ser)
  println("object of Child is " + obj)
  println("str" + obj.str)
  println("Num" + obj.Num)
  println("MyList" + obj.MyList)
  println("myDate" + obj.myDate)
  println("number" + obj.number)
}
It throws a MappingException:
Child class converted to string{"number":1,"str":"Mary","Num":5,"MyList":[1,2],"myDate":"2015-07-28T16:45:44.030"}
[error] (run-main-2) net.liftweb.json.MappingException: unknown error
net.liftweb.json.MappingException: unknown error
at net.liftweb.json.Extraction$.extract(Extraction.scala:46)
at net.liftweb.json.JsonAST$JValue.extract(JsonAST.scala:312)
at net.liftweb.json.Serialization$.read(Serialization.scala:58)
at MessageTest$.delayedEndpoint$MessageTest$1(MessageTest.scala:42)
at MessageTest$delayedInit$body.apply(MessageTest.scala:15)
at scala.Function0$class.apply$mcV$sp(Function0.scala:40)
at scala.runtime.AbstractFunction0.apply$mcV$sp(AbstractFunction0.scala:12)
at scala.App$$anonfun$main$1.apply(App.scala:76)
at scala.App$$anonfun$main$1.apply(App.scala:76)
at scala.collection.immutable.List.foreach(List.scala:383)
at scala.collection.generic.TraversableForwarder$class.foreach(TraversableForwarder.scala:35)
at scala.App$class.main(App.scala:76)
at MessageTest$.main(MessageTest.scala:15)
at MessageTest.main(MessageTest.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:497)
Caused by: java.lang.ArrayIndexOutOfBoundsException: 49938
at com.thoughtworks.paranamer.BytecodeReadingParanamer$ClassReader.<init>(BytecodeReadingParanamer.java:451)
at com.thoughtworks.paranamer.BytecodeReadingParanamer$ClassReader.<init>(BytecodeReadingParanamer.java:431)
at com.thoughtworks.paranamer.BytecodeReadingParanamer$ClassReader.<init>(BytecodeReadingParanamer.java:492)
at com.thoughtworks.paranamer.BytecodeReadingParanamer$ClassReader.<init>(BytecodeReadingParanamer.java:337)
at com.thoughtworks.paranamer.BytecodeReadingParanamer.lookupParameterNames(BytecodeReadingParanamer.java:100)
at com.thoughtworks.paranamer.CachingParanamer.lookupParameterNames(CachingParanamer.java:75)
at com.thoughtworks.paranamer.CachingParanamer.lookupParameterNames(CachingParanamer.java:68)
at net.liftweb.json.Meta$ParanamerReader$.lookupParameterNames(Meta.scala:89)
at net.liftweb.json.Meta$Reflection$.argsInfo$1(Meta.scala:237)
at net.liftweb.json.Meta$Reflection$.constructorArgs(Meta.scala:253)
at net.liftweb.json.Meta$Reflection$$anonfun$constructors$1.apply(Meta.scala:227)
at net.liftweb.json.Meta$Reflection$$anonfun$constructors$1.apply(Meta.scala:227)
at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:245)
at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:245)
at scala.collection.IndexedSeqOptimized$class.foreach(IndexedSeqOptimized.scala:33)
at scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:186)
at scala.collection.TraversableLike$class.map(TraversableLike.scala:245)
at scala.collection.mutable.ArrayOps$ofRef.map(ArrayOps.scala:186)
at net.liftweb.json.Meta$Reflection$.constructors(Meta.scala:227)
at net.liftweb.json.Meta$.net$liftweb$json$Meta$$constructors$1(Meta.scala:97)
at net.liftweb.json.Meta$.mkConstructor$1(Meta.scala:124)
at net.liftweb.json.Meta$.fieldMapping$1(Meta.scala:151)
at net.liftweb.json.Meta$.net$liftweb$json$Meta$$toArg$1(Meta.scala:155)
at net.liftweb.json.Meta$$anonfun$net$liftweb$json$Meta$$constructors$1$1$$anonfun$apply$1.apply(Meta.scala:99)
at net.liftweb.json.Meta$$anonfun$net$liftweb$json$Meta$$constructors$1$1$$anonfun$apply$1.apply(Meta.scala:98)
at scala.collection.immutable.List.map(List.scala:278)
at net.liftweb.json.Meta$$anonfun$net$liftweb$json$Meta$$constructors$1$1.apply(Meta.scala:98)
at net.liftweb.json.Meta$$anonfun$net$liftweb$json$Meta$$constructors$1$1.apply(Meta.scala:97)
at scala.collection.immutable.List.map(List.scala:274)
at net.liftweb.json.Meta$.net$liftweb$json$Meta$$constructors$1(Meta.scala:97)
at net.liftweb.json.Meta$$anonfun$mappingOf$1.apply(Meta.scala:169)
at net.liftweb.json.Meta$$anonfun$mappingOf$1.apply(Meta.scala:161)
at net.liftweb.json.Meta$Memo.memoize(Meta.scala:199)
at net.liftweb.json.Meta$.mappingOf(Meta.scala:161)
at net.liftweb.json.Extraction$.net$liftweb$json$Extraction$$mkMapping$1(Extraction.scala:194)
at net.liftweb.json.Extraction$.net$liftweb$json$Extraction$$extract0(Extraction.scala:199)
at net.liftweb.json.Extraction$.extract(Extraction.scala:43)
at net.liftweb.json.JsonAST$JValue.extract(JsonAST.scala:312)
at net.liftweb.json.Serialization$.read(Serialization.scala:58)
at MessageTest$.delayedEndpoint$MessageTest$1(MessageTest.scala:42)
at MessageTest$delayedInit$body.apply(MessageTest.scala:15)
at scala.Function0$class.apply$mcV$sp(Function0.scala:40)
at scala.runtime.AbstractFunction0.apply$mcV$sp(AbstractFunction0.scala:12)
at scala.App$$anonfun$main$1.apply(App.scala:76)
at scala.App$$anonfun$main$1.apply(App.scala:76)
at scala.collection.immutable.List.foreach(List.scala:383)
at scala.collection.generic.TraversableForwarder$class.foreach(TraversableForwarder.scala:35)
at scala.App$class.main(App.scala:76)
at MessageTest$.main(MessageTest.scala:15)
at MessageTest.main(MessageTest.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:497)
Solution 2
case class Child2(var str: String, var Num: Int, MyList: List[Int], val myDate: LocalDateTime = LocalDateTime.now()) {
  var number: Int = 555
}

class Message1Serializer extends Serializer[Child2] {
  private val Child2Class = classOf[Child2]

  def deserialize(implicit format: Formats): PartialFunction[(TypeInfo, JValue), Child2] = {
    case (TypeInfo(Child2Class, _), json) => json match {
      case JObject(
        JField("str", JString(str)) :: JField("Num", JInt(num)) ::
        JField("MyList", JArray(mylist)) :: JField("myDate", JString(mydate)) ::
        JField("number", JInt(number)) :: Nil
      ) =>
        val c = Child2(
          str, num.intValue(), mylist.map(_.values.toString.toInt), LocalDateTime.parse(mydate)
        )
        c.number = number.intValue()
        c
      case x => throw new MappingException("Can't convert " + x + " to Child2")
    }
  }

  def serialize(implicit format: Formats): PartialFunction[Any, JValue] = {
    case x: Child2 =>
      JObject(
        JField("str", JString(x.str)) :: JField("Num", JInt(x.Num)) ::
        JField("MyList", JArray(x.MyList.map(JInt(_)))) ::
        JField("myDate", JString(x.myDate.toString)) ::
        JField("number", JInt(x.number)) :: Nil
      )
  }
}
object MessageTest extends App {
  implicit val formats = /*Serialization.formats(NoTypeHints)*/ DefaultFormats + new Message1Serializer

  var c = new Child2("Mary", 5, List(1, 2), LocalDateTime.now())
  c.number = 1
  // println("number" + c.number)

  val ser = write(c)
  println("Child class converted to string" + ser)

  var obj = read[Child2](ser)
  println("object of Child is " + obj)
  println("str" + obj.str)
  println("Num" + obj.Num)
  println("MyList" + obj.MyList)
  println("myDate" + obj.myDate)
  println("number" + obj.number)
}
It also throws a MappingException:
Child class converted to string{"str":"Mary","Num":5,"MyList":[1,2],"myDate":"2015-07-28T17:09:31.512","number":1}
[error] (run-main-1) net.liftweb.json.MappingException: unknown error
net.liftweb.json.MappingException: unknown error
at net.liftweb.json.Extraction$.extract(Extraction.scala:46)
at net.liftweb.json.JsonAST$JValue.extract(JsonAST.scala:312)
at net.liftweb.json.Serialization$.read(Serialization.scala:58)
at MessageTest$.delayedEndpoint$MessageTest$1(MessageTest.scala:58)
at MessageTest$delayedInit$body.apply(MessageTest.scala:15)
at scala.Function0$class.apply$mcV$sp(Function0.scala:40)
at scala.runtime.AbstractFunction0.apply$mcV$sp(AbstractFunction0.scala:12)
at scala.App$$anonfun$main$1.apply(App.scala:76)
at scala.App$$anonfun$main$1.apply(App.scala:76)
at scala.collection.immutable.List.foreach(List.scala:383)
at scala.collection.generic.TraversableForwarder$class.foreach(TraversableForwarder.scala:35)
at scala.App$class.main(App.scala:76)
at MessageTest$.main(MessageTest.scala:15)
at MessageTest.main(MessageTest.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:497)
Caused by: java.lang.ArrayIndexOutOfBoundsException: 49938
at com.thoughtworks.paranamer.BytecodeReadingParanamer$ClassReader.<init>(BytecodeReadingParanamer.java:451)
at com.thoughtworks.paranamer.BytecodeReadingParanamer$ClassReader.<init>(BytecodeReadingParanamer.java:431)
at com.thoughtworks.paranamer.BytecodeReadingParanamer$ClassReader.<init>(BytecodeReadingParanamer.java:492)
at com.thoughtworks.paranamer.BytecodeReadingParanamer$ClassReader.<init>(BytecodeReadingParanamer.java:337)
at com.thoughtworks.paranamer.BytecodeReadingParanamer.lookupParameterNames(BytecodeReadingParanamer.java:100)
at com.thoughtworks.paranamer.CachingParanamer.lookupParameterNames(CachingParanamer.java:75)
at com.thoughtworks.paranamer.CachingParanamer.lookupParameterNames(CachingParanamer.java:68)
at net.liftweb.json.Meta$ParanamerReader$.lookupParameterNames(Meta.scala:89)
at net.liftweb.json.Meta$Reflection$.argsInfo$1(Meta.scala:237)
at net.liftweb.json.Meta$Reflection$.constructorArgs(Meta.scala:253)
at net.liftweb.json.Meta$Reflection$$anonfun$constructors$1.apply(Meta.scala:227)
at net.liftweb.json.Meta$Reflection$$anonfun$constructors$1.apply(Meta.scala:227)
at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:245)
at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:245)
at scala.collection.IndexedSeqOptimized$class.foreach(IndexedSeqOptimized.scala:33)
at scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:186)
at scala.collection.TraversableLike$class.map(TraversableLike.scala:245)
at scala.collection.mutable.ArrayOps$ofRef.map(ArrayOps.scala:186)
at net.liftweb.json.Meta$Reflection$.constructors(Meta.scala:227)
at net.liftweb.json.Meta$.net$liftweb$json$Meta$$constructors$1(Meta.scala:97)
at net.liftweb.json.Meta$.mkConstructor$1(Meta.scala:124)
at net.liftweb.json.Meta$.fieldMapping$1(Meta.scala:151)
at net.liftweb.json.Meta$.net$liftweb$json$Meta$$toArg$1(Meta.scala:155)
at net.liftweb.json.Meta$$anonfun$net$liftweb$json$Meta$$constructors$1$1$$anonfun$apply$1.apply(Meta.scala:99)
at net.liftweb.json.Meta$$anonfun$net$liftweb$json$Meta$$constructors$1$1$$anonfun$apply$1.apply(Meta.scala:98)
at scala.collection.immutable.List.map(List.scala:278)
at net.liftweb.json.Meta$$anonfun$net$liftweb$json$Meta$$constructors$1$1.apply(Meta.scala:98)
at net.liftweb.json.Meta$$anonfun$net$liftweb$json$Meta$$constructors$1$1.apply(Meta.scala:97)
at scala.collection.immutable.List.map(List.scala:274)
at net.liftweb.json.Meta$.net$liftweb$json$Meta$$constructors$1(Meta.scala:97)
at net.liftweb.json.Meta$$anonfun$mappingOf$1.apply(Meta.scala:169)
at net.liftweb.json.Meta$$anonfun$mappingOf$1.apply(Meta.scala:161)
at net.liftweb.json.Meta$Memo.memoize(Meta.scala:199)
at net.liftweb.json.Meta$.mappingOf(Meta.scala:161)
at net.liftweb.json.Extraction$.net$liftweb$json$Extraction$$mkMapping$1(Extraction.scala:194)
at net.liftweb.json.Extraction$.net$liftweb$json$Extraction$$extract0(Extraction.scala:199)
at net.liftweb.json.Extraction$.extract(Extraction.scala:43)
at net.liftweb.json.JsonAST$JValue.extract(JsonAST.scala:312)
at net.liftweb.json.Serialization$.read(Serialization.scala:58)
at MessageTest$.delayedEndpoint$MessageTest$1(MessageTest.scala:58)
at MessageTest$delayedInit$body.apply(MessageTest.scala:15)
at scala.Function0$class.apply$mcV$sp(Function0.scala:40)
at scala.runtime.AbstractFunction0.apply$mcV$sp(AbstractFunction0.scala:12)
at scala.App$$anonfun$main$1.apply(App.scala:76)
at scala.App$$anonfun$main$1.apply(App.scala:76)
at scala.collection.immutable.List.foreach(List.scala:383)
at scala.collection.generic.TraversableForwarder$class.foreach(TraversableForwarder.scala:35)
at scala.App$class.main(App.scala:76)
at MessageTest$.main(MessageTest.scala:15)
at MessageTest.main(MessageTest.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:497)
Please help me resolve this issue. How should I write a serializer for Java 8's LocalDateTime?
I can vaguely remember having a similar issue, and I solved it
by parsing with the corresponding DateTimeFormatter class:
LocalDateTime.parse(dt, DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm"))
Correct the pattern for your case.
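For reference, here is a minimal sketch of such a serializer with an explicit formatter (the class name is just illustrative; this assumes the value is written in the default ISO format that LocalDateTime.toString produces, e.g. 2015-07-28T16:45:44.030, so swap in DateTimeFormatter.ofPattern(...) if your format differs):
import java.time.LocalDateTime
import java.time.format.DateTimeFormatter
import net.liftweb.json._

class LocalDateTimeSerializer extends Serializer[LocalDateTime] {
  private val LocalDateTimeClass = classOf[LocalDateTime]
  // ISO_LOCAL_DATE_TIME matches LocalDateTime.toString output such as "2015-07-28T16:45:44.030"
  private val formatter = DateTimeFormatter.ISO_LOCAL_DATE_TIME

  def deserialize(implicit format: Formats): PartialFunction[(TypeInfo, JValue), LocalDateTime] = {
    case (TypeInfo(LocalDateTimeClass, _), JString(s)) => LocalDateTime.parse(s, formatter)
  }

  def serialize(implicit format: Formats): PartialFunction[Any, JValue] = {
    case dt: LocalDateTime => JString(dt.format(formatter))
  }
}
Register it the same way as in the question, e.g. DefaultFormats + new FieldSerializer[Child2] + new LocalDateTimeSerializer.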
Related
I am trying to update my XML using a CLOB. I have no problem updating English characters. I can even update Russian and Arabic, but when it comes to Georgian characters, I get this internal error.
This is the update:
select = "update datadocumentxml d\n" +
" set d.datadocumentxml = updatexml(xmltype(d.datadocumentxml),'/Comments/#Comment', ?)\n" +
" .getClobVal(),\n" +
" where d.processengineguid = fnguidjava2raw(?)\n" +
" and d.datadocumentid = 'Comments'";
ps = conn.prepareStatement(select);
ps.setClob(1,clob);
ps.setString(2,processid);
rs = ps.executeQuery();
This is the error I get when trying to update Georgian letters:
Caused by: java.sql.SQLException: ORA-00600: internal error code,
arguments: [17114], [0x7FEA96179698], [], [], [], [], [], [], [], [], [], []
ORA-00600: internal error code, arguments: [17114], [0x7FEA96179698], [], [], [], [], [], [], [], [], [], []
at oracle.jdbc.driver.T4CTTIoer.processError(T4CTTIoer.java:447)
at oracle.jdbc.driver.T4CTTIoer.processError(T4CTTIoer.java:396)
at oracle.jdbc.driver.T4C8Oall.processError(T4C8Oall.java:951)
at oracle.jdbc.driver.T4CTTIfun.receive(T4CTTIfun.java:513)
at oracle.jdbc.driver.T4CTTIfun.doRPC(T4CTTIfun.java:227)
at oracle.jdbc.driver.T4C8Oall.doOALL(T4C8Oall.java:531)
at oracle.jdbc.driver.T4CPreparedStatement.doOall8(T4CPreparedStatement.java:208)
at oracle.jdbc.driver.T4CPreparedStatement.executeForRows(T4CPreparedStatement.java:1046)
at oracle.jdbc.driver.OracleStatement.doExecuteWithTimeout(OracleStatement.java:1336)
at oracle.jdbc.driver.OraclePreparedStatement.executeInternal(OraclePreparedStatement.java:3613)
at oracle.jdbc.driver.OraclePreparedStatement.executeQuery(OraclePreparedStatement.java:3657)
at oracle.jdbc.driver.OraclePreparedStatementWrapper.executeQuery(OraclePreparedStatementWrapper.java:1495)
at sun.reflect.GeneratedMethodAccessor845.invoke(Unknown Source)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:497)
at oracle.ucp.jdbc.proxy.PreparedStatementProxyFactory.invoke(PreparedStatementProxyFactory.java:125)
at com.sun.proxy.$Proxy949.executeQuery(Unknown Source)
at ge.bog.integration.crif.data.collector.connector.parallelcomments.UpdateXml.setComment(UpdateXml.java:79)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:497)
at ge.bog.integration.crif.data.collector.core.ProcessorImpl.executeClassFunction(ProcessorImpl.java:67)
... 46 more
I think it has something to do with character sets or the JDBC driver. Any suggestion would be much appreciated.
This is the Wikipedia page for the Georgian Unicode block: https://en.wikipedia.org/wiki/Georgian_(Unicode_block)
I am trying to generate customer statistics using the following code. It's a combineByKey transformation. I got an ArrayIndexOutOfBoundsException and I am wondering about the reason, but I am not getting any hint. Can anyone please clarify why I am getting this exception? Thank you.
def createComb = (t: Array[String]) => {
  val total = t(5).toDouble
  val q = t(4).toInt
  (total / q, total / q, q, total)
}

def mergeValues: ((Double, Double, Int, Double), Array[String]) => (Double, Double, Int, Double) = {
  case ((mx, mn, q, tot), t) =>
    val total = t(5).toDouble
    val quan = t(4).toInt
    val mxx = scala.math.max(mx, total / q)
    val minn = scala.math.min(mn, total / q)
    (mxx, minn, quan + q, total + tot)
}

def mergeComb: ((Double, Double, Int, Double), (Double, Double, Int, Double)) => (Double, Double, Int, Double) = {
  case ((mx1, mn1, q1, tot1), (mx2, mn2, q2, tot2)) =>
    (scala.math.max(mx1, mx2), scala.math.min(mn1, mn2), q1 + q2, tot1 + tot2)
}

val statsOfCust = productsTotalByKey.combineByKey(
  createComb, mergeValues, mergeComb,
  new org.apache.spark.HashPartitioner(productsTotalByKey.partitions.size))
Here is the output I got when I executed the above code on an RDD on the Spark cluster.
scala> statsOfCust.first
[Stage 22:> (0 + 1) / 2]18/11/17 21:26:31 WARN TaskSetManager: Lost task 0.0 in stage 22.0 (TID 26, wn01.itversity.com, executor 9): java.lang.ArrayIndexOutOfBoundsException: 5
at $line80.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$anonfun$createComb$1.apply(<console>:24)
at $line80.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$anonfun$createComb$1.apply(<console>:23)
at org.apache.spark.util.collection.ExternalSorter$$anonfun$5.apply(ExternalSorter.scala:189)
at org.apache.spark.util.collection.ExternalSorter$$anonfun$5.apply(ExternalSorter.scala:188)
at org.apache.spark.util.collection.AppendOnlyMap.changeValue(AppendOnlyMap.scala:144)
at org.apache.spark.util.collection.SizeTrackingAppendOnlyMap.changeValue(SizeTrackingAppendOnlyMap.scala:32)
at org.apache.spark.util.collection.ExternalSorter.insertAll(ExternalSorter.scala:194)
at org.apache.spark.shuffle.sort.SortShuffleWriter.write(SortShuffleWriter.scala:63)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:96)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:53)
at org.apache.spark.scheduler.Task.run(Task.scala:109)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:345)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
Driver stacktrace:
at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1599)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1587)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1586)
at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1586)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:831)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:831)
at scala.Option.foreach(Option.scala:257)
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:831)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1820)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1769)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1758)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:642)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2034)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2055)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2074)
at org.apache.spark.rdd.RDD$$anonfun$take$1.apply(RDD.scala:1358)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:363)
at org.apache.spark.rdd.RDD.take(RDD.scala:1331)
at org.apache.spark.rdd.RDD$$anonfun$first$1.apply(RDD.scala:1372)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:363)
at org.apache.spark.rdd.RDD.first(RDD.scala:1371)
... 49 elided
Caused by: java.lang.ArrayIndexOutOfBoundsException: 5
at $anonfun$createComb$1.apply(<console>:24)
at $anonfun$createComb$1.apply(<console>:23)
at org.apache.spark.util.collection.ExternalSorter$$anonfun$5.apply(ExternalSorter.scala:189)
at org.apache.spark.util.collection.ExternalSorter$$anonfun$5.apply(ExternalSorter.scala:188)
at org.apache.spark.util.collection.AppendOnlyMap.changeValue(AppendOnlyMap.scala:144)
at org.apache.spark.util.collection.SizeTrackingAppendOnlyMap.changeValue(SizeTrackingAppendOnlyMap.scala:32)
at org.apache.spark.util.collection.ExternalSorter.insertAll(ExternalSorter.scala:194)
at org.apache.spark.shuffle.sort.SortShuffleWriter.write(SortShuffleWriter.scala:63)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:96)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:53)
at org.apache.spark.scheduler.Task.run(Task.scala:109)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:345)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
Seems like a problem with the createComb method, where you are assuming that the t array has at least 6 elements.
It is just a quick guess. Let me know if it helps. If not, I will try to investigate it further :)
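If that is the cause, one way to guard against it is to drop the short rows before the combineByKey. A rough sketch, assuming productsTotalByKey is a pair RDD whose values are the Array[String] rows (as the createComb signature suggests):
// Keep only rows that actually contain the quantity (index 4) and total (index 5) fields.
val wellFormed = productsTotalByKey.filter { case (_, fields) => fields.length >= 6 }

// Optionally check how many rows were malformed.
println("malformed rows: " + productsTotalByKey.filter { case (_, fields) => fields.length < 6 }.count())

val statsOfCust = wellFormed.combineByKey(
  createComb, mergeValues, mergeComb,
  new org.apache.spark.HashPartitioner(wellFormed.partitions.size))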
Can anyone help with the Java code to convert the following JSON to a Spark DataFrame?
Note: it is not a file.
Logic:
Listen to Kafka topic T1, read each record in the RDD, apply additional logic, convert the resulting data to a JSON object, and write it to another topic T2 in Kafka.
The T2 structure is below.
JSON:
[
{
"#tenant_id":"XYZ",
"alarmUpdateTime":1526342400000,
"alarm_id":"AB5C9123",
"alarm_updates":[
{
"alarmField":"Severity",
"new_value":"Minor",
"old_value":"Major"
},
{
"alarmField":"state",
"new_value":"UPDATE",
"old_value":"NEW"
}
],
"aucID":"5af83",
"inID":"INC15234567",
"index":"test",
"product":"test",
"source":"ABS",
"state":"NEW"
}
]
Classes created:
class Alarm {
    String #tenant_id;
    String alarm_id;
    ...
    List<AlarmUpdate> update;
    // getters and setters for all variables
}

class AlarmUpdate {
    String alarmField;
    String oldVal;
    String NewVal;
    // getters and setters for all variables
}

class AppClass {
    static void main() {
        Alarm alarmObj = new Alarm();
        // set values for variables in alarmObj
        Dataset<Row> results = jobCtx.getSparkSession()
                .createDataFrame(Arrays.asList(alarmObj), Alarm.class);
        // At this point I am seeing the following errors.
    }
}
Error:
2018-05-15 13:40:48 ERROR JobScheduler - Error running job streaming
job 1526406040000 ms.0 scala.MatchError:
com.ca.alarmupdates.AlarmUpdate#48c8809b (of class
com.ca.alarmupdates.AlarmUpdate)
at org.apache.spark.sql.catalyst.CatalystTypeConverters$StructConverter.toCatalystImpl(CatalystTypeConverters.scala:236)
~[spark-catalyst_2.11-2.2.0.jar:2.2.0]
at org.apache.spark.sql.catalyst.CatalystTypeConverters$StructConverter.toCatalystImpl(CatalystTypeConverters.scala:231)
~[spark-catalyst_2.11-2.2.0.jar:2.2.0]
at org.apache.spark.sql.catalyst.CatalystTypeConverters$CatalystTypeConverter.toCatalyst(CatalystTypeConverters.scala:103)
~[spark-catalyst_2.11-2.2.0.jar:2.2.0]
at org.apache.spark.sql.catalyst.CatalystTypeConverters$ArrayConverter.toCatalystImpl(CatalystTypeConverters.scala:170)
~[spark-catalyst_2.11-2.2.0.jar:2.2.0]
at org.apache.spark.sql.catalyst.CatalystTypeConverters$ArrayConverter.toCatalystImpl(CatalystTypeConverters.scala:154)
~[spark-catalyst_2.11-2.2.0.jar:2.2.0]
at org.apache.spark.sql.catalyst.CatalystTypeConverters$CatalystTypeConverter.toCatalyst(CatalystTypeConverters.scala:103)
~[spark-catalyst_2.11-2.2.0.jar:2.2.0]
at org.apache.spark.sql.catalyst.CatalystTypeConverters$$anonfun$createToCatalystConverter$2.apply(CatalystTypeConverters.scala:379)
~[spark-catalyst_2.11-2.2.0.jar:2.2.0]
at org.apache.spark.sql.SQLContext$$anonfun$beansToRows$1$$anonfun$apply$1.apply(SQLContext.scala:1105)
~[spark-sql_2.11-2.2.0.jar:2.2.0]
at org.apache.spark.sql.SQLContext$$anonfun$beansToRows$1$$anonfun$apply$1.apply(SQLContext.scala:1105)
~[spark-sql_2.11-2.2.0.jar:2.2.0]
at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
~[jaf-sdk-2.4.0.jar:?]
at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
~[jaf-sdk-2.4.0.jar:?]
at scala.collection.IndexedSeqOptimized$class.foreach(IndexedSeqOptimized.scala:33)
~[jaf-sdk-2.4.0.jar:?]
at scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:186)
~[jaf-sdk-2.4.0.jar:?]
at scala.collection.TraversableLike$class.map(TraversableLike.scala:234)
~[jaf-sdk-2.4.0.jar:?]
at scala.collection.mutable.ArrayOps$ofRef.map(ArrayOps.scala:186)
~[jaf-sdk-2.4.0.jar:?]
at org.apache.spark.sql.SQLContext$$anonfun$beansToRows$1.apply(SQLContext.scala:1105)
~[spark-sql_2.11-2.2.0.jar:2.2.0]
at org.apache.spark.sql.SQLContext$$anonfun$beansToRows$1.apply(SQLContext.scala:1103)
~[spark-sql_2.11-2.2.0.jar:2.2.0]
at scala.collection.Iterator$$anon$11.next(Iterator.scala:409)
~[jaf-sdk-2.4.0.jar:?]
at scala.collection.Iterator$class.toStream(Iterator.scala:1322)
~[jaf-sdk-2.4.0.jar:?]
at scala.collection.AbstractIterator.toStream(Iterator.scala:1336)
~[jaf-sdk-2.4.0.jar:?]
at scala.collection.TraversableOnce$class.toSeq(TraversableOnce.scala:298)
~[jaf-sdk-2.4.0.jar:?]
at scala.collection.AbstractIterator.toSeq(Iterator.scala:1336)
~[jaf-sdk-2.4.0.jar:?]
at org.apache.spark.sql.SparkSession.createDataFrame(SparkSession.scala:406)
~[spark-sql_2.11-2.2.0.jar:2.2.0]
at com.ca.alarmupdates.AlarmUpdates.lambda$null$0(AlarmUpdates.java:85)
~[classes/:?]
at java.util.Arrays$ArrayList.forEach(Arrays.java:3880) ~[?:1.8.0_161]
at com.ca.alarmupdates.AlarmUpdates.lambda$main$f87f782d$1(AlarmUpdates.java:58)
~[classes/:?]
at org.apache.spark.streaming.api.java.JavaDStreamLike$$anonfun$foreachRDD$1.apply(JavaDStreamLike.scala:272)
~[spark-streaming_2.11-2.2.0.jar:2.2.0]
at org.apache.spark.streaming.api.java.JavaDStreamLike$$anonfun$foreachRDD$1.apply(JavaDStreamLike.scala:272)
~[spark-streaming_2.11-2.2.0.jar:2.2.0]
at org.apache.spark.streaming.dstream.DStream$$anonfun$foreachRDD$1$$anonfun$apply$mcV$sp$3.apply(DStream.scala:628)
~[spark-streaming_2.11-2.2.0.jar:2.2.0]
at org.apache.spark.streaming.dstream.DStream$$anonfun$foreachRDD$1$$anonfun$apply$mcV$sp$3.apply(DStream.scala:628)
~[spark-streaming_2.11-2.2.0.jar:2.2.0]
at org.apache.spark.streaming.dstream.ForEachDStream$$anonfun$1$$anonfun$apply$mcV$sp$1.apply$mcV$sp(ForEachDStream.scala:51)
~[spark-streaming_2.11-2.2.0.jar:2.2.0]
at org.apache.spark.streaming.dstream.ForEachDStream$$anonfun$1$$anonfun$apply$mcV$sp$1.apply(ForEachDStream.scala:51)
~[spark-streaming_2.11-2.2.0.jar:2.2.0]
at org.apache.spark.streaming.dstream.ForEachDStream$$anonfun$1$$anonfun$apply$mcV$sp$1.apply(ForEachDStream.scala:51)
~[spark-streaming_2.11-2.2.0.jar:2.2.0]
at org.apache.spark.streaming.dstream.DStream.createRDDWithLocalProperties(DStream.scala:416)
~[spark-streaming_2.11-2.2.0.jar:2.2.0]
at org.apache.spark.streaming.dstream.ForEachDStream$$anonfun$1.apply$mcV$sp(ForEachDStream.scala:50)
~[spark-streaming_2.11-2.2.0.jar:2.2.0]
at org.apache.spark.streaming.dstream.ForEachDStream$$anonfun$1.apply(ForEachDStream.scala:50)
~[spark-streaming_2.11-2.2.0.jar:2.2.0]
at org.apache.spark.streaming.dstream.ForEachDStream$$anonfun$1.apply(ForEachDStream.scala:50)
~[spark-streaming_2.11-2.2.0.jar:2.2.0]
at scala.util.Try$.apply(Try.scala:192) ~[jaf-sdk-2.4.0.jar:?]
at org.apache.spark.streaming.scheduler.Job.run(Job.scala:39)
~[spark-streaming_2.11-2.2.0.jar:2.2.0]
at org.apache.spark.streaming.scheduler.JobScheduler$JobHandler$$anonfun$run$1.apply$mcV$sp(JobScheduler.scala:257)
~[spark-streaming_2.11-2.2.0.jar:2.2.0]
at org.apache.spark.streaming.scheduler.JobScheduler$JobHandler$$anonfun$run$1.apply(JobScheduler.scala:257)
~[spark-streaming_2.11-2.2.0.jar:2.2.0]
at org.apache.spark.streaming.scheduler.JobScheduler$JobHandler$$anonfun$run$1.apply(JobScheduler.scala:257)
~[spark-streaming_2.11-2.2.0.jar:2.2.0]
at scala.util.DynamicVariable.withValue(DynamicVariable.scala:58)
~[jaf-sdk-2.4.0.jar:?]
at org.apache.spark.streaming.scheduler.JobScheduler$JobHandler.run(JobScheduler.scala:256)
~[spark-streaming_2.11-2.2.0.jar:2.2.0]
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
~[?:1.8.0_161]
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
~[?:1.8.0_161]
at java.lang.Thread.run(Thread.java:748) ~[?:1.8.0_161]
You can use wholeTextFiles to read the JSON file, get the JSON text, and use it with the json API of SparkSession as
import org.apache.spark.sql.SparkSession;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
static SparkSession spark = SparkSession.builder().master("local").appName("simple").getOrCreate();
static JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
Dataset<Row> df = spark.read().json(sc.wholeTextFiles("path to json file").map(t -> t._2()));
df.show(false);
and you should get
+----------+---------------+--------+--------------------------------------------+-----+-----------+-----+-------+------+-----+
|#tenant_id|alarmUpdateTime|alarm_id|alarm_updates |aucID|inID |index|product|source|state|
+----------+---------------+--------+--------------------------------------------+-----+-----------+-----+-------+------+-----+
|XYZ |1526342400000 |AB5C9123|[[Severity,Minor,Major], [state,UPDATE,NEW]]|5af83|INC15234567|test |test |ABS |NEW |
+----------+---------------+--------+--------------------------------------------+-----+-----------+-----+-------+------+-----+
You can use the master and appName as you prefer
Updated
You have commented that
The way you do it through a file, can we do it with the object? I have to convert it to ingest the data into the other topic T2.
For that, let's say you have a record read from topic T1 as a string object:
String t1Record = "[\n" +
" {\n" +
" \"#tenant_id\":\"XYZ\",\n" +
" \"alarmUpdateTime\":1526342400000,\n" +
" \"alarm_id\":\"AB5C9123\",\n" +
" \"alarm_updates\":[\n" +
" {\n" +
" \"alarmField\":\"Severity\",\n" +
" \"new_value\":\"Minor\",\n" +
" \"old_value\":\"Major\"\n" +
" },\n" +
" {\n" +
" \"alarmField\":\"state\",\n" +
" \"new_value\":\"UPDATE\",\n" +
" \"old_value\":\"NEW\"\n" +
" }\n" +
" ],\n" +
" \"aucID\":\"5af83\",\n" +
" \"inID\":\"INC15234567\",\n" +
" \"index\":\"test\",\n" +
" \"product\":\"test\",\n" +
" \"source\":\"ABS\",\n" +
" \"state\":\"NEW\"\n" +
" }\n" +
"]";
and you convert it into an RDD as
JavaRDD<String> t1RecordRDD = sc.parallelize(Arrays.asList(t1Record));
Then you can apply the json API to convert it into a DataFrame:
Dataset<Row> df = spark.read().json(t1RecordRDD);
which should give you the same result as above
I'm new to Scala, Spark, and MLlib, and I'm currently struggling with an error that I don't know the cause of.
I have an RDD with multiple partitions, containing data like this (output from take(#)):
Array[TermDoc] = Array(TermDoc(142389495503925248,Set(NEU),ArrayBuffer(salg, veotv, día, largooooo)), TermDoc(142389933619945473,Set(NEU),ArrayBuffer(librar, ayudar, bes, graci)), TermDoc(142391947707940864,Set(P),ArrayBuffer(graci, mar)), TermDoc(142416095012339712,Set(N+),ArrayBuffer(off, pensand, regalit, sind, va, sgae, van, corrupt, intent, sacar, conclusion, intent)), TermDoc(142422495721562112,Set(P+),ArrayBuffer(conozc, alguien, q, adict, dram, ja, ja, ja, suen, d)), TermDoc(142424715175280640,Set(NEU),ArrayBuffer(rt, si, amas, alguien, dejal, libr, si, grit, hombr, paurubi)), TermDoc(142483342040907776,Set(P+),ArrayBuffer(toca, grabacion, dl, especial, navideñ, mari, crism)), TermDoc(142493511634259968,Set(NEU))
Since there's an output, I assume that the RDD is not empty, but when I try to execute:
val count = rdd.count()
java.lang.UnsupportedOperationException: empty.init
at scala.collection.TraversableLike$class.init(TraversableLike.scala:475)
at scala.collection.mutable.ArrayOps$ofRef.scala$collection$IndexedSeqOptimized$$super$init(ArrayOps.scala:108)
at scala.collection.IndexedSeqOptimized$class.init(IndexedSeqOptimized.scala:129)
at scala.collection.mutable.ArrayOps$ofRef.init(ArrayOps.scala:108)
at $line24.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$TweetParser$.buildDocument(<console>:58)
at $line24.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$TweetParser$$anonfun$2.apply(<console>:49)
at $line24.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$TweetParser$$anonfun$2.apply(<console>:49)
at scala.collection.Iterator$$anon$11.next(Iterator.scala:328)
at scala.collection.Iterator$$anon$11.next(Iterator.scala:328)
at org.apache.spark.util.Utils$.getIteratorSize(Utils.scala:1598)
at org.apache.spark.rdd.RDD$$anonfun$count$1.apply(RDD.scala:1157)
at org.apache.spark.rdd.RDD$$anonfun$count$1.apply(RDD.scala:1157)
at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:1869)
at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:1869)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:66)
at org.apache.spark.scheduler.Task.run(Task.scala:89)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
17/03/13 10:15:11 WARN scheduler.TaskSetManager: Lost task 0.0 in stage 2.0 (TID 2, localhost): java.lang.UnsupportedOperationException: empty.init
at scala.collection.TraversableLike$class.init(TraversableLike.scala:475)
at scala.collection.mutable.ArrayOps$ofRef.scala$collection$IndexedSeqOptimized$$super$init(ArrayOps.scala:108)
at scala.collection.IndexedSeqOptimized$class.init(IndexedSeqOptimized.scala:129)
at scala.collection.mutable.ArrayOps$ofRef.init(ArrayOps.scala:108)
at $line24.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$TweetParser$.buildDocument(<console>:58)
at $line24.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$TweetParser$$anonfun$2.apply(<console>:49)
at $line24.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$TweetParser$$anonfun$2.apply(<console>:49)
at scala.collection.Iterator$$anon$11.next(Iterator.scala:328)
at scala.collection.Iterator$$anon$11.next(Iterator.scala:328)
at org.apache.spark.util.Utils$.getIteratorSize(Utils.scala:1598)
at org.apache.spark.rdd.RDD$$anonfun$count$1.apply(RDD.scala:1157)
at org.apache.spark.rdd.RDD$$anonfun$count$1.apply(RDD.scala:1157)
at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:1869)
at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:1869)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:66)
at org.apache.spark.scheduler.Task.run(Task.scala:89)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
17/03/13 10:15:11 ERROR scheduler.TaskSetManager: Task 0 in stage 2.0 failed 1 times; aborting job
17/03/13 10:15:11 WARN scheduler.TaskSetManager: Lost task 1.0 in stage 2.0 (TID 3, localhost): TaskKilled (killed intentionally)
17/03/13 10:15:11 WARN spark.ExecutorAllocationManager: No stages are running, but numRunningTasks != 0
17/03/13 10:15:11 ERROR scheduler.LiveListenerBus: Listener SQLListener threw an exception
java.lang.NullPointerException
at org.apache.spark.sql.execution.ui.SQLListener.onTaskEnd(SQLListener.scala:167)
at org.apache.spark.scheduler.SparkListenerBus$class.onPostEvent(SparkListenerBus.scala:42)
at org.apache.spark.scheduler.LiveListenerBus.onPostEvent(LiveListenerBus.scala:31)
at org.apache.spark.scheduler.LiveListenerBus.onPostEvent(LiveListenerBus.scala:31)
at org.apache.spark.util.ListenerBus$class.postToAll(ListenerBus.scala:55)
at org.apache.spark.util.AsynchronousListenerBus.postToAll(AsynchronousListenerBus.scala:37)
at org.apache.spark.util.AsynchronousListenerBus$$anon$1$$anonfun$run$1$$anonfun$apply$mcV$sp$1.apply$mcV$sp(AsynchronousListenerBus.scala:80)
at org.apache.spark.util.AsynchronousListenerBus$$anon$1$$anonfun$run$1$$anonfun$apply$mcV$sp$1.apply(AsynchronousListenerBus.scala:65)
at org.apache.spark.util.AsynchronousListenerBus$$anon$1$$anonfun$run$1$$anonfun$apply$mcV$sp$1.apply(AsynchronousListenerBus.scala:65)
at scala.util.DynamicVariable.withValue(DynamicVariable.scala:57)
at org.apache.spark.util.AsynchronousListenerBus$$anon$1$$anonfun$run$1.apply$mcV$sp(AsynchronousListenerBus.scala:64)
at org.apache.spark.util.Utils$.tryOrStopSparkContext(Utils.scala:1181)
at org.apache.spark.util.AsynchronousListenerBus$$anon$1.run(AsynchronousListenerBus.scala:63)
org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 2.0 failed 1 times, most recent failure: Lost task 0.0 in stage 2.0 (TID 2, localhost): java.lang.UnsupportedOperationException: empty.init
at scala.collection.TraversableLike$class.init(TraversableLike.scala:475)
at scala.collection.mutable.ArrayOps$ofRef.scala$collection$IndexedSeqOptimized$$super$init(ArrayOps.scala:108)
at scala.collection.IndexedSeqOptimized$class.init(IndexedSeqOptimized.scala:129)
at scala.collection.mutable.ArrayOps$ofRef.init(ArrayOps.scala:108)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$TweetParser$.buildDocument(<console>:58)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$TweetParser$$anonfun$2.apply(<console>:49)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$TweetParser$$anonfun$2.apply(<console>:49)
at scala.collection.Iterator$$anon$11.next(Iterator.scala:328)
at scala.collection.Iterator$$anon$11.next(Iterator.scala:328)
at org.apache.spark.util.Utils$.getIteratorSize(Utils.scala:1598)
at org.apache.spark.rdd.RDD$$anonfun$count$1.apply(RDD.scala:1157)
at org.apache.spark.rdd.RDD$$anonfun$count$1.apply(RDD.scala:1157)
at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:1869)
at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:1869)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:66)
at org.apache.spark.scheduler.Task.run(Task.scala:89)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
Driver stacktrace:
at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1431)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1419)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1418)
at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1418)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:799)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:799)
at scala.Option.foreach(Option.scala:236)
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:799)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1640)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1599)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1588)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:620)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:1843)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:1856)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:1869)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:1940)
at org.apache.spark.rdd.RDD.count(RDD.scala:1157)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:62)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:67)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:69)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:71)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:73)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:75)
at $iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:77)
at $iwC$$iwC$$iwC$$iwC.<init>(<console>:79)
at $iwC$$iwC$$iwC.<init>(<console>:81)
at $iwC$$iwC.<init>(<console>:83)
at $iwC.<init>(<console>:85)
at <init>(<console>:87)
at .<init>(<console>:91)
at .<clinit>(<console>)
at .<init>(<console>:7)
at .<clinit>(<console>)
at $print(<console>)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:606)
at org.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:1045)
at org.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1326)
at org.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:821)
at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:852)
at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:800)
at org.apache.spark.repl.SparkILoop.reallyInterpret$1(SparkILoop.scala:857)
at org.apache.spark.repl.SparkILoop.interpretStartingWith(SparkILoop.scala:902)
at org.apache.spark.repl.SparkILoop.command(SparkILoop.scala:814)
at org.apache.spark.repl.SparkILoop.processLine$1(SparkILoop.scala:657)
at org.apache.spark.repl.SparkILoop.innerLoop$1(SparkILoop.scala:665)
at org.apache.spark.repl.SparkILoop.org$apache$spark$repl$SparkILoop$$loop(SparkILoop.scala:670)
at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply$mcZ$sp(SparkILoop.scala:997)
at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply(SparkILoop.scala:945)
at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply(SparkILoop.scala:945)
at scala.tools.nsc.util.ScalaClassLoader$.savingContextLoader(ScalaClassLoader.scala:135)
at org.apache.spark.repl.SparkILoop.org$apache$spark$repl$SparkILoop$$process(SparkILoop.scala:945)
at org.apache.spark.repl.SparkILoop.process(SparkILoop.scala:1064)
at org.apache.spark.repl.Main$.main(Main.scala:31)
at org.apache.spark.repl.Main.main(Main.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:606)
at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:731)
at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:181)
at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:206)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:121)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
Caused by: java.lang.UnsupportedOperationException: empty.init
at scala.collection.TraversableLike$class.init(TraversableLike.scala:475)
at scala.collection.mutable.ArrayOps$ofRef.scala$collection$IndexedSeqOptimized$$super$init(ArrayOps.scala:108)
at scala.collection.IndexedSeqOptimized$class.init(IndexedSeqOptimized.scala:129)
at scala.collection.mutable.ArrayOps$ofRef.init(ArrayOps.scala:108)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$TweetParser$.buildDocument(<console>:58)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$TweetParser$$anonfun$2.apply(<console>:49)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$TweetParser$$anonfun$2.apply(<console>:49)
at scala.collection.Iterator$$anon$11.next(Iterator.scala:328)
at scala.collection.Iterator$$anon$11.next(Iterator.scala:328)
at org.apache.spark.util.Utils$.getIteratorSize(Utils.scala:1598)
at org.apache.spark.rdd.RDD$$anonfun$count$1.apply(RDD.scala:1157)
at org.apache.spark.rdd.RDD$$anonfun$count$1.apply(RDD.scala:1157)
at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:1869)
at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:1869)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:66)
at org.apache.spark.scheduler.Task.run(Task.scala:89)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
Apparently, it is saying that I'm trying to call count on an empty RDD. What's happening? It also fails with this line:
val terms = termDocsRdd.flatMap(_.terms).distinct().sortBy(identity)
Same empty.init exception.
Thanks.
UPDATE: Adding required information
object TweetParser extends Serializable {
  val headerPart = "polarity"
  val mentionRegex = """#(.)+?\s""".r
  val fullRegex = """(\d+),(.+?),(N|P|NEU|NONE)(,\w+|;\w+)*""".r

  def parseAll(csvFiles: Iterable[String], sc: SparkContext): RDD[Document] = {
    val csv = sc.textFile(csvFiles mkString ",")
    //val docs = scala.collection.mutable.ArrayBuffer.empty[Document]
    val docs = csv.filter(!_.contains(headerPart)).map(buildDocument(_))
    docs
    //docs.filter(!_.docId.equals("INVALID"))
  }

  def buildDocument(line: String): Document = {
    val lineSplit = line.split(",")
    val id = lineSplit.head
    val txt = lineSplit.tail.init.init.mkString(",")
    val sent = lineSplit.init.last
    val opt = lineSplit.last
    if (id != null && txt != null && sent != null) {
      if (txt.equals("")) {
        //the line does not contain the option after sentiment
        new Document(id, mentionRegex.replaceAllIn(sent, ""), Set(opt))
      } else {
        new Document(id, mentionRegex.replaceAllIn(txt, ""), Set(sent))
      }
    } else {
      println("Invalid")
      new Document("INVALID")
    }
  }
}
case class Document(docId: String, body: String = "", labels: Set[String] = Set.empty)
Tokenizer object:
import java.io.StringReader
import org.apache.lucene.analysis.es.SpanishAnalyzer
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute
import org.apache.lucene.util.Version
import org.apache.spark.rdd.RDD
object Tokenizer extends Serializable {
  //val LuceneVersion = Version.LUCENE_5_1_0

  def tokenizeAll(docs: RDD[Document]) = docs.map(tokenize)

  def tokenize(doc: Document): TermDoc = TermDoc(doc.docId, doc.labels, tokenize(doc.body))

  def tokenize(content: String): Seq[String] = {
    val result = scala.collection.mutable.ArrayBuffer.empty[String]
    /*content.split("\n").foreach(line => line.split(" ").foreach(
      word => if (word.startsWith("#")) result += word.substring(1) else word
    ))*/
    val analyzer = new SpanishAnalyzer()
    analyzer.setVersion(Version.LUCENE_5_1_0)
    val tReader = new StringReader(content)
    val tStream = analyzer.tokenStream("", tReader)
    val term = tStream.addAttribute(classOf[CharTermAttribute])
    tStream.reset()
    while (tStream.incrementToken()) {
      val termValue = term.toString
      if (termValue.startsWith("#")) {
        result += termValue.substring(1)
      } else {
        result += termValue
      }
    }
    result
  }
}
case class TermDoc(doc: String, labels: Set[String], terms: Seq[String])
Driver:
val csvFiles = List("/path/to/file.csv", "/path/to/file2.csv", "/path/to/file3.csv")
val docs = TweetParser.parseAll(csvFiles, sc)
val termDocsRdd = Tokenizer.tokenizeAll(docs)
val numDocs = termDocsRdd.count()
val terms = termDocsRdd.flatMap(_.terms).distinct().sortBy(identity)
I'm testing this in the spark-shell; that's why the driver looks like this. I hope this clarifies the question.
Apparently, it is saying that I'm trying to call count on an empty RDD
Actually - no, that's not what the error says. count triggers the computation of this RDD, and this exception is thrown while calculating one of the RDD's records.
Specifically, the error states:
java.lang.UnsupportedOperationException: empty.init
This is probably thrown from one of these expressions within buildDocument:
val txt = lineSplit.tail.init.init.mkString(",")
val sent = lineSplit.init.last
This code fragment assumes lineSplit is a collection with at least 3 elements - and the exception you see is the result of that assumption being incorrect for at least one record: for example, if lineSplit had just 2 elements, lineSplit.tail.init would be an empty collection, and therefore lineSplit.tail.init.init would throw the exception you see.
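For example, with a hypothetical two-field line in the spark-shell:
scala> val lineSplit = "142389495503925248,NEU".split(",")
lineSplit: Array[String] = Array(142389495503925248, NEU)

scala> lineSplit.tail.init          // already empty
res0: Array[String] = Array()

scala> lineSplit.tail.init.init     // init on an empty collection
java.lang.UnsupportedOperationException: empty.init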
To overcome this - you can rewrite your "parsing" method to handle such irregularities in the data properly:
Wrap it with a Try(...) and filter only the successful records, e.g.:
import scala.util.{Try, Success}

def parseAll(csvFiles: Iterable[String], sc: SparkContext): RDD[Document] = {
  val csv = sc.textFile(csvFiles mkString ",")
  val docs = csv.filter(!_.contains(headerPart))
    .map(s => Try(buildDocument(s)))
    .collect { case Success(v) => v }
  docs
}
Change the parsing so that "missing" parts of lineSplit will be set to null (as the following lines seem to expect), e.g.:
def buildDocument(line: String): Document = {
  val (id, txt, sent, opt) = line.split(",").padTo(5, null) match {
    case Array(a, b, c, d, e, _*) => (a, s"$b,$c", d, e)
  }
  // continue as before....
}
I got this piece of code in Groovy:
def getPhoto(params) {
    def data = entity.find(params).first();
    byte[] mybyte = (byte[]) data.photo;
    String str = java.util.Base64.Encoder.encodeToString(mybyte);
    return str;
}
But when this code is executed it throws an error saying:
java.lang.Exception: No signature of method: static java.util.Base64$Encoder.encodeToString() is applicable for argument types: ([B) values: {[-1, -40, -1, -32, 0, 16, 74, 70, 73, 70, 0,...too long...
UPDATED
org.codehaus.groovy.runtime.metaclass.MethodSelectionException: Could not find which method <init>() to invoke from this list:
private java.util.Base64$Encoder#<init>(boolean, [B, int, boolean)
java.util.Base64$Encoder#<init>(boolean, [B, int, boolean, java.util.Base64$1)
at groovy.lang.MetaClassImpl.chooseMethod(MetaClassImpl.java:2419)
at groovy.lang.MetaClassImpl.invokeConstructor(MetaClassImpl.java:1250)
at groovy.lang.MetaClassImpl.invokeConstructor(MetaClassImpl.java:1182)
at org.codehaus.groovy.runtime.InvokerHelper.invokeConstructorOf(InvokerHelper.java:805)
at org.codehaus.groovy.runtime.ScriptBytecodeAdapter.invokeNewN(ScriptBytecodeAdapter.java:227)
at org.codehaus.groovy.runtime.ScriptBytecodeAdapter.invokeNew0(ScriptBytecodeAdapter.java:234)
at JobSearchService.getPhoto(script1000034.groovy:113)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:497)
at org.codehaus.groovy.reflection.CachedMethod.invoke(CachedMethod.java:86)
at groovy.lang.MetaMethod.doMethodInvoke(MetaMethod.java:226)
at org.codehaus.groovy.runtime.metaclass.ClosureMetaClass.invokeMethod(ClosureMetaClass.java:333)
at org.codehaus.groovy.runtime.ScriptBytecodeAdapter.invokeMethodOnCurrentN(ScriptBytecodeAdapter.java:77)
at JobSearchService$_search_closure2.doCall(script1000034.groovy:59)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:497)
at org.codehaus.groovy.reflection.CachedMethod.invoke(CachedMethod.java:86)
at groovy.lang.MetaMethod.doMethodInvoke(MetaMethod.java:226)
at org.codehaus.groovy.runtime.metaclass.ClosureMetaClass.invokeMethod(ClosureMetaClass.java:248)
at groovy.lang.MetaClassImpl.invokeMethod(MetaClassImpl.java:754)
at groovy.lang.Closure.call(Closure.java:292)
at groovy.lang.Closure.call(Closure.java:305)
at org.codehaus.groovy.runtime.DefaultGroovyMethods.each(DefaultGroovyMethods.java:1078)
at org.codehaus.groovy.runtime.DefaultGroovyMethods.each(DefaultGroovyMethods.java:1055)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:497)
at org.codehaus.groovy.runtime.metaclass.ReflectionMetaMethod.invoke(ReflectionMetaMethod.java:51)
at org.codehaus.groovy.runtime.metaclass.NewInstanceMetaMethod.invoke(NewInstanceMetaMethod.java:54)
at groovy.lang.MetaMethod.doMethodInvoke(MetaMethod.java:226)
at groovy.lang.MetaClassImpl.invokeMethod(MetaClassImpl.java:910)
at groovy.lang.MetaClassImpl.invokeMethod(MetaClassImpl.java:754)
at org.codehaus.groovy.runtime.InvokerHelper.invokePojoMethod(InvokerHelper.java:765)
at org.codehaus.groovy.runtime.InvokerHelper.invokeMethod(InvokerHelper.java:753)
at org.codehaus.groovy.runtime.ScriptBytecodeAdapter.invokeMethodN(ScriptBytecodeAdapter.java:167)
at JobSearchService.search(script1000034.groovy:57)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:497)
at org.codehaus.groovy.reflection.CachedMethod.invoke(CachedMethod.java:86)
at groovy.lang.MetaMethod.doMethodInvoke(MetaMethod.java:226)
at groovy.lang.MetaClassImpl.invokeMethod(MetaClassImpl.java:910)
at groovy.lang.MetaClassImpl.invokeMethod(MetaClassImpl.java:754)
at sun.reflect.GeneratedMethodAccessor11.invoke(Unknown Source)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:497)
at org.codehaus.groovy.reflection.CachedMethod.invoke(CachedMethod.java:86)
at groovy.lang.MetaMethod.doMethodInvoke(MetaMethod.java:226)
at groovy.lang.MetaClassImpl.invokeMethod(MetaClassImpl.java:910)
at groovy.lang.MetaClassImpl.invokeMethod(MetaClassImpl.java:754)
at org.codehaus.groovy.runtime.InvokerHelper.invokePojoMethod(InvokerHelper.java:765)
at org.codehaus.groovy.runtime.InvokerHelper.invokeMethod(InvokerHelper.java:753)
at org.codehaus.groovy.runtime.ScriptBytecodeAdapter.invokeMethodN(ScriptBytecodeAdapter.java:167)
at JobSearchService.invokeMethod(script1000034.groovy)
at com.rameses.osiris3.script.ScriptExecutor.invokeMethod(ScriptExecutor.java:41)
at com.rameses.osiris3.script.ManagedScriptExecutor$1.call(ManagedScriptExecutor.java:142)
at com.rameses.osiris3.script.InterceptorChain.fireChain(InterceptorChain.java:79)
at com.rameses.osiris3.script.ManagedScriptExecutor.execute(ManagedScriptExecutor.java:140)
at com.rameses.osiris3.script.ScriptRunnable.run(ScriptRunnable.java:62)
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
error class java.lang.Exception Could not find which method <init>() to invoke from this list:
private java.util.Base64$Encoder#<init>(boolean, [B, int, boolean)
java.util.Base64$Encoder#<init>(boolean, [B, int, boolean, java.util.Base64$1)
Encoder#encodeToString(byte[]) is an instance method, not a static method, so it has to be called on an instance. Encoder also has no public constructor (that is what the MethodSelectionException in your update is complaining about), so get the shared instance via Base64.getEncoder():
Base64.getEncoder().encodeToString(mybyte)
Note that the Encoder is thread-safe, so you can save it in a private static final field.
Using @TypeChecked or @CompileStatic on your method will prevent many errors like this.
try
org.apache.commons.codec.binary.Base64.encodeBase64(mybyte)
Since I'm using JDK 6, this is the best approach:
new sun.misc.BASE64Encoder().encode(mybyte);