Elasticsearch-hadoop provides native integration between Elasticsearch and Apache Spark. Data read from Elasticsearch is manipulated as an RDD in Spark, while the contents of a Spark RDD can be converted into documents and stored in Elasticsearch for querying. Here are two simple examples of the interaction:
Required dependency JARs:
elasticsearch-spark_2.10-2.3.2.jar, elasticsearch-spark_2.11-2.3.2.jar, elasticsearch-spark-1.2_2.10-2.3.2.jar, elasticsearch-spark-1.2_2.11-2.3.2.jar
Example 1: Elasticsearch -> Spark
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.sql.SQLContext
import org.elasticsearch.spark.rdd.EsSpark

/**
 * Reads documents from Elasticsearch into Spark, first as an RDD of
 * (documentId, fieldMap) pairs via EsSpark, then as a DataFrame via the
 * "org.elasticsearch.spark.sql" data source.
 */
object E2SExample {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
    conf.setAppName("E2SExample")
    conf.setMaster("spark://master:7077")
    // Elasticsearch node(s) to read from.
    conf.set("es.nodes", "master:9200")
    val sc = new SparkContext(conf)

    // Query index "data", type "t1" for documents matching "king*".
    // Each record is (documentId, Map[fieldName, fieldValue]).
    val rdd = EsSpark.esRDD(sc, "data/t1", "?q=king*")
    println("rdd count: " + rdd.count())
    rdd.collect().foreach { record =>
      print(record._1 + ":")
      for ((k, v) <- record._2) {
        print(k + ":" + v)
      }
      println()
    }

    // The same index loaded as a DataFrame through Spark SQL.
    val sqlContext = new SQLContext(sc)
    val df = sqlContext.read.format("org.elasticsearch.spark.sql").load("data/t1")
    df.printSchema()
    df.collect().foreach(println)

    sc.stop()
  }
}
Example 2: Spark -> Elasticsearch
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.elasticsearch.spark.rdd.EsSpark

/** A job posting; each instance becomes one Elasticsearch document. */
case class Job(jobName: String, jobUrl: String, companyName: String, salary: String)

/**
 * Builds an RDD of Job case-class instances and writes it to the
 * Elasticsearch index "data", type "job" via EsSpark.saveToEs.
 */
object S2EExample {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
    conf.setAppName("S2EExample")
    conf.setMaster("spark://master:7077")
    // Create the target index automatically if it does not exist yet.
    conf.set("es.index.auto.create", "true")
    conf.set("es.nodes", "master:9200")
    val sc = new SparkContext(conf)

    val job1 = Job("C Development Engineer", "http://job.c.com", "C Company", "10000")
    val job2 = Job("C++ Development Engineer", "http://job.c++.com", "C++ Company", "10000")
    val job3 = Job("C# Development Engineer", "http://job.c#.com", "C# Company", "10000")
    val job4 = Job("Java Development Engineer", "http://job.java.com", "Java Company", "10000")
    val job5 = Job("Scala Development Engineer", "http://job.scala.com", "Java Company", "10000")
    val rdd = sc.makeRDD(Seq(job1, job2, job3, job4, job5))

    // Serialize each Job as a document into index/type "data/job".
    EsSpark.saveToEs(rdd, "data/job")

    sc.stop()
  }
}
$ bin/spark-submit --class org.esspark.examples.S2EExample --master spark://master:7077 --jars ../elasticsearch-hadoop-2.3.2/dist/elasticsearch-spark_2.10-2.3.2.jar ../temp/esspark-0.0.1-snapshot.jar