1. Installing SDKMAN
yum -y install unzip
yum -y install zip
curl -s "https://get.sdkman.io" | bash
Then, in a new terminal, run: source "$HOME/.sdkman/bin/sdkman-init.sh"
Check that the installation succeeded:
(1) sdk version
(2) sdk help
Supplement: removing SDKMAN (back it up first, then delete it)
tar zcvf ~/sdkman-backup_$(date +%F-%kh%M).tar.gz -C ~/ .sdkman
rm -rf ~/.sdkman
2. Installing Gradle
sdk install gradle
3. Download es-hadoop
cd /data/tools
git clone https://github.com/elastic/elasticsearch-hadoop.git
4. Compiling es-hadoop
cd /data/tools/elasticsearch-hadoop
vi gradle.properties
+hadoopversion 2.6.0
+hiveversion 1.1.0
+sparkversion 2.1.0
./gradlew distZip
5. Copy the built jar into Hive's lib directory (locally and on the other nodes)
cp elasticsearch-hadoop-7.0.0-alpha1-snapshot.jar /opt/cloudera/parcels/cdh/lib/hive/lib
scp elasticsearch-hadoop-7.0.0-alpha1-snapshot.jar root@ctdn-1:/opt/cloudera/parcels/cdh/lib/hive/lib
6.
Reference:
https://github.com/elastic/elasticsearch-hadoop
https://www.elastic.co/guide/en/elasticsearch/hadoop/current/hive.html#hive
https://www.elastic.co/guide/en/elasticsearch/hadoop/current/configuration.html
hive> ADD JAR /opt/cloudera/parcels/cdh/lib/hive/lib/elasticsearch-hadoop-7.0.0-alpha1-snapshot.jar;
-- External Hive table backed by Elasticsearch through the es-hadoop storage handler.
-- The table targets the org/org_info resource on the node listed in es.nodes,
-- and the orgid column is used as the Elasticsearch document id (es.mapping.id).
-- Column names use backtick identifier quoting. Single quotes are string literals
-- in Hive and are only valid for the handler class and the table properties.
-- Property keys, the handler class name and the Elasticsearch field names are
-- case-sensitive and must not contain padding spaces.
-- es.mapping.names lists pairs in the form hive_column:es_field.
CREATE EXTERNAL TABLE ext_es_org_info (
    `orgid`        string,
    `investorg`    string,
    `orgname`      string,
    `logo`         string,
    `weburl`       string,
    `orgdesc`      string,
    `founddate`    string,
    `district`     string,
    `investtotal`  int,
    `investstage`  string,
    `prov`         string,
    `city`         string,
    `focusdomain`  string,
    `investproj`   string,
    `investamount` string
)
STORED BY 'org.elasticsearch.hadoop.hive.EsStorageHandler'
TBLPROPERTIES (
    'es.nodes'             = '10.11.8.32:9200',
    'es.index.auto.create' = 'true',
    'es.resource'          = 'org/org_info',
    'es.mapping.id'        = 'orgid',
    'es.mapping.names'     = 'investorg:investorg,
orgname:orgname,
logo:logo,
weburl:weburl,
orgdesc:orgdesc,
founddate:founddate,
district:district,
investtotal:investtotal,
investstage:investstage,
prov:prov,
city:city,
focusdomain:focusdomain,
investproj:investproj,
investamount:investamount'
);
-- Disable speculative execution for this session before writing to the
-- Elasticsearch-backed table (presumably so that duplicate task attempts do
-- not send the same rows twice - confirm against the es-hadoop documentation).
SET hive.mapred.reduce.tasks.speculative.execution=false;
SET mapreduce.map.speculative=false;
SET mapreduce.reduce.speculative=false;

-- Overwrite ext_es_org_info with every row of es_org_info, column for column.
INSERT OVERWRITE TABLE ext_es_org_info
SELECT
    src.orgid,
    src.investorg,
    src.orgname,
    src.logo,
    src.weburl,
    src.orgdesc,
    src.founddate,
    src.district,
    src.investtotal,
    src.investstage,
    src.prov,
    src.city,
    src.focusdomain,
    src.investproj,
    src.investamount
FROM es_org_info src;
curl -XGET http://10.11.8.32:9200/yelpindex/1
7.
cd /opt/cloudera/parcels/cdh/lib/hive/conf
vi hive-site.xml
+
<property>
<name>hive.aux.jars.path</name>
<value>/opt/cloudera/parcels/cdh/lib/hive/lib/elasticsearch-hadoop-7.0.0-alpha1-snapshot.jar</value>
<description>A comma-separated list (with no spaces) of the jar files</description>
</property>
scp hive-site.xml root@ctdn-6:/opt/cloudera/parcels/cdh/lib/hive/conf
Hive-site.xml
curl -XGET 'http://10.11.8.32:9200/yelpindex/yelp/_search?q=id:1'
Reference:
https://www.elastic.co/guide/en/elasticsearch/client/python-api/current/index.html
https://github.com/medcl/elasticsearch-analysis-ik
https://github.com/elastic/elasticsearch-py
http://qbox.io/blog/elasticsearch-in-apache-spark-python
https://www.yelp.com/dataset
http://blog.csdn.net/xmo_jiao/article/details/73251937
https://www.elastic.co/guide/en/elasticsearch/reference/6.1/query-dsl-mlt-query.html