- Download Hadoop
- http://hadoop.apache.org/releases.html — mirror: http://mirrors.tuna.tsinghua.edu.cn/apache/hadoop/common/hadoop-2.6.5/hadoop-2.6.5.tar.gz
- Install Hadoop, set HADOOP_HOME, and add ${HADOOP_HOME}/bin to PATH
- Download Spark
- http://spark.apache.org/downloads.html — https://d3kbcqa49mib13.cloudfront.net/spark-2.0.2-bin-hadoop2.6.tgz (note: the build must match your Hadoop version)
- Install Spark, set SPARK_HOME, and add ${SPARK_HOME}/bin to PATH
- If winutils.exe is not found when running a Spark program:
- Download https://github.com/srccodes/hadoop-common-2.2.0-bin.git and put its bin contents under ${HADOOP_HOME}/bin
- With these settings in place, Spark programs can be run locally
- Spark Example:
Localsparkcontext.scala
import org.apache.spark.{SparkConf, SparkContext}
import org.scalatest._

/**
 * Mixin that gives a ScalaTest suite a shared local SparkContext.
 *
 * The context runs with master "local[2]" (two worker threads) and is
 * created once before all tests and stopped once after all tests, so the
 * suite does not pay JVM/Spark startup cost per test case.
 */
trait LocalSparkContext extends BeforeAndAfterAll {
  // Self-type: this trait can only be mixed into a ScalaTest Suite.
  self: Suite =>

  // @transient: the context must not be captured in serialized closures.
  @transient var sc: SparkContext = _

  override def beforeAll() {
    val conf = new SparkConf()
      .setMaster("local[2]")
      .setAppName("test")
    sc = new SparkContext(conf)
  }

  override def afterAll() {
    // Guard against beforeAll having failed before sc was assigned.
    if (sc != null) {
      sc.stop()
    }
  }
}
Sparkwcsuit.scala
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{Row, SQLContext}
import org.apache.spark.util.LongAccumulator
import org.scalatest.FunSuite
import tool.LocalSparkContext
import algos.{MergedPctr, PctrUtils}

/**
 * Smoke test for the local Spark setup: a minimal RDD word count.
 * Relies on LocalSparkContext to provide the shared `sc`.
 */
class SparkWcSuit extends FunSuite with LocalSparkContext {

  // RDD word count
  test("test rdd wc") {
    // Keep test output quiet; only errors are logged.
    sc.setLogLevel("ERROR")
    val rdd = sc.makeRDD(Seq("a", "b", "b"))
    // Classic word count: pair each word with 1, sum counts per key.
    val res = rdd.map((_, 1)).reduceByKey(_ + _).collect().sorted
    assert(res === Array(("a", 1), ("b", 2)))
  }
}
Build.sbt
// Project coordinates and Scala version (must match the _2.10 artifact suffix below).
name := "doc_rank"

version := "1.0"

scalaVersion := "2.10.5"

// Spark artifacts carry the Scala binary version in the artifact id,
// so plain % with an explicit _2.10 suffix is used here.
libraryDependencies += "org.apache.spark" % "spark-core_2.10" % "2.0.2"
libraryDependencies += "org.apache.spark" % "spark-mllib_2.10" % "2.0.2"
libraryDependencies += "commons-cli" % "commons-cli" % "1.2"

// Breeze: %% appends the Scala binary version (_2.10) automatically.
libraryDependencies ++= Seq(
  "org.scalanlp" %% "breeze" % "0.11.2",
  "org.scalanlp" %% "breeze-natives" % "0.11.2",
  "org.scalanlp" %% "breeze-viz" % "0.11.2"
)

// CDH-flavored Hadoop/HBase artifacts; resolved from the Cloudera repo below.
libraryDependencies ++= Seq(
  "org.apache.hadoop" % "hadoop-core" % "2.6.0-mr1-cdh5.4.4",
  "org.apache.hbase" % "hbase-client" % "1.0.0-cdh5.4.4",
  "org.apache.hbase" % "hbase-common" % "1.0.0-cdh5.4.4",
  "org.apache.hbase" % "hbase-server" % "1.0.0-cdh5.4.4",
  "org.apache.hbase" % "hbase-protocol" % "1.0.0-cdh5.4.4"
)

resolvers += "Akka Repository" at "http://repo.akka.io/releases/"
resolvers += "cloudera-repo-releases" at "https://repository.cloudera.com/artifactory/repo/"

resolvers ++= Seq(
  "Sonatype Snapshots" at "https://oss.sonatype.org/content/repositories/snapshots/",
  "Sonatype Releases" at "https://oss.sonatype.org/content/repositories/releases/"
)
- Hadoop sample
Directory structure:

    src/
    ├── main
    │   ├── java
    │   │   ├── io
    │   │   │   └── longwind
    │   │   │       └── mapreduce
    │   │   │           ├── main
    │   │   │           │   └── Main.java
    │   │   │           ├── mapreduce
    │   │   │           │   └── InfoIdUniquer.java
    │   │   │           └── utils
    │   │   │               ├── Constant.java
    │   │   │               └── HadoopUtils.java
    │   │   └── org
    │   │       └── apache
    │   │           └── hadoop
    │   │               ├── io
    │   │               │   └── nativeio
    │   │               │       └── NativeIO.java
    │   │               └── mapred
    │   │                   ├── ClientCache.java
    │   │                   ├── ClientServiceDelegate.java
    │   │                   ├── NotRunningJob.java
    │   │                   ├── ResourceMgrDelegate.java
    │   │                   ├── YarnClientProtocolProvider.java
    │   │                   └── YARNRunner.java
    │   └── resources
    │       └── log4j.properties
    └── test
        ├── java
        │   └── test
        └── resources
            └── log4j.properties

Key dependencies in pom.xml:

    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-common</artifactId>
      <version>2.6.0-cdh5.4.4</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-mapreduce-client-core</artifactId>
      <version>2.6.0-cdh5.4.4</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-mapreduce-client-common</artifactId>
      <version>2.6.0-cdh5.4.4</version>
    </dependency>

Code aspect: the org.apache.hadoop.* classes in the directory structure above are copied from the Hadoop source package. Note that with version 2.6.0-cdh5.4.4 the program may fail with an "access0" error; if it comes from NativeIO.java it is a permissions issue, and you need to manually modify the access method in NativeIO.java:

    public static boolean access(String path, AccessRight desiredAccess)
        throws IOException {
      return true;                                          // after modification
      // return access0(path, desiredAccess.accessRight()); // before modification
    }
In this way you can develop and debug Hadoop and Spark conveniently on a local Windows machine. As an aside, MRUnit is not very robust; problems are generally caused by version mismatches, package conflicts, or permissions. References:
- Hirano — "The MapReduce runtime environment on Windows": http://www.cnblogs.com/tq03/p/5101916.html
- "On the way forward" — solving the access0 problem: http://blog.csdn.net/congcong68/article/details/42043093
- XUWEIMDM — Spark on Windows: http://blog.csdn.net/u011513853/article/details/52865076
Building a local Hadoop/Spark runtime environment on Windows (Hadoop 2.6, Spark 2.0)