This article mainly describes how to package Spark applications with Maven; the same approach also works for other command-line Java applications.

Project structure
/
/conf/configuration file
/libs/Dependency Pack
/bin/Start Command Script
/logs/log
/data/data
/src/Source Code
/pom.xml
/assembly.xml

Project pom.xml
<?xml version= "1.0" encoding= "UTF-8"?> <project "xmlns=" xmlns: Xsi= "Http://www.w3.org/2001/XMLSchema-instance" xsi:schemalocation= "http://maven.apache.org/POM/4.0.0 http:// Maven.apache.org/xsd/maven-4.0.0.xsd "> <modelVersion>4.0.0</modelVersion> <groupid>x
.x.x</groupid> <artifactId>x</artifactId> <version>1.0.0</version> <properties> <maven.test.skip>false</maven.test.skip> <scala.version>2.11 .7</scala.version> <scala.prefix>2.11</scala.prefix> <spark.version>2.0.2&
lt;/spark.version> </properties> <dependencies> <dependency> <groupId>org.apache.spark</groupId> <artifactid>spark-core_${scala.prefix}</ar
Tifactid> <version>${spark.version}</version> </dependency> <dependency> <groupId>org.apache.spark</groupId> <artifactid>spark-sql_${scala.prefix}</ Artifactid> <version>${spark.version}</version> <scope>provided</s cope> </dependency> <dependency> <groupid>org.apache.spark< ;/groupid> <artifactId>spark-graphx_${scala.prefix}</artifactId> <versio
N>${spark.version}</version> <scope>provided</scope> </dependency> <dependency> <groupId>org.apache.spark</groupId> <ARTIFAC
Tid>spark-streaming_${scala.prefix}</artifactid> <version>${spark.version}</version> <scope>provided</scope> </dependency> </dependencies> <build> <sourceDirectory>src/main/java</sourceDirectory> <plugins> <PLUGIN&G
T <groupId>org.apache.maven.plugins</groupId> <artifactid>maven-compiler-plugin</ar
Tifactid> <configuration> <source>1.8</source>
<target>1.8</target> <encoding>UTF-8</encoding>
<compilerArguments> <extdirs>${project.basedir}/lib</extdirs> </compilerArguments> </configuration> </PLUGIN&G
T
<plugin> <groupId>org.apache.maven.plugins</groupId> <artifactId>maven-jar-plugin</artifactId> <version>2.4</version> <configuration> <archive> <ADDMAVENDESCR Iptor>false</addmavendescriptor> <manifest> ;! --Specifies the external jar call path for MANIFEST.MF, where setting ture indicates that the entry jar and the invoked jar are on the same path, but do not specify Classpathprefix, otherwise it cannot be started, the class is not found --> <addClasspath>true</addClasspath> &L t;! --This controls the relative path of the main entry jar packet when it calls each other--> <!--<classpathprefix>libs/</classpathprefix >--> <classpathPrefix></classpathPrefix>
;/manifest> </archive> <excludes> <exclude>${project.basedir}/xml/*</exclude> </excludes> </configuration&
Gt </plugin> <!--have this plugin to create a new Scala class in order to mix--> <plugin>
;groupid>org.scala-tools</groupid> <artifactId>maven-scala-plugin</artifactId> <version>2.15.1</version> <executions> &L T;execution> <id>scala-compile-first</id> <phase >process-resources</phase> <goals> <!--<g
Oal>add-source</goal>--> <goal>compile</goal>
</goals> </execution> <execution> <goals> <goal>compile</goal> < /goals> </execution> </executions> <con Figuration> <scalaVersion>${scala.version}</scalaVersion> </ configuration> </plugin> <plugin> <groupid>org.c
Odehaus.mojo</groupid> <artifactId>build-helper-maven-plugin</artifactId> <version>1.8</version> <executions> <execution&
Gt
<id>add-source</id> <phase>generate-sources</phase>
<goals> <goal>add-source</goal> </goals> <configuration> <sources> <source>src/main/java</source> <source >src/main/scala</source> </sources> </config uration> </execution> </executions> </plugin
> <plugin> <groupId>org.apache.maven.plugins</groupId> <artifactId>maven-resources-plugin</artifactId> <version>2.2</version&
Gt <configuration> <encoding>UTF-8</encoding> </configuratio N> </plugin> <plugin> <groupid>org.apache.maven.Plugins</groupid> <artifactId>maven-assembly-plugin</artifactId> <version>2.4</version> <configuration> <appendassemblyid >false</appendAssemblyId> <descriptors> <descriptor&
Gt;${project.basedir}/assembly.xml</descriptor> </descriptors>
<archive> <addMavenDescriptor>false</addMavenDescriptor> <manifest> <!--Specify the external jar call path for MANIFEST.MF, where setting ture represents the entry jar and the jar being invoked on the same path--&
Gt <!--but do not specify Classpathprefix, otherwise cannot start, cannot find class--> <ADDCLASSPATH>TRUE</ADDCLASSPATH&G
T <!--here Control the relative path--> < when the main entry jar packet is invoked to each otherClasspathprefix></classpathprefix> </manifest> </arc hive> </configuration> <executions> <ex Ecution> <id>make-assembly</id> <phase>packag
E</phase> <goals> <goal>single</goal>
<!--<goal>assembly</goal>--> </goals>
</execution> </executions> </plugin>
</plugins> </build> <repositories> <repository>
<id>aliyun</id> <url>http://maven.aliyun.com/nexus/content/groups/public</url> <snapshots> <enabled>true</enabled> </snapshots> </repos
itory> </repositories> <pluginRepositories> <pluginRepository> <id>aliyun</id> <url>http://maven.aliyun.com/nexus/content/groups/public</url&
Gt
<snapshots> <enabled>false</enabled> </snapshots>
</pluginRepository> </pluginRepositories> </project>
Packaging descriptor assembly.xml
<assembly> <id>bin</id> <formats> <format>zip</format> </format s> <includeBaseDirectory>false</includeBaseDirectory> <dependencySets> <dependency Set> <outputDirectory>/${project.artifactId}/libs</outputDirectory> <!--If there is a third party ja R Pack, preferably import the local MAVEN library and then pack--> <useProjectArtifact>false</useProjectArtifact> <!--don't play
Fatjar Package--> <unpack>false</unpack> </dependencySet> </dependencySets> <fileSets> <!--a jar file compiled from the project itself, packaged into the root directory of the zip file--> <fileSet> <directory& Gt;${project.build.directory}</directory> <OUTPUTDIRECTORY>/${PROJECT.ARTIFACTID}/LIBS</OUTPUTD Irectory> <includes> <!--<include>${project.artifactid}</include>-->
; <inClude>*.jar</include> </includes> </fileSet> <!--package The documentation related to the project into a zip file The root directory--> <fileSet> <directory>${project.basedir}</directory> <OUTP Utdirectory>/${project.artifactid}/bin</outputdirectory> <includes> <include >start.sh</include> <include>*.sh</include> </includes> ;/fileset> <!--the documentation related to the project, packaged into the root directory of the zip file--> <fileSet> <directory>${projec
T.basedir}/conf</directory> <outputDirectory>/${project.artifactId}/conf</outputDirectory>
</fileSet> <fileSet> <directory>${project.basedir}/libs</directory> <outputDirectory>/${project.artifactId}/libs</outputDirectory> </fileSet> <f Ileset> ≪directory>${project.basedir}/logs</directory> <OUTPUTDIRECTORY>/${PROJECT.ARTIFACTID}/LOGS&L t;/outputdirectory> </fileSet> <fileSet> <directory>${project.basedir}/da Ta</directory> <outputDirectory>/${project.artifactId}/data</outputDirectory> </f ileset> <fileSet> <directory>${project.basedir}/bin</directory> <o utputdirectory>/${project.artifactid}/bin</outputdirectory> </fileSet> </fileSets> </a
Ssembly>
Startup script
dir=$ (CD ' DirName $; cd.. pwd)
cd $DIR
spark-submit \
--master
yarn\--deploy-mode client\# Such a project structure can only be submitted in client mode
--name increasedatabuild\
--executor-memory 16G \
--total-executor-cores 120 \
--class xxx.xx.xx.xx\
--conf "spark.hadoop.mapreduce.input.fileinputformat.split.minsize=107374182" \
--jars $ (ls libs/*.jar |grep-v "xxx-xx" | tr ' ",") \
./libs/xxx-xx-1.0.0.jar