1. Read from MongoDB
package com.mongodb.spark;

import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SparkSession;
import org.bson.Document;

import com.mongodb.spark.MongoSpark;
import com.mongodb.spark.rdd.api.java.JavaMongoRDD;

public final class ReadFromMongoDB {

    public static void main(final String[] args) throws InterruptedException {
        SparkSession spark = SparkSession.builder().master("local").appName("MongoSparkConnectorIntro")
                .config("spark.mongodb.input.uri", "mongodb://172.28.34.xxx:27117/wangzs.zhaopin")
                .config("spark.mongodb.output.uri", "mongodb://172.28.34.xxx:27117/wangzs.sparkmongo")
                .getOrCreate();

        // Create a JavaSparkContext using the SparkSession's SparkContext object
        JavaSparkContext jsc = new JavaSparkContext(spark.sparkContext());

        /* Start Example: Read data from MongoDB ************************/
        JavaMongoRDD<Document> rdd = MongoSpark.load(jsc);
        /* End Example **************************************************/

        // Analyze data from MongoDB
        System.out.println(rdd.count());
        System.out.println(rdd.first().toJson());

        jsc.close();
    }
}
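MongoSpark.load(jsc) reads whatever collection spark.mongodb.input.uri points at (wangzs.zhaopin here). If a different collection has to be read from the same JavaSparkContext, the connector's ReadConfig can override those settings per call. A minimal sketch, assuming a collection named "spark" (purely illustrative) and the extra imports com.mongodb.spark.config.ReadConfig, java.util.HashMap and java.util.Map:

// Sketch: override the input collection with a per-call ReadConfig
Map<String, String> readOverrides = new HashMap<String, String>();
readOverrides.put("collection", "spark");                       // hypothetical collection name
readOverrides.put("readPreference.name", "secondaryPreferred"); // prefer reading from a secondary
ReadConfig readConfig = ReadConfig.create(jsc).withOptions(readOverrides);
JavaMongoRDD<Document> customRdd = MongoSpark.load(jsc, readConfig);
System.out.println(customRdd.count());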
2. Write to MongoDB
package com.mongodb.spark;

import com.mongodb.spark.MongoSpark;
import com.mongodb.spark.config.WriteConfig;

import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.sql.SparkSession;
import org.bson.Document;

import static java.util.Arrays.asList;

import java.util.HashMap;
import java.util.Map;

public final class WriteToMongoDBWriteConfig {

    public static void main(final String[] args) throws InterruptedException {
        SparkSession spark = SparkSession.builder().master("local").appName("MongoSparkConnectorIntro")
                .config("spark.mongodb.input.uri", "mongodb://172.28.34.xxx:27117/wangzs.zhaopin")
                .config("spark.mongodb.output.uri", "mongodb://172.28.34.xxx:27117/wangzs.sparkmongo")
                .getOrCreate();

        JavaSparkContext jsc = new JavaSparkContext(spark.sparkContext());

        // Create a custom WriteConfig
        Map<String, String> writeOverrides = new HashMap<String, String>();
        writeOverrides.put("collection", "spark");
        writeOverrides.put("writeConcern.w", "majority");
        WriteConfig writeConfig = WriteConfig.create(jsc).withOptions(writeOverrides);

        // Create an RDD of documents
        JavaRDD<Document> sparkDocuments = jsc.parallelize(asList(1, 2, 3, 4, 5, 6, 7, 8, 9))
                .map(new Function<Integer, Document>() {
                    public Document call(final Integer i) throws Exception {
                        return Document.parse("{spark: " + i + ", name: " + i + "}");
                    }
                });

        /* Start Example: Save data from the RDD to MongoDB *************/
        MongoSpark.save(sparkDocuments, writeConfig);
        /* End Example **************************************************/

        jsc.close();
    }
}
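When per-call overrides are not needed, the RDD can also be saved with the connector's defaults: MongoSpark.save(rdd) takes the database and collection from spark.mongodb.output.uri (wangzs.sparkmongo here). A minimal sketch reusing jsc from the example above; the field name "test" is purely illustrative:

// Sketch: write with the default WriteConfig derived from spark.mongodb.output.uri
JavaRDD<Document> documents = jsc.parallelize(asList(1, 2, 3))
        .map(new Function<Integer, Document>() {
            public Document call(final Integer i) throws Exception {
                return Document.parse("{test: " + i + "}");
            }
        });
MongoSpark.save(documents); // written to the default output collection (wangzs.sparkmongo)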
3. Aggregation
package com.mongodb.spark;

import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SparkSession;
import org.bson.Document;

import com.mongodb.spark.MongoSpark;
import com.mongodb.spark.rdd.api.java.JavaMongoRDD;

import static java.util.Collections.singletonList;

public final class Aggregation {

    public static void main(final String[] args) throws InterruptedException {
        SparkSession spark = SparkSession.builder().master("local").appName("Aggregation")
                .config("spark.mongodb.input.uri", "mongodb://172.28.34.xxx:27117/wangzs.zhaopin")
                .config("spark.mongodb.output.uri", "mongodb://172.28.34.xxx:27117/wangzs.sparkmongo")
                .getOrCreate();

        // Create a JavaSparkContext using the SparkSession's SparkContext object
        JavaSparkContext jsc = new JavaSparkContext(spark.sparkContext());

        // Load data from MongoDB
        JavaMongoRDD<Document> rdd = MongoSpark.load(jsc);

        /* Start Example: Use aggregation to filter an RDD **************/
        JavaMongoRDD<Document> aggregatedRdd = rdd.withPipeline(
                singletonList(Document.parse("{ $match: { 'gzdd': 'Shanghai-Putuo' } }")));
        /* End Example **************************************************/

        // Analyze data from MongoDB
        System.out.println(aggregatedRdd.count());
        System.out.println(aggregatedRdd.collect());

        jsc.close();
    }
}
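withPipeline pushes the pipeline down to MongoDB, so it runs on the server before any data is shipped to Spark. The same mechanism accepts multi-stage pipelines; below is a sketch that groups the zhaopin documents by the gzdd field and counts them (built with java.util.Arrays.asList, and assuming gzdd is present as in the $match above):

// Sketch: a two-stage pipeline ($match then $group) executed inside MongoDB
JavaMongoRDD<Document> groupedRdd = rdd.withPipeline(asList(
        Document.parse("{ $match: { 'gzdd': { $exists: true } } }"),
        Document.parse("{ $group: { _id: '$gzdd', total: { $sum: 1 } } }")));
System.out.println(groupedRdd.collect()); // one document per distinct gzdd value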
4. Datasets and SQL
Sample documents in the input collection (wangzs.sparktest):

/* 1 */  { "_id" : ObjectId("5ae911d3460fcf70c940ac96"), "name" : "Bilbo Baggins", "age" : 50.0 }
/* 2 */  { "_id" : ObjectId("5ae911d3460fcf70c940ac97"), "name" : "Gandalf", "age" : 1000.0 }
/* 3 */  { "_id" : ObjectId("5ae911d3460fcf70c940ac98"), "name" : "Thorin", "age" : 195.0 }
/* 4 */  { "_id" : ObjectId("5ae911d3460fcf70c940ac99"), "name" : "Balin", "age" : 178.0 }
/* 5 */  { "_id" : ObjectId("5ae911d3460fcf70c940ac9a"), "name" : "Kíli", "age" : 77.0 }
/* 6 */  { "_id" : ObjectId("5ae911d3460fcf70c940ac9b"), "name" : "Dwalin", "age" : 169.0 }
/* 7 */  { "_id" : ObjectId("5ae911d3460fcf70c940ac9c"), "name" : "Óin", "age" : 167.0 }
/* 8 */  { "_id" : ObjectId("5ae911d3460fcf70c940ac9d"), "name" : "Glóin", "age" : 158.0 }
/* 9 */  { "_id" : ObjectId("5ae911d3460fcf70c940ac9e"), "name" : "Fíli", "age" : 82.0 }
/* 10 */ { "_id" : ObjectId("5ae911d3460fcf70c940ac9f"), "name" : "Bombur" }
package com.mongodb.spark;

import java.io.Serializable;

public final class Character implements Serializable {

    private String name;
    private Integer age;

    public String getName() {
        return name;
    }

    public void setName(final String name) {
        this.name = name;
    }

    public Integer getAge() {
        return age;
    }

    public void setAge(final Integer age) {
        this.age = age;
    }
}
package com.mongodb.spark;

import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

public final class DatasetSQLDemo {

    public static void main(final String[] args) throws InterruptedException {
        SparkSession spark = SparkSession.builder().master("local").appName("Aggregation")
                .config("spark.mongodb.input.uri", "mongodb://172.28.34.xxx:27117/wangzs.sparktest")
                .config("spark.mongodb.output.uri", "mongodb://172.28.34.xxx:27117/wangzs.sparkmongo")
                .getOrCreate();

        // Create a JavaSparkContext using the SparkSession's SparkContext object
        JavaSparkContext jsc = new JavaSparkContext(spark.sparkContext());

        // Load data with an explicit schema
        Dataset<Character> explicitDS = MongoSpark.load(jsc).toDS(Character.class);
        explicitDS.printSchema();
        explicitDS.show();

        // Create a temp view and execute the query
        explicitDS.createOrReplaceTempView("characters");
        Dataset<Row> centenarians = spark.sql("SELECT name, age FROM characters WHERE age >= 100");
        centenarians.show();

        // Write the result to the "hundredClub" collection
        MongoSpark.write(centenarians).option("collection", "hundredClub").mode("overwrite").save();

        jsc.close();
    }
}
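The explicit Character bean is not the only option: the connector can also infer a schema by sampling documents. A minimal sketch of the implicit variant via toDF() on the loaded RDD, which against the sample data above should surface _id, name and age columns; the filter mirrors the centenarians SQL query without a temp view:

// Sketch: let the connector infer the schema instead of supplying a bean class
Dataset<Row> implicitDF = MongoSpark.load(jsc).toDF(); // schema inferred by sampling the collection
implicitDF.printSchema();
implicitDF.filter("age >= 100").show();                // same centenarians filter, Dataset API style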
5. pom file
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <groupId>com.wangzs</groupId>
  <artifactId>spark-2.1.0-learn</artifactId>
  <version>0.0.1-SNAPSHOT</version>
  <packaging>jar</packaging>
  <name>spark-2.1.0-learn</name>
  <url>http://maven.apache.org</url>

  <dependencies>
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>4.12</version>
      <scope>test</scope>
    </dependency>
    <!-- Spark dependency -->
    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-core_2.11</artifactId>
      <version>2.1.0</version>
    </dependency>
    <dependency>
      <groupId>org.mongodb.spark</groupId>
      <artifactId>mongo-spark-connector_2.11</artifactId>
      <version>2.1.0</version>
    </dependency>
    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-sql_2.11</artifactId>
      <version>2.1.0</version>
    </dependency>
  </dependencies>

  <build>
    <pluginManagement>
      <plugins>
        <plugin>
          <groupId>org.apache.maven.plugins</groupId>
          <artifactId>maven-compiler-plugin</artifactId>
          <configuration>
            <source>1.8</source>
            <target>1.8</target>
          </configuration>
        </plugin>
        <plugin>
          <groupId>org.apache.maven.plugins</groupId>
          <artifactId>maven-resources-plugin</artifactId>
          <configuration>
            <encoding>utf-8</encoding>
          </configuration>
        </plugin>
        <!-- skip tests when packaging -->
        <plugin>
          <groupId>org.apache.maven.plugins</groupId>
          <artifactId>maven-surefire-plugin</artifactId>
          <configuration>
            <skipTests>true</skipTests>
          </configuration>
        </plugin>
      </plugins>
    </pluginManagement>
  </build>
</project>