[Java]
Key call chain of the Hadoop Map/Reduce execution process
JobClient.runJob(conf) | run the job
| --> JobClient jc = new JobClient(job);
| --> RunningJob rj = jc.submitJob(job);
| --> submitJobInternal(job);
| --> int reduces = job.getNumReduceTasks();
| --> JobContext context = new JobContext(job, jobId);
| --> maps = writeOldSplits(job, submitSplitFile);
| --> job.setNumMapTasks(maps);
| --> job.writeXml(out);
| --> JobStatus status = jobSubmitClient.submitJob(jobId);
JobTracker.submitJob(jobId) | submit a job
| --> JobInProgress job = new JobInProgress(jobId, this, this.conf);
| --> checkAccess(job, QueueManager.QueueOperation.SUBMIT_JOB); | check permissions
| --> checkMemoryRequirements(job); | check memory requirements
| --> addJob(jobId, job); | add to the job queue
| --> jobs.put(job.getProfile().getJobID(), job);
| --> for (JobInProgressListener listener : jobInProgressListeners) | notify the registered listeners for scheduling
| --> listener.jobAdded(job);
JobTracker.heartbeat() | after JobTracker is started, TaskTracker calls it via RPC and receives the response (a set of actions).
| --> List<TaskTrackerAction> actions = new ArrayList<TaskTrackerAction>();
| --> tasks = taskScheduler.assignTasks(taskTrackerStatus); | select the appropriate tasks through the scheduler
| --> for (Task task : tasks)
| --> expireLaunchingTasks.addNewTask(task.getTaskID());
| --> actions.add(new LaunchTaskAction(task)); | the actual actions may also include CommitTaskAction and so on
| --> response.setHeartbeatInterval(nextInterval);
| --> response.setActions(actions.toArray(new TaskTrackerAction[actions.size()]));
| --> return response;
TaskTracker.offerService() | after TaskTracker is started, it continuously sends heartbeats to JobTracker through offerService().
| --> transmitHeartBeat()
| --> HeartbeatResponse heartbeatResponse = jobClient.heartbeat(status, justStarted, justInited, askForNewTask, heartbeatResponseId);
| --> TaskTrackerAction[] actions = heartbeatResponse.getActions();
| --> for (TaskTrackerAction action : actions)
| --> if (action instanceof LaunchTaskAction)
| --> addToTaskQueue((LaunchTaskAction) action); | add to the execution queue, according to whether it is a map or reduce task
| --> if (action.getTask().isMapTask()) {
| --> mapLauncher.addToTaskQueue(action);
| --> TaskInProgress tip = registerTask(action, this);
| --> tasksToLaunch.add(tip);
| --> tasksToLaunch.notifyAll(); | wake up the blocked launcher thread
| --> } else
| --> reduceLauncher.addToTaskQueue(action);
TaskLauncher.run()
| --> while (tasksToLaunch.isEmpty())
| --> tasksToLaunch.wait();
| --> tip = tasksToLaunch.remove(0);
| --> startNewTask(tip);
| --> localizeJob(tip);
| --> launchTaskForJob(tip, new JobConf(rjob.jobConf));
| --> tip.setJobConf(jobConf);
| --> tip.launchTask(); | TaskInProgress.launchTask()
| --> this.runner = task.createRunner(TaskTracker.this, this); | differentiates between map and reduce
| --> this.runner.start();
MapTaskRunner.run() | executes the MapTask
| --> File workDir = new File(lDirAlloc.getLocalPathToRead(...)); | prepare the working directory
| --> String jar = conf.getJar(); | prepare the jar package
| --> File jvm = new File(new File(System.getProperty("java.home"), "bin"), "java"); | locate the jvm executable
| --> vargs.add(Child.class.getName()); | add a parameter: the Child class is started as the main class
| --> tracker.addToMemoryManager(t.getTaskID(), t.isMapTask(), conf, pidFile); | add to memory management
| --> jvmManager.launchJvm(this, jvmManager.constructJvmEnv(setup, vargs, stdout, stderr, logSize, | hand off to the jvm manager and start
workDir, env, pidFile, conf));
| --> mapJvmManager.reapJvm(t, env); | differentiates map/reduce operations
JvmManager.reapJvm()
| --> while (jvmIter.hasNext())
| --> JvmRunner jvmRunner = jvmIter.next().getValue();
| --> JobID jId = jvmRunner.jvmId.getJobId();
| --> setRunningTaskForJvm(jvmRunner.jvmId, t);
| --> spawnNewJvm(jobId, env, t);
| --> JvmRunner jvmRunner = new JvmRunner(env, jobId);
| --> jvmIdToRunner.put(jvmRunner.jvmId, jvmRunner);
| --> jvmRunner.start(); | runs the run() method of JvmRunner
| --> JvmRunner.run()
| --> runChild(env);
| --> List<String> wrappedCommand = TaskLog.captureOutAndError(env.setup, env.vargs, env.stdout, env.stderr,
env.logSize, env.pidFile); | build the command line that launches the main class
| --> shexec.execute(); | run the child jvm
| --> int exitCode = shexec.getExitCode(); | obtain the exit status value
| --> updateOnJvmExit(jvmId, exitCode, killed); | update the Jvm status
Child.main() | executes the Task (map/reduce)
| --> JVMId jvmId = new JVMId(firstTaskid.getJobID(), firstTaskid.isMap(), jvmIdInt);
| --> TaskUmbilicalProtocol umbilical = (TaskUmbilicalProtocol) RPC.getProxy(TaskUmbilicalProtocol.class,
TaskUmbilicalProtocol.versionID, address, defaultConf);
| --> while (true)
| --> JvmTask myTask = umbilical.getTask(jvmId);
| --> task = myTask.getTask();
| --> taskid = task.getTaskID();
| --> TaskRunner.setupWorkDir(job);
| --> task.run(job, umbilical); | take MapTask as an example
| --> TaskReporter reporter = new TaskReporter(getProgress(), umbilical);
| --> if (useNewApi)
| --> runNewMapper(job, split, umbilical, reporter);
| --> else
| --> runOldMapper(job, split, umbilical, reporter);
| --> inputSplit = (InputSplit) ReflectionUtils.newInstance(job.getClassByName(splitClass), job);
| --> MapRunnable<INKEY, INVALUE, OUTKEY, OUTVALUE> runner = ReflectionUtils.newInstance(job.getMapRunnerClass(), job);
| --> runner.run(in, new OldOutputCollector(collector, conf), reporter);
MapRunner.run()
| --> K1 key = input.createKey();
| --> V1 value = input.createValue();
| --> while (input.next(key, value))
| --> mapper.map(key, value, output, reporter);
| --> if (incrProcCount)
| --> reporter.incrCounter(SkipBadRecords.COUNTER_GROUP,
| --> SkipBadRecords.COUNTER_MAP_PROCESSED_RECORDS, 1);
| --> mapper.close();