Spark on YARN task submission error: input path does not exist
Application ID: application_1481285758114_422243, tracking URL: http://***:4040
Exception in thread "main" org.apache.hadoop.mapred.InvalidInputException: Input path does not exist: hdfs://mycluster-tj/user/engine_arch/data/mllib/sample_svlibm_data.txt
    at org.apache.hadoop.mapred.FileInputFormat.singleThreadedListStatus(FileInputFormat.java:287)
    at org.apache.hadoop.mapred.FileInputFormat.listStatus(FileInputFormat.java:229)
    at org.apache.hadoop.mapred.FileInputFormat.getSplits(FileInputFormat.java:315)
    at org.apache.spark.rdd.HadoopRDD.getPartitions(HadoopRDD.scala:199)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:239)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:237)
    at scala.Option.getOrElse(Option.scala:120)
    at org.apache.spark.rdd.RDD.partitions(RDD.scala:237)
    at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:239)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:237)
    at scala.Option.getOrElse(Option.scala:120)
    at org.apache.spark.rdd.RDD.partitions(RDD.scala:237)
    at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:239)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:237)
    at scala.Option.getOrElse(Option.scala:120)
    at org.apache.spark.rdd.RDD.partitions(RDD.scala:237)
    at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:239)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:237)
    at scala.Option.getOrElse(Option.scala:120)
    at org.apache.spark.rdd.RDD.partitions(RDD.scala:237)
    at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:239)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:237)
    at scala.Option.getOrElse(Option.scala:120)
    at org.apache.spark.rdd.RDD.partitions(RDD.scala:237)
    at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:239)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:237)
    at scala.Option.getOrElse(Option.scala:120)
    at org.apache.spark.rdd.RDD.partitions(RDD.scala:237)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:1994)
    at org.apache.spark.rdd.RDD$$anonfun$reduce$1.apply(RDD.scala:1025)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:150)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:111)
    at org.apache.spark.rdd.RDD.withScope(RDD.scala:316)
    at org.apache.spark.rdd.RDD.reduce(RDD.scala:1007)
    at org.apache.spark.mllib.util.MLUtils$.loadLibSVMFile(MLUtils.scala:105)
    at org.apache.spark.mllib.util.MLUtils$.loadLibSVMFile(MLUtils.scala:134)
    at org.apache.spark.ml.source.libsvm.LibSVMRelation.buildScan(LibSVMRelation.scala:49)
    at org.apache.spark.sql.execution.datasources.DataSourceStrategy$.apply(DataSourceStrategy.scala:135)
    at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$1.apply(QueryPlanner.scala:58)
    at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$1.apply(QueryPlanner.scala:58)
    at scala.collection.Iterator$$anon$13.hasNext(Iterator.scala:371)
    at org.apache.spark.sql.catalyst.planning.QueryPlanner.plan(QueryPlanner.scala:59)
    at org.apache.spark.sql.catalyst.planning.QueryPlanner.planLater(QueryPlanner.scala:54)
    at org.apache.spark.sql.execution.SparkStrategies$BasicOperators$.apply(SparkStrategies.scala:336)
    at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$1.apply(QueryPlanner.scala:58)
    at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$1.apply(QueryPlanner.scala:58)
    at scala.collection.Iterator$$anon$13.hasNext(Iterator.scala:371)
    at org.apache.spark.sql.catalyst.planning.QueryPlanner.plan(QueryPlanner.scala:59)
    at org.apache.spark.sql.execution.QueryExecution.sparkPlan$lzycompute(QueryExecution.scala:47)
    at org.apache.spark.sql.execution.QueryExecution.sparkPlan(QueryExecution.scala:45)
    at org.apache.spark.sql.execution.QueryExecution.executedPlan$lzycompute(QueryExecution.scala:52)
    at org.apache.spark.sql.execution.QueryExecution.executedPlan(QueryExecution.scala:52)
    at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:55)
    at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:55)
    at org.apache.spark.sql.DataFrame.rdd$lzycompute(DataFrame.scala:1638)
    at org.apache.spark.sql.DataFrame.rdd(DataFrame.scala:1635)
    at org.apache.spark.sql.DataFrame.map(DataFrame.scala:1411)
    at org.apache.spark.ml.feature.StandardScaler.fit(StandardScaler.scala:90)
    at com.xiaoju.arch.engine.spark.SparkDD.buildStandardScaler(SparkDD.java:149)
    at com.xiaoju.arch.engine.spark.StandardScalerDemo.main(StandardScalerDemo.java:13)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:498)
    at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:731)
    at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:181)
    at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:206)
    at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:121)
    at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
Explanation: when Spark runs in YARN mode, input files must be stored on HDFS (and the HDFS path must actually exist — note the file name in the error, `sample_svlibm_data.txt`, which should be checked against the real file, e.g. `sample_libsvm_data.txt`); otherwise Spark throws this "Input path does not exist" error. The problem does not appear in standalone mode, where local file paths can be read directly.