将master设置为local[3]的时候,上传到服务器正常运行。但是将master设置为spark://协议的时候就会在调用计数器方法的时候报空指针异常。这是为什么呢?
其次,当我在本地运行时,计数器运行结果总是不一致。但是打包后又一致了。这是为什么呢?
package com.learning.cases.hoteldata

import com.learning.cases.hoteldata.Clear.counter
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.spark.{SparkConf, SparkContext}

/**
 * Cleans the hotel dataset: drops rows whose star rating (col 6), comment
 * count (col 11), or score (col 10) is the literal string "null", counts the
 * dropped rows in an accumulator, and writes the surviving rows back to HDFS.
 *
 * FIX: this used to be `object Clear2 extends App`. With the `App` trait, all
 * top-level vals (including the accumulator) are initialized inside the
 * delayed `main`. In cluster mode (`spark://...`) the executor JVM
 * re-instantiates the `Clear2$` singleton when deserializing the filter
 * closure, but `main` never runs there, so the captured field `counter` is
 * null and `counter.add(1)` throws a NullPointerException. Declaring
 * `counter` as a local val inside an explicit `main` makes it a proper
 * closure capture that serializes to executors correctly. In `local[*]` mode
 * everything runs in one JVM, which is why the bug only appeared on the
 * cluster.
 */
object Clear2 {

  def main(args: Array[String]): Unit = {
    System.setProperty("HADOOP_USER_NAME", "root")

    // Remove a stale output directory; saveAsTextFile fails if it exists.
    FileSystem
      .get(new Configuration())
      .delete(new Path("hdfs://hadoop1:8020/casepro/hoteldata/hotelsparktask2"), true)

    // val conf = new SparkConf().setAppName("Clear2").setMaster("spark://hadoop1:7077")
    val conf = new SparkConf().setAppName("Clear2").setMaster("local[3]")
    val sc = new SparkContext(conf)

    // Local val (not an object field): serialized into the closure below, so
    // it is valid on executors in both local and cluster deploy modes.
    val counter = sc.longAccumulator

    val rdd = sc.textFile("hdfs://hadoop1:8020/casepro/hoteldata/hoteldata.csv")

    // NOTE(review): accumulator updates inside a transformation (filter) are
    // only at-least-once — task retries or re-computation of this RDD would
    // re-apply counter.add. With a single action below the count is normally
    // stable; presumably the "inconsistent in local runs" symptom came from
    // the RDD being evaluated more than once — verify against how it was run.
    rdd
      .filter { line =>
        val fields = line.split(",")
        val stars = fields(6) // star rating
        val comments = fields(11) // number of comments
        val scores = fields(10) // review score
        if (
          stars.equalsIgnoreCase("null") ||
          comments.equalsIgnoreCase("null") ||
          scores.equalsIgnoreCase("null")
        ) {
          counter.add(1) // record one dropped row
          false
        } else {
          true
        }
      }
      // .cache() removed: the filtered RDD feeds exactly one action and is
      // never reused, so caching only cost memory without any benefit.
      .saveAsTextFile("hdfs://hadoop1:8020/casepro/hoteldata/hotelsparktask2")

    // Safe to read here: saveAsTextFile is an action, so all accumulator
    // updates have been merged back to the driver.
    println("---------------删除条目数:" + counter.value + "---------------------")

    sc.stop()
  }
}