package wordcount

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD

object WordCount {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
    conf.setMaster("local").setAppName("count")
    val sc = new SparkContext(conf)

    // Read the input file; each RDD element is one line of text
    val lines: RDD[String] = sc.textFile("./words")

    // Split each line into words
    val words: RDD[String] = lines.flatMap(line => line.split(" "))

    // Pair each word with an initial count of 1
    val pairWords: RDD[(String, Int)] = words.map(word => (word, 1))

    // Sum the counts for each word
    val reduce = pairWords.reduceByKey((v1: Int, v2: Int) => v1 + v2)

    // Sort by the count (second element of the tuple) in descending order
    val result = reduce.sortBy(tuple => tuple._2, ascending = false)

    result.foreach(tuple => println(tuple))

    sc.stop()
  }
}
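If you just want to try the pipeline without preparing a "./words" file, here is a minimal sketch of the same word count using a small hypothetical in-memory collection via sc.parallelize (the sample lines and object name are illustrative, not from the original program):

// A minimal sketch of the same word-count pipeline, assuming no input file is
// available: the data is a small hypothetical in-memory collection.
import org.apache.spark.{SparkConf, SparkContext}

object WordCountInMemory {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setMaster("local").setAppName("count-demo"))

    // Hypothetical sample lines standing in for the "./words" file
    val lines = sc.parallelize(Seq("hello spark", "hello scala", "hello spark"))

    lines
      .flatMap(_.split(" "))           // split lines into words
      .map(word => (word, 1))          // pair each word with 1
      .reduceByKey(_ + _)              // sum counts per word
      .sortBy(_._2, ascending = false) // descending by count
      .collect()                       // bring results to the driver before printing
      .foreach(println)                // e.g. (hello,3), (spark,2), (scala,1)

    sc.stop()
  }
}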
Operators
package wordcount

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD

object OperatorDemo {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
    conf.setMaster("local").setAppName("count")
    val sc = new SparkContext(conf)
    val lines: RDD[String] = sc.textFile("./words")

    // sample: the first argument controls sampling with replacement
    // (true = with replacement), the second is the fraction of data to sample
    val result = lines.sample(true, 0.1)
    result.foreach(println)
    println("**************************************")

    // filter: keep only the lines that satisfy the predicate
    lines.filter(s => s.equals("hello zhazha")).foreach(println)
    println("**************************************")

    // collect: pull the results computed on the workers back to the Driver
    lines.collect().foreach(println)

    sc.stop()
  }
}
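For completeness, a short sketch of a few more commonly used RDD operators in the same style (the object name and the in-memory dataset are hypothetical; the operators themselves are standard RDD API calls):

// A sketch of a few more common RDD operators, assuming a small
// hypothetical in-memory dataset rather than the "./words" file.
import org.apache.spark.{SparkConf, SparkContext}

object MoreOperators {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setMaster("local").setAppName("operators-demo"))

    val nums = sc.parallelize(Seq(1, 2, 2, 3, 4, 4, 5))

    // map: apply a function to every element
    nums.map(_ * 10).foreach(println)

    // distinct: remove duplicate elements
    nums.distinct().collect().foreach(println)

    // take: return the first n elements to the driver (an action, like collect)
    nums.take(3).foreach(println)

    sc.stop()
  }
}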