import org.apache.spark.{SparkConf, SparkContext}

object Main {
  def main(args: Array[String]): Unit = {
    // Transformation operators on Spark RDDs covered here:
    // 1. map: apply a function to every element
    // 2. filter: keep only the elements that match a predicate
    // 3. flatMap: flat (flatten) + map (transform each element into 0..n outputs)
    // 4. reduceByKey: combine key-value pairs such as (word, 1), (hello, 1) by key
    val conf = new SparkConf().setMaster("local[*]").setAppName("SparkDemo")
    val sc = new SparkContext(conf)

    // Create an RDD
    // val rdd = sc.parallelize(List(1, 2, 3, 4, 5, 6, 7, 8, 9, 10))
    // val rdd1 = rdd.map(x => x * 2)

    // Use the filter operator to keep the even numbers
    // val rdd1 = rdd.filter(x => x % 2 == 0)

    // Given several sentences, each made of words separated by spaces,
    // the goal is to extract all the words into a single collection
    // val rdd = sc.parallelize(List("hello world", "hello scala"))
    // val rdd1 = rdd.flatMap(x => x.split(" "))

    // Word-count example
    val rdd = sc.parallelize(List("apple", "banana", "apple", "banana", "apple"))
    // val rdd1 = rdd.map(x => (x, 1))
    // val rdd3 = rdd1.reduceByKey((x, y) => x + y)
    rdd.map(x => (x, 1))
      .reduceByKey((x, y) => x + y)
      .collect() // collect() is an action operator: it brings the results back to the driver
      .foreach(println)
    // rdd3.collect().foreach(println)

    // Release the resources held by the SparkContext
    sc.stop()
  }
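
  // A minimal alternative sketch (not called from main; the method name
  // wordCountAlternative is ours, not from the original): the same count via
  // countByValue, an RDD action from the Spark API that returns the counts
  // directly as a local Map[T, Long], so no manual (word, 1) pairing and no
  // separate collect() are needed.
  def wordCountAlternative(sc: SparkContext): Unit = {
    val words = sc.parallelize(List("apple", "banana", "apple", "banana", "apple"))
    words.countByValue().foreach(println)
  }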
}
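
// Expected output of main (pair order may vary between runs, since the data
// is spread across local partitions):
// (banana,2)
// (apple,3)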