前往小程序,Get更优阅读体验!
立即前往
首页
学习
活动
专区
工具
TVP
发布
社区首页 >专栏 >Spark实现排序

Spark实现排序

作者头像
曼路
发布2018-10-18 15:12:41
4220
发布2018-10-18 15:12:41
举报
文章被收录于专栏:浪淘沙浪淘沙

question: 用spark对数据进行排序,首先按照颜值的从高到低进行排序,如果颜值相等,在根据年龄的升序排序

1.User类继承ordered,并且序列化

代码语言:javascript
复制
package cn.edu360.spark.day06

import org.apache.log4j.{Level, Logger}
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

/**
  * 自定义排序
  * Created by zhangjingcun on 2018/9/27 17:13.
  */
object CustomSort1 {

  def main(args: Array[String]): Unit = {
    Logger.getLogger("org.apache.spark").setLevel(Level.OFF)

    val conf = new SparkConf().setAppName("IPLocation").setMaster("local[*]")
    val sc = new SparkContext(conf)

    //用spark对数据进行排序,首先按照颜值的从高到低进行排序,如果颜值相等,在根据年龄的升序排序
    val users: Array[String] = Array("1,tom,99,34", "2,marry,96,26", "3,mike,98,29", "4,jim,96,30")

    //并行化成RDD
    val userLines: RDD[String] = sc.makeRDD(users)

    //整理数据
    val userRdd: RDD[User1] = userLines.map(line => {
      val fileds = line.split(",")
      val id = fileds(0).toLong
      val name = fileds(1)
      val fv = fileds(2).toInt
      val age = fileds(3).toInt
      new User1(id, name, fv, age)
    })

    //排序
    val sorted: RDD[User1] = userRdd.sortBy(u => u)

    //收集数据
    val result: Array[User1] = sorted.collect()

    println(result.toBuffer)

    sc.stop()
  }
}

class User1 (val id:Long, val name:String, val fv:Int, val age:Int) extends Ordered[User1] with Serializable {
  override def compare(that: User1): Int = {
    //颜值相等的时候
    if (that.fv == this.fv) {
      this.age - that.age
    } else {
      -(this.fv - that.fv)
    }
  }

  override def toString: String = {
    s"User:($id, $name, $fv, $age)"
  }
}

2.User继承Sorted 没有序列化,不需要new

代码语言:javascript
复制
package cn.edu360.spark.day06

import org.apache.log4j.{Level, Logger}
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.rdd.RDD

/**
  * Created by zhangjingcun on 2018/9/27 17:32.
  */
object CustomSort2 {
  def main(args: Array[String]): Unit = {
    Logger.getLogger("org.apache.spark").setLevel(Level.OFF)

    val conf = new SparkConf().setAppName("IPLocation").setMaster("local[*]")
    val sc = new SparkContext(conf)

    //用spark对数据进行排序,首先按照颜值的从高到低进行排序,如果颜值相等,在根据年龄的升序排序
    val users: Array[String] = Array("1,tom,99,34", "2,marry,96,26", "3,mike,98,29", "4,jim,96,30")

    //并行化成RDD
    val userLines: RDD[String] = sc.makeRDD(users)

    //整理数据
    val userRdd: RDD[User2] = userLines.map(line => {
      val fileds = line.split(",")
      val id = fileds(0).toLong
      val name = fileds(1)
      val fv = fileds(2).toInt
      val age = fileds(3).toInt
      User2(id, name, fv, age)
    })

    //排序
    val sorted: RDD[User2] = userRdd.sortBy(u => u)

    //收集数据
    val result: Array[User2] = sorted.collect()

    println(result.toBuffer)

    sc.stop()
  }
}
//case 可以不使用new关键字
//不需要实现序列化
case class User2 (val id:Long, val name:String, val fv:Int, val age:Int) extends Ordered[User2] {
  override def compare(that: User2): Int = {
    //颜值相等的时候
    if (that.fv == this.fv) {
      this.age - that.age
    } else {
      -(this.fv - that.fv)
    }
  }

  override def toString: String = {
    s"User:($id, $name, $fv, $age)"
  }
}

3.

代码语言:javascript
复制
package cn.edu360.spark.day06

import org.apache.log4j.{Level, Logger}
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.rdd.RDD

/**
  * Created by zhangjingcun on 2018/9/27 17:37.
  */
object CustomSort3 {
  def main(args: Array[String]): Unit = {
    Logger.getLogger("org.apache.spark").setLevel(Level.OFF)

    val conf = new SparkConf().setAppName("IPLocation").setMaster("local[*]")
    val sc = new SparkContext(conf)

    //用spark对数据进行排序,首先按照颜值的从高到低进行排序,如果颜值相等,在根据年龄的升序排序
    val users: Array[String] = Array("1,tom,99,34", "2,marry,96,26", "3,mike,98,29", "4,jim,96,30")

    //并行化成RDD
    val userLines: RDD[String] = sc.makeRDD(users)

    //整理数据
    val userRdd: RDD[(Long, String, Int, Int)] = userLines.map(line => {
      val fileds = line.split(",")
      val id = fileds(0).toLong
      val name = fileds(1)
      val fv = fileds(2).toInt
      val age = fileds(3).toInt
      (id, name, fv, age)
    })

    //排序
    val sorted: RDD[(Long, String, Int, Int)] = userRdd.sortBy(tp => User3(tp._1, tp._2, tp._3, tp._4))

    //收集数据
    val result: Array[(Long, String, Int, Int)] = sorted.collect()

    println(result.toBuffer)

    sc.stop()
  }
}
//case 可以不使用new关键字
//不需要实现序列化
case class User3 (val id:Long, val name:String, val fv:Int, val age:Int) extends Ordered[User3] {
  override def compare(that: User3): Int = {
    //颜值相等的时候
    if (that.fv == this.fv) {
      this.age - that.age
    } else {
      -(this.fv - that.fv)
    }
  }

  override def toString: String = {
    s"User:($id, $name, $fv, $age)"
  }
}

4.

代码语言:javascript
复制
package cn.edu360.spark.day06

import org.apache.log4j.{Level, Logger}
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.rdd.RDD

/**
  * Created by zhangjingcun on 2018/9/27 17:41.
  */
object CustomSort4 {
  def main(args: Array[String]): Unit = {
    Logger.getLogger("org.apache.spark").setLevel(Level.OFF)

    val conf = new SparkConf().setAppName("IPLocation").setMaster("local[*]")
    val sc = new SparkContext(conf)

    //用spark对数据进行排序,首先按照颜值的从高到低进行排序,如果颜值相等,在根据年龄的升序排序
    val users: Array[String] = Array("1,tom,99,34", "2,marry,96,26", "3,mike,98,29", "4,jim,96,30")

    //并行化成RDD
    val userLines: RDD[String] = sc.makeRDD(users)

    //整理数据
    val tpRdd: RDD[(Long, String, Int, Int)] = userLines.map(line => {
      val fileds = line.split(",")
      val id = fileds(0).toLong
      val name = fileds(1)
      val fv = fileds(2).toInt
      val age = fileds(3).toInt
      (id, name, fv, age)
    })

    //利用元祖的比较特点:先比较第一个,如果不相等,按照第一个属性排序,在比较下个属性
    implicit val rules = Ordering[(Int, Int)].on[(Long, String, Int, Int)](t => (-t._3, t._4))

    val sorted = tpRdd.sortBy(t => t)
    //收集数据
    val result: Array[(Long, String, Int, Int)] = sorted.collect()

    println(result.toBuffer)

    sc.stop()
  }
}
本文参与 腾讯云自媒体分享计划,分享自作者个人站点/博客。
原始发表:2018年09月27日,如有侵权请联系 cloudcommunity@tencent.com 删除

本文分享自 作者个人站点/博客 前往查看

如有侵权,请联系 cloudcommunity@tencent.com 删除。

本文参与 腾讯云自媒体分享计划  ,欢迎热爱写作的你一起参与!

评论
登录后参与评论
0 条评论
热度
最新
推荐阅读
领券
问题归档专栏文章快讯文章归档关键词归档开发者手册归档开发者手册 Section 归档