import org.apache.spark.sql.types._
// Schema for the sample DataFrame: one integer, one string, and one date
// column, all nullable so the third row below can carry a null integer.
val schema = StructType(List(
StructField("integer_column", IntegerType, nullable = true),
StructField("string_column", StringType, nullable = true),
StructField("date_column", DateType, nullable = true)
))
// Sample data as an RDD[Row]; the last row's integer is null on purpose,
// to demonstrate null-filling in the results shown at the bottom of the file.
// NOTE(review): `Row` is used here but only `org.apache.spark.sql.types._`
// is imported above — presumably this runs in a spark-shell session where
// `Row` is already in scope; otherwise `import org.apache.spark.sql.Row`
// is needed.
val rdd = spark.sparkContext.parallelize(Seq(
Row(1, "First Value", java.sql.Date.valueOf("2010-01-01")),
Row(2, "Second Value", java.sql.Date.valueOf("2010-02-01")),
Row(null, "Second Value", java.sql.Date.valueOf("2010-02-01"))
))
// `df_fill` is presumably the DataFrame built from `rdd`/`schema` with nulls
// filled (e.g. via `na.fill(-3)`, judging by the "-3" in the filled result
// below) — that step is not visible in this chunk; TODO confirm.
df_fill.toJSON.collectAsList.toString
package utils
import org.apache.spark.sql.DataFrame
object MyDataInsightUtil {

  /**
   * Renders the first `num` rows of `data` as a single delimited string:
   * a double-quoted, comma-separated header, then each row rendered the
   * same way, all joined with `|`.
   *
   * Note: despite the name, the output is not JSON — every value
   * (including nulls and numbers) is rendered via `toString` and wrapped
   * in double quotes, and values containing `"` are not escaped.
   *
   * @param data the DataFrame to render
   * @param num  maximum number of rows to include (defaults to 10)
   * @return the header and row strings joined with `|`
   */
  def dataFrame2Json(data: DataFrame, num: Int = 10) = {
    val topRows = data.limit(num)
    val header = topRows.columns.mkString("\"", "\",\"", "\"")
    val body = topRows
      .collect()
      .map(row => row.toSeq.mkString("\"", "\",\"", "\""))
      .mkString("|")
    header + "|" + body
  }
}
结果:
填充后结果
"integer_column","string_column","date_column"|"1","First Value","2010-01-01"|"2","Second Value","2010-02-01"|"-3","Second Value","2010-02-01"
原始结果
"integer_column","string_column","date_column"|"1","First Value","2010-01-01"|"2","Second Value","2010-02-01"|"null","Second Value","2010-02-01"