/*
 * Without the setting below, startup fails with:
 *   java.lang.IllegalArgumentException: System memory 259522560 must be at least
 *   4.718592E8 (470M). Please use a larger heap size.
 * i.e. there is not enough memory, so the SparkContext cannot be started.
 */
conf.set("spark.testing.memory", "2000000000");
JavaSparkContext sc = new JavaSparkContext(conf);

/* Loading the input the following way also works: */
// JavaRDD<String> text = sc.textFile("hdfs://localhost:9000/README.txt");

/*
 * The original file contains:
 *   o1abc 45
 *   o1abc 77
 *   o1abc o1abc
 */
JavaRDD<String> text = sc.textFile("E://temp//input//friend.txt");

/*
 * Collect the lines and echo each one with a "str:" prefix. Output:
 *   str:o1abc 45
 *   str:o1abc 77
 *   str:o1abc o1abc
 */
List<String> strList = text.collect();
for (int i = 0; i < strList.size(); i++) {
    System.out.println("str:" + strList.get(i));
}

/*
 * Interface FlatMapFunction<T,R> declares: Iterable<R> call(T t).
 * (Note: in later versions the return type changes.)
 */
final FlatMapFunction<String, String> spaceSplitter = new FlatMapFunction<String, String>() {
    /* Returning a List is fine here: java.lang.Iterable is a super-interface of List. */
    @Override
    public Iterable<String> call(String inputLine) throws Exception {
        // Trace each invocation, then split the line on single spaces.
        System.out.println("flatMap once, line is " + inputLine);
        return Arrays.asList(inputLine.split(" "));
    }
};
JavaRDD<String> words = text.flatMap(spaceSplitter);
List<String> wordsList = words.collect();
/*Output:
flatMap once, line is o1abc 45
flatMap once, line is o1abc 77
flatMap once, line is o1abc o1abc
For more, see: https://blog.csdn.net/qq_44596980/article/details/93385009
This article is a repost; follow the link above to view the original.
In case of infringement, please contact cloudcommunity@tencent.com to have it removed.