1: While practicing Spark, I read a file from HDFS. Because Spark evaluates RDDs lazily, the error below only surfaced when I tried to view the file's contents. The mistake is a small one, but I think it is worth recording, since it was caused by unfamiliarity with the command. The error is as follows:
scala> text.collect
java.net.ConnectException: Call From slaver1/192.168.19.128 to slaver1:8020 failed on connection exception: java.net.ConnectException: Connection refused; For more details see: http://wiki.apache.org/hadoop/ConnectionRefused
    at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
    at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:57)
    at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
    at java.lang.reflect.Constructor.newInstance(Constructor.java:526)
    at org.apache.hadoop.net.NetUtils.wrapWithMessage(NetUtils.java:791)
    at org.apache.hadoop.net.NetUtils.wrapException(NetUtils.java:731)
    at org.apache.hadoop.ipc.Client.call(Client.java:1472)
    at org.apache.hadoop.ipc.Client.call(Client.java:1399)
    at org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.invoke(ProtobufRpcEngine.java:232)
    at com.sun.proxy.$Proxy36.getFileInfo(Unknown Source)
    at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolTranslatorPB.getFileInfo(ClientNamenodeProtocolTranslatorPB.java:752)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:606)
    at org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:187)
    at org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:102)
    at com.sun.proxy.$Proxy37.getFileInfo(Unknown Source)
    at org.apache.hadoop.hdfs.DFSClient.getFileInfo(DFSClient.java:1988)
    at org.apache.hadoop.hdfs.DistributedFileSystem$18.doCall(DistributedFileSystem.java:1118)
    at org.apache.hadoop.hdfs.DistributedFileSystem$18.doCall(DistributedFileSystem.java:1114)
    at org.apache.hadoop.fs.FileSystemLinkResolver.resolve(FileSystemLinkResolver.java:81)
    at org.apache.hadoop.hdfs.DistributedFileSystem.getFileStatus(DistributedFileSystem.java:1114)
    at org.apache.hadoop.fs.Globber.getFileStatus(Globber.java:57)
    at org.apache.hadoop.fs.Globber.glob(Globber.java:252)
    at org.apache.hadoop.fs.FileSystem.globStatus(FileSystem.java:1644)
    at org.apache.hadoop.mapred.FileInputFormat.singleThreadedListStatus(FileInputFormat.java:257)
    at org.apache.hadoop.mapred.FileInputFormat.listStatus(FileInputFormat.java:228)
    at org.apache.hadoop.mapred.FileInputFormat.getSplits(FileInputFormat.java:313)
    at org.apache.spark.rdd.HadoopRDD.getPartitions(HadoopRDD.scala:199)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:239)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:237)
    at scala.Option.getOrElse(Option.scala:120)
    at org.apache.spark.rdd.RDD.partitions(RDD.scala:237)
    at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:239)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:237)
    at scala.Option.getOrElse(Option.scala:120)
    at org.apache.spark.rdd.RDD.partitions(RDD.scala:237)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:1929)
    at org.apache.spark.rdd.RDD$$anonfun$collect$1.apply(RDD.scala:927)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:150)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:111)
    at org.apache.spark.rdd.RDD.withScope(RDD.scala:316)
    at org.apache.spark.rdd.RDD.collect(RDD.scala:926)
    at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:30)
    at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:35)
    at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:37)
    at $iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:39)
    at $iwC$$iwC$$iwC$$iwC.<init>(<console>:41)
    at $iwC$$iwC$$iwC.<init>(<console>:43)
    at $iwC$$iwC.<init>(<console>:45)
    at $iwC.<init>(<console>:47)
    at <init>(<console>:49)
    at .<init>(<console>:53)
    at .<clinit>(<console>)
    at .<init>(<console>:7)
    at .<clinit>(<console>)
    at $print(<console>)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:606)
    at org.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:1065)
    at org.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1346)
    at org.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:840)
    at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:871)
    at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:819)
    at org.apache.spark.repl.SparkILoop.reallyInterpret$1(SparkILoop.scala:857)
    at org.apache.spark.repl.SparkILoop.interpretStartingWith(SparkILoop.scala:902)
    at org.apache.spark.repl.SparkILoop.command(SparkILoop.scala:814)
    at org.apache.spark.repl.SparkILoop.processLine$1(SparkILoop.scala:657)
    at org.apache.spark.repl.SparkILoop.innerLoop$1(SparkILoop.scala:665)
    at org.apache.spark.repl.SparkILoop.org$apache$spark$repl$SparkILoop$$loop(SparkILoop.scala:670)
    at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply$mcZ$sp(SparkILoop.scala:997)
    at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply(SparkILoop.scala:945)
    at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply(SparkILoop.scala:945)
    at scala.tools.nsc.util.ScalaClassLoader$.savingContextLoader(ScalaClassLoader.scala:135)
    at org.apache.spark.repl.SparkILoop.org$apache$spark$repl$SparkILoop$$process(SparkILoop.scala:945)
    at org.apache.spark.repl.SparkILoop.process(SparkILoop.scala:1059)
    at org.apache.spark.repl.Main$.main(Main.scala:31)
    at org.apache.spark.repl.Main.main(Main.scala)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:606)
    at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:731)
    at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:181)
    at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:206)
    at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:121)
    at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
Caused by: java.net.ConnectException: Connection refused
    at sun.nio.ch.SocketChannelImpl.checkConnect(Native Method)
    at sun.nio.ch.SocketChannelImpl.finishConnect(SocketChannelImpl.java:739)
    at org.apache.hadoop.net.SocketIOWithTimeout.connect(SocketIOWithTimeout.java:206)
    at org.apache.hadoop.net.NetUtils.connect(NetUtils.java:530)
    at org.apache.hadoop.net.NetUtils.connect(NetUtils.java:494)
    at org.apache.hadoop.ipc.Client$Connection.setupConnection(Client.java:607)
    at org.apache.hadoop.ipc.Client$Connection.setupIOstreams(Client.java:705)
    at org.apache.hadoop.ipc.Client$Connection.access$2800(Client.java:368)
    at org.apache.hadoop.ipc.Client.getConnection(Client.java:1521)
    at org.apache.hadoop.ipc.Client.call(Client.java:1438)
    ... 84 more
2: The cause of the error is as follows:
I read the file on HDFS with scala> var text = sc.textFile("hdfs://slaver1:/input.txt"); and then ran text.collect to view the contents. Because textFile is lazy, that action is the point where the error above was thrown. The root cause is that I left the port number out of the HDFS URI: with no port given, the client fell back to the default NameNode RPC port 8020 (hence "Call From slaver1/192.168.19.128 to slaver1:8020" in the message), while the NameNode on this cluster actually listens on port 9000, so the connection was refused.
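If you are not sure which port the NameNode is listening on, one way to check from inside spark-shell is to read the configured default file system off the Hadoop configuration object. A minimal sketch; the printed value is an assumption about this cluster's core-site.xml (the key fs.defaultFS is the Hadoop 2.x name, older setups may use fs.default.name):

scala> sc.hadoopConfiguration.get("fs.defaultFS")   // reads core-site.xml; value below is assumed for this cluster
res0: String = hdfs://slaver1:9000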
The fix is simply to include the port number, as shown below:
scala> var text = sc.textFile("hdfs://slaver1:9000/input.txt");
scala> text.collect
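As a side note, when fs.defaultFS in core-site.xml already points at hdfs://slaver1:9000, the scheme and authority can be dropped from the path entirely and it will resolve against that configured default. A sketch under that assumption:

scala> var text = sc.textFile("/input.txt");   // resolves against fs.defaultFS, assumed here to be hdfs://slaver1:9000
scala> text.collect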