MapReduce例题-找出有共同好友的user及他们的共同好友

数据:

格式说明:user:friend...

A:B,C,D,F,E,O
B:A,C,E,K
C:F,A,D,I
D:A,E,F,L
E:B,C,D,M,L
F:A,B,C,D,E,O,M
G:A,C,D,E,F
H:A,C,D,E,O
I:A,O
J:B,O
K:A,C,D
L:D,E,F
M:E,F,G
O:A,H,I,J

代码如下(看注释):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;
import java.net.URI;
import java.util.ArrayList;

/**
 * Finds every pair of users that share at least one friend and lists the
 * friends they have in common.
 *
 * <p>Input format (one user per line): {@code user:friend,friend,...}
 *
 * <p>Implemented as two chained MapReduce jobs:
 * <ol>
 *   <li>Invert the relation: map emits (friend, user); the reducer collects
 *       every user who lists that friend and emits one record per user pair:
 *       ("userA,userB", friend). Pair members are ordered with
 *       {@code compareTo} so the same pair always produces the same key.</li>
 *   <li>Group by user pair and join all shared friends into a single
 *       comma-separated line.</li>
 * </ol>
 */
public class MR {

    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        // Honor command-line paths when all three are supplied
        // (input, intermediate output, final output); otherwise fall back
        // to the demo HDFS locations so the example still runs as-is.
        if (args.length < 3) {
            args = new String[]{
                    "hdfs://localhost:9000/mapreducer/0314/data/data.txt",
                    "hdfs://localhost:9000/mapreducer/0314/out/my/1",
                    "hdfs://localhost:9000/mapreducer/0314/out/my/2"
            };
        }

        Configuration conf = new Configuration();

        // Delete stale output directories so re-runs do not fail with
        // FileAlreadyExistsException.
        FileSystem fs = FileSystem.get(URI.create(args[1]), conf);
        if (fs.exists(new Path(args[1]))) {
            fs.delete(new Path(args[1]), true);
        }
        if (fs.exists(new Path(args[2]))) {
            fs.delete(new Path(args[2]), true);
        }

        // Job 1: friend -> list of users who have that friend, then all
        // pairwise combinations of those users keyed by the pair.
        Job job = Job.getInstance(conf);

        job.setJarByClass(MR.class);

        job.setMapperClass(MR.MyMapper.class);
        job.setReducerClass(MR.MyReducer.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // Job 2 consumes job 1's output, so it only starts after job 1
        // completes successfully.
        if (job.waitForCompletion(true)) {
            Job job1 = Job.getInstance(conf);

            job1.setJarByClass(MR.class);

            job1.setMapperClass(MR.MyMapper1.class);
            job1.setReducerClass(MR.MyReducer1.class);

            job1.setOutputKeyClass(Text.class);
            job1.setOutputValueClass(Text.class);

            FileInputFormat.addInputPath(job1, new Path(args[1]));
            FileOutputFormat.setOutputPath(job1, new Path(args[2]));

            // Propagate success/failure to the shell, as is conventional
            // for Hadoop driver classes.
            System.exit(job1.waitForCompletion(true) ? 0 : 1);
        }
        System.exit(1);
    }

    /**
     * Job 1 mapper: inverts "user -> friends" into "(friend, user)" pairs.
     * After the shuffle, each reduce group holds all users who list the
     * same friend.
     */
    private static class MyMapper extends Mapper<Object, Text, Text, Text> {

        Text outK = new Text();
        Text outV = new Text();

        @Override
        protected void map(Object key, Text value, Context context) throws IOException, InterruptedException {
            // Line format: user:friend,friend,...
            String[] split = value.toString().split(":");
            String[] split1 = split[1].split(",");
            // Emit each friend as the key and the owning user as the value;
            // the reducer then sees, per friend, every user who has them.
            for (String s : split1) {
                outK.set(s);
                outV.set(split[0]);
                context.write(outK, outV);
            }
        }
    }

    /**
     * Job 1 reducer: for one friend (the key) and all users who list that
     * friend (the values), emits every unordered user pair with the friend
     * as the value: ("userA,userB", friend).
     */
    private static class MyReducer extends Reducer<Text, Text, Text, Text> {
        Text outK = new Text();
        Text outV = new Text();

        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {

            // Materialize the users so we can form all pairwise combinations.
            ArrayList<String> list = new ArrayList<>();
            for (Text value : values) {
                list.add(value.toString());
            }
            // Combine every pair of users (i < j avoids duplicate pairs).
            // Normalize the key order with compareTo so "B,A" and "A,B"
            // always collapse to the same pair key in job 2.
            for (int i = 0; i < list.size(); i++) {
                for (int j = i + 1; j < list.size(); j++) {
                    String s = list.get(i);
                    String s1 = list.get(j);
                    if (s.compareTo(s1) > 0) {
                        outK.set(s + "," + s1);
                    } else {
                        outK.set(s1 + "," + s);
                    }
                    outV.set(key);
                    context.write(outK, outV);
                }
            }
        }
    }

    /**
     * Job 2 mapper: re-parses job 1's text output ("pair\tfriend") back
     * into (pair, friend) key/value records so the shuffle groups all
     * shared friends of one pair together.
     */
    private static class MyMapper1 extends Mapper<Object, Text, Text, Text> {
        Text outK = new Text();
        Text outV = new Text();

        @Override
        protected void map(Object key, Text value, Context context) throws IOException, InterruptedException {
            String[] line = value.toString().split("\t");
            // Skip malformed lines (e.g. blanks) instead of crashing with
            // ArrayIndexOutOfBoundsException.
            if (line.length < 2) {
                return;
            }
            outK.set(line[0]);
            outV.set(line[1]);
            context.write(outK, outV);
        }
    }

    /**
     * Job 2 reducer: joins all common friends of one user pair into a
     * single comma-separated string.
     */
    private static class MyReducer1 extends Reducer<Text, Text, Text, Text> {
        Text outV = new Text();

        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
            StringBuilder sb = new StringBuilder();

            for (Text value : values) {
                sb.append(value.toString()).append(",");
            }
            // Drop the trailing comma without allocating an intermediate
            // String (the builder ends with "," iff any value was appended).
            if (sb.length() > 0) {
                sb.setLength(sb.length() - 1);
            }
            outV.set(sb.toString());
            context.write(key, outV);
        }
    }
}

本文参与腾讯云自媒体分享计划,欢迎正在阅读的你也加入,一起分享。

发表于

我来说两句

0 条评论
登录 后参与评论

扫码关注云+社区

领取腾讯云代金券