文章/答案/技术大牛

发布

问蒙特卡洛树搜索
EN

Stack Overflow用户

提问于 2017-11-14 06:54:40

回答 1查看 331关注 0票数 0

我目前正在为一个简单的棋盘游戏开发一个MCTS实现。我想我几乎是正确的，但每当我想迭代超过1次(时间或数字(I)作为迭代的因素)时，游戏就会冻结。如果有人对这个问题有一个可靠的想法，我会很高兴的。

 // method to find  best turn
    Node findBest( )
    {
        Node startingPoint = new Node();
        startingPoint.CurrentField_Boxes = GameBoard.boxs;
        startingPoint.CurrentField_H = GameBoard.horizontal_line;
        startingPoint.CurrentField_V = GameBoard.vertical_line;

        Node rootNode = startingPoint;

       for( int i = 0; i < 2; i++)
        {
            //Selection
            Node promisingNode = selectPromisingNode(rootNode);
            Debug.Log("POST SELECTION CHECK VON X/Y " + promisingNode.x + "/" + promisingNode.y);

            //Expansion
            if (!checkField())
            {
                expandNode(promisingNode);
            }

            //Simulation
            Node nodeToExplore = promisingNode;
            Debug.Log("POST 2 SELECTION CHECK VON X/Y " + nodeToExplore.x + "/" + nodeToExplore.y);

            if (promisingNode.getChildren().Count > 0)
            {
                //get random child node
                nodeToExplore = promisingNode.getRandomChild();
                Debug.Log("POST 3 SELECTION CHECK VON X/Y " + nodeToExplore.x + "/" + nodeToExplore.y);
            }

            int playoutResult = simulation(nodeToExplore);
            Debug.Log("SIM RESULT :" + playoutResult);
            //backpropagation
            backPropogation(nodeToExplore);
        }

        Debug.Log("*********************************");
        Node  bestNode = new Node();
        Debug.Log("POST 4 SELECTION CHECK VON X/Y " + rootNode.getBestChild().x + "/" + rootNode.getBestChild().y);
        bestNode = rootNode.getBestChild();
        return bestNode;
    }

    private Node selectPromisingNode(Node rootNode)
    {
        Debug.Log("SELECTING");
        Node node = new Node();
        node = rootNode;

        while (node.getChildren().Count != 0) // checking if terminal
        {
            findBestNodeWithUCT(node);
        }

        return node;
    }

    Node findBestNodeWithUCT( Node root)
    {
        double j = 0;
        int bestIndex = 0; ;

        for (int i = 0; i < root.getChildren().Count; i++)
        {
            if (root.getChildren()[i].getVisitTimes() / root.getChildren()[i].getResult() > j)
            {
                j = root.getChildren()[i].getVisitTimes() / root.getChildren()[i].getResult();
                bestIndex = i;
                Debug.Log("BEST INDEX : " + bestIndex);
            }
        }

        Node bestOption = root.getChildren()[bestIndex];
        Debug.Log("BEST OPTION RETURN X/Y : " + bestOption.x + "/" + bestOption.y);

        return bestOption;
    }

    private void expandNode(Node node)
    {
        Debug.Log("Start EXPANSION");
        bool vert = false;
        bool hori = false;

    //Phase I : creating new Node
        //create a new node wit certain action A
        Node newNode = new Node();
        //init and setting parent 
        newNode.parent[0] = node;
        //init gamefield into node
        newNode.CurrentField_V = node.CurrentField_V;
        newNode.CurrentField_H = node.CurrentField_H;
        //init and setting child parent relationship
        newNode.visitTimes++;
        Debug.Log("ERSTELLE NEUEN KNOTEN");

     //Phase II : Validating Action
        //create a action a which leads to the expansion
        int x = UnityEngine.Random.Range(0,4);
        int y = UnityEngine.Random.Range(0, 4);
        //checking variables for valid coordinates
        Debug.Log("AKTION A IN EXPANSION :" + x + y);
        //Prechecking of generated coordinates to set corresponding obj
        if (y < 3)
        {
            vert = true;
        }
        if (x < 3 )
        {
            hori = true;
        }
        else if (x == 3 && y == 3)
        {
            Debug.Log(" OUT OF BOUNDS :NEW RANDOM");
            expandNode(node);

        }

     //Phase III : Doing Action
        if(vert && newNode.CurrentField_V[x, y].tag == "is play" && hori && newNode.CurrentField_H[x, y].tag != "is play")
        {
            expandNode(node);
        }
        //setting choice into sim gamefield with green color
        if (vert && newNode.CurrentField_V[x, y].tag != "is play")// && GameBoard.sim_vertical_line[x, y].tag != "is play")
        {
            Debug.Log("SETZEN DER VERTIKALEN AKTION IN KNOTENSTATE");
            GameBoard.sim_vertical_line[x, y].tag = "is play";
            GameBoard.sim_vertical_line[x, y].GetComponent<SpriteRenderer>().sprite = greenV;
          //  newNode.CurrentField_V[x, y].tag = "is play";
            newNode.x = x;
            newNode.y = y;
            node.nodeChildren.Add(newNode);

        }
        if (hori && newNode.CurrentField_H[x, y].tag != "is play" )//&& GameBoard.sim_horizontal_line[x, y].tag != "is play")
        {
            Debug.Log("SETZEN DER HORIZONTALEN AKTION IN KNOTENSTATE");
            GameBoard.sim_horizontal_line[x, y].tag = "is play";
            GameBoard.sim_horizontal_line[x, y].GetComponent<SpriteRenderer>().sprite = greenH;
          //  newNode.CurrentField_H[x, y].tag = "is play";
            newNode.x = x;
            newNode.y = y;
            node.nodeChildren.Add(newNode);

        }
        else { expandNode(node); }
    }

    private void backPropogation(Node nodeToExplore)
    {
        Node tempNode = nodeToExplore;
        Debug.Log("BACKPROPAGATION");
        while (tempNode != null)
        {
            int i = 0;

            if (i % 2 == 0)
            {
                tempNode.visitTimes++;
                int j = tempNode.result;
                tempNode.setResult(j);
                tempNode = tempNode.getParent();
            }
            else if (i % 2 != 0)
            {
                tempNode.visitTimes++;
                int j = tempNode.result;
                tempNode.setResult(j * -1);
                tempNode = tempNode.getParent();
            }
            i++;
        }
    }

一次迭代运行良好，我已经获得了比随机机器人更好的行为，但在一次迭代中最多只能创建5-6个节点。

非常感谢。

unity3d

tree

montecarlo

回答 1

Stack Overflow用户

发布于 2017-11-21 21:21:19

在selectPromisingNode()函数中，您有一个从第二次迭代开始的无限循环。首先将根节点插入到该函数中。从第二次迭代开始，根节点有超过0个子节点，因此满足while循环的条件。在循环内部，您不会更改node变量的值，因此您将无限频繁地以根节点作为参数来调用findBestNodeWithUCT()。

您可能希望将循环中的那行代码更改为：node = findBestNodeWithUCT(node)，这样您实际上就可以开始稍微遍历树，而不是停留在根部。

我没有检查其余的代码，所以我不知道是否还有更多的错误，但这至少似乎准确地解释了您所遇到的问题

票数 0

页面原文内容由Stack Overflow提供。腾讯云小微IT领域专用引擎提供翻译支持

原文链接：

https://stackoverflow.com/questions/47274915

复制

相似问题

问蒙特卡洛树搜索
EN

回答 1

Stack Overflow用户

社区

活动

圈层

关于

腾讯云开发者

热门产品

热门推荐

更多推荐

问蒙特卡洛树搜索EN

回答 1

Stack Overflow用户

社区

活动

圈层

关于

腾讯云开发者

热门产品

热门推荐

更多推荐

问蒙特卡洛树搜索
EN