我目前正在为一个简单的棋盘游戏开发一个MCTS实现。我想我几乎是正确的,但每当我想迭代超过1次(时间或数字(I)作为迭代的因素)时,游戏就会冻结。如果有人对这个问题有一个可靠的想法,我会很高兴的。
// method to find best turn
Node findBest( )
{
Node startingPoint = new Node();
startingPoint.CurrentField_Boxes = GameBoard.boxs;
startingPoint.CurrentField_H = GameBoard.horizontal_line;
startingPoint.CurrentField_V = GameBoard.vertical_line;
Node rootNode = startingPoint;
for( int i = 0; i < 2; i++)
{
//Selection
Node promisingNode = selectPromisingNode(rootNode);
Debug.Log("POST SELECTION CHECK VON X/Y " + promisingNode.x + "/" + promisingNode.y);
//Expansion
if (!checkField())
{
expandNode(promisingNode);
}
//Simulation
Node nodeToExplore = promisingNode;
Debug.Log("POST 2 SELECTION CHECK VON X/Y " + nodeToExplore.x + "/" + nodeToExplore.y);
if (promisingNode.getChildren().Count > 0)
{
//get random child node
nodeToExplore = promisingNode.getRandomChild();
Debug.Log("POST 3 SELECTION CHECK VON X/Y " + nodeToExplore.x + "/" + nodeToExplore.y);
}
int playoutResult = simulation(nodeToExplore);
Debug.Log("SIM RESULT :" + playoutResult);
//backpropagation
backPropogation(nodeToExplore);
}
Debug.Log("*********************************");
Node bestNode = new Node();
Debug.Log("POST 4 SELECTION CHECK VON X/Y " + rootNode.getBestChild().x + "/" + rootNode.getBestChild().y);
bestNode = rootNode.getBestChild();
return bestNode;
}
private Node selectPromisingNode(Node rootNode)
{
Debug.Log("SELECTING");
Node node = new Node();
node = rootNode;
while (node.getChildren().Count != 0) // checking if terminal
{
findBestNodeWithUCT(node);
}
return node;
}
Node findBestNodeWithUCT( Node root)
{
double j = 0;
int bestIndex = 0; ;
for (int i = 0; i < root.getChildren().Count; i++)
{
if (root.getChildren()[i].getVisitTimes() / root.getChildren()[i].getResult() > j)
{
j = root.getChildren()[i].getVisitTimes() / root.getChildren()[i].getResult();
bestIndex = i;
Debug.Log("BEST INDEX : " + bestIndex);
}
}
Node bestOption = root.getChildren()[bestIndex];
Debug.Log("BEST OPTION RETURN X/Y : " + bestOption.x + "/" + bestOption.y);
return bestOption;
}
private void expandNode(Node node)
{
Debug.Log("Start EXPANSION");
bool vert = false;
bool hori = false;
//Phase I : creating new Node
//create a new node wit certain action A
Node newNode = new Node();
//init and setting parent
newNode.parent[0] = node;
//init gamefield into node
newNode.CurrentField_V = node.CurrentField_V;
newNode.CurrentField_H = node.CurrentField_H;
//init and setting child parent relationship
newNode.visitTimes++;
Debug.Log("ERSTELLE NEUEN KNOTEN");
//Phase II : Validating Action
//create a action a which leads to the expansion
int x = UnityEngine.Random.Range(0,4);
int y = UnityEngine.Random.Range(0, 4);
//checking variables for valid coordinates
Debug.Log("AKTION A IN EXPANSION :" + x + y);
//Prechecking of generated coordinates to set corresponding obj
if (y < 3)
{
vert = true;
}
if (x < 3 )
{
hori = true;
}
else if (x == 3 && y == 3)
{
Debug.Log(" OUT OF BOUNDS :NEW RANDOM");
expandNode(node);
}
//Phase III : Doing Action
if(vert && newNode.CurrentField_V[x, y].tag == "is play" && hori && newNode.CurrentField_H[x, y].tag != "is play")
{
expandNode(node);
}
//setting choice into sim gamefield with green color
if (vert && newNode.CurrentField_V[x, y].tag != "is play")// && GameBoard.sim_vertical_line[x, y].tag != "is play")
{
Debug.Log("SETZEN DER VERTIKALEN AKTION IN KNOTENSTATE");
GameBoard.sim_vertical_line[x, y].tag = "is play";
GameBoard.sim_vertical_line[x, y].GetComponent<SpriteRenderer>().sprite = greenV;
// newNode.CurrentField_V[x, y].tag = "is play";
newNode.x = x;
newNode.y = y;
node.nodeChildren.Add(newNode);
}
if (hori && newNode.CurrentField_H[x, y].tag != "is play" )//&& GameBoard.sim_horizontal_line[x, y].tag != "is play")
{
Debug.Log("SETZEN DER HORIZONTALEN AKTION IN KNOTENSTATE");
GameBoard.sim_horizontal_line[x, y].tag = "is play";
GameBoard.sim_horizontal_line[x, y].GetComponent<SpriteRenderer>().sprite = greenH;
// newNode.CurrentField_H[x, y].tag = "is play";
newNode.x = x;
newNode.y = y;
node.nodeChildren.Add(newNode);
}
else { expandNode(node); }
}
private void backPropogation(Node nodeToExplore)
{
Node tempNode = nodeToExplore;
Debug.Log("BACKPROPAGATION");
while (tempNode != null)
{
int i = 0;
if (i % 2 == 0)
{
tempNode.visitTimes++;
int j = tempNode.result;
tempNode.setResult(j);
tempNode = tempNode.getParent();
}
else if (i % 2 != 0)
{
tempNode.visitTimes++;
int j = tempNode.result;
tempNode.setResult(j * -1);
tempNode = tempNode.getParent();
}
i++;
}
}
一次迭代运行良好,我已经获得了比随机机器人更好的行为,但在一次迭代中最多只能创建5-6个节点。
非常感谢。
发布于 2017-11-21 21:21:19
在selectPromisingNode()
函数中,您有一个从第二次迭代开始的无限循环。首先将根节点插入到该函数中。从第二次迭代开始,根节点有超过0个子节点,因此满足while循环的条件。在循环内部,您不会更改node
变量的值,因此您将无限频繁地以根节点作为参数来调用findBestNodeWithUCT()
。
您可能希望将循环中的那行代码更改为:node = findBestNodeWithUCT(node)
,这样您实际上就可以开始稍微遍历树,而不是停留在根部。
我没有检查其余的代码,所以我不知道是否还有更多的错误,但这至少似乎准确地解释了您所遇到的问题
https://stackoverflow.com/questions/47274915
复制相似问题