module dreversi.agent;

import dreversi.env;

import std.math;
import std.stdio;

import mir.ndslice;

/// Score type used by the agents. NaN marks "not evaluated yet".
alias Score = double;

/// A board position (row, col) paired with its evaluated score.
struct Action {
    size_t row = 0, col = 0;
    Score score = double.nan; // stays NaN when no legal move was found
}

/// Plain min-max search agent.
/// This is not an efficient implementation as it computes
/// without memory of the game tree (no transposition table).
struct MinMaxAgent {
    bool isBlack;         // color this agent plays
    bool verbose = false; // print the score of every candidate move

    /// Evaluate every legal move for this agent and return the best one.
    /// Params:
    ///   board = current position
    ///   depth = remaining search depth in plies
    /// Returns: the move with the highest min-max score; if no legal
    ///   move exists the returned Action keeps its default NaN score.
    Action select(in Board board, size_t depth = 3) {
        Action best;
        foreach (r; 0 .. board.length!0) {
            foreach (c; 0 .. board.length!1) {
                auto next = board.put(this.isBlack, r, c);
                if (next.valid) {
                    // After our move it is the opponent's turn -> isMine = false.
                    immutable score = search(next, false, depth);
                    if (this.verbose) writefln!"Action(%d, %d, %f)"(r, c, score);
                    if (best.score.isNaN || score > best.score) {
                        best = Action(r, c, score);
                    }
                }
            }
        }
        return best;
    }

    /// Recursive min-max evaluation of `board`.
    /// Params:
    ///   board  = position to evaluate
    ///   isMine = true when it is THIS agent's turn to move in `board`
    ///            (the old comment "true if board is result of my action"
    ///            was wrong: `select` passes false right after our move)
    ///   depth  = remaining depth; 0 stops the recursion
    /// Returns: board.score from this agent's point of view at the leaves,
    ///   max over children on our turns, min on the opponent's turns.
    Score search(in Board board, bool isMine, size_t depth) {
        import std.algorithm : max, min;

        if (board.finished || depth == 0) {
            return cast(double) board.score(this.isBlack);
        }

        // The side to move; loop-invariant, so computed once up front.
        immutable color = isMine ? this.isBlack : !this.isBlack;

        // Forced pass: hand the turn over without placing a stone.
        if (board.pass(color)) {
            return search(board, !isMine, depth - 1);
        }

        Score alpha = -double.infinity; // best found for the maximizer (us)
        Score beta = double.infinity;   // best found for the minimizer (opponent)
        foreach (r; 0 .. board.length!0) {
            foreach (c; 0 .. board.length!1) {
                immutable b = board.put(color, r, c);
                if (b.valid) {
                    const score = search(b, !isMine, depth - 1); // recur
                    if (isMine) {
                        alpha = max(alpha, score);
                    } else {
                        beta = min(beta, score);
                    }
                }
            }
        }
        return isMine ? alpha : beta;
    }
}

unittest {
    auto env = reset(4, 4); // 4x4 is known to end with the second player up 8 points
    auto agent = MinMaxAgent(true); // as the first player
    auto action = agent.select(env, 11);
    assert(action.score == -8.0);
}

/// Alpha-beta pruning agent.
/// This is not an efficient implementation as it computes
/// without memory of the game tree.
struct AlphaBetaAgent {
    bool isBlack;         // color this agent plays
    bool verbose = false; // print the score of every candidate move
    double[Board] scoreCache; // reserved for memoization; currently unused

    /// Evaluate every legal move with alpha-beta search and return the best.
    /// Params:
    ///   board = current position
    ///   depth = remaining search depth in plies
    /// Returns: the move with the highest score; if no legal move exists
    ///   the returned Action keeps its default NaN score.
    @safe
    Action select(in Board board, size_t depth = 3) {
        Action best;
        foreach (r; 0 .. board.length!0) {
            foreach (c; 0 .. board.length!1) {
                immutable next = board.put(this.isBlack, r, c);
                if (next.valid) {
                    // Each root move gets a fresh full (-inf, +inf) window.
                    immutable score = search(next, depth, -double.infinity, double.infinity);
                    if (this.verbose) {
                        writefln!"Action(%d, %d, %f)"(r, c, score);
                    }
                    if (best.score.isNaN || score > best.score) {
                        best = Action(r, c, score);
                    }
                }
            }
        }
        return best;
    }

    /// Alpha-beta search with `isMine` lifted to a compile-time flag:
    /// true when it is THIS agent's turn to move in `board`.
    /// Params:
    ///   board = position to evaluate
    ///   depth = remaining depth; 0 stops the recursion
    ///   alpha = lower bound already guaranteed to the maximizer
    ///   beta  = upper bound already guaranteed to the minimizer
    pure @safe
    Score search(bool isMine = false)(in Board board, size_t depth, Score alpha, Score beta) {
        // fmax/fmin match std.algorithm max/min here since scores are
        // never NaN below the root (leaves return finite board scores).
        import mir.math.common : fmax, fmin;

        if (board.finished || depth == 0) {
            return cast(double) board.score(this.isBlack);
        }

        // The side to move, resolved at compile time.
        static if (isMine) {
            immutable color = this.isBlack;
        } else {
            immutable color = !this.isBlack;
        }

        // Forced pass: hand the turn over without placing a stone.
        if (board.pass(color)) {
            return search!(!isMine)(board, depth - 1, alpha, beta);
        }

        foreach (r; 0 .. board.length!0) {
            foreach (c; 0 .. board.length!1) {
                immutable b = board.put(color, r, c);
                if (b.valid) {
                    immutable score = search!(!isMine)(b, depth - 1, alpha, beta); // recur
                    static if (isMine) {
                        alpha = fmax(alpha, score);
                    } else {
                        beta = fmin(beta, score);
                    }
                    if (alpha >= beta) { // cut: the other side will avoid this line
                        return isMine ? alpha : beta;
                    }
                }
            }
        }
        static if (isMine) {
            return alpha;
        } else {
            return beta;
        }
    }
}

unittest {
    auto env0 = reset(4, 4); // 4x4 is known to end with the second player up 8 points
    auto agentB = AlphaBetaAgent(true);  // as the first player
    auto agentW = AlphaBetaAgent(false); // as the second player

    auto action0 = agentB.select(env0, 11);
    assert(action0.score == -8.0);
    auto env1 = env0.put(agentB.isBlack, action0.row, action0.col);

    auto action1 = agentW.select(env1, 11);
    assert(action1.score == 8.0);
    auto env2 = env1.put(agentW.isBlack, action1.row, action1.col);

    auto action2 = agentB.select(env2, 11);
    assert(action2.score == -8.0);
    auto env3 = env2.put(agentB.isBlack, action2.row, action2.col);
}