module dreversi.agent;

import dreversi.env;

import std.math;
import std.stdio;

import mir.ndslice;

/// Score type used by the agents. NaN marks "not evaluated yet".
alias Score = double;

/// A board position (row, col) paired with its evaluated score.
struct Action {
    size_t row = 0, col = 0;
    Score score = double.nan; // stays NaN when no legal move was found
}

/// Plain min-max search agent.
/// This is not an efficient implementation as it computes
/// without memory of the game tree (no transposition table).
struct MinMaxAgent {
    bool isBlack;         // color this agent plays
    bool verbose = false; // print the score of every candidate move

    /// Evaluate every legal move for this agent and return the best one.
    /// Params:
    ///   board = current position
    ///   depth = remaining search depth in plies
    /// Returns: the move with the highest min-max score; if no legal
    ///   move exists the returned Action keeps its default NaN score.
    Action select(in Board board, size_t depth = 3) {
        Action best;
        foreach (r; 0 .. board.length!0) {
            foreach (c; 0 .. board.length!1) {
                auto next = board.put(this.isBlack, r, c);
                if (next.valid) {
                    // After our move it is the opponent's turn -> isMine = false.
                    immutable score = search(next, false, depth);
                    if (this.verbose) writefln!"Action(%d, %d, %f)"(r, c, score);
                    if (best.score.isNaN || score > best.score) {
                        best = Action(r, c, score);
                    }
                }
            }
        }
        return best;
    }

    /// Recursive min-max evaluation of `board`.
    /// Params:
    ///   board  = position to evaluate
    ///   isMine = true when it is THIS agent's turn to move in `board`
    ///            (the old comment "true if board is result of my action"
    ///            was wrong: `select` passes false right after our move)
    ///   depth  = remaining depth; 0 stops the recursion
    /// Returns: board.score from this agent's point of view at the leaves,
    ///   max over children on our turns, min on the opponent's turns.
    Score search(in Board board, bool isMine, size_t depth) {
        import std.algorithm : max, min;

        if (board.finished || depth == 0) {
            return cast(double) board.score(this.isBlack);
        }

        // The side to move; loop-invariant, so computed once up front.
        immutable color = isMine ? this.isBlack : !this.isBlack;

        // Forced pass: hand the turn over without placing a stone.
        if (board.pass(color)) {
            return search(board, !isMine, depth - 1);
        }

        Score alpha = -double.infinity; // best found for the maximizer (us)
        Score beta = double.infinity;   // best found for the minimizer (opponent)
        foreach (r; 0 .. board.length!0) {
            foreach (c; 0 .. board.length!1) {
                immutable b = board.put(color, r, c);
                if (b.valid) {
                    const score = search(b, !isMine, depth - 1); // recur
                    if (isMine) {
                        alpha = max(alpha, score);
                    } else {
                        beta = min(beta, score);
                    }
                }
            }
        }
        return isMine ? alpha : beta;
    }
}

unittest {
    auto env = reset(4, 4); // 4x4 is known to end with the second player up 8 points
    auto agent = MinMaxAgent(true); // as the first player
    auto action = agent.select(env, 11);
    assert(action.score == -8.0);
}

/// Alpha-beta pruning agent.
/// This is not an efficient implementation as it computes
/// without memory of the game tree.
struct AlphaBetaAgent {
    bool isBlack;         // color this agent plays
    bool verbose = false; // print the score of every candidate move
    double[Board] scoreCache; // reserved for memoization; currently unused

    /// Evaluate every legal move with alpha-beta search and return the best.
    /// Params:
    ///   board = current position
    ///   depth = remaining search depth in plies
    /// Returns: the move with the highest score; if no legal move exists
    ///   the returned Action keeps its default NaN score.
    @safe
    Action select(in Board board, size_t depth = 3) {
        Action best;
        foreach (r; 0 .. board.length!0) {
            foreach (c; 0 .. board.length!1) {
                immutable next = board.put(this.isBlack, r, c);
                if (next.valid) {
                    // Each root move gets a fresh full (-inf, +inf) window.
                    immutable score = search(next, depth, -double.infinity, double.infinity);
                    if (this.verbose) {
                        writefln!"Action(%d, %d, %f)"(r, c, score);
                    }
                    if (best.score.isNaN || score > best.score) {
                        best = Action(r, c, score);
                    }
                }
            }
        }
        return best;
    }

    /// Alpha-beta search with `isMine` lifted to a compile-time flag:
    /// true when it is THIS agent's turn to move in `board`.
    /// Params:
    ///   board = position to evaluate
    ///   depth = remaining depth; 0 stops the recursion
    ///   alpha = lower bound already guaranteed to the maximizer
    ///   beta  = upper bound already guaranteed to the minimizer
    pure @safe
    Score search(bool isMine = false)(in Board board, size_t depth, Score alpha, Score beta) {
        // fmax/fmin match std.algorithm max/min here since scores are
        // never NaN below the root (leaves return finite board scores).
        import mir.math.common : fmax, fmin;

        if (board.finished || depth == 0) {
            return cast(double) board.score(this.isBlack);
        }

        // The side to move, resolved at compile time.
        static if (isMine) {
            immutable color = this.isBlack;
        } else {
            immutable color = !this.isBlack;
        }

        // Forced pass: hand the turn over without placing a stone.
        if (board.pass(color)) {
            return search!(!isMine)(board, depth - 1, alpha, beta);
        }

        foreach (r; 0 .. board.length!0) {
            foreach (c; 0 .. board.length!1) {
                immutable b = board.put(color, r, c);
                if (b.valid) {
                    immutable score = search!(!isMine)(b, depth - 1, alpha, beta); // recur
                    static if (isMine) {
                        alpha = fmax(alpha, score);
                    } else {
                        beta = fmin(beta, score);
                    }
                    if (alpha >= beta) { // cut: the other side will avoid this line
                        return isMine ? alpha : beta;
                    }
                }
            }
        }
        static if (isMine) {
            return alpha;
        } else {
            return beta;
        }
    }
}

unittest {
    auto env0 = reset(4, 4); // 4x4 is known to end with the second player up 8 points
    auto agentB = AlphaBetaAgent(true);  // as the first player
    auto agentW = AlphaBetaAgent(false); // as the second player

    auto action0 = agentB.select(env0, 11);
    assert(action0.score == -8.0);
    auto env1 = env0.put(agentB.isBlack, action0.row, action0.col);

    auto action1 = agentW.select(env1, 11);
    assert(action1.score == 8.0);
    auto env2 = env1.put(agentW.isBlack, action1.row, action1.col);

    auto action2 = agentB.select(env2, 11);
    assert(action2.score == -8.0);
    auto env3 = env2.put(agentB.isBlack, action2.row, action2.col);
}