1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
use crate::game::{Base, Game};
use crate::policies::MultiplayerPolicy;

use async_trait::async_trait;
use std::collections::HashMap;
use std::fmt::Debug;
use std::iter::FromIterator;
use std::marker::PhantomData;

/// MuZero policy.
pub mod muz;
/// PUCT policy.
pub mod puct;
/// RAVE policy.
pub mod rave;
/// UCT policy.
pub mod uct;

/// Bound required from a game to be searched by the MCTS machinery.
///
/// Alias for `Game + Clone`: states are cloned whenever a tree node is
/// created, so that every node owns its own copy of the game state.
pub trait MCTSGame = Game + Clone;
/* ABSTRACT MCTS */

use std::sync::RwLock;
use std::sync::{Arc, Weak};

/// Link from a child node to its parent node.
///
/// The parent is held through a weak reference (children are owned by their
/// parent, not the other way around) and is paired with the move that leads
/// from the parent to this child. `None` is used for a node without parent,
/// i.e. the root of a tree.
pub type MCTSNodeParent<G, MCTS> = Option<(Weak<RwLock<MCTSTreeNode<G, MCTS>>>, <G as Base>::Move)>;
/// Strong, shared reference to an MCTS tree node (and thus to the subtree
/// rooted at it), guarded by a read/write lock.
pub type MCTSNodeChild<G, MCTS> = Arc<RwLock<MCTSTreeNode<G, MCTS>>>;

/// MCTS tree node.
///
/// Ties together the structural part of the tree (parent and children links)
/// and the data attached to the node itself (`info`).
///
/// Children are stored behind `Arc`s, so cloning a node clones the links and
/// shares the children with the original instead of duplicating the subtree.
#[derive(Clone)]
pub struct MCTSTreeNode<G, MCTS>
where
    G: MCTSGame,
    MCTS: BaseMCTSPolicy<G>,
{
    /// Weak link to the parent node along with the move played from it to
    /// reach this node; `None` for a root node.
    pub parent: MCTSNodeParent<G, MCTS>,
    /// Children explored so far, indexed by the move leading to them.
    /// Moves that were never expanded have no entry here.
    pub moves: HashMap<G::Move, MCTSNodeChild<G, MCTS>>,
    /// Node information: game state, reward and policy-specific statistics.
    pub info: MCTSNode<G, MCTS>,
}

impl<G, MCTS> Debug for MCTSTreeNode<G, MCTS>
where
    G: MCTSGame,
    MCTS: BaseMCTSPolicy<G>,
{
    /// Prints the explored children followed by the node information, on a
    /// single line terminated by a newline. The parent link is not printed.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self { moves, info, .. } = self;
        writeln!(f, "{:?} ===> {:?}", moves, info)
    }
}

/// Data attached to a tree node, independent from its position in the tree.
#[derive(Clone)]
pub struct MCTSNode<G, MCTS>
where
    G: MCTSGame,
    MCTS: BaseMCTSPolicy<G>,
{
    /// Game state represented by this node.
    pub state: G,
    /// Reward obtained when entering this state from the parent node.
    pub reward: f32,
    /// Policy-specific statistics about the node itself.
    pub node: MCTS::NodeInfo,
    /// Policy-specific statistics about each move available from this state.
    pub moves: HashMap<G::Move, MCTS::MoveInfo>,
}

impl<G, MCTS> Debug for MCTSNode<G, MCTS>
where
    G: MCTSGame,
    MCTS: BaseMCTSPolicy<G>,
{
    /// Prints the policy-specific node and move statistics; the game state
    /// and the reward are left out.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self { node, moves, .. } = self;
        write!(f, "NODE: {:?}|| MOVES:{:?}", node, moves)
    }
}

/// Interface used to write a policy as an MCTS policy.
///
/// An implementor provides the policy-specific pieces (statistics stored in
/// the tree, move scoring, playout and backpropagation); the generic tree
/// descent and expansion are driven by `WithMCTSPolicy`.
#[async_trait]
pub trait BaseMCTSPolicy<G: MCTSGame>: Sized {
    /// Additional node statistics.
    type NodeInfo: Debug + Clone + Copy + Send + Sync;
    /// Additional move statistics.
    type MoveInfo: Debug + Clone + Copy + Send + Sync;
    /// Information generated by a playout.
    type PlayoutInfo: Send + Sync;

    /// Get the value associated with playing `action` from `board`.
    ///
    /// This value should be relative to the current player,
    /// it will be maximized by the selection process.
    ///
    /// # Params
    /// - `board`: state of the node the move is played from.
    /// - `action`: move being scored.
    /// - `node_info`: statistics of the node the move is played from.
    /// - `move_info`: statistics of the move being scored.
    /// - `exploration`: `true` while descending the tree during a search,
    /// `false` when picking the move that is finally played.
    fn get_value(
        &self,
        board: &G,
        action: &G::Move,
        node_info: &Self::NodeInfo,
        move_info: &Self::MoveInfo,
        exploration: bool,
    ) -> f32;

    /// Default node statistics for a given state.
    ///
    /// Called once for every node created in the tree.
    fn default_node(&self, board: &G) -> Self::NodeInfo;

    /// Default move statistics for a given state.
    ///
    /// Called once for every possible move of every node created in the tree.
    fn default_move(&self, board: &G, action: &G::Move) -> Self::MoveInfo;

    /// Backpropagate playout information.
    ///
    /// # Params
    /// - `leaf`: the newly created node by expansion, it can
    /// be used to propagate playout information. For the very first
    /// playout of a search this is the root node itself.
    /// - `history`: the list of selected moves until leaf (empty when
    /// `leaf` is the root).
    /// - `playout`: playout information generated by `simulate`.
    fn backpropagate(
        &mut self,
        leaf: MCTSNodeChild<G, Self>,
        history: &[G::Move],
        playout: Self::PlayoutInfo,
    );

    /// Generate playout information starting from board.
    async fn simulate(&self, board: &G) -> Self::PlayoutInfo;
}

use float_ord::FloatOrd;

/// Wrapper for MCTS policy.
pub struct WithMCTSPolicy<G, MCTS>
where
    G: MCTSGame,
    MCTS: BaseMCTSPolicy<G>,
{
    /// Structure that handle MCTS business logic.
    pub base_mcts: MCTS,
    N_PLAYOUTS: usize,
    /// Root node from the last exploration. Can be taken to gather exploration statistics.
    pub root: Option<MCTSNodeChild<G, MCTS>>,
    _g: std::marker::PhantomData<G>,
}

impl<G, MCTS> WithMCTSPolicy<G, MCTS>
where
    G: MCTSGame,
    MCTS: BaseMCTSPolicy<G>,
{
    /// Returns the move of `tree_node` with the highest value according to
    /// the wrapped policy.
    ///
    /// `exploration` is forwarded untouched to `get_value`.
    ///
    /// # Panics
    /// Panics if the node holds no move statistics at all, i.e. if its state
    /// had no possible move when the node was created.
    fn select_move(&self, tree_node: &MCTSTreeNode<G, MCTS>, exploration: bool) -> G::Move {
        let info = &tree_node.info;
        let (best_move, _) = info
            .moves
            .iter()
            .map(|(action, move_info)| {
                let value = self.base_mcts.get_value(
                    &info.state,
                    action,
                    &info.node,
                    move_info,
                    exploration,
                );
                (action, value)
            })
            .max_by_key(|scored| FloatOrd(scored.1))
            .expect("cannot select a move from a node without any candidate move");
        *best_move
    }

    /// Descends the tree from `root`, following the best move (exploration
    /// enabled) at each node.
    ///
    /// Returns the list of moves selected along the way and the node at which
    /// the descent stopped. The descent stops when:
    /// - the current state is finished (nothing is selected; if this happens
    ///   at the root the returned history is empty),
    /// - the selected move has no child yet,
    /// - the selected move leads to a child whose state is finished.
    ///
    /// In the last two cases the returned node is the *parent* of the
    /// selected move and the last element of the history is that move, so
    /// that expanding it from the returned node creates (or replaces) the
    /// corresponding child.
    fn select(&self, root: MCTSNodeChild<G, MCTS>) -> (Vec<G::Move>, MCTSNodeChild<G, MCTS>) {
        let mut history: Vec<G::Move> = Vec::new();
        let mut current = root;

        loop {
            // The read guard lives only inside this block so that `current`
            // can be reassigned (or returned) once the next step is known.
            let next = {
                let guard = current.read().unwrap();
                if guard.info.state.is_finished() {
                    /* we're at a leaf node. */
                    None
                } else {
                    /* play next move */
                    let action = self.select_move(&guard, true);
                    history.push(action);

                    // Only keep descending through children that exist and
                    // are not terminal.
                    let child = guard
                        .moves
                        .get(&action)
                        .filter(|child| !child.read().unwrap().info.state.is_finished())
                        .cloned();
                    child
                }
            };

            match next {
                Some(child) => current = child,
                None => return (history, current),
            }
        }
    }

    /// Creates the child of `tree_node` reached by playing `action`, attaches
    /// it to `tree_node` and returns it.
    ///
    /// The new node gets the default node/move statistics of the wrapped
    /// policy and the reward returned by playing `action`. An already
    /// existing child for `action` is replaced.
    async fn expand(
        &mut self,
        tree_node: MCTSNodeChild<G, MCTS>,
        action: &G::Move,
    ) -> MCTSNodeChild<G, MCTS> {
        // The read guard is a temporary of this statement: it is released
        // before the `.await` below.
        let mut new_state = tree_node.read().unwrap().info.state.clone();
        let reward = new_state.play(action).await;

        let new_node = self.base_mcts.default_node(&new_state);

        let moves_info = HashMap::from_iter(
            new_state
                .possible_moves()
                .iter()
                .map(|m| (*m, self.base_mcts.default_move(&new_state, m))),
        );

        let child = Arc::new(RwLock::new(MCTSTreeNode {
            parent: Some((Arc::downgrade(&tree_node), *action)),
            moves: HashMap::new(),
            info: MCTSNode {
                reward,
                moves: moves_info,
                node: new_node,
                state: new_state,
            },
        }));

        // Keep a handle on the child instead of looking it up again after
        // the insertion.
        tree_node
            .write()
            .unwrap()
            .moves
            .insert(*action, Arc::clone(&child));
        child
    }

    /// Runs one MCTS iteration from `root`: selection, expansion, simulation
    /// and backpropagation.
    ///
    /// Does nothing when the state of `root` is already finished, as there is
    /// no move to expand in that case.
    async fn tree_search(&mut self, root: MCTSNodeChild<G, MCTS>) {
        /* SELECT */
        let (history, last_node) = self.select(root);
        let action = match history.last() {
            Some(action) => action,
            // An empty history means that the root state is finished.
            None => return,
        };
        /* EXPAND */
        let created_node = self.expand(last_node, action).await;
        /* SIMULATE */
        let state = created_node.read().unwrap().info.state.clone();
        let playout = self.base_mcts.simulate(&state).await;
        /* BACKUP */
        self.base_mcts
            .backpropagate(created_node, &history, playout);
    }

    ///
    /// Instantiate a new MCTS policy, given a BaseMCTS instance.
    ///
    /// # Params
    /// - `p`: the policy handling the MCTS business logic.
    /// - `n_playouts`: number of tree searches run for each played move.
    ///
    pub fn new(p: MCTS, n_playouts: usize) -> Self {
        WithMCTSPolicy {
            base_mcts: p,
            N_PLAYOUTS: n_playouts,
            root: None,
            _g: PhantomData,
        }
    }
}

#[async_trait]
impl<G, MCTS> MultiplayerPolicy<G> for WithMCTSPolicy<G, MCTS>
where
    G: MCTSGame,
    MCTS: BaseMCTSPolicy<G> + Sync + Send,
{
    /// Chooses a move for `board` by growing a fresh search tree.
    ///
    /// A new root is built for `board`, seeded with one playout, then the
    /// configured number of tree searches is run. The move with the best
    /// value (exploration disabled) is returned and the tree is kept in
    /// `self.root`.
    async fn play(&mut self, board: &G) -> G::Move {
        let root = {
            let state = board.clone();
            let node = self.base_mcts.default_node(board);
            let moves = HashMap::from_iter(
                board
                    .possible_moves()
                    .iter()
                    .map(|m| (*m, self.base_mcts.default_move(board, m))),
            );

            Arc::new(RwLock::new(MCTSTreeNode {
                parent: None,
                info: MCTSNode {
                    reward: 0.0,
                    state,
                    node,
                    moves,
                },
                moves: HashMap::new(),
            }))
        };

        // Initial playout, backpropagated on the root with an empty history.
        let playout = self.base_mcts.simulate(board).await;
        self.base_mcts
            .backpropagate(Arc::clone(&root), &[], playout);

        for _ in 0..self.N_PLAYOUTS {
            self.tree_search(Arc::clone(&root)).await;
        }

        let chosen_move = {
            let root_node = root.read().unwrap();
            self.select_move(&root_node, false)
        };
        self.root = Some(root);

        chosen_move
    }
}