Upload 3 files
- hongik/board_ai.py +332 -0
- hongik/engine_ai.py +571 -0
- hongik/hongik_ai.py +358 -0
hongik/board_ai.py
ADDED
@@ -0,0 +1,332 @@
# Implements the world of Go, defining the game board, its rules
# (moves, liberties, scoring, etc.), and determining the final winner.
#
# Author: Gemini 2.5 Pro, Gemini 2.5 Flash

import numpy as np
from collections import deque
import random

class IllegalMoveError(ValueError):
    """Exception class raised for an illegal move."""
    pass

class Board:
    """
    Represents the Go board and enforces game rules.
    """
    EMPTY, BLACK, WHITE, WALL = 0, 1, 2, 3
    PASS_LOC = 0

    def __init__(self, size):
        """Initializes the board, sets up walls, and history."""
        if isinstance(size, tuple):
            self.x_size, self.y_size = size
        else:
            self.x_size = self.y_size = size

        self.arrsize = (self.x_size + 1) * (self.y_size + 2) + 1
        self.dy = self.x_size + 1
        self.adj = [-self.dy, -1, 1, self.dy]

        self.board = np.zeros(shape=(self.arrsize), dtype=np.int8)
        self.pla = Board.BLACK
        self.prisoners = {Board.BLACK: 0, Board.WHITE: 0}
        self.ko_points = set()
        self.consecutive_passes = 0
        self.turns = 0
        self.ko_recapture_counts = {}
        self.position_history = set()
        self.position_history.add(self.board.tobytes())

        for i in range(-1, self.x_size + 1):
            self.board[self.loc(i, -1)] = Board.WALL
            self.board[self.loc(i, self.y_size)] = Board.WALL
        for i in range(-1, self.y_size + 1):
            self.board[self.loc(-1, i)] = Board.WALL
            self.board[self.loc(self.x_size, i)] = Board.WALL

    def copy(self):
        """Creates a deep copy of the current board state."""
        new_board = Board((self.x_size, self.y_size))
        new_board.board = np.copy(self.board)
        new_board.pla = self.pla
        new_board.prisoners = self.prisoners.copy()
        new_board.ko_points = self.ko_points.copy()
        new_board.consecutive_passes = self.consecutive_passes
        new_board.turns = self.turns
        new_board.ko_recapture_counts = self.ko_recapture_counts.copy()
        new_board.position_history = self.position_history.copy()
        return new_board

    @staticmethod
    def get_opp(player):
        """Gets the opponent of the given player."""
        return 3 - player

    def loc(self, x, y):
        """Converts (x, y) coordinates to a 1D array location."""
        return (x + 1) + self.dy * (y + 1)

    def loc_to_coord(self, loc):
        """Converts a 1D array location back to (x, y) coordinates."""
        return (loc % self.dy) - 1, (loc // self.dy) - 1

    def is_on_board(self, loc):
        """Checks if a location is within the board boundaries (not a wall)."""
        return self.board[loc] != Board.WALL

    def _get_group_info(self, loc):
        """Scans and returns the stones and liberties of a group at a specific location."""
        if not self.is_on_board(loc) or self.board[loc] == self.EMPTY:
            return None, None

        player = self.board[loc]
        group_stones, liberties = set(), set()
        q, visited = deque([loc]), {loc}

        while q:
            current_loc = q.popleft()
            group_stones.add(current_loc)
            for dloc in self.adj:
                adj_loc = current_loc + dloc
                if self.is_on_board(adj_loc):
                    adj_stone = self.board[adj_loc]
                    if adj_stone == self.EMPTY:
                        liberties.add(adj_loc)
                    elif adj_stone == player and adj_loc not in visited:
                        visited.add(adj_loc)
                        q.append(adj_loc)
        return group_stones, liberties

    def would_be_legal(self, player, loc):
        """Checks if a move would be legal without actually playing it."""
        if loc == self.PASS_LOC: return True
        if not self.is_on_board(loc) or self.board[loc] != self.EMPTY or loc in self.ko_points:
            return False

        temp_board = self.copy()
        temp_board.board[loc] = player

        opponent = self.get_opp(player)
        captured_any = False
        captured_stones = set()

        for dloc in temp_board.adj:
            adj_loc = loc + dloc
            if temp_board.board[adj_loc] == opponent:
                group, libs = temp_board._get_group_info(adj_loc)
                if not libs:
                    captured_any = True
                    captured_stones.update(group)

        if captured_any:
            for captured_loc in captured_stones:
                temp_board.board[captured_loc] = self.EMPTY

        next_board_hash = temp_board.board.tobytes()
        if next_board_hash in self.position_history:
            return False

        if captured_any:
            return True

        _, my_libs = temp_board._get_group_info(loc)
        return bool(my_libs)

    def get_features(self):
        """Generates a feature tensor for the neural network input."""
        features = np.zeros((self.y_size, self.x_size, 3), dtype=np.float32)

        current_player = self.pla
        opponent_player = self.get_opp(self.pla)

        for y in range(self.y_size):
            for x in range(self.x_size):
                loc = self.loc(x, y)
                stone = self.board[loc]
                if stone == current_player:
                    features[y, x, 0] = 1
                elif stone == opponent_player:
                    features[y, x, 1] = 1

        if self.pla == self.WHITE:
            features[:, :, 2] = 1.0

        return features

    def is_game_over(self):
        """Checks if the game is over (due to consecutive passes)."""
        return self.consecutive_passes >= 2

    def play(self, player, loc):
        """Plays a move on the board, captures stones, and updates the game state."""
        if not self.would_be_legal(player, loc):
            raise IllegalMoveError("This move is against the rules.")

        self.ko_points.clear()

        if loc == self.PASS_LOC:
            self.consecutive_passes += 1
        else:
            self.consecutive_passes = 0
            self.board[loc] = player
            opponent = self.get_opp(player)

            captured_stones = set()

            for dloc in self.adj:
                adj_loc = loc + dloc
                if self.board[adj_loc] == opponent:
                    group, libs = self._get_group_info(adj_loc)
                    if not libs:
                        captured_stones.update(group)

            if captured_stones:
                self.prisoners[player] += len(captured_stones)
                for captured_loc in captured_stones:
                    self.board[captured_loc] = self.EMPTY

            my_group, my_libs = self._get_group_info(loc)

            if len(captured_stones) == 1 and len(my_group) == 1 and len(my_libs) == 1:
                ko_loc = captured_stones.pop()
                self.ko_points.add(ko_loc)

        board_hash = self.board.tobytes()
        self.position_history.add(board_hash)
        self.pla = self.get_opp(player)
        self.turns += 1

    def _is_group_alive_statically(self, group_stones: set, board_state: np.ndarray) -> bool:
        """Statically analyzes if a group is alive by checking for two eyes."""
        if not group_stones: return False
        owner_player = board_state[next(iter(group_stones))]
        eye_locations = set()
        for stone_loc in group_stones:
            for dloc in self.adj:
                adj_loc = stone_loc + dloc
                if board_state[adj_loc] == self.EMPTY: eye_locations.add(adj_loc)
        real_eye_count, visited_eye_locs = 0, set()
        for eye_loc in eye_locations:
            if eye_loc in visited_eye_locs: continue
            eye_region, q, is_real_eye = set(), deque([eye_loc]), True
            visited_eye_locs.add(eye_loc); eye_region.add(eye_loc)
            while q:
                current_loc = q.popleft()
                for dloc in self.adj:
                    adj_loc = current_loc + dloc
                    if self.is_on_board(adj_loc):
                        if board_state[adj_loc] == self.get_opp(owner_player):
                            is_real_eye = False; break
                        elif board_state[adj_loc] == self.EMPTY and adj_loc not in visited_eye_locs:
                            visited_eye_locs.add(adj_loc); eye_region.add(adj_loc); q.append(adj_loc)
                if not is_real_eye: break
            if is_real_eye:
                eye_size = len(eye_region)
                if eye_size >= 6: real_eye_count += 2
                else: real_eye_count += 1
            if real_eye_count >= 2: return True
        return real_eye_count >= 2

    def _is_group_alive_by_rollout(self, group_stones_initial: set) -> bool:
        """Determines if a group is alive via Monte Carlo rollouts for ambiguous cases."""
        NUM_ROLLOUTS = 20
        MAX_ROLLOUT_DEPTH = self.x_size * self.y_size // 2

        owner_player = self.board[next(iter(group_stones_initial))]
        attacker = self.get_opp(owner_player)
        deaths = 0

        for _ in range(NUM_ROLLOUTS):
            rollout_board = self.copy()
            rollout_board.pla = attacker

            for _ in range(MAX_ROLLOUT_DEPTH):
                first_stone_loc = next(iter(group_stones_initial))
                if rollout_board.board[first_stone_loc] != owner_player:
                    deaths += 1
                    break

                legal_moves = [loc for loc in range(1, self.arrsize) if rollout_board.board[loc] == self.EMPTY]
                random.shuffle(legal_moves)

                move_made = False
                for move in legal_moves:
                    if rollout_board.would_be_legal(rollout_board.pla, move):
                        rollout_board.play(rollout_board.pla, move)
                        move_made = True
                        break

                if not move_made:
                    rollout_board.play(rollout_board.pla, self.PASS_LOC)

                if rollout_board.is_game_over():
                    break

        death_rate = deaths / NUM_ROLLOUTS
        print(f"[Life/Death Log] Group survival probability: {1-death_rate:.0%}")
        return death_rate < 0.5

    def get_winner(self, komi=6.5):
        """Calculates the final score and determines the winner, handling life and death."""
        temp_board_state = np.copy(self.board)
        total_captives = self.prisoners.copy()

        all_groups = self._find_all_groups(temp_board_state)
        for player, groups in all_groups.items():
            for group_stones in groups:
                is_alive = self._is_group_alive_statically(group_stones, temp_board_state)

                if not is_alive:
                    is_alive = self._is_group_alive_by_rollout(group_stones)

                if not is_alive:
                    total_captives[self.get_opp(player)] += len(group_stones)
                    for stone_loc in group_stones:
                        temp_board_state[stone_loc] = self.EMPTY

        final_board_with_territory = self._calculate_territory(temp_board_state)
        black_territory = np.sum((final_board_with_territory == self.BLACK) & (temp_board_state == self.EMPTY))
        white_territory = np.sum((final_board_with_territory == self.WHITE) & (temp_board_state == self.EMPTY))

        black_score = black_territory + total_captives.get(self.BLACK, 0)
        white_score = white_territory + total_captives.get(self.WHITE, 0) + komi
        winner = self.BLACK if black_score > white_score else self.WHITE

        return winner, black_score, white_score, total_captives

    def _find_all_groups(self, board_state: np.ndarray) -> dict:
        """Finds all stone groups on the board for a given board state."""
        visited, all_groups = set(), {self.BLACK: [], self.WHITE: []}
        for loc in range(self.arrsize):
            if board_state[loc] in [self.BLACK, self.WHITE] and loc not in visited:
                player, group_stones, q = board_state[loc], set(), deque([loc])
                visited.add(loc); group_stones.add(loc)
                while q:
                    current_loc = q.popleft()
                    for dloc in self.adj:
                        adj_loc = current_loc + dloc
                        if board_state[adj_loc] == player and adj_loc not in visited:
                            visited.add(adj_loc); group_stones.add(adj_loc); q.append(adj_loc)
                all_groups[player].append(group_stones)
        return all_groups

    def _calculate_territory(self, board_state: np.ndarray) -> np.ndarray:
        """Calculates the territory for each player on a given board state."""
        territory_map, visited = np.copy(board_state), set()
        for loc in range(self.arrsize):
            if territory_map[loc] == self.EMPTY and loc not in visited:
                region_points, border_colors, q = set(), set(), deque([loc])
                visited.add(loc); region_points.add(loc)
                while q:
                    current_loc = q.popleft()
                    for dloc in self.adj:
                        adj_loc = current_loc + dloc
                        if board_state[adj_loc] in [self.BLACK, self.WHITE]: border_colors.add(board_state[adj_loc])
                        elif board_state[adj_loc] == self.EMPTY and adj_loc not in visited:
                            visited.add(adj_loc); region_points.add(adj_loc); q.append(adj_loc)
                if len(border_colors) == 1:
                    owner = border_colors.pop()
                    for point in region_points: territory_map[point] = owner
        return territory_map
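
The listing above defines the full rule engine. As a quick orientation (not part of the commit), the following minimal sketch exercises the Board API defined in hongik/board_ai.py; the 9x9 size and the chosen coordinates are arbitrary assumptions for illustration.

    # Illustrative only: exercises the Board API from hongik/board_ai.py above.
    from hongik.board_ai import Board, IllegalMoveError

    board = Board(9)                      # assumption: a small 9x9 board for a quick demo
    loc = board.loc(2, 3)                 # convert (x, y) to the internal 1D index
    if board.would_be_legal(Board.BLACK, loc):
        board.play(Board.BLACK, loc)      # play a black stone; the turn passes to white

    features = board.get_features()       # (9, 9, 3) tensor for the neural network input
    print(features.shape, board.pla)      # expected: (9, 9, 3) 2  (white to move)

    # Two consecutive passes end the game, after which it can be scored.
    board.play(board.pla, Board.PASS_LOC)
    board.play(board.pla, Board.PASS_LOC)
    winner, black_score, white_score, captives = board.get_winner(komi=6.5)
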
hongik/engine_ai.py
ADDED
@@ -0,0 +1,571 @@
# The engine file that serves as the AI's brain, responsible for training
# the model through reinforcement learning and performing move analysis.
#
# Author: Gemini 2.5 Pro, Gemini 2.5 Flash

import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import threading
import tensorflow as tf
tf.get_logger().setLevel('ERROR')

import numpy as np
import time
import json
import random
import traceback
from collections import deque
import pickle
import csv
from huggingface_hub import hf_hub_download

from hongik.hongik_ai import HongikAIPlayer, CNNTransformerHybrid
from katrain.core.sgf_parser import Move
from katrain.core.constants import *
from kivy.clock import Clock
from hongik.board_ai import Board, IllegalMoveError
from katrain.core.game_node import GameNode
from katrain.gui.theme import Theme

class BaseEngine:
    """Base class for KaTrain engines."""
    def __init__(self, katrain, config):
        self.katrain, self.config = katrain, config
    def on_error(self, message, code=None, allow_popup=True):
        print(f"ERROR: {message}", OUTPUT_ERROR)
        if allow_popup and hasattr(self.katrain, "engine_recovery_popup"):
            Clock.schedule_once(lambda dt: self.katrain("engine_recovery_popup", message, code))

class HongikAIEngine(BaseEngine):
    """
    Main AI engine that manages the model, self-play, training, and analysis.
    It orchestrates the entire reinforcement learning loop.
    """
    BOARD_SIZE, NUM_LAYERS, D_MODEL, NUM_HEADS, D_FF = 19, 7, 256, 8, 1024
    SAVE_WEIGHTS_EVERY_STEPS, EVALUATION_EVERY_STEPS = 5, 20
    REPLAY_BUFFER_SIZE, TRAINING_BATCH_SIZE = 200000, 32
    CHECKPOINT_EVERY_GAMES = 10

    RULES = {
        "tromp-taylor": {"name": "Tromp-Taylor", "komi": 7.5, "scoring": "area"},
        "korean": {"name": "korean", "komi": 6.5, "scoring": "territory"},
        "chinese": {"name": "Chinese", "komi": 7.5, "scoring": "area"}
    }

    @staticmethod
    def get_rules(ruleset: str):
        """Returns the ruleset details for a given ruleset name."""
        if not ruleset or ruleset.lower() not in HongikAIEngine.RULES:
            ruleset = "korean"
        return HongikAIEngine.RULES[ruleset.lower()]

    def __init__(self, katrain, config):
        """
        Initializes the Hongik AI Engine. This involves setting up paths, loading
        the neural network model and replay buffer, and preparing for training.
        """
        super().__init__(katrain, config)
        print("Initializing Hongik AI Integrated Engine...", OUTPUT_DEBUG)

        from appdirs import user_data_dir
        APP_NAME = "HongikAI"
        APP_AUTHOR = "NamyongPark"

        self.BASE_PATH = user_data_dir(APP_NAME, APP_AUTHOR)
        print(f"Data will be stored in: {self.BASE_PATH}")

        self.REPLAY_BUFFER_PATH = os.path.join(self.BASE_PATH, "replay_buffer.pkl")
        self.WEIGHTS_FILE_PATH = os.path.join(self.BASE_PATH, "hongik_ai_memory.weights.h5")
        self.BEST_WEIGHTS_FILE_PATH = os.path.join(self.BASE_PATH, "hongik_ai_best.weights.h5")
        self.CHECKPOINT_BUFFER_PATH = os.path.join(self.BASE_PATH, "replay_buffer_checkpoint.pkl")
        self.CHECKPOINT_WEIGHTS_PATH = os.path.join(self.BASE_PATH, "hongik_ai_checkpoint.weights.h5")
        self.TRAINING_LOG_PATH = os.path.join(self.BASE_PATH, "training_log.csv")

        os.makedirs(self.BASE_PATH, exist_ok=True)

        REPO_ID = "puco21/HongikAI"
        files_to_download = [
            "replay_buffer.pkl",
            "hongik_ai_memory.weights.h5",
            "hongik_ai_best.weights.h5"
        ]

        print("Checking for AI data files...")
        for filename in files_to_download:
            local_path = os.path.join(self.BASE_PATH, filename)
            if not os.path.exists(local_path):
                print(f"Downloading {filename} from Hugging Face Hub...")
                try:
                    hf_hub_download(
                        repo_id=REPO_ID,
                        filename=filename,
                        local_dir=self.BASE_PATH,
                        local_dir_use_symlinks=False
                    )
                    print(f"'{filename}' download complete.")
                except Exception as e:
                    print(f"Failed to download {filename}: {e}")

        self.replay_buffer = deque(maxlen=self.REPLAY_BUFFER_SIZE)
        self.load_replay_buffer(self.REPLAY_BUFFER_PATH)

        self.hongik_model = CNNTransformerHybrid(self.NUM_LAYERS, self.D_MODEL, self.NUM_HEADS, self.D_FF, self.BOARD_SIZE)
        _ = self.hongik_model(np.zeros((1, self.BOARD_SIZE, self.BOARD_SIZE, 3), dtype=np.float32))

        load_path = self.CHECKPOINT_WEIGHTS_PATH if os.path.exists(self.CHECKPOINT_WEIGHTS_PATH) else (self.WEIGHTS_FILE_PATH if os.path.exists(self.WEIGHTS_FILE_PATH) else self.BEST_WEIGHTS_FILE_PATH)
        if os.path.exists(load_path):
            try:
                self.hongik_model.load_weights(load_path)
                print(f"Successfully loaded weights: {load_path}")
            except Exception as e:
                print(f"Failed to load weights (starting new training): {e}")

        try:
            max_visits = int(config.get("max_visits", 150))
        except (ValueError, TypeError):
            print(f"Warning: Invalid max_visits value in config. Using default (150).")
            max_visits = 150
        self.hongik_player = HongikAIPlayer(self.hongik_model, n_simulations=max_visits)

        self.optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001, clipnorm=1.0)
        self.policy_loss_fn = tf.keras.losses.CategoricalCrossentropy(from_logits=True)
        self.value_loss_fn = tf.keras.losses.MeanSquaredError()
        self.training_step_counter, self.game_history, self.self_play_active = 0, [], False

        class MockProcess: poll = lambda self: None
        self.katago_process = self.hongik_process = MockProcess()
        self.sound_index = False
        print("Hongik AI Engine ready!", OUTPUT_DEBUG)

    def save_replay_buffer(self, path):
        """Saves the current replay buffer to a specified file path using pickle."""
        try:
            with open(path, 'wb') as f:
                pickle.dump(self.replay_buffer, f)
            print(f"Successfully saved experience data ({len(self.replay_buffer)} items) to '{path}'.")
        except Exception as e:
            print(f"Error saving experience data: {e}")

    def load_replay_buffer(self, path):
        """Loads a replay buffer from a file, prioritizing a checkpoint file if it exists."""
        load_path = self.CHECKPOINT_BUFFER_PATH if os.path.exists(self.CHECKPOINT_BUFFER_PATH) else path
        if os.path.exists(load_path):
            try:
                with open(load_path, 'rb') as f:
                    self.replay_buffer = pickle.load(f)

                if self.replay_buffer.maxlen != self.REPLAY_BUFFER_SIZE:
                    new_buffer = deque(maxlen=self.REPLAY_BUFFER_SIZE)
                    new_buffer.extend(self.replay_buffer)
                    self.replay_buffer = new_buffer
                print(f"Successfully loaded experience data ({len(self.replay_buffer)} items) from '{load_path}'.")
            except Exception as e:
                print(f"Error loading experience data: {e}")

    def _checkpoint_save(self):
        """Saves a training checkpoint, including both the replay buffer and model weights."""
        print(f"\n[{time.strftime('%H:%M:%S')}] Saving checkpoint...")
        self.save_replay_buffer(self.CHECKPOINT_BUFFER_PATH)
        self.hongik_model.save_weights(self.CHECKPOINT_WEIGHTS_PATH)
        print("Checkpoint saved.")

    def _log_training_progress(self, details: dict):
        """Logs the progress of the training process to a CSV file for later analysis."""
        try:
            file_exists = os.path.isfile(self.TRAINING_LOG_PATH)
            with open(self.TRAINING_LOG_PATH, 'a', newline='', encoding='utf-8') as f:
                writer = csv.DictWriter(f, fieldnames=details.keys())
                if not file_exists:
                    writer.writeheader()
                writer.writerow(details)
        except Exception as e:
            print(f"Error logging training progress: {e}")

    def _node_to_board(self, node: GameNode) -> Board:
        """Converts a KaTrain GameNode object to the internal Board representation used by the engine."""
        board_snapshot = Board(node.board_size)
        root_node = node.root
        for player, prop_name in [(Board.BLACK, 'AB'), (Board.WHITE, 'AW')]:
            setup_stones = root_node.properties.get(prop_name, [])
            if setup_stones:
                for coords in setup_stones:
                    loc = board_snapshot.loc(coords[0], coords[1])
                    if board_snapshot.board[loc] == Board.EMPTY:
                        board_snapshot.play(player, loc)

        current_player = Board.BLACK
        for scene_node in node.nodes_from_root[1:]:
            move = scene_node.move
            if move:
                loc = Board.PASS_LOC if move.is_pass else board_snapshot.loc(move.coords[0], move.coords[1])
                board_snapshot.play(current_player, loc)
                current_player = board_snapshot.pla

        board_snapshot.pla = Board.BLACK if node.next_player == 'B' else Board.WHITE
        return board_snapshot

    def request_analysis(self, analysis_node: GameNode, callback: callable, **kwargs):
        """
        Requests an analysis of a specific board position. The analysis is run
        in a separate thread to avoid blocking the GUI.
        """
        if not self.katrain.game: return
        game_id = self.katrain.game.game_id
        board = self._node_to_board(analysis_node)
        threading.Thread(target=self._run_analysis, args=(game_id, board, analysis_node, callback), daemon=True).start()

    def _run_analysis(self, game_id, board, analysis_node, callback):
        """
        The target function for the analysis thread. It runs MCTS and sends the
        formatted results back to the GUI via the provided callback.
        """
        try:
            policy_logits, _ = self.hongik_player.model(np.expand_dims(board.get_features(), 0), training=False)
            policy = tf.nn.softmax(policy_logits[0]).numpy()

            _, root_node = self.hongik_player.get_best_move(board)
            analysis_result = self._format_analysis_results("analysis", root_node, board, policy)

            analysis_node.analysis = analysis_result

            def guarded_callback(dt):
                if self.katrain.game and self.katrain.game.game_id == game_id:
                    callback(analysis_result, False)
            Clock.schedule_once(guarded_callback)
        except Exception as e:
            print(f"Error during AI analysis execution: {e}")
            traceback.print_exc()

    def _format_analysis_results(self, query_id, root_node, board, policy=None):  # <-- added policy=None parameter
        """
        Converts MCTS analysis data into a dictionary format that the KaTrain GUI can understand.
        """
        move_infos, moves_dict = [], {}

        if root_node and root_node.children:
            sorted_children = sorted(root_node.children.items(), key=lambda i: i[1].n_visits, reverse=True)

            best_move_q = sorted_children[0][1].q_value if sorted_children else 0

            for i, (action, child) in enumerate(sorted_children):
                coords = board.loc_to_coord(self.hongik_player._action_to_loc(action, board))
                move_gtp = Move(coords=coords).gtp()

                current_player_winrate = (child.q_value + 1) / 2
                display_winrate = 1.0 - current_player_winrate if board.pla == Board.WHITE else current_player_winrate
                display_score = -child.q_value * 20 if board.pla == Board.WHITE else child.q_value * 20

                points_lost = (best_move_q - child.q_value) * 20

                move_data = {
                    "move": move_gtp,
                    "visits": child.n_visits,
                    "winrate": display_winrate,
                    "scoreLead": display_score,
                    "pointsLost": points_lost,
                    "pv": [move_gtp],
                    "order": i
                }

                move_infos.append(move_data)
                moves_dict[move_gtp] = move_data

        current_player_winrate = (root_node.q_value + 1) / 2 if root_node else 0.5
        display_winrate = 1.0 - current_player_winrate if board.pla == Board.WHITE else current_player_winrate
        display_score = -root_node.q_value * 20 if (root_node and board.pla == Board.WHITE) else (root_node.q_value * 20 if root_node else 0.0)

        root_info = {"winrate": display_winrate, "scoreLead": display_score, "visits": root_node.n_visits if root_node else 0}
        return {"id": query_id, "moveInfos": move_infos, "moves": moves_dict, "root": root_info, "rootInfo": root_info, "policy": policy.tolist() if policy is not None else None, "completed": True}

    def start_self_play_loop(self):
        """Starts the main self-play loop, which continuously plays games to generate training data."""
        print(f"\n===========================================\n[{time.strftime('%H:%M:%S')}] Starting new self-play game.\n===========================================")
        self.stop_self_play_loop()
        self.self_play_active = True
        self.game_history = []
        Clock.schedule_once(self._self_play_turn, 0.3)

    def request_score(self, game_node, callback):
        """Requests a score calculation for the current game node, run in a separate thread."""
        threading.Thread(target=lambda: callback(self.get_score(game_node)), daemon=True).start()

    def stop_self_play_loop(self):
        """Stops the active self-play loop."""
        if not self.self_play_active: return
        self.self_play_active = False
        Clock.unschedule(self._self_play_turn)

    def _self_play_turn(self, dt=None):
        """
        Executes a single turn of a self-play game. It gets the best move from the
        AI, plays it on the board, and stores the state for later training.
        """
        if not self.self_play_active: return
        game = self.katrain.game
        try:
            current_node = game.current_node
            board_snapshot = self._node_to_board(current_node)
            if game.end_result or board_snapshot.is_game_over():
                self._process_game_result(game)
                return
            move_loc, root_node = self.hongik_player.get_best_move(board_snapshot, is_self_play=True)
            coords = None if move_loc == Board.PASS_LOC else board_snapshot.loc_to_coord(move_loc)
            move_obj = Move(player='B' if board_snapshot.pla == Board.BLACK else 'W', coords=coords)
            game.play(move_obj)
            if not move_obj.is_pass and self.sound_index:
                self.katrain.play_sound()
            black_player_type = self.katrain.players_info['B'].player_type
            white_player_type = self.katrain.players_info['W'].player_type

            if black_player_type == PLAYER_AI and white_player_type == PLAYER_AI:
                if self.katrain.game.current_node.next_player == 'B':
                    self.katrain.controls.players['B'].active = True
                    self.katrain.controls.players['W'].active = False
                else:
                    self.katrain.controls.players['B'].active = False
                    self.katrain.controls.players['W'].active = True

            policy = np.zeros(self.BOARD_SIZE**2 + 1, dtype=np.float32)
            if root_node and root_node.children:
                total_visits = sum(c.n_visits for c in root_node.children.values())
                if total_visits > 0:
                    for action, child in root_node.children.items(): policy[action] = child.n_visits / total_visits

            blacks_win_rate = 0.5
            if root_node:
                player_q_value = root_node.q_value
                player_win_rate = (player_q_value + 1) / 2
                blacks_win_rate = player_win_rate if board_snapshot.pla == Board.BLACK else (1 - player_win_rate)

            self.game_history.append([board_snapshot.get_features(), policy, board_snapshot.pla, blacks_win_rate])
            self.katrain.update_gui(game.current_node)
            self.sound_index = True
            Clock.schedule_once(self._self_play_turn, 0.3)
        except Exception as e:
            print(f"Critical error during self-play: {e}"); traceback.print_exc(); self.stop_self_play_loop()

    def _process_game_result(self, game: 'Game'):
        """
        Processes the result of a finished game. It requests a final score and
        then triggers the callback to handle training data generation.
        """
        try:
            self.katrain.controls.set_status("Scoring...", STATUS_INFO)
            self.katrain.board_gui.game_over_message = "Scoring..."

            self.katrain.board_gui.game_is_over = True
            self.request_score(game.current_node, self._on_score_calculated)
        except Exception as e:
            print(f"Error requesting score calculation: {e}")
            self.katrain._do_start_hongik_selfplay()

    def _on_score_calculated(self, score_details):
        """
        Callback function that handles the game result after scoring. It assigns rewards,
        augments the data, adds it to the replay buffer, and triggers a training step.
        """
        try:
            if not score_details:
                print("Game ended but no result. Starting next game.")
                return

            game_num = self.training_step_counter + 1
            winner_text = "Black" if score_details['winner'] == 'B' else "White"
            b_score, w_score, diff = score_details['black_score'], score_details['white_score'], score_details['score']
            final_message = f"{winner_text} wins by {abs(diff):.1f} points"
            self.katrain.board_gui.game_over_message = final_message
            print(f"\n==========================================\n[{time.strftime('%H:%M:%S')}] Game #{game_num} Finished\n-----------------------------------------\n Winner: {winner_text}\n Margin: {abs(diff):.1f} points\n Details: Black {b_score:.1f} vs White {w_score:.1f}\n--------------------------------------------")

            winner = Board.BLACK if score_details['winner'] == 'B' else Board.WHITE

            REVERSAL_THRESHOLD = 0.2
            WIN_REWARD = 1.0
            LOSS_REWARD = -1.0
            BRILLIANT_MOVE_BONUS = 0.5
            CONSOLATION_REWARD = 0.5

            for i, (features, policy, player_turn, blacks_win_rate_after) in enumerate(self.game_history):
                blacks_win_rate_before = self.game_history[i-1][3] if i > 0 else 0.5
                if player_turn == Board.BLACK:
                    win_rate_swing = blacks_win_rate_after - blacks_win_rate_before
                else:  # player_turn == Board.WHITE
                    white_win_rate_before = 1 - blacks_win_rate_before
                    white_win_rate_after = 1 - blacks_win_rate_after
                    win_rate_swing = white_win_rate_after - white_win_rate_before

                is_brilliant_move = win_rate_swing > REVERSAL_THRESHOLD

                if player_turn == winner:
                    reward = WIN_REWARD
                    if is_brilliant_move:
                        reward += BRILLIANT_MOVE_BONUS
                else:
                    reward = LOSS_REWARD
                    if is_brilliant_move:
                        reward = CONSOLATION_REWARD

                for j in range(8):
                    aug_features = self._augment_data(features, j, 'features')
                    aug_policy = self._augment_data(policy, j, 'policy')
                    self.replay_buffer.append([aug_features, aug_policy, reward])

            self.training_step_counter += 1
            loss = self._train_model() if len(self.replay_buffer) >= self.TRAINING_BATCH_SIZE else None
            if loss is not None:
                print(f" Training complete! (Final loss: {loss:.4f})\n=======================================", OUTPUT_DEBUG)

            log_data = {
                'timestamp': time.strftime('%Y-%m-%d %H:%M:%S'),
                'game_num': game_num,
                'winner': score_details['winner'],
                'score_diff': diff,
                'total_moves': len(self.game_history),
                'loss': f"{loss:.4f}" if loss else "N/A"
            }
            self._log_training_progress(log_data)

            if self.training_step_counter % self.SAVE_WEIGHTS_EVERY_STEPS == 0:
                self.hongik_model.save_weights(self.WEIGHTS_FILE_PATH)

            if self.training_step_counter % self.CHECKPOINT_EVERY_GAMES == 0:
                self._checkpoint_save()

            if self.training_step_counter % self.EVALUATION_EVERY_STEPS == 0:
                self._evaluate_model()

        except Exception as e:
            print(f"Error during post-game processing: {e}")
            traceback.print_exc()
        finally:
            self.katrain._do_start_hongik_selfplay()

    def _train_model(self):
        """
        Performs one training step. It samples a minibatch from the replay buffer
        and uses it to update the neural network's weights.
        """
        if len(self.replay_buffer) < self.TRAINING_BATCH_SIZE: return None
        total_loss, TRAIN_ITERATIONS = 0, 5
        for _ in range(TRAIN_ITERATIONS):
            minibatch = random.sample(self.replay_buffer, self.TRAINING_BATCH_SIZE)
            features, policies, values = (np.array(e) for e in zip(*minibatch))
            with tf.GradientTape() as tape:
                pred_p, pred_v = self.hongik_model(features, training=True)
                value_loss = self.value_loss_fn(values[:, None], pred_v)
                policy_loss = self.policy_loss_fn(policies, pred_p)
                loss = policy_loss + value_loss
            self.optimizer.apply_gradients(zip(tape.gradient(loss, self.hongik_model.trainable_variables), self.hongik_model.trainable_variables))
            total_loss += loss.numpy()
        return total_loss / TRAIN_ITERATIONS

    def _augment_data(self, data, index, data_type):
        """
        Augments the training data by applying 8 symmetries (rotations and flips)
        to the board features and policy target.
        """
        if data_type == 'features':
            augmented = data
            if index & 1: augmented = np.fliplr(augmented)
            if index & 2: augmented = np.flipud(augmented)
            if index & 4: augmented = np.rot90(augmented, 1)
            return augmented
        elif data_type == 'policy':
            policy_board = data[:-1].reshape(self.BOARD_SIZE, self.BOARD_SIZE)
            augmented_board = policy_board
            if index & 1: augmented_board = np.fliplr(augmented_board)
            if index & 2: augmented_board = np.flipud(augmented_board)
            if index & 4: augmented_board = np.rot90(augmented_board, 1)
            return np.append(augmented_board.flatten(), data[-1])
        return data

    def get_score(self, game_node):
        """Calculates the final score of a game using the board's internal scoring method."""
        try:
            board = self._node_to_board(game_node)
            winner, black_score, white_score, _ = board.get_winner(self.katrain.game.komi)
            score_diff = black_score - white_score
            return {"winner": "B" if winner == Board.BLACK else "W", "score": score_diff, "black_score": black_score, "white_score": white_score}
        except Exception as e:
            print(f"Error during internal score calculation: {e}"); traceback.print_exc(); return None

    def _game_turn(self):
        """
        Handles the AI's turn in a game against a human or another AI. It runs
        in a separate thread to avoid blocking the GUI.
        """
        if self.self_play_active or self.katrain.game.end_result: return
        next_player_info = self.katrain.players_info[self.katrain.game.current_node.next_player]
        if next_player_info.player_type == PLAYER_AI:
            def ai_move_thread():
                try:
                    board_snapshot = self._node_to_board(self.katrain.game.current_node)
                    move_loc, _ = self.hongik_player.get_best_move(board_snapshot, is_self_play=False)
                    coords = None if move_loc == Board.PASS_LOC else board_snapshot.loc_to_coord(move_loc)
                    Clock.schedule_once(lambda dt: self.katrain._do_play(coords))
                except Exception as e:
                    print(f"\n--- Critical error during AI thinking (in thread) ---\n{traceback.format_exc()}\n---------------------------------------\n")
            threading.Thread(target=ai_move_thread, daemon=True).start()

    def _evaluate_model(self):
        """
        Periodically evaluates the currently training model against the best-known
        'champion' model to measure progress and update the best weights if the
        challenger is stronger.
        """
        print("\n--- [Championship Match Start] ---")
        challenger_player = self.hongik_player
        best_weights_path = self.BEST_WEIGHTS_FILE_PATH
        if not os.path.exists(best_weights_path):
            print("[Championship Match] Crowning the first champion!")
            self.hongik_model.save_weights(best_weights_path)
            return
        champion_model = CNNTransformerHybrid(self.NUM_LAYERS, self.D_MODEL, self.NUM_HEADS, self.D_FF, self.BOARD_SIZE)
        _ = champion_model(np.zeros((1, self.BOARD_SIZE, self.BOARD_SIZE, 3), dtype=np.float32))
        champion_model.load_weights(best_weights_path)
        champion_player = HongikAIPlayer(champion_model, int(self.config.get("max_visits", 150)))
        EVAL_GAMES, challenger_wins = 5, 0
        for i in range(EVAL_GAMES):
            print(f"\n[Championship Match] Game {i+1} starting...")
            board = Board(self.BOARD_SIZE)
            players = {Board.BLACK: challenger_player, Board.WHITE: champion_player} if i % 2 == 0 else {Board.BLACK: champion_player, Board.WHITE: challenger_player}
            while not board.is_game_over():
                current_player_obj = players[board.pla]
                move_loc, _ = current_player_obj.get_best_move(board)
                board.play(board.pla, move_loc)
            winner, _, _, _ = board.get_winner()
            if (winner == Board.BLACK and i % 2 == 0) or (winner == Board.WHITE and i % 2 != 0):
                challenger_wins += 1; print(f"[Championship Match] Game {i+1}: Challenger wins!")
            else:
                print(f"[Championship Match] Game {i+1}: Champion wins!")
        print(f"\n--- [Championship Match End] ---\nFinal Score: Challenger {challenger_wins} wins / Champion {EVAL_GAMES - challenger_wins} wins")
        if challenger_wins > EVAL_GAMES / 2:
            print("A new champion is born! Updating 'best' weights.")
            self.hongik_model.save_weights(best_weights_path)
        else:
            print("The champion defends the title. Keeping existing weights.")

    def on_new_game(self):
        """Called when a new game starts."""
        pass
    def start(self):
        """Starts the engine."""
        self.katrain.game_controls.set_player_selection()
    def shutdown(self, finish=False):
        """Shuts down the engine, saving progress and cleaning up checkpoint files."""
        self.stop_self_play_loop()
        self.save_replay_buffer(self.REPLAY_BUFFER_PATH)
        try:
            if os.path.exists(self.CHECKPOINT_BUFFER_PATH): os.remove(self.CHECKPOINT_BUFFER_PATH)
            if os.path.exists(self.CHECKPOINT_WEIGHTS_PATH): os.remove(self.CHECKPOINT_WEIGHTS_PATH)
        except OSError as e:
            print(f"Error deleting checkpoint files: {e}")
    def stop_pondering(self):
        """Stops pondering."""
        pass
    def queries_remaining(self):
        """Returns the number of remaining queries."""
        return 0
    def is_idle(self):
        """Checks if the engine is idle (i.e., not in a self-play loop)."""
        return not self.self_play_active
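
One detail worth calling out is the 8-fold symmetry augmentation in HongikAIEngine._augment_data: the same flip/rotation flags must be applied to the feature planes and to the 19x19 policy grid so that a move stays aligned with its board point. The standalone sketch below (not part of the commit) restates that transform and checks the alignment; the row-major y*19+x action layout is an assumption made for this demo, implied by the reshape in _augment_data but not confirmed elsewhere in this excerpt.

    # Illustrative only: standalone restatement of the symmetry transform used by
    # HongikAIEngine._augment_data, checking that features and policy stay aligned.
    import numpy as np

    BOARD_SIZE = 19

    def augment_plane(plane, index):
        # Same three bit-flags as _augment_data: horizontal flip, vertical flip, 90-degree rotation.
        out = plane
        if index & 1: out = np.fliplr(out)
        if index & 2: out = np.flipud(out)
        if index & 4: out = np.rot90(out, 1)
        return out

    features = np.zeros((BOARD_SIZE, BOARD_SIZE, 3), dtype=np.float32)
    policy = np.zeros(BOARD_SIZE**2 + 1, dtype=np.float32)
    x, y = 3, 15                        # arbitrary move chosen for the check
    features[y, x, 0] = 1.0             # current player's stone plane
    policy[y * BOARD_SIZE + x] = 1.0    # matching policy target (last slot is the pass move)

    for index in range(8):
        aug_f = augment_plane(features, index)
        aug_p = augment_plane(policy[:-1].reshape(BOARD_SIZE, BOARD_SIZE), index)
        # The hot policy cell must land on the same board point as the stone.
        assert np.array_equal(aug_f[:, :, 0], aug_p)
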
hongik/hongik_ai.py
ADDED
@@ -0,0 +1,358 @@
| 1 |
+
# Implements the AI's 'brain', combining a CNN and Transformer for intuition,
|
| 2 |
+
# and Monte Carlo Tree Search (MCTS) for rational deliberation.
|
| 3 |
+
#
|
| 4 |
+
# Author: 박남영,Gemini 2.5 Pro, Gemini 2.5 Flash
|
| 5 |
+
|
| 6 |
+
import tensorflow as tf
|
| 7 |
+
from tensorflow.keras import layers, Model
|
| 8 |
+
import numpy as np
|
| 9 |
+
|
| 10 |
+
from hongik.board_ai import Board, IllegalMoveError
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
# ===================================================================
|
| 14 |
+
# 트랜스포머 부품들
|
| 15 |
+
# 이 부분은 우리가 이전에 함께 만들었던 트랜스포머의 핵심 부품들입니다.
|
| 16 |
+
# 아빠의 설계 그대로 완벽하기에, 엄마는 손대지 않았어요.
|
| 17 |
+
# ===================================================================
|
| 18 |
+
def scaled_dot_product_attention(q, k, v, mask=None):
|
| 19 |
+
"""
|
| 20 |
+
Calculates the attention scores, which is the core of the attention mechanism.
|
| 21 |
+
It determines how much focus to place on other parts of the input sequence.
|
| 22 |
+
"""
|
| 23 |
+
matmul_qk = tf.matmul(q, k, transpose_b=True)
|
| 24 |
+
d_k = tf.cast(tf.shape(k)[-1], tf.float32)
|
| 25 |
+
scaled_attention_logits = matmul_qk / tf.math.sqrt(d_k)
|
| 26 |
+
if mask is not None:
|
| 27 |
+
scaled_attention_logits += (mask * -1e9)
|
| 28 |
+
attention_weights = tf.nn.softmax(scaled_attention_logits, axis=-1)
|
| 29 |
+
output = tf.matmul(attention_weights, v)
|
| 30 |
+
return output, attention_weights
|
| 31 |
+
|
| 32 |
+
class MultiHeadAttention(layers.Layer):
|
| 33 |
+
"""
|
| 34 |
+
Implements the Multi-Head Attention mechanism. This allows the model to jointly attend
|
| 35 |
+
to information from different representation subspaces at different positions,
|
| 36 |
+
which is more powerful than single-head attention.
|
| 37 |
+
"""
|
| 38 |
+
def __init__(self, d_model, num_heads):
|
| 39 |
+
super(MultiHeadAttention, self).__init__()
|
| 40 |
+
self.num_heads = num_heads
|
| 41 |
+
self.d_model = d_model
|
| 42 |
+
assert d_model % self.num_heads == 0
|
| 43 |
+
self.depth = d_model // self.num_heads
|
| 44 |
+
self.wq = layers.Dense(d_model)
|
| 45 |
+
self.wk = layers.Dense(d_model)
|
| 46 |
+
self.wv = layers.Dense(d_model)
|
| 47 |
+
self.dense = layers.Dense(d_model)
|
| 48 |
+
|
| 49 |
+
def split_heads(self, x, batch_size):
|
| 50 |
+
"""Splits the last dimension into (num_heads, depth)."""
|
| 51 |
+
seq_len = tf.shape(x)[1]
|
| 52 |
+
x = tf.reshape(x, (batch_size, seq_len, self.num_heads, self.depth))
|
| 53 |
+
return tf.transpose(x, perm=[0, 2, 1, 3])
|
| 54 |
+
|
| 55 |
+
def call(self, v, k, q, mask=None):
|
| 56 |
+
"""Processes the input tensors through the multi-head attention mechanism."""
|
| 57 |
+
batch_size = tf.shape(q)[0]
|
| 58 |
+
q = self.wq(q); k = self.wk(k); v = self.wv(v)
|
| 59 |
+
q = self.split_heads(q, batch_size)
|
| 60 |
+
k = self.split_heads(k, batch_size)
|
| 61 |
+
v = self.split_heads(v, batch_size)
|
| 62 |
+
scaled_attention, _ = scaled_dot_product_attention(q, k, v, mask)
|
| 63 |
+
scaled_attention = tf.transpose(scaled_attention, perm=[0, 2, 1, 3])
|
| 64 |
+
concat_attention = tf.reshape(scaled_attention, (batch_size, -1, self.d_model))
|
| 65 |
+
output = self.dense(concat_attention)
|
| 66 |
+
return output
|
| 67 |
+
|
| 68 |
+
class PositionWiseFeedForwardNetwork(layers.Layer):
|
| 69 |
+
"""
|
| 70 |
+
Implements the Position-wise Feed-Forward Network. This is applied to each
|
| 71 |
+
position separately and identically. It consists of two linear transformations
|
| 72 |
+
with a ReLU activation in between.
|
| 73 |
+
"""
|
| 74 |
+
def __init__(self, d_model, d_ff):
|
| 75 |
+
super(PositionWiseFeedForwardNetwork, self).__init__()
|
| 76 |
+
self.dense_1 = layers.Dense(d_ff, activation='relu')
|
| 77 |
+
self.dense_2 = layers.Dense(d_model)
|
| 78 |
+
def call(self, inputs):
|
| 79 |
+
return self.dense_2(self.dense_1(inputs))
|
| 80 |
+
|
| 81 |
+
class EncoderLayer(layers.Layer):
    """
    Represents one layer of the Transformer encoder. It consists of a multi-head
    attention mechanism followed by a position-wise feed-forward network.
    Includes dropout and layer normalization.
    """
    def __init__(self, d_model, num_heads, d_ff, dropout_rate=0.1):
        super(EncoderLayer, self).__init__()
        self.mha = MultiHeadAttention(d_model=d_model, num_heads=num_heads)
        self.ffn = PositionWiseFeedForwardNetwork(d_model=d_model, d_ff=d_ff)
        self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = layers.Dropout(dropout_rate)
        self.dropout2 = layers.Dropout(dropout_rate)

    def call(self, inputs, training, padding_mask=None):
        attn_output = self.mha(inputs, inputs, inputs, padding_mask)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)

        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)

        out2 = self.layernorm2(out1 + ffn_output)

        return out2

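# Each EncoderLayer sub-layer follows the post-norm residual pattern of the original
# Transformer:
#   out1 = LayerNorm(x + Dropout(SelfAttention(x)))
#   out2 = LayerNorm(out1 + Dropout(FFN(out1)))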
def get_positional_encoding(max_seq_len, d_model):
    """
    Generates positional encodings. Since the model contains no recurrence or
    convolution, this is used to inject information about the relative or
    absolute position of the tokens in the sequence.
    """
    angle_rads = (np.arange(max_seq_len)[:, np.newaxis] /
                  np.power(10000, (2 * (np.arange(d_model)[np.newaxis, :] // 2)) / np.float32(d_model)))
    angle_rads[:, 0::2] = np.sin(angle_rads[:, 0::2])
    angle_rads[:, 1::2] = np.cos(angle_rads[:, 1::2])
    pos_encoding = angle_rads[np.newaxis, ...]
    return tf.cast(pos_encoding, dtype=tf.float32)

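# The returned tensor has shape (1, max_seq_len, d_model) and uses the standard
# sinusoidal scheme:
#   PE(pos, 2i)   = sin(pos / 10000^(2i / d_model))
#   PE(pos, 2i+1) = cos(pos / 10000^(2i / d_model))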
# ===================================================================
# 3. CNN + Transformer 'Intuition' Engine
# ===================================================================
class CNNTransformerHybrid(Model):
    """
    The 'Intuition' engine, combining a 'Scout' (CNN) and a 'Commander' (Transformer).
    This version implements a lightweight head architecture using Squeeze-and-Excitation
    and convolutional heads for parameter efficiency and performance.
    """
    def __init__(self, num_transformer_layers, d_model, num_heads, d_ff,
                 board_size=19, cnn_filters=128, dropout_rate=0.1):
        super(CNNTransformerHybrid, self).__init__()
        self.board_size = board_size
        self.d_model = d_model

        self.cnn_conv1 = layers.Conv2D(cnn_filters, 3, padding='same', activation='relu')
        self.cnn_bn1 = layers.BatchNormalization()
        self.cnn_conv2 = layers.Conv2D(d_model, 1, padding='same', activation='relu')
        self.cnn_bn2 = layers.BatchNormalization()
        self.reshape_to_seq = layers.Reshape((board_size * board_size, d_model))
        self.positional_encoding = get_positional_encoding(board_size * board_size, d_model)
        self.dropout = layers.Dropout(dropout_rate)
        self.transformer_encoder = [EncoderLayer(d_model, num_heads, d_ff, dropout_rate)
                                    for _ in range(num_transformer_layers)]
        self.reshape_to_2d = layers.Reshape((board_size, board_size, d_model))

        self.se_gap = layers.GlobalAveragePooling2D()
        self.se_reshape = layers.Reshape((1, 1, d_model))
        self.se_dense_1 = layers.Dense(d_model // 16, activation='relu', kernel_initializer='he_normal', use_bias=False)
        self.se_dense_2 = layers.Dense(d_model, activation='sigmoid', kernel_initializer='he_normal', use_bias=False)
        self.se_multiply = layers.Multiply()

        self.policy_conv = layers.Conv2D(filters=2, kernel_size=1, padding='same', activation='relu')
        self.policy_bn = layers.BatchNormalization()
        self.policy_flatten = layers.Flatten()
        self.policy_dense = layers.Dense(board_size * board_size + 1, name='policy_head')

        self.value_conv = layers.Conv2D(filters=1, kernel_size=1, padding='same', activation='relu')
        self.value_bn = layers.BatchNormalization()
        self.value_flatten = layers.Flatten()
        self.value_dense1 = layers.Dense(256, activation='relu')
        self.value_dense2 = layers.Dense(1, activation='tanh', name='value_head')

    @tf.function(jit_compile=False)
    def call(self, inputs, training=False):
        x = self.cnn_conv1(inputs)
        x = self.cnn_bn1(x, training=training)
        x = self.cnn_conv2(x)
        cnn_output = self.cnn_bn2(x, training=training)

        x = self.reshape_to_seq(cnn_output)
        seq_len = tf.shape(x)[1]
        x += self.positional_encoding[:, :seq_len, :]
        x = self.dropout(x, training=training)

        for i in range(len(self.transformer_encoder)):
            x = self.transformer_encoder[i](x, training=training, padding_mask=None)

        transformer_output = self.reshape_to_2d(x)

        se = self.se_gap(transformer_output)
        se = self.se_reshape(se)
        se = self.se_dense_1(se)
        se = self.se_dense_2(se)
        se_output = self.se_multiply([transformer_output, se])

        ph = self.policy_conv(se_output)
        ph = self.policy_bn(ph, training=training)
        ph = self.policy_flatten(ph)
        policy_logits = self.policy_dense(ph)

        vh = self.value_conv(se_output)
        vh = self.value_bn(vh, training=training)
        vh = self.value_flatten(vh)
        vh = self.value_dense1(vh)
        value = self.value_dense2(vh)
        return policy_logits, value

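# Illustrative construction (hyperparameter values are examples only; the real values
# are supplied wherever the model is instantiated, e.g. by the training engine):
#   model = CNNTransformerHybrid(num_transformer_layers=4, d_model=256, num_heads=8, d_ff=1024)
#   policy_logits, value = model(features_batch)  # features_batch: (batch, 19, 19, n_feature_planes)
#   # policy_logits: (batch, 19*19 + 1) with the last entry being the pass move
#   # value: (batch, 1), squashed to [-1, 1] by tanh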
# ===================================================================
# 4. MCTS 'Reason' Engine
# ===================================================================
class MCTSNode:
    """
    Represents a single node in the Monte Carlo Tree Search. Each node stores
    statistics: the visit count (n_visits), the mean action value (q_value), and
    the prior probability (p_sa).
    """
    def __init__(self, parent=None, prior_p=1.0):
        self.parent, self.children, self.n_visits, self.q_value, self.p_sa = parent, {}, 0, 0, prior_p
        self.C_PUCT_BASE, self.C_PUCT_INIT = 19652, 1.25

    def select(self, root_n_visits):
        """
        Selects the child node with the highest Upper Confidence Bound (UCB) score.
        This balances exploration and exploitation during the search.
        """
        dynamic_c_puct = np.log((1 + root_n_visits + self.C_PUCT_BASE) / self.C_PUCT_BASE) + self.C_PUCT_INIT
        return max(self.children.items(),
                   key=lambda item: item[1].q_value + dynamic_c_puct * item[1].p_sa * np.sqrt(self.n_visits) / (1 + item[1].n_visits))

    def expand(self, action_probs):
        """
        Expands a leaf node by creating new child nodes for all legal moves,
        initializing their statistics from the prior probabilities given by the
        neural network.
        """
        for action, prob in enumerate(action_probs):
            if prob > 0 and action not in self.children:
                self.children[action] = MCTSNode(parent=self, prior_p=prob)

    def update(self, leaf_value):
        """
        Updates the statistics of the node and its ancestors by backpropagating
        the value obtained from the leaf node of a simulation.
        """
        if self.parent:
            self.parent.update(-leaf_value)
        self.n_visits += 1
        self.q_value += (leaf_value - self.q_value) / self.n_visits

    def is_leaf(self):
        """Checks if the node is a leaf node (i.e., has no children)."""
        return len(self.children) == 0

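# The select() rule above is the PUCT formula used by AlphaZero-style searches:
#   score(a) = Q(a) + c_puct * P(a) * sqrt(N_parent) / (1 + N(a))
# where the exploration weight grows slowly with the root visit count:
#   c_puct = log((1 + N_root + C_PUCT_BASE) / C_PUCT_BASE) + C_PUCT_INIT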
# ===================================================================
# HongikAIPlayer Class
# ===================================================================
class HongikAIPlayer:
    """
    The 'Supreme Commander' that makes the final decision. It uses the neural
    network's 'intuition' to guide the 'rational' search of the MCTS,
    ultimately selecting the best move.
    """
    def __init__(self, cnn_transformer_model, n_simulations=100):
        self.model = cnn_transformer_model
        self.n_simulations = n_simulations
        self.board_size = cnn_transformer_model.board_size

    def _action_to_loc(self, action, board):
        """Converts a policy network action index to a board location."""
        return board.loc(action % self.board_size, action // self.board_size) if action < self.board_size**2 else Board.PASS_LOC

    def get_best_move(self, board_state: Board, is_self_play=False):
        """
        Determines the best move for the current board state by running MCTS simulations.
        It integrates the neural network's policy and value predictions to guide the search.
        """
        features = board_state.get_features()
        policy_logits, value = self.model(np.expand_dims(features, 0), training=False)
        intuition_probs = tf.nn.softmax(policy_logits[0]).numpy()

        def is_filling_eye(loc, board):
            if board.board[loc] != Board.EMPTY:
                return False
            neighbor_colors = {board.board[loc + dloc] for dloc in board.adj if board.board[loc + dloc] != Board.WALL}
            return len(neighbor_colors) == 1 and board.pla in neighbor_colors

        for action, prob in enumerate(intuition_probs):
            if prob > 0.001:
                move_loc = self._action_to_loc(action, board_state)
                if move_loc != Board.PASS_LOC and is_filling_eye(move_loc, board_state):
                    intuition_probs[action] = 0

        pass_action = self.board_size**2
        pass_prob = intuition_probs[pass_action]
        intuition_probs[pass_action] = 0

        if board_state.turns < 100:
            pass_prob = 0

        for action, prob in enumerate(intuition_probs):
            if prob > 0 and not board_state.would_be_legal(board_state.pla, self._action_to_loc(action, board_state)):
                intuition_probs[action] = 0

        total_prob = np.sum(intuition_probs)
        if total_prob <= 1e-6:
            return self._action_to_loc(pass_action, board_state), MCTSNode()
        intuition_probs /= total_prob

        root = MCTSNode()
        root.expand(intuition_probs)
        for _ in range(self.n_simulations):
            node, search_board = root, board_state.copy()
            while not node.is_leaf():
                action, node = node.select(root.n_visits)
                move_loc = self._action_to_loc(action, search_board)
                if not search_board.would_be_legal(search_board.pla, move_loc):
                    node = None
                    break

                try:
                    search_board.play(search_board.pla, move_loc)
                except IllegalMoveError:
                    parent_node = node.parent
                    if parent_node and action in parent_node.children:
                        del parent_node.children[action]

                    node = None
                    break
            if node is not None:
                leaf_features = search_board.get_features()
                _, leaf_value_tensor = self.model(np.expand_dims(leaf_features, 0), training=False)
                leaf_value = leaf_value_tensor.numpy()[0][0]
                node.update(leaf_value)

        if not root.children:
            return self._action_to_loc(pass_action, board_state), root

        PASS_THRESHOLD = -0.99
        best_action_node = max(root.children.values(), key=lambda n: n.n_visits)
        if best_action_node.q_value < PASS_THRESHOLD and pass_prob > 0:
            return self._action_to_loc(pass_action, board_state), root

        if board_state.turns < 30:
            if is_self_play:
                if not root.children:
                    return self._action_to_loc(pass_action, board_state), root

                child_actions = np.array(sorted(root.children.keys()))
                visit_counts = np.array([root.children[action].n_visits for action in child_actions], dtype=np.float32)

                temperature = 1.0
                visit_counts_temp = visit_counts**(1 / temperature)
                if np.sum(visit_counts_temp) == 0:
                    probs = np.ones(len(child_actions)) / len(child_actions)
                else:
                    probs = visit_counts_temp / np.sum(visit_counts_temp)

                action = np.random.choice(child_actions, p=probs)
                return self._action_to_loc(action, board_state), root

        if not root.children:
            return self._action_to_loc(pass_action, board_state), root

        visit_counts = np.zeros_like(intuition_probs)
        for action, node in root.children.items():
            visit_counts[action] = node.n_visits

        total_visits = np.sum(visit_counts)
        reason_probs = visit_counts / total_visits if total_visits > 0 else intuition_probs

        final_probs = (0.7 * intuition_probs) + (0.3 * reason_probs)

        final_probs[pass_action] = -1

        sorted_actions = np.argsort(final_probs)[::-1]
        for action in sorted_actions:
            move_loc = self._action_to_loc(action, board_state)
            if board_state.would_be_legal(board_state.pla, move_loc):
                return move_loc, root

        return self._action_to_loc(pass_action, board_state), root
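# Illustrative usage (variable names here are hypothetical; assumes a trained model and
# the Board class from board_ai providing get_features/would_be_legal/play as used above):
#   board = Board(19)
#   player = HongikAIPlayer(model, n_simulations=100)
#   move_loc, search_root = player.get_best_move(board)
#   board.play(board.pla, move_loc)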