gapoera.agents
🤖 AI Agents
The AI Agents API can be used as a computer opponent for the player. Three kinds of agents are currently available:
- Random agent: moves by taking one of the available actions at random
- Greedy agent: moves by taking the most immediately profitable action
- Alpha-Beta pruning agent: moves using the alpha-beta pruning algorithm
""" # 🤖 AI Agents API AI Agents dapat digunakan sebagai lawan tanding pemain. Saat ini terdapat tiga jenis agent yang dapat digunakan: - **Random agent**: bergerak secara acak mengambil salah satu aksi yang tersedia - **Greedy agent**: bergerak dengan mengambil aksi yang paling menguntungkan saat itu - **Alpha-Beta pruning agent**: bergerak menggunakan algoritma alpha-beta pruning --- """ # import all object in this package from .random_agent import * from .greedy_agent import * from .abp_agent import * # mancala agents from .mancala_agents import * __all__ = ['RandomAgent', 'BasicGreedyAgent', 'BasicABPruning', 'GreedyMancala']
```python
import numpy as np


class RandomAgent(object):
    """ Agent that picks one of the valid actions uniformly at random. """

    def __init__(self, c, seed=None):
        if seed is not None:   # None means unseeded; 0 is a valid seed
            self.seeding(seed)
        self.color = c
        self.name = "RANDOM AGENTS"

    def act(self, env):
        # choose uniformly among the actions the environment allows
        valid_act = env.valid_act(self.color)
        chosen_move = valid_act[np.random.choice(len(valid_act))]
        return chosen_move

    def seeding(self, seed):
        np.random.seed(seed)
```
Agent that picks one of the valid actions uniformly at random.
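A minimal sketch of RandomAgent in isolation; the stub environment below is hypothetical and implements only `valid_act`, the single method the agent calls:

```python
from gapoera.agents import RandomAgent

class _StubEnv:
    # hypothetical stand-in exposing only what RandomAgent needs
    def valid_act(self, color):
        return [0, 1, 2, 3]

agent = RandomAgent(c=0, seed=7)   # seeding np.random makes runs reproducible
print(agent.act(_StubEnv()))       # some action in {0, 1, 2, 3}
```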
```python
import numpy as np


class BasicGreedyAgent(object):
    """ Agent that takes the action with the highest immediate reward;
    with probability `eps` it acts randomly instead. """

    def __init__(self, c, eps=0, seed=None):
        if seed is not None:   # None means unseeded; 0 is a valid seed
            self.seeding(seed)
        self.color = c
        self.eps = eps
        self.name = "BASIC GREEDY AGENTS"

    def act(self, env):
        valid_act = env.valid_act(self.color)
        if np.random.rand() < self.eps:
            # epsilon-greedy movement: explore with a random action
            chosen_move = valid_act[np.random.choice(len(valid_act))]
            return chosen_move
        best_r = -1000000
        if len(valid_act) > 0:
            best_a = valid_act[0]
        else:
            raise RuntimeError("No Valid Action")
        for a in valid_act:
            # simulate each action and keep the one with the best reward;
            # ties are broken in favour of the later action (>=)
            env.simulate_start()
            r = env.step(self.color, a)["reward"]
            if r >= best_r:
                best_r = r
                best_a = a
            env.simulate_stop()
        return best_a

    def seeding(self, seed):
        np.random.seed(seed)
```
Agent that takes the action with the highest immediate reward; with probability `eps` it acts randomly instead.
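The one-step lookahead is easy to check against a stub: if each action's simulated reward is known, the agent must return the highest-reward action. Everything about `_StubEnv` below is an assumption for illustration:

```python
from gapoera.agents import BasicGreedyAgent

class _StubEnv:
    # hypothetical env where action a simply yields REWARDS[a]
    REWARDS = {0: 1, 1: 5, 2: 3}
    def valid_act(self, color):
        return list(self.REWARDS)
    def simulate_start(self):   # a real env would snapshot its state here
        pass
    def simulate_stop(self):    # ...and restore the snapshot here
        pass
    def step(self, color, a):
        return {"reward": self.REWARDS[a]}

agent = BasicGreedyAgent(c=0, eps=0)   # eps=0: never explore
print(agent.act(_StubEnv()))           # -> 1, the highest one-step reward
```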
```python
import numpy as np


class BasicABPruning(object):
    """ Depth-limited minimax agent with alpha-beta pruning;
    with probability `eps` it acts randomly instead. """

    def __init__(self, c, eps=0, depth=3, seed=None):
        if seed is not None:   # None means unseeded; 0 is a valid seed
            self.seeding(seed)
        self.color = c
        self.eps = eps
        self.depth = depth
        self.name = "BASIC ALPHA-BETA PRUNING AGENTS"
        self._INF = 10000000

    def act(self, env):
        valid_act = env.valid_act(self.color)
        if np.random.rand() < self.eps:
            # epsilon-greedy movement: explore with a random action
            chosen_move = valid_act[np.random.choice(len(valid_act))]
            return chosen_move
        _, best_a = self.minimax(env, 0, True, -self._INF, self._INF)
        return best_a

    def minimax(self, env, depth, is_max, alpha, beta):
        # leaf of the search tree: stop accumulating reward
        if depth == self.depth:
            return 0, None
        if is_max:
            player = self.color
            valid_act = env.valid_act(player)
            best_r = -self._INF
            best_a = None
            for a in valid_act:
                env.simulate_start()
                r = env.step(self.color, a)["reward"]
                # stay in the maximizing branch if the same player moves again
                if env.current_player() == player:
                    tmp_r, _ = self.minimax(env, depth + 1, True, alpha, beta)
                else:
                    tmp_r, _ = self.minimax(env, depth + 1, False, alpha, beta)
                if tmp_r + r > best_r:
                    best_r = tmp_r + r
                    best_a = a
                alpha = max(alpha, best_r)
                env.simulate_stop()
                if beta <= alpha:  # prune: the minimizer avoids this branch
                    break
            return best_r, best_a
        else:
            player = 1 - self.color
            valid_act = env.valid_act(player)
            best_r = self._INF
            best_a = None
            for a in valid_act:
                env.simulate_start()
                # the opponent's reward counts against us
                r = env.step(self.color, a)["reward"]
                r *= -1
                if env.current_player() == player:
                    tmp_r, _ = self.minimax(env, depth + 1, False, alpha, beta)
                else:
                    tmp_r, _ = self.minimax(env, depth + 1, True, alpha, beta)
                if tmp_r + r < best_r:
                    best_r = tmp_r + r
                    best_a = a
                beta = min(beta, best_r)
                env.simulate_stop()
                if beta <= alpha:  # prune: the maximizer avoids this branch
                    break
            return best_r, best_a

    def seeding(self, seed):
        np.random.seed(seed)
```
Depth-limited minimax agent with alpha-beta pruning; with probability `eps` it acts randomly instead.
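Because `minimax` returns `(0, None)` once `depth == self.depth`, setting `depth=1` collapses the search to a one-step greedy choice, which makes a stub-based sanity check easy. The stub environment is again hypothetical:

```python
from gapoera.agents import BasicABPruning

class _StubEnv:
    # hypothetical env; always reports the maximizing player's turn next
    REWARDS = {0: 1, 1: 4, 2: 2}
    def valid_act(self, player):
        return list(self.REWARDS)
    def simulate_start(self):
        pass
    def simulate_stop(self):
        pass
    def current_player(self):
        return 0
    def step(self, color, a):
        return {"reward": self.REWARDS[a]}

agent = BasicABPruning(c=0, depth=1)   # depth=1: no opponent reply searched
print(agent.act(_StubEnv()))           # -> 1, the best backed-up value
```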
```python
import numpy as np


class GreedyMancala(object):
    """ Greedy agent specialised for Mancala: type=1 maximises the
    immediate score, type=2 favours moves that grant an extra turn. """

    def __init__(self, c, eps=0, type=1, seed=None):
        if seed is not None:   # None means unseeded; 0 is a valid seed
            self.seeding(seed)
        self.color = c
        self.eps = eps
        self.name = "GREEDY MANCALA"
        self.type = type

    def act(self, env):
        valid_act = env.valid_act(self.color)
        if np.random.rand() < self.eps:
            # epsilon-greedy movement: explore with a random action
            chosen_move = valid_act[np.random.choice(len(valid_act))]
            return chosen_move
        if len(valid_act) == 0:
            raise RuntimeError("No Valid Action")
        best_r = -1000000
        best_a = []
        for a in valid_act:
            env.simulate_start()
            r = env.step(self.color, a)["reward"]
            if self.type == 1:
                # type 1 collects the moves with the most obtainable points
                if r >= best_r:
                    if r == best_r:
                        best_a.append(a)
                    else:
                        best_a = [a]
                    best_r = r
            else:
                # type 2 collects the moves that grant another turn
                if env.current_player() == self.color:
                    best_a.append(a)
            env.simulate_stop()
        if self.type == 1:
            return int(np.random.choice(best_a))
        if len(best_a) > 0:
            return int(max(best_a))
        return int(max(valid_act))

    def seeding(self, seed):
        np.random.seed(seed)
```
Greedy agent specialised for Mancala: type=1 maximises the immediate score, type=2 favours moves that grant an extra turn.
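A sketch of the type=1 behaviour: when several moves tie for the best simulated score, one of them is drawn at random. The stub below is hypothetical:

```python
from gapoera.agents import GreedyMancala

class _StubEnv:
    # hypothetical Mancala-like env; action a banks REWARDS[a] stones
    REWARDS = {0: 2, 1: 4, 2: 4}
    def valid_act(self, color):
        return list(self.REWARDS)
    def simulate_start(self):
        pass
    def simulate_stop(self):
        pass
    def current_player(self):
        return 1            # pretend the turn always passes to the opponent
    def step(self, color, a):
        return {"reward": self.REWARDS[a]}

agent = GreedyMancala(c=0, type=1, seed=3)
print(agent.act(_StubEnv()))   # -> 1 or 2: both tie for the best score
```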