gapoera.agents

🤖 AI Agents

The AI Agents API provides computer opponents for a player. Three kinds of agents are currently available:

  • Random agent: picks one of the available actions uniformly at random
  • Greedy agent: picks the action with the best immediate payoff
  • Alpha-Beta pruning agent: chooses its move with the alpha-beta pruning algorithm

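All agents share the same two-step interface: construct one with the player's color, then call act(env) to obtain a move. A minimal usage sketch, assuming a gapoera environment that exposes the valid_act/step calls used by the sources below (the env construction and the is_done check here are hypothetical):

from gapoera.agents import RandomAgent, BasicABPruning

# env = ...  # any gapoera environment exposing valid_act(color) and step(color, action)
agents = [RandomAgent(c=0, seed=42), BasicABPruning(c=1, depth=3)]
# while not env.is_done():                  # hypothetical termination check
#     mover = agents[env.current_player()]  # current_player() as used by the agents below
#     env.step(mover.color, mover.act(env))
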
"""
# 🤖 AI Agents

API AI Agents dapat digunakan sebagai lawan tanding pemain. 
Saat ini terdapat tiga jenis agent yang dapat digunakan:
- **Random agent**: bergerak secara acak mengambil salah satu aksi yang tersedia
- **Greedy agent**: bergerak dengan mengambil aksi yang paling menguntungkan saat itu
- **Alpha-Beta pruning agent**: bergerak menggunakan algoritma alpha-beta pruning

---
"""

# import all objects in this package
from .random_agent import *
from .greedy_agent import *
from .abp_agent import *

# mancala agents
from .mancala_agents import *

__all__ = ['RandomAgent', 'BasicGreedyAgent', 'BasicABPruning', 'GreedyMancala']
#   class RandomAgent:
import numpy as np

class RandomAgent(object):
    """
    Agent that picks one of the currently valid actions uniformly at random.
    """
    def __init__(self, c, seed=None):
        if seed is not None:
            self.seeding(seed)
        self.color = c
        self.name = "RANDOM AGENTS"

    def act(self, env):
        # choose uniformly among the actions that are valid for this player
        valid_act = env.valid_act(self.color)
        chosen_move = valid_act[np.random.choice(len(valid_act))]
        return chosen_move

    def seeding(self, seed):
        # seed numpy's global RNG so the agent's moves are reproducible
        np.random.seed(seed)

Agent that picks one of the currently valid actions uniformly at random.

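Only valid_act is needed to drive RandomAgent, so a stub environment is enough to see the seeding behaviour. A minimal sketch (ListEnv is a hypothetical stand-in, not part of the package):

from gapoera.agents import RandomAgent

class ListEnv:
    """Hypothetical stub exposing only the call RandomAgent makes."""
    def valid_act(self, color):
        return [0, 1, 2, 3]

agent = RandomAgent(c=0, seed=42)
print([agent.act(ListEnv()) for _ in range(5)])
# the same five moves on every run, because seeding() seeds numpy's RNG
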
#   class BasicGreedyAgent:
import numpy as np

class BasicGreedyAgent(object):
    """
    Greedy agent: plays the action with the best immediate reward.
    With probability `eps` it plays a random action instead (epsilon-greedy).
    """
    def __init__(self, c, eps=0, seed=None):
        if seed is not None:
            self.seeding(seed)
        self.color = c
        self.eps = eps
        self.name = "BASIC GREEDY AGENTS"

    def act(self, env):
        valid_act = env.valid_act(self.color)

        if np.random.rand() < self.eps:  # epsilon-greedy: occasional random move
            chosen_move = valid_act[np.random.choice(len(valid_act))]
            return chosen_move

        best_r = -1000000
        if len(valid_act) > 0:
            best_a = valid_act[0]
        else:
            raise RuntimeError("No Valid Action")

        # try every valid action in simulation and keep the best one-step reward
        for a in valid_act:
            env.simulate_start()
            r = env.step(self.color, a)["reward"]
            if r >= best_r:
                best_r = r
                best_a = a
            env.simulate_stop()
        return best_a

    def seeding(self, seed):
        np.random.seed(seed)

Greedy agent: plays the action with the best immediate reward; with probability eps it plays a random action instead (epsilon-greedy).

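Because act() scores each move through simulate_start/step/simulate_stop, a stateless stub with fixed per-action rewards is enough to watch the greedy choice. A minimal sketch (ToyEnv is a hypothetical stand-in, not part of the package):

from gapoera.agents import BasicGreedyAgent

class ToyEnv:
    """Hypothetical stub: each action yields a fixed one-step reward."""
    def __init__(self, rewards):
        self.rewards = rewards
    def valid_act(self, color):
        return list(range(len(self.rewards)))
    def step(self, color, action):
        return {"reward": self.rewards[action]}
    def simulate_start(self):
        pass  # a real env would snapshot its state here
    def simulate_stop(self):
        pass  # ...and restore it here

agent = BasicGreedyAgent(c=0, eps=0)
print(agent.act(ToyEnv([1, 5, 3])))  # 1: the index of the largest reward
# with eps=0.1 the same agent would instead play a random move 10% of the time
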
#   class BasicABPruning:
import numpy as np

class BasicABPruning(object):
    """
    Depth-limited minimax agent with alpha-beta pruning.
    """

    def __init__(self, c, eps=0, depth=3, seed=None):
        if seed is not None:
            self.seeding(seed)
        self.color = c
        self.eps = eps
        self.depth = depth
        self.name = "BASIC ALPHA-BETA PRUNING AGENTS"

        self._INF = 10000000

    def act(self, env):
        valid_act = env.valid_act(self.color)

        if np.random.rand() < self.eps:  # epsilon-greedy: occasional random move
            chosen_move = valid_act[np.random.choice(len(valid_act))]
            return chosen_move

        _, best_a = self.minimax(env, 0, True, -self._INF, self._INF)
        return best_a

    def minimax(self, env, depth, is_max, alpha, beta):
        # search horizon reached: score the leaf as neutral
        if depth == self.depth:
            return 0, None

        if is_max:
            # maximising node: this agent moves
            player = self.color
            valid_act = env.valid_act(player)
            best_r = -self._INF
            best_a = None
            for a in valid_act:
                env.simulate_start()
                r = env.step(self.color, a)["reward"]
                # some moves grant an extra turn, so the next node stays
                # maximising whenever the same player is still on move
                if env.current_player() == player:
                    tmp_r, _ = self.minimax(env, depth + 1, True, alpha, beta)
                else:
                    tmp_r, _ = self.minimax(env, depth + 1, False, alpha, beta)
                if tmp_r + r > best_r:
                    best_r = tmp_r + r
                    best_a = a
                alpha = max(alpha, best_r)
                env.simulate_stop()
                if beta <= alpha:  # prune: the minimiser will never allow this line
                    break
            return best_r, best_a

        else:
            # minimising node: the opponent moves
            player = 1 - self.color
            valid_act = env.valid_act(player)
            best_r = self._INF
            best_a = None
            for a in valid_act:
                env.simulate_start()
                r = env.step(self.color, a)["reward"]
                r *= -1  # flip sign for the minimising player
                if env.current_player() == player:
                    tmp_r, _ = self.minimax(env, depth + 1, False, alpha, beta)
                else:
                    tmp_r, _ = self.minimax(env, depth + 1, True, alpha, beta)
                if tmp_r + r < best_r:
                    best_r = tmp_r + r
                    best_a = a
                beta = min(beta, best_r)
                env.simulate_stop()
                if beta <= alpha:  # prune: the maximiser will never allow this line
                    break
            return best_r, best_a

    def seeding(self, seed):
        np.random.seed(seed)

Depth-limited minimax agent with alpha-beta pruning.

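The depth parameter bounds the lookahead: the agent expands up to depth plies of simulated moves, so its cost grows roughly with the branching factor raised to depth, and alpha-beta pruning only trims branches that provably cannot change the result. A minimal instantiation sketch (the env is assumed to support simulate_start/simulate_stop for lookahead):

from gapoera.agents import BasicABPruning

shallow = BasicABPruning(c=0, depth=2)  # fast, weaker lookahead
deep = BasicABPruning(c=1, depth=5)     # slower, sees three plies further
# move = deep.act(env)  # env must support simulate_start()/simulate_stop()
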
#   class GreedyMancala:
import numpy as np

class GreedyMancala(object):
    """
    Greedy agent specialised for Mancala. With type=1 it maximises the
    immediate score; with type=2 it prefers moves that grant an extra turn.
    """
    def __init__(self, c, eps=0, type=1, seed=None):
        if seed is not None:
            self.seeding(seed)
        self.color = c
        self.eps = eps
        self.name = "GREEDY MANCALA"
        self.type = type

    def act(self, env):
        valid_act = env.valid_act(self.color)

        if np.random.rand() < self.eps:  # epsilon-greedy: occasional random move
            chosen_move = valid_act[np.random.choice(len(valid_act))]
            return chosen_move

        if len(valid_act) == 0:
            raise RuntimeError("No Valid Action")

        best_r = -1000000
        best_a = []  # may hold several equally good actions
        for a in valid_act:
            env.simulate_start()
            r = env.step(self.color, a)["reward"]
            if self.type == 1:
                # type 1 collects the moves with the highest immediate score
                if r >= best_r:
                    if r == best_r:
                        best_a.append(a)
                    else:
                        best_a = [a]
                    best_r = r
            else:
                # type 2 collects the moves that grant an extra turn
                if env.current_player() == self.color:
                    best_a.append(a)
            env.simulate_stop()

        if self.type == 1:
            # break ties between equally scoring moves at random
            return int(np.random.choice(best_a))
        else:
            # prefer the highest extra-turn move; fall back to the highest valid one
            if len(best_a) > 0:
                return int(max(best_a))
            else:
                return int(max(valid_act))

    def seeding(self, seed):
        np.random.seed(seed)

Greedy agent specialised for Mancala: type=1 maximises the immediate score; type=2 prefers moves that grant an extra turn.

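The type parameter switches the heuristic: type=1 plays a random pit among those with the highest immediate score, while type=2 plays the highest-indexed move that grants an extra turn, falling back to the highest-indexed valid move when none keeps the turn. A minimal sketch of the two variants (the env here is assumed to be a gapoera Mancala environment):

from gapoera.agents import GreedyMancala

scorer = GreedyMancala(c=0, type=1, seed=7)  # maximise the immediate score
chainer = GreedyMancala(c=1, type=2)         # chase extra turns instead
# move = scorer.act(env)
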