-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrps_learning.py
More file actions
148 lines (96 loc) · 3.58 KB
/
Copy pathrps_learning.py
File metadata and controls
148 lines (96 loc) · 3.58 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
import sys
import random
class GameInfo(object):
    """Record of one rock-paper-scissors game.

    Every field is a string and defaults to '' which stands for
    "no such game". ``str()`` of a record is the three fields glued
    together in the order AI pose, human pose, outcome.
    """

    def __init__(self):
        # outcome is 'w', 'l' or 'd' ('' means non existing game)
        self.outcome = ''
        # poses are 'r', 'p' or 's' ('' means non existing game)
        self.pose_human = ''
        self.pose_ai = ''

    def __str__(self):
        """Return '<ai pose><human pose><outcome>' as a single string."""
        fields = (self.pose_ai, self.pose_human, self.outcome)
        return ''.join(fields)
class AIPlayer(object):
    """Rock-paper-scissors player that learns from a table of rewards.

    ``q_matrix`` maps a four-character key -- the previous game's state
    (AI pose, human pose, outcome) followed by the pose the AI played in
    the next game -- to the reward accumulated for that choice.
    """

    def __init__(self):
        # When True, get_pose_choice prints the table and the branch taken.
        self.debug = True
        # Compared against random.random(); 1 means always try the greedy pose.
        self.greedy_chance = 1
        # Game records, oldest first; str() of the last one is the current state.
        self.prev_games = []
        # '<ai pose><human pose><outcome><next ai pose>' -> accumulated reward.
        self.q_matrix = dict()

    def random_pose(self):
        """Return one of 'r', 'p' or 's' picked at random."""
        return "rps"[int(random.random() * 3)]

    def valid_state(self, state):
        """Return True when *state* is a three-character state string.

        A state is '<ai pose><human pose><outcome>'; the empty string is
        what callers pass when there is no previous game.
        """
        return len(state) == 3

    def _last_state(self):
        """Return the most recent game as a state string, or '' if none."""
        return str(self.prev_games[-1]) if self.prev_games else ''

    def find_greedy_pose(self, q_matrix, current_state):
        """Return ``(q, pose)`` for the best known pose in *current_state*.

        q_matrix: mapping of state+action keys to accumulated reward.
        current_state: three-character state string of the previous game.

        Returns ``(0, random pose)`` when the state is invalid or the table
        is empty. When the table has no entry for *current_state* the result
        is ``(-sys.maxsize - 1, random pose)``, i.e. a q value below any
        real reward. Ties keep the first entry met in the table's iteration
        order.
        """
        if not self.valid_state(current_state) or not q_matrix:
            return 0, self.random_pose()

        # Sentinel lower than any accumulated reward; sys.maxsize is used
        # because sys.maxint no longer exists on Python 3.
        max_q = -sys.maxsize - 1
        best_pose = self.random_pose()
        # Scan the table that was passed in (not self.q_matrix) in one pass.
        for state_action, q_value in q_matrix.items():
            if state_action[0:3] != current_state:
                continue  # entry belongs to a different state
            if q_value > max_q:
                max_q = q_value
                best_pose = state_action[-1]
        return max_q, best_pose

    def get_pose_choice(self):
        """Choose the AI's pose for the next game.

        The greedy pose is used only when the random draw falls within
        ``greedy_chance`` and its q value is strictly positive; otherwise a
        random pose is returned.
        """
        state = self._last_state()
        q_greedy, pose_greedy = self.find_greedy_pose(self.q_matrix, state)
        if self.debug:
            # Two spaces reproduce the output of: print 'Q Matrix: ', matrix
            print('Q Matrix:  ' + str(self.q_matrix))
        if random.random() <= self.greedy_chance and q_greedy > 0:
            # pick greedily
            pose = pose_greedy
            if self.debug:
                print('picks greedy - q = ' + str(q_greedy))
        else:
            # pick randomly
            pose = self.random_pose()
            if self.debug:
                print('picks random')
        return pose

    def update_with_game_outcome(self, pose_ai, pose_human, outcome):
        """Reward the pose just played and append the game to the history.

        pose_ai, pose_human: 'r', 'p' or 's'.
        outcome: 'w' (reward +1), 'l' (reward -1); anything else rewards 0.

        The reward is booked under the previous game's state plus
        *pose_ai*. The very first game has no previous state and only
        extends the history.
        """
        game = GameInfo()
        game.pose_ai = pose_ai
        game.pose_human = pose_human
        game.outcome = outcome

        state = self._last_state()
        if self.valid_state(state):
            state_action_pair = state + game.pose_ai
            if game.outcome == 'w':
                reward = 1
            elif game.outcome == 'l':
                reward = -1
            else:
                reward = 0
            # .get creates the entry at 0 on first sight, even for a draw.
            self.q_matrix[state_action_pair] = (
                self.q_matrix.get(state_action_pair, 0) + reward)

        # save game to history
        self.prev_games.append(game)
def main(num_games=50, human_pattern=('r', 's', 's', 'r', 'p', 'p', 's')):
    """Play the AI against a human who repeats a fixed pose pattern.

    num_games: number of games to play.
    human_pattern: sequence of 'r'/'p'/'s' poses the human cycles through.

    Prints each game's poses and outcome (from the AI's point of view),
    then the AI's share of the games that were not draws.

    Raises ValueError when games are requested but the pattern is empty.
    """
    if num_games > 0 and not human_pattern:
        raise ValueError('human_pattern must contain at least one pose')

    ai = AIPlayer()
    wins_ai = 0
    wins_human = 0
    # (ai pose, human pose) pairs in which the AI's pose beats the human's.
    ai_winning_pairs = (('r', 's'), ('p', 'r'), ('s', 'p'))

    for i in range(num_games):
        #-----------------------------------------
        # Determine poses
        # Cycle by the pattern's real length rather than a hard-coded 7.
        pose_human = human_pattern[i % len(human_pattern)]
        pose_ai = ai.get_pose_choice()

        #-----------------------------------------
        # Game outcome
        if pose_ai == pose_human:
            outcome = 'd'
        elif (pose_ai, pose_human) in ai_winning_pairs:
            outcome = 'w'
        else:
            outcome = 'l'

        ai.update_with_game_outcome(pose_ai, pose_human, outcome)

        print('human: ' + pose_human + ' ai: ' + pose_ai
              + ' outcome: ' + 'ai win? ' + outcome)
        print('')

        if outcome == 'w':
            wins_ai += 1
        elif outcome == 'l':
            wins_human += 1

    decided_games = wins_ai + wins_human
    if decided_games == 0:
        # Every game was a draw (or none were played): no ratio to report,
        # and dividing would raise ZeroDivisionError.
        print('No game was won by either side')
    else:
        print('AI won ' + str((float(wins_ai) / decided_games) * 100.0)
              + '% of won games')
#-----------------------------------------
# Entry Point
# Run the simulation only when this file is executed as a script, so that
# importing the module does not start playing games.
if __name__ == '__main__':
    main()