# IndexError: only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) and integer or boolean arrays are valid indices

0

Q_observation = self.Q_table[observation, :]
IndexError: only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) and integer or boolean arrays are valid indices

def __init__(self, obser_n, act_n, epsilong, gama, alfa):
    """Create the agent with an all-zero Q-table.

    Args:
        obser_n: Number of rows of the Q-table (one per observation).
        act_n: Number of columns of the Q-table (one per action).
        epsilong: Exploration parameter; ``actionChoose`` compares a
            uniform random draw against ``1 - epsilong``.
        gama: Factor multiplied with the best next-state Q-value in
            ``learn``.
        alfa: Step size multiplied with the temporal-difference error in
            ``learn``.
    """
    self.act_n = act_n
    # Rows are indexed by observation, columns by action.
    self.Q_table = np.zeros((obser_n, act_n))
    self.epsilong = epsilong
    self.gama = gama
    self.alfa = alfa

"""

"""

def actionChoose(self, observation):
    """Choose an action for ``observation`` with an epsilon-greedy rule.

    A uniform draw from [0, 1] is compared against ``1 - self.epsilong``.
    When the draw is larger, a random action index below ``self.act_n`` is
    returned; otherwise the choice is delegated to ``getMaxOfQtable``.

    Args:
        observation: Index of the Q-table row for the current state. It is
            only used on the greedy path, where it must be a valid NumPy
            index (normally an ``int``).
            NOTE(review): the IndexError quoted in this post means a
            non-integer value reached the Q-table lookup - presumably a
            float or an ``(obs, info)`` tuple from the environment's
            ``reset()``; confirm at the call site.

    Returns:
        The selected action index.
    """
    # The previous version fetched the Q-table row here without using it;
    # that dead lookup has been dropped.
    if random.uniform(0, 1) > (1 - self.epsilong):
        return np.random.choice(self.act_n)
    return self.getMaxOfQtable(observation)

# 　根据当前Observation返回Q值最大的action
def getMaxOfQtable(self, observation):
    """Return an action with the largest Q-value for ``observation``.

    When several actions share the maximum, one of them is picked at
    random with ``np.random.choice``.

    Args:
        observation: Index of the Q-table row to inspect; must be a valid
            NumPy index (normally an ``int``).

    Returns:
        The index of a maximising action.
    """
    q_row = self.Q_table[observation, :]
    # Collect every column that ties for the maximum so ties are not
    # always resolved in favour of the lowest action index.
    best_actions = np.flatnonzero(q_row == np.max(q_row))
    return np.random.choice(best_actions)

#  learn算法，更新Q表格
def learn(self, observation, action, reward, next_observation, next_action, is_done):
    """Update one Q-table entry from a single transition.

    The target is ``reward`` when the episode has ended, otherwise
    ``reward + self.gama * max(Q[next_observation, :])``. The entry
    ``Q[observation, action]`` is then moved towards the target by
    ``self.alfa`` times the difference.

    Args:
        observation: Row index of the state the action was taken in.
        action: Column index of the action that was taken.
        reward: Reward received for the transition.
        next_observation: Row index of the resulting state.
        next_action: Unused by this update; kept so existing callers that
            pass it keep working.
        is_done: True when the transition ended the episode.
    """
    if is_done:
        # Terminal transition: nothing to bootstrap from.
        target_value = reward
    else:
        target_value = reward + self.gama * np.max(self.Q_table[next_observation, :])
    # Temporal-difference error between the target and the current estimate.
    diff = target_value - self.Q_table[observation, action]
    self.Q_table[observation, action] += self.alfa * diff

def getQtable(self):
    """Return the agent's Q-table (the stored object itself, not a copy)."""
    return self.Q_table
python小白菜36 | 初学一级 | 园豆：152

您需要登录以后才能回答，未注册用户请先注册