Why isn't my model learning? The loss is very high
I built a simulation model in which trucks collect garbage containers based on their fill level. I used OpenAI Gym and TensorFlow/Keras to create my deep reinforcement learning model, but my training loss is very high. Where did I go wrong? Thanks in advance.
This is the Env:
# imports inferred from the snippet; Container, Camion and Map are the
# poster's own classes and are not shown here
import gym
from gym import Env
from gym.spaces import Box
import numpy as np
import pygame
import cv2
import tensorflow as tf
from tensorflow import keras
from rl.agents.dqn import DQNAgent
from rl.policy import GreedyQPolicy
from rl.memory import SequentialMemory

class Marltf(Env):
def __init__(self):
self.i= 0
self.containers1 = Container(3,3)
self.containers2 = Container(1,3)
self.containers3 = Container(3,1)
self.containers4 = Container(5,6)
self.containers5 = Container(8,6)
self.containers6 = Container(10,10)
self.containers7 = Container(11,11)
self.containers8 = Container(7,12)
self.passo = 0
self.containers2.lv = 2
self.containers3.lv = 4
self.containers5.lv = 4
self.containers6.lv = 1
self.containers8.lv = 2
self.shower_length= 300
self.containers = [self.containers1,self.containers2,self.containers3,self.containers4, self.containers5, self.containers6, self.containers7, self.containers8]
self.positions ={}
self.capacities ={}
self.camions= []
b = 0
for cont in self.containers:
b += cont.lv
reward = 0
nCamionFloat = 0
while b > 6:
b +=-10
nCamionFloat +=1
nCamionInt = int(nCamionFloat)
for ic in range(nCamionInt):
self.camions.append(Camion(1,1,None,ic))
for cam in self.camions:
self.positions[cam.name] = cam.position
self.capacities[cam.name] = 10
self.frames = []
self.cnt=0
self.mapp = Map(15,15,self.camions,self.containers)
self.state = (15*15)/5
self.action_space = gym.spaces.Discrete(4)
self.observation_space = Box(low = np.array([0]), high= np.array([51]))
def step(self, action):
moves = {0: (-1, 0),1: (1, 0),2: (0, -1),3: (0, 1)}
done = False
        for cam in self.camions:
            # the dicts are keyed by cam.name (see __init__), not by a counter
            cam.position = (self.positions[cam.name][0], self.positions[cam.name][1])
            cam.capacity = self.capacities[cam.name]
self.state += -5
mossa = moves[action]
x=self.camions[self.i].position
reward = 0
nuovaposizione = [mossa[0] + x[0],mossa[1] +x[1]]
self.shower_length -= 1
if self.mapp.mapp[nuovaposizione[0],nuovaposizione[1]] == -1:
reward += -5
self.state += -5
else:
self.mapp.mapp[x[0],x[1]] = 0
self.camions[self.i].position=nuovaposizione
self.mapp.mapp[nuovaposizione[0],nuovaposizione[1]] = 9
self.positions.update({self.camions[self.i].name : nuovaposizione})
reward += -1
            self.state += -2
for contain in self.containers:
            if self.camions[self.i].position[0] == contain.position[0] and self.camions[self.i].position[1] == contain.position[1]:
if contain.lv ==3 and self.camions[self.i].capacity >=3:
self.camions[self.i].reward += 100
self.camions[self.i].capacity += -3
self.capacities.update({self.camions[self.i].name : self.camions[self.i].capacity})
reward +=20
self.state +=20
contain.lv=0
elif contain.lv == 2 and self.camions[self.i].capacity >=2:
self.camions[self.i].reward += 50
self.camions[self.i].capacity += -2
self.capacities.update({self.camions[self.i].name : self.camions[self.i].capacity})
self.state +=10
reward += 50
contain.lv=0
elif contain.lv == 1 and self.camions[self.i].capacity >=1:
reward += 10
self.camions[self.i].reward +=5
self.camions[self.i].capacity += -1
self.capacities.update({self.camions[self.i].name : self.camions[self.i].capacity})
contain.lv=0
self.state+=1
elif contain.lv==4 and self.camions[self.i].capacity >=4:
reward +=50
self.camions[self.i].reward +=50
self.camions[self.i].capacity += -4
self.capacities.update({self.camions[self.i].name : self.camions[self.i].capacity})
self.state +=50
contain.lv=0
elif contain.lv==0 and self.camions[self.i].capacity >=4:
reward += -20
self.camions[self.i].reward +=-20
self.camions[self.i].capacity += 0
self.state += -20
contain.lv=0
if self.camions[self.i].capacity <=2:
self.camions[self.i].positions=(1,1)
self.positions.update({self.camions[self.i].name : (1,1)})
self.camions[self.i].capacity = 10
self.capacities.update({self.camions[self.i].name : self.camions[self.i].capacity})
        if self.i == 1:
            self.i = 0
        elif self.i == 0:
            self.i = 1
        done = self.shower_length <= 0
self.passo +=1
info = {}
return self.state,reward,done,info
def render(self, mode="human"):
BLACK = (0, 0, 0)
WHITE = (200, 200, 200)
WINDOW_HEIGHT = len(self.mapp.mapp[0]) *50
WINDOW_WIDTH = len(self.mapp.mapp[0]) *50
whiteC=pygame.image.load('white.jpg')
whiteC=pygame.transform.scale(whiteC,(50, 50))
greenC=pygame.image.load('green.jpg')
greenC=pygame.transform.scale(greenC,(50, 50))
yellowC=pygame.image.load('yellow.jpg')
yellowC=pygame.transform.scale(yellowC,(50, 50))
orangeC=pygame.image.load('orange.jpg')
orangeC=pygame.transform.scale(orangeC,(50, 50))
redC=pygame.image.load('red.jpg')
redC=pygame.transform.scale(redC,(50, 50))
gT=pygame.image.load('greenCamion.jpg')
gT=pygame.transform.scale(gT,(50, 50))
yT=pygame.image.load('yellowCamion.jpg')
yT=pygame.transform.scale(yT,(50, 50))
rT=pygame.image.load('redCamion.jpg')
rT=pygame.transform.scale(rT,(50, 50))
global SCREEN, CLOCK
pygame.init()
SCREEN = pygame.display.set_mode((WINDOW_WIDTH, WINDOW_HEIGHT))
CLOCK = pygame.time.Clock()
SCREEN.fill(BLACK)
pygame.draw.rect(SCREEN, WHITE, pygame.Rect( 10, 0, 50, 50))
blockSize = 50 #Set the size of the grid block
for i in range(0,len(self.mapp.mapp[0])):
for j in range(0,len(self.mapp.mapp[0])):
a=i*50
b=j*50
if self.mapp.mapp[i][j] == -1:
pygame.draw.rect(SCREEN, WHITE, pygame.Rect( a, b, 50, 50))
for c in self.camions :
if c.capacity > 6:
SCREEN.blit(gT, (c.position[0]*50, c.position[1]*50))
if c.capacity > 3 and c.capacity <= 6:
SCREEN.blit(yT, (c.position[0]*50, c.position[1]*50))
if c.capacity <= 3:
SCREEN.blit(rT, (c.position[0]*50, c.position[1]*50))
for contain in self.containers :
if contain.lv == 0:
SCREEN.blit(whiteC,(contain.position[0]*50 , contain.position[1]*50))
elif contain.lv == 1:
SCREEN.blit(greenC,(contain.position[0]*50 , contain.position[1]*50))
elif contain.lv == 2:
SCREEN.blit(yellowC,(contain.position[0]*50 , contain.position[1]*50))
elif contain.lv == 3:
SCREEN.blit(orangeC,(contain.position[0]*50 , contain.position[1]*50))
if contain.lv == 4:
SCREEN.blit(redC,(contain.position[0]*50 , contain.position[1]*50))
for x in range(0, WINDOW_WIDTH, blockSize):
for y in range(0, WINDOW_HEIGHT, blockSize):
rect = pygame.Rect(x, y, blockSize, blockSize)
pygame.draw.rect(SCREEN, WHITE, rect, 1)
pygame.display.flip()
view = pygame.surfarray.array3d(SCREEN)
view = view.transpose([1, 0, 2])
img_bgr = cv2.cvtColor(view, cv2.COLOR_RGB2BGR)
pygame.image.save(SCREEN, f"screenshot{self.cnt}.png")
self.cnt +=1
pygame.event.get()
def reset(self):
self.state = (15*15)/4
self.containers1.lv=3
self.containers2.lv=1
self.containers7.lv = 2
self.containers3.lv = 4
self.containers5.lv = 4
self.containers6.lv = 1
self.containers8.lv = 2
self.passo = 0
self.positions ={}
self.capacities ={}
self.camions= []
b = 0
for cont in self.containers:
b += cont.lv
reward = 0
nCamionFloat = 0
while b > 6:
b +=-10
nCamionFloat +=1
nCamionInt = int(nCamionFloat)
for ic in range(nCamionInt):
self.camions.append(Camion(1,1,None,ic))
for cam in self.camions:
self.positions[cam.name] = cam.position
self.capacities[cam.name] = 10
self.shower_length =60
self.cnt=0
        self.i = 0
        return self.state  # reset() must return the initial observation
env = Marltf()
states = env.observation_space.shape
actions = env.action_space.n
My model:
def build_model(states,actions):
model = tf.keras.Sequential([
        keras.layers.Flatten(input_shape=(1,) + states),  # keras-rl prepends a window_length axis to observations
        keras.layers.Dense(64),
keras.layers.LeakyReLU(0.24,),
keras.layers.Dense(64),
keras.layers.LeakyReLU(0.24,),
keras.layers.Dense(32),
keras.layers.LeakyReLU(0.24,),
keras.layers.Dense(16),
keras.layers.LeakyReLU(0.24,),
keras.layers.Dense(8),
keras.layers.LeakyReLU(0.24,),
keras.layers.Dense(actions, activation='linear'),
])
return model
model = build_model(states, actions)
# no separate compile needed here; dqn.compile() below takes care of it
def build_agent(model, actions):
    policy = GreedyQPolicy()  # note: pure greedy means the agent never explores during training
memory = SequentialMemory(limit=10000, window_length=1)
dqn = DQNAgent(model=model, memory=memory, policy=policy,nb_actions=actions, nb_steps_warmup=10, target_model_update=1e-2)
return dqn
dqn = build_agent(model, actions)
dqn.compile(tf.keras.optimizers.Adadelta(
learning_rate=0.1, rho=0.95, epsilon=1e-07, name='Adadelta'), metrics= ["accuracy"]
)
a = dqn.fit(env, nb_steps=5000, visualize=True, verbose=2)
The loss starts at 50 and climbs to 200.
2 Answers
In reinforcement learning you usually don't care about the loss but about the reward. Judging by the class name, this also looks like a multi-agent reinforcement learning problem, which is usually harder to deal with than a single-agent problem.
The first thing I would try to change is the number of steps: 5000 is very low.
If you haven't already, define an episode, then plot the cumulative reward at the end of each episode and check whether it increases as the number of episodes grows (see the sketch below).
This is the cleanest way to check whether the reward is actually increasing and the agent is learning something.
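A minimal sketch of that check, assuming the keras-rl DQNAgent from the question: fit() returns a Keras History object whose history dict holds one 'episode_reward' entry per completed episode, so you can plot it directly.
import matplotlib.pyplot as plt

# Train longer than the original 5000 steps and keep the returned History.
history = dqn.fit(env, nb_steps=50000, visualize=False, verbose=2)

# keras-rl records the cumulative reward of every completed episode here.
plt.plot(history.history['episode_reward'])
plt.xlabel('episode')
plt.ylabel('cumulative reward')
plt.show()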
The loss does not really matter in RL; a very high loss is actually normal. In RL we care most about the reward.
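As a quick way to look at the reward instead of the loss, here is a small sketch (again assuming the keras-rl agent from the question): evaluate the trained policy with test() and report the mean episode reward.
import numpy as np

# test() also returns a History object with one 'episode_reward' per episode.
scores = dqn.test(env, nb_episodes=10, visualize=False)
print('mean episode reward:', np.mean(scores.history['episode_reward']))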