TypeError: argument of type 'method' is not iterable when using an RL classifier
I am working on a classification problem using Reinforcement Learning, following the notebook at https://github.com/gcamfer/Anomaly-ReactionRL/blob/master/Notebooks/AE_RL_awid.ipynb
I am facing an error in the part of the code shown below:
if __name__ == "__main__":
    # Train batch
    batch_size = 1
    # batch of memory ExpRep
    minibatch_size = 10
    ExpRep = True

    iterations_episode = 100

    # Initialization of the enviroment
    env = RLenv("train", batch_size=batch_size,
                iterations_episode=iterations_episode)
    # obs_size = size of the state
    obs_size = env.data_shape[1] - len(env.all_attack_names)

    #num_episodes = int(env.data_shape[0]/(iterations_episode)/10)
    num_episodes = 100

    '''
    Definition for the defender agent.
    '''
    defender_valid_actions = list(range(len(env.attack_types)))  # only detect type of attack
    defender_num_actions = len(defender_valid_actions)

    def_epsilon = 1       # exploration
    min_epsilon = 0.01    # min value for exploration
    def_gamma = 0.001
    def_decay_rate = 0.999

    def_hidden_size = 100
    def_hidden_layers = 2

    def_learning_rate = .01

    defender_agent = DefenderAgent(defender_valid_actions, obs_size, "EpsilonGreedy",
                                   epoch_length=iterations_episode,
                                   epsilon=def_epsilon,
                                   min_epsilon=min_epsilon,
                                   decay_rate=def_decay_rate,
                                   gamma=def_gamma,
                                   hidden_size=def_hidden_size,
                                   hidden_layers=def_hidden_layers,
                                   minibatch_size=minibatch_size,
                                   mem_size=1000,
                                   learning_rate=def_learning_rate,
                                   ExpRep=ExpRep)
    # Pretrained defender
    #defender_agent.model_network.model.load_weights("models/type_model.h5")

    '''
    Definition for the attacker agent.
    In this case the exploration is better to be greater.
    The correlation should be greater too, so gamma bigger.
    '''
    attack_valid_actions = list(range(len(env.attack_names)))
    attack_num_actions = len(attack_valid_actions)

    att_epsilon = 1
    min_epsilon = 0.99    # min value for exploration

    att_gamma = 0.001
    att_decay_rate = 0.99

    att_hidden_layers = 1
    att_hidden_size = 100

    att_learning_rate = 0.2

    attacker_agent = AttackAgent(attack_valid_actions, obs_size, "EpsilonGreedy",
                                 epoch_length=iterations_episode,
                                 epsilon=att_epsilon,
                                 min_epsilon=min_epsilon,
                                 decay_rate=att_decay_rate,
                                 gamma=att_gamma,
                                 hidden_size=att_hidden_size,
                                 hidden_layers=att_hidden_layers,
                                 minibatch_size=minibatch_size,
                                 mem_size=1000,
                                 learning_rate=att_learning_rate,
                                 ExpRep=ExpRep)

    # Statistics
    att_reward_chain = []
    def_reward_chain = []
    att_loss_chain = []
    def_loss_chain = []
    def_total_reward_chain = []
    att_total_reward_chain = []

    # Print parameters
    print("-------------------------------------------------------------------------------")
    print("Total epoch: {} | Iterations in epoch: {}"
          "| Minibatch from mem size: {} | Total Samples: {}|".format(num_episodes,
          iterations_episode, minibatch_size,
          num_episodes * iterations_episode))
    print("-------------------------------------------------------------------------------")
    print("Dataset shape: {}".format(env.data_shape))
    print("-------------------------------------------------------------------------------")
    print("Attacker parameters: Num_actions={} | gamma={} |"
          " epsilon={} | ANN hidden size={} | "
          "ANN hidden layers={}|".format(attack_num_actions,
          att_gamma, att_epsilon, att_hidden_size,
          att_hidden_layers))
    print("-------------------------------------------------------------------------------")
    print("Defense parameters: Num_actions={} | gamma={} | "
          "epsilon={} | ANN hidden size={} |"
          " ANN hidden layers={}|".format(defender_num_actions,
          def_gamma, def_epsilon, def_hidden_size,
          def_hidden_layers))
    print("-------------------------------------------------------------------------------")

    # Main loop
    attacks_by_epoch = []
    attack_labels_list = []
    for epoch in range(num_episodes):
        start_time = time.time()
        att_loss = 0.
        def_loss = 0.
        def_total_reward_by_episode = 0
        att_total_reward_by_episode = 0
        # Reset enviroment, actualize the data batch with random state/attacks
        states = env.reset()
        # Get actions for actual states following the policy
        attack_actions = attacker_agent.act(states)
        states = env.get_states(attack_actions)
        done = False

        attacks_list = []
        # Iteration in one episode
        for i_iteration in range(iterations_episode):
            attacks_list.append(attack_actions[0])
            # apply actions, get rewards and new state
            act_time = time.time()
            defender_actions = defender_agent.act(states)
            # Enviroment actuation for these actions
            next_states, def_reward, att_reward, next_attack_actions, done = env.act(defender_actions, attack_actions)
            # If the epoch*batch_size*iterations_episode is larger than the df
            attacker_agent.learn(states, attack_actions, next_states, att_reward, done)
            defender_agent.learn(states, defender_actions, next_states, def_reward, done)
            act_end_time = time.time()

            # Train network, update loss after at least minibatch_learns
            if ExpRep and epoch * iterations_episode + i_iteration >= minibatch_size:
                def_loss += defender_agent.update_model()
                att_loss += attacker_agent.update_model()
            elif not ExpRep:
                def_loss += defender_agent.update_model()
                att_loss += attacker_agent.update_model()
            update_end_time = time.time()

            # Update the state
            states = next_states
            attack_actions = next_attack_actions

            # Update statistics
            def_total_reward_by_episode += np.sum(def_reward, dtype=np.int32)
            att_total_reward_by_episode += np.sum(att_reward, dtype=np.int32)

        attacks_by_epoch.append(attacks_list)
        # Update user view
        def_reward_chain.append(def_total_reward_by_episode)
        att_reward_chain.append(att_total_reward_by_episode)
        def_loss_chain.append(def_loss)
        att_loss_chain.append(att_loss)

        end_time = time.time()
        print("\r\n|Epoch {:03d}/{:03d}| time: {:2.2f}|\r\n"
              "|Def Loss {:4.4f} | Def Reward in ep {:03d}|\r\n"
              "|Att Loss {:4.4f} | Att Reward in ep {:03d}|"
              .format(epoch, num_episodes, (end_time - start_time),
                      def_loss, def_total_reward_by_episode,
                      att_loss, att_total_reward_by_episode))
        print("|Def Estimated: {}| Att Labels: {}".format(env.def_estimated_labels,
                                                          env.def_true_labels))
        attack_labels_list.append(env.def_true_labels)
The error I am facing is:
TypeError: argument of type 'method' is not iterable
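From what I understand, this TypeError is normally raised when the `in` operator is applied to a bound method instead of the list it returns, as in this minimal sketch (the class and attribute names here are only hypothetical, not taken from the notebook):

class Env:
    def attack_names(self):               # defined as a method...
        return ["dos", "probe", "normal"]

env = Env()
"dos" in env.attack_names                 # TypeError: argument of type 'method' is not iterable
"dos" in env.attack_names()               # works, because the call returns the list

However, I cannot see where something like that happens in the code above.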
Any help would be appreciated. Thanks in advance.