My neural network can't solve a maze with TensorFlow.NET and Q-learning
I am practicing neural networks with TensorFlow and Q-learning. For my project I work in C# so that I can later migrate the program to the Unity game engine.
I use the TensorFlow.NET library:
https://github.com/SciSharp/TensorFlow.NET
To begin with, my goal is to train a neural network to find the arrival point of a maze while avoiding the forbidden cell.
My environment is a 3x3 array of ints; the current location is indicated by the player's X and Y position.
PlayerX : 0
PlayerY : 2
[ 0 0 1
0 -1 0
PLAYER 0 0 ]
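(For reference, the GetLinearState method in the source below marks the player's cell with 0.5 before flattening the grid row by row, so the start state shown above is fed to the network as the 9-element input vector [0, 0, 1, 0, -1, 0, 0.5, 0, 0].)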
I have four possible actions: LEFT, RIGHT, UP, DOWN.
I get a reward depending on where I end up when I perform an action:
0 : Free
-1 : Forbidden
1 : Arrived
-0.25 : Visited
-0.75 : Unchanged position
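For example, starting from PlayerX = 0, PlayerY = 2: moving UP or RIGHT lands on a free, unvisited cell and gives 0; moving LEFT or DOWN is blocked by the grid border, so the position is unchanged and the reward is -0.75; stepping onto the forbidden centre cell gives -1; coming back to a cell already in the visited list gives -0.25; and reaching the goal in the top-right corner gives 1. This is what the Action method below implements.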
My neural network has 9 inputs corresponding to the state of the environment and 4 outputs whose probabilities designate the best action to take. It has one hidden layer with 15 neurons. The activation functions are ReLU for the hidden layer and softmax for the output, and I use the Adam optimizer to train the model.
I run my program, but in the test phase the agent still does not find the arrival point: it gets stuck against the edge of the grid and keeps performing the same action in a loop.
https://i.sstatic.net/yedn9.png
Training Model Code
public void TrainModel(float[,] states, float[,] actions, float[,] rewards, float[,] next_states)
{
Tensor tf_states = tf.convert_to_tensor(states, TF_DataType.TF_FLOAT);
Tensor tf_rewards = tf.convert_to_tensor(rewards, TF_DataType.TF_FLOAT);
Tensor tf_next_states = tf.convert_to_tensor(next_states, TF_DataType.TF_FLOAT);
Tensor tf_actions = tf.convert_to_tensor(actions, TF_DataType.TF_FLOAT);
List<float> losses = new List<float>();
int size = (int)tf_next_states.shape.dims[0];
Tensor Q_stp1 = neuralNet.Apply(tf_next_states, training: true);
Tensor argmax = tf.cast(tf.max(Q_stp1, 1), TF_DataType.TF_FLOAT);
Tensor argmaxExpand_dims = tf.expand_dims(argmax, 1);
Tensor applyScalar = tf.multiply(argmaxExpand_dims, 0.99f);
Tensor applyRewards = tf.add(applyScalar, tf_rewards);
int count = (int)applyRewards.shape.dims[0];
Tensor Qtargets = tf.convert_to_tensor(new NDArray(applyRewards.BufferToArray(), (count, 1), TF_DataType.TF_FLOAT));
Func<Tensor, Tensor, Tensor, Tensor> model_loss = (tf_states, tf_actions, Qtargets) =>
{
Tensor result = neuralNet.Apply(tf_states, training: true);
Tensor subtract = tf.subtract(result, Qtargets);
Tensor square = tf.square(subtract);
Tensor loss = tf.multiply(square, tf_actions);
return tf.reduce_mean(loss);
};
Action<Tensor, Tensor, Tensor> run_optimization = (tf_states, tf_actions, Qtargets) =>
{
// Wrap computation inside a GradientTape for automatic differentiation.
using var g = tf.GradientTape();
// Forward pass.
var loss = model_loss(tf_states, tf_actions, Qtargets);
losses.Add(loss.BufferToArray()[0]);
// Compute gradients.
var gradients = g.gradient(loss, neuralNet.trainable_variables);
// Update W and b following gradients.
optimizer.apply_gradients(zip(gradients, neuralNet.trainable_variables.Select(x => x as ResourceVariable)));
};
for (int b = 0; b < size; b += batch_size)
{
var to = (b + batch_size < size) ? b + batch_size : b + (size - b);
var tf_states_b = tf_states.slice(new Slice(b, to));
var tf_actions_b = tf_actions.slice(new Slice(b, to));
var Qtargets_b = Qtargets.slice(new Slice(b, to));
// Minimize the error
run_optimization(tf_states_b, tf_actions_b, Qtargets_b);
}
message += "Mean loss : " + Average(losses.ToArray()) + "\n\n\n";
}
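For clarity, here is a minimal plain-C# sketch (not my actual TensorFlow code; QTargetSketch and its members are hypothetical names used only for illustration) of the per-sample target and loss that TrainModel above is meant to compute: the Bellman target reward + 0.99 * max Q(next_state), and a squared error masked by the one-hot action vector.
using System.Linq;
// Minimal per-sample sketch of what TrainModel is intended to compute.
// qNext / qCurrent stand in for the 4 Q-values returned by neuralNet.Apply for one state.
static class QTargetSketch
{
    const float Gamma = 0.99f;   // same discount factor as the 0.99f used in TrainModel
    // Bellman target: reward + discounted best Q-value of the next state.
    public static float Target(float reward, float[] qNext)
        => reward + Gamma * qNext.Max();
    // Masked squared error: only the action actually taken (one-hot mask) contributes,
    // then the mean is taken over the 4 outputs, matching
    // tf.reduce_mean(tf.multiply(tf.square(Q - target), mask)).
    public static float Loss(float[] qCurrent, float[] actionMask, float target)
    {
        float sum = 0f;
        for (int a = 0; a < qCurrent.Length; a++)
            sum += actionMask[a] * (qCurrent[a] - target) * (qCurrent[a] - target);
        return sum / qCurrent.Length;
    }
}
The real method does the same thing in batches with tensors: tf.max(Q_stp1, 1) gives the row-wise maximum Q-value of the next states, and the one-hot action mask zeroes out the error of the three actions that were not taken.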
Source Code:
MazeNetworkLearning MazeNetworkLearning = new MazeNetworkLearning();
MazeNetworkLearning.Start();
public class MazeNetworkLearning
{
enum EActions
{
LEFT = 0,
RIGHT = 1,
DOWN = 2,
UP = 3
}
// World State
float[,] currentState;
int playerX = 0;
int playerY = 2;
// Model Settings.
NeuralNet neuralNet;
Tensorflow.Keras.Optimizers.OptimizerV2 optimizer;
int num_classes = 4;
int neuronOfHiddenLayer = 15;
// Training parameters.
float learning_rate = 0.001f;
int batch_size = 32;
System.Random random;
string message = "";
public void Start()
{
random = new System.Random();
InitState();
InitModel();
Train();
}
#region Training
private void InitState()
{
currentState = new float[,]
{
{0, 0, 1},
{0, -1, 0},
{0, 0, 0}
};
}
private void InitModel()
{
neuralNet = new NeuralNet(new NeuralNetArgs
{
NumClasses = num_classes,
NeuronOfHiddenLayer = neuronOfHiddenLayer,
Activation1 = keras.activations.Relu,
Activation2 = keras.activations.Softmax,
});
optimizer = keras.optimizers.Adam(learning_rate);
tf.enable_eager_execution();
}
public void Train()
{
float epsilon = 1.0f;
List<float[]> states = new List<float[]>();
List<float[]> rewards = new List<float[]>();
List<float> reward_mean = new List<float>();
List<float[]> next_states = new List<float[]>();
List<float[]> actions = new List<float[]>();
float[] linearstate = GetLinearState();
float[] nextLinearState = null;
string statesLog = "";
statesLog += LogState(To1DArray(currentState));
for (int epi = 0; epi < 150; epi++)
{
int step = 0;
playerX = 0;
playerY = 2;
visited.Clear();
visited.Add(new int[] { playerX, playerY });
bool findplayer = false;
while (step < 400 && findplayer == false)
{
EActions action = TakeAction(linearstate, epsilon);
float reward = Action(action);
nextLinearState = GetLinearState();
statesLog += LogState(To1DArray(currentState));
float[] mask = { 0, 0, 0, 0 };
mask[(int)action] = 1;
int index = random.Next(0, states.Count);
statesLog += "reward : " + reward + " \n";
//index = states.Count;
states.Insert(index, linearstate);
rewards.Insert(index, new float[] { reward });
reward_mean.Insert(index, reward);
next_states.Insert(index, nextLinearState);
actions.Insert(index, mask);
if (states.Count > 10000)
{
states.RemoveAt(0);
rewards.RemoveAt(0);
reward_mean.RemoveAt(0);
next_states.RemoveAt(0);
actions.RemoveAt(0);
}
linearstate = nextLinearState;
step++;
findplayer = playerX == 2 && playerY == 0;
}
epsilon = Math.Clamp(epsilon * 0.99f, 0.1f, 1.0f);
if (epi % 5 == 0)
{
message += "---------------\n";
message += "rewards mean : " + Average(reward_mean.ToArray()) + "\n";
message += "episode : " + epi + "\n" + "\n";
TrainModel(To2D(states.ToArray()), To2D(actions.ToArray()), To2D(rewards.ToArray()), To2D(next_states.ToArray()));
}
}
string folder = @"C:\Temp\";
string fileName = "CSharpCornerAuthors.txt";
string fullPath = folder + fileName;
File.WriteAllLines("WriteText.txt", new string[] { statesLog });
Console.Write(statesLog);
Console.Write(message);
message = "------------------ TEST ------------------\n";
playerX = 0;
playerY = 2;
int step2 = 0;
while (step2 < 400 && !(playerX == 2 && playerY == 0))
{
EActions action = TakeAction(linearstate, 0);
Action(action);
message += LogState(To1DArray(currentState));
step2++;
}
Console.Write(message);
}
public void TrainModel(float[,] states, float[,] actions, float[,] rewards, float[,] next_states)
{
Tensor tf_states = tf.convert_to_tensor(states, TF_DataType.TF_FLOAT);
Tensor tf_rewards = tf.convert_to_tensor(rewards, TF_DataType.TF_FLOAT);
Tensor tf_next_states = tf.convert_to_tensor(next_states, TF_DataType.TF_FLOAT);
Tensor tf_actions = tf.convert_to_tensor(actions, TF_DataType.TF_FLOAT);
List<float> losses = new List<float>();
int size = (int)tf_next_states.shape.dims[0];
Tensor Q_stp1 = neuralNet.Apply(tf_next_states, training: true);
Tensor argmax = tf.cast(tf.max(Q_stp1, 1), TF_DataType.TF_FLOAT);
Tensor argmaxExpand_dims = tf.expand_dims(argmax, 1);
Tensor applyScalar = tf.multiply(argmaxExpand_dims, 0.99f);
Tensor applyRewards = tf.add(applyScalar, tf_rewards);
int count = (int)applyRewards.shape.dims[0];
Tensor Qtargets = tf.convert_to_tensor(new NDArray(applyRewards.BufferToArray(), (count, 1), TF_DataType.TF_FLOAT));
Func<Tensor, Tensor, Tensor, Tensor> model_loss = (tf_states, tf_actions, Qtargets) =>
{
Tensor result = neuralNet.Apply(tf_states, training: true);
Tensor subtract = tf.subtract(result, Qtargets);
Tensor square = tf.square(subtract);
Tensor loss = tf.multiply(square, tf_actions);
return tf.reduce_mean(loss);
};
Action<Tensor, Tensor, Tensor> run_optimization = (tf_states, tf_actions, Qtargets) =>
{
// Wrap computation inside a GradientTape for automatic differentiation.
using var g = tf.GradientTape();
// Forward pass.
var loss = model_loss(tf_states, tf_actions, Qtargets);
losses.Add(loss.BufferToArray()[0]);
// Compute gradients.
var gradients = g.gradient(loss, neuralNet.trainable_variables);
// Update W and b following gradients.
optimizer.apply_gradients(zip(gradients, neuralNet.trainable_variables.Select(x => x as ResourceVariable)));
};
for (int b = 0; b < size; b += batch_size)
{
var to = (b + batch_size < size) ? b + batch_size : b + (size - b);
var tf_states_b = tf_states.slice(new Slice(b, to));
var tf_actions_b = tf_actions.slice(new Slice(b, to));
var Qtargets_b = Qtargets.slice(new Slice(b, to));
// Minimize the error
run_optimization(tf_states_b, tf_actions_b, Qtargets_b);
}
message += "Mean loss : " + Average(losses.ToArray()) + "\n\n\n";
}
#endregion
#region Operation Function
public float Sum(params float[] customerssalary)
{
float result = 0;
for (int i = 0; i < customerssalary.Length; i++)
{
result += customerssalary[i];
}
return result;
}
public float Average(params float[] customerssalary)
{
float sum = Sum(customerssalary);
float result = (float)sum / customerssalary.Length;
return result;
}
private T[] To1DArray<T>(T[,] input)
{
// Step 1: get total size of 2D array, and allocate 1D array.
int size = input.Length;
T[] result = new T[size];
// Step 2: copy 2D array elements into a 1D array.
int write = 0;
for (int i = 0; i <= input.GetUpperBound(0); i++)
{
for (int z = 0; z <= input.GetUpperBound(1); z++)
{
result[write++] = input[i, z];
}
}
// Step 3: return the new array.
return result;
}
private T[,] To2D<T>(T[][] source)
{
try
{
int FirstDim = source.Length;
int SecondDim = source.GroupBy(row => row.Length).Single().Key; // throws InvalidOperationException if source is not rectangular
var result = new T[FirstDim, SecondDim];
for (int i = 0; i < FirstDim; ++i)
for (int j = 0; j < SecondDim; ++j)
result[i, j] = source[i][j];
return result;
}
catch (InvalidOperationException)
{
throw new InvalidOperationException("The given jagged array is not rectangular.");
}
}
#endregion
#region Environment
string LogState(float[] state)
{
string line = "";
if (state != null && state.Length > 0)
{
line = "---------- STATE LENGTH : " + state.Length + " ----------" + "\n";
line += "PlayerX : " + playerX + "\n";
line += "PlayerY : " + playerY + "\n";
string line2 = "";
string newLine = "[ ";
for (int i = 0; i < state.Length; i++)
{
if ((i % 3) == (playerX) && i / 3 == playerY)
newLine += " " + "PLAYER" + " ";
else
newLine += " " + state[i].ToString() + " ";
if ((i + 1) % 3 == 0)
{
if (i == state.Length - 1)
newLine += "]";
newLine += "\n";
line += newLine;
newLine = "";
}
}
}
return line;
}
private float[] GetLinearState()
{
float[,] result = (float[,])currentState.Clone();
result[playerY, playerX] = 0.5f;
return To1DArray(result);
}
private float GetReconpense(int x, int y)
{
return currentState[y, x];
}
EActions TakeAction(float[] state, float epsilon)
{
Tensor tensor = tf.constant(np.array(state));
tensor = tf.reshape(tensor, (1, 9));
EActions act;
if (random.NextDouble() < epsilon)
{
int next = random.Next(0, 4);
act = (EActions)next;
}
else
{
Tensor predict = neuralNet.Apply(tensor);
act = (EActions)tf.arg_max(predict, 1).BufferToArray()[0];
message += "Action : " + act + " \n";
}
return act;
}
List<int[]> visited = new List<int[]>();
float Action(EActions action)
{
float tmpPlayerX = playerX;
float tmpPlayerY = playerY;
switch (action)
{
case EActions.LEFT:
{
if (playerX != 0)
playerX--;
}
break;
case EActions.RIGHT:
{
if (playerX != 2)
playerX++;
}
break;
case EActions.DOWN:
{
if (playerY != 2)
playerY++;
}
break;
case EActions.UP:
{
if (playerY != 0)
playerY--;
}
break;
default:
break;
}
if (tmpPlayerX == playerX && tmpPlayerY == playerY)
return -0.75f;
else
{
float rec = GetReconpense(playerX, playerY);
if (rec != -1 && rec != 1)
{
if (!visited.Exists(x => x[0] == playerX && x[1] == playerY))
{
visited.Add(new int[] { playerX, playerY });
return 0;
}
else
return -0.25f;
}
}
return GetReconpense(playerX, playerY);
}
#endregion
#region Model Class
public class NeuralNet : Model
{
Layer fc1;
Layer output;
public NeuralNet(NeuralNetArgs args) :
base(args)
{
var layers = keras.layers;
// First fully-connected hidden layer.
fc1 = layers.Dense(args.NeuronOfHiddenLayer, activation: args.Activation1);
output = layers.Dense(args.NumClasses, activation: args.Activation2);
StackLayers(fc1, output);
}
// Set forward pass.
protected override Tensors Call(Tensors inputs, Tensor state = null, bool? training = null)
{
inputs = fc1.Apply(inputs);
inputs = output.Apply(inputs);
//if (!training.Value)
// inputs = tf.nn.softmax(inputs);
return inputs;
}
}
public class NeuralNetArgs : ModelArgs
{
/// <summary>
/// 1st layer number of neurons.
/// </summary>
public int NeuronOfHiddenLayer { get; set; }
public Activation Activation1 { get; set; }
public int NumClasses { get; set; }
public Activation Activation2 { get; set; }
}
#endregion
}
I was greatly inspired by an example in Python made by Thibeau Neveau, with a car driving in an environment:
https://github.com/thibo73800/aihub/blob/master/rl/q_learning_nn.html
I zipped my project and exe on my GitHub:
https://github.com/Inazuma12/TensorFlowMaze
Can you help me?