MATLAB Reinforcement Learning: the PG Algorithm
- November 24, 2019
- Notes

Train the agent with the policy gradient (PG) learning method; when running the training in parallel, the parallelization parameters also need to be adjusted.
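As background, MATLAB's PG agent implements REINFORCE (Monte Carlo policy gradient), which ascends a sample estimate of the gradient of the expected return:

$$\nabla_\theta J(\theta) = \mathbb{E}_{\pi_\theta}\left[\sum_t \nabla_\theta \log \pi_\theta(a_t \mid s_t)\, G_t\right]$$

where $G_t$ is the discounted return from step $t$ onward and $\pi_\theta$ is the policy represented by the actor network below.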
%% Load the environment
clear; clc; close all   % clear the workspace, command window, and figures
env = rlPredefinedEnv("CartPole-Discrete");   % predefined cart-pole environment with a discrete action space
obsInfo = getObservationInfo(env);
numObservations = obsInfo.Dimension(1);       % 4 observations: cart position/velocity, pole angle/angular velocity
actInfo = getActionInfo(env);
rng(0)                                        % fix the random seed for reproducibility
%% Initialize the agent
actorNetwork = [
    imageInputLayer([numObservations 1 1],'Normalization','none','Name','state')
    fullyConnectedLayer(2,'Name','action')];  % one output per discrete action (push left / push right)
actorOpts = rlRepresentationOptions('LearnRate',1e-2,'GradientThreshold',1);
actor = rlRepresentation(actorNetwork,obsInfo,actInfo,'Observation',{'state'},'Action',{'action'},actorOpts);
agent = rlPGAgent(actor);
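% Optional sketch (not part of the original script): rlPGAgent also accepts an
% rlPGAgentOptions object, e.g. to change the discount factor or add entropy
% regularization; a baseline critic can be supplied as a second argument.
% agentOpts = rlPGAgentOptions('DiscountFactor',0.99,'EntropyLossWeight',0);
% agent = rlPGAgent(actor,agentOpts);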
%% Set the training options
trainOpts = rlTrainingOptions(...
    'MaxEpisodes', 1000, ...
    'MaxStepsPerEpisode', 200, ...
    'Verbose', false, ...
    'Plots','training-progress',...
    'StopTrainingCriteria','AverageReward',...
    'StopTrainingValue',195,...               % stop once the average reward reaches 195, the classic "solved" threshold for cart-pole
    'ScoreAveragingWindowLength',100);        % average over a 100-episode window
plot(env)                                     % visualize the environment
%% Parallel training settings
trainOpts.UseParallel = true;
trainOpts.ParallelizationOptions.Mode = "async";                       % workers run asynchronously
trainOpts.ParallelizationOptions.DataToSendFromWorkers = "Gradients";  % workers send computed gradients (rather than experiences) to the host
trainOpts.ParallelizationOptions.StepsUntilDataIsSent = -1;            % -1: send data at the end of each episode
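% Note: async gradient-based parallel training requires the Parallel Computing
% Toolbox. train opens a parallel pool automatically (subject to your parallel
% preferences); to choose the worker count yourself, start one first. The pool
% size 4 below is just an example:
% parpool(4);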
%% Train the agent
trainingStats = train(agent,env,trainOpts);
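% Illustrative follow-up (field names assume the stats structure returned by
% train in this release; verify against your version):
% figure
% plot(trainingStats.EpisodeIndex,trainingStats.AverageReward)
% xlabel('Episode'), ylabel('Average reward')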
%% Show the results
simOptions = rlSimulationOptions('MaxSteps',500);
experience = sim(env,agent,simOptions);       % run one test episode with the trained agent
totalReward = sum(experience.Reward);
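% Display the score from the test run (a small added convenience):
fprintf('Total reward from the test episode: %g\n',totalReward)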
