MATLAB強化學習 PG 演算法

  • 2019 年 11 月 24 日
  • 筆記

採用 policy gradient 學習方法訓練 agent;並行訓練的參數也需要調整。

%% Load the environment

% NOTE(review): 'ccc' is not a built-in MATLAB command — presumably a user
% shortcut script for clear/close all/clc; confirm it exists on the path.
ccc

% Predefined CartPole environment with a discrete action space.
env = rlPredefinedEnv("CartPole-Discrete");

% Observation specification and the length of the observation vector.
obsInfo = getObservationInfo(env);

numObservations = obsInfo.Dimension(1);

% Action specification (discrete action set).
actInfo = getActionInfo(env);

% Fix the random seed for reproducible training runs.
rng(0)

%% Initialize the agent

% Number of discrete actions, derived from the action spec instead of the
% original hard-coded 2, so the network stays correct if the environment's
% action set changes.
numActions = numel(actInfo.Elements);

% Actor network: a linear mapping from the observation vector to one
% output per discrete action.
actorNetwork = [
    imageInputLayer([numObservations 1 1],'Normalization','none','Name','state')
    fullyConnectedLayer(numActions,'Name','action')];

% Representation options: learning rate and gradient clipping threshold.
actorOpts = rlRepresentationOptions('LearnRate',1e-2,'GradientThreshold',1);

% Build the actor representation and wrap it in a policy-gradient agent.
actor = rlRepresentation(actorNetwork,obsInfo,actInfo,'Observation',{'state'},'Action',{'action'},actorOpts);

agent = rlPGAgent(actor);

%% Configure training options

% FIX: the original used the Unicode ellipsis character (…) for line
% continuation, which MATLAB rejects; continuation must be three periods (...).
% Stop when the average reward over the last 100 episodes reaches 195.
trainOpts = rlTrainingOptions(...
    'MaxEpisodes', 1000, ...
    'MaxStepsPerEpisode', 200, ...
    'Verbose', false, ...
    'Plots', 'training-progress', ...
    'StopTrainingCriteria', 'AverageReward', ...
    'StopTrainingValue', 195, ...
    'ScoreAveragingWindowLength', 100);

% Visualize the environment while training.
plot(env)

%% Parallel training settings

% Enable parallel workers for training.
trainOpts.UseParallel = true;

% Asynchronous mode: workers send data to the host without waiting for
% each other.
trainOpts.ParallelizationOptions.Mode = "async";

% Workers send computed gradients (rather than experiences) back to the
% host. NOTE(review): whether "Gradients" is supported together with
% async mode depends on the Reinforcement Learning Toolbox release —
% confirm for your version.
trainOpts.ParallelizationOptions.DataToSendFromWorkers = "Gradients";

% NOTE(review): -1 presumably means "send data only at the end of each
% episode" — verify against the toolbox documentation.
trainOpts.ParallelizationOptions.StepsUntilDataIsSent = -1;

%% Train

% Run training; returns per-episode statistics (rewards, steps, etc.).
trainingStats = train(agent,env,trainOpts);

%% Show results

% Simulate the trained agent for at most 500 steps.
simOptions = rlSimulationOptions('MaxSteps',500);

experience = sim(env,agent,simOptions);

% NOTE(review): experience.Reward is presumably a timeseries of per-step
% rewards; summing it yields the total episode reward — confirm the field
% type for your toolbox version.
totalReward = sum(experience.Reward);