begin
    using ReinforcementLearning
    using Flux
    using Statistics
    using Plots
    using Distributions
end
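This is the Gambler's Problem (Sutton & Barto, Example 4.3): a gambler repeatedly bets on coin flips that come up heads with probability pₕ = 0.4; on heads the stake is won, on tails it is lost, and the episode ends when the capital reaches 0 or WinCapital = 100. The cell below encodes it as a tabular environment model: `nextstep` enumerates the transition distribution for every (capital, bet) pair, and `GamblerProblemEnvModel` caches all of them in a `Dict`.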
begin
    const pₕ = 0.4
    const WinCapital = 100

    # Capital 0..WinCapital is stored 1-based as state 1..WinCapital+1.
    decode_state(s::Int) = s - 1
    encode_state(s::Int) = s + 1

    # List of (reward, terminal?, next_state) => probability pairs for
    # betting `a` in (encoded) state `s`.
    function nextstep(s::Int, a::Int)
        s = decode_state(s)
        a = min(s, a)  # the stake cannot exceed the current capital
        if s == WinCapital || s == 0
            # absorbing states: stay put with probability 1 and no reward
            [(0., false, encode_state(s)) => 1.0]
        else
            [
                # heads: win the stake; reward 1 only on reaching WinCapital
                ((s + a >= WinCapital ? 1.0 : 0.), false, encode_state(min(s + a, WinCapital))) => pₕ,
                # tails: lose the stake
                (0., false, encode_state(max(s - a, 0))) => 1 - pₕ
            ]
        end
    end

    # Tabular environment model: the full transition distribution is
    # precomputed and cached for every (state, action) pair.
    struct GamblerProblemEnvModel <: AbstractEnvironmentModel
        cache
    end

    function GamblerProblemEnvModel()
        GamblerProblemEnvModel(
            Dict(
                (s, a) => nextstep(s, a)
                for s in 1:(WinCapital+1) for a in 1:WinCapital
            )
        )
    end

    RLBase.state_space(m::GamblerProblemEnvModel) = Base.OneTo(WinCapital + 1)
    RLBase.action_space(m::GamblerProblemEnvModel) = Base.OneTo(WinCapital)

    (m::GamblerProblemEnvModel)(s, a) = m.cache[(s, a)]
end
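As a quick sanity check (not part of the original notebook), the cached model can be queried directly. With a capital of 50 and a stake of 10 it should return a heads branch to capital 60 with probability 0.4 and a tails branch to capital 40 with probability 0.6:

let m = GamblerProblemEnvModel()
    # (reward, terminal?, next_state) => probability pairs for capital 50, stake 10
    m(encode_state(50), 10)
end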
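Next comes a tabular value function with one entry per state. `Descent(1)`, a step size of 1, means each update effectively replaces the stored value with its Bellman target.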
V = TabularVApproximator(;n_state=1+WinCapital,opt=Descent(1))
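Run value iteration against the model with no discounting (γ = 1), sweeping until convergence or at most 1000 iterations: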
RLZoo.value_iteration!(V=V, model=GamblerProblemEnvModel(), γ=1.0, max_iter=1000)
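The figure in Sutton & Barto also shows the final greedy policy, i.e. the stake chosen at each capital level. Below is a minimal sketch of how it could be read off the converged value function by a one-step lookahead through the cached model; `greedy_policy` is a helper introduced here, not part of RLZoo. Note that `argmax` breaks ties toward the smallest stake, so the exact shape of the policy plot depends on tie-breaking.

# `greedy_policy` is a hypothetical helper, not part of RLZoo.
function greedy_policy(V, model; γ = 1.0)
    # For each non-terminal capital, pick the stake with the best
    # one-step lookahead value under the converged V.
    stakes = zeros(Int, WinCapital - 1)
    for capital in 1:(WinCapital - 1)
        s = encode_state(capital)
        qs = [
            sum(p * (r + γ * V.table[s′]) for ((r, _, s′), p) in model(s, a))
            for a in 1:min(capital, WinCapital - capital)
        ]
        stakes[capital] = argmax(qs)
    end
    stakes
end

plot(greedy_policy(V, GamblerProblemEnvModel()), xlabel = "Capital", ylabel = "Stake")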
plot(V.table[2:end-1])
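The plot shows the converged value estimates for capitals 1 through 99; the `[2:end-1]` slice drops the two terminal states (0 and 100).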