Again, we'll describe the car rental problem with a distributional model.
xxxxxxxxxx
7
1
begin
2
using ReinforcementLearning
3
using Flux
4
using Statistics
5
using Plots
6
using Distributions
7
end
xxxxxxxxxx
74
1
begin
2
# Problem parameters.
const MaxCars = 20            # cap applied to the car count of each location
const MaxMoves = 5            # largest number of cars moved in one action
const CostPerMove = 2         # charged per car actually moved
const PoissonUpperBound = 10  # request/return counts above this are not enumerated

# Index tables built over the car-count ranges 0:MaxCars of the two locations;
# used to convert between a pair of car counts and a single integer state id.
const CarRentalCartesianIndices = CartesianIndices((0:MaxCars, 0:MaxCars))
const CarRentalLinearIndices = LinearIndices((0:MaxCars, 0:MaxCars))

# Signed number of cars to move; the sign selects the direction (see `nextstep`).
const Actions = -MaxMoves:MaxMoves

# Rental-request and return distributions for location 1 and location 2.
const RequestDist_1 = Poisson(3)
const RequestDist_2 = Poisson(4)
const ReturnDist_1 = Poisson(3)
const ReturnDist_2 = Poisson(2)
13
14
# Turn the integer state id `s` into the tuple of car counts stored at that
# position of `CarRentalCartesianIndices` (each count lies in 0:MaxCars).
function decode_state(s::Int)
    return Tuple(CarRentalCartesianIndices[s])
end
15
# Turn the car counts `s1` and `s2` into a single integer state id.
# The `+ 1` shifts counts that start at 0 to index positions that start at 1.
function encode_state(s1::Int, s2::Int)
    position = CartesianIndex(s1 + 1, s2 + 1)
    return CarRentalLinearIndices[position]
end
16
# Convert an action index into the signed number of cars to move by
# subtracting `MaxMoves + 1`; inverse of `encode_action`.
function decode_action(a::Int)
    return a - (MaxMoves + 1)
end
17
# Convert a signed number of cars to move into an action index by
# adding `MaxMoves + 1`; inverse of `decode_action`.
function encode_action(a::Int)
    return a + (MaxMoves + 1)
end
18
19
"""
    merge_prob(dist)

Collapse duplicate outcomes of a transition distribution.

`dist` is any iterable of `(s′, r, p)` triples. All triples sharing the same
`(s′, r)` are combined by summing their `p`. The result is a vector with one
`(r, false, s′) => p` pair per distinct `(s′, r)`; the order of the pairs follows
the iteration order of the internal dictionary and should not be relied upon.
The constant `false` in each key is presumably a "terminal" flag (TODO confirm
against the consumer of the model).
"""
function merge_prob(dist)
    totals = Dict()
    for (s′, r, p) in dist
        key = (s′, r)
        # First occurrence stores `p` as is; later ones accumulate onto it.
        totals[key] = haskey(totals, key) ? totals[key] + p : p
    end
    return [(r, false, s′) => p for ((s′, r), p) in totals]
end
30
31
"""
    nextstep(state::Int, action::Int)

Enumerate the outcomes of taking `action` (an action index) in `state` (a state
id) and return them merged by `merge_prob`.

The decoded action is first limited to the cars available at the location it
draws from, the move is charged `CostPerMove` per car, and both counts are capped
at `MaxCars`. Every combination of request and return counts in
`0:PoissonUpperBound` for the two locations then yields one outcome:

- next state: per location, requests are subtracted (not below 0), returns are
  added, and the count is capped at `MaxCars`;
- reward: the move cost plus 10 per car actually rented;
- probability: the product of the four `pdf` values.

Counts above `PoissonUpperBound` are never enumerated, so the probabilities of
the returned outcomes do not cover the tail of the distributions.
"""
function nextstep(state::Int, action::Int)
    s1, s2 = decode_state(state)
    a = decode_action(action)
    # A positive `a` is limited by `s1`, a non-positive one by `s2`.
    move = a > 0 ? min(a, s1) : max(a, -s2)
    reward = -CostPerMove * abs(move)
    s1′ = min(s1 - move, MaxCars)
    s2′ = min(s2 + move, MaxCars)
    counts = 0:PoissonUpperBound
    outcomes = (
        (
            encode_state(
                min(max(s1′ - req_1, 0) + ret_1, MaxCars),
                min(max(s2′ - req_2, 0) + ret_2, MaxCars),
            ),
            reward + (min(s1′, req_1) + min(s2′, req_2)) * 10,
            pdf(RequestDist_1, req_1) *
            pdf(RequestDist_2, req_2) *
            pdf(ReturnDist_1, ret_1) *
            pdf(ReturnDist_2, ret_2),
        ) for req_1 in counts, req_2 in counts, ret_1 in counts, ret_2 in counts
    )
    return merge_prob(outcomes)
end
56
57
"""
    CarRentalEnvModel(dist)

Environment model backed by `dist`, a lookup table indexed by `(state, action)`
tuples (see the call method of this type).

The type parameter `D` records the concrete type of `dist`, so the field is not
stored as `Any` and lookups through it can be specialised by the compiler.
`CarRentalEnvModel(dist)` infers `D` automatically, and method signatures written
as `m::CarRentalEnvModel` keep matching every instance.
"""
struct CarRentalEnvModel{D} <: AbstractEnvironmentModel
    dist::D
end
60
61
# Build the model by calling `nextstep` once for every state id in
# 1:(MaxCars + 1)^2 combined with every action index in 1:length(Actions),
# and storing the results in a dictionary keyed by `(state, action)`.
function CarRentalEnvModel()
    n_states = (MaxCars + 1)^2
    n_actions = length(Actions)
    transitions = Dict(
        (s, a) => nextstep(s, a) for s in 1:n_states for a in 1:n_actions
    )
    return CarRentalEnvModel(transitions)
end
69
70
# Calling the model with a state id `s` and an action index `a` returns the
# entry stored under the key `(s, a)`; a missing key raises the container's
# usual lookup error.
function (m::CarRentalEnvModel)(s, a)
    key = (s, a)
    return m.dist[key]
end
71
72
# State ids run from 1 to (MaxCars + 1)^2, one per pair of car counts.
function RLBase.state_space(m::CarRentalEnvModel)
    counts_per_location = MaxCars + 1
    return Base.OneTo(counts_per_location^2)
end
73
# Action indices run from 1 to the number of entries in `Actions`.
function RLBase.action_space(m::CarRentalEnvModel)
    n_actions = length(Actions)
    return Base.OneTo(n_actions)
end
74
end
290
1
150
false
267
5.41694e-10
70
false
147
0.00986154
90
false
329
9.13952e-8
130
false
293
2.12204e-10
80
false
162
0.000278687
120
false
161
5.20353e-5
110
false
201
5.56929e-6
160
false
14
3.95436e-6
50
false
375
1.02696e-7
120
false
203
1.01255e-5
150
false
222
1.23625e-8
20
false
207
5.52979e-5
20
false
293
6.6416e-5
110
false
98
7.74961e-5
130
false
161
2.02511e-5
140
false
244
3.33185e-8
80
false
332
3.22524e-7
120
false
139
4.23029e-5
60
false
373
3.79938e-8
140
false
83
1.29745e-5
160
false
222
1.6237e-9
110
false
333
1.50538e-9
140
false
251
1.12709e-9
90
false
142
0.000831651
90
false
167
0.000665869
160
false
62
3.21244e-7
130
false
15
1.64033e-5
120
false
97
2.7728e-5
150
false
60
1.31175e-5
140
false
164
3.37816e-6
58
9
44
false
310
1.30133e-8
64
false
165
1.46725e-5
64
false
310
4.42091e-10
54
false
59
0.00222643
84
false
263
1.5352e-7
14
false
120
0.00119927
34
false
41
2.84389e-5
104
false
137
6.17593e-5
-6
false
328
1.73382e-9
84
false
143
8.67125e-6
4
false
286
3.85774e-7
94
false
151
2.37732e-6
54
false
80
0.00188099
74
false
57
0.00275736
124
false
243
2.4485e-10
14
false
187
4.05995e-5
44
false
162
0.000779582
104
false
283
1.00661e-9
94
false
202
1.59093e-6
104
false
164
5.17008e-8
124
false
158
7.00401e-7
-6
false
182
2.45768e-5
64
false
197
3.67534e-5
14
false
273
6.06114e-8
14
false
227
2.49499e-5
74
false
16
0.000562964
104
false
10
0.000562964
64
false
304
4.42948e-8
34
false
185
0.000137223
-6
false
189
1.91074e-6
325
1
80
false
162
0.00303687
160
false
14
7.80039e-7
-10
false
248
5.52979e-5
90
false
177
0.000339342
160
false
8
7.70409e-7
120
false
240
1.13621e-5
-10
false
226
1.84326e-5
130
false
161
1.90238e-5
60
false
373
3.48166e-7
120
false
260
3.44803e-6
-10
false
357
9.17353e-7
100
false
328
2.35537e-7
140
false
57
2.55416e-5
120
false
216
6.63575e-6
180
false
89
9.63011e-8
90
false
431
2.85659e-11
-10
false
313
1.38245e-5
70
false
226
0.00080679
60
false
357
1.09467e-8
90
false
197
0.000110867
90
false
63
3.42136e-5
130
false
351
4.89701e-12
90
false
405
7.99375e-10
150
false
237
5.02142e-8
50
false
201
0.00180916
110
false
333
1.0447e-10
130
false
346
6.09406e-10
150
false
60
2.57691e-7
50
false
387
3.94985e-8
170
false
218
6.41625e-9
321
7
58
false
371
2.75662e-7
38
false
427
0.00025689
118
false
286
3.30175e-9
58
false
428
3.42034e-5
98
false
344
1.24996e-5
68
false
242
0.000169844
98
false
381
7.46344e-6
48
false
267
1.70634e-6
118
false
297
2.98538e-5
128
false
367
4.80973e-10
68
false
407
2.90386e-5
18
false
328
1.99961e-5
138
false
344
1.53911e-9
68
false
431
4.58967e-7
48
false
323
0.00275991
18
false
424
0.000116245
108
false
341
1.35359e-5
58
false
304
0.000630339
98
false
363
1.49548e-5
88
false
281
0.000684564
128
false
190
6.28472e-5
18
false
345
0.000658391
58
false
321
0.00566946
98
false
133
7.98943e-6
108
false
217
0.000574217
28
false
305
0.000196703
138
false
242
5.45424e-8
128
false
233
6.49777e-5
128
false
177
1.71604e-5
108
false
200
1.67582e-5
282
7
58
false
428
1.12649e-5
68
false
333
2.0636e-9
18
false
328
0.000306015
138
false
344
1.10476e-8
18
false
264
0.000165894
108
false
341
2.15048e-5
158
false
324
1.35976e-10
68
false
326
0.000220073
148
false
118
1.17006e-7
158
false
296
1.30417e-8
118
false
283
5.52224e-6
-2
false
394
6.66537e-7
58
false
338
1.2442e-5
38
false
352
6.19139e-6
108
false
259
0.000491558
68
false
220
0.00325339
38
false
306
0.00122559
138
false
322
1.23115e-7
78
false
360
0.000100802
108
false
310
5.40102e-10
48
false
243
0.00144881
108
false
255
0.000653055
148
false
255
3.03036e-6
8
false
374
1.28864e-6
58
false
341
0.000765031
88
false
407
1.3357e-6
18
false
345
0.000789531
98
false
247
2.66548e-7
108
false
247
3.62276e-8
108
false
200
0.000176846
230
7
158
false
56
1.94528e-5
158
false
147
2.37604e-7
68
false
333
7.98316e-5
38
false
229
0.00375548
158
false
324
1.60502e-10
48
false
145
5.24306e-5
148
false
118
6.97765e-5
118
false
283
2.54765e-6
168
false
309
9.28618e-11
168
false
307
3.09539e-10
38
false
352
2.64201e-5
68
false
251
0.00181097
108
false
210
0.000101324
108
false
310
7.21602e-6
68
false
349
4.34484e-6
138
false
165
4.1757e-5
138
false
178
1.20865e-5
78
false
202
4.97681e-5
128
false
345
6.77117e-9
88
false
264
5.54337e-5
118
false
79
0.000141063
138
false
76
1.01116e-5
38
false
397
6.50815e-6
58
false
314
0.000253599
98
false
247
0.000523332
108
false
247
0.000195965
98
false
336
3.16622e-7
98
false
84
0.000683643
18
false
229
0.000319499
108
false
200
2.93276e-5
281
3
44
false
310
9.19804e-6
64
false
165
6.37224e-5
64
false
310
9.2779e-7
74
false
231
1.0318e-9
164
false
29
6.66283e-6
104
false
165
3.13323e-7
84
false
263
6.91368e-5
34
false
367
6.52153e-6
14
false
430
2.34065e-8
164
false
258
1.44452e-9
104
false
137
0.000870063
64
false
429
1.52351e-9
-6
false
328
7.37305e-6
84
false
143
5.21887e-5
4
false
286
0.000184326
94
false
151
2.84389e-5
74
false
57
3.37054e-5
124
false
243
2.1241e-7
154
false
274
8.02509e-10
134
false
169
2.13292e-6
64
false
197
0.00272818
14
false
273
5.3323e-7
14
false
227
0.000168023
4
false
284
9.01151e-5
164
false
139
6.84856e-9
104
false
10
5.32628e-6
64
false
304
7.98316e-5
34
false
392
3.41521e-7
34
false
185
0.000339342
134
false
148
2.13292e-6
212
8
58
false
114
0.000114209
68
false
151
0.00240638
38
false
232
0.000877324
68
false
242
1.65745e-7
18
false
217
0.00199865
18
false
277
0.00296097
8
false
212
0.00148048
48
false
323
6.44565e-7
8
false
254
0.00296097
98
false
111
4.81275e-5
58
false
304
5.37137e-8
8
false
255
0.00444145
48
false
316
1.48542e-6
58
false
194
0.00157918
18
false
401
8.35547e-7
18
false
191
0.00296097
58
false
322
2.00531e-7
58
false
197
0.000203038
48
false
149
0.00631673
48
false
158
3.42626e-5
98
false
241
5.45663e-10
88
false
179
1.28913e-7
98
false
127
9.50667e-6
48
false
255
0.000421116
38
false
256
0.00157918
18
false
345
4.07889e-6
58
false
321
4.01063e-7
98
false
133
9.6255e-6
78
false
112
0.000406076
28
false
305
1.90348e-6
276
6
120
false
233
5.43777e-7
70
false
295
7.49393e-5
110
false
201
7.70409e-9
40
false
254
0.00513548
100
false
97
5.77807e-8
80
false
211
0.000749393
30
false
337
0.000263458
120
false
215
2.447e-6
90
false
177
0.000124853
100
false
72
1.79762e-6
40
false
407
2.49589e-6
80
false
344
3.61229e-7
40
false
430
8.16137e-8
70
false
172
0.00485739
50
false
424
1.3382e-6
90
false
130
0.00174153
90
false
149
0.00198693
120
false
240
2.9495e-8
10
false
407
2.92289e-6
0
false
348
4.44358e-7
50
false
221
5.55409e-5
60
false
199
0.000138686
50
false
201
8.88716e-6
20
false
390
1.74358e-7
40
false
382
9.62235e-5
100
false
303
3.84993e-8
10
false
277
0.000230408
80
false
261
2.69567e-5
50
false
387
1.22414e-6
90
false
338
2.71888e-7
255
8
6
false
275
0.00148048
76
false
158
6.52622e-6
6
false
423
7.42592e-5
26
false
240
0.000214142
26
false
359
0.00035093
46
false
364
6.76793e-6
66
false
191
0.00240638
96
false
253
2.26349e-7
96
false
300
2.03714e-8
56
false
320
6.01594e-5
-4
false
431
2.45583e-7
6
false
276
0.00222073
36
false
255
0.0118439
-4
false
341
0.000832773
76
false
176
0.000174032
46
false
241
0.000114209
76
false
261
2.90054e-6
76
false
281
2.20994e-6
96
false
193
4.278e-5
16
false
424
3.57196e-5
36
false
277
0.00789592
26
false
344
0.000152278
56
false
177
0.000114209
26
false
318
0.00394796
16
false
283
0.000107071
86
false
236
2.67375e-5
36
false
386
3.62568e-6
96
false
295
1.006e-8
96
false
299
3.39524e-8
36
false
237
0.00532974
286
4
76
false
438
4.89701e-11
196
false
218
1.83431e-10
126
false
288
1.3926e-7
106
false
137
0.000798397
26
false
185
0.000294922
186
false
92
1.22784e-6
86
false
266
0.000331912
156
false
118
1.35685e-5
26
false
183
6.55383e-5
106
false
264
7.46097e-5
76
false
147
0.000192329
106
false
354
1.46548e-9
46
false
241
1.2442e-5
186
false
34
4.17878e-8
176
false
75
1.7281e-6
56
false
294
1.64975e-5
156
false
261
9.16548e-8
156
false
227
1.74761e-9
106
false
313
1.3997e-8
96
false
137
0.000554454
106
false
207
2.54297e-5
66
false
366
1.34295e-6
116
false
247
4.39374e-6
16
false
273
0.000221361
116
false
388
5.67086e-9
106
false
200
0.00081139
196
false
53
5.85029e-8
96
false
299
1.33307e-7
136
false
290
1.71525e-9
106
false
305
9.99458e-6
302
3
44
false
310
2.9973e-5
64
false
165
7.60769e-5
64
false
310
3.52843e-6
74
false
231
4.12719e-9
164
false
29
2.60013e-6
104
false
165
8.53469e-7
84
false
263
0.000203894
34
false
367
2.54248e-5
14
false
430
1.61245e-7
164
false
258
7.67543e-9
104
false
137
0.00112926
64
false
429
3.28402e-8
-6
false
328
1.84326e-5
84
false
143
6.95245e-5
4
false
286
0.000313355
124
false
243
7.84639e-7
154
false
274
3.61129e-9
134
false
169
2.13292e-6
114
false
365
1.33761e-8
44
false
162
0.000752052
64
false
197
0.00236491
14
false
273
1.06646e-6
14
false
227
0.00013153
4
false
284
0.000143365
164
false
139
1.89786e-8
64
false
304
0.000253599
34
false
392
1.75873e-6
104
false
427
1.14264e-10
34
false
185
0.000218032
134
false
148
1.06646e-6
302
5
58
false
428
3.41056e-6
68
false
333
1.23625e-8
38
false
229
2.13292e-6
18
false
328
0.000365657
138
false
344
7.46764e-9
18
false
264
0.000691224
108
false
341
9.67131e-6
158
false
324
1.06957e-10
68
false
326
0.000170636
148
false
118
5.36277e-7
158
false
296
1.04029e-8
118
false
283
4.48372e-6
-2
false
394
7.10973e-7
38
false
352
7.9653e-6
108
false
259
0.000342635
68
false
220
0.00523119
38
false
306
0.0013602
138
false
322
8.68021e-8
78
false
360
1.72547e-5
108
false
310
1.33752e-9
108
false
255
0.00028672
148
false
255
2.22214e-6
8
false
374
1.733e-6
58
false
341
0.000181101
88
false
407
5.16276e-7
38
false
397
1.50471e-9
18
false
345
0.000326462
98
false
247
6.66618e-7
108
false
247
9.80208e-8
108
false
200
0.000252028
407
10
82
false
237
1.99736e-5
72
false
401
0.0012562
72
false
433
3.69613e-8
72
false
361
0.00653042
82
false
405
0.000249082
122
false
421
5.80144e-9
82
false
434
1.22287e-10
102
false
323
8.32664e-5
112
false
232
5.13972e-5
82
false
235
4.43857e-6
82
false
285
6.15725e-6
122
false
284
4.46054e-7
42
false
322
0.000235938
2
false
407
8.29469e-5
112
false
380
3.9667e-6
112
false
407
1.53018e-7
122
false
232
4.11177e-5
62
false
338
0.00274861
112
false
298
0.000332351
2
false
411
3.99922e-6
102
false
340
0.000372319
92
false
388
4.15359e-6
32
false
384
0.0031059
82
false
303
0.00021652
92
false
407
1.02563e-5
42
false
349
8.88716e-6
92
false
346
2.33029e-5
122
false
400
2.90072e-8
92
false
360
0.000730998
-8
false
425
0.000136199
277
7
38
false
427
7.17244e-6
58
false
428
1.29041e-7
98
false
344
1.96634e-8
68
false
151
5.99207e-5
58
false
201
6.35136e-6
68
false
242
3.71878e-5
118
false
95
4.64547e-7
118
false
297
3.62518e-8
18
false
277
0.00062671
68
false
407
1.03233e-7
18
false
328
1.06646e-6
48
false
323
0.000367382
18
false
424
6.6666e-5
98
false
111
6.09194e-5
18
false
264
2.66615e-6
58
false
304
4.62809e-5
88
false
118
7.22258e-8
88
false
281
1.32154e-5
68
false
326
1.38452e-6
48
false
316
0.000702556
88
false
179
1.46655e-5
98
false
127
0.000160584
48
false
255
0.0107341
38
false
256
0.00571945
18
false
345
0.00010799
58
false
321
0.000620252
98
false
133
0.000228504
108
false
217
2.02288e-5
28
false
305
4.30776e-5
108
false
200
8.51286e-7
83
7
118
false
84
6.67609e-7
128
false
177
2.70847e-8
118
false
167
4.75297e-8
48
false
19
0.00170187
18
false
311
7.80217e-9
48
false
119
2.48841e-5
48
false
230
1.27183e-5
118
false
95
4.19317e-5
48
false
267
3.54024e-7
18
false
61
4.91537e-5
128
false
123
4.83637e-7
38
false
252
6.33793e-6
138
false
74
1.84623e-5
28
false
100
2.7649e-5
128
false
114
7.10973e-7
38
false
229
2.60659e-5
58
false
62
0.00998509
98
false
168
1.34707e-6
-2
false
166
8.19228e-6
8
false
168
0.00054805
108
false
249
2.05572e-9
38
false
62
0.00389948
58
false
314
7.61757e-10
128
false
166
1.56719e-8
108
false
98
0.000329768
98
false
247
5.9609e-8
108
false
247
1.10047e-8
98
false
84
6.38118e-5
18
false
229
1.69892e-5
108
false
200
4.30816e-7
332
7
168
false
374
2.34918e-11
158
false
343
1.79189e-7
68
false
333
0.00199932
158
false
434
2.61174e-11
138
false
344
5.98007e-6
158
false
324
1.25193e-6
68
false
326
0.000273725
118
false
283
0.000295673
168
false
309
2.54486e-8
-2
false
394
1.22884e-5
168
false
307
1.40015e-7
38
false
352
0.00388622
148
false
357
1.65088e-11
68
false
251
0.000798397
108
false
210
6.42967e-5
138
false
322
6.16177e-6
108
false
310
0.000276547
8
false
332
7.37305e-5
68
false
349
0.00221016
138
false
165
1.60341e-5
8
false
374
0.000313355
128
false
430
3.26853e-7
88
false
407
3.99922e-6
38
false
397
0.000669078
168
false
408
4.12719e-10
58
false
314
0.0013835
98
false
247
0.00211697
198
false
350
1.3266e-12
108
false
247
0.00112926
98
false
336
6.14619e-6
78
5
48
false
19
0.00138414
48
false
119
0.00141713
48
false
230
1.90723e-7
-2
false
143
1.22884e-5
58
false
201
3.10555e-6
68
false
242
3.79938e-8
118
false
95
6.67352e-6
48
false
267
1.18496e-8
18
false
61
0.00328777
38
false
252
9.3421e-9
28
false
100
0.00273515
38
false
229
9.96297e-7
88
false
30
0.000275643
58
false
62
0.000339176
-2
false
166
7.37305e-6
8
false
168
1.63494e-5
98
false
111
1.33307e-7
18
false
273
5.20534e-9
88
false
118
0.000124489
-2
false
79
1.22884e-5
28
false
119
0.000184326
108
false
98
4.85357e-6
98
false
133
5.55448e-7
48
false
96
0.000185248
108
false
217
6.34262e-9
98
false
247
2.26997e-11
108
false
247
1.83638e-12
58
false
32
4.97681e-5
18
false
229
1.09235e-6
108
false
200
8.36078e-8
196
2
12
false
138
0.00110903
82
false
227
1.91484e-9
82
false
128
6.66537e-7
122
false
196
1.68871e-7
62
false
57
0.00141787
72
false
90
0.000612029
22
false
135
0.000387085
2
false
95
2.45768e-5
72
false
183
3.30182e-6
82
false
221
1.50065e-6
32
false
60
8.88716e-5
12
false
206
1.59549e-5
12
false
267
1.893e-6
22
false
185
3.69591e-5
22
false
285
1.48193e-6
102
false
162
5.17008e-8
92
false
96
0.000217427
132
false
9
2.90817e-6
22
false
180
0.000650638
52
false
187
2.49537e-7
2
false
205
2.57135e-5
62
false
301
4.85267e-8
62
false
177
0.000297108
52
false
75
0.00720502
2
false
287
5.51029e-7
122
false
236
9.59249e-9
132
false
30
5.98988e-6
112
false
264
1.63234e-11
42
false
140
0.000999166
72
false
288
2.28527e-10
95
2
2
false
119
0.000267608
32
false
158
2.86723e-5
22
false
204
9.55743e-6
62
false
203
2.07362e-7
2
false
101
0.00301059
82
false
49
0.000108865
62
false
159
1.24417e-5
62
false
57
0.000125972
32
false
11
0.00112897
72
false
204
1.24973e-8
52
false
57
0.00068585
12
false
63
0.00070218
72
false
183
5.62377e-8
82
false
225
2.77717e-10
2
false
79
0.00602118
82
false
204
1.38859e-9
22
false
35
0.0135477
32
false
16
0.00228617
42
false
34
0.00609644
92
false
55
1.77149e-6
22
false
33
0.00301059
52
false
120
9.14467e-5
72
false
75
0.000146968
42
false
140
0.000203215
12
false
209
9.21609e-7
52
false
219
9.55743e-8
82
false
8
8.16488e-5
22
false
39
0.00304822
72
false
222
3.11043e-8
32
false
37
0.00457233
68
4
26
false
133
0.000624237
36
false
139
7.21435e-6
66
false
129
0.000220618
26
false
240
1.57994e-8
66
false
191
2.33458e-6
6
false
242
1.05329e-8
26
false
183
1.52665e-7
16
false
120
1.45487e-5
46
false
241
5.71318e-10
66
false
111
0.000297834
66
false
218
6.75361e-8
66
false
134
2.12739e-5
6
false
198
2.61151e-6
46
false
70
0.00503745
16
false
183
3.50682e-7
46
false
193
5.04643e-6
26
false
243
1.26959e-9
26
false
179
9.80386e-6
56
false
93
0.000194597
26
false
98
5.40326e-5
16
false
52
0.00676139
56
false
64
2.06404e-5
6
false
31
0.0030175
-4
false
50
3.68653e-5
6
false
90
2.45768e-5
36
false
133
0.000609721
56
false
177
1.87463e-6
6
false
74
0.0015523
36
false
237
2.63323e-8
-4
false
176
4.6813e-7
157
8
106
false
137
5.92072e-5
26
false
185
1.19088e-5
116
false
170
7.93973e-5
96
false
253
3.88665e-6
86
false
266
3.4447e-8
26
false
183
0.000110867
106
false
264
1.3997e-8
46
false
241
0.000771453
96
false
193
0.000556845
36
false
408
1.18496e-8
96
false
137
0.000201247
106
false
75
0.00010253
6
false
198
0.000230408
16
false
183
5.46561e-5
36
false
306
1.36934e-5
96
false
305
1.44711e-8
26
false
310
4.30816e-7
36
false
204
8.92123e-5
36
false
342
4.78858e-6
66
false
100
2.17152e-5
56
false
235
0.000365657
66
false
366
4.79092e-8
106
false
200
3.37768e-6
26
false
344
6.96243e-6
56
false
177
0.00498236
96
false
299
1.4931e-6
16
false
283
0.000170348
86
false
236
0.000220982
36
false
237
0.000776475
106
false
305
1.81643e-9
198
6
80
false
162
4.91578e-5
90
false
69
0.000157292
90
false
177
0.00113732
160
false
8
2.73014e-6
120
false
240
8.47e-7
130
false
161
5.81418e-8
10
false
407
5.20145e-9
140
false
69
0.000241516
120
false
260
4.54351e-7
100
false
328
3.04703e-10
150
false
48
9.53101e-5
120
false
216
2.04306e-5
70
false
226
2.90569e-6
70
false
55
6.74108e-5
90
false
197
0.000808239
130
false
149
5.90266e-5
90
false
180
0.00014927
90
false
179
0.000335735
70
false
328
1.31459e-7
10
false
202
0.00020322
140
false
232
5.17619e-8
80
false
113
0.00414245
70
false
187
2.88903e-8
10
false
266
5.80628e-5
160
false
129
3.51419e-6
110
false
190
2.03812e-5
150
false
237
1.74374e-8
50
false
201
0.00182351
50
false
387
1.14546e-7
170
false
218
1.77469e-10
211
1
50
false
10
0.000750619
20
false
179
8.9604e-5
90
false
69
0.00123591
50
false
169
6.06993e-5
40
false
254
2.26091e-6
60
false
8
0.00187655
50
false
73
0.00188099
80
false
211
3.42571e-7
20
false
120
9.53889e-6
-10
false
267
8.46397e-9
90
false
177
4.72924e-7
70
false
172
0.000132989
10
false
70
0.000147461
90
false
130
0.000183097
-10
false
226
2.66615e-8
90
false
149
3.48757e-5
60
false
33
0.00019695
90
false
137
6.62093e-7
50
false
238
6.25308e-6
60
false
181
3.13366e-7
10
false
266
4.81035e-8
40
false
297
2.14742e-7
0
false
117
5.77666e-5
30
false
95
0.000785058
30
false
259
5.09043e-6
50
false
221
9.51398e-7
60
false
199
3.39893e-6
50
false
201
1.17787e-6
10
false
277
1.75549e-7
60
false
127
0.000242797
348
3
44
false
310
0.00249118
64
false
165
0.000533669
64
false
310
0.000827616
74
false
231
0.000120512
104
false
165
0.000226938
124
false
333
4.04336e-9
84
false
263
0.00145151
164
false
258
1.00428e-7
104
false
137
6.74108e-5
64
false
429
4.67587e-7
84
false
143
0.000612939
94
false
294
4.11521e-7
124
false
243
9.88356e-5
104
false
397
2.43762e-10
114
false
365
9.61444e-7
104
false
283
0.000217681
94
false
202
0.00263071
114
false
102
5.25804e-5
164
false
243
1.40015e-7
164
false
155
1.1203e-5
124
false
158
0.000391576
144
false
258
2.21192e-6
94
false
383
2.66615e-8
14
false
273
0.000214314
74
false
399
9.77306e-9
164
false
139
4.06568e-6
64
false
304
0.000306015
34
false
392
8.72234e-5
104
false
427
3.10251e-8
154
false
205
2.35182e-7
375
4
76
false
438
1.53403e-5
126
false
390
7.516e-7
166
false
371
2.23556e-9
66
false
209
0.000354531
46
false
332
0.000996054
126
false
209
0.000220817
126
false
288
0.000142983
146
false
325
2.27511e-7
116
false
373
9.45091e-6
96
false
440
3.26672e-7
106
false
393
1.15665e-5
76
false
288
0.000414734
86
false
352
0.000477293
146
false
147
1.50759e-5
116
false
436
1.74805e-7
166
false
267
1.30325e-6
126
false
375
7.1977e-7
6
false
334
1.84326e-5
146
false
182
4.96151e-5
86
false
266
6.63575e-5
76
false
271
0.00523119
176
false
353
1.68281e-10
156
false
399
4.97474e-12
156
false
333
3.78282e-8
146
false
241
1.70634e-6
106
false
305
6.53207e-6
136
false
290
2.54297e-5
116
false
289
0.000365225
116
false
410
3.03574e-7
116
false
188
0.000537114
110
4
26
false
133
0.000638284
36
false
139
4.35122e-5
76
false
159
2.13095e-7
26
false
226
7.28306e-9
46
false
265
3.80878e-10
66
false
129
0.000279862
76
false
158
8.29735e-7
26
false
185
2.6308e-8
26
false
240
1.06939e-6
66
false
191
3.77719e-6
36
false
255
2.92582e-8
6
false
242
3.54024e-7
26
false
183
6.26184e-7
16
false
120
1.15644e-5
76
false
176
4.22467e-6
46
false
241
1.26836e-7
66
false
111
0.00136669
76
false
180
5.32737e-8
66
false
218
7.4701e-7
86
false
214
5.43114e-7
36
false
277
1.75549e-8
6
false
90
3.68653e-5
76
false
67
0.00383687
36
false
133
0.000894241
56
false
177
2.13658e-5
16
false
283
1.57994e-8
6
false
74
0.00020322
36
false
237
1.48193e-6
-4
false
176
4.91537e-6
76
false
29
0.00107999
172
6
0
false
256
4.09614e-6
20
false
179
6.81534e-5
90
false
69
0.00253756
70
false
295
2.14155e-7
50
false
169
0.00113837
40
false
254
0.000247335
50
false
73
5.3086e-5
80
false
211
1.54191e-5
30
false
337
4.51733e-7
90
false
177
7.15743e-6
100
false
72
0.000117451
70
false
172
0.00258833
90
false
130
0.00133508
90
false
149
0.000331792
0
false
348
6.34797e-9
90
false
137
6.39391e-6
50
false
238
0.000225116
60
false
181
8.51061e-7
30
false
253
4.74319e-5
10
false
319
1.56043e-6
100
false
134
3.33246e-5
30
false
259
0.000159475
110
false
190
2.64691e-7
50
false
221
1.38622e-5
60
false
199
4.72061e-5
50
false
201
6.80164e-6
20
false
390
1.26959e-10
10
false
277
1.69307e-5
60
false
127
0.00151782
80
false
261
1.58652e-7
202
10
72
false
433
3.40251e-9
-8
false
373
1.77743e-6
82
false
227
1.4726e-6
112
false
380
7.11224e-8
122
false
196
0.000257972
102
false
246
6.0641e-7
72
false
183
0.000129472
162
false
89
1.04276e-5
112
false
101
2.60013e-7
142
false
282
3.96661e-8
-8
false
368
1.84326e-5
72
false
387
4.81252e-6
82
false
221
0.00114895
22
false
388
8.72234e-5
152
false
265
4.12719e-11
12
false
267
0.00013153
22
false
285
0.0018645
152
false
318
2.95782e-9
102
false
162
1.29435e-5
92
false
96
6.09194e-5
62
false
301
0.00169647
62
false
177
0.00173083
2
false
287
0.000118199
122
false
318
1.71501e-6
122
false
236
0.000130573
112
false
264
1.86641e-6
32
false
384
2.64201e-5
92
false
407
1.10131e-7
62
false
250
8.6671e-8
72
false
288
8.81187e-6
332
8
76
false
438
9.60215e-6
166
false
371
1.6697e-9
196
false
218
1.73342e-7
126
false
288
8.74132e-5
96
false
440
2.88555e-8
176
false
411
1.9899e-11
86
false
266
0.00279115
106
false
264
0.0013403
186
false
303
3.26736e-8
106
false
354
9.64596e-6
156
false
227
6.51846e-7
156
false
261
2.76716e-5
56
false
294
0.000396087
126
false
417
4.04336e-9
106
false
313
2.14313e-5
96
false
305
0.00151952
116
false
205
0.00027498
126
false
356
1.53509e-8
26
false
310
0.000196615
126
false
267
0.000153399
116
false
164
2.69643e-5
136
false
238
5.68779e-6
106
false
207
0.000162348
66
false
366
5.77666e-5
116
false
247
0.000347617
116
false
388
3.50092e-5
196
false
350
2.211e-12
106
false
357
2.12577e-7
136
false
290
3.48819e-6
106
false
305
0.00109778
xxxxxxxxxx
1
1
# Build the transition table for every (state, action) pair up front.
model = CarRentalEnvModel()
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
1.0
xxxxxxxxxx
1
1
# Tabular value approximator sized to the number of model states, using `Descent(1.0)` as optimiser.
V = TabularVApproximator(n_state=length(state_space(model)), opt=Descent(1.0))
TabularPolicy
├─ table => Dict
└─ n_action => 11
xxxxxxxxxx
1
1
# Initial policy: every state is mapped to action index 1.
p = TabularPolicy(;table=Dict(s=>1 for s in state_space(model)), n_action=length(action_space(model)))
300
xxxxxxxxxx
1
1
# Run policy iteration on `V` and `p` against the model with γ = 0.9 and max_iter = 300.
policy_iteration!(;V=V, π=p, model=model, γ=0.9, max_iter=300)
xxxxxxxxxx
1
1
# Heatmap of the decoded action chosen by `p` for each state, laid out on a (MaxCars+1)×(MaxCars+1) grid.
heatmap(0:MaxCars, 0:MaxCars, reshape([decode_action(p(x)) for x in state_space(model)], 1+MaxCars,1+MaxCars))
xxxxxxxxxx
1
1
# Heatmap of the entries of `V.table`, reshaped to the same (MaxCars+1)×(MaxCars+1) grid.
heatmap(0:MaxCars, 0:MaxCars, reshape(V.table, 1+MaxCars,1+MaxCars))