You selected dai0.pl
title:-
T=['% A prolog program of dynamic allocation index --- a provisional version '
,'% By Kenryo Indo (Kanto Gakuen University)'
,'% 4-11 Aug 2004'
,'% file: dai0.pl'
,'References:'
,'[1] Gittins, J.C. (1979). Bandit processes and dynamic allocation indicies.'
,' J. R. Stat. Soc. B 41:148-77.'
,'[2] Gittins, J.C. and Jones, D.M. (1979). A dynamic alloction index for the discounted multiarmed bandit problem.'
,' Biometrika 66(3): 561-5.'], forall(member(X,T),(nl,write(X))),nl.
:- title.
%--------------------------------------
% model of bandit process
%--------------------------------------
:- dynamic max_of_alpha_plus_beta/1.
max_of_alpha_plus_beta(141).
state(Alpha,Beta):-
(var(Alpha)->Alpha=A; A is Alpha),
(var(Beta)->Beta=B; B is Beta),
max_of_alpha_plus_beta(T),
state_0(A + B = T-_K).
state_0(Alpha + Beta = T-K):-
max_of_alpha_plus_beta(T0),
(T=0;natural_number_up_to(T0,T)),
(K=0;natural_number_up_to(T,K)),
T1 is T -K,
(Alpha=0;natural_number_up_to(T1,Alpha)),
Beta is T1 - Alpha.
initial_state(A,B):-
natural_number_up_to(10,A),
natural_number_up_to(10,B).
bandit(time(0),control,1).
bandit(time(0),state,(A,B)):-initial_state(A,B).
bandit(time(0),number_of_continue_before,0).
bandit(time(0),number_of_stop_before,0).
discount_factor(0.75).
expected_reward(V,state(A,B),control(1)):-
state(A,B),
V is (A+1)/(A+B+2).
expected_reward(0,state(_,_),control(0)).
stop_criteria(U):- K is 100,natural_number_up_to(K,N),U is N/K.
%---------------
% The index
%---------------
% dai (or V-value) for a bandit process is the maxmum over (constant) stopping times of
% averaged expected reward weighted by continuation probability for each state and trials.
:- dynamic dai_0/3.
dai(time(T-A-B=K),stop_by(U),R/W=V):-
max_of_alpha_plus_beta(T0),
natural_number_up_to(T0,T), T0>T,
state_0(A+B=T-K),
(clause(dai_0(time(T-A-B=K),stop_by(U),R/W=V),true)->true;
(max(U,max(V,
(stop_criteria(U),
total_discounted_reward(R/W=V,T-A-B=K,stop_by(U))
)
)),
retractall(dai_0(time(T-A-B=K),stop_by(_),_=V0)),(V0=V->true;read(y)),
assert(dai_0(time(T-A-B=K),stop_by(U),R/W=V))
)
).
% tests: N=21, for a handy (about a day) simulation on my PC for the present.
test_dai(Ttrue;!,fail),
natural_number_up_to(N,T), state_0(A+B=T-K),A=<9,B=<9,
dai(time(T-A-B=K),_,_),tell_goal('dai_out.txt',forall,dai_0(_,_,_)),fail.
test_dai_0(T-A-B=K,S,D):- state_0(A+B=20-_),A=<9,B=<9,max(K,dai_0(time(T-A-B=K),S,D)).
test_dai_0(T-A-B=K,S,D,true:D1,diff:X):- dai_0_140(time(140-A-B=_),_,_=D1),test_dai_0(T-A-B=K,S,_=D), X is D1 - D.
test_dai_N(T):- init_cnt,forall(test_dai_0(T-A-B=K,_,D,E,F),(update_cnt(N),X=([N],T-A-B=K,D,E,F),write(X),nl)),nl,write(complete).
test_dai_N(T,K):- init_cnt,forall(test_dai_0(T-A-B=K,_,D,E,F),(update_cnt(N),X=([N],T-A-B=K,D,E,F),write(X),nl)),nl,write(complete).
inspect_growth_from_other_window(T-A-B=K):- ['dai_out.txt'], max(B,max(A,max(K,(test_dai_0(T-A-B=K,_,_))))).
inspect_growth_1(T-A-B=K):- ['z:/Prolog/dai_out.txt'], max(A,max(K,(test_dai_0(T-A-B=K,_,_)))).
%:- write(tamesi), %%%% about line 85 here.
% averaged expected reward weighted by continuation probability
%---------------------------------------------------
total_discounted_reward(R/1=R,T-A-B=1,stop_by(U)):-
state_0(A+B=T-1),
stop_criteria(U),
expected_reward(R,state(A,B),control(1)).
total_discounted_reward(R/W=E,T-A-B=2,stop_by(U)):-
prob(Q0,success(0/1),state(A+B=T-2),stop_time(U)),
prob(Q1,success(1/1),state(A+B=T-2),stop_time(U)),
discount_factor(D),
W = 1 + D * (Q0 + Q1),
A1 is A + 1,
B1 is B + 1,
expected_reward(V0,state(A,B1),control(1)),
expected_reward(V1,state(A1,B),control(1)),
expected_reward(R1,state(A,B),control(1)),
R = R1+ D * (Q1 * V1 + Q0 * V0),
E is R/W.
total_discounted_reward(R/W=E,T-A-B=3,stop_by(U)):-
state_0(A+B=T-3),
T1 is T -1,
total_discounted_reward(R1/W1=_E1,T1-A-B=2,stop_by(U)),
prob(Q0,success(0/2),state(A+B=T-3),stop_time(U)),
prob(Q1,success(1/2),state(A+B=T-3),stop_time(U)),
prob(Q2,success(2/2),state(A+B=T-3),stop_time(U)),
discount_factor(D),
W = W1 + D^2 * (Q0 + Q1 + Q2),
A1 is A + 1,
B1 is B + 1,
A2 is A + 2,
B2 is B + 2,
expected_reward(V0,state(A,B2),control(1)),
expected_reward(V1,state(A1,B1),control(1)),
expected_reward(V2,state(A2,B),control(1)),
R = R1 + D^2 * (Q2*V2 + Q1*V1 + Q0*V0),
E is R/W.
%:- write(tamesi), %%%% about line 130 here.
% a generalization.
total_discounted_reward(R/W=E,T-A-B=K,stop_by(U)):-
state_0(A+B=T-K),
K > 3,
T1 is T -1,
K1 is K -1,
total_discounted_reward(R1/W1=_E1,T1-A-B=K1,stop_by(U)),
findall(Q * V,
(
(X=0;natural_number_up_to(K1,X)),
A1 is A + X,
B1 is B + K1 - X,
expected_reward(V,state(A1,B1),control(1)),
prob(Q,success(X/K1),state(A+B=T-K),stop_time(U))
),
LQV),
sum_eq(LQV,EV,_),
findall(Q, member(Q * V, LQV), LQ),
sum_eq(LQ,QS,_),
discount_factor(D),
R = R1 + D^K1 * EV,
W = W1 + D^K1 * QS,
E is R/W.
%:- write(tamesi), %%%% about line 157 here.
% continuation probability (Q-value)
%---------------------------------------------------
% after 2 trials with X successful pulls.
prob(Q,success(X/1),state(A+B=T-2),stop_time(U)):-
member((X,Y),[(1,0),(0,1)]),
state_0(A+B=T-2),
A1 is A + X,
B1 is B + Y,
dai(time(T-A1-B1=1),stop_by(U),V/1=V),
P is (A+1)/(A+B+2),
(V < U -> Q = 0; Q = P).
% after 3 trials with X successful pulls.
prob(Q,success(X/2),state(A+B=T-3),stop_time(U)):-
state_0(A+B=T-3),
stop_criteria(U),
natural_number_up_to(3,X0),
X is X0 - 1,
findall(Q1*Q2,
prob_item(Q1*Q2,success(X/2),state(A+B=T-3),stop_time(U)),
LQ),
sum(LQ,Q).
%:- write(tamesi), %%%% about line 185 here.
% a generalization.
% after K trials with X successful pulls.
prob(Q,success(X/K1),state(A+B=T-K),stop_time(U)):-
state_0(A+B=T-K),
K>3,
K1 is K - 1,
stop_criteria(U),
natural_number_up_to(K,X0),
X is X0 - 1,
findall(ProductQ,
prob_item(ProductQ,success(X/K1),state(A+B=T-K),stop_time(U)),
LQ),
sum(LQ,Q).
prob_item(Q1*Q2,success(X/2),state(A+B=T-3),stop_time(U)):-
member((X,X1,X2),[(0,0,0),(1,1,0),(1,0,1),(2,1,1)]),
A1 is A + X,
B1 is B + 2 -X,
P1 is A1/(A1+B1),
dai(time(T-A1-B1=1),stop_by(_U1),_=V1),
%nl,write(dai(time(T-A1-B1=1),stop_by(U),_=V1)),
(V1 < U -> Q1 = 0; Q1 = P1),
A2 is A + X - X2,
B2 is B + 1 - X1,
P2 is (A2)/(A2+B2),
dai(time(T-A2-B2=2),stop_by(_U2),_=V2),
%nl,write(dai(time(T-A2-B2=2),stop_by(U),_=V2)),
(V2 < U -> Q2 = 0; Q2 = P2).
%:- write(tamesi), %%%% about line 219 here.
% The terms of generalized prob/4.
prob_item(Q,success(X/K1),state(A+B=T-K),stop_time(U)):-
K>3,
distribution_0(K1,X,_,D,0,[0,1]),
findall(Qi,
prob_item_0(Qi,[A+B=T-K,X,D,U],_V,_),
QL),
product(QL,Q).
prob_item_0(Qi,[A+B=T-K,X,D,U],Vi,T-Ai-Bi=I):-
nth1(I,D,Zi),
Xi is X - Zi,
Ai is A + Xi,
Yi is K - I - Xi,
Bi is B + Yi,
Pi is (Ai)/(Ai+Bi),
dai(time(T-Ai-Bi=I),stop_by(_Ui),_E=Vi),
(Vi < U -> Qi = 0; Qi = Pi).
%--------------------------------------
% common programs (utilities)
%--------------------------------------
natural_number_up_to(M,N):-
(var(M)->max_of_alpha_plus_beta(M);true),
M1 is integer(M),
length(L,M1),
nth1(N,L,_).
%distribution(length:K,sum_up_to:X,send:O,inventory:H,remain:R,allow:A):-
% distribution_0(K,X,O,H,R,A).
distribution_0(0,R,[],[],R,_A).
distribution_0(K,M,[Y|O],[M1|H],R,A):-
(var(A)
-> (Y=0;natural_number_up_to(M,Y))
; (member(Y,A),Y= X
).
ranking(X,Goal,Ranking,ascend):-
% X: the objective variable,
% Goal: the objective function and constraints,
setof((X,Goal),Goal,Z),
sort(Z,Ranking),
Ranking=[X|_].
ranking(X,Goal,Ranking,descend):-
% X: the objective variable,
% Goal: the objective function and constraints,
ranking(X,Goal,Ranking0,ascend),
reverse(Ranking0,Ranking).
sum([],0).
sum([X|Members],Sum):-
sum(Members, Sum1),
Sum is Sum1 + X.
product([],1).
product([X|Members], Prod):-
product(Members, Prod1),
Prod is Prod1 * X.
sum_eq([],0,0).
sum_eq([X|Members],E + X, Sum):-
sum_eq(Members,E, Sum1),
Sum is Sum1 + X.
product_eq([],1,1).
product_eq([X|Members],E*X, Prod):-
product_eq(Members,E, Prod1),
Prod is Prod1 * X.
product_sum([],[],[],0).
product_sum([P|Q],[A|B],[E|F],V):-
length(Q,N),
length(B,N),
product_sum(Q,B,F,V1),
E is P * A,
V is V1 + E.
average(U,G,A):-
findall(U,G,B),
length(B,N),
sum(B,S),
A is S/N.
stdev(U,G,Y):-
average(U,G,A),
findall((U-A)^2,G,B),
length(B,N),
sum(B,S),
Y is S/(N-1).
bag0([],_A,0).
bag0([C|B],A,N):-
length([C|B],N),
bag0(B,A,_N1),
member(C,A).
zeros(Zero,N):-bag0(Zero,[0],N).
ones(One,N):-bag0(One,[1],N).
binary(Bin,N):-bag0(Bin,[0,1],N).
forall_write(X,G):-
forall(G,(nl,write(X))).
% redirect to file
%--------------------------------
tell_goal(File,G):-
(current_stream(File,write,S0)->close(S0);true),
open(File,write,S),
tell(File),
nl,
time_stamp('% file output start time ',_),
nl,
write('%---------- start from here ------------%'),
nl,
G,
nl,
write('%---------- end of data ------------%'),
nl,
time_stamp('% file output end time ',_),
tell(user),
close(S),
% The following is to cope with the duplicated stream problem.
(current_stream(File,write,S1)->close(S1);true).
% save all successful goals to a file
%--------------------------------
tell_goal(File,forall,G):-
G0 = (nl,write(G),write('.')),
G1 = forall(G,G0),
tell_goal(File,G1).
tell_goal(File,forall_such_that,G,Condition):-
% G should be occurred in the Condition.
WRITE = (nl,write(G),write('.')),
G1 = forall(Condition,WRITE),
tell_goal(File,G1).
% time stamp
%--------------------------------
time_stamp(no,T):-
get_time(U),
convert_time(U,A,B,C,D,E,F,_G),
T = [date(A/B/C), time(D:E:F)],
nl.
time_stamp(Word,T):-
\+ var(Word),
Word \= no,
get_time(U),
convert_time(U,A,B,C,D,E,F,_G),
T = [date(A/B/C), time(D:E:F)],
% format('~`.t~t~a~30|~`.t~t~70|~n',[Word]),
write((Word,T)),
nl.
%counter
%----------------------
:- dynamic cnt/1.
cnt(0).
init_cnt:- abolish(cnt/1),assert(cnt(0)).
update_cnt(N):- retract(cnt(B)),N is B+1,assert(cnt(N)).
% Table 1 in Gittins and Jones (1979).
%--------------------------------------------
% note: Each parameter in the table 1 cooresponds to that of Gittins(1979)
% who describes the algorithm above modeled minus 1.
dai_0_140(time(140-A-B=K),stop_by(U),_=U):-
dai_140(B1,A1,U),A is A1-1, B is B1 -1,
K is 140 - A - B.
dai_140(1,1,0.6211).
dai_140(1,2,0.7465).
dai_140(1,3,0.8062).
dai_140(1,4,0.8419).
dai_140(1,5,0.8659).
dai_140(1,6,0.8833).
dai_140(1,7,0.8965).
dai_140(1,8,0.9069).
dai_140(1,9,0.9153).
dai_140(1,10,0.9223).
dai_140(2,1,0.4256).
dai_140(2,2,0.5760).
dai_140(2,3,0.6607).
dai_140(2,4,0.7159).
dai_140(2,5,0.7548).
dai_140(2,6,0.7841).
dai_140(2,7,0.8068).
dai_140(2,8,0.8251).
dai_140(2,9,0.8401).
dai_140(2,10,0.8526).
dai_140(3,1,0.3182).
dai_140(3,2,0.4641).
dai_140(3,3,0.5554).
dai_140(3,4,0.6191).
dai_140(3,5,0.6659).
dai_140(3,6,0.7023).
dai_140(3,7,0.7312).
dai_140(3,8,0.7548).
dai_140(3,9,0.7745).
dai_140(3,10,0.7912).
dai_140(4,1,0.2519).
dai_140(4,2,0.3871).
dai_140(4,3,0.4773).
dai_140(4,4,0.5436).
dai_140(4,5,0.5946).
dai_140(4,6,0.6348).
dai_140(4,7,0.6673).
dai_140(4,8,0.6946).
dai_140(4,9,0.7176).
dai_140(4,10,0.7372).
dai_140(5,1,0.2073).
dai_140(5,2,0.3307).
dai_140(5,3,0.4182).
dai_140(5,4,0.4838).
dai_140(5,5,0.5360).
dai_140(5,6,0.5784).
dai_140(5,7,0.6134).
dai_140(5,8,0.6428).
dai_140(5,9,0.6678).
dai_140(5,10,0.6896).
dai_140(6,1,0.1755).
dai_140(6,2,0.2883).
dai_140(6,3,0.3713).
dai_140(6,4,0.4359).
dai_140(6,5,0.4875).
dai_140(6,6,0.5306).
dai_140(6,7,0.5669).
dai_140(6,8,0.5978).
dai_140(6,9,0.6244).
dai_140(6,10,0.6476).
dai_140(7,1,0.1518).
dai_140(7,2,0.2550).
dai_140(7,3,0.3334).
dai_140(7,4,0.3961).
dai_140(7,5,0.4473).
dai_140(7,6,0.4899).
dai_140(7,7,0.5266).
dai_140(7,8,0.5584).
dai_140(7,9,0.5860).
dai_140(7,10,0.6102).
dai_140(8,1,0.1335).
dai_140(8,2,0.2285).
dai_140(8,3,0.3025).
dai_140(8,4,0.3627).
dai_140(8,5,0.4129).
dai_140(8,6,0.4553).
dai_140(8,7,0.4916).
dai_140(8,8,0.5236).
dai_140(8,9,0.5518).
dai_140(8,10,0.5767).
dai_140(9,1,0.1190).
dai_140(9,2,0.2067).
dai_140(9,3,0.2767).
dai_140(9,4,0.3343).
dai_140(9,5,0.3832).
dai_140(9,6,0.4249).
dai_140(9,7,0.4611).
dai_140(9,8,0.4928).
dai_140(9,9,0.5212).
dai_140(9,10,0.5465).
dai_140(10,1,0.1072).
dai_140(10,2,0.1886).
dai_140(10,3,0.2547).
dai_140(10,4,0.3100).
dai_140(10,5,0.3537).
dai_140(10,6,0.3983).
dai_140(10,7,0.4341).
dai_140(10,8,0.4657).
dai_140(10,9,0.4937).
dai_140(10,10,0.5192).
/*
% tentatively generated data by dai0.pl, of a goal test_dai/2
% compared with T=140 data in Gittins & Jones (1979).
?- test_dai(T<21,_).
[T-A-B=<1], go ahead(y/n):y.
[T-A-B=<2], go ahead(y/n):y.
[T-A-B=<3], go ahead(y/n):y.
[T-A-B=<4], go ahead(y/n):y.
% another terminal.
?- inspect_growth_from_other_window(A).
% dai_out.txt compiled 0.11 sec, 46,172 bytes
A = 13-9-0=4
Yes
?- test_dai_N(_,_).
[1], 4-0-0=4, 0.604839, true:0.6211, diff:0.016261
[2], 5-1-0=4, 0.723232, true:0.7465, diff:0.023268
[3], 6-2-0=4, 0.785294, true:0.8062, diff:0.020906
[4], 7-3-0=4, 0.824112, true:0.8419, diff:0.017788
[5], 8-4-0=4, 0.85085, true:0.8659, diff:0.01505
[6], 9-5-0=4, 0.870445, true:0.8833, diff:0.012855
[7], 10-6-0=4, 0.885446, true:0.8965, diff:0.011054
[8], 11-7-0=4, 0.89731, true:0.9069, diff:0.00959
[9], 12-8-0=4, 0.906932, true:0.9153, diff:0.008368
...
[93], 15-2-9=4, 0.245336, true:0.2547, diff:0.009364
[94], 16-3-9=4, 0.301453, true:0.31, diff:0.008547
[95], 17-4-9=4, 0.34918, true:0.3537, diff:0.00452
[96], 17-5-9=3, 0.387594, true:0.3983, diff:0.010706
[97], 18-6-9=3, 0.423882, true:0.4341, diff:0.010218
[98], 19-7-9=3, 0.456111, true:0.4657, diff:0.009589
[99], 20-8-9=3, 0.484839, true:0.4937, diff:0.008861
[100], 21-9-9=3, 0.510619, true:0.5192, diff:0.008581
complete
Yes
?- tell_goal('dai_run.txt',forall,(A).
test_dai_N(_,_)).
Yes
?-
% You can analyze read the file and analyze it by using spreadsheet and so on.
*/
% end of program
return to front page.