You selected dai0.pl

title:-
T=['% A prolog program of dynamic allocation index --- a provisional version '
,'% By Kenryo Indo (Kanto Gakuen University)'
,'% 4-11 Aug 2004'
,'% file: dai0.pl'
,'References:'
,'[1] Gittins, J.C. (1979). Bandit processes and dynamic allocation indicies.'
,'  J. R. Stat. Soc. B 41:148-77.'
,'[2] Gittins, J.C. and Jones, D.M. (1979). A dynamic alloction index for the discounted multiarmed bandit problem.'
,'  Biometrika 66(3): 561-5.'], forall(member(X,T),(nl,write(X))),nl.
:- title.

%--------------------------------------
% model of bandit process
%--------------------------------------

:- dynamic  max_of_alpha_plus_beta/1.

max_of_alpha_plus_beta(141).

state(Alpha,Beta):-
   (var(Alpha)->Alpha=A; A is Alpha),
   (var(Beta)->Beta=B; B is Beta),
   max_of_alpha_plus_beta(T),
   state_0(A + B = T-_K).

state_0(Alpha + Beta = T-K):-
   max_of_alpha_plus_beta(T0),
   (T=0;natural_number_up_to(T0,T)),
   (K=0;natural_number_up_to(T,K)),
   T1 is T -K,
   (Alpha=0;natural_number_up_to(T1,Alpha)),
   Beta is T1 - Alpha.

initial_state(A,B):-
   natural_number_up_to(10,A),
   natural_number_up_to(10,B).

bandit(time(0),control,1).
bandit(time(0),state,(A,B)):-initial_state(A,B).
bandit(time(0),number_of_continue_before,0).
bandit(time(0),number_of_stop_before,0).

discount_factor(0.75).

expected_reward(V,state(A,B),control(1)):-
   state(A,B),
   V is (A+1)/(A+B+2).

expected_reward(0,state(_,_),control(0)).

stop_criteria(U):- K is 100,natural_number_up_to(K,N),U is N/K.


%---------------
% The index
%---------------

% dai (or V-value) for a bandit process is the maxmum over (constant) stopping times of 
% averaged expected reward weighted by continuation probability for each state and trials. 

:- dynamic dai_0/3. 

dai(time(T-A-B=K),stop_by(U),R/W=V):-
   max_of_alpha_plus_beta(T0),
   natural_number_up_to(T0,T), T0>T,
   state_0(A+B=T-K),
   (clause(dai_0(time(T-A-B=K),stop_by(U),R/W=V),true)->true;
    (max(U,max(V,
      (stop_criteria(U),   
       total_discounted_reward(R/W=V,T-A-B=K,stop_by(U))
      )
     )),
     retractall(dai_0(time(T-A-B=K),stop_by(_),_=V0)),(V0=V->true;read(y)),
     assert(dai_0(time(T-A-B=K),stop_by(U),R/W=V))
    )
   ).

% tests:  N=21, for a handy (about a day) simulation on my PC for the present. 
test_dai(Ttrue;!,fail),
   natural_number_up_to(N,T), state_0(A+B=T-K),A=<9,B=<9,
   dai(time(T-A-B=K),_,_),tell_goal('dai_out.txt',forall,dai_0(_,_,_)),fail.
test_dai_0(T-A-B=K,S,D):- state_0(A+B=20-_),A=<9,B=<9,max(K,dai_0(time(T-A-B=K),S,D)).
test_dai_0(T-A-B=K,S,D,true:D1,diff:X):- dai_0_140(time(140-A-B=_),_,_=D1),test_dai_0(T-A-B=K,S,_=D), X is D1 - D.
test_dai_N(T):- init_cnt,forall(test_dai_0(T-A-B=K,_,D,E,F),(update_cnt(N),X=([N],T-A-B=K,D,E,F),write(X),nl)),nl,write(complete).
test_dai_N(T,K):- init_cnt,forall(test_dai_0(T-A-B=K,_,D,E,F),(update_cnt(N),X=([N],T-A-B=K,D,E,F),write(X),nl)),nl,write(complete).
inspect_growth_from_other_window(T-A-B=K):- ['dai_out.txt'], max(B,max(A,max(K,(test_dai_0(T-A-B=K,_,_))))).
inspect_growth_1(T-A-B=K):- ['z:/Prolog/dai_out.txt'], max(A,max(K,(test_dai_0(T-A-B=K,_,_)))).

%:- write(tamesi),  %%%% about line 85 here.

% averaged expected reward weighted by continuation probability
%---------------------------------------------------

total_discounted_reward(R/1=R,T-A-B=1,stop_by(U)):-
   state_0(A+B=T-1),
   stop_criteria(U),
   expected_reward(R,state(A,B),control(1)).

total_discounted_reward(R/W=E,T-A-B=2,stop_by(U)):-
   prob(Q0,success(0/1),state(A+B=T-2),stop_time(U)),
   prob(Q1,success(1/1),state(A+B=T-2),stop_time(U)),
   discount_factor(D),
   W = 1 + D * (Q0 + Q1),
   A1 is A + 1,
   B1 is B + 1,
   expected_reward(V0,state(A,B1),control(1)),
   expected_reward(V1,state(A1,B),control(1)),
   expected_reward(R1,state(A,B),control(1)),
   R = R1+ D * (Q1 * V1 + Q0 * V0),
   E is R/W.

total_discounted_reward(R/W=E,T-A-B=3,stop_by(U)):-
   state_0(A+B=T-3),
   T1 is T -1,
   total_discounted_reward(R1/W1=_E1,T1-A-B=2,stop_by(U)),
   prob(Q0,success(0/2),state(A+B=T-3),stop_time(U)),
   prob(Q1,success(1/2),state(A+B=T-3),stop_time(U)),
   prob(Q2,success(2/2),state(A+B=T-3),stop_time(U)),
   discount_factor(D),
   W = W1 + D^2 * (Q0 + Q1 + Q2),
   A1 is A + 1,
   B1 is B + 1,
   A2 is A + 2,
   B2 is B + 2,
   expected_reward(V0,state(A,B2),control(1)),
   expected_reward(V1,state(A1,B1),control(1)),
   expected_reward(V2,state(A2,B),control(1)),
   R = R1 + D^2 * (Q2*V2 + Q1*V1 + Q0*V0),
   E is R/W.


%:- write(tamesi),  %%%% about line 130 here.

% a generalization.
total_discounted_reward(R/W=E,T-A-B=K,stop_by(U)):-
   state_0(A+B=T-K),
   K > 3,
   T1 is T -1,
   K1 is K -1,
   total_discounted_reward(R1/W1=_E1,T1-A-B=K1,stop_by(U)),
   findall(Q * V,
    (
     (X=0;natural_number_up_to(K1,X)),
     A1 is A + X,
     B1 is B + K1 - X,
     expected_reward(V,state(A1,B1),control(1)),
     prob(Q,success(X/K1),state(A+B=T-K),stop_time(U))
    ),
   LQV),
   sum_eq(LQV,EV,_),
   findall(Q, member(Q * V, LQV), LQ),
   sum_eq(LQ,QS,_),
   discount_factor(D),
   R = R1 + D^K1 * EV,
   W = W1 + D^K1 * QS,
   E is R/W.


%:- write(tamesi),  %%%% about line 157 here.


% continuation probability (Q-value) 
%---------------------------------------------------

% after 2 trials with X successful pulls.
prob(Q,success(X/1),state(A+B=T-2),stop_time(U)):-
   member((X,Y),[(1,0),(0,1)]),
   state_0(A+B=T-2),
   A1 is A + X,   
   B1 is B + Y,
   dai(time(T-A1-B1=1),stop_by(U),V/1=V),
   P is (A+1)/(A+B+2),
   (V < U -> Q = 0; Q = P).

% after 3 trials with X successful pulls.
prob(Q,success(X/2),state(A+B=T-3),stop_time(U)):-
   state_0(A+B=T-3),
   stop_criteria(U),
   natural_number_up_to(3,X0),
   X is X0 - 1,
   findall(Q1*Q2,
     prob_item(Q1*Q2,success(X/2),state(A+B=T-3),stop_time(U)),
   LQ),
   sum(LQ,Q).


%:- write(tamesi),  %%%% about line 185 here.


% a generalization.
% after K trials with X successful pulls.

prob(Q,success(X/K1),state(A+B=T-K),stop_time(U)):-
   state_0(A+B=T-K),
   K>3,
   K1 is K - 1,
   stop_criteria(U),
   natural_number_up_to(K,X0),
   X is X0 - 1,
   findall(ProductQ,
     prob_item(ProductQ,success(X/K1),state(A+B=T-K),stop_time(U)),
   LQ),
   sum(LQ,Q).

prob_item(Q1*Q2,success(X/2),state(A+B=T-3),stop_time(U)):-
   member((X,X1,X2),[(0,0,0),(1,1,0),(1,0,1),(2,1,1)]),
   A1 is A + X,
   B1 is B + 2 -X,
   P1 is A1/(A1+B1),
   dai(time(T-A1-B1=1),stop_by(_U1),_=V1),
   %nl,write(dai(time(T-A1-B1=1),stop_by(U),_=V1)),
   (V1 < U -> Q1 = 0; Q1 = P1),
   A2 is A + X - X2,
   B2 is B + 1 - X1,
   P2 is (A2)/(A2+B2),
   dai(time(T-A2-B2=2),stop_by(_U2),_=V2),
   %nl,write(dai(time(T-A2-B2=2),stop_by(U),_=V2)),
   (V2 < U -> Q2 = 0; Q2 = P2).


%:- write(tamesi),  %%%% about line 219 here.


% The terms of generalized prob/4.

prob_item(Q,success(X/K1),state(A+B=T-K),stop_time(U)):-
   K>3,
   distribution_0(K1,X,_,D,0,[0,1]),
   findall(Qi,
     prob_item_0(Qi,[A+B=T-K,X,D,U],_V,_),
   QL),
   product(QL,Q).

prob_item_0(Qi,[A+B=T-K,X,D,U],Vi,T-Ai-Bi=I):-
   nth1(I,D,Zi),
   Xi is X - Zi,
   Ai is A + Xi,
   Yi is K - I - Xi,
   Bi is B + Yi,
   Pi is (Ai)/(Ai+Bi),
   dai(time(T-Ai-Bi=I),stop_by(_Ui),_E=Vi),
   (Vi < U -> Qi = 0; Qi = Pi).

%--------------------------------------
% common programs (utilities)
%--------------------------------------

natural_number_up_to(M,N):-
   (var(M)->max_of_alpha_plus_beta(M);true),
   M1 is integer(M),
   length(L,M1),
   nth1(N,L,_).

%distribution(length:K,sum_up_to:X,send:O,inventory:H,remain:R,allow:A):-
%   distribution_0(K,X,O,H,R,A).

distribution_0(0,R,[],[],R,_A).
distribution_0(K,M,[Y|O],[M1|H],R,A):-
   (var(A)
    -> (Y=0;natural_number_up_to(M,Y))
    ;  (member(Y,A),Y= X
  ).

ranking(X,Goal,Ranking,ascend):-
  % X: the objective variable,
  % Goal: the objective function and constraints,
  setof((X,Goal),Goal,Z),
  sort(Z,Ranking),
  Ranking=[X|_].

ranking(X,Goal,Ranking,descend):-
  % X: the objective variable,
  % Goal: the objective function and constraints,
  ranking(X,Goal,Ranking0,ascend),
  reverse(Ranking0,Ranking).

sum([],0).
sum([X|Members],Sum):-
   sum(Members, Sum1),
   Sum is Sum1 + X.

product([],1).
product([X|Members], Prod):-
   product(Members, Prod1),
   Prod is Prod1 * X.

sum_eq([],0,0).
sum_eq([X|Members],E + X, Sum):-
   sum_eq(Members,E, Sum1),
   Sum is Sum1 + X.

product_eq([],1,1).
product_eq([X|Members],E*X, Prod):-
   product_eq(Members,E, Prod1),
   Prod is Prod1 * X.


product_sum([],[],[],0).
product_sum([P|Q],[A|B],[E|F],V):-
   length(Q,N),
   length(B,N),
   product_sum(Q,B,F,V1),
   E is P * A,
   V is V1 + E.

average(U,G,A):-
   findall(U,G,B),
   length(B,N),
   sum(B,S),
   A is S/N.

stdev(U,G,Y):-
   average(U,G,A),
   findall((U-A)^2,G,B),
   length(B,N),
   sum(B,S),
   Y is S/(N-1).


bag0([],_A,0).
bag0([C|B],A,N):-
   length([C|B],N),
   bag0(B,A,_N1),
   member(C,A).
zeros(Zero,N):-bag0(Zero,[0],N).
ones(One,N):-bag0(One,[1],N).

binary(Bin,N):-bag0(Bin,[0,1],N).

forall_write(X,G):-
  forall(G,(nl,write(X))).


% redirect to file
%--------------------------------

tell_goal(File,G):-
   (current_stream(File,write,S0)->close(S0);true),
   open(File,write,S),
   tell(File),
   nl,
   time_stamp('% file output start time ',_),
   nl,
   write('%----------  start from here ------------%'),
   nl,
   G,
   nl,
   write('%----------  end of data ------------%'),
   nl,
   time_stamp('% file output end time ',_),
   tell(user),
   close(S),
   % The following is to cope with the duplicated stream problem.
   (current_stream(File,write,S1)->close(S1);true).

% save all successful goals to a file
%--------------------------------

tell_goal(File,forall,G):-
   G0 = (nl,write(G),write('.')),
   G1 = forall(G,G0),
   tell_goal(File,G1).


tell_goal(File,forall_such_that,G,Condition):-
   % G should be occurred in the Condition.
   WRITE = (nl,write(G),write('.')),
   G1 = forall(Condition,WRITE),
   tell_goal(File,G1).


% time stamp
%--------------------------------

time_stamp(no,T):-
   get_time(U),
   convert_time(U,A,B,C,D,E,F,_G),
   T = [date(A/B/C), time(D:E:F)],
   nl.

time_stamp(Word,T):-
   \+ var(Word),
   Word \= no,
   get_time(U),
   convert_time(U,A,B,C,D,E,F,_G),
   T = [date(A/B/C), time(D:E:F)],
%   format('~`.t~t~a~30|~`.t~t~70|~n',[Word]),
   write((Word,T)),
   nl.


%counter
%----------------------
:- dynamic cnt/1.

cnt(0).
init_cnt:- abolish(cnt/1),assert(cnt(0)).
update_cnt(N):- retract(cnt(B)),N is B+1,assert(cnt(N)).



% Table 1 in Gittins and Jones (1979). 
%--------------------------------------------
% note: Each parameter in the table 1 cooresponds to that of Gittins(1979)
% who describes the algorithm above modeled minus 1.

dai_0_140(time(140-A-B=K),stop_by(U),_=U):-
   dai_140(B1,A1,U),A is A1-1, B is B1 -1,
   K is 140 - A - B.

dai_140(1,1,0.6211).
dai_140(1,2,0.7465).
dai_140(1,3,0.8062).
dai_140(1,4,0.8419).
dai_140(1,5,0.8659).
dai_140(1,6,0.8833).
dai_140(1,7,0.8965).
dai_140(1,8,0.9069).
dai_140(1,9,0.9153).
dai_140(1,10,0.9223).
dai_140(2,1,0.4256).
dai_140(2,2,0.5760).
dai_140(2,3,0.6607).
dai_140(2,4,0.7159).
dai_140(2,5,0.7548).
dai_140(2,6,0.7841).
dai_140(2,7,0.8068).
dai_140(2,8,0.8251).
dai_140(2,9,0.8401).
dai_140(2,10,0.8526).
dai_140(3,1,0.3182).
dai_140(3,2,0.4641).
dai_140(3,3,0.5554).
dai_140(3,4,0.6191).
dai_140(3,5,0.6659).
dai_140(3,6,0.7023).
dai_140(3,7,0.7312).
dai_140(3,8,0.7548).
dai_140(3,9,0.7745).
dai_140(3,10,0.7912).
dai_140(4,1,0.2519).
dai_140(4,2,0.3871).
dai_140(4,3,0.4773).
dai_140(4,4,0.5436).
dai_140(4,5,0.5946).
dai_140(4,6,0.6348).
dai_140(4,7,0.6673).
dai_140(4,8,0.6946).
dai_140(4,9,0.7176).
dai_140(4,10,0.7372).
dai_140(5,1,0.2073).
dai_140(5,2,0.3307).
dai_140(5,3,0.4182).
dai_140(5,4,0.4838).
dai_140(5,5,0.5360).
dai_140(5,6,0.5784).
dai_140(5,7,0.6134).
dai_140(5,8,0.6428).
dai_140(5,9,0.6678).
dai_140(5,10,0.6896).
dai_140(6,1,0.1755).
dai_140(6,2,0.2883).
dai_140(6,3,0.3713).
dai_140(6,4,0.4359).
dai_140(6,5,0.4875).
dai_140(6,6,0.5306).
dai_140(6,7,0.5669).
dai_140(6,8,0.5978).
dai_140(6,9,0.6244).
dai_140(6,10,0.6476).
dai_140(7,1,0.1518).
dai_140(7,2,0.2550).
dai_140(7,3,0.3334).
dai_140(7,4,0.3961).
dai_140(7,5,0.4473).
dai_140(7,6,0.4899).
dai_140(7,7,0.5266).
dai_140(7,8,0.5584).
dai_140(7,9,0.5860).
dai_140(7,10,0.6102).
dai_140(8,1,0.1335).
dai_140(8,2,0.2285).
dai_140(8,3,0.3025).
dai_140(8,4,0.3627).
dai_140(8,5,0.4129).
dai_140(8,6,0.4553).
dai_140(8,7,0.4916).
dai_140(8,8,0.5236).
dai_140(8,9,0.5518).
dai_140(8,10,0.5767).
dai_140(9,1,0.1190).
dai_140(9,2,0.2067).
dai_140(9,3,0.2767).
dai_140(9,4,0.3343).
dai_140(9,5,0.3832).
dai_140(9,6,0.4249).
dai_140(9,7,0.4611).
dai_140(9,8,0.4928).
dai_140(9,9,0.5212).
dai_140(9,10,0.5465).
dai_140(10,1,0.1072).
dai_140(10,2,0.1886).
dai_140(10,3,0.2547).
dai_140(10,4,0.3100).
dai_140(10,5,0.3537).
dai_140(10,6,0.3983).
dai_140(10,7,0.4341).
dai_140(10,8,0.4657).
dai_140(10,9,0.4937).
dai_140(10,10,0.5192).


/*

% tentatively generated data by dai0.pl, of a goal test_dai/2 
% compared with T=140 data in Gittins & Jones (1979).

?- test_dai(T<21,_).
[T-A-B=<1], go ahead(y/n):y.
[T-A-B=<2], go ahead(y/n):y.
[T-A-B=<3], go ahead(y/n):y.
[T-A-B=<4], go ahead(y/n):y.

% another terminal.
 
?- inspect_growth_from_other_window(A).
% dai_out.txt compiled 0.11 sec, 46,172 bytes

A = 13-9-0=4 

Yes
?- test_dai_N(_,_).
[1], 4-0-0=4, 0.604839, true:0.6211, diff:0.016261
[2], 5-1-0=4, 0.723232, true:0.7465, diff:0.023268
[3], 6-2-0=4, 0.785294, true:0.8062, diff:0.020906
[4], 7-3-0=4, 0.824112, true:0.8419, diff:0.017788
[5], 8-4-0=4, 0.85085, true:0.8659, diff:0.01505
[6], 9-5-0=4, 0.870445, true:0.8833, diff:0.012855
[7], 10-6-0=4, 0.885446, true:0.8965, diff:0.011054
[8], 11-7-0=4, 0.89731, true:0.9069, diff:0.00959
[9], 12-8-0=4, 0.906932, true:0.9153, diff:0.008368

...
[93], 15-2-9=4, 0.245336, true:0.2547, diff:0.009364
[94], 16-3-9=4, 0.301453, true:0.31, diff:0.008547
[95], 17-4-9=4, 0.34918, true:0.3537, diff:0.00452
[96], 17-5-9=3, 0.387594, true:0.3983, diff:0.010706
[97], 18-6-9=3, 0.423882, true:0.4341, diff:0.010218
[98], 19-7-9=3, 0.456111, true:0.4657, diff:0.009589
[99], 20-8-9=3, 0.484839, true:0.4937, diff:0.008861
[100], 21-9-9=3, 0.510619, true:0.5192, diff:0.008581

complete

Yes
?- tell_goal('dai_run.txt',forall,(A).
test_dai_N(_,_)).

Yes
?- 

% You can analyze read the file and analyze it by using spreadsheet and so on.

*/


% end of program

return to front page.