prolog source code

You selected mw2u_130523.pl

% Mann-Whitney-Wilcoxon's U-statistics
% mw2u_130519.pl (tested on SWI-Prolog 6.2.2, 64bit)
% 2013.5.17 (modified: 5.19,23)
% By Kenryo Indo


% U is a total number of defeated opponents over the group.[1,2]


% the Direct Calculation Method

mw2u( L1, L2, U):-
	 length( L1, N1),
%	 ( N1 > 20 -> mw2ui( L1, L2, U) ; true),
	 length( L2, N2),
%	 ( N2 > 20 -> mw2ui( L1, L2, U) ; true),
	 N1 =< N2,
%	 !,
	 count_win_and_tie( L1, L2, U).

mw2u( L1, L2, U):-
	 count_win_and_tie( L2, L1, U).

count_win_and_tie( L1, L2, U):-
	 count_win( L1, L2, A),
	 count_tie( L1, L2, B),
	 U is A + B / 2.

count_win( L1, L2, A):-
	 findall( 1,
	 (
		 member( C, L1),
		 member( D, L2),
		 C < D
	 ),
	 H),
	 length( H, A).

count_tie( L1, L2, B):-
	 findall( 1,
	 (
		 member( D, L1),
		 member( D, L2)
	 ),
	 T),
	 length( T, B).


%example

competitor(K, 'T'):- member(K, [1,2,3]).
competitor(K, 'H'):- member(K, [1,2,3]).

%rdata(['T', 'H', 'H', 'H', 'H', 'H', 'T', 'T', 'T', 'T', 'T', 'H']).

rdata(1, 'T H H H H H T T T T T H').
rdata(2, 'H H H H H H H H H T T T T T T T T T T H H H H H H H H H H T T T T T T T T T').



ranking( K, C, X):-
	 member( K, [1, 2]),
	 competitor( K, C),
	 rdata2list( K, L),
	 findall( J, nth1( J, L, C), X).


% Lumley (1996) (See [4])

ranking( 3, 'T', [3,3,4,3,1,2,3,1,1,5,4]).
ranking( 3, 'H', [1,2,1,1,1,1,1,1,1,1,2,4,1,1]).


/*


 ?- ranking( 1, 'T', X).
X = [1, 7, 8, 9, 10, 11].


?- ranking( 1, 'H', X).
X = [2, 3, 4, 5, 6, 12].


?- K=1, ranking( K, 'T', X), ranking( K, 'H', Y), mw2u( X, Y, U).
K = 1,
X = [1, 7, 8, 9, 10, 11],
Y = [2, 3, 4, 5, 6, 12],
U = 11 ;
K = 1,
X = [1, 7, 8, 9, 10, 11],
Y = [2, 3, 4, 5, 6, 12],
U = 25.


?- K=2, ranking( K, 'T', X), ranking( K, 'H', Y), mw2u( X, Y, U).
K = 2,
X = [10, 11, 12, 13, 14, 15, 16, 17, 18|...],
Y = [1, 2, 3, 4, 5, 6, 7, 8, 9|...],
U = 100 ;
K = 2,
X = [10, 11, 12, 13, 14, 15, 16, 17, 18|...],
Y = [1, 2, 3, 4, 5, 6, 7, 8, 9|...],
U = 261.


?- K=3, ranking( K, 'T', X), ranking( K, 'H', Y), mw2u( X, Y, U).
K = 3,
X = [1, 2, 1, 1, 1, 1, 1, 1, 1|...],
Y = [3, 3, 4, 3, 1, 2, 3, 1, 1|...],
U = 32.5 ;
false.


*/


% Table. U-critical statistics: alpha = 0.05 (See [3] and [5])

u_table(n,[4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20]).
%n1=1,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-
u_table(2,[-,-,-,-,0,0,0,0,1,1,1,1,1,2,2,2,2]).
u_table(3,[-,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8]).
u_table(4,[0,1,2,3,4,4,5,6,7,8,9,10,11,11,12,13,13]).
u_table(5,[-,2,3,5,6,7,8,9,11,12,13,14,15,17,18,19,20]).
u_table(6,[-,-,5,6,8,10,11,13,14,16,17,19,21,22,24,25,27]).
u_table(7,[-,-,-,8,10,12,14,16,18,20,22,24,26,28,30,32,34]).
u_table(8,[-,-,-,-,13,15,17,19,22,24,26,28,31,34,36,38,41]).
u_table(9,[-,-,-,-,-,17,20,23,26,28,31,34,37,39,42,45,48]).
u_table(10,[-,-,-,-,-,-,23,26,29,33,36,39,42,45,48,52,55]).
u_table(11,[-,-,-,-,-,-,-,30,33,37,40,44,47,51,55,58,62]).
u_table(12,[-,-,-,-,-,-,-,-,37,41,45,49,53,57,61,65,69]).
u_table(13,[-,-,-,-,-,-,-,-,-,45,50,54,59,63,67,72,76]).
u_table(14,[-,-,-,-,-,-,-,-,-,-,55,59,64,67,74,78,83]).
u_table(15,[-,-,-,-,-,-,-,-,-,-,-,64,70,75,80,85,90]).
u_table(16,[-,-,-,-,-,-,-,-,-,-,-,-,75,81,86,92,98]).
u_table(17,[-,-,-,-,-,-,-,-,-,-,-,-,-,87,93,99,105]).
u_table(18,[-,-,-,-,-,-,-,-,-,-,-,-,-,-,99,106,112]).
u_table(19,[-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,113,119]).
u_table(20,[-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,127]).


u_table_index( N1, N2, Uc):-
	 u_table( n, H),
	 nth1( K, H, N2), 
	 u_table( N1, L),
	 \+ u_table_missing( N1, N2),
	 nth1( K, L, W),
	 ( W = '-' -> u_table_index( N2, N1, Uc) ; Uc = W).

u_table_missing( 1, _).
u_table_missing( 2, N2):- member( N2, [4,5,6,7]).
u_table_missing( 3, 4).


/*

 ?- length(L, 9), nth1(J,L,_),nl,write([J]), nth1(K,L,_), u_table_index( J, K, U), write(K->U;' '),fail.

[1]
[2]8->0; 9->0; 
[3]5->0; 6->1; 7->1; 8->2; 9->2; 
[4]4->0; 5->1; 6->2; 7->3; 8->4; 9->4; 
[5]5->2; 6->3; 7->5; 8->6; 9->7; 
[6]6->5; 7->6; 8->8; 9->10; 
[7]7->8; 8->10; 9->12; 
[8]8->13; 9->15; 
[9]9->17; 
false.

 ?- length(L, 9), nth1(J,L,_),nl,write([J]), nth1(K,L,_), u_critical( J, K, U), write(K->U;' '),fail.

[1]
[2]8->0; 9->0; 
[3]5->0; 6->1; 7->1; 8->2; 9->2; 
[4]4->0; 5->1; 6->2; 7->3; 8->4; 9->4; 
[5]5->2; 6->3; 7->5; 8->6; 9->7; 
[6]6->5; 7->6; 8->8; 9->10; 
[7]7->8; 8->10; 9->12; 
[8]8->13; 9->15; 
[9]9->17; 
false.

?- u_table( n , L), nth1( J, L, N1),  nth1( K, L, N2), 
\+ (u_critical( N1, N2, U), U\='-').  

false.

*/



u_critical( N1, N2, _, Uc):- 
	 N1 =< 20,
	 N2 =< 20,
	 !,
	 u_table_index( N1, N2, Uc).

u_critical( N1, N2, T, Uc):- 
	 u_critical_zapx( N1, N2, _, _, T, Uc). 


% negligence
u_critical( N1, N2, Uc):- 
	 u_critical( N1, N2, [], Uc).


% approximation by standard normal distribution

u_critical_zapx( N1, N2, M, S, T, Uc):- 
	 u_mean( N1, N2, M),
	 u_sd_with_ties( N1, N2, T, S),
	 z_critical( 0.05, both, Zc),
	 Uc is - Zc * S + M.

% negligence
u_critical_zapx( N1, N2, M, S, Uc):- 
	 u_critical_zapx( N1, N2, M, S, [], Uc). 

u_critical_zapx_old( N1, N2, M, S, Uc):- 
	 u_mean( N1, N2, M),
	 u_sd( N1, N2, S),
	 z_critical( 0.05, both, Zc),
	 Uc is - Zc * S + M.


u_mean( N1, N2, M):-
	 M is N1 * N2 / 2.

u_sd( N1, N2, S):-
	 A is N1 + N2 + 1,
	 S is sqrt( N1 * N2 * A / 12).

% u_sd/3 works only for no-tie data.

u_sd_with_ties( N1, N2, TL, S):-
	 N is N1 + N2,
	 u_sd_term_for_ties( TL, T),
	 A is N^3 - N - T,
	 S is sqrt( N1 * N2 * A / 12 / (N^2 - N)).

u_sd_term_for_ties( [], 0).
u_sd_term_for_ties( [X|L], T):-
	 u_sd_term_for_ties( L, T0),
	 T is T0 + X^3 - X.

z_critical( 0.05, both, 1.959963985).



/*

 ?- length(L, 9), nth1(J,L,_),J1 is J + 11, nl,write([J1]), nth1(K,L,_), K1 is K + 11,u_critical(J1, K1,U),write(K1->U;' '),fail.

[12]12->37; 13->41; 14->45; 15->49; 16->53; 17->57; 18->61; 19->65; 20->69; 
[13]12->41; 13->45; 14->50; 15->54; 16->59; 17->63; 18->67; 19->72; 20->76; 
[14]12->45; 13->50; 14->55; 15->59; 16->64; 17->67; 18->74; 19->78; 20->83; 
[15]12->49; 13->54; 14->59; 15->64; 16->70; 17->75; 18->80; 19->85; 20->90; 
[16]12->53; 13->59; 14->64; 15->70; 16->75; 17->81; 18->86; 19->92; 20->98; 
[17]12->57; 13->63; 14->67; 15->75; 16->81; 17->87; 18->93; 19->99; 20->105; 
[18]12->61; 13->67; 14->74; 15->80; 16->86; 17->93; 18->99; 19->106; 20->112; 
[19]12->65; 13->72; 14->78; 15->85; 16->92; 17->99; 18->106; 19->113; 20->119; 
[20]12->69; 13->76; 14->83; 15->90; 16->98; 17->105; 18->112; 19->119; 20->127; 
false.


 ?- length(L, 9), nth1(J,L,_),J1 is J + 11, nl,write([J1]), nth1(K,L,_), K1 is K + 11,u_critical_zapx(J1, K1, _,_,Ux),Uxx is integer(Ux+0.5),write(K1->Uxx;' '),fail.

[12]12->44; 13->48; 14->53; 15->57; 16->61; 17->65; 18->70; 19->74; 20->78; 
[13]12->48; 13->53; 14->58; 15->62; 16->67; 17->72; 18->76; 19->81; 20->86; 
[14]12->53; 13->58; 14->63; 15->68; 16->73; 17->78; 18->83; 19->88; 20->93; 
[15]12->57; 13->62; 14->68; 15->73; 16->79; 17->84; 18->90; 19->96; 20->101; 
[16]12->61; 13->67; 14->73; 15->79; 16->85; 17->91; 18->97; 19->103; 20->109; 
[17]12->65; 13->72; 14->78; 15->84; 16->91; 17->97; 18->104; 19->110; 20->117; 
[18]12->70; 13->76; 14->83; 15->90; 16->97; 17->104; 18->111; 19->117; 20->124; 
[19]12->74; 13->81; 14->88; 15->96; 16->103; 17->110; 18->117; 19->125; 20->132; 
[20]12->78; 13->86; 14->93; 15->101; 16->109; 17->117; 18->124; 19->132; 20->140; 
false.

 ?- length(L, 9), nth1(J,L,_),J1 is J + 11, nl,write([J1]), nth1(K,L,_), K1 is K + 11,u_critical(J1,K1,U), u_critical_zapx(J1, K1, _,_,Ux),P is integer(U- Ux+0.5),write(K1->P;' '),fail.

[12]12-> -1; 13->0; 14->0; 15->0; 16->0; 17->0; 18->0; 19->0; 20->0; 
[13]12->0; 13-> -1; 14->0; 15->0; 16->0; 17->0; 18-> -1; 19->0; 20->0; 
[14]12->0; 13->0; 14->0; 15-> -1; 16->0; 17-> -2; 18->0; 19-> -1; 20->0; 
[15]12->0; 13->0; 14-> -1; 15-> -1; 16->0; 17->0; 18->0; 19->0; 20-> -1; 
[16]12->0; 13->0; 14->0; 15->0; 16->0; 17->0; 18-> -1; 19->0; 20->0; 
[17]12->0; 13->0; 14-> -2; 15->0; 16->0; 17->0; 18->0; 19->0; 20->0; 
[18]12->0; 13-> -1; 14->0; 15->0; 16-> -1; 17->0; 18-> -1; 19->0; 20->0; 
[19]12->0; 13->0; 14-> -1; 15->0; 16->0; 17->0; 18->0; 19->0; 20-> -1; 
[20]12->0; 13->0; 14->0; 15-> -1; 16->0; 17->0; 18->0; 19-> -1; 20->0; 
false.

*/


u_test_on_sample( K, U, Uc, R):-
	 ranking( K, 'T', X),
	 ranking( K, 'H', Y),
	 u_test( X, Y, U, Uc, R).

u_test( X, Y, U, Uc, R):-
	 mw2u( X, Y, U),
	 \+ (mw2u( X, Y, U1), U1 < U),
	 length( X, N1),
	 length( Y, N2),
	 histogram_of_two_data( X, Y, H),
	 select_freq_of_ties( H, T),
%	(T=[]->true; trace),
	 u_critical( N1, N2, T, Uc),
	 hyp_test_rule( U, Uc, R).


hyp_test_rule( U, Uc, reject_null):-
	 Uc > U.

hyp_test_rule( U, Uc, cannot_reject_null):-
	 Uc =< U.

histogram_of_two_data( X, Y, H):-
	 append( X, Y, Z),
	 histogram_of_data( Z, _, H).

histogram_of_data( Z, W, H):-
	 sort( Z, W),
	 findall( K, 
	 (
		 member( A, W),
		 findall( A, member( A, Z), L),
		 length( L, K)
	 ), H).


select_freq_of_ties( H, T):-
	 findall( C, (member( C, H), C >1), T).



% demo

/*

 ?- u_test_on_sample(K, U, Uc, Y), nl, write( example:K;'U'=U;'Uc'=Uc;Y), fail.

example:1;U=11;Uc=5;cannot_reject_null
example:2;U=100;Uc=113;reject_null
example:3;U=32.5;Uc=40;reject_null
false.
 
*/


% the Indirect Calculation Method

mw2ui( L1, L2, U):-
	 length( L1, N1),
	 length( L2, N2),
	 append( L1, L2, L),
	 sort_data( small_is_better, L, W),
	 data2rank( L1, W, X),
	 data2rank( L2, W, Y),
	 sum_list( X, R1),
	 sum_list( Y, R2),
	 (N1 < N2 -> (N, R) = (N1,R1) ; (N, R) = (N2, R2)),
	 u_indirect( N, R, U).

u_indirect( N, R, U):-
 	 U is R - N * ( N + 1) / 2.

% Note: The above method can be applied only to rank data.
% If the indirect method applied to data which does not stand for 
% rankings which are ranked in a combined list given two observation data series, 
% it cannot provide  the true value of U.


/*

 ?- ranking( K, 'T', X), ranking( K, 'H', Y), length(X, N1), length(Y, N2), 
N1 =< N2, mw2u(X,Y,U), mw2ui( X, Y, Ui), nl, write( example:K;direct:U;indirect:Ui), fail.

example:1;direct:11;indirect:11
example:1;direct:25;indirect:11
example:2;direct:100;indirect:100
example:2;direct:261;indirect:100
example:3;direct:32.5;indirect:121.5
example:3;direct:121.5;indirect:121.5
false.


% Also see [3, 4, 5]

% The following data is cited from web cite [6].

 ?- sample_data( X, Y), mw2u(X,Y,U), mw2ui(X,Y,Ui).

X = [9, 12, 6, 3],
Y = [4, 9, 11, 5],
U = Ui, Ui = 7.5 ;
X = [9, 12, 6, 3],
Y = [4, 9, 11, 5],
U = 8.5,
Ui = 7.5.


?- sample_data( A, B), append( A, B, C), data2rank( large_is_better, A, C, X),
    data2rank( large_is_better, B, C, Y),length(X, N1), length(Y, N2), mw2u( X, Y, U), mw2ui(X,Y,Ui).

 A = [9, 12, 6, 3],
B = [4, 9, 11, 5],
C = [9, 12, 6, 3, 4, 9, 11, 5],
X = [5.5, 8, 4, 1],
Y = [2, 5.5, 7, 3],
N1 = N2, N2 = 4,
U = Ui, Ui = 7.5 ;
A = [9, 12, 6, 3],
B = [4, 9, 11, 5],
C = [9, 12, 6, 3, 4, 9, 11, 5],
X = [5.5, 8, 4, 1],
Y = [2, 5.5, 7, 3],
N1 = N2, N2 = 4,
U = 8.5,
Ui = 7.5.



% Comparison with R's wilcox.test:

> w<- wilcox.test(c(9,12,6,3),c(4,9,11,5), correct=FALSE)
 警告メッセージ: 
In wilcox.test.default(c(9, 12, 6, 3), c(4, 9, 11, 5), correct = FALSE) :
   タイがあるため、正確な p 値を計算することができません 
> w

        Wilcoxon rank sum test

data:  c(9, 12, 6, 3) and c(4, 9, 11, 5)
W = 8.5, p-value = 0.8845
alternative hypothesis: true location shift is not equal to 0

> p<- w$p.value
> p
[1] 0.8845494
> z<- qnorm( 1- 0.5*p)
> z
[1] 0.1452045
> 


% Maybe, at least as for this package, even the current version of R is not correct.

*/


% References:
/*
[1] Wilcoxon, F. (1945). Individual comparisons by ranking methods, Biometrics Bulletin, 1: 80-83.
[2] Mann, H. B. and Whitney, D. R. (1947). On a test of whether one of two random variables is stochastically larger than the other, The Annals of Mathematical Statistics, 18(1): 50-60.
[3] http://en.wikipedia.org/wiki/Mann-Whitney_U
[4] http://oku.edu.mie-u.ac.jp/~okumura/stat/wmw.html
[5] http://kusuri-jouhou.com/statistics/mann.html
[6] http://aoki2.si.gunma-u.ac.jp/R/u-test.html

*/


% supplement

% pre-process to make ranking from observation data

rdata2list( K, R):-
	 rdata( K, L),
	 findall( X, (sub_atom( L, _, 1, _, X), competitor( K, X)), R). 
/*

?- A='abc', sub_atom(A, B,C,D,E), nl, write(A;B;C;D;E), fail.

abc;0;0;3;
abc;0;1;2;a
abc;0;2;1;ab
abc;0;3;0;abc
abc;1;0;2;
abc;1;1;1;b
abc;1;2;0;bc
abc;2;0;1;
abc;2;1;0;c
abc;3;0;0;
false.

% note: D is a number of the remaining (after) characters
*/




sort_data( small_is_better, D, L):-
	 sort( D, W),
	 findall( X, (member( X, W), member( X, D)), L).

sort_data( large_is_better, D, L):-
	 sort_data( small_is_better, D, H),
	 reverse( H, L).

data2rank( small_is_better, D, L, X):-
	 sort_data( small_is_better, L, H),
	 data2rank( D, H, X).
	 
data2rank( large_is_better, D, L, X):-
	 sort_data( large_is_better, L, H),
	 data2rank( D, H, X).

data2rank( D, L, X):-
	 rank_histogram( L, H),
	 restore_rank_from_histogram( D, H, X).

/*
% depreciate

data2rank_old( D, L, X):-
	 findall( J, 
	 (
		 member( C, D),
		 rank_in_list( J, L, C)
	 ), X).

rank_in_list( J, E, X):-
	 histogram_of_data( E, W, _),
	 nth1( J, W, X),
	 !.

*/


% See [6]
sample_data([9, 12, 6, 3], [4, 9, 11, 5]). 

/*

?- A=[2,5,3], B=[10,1,9,6], append( A, B, C),
 data2rank( small_is_better, A, C, X), data2rank( small_is_better, B, C, Y).

A = [2, 5, 3],
B = [10, 1, 9, 6],
C = [2, 5, 3, 10, 1, 9, 6],
X = [2, 4, 3],
Y = [7, 1, 6, 5].

 ?- data2rank([1,2,3],[-3,0,1,2,3,5],A).
A = [3, 4, 5].

 ?- data2rank([1,2,3],[-3,0,1,2,3,5,-7],A).
A = [3, 4, 5].

 ?- data2rank(small_is_better,[1,2,3],[-3,0,1,2,3,5,-7],A).
A = [4, 5, 6].

% for ties (=> compute the class median)

 ?- data2rank(  [1,2,2,3,3,3], [1,2,3], X).
X = [1, 2, 2, 3, 3, 3].

 ?- data2rank([1,3,2,2,3], [1,2,2,3,3,3,4,5],L).
L = [1, 5, 2.5, 2.5, 5].

*/



restore_rank_from_histogram( D, H, X):-
	 findall( M,
	 (
		 member( C, D),
		 member( C:_:M, H)
	 ), X).

restore_rank_from_histogram( H, X):-
	 findall( M,
	 (
		 member( _:K:M, H),
		 length( L, K),
		 member( _, L)
	 ), X).

rank_histogram( D, H):-
	 sort_data( small_is_better, D, D1),
	 findall( X:K:M, 
	 (
		 class_in_rank_histogram( K, D1, X, L),
		 median_in_rank_class( M, L)
	 ), H).


class_in_rank_histogram( K, D, X, L):-
	 setof( J, nth1( J, D, X), L),
	 length( L, K).

median_in_rank_class( M, L):-
	 L = [T|_],
	 last( L, P),
	 M is (T + P) /2.

/*

% a way to make histogram

?- A=[1,2,2,3,3,3], findall(X:L, setof( J, nth1(J, A, X), L), H).

A = [1, 2, 2, 3, 3, 3],
H = [1:[1], 2:[2, 3], 3:[4, 5, 6]].

 ?- A=[1,2,2,3,3,3], rank_histogram( A, H).
 
A = [1, 2, 2, 3, 3, 3],
H = [1:1:1, 2:2:2.5, 3:3:5].

?- A=[1,2,2,3,3,3], rank_histogram( A, H), restore_rank_from_histogram( H, Y).

A = [1, 2, 2, 3, 3, 3],
H = [1:1:1, 2:2:2.5, 3:3:5],
Y = [1, 2.5, 2.5, 5, 5, 5].


*/
return to front page.