You selected mw2u_130523.pl
% Mann-Whitney-Wilcoxon's U-statistics
% mw2u_130519.pl (tested on SWI-Prolog 6.2.2, 64bit)
% 2013.5.17 (modified: 5.19,23)
% By Kenryo Indo
% U is a total number of defeated opponents over the group.[1,2]
% the Direct Calculation Method
mw2u( L1, L2, U):-
length( L1, N1),
% ( N1 > 20 -> mw2ui( L1, L2, U) ; true),
length( L2, N2),
% ( N2 > 20 -> mw2ui( L1, L2, U) ; true),
N1 =< N2,
% !,
count_win_and_tie( L1, L2, U).
mw2u( L1, L2, U):-
count_win_and_tie( L2, L1, U).
count_win_and_tie( L1, L2, U):-
count_win( L1, L2, A),
count_tie( L1, L2, B),
U is A + B / 2.
count_win( L1, L2, A):-
findall( 1,
(
member( C, L1),
member( D, L2),
C < D
),
H),
length( H, A).
count_tie( L1, L2, B):-
findall( 1,
(
member( D, L1),
member( D, L2)
),
T),
length( T, B).
%example
competitor(K, 'T'):- member(K, [1,2,3]).
competitor(K, 'H'):- member(K, [1,2,3]).
%rdata(['T', 'H', 'H', 'H', 'H', 'H', 'T', 'T', 'T', 'T', 'T', 'H']).
rdata(1, 'T H H H H H T T T T T H').
rdata(2, 'H H H H H H H H H T T T T T T T T T T H H H H H H H H H H T T T T T T T T T').
ranking( K, C, X):-
member( K, [1, 2]),
competitor( K, C),
rdata2list( K, L),
findall( J, nth1( J, L, C), X).
% Lumley (1996) (See [4])
ranking( 3, 'T', [3,3,4,3,1,2,3,1,1,5,4]).
ranking( 3, 'H', [1,2,1,1,1,1,1,1,1,1,2,4,1,1]).
/*
?- ranking( 1, 'T', X).
X = [1, 7, 8, 9, 10, 11].
?- ranking( 1, 'H', X).
X = [2, 3, 4, 5, 6, 12].
?- K=1, ranking( K, 'T', X), ranking( K, 'H', Y), mw2u( X, Y, U).
K = 1,
X = [1, 7, 8, 9, 10, 11],
Y = [2, 3, 4, 5, 6, 12],
U = 11 ;
K = 1,
X = [1, 7, 8, 9, 10, 11],
Y = [2, 3, 4, 5, 6, 12],
U = 25.
?- K=2, ranking( K, 'T', X), ranking( K, 'H', Y), mw2u( X, Y, U).
K = 2,
X = [10, 11, 12, 13, 14, 15, 16, 17, 18|...],
Y = [1, 2, 3, 4, 5, 6, 7, 8, 9|...],
U = 100 ;
K = 2,
X = [10, 11, 12, 13, 14, 15, 16, 17, 18|...],
Y = [1, 2, 3, 4, 5, 6, 7, 8, 9|...],
U = 261.
?- K=3, ranking( K, 'T', X), ranking( K, 'H', Y), mw2u( X, Y, U).
K = 3,
X = [1, 2, 1, 1, 1, 1, 1, 1, 1|...],
Y = [3, 3, 4, 3, 1, 2, 3, 1, 1|...],
U = 32.5 ;
false.
*/
% Table. U-critical statistics: alpha = 0.05 (See [3] and [5])
u_table(n,[4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20]).
%n1=1,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-
u_table(2,[-,-,-,-,0,0,0,0,1,1,1,1,1,2,2,2,2]).
u_table(3,[-,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8]).
u_table(4,[0,1,2,3,4,4,5,6,7,8,9,10,11,11,12,13,13]).
u_table(5,[-,2,3,5,6,7,8,9,11,12,13,14,15,17,18,19,20]).
u_table(6,[-,-,5,6,8,10,11,13,14,16,17,19,21,22,24,25,27]).
u_table(7,[-,-,-,8,10,12,14,16,18,20,22,24,26,28,30,32,34]).
u_table(8,[-,-,-,-,13,15,17,19,22,24,26,28,31,34,36,38,41]).
u_table(9,[-,-,-,-,-,17,20,23,26,28,31,34,37,39,42,45,48]).
u_table(10,[-,-,-,-,-,-,23,26,29,33,36,39,42,45,48,52,55]).
u_table(11,[-,-,-,-,-,-,-,30,33,37,40,44,47,51,55,58,62]).
u_table(12,[-,-,-,-,-,-,-,-,37,41,45,49,53,57,61,65,69]).
u_table(13,[-,-,-,-,-,-,-,-,-,45,50,54,59,63,67,72,76]).
u_table(14,[-,-,-,-,-,-,-,-,-,-,55,59,64,67,74,78,83]).
u_table(15,[-,-,-,-,-,-,-,-,-,-,-,64,70,75,80,85,90]).
u_table(16,[-,-,-,-,-,-,-,-,-,-,-,-,75,81,86,92,98]).
u_table(17,[-,-,-,-,-,-,-,-,-,-,-,-,-,87,93,99,105]).
u_table(18,[-,-,-,-,-,-,-,-,-,-,-,-,-,-,99,106,112]).
u_table(19,[-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,113,119]).
u_table(20,[-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,127]).
u_table_index( N1, N2, Uc):-
u_table( n, H),
nth1( K, H, N2),
u_table( N1, L),
\+ u_table_missing( N1, N2),
nth1( K, L, W),
( W = '-' -> u_table_index( N2, N1, Uc) ; Uc = W).
u_table_missing( 1, _).
u_table_missing( 2, N2):- member( N2, [4,5,6,7]).
u_table_missing( 3, 4).
/*
?- length(L, 9), nth1(J,L,_),nl,write([J]), nth1(K,L,_), u_table_index( J, K, U), write(K->U;' '),fail.
[1]
[2]8->0; 9->0;
[3]5->0; 6->1; 7->1; 8->2; 9->2;
[4]4->0; 5->1; 6->2; 7->3; 8->4; 9->4;
[5]5->2; 6->3; 7->5; 8->6; 9->7;
[6]6->5; 7->6; 8->8; 9->10;
[7]7->8; 8->10; 9->12;
[8]8->13; 9->15;
[9]9->17;
false.
?- length(L, 9), nth1(J,L,_),nl,write([J]), nth1(K,L,_), u_critical( J, K, U), write(K->U;' '),fail.
[1]
[2]8->0; 9->0;
[3]5->0; 6->1; 7->1; 8->2; 9->2;
[4]4->0; 5->1; 6->2; 7->3; 8->4; 9->4;
[5]5->2; 6->3; 7->5; 8->6; 9->7;
[6]6->5; 7->6; 8->8; 9->10;
[7]7->8; 8->10; 9->12;
[8]8->13; 9->15;
[9]9->17;
false.
?- u_table( n , L), nth1( J, L, N1), nth1( K, L, N2),
\+ (u_critical( N1, N2, U), U\='-').
false.
*/
u_critical( N1, N2, _, Uc):-
N1 =< 20,
N2 =< 20,
!,
u_table_index( N1, N2, Uc).
u_critical( N1, N2, T, Uc):-
u_critical_zapx( N1, N2, _, _, T, Uc).
% negligence
u_critical( N1, N2, Uc):-
u_critical( N1, N2, [], Uc).
% approximation by standard normal distribution
u_critical_zapx( N1, N2, M, S, T, Uc):-
u_mean( N1, N2, M),
u_sd_with_ties( N1, N2, T, S),
z_critical( 0.05, both, Zc),
Uc is - Zc * S + M.
% negligence
u_critical_zapx( N1, N2, M, S, Uc):-
u_critical_zapx( N1, N2, M, S, [], Uc).
u_critical_zapx_old( N1, N2, M, S, Uc):-
u_mean( N1, N2, M),
u_sd( N1, N2, S),
z_critical( 0.05, both, Zc),
Uc is - Zc * S + M.
u_mean( N1, N2, M):-
M is N1 * N2 / 2.
u_sd( N1, N2, S):-
A is N1 + N2 + 1,
S is sqrt( N1 * N2 * A / 12).
% u_sd/3 works only for no-tie data.
u_sd_with_ties( N1, N2, TL, S):-
N is N1 + N2,
u_sd_term_for_ties( TL, T),
A is N^3 - N - T,
S is sqrt( N1 * N2 * A / 12 / (N^2 - N)).
u_sd_term_for_ties( [], 0).
u_sd_term_for_ties( [X|L], T):-
u_sd_term_for_ties( L, T0),
T is T0 + X^3 - X.
z_critical( 0.05, both, 1.959963985).
/*
?- length(L, 9), nth1(J,L,_),J1 is J + 11, nl,write([J1]), nth1(K,L,_), K1 is K + 11,u_critical(J1, K1,U),write(K1->U;' '),fail.
[12]12->37; 13->41; 14->45; 15->49; 16->53; 17->57; 18->61; 19->65; 20->69;
[13]12->41; 13->45; 14->50; 15->54; 16->59; 17->63; 18->67; 19->72; 20->76;
[14]12->45; 13->50; 14->55; 15->59; 16->64; 17->67; 18->74; 19->78; 20->83;
[15]12->49; 13->54; 14->59; 15->64; 16->70; 17->75; 18->80; 19->85; 20->90;
[16]12->53; 13->59; 14->64; 15->70; 16->75; 17->81; 18->86; 19->92; 20->98;
[17]12->57; 13->63; 14->67; 15->75; 16->81; 17->87; 18->93; 19->99; 20->105;
[18]12->61; 13->67; 14->74; 15->80; 16->86; 17->93; 18->99; 19->106; 20->112;
[19]12->65; 13->72; 14->78; 15->85; 16->92; 17->99; 18->106; 19->113; 20->119;
[20]12->69; 13->76; 14->83; 15->90; 16->98; 17->105; 18->112; 19->119; 20->127;
false.
?- length(L, 9), nth1(J,L,_),J1 is J + 11, nl,write([J1]), nth1(K,L,_), K1 is K + 11,u_critical_zapx(J1, K1, _,_,Ux),Uxx is integer(Ux+0.5),write(K1->Uxx;' '),fail.
[12]12->44; 13->48; 14->53; 15->57; 16->61; 17->65; 18->70; 19->74; 20->78;
[13]12->48; 13->53; 14->58; 15->62; 16->67; 17->72; 18->76; 19->81; 20->86;
[14]12->53; 13->58; 14->63; 15->68; 16->73; 17->78; 18->83; 19->88; 20->93;
[15]12->57; 13->62; 14->68; 15->73; 16->79; 17->84; 18->90; 19->96; 20->101;
[16]12->61; 13->67; 14->73; 15->79; 16->85; 17->91; 18->97; 19->103; 20->109;
[17]12->65; 13->72; 14->78; 15->84; 16->91; 17->97; 18->104; 19->110; 20->117;
[18]12->70; 13->76; 14->83; 15->90; 16->97; 17->104; 18->111; 19->117; 20->124;
[19]12->74; 13->81; 14->88; 15->96; 16->103; 17->110; 18->117; 19->125; 20->132;
[20]12->78; 13->86; 14->93; 15->101; 16->109; 17->117; 18->124; 19->132; 20->140;
false.
?- length(L, 9), nth1(J,L,_),J1 is J + 11, nl,write([J1]), nth1(K,L,_), K1 is K + 11,u_critical(J1,K1,U), u_critical_zapx(J1, K1, _,_,Ux),P is integer(U- Ux+0.5),write(K1->P;' '),fail.
[12]12-> -1; 13->0; 14->0; 15->0; 16->0; 17->0; 18->0; 19->0; 20->0;
[13]12->0; 13-> -1; 14->0; 15->0; 16->0; 17->0; 18-> -1; 19->0; 20->0;
[14]12->0; 13->0; 14->0; 15-> -1; 16->0; 17-> -2; 18->0; 19-> -1; 20->0;
[15]12->0; 13->0; 14-> -1; 15-> -1; 16->0; 17->0; 18->0; 19->0; 20-> -1;
[16]12->0; 13->0; 14->0; 15->0; 16->0; 17->0; 18-> -1; 19->0; 20->0;
[17]12->0; 13->0; 14-> -2; 15->0; 16->0; 17->0; 18->0; 19->0; 20->0;
[18]12->0; 13-> -1; 14->0; 15->0; 16-> -1; 17->0; 18-> -1; 19->0; 20->0;
[19]12->0; 13->0; 14-> -1; 15->0; 16->0; 17->0; 18->0; 19->0; 20-> -1;
[20]12->0; 13->0; 14->0; 15-> -1; 16->0; 17->0; 18->0; 19-> -1; 20->0;
false.
*/
u_test_on_sample( K, U, Uc, R):-
ranking( K, 'T', X),
ranking( K, 'H', Y),
u_test( X, Y, U, Uc, R).
u_test( X, Y, U, Uc, R):-
mw2u( X, Y, U),
\+ (mw2u( X, Y, U1), U1 < U),
length( X, N1),
length( Y, N2),
histogram_of_two_data( X, Y, H),
select_freq_of_ties( H, T),
% (T=[]->true; trace),
u_critical( N1, N2, T, Uc),
hyp_test_rule( U, Uc, R).
hyp_test_rule( U, Uc, reject_null):-
Uc > U.
hyp_test_rule( U, Uc, cannot_reject_null):-
Uc =< U.
histogram_of_two_data( X, Y, H):-
append( X, Y, Z),
histogram_of_data( Z, _, H).
histogram_of_data( Z, W, H):-
sort( Z, W),
findall( K,
(
member( A, W),
findall( A, member( A, Z), L),
length( L, K)
), H).
select_freq_of_ties( H, T):-
findall( C, (member( C, H), C >1), T).
% demo
/*
?- u_test_on_sample(K, U, Uc, Y), nl, write( example:K;'U'=U;'Uc'=Uc;Y), fail.
example:1;U=11;Uc=5;cannot_reject_null
example:2;U=100;Uc=113;reject_null
example:3;U=32.5;Uc=40;reject_null
false.
*/
% the Indirect Calculation Method
mw2ui( L1, L2, U):-
length( L1, N1),
length( L2, N2),
append( L1, L2, L),
sort_data( small_is_better, L, W),
data2rank( L1, W, X),
data2rank( L2, W, Y),
sum_list( X, R1),
sum_list( Y, R2),
(N1 < N2 -> (N, R) = (N1,R1) ; (N, R) = (N2, R2)),
u_indirect( N, R, U).
u_indirect( N, R, U):-
U is R - N * ( N + 1) / 2.
% Note: The above method can be applied only to rank data.
% If the indirect method applied to data which does not stand for
% rankings which are ranked in a combined list given two observation data series,
% it cannot provide the true value of U.
/*
?- ranking( K, 'T', X), ranking( K, 'H', Y), length(X, N1), length(Y, N2),
N1 =< N2, mw2u(X,Y,U), mw2ui( X, Y, Ui), nl, write( example:K;direct:U;indirect:Ui), fail.
example:1;direct:11;indirect:11
example:1;direct:25;indirect:11
example:2;direct:100;indirect:100
example:2;direct:261;indirect:100
example:3;direct:32.5;indirect:121.5
example:3;direct:121.5;indirect:121.5
false.
% Also see [3, 4, 5]
% The following data is cited from web cite [6].
?- sample_data( X, Y), mw2u(X,Y,U), mw2ui(X,Y,Ui).
X = [9, 12, 6, 3],
Y = [4, 9, 11, 5],
U = Ui, Ui = 7.5 ;
X = [9, 12, 6, 3],
Y = [4, 9, 11, 5],
U = 8.5,
Ui = 7.5.
?- sample_data( A, B), append( A, B, C), data2rank( large_is_better, A, C, X),
data2rank( large_is_better, B, C, Y),length(X, N1), length(Y, N2), mw2u( X, Y, U), mw2ui(X,Y,Ui).
A = [9, 12, 6, 3],
B = [4, 9, 11, 5],
C = [9, 12, 6, 3, 4, 9, 11, 5],
X = [5.5, 8, 4, 1],
Y = [2, 5.5, 7, 3],
N1 = N2, N2 = 4,
U = Ui, Ui = 7.5 ;
A = [9, 12, 6, 3],
B = [4, 9, 11, 5],
C = [9, 12, 6, 3, 4, 9, 11, 5],
X = [5.5, 8, 4, 1],
Y = [2, 5.5, 7, 3],
N1 = N2, N2 = 4,
U = 8.5,
Ui = 7.5.
% Comparison with R's wilcox.test:
> w<- wilcox.test(c(9,12,6,3),c(4,9,11,5), correct=FALSE)
警告メッセージ:
In wilcox.test.default(c(9, 12, 6, 3), c(4, 9, 11, 5), correct = FALSE) :
タイがあるため、正確な p 値を計算することができません
> w
Wilcoxon rank sum test
data: c(9, 12, 6, 3) and c(4, 9, 11, 5)
W = 8.5, p-value = 0.8845
alternative hypothesis: true location shift is not equal to 0
> p<- w$p.value
> p
[1] 0.8845494
> z<- qnorm( 1- 0.5*p)
> z
[1] 0.1452045
>
% Maybe, at least as for this package, even the current version of R is not correct.
*/
% References:
/*
[1] Wilcoxon, F. (1945). Individual comparisons by ranking methods, Biometrics Bulletin, 1: 80-83.
[2] Mann, H. B. and Whitney, D. R. (1947). On a test of whether one of two random variables is stochastically larger than the other, The Annals of Mathematical Statistics, 18(1): 50-60.
[3] http://en.wikipedia.org/wiki/Mann-Whitney_U
[4] http://oku.edu.mie-u.ac.jp/~okumura/stat/wmw.html
[5] http://kusuri-jouhou.com/statistics/mann.html
[6] http://aoki2.si.gunma-u.ac.jp/R/u-test.html
*/
% supplement
% pre-process to make ranking from observation data
rdata2list( K, R):-
rdata( K, L),
findall( X, (sub_atom( L, _, 1, _, X), competitor( K, X)), R).
/*
?- A='abc', sub_atom(A, B,C,D,E), nl, write(A;B;C;D;E), fail.
abc;0;0;3;
abc;0;1;2;a
abc;0;2;1;ab
abc;0;3;0;abc
abc;1;0;2;
abc;1;1;1;b
abc;1;2;0;bc
abc;2;0;1;
abc;2;1;0;c
abc;3;0;0;
false.
% note: D is a number of the remaining (after) characters
*/
sort_data( small_is_better, D, L):-
sort( D, W),
findall( X, (member( X, W), member( X, D)), L).
sort_data( large_is_better, D, L):-
sort_data( small_is_better, D, H),
reverse( H, L).
data2rank( small_is_better, D, L, X):-
sort_data( small_is_better, L, H),
data2rank( D, H, X).
data2rank( large_is_better, D, L, X):-
sort_data( large_is_better, L, H),
data2rank( D, H, X).
data2rank( D, L, X):-
rank_histogram( L, H),
restore_rank_from_histogram( D, H, X).
/*
% depreciate
data2rank_old( D, L, X):-
findall( J,
(
member( C, D),
rank_in_list( J, L, C)
), X).
rank_in_list( J, E, X):-
histogram_of_data( E, W, _),
nth1( J, W, X),
!.
*/
% See [6]
sample_data([9, 12, 6, 3], [4, 9, 11, 5]).
/*
?- A=[2,5,3], B=[10,1,9,6], append( A, B, C),
data2rank( small_is_better, A, C, X), data2rank( small_is_better, B, C, Y).
A = [2, 5, 3],
B = [10, 1, 9, 6],
C = [2, 5, 3, 10, 1, 9, 6],
X = [2, 4, 3],
Y = [7, 1, 6, 5].
?- data2rank([1,2,3],[-3,0,1,2,3,5],A).
A = [3, 4, 5].
?- data2rank([1,2,3],[-3,0,1,2,3,5,-7],A).
A = [3, 4, 5].
?- data2rank(small_is_better,[1,2,3],[-3,0,1,2,3,5,-7],A).
A = [4, 5, 6].
% for ties (=> compute the class median)
?- data2rank( [1,2,2,3,3,3], [1,2,3], X).
X = [1, 2, 2, 3, 3, 3].
?- data2rank([1,3,2,2,3], [1,2,2,3,3,3,4,5],L).
L = [1, 5, 2.5, 2.5, 5].
*/
restore_rank_from_histogram( D, H, X):-
findall( M,
(
member( C, D),
member( C:_:M, H)
), X).
restore_rank_from_histogram( H, X):-
findall( M,
(
member( _:K:M, H),
length( L, K),
member( _, L)
), X).
rank_histogram( D, H):-
sort_data( small_is_better, D, D1),
findall( X:K:M,
(
class_in_rank_histogram( K, D1, X, L),
median_in_rank_class( M, L)
), H).
class_in_rank_histogram( K, D, X, L):-
setof( J, nth1( J, D, X), L),
length( L, K).
median_in_rank_class( M, L):-
L = [T|_],
last( L, P),
M is (T + P) /2.
/*
% a way to make histogram
?- A=[1,2,2,3,3,3], findall(X:L, setof( J, nth1(J, A, X), L), H).
A = [1, 2, 2, 3, 3, 3],
H = [1:[1], 2:[2, 3], 3:[4, 5, 6]].
?- A=[1,2,2,3,3,3], rank_histogram( A, H).
A = [1, 2, 2, 3, 3, 3],
H = [1:1:1, 2:2:2.5, 3:3:5].
?- A=[1,2,2,3,3,3], rank_histogram( A, H), restore_rank_from_histogram( H, Y).
A = [1, 2, 2, 3, 3, 3],
H = [1:1:1, 2:2:2.5, 3:3:5],
Y = [1, 2.5, 2.5, 5, 5, 5].
*/
return to front page.