You selected exactp.pl
% Fisher's exact test
% exactp.pl
% developed on SWI-prolog 6.2.2, 64 bits
% By Kenryo Indo
% 2013.5.30 (modified: 5.31, for relatively large numbers)
% examples.
sample_data( sample1).
sample_data( sample2).
%sample( 1). 2 x 2 contingency table [2]
sample1:class( c1, 'Men').
sample1:class( c2,'Women').
sample1:group( r1, 'Dieting').
sample1:group( r2, 'Non-dieting').
sample1:table( r1, c1, 1).
sample1:table( r1, c2, 9).
sample1:table( r2, c1, 11).
sample1:table( r2, c2, 3).
%sample( 2). 2 x 2 contingency table [3]
sample2:class( c1, 'tooth decay').
sample2:class( c2, 'no cavity').
sample2:group( r1, 'having a sweet tooth').
sample2:group( r2, 'not fond of sweets').
sample2:table( r1, c1, 13).
sample2:table( r1, c2, 4).
sample2:table( r2, c1, 6).
sample2:table( r2, c2, 14).
% The p-value : assuming the margins are fixed,
% Under a null hypothesis of independence,
% p-value can be computed by a hypergeometric distribution
% of the numbers in the cells of the table.
factorial_number( T, F):-
ascending_numbers( T, H),
product_list( H, E),
F is E.
ascending_numbers( T, H):-
length( L, T),
findall( I, nth1( I, L, _), H).
product_list( [], 1). % <-- This clause was missing in the earlier version.
product_list( [ X | T], F):-
product_list( T, X, F).
product_list( [], X, X).
product_list( [X|T], Y, F):-
Z = X * Y,
product_list( T, Z, F).
/*
?- product_list([1,2,3],L).
L = 3* (2*1).
?- length(L,10),nth1(J,L,_),ascending_numbers(J,X),nl,write(X),fail.
[1]
[1,2]
[1,2,3]
[1,2,3,4]
[1,2,3,4,5]
[1,2,3,4,5,6]
[1,2,3,4,5,6,7]
[1,2,3,4,5,6,7,8]
[1,2,3,4,5,6,7,8,9]
[1,2,3,4,5,6,7,8,9,10]
false.
?- length(L,10),nth0(J,[_|L],_),factorial_number(J,X),nl,write(fact(J)=X),fail.
fact(0)=1
fact(1)=1
fact(2)=2
fact(3)=6
fact(4)=24
fact(5)=120
fact(6)=720
fact(7)=5040
fact(8)=40320
fact(9)=362880
fact(10)=3628800
false.
*/
marginal_frequencies( X, [T1,T2,S1,S2]):-
X = [X11, X12, X21, X22],
T1 is X11 + X12,
T2 is X21 + X22,
S1 is X11 + X21,
S2 is X12 + X22.
observ_p( X, P):-
sum_list( X, N),
X = [X11, X12 | _],
marginal_frequencies( X, [T1,_,S1,S2]),
combin_num( N, T1, CD),
combin_num( S1, X11, CN1),
combin_num( S2, X12, CN2),
P is CN1 * CN2 / CD.
/*
observ_p_old( X, P):-
X = [X11, X12, X21, X22],
sum_list( X, N),
marginal_frequencies( X, [T1,T2,S1,S2]),
ascending_numbers( N, FN),
ascending_numbers( X11, F11),
ascending_numbers( X12, F12),
ascending_numbers( X21, F21),
ascending_numbers( X22, F22),
append( [FN, F11, F12, F21, F22], LD),
ascending_numbers( T1, FT1),
ascending_numbers( T2, FT2),
ascending_numbers( S1, FS1),
ascending_numbers( S2, FS2),
append( [ FT1, FT2, FS1, FS2], LN),
ord_subtract( LN, LD, LN1),
ord_subtract( LD, LN, LD1),
product_list( LN1, Numerator),
product_list( LD1, Denominator),
P is Numerator / Denominator.
*/
combin_num( N, K, C):-
length( L, N),
append( A, B, L),
length( A, K),
length( B, M),
ascending_numbers( N, NL),
ascending_numbers( K, DL1),
ascending_numbers( M, DL2),
append( DL1, DL2, DL),
ord_subtract( NL, DL, F),
ord_subtract( DL, NL, G),
product_list( F, Numerator),
product_list( G, Denominator),
C is Numerator / Denominator.
sample_table( K, X):-
sample_data( K),
findall( A, K:table( _, _, A), X).
/*
?- append([[1,2],[1,2],[1,2,3]],K).
K = [1, 2, 1, 2, 1, 2, 3].
?- combin_num( 5,K,P), nl, write(K->P),fail.
0->1
1->5
2->10
3->10
4->5
5->1
false.
% for a 2 x 2 table in [4], (3, 2, 1, 4)
?- X=[3,2,1,4], marginal_frequencies(X,W).
X = [3, 2, 1, 4],
W = [5, 5, 4, 6].
?- X=[3,2,1,4], observ_p(X,P).
X = [3, 2, 1, 4],
P = 0.23809523809523808.
?- sample_table(K,X).
K = sample1,
X = [1, 9, 11, 3] ;
K = sample2,
X = [13, 4, 6, 14].
?- sample_table(K,X), observ_p(X,P).
K = sample1,
X = [1, 9, 11, 3],
P = 0.001346076187912236 ;
K = sample2,
X = [13, 4, 6, 14],
P = 0.005219867675736516.
*/
solve_sum( A, B, C):-
length( L, C),
append( G, H, L),
length( G, A),
length( H, B).
generate_contingency_table( [T1,T2,S1,S2], X):-
X = [X11, X12, X21, X22],
solve_sum( X11, X12, T1),
solve_sum( X11, X21, S1), % Note. This order is crucial to time comsumption
solve_sum( X21, X22, T2),
solve_sum( X12, X22, S2).
generate_contingency_table_from( X, W, Y):-
marginal_frequencies( X, W),
generate_contingency_table( W, Y).
exact_p( X, P):-
observ_p( X, Q),
G = generate_contingency_table_from( X, _, Y),
findall( R, (G, observ_p( Y, R), R =< Q), L),
sum_list( L, P).
/*
?- solve_sum(A, B, 2).
A = 0,
B = 2 ;
A = B, B = 1 ;
A = 2,
B = 0 ;
false.
?- X=[3,2,1,4], marginal_frequencies(X,W).
X = [3, 2, 1, 4],
W = [5, 5, 4, 6].
?- W=[5,5,4,6], generate_contingency_table(W,Y).
W = [5, 5, 4, 6],
Y = [0, 5, 4, 1] ;
W = [5, 5, 4, 6],
Y = [1, 4, 3, 2] ;
W = [5, 5, 4, 6],
Y = [2, 3, 2, 3] ;
W = [5, 5, 4, 6],
Y = [3, 2, 1, 4] ;
W = [5, 5, 4, 6],
Y = [4, 1, 0, 5] ;
false.
?- X=[3,2,1,4], exact_p(X,P).
X = [3, 2, 1, 4],
P = 0.5238095238095237.
?- sample_table(K,X), exact_p(X,P).
K = sample1,
X = [1, 9, 11, 3],
P = 0.0027594561852200836 ;
K = sample2,
X = [13, 4, 6, 14],
P = 0.008138143815466445.
% comparison with fisher.test using R
> F <- fisher.test(matrix(c(3,2,1,4),nrow=2))
> F$p.value
[1] 0.5238095
> F1 <- fisher.test(matrix(c(1,9,11,3),nrow=2))
> F1$p.value
[1] 0.002759456
> F2 <- fisher.test(matrix(c(13,4,6,14),nrow=2))
> F2$p.value
[1] 0.008138144
*/
% for relatively large numbers
ln_list( L, H):-
findall( X,
(
member(W, L),
X is log( W)
), H).
combin_num_ln( N, K, C):-
length( L, N),
append( A, B, L),
length( A, K),
length( B, M),
ascending_numbers( N, NL),
ascending_numbers( K, DL1),
ascending_numbers( M, DL2),
append( DL1, DL2, DL),
ord_subtract( NL, DL, F),
ord_subtract( DL, NL, G),
ln_list( F, FL),
ln_list( G, GL),
sum_list( FL, CN),
sum_list( GL, CD),
C is ( CN - CD).
observ_p_ln( X, P):-
sum_list( X, N),
X = [ X11, X12 | _],
marginal_frequencies( X, [T1, _,S1, S2]),
combin_num_ln( N, T1, CDL),
combin_num_ln( S1, X11, CNL1),
combin_num_ln( S2, X12, CNL2),
P is exp( CNL1 + CNL2 - CDL).
exact_p_ln( X, P):-
observ_p_ln( X, Q),
G = generate_contingency_table_from( X, _, Y),
% G = gen_ctab_from( X, _, Y),
findall( R, (G, observ_p_ln( Y, R), R =< Q), L),
sum_list( L, P).
/*
% for another example in [4].
?- X=[228,863,284,851], exact_p(X,P).
ERROR: is/2: Arithmetic: evaluation error: `float_overflow'
% Then, let us use logarithm.
?- combin_num_ln(2226, 1091,P).
P = 1538.4310457750516 .
% Incidentally, time/1 of swi-prolog may be beneficial.
?- X=[228,863,284,851], time(exact_p_ln(X,P)).
% 54,256,617 inferences, 15.553 CPU in 15.553 seconds (100% CPU, 3488431 Lips)
X = [228, 863, 284, 851],
P = 0.02334574579155282 .
% comparison to R
> F5<- fisher.test(matrix(c(228,863,284,851),nrow=2))
> F5$p.value
[1] 0.02334575
% other preliminary tests
?- sample_table(K,X), generate_contingency_table_from( X, _, Y),
observ_p(Y,Q),observ_p_ln(Y,P),R is P-Q,nl,write(Y;R),fail.
[0,10,12,2];2.710505431213761e-20
[1,9,11,3];2.8189256484623115e-18
[2,8,10,4];1.3877787807814457e-17
[3,7,9,5];1.6653345369377348e-16
[4,6,8,6];2.498001805406602e-16
[5,5,7,7];4.996003610813204e-16
[6,4,6,8];2.498001805406602e-16
[7,3,5,9];1.6653345369377348e-16
[8,2,4,10];1.3877787807814457e-17
[9,1,3,11];2.8189256484623115e-18
[10,0,2,12];2.710505431213761e-20
[0,17,19,1];2.0679515313825692e-24
[1,16,18,2];1.5881867761018131e-22
[2,15,17,3];2.0328790734103208e-20
[3,14,16,4];4.336808689942018e-19
[4,13,15,5];1.1275702593849246e-17
[5,12,14,6];4.163336342344337e-17
[6,11,13,7];-1.3877787807814457e-17
[7,10,12,8];8.326672684688674e-17
[8,9,11,9];-2.7755575615628914e-16
[9,8,10,10];7.771561172376096e-16
[10,7,9,11];6.106226635438361e-16
[11,6,8,12];5.551115123125783e-17
[12,5,7,13];3.8163916471489756e-17
[13,4,6,14];-4.336808689942018e-18
[14,3,5,15];9.75781955236954e-19
[15,2,4,16];4.743384504624082e-20
[16,1,3,17];2.117582368135751e-22
[17,0,2,18];1.819797347616661e-23
false.
% negligence
?- X = [3,2,1,4], exact_p(X,P), exact_p_ln(X,Q).
X = [3, 2, 1, 4],
P = Q, Q = 0.5238095238095237 .
?- sample_table(K,X), exact_p(X,P).
K = sample1,
X = [1, 9, 11, 3],
P = 0.002759456185220083 ;
K = sample2,
X = [13, 4, 6, 14],
P = 0.008138143815466445 ;
false.
% The following was possible to compute.
?- X=[23,86,28,85], exact_p(X,P).
X = [23, 86, 28, 85],
P = 0.5280417128466433 .
% comparison to R
> F4<- fisher.test(matrix(c(23,86,28,85),nrow=2))
> F4$p.value
[1] 0.5280417
*/
% References:
% [1] Fisher, R. A. (1934). Statistical Methods for Research Workers. (5th ed. and later.) Oliver and Boyd.
% [2] http://en.wikipedia.org/wiki/Fisher%27s_exact_test
% [3] http://aoki2.si.gunma-u.ac.jp/lecture/Cross/Fisher.html
% [4] http://oku.edu.mie-u.ac.jp/~okumura/stat/fishertest.php
return to front page.