>install.packages(bnlearn) library(bnlearn) > set.seed(1) #Create a Bayesian network!! > net = model2network("[E][F|E][D|F][T|E][S|E][G|T:S]") # One may plot the resulting DAG using: > plot(net) ---------------------------------------------------------------------------------------------------------------------------------------- # We have to assign conditional vertex probabilities: # For the prior distribution in E, this is simple: # Define "yes" and "no": yn = c("yes","no") > cptE = matrix(c(0.3,0.7), ncol=2 , dimnames=list(NULL,yn)) > cptE yes no [1,] 0.3 0.7 # For a conditional, conditioned on ONLY ONE node, the cpd can be defined as: > cptF = matrix( c(0.5 , 0.5 , 0.1 , 0.9) , ncol=2 , dimnames=list("F"=yn, "E"=yn) ) > cptF E F yes no yes 0.5 0.1 no 0.5 0.9 > cptS = matrix( c(0.75 , 0.25 , 0.25 , 0.75), ncol=2 , dimnames=list("S"=yn, "E"=yn) ) > cptT = matrix( c(0.8 , 0.2 , 0.3 , 0.7), ncol=2 , dimnames=list("T"=yn, "E"=yn) ) > cptD = matrix( c(0.8 , 0.2 , 0.2 , 0.8), ncol=2 , dimnames=list("D"=yn, "F"=yn) ) # Now for the last conditional it is harder to do it as it would require a 3d Matrix ( = Tensor) which R does not have: # Start defining a random object: > cptG = c(0.5,0.5,0.4,0.6,0.3,0.7,0.2,0.8) > cptG [1] 0.5 0.5 0.4 0.6 0.3 0.7 0.2 0.8 # This is just a vector, so we have to make a table out of it > dim(cptG) = c(2,2,2) > cptG , , 1 [,1] [,2] [1,] 0.5 0.4 [2,] 0.5 0.6 , , 2 [,1] [,2] [1,] 0.3 0.2 [2,] 0.7 0.8 # Now, we have to assign the right dimension names: > dimnames(cptG) = list( "G"=yn , "S" = yn , "T"=yn) > cptG , , T = yes S D yes no yes 0.5 0.4 no 0.5 0.6 , , T = no S D yes no yes 0.3 0.2 no 0.7 0.8 # THIS IS THE WRONG ASSIGNMENT: For instance P(G="yes"|S="no",T="yes") = 0.3 > dimnames(cptG) = list( "G"=yn , "T" = yn , "S"=yn) > cptG , , S = yes T D yes no yes 0.5 0.4 no 0.5 0.6 , , S = no T D yes no yes 0.3 0.2 no 0.7 0.8 # Now it is correct! :-) This one should always double-check! ----------------------------------------------------------------------------------------------------------------------------------------------- # Now we assign all these tables to the network defined at the very beginning: > net.disc = custom.fit(net, dist=list(E=cptE, F=cptF, D=cptD, S=cptS, T=cptT, G=cptG) ) # We have to give probability tables for ALL variables, if we don't, we get an error: > net.disc = custom.fit(net, dist=list(E=cptE, F=cptF, D=cptD, S=cptS, T=cptT, ) ) Error in list(E = cptE, F = cptF, D = cptD, S = cptS, T = cptT, ) : argument 6 is empty # If we now query "net.disc" > net.disccpq Bayesian network parameters Parameters of node D (multinomial distribution) Conditional probability table: F D yes no yes 0.8 0.2 no 0.2 0.8 Parameters of node E (multinomial distribution) Conditional probability table: yes no 0.3 0.7 Parameters of node F (multinomial distribution) Conditional probability table: E F yes no yes 0.5 0.1 no 0.5 0.9 Parameters of node G (multinomial distribution) Conditional probability table: , , T = yes S G yes no yes 0.5 0.3 no 0.5 0.7 , , T = no S G yes no yes 0.4 0.2 no 0.6 0.8 Parameters of node S (multinomial distribution) Conditional probability table: E S yes no yes 0.75 0.25 no 0.25 0.75 Parameters of node T (multinomial distribution) Conditional probability table: E T yes no yes 0.8 0.3 no 0.2 0.7 # We obtain all the information of the probability tables ------------------------------------------------------------------------------------------------------------ # We can compute probabilites without evidence using the cpquery command: > cpquery(net.disc, (D=="yes") , TRUE, debug=TRUE ) [1] 0.3340769 # is the probability that a person visits the doctor, with no evidence given! # if there is no condition (evidence) we have to set this to "TRUE", if we do not do it: > cpquery(net.disc, D=="yes" ) Error in cpquery(net.disc, D == "yes") : the expression describing the evidence is missing. # WARNING!!! This command is doing "inexact inference", we can also specify the method: > cpquery(net.disc, (D=="yes") , method="ls", TRUE ) [1] 0.3304769 > cpquery(net.disc, (D=="yes") , method="lw", TRUE ) [1] 0.3342769 # whereas the exact probability is: 0.332 ;-) # for the exercise, do not specifiy a method!!! ----------------------------------------------------------------------------------------------------------- # So lets do inexact inference of the previous exercises now and compare # b) How likely will Tom visit the doctor if he has a cold? # P(D=1|E=1) = P(F=1|E=1) * P(D=1|F=1) + P(F=0|E=1) * P(D=1|F=0) # P = 0.5*0.8 + 0.5*0.2 = 0.5 > cpquery(net.disc, D=="yes", E=="yes") [1] 0.5029476 ---------------------------------------------------------------------------------------------------------- # c) What did Sven drink, if we know he spend lots of money at the store? # Looking at the conditional vertex probability distribution P(G|T,S) in G, we are only interested in the case G==1 # P(G=1|S=1,T=1) = 0.5 # P(G=1|S=1,T=0) = 0.4 # P(G=1|S=0,T=1) = 0.3 # P(G=1|S=0,T=0) = 0.2 # Their sum is: 1.4 # This leads to the following probabilities: # P(T=1,S=1|G=1) = 0.3571 # P(T=0,S=1|G=1) = 0.2857 # P(T=1,S=0|G=1) = 0.2143 # P(T=0,S=0|G=1) = 0.1429 > cpquery(net.disc, (T=="yes" & S=="yes") , G=="yes") [1] 0.3588521 > cpquery(net.disc, (T=="no" & S=="yes") , G=="yes") [1] 0.2042609 > cpquery(net.disc, (T=="yes" & S=="no") , G=="yes") [1] 0.2020789 > cpquery(net.disc, (T=="no" & S=="no") , G=="yes") [1] 0.2326533 # I do not know why these results don't conform with the analytical results... :( ---------------------------------------------------------------------------------------------------------- # d) Marc hat Saft getrunken, mit welcher Wahrscheinlichkeit hat er ein Fieber? > cpquery(net.disc, event=(F=="yes") , evidence=S=="yes", method="ls") [1] 0.322673 # True P = 0.325 --------------------------------------------------------------------------------------------------------- # e) Tim hat im Laden viel Geld gelassen, mit welcher Wahrscheinlichkeit wird er zum Doktor gehen? > cpquery(net.disc, event=(D=="yes") , evidence=G=="yes", method="ls") [1] 0.3562518 True P = 0.3553