# Iti0210w92 demo

# Allikas (source): Lambda
import csv
from collections import defaultdict

# https://github.com/sjwhitworth/golearn/blob/master/examples/datasets/tennis.csv
# Read the whole CSV into memory: `samples` becomes a list of rows, each row a
# list of strings. newline="" hands line-ending handling to the csv module, as
# its documentation requires for files passed to csv.reader.
with open("tennis.csv", newline="") as f:
    r = csv.reader(f, delimiter=",")
    samples = list(r)

#print(samples)

# Class statistics over the data rows (row 0 of `samples` is skipped):
# N is the number of data rows, N_y / N_n the number of rows whose fifth
# column is exactly "yes" / "no". Rows with any other label count towards N only.
N = len(samples[1:])
N_y = sum(1 for record in samples[1:] if record[4] == "yes")
N_n = sum(1 for record in samples[1:] if record[4] == "no")

# Per-class count tables for the first four columns:
# cpt_y[col][value] is the number of "yes" rows holding `value` in column
# `col`; cpt_n is the same for "no" rows.
cpt_y = {col: defaultdict(int) for col in range(4)}
cpt_n = {col: defaultdict(int) for col in range(4)}

for record in samples[1:]:
    label = record[4]
    if label == "yes":
        table = cpt_y
    elif label == "no":
        table = cpt_n
    else:
        # Rows with any other label contribute to neither table.
        continue
    for col, value in enumerate(record[:4]):
        table[col][value] += 1

#print(N, "yes", N_y, "no", N_n)

# Per-column constant added to the denominators in predict(); presumably the
# number of distinct values each of the four feature columns can take
# (verify against the dataset).
uniques = [3, 3, 2, 2]


def predict(rida):
    """Score one sample for the "yes" and "no" classes and report the winner.

    Each score starts from the class prior (N_y / N or N_n / N) and is
    multiplied, column by column, by the add-one smoothed relative frequency
    (count + 1) / (class_count + uniques[i]) taken from cpt_y / cpt_n.

    Args:
        rida: Sequence of feature values ("rida" is Estonian for "row"),
            indexed the same way as the columns counted in cpt_y / cpt_n.
            It must provide one value per entry of ``uniques``.

    Returns:
        True when the "yes" score is strictly greater than the "no" score,
        otherwise False. The two scores and the decision are also printed.
    """
    H_yes = N_y / N
    H_no = N_n / N
    for i, n_values in enumerate(uniques):
        value = rida[i]
        # Use .get() rather than indexing: the tables are defaultdicts, and
        # indexing an unseen value would insert it, so a mere query would
        # modify the trained counts.
        H_yes *= (cpt_y[i].get(value, 0) + 1) / (N_y + n_values)
        H_no *= (cpt_n[i].get(value, 0) + 1) / (N_n + n_values)
    verdict = H_yes > H_no
    print("H_yes", H_yes)
    print("H_no", H_no)
    print("Yes?", verdict)
    return verdict

# Demo query: one value per feature column, in the same column order that the
# count tables were built from.
predict([
    "overcast",
    "cool",
    "high",
    "true",
])