root/examples/naive_bayesian/naive_bayesian.pl

Revision 17701, 2.3 kB (checked in by lwall, 16 months ago)

s:g/err/orelse/

  • Property svn:mime-type set to text/plain; charset=UTF-8
  • Property svn:eol-style set to native
Line 
1use v6-alpha;
2
3my %words;
4
5sub load_db returns Void {
6    return() unless "words.db.pl" ~~ :e;
7    my $db = open("words.db.pl") orelse die "Cannot open the words.db.pl file: $!";
8    for (=$db) -> $_line {
9        my $line = $_line;
10        my ($key, $value) = split("\t", $line);
11        %words{"$key"} = $value;
12    }
13    $db.close();
14}
15
16sub save_db returns Void {
17    my $db = open("words.db.pl", :w) orelse die "Cannot open the words.db.pl file: $!";
18    for (%words.kv) -> $key, $value {
19        $db.say($key ~ "\t" ~ $value);
20    }
21    $db.close();
22}
23
24sub parse_file (Str $file) returns Hash {
25    my %words_in_file;   
26    my $fh = open("$file") orelse die "Cannot open the '$file' file: $!";
27    for (=$fh) -> $_line {
28        my $line = $_line;       
29        while ($line ~~ s:perl5/(\w+)[ \t\n\r]//) {
30            %words_in_file{lc($0)}++;
31        }
32    }
33    $fh.close;
34    return %words_in_file;
35}
36
37sub add_words (Str $category, %words_in_file) returns Void {
38    for (%words_in_file.kv) -> $key, $value {
39        %words{"$category-$key"} += $value;
40    }   
41}
42
43sub classify (%words_in_file) returns Void {
44
45    my %count;
46    my $total = 0;
47   
48    for (%words.kv) -> $key, $value {
49        $key ~~ rx:perl5/^(.+)-(.+)$/;
50        %count{$0} += $value;
51        $total     += $value;
52    }
53
54    my %score;
55    for (%words_in_file.keys) -> $word  {
56        for (%count.kv) -> $category, $count {
57            if (defined(%words{"$category-$word"})) {
58                %score{$category} += log(%words{"$category-$word"} / $count);
59            }
60            else {
61                %score{$category} += log(0.01 / $count);
62            }
63        }
64    }
65   
66    for (%count.kv) -> $category, $count {
67        %score{$category} += log($count / $total)
68    }
69   
70    # do this weird sort block because:
71    #    %score{$^a} <=> %score{$^b}
72    # does not currently work
73    for (%count.keys.sort:{ %score{$^a} == %score{$^b} ?? 0 !! %score{$^a} > %score{$^b} ?? -1 !! 1 }) -> $category {
74        say("$category %score{$category}");
75    }
76}
77
78load_db();
79
80if (@*ARGS[0] eq 'add' && +@*ARGS == 3) {
81    add_words(@*ARGS[1], parse_file(@*ARGS[2]));
82}
83elsif (@*ARGS[0] eq 'classify' && +@*ARGS == 2) {
84    classify(parse_file(@*ARGS[1]));
85}
86else {
87    say("USAGE:
88    add <category> <file>
89    classify <file>");
90}
91
92save_db();
Note: See TracBrowser for help on using the browser.