#!/usr/bin/perl -w use strict; use warnings; use XML::Simple; use Data::Dumper; # Create begin.rs open (BEGIN, ">./rs/begin.rs"); print BEGIN <<'EOF'; // aiml2rs begin script > begin + request - {ok} < begin ! var name = Alice ! var gender = female ! var master = Dr. Wallace ! var birthday = a very long time ago ! var birthplace = New York ! var boyfriend = I am single ! var favoritebook = Artificial Intelligence: A Modern Approach ! var favoritecolor = blue ! var favoriteband = They Might Be Giants ! var favoritefood = pizza ! var favoritesong = Mr. Roboto ! var favoritemovie = A.I. ! var forfun = I chat with people online ! var friends = I chat with people online ! var girlfriend = I am single ! var kindmusic = electronic ! var location = New York ! var looklike = kind of how you think I look ! var question = ask me a question ! var sign = saggitarious ! var talkabout = anything that comes up ! var wear = kind of what you think I'm wearing ! var website = alicebot.org ! var email = alice\@alicebot.org ! var language = English // Handle AIML-special tags like , , etc. > object alice perl my ($rs,@args) = @_; my $tag = lc(shift(@args)); if ($tag eq 'date') { return localtime(time()); } elsif ($tag eq 'size') { # In AIML, returns the number of # loaded categories. return scalar keys %{$rs->{topics}->{random}}; } elsif ($tag eq 'version') { # In AIML, returns the AIML version. return $RiveScript::VERSION; } elsif ($tag eq 'gossip') { # In AIML, logs a line of text to a # log file. return ""; } elsif ($tag eq 'person2' || $tag eq 'gender') { # In AIML, and are substitution # tags like is in RiveScript. return join(" ",@args); } elsif ($tag eq 'learn') { # In AIML, is to load another AIML doc. return ""; } elsif ($tag eq 'system') { # In AIML, executes system commands. return ""; } else { return ""; } < object // Substitutions ! sub " = " ! sub ' = ' ! sub & = & ! sub < = < ! sub > = > ! sub + = plus ! sub - = minus ! sub / = divided ! sub * = times ! sub i'm = i am ! sub i'd = i would ! sub i've = i have ! sub i'll = i will ! sub don't = do not ! sub isn't = is not ! sub you'd = you would ! sub you're = you are ! sub you've = you have ! sub you'll = you will ! sub he'd = he would ! sub he's = he is ! sub he'll = he will ! sub she'd = she would ! sub she's = she is ! sub she'll = she will ! sub they'd = they would ! sub they're = they are ! sub they've = they have ! sub they'll = they will ! sub we'd = we would ! sub we're = we are ! sub we've = we have ! sub we'll = we will ! sub whats = what is ! sub what's = what is ! sub what're = what are ! sub what've = what have ! sub what'll = what will ! sub can't = can not ! sub whos = who is ! sub who's = who is ! sub who'd = who would ! sub who'll = who will ! sub don't = do not ! sub didn't = did not ! sub it's = it is ! sub could've = could have ! sub couldn't = could not ! sub should've = should have ! sub shouldn't = should not ! sub would've = would have ! sub wouldn't = would not ! sub when's = when is ! sub when're = when are ! sub when'd = when did ! sub y = why ! sub u = you ! sub ur = your ! sub r = are ! sub n = and ! sub im = i am ! sub wat = what ! sub wats = what is ! sub ohh = oh ! sub becuse = because ! sub becasue = because ! sub becuase = because ! sub practise = practice ! sub its a = it is a ! sub fav = favorite ! sub fave = favorite ! sub yesi = yes i ! sub yetit = yet it ! sub iam = i am ! sub welli = well i ! sub wellit = well it ! sub amfine = am fine ! sub aman = am an ! sub amon = am on ! sub amnot = am not ! sub realy = really ! sub iamusing = i am using ! sub amleaving = am leaving ! sub yuo = you ! sub youre = you are ! sub didnt = did not ! sub ain't = is not ! sub aint = is not ! sub wanna = want to ! sub brb = be right back ! sub bbl = be back later ! sub gtg = got to go ! sub g2g = got to go ! sub lyl = love you lots ! sub gf = girlfriend ! sub g/f = girlfriend ! sub bf = boyfriend ! sub b/f = boyfriend ! sub b/f/f = best friend forever ! sub :-) = smile ! sub :) = smile ! sub :d = grin ! sub :-d = grin ! sub :-p = tongue ! sub :p = tongue ! sub ;-) = wink ! sub ;) = wink ! sub :-( = sad ! sub :( = sad ! sub :'( = cry ! sub :-[ = shy ! sub :-\\ = uncertain ! sub :-/ = uncertain ! sub :-s = uncertain ! sub 8-) = cool ! sub 8) = cool ! sub :-* = kissyface ! sub :-! = foot ! sub o:-) = angel ! sub >:o = angry ! sub :\@ = angry ! sub 8o| = angry ! sub :\$ = blush ! sub :-\$ = blush ! sub :-[ = blush ! sub :[ = bat ! sub (a) = angel ! sub (h) = cool ! sub 8-| = nerdy ! sub |-) = tired ! sub +o( = ill ! sub *-) = uncertain ! sub ^o) = raised eyebrow ! sub (6) = devil ! sub (l) = love ! sub (u) = broken heart ! sub (k) = kissyface ! sub (f) = rose ! sub (w) = wilted rose // Person substitutions ! person i am = you are ! person you are = I am ! person i'm = you're ! person you're = I'm ! person my = your ! person your = my ! person you = I ! person i = you EOF opendir (DIR, "./aiml"); foreach my $file (sort(grep(/\.aiml$/i, readdir(DIR)))) { &processAIML("./aiml/$file"); } closedir (DIR); sub processAIML { my $file = shift; open (FILE, $file); my @data = ; close (FILE); chomp @data; # Convert everything inside and # into CDATA. my $aiml = join("\n",@data); $aiml =~ s/<(pattern|template|that)>/<$1>/]]><\/$1>/sig; # Load it. print "Loading XML data from $file\n"; my $xml = XMLin($aiml); if (!exists $xml->{category} || ref($xml->{category}) ne "ARRAY") { warn "ERROR: The file $file didn't parse correctly!\n"; return; } my $rs = $file; $rs =~ s/\/([A-Za-z0-9]+?)\.aiml$/$1\.rs/ig; open (RIVE, ">./rs/$rs") or die "Can't write ./rs/$rs: $!"; print RIVE "// aiml2rs -- Generated on " . localtime(time()) . "\n"; # Process the categories. print "Processing AIML file $file...\n"; foreach my $category (@{$xml->{category}}) { my $pattern = $category->{pattern}; my $template = $category->{template}; my $that = exists $category->{that} ? $category->{that} : undef; # Remove newlines from these. $pattern =~ s/[\x0d\x0a]+//sig; $template =~ s/[\x0d\x0a]+//sig; if (defined $that) { $that =~ s/[\x0d\x0a]+//sig; } # Process pattern tags. $pattern = &doTags($pattern,"pattern"); if (defined $that) { $that = &doTags($that,"pattern"); } # Patterns can't contain illegal symbols. my $re = qr/[^A-Za-z0-9\(\|\)\[\]\*\_\#\@\{\}<>\s]/; if ($pattern =~ $re) { print "Warning: Pattern contains invalid symbols: $pattern\n"; $pattern =~ s/$re//g; } if (defined $that && $that =~ $re) { print "Warning: That contains invalid symbols: $that\n"; $that =~ s/$re//g; } # Process tags in the template. $template = &doTags($template,"template","pattern"); print RIVE "\n" . "+ " . lc($pattern) . "\n"; if (defined $that) { print RIVE "% " . lc($that) . "\n"; } print RIVE $template . "\n"; } close (RIVE); } sub doTags { my $string = shift; my %context = map { $_ => 1 } @_; # Common regexp bits my $qq = q{(?:"|')}; # Both kinds of quotes if (exists $context{pattern}) { # Tags that can exist in $string =~ s{}{}ig; } if (exists $context{template}) { $string =~ s{}{rs2aiml_aiml_think=on::}ig; $string =~ s{}{rs2aiml_aiml_think=off::}ig; my @parts = split(/rs2aiml_/, $string); my @condition = (); my @new = (); foreach my $part (@parts) { my $think = 0; if ($part =~ /^aiml_think=(on|off)::/i) { $think = $1 eq 'on' ? 1 : 0; $part =~ s/^aiml_think=(on|off):://ig; } next if length $part == 0; next if $part =~ /^\s+$/; # (.+?)}{$1}ig; # Inside if ($think) { # becomes just $part =~ s{\s*(.+?)\s*}{}ig; } else { # becomes $part =~ s{\s*(.+?)\s*}{}ig; } # Template-only tags $part =~ s{}{}ig; $part =~ s{}{}ig; $part =~ s{}{}ig; $part =~ s{}{}ig; $part =~ s{}{}ig; $part =~ s{}{}ig; $part =~ s{}{}ig; $part =~ s{}{}ig; $part =~ s{}{}ig; $part =~ s{}{}ig; $part =~ s{}{}ig; $part =~ s{}{alice date}ig; $part =~ s{}{}ig; $part =~ s{}{alice size}ig; $part =~ s{}{alice version}ig; $part =~ s{(.+?)}{alice gossip $1}ig; $part =~ s{<(uppercase|/uppercase)>}{\{$1\}}ig; $part =~ s{<(lowercase|/lowercase)>}{\{$1\}}ig; $part =~ s{<(formal|/formal)>}{\{$1\}}ig; $part =~ s{<(sentence|/sentence)>}{\{$1\}}ig; $part =~ s{<(person|/person)>}{\{$1\}}ig; $part =~ s{<(person|uppercase|lowercase|formal|sentence)\s*/>}{<$1>}ig; $part =~ s{(.+?)}{alice person2 $1}ig; $part =~ s{(.+?)}{alice gender $1}ig; $part =~ s{(.+?)}{alice learn $1}ig; $part =~ s{(.+?)}{alice system $1}ig; $part =~ s{\s*(.+?)\s*}{\{\@\L$1\E\}}ig; $part =~ s{}{<\@>}ig; $part =~ s{}{\\n}ig; my $i = 0; # Look for conditionals. if ($part =~ / 100) { die "Couldn't resolve conditionals: $part\n"; } while ($part =~ /(.+?)<\/condition>/i) { $i++; if ($i > 100) { die "Got stuck in conditional 1: $part"; } my $var = $1; my $value = $2; my $text = $3; push (@condition, " eq $value => $text"); $part =~ s/(.+?)<\/condition>//i; } while ($part =~ /(.+?)<\/condition>/i) { $i++; if ($i > 100) { die "Got stuck in conditional 2: $part"; } my $var = $1; my $body = $2; while ($body =~ /(.+?)<\/li>/i) { $i++; if ($i > 100) { die "Got stuck in conditional 2.5: $body"; } my $value = $1; my $text = $2; push (@condition, " eq $value => $text"); $body =~ s/(.+?)<\/li>//i; } $part =~ s/(.+?)<\/condition>//i; } while ($part =~ /(.+?)<\/condition>/i) { $i++; if ($i > 100) { die "Got stuck in conditional 3: $part"; } my $body = $1; while ($body =~ /(.+?)<\/li>/i) { $i++; if ($i > 100) { die "Got stuck in conditional 3.33: $body"; } my $var = $1; my $value = $2; my $text = $3; push (@condition, " eq $value => $text"); $body =~ s/(.+?)<\/li>//i; } while ($body =~ /
  • (.+?)<\/li>/i) { $i++; if ($i > 100) { die "Got stuck in conditional 3.66: $body"; } push (@new, $1); $body =~ s/
  • (.+?)<\/li>//i; } $part =~ s/(.+?)<\/condition>//i; } $part =~ s///ig; $part =~ s/<\/condition.+?>//ig; } # Look for randomness. if ($part =~ //i) { while ($part =~ /(.+?)<\/random>/i) { my $body = $1; my @rand = (); while ($body =~ /
  • (.+?)<\/li>/i) { push (@rand, $1); $body =~ s/
  • (.+?)<\/li>//i; } my $rnd = join("|",@rand); $part =~ s/(.+?)<\/random>/{random}$rnd\{\/random}/i; } $part =~ s///ig; $part =~ s/<\/random>//ig; } # Fix weird set issues $part =~ s{\s*(.+?)\s*>+\s*}{}ig; $part =~ s{\s*(.+?)\s*}{$2}ig; $part =~ s{]+?)>\s*([^>\{])}{>$3}ig; $part =~ s{]+?)\s*<([^<>]+)>\s*([^>]+)(>)}{$4$5}ig; $part =~ s{]+?)>\s*([^>\{])}{>$3}ig; $part =~ s/^[\t\s]+//g; $part =~ s/[\t\s]+$//g; $part =~ s/[\t\s+]/ /g; push (@new,$part); } # Weed through the reply bits in @new, to remove extra spaces # and get rid of empty fields. my @replies = (); foreach my $item (@new) { $item =~ s/^[\s\t]+//g; $item =~ s/[\s\t]+$//g; $item =~ s/[\s\t]+]/ /g; next if length $item == 0; push (@replies,$item); } $string = ''; if (scalar(@condition)) { $string = '* ' . join("\n* ", @condition) . "\n"; } $string .= '- ' . join("\n^ ",@replies); } return $string; }