#!/usr/bin/perl -w
use strict;
use warnings;
use XML::Simple;
use Data::Dumper;
# Create begin.rs
open (BEGIN, ">./rs/begin.rs");
print BEGIN <<'EOF';
// aiml2rs begin script
> begin
+ request
- {ok}
< begin
! var name = Alice
! var gender = female
! var master = Dr. Wallace
! var birthday = a very long time ago
! var birthplace = New York
! var boyfriend = I am single
! var favoritebook = Artificial Intelligence: A Modern Approach
! var favoritecolor = blue
! var favoriteband = They Might Be Giants
! var favoritefood = pizza
! var favoritesong = Mr. Roboto
! var favoritemovie = A.I.
! var forfun = I chat with people online
! var friends = I chat with people online
! var girlfriend = I am single
! var kindmusic = electronic
! var location = New York
! var looklike = kind of how you think I look
! var question = ask me a question
! var sign = saggitarious
! var talkabout = anything that comes up
! var wear = kind of what you think I'm wearing
! var website = alicebot.org
! var email = alice\@alicebot.org
! var language = English
// Handle AIML-special tags like , , etc.
> object alice perl
my ($rs,@args) = @_;
my $tag = lc(shift(@args));
if ($tag eq 'date') {
return localtime(time());
}
elsif ($tag eq 'size') {
# In AIML, returns the number of
# loaded categories.
return scalar keys %{$rs->{topics}->{random}};
}
elsif ($tag eq 'version') {
# In AIML, returns the AIML version.
return $RiveScript::VERSION;
}
elsif ($tag eq 'gossip') {
# In AIML, logs a line of text to a
# log file.
return "";
}
elsif ($tag eq 'person2' || $tag eq 'gender') {
# In AIML, and are substitution
# tags like is in RiveScript.
return join(" ",@args);
}
elsif ($tag eq 'learn') {
# In AIML, is to load another AIML doc.
return "";
}
elsif ($tag eq 'system') {
# In AIML, executes system commands.
return "";
}
else {
return "";
}
< object
// Substitutions
! sub " = "
! sub ' = '
! sub & = &
! sub < = <
! sub > = >
! sub + = plus
! sub - = minus
! sub / = divided
! sub * = times
! sub i'm = i am
! sub i'd = i would
! sub i've = i have
! sub i'll = i will
! sub don't = do not
! sub isn't = is not
! sub you'd = you would
! sub you're = you are
! sub you've = you have
! sub you'll = you will
! sub he'd = he would
! sub he's = he is
! sub he'll = he will
! sub she'd = she would
! sub she's = she is
! sub she'll = she will
! sub they'd = they would
! sub they're = they are
! sub they've = they have
! sub they'll = they will
! sub we'd = we would
! sub we're = we are
! sub we've = we have
! sub we'll = we will
! sub whats = what is
! sub what's = what is
! sub what're = what are
! sub what've = what have
! sub what'll = what will
! sub can't = can not
! sub whos = who is
! sub who's = who is
! sub who'd = who would
! sub who'll = who will
! sub don't = do not
! sub didn't = did not
! sub it's = it is
! sub could've = could have
! sub couldn't = could not
! sub should've = should have
! sub shouldn't = should not
! sub would've = would have
! sub wouldn't = would not
! sub when's = when is
! sub when're = when are
! sub when'd = when did
! sub y = why
! sub u = you
! sub ur = your
! sub r = are
! sub n = and
! sub im = i am
! sub wat = what
! sub wats = what is
! sub ohh = oh
! sub becuse = because
! sub becasue = because
! sub becuase = because
! sub practise = practice
! sub its a = it is a
! sub fav = favorite
! sub fave = favorite
! sub yesi = yes i
! sub yetit = yet it
! sub iam = i am
! sub welli = well i
! sub wellit = well it
! sub amfine = am fine
! sub aman = am an
! sub amon = am on
! sub amnot = am not
! sub realy = really
! sub iamusing = i am using
! sub amleaving = am leaving
! sub yuo = you
! sub youre = you are
! sub didnt = did not
! sub ain't = is not
! sub aint = is not
! sub wanna = want to
! sub brb = be right back
! sub bbl = be back later
! sub gtg = got to go
! sub g2g = got to go
! sub lyl = love you lots
! sub gf = girlfriend
! sub g/f = girlfriend
! sub bf = boyfriend
! sub b/f = boyfriend
! sub b/f/f = best friend forever
! sub :-) = smile
! sub :) = smile
! sub :d = grin
! sub :-d = grin
! sub :-p = tongue
! sub :p = tongue
! sub ;-) = wink
! sub ;) = wink
! sub :-( = sad
! sub :( = sad
! sub :'( = cry
! sub :-[ = shy
! sub :-\\ = uncertain
! sub :-/ = uncertain
! sub :-s = uncertain
! sub 8-) = cool
! sub 8) = cool
! sub :-* = kissyface
! sub :-! = foot
! sub o:-) = angel
! sub >:o = angry
! sub :\@ = angry
! sub 8o| = angry
! sub :\$ = blush
! sub :-\$ = blush
! sub :-[ = blush
! sub :[ = bat
! sub (a) = angel
! sub (h) = cool
! sub 8-| = nerdy
! sub |-) = tired
! sub +o( = ill
! sub *-) = uncertain
! sub ^o) = raised eyebrow
! sub (6) = devil
! sub (l) = love
! sub (u) = broken heart
! sub (k) = kissyface
! sub (f) = rose
! sub (w) = wilted rose
// Person substitutions
! person i am = you are
! person you are = I am
! person i'm = you're
! person you're = I'm
! person my = your
! person your = my
! person you = I
! person i = you
EOF
opendir (DIR, "./aiml");
foreach my $file (sort(grep(/\.aiml$/i, readdir(DIR)))) {
&processAIML("./aiml/$file");
}
closedir (DIR);
sub processAIML {
my $file = shift;
open (FILE, $file);
my @data = ;
close (FILE);
chomp @data;
# Convert everything inside and
# into CDATA.
my $aiml = join("\n",@data);
$aiml =~ s/<(pattern|template|that)>/<$1>/]]><\/$1>/sig;
# Load it.
print "Loading XML data from $file\n";
my $xml = XMLin($aiml);
if (!exists $xml->{category} || ref($xml->{category}) ne "ARRAY") {
warn "ERROR: The file $file didn't parse correctly!\n";
return;
}
my $rs = $file;
$rs =~ s/\/([A-Za-z0-9]+?)\.aiml$/$1\.rs/ig;
open (RIVE, ">./rs/$rs") or die "Can't write ./rs/$rs: $!";
print RIVE "// aiml2rs -- Generated on " . localtime(time()) . "\n";
# Process the categories.
print "Processing AIML file $file...\n";
foreach my $category (@{$xml->{category}}) {
my $pattern = $category->{pattern};
my $template = $category->{template};
my $that = exists $category->{that} ? $category->{that} : undef;
# Remove newlines from these.
$pattern =~ s/[\x0d\x0a]+//sig;
$template =~ s/[\x0d\x0a]+//sig;
if (defined $that) { $that =~ s/[\x0d\x0a]+//sig; }
# Process pattern tags.
$pattern = &doTags($pattern,"pattern");
if (defined $that) {
$that = &doTags($that,"pattern");
}
# Patterns can't contain illegal symbols.
my $re = qr/[^A-Za-z0-9\(\|\)\[\]\*\_\#\@\{\}<>\s]/;
if ($pattern =~ $re) {
print "Warning: Pattern contains invalid symbols: $pattern\n";
$pattern =~ s/$re//g;
}
if (defined $that && $that =~ $re) {
print "Warning: That contains invalid symbols: $that\n";
$that =~ s/$re//g;
}
# Process tags in the template.
$template = &doTags($template,"template","pattern");
print RIVE "\n"
. "+ " . lc($pattern) . "\n";
if (defined $that) {
print RIVE "% " . lc($that) . "\n";
}
print RIVE $template . "\n";
}
close (RIVE);
}
sub doTags {
my $string = shift;
my %context = map { $_ => 1 } @_;
# Common regexp bits
my $qq = q{(?:"|')}; # Both kinds of quotes
if (exists $context{pattern}) {
# Tags that can exist in
$string =~ s{}{}ig;
}
if (exists $context{template}) {
$string =~ s{}{rs2aiml_aiml_think=on::}ig;
$string =~ s{}{rs2aiml_aiml_think=off::}ig;
my @parts = split(/rs2aiml_/, $string);
my @condition = ();
my @new = ();
foreach my $part (@parts) {
my $think = 0;
if ($part =~ /^aiml_think=(on|off)::/i) {
$think = $1 eq 'on' ? 1 : 0;
$part =~ s/^aiml_think=(on|off):://ig;
}
next if length $part == 0;
next if $part =~ /^\s+$/;
# (.+?)}{$1}ig;
# Inside
if ($think) {
# becomes just
$part =~ s{\s*(.+?)\s*}{}ig;
}
else {
# becomes
$part =~ s{\s*(.+?)\s*}{}ig;
}
# Template-only tags
$part =~ s{}{}ig;
$part =~ s{}{}ig;
$part =~ s{}{}ig;
$part =~ s{}{}ig;
$part =~ s{}{}ig;
$part =~ s{}{}ig;
$part =~ s{}{}ig;
$part =~ s{}{}ig;
$part =~ s{}{}ig;
$part =~ s{}{}ig;
$part =~ s{}{}ig;
$part =~ s{}{alice date}ig;
$part =~ s{}{}ig;
$part =~ s{}{alice size}ig;
$part =~ s{}{alice version}ig;
$part =~ s{(.+?)}{alice gossip $1}ig;
$part =~ s{<(uppercase|/uppercase)>}{\{$1\}}ig;
$part =~ s{<(lowercase|/lowercase)>}{\{$1\}}ig;
$part =~ s{<(formal|/formal)>}{\{$1\}}ig;
$part =~ s{<(sentence|/sentence)>}{\{$1\}}ig;
$part =~ s{<(person|/person)>}{\{$1\}}ig;
$part =~ s{<(person|uppercase|lowercase|formal|sentence)\s*/>}{<$1>}ig;
$part =~ s{(.+?)}{alice person2 $1}ig;
$part =~ s{(.+?)}{alice gender $1}ig;
$part =~ s{(.+?)}{alice learn $1}ig;
$part =~ s{(.+?)}{alice system $1}ig;
$part =~ s{\s*(.+?)\s*}{\{\@\L$1\E\}}ig;
$part =~ s{}{<\@>}ig;
$part =~ s{ }{\\n}ig;
my $i = 0;
# Look for conditionals.
if ($part =~ / 100) {
die "Couldn't resolve conditionals: $part\n";
}
while ($part =~ /(.+?)<\/condition>/i) {
$i++;
if ($i > 100) {
die "Got stuck in conditional 1: $part";
}
my $var = $1; my $value = $2; my $text = $3;
push (@condition, " eq $value => $text");
$part =~ s/(.+?)<\/condition>//i;
}
while ($part =~ /(.+?)<\/condition>/i) {
$i++;
if ($i > 100) {
die "Got stuck in conditional 2: $part";
}
my $var = $1;
my $body = $2;
while ($body =~ /
(.+?)<\/li>/i) {
$i++;
if ($i > 100) {
die "Got stuck in conditional 2.5: $body";
}
my $value = $1; my $text = $2;
push (@condition, " eq $value => $text");
$body =~ s/
(.+?)<\/li>//i;
}
$part =~ s/(.+?)<\/condition>//i;
}
while ($part =~ /(.+?)<\/condition>/i) {
$i++;
if ($i > 100) {
die "Got stuck in conditional 3: $part";
}
my $body = $1;
while ($body =~ /
(.+?)<\/li>/i) {
$i++;
if ($i > 100) {
die "Got stuck in conditional 3.33: $body";
}
my $var = $1; my $value = $2; my $text = $3;
push (@condition, " eq $value => $text");
$body =~ s/
(.+?)<\/li>//i;
}
while ($body =~ /
(.+?)<\/li>/i) {
$i++;
if ($i > 100) {
die "Got stuck in conditional 3.66: $body";
}
push (@new, $1);
$body =~ s/
(.+?)<\/li>//i;
}
$part =~ s/(.+?)<\/condition>//i;
}
$part =~ s///ig;
$part =~ s/<\/condition.+?>//ig;
}
# Look for randomness.
if ($part =~ //i) {
while ($part =~ /(.+?)<\/random>/i) {
my $body = $1;
my @rand = ();
while ($body =~ /
(.+?)<\/li>/i) {
push (@rand, $1);
$body =~ s/
(.+?)<\/li>//i;
}
my $rnd = join("|",@rand);
$part =~ s/(.+?)<\/random>/{random}$rnd\{\/random}/i;
}
$part =~ s///ig;
$part =~ s/<\/random>//ig;
}
# Fix weird set issues
$part =~ s{\s*(.+?)\s*>+\s*}{}ig;
$part =~ s{\s*(.+?)\s*}{$2}ig;
$part =~ s{]+?)>\s*([^>\{])}{>$3}ig;
$part =~ s{]+?)\s*<([^<>]+)>\s*([^>]+)(>)}{$4$5}ig;
$part =~ s{]+?)>\s*([^>\{])}{>$3}ig;
$part =~ s/^[\t\s]+//g;
$part =~ s/[\t\s]+$//g;
$part =~ s/[\t\s+]/ /g;
push (@new,$part);
}
# Weed through the reply bits in @new, to remove extra spaces
# and get rid of empty fields.
my @replies = ();
foreach my $item (@new) {
$item =~ s/^[\s\t]+//g;
$item =~ s/[\s\t]+$//g;
$item =~ s/[\s\t]+]/ /g;
next if length $item == 0;
push (@replies,$item);
}
$string = '';
if (scalar(@condition)) {
$string = '* ' . join("\n* ", @condition) . "\n";
}
$string .= '- ' . join("\n^ ",@replies);
}
return $string;
}