#!/usr/bin/perl
#  
#  Copyright (c) 2002 Steve Slaven, All Rights Reserved.
#  
#  This program is free software; you can redistribute it and/or
#  modify it under the terms of the GNU General Public License as
#  published by the Free Software Foundation; either version 2 of
#  the License, or (at your option) any later version.
#  
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
#  GNU General Public License for more details.
#  
#  You should have received a copy of the GNU General Public License
#  along with this program; if not, write to the Free Software
#  Foundation, Inc., 59 Temple Place, Suite 330, Boston,
#  MA 02111-1307 USA
#  

# This is all a mess right now, swapping in and out algs trying to figure out
# wtf the guys at yahoo games are doing to make it work... grrrr...
# Helps you cheat at spelldown  :)

# Personally I like it run as 'spelldown -s -l999 -m20'
# which does auto-scoring, and only shows the top 20 items
# each time

# takes 2 params, the letter list and a comma sepd list of word lengths
# if not passed, goes in to "interactive" mode that takes the two params and
# spits out results over and over
use strict;
use vars qw( @ARGS );

use Getopt::Std;
use DB_File;
use POSIX;

use Term::ReadLine;

my %o;
getopts( 'dhpfsB:l:m:t:c:Q', \%o );

my $VERSION = '1.0rc1';
die( "
spelldown v$VERSION
Author: Steve Slaven - http://hoopajoo.net

Usage: $0 [-hpsfQ] [-c cheat-FO] [-l lines] [-B BADWORD]
	[-m max_matches] [-t time] [letters combos]
  -h  This help
  -p  Preload dictionary in to memory for interactive use
      speeds up searches by about 2x, but uses lots of memory (+90MB for me)
  -s  Do scoring and sort matches by score, and potential time bonus
  -l  Show X lines per page, defaults to 5
  -f  Use frequency code (EXPERIMENTAL, lower rareness value means it's been seen less)
  -m  Show a max of N items, set lower than '-l' (dash-ell) to never
      have to see page 2, set to zero to disable completely
  -t  Allow max of N seconds per search (change dunring interactive mode w/ t TIME)
  -c  co-op cheating, read point values from a FIFO and letters from STDIN
      just echo 111,111 > FIFO or make a wrapper to read/write to it.  The
      idea is one person keys the letters in to spelldown, and the other writes
      the scoring combos to the FIFO
  -Q  Quiet mode, for parsing.  Only outputs words
  -B  Enter badword BADWORD and exit
  
letters is the letters from the puzzle, and combos is a formatted list of the
bonus modifiers, e.g. an entry might be

./spelldown -s -f abcdefghi 1213,112

Which has the letters 'abcdefghi' as the letters to use, and there were
two words that needed to be filled in, one four letters and the other three
letters.  The First had an x2 at letter 2 and an x3 at letter four, while the
second had an x2 at letter three.  You must put 1's in the places where there
are no bonuses.

" ) if $o{ h };

# This is the word scoring thing
my %scores = ( a => 1,
	    b => 2,
	    c => 2,
	    d => 1,
	    e => 1,
	    f => 3,
	    g => 2,
	    h => 2,
	    i => 1,
	    j => 5,
	    k => 3,
	    l => 1,
	    m => 2,
	    n => 1,
	    o => 1,
	    p => 2,
	    q => 5,
	    r => 1,
	    s => 1,
	    t => 1,
	    u => 2,
	    v => 3,
	    w => 4,
	    x => 5,
	    y => 3,
	    z => 4 );

#generate_list( [ split( //, shift( @ARGV ) ) ], [ split( /,/, shift( @ARGV ) ) ] );

# Hot hot grits
$| = 1;
my( %d, %dict, $letters, $passed, $combos, $cnt, $entry, $ans, $interrupted, %badwords );
my( @combo_scores, @combo_numbers, $find_words, $score_words, $term, %frequency );
my( $cmd, $arg );

if( $o{ c } ) {
  die( "$o{c} not a FIFO" ) unless -p $o{ c };
  print "Waiting for client connection on FIFO...\n";
  open( FIFO, $o{ c } ) || die( "Couldn't open $o{c}: $!" );
}

# Preload dict file?  Good for multiple runs
if( $o{ p } ) {
  tie %d, 'DB_File', 'spelldown.db', O_RDONLY;
  print "Preloading dictionary in to memory...\n";
  %dict = %d;
  untie %d;
}else{
  tie %dict, 'DB_File', 'spelldown.db', O_RDONLY;
}

# Non-accepted words database
tie %badwords, 'DB_File', 'badwords.db';

# Add badword and exit if needed
if( $o{ B } ) {
	print "Adding badword $o{B}\n";
	$badwords{ $o{ B } } = 1;
	exit;
}

# Frequency scoring database
tie %frequency, 'DB_File', 'frequency.db';

# Load args if passed
$letters = shift( @ARGV );
if( $letters ) {
  $passed = 1;
  $combos = shift( @ARGV );
  $o{ l } = 9999999 unless $o{ l };
}else{
  $passed = 0;
  $term = new Term::ReadLine 'Spelldown SuperCheatORama!';
  $o{ l } = 5 unless $o{ l };
}

# Set sighup to interrupt processing
$SIG{ INT } = sub { print STDERR "Interrupted!\n" ; $interrupted = 1; };
$SIG{ ALRM } = sub { print STDERR "Interrupted!\n" ; $interrupted = 1; };

#for( keys( %o ) ) {
#  print "$_: $o{$_}\n";
#}

print "spelldown v$VERSION by Steve Slaven - http://hoopajoo.net & Chris Weedin\n" unless $passed;

while ( 1 ) {
  while ( ! $letters ) {
    $interrupted = 0;
    print "   end > quit, b > bad word, l > list bad words, ub > unbad word\n";
    print "   t > timeout, p > per page, m > max items to return\n";
    $entry = $term -> readline( 'Enter letter combos and scores: ' );
    chomp( $entry );

    #$term -> addhistory( $entry );

    ( $letters, $combos ) = $entry =~ /(\S+)\s+(\S+)/;
    $cmd = $letters;
    $arg = $combos;

    exit() if $entry eq 'end';

    if ( $cmd eq 'b' ) {
      print "Adding badword entry: $arg\n";
      $badwords{ $arg } = 1;

      $letters = $combos = $entry = '';
    } elsif ( $cmd eq 'ub' ) {
      print "Removing badword entry: $arg\n";
      delete( $badwords{ $arg } );
	
      $letters = $combos = $entry = '';
    } elsif ( $entry eq 'l' ) {
      print "Badword list:\n";
      for ( keys( %badwords ) ) {
	print "  $_\n";
      }
	
      $letters = $combos = $entry = '';
    } elsif ( $cmd eq 't' ) {
      print "Timeout set to $arg\n";
      $o{ t } = $arg;

      $letters = $combos = $entry = '';
    }elsif( $cmd eq 'p' ) {
      print "Per page set to $arg\n";
      $o{ l } = $arg;

      $letters = $combos = $entry = '';
    }elsif( $cmd eq 'm' ) {
      print "Max returned results set to $arg\n";
      $o{ m } = $arg;

      $letters = $combos = $entry = '';
    }

    if ( ( ! $combos ) && $o{ c } && $entry ) {
      print "Waiting for input from client...\n";
      chomp( $combos = <FIFO> ) unless $interrupted;
      $letters = $entry;
      print "Got entry, running...\n";
    }
  }

  $cnt = 1;
  $ans = '';

  # Generate a combo thing list
  @combo_scores = @combo_numbers = ();
  for ( split( /,/, $combos ) ) {
    push( @combo_scores, split( // ) );
    push( @combo_numbers, length( $_ ) );
  }

  #dprint( "Combo scores: " . join( ",", @combo_scores ) . "\n" );
  #dprint( "Combo numbers: " . join( ",", @combo_numbers ) . "\n" );

  alarm( $o{ t } ) if $o{ t };
  $find_words = generate_list( [ split( //, $letters ) ], \@combo_numbers );
  alarm( 0 );

  # Sort by score
  if ( $o{ s } ) {
    $score_words = [ sort { $b -> { sortkey } cmp $a -> { sortkey } }
		     map { score_words( $_, \@combo_scores ) } @{ $find_words } ];
  } else {
    $score_words = [ map { { score => 0, avg_letter_score => 0, words => $_ } } @{ $find_words } ];
  }

  # Handle frequency updates (pretty much made this part up since we don't really know
  # how frequent a word has been used on spelldown, but we can track it for us)
  # Higher words on the list are scored more frequency points
  $cnt = 100;
  for( @{ $score_words } ) {
    for( split( /\s+/, $_ -> { words } ) ) {
      $frequency{ $_ } += $cnt;
    }

    $cnt -= 9;
    last if $cnt < 0;
  }

  # Strip to -m max lines
  if( $o{ m } ) {
      if( scalar( @{ $score_words } ) > $o{ m } ) {
	  $score_words = [ @{ $score_words }[ 0 .. $o{ m } - 1 ] ];
      }
  }

  # Only print head/foot if in interactive mode
  print "---- BEGIN ----\n" unless $passed;
  #print "word (rank, high nonadj letter, high letter, nonadj letter, letter, nonadj score, score)\n" unless $passed;
  printf( '%6s  %-24s %6s %6s %9s%s',
	  'rank',
	  'words',
	  'time+',
	  'score',
	  'rareness',
	  "\n" ) if $o{ s } and ! $o{ Q };

  $cnt = 1;
  for ( @{ $score_words } ) {
    print $_ -> { words } unless $o{ s } and ! $o{ Q };
    # Dump gobs of debug info
#     printf( '%-25s (%-15s,%04.3f,%04.3f,%04.3f,%04.3f,%03d,%03d)', 
# 	    $_ -> { words },
# 	    $_ -> { sortkey },
# 	    $_ -> { high_non_adjusted_avg },
# 	    $_ -> { high_avg },
# 	    $_ -> { non_adjusted_avg_letter_score }, 
# 	    $_ -> { avg_letter_score }, 
# 	    $_ -> { non_adjusted_score },
# 	    $_ -> { score } ) if $o{ s };
    printf( '%6d %-25s %6d %6d %09d',
	    $cnt,
	    $_ -> { words },
	    $_ -> { time_bonus },
	    $_ -> { score },
	    $_ -> { rareness } ) if $o{ s } and ! $o{ Q };
    print "\n";

    if( $o{ l } ) {
      if ( $cnt % $o{ l } == 0 ) {
	print "Press RET (q+ret to end run)  ";
	chomp( $ans = <STDIN> );
      }
    }
    $cnt++;

    last if( $ans eq 'q' )
  }
  print "---- END ----\n" unless $passed;

  exit if $passed;
  $letters = '';
}

# Here is the magic.  See the builddict program for details about the dict
# format.  The way we do this is split out the letters then order them,
# so we don't re-process any starting letters.  Then, starting with the
# letter at the front of the list, walk the dict until we either reach a word
# length, OR we reach a dead end.
sub generate_list {
  my $letters = shift();
  my @wordlens = @{ shift() };

  # Handle interrupting
  return( [] ) if $interrupted;

  # No more recursion...
  my( $candidates, $carry, $this_len, $this_can );

  # Prepop candidate with nothing done yet
  $candidates = [ { words => '', letters => $letters } ];

  # Follow the something...
  for $this_len ( @wordlens ) {
    # Find words for this round, add them to candidates, prune off letters, then lather
    # rinse, repeat
    $carry = [];
    for $this_can ( @{ $candidates } ) {
      for( @{ find_words( $this_len, $this_can -> { letters } ) } ) {
	push( @{ $carry }, { words => $this_can -> { words } . " " . $_ -> { word },
			     letters => $_ -> { letters } } );
      }
    }
    $candidates = $carry; # Carry survivors
  }

  return( [ map { $_ -> { words } } @{ $carry } ] );
}

# This is the real meat, it will make a word of LEN from letters, and do automagic
# letter juggling.  Returns all words of LEN long as arrayref.
# Now the return is a hashref... no need to recalc letter things if we've done it here
sub find_words {
  my $len = shift;
  my @letters = @{ shift() };

  # Short out if interrupted
  return( [] ) if $interrupted;

  my( @ltemp, @ret, @valid, $candidates, $carrys, $next_letter );
  my( $this_candidate, $prune_letter, $pruned );
  my %seen = ();
  my $count = scalar( @letters );
  # We loop over the whole array once because we use the first
  # letter in the list as the point to build words from,
  # so we cycle that point on each iteraction, eg abc bca cab
  for( 1 .. $count ) {
    #dprint( "Begin round $_, for len $len, passed working" );
    #dprint( join( ":", @letters ) . "\n" );

    # Skip if we've been there
    if( ! $seen{ $letters[ 0 ] } ) {
      # Begin the dict walk, start with first letter
      @ltemp = @letters;
      $candidates = [ { word => shift( @ltemp ), letters => [ @ltemp ] } ];

      # Walk them all at the same time instead of recursively, go from candidates => carrys
      # and loop, this shaved off about 30% of exec time
      for( 1 .. $len - 1 ) {
	# New array to build into
	$carrys = [];

	for $this_candidate ( @{ $candidates } ) {
	  #dprint( "this candidate: " . $this_candidate -> { word } . "\n" );
	  #dprint( "Remaining: " . join( ",", @{ $this_candidate -> { letters } } ) . "\n" );

	  last if $interrupted; # Back out fast

	  for $next_letter ( split( //, $dict{ $this_candidate -> { word } } ) ) {
	    # If we have the letter, make it a candidate
	    #dprint( "Next letter: $next_letter\n" );

	    if( grep { $_ eq $next_letter } @{ $this_candidate -> { letters } } ) {
	      #dprint( "Hey, I got that one!\n" );

	      # Inlined now, we can also tell if the letter is present if $prune_letter is ''
	      $prune_letter = $next_letter;
	      $pruned = [];
	      for( @{ $this_candidate -> { letters } } ) {
		if( $prune_letter eq $_ ) {
		  # Skip this, but not future ones
		  $prune_letter = '';
		}else{
		  push( @{ $pruned }, $_ );
		}
	      }

	      push( @{ $carrys }, { word => $this_candidate -> { word } . $next_letter,
				    letters => $pruned } )
	    }
	  }
	  #dprint( "  Current candidates: " . join( ",", map { $_ -> { word } } @{ $candidates } ) . "\n" );
	}
	
	$candidates = $carrys; # Carry for next letter
      }
      # Pull out ones that are real words, pruning the badword list
      push( @ret,
	    grep { ! $badwords{ $_ -> { word } } }
	    grep { $dict{ $_ -> { word } } =~ /\+/ }
	    @{ $carrys } );
      #dprint( "Added: " . join( ",", @ret ) . "\n" );
    }else{
      #dprint( "Already seen $letters[0] this round, not following\n" );
    }
    $seen{ $letters[ 0 ] } = 1;
    push( @letters, shift( @letters ) ); # Rotate letter to end

    # not a sub anymore
    #@letters = rotate( @letters );
  }

  # ret matches (hashrefs w/ remaining letters for them)
  return( \@ret );
}

# puts the first letter in the array on the end and returns it
sub rotate {
  my $first = shift;
  return( @_, $first );
}

# This is inline now
sub prune_letters {
  # takes an arrayref of elements and a word
  # prunes out 1 each letter from arrayref
  # returns an arrayref with letters removed
  # e.g. abcdcdc, dc => abcdc
  my @prune = split( //, shift() );
  my @letters = @{ shift() };
  my( @ret, $skip );

  for $skip ( @prune ) {
    @ret = ();
    for( @letters ) {
      if( $_ eq $skip ) {
	# Unset skip so we copy all the rest of the things
	$skip = '';
      }else{
	push( @ret, $_ );
      }
    }
    # Now jam them back in and loop
    @letters = @ret;
  }
  return( \@ret );
}

# Comment out calls if not using
sub dprint {
  return() unless $o{ d };
  print STDERR shift();
}

sub score_words {
  my $orig_words = shift;
  my @words = split( /\s+/, $orig_words );

  # The templates is an array of all the score perturbs,
  # e.g. 1112,122 for +2 on the last and 2 last letters
  # when it gets here, it is flattened in an array
  # because we flatten the word thing and score it up
  my @bonus = @{ shift() };
  my( $total_score, $total_letters, $non_adjusted_score, $high_avg, $high_non_adjusted_avg, $high_word_len, $total_guess_time );
  my( $this_word_len, $this_avg, $this_non_adjusted_avg, $this_score, $this_non_adjusted, $this_bonus, $rareness_factor );

  $total_score = $total_letters = 0;
  $high_avg = 0;
  $high_non_adjusted_avg = 0;
  for( @words ) {
    next unless $_;

    # Update frequency, and rareness for this combo of words
    # lower rareness factor is better, means words have been seen less
    $rareness_factor += $frequency{ $_ } if $o{ f };

    $this_word_len = $this_non_adjusted = $this_score = 0;
    for( split( // ) ) {
      $this_bonus = shift( @bonus );

      $this_word_len++;
      $total_letters++;

      $total_score += $scores{ $_ } * $this_bonus;
      $non_adjusted_score += $scores{ $_ };

      $this_score += $scores{ $_ } * $this_bonus;
      $this_non_adjusted += $scores{ $_ };
    }

    $this_avg = $this_score / $this_word_len;
    $this_non_adjusted_avg = $this_non_adjusted / $this_word_len;

    # Calc time
    my $guess_time = int( $this_avg );

    # Hole at four?
    if( $this_avg >= 4 && $this_avg < 4.5 ) {
	$guess_time = 0;
    }elsif( $this_avg < 4 && $this_avg > 3.4 ) {
	$guess_time = $this_word_len * 5;
    }elsif( $this_avg >= 4.5 ) {
	$guess_time = $this_word_len * 10;
    }elsif( $guess_time < 2 ) {
	$guess_time = 0;
    }elsif( $guess_time == 2 ) {
	$guess_time = $this_word_len * 1;
    }elsif( $guess_time == 3 ) {
	$guess_time = $this_word_len * 3;
    }else{
	$guess_time = $this_word_len * 10;
    }
    $total_guess_time += $guess_time;

    if( $this_avg > $high_avg ) {
    	$high_avg = $this_avg ;
        $high_word_len = $this_word_len;
    }

    $high_non_adjusted_avg = $this_non_adjusted_avg if $this_non_adjusted_avg > $high_non_adjusted_avg;
  }

  my $avg_letter_score = $total_score / $total_letters;
  my $non_adjusted_avg_letter_score = $non_adjusted_score / $total_letters;

  # Now reverse the rareness, we want the less-seen items at the top
  $rareness_factor = sprintf( '%09d', 999999999 - $rareness_factor );
  return( { score => $total_score,
	    avg_letter_score => $avg_letter_score,
	    words => $orig_words,
            non_adjusted_score => $non_adjusted_score,
            non_adjusted_avg_letter_score => $non_adjusted_avg_letter_score,
            high_avg => $high_avg,
            high_non_adjusted_avg => $high_non_adjusted_avg,
	    time_bonus => $total_guess_time,
	    rareness => $rareness_factor,
            sortkey => sprintf( '%03d-%09d-%02d', 
				$total_guess_time,
				$rareness_factor,
				$total_score )  
  } );
}