#!/usr/bin/perl
#  
#  Copyright (c) 2002 Steve Slaven, All Rights Reserved.
#  
#  This program is free software; you can redistribute it and/or
#  modify it under the terms of the GNU General Public License as
#  published by the Free Software Foundation; either version 2 of
#  the License, or (at your option) any later version.
#  
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
#  GNU General Public License for more details.
#  
#  You should have received a copy of the GNU General Public License
#  along with this program; if not, write to the Free Software
#  Foundation, Inc., 59 Temple Place, Suite 330, Boston,
#  MA 02111-1307 USA
#  

use strict;
use Getopt::Std;
use DB_File;

my( %dict, %o, %temp_dict );

$o{ m } = 2;
$o{ M } = 8;
$o{ w } = 1;
getopts( 'hD:d:xb:m:M:ew:NR', \%o );

# Handle dict build, not really a big deal anymore
# since it's not a tie'd hash
if( $o{ b } ) {
	print "Building dictionary: $o{b}\n";
	$| = 1;

	open( OUT, ">$o{b}" );
	my( $output, $cnt );
	while( <> ) {
		chomp();

		if( ( length( $_ ) <= $o{ M } ) &&
			( length( $_ ) >= $o{ m } ) ) {
			$output = sprintf( '%6d words processed', ++$cnt );
			print $output if $cnt % 100 == 0;

			# Clean up some crap
			#s/es$// if length( $_ ) > $o{ m };
			#s/ed$// if length( $_ ) > $o{ m };
			#s/s$// if length( $_ ) > $o{ m };

			print OUT "$_\n" if ( length( $_ ) >= $o{ m } ) &&
						( ! $dict{ $_ } );
			$dict{ $_ } = 1;

			print "\b" x length( $output ) if $cnt % 100 == 0;
		}
	}
	close( OUT );
	print "\nDone\n";
	exit;
}

# Plain text dictionary
if( $o{ d } ) {
	my $input_dict;

	open( IN, $o{ d } ) || die( "Couldn't open dict '$o{d}': $!" );
	{
		undef $/;
		$input_dict = <IN>;
	}
	close( IN );

	%dict = map { $_ => 1 } split( /\n/, $input_dict );
}

$o{ h } = 1 unless $ARGV[ 0 ];
die( qq{
phone-a-gram v1.1
Finds possible words for your phone number
Author: Steve Slaven - http://hoopajoo.net

Usage: $0 [-hwxelN] [-D level] [-d dictionary] [-w match # words]
		[-b dictfile wordlist -m MIN -M MAX ] number

	-h	This help
	-D	Debug
	-e	Use expandos, e.g. 8 => ate, ne => any, etc
	-d	Use dictionary and only print sequences that have
		real words in them.
	-b	Build dictionary dictfile out of wordlist.  The wordlist
		is a newline sep'd list of words
	-l	Calculate from left to right instead of right to left
	-w	Match at least N words
	-x	Use q and z on 7 and 9 (default uses normal letters only)
	-m	Words must be MIN chars long for dictionary
	-M	Words must be less than MAX chars long for dictionary
	-N	Do not show numbers that include not-word parts
	-R	Retard mode, treat 1 as I and 0 as O

} ) if $o{ h };

# This is the translation array for each digit
my %g = ( 1 => [ qw{ 1 } ],
	2 => [ qw{ 2 a b c } ],
	3 => [ qw{ 3 d e f } ],
	4 => [ qw{ 4 g h i } ],
	5 => [ qw{ 5 j k l } ],
	6 => [ qw{ 6 m n o } ],
	7 => [ qw{ 7 p r s } ],
	8 => [ qw{ 8 t u v } ],
	9 => [ qw{ 9 w x y } ],
	0 => [ qw{ 0 } ]
);

# Add extended chars
if( $o{ x } ) {
	push( @{ $g{ 7 } }, 'q[7]' );
	push( @{ $g{ 9 } }, 'z[9]' );
}

if( $o{ R } ) {
	push( @{ $g{ 1 } }, 'i[1]' );
	push( @{ $g{ 0 } }, 'o[0]' );
}

my $num = shift( @ARGV );
$num =~ s/\D//g;

# Initialize some stuff...
my $len = length( $num );
my @counters = map { 0 } ( 1 .. $len );
my @number = split( //, $num );

my( $word, $word_ok, $expando_word, @expandos );

# Word expandos
@expandos = (
	{ regex => '4', subst => 'for[4]' },
	{ regex => '8', subst => 'ate[8]' },
	{ regex => 'b', subst => 'be[b]' },
	{ regex => 'ne', subst => 'any[ne]' },
);

my $looping = 1;
while( $looping ) {
	# Output the current crop, $word is the phone# + letters expanded
	$word = '';
	for( 0 .. ( $len - 1 ) ) {
		#$word .= resolve_digit( $_ );

		# Faster to resolve this way, saves about .5 seconds
		$word .= $g{ $number[ $_ ] } -> [ $counters[ $_ ] ];
	}

	$word_ok = 1;
	if( $o{ d } ) {
		# Try normal
		$word_ok = 0;

		$word_ok += has_words( $word ); # Has words returns the number
						# of words matched
		if( $o{ e } ) {
			# Make an expando-word, expand things
			# like 4 (for), the letter b to be,
			# ne to any, etc
			$expando_word = $word;

			for( @expandos ) {
				$expando_word =~ 
					s/$_->{regex}/$_->{subst}/ge;
			}

			$word_ok += has_words( $expando_word );
		}
	}

	print "Word matchlevel = $word_ok\n" if $o{ D } > 2;

	# Print unless we undefined it
	print "$word\n" if $word_ok >= $o{ w };

	# Handle inc'ing
	inc_digit( 0 );

	# Check if done, e.g all max
	$looping = 0;
	for( 0 .. ( $len - 1 ) ) {
		# Works if any are defined then we loop.
		# we're looking at the digit with a counter + 1
		# If all counters + 1 are undefined in the table,
		# there's nothing to look at anymore

		# Similar to the word build loop, but counter+1
		$looping = $looping || 
			defined( $g{ $number[ $_ ] } -> 
				[ $counters[ $_ ] + 1 ] );
		
		#resolve_digit( $_, 1 ) );
	}
}

# This is a sub because doing a recursive sub is easier
sub inc_digit {
	my $place = shift;

	return if $place == $len;

	print "Inc for $place\n" if $o{ D } > 2;

	$counters[ $place ]++;

	print "Counters: " . join( ",", @counters ) . "\n" if $o{ D } > 4;

	# Check if defined, if not, inc the next one unless we hit limit
	if( ! defined( $g{ $number[ $place ] } -> [ $counters[ $place ] ] ) ) {
		# Again, saving time, that sub was used a lot
		# resolve_digit( $place ) ) ) {
		print "Rollover in place $place\n" if $o{ D } > 4;
		$counters[ $place ] = 0;

		inc_digit( $place + 1 );
	}

	return();
}

sub resolve_digit {
	# Takes the digit place and returns the current resolved thingy
	my $place = shift;
	my $offset = shift() || 0; # Use to push this counter off by X

	# This is confusing... the %g for each $num[]
	# will give the arrayref of valid outputs
	# and the $counters[] corresponds to the offset
	# into that arrayref for the current digit... ?? :)
	my $resolve = $g{ $number[ $place ] } -> 
		[ $counters[ $place ] + $offset ];

	print "Resolved place $place as '$resolve'\n" if $o{ D } > 6;
	return( $resolve );
}

sub has_words {
	# Scans the arg passed for words in the dictionary
	# Grab word chunks, this reduces the amount
	# of stuff we're looking through
	my $word = shift;
	my( $thisword, $thislen, $start, $end, @splitword, $inc_test );

	# Only whole words without junk
	$word =~ s/\[[^]]+]//g;
	my( @checks ) = $word =~ /([a-z]+)/gi;
	my $count = 0;

	for( @checks ) {
		if( $dict{ $_ } ) {
			$count++;
		}else{
			return( 0 ) if $o{ N };
		}
	}
	return( $count );

	#############################################
	# We don't do this anymore this sucks
	#############################################
	#
	my $wordcount = 0;
	my @wordchunks = grep { length( $_ ) > 1 } split( /\d+/, $word );

	for $thisword ( @wordchunks ) {
		# Split into array to process easier
		@splitword = split( //, $thisword );
		$thislen = length( $thisword ) - 1; # 0 offset

		print "$thisword ---\n" if $o{ D } > 3;

		# Loop over sub-sections of the word
		for $start ( 0 .. $thislen ) {
			for $end ( $start .. $thislen ) {
				# Make a word from $start to $end
				# and test if it exists
				$inc_test = join( '', 
					@splitword[ $start .. $end ] );

				print "Wordmatch: $inc_test\n" if 
					( $o{ D } > 2 && $dict{ $inc_test } );
				$wordcount++ if $dict{ $inc_test };
			}
		}
	}

	return( $wordcount );


	# Takes too long... several seconds per iteration
	#for( keys( %dict ) ) {
	#	if( length( $_ ) < $len ) {
	#		$word_ok = 1 if $word =~ /\Q$_\E/i;
	#	}
	#}

	# Handle wordlist magic - not accurate enough, misses a
	# lot of possible words
	#@almost_words = $word =~ /([a-z]+)/g;
	#for( @almost_words ) {
	#	$inc_test = undef;
	#	for( split( // ) ) {
	#		$inc_test .= $_;
	#		$word_ok = 1 if $dict{ $inc_test };
	#	}
	#}

	# Try more fancy stuff, like 1==one, 4=for, 8=ate, etc
	# TODO
}