#!/usr/bin/perl # # Copyright (c) 2002 Steve Slaven, All Rights Reserved. # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License as # published by the Free Software Foundation; either version 2 of # the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, # MA 02111-1307 USA # use strict; use Getopt::Std; use DB_File; my( %dict, %o, %temp_dict ); $o{ m } = 2; $o{ M } = 8; $o{ w } = 1; getopts( 'hD:d:xb:m:M:ew:NR', \%o ); # Handle dict build, not really a big deal anymore # since it's not a tie'd hash if( $o{ b } ) { print "Building dictionary: $o{b}\n"; $| = 1; open( OUT, ">$o{b}" ); my( $output, $cnt ); while( <> ) { chomp(); if( ( length( $_ ) <= $o{ M } ) && ( length( $_ ) >= $o{ m } ) ) { $output = sprintf( '%6d words processed', ++$cnt ); print $output if $cnt % 100 == 0; # Clean up some crap #s/es$// if length( $_ ) > $o{ m }; #s/ed$// if length( $_ ) > $o{ m }; #s/s$// if length( $_ ) > $o{ m }; print OUT "$_\n" if ( length( $_ ) >= $o{ m } ) && ( ! $dict{ $_ } ); $dict{ $_ } = 1; print "\b" x length( $output ) if $cnt % 100 == 0; } } close( OUT ); print "\nDone\n"; exit; } # Plain text dictionary if( $o{ d } ) { my $input_dict; open( IN, $o{ d } ) || die( "Couldn't open dict '$o{d}': $!" ); { undef $/; $input_dict = ; } close( IN ); %dict = map { $_ => 1 } split( /\n/, $input_dict ); } $o{ h } = 1 unless $ARGV[ 0 ]; die( qq{ phone-a-gram v1.1 Finds possible words for your phone number Author: Steve Slaven - http://hoopajoo.net Usage: $0 [-hwxelN] [-D level] [-d dictionary] [-w match # words] [-b dictfile wordlist -m MIN -M MAX ] number -h This help -D Debug -e Use expandos, e.g. 8 => ate, ne => any, etc -d Use dictionary and only print sequences that have real words in them. -b Build dictionary dictfile out of wordlist. The wordlist is a newline sep'd list of words -l Calculate from left to right instead of right to left -w Match at least N words -x Use q and z on 7 and 9 (default uses normal letters only) -m Words must be MIN chars long for dictionary -M Words must be less than MAX chars long for dictionary -N Do not show numbers that include not-word parts -R Retard mode, treat 1 as I and 0 as O } ) if $o{ h }; # This is the translation array for each digit my %g = ( 1 => [ qw{ 1 } ], 2 => [ qw{ 2 a b c } ], 3 => [ qw{ 3 d e f } ], 4 => [ qw{ 4 g h i } ], 5 => [ qw{ 5 j k l } ], 6 => [ qw{ 6 m n o } ], 7 => [ qw{ 7 p r s } ], 8 => [ qw{ 8 t u v } ], 9 => [ qw{ 9 w x y } ], 0 => [ qw{ 0 } ] ); # Add extended chars if( $o{ x } ) { push( @{ $g{ 7 } }, 'q[7]' ); push( @{ $g{ 9 } }, 'z[9]' ); } if( $o{ R } ) { push( @{ $g{ 1 } }, 'i[1]' ); push( @{ $g{ 0 } }, 'o[0]' ); } my $num = shift( @ARGV ); $num =~ s/\D//g; # Initialize some stuff... my $len = length( $num ); my @counters = map { 0 } ( 1 .. $len ); my @number = split( //, $num ); my( $word, $word_ok, $expando_word, @expandos ); # Word expandos @expandos = ( { regex => '4', subst => 'for[4]' }, { regex => '8', subst => 'ate[8]' }, { regex => 'b', subst => 'be[b]' }, { regex => 'ne', subst => 'any[ne]' }, ); my $looping = 1; while( $looping ) { # Output the current crop, $word is the phone# + letters expanded $word = ''; for( 0 .. ( $len - 1 ) ) { #$word .= resolve_digit( $_ ); # Faster to resolve this way, saves about .5 seconds $word .= $g{ $number[ $_ ] } -> [ $counters[ $_ ] ]; } $word_ok = 1; if( $o{ d } ) { # Try normal $word_ok = 0; $word_ok += has_words( $word ); # Has words returns the number # of words matched if( $o{ e } ) { # Make an expando-word, expand things # like 4 (for), the letter b to be, # ne to any, etc $expando_word = $word; for( @expandos ) { $expando_word =~ s/$_->{regex}/$_->{subst}/ge; } $word_ok += has_words( $expando_word ); } } print "Word matchlevel = $word_ok\n" if $o{ D } > 2; # Print unless we undefined it print "$word\n" if $word_ok >= $o{ w }; # Handle inc'ing inc_digit( 0 ); # Check if done, e.g all max $looping = 0; for( 0 .. ( $len - 1 ) ) { # Works if any are defined then we loop. # we're looking at the digit with a counter + 1 # If all counters + 1 are undefined in the table, # there's nothing to look at anymore # Similar to the word build loop, but counter+1 $looping = $looping || defined( $g{ $number[ $_ ] } -> [ $counters[ $_ ] + 1 ] ); #resolve_digit( $_, 1 ) ); } } # This is a sub because doing a recursive sub is easier sub inc_digit { my $place = shift; return if $place == $len; print "Inc for $place\n" if $o{ D } > 2; $counters[ $place ]++; print "Counters: " . join( ",", @counters ) . "\n" if $o{ D } > 4; # Check if defined, if not, inc the next one unless we hit limit if( ! defined( $g{ $number[ $place ] } -> [ $counters[ $place ] ] ) ) { # Again, saving time, that sub was used a lot # resolve_digit( $place ) ) ) { print "Rollover in place $place\n" if $o{ D } > 4; $counters[ $place ] = 0; inc_digit( $place + 1 ); } return(); } sub resolve_digit { # Takes the digit place and returns the current resolved thingy my $place = shift; my $offset = shift() || 0; # Use to push this counter off by X # This is confusing... the %g for each $num[] # will give the arrayref of valid outputs # and the $counters[] corresponds to the offset # into that arrayref for the current digit... ?? :) my $resolve = $g{ $number[ $place ] } -> [ $counters[ $place ] + $offset ]; print "Resolved place $place as '$resolve'\n" if $o{ D } > 6; return( $resolve ); } sub has_words { # Scans the arg passed for words in the dictionary # Grab word chunks, this reduces the amount # of stuff we're looking through my $word = shift; my( $thisword, $thislen, $start, $end, @splitword, $inc_test ); # Only whole words without junk $word =~ s/\[[^]]+]//g; my( @checks ) = $word =~ /([a-z]+)/gi; my $count = 0; for( @checks ) { if( $dict{ $_ } ) { $count++; }else{ return( 0 ) if $o{ N }; } } return( $count ); ############################################# # We don't do this anymore this sucks ############################################# # my $wordcount = 0; my @wordchunks = grep { length( $_ ) > 1 } split( /\d+/, $word ); for $thisword ( @wordchunks ) { # Split into array to process easier @splitword = split( //, $thisword ); $thislen = length( $thisword ) - 1; # 0 offset print "$thisword ---\n" if $o{ D } > 3; # Loop over sub-sections of the word for $start ( 0 .. $thislen ) { for $end ( $start .. $thislen ) { # Make a word from $start to $end # and test if it exists $inc_test = join( '', @splitword[ $start .. $end ] ); print "Wordmatch: $inc_test\n" if ( $o{ D } > 2 && $dict{ $inc_test } ); $wordcount++ if $dict{ $inc_test }; } } } return( $wordcount ); # Takes too long... several seconds per iteration #for( keys( %dict ) ) { # if( length( $_ ) < $len ) { # $word_ok = 1 if $word =~ /\Q$_\E/i; # } #} # Handle wordlist magic - not accurate enough, misses a # lot of possible words #@almost_words = $word =~ /([a-z]+)/g; #for( @almost_words ) { # $inc_test = undef; # for( split( // ) ) { # $inc_test .= $_; # $word_ok = 1 if $dict{ $inc_test }; # } #} # Try more fancy stuff, like 1==one, 4=for, 8=ate, etc # TODO }