# Obscure South American monkey - CARARARA (Webster's 1913 dictionary),
# likewise seemingly only found on wordlists. Solved quite quickly again
# using Perl with the Net::Dict module, a bit of (ugly) brute force, and
# some graceful regular expressions to get the 1 million or so possible
# combinations down to about 4000 contenders for the eight-letter attempt.
# Michael Dufour.
#
# Here's the code:
#
# Okay using windows, no word lists to use:
# install Net-Dict using ppm (perl package manager).
#
# Overview:
#   1. Enumerate every DNA string of $DNA_LENGTH bases.
#   2. Translate each one with a sliding window of overlapping codons
#      (window moves one base at a time) into a $WORD_LENGTH-letter word.
#   3. Sort, filter out unwanted letter combinations, remove duplicates.
#   4. Look the survivors up in the web1913 dictionary on dict.org and
#      print the ones that have a definition.

use strict;
use warnings;

# use it! and specify website and dictionary to use
use Net::Dict;

my $dict = Net::Dict->new('dict.org')
    or die "Cannot connect to dict.org\n";
$dict->setDicts('web1913');

# establish translation table
my %codon_dictionary = docodon();

# array of bases
my @base = qw(A C G T);

# Sizes: a word of N letters read from overlapping codons needs
# N + CODON_LENGTH - 1 bases (8 letters -> 10 bases).
my $WORD_LENGTH  = 8;
my $CODON_LENGTH = 3;
my $DNA_LENGTH   = $WORD_LENGTH + $CODON_LENGTH - 1;

# initialise some stuff
my @words   = ();
my @matches = ();
my @duds    = ();    # prefixes with no dictionary entry; collected only

# Build the word list: count through every base-4 number with
# $DNA_LENGTH digits, each digit selecting one base.
print "building wordlist\n";
my $base_count   = scalar @base;
my $combinations = $base_count**$DNA_LENGTH;
for my $index ( 0 .. $combinations - 1 ) {

    # Decode $index into a DNA string, most significant digit first.
    my $remaining = $index;
    my $ct        = '';
    for ( 1 .. $DNA_LENGTH ) {
        $ct        = $base[ $remaining % $base_count ] . $ct;
        $remaining = int( $remaining / $base_count );
    }

    # Translate with a window that advances one base per letter.
    my $pt = '';
    for my $pos ( 0 .. $WORD_LENGTH - 1 ) {
        $pt .= codon_to_aminoacid( substr( $ct, $pos, $CODON_LENGTH ) );
    }
    push @words, $pt;
}
print $#words, "\n";

# sort the array
print "sorting wordlist\n";
@words = sort @words;

# A word is dropped as soon as any one of these patterns matches it.
# NOTE(review): some of the two-letter rules are stricter than the codon
# table implies - e.g. TCA (S) followed by CAC (H) yields "SH", which
# /S[^AV]/ rejects. They are kept exactly as written so the set of
# contenders does not change; confirm whether that is intended.
my @reject_patterns = (

    # stop codons and Q
    qr/_/i,
    qr/Q/i,

    # These are the possible two-letter combinations of DNA
    qr/[APT][^HLPR]/i,
    qr/C[^A]/i,
    qr/S[^AV]/i,
    qr/[DHNSY][^IMT]/i,
    qr/[EK][^KNRS]/i,
    qr/F[^FLS]/i,
    qr/G[^ADEG]/i,
    qr/R[^ADEGV]/i,
    qr/I[^FLSY]/i,
    qr/M[^C]/i,
    qr/[LV][^CFLSY]/i,
    qr/W[^G]/i,

    # Triple letters
    qr/[FGKLP]{3,}/i,

    # Brute force filter from inspection of smaller word combos
    qr/[DY]T[LP]/i,
    qr/[IV]Y[MT]/i,
    qr/[FV]S[LPR]/i,
    qr/[HN]T[HLPR]/i,
    qr/[PST]RV/i,
    qr/[GRW]G[DGV]/i,
    qr/[RS]V[CFLSW]/i,
    qr/[PT]L[FLS]/i,
    qr/[FPTV]L[CW]/i,
    qr/[DHN]IY/i,
    qr/[DHNY]M[CW]/i,
    qr/(KN|D)[MT]/i,
    qr/[KPT]R[DG]/i,
    qr/(^I|^L|V)F[FLS]/i,
    qr/VC[AV]/i,
    qr/YI[FLY]/i,
    qr/PHM/i,
    qr/ESV/i,
    qr/GEK/i,
    qr/MWG/i,
    qr/PLL/i,
    qr/TPP/i,
    qr/EKR/i,
    qr/AHT/i,
    qr/VY[MT]/i,
    qr/^[DY]T[RH]/i,
    qr/^[AT]HM/i,
    qr/^TP[HLPR]/i,

    # Potential to whittle down even more, but no time
);

# filter out impossibilities
# Every word is tested (none is skipped) and the sorted order is kept.
print "...filtering combinatons\n";
@words = grep {
    my $word     = $_;
    my $rejected = 0;
    for my $pattern (@reject_patterns) {
        if ( $word =~ $pattern ) {
            $rejected = 1;
            last;
        }
    }
    !$rejected;    # Everything else must therefore be a possibility
} @words;
print $#words, "\n";

# obviously DNA coding can decipher to same plaintext
# so remove duplicates, keeping the first occurrence of each word so the
# list stays sorted.
print "...filtering duplicates\n";
my %seen;
@words = grep { !$seen{$_}++ } @words;

#print "@words", "\n";
print $#words, "\n";
#exit;

#open (FILER, ">words.txt");
#$j = 0;
#$s = '';
#for ($i=0; $i<($#words-1); $i++) {
#    $j++;
#    if ($j<8) {
#        $s .= $words[$i];
#        $s .= " ";
#    } else {
#        print FILER $s, "\n";
#        $j = 0;
#        $s = '';
#    }
#}
#exit;

# yeah yeah - go through wordlist - check if there are words matching
# the three letter starting combo then try and match.
# The list is sorted, so words sharing a prefix are adjacent: the prefix
# lookup is done once per run of equal prefixes, and a run whose prefix
# has no dictionary entry is skipped without further lookups.
print "matching wordlist\n";
my $previous_prefix = '';    # no word has an empty prefix
my $prefix_is_dud   = 0;
for my $word (@words) {
    my $prefix = substr( $word, 0, 3 );

    if ( $prefix ne $previous_prefix ) {
        $previous_prefix = $prefix;
        $prefix_is_dud   = word_match($prefix) ? 0 : 1;
        push @duds, $prefix if $prefix_is_dud;
    }
    next if $prefix_is_dud;

    if ( word_search($word) ) {
        print "\t", $word, "\n";
        push @matches, $word;
    }
}

print "@matches";
exit;

# word_search($w)
#   Ask the dictionary server for definitions of $w.
#   Returns '1' when at least one definition comes back, '0' otherwise.
sub word_search {
    my ($w) = @_;
    my $ref = $dict->define($w);

    # Test the number of results as a number: comparing it to '' would
    # be true even when the count is 0.
    return ( $ref && @{$ref} > 0 ) ? '1' : '0';
}

# word_match($w)
#   Ask the dictionary server for headwords that start with $w
#   (the 'prefix' match strategy).
#   Returns '1' when at least one headword comes back, '0' otherwise.
sub word_match {
    my ($w) = @_;
    my $ref = $dict->match( $w, 'prefix' );
    return ( $ref && @{$ref} > 0 ) ? '1' : '0';
}

# docodon()
#   Returns the codon translation table as a hash: three-base DNA codon
#   => one-letter amino acid code, with '_' for the stop codons.
sub docodon {
    my %table = (
        'TCA' => 'S', 'TCC' => 'S', 'TCG' => 'S', 'TCT' => 'S',
        'TTC' => 'F', 'TTT' => 'F', 'TTA' => 'L', 'TTG' => 'L',
        'TAC' => 'Y', 'TAT' => 'Y', 'TAA' => '_', 'TAG' => '_',
        'TGC' => 'C', 'TGT' => 'C', 'TGA' => '_', 'TGG' => 'W',
        'CTA' => 'L', 'CTC' => 'L', 'CTG' => 'L', 'CTT' => 'L',
        'CCA' => 'P', 'CCC' => 'P', 'CCG' => 'P', 'CCT' => 'P',
        'CAC' => 'H', 'CAT' => 'H', 'CAA' => 'Q', 'CAG' => 'Q',
        'CGA' => 'R', 'CGC' => 'R', 'CGG' => 'R', 'CGT' => 'R',
        'ATA' => 'I', 'ATC' => 'I', 'ATT' => 'I', 'ATG' => 'M',
        'ACA' => 'T', 'ACC' => 'T', 'ACG' => 'T', 'ACT' => 'T',
        'AAC' => 'N', 'AAT' => 'N', 'AAA' => 'K', 'AAG' => 'K',
        'AGC' => 'S', 'AGT' => 'S', 'AGA' => 'R', 'AGG' => 'R',
        'GTA' => 'V', 'GTC' => 'V', 'GTG' => 'V', 'GTT' => 'V',
        'GCA' => 'A', 'GCC' => 'A', 'GCG' => 'A', 'GCT' => 'A',
        'GAC' => 'D', 'GAT' => 'D', 'GAA' => 'E', 'GAG' => 'E',
        'GGA' => 'G', 'GGC' => 'G', 'GGG' => 'G', 'GGT' => 'G',
    );

    # print keys %table;
    return %table;
}

# codon_to_aminoacid($codon)
#   Translate one three-base codon (any letter case) to its one-letter
#   amino acid code using %codon_dictionary.
#   Dies if the codon is not in the table, so the whole message goes to
#   STDERR and the script ends with a failure status.
sub codon_to_aminoacid {
    my ($codon) = @_;
    $codon = uc($codon);

    if ( exists $codon_dictionary{$codon} ) {
        return $codon_dictionary{$codon};
    }
    die "Bad codon: $codon!!\n";
}