QA0005
ZIP with MATLAB scripts:
script to train for Countdown TV Words
Q: How to train for TV Competition Countdown Words section?
https://www.channel4.com/programmes/countdown
https://en.wikipedia.org/wiki/Countdown_(game_show)
A:
Contenders randomly choose 9 characters, usually including between 3 and 5 vowels, rest consonants,
then given 30 seconds to find the longest possible Oxford dictionary word that can be built in any order, without repetition.
The following MATLAB scripts are a training aid for anyone considering applying to the Countdown TV competition.
If .MAT dictionary not available connect_source takes the default supplied .TXT dictionary and generates ED.mat.
generate_input chooses 9 random characters meeting Countdown specs.
Then, in about 32 seconds on average, all matching dictionary words between 9 and 6 characters long are returned in E.
The default dictionary, supplied as a single .TXT file, may contain a few errors, such as including 'anmia' although neither collinsdictionary.com nor dictionary.com considers it an English word.
I am not going to spend time building or looking for another more accurate version of English in .TXT and format. The exercise is about writing a MATLAB script for those interested to train for Countdown competition.
The English dictionary I used here is included in EnglishDictionary.txt containing 378305 words.
One may consider using the Microsoft Word editor instead, but Word, with for instance font type Courier New and font size 10, stops counting pages at 5000.
With the above font type and size, the available dictionary would have 71 words/page, no spaces, no header, explanations, no foot notes, page numbers: Only 1 word per line.
It's easier to work with a plain .TXT file as the source dictionary. The source dictionary (.TXT) used here has already been converted to a .MAT file: ED.mat
If another source dictionary is the choice, make sure nothing except dictionary entry words, one word per line is contained in the .doc file.
Support function combinator.m is included in the .ZIP for download, written by Matt Fig
If ED.mat is not present in the script folder, the script attempts to generate it from EnglishDictionary.txt
MAIN SCRIPT:
% Main Countdown training script.
%
% Steps:
%   1. connect_source  - loads (or generates) the dictionary cell array D.
%   2. build_base      - builds H, one 26-bin letter histogram per word of D,
%                        plus the bin edges nf_edges and the size szD.
%   3. generate_input  - draws the 9 random letters L.
%   4. Every dictionary word that can be assembled from the letters of L
%      (each drawn letter used at most once) and that has at least 6
%      counted letters is returned in E, longest words first.
%
% A word can be built from L exactly when its letter histogram does not
% exceed the histogram of L in any bin, so one vectorised comparison
% against H replaces the enumeration of all 6..9 letter subsets of L.
% Each matching word is therefore listed once, even when L contains
% repeated letters.
clear all;clc;close all
connect_source
build_base
generate_input
% Defensive repair, kept from v1.0: only applied when the entry exists and
% is not text (e.g. 'nat' imported as Not-a-Time), so that a different or
% filtered dictionary is never overwritten at this hard-coded index.
if numel(D)>=186730 && ~ischar(D{186730})
    D{186730}='nat';
end
%% find all match words from same length as input L down to length 6
tic
minLen=6;                                 % shortest word length reported
fL=histcounts(double(L),nf_edges);        % letter histogram of the whole draw
wlen=sum(H,2);                            % counted letters of every dictionary word
fits=all(H<=fL,2);                        % word needs no letter more often than L offers
n0=find(fits & wlen>=minLen);             % indices into D of all matches, each once
[~,ord]=sort(wlen(n0),'descend');         % stable sort: longest first, dictionary order within a length
n0=n0(ord);
L
E=D(n0)
toc
SUPPORT FUNCTIONS:
% connect_source.m
%
% support script for countdown_bench.m
%
% Purpose: make the dictionary cell array D available in the workspace.
%   * If ED.mat is in the current folder it is loaded.
%   * If ED.mat is missing but EnglishDictionary.txt is present, the text
%     file (one word per line) is read, filtered and saved as ED.mat.
%
% Workspace variables set by this script:
%   cs1    1 when ED.mat is in the current folder, else 0
%   ds1    1 when EnglishDictionary.txt is in the current folder, else 0
%   cs_ds  combined state, see table below
%   D      column cell array of words (when a dictionary is available)
%
%   cs_ds  [cs1 ds1]  meaning
%    -2     [0 0]     neither file present: report and stop with an error
%    -1     [1 0]     ED.mat loaded, source .TXT missing: ED.mat may be outdated
%     0     [0 1]     ED.mat missing: generate it from EnglishDictionary.txt
%     1     [1 1]     both present: v1.0 does not check for an update mismatch
list1=dir;                       % entries of the current folder
names1={list1.name};
cs1=double(any(strcmp(names1,'ED.mat')));
ds1=double(any(strcmp(names1,'EnglishDictionary.txt')));
if cs1==1
    load('ED.mat');              % brings D into the workspace
end
if cs1==0 && ds1==0
    cs_ds=-2;
elseif cs1==1 && ds1==0
    cs_ds=-1;
elseif cs1==0 && ds1==1
    cs_ds=0;
else
    cs_ds=1;
end
switch cs_ds
    case -2 % [cs1 ds1] = [0 0] neither ED.mat nor EnglishDictionary.txt present
        uiwait(msgbox('No TXT source dictionary, No MAT dictionary: provide source dictionary','error','modal'));
        % Raise an error so the calling script stops here instead of
        % failing later on an undefined D.
        error('connect_source:noDictionary', ...
            'Neither ED.mat nor EnglishDictionary.txt found in the current folder.');
    case -1 % [cs1 ds1] = [1 0] ED.mat present but not EnglishDictionary.txt
        uiwait(msgbox('ED.mat present but EnglsihDictionary.txt missing, ED.mat may be outdated, provide source dictionary','error','modal'));
        return;
    case 0 % ED.mat not present but EnglishDictionary.txt present: generate ED.mat and proceed
        % L1out=['éèáàóòìíìúùäöüï'] % for next version: drop accentuated words,
        % a proper dictionary should also list the same word without accent marks.
        L2out='&/';
        L3out='1234567890';
        % The v1.0 loop tested character code 35 ('#') while its comment
        % described the apostrophe (code 39); both are rejected here.
        Lout=[L2out L3out char(35) char(39)];
        % Read the file as plain text, one entry per line, so every entry
        % stays a char vector and no word is converted to another type on
        % import (v1.0 had to restore 'nat' by hand after readcell).
        raw=fileread('EnglishDictionary.txt');
        D=regexp(raw,'\r\n|\r|\n','split').';   % column cell array
        D=strtrim(D);
        wlen=cellfun(@numel,D);
        hasBad=cellfun(@(w) any(ismember(w,Lout)),D);
        % keep words of 5 to 9 characters without any character in Lout
        D=D(~hasBad & wlen>=5 & wlen<=9);
        if isempty(D)
            error('connect_source:emptyDictionary', ...
                'EnglishDictionary.txt contains no usable word of 5 to 9 characters.');
        end
        % next version: convert all dictionary characters to capitals here
        save('ED.mat','D');      % generate ED.mat
    otherwise % cs_ds=1 both EnglishDictionary.txt and ED.mat in folder
        % v1.0 does not check a possible .mat/.txt update mismatch
end
% build_base.m
%
% support script (header of v1.0 names test76543.m as the caller)
%
% Turns every dictionary word into a letter histogram: row k of H holds,
% for word D{k}, how many times each of 'a'..'z' occurs in it.
% Requires D in the workspace; leaves arange, azrange, zrange, szD, f, nf,
% nf_edges and H behind for the scripts that run afterwards.
%
% Open ideas noted for later versions:
%   - tag each word with a binary presence trace (bin2dec); repeated
%     characters do not fit that scheme directly
%   - find the highest character repetition in the dictionary and split
%     the dictionary into sub-sets by number of repeated characters
%   - perhaps a faster way exists using mod, considering szD+1 is prime
arange=[65 69 73 79 85];            % ascii codes of the capital vowels
azrange=65:90;                      % ascii codes of the capital alphabet
zrange=setdiff(azrange,arange);     % what is left are the consonants
szD=size(D,1)                       % number of dictionary rows
f=char(azrange+32)                  % 'ab..yz'
nf=double(f)
nf_edges=(nf(1):nf(end)+1)-0.5      % unit-wide bins centred on each letter code
H=zeros(szD,numel(f));              % one histogram row per dictionary word
for s2=1:szD
    H(s2,:)=histcounts(double(D{s2}),nf_edges);
end
% generate_input.m
%
% support script (header of v1.0 names test76543.m as the caller)
%
% Draws the 9-letter input L: 3 to 5 vowels followed by consonants, each
% picked at random with repetition allowed. Requires arange and zrange
% (capital-letter codes) in the workspace; +32 maps them to small letters,
% since the 1st version works with small characters only.
nvowels=randi([3 5]);                                    % how many vowels in this draw
vowelPick=randi([1 numel(arange)],1,nvowels);            % random positions inside arange
ar=32+arange(vowelPick);
aL=char(ar);
nconsonants=9-nvowels;                                   % the rest of the 9 are consonants
consonantPick=randi([1 numel(zrange)],1,nconsonants);    % random positions inside zrange
zr=32+zrange(consonantPick);
zL=char(zr);
L=horzcat(aL,zL)