projects ~ terse ~ singular ~ blog ~ archive ~ niceness ~ about

C# Norvig Spelling Corrector

08 Jul 2010

A while ago I ported Peter Norvig’s spelling corrector to C#, however after a site redesign or two I managed to accidentally delete the original code. Whoops!

So I recently received an email request for the code linked on Peter’s website, which reminded me I needed to sort this out. After a wrangle with the internet archive I obtained the original code, then overhauled it to be a little less messy + horrible than it was before.

So here it is in all it’s glory (the code is licensed under the MIT licence):-

using System;
using System.IO;
using System.Linq;
using System.Text.RegularExpressions;
using StrEnum = System.Collections.Generic.IEnumerable<string>;

static class SpellCorrect
{
    const string Alphabet = "abcdefghijklmnopqrstuvwxyz";

    static StrEnum Edits1(string w)
    {
        // Deletion
        return (from i in Enumerable.Range(0, w.Length)
                select w.Substring(0, i) + w.Substring(i + 1))
        // Transposition
         .Union(from i in Enumerable.Range(0, w.Length - 1)
                select w.Substring(0, i) + w.Substring(i + 1, 1) +
                       w.Substring(i, 1) + w.Substring(i + 2))
        // Alteration
         .Union(from i in Enumerable.Range(0, w.Length)
                from c in Alphabet
                select w.Substring(0, i) + c + w.Substring(i + 1))
        // Insertion
         .Union(from i in Enumerable.Range(0, w.Length + 1)
                from c in Alphabet
                select w.Substring(0, i) + c + w.Substring(i));
    }

    static void Main(string[] args)
    {
        var nWords = (from Match m in Regex.Matches(File.ReadAllText("big.txt").ToLower(), "[a-z]+")
                      group m.Value by m.Value)
                     .ToDictionary(gr => gr.Key, gr => gr.Count());

        Func<StrEnum, StrEnum> nullIfEmpty = c => c.Count() == 0 ? null : c;

        var candidates =
            nullIfEmpty(new[] {args[0]}.Where(nWords.ContainsKey))
            ?? nullIfEmpty(Edits1(args[0]).Where(nWords.ContainsKey))
            ?? nullIfEmpty((from e1 in Edits1(args[0])
                            from e2 in Edits1(e1)
                            where nWords.ContainsKey(e2)
                            select e2).Distinct());

        Console.WriteLine(
            candidates == null
                ? args[0]
                : (from cand in candidates
                   orderby (nWords.ContainsKey(cand) ? nWords[cand] : 1) descending
                   select cand).First());
    }
}

download

~ Lorenzo

Comments

blog comments powered by Disqus