Dictionary wildcard search in C#

By Nov 02, 2016

Description:

Here we implement a C# program that searches for words in a dictionary using wildcards.

Preferencesoft

In this article, we present a program that searches for a word in a dictionary using wildcards.

When we look for a word in a dictionary, we sometimes do not know its exact spelling. For example, we may not be sure how to write "connection": connexion or connection? We can use a wildcard, a symbol or sequence of symbols standing for unknown characters; during the search, it can be replaced by any character.

Furthermore, this type of search is often used for crosswords, when only a few characters are known. We can search for a word by placing the symbol ‘?’ in place of each missing character. This gives us a list of the words that match the pattern.

We will only cover the following wildcards:

  •          * matches zero or more characters

  •          ? matches any character at the specified position

  •          [ ] matches a range of characters, example [a-z]

  •          [ ] matches the specified characters, example [aZxy2]

To use special characters, we use the backslash character ‘\’. For example, \* is the asterisk.

Note, however, that this differs from the PowerShell convention, which places a backquote (`) to the left of a reserved character.

The algorithm splits the pattern at the unescaped (non-backslashed) brackets [ and ].

It then translates the pattern into a regular expression.

using System;
using System.Collections.Generic;
using System.IO;
using System.Text;
using System.Text.RegularExpressions;
 
namespace DictionarySearch
{
    class Program
    {
        /// <summary>
        /// Translates a bracket-free wildcard fragment into the equivalent
        /// regular-expression fragment.
        /// </summary>
        /// <remarks>
        /// The conversion is done in a single left-to-right pass:
        ///   *            --> .*   (unescaped wildcard: zero or more characters)
        ///   ?            --> .    (unescaped wildcard: exactly one character)
        ///   space        --> \s
        ///   \[ \] \* \?  --> kept as the same two characters (literal bracket/star/question mark)
        ///   \            --> \\   (backslash not followed by [ ] * ?, or at the end)
        ///   anything else is passed through Regex.Escape, so that regex
        ///   metacharacters such as . + ( ^ $ | { # are matched literally.
        /// </remarks>
        /// <param name="str">Wildcard fragment; null or empty yields an empty string.</param>
        /// <returns>The regular-expression fragment (without ^ / $ anchors).</returns>
        static string Esc(string str)
        {
            if (string.IsNullOrEmpty(str))
            {
                return string.Empty;
            }

            int len = str.Length;
            StringBuilder sb = new StringBuilder(2 * len);
            for (int i = 0; i < len; i++)
            {
                char c = str[i];
                switch (c)
                {
                    case '*':
                        sb.Append(".*");
                        break;
                    case '?':
                        sb.Append('.');
                        break;
                    case ' ':
                        sb.Append(@"\s");
                        break;
                    case '\\':
                        {
                            char next = i + 1 < len ? str[i + 1] : '\0';
                            if (next == '[' || next == ']' || next == '*' || next == '?')
                            {
                                // Escaped wildcard character: emit it as a regex literal
                                // and consume both characters.
                                sb.Append('\\').Append(next);
                                i++;
                            }
                            else
                            {
                                // Lone backslash: it stands for itself. The following
                                // character (if any) is handled by the next iteration.
                                sb.Append(@"\\");
                            }
                        }
                        break;
                    default:
                        // Without this, characters such as + ( ^ $ | would keep their
                        // regex meaning and the pattern would match the wrong words
                        // (or not compile at all).
                        sb.Append(Regex.Escape(c.ToString()));
                        break;
                }
            }
            return sb.ToString();
        }
 
        /// <summary>
        /// Prepares a bracket expression (for example "[a-z]" or "[aZxy2]") for use
        /// as a regular-expression character class.
        /// </summary>
        /// <remarks>
        /// All conversions happen in one pass, so a backslash produced by one rule
        /// is never re-escaped by another:
        ///   *      --> \*
        ///   ?      --> \?
        ///   .      --> \.
        ///   space  --> \s
        ///   \[ \]  --> kept unchanged (literal bracket inside the class)
        ///   \      --> \\  (any other backslash)
        /// A pair of backslashes is consumed together so that a ']' following it is
        /// not mistaken for an escaped bracket. Every other character, including the
        /// enclosing '[' and ']' and the range dash, is copied unchanged.
        /// </remarks>
        /// <param name="str">Bracket expression; null or empty yields an empty string.</param>
        /// <returns>The converted bracket expression.</returns>
        static string Special(string str)
        {
            if (string.IsNullOrEmpty(str))
            {
                return string.Empty;
            }

            int len = str.Length;
            StringBuilder sb = new StringBuilder(2 * len);
            for (int i = 0; i < len; i++)
            {
                char c = str[i];
                switch (c)
                {
                    case '*':
                    case '?':
                    case '.':
                        sb.Append('\\').Append(c);
                        break;
                    case ' ':
                        sb.Append(@"\s");
                        break;
                    case '\\':
                        {
                            char next = i + 1 < len ? str[i + 1] : '\0';
                            if (next == '[' || next == ']')
                            {
                                // Escaped bracket: already a valid regex escape.
                                sb.Append('\\').Append(next);
                                i++;
                            }
                            else if (next == '\\')
                            {
                                // Two backslashes: escape each one and consume the pair.
                                sb.Append(@"\\\\");
                                i++;
                            }
                            else
                            {
                                sb.Append(@"\\");
                            }
                        }
                        break;
                    default:
                        sb.Append(c);
                        break;
                }
            }
            return sb.ToString();
        }
 
        /// <summary>
        /// Converts a wildcard pattern (*, ?, [..], backslash escapes) into an
        /// anchored regular expression ("^...$").
        /// </summary>
        /// <remarks>
        /// The pattern is cut at the unescaped brackets: text outside brackets is
        /// translated by Esc, each bracket expression by Special.
        /// </remarks>
        /// <param name="s">Wildcard pattern; null is treated as an empty pattern.</param>
        /// <param name="error">
        /// Empty on success; otherwise a message describing the problem. Bracket
        /// positions in messages are 1-based.
        /// </param>
        /// <returns>The regular expression, or an empty string when <paramref name="error"/> is set.</returns>
        static string WildcardToRegex(string s, out string error)
        {
            error = "";
            if (s == null)
            {
                s = "";
            }

            // Mask the escape sequences \\ \[ \] with two placeholder characters each,
            // so that the scan below only sees unescaped brackets while every index
            // still lines up with the original string.
            string masked = s.Replace(@"\\", "??")
                             .Replace(@"\[", "??")
                             .Replace(@"\]", "??");

            // start[k] / end[k] are the positions of the k-th '[' and its matching ']'.
            List<int> start = new List<int>();
            List<int> end = new List<int>();
            bool open = false;
            for (int i = 0; i < masked.Length; i++)
            {
                char c = masked[i];
                if (c == '[')
                {
                    if (open)
                    {
                        error = string.Format("A bracket was already opened before {0}", i + 1);
                        return "";
                    }
                    open = true;
                    start.Add(i);
                }
                else if (c == ']')
                {
                    if (!open)
                    {
                        error = string.Format("No bracket already opened before {0}", i + 1);
                        return "";
                    }
                    open = false;
                    end.Add(i);
                }
            }
            if (open)
            {
                // Report where the unclosed bracket was opened, like the other messages,
                // instead of a position past the end of the pattern.
                error = string.Format("Bracket not closed at {0}", start[start.Count - 1] + 1);
                return "";
            }

            // Convert to Regex: alternate "text before bracket" and "bracket" segments.
            StringBuilder sb = new StringBuilder("^");
            int pred = 0; // index just after the previous bracket expression
            for (int j = 0; j < start.Count; j++)
            {
                sb.Append(Esc(s.Substring(pred, start[j] - pred)));
                sb.Append(Special(s.Substring(start[j], end[j] - start[j] + 1)));
                pred = end[j] + 1;
            }
            if (pred < s.Length)
            {
                sb.Append(Esc(s.Substring(pred)));
            }
            sb.Append("$");
            string pattern = sb.ToString();

            // A bracket expression can still be an invalid character class
            // (e.g. "[]" or "[z-a]"). Report that through 'error' rather than
            // handing the caller a pattern that makes the Regex constructor throw.
            try
            {
                new Regex(pattern);
            }
            catch (ArgumentException ex)
            {
                error = string.Format("Invalid pattern: {0}", ex.Message);
                return "";
            }
            return pattern;
        }
 
 
        /// <summary>
        /// Loads the dictionary, one entry per line.
        /// </summary>
        /// <param name="path">
        /// Path of the dictionary file. When null or empty, the original hard-coded
        /// project location is used, so existing parameterless calls behave as before.
        /// </param>
        /// <returns>
        /// The lines of the file, or null when the file could not be read (a message
        /// is written to the console in that case).
        /// </returns>
        static string[] ReadDico(string path = null)
        {
            if (string.IsNullOrEmpty(path))
            {
                path = @"C:\Users\user1\Documents\Visual Studio 2015\Projects\DictionarySearch\DictionarySearch\dict.txt";
            }

            try
            {
                // ReadAllLines accepts \r\n, \n and \r as line terminators and does not
                // produce a spurious empty entry when the file ends with a newline.
                return File.ReadAllLines(path);
            }
            catch (Exception ex)
            {
                // Best effort by design: the caller checks for null. The reason is
                // printed so the failure can be diagnosed.
                Console.WriteLine("Could not read the file");
                Console.WriteLine(ex.Message);
                return null;
            }
        }
        /// <summary>
        /// Entry point: loads the dictionary, reads one wildcard pattern from the
        /// console and prints every dictionary entry that matches it.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            string[] entries = ReadDico();
            if (entries == null)
            {
                Console.WriteLine("Cannot find the dictionary");
                return;
            }

            Console.WriteLine("Enter a word:");
            string input = Console.ReadLine();
            if (input == null)
            {
                // Input stream closed (e.g. redirected stdin at end of file):
                // there is no pattern to search for.
                return;
            }

            // Translate once and reuse the result for both the echo and the search.
            string error;
            string pattern = WildcardToRegex(input, out error);
            Console.WriteLine(pattern); // show the generated regular expression

            if (!string.IsNullOrEmpty(error))
            {
                Console.WriteLine(error);
            }
            else
            {
                Regex regex = null;
                try
                {
                    regex = new Regex(pattern);
                }
                catch (ArgumentException ex)
                {
                    // The pattern translated to an invalid regular expression
                    // (for example an empty or reversed character class).
                    Console.WriteLine(ex.Message);
                }

                if (regex != null)
                {
                    foreach (string entry in entries)
                    {
                        if (regex.IsMatch(entry))
                        {
                            Console.WriteLine(entry);
                        }
                    }
                }
            }

            // Keep the console window open until the user presses Enter.
            Console.ReadLine();
        }
    }
}

CSharp

Categories

Share

Follow


KodFor Privacy Policy