Thursday, January 8, 2015

C++ - Wildcard String Search

Main procedure in this implementation is:

bool wildcardSearch(wstring input, wstring pattern, vector<pair<int,int>>* positions);

Procedure iterates through string input, searches for pattern, with wildcards '?' (one character) and '*' (multiple characters) allowed, and puts positions of found sub-strings into vector positions. If at least one match is found, procedure returns true, otherwise it returns false.

Complete code is given below:


// WildcardSearch.h : header file
//

#include 
#include 
#include 
#include 

using namespace std;

void replaceAll(wstring& input, const wstring& from, const wstring& to);
bool wildcardMatch( wstring input, wstring pattern, int* last);
bool wildcardSearch(wstring input, wstring pattern, vector>* positions);



// WildcardSearch.cpp : implementation file
//

#include "WildcardSearch.h"

/***************************************************************************/
/* void replaceAll(wstring& input, const wstring& from, const wstring& to) */
/*                         */
/* Parameters:                     */
/*  input - string to replace in              */
/*  from - substring to replace              */
/*  to - substring to replace with             */
/***************************************************************************/
void replaceAll(wstring& input, const wstring& from, const wstring& to) 
{
 if (from.empty())
  return;

 size_t startPos = 0;

 while ((startPos = input.find(from, startPos)) != wstring::npos) 
 {
  input.replace(startPos, from.length(), to);
  startPos += to.length();
 }
}

/***************************************************************************/
/* bool wildcardMatch( wstring input, wstring pattern, int* last)    */
/*                         */
/* Parameters:                     */
/*  input - string to search in              */
/*  pattern - string to search for (might include wildcard characters) */
/*  last - index of last matched character, if match is found    */
/*                         */
/* Return values:                    */
/*  false - no match                  */
/*  true - match found                 */
/***************************************************************************/
bool wildcardMatch( wstring input, wstring pattern, int* last)
{
 int i, z;

 if (pattern[0] == '\0') // empty pattern is always match
 {
  *last = input.length();
  return true;
 }

 for (i = 0; pattern[i] != '\0'; i++) 
 {
  if (pattern[i] == '\0') 
  {
   return false; // pattern is finished
  }
  else if (pattern[i] == '?')
  {
   continue; // wildcard '?' replaces exactly one character
  }
  else if (pattern[i] == '*') // wildcard '*' replaces none, one or more characters
  {
   int lenPattern = pattern.length();
   wstring subPattern = _T("");   
   subPattern = pattern.substr(i+1, lenPattern-i-1);

   if (input.length() < (unsigned)i) // pattern is longer than input
   {
    return false;
   }

   if (input[i] == '\0' && subPattern.empty()) // '*' might be no match
   {
    *last = input.length();
    return true;
   }

   int lenInput = input.length();
   
   wstring subInput = _T("");
   subPattern = _T("");
   int index = -1;

   //for (z = i; input[z] != '\0'; z++) 
   for (z = lenInput; z >= i; z--) 
   {
    subInput = input.substr(z, lenInput-z);
    subPattern = pattern.substr(i+1, lenPattern-i-1);

    int lenSubInput = subInput.length();
    int lenSubPattern = subPattern.length();

    if (wildcardMatch(subInput, subPattern, &index) == 1)
    {
     if (!subPattern.empty() && (subPattern.find('*') == wstring::npos))
      *last = z + lenSubPattern;
     else
      *last = z + lenSubInput;

     return true;
    }
   }

   // pattern after '*' cannot be found
   return false;
  }
  else if (input.length() < (unsigned)i || pattern[i] != input[i])
  {
   return false; // no match
  }

  // continue matching
 }

 if (pattern.length() > input.length())
 {
  return false;
 }
 else
 {
  // pattern without '*' and all characters matching
  *last = pattern.length();
  return true;
 }
}

/******************************************************************************************/
/* bool wildcardSearch(wstring input, wstring pattern, vector>* positions) */
/*                              */
/* Parameters:                          */
/*  input - string to search in                   */
/*  pattern - string to search for (might include wildcard characters)      */
/*  positions - indices of first and last character in matched substrings     */
/*                              */
/* Return values:                         */
/*  false - no matching strings found                 */
/*  true - at least one match match found                */
/******************************************************************************************/
bool wildcardSearch(wstring input, wstring pattern, vector>* positions)
{
 int j = 0, k = 0, z = 0;

 // replace duplicate '**' with single '*'
 replaceAll(pattern, _T("**"), _T("*"));

 int lenInput = input.length();
 int lenPattern = pattern.length();

 wstring subInput = _T("");
 int match = 0, last = 0;
 bool bFound = false;

 positions->clear();

 for (int i=0; i pos(i, last+i);
    bool bExists = std::find(positions->begin(), positions->end(), pos) != positions->end();    
    
    if (!bExists)
     positions->push_back(pos);

    bFound = true;
   }
  }  
 }

 return bFound;
}


Procedure is invoked in following way: 

vector> positions;
bool ret = wildcardSearch(strInput, strPattern, &positions);