Thursday, January 8, 2015

C++ - Wildcard String Search

Main procedure in this implementation is:

bool wildcardSearch(wstring input, wstring pattern, vector<pair<int,int>>* positions);

Procedure iterates through string input, searches for pattern, with wildcards '?' (one character) and '*' (multiple characters) allowed, and puts positions of found sub-strings into vector positions. If at least one match is found, procedure returns true, otherwise it returns false.

Complete code is given below:


// WildcardSearch.h : header file
//

#include 
#include 
#include 
#include 

using namespace std;

void replaceAll(wstring& input, const wstring& from, const wstring& to);
bool wildcardMatch( wstring input, wstring pattern, int* last);
bool wildcardSearch(wstring input, wstring pattern, vector>* positions);



// WildcardSearch.cpp : implementation file
//

#include "WildcardSearch.h"

/***************************************************************************/
/* void replaceAll(wstring& input, const wstring& from, const wstring& to) */
/*                         */
/* Parameters:                     */
/*  input - string to replace in              */
/*  from - substring to replace              */
/*  to - substring to replace with             */
/***************************************************************************/
void replaceAll(wstring& input, const wstring& from, const wstring& to) 
{
 if (from.empty())
  return;

 size_t startPos = 0;

 while ((startPos = input.find(from, startPos)) != wstring::npos) 
 {
  input.replace(startPos, from.length(), to);
  startPos += to.length();
 }
}

/***************************************************************************/
/* bool wildcardMatch( wstring input, wstring pattern, int* last)    */
/*                         */
/* Parameters:                     */
/*  input - string to search in              */
/*  pattern - string to search for (might include wildcard characters) */
/*  last - index of last matched character, if match is found    */
/*                         */
/* Return values:                    */
/*  false - no match                  */
/*  true - match found                 */
/***************************************************************************/
bool wildcardMatch( wstring input, wstring pattern, int* last)
{
 int i, z;

 if (pattern[0] == '\0') // empty pattern is always match
 {
  *last = input.length();
  return true;
 }

 for (i = 0; pattern[i] != '\0'; i++) 
 {
  if (pattern[i] == '\0') 
  {
   return false; // pattern is finished
  }
  else if (pattern[i] == '?')
  {
   continue; // wildcard '?' replaces exactly one character
  }
  else if (pattern[i] == '*') // wildcard '*' replaces none, one or more characters
  {
   int lenPattern = pattern.length();
   wstring subPattern = _T("");   
   subPattern = pattern.substr(i+1, lenPattern-i-1);

   if (input.length() < (unsigned)i) // pattern is longer than input
   {
    return false;
   }

   if (input[i] == '\0' && subPattern.empty()) // '*' might be no match
   {
    *last = input.length();
    return true;
   }

   int lenInput = input.length();
   
   wstring subInput = _T("");
   subPattern = _T("");
   int index = -1;

   //for (z = i; input[z] != '\0'; z++) 
   for (z = lenInput; z >= i; z--) 
   {
    subInput = input.substr(z, lenInput-z);
    subPattern = pattern.substr(i+1, lenPattern-i-1);

    int lenSubInput = subInput.length();
    int lenSubPattern = subPattern.length();

    if (wildcardMatch(subInput, subPattern, &index) == 1)
    {
     if (!subPattern.empty() && (subPattern.find('*') == wstring::npos))
      *last = z + lenSubPattern;
     else
      *last = z + lenSubInput;

     return true;
    }
   }

   // pattern after '*' cannot be found
   return false;
  }
  else if (input.length() < (unsigned)i || pattern[i] != input[i])
  {
   return false; // no match
  }

  // continue matching
 }

 if (pattern.length() > input.length())
 {
  return false;
 }
 else
 {
  // pattern without '*' and all characters matching
  *last = pattern.length();
  return true;
 }
}

/******************************************************************************************/
/* bool wildcardSearch(wstring input, wstring pattern, vector>* positions) */
/*                              */
/* Parameters:                          */
/*  input - string to search in                   */
/*  pattern - string to search for (might include wildcard characters)      */
/*  positions - indices of first and last character in matched substrings     */
/*                              */
/* Return values:                         */
/*  false - no matching strings found                 */
/*  true - at least one match match found                */
/******************************************************************************************/
bool wildcardSearch(wstring input, wstring pattern, vector>* positions)
{
 int j = 0, k = 0, z = 0;

 // replace duplicate '**' with single '*'
 replaceAll(pattern, _T("**"), _T("*"));

 int lenInput = input.length();
 int lenPattern = pattern.length();

 wstring subInput = _T("");
 int match = 0, last = 0;
 bool bFound = false;

 positions->clear();

 for (int i=0; i pos(i, last+i);
    bool bExists = std::find(positions->begin(), positions->end(), pos) != positions->end();    
    
    if (!bExists)
     positions->push_back(pos);

    bFound = true;
   }
  }  
 }

 return bFound;
}


Procedure is invoked in following way: 

vector> positions;
bool ret = wildcardSearch(strInput, strPattern, &positions);

Monday, September 1, 2014

Windows App - INI File Parser

CIniParser is class written in C++. It can be used to parse INI files, as well as other files with the same format.

It reads sections, parameters, values and comments for each parameter.

Demo application can be downloaded here. It first reads and shows lines one by one, and then shows how actual parsing looks like, with parameters, values and comments shown.




Monday, July 21, 2014

C++ - Custom Tooltip

Custom tool tip C++ class implements multiline and multicolor tool tip, which can be used to present different information, in several lines, using various colors:


CPP and H file can be reviewed and downloaded from following locations: source and header. The class is MFC dependent. Use of the class is simple:




void CCustomChartControl::OnMouseMove(UINT nFlags, CPoint point)
{
  COLORREF color1, color2, color3;
  CString strText1(_T("")), strText2(_T("")), strText3(_T(""));

  // initialize colors and texts
  // ...

  if (m_pWndToolTip == NULL)
  {
     m_pWndToolTip = new CCustomToolTip();
   
     if (!m_pWndToolTip->Create(this))
     {
         TRACE(_T("Couldn't create tool tip window!"));    
     }
     else
     {
         m_pWndToolTip->ShowToolTip(TRUE);
     }
  }

  if (m_pWndToolTip != NULL)
  {        
     m_pWndToolTip->UpdateText(strText1, strText2, strText3,
         true, true, true,
         true, true, true,
         color1, color2, color3,
         point);   
  }
}
In method CCustomToolTip::UpdateText you can set texts with three different colors and visibility types (visible/not visible).

Thursday, June 19, 2014

Android App - Zoromatic Flashlight

Zoromatic Flashlight for Android is simple application, which uses device's camera flash, to provide flashlight at any time. It also provides additional strobe feature, with frequencies from 0 to 20 Hz.



Application can be downloaded from following location: https://play.google.com/store/apps/details?id=com.zoromatic.flashlight.

Sunday, October 6, 2013

C++ - Bitap algorithm for Levenshtein distance calculation

In information theory and computer science, the Levenshtein distance is a string metric for measuring the difference between two sequences. Informally, the Levenshtein distance between two words is the minimum number of single-character edits (insertion, deletion, substitution) required to change one word into the other.

The bitap algorithm (also known as the shift-orshift-and or Baeza-Yates–Gonnet algorithm) is an approximate string matching algorithm. The algorithm tells whether a given text contains a substring which is "approximately equal" to a given pattern, where approximate equality is defined in terms of Levenshtein distance — if the substring and pattern are within a given distance k of each other, then the algorithm considers them equal.


C++ implementation of bitap algorithm is given below:



#include 
#include 

using namespace std;

#define Diff_Timeout    1.0f
#define Diff_EditCost   4
#define Match_Threshold   0.5f
#define Match_Distance   100
#define Patch_DeleteThreshold 0.5f
#define Patch_Margin    4
#define Match_MaxBits   32

int match_bitap(const wstring &text, const wstring &pattern, int loc) 
{
 if (!(Match_MaxBits == 0 || pattern.length() <= Match_MaxBits)) 
 {
  throw _T("Pattern too long for this application.");
 }

 // Initialize the alphabet.
 map  s = match_alphabet(pattern);

 // Highest score beyond which we give up.
 double score_threshold = Match_Threshold;
 // Is there a nearby exact match? (speedup)
 int best_loc = text.find(pattern, loc);
 
 if (best_loc != -1) 
 {
  score_threshold = min(match_bitapScore(0, best_loc, loc, pattern), score_threshold);
  // What about in the other direction? (speedup)
  best_loc = text.rfind(pattern, loc + pattern.length());
  if (best_loc != -1) {
   score_threshold = min(match_bitapScore(0, best_loc, loc, pattern), score_threshold);
  }
 }

 // Initialize the bit arrays.
 int matchmask = 1 << (pattern.length() - 1);
 best_loc = -1;

 int bin_min, bin_mid;
 int bin_max = pattern.length() + text.length();
 int *rd;
 int *last_rd = NULL;
 
 for (int d = 0; d < pattern.length(); d++) 
 {
  // Scan for the best match; each iteration allows for one more error.
  // Run a binary search to determine how far from 'loc' we can stray at
  // this error level.
  bin_min = 0;
  bin_mid = bin_max;
  
  while (bin_min < bin_mid) 
  {
   if (match_bitapScore(d, loc + bin_mid, loc, pattern) <= score_threshold) 
   {
     bin_min = bin_mid;
   } 
   else 
   {
    bin_max = bin_mid;
   }
   
   bin_mid = (bin_max - bin_min) / 2 + bin_min;
  }
  
  // Use the result from this iteration as the maximum for the next.
  bin_max = bin_mid;
  int start = max(1, loc - bin_mid + 1);
  int finish = min(loc + bin_mid, text.length()) + pattern.length();

  rd = new int[finish + 2];
  rd[finish + 1] = (1 << d) - 1;
  
  for (int j = finish; j >= start; j--) 
  {
   int charMatch;
   
   if (text.length() <= j - 1) 
   {
    // Out of range.
    charMatch = 0;
   } 
   else 
   {
    charMatch = s[text[j - 1]];
   }

   if (d == 0) 
   {
    // First pass: exact match.
    rd[j] = ((rd[j + 1] << 1) | 1) & charMatch;
   } 
   else 
   {
    // Subsequent passes: fuzzy match.
    rd[j] = ((rd[j + 1] << 1) | 1) & charMatch
     | (((last_rd[j + 1] | last_rd[j]) << 1) | 1)
     | last_rd[j + 1];
   }
   
   if ((rd[j] & matchmask) != 0) 
   {
    double score = match_bitapScore(d, j - 1, loc, pattern);
    // This match will almost certainly be better than any existing
    // match.  But check anyway.
    if (score <= score_threshold) 
    {
     // Told you so.
     score_threshold = score;
     best_loc = j - 1;
     if (best_loc > loc) 
     {
      // When passing loc, don't exceed our current distance from loc.
      start = max(1, 2 * loc - best_loc);
     } 
     else 
     {
      // Already passed loc, downhill from here on in.
      break;
     }
    }
   }
  }

  if (match_bitapScore(d + 1, loc, loc, pattern) > score_threshold) 
  {
   // No hope for a (better) match at greater error levels.
   break;
  }
  
  delete [] last_rd;
  last_rd = rd;
 }

 delete [] last_rd;
 delete [] rd;
 return best_loc;
}


double match_bitapScore(int e, int x, int loc, const wstring &pattern) 
{
 const float accuracy = static_cast (e) / pattern.length();
 const int proximity = abs(loc - x);
 
 if (Match_Distance == 0) 
 {
  // Dodge divide by zero error.
  return proximity == 0 ? accuracy : 1.0;
 }
 
 return accuracy + (proximity / static_cast (Match_Distance));
}


map  match_alphabet(const wstring &pattern) 
{
 map  s;
 int i;
 
 for (i = 0; i < pattern.length(); i++) 
 {
  TCHAR c = pattern[i];
  s[c] = 0;
 }
 
 for (i = 0; i < pattern.length(); i++) 
 {
  TCHAR c = pattern[i];
  s[c] = (s[c] | (1 << (pattern.length() - i - 1)));
 }

 return s;
}

The call to procedure is as follows:

int res = match_bitap(stringOnestringTwo, 0);

where stringOne and stringTwo are strings whose match (Levenshtein distance) is to be determined, and 0 is location in stringOne where matching procedure should start. Return value is Levenshtein distance, as described above.

Thursday, June 20, 2013

HTML - Boxes around text within HTML table cell


If you want to have boxes only around some text within larger HTML container (td, div), use following code:


<span style="display:inline-block;border:1px solid #000;">
Your text goes here...
</span>


If you apply code to text within <td> element of a table, you'll get result like this:



Thursday, February 21, 2013

Android App - Zoromatic ScreenLock

Zoromatic ScreenLock is standalone Android application which allows user to lock device's screen, with one single click. This is not a widget, so it does not resides in memory, it turns itself off after lock.



Settings screen allows user to provide administration rights to the application:



Before uninstall, ScreenLock Admin rights should be disabled in Settings.

ScreenLock works on Android 2.2 or newer. The newest version can be downloaded from here.