Sunday, November 29, 2015

C/C++ - Getting fraction and exponent from double number using unions

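A double is stored as a 64-bit number with the following internal structure (IEEE 754 double precision): bit 63 is the sign, bits 62-52 hold the 11-bit exponent (biased by 1023), and bits 51-0 hold the 52-bit fraction: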


This storage can be presented in binary form in C++ using a union, since the members of a union share the same address space. That way both the exponent and the fraction are accessible through one object:


// Overlays a double with an array of four unsigned shorts, so that the raw
// storage of the floating point value can be read word by word.
// NOTE(review): reading sh[0] as the lowest fraction bits assumes a
// little-endian machine and a 16-bit unsigned short - confirm for the target.
typedef union 
{
 double db;             // the value seen as a floating point number
 unsigned short sh[4];  // the same storage seen as four unsigned short words
} Double;

// Store the number through the double member; its bits are then read via sh[].
Double D;
D.db = d; // some double
In this representation, the members of the union have the following meaning (on a little-endian machine with a 16-bit unsigned short):

D.sh[0] - fraction (bits 15-0)
D.sh[1] - fraction (bits 31-16)
D.sh[2] - fraction (bits 47-32)
D.sh[3] - bit 15 is the sign (0 is +, 1 is -), bits 14-4 are the exponent, and bits 3-0 are part of the fraction (bits 51-48)

From D.sh it is possible to extract sign, fraction (mantissa) and exponent:


short exp = ((D.sh[3] & 0x7FF0) >> 4) - 1023;
short sign = (D.sh[3] & 0x8000) >> 15;

double mant = 1.;

for (short i=0; i<16; i++)
{
 short bit = (D.sh[0] >> i) & 1; 
 mant += bit * pow (2., i-52);
}

for (short i=16; i<32; i++)
{
 short bit = (D.sh[1] >> (i-16)) & 1; 
 mant += bit * pow (2., i-52);
}

for (short i=32; i<48; i++)
{
 short bit = (D.sh[2] >> (i-32)) & 1; 
 mant += bit * pow (2., i-52);
}

for (short i=48; i<52; i++)
{
 short bit = (D.sh[3] >> (i-48)) & 1; 
 mant += bit * pow (2., i-52);
}

double dNew = pow(-1., sign) * mant * pow (2., exp); // should be the same as initial double d

Check binary file certificate

Certificates of binary files (executables or dynamic libraries) on Windows can be checked either with an existing tool (signtool.exe) or from your own code using API functions.

1. The utility signtool.exe is used to sign binaries, but it can also display certificate information. Its usage is described at https://msdn.microsoft.com/en-us/library/windows/desktop/aa388171%28v=vs.85%29.aspx

2. The WinVerifyTrust API reports whether a file is signed and whether its certificate is valid. Its usage is described here: https://msdn.microsoft.com/en-us/library/aa388208%28v=vs.85%29.aspx.

3. The Crypto API gives more detailed information, similar to the output of signtool.exe. A possible usage is described at https://support.microsoft.com/en-us/kb/323809.

XML - Dealing with characters not allowed in XML tags

There are five characters in total that have a special meaning in XML and should not appear literally in element content or attribute values. They can be either omitted or replaced with escape sequences:

<  replace with &#60; or &lt;
>  replace with &#62; or &gt;
"   replace with &#34; or &quot;
&  replace with &#38; or &amp;
'    replace with &#39; or &apos;


Thursday, January 8, 2015

C++ - Wildcard String Search

Main procedure in this implementation is:

bool wildcardSearch(wstring input, wstring pattern, vector<pair<int,int>>* positions);

The procedure iterates through the string input, searches for pattern, in which the wildcards '?' (exactly one character) and '*' (zero or more characters) are allowed, and puts the positions of the found sub-strings into the vector positions. If at least one match is found, the procedure returns true; otherwise it returns false.

Complete code is given below:


// WildcardSearch.h : header file
//

#include <tchar.h>
#include <algorithm>
#include <string>
#include <utility>
#include <vector>

using namespace std;

// Replaces every occurrence of 'from' in 'input' with 'to' (in place).
void replaceAll(wstring& input, const wstring& from, const wstring& to);
// Matches 'pattern' ('?' and '*' allowed) against the beginning of 'input';
// on success '*last' receives the index just past the matched characters.
bool wildcardMatch( wstring input, wstring pattern, int* last);
// Collects the positions of the sub-strings of 'input' that match 'pattern';
// returns true if at least one match was found.
bool wildcardSearch(wstring input, wstring pattern, vector<pair<int,int>>* positions);



// WildcardSearch.cpp : implementation file
//

#include "WildcardSearch.h"

/***************************************************************************/
/* void replaceAll(wstring& input, const wstring& from, const wstring& to) */
/*                                                                         */
/* Substitutes, in place, each occurrence of one substring by another.     */
/* The text inserted by a substitution is not scanned again.               */
/*                                                                         */
/* Parameters:                                                             */
/*  input - string that is modified                                        */
/*  from - substring that is looked for (empty: nothing is done)           */
/*  to - substring that is written instead                                 */
/***************************************************************************/
void replaceAll(wstring& input, const wstring& from, const wstring& to) 
{
 if (!from.empty())
 {
  // continue each search right behind the text that was just inserted
  for (size_t hit = input.find(from, 0);
       hit != wstring::npos;
       hit = input.find(from, hit + to.length()))
  {
   input.replace(hit, from.length(), to);
  }
 }
}

/***************************************************************************/
/* bool wildcardMatch( wstring input, wstring pattern, int* last)          */
/*                                                                         */
/* Matches pattern against the beginning of input. The pattern does not    */
/* have to cover the whole input. '?' stands for exactly one character,    */
/* '*' for none, one or more characters ('*' takes as many characters as   */
/* possible).                                                              */
/*                                                                         */
/* Parameters:                                                             */
/*  input - string to search in                                            */
/*  pattern - string to search for (might include wildcard characters)     */
/*  last - receives the index just past the last matched character, if     */
/*         a match is found; left untouched otherwise                      */
/*                                                                         */
/* Return values:                                                          */
/*  false - no match                                                       */
/*  true - match found                                                     */
/***************************************************************************/
bool wildcardMatch( wstring input, wstring pattern, int* last)
{
 const int lenInput = static_cast<int>(input.length());
 const int lenPattern = static_cast<int>(pattern.length());

 if (pattern[0] == '\0') // empty pattern is always match
 {
  *last = lenInput;
  return true;
 }

 for (int i = 0; pattern[i] != '\0'; i++) 
 {
  if (pattern[i] == '?')
  {
   // wildcard '?' replaces exactly one character; that the input is long
   // enough is verified by the length checks further below
   continue;
  }

  if (pattern[i] == '*') // wildcard '*' replaces none, one or more characters
  {
   if (lenInput < i) // pattern is longer than input
   {
    return false;
   }

   const wstring subPattern = pattern.substr(i + 1);

   if (subPattern.empty()) // trailing '*' takes the rest of the input
   {
    *last = lenInput;
    return true;
   }

   // Try to match the rest of the pattern at every position the '*' could
   // end at, starting from the right so that '*' takes as much as possible.
   for (int z = lenInput; z >= i; z--) 
   {
    int index = -1;

    if (wildcardMatch(input.substr(z), subPattern, &index))
    {
     // index is relative to position z; using it (instead of the length
     // of the remaining input) keeps the end exact for nested '*' too
     *last = z + index;
     return true;
    }
   }

   // pattern after '*' cannot be found
   return false;
  }

  if (lenInput < i || pattern[i] != input[i])
  {
   return false; // no match
  }

  // continue matching
 }

 if (lenPattern > lenInput)
 {
  return false; // input ended before the pattern did
 }

 // pattern without '*' and all characters matching
 *last = lenPattern;
 return true;
}

/******************************************************************************************/
/* bool wildcardSearch(wstring input, wstring pattern, vector>* positions) */
/*                              */
/* Parameters:                          */
/*  input - string to search in                   */
/*  pattern - string to search for (might include wildcard characters)      */
/*  positions - indices of first and last character in matched substrings     */
/*                              */
/* Return values:                         */
/*  false - no matching strings found                 */
/*  true - at least one match match found                */
/******************************************************************************************/
bool wildcardSearch(wstring input, wstring pattern, vector>* positions)
{
 int j = 0, k = 0, z = 0;

 // replace duplicate '**' with single '*'
 replaceAll(pattern, _T("**"), _T("*"));

 int lenInput = input.length();
 int lenPattern = pattern.length();

 wstring subInput = _T("");
 int match = 0, last = 0;
 bool bFound = false;

 positions->clear();

 for (int i=0; i pos(i, last+i);
    bool bExists = std::find(positions->begin(), positions->end(), pos) != positions->end();    
    
    if (!bExists)
     positions->push_back(pos);

    bFound = true;
   }
  }  
 }

 return bFound;
}


The procedure is invoked in the following way:

// One pair of positions is stored per match that was found.
vector<pair<int,int>> positions;
bool ret = wildcardSearch(strInput, strPattern, &positions);