178 lines
4.0 KiB
C
178 lines
4.0 KiB
C
/*
  [Header: soundexmodule.c,v 1.2 95/05/02 15:40:45 dwwillia Exp ]

  Perform soundex comparisons on strings.

  Soundex is an algorithm that hashes English strings into a numerical value.
  Strings that sound the same are hashed to the same value.  This allows
  for non-literal string matching.

  From: David Wayne Williams <dwwillia@iucf.indiana.edu>

  Apr 29 1996 - added get_soundex method that returns the soundex of a
                string (chrish@qnx.com)
  May 2 1996 - added doc strings (chrish@qnx.com)
*/
|
|
|
|
#include <string.h>
|
|
#include <ctype.h>
|
|
#include "Python.h"
|
|
|
|
/* Doc string for the module as a whole; handed to Py_InitModule4() when the
 * module is initialised.
 */
static char soundex_module__doc__[] =
    "Perform Soundex comparisons on strings, allowing non-literal matching.";
|
|
|
|
/* Compute the fixed-width Soundex-style hash of a string.
 *
 *   str     NUL-terminated input string; it is only read.
 *   result  output buffer of at least 7 bytes; on return it holds exactly
 *           6 characters followed by a terminating NUL.
 *
 * An empty input produces "000000".  Otherwise the first character is
 * copied upper-cased and the rest of the input is translated as follows:
 *
 *   1) Vowels, W, H, Y and every character without a code are skipped.
 *
 *   2) BFPV     = 1
 *      CGJKQSXZ = 2
 *      DT       = 3
 *      L        = 4
 *      MN       = 5
 *      R        = 6
 *
 *   3) A digit is not written when it equals the character most recently
 *      written to result, so repeated translations collapse into one even
 *      when skipped characters lie between them.
 *
 * Translation stops once 6 characters have been written; shorter results
 * are padded on the right with '0'.
 */
static void soundex_hash(const char *str, char *result)
{
    enum { HASH_LEN = 6 };          /* characters in the hash, excluding NUL */

    const char *sptr = str;         /* read position in str */
    char *rptr = result;            /* write position in result */

    if (*str == '\0') {
        strcpy(result, "000000");
        return;
    }

    /* Preserve the first character of the input string.  The cast to
     * unsigned char is required: passing a negative char value to
     * toupper() is undefined behaviour.
     */
    *rptr++ = (char)toupper((unsigned char)*sptr++);

    for (; (rptr - result) < HASH_LEN && *sptr != '\0'; sptr++) {
        char code;                  /* digit for this character, or NUL to skip */

        switch (toupper((unsigned char)*sptr)) {
        case 'B':
        case 'F':
        case 'P':
        case 'V':
            code = '1';
            break;
        case 'C':
        case 'G':
        case 'J':
        case 'K':
        case 'Q':
        case 'S':
        case 'X':
        case 'Z':
            code = '2';
            break;
        case 'D':
        case 'T':
            code = '3';
            break;
        case 'L':
            code = '4';
            break;
        case 'M':
        case 'N':
            code = '5';
            break;
        case 'R':
            code = '6';
            break;
        default:
            /* Vowels, W, H, Y, digits, punctuation, ...: no code. */
            code = '\0';
            break;
        }

        /* The comparison is against the last character written, which for
         * the first digit is the preserved leading character itself.
         */
        if (code != '\0' && *(rptr - 1) != code) {
            *rptr++ = code;
        }
    }

    /* Pad 0's on right side of string out to HASH_LEN characters. */
    while (rptr < result + HASH_LEN) {
        *rptr++ = '0';
    }

    /* Terminate the result string. */
    result[HASH_LEN] = '\0';
}
|
|
|
|
|
|
/* Return the actual soundex value. */
|
|
/* Added by Chris Herborth (chrish@qnx.com) */
|
|
static char soundex_get_soundex__doc__[] =
|
|
"Return the (English) Soundex hash value for a string.";
|
|
static PyObject *
|
|
get_soundex(PyObject *self, PyObject *args)
|
|
{
|
|
char *str;
|
|
char sdx[7];
|
|
|
|
if(!PyArg_ParseTuple( args, "s", &str))
|
|
return NULL;
|
|
|
|
soundex_hash(str, sdx);
|
|
|
|
return PyString_FromString(sdx);
|
|
}
|
|
|
|
static char soundex_sound_similar__doc__[] =
|
|
"Compare two strings to see if they sound similar (English).";
|
|
static PyObject *
|
|
sound_similar(PyObject *self, PyObject *args)
|
|
{
|
|
char *str1, *str2;
|
|
char res1[7], res2[7];
|
|
|
|
if(!PyArg_ParseTuple(args, "ss", &str1, &str2))
|
|
return NULL;
|
|
|
|
soundex_hash(str1, res1);
|
|
soundex_hash(str2, res2);
|
|
|
|
if(!strcmp(res1,res2))
|
|
return Py_BuildValue("i",1);
|
|
else
|
|
return Py_BuildValue("i",0);
|
|
}
|
|
|
|
/* Python Method Table.
|
|
*/
|
|
static PyMethodDef SoundexMethods[] =
|
|
{
|
|
{"sound_similar", sound_similar, 1, soundex_sound_similar__doc__},
|
|
{"get_soundex", get_soundex, 1, soundex_get_soundex__doc__},
|
|
|
|
{NULL, NULL } /* sentinel */
|
|
};
|
|
|
|
|
|
/* Register the method table.
|
|
*/
|
|
DL_EXPORT(void)
|
|
initsoundex()
|
|
{
|
|
(void) Py_InitModule4("soundex",
|
|
SoundexMethods,
|
|
soundex_module__doc__,
|
|
(PyObject *)NULL,
|
|
PYTHON_API_VERSION);
|
|
}
|