I've been trying to get some information on the mysteries of external functions, and although I've seen quite a number of threads about this, I am still a little confused.
I've written the following routine.
Code: Select all
/*
 * ASCII-only character classification macros.
 *
 * Every use of the argument is parenthesised so that an expression argument
 * (for example `ch | 0x20`) is classified as a whole instead of being torn
 * apart by operator precedence.
 *
 * The argument is still evaluated more than once, so never pass an
 * expression with side effects (such as `*p++`).
 *
 * ISSPACE accepts space, tab (9), line feed (10), vertical tab (11) and
 * carriage return (13); form feed (12) is not in the list.
 * ISPUNCTUATION is "anything that is not a letter, digit or ISSPACE
 * character", so it is also true for control and non-ASCII values.
 */
#define ISALPHA(c) ((((c)>='a')&&((c)<='z'))||(((c)>='A')&&((c)<='Z')))
#define ISDIGIT(c) (((c)>='0')&&((c)<='9'))
#define ISSPACE(c) (((c)==' ')||((c)==9)||((c)==10)||((c)==11)||((c)==13))
#define ISPUNCTUATION(c) (!ISALPHA(c)&&!ISDIGIT(c)&&!ISSPACE(c))
/*
 * ASCII-only letter test used by titleCase().
 * Written as a function rather than using the ISALPHA macro so the
 * argument is evaluated exactly once.
 */
static int titleCaseIsLetter(char c)
{
    return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
}

/*
 * Return a newly allocated, title-cased copy of s.
 *
 * A "word" is a run of ASCII letters. The first letter of each word is
 * upper-cased and the remaining letters are lower-cased, except that a
 * letter directly preceded by "Mc" is upper-cased ("mcdonald" -> "McDonald").
 * Any non-letter (space, comma, apostrophe, digit, ...) ends the current
 * word, so "o'brien" becomes "O'Brien". The input string is not modified.
 *
 * Parameters:
 *   s - NUL-terminated input string; may be NULL.
 *
 * Returns:
 *   A malloc()ed buffer exactly large enough for the result, or NULL when
 *   s is NULL or the allocation fails. Ownership passes to the caller, who
 *   must release it with free(). Whether a hosting framework such as
 *   DataStage releases it on the caller's behalf is not established by
 *   this code and must be confirmed separately.
 */
char* titleCase(char* s) {
    if (s == NULL) {
        return NULL;
    }

    /* Size the buffer from the input instead of assuming a fixed maximum. */
    size_t size = strlen(s) + 1;
    /* The cast keeps this compilable as C++ as well as C. */
    char *buf = (char *)malloc(size);
    if (buf == NULL) {
        return NULL;
    }
    memcpy(buf, s, size);

    int inWord = 0; /* non-zero while the previous character was a letter */
    for (char *ch = buf; *ch != '\0'; ch++) {
        if (!titleCaseIsLetter(*ch)) {
            /* any non-letter terminates the current word */
            inWord = 0;
            continue;
        }

        if (!inWord) {
            /* first letter of a new word */
            inWord = 1;
            *ch = (char)toupper((unsigned char)*ch);
        } else if ((ch - buf) >= 2 && ch[-2] == 'M' && ch[-1] == 'c') {
            /* third letter of an "Mc" name; >= 2 so that a name at the
               very start of the string is handled too */
            *ch = (char)toupper((unsigned char)*ch);
        } else {
            *ch = (char)tolower((unsigned char)*ch);
        }
    }

    return buf;
}
Code: Select all
/*
 * Console test driver for titleCase().
 * Prints a fixed sample string, then the converted copy returned by
 * titleCase(), and releases that copy before exiting.
 * argc and argv are not used.
 */
int _tmain(int argc, _TCHAR* argv[])
{
    char sample[] = "mike o'brien, mcfee, mcdonald, this is a title, MR, MRS, MISS, mR, mRs, mIss";
    char *converted;

    /* show the input first so the two lines can be compared by eye */
    printf("%s\n",sample);

    converted = titleCase(sample);
    printf("%s\n",converted);

    /* the buffer returned by titleCase() is released here */
    free(converted);
    return 0;
}
Code: Select all
mike o'brien, mcfee, mcdonald, this is a title, MR, MRS, MISS, mR, mRs, mIss
Mike O'Brien, McFee, McDonald, This Is A Title, Mr, Mrs, Miss, Mr, Mrs, Miss
Press any key to continue . . .
Although I've read on this forum that DataStage will take care of freeing memory, the generated code does not show any evidence of this (as shown below). Has anyone got any definitive statements from IBM about this?
Code: Select all
//
// Generated file to implement the V0S1_Try_ExternalRoutine_Transformer_1 transform operator.
//
// define external functions used
// hasDigit takes a string and yields an int32; titleCase takes a string and
// yields a string. Both are only declared here; their bodies live elsewhere.
// NOTE(review): nothing in this generated file releases the value returned by
// titleCase -- whether the runtime does so is not visible here; confirm.
extern int32 hasDigit(string inString);
extern string titleCase(string inString);
// define our input/output link names
// One input (index 0) and two outputs (index 0 and 1). Output 0 carries the
// same name, hasDigit, as the external function declared above.
inputname 0 DSLink9;
outputname 0 hasDigit;
outputname 1 clean;
// Declarations used by the mainloop and finish sections: the per-row reject
// flag, a null-set variable, the row counter for output 1, and the stage
// variable that holds the hasDigit() result.
initialize {
// define our row rejected variable
int8 RowRejected0;
// define our null set variable
// (declared only; no later section shown here reads or writes it)
int8 NullSetVar0;
// define and initialise each link row count variable required
// RowCount0_1 counts rows written to output 1 and is reported in finish.
uint64 RowCount0_1;
RowCount0_1 = 0;
// Stage variable declaration and initialisation
string StageVar0_svDigitFlag;
StageVar0_svDigitFlag = "";
}
// Per-row logic: a row goes to output 0 (hasDigit) when hasDigit(nme_sur) is
// non-zero; otherwise it is still flagged as rejected and goes to output 1
// (clean). titleCase() is applied to emp_tle and nme_sur on both paths.
mainloop {
// initialise our row rejected variable
// 1 = row has not yet been written to any constrained link
RowRejected0 = 1;
// declare our intermediate variables for this section
int64 InterVar0_0;
string InterVar0_1;
// evaluate the stage variables first
// the int32 result is stored in a string stage variable
StageVar0_svDigitFlag = hasDigit(DSLink9.nme_sur);
// evaluate constraint and columns for link: hasDigit
// the string stage variable is assigned to an int64 to be used as the condition
InterVar0_0 = StageVar0_svDigitFlag;
if (InterVar0_0)
{
InterVar0_1 = titleCase(DSLink9.emp_tle);
hasDigit.emp_tle = InterVar0_1;
InterVar0_1 = titleCase(DSLink9.nme_sur);
hasDigit.nme_sur = InterVar0_1;
writerecord 0;
// row was accepted by this link, so it must not reach the next one
RowRejected0 = 0;
}
// evaluate constraint and columns for link: clean
// taken only when the hasDigit link above did not write the row
InterVar0_0 = RowRejected0;
if (InterVar0_0)
{
// the *_1 columns carry the unmodified input values next to the
// titleCase() results
clean.emp_tle_1 = DSLink9.emp_tle;
InterVar0_1 = titleCase(DSLink9.emp_tle);
clean.emp_tle = InterVar0_1;
clean.nme_sur_1 = DSLink9.nme_sur;
InterVar0_1 = titleCase(DSLink9.nme_sur);
clean.nme_sur = InterVar0_1;
writerecord 1;
RowRejected0 = 0;
RowCount0_1 = RowCount0_1 + 1;
}
}
// End-of-run reporting: when any rows were written to output 1, emit one
// message of the form "<count> rows written to reject link: clean".
finish {
// Log warnings for any reject links
string LogMsg0;
string LogLink0;
if (RowCount0_1 > 0) {
// the uint64 counter is assigned to a string to start the message
LogMsg0 = RowCount0_1;
LogLink0 = " rows written to reject link: ";
LogMsg0 = LogMsg0 + LogLink0;
LogLink0 = "clean";
LogMsg0 = LogMsg0 + LogLink0;
print_message(LogMsg0);
}
}
When I use the function in a DataStage Transformer stage, I get no compile errors, but the output is unchanged, as shown below.
Code: Select all
Original Modified
Ms Ms
Ms Ms
MRS MRS
Mrs Mrs
Mrs Mrs
Ms Ms