We write a DB procedure to profile data and write results into a table, then a Java program will write the data in the table into Excel.
For other profiling information,like data value, frequency count, percent, it is easy to produce in DB. For format and data type, there are two many varieties. We can just try to conclude the rules from IA column analysis reports we have produced.
This is the code for getting format and data type:
CREATE FUNCTION "F_GET_TYPE_FORMAT" (
"P_GET_WHAT" VARCHAR(10),
"P_INSTR" VARCHAR(100) )
RETURNS VARCHAR(100)
BEGIN ATOMIC
DECLARE v_format VARCHAR(100) DEFAULT '' ;
DECLARE v_type VARCHAR(10) DEFAULT '' ;
DECLARE v_pos SMALLINT ;
DECLARE v_cur_char CHAR(1) ;
DECLARE v_len SMALLINT ;
IF p_instr is null
THEN
SET v_format='NA';
SET v_type='NA';
ELSEIF p_instr=''
THEN
SET v_format='NA';
SET v_type='STRING';
ELSE
SET v_len = LENGTH(TRIM(p_instr));
SET v_pos = 1;
WHILE v_pos <= v_len DO
SET v_cur_char = SUBSTR(p_instr,v_pos,1);
IF v_cur_char IN ('A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T','U','V','W','X','Y','Z')
THEN SET v_format=v_format||'A';
ELSEIF v_cur_char IN ('a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z')
THEN SET v_format=v_format||'a';
ELSEIF v_cur_char IN ('0','1','2','3','4','5','6','7','8','9')
THEN SET v_format=v_format||'9';
ELSE
SET v_format=v_format||v_cur_char;
END IF;
SET v_pos = v_pos + 1;
END WHILE;
SET v_type='INT';
SET v_len = LENGTH(v_format);
SET v_pos = 1;
WHIL:
WHILE v_pos <= v_len DO
SET v_cur_char = SUBSTR(v_format,v_pos,1);
IF v_cur_char <> '9'
THEN
SET v_type='STRING';
LEAVE WHIL;
END IF;
SET v_pos = v_pos + 1;
END WHILE;
IF v_type='INT' and BIGINT(p_instr)<128
THEN SET v_type='INT8';
ELSEIF v_type='INT' and BIGINT(p_instr)<32768
THEN SET v_type='INT16';
ELSEIF v_type='INT' and BIGINT(p_instr)<2147483648
THEN SET v_type='INT32';
ELSEIF v_type='INT' and BIGINT(p_instr)>2147483647
THEN SET v_type='INT64';
END IF;
END IF;
IF P_GET_WHAT='T'
THEN RETURN v_type;
ELSE
RETURN v_format;
END IF;
END;
ray.wurlod wrote:OK, now let's see you profile compliance with data rules "with one click in Excel". ...