1 -- Created By ViCarePlus, Visolve (vicareplus_engg@visolve.com)
2 ---------------------------------------------------------------
3 ---------------------------------------------------------------
5 -- Procedure for de-identification
6 -- Procedure added to accomplish HIPAA De-identification
8 drop procedure if exists `de_identification`;
12 CREATE PROCEDURE `de_identification`()
14 #Run the de-identification process.
15 DECLARE unknown_table_name INT DEFAULT 0;
16 DECLARE unknown_col_name INT DEFAULT 0;
17 DECLARE unknown_prepare_stmt INT DEFAULT 0;
18 DECLARE table_already_exists INT DEFAULT 0;
19 DECLARE CONTINUE HANDLER FOR 1146 SET unknown_table_name = 1;
20 DECLARE CONTINUE HANDLER FOR 1054 SET unknown_col_name = 1;
21 DECLARE CONTINUE HANDLER FOR 1243 SET unknown_prepare_stmt = 1;
22 DECLARE CONTINUE HANDLER FOR 1050 SET table_already_exists = 1;
24 #Create the transaction_metadata_de_identification table, which contains the tables/columns to include in the report, and whether the table/column needs to be de-identified or not.
25 call load_transaction_metadata_de_identification_table();
27 #Create an empty de_identified_data table, which will contain the complete,de-identified data once this process is finished.
28 call create_de_identified_data_table();
30 #Filter the patients to include in the report, based on the drugs,immunizations, and diagnosis selected.
33 #For each patient, and table/column name to include in the report,select the data from the appropriate tables, and insert into the de_identified_data table. Skip any tables/columns containing identifiers (names, telephone, etc).
34 call perform_de_identification();
36 #Handle error conditions
37 IF table_already_exists = 1 THEN
38 insert into de_identification_error_log values("de-identification",CURRENT_TIMESTAMP(), "when create table, table already exists");
39 update de_identification_status set status = 3;
41 IF unknown_prepare_stmt = 1 THEN
42 insert into de_identification_error_log values("de-identification",CURRENT_TIMESTAMP(), "Unkown prepare statement");
43 update de_identification_status set status = 3;
45 IF unknown_col_name = 1 THEN
46 insert into de_identification_error_log values("de-identification",CURRENT_TIMESTAMP(), "Unkown column name");
47 update de_identification_status set status = 3;
49 IF unknown_table_name = 1 THEN
50 insert into de_identification_error_log values("de-identification",CURRENT_TIMESTAMP(), "Unkown table name");
51 update de_identification_status set status = 3;
54 #If no error set status as De-identification process completed
55 update de_identification_status set status = 2 where status != 3;
57 #Drop empty columns in the final De-identified data
58 call drop_no_value_column();
60 #Drop transaction table created from De-identification process
61 call drop_transaction_tables();
65 -- --------------------------------------------------------
67 -- Procedure to create transaction tables
68 -- Procedure added to accomplish HIPAA De-identification
70 drop procedure if exists `create_transaction_tables`;
74 CREATE PROCEDURE `create_transaction_tables`()
76 #Create transaction tables needed for de_identification process
78 #transaction_metadata_de_identification : Tells which tables/columns need to be de-identified
79 #temp_patient_id : The list of patients to include in this report.
80 #temp_re_identification : Contains a re-identification code for each patient.
81 #temp_patient_record_id : A temporary table, contains the primary id of the record corresponding to a patient.
82 #param_include_tables : Contains the tables/columns to include in this report.
83 #param_filter_pid : Contains the drugs/immunizations/diagnosis for filtering which patients to include
85 DROP TABLE IF EXISTS transaction_metadata_de_identification;
86 CREATE TABLE transaction_metadata_de_identification (table_name varchar(255) NOT NULL,col_name varchar(255) NOT NULL, load_to_lexical_table tinyint(1) NOT NULL,include_in_de_identification int(2) NOT NULL,include_in_re_identification tinyint(1) NOT NULL);
87 DROP TABLE IF EXISTS temp_patient_id_table;
88 create table temp_patient_id_table (pid varchar(10));
89 DROP TABLE IF EXISTS temp_re_identification_code_table;
90 create table temp_re_identification_code_table (re_identification_code varchar(50));
91 DROP TABLE IF EXISTS temp_patient_record_id;
92 create table temp_patient_record_id(number int auto_increment, id int not null, key(number));
93 DROP TABLE IF EXISTS param_include_tables;
94 create table param_include_tables(value varchar(500),include_unstructured boolean);
95 DROP TABLE IF EXISTS param_filter_pid;
96 create table param_filter_pid(begin_date date, end_date date, diagnosis_text varchar(500), drug_text varchar(500), immunization_text varchar(500));
100 -- --------------------------------------------------------
102 -- Procedure to load data to lexical look up table
103 -- Procedure added to accomplish HIPAA De-identification
105 drop procedure if exists `load_lexical_look_up_table`;
109 CREATE PROCEDURE `load_lexical_look_up_table`()
111 #Populate lexical look up table with 18 unique identifiers specified by HIPAA as identifying data from openemr database
113 #The lexical_look_up_table is used to store the text of known patient identifiers, such as patient names (John Smith), telephone numbers (408-111-222), etc. Later on, during the identification process, these text snippets will be removed from unstructured data, such as patient notes.
115 DECLARE tableName VARCHAR(255) ;
116 DECLARE colName VARCHAR(255) ;
117 DECLARE done INT DEFAULT 0;
118 declare out_status varchar(20);
119 DECLARE cur1 CURSOR FOR SELECT table_name,col_name FROM metadata_de_identification where load_to_lexical_table = 1;
120 DECLARE CONTINUE HANDLER FOR NOT FOUND SET done = 1;
121 DECLARE CONTINUE HANDLER FOR 1062 SET out_status='Duplicate Entry';
123 FETCH cur1 INTO tableName, colName;
125 SET @v = CONCAT("insert into lexical_look_up_table (lex_text) select ",colName," from ", tableName);
126 PREPARE stmt1 FROM @v;
128 FETCH cur1 INTO tableName, colName;
131 update lexical_look_up_table set lex_text = LOWER(lex_text);
132 delete from lexical_look_up_table where char_length(lex_text) <= 1;
136 -- --------------------------------------------------------
138 -- Procedure to load data for transaction metadata de-identification
139 -- Procedure added to accomplish HIPAA De-identification
141 drop procedure if exists `load_transaction_metadata_de_identification_table`;
145 CREATE PROCEDURE `load_transaction_metadata_de_identification_table`()
148 #The param_include_tables contains the tables/columns that will be used in this report.
149 #The metadata_de_identification table tells which tables/columns need to be de-identified.
150 #Populate the transaction_metadata_de_identification table with the same information as the metadata_de_identification table, except only include the tables/columns that are included in this data report.
152 #Include_tables contains string of table names separated by '#', like "history_data#prescriptions#"
153 #Loop through each table name by getting the substring delimited by '#'.
155 declare subString varchar(255);
156 declare include_tables varchar(500);
157 declare includeUnstructured int;
158 select value into include_tables from param_include_tables;
159 select include_unstructured into includeUnstructured from param_include_tables;
160 delete from transaction_metadata_de_identification;
161 #In parameter individual values are separated by '#'
162 SET include_tables = LTRIM(include_tables);
163 SET include_tables = RTRIM(include_tables);
164 IF include_tables = "all" THEN
165 insert into transaction_metadata_de_identification (table_name,col_name,include_in_de_identification) select table_name, col_name, include_in_de_identification from metadata_de_identification where table_name = "patient_data" || table_name = "history_data" || table_name = "lists" || table_name = "immunizations" || table_name = "prescriptions" || table_name = "transactions" || table_name = "insurance_data" || table_name = "billing" || table_name = "payments";
167 SET noWords=LENGTH(include_tables) - LENGTH(REPLACE(include_tables, '#', '')) + 1;
168 SET include_tables = CONCAT(include_tables,'#');
169 insert into transaction_metadata_de_identification (table_name,col_name,include_in_de_identification) select table_name, col_name, include_in_de_identification from metadata_de_identification where table_name = "patient_data";
171 #Obtain individual value from the parameter
172 SET subString = SUBSTRING_INDEX( SUBSTRING_INDEX( include_tables, '#', noWords), '#', -1 );
173 SET subString = LTRIM(subString);
174 SET subString = RTRIM(subString);
175 insert into transaction_metadata_de_identification (table_name,col_name,include_in_de_identification) select table_name, col_name, include_in_de_identification from metadata_de_identification where table_name = subString;
176 set noWords = noWords -1;
179 IF includeUnstructured = 0 THEN
180 update transaction_metadata_de_identification set include_in_de_identification = 0 where include_in_de_identification = 4;
183 #Create a lexical_look_up_table, which contains text that should be removed from unstructured text data.
185 call load_lexical_look_up_table();
190 -- --------------------------------------------------------
192 -- Procedure to create de-identified data table
193 -- Procedure added to accomplish HIPAA De-identification
195 drop procedure if exists `create_de_identified_data_table`;
199 CREATE PROCEDURE `create_de_identified_data_table`()
202 #This creates a table (de_identified_data) containing all the patient data to be included in the report. Each table/column that is included in this report (such as history_data/tobacco) will have a corresponding column in the de_identified_data.
203 #In addition, the de_identified_data table will have columns number, sub_number which contain the primary id of the table/column row where this data was read from.
205 DECLARE colName VARCHAR(255) ;
206 DECLARE newColName VARCHAR(255) ;
207 DECLARE tableName VARCHAR(255) ;
208 DECLARE done INT DEFAULT 0;
209 DECLARE duplicateColumn INT DEFAULT 0;
210 DECLARE cur1 CURSOR FOR SELECT col_name,table_name FROM transaction_metadata_de_identification where include_in_de_identification != 0;
211 DECLARE CONTINUE HANDLER FOR NOT FOUND SET done = 1;
212 DECLARE CONTINUE HANDLER FOR 1060 SET duplicateColumn = 1;
213 drop table IF EXISTS de_identified_data;
214 create table de_identified_data (number INT, sub_number INT NOT NULL,re_identification_code varchar(255) NOT NULL);
216 FETCH cur1 INTO colName,tableName;
219 SET @v = CONCAT("alter table de_identified_data add column `", colName, "` text not null");
220 PREPARE stmt1 FROM @v;
222 #add immunization name to de-identified data, if immunization data is included in report
224 IF tableName = "immunizations" and colName = "immunization_id" THEN
226 alter table de_identified_data add column immunization_name text not null;
229 #For duplicate column name append table name with the col name
230 IF(duplicateColumn) THEN
231 SET newColName = CONCAT(tableName,":",colName);
232 SET @v = CONCAT("alter table de_identified_data add column `", newColName, "` text not null");
234 PREPARE stmt1 FROM @v;
236 SET duplicateColumn = 0;
237 update transaction_metadata_de_identification set col_name = newColName where col_name = colName and table_name = tableName;
239 FETCH cur1 INTO colName,tableName;
247 -- --------------------------------------------------------
249 -- Procedure to filter pid for de-identification process
250 -- Procedure added to accomplish HIPAA De-identification
252 drop procedure if exists `filter_pid`;
256 CREATE PROCEDURE `filter_pid`()
258 #Retrieve a list of patient ids that satisfy the selections picked in the de-identification Input screen.
259 # The table param_filter_pid contains the parameters (start/end date, diagnosis, drugs, immunizations)
260 #for filter out which patients to select. Store the selected patient ids in the temp_patient_id_table
261 declare startDate varchar(30);
262 declare endDate varchar(30);
263 declare diagnosis_list varchar(1000);
264 declare drug_list varchar(1000);
265 declare immunization_list varchar(1000);
267 declare subString varchar(255);
268 select begin_date into startDate from param_filter_pid;
269 select end_date into endDate from param_filter_pid;
270 select diagnosis_text into diagnosis_list from param_filter_pid;
271 select drug_text into drug_list from param_filter_pid;
272 select immunization_text into immunization_list from param_filter_pid;
273 drop table IF EXISTS t1;
274 create table t1 (pid int);
275 delete from temp_patient_id_table;
276 insert into temp_patient_id_table (pid) select pid from patient_data;
277 #In parameter individual values are separated by '#'
278 SET diagnosis_list = LTRIM(diagnosis_list);
279 SET diagnosis_list = RTRIM(diagnosis_list);
280 IF (diagnosis_list != "all") then
281 SET diagnosis_list = CONCAT(diagnosis_list,'#');
282 SET noWords=LENGTH(diagnosis_list) - LENGTH(REPLACE(diagnosis_list, '#', '')) + 1 ;
283 WHILE( noWords != 0) do
284 #Obtain individual value from the parameter
285 SET subString = SUBSTRING_INDEX( SUBSTRING_INDEX( diagnosis_list, '#', noWords), '#', -1 );
286 SET subString = LTRIM(subString);
287 SET subString = RTRIM(subString);
288 SET subString = SUBSTRING_INDEX(subString, '-', 1);
289 insert into t1 (pid) select pid from lists where diagnosis = subString and begdate >= startDate and begdate<= endDate;
290 set noWords = noWords -1;
293 insert into t1 (pid) select pid from lists where begdate >= startDate and begdate<= endDate;
295 DELETE FROM temp_patient_id_table where pid NOT IN (SELECT pid FROM t1);
297 SET drug_list = LTRIM(drug_list);
298 SET drug_list = RTRIM(drug_list);
299 IF (drug_list != "all") then
300 SET drug_list = CONCAT(drug_list,'#');
301 SET noWords=LENGTH(drug_list) - LENGTH(REPLACE(drug_list, '#', '')) + 1;
302 WHILE( noWords >= 0) do
303 SET subString = SUBSTRING_INDEX( SUBSTRING_INDEX( drug_list, '#', noWords), '#', -1 );
304 SET subString = LTRIM(subString);
305 SET subString = RTRIM(subString);
306 SET subString = SUBSTRING_INDEX(subString, '-', -1);
307 insert into t1 (pid) select patient_id from prescriptions where drug = subString and start_date >= startDate and start_date <= endDate;
308 insert into t1 (pid) select pid from lists where type = "medication" and title = subString and begdate >= startDate and begdate <= endDate;
309 set noWords = noWords -1;
312 insert into t1 (pid) select patient_id from prescriptions where start_date >= startDate and start_date <= endDate;
313 insert into t1 (pid) select pid from lists where type = "medication" and begdate >= startDate and begdate <= endDate;
315 DELETE FROM temp_patient_id_table where pid NOT IN (SELECT pid FROM t1);
317 SET drug_list = LTRIM(immunization_list);
318 SET drug_list = RTRIM(immunization_list);
319 IF (immunization_list != "all") then
320 SET immunization_list = CONCAT(immunization_list,'#');
321 SET noWords=LENGTH(immunization_list) - LENGTH(REPLACE(immunization_list, '#', '')) + 1;
322 WHILE( noWords >= 0) do
323 SET subString = SUBSTRING_INDEX( SUBSTRING_INDEX( immunization_list, '#', noWords), '#', -1 );
324 SET subString = LTRIM(subString);
325 SET subString = RTRIM(subString);
326 SET subString = SUBSTRING_INDEX(subString, '-', 1);
327 insert into t1 (pid) select patient_id from immunizations where immunization_id = subString and administered_date >= startDate and administered_date <= endDate;
328 set noWords = noWords -1;
331 insert into t1 (pid) select patient_id from immunizations where administered_date >= startDate and administered_date <= endDate;
333 DELETE FROM temp_patient_id_table where pid NOT IN (SELECT pid FROM t1);
339 -- --------------------------------------------------------
341 -- Procedure to drop no value column
342 -- Procedure added to accomplish HIPAA De-identification
344 drop procedure if exists `drop_no_value_column`;
348 CREATE PROCEDURE `drop_no_value_column`()
350 #In table de_identified_data, remove any empty columns (columns that contain an empty value, for every patient).
351 DECLARE done INT DEFAULT 0;
352 DECLARE val int default 0;
353 declare colName VARCHAR(255) ;
354 DECLARE metadate_cursor CURSOR FOR SELECT col_name FROM transaction_metadata_de_identification where include_in_de_identification != 0 ;
355 DECLARE CONTINUE HANDLER FOR NOT FOUND SET done = 1;
356 OPEN metadate_cursor;
357 FETCH metadate_cursor INTO colName;
359 SET @v = CONCAT("select count(`", colName ,"`) INTO @val from de_identified_data where `", colName ,"` != ' '");
360 PREPARE stmt1 FROM @v;
363 SET @v = CONCAT("alter table de_identified_data drop column `", colName ,"`");
364 PREPARE stmt1 FROM @v;
366 DELETE FROM transaction_metadata_de_identification where col_name = colName;
369 FETCH metadate_cursor INTO colName;
371 close metadate_cursor;
375 -- --------------------------------------------------------
377 -- Procedure to check match for regular expression
378 -- Procedure added to accomplish HIPAA De-identification
380 drop function if exists `match_regular_expression`;
384 CREATE FUNCTION `match_regular_expression`(unstructuredData varchar(255)) RETURNS varchar(255)
386 #Given some unstructured data (like patient notes), replace any urls, dates, or names in the data with 'xxx'. Then return the modified data.
387 DECLARE newString varchar(255);
388 DECLARE subString varchar(30);
390 DECLARE count INT DEFAULT 1;
392 SET unstructuredData = CONCAT(unstructuredData,' ');
393 SET noWords=LENGTH(unstructuredData) - LENGTH(REPLACE(unstructuredData, ' ', '')) ;
394 WHILE( noWords >= count) do
396 SET subString = SUBSTRING_INDEX( SUBSTRING_INDEX( unstructuredData, ' ', count), ' ', -1 );
398 IF ( LOCATE("www.", subString) || LOCATE(".com", subString) || LOCATE("http", subString) || LOCATE(".co", subString) || LOCATE(".in", subString) )THEN
399 SET subString = "xxx";
400 #Check for date (yyyy/mm/dd or dd-mm-yyyy)
401 ELSEIF (SELECT subString REGEXP "([0-9]{4})[-|/|.|\]([0-9]{1,2})[-|/|.|\]([0-9]{1,2})")THEN SET subString = LEFT(subString,4);
402 ELSEIF (SELECT subString REGEXP "([0-9]{1,2})[-|/|.|\]([0-9]{1,2})[-|/|.|\]([0-9]{4})")THEN SET subString = RIGHT(subString,4);
403 ELSEIF (LOCATE("mr.", subString) || LOCATE("mrs.", subString) || LOCATE("ms.", subString)|| LOCATE("dr.", subString) )THEN
404 SET subString = "xxx";
406 SET newString = CONCAT(newString, subString, " ");
407 SET count = count + 1;
409 SET newString = LTRIM(newString);
410 SET newString = RTRIM(newString);
411 #Return updated string
416 -- --------------------------------------------------------
418 -- Procedure to perform de-identification
419 -- Procedure added to accomplish HIPAA De-identification
421 drop procedure if exists `perform_de_identification`;
427 CREATE PROCEDURE `perform_de_identification`()
431 #When this prodecure starts:
433 #The temp_patient_id_table contains the list of patients to gather data for.
435 #The de_identified_data table contains the table/column names to gather data for
437 #transaction_metadata_de_identification which tells whether the table/column needs to be de-identified or not.
439 DECLARE lexText VARCHAR(255) ;
441 DECLARE unstructuredData VARCHAR(255) ;
443 DECLARE colName VARCHAR(255) ;
445 DECLARE originalColName VARCHAR(255) ;
447 DECLARE tableName VARCHAR(255) ;
449 DECLARE includeInDeIdentification INT ;
451 DECLARE recordNumber INT DEFAULT 0;
453 DECLARE patientId INT;
455 DECLARE charPosition INT;
457 DECLARE recordCount INT;
459 DECLARE recordId INT;
461 DECLARE insertFlag INT DEFAULT 0;
463 DECLARE columnFlag INT DEFAULT 0;
465 DECLARE done INT DEFAULT 0;
467 DECLARE unknownColumn INT DEFAULT 0;
469 DECLARE patient_id_cursor CURSOR FOR SELECT pid from temp_patient_id_table;
471 DECLARE metadate_cursor CURSOR FOR SELECT table_name,col_name,include_in_de_identification FROM transaction_metadata_de_identification where include_in_de_identification != 0 ;
473 DECLARE lexical_cursor CURSOR FOR select lex_text from lexical_look_up_table;
475 DECLARE CONTINUE HANDLER FOR NOT FOUND SET done = 1;
477 OPEN patient_id_cursor;
479 FETCH patient_id_cursor into patientId;
485 #If the patient id has no re-identification code associated with it, then create a new re-identification code using UUID() and assign it to the patient id (store it in re_identification_code_data table)
487 IF(!( select count(*) from re_identification_code_data where pid = patientId)) THEN
489 insert into re_identification_code_data values (patientId, uuid());
493 FETCH patient_id_cursor into patientId;
497 close patient_id_cursor;
501 delete from de_identified_data;
503 #first row/record of de-identified data table will be the column name (display purpose)
505 insert into de_identified_data (number,sub_number,re_identification_code) values ("number","sub_number","re_identification_code");
509 OPEN patient_id_cursor;
513 FETCH patient_id_cursor INTO patientId;
517 set recordNumber = recordNumber + 1;
519 OPEN metadate_cursor;
521 FETCH metadate_cursor INTO tableName, colName, includeInDeIdentification;
529 #Handle case when table name is appened with the column name eg:history_data:date
531 set charPosition = locate(":",colName);
533 if charPosition && tableName = substring(colName,1,charPosition-1) then
534 set @z = CONCAT("update de_identified_data set `", colName ,"` = '", colName, "' where number = 0 ");
536 set originalColName = colName;
538 set colName = substring(colName,charPosition+1);
541 if (tableName = 'lists' ) then
542 set @z = CONCAT("update de_identified_data set `", originalColName ,"` = 'issues:", colName, "' where number = 0 ");
545 if (tableName = 'lists' ) then
546 set @z = CONCAT("update de_identified_data set `", colName ,"` = 'issues:", colName, "' where number = 0 ");
548 set @z = CONCAT("update de_identified_data set `", colName ,"` = '", tableName, ":", colName, "' where number = 0 ");
551 PREPARE stmt2 FROM @z;
556 TRUNCATE temp_patient_record_id;
558 if (tableName = 'prescriptions' || tableName = 'immunizations') then
560 SET @v = CONCAT("insert into temp_patient_record_id (id) select id from ", tableName," where patient_id = ", patientId);
562 else SET @v = CONCAT("insert into temp_patient_record_id (id) select id from ", tableName," where pid = ", patientId);
566 PREPARE stmt1 FROM @v;
572 SELECT count(*) FROM temp_patient_record_id into recordCount;
576 while recordCount != 0 do
580 select count(*) from de_identified_data where number = recordNumber and sub_number = recordCount AND re_identification_code = (select re_identification_code from re_identification_code_data where pid = patientId) into insertFlag;
586 if insertFlag = 0 then
588 insert into de_identified_data (number,sub_number,re_identification_code) values (recordNumber,recordCount, (select re_identification_code from re_identification_code_data where pid = patientId));
596 SELECT id FROM temp_patient_record_id where number = recordCount into recordId;
600 #Case 4 :unstructured data(eg:patient notes) perform lexical analysis - replace any identifying text (name, telephone, etc) with xxx
602 IF includeInDeIdentification = 4 then
604 SET @v = CONCAT("select ", colName, " into @unstructuredData from ", tableName, " where id = ",recordId);
606 PREPARE stmt1 FROM @v;
610 SET @unstructuredData = LOWER(@unstructuredData);
616 FETCH lexical_cursor INTO lexText;
622 SET @unstructuredData = REPLACE (@unstructuredData, lexText, "xxx");
624 FETCH lexical_cursor INTO lexText;
628 CLOSE lexical_cursor;
632 set @unstructuredData = match_regular_expression(@unstructuredData);
634 IF columnFlag = 0 THEN
636 SET @v = CONCAT("update de_identified_data set `", colName, "` = '", @unstructuredData,"' where sub_number = ",recordCount, " and number = ", recordNumber );
640 SET @v = CONCAT("update de_identified_data set `", originalColName, "` = '", @unstructuredData,"' where sub_number = ",recordCount, " and number = ", recordNumber );
644 #Case 2:date feild , provide only year part
646 ELSEIF includeInDeIdentification = 2 then
648 IF columnFlag = 0 THEN
650 SET @v = CONCAT("update de_identified_data set `", colName, "` = ( select LEFT ( (select ",colName," from ", tableName, " where id = ",recordId," ), 4)) where sub_number = ",recordCount, " and number = ", recordNumber );
654 SET @v = CONCAT("update de_identified_data set `", originalColName, "` = ( select LEFT ( (select ",colName," from ", tableName, " where id = ",recordId," ), 4)) where sub_number = ",recordCount, " and number = ", recordNumber );
658 #Case 3:zip code, provide only first 3 digits
660 ELSEIF includeInDeIdentification = 3 then
662 IF columnFlag = 0 THEN
664 SET @v = CONCAT("update de_identified_data set `", colName, "` = ( select LEFT ( (select ",colName," from ", tableName, " where id = ",recordId," ), 3)) where sub_number = ",recordCount, " and number = ", recordNumber );
668 SET @v = CONCAT("update de_identified_data set `", originalColName, "` = ( select LEFT ( (select ",colName," from ", tableName, " where id = ",recordId," ), 3)) where sub_number = ",recordCount, " and number = ", recordNumber );
674 IF columnFlag = 0 THEN
676 SET @v = CONCAT("update de_identified_data set `", colName, "` = ( select ",colName," from ", tableName, " where id = ",recordId," ) where sub_number = ",recordCount, " and number = ", recordNumber );
680 SET @v = CONCAT("update de_identified_data set `", originalColName, "` = ( select ",colName," from ", tableName, " where id = ",recordId," ) where sub_number = ",recordCount, " and number = ", recordNumber );
686 PREPARE stmt1 FROM @v;
690 #add immunization name to de-identified data, if immunization data is included in report
692 IF tableName = "immunizations" and colName = "immunization_id" THEN
693 update de_identified_data set immunization_name = "immunization:immunization_name" where number = 0;
695 SET @v = CONCAT("select immunization_id into @immunizationId from immunizations where id = ", recordId );
697 PREPARE stmt1 FROM @v;
704 SET @z = CONCAT("update de_identified_data set immunization_name = ( select title from list_options where list_id = 'immunizations' and option_id = ",@immunizationId," ) where sub_number = ",recordCount, " and number = ", recordNumber );
706 PREPARE stmt2 FROM @z;
712 set recordCount = recordCount - 1;
716 FETCH metadate_cursor INTO tableName, colName, includeInDeIdentification;
720 CLOSE metadate_cursor;
724 FETCH patient_id_cursor INTO patientId;
728 CLOSE patient_id_cursor;
730 # Note that a single patient can have multiple row entries in the de_identified_data.
732 # That is because a single patient can have multiple entries for prescriptions, immunizations, etc.
738 -- --------------------------------------------------------
739 -- Procedure to drop transaction tables
740 -- --------------------------------------------------------
741 -- Procedure added to accomplish HIPAA De-identification
743 drop procedure if exists `drop_transaction_tables`;
747 CREATE PROCEDURE `drop_transaction_tables`()
749 #After de-identification process is completed drop transaction tables
750 DROP TABLE IF EXISTS transaction_metadata_de_identification;
751 DROP TABLE IF EXISTS temp_patient_id_table;
752 DROP TABLE IF EXISTS temp_re_identification_code_table;
753 DROP TABLE IF EXISTS temp_patient_record_id;
754 DROP TABLE IF EXISTS param_filter_pid;
756 DROP TABLE IF EXISTS param_filter_pid;
759 -- --------------------------------------------------------
761 -- Procedure for re-identification
762 -- Procedure added to accomplish HIPAA De-identification
764 drop procedure if exists `re_identification`;
768 CREATE PROCEDURE `re_identification`()
770 DECLARE unknown_table_name INT DEFAULT 0;
771 DECLARE unknown_col_name INT DEFAULT 0;
772 DECLARE unknown_prepare_stmt INT DEFAULT 0;
773 DECLARE table_already_exists INT DEFAULT 0;
774 DECLARE CONTINUE HANDLER FOR 1146 SET unknown_table_name = 1;
775 DECLARE CONTINUE HANDLER FOR 1054 SET unknown_col_name = 1;
776 DECLARE CONTINUE HANDLER FOR 1243 SET unknown_prepare_stmt = 1;
777 DECLARE CONTINUE HANDLER FOR 1050 SET table_already_exists = 1;
778 call create_re_identified_data_table();
779 call perform_re_identification();
780 #Set re-identification status as completed
781 update re_identification_status set status = 2;
782 #Handle error conditions
783 IF table_already_exists = 1 THEN
784 insert into de_identification_error_log values("re-identification",CURRENT_TIMESTAMP(), "when create table, table already exists");
786 IF unknown_prepare_stmt = 1 THEN
787 insert into de_identification_error_log values("re-identification",CURRENT_TIMESTAMP(), "Unkown prepare statement");
789 IF unknown_col_name = 1 THEN
790 insert into de_identification_error_log values("re-identification",CURRENT_TIMESTAMP(), "Unkown column name");
792 IF unknown_table_name = 1 THEN
793 insert into de_identification_error_log values("re-identification",CURRENT_TIMESTAMP(), "Unkown table name");
795 update re_identification_status set status = 2;
800 -- --------------------------------------------------------
802 -- Procedure to create re-identified data table
803 -- Procedure added to accomplish HIPAA De-identification
805 drop procedure if exists `create_re_identified_data_table`;
809 CREATE PROCEDURE `create_re_identified_data_table`()
811 #Create re-identified data table for the particular iteration of the re-identification process
812 DECLARE colName VARCHAR(255) ;
813 DECLARE done INT DEFAULT 0;
814 DECLARE metadata_cursor CURSOR FOR SELECT col_name FROM metadata_de_identification where include_in_re_identification = 1;
815 DECLARE CONTINUE HANDLER FOR NOT FOUND SET done = 1;
816 drop table IF EXISTS re_identified_data;
817 create table re_identified_data (number varchar(255), pid varchar(255), re_identification_code varchar(255) NOT NULL);
818 OPEN metadata_cursor;
819 FETCH metadata_cursor INTO colName;
822 SET @v = CONCAT("alter table re_identified_data add column ", colName, " varchar(255) not null");
823 PREPARE stmt1 FROM @v;
825 FETCH metadata_cursor INTO colName;
828 CLOSE metadata_cursor;
832 -- --------------------------------------------------------
834 -- Procedure to perform re-identification
835 -- Procedure added to accomplish HIPAA De-identification
837 drop procedure if exists `perform_re_identification`;
841 CREATE PROCEDURE `perform_re_identification`()
843 #When this prodecure starts:
844 #The temp_re_identification_code_table contains the list of re-identification codes to gather data for.
845 #The re_identified_data table contains the table/column names to gather data for
846 #metadata_de_identification which tells whether the table/column needs to be de-identified or not.
847 DECLARE colName VARCHAR(255) ;
848 DECLARE tableName VARCHAR(255) ;
849 DECLARE patientId INT;
850 DECLARE recordNumber INT DEFAULT 0;
851 DECLARE reIdentificationCode varchar(50);
852 DECLARE done INT DEFAULT 0;
853 DECLARE unknownColumn INT DEFAULT 0;
854 DECLARE found_re_id_code INT DEFAULT 0;
855 DECLARE re_identification_code_cursor CURSOR FOR select re_identification_code from temp_re_identification_code_table;
856 DECLARE metadata_cursor CURSOR FOR SELECT col_name,table_name FROM metadata_de_identification where include_in_re_identification = 1 ;
857 DECLARE CONTINUE HANDLER FOR NOT FOUND SET done = 1;
858 delete from re_identified_data;
860 insert into re_identified_data (number) values ("record number");
861 update re_identified_data set pid = "patient id" where number = 0;
862 update re_identified_data set re_identification_code = "re_identification_code" where number = 0;
863 OPEN re_identification_code_cursor;
864 FETCH re_identification_code_cursor INTO reIdentificationCode;
866 select count(*) from re_identification_code_data where re_identification_code = reIdentificationCode INTO found_re_id_code;
868 if (found_re_id_code) then
869 #If input code matchs with re-identification code in database obtain re-identifiying data for the input code
870 set recordNumber = recordNumber + 1;
871 insert into re_identified_data (number) values (recordNumber);
872 select pid from re_identification_code_data where re_identification_code = reIdentificationCode INTO patientId;
873 update re_identified_data set pid = patientId where number = recordNumber;
874 update re_identified_data set re_identification_code = reIdentificationCode where number = recordNumber;
875 OPEN metadata_cursor;
876 FETCH metadata_cursor INTO colName, tableName;
879 SET @v = CONCAT("update re_identified_data set ", colName, " = ( select ",colName," from ", tableName, " where pid = ",patientId," ) where number = ",recordNumber );
880 PREPARE stmt1 FROM @v;
884 set @z = CONCAT("update re_identified_data set `", colName ,"` = '", tableName, ":", colName, "' where number = 0 ");
885 PREPARE stmt2 FROM @z;
888 FETCH metadata_cursor INTO colName, tableName;
890 CLOSE metadata_cursor;
893 FETCH re_identification_code_cursor INTO reIdentificationCode;
895 CLOSE re_identification_code_cursor;
900 -- --------------------------------------------------------
902 -- Table structure for table `metadata for de-identification`
903 -- Table added to accomplish HIPAA De-identification
905 #IfNotTable metadata_de_identification
906 CREATE TABLE `metadata_de_identification` (
907 `table_name` varchar(255) NOT NULL,
908 `col_name` varchar(255) NOT NULL,
909 `load_to_lexical_table` tinyint(1) NOT NULL,
910 -- load_to_lexical_table can be
911 -- 0 do not include in lexical look up table
912 -- 1 include in lexical look up table
913 `include_in_de_identification` int(2) NOT NULL,
914 -- include_in_de_identification can be
915 -- 0 do not include in de-identification
916 -- 1 include in de-identification
917 -- 2 date feild - include only year part
918 -- 3 zip code - include only first 3 digits
919 -- 4 unstructured data - perform lexical analysis
920 `include_in_re_identification` tinyint(1) NOT NULL
921 -- include_in_re_identification can be
922 -- 0 do not include in re-identification
923 -- 1 include in re-identification
928 -- --------------------------------------------------------
930 -- Table structure for table `lexical look up table`
931 -- Table added to accomplish HIPAA De-identification
933 #IfNotTable lexical_look_up_table
934 CREATE TABLE `lexical_look_up_table` (
935 `id` int(11) NOT NULL auto_increment,
936 `lex_text` varchar(255) NOT NULL,
941 -- --------------------------------------------------------
943 -- Table structure for table `re_identification_code_data`
944 -- Table added to accomplish HIPAA De-identification
946 #IfNotTable re_identification_code_data
947 CREATE TABLE `re_identification_code_data` (
948 `pid` bigint(20) NOT NULL,
949 `re_identification_code` varchar(50) NOT NULL
953 -- --------------------------------------------------------
955 -- Table structure for table `de_identification_status`insert into re_identification_code_data values (patientId, uuid());
956 -- Table added to accomplish HIPAA De-identification
958 #IfNotTable de_identification_status
959 CREATE TABLE `de_identification_status` (
961 -- 2 re-identification process completed, file ready to download
962 -- 1 de-identification process running
963 -- 0 de-identification process not running
965 `status` int(11) default NULL,
966 `last_available_de_identified_data_file` varchar(100) default NULL
969 -- Dumping data for table `de_identification_status`
971 insert into de_identification_status values (0," ");
975 -- --------------------------------------------------------
977 -- Table structure for table `de_identification_error_log`
978 -- Table added to accomplish HIPAA De-identification
980 #IfNotTable de_identification_error_log
981 CREATE TABLE `de_identification_error_log` (
982 `activity` varchar(100),
983 `date_time` timestamp NOT NULL default CURRENT_TIMESTAMP on update CURRENT_TIMESTAMP,
989 -- --------------------------------------------------------
991 -- Table structure for table `re_identification_status`
992 -- Table added to accomplish HIPAA De-identification
995 #IfNotTable re_identification_status
996 CREATE TABLE `re_identification_status` (
998 -- 2 re-identification process completed, file ready to download
999 -- 1 re-identification process running
1000 -- 0 re-identification process not running
1001 `status` int(11) default NULL
1004 -- Dumping data for table `re_identification_status`
1006 insert into re_identification_status values (0);
1010 ------------------------------------------------------------