Highway to PSR2
[openemr.git] / contrib / util / deidentification / deidentification.php
blob11ba5fa2f659052765c6f3017e3148fc7bfb947d
1 <?php
3 /* This script is for de-identifying an OpenEMR database for the purpose of creating a live demo or for
4 development using real data but keepinng patient identities a secret. The script does the following
6 * removing patient information from the patient_data table and replacing patient demographics with a randomly generated name
7 * replacing data stored in the the insurance_data table with the random generated name
8 * specfying insurance subscriber as self and populating the subscriber info with auto-generated data from step 1.
9 * clearing provider and staff information in the user table replacing it with autogenerated and unique names and IDN's
10 * truncating the log tables since personal information may be stored there
11 * removes data in forms - END USER MUST EDIT deIdForms function to enure all form data is removed.
13 There is no turnging back.......
15 To use:
17 1) Enter values for host, user, database, password
18 2) type: php deidentification_OpenEMR.php
20 Your database now has deidientified all data and can never be restored.
22 * Copyright (C) 2017 Daniel Pflieger <growlingflea@gmail.com daniel@mi-squared.com and OEMR <www.oemr.org>
24 * LICENSE: This program is free software; you can redistribute it and/or
25 * modify it under the terms of the GNU General Public License
26 * as published by the Free Software Foundation; either version 3
27 * of the License, or (at your option) any later version.
28 * This program is distributed in the hope that it will be useful,
29 * but WITHOUT ANY WARRANTY; without even the implied warranty of
30 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
31 * GNU General Public License for more details.
32 * You should have received a copy of the GNU General Public License
33 * along with this program. If not, see <http://opensource.org/licenses/gpl-license.php>;.
35 * @package OpenEMR
36 * @author Daniel Pflieger <growlingflea@gmail.com> <daniel@mi-squared.com>
37 * @link http://www.open-emr.org
40 NOTE: IT IS STILL THE RESPONSIBILITY OF THE USER TO ENSURE THAT ALL PERSONAL IDENTIFIABLE INFORMATION HAS BEEN DELETED FROM THE DATABASE. THE END USER
41 MUST MAKE SURE THAT ALL TEXT AREAS IN FORMS DO NOT INCLUDE THE PATIENTS REAL NAME.
45 //Contact the database
46 //$con = mysqli_connect("HOST","USER","PASS","DATABASE") or die("Some error occurred during connection " . mysqli_error($con));
48 //Instructions: Change these 4 values. The run as a normal php script
49 //Remember - there is no turning back
51 //To run script, comment out the return line, fill out the databae credentials, and run. Uncomment out the return statement when complete.
52 return 0;
53 $host = 'localhost';
54 $user = 'root';
55 $pass = '';
56 $database = '';
57 $DEBUG = false;
60 //***********************************************************************
61 //***********************************************************************
66 $con = mysqli_connect($host, $user, $pass, $database) or die("Some error occurred during connection. must enter Host, Username, password, and database in mysqli_connect() " . mysqli_error($con));
67 echo("\n Successfully connected to database....... Waiting...... \n ");
70 //random names
71 // count of males 33
72 $male = array("John", "Joe", "Tony", "Harley", "Kenneth", "Sam", "Lewi", "Jimmy", "Moby", "Donald", "George", "Barack", "Jose", "Fonzy", "Cat", "David", "Scruggs",
73 "Prately","Stabile","Faler","Wolfrum","Hughe","Gustave","Lemar","Zhu","Quihuiz","Krinsky","Cresswell","Vanbergen","Durelli","Carron","Targett","Emmert","Ferrusi"
76 // count female names 75
77 $female = array(
78 "Hillary", "Alison", "Kathy", "Jesse", "Buzz", "Jenny", "Rachael", "Jennifer", "Lauren", "Lisa", "Amy", "Dana", "Blake",
79 "Beatrice", "Beatrix", "Bea", "Bee", "Beattie", "Trixie", "Trissie", "Belinda", "Bel", "Bell", "Belle", "Linda", "Lindy", "Lin", "Lynn",
80 "Belle", "Bell", "Bel", "Bella", "Annabel", "Arabella", "Isabel", "Rosabel", "Belinda", "Berenice", "Bernice", "Bernie",
81 "Bertha", "Berta", "Bertie", "Betty", "Beverly", "Beverley", "Bev", "Blanche", "Blanch", "Bonnie", "Bonny", "Brenda", "Brendie", "Brandy",
82 "Brett", "Bret", "Bretta", "Bridget", "Bridgette", "Brigid", "Brigit", "Biddie", "Biddy", "Bridie", "Bridey", "Brie", "Bree", "Brita",
83 "Brittany", "Brittney", "Britney", "Brit", "Britt", "Brita", "Brie"
85 // count of names is: 806
86 $lnames = array (
87 "Sandles","Hollifield","Flack","Lussier","Deloe","Thao","Dardenne","Spiro","Futter","Subasic","Shawler","Baehr","Murrillo","Christenbury",
88 "Plourde","Cotty","Suro","Gabe","Davids","Eschrich","Trautmann","Matarrita","Crompton","Pekas","Komo","Monroy","Kortkamp","Klukan","Henrity",
89 "Kindig","Syal","Hurtig","Vangilder","Ronsini","Hutchenson","Alvero","Valeriani","Bendtsen","Hornbarger","Marsili","Burfeind","Torello","Wink",
90 "Nemani","Astry","Eroh","Hidalgo","Wardlow","Sherren","Donegan","Weick","Peria","Fetchko","Achterhof","Schlappi","Stoudmire","Winsett","Obrien",
91 "Zindell","Snachez","Upright","Pinales","Laneve","Danner","Perches","Brentlinger","Epperly","Colpaert","Hoppins","Cheslock","Sprecher","Hesselman","Viscosi",
92 "Brazzi","Martincic","Pienta","Bonaventura","Pluta","Ruse","Gondek","Hammerstad","Fahl","Cromeans","Ramsbottom","Scipioni","Balley","Peskind","Senf","Hyler",
93 "Villareal","Weymouth","Smyser","Hawkinson","Freeberg","Gruntz","Pennebaker","Norbo","Bazar","Kveton","Glumac","Braner","Ye","Macoreno","Baj","Lehtomaki","Wiford",
94 "Carnell","Jegede","Motts","Magda","Tjaden","Sinko","Larrosa","Shaub","Gerecke","Wyborny","Arterberry","Milliken","Gurnee","Coble","Opal","Orrick","Barlup","Sedlachek",
95 "Silveri","Nielson","Niss","Heavener","Youngkin","Poro","Losinger","Billeaudeau","Hospelhorn","Orizetti","Lesches","Yarnall","Vanderlip","Ingrum","Anzaldo","Hirose",
96 "Rottinghaus","Missler","Brogden","Delasancha","Sacchetti","Faria","Chicharello","Salvesen","Kapichok","Sleppy","Semper","Abatti","Jarnagin","Schuerholz","Nicholl",
97 "Loft","Dagel","Winkelpleck","Madera","Muhs","Denner","Makey","Wendell","Ridder","Slaugenhaupt","Wilburn","Poladian","Dozal","Royston","Eardley","Villarta","Youree",
98 "Appiah","Edith","Abdeldayen","Pasquini","Cabe","Hurla","Gest","Bayete","Goatley","Mcguffin","Kluz","Gepner","Crowston","Gyger","Stover","Orandello","Hairgrove",
99 "Mccurtain","Cendana","Pfeiffenberge","Dirkson","Weida","Baucom","Tamas","Berra","Austad","Kawano","Benda","Ellermann","Ryan","Landolt","Schnelzer","Arnt","Minardi",
100 "Maizes","Rosborough","Lyford","Klaass","Shorette","Whitener","Buys","Kounthong","Bisconer","Waser","Flamer","Marushia","Emlin","Driggs","Kubes","Duderstadt","Butters"
101 ,"Shears","Wigington","Walchak","Kenouo","Bohmann","Hannagan","Eigner","Rainford","Adkisson","Vitrano","Walkingstick","Tart","Armesto","Kimmell","Ulman","Fortgang",
102 "Stockley","Brzostek","Hoefling","Crickenberger","Cyler","Ornelas","Farria","Montella","Stock","Zorc","Halbrooks","Carriere","Colindres","Tyska","Esguerra","Buerstatte",
103 "Welles","Gettinger","Grotz","Revilla","Galston","Pittmann","Bechel","Kimberlin","Odums","Causby","Ware","Havenhill","Branine","Kelnhofer","Dahm","Braunwarth","Omarah",
104 "Syrop","Mcgugin","Aston","Lannom","Hirt","Mccrorey","Lyall","Kos","Muegge","Lamorte","Lothringer","Falcioni","Delgenio","Catenaccio","Gauntt","Shutte","Sarley","Calogero",
105 "Obermuller","Polintan","Cerritos","Doom","Zaccagnino","Schilz","Garib","Coffie","Birkline","Maleck","Delaluz","Maiolo","Hackmann","Portsche","Sundberg","Him","Lempka","Ohmen",
106 "Chamble","Riolo","Cichonski","Onan","Derfus","Knilands","Pawelk","Vallelonga","Mikesell","Breaker","Lockman","Navo","Cornelson","Mcgeady","Kana","Meikle","Shapino","Tonas",
107 "Fasbender","Grodecki","Leffers","Wahdan","Krawczyk","Hafemeister","Sebestyen","Kun","Dehm","Guyton","Dupaty","Gjeltema","Fernet","Calvery","Glasgow","Anger","Capelli",
108 "Levendoski","Gehle","Dungey","Denike","Marreel","Arbertha","Granberry","Guillan","Mass","Poeschl","Ouderkirk","Connard","Lichlyter","Taverna","Ladden","Blethen","Lauter",
109 "Avrett","Scovel","Gietz","Cabiltes","Podbielski","Glaser","Beringer","Stoyanoff","Mcfeeley","Hazel","Fratzke","Register","Olaughlin","Bilbrew","Holey","Snead","Dincher",
110 "Toribio","Kazmer","Rushman","Herkstroeter","Zwack","Packen","Busta","Sanlucas","Shivers","Bracks","Brodis","Beavis","Liuzza","Chadd","Truan","Beyene","Sedberry","Bow","Kemper",
111 "Tabuena","Bodwell","Lowa","Buboltz","Thicke","Maupins","Rozzell","Sheffer","Mckeone","Ulvan","Metta","Spielmann","Ullman","Chick","Baseman","Critchfield","Readenour","Zimlich",
112 "Kinch","Ochoa","Gobel","Safranek","Mandia","Bissonette","Mansanares","Brigantino","Zipfel","Soja","Touney","Ochotorena","Baradi","Burbridge","Hayes","Mozgala","Spisak",
113 "Cartledge","Luetmer","Pipkins","Roorda","Boccanfuso","Houghtelling","Habibi","Dumaine","Sloup","Sperdute","Villanueva","Fitzgerrel","Breiling","Kachikian","Rimes",
114 "Kubler","Ostroot","Sauerwein","Condie","Buckey","Solesbee","Mckern","Berceir","Meason","Strissel","Salvati","Ingham","Lather","Pullan","Gastelun","Elridge","Moorehouse",
115 "Marcantel","Hadaway","Spriggle","Podraza","Mainguy","Henedia","Hofstadter","Laundree","Cerrano","Benac","Cahee","Gadson","Vandevort","Trias","Redus","Nabarrete","Valeriano",
116 "Feimster","Calcagino","Ashbach","Dolch","Altringer","Kala","Abeb","Laurenceau","Mallozzi","Winkles","Monsegur","Severi","Solle","Sjogren","Pok","Molett","Varos","Gilfoy",
117 "Medicus","Jeppson","Abbasi","Mccraight","Ohashi","Hocker","Mckennon","Littrel","Twilligear","Liptok","Gollop","Cuthbert","Poissant","Lainez","Pratts","Haugrud","Posa",
118 "Schrag","Entrup","Cortes","Blanga","Demello","Skelton","Corell","Mchan","Torti","Vecchia","Alaya","Keown","Cicali","Machkovich","Lawford","Troyani","Devon","Smylie",
119 "Macklem","Garibay","Rowold","Wern","Madaffari","Tatum","Belmont","Nishikawa","Similien","Rybacki","Brisbane","Martiarena","Dortch","Eck","Lijewski","Sarlinas","Dekok",
120 "Oliviera","Baublitz","Pane","Wiebe","Gatson","Allende","Driesel","Sartor","Tarras","Richiusa","Oriordan","Alar","Brickle","Kosorog","Barbee","Beidleman","Agbisit",
121 "Schiavi","Regester","Marrero","Braxton","Mateus","Cara","Abdelal","Merone","Rodarmel","Goerlich","Dunnell","Ralat","Lacina","Scheidt","Zilahi","Kjetland","Knepshield",
122 "Alce","Champy","Degiorgio","Ciubal","Kuhl","Hargett","Bosa","Derkas","Spierling","Bonnenfant","Hoegerl","Steffa","Harriss","Feldhaus","Schapiro","Calcano","Cresta",
123 "Ladtkow","Yournet","Getler","Reisling","Miker","Valorie","Genga","Lerwick","Yaun","Yoes","Guild","Beverly","Plassman","Dolly","Ghera","Costilla","Hauber","Svennungsen",
124 "Tharaldson","Verghese","Torpey","Merlin","Levitch","Laughinghouse","Cabreja","Kilb","Fontanilla","Berri","Doede","Oligee","Janikowski","Denkins","Alanko","Jeannette",
125 "Hites","Schriefer","Oborny","Malory","Sturrup","Petros","Geesey","Rivenberg","Trumball","Littfin","Gigantino","Shipps","Wycoff","Kupper","Dolgas","Oby","Polucha","Rasp",
126 "Graue","Konick","Espenshade","Machel","Noxon","Bassiti","Schnickel","Corlee","Meaker","Bolch","Iara","Laredo","Stasko","Fisette","Clar","Didlake","Borghoff","Dubberly",
127 "Quattro","Amparan","Walstrom","Mancini","Rathmanner","Andina","Muldrow","Heimbigner","Bloodough","Stoot","Bluestein","Meeks","Kaltenhauser","Ybos","Rehak","Dao","Shrum",
128 "Bjerknes","Harp","Studeny","Sweers","Granto","Eldrige","Nast","Goodling","Daquino","Allegood","Delonge","Lattrell","Willougby","Betry","Sorvig","Schremp","Waynick",
129 "Quidley","Kadner","Wares","Swarb","Placko","Travelstead","Liebold","Fukano","Daughetee","Feagler","Orie","Thruman","Quartuccio","Reinholdt","Urwin","Repoff","Crickard",
130 "Prately","Stabile","Faler","Wolfrum","Hughe","Gustave","Lemar","Zhu","Quihuiz","Krinsky","Cresswell","Vanbergen","Durelli","Carron","Targett","Emmert","Ferrusi",
131 "Harless","Ailes","Shimer","Greff","Pinna","Guedes","Meury","Lapari","Litty","Ji","Marcus","Lampel","Minotti","Migliorisi","Tevebaugh","Morgan","Steidel","Bartee",
132 "Brackman","Borruso","Ficklin","Baza","Mercier","Sponholz","Trego","Channell","Vanburen","Zaring","Luken","Komorowski","Fasciano","Drafts","Dar","Callicoat","Callam",
133 "Villaquiran","Vanbrunt","Coiner","Luckenbill","Mcray","Pin","Hayer","Lapenta","Isita","Slaydon","Frondorf","Cavagna","Passalacqua","Lehnertz","Kavadias","Macione",
134 "Sturch","Boes","Albor","Bookamer","Burbine","Bardach","Ghaor","Quartieri","Mcgill","Michelena","Aronson","Brosig","Morganti","Rodewald","Barich","Langelier"
137 /* function to clear the present value if a record's column and replace it with a value if spoecified
138 Input: $con, $table, $column, $value = value to replace with
140 function removeColumn($con, $table, $column, $value = '')
142 $removeSS = ("Update $table SET $column='$value' where 1 ");
143 $query = mysqli_query($con, $removeSS) or print( "\n QUERY '$removeSS' DID NOT WORK. PLEASE VERIFY THE TABLE AND COLUMN EXISTS \n");
144 if ($query) {
145 print("\n Query '$removeSS' completed! \n");
149 /* This function replaces the first and last name of the patient with a auto generated name
150 removes data from the encounter, removes personal information from the patient_data, notes, and form_encounter
151 tables. This should be the first function called.
153 INPUT: connection, last name array, firstname arrays, debug
154 OUTPUT: Number of patients DEID.
156 function deIdPatientData($con, $lnames, $male, $female, $DEBUG = false)
159 removeColumn($con, "patient_data", "ss", "0000-00-00");
160 removeColumn($con, "form_encounter", "reason", "reason goes here, bucko");
161 removeColumn($con, "form_encounter", "facility", "Service Facility");
162 removeColumn($con, "notes", "note", "notes goes here, Captian");
164 $i = 0;
166 $removeLname = ("Select lname, pid, id, ss, street, sex from patient_data ");
167 $query = mysqli_query($con, $removeLname);
168 while ($result = mysqli_fetch_array($query)) {
169 if ($DEBUG===true) {
170 if ($i ===10) {
171 break;
175 $i++;
176 $string = '';
177 //Give the user a new last name in patient_data.lname
178 $last_name = $lnames[rand(0, 800)];
180 //Give the user a new first name
181 $first_name_male = $male[rand(0, 32)];
182 $first_name_female = $female[rand(0, 74)];
184 //Change the street address patient_Data.street
185 $street = rand(1, 9999)." ".rand(0, 200)." Avenue ";
187 //remove the drivers license
188 $drivers_license = rand(2, 999).rand(0, 999).rand(0, 99);
190 //change the patient_data.phone_home
191 $phone_home = rand(200, 999)."-".rand(200, 999)."-".rand(1000, 9999);
193 $string = "update patient_data set lname = '$last_name', ";
195 if ($result['sex'] === 'Male') {
196 $string .= " fname = '$first_name_male', ";
197 } else {
198 $string .= " fname = '$first_name_female', ";
201 $string .= " drivers_license = '$drivers_license', ";
203 $string .= " phone_home = '$phone_home', ";
205 $string .= " phone_cell = '$phone_home', ";
207 $string .= " phone_biz = '$phone_home', ";
209 $string .= " contact_relationship = '', ";
211 $string .= " email = '', ";
213 $string .= " street = '$street' ";
215 $string .= "where pid = "."'".$result['pid']."' ; " ;
218 mysqli_query($con, $string) or die("Failed Patient Replacement");
219 $string = '';
220 deIdInsuranceDataTable($con, $result['pid']);
221 //now update insurace
224 return $i;
228 //This function replaces the data stored in the insurance_data table with the random generated name.
229 //In order for this to work, this function must be called AFTER the random name generator has been called.
230 //Input: $con, $pid
231 function deIdInsuranceDataTable($con, $pid)
234 //check if there is
235 $type = ['primary', 'secondary', 'tertiary'];
237 //get the patient demographics: fname, middlename, lname, street, state, city, zip, dob, subscriber phone,
238 $query = "Select fname, lname, mname, DOB, street, state, city, postal_code, ss, phone_home from patient_data where pid = $pid";
239 $query = mysqli_query($con, $query);
240 $demographic_array = mysqli_fetch_array($query);
243 //for each insurance type:
244 //see if a insured name exists. if it does, update the table with the new information
245 foreach ($type as $ty) {
246 //see if a first name exists, if it does then replace it
247 $query = "select subscriber_lname from insurance_data where pid = '{$pid}' and type = '{$ty}' ";
248 $result = mysqli_query($con, $query);
249 $result = mysqli_fetch_array($result);
250 if ($result['subscriber_lname'] === '' || $result === null) {
251 continue;
252 } else {
253 $string = "update insurance_data set
254 subscriber_lname = '{$demographic_array['lname']}',
255 subscriber_fname = '{$demographic_array['fname']}',
256 subscriber_mname = '{$demographic_array['mname']}',
257 subscriber_relationship = 'self',
258 subscriber_ss = '000-00-0000',
259 subscriber_DOB = '{$demographic_array['DOB']}',
260 subscriber_street = '{$demographic_array['street']}',
261 subscriber_postal_code = '{$demographic_array['postal_code']}',
262 subscriber_city = '{$demographic_array['city']}',
263 subscriber_state = '{$demographic_array['state']}',
264 subscriber_phone = '{$demographic_array['phone_home']}'
266 where pid = $pid and type = '{$ty}'; ";
268 $update = mysqli_query($con, $string) or print("update did not work");
274 //This function replaces the facility name with unqiue names so users can
275 //see how different facilities have their data and permissions abstracted depending on
276 //which facility the user has access too.
277 function deIdFacilityTable($con)
281 removeColumn($con, "facility", "phone", "(000) 000-0000");
282 removeColumn($con, "facility", "street", "123 Somewhere");
283 removeColumn($con, "facility", "federal_ein", "123456789");
284 removeColumn($con, "facility", "attn", "Office Person");
285 removeColumn($con, "facility", "facility_npi", "987654321");
287 $query = "select * from facility";
288 $result = mysqli_query($con, $query);
289 while ($row = mysqli_fetch_array($result)) {
290 $string = "update facility set
292 `name` = 'Facility_{$row['id']}',
293 `phone` = '(000) 000-0000'
295 where `id` = {$row['id']}";
297 mysqli_query($con, $string) or print "Error altering facility table \n";
298 $string = '';
301 echo "Successfully deid'ed Facility Table";
307 function deIdUsersTable($con)
311 removeColumn($con, "users", "federaltaxid", "123456789");
312 removeColumn($con, "users", "federaldrugid", "D000-1");
313 // removeColumn($con, "users", "facility", "Service Facility");
314 removeColumn($con, "users", "email", "doc@home.com");
315 removeColumn($con, "users", "organization", "");
316 removeColumn($con, "users", "npi", "");
317 removeColumn($con, "users", "street", "");
318 removeColumn($con, "users", "streetb", "");
319 removeColumn($con, "users", "city", "");
320 removeColumn($con, "users", "state", "");
321 removeColumn($con, "users", "zip", "");
322 removeColumn($con, "users", "phone", "");
323 removeColumn($con, "users", "phonew1", "");
324 removeColumn($con, "users", "phonew2", "");
325 removeColumn($con, "users", "fax", "");
326 removeColumn($con, "users", "phonecell", "");
327 removeColumn($con, "users", "info", "");
330 $query = "select * from users";
331 $result = mysqli_query($con, $query);
335 while ($row = mysqli_fetch_array($result)) {
336 $string = "update users set ";
338 if (strpos($row['newcrop_user_role'], 'doctor') !==false) {
339 $string .= "fname = 'Doctor.{$row['id']}',
340 lname = 'Doctor.{$row['id']}' ";
341 } else if (strpos($row['newcrop_user_role'], 'nurse') !==false) {
342 $string .= "fname = 'Nurse.{$row['id']}',
343 lname = 'Nurse.{$row['id']}' ";
344 } else {
345 $string .= "fname = 'noNewCrop',
346 lname = 'Nurse{$row['id']}'";
349 $string .= " where `id` = {$row['id']} ";
350 mysqli_query($con, $string) or print "Error altering users table \n";
351 //$string = '';
354 echo "successfuly altered user table \n ";
357 //Clears most forms. User must verify that this function handles all text fields that might hold personal identifying information
358 function deIdForms($con)
361 removeColumn($con, "form_physical_exam", "comments", "no comment, talk to my lawyer");
362 removeColumn($con, "form_soap", "subjective", "Patient Hurts");
363 removeColumn($con, "form_soap", "objective", "I see bad things");
364 removeColumn($con, "form_soap", "assessment", "Bad thing is fixable");
365 removeColumn($con, "form_soap", "plan", "play by ear, hope for the best");
366 removeColumn($con, "form_dictation", "dictation", "This are words the Doctor doth spoke.");
367 removeColumn($con, "form_dictation", "additional_notes", "");
369 removeColumn($con, "phone_numbers", "prefix", "555");
370 removeColumn($con, "onotes", "body", "Internal Office notes posted here");
371 removeColumn($con, "pnotes", "body", "DATETIME (FROMUSER to USER) Note about Patient posted here");
373 echo "successfuly altered user forms table \n ";
376 // truncates log tables to remove all hidden information
377 function truncateLogs($con)
380 $query = mysqli_query($con, "TRUNCATE TABLE log") or print("\n\n log table not truncated \n\n");
381 $query = mysqli_query($con, "TRUNCATE TABLE documents") or print("\n\n documents table not truncated \n\n");
387 //Program starts here
389 $patients = deIdPatientData($con, $lnames, $male, $female, $DEBUG);
390 $success = deIdFacilityTable($con);
391 $success = deIdUsersTable($con);
392 $success = deIdForms($con);
393 $success = truncateLogs($con);
397 // Close the connection
398 mysqli_close($con);
399 echo " \n successfully updated $patients patients \n\n";