From 70390ef4f901f05219040d2c1375eecac00c8114 Mon Sep 17 00:00:00 2001 From: Rod Roark Date: Tue, 6 Jul 2021 10:32:10 +1000 Subject: [PATCH] Implemented duplicate patients management. (#4502) * Implemented duplicate patients management. Identification and correction of duplicate patients involves the following: 1. Column "dupscore" in the patient_data table was created to contain a value representing the likelihood that the patient duplicates any of the patients created earlier (that is, with a lesser "pid" value). It may have any of the following values: o -9, initially assigned, indicating that it needs to be computed. o -1, indicating that the patient has been manually flagged as not duplicating a previous patient. o A score in the range 0-40 computed by the SQL expression found in library/dupscore.inc.php. A value of 12 or higher is considered to be very likely a duplicate. 2. The script library/dupscore.inc.php which provides the SQL expression needed for scoring. 3. The command-line tool contrib/util/dupscore.cli.php which should be run to initially assign all patient_data.dupscore values, and may be run from time to time to update or recompute them. See the source of that script for its usage syntax. 4. The web tool Administration -> Patient -> Manage Duplicates. For each patient with a dupscore of 7 or higher, it shows below it all the corresponding matching patients. Thus you see a collection of patient groups. Within each group you can choose to mark the first patient as not a duplicate, or you can merge it with one of the other patients. You do that by choosing "Merge and Keep" or "Merge and Discard" for the other patient, which brings up the Merge Patients page to complete the merge. That page describes in more detail what it will do. --- contrib/util/dupscore.cli.php | 123 ++++++++++ interface/main/tabs/menu/menus/standard.json | 12 + interface/patient_file/manage_dup_patients.php | 297 +++++++++++++++++++++++++ interface/patient_file/merge_patients.php | 90 ++++++-- library/dupscore.inc.php | 44 ++++ library/patient.inc | 23 ++ sql/6_0_0-to-6_1_0_upgrade.sql | 4 + sql/database.sql | 1 + 8 files changed, 570 insertions(+), 24 deletions(-) create mode 100755 contrib/util/dupscore.cli.php create mode 100644 interface/patient_file/manage_dup_patients.php create mode 100644 library/dupscore.inc.php diff --git a/contrib/util/dupscore.cli.php b/contrib/util/dupscore.cli.php new file mode 100755 index 000000000..dc693a549 --- /dev/null +++ b/contrib/util/dupscore.cli.php @@ -0,0 +1,123 @@ +#!/usr/bin/php + + * @copyright Copyright (c) 2021 Rod Roark + * @license https://github.com/openemr/openemr/blob/master/LICENSE GNU General Public License 3 + */ + +// The number of scores to compute between tests for time expiration. +$querylimit = 1000; + +if (php_sapi_name() !== 'cli') { + die("This script must be run from the command line!\n"); +} + +$args = getopt('cq', array('webdir:', 'site:', 'maxmins:')); + +// print_r($args); // debugging + +$args['webdir'] = $args['webdir'] ?? dirname(dirname(dirname(__FILE__))); +$args['site'] = $args['site'] ?? 'default'; +$args['maxmins'] = floatval($args['maxmins'] ?? 60); + +if (stripos(PHP_OS, 'WIN') === 0) { + $args['webdir'] = str_replace("\\", "/", $args['webdir']); +} + +// Bring in some libraries and settings shared with web scripts. +$_GET['site'] = $args['site']; +$ignoreAuth = 1; +require_once($args['webdir'] . "/interface/globals.php"); + +// Bring in the getDupScoreSQL() function. +require_once("$srcdir/dupscore.inc.php"); + +$endtime = time() + 365 * 24 * 60 * 60; // a year from now +if (!empty($args['maxmins'])) { + $endtime = time() + $args['maxmins'] * 60; +} + +if (isset($args['c'])) { + // Note -1 means the patient is manually flagged as not a duplicate. + sqlStatementNoLog("UPDATE patient_data SET dupscore = -9 WHERE dupscore != -1"); + if (!isset($args['q'])) { + echo xl("All scores have been cleared.") . "\n"; + } +} + +$count = 0; +$finished = false; + +while (!$finished && time() < $endtime) { + $scores = array(); + $query1 = "SELECT p1.pid, MAX(" . getDupScoreSQL() . ") AS dupscore" . + " FROM patient_data AS p1, patient_data AS p2" . + " WHERE p1.dupscore = -9 AND p2.pid < p1.pid" . + " GROUP BY p1.pid ORDER BY p1.pid LIMIT " . escape_limit($querylimit); + + // echo "$query1\n"; // debugging + + $res1 = sqlStatementNoLog($query1); + while ($row1 = sqlFetchArray($res1)) { + $scores[$row1['pid']] = $row1['dupscore']; + }; + foreach ($scores as $pid => $score) { + sqlStatementNoLog( + "UPDATE patient_data SET dupscore = ? WHERE pid = ?", + array($score, $pid) + ); + ++$count; + } + + if (!isset($args['q']) && count($scores) > 0) { + echo "$count... "; + } + if (count($scores) < $querylimit) { + $finished = true; + } +} + +if (!isset($args['q'])) { + if (!$count) { + echo xl("No patients without scores were found."); + } + if ($finished) { + echo "\n" . xl("All done.") . "\n"; + } else { + echo "\n" . xl("This run is incomplete due to time expiration.") . "\n"; + } +} + +if (!$finished) { + exit(1); +} diff --git a/interface/main/tabs/menu/menus/standard.json b/interface/main/tabs/menu/menus/standard.json index ce421e8a6..74116ed39 100644 --- a/interface/main/tabs/menu/menus/standard.json +++ b/interface/main/tabs/menu/menus/standard.json @@ -854,6 +854,18 @@ "admin", "super" ] + }, + { + "label": "Manage Duplicates", + "menu_id": "adm0", + "target": "adm", + "url": "/interface/patient_file/manage_dup_patients.php", + "children": [], + "requirement": 0, + "acl_req": [ + "admin", + "super" + ] } ], "requirement": 0, diff --git a/interface/patient_file/manage_dup_patients.php b/interface/patient_file/manage_dup_patients.php new file mode 100644 index 000000000..5ac65e281 --- /dev/null +++ b/interface/patient_file/manage_dup_patients.php @@ -0,0 +1,297 @@ + + * @copyright Copyright (c) 2017-2021 Rod Roark + * @license https://github.com/openemr/openemr/blob/master/LICENSE GNU General Public License 3 + */ + +require_once("../globals.php"); +require_once("$srcdir/patient.inc"); +require_once("$srcdir/options.inc.php"); + +use OpenEMR\Common\Acl\AclMain; +use OpenEMR\Common\Csrf\CsrfUtils; +use OpenEMR\Core\Header; +use OpenEMR\Services\FacilityService; + +$firsttime = true; + +function displayRow($row, $pid = '') +{ + global $firsttime; + + $bgcolor = '#ffdddd'; + $myscore = ''; + $options = ''; + + if (empty($pid)) { + $pid = $row['pid']; + } + + if (isset($row['myscore'])) { + $myscore = $row['myscore']; + $options = "" . + "" . + ""; + } else { + $myscore = $row['dupscore']; + $options = "" . + "" . + ""; + if (!$firsttime) { + echo "  \n"; + } + } + + $firsttime = false; + $ptname = $row['lname'] . ', ' . $row['fname'] . ' ' . $row['mname']; + $phones = array(); + if (trim($row['phone_home'])) { + $phones[] = trim($row['phone_home']); + } + if (trim($row['phone_biz' ])) { + $phones[] = trim($row['phone_biz' ]); + } + if (trim($row['phone_cell'])) { + $phones[] = trim($row['phone_cell']); + } + $phones = implode(', ', $phones); + + $facname = ''; + if ($row['home_facility']) { + $facrow = getFacility($row['home_facility']); + if (!empty($facrow['name'])) { + $facname = $facrow['name']; + } + } + ?> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +<?php echo xlt('Duplicate Patient Management') ?> + + + + + + + + + + +
+ +

+ +
+ + + + + + + + + +
+ +   + ' onclick='window.print()' /> +
+
+ + + + + + + + + + + + + + + + + + + + 7 " . + "ORDER BY dupscore DESC, pid DESC LIMIT 100"; +$res1 = sqlStatement($query); +while ($row1 = sqlFetchArray($res1)) { + displayRow($row1); + $query = "SELECT p2.*, ($scorecalc) AS myscore " . + "FROM patient_data AS p1, patient_data AS p2 WHERE " . + "p1.pid = ? AND p2.pid < p1.pid AND ($scorecalc) > 7 " . + "ORDER BY myscore DESC, p2.pid DESC"; + $res2 = sqlStatement($query, array($row1['pid'])); + while ($row2 = sqlFetchArray($res2)) { + displayRow($row2, $row1['pid']); + } +} +?> + +
+ + + + + + + + + + + + + + + + + + + + + + + +
+ + + +
+
+ + +
'> + + +
+ + + diff --git a/interface/patient_file/merge_patients.php b/interface/patient_file/merge_patients.php index 17ea5f4f5..6419e95fb 100644 --- a/interface/patient_file/merge_patients.php +++ b/interface/patient_file/merge_patients.php @@ -8,7 +8,7 @@ * @link http://www.open-emr.org * @author Rod Roark * @author Brady Miller - * @copyright Copyright (c) 2013 Rod Roark + * @copyright Copyright (c) 2013-2021 Rod Roark * @copyright Copyright (c) 2018 Brady Miller * @license https://github.com/openemr/openemr/blob/master/LICENSE GNU General Public License 3 */ @@ -16,11 +16,15 @@ set_time_limit(0); require_once("../globals.php"); +require_once("$srcdir/patient.inc"); use OpenEMR\Common\Acl\AclMain; use OpenEMR\Common\Csrf\CsrfUtils; use OpenEMR\Core\Header; +$form_pid1 = empty($_GET['pid1']) ? 0 : intval($_GET['pid1']); +$form_pid2 = empty($_GET['pid2']) ? 0 : intval($_GET['pid2']); + // Set this to true for production use. If false you will get a "dry run" with no updates. $PRODUCTION = true; @@ -160,7 +164,7 @@ if (!empty($_POST['form_submit'])) { $tprow = sqlQuery("SELECT * FROM patient_data WHERE pid = ?", array($target_pid)); $sprow = sqlQuery("SELECT * FROM patient_data WHERE pid = ?", array($source_pid)); - // Do some checking to make sure source and target exist and are the same person. + // Do some checking to make sure source and target exist and are the same person. if (empty($tprow['pid'])) { die(xlt('Target patient not found')); } @@ -169,20 +173,20 @@ if (!empty($_POST['form_submit'])) { die(xlt('Source patient not found')); } - if ($tprow['ss'] != $sprow['ss']) { - die(xlt('Target and source SSN do not match')); - } - - if (empty($tprow['DOB']) || $tprow['DOB'] == '0000-00-00') { - die(xlt('Target patient has no DOB')); - } - - if (empty($sprow['DOB']) || $sprow['DOB'] == '0000-00-00') { - die(xlt('Source patient has no DOB')); - } - - if ($tprow['DOB'] != $sprow['DOB']) { - die(xlt('Target and source DOB do not match')); + // SSN and DOB checking are skipped if we are coming from the dup manager. + if (!$form_pid1 || !$form_pid2) { + if ($tprow['ss'] != $sprow['ss']) { + die(xlt('Target and source SSN do not match')); + } + if (empty($tprow['DOB']) || $tprow['DOB'] == '0000-00-00') { + die(xlt('Target patient has no DOB')); + } + if (empty($sprow['DOB']) || $sprow['DOB'] == '0000-00-00') { + die(xlt('Source patient has no DOB')); + } + if ($tprow['DOB'] != $sprow['DOB']) { + die(xlt('Target and source DOB do not match')); + } } $tdocdir = "$OE_SITE_DIR/documents/" . check_file_dir_name($target_pid); @@ -269,23 +273,56 @@ if (!empty($_POST['form_submit'])) { $crow = sqlQuery("SHOW COLUMNS FROM `" . escape_table_name($tblname) . "` WHERE " . "`Field` LIKE 'pid' OR `Field` LIKE 'patient_id'"); if (!empty($crow['Field'])) { - $colname = $crow['Field']; - updateRows($tblname, $colname, $source_pid, $target_pid); + $colname = $crow['Field']; + updateRows($tblname, $colname, $source_pid, $target_pid); + // Note employer_data is included here; its rows are never deleted and the + // most recent row for each patient is the one that is normally relevant. } } } + // Recompute dupscore for target patient. + updateDupScore($target_pid); + echo "
" . xlt('Merge complete.') . ""; + echo "
 
\n"; + echo "\n"; + echo ""; + exit(0); } + + +$target_string = xl('Click to select'); +$source_string = xl('Click to select'); +$target_pid = '0'; +$source_pid = '0'; + +if ($form_pid1) { + $target_pid = $form_pid1; + $row = sqlQuery( + "SELECT lname, fname FROM patient_data WHERE pid = ?", + array($target_pid) + ); + $target_string = $row['lname'] . ', ' . $row['fname'] . " ($target_pid)"; +} +if ($form_pid2) { + $source_pid = $form_pid2; + $row = sqlQuery( + "SELECT lname, fname FROM patient_data WHERE pid = ?", + array($source_pid) + ); + $source_string = $row['lname'] . ', ' . $row['fname'] . " ($source_pid)"; +} ?>

-
+
@@ -294,8 +331,11 @@ if (!empty($_POST['form_submit'])) {
- )' onclick='sel_patient(this, this.form.form_target_pid)' title='' readonly /> - + ' readonly /> + @@ -307,10 +347,10 @@ if (!empty($_POST['form_submit'])) { )' + value='' onclick='sel_patient(this, this.form.form_source_pid)' title='' readonly /> - + @@ -321,7 +361,7 @@ if (!empty($_POST['form_submit'])) {
-

+

@@ -333,7 +373,9 @@ if (!empty($_POST['form_submit'])) {

+

+
diff --git a/library/dupscore.inc.php b/library/dupscore.inc.php new file mode 100644 index 000000000..6abd821e7 --- /dev/null +++ b/library/dupscore.inc.php @@ -0,0 +1,44 @@ + + * @copyright Copyright (c) 2021 Rod Roark + * @license https://github.com/openemr/openemr/blob/master/LICENSE GNU General Public License 3 + */ + +// The SQL returned by this function is an expression that computes the duplication +// score between two patient_data table rows p1 and p2. +// +function getDupScoreSQL() +{ + return + // 5 First name + "5 * (SOUNDEX(p1.fname) = SOUNDEX(p2.fname)) + " . + // 3 Last name + "3 * (SOUNDEX(p1.lname) = SOUNDEX(p2.lname)) + " . + // 4 Any phone number + "4 * (" . + "(TRIM(p1.phone_home) != '' AND ( " . + "REPLACE(REPLACE(p1.phone_home, '-', ''), ' ', '') IN ( " . + "REPLACE(REPLACE(p2.phone_home, '-', ''), ' ', ''), " . + "REPLACE(REPLACE(p2.phone_biz , '-', ''), ' ', ''), " . + "REPLACE(REPLACE(p2.phone_cell, '-', ''), ' ', '')))) " . + "OR (TRIM(p1.phone_biz) != '' AND ( " . + "REPLACE(REPLACE(p1.phone_biz , '-', ''), ' ', '') IN ( " . + "REPLACE(REPLACE(p2.phone_biz , '-', ''), ' ', ''), " . + "REPLACE(REPLACE(p2.phone_cell, '-', ''), ' ', '')))) " . + "OR (TRIM(p1.phone_cell) != '' AND ( " . + "REPLACE(REPLACE(p1.phone_cell, '-', ''), ' ', '') = " . + "REPLACE(REPLACE(p2.phone_cell, '-', ''), ' ', ''))) " . + ") + " . + // 6 Birth date + "6 * (p1.DOB IS NOT NULL AND p2.DOB IS NOT NULL AND p1.DOB = p2.DOB) + " . + // 7 Email + "7 * (TRIM(p1.email) != '' AND TRIM(p1.email) = TRIM(p2.email)) + " . + // 15 Government ID + "15 * (TRIM(p1.ss) != '' AND TRIM(p1.ss) = TRIM(p2.ss))"; +} diff --git a/library/patient.inc b/library/patient.inc index fd7d4a0eb..eeeff5ecf 100644 --- a/library/patient.inc +++ b/library/patient.inc @@ -8,9 +8,11 @@ * @author Brady Miller * @author Sherwin Gaddis * @author Stephen Waite + * @author Rod Roark * @copyright Copyright (c) 2018-2019 Brady Miller * @copyright Copyright (c) 2019 Sherwin Gaddis * @copyright Copyright (c) 2018-2021 Stephen Waite + * @copyright Copyright (c) 2021 Rod Roark * @license https://github.com/openemr/openemr/blob/master/LICENSE GNU General Public License 3 */ @@ -19,6 +21,8 @@ use OpenEMR\Services\FacilityService; use OpenEMR\Services\PatientService; use OpenEMR\Services\SocialHistoryService; +require_once(dirname(__FILE__) . "/dupscore.inc.php"); + $facilityService = new FacilityService(); // These are for sports team use: @@ -1074,6 +1078,7 @@ function updatePatientData($pid, $new, $create = false) $pid === null ) { $result = $patientService->databaseInsert($new); + updateDupScore($result['pid']); } else { $new['pid'] = $pid; $result = $patientService->databaseUpdate($new); @@ -1795,3 +1800,21 @@ function is_patient_deceased($pid, $date = '') return $results; } } + +// This computes, sets and returns the dup score for the given patient. +// +function updateDupScore($pid) +{ + $row = sqlQuery( + "SELECT MAX(" . getDupScoreSQL() . ") AS dupscore " . + "FROM patient_data AS p1, patient_data AS p2 WHERE " . + "p1.pid = ? AND p2.pid < p1.pid", + array($pid) + ); + $dupscore = empty($row['dupscore']) ? 0 : $row['dupscore']; + sqlStatement( + "UPDATE patient_data SET dupscore = ? WHERE pid = ?", + array($dupscore, $pid) + ); + return $dupscore; +} diff --git a/sql/6_0_0-to-6_1_0_upgrade.sql b/sql/6_0_0-to-6_1_0_upgrade.sql index 8a62e884a..15f4a92f3 100644 --- a/sql/6_0_0-to-6_1_0_upgrade.sql +++ b/sql/6_0_0-to-6_1_0_upgrade.sql @@ -817,6 +817,10 @@ ALTER TABLE `form_clinical_notes` ADD COLUMN `clinical_notes_category` varchar(1 UPDATE `list_options` SET notes="LOINC:11488-4" WHERE list_id="Clinical_Note_Type" AND option_id="consultation_note" AND notes="LOINC:81222-2"; #EndIf +#IfMissingColumn patient_data dupscore +ALTER TABLE `patient_data` ADD COLUMN `dupscore` INT NOT NULL default -9; +#EndIf + #IfMissingColumn procedure_type procedure_type_name ALTER TABLE `procedure_type` ADD `procedure_type_name` VARCHAR(64) NULL #EndIf diff --git a/sql/database.sql b/sql/database.sql index 658b222ee..ab6ad3abf 100644 --- a/sql/database.sql +++ b/sql/database.sql @@ -7207,6 +7207,7 @@ CREATE TABLE `patient_data` ( `birth_fname` TEXT, `birth_lname` TEXT, `birth_mname` TEXT, + `dupscore` INT NOT NULL default -9, UNIQUE KEY `pid` (`pid`), UNIQUE KEY `uuid` (`uuid`), KEY `id` (`id`) -- 2.11.4.GIT