5 * CLI script to compute patient duplication scores in patient_data.dupscore.
6 * The score is a measure of the likelihood that the patient is a duplicate of
7 * some patient created before it. Optional arguments specifying values are:
9 * --webdir The full path to the OpenEMR web directory. Defaults to the directory
10 * two levels above that of this script.
11 * --site The site ID. Defaults to "default".
12 * --maxmins The maximum number of minutes to run. Defaults to 60. Use 0 for no limit.
14 * Arguments not having a value may be:
16 * -q Suppresses messages on stdout.
17 * -c Clears existing scores to recompute all of them; except scores of -1
18 * are not cleared because they are manually assigned.
20 * Because we are comparing every patient with every other patient, this script can
21 * run for a very long time with a large database. Thus we want to do it offline.
22 * If --maxmins is exceeded the script will terminate but may be run again to resume where it left off.
26 * php /var/www/html/openemr/contrib/util/dupscore.cli.php --maxmins=240
28 * Here is a sample crontab entry to automatically run up to 2 hours nightly:
29 * 3 1 * * * root php /var/www/html/openemr/contrib/util/dupscore.cli.php -q --maxmins=120
32 * @link http://www.open-emr.org
33 * @author Rod Roark <rod@sunsetsystems.com>
34 * @copyright Copyright (c) 2021 Rod Roark <rod@sunsetsystems.com>
35 * @license https://github.com/openemr/openemr/blob/master/LICENSE GNU General Public License 3
38 // The number of scores to compute between tests for time expiration.
// Guard: abort with a message unless PHP is running under the command-line SAPI.
41 if (php_sapi_name() !== 'cli') {
42 die("This script must be run from the command line!\n");
// Parse the command line: -c and -q are value-less short flags; --webdir,
// --site and --maxmins are long options that each require a value.
45 $args = getopt('cq', array('webdir:', 'site:', 'maxmins:'));
47 // print_r($args); // debugging
// Apply defaults for any long option that was not supplied.
// webdir falls back to the path three dirname() steps up from this file's path.
49 $args['webdir'] = $args['webdir'] ??
dirname(dirname(dirname(__FILE__
)));
50 $args['site'] = $args['site'] ??
'default';
// maxmins is coerced to a float; 60 is used when the option is absent.
51 $args['maxmins'] = floatval($args['maxmins'] ??
60);
// When PHP_OS begins with "WIN" (case-insensitive), convert backslashes in the
// web directory path to forward slashes.
53 if (stripos(PHP_OS
, 'WIN') === 0) {
54 $args['webdir'] = str_replace("\\", "/", $args['webdir']);
57 // Bring in some libraries and settings shared with web scripts.
// The site ID is placed in $_GET before the include; presumably globals.php
// reads it from there to select the site -- TODO confirm.
58 $_GET['site'] = $args['site'];
60 require_once($args['webdir'] . "/interface/globals.php");
62 // Bring in the getDupScoreSQL() function.
// NOTE(review): $srcdir is not assigned in the code visible here; presumably it
// is set by globals.php -- confirm.
63 require_once("$srcdir/dupscore.inc.php");
// Compute the wall-clock deadline tested by the scoring loop. The default of
// one year from now serves as "no limit"; a non-empty (non-zero) --maxmins
// replaces it with now + maxmins minutes.
65 $endtime = time() +
365 * 24 * 60 * 60; // a year from now
66 if (!empty($args['maxmins'])) {
67 $endtime = time() +
$args['maxmins'] * 60;
// -c: reset every score to -9, leaving rows whose score is -1 untouched.
// -9 is the value the scoring query selects on to find patients needing a score.
70 if (isset($args['c'])) {
71 // Note -1 means the patient is manually flagged as not a duplicate.
72 sqlStatementNoLog("UPDATE patient_data SET dupscore = -9 WHERE dupscore != -1");
// Confirm the reset on stdout unless -q was given.
73 if (!isset($args['q'])) {
74 echo xl("All scores have been cleared.") . "\n";
// Scoring loop: keep processing batches until $finished is set or the deadline
// in $endtime has passed.
81 while (!$finished && time() < $endtime) {
// Build the batch query: for each patient whose dupscore is -9 (p1), pair it
// with every patient having a lower pid (p2) and take the highest value of the
// getDupScoreSQL() expression. Results are grouped per p1.pid, ordered by pid,
// and limited to $querylimit rows.
// NOTE(review): the comma join behaves as an inner join, so a -9 patient with
// no lower-pid row (e.g. the lowest pid in the table) yields no result row
// here -- confirm that case is acceptable or handled elsewhere.
83 $query1 = "SELECT p1.pid, MAX(" . getDupScoreSQL() . ") AS dupscore" .
84 " FROM patient_data AS p1, patient_data AS p2" .
85 " WHERE p1.dupscore = -9 AND p2.pid < p1.pid" .
86 " GROUP BY p1.pid ORDER BY p1.pid LIMIT " . escape_limit($querylimit);
88 // echo "$query1\n"; // debugging
90 $res1 = sqlStatementNoLog($query1);
// Collect the batch into $scores, keyed by pid.
91 while ($row1 = sqlFetchArray($res1)) {
92 $scores[$row1['pid']] = $row1['dupscore'];
// Write each computed score back to its patient row.
94 foreach ($scores as $pid => $score) {
96 "UPDATE patient_data SET dupscore = ? WHERE pid = ?",
// Progress output applies only when -q is absent and the batch was non-empty.
102 if (!isset($args['q']) && count($scores) > 0) {
// A batch smaller than $querylimit means the query returned every remaining row.
105 if (count($scores) < $querylimit) {
// Final status messages; all of them are suppressed by -q.
110 if (!isset($args['q'])) {
112 echo xl("No patients without scores were found.");
115 echo "\n" . xl("All done.") . "\n";
117 echo "\n" . xl("This run is incomplete due to time expiration.") . "\n";