Prepping ubuntu package for 4.1.2 release
[openemr.git] / contrib / util / emr_scan_load.plx
blob29df37026295e118c1c0953ccc50736afe2940c8
1 #!/usr/bin/perl
2 use strict;
4 use Time::Local;
5 use DBI;
7 #######################################################################
8 # Copyright (C) 2006 Rod Roark <rod@sunsetsystems.com>
10 # This program is free software; you can redistribute it and/or
11 # modify it under the terms of the GNU General Public License
12 # as published by the Free Software Foundation; either version 2
13 # of the License, or (at your option) any later version.
14 #######################################################################
15 # This program is to be run frequently via the system crontab. On
16 # each run it will move scanned-in documents from a shared directory
17 # into matching locations in the openemr/documents directory, and also
18 # update the database accordingly.
19 # Each scanned-in file must be placed into a directory corresponding
20 # to its category, and its name must begin with the patient's pubpid
21 # followed by any non-alphanumeric character. For example:
22 # <shared-directory>/Categories/XRay/1234-knee-xray-20060131.jpg
23 #######################################################################
24 # NOTE: This is contributed as-is for the possible benefit of those who
25 # may wish to build on it. As of this writing it is not known how well
26 # it works, if at all, with current OpenEMR.
27 #######################################################################
29 #######################################################################
30 # Parameters that you must customize #
31 #######################################################################
33 # Parameters for MySQL database connections:
35 my $DBNAME = "openemr"; # database name
36 my $DBUSER = "openemr"; # database user name
37 my $DBPASS = "secret"; # database user's password
39 # Log file location:
41 my $INSLOG = "/mnt/drive2/emr_scan_load.log";
43 # Shared directory base where the scanners deposit files:
45 my $INPATH = "/mnt/drive2/scan_docs";
47 # Base directory for OpenEMR documents:
49 # my $OUTPATH = "/usr/local/apache2/htdocs/openemr/documents";
50 my $OUTPATH = "/mnt/drive2/documents";
52 # This should specify the user and group that the web server runs as:
54 my $CHOWN_COMMAND = "chown nobody:nogroup";
56 # This is the user from whom patient notes are addressed:
58 my $SCANNER_OPERATOR = 'ksears';
60 # This person gets patient notes if the doctor cannot be determined or is
61 # out of the office:
63 my $DEFAULT_PRACTITIONER = 'candroney';
65 # We need a SQL condition to identify encounter forms that will only be
66 # entered by practitioners. Yeah there's probably a better way.
68 my $PRACTITIONER_FORM =
69 "formdir = 'soap' OR formdir = 'reviewofs' OR formdir = 'ros'";
71 # For each day of the week (Sun-Sat), if the office is open:
73 my @open_days = (0, 1, 1, 1, 1, 1, 0);
75 # Office closing time:
77 my $closing_time = '17:00:00';
79 # Set this to 0 for production use:
81 my $DEBUG = 0;
83 #######################################################################
84 # Initialization #
85 #######################################################################
87 my $dbh = DBI->connect("dbi:mysql:dbname=$DBNAME", $DBUSER, $DBPASS)
88 or die $DBI::errstr;
90 $| = 1; # Turn on autoflushing of stdout.
92 #######################################################################
93 # Functions #
94 #######################################################################
96 # Write a log message.
98 sub tolog($$) {
99 my ($msg, $error) = @_;
100 my @tm = localtime; $tm[5] += 1900; $tm[4] += 1;
101 my $ts = sprintf "%04u-%02u-%02u %02u:%02u:%02u",
102 $tm[5], $tm[4], $tm[3], $tm[2], $tm[1], $tm[0];
103 if ($error) {
104 $msg = '***ERROR: ' . $msg;
106 if ($DEBUG) {
107 $msg = '*DEBUGGING* ' . $msg;
109 open LOG, ">> $INSLOG" or die "Cannot open $INSLOG: $!";
110 print LOG "$ts $msg\n";
111 close LOG;
114 # Determine if the designated doc is in the office at the specified time,
115 # or will be later that day.
117 sub is_doc_available($$) {
118 my ($practitioner, $now) = @_;
120 $now = time() if (! $now);
122 my @tm = localtime $now; $tm[5] += 1900; $tm[4] += 1;
123 my $current_date = sprintf "%04u-%02u-%02u", $tm[5], $tm[4], $tm[3];
124 my $current_time = sprintf "%02u:%02u:%02u", $tm[2], $tm[1], $tm[0];
125 my $daynow = int($now / (24 * 60 * 60));
126 my $docid = $dbh->selectrow_array("SELECT id FROM users WHERE " .
127 "username = '$practitioner'");
129 my $query = "SELECT " .
130 "pc_catid, pc_eventDate, pc_endDate, pc_recurrtype, pc_recurrspec, " .
131 "pc_startTime, pc_endTime, pc_alldayevent " .
132 "FROM openemr_postcalendar_events " .
133 "WHERE pc_aid = '$docid' AND " .
134 "( pc_catid = 2 OR pc_catid = 3 OR pc_duration >= 21600 ) AND " .
135 "pc_eventDate <= '$current_date' AND pc_endDate >= '$current_date' " .
136 "ORDER BY pc_startTime";
137 my $esth = $dbh->prepare($query)
138 or die $dbh->errstr;
139 $esth->execute() or die $esth->errstr;
141 # &tolog($query, 0); # debugging
143 my $vacation = 0;
144 my $in_active = 0;
145 my $in_until = '';
147 # Look at each event selected.
149 while (my @erow = $esth->fetchrow_array()) {
150 my ($pc_catid, $pc_eventDate, $pc_endDate, $pc_recurrtype, $pc_recurrspec,
151 $pc_startTime, $pc_endTime, $pc_alldayevent) = @erow;
152 my $repeattype = '0';
153 my $repeatfreq = '0';
154 if ($pc_recurrspec =~ /"event_repeat_freq_type";s:1:"(\d)"/) {
155 # 0 = day, 1 = week, 2 = month, 3 = year, 4 = workday
156 $repeattype = $1;
158 if ($pc_recurrspec =~ /"event_repeat_freq";s:1:"(\d)"/) {
159 # 1 = every, 2 = every other, etc.
160 $repeatfreq = $1;
163 # If this is a repeating event, determine if it applies to today.
165 if ($pc_recurrtype) {
166 $pc_eventDate =~ /^(\d+)\D(\d+)\D(\d+)/;
167 my $time0 = timelocal(1, 0, 0, $3, $2 - 1, $1 - 1900);
168 my $day0 = int($time0 / (24 * 60 * 60));
169 my $elapsed_days = $daynow - $day0;
170 my @tm0 = localtime $time0; $tm0[5] += 1900; $tm0[4] += 1;
172 if ($repeattype == 0) { # day
173 if ($repeatfreq > 1) {
174 my $quotient = sprintf('%.4f', $elapsed_days / $repeatfreq);
175 next if ($quotient != int($quotient));
178 elsif ($repeattype == 1) { # week
179 my $repdays = $repeatfreq * 7;
180 if ($repdays > 0) {
181 my $quotient = sprintf('%.4f', $elapsed_days / $repdays);
182 next if ($quotient != int($quotient));
185 elsif ($repeattype == 2) { # month
186 next if ($tm[3] != $tm0[3]); # if not same day of month
187 if ($repeatfreq > 1) {
188 my $elapsed_months = ($tm[5] - $tm0[5]) * 12 + $tm[4] - $tm0[4];
189 my $quotient = sprintf('%.4f', $elapsed_months / $repeatfreq);
190 next if ($quotient != int($quotient));
193 elsif ($repeattype == 3) { # year
194 next if ($tm[3] != $tm0[3] || $tm[4] != $tm0[4]);
195 if ($repeatfreq > 1) {
196 my $elapsed_years = $tm[5] - $tm0[5];
197 my $quotient = sprintf('%.4f', $elapsed_years / $repeatfreq);
198 next if ($quotient != int($quotient));
201 elsif ($repeattype == 4) { # work day (M-F)
202 next if ($tm0[6] == 0 || $tm0[6] == 6); # if today is not a work day
203 if ($repeatfreq > 1) {
204 my $dowdiff = $tm[6] - $tm0[6];
205 my $elapsed_workdays = ($elapsed_days - $dowdiff) * 5 / 7 + $dowdiff;
206 my $quotient = sprintf('%.4f', $elapsed_workdays / $repeatfreq);
207 next if ($quotient != int($quotient));
212 # Phew. Now we know that this event is applicable to this day.
214 if ($pc_catid == 2) { # In Office
215 $in_active = 1;
216 $in_until = '23:59:59';
218 elsif ($pc_catid == 3) { # Out of Office
219 if ($in_active) {
220 $in_until = $pc_startTime;
221 $in_active = 0;
224 else { # Vacation or equivalent
225 $vacation = 1;
229 return 1 if ($in_until && ! $vacation && $in_until gt $current_time);
230 return 0;
233 # Generate a patient note if appropriate for this top-level category.
235 sub generate_note($$$) {
236 # my ($pid, $path, $topcategory, $docid) = @_;
237 my ($pid, $path, $docid) = @_;
239 # if ($NO_NOTE_CATEGORIES{$topcategory}) {
240 # return;
243 # Get the login name of the user who entered the last clinical form for
244 # this patient. That's who we'll send the note to.
246 my $fsth = $dbh->prepare("SELECT user, groupname FROM forms WHERE " .
247 "pid = '$pid' AND ( $PRACTITIONER_FORM ) ORDER BY date DESC LIMIT 1")
248 or die $dbh->errstr;
249 $fsth->execute() or die $fsth->errstr;
250 my @frow = $fsth->fetchrow_array();
252 my $assigned_to = $DEFAULT_PRACTITIONER;
253 my $groupname = '';
254 if (@frow) {
255 $assigned_to = $frow[0];
256 $groupname = $frow[1];
258 # Check the schedule to see if this doc is in today (or will be in the
259 # next working day if it's after hours now); if not, assign the default.
261 my $now = time();
262 my @tm = localtime $now;
263 my $current_time = sprintf "%02u:%02u:%02u", $tm[2], $tm[1], $tm[0];
264 while ($open_days[$tm[6]] == 0 || $current_time gt $closing_time) {
265 $current_time = '00:00:00';
266 $now = timelocal(0, 0, 0, $tm[3], $tm[4], $tm[5]) + (24 * 60 * 60);
267 @tm = localtime $now;
269 if (! &is_doc_available($assigned_to, $now)) {
270 &tolog("$assigned_to not available, using default practitioner", 0);
271 $assigned_to = $DEFAULT_PRACTITIONER;
273 } else {
274 &tolog("Patient $pid has no clinical forms, using default practitioner", 0);
277 # Build the text of the note including timestamp and addressing.
278 # The document ID is also included, so that OpenEMR can easily
279 # look up and display the document when the note is viewed.
281 my @tm = localtime; $tm[5] += 1900; $tm[4] += 1;
282 my $body = sprintf "%04u-%02u-%02u %02u:%02u",
283 $tm[5], $tm[4], $tm[3], $tm[2], $tm[1];
284 $body .= " ($SCANNER_OPERATOR to $assigned_to) ";
285 $body .= "New scanned document $docid: $path";
287 # Write it to the database.
289 my $query = "INSERT INTO pnotes ( date, body, pid, user, groupname, " .
290 "authorized, activity, title, assigned_to ) VALUES ( " .
291 "NOW(), '$body', '$pid', '$SCANNER_OPERATOR', '$groupname', '1', '1', " .
292 "'New Document', '$assigned_to')";
293 if (! $DEBUG) {
294 $dbh->do($query) or die $query;
297 &tolog("Patient note assigned to $assigned_to", 0);
300 # Process a document file.
302 sub process_file($$) {
303 my ($path, $notify) = @_;
305 # Extract the ending filename from the path. Clean it up a bit for
306 # use as an output filename. Return if it's a leftover problem.
308 my $dname = '';
309 my $fname = $path;
310 if ($path =~ m'^(.*)/([^/]+)$') {
311 $dname = $1;
312 $fname = $2;
314 return if ($fname =~ /^ERR/);
315 $fname =~ s/[^a-zA-Z0-9_.]/_/g;
316 while ($fname =~ s/__/_/g) {}
318 # Get out if the source file is open by any other process. This
319 # normally means that it's still being written via smbd.
321 if (my $pr = `lsof -t '$INPATH/$path'`) {
322 &tolog("Temporarily skipping '$path' which is open by process $pr", 0);
323 return;
326 # Get the chart number and look up the patient's pid.
328 my $pubpid = '';
329 if ($fname =~ /^([A-Za-z0-9]+)/) {
330 $pubpid = $1;
333 my $psth = $dbh->prepare("SELECT pid FROM patient_data " .
334 "WHERE pubpid = '$pubpid' LIMIT 1")
335 or die $dbh->errstr;
336 $psth->execute() or die $psth->errstr;
337 my @prow = $psth->fetchrow_array();
339 if (! @prow) {
340 &tolog("$path: there is no patient with chart id '$pubpid'", 1);
341 rename "$INPATH/$path", "$INPATH/$dname/ERR-$fname" if (! $DEBUG);
342 return;
344 if ($psth->fetchrow_array()) {
345 &tolog("$path: there are multiple patients with chart id '$pubpid'", 1);
346 rename "$INPATH/$path", "$INPATH/$dname/ERR-$fname" if (! $DEBUG);
347 return;
349 my $pid = $prow[0];
351 # Look up the document category and get its ID.
353 my @catpath = split /\//, $dname;
354 my $catid = 0;
355 my $catname = '';
356 for (my $i = 0; $i < scalar @catpath; ++$i) {
357 $catname = $catpath[$i];
358 $catid = $dbh->selectrow_array("SELECT id FROM categories WHERE " .
359 "name = '$catname' AND parent = $catid");
361 if (! $catid) {
362 &tolog("Category '$dname' does not exist", 1);
363 rename "$INPATH/$path", "$INPATH/$dname/ERR-$fname";
364 return;
367 # Get the source file size; if zero, skip it. It appears this
368 # can be a case where the scanner software has created the
369 # directory entry but has not yet written the data, so we do
370 # not want to delete the file.
372 my $fsize = (stat("$INPATH/$path"))[7];
373 if (! $fsize) {
374 # &tolog("Deleting and skipping empty file '$path'", 1);
375 # unlink "$INPATH/$path";
376 &tolog("Skipping empty file '$path'", 1);
377 return;
380 # Make sure the target directory exists.
382 system "mkdir -p '$OUTPATH/$pid'; $CHOWN_COMMAND '$OUTPATH/$pid'" if (! $DEBUG);
384 # If the target filename exists, modify it until it doesn't.
386 my $count = 0;
387 while (-e "$OUTPATH/$pid/$fname") {
388 my $oldfname = $fname;
389 my $fsuff = '';
390 if ($fname =~ /^(.*)(\..+)$/) {
391 $fname = $1;
392 $fsuff = $2;
394 if ($count++) {
395 $fname =~ s/_\d+$//;
397 $fname .= '_' . $count . $fsuff;
398 &tolog("File '$pid/$oldfname' already exists; trying '$pid/$fname' ...", 0);
400 my $target = "$OUTPATH/$pid/$fname";
402 # Move the file to its destination and set its owner and group.
404 my $movecmd = "mv '$INPATH/$path' '$target'";
405 if (! $DEBUG) {
406 my $rc = system $movecmd;
407 if ($rc != 0) {
408 &tolog("Command '$movecmd' failed with return code $rc", 1);
409 return;
411 system "$CHOWN_COMMAND '$target'";
414 # Compute assorted values for the documents table.
416 $dbh->do("update sequences set id = id + 1") if (! $DEBUG);
417 my $newid = $dbh->selectrow_array("SELECT id FROM sequences");
418 my @tm = localtime; $tm[5] += 1900; $tm[4] += 1;
419 my $ts1 = sprintf "%04u-%02u-%02u %02u:%02u:%02u", $tm[5], $tm[4], $tm[3], $tm[2], $tm[1], $tm[0];
420 my $ts2 = $ts1;
421 $ts2 =~ s/\D//g;
422 my $mimetype = $DEBUG ? '' : `file -i $target`;
423 $mimetype =~ s/;.*$//; # remove trailing "; charset=..." if present
424 $mimetype =~ s/^.*:\s*//; # remove everything preceding the mime type
425 $mimetype =~ s/\s*$//; # remove any trailing line feed or other whitespace
426 if (! $mimetype) {
427 &tolog("Unable to determine MIME type using 'file -i $target'; proceeding with empty type", 1);
430 if (! $DEBUG) {
431 # Update the database.
433 my $query = "INSERT INTO documents ( " .
434 "id, type, size, date, url, mimetype, revision, foreign_id" .
435 " ) VALUES ( " .
436 "'$newid', 'file_url', '$fsize', '$ts1', 'file://$target', '$mimetype', '$ts2', $pid " .
437 ")";
438 $dbh->do($query) or die $query;
440 my $query = "INSERT INTO categories_to_documents ( " .
441 "category_id, document_id" .
442 " ) VALUES ( " .
443 "'$catid', '$newid' " .
444 ")";
445 $dbh->do($query) or die $query;
448 &tolog("Loaded '$path' as $mimetype", 0);
450 # Generate the patient note if appropriate for this category.
452 # &generate_note($pid, "$dname/$fname", $catpath[1], $newid);
453 &generate_note($pid, "$dname/$fname", $newid) if $notify;
456 # Scan the source directory recursively to invoke processing of each
457 # document file.
459 sub scan_dir($) {
460 my $path = shift;
461 my $notify = -f "$INPATH/$path/.notify";
462 opendir my $dh, "$INPATH/$path";
463 while (my $dirent = readdir $dh) {
464 next if ($dirent =~ /^\./);
465 my $thispath = $path ? "$path/$dirent" : $dirent;
466 if (-d "$INPATH/$thispath") {
467 &scan_dir($thispath);
468 } else {
469 &process_file($thispath, $notify);
472 closedir $dh;
475 #######################################################################
476 # Processing #
477 #######################################################################
479 &scan_dir('Categories'); # This makes everything happen.
481 #######################################################################
482 # Shutdown #
483 #######################################################################
485 $dbh->disconnect;