Bug 1893155 - Part 6: Correct constant for minimum epoch day. r=spidermonkey-reviewer...
[gecko.git] / third_party / content_analysis_sdk / demo / handler.h
blob88599963c51b013ce9c8622563b60f3cd8281852
1 // Copyright 2022 The Chromium Authors.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #ifndef CONTENT_ANALYSIS_DEMO_HANDLER_H_
6 #define CONTENT_ANALYSIS_DEMO_HANDLER_H_
8 #include <time.h>
10 #include <algorithm>
11 #include <atomic>
12 #include <chrono>
13 #include <cstdio>
14 #include <fstream>
15 #include <iostream>
16 #include <optional>
17 #include <thread>
18 #include <utility>
19 #include <regex>
20 #include <vector>
22 #include "content_analysis/sdk/analysis_agent.h"
23 #include "demo/atomic_output.h"
24 #include "demo/request_queue.h"
26 using RegexArray = std::vector<std::pair<std::string, std::regex>>;
// An AgentEventHandler that dumps request information to stdout and decides
// each request's verdict (block, warn, report-only or allow) by matching the
// request's content against the configured regular expressions.
30 class Handler : public content_analysis::sdk::AgentEventHandler {
31 public:
32 using Event = content_analysis::sdk::ContentAnalysisEvent;
34 Handler(std::vector<unsigned long>&& delays, const std::string& print_data_file_path,
35 RegexArray&& toBlock = RegexArray(),
36 RegexArray&& toWarn = RegexArray(),
37 RegexArray&& toReport = RegexArray()) :
38 toBlock_(std::move(toBlock)), toWarn_(std::move(toWarn)), toReport_(std::move(toReport)),
39 delays_(std::move(delays)), print_data_file_path_(print_data_file_path) {}
41 const std::vector<unsigned long> delays() { return delays_; }
42 size_t nextDelayIndex() const { return nextDelayIndex_; }
44 protected:
45 // subclasses can override this
46 // returns whether the response has been set
47 virtual bool SetCustomResponse(AtomicCout& aout, std::unique_ptr<Event>& event) {
48 return false;
50 // subclasses can override this
51 // returns whether the response has been sent
52 virtual bool SendCustomResponse(std::unique_ptr<Event>& event) {
53 return false;
55 // Analyzes one request from Google Chrome and responds back to the browser
56 // with either an allow or block verdict.
57 void AnalyzeContent(AtomicCout& aout, std::unique_ptr<Event> event) {
58 // An event represents one content analysis request and response triggered
59 // by a user action in Google Chrome. The agent determines whether the
60 // user is allowed to perform the action by examining event->GetRequest().
61 // The verdict, which can be "allow" or "block" is written into
62 // event->GetResponse().
64 DumpEvent(aout.stream(), event.get());
66 bool success = true;
67 std::optional<content_analysis::sdk::ContentAnalysisResponse_Result_TriggeredRule_Action> caResponse;
68 bool setResponse = SetCustomResponse(aout, event);
69 if (!setResponse) {
70 caResponse = content_analysis::sdk::ContentAnalysisResponse_Result_TriggeredRule_Action_BLOCK;
71 if (event->GetRequest().has_text_content()) {
72 caResponse = DecideCAResponse(
73 event->GetRequest().text_content(), aout.stream());
74 } else if (event->GetRequest().has_file_path()) {
75 // TODO: Fix downloads to store file *first* so we can check contents.
76 // Until then, just check the file name:
77 caResponse = DecideCAResponse(
78 event->GetRequest().file_path(), aout.stream());
79 } else if (event->GetRequest().has_print_data()) {
80 // In the case of print request, normally the PDF bytes would be parsed
81 // for sensitive data violations. To keep this class simple, only the
82 // URL is checked for the word "block".
83 caResponse = DecideCAResponse(event->GetRequest().request_data().url(), aout.stream());
87 if (!success) {
88 content_analysis::sdk::UpdateResponse(
89 event->GetResponse(),
90 std::string(),
91 content_analysis::sdk::ContentAnalysisResponse::Result::FAILURE);
92 aout.stream() << " Verdict: failed to reach verdict: ";
93 aout.stream() << event->DebugString() << std::endl;
94 } else {
95 aout.stream() << " Verdict: ";
96 if (caResponse) {
97 switch (caResponse.value()) {
98 case content_analysis::sdk::ContentAnalysisResponse_Result_TriggeredRule_Action_BLOCK:
99 aout.stream() << "BLOCK";
100 break;
101 case content_analysis::sdk::ContentAnalysisResponse_Result_TriggeredRule_Action_WARN:
102 aout.stream() << "WARN";
103 break;
104 case content_analysis::sdk::ContentAnalysisResponse_Result_TriggeredRule_Action_REPORT_ONLY:
105 aout.stream() << "REPORT_ONLY";
106 break;
107 case content_analysis::sdk::ContentAnalysisResponse_Result_TriggeredRule_Action_ACTION_UNSPECIFIED:
108 aout.stream() << "ACTION_UNSPECIFIED";
109 break;
110 default:
111 aout.stream() << "<error>";
112 break;
114 auto rc =
115 content_analysis::sdk::SetEventVerdictTo(event.get(), caResponse.value());
116 if (rc != content_analysis::sdk::ResultCode::OK) {
117 aout.stream() << " error: "
118 << content_analysis::sdk::ResultCodeToString(rc) << std::endl;
119 aout.stream() << " " << event->DebugString() << std::endl;
121 aout.stream() << std::endl;
122 } else {
123 aout.stream() << " Verdict: allow" << std::endl;
125 aout.stream() << std::endl;
127 aout.stream() << std::endl;
129 // If a delay is specified, wait that much.
130 size_t nextDelayIndex = nextDelayIndex_.fetch_add(1);
131 unsigned long delay = delays_[nextDelayIndex % delays_.size()];
132 if (delay > 0) {
133 aout.stream() << "Delaying response to " << event->GetRequest().request_token()
134 << " for " << delay << "s" << std::endl<< std::endl;
135 aout.flush();
136 std::this_thread::sleep_for(std::chrono::seconds(delay));
139 // Send the response back to Google Chrome.
140 bool sentCustomResponse = SendCustomResponse(event);
141 if (!sentCustomResponse) {
142 auto rc = event->Send();
143 if (rc != content_analysis::sdk::ResultCode::OK) {
144 aout.stream() << "[Demo] Error sending response: "
145 << content_analysis::sdk::ResultCodeToString(rc)
146 << std::endl;
147 aout.stream() << event->DebugString() << std::endl;
152 protected:
153 void OnBrowserConnected(
154 const content_analysis::sdk::BrowserInfo& info) override {
155 AtomicCout aout;
156 aout.stream() << std::endl << "==========" << std::endl;
157 aout.stream() << "Browser connected pid=" << info.pid
158 << " path=" << info.binary_path << std::endl;
161 void OnBrowserDisconnected(
162 const content_analysis::sdk::BrowserInfo& info) override {
163 AtomicCout aout;
164 aout.stream() << std::endl << "Browser disconnected pid=" << info.pid << std::endl;
165 aout.stream() << "==========" << std::endl;
168 void OnAnalysisRequested(std::unique_ptr<Event> event) override {
169 // If the agent is capable of analyzing content in the background, the
170 // events may be handled in background threads. Having said that, a
171 // event should not be assumed to be thread safe, that is, it should not
172 // be accessed by more than one thread concurrently.
174 // In this example code, the event is handled synchronously.
175 AtomicCout aout;
176 aout.stream() << std::endl << "----------" << std::endl << std::endl;
177 AnalyzeContent(aout, std::move(event));
180 void OnResponseAcknowledged(
181 const content_analysis::sdk::ContentAnalysisAcknowledgement&
182 ack) override {
183 const char* final_action = "<Unknown>";
184 if (ack.has_final_action()) {
185 switch (ack.final_action()) {
186 case content_analysis::sdk::ContentAnalysisAcknowledgement::ACTION_UNSPECIFIED:
187 final_action = "<Unspecified>";
188 break;
189 case content_analysis::sdk::ContentAnalysisAcknowledgement::ALLOW:
190 final_action = "Allow";
191 break;
192 case content_analysis::sdk::ContentAnalysisAcknowledgement::REPORT_ONLY:
193 final_action = "Report only";
194 break;
195 case content_analysis::sdk::ContentAnalysisAcknowledgement::WARN:
196 final_action = "Warn";
197 break;
198 case content_analysis::sdk::ContentAnalysisAcknowledgement::BLOCK:
199 final_action = "Block";
200 break;
204 AtomicCout aout;
205 aout.stream() << " Ack: " << ack.request_token() << std::endl;
206 aout.stream() << " Final action: " << final_action << std::endl;
208 void OnCancelRequests(
209 const content_analysis::sdk::ContentAnalysisCancelRequests& cancel)
210 override {
211 AtomicCout aout;
212 aout.stream() << "Cancel: " << std::endl;
213 aout.stream() << " User action ID: " << cancel.user_action_id() << std::endl;
216 void OnInternalError(
217 const char* context,
218 content_analysis::sdk::ResultCode error) override {
219 AtomicCout aout;
220 aout.stream() << std::endl
221 << "*ERROR*: context=\"" << context << "\" "
222 << content_analysis::sdk::ResultCodeToString(error)
223 << std::endl;
226 void DumpEvent(std::stringstream& stream, Event* event) {
227 time_t now = time(nullptr);
228 stream << "Received at: " << ctime(&now); // Includes \n.
229 stream << "Received from: pid=" << event->GetBrowserInfo().pid
230 << " path=" << event->GetBrowserInfo().binary_path << std::endl;
232 const content_analysis::sdk::ContentAnalysisRequest& request =
233 event->GetRequest();
234 std::string connector = "<Unknown>";
235 if (request.has_analysis_connector()) {
236 switch (request.analysis_connector()) {
237 case content_analysis::sdk::FILE_DOWNLOADED:
238 connector = "download";
239 break;
240 case content_analysis::sdk::FILE_ATTACHED:
241 connector = "attach";
242 break;
243 case content_analysis::sdk::BULK_DATA_ENTRY:
244 connector = "bulk-data-entry";
245 break;
246 case content_analysis::sdk::PRINT:
247 connector = "print";
248 break;
249 case content_analysis::sdk::FILE_TRANSFER:
250 connector = "file-transfer";
251 break;
252 default:
253 break;
256 std::string reason;
257 if (request.has_reason()) {
258 using content_analysis::sdk::ContentAnalysisRequest;
259 switch (request.reason()) {
260 case content_analysis::sdk::ContentAnalysisRequest::UNKNOWN:
261 reason = "<Unknown>";
262 break;
263 case content_analysis::sdk::ContentAnalysisRequest::CLIPBOARD_PASTE:
264 reason = "CLIPBOARD_PASTE";
265 break;
266 case content_analysis::sdk::ContentAnalysisRequest::DRAG_AND_DROP:
267 reason = "DRAG_AND_DROP";
268 break;
269 case content_analysis::sdk::ContentAnalysisRequest::FILE_PICKER_DIALOG:
270 reason = "FILE_PICKER_DIALOG";
271 break;
272 case content_analysis::sdk::ContentAnalysisRequest::PRINT_PREVIEW_PRINT:
273 reason = "PRINT_PREVIEW_PRINT";
274 break;
275 case content_analysis::sdk::ContentAnalysisRequest::SYSTEM_DIALOG_PRINT:
276 reason = "SYSTEM_DIALOG_PRINT";
277 break;
278 case content_analysis::sdk::ContentAnalysisRequest::NORMAL_DOWNLOAD:
279 reason = "NORMAL_DOWNLOAD";
280 break;
281 case content_analysis::sdk::ContentAnalysisRequest::SAVE_AS_DOWNLOAD:
282 reason = "SAVE_AS_DOWNLOAD";
283 break;
287 std::string url =
288 request.has_request_data() && request.request_data().has_url()
289 ? request.request_data().url() : "<No URL>";
291 std::string tab_title =
292 request.has_request_data() && request.request_data().has_tab_title()
293 ? request.request_data().tab_title() : "<No tab title>";
295 std::string filename =
296 request.has_request_data() && request.request_data().has_filename()
297 ? request.request_data().filename() : "<No filename>";
299 std::string digest =
300 request.has_request_data() && request.request_data().has_digest()
301 ? request.request_data().digest() : "<No digest>";
303 std::string file_path =
304 request.has_file_path()
305 ? request.file_path() : "None, bulk text entry or print";
307 std::string machine_user =
308 request.has_client_metadata() &&
309 request.client_metadata().has_browser() &&
310 request.client_metadata().browser().has_machine_user()
311 ? request.client_metadata().browser().machine_user() : "<No machine user>";
313 std::string email =
314 request.has_request_data() && request.request_data().has_email()
315 ? request.request_data().email() : "<No email>";
317 time_t t = request.expires_at();
318 std::string expires_at_str = ctime(&t);
319 // Returned string includes trailing \n, overwrite with null.
320 expires_at_str[expires_at_str.size() - 1] = 0;
321 time_t secs_remaining = t - now;
323 std::string user_action_id = request.has_user_action_id()
324 ? request.user_action_id() : "<No user action id>";
326 stream << "Request: " << request.request_token() << std::endl;
327 stream << " User action ID: " << user_action_id << std::endl;
328 stream << " Expires at: " << expires_at_str << " ("
329 << secs_remaining << " seconds from now)" << std::endl;
330 stream << " Connector: " << connector << std::endl;
331 if (!reason.empty()) {
332 stream << " Reason: " << reason << std::endl;
334 stream << " URL: " << url << std::endl;
335 stream << " Tab title: " << tab_title << std::endl;
336 stream << " Filename: " << filename << std::endl;
337 stream << " Digest: " << digest << std::endl;
338 stream << " Filepath: " << file_path << std::endl;
339 stream << " Machine user: " << machine_user << std::endl;
340 stream << " Email: " << email << std::endl;
342 if (request.has_text_content() && !request.text_content().empty()) {
343 std::string prefix = " Pasted data: ";
344 std::string text_content = request.text_content();
346 // Truncate the text past 50 bytes to keep it to a reasonable length in
347 // the terminal window.
348 if (text_content.size() > 50) {
349 prefix = " Pasted data (truncated): ";
350 text_content = text_content.substr(0, 50) + "...";
352 stream << prefix
353 << text_content
354 << std::endl;
355 stream << " Pasted data size (bytes): "
356 << request.text_content().size()
357 << std::endl;
360 if (request.has_print_data() && !print_data_file_path_.empty()) {
361 if (request.request_data().has_print_metadata() &&
362 request.request_data().print_metadata().has_printer_name()) {
363 stream << " Printer name: "
364 << request.request_data().print_metadata().printer_name()
365 << std::endl;
366 } else {
367 stream << " No printer name in request" << std::endl;
370 stream << " Print data saved to: " << print_data_file_path_
371 << std::endl;
372 using content_analysis::sdk::ContentAnalysisEvent;
373 auto print_data =
374 content_analysis::sdk::CreateScopedPrintHandle(event->GetRequest(),
375 event->GetBrowserInfo().pid);
376 std::ofstream file(print_data_file_path_,
377 std::ios::out | std::ios::trunc | std::ios::binary);
378 file.write(print_data->data(), print_data->size());
379 file.flush();
380 file.close();
// Reads the whole file at |file_path| into |*content|.
//
// Returns false when |content| is null, the file cannot be opened, its size
// cannot be determined, it is larger than 1MB, or fewer bytes than expected
// could be read. On success |*content| holds the file bytes followed by one
// extra NUL byte, so content->size() is the file size plus one.
bool ReadContentFromFile(const std::string& file_path,
                         std::string* content) {
  // This example does not handle files larger than 1MB.
  constexpr std::streamoff kMaxFileSize = 1024 * 1024;

  if (content == nullptr)
    return false;

  // Open positioned at the end so tellg() reports the file size.
  std::ifstream file(file_path,
                     std::ios::in | std::ios::binary | std::ios::ate);
  if (!file.is_open())
    return false;

  // tellg() reports failure as -1; keep the full-width type so large sizes
  // are not truncated before the limit check.
  const std::streamoff size = file.tellg();
  if (size < 0 || size > kMaxFileSize)
    return false;

  // Make sure the content string can hold the file plus the trailing NUL.
  content->assign(static_cast<std::size_t>(size) + 1, '\0');

  // Read file into string.
  file.seekg(0, std::ios::beg);
  file.read(&(*content)[0], static_cast<std::streamsize>(size));
  return file.gcount() == static_cast<std::streamsize>(size);
}
406 std::optional<content_analysis::sdk::ContentAnalysisResponse_Result_TriggeredRule_Action>
407 DecideCAResponse(const std::string& content, std::stringstream& stream) {
408 for (auto& r : toBlock_) {
409 if (std::regex_search(content, r.second)) {
410 stream << "'" << content << "' matches BLOCK regex '"
411 << r.first << "'" << std::endl;
412 return content_analysis::sdk::ContentAnalysisResponse_Result_TriggeredRule_Action_BLOCK;
415 for (auto& r : toWarn_) {
416 if (std::regex_search(content, r.second)) {
417 stream << "'" << content << "' matches WARN regex '"
418 << r.first << "'" << std::endl;
419 return content_analysis::sdk::ContentAnalysisResponse_Result_TriggeredRule_Action_WARN;
422 for (auto& r : toReport_) {
423 if (std::regex_search(content, r.second)) {
424 stream << "'" << content << "' matches REPORT_ONLY regex '"
425 << r.first << "'" << std::endl;
426 return content_analysis::sdk::ContentAnalysisResponse_Result_TriggeredRule_Action_REPORT_ONLY;
429 stream << "'" << content << "' was ALLOWed\n";
430 return {};
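// For the demo, content that matches a regex in one of these lists is
// blocked, warned about, or reported, respectively. Each entry pairs the
// pattern text with its compiled regex.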
434 RegexArray toBlock_;
435 RegexArray toWarn_;
436 RegexArray toReport_;
438 std::vector<unsigned long> delays_;
439 std::atomic<size_t> nextDelayIndex_;
440 std::string print_data_file_path_;
// A Handler that queues incoming analysis requests and processes them on
// background threads instead of handling them synchronously.
445 class QueuingHandler : public Handler {
446 public:
447 QueuingHandler(unsigned long threads, std::vector<unsigned long>&& delays, const std::string& print_data_file_path,
448 RegexArray&& toBlock = RegexArray(),
449 RegexArray&& toWarn = RegexArray(),
450 RegexArray&& toReport = RegexArray())
451 : Handler(std::move(delays), print_data_file_path, std::move(toBlock), std::move(toWarn), std::move(toReport)) {
452 StartBackgroundThreads(threads);
455 ~QueuingHandler() override {
456 // Abort background process and wait for it to finish.
457 request_queue_.abort();
458 WaitForBackgroundThread();
461 private:
462 void OnAnalysisRequested(std::unique_ptr<Event> event) override {
464 time_t now = time(nullptr);
465 const content_analysis::sdk::ContentAnalysisRequest& request =
466 event->GetRequest();
467 AtomicCout aout;
468 aout.stream() << std::endl << "Queuing request: " << request.request_token()
469 << " at " << ctime(&now) << std::endl;
472 request_queue_.push(std::move(event));
475 static void* ProcessRequests(void* qh) {
476 QueuingHandler* handler = reinterpret_cast<QueuingHandler*>(qh);
478 while (true) {
479 auto event = handler->request_queue_.pop();
480 if (!event)
481 break;
483 AtomicCout aout;
484 aout.stream() << std::endl << "----------" << std::endl;
485 aout.stream() << "Thread: " << std::this_thread::get_id()
486 << std::endl;
487 aout.stream() << "Delaying request processing for "
488 << handler->delays()[handler->nextDelayIndex() % handler->delays().size()] << "s" << std::endl << std::endl;
489 aout.flush();
491 handler->AnalyzeContent(aout, std::move(event));
494 return 0;
497 // A list of outstanding content analysis requests.
498 RequestQueue request_queue_;
500 void StartBackgroundThreads(unsigned long threads) {
501 threads_.reserve(threads);
502 for (unsigned long i = 0; i < threads; ++i) {
503 threads_.emplace_back(std::make_unique<std::thread>(ProcessRequests, this));
507 void WaitForBackgroundThread() {
508 for (auto& thread : threads_) {
509 thread->join();
// Background threads that process queued requests.
514 std::vector<std::unique_ptr<std::thread>> threads_;
517 #endif // CONTENT_ANALYSIS_DEMO_HANDLER_H_