Bug 1874684 - Part 10: Replace BigInt with Int128 in RoundNumberToIncrement. r=mgaudet
[gecko.git] / third_party / content_analysis_sdk / agent_improvements.patch
blobc1475caded39b9345e6d75ce270d204206c21f68
1 commit 4ad63eb3aa65ce7baa08190aac2770540dc25f43
2 Author: Greg Stoll <gstoll@mozilla.com>
3 Date: Wed, 27 Mar 2024 12:13:56 -0500
5 Mozilla improvements to content_analysis_sdk
7 - add ability for demo agent to block/warn/report specific regexes
8 - add ability for demo agent to chose a sequence of delays to apply
10 diff --git a/CMakeLists.txt b/CMakeLists.txt
11 index 39477223f031c..5dacc81031117 100644
12 --- a/CMakeLists.txt
13 +++ b/CMakeLists.txt
14 @@ -203,6 +203,7 @@ add_executable(agent
15 ./demo/agent.cc
16 ./demo/handler.h
18 +target_compile_features(agent PRIVATE cxx_std_17)
19 target_include_directories(agent PRIVATE ${AGENT_INCLUDES})
20 target_link_libraries(agent PRIVATE cac_agent)
22 diff --git a/agent/src/event_win.h b/agent/src/event_win.h
23 index 9f8b6903566f2..f631f693dcd9c 100644
24 --- a/agent/src/event_win.h
25 +++ b/agent/src/event_win.h
26 @@ -28,6 +28,12 @@ class ContentAnalysisEventWin : public ContentAnalysisEventBase {
27 ResultCode Close() override;
28 ResultCode Send() override;
29 std::string DebugString() const override;
30 + std::string SerializeStringToSendToBrowser() {
31 + return agent_to_chrome()->SerializeAsString();
32 + }
33 + void SetResponseSent() { response_sent_ = true; }
35 + HANDLE Pipe() const { return hPipe_; }
37 private:
38 void Shutdown();
39 diff --git a/browser/src/client_win.cc b/browser/src/client_win.cc
40 index 9d3d7e8c52662..039946d131398 100644
41 --- a/browser/src/client_win.cc
42 +++ b/browser/src/client_win.cc
43 @@ -418,7 +418,11 @@ DWORD ClientWin::ConnectToPipe(const std::string& pipename, HANDLE* handle) {
45 void ClientWin::Shutdown() {
46 if (hPipe_ != INVALID_HANDLE_VALUE) {
47 - FlushFileBuffers(hPipe_);
48 + // TODO: This trips the LateWriteObserver. We could move this earlier
49 + // (before the LateWriteObserver is created) or just remove it, although
50 + // the later could mean an ACK message is not processed by the agent
51 + // in time.
52 + // FlushFileBuffers(hPipe_);
53 CloseHandle(hPipe_);
54 hPipe_ = INVALID_HANDLE_VALUE;
56 diff --git a/demo/agent.cc b/demo/agent.cc
57 index ff8b93f647ebd..3e168b0915a0c 100644
58 --- a/demo/agent.cc
59 +++ b/demo/agent.cc
60 @@ -2,12 +2,18 @@
61 // Use of this source code is governed by a BSD-style license that can be
62 // found in the LICENSE file.
64 +#include <algorithm>
65 #include <fstream>
66 #include <iostream>
67 #include <string>
68 +#include <regex>
69 +#include <vector>
71 #include "content_analysis/sdk/analysis_agent.h"
72 #include "demo/handler.h"
73 +#include "demo/handler_misbehaving.h"
75 +using namespace content_analysis::sdk;
77 // Different paths are used depending on whether this agent should run as a
78 // use specific agent or not. These values are chosen to match the test
79 @@ -19,19 +25,50 @@ constexpr char kPathSystem[] = "brcm_chrm_cas";
80 std::string path = kPathSystem;
81 bool use_queue = false;
82 bool user_specific = false;
83 -unsigned long delay = 0; // In seconds.
84 +std::vector<unsigned long> delays = {0}; // In seconds.
85 unsigned long num_threads = 8u;
86 std::string save_print_data_path = "";
87 +RegexArray toBlock, toWarn, toReport;
88 +static bool useMisbehavingHandler = false;
89 +static std::string modeStr;
91 // Command line parameters.
92 -constexpr const char* kArgDelaySpecific = "--delay=";
93 +constexpr const char* kArgDelaySpecific = "--delays=";
94 constexpr const char* kArgPath = "--path=";
95 constexpr const char* kArgQueued = "--queued";
96 constexpr const char* kArgThreads = "--threads=";
97 constexpr const char* kArgUserSpecific = "--user";
98 +constexpr const char* kArgToBlock = "--toblock=";
99 +constexpr const char* kArgToWarn = "--towarn=";
100 +constexpr const char* kArgToReport = "--toreport=";
101 +constexpr const char* kArgMisbehave = "--misbehave=";
102 constexpr const char* kArgHelp = "--help";
103 constexpr const char* kArgSavePrintRequestDataTo = "--save-print-request-data-to=";
105 +std::map<std::string, Mode> sStringToMode = {
106 +#define AGENT_MODE(name) {#name, Mode::Mode_##name},
107 +#include "modes.h"
108 +#undef AGENT_MODE
111 +std::map<Mode, std::string> sModeToString = {
112 +#define AGENT_MODE(name) {Mode::Mode_##name, #name},
113 +#include "modes.h"
114 +#undef AGENT_MODE
117 +std::vector<std::pair<std::string, std::regex>>
118 +ParseRegex(const std::string str) {
119 + std::vector<std::pair<std::string, std::regex>> ret;
120 + for (auto it = str.begin(); it != str.end(); /* nop */) {
121 + auto it2 = std::find(it, str.end(), ',');
122 + ret.push_back(std::make_pair(std::string(it, it2), std::regex(it, it2)));
123 + it = it2 == str.end() ? it2 : it2 + 1;
126 + return ret;
129 bool ParseCommandLine(int argc, char* argv[]) {
130 for (int i = 1; i < argc; ++i) {
131 const std::string arg = argv[i];
132 @@ -44,16 +81,38 @@ bool ParseCommandLine(int argc, char* argv[]) {
133 path = kPathUser;
134 user_specific = true;
135 } else if (arg.find(kArgDelaySpecific) == 0) {
136 - delay = std::stoul(arg.substr(strlen(kArgDelaySpecific)));
137 + std::string delaysStr = arg.substr(strlen(kArgDelaySpecific));
138 + delays.clear();
139 + size_t posStart = 0, posEnd;
140 + unsigned long delay;
141 + while ((posEnd = delaysStr.find(',', posStart)) != std::string::npos) {
142 + delay = std::stoul(delaysStr.substr(posStart, posEnd - posStart));
143 + if (delay > 30) {
144 + delay = 30;
146 + delays.push_back(delay);
147 + posStart = posEnd + 1;
149 + delay = std::stoul(delaysStr.substr(posStart));
150 if (delay > 30) {
151 delay = 30;
153 + delays.push_back(delay);
154 } else if (arg.find(kArgPath) == 0) {
155 path = arg.substr(strlen(kArgPath));
156 } else if (arg.find(kArgQueued) == 0) {
157 use_queue = true;
158 } else if (arg.find(kArgThreads) == 0) {
159 num_threads = std::stoul(arg.substr(strlen(kArgThreads)));
160 + } else if (arg.find(kArgToBlock) == 0) {
161 + toBlock = ParseRegex(arg.substr(strlen(kArgToBlock)));
162 + } else if (arg.find(kArgToWarn) == 0) {
163 + toWarn = ParseRegex(arg.substr(strlen(kArgToWarn)));
164 + } else if (arg.find(kArgToReport) == 0) {
165 + toReport = ParseRegex(arg.substr(strlen(kArgToReport)));
166 + } else if (arg.find(kArgMisbehave) == 0) {
167 + modeStr = arg.substr(strlen(kArgMisbehave));
168 + useMisbehavingHandler = true;
169 } else if (arg.find(kArgHelp) == 0) {
170 return false;
171 } else if (arg.find(kArgSavePrintRequestDataTo) == 0) {
172 @@ -72,13 +131,17 @@ void PrintHelp() {
173 << "A simple agent to process content analysis requests." << std::endl
174 << "Data containing the string 'block' blocks the request data from being used." << std::endl
175 << std::endl << "Options:" << std::endl
176 - << kArgDelaySpecific << "<delay> : Add a delay to request processing in seconds (max 30)." << std::endl
177 + << kArgDelaySpecific << "<delay1,delay2,...> : Add delays to request processing in seconds. Delays are limited to 30 seconds and are applied round-robin to requests. Default is 0." << std::endl
178 << kArgPath << " <path> : Used the specified path instead of default. Must come after --user." << std::endl
179 << kArgQueued << " : Queue requests for processing in a background thread" << std::endl
180 << kArgThreads << " : When queued, number of threads in the request processing thread pool" << std::endl
181 << kArgUserSpecific << " : Make agent OS user specific." << std::endl
182 << kArgHelp << " : prints this help message" << std::endl
183 - << kArgSavePrintRequestDataTo << " : saves the PDF data to the given file path for print requests";
184 + << kArgSavePrintRequestDataTo << " : saves the PDF data to the given file path for print requests" << std::endl
185 + << kArgToBlock << "<regex> : Regular expression matching file and text content to block." << std::endl
186 + << kArgToWarn << "<regex> : Regular expression matching file and text content to warn about." << std::endl
187 + << kArgToReport << "<regex> : Regular expression matching file and text content to report." << std::endl
188 + << kArgMisbehave << "<mode> : Use 'misbehaving' agent in given mode for testing purposes." << std::endl;
191 int main(int argc, char* argv[]) {
192 @@ -87,9 +150,17 @@ int main(int argc, char* argv[]) {
193 return 1;
196 - auto handler = use_queue
197 - ? std::make_unique<QueuingHandler>(num_threads, delay, save_print_data_path)
198 - : std::make_unique<Handler>(delay, save_print_data_path);
199 + auto handler =
200 + useMisbehavingHandler
201 + ? MisbehavingHandler::Create(modeStr, std::move(delays), save_print_data_path, std::move(toBlock), std::move(toWarn), std::move(toReport))
202 + : use_queue
203 + ? std::make_unique<QueuingHandler>(num_threads, std::move(delays), save_print_data_path, std::move(toBlock), std::move(toWarn), std::move(toReport))
204 + : std::make_unique<Handler>(std::move(delays), save_print_data_path, std::move(toBlock), std::move(toWarn), std::move(toReport));
206 + if (!handler) {
207 + std::cout << "[Demo] Failed to construct handler." << std::endl;
208 + return 1;
211 // Each agent uses a unique name to identify itself with Google Chrome.
212 content_analysis::sdk::ResultCode rc;
213 diff --git a/demo/handler.h b/demo/handler.h
214 index 9d1ccfdf9857a..88599963c51b0 100644
215 --- a/demo/handler.h
216 +++ b/demo/handler.h
217 @@ -7,31 +7,51 @@
219 #include <time.h>
221 +#include <algorithm>
222 +#include <atomic>
223 #include <chrono>
224 #include <cstdio>
225 #include <fstream>
226 #include <iostream>
227 +#include <optional>
228 #include <thread>
229 #include <utility>
230 +#include <regex>
231 #include <vector>
233 #include "content_analysis/sdk/analysis_agent.h"
234 #include "demo/atomic_output.h"
235 #include "demo/request_queue.h"
237 +using RegexArray = std::vector<std::pair<std::string, std::regex>>;
239 // An AgentEventHandler that dumps requests information to stdout and blocks
240 // any requests that have the keyword "block" in their data
241 class Handler : public content_analysis::sdk::AgentEventHandler {
242 public:
243 using Event = content_analysis::sdk::ContentAnalysisEvent;
245 - Handler(unsigned long delay, const std::string& print_data_file_path) :
246 - delay_(delay), print_data_file_path_(print_data_file_path) {
248 + Handler(std::vector<unsigned long>&& delays, const std::string& print_data_file_path,
249 + RegexArray&& toBlock = RegexArray(),
250 + RegexArray&& toWarn = RegexArray(),
251 + RegexArray&& toReport = RegexArray()) :
252 + toBlock_(std::move(toBlock)), toWarn_(std::move(toWarn)), toReport_(std::move(toReport)),
253 + delays_(std::move(delays)), print_data_file_path_(print_data_file_path) {}
255 - unsigned long delay() { return delay_; }
256 + const std::vector<unsigned long> delays() { return delays_; }
257 + size_t nextDelayIndex() const { return nextDelayIndex_; }
259 protected:
260 + // subclasses can override this
261 + // returns whether the response has been set
262 + virtual bool SetCustomResponse(AtomicCout& aout, std::unique_ptr<Event>& event) {
263 + return false;
265 + // subclasses can override this
266 + // returns whether the response has been sent
267 + virtual bool SendCustomResponse(std::unique_ptr<Event>& event) {
268 + return false;
270 // Analyzes one request from Google Chrome and responds back to the browser
271 // with either an allow or block verdict.
272 void AnalyzeContent(AtomicCout& aout, std::unique_ptr<Event> event) {
273 @@ -43,29 +63,25 @@ class Handler : public content_analysis::sdk::AgentEventHandler {
275 DumpEvent(aout.stream(), event.get());
277 - bool block = false;
278 bool success = true;
279 - unsigned long delay = delay_;
281 - if (event->GetRequest().has_text_content()) {
282 - block = ShouldBlockRequest(
283 - event->GetRequest().text_content());
284 - GetFileSpecificDelay(event->GetRequest().text_content(), &delay);
285 - } else if (event->GetRequest().has_file_path()) {
286 - std::string content;
287 - success =
288 - ReadContentFromFile(event->GetRequest().file_path(),
289 - &content);
290 - if (success) {
291 - block = ShouldBlockRequest(content);
292 - GetFileSpecificDelay(content, &delay);
293 + std::optional<content_analysis::sdk::ContentAnalysisResponse_Result_TriggeredRule_Action> caResponse;
294 + bool setResponse = SetCustomResponse(aout, event);
295 + if (!setResponse) {
296 + caResponse = content_analysis::sdk::ContentAnalysisResponse_Result_TriggeredRule_Action_BLOCK;
297 + if (event->GetRequest().has_text_content()) {
298 + caResponse = DecideCAResponse(
299 + event->GetRequest().text_content(), aout.stream());
300 + } else if (event->GetRequest().has_file_path()) {
301 + // TODO: Fix downloads to store file *first* so we can check contents.
302 + // Until then, just check the file name:
303 + caResponse = DecideCAResponse(
304 + event->GetRequest().file_path(), aout.stream());
305 + } else if (event->GetRequest().has_print_data()) {
306 + // In the case of print request, normally the PDF bytes would be parsed
307 + // for sensitive data violations. To keep this class simple, only the
308 + // URL is checked for the word "block".
309 + caResponse = DecideCAResponse(event->GetRequest().request_data().url(), aout.stream());
311 - } else if (event->GetRequest().has_print_data()) {
312 - // In the case of print request, normally the PDF bytes would be parsed
313 - // for sensitive data violations. To keep this class simple, only the
314 - // URL is checked for the word "block".
315 - block = ShouldBlockRequest(event->GetRequest().request_data().url());
316 - GetFileSpecificDelay(event->GetRequest().request_data().url(), &delay);
319 if (!success) {
320 @@ -75,22 +91,44 @@ class Handler : public content_analysis::sdk::AgentEventHandler {
321 content_analysis::sdk::ContentAnalysisResponse::Result::FAILURE);
322 aout.stream() << " Verdict: failed to reach verdict: ";
323 aout.stream() << event->DebugString() << std::endl;
324 - } else if (block) {
325 - auto rc = content_analysis::sdk::SetEventVerdictToBlock(event.get());
326 - aout.stream() << " Verdict: block";
327 - if (rc != content_analysis::sdk::ResultCode::OK) {
328 - aout.stream() << " error: "
329 - << content_analysis::sdk::ResultCodeToString(rc) << std::endl;
330 - aout.stream() << " " << event->DebugString() << std::endl;
331 + } else {
332 + aout.stream() << " Verdict: ";
333 + if (caResponse) {
334 + switch (caResponse.value()) {
335 + case content_analysis::sdk::ContentAnalysisResponse_Result_TriggeredRule_Action_BLOCK:
336 + aout.stream() << "BLOCK";
337 + break;
338 + case content_analysis::sdk::ContentAnalysisResponse_Result_TriggeredRule_Action_WARN:
339 + aout.stream() << "WARN";
340 + break;
341 + case content_analysis::sdk::ContentAnalysisResponse_Result_TriggeredRule_Action_REPORT_ONLY:
342 + aout.stream() << "REPORT_ONLY";
343 + break;
344 + case content_analysis::sdk::ContentAnalysisResponse_Result_TriggeredRule_Action_ACTION_UNSPECIFIED:
345 + aout.stream() << "ACTION_UNSPECIFIED";
346 + break;
347 + default:
348 + aout.stream() << "<error>";
349 + break;
351 + auto rc =
352 + content_analysis::sdk::SetEventVerdictTo(event.get(), caResponse.value());
353 + if (rc != content_analysis::sdk::ResultCode::OK) {
354 + aout.stream() << " error: "
355 + << content_analysis::sdk::ResultCodeToString(rc) << std::endl;
356 + aout.stream() << " " << event->DebugString() << std::endl;
358 + aout.stream() << std::endl;
359 + } else {
360 + aout.stream() << " Verdict: allow" << std::endl;
362 aout.stream() << std::endl;
363 - } else {
364 - aout.stream() << " Verdict: allow" << std::endl;
367 aout.stream() << std::endl;
369 // If a delay is specified, wait that much.
370 + size_t nextDelayIndex = nextDelayIndex_.fetch_add(1);
371 + unsigned long delay = delays_[nextDelayIndex % delays_.size()];
372 if (delay > 0) {
373 aout.stream() << "Delaying response to " << event->GetRequest().request_token()
374 << " for " << delay << "s" << std::endl<< std::endl;
375 @@ -99,16 +137,19 @@ class Handler : public content_analysis::sdk::AgentEventHandler {
378 // Send the response back to Google Chrome.
379 - auto rc = event->Send();
380 - if (rc != content_analysis::sdk::ResultCode::OK) {
381 - aout.stream() << "[Demo] Error sending response: "
382 - << content_analysis::sdk::ResultCodeToString(rc)
383 - << std::endl;
384 - aout.stream() << event->DebugString() << std::endl;
385 + bool sentCustomResponse = SendCustomResponse(event);
386 + if (!sentCustomResponse) {
387 + auto rc = event->Send();
388 + if (rc != content_analysis::sdk::ResultCode::OK) {
389 + aout.stream() << "[Demo] Error sending response: "
390 + << content_analysis::sdk::ResultCodeToString(rc)
391 + << std::endl;
392 + aout.stream() << event->DebugString() << std::endl;
397 - private:
398 + protected:
399 void OnBrowserConnected(
400 const content_analysis::sdk::BrowserInfo& info) override {
401 AtomicCout aout;
402 @@ -362,21 +403,40 @@ class Handler : public content_analysis::sdk::AgentEventHandler {
403 return true;
406 - bool ShouldBlockRequest(const std::string& content) {
407 - // Determines if the request should be blocked. For this simple example
408 - // the content is blocked if the string "block" is found. Otherwise the
409 - // content is allowed.
410 - return content.find("block") != std::string::npos;
413 - void GetFileSpecificDelay(const std::string& content, unsigned long* delay) {
414 - auto pos = content.find("delay=");
415 - if (pos != std::string::npos) {
416 - std::sscanf(content.substr(pos).c_str(), "delay=%lu", delay);
417 + std::optional<content_analysis::sdk::ContentAnalysisResponse_Result_TriggeredRule_Action>
418 + DecideCAResponse(const std::string& content, std::stringstream& stream) {
419 + for (auto& r : toBlock_) {
420 + if (std::regex_search(content, r.second)) {
421 + stream << "'" << content << "' matches BLOCK regex '"
422 + << r.first << "'" << std::endl;
423 + return content_analysis::sdk::ContentAnalysisResponse_Result_TriggeredRule_Action_BLOCK;
426 + for (auto& r : toWarn_) {
427 + if (std::regex_search(content, r.second)) {
428 + stream << "'" << content << "' matches WARN regex '"
429 + << r.first << "'" << std::endl;
430 + return content_analysis::sdk::ContentAnalysisResponse_Result_TriggeredRule_Action_WARN;
433 + for (auto& r : toReport_) {
434 + if (std::regex_search(content, r.second)) {
435 + stream << "'" << content << "' matches REPORT_ONLY regex '"
436 + << r.first << "'" << std::endl;
437 + return content_analysis::sdk::ContentAnalysisResponse_Result_TriggeredRule_Action_REPORT_ONLY;
440 + stream << "'" << content << "' was ALLOWed\n";
441 + return {};
444 - unsigned long delay_;
445 + // For the demo, block any content that matches these wildcards.
446 + RegexArray toBlock_;
447 + RegexArray toWarn_;
448 + RegexArray toReport_;
450 + std::vector<unsigned long> delays_;
451 + std::atomic<size_t> nextDelayIndex_;
452 std::string print_data_file_path_;
455 @@ -384,8 +444,11 @@ class Handler : public content_analysis::sdk::AgentEventHandler {
456 // any requests that have the keyword "block" in their data
457 class QueuingHandler : public Handler {
458 public:
459 - QueuingHandler(unsigned long threads, unsigned long delay, const std::string& print_data_file_path)
460 - : Handler(delay, print_data_file_path) {
461 + QueuingHandler(unsigned long threads, std::vector<unsigned long>&& delays, const std::string& print_data_file_path,
462 + RegexArray&& toBlock = RegexArray(),
463 + RegexArray&& toWarn = RegexArray(),
464 + RegexArray&& toReport = RegexArray())
465 + : Handler(std::move(delays), print_data_file_path, std::move(toBlock), std::move(toWarn), std::move(toReport)) {
466 StartBackgroundThreads(threads);
469 @@ -421,6 +484,8 @@ class QueuingHandler : public Handler {
470 aout.stream() << std::endl << "----------" << std::endl;
471 aout.stream() << "Thread: " << std::this_thread::get_id()
472 << std::endl;
473 + aout.stream() << "Delaying request processing for "
474 + << handler->delays()[handler->nextDelayIndex() % handler->delays().size()] << "s" << std::endl << std::endl;
475 aout.flush();
477 handler->AnalyzeContent(aout, std::move(event));
479 2.42.0.windows.2