Rename Heap -> LocalsHeap
[hiphop-php.git] / hphp / runtime / vm / taint / state.cpp
blob6ad6bc27fa0baab1a235e140df9bc64b106f0f33
1 /*
2 +----------------------------------------------------------------------+
3 | HipHop for PHP |
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
17 #ifdef HHVM_TAINT
19 #include <algorithm>
20 #include <cmath>
21 #include <fstream>
22 #include <sstream>
24 #include <folly/Singleton.h>
25 #include <folly/dynamic.h>
26 #include <folly/json.h>
28 #include "hphp/runtime/base/execution-context.h"
29 #include "hphp/runtime/base/init-fini-node.h"
30 #include "hphp/runtime/base/runtime-option.h"
32 #include "hphp/runtime/vm/taint/configuration.h"
33 #include "hphp/runtime/vm/taint/state.h"
35 #include "hphp/util/assertions.h"
36 #include "hphp/util/process.h"
37 #include "hphp/util/trace.h"
39 namespace HPHP {
40 namespace taint {
42 TRACE_SET_MOD(taint);
44 std::string Path::jsonLine() const {
45 const Func* last = nullptr;
47 std::vector<Hop> hops;
48 const Path* path = this;
49 while (path != nullptr) {
50 auto hop = path->hop;
51 if (hop.from != nullptr || hop.to != nullptr) {
52 hops.push_back(hop);
54 path = path->parent;
57 folly::dynamic jsonHops = folly::dynamic::array;
58 for (int i = hops.size() - 1; i >= 0; i--) {
59 auto hop = hops[i];
61 if (last != hop.from && hop.from != nullptr) {
62 jsonHops.push_back(hop.from->fullName()->data());
64 last = hop.from;
66 if (last != hop.to && hop.to != nullptr) {
67 jsonHops.push_back(hop.to->fullName()->data());
69 last = hop.to;
72 folly::dynamic json = folly::dynamic::object("hops", std::move(jsonHops));
73 return folly::toJson(json);
76 Path::Path() : hop{nullptr, nullptr}, parent(nullptr) {}
78 void destructPath(void* p) {
79 auto path = (Path*)p;
80 path->~Path();
83 Path* Path::origin(PathArena* arena, Hop hop) {
84 Path* path = arena->allocD<Path>(&destructPath);
85 if (!path) {
86 return path;
88 path->hop = hop;
89 path->parent = nullptr;
90 return path;
93 Path* Path::to(PathArena* arena, Hop hop) const {
94 Path* child = arena->allocD<Path>(&destructPath);
95 if (!child) {
96 return child;
98 child->hop = hop;
99 child->parent = this;
100 return child;
103 std::ostream& operator<<(std::ostream& out, const HPHP::taint::Value& value) {
104 if (!value) {
105 return out << "_";
106 } else {
107 return out << "S";
111 // We assume EvalVMStackElms is a power of 2, so doubling gets us
112 // the power of 2 closest to n+1. Assuming things are a power of 2
113 // makes a lot of the later operations faster.
114 // Also have a fallback size for tests when this may not be set.
115 Stack::Stack()
116 : m_stack(
117 RuntimeOption::EvalVMStackElms ? (RuntimeOption::EvalVMStackElms * 2)
118 : 16384,
119 nullptr),
120 m_top(0),
121 m_bottom(0) {
122 // Validate our assertions.
123 assert_flog(
124 ceil(log2(m_stack.size())) == floor(log2(m_stack.size())),
125 "Size be power of 2! Got {}",
126 m_stack.size());
129 void Stack::push(Value value) {
130 if (full()) {
131 return;
133 m_stack[m_top] = value;
134 m_top = (m_top + 1) & (m_stack.size() - 1);
137 void Stack::pushFront(Value value) {
138 if (full()) {
139 return;
141 m_bottom = (m_bottom - 1) & (m_stack.size() - 1);
142 m_stack[m_bottom] = value;
145 Value Stack::top() const {
146 return peek(0);
149 Value Stack::peek(int offset) const {
150 // TODO(T93491972): replace with assertions once we can run the integration
151 // tests.
152 if (size() <= offset) {
153 FTRACE(
155 "taint: (WARNING) called `Stack::peek({})` on stack of size {}\n",
156 offset,
157 size());
158 return nullptr;
160 auto index = (m_top - offset - 1) & (m_stack.size() - 1);
161 return m_stack[index];
164 void Stack::pop(int n) {
165 if (size() < n) {
166 FTRACE(
168 "taint: (WARNING) called `Stack::pop({})` on stack of size {}\n",
170 size());
171 n = size();
173 m_top = (m_top - n) & (m_stack.size() - 1);
176 void Stack::popFront() {
177 if (empty()) {
178 FTRACE(3, "taint: (WARNING) called `Stack::popFront()` on empty stack\n");
179 return;
181 m_bottom = (m_bottom + 1) & (m_stack.size() - 1);
184 void Stack::replaceTop(Value value) {
185 if (empty()) {
186 FTRACE(3, "taint: (WARNING) called `Stack::replaceTop()` on empty stack\n");
187 return;
189 auto index = (m_top - 1) & (m_stack.size() - 1);
190 m_stack[index] = value;
193 size_t Stack::size() const {
194 return (m_top - m_bottom) & (m_stack.size() - 1);
197 std::string Stack::show() const {
198 std::stringstream stream;
199 for (size_t i = 0; i < size(); i++) {
200 stream << m_stack[(m_bottom + i) & (m_stack.size() - 1)] << " ";
202 stream << "(t)";
203 return stream.str();
206 void Stack::clear() {
207 m_top = 0;
208 m_bottom = 0;
211 bool Stack::full() const {
212 return (m_stack.size() - size()) == 1;
215 bool Stack::empty() const {
216 return m_bottom == m_top;
219 void LocalsHeap::set(tv_lval typedValue, Value value) {
220 m_heap[std::move(typedValue)] = value;
223 Value LocalsHeap::get(const tv_lval& typedValue) const {
224 auto value = m_heap.find(typedValue);
225 if (value != m_heap.end()) {
226 return value->second;
229 return nullptr;
232 void LocalsHeap::clear() {
233 m_heap.clear();
236 namespace {
238 struct SingletonTag {};
240 InitFiniNode s_stateInitialization(
241 []() { State::instance->initialize(); },
242 InitFiniNode::When::RequestStart);
244 InitFiniNode s_stateTeardown(
245 []() { State::instance->teardown(); },
246 InitFiniNode::When::RequestFini);
248 } // namespace
250 rds::local::RDSLocal<State, rds::local::Initialize::FirstUse> State::instance;
252 State::State() : arena(std::make_unique<PathArena>()) {}
254 void State::initialize() {
255 m_request_start = std::chrono::system_clock::now();
256 FTRACE(1, "taint: initializing state\n");
258 stack.clear();
259 heap_locals.clear();
260 paths.clear();
261 arena = std::make_unique<PathArena>();
262 m_function_metadata = Configuration::get()->functionMetadata();
264 // Stack is initialized with 4 values before any operation happens.
265 // We don't care about these values but mirroring simplifies
266 // consistency checks.
267 for (int i = 0; i < 4; i++) {
268 stack.push(nullptr);
272 namespace {
274 folly::dynamic requestMetadata(
275 std::chrono::time_point<std::chrono::system_clock> requestStart) {
276 folly::dynamic metadata = folly::dynamic::object;
277 metadata["metadata"] = true;
279 metadata["mimeType"] = g_context->getMimeType();
280 metadata["workingDirectory"] = g_context->getCwd();
282 auto requestUrl = g_context->getRequestUrl();
284 if (RO::EvalTaintLogRequestURLs) {
285 metadata["requestUrl"] = requestUrl;
288 auto commandLine = Process::GetCommandLine(getpid());
289 metadata["commandLine"] = commandLine;
291 std::chrono::time_point<std::chrono::system_clock> endTime =
292 std::chrono::system_clock::now();
293 metadata["requestStartTime"] =
294 std::chrono::duration_cast<std::chrono::seconds>(
295 requestStart.time_since_epoch())
296 .count();
297 metadata["requestEndTime"] = std::chrono::duration_cast<std::chrono::seconds>(
298 endTime.time_since_epoch())
299 .count();
300 metadata["requestDurationMs"] =
301 std::chrono::duration_cast<std::chrono::milliseconds>(
302 endTime - requestStart)
303 .count();
305 auto timeForHash = std::chrono::duration_cast<std::chrono::milliseconds>(
306 requestStart.time_since_epoch())
307 .count();
308 metadata["identifier"] = requestUrl != ""
309 ? folly::sformat(
310 "request-{}",
311 std::hash<std::string>()(
312 folly::sformat("{}-{}", requestUrl, timeForHash)))
313 : folly::sformat(
314 "script-{}",
315 std::hash<std::string>()(
316 folly::sformat("{}-{}", commandLine, timeForHash)));
318 return metadata;
321 } // namespace
323 void State::teardown() {
324 auto metadata = requestMetadata(m_request_start);
325 auto identifier = metadata["identifier"].asString();
326 FTRACE(1, "taint: processed request `{}`\n", identifier);
328 auto outputDirectory = Configuration::get()->outputDirectory;
329 if (!outputDirectory) {
330 // Print to stderr, useful for integration tests.
331 for (auto& path : paths) {
332 std::cerr << path->jsonLine() << std::endl;
334 return;
337 auto outputPath = *outputDirectory + "/output-" + identifier + ".json";
338 if (paths.empty()) {
339 FTRACE(1, "taint: no data flows found in request `{}`\n", identifier);
340 return;
343 FTRACE(1, "taint: writing results to {}\n", outputPath);
344 try {
345 std::ofstream output;
346 output.open(outputPath);
347 output << folly::toJson(metadata) << std::endl;
348 for (auto& path : paths) {
349 output << path->jsonLine() << std::endl;
351 output.close();
352 } catch (std::exception& exception) {
353 throw std::runtime_error("unable to write to `" + outputPath + "`");
357 std::vector<Source> State::sources(const Func* func) {
358 if (!m_function_metadata) {
359 m_function_metadata = Configuration::get()->functionMetadata();
360 if (!m_function_metadata) {
361 return {};
364 return m_function_metadata->sources(func);
367 std::vector<Sink> State::sinks(const Func* func) {
368 if (!m_function_metadata) {
369 m_function_metadata = Configuration::get()->functionMetadata();
370 if (!m_function_metadata) {
371 return {};
374 return m_function_metadata->sinks(func);
377 } // namespace taint
378 } // namespace HPHP
380 #endif