2 +----------------------------------------------------------------------+
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-2013 Facebook, Inc. (http://www.facebook.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
16 #ifndef incl_HPHP_TARGET_PROFILE_H_
17 #define incl_HPHP_TARGET_PROFILE_H_
19 #include <folly/Optional.h>
21 #include "hphp/runtime/base/type-string.h"
22 #include "hphp/runtime/base/static-string-table.h"
23 #include "hphp/runtime/base/rds.h"
24 #include "hphp/runtime/vm/jit/mc-generator.h"
25 #include "hphp/runtime/vm/jit/ir-instruction.h"
32 namespace HPHP
{ namespace jit
{
34 //////////////////////////////////////////////////////////////////////
/*
 * This is a utility for creating or querying a 'target profiling'
 * counter during JIT compilation.  The idea is similar to target
 * cache, except instead of caching the information, these collect
 * information that can be used to generate a smarter
 * TransKind::Optimize translation.
 *
 * To use one of these, define a type for the data you are going to
 * collect, and then create and query it in a JIT translation doing
 * something like this (example assumes you're in hhbc-translator):
 *
 *    TargetProfile<MyType> prof(m_context,
 *                               m_irb->state().marker(),
 *                               name);  // const StringData* naming the profile
 *
 *    if (prof.optimizing()) {
 *      auto const data = prof.data(MyType::reduce);
 *      // You can read "data" here to decide whether to generate
 *      // different IR based on the profiling information.
 *    }
 *
 *    // Normal translation
 *
 *    if (prof.profiling()) {
 *      gen(ProfMyTarget, RDSHandleData { prof.handle() }, ...);
 *    }
 */
// TargetProfile<T>: wraps an rds::Link<T> (m_link) for profiling data of
// type T and reports, based on mcg->tx().mode(), whether the current
// translation is profiling or optimizing.
// NOTE(review): this text looks like a lossy extraction of the header. The
// leading numbers are fused original line numbers, tokens are split across
// lines, and lines are missing (the numbering jumps, e.g. 70 -> 74). T is
// used throughout but no template<class T> line is visible. Restore the
// block from the upstream file rather than hand-repairing it.
66 struct TargetProfile
{
// Constructor: initializes m_link from createLink(context, marker, name).
// NOTE(review): marker is forwarded but no marker parameter is visible
// (embedded line 68 is missing) -- TODO confirm its declared type.
67 explicit TargetProfile(const TransContext
& context
,
69 const StringData
* name
)
70 : m_link(createLink(context
, marker
, name
))
74 * Access the data we collected during profiling.
76 * ReduceFn is used to fold the data from each local RDS slot. It must have
77 * the signature void(T&, const T&), and should assume the second argument
78 * might be concurrently written to by other threads running in the
// data(reduce): asserts optimizing(), then calls reduce(accum, slot) once for
// each base yielded by rds::allTLBases(), where slot is
// rds::handleToRef<T>(base, handle()).
// NOTE(review): the declaration of accum and the return statement are not
// visible here (embedded lines 87 and 90+ are missing).
83 template<class ReduceFn
>
84 T
data(ReduceFn reduce
) const {
85 assertx(optimizing());
86 auto const hand
= handle();
88 for (auto& base
: rds::allTLBases()) {
89 reduce(accum
, rds::handleToRef
<T
>(base
, hand
));
95 * Query whether this is set up to profile or optimize. It's possible
96 * neither is true (e.g. if we're producing a TransKind::Live translation or
97 * we're producing a TransKind::Optimize translation and the link couldn't be
98 * attached for some reason.).
// profiling(): true iff the translator mode is TransKind::Profile.
100 bool profiling() const {
101 return mcg
->tx().mode() == TransKind::Profile
;
// optimizing(): true iff the mode is TransKind::Optimize and m_link is bound.
103 bool optimizing() const {
104 return mcg
->tx().mode() == TransKind::Optimize
&& m_link
.bound();
108 * Access the handle to the link. You generally should only need to do this
// handle(): returns the RDS handle held by m_link.
111 rds::Handle
handle() const { return m_link
.handle(); }
// NOTE(review): link() assigns to m_link and calls createLink() with no
// arguments, but m_link is declared const below and the visible createLink
// takes parameters. As written this could not compile if it were ever
// instantiated -- confirm whether it is dead code.
114 rds::Link
<T
> link() {
115 if (!m_link
) m_link
= createLink();
// createLink(): selects the link according to mcg->tx().mode(). In Optimize
// mode with a valid marker.profTransID() it returns an rds::attach<T>(...)
// link; the Anchor/Prologue/Interp/Live/Proflogue/Invalid cases return a link
// built from rds::kInvalidHandle.
// NOTE(review): the Profile case body (embedded lines 124-132) and the rest
// of the attach arguments are missing, so what Profile returns, and whether
// Optimize falls through on an invalid transID, cannot be confirmed here.
119 static rds::Link
<T
> createLink(const TransContext
& context
,
121 const StringData
* name
) {
122 switch (mcg
->tx().mode()) {
123 case TransKind::Profile
:
133 case TransKind::Optimize
:
134 if (isValidTransID(marker
.profTransID())) {
135 return rds::attach
<T
>(
137 marker
.profTransID(), // transID from profiling translation
144 case TransKind::Anchor
:
145 case TransKind::Prologue
:
146 case TransKind::Interp
:
147 case TransKind::Live
:
148 case TransKind::Proflogue
:
149 case TransKind::Invalid
:
150 return rds::Link
<T
>(rds::kInvalidHandle
);
// The RDS link backing this profile; const, so set only by the constructor.
156 rds::Link
<T
> const m_link
;
159 //////////////////////////////////////////////////////////////////////
/*
 * DecRefProfile is used to track which DecRef instructions are likely to go to
 * zero.  During an optimized translation, the release path will be put in
 * acold if it rarely went to zero during profiling.
 */
// Profile counters for a DecRef site: decrement and destroy.
// NOTE(review): the field declarations (embedded lines 167-169) and the
// closing braces are missing from this extraction, so the counter types are
// unknown here -- restore from the upstream file.
166 struct DecRefProfile
{
// hitRate(): destroy as an integer percentage of decrement. Returns 0 when
// decrement is 0, which avoids dividing by zero.
170 int hitRate() const {
171 return decrement
? destroy
* 100 / decrement
: 0;
// toString(): formats decrement, destroy and hitRate() into a std::string.
// NOTE(review): the first label reads `decl` although the value printed is
// decrement -- possibly a typo for `decr`. Left untouched since it is
// runtime text.
174 std::string
toString() const {
175 return folly::format("decl: {:3}, destroy: {:3} ({:3}%)",
176 decrement
, destroy
, hitRate()).str();
// reduce(a, b): adds the counters of b into a. This has the
// void(T&, const T&) shape that TargetProfile::data requires of its ReduceFn.
179 static void reduce(DecRefProfile
& a
, const DecRefProfile
& b
) {
180 // This is slightly racy but missing a few either way isn't a
181 // disaster. It's already racy at profiling time because the two values
182 // aren't updated atomically.
183 a
.decrement
+= b
.decrement
;
184 a
.destroy
+= b
.destroy
;
187 typedef folly::Optional
<TargetProfile
<DecRefProfile
>> OptDecRefProfile
;
189 //////////////////////////////////////////////////////////////////////
/*
 * Record profiling information about non-packed arrays.  This counts the
 * number of times a non-packed array was used as the base of a CGetElem.
 */
// Profile type for non-packed array usage.
// NOTE(review): the data member (embedded line 197), the body of reduce
// (embedded lines 199+) and the closing braces are missing from this
// extraction, so what reduce accumulates cannot be confirmed from here.
196 struct NonPackedArrayProfile
{
// reduce(a, b): has the void(T&, const T&) shape that TargetProfile::data
// requires of its ReduceFn; body not visible.
198 static void reduce(NonPackedArrayProfile
& a
, const NonPackedArrayProfile
& b
) {
// Profile of the shapes observed at a site: a count of non-struct
// observations, the number of distinct shapes seen, and the shape itself.
// NOTE(review): closing braces and several statements are missing from this
// extraction (the embedded numbering has gaps) -- restore from upstream.
203 struct StructArrayProfile
{
// NOTE(review): only shape has an in-class initializer; the two counters
// depend on how the backing storage is initialized -- TODO confirm.
204 int32_t nonStructCount
;
205 int32_t numShapesSeen
;
206 Shape
* shape
{nullptr}; // Never access this directly. Use getShape instead.
// isEmpty(): true when numShapesSeen is zero.
208 bool isEmpty() const {
209 return !numShapesSeen
;
// isMonomorphic(): true when exactly one shape has been seen.
212 bool isMonomorphic() const {
213 return numShapesSeen
== 1;
// isPolymorphic(): true when more than one shape has been seen.
216 bool isPolymorphic() const {
217 return numShapesSeen
> 1;
// makePolymorphic(): sets numShapesSeen to INT_MAX, after which
// isPolymorphic() holds.
// NOTE(review): INT_MAX comes from <climits>, which is not among the visible
// includes -- presumably pulled in transitively; verify.
220 void makePolymorphic() {
221 numShapesSeen
= INT_MAX
;
// getShape(): asserts isMonomorphic() first.
// NOTE(review): the return statement (embedded lines 227-228) is missing.
225 Shape
* getShape() const {
226 assertx(isMonomorphic());
// reduce(a, b): always adds nonStructCount of b into a, and stops early if a
// is already polymorphic. Later it asserts that a is monomorphic, returns if
// b is empty, and returns if b is monomorphic with the same shape as a.
// NOTE(review): embedded lines 233-235, 237-239 and 243+ are missing, so the
// condition guarding the numShapesSeen copy and the final step after the
// last check are not visible here.
230 static void reduce(StructArrayProfile
& a
, const StructArrayProfile
& b
) {
231 a
.nonStructCount
+= b
.nonStructCount
;
232 if (a
.isPolymorphic()) return;
236 a
.numShapesSeen
= b
.numShapesSeen
;
240 assertx(a
.isMonomorphic());
241 if (b
.isEmpty()) return;
242 if (b
.isMonomorphic() && a
.getShape() == b
.getShape()) return;
248 //////////////////////////////////////////////////////////////////////
// Profile counters executed and released.
// NOTE(review): the field declarations (embedded lines 251-253) and the
// closing braces are missing from this extraction, so the counter types are
// unknown here -- restore from the upstream file.
250 struct ReleaseVVProfile
{
// percentReleased(): released as an integer percentage of executed. Returns
// 0 when executed is 0, which avoids dividing by zero.
254 int percentReleased() const {
255 return executed
? (100 * released
/ executed
) : 0;
// reduce(a, b): adds the counters of b into a. This has the
// void(T&, const T&) shape that TargetProfile::data requires of its ReduceFn.
258 static void reduce(ReleaseVVProfile
& a
, const ReleaseVVProfile
& b
) {
259 // Racy but OK -- just used for profiling to trigger optimization.
260 a
.executed
+= b
.executed
;
261 a
.released
+= b
.released
;