1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim:set expandtab ts=4 sw=2 sts=2 cin: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
7 #include "Classifier.h"
8 #include "mozilla/Components.h"
9 #include "mozilla/ErrorNames.h"
10 #include "mozilla/net/AsyncUrlChannelClassifier.h"
11 #include "mozilla/net/UrlClassifierCommon.h"
12 #include "mozilla/net/UrlClassifierFeatureFactory.h"
13 #include "mozilla/net/UrlClassifierFeatureResult.h"
14 #include "nsContentUtils.h"
15 #include "nsIChannel.h"
16 #include "nsIHttpChannel.h"
17 #include "nsIHttpChannelInternal.h"
18 #include "nsIURIClassifier.h"
19 #include "nsIUrlClassifierUtils.h"
21 #include "nsNetUtil.h"
22 #include "nsPrintfCString.h"
23 #include "nsProxyRelease.h"
24 #include "nsServiceManagerUtils.h"
25 #include "nsUrlClassifierDBService.h"
32 // Big picture comment
33 // -----------------------------------------------------------------------------
34 // nsUrlClassifierDBService::channelClassify() classifies a channel using a set
35 // of URL-Classifier features. This method minimizes the number of lookups and
36 // URI parsing, and this is done using the classes described here.
38 // The first class is 'FeatureTask' which is able to retrieve the list of
39 // features for this channel using the feature-factory. See
40 // UrlClassifierFeatureFactory.
41 // For each feature, it creates a FeatureData object, which contains the
42 // whitelist and blacklist prefs and tables. The reason why we create
43 // FeatureData is because:
44 // - features are not thread-safe.
45 // - we want to store the state of the classification in the FeatureData
48 // It can happen that multiple features share the same tables. In order to do
49 // the lookup just once, we have TableData class. When multiple features
50 // contain the same table, they have references to the same couple TableData +
53 // During the classification, the channel's URIs are fragmented. In order to
54 // create these fragments just once, we use the URIData class, which is pointed
55 // by TableData classes.
57 // The creation of these classes happens on the main-thread. The classification
58 // happens on the worker thread.
61 // -----------------------------------------------------------------------------
63 // In order to avoid multiple URI parsing, we have this class which contains
64 // nsIURI and its fragments.
// Thread-safe refcounting: instances are constructed on the main thread while
// Fragments() is used off the main thread.
67 NS_INLINE_DECL_THREADSAFE_REFCOUNTING(URIData
);
// Factory for URIData. The lookup key is derived from aInnermostURI and the
// new object is handed back through aData. Main-thread only (asserted in the
// definition).
69 static nsresult
Create(nsIURI
* aURI
, nsIURI
* aInnermostURI
, URIData
** aData
);
// Compares the stored URI with aURI using nsIURI::Equals. Main-thread only
// (asserted in the definition).
71 bool IsEqual(nsIURI
* aURI
) const;
// Returns the lookup fragments for the stored spec, computing them on first
// use. Must NOT be called on the main thread (asserted in the definition).
73 const nsTArray
<nsCString
>& Fragments();
// Handed to NS_ReleaseOnMainThreadSystemGroup by the destructor.
81 nsCOMPtr
<nsIURI
> mURI
;
// Cache filled lazily by Fragments().
83 nsTArray
<nsCString
> mFragments
;
// Builds a new URIData on the main thread. The lookup key for aInnermostURI is
// obtained from nsIUrlClassifierUtils::GetKeyForURI and stored in mURISpec.
// Returns NS_ERROR_FAILURE when the utils service cannot be obtained.
86 /* static */ nsresult
URIData::Create(nsIURI
* aURI
, nsIURI
* aInnermostURI
,
88 MOZ_ASSERT(NS_IsMainThread());
90 MOZ_ASSERT(aInnermostURI
);
92 RefPtr
<URIData
> data
= new URIData();
// The key is computed by the URL-classifier utils service; give up if the
// service is not available.
95 nsCOMPtr
<nsIUrlClassifierUtils
> utilsService
=
96 components::UrlClassifierUtils::Service();
97 if (NS_WARN_IF(!utilsService
)) {
98 return NS_ERROR_FAILURE
;
// Store the classifier key of the innermost URI in mURISpec; Fragments()
// later derives the lookup fragments from it.
101 nsresult rv
= utilsService
->GetKeyForURI(aInnermostURI
, data
->mURISpec
);
102 if (NS_WARN_IF(NS_FAILED(rv
))) {
106 UC_LOG(("URIData::Create[%p] - new URIData created for spec %s", data
.get(),
107 data
->mURISpec
.get()));
// Default constructor; asserts that URIData objects are constructed on the
// main thread.
113 URIData::URIData() { MOZ_ASSERT(NS_IsMainThread()); }
// Destructor: ownership of mURI is passed to
// NS_ReleaseOnMainThreadSystemGroup instead of being released in place.
115 URIData::~URIData() {
116 NS_ReleaseOnMainThreadSystemGroup("URIData:mURI", mURI
.forget());
// Checks whether aURI equals the stored mURI via nsIURI::Equals. Main-thread
// only.
119 bool URIData::IsEqual(nsIURI
* aURI
) const {
120 MOZ_ASSERT(NS_IsMainThread());
// Starts as "not equal"; Equals() writes its answer into isEqual.
123 bool isEqual
= false;
124 nsresult rv
= mURI
->Equals(aURI
, &isEqual
);
125 if (NS_WARN_IF(NS_FAILED(rv
))) {
// Returns the lookup fragments for mURISpec. Must not run on the main thread.
132 const nsTArray
<nsCString
>& URIData::Fragments() {
133 MOZ_ASSERT(!NS_IsMainThread());
// Computed lazily: the fragments are generated only while the cache is empty,
// so an attempt that leaves mFragments empty is retried on the next call.
135 if (mFragments
.IsEmpty()) {
136 nsresult rv
= LookupCache::GetLookupFragments(mURISpec
, &mFragments
);
// A failure only produces a warning; the error code is deliberately dropped.
137 Unused
<< NS_WARN_IF(NS_FAILED(rv
));
// Accessor for the nsIURI held by this object (presumably mURI -- confirm
// against the full definition). Main-thread only.
143 nsIURI
* URIData::URI() const {
144 MOZ_ASSERT(NS_IsMainThread());
149 // ----------------------------------------------------------------------------
151 // In order to avoid multiple lookups on the same table + URI, we have this
// Thread-safe refcounting: instances are built on the main thread and
// DoLookup() runs off the main thread.
155 NS_INLINE_DECL_THREADSAFE_REFCOUNTING(TableData
);
// Associates a table name with the URIData it will be looked up against.
// Main-thread only (asserted in the definition).
163 TableData(URIData
* aURIData
, const nsACString
& aTable
);
// Table name accessor. Main-thread only (asserted in the definition).
167 const nsACString
& Table() const;
// Lookup state accessor. Main-thread only (asserted in the definition).
169 State
MatchState() const;
// True when this object refers to the same URIData pointer and table name.
171 bool IsEqual(URIData
* aURIData
, const nsACString
& aTable
) const;
173 // Returns true if the table classifies the URI. This method must be called
174 // on the classifier worker thread.
175 bool DoLookup(nsUrlClassifierDBServiceWorker
* aWorkerClassifier
);
// URIData whose fragments are used for the lookup.
180 RefPtr
<URIData
> mURIData
;
// Results filled in by DoLookup().
184 LookupResultArray mResults
;
// Creates a TableData for the (aURIData, aTable) pair in the eUnclassified
// state. Main-thread only; aURIData must be non-null.
187 TableData::TableData(URIData
* aURIData
, const nsACString
& aTable
)
188 : mURIData(aURIData
), mState(eUnclassified
), mTable(aTable
) {
189 MOZ_ASSERT(NS_IsMainThread());
190 MOZ_ASSERT(aURIData
);
192 UC_LOG(("TableData CTOR[%p] - new TableData created %s", this,
193 aTable
.BeginReading()));
// Defaulted destructor: no custom teardown is performed here.
196 TableData::~TableData() = default;
// Forwards to URIData::URI() of the associated URIData. Main-thread only.
198 nsIURI
* TableData::URI() const {
199 MOZ_ASSERT(NS_IsMainThread());
200 return mURIData
->URI();
// Accessor for the table name (presumably mTable -- confirm against the full
// definition). Main-thread only.
203 const nsACString
& TableData::Table() const {
204 MOZ_ASSERT(NS_IsMainThread());
// Accessor for the lookup state (presumably mState -- confirm against the
// full definition). Main-thread only.
208 TableData::State
TableData::MatchState() const {
209 MOZ_ASSERT(NS_IsMainThread());
// Returns true when this TableData was created for the same URIData object
// (pointer comparison) and the same table name. Main-thread only.
213 bool TableData::IsEqual(URIData
* aURIData
, const nsACString
& aTable
) const {
214 MOZ_ASSERT(NS_IsMainThread());
215 return mURIData
== aURIData
&& mTable
== aTable
;
// Looks the URI fragments up in mTable using aWorkerClassifier and returns
// true when at least one result was found. Must not run on the main thread.
218 bool TableData::DoLookup(nsUrlClassifierDBServiceWorker
* aWorkerClassifier
) {
219 MOZ_ASSERT(!NS_IsMainThread());
220 MOZ_ASSERT(aWorkerClassifier
);
// The lookup is performed only while the state is eUnclassified; later calls
// reuse mResults.
222 if (mState
== TableData::eUnclassified
) {
223 UC_LOG(("TableData::DoLookup[%p] - starting lookup", this));
225 const nsTArray
<nsCString
>& fragments
= mURIData
->Fragments();
226 nsresult rv
= aWorkerClassifier
->DoSingleLocalLookupWithURIFragments(
227 fragments
, mTable
, mResults
);
// A failed lookup only warns; the state is then derived from whatever is in
// mResults.
228 Unused
<< NS_WARN_IF(NS_FAILED(rv
));
230 mState
= mResults
.IsEmpty() ? TableData::eNoMatch
: TableData::eMatch
;
232 UC_LOG(("TableData::DoLookup[%p] - lookup completed. Matches: %d", this,
233 (int)mResults
.Length()));
236 return !mResults
.IsEmpty();
240 // ----------------------------------------------------------------------------
244 // This class contains all the Feature data.
// Stores aFeature-related state for aChannel by initializing the blacklist and
// whitelist table lists. Main-thread only (asserted in the definition).
257 nsresult
Initialize(FeatureTask
* aTask
, nsIChannel
* aChannel
,
258 nsIUrlClassifierFeature
* aFeature
);
// Runs the blacklist/whitelist lookups for this feature. Must not run on the
// main thread (asserted in the definition).
260 void DoLookup(nsUrlClassifierDBServiceWorker
* aWorkerClassifier
);
262 // Returns true if the next feature should be processed.
263 bool MaybeCompleteClassification(nsIChannel
* aChannel
);
// Fills aList with the TableData objects for aListType, obtaining shared
// URIData/TableData instances from aTask.
266 nsresult
InitializeList(FeatureTask
* aTask
, nsIChannel
* aChannel
,
267 nsIUrlClassifierFeature::listType aListType
,
268 nsTArray
<RefPtr
<TableData
>>& aList
);
// Handed to NS_ReleaseOnMainThreadSystemGroup by the destructor.
271 nsCOMPtr
<nsIUrlClassifierFeature
> mFeature
;
273 nsTArray
<RefPtr
<TableData
>> mBlacklistTables
;
274 nsTArray
<RefPtr
<TableData
>> mWhitelistTables
;
// Indexed by nsIUrlClassifierFeature::listType; a non-empty entry means the
// host was found in the preferences for that list type.
276 // blacklist + whitelist.
277 nsCString mHostInPrefTables
[2];
// A new FeatureData starts in the eUnclassified state.
280 FeatureData::FeatureData() : mState(eUnclassified
) {}
// Destructor: ownership of mFeature is passed to
// NS_ReleaseOnMainThreadSystemGroup instead of being released in place.
282 FeatureData::~FeatureData() {
283 NS_ReleaseOnMainThreadSystemGroup("FeatureData:mFeature", mFeature
.forget());
// Prepares this FeatureData for aFeature on aChannel by initializing first the
// blacklist and then the whitelist table lists through InitializeList().
// Main-thread only; aChannel and aFeature must be non-null.
286 nsresult
FeatureData::Initialize(FeatureTask
* aTask
, nsIChannel
* aChannel
,
287 nsIUrlClassifierFeature
* aFeature
) {
288 MOZ_ASSERT(NS_IsMainThread());
290 MOZ_ASSERT(aChannel
);
291 MOZ_ASSERT(aFeature
);
// The feature name is fetched for the log message below.
293 nsAutoCString featureName
;
294 aFeature
->GetName(featureName
);
295 UC_LOG(("FeatureData::Initialize[%p] - Feature %s - Channel %p", this,
296 featureName
.get(), aChannel
));
// Blacklist tables first...
300 nsresult rv
= InitializeList(
301 aTask
, aChannel
, nsIUrlClassifierFeature::blacklist
, mBlacklistTables
);
302 if (NS_WARN_IF(NS_FAILED(rv
))) {
// ...then the whitelist tables.
306 rv
= InitializeList(aTask
, aChannel
, nsIUrlClassifierFeature::whitelist
,
308 if (NS_WARN_IF(NS_FAILED(rv
))) {
// Decides the classification state of this feature: whitelisted by pref,
// whitelisted by table, or blacklisted. Must not run on the main thread and
// expects the state to still be eUnclassified.
315 void FeatureData::DoLookup(nsUrlClassifierDBServiceWorker
* aWorkerClassifier
) {
316 MOZ_ASSERT(!NS_IsMainThread());
317 MOZ_ASSERT(aWorkerClassifier
);
318 MOZ_ASSERT(mState
== eUnclassified
);
320 UC_LOG(("FeatureData::DoLookup[%p] - lookup starting", this));
322 // This is wrong, but it's fast: we don't want to check if the host is in the
323 // blacklist table if we know that it's going to be whitelisted by pref.
324 // So, also if maybe it's not blacklisted, let's consider it 'whitelisted'.
325 if (!mHostInPrefTables
[nsIUrlClassifierFeature::whitelist
].IsEmpty()) {
326 UC_LOG(("FeatureData::DoLookup[%p] - whitelisted by pref", this));
327 mState
= eMatchWhitelist
;
331 // Let's check if this feature blacklists the URI.
334 !mHostInPrefTables
[nsIUrlClassifierFeature::blacklist
].IsEmpty();
336 UC_LOG(("FeatureData::DoLookup[%p] - blacklisted by pref: %d", this,
// The blacklist tables are consulted only when the pref did not already
// blacklist the host.
339 if (isBlacklisted
== false) {
340 // If one of the blacklist table matches the URI, we don't need to continue
341 // with the others: the feature is blacklisted (but maybe also
343 for (TableData
* tableData
: mBlacklistTables
) {
344 if (tableData
->DoLookup(aWorkerClassifier
)) {
345 isBlacklisted
= true;
351 UC_LOG(("FeatureData::DoLookup[%p] - blacklisted before whitelisting: %d",
352 this, isBlacklisted
));
354 if (!isBlacklisted
) {
359 // Now, let's check if we need to whitelist the same URI.
361 for (TableData
* tableData
: mWhitelistTables
) {
362 // If one of the whitelist table matches the URI, we don't need to continue
363 // with the others: the feature is whitelisted.
364 if (tableData
->DoLookup(aWorkerClassifier
)) {
365 UC_LOG(("FeatureData::DoLookup[%p] - whitelisted by table", this));
366 mState
= eMatchWhitelist
;
371 UC_LOG(("FeatureData::DoLookup[%p] - blacklisted", this));
372 mState
= eMatchBlacklist
;
// Applies the lookup outcome of this feature to aChannel on the main thread.
// For a blacklist match that is not covered by the feature's skip-host list,
// the matching table names are joined with "," and passed to
// nsIUrlClassifierFeature::ProcessChannel; its shouldContinue output is
// returned.
375 bool FeatureData::MaybeCompleteClassification(nsIChannel
* aChannel
) {
376 MOZ_ASSERT(NS_IsMainThread());
379 ("FeatureData::MaybeCompleteClassification[%p] - completing "
380 "classification for channel %p",
386 ("FeatureData::MaybeCompleteClassification[%p] - no match. Let's "
391 case eMatchWhitelist
:
393 ("FeatureData::MaybeCompleteClassification[%p] - whitelisted. Let's "
398 case eMatchBlacklist
:
400 ("FeatureData::MaybeCompleteClassification[%p] - blacklisted", this));
404 MOZ_CRASH("We should not be here!");
408 MOZ_ASSERT(mState
== eMatchBlacklist
);
410 // Maybe we have to skip this host
411 nsAutoCString skipList
;
412 nsresult rv
= mFeature
->GetSkipHostList(skipList
);
413 if (NS_WARN_IF(NS_FAILED(rv
))) {
415 ("FeatureData::MaybeCompleteClassification[%p] - error. Let's move on",
// NOTE(review): mBlacklistTables[0] is indexed without a visible emptiness
// check. DoLookup() can reach eMatchBlacklist through the pref alone, so if a
// feature can expose no blacklist tables this would be out of bounds --
// confirm.
420 if (nsContentUtils::IsURIInList(mBlacklistTables
[0]->URI(), skipList
)) {
422 ("FeatureData::MaybeCompleteClassification[%p] - uri found in skiplist",
// The list starts with the pref table name (possibly empty) and is extended
// with every blacklist table whose lookup matched.
428 list
.Assign(mHostInPrefTables
[nsIUrlClassifierFeature::blacklist
]);
430 for (TableData
* tableData
: mBlacklistTables
) {
431 if (tableData
->MatchState() == TableData::eMatch
) {
432 if (!list
.IsEmpty()) {
433 list
.AppendLiteral(",");
436 list
.Append(tableData
->Table());
441 ("FeatureData::MaybeCompleteClassification[%p] - process channel %p with "
443 this, aChannel
, list
.get()));
// Defaults to "stop" unless ProcessChannel says otherwise; a failing
// ProcessChannel only produces a warning.
445 bool shouldContinue
= false;
446 rv
= mFeature
->ProcessChannel(aChannel
, list
, &shouldContinue
);
447 Unused
<< NS_WARN_IF(NS_FAILED(rv
));
449 return shouldContinue
;
453 // ----------------------------------------------------------------------------
455 // This class keeps the callback alive and makes sure that we release it on the
// Refcounted wrapper around a std::function<void()> callback. It uses the
// non-threadsafe NS_INLINE_DECL_REFCOUNTING; FeatureTask's destructor hands
// its reference to NS_ReleaseOnMainThreadSystemGroup.
457 class CallbackHolder final
{
459 NS_INLINE_DECL_REFCOUNTING(CallbackHolder
);
// Takes ownership of aCallback by moving it into mCallback.
461 explicit CallbackHolder(std::function
<void()>&& aCallback
)
462 : mCallback(std::move(aCallback
)) {}
// Invokes the stored callback.
464 void Exec() const { mCallback(); }
467 ~CallbackHolder() = default;
469 std::function
<void()> mCallback
;
473 // ----------------------------------------------------------------------------
475 // A FeatureTask is a class that is able to classify a channel using a set of
476 // features. The features are grouped by:
477 // - URIs - to avoid extra URI parsing.
478 // - Tables - to avoid multiple lookup on the same table.
// Thread-safe refcounting: the task is created on the main thread, does its
// lookup off the main thread and completes on the main thread again.
481 NS_INLINE_DECL_THREADSAFE_REFCOUNTING(FeatureTask
);
// Factory: builds a task for aChannel with one FeatureData per feature
// returned by UrlClassifierFeatureFactory. Main-thread only (asserted in the
// definition).
483 static nsresult
Create(nsIChannel
* aChannel
,
484 std::function
<void()>&& aCallback
,
485 FeatureTask
** aTask
);
487 // Called on the classifier thread.
488 void DoLookup(nsUrlClassifierDBServiceWorker
* aWorkerClassifier
);
490 // Called on the main-thread to process the channel.
491 void CompleteClassification();
// Returns the URIData already stored for aURI, or creates and stores one.
493 nsresult
GetOrCreateURIData(nsIURI
* aURI
, nsIURI
* aInnermostURI
,
// Returns the TableData already stored for (aURIData, aTable), or creates and
// stores one.
496 nsresult
GetOrCreateTableData(URIData
* aURIData
, const nsACString
& aTable
,
500 FeatureTask(nsIChannel
* aChannel
, std::function
<void()>&& aCallback
);
// Both are handed to NS_ReleaseOnMainThreadSystemGroup by the destructor.
503 nsCOMPtr
<nsIChannel
> mChannel
;
504 RefPtr
<CallbackHolder
> mCallbackHolder
;
// Per-feature state, plus the URIData/TableData objects shared between
// features.
506 nsTArray
<FeatureData
> mFeatures
;
507 nsTArray
<RefPtr
<URIData
>> mURIs
;
508 nsTArray
<RefPtr
<TableData
>> mTables
;
511 // Features are able to classify particular URIs from a channel. For instance,
512 // tracking-annotation feature uses the top-level URI to whitelist the current
513 // channel's URI; flash feature always uses the channel's URI. Because of
514 // this, this function aggregates feature per URI and tables.
// Creates a FeatureTask for aChannel on the main thread. Returns
// NS_ERROR_FAILURE when no feature applies to the channel; otherwise appends
// and initializes one FeatureData per feature.
515 /* static */ nsresult
FeatureTask::Create(nsIChannel
* aChannel
,
516 std::function
<void()>&& aCallback
,
517 FeatureTask
** aTask
) {
518 MOZ_ASSERT(NS_IsMainThread());
519 MOZ_ASSERT(aChannel
);
522 // We need to obtain the list of nsIUrlClassifierFeature objects able to
523 // classify this channel. If the list is empty, we do an early return.
524 nsTArray
<nsCOMPtr
<nsIUrlClassifierFeature
>> features
;
525 UrlClassifierFeatureFactory::GetFeaturesFromChannel(aChannel
, features
);
526 if (features
.IsEmpty()) {
527 UC_LOG(("FeatureTask::Create: Nothing to do for channel %p", aChannel
));
528 return NS_ERROR_FAILURE
;
// aCallback is moved into the task only after the early return above.
531 RefPtr
<FeatureTask
> task
= new FeatureTask(aChannel
, std::move(aCallback
));
533 UC_LOG(("FeatureTask::Create[%p] - FeatureTask created for channel %p",
534 task
.get(), aChannel
));
// Each FeatureData is appended to the task's array first and initialized in
// place.
536 for (nsIUrlClassifierFeature
* feature
: features
) {
537 FeatureData
* featureData
= task
->mFeatures
.AppendElement();
538 nsresult rv
= featureData
->Initialize(task
, aChannel
, feature
);
539 if (NS_WARN_IF(NS_FAILED(rv
))) {
// Stores aChannel and wraps aCallback in a refcounted CallbackHolder.
// Main-thread only; aChannel must be non-null.
548 FeatureTask::FeatureTask(nsIChannel
* aChannel
,
549 std::function
<void()>&& aCallback
)
550 : mChannel(aChannel
) {
551 MOZ_ASSERT(NS_IsMainThread());
552 MOZ_ASSERT(mChannel
);
// The callback is moved into a local first and from there into the holder.
554 std::function
<void()> callback
= std::move(aCallback
);
555 mCallbackHolder
= new CallbackHolder(std::move(callback
));
// Destructor: mChannel and mCallbackHolder are both passed to
// NS_ReleaseOnMainThreadSystemGroup instead of being released in place.
558 FeatureTask::~FeatureTask() {
559 NS_ReleaseOnMainThreadSystemGroup("FeatureTask::mChannel", mChannel
.forget());
560 NS_ReleaseOnMainThreadSystemGroup("FeatureTask::mCallbackHolder",
561 mCallbackHolder
.forget());
// Returns, through aData, the URIData for aURI: an existing entry of mURIs
// when one compares equal, otherwise a new one created by URIData::Create and
// appended to mURIs. Main-thread only.
564 nsresult
FeatureTask::GetOrCreateURIData(nsIURI
* aURI
, nsIURI
* aInnermostURI
,
566 MOZ_ASSERT(NS_IsMainThread());
568 MOZ_ASSERT(aInnermostURI
);
572 ("FeatureTask::GetOrCreateURIData[%p] - Checking if a URIData must be "
// Linear search over the URIData objects already created by this task.
576 for (URIData
* data
: mURIs
) {
577 if (data
->IsEqual(aURI
)) {
578 UC_LOG(("FeatureTask::GetOrCreateURIData[%p] - Reuse existing URIData %p",
// Hand out an additional reference to the existing object.
581 RefPtr
<URIData
> uriData
= data
;
582 uriData
.forget(aData
);
// No match: create a new URIData and remember it for later reuse.
587 RefPtr
<URIData
> data
;
588 nsresult rv
= URIData::Create(aURI
, aInnermostURI
, getter_AddRefs(data
));
589 if (NS_WARN_IF(NS_FAILED(rv
))) {
593 mURIs
.AppendElement(data
);
595 UC_LOG(("FeatureTask::GetOrCreateURIData[%p] - Create new URIData %p", this,
// Returns, through aData, the TableData for (aURIData, aTable): an existing
// entry of mTables when one matches, otherwise a new one appended to mTables.
// Main-thread only; aURIData must be non-null.
602 nsresult
FeatureTask::GetOrCreateTableData(URIData
* aURIData
,
603 const nsACString
& aTable
,
605 MOZ_ASSERT(NS_IsMainThread());
606 MOZ_ASSERT(aURIData
);
610 ("FeatureTask::GetOrCreateTableData[%p] - Checking if TableData must be "
// Linear search over the TableData objects already created by this task.
614 for (TableData
* data
: mTables
) {
615 if (data
->IsEqual(aURIData
, aTable
)) {
617 "FeatureTask::GetOrCreateTableData[%p] - Reuse existing TableData %p",
// Hand out an additional reference to the existing object.
620 RefPtr
<TableData
> tableData
= data
;
621 tableData
.forget(aData
);
// No match: create a new TableData and remember it for later reuse.
626 RefPtr
<TableData
> data
= new TableData(aURIData
, aTable
);
627 mTables
.AppendElement(data
);
629 UC_LOG(("FeatureTask::GetOrCreateTableData[%p] - Create new TableData %p",
// Runs FeatureData::DoLookup for every feature of this task, in order. Must
// not run on the main thread; aWorkerClassifier must be non-null.
636 void FeatureTask::DoLookup(nsUrlClassifierDBServiceWorker
* aWorkerClassifier
) {
637 MOZ_ASSERT(!NS_IsMainThread());
638 MOZ_ASSERT(aWorkerClassifier
);
640 UC_LOG(("FeatureTask::DoLookup[%p] - starting lookup", this));
642 for (FeatureData
& feature
: mFeatures
) {
643 feature
.DoLookup(aWorkerClassifier
);
646 UC_LOG(("FeatureTask::DoLookup[%p] - lookup completed", this));
// Lets each feature complete its classification on mChannel and then executes
// the stored callback. Main-thread only.
649 void FeatureTask::CompleteClassification() {
650 MOZ_ASSERT(NS_IsMainThread());
// MaybeCompleteClassification() returns false when the remaining features
// should not be processed.
652 for (FeatureData
& feature
: mFeatures
) {
653 if (!feature
.MaybeCompleteClassification(mChannel
)) {
658 UC_LOG(("FeatureTask::CompleteClassification[%p] - exec callback", this));
660 mCallbackHolder
->Exec();
// Fills aList with one TableData per table the feature reports for aListType.
// The URI to classify is chosen by the feature; its innermost URI supplies the
// host for the preference check and the key for the shared URIData.
// Main-thread only.
663 nsresult
FeatureData::InitializeList(
664 FeatureTask
* aTask
, nsIChannel
* aChannel
,
665 nsIUrlClassifierFeature::listType aListType
,
666 nsTArray
<RefPtr
<TableData
>>& aList
) {
667 MOZ_ASSERT(NS_IsMainThread());
669 MOZ_ASSERT(aChannel
);
671 UC_LOG(("FeatureData::InitializeList[%p] - Initialize list %d for channel %p",
672 this, aListType
, aChannel
));
// The feature decides which URI is classified for this list type.
674 nsCOMPtr
<nsIURI
> uri
;
676 mFeature
->GetURIByListType(aChannel
, aListType
, getter_AddRefs(uri
));
677 if (NS_WARN_IF(NS_FAILED(rv
)) || !uri
) {
// The error name is resolved only when logging is enabled.
678 if (UC_LOG_ENABLED()) {
679 nsAutoCString errorName
;
680 GetErrorName(rv
, errorName
);
681 UC_LOG(("FeatureData::InitializeList got an unexpected error (rv=%s)",
687 nsCOMPtr
<nsIURI
> innermostURI
= NS_GetInnermostURI(uri
);
688 if (NS_WARN_IF(!innermostURI
)) {
689 return NS_ERROR_FAILURE
;
693 rv
= innermostURI
->GetHost(host
);
694 if (NS_WARN_IF(NS_FAILED(rv
))) {
// Ask the feature whether the host is listed in the preferences for this list
// type; the reported table name is kept in mHostInPrefTables.
699 nsAutoCString tableName
;
700 rv
= mFeature
->HasHostInPreferences(host
, aListType
, tableName
, &found
);
701 if (NS_WARN_IF(NS_FAILED(rv
))) {
706 mHostInPrefTables
[aListType
] = tableName
;
// URIData objects are shared through the task, so each URI is handled once.
709 RefPtr
<URIData
> uriData
;
710 rv
= aTask
->GetOrCreateURIData(uri
, innermostURI
, getter_AddRefs(uriData
));
711 if (NS_WARN_IF(NS_FAILED(rv
))) {
717 nsTArray
<nsCString
> tables
;
718 rv
= mFeature
->GetTables(aListType
, tables
);
719 if (NS_WARN_IF(NS_FAILED(rv
))) {
// TableData objects are shared through the task as well, one per
// (URIData, table) pair.
723 for (const nsCString
& table
: tables
) {
724 RefPtr
<TableData
> data
;
725 rv
= aTask
->GetOrCreateTableData(uriData
, table
, getter_AddRefs(data
));
726 if (NS_WARN_IF(NS_FAILED(rv
))) {
731 aList
.AppendElement(data
);
// Starts the asynchronous classification of aChannel (parent process only).
// A FeatureTask is created, its lookup is dispatched to the URL-classifier
// background thread, and from there a second runnable is dispatched to the
// main thread to call FeatureTask::CompleteClassification().
// Returns NS_ERROR_FAILURE when the classifier worker is not available, and
// otherwise the result of dispatching to the background thread.
739 /* static */ nsresult
AsyncUrlChannelClassifier::CheckChannel(
740 nsIChannel
* aChannel
, std::function
<void()>&& aCallback
) {
741 MOZ_ASSERT(XRE_IsParentProcess());
742 MOZ_ASSERT(aChannel
);
745 return NS_ERROR_INVALID_ARG
;
749 ("AsyncUrlChannelClassifier::CheckChannel starting the classification "
753 RefPtr
<FeatureTask
> task
;
755 FeatureTask::Create(aChannel
, std::move(aCallback
), getter_AddRefs(task
));
756 if (NS_WARN_IF(NS_FAILED(rv
))) {
760 RefPtr
<nsUrlClassifierDBServiceWorker
> workerClassifier
=
761 nsUrlClassifierDBService::GetWorker();
762 if (NS_WARN_IF(!workerClassifier
)) {
763 return NS_ERROR_FAILURE
;
// The lambda captures the task and the worker by value (RefPtr copies).
766 nsCOMPtr
<nsIRunnable
> r
= NS_NewRunnableFunction(
767 "AsyncUrlChannelClassifier::CheckChannel",
768 [task
, workerClassifier
]() -> void {
769 MOZ_ASSERT(!NS_IsMainThread());
770 task
->DoLookup(workerClassifier
);
// This inner `r` shadows the outer runnable; it carries the completion step
// back to the main thread.
772 nsCOMPtr
<nsIRunnable
> r
= NS_NewRunnableFunction(
773 "AsyncUrlChannelClassifier::CheckChannel - return",
774 [task
]() -> void { task
->CompleteClassification(); });
// NOTE(review): the result of NS_DispatchToMainThread is not checked here; if
// it can fail, the callback would presumably never run -- confirm.
776 NS_DispatchToMainThread(r
);
779 return nsUrlClassifierDBService::BackgroundThread()->Dispatch(
780 r
, NS_DISPATCH_NORMAL
);
784 } // namespace mozilla