1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 // Parse the data returned from the SafeBrowsing v2.1 protocol response.
7 // TODOv3(shess): Review these changes carefully.
11 #include "base/format_macros.h"
12 #include "base/logging.h"
13 #include "base/strings/string_number_conversions.h"
14 #include "base/strings/string_split.h"
15 #include "base/strings/stringprintf.h"
16 #include "base/sys_byteorder.h"
17 #include "base/time/time.h"
18 #include "build/build_config.h"
19 #include "chrome/browser/safe_browsing/protocol_parser.h"
20 #include "chrome/browser/safe_browsing/safe_browsing_util.h"
24 // Helper class for scanning a buffer.
// Constructor: takes the start of the buffer (|data|) and the number of
// characters available (|length|).
// NOTE(review): the member-initializer list and body are not visible in this
// excerpt; presumably they seed |data_| and |length_| -- confirm.
27 BufferReader(const char* data
, size_t length
)
32 // Return info about remaining buffer data.
// length(): size of the data still left in the buffer.
// NOTE(review): body not visible in this excerpt; presumably returns
// |length_| -- confirm.
33 size_t length() const {
// data(): pointer to the data still left in the buffer.
// NOTE(review): body not visible in this excerpt; presumably returns
// |data_| -- confirm.
36 const char* data() const {
43 // Remove |l| characters from the buffer.
// Precondition: |l| must not exceed length(); this is asserted with
// DCHECK_LE below.
// NOTE(review): the statements that actually move the read position are not
// visible in this excerpt.
44 void Advance(size_t l
) {
45 DCHECK_LE(l
, length());
50 // Get a reference to data in the buffer.
51 // TODO(shess): I'm not sure I like this. Fill out a StringPiece instead?
// Parameters:
//   pptr - out-parameter that receives a pointer into the buffer.
//   l    - number of characters requested.
// Returns a bool success flag.
// NOTE(review): the condition guarding the "poison" step and the success path
// are not visible in this excerpt. Presumably, when fewer than |l| characters
// remain, the reader consumes everything that is left (so later reads also
// fail) and returns false -- confirm against the full file.
52 bool RefData(const void** pptr
, size_t l
) {
54 Advance(length()); // poison
63 // Copy data out of the buffer.
// Parameters:
//   ptr - destination; must have room for |l| bytes, since memcpy below
//         writes exactly |l| bytes to it.
//   l   - number of bytes to copy.
// Obtains a pointer to the next |l| bytes through RefData() and copies them
// into |ptr|.
// NOTE(review): the |buf_ptr| declaration and the return statements are not
// visible in this excerpt; presumably a failed RefData() returns false.
64 bool GetData(void* ptr
, size_t l
) {
66 if (!RefData(&buf_ptr
, l
))
// Copy from the buffer-owned bytes into the caller's storage.
69 memcpy(ptr
, buf_ptr
, l
);
73 // Read a 32-bit integer in network byte order into a local uint32.
// Parameters:
//   i - out-parameter; receives the value converted to host byte order.
// Reads sizeof(*i) raw bytes via GetData(), then converts them in place with
// base::NetToHost32().
// NOTE(review): return statements are not visible in this excerpt.
74 bool GetNet32(uint32
* i
) {
75 if (!GetData(i
, sizeof(*i
)))
// The bytes were copied verbatim above; swap to host order now.
78 *i
= base::NetToHost32(*i
);
82 // Returns false if there is no data, otherwise fills |*line| with a reference
83 // to the next line of data in the buffer.
// The referenced line does not include the terminating '\n': the scan below
// stops at the newline and |line| is set to the |eol| characters before it.
// A final line with no trailing newline is also accepted, because the scan
// also stops at the end of the input.
// NOTE(review): the empty-buffer check, the |eol| declaration, the loop body
// and the Advance() calls are not visible in this excerpt.
84 bool GetLine(base::StringPiece
* line
) {
88 // Find the end of the line, or the end of the input.
90 while (eol
< length_
&& data_
[eol
] != '\n') {
// |line| aliases the buffer; no copy is made.
93 line
->set(data_
, eol
);
96 // Skip the newline if present.
// This tests the first remaining character, which implies the line itself
// has already been consumed by a statement not shown here. The |length_|
// check covers the case where the line ran to the end of the input.
97 if (length_
&& data_
[0] == '\n')
103 // Read out |c| colon-separated pieces from the next line. The resulting
104 // pieces point into the original data buffer.
// Parameters:
//   c      - number of pieces wanted.
//   pieces - output vector; pieces are appended with push_back().
// Only the first |c| - 1 colons act as separators. The last piece is whatever
// remains of the line, so it may itself contain ':' characters.
// NOTE(review): the GetLine() call that fills |line|, any clearing of
// |pieces|, and the return statements are not visible in this excerpt.
105 bool GetPieces(size_t c
, std::vector
<base::StringPiece
>* pieces
) {
106 base::StringPiece line
;
110 // Find the parts separated by ':'.
// Loop until all but the final piece have been collected.
111 while (pieces
->size() + 1 < c
) {
112 size_t colon_ofs
= line
.find(':');
// No separator left although more pieces are still required.
// NOTE(review): the body of this branch is not visible; presumably it
// reports failure.
113 if (colon_ofs
== base::StringPiece::npos
) {
// Emit the text before the colon, then drop it and the colon from |line|.
118 pieces
->push_back(line
.substr(0, colon_ofs
));
119 line
.remove_prefix(colon_ofs
+ 1);
122 // The last piece runs to the end of the line.
123 pieces
->push_back(line
);
// Macro invocation naming BufferReader. The macro is defined outside this
// file; going by its name it disables copy construction and assignment for
// the class -- confirm against its definition.
131 DISALLOW_COPY_AND_ASSIGN(BufferReader
);
// Reads |hash_count| metadata records from |reader|. Each record is a line
// holding a decimal byte count, followed by that many raw bytes.
// Parameters:
//   hash_count  - number of metadata records to read.
//   reader      - buffer positioned at the first metadata length line.
//   full_hashes - results vector; record |i| is stored in the |metadata|
//                 field of element size() - hash_count + i, i.e. one of the
//                 last |hash_count| elements.
// NOTE(review): ParseGetHash() in this file passes NULL for |full_hashes| when
// skipping an unrecognized list. The guard that must protect the dereference
// below is not visible in this excerpt -- confirm it exists. The return
// statements are not visible either.
134 bool ParseGetHashMetadata(size_t hash_count
,
135 BufferReader
* reader
,
136 std::vector
<SBFullHashResult
>* full_hashes
) {
137 for (size_t i
= 0; i
< hash_count
; ++i
) {
// METADATALEN line.
138 base::StringPiece line
;
139 if (!reader
->GetLine(&line
))
142 size_t meta_data_len
;
143 if (!base::StringToSizeT(line
, &meta_data_len
))
// METADATA bytes; |meta_data| points into the reader's buffer.
146 const void* meta_data
;
147 if (!reader
->RefData(&meta_data
, meta_data_len
))
// Copy the bytes into the matching result.
151 (*full_hashes
)[full_hashes
->size() - hash_count
+ i
].metadata
.assign(
152 reinterpret_cast<const char*>(meta_data
), meta_data_len
);
160 namespace safe_browsing
{
162 // BODY = CACHELIFETIME LF HASHENTRY* EOF
163 // CACHELIFETIME = DIGIT+
164 // HASHENTRY = LISTNAME ":" HASHSIZE ":" NUMRESPONSES [":m"] LF
165 // HASHDATA (METADATALEN LF METADATA)*
166 // HASHSIZE = DIGIT+ # Length of each full hash
167 // NUMRESPONSES = DIGIT+ # Number of full hashes in HASHDATA
168 // HASHDATA = <HASHSIZE*NUMRESPONSES number of unsigned bytes>
169 // METADATALEN = DIGIT+
170 // METADATA = <METADATALEN number of unsigned bytes>
//
// Parses a gethash response body following the grammar above.
// Parameters:
//   chunk_data     - start of the response body.
//   cache_lifetime - out-parameter; set from the first line, read as seconds.
//   full_hashes    - out-parameter; cleared on entry, then receives one entry
//                    per full hash from recognized lists.
// The final visible statement returns reader.empty(), so success requires the
// whole input to have been consumed.
// NOTE(review): the |chunk_len| parameter, the |hash_len| and |hash_count|
// declarations, and every early-return statement are missing from this
// excerpt; the comments below cover only what is visible.
171 bool ParseGetHash(const char* chunk_data
,
173 base::TimeDelta
* cache_lifetime
,
174 std::vector
<SBFullHashResult
>* full_hashes
) {
// Start from an empty result set.
175 full_hashes
->clear();
176 BufferReader
reader(chunk_data
, chunk_len
);
178 // Parse out cache lifetime.
180 base::StringPiece line
;
181 if (!reader
.GetLine(&line
))
184 int64_t cache_lifetime_seconds
;
185 if (!base::StringToInt64(line
, &cache_lifetime_seconds
))
188 // TODO(shess): Zero also doesn't make sense, but isn't clearly forbidden,
189 // either. Maybe there should be a threshold involved.
// Negative lifetimes are singled out here; the consequent is not visible,
// presumably a rejection.
190 if (cache_lifetime_seconds
< 0)
193 *cache_lifetime
= base::TimeDelta::FromSeconds(cache_lifetime_seconds
);
// One iteration per HASHENTRY, until the input is exhausted.
196 while (!reader
.empty()) {
// Split the header into LISTNAME, HASHSIZE and the rest. GetPieces() leaves
// the third piece running to the end of the line, so it still carries the
// optional ":m" suffix.
197 std::vector
<base::StringPiece
> cmd_parts
;
198 if (!reader
.GetPieces(3, &cmd_parts
))
201 SBFullHashResult full_hash
;
202 full_hash
.list_id
= safe_browsing_util::GetListId(cmd_parts
[0]);
205 if (!base::StringToSizeT(cmd_parts
[1], &hash_len
))
208 // TODO(shess): Is this possible? If not, why the length present?
// Only hashes of exactly sizeof(SBFullHash) bytes are handled.
209 if (hash_len
!= sizeof(SBFullHash
))
212 // Metadata is indicated by an optional ":m" at the end of the line.
213 bool has_metadata
= false;
214 base::StringPiece hash_count_string
= cmd_parts
[2];
215 size_t optional_colon
= hash_count_string
.find(':', 0);
216 if (optional_colon
!= base::StringPiece::npos
) {
// Anything after the count other than exactly ":m" is singled out.
217 if (hash_count_string
.substr(optional_colon
) != ":m")
// Strip the two-character ":m" so only the digits remain.
220 hash_count_string
.remove_suffix(2);
224 if (!base::StringToSizeT(hash_count_string
, &hash_count
))
// Bounds check: the declared hash data must fit in what remains.
// NOTE(review): both operands are parsed with StringToSizeT from the
// response. If they are size_t, the product can wrap for a very large
// |hash_count| and slip past this check; comparing |hash_count| against
// reader.length() / hash_len would avoid the multiplication.
227 if (hash_len
* hash_count
> reader
.length())
230 // Ignore hash results from lists we don't recognize.
// A negative |list_id| marks an unrecognized list. Its hash bytes are
// skipped, and any metadata is parsed with a NULL sink so the reader stays
// in step with the input.
231 if (full_hash
.list_id
< 0) {
232 reader
.Advance(hash_len
* hash_count
);
233 if (has_metadata
&& !ParseGetHashMetadata(hash_count
, &reader
, NULL
))
// Recognized list: copy each hash into |full_hash| and append a copy of it,
// so every appended entry shares the same |list_id|.
238 for (size_t i
= 0; i
< hash_count
; ++i
) {
239 if (!reader
.GetData(&full_hash
.hash
, hash_len
))
241 full_hashes
->push_back(full_hash
);
// Metadata records follow the hash data and are attached to the entries
// that were just appended.
244 if (has_metadata
&& !ParseGetHashMetadata(hash_count
, &reader
, full_hashes
))
248 return reader
.empty();
251 // BODY = HEADER LF PREFIXES EOF
252 // HEADER = PREFIXSIZE ":" LENGTH
253 // PREFIXSIZE = DIGIT+ # Size of each prefix in bytes
254 // LENGTH = DIGIT+ # Size of PREFIXES in bytes
//
// Builds a gethash request body for |prefixes| following the grammar above,
// and returns it as a std::string.
// NOTE(review): the declaration of |request|, the statement between the two
// numeric appends (presumably the ':' separator), the tail of the final
// append call and the return statement are not visible in this excerpt.
255 std::string
FormatGetHash(const std::vector
<SBPrefix
>& prefixes
) {
// PREFIXSIZE: bytes per prefix.
257 request
.append(base::Uint64ToString(sizeof(SBPrefix
)));
// LENGTH: total number of prefix bytes that follow the header.
259 request
.append(base::Uint64ToString(sizeof(SBPrefix
) * prefixes
.size()));
260 request
.append("\n");
262 // SBPrefix values are read without concern for byte order, so write back the
// (The rest of the comment above is missing from this excerpt.)
// Each prefix is appended as the raw bytes of its in-memory representation.
264 for (size_t i
= 0; i
< prefixes
.size(); ++i
) {
265 request
.append(reinterpret_cast<const char*>(&prefixes
[i
]),
// Parses an update response one line at a time. Each line is split at its
// first ':' into a command and an argument, and the command's first character
// selects the handling.
// Parameters:
//   chunk_data      - start of the response body.
//   next_update_sec - out-parameter (must be non-null, see DCHECK); receives
//                     the re-query delay parsed from the response.
//   deletes         - receives one SBChunkDelete per 'ad'/'sd' line.
//   chunk_urls      - receives one ChunkUrl per URL line.
// NOTE(review): several parameters (including |chunk_len|), the |case| labels,
// the |chunk_url| declaration and all return statements are missing from this
// excerpt. Which command letter maps to which branch is inferred only from
// the comments that remain.
272 bool ParseUpdate(const char* chunk_data
,
274 size_t* next_update_sec
,
276 std::vector
<SBChunkDelete
>* deletes
,
277 std::vector
<ChunkUrl
>* chunk_urls
) {
278 DCHECK(next_update_sec
);
282 BufferReader
reader(chunk_data
, chunk_len
);
// Name of the list the following lines apply to; empty until a list-name
// line has been seen.
285 std::string list_name
;
287 while (!reader
.empty()) {
// pieces[0] is the command; pieces[1] is everything after the first ':'.
288 std::vector
<base::StringPiece
> pieces
;
289 if (!reader
.GetPieces(2, &pieces
))
292 base::StringPiece
& command
= pieces
[0];
294 // Differentiate on the first character of the command (which is usually
295 // only one character, with the exception of the 'ad' and 'sd' commands).
// NOTE(review): no emptiness check on |command| is visible before indexing.
// A line that starts with ':' makes GetPieces() produce an empty first
// piece -- confirm this case is handled.
296 switch (command
[0]) {
299 // Must be either an 'ad' (add-del) or 'sd' (sub-del) chunk. We must
300 // have also parsed the list name before getting here, or the add-del
301 // or sub-del will have no context.
302 if (list_name
.empty() || (command
!= "ad" && command
!= "sd"))
// Record the delete request: sub-del if the command starts with 's', with
// the argument parsed as chunk ranges, tagged with the current list.
304 SBChunkDelete chunk_delete
;
305 chunk_delete
.is_sub_del
= command
[0] == 's';
306 StringToRanges(pieces
[1].as_string(), &chunk_delete
.chunk_del
);
307 chunk_delete
.list_name
= list_name
;
308 deletes
->push_back(chunk_delete
);
313 // The line providing the name of the list (i.e. 'goog-phish-shavar').
314 list_name
= pieces
[1].as_string();
318 // The line providing the next earliest time (in seconds) to re-query.
319 if (!base::StringToSizeT(pieces
[1], next_update_sec
))
// URL line: store the argument as the chunk URL, tagged with the current
// list.
325 chunk_url
.url
= pieces
[1].as_string(); // Skip the initial "u:".
326 chunk_url
.list_name
= list_name
;
327 chunk_urls
->push_back(chunk_url
);
// The only argument singled out here is the literal "pleasereset"; what
// happens on match or mismatch is not visible in this excerpt.
332 if (pieces
[1] != "pleasereset")
338 // According to the spec, we ignore commands we don't understand.
339 // TODO(shess): Does this apply to r:unknown or n:not-integer?
347 // BODY = (UINT32 CHUNKDATA)+
348 // UINT32 = Unsigned 32-bit integer in network byte order
349 // CHUNKDATA = Encoded ChunkData protocol message
//
// Parses a sequence of length-prefixed chunk records.
// Parameters:
//   data   - start of the input.
//   chunks - receives a pointer to one parsed SBChunkData per record.
// NOTE(review): the |length| parameter, the declaration of |l| and the return
// statements are missing from this excerpt.
350 bool ParseChunk(const char* data
,
352 ScopedVector
<SBChunkData
>* chunks
) {
353 BufferReader
reader(data
, length
);
355 while (!reader
.empty()) {
// Read the record length. A failed read, a zero length, or a length larger
// than the remaining input are all grouped into this one condition.
357 if (!reader
.GetNet32(&l
) || l
== 0 || l
> reader
.length())
// Point |p| at the record's |l| bytes inside the buffer (no copy).
360 const void* p
= NULL
;
361 if (!reader
.RefData(&p
, l
))
// Decode the record into a freshly allocated SBChunkData.
364 scoped_ptr
<SBChunkData
> chunk(new SBChunkData());
365 if (!chunk
->ParseFrom(reinterpret_cast<const unsigned char*>(p
), l
))
// release() gives up the scoped_ptr's pointer, which is handed to |chunks|.
368 chunks
->push_back(chunk
.release());
// After the loop the input must be fully consumed.
371 DCHECK(reader
.empty());
375 // LIST = LISTNAME ";" LISTINFO (":" LISTINFO)*
376 // LISTINFO = CHUNKTYPE ":" CHUNKLIST
377 // CHUNKTYPE = "a" | "s"
378 // CHUNKLIST = (RANGE | NUMBER) ["," CHUNKLIST]
380 // RANGE = NUMBER "-" NUMBER
//
// Formats one list's chunk ranges as a single newline-terminated line:
//   <name>;[a:<adds>][:][s:<subs>]\n
// The "a:" and "s:" sections are emitted only when |list.adds| and
// |list.subs| respectively are non-empty. The ':' between them is emitted
// only when both are present. With neither present the result is "<name>;\n".
381 std::string
FormatList(const SBListChunkRanges
& list
) {
382 std::string formatted_results
= list
.name
;
383 formatted_results
.append(";");
// Add-chunk ranges, if any.
385 if (!list
.adds
.empty())
386 formatted_results
.append("a:").append(list
.adds
);
// Separator, needed only when both sections are present.
387 if (!list
.adds
.empty() && !list
.subs
.empty())
388 formatted_results
.append(":");
// Sub-chunk ranges, if any.
389 if (!list
.subs
.empty())
390 formatted_results
.append("s:").append(list
.subs
);
391 formatted_results
.append("\n");
393 return formatted_results
;
396 } // namespace safe_browsing