1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "net/base/net_util.h"
7 #include "base/file_path.h"
8 #include "base/string_piece.h"
9 #include "base/string_util.h"
10 #include "base/sys_string_conversions.h"
11 #include "googleurl/src/gurl.h"
12 #include "net/base/escape.h"
16 bool FileURLToFilePath(const GURL
& url
, FilePath
* file_path
) {
17 *file_path
= FilePath();
18 std::wstring
& file_path_str
= const_cast<std::wstring
&>(file_path
->value());
19 file_path_str
.clear();
25 std::string host
= url
.host();
27 // URL contains no host, the path is the filename. In this case, the path
28 // will probably be preceded by a slash, as in "/C:/foo.txt", so we
29 // trim that out here.
31 size_t first_non_slash
= path
.find_first_not_of("/\\");
32 if (first_non_slash
!= std::string::npos
&& first_non_slash
> 0)
33 path
.erase(0, first_non_slash
);
35 // URL contains a host: this means it's UNC. We keep the preceding slash
39 path
.append(url
.path());
44 std::replace(path
.begin(), path
.end(), '/', '\\');
46 // GURL stores strings as percent-encoded UTF-8; this will undo that if possible.
47 path
= UnescapeURLComponent(path
,
48 UnescapeRule::SPACES
| UnescapeRule::URL_SPECIAL_CHARS
);
50 if (!IsStringUTF8(path
)) {
51 // Not UTF-8, assume encoding is native codepage and we're done. We know we
52 // are giving the conversion function a nonempty string, and it may fail if
53 // the given string is not in the current encoding and give us an empty
54 // string back. We detect this and report failure.
55 file_path_str
= base::SysNativeMBToWide(path
);
56 return !file_path_str
.empty();
58 file_path_str
.assign(UTF8ToWide(path
));
60 // We used to try too hard and see if |path|, made up entirely of
61 // the first 256 characters of Unicode, was a zero-extended UTF-16 string.
62 // If so, we converted it to 'Latin-1' and checked if the result was UTF-8.
63 // If the check passed, we converted the result to UTF-8.
64 // Otherwise, we treated the result as the native OS encoding.
65 // However, that led to http://crbug.com/4619 and http://crbug.com/14153