tdf#119326 crash when adding "Windows Share" File resource
[LibreOffice.git] / tools / source / fsys / urlobj.cxx
blobf024fec558bf4512a549d82a8cd904ae47638011
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include <sal/config.h>
22 #include <tools/urlobj.hxx>
23 #include <tools/debug.hxx>
24 #include <tools/inetmime.hxx>
25 #include <tools/stream.hxx>
26 #include <com/sun/star/uno/Reference.hxx>
27 #include <com/sun/star/util/XStringWidth.hpp>
28 #include <o3tl/enumarray.hxx>
29 #include <osl/diagnose.h>
30 #include <osl/file.hxx>
31 #include <rtl/character.hxx>
32 #include <rtl/string.h>
33 #include <rtl/textenc.h>
34 #include <rtl/ustring.hxx>
35 #include <sal/log.hxx>
36 #include <sal/types.h>
38 #include <algorithm>
39 #include <cassert>
40 #include <limits>
41 #include <memory>
43 #include <string.h>
45 #include <com/sun/star/uno/Sequence.hxx>
46 #include <sax/tools/converter.hxx>
47 #include <rtl/uri.hxx>
48 #include <comphelper/base64.hxx>
50 using namespace css;
52 // INetURLObject
54 /* The URI grammar (using RFC 2234 conventions).
56 Constructs of the form
57 {reference <rule1> using rule2}
58 stand for a rule matching the given rule1 specified in the given reference,
59 encoded to URI syntax using rule2 (as specified in this URI grammar).
62 ; RFC 1738, RFC 2396, RFC 2732, private
63 login = [user [":" password] "@"] hostport
64 user = *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ";" / "=" / "_" / "~")
65 password = *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ";" / "=" / "_" / "~")
66 hostport = host [":" port]
67 host = incomplete-hostname / hostname / IPv4address / IPv6reference
68 incomplete-hostname = *(domainlabel ".") domainlabel
69 hostname = *(domainlabel ".") toplabel ["."]
70 domainlabel = alphanum [*(alphanum / "-") alphanum]
71 toplabel = ALPHA [*(alphanum / "-") alphanum]
72 IPv4address = 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT
73 IPv6reference = "[" hexpart [":" IPv4address] "]"
74 hexpart = (hexseq ["::" [hexseq]]) / ("::" [hexseq])
75 hexseq = hex4 *(":" hex4)
76 hex4 = 1*4HEXDIG
77 port = *DIGIT
78 escaped = "%" HEXDIG HEXDIG
79 reserved = "$" / "&" / "+" / "," / "/" / ":" / ";" / "=" / "?" / "@" / "[" / "]"
80 mark = "!" / "'" / "(" / ")" / "*" / "-" / "." / "_" / "~"
81 alphanum = ALPHA / DIGIT
82 unreserved = alphanum / mark
83 uric = escaped / reserved / unreserved
84 pchar = escaped / unreserved / "$" / "&" / "+" / "," / ":" / "=" / "@"
87 ; RFC 1738, RFC 2396
88 ftp-url = "FTP://" login ["/" segment *("/" segment) [";TYPE=" ("A" / "D" / "I")]]
89 segment = *pchar
92 ; RFC 1738, RFC 2396
93 http-url = "HTTP://" hostport ["/" segment *("/" segment) ["?" *uric]]
94 segment = *(pchar / ";")
97 ; RFC 1738, RFC 2396, <http://support.microsoft.com/default.aspx?scid=KB;EN-US;Q188997&>
98 file-url = "FILE://" [host / "LOCALHOST" / netbios-name] ["/" segment *("/" segment)]
99 segment = *pchar
100 netbios-name = 1*{<alphanum / "!" / "#" / "$" / "%" / "&" / "'" / "(" / ")" / "-" / "." / "@" / "^" / "_" / "{" / "}" / "~"> using (escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "-" / "." / "@" / "_" / "~")}
103 ; RFC 2368, RFC 2396
104 mailto-url = "MAILTO:" [to] [headers]
105 to = {RFC 822 <#mailbox> using *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "@" / "_" / "~")}
106 headers = "?" header *("&" header)
107 header = hname "=" hvalue
108 hname = {RFC 822 <field-name> using *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "@" / "_" / "~")} / "BODY"
109 hvalue = {RFC 822 <field-body> using *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "@" / "_" / "~")}
112 ; private (see RFC 1738, RFC 2396)
113 vnd-sun-star-webdav-url = "VND.SUN.STAR.WEBDAV://" hostport ["/" segment *("/" segment) ["?" *uric]]
114 segment = *(pchar / ";")
117 ; private
118 private-url = "PRIVATE:" path ["?" *uric]
119 path = *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")
122 ; private
123 vnd-sun-star-help-url = "VND.SUN.STAR.HELP://" name *("/" segment) ["?" *uric]
124 name = *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ":" / ";" / "=" / "@" / "_" / "~")
125 segment = *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ":" / ";" / "=" / "@" / "_" / "~")
128 ; private
129 https-url = "HTTPS://" hostport ["/" segment *("/" segment) ["?" *uric]]
130 segment = *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ":" / ";" / "=" / "@" / "_" / "~")
133 ; private
134 slot-url = "SLOT:" path ["?" *uric]
135 path = *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")
138 ; private
139 macro-url = "MACRO:" path ["?" *uric]
140 path = *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")
143 ; private
144 javascript-url = "JAVASCRIPT:" *uric
147 ; RFC 2397
148 data-url = "DATA:" [mediatype] [";BASE64"] "," *uric
149 mediatype = [type "/" subtype] *(";" attribute "=" value)
150 type = {RFC 2045 <type> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "-" / "." / ":" / "?" / "@" / "_" / "~")}
151 subtype = {RFC 2045 <subtype> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "-" / "." / ":" / "?" / "@" / "_" / "~")}
152 attribute = {RFC 2045 <attribute> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "-" / "." / ":" / "?" / "@" / "_" / "~")}
153 value = {RFC 2045 <value> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "-" / "." / ":" / "?" / "@" / "_" / "~")}
156 ; RFC 2392, RFC 2396
157 cid-url = "CID:" {RFC 822 <addr-spec> using *uric}
160 ; private
161 vnd-sun-star-hier-url = "VND.SUN.STAR.HIER:" ["//"reg_name] *("/" *pchar)
162 reg_name = 1*(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ":" / ";" / "=" / "@" / "_" / "~")
165 ; private
166 uno-url = ".UNO:" path ["?" *uric]
167 path = *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")
170 ; private
171 component-url = ".COMPONENT:" path ["?" *uric]
172 path = *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")
175 ; private
176 vnd-sun-star-pkg-url = "VND.SUN.STAR.PKG://" reg_name *("/" *pchar) ["?" *uric]
177 reg_name = 1*(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ":" / ";" / "=" / "@" / "_" / "~")
180 ; RFC 2255
181 ldap-url = "LDAP://" [hostport] ["/" [dn ["?" [attrdesc *("," attrdesc)] ["?" ["base" / "one" / "sub"] ["?" [filter] ["?" extension *("," extension)]]]]]]
182 dn = {RFC 2253 <distinguishedName> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")}
183 attrdesc = {RFC 2251 <AttributeDescription> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")}
184 filter = {RFC 2254 <filter> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")}
185 extension = ["!"] ["X-"] extoken ["=" exvalue]
186 extoken = {RFC 2252 <oid> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "-" / "." / "/" / ":" / ";" / "@" / "_" / "~")}
187 exvalue = {RFC 2251 <LDAPString> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")}
190 ; private
191 db-url = "DB:" *uric
194 ; private
195 vnd-sun-star-cmd-url = "VND.SUN.STAR.CMD:" opaque_part
196 opaque_part = uric_no_slash *uric
197 uric_no_slash = unreserved / escaped / ";" / "?" / ":" / "@" / "&" / "=" / "+" / "$" / ","
200 ; RFC 1738
201 telnet-url = "TELNET://" login ["/"]
204 ; private
205 vnd-sun-star-expand-url = "VND.SUN.STAR.EXPAND:" opaque_part
206 opaque_part = uric_no_slash *uric
207 uric_no_slash = unreserved / escaped / ";" / "?" / ":" / "@" / "&" / "=" / "+" / "$" / ","
210 ; private
211 vnd-sun-star-tdoc-url = "VND.SUN.STAR.TDOC:/" segment *("/" segment)
212 segment = *pchar
215 ; private
216 unknown-url = scheme ":" 1*uric
217 scheme = ALPHA *(alphanum / "+" / "-" / ".")
220 ; private (http://ubiqx.org/cifs/Appendix-D.html):
221 smb-url = "SMB://" login ["/" segment *("/" segment) ["?" *uric]]
222 segment = *(pchar / ";")
225 inline sal_Int32 INetURLObject::SubString::clear()
227 sal_Int32 nDelta = -m_nLength;
228 m_nBegin = -1;
229 m_nLength = 0;
230 return nDelta;
233 inline sal_Int32 INetURLObject::SubString::set(OUStringBuffer & rString,
234 OUString const & rSubString)
236 OUString sTemp(rString.makeStringAndClear());
237 sal_Int32 nDelta = set(sTemp, rSubString);
238 rString.append(sTemp);
239 return nDelta;
242 inline sal_Int32 INetURLObject::SubString::set(OUString & rString,
243 OUString const & rSubString)
245 sal_Int32 nDelta = rSubString.getLength() - m_nLength;
247 rString = rString.replaceAt(m_nBegin, m_nLength, rSubString);
249 m_nLength = rSubString.getLength();
250 return nDelta;
253 inline sal_Int32 INetURLObject::SubString::set(OUStringBuffer & rString,
254 OUString const & rSubString,
255 sal_Int32 nTheBegin)
257 m_nBegin = nTheBegin;
258 return set(rString, rSubString);
261 inline void INetURLObject::SubString::operator +=(sal_Int32 nDelta)
263 if (isPresent())
264 m_nBegin = m_nBegin + nDelta;
267 int INetURLObject::SubString::compare(SubString const & rOther,
268 OUStringBuffer const & rThisString,
269 OUStringBuffer const & rOtherString) const
271 sal_Int32 len = std::min(m_nLength, rOther.m_nLength);
272 sal_Unicode const * p1 = rThisString.getStr() + m_nBegin;
273 sal_Unicode const * end = p1 + len;
274 sal_Unicode const * p2 = rOtherString.getStr() + rOther.m_nBegin;
275 while (p1 != end) {
276 if (*p1 < *p2) {
277 return -1;
278 } else if (*p1 > *p2) {
279 return 1;
281 ++p1;
282 ++p2;
284 return m_nLength < rOther.m_nLength ? -1
285 : m_nLength > rOther.m_nLength ? 1
286 : 0;
289 struct INetURLObject::SchemeInfo
291 sal_Char const * m_pScheme;
292 sal_Char const * m_pPrefix;
293 bool m_bAuthority;
294 bool m_bUser;
295 bool m_bAuth;
296 bool m_bPassword;
297 bool m_bHost;
298 bool m_bPort;
299 bool m_bHierarchical;
300 bool m_bQuery;
303 struct INetURLObject::PrefixInfo
305 enum Kind { OFFICIAL, INTERNAL, EXTERNAL, ALIAS }; // order is important!
307 sal_Char const * m_pPrefix;
308 sal_Char const * m_pTranslatedPrefix;
309 INetProtocol m_eScheme;
310 Kind m_eKind;
313 // static
// Returns the SchemeInfo entry for eTheScheme from a function-local static
// table indexed by INetProtocol.
//
// Each SchemeInfo initializer lists its values positionally, in member order:
//   m_pScheme, m_pPrefix, m_bAuthority, m_bUser, m_bAuth, m_bPassword,
//   m_bHost, m_bPort, m_bHierarchical, m_bQuery
//
// NOTE(review): the entries presumably have to appear in the declaration
// order of the INetProtocol enumerators (the enum is not visible here) --
// confirm before inserting or reordering entries.
314 inline INetURLObject::SchemeInfo const &
315 INetURLObject::getSchemeInfo(INetProtocol eTheScheme)
317 static o3tl::enumarray<INetProtocol, SchemeInfo> const map = {
// First entry: empty scheme and prefix, all flags false.
318 SchemeInfo{
319 "", "", false, false, false, false, false, false, false, false},
320 SchemeInfo{
321 "ftp", "ftp://", true, true, false, true, true, true, true,
322 false},
323 SchemeInfo{
324 "http", "http://", true, false, false, false, true, true, true,
325 true},
326 SchemeInfo{
327 "file", "file://", true, false, false, false, true, false, true,
328 false},
329 SchemeInfo{
330 "mailto", "mailto:", false, false, false, false, false, false,
331 false, true},
332 SchemeInfo{
333 "vnd.sun.star.webdav", "vnd.sun.star.webdav://", true, false,
334 false, false, true, true, true, true},
335 SchemeInfo{
336 "private", "private:", false, false, false, false, false, false,
337 false, true},
338 SchemeInfo{
339 "vnd.sun.star.help", "vnd.sun.star.help://", true, false, false,
340 false, false, false, true, true},
341 SchemeInfo{
342 "https", "https://", true, false, false, false, true, true,
343 true, true},
344 SchemeInfo{
345 "slot", "slot:", false, false, false, false, false, false, false,
346 true},
347 SchemeInfo{
348 "macro", "macro:", false, false, false, false, false, false,
349 false, true},
350 SchemeInfo{
351 "javascript", "javascript:", false, false, false, false, false,
352 false, false, false},
353 SchemeInfo{
354 "data", "data:", false, false, false, false, false, false, false,
355 false},
356 SchemeInfo{
357 "cid", "cid:", false, false, false, false, false, false, false,
358 false},
359 SchemeInfo{
360 "vnd.sun.star.hier", "vnd.sun.star.hier:", true, false, false,
361 false, false, false, true, false},
362 SchemeInfo{
363 ".uno", ".uno:", false, false, false, false, false, false, false,
364 true},
365 SchemeInfo{
366 ".component", ".component:", false, false, false, false, false,
367 false, false, true},
368 SchemeInfo{
369 "vnd.sun.star.pkg", "vnd.sun.star.pkg://", true, false, false,
370 false, false, false, true, true},
371 SchemeInfo{
372 "ldap", "ldap://", true, false, false, false, true, true,
373 false, true},
374 SchemeInfo{
375 "db", "db:", false, false, false, false, false, false, false,
376 false},
377 SchemeInfo{
378 "vnd.sun.star.cmd", "vnd.sun.star.cmd:", false, false, false,
379 false, false, false, false, false},
380 SchemeInfo{
381 "telnet", "telnet://", true, true, false, true, true, true,
382 true, false},
383 SchemeInfo{
384 "vnd.sun.star.expand", "vnd.sun.star.expand:", false, false,
385 false, false, false, false, false, false},
386 SchemeInfo{
387 "vnd.sun.star.tdoc", "vnd.sun.star.tdoc:", false, false, false,
388 false, false, false, true, false},
// NOTE(review): empty scheme/prefix but host, port and hierarchical flags
// set -- presumably the entry for INetProtocol::Generic; confirm against the
// enum declaration.
389 SchemeInfo{
390 "", "", false, false, false, false, true, true, true, false },
391 SchemeInfo{
392 "smb", "smb://", true, true, false, true, true, true, true,
393 true},
394 SchemeInfo{
395 "hid", "hid:", false, false, false, false, false, false, false,
396 true},
397 SchemeInfo{
398 "sftp", "sftp://", true, true, false, true, true, true, true,
399 true},
400 SchemeInfo{
401 "vnd.libreoffice.cmis", "vnd.libreoffice.cmis://", true, true,
402 false, false, true, false, true, true} };
403 return map[eTheScheme];
406 inline INetURLObject::SchemeInfo const & INetURLObject::getSchemeInfo() const
408 return getSchemeInfo(m_eScheme);
411 namespace {
413 sal_Unicode getHexDigit(sal_uInt32 nWeight)
415 assert(nWeight < 16);
416 static const sal_Unicode aDigits[16]
417 = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C',
418 'D', 'E', 'F' };
419 return aDigits[nWeight];
424 // static
425 inline void INetURLObject::appendEscape(OUStringBuffer & rTheText,
426 sal_uInt32 nOctet)
428 rTheText.append( '%' );
429 rTheText.append( getHexDigit(nOctet >> 4) );
430 rTheText.append( getHexDigit(nOctet & 15) );
433 namespace {
// Short aliases for the INetURLObject::Part values, so that the rows of the
// table below stay compact.
435 enum
437 PA = INetURLObject::PART_USER_PASSWORD,
438 PD = INetURLObject::PART_FPATH,
439 PE = INetURLObject::PART_AUTHORITY,
440 PF = INetURLObject::PART_REL_SEGMENT_EXTRA,
441 PG = INetURLObject::PART_URIC,
442 PH = INetURLObject::PART_HTTP_PATH,
443 PI = INetURLObject::PART_MESSAGE_ID_PATH,
444 PJ = INetURLObject::PART_MAILTO,
445 PK = INetURLObject::PART_PATH_BEFORE_QUERY,
446 PL = INetURLObject::PART_PCHAR,
447 PM = INetURLObject::PART_VISIBLE,
448 PN = INetURLObject::PART_VISIBLE_NONSPECIAL,
449 PO = INetURLObject::PART_UNO_PARAM_VALUE,
450 PP = INetURLObject::PART_UNAMBIGUOUS,
451 PQ = INetURLObject::PART_URIC_NO_SLASH,
452 PR = INetURLObject::PART_HTTP_QUERY,
// Table indexed by ASCII code point (0..127).  Each entry combines the Part
// aliases in which that character may appear unencoded: mustEncode() reports
// "must encode" exactly when the bit for the requested part is missing from
// the entry.  The first 32 entries (control characters) and the last entry
// (code point 127) are 0, so those characters are encoded in every part.
// NOTE(review): combining the aliases with '+' presumably relies on the Part
// values being distinct bits -- confirm against the Part declaration.
455 static sal_uInt32 const aMustEncodeMap[128]
456 = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
457 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
458 /* */ PP,
459 /* ! */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
460 /* " */ PM+PN +PP,
461 /* # */ PM,
462 /* $ */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
463 /* % */ PM,
464 /* & */ PA +PD+PE+PF+PG+PH+PI +PK+PL+PM+PN+PO +PQ+PR,
465 /* ' */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
466 /* ( */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
467 /* ) */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
468 /* * */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
469 /* + */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO +PQ+PR,
470 /* , */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN +PQ+PR,
471 /* - */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
472 /* . */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
473 /* / */ +PD +PG+PH+PI+PJ+PK +PM+PN+PO,
474 /* 0 */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
475 /* 1 */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
476 /* 2 */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
477 /* 3 */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
478 /* 4 */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
479 /* 5 */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
480 /* 6 */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
481 /* 7 */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
482 /* 8 */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
483 /* 9 */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
484 /* : */ +PD+PE +PG+PH+PI+PJ+PK+PL+PM+PN+PO +PQ+PR,
485 /* ; */ PA +PE+PF+PG+PH+PI+PJ+PK +PM +PQ+PR,
486 /* < */ +PI +PM+PN +PP,
487 /* = */ PA +PD+PE+PF+PG+PH +PK+PL+PM+PN +PQ+PR,
488 /* > */ +PI +PM+PN +PP,
489 /* ? */ +PG +PM +PO +PQ,
490 /* @ */ +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
491 /* A */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
492 /* B */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
493 /* C */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
494 /* D */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
495 /* E */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
496 /* F */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
497 /* G */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
498 /* H */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
499 /* I */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
500 /* J */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
501 /* K */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
502 /* L */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
503 /* M */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
504 /* N */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
505 /* O */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
506 /* P */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
507 /* Q */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
508 /* R */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
509 /* S */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
510 /* T */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
511 /* U */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
512 /* V */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
513 /* W */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
514 /* X */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
515 /* Y */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
516 /* Z */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
517 /* [ */ PG +PM+PN+PO,
518 /* \ */ +PM+PN +PP,
519 /* ] */ PG +PM+PN+PO,
520 /* ^ */ PM+PN +PP,
521 /* _ */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
522 /* ` */ PM+PN +PP,
523 /* a */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
524 /* b */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
525 /* c */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
526 /* d */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
527 /* e */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
528 /* f */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
529 /* g */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
530 /* h */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
531 /* i */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
532 /* j */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
533 /* k */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
534 /* l */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
535 /* m */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
536 /* n */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
537 /* o */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
538 /* p */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
539 /* q */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
540 /* r */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
541 /* s */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
542 /* t */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
543 /* u */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
544 /* v */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
545 /* w */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
546 /* x */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
547 /* y */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
548 /* z */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
549 /* { */ PM+PN +PP,
550 /* | */ +PM+PN +PP,
551 /* } */ PM+PN +PP,
552 /* ~ */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ,
553 0 };
555 inline bool mustEncode(sal_uInt32 nUTF32, INetURLObject::Part ePart)
557 return !rtl::isAscii(nUTF32) || !(aMustEncodeMap[nUTF32] & ePart);
562 void INetURLObject::setInvalid()
564 m_aAbsURIRef.setLength(0);
565 m_eScheme = INetProtocol::NotValid;
566 m_aScheme.clear();
567 m_aUser.clear();
568 m_aAuth.clear();
569 m_aHost.clear();
570 m_aPort.clear();
571 m_aPath.clear();
572 m_aQuery.clear();
573 m_aFragment.clear();
576 namespace {
578 std::unique_ptr<SvMemoryStream> memoryStream(
579 void const * data, sal_Int32 length)
581 std::unique_ptr<char[]> b(
582 new char[length]);
583 memcpy(b.get(), data, length);
584 std::unique_ptr<SvMemoryStream> s(
585 new SvMemoryStream(b.get(), length, StreamMode::READ));
586 s->ObjectOwnsMemory(true);
587 b.release();
588 return s;
593 std::unique_ptr<SvMemoryStream> INetURLObject::getData() const
595 if( GetProtocol() != INetProtocol::Data )
597 return nullptr;
600 OUString sURLPath = GetURLPath( DecodeMechanism::WithCharset, RTL_TEXTENCODING_ISO_8859_1 );
601 sal_Unicode const * pSkippedMediatype = INetMIME::scanContentType( sURLPath );
602 sal_Int32 nCharactersSkipped = pSkippedMediatype == nullptr
603 ? 0 : pSkippedMediatype-sURLPath.getStr();
604 if (sURLPath.match(",", nCharactersSkipped))
606 nCharactersSkipped += strlen(",");
607 OString sURLEncodedData(
608 sURLPath.getStr() + nCharactersSkipped,
609 sURLPath.getLength() - nCharactersSkipped,
610 RTL_TEXTENCODING_ISO_8859_1, OUSTRING_TO_OSTRING_CVTFLAGS);
611 return memoryStream(
612 sURLEncodedData.getStr(), sURLEncodedData.getLength());
614 else if (sURLPath.matchIgnoreAsciiCase(";base64,", nCharactersSkipped))
616 nCharactersSkipped += strlen(";base64,");
617 OUString sBase64Data = sURLPath.copy( nCharactersSkipped );
618 css::uno::Sequence< sal_Int8 > aDecodedData;
619 if (comphelper::Base64::decodeSomeChars(aDecodedData, sBase64Data)
620 == sBase64Data.getLength())
622 return memoryStream(
623 aDecodedData.getArray(), aDecodedData.getLength());
626 return nullptr;
629 namespace {
631 FSysStyle guessFSysStyleByCounting(sal_Unicode const * pBegin,
632 sal_Unicode const * pEnd,
633 FSysStyle eStyle)
635 DBG_ASSERT(eStyle
636 & (FSysStyle::Unix
637 | FSysStyle::Dos),
638 "guessFSysStyleByCounting(): Bad style");
639 DBG_ASSERT(std::numeric_limits< sal_Int32 >::min() < pBegin - pEnd
640 && pEnd - pBegin <= std::numeric_limits< sal_Int32 >::max(),
641 "guessFSysStyleByCounting(): Too big");
642 sal_Int32 nSlashCount
643 = (eStyle & FSysStyle::Unix) ?
644 0 : std::numeric_limits< sal_Int32 >::min();
645 sal_Int32 nBackslashCount
646 = (eStyle & FSysStyle::Dos) ?
647 0 : std::numeric_limits< sal_Int32 >::min();
648 while (pBegin != pEnd)
649 switch (*pBegin++)
651 case '/':
652 ++nSlashCount;
653 break;
655 case '\\':
656 ++nBackslashCount;
657 break;
659 return nSlashCount >= nBackslashCount ?
660 FSysStyle::Unix : FSysStyle::Dos;
663 OUString parseScheme(
664 sal_Unicode const ** begin, sal_Unicode const * end,
665 sal_uInt32 fragmentDelimiter)
667 sal_Unicode const * p = *begin;
668 if (p != end && rtl::isAsciiAlpha(*p)) {
669 do {
670 ++p;
671 } while (p != end
672 && (rtl::isAsciiAlphanumeric(*p) || *p == '+' || *p == '-'
673 || *p == '.'));
674 // #i34835# To avoid problems with Windows file paths like "C:\foo",
675 // do not accept generic schemes that are only one character long:
676 if (end - p > 1 && p[0] == ':' && p[1] != fragmentDelimiter
677 && p - *begin >= 2)
679 OUString scheme(
680 OUString(*begin, p - *begin).toAsciiLowerCase());
681 *begin = p + 1;
682 return scheme;
685 return OUString();
690 bool INetURLObject::setAbsURIRef(OUString const & rTheAbsURIRef,
691 EncodeMechanism eMechanism,
692 rtl_TextEncoding eCharset,
693 bool bSmart,
694 FSysStyle eStyle)
696 sal_Unicode const * pPos = rTheAbsURIRef.getStr();
697 sal_Unicode const * pEnd = pPos + rTheAbsURIRef.getLength();
699 setInvalid();
701 sal_uInt32 nFragmentDelimiter = '#';
703 OUStringBuffer aSynAbsURIRef;
705 // Parse <scheme>:
706 sal_Unicode const * p = pPos;
707 PrefixInfo const * pPrefix = getPrefix(p, pEnd);
708 if (pPrefix)
710 pPos = p;
711 m_eScheme = pPrefix->m_eScheme;
713 OUString sTemp(OUString::createFromAscii(pPrefix->m_eKind
714 >= PrefixInfo::EXTERNAL ?
715 pPrefix->m_pTranslatedPrefix :
716 pPrefix->m_pPrefix));
717 aSynAbsURIRef.append(sTemp);
718 m_aScheme = SubString( 0, sTemp.indexOf(':') );
720 else
722 if (bSmart)
724 // For scheme detection, the first (if any) of the following
725 // productions that matches the input string (and for which the
726 // appropriate style bit is set in eStyle, if applicable)
727 // determines the scheme. The productions use the auxiliary rules
729 // domain = label *("." label)
730 // label = alphanum [*(alphanum / "-") alphanum]
731 // alphanum = ALPHA / DIGIT
732 // IPv6reference = "[" IPv6address "]"
733 // IPv6address = hexpart [":" IPv4address]
734 // IPv4address = 1*3DIGIT 3("." 1*3DIGIT)
735 // hexpart = (hexseq ["::" [hexseq]]) / ("::" [hexseq])
736 // hexseq = hex4 *(":" hex4)
737 // hex4 = 1*4HEXDIG
738 // UCS4 = <any UCS4 character>
740 // 1st Production (known scheme; handled by the "if (pPrefix)" branch above):
741 // <one of the known schemes, ignoring case> ":" *UCS4
742 // 2nd Production (mailto):
743 // domain "@" domain
744 // 3rd Production (ftp):
745 // "FTP" 2*("." label) ["/" *UCS4]
746 // 4th Production (http):
747 // label 2*("." label) ["/" *UCS4]
748 // 5th Production (file):
749 // "//" (domain / IPv6reference) ["/" *UCS4]
750 // 6th Production (Unix file):
751 // "/" *UCS4
752 // 7th Production (UNC file; FSysStyle::Dos only):
753 // "\\" domain ["\" *UCS4]
754 // 8th Production (Unix-like DOS file; FSysStyle::Dos only):
755 // ALPHA ":" ["/" *UCS4]
756 // 9th Production (DOS file; FSysStyle::Dos only):
757 // ALPHA ":" ["\" *UCS4]
758 // 10th Production (any scheme; handled by the "m_eScheme = INetProtocol::Generic;" code
759 // after this else branch):
760 // <any scheme> ":" *UCS4
762 // For the 'non URL' file productions 6--9, the interpretation of
763 // the input as a (degenerate) URI is turned off, i.e., escape
764 // sequences and fragments are never detected as such, but are
765 // taken as literal characters.
767 sal_Unicode const * p1 = pPos;
768 if (eStyle & FSysStyle::Dos
769 && pEnd - p1 >= 2
770 && rtl::isAsciiAlpha(p1[0])
771 && p1[1] == ':'
772 && (pEnd - p1 == 2 || p1[2] == '/' || p1[2] == '\\'))
774 m_eScheme = INetProtocol::File; // 8th, 9th
775 eMechanism = EncodeMechanism::All;
776 nFragmentDelimiter = 0x80000000;
778 else if (pEnd - p1 >= 2 && p1[0] == '/' && p1[1] == '/')
780 p1 += 2;
781 if ((scanDomain(p1, pEnd) > 0 || scanIPv6reference(p1, pEnd))
782 && (p1 == pEnd || *p1 == '/'))
783 m_eScheme = INetProtocol::File; // 5th
785 else if (p1 != pEnd && *p1 == '/')
787 m_eScheme = INetProtocol::File; // 6th
788 eMechanism = EncodeMechanism::All;
789 nFragmentDelimiter = 0x80000000;
791 else if (eStyle & FSysStyle::Dos
792 && pEnd - p1 >= 2
793 && p1[0] == '\\'
794 && p1[1] == '\\')
796 p1 += 2;
797 sal_Int32 n = rtl_ustr_indexOfChar_WithLength(
798 p1, pEnd - p1, '\\');
799 sal_Unicode const * pe = n == -1 ? pEnd : p1 + n;
800 if (
801 parseHostOrNetBiosName(
802 p1, pe, EncodeMechanism::All, RTL_TEXTENCODING_DONTKNOW,
803 true, nullptr) ||
804 (scanDomain(p1, pe) > 0 && p1 == pe)
807 m_eScheme = INetProtocol::File; // 7th
808 eMechanism = EncodeMechanism::All;
809 nFragmentDelimiter = 0x80000000;
812 else
814 sal_Unicode const * pDomainEnd = p1;
815 sal_uInt32 nLabels = scanDomain(pDomainEnd, pEnd);
816 if (nLabels > 0 && pDomainEnd != pEnd && *pDomainEnd == '@')
818 ++pDomainEnd;
819 if (scanDomain(pDomainEnd, pEnd) > 0
820 && pDomainEnd == pEnd)
821 m_eScheme = INetProtocol::Mailto; // 2nd
823 else if (nLabels >= 3
824 && (pDomainEnd == pEnd || *pDomainEnd == '/'))
825 m_eScheme
826 = pDomainEnd - p1 >= 4
827 && (p1[0] == 'f' || p1[0] == 'F')
828 && (p1[1] == 't' || p1[1] == 'T')
829 && (p1[2] == 'p' || p1[2] == 'P')
830 && p1[3] == '.' ?
831 INetProtocol::Ftp : INetProtocol::Http; // 3rd, 4th
835 OUString aSynScheme;
836 if (m_eScheme == INetProtocol::NotValid) {
837 sal_Unicode const * p1 = pPos;
838 aSynScheme = parseScheme(&p1, pEnd, nFragmentDelimiter);
839 if (!aSynScheme.isEmpty())
841 m_eScheme = INetProtocol::Generic;
842 pPos = p1;
846 if (bSmart && m_eScheme == INetProtocol::NotValid && pPos != pEnd
847 && *pPos != nFragmentDelimiter)
849 m_eScheme = m_eSmartScheme;
852 if (m_eScheme == INetProtocol::NotValid)
854 setInvalid();
855 return false;
858 if (m_eScheme != INetProtocol::Generic) {
859 aSynScheme = OUString::createFromAscii(getSchemeInfo().m_pScheme);
861 m_aScheme.set(aSynAbsURIRef, aSynScheme, aSynAbsURIRef.getLength());
862 aSynAbsURIRef.append(':');
865 sal_uInt32 nSegmentDelimiter = '/';
866 sal_uInt32 nAltSegmentDelimiter = 0x80000000;
867 bool bSkippedInitialSlash = false;
869 // Parse //<user>;AUTH=<auth>@<host>:<port> or
870 // //<user>:<password>@<host>:<port> or
871 // //<reg_name>
872 if (getSchemeInfo().m_bAuthority)
874 sal_Unicode const * pUserInfoBegin = nullptr;
875 sal_Unicode const * pUserInfoEnd = nullptr;
876 sal_Unicode const * pHostPortBegin = nullptr;
877 sal_Unicode const * pHostPortEnd = nullptr;
879 switch (m_eScheme)
881 case INetProtocol::VndSunStarHelp:
883 if (pEnd - pPos < 2 || *pPos++ != '/' || *pPos++ != '/')
885 setInvalid();
886 return false;
888 aSynAbsURIRef.append("//");
889 OUStringBuffer aSynAuthority;
890 while (pPos < pEnd
891 && *pPos != '/' && *pPos != '?'
892 && *pPos != nFragmentDelimiter)
894 EscapeType eEscapeType;
895 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd,
896 eMechanism,
897 eCharset, eEscapeType);
898 appendUCS4(aSynAuthority, nUTF32, eEscapeType,
899 PART_AUTHORITY, eCharset, false);
901 m_aHost.set(aSynAbsURIRef,
902 aSynAuthority.makeStringAndClear(),
903 aSynAbsURIRef.getLength());
904 // misusing m_aHost to store the authority
905 break;
908 case INetProtocol::VndSunStarHier:
910 if (pEnd - pPos >= 2 && pPos[0] == '/' && pPos[1] == '/')
912 pPos += 2;
913 aSynAbsURIRef.append("//");
914 OUStringBuffer aSynAuthority;
915 while (pPos < pEnd
916 && *pPos != '/' && *pPos != '?'
917 && *pPos != nFragmentDelimiter)
919 EscapeType eEscapeType;
920 sal_uInt32 nUTF32 = getUTF32(pPos,
921 pEnd,
922 eMechanism,
923 eCharset,
924 eEscapeType);
925 appendUCS4(aSynAuthority,
926 nUTF32,
927 eEscapeType,
928 PART_AUTHORITY,
929 eCharset,
930 false);
932 if (aSynAuthority.isEmpty())
934 setInvalid();
935 return false;
937 m_aHost.set(aSynAbsURIRef,
938 aSynAuthority.makeStringAndClear(),
939 aSynAbsURIRef.getLength());
940 // misusing m_aHost to store the authority
942 break;
945 case INetProtocol::VndSunStarPkg:
946 case INetProtocol::Cmis:
948 if (pEnd - pPos < 2 || *pPos++ != '/' || *pPos++ != '/')
950 setInvalid();
951 return false;
953 aSynAbsURIRef.append("//");
954 OUStringBuffer aSynUser;
956 bool bHasUser = false;
957 while (pPos < pEnd && *pPos != '@'
958 && *pPos != '/' && *pPos != '?'
959 && *pPos != nFragmentDelimiter)
961 EscapeType eEscapeType;
962 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd,
963 eMechanism,
964 eCharset, eEscapeType);
965 appendUCS4(aSynUser, nUTF32, eEscapeType,
966 PART_USER_PASSWORD, eCharset, false);
968 bHasUser = *pPos == '@';
971 OUStringBuffer aSynAuthority;
972 if ( !bHasUser )
974 aSynAuthority = aSynUser;
976 else
978 m_aUser.set(aSynAbsURIRef,
979 aSynUser.makeStringAndClear(),
980 aSynAbsURIRef.getLength());
981 aSynAbsURIRef.append("@");
982 ++pPos;
984 while (pPos < pEnd
985 && *pPos != '/' && *pPos != '?'
986 && *pPos != nFragmentDelimiter)
988 EscapeType eEscapeType;
989 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd,
990 eMechanism,
991 eCharset, eEscapeType);
992 appendUCS4(aSynAuthority, nUTF32, eEscapeType,
993 PART_AUTHORITY, eCharset, false);
996 if (aSynAuthority.isEmpty())
998 setInvalid();
999 return false;
1001 m_aHost.set(aSynAbsURIRef,
1002 aSynAuthority.makeStringAndClear(),
1003 aSynAbsURIRef.getLength());
1004 // misusing m_aHost to store the authority
1005 break;
1008 case INetProtocol::File:
1009 if (bSmart)
1011 // The first of the following seven productions that
1012 // matches the rest of the input string (and for which the
1013 // appropriate style bit is set in eStyle, if applicable)
1014 // determines the used notation. The productions use the
1015 // auxiliary rules
1017 // domain = label *("." label)
1018 // label = alphanum [*(alphanum / "-") alphanum]
1019 // alphanum = ALPHA / DIGIT
1020 // IPv6reference = "[" IPv6address "]"
1021 // IPv6address = hexpart [":" IPv4address]
1022 // IPv4address = 1*3DIGIT 3("." 1*3DIGIT)
1023 // hexpart = (hexseq ["::" [hexseq]]) / ("::" [hexseq])
1024 // hexseq = hex4 *(":" hex4)
1025 // hex4 = 1*4HEXDIG
1026 // path = <any UCS4 character except "#">
1027 // UCS4 = <any UCS4 character>
1029 // 1st Production (URL):
1030 // "//" [domain / IPv6reference] ["/" *path]
1031 // ["#" *UCS4]
1032 // becomes
1033 // "file://" domain "/" *path ["#" *UCS4]
1034 if (pEnd - pPos >= 2 && pPos[0] == '/' && pPos[1] == '/')
1036 sal_Unicode const * p1 = pPos + 2;
1037 while (p1 != pEnd && *p1 != '/' &&
1038 *p1 != nFragmentDelimiter)
1040 ++p1;
1042 if (parseHostOrNetBiosName(
1043 pPos + 2, p1, EncodeMechanism::All,
1044 RTL_TEXTENCODING_DONTKNOW, true, nullptr))
1046 aSynAbsURIRef.append("//");
1047 pHostPortBegin = pPos + 2;
1048 pHostPortEnd = p1;
1049 pPos = p1;
1050 break;
1054 // 2nd Production (MS IE generated 1; FSysStyle::Dos only):
1055 // "//" ALPHA ":" ["/" *path] ["#" *UCS4]
1056 // becomes
1057 // "file:///" ALPHA ":" ["/" *path] ["#" *UCS4]
1058 // replacing "\" by "/" within <*path>
1059 // 3rd Production (MS IE generated 2; FSysStyle::Dos only):
1060 // "//" ALPHA ":" ["\" *path] ["#" *UCS4]
1061 // becomes
1062 // "file:///" ALPHA ":" ["/" *path] ["#" *UCS4]
1063 // replacing "\" by "/" within <*path>
1064 // 4th Production (miscounted slashes):
1065 // "//" *path ["#" *UCS4]
1066 // becomes
1067 // "file:///" *path ["#" *UCS4]
1068 if (pEnd - pPos >= 2 && pPos[0] == '/' && pPos[1] == '/')
1070 aSynAbsURIRef.append("//");
1071 pPos += 2;
1072 bSkippedInitialSlash = true;
1073 if ((eStyle & FSysStyle::Dos)
1074 && pEnd - pPos >= 2
1075 && rtl::isAsciiAlpha(pPos[0])
1076 && pPos[1] == ':'
1077 && (pEnd - pPos == 2
1078 || pPos[2] == '/' || pPos[2] == '\\'))
1079 nAltSegmentDelimiter = '\\';
1080 break;
1083 // 5th Production (Unix):
1084 // "/" *path ["#" *UCS4]
1085 // becomes
1086 // "file:///" *path ["#" *UCS4]
1087 if (pPos < pEnd && *pPos == '/')
1089 aSynAbsURIRef.append("//");
1090 break;
1093 // 6th Production (UNC; FSysStyle::Dos only):
1094 // "\\" domain ["\" *path] ["#" *UCS4]
1095 // becomes
1096 // "file://" domain "/" *path ["#" *UCS4]
1097 // replacing "\" by "/" within <*path>
1098 if (eStyle & FSysStyle::Dos
1099 && pEnd - pPos >= 2
1100 && pPos[0] == '\\'
1101 && pPos[1] == '\\')
1103 sal_Unicode const * p1 = pPos + 2;
1104 sal_Unicode const * pe = p1;
1105 while (pe < pEnd && *pe != '\\' &&
1106 *pe != nFragmentDelimiter)
1108 ++pe;
1110 if (
1111 parseHostOrNetBiosName(
1112 p1, pe, EncodeMechanism::All,
1113 RTL_TEXTENCODING_DONTKNOW, true, nullptr) ||
1114 (scanDomain(p1, pe) > 0 && p1 == pe)
1117 aSynAbsURIRef.append("//");
1118 pHostPortBegin = pPos + 2;
1119 pHostPortEnd = pe;
1120 pPos = pe;
1121 nSegmentDelimiter = '\\';
1122 break;
1126 // 7th Production (Unix-like DOS; FSysStyle::Dos only):
1127 // ALPHA ":" ["/" *path] ["#" *UCS4]
1128 // becomes
1129 // "file:///" ALPHA ":" ["/" *path] ["#" *UCS4]
1130 // replacing "\" by "/" within <*path>
1131 // 8th Production (DOS; FSysStyle::Dos only):
1132 // ALPHA ":" ["\" *path] ["#" *UCS4]
1133 // becomes
1134 // "file:///" ALPHA ":" ["/" *path] ["#" *UCS4]
1135 // replacing "\" by "/" within <*path>
1136 if (eStyle & FSysStyle::Dos
1137 && pEnd - pPos >= 2
1138 && rtl::isAsciiAlpha(pPos[0])
1139 && pPos[1] == ':'
1140 && (pEnd - pPos == 2
1141 || pPos[2] == '/'
1142 || pPos[2] == '\\'))
1144 aSynAbsURIRef.append("//");
1145 nAltSegmentDelimiter = '\\';
1146 bSkippedInitialSlash = true;
1147 break;
1150 // 9th Production (any):
1151 // *path ["#" *UCS4]
1152 // becomes
1153 // "file:///" *path ["#" *UCS4]
1154 // replacing the delimiter by "/" within <*path>. The
1155 // delimiter is that character from the set { "/", "\"}
1156 // which appears most often in <*path> (if FSysStyle::Unix
1157 // is not among the style bits, "/" is removed from the
1158 // set; if FSysStyle::Dos is not among the style bits, "\" is
1159 // removed from the set). If two or
1160 // more characters appear the same number of times, the
1161 // character mentioned first in that set is chosen. If
1162 // the first character of <*path> is the delimiter, that
1163 // character is not copied
1164 if (eStyle & (FSysStyle::Unix | FSysStyle::Dos))
1166 aSynAbsURIRef.append("//");
1167 switch (guessFSysStyleByCounting(pPos, pEnd, eStyle))
1169 case FSysStyle::Unix:
1170 nSegmentDelimiter = '/';
1171 break;
1173 case FSysStyle::Dos:
1174 nSegmentDelimiter = '\\';
1175 break;
1177 default:
1178 OSL_FAIL(
1179 "INetURLObject::setAbsURIRef():"
1180 " Bad guessFSysStyleByCounting");
1181 break;
1183 bSkippedInitialSlash
1184 = pPos != pEnd && *pPos != nSegmentDelimiter;
1185 break;
1188 SAL_FALLTHROUGH;
1189 default:
1191 // For INetProtocol::File, allow an empty authority ("//") to be
1192 // missing if the following path starts with an explicit "/"
1193 // (Java is notorious in generating such file URLs, so be
1194 // liberal here):
1195 if (pEnd - pPos >= 2 && pPos[0] == '/' && pPos[1] == '/')
1196 pPos += 2;
1197 else if (!bSmart
1198 && !(m_eScheme == INetProtocol::File
1199 && pPos != pEnd && *pPos == '/'))
1201 setInvalid();
1202 return false;
1204 aSynAbsURIRef.append("//");
1206 sal_Unicode const * pAuthority = pPos;
1207 sal_uInt32 c = getSchemeInfo().m_bQuery ? '?' : 0x80000000;
1208 while (pPos < pEnd && *pPos != '/' && *pPos != c
1209 && *pPos != nFragmentDelimiter)
1210 ++pPos;
1211 if (getSchemeInfo().m_bUser)
1212 if (getSchemeInfo().m_bHost)
1214 sal_Unicode const * p1 = pAuthority;
1215 while (p1 < pPos && *p1 != '@')
1216 ++p1;
1217 if (p1 == pPos)
1219 pHostPortBegin = pAuthority;
1220 pHostPortEnd = pPos;
1222 else
1224 pUserInfoBegin = pAuthority;
1225 pUserInfoEnd = p1;
1226 pHostPortBegin = p1 + 1;
1227 pHostPortEnd = pPos;
1230 else
1232 pUserInfoBegin = pAuthority;
1233 pUserInfoEnd = pPos;
1235 else if (getSchemeInfo().m_bHost)
1237 pHostPortBegin = pAuthority;
1238 pHostPortEnd = pPos;
1240 else if (pPos != pAuthority)
1242 setInvalid();
1243 return false;
1245 break;
1249 if (pUserInfoBegin)
1251 Part ePart = PART_USER_PASSWORD;
1252 bool bSupportsPassword = getSchemeInfo().m_bPassword;
1253 bool bSupportsAuth
1254 = !bSupportsPassword && getSchemeInfo().m_bAuth;
1255 bool bHasAuth = false;
1256 OUStringBuffer aSynUser;
1257 sal_Unicode const * p1 = pUserInfoBegin;
1258 while (p1 < pUserInfoEnd)
1260 EscapeType eEscapeType;
1261 sal_uInt32 nUTF32 = getUTF32(p1, pUserInfoEnd,
1262 eMechanism, eCharset, eEscapeType);
1263 if (eEscapeType == EscapeType::NONE)
1265 if (nUTF32 == ':' && bSupportsPassword)
1267 bHasAuth = true;
1268 break;
1270 else if (nUTF32 == ';' && bSupportsAuth
1271 && pUserInfoEnd - p1
1272 > RTL_CONSTASCII_LENGTH("auth=")
1273 && INetMIME::equalIgnoreCase(
1275 p1 + RTL_CONSTASCII_LENGTH("auth="),
1276 "auth="))
1278 p1 += RTL_CONSTASCII_LENGTH("auth=");
1279 bHasAuth = true;
1280 break;
1283 appendUCS4(aSynUser, nUTF32, eEscapeType, ePart,
1284 eCharset, false);
1286 m_aUser.set(aSynAbsURIRef, aSynUser.makeStringAndClear(),
1287 aSynAbsURIRef.getLength());
1288 if (bHasAuth)
1290 if (bSupportsPassword)
1292 aSynAbsURIRef.append(':');
1293 OUStringBuffer aSynAuth;
1294 while (p1 < pUserInfoEnd)
1296 EscapeType eEscapeType;
1297 sal_uInt32 nUTF32 = getUTF32(p1, pUserInfoEnd,
1298 eMechanism, eCharset,
1299 eEscapeType);
1300 appendUCS4(aSynAuth, nUTF32, eEscapeType,
1301 ePart, eCharset, false);
1303 m_aAuth.set(aSynAbsURIRef, aSynAuth.makeStringAndClear(),
1304 aSynAbsURIRef.getLength());
1306 else
1308 aSynAbsURIRef.append(";AUTH=");
1309 OUStringBuffer aSynAuth;
1310 while (p1 < pUserInfoEnd)
1312 EscapeType eEscapeType;
1313 sal_uInt32 nUTF32 = getUTF32(p1, pUserInfoEnd,
1314 eMechanism, eCharset,
1315 eEscapeType);
1316 if (!INetMIME::isIMAPAtomChar(nUTF32))
1318 setInvalid();
1319 return false;
1321 appendUCS4(aSynAuth, nUTF32, eEscapeType,
1322 ePart, eCharset, false);
1324 m_aAuth.set(aSynAbsURIRef, aSynAuth.makeStringAndClear(),
1325 aSynAbsURIRef.getLength());
1328 if (pHostPortBegin)
1329 aSynAbsURIRef.append('@');
1332 if (pHostPortBegin)
1334 sal_Unicode const * pPort = pHostPortEnd;
1335 if ( getSchemeInfo().m_bPort && pHostPortBegin < pHostPortEnd )
1337 sal_Unicode const * p1 = pHostPortEnd - 1;
1338 while (p1 > pHostPortBegin && rtl::isAsciiDigit(*p1))
1339 --p1;
1340 if (*p1 == ':')
1341 pPort = p1;
1343 bool bNetBiosName = false;
1344 switch (m_eScheme)
1346 case INetProtocol::File:
1347 // If the host equals "LOCALHOST" (unencoded and ignoring
1348 // case), turn it into an empty host:
1349 if (INetMIME::equalIgnoreCase(pHostPortBegin, pPort,
1350 "localhost"))
1351 pHostPortBegin = pPort;
1352 bNetBiosName = true;
1353 break;
1355 case INetProtocol::Ldap:
1356 case INetProtocol::Smb:
1357 if (pHostPortBegin == pPort && pPort != pHostPortEnd)
1359 setInvalid();
1360 return false;
1362 break;
1363 default:
1364 if (pHostPortBegin == pPort)
1366 setInvalid();
1367 return false;
1369 break;
1371 OUStringBuffer aSynHost;
1372 if (!parseHostOrNetBiosName(
1373 pHostPortBegin, pPort, eMechanism, eCharset,
1374 bNetBiosName, &aSynHost))
1376 setInvalid();
1377 return false;
1379 m_aHost.set(aSynAbsURIRef, aSynHost.makeStringAndClear(),
1380 aSynAbsURIRef.getLength());
1381 if (pPort != pHostPortEnd)
1383 aSynAbsURIRef.append(':');
1384 m_aPort.set(aSynAbsURIRef,
1385 OUString(pPort + 1, pHostPortEnd - (pPort + 1)),
1386 aSynAbsURIRef.getLength());
1391 // Parse <path>
1392 OUStringBuffer aSynPath;
1393 if (!parsePath(m_eScheme, &pPos, pEnd, eMechanism, eCharset,
1394 bSkippedInitialSlash, nSegmentDelimiter,
1395 nAltSegmentDelimiter,
1396 getSchemeInfo().m_bQuery ? '?' : 0x80000000,
1397 nFragmentDelimiter, aSynPath))
1399 setInvalid();
1400 return false;
1402 m_aPath.set(aSynAbsURIRef, aSynPath.makeStringAndClear(),
1403 aSynAbsURIRef.getLength());
1405 // Parse ?<query>
1406 if (getSchemeInfo().m_bQuery && pPos < pEnd && *pPos == '?')
1408 aSynAbsURIRef.append('?');
1409 OUStringBuffer aSynQuery;
1410 for (++pPos; pPos < pEnd && *pPos != nFragmentDelimiter;)
1412 EscapeType eEscapeType;
1413 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd,
1414 eMechanism, eCharset, eEscapeType);
1415 appendUCS4(aSynQuery, nUTF32, eEscapeType,
1416 PART_URIC, eCharset, true);
1418 m_aQuery.set(aSynAbsURIRef, aSynQuery.makeStringAndClear(),
1419 aSynAbsURIRef.getLength());
1422 // Parse #<fragment>
1423 if (pPos < pEnd && *pPos == nFragmentDelimiter)
1425 aSynAbsURIRef.append(sal_Unicode(nFragmentDelimiter));
1426 OUStringBuffer aSynFragment;
1427 for (++pPos; pPos < pEnd;)
1429 EscapeType eEscapeType;
1430 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd,
1431 eMechanism, eCharset, eEscapeType);
1432 appendUCS4(aSynFragment, nUTF32, eEscapeType, PART_URIC,
1433 eCharset, true);
1435 m_aFragment.set(aSynAbsURIRef, aSynFragment.makeStringAndClear(),
1436 aSynAbsURIRef.getLength());
1439 if (pPos != pEnd)
1441 setInvalid();
1442 return false;
1445 m_aAbsURIRef = aSynAbsURIRef;
1447 // At this point references of type "\\server\paths" have
1448 // been converted to "file://server/path".
1449 #ifdef LINUX
1450 if (m_eScheme==INetProtocol::File && !m_aHost.isEmpty()) {
1451 // Change "file://server/path" URIs to "smb://server/path" on
1452 // Linux
1453 // Leave "file:///path" URIs (empty host) unchanged.
1454 changeScheme(INetProtocol::Smb);
1456 #endif
1458 return true;
1461 void INetURLObject::changeScheme(INetProtocol eTargetScheme) {
1462 OUString aTmpStr=m_aAbsURIRef.makeStringAndClear();
1463 int oldSchemeLen = 0;
1464 if (m_eScheme == INetProtocol::Generic)
1465 oldSchemeLen = m_aScheme.getLength();
1466 else
1467 oldSchemeLen = strlen(getSchemeInfo().m_pScheme);
1468 m_eScheme=eTargetScheme;
1469 int newSchemeLen=strlen(getSchemeInfo().m_pScheme);
1470 m_aAbsURIRef.appendAscii(getSchemeInfo().m_pScheme);
1471 m_aAbsURIRef.append(aTmpStr.getStr()+oldSchemeLen);
1472 int delta=newSchemeLen-oldSchemeLen;
1473 m_aUser+=delta;
1474 m_aAuth+=delta;
1475 m_aHost+=delta;
1476 m_aPort+=delta;
1477 m_aPath+=delta;
1478 m_aQuery+=delta;
1479 m_aFragment+=delta;
// Resolves the URI reference rTheRelURIRef against this object, which serves
// as the base URL, and stores the outcome in rTheAbsURIRef.
//
// rTheRelURIRef     the reference to resolve; it may already be absolute
// rTheAbsURIRef     receives the result; it is only assigned on success
// rWasAbsolute      set to true when the input is taken over as an absolute
//                   reference (it has its own scheme, or smart mode
//                   recognised a DOS/UNC path); set to false otherwise,
//                   including on failure
// eMechanism,
// eCharset          forwarded to getUTF32()/appendUCS4() while copying
// bIgnoreFragment   drop the fragment of the input instead of copying it
// bSmart            for input without a scheme, additionally try to read it
//                   as a file system path (see the productions below)
// bRelativeNonURIs  only evaluated for scheme-less input in smart mode:
//                   disables the query and fragment delimiters and switches
//                   to EncodeMechanism::All / PART_VISIBLE_NONSPECIAL
// eStyle            file system notations that smart mode may assume
//
// Returns true on success.  Returns false when the text built here (or the
// absolute input) does not parse as a URL, or when a relative path is given
// but the scheme of the base is not hierarchical.
1482 bool INetURLObject::convertRelToAbs(OUString const & rTheRelURIRef,
1483 INetURLObject & rTheAbsURIRef,
1484 bool & rWasAbsolute,
1485 EncodeMechanism eMechanism,
1486 rtl_TextEncoding eCharset,
1487 bool bIgnoreFragment, bool bSmart,
1488 bool bRelativeNonURIs, FSysStyle eStyle)
1489 const
// p is the read position in the input; pEnd is one past its last character.
1491 sal_Unicode const * p = rTheRelURIRef.getStr();
1492 sal_Unicode const * pEnd = p + rTheRelURIRef.getLength();
// Find out whether the input brings its own scheme: first via the table of
// known prefixes, then via parseScheme().  Both get a copy of p, so p itself
// still points at the start of the input afterwards.
1494 sal_Unicode const * pPrefixBegin = p;
1495 PrefixInfo const * pPrefix = getPrefix(pPrefixBegin, pEnd);
1496 bool hasScheme = pPrefix != nullptr;
1497 if (!hasScheme) {
1498 pPrefixBegin = p;
1499 hasScheme = !parseScheme(&pPrefixBegin, pEnd, '#').isEmpty();
// Delimiters used while scanning the input.  0x80000000 is used where a
// delimiter is to be switched off; the 16-bit characters it is compared
// against below cannot equal it.
1502 sal_uInt32 nSegmentDelimiter = '/';
1503 sal_uInt32 nQueryDelimiter
1504 = !bSmart || getSchemeInfo().m_bQuery ? '?' : 0x80000000;
1505 sal_uInt32 nFragmentDelimiter = '#';
1506 Part ePart = PART_VISIBLE;
1508 if (!hasScheme && bSmart)
1510 // If the input matches any of the following productions (for which
1511 // the appropriate style bit is set in eStyle), it is assumed to be an
1512 // absolute file system path, rather than a relative URI reference.
1513 // (This is only a subset of the productions used for scheme detection
1514 // in INetURLObject::setAbsURIRef(), because most of those productions
1515 // interfere with the syntax of relative URI references.) The
1516 // productions use the auxiliary rules
1518 // domain = label *("." label)
1519 // label = alphanum [*(alphanum / "-") alphanum]
1520 // alphanum = ALPHA / DIGIT
1521 // UCS4 = <any UCS4 character>
1523 // 1st Production (UNC file; FSysStyle::Dos only):
1524 // "\\" domain ["\" *UCS4]
1525 // 2nd Production (Unix-like DOS file; FSysStyle::Dos only):
1526 // ALPHA ":" ["/" *UCS4]
1527 // 3rd Production (DOS file; FSysStyle::Dos only):
1528 // ALPHA ":" ["\" *UCS4]
1529 if (eStyle & FSysStyle::Dos)
1531 bool bFSys = false;
1532 sal_Unicode const * q = p;
1533 if (pEnd - q >= 2
1534 && rtl::isAsciiAlpha(q[0])
1535 && q[1] == ':'
1536 && (pEnd - q == 2 || q[2] == '/' || q[2] == '\\'))
1537 bFSys = true; // 2nd, 3rd
1538 else if (pEnd - q >= 2 && q[0] == '\\' && q[1] == '\\')
1540 q += 2;
1541 sal_Int32 n = rtl_ustr_indexOfChar_WithLength(
1542 q, pEnd - q, '\\');
1543 sal_Unicode const * qe = n == -1 ? pEnd : q + n;
1544 if (parseHostOrNetBiosName(
1545 q, qe, EncodeMechanism::All, RTL_TEXTENCODING_DONTKNOW,
1546 true, nullptr))
1548 bFSys = true; // 1st
// The input looks like an absolute DOS/UNC path: let setAbsURIRef() parse it
// in smart mode.  If that fails, processing continues below as for any other
// scheme-less input.
1551 if (bFSys)
1553 INetURLObject aNewURI;
1554 aNewURI.setAbsURIRef(rTheRelURIRef, eMechanism,
1555 eCharset, true, eStyle);
1556 if (!aNewURI.HasError())
1558 rTheAbsURIRef = aNewURI;
1559 rWasAbsolute = true;
1560 return true;
1565 // When the base URL is a file URL, accept relative file system paths
1566 // using "\" or ":" as delimiter (and ignoring URI conventions for "%"
1567 // and "#"), as well as relative URIs using "/" as delimiter:
1568 if (m_eScheme == INetProtocol::File)
1569 switch (guessFSysStyleByCounting(p, pEnd, eStyle))
1571 case FSysStyle::Unix:
1572 nSegmentDelimiter = '/';
1573 break;
1575 case FSysStyle::Dos:
1576 nSegmentDelimiter = '\\';
1577 bRelativeNonURIs = true;
1578 break;
1580 default:
1581 OSL_FAIL("INetURLObject::convertRelToAbs():"
1582 " Bad guessFSysStyleByCounting");
1583 break;
// Plain relative paths: no query or fragment delimiter is recognised and all
// characters are encoded.
1586 if (bRelativeNonURIs)
1588 eMechanism = EncodeMechanism::All;
1589 nQueryDelimiter = 0x80000000;
1590 nFragmentDelimiter = 0x80000000;
1591 ePart = PART_VISIBLE_NONSPECIAL;
1595 // If the relative URI has the same scheme as the base URI, and that
1596 // scheme is hierarchical, then ignore its presence in the relative
1597 // URI in order to be backward compatible (cf. RFC 2396 section 5.2
1598 // step 3):
1599 if (pPrefix && pPrefix->m_eScheme == m_eScheme
1600 && getSchemeInfo().m_bHierarchical)
1602 hasScheme = false;
// Skip the scheme including its ':' so that p points at what follows it.
1603 while (p != pEnd && *p++ != ':') ;
1605 rWasAbsolute = hasScheme;
1607 // Fast solution for non-relative URIs:
1608 if (hasScheme)
1610 INetURLObject aNewURI(rTheRelURIRef, eMechanism, eCharset);
1611 if (aNewURI.HasError())
1613 rWasAbsolute = false;
1614 return false;
1617 if (bIgnoreFragment)
1618 aNewURI.clearFragment();
1619 rTheAbsURIRef = aNewURI;
1620 return true;
// From here on the input is relative.  The absolute text is assembled in
// aSynAbsURIRef; eState records which part of the input is expected next.
1623 enum State { STATE_AUTH, STATE_ABS_PATH, STATE_REL_PATH, STATE_FRAGMENT,
1624 STATE_DONE };
1626 OUStringBuffer aSynAbsURIRef;
1627 // make sure that the scheme is copied for generic schemes: getSchemeInfo().m_pScheme
1628 // is empty ("") in that case, so take the scheme from m_aAbsURIRef
1629 if (m_eScheme != INetProtocol::Generic)
1631 aSynAbsURIRef.appendAscii(getSchemeInfo().m_pScheme);
1633 else
1635 sal_Unicode const * pSchemeBegin
1636 = m_aAbsURIRef.getStr();
1637 sal_Unicode const * pSchemeEnd = pSchemeBegin;
// NOTE(review): this scan has no end check; it relies on the base URL text
// containing a ':' -- presumably guaranteed for a parsed generic URL; confirm.
1638 while (pSchemeEnd[0] != ':')
1640 ++pSchemeEnd;
1642 aSynAbsURIRef.append(pSchemeBegin, pSchemeEnd - pSchemeBegin);
1644 aSynAbsURIRef.append(':');
// bSameDoc stays true only while nothing but a fragment (or nothing at all)
// has been found in the input.
1646 State eState = STATE_AUTH;
1647 bool bSameDoc = true;
// Authority: a leading "//" in the input introduces an authority of its own,
// which is copied up to the next unescaped segment or fragment delimiter;
// otherwise the authority of the base is taken over.
1649 if (getSchemeInfo().m_bAuthority)
1651 if (pEnd - p >= 2 && p[0] == '/' && p[1] == '/')
1653 aSynAbsURIRef.append("//");
1654 p += 2;
1655 eState = STATE_ABS_PATH;
1656 bSameDoc = false;
1657 while (p != pEnd)
1659 EscapeType eEscapeType;
1660 sal_uInt32 nUTF32
1661 = getUTF32(p, pEnd, eMechanism,
1662 eCharset, eEscapeType);
1663 if (eEscapeType == EscapeType::NONE)
1665 if (nUTF32 == nSegmentDelimiter)
1666 break;
1667 else if (nUTF32 == nFragmentDelimiter)
1669 eState = STATE_FRAGMENT;
1670 break;
1673 appendUCS4(aSynAbsURIRef, nUTF32, eEscapeType,
1674 PART_VISIBLE, eCharset, true);
1677 else
1679 SubString aAuthority(getAuthority());
1680 aSynAbsURIRef.append(m_aAbsURIRef.getStr()
1681 + aAuthority.getBegin(),
1682 aAuthority.getLength());
// The input had no authority of its own: the next character decides how the
// remainder is read (nothing left, fragment, absolute path, relative path).
1686 if (eState == STATE_AUTH)
1688 if (p == pEnd)
1689 eState = STATE_DONE;
1690 else if (*p == nFragmentDelimiter)
1692 ++p;
1693 eState = STATE_FRAGMENT;
1695 else if (*p == nSegmentDelimiter)
1697 ++p;
1698 eState = STATE_ABS_PATH;
1699 bSameDoc = false;
1701 else
1703 eState = STATE_REL_PATH;
1704 bSameDoc = false;
// Absolute path: copy the rest of the input up to an unescaped fragment
// delimiter, writing every unescaped segment delimiter as '/'.
1708 if (eState == STATE_ABS_PATH)
1710 aSynAbsURIRef.append('/');
1711 eState = STATE_DONE;
1712 while (p != pEnd)
1714 EscapeType eEscapeType;
1715 sal_uInt32 nUTF32
1716 = getUTF32(p, pEnd, eMechanism, eCharset, eEscapeType);
1717 if (eEscapeType == EscapeType::NONE)
1719 if (nUTF32 == nFragmentDelimiter)
1721 eState = STATE_FRAGMENT;
1722 break;
1724 else if (nUTF32 == nSegmentDelimiter)
1725 nUTF32 = '/';
1727 appendUCS4(aSynAbsURIRef, nUTF32, eEscapeType, ePart,
1728 eCharset, true);
1731 else if (eState == STATE_REL_PATH)
1733 if (!getSchemeInfo().m_bHierarchical)
1735 // Detect cases where a relative input could not be made absolute
1736 // because the given base URL is broken (most probably because it is
1737 // empty):
1738 SAL_WARN_IF(
1739 HasError(), "tools.urlobj",
1740 "cannot make <" << rTheRelURIRef
1741 << "> absolute against broken base <"
1742 << GetMainURL(DecodeMechanism::NONE) << ">");
1743 rWasAbsolute = false;
1744 return false;
// Start from the path of the base, cut off behind its last '/'.
1747 sal_Unicode const * pBasePathBegin
1748 = m_aAbsURIRef.getStr() + m_aPath.getBegin();
1749 sal_Unicode const * pBasePathEnd
1750 = pBasePathBegin + m_aPath.getLength();
1751 while (pBasePathEnd != pBasePathBegin)
1752 if (*(--pBasePathEnd) == '/')
1754 ++pBasePathEnd;
1755 break;
1758 sal_Int32 nPathBegin = aSynAbsURIRef.getLength();
1759 aSynAbsURIRef.append(pBasePathBegin, pBasePathEnd - pBasePathBegin);
1760 DBG_ASSERT(aSynAbsURIRef.getLength() > nPathBegin
1761 && aSynAbsURIRef[aSynAbsURIRef.getLength() - 1] == '/',
1762 "INetURLObject::convertRelToAbs(): Bad base path");
// Append the relative path segment by segment.  A "." segment is skipped.
// A ".." segment removes the last segment written so far, as long as more
// than a single character of path is left; otherwise it is copied like an
// ordinary segment.
1764 while (p != pEnd && *p != nQueryDelimiter && *p != nFragmentDelimiter)
1766 if (*p == '.')
1768 if (pEnd - p == 1
1769 || p[1] == nSegmentDelimiter
1770 || p[1] == nQueryDelimiter
1771 || p[1] == nFragmentDelimiter)
1773 ++p;
1774 if (p != pEnd && *p == nSegmentDelimiter)
1775 ++p;
1776 continue;
1778 else if (pEnd - p >= 2
1779 && p[1] == '.'
1780 && (pEnd - p == 2
1781 || p[2] == nSegmentDelimiter
1782 || p[2] == nQueryDelimiter
1783 || p[2] == nFragmentDelimiter)
1784 && aSynAbsURIRef.getLength() - nPathBegin > 1)
1786 p += 2;
1787 if (p != pEnd && *p == nSegmentDelimiter)
1788 ++p;
// Step back over the trailing '/' and search for the '/' before it.
1790 sal_Int32 i = aSynAbsURIRef.getLength() - 2;
1791 while (i > nPathBegin && aSynAbsURIRef[i] != '/')
1792 --i;
1793 aSynAbsURIRef.setLength(i + 1);
1794 DBG_ASSERT(
1795 aSynAbsURIRef.getLength() > nPathBegin
1796 && aSynAbsURIRef[aSynAbsURIRef.getLength() - 1] == '/',
1797 "INetURLObject::convertRelToAbs(): Bad base path");
1798 continue;
// Ordinary segment: copy it, then its delimiter (written as '/').
1802 while (p != pEnd
1803 && *p != nSegmentDelimiter
1804 && *p != nQueryDelimiter
1805 && *p != nFragmentDelimiter)
1807 EscapeType eEscapeType;
1808 sal_uInt32 nUTF32
1809 = getUTF32(p, pEnd, eMechanism,
1810 eCharset, eEscapeType);
1811 appendUCS4(aSynAbsURIRef, nUTF32, eEscapeType, ePart,
1812 eCharset, true);
1814 if (p != pEnd && *p == nSegmentDelimiter)
1816 aSynAbsURIRef.append('/');
1817 ++p;
// Copy what follows the path (starting at the query delimiter, if the loop
// above stopped there) up to the fragment delimiter.
1821 while (p != pEnd && *p != nFragmentDelimiter)
1823 EscapeType eEscapeType;
1824 sal_uInt32 nUTF32
1825 = getUTF32(p, pEnd, eMechanism, eCharset, eEscapeType);
1826 appendUCS4(aSynAbsURIRef, nUTF32, eEscapeType, ePart,
1827 eCharset, true);
1830 if (p == pEnd)
1831 eState = STATE_DONE;
1832 else
1834 ++p;
1835 eState = STATE_FRAGMENT;
// The input was empty or consisted of a fragment only: take over path and
// query of the base (the "- 1" / "+ 1" include the character in front of
// the query, i.e. its delimiter).
1838 else if (bSameDoc)
1840 aSynAbsURIRef.append(m_aAbsURIRef.getStr() + m_aPath.getBegin(),
1841 m_aPath.getLength());
1842 if (m_aQuery.isPresent())
1843 aSynAbsURIRef.append(m_aAbsURIRef.getStr()
1844 + m_aQuery.getBegin() - 1,
1845 m_aQuery.getLength() + 1);
// Fragment: copied from the input unless the caller asked to ignore it.
1848 if (eState == STATE_FRAGMENT && !bIgnoreFragment)
1850 aSynAbsURIRef.append('#');
1851 while (p != pEnd)
1853 EscapeType eEscapeType;
1854 sal_uInt32 nUTF32
1855 = getUTF32(p, pEnd, eMechanism, eCharset, eEscapeType);
1856 appendUCS4(aSynAbsURIRef, nUTF32, eEscapeType,
1857 PART_VISIBLE, eCharset, true);
// Parse the assembled text; only a URL without error is handed out.
1861 INetURLObject aNewURI(aSynAbsURIRef.makeStringAndClear());
1862 if (aNewURI.HasError())
1864 // Detect cases where a relative input could not be made absolute
1865 // because the given base URL is broken (most probably because it is
1866 // empty):
1867 SAL_WARN_IF(
1868 HasError(), "tools.urlobj",
1869 "cannot make <" << rTheRelURIRef
1870 << "> absolute against broken base <" << GetMainURL(DecodeMechanism::NONE)
1871 << ">");
1872 rWasAbsolute = false;
1873 return false;
1876 rTheAbsURIRef = aNewURI;
1877 return true;
1880 bool INetURLObject::convertAbsToRel(OUString const & rTheAbsURIRef,
1881 OUString & rTheRelURIRef,
1882 EncodeMechanism eEncodeMechanism,
1883 DecodeMechanism eDecodeMechanism,
1884 rtl_TextEncoding eCharset,
1885 FSysStyle eStyle) const
1887 // Check for hierarchical base URL:
1888 if (!getSchemeInfo().m_bHierarchical)
1890 rTheRelURIRef = decode(rTheAbsURIRef, eDecodeMechanism, eCharset);
1891 return false;
1894 // Convert the input (absolute or relative URI ref) to an absolute URI
1895 // ref:
1896 INetURLObject aSubject;
1897 bool bWasAbsolute;
1898 if (!convertRelToAbs(rTheAbsURIRef, aSubject, bWasAbsolute,
1899 eEncodeMechanism, eCharset, false, false, false,
1900 eStyle))
1902 rTheRelURIRef = decode(rTheAbsURIRef, eDecodeMechanism, eCharset);
1903 return false;
1906 // Check for differing scheme or authority parts:
1907 if ((m_aScheme.compare(
1908 aSubject.m_aScheme, m_aAbsURIRef, aSubject.m_aAbsURIRef)
1909 != 0)
1910 || (m_aUser.compare(
1911 aSubject.m_aUser, m_aAbsURIRef, aSubject.m_aAbsURIRef)
1912 != 0)
1913 || (m_aAuth.compare(
1914 aSubject.m_aAuth, m_aAbsURIRef, aSubject.m_aAbsURIRef)
1915 != 0)
1916 || (m_aHost.compare(
1917 aSubject.m_aHost, m_aAbsURIRef, aSubject.m_aAbsURIRef)
1918 != 0)
1919 || (m_aPort.compare(
1920 aSubject.m_aPort, m_aAbsURIRef, aSubject.m_aAbsURIRef)
1921 != 0))
1923 rTheRelURIRef = aSubject.GetMainURL(eDecodeMechanism, eCharset);
1924 return false;
1927 sal_Unicode const * pBasePathBegin
1928 = m_aAbsURIRef.getStr() + m_aPath.getBegin();
1929 sal_Unicode const * pBasePathEnd = pBasePathBegin + m_aPath.getLength();
1930 sal_Unicode const * pSubjectPathBegin
1931 = aSubject.m_aAbsURIRef.getStr() + aSubject.m_aPath.getBegin();
1932 sal_Unicode const * pSubjectPathEnd
1933 = pSubjectPathBegin + aSubject.m_aPath.getLength();
1935 // Make nMatch point past the last matching slash, or past the end of the
1936 // paths, in case they are equal:
1937 sal_Unicode const * pSlash = nullptr;
1938 sal_Unicode const * p1 = pBasePathBegin;
1939 sal_Unicode const * p2 = pSubjectPathBegin;
1940 for (;;)
1942 if (p1 == pBasePathEnd || p2 == pSubjectPathEnd)
1944 if (p1 == pBasePathEnd && p2 == pSubjectPathEnd)
1945 pSlash = p1;
1946 break;
1949 sal_Unicode c = *p1++;
1950 if (c != *p2++)
1951 break;
1952 if (c == '/')
1953 pSlash = p1;
1955 if (!pSlash)
1957 // One of the paths does not start with '/':
1958 rTheRelURIRef = aSubject.GetMainURL(eDecodeMechanism, eCharset);
1959 return false;
1961 sal_Int32 nMatch = pSlash - pBasePathBegin;
1963 // If the two URLs are DOS file URLs starting with different volumes
1964 // (e.g., file:///a:/... and file:///b:/...), the subject is not made
1965 // relative (it could be, but some people do not like that):
1966 if (m_eScheme == INetProtocol::File
1967 && nMatch <= 1
1968 && hasDosVolume(eStyle)
1969 && aSubject.hasDosVolume(eStyle)) //TODO! ok to use eStyle for these?
1971 rTheRelURIRef = aSubject.GetMainURL(eDecodeMechanism, eCharset);
1972 return false;
1975 // For every slash in the base path after nMatch, a prefix of "../" is
1976 // added to the new relative URL (if the common prefix of the two paths is
1977 // only "/"---but see handling of file URLs above---, the complete subject
1978 // path could go into the new relative URL instead, but some people don't
1979 // like that):
1980 OUStringBuffer aSynRelURIRef;
1981 for (sal_Unicode const * p = pBasePathBegin + nMatch; p != pBasePathEnd;
1982 ++p)
1984 if (*p == '/')
1985 aSynRelURIRef.append("../");
1988 // If the new relative URL would start with "//" (i.e., it would be
1989 // mistaken for a relative URL starting with an authority part), or if the
1990 // new relative URL would neither be empty nor start with <"/"> nor start
1991 // with <1*rseg> (i.e., it could be mistaken for an absolute URL starting
1992 // with a scheme part), then the new relative URL is prefixed with "./":
1993 if (aSynRelURIRef.isEmpty())
1995 if (pSubjectPathEnd - pSubjectPathBegin >= nMatch + 2
1996 && pSubjectPathBegin[nMatch] == '/'
1997 && pSubjectPathBegin[nMatch + 1] == '/')
1999 aSynRelURIRef.append("./");
2001 else
2003 for (sal_Unicode const * p = pSubjectPathBegin + nMatch;
2004 p != pSubjectPathEnd && *p != '/'; ++p)
2006 if (mustEncode(*p, PART_REL_SEGMENT_EXTRA))
2008 aSynRelURIRef.append("./");
2009 break;
2015 // The remainder of the subject path, starting at nMatch, is appended to
2016 // the new relative URL:
2017 aSynRelURIRef.append(decode(pSubjectPathBegin + nMatch, pSubjectPathEnd,
2018 eDecodeMechanism, eCharset));
2020 // If the subject has defined query or fragment parts, they are appended
2021 // to the new relative URL:
2022 if (aSubject.m_aQuery.isPresent())
2024 aSynRelURIRef.append('?');
2025 aSynRelURIRef.append(aSubject.decode(aSubject.m_aQuery,
2026 eDecodeMechanism, eCharset));
2028 if (aSubject.m_aFragment.isPresent())
2030 aSynRelURIRef.append('#');
2031 aSynRelURIRef.append(aSubject.decode(aSubject.m_aFragment,
2032 eDecodeMechanism, eCharset));
2035 rTheRelURIRef = aSynRelURIRef.makeStringAndClear();
2036 return true;
2039 // static
2040 bool INetURLObject::convertIntToExt(OUString const & rTheIntURIRef,
2041 OUString & rTheExtURIRef,
2042 DecodeMechanism eDecodeMechanism,
2043 rtl_TextEncoding eCharset)
2045 OUString aSynExtURIRef(encodeText(rTheIntURIRef, PART_VISIBLE,
2046 EncodeMechanism::NotCanonical, eCharset, true));
2047 sal_Unicode const * pBegin = aSynExtURIRef.getStr();
2048 sal_Unicode const * pEnd = pBegin + aSynExtURIRef.getLength();
2049 sal_Unicode const * p = pBegin;
2050 PrefixInfo const * pPrefix = getPrefix(p, pEnd);
2051 bool bConvert = pPrefix && pPrefix->m_eKind == PrefixInfo::INTERNAL;
2052 if (bConvert)
2054 aSynExtURIRef =
2055 aSynExtURIRef.replaceAt(0, p - pBegin,
2056 OUString::createFromAscii(pPrefix->m_pTranslatedPrefix));
2058 rTheExtURIRef = decode(aSynExtURIRef, eDecodeMechanism, eCharset);
2059 return bConvert;
2062 // static
2063 bool INetURLObject::convertExtToInt(OUString const & rTheExtURIRef,
2064 OUString & rTheIntURIRef,
2065 DecodeMechanism eDecodeMechanism,
2066 rtl_TextEncoding eCharset)
2068 OUString aSynIntURIRef(encodeText(rTheExtURIRef, PART_VISIBLE,
2069 EncodeMechanism::NotCanonical, eCharset, true));
2070 sal_Unicode const * pBegin = aSynIntURIRef.getStr();
2071 sal_Unicode const * pEnd = pBegin + aSynIntURIRef.getLength();
2072 sal_Unicode const * p = pBegin;
2073 PrefixInfo const * pPrefix = getPrefix(p, pEnd);
2074 bool bConvert = pPrefix && pPrefix->m_eKind == PrefixInfo::EXTERNAL;
2075 if (bConvert)
2077 aSynIntURIRef =
2078 aSynIntURIRef.replaceAt(0, p - pBegin,
2079 OUString::createFromAscii(pPrefix->m_pTranslatedPrefix));
2081 rTheIntURIRef = decode(aSynIntURIRef, eDecodeMechanism, eCharset);
2082 return bConvert;
/* Look up the longest known scheme prefix at the start of [rBegin, pEnd).
 *
 * Each input character is ASCII-lower-cased and compared against the sorted
 * table aMap below.  On a match, rBegin is advanced past the matched prefix
 * and a pointer to the matching table entry is returned.  If nothing matches,
 * nullptr is returned and rBegin keeps its original value.
 */
2085 // static
2086 INetURLObject::PrefixInfo const * INetURLObject::getPrefix(sal_Unicode const *& rBegin,
2087 sal_Unicode const * pEnd)
// Entry layout as used below: prefix, translated prefix (or nullptr), scheme,
// kind.
2089 static PrefixInfo const aMap[]
2090 = { // dummy entry at front needed, because pLast may point here:
2091 { nullptr, nullptr, INetProtocol::NotValid, PrefixInfo::INTERNAL },
2092 { ".component:", "staroffice.component:", INetProtocol::Component,
2093 PrefixInfo::INTERNAL },
2094 { ".uno:", "staroffice.uno:", INetProtocol::Uno,
2095 PrefixInfo::INTERNAL },
2096 { "cid:", nullptr, INetProtocol::Cid, PrefixInfo::OFFICIAL },
2097 { "data:", nullptr, INetProtocol::Data, PrefixInfo::OFFICIAL },
2098 { "db:", "staroffice.db:", INetProtocol::Db, PrefixInfo::INTERNAL },
2099 { "file:", nullptr, INetProtocol::File, PrefixInfo::OFFICIAL },
2100 { "ftp:", nullptr, INetProtocol::Ftp, PrefixInfo::OFFICIAL },
2101 { "hid:", "staroffice.hid:", INetProtocol::Hid,
2102 PrefixInfo::INTERNAL },
2103 { "http:", nullptr, INetProtocol::Http, PrefixInfo::OFFICIAL },
2104 { "https:", nullptr, INetProtocol::Https, PrefixInfo::OFFICIAL },
2105 { "javascript:", nullptr, INetProtocol::Javascript, PrefixInfo::OFFICIAL },
2106 { "ldap:", nullptr, INetProtocol::Ldap, PrefixInfo::OFFICIAL },
2107 { "macro:", "staroffice.macro:", INetProtocol::Macro,
2108 PrefixInfo::INTERNAL },
2109 { "mailto:", nullptr, INetProtocol::Mailto, PrefixInfo::OFFICIAL },
2110 { "private:", "staroffice.private:", INetProtocol::PrivSoffice,
2111 PrefixInfo::INTERNAL },
2112 { "private:factory/", "staroffice.factory:",
2113 INetProtocol::PrivSoffice, PrefixInfo::INTERNAL },
2114 { "private:helpid/", "staroffice.helpid:", INetProtocol::PrivSoffice,
2115 PrefixInfo::INTERNAL },
2116 { "private:java/", "staroffice.java:", INetProtocol::PrivSoffice,
2117 PrefixInfo::INTERNAL },
2118 { "private:searchfolder:", "staroffice.searchfolder:",
2119 INetProtocol::PrivSoffice, PrefixInfo::INTERNAL },
2120 { "private:trashcan:", "staroffice.trashcan:",
2121 INetProtocol::PrivSoffice, PrefixInfo::INTERNAL },
2122 { "sftp:", nullptr, INetProtocol::Sftp, PrefixInfo::OFFICIAL },
2123 { "slot:", "staroffice.slot:", INetProtocol::Slot,
2124 PrefixInfo::INTERNAL },
2125 { "smb:", nullptr, INetProtocol::Smb, PrefixInfo::OFFICIAL },
2126 { "staroffice.component:", ".component:", INetProtocol::Component,
2127 PrefixInfo::EXTERNAL },
2128 { "staroffice.db:", "db:", INetProtocol::Db, PrefixInfo::EXTERNAL },
2129 { "staroffice.factory:", "private:factory/",
2130 INetProtocol::PrivSoffice, PrefixInfo::EXTERNAL },
2131 { "staroffice.helpid:", "private:helpid/", INetProtocol::PrivSoffice,
2132 PrefixInfo::EXTERNAL },
2133 { "staroffice.hid:", "hid:", INetProtocol::Hid,
2134 PrefixInfo::EXTERNAL },
2135 { "staroffice.java:", "private:java/", INetProtocol::PrivSoffice,
2136 PrefixInfo::EXTERNAL },
2137 { "staroffice.macro:", "macro:", INetProtocol::Macro,
2138 PrefixInfo::EXTERNAL },
2139 { "staroffice.private:", "private:", INetProtocol::PrivSoffice,
2140 PrefixInfo::EXTERNAL },
2141 { "staroffice.searchfolder:", "private:searchfolder:",
2142 INetProtocol::PrivSoffice, PrefixInfo::EXTERNAL },
2143 { "staroffice.slot:", "slot:", INetProtocol::Slot,
2144 PrefixInfo::EXTERNAL },
2145 { "staroffice.trashcan:", "private:trashcan:",
2146 INetProtocol::PrivSoffice, PrefixInfo::EXTERNAL },
2147 { "staroffice.uno:", ".uno:", INetProtocol::Uno,
2148 PrefixInfo::EXTERNAL },
2149 { "staroffice:", "private:", INetProtocol::PrivSoffice,
2150 PrefixInfo::EXTERNAL },
2151 { "telnet:", nullptr, INetProtocol::Telnet, PrefixInfo::OFFICIAL },
// NOTE(review): this is the only INTERNAL entry without a translated prefix.
// convertIntToExt() hands m_pTranslatedPrefix of INTERNAL entries to
// OUString::createFromAscii() - confirm a cmis URL can never reach that call
// with nullptr.
2152 { "vnd.libreoffice.cmis:", nullptr, INetProtocol::Cmis, PrefixInfo::INTERNAL },
2153 { "vnd.sun.star.cmd:", nullptr, INetProtocol::VndSunStarCmd,
2154 PrefixInfo::OFFICIAL },
2155 { "vnd.sun.star.expand:", nullptr, INetProtocol::VndSunStarExpand,
2156 PrefixInfo::OFFICIAL },
2157 { "vnd.sun.star.help:", nullptr, INetProtocol::VndSunStarHelp,
2158 PrefixInfo::OFFICIAL },
2159 { "vnd.sun.star.hier:", nullptr, INetProtocol::VndSunStarHier,
2160 PrefixInfo::OFFICIAL },
2161 { "vnd.sun.star.pkg:", nullptr, INetProtocol::VndSunStarPkg,
2162 PrefixInfo::OFFICIAL },
2163 { "vnd.sun.star.tdoc:", nullptr, INetProtocol::VndSunStarTdoc,
2164 PrefixInfo::OFFICIAL },
2165 { "vnd.sun.star.webdav:", nullptr, INetProtocol::VndSunStarWebdav,
2166 PrefixInfo::OFFICIAL }
2168 /* This list needs to be sorted, or you'll introduce serious bugs */
// [pFirst, pLast] is the range of table entries that still agree with the
// characters consumed so far; i is the index of the next prefix character to
// compare.  pMatch / pMatched remember the longest completely matched entry
// and the input position right behind it.
2170 PrefixInfo const * pFirst = aMap + 1;
2171 PrefixInfo const * pLast = aMap + sizeof aMap / sizeof (PrefixInfo) - 1;
2172 PrefixInfo const * pMatch = nullptr;
2173 sal_Unicode const * pMatched = rBegin;
2174 sal_Unicode const * p = rBegin;
2175 sal_Int32 i = 0;
2176 for (; pFirst < pLast; ++i)
// The first candidate's prefix ends here, so it has matched completely:
// record it and go on looking for a longer match among the remaining ones.
2178 if (pFirst->m_pPrefix[i] == '\0')
2180 pMatch = pFirst++;
2181 pMatched = p;
2183 if (p >= pEnd)
2184 break;
2185 sal_uInt32 nChar = rtl::toAsciiLowerCase(*p++);
// Narrow the candidate range from both ends to the entries whose i-th
// character equals nChar.
2186 while (pFirst <= pLast && static_cast<unsigned char>(pFirst->m_pPrefix[i]) < nChar)
2187 ++pFirst;
2188 while (pFirst <= pLast && static_cast<unsigned char>(pLast->m_pPrefix[i]) > nChar)
2189 --pLast;
// Exactly one candidate is left: compare the rest of its prefix directly.
2191 if (pFirst == pLast)
2193 sal_Char const * q = pFirst->m_pPrefix + i;
2194 while (p < pEnd && *q != '\0'
2195 && rtl::toAsciiLowerCase(*p) == static_cast<unsigned char>(*q))
2197 ++p;
2198 ++q;
2200 if (*q == '\0')
2202 rBegin = p;
2203 return pFirst;
// Otherwise fall back to the longest completely matched entry (nullptr if
// there was none, in which case pMatched still equals the original rBegin).
2206 rBegin = pMatched;
2207 return pMatch;
2210 sal_Int32 INetURLObject::getAuthorityBegin() const
2212 DBG_ASSERT(getSchemeInfo().m_bAuthority,
2213 "INetURLObject::getAuthority(): Bad scheme");
2214 sal_Int32 nBegin;
2215 if (m_aUser.isPresent())
2216 nBegin = m_aUser.getBegin();
2217 else if (m_aHost.isPresent())
2218 nBegin = m_aHost.getBegin();
2219 else
2220 nBegin = m_aPath.getBegin();
2221 nBegin -= RTL_CONSTASCII_LENGTH("//");
2222 DBG_ASSERT(m_aAbsURIRef[nBegin] == '/' && m_aAbsURIRef[nBegin + 1] == '/',
2223 "INetURLObject::getAuthority(): Bad authority");
2224 return nBegin;
2227 INetURLObject::SubString INetURLObject::getAuthority() const
2229 sal_Int32 nBegin = getAuthorityBegin();
2230 sal_Int32 nEnd = m_aPort.isPresent() ? m_aPort.getEnd() :
2231 m_aHost.isPresent() ? m_aHost.getEnd() :
2232 m_aAuth.isPresent() ? m_aAuth.getEnd() :
2233 m_aUser.isPresent() ? m_aUser.getEnd() :
2234 nBegin + RTL_CONSTASCII_LENGTH("//");
2235 return SubString(nBegin, nEnd - nBegin);
2238 bool INetURLObject::setUser(OUString const & rTheUser,
2239 rtl_TextEncoding eCharset)
2241 if (
2242 !getSchemeInfo().m_bUser
2245 return false;
2248 OUString aNewUser(encodeText(rTheUser, PART_USER_PASSWORD,
2249 EncodeMechanism::WasEncoded, eCharset, false));
2250 sal_Int32 nDelta;
2251 if (m_aUser.isPresent())
2252 nDelta = m_aUser.set(m_aAbsURIRef, aNewUser);
2253 else if (m_aHost.isPresent())
2255 m_aAbsURIRef.insert(m_aHost.getBegin(), u'@');
2256 nDelta = m_aUser.set(m_aAbsURIRef, aNewUser, m_aHost.getBegin()) + 1;
2258 else if (getSchemeInfo().m_bHost)
2259 return false;
2260 else
2261 nDelta = m_aUser.set(m_aAbsURIRef, aNewUser, m_aPath.getBegin());
2262 m_aAuth += nDelta;
2263 m_aHost += nDelta;
2264 m_aPort += nDelta;
2265 m_aPath += nDelta;
2266 m_aQuery += nDelta;
2267 m_aFragment += nDelta;
2268 return true;
2271 namespace
2273 void lcl_Erase(OUStringBuffer &rBuf, sal_Int32 index, sal_Int32 count)
2275 OUString sTemp(rBuf.makeStringAndClear());
2276 rBuf.append(sTemp.replaceAt(index, count, OUString()));
2280 bool INetURLObject::clearPassword()
2282 if (!getSchemeInfo().m_bPassword)
2283 return false;
2284 if (m_aAuth.isPresent())
2286 lcl_Erase(m_aAbsURIRef, m_aAuth.getBegin() - 1,
2287 m_aAuth.getLength() + 1);
2288 sal_Int32 nDelta = m_aAuth.clear() - 1;
2289 m_aHost += nDelta;
2290 m_aPort += nDelta;
2291 m_aPath += nDelta;
2292 m_aQuery += nDelta;
2293 m_aFragment += nDelta;
2295 return true;
2298 bool INetURLObject::setPassword(OUString const & rThePassword,
2299 rtl_TextEncoding eCharset)
2301 if (!getSchemeInfo().m_bPassword)
2302 return false;
2303 OUString aNewAuth(encodeText(rThePassword, PART_USER_PASSWORD,
2304 EncodeMechanism::WasEncoded, eCharset, false));
2305 sal_Int32 nDelta;
2306 if (m_aAuth.isPresent())
2307 nDelta = m_aAuth.set(m_aAbsURIRef, aNewAuth);
2308 else if (m_aUser.isPresent())
2310 m_aAbsURIRef.insert(m_aUser.getEnd(), u':');
2311 nDelta
2312 = m_aAuth.set(m_aAbsURIRef, aNewAuth, m_aUser.getEnd() + 1) + 1;
2314 else if (m_aHost.isPresent())
2316 m_aAbsURIRef.insert(m_aHost.getBegin(),
2317 OUString( ":@" ));
2318 m_aUser.set(m_aAbsURIRef, OUString(), m_aHost.getBegin());
2319 nDelta
2320 = m_aAuth.set(m_aAbsURIRef, aNewAuth, m_aHost.getBegin() + 1) + 2;
2322 else if (getSchemeInfo().m_bHost)
2323 return false;
2324 else
2326 m_aAbsURIRef.insert(m_aPath.getBegin(), u':');
2327 m_aUser.set(m_aAbsURIRef, OUString(), m_aPath.getBegin());
2328 nDelta
2329 = m_aAuth.set(m_aAbsURIRef, aNewAuth, m_aPath.getBegin() + 1) + 1;
2331 m_aHost += nDelta;
2332 m_aPort += nDelta;
2333 m_aPath += nDelta;
2334 m_aQuery += nDelta;
2335 m_aFragment += nDelta;
2336 return true;
/* Parse a host at the start of [rBegin, pEnd) with a hand-written state
 * machine.
 *
 * Three forms are recognized: a domain name (labels made of ASCII letters,
 * digits and '_', with '-' allowed inside a label, separated by '.'), a
 * dotted IPv4 address with exactly four numeric parts, and an IPv6 address
 * enclosed in '[' ... ']'.  Scanning stops at pEnd or at the first character
 * that does not fit the current state.
 *
 * On success the function returns true, advances rBegin to the position where
 * scanning stopped and stores the host in rCanonic: domain names are copied
 * verbatim from the input, IPv4/IPv6 addresses are re-assembled from the
 * parsed numbers.  On failure it returns false and leaves rBegin and rCanonic
 * untouched.
 *
 * NOTE(review): IPv4 parts are limited to three digits but are not checked
 * against 255.
 */
2339 // static
2340 bool INetURLObject::parseHost(sal_Unicode const *& rBegin, sal_Unicode const * pEnd,
2341 OUString & rCanonic)
2343 // RFC 2373 is inconsistent about how to write an IPv6 address in which an
2344 // IPv4 address directly follows the abbreviating "::". The ABNF in
2345 // Appendix B suggests ":::13.1.68.3", while an example in 2.2/3 explicitly
2346 // mentions "::13:1.68.3". This algorithm accepts both variants:
2347 enum State { STATE_INITIAL, STATE_LABEL, STATE_LABEL_HYPHEN,
2348 STATE_LABEL_DOT, STATE_TOPLABEL, STATE_TOPLABEL_HYPHEN,
2349 STATE_TOPLABEL_DOT, STATE_IP4, STATE_IP4_DOT, STATE_IP6,
2350 STATE_IP6_COLON, STATE_IP6_2COLON, STATE_IP6_3COLON,
2351 STATE_IP6_HEXSEQ1, STATE_IP6_HEXSEQ1_COLON,
2352 STATE_IP6_HEXSEQ1_MAYBE_IP4, STATE_IP6_HEXSEQ2,
2353 STATE_IP6_HEXSEQ2_COLON, STATE_IP6_HEXSEQ2_MAYBE_IP4,
2354 STATE_IP6_IP4, STATE_IP6_IP4_DOT, STATE_IP6_DONE };
// aTheCanonic collects the re-assembled address; nNumber / nDigits hold the
// numeric group currently being read, nOctets counts IPv4 parts.
2355 OUStringBuffer aTheCanonic;
2356 sal_uInt32 nNumber = 0;
2357 int nDigits = 0;
2358 int nOctets = 0;
2359 State eState = STATE_INITIAL;
2360 sal_Unicode const * p = rBegin;
2361 for (; p != pEnd; ++p)
2362 switch (eState)
// The first character selects the form: '[' starts IPv6, a letter or '_'
// starts a domain name, a digit starts a tentative IPv4 address.
2364 case STATE_INITIAL:
2365 if (*p == '[')
2367 aTheCanonic.append('[');
2368 eState = STATE_IP6;
2370 else if (rtl::isAsciiAlpha(*p) || *p == '_')
2371 eState = STATE_TOPLABEL;
2372 else if (rtl::isAsciiDigit(*p))
2374 nNumber = INetMIME::getWeight(*p);
2375 nDigits = 1;
2376 nOctets = 1;
2377 eState = STATE_IP4;
2379 else
2380 goto done;
2381 break;
2383 case STATE_LABEL:
2384 if (*p == '.')
2385 eState = STATE_LABEL_DOT;
2386 else if (*p == '-')
2387 eState = STATE_LABEL_HYPHEN;
2388 else if (!rtl::isAsciiAlphanumeric(*p) && *p != '_')
2389 goto done;
2390 break;
2392 case STATE_LABEL_HYPHEN:
2393 if (rtl::isAsciiAlphanumeric(*p) || *p == '_')
2394 eState = STATE_LABEL;
2395 else if (*p != '-')
2396 goto done;
2397 break;
2399 case STATE_LABEL_DOT:
2400 if (rtl::isAsciiAlpha(*p) || *p == '_')
2401 eState = STATE_TOPLABEL;
2402 else if (rtl::isAsciiDigit(*p))
2403 eState = STATE_LABEL;
2404 else
2405 goto done;
2406 break;
2408 case STATE_TOPLABEL:
2409 if (*p == '.')
2410 eState = STATE_TOPLABEL_DOT;
2411 else if (*p == '-')
2412 eState = STATE_TOPLABEL_HYPHEN;
2413 else if (!rtl::isAsciiAlphanumeric(*p) && *p != '_')
2414 goto done;
2415 break;
2417 case STATE_TOPLABEL_HYPHEN:
2418 if (rtl::isAsciiAlphanumeric(*p) || *p == '_')
2419 eState = STATE_TOPLABEL;
2420 else if (*p != '-')
2421 goto done;
2422 break;
2424 case STATE_TOPLABEL_DOT:
2425 if (rtl::isAsciiAlpha(*p) || *p == '_')
2426 eState = STATE_TOPLABEL;
2427 else if (rtl::isAsciiDigit(*p))
2428 eState = STATE_LABEL;
2429 else
2430 goto done;
2431 break;
// Tentative IPv4: anything that does not fit a dotted quad (a fifth part, a
// fourth digit, a letter, '_' or '-') switches over to the domain-name states.
2433 case STATE_IP4:
2434 if (*p == '.')
2435 if (nOctets < 4)
2437 aTheCanonic.append( OUString::number(nNumber) );
2438 aTheCanonic.append( '.' );
2439 ++nOctets;
2440 eState = STATE_IP4_DOT;
2442 else
2443 eState = STATE_LABEL_DOT;
2444 else if (*p == '-')
2445 eState = STATE_LABEL_HYPHEN;
2446 else if (rtl::isAsciiAlpha(*p) || *p == '_')
2447 eState = STATE_LABEL;
2448 else if (rtl::isAsciiDigit(*p))
2449 if (nDigits < 3)
2451 nNumber = 10 * nNumber + INetMIME::getWeight(*p);
2452 ++nDigits;
2454 else
2455 eState = STATE_LABEL;
2456 else
2457 goto done;
2458 break;
2460 case STATE_IP4_DOT:
2461 if (rtl::isAsciiAlpha(*p) || *p == '_')
2462 eState = STATE_TOPLABEL;
2463 else if (rtl::isAsciiDigit(*p))
2465 nNumber = INetMIME::getWeight(*p);
2466 nDigits = 1;
2467 eState = STATE_IP4;
2469 else
2470 goto done;
2471 break;
// IPv6, directly after '['.
2473 case STATE_IP6:
2474 if (*p == ':')
2475 eState = STATE_IP6_COLON;
2476 else if (rtl::isAsciiHexDigit(*p))
2478 nNumber = INetMIME::getHexWeight(*p);
2479 nDigits = 1;
2480 eState = STATE_IP6_HEXSEQ1;
2482 else
2483 goto done;
2484 break;
2486 case STATE_IP6_COLON:
2487 if (*p == ':')
2489 aTheCanonic.append("::");
2490 eState = STATE_IP6_2COLON;
2492 else
2493 goto done;
2494 break;
2496 case STATE_IP6_2COLON:
2497 if (*p == ']')
2498 eState = STATE_IP6_DONE;
2499 else if (*p == ':')
2501 aTheCanonic.append(':');
2502 eState = STATE_IP6_3COLON;
2504 else if (rtl::isAsciiDigit(*p))
2506 nNumber = INetMIME::getWeight(*p);
2507 nDigits = 1;
2508 eState = STATE_IP6_HEXSEQ2_MAYBE_IP4;
2510 else if (rtl::isAsciiHexDigit(*p))
2512 nNumber = INetMIME::getHexWeight(*p);
2513 nDigits = 1;
2514 eState = STATE_IP6_HEXSEQ2;
2516 else
2517 goto done;
2518 break;
2520 case STATE_IP6_3COLON:
2521 if (rtl::isAsciiDigit(*p))
2523 nNumber = INetMIME::getWeight(*p);
2524 nDigits = 1;
2525 nOctets = 1;
2526 eState = STATE_IP6_IP4;
2528 else
2529 goto done;
2530 break;
2532 case STATE_IP6_HEXSEQ1:
2533 if (*p == ']')
2535 aTheCanonic.append(
2536 OUString::number(nNumber, 16));
2537 eState = STATE_IP6_DONE;
2539 else if (*p == ':')
2541 aTheCanonic.append(
2542 OUString::number(nNumber, 16));
2543 aTheCanonic.append(':');
2544 eState = STATE_IP6_HEXSEQ1_COLON;
2546 else if (rtl::isAsciiHexDigit(*p) && nDigits < 4)
2548 nNumber = 16 * nNumber + INetMIME::getHexWeight(*p);
2549 ++nDigits;
2551 else
2552 goto done;
2553 break;
2555 case STATE_IP6_HEXSEQ1_COLON:
2556 if (*p == ':')
2558 aTheCanonic.append(':');
2559 eState = STATE_IP6_2COLON;
2561 else if (rtl::isAsciiDigit(*p))
2563 nNumber = INetMIME::getWeight(*p);
2564 nDigits = 1;
2565 eState = STATE_IP6_HEXSEQ1_MAYBE_IP4;
2567 else if (rtl::isAsciiHexDigit(*p))
2569 nNumber = INetMIME::getHexWeight(*p);
2570 nDigits = 1;
2571 eState = STATE_IP6_HEXSEQ1;
2573 else
2574 goto done;
2575 break;
// A group of decimal digits may turn out to be either a hex group or the
// first part of an embedded IPv4 address.  It is accumulated as hex; if a '.'
// follows, the hex nibbles are re-read as decimal digits.
2577 case STATE_IP6_HEXSEQ1_MAYBE_IP4:
2578 if (*p == ']')
2580 aTheCanonic.append(
2581 OUString::number(nNumber, 16));
2582 eState = STATE_IP6_DONE;
2584 else if (*p == ':')
2586 aTheCanonic.append(
2587 OUString::number(nNumber, 16));
2588 aTheCanonic.append(':');
2589 eState = STATE_IP6_HEXSEQ1_COLON;
2591 else if (*p == '.')
2593 nNumber = 100 * (nNumber >> 8) + 10 * (nNumber >> 4 & 15)
2594 + (nNumber & 15);
2595 aTheCanonic.append(
2596 OUString::number(nNumber));
2597 aTheCanonic.append('.');
2598 nOctets = 2;
2599 eState = STATE_IP6_IP4_DOT;
2601 else if (rtl::isAsciiDigit(*p) && nDigits < 3)
2603 nNumber = 16 * nNumber + INetMIME::getWeight(*p);
2604 ++nDigits;
2606 else if (rtl::isAsciiHexDigit(*p) && nDigits < 4)
2608 nNumber = 16 * nNumber + INetMIME::getHexWeight(*p);
2609 ++nDigits;
2610 eState = STATE_IP6_HEXSEQ1;
2612 else
2613 goto done;
2614 break;
// The HEXSEQ2 states mirror the HEXSEQ1 ones for groups that follow a "::";
// they do not accept another "::".
2616 case STATE_IP6_HEXSEQ2:
2617 if (*p == ']')
2619 aTheCanonic.append(
2620 OUString::number(nNumber, 16));
2621 eState = STATE_IP6_DONE;
2623 else if (*p == ':')
2625 aTheCanonic.append(
2626 OUString::number(nNumber, 16));
2627 aTheCanonic.append(':');
2628 eState = STATE_IP6_HEXSEQ2_COLON;
2630 else if (rtl::isAsciiHexDigit(*p) && nDigits < 4)
2632 nNumber = 16 * nNumber + INetMIME::getHexWeight(*p);
2633 ++nDigits;
2635 else
2636 goto done;
2637 break;
2639 case STATE_IP6_HEXSEQ2_COLON:
2640 if (rtl::isAsciiDigit(*p))
2642 nNumber = INetMIME::getWeight(*p);
2643 nDigits = 1;
2644 eState = STATE_IP6_HEXSEQ2_MAYBE_IP4;
2646 else if (rtl::isAsciiHexDigit(*p))
2648 nNumber = INetMIME::getHexWeight(*p);
2649 nDigits = 1;
2650 eState = STATE_IP6_HEXSEQ2;
2652 else
2653 goto done;
2654 break;
2656 case STATE_IP6_HEXSEQ2_MAYBE_IP4:
2657 if (*p == ']')
2659 aTheCanonic.append(
2660 OUString::number(nNumber, 16));
2661 eState = STATE_IP6_DONE;
2663 else if (*p == ':')
2665 aTheCanonic.append(
2666 OUString::number(nNumber, 16));
2667 aTheCanonic.append(':');
2668 eState = STATE_IP6_HEXSEQ2_COLON;
2670 else if (*p == '.')
2672 nNumber = 100 * (nNumber >> 8) + 10 * (nNumber >> 4 & 15)
2673 + (nNumber & 15);
2674 aTheCanonic.append(
2675 OUString::number(nNumber));
2676 aTheCanonic.append('.');
2677 nOctets = 2;
2678 eState = STATE_IP6_IP4_DOT;
2680 else if (rtl::isAsciiDigit(*p) && nDigits < 3)
2682 nNumber = 16 * nNumber + INetMIME::getWeight(*p);
2683 ++nDigits;
2685 else if (rtl::isAsciiHexDigit(*p) && nDigits < 4)
2687 nNumber = 16 * nNumber + INetMIME::getHexWeight(*p);
2688 ++nDigits;
2689 eState = STATE_IP6_HEXSEQ2;
2691 else
2692 goto done;
2693 break;
// IPv4 address embedded at the end of an IPv6 address; ']' is only accepted
// after the fourth part.
2695 case STATE_IP6_IP4:
2696 if (*p == ']')
2697 if (nOctets == 4)
2699 aTheCanonic.append(
2700 OUString::number(nNumber));
2701 eState = STATE_IP6_DONE;
2703 else
2704 goto done;
2705 else if (*p == '.')
2706 if (nOctets < 4)
2708 aTheCanonic.append(
2709 OUString::number(nNumber));
2710 aTheCanonic.append('.');
2711 ++nOctets;
2712 eState = STATE_IP6_IP4_DOT;
2714 else
2715 goto done;
2716 else if (rtl::isAsciiDigit(*p) && nDigits < 3)
2718 nNumber = 10 * nNumber + INetMIME::getWeight(*p);
2719 ++nDigits;
2721 else
2722 goto done;
2723 break;
2725 case STATE_IP6_IP4_DOT:
2726 if (rtl::isAsciiDigit(*p))
2728 nNumber = INetMIME::getWeight(*p);
2729 nDigits = 1;
2730 eState = STATE_IP6_IP4;
2732 else
2733 goto done;
2734 break;
2736 case STATE_IP6_DONE:
2737 goto done;
// Decide from the state in which scanning stopped whether a complete host
// has been read.
2739 done:
2740 switch (eState)
// Domain name: the canonic form is the scanned input itself.
2742 case STATE_LABEL:
2743 case STATE_TOPLABEL:
2744 case STATE_TOPLABEL_DOT:
2745 aTheCanonic.setLength(0);
2746 aTheCanonic.append(rBegin, p - rBegin);
2747 rBegin = p;
2748 rCanonic = aTheCanonic.makeStringAndClear();
2749 return true;
// IPv4: only valid with four parts; the last part still has to be appended.
2751 case STATE_IP4:
2752 if (nOctets == 4)
2754 aTheCanonic.append(
2755 OUString::number(nNumber));
2756 rBegin = p;
2757 rCanonic = aTheCanonic.makeStringAndClear();
2758 return true;
2760 return false;
2762 case STATE_IP6_DONE:
2763 aTheCanonic.append(']');
2764 rBegin = p;
2765 rCanonic = aTheCanonic.makeStringAndClear();
2766 return true;
2768 default:
2769 return false;
2773 // static
2774 bool INetURLObject::parseHostOrNetBiosName(
2775 sal_Unicode const * pBegin, sal_Unicode const * pEnd,
2776 EncodeMechanism eMechanism, rtl_TextEncoding eCharset, bool bNetBiosName,
2777 OUStringBuffer* pCanonic)
2779 OUString aTheCanonic;
2780 if (pBegin < pEnd)
2782 sal_Unicode const * p = pBegin;
2783 if (!parseHost(p, pEnd, aTheCanonic) || p != pEnd)
2785 if (bNetBiosName)
2787 OUStringBuffer buf;
2788 while (pBegin < pEnd)
2790 EscapeType eEscapeType;
2791 sal_uInt32 nUTF32 = getUTF32(pBegin, pEnd,
2792 eMechanism, eCharset,
2793 eEscapeType);
2794 if (!INetMIME::isVisible(nUTF32))
2795 return false;
2796 if (!rtl::isAsciiAlphanumeric(nUTF32))
2797 switch (nUTF32)
2799 case '"':
2800 case '*':
2801 case '+':
2802 case ',':
2803 case '/':
2804 case ':':
2805 case ';':
2806 case '<':
2807 case '=':
2808 case '>':
2809 case '?':
2810 case '[':
2811 case '\\':
2812 case ']':
2813 case '`':
2814 case '|':
2815 return false;
2817 if (pCanonic != nullptr) {
2818 appendUCS4(
2819 buf, nUTF32, eEscapeType, PART_URIC,
2820 eCharset, true);
2823 aTheCanonic = buf.makeStringAndClear();
2825 else
2826 return false;
2829 if (pCanonic != nullptr) {
2830 *pCanonic = aTheCanonic;
2832 return true;
2835 bool INetURLObject::setHost(OUString const & rTheHost,
2836 rtl_TextEncoding eCharset)
2838 if (!getSchemeInfo().m_bHost)
2839 return false;
2840 OUStringBuffer aSynHost(rTheHost);
2841 bool bNetBiosName = false;
2842 switch (m_eScheme)
2844 case INetProtocol::File:
2846 OUString sTemp(aSynHost.toString());
2847 if (sTemp.equalsIgnoreAsciiCase("localhost"))
2849 aSynHost.setLength(0);
2851 bNetBiosName = true;
2853 break;
2854 case INetProtocol::Ldap:
2855 if (aSynHost.isEmpty() && m_aPort.isPresent())
2856 return false;
2857 break;
2859 default:
2860 if (aSynHost.isEmpty())
2861 return false;
2862 break;
2864 if (!parseHostOrNetBiosName(
2865 aSynHost.getStr(), aSynHost.getStr() + aSynHost.getLength(),
2866 EncodeMechanism::WasEncoded, eCharset, bNetBiosName, &aSynHost))
2867 return false;
2868 sal_Int32 nDelta = m_aHost.set(m_aAbsURIRef, aSynHost.makeStringAndClear());
2869 m_aPort += nDelta;
2870 m_aPath += nDelta;
2871 m_aQuery += nDelta;
2872 m_aFragment += nDelta;
2873 return true;
/* Parse the path component of a URI of scheme eScheme and produce its
 * re-encoded form.
 *
 * Reading starts at *pBegin and stops at pEnd or at the delimiter(s) that end
 * the path for the given scheme (nQueryDelimiter and/or nFragmentDelimiter).
 * Characters are read with getUTF32() and written with appendUCS4() using a
 * scheme-specific Part.
 *
 * bSkippedInitialSlash, nSegmentDelimiter and nAltSegmentDelimiter are only
 * consulted for INetProtocol::File: a '/' is emitted first when
 * bSkippedInitialSlash is true, and unescaped occurrences of either segment
 * delimiter are written as '/'.
 *
 * Returns false if eScheme is NotValid or the input does not satisfy the
 * scheme's requirements; *pBegin and rSynPath are then left unchanged.  On
 * success *pBegin is set to the position where reading stopped and rSynPath
 * receives the resulting path.
 */
2876 // static
2877 bool INetURLObject::parsePath(INetProtocol eScheme,
2878 sal_Unicode const ** pBegin,
2879 sal_Unicode const * pEnd,
2880 EncodeMechanism eMechanism,
2881 rtl_TextEncoding eCharset,
2882 bool bSkippedInitialSlash,
2883 sal_uInt32 nSegmentDelimiter,
2884 sal_uInt32 nAltSegmentDelimiter,
2885 sal_uInt32 nQueryDelimiter,
2886 sal_uInt32 nFragmentDelimiter,
2887 OUStringBuffer &rSynPath)
2889 DBG_ASSERT(pBegin, "INetURLObject::parsePath(): Null output param");
2891 sal_Unicode const * pPos = *pBegin;
2892 OUStringBuffer aTheSynPath;
2894 switch (eScheme)
2896 case INetProtocol::NotValid:
2897 return false;
// ftp: the path runs up to the fragment, must start with '/' if non-empty,
// and defaults to "/".
2899 case INetProtocol::Ftp:
2900 if (pPos < pEnd && *pPos != '/' && *pPos != nFragmentDelimiter)
2901 return false;
2902 while (pPos < pEnd && *pPos != nFragmentDelimiter)
2904 EscapeType eEscapeType;
2905 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, eMechanism,
2906 eCharset, eEscapeType);
2907 appendUCS4(aTheSynPath, nUTF32, eEscapeType,
2908 PART_HTTP_PATH, eCharset, true);
2910 if (aTheSynPath.isEmpty())
2911 aTheSynPath.append('/');
2912 break;
// http-like schemes: as ftp, but the path also ends at the query delimiter.
2914 case INetProtocol::Http:
2915 case INetProtocol::VndSunStarWebdav:
2916 case INetProtocol::Https:
2917 case INetProtocol::Smb:
2918 case INetProtocol::Cmis:
2919 if (pPos < pEnd && *pPos != '/' && *pPos != nFragmentDelimiter)
2920 return false;
2921 while (pPos < pEnd && *pPos != nQueryDelimiter
2922 && *pPos != nFragmentDelimiter)
2924 EscapeType eEscapeType;
2925 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, eMechanism,
2926 eCharset, eEscapeType);
2927 appendUCS4(aTheSynPath, nUTF32, eEscapeType,
2928 PART_HTTP_PATH, eCharset, true);
2930 if (aTheSynPath.isEmpty())
2931 aTheSynPath.append('/');
2932 break;
// file: segment delimiters are normalized to '/', and a leading
// "/<letter>|" is rewritten to "/<letter>:".
2934 case INetProtocol::File:
2936 if (bSkippedInitialSlash)
2937 aTheSynPath.append('/');
2938 else if (pPos < pEnd
2939 && *pPos != nSegmentDelimiter
2940 && *pPos != nAltSegmentDelimiter)
2941 return false;
2942 while (pPos < pEnd && *pPos != nFragmentDelimiter)
2944 EscapeType eEscapeType;
2945 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, eMechanism,
2946 eCharset, eEscapeType);
2947 if (eEscapeType == EscapeType::NONE)
2949 if (nUTF32 == nSegmentDelimiter
2950 || nUTF32 == nAltSegmentDelimiter)
2952 aTheSynPath.append('/');
2953 continue;
2955 else if (nUTF32 == '|'
2956 && (pPos == pEnd
2957 || *pPos == nFragmentDelimiter
2958 || *pPos == nSegmentDelimiter
2959 || *pPos == nAltSegmentDelimiter)
2960 && aTheSynPath.getLength() == 2
2961 && rtl::isAsciiAlpha(aTheSynPath[1]))
2963 // A first segment of <ALPHA "|"> is translated to
2964 // <ALPHA ":">:
2965 aTheSynPath.append(':');
2966 continue;
2969 appendUCS4(aTheSynPath, nUTF32, eEscapeType,
2970 PART_PCHAR, eCharset, true);
2972 if (aTheSynPath.isEmpty())
2973 aTheSynPath.append('/');
2974 break;
// mailto: everything up to the query or fragment; may be empty.
2977 case INetProtocol::Mailto:
2978 while (pPos < pEnd && *pPos != nQueryDelimiter
2979 && *pPos != nFragmentDelimiter)
2981 EscapeType eEscapeType;
2982 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, eMechanism,
2983 eCharset, eEscapeType);
2984 appendUCS4(aTheSynPath, nUTF32, eEscapeType,
2985 PART_MAILTO, eCharset, true);
2987 break;
2990 case INetProtocol::PrivSoffice:
2991 case INetProtocol::Slot:
2992 case INetProtocol::Hid:
2993 case INetProtocol::Macro:
2994 case INetProtocol::Uno:
2995 case INetProtocol::Component:
2996 case INetProtocol::Ldap:
2997 while (pPos < pEnd && *pPos != nQueryDelimiter
2998 && *pPos != nFragmentDelimiter)
3000 EscapeType eEscapeType;
3001 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, eMechanism,
3002 eCharset, eEscapeType);
3003 appendUCS4(aTheSynPath, nUTF32, eEscapeType,
3004 PART_PATH_BEFORE_QUERY, eCharset, true);
3006 break;
// vnd.sun.star.help: an absent path becomes "/", otherwise it must start
// with '/'.
3008 case INetProtocol::VndSunStarHelp:
3009 if (pPos == pEnd
3010 || *pPos == nQueryDelimiter
3011 || *pPos == nFragmentDelimiter)
3012 aTheSynPath.append('/');
3013 else
3015 if (*pPos != '/')
3016 return false;
3017 while (pPos < pEnd && *pPos != nQueryDelimiter
3018 && *pPos != nFragmentDelimiter)
3020 EscapeType eEscapeType;
3021 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd,
3022 eMechanism,
3023 eCharset, eEscapeType);
3024 appendUCS4(aTheSynPath, nUTF32, eEscapeType,
3025 PART_HTTP_PATH, eCharset, true);
3028 break;
3030 case INetProtocol::Javascript:
3031 case INetProtocol::Data:
3032 case INetProtocol::Cid:
3033 case INetProtocol::Db:
3034 while (pPos < pEnd && *pPos != nFragmentDelimiter)
3036 EscapeType eEscapeType;
3037 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, eMechanism,
3038 eCharset, eEscapeType);
3039 appendUCS4(aTheSynPath, nUTF32, eEscapeType,
3040 PART_URIC, eCharset, true);
3042 break;
// hier/pkg: unescaped '/' is kept as a separator, everything else is encoded
// as PART_PCHAR; an empty path becomes "/".
3044 case INetProtocol::VndSunStarHier:
3045 case INetProtocol::VndSunStarPkg:
3046 if (pPos < pEnd && *pPos != '/'
3047 && *pPos != nQueryDelimiter && *pPos != nFragmentDelimiter)
3048 return false;
3049 while (pPos < pEnd && *pPos != nQueryDelimiter
3050 && *pPos != nFragmentDelimiter)
3052 EscapeType eEscapeType;
3053 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, eMechanism,
3054 eCharset, eEscapeType);
3055 if (eEscapeType == EscapeType::NONE && nUTF32 == '/')
3056 aTheSynPath.append('/');
3057 else
3058 appendUCS4(aTheSynPath, nUTF32, eEscapeType,
3059 PART_PCHAR, eCharset, false);
3061 if (aTheSynPath.isEmpty())
3062 aTheSynPath.append('/');
3063 break;
// cmd/expand: the path must not be empty; only its first character is
// encoded as PART_URIC_NO_SLASH, the rest as PART_URIC.
3065 case INetProtocol::VndSunStarCmd:
3066 case INetProtocol::VndSunStarExpand:
3068 if (pPos == pEnd || *pPos == nFragmentDelimiter)
3069 return false;
3070 Part ePart = PART_URIC_NO_SLASH;
3071 while (pPos != pEnd && *pPos != nFragmentDelimiter)
3073 EscapeType eEscapeType;
3074 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, eMechanism,
3075 eCharset, eEscapeType);
3076 appendUCS4(aTheSynPath, nUTF32, eEscapeType, ePart,
3077 eCharset, true);
3078 ePart = PART_URIC;
3080 break;
// telnet: the only accepted inputs are an empty path and a single "/"; the
// result is always "/".
3083 case INetProtocol::Telnet:
3084 if (pPos < pEnd)
3086 if (*pPos != '/' || pEnd - pPos > 1)
3087 return false;
3088 ++pPos;
3090 aTheSynPath.append('/');
3091 break;
// tdoc: a leading '/' is mandatory.
3093 case INetProtocol::VndSunStarTdoc:
3094 if (pPos == pEnd || *pPos != '/')
3095 return false;
3096 while (pPos < pEnd && *pPos != nFragmentDelimiter)
3098 EscapeType eEscapeType;
3099 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, eMechanism,
3100 eCharset, eEscapeType);
3101 if (eEscapeType == EscapeType::NONE && nUTF32 == '/')
3102 aTheSynPath.append('/');
3103 else
3104 appendUCS4(aTheSynPath, nUTF32, eEscapeType,
3105 PART_PCHAR, eCharset, false);
3107 break;
// generic/sftp: an empty path is an error.
3109 case INetProtocol::Generic:
3110 case INetProtocol::Sftp:
3111 while (pPos < pEnd && *pPos != nFragmentDelimiter)
3113 EscapeType eEscapeType;
3114 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, eMechanism,
3115 eCharset, eEscapeType);
3116 appendUCS4(aTheSynPath, nUTF32, eEscapeType,
3117 PART_URIC, eCharset, true);
3119 if (aTheSynPath.isEmpty())
3120 return false;
3121 break;
// Any scheme not listed above trips the assertion, but the function still
// goes on to return true with an empty path and *pBegin unchanged.
3122 default:
3123 OSL_ASSERT(false);
3124 break;
3127 *pBegin = pPos;
3128 rSynPath = aTheSynPath;
3129 return true;
3132 bool INetURLObject::setPath(OUString const & rThePath,
3133 EncodeMechanism eMechanism,
3134 rtl_TextEncoding eCharset)
3136 OUStringBuffer aSynPath;
3137 sal_Unicode const * p = rThePath.getStr();
3138 sal_Unicode const * pEnd = p + rThePath.getLength();
3139 if (!parsePath(m_eScheme, &p, pEnd, eMechanism, eCharset, false,
3140 '/', 0x80000000, 0x80000000, 0x80000000, aSynPath)
3141 || p != pEnd)
3142 return false;
3143 sal_Int32 nDelta = m_aPath.set(m_aAbsURIRef, aSynPath.makeStringAndClear());
3144 m_aQuery += nDelta;
3145 m_aFragment += nDelta;
3146 return true;
3149 bool INetURLObject::checkHierarchical() const {
3150 if (m_eScheme == INetProtocol::VndSunStarExpand) {
3151 OSL_FAIL(
3152 "INetURLObject::checkHierarchical vnd.sun.star.expand");
3153 return true;
3154 } else {
3155 return getSchemeInfo().m_bHierarchical;
3159 bool INetURLObject::Append(OUString const & rTheSegment,
3160 EncodeMechanism eMechanism,
3161 rtl_TextEncoding eCharset)
3163 return insertName(rTheSegment, false, LAST_SEGMENT, eMechanism, eCharset);
3166 INetURLObject::SubString INetURLObject::getSegment(sal_Int32 nIndex,
3167 bool bIgnoreFinalSlash)
3168 const
3170 DBG_ASSERT(nIndex >= 0 || nIndex == LAST_SEGMENT,
3171 "INetURLObject::getSegment(): Bad index");
3173 if (!checkHierarchical())
3174 return SubString();
3176 sal_Unicode const * pPathBegin
3177 = m_aAbsURIRef.getStr() + m_aPath.getBegin();
3178 sal_Unicode const * pPathEnd = pPathBegin + m_aPath.getLength();
3179 sal_Unicode const * pSegBegin;
3180 sal_Unicode const * pSegEnd;
3181 if (nIndex == LAST_SEGMENT)
3183 pSegEnd = pPathEnd;
3184 if (bIgnoreFinalSlash && pSegEnd > pPathBegin && pSegEnd[-1] == '/')
3185 --pSegEnd;
3186 if (pSegEnd <= pPathBegin)
3187 return SubString();
3188 pSegBegin = pSegEnd - 1;
3189 while (pSegBegin > pPathBegin && *pSegBegin != '/')
3190 --pSegBegin;
3192 else
3194 pSegBegin = pPathBegin;
3195 while (nIndex-- > 0)
3198 ++pSegBegin;
3199 if (pSegBegin >= pPathEnd)
3200 return SubString();
3202 while (*pSegBegin != '/');
3203 pSegEnd = pSegBegin + 1;
3204 while (pSegEnd < pPathEnd && *pSegEnd != '/')
3205 ++pSegEnd;
3208 return SubString(pSegBegin - m_aAbsURIRef.getStr(),
3209 pSegEnd - pSegBegin);
/* Insert rTheName as a new segment into the path of a hierarchical URL.
 *
 * nIndex selects the position: LAST_SEGMENT appends after the existing
 * segments (a trailing '/' of the old path is dropped from the prefix), 0
 * inserts in front of the path, and a positive value skips that many segments
 * first.  If fewer segments exist than need to be skipped, false is returned,
 * except that an index equal to the number of segments appears to mean
 * "insert at the end" - NOTE(review): confirm against callers.
 *
 * bAppendFinalSlash requests a '/' after the new segment when it ends up at
 * the end of the path.
 *
 * The new path is assembled as  prefix + '/' + encoded name [+ '/'] + suffix
 * and installed through setPath(), whose result is returned.  Returns false
 * without changes if the URL is not hierarchical.
 */
3212 bool INetURLObject::insertName(OUString const & rTheName,
3213 bool bAppendFinalSlash, sal_Int32 nIndex,
3214 EncodeMechanism eMechanism,
3215 rtl_TextEncoding eCharset)
3217 DBG_ASSERT(nIndex >= 0 || nIndex == LAST_SEGMENT,
3218 "INetURLObject::insertName(): Bad index");
3220 if (!checkHierarchical())
3221 return false;
// [pPathBegin, pPrefixEnd) is kept in front of the new segment and
// [pSuffixBegin, pPathEnd) behind it.
3223 sal_Unicode const * pPathBegin
3224 = m_aAbsURIRef.getStr() + m_aPath.getBegin();
3225 sal_Unicode const * pPathEnd = pPathBegin + m_aPath.getLength();
3226 sal_Unicode const * pPrefixEnd;
3227 bool bInsertSlash;
3228 sal_Unicode const * pSuffixBegin;
3229 if (nIndex == LAST_SEGMENT)
3231 pPrefixEnd = pPathEnd;
3232 if (pPrefixEnd > pPathBegin &&
3233 pPrefixEnd[-1] == '/')
3235 --pPrefixEnd;
3237 bInsertSlash = bAppendFinalSlash;
3238 pSuffixBegin = pPathEnd;
3240 else if (nIndex == 0)
3242 pPrefixEnd = pPathBegin;
// A separating '/' is needed if the old path does not start with one, or if
// the path is empty and a final slash was requested.
3243 bInsertSlash =
3244 (pPathBegin < pPathEnd && *pPathBegin != '/') ||
3245 (pPathBegin == pPathEnd && bAppendFinalSlash);
// An old path consisting of just "/" is dropped unless a final slash is
// wanted.
3246 pSuffixBegin =
3247 (pPathEnd - pPathBegin == 1 && *pPathBegin == '/' &&
3248 !bAppendFinalSlash)
3249 ? pPathEnd : pPathBegin;
3251 else
3253 pPrefixEnd = pPathBegin;
3254 sal_Unicode const * pEnd = pPathEnd;
3255 if (pEnd > pPathBegin && pEnd[-1] == '/')
3256 --pEnd;
// bSkip tells whether pPrefixEnd currently sits on a '/' that has to be
// stepped over before looking for the next one.
3257 bool bSkip = pPrefixEnd < pEnd && *pPrefixEnd == '/';
3258 bInsertSlash = false;
3259 pSuffixBegin = pPathEnd;
3260 while (nIndex-- > 0)
3261 for (;;)
3263 if (bSkip)
3264 ++pPrefixEnd;
3265 bSkip = true;
3266 if (pPrefixEnd >= pEnd)
// Ran out of path: acceptable only while skipping the last requested
// segment.
3268 if (nIndex == 0)
3270 bInsertSlash = bAppendFinalSlash;
3271 break;
3273 else
3274 return false;
3276 if (*pPrefixEnd == '/')
3278 pSuffixBegin = pPrefixEnd;
3279 break;
3284 OUStringBuffer aNewPath;
3285 aNewPath.append(pPathBegin, pPrefixEnd - pPathBegin);
3286 aNewPath.append('/');
3287 aNewPath.append(encodeText(rTheName, PART_PCHAR,
3288 eMechanism, eCharset, true));
3289 if (bInsertSlash) {
3290 aNewPath.append('/');
3292 aNewPath.append(pSuffixBegin, pPathEnd - pSuffixBegin);
// The assembled path is handed to setPath() as NotCanonical / UTF-8 text.
3294 return setPath(aNewPath.makeStringAndClear(), EncodeMechanism::NotCanonical,
3295 RTL_TEXTENCODING_UTF8);
3298 void INetURLObject::clearQuery()
3300 if (HasError())
3301 return;
3302 if (m_aQuery.isPresent())
3304 lcl_Erase(m_aAbsURIRef, m_aQuery.getBegin() - 1,
3305 m_aQuery.getLength() + 1);
3306 m_aFragment += m_aQuery.clear() - 1;
3310 bool INetURLObject::setQuery(OUString const & rTheQuery,
3311 EncodeMechanism eMechanism,
3312 rtl_TextEncoding eCharset)
3314 if (!getSchemeInfo().m_bQuery)
3315 return false;
3316 OUString aNewQuery(encodeText(rTheQuery, PART_URIC,
3317 eMechanism, eCharset, true));
3318 sal_Int32 nDelta;
3319 if (m_aQuery.isPresent())
3320 nDelta = m_aQuery.set(m_aAbsURIRef, aNewQuery);
3321 else
3323 m_aAbsURIRef.insert(m_aPath.getEnd(), u'?');
3324 nDelta = m_aQuery.set(m_aAbsURIRef, aNewQuery, m_aPath.getEnd() + 1)
3325 + 1;
3327 m_aFragment += nDelta;
3328 return true;
3331 bool INetURLObject::clearFragment()
3333 if (HasError())
3334 return false;
3335 if (m_aFragment.isPresent())
3337 m_aAbsURIRef.setLength(m_aFragment.getBegin() - 1);
3338 m_aFragment.clear();
3340 return true;
3343 bool INetURLObject::setFragment(OUString const & rTheFragment,
3344 EncodeMechanism eMechanism,
3345 rtl_TextEncoding eCharset)
3347 if (HasError())
3348 return false;
3349 OUString aNewFragment(encodeText(rTheFragment, PART_URIC,
3350 eMechanism, eCharset, true));
3351 if (m_aFragment.isPresent())
3352 m_aFragment.set(m_aAbsURIRef, aNewFragment);
3353 else
3355 m_aAbsURIRef.append('#');
3356 m_aFragment.set(m_aAbsURIRef, aNewFragment, m_aAbsURIRef.getLength());
3358 return true;
3361 bool INetURLObject::hasDosVolume(FSysStyle eStyle) const
3363 sal_Unicode const * p = m_aAbsURIRef.getStr() + m_aPath.getBegin();
3364 return (eStyle & FSysStyle::Dos)
3365 && m_aPath.getLength() >= 3
3366 && p[0] == '/'
3367 && rtl::isAsciiAlpha(p[1])
3368 && p[2] == ':'
3369 && (m_aPath.getLength() == 3 || p[3] == '/');
3372 // static
3373 OUString INetURLObject::encodeText(sal_Unicode const * pBegin,
3374 sal_Unicode const * pEnd,
3375 Part ePart, EncodeMechanism eMechanism,
3376 rtl_TextEncoding eCharset,
3377 bool bKeepVisibleEscapes)
3379 OUStringBuffer aResult;
3380 while (pBegin < pEnd)
3382 EscapeType eEscapeType;
3383 sal_uInt32 nUTF32 = getUTF32(pBegin, pEnd,
3384 eMechanism, eCharset, eEscapeType);
3385 appendUCS4(aResult, nUTF32, eEscapeType, ePart,
3386 eCharset, bKeepVisibleEscapes);
3388 return aResult.makeStringAndClear();
3391 // static
3392 OUString INetURLObject::decode(sal_Unicode const * pBegin,
3393 sal_Unicode const * pEnd,
3394 DecodeMechanism eMechanism,
3395 rtl_TextEncoding eCharset)
3397 switch (eMechanism)
3399 case DecodeMechanism::NONE:
3400 return OUString(pBegin, pEnd - pBegin);
3402 case DecodeMechanism::ToIUri:
3403 eCharset = RTL_TEXTENCODING_UTF8;
3404 break;
3406 default:
3407 break;
3409 OUStringBuffer aResult;
3410 while (pBegin < pEnd)
3412 EscapeType eEscapeType;
3413 sal_uInt32 nUTF32 = getUTF32(pBegin, pEnd,
3414 EncodeMechanism::WasEncoded, eCharset, eEscapeType);
3415 switch (eEscapeType)
3417 case EscapeType::NONE:
3418 aResult.appendUtf32(nUTF32);
3419 break;
3421 case EscapeType::Octet:
3422 appendEscape(aResult, nUTF32);
3423 break;
3425 case EscapeType::Utf32:
3426 if (
3427 rtl::isAscii(nUTF32) &&
3429 eMechanism == DecodeMechanism::ToIUri ||
3431 eMechanism == DecodeMechanism::Unambiguous &&
3432 mustEncode(nUTF32, PART_UNAMBIGUOUS)
3437 appendEscape(aResult, nUTF32);
3439 else
3440 aResult.appendUtf32(nUTF32);
3441 break;
3444 return aResult.makeStringAndClear();
3447 OUString INetURLObject::GetURLNoPass(DecodeMechanism eMechanism,
3448 rtl_TextEncoding eCharset) const
3450 INetURLObject aTemp(*this);
3451 aTemp.clearPassword();
3452 return aTemp.GetMainURL(eMechanism, eCharset);
3455 OUString INetURLObject::GetURLNoMark(DecodeMechanism eMechanism,
3456 rtl_TextEncoding eCharset) const
3458 INetURLObject aTemp(*this);
3459 aTemp.clearFragment();
3460 return aTemp.GetMainURL(eMechanism, eCharset);
// Builds a textual form of this URL that is shortened, where necessary, so
// that rStringWidth->queryStringWidth() of the result does not exceed nWidth
// (unless nothing fits, in which case the result may end up empty).
//
// rStringWidth  measures candidate strings; expected to be a valid reference.
// nWidth        maximum allowed width, in the units rStringWidth uses.
// eMechanism,
// eCharset      forwarded to decode() for every piece that is emitted.
//
// Outline: the scheme and ':' are always emitted. For hierarchical schemes
// the core (authority and/or path) is abbreviated segment-wise: the loop
// alternately tries to keep one more segment at the end (suffix) and one more
// at the start (prefix), accepting a segment only if the candidate string
// still fits. In those candidates omitted segments are shown as "..." and a
// present query or fragment is represented by "?..." or "#...". If no segment
// was kept, the whole core is emitted instead. Query and fragment are then
// appended in full, and finally the buffer is cut from the end (with "..."
// appended) until it fits or becomes empty.
3463 OUString
3464 INetURLObject::getAbbreviated(
3465 uno::Reference< util::XStringWidth > const & rStringWidth,
3466 sal_Int32 nWidth,
3467 DecodeMechanism eMechanism,
3468 rtl_TextEncoding eCharset)
3469 const
3471 OSL_ENSURE(rStringWidth.is(), "specification violation");
3472 OUStringBuffer aBuffer;
3473 // make sure that the scheme is copied for generic schemes: getSchemeInfo().m_pScheme
3474 // is empty ("") in that case, so take the scheme from m_aAbsURIRef
3475 if (m_eScheme != INetProtocol::Generic)
3477 aBuffer.appendAscii(getSchemeInfo().m_pScheme);
3479 else
3481 if (!m_aAbsURIRef.isEmpty())
3483 sal_Unicode const * pSchemeBegin
3484 = m_aAbsURIRef.getStr();
3485 sal_Unicode const * pSchemeEnd = pSchemeBegin;
// Scan forward to the first ':'; this relies on the stored URL containing one.
3487 while (pSchemeEnd[0] != ':')
3489 ++pSchemeEnd;
3491 aBuffer.append(pSchemeBegin, pSchemeEnd - pSchemeBegin);
3494 aBuffer.append(':');
3495 bool bAuthority = getSchemeInfo().m_bAuthority;
// The "core" runs from the authority (if the scheme has one) or else from the
// path start, up to the end of the path.
3496 sal_Unicode const * pCoreBegin
3497 = m_aAbsURIRef.getStr() + (bAuthority ? getAuthorityBegin() :
3498 m_aPath.getBegin());
3499 sal_Unicode const * pCoreEnd
3500 = m_aAbsURIRef.getStr() + m_aPath.getBegin() + m_aPath.getLength();
// Set once at least one segment has been accepted by the loop below.
3501 bool bSegment = false;
3502 if (getSchemeInfo().m_bHierarchical)
// Placeholder standing in for the query/fragment while measuring candidates.
3504 OUString aRest;
3505 if (m_aQuery.isPresent())
3506 aRest = "?...";
3507 else if (m_aFragment.isPresent())
3508 aRest = "#...";
// aTrailer collects accepted suffix segments; aBuffer collects accepted
// prefix segments. [pBegin, pEnd) is the part of the core not yet accepted.
3509 OUStringBuffer aTrailer;
3510 sal_Unicode const * pBegin = pCoreBegin;
3511 sal_Unicode const * pEnd = pCoreEnd;
3512 sal_Unicode const * pPrefixBegin = pBegin;
3513 sal_Unicode const * pSuffixEnd = pEnd;
3514 bool bPrefix = true;
3515 bool bSuffix = true;
// Try to extend the kept text by one more segment taken from the end.
3518 if (bSuffix)
3520 sal_Unicode const * p = pSuffixEnd - 1;
// A final '/' of the core is kept together with the last segment.
3521 if (pSuffixEnd == pCoreEnd && *p == '/')
3522 --p;
// Walk back to the '/' that starts this segment.
3523 while (*p != '/')
3524 --p;
3525 if (bAuthority && p == pCoreBegin + 1)
3526 --p;
3527 OUString
3528 aSegment(decode(p + (p == pBegin && pBegin != pCoreBegin ?
3529 1 : 0),
3530 pSuffixEnd,
3531 eMechanism,
3532 eCharset));
3533 pSuffixEnd = p;
// Candidate: accepted prefix + "..." (if something is left out) + this
// segment + accepted suffix + query/fragment placeholder.
3534 OUStringBuffer aResult(aBuffer);
3535 if (pSuffixEnd != pBegin)
3536 aResult.append("...");
3537 aResult.append(aSegment);
3538 aResult.append(aTrailer.toString());
3539 aResult.append(aRest);
3540 if (rStringWidth->
3541 queryStringWidth(aResult.makeStringAndClear())
3542 <= nWidth)
3544 aTrailer.insert(0, aSegment);
3545 bSegment = true;
3546 pEnd = pSuffixEnd;
3548 else
// Did not fit: stop trying to grow the suffix.
3549 bSuffix = false;
3550 if (pPrefixBegin > pSuffixEnd)
3551 pPrefixBegin = pSuffixEnd;
// Everything has been accepted; nothing left to abbreviate.
3552 if (pBegin == pEnd)
3553 break;
// Try to extend the kept text by one more segment taken from the start.
3555 if (bPrefix)
3557 sal_Unicode const * p
3558 = pPrefixBegin
3559 + (bAuthority && pPrefixBegin == pCoreBegin ? 2 :
3561 OSL_ASSERT(p <= pEnd);
// Advance to the '/' that ends this segment (or to pEnd).
3562 while (p < pEnd && *p != '/')
3563 ++p;
3564 if (p == pCoreEnd - 1 && *p == '/')
3565 ++p;
3566 OUString
3567 aSegment(decode(pPrefixBegin
3568 + (pPrefixBegin == pCoreBegin ? 0 :
3570 p == pEnd ? p : p + 1,
3571 eMechanism,
3572 eCharset));
3573 pPrefixBegin = p;
// Candidate: accepted prefix + this segment + "..." (if something is left
// out) + accepted suffix + query/fragment placeholder.
3574 OUStringBuffer aResult(aBuffer);
3575 aResult.append(aSegment);
3576 if (pPrefixBegin != pEnd)
3577 aResult.append("...");
3578 aResult.append(aTrailer.toString());
3579 aResult.append(aRest);
3580 if (rStringWidth->
3581 queryStringWidth(aResult.makeStringAndClear())
3582 <= nWidth)
3584 aBuffer.append(aSegment);
3585 bSegment = true;
3586 pBegin = pPrefixBegin;
3588 else
// Did not fit: stop trying to grow the prefix.
3589 bPrefix = false;
3590 if (pPrefixBegin > pSuffixEnd)
3591 pSuffixEnd = pPrefixBegin;
3592 if (pBegin == pEnd)
3593 break;
3596 while (bPrefix || bSuffix);
// Join the accepted prefix and suffix, marking any omitted middle part.
3597 if (bSegment)
3599 if (pPrefixBegin != pBegin || pSuffixEnd != pEnd)
3600 aBuffer.append("...");
3601 aBuffer.append(aTrailer.toString());
// No segment was accepted (or the scheme is not hierarchical): emit the
// complete core.
3604 if (!bSegment)
3605 aBuffer.append(decode(pCoreBegin,
3606 pCoreEnd,
3607 eMechanism,
3608 eCharset));
// Query and fragment are appended in full, not as the placeholder used above.
3609 if (m_aQuery.isPresent())
3611 aBuffer.append('?');
3612 aBuffer.append(decode(m_aQuery, eMechanism, eCharset));
3614 if (m_aFragment.isPresent())
3616 aBuffer.append('#');
3617 aBuffer.append(decode(m_aFragment, eMechanism, eCharset));
// Last resort: if the result is still too wide, cut it character by character
// from the end, appending "..." each time; once the original text is used up
// (i == 0) the remaining "..." itself is shortened until the buffer is empty.
3619 if (!aBuffer.isEmpty())
3621 OUStringBuffer aResult(aBuffer);
3622 if (rStringWidth->queryStringWidth(aResult.makeStringAndClear())
3623 > nWidth)
3624 for (sal_Int32 i = aBuffer.getLength();;)
3626 if (i == 0)
3628 aBuffer.setLength(aBuffer.getLength() - 1);
3629 if (aBuffer.isEmpty())
3630 break;
3632 else
3634 aBuffer.setLength(--i);
3635 aBuffer.append("...");
3637 aResult = aBuffer;
3638 if (rStringWidth->
3639 queryStringWidth(aResult.makeStringAndClear())
3640 <= nWidth)
3641 break;
3644 return aBuffer.makeStringAndClear();
3647 bool INetURLObject::operator ==(INetURLObject const & rObject) const
3649 if (m_eScheme != rObject.m_eScheme)
3650 return false;
3651 if (m_eScheme == INetProtocol::NotValid)
3652 return m_aAbsURIRef.toString() == rObject.m_aAbsURIRef.toString();
3653 if ((m_aScheme.compare(
3654 rObject.m_aScheme, m_aAbsURIRef, rObject.m_aAbsURIRef)
3655 != 0)
3656 || GetUser(DecodeMechanism::NONE) != rObject.GetUser(DecodeMechanism::NONE)
3657 || GetPass(DecodeMechanism::NONE) != rObject.GetPass(DecodeMechanism::NONE)
3658 || !GetHost(DecodeMechanism::NONE).equalsIgnoreAsciiCase(
3659 rObject.GetHost(DecodeMechanism::NONE))
3660 || GetPort() != rObject.GetPort()
3661 || HasParam() != rObject.HasParam()
3662 || GetParam() != rObject.GetParam())
3663 return false;
3664 OUString aPath1(GetURLPath(DecodeMechanism::NONE));
3665 OUString aPath2(rObject.GetURLPath(DecodeMechanism::NONE));
3666 switch (m_eScheme)
3668 case INetProtocol::File:
3670 // If the URL paths of two file URLs only differ in that one has a
3671 // final '/' and the other has not, take the two paths as
3672 // equivalent (this could be useful for other schemes, too):
3673 sal_Int32 nLength = aPath1.getLength();
3674 switch (nLength - aPath2.getLength())
3676 case -1:
3677 if (aPath2[nLength] != '/')
3678 return false;
3679 break;
3681 case 0:
3682 break;
3684 case 1:
3685 if (aPath1[--nLength] != '/')
3686 return false;
3687 break;
3689 default:
3690 return false;
3692 return aPath1.compareTo(aPath2, nLength) == 0;
3695 default:
3696 return aPath1 == aPath2;
// Rebuilds this object from separately supplied URL components.
//
// eTheScheme    scheme to use; an invalid or Generic scheme is rejected.
// rTheUser      user name; must be empty if the scheme has no user part.
// rThePassword  password; must be empty if the scheme has no password part.
// rTheHost      host; whether it may be empty depends on the scheme (see the
//               switch below).
// nThePort      port number, 0 meaning "none"; must be 0 if the scheme has no
//               port part.
// rThePath      path, parsed with parsePath().
//
// Returns true on success. On any failure after the initial scheme check the
// object is reset with setInvalid() and false is returned. User, password,
// host and port are only considered when the scheme has an authority part.
3700 bool INetURLObject::ConcatData(INetProtocol eTheScheme,
3701 OUString const & rTheUser,
3702 OUString const & rThePassword,
3703 OUString const & rTheHost,
3704 sal_uInt32 nThePort,
3705 OUString const & rThePath)
// Start from a clean invalid state, then adopt the requested scheme.
3707 setInvalid();
3708 m_eScheme = eTheScheme;
3709 if (HasError() || m_eScheme == INetProtocol::Generic)
3710 return false;
// From here on the URL text is assembled left to right; every component's
// position is recorded relative to the current end of m_aAbsURIRef.
3711 m_aAbsURIRef.setLength(0);
3712 m_aAbsURIRef.appendAscii(getSchemeInfo().m_pScheme);
3713 m_aAbsURIRef.append(':');
3714 if (getSchemeInfo().m_bAuthority)
3716 m_aAbsURIRef.append("//");
// Becomes true once a user or password was written, so that '@' is emitted.
3717 bool bUserInfo = false;
3718 if (getSchemeInfo().m_bUser)
3720 if (!rTheUser.isEmpty())
3722 m_aUser.set(m_aAbsURIRef,
3723 encodeText(rTheUser, PART_USER_PASSWORD,
3724 EncodeMechanism::WasEncoded, RTL_TEXTENCODING_UTF8, false),
3725 m_aAbsURIRef.getLength());
3726 bUserInfo = true;
// A user name was given although the scheme does not allow one.
3729 else if (!rTheUser.isEmpty())
3731 setInvalid();
3732 return false;
3734 if (!rThePassword.isEmpty())
3736 if (getSchemeInfo().m_bPassword)
3738 m_aAbsURIRef.append(':');
3739 m_aAuth.set(m_aAbsURIRef,
3740 encodeText(rThePassword, PART_USER_PASSWORD,
3741 EncodeMechanism::WasEncoded, RTL_TEXTENCODING_UTF8, false),
3742 m_aAbsURIRef.getLength());
3743 bUserInfo = true;
3745 else
// A password was given although the scheme does not allow one.
3747 setInvalid();
3748 return false;
3751 if (bUserInfo && getSchemeInfo().m_bHost)
3752 m_aAbsURIRef.append('@');
3753 if (getSchemeInfo().m_bHost)
3755 OUStringBuffer aSynHost(rTheHost);
3756 bool bNetBiosName = false;
3757 switch (m_eScheme)
// File URLs: the host "localhost" (any ASCII case) is replaced by an empty
// host, and the host is parsed with the NetBIOS-name flag set.
3759 case INetProtocol::File:
3761 OUString sTemp(aSynHost.toString());
3762 if (sTemp.equalsIgnoreAsciiCase( "localhost" ))
3764 aSynHost.setLength(0);
3766 bNetBiosName = true;
3768 break;
// LDAP URLs: an empty host is allowed only when no port is given.
3770 case INetProtocol::Ldap:
3771 if (aSynHost.isEmpty() && nThePort != 0)
3773 setInvalid();
3774 return false;
3776 break;
// All other schemes with a host part require a non-empty host.
3778 default:
3779 if (aSynHost.isEmpty())
3781 setInvalid();
3782 return false;
3784 break;
// Validate the host; aSynHost is passed both as the input range and as the
// output buffer.
3786 if (!parseHostOrNetBiosName(
3787 aSynHost.getStr(), aSynHost.getStr() + aSynHost.getLength(),
3788 EncodeMechanism::WasEncoded, RTL_TEXTENCODING_UTF8, bNetBiosName, &aSynHost))
3790 setInvalid();
3791 return false;
3793 m_aHost.set(m_aAbsURIRef, aSynHost.makeStringAndClear(),
3794 m_aAbsURIRef.getLength());
3795 if (nThePort != 0)
3797 if (getSchemeInfo().m_bPort)
3799 m_aAbsURIRef.append(':');
3800 m_aPort.set(m_aAbsURIRef,
3801 OUString::number(nThePort),
3802 m_aAbsURIRef.getLength());
3804 else
// A port was given although the scheme does not allow one.
3806 setInvalid();
3807 return false;
// A host or port was given although the scheme has no host part.
3811 else if (!rTheHost.isEmpty() || nThePort != 0)
3813 setInvalid();
3814 return false;
// Parse the path; it must be accepted and consumed completely.
3817 OUStringBuffer aSynPath;
3818 sal_Unicode const * p = rThePath.getStr();
3819 sal_Unicode const * pEnd = p + rThePath.getLength();
3820 if (!parsePath(m_eScheme, &p, pEnd, EncodeMechanism::WasEncoded, RTL_TEXTENCODING_UTF8, false, '/',
3821 0x80000000, 0x80000000, 0x80000000, aSynPath)
3822 || p != pEnd)
3824 setInvalid();
3825 return false;
3827 m_aPath.set(m_aAbsURIRef, aSynPath.makeStringAndClear(),
3828 m_aAbsURIRef.getLength());
3829 return true;
3832 // static
3833 OUString INetURLObject::GetAbsURL(OUString const & rTheBaseURIRef,
3834 OUString const & rTheRelURIRef,
3835 EncodeMechanism eEncodeMechanism,
3836 DecodeMechanism eDecodeMechanism,
3837 rtl_TextEncoding eCharset)
3839 // Backwards compatibility:
3840 if (rTheRelURIRef.isEmpty() || rTheRelURIRef[0] == '#')
3841 return rTheRelURIRef;
3843 INetURLObject aTheAbsURIRef;
3844 bool bWasAbsolute;
3845 return INetURLObject(rTheBaseURIRef, eEncodeMechanism, eCharset).
3846 convertRelToAbs(rTheRelURIRef, aTheAbsURIRef,
3847 bWasAbsolute, eEncodeMechanism,
3848 eCharset, false, false,
3849 false, FSysStyle::Detect)
3850 || eEncodeMechanism != EncodeMechanism::WasEncoded
3851 || eDecodeMechanism != DecodeMechanism::ToIUri
3852 || eCharset != RTL_TEXTENCODING_UTF8 ?
3853 aTheAbsURIRef.GetMainURL(eDecodeMechanism, eCharset) :
3854 rTheRelURIRef;
3857 OUString INetURLObject::getExternalURL() const
3859 OUString aTheExtURIRef;
3860 translateToExternal(
3861 m_aAbsURIRef.toString(), aTheExtURIRef);
3862 return aTheExtURIRef;
3865 bool INetURLObject::isSchemeEqualTo(o3tl::u16string_view scheme) const {
3866 return m_aScheme.isPresent()
3867 && (rtl_ustr_compareIgnoreAsciiCase_WithLength(
3868 scheme.data(), scheme.size(),
3869 m_aAbsURIRef.getStr() + m_aScheme.getBegin(),
3870 m_aScheme.getLength())
3871 == 0);
3874 bool INetURLObject::isAnyKnownWebDAVScheme() const {
3875 return ( isSchemeEqualTo( INetProtocol::Http ) ||
3876 isSchemeEqualTo( INetProtocol::Https ) ||
3877 isSchemeEqualTo( INetProtocol::VndSunStarWebdav ) ||
3878 isSchemeEqualTo( u"vnd.sun.star.webdavs" ) ||
3879 isSchemeEqualTo( u"webdav" ) ||
3880 isSchemeEqualTo( u"webdavs" ));
3883 // static
3884 OUString INetURLObject::GetScheme(INetProtocol eTheScheme)
3886 return OUString::createFromAscii(getSchemeInfo(eTheScheme).m_pPrefix);
3889 // static
3890 OUString INetURLObject::GetSchemeName(INetProtocol eTheScheme)
3892 return OUString::createFromAscii(getSchemeInfo(eTheScheme).m_pScheme);
3895 // static
3896 INetProtocol INetURLObject::CompareProtocolScheme(OUString const &
3897 rTheAbsURIRef)
3899 sal_Unicode const * p = rTheAbsURIRef.getStr();
3900 PrefixInfo const * pPrefix = getPrefix(p, p + rTheAbsURIRef.getLength());
3901 return pPrefix ? pPrefix->m_eScheme : INetProtocol::NotValid;
3904 OUString INetURLObject::GetHostPort(DecodeMechanism eMechanism,
3905 rtl_TextEncoding eCharset) const
3907 // Check because PROT_VND_SUN_STAR_HELP, PROT_VND_SUN_STAR_HIER, and
3908 // PROT_VND_SUN_STAR_PKG misuse m_aHost:
3909 if (!getSchemeInfo().m_bHost)
3910 return OUString();
3911 OUStringBuffer aHostPort(decode(m_aHost, eMechanism, eCharset));
3912 if (m_aPort.isPresent())
3914 aHostPort.append(':');
3915 aHostPort.append(decode(m_aPort, eMechanism, eCharset));
3917 return aHostPort.makeStringAndClear();
3920 sal_uInt32 INetURLObject::GetPort() const
3922 if (m_aPort.isPresent())
3924 sal_Unicode const * p = m_aAbsURIRef.getStr() + m_aPort.getBegin();
3925 sal_Unicode const * pEnd = p + m_aPort.getLength();
3926 sal_uInt32 nThePort;
3927 if (INetMIME::scanUnsigned(p, pEnd, true, nThePort) && p == pEnd)
3928 return nThePort;
3930 return 0;
3933 bool INetURLObject::SetPort(sal_uInt32 nThePort)
3935 if (getSchemeInfo().m_bPort && m_aHost.isPresent())
3937 OUString aNewPort(OUString::number(nThePort));
3938 sal_Int32 nDelta;
3939 if (m_aPort.isPresent())
3940 nDelta = m_aPort.set(m_aAbsURIRef, aNewPort);
3941 else
3943 m_aAbsURIRef.insert(m_aHost.getEnd(), u':');
3944 nDelta = m_aPort.set(m_aAbsURIRef, aNewPort, m_aHost.getEnd() + 1)
3945 + 1;
3947 m_aPath += nDelta;
3948 m_aQuery += nDelta;
3949 m_aFragment += nDelta;
3950 return true;
3952 return false;
3955 sal_Int32 INetURLObject::getSegmentCount(bool bIgnoreFinalSlash) const
3957 if (!checkHierarchical())
3958 return 0;
3960 sal_Unicode const * p = m_aAbsURIRef.getStr() + m_aPath.getBegin();
3961 sal_Unicode const * pEnd = p + m_aPath.getLength();
3962 if (bIgnoreFinalSlash && pEnd > p && pEnd[-1] == '/')
3963 --pEnd;
3964 sal_Int32 n = p == pEnd || *p == '/' ? 0 : 1;
3965 while (p != pEnd)
3966 if (*p++ == '/')
3967 ++n;
3968 return n;
3971 bool INetURLObject::removeSegment(sal_Int32 nIndex, bool bIgnoreFinalSlash)
3973 SubString aSegment(getSegment(nIndex, bIgnoreFinalSlash));
3974 if (!aSegment.isPresent())
3975 return false;
3977 OUStringBuffer aNewPath;
3978 aNewPath.append(m_aAbsURIRef.getStr() + m_aPath.getBegin(),
3979 aSegment.getBegin() - m_aPath.getBegin());
3980 if (bIgnoreFinalSlash && aSegment.getEnd() == m_aPath.getEnd())
3981 aNewPath.append('/');
3982 else
3983 aNewPath.append(m_aAbsURIRef.getStr() + aSegment.getEnd(),
3984 m_aPath.getEnd() - aSegment.getEnd());
3985 if (aNewPath.isEmpty() && !aSegment.isEmpty() &&
3986 m_aAbsURIRef[aSegment.getBegin()] == '/')
3988 aNewPath.append('/');
3991 return setPath(aNewPath.makeStringAndClear(), EncodeMechanism::NotCanonical,
3992 RTL_TEXTENCODING_UTF8);
3995 OUString INetURLObject::getName(sal_Int32 nIndex, bool bIgnoreFinalSlash,
3996 DecodeMechanism eMechanism,
3997 rtl_TextEncoding eCharset) const
3999 SubString aSegment(getSegment(nIndex, bIgnoreFinalSlash));
4000 if (!aSegment.isPresent())
4001 return OUString();
4003 sal_Unicode const * pSegBegin
4004 = m_aAbsURIRef.getStr() + aSegment.getBegin();
4005 sal_Unicode const * pSegEnd = pSegBegin + aSegment.getLength();
4007 if (pSegBegin < pSegEnd && *pSegBegin == '/')
4008 ++pSegBegin;
4009 sal_Unicode const * p = pSegBegin;
4010 while (p != pSegEnd && *p != ';')
4011 ++p;
4013 return decode(pSegBegin, p, eMechanism, eCharset);
4016 bool INetURLObject::setName(OUString const & rTheName)
4018 SubString aSegment(getSegment(LAST_SEGMENT, true));
4019 if (!aSegment.isPresent())
4020 return false;
4022 sal_Unicode const * pPathBegin
4023 = m_aAbsURIRef.getStr() + m_aPath.getBegin();
4024 sal_Unicode const * pPathEnd = pPathBegin + m_aPath.getLength();
4025 sal_Unicode const * pSegBegin
4026 = m_aAbsURIRef.getStr() + aSegment.getBegin();
4027 sal_Unicode const * pSegEnd = pSegBegin + aSegment.getLength();
4029 if (pSegBegin < pSegEnd && *pSegBegin == '/')
4030 ++pSegBegin;
4031 sal_Unicode const * p = pSegBegin;
4032 while (p != pSegEnd && *p != ';')
4033 ++p;
4035 OUStringBuffer aNewPath;
4036 aNewPath.append(pPathBegin, pSegBegin - pPathBegin);
4037 aNewPath.append(encodeText(rTheName, PART_PCHAR,
4038 EncodeMechanism::WasEncoded, RTL_TEXTENCODING_UTF8, true));
4039 aNewPath.append(p, pPathEnd - p);
4041 return setPath(aNewPath.makeStringAndClear(), EncodeMechanism::NotCanonical,
4042 RTL_TEXTENCODING_UTF8);
4045 bool INetURLObject::hasExtension()
4046 const
4048 SubString aSegment(getSegment(LAST_SEGMENT, true/*bIgnoreFinalSlash*/));
4049 if (!aSegment.isPresent())
4050 return false;
4052 sal_Unicode const * pSegBegin
4053 = m_aAbsURIRef.getStr() + aSegment.getBegin();
4054 sal_Unicode const * pSegEnd = pSegBegin + aSegment.getLength();
4056 if (pSegBegin < pSegEnd && *pSegBegin == '/')
4057 ++pSegBegin;
4058 for (sal_Unicode const * p = pSegBegin; p != pSegEnd && *p != ';'; ++p)
4059 if (*p == '.' && p != pSegBegin)
4060 return true;
4061 return false;
4064 OUString INetURLObject::getBase(sal_Int32 nIndex, bool bIgnoreFinalSlash,
4065 DecodeMechanism eMechanism,
4066 rtl_TextEncoding eCharset) const
4068 SubString aSegment(getSegment(nIndex, bIgnoreFinalSlash));
4069 if (!aSegment.isPresent())
4070 return OUString();
4072 sal_Unicode const * pSegBegin
4073 = m_aAbsURIRef.getStr() + aSegment.getBegin();
4074 sal_Unicode const * pSegEnd = pSegBegin + aSegment.getLength();
4076 if (pSegBegin < pSegEnd && *pSegBegin == '/')
4077 ++pSegBegin;
4078 sal_Unicode const * pExtension = nullptr;
4079 sal_Unicode const * p = pSegBegin;
4080 for (; p != pSegEnd && *p != ';'; ++p)
4081 if (*p == '.' && p != pSegBegin)
4082 pExtension = p;
4083 if (!pExtension)
4084 pExtension = p;
4086 return decode(pSegBegin, pExtension, eMechanism, eCharset);
4089 bool INetURLObject::setBase(OUString const & rTheBase, sal_Int32 nIndex,
4090 EncodeMechanism eMechanism,
4091 rtl_TextEncoding eCharset)
4093 SubString aSegment(getSegment(nIndex, true/*bIgnoreFinalSlash*/));
4094 if (!aSegment.isPresent())
4095 return false;
4097 sal_Unicode const * pPathBegin
4098 = m_aAbsURIRef.getStr() + m_aPath.getBegin();
4099 sal_Unicode const * pPathEnd = pPathBegin + m_aPath.getLength();
4100 sal_Unicode const * pSegBegin
4101 = m_aAbsURIRef.getStr() + aSegment.getBegin();
4102 sal_Unicode const * pSegEnd = pSegBegin + aSegment.getLength();
4104 if (pSegBegin < pSegEnd && *pSegBegin == '/')
4105 ++pSegBegin;
4106 sal_Unicode const * pExtension = nullptr;
4107 sal_Unicode const * p = pSegBegin;
4108 for (; p != pSegEnd && *p != ';'; ++p)
4109 if (*p == '.' && p != pSegBegin)
4110 pExtension = p;
4111 if (!pExtension)
4112 pExtension = p;
4114 OUStringBuffer aNewPath;
4115 aNewPath.append(pPathBegin, pSegBegin - pPathBegin);
4116 aNewPath.append(encodeText(rTheBase, PART_PCHAR,
4117 eMechanism, eCharset, true));
4118 aNewPath.append(pExtension, pPathEnd - pExtension);
4120 return setPath(aNewPath.makeStringAndClear(), EncodeMechanism::NotCanonical,
4121 RTL_TEXTENCODING_UTF8);
4124 OUString INetURLObject::getExtension(sal_Int32 nIndex,
4125 bool bIgnoreFinalSlash,
4126 DecodeMechanism eMechanism,
4127 rtl_TextEncoding eCharset) const
4129 SubString aSegment(getSegment(nIndex, bIgnoreFinalSlash));
4130 if (!aSegment.isPresent())
4131 return OUString();
4133 sal_Unicode const * pSegBegin
4134 = m_aAbsURIRef.getStr() + aSegment.getBegin();
4135 sal_Unicode const * pSegEnd = pSegBegin + aSegment.getLength();
4137 if (pSegBegin < pSegEnd && *pSegBegin == '/')
4138 ++pSegBegin;
4139 sal_Unicode const * pExtension = nullptr;
4140 sal_Unicode const * p = pSegBegin;
4141 for (; p != pSegEnd && *p != ';'; ++p)
4142 if (*p == '.' && p != pSegBegin)
4143 pExtension = p;
4145 if (!pExtension)
4146 return OUString();
4148 return decode(pExtension + 1, p, eMechanism, eCharset);
4151 bool INetURLObject::setExtension(OUString const & rTheExtension,
4152 sal_Int32 nIndex, bool bIgnoreFinalSlash,
4153 rtl_TextEncoding eCharset)
4155 SubString aSegment(getSegment(nIndex, bIgnoreFinalSlash));
4156 if (!aSegment.isPresent())
4157 return false;
4159 sal_Unicode const * pPathBegin
4160 = m_aAbsURIRef.getStr() + m_aPath.getBegin();
4161 sal_Unicode const * pPathEnd = pPathBegin + m_aPath.getLength();
4162 sal_Unicode const * pSegBegin
4163 = m_aAbsURIRef.getStr() + aSegment.getBegin();
4164 sal_Unicode const * pSegEnd = pSegBegin + aSegment.getLength();
4166 if (pSegBegin < pSegEnd && *pSegBegin == '/')
4167 ++pSegBegin;
4168 sal_Unicode const * pExtension = nullptr;
4169 sal_Unicode const * p = pSegBegin;
4170 for (; p != pSegEnd && *p != ';'; ++p)
4171 if (*p == '.' && p != pSegBegin)
4172 pExtension = p;
4173 if (!pExtension)
4174 pExtension = p;
4176 OUStringBuffer aNewPath;
4177 aNewPath.append(pPathBegin, pExtension - pPathBegin);
4178 aNewPath.append('.');
4179 aNewPath.append(encodeText(rTheExtension, PART_PCHAR,
4180 EncodeMechanism::WasEncoded, eCharset, true));
4181 aNewPath.append(p, pPathEnd - p);
4183 return setPath(aNewPath.makeStringAndClear(), EncodeMechanism::NotCanonical,
4184 RTL_TEXTENCODING_UTF8);
4187 bool INetURLObject::removeExtension(sal_Int32 nIndex, bool bIgnoreFinalSlash)
4189 SubString aSegment(getSegment(nIndex, bIgnoreFinalSlash));
4190 if (!aSegment.isPresent())
4191 return false;
4193 sal_Unicode const * pPathBegin
4194 = m_aAbsURIRef.getStr() + m_aPath.getBegin();
4195 sal_Unicode const * pPathEnd = pPathBegin + m_aPath.getLength();
4196 sal_Unicode const * pSegBegin
4197 = m_aAbsURIRef.getStr() + aSegment.getBegin();
4198 sal_Unicode const * pSegEnd = pSegBegin + aSegment.getLength();
4200 if (pSegBegin < pSegEnd && *pSegBegin == '/')
4201 ++pSegBegin;
4202 sal_Unicode const * pExtension = nullptr;
4203 sal_Unicode const * p = pSegBegin;
4204 for (; p != pSegEnd && *p != ';'; ++p)
4205 if (*p == '.' && p != pSegBegin)
4206 pExtension = p;
4207 if (!pExtension)
4208 return true;
4210 OUStringBuffer aNewPath;
4211 aNewPath.append(pPathBegin, pExtension - pPathBegin);
4212 aNewPath.append(p, pPathEnd - p);
4214 return setPath(aNewPath.makeStringAndClear(), EncodeMechanism::NotCanonical,
4215 RTL_TEXTENCODING_UTF8);
4218 bool INetURLObject::hasFinalSlash() const
4220 if (!checkHierarchical())
4221 return false;
4223 sal_Unicode const * pPathBegin
4224 = m_aAbsURIRef.getStr() + m_aPath.getBegin();
4225 sal_Unicode const * pPathEnd = pPathBegin + m_aPath.getLength();
4226 return pPathEnd > pPathBegin && pPathEnd[-1] == '/';
4229 bool INetURLObject::setFinalSlash()
4231 if (!checkHierarchical())
4232 return false;
4234 sal_Unicode const * pPathBegin
4235 = m_aAbsURIRef.getStr() + m_aPath.getBegin();
4236 sal_Unicode const * pPathEnd = pPathBegin + m_aPath.getLength();
4237 if (pPathEnd > pPathBegin && pPathEnd[-1] == '/')
4238 return true;
4240 OUStringBuffer aNewPath;
4241 aNewPath.append(pPathBegin, pPathEnd - pPathBegin);
4242 aNewPath.append('/');
4244 return setPath(aNewPath.makeStringAndClear(), EncodeMechanism::NotCanonical,
4245 RTL_TEXTENCODING_UTF8);
4248 bool INetURLObject::removeFinalSlash()
4250 if (!checkHierarchical())
4251 return false;
4253 sal_Unicode const * pPathBegin
4254 = m_aAbsURIRef.getStr() + m_aPath.getBegin();
4255 sal_Unicode const * pPathEnd = pPathBegin + m_aPath.getLength();
4256 if (pPathEnd <= pPathBegin || pPathEnd[-1] != '/')
4257 return true;
4259 --pPathEnd;
4260 if (pPathEnd == pPathBegin && *pPathBegin == '/')
4261 return false;
4262 OUString aNewPath(pPathBegin, pPathEnd - pPathBegin);
4264 return setPath(aNewPath, EncodeMechanism::NotCanonical, RTL_TEXTENCODING_UTF8);
// Interprets rFSysPath as a file system path and makes this object the
// corresponding "file://" URL.
//
// rFSysPath  the file system path to convert.
// eStyle     the notation(s) rFSysPath may be in (any combination of
//            FSysStyle::Vos, FSysStyle::Unix, FSysStyle::Dos). If more than
//            one is given, the actual notation is detected from the text
//            (productions T1 to T6 below).
//
// Returns false, leaving this object unchanged, when eStyle names no
// notation, when the text does not match the chosen notation, or when the
// synthesized URL cannot be parsed; returns true otherwise.
4267 bool INetURLObject::setFSysPath(OUString const & rFSysPath,
4268 FSysStyle eStyle)
4270 sal_Unicode const * pFSysBegin = rFSysPath.getStr();
4271 sal_Unicode const * pFSysEnd = pFSysBegin + rFSysPath.getLength();
// Count how many notations the caller allows.
4273 switch (((eStyle & FSysStyle::Vos) ? 1 : 0)
4274 + ((eStyle & FSysStyle::Unix) ? 1 : 0)
4275 + ((eStyle & FSysStyle::Dos) ? 1 : 0))
// None allowed: nothing can match.
4277 case 0:
4278 return false;
// Exactly one allowed: use it as is.
4280 case 1:
4281 break;
// Several allowed: detect the notation from the text.
4283 default:
// VOS notation starts with two slashes.
4284 if (eStyle & FSysStyle::Vos
4285 && pFSysEnd - pFSysBegin >= 2
4286 && pFSysBegin[0] == '/'
4287 && pFSysBegin[1] == '/')
// ... followed by "." alone or "." and a slash,
4289 if (pFSysEnd - pFSysBegin >= 3
4290 && pFSysBegin[2] == '.'
4291 && (pFSysEnd - pFSysBegin == 3 || pFSysBegin[3] == '/'))
4293 eStyle = FSysStyle::Vos; // Production T1
4294 break;
// ... or followed by something parseHost() accepts, ending at the end of the
// text or at a slash.
4297 sal_Unicode const * p = pFSysBegin + 2;
4298 OUString aHost;
4299 if (parseHost(p, pFSysEnd, aHost)
4300 && (p == pFSysEnd || *p == '/'))
4302 eStyle = FSysStyle::Vos; // Production T2
4303 break;
// DOS UNC notation: two backslashes, a host, then end of text or a backslash.
4307 if (eStyle & FSysStyle::Dos
4308 && pFSysEnd - pFSysBegin >= 2
4309 && pFSysBegin[0] == '\\'
4310 && pFSysBegin[1] == '\\')
4312 sal_Unicode const * p = pFSysBegin + 2;
4313 OUString aHost;
4314 if (parseHost(p, pFSysEnd, aHost)
4315 && (p == pFSysEnd || *p == '\\'))
4317 eStyle = FSysStyle::Dos; // Production T3
4318 break;
// DOS volume notation: an ASCII letter and ':', then end of text, a slash or
// a backslash.
4322 if (eStyle & FSysStyle::Dos
4323 && pFSysEnd - pFSysBegin >= 2
4324 && rtl::isAsciiAlpha(pFSysBegin[0])
4325 && pFSysBegin[1] == ':'
4326 && (pFSysEnd - pFSysBegin == 2
4327 || pFSysBegin[2] == '/'
4328 || pFSysBegin[2] == '\\'))
4330 eStyle = FSysStyle::Dos; // Productions T4, T5
4331 break;
// No rule matched; only Unix and DOS can still be guessed.
4334 if (!(eStyle & (FSysStyle::Unix | FSysStyle::Dos)))
4335 return false;
4337 eStyle = guessFSysStyleByCounting(pFSysBegin, pFSysEnd, eStyle);
4338 // Production T6
4339 break;
// Build the URL text for the notation that was chosen.
4342 OUStringBuffer aSynAbsURIRef("file://");
4344 switch (eStyle)
4346 case FSysStyle::Vos:
4348 sal_Unicode const * p = pFSysBegin;
// The leading "//" is mandatory and is not copied.
4349 if (pFSysEnd - p < 2 || *p++ != '/' || *p++ != '/')
4350 return false;
// A "." in host position is skipped.
4351 if (p != pFSysEnd && *p == '.'
4352 && (pFSysEnd - p == 1 || p[1] == '/'))
4353 ++p;
// Copy the rest, escaping '#' and '%'.
4354 for (; p != pFSysEnd; ++p)
4355 switch (*p)
4357 case '#':
4358 case '%':
4359 appendEscape(aSynAbsURIRef, *p);
4360 break;
4362 default:
4363 aSynAbsURIRef.append(*p);
4364 break;
4366 break;
4369 case FSysStyle::Unix:
4371 sal_Unicode const * p = pFSysBegin;
// A non-empty Unix path must start with a slash.
4372 if (p != pFSysEnd && *p != '/')
4373 return false;
// Copy the path, escaping '|', '#' and '%'.
4374 for (; p != pFSysEnd; ++p)
4375 switch (*p)
4377 case '|':
4378 case '#':
4379 case '%':
4380 appendEscape(aSynAbsURIRef, *p);
4381 break;
4383 default:
4384 aSynAbsURIRef.append(*p);
4385 break;
4387 break;
4390 case FSysStyle::Dos:
// 0x80000000 is used here as "no alternative delimiter".
4392 sal_uInt32 nAltDelimiter = 0x80000000;
4393 sal_Unicode const * p = pFSysBegin;
// UNC form: skip the two leading backslashes so the host follows "file://".
4394 if (pFSysEnd - p >= 3 && p[0] == '\\' && p[1] == '\\')
4395 p += 2;
4396 else
// Otherwise the host stays empty and the path starts with '/'. With a
// volume prefix, '/' is accepted as a delimiter in addition to a backslash.
4398 aSynAbsURIRef.append('/');
4399 if (pFSysEnd - p >= 2
4400 && rtl::isAsciiAlpha(p[0])
4401 && p[1] == ':'
4402 && (pFSysEnd - p == 2 || p[2] == '\\' || p[2] == '/'))
4403 nAltDelimiter = '/';
// Delimiters become '/'; a '/' that is not a delimiter, '#' and '%' are
// escaped; everything else is copied.
4405 for (; p != pFSysEnd; ++p)
4406 if (*p == '\\' || *p == nAltDelimiter)
4407 aSynAbsURIRef.append('/');
4408 else
4409 switch (*p)
4411 case '/':
4412 case '#':
4413 case '%':
4414 appendEscape(aSynAbsURIRef, *p);
4415 break;
4417 default:
4418 aSynAbsURIRef.append(*p);
4419 break;
4421 break;
4424 default:
4425 OSL_ASSERT(false);
4426 break;
// Parse the synthesized URL in a temporary so that this object is only
// modified when parsing succeeded.
4429 INetURLObject aTemp(aSynAbsURIRef.makeStringAndClear(), EncodeMechanism::WasEncoded,
4430 RTL_TEXTENCODING_UTF8);
4431 if (aTemp.HasError())
4432 return false;
4434 *this = aTemp;
4435 return true;
// Converts this file URL into a file system path.
//
// eStyle      the notation(s) the caller accepts (any combination of
//             FSysStyle::Vos, FSysStyle::Unix, FSysStyle::Dos). If more than
//             one is given, one is selected based on the URL (see below).
// pDelimiter  optional out parameter; when non-null and a path is produced,
//             it receives the path delimiter of the notation used ('/' for
//             VOS and Unix, a backslash for DOS).
//
// Returns an empty string when the scheme is not INetProtocol::File, when no
// notation can be selected, or when Unix notation is used although the URL
// has a non-empty host.
4438 OUString INetURLObject::getFSysPath(FSysStyle eStyle,
4439 sal_Unicode * pDelimiter) const
4441 if (m_eScheme != INetProtocol::File)
4442 return OUString();
// More than one notation allowed: pick one.
4444 if (((eStyle & FSysStyle::Vos) ? 1 : 0)
4445 + ((eStyle & FSysStyle::Unix) ? 1 : 0)
4446 + ((eStyle & FSysStyle::Dos) ? 1 : 0)
4447 > 1)
// VOS is preferred when allowed and the URL has a non-empty host.
4449 if(eStyle & FSysStyle::Vos && m_aHost.isPresent() && m_aHost.getLength() > 0)
4451 eStyle= FSysStyle::Vos;
4453 else
// Next choice is DOS: the path has a DOS volume, or DOS is allowed and the
// URL has a non-empty host.
4455 if(hasDosVolume(eStyle) || ((eStyle & FSysStyle::Dos) && m_aHost.isPresent() && m_aHost.getLength() > 0))
4457 eStyle = FSysStyle::Dos;
4459 else
// Unix is chosen only when allowed and the host is absent or empty.
4461 if(eStyle & FSysStyle::Unix && (!m_aHost.isPresent() || m_aHost.getLength() == 0))
4463 eStyle = FSysStyle::Unix;
4465 else
// Nothing fits; the switch below then returns an empty string.
4467 eStyle= FSysStyle(0);
4473 switch (eStyle)
4475 case FSysStyle::Vos:
4477 if (pDelimiter)
4478 *pDelimiter = '/';
// Result is "//" + host (or "." when there is no host) + path.
4480 OUStringBuffer aSynFSysPath;
4481 aSynFSysPath.append("//");
4482 if (m_aHost.isPresent() && m_aHost.getLength() > 0)
4483 aSynFSysPath.append(decode(m_aHost, DecodeMechanism::WithCharset,
4484 RTL_TEXTENCODING_UTF8));
4485 else
4486 aSynFSysPath.append('.');
4487 aSynFSysPath.append(decode(m_aPath, DecodeMechanism::WithCharset,
4488 RTL_TEXTENCODING_UTF8));
4489 return aSynFSysPath.makeStringAndClear();
4492 case FSysStyle::Unix:
// A non-empty host cannot be expressed in Unix notation.
4494 if (m_aHost.isPresent() && m_aHost.getLength() > 0)
4495 return OUString();
4497 if (pDelimiter)
4498 *pDelimiter = '/';
4500 return decode(m_aPath, DecodeMechanism::WithCharset, RTL_TEXTENCODING_UTF8);
4503 case FSysStyle::Dos:
4505 if (pDelimiter)
4506 *pDelimiter = '\\';
4508 OUStringBuffer aSynFSysPath;
// A non-empty host is written in UNC form: two backslashes, the host, and one
// more backslash.
4509 if (m_aHost.isPresent() && m_aHost.getLength() > 0)
4511 aSynFSysPath.append("\\\\");
4512 aSynFSysPath.append(decode(m_aHost, DecodeMechanism::WithCharset,
4513 RTL_TEXTENCODING_UTF8));
4514 aSynFSysPath.append('\\');
4516 sal_Unicode const * p
4517 = m_aAbsURIRef.getStr() + m_aPath.getBegin();
4518 sal_Unicode const * pEnd = p + m_aPath.getLength();
4519 DBG_ASSERT(p < pEnd && *p == '/',
4520 "INetURLObject::getFSysPath(): Bad path");
// Skip the first character of the path (asserted above to be '/').
4521 ++p;
// Decode the rest; only an unescaped '/' is turned into a backslash, so an
// escaped slash stays a literal '/'.
4522 while (p < pEnd)
4524 EscapeType eEscapeType;
4525 sal_uInt32 nUTF32 = getUTF32(p, pEnd, EncodeMechanism::WasEncoded,
4526 RTL_TEXTENCODING_UTF8,
4527 eEscapeType);
4528 if (eEscapeType == EscapeType::NONE && nUTF32 == '/')
4529 aSynFSysPath.append('\\');
4530 else
4531 aSynFSysPath.appendUtf32(nUTF32);
4533 return aSynFSysPath.makeStringAndClear();
4536 default:
4537 return OUString();
4541 // static
4542 void INetURLObject::appendUCS4Escape(OUStringBuffer & rTheText,
4543 sal_uInt32 nUCS4)
4545 DBG_ASSERT(nUCS4 < 0x80000000,
4546 "INetURLObject::appendUCS4Escape(): Bad char");
4547 if (nUCS4 < 0x80)
4548 appendEscape(rTheText, nUCS4);
4549 else if (nUCS4 < 0x800)
4551 appendEscape(rTheText, nUCS4 >> 6 | 0xC0);
4552 appendEscape(rTheText, (nUCS4 & 0x3F) | 0x80);
4554 else if (nUCS4 < 0x10000)
4556 appendEscape(rTheText, nUCS4 >> 12 | 0xE0);
4557 appendEscape(rTheText, (nUCS4 >> 6 & 0x3F) | 0x80);
4558 appendEscape(rTheText, (nUCS4 & 0x3F) | 0x80);
4560 else if (nUCS4 < 0x200000)
4562 appendEscape(rTheText, nUCS4 >> 18 | 0xF0);
4563 appendEscape(rTheText, (nUCS4 >> 12 & 0x3F) | 0x80);
4564 appendEscape(rTheText, (nUCS4 >> 6 & 0x3F) | 0x80);
4565 appendEscape(rTheText, (nUCS4 & 0x3F) | 0x80);
4567 else if (nUCS4 < 0x4000000)
4569 appendEscape(rTheText, nUCS4 >> 24 | 0xF8);
4570 appendEscape(rTheText, (nUCS4 >> 18 & 0x3F) | 0x80);
4571 appendEscape(rTheText, (nUCS4 >> 12 & 0x3F) | 0x80);
4572 appendEscape(rTheText, (nUCS4 >> 6 & 0x3F) | 0x80);
4573 appendEscape(rTheText, (nUCS4 & 0x3F) | 0x80);
4575 else
4577 appendEscape(rTheText, nUCS4 >> 30 | 0xFC);
4578 appendEscape(rTheText, (nUCS4 >> 24 & 0x3F) | 0x80);
4579 appendEscape(rTheText, (nUCS4 >> 18 & 0x3F) | 0x80);
4580 appendEscape(rTheText, (nUCS4 >> 12 & 0x3F) | 0x80);
4581 appendEscape(rTheText, (nUCS4 >> 6 & 0x3F) | 0x80);
4582 appendEscape(rTheText, (nUCS4 & 0x3F) | 0x80);
4586 // static
4587 void INetURLObject::appendUCS4(OUStringBuffer& rTheText, sal_uInt32 nUCS4,
4588 EscapeType eEscapeType,
4589 Part ePart, rtl_TextEncoding eCharset,
4590 bool bKeepVisibleEscapes)
4592 bool bEscape;
4593 rtl_TextEncoding eTargetCharset = RTL_TEXTENCODING_DONTKNOW;
4594 switch (eEscapeType)
4596 case EscapeType::NONE:
4597 if (mustEncode(nUCS4, ePart))
4599 bEscape = true;
4600 eTargetCharset = RTL_TEXTENCODING_UTF8;
4602 else
4603 bEscape = false;
4604 break;
4606 case EscapeType::Octet:
4607 bEscape = true;
4608 eTargetCharset = RTL_TEXTENCODING_ISO_8859_1;
4609 break;
4611 case EscapeType::Utf32:
4612 if (mustEncode(nUCS4, ePart))
4614 bEscape = true;
4615 eTargetCharset = eCharset;
4617 else if (bKeepVisibleEscapes && INetMIME::isVisible(nUCS4))
4619 bEscape = true;
4620 eTargetCharset = RTL_TEXTENCODING_ASCII_US;
4622 else
4623 bEscape = false;
4624 break;
4625 default:
4626 bEscape = false;
4629 if (bEscape)
4631 switch (eTargetCharset)
4633 default:
4634 OSL_FAIL("INetURLObject::appendUCS4(): Unsupported charset");
4635 SAL_FALLTHROUGH;
4636 case RTL_TEXTENCODING_ASCII_US:
4637 case RTL_TEXTENCODING_ISO_8859_1:
4638 appendEscape(rTheText, nUCS4);
4639 break;
4640 case RTL_TEXTENCODING_UTF8:
4641 appendUCS4Escape(rTheText, nUCS4);
4642 break;
4645 else
4646 rTheText.append(sal_Unicode(nUCS4));
4649 // static
// Reads the next character from [rBegin, pEnd) and classifies how it was
// written.
//
// rBegin       in/out: read position; advanced past everything consumed
//              (the character itself and, if recognised, its escape digits).
// pEnd         end of the input range.
// eMechanism   selects whether "%XX" escape sequences are interpreted.
// eCharset     charset assumed for escaped octets (WasEncoded only).
// rEscapeType  out: NONE if the character was not escaped, Octet if it is a
//              raw escaped octet, Utf32 if the escape(s) were decoded into a
//              character.
// Returns the character, or the octet value for EscapeType::Octet.
4650 sal_uInt32 INetURLObject::getUTF32(sal_Unicode const *& rBegin,
4651 sal_Unicode const * pEnd,
4652 EncodeMechanism eMechanism,
4653 rtl_TextEncoding eCharset,
4654 EscapeType & rEscapeType)
4656 DBG_ASSERT(rBegin < pEnd, "INetURLObject::getUTF32(): Bad sequence");
// Fetch one character; rBegin is passed by reference and moves past it.
4657 sal_uInt32 nUTF32 = INetMIME::getUTF32Character(rBegin, pEnd);
4658 switch (eMechanism)
// All: escapes are not interpreted, so a '%' is returned as a plain char.
4660 case EncodeMechanism::All:
4661 rEscapeType = EscapeType::NONE;
4662 break;
// WasEncoded: "%XX" is decoded and interpreted according to eCharset.
4664 case EncodeMechanism::WasEncoded:
4666 int nWeight1;
4667 int nWeight2;
// An escape needs '%' followed by two hex digits that are still in range.
4668 if (nUTF32 == static_cast<unsigned char>('%') && rBegin + 1 < pEnd
4669 && (nWeight1 = INetMIME::getHexWeight(rBegin[0])) >= 0
4670 && (nWeight2 = INetMIME::getHexWeight(rBegin[1])) >= 0)
// Consume both hex digits and combine them into one octet value.
4672 rBegin += 2;
4673 nUTF32 = nWeight1 << 4 | nWeight2;
4674 switch (eCharset)
// Unsupported charsets are reported and then treated like US-ASCII.
4676 default:
4677 OSL_FAIL(
4678 "INetURLObject::getUTF32(): Unsupported charset");
4679 SAL_FALLTHROUGH;
// US-ASCII: only ASCII octets count as characters, others stay raw octets.
4680 case RTL_TEXTENCODING_ASCII_US:
4681 rEscapeType = rtl::isAscii(nUTF32) ?
4682 EscapeType::Utf32 : EscapeType::Octet;
4683 break;
// ISO 8859-1: every octet value is taken directly as the character.
4685 case RTL_TEXTENCODING_ISO_8859_1:
4686 rEscapeType = EscapeType::Utf32;
4687 break;
// UTF-8: ASCII octets are characters; a lead octet may start a multi-octet
// sequence whose continuation octets must also be "%XX" escapes.
4689 case RTL_TEXTENCODING_UTF8:
4690 if (rtl::isAscii(nUTF32))
4691 rEscapeType = EscapeType::Utf32;
4692 else
// Only octets 0xC0..0xF4 are considered as possible lead octets.
4694 if (nUTF32 >= 0xC0 && nUTF32 <= 0xF4)
// nEncoded accumulates the decoded value, nShift is the bit position of the
// first continuation octet's payload, and nMin is the smallest value
// accepted for this sequence length (smaller results are rejected below).
4696 sal_uInt32 nEncoded;
4697 int nShift;
4698 sal_uInt32 nMin;
// Lead octet up to 0xDF: one continuation octet follows.
4699 if (nUTF32 <= 0xDF)
4701 nEncoded = (nUTF32 & 0x1F) << 6;
4702 nShift = 0;
4703 nMin = 0x80;
// Lead octet up to 0xEF: two continuation octets follow.
4705 else if (nUTF32 <= 0xEF)
4707 nEncoded = (nUTF32 & 0x0F) << 12;
4708 nShift = 6;
4709 nMin = 0x800;
// Remaining lead octets (up to 0xF4): three continuation octets follow.
4711 else
4713 nEncoded = (nUTF32 & 0x07) << 18;
4714 nShift = 12;
4715 nMin = 0x10000;
// Scan ahead with a private cursor so that rBegin stays untouched unless
// the whole sequence turns out to be valid.
4717 sal_Unicode const * p = rBegin;
4718 bool bUTF8 = true;
4719 for (;;)
// Each continuation must be a complete "%XX" with two hex digits, and the
// first digit must not exceed 0xB.
// NOTE(review): the comparison applied to the result of
// getHexWeight(p[1]) is not visible in this listing (a line appears to be
// missing between the next two conditions) - confirm against upstream.
4721 if (pEnd - p < 3
4722 || p[0] != '%'
4723 || (nWeight1
4724 = INetMIME::getHexWeight(p[1]))
4726 || nWeight1 > 11
4727 || (nWeight2
4728 = INetMIME::getHexWeight(p[2]))
4729 < 0)
4731 bUTF8 = false;
4732 break;
// Merge the low six bits of this continuation octet at position nShift.
4734 p += 3;
4735 nEncoded
4736 |= ((nWeight1 & 3) << 4 | nWeight2)
4737 << nShift;
// nShift == 0 means the last continuation octet has just been merged.
4738 if (nShift == 0)
4739 break;
4740 nShift -= 6;
// Accept only complete sequences that decode to a Unicode scalar value of at
// least nMin; then commit the cursor and return the decoded character.
4742 if (bUTF8 && rtl::isUnicodeScalarValue(nEncoded)
4743 && nEncoded >= nMin)
4745 rBegin = p;
4746 nUTF32 = nEncoded;
4747 rEscapeType = EscapeType::Utf32;
4748 break;
// No valid multi-octet sequence was recognised: report the single escaped
// octet as it stands.
4751 rEscapeType = EscapeType::Octet;
4753 break;
// Not an escape sequence: plain character.
4756 else
4757 rEscapeType = EscapeType::NONE;
4758 break;
// NotCanonical: "%XX" is decoded but always reported as a raw octet,
// independent of eCharset.
4761 case EncodeMechanism::NotCanonical:
4763 int nWeight1;
4764 int nWeight2;
4765 if (nUTF32 == static_cast<unsigned char>('%') && rBegin + 1 < pEnd
4766 && ((nWeight1 = INetMIME::getHexWeight(rBegin[0])) >= 0)
4767 && ((nWeight2 = INetMIME::getHexWeight(rBegin[1])) >= 0))
4769 rBegin += 2;
4770 nUTF32 = nWeight1 << 4 | nWeight2;
4771 rEscapeType = EscapeType::Octet;
4773 else
4774 rEscapeType = EscapeType::NONE;
4775 break;
4778 return nUTF32;
4781 // static
4782 sal_uInt32 INetURLObject::scanDomain(sal_Unicode const *& rBegin,
4783 sal_Unicode const * pEnd,
4784 bool bEager)
4786 enum State { STATE_DOT, STATE_LABEL, STATE_HYPHEN };
4787 State eState = STATE_DOT;
4788 sal_Int32 nLabels = 0;
4789 sal_Unicode const * pLastAlphanumeric = nullptr;
4790 for (sal_Unicode const * p = rBegin;; ++p)
4791 switch (eState)
4793 case STATE_DOT:
4794 if (p != pEnd && (rtl::isAsciiAlphanumeric(*p) || *p == '_'))
4796 ++nLabels;
4797 eState = STATE_LABEL;
4798 break;
4800 if (bEager || nLabels == 0)
4801 return 0;
4802 rBegin = p - 1;
4803 return nLabels;
4805 case STATE_LABEL:
4806 if (p != pEnd)
4808 if (rtl::isAsciiAlphanumeric(*p) || *p == '_')
4809 break;
4810 else if (*p == '.')
4812 eState = STATE_DOT;
4813 break;
4815 else if (*p == '-')
4817 pLastAlphanumeric = p;
4818 eState = STATE_HYPHEN;
4819 break;
4822 rBegin = p;
4823 return nLabels;
4825 case STATE_HYPHEN:
4826 if (p != pEnd)
4828 if (rtl::isAsciiAlphanumeric(*p) || *p == '_')
4830 eState = STATE_LABEL;
4831 break;
4833 else if (*p == '-')
4834 break;
4836 if (bEager)
4837 return 0;
4838 rBegin = pLastAlphanumeric;
4839 return nLabels;
4843 // static
4844 bool INetURLObject::scanIPv6reference(sal_Unicode const *& rBegin,
4845 sal_Unicode const * pEnd)
4847 if (rBegin != pEnd && *rBegin == '[') {
4848 sal_Unicode const * p = rBegin + 1;
4849 //TODO: check for valid IPv6address (RFC 2373):
4850 while (p != pEnd && (rtl::isAsciiHexDigit(*p) || *p == ':' || *p == '.'))
4852 ++p;
4854 if (p != pEnd && *p == ']') {
4855 rBegin = p + 1;
4856 return true;
4859 return false;
4862 OUString INetURLObject::GetPartBeforeLastName()
4863 const
4865 if (!checkHierarchical())
4866 return OUString();
4867 INetURLObject aTemp(*this);
4868 aTemp.clearFragment();
4869 aTemp.clearQuery();
4870 aTemp.removeSegment(LAST_SEGMENT, false);
4871 aTemp.setFinalSlash();
4872 return aTemp.GetMainURL(DecodeMechanism::ToIUri);
4875 OUString INetURLObject::GetLastName(DecodeMechanism eMechanism,
4876 rtl_TextEncoding eCharset) const
4878 return getName(LAST_SEGMENT, true, eMechanism, eCharset);
4881 OUString INetURLObject::GetFileExtension() const
4883 return getExtension(LAST_SEGMENT, false);
4886 void INetURLObject::CutLastName()
4888 INetURLObject aTemp(*this);
4889 aTemp.clearFragment();
4890 aTemp.clearQuery();
4891 if (!aTemp.removeSegment(LAST_SEGMENT, false))
4892 return;
4893 *this = aTemp;
4896 OUString INetURLObject::PathToFileName() const
4898 if (m_eScheme != INetProtocol::File)
4899 return OUString();
4900 OUString aSystemPath;
4901 if (osl::FileBase::getSystemPathFromFileURL(
4902 decode(m_aAbsURIRef.getStr(),
4903 m_aAbsURIRef.getStr() + m_aPath.getEnd(),
4904 DecodeMechanism::NONE, RTL_TEXTENCODING_UTF8),
4905 aSystemPath)
4906 != osl::FileBase::E_None)
4907 return OUString();
4908 return aSystemPath;
4911 OUString INetURLObject::GetFull() const
4913 INetURLObject aTemp(*this);
4914 aTemp.removeFinalSlash();
4915 return aTemp.PathToFileName();
4918 OUString INetURLObject::GetPath() const
4920 INetURLObject aTemp(*this);
4921 aTemp.removeSegment();
4922 aTemp.removeFinalSlash();
4923 return aTemp.PathToFileName();
4926 void INetURLObject::SetBase(OUString const & rTheBase)
4928 setBase(rTheBase, LAST_SEGMENT, EncodeMechanism::All);
4931 OUString INetURLObject::GetBase() const
4933 return getBase(LAST_SEGMENT, true, DecodeMechanism::WithCharset);
4936 void INetURLObject::SetName(OUString const & rTheName,
4937 EncodeMechanism eMechanism,
4938 rtl_TextEncoding eCharset)
4940 INetURLObject aTemp(*this);
4941 if (aTemp.removeSegment()
4942 && aTemp.insertName(rTheName, false, LAST_SEGMENT, eMechanism,
4943 eCharset))
4944 *this = aTemp;
4947 void INetURLObject::SetExtension(OUString const & rTheExtension)
4949 setExtension(rTheExtension, LAST_SEGMENT, false);
4952 OUString INetURLObject::CutExtension()
4954 OUString aTheExtension(getExtension(LAST_SEGMENT, false));
4955 return removeExtension(LAST_SEGMENT, false)
4956 ? aTheExtension : OUString();
4959 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */