Add 'reference' to the iterator_traits, needed by LegacyIterator reqs
[LibreOffice.git] / svl / source / misc / adrparse.cxx
bloba0cdcfdfc022c086b96f79a8ad88e45998345caa
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */
20 #include <rtl/ustrbuf.hxx>
21 #include <svl/adrparse.hxx>
23 namespace
26 enum ElementType { ELEMENT_START, ELEMENT_DELIM, ELEMENT_ITEM, ELEMENT_END };
28 struct ParsedAddrSpec
30 sal_Unicode const * m_pBegin;
31 sal_Unicode const * m_pEnd;
32 ElementType m_eLastElem;
33 bool m_bAtFound;
34 bool m_bReparse;
36 ParsedAddrSpec() { reset(); }
38 bool isPoorlyValid() const { return m_eLastElem >= ELEMENT_ITEM; }
40 bool isValid() const { return isPoorlyValid() && m_bAtFound; }
42 void reset();
44 void finish();
47 void ParsedAddrSpec::reset()
49 m_pBegin = nullptr;
50 m_pEnd = nullptr;
51 m_eLastElem = ELEMENT_START;
52 m_bAtFound = false;
53 m_bReparse = false;
56 void ParsedAddrSpec::finish()
58 if (isPoorlyValid())
59 m_eLastElem = ELEMENT_END;
60 else
61 reset();
66 class SvAddressParser_Impl
68 enum State { BEFORE_COLON, BEFORE_LESS, AFTER_LESS, AFTER_GREATER };
70 enum TokenType: sal_uInt32 {
71 TOKEN_QUOTED = 0x80000000, TOKEN_DOMAIN, TOKEN_COMMENT, TOKEN_ATOM };
73 sal_Unicode const * m_pInputPos;
74 sal_Unicode const * m_pInputEnd;
75 sal_uInt32 m_nCurToken;
76 sal_Unicode const * m_pCurTokenBegin;
77 sal_Unicode const * m_pCurTokenEnd;
78 ParsedAddrSpec m_aOuterAddrSpec;
79 ParsedAddrSpec m_aInnerAddrSpec;
80 ParsedAddrSpec * m_pAddrSpec;
81 State m_eState;
82 TokenType m_eType;
84 inline void reset();
86 void addTokenToAddrSpec(ElementType eTokenElem);
88 bool readToken();
90 static OUString reparse(sal_Unicode const * pBegin,
91 sal_Unicode const * pEnd, bool bAddrSpec);
93 public:
94 SvAddressParser_Impl(SvAddressParser * pParser, const OUString& rIn);
97 inline void SvAddressParser_Impl::reset()
99 m_aOuterAddrSpec.reset();
100 m_aInnerAddrSpec.reset();
101 m_pAddrSpec = &m_aOuterAddrSpec;
102 m_eState = BEFORE_COLON;
103 m_eType = TOKEN_ATOM;
106 void SvAddressParser_Impl::addTokenToAddrSpec(ElementType eTokenElem)
108 if (!m_pAddrSpec->m_pBegin)
109 m_pAddrSpec->m_pBegin = m_pCurTokenBegin;
110 else if (m_pAddrSpec->m_pEnd < m_pCurTokenBegin)
111 m_pAddrSpec->m_bReparse = true;
112 m_pAddrSpec->m_pEnd = m_pCurTokenEnd;
113 m_pAddrSpec->m_eLastElem = eTokenElem;
117 // SvAddressParser_Impl
120 bool SvAddressParser_Impl::readToken()
122 m_nCurToken = m_eType;
123 switch (m_eType)
125 case TOKEN_QUOTED:
127 m_pCurTokenBegin = m_pInputPos - 1;
128 bool bEscaped = false;
129 for (;;)
131 if (m_pInputPos >= m_pInputEnd)
132 return false;
133 sal_Unicode cChar = *m_pInputPos++;
134 if (bEscaped)
136 bEscaped = false;
138 else if (cChar == '"')
140 m_pCurTokenEnd = m_pInputPos;
141 return true;
143 else if (cChar == '\\')
144 bEscaped = true;
148 case TOKEN_DOMAIN:
150 m_pCurTokenBegin = m_pInputPos - 1;
151 bool bEscaped = false;
152 for (;;)
154 if (m_pInputPos >= m_pInputEnd)
155 return false;
156 sal_Unicode cChar = *m_pInputPos++;
157 if (bEscaped)
158 bEscaped = false;
159 else if (cChar == ']')
161 m_pCurTokenEnd = m_pInputPos;
162 return true;
164 else if (cChar == '\\')
165 bEscaped = true;
169 case TOKEN_COMMENT:
171 m_pCurTokenBegin = m_pInputPos - 1;
172 bool bEscaped = false;
173 int nLevel = 0;
174 for (;;)
176 if (m_pInputPos >= m_pInputEnd)
177 return false;
178 sal_Unicode cChar = *m_pInputPos++;
179 if (bEscaped)
181 bEscaped = false;
183 else if (cChar == '(')
185 ++nLevel;
187 else if (cChar == ')')
188 if (nLevel)
190 --nLevel;
192 else
193 return true;
194 else if (cChar == '\\')
196 bEscaped = true;
201 default:
203 sal_Unicode cChar;
204 for (;;)
206 if (m_pInputPos >= m_pInputEnd)
207 return false;
208 cChar = *m_pInputPos++;
209 if (cChar > ' ' && cChar != 0x7F) // DEL
210 break;
212 m_pCurTokenBegin = m_pInputPos - 1;
213 if (cChar == '"' || cChar == '(' || cChar == ')' || cChar == ','
214 || cChar == '.' || cChar == ':' || cChar == ';'
215 || cChar == '<' || cChar == '>' || cChar == '@'
216 || cChar == '[' || cChar == '\\' || cChar == ']')
218 m_nCurToken = cChar;
219 m_pCurTokenEnd = m_pInputPos;
220 return true;
222 else
223 for (;;)
225 if (m_pInputPos >= m_pInputEnd)
227 m_pCurTokenEnd = m_pInputPos;
228 return true;
230 cChar = *m_pInputPos++;
231 if (cChar <= ' ' || cChar == '"' || cChar == '('
232 || cChar == ')' || cChar == ',' || cChar == '.'
233 || cChar == ':' || cChar == ';' || cChar == '<'
234 || cChar == '>' || cChar == '@' || cChar == '['
235 || cChar == '\\' || cChar == ']'
236 || cChar == 0x7F) // DEL
238 m_pCurTokenEnd = --m_pInputPos;
239 return true;
246 // static
247 OUString SvAddressParser_Impl::reparse(sal_Unicode const * pBegin,
248 sal_Unicode const * pEnd, bool bAddrSpec)
250 OUStringBuffer aResult;
251 TokenType eMode = TOKEN_ATOM;
252 bool bEscaped = false;
253 bool bEndsWithSpace = false;
254 int nLevel = 0;
255 while (pBegin < pEnd)
257 sal_Unicode cChar = *pBegin++;
258 switch (eMode)
260 case TOKEN_QUOTED:
261 if (bEscaped)
263 aResult.append(cChar);
264 bEscaped = false;
266 else if (cChar == '"')
268 if (bAddrSpec)
269 aResult.append(cChar);
270 eMode = TOKEN_ATOM;
272 else if (cChar == '\\')
274 if (bAddrSpec)
275 aResult.append(cChar);
276 bEscaped = true;
278 else
279 aResult.append(cChar);
280 break;
282 case TOKEN_DOMAIN:
283 if (bEscaped)
285 aResult.append(cChar);
286 bEscaped = false;
288 else if (cChar == ']')
290 aResult.append(cChar);
291 eMode = TOKEN_ATOM;
293 else if (cChar == '\\')
295 if (bAddrSpec)
296 aResult.append(cChar);
297 bEscaped = true;
299 else
300 aResult.append(cChar);
301 break;
303 case TOKEN_COMMENT:
304 if (bEscaped)
305 bEscaped = false;
306 else if (cChar == '(')
307 ++nLevel;
308 else if (cChar == ')')
309 if (nLevel)
310 --nLevel;
311 else
312 eMode = TOKEN_ATOM;
313 else if (cChar == '\\')
314 bEscaped = true;
315 break;
317 case TOKEN_ATOM:
318 if (cChar <= ' ' || cChar == 0x7F) // DEL
320 if (!bAddrSpec && !bEndsWithSpace)
322 aResult.append(' ');
323 bEndsWithSpace = true;
326 else if (cChar == '(')
328 if (!bAddrSpec && !bEndsWithSpace)
330 aResult.append(' ');
331 bEndsWithSpace = true;
333 eMode = TOKEN_COMMENT;
335 else
337 bEndsWithSpace = false;
338 if (cChar == '"')
340 if (bAddrSpec)
341 aResult.append(cChar);
342 eMode = TOKEN_QUOTED;
344 else if (cChar == '[')
346 aResult.append(cChar);
347 eMode = TOKEN_QUOTED;
349 else
350 aResult.append(cChar);
352 break;
355 return aResult.makeStringAndClear();
358 SvAddressParser_Impl::SvAddressParser_Impl(SvAddressParser * pParser,
359 const OUString& rInput)
360 : m_pCurTokenBegin(nullptr)
361 , m_pCurTokenEnd(nullptr)
363 m_pInputPos = rInput.getStr();
364 m_pInputEnd = m_pInputPos + rInput.getLength();
366 reset();
367 bool bDone = false;
368 for (;;)
370 if (!readToken())
372 if (m_eState == AFTER_LESS)
373 m_nCurToken = '>';
374 else
376 m_nCurToken = ',';
377 bDone = true;
380 switch (m_nCurToken)
382 case TOKEN_QUOTED:
383 if (m_pAddrSpec->m_eLastElem != ELEMENT_END)
385 if (m_pAddrSpec->m_bAtFound
386 || m_pAddrSpec->m_eLastElem <= ELEMENT_DELIM)
387 m_pAddrSpec->reset();
388 addTokenToAddrSpec(ELEMENT_ITEM);
390 m_eType = TOKEN_ATOM;
391 break;
393 case TOKEN_DOMAIN:
394 if (m_pAddrSpec->m_eLastElem != ELEMENT_END)
396 if (m_pAddrSpec->m_bAtFound && m_pAddrSpec->m_eLastElem == ELEMENT_DELIM)
397 addTokenToAddrSpec(ELEMENT_ITEM);
398 else
399 m_pAddrSpec->reset();
401 m_eType = TOKEN_ATOM;
402 break;
404 case TOKEN_COMMENT:
405 m_eType = TOKEN_ATOM;
406 break;
408 case TOKEN_ATOM:
409 if (m_pAddrSpec->m_eLastElem != ELEMENT_END)
411 if (m_pAddrSpec->m_eLastElem != ELEMENT_DELIM)
412 m_pAddrSpec->reset();
413 addTokenToAddrSpec(ELEMENT_ITEM);
415 break;
417 case '(':
418 m_eType = TOKEN_COMMENT;
419 break;
421 case ')':
422 case '\\':
423 case ']':
424 m_pAddrSpec->finish();
425 break;
427 case '<':
428 switch (m_eState)
430 case BEFORE_COLON:
431 case BEFORE_LESS:
432 m_aOuterAddrSpec.finish();
433 m_pAddrSpec = &m_aInnerAddrSpec;
434 m_eState = AFTER_LESS;
435 break;
437 case AFTER_LESS:
438 m_aInnerAddrSpec.finish();
439 break;
441 case AFTER_GREATER:
442 m_aOuterAddrSpec.finish();
443 break;
445 break;
447 case '>':
448 if (m_eState == AFTER_LESS)
450 m_aInnerAddrSpec.finish();
451 if (m_aInnerAddrSpec.isValid())
452 m_aOuterAddrSpec.m_eLastElem = ELEMENT_END;
453 m_pAddrSpec = &m_aOuterAddrSpec;
454 m_eState = AFTER_GREATER;
456 else
458 m_aOuterAddrSpec.finish();
460 break;
462 case '@':
463 if (m_pAddrSpec->m_eLastElem != ELEMENT_END)
465 if (!m_pAddrSpec->m_bAtFound
466 && m_pAddrSpec->m_eLastElem == ELEMENT_ITEM)
468 addTokenToAddrSpec(ELEMENT_DELIM);
469 m_pAddrSpec->m_bAtFound = true;
471 else
472 m_pAddrSpec->reset();
474 break;
476 case ',':
477 case ';':
478 if (m_eState == AFTER_LESS)
479 if (m_nCurToken == ',')
481 if (m_aInnerAddrSpec.m_eLastElem != ELEMENT_END)
482 m_aInnerAddrSpec.reset();
484 else
485 m_aInnerAddrSpec.finish();
486 else
488 if(m_aInnerAddrSpec.isValid() || (!m_aOuterAddrSpec.isValid() && m_aInnerAddrSpec.isPoorlyValid()))
490 m_pAddrSpec = &m_aInnerAddrSpec;
492 else if(m_aOuterAddrSpec.isPoorlyValid())
494 m_pAddrSpec = &m_aOuterAddrSpec;
496 else
498 m_pAddrSpec = nullptr;
501 if (m_pAddrSpec)
503 OUString aTheAddrSpec;
504 if (m_pAddrSpec->m_bReparse)
505 aTheAddrSpec = reparse(m_pAddrSpec->m_pBegin, m_pAddrSpec->m_pEnd, true);
506 else
508 sal_Int32 nLen = m_pAddrSpec->m_pEnd - m_pAddrSpec->m_pBegin;
509 if (nLen == rInput.getLength())
510 aTheAddrSpec = rInput;
511 else
512 aTheAddrSpec = rInput.copy( (m_pAddrSpec->m_pBegin - rInput.getStr()),
513 nLen);
515 pParser->m_vAddresses.emplace_back( aTheAddrSpec );
517 if (bDone)
518 return;
519 reset();
521 break;
523 case ':':
524 switch (m_eState)
526 case BEFORE_COLON:
527 m_aOuterAddrSpec.reset();
528 m_eState = BEFORE_LESS;
529 break;
531 case BEFORE_LESS:
532 case AFTER_GREATER:
533 m_aOuterAddrSpec.finish();
534 break;
536 case AFTER_LESS:
537 m_aInnerAddrSpec.reset();
538 break;
540 break;
542 case '"':
543 m_eType = TOKEN_QUOTED;
544 break;
546 case '.':
547 if (m_pAddrSpec->m_eLastElem != ELEMENT_END)
549 if (m_pAddrSpec->m_eLastElem != ELEMENT_DELIM)
550 addTokenToAddrSpec(ELEMENT_DELIM);
551 else
552 m_pAddrSpec->reset();
554 break;
556 case '[':
557 m_eType = TOKEN_DOMAIN;
558 break;
563 SvAddressParser::SvAddressParser(const OUString& rInput)
565 SvAddressParser_Impl aDoParse(this, rInput);
568 SvAddressParser::~SvAddressParser()
572 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */