testsuite/52641 - Require int32 for gcc.dg/pr93820-2.c.
[official-gcc.git] / gcc / rust / util / rust-punycode.cc
blob89476f2cc82fac7ade231f5ed827ad5cd60c1584
// Copyright (C) 2020-2024 Free Software Foundation, Inc.

// This file is part of GCC.

// GCC is free software; you can redistribute it and/or modify it under
// the terms of the GNU General Public License as published by the Free
// Software Foundation; either version 3, or (at your option) any later
// version.

// GCC is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
// for more details.

// You should have received a copy of the GNU General Public License
// along with GCC; see the file COPYING3.  If not see
// <http://www.gnu.org/licenses/>.

// This file provides functions for punycode conversion
// See https://datatracker.ietf.org/doc/html/rfc3492
22 #include "rust-system.h"
23 #include "rust-unicode.h"
24 #include "optional.h"
25 #include "selftest.h"
27 namespace Rust {
// Bootstring parameters used by Punycode.
// The parameters are defined in
// https://tools.ietf.org/html/rfc3492#section-4 and their Punycode values
// are listed in https://tools.ietf.org/html/rfc3492#section-5.

// Number of distinct digits ('a'-'z' and '0'-'9').
constexpr uint32_t BASE = 36;
// Lower bound of the per-position digit threshold.
constexpr uint32_t TMIN = 1;
// Upper bound of the per-position digit threshold.
constexpr uint32_t TMAX = 26;
// Skew term used when recomputing the bias.
constexpr uint32_t SKEW = 38;
// Divisor applied to the very first delta when recomputing the bias.
constexpr uint32_t DAMP = 700;
// Bias used before the first non-basic code point has been encoded.
constexpr uint32_t INITIAL_BIAS = 72;
// First non-basic code point (everything below it is ASCII).
constexpr uint32_t INITIAL_N = 128;
// Separates the copied basic code points from the encoded deltas.
constexpr char DELIMITER = '-';
39 std::string
40 extract_basic_string (const std::vector<Codepoint> &src)
42 std::string basic_string;
43 for (auto c : src)
45 if (c.is_ascii ())
46 basic_string += c.as_string ();
48 return basic_string;
51 uint32_t
52 adapt_bias (uint32_t delta, const uint32_t n_points, const bool is_first)
54 delta /= is_first ? DAMP : 2;
55 delta += delta / n_points;
56 uint32_t k = 0;
58 while (delta > (BASE - TMIN) * TMAX / 2)
60 delta /= BASE - TMIN;
61 k += BASE;
63 return k + (BASE - TMIN + 1) * delta / (delta + SKEW);
// Compute LHS - RHS clamped to the range [MIN, MAX].
//
// Returns MIN when LHS - RHS would be less than or equal to MIN (including
// when the mathematical difference is negative), MAX when it would be
// greater than or equal to MAX, and the difference itself otherwise.  The
// MIN test takes precedence over the MAX test.
//
// The comparisons are done on the difference rather than on MIN + RHS and
// MAX + RHS so that large RHS values cannot wrap around in uint32_t.
uint32_t
clamped_sub (const uint32_t min, const uint32_t lhs, const uint32_t rhs,
             const uint32_t max)
{
  // A non-positive difference is always at or below MIN.
  if (lhs <= rhs)
    return min;

  // Cannot underflow: lhs > rhs was established above.
  const uint32_t diff = lhs - rhs;
  if (diff <= min)
    return min;
  if (diff >= max)
    return max;
  return diff;
}
78 uint32_t
79 min_gt_or_eq (const std::vector<Codepoint> &l, const uint32_t threshold)
81 uint32_t min = UINT32_MAX;
82 for (auto c : l)
83 if (c.value >= threshold && c.value < min)
84 min = c.value;
85 return min;
// Map a Punycode digit value to its character: 0..25 become 'a'..'z' and
// 26..35 become '0'..'9'.  D is expected to be smaller than 36; larger
// values are not checked.
char
encode_digit (const uint32_t d)
{
  const uint32_t code = d < 26 ? 'a' + d : '0' + (d - 26);
  // The value fits in a char for every valid digit; make the narrowing
  // explicit.
  return static_cast<char> (code);
}
94 tl::optional<std::string>
95 encode_punycode (const Utf8String &input)
97 std::vector<Codepoint> input_chars = input.get_chars ();
99 uint32_t n = INITIAL_N;
100 uint32_t delta = 0;
101 uint32_t bias = INITIAL_BIAS;
103 std::string output = extract_basic_string (input_chars);
104 uint32_t h = output.size ();
105 const uint32_t b = h;
106 if (b > 0)
107 output += DELIMITER;
109 while (h < input_chars.size ())
111 const uint32_t m = min_gt_or_eq (input_chars, n);
113 if (m - n > ((UINT32_MAX - delta) / (h + 1)))
114 return tl::nullopt;
116 delta += (m - n) * (h + 1);
117 n = m;
119 for (const auto c : input_chars)
121 if (c.value < n)
122 delta++;
123 else if (c.value == n)
125 uint32_t q = delta;
126 // encode as a variable length integer
127 for (uint32_t k = 1;; k++)
129 const uint32_t kb = k * BASE;
130 const uint32_t t = clamped_sub (TMIN, kb, bias, TMAX);
131 if (q < t)
132 break;
134 output += encode_digit (t + (q - t) % (BASE - t));
135 q = (q - t) / (BASE - t);
137 output += encode_digit (q);
139 bias = adapt_bias (delta, h + 1, h == b);
140 delta = 0;
141 h++;
144 delta++;
145 n++;
148 return {output};
151 } // namespace Rust
153 #if CHECKING_P
155 namespace selftest {
157 void
158 encode_assert (const std::string &input, const std::string &expected)
160 Rust::Utf8String input_utf8
161 = Rust::Utf8String::make_utf8_string (input).value ();
162 std::string actual = Rust::encode_punycode (input_utf8).value ();
163 ASSERT_EQ (actual, expected);
166 void
167 rust_punycode_encode_test ()
169 encode_assert ("abc", "abc-");
170 encode_assert ("12345", "12345-");
171 encode_assert ("香港", "j6w193g");
173 // Examples from https://datatracker.ietf.org/doc/html/rfc3492#section-7.1
174 encode_assert ("ليهمابتكلموشعربي؟", "egbpdaj6bu4bxfgehfvwxn");
175 encode_assert ("他们为什么不说中文", "ihqwcrb4cv8a8dqg056pqjye");
176 encode_assert ("他們爲什麽不說中文", "ihqwctvzc91f659drss3x8bo0yb");
177 encode_assert ("Pročprostěnemluvíčesky", "Proprostnemluvesky-uyb24dma41a");
180 } // namespace selftest
182 #endif // CHECKING_P