Line data Source code
1 : //===- StringRef.h - Constant String Reference Wrapper ----------*- C++ -*-===//
2 : //
3 : // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 : // See https://llvm.org/LICENSE.txt for license information.
5 : // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 : //
7 : //===----------------------------------------------------------------------===//
8 :
9 : #ifndef LLVM_ADT_STRINGREF_H
10 : #define LLVM_ADT_STRINGREF_H
11 :
12 : #include "llvm/ADT/DenseMapInfo.h"
13 : #include "llvm/ADT/STLFunctionalExtras.h"
14 : #include "llvm/ADT/iterator_range.h"
15 : #include "llvm/Support/Compiler.h"
16 : #include <algorithm>
17 : #include <cassert>
18 : #include <cstddef>
19 : #include <cstring>
20 : #include <limits>
21 : #include <string>
22 : #include <string_view>
23 : #include <type_traits>
24 : #include <utility>
25 :
26 : namespace llvm {
27 :
28 : class APInt;
29 : class hash_code;
30 : template <typename T> class SmallVectorImpl;
31 : class StringRef;
32 :
33 : /// Helper functions for StringRef::getAsInteger.
34 : bool getAsUnsignedInteger(StringRef Str, unsigned Radix,
35 : unsigned long long &Result);
36 :
37 : bool getAsSignedInteger(StringRef Str, unsigned Radix, long long &Result);
38 :
39 : bool consumeUnsignedInteger(StringRef &Str, unsigned Radix,
40 : unsigned long long &Result);
41 : bool consumeSignedInteger(StringRef &Str, unsigned Radix, long long &Result);
42 :
43 : /// StringRef - Represent a constant reference to a string, i.e. a character
44 : /// array and a length, which need not be null terminated.
45 : ///
46 : /// This class does not own the string data, it is expected to be used in
47 : /// situations where the character data resides in some other buffer, whose
48 : /// lifetime extends past that of the StringRef. For this reason, it is not in
49 : /// general safe to store a StringRef.
50 : class LLVM_GSL_POINTER StringRef {
51 : public:
52 : static constexpr size_t npos = ~size_t(0);
53 :
54 : using iterator = const char *;
55 : using const_iterator = const char *;
56 : using size_type = size_t;
57 :
58 : private:
59 : /// The start of the string, in an external buffer.
60 : const char *Data = nullptr;
61 :
62 : /// The length of the string.
63 : size_t Length = 0;
64 :
// Byte-wise comparison of the first \p Length bytes of \p Lhs and \p Rhs.
//
// Passing a null pointer to memcmp is undefined behavior, so a zero-length
// comparison is answered here without handing either pointer to memcmp.
static int compareMemory(const char *Lhs, const char *Rhs, size_t Length) {
  return Length != 0 ? ::memcmp(Lhs, Rhs, Length) : 0;
}
71 :
72 : public:
73 : /// @name Constructors
74 : /// @{
75 :
76 : /// Construct an empty string ref.
77 : /*implicit*/ StringRef() = default;
78 :
79 : /// Disable conversion from nullptr. This prevents things like
80 : /// if (S == nullptr)
81 : StringRef(std::nullptr_t) = delete;
82 :
83 : /// Construct a string ref from a cstring.
84 9313 : /*implicit*/ constexpr StringRef(const char *Str)
85 9313 : : Data(Str), Length(Str ?
86 : // GCC 7 doesn't have constexpr char_traits. Fall back to __builtin_strlen.
87 : #if defined(_GLIBCXX_RELEASE) && _GLIBCXX_RELEASE < 8
88 : __builtin_strlen(Str)
89 : #else
90 9313 : std::char_traits<char>::length(Str)
91 : #endif
92 : : 0) {
93 9313 : }
94 :
95 : /// Construct a string ref from a pointer and length.
96 51359 : /*implicit*/ constexpr StringRef(const char *data, size_t length)
97 51359 : : Data(data), Length(length) {}
98 :
99 : /// Construct a string ref from an std::string.
100 45 : /*implicit*/ StringRef(const std::string &Str)
101 45 : : Data(Str.data()), Length(Str.length()) {}
102 :
103 : /// Construct a string ref from an std::string_view.
104 : /*implicit*/ constexpr StringRef(std::string_view Str)
105 : : Data(Str.data()), Length(Str.size()) {}
106 :
107 : /// @}
108 : /// @name Iterators
109 : /// @{
110 :
111 : iterator begin() const { return Data; }
112 :
113 : iterator end() const { return Data + Length; }
114 :
115 : const unsigned char *bytes_begin() const {
116 : return reinterpret_cast<const unsigned char *>(begin());
117 : }
118 : const unsigned char *bytes_end() const {
119 : return reinterpret_cast<const unsigned char *>(end());
120 : }
121 : iterator_range<const unsigned char *> bytes() const {
122 : return make_range(bytes_begin(), bytes_end());
123 : }
124 :
125 : /// @}
126 : /// @name String Operations
127 : /// @{
128 :
129 : /// data - Get a pointer to the start of the string (which may not be null
130 : /// terminated).
131 77283 : [[nodiscard]] constexpr const char *data() const { return Data; }
132 :
133 : /// empty - Check if the string is empty.
134 : [[nodiscard]] constexpr bool empty() const { return Length == 0; }
135 :
136 : /// size - Get the string size.
137 138722 : [[nodiscard]] constexpr size_t size() const { return Length; }
138 :
139 : /// front - Get the first character in the string.
140 : [[nodiscard]] char front() const {
141 : assert(!empty());
142 : return Data[0];
143 : }
144 :
145 : /// back - Get the last character in the string.
146 : [[nodiscard]] char back() const {
147 : assert(!empty());
148 : return Data[Length-1];
149 : }
150 :
151 : // copy - Allocate copy in Allocator and return StringRef to it.
152 : template <typename Allocator>
153 : [[nodiscard]] StringRef copy(Allocator &A) const {
154 : // Don't request a length 0 copy from the allocator.
155 : if (empty())
156 : return StringRef();
157 : char *S = A.template Allocate<char>(Length);
158 : std::copy(begin(), end(), S);
159 : return StringRef(S, Length);
160 : }
161 :
162 : /// Check for string equality, ignoring case.
163 : [[nodiscard]] bool equals_insensitive(StringRef RHS) const {
164 : return Length == RHS.Length && compare_insensitive(RHS) == 0;
165 : }
166 :
167 : /// compare - Compare two strings; the result is negative, zero, or positive
168 : /// if this string is lexicographically less than, equal to, or greater than
169 : /// the \p RHS.
170 : [[nodiscard]] int compare(StringRef RHS) const {
171 : // Check the prefix for a mismatch.
172 : if (int Res = compareMemory(Data, RHS.Data, std::min(Length, RHS.Length)))
173 : return Res < 0 ? -1 : 1;
174 :
175 : // Otherwise the prefixes match, so we only need to check the lengths.
176 : if (Length == RHS.Length)
177 : return 0;
178 : return Length < RHS.Length ? -1 : 1;
179 : }
180 :
181 : /// Compare two strings, ignoring case.
182 : [[nodiscard]] int compare_insensitive(StringRef RHS) const;
183 :
184 : /// compare_numeric - Compare two strings, treating sequences of digits as
185 : /// numbers.
186 : [[nodiscard]] int compare_numeric(StringRef RHS) const;
187 :
188 : /// Determine the edit distance between this string and another
189 : /// string.
190 : ///
191 : /// \param Other the string to compare this string against.
192 : ///
193 : /// \param AllowReplacements whether to allow character
194 : /// replacements (change one character into another) as a single
195 : /// operation, rather than as two operations (an insertion and a
196 : /// removal).
197 : ///
198 : /// \param MaxEditDistance If non-zero, the maximum edit distance that
199 : /// this routine is allowed to compute. If the edit distance will exceed
200 : /// that maximum, returns \c MaxEditDistance+1.
201 : ///
202 : /// \returns the minimum number of character insertions, removals,
203 : /// or (if \p AllowReplacements is \c true) replacements needed to
204 : /// transform one of the given strings into the other. If zero,
205 : /// the strings are identical.
206 : [[nodiscard]] unsigned edit_distance(StringRef Other,
207 : bool AllowReplacements = true,
208 : unsigned MaxEditDistance = 0) const;
209 :
210 : [[nodiscard]] unsigned
211 : edit_distance_insensitive(StringRef Other, bool AllowReplacements = true,
212 : unsigned MaxEditDistance = 0) const;
213 :
214 : /// str - Get the contents as an std::string.
215 : [[nodiscard]] std::string str() const {
216 : if (!Data) return std::string();
217 : return std::string(Data, Length);
218 : }
219 :
220 : /// @}
221 : /// @name Operator Overloads
222 : /// @{
223 :
224 : [[nodiscard]] char operator[](size_t Index) const {
225 : assert(Index < Length && "Invalid index!");
226 : return Data[Index];
227 : }
228 :
229 : /// Disallow accidental assignment from a temporary std::string.
230 : ///
231 : /// The declaration here is extra complicated so that `stringRef = {}`
232 : /// and `stringRef = "abc"` continue to select the move assignment operator.
233 : template <typename T>
234 : std::enable_if_t<std::is_same<T, std::string>::value, StringRef> &
235 : operator=(T &&Str) = delete;
236 :
237 : /// @}
238 : /// @name Type Conversions
239 : /// @{
240 :
241 45 : constexpr operator std::string_view() const {
242 45 : return std::string_view(data(), size());
243 : }
244 :
245 : /// @}
246 : /// @name String Predicates
247 : /// @{
248 :
249 : /// Check if this string starts with the given \p Prefix.
250 108 : [[nodiscard]] bool starts_with(StringRef Prefix) const {
251 216 : return Length >= Prefix.Length &&
252 216 : compareMemory(Data, Prefix.Data, Prefix.Length) == 0;
253 : }
254 : [[nodiscard]] bool starts_with(char Prefix) const {
255 : return !empty() && front() == Prefix;
256 : }
257 :
258 : /// Check if this string starts with the given \p Prefix, ignoring case.
259 : [[nodiscard]] bool starts_with_insensitive(StringRef Prefix) const;
260 :
261 : /// Check if this string ends with the given \p Suffix.
262 : [[nodiscard]] bool ends_with(StringRef Suffix) const {
263 : return Length >= Suffix.Length &&
264 : compareMemory(end() - Suffix.Length, Suffix.Data, Suffix.Length) ==
265 : 0;
266 : }
267 : [[nodiscard]] bool ends_with(char Suffix) const {
268 : return !empty() && back() == Suffix;
269 : }
270 :
271 : /// Check if this string ends with the given \p Suffix, ignoring case.
272 : [[nodiscard]] bool ends_with_insensitive(StringRef Suffix) const;
273 :
274 : /// @}
275 : /// @name String Searching
276 : /// @{
277 :
278 : /// Search for the first character \p C in the string.
279 : ///
280 : /// \returns The index of the first occurrence of \p C, or npos if not
281 : /// found.
282 : [[nodiscard]] size_t find(char C, size_t From = 0) const {
283 : return std::string_view(*this).find(C, From);
284 : }
285 :
286 : /// Search for the first character \p C in the string, ignoring case.
287 : ///
288 : /// \returns The index of the first occurrence of \p C, or npos if not
289 : /// found.
290 : [[nodiscard]] size_t find_insensitive(char C, size_t From = 0) const;
291 :
292 : /// Search for the first character satisfying the predicate \p F
293 : ///
294 : /// \returns The index of the first character satisfying \p F starting from
295 : /// \p From, or npos if not found.
296 : [[nodiscard]] size_t find_if(function_ref<bool(char)> F,
297 : size_t From = 0) const {
298 : StringRef S = drop_front(From);
299 : while (!S.empty()) {
300 : if (F(S.front()))
301 : return size() - S.size();
302 : S = S.drop_front();
303 : }
304 : return npos;
305 : }
306 :
307 : /// Search for the first character not satisfying the predicate \p F
308 : ///
309 : /// \returns The index of the first character not satisfying \p F starting
310 : /// from \p From, or npos if not found.
311 : [[nodiscard]] size_t find_if_not(function_ref<bool(char)> F,
312 : size_t From = 0) const {
313 : return find_if([F](char c) { return !F(c); }, From);
314 : }
315 :
316 : /// Search for the first string \p Str in the string.
317 : ///
318 : /// \returns The index of the first occurrence of \p Str, or npos if not
319 : /// found.
320 : [[nodiscard]] size_t find(StringRef Str, size_t From = 0) const;
321 :
322 : /// Search for the first string \p Str in the string, ignoring case.
323 : ///
324 : /// \returns The index of the first occurrence of \p Str, or npos if not
325 : /// found.
326 : [[nodiscard]] size_t find_insensitive(StringRef Str, size_t From = 0) const;
327 :
328 : /// Search for the last character \p C in the string.
329 : ///
330 : /// \returns The index of the last occurrence of \p C, or npos if not
331 : /// found.
332 : [[nodiscard]] size_t rfind(char C, size_t From = npos) const {
333 : size_t I = std::min(From, Length);
334 : while (I) {
335 : --I;
336 : if (Data[I] == C)
337 : return I;
338 : }
339 : return npos;
340 : }
341 :
342 : /// Search for the last character \p C in the string, ignoring case.
343 : ///
344 : /// \returns The index of the last occurrence of \p C, or npos if not
345 : /// found.
346 : [[nodiscard]] size_t rfind_insensitive(char C, size_t From = npos) const;
347 :
348 : /// Search for the last string \p Str in the string.
349 : ///
350 : /// \returns The index of the last occurrence of \p Str, or npos if not
351 : /// found.
352 : [[nodiscard]] size_t rfind(StringRef Str) const;
353 :
354 : /// Search for the last string \p Str in the string, ignoring case.
355 : ///
356 : /// \returns The index of the last occurrence of \p Str, or npos if not
357 : /// found.
358 : [[nodiscard]] size_t rfind_insensitive(StringRef Str) const;
359 :
360 : /// Find the first character in the string that is \p C, or npos if not
361 : /// found. Same as find.
362 : [[nodiscard]] size_t find_first_of(char C, size_t From = 0) const {
363 : return find(C, From);
364 : }
365 :
366 : /// Find the first character in the string that is in \p Chars, or npos if
367 : /// not found.
368 : ///
369 : /// Complexity: O(size() + Chars.size())
370 : [[nodiscard]] size_t find_first_of(StringRef Chars, size_t From = 0) const;
371 :
372 : /// Find the first character in the string that is not \p C or npos if not
373 : /// found.
374 : [[nodiscard]] size_t find_first_not_of(char C, size_t From = 0) const;
375 :
376 : /// Find the first character in the string that is not in the string
377 : /// \p Chars, or npos if not found.
378 : ///
379 : /// Complexity: O(size() + Chars.size())
380 : [[nodiscard]] size_t find_first_not_of(StringRef Chars,
381 : size_t From = 0) const;
382 :
383 : /// Find the last character in the string that is \p C, or npos if not
384 : /// found.
385 : [[nodiscard]] size_t find_last_of(char C, size_t From = npos) const {
386 : return rfind(C, From);
387 : }
388 :
389 : /// Find the last character in the string that is in \p Chars, or npos if
390 : /// not found.
391 : ///
392 : /// Complexity: O(size() + Chars.size())
393 : [[nodiscard]] size_t find_last_of(StringRef Chars,
394 : size_t From = npos) const;
395 :
396 : /// Find the last character in the string that is not \p C, or npos if not
397 : /// found.
398 : [[nodiscard]] size_t find_last_not_of(char C, size_t From = npos) const;
399 :
400 : /// Find the last character in the string that is not in \p Chars, or
401 : /// npos if not found.
402 : ///
403 : /// Complexity: O(size() + Chars.size())
404 : [[nodiscard]] size_t find_last_not_of(StringRef Chars,
405 : size_t From = npos) const;
406 :
407 : /// Return true if the given string is a substring of *this, and false
408 : /// otherwise.
409 : [[nodiscard]] bool contains(StringRef Other) const {
410 : return find(Other) != npos;
411 : }
412 :
413 : /// Return true if the given character is contained in *this, and false
414 : /// otherwise.
415 : [[nodiscard]] bool contains(char C) const {
416 : return find_first_of(C) != npos;
417 : }
418 :
419 : /// Return true if the given string is a substring of *this, and false
420 : /// otherwise.
421 : [[nodiscard]] bool contains_insensitive(StringRef Other) const {
422 : return find_insensitive(Other) != npos;
423 : }
424 :
425 : /// Return true if the given character is contained in *this, and false
426 : /// otherwise.
427 : [[nodiscard]] bool contains_insensitive(char C) const {
428 : return find_insensitive(C) != npos;
429 : }
430 :
431 : /// @}
432 : /// @name Helpful Algorithms
433 : /// @{
434 :
435 : /// Return the number of occurrences of \p C in the string.
436 : [[nodiscard]] size_t count(char C) const {
437 : size_t Count = 0;
438 : for (size_t I = 0; I != Length; ++I)
439 : if (Data[I] == C)
440 : ++Count;
441 : return Count;
442 : }
443 :
444 : /// Return the number of non-overlapped occurrences of \p Str in
445 : /// the string.
446 : size_t count(StringRef Str) const;
447 :
448 : /// Parse the current string as an integer of the specified radix. If
449 : /// \p Radix is specified as zero, this does radix autosensing using
450 : /// extended C rules: 0 is octal, 0x is hex, 0b is binary.
451 : ///
452 : /// If the string is invalid or if only a subset of the string is valid,
453 : /// this returns true to signify the error. The string is considered
454 : /// erroneous if empty or if it overflows T.
455 : template <typename T> bool getAsInteger(unsigned Radix, T &Result) const {
456 : if constexpr (std::numeric_limits<T>::is_signed) {
457 : long long LLVal;
458 : if (getAsSignedInteger(*this, Radix, LLVal) ||
459 : static_cast<T>(LLVal) != LLVal)
460 : return true;
461 : Result = LLVal;
462 : } else {
463 : unsigned long long ULLVal;
464 : // The additional cast to unsigned long long is required to avoid the
465 : // Visual C++ warning C4805: '!=' : unsafe mix of type 'bool' and type
466 : // 'unsigned __int64' when instantiating getAsInteger with T = bool.
467 : if (getAsUnsignedInteger(*this, Radix, ULLVal) ||
468 : static_cast<unsigned long long>(static_cast<T>(ULLVal)) != ULLVal)
469 : return true;
470 : Result = ULLVal;
471 : }
472 : return false;
473 : }
474 :
475 : /// Parse the current string as an integer of the specified radix. If
476 : /// \p Radix is specified as zero, this does radix autosensing using
477 : /// extended C rules: 0 is octal, 0x is hex, 0b is binary.
478 : ///
479 : /// If the string does not begin with a number of the specified radix,
480 : /// this returns true to signify the error. The string is considered
481 : /// erroneous if empty or if it overflows T.
482 : /// The portion of the string representing the discovered numeric value
483 : /// is removed from the beginning of the string.
484 : template <typename T> bool consumeInteger(unsigned Radix, T &Result) {
485 : if constexpr (std::numeric_limits<T>::is_signed) {
486 : long long LLVal;
487 : if (consumeSignedInteger(*this, Radix, LLVal) ||
488 : static_cast<long long>(static_cast<T>(LLVal)) != LLVal)
489 : return true;
490 : Result = LLVal;
491 : } else {
492 : unsigned long long ULLVal;
493 : if (consumeUnsignedInteger(*this, Radix, ULLVal) ||
494 : static_cast<unsigned long long>(static_cast<T>(ULLVal)) != ULLVal)
495 : return true;
496 : Result = ULLVal;
497 : }
498 : return false;
499 : }
500 :
501 : /// Parse the current string as an integer of the specified \p Radix, or of
502 : /// an autosensed radix if the \p Radix given is 0. The current value in
503 : /// \p Result is discarded, and the storage is changed to be wide enough to
504 : /// store the parsed integer.
505 : ///
506 : /// \returns true if the string does not solely consist of a valid
507 : /// non-empty number in the appropriate base.
508 : ///
509 : /// APInt::fromString is superficially similar but assumes the
510 : /// string is well-formed in the given radix.
511 : bool getAsInteger(unsigned Radix, APInt &Result) const;
512 :
513 : /// Parse the current string as an integer of the specified \p Radix. If
514 : /// \p Radix is specified as zero, this does radix autosensing using
515 : /// extended C rules: 0 is octal, 0x is hex, 0b is binary.
516 : ///
517 : /// If the string does not begin with a number of the specified radix,
518 : /// this returns true to signify the error. The string is considered
519 : /// erroneous if empty.
520 : /// The portion of the string representing the discovered numeric value
521 : /// is removed from the beginning of the string.
522 : bool consumeInteger(unsigned Radix, APInt &Result);
523 :
524 : /// Parse the current string as an IEEE double-precision floating
525 : /// point value. The string must be a well-formed double.
526 : ///
527 : /// If \p AllowInexact is false, the function will fail if the string
528 : /// cannot be represented exactly. Otherwise, the function only fails
529 : /// in case of an overflow or underflow, or an invalid floating point
530 : /// representation.
531 : bool getAsDouble(double &Result, bool AllowInexact = true) const;
532 :
533 : /// @}
534 : /// @name String Operations
535 : /// @{
536 :
537 : /// Convert the given ASCII string to lowercase.
538 : [[nodiscard]] std::string lower() const;
539 :
540 : /// Convert the given ASCII string to uppercase.
541 : [[nodiscard]] std::string upper() const;
542 :
543 : /// @}
544 : /// @name Substring Operations
545 : /// @{
546 :
547 : /// Return a reference to the substring from [Start, Start + N).
548 : ///
549 : /// \param Start The index of the starting character in the substring; if
550 : /// the index is npos or greater than the length of the string then the
551 : /// empty substring will be returned.
552 : ///
553 : /// \param N The number of characters to included in the substring. If N
554 : /// exceeds the number of characters remaining in the string, the string
555 : /// suffix (starting with \p Start) will be returned.
556 : [[nodiscard]] constexpr StringRef substr(size_t Start,
557 : size_t N = npos) const {
558 : Start = std::min(Start, Length);
559 : return StringRef(Data + Start, std::min(N, Length - Start));
560 : }
561 :
562 : /// Return a StringRef equal to 'this' but with only the first \p N
563 : /// elements remaining. If \p N is greater than the length of the
564 : /// string, the entire string is returned.
565 : [[nodiscard]] StringRef take_front(size_t N = 1) const {
566 : if (N >= size())
567 : return *this;
568 : return drop_back(size() - N);
569 : }
570 :
571 : /// Return a StringRef equal to 'this' but with only the last \p N
572 : /// elements remaining. If \p N is greater than the length of the
573 : /// string, the entire string is returned.
574 : [[nodiscard]] StringRef take_back(size_t N = 1) const {
575 : if (N >= size())
576 : return *this;
577 : return drop_front(size() - N);
578 : }
579 :
580 : /// Return the longest prefix of 'this' such that every character
581 : /// in the prefix satisfies the given predicate.
582 : [[nodiscard]] StringRef take_while(function_ref<bool(char)> F) const {
583 : return substr(0, find_if_not(F));
584 : }
585 :
586 : /// Return the longest prefix of 'this' such that no character in
587 : /// the prefix satisfies the given predicate.
588 : [[nodiscard]] StringRef take_until(function_ref<bool(char)> F) const {
589 : return substr(0, find_if(F));
590 : }
591 :
592 : /// Return a StringRef equal to 'this' but with the first \p N elements
593 : /// dropped.
594 : [[nodiscard]] StringRef drop_front(size_t N = 1) const {
595 : assert(size() >= N && "Dropping more elements than exist");
596 : return substr(N);
597 : }
598 :
599 : /// Return a StringRef equal to 'this' but with the last \p N elements
600 : /// dropped.
601 : [[nodiscard]] StringRef drop_back(size_t N = 1) const {
602 : assert(size() >= N && "Dropping more elements than exist");
603 : return substr(0, size()-N);
604 : }
605 :
606 : /// Return a StringRef equal to 'this', but with all characters satisfying
607 : /// the given predicate dropped from the beginning of the string.
608 : [[nodiscard]] StringRef drop_while(function_ref<bool(char)> F) const {
609 : return substr(find_if_not(F));
610 : }
611 :
612 : /// Return a StringRef equal to 'this', but with all characters not
613 : /// satisfying the given predicate dropped from the beginning of the string.
614 : [[nodiscard]] StringRef drop_until(function_ref<bool(char)> F) const {
615 : return substr(find_if(F));
616 : }
617 :
618 : /// Returns true if this StringRef has the given prefix and removes that
619 : /// prefix.
620 : bool consume_front(StringRef Prefix) {
621 : if (!starts_with(Prefix))
622 : return false;
623 :
624 : *this = substr(Prefix.size());
625 : return true;
626 : }
627 :
628 : /// Returns true if this StringRef has the given prefix, ignoring case,
629 : /// and removes that prefix.
630 : bool consume_front_insensitive(StringRef Prefix) {
631 : if (!starts_with_insensitive(Prefix))
632 : return false;
633 :
634 : *this = substr(Prefix.size());
635 : return true;
636 : }
637 :
638 : /// Returns true if this StringRef has the given suffix and removes that
639 : /// suffix.
640 : bool consume_back(StringRef Suffix) {
641 : if (!ends_with(Suffix))
642 : return false;
643 :
644 : *this = substr(0, size() - Suffix.size());
645 : return true;
646 : }
647 :
648 : /// Returns true if this StringRef has the given suffix, ignoring case,
649 : /// and removes that suffix.
650 : bool consume_back_insensitive(StringRef Suffix) {
651 : if (!ends_with_insensitive(Suffix))
652 : return false;
653 :
654 : *this = substr(0, size() - Suffix.size());
655 : return true;
656 : }
657 :
658 : /// Return a reference to the substring from [Start, End).
659 : ///
660 : /// \param Start The index of the starting character in the substring; if
661 : /// the index is npos or greater than the length of the string then the
662 : /// empty substring will be returned.
663 : ///
664 : /// \param End The index following the last character to include in the
665 : /// substring. If this is npos or exceeds the number of characters
666 : /// remaining in the string, the string suffix (starting with \p Start)
667 : /// will be returned. If this is less than \p Start, an empty string will
668 : /// be returned.
669 : [[nodiscard]] StringRef slice(size_t Start, size_t End) const {
670 : Start = std::min(Start, Length);
671 : End = std::clamp(End, Start, Length);
672 : return StringRef(Data + Start, End - Start);
673 : }
674 :
675 : /// Split into two substrings around the first occurrence of a separator
676 : /// character.
677 : ///
678 : /// If \p Separator is in the string, then the result is a pair (LHS, RHS)
679 : /// such that (*this == LHS + Separator + RHS) is true and RHS is
680 : /// maximal. If \p Separator is not in the string, then the result is a
681 : /// pair (LHS, RHS) where (*this == LHS) and (RHS == "").
682 : ///
683 : /// \param Separator The character to split on.
684 : /// \returns The split substrings.
685 : [[nodiscard]] std::pair<StringRef, StringRef> split(char Separator) const {
686 : return split(StringRef(&Separator, 1));
687 : }
688 :
689 : /// Split into two substrings around the first occurrence of a separator
690 : /// string.
691 : ///
692 : /// If \p Separator is in the string, then the result is a pair (LHS, RHS)
693 : /// such that (*this == LHS + Separator + RHS) is true and RHS is
694 : /// maximal. If \p Separator is not in the string, then the result is a
695 : /// pair (LHS, RHS) where (*this == LHS) and (RHS == "").
696 : ///
697 : /// \param Separator - The string to split on.
698 : /// \return - The split substrings.
699 : [[nodiscard]] std::pair<StringRef, StringRef>
700 : split(StringRef Separator) const {
701 : size_t Idx = find(Separator);
702 : if (Idx == npos)
703 : return std::make_pair(*this, StringRef());
704 : return std::make_pair(slice(0, Idx), slice(Idx + Separator.size(), npos));
705 : }
706 :
707 : /// Split into two substrings around the last occurrence of a separator
708 : /// string.
709 : ///
710 : /// If \p Separator is in the string, then the result is a pair (LHS, RHS)
711 : /// such that (*this == LHS + Separator + RHS) is true and RHS is
712 : /// minimal. If \p Separator is not in the string, then the result is a
713 : /// pair (LHS, RHS) where (*this == LHS) and (RHS == "").
714 : ///
715 : /// \param Separator - The string to split on.
716 : /// \return - The split substrings.
717 : [[nodiscard]] std::pair<StringRef, StringRef>
718 : rsplit(StringRef Separator) const {
719 : size_t Idx = rfind(Separator);
720 : if (Idx == npos)
721 : return std::make_pair(*this, StringRef());
722 : return std::make_pair(slice(0, Idx), slice(Idx + Separator.size(), npos));
723 : }
724 :
725 : /// Split into substrings around the occurrences of a separator string.
726 : ///
727 : /// Each substring is stored in \p A. If \p MaxSplit is >= 0, at most
728 : /// \p MaxSplit splits are done and consequently <= \p MaxSplit + 1
729 : /// elements are added to A.
730 : /// If \p KeepEmpty is false, empty strings are not added to \p A. They
731 : /// still count when considering \p MaxSplit
732 : /// A useful invariant is that
733 : /// Separator.join(A) == *this if MaxSplit == -1 and KeepEmpty == true
734 : ///
735 : /// \param A - Where to put the substrings.
736 : /// \param Separator - The string to split on.
737 : /// \param MaxSplit - The maximum number of times the string is split.
738 : /// \param KeepEmpty - True if empty substring should be added.
739 : void split(SmallVectorImpl<StringRef> &A,
740 : StringRef Separator, int MaxSplit = -1,
741 : bool KeepEmpty = true) const;
742 :
743 : /// Split into substrings around the occurrences of a separator character.
744 : ///
745 : /// Each substring is stored in \p A. If \p MaxSplit is >= 0, at most
746 : /// \p MaxSplit splits are done and consequently <= \p MaxSplit + 1
747 : /// elements are added to A.
748 : /// If \p KeepEmpty is false, empty strings are not added to \p A. They
749 : /// still count when considering \p MaxSplit
750 : /// A useful invariant is that
751 : /// Separator.join(A) == *this if MaxSplit == -1 and KeepEmpty == true
752 : ///
753 : /// \param A - Where to put the substrings.
754 : /// \param Separator - The string to split on.
755 : /// \param MaxSplit - The maximum number of times the string is split.
756 : /// \param KeepEmpty - True if empty substring should be added.
757 : void split(SmallVectorImpl<StringRef> &A, char Separator, int MaxSplit = -1,
758 : bool KeepEmpty = true) const;
759 :
760 : /// Split into two substrings around the last occurrence of a separator
761 : /// character.
762 : ///
763 : /// If \p Separator is in the string, then the result is a pair (LHS, RHS)
764 : /// such that (*this == LHS + Separator + RHS) is true and RHS is
765 : /// minimal. If \p Separator is not in the string, then the result is a
766 : /// pair (LHS, RHS) where (*this == LHS) and (RHS == "").
767 : ///
768 : /// \param Separator - The character to split on.
769 : /// \return - The split substrings.
770 : [[nodiscard]] std::pair<StringRef, StringRef> rsplit(char Separator) const {
771 : return rsplit(StringRef(&Separator, 1));
772 : }
773 :
774 : /// Return string with consecutive \p Char characters starting from the
775 : /// the left removed.
776 : [[nodiscard]] StringRef ltrim(char Char) const {
777 : return drop_front(std::min(Length, find_first_not_of(Char)));
778 : }
779 :
780 : /// Return string with consecutive characters in \p Chars starting from
781 : /// the left removed.
782 : [[nodiscard]] StringRef ltrim(StringRef Chars = " \t\n\v\f\r") const {
783 : return drop_front(std::min(Length, find_first_not_of(Chars)));
784 : }
785 :
786 : /// Return string with consecutive \p Char characters starting from the
787 : /// right removed.
788 : [[nodiscard]] StringRef rtrim(char Char) const {
789 : return drop_back(Length - std::min(Length, find_last_not_of(Char) + 1));
790 : }
791 :
792 : /// Return string with consecutive characters in \p Chars starting from
793 : /// the right removed.
794 : [[nodiscard]] StringRef rtrim(StringRef Chars = " \t\n\v\f\r") const {
795 : return drop_back(Length - std::min(Length, find_last_not_of(Chars) + 1));
796 : }
797 :
798 : /// Return string with consecutive \p Char characters starting from the
799 : /// left and right removed.
800 : [[nodiscard]] StringRef trim(char Char) const {
801 : return ltrim(Char).rtrim(Char);
802 : }
803 :
804 : /// Return string with consecutive characters in \p Chars starting from
805 : /// the left and right removed.
806 : [[nodiscard]] StringRef trim(StringRef Chars = " \t\n\v\f\r") const {
807 : return ltrim(Chars).rtrim(Chars);
808 : }
809 :
810 : /// Detect the line ending style of the string.
811 : ///
812 : /// If the string contains a line ending, return the line ending character
813 : /// sequence that is detected. Otherwise return '\n' for unix line endings.
814 : ///
815 : /// \return - The line ending character sequence.
816 : [[nodiscard]] StringRef detectEOL() const {
817 : size_t Pos = find('\r');
818 : if (Pos == npos) {
819 : // If there is no carriage return, assume unix
820 : return "\n";
821 : }
822 : if (Pos + 1 < Length && Data[Pos + 1] == '\n')
823 : return "\r\n"; // Windows
824 : if (Pos > 0 && Data[Pos - 1] == '\n')
825 : return "\n\r"; // You monster!
826 : return "\r"; // Classic Mac
827 : }
828 : /// @}
829 : };
830 :
831 : /// A wrapper around a string literal that serves as a proxy for constructing
832 : /// global tables of StringRefs with the length computed at compile time.
833 : /// In order to avoid the invocation of a global constructor, StringLiteral
834 : /// should *only* be used in a constexpr context, as such:
835 : ///
836 : /// constexpr StringLiteral S("test");
837 : ///
838 : class StringLiteral : public StringRef {
839 : private:
840 : constexpr StringLiteral(const char *Str, size_t N) : StringRef(Str, N) {
841 : }
842 :
843 : public:
844 : template <size_t N>
845 : constexpr StringLiteral(const char (&Str)[N])
846 : #if defined(__clang__) && __has_attribute(enable_if)
847 : #pragma clang diagnostic push
848 : #pragma clang diagnostic ignored "-Wgcc-compat"
849 : __attribute((enable_if(__builtin_strlen(Str) == N - 1,
850 : "invalid string literal")))
851 : #pragma clang diagnostic pop
852 : #endif
853 : : StringRef(Str, N - 1) {
854 : }
855 :
856 : // Explicit construction for strings like "foo\0bar".
857 : template <size_t N>
858 : static constexpr StringLiteral withInnerNUL(const char (&Str)[N]) {
859 : return StringLiteral(Str, N - 1);
860 : }
861 : };
862 :
863 : /// @name StringRef Comparison Operators
864 : /// @{
865 :
866 : inline bool operator==(StringRef LHS, StringRef RHS) {
867 : if (LHS.size() != RHS.size())
868 : return false;
869 : if (LHS.empty())
870 : return true;
871 : return ::memcmp(LHS.data(), RHS.data(), LHS.size()) == 0;
872 : }
873 :
874 : inline bool operator!=(StringRef LHS, StringRef RHS) { return !(LHS == RHS); }
875 :
876 : inline bool operator<(StringRef LHS, StringRef RHS) {
877 : return LHS.compare(RHS) < 0;
878 : }
879 :
880 : inline bool operator<=(StringRef LHS, StringRef RHS) {
881 : return LHS.compare(RHS) <= 0;
882 : }
883 :
884 : inline bool operator>(StringRef LHS, StringRef RHS) {
885 : return LHS.compare(RHS) > 0;
886 : }
887 :
888 : inline bool operator>=(StringRef LHS, StringRef RHS) {
889 : return LHS.compare(RHS) >= 0;
890 : }
891 :
892 : inline std::string &operator+=(std::string &buffer, StringRef string) {
893 : return buffer.append(string.data(), string.size());
894 : }
895 :
896 : /// @}
897 :
898 : /// Compute a hash_code for a StringRef.
899 : [[nodiscard]] hash_code hash_value(StringRef S);
900 :
901 : // Provide DenseMapInfo for StringRefs.
902 : template <> struct DenseMapInfo<StringRef, void> {
903 : static inline StringRef getEmptyKey() {
904 : return StringRef(
905 : reinterpret_cast<const char *>(~static_cast<uintptr_t>(0)), 0);
906 : }
907 :
908 : static inline StringRef getTombstoneKey() {
909 : return StringRef(
910 : reinterpret_cast<const char *>(~static_cast<uintptr_t>(1)), 0);
911 : }
912 :
913 : static unsigned getHashValue(StringRef Val);
914 :
915 : static bool isEqual(StringRef LHS, StringRef RHS) {
916 : if (RHS.data() == getEmptyKey().data())
917 : return LHS.data() == getEmptyKey().data();
918 : if (RHS.data() == getTombstoneKey().data())
919 : return LHS.data() == getTombstoneKey().data();
920 : return LHS == RHS;
921 : }
922 : };
923 :
924 : } // end namespace llvm
925 :
926 : #endif // LLVM_ADT_STRINGREF_H
|