libfilezilla
Loading...
Searching...
No Matches
string.hpp
Go to the documentation of this file.
1#ifndef LIBFILEZILLA_STRING_HEADER
2#define LIBFILEZILLA_STRING_HEADER
3
#include "libfilezilla.hpp"

#include <algorithm>
#include <limits>
#include <string>
#include <string_view>
#include <vector>
10
18namespace fz {
19
/** \typedef native_string
 * \brief A string in the system's native character type and encoding.
 *
 * The typedef changes with the platform macro in effect: it is std::wstring
 * when FZ_WINDOWS is defined and std::string when FZ_UNIX or FZ_MAC is defined.
 * native_string_view is the matching non-owning view type.
 */
#ifdef FZ_WINDOWS
typedef std::wstring native_string;
typedef std::wstring_view native_string_view;
#endif
#if defined(FZ_UNIX) || defined(FZ_MAC)
typedef std::string native_string;
typedef std::string_view native_string_view;
#endif
40
45native_string FZ_PUBLIC_SYMBOL to_native(std::string_view const& in);
46
51native_string FZ_PUBLIC_SYMBOL to_native(std::wstring_view const& in);
52
54template<typename T, typename std::enable_if_t<std::is_same_v<native_string, typename std::decay_t<T>>, int> = 0>
55inline native_string to_native(T const& in) {
56 return in;
57}
58
65int FZ_PUBLIC_SYMBOL stricmp(std::string_view const& a, std::string_view const& b);
66int FZ_PUBLIC_SYMBOL stricmp(std::wstring_view const& a, std::wstring_view const& b);
67
/** \brief Converts ASCII uppercase characters to lowercase as if C-locale is used.
 *
 * Characters outside 'A'..'Z' are returned unchanged.
 *
 * \param c the character to convert
 * \return the lowercase counterpart of \c c, or \c c itself
 */
template<typename Char>
Char tolower_ascii(Char c) {
	if (c >= 'A' && c <= 'Z') {
		// The arithmetic is carried out in int; convert back explicitly
		// instead of relying on an implicit narrowing conversion.
		return static_cast<Char>(c + ('a' - 'A'));
	}
	return c;
}
92
93template<>
94std::wstring::value_type FZ_PUBLIC_SYMBOL tolower_ascii(std::wstring::value_type c);
95
/** \brief Converts ASCII lowercase characters to uppercase as if C-locale is used.
 *
 * Characters outside 'a'..'z' are returned unchanged.
 *
 * \param c the character to convert
 * \return the uppercase counterpart of \c c, or \c c itself
 */
template<typename Char>
Char toupper_ascii(Char c) {
	if (c >= 'a' && c <= 'z') {
		// The arithmetic is carried out in int; convert back explicitly
		// instead of relying on an implicit narrowing conversion.
		return static_cast<Char>(c + ('A' - 'a'));
	}
	return c;
}
104
105template<>
106std::wstring::value_type FZ_PUBLIC_SYMBOL toupper_ascii(std::wstring::value_type c);
107
110 // Note: For UTF-8 strings it works on individual octets!
111std::string FZ_PUBLIC_SYMBOL str_tolower_ascii(std::string_view const& s);
112std::wstring FZ_PUBLIC_SYMBOL str_tolower_ascii(std::wstring_view const& s);
113
114std::string FZ_PUBLIC_SYMBOL str_toupper_ascii(std::string_view const& s);
115std::wstring FZ_PUBLIC_SYMBOL str_toupper_ascii(std::wstring_view const& s);
116
122struct FZ_PUBLIC_SYMBOL less_insensitive_ascii final
123{
124 template<typename T>
125 bool operator()(T const& lhs, T const& rhs) const {
126 return std::lexicographical_compare(lhs.cbegin(), lhs.cend(), rhs.cbegin(), rhs.cend(),
127 [](typename T::value_type const& a, typename T::value_type const& b) {
128 return tolower_ascii(a) < tolower_ascii(b);
129 }
130 );
131 }
132};
133
138inline bool equal_insensitive_ascii(std::string_view a, std::string_view b)
139{
140 return std::equal(a.cbegin(), a.cend(), b.cbegin(), b.cend(),
141 [](auto const& a, auto const& b) {
142 return tolower_ascii(a) == tolower_ascii(b);
143 }
144 );
145}
146inline bool equal_insensitive_ascii(std::wstring_view a, std::wstring_view b)
147{
148 return std::equal(a.cbegin(), a.cend(), b.cbegin(), b.cend(),
149 [](auto const& a, auto const& b) {
150 return tolower_ascii(a) == tolower_ascii(b);
151 }
152 );
153}
154
159std::wstring FZ_PUBLIC_SYMBOL to_wstring(std::string_view const& in);
160
/** \brief Constructs a std::wstring from the forwarded argument.
 *
 * Only participates in overload resolution if std::wstring can be constructed
 * from the argument.
 */
template <typename T>
inline auto to_wstring(T && in) -> decltype(std::wstring(std::forward<T>(in)))
{
	return std::wstring(std::forward<T>(in));
}

/** \brief Converts an arithmetic value to std::wstring by calling std::to_wstring.
 *
 * Only participates in overload resolution if the decayed argument type is arithmetic.
 */
template<typename Arg>
inline std::enable_if_t<std::is_arithmetic_v<std::decay_t<Arg>>, std::wstring> to_wstring(Arg && arg)
{
	return std::to_wstring(std::forward<Arg>(arg));
}
177
178
183std::wstring FZ_PUBLIC_SYMBOL to_wstring_from_utf8(std::string_view const& in);
184std::wstring FZ_PUBLIC_SYMBOL to_wstring_from_utf8(char const* s, size_t len);
185
186class buffer;
187std::wstring FZ_PUBLIC_SYMBOL to_wstring_from_utf8(fz::buffer const& in);
188
193std::string FZ_PUBLIC_SYMBOL to_string(std::wstring_view const& in);
194
/** \brief Constructs a std::string from the forwarded argument.
 *
 * Only participates in overload resolution if std::string can be constructed
 * from the argument.
 */
template <typename T>
inline auto to_string(T && in) -> decltype(std::string(std::forward<T>(in)))
{
	return std::string(std::forward<T>(in));
}

/** \brief Converts an arithmetic value to std::string by calling std::to_string.
 *
 * Only participates in overload resolution if the decayed argument type is arithmetic.
 */
template<typename Arg>
inline std::enable_if_t<std::is_arithmetic_v<std::decay_t<Arg>>, std::string> to_string(Arg && arg)
{
	return std::to_string(std::forward<Arg>(arg));
}
212
213
/** \brief Returns length of 0-terminated character sequence. Works with both narrow and wide-characters.
 *
 * \param str pointer to the first character of a 0-terminated sequence
 * \return number of characters preceding the terminator, as computed by std::char_traits<Char>::length
 */
template<typename Char>
size_t strlen(Char const* str) {
	return std::char_traits<Char>::length(str);
}
219
220
227std::string FZ_PUBLIC_SYMBOL to_utf8(std::string_view const& in);
228
235std::string FZ_PUBLIC_SYMBOL to_utf8(std::wstring_view const& in);
236
/** \brief Calls either fz::to_string or fz::to_wstring depending on the passed template argument.
 *
 * This overload is selected when \c String is std::string and forwards to to_string.
 */
template<typename String, typename Arg>
inline auto toString(Arg&& arg) -> std::enable_if_t<std::is_same_v<String, std::string>, decltype(to_string(std::forward<Arg>(arg)))>
{
	return to_string(std::forward<Arg>(arg));
}

/** \brief Calls either fz::to_string or fz::to_wstring depending on the passed template argument.
 *
 * This overload is selected when \c String is std::wstring and forwards to to_wstring.
 */
template<typename String, typename Arg>
inline auto toString(Arg&& arg) -> std::enable_if_t<std::is_same_v<String, std::wstring>, decltype(to_wstring(std::forward<Arg>(arg)))>
{
	return to_wstring(std::forward<Arg>(arg));
}
249
#if !defined(fzT) || defined(DOXYGEN)
#ifdef FZ_WINDOWS
/// Turns the string literal \c x into a wide string literal by pasting an \c L prefix onto it.
#define fzT(x) L ## x
#else
/// Leaves the string literal \c x unchanged (narrow string literal).
#define fzT(x) x
#endif
#endif
265
/** \brief Returns the function argument of the type matching the template argument.
 *
 * Only the explicit specializations for \c char and \c wchar_t below are defined.
 *
 * \param c the narrow candidate
 * \param w the wide candidate
 */
template<typename Char>
Char const* choose_string(char const* c, wchar_t const* w);

template<> inline char const* choose_string(char const* c, wchar_t const*) { return c; }
template<> inline wchar_t const* choose_string(char const*, wchar_t const* w) { return w; }

#if !defined(fzS) || defined(DOXYGEN)
/// Passes the literal \c s in both narrow and wide form to fz::choose_string<Char>,
/// yielding the literal whose character type is \c Char.
#define fzS(Char, s) fz::choose_string<Char>(s, L ## s)
#endif
287
292std::string FZ_PUBLIC_SYMBOL replaced_substrings(std::string_view const& in, std::string_view const& find, std::string_view const& replacement);
293std::wstring FZ_PUBLIC_SYMBOL replaced_substrings(std::wstring_view const& in, std::wstring_view const& find, std::wstring_view const& replacement);
294
296std::string FZ_PUBLIC_SYMBOL replaced_substrings(std::string_view const& in, char find, char replacement);
297std::wstring FZ_PUBLIC_SYMBOL replaced_substrings(std::wstring_view const& in, wchar_t find, wchar_t replacement);
298
303bool FZ_PUBLIC_SYMBOL replace_substrings(std::string& in, std::string_view const& find, std::string_view const& replacement);
304bool FZ_PUBLIC_SYMBOL replace_substrings(std::wstring& in, std::wstring_view const& find, std::wstring_view const& replacement);
305
307bool FZ_PUBLIC_SYMBOL replace_substrings(std::string& in, char find, char replacement);
308bool FZ_PUBLIC_SYMBOL replace_substrings(std::wstring& in, wchar_t find, wchar_t replacement);
309
/** \brief Container-like class that can be used to iterate over tokens in a string.
 *
 * Tokens are handed out as string views into the stored string; any character
 * contained in the delimiters separates two tokens. Iteration is done with
 * begin() and a separate sentinel type returned by end(), so it is intended
 * for range-based for loops.
 *
 * \tparam String type of the stored string; it is stored exactly as this type,
 *         so if it is a reference type only a reference is kept.
 * \tparam Delims type of the stored delimiters, handled like \c String.
 */
template <typename String, typename Delims>
class strtokenizer
{
	// View over the character type obtained by indexing String.
	using view_type = std::basic_string_view<std::decay_t<decltype(std::declval<String>()[0])>>;

public:
	/** \brief strtokenizer class constructor.
	 *
	 * \param string the string to tokenize
	 * \param delims the characters that separate tokens
	 * \param ignore_empty if true, empty tokens between adjacent delimiters are skipped
	 */
	constexpr strtokenizer(String && string, Delims &&delims, bool ignore_empty)
		: string_(std::forward<String>(string))
		, delims_(std::forward<Delims>(delims))
		, ignore_empty_(ignore_empty)
	{}

	using value_type = const view_type;
	using pointer = value_type*;
	using reference = value_type&;
	using size_type = std::size_t;
	using difference_type = std::ptrdiff_t;

	/// Marks the end of the token sequence; iterators are compared against it.
	struct sentinel{};

	/// Iterator over the tokens. Can only be created by strtokenizer.
	struct iterator
	{
		using iterator_category = std::input_iterator_tag;
		using difference_type = strtokenizer::difference_type;
		using value_type = strtokenizer::value_type;
		using pointer = strtokenizer::pointer;
		using reference = strtokenizer::reference;

		/// True while unconsumed input remains.
		constexpr bool operator !=(sentinel) const
		{
			return !s_.empty();
		}

		/// True once the remaining input is empty.
		constexpr bool operator ==(sentinel) const
		{
			return s_.empty();
		}

		/// Returns the current token: the first pos_ characters of the remaining input.
		constexpr value_type operator*() const
		{
			return s_.substr(0, pos_);
		}

		/// Advances to the next token.
		constexpr iterator &operator++()
		{
			for (;;) {
				// Step over the delimiter that ended the current token.
				// On the very first call pos_ is npos and wraps around to 0,
				// so nothing is removed.
				if (pos_ != s_.size()) {
					++pos_;
				}

				s_.remove_prefix(pos_);

				pos_ = s_.find_first_of(t_->delims_);

				if (pos_ == view_type::npos) {
					// No further delimiter: the rest of the input is the token.
					pos_ = s_.size();
					break;
				}

				// An empty token (pos_ == 0) is only accepted if empty tokens are not ignored.
				if (pos_ != 0 || !t_->ignore_empty_) {
					break;
				}
			}

			return *this;
		}

	private:
		friend strtokenizer;

		/// Starts with the whole string and immediately advances to the first token.
		constexpr iterator(const strtokenizer *t)
			: t_(t)
			, s_(view_type(t_->string_))
			, pos_(view_type::npos)
		{
			operator++();
		}

		const strtokenizer *t_;
		view_type s_;    // input not yet consumed, beginning with the current token
		size_type pos_;  // length of the current token within s_
	};

	using const_value_type = value_type;
	using const_pointer = pointer;
	using const_reference = reference;
	using const_iterator = iterator;

	/// Returns an iterator positioned at the first token.
	constexpr iterator begin() const
	{
		return { this };
	}

	/// Returns the sentinel that iterators are compared against.
	constexpr sentinel end() const
	{
		return {};
	}

	/// Same as begin().
	constexpr const_iterator cbegin() const
	{
		return { this };
	}

	/// Same as end().
	constexpr sentinel cend() const
	{
		return {};
	}

public:
	String string_;
	Delims delims_;
	bool ignore_empty_;
};

/** \brief Deduction guide for strtokenizer.
 *
 * Deduces the template arguments from the forwarding references of the
 * constructor call, so lvalue arguments result in reference members.
 */
template <typename String, typename Delims>
strtokenizer(String && string, Delims &&delims, bool ignore_empty) -> strtokenizer<String, Delims>;
464
471std::vector<std::string> FZ_PUBLIC_SYMBOL strtok(std::string_view const& tokens, std::string_view const& delims, bool const ignore_empty = true);
472std::vector<std::wstring> FZ_PUBLIC_SYMBOL strtok(std::wstring_view const& tokens, std::wstring_view const& delims, bool const ignore_empty = true);
473inline auto FZ_PUBLIC_SYMBOL strtok(std::string_view const& tokens, char const delim, bool const ignore_empty = true) {
474 return strtok(tokens, std::string_view(&delim, 1), ignore_empty);
475}
476inline auto FZ_PUBLIC_SYMBOL strtok(std::wstring_view const& tokens, wchar_t const delim, bool const ignore_empty = true) {
477 return strtok(tokens, std::wstring_view(&delim, 1), ignore_empty);
478}
479
488std::vector<std::string_view> FZ_PUBLIC_SYMBOL strtok_view(std::string_view const& tokens, std::string_view const& delims, bool const ignore_empty = true);
489std::vector<std::wstring_view> FZ_PUBLIC_SYMBOL strtok_view(std::wstring_view const& tokens, std::wstring_view const& delims, bool const ignore_empty = true);
490inline auto FZ_PUBLIC_SYMBOL strtok_view(std::string_view const& tokens, char const delim, bool const ignore_empty = true) {
491 return strtok_view(tokens, std::string_view(&delim, 1), ignore_empty);
492}
493inline auto FZ_PUBLIC_SYMBOL strtok_view(std::wstring_view const& tokens, wchar_t const delim, bool const ignore_empty = true) {
494 return strtok_view(tokens, std::wstring_view(&delim, 1), ignore_empty);
495}
496
/** \brief Implementation of to_integral: parses a decimal number with optional sign.
 *
 * Accepted input is an optional leading '+' or '-', followed by at least one
 * digit '0'..'9' and nothing else.
 *
 * \c errorval is returned if
 * - the input is empty or consists only of a sign,
 * - a character other than a digit is encountered,
 * - the value does not fit into \c T, or
 * - the input begins with '-' and \c T is unsigned.
 *
 * For \c bool the input is parsed as unsigned int and compared against 0;
 * for enumerations it is parsed as the underlying type.
 *
 * \tparam T the integral, bool or enumeration type to convert to
 * \param s the string to parse; needs to provide cbegin() and cend()
 * \param errorval the value to return if \c s is not convertible
 */
template<typename T, typename String>
T to_integral_impl(String const& s, T const errorval = T())
{
	if constexpr (std::is_same_v<T, bool>) {
		return to_integral_impl<unsigned int>(s, static_cast<unsigned int>(errorval)) != 0;
	}
	else if constexpr (std::is_enum_v<T>) {
		using underlying = std::underlying_type_t<T>;
		return static_cast<T>(to_integral_impl<underlying>(s, static_cast<underlying>(errorval)));
	}
	else {
		auto it = s.cbegin();
		auto const end = s.cend();

		[[maybe_unused]] bool negative{};
		if (it != end) {
			if (*it == '-') {
				if constexpr (std::is_signed_v<T>) {
					negative = true;
					++it;
				}
				else {
					// A negative number cannot be represented in an unsigned type.
					return errorval;
				}
			}
			else if (*it == '+') {
				++it;
			}
		}

		if (it == end) {
			return errorval;
		}

		T ret{};
		for (; it != end; ++it) {
			auto const c = *it;
			if (c < '0' || c > '9') {
				return errorval;
			}
			T const digit = static_cast<T>(c - '0');

			if constexpr (std::is_signed_v<T>) {
				if (negative) {
					// Accumulate towards the minimum so that the smallest value of T,
					// whose magnitude exceeds the maximum, can be parsed without overflow.
					constexpr T min = std::numeric_limits<T>::min();
					if (ret < min / 10) {
						return errorval;
					}
					ret = static_cast<T>(ret * 10);
					if (ret < min + digit) {
						return errorval;
					}
					ret = static_cast<T>(ret - digit);
					continue;
				}
			}

			// Check before each step instead of letting the value overflow.
			constexpr T max = std::numeric_limits<T>::max();
			if (ret > max / 10) {
				return errorval;
			}
			ret = static_cast<T>(ret * 10);
			if (ret > max - digit) {
				return errorval;
			}
			ret = static_cast<T>(ret + digit);
		}

		return ret;
	}
}
533
535template<typename T>
536T to_integral(std::string_view const& s, T const errorval = T()) {
537 return to_integral_impl<T>(s, errorval);
538}
539
540template<typename T>
541T to_integral(std::wstring_view const& s, T const errorval = T()) {
542 return to_integral_impl<T>(s, errorval);
543}
544
545template<typename T, typename StringType>
546T to_integral(std::basic_string_view<StringType> const& s, T const errorval = T()) {
547 return to_integral_impl<T>(s, errorval);
548}
549
550
/** \brief Returns true iff the string only has characters in the 7-bit ASCII range.
 *
 * \param s the string to check; an empty string yields true
 */
template<typename String>
bool str_is_ascii(String const& s) {
	// Compare as unsigned so that characters with a negative value in a signed
	// character type are seen as being above 127.
	using unsigned_char_type = std::make_unsigned_t<typename String::value_type>;

	for (auto const& c : s) {
		if (static_cast<unsigned_char_type>(c) > 127) {
			return false;
		}
	}

	return true;
}
562
/** \brief Implementation of the trim functions: removes leading and/or trailing characters.
 *
 * \param s the string or string view to modify in place
 * \param chars the set of characters to remove
 * \param fromLeft whether to remove characters at the beginning
 * \param fromRight whether to remove characters at the end
 *
 * If nothing but characters from \c chars would remain on a side that is
 * trimmed, \c s becomes empty.
 */
template<typename String, typename Chars>
void trim_impl(String & s, Chars const& chars, bool fromLeft, bool fromRight) {
	size_t const first = fromLeft ? s.find_first_not_of(chars) : 0;
	if (first == String::npos) {
		s = String();
		return;
	}

	// One past the last character to keep.
	size_t end = s.size();
	if (fromRight) {
		size_t const last = s.find_last_not_of(chars);
		if (last == String::npos) {
			s = String();
			return;
		}
		end = last + 1;
	}

	// Invariant: If first exists, then end > first
	s = s.substr(first, end - first);
}
581
583inline std::string FZ_PUBLIC_SYMBOL trimmed(std::string_view s, std::string_view const& chars = " \r\n\t", bool fromLeft = true, bool fromRight = true)
584{
585 trim_impl(s, chars, fromLeft, fromRight);
586 return std::string(s);
587}
588
589inline std::wstring FZ_PUBLIC_SYMBOL trimmed(std::wstring_view s, std::wstring_view const& chars = L" \r\n\t", bool fromLeft = true, bool fromRight = true)
590{
591 trim_impl(s, chars, fromLeft, fromRight);
592 return std::wstring(s);
593}
594
595inline std::string FZ_PUBLIC_SYMBOL ltrimmed(std::string_view s, std::string_view const& chars = " \r\n\t")
596{
597 trim_impl(s, chars, true, false);
598 return std::string(s);
599}
600
601inline std::wstring FZ_PUBLIC_SYMBOL ltrimmed(std::wstring_view s, std::wstring_view const& chars = L" \r\n\t")
602{
603 trim_impl(s, chars, true, false);
604 return std::wstring(s);
605}
606
607inline std::string FZ_PUBLIC_SYMBOL rtrimmed(std::string_view s, std::string_view const& chars = " \r\n\t")
608{
609 trim_impl(s, chars, false, true);
610 return std::string(s);
611}
612
613inline std::wstring FZ_PUBLIC_SYMBOL rtrimmed(std::wstring_view s, std::wstring_view const& chars = L" \r\n\t")
614{
615 trim_impl(s, chars, false, true);
616 return std::wstring(s);
617}
618
619
/** \brief Remove all leading and trailing whitespace from string.
 *
 * Modifies \c s in place. This overload is selected for strings with \c char elements.
 *
 * \param s the string to trim
 * \param chars the characters to remove
 * \param fromLeft whether to trim at the beginning
 * \param fromRight whether to trim at the end
 */
template<typename String, std::enable_if_t<std::is_same_v<typename String::value_type, char>, int> = 0>
inline void trim(String & s, std::string_view const& chars = " \r\n\t", bool fromLeft = true, bool fromRight = true)
{
	trim_impl(s, chars, fromLeft, fromRight);
}

/** \brief Remove all leading and trailing whitespace from string.
 *
 * Modifies \c s in place. This overload is selected for strings with \c wchar_t elements.
 */
template<typename String, std::enable_if_t<std::is_same_v<typename String::value_type, wchar_t>, int> = 0>
inline void trim(String & s, std::wstring_view const& chars = L" \r\n\t", bool fromLeft = true, bool fromRight = true)
{
	trim_impl(s, chars, fromLeft, fromRight);
}

/// Remove the characters in \c chars from the beginning of \c s (strings with \c char elements).
template<typename String, std::enable_if_t<std::is_same_v<typename String::value_type, char>, int> = 0>
inline void ltrim(String& s, std::string_view const& chars = " \r\n\t")
{
	trim_impl(s, chars, true, false);
}

/// Remove the characters in \c chars from the beginning of \c s (strings with \c wchar_t elements).
template<typename String, std::enable_if_t<std::is_same_v<typename String::value_type, wchar_t>, int> = 0>
inline void ltrim(String& s, std::wstring_view const& chars = L" \r\n\t")
{
	trim_impl(s, chars, true, false);
}

/// Remove the characters in \c chars from the end of \c s (strings with \c char elements).
template<typename String, std::enable_if_t<std::is_same_v<typename String::value_type, char>, int> = 0>
inline void rtrim(String& s, std::string_view const& chars = " \r\n\t")
{
	trim_impl(s, chars, false, true);
}

/// Remove the characters in \c chars from the end of \c s (strings with \c wchar_t elements).
template<typename String, std::enable_if_t<std::is_same_v<typename String::value_type, wchar_t>, int> = 0>
inline void rtrim(String & s, std::wstring_view const& chars = L" \r\n\t")
{
	trim_impl(s, chars, false, true);
}
656
/** \brief Tests whether the first string starts with the second string.
 *
 * \tparam insensitive_ascii if true, characters are compared after passing them through tolower_ascii
 * \param s the string to inspect
 * \param beginning the expected prefix; an empty prefix always matches
 */
template<bool insensitive_ascii = false, typename String>
bool starts_with(String const& s, String const& beginning)
{
	// A prefix longer than the string cannot match. This also guarantees that
	// the three-iterator std::equal below does not read past the end of s.
	if (beginning.size() > s.size()) {
		return false;
	}

	if constexpr (insensitive_ascii) {
		using char_type = typename String::value_type;
		return std::equal(beginning.begin(), beginning.end(), s.begin(), [](char_type const& a, char_type const& b) {
			return tolower_ascii(a) == tolower_ascii(b);
		});
	}
	else {
		return std::equal(beginning.begin(), beginning.end(), s.begin());
	}
}
676
/** \brief Tests whether the first string ends with the second string.
 *
 * \tparam insensitive_ascii if true, characters are compared after passing them through tolower_ascii
 * \param s the string to inspect
 * \param ending the expected suffix; an empty suffix always matches
 */
template<bool insensitive_ascii = false, typename String>
bool ends_with(String const& s, String const& ending)
{
	// A suffix longer than the string cannot match. This also guarantees that
	// the three-iterator std::equal below does not read past the beginning of s.
	if (ending.size() > s.size()) {
		return false;
	}

	// Compare back to front so that both ranges are aligned at their ends.
	if constexpr (insensitive_ascii) {
		using char_type = typename String::value_type;
		return std::equal(ending.rbegin(), ending.rend(), s.rbegin(), [](char_type const& a, char_type const& b) {
			return tolower_ascii(a) == tolower_ascii(b);
		});
	}
	else {
		return std::equal(ending.rbegin(), ending.rend(), s.rbegin());
	}
}
697
703std::string FZ_PUBLIC_SYMBOL normalize_hyphens(std::string_view const& in);
704std::wstring FZ_PUBLIC_SYMBOL normalize_hyphens(std::wstring_view const& in);
705
707bool FZ_PUBLIC_SYMBOL is_valid_utf8(std::string_view s);
708
729bool FZ_PUBLIC_SYMBOL is_valid_utf8(std::string_view s, size_t & state);
730
736void FZ_PUBLIC_SYMBOL unicode_codepoint_to_utf8_append(std::string& result, uint32_t codepoint);
737
758bool FZ_PUBLIC_SYMBOL utf16be_to_utf8_append(std::string & result, std::string_view data, uint32_t & state);
759
761bool FZ_PUBLIC_SYMBOL utf16le_to_utf8_append(std::string & result, std::string_view data, uint32_t & state);
762
763}
764
765#endif
The buffer class is a simple buffer where data can be appended at the end and consumed at the front....
Definition: buffer.hpp:27
Small class to return filesystem errors.
Definition: fsresult.hpp:22
Container-like class that can be used to iterate over tokens in a string.
Definition: string.hpp:338
constexpr strtokenizer(String &&string, Delims &&delims, bool ignore_empty)
strtokenizer class constructor.
Definition: string.hpp:348
Sets some global macros and further includes string.hpp.
The namespace used by libfilezilla.
Definition: apply.hpp:17
size_t strlen(Char const *str)
Returns length of 0-terminated character sequence. Works with both narrow and wide-characters.
Definition: string.hpp:216
Char toupper_ascii(Char c)
Converts ASCII lowercase characters to uppercase as if C-locale is used.
Definition: string.hpp:98
bool utf16le_to_utf8_append(std::string &result, std::string_view data, uint32_t &state)
Just as utf16be_to_utf8_append but for little-endian UTF-16.
std::vector< std::string_view > strtok_view(std::string_view const &tokens, std::string_view const &delims, bool const ignore_empty=true)
Tokenizes string.
Char tolower_ascii(Char c)
Converts ASCII uppercase characters to lowercase as if C-locale is used.
Definition: string.hpp:86
bool str_is_ascii(String const &s)
Returns true iff the string only has characters in the 7-bit ASCII range.
Definition: string.hpp:553
auto toString(Arg &&arg) -> typename std::enable_if< std::is_same_v< String, std::string >, decltype(to_string(std::forward< Arg >(arg)))>::type
Calls either fz::to_string or fz::to_wstring depending on the passed template argument.
Definition: string.hpp:239
void trim(String &s, std::string_view const &chars=" \r\n\t", bool fromLeft=true, bool fromRight=true)
Remove all leading and trailing whitespace from string.
Definition: string.hpp:622
bool is_valid_utf8(std::string_view s)
Verifies that the input data is valid UTF-8.
std::string trimmed(std::string_view s, std::string_view const &chars=" \r\n\t", bool fromLeft=true, bool fromRight=true)
Return passed string with all leading and trailing whitespace removed.
Definition: string.hpp:583
std::wstring to_wstring_from_utf8(std::string_view const &in)
Converts from std::string in UTF-8 into std::wstring.
std::string normalize_hyphens(std::string_view const &in)
std::wstring native_string
A string in the system's native character type and encoding. Note: This typedef changes depending on...
Definition: string.hpp:33
bool utf16be_to_utf8_append(std::string &result, std::string_view data, uint32_t &state)
Converts from UTF-16-BE and appends it to the passed string.
std::string to_utf8(std::string_view const &in)
Converts from std::string in native encoding into std::string in UTF-8.
bool ends_with(String const &s, String const &ending)
Tests whether the first string ends with the second string.
Definition: string.hpp:682
bool equal_insensitive_ascii(std::string_view a, std::string_view b)
Locale-insensitive stricmp.
Definition: string.hpp:138
std::vector< std::string > strtok(std::string_view const &tokens, std::string_view const &delims, bool const ignore_empty=true)
Tokenizes string.
std::string to_string(std::wstring_view const &in)
Converts from std::wstring into std::string in system encoding.
std::wstring to_wstring(std::string_view const &in)
Converts from std::string in system encoding into std::wstring.
std::string replaced_substrings(std::string_view const &in, std::string_view const &find, std::string_view const &replacement)
Returns in with all occurrences of find in the input string replaced with replacement.
bool starts_with(String const &s, String const &beginning)
Tests whether the first string starts with the second string.
Definition: string.hpp:662
bool replace_substrings(std::string &in, std::string_view const &find, std::string_view const &replacement)
Modifies in, replacing all occurrences of find with replacement.
int stricmp(std::string_view const &a, std::string_view const &b)
Locale-sensitive stricmp.
std::string str_tolower_ascii(std::string_view const &s)
str_tolower_ascii does for strings what tolower_ascii does for individual characters.
void unicode_codepoint_to_utf8_append(std::string &result, uint32_t codepoint)
Encodes a valid Unicode codepoint as UTF-8 and appends it to the passed string.
native_string to_native(std::string_view const &in)
Converts std::string to native_string.
T to_integral(std::string_view const &s, T const errorval=T())
Converts string to integral type T. If string is not convertible, errorval is returned.
Definition: string.hpp:536
Char const * choose_string(char const *c, wchar_t const *w)
Returns the function argument of the type matching the template argument.
Comparator to be used for std::map for case-insensitive keys.
Definition: string.hpp:123
Definition: string.hpp:363
Definition: string.hpp:360