omw  0.2.1-beta
string.h
1 /*
2 author Oliver Blaser
3 date 19.11.2023
4 copyright MIT - Copyright (c) 2023 Oliver Blaser
5 */
6 
7 #ifndef IG_OMW_STRING_H
8 #define IG_OMW_STRING_H
9 
10 #include <cstddef>
11 #include <cstdint>
12 #include <string>
13 #include <utility>
14 #include <vector>
15 
16 #include "../omw/defs.h"
17 #include "../omw/int.h"
18 #include "../omw/vector.h"
19 
// OMW_STDSTRING_CONSTEXPR expands to `constexpr` when the library is built as
// C++20 or newer and to nothing otherwise. It is put on helpers that call
// std::string members, which are only usable in constant expressions from
// C++20 on.
#if OMW_CPPSTD < OMW_CPPSTD_20
#define OMW_STDSTRING_CONSTEXPR
#else
#define OMW_STDSTRING_CONSTEXPR constexpr
#endif
25 
// UTF-8 encoded byte sequences of selected Unicode code points. They are
// macros (not constants) so that they can be concatenated with adjacent
// string literals, e.g. "K" OMW_UTF8CP_00E4 "se".
#define OMW_UTF8CP_00C4 "\xC3\x84"     // U+00C4
#define OMW_UTF8CP_00D6 "\xC3\x96"     // U+00D6
#define OMW_UTF8CP_00DC "\xC3\x9C"     // U+00DC
#define OMW_UTF8CP_00E4 "\xC3\xA4"     // U+00E4
#define OMW_UTF8CP_00F6 "\xC3\xB6"     // U+00F6
#define OMW_UTF8CP_00FC "\xC3\xBC"     // U+00FC
#define OMW_UTF8CP_2580 "\xE2\x96\x80" // U+2580
#define OMW_UTF8CP_2584 "\xE2\x96\x84" // U+2584
#define OMW_UTF8CP_2588 "\xE2\x96\x88" // U+2588

// Aliases for the umlauts, named after the corresponding HTML entities.
#define OMW_UTF8CP_Auml OMW_UTF8CP_00C4
#define OMW_UTF8CP_Ouml OMW_UTF8CP_00D6
#define OMW_UTF8CP_Uuml OMW_UTF8CP_00DC
#define OMW_UTF8CP_auml OMW_UTF8CP_00E4
#define OMW_UTF8CP_ouml OMW_UTF8CP_00F6
#define OMW_UTF8CP_uuml OMW_UTF8CP_00FC
51 
55 namespace omw
56 {
61  const char* const UTF8CP_00C4 = OMW_UTF8CP_00C4;
64  const char* const UTF8CP_00D6 = OMW_UTF8CP_00D6;
65  const char* const UTF8CP_00DC = OMW_UTF8CP_00DC;
66  const char* const UTF8CP_00E4 = OMW_UTF8CP_00E4;
67  const char* const UTF8CP_00F6 = OMW_UTF8CP_00F6;
68  const char* const UTF8CP_00FC = OMW_UTF8CP_00FC;
69  const char* const UTF8CP_2580 = OMW_UTF8CP_2580;
70  const char* const UTF8CP_2584 = OMW_UTF8CP_2584;
71  const char* const UTF8CP_2588 = OMW_UTF8CP_2588;
73 
76  const char* const UTF8CP_Auml = omw::UTF8CP_00C4;
77  const char* const UTF8CP_Ouml = omw::UTF8CP_00D6;
78  const char* const UTF8CP_Uuml = omw::UTF8CP_00DC;
79  const char* const UTF8CP_auml = omw::UTF8CP_00E4;
80  const char* const UTF8CP_ouml = omw::UTF8CP_00F6;
81  const char* const UTF8CP_uuml = omw::UTF8CP_00FC;
83 
84  constexpr char pairtos_defaultDelimiter = ';';
85  const char* const hexStrDigitsUpper = "0123456789ABCDEF";
86  const char* const hexStrDigitsLower = "0123456789abcdef";
87  const char* const hexStrDigits = hexStrDigitsUpper;
88  constexpr char toHexStr_defaultDelimiter = 0x20;
89 
90 
91 
92  using stdStringVector_t = std::vector<std::string>;
93  constexpr omw::stdStringVector_t::size_type stdStringVector_npos = static_cast<omw::stdStringVector_t::size_type>(-1);
94 
95  class string;
96  using stringVector_t = std::vector<omw::string>;
97  constexpr omw::stringVector_t::size_type stringVector_npos = static_cast<omw::stringVector_t::size_type>(-1);
98 
99 
100 
102  {
103  public:
104  StringReplacePair() : m_s(), m_r() {}
105  StringReplacePair(const std::string& searchElement, const std::string& replaceElement) : m_s(searchElement), m_r(replaceElement) {}
106  StringReplacePair(const char searchElement, const std::string& replaceElement) : m_s(1, searchElement), m_r(replaceElement) {}
107  StringReplacePair(const std::string& searchElement, const char replaceElement) : m_s(searchElement), m_r(1, replaceElement) {}
108  StringReplacePair(const char searchElement, const char replaceElement) : m_s(1, searchElement), m_r(1, replaceElement) {}
109  virtual ~StringReplacePair() {}
110 
111  const std::string& search() const { return m_s; }
112  const std::string& replace() const { return m_r; }
113 
114  private:
115  std::string m_s;
116  std::string m_r;
117  };
118 
119 
120 
123  inline OMW_STDSTRING_CONSTEXPR bool contains(const std::string& str, char ch) { return (str.find(ch) != std::string::npos); }
124  inline OMW_STDSTRING_CONSTEXPR bool contains(const std::string& str, const char* s) { return (str.find(s) != std::string::npos); }
125 #if (OMW_CPPSTD < OMW_CPPSTD_17)
126  inline OMW_STDSTRING_CONSTEXPR bool contains(const std::string& str, const std::string& s) { return (str.find(s) != std::string::npos); }
127 #else
128  inline OMW_STDSTRING_CONSTEXPR bool contains(const std::string& str, const std::string_view& sv) { return (str.find(sv) != std::string::npos); }
129 #endif
130 
131  omw::stringVector_t split(const std::string& str, char delimiter, omw::stringVector_t::size_type maxTokenCount = omw::stringVector_npos);
132  inline omw::stringVector_t split(const char* str, char delimiter, omw::stringVector_t::size_type maxTokenCount = omw::stringVector_npos) { return (str ? omw::split(std::string(str), delimiter, maxTokenCount) : omw::stringVector_t()); }
133  omw::stringVector_t splitLen(const std::string& str, std::string::size_type tokenLength, omw::stringVector_t::size_type maxTokenCount = omw::stringVector_npos);
134  inline omw::stringVector_t splitLen(const char* str, std::string::size_type tokenLength, omw::stringVector_t::size_type maxTokenCount = omw::stringVector_npos) { return (str ? omw::splitLen(std::string(str), tokenLength, maxTokenCount) : omw::stringVector_t()); }
136 
137 
138 
139  class string : public std::string
140  {
141  public:
142  string();
143  string(omw::string::size_type count, char c);
144  string(const char* str);
145  string(const char* str, omw::string::size_type count);
146  string(const std::string& other);
147  string(const std::string& other, std::string::size_type pos, std::string::size_type count = std::string::npos);
148  string(const char* first, const char* last);
149  ~string() {}
150 
151  std::string& std();
152  const std::string& std() const;
153 
154 #ifndef OMWi_DOXYGEN_PREDEFINE
155 #if (OMW_CPPSTD < OMW_CPPSTD_23)
156  bool contains(char ch) const { return (this->find(ch) != omw::string::npos); }
157  bool contains(const char* str) const { return (this->find(str) != omw::string::npos); }
158 #if (OMW_CPPSTD < OMW_CPPSTD_17)
159  bool contains(const std::string& str) const { return (this->find(str) != omw::string::npos); }
160 #else // < C++17
161  bool contains(std::string_view sv) const { return (this->find(sv) != omw::string::npos); }
162 #endif // < C++17
163 #endif // < C++23
164 #else // OMWi_DOXYGEN_PREDEFINE
165  bool contains(char ch) const;
166  bool contains(const char* str) const;
167  bool contains(const std::string& str) const;
168  bool contains(std::string_view sv) const;
169 #endif // OMWi_DOXYGEN_PREDEFINE
170 
171  omw::string& replaceFirst(const std::string& search, const std::string& replace, size_type startPos = 0);
172  omw::string& replaceFirst(const omw::StringReplacePair& pair, size_type startPos = 0);
173 
174  omw::string& replaceAll(char search, char replace, size_type startPos = 0, size_t* nReplacements = nullptr);
175  omw::string& replaceAll(char search, const std::string& replace, size_type startPos = 0, size_t* nReplacements = nullptr);
176  omw::string& replaceAll(const std::string& search, char replace, size_type startPos = 0, size_t* nReplacements = nullptr);
177  omw::string& replaceAll(const std::string& search, const std::string& replace, size_type startPos = 0, size_t* nReplacements = nullptr);
178  omw::string& replaceAll(const omw::StringReplacePair& pair, size_type startPos = 0, size_t* nReplacements = nullptr);
179  omw::string& replaceAll(const std::vector<omw::StringReplacePair>& pairs, size_type startPos = 0, size_t* nReplacementsTotal = nullptr, std::vector<size_t>* nReplacements = nullptr);
180  omw::string& replaceAll(const omw::StringReplacePair* pairs, size_t count, size_type startPos = 0, size_t* nReplacementsTotal = nullptr, std::vector<size_t>* nReplacements = nullptr);
181 
182  omw::string& reverse();
183  omw::string reversed() const;
184 
185  omw::stringVector_t split(char delimiter, omw::stringVector_t::size_type maxTokenCount = omw::stringVector_npos) const;
186  //omw::stringVector_t split(const char* delimiter, omw::stringVector_t::size_type maxTokenCount = omw::stringVector_npos) const;
187  //omw::stringVector_t split(const std::string& delimiter, omw::stringVector_t::size_type maxTokenCount = omw::stringVector_npos) const;
188  omw::stringVector_t splitLen(omw::string::size_type tokenLength, omw::stringVector_t::size_type maxTokenCount = omw::stringVector_npos) const;
189 
193  //omw::string& lower(); // reserved name
194  omw::string& lower_ascii();
195  omw::string& lower_asciiExt();
196  //omw::string& upper(); // reserved name
197  omw::string& upper_ascii();
198  omw::string& upper_asciiExt();
199  //omw::string toLower() const; // reserved name
200  omw::string toLower_ascii() const;
201  omw::string toLower_asciiExt() const;
202  //omw::string toUpper() const; // reserved name
203  omw::string toUpper_ascii() const;
204  omw::string toUpper_asciiExt() const;
206  };
207 
208 
209 
212  std::string& replaceFirst(std::string& str, const std::string& search, const std::string& replace, std::string::size_type startPos = 0);
213  inline std::string& replaceFirst(std::string& str, const omw::StringReplacePair& pair, std::string::size_type startPos = 0) { return omw::replaceFirst(str, pair.search(), pair.replace(), startPos); }
214 
215  std::string& replaceAll(std::string& str, char search, char replace, std::string::size_type startPos = 0, size_t* nReplacements = nullptr);
216  std::string& replaceAll(std::string& str, char search, const std::string& replace, std::string::size_type startPos = 0, size_t* nReplacements = nullptr);
217  std::string& replaceAll(std::string& str, const std::string& search, char replace, std::string::size_type startPos = 0, size_t* nReplacements = nullptr);
218  std::string& replaceAll(std::string& str, const std::string& search, const std::string& replace, std::string::size_type startPos = 0, size_t* nReplacements = nullptr);
219  std::string& replaceAll(std::string& str, const omw::StringReplacePair& pair, std::string::size_type startPos = 0, size_t* nReplacements = nullptr);
220  std::string& replaceAll(std::string& str, const std::vector<omw::StringReplacePair>& pairs, std::string::size_type startPos = 0, size_t* nReplacementsTotal = nullptr, std::vector<size_t>* nReplacements = nullptr);
221  std::string& replaceAll(std::string& str, const omw::StringReplacePair* pairs, size_t count, std::string::size_type startPos = 0, size_t* nReplacementsTotal = nullptr, std::vector<size_t>* nReplacements = nullptr);
223 
224 
225 
228  inline omw::string to_string(int32_t value) { return std::to_string(value); }
229  inline omw::string to_string(uint32_t value) { return std::to_string(value); }
230  inline omw::string to_string(int64_t value) { return std::to_string(value); }
231  inline omw::string to_string(uint64_t value) { return std::to_string(value); }
232  inline omw::string to_string(float value) { return std::to_string(value); }
233  inline omw::string to_string(double value) { return std::to_string(value); }
234  inline omw::string to_string(long double value) { return std::to_string(value); }
235 
236  omw::string to_string(bool value, bool asText = true);
237  omw::string to_string(const omw::int128_t& value);
238  omw::string to_string(const omw::uint128_t& value);
239  omw::string to_string(const std::pair<int32_t, int32_t>& value, char delimiter = pairtos_defaultDelimiter);
240  omw::string to_string(const std::pair<uint32_t, uint32_t>& value, char delimiter = pairtos_defaultDelimiter);
241  omw::string to_string(const std::pair<int64_t, int64_t>& value, char delimiter = pairtos_defaultDelimiter);
242  omw::string to_string(const std::pair<uint64_t, uint64_t>& value, char delimiter = pairtos_defaultDelimiter);
243  omw::string to_string(const std::pair<float, float>& value, char delimiter = pairtos_defaultDelimiter);
244  omw::string to_string(const std::pair<double, double>& value, char delimiter = pairtos_defaultDelimiter);
245  omw::string to_string(const std::pair<long double, long double>& value, char delimiter = pairtos_defaultDelimiter);
247 
248 
251  bool stob(const std::string& boolStr);
252 
253  size_t stoz(const std::string& str, size_t* pos = nullptr, int base = 10);
254 
255  std::pair<int32_t, int32_t> stoipair(const std::string& str, char delimiter = pairtos_defaultDelimiter);
256  //std::pair<uint32_t, uint32_t> stouipair(const std::string& str, char delimiter = pairtos_defaultDelimiter);
257  //std::pair<int64_t, int64_t> stoi64pair(const std::string& str, char delimiter = pairtos_defaultDelimiter);
258  //std::pair<uint64_t, uint64_t> stoui64pair(const std::string& str, char delimiter = pairtos_defaultDelimiter);
259  //std::pair<float, float> stofpair(const std::string& str, char delimiter = pairtos_defaultDelimiter);
260  //std::pair<double, double> stodpair(const std::string& str, char delimiter = pairtos_defaultDelimiter);
261  //std::pair<long double, long double> stoldpair(const std::string& str, char delimiter = pairtos_defaultDelimiter);
262 
263  //omw::int128_t stoi128(const std::string& str);
264  //omw::uint128_t stoui128(const std::string& str);
266 
267 
268 
271  omw::string toHexStr(int8_t value);
272  omw::string toHexStr(uint8_t value);
273  omw::string toHexStr(int16_t value);
274  omw::string toHexStr(uint16_t value);
275  omw::string toHexStr(int32_t value);
276  omw::string toHexStr(uint32_t value);
277  omw::string toHexStr(int64_t value);
278  omw::string toHexStr(uint64_t value);
279  omw::string toHexStr(const omw::Base_Int128& value);
280  omw::string toHexStr(int16_t value, char delimiter);
281  omw::string toHexStr(uint16_t value, char delimiter);
282  omw::string toHexStr(int32_t value, char delimiter);
283  omw::string toHexStr(uint32_t value, char delimiter);
284  omw::string toHexStr(int64_t value, char delimiter);
285  omw::string toHexStr(uint64_t value, char delimiter);
286  omw::string toHexStr(const omw::Base_Int128& value, char delimiter);
287  omw::string toHexStr(const std::vector<char>& data, char delimiter = toHexStr_defaultDelimiter);
288  omw::string toHexStr(const std::vector<uint8_t>& data, char delimiter = toHexStr_defaultDelimiter);
289  omw::string toHexStr(const char* data, size_t count, char delimiter = toHexStr_defaultDelimiter);
290  omw::string toHexStr(const uint8_t* data, size_t count, char delimiter = toHexStr_defaultDelimiter);
291 
292  int32_t hexstoi(const std::string& str);
293  int64_t hexstoi64(const std::string& str);
295  uint32_t hexstoui(const std::string& str);
296  uint64_t hexstoui64(const std::string& str);
298  std::vector<uint8_t> hexstovector(const std::string& str, char delimiter = toHexStr_defaultDelimiter);
299 
300  omw::string sepHexStr(const std::string& str);
301  omw::string sepHexStr(const std::string& str, char delimiter);
302  omw::string sepHexStr(const std::string& str, char rmChar, char delimiter);
303  omw::string sepHexStr(const std::string& str, const char* rmChars, size_t count, char delimiter = toHexStr_defaultDelimiter);
304  omw::string sepHexStr(const std::string& str, const std::vector<char>& rmChars, char delimiter = toHexStr_defaultDelimiter);
305  //omw::string sepHexStr(const std::string& str, const char* rmString, char delimiter = toHexStr_defaultDelimiter);
306  //omw::string sepHexStr(const std::string& str, const std::string& rmString, char delimiter = toHexStr_defaultDelimiter);
307  //omw::string sepHexStr(const std::string& str, const std::string* rmStrings, size_t count, char delimiter = toHexStr_defaultDelimiter);
308  //omw::string sepHexStr(const std::string& str, const omw::string* rmStrings, size_t count, char delimiter = toHexStr_defaultDelimiter);
309  //omw::string sepHexStr(const std::string& str, const omw::stdStringVector_t& rmStrings, char delimiter = toHexStr_defaultDelimiter);
310  //omw::string sepHexStr(const std::string& str, const omw::stringVector_t& rmStrings, char delimiter = toHexStr_defaultDelimiter);
311 
312  omw::string rmNonHex(const std::string& str);
313  void rmNonHex(char* str);
314  void rmNonHex(std::string& str);
316 
317 
318 
319  // TODO check overloads
320  //omw::string join(const std::string* strings, size_t count, char delimiter = '\0');
321  //omw::string join(const std::string* strings, size_t count, const char* delimiter);
322  //omw::string join(const std::string* strings, size_t count, const std::string& delimiter);
323  //omw::string join(const omw::stdStringVector_t& strings);
324  //omw::string join(const omw::stdStringVector_t& strings, char delimiter);
325  //omw::string join(const omw::stdStringVector_t& strings, const char* delimiter);
326  //omw::string join(const omw::stdStringVector_t& strings, const std::string& delimiter);
327  //omw::string join(const omw::stringVector_t& strings);
328  omw::string join(const omw::stringVector_t& strings);
329  omw::string join(const omw::stringVector_t& strings, char delimiter);
330  //omw::string join(const omw::stringVector_t& strings, const char* delimiter);
331  //omw::string join(const omw::stringVector_t& strings, const std::string& delimiter);
332 
333 
334 
337  omw::stringVector_t stringVector(const char* const* strings, size_t count);
338  omw::stringVector_t stringVector(const std::string* strings, size_t count);
339  omw::stringVector_t stringVector(const omw::string* strings, size_t count);
340  omw::stringVector_t stringVector(const omw::stdStringVector_t& strvec);
341 
342  omw::stdStringVector_t stdStringVector(const char* const* strings, size_t count);
343  omw::stdStringVector_t stdStringVector(const std::string* strings, size_t count);
344  omw::stdStringVector_t stdStringVector(const omw::string* strings, size_t count);
345  omw::stdStringVector_t stdStringVector(const omw::stringVector_t& strvec);
347 
348 
349 
350  //bool isValidUTF8(const std::string& str);
351 
352 
353 
356  constexpr bool isBlank(char ch) { return ((ch == 0x09) || (ch == 0x20)); }
357  constexpr bool isCntrl(char ch) { return (((ch >= 0x00) && (ch <= 0x1F)) || (ch == 0x7F)); }
358  constexpr bool isDigit(char ch) { return ((ch >= 0x30) && (ch <= 0x39)); }
359  constexpr bool isGraph(char ch) { return ((ch >= 0x21) && (ch <= 0x7E)); }
360  constexpr bool isHex(char ch)
361  {
362  return (((ch >= 0x30) && (ch <= 0x39)) || ((ch >= 0x41) && (ch <= 0x46)) ||
363  ((ch >= 0x61) && (ch <= 0x66)));
364  }
365  constexpr bool isLower(char ch) { return ((ch >= 0x61) && (ch <= 0x7A)); }
366  constexpr bool isNull(char ch) { return (ch == 0x00); }
367  constexpr bool isPrint(char ch) { return ((ch >= 0x20) && (ch <= 0x7E)); }
368  constexpr bool isPunct(char ch)
369  {
370  return (((ch >= 0x21) && (ch <= 0x2F)) || ((ch >= 0x3A) && (ch <= 0x40)) ||
371  ((ch >= 0x5B) && (ch <= 0x60)) || ((ch >= 0x7B) && (ch <= 0x7E)));
372  }
373  constexpr bool isSpace(char ch) { return (((ch >= 0x09) && (ch <= 0x0D)) || (ch == 0x20)); }
374  constexpr bool isUpper(char ch) { return ((ch >= 0x41) && (ch <= 0x5A)); }
375  constexpr bool isAlpha(char ch) { return (isLower(ch) || isUpper(ch)); }
376  constexpr bool isAlnum(char ch) { return (isAlpha(ch) || isDigit(ch)); }
377 
378  // same for UTF-8:
379  //bool is...(const char* utf8); // utf8 = UTF-8 byte string (e.g. "\xC3\xA4")
381 
384  bool isInteger(const std::string& str);
385  bool isUInteger(const std::string& str);
386  bool isHex(const std::string& str, std::string::size_type pos = 0, std::string::size_type count = std::string::npos);
388 
389  size_t peekNewLine(const char* p);
390  size_t peekNewLine(const char* p, const char* end);
391 
392  omw::string readString(const uint8_t* data, size_t count);
393  omw::string readString(const std::vector<uint8_t>& data, std::vector<uint8_t>::size_type pos, std::vector<uint8_t>::size_type count);
394  void writeString(uint8_t* buffer, const uint8_t* end, const std::string& str);
395  void writeString(std::vector<uint8_t>& buffer, std::vector<uint8_t>::size_type pos, const std::string& str);
396 
397 
398 
400 }
401 
402 #endif // IG_OMW_STRING_H
omw::StringReplacePair
Definition: string.h:102
std::string
C++ standard string. See std::basic_string.
Definition: linkToStd.dox:19
omw::hexstoi
int32_t hexstoi(const std::string &str)
Definition: string.cpp:933
omw::string::std
std::string & std()
Definition: string.cpp:261
omw::string::contains
bool contains(std::string_view sv) const
omw::replaceFirst
std::string & replaceFirst(std::string &str, const std::string &search, const std::string &replace, std::string::size_type startPos=0)
Definition: string.cpp:527
omw::readString
omw::string readString(const uint8_t *data, size_t count)
Definition: string.cpp:1398
omw::string::contains
bool contains(const std::string &str) const
omw::SignedInt128
Definition: int.h:78
omw::string::contains
bool contains(const char *str) const
omw::peekNewLine
size_t peekNewLine(const char *p)
Definition: string.cpp:1339
omw::UnsignedInt128
Definition: int.h:105
omw::sepHexStr
omw::string sepHexStr(const std::string &str)
Definition: string.cpp:1037
omw::writeString
void writeString(uint8_t *buffer, const uint8_t *end, const std::string &str)
Definition: string.cpp:1430
omw::hexstoi128
omw::int128_t hexstoi128(const std::string &str)
Definition: string.cpp:957
omw::hexstoui128
omw::uint128_t hexstoui128(const std::string &str)
Definition: string.cpp:993
omw::stoipair
std::pair< int32_t, int32_t > stoipair(const std::string &str, char delimiter=pairtos_defaultDelimiter)
Definition: string.cpp:785
omw::replaceAll
std::string & replaceAll(std::string &str, char search, char replace, std::string::size_type startPos=0, size_t *nReplacements=nullptr)
Definition: string.cpp:540
omw::string::replaceAll
omw::string & replaceAll(char search, char replace, size_type startPos=0, size_t *nReplacements=nullptr)
Definition: string.cpp:324
omw::hexstoi64
int64_t hexstoi64(const std::string &str)
Definition: string.cpp:945
omw::Base_Int128
Definition: int.h:24
omw::stob
bool stob(const std::string &boolStr)
Definition: string.cpp:729
omw::isUInteger
bool isUInteger(const std::string &str)
Definition: string.cpp:1284
omw::string
Definition: string.h:140
omw::string::reversed
omw::string reversed() const
Definition: string.cpp:429
omw
Main namespace.
omw::string::reverse
omw::string & reverse()
Definition: string.cpp:397
omw::hexstoui
uint32_t hexstoui(const std::string &str)
Definition: string.cpp:969
omw::isInteger
bool isInteger(const std::string &str)
Definition: string.cpp:1267
omw::hexstoui64
uint64_t hexstoui64(const std::string &str)
Definition: string.cpp:981
omw::string::contains
bool contains(char ch) const
omw::string::replaceFirst
omw::string & replaceFirst(const std::string &search, const std::string &replace, size_type startPos=0)
Definition: string.cpp:305