tokenizer: clean unicode-aware

This commit is contained in:
djcb
2017-10-28 14:13:09 +03:00
parent 0e5e8b6bce
commit 55ffb524db
3 changed files with 40 additions and 18 deletions

View File

@@ -34,6 +34,17 @@ namespace Mux {
*/
std::string utf8_flatten (const std::string& str);
/**
* Clean up a string: replace all control characters with spaces, and
* remove leading and trailing space.
*
* The input is taken by const reference and is not modified; the
* cleaned-up result is returned as a new string.
*
* @param dirty an unclean string
*
* @return a cleaned-up copy of @p dirty.
*/
std::string utf8_clean (const std::string& dirty);
/**
* Split a string in parts
*