From 725826231fda0f736157f087355501fbf61fcc4e Mon Sep 17 00:00:00 2001
From: "Dirk-Jan C. Binnema"
Date: Tue, 16 Mar 2021 16:51:01 +0200
Subject: [PATCH] utils: Add remove_ctrl

Add a helper function to remove control characters / multi-spaces, and a
test.
---
Review notes (ignored by git-am, not part of the commit message):
 - iscntrl() now receives static_cast<unsigned char>(c). Passing a plain
   char is undefined for negative values, and the bytes of multi-byte
   UTF-8 sequences (e.g. in "Ångström") are negative when char is signed.
 - The mu-utils.cc hunk now closes Mu::utf8_clean before the new function
   starts; previously the function was inserted ahead of that brace.
 - Two test cases with runs of blanks / control characters were added.
 - Nothing in this patch calls test_remove_ctrl(); TODO confirm it gets
   registered with the test runner.
 - The "index" lines below were not regenerated after these edits.

 lib/utils/mu-utils.cc   | 25 +++++++++++++++++++++++++
 lib/utils/test-utils.cc | 19 +++++++++++++++++++
 2 files changed, 44 insertions(+)

diff --git a/lib/utils/mu-utils.cc b/lib/utils/mu-utils.cc
index 59ece80b..d80dfb2f 100644
--- a/lib/utils/mu-utils.cc
+++ b/lib/utils/mu-utils.cc
@@ -147,6 +147,31 @@ Mu::utf8_clean (const std::string& dirty)
 	clean.erase (clean.find_last_not_of(" ") + 1); // remove trailing space
 
 	return clean;
+}
+
+// Return a copy of str in which every control character is replaced by a
+// space and every run of spaces / control characters is squeezed into a
+// single space. All other bytes are copied unchanged; leading and trailing
+// blanks are squeezed but not trimmed.
+std::string
+Mu::remove_ctrl (const std::string& str)
+{
+	char prev{'\0'};
+	std::string result;
+	result.reserve(str.length());
+
+	for (auto&& c: str) {
+		// the <cctype> functions are only defined for values representable
+		// as unsigned char (or EOF), so never hand them a negative char.
+		if (::iscntrl(static_cast<unsigned char>(c)) || c == ' ') {
+			if (prev != ' ')
+				result += prev = ' ';
+		} else
+			result += prev = c;
+	}
+
+	return result;
 }
 
 std::vector<std::string>
diff --git a/lib/utils/test-utils.cc b/lib/utils/test-utils.cc
index b1f666e5..020135f6 100644
--- a/lib/utils/test-utils.cc
+++ b/lib/utils/test-utils.cc
@@ -136,6 +136,25 @@ test_flatten ()
 	test_cases (cases, [](auto s, auto f){ return utf8_flatten(s); });
 }
 
+static void
+test_remove_ctrl ()
+{
+	// tuples appear to be {input, flag, expected}; the flag is unused here
+	CaseVec cases = {
+		{ "Foo\n\nbar", true, "Foo bar" },
+		{ "", false, "" },
+		{ " ", false, " " },
+		{ "   ", false, " " },
+		{ "Hello World ", false, "Hello World " },
+		{ "Hello \t\r\n World", false, "Hello World" },
+		{ "Ångström", false, "Ångström" },
+	};
+
+	test_cases (cases, [](auto s, auto f){ return remove_ctrl(s); });
+}
+
+
+
 static void
 test_clean ()
 {