From d307d01b18b84a117ecd2c427cec85595cf72ea3 Mon Sep 17 00:00:00 2001
From: sfan5 <sfan5@live.de>
Date: Sun, 24 Mar 2024 10:33:41 +0100
Subject: [PATCH] Fix tests that rely on UTF-8 literals

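Also make the lowercase function (and the other case-insensitive
string helpers) pass non-ASCII bytes through unchanged in any locale.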
---
 src/filesys.cpp                 |  4 ++--
 src/unittest/test_settings.cpp  |  6 +++---
 src/unittest/test_utilities.cpp | 27 ++++++++++++++++-----------
 src/util/string.h               | 23 +++++++++++++++++++----
 4 files changed, 40 insertions(+), 20 deletions(-)

diff --git a/src/filesys.cpp b/src/filesys.cpp
index 88d617801..9f493d7cf 100644
--- a/src/filesys.cpp
+++ b/src/filesys.cpp
@@ -701,8 +701,8 @@ bool PathStartsWith(const std::string &path, const std::string &prefix)
 				char pathchar = path[pathpos+len];
 				char prefixchar = prefix[prefixpos+len];
 				if(FILESYS_CASE_INSENSITIVE){
-					pathchar = tolower(pathchar);
-					prefixchar = tolower(prefixchar);
+					pathchar = my_tolower(pathchar);
+					prefixchar = my_tolower(prefixchar);
 				}
 				if(pathchar != prefixchar)
 					return false;
diff --git a/src/unittest/test_settings.cpp b/src/unittest/test_settings.cpp
index 6f382fabc..f5145ee16 100644
--- a/src/unittest/test_settings.cpp
+++ b/src/unittest/test_settings.cpp
@@ -51,7 +51,7 @@ void TestSettings::runTests(IGameDef *gamedef)
 ////////////////////////////////////////////////////////////////////////////////
 
 const char *TestSettings::config_text_before =
-	"leet = 1337\n"
+	u8"leet = 1337\n"
 	"leetleet = 13371337\n"
 	"leetleet_neg = -13371337\n"
 	"floaty_thing = 1.1\n"
@@ -77,7 +77,7 @@ const char *TestSettings::config_text_before =
 	"[dummy_eof_end_tag]\n";
 
 const std::string TestSettings::config_text_after =
-	"leet = 1337\n"
+	u8"leet = 1337\n"
 	"leetleet = 13371337\n"
 	"leetleet_neg = -13371337\n"
 	"floaty_thing = 1.1\n"
@@ -154,7 +154,7 @@ void TestSettings::testAllSettings()
 
 	// Not sure if 1.1 is an exact value as a float, but doesn't matter
 	UASSERT(fabs(s.getFloat("floaty_thing") - 1.1) < 0.001);
-	UASSERT(s.get("stringy_thing") == "asd /( ¤%&(/\" BLÖÄRP");
+	UASSERT(s.get("stringy_thing") == u8"asd /( ¤%&(/\" BLÖÄRP");
 	UASSERT(fabs(s.getV3F("coord").X - 1.0) < 0.001);
 	UASSERT(fabs(s.getV3F("coord").Y - 2.0) < 0.001);
 	UASSERT(fabs(s.getV3F("coord").Z - 4.5) < 0.001);
diff --git a/src/unittest/test_utilities.cpp b/src/unittest/test_utilities.cpp
index 752950abe..5c653a529 100644
--- a/src/unittest/test_utilities.cpp
+++ b/src/unittest/test_utilities.cpp
@@ -183,9 +183,11 @@ void TestUtilities::testWrapDegrees_0_360_v3f()
 
 void TestUtilities::testLowercase()
 {
-	UASSERT(lowercase("Foo bAR") == "foo bar");
-	UASSERT(lowercase("eeeeeeaaaaaaaaaaaààààà") == "eeeeeeaaaaaaaaaaaààààà");
-	UASSERT(lowercase("MINETEST-powa") == "minetest-powa");
+	UASSERTEQ(auto, lowercase("Foo bAR"), "foo bar");
+	UASSERTEQ(auto, lowercase(u8"eeeeeeaaaaaaaaaaaààààà"), u8"eeeeeeaaaaaaaaaaaààààà");
+	// lowercase() is ASCII-only by design: non-ASCII input must come back unchanged in any locale
+	UASSERTEQ(auto, lowercase(u8"ÜÜ"), u8"ÜÜ");
+	UASSERTEQ(auto, lowercase("MINETEST-powa"), "minetest-powa");
 }
 
 
@@ -309,18 +311,21 @@ void TestUtilities::testAsciiPrintableHelper()
 
 void TestUtilities::testUTF8()
 {
-	UASSERT(utf8_to_wide("¤") == L"¤");
+	UASSERT(utf8_to_wide(u8"¤") == L"¤");
 
-	UASSERT(wide_to_utf8(L"¤") == "¤");
+	UASSERTEQ(std::string, wide_to_utf8(L"¤"), u8"¤");
 
 	UASSERTEQ(std::string, wide_to_utf8(utf8_to_wide("")), "");
 	UASSERTEQ(std::string, wide_to_utf8(utf8_to_wide("the shovel dug a crumbly node!")),
 		"the shovel dug a crumbly node!");
-	UASSERTEQ(std::string, wide_to_utf8(utf8_to_wide("-ä-")),
-		"-ä-");
-	UASSERTEQ(std::string, wide_to_utf8(utf8_to_wide("-\xF0\xA0\x80\x8B-")),
-		"-\xF0\xA0\x80\x8B-");
-
+	UASSERTEQ(std::string, wide_to_utf8(utf8_to_wide(u8"-ä-")),
+		u8"-ä-");
+	UASSERTEQ(std::string, wide_to_utf8(utf8_to_wide(u8"-\U0002000b-")),
+		u8"-\U0002000b-");
+	if constexpr (sizeof(wchar_t) == 4) {
+		const auto *literal = U"-\U0002000b-";
+		UASSERT(utf8_to_wide(u8"-\U0002000b-") == reinterpret_cast<const wchar_t*>(literal));
+	}
 }
 
 void TestUtilities::testRemoveEscapes()
@@ -643,7 +648,7 @@ void TestUtilities::testSanitizeDirName()
 	UASSERTEQ(auto, sanitizeDirName(" a ", "~"), "_a_");
 	UASSERTEQ(auto, sanitizeDirName("COM1", "~"), "~COM1");
 	UASSERTEQ(auto, sanitizeDirName("COM1", ":"), "_COM1");
-	UASSERTEQ(auto, sanitizeDirName("cOm\u00B2", "~"), "~cOm\u00B2");
+	UASSERTEQ(auto, sanitizeDirName(u8"cOm\u00B2", "~"), u8"~cOm\u00B2");
 	UASSERTEQ(auto, sanitizeDirName("cOnIn$", "~"), "~cOnIn$");
 	UASSERTEQ(auto, sanitizeDirName(" cOnIn$ ", "~"), "_cOnIn$_");
 }
diff --git a/src/util/string.h b/src/util/string.h
index 0c81a15cb..bddbc62ce 100644
--- a/src/util/string.h
+++ b/src/util/string.h
@@ -101,6 +101,21 @@ bool parseColorString(const std::string &value, video::SColor &color, bool quiet
 		unsigned char default_alpha = 0xff);
 std::string encodeHexColorString(video::SColor color);
 
+/**
+ * Converts an ASCII letter to lowercase; bytes above 0x7f (non-ASCII) are returned as-is.
+ * @param c input character (a single byte, not a Unicode codepoint)
+ * @returns result of tolower() for \p c, or \p c itself unchanged if it is above 0x7f
+*/
+inline char my_tolower(char c)
+{
+	// By design this function cannot handle any Unicode (codepoints don't fit into char),
+	// so every byte outside the ASCII range is passed through unchanged.
+	// tolower() can mangle such bytes if the POSIX locale is not UTF-8.
+	if (static_cast<unsigned char>(c) > 0x7f)
+		return c;
+	// toupper(3): "If the argument c is of type char, it must be cast to unsigned char"
+	return tolower(static_cast<unsigned char>(c));
+}
 
 /**
  * Returns a copy of \p str with spaces inserted at the right hand side to ensure
@@ -173,7 +188,7 @@ inline bool str_equal(std::basic_string_view<T> s1,
 		return false;
 
 	for (size_t i = 0; i < s1.size(); ++i)
-		if(tolower(s1[i]) != tolower(s2[i]))
+		if (my_tolower(s1[i]) != my_tolower(s2[i]))
 			return false;
 
 	return true;
@@ -212,7 +227,7 @@ inline bool str_starts_with(std::basic_string_view<T> str,
 		return str.compare(0, prefix.size(), prefix) == 0;
 
 	for (size_t i = 0; i < prefix.size(); ++i)
-		if (tolower(str[i]) != tolower(prefix[i]))
+		if (my_tolower(str[i]) != my_tolower(prefix[i]))
 			return false;
 	return true;
 }
@@ -253,7 +268,7 @@ inline bool str_ends_with(std::basic_string_view<T> str,
 		return str.compare(start, suffix.size(), suffix) == 0;
 
 	for (size_t i = 0; i < suffix.size(); ++i)
-		if (tolower(str[start + i]) != tolower(suffix[i]))
+		if (my_tolower(str[start + i]) != my_tolower(suffix[i]))
 			return false;
 	return true;
 }
@@ -305,7 +320,7 @@ inline std::string lowercase(std::string_view str)
 	std::string s2;
 	s2.resize(str.size());
 	for (size_t i = 0; i < str.size(); i++)
-		s2[i] = tolower(str[i]);
+		s2[i] = my_tolower(str[i]);
 	return s2;
 }