Browse Source

Add tests for Unicode path component decoding in decode_path_component function

yhirose 2 weeks ago
parent
commit
43a54a3e3d
2 changed files with 15 additions and 1 deletion
  1. 2 1
      httplib.h
  2. 13 0
      test/test.cc

+ 2 - 1
httplib.h

@@ -9292,7 +9292,8 @@ inline std::string decode_path_component(const std::string &component) {
         // Unicode %uXXXX encoding
         auto val = 0;
         if (detail::from_hex_to_i(component, i + 2, 4, val)) {
-          // 4 digits Unicode codes
+          // 4 digits Unicode codes: val is 0x0000-0xFFFF (from 4 hex digits),
+          // so to_utf8 writes at most 3 bytes. buff[4] is safe.
           char buff[4];
           size_t len = detail::to_utf8(val, buff);
           if (len > 0) { result.append(buff, len); }

+ 13 - 0
test/test.cc

@@ -413,6 +413,19 @@ TEST(DecodePathTest, PercentCharacterNUL) {
   EXPECT_EQ(decode_path_component("x%00x"), expected);
 }
 
+TEST(DecodePathTest, UnicodeEncoding) {
+  // %u0041 = 'A' (1-byte UTF-8)
+  EXPECT_EQ("A", decode_path_component("%u0041"));
+  // %u00E9 = 'é' (2-byte UTF-8)
+  EXPECT_EQ(U8("é"), decode_path_component("%u00E9"));
+  // %u3042 = 'あ' (3-byte UTF-8)
+  EXPECT_EQ(U8("あ"), decode_path_component("%u3042"));
+  // %uFFFF = max 4-digit hex (3-byte UTF-8, must not overflow buff[4])
+  EXPECT_FALSE(decode_path_component("%uFFFF").empty());
+  // %uD800 = surrogate (invalid, silently dropped)
+  EXPECT_EQ("", decode_path_component("%uD800"));
+}
+
 TEST(SanitizeFilenameTest, VariousPatterns) {
   // Path traversal
   EXPECT_EQ("passwd", httplib::sanitize_filename("../../../etc/passwd"));