yhirose 1 день назад
Родитель
Сommit
96785eea21
2 измененных файлов с 333 добавлено и 32 удалено
  1. 111 32
      httplib.h
  2. 222 0
      test/test.cc

+ 111 - 32
httplib.h

@@ -698,6 +698,93 @@ inline bool parse_port(const std::string &s, int &port) {
   return parse_port(s.data(), s.size(), port);
 }
 
+struct UrlComponents { // pieces of a URL as filled in by parse_url()
+  std::string scheme; // text before "://"; empty when the URL has none
+  std::string host;   // host name, or an IPv6 literal without its brackets
+  std::string port;   // text after the host's ':'; not checked to be numeric
+  std::string path;   // path portion, ending before the first '?' or '#'
+  std::string query;  // starts with '?'; ends before '#' (fragment not kept)
+};
+
+// Splits `url` into scheme, host, port, path and query and stores them in
+// `uc`. The fragment ("#...") is recognised but never stored.
+//
+// Accepted shapes:
+//   - "scheme://authority[path][?query][#fragment]"
+//   - "//authority[path][?query][#fragment]"
+//   - a bare "host[:port]" (host may be a bracketed IPv6 literal)
+//   - a plain path / query / fragment reference
+//
+// Returns false when the scheme is empty or not [a-z]+, or when a bracketed
+// IPv6 host is unterminated, empty, contains characters outside
+// [a-fA-F0-9:], or is followed by anything other than ':', '/', '?', '#' or
+// the end of input. `uc` is reset on entry; after a failure it may be
+// partially filled. The port is returned as raw text and is not validated.
+inline bool parse_url(const std::string &url, UrlComponents &uc) {
+  uc = {};
+  size_t pos = 0;
+
+  // "://" only introduces a scheme when it appears before any '/', '?' or
+  // '#'. Otherwise it is part of the path or query (for example
+  // "/login?next=http://example.com/") and must not be read as a scheme.
+  auto sep = url.find("://");
+  if (sep != std::string::npos && url.find_first_of("/?#") > sep) {
+    uc.scheme = url.substr(0, sep);
+
+    // Scheme must be [a-z]+ only
+    if (uc.scheme.empty()) { return false; }
+    for (auto c : uc.scheme) {
+      if (c < 'a' || c > 'z') { return false; }
+    }
+
+    pos = sep + 3;
+  } else if (url.compare(0, 2, "//") == 0) {
+    pos = 2;
+  }
+
+  auto has_authority_prefix = pos > 0;
+  auto has_authority = has_authority_prefix || (!url.empty() && url[0] != '/' &&
+                                                url[0] != '?' && url[0] != '#');
+  if (has_authority) {
+    if (pos < url.size() && url[pos] == '[') {
+      auto close = url.find(']', pos);
+      if (close == std::string::npos) { return false; }
+      uc.host = url.substr(pos + 1, close - pos - 1);
+
+      // IPv6 host must be [a-fA-F0-9:]+ only
+      if (uc.host.empty()) { return false; }
+      for (auto c : uc.host) {
+        if (!((c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F') ||
+              (c >= '0' && c <= '9') || c == ':')) {
+          return false;
+        }
+      }
+
+      pos = close + 1;
+
+      // The bracketed literal must be followed by a port, path, query,
+      // fragment or nothing; "[::1]evil" is not a valid authority.
+      if (pos < url.size() && url[pos] != ':' && url[pos] != '/' &&
+          url[pos] != '?' && url[pos] != '#') {
+        return false;
+      }
+    } else {
+      auto end = url.find_first_of(":/?#", pos);
+      if (end == std::string::npos) { end = url.size(); }
+      uc.host = url.substr(pos, end - pos);
+      pos = end;
+    }
+
+    if (pos < url.size() && url[pos] == ':') {
+      ++pos;
+      auto end = url.find_first_of("/?#", pos);
+      if (end == std::string::npos) { end = url.size(); }
+      uc.port = url.substr(pos, end - pos);
+      pos = end;
+    }
+
+    // Without :// or //, the entire input must be consumed as host[:port].
+    // If there is leftover (path, query, etc.), this is not a valid
+    // host[:port] string — clear and reparse as a plain path.
+    if (!has_authority_prefix && pos < url.size()) {
+      uc.host.clear();
+      uc.port.clear();
+      pos = 0;
+    }
+  }
+
+  // Path: everything up to the query or fragment.
+  if (pos < url.size() && url[pos] != '?' && url[pos] != '#') {
+    auto end = url.find_first_of("?#", pos);
+    if (end == std::string::npos) { end = url.size(); }
+    uc.path = url.substr(pos, end - pos);
+    pos = end;
+  }
+
+  // Query: kept with its leading '?', cut off at the fragment.
+  if (pos < url.size() && url[pos] == '?') {
+    auto end = url.find('#', pos);
+    if (end == std::string::npos) { end = url.size(); }
+    uc.query = url.substr(pos, end - pos);
+  }
+
+  return true;
+}
+
 } // namespace detail
 
 enum SSLVerifierResponse {
@@ -12940,20 +13027,21 @@ inline bool ClientImpl::redirect(Request &req, Response &res, Error &error) {
   auto location = res.get_header_value("location");
   if (location.empty()) { return false; }
 
-  thread_local const std::regex re(
-      R"((?:(https?):)?(?://(?:\[([a-fA-F\d:]+)\]|([^:/?#]+))(?::(\d+))?)?([^?#]*)(\?[^#]*)?(?:#.*)?)");
+  detail::UrlComponents uc;
+  if (!detail::parse_url(location, uc)) { return false; }
 
-  std::smatch m;
-  if (!std::regex_match(location, m, re)) { return false; }
+  // Only follow http/https redirects
+  if (!uc.scheme.empty() && uc.scheme != "http" && uc.scheme != "https") {
+    return false;
+  }
 
   auto scheme = is_ssl() ? "https" : "http";
 
-  auto next_scheme = m[1].str();
-  auto next_host = m[2].str();
-  if (next_host.empty()) { next_host = m[3].str(); }
-  auto port_str = m[4].str();
-  auto next_path = m[5].str();
-  auto next_query = m[6].str();
+  auto next_scheme = std::move(uc.scheme);
+  auto next_host = std::move(uc.host);
+  auto port_str = std::move(uc.port);
+  auto next_path = std::move(uc.path);
+  auto next_query = std::move(uc.query);
 
   auto next_port = port_;
   if (!port_str.empty()) {
@@ -12966,7 +13054,7 @@ inline bool ClientImpl::redirect(Request &req, Response &res, Error &error) {
   if (next_host.empty()) { next_host = host_; }
   if (next_path.empty()) { next_path = "/"; }
 
-  auto path = decode_query_component(next_path, true) + next_query;
+  auto path = decode_path_component(next_path) + next_query;
 
   // Same host redirect - use current client
   if (next_scheme == scheme && next_host == host_ && next_port == port_) {
@@ -14690,12 +14778,9 @@ inline Client::Client(const std::string &scheme_host_port)
 inline Client::Client(const std::string &scheme_host_port,
                       const std::string &client_cert_path,
                       const std::string &client_key_path) {
-  const static std::regex re(
-      R"((?:([a-z]+):\/\/)?(?:\[([a-fA-F\d:]+)\]|([^:/?#]+))(?::(\d+))?)");
-
-  std::smatch m;
-  if (std::regex_match(scheme_host_port, m, re)) {
-    auto scheme = m[1].str();
+  detail::UrlComponents uc;
+  if (detail::parse_url(scheme_host_port, uc) && !uc.host.empty()) {
+    auto &scheme = uc.scheme;
 
 #ifdef CPPHTTPLIB_SSL_ENABLED
     if (!scheme.empty() && (scheme != "http" && scheme != "https")) {
@@ -14711,12 +14796,10 @@ inline Client::Client(const std::string &scheme_host_port,
 
     auto is_ssl = scheme == "https";
 
-    auto host = m[2].str();
-    if (host.empty()) { host = m[3].str(); }
+    auto host = std::move(uc.host);
 
-    auto port_str = m[4].str();
     auto port = is_ssl ? 443 : 80;
-    if (!port_str.empty() && !detail::parse_port(port_str, port)) { return; }
+    if (!uc.port.empty() && !detail::parse_port(uc.port, port)) { return; }
 
     if (is_ssl) {
 #ifdef CPPHTTPLIB_SSL_ENABLED
@@ -20123,12 +20206,10 @@ inline bool WebSocket::is_open() const { return !closed_; }
 inline WebSocketClient::WebSocketClient(
     const std::string &scheme_host_port_path, const Headers &headers)
     : headers_(headers) {
-  const static std::regex re(
-      R"(([a-z]+):\/\/(?:\[([a-fA-F\d:]+)\]|([^:/?#]+))(?::(\d+))?(\/.*))");
-
-  std::smatch m;
-  if (std::regex_match(scheme_host_port_path, m, re)) {
-    auto scheme = m[1].str();
+  detail::UrlComponents uc;
+  if (detail::parse_url(scheme_host_port_path, uc) && !uc.scheme.empty() &&
+      !uc.host.empty() && !uc.path.empty()) {
+    auto &scheme = uc.scheme;
 
 #ifdef CPPHTTPLIB_SSL_ENABLED
     if (scheme != "ws" && scheme != "wss") {
@@ -20144,14 +20225,12 @@ inline WebSocketClient::WebSocketClient(
 
     auto is_ssl = scheme == "wss";
 
-    host_ = m[2].str();
-    if (host_.empty()) { host_ = m[3].str(); }
+    host_ = std::move(uc.host);
 
-    auto port_str = m[4].str();
     port_ = is_ssl ? 443 : 80;
-    if (!port_str.empty() && !detail::parse_port(port_str, port_)) { return; }
+    if (!uc.port.empty() && !detail::parse_port(uc.port, port_)) { return; }
 
-    path_ = m[5].str();
+    path_ = std::move(uc.path);
 
 #ifdef CPPHTTPLIB_SSL_ENABLED
     is_ssl_ = is_ssl;

+ 222 - 0
test/test.cc

@@ -12392,6 +12392,38 @@ TEST(RedirectTest, RedirectToUrlWithPlusInQueryParameters) {
   }
 }
 
+TEST(RedirectTest, RedirectWithPlusInPath) {
+  Server svr;
+
+  // The root handler redirects to a path that contains a literal '+'.
+  svr.Get("/", [](const Request & /*req*/, Response &res) {
+    res.set_redirect("/a+b");
+  });
+
+  // Route patterns are regexes, so '+' is escaped to match literally.
+  // The handler echoes back the path it was called with.
+  svr.Get(R"(/a\+b)", [](const Request &req, Response &res) {
+    res.set_content(req.path, "text/plain");
+  });
+
+  auto listener = std::thread([&]() { svr.listen(HOST, PORT); });
+  auto cleanup = detail::scope_exit([&] {
+    svr.stop();
+    listener.join();
+    ASSERT_FALSE(svr.is_running());
+  });
+
+  svr.wait_until_ready();
+
+  {
+    Client cli(HOST, PORT);
+    cli.set_follow_location(true);
+
+    // Following the redirect must keep the '+' in the path as-is.
+    auto result = cli.Get("/");
+    ASSERT_TRUE(result);
+    EXPECT_EQ(StatusCode::OK_200, result->status);
+    EXPECT_EQ("/a+b", result->body);
+  }
+}
+
 #ifdef CPPHTTPLIB_SSL_ENABLED
 TEST(RedirectTest, Issue2185_Online) {
   SSLClient client("github.com");
@@ -12672,6 +12704,196 @@ TEST(PathParamsTest, SemicolonInTheMiddleIsNotAParam) {
   EXPECT_EQ(request.path_params, expected_params);
 }
 
+TEST(ParseUrlTest, VariousPatterns) {
+  {
+    // Full URL: every component present, fragment not reported.
+    detail::UrlComponents parts;
+    ASSERT_TRUE(
+        detail::parse_url("http://example.com:8080/path?q=1#frag", parts));
+    EXPECT_EQ("http", parts.scheme);
+    EXPECT_EQ("example.com", parts.host);
+    EXPECT_EQ("8080", parts.port);
+    EXPECT_EQ("/path", parts.path);
+    EXPECT_EQ("?q=1", parts.query);
+  }
+  {
+    // No explicit port.
+    detail::UrlComponents parts;
+    ASSERT_TRUE(detail::parse_url("https://example.com/path", parts));
+    EXPECT_EQ("https", parts.scheme);
+    EXPECT_EQ("example.com", parts.host);
+    EXPECT_TRUE(parts.port.empty());
+    EXPECT_EQ("/path", parts.path);
+  }
+  {
+    // Bracketed IPv6 literal: brackets are stripped from the host.
+    detail::UrlComponents parts;
+    ASSERT_TRUE(detail::parse_url("http://[::1]:8080/path", parts));
+    EXPECT_EQ("::1", parts.host);
+    EXPECT_EQ("8080", parts.port);
+    EXPECT_EQ("/path", parts.path);
+  }
+  {
+    // Missing closing bracket.
+    detail::UrlComponents parts;
+    ASSERT_FALSE(detail::parse_url("http://[::1/path", parts));
+  }
+  {
+    // Scheme-relative URL.
+    detail::UrlComponents parts;
+    ASSERT_TRUE(detail::parse_url("//example.com/path?q=1", parts));
+    EXPECT_TRUE(parts.scheme.empty());
+    EXPECT_EQ("example.com", parts.host);
+    EXPECT_EQ("/path", parts.path);
+    EXPECT_EQ("?q=1", parts.query);
+  }
+  {
+    // Absolute path with a query and no authority.
+    detail::UrlComponents parts;
+    ASSERT_TRUE(detail::parse_url("/path?q=1", parts));
+    EXPECT_TRUE(parts.host.empty());
+    EXPECT_EQ("/path", parts.path);
+    EXPECT_EQ("?q=1", parts.query);
+  }
+  {
+    // Bare host:port without a scheme.
+    detail::UrlComponents parts;
+    ASSERT_TRUE(detail::parse_url("example.com:8080", parts));
+    EXPECT_EQ("example.com", parts.host);
+    EXPECT_EQ("8080", parts.port);
+  }
+  {
+    // Unix socket path — must not be parsed as host
+    detail::UrlComponents parts;
+    ASSERT_TRUE(detail::parse_url("./httplib-server.sock", parts));
+    EXPECT_TRUE(parts.host.empty());
+  }
+  {
+    // Empty input is accepted and produces empty components.
+    detail::UrlComponents parts;
+    ASSERT_TRUE(detail::parse_url("", parts));
+    EXPECT_TRUE(parts.host.empty());
+    EXPECT_TRUE(parts.path.empty());
+  }
+  {
+    // Upper-case scheme is rejected.
+    detail::UrlComponents parts;
+    ASSERT_FALSE(detail::parse_url("HTTP://example.com/path", parts));
+  }
+  {
+    // Digits are not allowed in the scheme.
+    detail::UrlComponents parts;
+    ASSERT_FALSE(detail::parse_url("h2://example.com/path", parts));
+  }
+  {
+    // Accepted by parse_url; callers restrict to http/https
+    detail::UrlComponents parts;
+    ASSERT_TRUE(detail::parse_url("ftp://example.com/", parts));
+    EXPECT_EQ("ftp", parts.scheme);
+  }
+  {
+    // Characters outside the IPv6 alphabet inside the brackets.
+    detail::UrlComponents parts;
+    ASSERT_FALSE(detail::parse_url("http://[::1<script>]/path", parts));
+  }
+  {
+    // Empty bracketed host.
+    detail::UrlComponents parts;
+    ASSERT_FALSE(detail::parse_url("http://[]/path", parts));
+  }
+}
+
+TEST(ParseUrlTest, FragmentHandling) {
+  {
+    // A fragment after the path is dropped and does not leak into the query.
+    detail::UrlComponents parts;
+    ASSERT_TRUE(detail::parse_url("http://example.com/path#frag", parts));
+    EXPECT_EQ("/path", parts.path);
+    EXPECT_TRUE(parts.query.empty());
+  }
+  {
+    // A fragment-only reference yields neither a path nor a query.
+    detail::UrlComponents parts;
+    ASSERT_TRUE(detail::parse_url("#frag", parts));
+    EXPECT_TRUE(parts.path.empty());
+    EXPECT_TRUE(parts.query.empty());
+  }
+}
+
+TEST(ParseUrlTest, UserinfoHandling) {
+  // parse_url does not split off userinfo: with an '@' but no colon, the
+  // "user@" prefix stays part of the reported host.
+  detail::UrlComponents parts;
+  ASSERT_TRUE(detail::parse_url("http://user@host.com/path", parts));
+  EXPECT_EQ("user@host.com", parts.host);
+  EXPECT_EQ("/path", parts.path);
+}
+
+TEST(ParseUrlTest, IPv6EdgeCases) {
+  {
+    // Bracketed literal with a port but no scheme or "//" prefix.
+    detail::UrlComponents parts;
+    ASSERT_TRUE(detail::parse_url("[::1]:8080", parts));
+    EXPECT_TRUE(parts.scheme.empty());
+    EXPECT_EQ("::1", parts.host);
+    EXPECT_EQ("8080", parts.port);
+  }
+  {
+    // Zone ID '%25' is not in [a-fA-F0-9:], so the URL is rejected.
+    detail::UrlComponents parts;
+    ASSERT_FALSE(detail::parse_url("http://[fe80::1%25eth0]:443/path", parts));
+  }
+}
+
+TEST(ParseUrlTest, SchemeEdgeCases) {
+  {
+    // Nothing before "://": empty scheme.
+    detail::UrlComponents parts;
+    ASSERT_FALSE(detail::parse_url("://evil.com/path", parts));
+  }
+  {
+    // '-' is not allowed in the scheme.
+    detail::UrlComponents parts;
+    ASSERT_FALSE(detail::parse_url("ht-tp://evil.com/path", parts));
+  }
+  {
+    // '.' is not allowed in the scheme.
+    detail::UrlComponents parts;
+    ASSERT_FALSE(detail::parse_url("h.t://evil.com/path", parts));
+  }
+}
+
+TEST(ParseUrlTest, PortEdgeCases) {
+  {
+    // A ':' with nothing after it gives an empty port, not an error.
+    detail::UrlComponents parts;
+    ASSERT_TRUE(detail::parse_url("http://example.com:/path", parts));
+    EXPECT_TRUE(parts.port.empty());
+    EXPECT_EQ("/path", parts.path);
+  }
+  {
+    // parse_url accepts any port string; validation is done by parse_port
+    detail::UrlComponents parts;
+    ASSERT_TRUE(detail::parse_url("http://example.com:abc/path", parts));
+    EXPECT_EQ("abc", parts.port);
+  }
+}
+
+TEST(ParseUrlTest, WebSocketPatterns) {
+  {
+    // "ws" URL with an explicit port.
+    detail::UrlComponents parts;
+    ASSERT_TRUE(detail::parse_url("ws://echo.example.com:8080/ws", parts));
+    EXPECT_EQ("ws", parts.scheme);
+    EXPECT_EQ("echo.example.com", parts.host);
+    EXPECT_EQ("8080", parts.port);
+    EXPECT_EQ("/ws", parts.path);
+  }
+  {
+    // "wss" URL without a port.
+    detail::UrlComponents parts;
+    ASSERT_TRUE(detail::parse_url("wss://echo.example.com/ws", parts));
+    EXPECT_EQ("wss", parts.scheme);
+    EXPECT_EQ("echo.example.com", parts.host);
+    EXPECT_TRUE(parts.port.empty());
+    EXPECT_EQ("/ws", parts.path);
+  }
+}
+
+TEST(ParseUrlTest, QueryOnly) {
+  // A reference made of a query alone: no host, no path, and the query is
+  // returned with its leading '?'.
+  detail::UrlComponents parts;
+  ASSERT_TRUE(detail::parse_url("?q=1&r=2", parts));
+  EXPECT_TRUE(parts.host.empty());
+  EXPECT_TRUE(parts.path.empty());
+  EXPECT_EQ("?q=1&r=2", parts.query);
+}
+
+TEST(ParseUrlTest, SchemeRelativeWithPort) {
+  // "//host:port/path" has an authority but leaves the scheme empty.
+  detail::UrlComponents parts;
+  ASSERT_TRUE(detail::parse_url("//example.com:443/path", parts));
+  EXPECT_TRUE(parts.scheme.empty());
+  EXPECT_EQ("example.com", parts.host);
+  EXPECT_EQ("443", parts.port);
+  EXPECT_EQ("/path", parts.path);
+}
+
 TEST(UniversalClientImplTest, Ipv6LiteralAddress) {
   // If ipv6 regex working, regex match codepath is taken.
   // else port will default to 80 in Client impl