diff --git a/ci/scripts/cpp_build.sh b/ci/scripts/cpp_build.sh index 1e2f3e8f8f1a..3d9b2ba72d41 100755 --- a/ci/scripts/cpp_build.sh +++ b/ci/scripts/cpp_build.sh @@ -70,6 +70,7 @@ if [ "${ARROW_ENABLE_THREADING:-ON}" = "OFF" ]; then ARROW_JEMALLOC=OFF ARROW_MIMALLOC=OFF ARROW_S3=OFF + ARROW_S3_MODULE=OFF ARROW_WITH_OPENTELEMETRY=OFF fi @@ -229,6 +230,7 @@ else -DARROW_PARQUET=${ARROW_PARQUET:-OFF} \ -DARROW_RUNTIME_SIMD_LEVEL=${ARROW_RUNTIME_SIMD_LEVEL:-MAX} \ -DARROW_S3=${ARROW_S3:-OFF} \ + -DARROW_S3_MODULE=${ARROW_S3_MODULE:-OFF} \ -DARROW_SIMD_LEVEL=${ARROW_SIMD_LEVEL:-DEFAULT} \ -DARROW_SUBSTRAIT=${ARROW_SUBSTRAIT:-OFF} \ -DARROW_TEST_LINKAGE=${ARROW_TEST_LINKAGE:-shared} \ diff --git a/cpp/src/arrow/filesystem/CMakeLists.txt b/cpp/src/arrow/filesystem/CMakeLists.txt index e6330df42603..ee46f4d256ce 100644 --- a/cpp/src/arrow/filesystem/CMakeLists.txt +++ b/cpp/src/arrow/filesystem/CMakeLists.txt @@ -128,7 +128,7 @@ if(ARROW_S3) endif() endif() - if(ARROW_S3_MODULE) + if(ARROW_S3_MODULE AND ARROW_BUILD_TESTS) add_arrow_test(s3fs_module_test SOURCES s3fs_module_test.cc diff --git a/cpp/src/arrow/filesystem/filesystem.cc b/cpp/src/arrow/filesystem/filesystem.cc index 8281bed7ce16..f92336e004ea 100644 --- a/cpp/src/arrow/filesystem/filesystem.cc +++ b/cpp/src/arrow/filesystem/filesystem.cc @@ -893,10 +893,10 @@ Status LoadFileSystemFactories(const char* libpath) { namespace { -Result> FileSystemFromUriReal(const Uri& uri, - const std::string& uri_string, - const io::IOContext& io_context, - std::string* out_path) { +Result> FileSystemFromUriReal( + const Uri& uri, const std::string& uri_string, + const FileSystemFactoryOptions& options, const io::IOContext& io_context, + std::string* out_path) { const auto scheme = uri.scheme(); { @@ -904,9 +904,13 @@ Result> FileSystemFromUriReal(const Uri& uri, auto* factory, FileSystemFactoryRegistry::GetInstance()->FactoryForScheme(scheme)); if (factory != nullptr) { - return factory->function(uri, io_context, out_path); + return factory->function(uri, options, io_context, out_path); } } + if (!options.empty()) { + return Status::NotImplemented("Filesystem options are not supported yet for scheme '", + scheme, "', got ", options.size(), " option(s)"); + } if (scheme == "abfs" || scheme == "abfss") { #ifdef ARROW_AZURE @@ -962,14 +966,28 @@ Result> FileSystemFromUriReal(const Uri& uri, Result> FileSystemFromUri(const std::string& uri_string, std::string* out_path) { - return FileSystemFromUri(uri_string, io::default_io_context(), out_path); + return FileSystemFromUriAndOptions(uri_string, /*options=*/{}, io::default_io_context(), + out_path); +} + +Result> FileSystemFromUriAndOptions( + const std::string& uri_string, const FileSystemFactoryOptions& options, + std::string* out_path) { + return FileSystemFromUriAndOptions(uri_string, options, io::default_io_context(), + out_path); } Result> FileSystemFromUri(const std::string& uri_string, const io::IOContext& io_context, std::string* out_path) { + return FileSystemFromUriAndOptions(uri_string, /*options=*/{}, io_context, out_path); +} + +Result> FileSystemFromUriAndOptions( + const std::string& uri_string, const FileSystemFactoryOptions& options, + const io::IOContext& io_context, std::string* out_path) { ARROW_ASSIGN_OR_RAISE(auto fsuri, ParseFileSystemUri(uri_string)); - return FileSystemFromUriReal(fsuri, uri_string, io_context, out_path); + return FileSystemFromUriReal(fsuri, uri_string, options, io_context, out_path); } Result> FileSystemFromUriOrPath(const std::string& uri_string, diff --git a/cpp/src/arrow/filesystem/filesystem.h b/cpp/src/arrow/filesystem/filesystem.h index 3a47eb62f524..a0478763a463 100644 --- a/cpp/src/arrow/filesystem/filesystem.h +++ b/cpp/src/arrow/filesystem/filesystem.h @@ -17,6 +17,7 @@ #pragma once +#include #include #include #include @@ -28,6 +29,7 @@ #include "arrow/filesystem/type_fwd.h" #include "arrow/io/interfaces.h" +#include "arrow/result.h" #include "arrow/type_fwd.h" #include "arrow/util/compare.h" #include "arrow/util/macros.h" @@ -357,13 +359,44 @@ class ARROW_EXPORT FileSystem bool default_async_is_sync_ = true; }; +using FileSystemFactoryOptions = std::vector>; + struct FileSystemFactory { std::function>( - const Uri& uri, const io::IOContext& io_context, std::string* out_path)> + const Uri& uri, const FileSystemFactoryOptions& options, + const io::IOContext& io_context, std::string* out_path)> function; std::string_view file; int line; + /// Construct from an options-aware factory function. + FileSystemFactory(std::function>( + const Uri&, const FileSystemFactoryOptions&, const io::IOContext&, + std::string*)> + fn, + std::string_view file, int line) + : function(std::move(fn)), file(file), line(line) {} + + /// Construct from a non-options aware factory function maintaining source compatibility + /// with existing factories. + FileSystemFactory(std::function>( + const Uri&, const io::IOContext&, std::string*)> + fn, + std::string_view file, int line) + : function([fn = std::move(fn)]( + const Uri& uri, const FileSystemFactoryOptions& options, + const io::IOContext& ctx, + std::string* out_path) -> Result> { + if (!options.empty()) { + return Status::NotImplemented( + "Filesystem factory does not support additional options, got ", + options.size(), " option(s)"); + } + return fn(uri, ctx, out_path); + }), + file(file), + line(line) {} + bool operator==(const FileSystemFactory& other) const { // In the case where libarrow is linked statically both to the executable and to a // dynamically loaded filesystem implementation library, the library contains a @@ -547,6 +580,30 @@ ARROW_EXPORT Result> FileSystemFromUri(const std::string& uri, std::string* out_path = NULLPTR); +/// \brief Create a new FileSystem by URI with extended backend-specific filesystem +/// options +/// +/// Recognized schemes are "file", "mock", "hdfs", "viewfs", "s3", +/// "gs" and "gcs". +/// +/// Support for other schemes can be added using RegisterFileSystemFactory. +/// +/// \param[in] uri the URI to give access to +/// \param[in] options a list of backend-specific filesystem options +/// Each option is a (name, value) pair. +/// The expected type is specific to the backend and +/// option name. +/// Options are forwarded to schemes dispatched through a registered +/// FileSystemFactory. Non-empty options return NotImplemented for a registered +/// FileSystemFactory that does not support them or for schemes not handled by +/// a registered factory. +/// \param[out] out_path (optional) Path inside the filesystem. +/// \return out_fs FileSystem instance. +ARROW_EXPORT +Result> FileSystemFromUriAndOptions( + const std::string& uri, const FileSystemFactoryOptions& options, + std::string* out_path = NULLPTR); + /// \brief Create a new FileSystem by URI with a custom IO context /// /// Recognized schemes are "file", "mock", "hdfs", "viewfs", "s3", @@ -563,6 +620,31 @@ Result> FileSystemFromUri(const std::string& uri, const io::IOContext& io_context, std::string* out_path = NULLPTR); +/// \brief Create a new FileSystem by URI with a custom IO context with backend-specific +/// filesystem options +/// +/// Recognized schemes are "file", "mock", "hdfs", "viewfs", "s3", +/// "gs" and "gcs". +/// +/// Support for other schemes can be added using RegisterFileSystemFactory. +/// +/// \param[in] uri a URI-based path, ex: file:///some/local/path +/// \param[in] options a list of backend-specific filesystem options +/// Each option is a (name, value) pair. +/// The expected type is specific to the backend and +/// option name. +/// Options are forwarded to schemes dispatched through a registered +/// FileSystemFactory. Non-empty options return NotImplemented for a registered +/// FileSystemFactory that does not support them or for schemes not handled by +/// a registered factory. +/// \param[in] io_context an IOContext which will be associated with the filesystem +/// \param[out] out_path (optional) Path inside the filesystem. +/// \return out_fs FileSystem instance. +ARROW_EXPORT +Result> FileSystemFromUriAndOptions( + const std::string& uri, const FileSystemFactoryOptions& options, + const io::IOContext& io_context, std::string* out_path = NULLPTR); + /// \brief Create a new FileSystem by URI /// /// Support for other schemes can be added using RegisterFileSystemFactory. diff --git a/cpp/src/arrow/filesystem/filesystem_test.cc b/cpp/src/arrow/filesystem/filesystem_test.cc index 5072c3a8c25b..10a3922e36ac 100644 --- a/cpp/src/arrow/filesystem/filesystem_test.cc +++ b/cpp/src/arrow/filesystem/filesystem_test.cc @@ -15,6 +15,7 @@ // specific language governing permissions and limitations // under the License. +#include #include #include #include @@ -640,6 +641,11 @@ TEST_F(TestMockFS, FileSystemFromUri) { Invalid, ::testing::HasSubstr("syntax error at character ' ' (position 12)"), FileSystemFromUri("mock:/folder name/bar", &path)); CheckDirs({}); + FileSystemFactoryOptions options{{"some_option", 1}}; + EXPECT_RAISES_WITH_MESSAGE_THAT( + NotImplemented, ::testing::HasSubstr("options are not supported"), + FileSystemFromUriAndOptions("mock:///foo/bar", options, &path)); + CheckDirs({}); } //////////////////////////////////////////////////////////////////////////// diff --git a/cpp/src/arrow/filesystem/localfs_test.cc b/cpp/src/arrow/filesystem/localfs_test.cc index 2e91783c92dc..212d91989659 100644 --- a/cpp/src/arrow/filesystem/localfs_test.cc +++ b/cpp/src/arrow/filesystem/localfs_test.cc @@ -15,6 +15,7 @@ // specific language governing permissions and limitations // under the License. +#include #include #include #include @@ -29,6 +30,7 @@ #include "arrow/filesystem/path_util.h" #include "arrow/filesystem/test_util.h" #include "arrow/filesystem/util_internal.h" +#include "arrow/testing/examplefs.h" #include "arrow/testing/gtest_util.h" #include "arrow/testing/matchers.h" #include "arrow/util/io_util.h" @@ -83,6 +85,12 @@ Result> FSFromUriOrPath(const std::string& uri, //////////////////////////////////////////////////////////////////////////// // Registered FileSystemFactory tests +struct ConcreteTypedOption : ExampleTypedOption { + explicit ConcreteTypedOption(int value) : value_(value) {} + int value() const override { return value_; } + int value_; +}; + class SlowFileSystemPublicProps : public SlowFileSystem { public: SlowFileSystemPublicProps(std::shared_ptr base_fs, double average_latency, @@ -144,7 +152,6 @@ TEST(FileSystemFromUri, LoadedRegisteredFactory) { EXPECT_THAT(FileSystemFromUri("example:///hey/yo", &path), Raises(StatusCode::Invalid)); EXPECT_THAT(LoadFileSystemFactories(ARROW_FILESYSTEM_EXAMPLE_LIBPATH), Ok()); - ASSERT_OK_AND_ASSIGN(auto fs, FileSystemFromUri("example:///hey/yo", &path)); EXPECT_EQ(path, "/hey/yo"); EXPECT_EQ(fs->type_name(), "local"); @@ -171,10 +178,21 @@ TEST(FileSystemFromUri, RuntimeRegisteredFactory) { } FileSystemRegistrar kSegfaultFileSystemModule[]{ - ARROW_REGISTER_FILESYSTEM("segfault", nullptr, {}), - ARROW_REGISTER_FILESYSTEM("segfault", nullptr, {}), - ARROW_REGISTER_FILESYSTEM("segfault", nullptr, {}), -}; + ARROW_REGISTER_FILESYSTEM( + "segfault", + std::function>( + const Uri&, const io::IOContext&, std::string*)>(nullptr), + {}), + ARROW_REGISTER_FILESYSTEM( + "segfault", + std::function>( + const Uri&, const io::IOContext&, std::string*)>(nullptr), + {}), + ARROW_REGISTER_FILESYSTEM( + "segfault", + std::function>( + const Uri&, const io::IOContext&, std::string*)>(nullptr), + {})}; TEST(FileSystemFromUri, LinkedRegisteredFactoryNameCollision) { // Since multiple registrars are defined in this translation unit which all // register factories for the 'segfault' scheme, using that scheme in FileSystemFromUri @@ -185,6 +203,46 @@ TEST(FileSystemFromUri, LinkedRegisteredFactoryNameCollision) { // other schemes are not affected by the collision EXPECT_THAT(FileSystemFromUri("slowfile:///hey/yo", &path), Ok()); } + +TEST(FileSystemFromUriAndOptions, LoadedRegisteredFactory) { +#ifdef __EMSCRIPTEN__ + GTEST_SKIP() << "Emscripten dynamic library testing disabled"; +#endif + std::string path; + EXPECT_THAT(LoadFileSystemFactories(ARROW_FILESYSTEM_EXAMPLE_LIBPATH), Ok()); + // Validate extra options are forwarded to the factory. + FileSystemFactoryOptions options{ + {"example_option_string", std::string("example_value")}, + {"example_option_int", 42}, + {"example_typed_option", + std::shared_ptr(std::make_shared(12345))}, + }; + ASSERT_OK_AND_ASSIGN(auto fs, + FileSystemFromUriAndOptions("example:///hey/yo", options, &path)); + EXPECT_EQ(path, "/hey/yo/example_value/42/12345"); + EXPECT_EQ(fs->type_name(), "local"); +} + +TEST(FileSystemFromUriAndOptions, RuntimeRegisteredFactory) { + std::string path; + EXPECT_THAT(FileSystemFromUriAndOptions("slowfile3:///hey/yo", {}, &path), + Raises(StatusCode::Invalid)); + + EXPECT_THAT( + RegisterFileSystemFactory("slowfile3", {SlowFileSystemFactory, __FILE__, __LINE__}), + Ok()); + + ASSERT_OK_AND_ASSIGN(auto fs, + FileSystemFromUriAndOptions("slowfile3:///hey/yo", {}, &path)); + EXPECT_EQ(path, "/hey/yo"); + EXPECT_EQ(fs->type_name(), "slow"); + + // Validate that legacy (3-arg) factories reject non-empty options. + FileSystemFactoryOptions unsupported{{"some_option", 1}}; + EXPECT_THAT(FileSystemFromUriAndOptions("slowfile3:///hey/yo", unsupported, &path), + Raises(StatusCode::NotImplemented)); +} + //////////////////////////////////////////////////////////////////////////// // Misc tests diff --git a/cpp/src/arrow/filesystem/s3fs.cc b/cpp/src/arrow/filesystem/s3fs.cc index 0c15f6f18444..3b6309484298 100644 --- a/cpp/src/arrow/filesystem/s3fs.cc +++ b/cpp/src/arrow/filesystem/s3fs.cc @@ -327,7 +327,48 @@ S3Options S3Options::FromAssumeRoleWithWebIdentity() { } Result S3Options::FromUri(const Uri& uri, std::string* out_path) { - S3Options options; + return FromUriAndOptions(uri, FileSystemFactoryOptions{}, out_path); +} + +Result S3Options::FromUri(const std::string& uri_string, + std::string* out_path) { + Uri uri; + RETURN_NOT_OK(uri.Parse(uri_string)); + return FromUri(uri, out_path); +} + +namespace { + +Result GetStringOption(const std::string& key, const std::any& value) { + // TODO: Validate this is necessary with tests. + if (const auto* s = std::any_cast(&value)) { + return *s; + } + return Status::Invalid("S3 filesystem option '", key, "' must be a std::string"); +} +} // namespace + +Result S3Options::FromUriAndOptions(const ::arrow::util::Uri& uri, + const FileSystemFactoryOptions& options, + std::string* out_path) { + std::optional access_key, secret_key, session_token; + for (const auto& [key, value] : options) { + if (key == "access_key") { + ARROW_ASSIGN_OR_RAISE(access_key, GetStringOption(key, value)); + } else if (key == "secret_key") { + ARROW_ASSIGN_OR_RAISE(secret_key, GetStringOption(key, value)); + } else if (key == "session_token") { + ARROW_ASSIGN_OR_RAISE(session_token, GetStringOption(key, value)); + } else { + return Status::Invalid("Unexpected option for S3 filesystem: '", key, "'"); + } + } + + if (access_key.has_value() != secret_key.has_value()) { + return Status::Invalid( + "Both 'access_key' and 'secret_key' must be provided together"); + } + S3Options s3_options; const auto bucket = uri.host(); auto path = uri.path(); @@ -355,68 +396,74 @@ Result S3Options::FromUri(const Uri& uri, std::string* out_path) { options_map.emplace(kv.first, kv.second); } - const auto username = uri.username(); - if (!username.empty()) { - options.ConfigureAccessKey(username, uri.password()); + if (access_key.has_value()) { + s3_options.ConfigureAccessKey(*access_key, *secret_key, session_token.value_or("")); } else { - options.ConfigureDefaultCredentials(); + const auto username = uri.username(); + if (!username.empty()) { + s3_options.ConfigureAccessKey(username, uri.password()); + } else { + s3_options.ConfigureDefaultCredentials(); + } } + // Prefer AWS service-specific endpoint url auto s3_endpoint_env = arrow::internal::GetEnvVar(kAwsEndpointUrlS3EnvVar); if (s3_endpoint_env.ok()) { - options.endpoint_override = *s3_endpoint_env; + s3_options.endpoint_override = *s3_endpoint_env; } else { auto endpoint_env = arrow::internal::GetEnvVar(kAwsEndpointUrlEnvVar); if (endpoint_env.ok()) { - options.endpoint_override = *endpoint_env; + s3_options.endpoint_override = *endpoint_env; } } bool region_set = false; for (const auto& kv : options_map) { if (kv.first == "region") { - options.region = kv.second; + s3_options.region = kv.second; region_set = true; } else if (kv.first == "scheme") { - options.scheme = kv.second; + s3_options.scheme = kv.second; } else if (kv.first == "endpoint_override") { - options.endpoint_override = kv.second; + s3_options.endpoint_override = kv.second; } else if (kv.first == "allow_delayed_open") { - ARROW_ASSIGN_OR_RAISE(options.allow_delayed_open, + ARROW_ASSIGN_OR_RAISE(s3_options.allow_delayed_open, ::arrow::internal::ParseBoolean(kv.second)); } else if (kv.first == "allow_bucket_creation") { - ARROW_ASSIGN_OR_RAISE(options.allow_bucket_creation, + ARROW_ASSIGN_OR_RAISE(s3_options.allow_bucket_creation, ::arrow::internal::ParseBoolean(kv.second)); } else if (kv.first == "allow_bucket_deletion") { - ARROW_ASSIGN_OR_RAISE(options.allow_bucket_deletion, + ARROW_ASSIGN_OR_RAISE(s3_options.allow_bucket_deletion, ::arrow::internal::ParseBoolean(kv.second)); } else if (kv.first == "tls_ca_file_path") { - options.tls_ca_file_path = kv.second; + s3_options.tls_ca_file_path = kv.second; } else if (kv.first == "tls_ca_dir_path") { - options.tls_ca_dir_path = kv.second; + s3_options.tls_ca_dir_path = kv.second; } else if (kv.first == "tls_verify_certificates") { - ARROW_ASSIGN_OR_RAISE(options.tls_verify_certificates, + ARROW_ASSIGN_OR_RAISE(s3_options.tls_verify_certificates, ::arrow::internal::ParseBoolean(kv.second)); } else if (kv.first == "smart_defaults") { - options.smart_defaults = kv.second; + s3_options.smart_defaults = kv.second; } else { return Status::Invalid("Unexpected query parameter in S3 URI: '", kv.first, "'"); } } - if (!region_set && !bucket.empty() && options.endpoint_override.empty()) { + if (!region_set && !bucket.empty() && s3_options.endpoint_override.empty()) { // XXX Should we use a dedicated resolver with the given credentials? - ARROW_ASSIGN_OR_RAISE(options.region, ResolveS3BucketRegion(bucket)); + ARROW_ASSIGN_OR_RAISE(s3_options.region, ResolveS3BucketRegion(bucket)); } - return options; + return s3_options; } -Result S3Options::FromUri(const std::string& uri_string, - std::string* out_path) { +Result S3Options::FromUriAndOptions(const std::string& uri_string, + const FileSystemFactoryOptions& options, + std::string* out_path) { Uri uri; RETURN_NOT_OK(uri.Parse(uri_string)); - return FromUri(uri, out_path); + return FromUriAndOptions(uri, options, out_path); } bool S3Options::Equals(const S3Options& other) const { @@ -3603,11 +3650,18 @@ Result ResolveS3BucketRegion(const std::string& bucket) { auto kS3FileSystemModule = ARROW_REGISTER_FILESYSTEM( "s3", - [](const arrow::util::Uri& uri, const io::IOContext& io_context, + [](const arrow::util::Uri& uri, const FileSystemFactoryOptions& options, + const io::IOContext& io_context, std::string* out_path) -> Result> { + /*if (!options.empty()) { + return Status::NotImplemented( + "S3 filesystem factory options are not supported yet, got: ", options.size(), + " option(s)"); + }*/ RETURN_NOT_OK(EnsureS3Initialized()); - ARROW_ASSIGN_OR_RAISE(auto options, S3Options::FromUri(uri, out_path)); - return S3FileSystem::Make(options, io_context); + ARROW_ASSIGN_OR_RAISE(auto s3_options, + S3Options::FromUriAndOptions(uri, options, out_path)); + return S3FileSystem::Make(s3_options, io_context); }, [] { DCHECK_OK(EnsureS3Finalized()); }); diff --git a/cpp/src/arrow/filesystem/s3fs.h b/cpp/src/arrow/filesystem/s3fs.h index 158d70a93fce..99d0307b6c55 100644 --- a/cpp/src/arrow/filesystem/s3fs.h +++ b/cpp/src/arrow/filesystem/s3fs.h @@ -17,6 +17,7 @@ #pragma once +#include #include #include #include @@ -290,6 +291,20 @@ struct ARROW_EXPORT S3Options { std::string* out_path = NULLPTR); static Result FromUri(const std::string& uri, std::string* out_path = NULLPTR); + + /// Equivalent to FromUri() with specific backend options that can't be represented + /// on the URI overlaid on top. Recognized keys: + /// - "access_key" (std::string) + /// - "secret_key" (std::string) + /// - "session_token" (std::string). + /// Options take precedence over the URI; unknown keys return + /// Status::Invalid. + static Result FromUriAndOptions(const ::arrow::util::Uri& uri, + const FileSystemFactoryOptions& options, + std::string* out_path = NULLPTR); + static Result FromUriAndOptions(const std::string& uri, + const FileSystemFactoryOptions& options, + std::string* out_path = NULLPTR); }; /// S3-backed FileSystem implementation. diff --git a/cpp/src/arrow/filesystem/s3fs_module_test.cc b/cpp/src/arrow/filesystem/s3fs_module_test.cc index 987a8979b271..14f796528586 100644 --- a/cpp/src/arrow/filesystem/s3fs_module_test.cc +++ b/cpp/src/arrow/filesystem/s3fs_module_test.cc @@ -16,6 +16,7 @@ // under the License. #include +#include #include #include #include @@ -82,4 +83,29 @@ TEST(S3Test, FromUri) { "&allow_bucket_creation=0&allow_bucket_deletion=0"); } +TEST(S3Test, FromUriAndOptionsCredentials) { + ASSERT_OK_AND_ASSIGN(auto minio, GetMinioEnv()->GetOneServer()); + std::string path; + FileSystemFactoryOptions options{ + {"access_key", std::string(minio->access_key())}, + {"secret_key", std::string(minio->secret_key())}, + }; + // Credentials supplied via options, NOT in the URI. + ASSERT_OK_AND_ASSIGN( + auto fs, + FileSystemFromUriAndOptions("s3://bucket/somedir/subdir/subfile", options, &path)); + // They crossed the module boundary and were applied -> reflected in MakeUri. + EXPECT_EQ(fs->MakeUri("/" + path), + "s3://minio:miniopass@bucket/somedir/subdir/subfile" + "?region=us-east-1&scheme=https&endpoint_override=" + "&allow_bucket_creation=0&allow_bucket_deletion=0"); +} + +TEST(S3Test, FromUriRejectsUnknownOptions) { + FileSystemFactoryOptions options{{"some_option", 1}}; + EXPECT_RAISES_WITH_MESSAGE_THAT( + Invalid, ::testing::HasSubstr("Unexpected option"), + FileSystemFromUriAndOptions("s3://bucket/key", options)); +} + } // namespace arrow::fs diff --git a/cpp/src/arrow/testing/examplefs.cc b/cpp/src/arrow/testing/examplefs.cc index 5c9d5f9d9071..da4651acdc50 100644 --- a/cpp/src/arrow/testing/examplefs.cc +++ b/cpp/src/arrow/testing/examplefs.cc @@ -15,9 +15,12 @@ // specific language governing permissions and limitations // under the License. +#include + #include "arrow/filesystem/filesystem.h" #include "arrow/filesystem/filesystem_library.h" #include "arrow/result.h" +#include "arrow/testing/examplefs.h" #include "arrow/util/uri.h" #include @@ -26,12 +29,41 @@ namespace arrow::fs { auto kExampleFileSystemModule = ARROW_REGISTER_FILESYSTEM( "example", - [](const Uri& uri, const io::IOContext& io_context, + [](const Uri& uri, const FileSystemFactoryOptions& options, + const io::IOContext& io_context, std::string* out_path) -> Result> { constexpr std::string_view kScheme = "example"; EXPECT_EQ(uri.scheme(), kScheme); auto local_uri = "file" + uri.ToString().substr(kScheme.size()); - return FileSystemFromUri(local_uri, io_context, out_path); + ARROW_ASSIGN_OR_RAISE(auto fs, FileSystemFromUri(local_uri, io_context, out_path)); + for (const auto& [key, value] : options) { + EXPECT_TRUE(value.has_value()); + if (key == "example_option_string") { + if (const auto* s = std::any_cast(&value)) { + if (out_path != nullptr) *out_path += "/" + *s; + } else { + ADD_FAILURE() << "example_option_string has wrong type"; + } + } else if (key == "example_option_int") { + if (const auto* i = std::any_cast(&value)) { + if (out_path != nullptr) *out_path += "/" + std::to_string(*i); + } else { + ADD_FAILURE() << "example_option_int has wrong type"; + } + } else if (key == "example_typed_option") { + if (const auto* opt = + std::any_cast>(&value)) { + if (out_path != nullptr) { + *out_path += "/" + std::to_string((*opt)->value()); + } + } else if (out_path != nullptr) { + *out_path += "/typed_cast_failed"; + } + } else { + ADD_FAILURE() << "Unexpected option: " << key; + } + } + return fs; }, {}); diff --git a/cpp/src/arrow/testing/examplefs.h b/cpp/src/arrow/testing/examplefs.h new file mode 100644 index 000000000000..e04b84c982ad --- /dev/null +++ b/cpp/src/arrow/testing/examplefs.h @@ -0,0 +1,28 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +namespace arrow::fs { + +class ExampleTypedOption { + public: + virtual ~ExampleTypedOption() = default; + virtual int value() const = 0; +}; + +} // namespace arrow::fs