Skip to content

Commit

Permalink
Merge pull request #138 from amzn/s3_folder
Browse files Browse the repository at this point in the history
S3 "file system" utilities
  • Loading branch information
pbthif committed May 17, 2023
2 parents ba14437 + 6d724f0 commit 83315f8
Show file tree
Hide file tree
Showing 4 changed files with 323 additions and 36 deletions.
41 changes: 41 additions & 0 deletions Sources/S3Client/S3ClientProtocolV2+listFolder.swift
@@ -0,0 +1,41 @@
//
// S3ClientProtocolV2+listFolder.swift
// S3Client
//

import S3Model

public extension S3ClientProtocolV2 {
    /// Returns all S3 objects located directly within the same S3 "folder" as a given object.
    ///
    /// The "folder" is `objectIdentifier.parentPath`, or the bucket root when that is `nil`.
    /// If the object identifier provided is a directory, the return value will
    /// contain all objects within its parent "folder". Objects that live in nested
    /// sub-"folders" are filtered out of the result.
    ///
    /// - Parameters:
    ///   - objectIdentifier: The object whose containing "folder" should be listed.
    ///   - fileNamePrefixProvider: Receives the file name of `objectIdentifier` (an empty string
    ///     when it has none) and returns a file name prefix; only keys starting with the
    ///     "folder" path followed by that prefix are requested. Defaults to no prefix.
    /// - Returns: One identifier per matching key, in no particular order
    ///   (keys are accumulated in a `Set` across pages).
    /// - Throws: Any error thrown while deriving the parent path or file name, by
    ///   `fileNamePrefixProvider`, or by `listObjectsV2`.
    func listFolder(
        for objectIdentifier: S3ObjectIdentifier,
        fileNamePrefixProvider: (String) throws -> String = { _ in "" }) async throws
    -> [S3ObjectIdentifier] {
        let bucketName = objectIdentifier.bucketName
        let s3Folder = try objectIdentifier.parentPath ?? ""
        let fileName = try objectIdentifier.fileName ?? ""
        let fileNamePrefix = try fileNamePrefixProvider(fileName)
        let listBucketPrefix = s3Folder + fileNamePrefix

        // Length of the "folder" prefix in UTF-8 code units. Offsets are compared as plain
        // integers because a String.Index is only meaningful for the string it came from.
        let folderLength = s3Folder.utf8.count
        let slash = UInt8(ascii: "/")

        var nextToken: NextToken? = nil
        var objectKeys = Set<String>()
        repeat {
            let request = ListObjectsV2Request(
                bucket: bucketName,
                continuationToken: nextToken,
                prefix: listBucketPrefix)
            let response = try await self.listObjectsV2(input: request)

            // Filter objects from sub-folders: a key is a direct child when its last "/"
            // (if any) still belongs to the "folder" prefix itself.
            let directChildren = (response.contents ?? [])
                .compactMap(\.key)
                .filter { key in
                    guard let lastSlash = key.utf8.lastIndex(of: slash) else {
                        return true
                    }
                    return key.utf8.distance(from: key.utf8.startIndex, to: lastSlash) < folderLength
                }
            objectKeys.formUnion(directChildren)
            nextToken = response.nextContinuationToken
        } while nextToken != nil

        return objectKeys.map { S3ObjectIdentifier(bucketName: bucketName, keyPath: $0) }
    }
}
74 changes: 74 additions & 0 deletions Sources/S3Client/S3ObjectIdentifier.swift
@@ -0,0 +1,74 @@
//
// S3ObjectIdentifier.swift
// S3Client
//

import Foundation
import S3Model

/**
 An identifier for an S3 object, specifying the name of its bucket
 and its key path.
 */
public struct S3ObjectIdentifier: Equatable {
    internal static let s3Prefix = "s3://"
    internal static let httpsPrefix = "https://"
    internal static let httpPrefix = "http://"
    internal static let s3EndpointRegex = #"^https?:\/\/(.+\.)?s3[.-][a-z0-9-]+\."#

    /// The name of the bucket containing the object.
    public let bucketName: String
    /// The key of the object within the bucket.
    public let keyPath: String

    /// Creates an identifier from a bucket name and a key path.
    public init(bucketName: String,
                keyPath: String) {
        self.bucketName = bucketName
        self.keyPath = keyPath
    }

    /// Returns the key path of the parent S3 "folder" containing the object, WITH trailing '/'.
    /// For example, for an object at path "a/b/c/d.ext", the return value will be "a/b/c/".
    /// Returns nil when the object has no parent "folder" (it sits at the bucket root).
    ///
    /// - Throws: `S3Error.validationError` if a URL cannot be built from the bucket name and key.
    public var parentPath: String? {
        get throws {
            let pathComponents = try url.pathComponents
            guard pathComponents.count > 1 else {
                return nil
            }

            // The first component is the root "/", so joining leaves leading slashes
            // that have to be trimmed before the single trailing '/' is appended.
            let path = pathComponents
                .dropLast()
                .joined(separator: "/")
                .trimmingCharacters(in: CharacterSet(charactersIn: "/"))
            return path.isEmpty ? nil : path + "/"
        }
    }

    /// Returns the "file name" of the object within its S3 "folder". If the key path is a directory path
    /// (e.g. ending with '/'), the return value will be nil.
    ///
    /// - Throws: `S3Error.validationError` if a URL cannot be built from the bucket name and key.
    public var fileName: String? {
        get throws {
            let objectUrl = try url
            return objectUrl.hasDirectoryPath ? nil : objectUrl.pathComponents.last
        }
    }

    /// A URL for the object, used only to split the key into path components.
    ///
    /// The key is percent-encoded first: S3 keys are arbitrary strings, so characters that are
    /// not valid in a URL path (spaces, non-ASCII, '?', '#', ...) would otherwise make the URL
    /// fail to parse, and a literal '%' in the key would be misread as a percent escape.
    private var url: URL {
        get throws {
            guard let encodedKeyPath = keyPath.addingPercentEncoding(withAllowedCharacters: .urlPathAllowed),
                  let url = URL(string: "\(Self.httpsPrefix)\(bucketName).s3.amazonaws.com/\(encodedKeyPath)") else {
                throw S3Error.validationError(reason: "Cannot initialize URL for bucket \(bucketName) " +
                    "and key \(keyPath).")
            }

            return url
        }
    }
}

// Alias under the original, misspelled type name ("Identifer"). Kept so that code written
// against the old spelling keeps compiling after the typo was fixed.
public typealias S3ObjectIdentifer = S3ObjectIdentifier
57 changes: 21 additions & 36 deletions Sources/S3Client/String+asS3ObjectIdentifier.swift
Expand Up @@ -5,59 +5,39 @@

import Foundation

/**
An identifier for an S3 object, specifying the name of its bucket
and its key path.
*/
public struct S3ObjectIdentifer: Equatable {
internal static let s3Prefix = "s3://"
internal static let httpsPrefix = "https://"
internal static let httpPrefix = "http://"
internal static let s3EndpointRegex = #"^https?:\/\/(.+\.)?s3[.-][a-z0-9-]+\."#

public let bucketName: String
public let keyPath: String

public init(bucketName: String,
keyPath: String) {
self.bucketName = bucketName
self.keyPath = keyPath
}
}

public extension String {
/**
If possible creates an S3ObjectIdentifer instance from this string,
seperating the bucket name and key path.
If possible creates an S3ObjectIdentifier instance from this string,
separating the bucket name and key path.
Will return nil if this string is not of the form-
s3://bucketName/the/key/path
This would return S3ObjectIdentifer(bucketName: "bucketName",
keyPath: "/the/key/path")
This would return S3ObjectIdentifier(bucketName: "bucketName",
keyPath: "/the/key/path")
*/
func asS3ObjectIdentifier() -> S3ObjectIdentifer? {
if self.starts(with: S3ObjectIdentifer.s3Prefix) {
func asS3ObjectIdentifier() -> S3ObjectIdentifier? {
if self.starts(with: S3ObjectIdentifier.s3Prefix) {
// get the url without the scheme - of the form {bucket}/{key+}
let nonPrefixedUrl = self.dropFirst(S3ObjectIdentifer.s3Prefix.count)
let nonPrefixedUrl = self.dropFirst(S3ObjectIdentifier.s3Prefix.count)

return asS3ObjectIdentifierFromNonPrefixedUrl(nonPrefixedUrl: nonPrefixedUrl)
} else if self.starts(with: S3ObjectIdentifer.httpsPrefix) || self.starts(with: S3ObjectIdentifer.httpPrefix) {
} else if self.starts(with: S3ObjectIdentifier.httpsPrefix) || self.starts(with: S3ObjectIdentifier.httpPrefix) {
return asS3ObjectIdentifierFromHttpOrHttps()
}

return nil
}

/// Tries to parse the bucket and key names from an HTTP or HTTPS URL.
private func asS3ObjectIdentifierFromHttpOrHttps() -> S3ObjectIdentifer? {
private func asS3ObjectIdentifierFromHttpOrHttps() -> S3ObjectIdentifier? {
guard let url = URL(string: self) else {
return nil
}

let urlPath = url.path.dropFirst()

guard let regex = try? NSRegularExpression(pattern: S3ObjectIdentifer.s3EndpointRegex, options: []) else {
guard let regex = try? NSRegularExpression(pattern: S3ObjectIdentifier.s3EndpointRegex, options: []) else {
return nil
}

Expand All @@ -73,7 +53,7 @@ public extension String {
// The capture group is the bucket name (with trailing dot) and the URL path is the key name
let bucketName = String(self[bucketRange].dropLast())
let keyName = String(urlPath)
return S3ObjectIdentifer(bucketName: bucketName, keyPath: keyName)
return S3ObjectIdentifier(bucketName: bucketName, keyPath: keyName)
}

// If the regex capture group is empty, the URL is in the path style, for example:
Expand All @@ -90,17 +70,22 @@ public extension String {
#endif
}

/// Spilts a url of the form {bucket}/{key+} into a S3ObjectIdentifer if possible
private func asS3ObjectIdentifierFromNonPrefixedUrl(nonPrefixedUrl: Substring) -> S3ObjectIdentifer? {
/// Splits a url of the form {bucket}/{key+} into a S3ObjectIdentifier if possible
private func asS3ObjectIdentifierFromNonPrefixedUrl(nonPrefixedUrl: Substring) -> S3ObjectIdentifier? {
guard let nextUrlSeparator = getIndexOfNextUrlSeparator(url: nonPrefixedUrl) else {
return nil
}

let bucketKeySeperatorIndex = nonPrefixedUrl.index(nextUrlSeparator,
let bucketKeySeparatorIndex = nonPrefixedUrl.index(nextUrlSeparator,
offsetBy: 1)
let bucketName = String(nonPrefixedUrl[..<nextUrlSeparator])
let keyPath = String(nonPrefixedUrl[bucketKeySeperatorIndex...])
let keyPath = String(nonPrefixedUrl[bucketKeySeparatorIndex...])

return S3ObjectIdentifer(bucketName: bucketName, keyPath: keyPath)
return S3ObjectIdentifier(bucketName: bucketName, keyPath: keyPath)
}

// To keep backwards compatibility after fixing the typo
func asS3ObjectIdentifer() -> S3ObjectIdentifer? {
asS3ObjectIdentifier()
}
}

0 comments on commit 83315f8

Please sign in to comment.