Skip to content

Commit

Permalink
Change default copy behaviour to preserve directory structure (peak#133)
Browse files Browse the repository at this point in the history
* Add --flatten flag instead of --parents

* Preserve directory structure as a default

* Update Changelog and Readme.md

* Simplify prepareUploadDestination

* Consolidate copy and upload destination url prep
  • Loading branch information
sonmezonur committed Mar 30, 2020
1 parent 7aca503 commit 7aa10f4
Show file tree
Hide file tree
Showing 6 changed files with 565 additions and 215 deletions.
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@ This is a major release with many breaking changes.
- `-ds`, `-dw`, `-us` and `-uw` global flags are no longer available. Multipart
concurrency and part size flags are now part of the `cp/mv` command. New
replacement flags are `--concurrency | -c` and `--part-size | -p`. ([#110](https://github.com/peak/s5cmd/pull/110))

- Dropped `-parents` flag from copy command. Copy behaviour has changed to preserve the directory hierarchy by default.
  Optional `-flatten` flag is added to flatten the directory structure.
#### Features

- Added `mb` command to make buckets. ([#25](https://github.com/peak/s5cmd/issues/25))
Expand Down
12 changes: 6 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -84,10 +84,10 @@ parallel. `s5cmd` will create the destination directory if it is missing.

`file1.gz file2.gz file3.gz`

ℹ️ `s5cmd` flattens the source directory structure by default. If you want to keep
the source directory structure, use the `--parents` flag.
ℹ️ `s5cmd` preserves the source directory structure by default. If you want to flatten
the source directory structure, use the `--flatten` flag.

s5cmd cp --parents 's3://bucket/logs/2020/03/*' logs/
s5cmd cp 's3://bucket/logs/2020/03/*' logs/

The above command will match the following objects:

Expand All @@ -111,7 +111,7 @@ logs/19/originals/file3.gz

#### Upload multiple files to S3

s5cmd cp --parents directory/ s3://bucket/
s5cmd cp directory/ s3://bucket/

Will upload all files in the given directory to S3 while keeping the folder hierarchy
of the source.
Expand All @@ -138,7 +138,7 @@ they'll be deleted in a single request.

`s5cmd` supports copying objects on the server side as well.

s5cmd cp --parents 's3://bucket/logs/2020/*' s3://bucket/logs/backup/
s5cmd cp 's3://bucket/logs/2020/*' s3://bucket/logs/backup/

Will copy all the matching objects to the given S3 prefix, respecting the source
folder hierarchy.
Expand Down Expand Up @@ -169,7 +169,7 @@ or
`commands.txt` content could look like:

```
cp --parents s3://bucket/2020/03/* logs/2020/03/
cp s3://bucket/2020/03/* logs/2020/03/
# line comments are supported
rm s3://bucket/2020/03/19/file2.gz
Expand Down
85 changes: 26 additions & 59 deletions command/cp.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,9 @@ var copyCommandFlags = []cli.Flag{
Usage: "only overwrite destination if source modtime is newer",
},
&cli.BoolFlag{
Name: "parents",
Usage: "create same directory structure of source, starting from the first wildcard",
Name: "flatten",
Aliases: []string{"f"},
Usage: "flatten directory structure of source, starting from the first wildcard",
},
&cli.StringFlag{
Name: "storage-class",
Expand Down Expand Up @@ -118,7 +119,7 @@ var CopyCommand = &cli.Command{
noClobber: c.Bool("no-clobber"),
ifSizeDiffer: c.Bool("if-size-differ"),
ifSourceNewer: c.Bool("if-source-newer"),
parents: c.Bool("parents"),
flatten: c.Bool("flatten"),
storageClass: storage.LookupClass(c.String("storage-class")),
concurrency: c.Int("concurrency"),
partSize: c.Int64("part-size") * megabytes,
Expand All @@ -140,7 +141,7 @@ type Copy struct {
noClobber bool
ifSizeDiffer bool
ifSourceNewer bool
parents bool
flatten bool
storageClass storage.StorageClass

// s3 options
Expand Down Expand Up @@ -183,6 +184,11 @@ func (c Copy) Run(ctx context.Context) error {
}()

isBatch := srcurl.HasGlob()
if !isBatch && !srcurl.IsRemote() {
obj, _ := client.Stat(ctx, srcurl)
isBatch = obj != nil && obj.Type.IsDir()
}

for object := range objch {
if object.Type.IsDir() || errorpkg.IsCancelation(object.Err) {
continue
Expand Down Expand Up @@ -223,7 +229,7 @@ func (c Copy) prepareCopyTask(
isBatch bool,
) func() error {
return func() error {
dsturl = prepareCopyDestination(srcurl, dsturl, c.parents, isBatch)
dsturl = prepareRemoteDestination(srcurl, dsturl, c.flatten, isBatch)
err := c.doCopy(ctx, srcurl, dsturl)
if err != nil {
return &errorpkg.Error{
Expand All @@ -244,7 +250,7 @@ func (c Copy) prepareDownloadTask(
isBatch bool,
) func() error {
return func() error {
dsturl, err := prepareDownloadDestination(ctx, srcurl, dsturl, c.parents, isBatch)
dsturl, err := prepareLocalDestination(ctx, srcurl, dsturl, c.flatten, isBatch)
if err != nil {
return err
}
Expand All @@ -269,7 +275,7 @@ func (c Copy) prepareUploadTask(
isBatch bool,
) func() error {
return func() error {
dsturl = prepareUploadDestination(srcurl, dsturl, c.parents, isBatch)
dsturl = prepareRemoteDestination(srcurl, dsturl, c.flatten, isBatch)
err := c.doUpload(ctx, srcurl, dsturl)
if err != nil {
return &errorpkg.Error{
Expand Down Expand Up @@ -494,48 +500,36 @@ func (c Copy) shouldOverride(ctx context.Context, srcurl *url.URL, dsturl *url.U
return stickyErr
}

// prepareCopyDestination will return a new destination URL for local->local
// and remote->remote copy operations.
func prepareCopyDestination(
// prepareRemoteDestination will return a new destination URL for
// remote->remote and local->remote copy operations.
func prepareRemoteDestination(
srcurl *url.URL,
dsturl *url.URL,
parents bool,
flatten bool,
isBatch bool,
) *url.URL {
objname := srcurl.Base()
if parents {
if isBatch && !flatten {
objname = srcurl.Relative()
}

// For remote->remote copy operations, treat <dst> as prefix if it has "/"
// suffix.
if dsturl.IsRemote() {
if dsturl.IsPrefix() || dsturl.IsBucket() {
dsturl = dsturl.Join(objname)
}
return dsturl
}

// Absolute <src> path is given. Use given <dst> and local copy operation
// will create missing directories if <dst> has one.
if !isBatch {
return dsturl
if dsturl.IsPrefix() || dsturl.IsBucket() {
dsturl = dsturl.Join(objname)
}

return dsturl.Join(objname)
return dsturl
}

// prepareDownloadDestination will return a new destination URL for
// remote->local and remote->remote copy operations.
func prepareDownloadDestination(
// remote->local copy operations.
func prepareLocalDestination(
ctx context.Context,
srcurl *url.URL,
dsturl *url.URL,
parents bool,
flatten bool,
isBatch bool,
) (*url.URL, error) {
objname := srcurl.Base()
if parents {
if isBatch && !flatten {
objname = srcurl.Relative()
}

Expand All @@ -555,7 +549,7 @@ func prepareDownloadDestination(
return nil, err
}

if parents {
if isBatch && !flatten {
dsturl = dsturl.Join(objname)
if err := os.MkdirAll(dsturl.Dir(), os.ModePerm); err != nil {
return nil, err
Expand All @@ -578,27 +572,6 @@ func prepareDownloadDestination(
return dsturl, nil
}

// prepareUploadDestination will return a new destination URL for local->remote
// operations.
func prepareUploadDestination(
srcurl *url.URL,
dsturl *url.URL,
parents bool,
isBatch bool,
) *url.URL {
// if given destination is a bucket/objname, don't do any join and respect
// the user's destination object name.
if !isBatch && !dsturl.IsBucket() && !dsturl.IsPrefix() {
return dsturl
}

objname := srcurl.Base()
if parents {
objname = srcurl.Relative()
}
return dsturl.Join(objname)
}

// getObject checks if the object from given url exists. If no object is
// found, error and returning object would be nil.
func getObject(ctx context.Context, url *url.URL) (*storage.Object, error) {
Expand Down Expand Up @@ -644,12 +617,6 @@ func Validate(c *cli.Context) error {
return fmt.Errorf("target %q can not contain glob characters", dst)
}

// --parents is used in conjunction with a wildcard source to deduce
// relative source paths.
if !srcurl.HasGlob() && c.Bool("parents") {
return fmt.Errorf("source argument must contain wildcard if --parents flag is provided")
}

// we don't operate on S3 prefixes for copy and delete operations.
if srcurl.IsBucket() || srcurl.IsPrefix() {
return fmt.Errorf("source argument must contain wildcard character")
Expand Down
2 changes: 1 addition & 1 deletion command/mv.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ var MoveCommand = &cli.Command{
noClobber: c.Bool("no-clobber"),
ifSizeDiffer: c.Bool("if-size-differ"),
ifSourceNewer: c.Bool("if-source-newer"),
parents: c.Bool("parents"),
flatten: c.Bool("flatten"),
storageClass: storage.LookupClass(c.String("storage-class")),
}

Expand Down
Loading

0 comments on commit 7aa10f4

Please sign in to comment.