Skip to content

Commit

Permalink
HADOOP-12292. Make use of DeleteObjects optional. (Thomas Demoor via …
Browse files Browse the repository at this point in the history
…stevel)
  • Loading branch information
steveloughran committed Feb 6, 2016
1 parent fe124da commit 29ae258
Show file tree
Hide file tree
Showing 8 changed files with 75 additions and 22 deletions.
3 changes: 3 additions & 0 deletions hadoop-common-project/hadoop-common/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,9 @@ Trunk (Unreleased)
HADOOP-11828. Implement the Hitchhiker erasure coding algorithm.
(Jack Liuquan via zhz)

HADOOP-12292. Make use of DeleteObjects optional.
(Thomas Demoor via stevel)

BUG FIXES

HADOOP-12617. SPNEGO authentication request to non-default realm gets
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -887,6 +887,15 @@ for ldap providers in the same way as above does.
<description>Threshold before uploads or copies use parallel multipart operations.</description>
</property>

<property>
<name>fs.s3a.multiobjectdelete.enable</name>
<value>true</value>
<description>When enabled, multiple single-object delete requests are replaced by
a single 'delete multiple objects'-request, reducing the number of requests.
Beware: legacy S3-compatible object stores might not support this request.
</description>
</property>

<property>
<name>fs.s3a.acl.default</name>
<description>Set a canned ACL for newly created and copied objects. Value may be private,
Expand Down
3 changes: 2 additions & 1 deletion hadoop-project/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@
<exec-maven-plugin.version>1.3.1</exec-maven-plugin.version>
<make-maven-plugin.version>1.0-beta-1</make-maven-plugin.version>
<native-maven-plugin.version>1.0-alpha-8</native-maven-plugin.version>
<surefire.fork.timeout>900</surefire.fork.timeout>
</properties>

<dependencyManagement>
Expand Down Expand Up @@ -1129,7 +1130,7 @@
<artifactId>maven-surefire-plugin</artifactId>
<configuration>
<reuseForks>false</reuseForks>
<forkedProcessTimeoutInSeconds>900</forkedProcessTimeoutInSeconds>
<forkedProcessTimeoutInSeconds>${surefire.fork.timeout}</forkedProcessTimeoutInSeconds>
<argLine>${maven-surefire-plugin.argLine}</argLine>
<environmentVariables>
<HADOOP_COMMON_HOME>${hadoop.common.build.dir}</HADOOP_COMMON_HOME>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,9 @@ public class Constants {
public static final String MIN_MULTIPART_THRESHOLD = "fs.s3a.multipart.threshold";
public static final long DEFAULT_MIN_MULTIPART_THRESHOLD = Integer.MAX_VALUE;

//enable multiobject-delete calls?
public static final String ENABLE_MULTI_DELETE = "fs.s3a.multiobjectdelete.enable";

// comma separated list of directories
public static final String BUFFER_DIR = "fs.s3a.buffer.dir";

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
import com.amazonaws.auth.InstanceProfileCredentialsProvider;
import com.amazonaws.services.s3.AmazonS3Client;
import com.amazonaws.services.s3.model.CannedAccessControlList;
import com.amazonaws.services.s3.model.DeleteObjectRequest;
import com.amazonaws.services.s3.model.DeleteObjectsRequest;
import com.amazonaws.services.s3.model.ListObjectsRequest;
import com.amazonaws.services.s3.model.ObjectListing;
Expand Down Expand Up @@ -82,6 +83,7 @@ public class S3AFileSystem extends FileSystem {
private String bucket;
private int maxKeys;
private long partSize;
private boolean enableMultiObjectsDelete;
private TransferManager transfers;
private ExecutorService threadPoolExecutor;
private long multiPartThreshold;
Expand Down Expand Up @@ -200,6 +202,7 @@ public void initialize(URI name, Configuration conf) throws IOException {
partSize = conf.getLong(MULTIPART_SIZE, DEFAULT_MULTIPART_SIZE);
multiPartThreshold = conf.getLong(MIN_MULTIPART_THRESHOLD,
DEFAULT_MIN_MULTIPART_THRESHOLD);
enableMultiObjectsDelete = conf.getBoolean(ENABLE_MULTI_DELETE, true);

if (partSize < 5 * 1024 * 1024) {
LOG.error(MULTIPART_SIZE + " must be at least 5 MB");
Expand Down Expand Up @@ -522,23 +525,16 @@ public boolean rename(Path src, Path dst) throws IOException {
copyFile(summary.getKey(), newDstKey);

if (keysToDelete.size() == MAX_ENTRIES_TO_DELETE) {
DeleteObjectsRequest deleteRequest =
new DeleteObjectsRequest(bucket).withKeys(keysToDelete);
s3.deleteObjects(deleteRequest);
statistics.incrementWriteOps(1);
keysToDelete.clear();
removeKeys(keysToDelete, true);
}
}

if (objects.isTruncated()) {
objects = s3.listNextBatchOfObjects(objects);
statistics.incrementReadOps(1);
} else {
if (keysToDelete.size() > 0) {
DeleteObjectsRequest deleteRequest =
new DeleteObjectsRequest(bucket).withKeys(keysToDelete);
s3.deleteObjects(deleteRequest);
statistics.incrementWriteOps(1);
if (!keysToDelete.isEmpty()) {
removeKeys(keysToDelete, false);
}
break;
}
Expand All @@ -552,6 +548,36 @@ public boolean rename(Path src, Path dst) throws IOException {
return true;
}

/**
 * Delete a batch of keys from the bucket backing this filesystem.
 *
 * If {@code fs.s3a.multiobjectdelete.enable} is set, all keys are removed
 * with one bulk DeleteObjects call (counted as a single write operation);
 * otherwise each key is deleted with its own request, and the write-op
 * counter is bumped by the number of keys removed.
 *
 * @param keysToDelete the keys to remove from the S3 bucket
 * @param clearKeys    if true, empty {@code keysToDelete} once the
 *                     deletions have been issued
 */
private void removeKeys(List<DeleteObjectsRequest.KeyVersion> keysToDelete,
    boolean clearKeys) {
  if (enableMultiObjectsDelete) {
    // One bulk request covering every key: a single write operation.
    s3.deleteObjects(
        new DeleteObjectsRequest(bucket).withKeys(keysToDelete));
    statistics.incrementWriteOps(1);
  } else {
    // Fall back to one DELETE per object for stores lacking bulk delete.
    for (DeleteObjectsRequest.KeyVersion key : keysToDelete) {
      s3.deleteObject(new DeleteObjectRequest(bucket, key.getKey()));
    }
    statistics.incrementWriteOps(keysToDelete.size());
  }
  if (clearKeys) {
    keysToDelete.clear();
  }
}

/** Delete a file.
*
* @param f the path to delete.
Expand Down Expand Up @@ -626,11 +652,7 @@ public boolean delete(Path f, boolean recursive) throws IOException {
}

if (keys.size() == MAX_ENTRIES_TO_DELETE) {
DeleteObjectsRequest deleteRequest =
new DeleteObjectsRequest(bucket).withKeys(keys);
s3.deleteObjects(deleteRequest);
statistics.incrementWriteOps(1);
keys.clear();
removeKeys(keys, true);
}
}

Expand All @@ -639,10 +661,7 @@ public boolean delete(Path f, boolean recursive) throws IOException {
statistics.incrementReadOps(1);
} else {
if (!keys.isEmpty()) {
DeleteObjectsRequest deleteRequest =
new DeleteObjectsRequest(bucket).withKeys(keys);
s3.deleteObjects(deleteRequest);
statistics.incrementWriteOps(1);
removeKeys(keys, false);
}
break;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,15 @@ If you do any of these: change your credentials immediately!
<description>Threshold before uploads or copies use parallel multipart operations.</description>
</property>

<property>
<name>fs.s3a.multiobjectdelete.enable</name>
<value>true</value>
<description>When enabled, multiple single-object delete requests are replaced by
a single 'delete multiple objects'-request, reducing the number of requests.
Beware: legacy S3-compatible object stores might not support this request.
</description>
</property>

<property>
<name>fs.s3a.acl.default</name>
<description>Set a canned ACL for newly created and copied objects. Value may be private,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,13 +36,21 @@

/**
* Base class for scale tests; here is where the common scale configuration
* keys are defined
* keys are defined.
*/
public class S3AScaleTestBase {

public static final String SCALE_TEST = "scale.test.";

/**
* The number of operations to perform: {@value}
*/
public static final String KEY_OPERATION_COUNT =
SCALE_TEST + "operation.count";

/**
* The default number of operations to perform: {@value}
*/
public static final long DEFAULT_OPERATION_COUNT = 2005;

protected S3AFileSystem fs;
Expand Down Expand Up @@ -71,6 +79,7 @@ public Configuration getConf() {
@Before
public void setUp() throws Exception {
conf = createConfiguration();
LOG.info("Scale test operation count = {}", getOperationCount());
fs = S3ATestUtils.createTestFileSystem(conf);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ public void testBulkRenameAndDelete() throws Throwable {
// use Executor to speed up file creation
ExecutorService exec = Executors.newFixedThreadPool(16);
final ExecutorCompletionService<Boolean> completionService =
new ExecutorCompletionService<Boolean>(exec);
new ExecutorCompletionService<>(exec);
try {
final byte[] data = ContractTestUtils.dataset(testBufferSize, 'a', 'z');

Expand Down

0 comments on commit 29ae258

Please sign in to comment.