Skip to content

Commit

Permalink
HADOOP-13164 Optimize S3AFileSystem::deleteUnnecessaryFakeDirectories…
Browse files Browse the repository at this point in the history
…. Contributed by Rajesh Balamohan.
  • Loading branch information
steveloughran committed Sep 29, 2016
1 parent a1b8251 commit ee0c722
Show file tree
Hide file tree
Showing 6 changed files with 144 additions and 35 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -359,7 +359,7 @@ protected void assertMinusOne(String text, int result) {
assertEquals(text + " wrong read result " + result, -1, result);
}

boolean rename(Path src, Path dst) throws IOException {
protected boolean rename(Path src, Path dst) throws IOException {
return getFileSystem().rename(src, dst);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -668,15 +668,15 @@ private boolean innerRename(Path src, Path dst) throws IOException,
copyFile(summary.getKey(), newDstKey, summary.getSize());

if (keysToDelete.size() == MAX_ENTRIES_TO_DELETE) {
removeKeys(keysToDelete, true);
removeKeys(keysToDelete, true, false);
}
}

if (objects.isTruncated()) {
objects = continueListObjects(objects);
} else {
if (!keysToDelete.isEmpty()) {
removeKeys(keysToDelete, false);
removeKeys(keysToDelete, false, false);
}
break;
}
Expand Down Expand Up @@ -924,17 +924,25 @@ public void incrementPutProgressStatistics(String key, long bytes) {
* @param keysToDelete collection of keys to delete on the s3-backend
* @param clearKeys clears the keysToDelete-list after processing the list
* when set to true
* @param deleteFakeDir indicates whether this is for deleting fake dirs
*/
private void removeKeys(List<DeleteObjectsRequest.KeyVersion> keysToDelete,
boolean clearKeys) throws AmazonClientException {
boolean clearKeys, boolean deleteFakeDir) throws AmazonClientException {
if (keysToDelete.isEmpty()) {
// no keys
return;
}
if (enableMultiObjectsDelete) {
deleteObjects(new DeleteObjectsRequest(bucket).withKeys(keysToDelete));
instrumentation.fileDeleted(keysToDelete.size());
} else {
for (DeleteObjectsRequest.KeyVersion keyVersion : keysToDelete) {
deleteObject(keyVersion.getKey());
}
}
if (!deleteFakeDir) {
instrumentation.fileDeleted(keysToDelete.size());
} else {
instrumentation.fakeDirsDeleted(keysToDelete.size());
}
if (clearKeys) {
keysToDelete.clear();
Expand Down Expand Up @@ -1017,15 +1025,15 @@ private boolean innerDelete(S3AFileStatus status, boolean recursive)
LOG.debug("Got object to delete {}", summary.getKey());

if (keys.size() == MAX_ENTRIES_TO_DELETE) {
removeKeys(keys, true);
removeKeys(keys, true, false);
}
}

if (objects.isTruncated()) {
objects = continueListObjects(objects);
} else {
if (!keys.isEmpty()) {
removeKeys(keys, false);
removeKeys(keys, false, false);
}
break;
}
Expand Down Expand Up @@ -1504,37 +1512,30 @@ public void finishedWrite(String key) {
/**
* Delete mock parent directories which are no longer needed.
* This code swallows IO exceptions encountered
* @param f path
*/
private void deleteUnnecessaryFakeDirectories(Path f) {
while (true) {
String key = "";
try {
key = pathToKey(f);
if (key.isEmpty()) {
break;
}

S3AFileStatus status = getFileStatus(f);

if (status.isDirectory() && status.isEmptyDirectory()) {
LOG.debug("Deleting fake directory {}/", key);
deleteObject(key + "/");
* @param path path
*/
private void deleteUnnecessaryFakeDirectories(Path path) {
List<DeleteObjectsRequest.KeyVersion> keysToRemove = new ArrayList<>();
while (!path.isRoot()) {
String key = pathToKey(path);
key = (key.endsWith("/")) ? key : (key + "/");
keysToRemove.add(new DeleteObjectsRequest.KeyVersion(key));
path = path.getParent();
}
try {
removeKeys(keysToRemove, false, true);
} catch(AmazonClientException e) {
instrumentation.errorIgnored();
if (LOG.isDebugEnabled()) {
StringBuilder sb = new StringBuilder();
for(DeleteObjectsRequest.KeyVersion kv : keysToRemove) {
sb.append(kv.getKey()).append(",");
}
} catch (IOException | AmazonClientException e) {
LOG.debug("While deleting key {} ", key, e);
instrumentation.errorIgnored();
LOG.debug("While deleting keys {} ", sb.toString(), e);
}

if (f.isRoot()) {
break;
}

f = f.getParent();
}
}


private void createFakeDirectory(final String objectName)
throws AmazonClientException, AmazonServiceException,
InterruptedIOException {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ public class S3AInstrumentation {
private final MutableCounterLong numberOfFilesCopied;
private final MutableCounterLong bytesOfFilesCopied;
private final MutableCounterLong numberOfFilesDeleted;
private final MutableCounterLong numberOfFakeDirectoryDeletes;
private final MutableCounterLong numberOfDirectoriesCreated;
private final MutableCounterLong numberOfDirectoriesDeleted;
private final Map<String, MutableCounterLong> streamMetrics =
Expand Down Expand Up @@ -135,6 +136,7 @@ public S3AInstrumentation(URI name) {
numberOfFilesCopied = counter(FILES_COPIED);
bytesOfFilesCopied = counter(FILES_COPIED_BYTES);
numberOfFilesDeleted = counter(FILES_DELETED);
numberOfFakeDirectoryDeletes = counter(FAKE_DIRECTORIES_DELETED);
numberOfDirectoriesCreated = counter(DIRECTORIES_CREATED);
numberOfDirectoriesDeleted = counter(DIRECTORIES_DELETED);
ignoredErrors = counter(IGNORED_ERRORS);
Expand Down Expand Up @@ -295,6 +297,14 @@ public void fileDeleted(int count) {
numberOfFilesDeleted.incr(count);
}

/**
* Indicate that fake directory request was made.
* @param count number of directory entries included in the delete request.
*/
public void fakeDirsDeleted(int count) {
numberOfFakeDirectoryDeletes.incr(count);
}

/**
* Indicate that S3A created a directory.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,10 @@ public enum Statistic {
"Total number of files created through the object store."),
FILES_DELETED("files_deleted",
"Total number of files deleted from the object store."),
FAKE_DIRECTORIES_CREATED("fake_directories_created",
"Total number of fake directory entries created in the object store."),
FAKE_DIRECTORIES_DELETED("fake_directories_deleted",
"Total number of fake directory deletes submitted to object store."),
IGNORED_ERRORS("ignored_errors", "Errors caught and ignored"),
INVOCATION_COPY_FROM_LOCAL_FILE(CommonStatisticNames.OP_COPY_FROM_LOCAL_FILE,
"Calls of copyFromLocalFile()"),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -188,4 +188,89 @@ public void testCostOfCopyFromLocalFile() throws Throwable {
tmpFile.delete();
}
}

private void reset(MetricDiff... diffs) {
for (MetricDiff diff : diffs) {
diff.reset();
}
}

@Test
public void testFakeDirectoryDeletion() throws Throwable {
describe("Verify whether create file works after renaming a file. "
+ "In S3, rename deletes any fake directories as a part of "
+ "clean up activity");
S3AFileSystem fs = getFileSystem();
Path srcBaseDir = path("src");
mkdirs(srcBaseDir);
MetricDiff deleteRequests =
new MetricDiff(fs, Statistic.OBJECT_DELETE_REQUESTS);
MetricDiff directoriesDeleted =
new MetricDiff(fs, Statistic.DIRECTORIES_DELETED);
MetricDiff fakeDirectoriesDeleted =
new MetricDiff(fs, Statistic.FAKE_DIRECTORIES_DELETED);
MetricDiff directoriesCreated =
new MetricDiff(fs, Statistic.DIRECTORIES_CREATED);

Path srcDir = new Path(srcBaseDir, "1/2/3/4/5/6");
Path srcFilePath = new Path(srcDir, "source.txt");
int srcDirDepth = directoriesInPath(srcDir);
// one dir created, one removed
mkdirs(srcDir);
String state = "after mkdir(srcDir)";
directoriesCreated.assertDiffEquals(state, 1);
/* TODO: uncomment once HADOOP-13222 is in
deleteRequests.assertDiffEquals(state, 1);
directoriesDeleted.assertDiffEquals(state, 0);
fakeDirectoriesDeleted.assertDiffEquals(state, srcDirDepth);
*/
reset(deleteRequests, directoriesCreated, directoriesDeleted,
fakeDirectoriesDeleted);

// creating a file should trigger demise of the src dir
touch(fs, srcFilePath);
state = "after touch(fs, srcFilePath)";
deleteRequests.assertDiffEquals(state, 1);
directoriesCreated.assertDiffEquals(state, 0);
directoriesDeleted.assertDiffEquals(state, 0);
fakeDirectoriesDeleted.assertDiffEquals(state, srcDirDepth);

reset(deleteRequests, directoriesCreated, directoriesDeleted,
fakeDirectoriesDeleted);

Path destBaseDir = path("dest");
Path destDir = new Path(destBaseDir, "1/2/3/4/5/6");
Path destFilePath = new Path(destDir, "dest.txt");
mkdirs(destDir);
state = "after mkdir(destDir)";

int destDirDepth = directoriesInPath(destDir);
directoriesCreated.assertDiffEquals(state, 1);
/* TODO: uncomment once HADOOP-13222 is in
deleteRequests.assertDiffEquals(state,1);
directoriesDeleted.assertDiffEquals(state,0);
fakeDirectoriesDeleted.assertDiffEquals(state,destDirDepth);
*/
reset(deleteRequests, directoriesCreated, directoriesDeleted,
fakeDirectoriesDeleted);

fs.rename(srcFilePath, destFilePath);
state = "after rename(srcFilePath, destFilePath)";
directoriesCreated.assertDiffEquals(state, 1);
// one for the renamed file, one for the parent
deleteRequests.assertDiffEquals(state, 2);
directoriesDeleted.assertDiffEquals(state, 0);
fakeDirectoriesDeleted.assertDiffEquals(state, destDirDepth);

reset(deleteRequests, directoriesCreated, directoriesDeleted,
fakeDirectoriesDeleted);

assertIsFile(destFilePath);
assertIsDirectory(srcDir);
}

private int directoriesInPath(Path path) {
return path.isRoot() ? 0 : 1 + directoriesInPath(path.getParent());
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -298,13 +298,22 @@ public String toString() {

/**
* Assert that the value of {@link #diff()} matches that expected.
* @param message message to print; metric name is appended
* @param expected expected value.
*/
public void assertDiffEquals(long expected) {
Assert.assertEquals("Count of " + this,
public void assertDiffEquals(String message, long expected) {
Assert.assertEquals(message + ": " + statistic.getSymbol(),
expected, diff());
}

/**
* Assert that the value of {@link #diff()} matches that expected.
* @param expected expected value.
*/
public void assertDiffEquals(long expected) {
assertDiffEquals("Count of " + this, expected);
}

/**
* Assert that the value of {@link #diff()} matches that of another
* instance.
Expand Down

0 comments on commit ee0c722

Please sign in to comment.