Skip to content

Commit

Permalink
YARN-4138. Roll back container resource allocation after resource inc…
Browse files Browse the repository at this point in the history
…rease token expires. Contributed by Meng Ding
  • Loading branch information
jian-he committed Feb 11, 2016
1 parent aeb13ef commit d16b17b
Show file tree
Hide file tree
Showing 16 changed files with 804 additions and 227 deletions.
4 changes: 4 additions & 0 deletions hadoop-yarn-project/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -804,6 +804,10 @@ Release 2.8.0 - UNRELEASED

YARN-4420. Add REST API for List Reservations. (Sean Po via curino)

YARN-4138. Roll back container resource allocation after resource
increase token expires. (Meng Ding via jianhe)


OPTIMIZATIONS

YARN-3339. TestDockerContainerExecutor should pull a single image and not
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.hadoop.yarn.server.resourcemanager.rmcontainer;


import org.apache.hadoop.yarn.api.records.ContainerId;

/**
 * Immutable record of a container allocation that is subject to expiration:
 * the id of the container plus a flag saying whether the pending allocation
 * is a resource increase.
 *
 * <p>Identity is defined by the container id alone. {@link #hashCode()},
 * {@link #equals(Object)} and {@link #compareTo(AllocationExpirationInfo)}
 * all ignore the {@code increase} flag, so an instance built with the
 * single-argument constructor matches an instance for the same container
 * that was built with {@code increase == true}.
 */
public class AllocationExpirationInfo
    implements Comparable<AllocationExpirationInfo> {

  private final ContainerId containerId;
  private final boolean increase;

  /**
   * Creates an entry for a regular (non-increase) allocation.
   *
   * @param containerId id of the container this entry refers to
   */
  public AllocationExpirationInfo(ContainerId containerId) {
    this(containerId, false);
  }

  /**
   * Creates an entry for the given container.
   *
   * @param containerId id of the container this entry refers to
   * @param increase {@code true} if the entry tracks a resource increase
   */
  public AllocationExpirationInfo(ContainerId containerId, boolean increase) {
    this.containerId = containerId;
    this.increase = increase;
  }

  /** @return the id of the container this entry refers to */
  public ContainerId getContainerId() {
    return containerId;
  }

  /** @return {@code true} if this entry tracks a resource increase */
  public boolean isIncrease() {
    return increase;
  }

  /**
   * Hash derived from the container id only, keeping it consistent with
   * {@link #equals(Object)}.
   */
  @Override
  public int hashCode() {
    final int idHash = getContainerId().hashCode();
    return idHash << 16;
  }

  /**
   * Two entries are equal when they refer to the same container id; the
   * {@code increase} flag does not take part in the comparison.
   */
  @Override
  public boolean equals(Object other) {
    return other instanceof AllocationExpirationInfo
        && compareTo((AllocationExpirationInfo) other) == 0;
  }

  /**
   * Orders entries by container id.
   *
   * @param other the entry to compare against; {@code null} yields -1
   * @return the result of comparing the two container ids, or -1 when
   *         {@code other} is {@code null}
   */
  @Override
  public int compareTo(AllocationExpirationInfo other) {
    // The container id is the only field that defines ordering.
    return other == null
        ? -1
        : getContainerId().compareTo(other.getContainerId());
  }

  @Override
  public String toString() {
    StringBuilder text = new StringBuilder();
    text.append("<container=").append(getContainerId());
    text.append(", increase=").append(isIncrease());
    text.append(">");
    return text.toString();
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
package org.apache.hadoop.yarn.server.resourcemanager.rmcontainer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.event.Dispatcher;
import org.apache.hadoop.yarn.event.EventHandler;
Expand All @@ -28,7 +27,7 @@

@SuppressWarnings({"unchecked", "rawtypes"})
public class ContainerAllocationExpirer extends
AbstractLivelinessMonitor<ContainerId> {
AbstractLivelinessMonitor<AllocationExpirationInfo> {

private EventHandler dispatcher;

Expand All @@ -47,7 +46,9 @@ public void serviceInit(Configuration conf) throws Exception {
}

@Override
protected void expire(ContainerId containerId) {
dispatcher.handle(new ContainerExpiredSchedulerEvent(containerId));
protected void expire(AllocationExpirationInfo allocationExpirationInfo) {
dispatcher.handle(new ContainerExpiredSchedulerEvent(
allocationExpirationInfo.getContainerId(),
allocationExpirationInfo.isIncrease()));
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,8 @@ public interface RMContainer extends EventHandler<RMContainerEvent> {

Resource getAllocatedResource();

Resource getLastConfirmedResource();

NodeId getAllocatedNode();

Priority getAllocatedPriority();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

package org.apache.hadoop.yarn.server.resourcemanager.rmcontainer;

import java.util.Collections;
import java.util.EnumSet;
import java.util.List;
import java.util.concurrent.locks.ReentrantReadWriteLock;
Expand Down Expand Up @@ -49,12 +50,15 @@
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEventType;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptContainerFinishedEvent;
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeCleanContainerEvent;
import org.apache.hadoop.yarn.server.resourcemanager.rmnode
.RMNodeDecreaseContainerEvent;
import org.apache.hadoop.yarn.state.InvalidStateTransitionException;
import org.apache.hadoop.yarn.state.MultipleArcTransition;
import org.apache.hadoop.yarn.state.SingleArcTransition;
import org.apache.hadoop.yarn.state.StateMachine;
import org.apache.hadoop.yarn.state.StateMachineFactory;
import org.apache.hadoop.yarn.util.ConverterUtils;
import org.apache.hadoop.yarn.util.resource.Resources;
import org.apache.hadoop.yarn.webapp.util.WebAppUtils;

@SuppressWarnings({"unchecked", "rawtypes"})
Expand Down Expand Up @@ -119,9 +123,6 @@ RMContainerEventType.KILL, new KillTransition())
RMContainerEventType.RELEASED, new KillTransition())
.addTransition(RMContainerState.RUNNING, RMContainerState.RUNNING,
RMContainerEventType.RESERVED, new ContainerReservedTransition())
.addTransition(RMContainerState.RUNNING, RMContainerState.EXPIRED,
RMContainerEventType.EXPIRE,
new ContainerExpiredWhileRunningTransition())
.addTransition(RMContainerState.RUNNING, RMContainerState.RUNNING,
RMContainerEventType.CHANGE_RESOURCE, new ChangeResourceTransition())
.addTransition(RMContainerState.RUNNING, RMContainerState.RUNNING,
Expand Down Expand Up @@ -177,7 +178,10 @@ RMContainerEventType.CHANGE_RESOURCE, new ChangeResourceTransition())
private List<ResourceRequest> resourceRequests;

private volatile boolean hasIncreaseReservation = false;

// Only used for container resource increase and decrease. This is the
// resource to roll back to if the container resource increase token
// expires.
private Resource lastConfirmedResource;

public RMContainerImpl(Container container,
ApplicationAttemptId appAttemptId, NodeId nodeId, String user,
RMContext rmContext) {
Expand Down Expand Up @@ -210,6 +214,7 @@ public RMContainerImpl(Container container,
this.isAMContainer = false;
this.resourceRequests = null;
this.nodeLabelExpression = nodeLabelExpression;
this.lastConfirmedResource = container.getResource();

ReentrantReadWriteLock lock = new ReentrantReadWriteLock();
this.readLock = lock.readLock();
Expand Down Expand Up @@ -283,6 +288,16 @@ public Resource getAllocatedResource() {
}
}

/**
 * Returns the last confirmed resource of this container, read under the
 * read lock.
 *
 * @return the current value of {@code lastConfirmedResource}
 */
@Override
public Resource getLastConfirmedResource() {
  // Acquire the lock before entering the try block: if lock() itself fails,
  // the finally clause must not call unlock() on a lock that was never taken.
  readLock.lock();
  try {
    return this.lastConfirmedResource;
  } finally {
    readLock.unlock();
  }
}

@Override
public NodeId getAllocatedNode() {
return container.getNodeId();
Expand Down Expand Up @@ -525,7 +540,8 @@ public void transition(RMContainerImpl container, RMContainerEvent event) {
container.setResourceRequests(null);

// Register with containerAllocationExpirer.
container.containerAllocationExpirer.register(container.getContainerId());
container.containerAllocationExpirer.register(
new AllocationExpirationInfo(container.getContainerId()));

// Tell the app
container.eventHandler.handle(new RMAppRunningOnNodeEvent(container
Expand All @@ -543,7 +559,8 @@ public void transition(RMContainerImpl container, RMContainerEvent event) {
if (acquiredEvent.isIncreasedContainer()) {
// If container is increased but not acquired by AM, we will start
// containerAllocationExpirer for this container in this transition.
container.containerAllocationExpirer.register(event.getContainerId());
container.containerAllocationExpirer.register(
new AllocationExpirationInfo(event.getContainerId(), true));
}
}
}
Expand All @@ -553,22 +570,65 @@ private static final class NMReportedContainerChangeIsDoneTransition

@Override
public void transition(RMContainerImpl container, RMContainerEvent event) {
// Unregister the allocation expirer, it is already increased..
container.containerAllocationExpirer.unregister(event.getContainerId());
}
}

private static final class ContainerExpiredWhileRunningTransition extends
BaseTransition {
RMContainerNMDoneChangeResourceEvent nmDoneChangeResourceEvent =
(RMContainerNMDoneChangeResourceEvent)event;
Resource rmContainerResource = container.getAllocatedResource();
Resource nmContainerResource =
nmDoneChangeResourceEvent.getNMContainerResource();

if (Resources.equals(rmContainerResource, nmContainerResource)) {
// If rmContainerResource == nmContainerResource, the resource
// increase is confirmed.
// In this case:
// - Set the lastConfirmedResource as nmContainerResource
// - Unregister the allocation expirer
container.lastConfirmedResource = nmContainerResource;
container.containerAllocationExpirer.unregister(
new AllocationExpirationInfo(event.getContainerId()));
} else if (Resources.fitsIn(rmContainerResource, nmContainerResource)) {
// If rmContainerResource < nmContainerResource, this is caused by the
// following sequence:
// 1. AM asks for increase from 1G to 5G, and RM approves it
// 2. AM acquires the increase token and increases on NM
// 3. Before NM reports 5G to RM to confirm the increase, AM sends
// a decrease request to 4G, and RM approves it
// 4. When NM reports 5G to RM, RM now sees its own allocation as 4G
// In this case:
// - Set the lastConfirmedResource as rmContainerResource
// - Unregister the allocation expirer
// - Notify NM to reduce its resource to rmContainerResource
container.lastConfirmedResource = rmContainerResource;
container.containerAllocationExpirer.unregister(
new AllocationExpirationInfo(event.getContainerId()));
container.eventHandler.handle(new RMNodeDecreaseContainerEvent(
container.nodeId,
Collections.singletonList(container.getContainer())));
} else if (Resources.fitsIn(nmContainerResource, rmContainerResource)) {
// If nmContainerResource < rmContainerResource, this is caused by the
// following sequence:
// 1. AM asks for increase from 1G to 2G, and RM approves it
// 2. AM asks for increase from 2G to 4G, and RM approves it
// 3. AM only uses the 2G token to increase on NM, but never uses the
// 4G token
// 4. NM reports 2G to RM, but RM sees its own allocation as 4G
// In this case:
// - Set the lastConfirmedResource as the maximum of
// nmContainerResource and lastConfirmedResource
// - Do NOT unregister the allocation expirer
// When the increase allocation expires, resource will be rolled back to
// the last confirmed resource.
container.lastConfirmedResource = Resources.componentwiseMax(
nmContainerResource, container.lastConfirmedResource);
} else {
// Something wrong happened, kill the container
LOG.warn("Something wrong happened, container size reported by NM"
+ " is not expected, ContainerID=" + container.containerId
+ " rm-size-resource:" + rmContainerResource + " nm-size-reosurce:"
+ nmContainerResource);
container.eventHandler.handle(new RMNodeCleanContainerEvent(
container.nodeId, container.containerId));

@Override
public void transition(RMContainerImpl container, RMContainerEvent event) {
// When the container expired, and it has a pending increased request, we
// will kill the container.
// TODO, we can do better for this: roll back container resource to the
// resource before increase, and notify scheduler about this decrease as
// well. Will do that in a separated JIRA.
new KillTransition().transition(container, event);
}
}
}

Expand All @@ -577,20 +637,22 @@ private static final class ChangeResourceTransition extends BaseTransition {
@Override
public void transition(RMContainerImpl container, RMContainerEvent event) {
RMContainerChangeResourceEvent changeEvent = (RMContainerChangeResourceEvent)event;
// Register with containerAllocationExpirer.
// For now, we assume timeout for increase is as same as container
// allocation.

Resource targetResource = changeEvent.getTargetResource();
Resource lastConfirmedResource = container.lastConfirmedResource;

if (!changeEvent.isIncrease()) {
// if this is a decrease request, if container was increased but not
// told to NM, we can consider previous increase is cancelled,
// unregister from the containerAllocationExpirer
container.containerAllocationExpirer.unregister(container
.getContainerId());
// Only unregister from the containerAllocationExpirer when target
// resource is less than or equal to the last confirmed resource.
if (Resources.fitsIn(targetResource, lastConfirmedResource)) {
container.lastConfirmedResource = targetResource;
container.containerAllocationExpirer.unregister(
new AllocationExpirationInfo(event.getContainerId()));
}
}
container.container.setResource(changeEvent.getTargetResource());

container.container.setResource(targetResource);

// We reach here means we either allocated increase reservation OR
// decreased container, reservation will be cancelled anyway.
container.hasIncreaseReservation = false;
Expand Down Expand Up @@ -662,8 +724,8 @@ private static final class KillTransition extends FinishedTransition {
public void transition(RMContainerImpl container, RMContainerEvent event) {

// Unregister from containerAllocationExpirer.
container.containerAllocationExpirer.unregister(container
.getContainerId());
container.containerAllocationExpirer.unregister(
new AllocationExpirationInfo(container.getContainerId()));

// Inform node
container.eventHandler.handle(new RMNodeCleanContainerEvent(
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.hadoop.yarn.server.resourcemanager.rmcontainer;

import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.Resource;

/**
 * {@link RMContainerEvent} of type
 * {@link RMContainerEventType#NM_DONE_CHANGE_RESOURCE} that additionally
 * carries a container {@link Resource} value.
 */
public class RMContainerNMDoneChangeResourceEvent extends RMContainerEvent {

  /** Resource value attached to this event; fixed at construction time. */
  private final Resource nmContainerResource;

  /**
   * Creates the event for the given container.
   *
   * @param containerId id of the container the event is addressed to
   * @param nmContainerResource resource value to carry with the event
   */
  public RMContainerNMDoneChangeResourceEvent(
      ContainerId containerId, Resource nmContainerResource) {
    super(containerId, RMContainerEventType.NM_DONE_CHANGE_RESOURCE);
    this.nmContainerResource = nmContainerResource;
  }

  /** @return the resource value supplied when the event was created */
  public Resource getNMContainerResource() {
    return this.nmContainerResource;
  }
}
Loading

0 comments on commit d16b17b

Please sign in to comment.