Skip to content

Commit

Permalink
YARN-2619. Added NodeManager support for disk io isolation through cg…
Browse files Browse the repository at this point in the history
…roups. Contributed by Varun Vasudev and Wei Yan.
  • Loading branch information
vinoduec committed May 1, 2015
1 parent 98a6176 commit 1b3b9e5
Show file tree
Hide file tree
Showing 12 changed files with 600 additions and 134 deletions.
3 changes: 3 additions & 0 deletions hadoop-yarn-project/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,9 @@ Release 2.8.0 - UNRELEASED
YARN-2498. Respect labels in preemption policy of capacity scheduler for
inter-queue preemption. (Wangda Tan via jianhe)

YARN-2619. Added NodeManager support for disk io isolation through cgroups.
(Varun Vasudev and Wei Yan via vinodkv)

IMPROVEMENTS

YARN-1880. Cleanup TestApplicationClientProtocolOnHA
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -823,38 +823,68 @@ private static void addDeprecatedKeys() {
public static final int DEFAULT_NM_RESOURCE_PERCENTAGE_PHYSICAL_CPU_LIMIT =
100;

/**
* Prefix for disk configurations. Work in progress: This configuration
* parameter may be changed/removed in the future.
*/
@Private
public static final String NM_DISK_RESOURCE_PREFIX = NM_PREFIX
+ "resource.disk.";
/**
* This setting controls if resource handling for disk operations is enabled.
* Work in progress: This configuration parameter may be changed/removed in
* the future
*/
@Private
public static final String NM_DISK_RESOURCE_ENABLED = NM_DISK_RESOURCE_PREFIX
+ "enabled";
/** Disk as a resource is disabled by default. **/
@Private
public static final boolean DEFAULT_NM_DISK_RESOURCE_ENABLED = false;

public static final String NM_NETWORK_RESOURCE_PREFIX = NM_PREFIX + "resource.network.";
public static final String NM_NETWORK_RESOURCE_PREFIX = NM_PREFIX
+ "resource.network.";

/** This setting controls if resource handling for network bandwidth is enabled **/
/* Work in progress: This configuration parameter may be changed/removed in the future */
/**
* This setting controls if resource handling for network bandwidth is
* enabled. Work in progress: This configuration parameter may be
* changed/removed in the future
*/
@Private
public static final String NM_NETWORK_RESOURCE_ENABLED =
NM_NETWORK_RESOURCE_PREFIX + "enabled";
/** Network as a resource is disabled by default **/
/** Network as a resource is disabled by default. **/
@Private
public static final boolean DEFAULT_NM_NETWORK_RESOURCE_ENABLED = false;

/** Specifies the interface to be used for applying network throttling rules **/
/* Work in progress: This configuration parameter may be changed/removed in the future */
/**
* Specifies the interface to be used for applying network throttling rules.
* Work in progress: This configuration parameter may be changed/removed in
* the future
*/
@Private
public static final String NM_NETWORK_RESOURCE_INTERFACE =
NM_NETWORK_RESOURCE_PREFIX + "interface";
@Private
public static final String DEFAULT_NM_NETWORK_RESOURCE_INTERFACE = "eth0";

/** Specifies the total available outbound bandwidth on the node **/
/* Work in progress: This configuration parameter may be changed/removed in the future */
/**
* Specifies the total available outbound bandwidth on the node. Work in
* progress: This configuration parameter may be changed/removed in the future
*/
@Private
public static final String NM_NETWORK_RESOURCE_OUTBOUND_BANDWIDTH_MBIT =
NM_NETWORK_RESOURCE_PREFIX + "outbound-bandwidth-mbit";
@Private
public static final int DEFAULT_NM_NETWORK_RESOURCE_OUTBOUND_BANDWIDTH_MBIT = 1000;
public static final int DEFAULT_NM_NETWORK_RESOURCE_OUTBOUND_BANDWIDTH_MBIT =
1000;

/** Specifies the total outbound bandwidth available to YARN containers. defaults to
* NM_NETWORK_RESOURCE_OUTBOUND_BANDWIDTH_MBIT if not specified.
/**
* Specifies the total outbound bandwidth available to YARN containers.
* defaults to NM_NETWORK_RESOURCE_OUTBOUND_BANDWIDTH_MBIT if not specified.
* Work in progress: This configuration parameter may be changed/removed in
* the future
*/
/* Work in progress: This configuration parameter may be changed/removed in the future */
@Private
public static final String NM_NETWORK_RESOURCE_OUTBOUND_BANDWIDTH_YARN_MBIT =
NM_NETWORK_RESOURCE_PREFIX + "outbound-bandwidth-yarn-mbit";
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources;

import com.google.common.annotations.VisibleForTesting;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.Shell;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperation;

import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;

/**
 * Handler class to handle the blkio controller. Currently it splits resources
 * evenly across all containers by giving every container cgroup the same
 * {@link #DEFAULT_WEIGHT}. Once we have scheduling sorted out, we can modify
 * the function to represent the disk resources allocated.
 */
@InterfaceAudience.Private
@InterfaceStability.Unstable
public class CGroupsBlkioResourceHandlerImpl implements DiskResourceHandler {

  static final Log LOG = LogFactory
      .getLog(CGroupsBlkioResourceHandlerImpl.class);

  private final CGroupsHandler cGroupsHandler;
  // Arbitrarily choose a weight - all that matters is that all containers
  // get the same weight assigned to them. Once we have scheduling support
  // this number will be determined dynamically for each container.
  @VisibleForTesting
  static final String DEFAULT_WEIGHT = "500";
  private static final String PARTITIONS_FILE = "/proc/partitions";
  // Device name prefixes that are checked for the CFQ scheduler. Not meant
  // to be comprehensive, more a sanity check.
  private static final String[] KNOWN_DISK_PREFIXES =
      {"sd", "hd", "vd", "xvd"};
  // Number of whitespace separated columns in a row of the partitions file;
  // the device name is read from the last of them.
  private static final int PARTITIONS_COLUMN_COUNT = 4;

  /**
   * Creates the handler and, on Linux only, logs a warning for every known
   * disk that is not using the CFQ scheduler.
   *
   * @param cGroupsHandler handler used to mount, create, update and delete
   *                       the blkio cgroups
   */
  CGroupsBlkioResourceHandlerImpl(CGroupsHandler cGroupsHandler) {
    this.cGroupsHandler = cGroupsHandler;
    // check for linux so that we don't print messages for tests running on
    // other platforms
    if (Shell.LINUX) {
      checkDiskScheduler();
    }
  }

  /**
   * Reads the partitions file and warns about every device whose name starts
   * with one of the known disk prefixes and whose scheduler file does not
   * mark CFQ as the active scheduler. All failures are logged and swallowed;
   * this is a best-effort sanity check.
   */
  private void checkDiskScheduler() {
    String data;
    try {
      data = readTrimmed(PARTITIONS_FILE);
    } catch (IOException e) {
      String msg = "Couldn't read " + PARTITIONS_FILE +
          "; can't determine disk scheduler type";
      LOG.warn(msg, e);
      return;
    }
    // The file is produced by the kernel, so rows are separated by '\n'
    // regardless of the JVM's line.separator setting.
    for (String line : data.split("\n")) {
      String partition = parsePartitionName(line);
      if (partition != null && isKnownDisk(partition)) {
        warnIfNotCfq(partition);
      }
    }
  }

  /**
   * Extracts the device name from one row of the partitions file.
   *
   * The row is trimmed before it is split, so the result does not depend on
   * whether the row starts with padding whitespace.
   *
   * @param line a single row of the partitions file
   * @return the value of the fourth column, or null if the row has fewer
   *         than four columns (for example the blank separator row)
   */
  private static String parsePartitionName(String line) {
    String[] columns = line.trim().split("\\s+");
    if (columns.length < PARTITIONS_COLUMN_COUNT) {
      return null;
    }
    return columns[PARTITIONS_COLUMN_COUNT - 1];
  }

  /**
   * @param partition device name taken from the partitions file
   * @return true if the name starts with one of the known disk prefixes
   */
  private static boolean isKnownDisk(String partition) {
    for (String prefix : KNOWN_DISK_PREFIXES) {
      if (partition.startsWith(prefix)) {
        return true;
      }
    }
    return false;
  }

  /**
   * Logs a warning if the scheduler file of the given device exists and does
   * not contain "[cfq]". Devices without a scheduler file are ignored.
   *
   * @param partition device name taken from the partitions file
   */
  private void warnIfNotCfq(String partition) {
    String schedulerPath = "/sys/block/" + partition + "/queue/scheduler";
    File schedulerFile = new File(schedulerPath);
    if (!schedulerFile.exists()) {
      return;
    }
    try {
      String schedulerString = readTrimmed(schedulerPath);
      if (!schedulerString.contains("[cfq]")) {
        LOG.warn("Device " + partition + " does not use the CFQ"
            + " scheduler; disk isolation using "
            + "CGroups will not work on this partition.");
      }
    } catch (IOException ie) {
      LOG.warn(
          "Unable to determine disk scheduler type for partition "
              + partition, ie);
    }
  }

  /**
   * Reads a whole file as UTF-8 text and strips leading and trailing
   * whitespace.
   *
   * @param path path of the file to read
   * @return the trimmed file contents
   * @throws IOException if the file cannot be read or decoded
   */
  private static String readTrimmed(String path) throws IOException {
    byte[] contents = Files.readAllBytes(Paths.get(path));
    return new String(contents, "UTF-8").trim();
  }

  /**
   * Mounts the blkio cgroup controller.
   *
   * @param configuration not read by this handler
   * @return always null
   * @throws ResourceHandlerException if mounting the controller fails
   */
  @Override
  public List<PrivilegedOperation> bootstrap(Configuration configuration)
      throws ResourceHandlerException {
    // if bootstrap is called on this class, disk is already enabled
    // so no need to check again
    this.cGroupsHandler
        .mountCGroupController(CGroupsHandler.CGroupController.BLKIO);
    return null;
  }

  /**
   * Creates a blkio cgroup named after the container id and assigns it
   * {@link #DEFAULT_WEIGHT}. If the weight cannot be set, the freshly created
   * cgroup is deleted again before the exception is rethrown.
   *
   * @param container the container about to be started
   * @return a single ADD_PID_TO_CGROUP operation pointing at the tasks file
   *         of the container's blkio cgroup
   * @throws ResourceHandlerException if the cgroup cannot be created or
   *                                  updated
   */
  @Override
  public List<PrivilegedOperation> preStart(Container container)
      throws ResourceHandlerException {

    String cgroupId = container.getContainerId().toString();
    cGroupsHandler
        .createCGroup(CGroupsHandler.CGroupController.BLKIO, cgroupId);
    try {
      cGroupsHandler.updateCGroupParam(CGroupsHandler.CGroupController.BLKIO,
          cgroupId, CGroupsHandler.CGROUP_PARAM_BLKIO_WEIGHT, DEFAULT_WEIGHT);
    } catch (ResourceHandlerException re) {
      // don't leave a half-configured cgroup behind
      cGroupsHandler.deleteCGroup(CGroupsHandler.CGroupController.BLKIO,
          cgroupId);
      LOG.warn("Could not update cgroup for container", re);
      throw re;
    }
    List<PrivilegedOperation> ret = new ArrayList<>();
    ret.add(new PrivilegedOperation(
        PrivilegedOperation.OperationType.ADD_PID_TO_CGROUP,
        PrivilegedOperation.CGROUP_ARG_PREFIX
            + cGroupsHandler.getPathForCGroupTasks(
                CGroupsHandler.CGroupController.BLKIO, cgroupId)));
    return ret;
  }

  /**
   * No work is done when a container is reacquired.
   *
   * @param containerId id of the reacquired container
   * @return always null
   */
  @Override
  public List<PrivilegedOperation> reacquireContainer(ContainerId containerId)
      throws ResourceHandlerException {
    return null;
  }

  /**
   * Deletes the blkio cgroup named after the container id.
   *
   * @param containerId id of the completed container
   * @return always null
   * @throws ResourceHandlerException if the cgroup cannot be deleted
   */
  @Override
  public List<PrivilegedOperation> postComplete(ContainerId containerId)
      throws ResourceHandlerException {
    cGroupsHandler.deleteCGroup(CGroupsHandler.CGroupController.BLKIO,
        containerId.toString());
    return null;
  }

  /**
   * No work is done on teardown.
   *
   * @return always null
   */
  @Override
  public List<PrivilegedOperation> teardown() throws ResourceHandlerException {
    return null;
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@
public interface CGroupsHandler {
public enum CGroupController {
CPU("cpu"),
NET_CLS("net_cls");
NET_CLS("net_cls"),
BLKIO("blkio");

private final String name;

Expand All @@ -48,6 +49,7 @@ String getName() {

public static final String CGROUP_FILE_TASKS = "tasks";
public static final String CGROUP_PARAM_CLASSID = "classid";
public static final String CGROUP_PARAM_BLKIO_WEIGHT = "weight";

/**
* Mounts a cgroup controller
Expand Down
Loading

0 comments on commit 1b3b9e5

Please sign in to comment.