Skip to content

Commit

Permalink
HIVE-8965 - Enhance PTest to kill all processes between tests and to …
Browse files Browse the repository at this point in the history
…report when a TEST*.xml file is not generated (Brock reviewed by Szehon)

git-svn-id: https://svn.apache.org/repos/asf/hive/trunk@1641739 13f79535-47bb-0310-9956-ffa450edef68
  • Loading branch information
Brock Noland committed Nov 26, 2014
1 parent fab3d62 commit bba0402
Show file tree
Hide file tree
Showing 10 changed files with 48 additions and 10 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,10 @@ public void execute() throws Throwable {
JUnitReportParser parser = new JUnitReportParser(logger, batchLogDir);
executedTests.addAll(parser.getExecutedTests());
failedTests.addAll(parser.getFailedTests());
// if the TEST-*.xml file was not generated or was corrupt, record a failure so someone notices
if (parser.getNumAttemptedTests() == 0) {
failedTests.add(batch.getName() + " - did not produce a TEST-*.xml file");
}
}
} finally {
long elapsed = System.currentTimeMillis() - start;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@
import java.util.concurrent.CopyOnWriteArrayList;
import java.util.concurrent.TimeUnit;

import org.apache.commons.io.FileUtils;
import org.apache.hive.ptest.execution.conf.Host;
import org.apache.hive.ptest.execution.conf.TestBatch;
import org.apache.hive.ptest.execution.ssh.RSyncCommand;
Expand Down Expand Up @@ -218,7 +217,7 @@ private boolean executeTestBatch(Drone drone, TestBatch batch, Set<TestBatch> fa
script.delete();
mLogger.info(drone + " executing " + batch + " with " + command);
RemoteCommandResult sshResult = new SSHCommand(mSSHCommandExecutor, drone.getPrivateKey(), drone.getUser(),
drone.getHost(), drone.getInstance(), command).
drone.getHost(), drone.getInstance(), command, true).
call();
File batchLogDir = null;
if(sshResult.getExitCode() == Constants.EXIT_CODE_UNKNOWN) {
Expand Down Expand Up @@ -341,13 +340,27 @@ RSyncResult copyFromDroneToLocal(Drone drone, String localFile, String remoteFil
}
return result;
}
/**
* Execute command with error reporting disabled by delegating to
* {@code exec(cmd, false)}. Unlike {@link #exec(String)}, an exit code of
* {@code Constants.EXIT_CODE_UNKNOWN} does not remove the drone from the pool,
* and SSHCommandExecutor does not retry a command created with
* {@code reportErrors == false}. It is used for best-effort cleanup such as
* {@code pkill -f java}.
*
* @param cmd command template; it is expanded with Templates before being run
* @return future holding the SSH result of the command
* @throws Exception propagated from the private {@code exec(String, boolean)}
*/
ListenableFuture<SSHResult> execIgnoreAllErrors(final String cmd)
throws Exception {
return exec(cmd, false);
}
/**
 * Execute the command on at least one drone with error reporting enabled.
 * The command is retried when it exits with a status code of 255 until all
 * drones have been utilized, and the failing host may be excluded from
 * future use.
 *
 * @param cmd command template to run
 * @return future holding the SSH result of the command
 * @throws Exception propagated from the private {@code exec(String, boolean)}
 */
ListenableFuture<SSHResult> exec(final String cmd)
    throws Exception {
  // Error reporting is what enables the retry and drone-abort handling.
  final boolean reportErrors = true;
  return exec(cmd, reportErrors);
}

private ListenableFuture<SSHResult> exec(final String cmd, final boolean reportErrors)
throws Exception {
return mExecutor.submit(new Callable<SSHResult>() {
@Override
public SSHResult call() throws Exception {
Expand All @@ -357,8 +370,8 @@ public SSHResult call() throws Exception {
templateVariables.put("localDir", drone.getLocalDirectory());
String command = Templates.getTemplateResult(cmd, templateVariables);
SSHResult result = new SSHCommand(mSSHCommandExecutor, drone.getPrivateKey(), drone.getUser(),
drone.getHost(), drone.getInstance(), command).call();
if(result.getExitCode() == Constants.EXIT_CODE_UNKNOWN) {
drone.getHost(), drone.getInstance(), command, reportErrors).call();
if(reportErrors && result.getExitCode() == Constants.EXIT_CODE_UNKNOWN) {
mDrones.remove(drone); // return value not checked due to concurrent access
mLogger.error("Aborting drone during exec " + command,
new AbortDroneException("Drone " + drone + " exited with "
Expand Down Expand Up @@ -388,7 +401,7 @@ public RemoteCommandResult call() throws Exception {
templateVariables.put("localDir", drone.getLocalDirectory());
String command = Templates.getTemplateResult(cmd, templateVariables);
SSHResult result = new SSHCommand(mSSHCommandExecutor, drone.getPrivateKey(), drone.getUser(),
drone.getHost(), drone.getInstance(), command).call();
drone.getHost(), drone.getInstance(), command, true).call();
if(result.getExitCode() != Constants.EXIT_CODE_SUCCESS) {
mDrones.remove(drone); // return value not checked due to concurrent access
mLogger.error("Aborting drone during exec " + command,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,9 @@ public Set<String> getFailedTests() {
}
return failedTests;
}
/**
 * Returns the size of the executed-test set plus the size of the failed-test
 * set. A result of zero means the parser found neither executed nor failed
 * tests.
 * NOTE(review): whether the two sets overlap is not visible here, so treat
 * the value as a "was anything parsed" indicator rather than an exact count.
 *
 * @return sum of the sizes of the executed and failed test sets
 */
public int getNumAttemptedTests() {
  final int executedCount = getExecutedTests().size();
  final int failedCount = getFailedTests().size();
  return executedCount + failedCount;
}
private void parse() {
for(File file : getFiles(directory)) {
FileInputStream stream = null;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,14 @@ protected List<SSHResult> execHosts(String command)
}
return toListOfResults(futures);
}
/**
 * Submits the command to every host executor through
 * {@code execIgnoreAllErrors} and then converts the collected futures
 * with {@code toListOfResults}.
 *
 * @param command command template to run on each host
 * @return the results produced by {@code toListOfResults} for the submitted futures
 * @throws Exception if submitting the command or collecting a result fails
 */
protected List<SSHResult> execHostsIgnoreErrors(String command)
    throws Exception {
  List<ListenableFuture<SSHResult>> pending = Lists.newArrayList();
  for (HostExecutor executor : hostExecutors) {
    ListenableFuture<SSHResult> submitted = executor.execIgnoreAllErrors(command);
    pending.add(submitted);
  }
  return toListOfResults(pending);
}
// clean prep
protected List<RemoteCommandResult> execInstances(String command)
throws Exception {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,8 @@ public void execute() throws Exception {
Templates.writeTemplateResult("source-prep.vm", sourcePrepScript, getTemplateDefaults());
execLocally("bash " + sourcePrepScript.getPath());
logger.debug("Deleting " + sourcePrepScript + ": " + sourcePrepScript.delete());
execHostsIgnoreErrors("pkill -f java");
execHostsIgnoreErrors("pkill -9 -f java");
elapsedTime = TimeUnit.MINUTES.convert((System.currentTimeMillis() - start),
TimeUnit.MILLISECONDS);
logger.info("PERF: source prep took " + elapsedTime + " minutes");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -286,7 +286,7 @@ private Set<NodeMetadata> verifyHosts(Set<? extends NodeMetadata> hosts)
@Override
public void run() {
String ip = publicIpOrHostname(node);
SSHCommand command = new SSHCommand(mSSHCommandExecutor, mPrivateKey, mUser, ip, 0, "pkill -f java");
SSHCommand command = new SSHCommand(mSSHCommandExecutor, mPrivateKey, mUser, ip, 0, "pkill -f java", true);
mSSHCommandExecutor.execute(command);
if(command.getExitCode() == Constants.EXIT_CODE_UNKNOWN ||
command.getException() != null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,14 @@ public class SSHCommand extends AbstractSSHCommand<SSHResult> {

private final SSHCommandExecutor executor;
private final String command;
private final boolean reportErrors;

public SSHCommand(SSHCommandExecutor executor, String privateKey,
String user, String host, int instance, String command) {
String user, String host, int instance, String command, boolean reportErrors) {
super(privateKey, user, host, instance);
this.executor = executor;
this.command = command;
this.reportErrors = reportErrors;
}

@Override
Expand All @@ -37,6 +39,10 @@ public SSHResult call() {
getExitCode(), getException(), getOutput());
}

/**
 * @return the {@code reportErrors} flag this command was constructed with
 */
public boolean isReportErrors() {
  return this.reportErrors;
}

/**
 * @return the command string this SSHCommand was constructed with
 */
public String getCommand() {
  return this.command;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ public void execute(SSHCommand command) {
command.setExitCode(Constants.EXIT_CODE_UNKNOWN);
return;
}
if(attempts++ <= 3 && cmd.getExitCode() == Constants.EXIT_CODE_UNKNOWN) {
if(command.isReportErrors() && attempts++ <= 3 && cmd.getExitCode() == Constants.EXIT_CODE_UNKNOWN) {
mLogger.warn("Command exited with " + cmd.getExitCode() + ", will retry: " + command);
retry = true;
TimeUnit.SECONDS.sleep(5);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
bash /tmp/hive-ptest-units/TestPrepPhase/source-prep.sh
mkdir -p /some/working/dir/scratch
pkill -9 -f java
pkill -f java
rm -rf /some/working/dir/scratch
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ public void testShutdownBeforeWaitFor() throws Exception {
"-o StrictHostKeyChecking=no");
Assert.assertFalse(executor.isShutdown());
executor.shutdownNow();
SSHCommand command = new SSHCommand(executor, "privateKey", "user", "host", 1, "whoami");
SSHCommand command = new SSHCommand(executor, "privateKey", "user", "host", 1, "whoami", true);
executor.execute(command);
Assert.assertTrue(executor.isShutdown());
Assert.assertEquals(Constants.EXIT_CODE_UNKNOWN, command.getExitCode());
Expand All @@ -77,7 +77,7 @@ public Integer answer(InvocationOnMock invocation) throws Throwable {
return Constants.EXIT_CODE_UNKNOWN;
}
});
SSHCommand command = new SSHCommand(executor, "privateKey", "user", "host", 1, "whoami");
SSHCommand command = new SSHCommand(executor, "privateKey", "user", "host", 1, "whoami", true);
executor.execute(command);
Assert.assertTrue(executor.isShutdown());
Assert.assertEquals(Constants.EXIT_CODE_UNKNOWN, command.getExitCode());
Expand Down

0 comments on commit bba0402

Please sign in to comment.