Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

gh-109330: Compare stats using opcode names, not numbers #109335

Merged
merged 1 commit into from
Sep 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 8 additions & 8 deletions Python/specialize.c
Original file line number Diff line number Diff line change
Expand Up @@ -123,19 +123,19 @@ _Py_GetSpecializationStats(void) {

#define PRINT_STAT(i, field) \
if (stats[i].field) { \
fprintf(out, " opcode[%d]." #field " : %" PRIu64 "\n", i, stats[i].field); \
fprintf(out, " opcode[%s]." #field " : %" PRIu64 "\n", _PyOpcode_OpName[i], stats[i].field); \
}

static void
print_spec_stats(FILE *out, OpcodeStats *stats)
{
/* Mark some opcodes as specializable for stats,
* even though we don't specialize them yet. */
fprintf(out, "opcode[%d].specializable : 1\n", BINARY_SLICE);
fprintf(out, "opcode[%d].specializable : 1\n", STORE_SLICE);
fprintf(out, "opcode[BINARY_SLICE].specializable : 1\n");
fprintf(out, "opcode[STORE_SLICE].specializable : 1\n");
for (int i = 0; i < 256; i++) {
if (_PyOpcode_Caches[i]) {
fprintf(out, "opcode[%d].specializable : 1\n", i);
fprintf(out, "opcode[%s].specializable : 1\n", _PyOpcode_OpName[i]);
}
PRINT_STAT(i, specialization.success);
PRINT_STAT(i, specialization.failure);
Expand All @@ -147,14 +147,14 @@ print_spec_stats(FILE *out, OpcodeStats *stats)
for (int j = 0; j < SPECIALIZATION_FAILURE_KINDS; j++) {
uint64_t val = stats[i].specialization.failure_kinds[j];
if (val) {
fprintf(out, " opcode[%d].specialization.failure_kinds[%d] : %"
PRIu64 "\n", i, j, val);
fprintf(out, " opcode[%s].specialization.failure_kinds[%d] : %"
PRIu64 "\n", _PyOpcode_OpName[i], j, val);
}
}
for (int j = 0; j < 256; j++) {
if (stats[i].pair_count[j]) {
fprintf(out, "opcode[%d].pair_count[%d] : %" PRIu64 "\n",
i, j, stats[i].pair_count[j]);
fprintf(out, "opcode[%s].pair_count[%s] : %" PRIu64 "\n",
_PyOpcode_OpName[i], _PyOpcode_OpName[j], stats[i].pair_count[j]);
}
}
}
Expand Down
72 changes: 27 additions & 45 deletions Tools/scripts/summarize_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,22 +16,6 @@
else:
DEFAULT_DIR = "/tmp/py_stats/"

#Create list of all instruction names
specialized = iter(opcode._specialized_opmap.keys())
opname = ["<0>"]
for name in opcode.opname[1:]:
if name.startswith("<"):
try:
name = next(specialized)
except StopIteration:
pass
opname.append(name)

# opcode_name --> opcode
# Sort alphabetically.
opmap = {name: i for i, name in enumerate(opname)}
opmap = dict(sorted(opmap.items()))

TOTAL = "specialization.hit", "specialization.miss", "execution_count"

def format_ratio(num, den):
Expand Down Expand Up @@ -200,12 +184,12 @@ def gather_stats(input):
raise ValueError(f"{input:r} is not a file or directory path")

def extract_opcode_stats(stats):
opcode_stats = [ {} for _ in range(256) ]
opcode_stats = collections.defaultdict(dict)
for key, value in stats.items():
if not key.startswith("opcode"):
continue
n, _, rest = key[7:].partition("]")
opcode_stats[int(n)][rest.strip(".")] = value
name, _, rest = key[7:].partition("]")
opcode_stats[name][rest.strip(".")] = value
return opcode_stats

def parse_kinds(spec_src, prefix="SPEC_FAIL"):
Expand Down Expand Up @@ -246,11 +230,10 @@ def categorized_counts(opcode_stats):
specialized_instructions = {
op for op in opcode._specialized_opmap.keys()
if "__" not in op}
for i, opcode_stat in enumerate(opcode_stats):
for name, opcode_stat in opcode_stats.items():
if "execution_count" not in opcode_stat:
continue
count = opcode_stat['execution_count']
name = opname[i]
if "specializable" in opcode_stat:
not_specialized += count
elif name in specialized_instructions:
Expand Down Expand Up @@ -314,13 +297,13 @@ def emit_table(header, rows):

def calculate_execution_counts(opcode_stats, total):
counts = []
for i, opcode_stat in enumerate(opcode_stats):
for name, opcode_stat in opcode_stats.items():
if "execution_count" in opcode_stat:
count = opcode_stat['execution_count']
miss = 0
if "specializable" not in opcode_stat:
miss = opcode_stat.get("specialization.miss")
counts.append((count, opname[i], miss))
counts.append((count, name, miss))
counts.sort(reverse=True)
cumulative = 0
rows = []
Expand Down Expand Up @@ -381,16 +364,17 @@ def get_defines():
def emit_specialization_stats(opcode_stats):
defines = get_defines()
with Section("Specialization stats", summary="specialization stats by family"):
for i, opcode_stat in enumerate(opcode_stats):
name = opname[i]
for name, opcode_stat in opcode_stats.items():
print_specialization_stats(name, opcode_stat, defines)

def emit_comparative_specialization_stats(base_opcode_stats, head_opcode_stats):
defines = get_defines()
with Section("Specialization stats", summary="specialization stats by family"):
for i, (base_opcode_stat, head_opcode_stat) in enumerate(zip(base_opcode_stats, head_opcode_stats)):
name = opname[i]
print_comparative_specialization_stats(name, base_opcode_stat, head_opcode_stat, defines)
opcodes = set(base_opcode_stats.keys()) & set(head_opcode_stats.keys())
for opcode in opcodes:
print_comparative_specialization_stats(
opcode, base_opcode_stats[opcode], head_opcode_stats[opcode], defines
)

def calculate_specialization_effectiveness(opcode_stats, total):
basic, not_specialized, specialized = categorized_counts(opcode_stats)
Expand All @@ -407,12 +391,12 @@ def emit_specialization_overview(opcode_stats, total):
for title, field in (("Deferred", "specialization.deferred"), ("Misses", "specialization.miss")):
total = 0
counts = []
for i, opcode_stat in enumerate(opcode_stats):
for name, opcode_stat in opcode_stats.items():
# Avoid double counting misses
if title == "Misses" and "specializable" in opcode_stat:
continue
value = opcode_stat.get(field, 0)
counts.append((value, opname[i]))
counts.append((value, name))
total += value
counts.sort(reverse=True)
if total:
Expand Down Expand Up @@ -539,29 +523,27 @@ def emit_comparative_gc_stats(base_stats, head_stats):

def get_total(opcode_stats):
total = 0
for opcode_stat in opcode_stats:
for opcode_stat in opcode_stats.values():
if "execution_count" in opcode_stat:
total += opcode_stat['execution_count']
return total

def emit_pair_counts(opcode_stats, total):
pair_counts = []
for i, opcode_stat in enumerate(opcode_stats):
if i == 0:
continue
for name_i, opcode_stat in opcode_stats.items():
for key, value in opcode_stat.items():
if key.startswith("pair_count"):
x, _, _ = key[11:].partition("]")
name_j, _, _ = key[11:].partition("]")
if value:
pair_counts.append((value, (i, int(x))))
pair_counts.append((value, (name_i, name_j)))
with Section("Pair counts", summary="Pair counts for top 100 pairs"):
pair_counts.sort(reverse=True)
cumulative = 0
rows = []
for (count, pair) in itertools.islice(pair_counts, 100):
i, j = pair
name_i, name_j = pair
cumulative += count
rows.append((opname[i] + " " + opname[j], count, format_ratio(count, total),
rows.append((f"{name_i} {name_j}", count, format_ratio(count, total),
format_ratio(cumulative, total)))
emit_table(("Pair", "Count:", "Self:", "Cumulative:"),
rows
Expand All @@ -577,18 +559,18 @@ def emit_pair_counts(opcode_stats, total):
successors[first][second] = count
total_predecessors[second] += count
total_successors[first] += count
for name, i in opmap.items():
total1 = total_predecessors[i]
total2 = total_successors[i]
for name in opcode_stats.keys():
total1 = total_predecessors[name]
total2 = total_successors[name]
if total1 == 0 and total2 == 0:
continue
pred_rows = succ_rows = ()
if total1:
pred_rows = [(opname[pred], count, f"{count/total1:.1%}")
for (pred, count) in predecessors[i].most_common(5)]
pred_rows = [(pred, count, f"{count/total1:.1%}")
for (pred, count) in predecessors[name].most_common(5)]
if total2:
succ_rows = [(opname[succ], count, f"{count/total2:.1%}")
for (succ, count) in successors[i].most_common(5)]
succ_rows = [(succ, count, f"{count/total2:.1%}")
for (succ, count) in successors[name].most_common(5)]
with Section(name, 3, f"Successors and predecessors for {name}"):
emit_table(("Predecessors", "Count:", "Percentage:"),
pred_rows
Expand Down
Loading