diff options
Diffstat (limited to 'libcxx/utils')
| -rwxr-xr-x | libcxx/utils/cat_files.py | 51 | ||||
| -rw-r--r-- | libcxx/utils/ci/BOT_OWNERS.txt | 4 | ||||
| -rw-r--r-- | libcxx/utils/ci/docker-compose.yml | 2 | ||||
| -rwxr-xr-x | libcxx/utils/ci/vendor/android/run-buildbot-container | 2 | ||||
| -rwxr-xr-x | libcxx/utils/compare-benchmarks | 123 | ||||
| -rwxr-xr-x | libcxx/utils/consolidate-benchmarks | 36 | ||||
| -rw-r--r-- | libcxx/utils/generate_feature_test_macro_components.py | 4 | ||||
| -rw-r--r-- | libcxx/utils/generate_libcxx_cppm_in.py | 12 | ||||
| -rwxr-xr-x | libcxx/utils/libcxx-benchmark-json | 57 | ||||
| -rwxr-xr-x | libcxx/utils/libcxx-compare-benchmarks | 73 | ||||
| -rw-r--r-- | libcxx/utils/libcxx/sym_check/util.py | 2 | ||||
| -rw-r--r-- | libcxx/utils/libcxx/test/features.py | 2 | ||||
| -rw-r--r-- | libcxx/utils/libcxx/test/format.py | 28 | ||||
| -rw-r--r-- | libcxx/utils/libcxx/test/params.py | 11 | ||||
| -rwxr-xr-x | libcxx/utils/parse-google-benchmark-results | 45 | ||||
| -rwxr-xr-x | libcxx/utils/parse-spec-results | 108 | ||||
| -rw-r--r-- | libcxx/utils/requirements.txt | 2 | ||||
| -rwxr-xr-x | libcxx/utils/synchronize_csv_status_files.py | 44 |
18 files changed, 405 insertions, 201 deletions
diff --git a/libcxx/utils/cat_files.py b/libcxx/utils/cat_files.py deleted file mode 100755 index 77127cb98c8a..000000000000 --- a/libcxx/utils/cat_files.py +++ /dev/null @@ -1,51 +0,0 @@ -#!/usr/bin/env python -# ===----------------------------------------------------------------------===## -# -# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -# See https://llvm.org/LICENSE.txt for license information. -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -# -# ===----------------------------------------------------------------------===## - -from argparse import ArgumentParser -import sys - - -def print_and_exit(msg): - sys.stderr.write(msg + "\n") - sys.exit(1) - - -def main(): - parser = ArgumentParser(description="Concatenate two files into a single file") - parser.add_argument( - "-o", - "--output", - dest="output", - required=True, - help="The output file. stdout is used if not given", - type=str, - action="store", - ) - parser.add_argument( - "files", metavar="files", nargs="+", help="The files to concatenate" - ) - - args = parser.parse_args() - - if len(args.files) < 2: - print_and_exit("fewer than 2 inputs provided") - data = "" - for filename in args.files: - with open(filename, "r") as f: - data += f.read() - if len(data) != 0 and data[-1] != "\n": - data += "\n" - assert len(data) > 0 and "cannot cat empty files" - with open(args.output, "w") as f: - f.write(data) - - -if __name__ == "__main__": - main() - sys.exit(0) diff --git a/libcxx/utils/ci/BOT_OWNERS.txt b/libcxx/utils/ci/BOT_OWNERS.txt index 2d01b96d72a6..90f827251087 100644 --- a/libcxx/utils/ci/BOT_OWNERS.txt +++ b/libcxx/utils/ci/BOT_OWNERS.txt @@ -18,6 +18,6 @@ E: powerllvm@ca.ibm.com D: AIX, ppc64le N: Android libc++ -E: rprichard@google.com -H: rprichard +E: pirama@google.com, sharjeelkhan@google.com +G: pirama-arumuga-nainar, Sharjeel-Khan D: Emulator-based x86[-64] libc++ CI testing diff --git a/libcxx/utils/ci/docker-compose.yml b/libcxx/utils/ci/docker-compose.yml index d8ba8e5fba2b..e89623d2a847 100644 --- a/libcxx/utils/ci/docker-compose.yml +++ b/libcxx/utils/ci/docker-compose.yml @@ -37,4 +37,4 @@ services: ANDROID_CLANG_VERSION: r563880 ANDROID_CLANG_PREBUILTS_COMMIT: 6ae4184bb8706f9731569b9a0a82be3fcdcb951c ANDROID_SYSROOT_COMMIT: f8b85cc5262c6e5cbc9a92c1bab2b18b32a4c63f - <<: *compiler_versions + <<: [*image_versions, *compiler_versions] diff --git a/libcxx/utils/ci/vendor/android/run-buildbot-container b/libcxx/utils/ci/vendor/android/run-buildbot-container index 7b5d9a4cc3fe..81c719b1f8b9 100755 --- a/libcxx/utils/ci/vendor/android/run-buildbot-container +++ b/libcxx/utils/ci/vendor/android/run-buildbot-container @@ -27,5 +27,5 @@ if [ -S /var/run/docker.sock ]; then DOCKER_OPTIONS+=(--volume /var/run/docker.sock:/var/run/docker.sock) fi -docker run "${DOCKER_OPTIONS[@]}" ghcr.io/libcxx/android-buildkite-builder \ +docker run "${DOCKER_OPTIONS[@]}" ghcr.io/llvm/libcxx-android-builder \ bash -c 'git config --global --add safe.directory /llvm; (/opt/android/container-setup.sh && exec bash)' diff --git a/libcxx/utils/compare-benchmarks b/libcxx/utils/compare-benchmarks new file mode 100755 index 000000000000..9bda5f1a2794 --- /dev/null +++ b/libcxx/utils/compare-benchmarks @@ -0,0 +1,123 @@ +#!/usr/bin/env python3 + +import argparse +import re +import statistics +import sys + +import plotly +import tabulate + +def parse_lnt(lines): + """ + Parse lines in LNT format and return a dictionnary of the form: + + { + 'benchmark1': { + 'metric1': [float], + 'metric2': [float], + ... + }, + 'benchmark2': { + 'metric1': [float], + 'metric2': [float], + ... + }, + ... + } + + Each metric may have multiple values. + """ + results = {} + for line in lines: + line = line.strip() + if not line: + continue + + (identifier, value) = line.split(' ') + (name, metric) = identifier.split('.') + if name not in results: + results[name] = {} + if metric not in results[name]: + results[name][metric] = [] + results[name][metric].append(float(value)) + return results + +def plain_text_comparison(benchmarks, baseline, candidate): + """ + Create a tabulated comparison of the baseline and the candidate. + """ + headers = ['Benchmark', 'Baseline', 'Candidate', 'Difference', '% Difference'] + fmt = (None, '.2f', '.2f', '.2f', '.2f') + table = [] + for (bm, base, cand) in zip(benchmarks, baseline, candidate): + diff = (cand - base) if base and cand else None + percent = 100 * (diff / base) if base and cand else None + row = [bm, base, cand, diff, percent] + table.append(row) + return tabulate.tabulate(table, headers=headers, floatfmt=fmt, numalign='right') + +def create_chart(benchmarks, baseline, candidate): + """ + Create a bar chart comparing 'baseline' and 'candidate'. + """ + figure = plotly.graph_objects.Figure() + figure.add_trace(plotly.graph_objects.Bar(x=benchmarks, y=baseline, name='Baseline')) + figure.add_trace(plotly.graph_objects.Bar(x=benchmarks, y=candidate, name='Candidate')) + return figure + +def prepare_series(baseline, candidate, metric, aggregate=statistics.median): + """ + Prepare the data for being formatted or displayed as a chart. + + Metrics that have more than one value are aggregated using the given aggregation function. + """ + all_benchmarks = sorted(list(set(baseline.keys()) | set(candidate.keys()))) + baseline_series = [] + candidate_series = [] + for bm in all_benchmarks: + baseline_series.append(aggregate(baseline[bm][metric]) if bm in baseline and metric in baseline[bm] else None) + candidate_series.append(aggregate(candidate[bm][metric]) if bm in candidate and metric in candidate[bm] else None) + return (all_benchmarks, baseline_series, candidate_series) + +def main(argv): + parser = argparse.ArgumentParser( + prog='compare-benchmarks', + description='Compare the results of two sets of benchmarks in LNT format.', + epilog='This script requires the `tabulate` and the `plotly` Python modules.') + parser.add_argument('baseline', type=argparse.FileType('r'), + help='Path to a LNT format file containing the benchmark results for the baseline.') + parser.add_argument('candidate', type=argparse.FileType('r'), + help='Path to a LNT format file containing the benchmark results for the candidate.') + parser.add_argument('--metric', type=str, default='execution_time', + help='The metric to compare. LNT data may contain multiple metrics (e.g. code size, execution time, etc) -- ' + 'this option allows selecting which metric is being analyzed. The default is "execution_time".') + parser.add_argument('--output', '-o', type=argparse.FileType('w'), default=sys.stdout, + help='Path of a file where to output the resulting comparison. Default to stdout.') + parser.add_argument('--filter', type=str, required=False, + help='An optional regular expression used to filter the benchmarks included in the comparison. ' + 'Only benchmarks whose names match the regular expression will be included.') + parser.add_argument('--format', type=str, choices=['text', 'chart'], default='text', + help='Select the output format. "text" generates a plain-text comparison in tabular form, and "chart" ' + 'generates a self-contained HTML graph that can be opened in a browser. The default is text.') + args = parser.parse_args(argv) + + baseline = parse_lnt(args.baseline.readlines()) + candidate = parse_lnt(args.candidate.readlines()) + + if args.filter is not None: + regex = re.compile(args.filter) + baseline = {k: v for (k, v) in baseline.items() if regex.search(k)} + candidate = {k: v for (k, v) in candidate.items() if regex.search(k)} + + (benchmarks, baseline_series, candidate_series) = prepare_series(baseline, candidate, args.metric) + + if args.format == 'chart': + figure = create_chart(benchmarks, baseline_series, candidate_series) + plotly.io.write_html(figure, file=args.output) + else: + diff = plain_text_comparison(benchmarks, baseline_series, candidate_series) + args.output.write(diff) + +if __name__ == '__main__': + main(sys.argv[1:]) diff --git a/libcxx/utils/consolidate-benchmarks b/libcxx/utils/consolidate-benchmarks new file mode 100755 index 000000000000..c84607f1991c --- /dev/null +++ b/libcxx/utils/consolidate-benchmarks @@ -0,0 +1,36 @@ +#!/usr/bin/env python3 + +import argparse +import pathlib +import sys + +def main(argv): + parser = argparse.ArgumentParser( + prog='consolidate-benchmarks', + description='Consolidate benchmark result files (in LNT format) into a single LNT-format file.') + parser.add_argument('files_or_directories', type=str, nargs='+', + help='Path to files or directories containing LNT data to consolidate. Directories are searched ' + 'recursively for files with a .lnt extension.') + parser.add_argument('--output', '-o', type=argparse.FileType('w'), default=sys.stdout, + help='Where to output the result. Default to stdout.') + args = parser.parse_args(argv) + + files = [] + for arg in args.files_or_directories: + path = pathlib.Path(arg) + if path.is_dir(): + for p in path.rglob('*.lnt'): + files.append(p) + else: + files.append(path) + + for file in files: + for line in file.open().readlines(): + line = line.strip() + if not line: + continue + args.output.write(line) + args.output.write('\n') + +if __name__ == '__main__': + main(sys.argv[1:]) diff --git a/libcxx/utils/generate_feature_test_macro_components.py b/libcxx/utils/generate_feature_test_macro_components.py index 8d57a07b8836..c1e579c77574 100644 --- a/libcxx/utils/generate_feature_test_macro_components.py +++ b/libcxx/utils/generate_feature_test_macro_components.py @@ -569,7 +569,7 @@ feature_test_macros = [ "headers": ["format"], # Trying to use `std::format` where to_chars floating-point is not # available causes compilation errors, even with non floating-point types. - # https://github.com/llvm/llvm-project/issues/125353 + # https://llvm.org/PR125353 "test_suite_guard": "!defined(_LIBCPP_VERSION) || _LIBCPP_AVAILABILITY_HAS_TO_CHARS_FLOATING_POINT", "libcxx_guard": "_LIBCPP_AVAILABILITY_HAS_TO_CHARS_FLOATING_POINT", }, @@ -1053,7 +1053,7 @@ feature_test_macros = [ "headers": ["ostream", "print"], # Trying to use `std::print` where to_chars floating-point is not # available causes compilation errors, even with non floating-point types. - # https://github.com/llvm/llvm-project/issues/125353 + # https://llvm.org/PR125353 "test_suite_guard": "!defined(_LIBCPP_VERSION) || _LIBCPP_AVAILABILITY_HAS_TO_CHARS_FLOATING_POINT", "libcxx_guard": "_LIBCPP_AVAILABILITY_HAS_TO_CHARS_FLOATING_POINT", }, diff --git a/libcxx/utils/generate_libcxx_cppm_in.py b/libcxx/utils/generate_libcxx_cppm_in.py index 39076a61b55b..26d680a0db31 100644 --- a/libcxx/utils/generate_libcxx_cppm_in.py +++ b/libcxx/utils/generate_libcxx_cppm_in.py @@ -9,11 +9,19 @@ import os.path import sys -from libcxx.header_information import module_c_headers, module_headers, header_restrictions, headers_not_available, libcxx_root +from libcxx.header_information import ( + module_c_headers, + module_headers, + header_restrictions, + headers_not_available, + libcxx_root, +) def write_file(module): - with open(libcxx_root / "modules" / f"{module}.cppm.in", "w") as module_cpp_in: + with open( + libcxx_root / "modules" / f"{module}.cppm.in", "w", encoding="utf-8" + ) as module_cpp_in: module_cpp_in.write( """\ // -*- C++ -*- diff --git a/libcxx/utils/libcxx-benchmark-json b/libcxx/utils/libcxx-benchmark-json deleted file mode 100755 index 7f743c32caf4..000000000000 --- a/libcxx/utils/libcxx-benchmark-json +++ /dev/null @@ -1,57 +0,0 @@ -#!/usr/bin/env bash - -set -e - -PROGNAME="$(basename "${0}")" -MONOREPO_ROOT="$(realpath $(dirname "${PROGNAME}"))" -function usage() { -cat <<EOF -Usage: -${PROGNAME} [-h|--help] <build-directory> benchmarks... - -Print the path to the JSON files containing benchmark results for the given benchmarks. - -This requires those benchmarks to have already been run, i.e. this only resolves the path -to the benchmark .json file within the build directory. - -<build-directory> The path to the build directory. -benchmarks... Paths of the benchmarks to extract the results for. Those paths are relative to '<monorepo-root>'. - -Example -======= -$ cmake -S runtimes -B build/ -DLLVM_ENABLE_RUNTIMES="libcxx;libcxxabi" -$ libcxx-lit build/ -sv libcxx/test/benchmarks/algorithms/for_each.bench.cpp -$ less \$(${PROGNAME} build/ libcxx/test/benchmarks/algorithms/for_each.bench.cpp) -EOF -} - -if [[ "${1}" == "-h" || "${1}" == "--help" ]]; then - usage - exit 0 -fi - -if [[ $# -lt 1 ]]; then - usage - exit 1 -fi - -build_dir="${1}" -shift - -for benchmark in ${@}; do - # Normalize the paths by turning all benchmarks paths into absolute ones and then making them - # relative to the root of the monorepo. - benchmark="$(realpath ${benchmark})" - relative=$(python -c "import os; import sys; print(os.path.relpath(sys.argv[1], sys.argv[2]))" "${benchmark}" "${MONOREPO_ROOT}") - - # Extract components of the benchmark path - directory="$(dirname ${relative})" - file="$(basename ${relative})" - - # Reconstruct the (slightly weird) path to the benchmark json file. This should be kept in sync - # whenever the test suite changes. - json="${build_dir}/${directory}/Output/${file}.dir/benchmark-result.json" - if [[ -f "${json}" ]]; then - echo "${json}" - fi -done diff --git a/libcxx/utils/libcxx-compare-benchmarks b/libcxx/utils/libcxx-compare-benchmarks deleted file mode 100755 index 08c53b2420c8..000000000000 --- a/libcxx/utils/libcxx-compare-benchmarks +++ /dev/null @@ -1,73 +0,0 @@ -#!/usr/bin/env bash - -set -e - -PROGNAME="$(basename "${0}")" -MONOREPO_ROOT="$(realpath $(dirname "${PROGNAME}"))" -function usage() { -cat <<EOF -Usage: -${PROGNAME} [-h|--help] <baseline-build> <candidate-build> benchmarks... [-- gbench-args...] - -Compare the given benchmarks between the baseline and the candidate build directories. - -This requires those benchmarks to have already been generated in both build directories. - -<baseline-build> The path to the build directory considered the baseline. -<candidate-build> The path to the build directory considered the candidate. -benchmarks... Paths of the benchmarks to compare. Those paths are relative to '<monorepo-root>'. -[-- gbench-args...] Any arguments provided after '--' will be passed as-is to GoogleBenchmark's compare.py tool. - -Example -======= -$ libcxx-lit build1/ -sv libcxx/test/benchmarks/algorithms/for_each.bench.cpp -$ libcxx-lit build2/ -sv libcxx/test/benchmarks/algorithms/for_each.bench.cpp -$ ${PROGNAME} build1/ build2/ libcxx/test/benchmarks/algorithms/for_each.bench.cpp -EOF -} - -if [[ "${1}" == "-h" || "${1}" == "--help" ]]; then - usage - exit 0 -fi - -if [[ $# -lt 1 ]]; then - usage - exit 1 -fi - -baseline="${1}" -candidate="${2}" -shift; shift - -GBENCH="${MONOREPO_ROOT}/third-party/benchmark" - -python3 -m venv /tmp/libcxx-compare-benchmarks-venv -source /tmp/libcxx-compare-benchmarks-venv/bin/activate -pip3 install -r ${GBENCH}/tools/requirements.txt - -benchmarks="" -while [[ $# -gt 0 ]]; do - if [[ "${1}" == "--" ]]; then - shift - break - fi - benchmarks+=" ${1}" - shift -done - -for benchmark in ${benchmarks}; do - base="$(${MONOREPO_ROOT}/libcxx/utils/libcxx-benchmark-json ${baseline} ${benchmark})" - cand="$(${MONOREPO_ROOT}/libcxx/utils/libcxx-benchmark-json ${candidate} ${benchmark})" - - if [[ ! -e "${base}" ]]; then - echo "Benchmark ${benchmark} does not exist in the baseline" - continue - fi - if [[ ! -e "${cand}" ]]; then - echo "Benchmark ${benchmark} does not exist in the candidate" - continue - fi - - "${GBENCH}/tools/compare.py" benchmarks "${base}" "${cand}" ${@} -done diff --git a/libcxx/utils/libcxx/sym_check/util.py b/libcxx/utils/libcxx/sym_check/util.py index fc7ba4244ab5..dbc886f29dde 100644 --- a/libcxx/utils/libcxx/sym_check/util.py +++ b/libcxx/utils/libcxx/sym_check/util.py @@ -95,7 +95,7 @@ def is_xcoff_or_big_ar(filename): with open(filename, "rb") as f: magic_bytes = f.read(7) return ( - magic_bytes[:4] in [b"\x01DF", b"\x01F7"] # XCOFF32 # XCOFF64 + magic_bytes[:2] in [b"\x01\xDF", b"\x01\xF7"] # XCOFF32 # XCOFF64 or magic_bytes == b"<bigaf>" ) diff --git a/libcxx/utils/libcxx/test/features.py b/libcxx/utils/libcxx/test/features.py index 0cb81546665d..a964f3bdeeb4 100644 --- a/libcxx/utils/libcxx/test/features.py +++ b/libcxx/utils/libcxx/test/features.py @@ -176,7 +176,7 @@ DEFAULT_FEATURES = [ when=lambda cfg: hasCompileFlag(cfg, "-Xclang -verify-ignore-unexpected"), ), Feature( - name="add-latomic-workaround", # https://github.com/llvm/llvm-project/issues/73361 + name="add-latomic-workaround", # https://llvm.org/PR73361 when=lambda cfg: sourceBuilds( cfg, "int main(int, char**) { return 0; }", ["-latomic"] ), diff --git a/libcxx/utils/libcxx/test/format.py b/libcxx/utils/libcxx/test/format.py index 59d0fffd3781..5765afec399c 100644 --- a/libcxx/utils/libcxx/test/format.py +++ b/libcxx/utils/libcxx/test/format.py @@ -12,6 +12,8 @@ import lit.formats import os import re +THIS_FILE = os.path.abspath(__file__) +LIBCXX_UTILS = os.path.dirname(os.path.dirname(os.path.dirname(THIS_FILE))) def _getTempPaths(test): """ @@ -34,11 +36,14 @@ def _checkBaseSubstitutions(substitutions): def _executeScriptInternal(test, litConfig, commands): """ - Returns (stdout, stderr, exitCode, timeoutInfo, parsedCommands) + Returns (stdout, stderr, exitCode, timeoutInfo, parsedCommands), or an appropriate lit.Test.Result + in case of an error while parsing the script. TODO: This really should be easier to access from Lit itself """ parsedCommands = parseScript(test, preamble=commands) + if isinstance(parsedCommands, lit.Test.Result): + return parsedCommands _, tmpBase = _getTempPaths(test) execDir = os.path.dirname(test.getExecPath()) @@ -65,7 +70,8 @@ def parseScript(test, preamble): """ Extract the script from a test, with substitutions applied. - Returns a list of commands ready to be executed. + Returns a list of commands ready to be executed, or an appropriate lit.Test.Result in case of error + while parsing the script (this includes the script being unsupported). - test The lit.Test to parse. @@ -349,7 +355,11 @@ class CxxStandardLibraryTest(lit.formats.FileBasedTest): ] if "enable-benchmarks=run" in test.config.available_features: steps += ["%dbg(EXECUTED AS) %{exec} %t.exe --benchmark_out=%T/benchmark-result.json --benchmark_out_format=json"] + parse_results = os.path.join(LIBCXX_UTILS, 'parse-google-benchmark-results') + steps += [f"{parse_results} %T/benchmark-result.json --output-format=lnt > %T/results.lnt"] return self._executeShTest(test, litConfig, steps) + elif re.search('[.]gen[.][^.]+$', filename): # This only happens when a generator test is not supported + return self._executeShTest(test, litConfig, []) else: return lit.Test.Result( lit.Test.UNRESOLVED, "Unknown test suffix for '{}'".format(filename) @@ -381,11 +391,19 @@ class CxxStandardLibraryTest(lit.formats.FileBasedTest): generatorExecDir = os.path.dirname(testSuite.getExecPath(pathInSuite)) os.makedirs(generatorExecDir, exist_ok=True) - # Run the generator test + # Run the generator test. It's possible for this to fail for two reasons: the generator test + # is unsupported or the generator ran but failed at runtime -- handle both. In the first case, + # we return the generator test itself, since it should produce the same result when run after + # test suite generation. In the second case, it's a true error so we report it. steps = [] # Steps must already be in the script - (out, err, exitCode, _, _) = _executeScriptInternal(generator, litConfig, steps) + result = _executeScriptInternal(generator, litConfig, steps) + if isinstance(result, lit.Test.Result): + yield generator + return + + (out, err, exitCode, _, _) = result if exitCode != 0: - raise RuntimeError(f"Error while trying to generate gen test\nstdout:\n{out}\n\nstderr:\n{err}") + raise RuntimeError(f"Error while trying to generate gen test {'/'.join(pathInSuite)}\nstdout:\n{out}\n\nstderr:\n{err}") # Split the generated output into multiple files and generate one test for each file for subfile, content in self._splitFile(out): diff --git a/libcxx/utils/libcxx/test/params.py b/libcxx/utils/libcxx/test/params.py index 81c613421a46..6f013a75195a 100644 --- a/libcxx/utils/libcxx/test/params.py +++ b/libcxx/utils/libcxx/test/params.py @@ -375,6 +375,17 @@ DEFAULT_PARAMETERS = [ actions=lambda mode: [AddFeature(f"enable-benchmarks={mode}")], ), Parameter( + name="spec_dir", + type=str, + default="none", + help="Path to the SPEC benchmarks. This is required in order to run the SPEC benchmarks as part of " + "the libc++ test suite. If provided, the appropriate SPEC toolset must already be built and installed.", + actions=lambda spec_dir: [ + AddSubstitution("%{spec_dir}", spec_dir), + AddFeature('enable-spec-benchmarks') + ] if spec_dir != "none" else [], + ), + Parameter( name="long_tests", choices=[True, False], type=bool, diff --git a/libcxx/utils/parse-google-benchmark-results b/libcxx/utils/parse-google-benchmark-results new file mode 100755 index 000000000000..280c8045db6c --- /dev/null +++ b/libcxx/utils/parse-google-benchmark-results @@ -0,0 +1,45 @@ +#!/usr/bin/env python3 + +import argparse +import csv +import json +import sys + +def main(argv): + parser = argparse.ArgumentParser( + prog='parse-google-benchmark-results', + description='Parse Google Benchmark result files (in JSON format) into CSV or LNT compatible output.') + parser.add_argument('filename', type=argparse.FileType('r'), nargs='+', + help='One of more JSON files to extract the results from. The results parsed from each ' + 'file are concatenated together.') + parser.add_argument('--timing', type=str, choices=['real_time', 'cpu_time'], default='real_time', + help='The timing to extract from the Google Benchmark results. This can either be the ' + '"real time" or the "CPU time". Default is "real time".') + parser.add_argument('--output-format', type=str, choices=['csv', 'lnt'], default='csv', + help='The desired output format for the data. `csv` is CSV format and `lnt` is a format compatible with ' + '`lnt importreport` (see https://llvm.org/docs/lnt/importing_data.html#importing-data-in-a-text-file).') + args = parser.parse_args(argv) + + # Parse the data from all files, aggregating the results + headers = ['Benchmark', args.timing] + rows = [] + for file in args.filename: + js = json.load(file) + for bm in js['benchmarks']: + row = [bm['name'], bm[args.timing]] + rows.append(row) + + # Print the results in the right format + if args.output_format == 'csv': + writer = csv.writer(sys.stdout) + writer.writerow(headers) + for row in rows: + writer.writerow(row) + elif args.output_format == 'lnt': + benchmark = headers.index('Benchmark') + time = headers.index(args.timing) + for row in rows: + print(f'{row[benchmark].replace(".", "_")}.execution_time {row[time]}') + +if __name__ == '__main__': + main(sys.argv[1:]) diff --git a/libcxx/utils/parse-spec-results b/libcxx/utils/parse-spec-results new file mode 100755 index 000000000000..3aff206f8959 --- /dev/null +++ b/libcxx/utils/parse-spec-results @@ -0,0 +1,108 @@ +#!/usr/bin/env python3 + +import argparse +import csv +import sys + +def parse_table(rows, table_title): + """ + Parse a CSV table out of an iterator over rows. + + Return a tuple containing (extracted headers, extracted rows). + """ + in_table = False + rows_iter = iter(rows) + extracted = [] + headers = None + while True: + try: + row = next(rows_iter) + except StopIteration: + break + + if not in_table and row == [table_title]: + in_table = True + next_row = next(rows_iter) + assert next_row == [], f'There should be an empty row after the title of the table, found {next_row}' + headers = next(rows_iter) # Extract the headers + continue + + elif in_table and row == []: # An empty row marks the end of the table + in_table = False + break + + elif in_table: + extracted.append(row) + + assert len(extracted) != 0, f'Could not extract rows from the table, this is suspicious. Table title was {table_title}' + assert headers is not None, f'Could not extract headers from the table, this is suspicious. Table title was {table_title}' + + return (headers, extracted) + +def main(argv): + parser = argparse.ArgumentParser( + prog='parse-spec-results', + description='Parse SPEC result files (in CSV format) and extract the selected result table, in the selected format.') + parser.add_argument('filename', type=argparse.FileType('r'), nargs='+', + help='One of more CSV files to extract the results from. The results parsed from each file are concatenated ' + 'together.') + parser.add_argument('--table', type=str, choices=['full', 'selected'], default='full', + help='The name of the table to extract from SPEC results. `full` means extracting the Full Results Table ' + 'and `selected` means extracting the Selected Results Table. Default is `full`.') + parser.add_argument('--output-format', type=str, choices=['csv', 'lnt'], default='csv', + help='The desired output format for the data. `csv` is CSV format and `lnt` is a format compatible with ' + '`lnt importreport` (see https://llvm.org/docs/lnt/importing_data.html#importing-data-in-a-text-file).') + parser.add_argument('--extract', type=str, + help='A comma-separated list of headers to extract from the table. If provided, only the data associated to ' + 'those headers will be present in the resulting data. Invalid header names are diagnosed. Please make ' + 'sure to use appropriate quoting for header names that contain spaces. This option only makes sense ' + 'when the output format is CSV.') + parser.add_argument('--keep-not-run', action='store_true', + help='Keep entries whose \'Base Status\' is marked as \'NR\', aka \'Not Run\'. By default, such entries are discarded.') + args = parser.parse_args(argv) + + if args.table == 'full': + table_title = 'Full Results Table' + elif args.table == 'selected': + table_title = 'Selected Results Table' + + # Parse the headers and the rows in each file, aggregating all the results + headers = None + rows = [] + for file in args.filename: + reader = csv.reader(file) + (parsed_headers, parsed_rows) = parse_table(reader, table_title) + assert headers is None or headers == parsed_headers, f'Found files with different headers: {headers} and {parsed_headers}' + headers = parsed_headers + rows.extend(parsed_rows) + + # Remove rows that were not run unless we were asked to keep them + if not args.keep_not_run: + not_run = headers.index('Base Status') + rows = [row for row in rows if row[not_run] != 'NR'] + + if args.extract is not None: + if args.output_format != 'csv': + raise RuntimeError('Passing --extract requires the output format to be csv') + for h in args.extract.split(','): + if h not in headers: + raise RuntimeError(f'Header name {h} was not present in the parsed headers {headers}') + + extracted_fields = [headers.index(h) for h in args.extract.split(',')] + headers = [headers[i] for i in extracted_fields] + rows = [[row[i] for i in extracted_fields] for row in rows] + + # Print the results in the right format + if args.output_format == 'csv': + writer = csv.writer(sys.stdout) + writer.writerow(headers) + for row in rows: + writer.writerow(row) + elif args.output_format == 'lnt': + benchmark = headers.index('Benchmark') + time = headers.index('Est. Base Run Time') + for row in rows: + print(f'{row[benchmark].replace(".", "_")}.execution_time {row[time]}') + +if __name__ == '__main__': + main(sys.argv[1:]) diff --git a/libcxx/utils/requirements.txt b/libcxx/utils/requirements.txt new file mode 100644 index 000000000000..de6e123eec54 --- /dev/null +++ b/libcxx/utils/requirements.txt @@ -0,0 +1,2 @@ +plotly +tabulate diff --git a/libcxx/utils/synchronize_csv_status_files.py b/libcxx/utils/synchronize_csv_status_files.py index 5dbd734de7fb..66cc100a8eee 100755 --- a/libcxx/utils/synchronize_csv_status_files.py +++ b/libcxx/utils/synchronize_csv_status_files.py @@ -154,6 +154,12 @@ class PaperInfo: First version of LLVM in which this paper/issue was resolved. """ + github_issue: Optional[str] + """ + Optional number of the Github issue tracking the implementation status of this paper. + This is used to cross-reference rows in the status pages with Github issues. + """ + notes: Optional[str] """ Optional plain text string representing notes to associate to the paper. @@ -170,6 +176,7 @@ class PaperInfo: status: PaperStatus, meeting: Optional[str] = None, first_released_version: Optional[str] = None, + github_issue: Optional[str] = None, notes: Optional[str] = None, original: Optional[object] = None): self.paper_number = paper_number @@ -177,16 +184,18 @@ class PaperInfo: self.status = status self.meeting = meeting self.first_released_version = first_released_version + self.github_issue = github_issue self.notes = notes self.original = original - def for_printing(self) -> Tuple[str, str, str, str, str, str]: + def for_printing(self) -> Tuple[str, str, str, str, str, str, str]: return ( f'`{self.paper_number} <https://wg21.link/{self.paper_number}>`__', self.paper_name, self.meeting if self.meeting is not None else '', self.status.to_csv_entry(), self.first_released_version if self.first_released_version is not None else '', + f'`#{self.github_issue} <https://github.com/llvm/llvm-project/issues/{self.github_issue}>`__' if self.github_issue is not None else '', self.notes if self.notes is not None else '', ) @@ -203,13 +212,19 @@ class PaperInfo: if match is None: raise RuntimeError(f"Can't parse paper/issue number out of row: {row}") + # Match the issue number if present + github_issue = re.search(r'#([0-9]+)', row[5]) + if github_issue: + github_issue = github_issue.group(1) + return PaperInfo( paper_number=match.group(1), paper_name=row[1], status=PaperStatus.from_csv_entry(row[3]), meeting=row[2] or None, first_released_version=row[4] or None, - notes=row[5] or None, + github_issue=github_issue, + notes=row[6] or None, original=row, ) @@ -235,6 +250,7 @@ class PaperInfo: status=PaperStatus.from_github_issue(issue), meeting=issue.get('meeting Voted', None), first_released_version=None, # TODO + github_issue=str(issue['content']['number']), notes=notes, original=issue, ) @@ -252,19 +268,24 @@ def merge(paper: PaperInfo, gh: PaperInfo) -> PaperInfo: is not useful. In case we don't update the CSV row's status, we still take any updated notes coming - from the Github issue. + from the Github issue and we add a link to the Github issue if it was previously missing. """ + took_gh_in_full = False # Whether we updated the entire PaperInfo from the Github version if paper.status == PaperStatus(PaperStatus.TODO) and gh.status == PaperStatus(PaperStatus.IN_PROGRESS): result = copy.deepcopy(paper) - result.notes = gh.notes elif paper.status < gh.status: result = copy.deepcopy(gh) + took_gh_in_full = True elif paper.status == gh.status: result = copy.deepcopy(paper) - result.notes = gh.notes else: print(f"We found a CSV row and a Github issue with different statuses:\nrow: {paper}\nGithub issue: {gh}") result = copy.deepcopy(paper) + + # If we didn't take the Github issue in full, make sure to update the notes, the link and anything else. + if not took_gh_in_full: + result.github_issue = gh.github_issue + result.notes = gh.notes return result def load_csv(file: pathlib.Path) -> List[Tuple]: @@ -285,6 +306,8 @@ def create_github_issue(paper: PaperInfo, labels: List[str]) -> None: """ Create a new Github issue representing the given PaperInfo. """ + assert paper.github_issue is None, "Trying to create a Github issue for a paper that is already tracked" + paper_name = paper.paper_name.replace('``', '`').replace('\\', '') create_cli = ['gh', 'issue', 'create', '--repo', 'llvm/llvm-project', @@ -363,6 +386,17 @@ def sync_csv(rows: List[Tuple], from_github: List[PaperInfo], create_new: bool, results.append(row) continue + # Validate the Github issue associated to the CSV row, if any + if paper.github_issue is not None: + if len(tracking) == 0: + print(f"Found row claiming to have a tracking issue, but failed to find a tracking issue on Github: {row}") + results.append(row) + continue + if len(tracking) == 1 and paper.github_issue != tracking[0].github_issue: + print(f"Found row with incorrect tracking issue: {row}\ntracked by: {tracking[0]}") + results.append(row) + continue + # If there is no tracking issue for that row and we are creating new issues, do that. # Otherwise just log that we're missing an issue. if len(tracking) == 0: |
