diff options
Diffstat (limited to 'libcxx/test/libcxx-03/transitive_includes/to_csv.py')
| -rwxr-xr-x | libcxx/test/libcxx-03/transitive_includes/to_csv.py | 120 |
1 files changed, 120 insertions, 0 deletions
#!/usr/bin/env python
# ===----------------------------------------------------------------------===##
#
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# ===----------------------------------------------------------------------===##

from typing import List, Tuple, Optional
import argparse
import io
import itertools
import os
import pathlib
import re
import sys

# The libc++ root is four directory levels above this script; its `utils`
# directory is appended to sys.path so `libcxx.header_information` can be
# imported below.
libcxx_root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
sys.path.append(os.path.join(libcxx_root, "utils"))
from libcxx.header_information import Header


def parse_line(line: str) -> Tuple[int, str]:
    """
    Parse a single line of --trace-includes output.

    Returns the inclusion level and the raw file name being included.

    Raises ValueError if the line does not consist of one or more periods,
    a single space, and a non-empty file name.
    """
    match = re.match(r"(\.+) (.+)", line)
    if not match:
        # The previous code raised an undefined name (`ArgumentError`), which
        # surfaced as a NameError instead of this diagnostic.
        raise ValueError(f"Line {line} contains invalid data.")

    # The number of periods in front of the header name is the nesting level of
    # that header.
    return (len(match.group(1)), match.group(2))


def make_cxx_v1_relative(header: str) -> Optional[str]:
    """
    Returns the path of the header as relative to <whatever>/c++/v1, or None if the path
    doesn't contain c++/v1.

    We use that heuristic to figure out which headers are libc++ headers.
    """
    # On Windows, the path separators can either be forward slash or backslash.
    # If it is a backslash, Clang prints it escaped as two consecutive
    # backslashes, and they need to be escaped in the RE. (Use a raw string for
    # the pattern to avoid needing another level of escaping on the Python string
    # literal level.)
    pathsep = r"(?:/|\\\\)"
    CXX_V1_REGEX = r"^.*c\+\+" + pathsep + r"v[0-9]+" + pathsep + r"(.+)$"
    match = re.match(CXX_V1_REGEX, header)
    if not match:
        return None
    return match.group(1)


def parse_file(file: io.TextIOBase) -> List[Tuple[Header, Header]]:
    """
    Parse a file containing --trace-includes output to generate a list of the
    transitive includes contained in it.

    Each element of the result is an (includer, includee) pair, where the
    includer is the most recent level-1 libc++ header seen in the trace.
    """
    result = []
    includer = None
    # Iterate the file lazily rather than materializing every line up front.
    for line in file:
        (level, header) = parse_line(line)
        relative = make_cxx_v1_relative(header)

        # Not a libc++ header
        if relative is None:
            continue

        # If we're at the first level, remember this header as being the one who includes other headers.
        # There's usually exactly one, except if the compiler is passed a file with `-include`.
        if level == 1:
            includer = Header(relative)
            continue

        # Otherwise, take note that this header is being included by the top-level includer.
        assert includer is not None
        result.append((includer, Header(relative)))
    return result


def print_csv(includes: List[Tuple[Header, Header]]) -> None:
    """
    Print the transitive includes as space-delimited CSV.

    This function only prints public libc++ headers that are not C compatibility headers.
    """
    # Sort and group by includer. groupby only merges adjacent items, so the
    # input must be sorted with the same key first.
    # NOTE(review): this relies on Header being orderable and hashable -- confirm
    # against libcxx.header_information.
    by_includer = lambda t: t[0]
    grouped = itertools.groupby(sorted(includes, key=by_includer), key=by_includer)

    for (includer, pairs) in grouped:
        # De-duplicate the includees of this includer before printing them in order.
        unique_includees = {pair[1] for pair in pairs}
        for h in sorted(unique_includees):
            if h.is_public() and not h.is_C_compatibility():
                print(f"{includer} {h}")


def main(argv):
    """
    Parse the command line in `argv` (without the program name), read every
    input file, and print the combined transitive includes to standard output.
    """
    parser = argparse.ArgumentParser(
        description="""
        Given a list of headers produced by --trace-includes, produce a list of libc++ headers in that output.

        Note that -fshow-skipped-includes must also be passed to the compiler in order to get sufficient
        information for this script to run.

        The output of this script is provided in space-delimited CSV format where each line contains:

            <header performing inclusion> <header being included>
        """)
    parser.add_argument("inputs", type=argparse.FileType("r"), nargs='+', default=None,
                        help="One or more files containing the result of --trace-includes")
    args = parser.parse_args(argv)

    includes = []
    for file in args.inputs:
        try:
            includes.extend(parse_file(file))
        finally:
            # argparse.FileType opens the files but never closes them. Leave
            # sys.stdin (returned for "-") open since this script does not own it.
            if file is not sys.stdin:
                file.close()
    print_csv(includes)


if __name__ == "__main__":
    main(sys.argv[1:])
