#!/usr/bin/env python3.8 import argparse import os import glob import tarfile import zipfile import shutil import pathlib import sys from typing import Generator, Any sys.path.insert(0, ".") from pegen import build from scripts import test_parse_directory HERE = pathlib.Path(__file__).resolve().parent argparser = argparse.ArgumentParser( prog="test_pypi_packages", description="Helper program to test parsing PyPI packages", ) argparser.add_argument( "-t", "--tree", action="count", help="Compare parse tree to official AST", default=0 ) def get_packages() -> Generator[str, None, None]: all_packages = ( glob.glob("./data/pypi/*.tar.gz") + glob.glob("./data/pypi/*.zip") + glob.glob("./data/pypi/*.tgz") ) for package in all_packages: yield package def extract_files(filename: str) -> None: savedir = os.path.join("data", "pypi") if tarfile.is_tarfile(filename): tarfile.open(filename).extractall(savedir) elif zipfile.is_zipfile(filename): zipfile.ZipFile(filename).extractall(savedir) else: raise ValueError(f"Could not identify type of compressed file {filename}") def find_dirname(package_name: str) -> str: for name in os.listdir(os.path.join("data", "pypi")): full_path = os.path.join("data", "pypi", name) if os.path.isdir(full_path) and name in package_name: return full_path assert False # This is to fix mypy, should never be reached def run_tests(dirname: str, tree: int) -> int: return test_parse_directory.parse_directory( dirname, verbose=False, excluded_files=[], tree_arg=tree, short=True, mode=1 if tree else 0, parser="pegen", ) def main() -> None: args = argparser.parse_args() tree = args.tree for package in get_packages(): print(f"Extracting files from {package}... ", end="") try: extract_files(package) print("Done") except ValueError as e: print(e) continue print(f"Trying to parse all python files ... ") dirname = find_dirname(package) status = run_tests(dirname, tree) if status == 0: shutil.rmtree(dirname) else: print(f"Failed to parse {dirname}") if __name__ == "__main__": main()