#!/usr/bin/env python3.8
"""Helper program to test parsing PyPI packages.

Every archive found under ``./data/pypi`` is extracted in place, the
extracted directory is handed to ``test_parse_directory.parse_directory``,
and the directory is removed again when that call reports status ``0``.
"""

import argparse
import os
import glob
import tarfile
import zipfile
import shutil
import pathlib
import sys

from typing import Generator

# Make the local ``scripts`` package importable when this file is run from
# the directory that contains it.
sys.path.insert(0, ".")
from scripts import test_parse_directory

HERE = pathlib.Path(__file__).resolve().parent

argparser = argparse.ArgumentParser(
    prog="test_pypi_packages",
    description="Helper program to test parsing PyPI packages",
)
argparser.add_argument(
    "-t", "--tree", action="count", help="Compare parse tree to official AST", default=0
)


def get_packages() -> Generator[str, None, None]:
    """Yield the path of every package archive found in ``./data/pypi``.

    Archives are matched by extension, in this order: ``*.tar.gz``,
    ``*.zip``, ``*.tgz``.
    """
    yield from (
        glob.glob("./data/pypi/*.tar.gz")
        + glob.glob("./data/pypi/*.zip")
        + glob.glob("./data/pypi/*.tgz")
    )


def extract_files(filename: str) -> None:
    """Extract the archive *filename* into ``data/pypi``.

    Args:
        filename: Path to a tar or zip archive.

    Raises:
        ValueError: If the file is neither a tar archive nor a zip archive.
    """
    savedir = os.path.join("data", "pypi")
    # NOTE(review): ``extractall`` is applied to downloaded archives without
    # sanitising member paths; only run this on archives you trust.
    if tarfile.is_tarfile(filename):
        # The context manager closes the archive handle once extraction is done.
        with tarfile.open(filename) as archive:
            archive.extractall(savedir)
    elif zipfile.is_zipfile(filename):
        with zipfile.ZipFile(filename) as archive:
            archive.extractall(savedir)
    else:
        raise ValueError(f"Could not identify type of compressed file {filename}")


def find_dirname(package_name: str) -> str:
    """Return the directory in ``data/pypi`` that belongs to *package_name*.

    A directory matches when its name is a substring of *package_name*; the
    first match in ``os.listdir`` order is returned.

    Args:
        package_name: Path of the archive the directory was extracted from.

    Raises:
        AssertionError: If no directory in ``data/pypi`` matches.
    """
    base_dir = os.path.join("data", "pypi")
    for name in os.listdir(base_dir):
        full_path = os.path.join(base_dir, name)
        if os.path.isdir(full_path) and name in package_name:
            return full_path
    # Raised explicitly (rather than ``assert False``) so the failure is not
    # stripped under ``python -O``, which would make this return ``None``.
    raise AssertionError(f"No extracted directory found for {package_name}")


def run_tests(dirname: str, tree: int) -> int:
    """Run ``test_parse_directory.parse_directory`` on *dirname*.

    Args:
        dirname: Directory containing the extracted package.
        tree: Value of the ``--tree`` count option; forwarded as ``tree_arg``
            and also selects ``mode`` (``1`` when non-zero, ``0`` otherwise).

    Returns:
        The status returned by ``parse_directory``; ``main`` treats ``0`` as
        success.
    """
    return test_parse_directory.parse_directory(
        dirname,
        verbose=False,
        excluded_files=[],
        tree_arg=tree,
        short=True,
        mode=1 if tree else 0,
        parser="pegen",
    )


def main() -> None:
    """Extract and parse every package archive, cleaning up after successes."""
    args = argparser.parse_args()
    tree = args.tree

    for package in get_packages():
        print(f"Extracting files from {package}... ", end="")
        try:
            extract_files(package)
            print("Done")
        except ValueError as e:
            # Unrecognised archive type: report it and move on.
            print(e)
            continue

        print("Trying to parse all python files ... ")
        dirname = find_dirname(package)
        status = run_tests(dirname, tree)
        if status == 0:
            # Keep the extracted tree only when parsing failed, so it can be
            # inspected afterwards.
            shutil.rmtree(dirname)
        else:
            print(f"Failed to parse {dirname}")


if __name__ == "__main__":
    main()