From defbbd68f7f68f4edb3a6b256f26e0532727b3da Mon Sep 17 00:00:00 2001 From: Christian Heimes Date: Sun, 10 Apr 2022 10:29:51 +0300 Subject: [PATCH] bpo-40280: WASM docs and smaller browser builds (GH-32412) Co-authored-by: Brett Cannon --- Doc/library/sys.rst | 2 + Makefile.pre.in | 5 +- Tools/wasm/README.md | 92 +++++++++++++++++- Tools/wasm/Setup.local.example | 15 +++ Tools/wasm/wasm_assets.py | 164 +++++++++++++++++++++++---------- Tools/wasm/wasm_webserver.py | 7 ++ 6 files changed, 231 insertions(+), 54 deletions(-) create mode 100644 Tools/wasm/Setup.local.example diff --git a/Doc/library/sys.rst b/Doc/library/sys.rst index 3b09a4cdf8b..126da31b5bd 100644 --- a/Doc/library/sys.rst +++ b/Doc/library/sys.rst @@ -1183,7 +1183,9 @@ always available. System ``platform`` value ================ =========================== AIX ``'aix'`` + Emscripten ``'emscripten'`` Linux ``'linux'`` + WASI ``'wasi'`` Windows ``'win32'`` Windows/Cygwin ``'cygwin'`` macOS ``'darwin'`` diff --git a/Makefile.pre.in b/Makefile.pre.in index cb6e962045a..22a68a70487 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -812,8 +812,9 @@ $(DLLLIBRARY) libpython$(LDVERSION).dll.a: $(LIBRARY_OBJS) # --preload-file turns a relative asset path into an absolute path. $(WASM_STDLIB): $(srcdir)/Lib/*.py $(srcdir)/Lib/*/*.py \ - pybuilddir.txt $(srcdir)/Tools/wasm/wasm_assets.py \ - python.html python.worker.js + $(srcdir)/Tools/wasm/wasm_assets.py \ + Makefile pybuilddir.txt Modules/Setup.local \ + python.html python.worker.js $(PYTHON_FOR_BUILD) $(srcdir)/Tools/wasm/wasm_assets.py \ --builddir . --prefix $(prefix) diff --git a/Tools/wasm/README.md b/Tools/wasm/README.md index 40b82e8f939..fa99703acfc 100644 --- a/Tools/wasm/README.md +++ b/Tools/wasm/README.md @@ -9,9 +9,21 @@ possible to build for *wasm32-wasi* out-of-the-box yet. ## wasm32-emscripten build -Cross compiling to wasm32-emscripten platform needs the [Emscripten](https://emscripten.org/) -tool chain and a build Python interpreter. 
-All commands below are relative to a repository checkout. +Cross compiling to the wasm32-emscripten platform needs the +[Emscripten](https://emscripten.org/) SDK and a build Python interpreter. +Emscripten 3.1.8 or newer is recommended. All commands below are relative +to a repository checkout. + +Christian Heimes maintains a container image with Emscripten SDK, Python +build dependencies, WASI-SDK, wasmtime, and several additional tools. + +``` +# Fedora, RHEL, CentOS +podman run --rm -ti -v $(pwd):/python-wasm/cpython:Z quay.io/tiran/cpythonbuild:emsdk3 + +# other +docker run --rm -ti -v $(pwd):/python-wasm/cpython quay.io/tiran/cpythonbuild:emsdk3 +``` ### Compile a build Python interpreter @@ -167,3 +179,77 @@ linker options. - pthread support requires WASM threads and SharedArrayBuffer (bulk memory). The runtime keeps a pool of web workers around. Each web worker uses several file descriptors (eventfd, epoll, pipe). + +# Hosting Python WASM builds + +The simple REPL terminal uses SharedArrayBuffer. For security reasons +browsers only provide the feature in secure environments with cross-origin +isolation. The webserver must send cross-origin headers and correct MIME types +for the JavaScript and WASM files. Otherwise the terminal will fail to load +with an error message like ``Browsers disable shared array buffer``. + +## Apache HTTP .htaccess + +Place a ``.htaccess`` file in the same directory as ``python.wasm``. + +``` +# .htaccess +Header set Cross-Origin-Opener-Policy same-origin +Header set Cross-Origin-Embedder-Policy require-corp + +AddType application/javascript js +AddType application/wasm wasm + +<IfModule mod_deflate.c> + AddOutputFilterByType DEFLATE text/html application/javascript application/wasm +</IfModule> +``` + +# Detect WebAssembly builds + +## Python code + +```# python +import os, sys + +if sys.platform == "emscripten": + # Python on Emscripten +if sys.platform == "wasi": + # Python on WASI + +if os.name == "posix": + # WASM platforms identify as POSIX-like. 
+ # Windows does not provide os.uname(). + machine = os.uname().machine + if machine.startswith("wasm"): + # WebAssembly (wasm32 or wasm64) +``` + +## C code + +Emscripten SDK and WASI SDK define several built-in macros. You can dump a +full list of built-ins with ``emcc -dM -E - < /dev/null`` and +``/path/to/wasi-sdk/bin/clang -dM -E - < /dev/null``. + +```# C +#ifdef __EMSCRIPTEN__ + // Python on Emscripten +#endif +``` + +* WebAssembly ``__wasm__`` (also ``__wasm``) +* wasm32 ``__wasm32__`` (also ``__wasm32``) +* wasm64 ``__wasm64__`` +* Emscripten ``__EMSCRIPTEN__`` (also ``EMSCRIPTEN``) +* Emscripten version ``__EMSCRIPTEN_major__``, ``__EMSCRIPTEN_minor__``, ``__EMSCRIPTEN_tiny__`` +* WASI ``__wasi__`` + +Feature detection flags: + +* ``__EMSCRIPTEN_PTHREADS__`` +* ``__EMSCRIPTEN_SHARED_MEMORY__`` +* ``__wasm_simd128__`` +* ``__wasm_sign_ext__`` +* ``__wasm_bulk_memory__`` +* ``__wasm_atomics__`` +* ``__wasm_mutable_globals__`` diff --git a/Tools/wasm/Setup.local.example b/Tools/wasm/Setup.local.example new file mode 100644 index 00000000000..ad58c31a2ef --- /dev/null +++ b/Tools/wasm/Setup.local.example @@ -0,0 +1,15 @@ +# Modules/Setup.local with reduced stdlib +*disabled* +_asyncio +audioop +_bz2 +_crypt +_decimal +_pickle +pyexpat _elementtree +_sha3 _blake2 +_zoneinfo +xxsubtype + +# cjk codecs +#_multibytecodec _codecs_cn _codecs_hk _codecs_iso2022 _codecs_jp _codecs_kr _codecs_tw diff --git a/Tools/wasm/wasm_assets.py b/Tools/wasm/wasm_assets.py index bb1983af4c7..fba70b9c9d0 100755 --- a/Tools/wasm/wasm_assets.py +++ b/Tools/wasm/wasm_assets.py @@ -20,7 +20,11 @@ SRCDIR = pathlib.Path(__file__).parent.parent.parent.absolute() SRCDIR_LIB = SRCDIR / "Lib" # sysconfig data relative to build dir. 
-SYSCONFIGDATA_GLOB = "build/lib.*/_sysconfigdata_*.py" +SYSCONFIGDATA = pathlib.PurePath( + "build", + f"lib.emscripten-wasm32-{sys.version_info.major}.{sys.version_info.minor}", + "_sysconfigdata__emscripten_wasm32-emscripten.py", +) # Library directory relative to $(prefix). WASM_LIB = pathlib.PurePath("lib") @@ -38,33 +42,44 @@ WASM_DYNLOAD = WASM_STDLIB / "lib-dynload" OMIT_FILES = ( # regression tests "test/", - # user interfaces: TK, curses - "curses/", - "idlelib/", - "tkinter/", - "turtle.py", - "turtledemo/", # package management "ensurepip/", "venv/", # build system "distutils/", "lib2to3/", - # concurrency - "concurrent/", - "multiprocessing/", # deprecated "asyncore.py", "asynchat.py", - # Synchronous network I/O and protocols are not supported; for example, - # socket.create_connection() raises an exception: - # "BlockingIOError: [Errno 26] Operation in progress". + "uu.py", + "xdrlib.py", + # other platforms + "_aix_support.py", + "_bootsubprocess.py", + "_osx_support.py", + # webbrowser + "antigravity.py", + "webbrowser.py", + # Pure Python implementations of C extensions + "_pydecimal.py", + "_pyio.py", + # Misc unused or large files + "pydoc_data/", + "msilib/", +) + +# Synchronous network I/O and protocols are not supported; for example, +# socket.create_connection() raises an exception: +# "BlockingIOError: [Errno 26] Operation in progress". 
+OMIT_NETWORKING_FILES = ( "cgi.py", "cgitb.py", "email/", "ftplib.py", "http/", "imaplib.py", + "mailbox.py", + "mailcap.py", "nntplib.py", "poplib.py", "smtpd.py", @@ -77,26 +92,28 @@ OMIT_FILES = ( "urllib/response.py", "urllib/robotparser.py", "wsgiref/", - "xmlrpc/", - # dbm / gdbm - "dbm/", - # other platforms - "_aix_support.py", - "_bootsubprocess.py", - "_osx_support.py", - # webbrowser - "antigravity.py", - "webbrowser.py", - # ctypes - "ctypes/", - # Pure Python implementations of C extensions - "_pydecimal.py", - "_pyio.py", - # Misc unused or large files - "pydoc_data/", - "msilib/", ) +OMIT_MODULE_FILES = { + "_asyncio": ["asyncio/"], + "audioop": ["aifc.py", "sunau.py", "wave.py"], + "_crypt": ["crypt.py"], + "_curses": ["curses/"], + "_ctypes": ["ctypes/"], + "_decimal": ["decimal.py"], + "_dbm": ["dbm/ndbm.py"], + "_gdbm": ["dbm/gnu.py"], + "_json": ["json/"], + "_multiprocessing": ["concurrent/", "multiprocessing/"], + "pyexpat": ["xml/", "xmlrpc/"], + "readline": ["rlcompleter.py"], + "_sqlite3": ["sqlite3/"], + "_ssl": ["ssl.py"], + "_tkinter": ["idlelib/", "tkinter/", "turtle.py", "turtledemo/"], + + "_zoneinfo": ["zoneinfo/"], +} + # regression test sub directories OMIT_SUBDIRS = ( "ctypes/test/", @@ -105,34 +122,59 @@ OMIT_SUBDIRS = ( ) -OMIT_ABSOLUTE = {SRCDIR_LIB / name for name in OMIT_FILES} -OMIT_SUBDIRS_ABSOLUTE = tuple(str(SRCDIR_LIB / name) for name in OMIT_SUBDIRS) - - -def filterfunc(name: str) -> bool: - return not name.startswith(OMIT_SUBDIRS_ABSOLUTE) - - def create_stdlib_zip( - args: argparse.Namespace, compression: int = zipfile.ZIP_DEFLATED, *, optimize: int = 0 + args: argparse.Namespace, + *, + optimize: int = 0, ) -> None: - sysconfig_data = list(args.builddir.glob(SYSCONFIGDATA_GLOB)) - if not sysconfig_data: - raise ValueError("No sysconfigdata file found") + def filterfunc(name: str) -> bool: + return not name.startswith(args.omit_subdirs_absolute) with zipfile.PyZipFile( - args.wasm_stdlib_zip, mode="w", 
compression=compression, optimize=0 + args.wasm_stdlib_zip, mode="w", compression=args.compression, optimize=optimize ) as pzf: + if args.compresslevel is not None: + pzf.compresslevel = args.compresslevel + pzf.writepy(args.sysconfig_data) for entry in sorted(args.srcdir_lib.iterdir()): if entry.name == "__pycache__": continue - if entry in OMIT_ABSOLUTE: + if entry in args.omit_files_absolute: continue if entry.name.endswith(".py") or entry.is_dir(): # writepy() writes .pyc files (bytecode). pzf.writepy(entry, filterfunc=filterfunc) - for entry in sysconfig_data: - pzf.writepy(entry) + + +def detect_extension_modules(args: argparse.Namespace): + modules = {} + + # disabled by Modules/Setup.local ? + with open(args.builddir / "Makefile") as f: + for line in f: + if line.startswith("MODDISABLED_NAMES="): + disabled = line.split("=", 1)[1].strip().split() + for modname in disabled: + modules[modname] = False + break + + # disabled by configure? + with open(args.sysconfig_data) as f: + data = f.read() + loc = {} + exec(data, globals(), loc) + + for name, value in loc["build_time_vars"].items(): + if value not in {"yes", "missing", "disabled", "n/a"}: + continue + if not name.startswith("MODULE_"): + continue + if name.endswith(("_CFLAGS", "_DEPS", "_LDFLAGS")): + continue + modname = name.removeprefix("MODULE_").lower() + if modname not in modules: + modules[modname] = value == "yes" + return modules def path(val: str) -> pathlib.Path: @@ -147,7 +189,10 @@ parser.add_argument( type=path, ) parser.add_argument( - "--prefix", help="install prefix", default=pathlib.Path("/usr/local"), type=path + "--prefix", + help="install prefix", + default=pathlib.Path("/usr/local"), + type=path, ) @@ -162,6 +207,27 @@ def main(): args.wasm_stdlib = args.wasm_root / WASM_STDLIB args.wasm_dynload = args.wasm_root / WASM_DYNLOAD + # bpo-17004: zipimport supports only zlib compression. + # Emscripten ZIP_STORED + -sLZ4=1 linker flags results in larger file. 
+ args.compression = zipfile.ZIP_DEFLATED + args.compresslevel = 9 + + args.sysconfig_data = args.builddir / SYSCONFIGDATA + if not args.sysconfig_data.is_file(): + raise ValueError(f"sysconfigdata file {SYSCONFIGDATA} missing.") + + extmods = detect_extension_modules(args) + omit_files = list(OMIT_FILES) + omit_files.extend(OMIT_NETWORKING_FILES) + for modname, modfiles in OMIT_MODULE_FILES.items(): + if not extmods.get(modname): + omit_files.extend(modfiles) + + args.omit_files_absolute = {args.srcdir_lib / name for name in omit_files} + args.omit_subdirs_absolute = tuple( + str(args.srcdir_lib / name) for name in OMIT_SUBDIRS + ) + # Empty, unused directory for dynamic libs, but required for site initialization. args.wasm_dynload.mkdir(parents=True, exist_ok=True) marker = args.wasm_dynload / ".empty" @@ -170,7 +236,7 @@ def main(): shutil.copy(args.srcdir_lib / "os.py", args.wasm_stdlib) # The rest of stdlib that's useful in a WASM context. create_stdlib_zip(args) - size = round(args.wasm_stdlib_zip.stat().st_size / 1024 ** 2, 2) + size = round(args.wasm_stdlib_zip.stat().st_size / 1024**2, 2) parser.exit(0, f"Created {args.wasm_stdlib_zip} ({size} MiB)\n") diff --git a/Tools/wasm/wasm_webserver.py b/Tools/wasm/wasm_webserver.py index ef642bf8a5b..186bd57fc20 100755 --- a/Tools/wasm/wasm_webserver.py +++ b/Tools/wasm/wasm_webserver.py @@ -14,6 +14,13 @@ parser.add_argument( class MyHTTPRequestHandler(server.SimpleHTTPRequestHandler): + extensions_map = server.SimpleHTTPRequestHandler.extensions_map.copy() + extensions_map.update( + { + ".wasm": "application/wasm", + } + ) + def end_headers(self): self.send_my_headers() super().end_headers()