# Source code for betty.wikipedia
"""
Fetch information from Wikipedia.
"""
from __future__ import annotations
import re
from typing import cast
[docs]
class NotAPageError(ValueError):
    """
    Raised when a URL does not point to a Wikipedia page.

    This is a :py:class:`ValueError`, so callers that catch ``ValueError``
    catch this error too.
    """
# Matches the URL of a Wikipedia page.
# Group 1 is the language subdomain. It may consist of several hyphen-separated
# lowercase parts, because some Wikipedia editions live on hyphenated subdomains
# (e.g. "zh-min-nan").
# Group 2 is the page name: everything after "/wiki/" up to the next "/", "?" or "#".
# Whatever follows the page name is matched but not captured.
_PAGE_URL_PATTERN = re.compile(
    r"^https?://([a-z]+(?:-[a-z]+)*)\.wikipedia\.org/wiki/([^/?#]+).*$"
)
[docs]
def parse_page_url(url: str) -> tuple[str, str]:
    """
    Parse the URL for a Wikipedia page.

    :param url: The URL to parse.
    :return: A 2-tuple with the page language and the page name.
    :raises NotAPageError: If the URL does not match the Wikipedia page URL pattern.
    """
    match = _PAGE_URL_PATTERN.fullmatch(url)
    if match is None:
        # Name the offending URL so the failure can be diagnosed from the error alone.
        raise NotAPageError(f"{url!r} is not the URL of a Wikipedia page.")
    # The pattern has exactly two capturing groups: the language and the page name.
    language, page_name = match.group(1, 2)
    return language, page_name