Source code for ada_url.ada_adapter

from enum import IntEnum
from typing import (
    Dict,
    Final,
    Iterable,
    Iterator,
    List,
    Optional,
    Tuple,
    TypedDict,
    Union,
)

from ada_url._ada_wrapper import ffi, lib

# Names of the URL components that can be both read and written.
# replace_url() applies replacements in exactly this order, so ``href`` is
# handled first and ``hostname`` is handled before ``host``.
URL_ATTRIBUTES = (
    'href',
    'username',
    'password',
    'protocol',
    'port',
    'hostname',
    'host',
    'pathname',
    'search',
    'hash',
)
# Every readable component: the writable ones plus the read-only extras.
# This is the default set of keys returned by parse_url().
PARSE_ATTRIBUTES = URL_ATTRIBUTES + ('origin', 'host_type', 'scheme_type')

# These are the attributes that have corresponding ada_get_* functions
GET_ATTRIBUTES = frozenset(PARSE_ATTRIBUTES)

# These are the attributes that have corresponding ada_set_* functions
SET_ATTRIBUTES = frozenset(URL_ATTRIBUTES)

# These are the attributes that can be cleared with one of the ada_clear_* functions
CLEAR_ATTRIBUTES = frozenset(('port', 'hash', 'search'))

# These are the attributes that must be cleared by setting the empty string
UNSET_ATTRIBUTES = frozenset(('username', 'password', 'pathname'))

# Unique sentinel used by replace_url() to tell "keyword not supplied" apart
# from any value a caller could actually pass.
_marker = object()



[docs]
class HostType(IntEnum):
    """
    Enum for URL host types, as exposed through the ``host_type`` attribute
    of :class:`URL` and the ``host_type`` key returned by :func:`parse_url`:

    * ``DEFAULT`` hosts like ``https://example.org`` are ``0``.
    * ``IPV4`` hosts like ``https://192.0.2.1`` are ``1``.
    * ``IPV6`` hosts like ``https://[2001:db8::]`` are ``2``.

    .. code-block:: python

        >>> from ada_url import HostType
        >>> HostType.DEFAULT
        <HostType.DEFAULT: 0>

    """

    # The integer values are the ones handed back by ``ada_get_host_type``;
    # they are converted with ``HostType(data)`` and must not be renumbered.
    DEFAULT = 0
    IPV4 = 1
    IPV6 = 2




[docs]
class SchemeType(IntEnum):
    """
    Enum for `URL scheme types <https://url.spec.whatwg.org/#url-miscellaneous>`__,
    as exposed through the ``scheme_type`` attribute of :class:`URL` and the
    ``scheme_type`` key returned by :func:`parse_url`.

    * ``HTTP`` URLs like ``http://example.org`` are ``0``.
    * ``NOT_SPECIAL`` URLs like ``git://example.org`` are ``1``.
    * ``HTTPS`` URLs like ``https://example.org`` are ``2``.
    * ``WS`` URLs like ``ws://example.org`` are ``3``.
    * ``FTP`` URLs like ``ftp://example.org`` are ``4``.
    * ``WSS`` URLs like ``wss://example.org`` are ``5``.
    * ``FILE`` URLs like ``file://example`` are ``6``.

    .. code-block:: python

        >>> from ada_url import SchemeType
        >>> SchemeType.HTTPS
        <SchemeType.HTTPS: 2>

    """

    # The integer values are the ones handed back by ``ada_get_scheme_type``;
    # they are converted with ``SchemeType(data)`` and must not be renumbered.
    HTTP = 0
    NOT_SPECIAL = 1
    HTTPS = 2
    WS = 3
    FTP = 4
    WSS = 5
    FILE = 6



class ParseAttributes(TypedDict, total=False):
    """
    Shape of the dictionary returned by :func:`parse_url`.

    The type is declared with ``total=False`` because :func:`parse_url` lets
    the caller restrict which keys are produced, so any key may be absent.
    """

    # Components that can also be written through the URL class.
    href: str
    username: str
    password: str
    protocol: str
    port: str
    hostname: str
    host: str
    pathname: str
    search: str
    hash: str
    # Read-only components.
    origin: str
    host_type: HostType
    scheme_type: SchemeType


def _get_obj(constructor, destructor, *args):
    """
    Build a native object and hand its cleanup over to ``ffi.gc``.

    *constructor* is called with *args*; the value it returns is wrapped with
    ``ffi.gc`` so that *destructor* is registered for it. The wrapped object
    is returned.
    """
    return ffi.gc(constructor(*args), destructor)


def _get_str(x):
    ret = ffi.string(x.data, x.length).decode('utf-8') if x.length else ''
    return ret



[docs]
class URL:
    """
    Parses a *url* (with an optional *base*) according to the
    WHATWG URL parsing standard.

    .. code-block:: python

        >>> from ada_url import URL
        >>> old_url = 'https://example.org:443/file.txt?q=1'
        >>> urlobj = URL(old_url)
        >>> urlobj.host
        'example.org'
        >>> urlobj.host = 'example.com'
        >>> new_url = urlobj.href
        >>> new_url
        'https://example.com:443/file.txt?q=1'

    You can read and write the following attributes:

    * ``href``
    * ``protocol``
    * ``username``
    * ``password``
    * ``host``
    * ``hostname``
    * ``port``
    * ``pathname``
    * ``search``
    * ``hash``

    You can additionally read these attributes:

    * ``origin``, which will be a ``str``
    * ``host_type``, which will be a :class:`HostType` enum
    * ``scheme_type``, which will be a :class:`SchemeType` enum

    ``del`` is supported for ``port``, ``hash``, ``search``, ``username``,
    ``password`` and ``pathname``; deleting any other attribute raises
    ``AttributeError``.

    The class also exposes a static method that checks whether the input
    *url* (and optional *base*) can be parsed:

    .. code-block:: python

        >>> url = 'file_2.txt'
        >>> base = 'https://example.org:443/file_1.txt'
        >>> URL.can_parse(url, base)
        True

    See the `WHATWG docs <https://url.spec.whatwg.org/#url-class>`__ for
    more details on the URL class.

    """

    # These annotations only describe the dynamic attributes for type
    # checkers; the values are fetched from the native object on every access
    # by __getattr__ and written back by __setattr__.
    href: str
    username: str
    password: str
    protocol: str
    port: str
    hostname: str
    host: str
    pathname: str
    search: str
    hash: str
    origin: Final[str]
    host_type: Final[HostType]
    scheme_type: Final[SchemeType]

    def __init__(self, url: str, base: Optional[str] = None):
        """
        Parse *url*, resolving it against *base* when one is given.

        Raises ``ValueError`` if the native parser reports the result as
        invalid.
        """
        url_bytes = url.encode('utf-8')

        if base is None:
            self.urlobj = _get_obj(
                lib.ada_parse, lib.ada_free, url_bytes, len(url_bytes)
            )
        else:
            base_bytes = base.encode('utf-8')
            self.urlobj = _get_obj(
                lib.ada_parse_with_base,
                lib.ada_free,
                url_bytes,
                len(url_bytes),
                base_bytes,
                len(base_bytes),
            )

        if not lib.ada_is_valid(self.urlobj):
            raise ValueError('Invalid input')

    def __copy__(self):
        """
        Return a shallow copy.

        The copy's ``__dict__`` is filled from this instance, so both objects
        refer to the same underlying ``urlobj``: a change made through one is
        visible through the other. Use ``copy.deepcopy`` for an independent
        URL.
        """
        cls = self.__class__
        ret = cls.__new__(cls)
        ret.__dict__.update(self.__dict__)
        super(URL, ret).__init__()
        return ret

    def __deepcopy__(self, memo):
        """
        Return an independent copy backed by its own native URL object.
        """
        cls = self.__class__
        ret = cls.__new__(cls)
        super(URL, ret).__init__()
        # Go through _get_obj so that ada_free is registered for the copy,
        # exactly as __init__ does for parsed URLs. Storing the bare result of
        # ada_copy would leave the duplicated native object without a
        # destructor.
        ret.urlobj = _get_obj(lib.ada_copy, lib.ada_free, self.urlobj)

        return ret

    def __delattr__(self, attr: str):
        """
        Clear a URL component.

        ``port``, ``hash`` and ``search`` are cleared with the matching
        ``ada_clear_*`` function; ``username``, ``password`` and ``pathname``
        are cleared by setting them to the empty string. Any other name raises
        ``AttributeError``.
        """
        if attr in CLEAR_ATTRIBUTES:
            clear_func = getattr(lib, f'ada_clear_{attr}')
            clear_func(self.urlobj)
        elif attr in UNSET_ATTRIBUTES:
            set_func = getattr(lib, f'ada_set_{attr}')
            set_func(self.urlobj, b'', 0)
        else:
            raise AttributeError(f'cannot remove {attr}')

    def __dir__(self) -> List[str]:
        """Include the dynamic URL attributes in ``dir()`` output."""
        return super().__dir__() + list(PARSE_ATTRIBUTES)

    def __getattr__(self, attr: str) -> Union[str, HostType, SchemeType]:
        """
        Fetch a URL component from the native object.

        Python only calls this when normal attribute lookup fails, so regular
        instance attributes such as ``urlobj`` never reach it once they are
        set. Names outside ``GET_ATTRIBUTES`` raise ``AttributeError``.
        """
        if attr in GET_ATTRIBUTES:
            get_func = getattr(lib, f'ada_get_{attr}')
            data = get_func(self.urlobj)
            if attr == 'origin':
                # The origin is handed over as an owned string that must be
                # released explicitly; do so even if decoding fails.
                try:
                    ret = _get_str(data)
                finally:
                    lib.ada_free_owned_string(data)
            elif attr == 'host_type':
                ret = HostType(data)
            elif attr == 'scheme_type':
                ret = SchemeType(data)
            else:
                ret = _get_str(data)

            return ret

        raise AttributeError(f'no attribute named {attr}')

    def __setattr__(self, attr: str, value: str) -> None:
        """
        Write a URL component, or fall back to ordinary attribute assignment.

        For names in ``SET_ATTRIBUTES`` the value is UTF-8 encoded and passed
        to the matching ``ada_set_*`` function. ``ValueError`` is raised when
        the value cannot be encoded or when the setter reports failure.
        """
        if attr in SET_ATTRIBUTES:
            try:
                value_bytes = value.encode('utf-8')
            except Exception:
                raise ValueError(f'Invalid value for {attr}') from None

            set_func = getattr(lib, f'ada_set_{attr}')
            ret = set_func(self.urlobj, value_bytes, len(value_bytes))
            # Some setters return nothing (None); only an explicit falsy
            # result counts as a rejected value.
            if (ret is not None) and (not ret):
                raise ValueError(f'Invalid value for {attr}') from None

            return ret

        return super().__setattr__(attr, value)

    def __str__(self):
        """Return the serialized URL (``href``)."""
        return self.href

    def __repr__(self):
        return f'<URL "{self.href}">'

    @staticmethod
    def can_parse(url: str, base: Optional[str] = None) -> bool:
        """
        Report whether *url* (optionally resolved against *base*) can be
        parsed, without constructing a :class:`URL`.

        Inputs that cannot be UTF-8 encoded yield ``False`` rather than
        raising.
        """
        try:
            url_bytes = url.encode('utf-8')
        except Exception:
            return False

        if base is None:
            return lib.ada_can_parse(url_bytes, len(url_bytes))

        try:
            base_bytes = base.encode('utf-8')
        except Exception:
            return False

        return lib.ada_can_parse_with_base(
            url_bytes, len(url_bytes), base_bytes, len(base_bytes)
        )




[docs]
class URLSearchParams:
    """
    Parses the given *params* string according to the WHATWG URL parsing standard.

    The attribute and methods from the standard are implemented:

    .. code-block:: python

        >>> from ada_url import URLSearchParams
        >>> obj = URLSearchParams('key1=value1&key2=value2&key2=value3')
        >>> obj.size
        3
        >>> obj.append('key2', 'value4')
        >>> str(obj)
        'key1=value1&key2=value2&key2=value3&key2=value4'
        >>> obj.delete('key1')
        >>> str(obj)
        'key2=value2&key2=value3&key2=value4'
        >>> obj.delete('key2', 'value2')
        >>> str(obj)
        'key2=value3&key2=value4'
        >>> obj.get('key2')
        'value3'
        >>> obj.get_all('key2')
        ['value3', 'value4']
        >>> obj.has('key2')
        True
        >>> obj.has('key2', 'value5')
        False
        >>> obj.set('key1', 'value6')
        >>> str(obj)
        'key2=value3&key2=value4&key1=value6'
        >>> obj.sort()
        >>> str(obj)
        'key1=value6&key2=value3&key2=value4'

    Iterators for the ``keys``, ``values``, and ``items`` are also implemented:

    .. code-block:: python

        >>> obj = URLSearchParams('key1=value1&key2=value2&key2=value3')
        >>> list(obj.keys())
        ['key1', 'key2', 'key2']
        >>> list(obj.values())
        ['value1', 'value2', 'value3']
        >>> list(obj.items())
        [('key1', 'value1'), ('key2', 'value2'), ('key2', 'value3')]

    See the `WHATWG docs <https://url.spec.whatwg.org/#interface-urlsearchparams>`__ for
    more details on the URLSearchParams class.

    """

    def __init__(self, params: str):
        """Parse *params* into a native search-params object."""
        params_bytes = params.encode('utf-8')
        self.paramsobj = _get_obj(
            lib.ada_parse_search_params,
            lib.ada_free_search_params,
            params_bytes,
            len(params_bytes),
        )

    @property
    def size(self) -> int:
        """Number of key/value pairs currently stored."""
        return lib.ada_search_params_size(self.paramsobj)

    def __len__(self) -> int:
        return self.size

    def append(self, key: str, value: str):
        """Add a *key*/*value* pair without touching existing pairs."""
        key_bytes = key.encode('utf-8')
        value_bytes = value.encode('utf-8')
        lib.ada_search_params_append(
            self.paramsobj,
            key_bytes,
            len(key_bytes),
            value_bytes,
            len(value_bytes),
        )

    def delete(self, key: str, value: Optional[str] = None):
        """
        Remove pairs for *key*. When *value* is given, only pairs matching
        both *key* and *value* are removed.
        """
        key_bytes = key.encode('utf-8')
        if value is None:
            lib.ada_search_params_remove(self.paramsobj, key_bytes, len(key_bytes))
        else:
            value_bytes = value.encode('utf-8')
            lib.ada_search_params_remove_value(
                self.paramsobj,
                key_bytes,
                len(key_bytes),
                value_bytes,
                len(value_bytes),
            )

    def get(self, key: str) -> str:
        """
        Return the value the native library reports for *key*.

        A zero-length result is returned as ``''``.
        NOTE(review): presumably a missing key also comes back as a
        zero-length string and is therefore indistinguishable from an empty
        value here; use :meth:`has` to tell them apart.
        """
        key_bytes = key.encode('utf-8')
        item = lib.ada_search_params_get(self.paramsobj, key_bytes, len(key_bytes))
        return _get_str(item)

    def get_all(self, key: str) -> List[str]:
        """Return every value stored for *key*, in order, as a list."""
        key_bytes = key.encode('utf-8')
        # The native call returns a freshly allocated collection of strings.
        # Register ada_free_strings for it so it is released once this
        # method is done with it instead of being leaked on every call.
        items = _get_obj(
            lib.ada_search_params_get_all,
            lib.ada_free_strings,
            self.paramsobj,
            key_bytes,
            len(key_bytes),
        )
        count = lib.ada_strings_size(items)

        # Each entry is copied into a Python str while ``items`` is still
        # alive, so the returned list does not depend on native memory.
        return [_get_str(lib.ada_strings_get(items, i)) for i in range(count)]

    def has(self, key: str, value: Optional[str] = None) -> bool:
        """
        Report whether *key* is present. When *value* is given, report
        whether that exact *key*/*value* pair is present.
        """
        key_bytes = key.encode('utf-8')
        if value is None:
            return lib.ada_search_params_has(self.paramsobj, key_bytes, len(key_bytes))
        else:
            value_bytes = value.encode('utf-8')
            return lib.ada_search_params_has_value(
                self.paramsobj,
                key_bytes,
                len(key_bytes),
                value_bytes,
                len(value_bytes),
            )

    def set(self, key: str, value: str):
        """Set *key* to *value* via the native ``set`` operation."""
        key_bytes = key.encode('utf-8')
        value_bytes = value.encode('utf-8')
        lib.ada_search_params_set(
            self.paramsobj,
            key_bytes,
            len(key_bytes),
            value_bytes,
            len(value_bytes),
        )

    def sort(self):
        """Sort the stored pairs in place."""
        lib.ada_search_params_sort(self.paramsobj)

    def keys(self) -> Iterator[str]:
        """Yield each key in order (duplicates included)."""
        iterator = _get_obj(
            lib.ada_search_params_get_keys,
            lib.ada_free_search_params_keys_iter,
            self.paramsobj,
        )
        while lib.ada_search_params_keys_iter_has_next(iterator):
            item = lib.ada_search_params_keys_iter_next(iterator)
            yield _get_str(item)

    def values(self) -> Iterator[str]:
        """Yield each value in order."""
        iterator = _get_obj(
            lib.ada_search_params_get_values,
            lib.ada_free_search_params_values_iter,
            self.paramsobj,
        )
        while lib.ada_search_params_values_iter_has_next(iterator):
            item = lib.ada_search_params_values_iter_next(iterator)
            yield _get_str(item)

    def items(self) -> Iterator[Tuple[str, str]]:
        """Yield each ``(key, value)`` pair in order."""
        iterator = _get_obj(
            lib.ada_search_params_get_entries,
            lib.ada_free_search_params_entries_iter,
            self.paramsobj,
        )
        while lib.ada_search_params_entries_iter_has_next(iterator):
            item = lib.ada_search_params_entries_iter_next(iterator)
            yield _get_str(item.key), _get_str(item.value)

    def __repr__(self):
        return f'<SearchParams "{self}">'

    def __str__(self) -> str:
        """Return the serialized parameter string."""
        result = _get_obj(
            lib.ada_search_params_to_string, lib.ada_free_owned_string, self.paramsobj
        )
        return _get_str(result)




[docs]
def check_url(s: str) -> bool:
    """
    Tell whether *s* parses as a valid URL.

    ``True`` is returned for a valid URL and ``False`` otherwise, including
    when *s* cannot be encoded as UTF-8 in the first place.

    .. code-block:: python

        >>> from ada_url import check_url
        >>> check_url('bogus')
        False
        >>> check_url('http://a/b/c/d;p?q')
        True

    """
    try:
        encoded = s.encode('utf-8')
    except Exception:
        # Anything that cannot even be encoded is, by definition, not a URL.
        return False
    else:
        parsed = _get_obj(lib.ada_parse, lib.ada_free, encoded, len(encoded))
        return lib.ada_is_valid(parsed)




[docs]
def join_url(base_url: str, s: str) -> str:
    """
    Resolve *s* against *base_url* and return the resulting URL string.
    ``ValueError`` is raised when either input cannot be encoded or when the
    combination does not form a valid URL.

    .. code-block:: python

        >>> from ada_url import join_url
        >>> base_url = 'http://a/b/c/d;p?q'
        >>> join_url(base_url, '../g')
        'http://a/b/g'

    """
    try:
        encoded_base, encoded_ref = base_url.encode('utf-8'), s.encode('utf-8')
    except Exception:
        raise ValueError('Invalid URL') from None

    joined = _get_obj(
        lib.ada_parse_with_base,
        lib.ada_free,
        encoded_ref,
        len(encoded_ref),
        encoded_base,
        len(encoded_base),
    )
    if lib.ada_is_valid(joined):
        return _get_str(lib.ada_get_href(joined))

    raise ValueError('Invalid URL') from None




[docs]
def normalize_url(s: str) -> str:
    """
    Return the "normalized" form of *s*, i.e. the ``href`` produced by
    parsing it, with all ``'..'`` and ``'/'`` characters resolved.

    .. code-block:: python

        >>> from ada_url import normalize_url
        >>> normalize_url('http://a/b/c/../g')
        'http://a/b/g'

    """
    # Only the serialized URL is needed, so request just that one key.
    parsed = parse_url(s, attributes=('href',))
    return parsed['href']




[docs]
def parse_url(s: str, attributes: Iterable[str] = PARSE_ATTRIBUTES) -> ParseAttributes:
    """
    Returns a dictionary with the parsed components of the URL represented by *s*.

    .. code-block:: python

        >>> from ada_url import parse_url
        >>> url = 'https://user_1:password_1@example.org:8080/dir/../api?q=1#frag'
        >>> parse_url(url)
        {
            'href': 'https://user_1:password_1@example.org:8080/api?q=1#frag',
            'username': 'user_1',
            'password': 'password_1',
            'protocol': 'https:',
            'host': 'example.org:8080',
            'port': '8080',
            'hostname': 'example.org',
            'pathname': '/api',
            'search': '?q=1',
            'hash': '#frag',
            'origin': 'https://example.org:8080',
            'host_type': 0,
            'scheme_type': 2
        }

    The names of the dictionary keys correspond to the components of the "URL class"
    in the WHATWG URL spec.
    ``host_type`` is a :class:`HostType` enum.
    ``scheme_type`` is a :class:`SchemeType` enum.

    Pass in a sequence of *attributes* to limit which keys are returned.

    .. code-block:: python

        >>> from ada_url import parse_url
        >>> url = 'https://user_1:password_1@example.org:8080/dir/../api?q=1#frag'
        >>> parse_url(url, attributes=('protocol',))
        {'protocol': 'https:'}

    A single attribute name given as a plain string is treated as a
    one-element sequence. Unrecognized attributes are ignored.

    ``ValueError`` is raised if *s* cannot be encoded or is not a valid URL.

    """
    try:
        s_bytes = s.encode('utf-8')
    except Exception:
        raise ValueError('Invalid URL') from None

    urlobj = _get_obj(lib.ada_parse, lib.ada_free, s_bytes, len(s_bytes))
    if not lib.ada_is_valid(urlobj):
        raise ValueError('Invalid URL') from None

    # A bare string is itself an iterable of characters; without this guard
    # ``attributes='protocol'`` (or the easy-to-write ``('protocol')``) would
    # be looked up letter by letter.
    if isinstance(attributes, str):
        attributes = (attributes,)

    ret = {}
    for attr in attributes:
        # Honor the documented contract: names without an ada_get_* function
        # are skipped instead of surfacing an AttributeError from ``lib``.
        if attr not in GET_ATTRIBUTES:
            continue

        get_func = getattr(lib, f'ada_get_{attr}')
        data = get_func(urlobj)
        if attr == 'origin':
            # The origin is handed over as an owned string that must be
            # released explicitly; do so even if decoding fails.
            try:
                ret[attr] = _get_str(data)
            finally:
                lib.ada_free_owned_string(data)
        elif attr == 'host_type':
            ret[attr] = HostType(data)
        elif attr == 'scheme_type':
            ret[attr] = SchemeType(data)
        else:
            ret[attr] = _get_str(data)

    return ret




[docs]
def replace_url(s: str, **kwargs: str) -> str:
    """
    Parse the URL represented by *s*, overwrite the components named in
    *kwargs*, and return the resulting normalized URL.

    Provide an empty string to unset an attribute.

    .. code-block:: python

        >>> from ada_url import replace_url
        >>> base_url = 'https://user_1:password_1@example.org/resource'
        >>> replace_url(base_url, username='user_2', password='', protocol='http:')
        'http://user_2@example.org/resource'

    Unrecognized attributes are ignored. ``href`` is replaced first if it is given.
    ``hostname`` is replaced before ``host`` if both are given.

    ``ValueError`` is raised if the input URL or one of the components is not valid.
    """
    try:
        encoded = s.encode('utf-8')
    except Exception:
        raise ValueError('Invalid URL') from None

    target = _get_obj(lib.ada_parse, lib.ada_free, encoded, len(encoded))
    if not lib.ada_is_valid(target):
        raise ValueError('Invalid URL') from None

    # Walk the canonical attribute order rather than the caller's keyword
    # order, so that e.g. href is always applied before anything else.
    for name in URL_ATTRIBUTES:
        if name not in kwargs:
            continue
        replacement = kwargs[name]

        try:
            raw = replacement.encode('utf-8')
        except Exception:
            raise ValueError(f'Invalid value for {name}') from None

        # An empty value on a clearable attribute uses the dedicated
        # ada_clear_* function instead of a setter.
        if name in CLEAR_ATTRIBUTES and not raw:
            getattr(lib, f'ada_clear_{name}')(target)
            continue

        outcome = getattr(lib, f'ada_set_{name}')(target, raw, len(raw))
        # Setters that return nothing give None; only an explicit falsy
        # result means the value was rejected.
        if outcome is not None and not outcome:
            raise ValueError(f'Invalid value for {name}') from None

    return _get_str(lib.ada_get_href(target))




[docs]
def parse_search_params(s: str) -> Dict[str, List[str]]:
    """
    Parse the URL parameters in *s* and group them by key.
    Each key in the returned dictionary maps to the list of values that
    appeared with it, in their original order.

    .. code-block:: python

        >>> from ada_url import parse_search_params
        >>> parse_search_params('key1=value1&key1=value2&key2=value3')
        {'key1': ['value1', 'value2'], 'key2': ['value3']}

    """
    grouped = {}
    for name, item in URLSearchParams(s).items():
        # setdefault creates the list on first sight of a key and reuses it
        # afterwards.
        grouped.setdefault(name, []).append(item)

    return grouped




[docs]
def replace_search_params(s: str, *args: Tuple[str, str]) -> str:
    """
    Apply the ``(key, value)`` pairs given in *args* to the URL parameters in
    *s* and return the resulting parameter string.

    .. code-block:: python

        >>> from ada_url import replace_search_params
        >>> replace_search_params(
        ...     'key1=value1&key1=value2',
        ...     ('key1', 'value3'),
        ...     ('key2', 'value4')
        ... )
        'key1=value3&key2=value4'
    """
    params = URLSearchParams(s)

    # First pass: drop every existing entry for the keys being replaced.
    for name, _ in args:
        params.delete(name)

    # Second pass: add the new pairs, in the order they were supplied.
    for name, replacement in args:
        params.append(name, replacement)

    return str(params)




[docs]
class idna:
    """Process international domains according to the UTS #46 standard.

    :func:`idna.encode` implements the UTS #46 ``ToASCII`` operation.
    Its output is a Python ``bytes`` object.
    It is also available as :func:`idna_to_ascii`.

    .. code-block:: python

        >>> from ada_url import idna
        >>> idna.encode('meßagefactory.ca')
        b'xn--meagefactory-m9a.ca'

    :func:`idna.decode` implements the UTS #46 ``ToUnicode`` operation.
    Its output is a Python ``str`` object.
    It is also available as :func:`idna_to_unicode`.

    .. code-block:: python

        >>> from ada_url import idna
        >>> idna.decode('xn--meagefactory-m9a.ca')
        'meßagefactory.ca'

    Both functions accept either ``str`` or ``bytes`` objects as input.
    """

    @staticmethod
    def decode(s: Union[str, bytes]) -> str:
        """
        Apply ``ToUnicode`` to *s* and return the result as a ``str``.

        A ``str`` input is encoded as ASCII before being handed to the native
        library, so a ``str`` containing non-ASCII characters raises
        ``UnicodeEncodeError``; ``bytes`` input is passed through unchanged.
        """
        if isinstance(s, str):
            s = s.encode('ascii')

        data = _get_obj(lib.ada_idna_to_unicode, lib.ada_free_owned_string, s, len(s))
        return _get_str(data)

    @staticmethod
    def encode(s: Union[str, bytes]) -> bytes:
        """
        Apply ``ToASCII`` to *s* and return the result as ``bytes``.

        A ``str`` input is encoded as UTF-8 before being handed to the native
        library; ``bytes`` input is passed through unchanged. A zero-length
        result is returned as ``b''``.
        """
        if isinstance(s, str):
            s = s.encode('utf-8')

        val = _get_obj(lib.ada_idna_to_ascii, lib.ada_free_owned_string, s, len(s))
        return ffi.string(val.data, val.length) if val.length else b''



# Function-style alias for idna.decode (UTS #46 ToUnicode).
idna_to_unicode = idna.decode

# Function-style alias for idna.encode (UTS #46 ToASCII).
idna_to_ascii = idna.encode
Source code for ada_url.ada_adapter

ada-url/ada-python

Navigation

Related Topics