first commit
14
venv/lib/python3.12/site-packages/yarl/__init__.py
Normal file
@@ -0,0 +1,14 @@
from ._query import Query, QueryVariable, SimpleQuery
from ._url import URL, cache_clear, cache_configure, cache_info

__version__ = "1.22.0"

__all__ = (
    "URL",
    "SimpleQuery",
    "QueryVariable",
    "Query",
    "cache_clear",
    "cache_configure",
    "cache_info",
)
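For orientation, a brief usage sketch of the names re-exported above; it assumes the standard yarl URL behaviour (the _url.py diff that defines URL is suppressed further down):

from yarl import URL, cache_info

url = URL("https://example.com/path?a=1")
print(url.host)                    # example.com
print(url.with_query({"b": 2}))    # https://example.com/path?b=2
print(cache_info())                # statistics for yarl's internal LRU caches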
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
203
venv/lib/python3.12/site-packages/yarl/_parse.py
Normal file
@@ -0,0 +1,203 @@
"""URL parsing utilities."""

import re
import unicodedata
from functools import lru_cache
from typing import Union
from urllib.parse import scheme_chars, uses_netloc

from ._quoters import QUOTER, UNQUOTER_PLUS

# Leading and trailing C0 control and space to be stripped per WHATWG spec.
# == "".join([chr(i) for i in range(0, 0x20 + 1)])
WHATWG_C0_CONTROL_OR_SPACE = (
    "\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10"
    "\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f "
)

# Unsafe bytes to be removed per WHATWG spec
UNSAFE_URL_BYTES_TO_REMOVE = ["\t", "\r", "\n"]
USES_AUTHORITY = frozenset(uses_netloc)

SplitURLType = tuple[str, str, str, str, str]


def split_url(url: str) -> SplitURLType:
    """Split URL into parts."""
    # Adapted from urllib.parse.urlsplit
    # Only lstrip url as some applications rely on preserving trailing space.
    # (https://url.spec.whatwg.org/#concept-basic-url-parser would strip both)
    url = url.lstrip(WHATWG_C0_CONTROL_OR_SPACE)
    for b in UNSAFE_URL_BYTES_TO_REMOVE:
        if b in url:
            url = url.replace(b, "")

    scheme = netloc = query = fragment = ""
    i = url.find(":")
    if i > 0 and url[0] in scheme_chars:
        for c in url[1:i]:
            if c not in scheme_chars:
                break
        else:
            scheme, url = url[:i].lower(), url[i + 1 :]
    has_hash = "#" in url
    has_question_mark = "?" in url
    if url[:2] == "//":
        delim = len(url)  # position of end of domain part of url, default is end
        if has_hash and has_question_mark:
            delim_chars = "/?#"
        elif has_question_mark:
            delim_chars = "/?"
        elif has_hash:
            delim_chars = "/#"
        else:
            delim_chars = "/"
        for c in delim_chars:  # look for delimiters; the order is NOT important
            wdelim = url.find(c, 2)  # find first of this delim
            if wdelim >= 0 and wdelim < delim:  # if found
                delim = wdelim  # use earliest delim position
        netloc = url[2:delim]
        url = url[delim:]
        has_left_bracket = "[" in netloc
        has_right_bracket = "]" in netloc
        if (has_left_bracket and not has_right_bracket) or (
            has_right_bracket and not has_left_bracket
        ):
            raise ValueError("Invalid IPv6 URL")
        if has_left_bracket:
            bracketed_host = netloc.partition("[")[2].partition("]")[0]
            # Valid bracketed hosts are defined in
            # https://www.rfc-editor.org/rfc/rfc3986#page-49
            # https://url.spec.whatwg.org/
            if bracketed_host and bracketed_host[0] == "v":
                if not re.match(r"\Av[a-fA-F0-9]+\..+\Z", bracketed_host):
                    raise ValueError("IPvFuture address is invalid")
            elif ":" not in bracketed_host:
                raise ValueError("The IPv6 content between brackets is not valid")
    if has_hash:
        url, _, fragment = url.partition("#")
    if has_question_mark:
        url, _, query = url.partition("?")
    if netloc and not netloc.isascii():
        _check_netloc(netloc)
    return scheme, netloc, url, query, fragment


def _check_netloc(netloc: str) -> None:
    # Adapted from urllib.parse._checknetloc
    # looking for characters like \u2100 that expand to 'a/c'
    # IDNA uses NFKC equivalence, so normalize for this check

    # ignore characters already included
    # but not the surrounding text
    n = netloc.replace("@", "").replace(":", "").replace("#", "").replace("?", "")
    normalized_netloc = unicodedata.normalize("NFKC", n)
    if n == normalized_netloc:
        return
    # Note that there are no unicode decompositions for the character '@' so
    # its currently impossible to have test coverage for this branch, however if the
    # one should be added in the future we want to make sure its still checked.
    for c in "/?#@:":  # pragma: no branch
        if c in normalized_netloc:
            raise ValueError(
                f"netloc '{netloc}' contains invalid "
                "characters under NFKC normalization"
            )


@lru_cache  # match the same size as urlsplit
def split_netloc(
    netloc: str,
) -> tuple[Union[str, None], Union[str, None], Union[str, None], Union[int, None]]:
    """Split netloc into username, password, host and port."""
    if "@" not in netloc:
        username: Union[str, None] = None
        password: Union[str, None] = None
        hostinfo = netloc
    else:
        userinfo, _, hostinfo = netloc.rpartition("@")
        username, have_password, password = userinfo.partition(":")
        if not have_password:
            password = None

    if "[" in hostinfo:
        _, _, bracketed = hostinfo.partition("[")
        hostname, _, port_str = bracketed.partition("]")
        _, _, port_str = port_str.partition(":")
    else:
        hostname, _, port_str = hostinfo.partition(":")

    if not port_str:
        return username or None, password, hostname or None, None

    try:
        port = int(port_str)
    except ValueError:
        raise ValueError("Invalid URL: port can't be converted to integer")
    if not (0 <= port <= 65535):
        raise ValueError("Port out of range 0-65535")
    return username or None, password, hostname or None, port


def unsplit_result(
    scheme: str, netloc: str, url: str, query: str, fragment: str
) -> str:
    """Unsplit a URL without any normalization."""
    if netloc or (scheme and scheme in USES_AUTHORITY) or url[:2] == "//":
        if url and url[:1] != "/":
            url = f"{scheme}://{netloc}/{url}" if scheme else f"{scheme}:{url}"
        else:
            url = f"{scheme}://{netloc}{url}" if scheme else f"//{netloc}{url}"
    elif scheme:
        url = f"{scheme}:{url}"
    if query:
        url = f"{url}?{query}"
    return f"{url}#{fragment}" if fragment else url


@lru_cache  # match the same size as urlsplit
def make_netloc(
    user: Union[str, None],
    password: Union[str, None],
    host: Union[str, None],
    port: Union[int, None],
    encode: bool = False,
) -> str:
    """Make netloc from parts.

    The user and password are encoded if encode is True.

    The host must already be encoded with _encode_host.
    """
    if host is None:
        return ""
    ret = host
    if port is not None:
        ret = f"{ret}:{port}"
    if user is None and password is None:
        return ret
    if password is not None:
        if not user:
            user = ""
        elif encode:
            user = QUOTER(user)
        if encode:
            password = QUOTER(password)
        user = f"{user}:{password}"
    elif user and encode:
        user = QUOTER(user)
    return f"{user}@{ret}" if user else ret


def query_to_pairs(query_string: str) -> list[tuple[str, str]]:
    """Parse a query given as a string argument.

    Works like urllib.parse.parse_qsl with keep empty values.
    """
    pairs: list[tuple[str, str]] = []
    if not query_string:
        return pairs
    for k_v in query_string.split("&"):
        k, _, v = k_v.partition("=")
        pairs.append((UNQUOTER_PLUS(k), UNQUOTER_PLUS(v)))
    return pairs
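A hedged sketch of how these internal helpers fit together, splitting a URL and then rebuilding its netloc; the expected values follow from the logic above (yarl._parse is a private module, shown here only for illustration):

from yarl._parse import make_netloc, split_netloc, split_url, unsplit_result

scheme, netloc, path, query, fragment = split_url(
    "https://user:pw@example.com:8080/a/b?x=1#frag"
)
# ('https', 'user:pw@example.com:8080', '/a/b', 'x=1', 'frag')

user, password, host, port = split_netloc(netloc)
# ('user', 'pw', 'example.com', 8080)

assert make_netloc(user, password, host, port) == netloc
assert unsplit_result(scheme, netloc, path, query, fragment) == (
    "https://user:pw@example.com:8080/a/b?x=1#frag"
)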
41
venv/lib/python3.12/site-packages/yarl/_path.py
Normal file
@@ -0,0 +1,41 @@
"""Utilities for working with paths."""

from collections.abc import Sequence
from contextlib import suppress


def normalize_path_segments(segments: Sequence[str]) -> list[str]:
    """Drop '.' and '..' from a sequence of str segments"""

    resolved_path: list[str] = []

    for seg in segments:
        if seg == "..":
            # ignore any .. segments that would otherwise cause an
            # IndexError when popped from resolved_path if
            # resolving for rfc3986
            with suppress(IndexError):
                resolved_path.pop()
        elif seg != ".":
            resolved_path.append(seg)

    if segments and segments[-1] in (".", ".."):
        # do some post-processing here.
        # if the last segment was a relative dir,
        # then we need to append the trailing '/'
        resolved_path.append("")

    return resolved_path


def normalize_path(path: str) -> str:
    # Drop '.' and '..' from str path
    prefix = ""
    if path and path[0] == "/":
        # preserve the "/" root element of absolute paths, copying it to the
        # normalised output as per sections 5.2.4 and 6.2.2.3 of rfc3986.
        prefix = "/"
        path = path[1:]

    segments = path.split("/")
    return prefix + "/".join(normalize_path_segments(segments))
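A short sketch of the path normalization above; the expected results follow directly from normalize_path_segments (again a private module, imported only for illustration):

from yarl._path import normalize_path

assert normalize_path("/a/b/../c/./d") == "/a/c/d"   # '..' pops a segment, '.' is dropped
assert normalize_path("/a/b/..") == "/a/"            # a trailing '..' keeps the trailing slash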
121
venv/lib/python3.12/site-packages/yarl/_query.py
Normal file
@@ -0,0 +1,121 @@
"""Query string handling."""

import math
from collections.abc import Iterable, Mapping, Sequence
from typing import TYPE_CHECKING, Any, SupportsInt, Union, cast

from multidict import istr

from ._quoters import QUERY_PART_QUOTER, QUERY_QUOTER

SimpleQuery = Union[str, SupportsInt, float]
QueryVariable = Union[SimpleQuery, Sequence[SimpleQuery]]
Query = Union[
    None, str, Mapping[str, QueryVariable], Sequence[tuple[str, QueryVariable]]
]


def query_var(v: SimpleQuery) -> str:
    """Convert a query variable to a string."""
    cls = type(v)
    if cls is int:  # Fast path for non-subclassed int
        return str(v)
    if isinstance(v, str):
        return v
    if isinstance(v, float):
        if math.isinf(v):
            raise ValueError("float('inf') is not supported")
        if math.isnan(v):
            raise ValueError("float('nan') is not supported")
        return str(float(v))
    if cls is not bool and isinstance(v, SupportsInt):
        return str(int(v))
    raise TypeError(
        "Invalid variable type: value "
        "should be str, int or float, got {!r} "
        "of type {}".format(v, cls)
    )


def get_str_query_from_sequence_iterable(
    items: Iterable[tuple[Union[str, istr], QueryVariable]],
) -> str:
    """Return a query string from a sequence of (key, value) pairs.

    value is a single value or a sequence of values for the key

    The sequence of values must be a list or tuple.
    """
    quoter = QUERY_PART_QUOTER
    pairs = [
        f"{quoter(k)}={quoter(v if type(v) is str else query_var(v))}"
        for k, val in items
        for v in (
            val if type(val) is not str and isinstance(val, (list, tuple)) else (val,)
        )
    ]
    return "&".join(pairs)


def get_str_query_from_iterable(
    items: Iterable[tuple[Union[str, istr], SimpleQuery]],
) -> str:
    """Return a query string from an iterable.

    The iterable must contain (key, value) pairs.

    The values are not allowed to be sequences, only single values are
    allowed. For sequences, use `_get_str_query_from_sequence_iterable`.
    """
    quoter = QUERY_PART_QUOTER
    # A listcomp is used since listcomps are inlined on CPython 3.12+ and
    # they are a bit faster than a generator expression.
    pairs = [
        f"{quoter(k)}={quoter(v if type(v) is str else query_var(v))}" for k, v in items
    ]
    return "&".join(pairs)


def get_str_query(*args: Any, **kwargs: Any) -> Union[str, None]:
    """Return a query string from supported args."""
    query: Union[
        str,
        Mapping[str, QueryVariable],
        Sequence[tuple[Union[str, istr], SimpleQuery]],
        None,
    ]
    if kwargs:
        if args:
            msg = "Either kwargs or single query parameter must be present"
            raise ValueError(msg)
        query = kwargs
    elif len(args) == 1:
        query = args[0]
    else:
        raise ValueError("Either kwargs or single query parameter must be present")

    if query is None:
        return None
    if not query:
        return ""
    if type(query) is dict:
        return get_str_query_from_sequence_iterable(query.items())
    if type(query) is str or isinstance(query, str):
        return QUERY_QUOTER(query)
    if isinstance(query, Mapping):
        return get_str_query_from_sequence_iterable(query.items())
    if isinstance(query, (bytes, bytearray, memoryview)):
        msg = "Invalid query type: bytes, bytearray and memoryview are forbidden"
        raise TypeError(msg)
    if isinstance(query, Sequence):
        # We don't expect sequence values if we're given a list of pairs
        # already; only mappings like builtin `dict` which can't have the
        # same key pointing to multiple values are allowed to use
        # `_query_seq_pairs`.
        if TYPE_CHECKING:
            query = cast(Sequence[tuple[Union[str, istr], SimpleQuery]], query)
        return get_str_query_from_iterable(query)
    raise TypeError(
        "Invalid query type: only str, mapping or "
        "sequence of (key, value) pairs is allowed"
    )
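A minimal sketch of get_str_query, the entry point defined above; the expected strings follow from QUERY_PART_QUOTER being configured with qs=True (spaces become '+'):

from yarl._query import get_str_query

assert get_str_query({"a": 1, "b": "x y"}) == "a=1&b=x+y"   # mapping form
assert get_str_query(a=1, b="x y") == "a=1&b=x+y"           # kwargs form
assert get_str_query(None) is None                          # None propagates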
33
venv/lib/python3.12/site-packages/yarl/_quoters.py
Normal file
@@ -0,0 +1,33 @@
"""Quoting and unquoting utilities for URL parts."""

from typing import Union
from urllib.parse import quote

from ._quoting import _Quoter, _Unquoter

QUOTER = _Quoter(requote=False)
REQUOTER = _Quoter()
PATH_QUOTER = _Quoter(safe="@:", protected="/+", requote=False)
PATH_REQUOTER = _Quoter(safe="@:", protected="/+")
QUERY_QUOTER = _Quoter(safe="?/:@", protected="=+&;", qs=True, requote=False)
QUERY_REQUOTER = _Quoter(safe="?/:@", protected="=+&;", qs=True)
QUERY_PART_QUOTER = _Quoter(safe="?/:@", qs=True, requote=False)
FRAGMENT_QUOTER = _Quoter(safe="?/:@", requote=False)
FRAGMENT_REQUOTER = _Quoter(safe="?/:@")

UNQUOTER = _Unquoter()
PATH_UNQUOTER = _Unquoter(unsafe="+")
PATH_SAFE_UNQUOTER = _Unquoter(ignore="/%", unsafe="+")
QS_UNQUOTER = _Unquoter(qs=True)
UNQUOTER_PLUS = _Unquoter(plus=True)  # to match urllib.parse.unquote_plus


def human_quote(s: Union[str, None], unsafe: str) -> Union[str, None]:
    if not s:
        return s
    for c in "%" + unsafe:
        if c in s:
            s = s.replace(c, f"%{ord(c):02X}")
    if s.isprintable():
        return s
    return "".join(c if c.isprintable() else quote(c) for c in s)
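A few hedged examples of the pre-configured quoter instances and human_quote; the expected strings follow from the _Quoter/_Unquoter parameters chosen above:

from yarl._quoters import PATH_QUOTER, QS_UNQUOTER, human_quote

assert PATH_QUOTER("/path with space/") == "/path%20with%20space/"
assert QS_UNQUOTER("a%3Db") == "a%3Db"   # '=' stays percent-encoded in query-string mode
assert human_quote("100% sure", "") == "100%25 sure"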
19
venv/lib/python3.12/site-packages/yarl/_quoting.py
Normal file
@@ -0,0 +1,19 @@
import os
import sys
from typing import TYPE_CHECKING

__all__ = ("_Quoter", "_Unquoter")


NO_EXTENSIONS = bool(os.environ.get("YARL_NO_EXTENSIONS"))  # type: bool
if sys.implementation.name != "cpython":
    NO_EXTENSIONS = True


if TYPE_CHECKING or NO_EXTENSIONS:
    from ._quoting_py import _Quoter, _Unquoter
else:
    try:
        from ._quoting_c import _Quoter, _Unquoter
    except ImportError:  # pragma: no cover
        from ._quoting_py import _Quoter, _Unquoter  # type: ignore[assignment]
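A small sketch of forcing the pure-Python implementation; NO_EXTENSIONS is read once at import time, so the environment variable must be set before yarl is first imported:

import os

os.environ["YARL_NO_EXTENSIONS"] = "1"  # must be set before importing yarl

from yarl._quoting import _Quoter, _Unquoter  # now the _quoting_py classes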
Binary file not shown.
451
venv/lib/python3.12/site-packages/yarl/_quoting_c.pyx
Normal file
@@ -0,0 +1,451 @@
from cpython.exc cimport PyErr_NoMemory
from cpython.mem cimport PyMem_Free, PyMem_Malloc, PyMem_Realloc
from cpython.unicode cimport (
    PyUnicode_DATA,
    PyUnicode_DecodeASCII,
    PyUnicode_DecodeUTF8Stateful,
    PyUnicode_GET_LENGTH,
    PyUnicode_KIND,
    PyUnicode_READ,
)
from libc.stdint cimport uint8_t, uint64_t
from libc.string cimport memcpy, memset

from string import ascii_letters, digits


cdef str GEN_DELIMS = ":/?#[]@"
cdef str SUB_DELIMS_WITHOUT_QS = "!$'()*,"
cdef str SUB_DELIMS = SUB_DELIMS_WITHOUT_QS + '+?=;'
cdef str RESERVED = GEN_DELIMS + SUB_DELIMS
cdef str UNRESERVED = ascii_letters + digits + '-._~'
cdef str ALLOWED = UNRESERVED + SUB_DELIMS_WITHOUT_QS
cdef str QS = '+&=;'

DEF BUF_SIZE = 8 * 1024  # 8KiB

cdef inline Py_UCS4 _to_hex(uint8_t v) noexcept:
    if v < 10:
        return <Py_UCS4>(v+0x30)  # ord('0') == 0x30
    else:
        return <Py_UCS4>(v+0x41-10)  # ord('A') == 0x41


cdef inline int _from_hex(Py_UCS4 v) noexcept:
    if '0' <= v <= '9':
        return <int>(v) - 0x30  # ord('0') == 0x30
    elif 'A' <= v <= 'F':
        return <int>(v) - 0x41 + 10  # ord('A') == 0x41
    elif 'a' <= v <= 'f':
        return <int>(v) - 0x61 + 10  # ord('a') == 0x61
    else:
        return -1


cdef inline int _is_lower_hex(Py_UCS4 v) noexcept:
    return 'a' <= v <= 'f'


cdef inline long _restore_ch(Py_UCS4 d1, Py_UCS4 d2):
    cdef int digit1 = _from_hex(d1)
    if digit1 < 0:
        return -1
    cdef int digit2 = _from_hex(d2)
    if digit2 < 0:
        return -1
    return digit1 << 4 | digit2


cdef uint8_t ALLOWED_TABLE[16]
cdef uint8_t ALLOWED_NOTQS_TABLE[16]


cdef inline bint bit_at(uint8_t array[], uint64_t ch) noexcept:
    return array[ch >> 3] & (1 << (ch & 7))


cdef inline void set_bit(uint8_t array[], uint64_t ch) noexcept:
    array[ch >> 3] |= (1 << (ch & 7))


memset(ALLOWED_TABLE, 0, sizeof(ALLOWED_TABLE))
memset(ALLOWED_NOTQS_TABLE, 0, sizeof(ALLOWED_NOTQS_TABLE))

for i in range(128):
    if chr(i) in ALLOWED:
        set_bit(ALLOWED_TABLE, i)
        set_bit(ALLOWED_NOTQS_TABLE, i)
    if chr(i) in QS:
        set_bit(ALLOWED_NOTQS_TABLE, i)

# ----------------- writer ---------------------------

cdef struct Writer:
    char *buf
    bint heap_allocated_buf
    Py_ssize_t size
    Py_ssize_t pos
    bint changed


cdef inline void _init_writer(Writer* writer, char* buf):
    writer.buf = buf
    writer.heap_allocated_buf = False
    writer.size = BUF_SIZE
    writer.pos = 0
    writer.changed = 0


cdef inline void _release_writer(Writer* writer):
    if writer.heap_allocated_buf:
        PyMem_Free(writer.buf)


cdef inline int _write_char(Writer* writer, Py_UCS4 ch, bint changed):
    cdef char * buf
    cdef Py_ssize_t size

    if writer.pos == writer.size:
        # reallocate
        size = writer.size + BUF_SIZE
        if not writer.heap_allocated_buf:
            buf = <char*>PyMem_Malloc(size)
            if buf == NULL:
                PyErr_NoMemory()
                return -1
            memcpy(buf, writer.buf, writer.size)
            writer.heap_allocated_buf = True
        else:
            buf = <char*>PyMem_Realloc(writer.buf, size)
            if buf == NULL:
                PyErr_NoMemory()
                return -1
        writer.buf = buf
        writer.size = size
    writer.buf[writer.pos] = <char>ch
    writer.pos += 1
    writer.changed |= changed
    return 0


cdef inline int _write_pct(Writer* writer, uint8_t ch, bint changed):
    if _write_char(writer, '%', changed) < 0:
        return -1
    if _write_char(writer, _to_hex(<uint8_t>ch >> 4), changed) < 0:
        return -1
    return _write_char(writer, _to_hex(<uint8_t>ch & 0x0f), changed)


cdef inline int _write_utf8(Writer* writer, Py_UCS4 symbol):
    cdef uint64_t utf = <uint64_t> symbol

    if utf < 0x80:
        return _write_pct(writer, <uint8_t>utf, True)
    elif utf < 0x800:
        if _write_pct(writer, <uint8_t>(0xc0 | (utf >> 6)), True) < 0:
            return -1
        return _write_pct(writer, <uint8_t>(0x80 | (utf & 0x3f)), True)
    elif 0xD800 <= utf <= 0xDFFF:
        # surogate pair, ignored
        return 0
    elif utf < 0x10000:
        if _write_pct(writer, <uint8_t>(0xe0 | (utf >> 12)), True) < 0:
            return -1
        if _write_pct(writer, <uint8_t>(0x80 | ((utf >> 6) & 0x3f)),
                      True) < 0:
            return -1
        return _write_pct(writer, <uint8_t>(0x80 | (utf & 0x3f)), True)
    elif utf > 0x10FFFF:
        # symbol is too large
        return 0
    else:
        if _write_pct(writer, <uint8_t>(0xf0 | (utf >> 18)), True) < 0:
            return -1
        if _write_pct(writer, <uint8_t>(0x80 | ((utf >> 12) & 0x3f)),
                      True) < 0:
            return -1
        if _write_pct(writer, <uint8_t>(0x80 | ((utf >> 6) & 0x3f)),
                      True) < 0:
            return -1
        return _write_pct(writer, <uint8_t>(0x80 | (utf & 0x3f)), True)


# --------------------- end writer --------------------------


cdef class _Quoter:
    cdef bint _qs
    cdef bint _requote

    cdef uint8_t _safe_table[16]
    cdef uint8_t _protected_table[16]

    def __init__(
        self, *, str safe='', str protected='', bint qs=False, bint requote=True,
    ):
        cdef Py_UCS4 ch

        self._qs = qs
        self._requote = requote

        if not self._qs:
            memcpy(self._safe_table,
                   ALLOWED_NOTQS_TABLE,
                   sizeof(self._safe_table))
        else:
            memcpy(self._safe_table,
                   ALLOWED_TABLE,
                   sizeof(self._safe_table))
        for ch in safe:
            if ord(ch) > 127:
                raise ValueError("Only safe symbols with ORD < 128 are allowed")
            set_bit(self._safe_table, ch)

        memset(self._protected_table, 0, sizeof(self._protected_table))
        for ch in protected:
            if ord(ch) > 127:
                raise ValueError("Only safe symbols with ORD < 128 are allowed")
            set_bit(self._safe_table, ch)
            set_bit(self._protected_table, ch)

    def __call__(self, val):
        if val is None:
            return None
        if type(val) is not str:
            if isinstance(val, str):
                # derived from str
                val = str(val)
            else:
                raise TypeError("Argument should be str")
        return self._do_quote_or_skip(<str>val)

    cdef str _do_quote_or_skip(self, str val):
        cdef char[BUF_SIZE] buffer
        cdef Py_UCS4 ch
        cdef Py_ssize_t length = PyUnicode_GET_LENGTH(val)
        cdef Py_ssize_t idx = length
        cdef bint must_quote = 0
        cdef Writer writer
        cdef int kind = PyUnicode_KIND(val)
        cdef const void *data = PyUnicode_DATA(val)

        # If everything in the string is in the safe
        # table and all ASCII, we can skip quoting
        while idx:
            idx -= 1
            ch = PyUnicode_READ(kind, data, idx)
            if ch >= 128 or not bit_at(self._safe_table, ch):
                must_quote = 1
                break

        if not must_quote:
            return val

        _init_writer(&writer, &buffer[0])
        try:
            return self._do_quote(<str>val, length, kind, data, &writer)
        finally:
            _release_writer(&writer)

    cdef str _do_quote(
        self,
        str val,
        Py_ssize_t length,
        int kind,
        const void *data,
        Writer *writer
    ):
        cdef Py_UCS4 ch
        cdef long chl
        cdef int changed
        cdef Py_ssize_t idx = 0

        while idx < length:
            ch = PyUnicode_READ(kind, data, idx)
            idx += 1
            if ch == '%' and self._requote and idx <= length - 2:
                chl = _restore_ch(
                    PyUnicode_READ(kind, data, idx),
                    PyUnicode_READ(kind, data, idx + 1)
                )
                if chl != -1:
                    ch = <Py_UCS4>chl
                    idx += 2
                    if ch < 128:
                        if bit_at(self._protected_table, ch):
                            if _write_pct(writer, ch, True) < 0:
                                raise
                            continue

                        if bit_at(self._safe_table, ch):
                            if _write_char(writer, ch, True) < 0:
                                raise
                            continue

                    changed = (_is_lower_hex(PyUnicode_READ(kind, data, idx - 2)) or
                               _is_lower_hex(PyUnicode_READ(kind, data, idx - 1)))
                    if _write_pct(writer, ch, changed) < 0:
                        raise
                    continue
                else:
                    ch = '%'

            if self._write(writer, ch) < 0:
                raise

        if not writer.changed:
            return val
        else:
            return PyUnicode_DecodeASCII(writer.buf, writer.pos, "strict")

    cdef inline int _write(self, Writer *writer, Py_UCS4 ch):
        if self._qs:
            if ch == ' ':
                return _write_char(writer, '+', True)

        if ch < 128 and bit_at(self._safe_table, ch):
            return _write_char(writer, ch, False)

        return _write_utf8(writer, ch)


cdef class _Unquoter:
    cdef str _ignore
    cdef bint _has_ignore
    cdef str _unsafe
    cdef bytes _unsafe_bytes
    cdef Py_ssize_t _unsafe_bytes_len
    cdef const unsigned char * _unsafe_bytes_char
    cdef bint _qs
    cdef bint _plus  # to match urllib.parse.unquote_plus
    cdef _Quoter _quoter
    cdef _Quoter _qs_quoter

    def __init__(self, *, ignore="", unsafe="", qs=False, plus=False):
        self._ignore = ignore
        self._has_ignore = bool(self._ignore)
        self._unsafe = unsafe
        # unsafe may only be extended ascii characters (0-255)
        self._unsafe_bytes = self._unsafe.encode('ascii')
        self._unsafe_bytes_len = len(self._unsafe_bytes)
        self._unsafe_bytes_char = self._unsafe_bytes
        self._qs = qs
        self._plus = plus
        self._quoter = _Quoter()
        self._qs_quoter = _Quoter(qs=True)

    def __call__(self, val):
        if val is None:
            return None
        if type(val) is not str:
            if isinstance(val, str):
                # derived from str
                val = str(val)
            else:
                raise TypeError("Argument should be str")
        return self._do_unquote(<str>val)

    cdef str _do_unquote(self, str val):
        cdef Py_ssize_t length = PyUnicode_GET_LENGTH(val)
        if length == 0:
            return val

        cdef list ret = []
        cdef char buffer[4]
        cdef Py_ssize_t buflen = 0
        cdef Py_ssize_t consumed
        cdef str unquoted
        cdef Py_UCS4 ch = 0
        cdef long chl = 0
        cdef Py_ssize_t idx = 0
        cdef Py_ssize_t start_pct
        cdef int kind = PyUnicode_KIND(val)
        cdef const void *data = PyUnicode_DATA(val)
        cdef bint changed = 0
        while idx < length:
            ch = PyUnicode_READ(kind, data, idx)
            idx += 1
            if ch == '%' and idx <= length - 2:
                changed = 1
                chl = _restore_ch(
                    PyUnicode_READ(kind, data, idx),
                    PyUnicode_READ(kind, data, idx + 1)
                )
                if chl != -1:
                    ch = <Py_UCS4>chl
                    idx += 2
                    assert buflen < 4
                    buffer[buflen] = ch
                    buflen += 1
                    try:
                        unquoted = PyUnicode_DecodeUTF8Stateful(buffer, buflen,
                                                                NULL, &consumed)
                    except UnicodeDecodeError:
                        start_pct = idx - buflen * 3
                        buffer[0] = ch
                        buflen = 1
                        ret.append(val[start_pct : idx - 3])
                        try:
                            unquoted = PyUnicode_DecodeUTF8Stateful(buffer, buflen,
                                                                    NULL, &consumed)
                        except UnicodeDecodeError:
                            buflen = 0
                            ret.append(val[idx - 3 : idx])
                            continue
                    if not unquoted:
                        assert consumed == 0
                        continue
                    assert consumed == buflen
                    buflen = 0
                    if self._qs and unquoted in '+=&;':
                        ret.append(self._qs_quoter(unquoted))
                    elif (
                        (self._unsafe_bytes_len and unquoted in self._unsafe) or
                        (self._has_ignore and unquoted in self._ignore)
                    ):
                        ret.append(self._quoter(unquoted))
                    else:
                        ret.append(unquoted)
                    continue
                else:
                    ch = '%'

            if buflen:
                start_pct = idx - 1 - buflen * 3
                ret.append(val[start_pct : idx - 1])
                buflen = 0

            if ch == '+':
                if (
                    (not self._qs and not self._plus) or
                    (self._unsafe_bytes_len and self._is_char_unsafe(ch))
                ):
                    ret.append('+')
                else:
                    changed = 1
                    ret.append(' ')
                continue

            if self._unsafe_bytes_len and self._is_char_unsafe(ch):
                changed = 1
                ret.append('%')
                h = hex(ord(ch)).upper()[2:]
                for ch in h:
                    ret.append(ch)
                continue

            ret.append(ch)

        if not changed:
            return val

        if buflen:
            ret.append(val[length - buflen * 3 : length])

        return ''.join(ret)

    cdef inline bint _is_char_unsafe(self, Py_UCS4 ch):
        for i in range(self._unsafe_bytes_len):
            if ch == self._unsafe_bytes_char[i]:
                return True
        return False
213
venv/lib/python3.12/site-packages/yarl/_quoting_py.py
Normal file
@@ -0,0 +1,213 @@
import codecs
import re
from string import ascii_letters, ascii_lowercase, digits
from typing import Union, overload

BASCII_LOWERCASE = ascii_lowercase.encode("ascii")
BPCT_ALLOWED = {f"%{i:02X}".encode("ascii") for i in range(256)}
GEN_DELIMS = ":/?#[]@"
SUB_DELIMS_WITHOUT_QS = "!$'()*,"
SUB_DELIMS = SUB_DELIMS_WITHOUT_QS + "+&=;"
RESERVED = GEN_DELIMS + SUB_DELIMS
UNRESERVED = ascii_letters + digits + "-._~"
ALLOWED = UNRESERVED + SUB_DELIMS_WITHOUT_QS


_IS_HEX = re.compile(b"[A-Z0-9][A-Z0-9]")
_IS_HEX_STR = re.compile("[A-Fa-f0-9][A-Fa-f0-9]")

utf8_decoder = codecs.getincrementaldecoder("utf-8")


class _Quoter:
    def __init__(
        self,
        *,
        safe: str = "",
        protected: str = "",
        qs: bool = False,
        requote: bool = True,
    ) -> None:
        self._safe = safe
        self._protected = protected
        self._qs = qs
        self._requote = requote

    @overload
    def __call__(self, val: str) -> str: ...
    @overload
    def __call__(self, val: None) -> None: ...
    def __call__(self, val: Union[str, None]) -> Union[str, None]:
        if val is None:
            return None
        if not isinstance(val, str):
            raise TypeError("Argument should be str")
        if not val:
            return ""
        bval = val.encode("utf8", errors="ignore")
        ret = bytearray()
        pct = bytearray()
        safe = self._safe
        safe += ALLOWED
        if not self._qs:
            safe += "+&=;"
        safe += self._protected
        bsafe = safe.encode("ascii")
        idx = 0
        while idx < len(bval):
            ch = bval[idx]
            idx += 1

            if pct:
                if ch in BASCII_LOWERCASE:
                    ch = ch - 32  # convert to uppercase
                pct.append(ch)
                if len(pct) == 3:  # pragma: no branch   # peephole optimizer
                    buf = pct[1:]
                    if not _IS_HEX.match(buf):
                        ret.extend(b"%25")
                        pct.clear()
                        idx -= 2
                        continue
                    try:
                        unquoted = chr(int(pct[1:].decode("ascii"), base=16))
                    except ValueError:
                        ret.extend(b"%25")
                        pct.clear()
                        idx -= 2
                        continue

                    if unquoted in self._protected:
                        ret.extend(pct)
                    elif unquoted in safe:
                        ret.append(ord(unquoted))
                    else:
                        ret.extend(pct)
                    pct.clear()

                # special case, if we have only one char after "%"
                elif len(pct) == 2 and idx == len(bval):
                    ret.extend(b"%25")
                    pct.clear()
                    idx -= 1

                continue

            elif ch == ord("%") and self._requote:
                pct.clear()
                pct.append(ch)

                # special case if "%" is last char
                if idx == len(bval):
                    ret.extend(b"%25")

                continue

            if self._qs and ch == ord(" "):
                ret.append(ord("+"))
                continue
            if ch in bsafe:
                ret.append(ch)
                continue

            ret.extend((f"%{ch:02X}").encode("ascii"))

        ret2 = ret.decode("ascii")
        if ret2 == val:
            return val
        return ret2


class _Unquoter:
    def __init__(
        self,
        *,
        ignore: str = "",
        unsafe: str = "",
        qs: bool = False,
        plus: bool = False,
    ) -> None:
        self._ignore = ignore
        self._unsafe = unsafe
        self._qs = qs
        self._plus = plus  # to match urllib.parse.unquote_plus
        self._quoter = _Quoter()
        self._qs_quoter = _Quoter(qs=True)

    @overload
    def __call__(self, val: str) -> str: ...
    @overload
    def __call__(self, val: None) -> None: ...
    def __call__(self, val: Union[str, None]) -> Union[str, None]:
        if val is None:
            return None
        if not isinstance(val, str):
            raise TypeError("Argument should be str")
        if not val:
            return ""
        decoder = utf8_decoder()
        ret = []
        idx = 0
        while idx < len(val):
            ch = val[idx]
            idx += 1
            if ch == "%" and idx <= len(val) - 2:
                pct = val[idx : idx + 2]
                if _IS_HEX_STR.fullmatch(pct):
                    b = bytes([int(pct, base=16)])
                    idx += 2
                    try:
                        unquoted = decoder.decode(b)
                    except UnicodeDecodeError:
                        start_pct = idx - 3 - len(decoder.buffer) * 3
                        ret.append(val[start_pct : idx - 3])
                        decoder.reset()
                        try:
                            unquoted = decoder.decode(b)
                        except UnicodeDecodeError:
                            ret.append(val[idx - 3 : idx])
                            continue
                    if not unquoted:
                        continue
                    if self._qs and unquoted in "+=&;":
                        to_add = self._qs_quoter(unquoted)
                        if to_add is None:  # pragma: no cover
                            raise RuntimeError("Cannot quote None")
                        ret.append(to_add)
                    elif unquoted in self._unsafe or unquoted in self._ignore:
                        to_add = self._quoter(unquoted)
                        if to_add is None:  # pragma: no cover
                            raise RuntimeError("Cannot quote None")
                        ret.append(to_add)
                    else:
                        ret.append(unquoted)
                    continue

            if decoder.buffer:
                start_pct = idx - 1 - len(decoder.buffer) * 3
                ret.append(val[start_pct : idx - 1])
                decoder.reset()

            if ch == "+":
                if (not self._qs and not self._plus) or ch in self._unsafe:
                    ret.append("+")
                else:
                    ret.append(" ")
                continue

            if ch in self._unsafe:
                ret.append("%")
                h = hex(ord(ch)).upper()[2:]
                for ch in h:
                    ret.append(ch)
                continue

            ret.append(ch)

        if decoder.buffer:
            ret.append(val[-len(decoder.buffer) * 3 :])

        ret2 = "".join(ret)
        if ret2 == val:
            return val
        return ret2
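A hedged round-trip sketch of the pure-Python _Quoter and _Unquoter above (the Cython classes in _quoting_c.pyx implement the same behaviour); the expected strings follow from the default parameters:

from yarl._quoting_py import _Quoter, _Unquoter

quoter = _Quoter()
unquoter = _Unquoter()

assert quoter("a b&c") == "a%20b&c"     # space is encoded, '&' is safe outside qs mode
assert unquoter("a%20b&c") == "a b&c"   # round-trips back
assert quoter("50%") == "50%25"         # a dangling '%' is requoted
assert quoter("50%25") == "50%25"       # existing escapes are preserved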
1622
venv/lib/python3.12/site-packages/yarl/_url.py
Normal file
File diff suppressed because it is too large
1
venv/lib/python3.12/site-packages/yarl/py.typed
Normal file
@@ -0,0 +1 @@
# Placeholder