A RetroSearch Logo

Home - News ( United States | United Kingdom | Italy | Germany ) - Football scores

Search Query:

Showing content from https://github.com/RDFLib/rdflib/commit/dfa4054 below:

IRI to URI conversion (#2304) · RDFLib/rdflib@dfa4054 · GitHub

File tree Expand file treeCollapse file tree 5 files changed

+169

-19

lines changed

Filter options

Expand file treeCollapse file tree 5 files changed

+169

-19

lines changed Original file line number Diff line number Diff line change

@@ -522,32 +522,92 @@ def _coalesce(

522 522

return default

523 523 524 524 525 +

_RFC3986_SUBDELIMS = "!$&'()*+,;="

526 +

"""

527 +

``sub-delims`` production from `RFC 3986, section 2.2

528 +

<https://www.rfc-editor.org/rfc/rfc3986.html#section-2.2>`_.

529 +

"""

530 + 531 +

_RFC3986_PCHAR_NU = "%" + _RFC3986_SUBDELIMS + ":@"

532 +

"""

533 +

The non-unreserved characters in the ``pchar`` production from RFC 3986.

534 +

"""

535 + 536 +

_QUERY_SAFE_CHARS = _RFC3986_PCHAR_NU + "/?"

537 +

"""

538 +

The non-unreserved characters that are safe to use in the query and fragment

539 +

components.

540 + 541 +

.. code-block::

542 + 543 +

pchar = unreserved / pct-encoded / sub-delims / ":" / "@"

544 +

query = *( pchar / "/" / "?" )
fragment = *( pchar / "/" / "?" )

545 +

"""

546 + 547 +

_USERNAME_SAFE_CHARS = _RFC3986_SUBDELIMS + "%"

548 +

"""

549 +

The non-unreserved characters that are safe to use in the username and password

550 +

components.

551 + 552 +

.. code-block::

553 + 554 +

userinfo = *( unreserved / pct-encoded / sub-delims / ":" )

555 + 556 +

":" is excluded as this is only used for the username and password components,

557 +

and they are treated separately.

558 +

"""

559 + 560 +

_PATH_SAFE_CHARS = _RFC3986_PCHAR_NU + "/"

561 +

"""

562 +

The non-unreserved characters that are safe to use in the path component.

563 + 564 + 565 +

This is based on various path-related productions from RFC 3986.

566 +

"""

567 + 568 + 525 569

def _iri2uri(iri: str) -> str:

526 570

"""

527 -

Convert an IRI to a URI (Python 3).

528 -

https://stackoverflow.com/a/42309027

529 -

https://stackoverflow.com/a/40654295

530 -

netloc should be encoded using IDNA;

531 -

non-ascii URL path should be encoded to UTF-8 and then percent-escaped;

532 -

non-ascii query parameters should be encoded to the encoding of a page

533 -

URL was extracted from (or to the encoding server uses), then

534 -

percent-escaped.

571 +

Prior art:

572 + 573 +

* `iri_to_uri from Werkzeug <https://github.com/pallets/werkzeug/blob/92c6380248c7272ee668e1f8bbd80447027ccce2/src/werkzeug/urls.py#L926-L931>`_

574 + 535 575

>>> _iri2uri("https://dbpedia.org/resource/Almería")

536 576

'https://dbpedia.org/resource/Almer%C3%ADa'

537 577

"""

578 +

# https://datatracker.ietf.org/doc/html/rfc3986

538 579

# https://datatracker.ietf.org/doc/html/rfc3305

539 580 540 -

(scheme, netloc, path, query, fragment) = urlsplit(iri)

581 +

parts = urlsplit(iri)

582 +

(scheme, netloc, path, query, fragment) = parts

541 583 542 -

# Just support http/https, otherwise return the iri unmolested

584 +

# Just support http/https, otherwise return the iri unaltered

543 585

if scheme not in ["http", "https"]:

544 586

return iri

545 587 546 -

scheme = quote(scheme)

547 -

netloc = netloc.encode("idna").decode("utf-8")

548 -

path = quote(path)

549 -

query = quote(query)

550 -

fragment = quote(fragment)

588 +

path = quote(path, safe=_PATH_SAFE_CHARS)

589 +

query = quote(query, safe=_QUERY_SAFE_CHARS)

590 +

fragment = quote(fragment, safe=_QUERY_SAFE_CHARS)

591 + 592 +

if parts.hostname:

593 +

netloc = parts.hostname.encode("idna").decode("ascii")

594 +

else:

595 +

netloc = ""

596 + 597 +

if ":" in netloc:

598 +

# Quote IPv6 addresses

599 +

netloc = f"[{netloc}]"

600 + 601 +

if parts.port:

602 +

netloc = f"{netloc}:{parts.port}"

603 + 604 +

if parts.username:

605 +

auth = quote(parts.username, safe=_USERNAME_SAFE_CHARS)

606 +

if parts.password:

607 +

pass_quoted = quote(parts.password, safe=_USERNAME_SAFE_CHARS)

608 +

auth = f"{auth}:{pass_quoted}"

609 +

netloc = f"{auth}@{netloc}"

610 + 551 611

uri = urlunsplit((scheme, netloc, path, query, fragment))

552 612 553 613

if iri.endswith("#") and not uri.endswith("#"):

Original file line number Diff line number Diff line change

@@ -1,14 +1,20 @@

1 +

import logging

1 2

import re

2 3

from http.server import BaseHTTPRequestHandler

3 4

from test.data import TEST_DATA_DIR

4 5

from test.utils import GraphHelper

5 6

from test.utils.graph import cached_graph

6 -

from test.utils.http import ctx_http_handler

7 +

from test.utils.http import (

8 +

MOCK_HTTP_REQUEST_WILDCARD,

9 +

MockHTTPRequest,

10 +

ctx_http_handler,

11 +

)

7 12

from test.utils.httpservermock import (

8 13

MethodName,

9 14

MockHTTPResponse,

10 15

ServedBaseHTTPServerMock,

11 16

)

17 +

from test.utils.wildcard import URL_PARSE_RESULT_WILDCARD

12 18

from urllib.error import HTTPError

13 19 14 20

import pytest

@@ -235,7 +241,34 @@ def test_5xx(self):

235 241

assert raised.value.code == 500

236 242 237 243 238 -

def test_iri_source(function_httpmock: ServedBaseHTTPServerMock) -> None:

244 +

@pytest.mark.parametrize(

245 +

["url_suffix", "expected_request"],

246 +

[

247 +

(

248 +

"/resource/Almería",

249 +

MOCK_HTTP_REQUEST_WILDCARD._replace(

250 +

path="/resource/Almer%C3%ADa",

251 +

parsed_path=URL_PARSE_RESULT_WILDCARD._replace(

252 +

path="/resource/Almer%C3%ADa"

253 +

),

254 +

),

255 +

),

256 +

(

257 +

"/resource/Almería?foo=bar",

258 +

MOCK_HTTP_REQUEST_WILDCARD._replace(

259 +

parsed_path=URL_PARSE_RESULT_WILDCARD._replace(

260 +

path="/resource/Almer%C3%ADa"

261 +

),

262 +

path_query={"foo": ["bar"]},

263 +

),

264 +

),

265 +

],

266 +

)

267 +

def test_iri_source(

268 +

url_suffix: str,

269 +

expected_request: MockHTTPRequest,

270 +

function_httpmock: ServedBaseHTTPServerMock,

271 +

) -> None:

239 272

diverse_triples_path = TEST_DATA_DIR / "variants/diverse_triples.ttl"

240 273 241 274

function_httpmock.responses[MethodName.GET].append(

@@ -247,9 +280,11 @@ def test_iri_source(function_httpmock: ServedBaseHTTPServerMock) -> None:

247 280

)

248 281

)

249 282

g = Graph()

250 -

g.parse(f"{function_httpmock.url}/resource/Almería")

283 +

g.parse(f"{function_httpmock.url}{url_suffix}")

251 284

assert function_httpmock.call_count == 1

252 285

GraphHelper.assert_triple_sets_equals(cached_graph((diverse_triples_path,)), g)

286 +

assert len(g) > 1

253 287 254 288

req = function_httpmock.requests[MethodName.GET].pop(0)

255 -

assert req.path == "/resource/Almer%C3%ADa"

289 +

logging.debug("req = %s", req)

290 +

assert expected_request == req

Original file line number Diff line number Diff line change

@@ -635,6 +635,24 @@ def test_get_tree(

635 635

"http://example.com:1231/",

636 636

},

637 637

),

638 +

(

639 +

"http://example.com:1231/a=b",

640 +

{

641 +

"http://example.com:1231/a=b",

642 +

},

643 +

),

644 +

(

645 +

"http://aé:aé@example.com:1231/bé/a=bé&c=d#a=bé&c=d",

646 +

{

647 +

"http://a%C3%A9:a%C3%A9@example.com:1231/b%C3%A9/a=b%C3%A9&c=d#a=b%C3%A9&c=d",

648 +

},

649 +

),

650 +

(

651 +

"http://a%C3%A9:a%C3%A9@example.com:1231/b%C3%A9/a=b%C3%A9&c=d#a=b%C3%A9&c=d",

652 +

{

653 +

"http://a%C3%A9:a%C3%A9@example.com:1231/b%C3%A9/a=b%C3%A9&c=d#a=b%C3%A9&c=d",

654 +

},

655 +

),

638 656

],

639 657

)

640 658

def test_iri2uri(iri: str, expected_result: Union[Set[str], Type[Exception]]) -> None:

Original file line number Diff line number Diff line change

@@ -4,6 +4,7 @@

4 4

import random

5 5

from contextlib import contextmanager

6 6

from http.server import BaseHTTPRequestHandler, HTTPServer

7 +

from test.utils.wildcard import EQ_WILDCARD

7 8

from threading import Thread

8 9

from typing import (

9 10

Dict,

@@ -62,6 +63,14 @@ class MockHTTPRequest(NamedTuple):

62 63

body: Optional[bytes]

63 64 64 65 66 +

MOCK_HTTP_REQUEST_WILDCARD = MockHTTPRequest(

67 +

EQ_WILDCARD, EQ_WILDCARD, EQ_WILDCARD, EQ_WILDCARD, EQ_WILDCARD, EQ_WILDCARD

68 +

)

69 +

"""

70 +

This object should be equal to any `MockHTTPRequest` object.

71 +

"""

72 + 73 + 65 74

class MockHTTPResponse(NamedTuple):

66 75

status_code: int

67 76

reason_phrase: str

Original file line number Diff line number Diff line change

@@ -0,0 +1,28 @@

1 +

from typing import Any

2 +

from urllib.parse import ParseResult

3 + 4 + 5 +

class EqWildcard:
    """
    An object that compares equal to anything.

    Only ``__eq__`` is defined. Python has no ``__req__`` hook: when the
    left operand's ``__eq__`` returns ``NotImplemented`` (as built-in types
    do for unrelated operands), the interpreter retries with the right
    operand's ``__eq__``, so ``other == EqWildcard()`` is also ``True``.
    ``!=`` is derived from ``__eq__`` and is therefore always ``False``.

    Because ``__eq__`` is overridden without ``__hash__``, instances are
    unhashable and cannot be used as set members or dict keys.
    """

    def __eq__(self, other: Any) -> Any:
        # Match unconditionally, regardless of the type or value of ``other``.
        return True

    def __repr__(self) -> str:
        return "EqWildcard()"

18 + 19 + 20 +

EQ_WILDCARD: Any = EqWildcard()

21 + 22 + 23 +

URL_PARSE_RESULT_WILDCARD = ParseResult(

24 +

EQ_WILDCARD, EQ_WILDCARD, EQ_WILDCARD, EQ_WILDCARD, EQ_WILDCARD, EQ_WILDCARD

25 +

)

26 +

"""

27 +

This should be equal to any `ParseResult` object.

28 +

"""

You can’t perform that action at this time.


RetroSearch is an open source project built by @garambo | Open a GitHub Issue

Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo

HTML: 3.2 | Encoding: UTF-8 | Version: 0.7.4