From fda37f413f41ae31479a29277030e450bf9032aa Mon Sep 17 00:00:00 2001 From: Mark Joshwel Date: Tue, 5 Sep 2023 17:34:28 +0000 Subject: [PATCH] s+,docs,tests: many - s+: local code conversion - s+: fingerprinted user agents + override arg - s+: rate limited default_ geocoding functions - s+: gecoding function protocols - docs: update api - tests: fix np and macritchie expecteds --- README.md | 408 ++++++++++++++++++++++++++++++++---------- playground.ipynb | 330 ++++++++++++++++++++++++++++------ poetry.lock | 6 +- surplus/__init__.py | 13 +- surplus/surplus.py | 425 +++++++++++++++++++++++++++++++++++--------- test.py | 30 ++-- 6 files changed, 969 insertions(+), 243 deletions(-) diff --git a/README.md b/README.md index 0901d28..ccc1a4f 100644 --- a/README.md +++ b/README.md @@ -22,6 +22,7 @@ to iOS Shortcuts-like shareable text. - [what counts as "incorrect"](#what-counts-as-incorrect) - [output technical details](#the-technical-details-of-surpluss-output) - [api reference](#api-reference) + - [details on the fingerprinted user agent](#details-on-the-fingerprinted-user-agent) - [licence](#licence) ```text @@ -62,27 +63,29 @@ see [licence](#licence) for licensing information. ### command-line usage ```text -usage: surplus [-h] [-d] [-v] [-c {pluscode,localcode,latlong,string}] +usage: surplus [-h] [-d] [-v] [-c {pluscode,localcode,latlong,sharetext}] + [-u USER_AGENT] [query ...] 
Google Maps Plus Code to iOS Shortcuts-like shareable text positional arguments: - query full-length Plus Code (6PH58QMF+FX), - shortened Plus Code/'local code' (8QMF+FX Singapore), - latlong (1.3336875, 103.7749375), - string query (e.g., 'Wisma Atria'), - or '-' to read from stdin + query full-length Plus Code (6PH58QMF+FX), shortened + Plus Code/'local code' (8QMF+FX Singapore), + latlong (1.3336875, 103.7749375), string query + (e.g., 'Wisma Atria'), or '-' to read from stdin options: -h, --help show this help message and exit -d, --debug prints lat, long and reverser response dict to stderr -v, --version prints version information to stderr and exits - -c {pluscode,localcode,latlong,sharetext}, - --convert-to {pluscode,localcode,latlong,sharetext} + -c {pluscode,localcode,latlong,sharetext}, --convert-to {pluscode,localcode,latlong,sharetext} converts query a specific output type, defaults to 'sharetext' + -u USER_AGENT, --user-agent USER_AGENT + user agent string to use for geocoding service, + defaults to fingerprinted user agent string ``` ### example api usage @@ -189,7 +192,7 @@ and do the following: function, which by default is OpenStreetMap Nominatim. (_don't know what the above means? then you are using the default reverser._) - also look at the [what counts as "incorrect"](#what-counts-as-incorrect) section + also look at the ['what counts as "incorrect"'](#what-counts-as-incorrect) section before moving on. 2. include the erroneous query. @@ -263,12 +266,13 @@ of incorrect outputs. 
```text $ s+ --debug 8QJF+RP Singapore -surplus version 2.1.0, debug mode +surplus version 2.1.0, debug mode (latest@future, Tue 05 Sep 2023 23:38:59 +0800) debug: parse_query: behaviour.query=['8QJF+RP', 'Singapore'] debug: _match_plus_code: portion_plus_code='8QJF+RP', portion_locality='Singapore' debug: cli: query=Result(value=LocalCodeQuery(code='8QJF+RP', locality='Singapore'), error=None) -debug: cli: latlong.get()=Latlong(latitude=1.3320625, longitude=103.7743125) -debug: cli: location={'amenity': 'Ngee Ann Polytechnic', 'house_number': '535', 'road': 'Clementi Road', 'suburb': 'Bukit Timah', 'city': 'Singapore', 'county': 'Northwest', 'ISO3166-2-lvl6': 'SG-03', 'postcode': '599489', 'country': 'Singapore', 'country_code': 'sg', 'raw': "{...}", 'latitude': '1.33318835', 'longitude': '103.77461234638255'} +debug: cli: latlong_result.get()=Latlong(latitude=1.3320625, longitude=103.7743125) +debug: cli: behaviour.user_agent='surplus/2.1.0-local (1fdbfa0b0cfb)' +debug: cli: location={...} debug: _generate_text: seen_names=['Ngee Ann Polytechnic', 'Clementi Road'] debug: _generate_text_line: [True] -> True -------- 'Ngee Ann Polytechnic' debug: _generate_text_line: [True] -> True -------- '535' @@ -329,7 +333,7 @@ variables this variable is displayed to show what query type [`parse_query()`](#def-parse_query) has recognised, and if there were any errors during query parsing -- **expression `latlong.get()=`** +- **expression `latlong_result.get()=`** (_only shown if the query is a plus code_) @@ -340,12 +344,12 @@ variables the response dictionary from the reverser function passed to [`surplus()`](#def-surplus) - for more information on the reverser function, see [`Behaviour`](#class-behaviour) and - [`default_reverser`](#def-default_reverser) + for more information on the reverser function, see + [`SurplusReverserProtocol`](#surplusreverserprotocol) - **variable `seen_names`** - a list of unique important names found in certain nominatim keys used in final output 
+ a list of unique important names found in certain Nominatim keys used in final output lines 0-3 - **`_generate_text_line` seen name checks** @@ -524,7 +528,13 @@ line breakdown of shareable text output, accompanied by their Nominatim keys: - [types](#types) - [`Query`](#query) - [`ResultType`](#resulttype) + - [`SurplusGeocoderProtocol`](#surplusgeocoderprotocol) + - [`SurplusReverserProtocol`](#surplusreverserprotocol) - [`class Behaviour`](#class-behaviour) +- [`class SurplusDefaultGeocoding`](#class-surplusdefaultgeocoding) + - [`SurplusDefaultGeocoding.update_geocoding_functions()`](#surplusdefaultgeocodingupdate_geocoding_functions) + - [`SurplusDefaultGeocoding.geocoder()`](#surplusdefaultgeocodinggeocoder) + - [`SurplusDefaultGeocoding.reverser()`](#surplusdefaultgeocodingreverser) - [`class ConversionResultTypeEnum`](#class-conversionresulttypeenum) - [`class Result`](#class-result) - [`Result.__bool__()`](#result__bool__) @@ -547,8 +557,8 @@ line breakdown of shareable text output, accompanied by their Nominatim keys: - [`StringQuery.__str__()`](#stringquery__str__) - [`def surplus()`](#def-surplus) - [`def parse_query()`](#def-parse_query) -- [`def default_geocoder()`](#def-default_geocoder) -- [`def default_reverser()`](#def-default_reverser) +- [`def generate_fingerprinted_user_agent`](#def-generate_fingerprinted_user_agent) + - [details on the fingerprinted user agent](#details-on-the-fingerprinted-user-agent) ### constants @@ -557,30 +567,40 @@ line breakdown of shareable text output, accompanied by their Nominatim keys: a tuple of integers representing the version of surplus, in the format `[major, minor, patch]` -- `VERSION_SUFFIX: Final[str]` - `BUILD_BRANCH: Final[str]` - `BUILD_COMMIT: Final[str]` - `BUILD_DATETIME: Final[datetime]` +- `VERSION_SUFFIX: typing.Final[str]` + `BUILD_BRANCH: typing.Final[str]` + `BUILD_COMMIT: typing.Final[str]` + `BUILD_DATETIME: typing.Final[datetime]` string and a 
[datetime.datetime](https://docs.python.org/3/library/datetime.html) object containing version and build information, set by [releaser.py](releaser.py) -- `SHAREABLE_TEXT_LINE_0_KEYS: tuple[str, ...]` - `SHAREABLE_TEXT_LINE_1_KEYS: tuple[str, ...]` - `SHAREABLE_TEXT_LINE_2_KEYS: tuple[str, ...]` - `SHAREABLE_TEXT_LINE_3_KEYS: tuple[str, ...]` - `SHAREABLE_TEXT_LINE_4_KEYS: tuple[str, ...]` - `SHAREABLE_TEXT_LINE_5_KEYS: tuple[str, ...]` - `SHAREABLE_TEXT_LINE_6_KEYS: tuple[str, ...]` +- `CONNECTION_MAX_RETRIES: int = 9` + `CONNECTION_WAIT_SECONDS: int = 10` - a tuple of strings containing nominatim keys used in shareable text line 0-6 + defines if and how many times to retry a connection, alongside how many seconds to wait + in between tries, for Nominatim -- `SHAREABLE_TEXT_NAMES: tuple[str, ...]` + > [!NOTE] + > this constant only affects the default surplus Nominatim geocoding functions. custom + > functions do not read from this, unless deliberately programmed to do so + +- `SHAREABLE_TEXT_LINE_0_KEYS: typing.Final[tuple[str, ...]]` + `SHAREABLE_TEXT_LINE_1_KEYS: typing.Final[tuple[str, ...]]` + `SHAREABLE_TEXT_LINE_2_KEYS: typing.Final[tuple[str, ...]]` + `SHAREABLE_TEXT_LINE_3_KEYS: typing.Final[tuple[str, ...]]` + `SHAREABLE_TEXT_LINE_4_KEYS: typing.Final[tuple[str, ...]]` + `SHAREABLE_TEXT_LINE_5_KEYS: typing.Final[tuple[str, ...]]` + `SHAREABLE_TEXT_LINE_6_KEYS: typing.Final[tuple[str, ...]]` + + a tuple of strings containing Nominatim keys used in shareable text line 0-6 + +- `SHAREABLE_TEXT_NAMES: typing.Final[tuple[str, ...]]` - a tuple of strings containing nominatim keys used in shareable text line 0-2 and + a tuple of strings containing Nominatim keys used in shareable text line 0-2 and special keys in line 3 -- `EMPTY_LATLONG: Latlong` +- `EMPTY_LATLONG: typing.Final[Latlong]` a constant for an empty latlong coordinate, with latitude and longitude set to 0.0 ### exception classes @@ -618,6 +638,88 @@ ResultType = TypeVar("ResultType") [generic 
type](https://docs.python.org/3/library/typing.html#generics) used by [`Result`](#class-result) +#### `SurplusGeocoderProtocol` + +[typing_extensions.Protocol](https://mypy.readthedocs.io/en/stable/protocols.html#callback-protocols) +class for documentation and static type checking of surplus geocoder functions + +- **signature and conforming function signature** + + ```python + class SurplusGeocoderProtocol(Protocol): + def __call__(self, place: str) -> Latlong: + ... + ``` + + functions that conform to this protocol should have the following signature: + + ```python + def example(place: str) -> Latlong: ... + ``` + +- **information on conforming functions** + + function takes in a location name as a string, and returns a [Latlong](#class-latlong) + + function can and should be + [`functools.lru_cache()`-wrapped](https://docs.python.org/3/library/functools.html#functools.lru_cache) + if the geocoding service asks for caching + + exceptions are handled by the caller + +#### `SurplusReverserProtocol` + +[typing_extensions.Protocol](https://mypy.readthedocs.io/en/stable/protocols.html#callback-protocols) +class for documentation and static type checking of surplus reverser functions + +- **signature and conforming function signature** + + ```python + class SurplusReverserProtocol(Protocol): + def __call__(self, latlong: Latlong, level: int = 18) -> dict[str, Any]: + ... + ``` + + functions that conform to this protocol should have the following signature: + + ```python + def example(latlong: Latlong, level: int = 18) -> dict[str, Any]: ... + ``` + +- **information on conforming functions** + + function takes in a [Latlong](#class-latlong) object and returns a dictionary with [`SHAREABLE_TEXT_LINE_*_KEYS`](#constants) keys at the dictionary's top level. + keys are used to access address information. + + function should also take in an int representing the level of detail for the returned + address, 0-18 (country-level to building), inclusive.
should default to 18. + + keys for latitude, longitude and an ISO 3166-2 code (or closest equivalent) should also be + included at the dictionary's top level as the keys `latitude`, `longitude` and + `ISO3166-2` (case-insensitive, or at least something starting with `ISO3166`) + respectively. + + ```python + { + 'ISO3166-2-lvl6': 'SG-03', + 'amenity': 'Ngee Ann Polytechnic', + ... + 'country': 'Singapore', + 'latitude': 1.33318835, + 'longitude': 103.77461234638255, + 'postcode': '599489', + 'raw': {...}, + } + ``` + + function can and should be + [`functools.lru_cache()`-wrapped](https://docs.python.org/3/library/functools.html#functools.lru_cache) + if the geocoding service asks for caching + + see the [playground notebook](/playground.ipynb) in repository root for detailed + sample output + exceptions are handled by the caller + ### `class Behaviour` [`typing.NamedTuple`](https://docs.python.org/3/library/typing.html#typing.NamedTuple) @@ -629,15 +731,13 @@ attributes original user-passed query string or a list of strings from splitting user-passed query string by spaces -- `geocoder: typing.Callable[[str], Latlong] = default_geocoder` - name string to location function, must take in a string and return a - [`Latlong`](#class-latlong), exceptions are handled by the caller +- `geocoder: SurplusGeocoderProtocol = default_geocoding.geocoder` + name string to location function, see + [`SurplusGeocoderProtocol`](#surplusgeocoderprotocol) for more information -- `reverser: Callable[[Latlong], dict[str, Any]] = default_reverser` - [`Latlong`](#class-latlong) object to dictionary function, must take in a string and return a - dict. keys found in SHAREABLE_TEXT_LINE_*_KEYS used to access address details are placed - top-level in the dict, exceptions are handled by the caller.
- see the [playground notebook](playground.ipynb) for example output +- `reverser: SurplusReverserProtocol = default_geocoding.reverser` + Latlong object to address information dictionary function, see + [`SurplusReverserProtocol`](#surplusreverserprotocol) for more information - `stderr: typing.TextIO = sys.stderr` [TextIO-like object](https://docs.python.org/3/library/io.html#text-i-o) @@ -658,6 +758,87 @@ attributes - `convert_to_type: ConversionResultTypeEnum = ConversionResultTypeEnum.SHAREABLE_TEXT` what type to convert the query to +### `class SurplusDefaultGeocoding` + +> [!IMPORTANT] +> this has replaced the now deprecated default geocoding functions, `default_geocoder()` +> and `default_reverser()`, in surplus 2.1.0 and later. + +see [SurplusGeocoderProtocol](#surplusgeocoderprotocol) and +[SurplusReverserProtocol](#surplusreverserprotocol) for more information on how to +implement compliant custom geocoding functions. + +[`dataclasses.dataclass`](https://docs.python.org/3/library/dataclasses.html) providing +the default geocoding functionality for surplus, via +[OpenStreetMap Nominatim](https://nominatim.openstreetmap.org/) + +attributes + +- `user_agent: str = default_fingerprint` + pass in a custom user agent here, else it will be the default + [fingerprinted user agent](#details-on-the-fingerprinted-user-agent) + +example usage + +```python +from surplus import surplus, Behaviour, SurplusDefaultGeocoding + +geocoding = SurplusDefaultGeocoding("custom user agent") +geocoding.update_geocoding_functions() # not necessary but recommended + +behaviour = Behaviour( + ..., + geocoder=geocoding.geocoder, + reverser=geocoding.reverser +) + +result = surplus("query", behaviour=behaviour) + +...
+``` + +methods + +- [`def update_geocoding_functions(self) -> None: ...`](#surplusdefaultgeocodingupdate_geocoding_functions) +- [`def geocoder(self, place: str) -> Latlong: ...`](#surplusdefaultgeocodinggeocoder) +- [`def reverser(self, latlong: Latlong, level: int = 18) -> dict[str, Any]: ...`](#surplusdefaultgeocodingreverser) + +#### `SurplusDefaultGeocoding.update_geocoding_functions()` + +re-initialise the geocoding functions with the current user agent, also generate a new +user agent if not set properly + +it is recommended to call this before using surplus as by default the geocoding functions +are uninitialised + +- signature + + ```python + def update_geocoding_functions(self) -> None: ... + ``` + +#### `SurplusDefaultGeocoding.geocoder()` + +> [!WARNING] +> this function is primarily given to be passed into a [`Behaviour`](#class-behaviour) +> object, and is not meant to be called directly. + +default geocoder for surplus + +see [SurplusGeocoderProtocol](#surplusgeocoderprotocol) for more information on surplus +geocoder functions + +#### `SurplusDefaultGeocoding.reverser()` + +> [!WARNING] +> this function is primarily given to be passed into a [`Behaviour`](#class-behaviour) +> object, and is not meant to be called directly. + +default reverser for surplus + +see [SurplusReverserProtocol](#surplusreverserprotocol) for more information on surplus +reverser functions + ### `class ConversionResultTypeEnum` [enum.Enum](https://docs.python.org/3/library/enum.html) @@ -808,15 +989,15 @@ methods - signature ```python - def to_lat_long_coord(self, geocoder: Callable[[str], Latlong]) -> Result[Latlong]: + def to_lat_long_coord(self, geocoder: SurplusGeocoderProtocol) -> Result[Latlong]: ... 
``` - arguments - - `geocoder: typing.Callable[[str], Latlong]` - name string to location function, must take in a string and return a - [`Latlong`](#class-latlong), exceptions are handled by the caller + - `geocoder: SurplusGeocoderProtocol` + name string to location function, see + [SurplusGeocoderProtocol](#surplusgeocoderprotocol) for more information - returns [`Result`](#class-result)[`[Latlong]`](#class-latlong) @@ -860,15 +1041,15 @@ exclusive method that returns a full-length Plus Code as a string - signature ```python - def to_full_plus_code(self, geocoder: Callable[[str], Latlong]) -> Result[str]: + def to_full_plus_code(self, geocoder: SurplusGeocoderProtocol) -> Result[str]: ... ``` - arguments - - `geocoder: typing.Callable[[str], Latlong]` - name string to location function, must take in a string and return a - [`Latlong`](#class-latlong), exceptions are handled by the caller + - `geocoder: SurplusGeocoderProtocol` + name string to location function, see + [SurplusGeocoderProtocol](#surplusgeocoderprotocol) for more information - returns [`Result`](#class-result)`[str]` @@ -879,15 +1060,15 @@ method that returns a latitude-longitude coordinate pair - signature ```python - def to_lat_long_coord(self, geocoder: Callable[[str], Latlong]) -> Result[Latlong]: + def to_lat_long_coord(self, geocoder: SurplusGeocoderProtocol) -> Result[Latlong]: ... 
``` - arguments - - `geocoder: typing.Callable[[str], Latlong]` - name string to location function, must take in a string and return a - [`Latlong`](#class-latlong), exceptions are handled by the caller + - `geocoder: SurplusGeocoderProtocol` + name string to location function, see + [SurplusGeocoderProtocol](#surplusgeocoderprotocol) for more information - returns [`Result`](#class-result)[`[Latlong]`](#class-latlong) @@ -924,15 +1105,15 @@ method that returns a latitude-longitude coordinate pair - signature ```python - def to_lat_long_coord(self, geocoder: Callable[[str], Latlong]) -> Result[Latlong]: + def to_lat_long_coord(self, geocoder: SurplusGeocoderProtocol) -> Result[Latlong]: ... ``` - arguments - - `geocoder: typing.Callable[[str], Latlong]` - name string to location function, must take in a string and return a - [`Latlong`](#class-latlong), exceptions are handled by the caller + - `geocoder: SurplusGeocoderProtocol` + name string to location function, see + [SurplusGeocoderProtocol](#surplusgeocoderprotocol) for more information - returns [`Result`](#class-result)[`[Latlong]`](#class-latlong) @@ -969,15 +1150,15 @@ method that returns a latitude-longitude coordinate pair - signature ```python - def to_lat_long_coord(self, geocoder: Callable[[str], Latlong]) -> Result[Latlong]: + def to_lat_long_coord(self, geocoder: SurplusGeocoderProtocol) -> Result[Latlong]: ... 
``` - arguments - - `geocoder: typing.Callable[[str], Latlong]` - name string to location function, must take in a string and return a - [`Latlong`](#class-latlong), exceptions are handled by the caller + - `geocoder: SurplusGeocoderProtocol` + name string to location function, see + [SurplusGeocoderProtocol](#surplusgeocoderprotocol) for more information - returns [`Result`](#class-result)[`[Latlong]`](#class-latlong) @@ -1030,35 +1211,90 @@ function that parses a query string into a query object - returns [`Result`](#class-result)[`[Query]`](#query) -### `def default_geocoder()` +### `def generate_fingerprinted_user_agent()` -default geocoder for surplus, uses OpenStreetMap Nominatim - -> [!NOTE] -> function is not used by surplus and not directly by the user, but is exposed for -> convenience being [Behaviour](#class-behaviour) objects. -> pass in a custom function to [Behaviour](#class-behaviour) to override the default reverser. +function that attempts to return a unique user agent string. - signature - ```python - def default_geocoder(place: str) -> Latlong: - ``` +```python +def generate_fingerprinted_user_agent() -> Result[str]: +``` -### `def default_reverser()` +- returns [`Result[str]`](#class-result) -default reverser for surplus, uses OpenStreetMap Nominatim + this result will always have a valid value as erroneous results will have a + resulting value of `'surplus/<version> (generic-user)'` -> [!NOTE] -> function is not used by surplus and not directly by the user, but is exposed for -> convenience being [Behaviour](#class-behaviour) objects. -> pass in a custom function to [Behaviour](#class-behaviour) to override the default reverser.
+ valid results will have a value of `'surplus/<version> (<fingerprint hash>)'`, where + the fingerprint hash is a 12 character hexadecimal string -- signature +#### details on the fingerprinted user agent - ```python - def default_reverser(latlong: Latlong) -> dict[str, Any]: - ``` +**why do this in the first place?** +if too many people use surplus at the same time, +Nominatim will start to think it's just one person being greedy. so to prevent this, +surplus will try to generate a unique user agent string for each user through +fingerprinting. + +at the time of writing, the pre-hashed fingerprint string is as follows: + +```python +unique_info: str = f"{version}-{system_info}-{hostname}-{mac_address}" +``` + +it contains the following, in order, alongside an example: + +1. `version` - the surplus version alongside a suffix, if any + + ```text + 2.1.0-local + ``` + +2. `system_info` - generic machine and operating system information + + ```text + Linux-6.5.0-locietta-WSL2-xanmod1-x86_64-with-glibc2.35 + ``` + +3. `hostname` - your computer's hostname + + ```text + mark + ``` + +4. `mac_address` - your computer's MAC address + + ```text + A9:36:3C:98:79:33 + ``` + +after hashing, this string becomes a 12 character hexadecimal string, as shown below: + +```text +surplus/2.1.0-local (1fdbfa0b0cfb) + ^^^^^^^^^^ + this is the hashed result of unique_info +``` + +if at any time, the retrieval of any of these four elements fails, surplus will just give +up and default to `'surplus/<version> (generic-user)'`. + +if any of this seems weird to you, that's fine. pass in a custom user agent flag to +surplus with `-u` or `--user-agent` to override the default user agent, or override the +default user agent in your own code by passing in a custom user agent string to +[`Behaviour`](#class-behaviour). + +```text +$ surplus --user-agent "a-shiny-custom-and-unique-user-agent" 77Q4+7X Austin, Texas, USA +...
+``` + +```python +>>> from surplus import surplus, Behaviour +>>> surplus(..., Behaviour(user_agent="a-shiny-custom-and-unique-user-agent")) +... +``` ## licence @@ -1069,21 +1305,15 @@ python module docstring. however, direct dependencies of surplus are licensed under different, but still permissive and open-source licences. -```text -geopy 2.4.0 Python Geocoding Toolbox -└── geographiclib >=1.52,<3 -pluscodes 2022.1.3 Compute Plus Codes (Open Location Codes). -``` - - [geopy](https://pypi.org/project/geopy/): Python Geocoding Toolbox - MIT License + MIT Licence - [geographiclib](https://pypi.org/project/geographiclib/): The geodesic routines from GeographicLib - MIT License + MIT Licence - [pluscodes](https://pypi.org/project/pluscodes/): Compute Plus Codes (Open Location Codes) diff --git a/playground.ipynb b/playground.ipynb index 06156dd..79e6cd2 100644 --- a/playground.ipynb +++ b/playground.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# surplus 2.0.0 playground notebook\n", + "# surplus 2.x.y playground notebook\n", "\n", "wrangling with environments for devbox users using codium/vs code:\n", "\n", @@ -42,13 +42,15 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "from surplus import PlusCodeQuery, LocalCodeQuery, LatlongQuery, StringQuery\n", "from surplus import Latlong, Result\n", - "from surplus import default_geocoder, default_reverser" + "from surplus import SurplusDefaultGeocoding\n", + "\n", + "geocoding = SurplusDefaultGeocoding()" ] }, { @@ -60,7 +62,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "metadata": {}, "outputs": [ { @@ -78,9 +80,9 @@ "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mZeroDivisionError\u001b[0m Traceback (most recent call last)", - "\u001b[1;32m/home/m/works/surplus/playground.ipynb Cell 5\u001b[0m in 
\u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[39mprint\u001b[39m(\u001b[39m\"\u001b[39m\u001b[39m{}\u001b[39;00m\u001b[39m\\t\u001b[39;00m\u001b[39m{:<40}\u001b[39;00m\u001b[39m\\t\u001b[39;00m\u001b[39m{}\u001b[39;00m\u001b[39m\"\u001b[39m\u001b[39m.\u001b[39mformat(\u001b[39mbool\u001b[39m(nom_result), \u001b[39mrepr\u001b[39m(nom_result\u001b[39m.\u001b[39merror), nom_result\u001b[39m.\u001b[39mget()))\n\u001b[1;32m 9\u001b[0m \u001b[39mprint\u001b[39m(\n\u001b[1;32m 10\u001b[0m \u001b[39m\"\u001b[39m\u001b[39m{}\u001b[39;00m\u001b[39m\\t\u001b[39;00m\u001b[39m{:<40}\u001b[39;00m\u001b[39m\\t\u001b[39;00m\u001b[39m{}\u001b[39;00m\u001b[39m\"\u001b[39m\u001b[39m.\u001b[39mformat(\n\u001b[1;32m 11\u001b[0m \u001b[39mbool\u001b[39m(exc_result), \u001b[39mrepr\u001b[39m(exc_result\u001b[39m.\u001b[39merror), exc_result\u001b[39m.\u001b[39mcry(string\u001b[39m=\u001b[39m\u001b[39mTrue\u001b[39;00m)\n\u001b[1;32m 12\u001b[0m )\n\u001b[1;32m 13\u001b[0m )\n\u001b[0;32m---> 14\u001b[0m \u001b[39mprint\u001b[39m(\u001b[39m\"\u001b[39m\u001b[39m{}\u001b[39;00m\u001b[39m\\t\u001b[39;00m\u001b[39m{:<40}\u001b[39;00m\u001b[39m\\t\u001b[39;00m\u001b[39m{}\u001b[39;00m\u001b[39m\"\u001b[39m\u001b[39m.\u001b[39mformat(\u001b[39mbool\u001b[39m(exc_result), \u001b[39mrepr\u001b[39m(exc_result\u001b[39m.\u001b[39merror), exc_result\u001b[39m.\u001b[39;49mget()))\n", - "File \u001b[0;32m~/works/surplus/surplus.py:247\u001b[0m, in \u001b[0;36mResult.get\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 245\u001b[0m \u001b[39m\"\"\"method that returns self.value if Result is non-erroneous else raises error\"\"\"\u001b[39;00m\n\u001b[1;32m 246\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39misinstance\u001b[39m(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39merror, \u001b[39mBaseException\u001b[39;00m):\n\u001b[0;32m--> 247\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39merror\n\u001b[1;32m 248\u001b[0m \u001b[39mreturn\u001b[39;00m 
\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mvalue\n", - "\u001b[1;32m/home/m/works/surplus/playground.ipynb Cell 5\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m nom_result \u001b[39m=\u001b[39m Result[\u001b[39mint\u001b[39m](\u001b[39m3\u001b[39m)\n\u001b[1;32m 3\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m----> 4\u001b[0m \u001b[39m1\u001b[39;49m \u001b[39m/\u001b[39;49m \u001b[39m0\u001b[39;49m\n\u001b[1;32m 5\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mException\u001b[39;00m \u001b[39mas\u001b[39;00m exc:\n\u001b[1;32m 6\u001b[0m exc_result \u001b[39m=\u001b[39m Result[\u001b[39mint\u001b[39m](\u001b[39m-\u001b[39m\u001b[39m1\u001b[39m, error\u001b[39m=\u001b[39mexc)\n", + "\u001b[1;32m/home/m/works/surplus/playground.ipynb Cell 5\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[39mprint\u001b[39m(\u001b[39m\"\u001b[39m\u001b[39m{}\u001b[39;00m\u001b[39m\\t\u001b[39;00m\u001b[39m{:<40}\u001b[39;00m\u001b[39m\\t\u001b[39;00m\u001b[39m{}\u001b[39;00m\u001b[39m\"\u001b[39m\u001b[39m.\u001b[39mformat(\u001b[39mbool\u001b[39m(nom_result), \u001b[39mrepr\u001b[39m(nom_result\u001b[39m.\u001b[39merror), nom_result\u001b[39m.\u001b[39mget()))\n\u001b[1;32m 9\u001b[0m \u001b[39mprint\u001b[39m(\n\u001b[1;32m 10\u001b[0m \u001b[39m\"\u001b[39m\u001b[39m{}\u001b[39;00m\u001b[39m\\t\u001b[39;00m\u001b[39m{:<40}\u001b[39;00m\u001b[39m\\t\u001b[39;00m\u001b[39m{}\u001b[39;00m\u001b[39m\"\u001b[39m\u001b[39m.\u001b[39mformat(\n\u001b[1;32m 11\u001b[0m \u001b[39mbool\u001b[39m(exc_result), \u001b[39mrepr\u001b[39m(exc_result\u001b[39m.\u001b[39merror), exc_result\u001b[39m.\u001b[39mcry(string\u001b[39m=\u001b[39m\u001b[39mTrue\u001b[39;00m)\n\u001b[1;32m 12\u001b[0m )\n\u001b[1;32m 13\u001b[0m )\n\u001b[0;32m---> 14\u001b[0m 
\u001b[39mprint\u001b[39m(\u001b[39m\"\u001b[39m\u001b[39m{}\u001b[39;00m\u001b[39m\\t\u001b[39;00m\u001b[39m{:<40}\u001b[39;00m\u001b[39m\\t\u001b[39;00m\u001b[39m{}\u001b[39;00m\u001b[39m\"\u001b[39m\u001b[39m.\u001b[39mformat(\u001b[39mbool\u001b[39m(exc_result), \u001b[39mrepr\u001b[39m(exc_result\u001b[39m.\u001b[39merror), exc_result\u001b[39m.\u001b[39;49mget()))\n", + "File \u001b[0;32m~/works/surplus/surplus/surplus.py:270\u001b[0m, in \u001b[0;36mResult.get\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 268\u001b[0m \u001b[39m\"\"\"method that returns self.value if Result is non-erroneous else raises error\"\"\"\u001b[39;00m\n\u001b[1;32m 269\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39misinstance\u001b[39m(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39merror, \u001b[39mBaseException\u001b[39;00m):\n\u001b[0;32m--> 270\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39merror\n\u001b[1;32m 271\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mvalue\n", + "\u001b[1;32m/home/m/works/surplus/playground.ipynb Cell 5\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m nom_result \u001b[39m=\u001b[39m Result[\u001b[39mint\u001b[39m](\u001b[39m3\u001b[39m)\n\u001b[1;32m 3\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m----> 4\u001b[0m \u001b[39m1\u001b[39;49m \u001b[39m/\u001b[39;49m \u001b[39m0\u001b[39;49m\n\u001b[1;32m 5\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mException\u001b[39;00m \u001b[39mas\u001b[39;00m exc:\n\u001b[1;32m 6\u001b[0m exc_result \u001b[39m=\u001b[39m Result[\u001b[39mint\u001b[39m](\u001b[39m-\u001b[39m\u001b[39m1\u001b[39m, error\u001b[39m=\u001b[39mexc)\n", "\u001b[0;31mZeroDivisionError\u001b[0m: division by zero" ] } @@ -111,27 +113,7 @@ }, { "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Result(value=Latlong(latitude=1.3336875, longitude=103.7746875), error=None)" - ] - }, - 
"execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "PlusCodeQuery(code=\"6PH58QMF+FV\").to_lat_long_coord(geocoder=default_geocoder)" - ] - }, - { - "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -146,16 +128,12 @@ } ], "source": [ - "plus_code = LocalCodeQuery(code=\"8QMF+FV\", locality=\"Singapore\").to_full_plus_code(\n", - " geocoder=default_geocoder\n", - ")\n", - "\n", - "PlusCodeQuery(code=plus_code.get()).to_lat_long_coord(geocoder=default_geocoder)" + "PlusCodeQuery(code=\"6PH58QMF+FV\").to_lat_long_coord(geocoder=geocoding.geocoder)" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -170,20 +148,22 @@ } ], "source": [ - "LocalCodeQuery(code=\"8QMF+FV\", locality=\"Singapore\").to_lat_long_coord(\n", - " geocoder=default_geocoder\n", - ")" + "plus_code = LocalCodeQuery(code=\"8QMF+FV\", locality=\"Singapore\").to_full_plus_code(\n", + " geocoder=geocoding.geocoder\n", + ")\n", + "\n", + "PlusCodeQuery(code=plus_code.get()).to_lat_long_coord(geocoder=geocoding.geocoder)" ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "Result(value=Latlong(latitude=1.33318835, longitude=103.77461234638255), error=None)" + "Result(value=Latlong(latitude=1.3336875, longitude=103.7746875), error=None)" ] }, "execution_count": 6, @@ -192,14 +172,14 @@ } ], "source": [ - "LatlongQuery(\n", - " latlong=Latlong(latitude=1.33318835, longitude=103.77461234638255)\n", - ").to_lat_long_coord(geocoder=default_geocoder)" + "LocalCodeQuery(code=\"8QMF+FV\", locality=\"Singapore\").to_lat_long_coord(\n", + " geocoder=geocoding.geocoder\n", + ")" ] }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -214,7 +194,29 @@ } ], "source": [ - "StringQuery(query=\"Ngee Ann 
Polytechnic\").to_lat_long_coord(geocoder=default_geocoder)" + "LatlongQuery(\n", + " latlong=Latlong(latitude=1.33318835, longitude=103.77461234638255)\n", + ").to_lat_long_coord(geocoder=geocoding.geocoder)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Result(value=Latlong(latitude=1.33318835, longitude=103.77461234638255), error=None)" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "StringQuery(query=\"Ngee Ann Polytechnic\").to_lat_long_coord(geocoder=geocoding.geocoder)" ] }, { @@ -282,7 +284,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -298,6 +300,7 @@ " 'house_number': '535',\n", " 'latitude': 1.33318835,\n", " 'longitude': 103.77461234638255,\n", + " 'neighbourhood': 'Ewart Park',\n", " 'postcode': '599489',\n", " 'raw': {'address': {'ISO3166-2-lvl6': 'SG-03',\n", " 'amenity': 'Ngee Ann Polytechnic',\n", @@ -306,6 +309,7 @@ " 'country_code': 'sg',\n", " 'county': 'Northwest',\n", " 'house_number': '535',\n", + " 'neighbourhood': 'Ewart Park',\n", " 'postcode': '599489',\n", " 'road': 'Clementi Road',\n", " 'suburb': 'Bukit Timah'},\n", @@ -315,8 +319,9 @@ " '103.7701481',\n", " '103.7783945'],\n", " 'class': 'amenity',\n", - " 'display_name': 'Ngee Ann Polytechnic, 535, Clementi Road, Bukit '\n", - " 'Timah, Singapore, Northwest, 599489, Singapore',\n", + " 'display_name': 'Ngee Ann Polytechnic, 535, Clementi Road, Ewart '\n", + " 'Park, Bukit Timah, Singapore, Northwest, 599489, '\n", + " 'Singapore',\n", " 'importance': 0.34662169301918117,\n", " 'lat': '1.33318835',\n", " 'licence': 'Data © OpenStreetMap contributors, ODbL 1.0. 
'\n", @@ -325,7 +330,7 @@ " 'name': 'Ngee Ann Polytechnic',\n", " 'osm_id': 2535118,\n", " 'osm_type': 'relation',\n", - " 'place_id': 297946059,\n", + " 'place_id': 250910125,\n", " 'place_rank': 30,\n", " 'type': 'university'},\n", " 'road': 'Clementi Road',\n", @@ -337,15 +342,240 @@ "import pprint\n", "\n", "latlong = LocalCodeQuery(code=\"8QMF+FV\", locality=\"Singapore\").to_lat_long_coord(\n", - " default_geocoder\n", + " geocoder=geocoding.geocoder\n", ")\n", "if not latlong:\n", " latlong.cry()\n", "\n", "else:\n", - " location = default_reverser(latlong.get())\n", + " location = geocoding.reverser(latlong.get())\n", " pprint.pprint(location)" ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2.1.0: the adventure of shortening global/full Plus Codes" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### testing rate-limited default geocoding functions" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1.33318835, 103.77461234638255\n", + "{'amenity': 'Ngee Ann Polytechnic', 'house_number': '535', 'road': 'Clementi Road', 'neighbourhood': 'Ewart Park', 'suburb': 'Bukit Timah', 'city': 'Singapore', 'county': 'Northwest', 'ISO3166-2-lvl6': 'SG-03', 'postcode': '599489', 'country': 'Singapore', 'country_code': 'sg', 'raw': {'place_id': 250910125, 'licence': 'Data © OpenStreetMap contributors, ODbL 1.0. 
http://osm.org/copyright', 'osm_type': 'relation', 'osm_id': 2535118, 'lat': '1.33318835', 'lon': '103.77461234638255', 'class': 'amenity', 'type': 'university', 'place_rank': 30, 'importance': 0.34662169301918117, 'addresstype': 'amenity', 'name': 'Ngee Ann Polytechnic', 'display_name': 'Ngee Ann Polytechnic, 535, Clementi Road, Ewart Park, Bukit Timah, Singapore, Northwest, 599489, Singapore', 'address': {'amenity': 'Ngee Ann Polytechnic', 'house_number': '535', 'road': 'Clementi Road', 'neighbourhood': 'Ewart Park', 'suburb': 'Bukit Timah', 'city': 'Singapore', 'county': 'Northwest', 'ISO3166-2-lvl6': 'SG-03', 'postcode': '599489', 'country': 'Singapore', 'country_code': 'sg'}, 'boundingbox': ['1.3289692', '1.3372184', '103.7701481', '103.7783945']}, 'latitude': 1.33318835, 'longitude': 103.77461234638255}\n" + ] + } + ], + "source": [ + "from surplus import SurplusGeocoderProtocol, SurplusReverserProtocol\n", + "\n", + "\n", + "test_geocoding = SurplusDefaultGeocoding(user_agent=\"surplus/playground\")\n", + "\n", + "print(location := test_geocoding.geocoder(\"Ngee Ann Polytechnic\"))\n", + "\n", + "print(reversed := test_geocoding.reverser(f\"{location.latitude}, {location.longitude}\"))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### loop for less information until a local code is made" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# TODO\n", + "\n", + "test1 = LocalCodeQuery(\"9R3J+R9\", \"Singapore\")\n", + "test2 = LocalCodeQuery(\"G227+XF\", \"St Lucia, Queensland, Australia\")\n", + "\n", + "level = 13" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'suburb': 'Bishan',\n", + " 'city': 'Singapore',\n", + " 'county': 'Central',\n", + " 'ISO3166-2-lvl6': 'SG-01',\n", + " 'country': 'Singapore',\n", + " 'country_code': 'sg',\n", + " 'raw': {'place_id': 251115282,\n", + " 
'licence': 'Data © OpenStreetMap contributors, ODbL 1.0. http://osm.org/copyright',\n", + " 'osm_type': 'way',\n", + " 'osm_id': 795946716,\n", + " 'lat': '1.3519117',\n", + " 'lon': '103.8489708',\n", + " 'class': 'place',\n", + " 'type': 'suburb',\n", + " 'place_rank': 19,\n", + " 'importance': 0.39184907371668787,\n", + " 'addresstype': 'suburb',\n", + " 'name': 'Bishan',\n", + " 'display_name': 'Bishan, Singapore, Central, Singapore',\n", + " 'address': {'suburb': 'Bishan',\n", + " 'city': 'Singapore',\n", + " 'county': 'Central',\n", + " 'ISO3166-2-lvl6': 'SG-01',\n", + " 'country': 'Singapore',\n", + " 'country_code': 'sg'},\n", + " 'boundingbox': ['1.3416846', '1.3679829', '103.8184512', '103.8604083']},\n", + " 'latitude': 1.3519117,\n", + " 'longitude': 103.8489708}" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "(\n", + " response := geocoding.reverser(\n", + " test1.to_lat_long_coord(geocoding.geocoder).get(), level=level\n", + " )\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'suburb': 'St Lucia',\n", + " 'city_district': 'St Lucia',\n", + " 'city': 'Brisbane City',\n", + " 'state': 'Queensland',\n", + " 'ISO3166-2-lvl4': 'AU-QLD',\n", + " 'postcode': '4072',\n", + " 'country': 'Australia',\n", + " 'country_code': 'au',\n", + " 'raw': {'place_id': 54477898,\n", + " 'licence': 'Data © OpenStreetMap contributors, ODbL 1.0. 
http://osm.org/copyright',\n", + " 'osm_type': 'node',\n", + " 'osm_id': 88800268,\n", + " 'lat': '-27.4987362',\n", + " 'lon': '153.0081642',\n", + " 'class': 'place',\n", + " 'type': 'suburb',\n", + " 'place_rank': 19,\n", + " 'importance': 0.27501,\n", + " 'addresstype': 'suburb',\n", + " 'name': 'St Lucia',\n", + " 'display_name': 'St Lucia, Brisbane City, Queensland, 4072, Australia',\n", + " 'address': {'suburb': 'St Lucia',\n", + " 'city_district': 'St Lucia',\n", + " 'city': 'Brisbane City',\n", + " 'state': 'Queensland',\n", + " 'ISO3166-2-lvl4': 'AU-QLD',\n", + " 'postcode': '4072',\n", + " 'country': 'Australia',\n", + " 'country_code': 'au'},\n", + " 'boundingbox': ['-27.5187362', '-27.4787362', '152.9881642', '153.0281642']},\n", + " 'latitude': -27.4987362,\n", + " 'longitude': 153.0081642}" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "(\n", + " response := geocoding.reverser(\n", + " test2.to_lat_long_coord(geocoding.geocoder).get(), level=level\n", + " )\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## machine fingerprinting attempt\n", + "\n", + "because of nominatim's acceptable usage policy \n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from hashlib import shake_256 as _hashlib_shake_256\n", + "from platform import platform as _platform_platform\n", + "from socket import gethostname as _socket_gethostname\n", + "from uuid import getnode as _uuid_getnode\n", + "from surplus import VERSION, VERSION_SUFFIX\n", + "\n", + "\n", + "def generate_fingerprinted_user_agent() -> Result[str]:\n", + " \"\"\"\n", + " function that attempts to return a unique user agent string.\n", + "\n", + " returns Result[str]\n", + " this result will always have a valid value as erroneous results will have a\n", + " resulting value of 'surplus//generic-user'\n", + " valid results will have a 
value of 'surplus//', where\n", + " fingerprint is a 12 character hexadecimal string\n", + " \"\"\"\n", + " version: str = \".\".join([str(v) for v in VERSION]) + VERSION_SUFFIX\n", + "\n", + " try:\n", + " system_info: str = _platform_platform()\n", + " hostname: str = _socket_gethostname()\n", + " mac_address: str = \":\".join(\n", + " [\n", + " \"{:02x}\".format((_uuid_getnode() >> elements) & 0xFF)\n", + " for elements in range(0, 2 * 6, 2)\n", + " ][::-1]\n", + " )\n", + " unique_info: str = f\"{version}-{system_info}-{hostname}-{mac_address}\"\n", + "\n", + " print(f\"{version=}\")\n", + " print(f\"{system_info=}\")\n", + " print(f\"{hostname=}\")\n", + " print(f\"{mac_address=}\")\n", + "\n", + " except Exception as exc:\n", + " return Result[str](f\"surplus/{version} (generic-user)\", error=exc)\n", + "\n", + " fingerprint: str = _hashlib_shake_256(unique_info.encode()).hexdigest(5)\n", + "\n", + " return Result[str](f\"surplus/{version} ({fingerprint})\")" + ] } ], "metadata": { diff --git a/poetry.lock b/poetry.lock index 9debda9..8099786 100644 --- a/poetry.lock +++ b/poetry.lock @@ -14,14 +14,14 @@ files = [ [[package]] name = "asttokens" -version = "2.3.0" +version = "2.4.0" description = "Annotate AST trees with source code positions" category = "dev" optional = false python-versions = "*" files = [ - {file = "asttokens-2.3.0-py2.py3-none-any.whl", hash = "sha256:bef1a51bc256d349e9f94e7e40e44b705ed1162f55294220dd561d24583d9877"}, - {file = "asttokens-2.3.0.tar.gz", hash = "sha256:2552a88626aaa7f0f299f871479fc755bd4e7c11e89078965e928fb7bb9a6afe"}, + {file = "asttokens-2.4.0-py2.py3-none-any.whl", hash = "sha256:cf8fc9e61a86461aa9fb161a14a0841a03c405fa829ac6b202670b3495d2ce69"}, + {file = "asttokens-2.4.0.tar.gz", hash = "sha256:2e0171b991b2c959acc6c49318049236844a5da1d65ba2672c4880c1c894834e"}, ] [package.dependencies] diff --git a/surplus/__init__.py b/surplus/__init__.py index b9d0162..6b26542 100644 --- a/surplus/__init__.py +++ 
b/surplus/__init__.py @@ -32,10 +32,14 @@ For more information, please refer to # surplus was and would've been a single-file module, but typing is in the way :( # https://github.com/python/typing/issues/1333 +from .surplus import default_geocoder # deprecated, emulation function +from .surplus import default_reverser # deprecated, emulation function from .surplus import ( BUILD_BRANCH, BUILD_COMMIT, BUILD_DATETIME, + CONNECTION_MAX_RETRIES, + CONNECTION_WAIT_SECONDS, EMPTY_LATLONG, SHAREABLE_TEXT_LINE_0_KEYS, SHAREABLE_TEXT_LINE_1_KEYS, @@ -45,7 +49,6 @@ from .surplus import ( SHAREABLE_TEXT_LINE_5_KEYS, SHAREABLE_TEXT_LINE_6_KEYS, SHAREABLE_TEXT_NAMES, - USER_AGENT, VERSION, VERSION_SUFFIX, Behaviour, @@ -60,13 +63,15 @@ from .surplus import ( PlusCodeNotFoundError, PlusCodeQuery, Query, + Result, ResultType, StringQuery, + SurplusDefaultGeocoding, SurplusException, - UnavailableFeatureError, + SurplusGeocoderProtocol, + SurplusReverserProtocol, cli, - default_geocoder, - default_reverser, + generate_fingerprinted_user_agent, handle_args, parse_query, surplus, diff --git a/surplus/surplus.py b/surplus/surplus.py index 3295275..4d57c62 100644 --- a/surplus/surplus.py +++ b/surplus/surplus.py @@ -31,8 +31,13 @@ For more information, please refer to from argparse import ArgumentParser from collections import OrderedDict +from dataclasses import dataclass from datetime import datetime, timedelta, timezone from enum import Enum +from functools import lru_cache +from hashlib import shake_256 +from platform import platform +from socket import gethostname from sys import stderr, stdin, stdout from typing import ( Any, @@ -45,12 +50,17 @@ from typing import ( TypeAlias, TypeVar, ) +from uuid import getnode from geopy import Location as _geopy_Location # type: ignore +from geopy.extra.rate_limiter import RateLimiter as _geopy_RateLimiter # type: ignore from geopy.geocoders import Nominatim as _geopy_Nominatim # type: ignore +from pluscodes import Area as _PlusCode_Area # 
type: ignore from pluscodes import PlusCode as _PlusCode # type: ignore +from pluscodes import decode as _PlusCode_decode # type: ignore from pluscodes import encode as _PlusCode_encode # type: ignore from pluscodes.validator import Validator as _PlusCode_Validator # type: ignore +from typing_extensions import Protocol from pluscodes.openlocationcode import ( # type: ignore # isort: skip recoverNearest as _PlusCode_recoverNearest, @@ -63,7 +73,8 @@ VERSION_SUFFIX: Final[str] = "-local" BUILD_BRANCH: Final[str] = "future" BUILD_COMMIT: Final[str] = "latest" BUILD_DATETIME: Final[datetime] = datetime.now(timezone(timedelta(hours=8))) # using SGT -USER_AGENT: Final[str] = "surplus" +CONNECTION_MAX_RETRIES: int = 9 +CONNECTION_WAIT_SECONDS: int = 10 SHAREABLE_TEXT_LINE_0_KEYS: Final[tuple[str, ...]] = ( "emergency", "historic", @@ -155,10 +166,6 @@ class EmptyQueryError(SurplusException): ... -class UnavailableFeatureError(SurplusException): - ... - - # data structures @@ -290,6 +297,66 @@ class Latlong(NamedTuple): EMPTY_LATLONG: Final[Latlong] = Latlong(latitude=0.0, longitude=0.0) +class SurplusGeocoderProtocol(Protocol): + """ + typing_extensions.Protocol class for documentation and static type checking of + surplus geocoder functions + + (place: str) -> Latlong + + name string to location function. must take in a string and return a Latlong. + + function can be functools.lru_cache()-wrapped if the geocoding service asks for + caching + + exceptions are handled by the caller + """ + + def __call__(self, place: str) -> Latlong: + ... + + +class SurplusReverserProtocol(Protocol): + """ + typing_extensions.Protocol class for documentation and static type checking of + surplus reverser functions + + (latlong: Latlong, level: int = 18) -> dict[str, Any]: + + Latlong object to address information dictionary function. must take in a Latlong and + return a dict with SHAREABLE_TEXT_LINE_*_KEYS keys at the dictionary's top-level.
+ keys are used to access address information. + + function should also take in a int representing the level of detail for the + returned address, 0-18 (country-level to building), inclusive. + + keys for latitude, longitude and an iso3166-2 (or closest equivalent) should also be + included at the dictionaries top level as the keys `latitude`, `longitude` and + `ISO3166-2` (non-case sensitive, or at least something starting with `ISO3166`) + respectively. + + { + 'ISO3166-2-lvl6': 'SG-03', + 'amenity': 'Ngee Ann Polytechnic', + ... + 'country': 'Singapore', + 'latitude': 1.33318835, + 'longitude': 103.77461234638255, + 'postcode': '599489', + 'raw': {...}, + } + + function can be functools.lru_cache()-wrapped if the geocoding service asks for + caching + + exceptions are handled by the caller, + see the playground notebook in repository root for sample output + """ + + def __call__(self, latlong: Latlong, level: int = 18) -> dict[str, Any]: + ... + + class PlusCodeQuery(NamedTuple): """ typing.NamedTuple representing a full-length Plus Code (e.g., 6PH58QMF+FX) @@ -304,14 +371,14 @@ class PlusCodeQuery(NamedTuple): code: str - def to_lat_long_coord(self, geocoder: Callable[[str], Latlong]) -> Result[Latlong]: + def to_lat_long_coord(self, geocoder: SurplusGeocoderProtocol) -> Result[Latlong]: """ method that returns a latitude-longitude coordinate pair arguments - geocoder: typing.Callable[[str], Latlong] - name string to location function, must take in a string and return a - Latlong, exceptions are handled by the caller + geocoder: SurplusGeocoderProtocol + name string to location function, see SurplusGeocoderProtocol docstring + for more information returns Result[Latlong] """ @@ -363,14 +430,14 @@ class LocalCodeQuery(NamedTuple): code: str locality: str - def to_full_plus_code(self, geocoder: Callable[[str], Latlong]) -> Result[str]: + def to_full_plus_code(self, geocoder: SurplusGeocoderProtocol) -> Result[str]: """ exclusive method that returns a full-length 
Plus Code as a string arguments - geocoder: typing.Callable[[str], Latlong] - name string to location function, must take in a string and return a - Latlong, exceptions are handled by the caller + geocoder: SurplusGeocoderProtocol + name string to location function, see SurplusGeocoderProtocol docstring + for more information returns Result[str] """ @@ -389,14 +456,14 @@ class LocalCodeQuery(NamedTuple): except Exception as exc: return Result[str]("", error=exc) - def to_lat_long_coord(self, geocoder: Callable[[str], Latlong]) -> Result[Latlong]: + def to_lat_long_coord(self, geocoder: SurplusGeocoderProtocol) -> Result[Latlong]: """ method that returns a latitude-longitude coordinate pair arguments - geocoder: typing.Callable[[str], Latlong] - name string to location function, must take in a string and return a - Latlong, exceptions are handled by the caller + geocoder: SurplusGeocoderProtocol + name string to location function, see SurplusGeocoderProtocol docstring + for more information returns Result[Latlong] """ @@ -431,14 +498,14 @@ class LatlongQuery(NamedTuple): latlong: Latlong - def to_lat_long_coord(self, geocoder: Callable[[str], Latlong]) -> Result[Latlong]: + def to_lat_long_coord(self, geocoder: SurplusGeocoderProtocol) -> Result[Latlong]: """ method that returns a latitude-longitude coordinate pair arguments - geocoder: typing.Callable[[str], Latlong] - name string to location function, must take in a string and return a - Latlong, exceptions are handled by the caller + geocoder: SurplusGeocoderProtocol + name string to location function, see SurplusGeocoderProtocol docstring + for more information returns Result[Latlong] """ @@ -464,14 +531,14 @@ class StringQuery(NamedTuple): query: str - def to_lat_long_coord(self, geocoder: Callable[[str], Latlong]) -> Result[Latlong]: + def to_lat_long_coord(self, geocoder: SurplusGeocoderProtocol) -> Result[Latlong]: """ method that returns a latitude-longitude coordinate pair arguments - geocoder: 
typing.Callable[[str], Latlong] - name string to location function, must take in a string and return a - Latlong, exceptions are handled by the caller + geocoder: SurplusGeocoderProtocol + name string to location function, see SurplusGeocoderProtocol docstring + for more information returns Result[Latlong] """ @@ -490,43 +557,190 @@ class StringQuery(NamedTuple): Query: TypeAlias = PlusCodeQuery | LocalCodeQuery | LatlongQuery | StringQuery -def default_geocoder(place: str) -> Latlong: - """default geocoder for surplus, uses OpenStreetMap Nominatim""" +def generate_fingerprinted_user_agent() -> Result[str]: + """ + function that attempts to return a unique user agent string. - location: _geopy_Location | None = _geopy_Nominatim(user_agent=USER_AGENT).geocode( - place - ) + returns Result[str] + this result will always have a valid value as erroneous results will have a + resulting value of 'surplus/{version} (generic-user)' - if location is None: - raise NoSuitableLocationError( - f"No suitable location could be geolocated from '{place}'" + valid results will have a value of 'surplus/{version} ({fingerprint})', + where {fingerprint} is a 10 character hexadecimal string + """ + version: str = ".".join([str(v) for v in VERSION]) + VERSION_SUFFIX + + try: + system_info: str = platform() + hostname: str = gethostname() + mac_address: str = ":".join( + [ + "{:02x}".format((getnode() >> elements) & 0xFF) + for elements in range(0, 2 * 6, 2) + ][::-1] + ) + unique_info: str = f"{version}-{system_info}-{hostname}-{mac_address}" + + except Exception as exc: + return Result[str](f"surplus/{version} (generic-user)", error=exc) + + fingerprint: str = shake_256(unique_info.encode()).hexdigest(5) + + return Result[str](f"surplus/{version} ({fingerprint})") + + +default_fingerprint: Final[str] = generate_fingerprinted_user_agent().value + + +@dataclass +class SurplusDefaultGeocoding: + """ + dataclass providing the default geocoding functionality for surplus, via + OpenStreetMap Nominatim + + attributes + user_agent: str =
default_fingerprint + pass in a custom user agent here, else it will be the default fingerprinted + user agent + + usage + geocoding = SurplusDefaultGeocoding(behaviour.user_agent) + geocoding.update_geocoding_functions() + ... + Behaviour( + ..., + geocoder=geocoding.geocoder, + reverser=geocoding.reverser + ) + """ + + user_agent: str = default_fingerprint + _ratelimited_raw_geocoder: Callable | None = None + _ratelimited_raw_reverser: Callable | None = None + _first_update: bool = False + + def update_geocoding_functions(self) -> None: + """ + re-initialise the geocoding functions with the current user agent, also generate + a new user agent if not set properly + + recommended to call this before using surplus as by default the geocoding + functions are uninitialised + """ + + if not isinstance(self.user_agent, str): + self.user_agent: str = generate_fingerprinted_user_agent().value + + nominatim = _geopy_Nominatim(user_agent=self.user_agent) + + # this is + + self._ratelimited_raw_geocoder: Callable = lru_cache( + _geopy_RateLimiter( + nominatim.geocode, + max_retries=CONNECTION_MAX_RETRIES, + error_wait_seconds=CONNECTION_WAIT_SECONDS, + ) ) - return Latlong( - latitude=location.latitude, - longitude=location.longitude, + self._ratelimited_raw_reverser: Callable = lru_cache( + _geopy_RateLimiter( + nominatim.reverse, + max_retries=CONNECTION_MAX_RETRIES, + error_wait_seconds=CONNECTION_WAIT_SECONDS, + ) + ) + + self._first_update = True + + def geocoder(self, place: str) -> Latlong: + """ + default geocoder for surplus, uses OpenStreetMap Nominatim + + see SurplusGeocoderProtocol for more information on surplus geocoder functions + """ + + if not callable(self._ratelimited_raw_geocoder) or (self._first_update is False): + self.update_geocoding_functions() + + # https://github.com/python/mypy/issues/12155 + assert callable(self._ratelimited_raw_geocoder) + + location: _geopy_Location | None = self._ratelimited_raw_geocoder(place) + + if location is None: + 
raise NoSuitableLocationError( + f"No suitable location could be geolocated from '{place}'" + ) + + return Latlong( + latitude=location.latitude, + longitude=location.longitude, + ) + + def reverser(self, latlong: Latlong, level: int = 18) -> dict[str, Any]: + """ + default reverser for surplus, uses OpenStreetMap Nominatim + + arguments + latlong: Latlong + level: int = 18 + level of detail for the returned address, 0-18 (country-building) inclusive + + see SurplusReverserProtocol for more information on surplus reverser functions + """ + + if not callable(self._ratelimited_raw_reverser) or (self._first_update is False): + self.update_geocoding_functions() + + # https://github.com/python/mypy/issues/12155 + assert callable(self._ratelimited_raw_reverser) + + location: _geopy_Location | None = self._ratelimited_raw_reverser( + str(latlong), zoom=level + ) + + if location is None: + raise NoSuitableLocationError(f"could not reverse '{str(latlong)}'") + + location_dict: dict[str, Any] = {} + + for key in (address := location.raw.get("address", {})): + location_dict[key] = address.get(key, "") + + location_dict["raw"] = location.raw + location_dict["latitude"] = location.latitude + location_dict["longitude"] = location.longitude + + return location_dict + + +default_geocoding: Final[SurplusDefaultGeocoding] = SurplusDefaultGeocoding( + default_fingerprint +) +default_geocoding.update_geocoding_functions() + + +def default_geocoder(place: str) -> Latlong: + """(deprecated) geocoder for surplus, uses OpenStreetMap Nominatim""" + print( + "warning: default_geocoder is deprecated. 
" + "this is a emulation function that will use a fingerprinted user agent.", + file=stderr, ) + return default_geocoding.geocoder(place=place) -def default_reverser(latlong: Latlong) -> dict[str, Any]: - """default reverser for surplus, uses OpenStreetMap Nominatim""" - location: _geopy_Location | None = _geopy_Nominatim(user_agent=USER_AGENT).reverse( - str(latlong) +def default_reverser(latlong: Latlong, level: int = 18) -> dict[str, Any]: + """ + (deprecated) reverser for surplus, uses OpenStreetMap Nominatim + """ + print( + "warning: default_reverser is deprecated. " + "this is a emulation function that will use a fingerprinted user agent.", + file=stderr, ) - - if location is None: - raise NoSuitableLocationError(f"could not reverse '{str(latlong)}'") - - location_dict: dict[str, Any] = {} - - for key in (address := location.raw.get("address", {})): - location_dict[key] = address.get(key, "") - - location_dict["raw"] = location.raw - location_dict["latitude"] = location.latitude - location_dict["longitude"] = location.longitude - - return location_dict + return default_geocoding.reverser(latlong=latlong, level=level) class Behaviour(NamedTuple): @@ -537,14 +751,12 @@ class Behaviour(NamedTuple): query: str | list[str] = "" original user-passed query string or a list of strings from splitting user-passed query string by spaces - geocoder: Callable[[str], Latlong] = default_geocoderi - name string to location function, must take in a string and return a Latlong, - exceptions are handled by the caller - reverser: Callable[[str], dict[str, Any]] = default_reverser - Latlong object to dictionary function, must take in a string and return a - dict. keys found in SHAREABLE_TEXT_LINE_*_KEYS used to access address details - are placed top-level in the dict, exceptions are handled by the caller. 
- see the playground notebook for example output + geocoder: SurplusGeocoderProtocol = default_geocoding.geocoder + name string to location function, see SurplusGeocoderProtocol docstring + for more information + reverser: SurplusReverserProtocol = default_geocoding.reverser + latlong to address information dict function, see SurplusReverserProtocol + docstring for more information stderr: TextIO = sys.stderr TextIO-like object representing a writeable file. defaults to sys.stderr stdout: TextIO = sys.stdout @@ -558,8 +770,8 @@ class Behaviour(NamedTuple): """ query: str | list[str] = "" - geocoder: Callable[[str], Latlong] = default_geocoder - reverser: Callable[[Latlong], dict[str, Any]] = default_reverser + geocoder: SurplusGeocoderProtocol = default_geocoding.geocoder + reverser: SurplusReverserProtocol = default_geocoding.reverser stderr: TextIO = stderr stdout: TextIO = stdout debug: bool = False @@ -694,7 +906,7 @@ def parse_query(behaviour: Behaviour) -> Result[Query]: split_query = behaviour.query if behaviour.debug: - print(f"debug: {split_query=}\ndebug: {original_query=}", behaviour.stderr) + print(f"debug: {split_query=}\ndebug: {original_query=}", file=behaviour.stderr) # not a plus/local code, try to match for latlong or string query match split_query: @@ -804,6 +1016,13 @@ def handle_args() -> Behaviour: f"'{Behaviour([]).convert_to_type.value}'" ), default=Behaviour([]).convert_to_type.value, + ), + parser.add_argument( + "-u", + "--user-agent", + type=str, + help=f"user agent string to use for geocoding service, defaults to fingerprinted user agent string", + default=default_fingerprint, ) args = parser.parse_args() @@ -821,10 +1040,12 @@ def handle_args() -> Behaviour: else: query = args.query + geocoding = SurplusDefaultGeocoding(args.user_agent) + behaviour = Behaviour( query=query, - geocoder=default_geocoder, - reverser=default_reverser, + geocoder=geocoding.geocoder, + reverser=geocoding.reverser, stderr=stderr, stdout=stdout,
debug=args.debug, @@ -911,7 +1132,7 @@ def _generate_text( # get iso3166-2 before doing anything iso3166_2: str = "" for key in location: - if key.startswith("iso3166"): + if key.lower().startswith("iso3166"): iso3166_2 = location.get(key, "") # skeleton code to allow for changing keys based on iso3166-2 code @@ -1004,17 +1225,19 @@ def surplus(query: Query | str, behaviour: Behaviour) -> Result[str]: match behaviour.convert_to_type: case ConversionResultTypeEnum.SHAREABLE_TEXT: # get latlong and handle result - latlong = query.to_lat_long_coord(geocoder=behaviour.geocoder) + latlong_result: Result[Latlong] = query.to_lat_long_coord( + geocoder=behaviour.geocoder + ) - if not latlong: - return Result[str]("", error=latlong.error) + if not latlong_result: + return Result[str]("", error=latlong_result.error) if behaviour.debug: - print(f"debug: cli: {latlong.get()=}", file=behaviour.stderr) + print(f"debug: cli: {latlong_result.get()=}", file=behaviour.stderr) # reverse location and handle result try: - location: dict[str, Any] = behaviour.reverser(latlong.get()) + location: dict[str, Any] = behaviour.reverser(latlong_result.get()) except Exception as exc: return Result[str]("", error=exc) @@ -1041,21 +1264,23 @@ def surplus(query: Query | str, behaviour: Behaviour) -> Result[str]: return Result[str](text) case ConversionResultTypeEnum.PLUS_CODE: + # if its already a plus code, just return it if isinstance(query, PlusCodeQuery): return Result[str](str(query)) # get latlong and handle result - latlong = query.to_lat_long_coord(geocoder=behaviour.geocoder) + latlong_query = query.to_lat_long_coord(geocoder=behaviour.geocoder) - if not latlong: - return Result[str]("", error=latlong.error) + if not latlong_query: + return Result[str]("", error=latlong_query.error) if behaviour.debug: - print(f"debug: cli: {latlong.get()=}", file=behaviour.stderr) + print(f"debug: cli: {latlong_query.get()=}", file=behaviour.stderr) + # perform operation try: pluscode: str = 
_PlusCode_encode( - lat=latlong.get().latitude, lon=latlong.get().longitude + lat=latlong_query.get().latitude, lon=latlong_query.get().longitude ) except Exception as exc: @@ -1064,10 +1289,41 @@ def surplus(query: Query | str, behaviour: Behaviour) -> Result[str]: return Result[str](pluscode) case ConversionResultTypeEnum.LOCAL_CODE: + # if its already a local code, just return it + if isinstance(query, LocalCodeQuery): + return Result[str](str(query)) + + latlong: Latlong = EMPTY_LATLONG + + # if its a plus code, convert to latlong first + if isinstance(query, PlusCodeQuery): + pluscode_latlong_result = PlusCodeQuery.to_lat_long_coord( + query, geocoder=behaviour.geocoder + ) + + if not pluscode_latlong_result: + return Result[str]("", error=pluscode_latlong_result.error) + + latlong = pluscode_latlong_result.get() + + # get latlong and handle result + latlong_result = query.to_lat_long_coord(geocoder=behaviour.geocoder) + + if not latlong_result: + return Result[str]("", error=latlong_result.error) + + if behaviour.debug: + print(f"debug: cli: {latlong_result.get()=}", file=behaviour.stderr) + + latlong = latlong_result.get() + + # perform operation # TODO: https://github.com/markjoshwel/surplus/issues/18 + # https://github.com/google/open-location-code/wiki/Guidance-for-shortening-codes + return Result[str]( text, - error=UnavailableFeatureError( + error=NotImplementedError( "converting to Plus Code is not implemented yet" ), ) @@ -1078,15 +1334,16 @@ def surplus(query: Query | str, behaviour: Behaviour) -> Result[str]: return Result[str](str(query)) # get latlong and handle result - latlong = query.to_lat_long_coord(geocoder=behaviour.geocoder) + latlong_result = query.to_lat_long_coord(geocoder=behaviour.geocoder) - if not latlong: - return Result[str]("", error=latlong.error) + if not latlong_result: + return Result[str]("", error=latlong_result.error) if behaviour.debug: - print(f"debug: cli: {latlong.get()=}", file=behaviour.stderr) + print(f"debug: cli: 
{latlong_result.get()=}", file=behaviour.stderr) - return Result[str](str(latlong.get())) + # perform operation + return Result[str](str(latlong_result.get())) case _: return Result[str]( diff --git a/test.py b/test.py index fa8ac5e..d3dc37c 100644 --- a/test.py +++ b/test.py @@ -100,22 +100,26 @@ tests: list[ContinuityTest] = [ ), ContinuityTest( query="Ngee Ann Polytechnic, Singapore", - expected=( - "Ngee Ann Polytechnic\n" - "535 Clementi Road\n" - "Bukit Timah\n" - "599489\n" - "Northwest, Singapore" - ), + expected=[ + ( + "Ngee Ann Polytechnic\n" + "535 Clementi Road\n" + "Bukit Timah\n" + "599489\n" + "Northwest, Singapore" + ) + ], ), ContinuityTest( query="1.3521, 103.8198", - expected=( - "MacRitchie Nature Trail\n" - "Central Water Catchment\n" - "574325\n" - "Central, Singapore" - ), + expected=[ + ( + "MacRitchie Nature Trail\n" + "Central Water Catchment\n" + "574325\n" + "Central, Singapore" + ) + ], ), ]