From 8ddde41a2e81f6149b7dce6f22abc3acbcacfbad Mon Sep 17 00:00:00 2001 From: Mark Joshwel Date: Tue, 5 Sep 2023 04:35:39 +0000 Subject: [PATCH 1/7] s+: convert to latlong --- surplus/surplus.py | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/surplus/surplus.py b/surplus/surplus.py index b1e8606..01744bb 100644 --- a/surplus/surplus.py +++ b/surplus/surplus.py @@ -446,7 +446,7 @@ class LatlongQuery(NamedTuple): def __str__(self) -> str: """method that returns string representation of query""" - return f"{self.latlong.latitude}, {self.latlong.longitude}" + return f"{str(self.latlong)}" class StringQuery(NamedTuple): @@ -1058,13 +1058,20 @@ def surplus(query: Query | str, behaviour: Behaviour) -> Result[str]: ) case ConversionResultTypeEnum.LATLONG: - # TODO: https://github.com/markjoshwel/surplus/issues/18 - return Result[str]( - text, - error=UnavailableFeatureError( - "converting to Latlong is not implemented yet" - ), - ) + # return the latlong if already given a latlong + if isinstance(query, LatlongQuery): + return Result[str](str(query)) + + # get latlong and handle result + latlong = query.to_lat_long_coord(geocoder=behaviour.geocoder) + + if not latlong: + return Result[str]("", error=latlong.error) + + if behaviour.debug: + print(f"debug: cli: {latlong.get()=}", file=behaviour.stderr) + + return Result[str](str(latlong.get())) case _: return Result[str]( From fa1fff96029f32363291846e5b5976659a5b3349 Mon Sep 17 00:00:00 2001 From: Mark Joshwel Date: Tue, 5 Sep 2023 04:39:10 +0000 Subject: [PATCH 2/7] s+: convert to pluscode --- surplus/surplus.py | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/surplus/surplus.py b/surplus/surplus.py index 01744bb..aaa8334 100644 --- a/surplus/surplus.py +++ b/surplus/surplus.py @@ -49,6 +49,7 @@ from typing import ( from geopy import Location as _geopy_Location # type: ignore from geopy.geocoders import Nominatim as _geopy_Nominatim # 
type: ignore from pluscodes import PlusCode as _PlusCode # type: ignore +from pluscodes import encode as _PlusCode_encode # type: ignore from pluscodes.validator import Validator as _PlusCode_Validator # type: ignore from pluscodes.openlocationcode import ( # type: ignore # isort: skip @@ -1040,13 +1041,25 @@ def surplus(query: Query | str, behaviour: Behaviour) -> Result[str]: return Result[str](text) case ConversionResultTypeEnum.PLUS_CODE: - # TODO: https://github.com/markjoshwel/surplus/issues/18 - return Result[str]( - text, - error=UnavailableFeatureError( - "converting to Plus Code is not implemented yet" - ), - ) + if isinstance(query, PlusCodeQuery): + return Result[str](str(query)) + + # get latlong and handle result + latlong = query.to_lat_long_coord(geocoder=behaviour.geocoder) + + if not latlong: + return Result[str]("", error=latlong.error) + + if behaviour.debug: + print(f"debug: cli: {latlong.get()=}", file=behaviour.stderr) + + try: + pluscode: str = _PlusCode_encode(lat=latlong.get().latitude, lon=latlong.get().longitude) + + except Exception as exc: + return Result[str]("", error=exc) + + return Result[str](pluscode) case ConversionResultTypeEnum.LOCAL_CODE: # TODO: https://github.com/markjoshwel/surplus/issues/18 From f6de01a02915801acf96ebcdaa8aadfac2a7c2c3 Mon Sep 17 00:00:00 2001 From: Mark Joshwel Date: Tue, 5 Sep 2023 04:39:40 +0000 Subject: [PATCH 3/7] s+: format and check --- surplus/surplus.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/surplus/surplus.py b/surplus/surplus.py index aaa8334..3295275 100644 --- a/surplus/surplus.py +++ b/surplus/surplus.py @@ -1054,7 +1054,9 @@ def surplus(query: Query | str, behaviour: Behaviour) -> Result[str]: print(f"debug: cli: {latlong.get()=}", file=behaviour.stderr) try: - pluscode: str = _PlusCode_encode(lat=latlong.get().latitude, lon=latlong.get().longitude) + pluscode: str = _PlusCode_encode( + lat=latlong.get().latitude, lon=latlong.get().longitude + ) except 
Exception as exc: return Result[str]("", error=exc) From fda37f413f41ae31479a29277030e450bf9032aa Mon Sep 17 00:00:00 2001 From: Mark Joshwel Date: Tue, 5 Sep 2023 17:34:28 +0000 Subject: [PATCH 4/7] s+,docs,tests: many - s+: local code conversion - s+: fingerprinted user agents + override arg - s+: rate limited default_ geocoding functions - s+: gecoding function protocols - docs: update api - tests: fix np and macritchie expecteds --- README.md | 408 ++++++++++++++++++++++++++++++++---------- playground.ipynb | 330 ++++++++++++++++++++++++++++------ poetry.lock | 6 +- surplus/__init__.py | 13 +- surplus/surplus.py | 425 +++++++++++++++++++++++++++++++++++--------- test.py | 30 ++-- 6 files changed, 969 insertions(+), 243 deletions(-) diff --git a/README.md b/README.md index 0901d28..ccc1a4f 100644 --- a/README.md +++ b/README.md @@ -22,6 +22,7 @@ to iOS Shortcuts-like shareable text. - [what counts as "incorrect"](#what-counts-as-incorrect) - [output technical details](#the-technical-details-of-surpluss-output) - [api reference](#api-reference) + - [details on the fingerprinted user agent](#details-on-the-fingerprinted-user-agent) - [licence](#licence) ```text @@ -62,27 +63,29 @@ see [licence](#licence) for licensing information. ### command-line usage ```text -usage: surplus [-h] [-d] [-v] [-c {pluscode,localcode,latlong,string}] +usage: surplus [-h] [-d] [-v] [-c {pluscode,localcode,latlong,sharetext}] + [-u USER_AGENT] [query ...] 
Google Maps Plus Code to iOS Shortcuts-like shareable text positional arguments: - query full-length Plus Code (6PH58QMF+FX), - shortened Plus Code/'local code' (8QMF+FX Singapore), - latlong (1.3336875, 103.7749375), - string query (e.g., 'Wisma Atria'), - or '-' to read from stdin + query full-length Plus Code (6PH58QMF+FX), shortened + Plus Code/'local code' (8QMF+FX Singapore), + latlong (1.3336875, 103.7749375), string query + (e.g., 'Wisma Atria'), or '-' to read from stdin options: -h, --help show this help message and exit -d, --debug prints lat, long and reverser response dict to stderr -v, --version prints version information to stderr and exits - -c {pluscode,localcode,latlong,sharetext}, - --convert-to {pluscode,localcode,latlong,sharetext} + -c {pluscode,localcode,latlong,sharetext}, --convert-to {pluscode,localcode,latlong,sharetext} converts query a specific output type, defaults to 'sharetext' + -u USER_AGENT, --user-agent USER_AGENT + user agent string to use for geocoding service, + defaults to fingerprinted user agent string ``` ### example api usage @@ -189,7 +192,7 @@ and do the following: function, which by default is OpenStreetMap Nominatim. (_don't know what the above means? then you are using the default reverser._) - also look at the [what counts as "incorrect"](#what-counts-as-incorrect) section + also look at the ['what counts as "incorrect"'](#what-counts-as-incorrect) section before moving on. 2. include the erroneous query. @@ -263,12 +266,13 @@ of incorrect outputs. 
```text $ s+ --debug 8QJF+RP Singapore -surplus version 2.1.0, debug mode +surplus version 2.1.0, debug mode (latest@future, Tue 05 Sep 2023 23:38:59 +0800) debug: parse_query: behaviour.query=['8QJF+RP', 'Singapore'] debug: _match_plus_code: portion_plus_code='8QJF+RP', portion_locality='Singapore' debug: cli: query=Result(value=LocalCodeQuery(code='8QJF+RP', locality='Singapore'), error=None) -debug: cli: latlong.get()=Latlong(latitude=1.3320625, longitude=103.7743125) -debug: cli: location={'amenity': 'Ngee Ann Polytechnic', 'house_number': '535', 'road': 'Clementi Road', 'suburb': 'Bukit Timah', 'city': 'Singapore', 'county': 'Northwest', 'ISO3166-2-lvl6': 'SG-03', 'postcode': '599489', 'country': 'Singapore', 'country_code': 'sg', 'raw': "{...}", 'latitude': '1.33318835', 'longitude': '103.77461234638255'} +debug: cli: latlong_result.get()=Latlong(latitude=1.3320625, longitude=103.7743125) +debug: cli: behaviour.user_agent='surplus/2.1.0-local (1fdbfa0b0cfb)' +debug: cli: location={...} debug: _generate_text: seen_names=['Ngee Ann Polytechnic', 'Clementi Road'] debug: _generate_text_line: [True] -> True -------- 'Ngee Ann Polytechnic' debug: _generate_text_line: [True] -> True -------- '535' @@ -329,7 +333,7 @@ variables this variable is displayed to show what query type [`parse_query()`](#def-parse_query) has recognised, and if there were any errors during query parsing -- **expression `latlong.get()=`** +- **expression `latlong_result.get()=`** (_only shown if the query is a plus code_) @@ -340,12 +344,12 @@ variables the response dictionary from the reverser function passed to [`surplus()`](#def-surplus) - for more information on the reverser function, see [`Behaviour`](#class-behaviour) and - [`default_reverser`](#def-default_reverser) + for more information on the reverser function, see + [`SurplusReverserProtocol`](#surplusreverserprotocol) - **variable `seen_names`** - a list of unique important names found in certain nominatim keys used in final output 
+ a list of unique important names found in certain Nominatim keys used in final output lines 0-3 - **`_generate_text_line` seen name checks** @@ -524,7 +528,13 @@ line breakdown of shareable text output, accompanied by their Nominatim keys: - [types](#types) - [`Query`](#query) - [`ResultType`](#resulttype) + - [`SurplusGeocoderProtocol`](#surplusgeocoderprotocol) + - [`SurplusReverserProtocol`](#surplusreverserprotocol) - [`class Behaviour`](#class-behaviour) +- [`class SurplusDefaultGeocoding`](#class-surplusdefaultgeocoding) + - [`SurplusDefaultGeocoding.update_geocoding_functions()`](#surplusdefaultgeocodingupdate_geocoding_functions) + - [`SurplusDefaultGeocoding.geocoder()`](#surplusdefaultgeocodinggeocoder) + - [`SurplusDefaultGeocoding.reverser()`](#surplusdefaultgeocodingreverser) - [`class ConversionResultTypeEnum`](#class-conversionresulttypeenum) - [`class Result`](#class-result) - [`Result.__bool__()`](#result__bool__) @@ -547,8 +557,8 @@ line breakdown of shareable text output, accompanied by their Nominatim keys: - [`StringQuery.__str__()`](#stringquery__str__) - [`def surplus()`](#def-surplus) - [`def parse_query()`](#def-parse_query) -- [`def default_geocoder()`](#def-default_geocoder) -- [`def default_reverser()`](#def-default_reverser) +- [`def generate_fingerprinted_user_agent`](#def-generate_fingerprinted_user_agent) + - [details on the fingerprinted user agent](#details-on-the-fingerprinted-user-agent) ### constants @@ -557,30 +567,40 @@ line breakdown of shareable text output, accompanied by their Nominatim keys: a tuple of integers representing the version of surplus, in the format `[major, minor, patch]` -- `VERSION_SUFFIX: Final[str]` - `BUILD_BRANCH: Final[str]` - `BUILD_COMMIT: Final[str]` - `BUILD_DATETIME: Final[datetime]` +- `VERSION_SUFFIX: typing.Final[str]` + `BUILD_BRANCH: typing.Final[str]` + `BUILD_COMMIT: typing.Final[str]` + `BUILD_DATETIME: typing.Final[datetime]` string and a 
[datetime.datetime](https://docs.python.org/3/library/datetime.html) object containing version and build information, set by [releaser.py](releaser.py) -- `SHAREABLE_TEXT_LINE_0_KEYS: tuple[str, ...]` - `SHAREABLE_TEXT_LINE_1_KEYS: tuple[str, ...]` - `SHAREABLE_TEXT_LINE_2_KEYS: tuple[str, ...]` - `SHAREABLE_TEXT_LINE_3_KEYS: tuple[str, ...]` - `SHAREABLE_TEXT_LINE_4_KEYS: tuple[str, ...]` - `SHAREABLE_TEXT_LINE_5_KEYS: tuple[str, ...]` - `SHAREABLE_TEXT_LINE_6_KEYS: tuple[str, ...]` +- `CONNECTION_MAX_RETRIES: int = 9` + `CONNECTION_WAIT_SECONDS: int = 10` - a tuple of strings containing nominatim keys used in shareable text line 0-6 + defines if and how many times to retry a connection, alongside how many seconds to wait + in between tries, for Nominatim -- `SHAREABLE_TEXT_NAMES: tuple[str, ...]` + > [!NOTE] + > this constant only affects the default surplus Nominatim geocoding functions. custom + > functions do not read from this, unless deliberately programmed to do so + +- `SHAREABLE_TEXT_LINE_0_KEYS: typing.Final[tuple[str, ...]]` + `SHAREABLE_TEXT_LINE_1_KEYS: typing.Final[tuple[str, ...]]` + `SHAREABLE_TEXT_LINE_2_KEYS: typing.Final[tuple[str, ...]]` + `SHAREABLE_TEXT_LINE_3_KEYS: typing.Final[tuple[str, ...]]` + `SHAREABLE_TEXT_LINE_4_KEYS: typing.Final[tuple[str, ...]]` + `SHAREABLE_TEXT_LINE_5_KEYS: typing.Final[tuple[str, ...]]` + `SHAREABLE_TEXT_LINE_6_KEYS: typing.Final[tuple[str, ...]]` + + a tuple of strings containing Nominatim keys used in shareable text line 0-6 + +- `SHAREABLE_TEXT_NAMES: typing.Final[tuple[str, ...]]` - a tuple of strings containing nominatim keys used in shareable text line 0-2 and + a tuple of strings containing Nominatim keys used in shareable text line 0-2 and special keys in line 3 -- `EMPTY_LATLONG: Latlong` +- `EMPTY_LATLONG: typing.Final[Latlong]` a constant for an empty latlong coordinate, with latitude and longitude set to 0.0 ### exception classes @@ -618,6 +638,88 @@ ResultType = TypeVar("ResultType") [generic 
type](https://docs.python.org/3/library/typing.html#generics) used by [`Result`](#class-result) +#### `SurplusGeocoderProtocol` + +[typing_extensions.Protocol](https://mypy.readthedocs.io/en/stable/protocols.html#callback-protocols) +class for documentation and static type checking of surplus geocoder functions + +- **signature and conforming function signature** + + ```python + class SurplusGeocoderProtocol(Protocol): + def __call__(self, place: str) -> Latlong: + ... + ``` + + functions that conform to this protocol should have the following signature: + + ```python + def example(place: str) -> Latlong: ... + ``` + +- **information on conforming functions** + + function takes in a location name as a string, and returns a [Latlong](#class-latlong) + + function can and should be + [`functools.lru_cache()`-wrapped](https://docs.python.org/3/library/functools.html#functools.lru_cache) + if the geocoding service asks for caching + + exceptions are handled by the caller + +#### `SurplusReverserProtocol` + +[typing_extensions.Protocol](https://mypy.readthedocs.io/en/stable/protocols.html#callback-protocols) +class for documentation and static type checking of surplus reverser functions + +- **signature and conforming function signature** + + ```python + class SurplusReverserProtocol(Protocol): + def __call__(self, latlong: Latlong, level: int = 18) -> dict[str, Any]: + ... + ``` + + functions that conform to this protocol should have the following signature: + + ```python + def example(latlong: Latlong, level: int = 18) -> dict[str, Any]: ... + ``` + +- **information on conforming functions** + + function takes in a [Latlong](#class-latlong) object and returns a dictionary with [`SHAREABLE_TEXT_LINE_*_KEYS`](#constants) keys at the dictionary's top level. + keys are used to access address information. + + function should also take in an int representing the level of detail for the returned + address, 0-18 (country-level to building), inclusive.
should default to 18. + + keys for latitude, longitude and an ISO 3166-2 code (or closest equivalent) should also be + included at the dictionary's top level as the keys `latitude`, `longitude` and + `ISO3166-2` (case-insensitive, or at least something starting with `ISO3166`) + respectively. + + ```python + { + 'ISO3166-2-lvl6': 'SG-03', + 'amenity': 'Ngee Ann Polytechnic', + ... + 'country': 'Singapore', + 'latitude': 1.33318835, + 'longitude': 103.77461234638255, + 'postcode': '599489', + 'raw': {...}, + } + ``` + + function can and should be + [`functools.lru_cache()`-wrapped](https://docs.python.org/3/library/functools.html#functools.lru_cache) + if the geocoding service asks for caching + + see the [playground notebook](/playground.ipynb) in repository root for detailed + sample output + exceptions are handled by the caller + ### `class Behaviour` [`typing.NamedTuple`](https://docs.python.org/3/library/typing.html#typing.NamedTuple) @@ -629,15 +731,13 @@ attributes original user-passed query string or a list of strings from splitting user-passed query string by spaces -- `geocoder: typing.Callable[[str], Latlong] = default_geocoder` - name string to location function, must take in a string and return a - [`Latlong`](#class-latlong), exceptions are handled by the caller +- `geocoder: SurplusGeocoderProtocol = default_geocoding.geocoder` + name string to location function, see + [`SurplusGeocoderProtocol`](#surplusgeocoderprotocol) for more information -- `reverser: Callable[[Latlong], dict[str, Any]] = default_reverser` - [`Latlong`](#class-latlong) object to dictionary function, must take in a string and return a - dict. keys found in SHAREABLE_TEXT_LINE_*_KEYS used to access address details are placed - top-level in the dict, exceptions are handled by the caller.
- see the [playground notebook](playground.ipynb) for example output +- `reverser: SurplusReverserProtocol = default_geocoding.reverser` + Latlong object to address information dictionary function, see + [`SurplusReverserProtocol`](#surplusreverserprotocol) for more information - `stderr: typing.TextIO = sys.stderr` [TextIO-like object](https://docs.python.org/3/library/io.html#text-i-o) @@ -658,6 +758,87 @@ attributes - `convert_to_type: ConversionResultTypeEnum = ConversionResultTypeEnum.SHAREABLE_TEXT` what type to convert the query to +### `class SurplusDefaultGeocoding` + +> [!IMPORTANT] +> this has replaced the now deprecated default geocoding functions, `default_geocoder()` +> and `default_reverser()`, in surplus 2.1.0 and later. + +see [SurplusGeocoderProtocol](#surplusgeocoderprotocol) and +[SurplusReverserProtocol](#surplusreverserprotocol) for more information on how to +implement compliant custom geocoder functions. + +[`dataclasses.dataclass`](https://docs.python.org/3/library/dataclasses.html) providing +the default geocoding functionality for surplus, via +[OpenStreetMap Nominatim](https://nominatim.openstreetmap.org/) + +attributes + +- `user_agent: str = default_fingerprint` + pass in a custom user agent here, else it will be the default + [fingerprinted user agent](#details-on-the-fingerprinted-user-agent) + +example usage + +```python +from surplus import surplus, Behaviour, SurplusDefaultGeocoding + +geocoding = SurplusDefaultGeocoding("custom user agent") +geocoding.update_geocoding_functions() # not necessary but recommended + +behaviour = Behaviour( + ..., + geocoder=geocoding.geocoder, + reverser=geocoding.reverser +) + +result = surplus("query", behaviour=behaviour) + +...
+``` + +methods + +- [`def update_geocoding_functions(self) -> None: ...`](#surplusdefaultgeocodingupdate_geocoding_functions) +- [`def geocoder(self, place: str) -> Latlong: ...`](#surplusdefaultgeocodinggeocoder) +- [`def reverser(self, latlong: Latlong, level: int = 18) -> dict[str, Any]: ...`](#surplusdefaultgeocodingreverser) + +#### `SurplusDefaultGeocoding.update_geocoding_functions()` + +re-initialise the geocoding functions with the current user agent, also generate a new +user agent if not set properly + +it is recommended to call this before using surplus as by default the geocoding functions +are uninitialised + +- signature + + ```python + def update_geocoding_functions(self) -> None: ... + ``` + +#### `SurplusDefaultGeocoding.geocoder()` + +> [!WARNING] +> this function is primarily given to be passed into a [`Behaviour`](#class-behaviour) +> object, and is not meant to be called directly. + +default geocoder for surplus + +see [SurplusGeocoderProtocol](#surplusgeocoderprotocol) for more information on surplus +geocoder functions + +#### `SurplusDefaultGeocoding.reverser()` + +> [!WARNING] +> this function is primarily given to be passed into a [`Behaviour`](#class-behaviour) +> object, and is not meant to be called directly. + +default reverser for surplus + +see [SurplusReverserProtocol](#surplusreverserprotocol) for more information on surplus +reverser functions + ### `class ConversionResultTypeEnum` [enum.Enum](https://docs.python.org/3/library/enum.html) @@ -808,15 +989,15 @@ methods - signature ```python - def to_lat_long_coord(self, geocoder: Callable[[str], Latlong]) -> Result[Latlong]: + def to_lat_long_coord(self, geocoder: SurplusGeocoderProtocol) -> Result[Latlong]: ... 
``` - arguments - - `geocoder: typing.Callable[[str], Latlong]` - name string to location function, must take in a string and return a - [`Latlong`](#class-latlong), exceptions are handled by the caller + - `geocoder: SurplusGeocoderProtocol` + name string to location function, see + [SurplusGeocoderProtocol](#surplusgeocoderprotocol) for more information - returns [`Result`](#class-result)[`[Latlong]`](#class-latlong) @@ -860,15 +1041,15 @@ exclusive method that returns a full-length Plus Code as a string - signature ```python - def to_full_plus_code(self, geocoder: Callable[[str], Latlong]) -> Result[str]: + def to_full_plus_code(self, geocoder: SurplusGeocoderProtocol) -> Result[str]: ... ``` - arguments - - `geocoder: typing.Callable[[str], Latlong]` - name string to location function, must take in a string and return a - [`Latlong`](#class-latlong), exceptions are handled by the caller + - `geocoder: SurplusGeocoderProtocol` + name string to location function, see + [SurplusGeocoderProtocol](#surplusgeocoderprotocol) for more information - returns [`Result`](#class-result)`[str]` @@ -879,15 +1060,15 @@ method that returns a latitude-longitude coordinate pair - signature ```python - def to_lat_long_coord(self, geocoder: Callable[[str], Latlong]) -> Result[Latlong]: + def to_lat_long_coord(self, geocoder: SurplusGeocoderProtocol) -> Result[Latlong]: ... 
``` - arguments - - `geocoder: typing.Callable[[str], Latlong]` - name string to location function, must take in a string and return a - [`Latlong`](#class-latlong), exceptions are handled by the caller + - `geocoder: SurplusGeocoderProtocol` + name string to location function, see + [SurplusGeocoderProtocol](#surplusgeocoderprotocol) for more information - returns [`Result`](#class-result)[`[Latlong]`](#class-latlong) @@ -924,15 +1105,15 @@ method that returns a latitude-longitude coordinate pair - signature ```python - def to_lat_long_coord(self, geocoder: Callable[[str], Latlong]) -> Result[Latlong]: + def to_lat_long_coord(self, geocoder: SurplusGeocoderProtocol) -> Result[Latlong]: ... ``` - arguments - - `geocoder: typing.Callable[[str], Latlong]` - name string to location function, must take in a string and return a - [`Latlong`](#class-latlong), exceptions are handled by the caller + - `geocoder: SurplusGeocoderProtocol` + name string to location function, see + [SurplusGeocoderProtocol](#surplusgeocoderprotocol) for more information - returns [`Result`](#class-result)[`[Latlong]`](#class-latlong) @@ -969,15 +1150,15 @@ method that returns a latitude-longitude coordinate pair - signature ```python - def to_lat_long_coord(self, geocoder: Callable[[str], Latlong]) -> Result[Latlong]: + def to_lat_long_coord(self, geocoder: SurplusGeocoderProtocol) -> Result[Latlong]: ... 
``` - arguments - - `geocoder: typing.Callable[[str], Latlong]` - name string to location function, must take in a string and return a - [`Latlong`](#class-latlong), exceptions are handled by the caller + - `geocoder: SurplusGeocoderProtocol` + name string to location function, see + [SurplusGeocoderProtocol](#surplusgeocoderprotocol) for more information - returns [`Result`](#class-result)[`[Latlong]`](#class-latlong) @@ -1030,35 +1211,90 @@ function that parses a query string into a query object - returns [`Result`](#class-result)[`[Query]`](#query) -### `def default_geocoder()` +### `def generate_fingerprinted_user_agent()` -default geocoder for surplus, uses OpenStreetMap Nominatim - -> [!NOTE] -> function is not used by surplus and not directly by the user, but is exposed for -> convenience being [Behaviour](#class-behaviour) objects. -> pass in a custom function to [Behaviour](#class-behaviour) to override the default reverser. +function that attempts to return a unique user agent string. - signature - ```python - def default_geocoder(place: str) -> Latlong: - ``` +```python +def generate_fingerprinted_user_agent() -> Result[str]: +``` -### `def default_reverser()` +- returns [`Result[str]`](#class-result) -default reverser for surplus, uses OpenStreetMap Nominatim + this result will always have a valid value as erroneous results will have a + resulting value of `'surplus/<version> (generic-user)'` -> [!NOTE] -> function is not used by surplus and not directly by the user, but is exposed for -> convenience being [Behaviour](#class-behaviour) objects. -> pass in a custom function to [Behaviour](#class-behaviour) to override the default reverser.
+ valid results will have a value of `'surplus/<version> (<fingerprint hash>)'`, where + the fingerprint hash is a 12 character hexadecimal string -- signature +#### details on the fingerprinted user agent - ```python - def default_reverser(latlong: Latlong) -> dict[str, Any]: - ``` +**why do this in the first place?** +if too many people use surplus at the same time, +Nominatim will start to think it's just one person being greedy. so to prevent this, +surplus will try to generate a unique user agent string for each user through +fingerprinting. + +at the time of writing, the pre-hashed fingerprint string is as follows: + +```python +unique_info: str = f"{version}-{system_info}-{hostname}-{mac_address}" +``` + +it contains the following, in order, alongside an example: + +1. `version` - the surplus version alongside a suffix, if any + + ```text + 2.1.0-local + ``` + +2. `system_info` - generic machine and operating system information + + ```text + Linux-6.5.0-locietta-WSL2-xanmod1-x86_64-with-glibc2.35 + ``` + +3. `hostname` - your computer's hostname + + ```text + mark + ``` + +4. `mac_address` - your computer's mac address + + ```text + A9:36:3C:98:79:33 + ``` + +after hashing, this string becomes a 12 character hexadecimal string, as shown below: + +```text +surplus/2.1.0-local (1fdbfa0b0cfb) + ^^^^^^^^^^ + this is the hashed result of unique_info +``` + +if at any time, the retrieval of any of these four elements fails, surplus will just give +up and default to `'surplus/<version> (generic-user)'`. + +if any of this seems weird to you, that's fine. pass in a custom user agent flag to +surplus with `-u` or `--user-agent` to override the default user agent, or override the +default user agent in your own code by passing in a custom user agent string to +[`Behaviour`](#class-behaviour). + +```text +$ surplus --user-agent "a-shiny-custom-and-unique-user-agent" 77Q4+7X Austin, Texas, USA +...
+``` + +```python +>>> from surplus import surplus, Behaviour +>>> surplus(..., Behaviour(user_agent="a-shiny-custom-and-unique-user-agent")) +... +``` ## licence @@ -1069,21 +1305,15 @@ python module docstring. however, direct dependencies of surplus are licensed under different, but still permissive and open-source licences. -```text -geopy 2.4.0 Python Geocoding Toolbox -└── geographiclib >=1.52,<3 -pluscodes 2022.1.3 Compute Plus Codes (Open Location Codes). -``` - - [geopy](https://pypi.org/project/geopy/): Python Geocoding Toolbox - MIT License + MIT Licence - [geographiclib](https://pypi.org/project/geographiclib/): The geodesic routines from GeographicLib - MIT License + MIT Licence - [pluscodes](https://pypi.org/project/pluscodes/): Compute Plus Codes (Open Location Codes) diff --git a/playground.ipynb b/playground.ipynb index 06156dd..79e6cd2 100644 --- a/playground.ipynb +++ b/playground.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# surplus 2.0.0 playground notebook\n", + "# surplus 2.x.y playground notebook\n", "\n", "wrangling with environments for devbox users using codium/vs code:\n", "\n", @@ -42,13 +42,15 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "from surplus import PlusCodeQuery, LocalCodeQuery, LatlongQuery, StringQuery\n", "from surplus import Latlong, Result\n", - "from surplus import default_geocoder, default_reverser" + "from surplus import SurplusDefaultGeocoding\n", + "\n", + "geocoding = SurplusDefaultGeocoding()" ] }, { @@ -60,7 +62,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "metadata": {}, "outputs": [ { @@ -78,9 +80,9 @@ "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mZeroDivisionError\u001b[0m Traceback (most recent call last)", - "\u001b[1;32m/home/m/works/surplus/playground.ipynb Cell 5\u001b[0m in 
\u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[39mprint\u001b[39m(\u001b[39m\"\u001b[39m\u001b[39m{}\u001b[39;00m\u001b[39m\\t\u001b[39;00m\u001b[39m{:<40}\u001b[39;00m\u001b[39m\\t\u001b[39;00m\u001b[39m{}\u001b[39;00m\u001b[39m\"\u001b[39m\u001b[39m.\u001b[39mformat(\u001b[39mbool\u001b[39m(nom_result), \u001b[39mrepr\u001b[39m(nom_result\u001b[39m.\u001b[39merror), nom_result\u001b[39m.\u001b[39mget()))\n\u001b[1;32m 9\u001b[0m \u001b[39mprint\u001b[39m(\n\u001b[1;32m 10\u001b[0m \u001b[39m\"\u001b[39m\u001b[39m{}\u001b[39;00m\u001b[39m\\t\u001b[39;00m\u001b[39m{:<40}\u001b[39;00m\u001b[39m\\t\u001b[39;00m\u001b[39m{}\u001b[39;00m\u001b[39m\"\u001b[39m\u001b[39m.\u001b[39mformat(\n\u001b[1;32m 11\u001b[0m \u001b[39mbool\u001b[39m(exc_result), \u001b[39mrepr\u001b[39m(exc_result\u001b[39m.\u001b[39merror), exc_result\u001b[39m.\u001b[39mcry(string\u001b[39m=\u001b[39m\u001b[39mTrue\u001b[39;00m)\n\u001b[1;32m 12\u001b[0m )\n\u001b[1;32m 13\u001b[0m )\n\u001b[0;32m---> 14\u001b[0m \u001b[39mprint\u001b[39m(\u001b[39m\"\u001b[39m\u001b[39m{}\u001b[39;00m\u001b[39m\\t\u001b[39;00m\u001b[39m{:<40}\u001b[39;00m\u001b[39m\\t\u001b[39;00m\u001b[39m{}\u001b[39;00m\u001b[39m\"\u001b[39m\u001b[39m.\u001b[39mformat(\u001b[39mbool\u001b[39m(exc_result), \u001b[39mrepr\u001b[39m(exc_result\u001b[39m.\u001b[39merror), exc_result\u001b[39m.\u001b[39;49mget()))\n", - "File \u001b[0;32m~/works/surplus/surplus.py:247\u001b[0m, in \u001b[0;36mResult.get\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 245\u001b[0m \u001b[39m\"\"\"method that returns self.value if Result is non-erroneous else raises error\"\"\"\u001b[39;00m\n\u001b[1;32m 246\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39misinstance\u001b[39m(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39merror, \u001b[39mBaseException\u001b[39;00m):\n\u001b[0;32m--> 247\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39merror\n\u001b[1;32m 248\u001b[0m \u001b[39mreturn\u001b[39;00m 
\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mvalue\n", - "\u001b[1;32m/home/m/works/surplus/playground.ipynb Cell 5\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m nom_result \u001b[39m=\u001b[39m Result[\u001b[39mint\u001b[39m](\u001b[39m3\u001b[39m)\n\u001b[1;32m 3\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m----> 4\u001b[0m \u001b[39m1\u001b[39;49m \u001b[39m/\u001b[39;49m \u001b[39m0\u001b[39;49m\n\u001b[1;32m 5\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mException\u001b[39;00m \u001b[39mas\u001b[39;00m exc:\n\u001b[1;32m 6\u001b[0m exc_result \u001b[39m=\u001b[39m Result[\u001b[39mint\u001b[39m](\u001b[39m-\u001b[39m\u001b[39m1\u001b[39m, error\u001b[39m=\u001b[39mexc)\n", + "\u001b[1;32m/home/m/works/surplus/playground.ipynb Cell 5\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[39mprint\u001b[39m(\u001b[39m\"\u001b[39m\u001b[39m{}\u001b[39;00m\u001b[39m\\t\u001b[39;00m\u001b[39m{:<40}\u001b[39;00m\u001b[39m\\t\u001b[39;00m\u001b[39m{}\u001b[39;00m\u001b[39m\"\u001b[39m\u001b[39m.\u001b[39mformat(\u001b[39mbool\u001b[39m(nom_result), \u001b[39mrepr\u001b[39m(nom_result\u001b[39m.\u001b[39merror), nom_result\u001b[39m.\u001b[39mget()))\n\u001b[1;32m 9\u001b[0m \u001b[39mprint\u001b[39m(\n\u001b[1;32m 10\u001b[0m \u001b[39m\"\u001b[39m\u001b[39m{}\u001b[39;00m\u001b[39m\\t\u001b[39;00m\u001b[39m{:<40}\u001b[39;00m\u001b[39m\\t\u001b[39;00m\u001b[39m{}\u001b[39;00m\u001b[39m\"\u001b[39m\u001b[39m.\u001b[39mformat(\n\u001b[1;32m 11\u001b[0m \u001b[39mbool\u001b[39m(exc_result), \u001b[39mrepr\u001b[39m(exc_result\u001b[39m.\u001b[39merror), exc_result\u001b[39m.\u001b[39mcry(string\u001b[39m=\u001b[39m\u001b[39mTrue\u001b[39;00m)\n\u001b[1;32m 12\u001b[0m )\n\u001b[1;32m 13\u001b[0m )\n\u001b[0;32m---> 14\u001b[0m 
\u001b[39mprint\u001b[39m(\u001b[39m\"\u001b[39m\u001b[39m{}\u001b[39;00m\u001b[39m\\t\u001b[39;00m\u001b[39m{:<40}\u001b[39;00m\u001b[39m\\t\u001b[39;00m\u001b[39m{}\u001b[39;00m\u001b[39m\"\u001b[39m\u001b[39m.\u001b[39mformat(\u001b[39mbool\u001b[39m(exc_result), \u001b[39mrepr\u001b[39m(exc_result\u001b[39m.\u001b[39merror), exc_result\u001b[39m.\u001b[39;49mget()))\n", + "File \u001b[0;32m~/works/surplus/surplus/surplus.py:270\u001b[0m, in \u001b[0;36mResult.get\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 268\u001b[0m \u001b[39m\"\"\"method that returns self.value if Result is non-erroneous else raises error\"\"\"\u001b[39;00m\n\u001b[1;32m 269\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39misinstance\u001b[39m(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39merror, \u001b[39mBaseException\u001b[39;00m):\n\u001b[0;32m--> 270\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39merror\n\u001b[1;32m 271\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mvalue\n", + "\u001b[1;32m/home/m/works/surplus/playground.ipynb Cell 5\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m nom_result \u001b[39m=\u001b[39m Result[\u001b[39mint\u001b[39m](\u001b[39m3\u001b[39m)\n\u001b[1;32m 3\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m----> 4\u001b[0m \u001b[39m1\u001b[39;49m \u001b[39m/\u001b[39;49m \u001b[39m0\u001b[39;49m\n\u001b[1;32m 5\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mException\u001b[39;00m \u001b[39mas\u001b[39;00m exc:\n\u001b[1;32m 6\u001b[0m exc_result \u001b[39m=\u001b[39m Result[\u001b[39mint\u001b[39m](\u001b[39m-\u001b[39m\u001b[39m1\u001b[39m, error\u001b[39m=\u001b[39mexc)\n", "\u001b[0;31mZeroDivisionError\u001b[0m: division by zero" ] } @@ -111,27 +113,7 @@ }, { "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Result(value=Latlong(latitude=1.3336875, longitude=103.7746875), error=None)" - ] - }, - 
"execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "PlusCodeQuery(code=\"6PH58QMF+FV\").to_lat_long_coord(geocoder=default_geocoder)" - ] - }, - { - "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -146,16 +128,12 @@ } ], "source": [ - "plus_code = LocalCodeQuery(code=\"8QMF+FV\", locality=\"Singapore\").to_full_plus_code(\n", - " geocoder=default_geocoder\n", - ")\n", - "\n", - "PlusCodeQuery(code=plus_code.get()).to_lat_long_coord(geocoder=default_geocoder)" + "PlusCodeQuery(code=\"6PH58QMF+FV\").to_lat_long_coord(geocoder=geocoding.geocoder)" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -170,20 +148,22 @@ } ], "source": [ - "LocalCodeQuery(code=\"8QMF+FV\", locality=\"Singapore\").to_lat_long_coord(\n", - " geocoder=default_geocoder\n", - ")" + "plus_code = LocalCodeQuery(code=\"8QMF+FV\", locality=\"Singapore\").to_full_plus_code(\n", + " geocoder=geocoding.geocoder\n", + ")\n", + "\n", + "PlusCodeQuery(code=plus_code.get()).to_lat_long_coord(geocoder=geocoding.geocoder)" ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "Result(value=Latlong(latitude=1.33318835, longitude=103.77461234638255), error=None)" + "Result(value=Latlong(latitude=1.3336875, longitude=103.7746875), error=None)" ] }, "execution_count": 6, @@ -192,14 +172,14 @@ } ], "source": [ - "LatlongQuery(\n", - " latlong=Latlong(latitude=1.33318835, longitude=103.77461234638255)\n", - ").to_lat_long_coord(geocoder=default_geocoder)" + "LocalCodeQuery(code=\"8QMF+FV\", locality=\"Singapore\").to_lat_long_coord(\n", + " geocoder=geocoding.geocoder\n", + ")" ] }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -214,7 +194,29 @@ } ], "source": [ - "StringQuery(query=\"Ngee Ann 
Polytechnic\").to_lat_long_coord(geocoder=default_geocoder)" + "LatlongQuery(\n", + " latlong=Latlong(latitude=1.33318835, longitude=103.77461234638255)\n", + ").to_lat_long_coord(geocoder=geocoding.geocoder)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Result(value=Latlong(latitude=1.33318835, longitude=103.77461234638255), error=None)" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "StringQuery(query=\"Ngee Ann Polytechnic\").to_lat_long_coord(geocoder=geocoding.geocoder)" ] }, { @@ -282,7 +284,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -298,6 +300,7 @@ " 'house_number': '535',\n", " 'latitude': 1.33318835,\n", " 'longitude': 103.77461234638255,\n", + " 'neighbourhood': 'Ewart Park',\n", " 'postcode': '599489',\n", " 'raw': {'address': {'ISO3166-2-lvl6': 'SG-03',\n", " 'amenity': 'Ngee Ann Polytechnic',\n", @@ -306,6 +309,7 @@ " 'country_code': 'sg',\n", " 'county': 'Northwest',\n", " 'house_number': '535',\n", + " 'neighbourhood': 'Ewart Park',\n", " 'postcode': '599489',\n", " 'road': 'Clementi Road',\n", " 'suburb': 'Bukit Timah'},\n", @@ -315,8 +319,9 @@ " '103.7701481',\n", " '103.7783945'],\n", " 'class': 'amenity',\n", - " 'display_name': 'Ngee Ann Polytechnic, 535, Clementi Road, Bukit '\n", - " 'Timah, Singapore, Northwest, 599489, Singapore',\n", + " 'display_name': 'Ngee Ann Polytechnic, 535, Clementi Road, Ewart '\n", + " 'Park, Bukit Timah, Singapore, Northwest, 599489, '\n", + " 'Singapore',\n", " 'importance': 0.34662169301918117,\n", " 'lat': '1.33318835',\n", " 'licence': 'Data © OpenStreetMap contributors, ODbL 1.0. 
'\n", @@ -325,7 +330,7 @@ " 'name': 'Ngee Ann Polytechnic',\n", " 'osm_id': 2535118,\n", " 'osm_type': 'relation',\n", - " 'place_id': 297946059,\n", + " 'place_id': 250910125,\n", " 'place_rank': 30,\n", " 'type': 'university'},\n", " 'road': 'Clementi Road',\n", @@ -337,15 +342,240 @@ "import pprint\n", "\n", "latlong = LocalCodeQuery(code=\"8QMF+FV\", locality=\"Singapore\").to_lat_long_coord(\n", - " default_geocoder\n", + " geocoder=geocoding.geocoder\n", ")\n", "if not latlong:\n", " latlong.cry()\n", "\n", "else:\n", - " location = default_reverser(latlong.get())\n", + " location = geocoding.reverser(latlong.get())\n", " pprint.pprint(location)" ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2.1.0: the adventure of shortening global/full Plus Codes" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### testing rate-limited default geocoding functions" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1.33318835, 103.77461234638255\n", + "{'amenity': 'Ngee Ann Polytechnic', 'house_number': '535', 'road': 'Clementi Road', 'neighbourhood': 'Ewart Park', 'suburb': 'Bukit Timah', 'city': 'Singapore', 'county': 'Northwest', 'ISO3166-2-lvl6': 'SG-03', 'postcode': '599489', 'country': 'Singapore', 'country_code': 'sg', 'raw': {'place_id': 250910125, 'licence': 'Data © OpenStreetMap contributors, ODbL 1.0. 
http://osm.org/copyright', 'osm_type': 'relation', 'osm_id': 2535118, 'lat': '1.33318835', 'lon': '103.77461234638255', 'class': 'amenity', 'type': 'university', 'place_rank': 30, 'importance': 0.34662169301918117, 'addresstype': 'amenity', 'name': 'Ngee Ann Polytechnic', 'display_name': 'Ngee Ann Polytechnic, 535, Clementi Road, Ewart Park, Bukit Timah, Singapore, Northwest, 599489, Singapore', 'address': {'amenity': 'Ngee Ann Polytechnic', 'house_number': '535', 'road': 'Clementi Road', 'neighbourhood': 'Ewart Park', 'suburb': 'Bukit Timah', 'city': 'Singapore', 'county': 'Northwest', 'ISO3166-2-lvl6': 'SG-03', 'postcode': '599489', 'country': 'Singapore', 'country_code': 'sg'}, 'boundingbox': ['1.3289692', '1.3372184', '103.7701481', '103.7783945']}, 'latitude': 1.33318835, 'longitude': 103.77461234638255}\n" + ] + } + ], + "source": [ + "from surplus import SurplusGeocoderProtocol, SurplusReverserProtocol\n", + "\n", + "\n", + "test_geocoding = SurplusDefaultGeocoding(user_agent=\"surplus/playground\")\n", + "\n", + "print(location := test_geocoding.geocoder(\"Ngee Ann Polytechnic\"))\n", + "\n", + "print(reversed := test_geocoding.reverser(f\"{location.latitude}, {location.longitude}\"))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### loop for less information until a local code is made" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# TODO\n", + "\n", + "test1 = LocalCodeQuery(\"9R3J+R9\", \"Singapore\")\n", + "test2 = LocalCodeQuery(\"G227+XF\", \"St Lucia, Queensland, Australia\")\n", + "\n", + "level = 13" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'suburb': 'Bishan',\n", + " 'city': 'Singapore',\n", + " 'county': 'Central',\n", + " 'ISO3166-2-lvl6': 'SG-01',\n", + " 'country': 'Singapore',\n", + " 'country_code': 'sg',\n", + " 'raw': {'place_id': 251115282,\n", + " 
'licence': 'Data © OpenStreetMap contributors, ODbL 1.0. http://osm.org/copyright',\n", + " 'osm_type': 'way',\n", + " 'osm_id': 795946716,\n", + " 'lat': '1.3519117',\n", + " 'lon': '103.8489708',\n", + " 'class': 'place',\n", + " 'type': 'suburb',\n", + " 'place_rank': 19,\n", + " 'importance': 0.39184907371668787,\n", + " 'addresstype': 'suburb',\n", + " 'name': 'Bishan',\n", + " 'display_name': 'Bishan, Singapore, Central, Singapore',\n", + " 'address': {'suburb': 'Bishan',\n", + " 'city': 'Singapore',\n", + " 'county': 'Central',\n", + " 'ISO3166-2-lvl6': 'SG-01',\n", + " 'country': 'Singapore',\n", + " 'country_code': 'sg'},\n", + " 'boundingbox': ['1.3416846', '1.3679829', '103.8184512', '103.8604083']},\n", + " 'latitude': 1.3519117,\n", + " 'longitude': 103.8489708}" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "(\n", + " response := geocoding.reverser(\n", + " test1.to_lat_long_coord(geocoding.geocoder).get(), level=level\n", + " )\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'suburb': 'St Lucia',\n", + " 'city_district': 'St Lucia',\n", + " 'city': 'Brisbane City',\n", + " 'state': 'Queensland',\n", + " 'ISO3166-2-lvl4': 'AU-QLD',\n", + " 'postcode': '4072',\n", + " 'country': 'Australia',\n", + " 'country_code': 'au',\n", + " 'raw': {'place_id': 54477898,\n", + " 'licence': 'Data © OpenStreetMap contributors, ODbL 1.0. 
http://osm.org/copyright',\n", + " 'osm_type': 'node',\n", + " 'osm_id': 88800268,\n", + " 'lat': '-27.4987362',\n", + " 'lon': '153.0081642',\n", + " 'class': 'place',\n", + " 'type': 'suburb',\n", + " 'place_rank': 19,\n", + " 'importance': 0.27501,\n", + " 'addresstype': 'suburb',\n", + " 'name': 'St Lucia',\n", + " 'display_name': 'St Lucia, Brisbane City, Queensland, 4072, Australia',\n", + " 'address': {'suburb': 'St Lucia',\n", + " 'city_district': 'St Lucia',\n", + " 'city': 'Brisbane City',\n", + " 'state': 'Queensland',\n", + " 'ISO3166-2-lvl4': 'AU-QLD',\n", + " 'postcode': '4072',\n", + " 'country': 'Australia',\n", + " 'country_code': 'au'},\n", + " 'boundingbox': ['-27.5187362', '-27.4787362', '152.9881642', '153.0281642']},\n", + " 'latitude': -27.4987362,\n", + " 'longitude': 153.0081642}" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "(\n", + " response := geocoding.reverser(\n", + " test2.to_lat_long_coord(geocoding.geocoder).get(), level=level\n", + " )\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## machine fingerprinting attempt\n", + "\n", + "because of nominatim's acceptable usage policy \n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from hashlib import shake_256 as _hashlib_shake_256\n", + "from platform import platform as _platform_platform\n", + "from socket import gethostname as _socket_gethostname\n", + "from uuid import getnode as _uuid_getnode\n", + "from surplus import VERSION, VERSION_SUFFIX\n", + "\n", + "\n", + "def generate_fingerprinted_user_agent() -> Result[str]:\n", + " \"\"\"\n", + " function that attempts to return a unique user agent string.\n", + "\n", + " returns Result[str]\n", + " this result will always have a valid value as erroneous results will have a\n", + " resulting value of 'surplus//generic-user'\n", + " valid results will have a 
value of 'surplus//', where\n", + " fingerprint is a 12 character hexadecimal string\n", + " \"\"\"\n", + " version: str = \".\".join([str(v) for v in VERSION]) + VERSION_SUFFIX\n", + "\n", + " try:\n", + " system_info: str = _platform_platform()\n", + " hostname: str = _socket_gethostname()\n", + " mac_address: str = \":\".join(\n", + " [\n", + " \"{:02x}\".format((_uuid_getnode() >> elements) & 0xFF)\n", + " for elements in range(0, 2 * 6, 2)\n", + " ][::-1]\n", + " )\n", + " unique_info: str = f\"{version}-{system_info}-{hostname}-{mac_address}\"\n", + "\n", + " print(f\"{version=}\")\n", + " print(f\"{system_info=}\")\n", + " print(f\"{hostname=}\")\n", + " print(f\"{mac_address=}\")\n", + "\n", + " except Exception as exc:\n", + " return Result[str](f\"surplus/{version} (generic-user)\", error=exc)\n", + "\n", + " fingerprint: str = _hashlib_shake_256(unique_info.encode()).hexdigest(5)\n", + "\n", + " return Result[str](f\"surplus/{version} ({fingerprint})\")" + ] } ], "metadata": { diff --git a/poetry.lock b/poetry.lock index 9debda9..8099786 100644 --- a/poetry.lock +++ b/poetry.lock @@ -14,14 +14,14 @@ files = [ [[package]] name = "asttokens" -version = "2.3.0" +version = "2.4.0" description = "Annotate AST trees with source code positions" category = "dev" optional = false python-versions = "*" files = [ - {file = "asttokens-2.3.0-py2.py3-none-any.whl", hash = "sha256:bef1a51bc256d349e9f94e7e40e44b705ed1162f55294220dd561d24583d9877"}, - {file = "asttokens-2.3.0.tar.gz", hash = "sha256:2552a88626aaa7f0f299f871479fc755bd4e7c11e89078965e928fb7bb9a6afe"}, + {file = "asttokens-2.4.0-py2.py3-none-any.whl", hash = "sha256:cf8fc9e61a86461aa9fb161a14a0841a03c405fa829ac6b202670b3495d2ce69"}, + {file = "asttokens-2.4.0.tar.gz", hash = "sha256:2e0171b991b2c959acc6c49318049236844a5da1d65ba2672c4880c1c894834e"}, ] [package.dependencies] diff --git a/surplus/__init__.py b/surplus/__init__.py index b9d0162..6b26542 100644 --- a/surplus/__init__.py +++ 
b/surplus/__init__.py @@ -32,10 +32,14 @@ For more information, please refer to # surplus was and would've been a single-file module, but typing is in the way :( # https://github.com/python/typing/issues/1333 +from .surplus import default_geocoder # deprecated, emulation function +from .surplus import default_reverser # deprecated, emulation function from .surplus import ( BUILD_BRANCH, BUILD_COMMIT, BUILD_DATETIME, + CONNECTION_MAX_RETRIES, + CONNECTION_WAIT_SECONDS, EMPTY_LATLONG, SHAREABLE_TEXT_LINE_0_KEYS, SHAREABLE_TEXT_LINE_1_KEYS, @@ -45,7 +49,6 @@ from .surplus import ( SHAREABLE_TEXT_LINE_5_KEYS, SHAREABLE_TEXT_LINE_6_KEYS, SHAREABLE_TEXT_NAMES, - USER_AGENT, VERSION, VERSION_SUFFIX, Behaviour, @@ -60,13 +63,15 @@ from .surplus import ( PlusCodeNotFoundError, PlusCodeQuery, Query, + Result, ResultType, StringQuery, + SurplusDefaultGeocoding, SurplusException, - UnavailableFeatureError, + SurplusGeocoderProtocol, + SurplusReverserProtocol, cli, - default_geocoder, - default_reverser, + generate_fingerprinted_user_agent, handle_args, parse_query, surplus, diff --git a/surplus/surplus.py b/surplus/surplus.py index 3295275..4d57c62 100644 --- a/surplus/surplus.py +++ b/surplus/surplus.py @@ -31,8 +31,13 @@ For more information, please refer to from argparse import ArgumentParser from collections import OrderedDict +from dataclasses import dataclass from datetime import datetime, timedelta, timezone from enum import Enum +from functools import lru_cache +from hashlib import shake_256 +from platform import platform +from socket import gethostname from sys import stderr, stdin, stdout from typing import ( Any, @@ -45,12 +50,17 @@ from typing import ( TypeAlias, TypeVar, ) +from uuid import getnode from geopy import Location as _geopy_Location # type: ignore +from geopy.extra.rate_limiter import RateLimiter as _geopy_RateLimiter # type: ignore from geopy.geocoders import Nominatim as _geopy_Nominatim # type: ignore +from pluscodes import Area as _PlusCode_Area # 
type: ignore from pluscodes import PlusCode as _PlusCode # type: ignore +from pluscodes import decode as _PlusCode_decode # type: ignore from pluscodes import encode as _PlusCode_encode # type: ignore from pluscodes.validator import Validator as _PlusCode_Validator # type: ignore +from typing_extensions import Protocol from pluscodes.openlocationcode import ( # type: ignore # isort: skip recoverNearest as _PlusCode_recoverNearest, @@ -63,7 +73,8 @@ VERSION_SUFFIX: Final[str] = "-local" BUILD_BRANCH: Final[str] = "future" BUILD_COMMIT: Final[str] = "latest" BUILD_DATETIME: Final[datetime] = datetime.now(timezone(timedelta(hours=8))) # using SGT -USER_AGENT: Final[str] = "surplus" +CONNECTION_MAX_RETRIES: int = 9 +CONNECTION_WAIT_SECONDS: int = 10 SHAREABLE_TEXT_LINE_0_KEYS: Final[tuple[str, ...]] = ( "emergency", "historic", @@ -155,10 +166,6 @@ class EmptyQueryError(SurplusException): ... -class UnavailableFeatureError(SurplusException): - ... - - # data structures @@ -290,6 +297,66 @@ class Latlong(NamedTuple): EMPTY_LATLONG: Final[Latlong] = Latlong(latitude=0.0, longitude=0.0) +class SurplusGeocoderProtocol(Protocol): + """ + typing_extensions.Protocol class for documentation and static type checking of + surplus reverser functions + + (place: str) -> Latlong + + name string to location function. must take in a string and return a Latlong. + + function can be functools.lru_cache()-wrapped if the geocoding service asks for + caching + + exceptions are handled by the caller + """ + + def __call__(self, place: str) -> Latlong: + ... + + +class SurplusReverserProtocol(Protocol): + """ + typing_extensions.Protocol class for documentation and static type checking of + surplus reverser functions + + (latlong: Latlong, level: int = 18) -> dict[str, Any]: + + Latlong object to address information dictionary function. must take in a string and + return a dict with SHAREABLE_TEXT_LINE_*_KEYS keys at the dictionaries' top-level. 
+ keys are used to access address information. + + function should also take in a int representing the level of detail for the + returned address, 0-18 (country-level to building), inclusive. + + keys for latitude, longitude and an iso3166-2 (or closest equivalent) should also be + included at the dictionaries top level as the keys `latitude`, `longitude` and + `ISO3166-2` (non-case sensitive, or at least something starting with `ISO3166`) + respectively. + + { + 'ISO3166-2-lvl6': 'SG-03', + 'amenity': 'Ngee Ann Polytechnic', + ... + 'country': 'Singapore', + 'latitude': 1.33318835, + 'longitude': 103.77461234638255, + 'postcode': '599489', + 'raw': {...}, + } + + function can be functools.lru_cache()-wrapped if the geocoding service asks for + caching + + exceptions are handled by the caller, + see the playground notebook in repository root for sample output + """ + + def __call__(self, latlong: Latlong, level: int = 18) -> dict[str, Any]: + ... + + class PlusCodeQuery(NamedTuple): """ typing.NamedTuple representing a full-length Plus Code (e.g., 6PH58QMF+FX) @@ -304,14 +371,14 @@ class PlusCodeQuery(NamedTuple): code: str - def to_lat_long_coord(self, geocoder: Callable[[str], Latlong]) -> Result[Latlong]: + def to_lat_long_coord(self, geocoder: SurplusGeocoderProtocol) -> Result[Latlong]: """ method that returns a latitude-longitude coordinate pair arguments - geocoder: typing.Callable[[str], Latlong] - name string to location function, must take in a string and return a - Latlong, exceptions are handled by the caller + geocoder: SurplusGeocoderProtocol + name string to location function, see SurplusGeocoderProtocol docstring + for more information returns Result[Latlong] """ @@ -363,14 +430,14 @@ class LocalCodeQuery(NamedTuple): code: str locality: str - def to_full_plus_code(self, geocoder: Callable[[str], Latlong]) -> Result[str]: + def to_full_plus_code(self, geocoder: SurplusGeocoderProtocol) -> Result[str]: """ exclusive method that returns a full-length 
Plus Code as a string arguments - geocoder: typing.Callable[[str], Latlong] - name string to location function, must take in a string and return a - Latlong, exceptions are handled by the caller + geocoder: SurplusGeocoderProtocol + name string to location function, see SurplusGeocoderProtocol docstring + for more information returns Result[str] """ @@ -389,14 +456,14 @@ class LocalCodeQuery(NamedTuple): except Exception as exc: return Result[str]("", error=exc) - def to_lat_long_coord(self, geocoder: Callable[[str], Latlong]) -> Result[Latlong]: + def to_lat_long_coord(self, geocoder: SurplusGeocoderProtocol) -> Result[Latlong]: """ method that returns a latitude-longitude coordinate pair arguments - geocoder: typing.Callable[[str], Latlong] - name string to location function, must take in a string and return a - Latlong, exceptions are handled by the caller + geocoder: SurplusGeocoderProtocol + name string to location function, see SurplusGeocoderProtocol docstring + for more information returns Result[Latlong] """ @@ -431,14 +498,14 @@ class LatlongQuery(NamedTuple): latlong: Latlong - def to_lat_long_coord(self, geocoder: Callable[[str], Latlong]) -> Result[Latlong]: + def to_lat_long_coord(self, geocoder: SurplusGeocoderProtocol) -> Result[Latlong]: """ method that returns a latitude-longitude coordinate pair arguments - geocoder: typing.Callable[[str], Latlong] - name string to location function, must take in a string and return a - Latlong, exceptions are handled by the caller + geocoder: SurplusGeocoderProtocol + name string to location function, see SurplusGeocoderProtocol docstring + for more information returns Result[Latlong] """ @@ -464,14 +531,14 @@ class StringQuery(NamedTuple): query: str - def to_lat_long_coord(self, geocoder: Callable[[str], Latlong]) -> Result[Latlong]: + def to_lat_long_coord(self, geocoder: SurplusGeocoderProtocol) -> Result[Latlong]: """ method that returns a latitude-longitude coordinate pair arguments - geocoder: 
typing.Callable[[str], Latlong] - name string to location function, must take in a string and return a - Latlong, exceptions are handled by the caller + geocoder: SurplusGeocoderProtocol + name string to location function, see SurplusGeocoderProtocol docstring + for more information returns Result[Latlong] """ @@ -490,43 +557,190 @@ class StringQuery(NamedTuple): Query: TypeAlias = PlusCodeQuery | LocalCodeQuery | LatlongQuery | StringQuery -def default_geocoder(place: str) -> Latlong: - """default geocoder for surplus, uses OpenStreetMap Nominatim""" +def generate_fingerprinted_user_agent() -> Result[str]: + """ + function that attempts to return a unique user agent string. - location: _geopy_Location | None = _geopy_Nominatim(user_agent=USER_AGENT).geocode( - place - ) + returns Result[str] + this result will always have a valid value as erroneous results will have a + resulting value of 'surplus/ (generic-user)' - if location is None: - raise NoSuitableLocationError( - f"No suitable location could be geolocated from '{place}'" + valid results will have a value of 'surplus/ ()', + where is a 12 character hexadecimal string + """ + version: str = ".".join([str(v) for v in VERSION]) + VERSION_SUFFIX + + try: + system_info: str = platform() + hostname: str = gethostname() + mac_address: str = ":".join( + [ + "{:02x}".format((getnode() >> elements) & 0xFF) + for elements in range(0, 2 * 6, 2) + ][::-1] + ) + unique_info: str = f"{version}-{system_info}-{hostname}-{mac_address}" + + except Exception as exc: + return Result[str](f"surplus/{version} (generic-user)", error=exc) + + fingerprint: str = shake_256(unique_info.encode()).hexdigest(5) + + return Result[str](f"surplus/{version} ({fingerprint})") + + +default_fingerprint: Final[str] = generate_fingerprinted_user_agent().value + + +@dataclass +class SurplusDefaultGeocoding: + """ + dataclass providing the default geocoding functionality for surplus, via + OpenStreetMap Nominatim + + attributes + user_agent: str = 
default_fingerprint + pass in a custom user agent here, else it will be the default fingerprinted + user agent + + usage + geocoding = SurplusDefaultGeocoding(behaviour.user_agent) + geocoding.update_geocoding_functions() + ... + Behaviour( + ..., + geocoder=geocoding.geocoder, + reverser=geocoding.reverser + ) + """ + + user_agent: str = default_fingerprint + _ratelimited_raw_geocoder: Callable | None = None + _ratelimited_raw_reverser: Callable | None = None + _first_update: bool = False + + def update_geocoding_functions(self) -> None: + """ + re-initialise the geocoding functions with the current user agent, also generate + a new user agent if not set properly + + recommended to call this before using surplus as by default the geocoding + functions are uninitialised + """ + + if not isinstance(self.user_agent, str): + self.user_agent: str = generate_fingerprinted_user_agent().value + + nominatim = _geopy_Nominatim(user_agent=self.user_agent) + + # this is + + self._ratelimited_raw_geocoder: Callable = lru_cache( + _geopy_RateLimiter( + nominatim.geocode, + max_retries=CONNECTION_MAX_RETRIES, + error_wait_seconds=CONNECTION_WAIT_SECONDS, + ) ) - return Latlong( - latitude=location.latitude, - longitude=location.longitude, + self._ratelimited_raw_reverser: Callable = lru_cache( + _geopy_RateLimiter( + nominatim.reverse, + max_retries=CONNECTION_MAX_RETRIES, + error_wait_seconds=CONNECTION_WAIT_SECONDS, + ) + ) + + self._first_update = True + + def geocoder(self, place: str) -> Latlong: + """ + default geocoder for surplus, uses OpenStreetMap Nominatim + + see SurplusGeocoderProtocol for more information on surplus geocoder functions + """ + + if not callable(self._ratelimited_raw_geocoder) or (self._first_update is False): + self.update_geocoding_functions() + + # https://github.com/python/mypy/issues/12155 + assert callable(self._ratelimited_raw_geocoder) + + location: _geopy_Location | None = self._ratelimited_raw_geocoder(place) + + if location is None: + 
raise NoSuitableLocationError( + f"No suitable location could be geolocated from '{place}'" + ) + + return Latlong( + latitude=location.latitude, + longitude=location.longitude, + ) + + def reverser(self, latlong: Latlong, level: int = 18) -> dict[str, Any]: + """ + default reverser for surplus, uses OpenStreetMap Nominatim + + arguments + latlong: Latlong + level: int = 0 + level of detail for the returned address, 0-18 (country-building) inclusive + + see SurplusReverserProtocol for more information on surplus reverser functions + """ + + if not callable(self._ratelimited_raw_reverser) or (self._first_update is False): + self.update_geocoding_functions() + + # https://github.com/python/mypy/issues/12155 + assert callable(self._ratelimited_raw_reverser) + + location: _geopy_Location | None = self._ratelimited_raw_reverser( + str(latlong), zoom=level + ) + + if location is None: + raise NoSuitableLocationError(f"could not reverse '{str(latlong)}'") + + location_dict: dict[str, Any] = {} + + for key in (address := location.raw.get("address", {})): + location_dict[key] = address.get(key, "") + + location_dict["raw"] = location.raw + location_dict["latitude"] = location.latitude + location_dict["longitude"] = location.longitude + + return location_dict + + +default_geocoding: Final[SurplusDefaultGeocoding] = SurplusDefaultGeocoding( + default_fingerprint +) +default_geocoding.update_geocoding_functions() + + +def default_geocoder(place: str) -> Latlong: + """(deprecated) geocoder for surplus, uses OpenStreetMap Nominatim""" + print( + "warning: default_geocoder is deprecated. 
" + "this is a emulation function that will use a fingerprinted user agent.", + file=stderr, ) + return default_geocoding.geocoder(place=place) -def default_reverser(latlong: Latlong) -> dict[str, Any]: - """default reverser for surplus, uses OpenStreetMap Nominatim""" - location: _geopy_Location | None = _geopy_Nominatim(user_agent=USER_AGENT).reverse( - str(latlong) +def default_reverser(latlong: Latlong, level: int = 18) -> dict[str, Any]: + """ + (deprecated) reverser for surplus, uses OpenStreetMap Nominatim + """ + print( + "warning: default_reverser is deprecated. " + "this is a emulation function that will use a fingerprinted user agent.", + file=stderr, ) - - if location is None: - raise NoSuitableLocationError(f"could not reverse '{str(latlong)}'") - - location_dict: dict[str, Any] = {} - - for key in (address := location.raw.get("address", {})): - location_dict[key] = address.get(key, "") - - location_dict["raw"] = location.raw - location_dict["latitude"] = location.latitude - location_dict["longitude"] = location.longitude - - return location_dict + return default_geocoding.reverser(latlong=latlong, level=level) class Behaviour(NamedTuple): @@ -537,14 +751,12 @@ class Behaviour(NamedTuple): query: str | list[str] = "" original user-passed query string or a list of strings from splitting user-passed query string by spaces - geocoder: Callable[[str], Latlong] = default_geocoderi - name string to location function, must take in a string and return a Latlong, - exceptions are handled by the caller - reverser: Callable[[str], dict[str, Any]] = default_reverser - Latlong object to dictionary function, must take in a string and return a - dict. keys found in SHAREABLE_TEXT_LINE_*_KEYS used to access address details - are placed top-level in the dict, exceptions are handled by the caller. 
- see the playground notebook for example output + geocoder: SurplusGeocoderProtocol = default_geocoding.geocoder + name string to location function, see SurplusGeocoderProtocol docstring for + for more information + reverser: SurplusReverserProtocol = default_geocoding.reverser + latlong to address information dict function, see SurplusReverserProtocol + docstring for more information stderr: TextIO = sys.stderr TextIO-like object representing a writeable file. defaults to sys.stderr stdout: TextIO = sys.stdout @@ -558,8 +770,8 @@ class Behaviour(NamedTuple): """ query: str | list[str] = "" - geocoder: Callable[[str], Latlong] = default_geocoder - reverser: Callable[[Latlong], dict[str, Any]] = default_reverser + geocoder: SurplusGeocoderProtocol = default_geocoding.geocoder + reverser: SurplusReverserProtocol = default_geocoding.reverser stderr: TextIO = stderr stdout: TextIO = stdout debug: bool = False @@ -694,7 +906,7 @@ def parse_query(behaviour: Behaviour) -> Result[Query]: split_query = behaviour.query if behaviour.debug: - print(f"debug: {split_query=}\ndebug: {original_query=}", behaviour.stderr) + print(f"debug: {split_query=}\ndebug: {original_query=}", file=behaviour.stderr) # not a plus/local code, try to match for latlong or string query match split_query: @@ -804,6 +1016,13 @@ def handle_args() -> Behaviour: f"'{Behaviour([]).convert_to_type.value}'" ), default=Behaviour([]).convert_to_type.value, + ), + parser.add_argument( + "-u", + "--user-agent", + type=str, + help=f"user agent string to use for geocoding service, defaults to fingerprinted user agent string", + default=default_fingerprint, ) args = parser.parse_args() @@ -821,10 +1040,12 @@ def handle_args() -> Behaviour: else: query = args.query + geocoding = SurplusDefaultGeocoding(args.user_agent) + behaviour = Behaviour( query=query, - geocoder=default_geocoder, - reverser=default_reverser, + geocoder=geocoding.geocoder, + reverser=geocoding.reverser, stderr=stderr, stdout=stdout, 
debug=args.debug, @@ -911,7 +1132,7 @@ def _generate_text( # get iso3166-2 before doing anything iso3166_2: str = "" for key in location: - if key.startswith("iso3166"): + if key.lower().startswith("iso3166"): iso3166_2 = location.get(key, "") # skeleton code to allow for changing keys based on iso3166-2 code @@ -1004,17 +1225,19 @@ def surplus(query: Query | str, behaviour: Behaviour) -> Result[str]: match behaviour.convert_to_type: case ConversionResultTypeEnum.SHAREABLE_TEXT: # get latlong and handle result - latlong = query.to_lat_long_coord(geocoder=behaviour.geocoder) + latlong_result: Result[Latlong] = query.to_lat_long_coord( + geocoder=behaviour.geocoder + ) - if not latlong: - return Result[str]("", error=latlong.error) + if not latlong_result: + return Result[str]("", error=latlong_result.error) if behaviour.debug: - print(f"debug: cli: {latlong.get()=}", file=behaviour.stderr) + print(f"debug: cli: {latlong_result.get()=}", file=behaviour.stderr) # reverse location and handle result try: - location: dict[str, Any] = behaviour.reverser(latlong.get()) + location: dict[str, Any] = behaviour.reverser(latlong_result.get()) except Exception as exc: return Result[str]("", error=exc) @@ -1041,21 +1264,23 @@ def surplus(query: Query | str, behaviour: Behaviour) -> Result[str]: return Result[str](text) case ConversionResultTypeEnum.PLUS_CODE: + # if its already a plus code, just return it if isinstance(query, PlusCodeQuery): return Result[str](str(query)) # get latlong and handle result - latlong = query.to_lat_long_coord(geocoder=behaviour.geocoder) + latlong_query = query.to_lat_long_coord(geocoder=behaviour.geocoder) - if not latlong: - return Result[str]("", error=latlong.error) + if not latlong_query: + return Result[str]("", error=latlong_query.error) if behaviour.debug: - print(f"debug: cli: {latlong.get()=}", file=behaviour.stderr) + print(f"debug: cli: {latlong_query.get()=}", file=behaviour.stderr) + # perform operation try: pluscode: str = 
_PlusCode_encode( - lat=latlong.get().latitude, lon=latlong.get().longitude + lat=latlong_query.get().latitude, lon=latlong_query.get().longitude ) except Exception as exc: @@ -1064,10 +1289,41 @@ def surplus(query: Query | str, behaviour: Behaviour) -> Result[str]: return Result[str](pluscode) case ConversionResultTypeEnum.LOCAL_CODE: + # if its already a local code, just return it + if isinstance(query, LocalCodeQuery): + return Result[str](str(query)) + + latlong: Latlong = EMPTY_LATLONG + + # if its a plus code, convert to latlong first + if isinstance(query, PlusCodeQuery): + pluscode_latlong_result = PlusCodeQuery.to_lat_long_coord( + query, geocoder=behaviour.geocoder + ) + + if not pluscode_latlong_result: + return Result[str]("", error=pluscode_latlong_result.error) + + latlong = pluscode_latlong_result.get() + + # get latlong and handle result + latlong_result = query.to_lat_long_coord(geocoder=behaviour.geocoder) + + if not latlong_result: + return Result[str]("", error=latlong_result.error) + + if behaviour.debug: + print(f"debug: cli: {latlong_result.get()=}", file=behaviour.stderr) + + latlong = latlong_result.get() + + # perform operation # TODO: https://github.com/markjoshwel/surplus/issues/18 + # https://github.com/google/open-location-code/wiki/Guidance-for-shortening-codes + return Result[str]( text, - error=UnavailableFeatureError( + error=NotImplementedError( "converting to Plus Code is not implemented yet" ), ) @@ -1078,15 +1334,16 @@ def surplus(query: Query | str, behaviour: Behaviour) -> Result[str]: return Result[str](str(query)) # get latlong and handle result - latlong = query.to_lat_long_coord(geocoder=behaviour.geocoder) + latlong_result = query.to_lat_long_coord(geocoder=behaviour.geocoder) - if not latlong: - return Result[str]("", error=latlong.error) + if not latlong_result: + return Result[str]("", error=latlong_result.error) if behaviour.debug: - print(f"debug: cli: {latlong.get()=}", file=behaviour.stderr) + print(f"debug: cli: 
{latlong_result.get()=}", file=behaviour.stderr) - return Result[str](str(latlong.get())) + # perform operation + return Result[str](str(latlong_result.get())) case _: return Result[str]( diff --git a/test.py b/test.py index fa8ac5e..d3dc37c 100644 --- a/test.py +++ b/test.py @@ -100,22 +100,26 @@ tests: list[ContinuityTest] = [ ), ContinuityTest( query="Ngee Ann Polytechnic, Singapore", - expected=( - "Ngee Ann Polytechnic\n" - "535 Clementi Road\n" - "Bukit Timah\n" - "599489\n" - "Northwest, Singapore" - ), + expected=[ + ( + "Ngee Ann Polytechnic\n" + "535 Clementi Road\n" + "Bukit Timah\n" + "599489\n" + "Northwest, Singapore" + ) + ], ), ContinuityTest( query="1.3521, 103.8198", - expected=( - "MacRitchie Nature Trail\n" - "Central Water Catchment\n" - "574325\n" - "Central, Singapore" - ), + expected=[ + ( + "MacRitchie Nature Trail\n" + "Central Water Catchment\n" + "574325\n" + "Central, Singapore" + ) + ], ), ] From d3ada0b386aac68cedc796d6706dbb767ce52754 Mon Sep 17 00:00:00 2001 From: Mark Joshwel Date: Wed, 6 Sep 2023 12:01:50 +0000 Subject: [PATCH 5/7] docs: fix typo, remove user_agent from sample output --- README.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/README.md b/README.md index ccc1a4f..9806724 100644 --- a/README.md +++ b/README.md @@ -271,7 +271,6 @@ debug: parse_query: behaviour.query=['8QJF+RP', 'Singapore'] debug: _match_plus_code: portion_plus_code='8QJF+RP', portion_locality='Singapore' debug: cli: query=Result(value=LocalCodeQuery(code='8QJF+RP', locality='Singapore'), error=None) debug: cli: latlong_result.get()=Latlong(latitude=1.3320625, longitude=103.7743125) -debug: cli: behaviour.user_agent='surplus/2.1.0-local (1fdbfa0b0cfb)' debug: cli: location={...} debug: _generate_text: seen_names=['Ngee Ann Polytechnic', 'Clementi Road'] debug: _generate_text_line: [True] -> True -------- 'Ngee Ann Polytechnic' @@ -1277,7 +1276,7 @@ surplus/2.1.0-local (1fdbfa0b0cfb) this is the hashed result of unique_info ``` 
-if at any time, the retrieval of ant of these four elements fail, surplus will just give +if at any time the retrieval of any of these four elements fail, surplus will just give up and default to `'surplus/ (generic-user)'`. if any of this seems weird to you, that's fine. pass in a custom user agent flag to From a9e26c89165418d16ec58772232ef08e7a6425f3 Mon Sep 17 00:00:00 2001 From: Mark Joshwel Date: Wed, 6 Sep 2023 17:39:53 +0000 Subject: [PATCH 6/7] s+: complete conversion to local code i hate GIS - geocoders now have to return a bounding box - new SHAREABLE_TEXT_LOCALITY constant, also exposed - _generate_text now does double duty for locality and sharetext generation --- playground.ipynb | 323 ++++++++++++++++++++++++----------- surplus/__init__.py | 1 + surplus/surplus.py | 400 ++++++++++++++++++++++++++++++++++---------- 3 files changed, 536 insertions(+), 188 deletions(-) diff --git a/playground.ipynb b/playground.ipynb index 79e6cd2..fa9a3f5 100644 --- a/playground.ipynb +++ b/playground.ipynb @@ -42,7 +42,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -356,166 +356,289 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## 2.1.0: the adventure of shortening global/full Plus Codes" + "## 2.1.0: adventures in of shortening global/full Plus Codes" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### testing rate-limited default geocoding functions" + "### testing rate-limited and cached default geocoding functions" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "test_geocoding = SurplusDefaultGeocoding(user_agent=\"surplus/playground\")" + ] + }, + { + "cell_type": "code", + "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "1.33318835, 103.77461234638255\n", - "{'amenity': 'Ngee Ann Polytechnic', 'house_number': '535', 
'road': 'Clementi Road', 'neighbourhood': 'Ewart Park', 'suburb': 'Bukit Timah', 'city': 'Singapore', 'county': 'Northwest', 'ISO3166-2-lvl6': 'SG-03', 'postcode': '599489', 'country': 'Singapore', 'country_code': 'sg', 'raw': {'place_id': 250910125, 'licence': 'Data © OpenStreetMap contributors, ODbL 1.0. http://osm.org/copyright', 'osm_type': 'relation', 'osm_id': 2535118, 'lat': '1.33318835', 'lon': '103.77461234638255', 'class': 'amenity', 'type': 'university', 'place_rank': 30, 'importance': 0.34662169301918117, 'addresstype': 'amenity', 'name': 'Ngee Ann Polytechnic', 'display_name': 'Ngee Ann Polytechnic, 535, Clementi Road, Ewart Park, Bukit Timah, Singapore, Northwest, 599489, Singapore', 'address': {'amenity': 'Ngee Ann Polytechnic', 'house_number': '535', 'road': 'Clementi Road', 'neighbourhood': 'Ewart Park', 'suburb': 'Bukit Timah', 'city': 'Singapore', 'county': 'Northwest', 'ISO3166-2-lvl6': 'SG-03', 'postcode': '599489', 'country': 'Singapore', 'country_code': 'sg'}, 'boundingbox': ['1.3289692', '1.3372184', '103.7701481', '103.7783945']}, 'latitude': 1.33318835, 'longitude': 103.77461234638255}\n" + "1\n", + "2\n", + "3\n", + "4\n", + "5\n", + "\n", + "1\n", + "2\n", + "3\n", + "4\n", + "5\n", + "\n", + "3.1107698050s\t->\t0.0000886890s\t\t(-3.1106811160002508s)\n" ] } ], "source": [ - "from surplus import SurplusGeocoderProtocol, SurplusReverserProtocol\n", + "from timeit import timeit\n", "\n", "\n", - "test_geocoding = SurplusDefaultGeocoding(user_agent=\"surplus/playground\")\n", + "test_stmt = \"\"\"\\\n", + "print(1)\n", + "test_geocoding.geocoder(\"Wisma Atria\") # instant\n", + "print(2)\n", + "test_geocoding.geocoder(\"Temasek Polytechnic\") # after 1 second\n", + "print(3)\n", + "location = test_geocoding.geocoder(\"Ngee Ann Polytechnic\") # after 1 second\n", + "print(4)\n", + "test_geocoding.reverser(f\"{location.latitude}, {location.longitude}\") # instant\n", + "print(5)\n", + "test_geocoding.reverser(f\"{location.latitude}, 
{location.longitude}\") # instant (cached)\n", + "print()\n", + "\"\"\"\n", "\n", - "print(location := test_geocoding.geocoder(\"Ngee Ann Polytechnic\"))\n", + "time_cold_call = timeit(test_stmt, globals=globals(), number=1) # expecting 3-4 seconds\n", + "time_2nd_call = timeit(test_stmt, globals=globals(), number=1) # should be instant\n", "\n", - "print(reversed := test_geocoding.reverser(f\"{location.latitude}, {location.longitude}\"))" + "print(\n", + " f\"{time_cold_call:.10f}s\\t->\\t{time_2nd_call:.10f}s\\t\\t({time_2nd_call - time_cold_call}s)\"\n", + ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### loop for less information until a local code is made" + "### reversing the query latlong and using the address information to form a locality" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ - "# TODO\n", - "\n", - "test1 = LocalCodeQuery(\"9R3J+R9\", \"Singapore\")\n", - "test2 = LocalCodeQuery(\"G227+XF\", \"St Lucia, Queensland, Australia\")\n", - "\n", "level = 13" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, "outputs": [ { - "data": { - "text/plain": [ - "{'suburb': 'Bishan',\n", - " 'city': 'Singapore',\n", - " 'county': 'Central',\n", - " 'ISO3166-2-lvl6': 'SG-01',\n", - " 'country': 'Singapore',\n", - " 'country_code': 'sg',\n", - " 'raw': {'place_id': 251115282,\n", - " 'licence': 'Data © OpenStreetMap contributors, ODbL 1.0. 
http://osm.org/copyright',\n", - " 'osm_type': 'way',\n", - " 'osm_id': 795946716,\n", - " 'lat': '1.3519117',\n", - " 'lon': '103.8489708',\n", - " 'class': 'place',\n", - " 'type': 'suburb',\n", - " 'place_rank': 19,\n", - " 'importance': 0.39184907371668787,\n", - " 'addresstype': 'suburb',\n", - " 'name': 'Bishan',\n", - " 'display_name': 'Bishan, Singapore, Central, Singapore',\n", - " 'address': {'suburb': 'Bishan',\n", - " 'city': 'Singapore',\n", - " 'county': 'Central',\n", - " 'ISO3166-2-lvl6': 'SG-01',\n", - " 'country': 'Singapore',\n", - " 'country_code': 'sg'},\n", - " 'boundingbox': ['1.3416846', '1.3679829', '103.8184512', '103.8604083']},\n", - " 'latitude': 1.3519117,\n", - " 'longitude': 103.8489708}" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + "St Lucia, St Lucia, Queensland, Australia\n", + "Austin, Travis County, Texas, United States\n" + ] } ], "source": [ "(\n", - " response := geocoding.reverser(\n", - " test1.to_lat_long_coord(geocoding.geocoder).get(), level=level\n", + " au_response := geocoding.reverser(\n", + " (\n", + " au_target := (\n", + " LocalCodeQuery(\n", + " \"G227+XF\", \"St Lucia, Queensland, Australia\"\n", + " ).to_lat_long_coord(geocoding.geocoder)\n", + " )\n", + " ).get(),\n", + " level=level,\n", " )\n", - ")" + ")\n", + "\n", + "au_locality = f\"{au_response['suburb']}, {au_response['city_district']}, {au_response['state']}, {au_response['country']}\"\n", + "print(au_locality)\n", + "\n", + "(\n", + " us_response := geocoding.reverser(\n", + " (\n", + " us_target := (\n", + " LocalCodeQuery(\"77Q4+7X\", \"Austin, Texas, USA\").to_lat_long_coord(\n", + " geocoding.geocoder\n", + " )\n", + " )\n", + " ).get(),\n", + " level=level,\n", + " )\n", + ")\n", + "\n", + "us_locality = f\"{us_response['city']}, {us_response['county']}, {us_response['state']}, {us_response['country']}\"\n", + "print(us_locality)" + ] + }, + { + 
"cell_type": "markdown", + "metadata": {}, + "source": [ + "### getting boundary boxes" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "metadata": {}, "outputs": [ { - "data": { - "text/plain": [ - "{'suburb': 'St Lucia',\n", - " 'city_district': 'St Lucia',\n", - " 'city': 'Brisbane City',\n", - " 'state': 'Queensland',\n", - " 'ISO3166-2-lvl4': 'AU-QLD',\n", - " 'postcode': '4072',\n", - " 'country': 'Australia',\n", - " 'country_code': 'au',\n", - " 'raw': {'place_id': 54477898,\n", - " 'licence': 'Data © OpenStreetMap contributors, ODbL 1.0. http://osm.org/copyright',\n", - " 'osm_type': 'node',\n", - " 'osm_id': 88800268,\n", - " 'lat': '-27.4987362',\n", - " 'lon': '153.0081642',\n", - " 'class': 'place',\n", - " 'type': 'suburb',\n", - " 'place_rank': 19,\n", - " 'importance': 0.27501,\n", - " 'addresstype': 'suburb',\n", - " 'name': 'St Lucia',\n", - " 'display_name': 'St Lucia, Brisbane City, Queensland, 4072, Australia',\n", - " 'address': {'suburb': 'St Lucia',\n", - " 'city_district': 'St Lucia',\n", - " 'city': 'Brisbane City',\n", - " 'state': 'Queensland',\n", - " 'ISO3166-2-lvl4': 'AU-QLD',\n", - " 'postcode': '4072',\n", - " 'country': 'Australia',\n", - " 'country_code': 'au'},\n", - " 'boundingbox': ['-27.5187362', '-27.4787362', '152.9881642', '153.0281642']},\n", - " 'latitude': -27.4987362,\n", - " 'longitude': 153.0081642}" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + "{'addresstype': 'suburb',\n", + " 'boundingbox': ['-27.5187362', '-27.4787362', '152.9881642', '153.0281642'],\n", + " 'class': 'place',\n", + " 'display_name': 'St Lucia, Brisbane City, Queensland, 4072, Australia',\n", + " 'importance': 0.27501,\n", + " 'lat': '-27.4987362',\n", + " 'licence': 'Data © OpenStreetMap contributors, ODbL 1.0. 
'\n", + " 'http://osm.org/copyright',\n", + " 'lon': '153.0081642',\n", + " 'name': 'St Lucia',\n", + " 'osm_id': 88800268,\n", + " 'osm_type': 'node',\n", + " 'place_id': 54477898,\n", + " 'place_rank': 19,\n", + " 'type': 'suburb'}\n", + "\n", + "Latlong(latitude=-27.4987362, longitude=153.0081642, bounding_box=[-27.5187362, -27.4787362, 152.9881642, 153.0281642])\n" + ] } ], "source": [ - "(\n", - " response := geocoding.reverser(\n", - " test2.to_lat_long_coord(geocoding.geocoder).get(), level=level\n", + "from geopy.geocoders import Nominatim\n", + "from pprint import pprint\n", + "\n", + "target_query: Result[Latlong] = au_target\n", + "target_locality: str = au_locality\n", + "\n", + "raw_geocoding = Nominatim(user_agent=\"surplus/playground\")\n", + "latlong = raw_geocoding.geocode(target_locality)\n", + "pprint(latlong.raw)\n", + "print()\n", + "\n", + "# done: now implmented in surplus as surplus.Latlong.bounding_box\n", + "locality_latlong = geocoding.geocoder(target_locality)\n", + "pprint(locality_latlong)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[-27.5187362, -27.4787362, 152.9881642, 153.0281642]\n", + "(True, True, True, True)\n", + "(True, True, True, True)\n" + ] + } + ], + "source": [ + "# based on \n", + "\n", + "target_latlong = target_query.get()\n", + "if locality_latlong.bounding_box is None:\n", + " ... 
# raise some error\n", + "\n", + "print(locality_latlong.bounding_box)\n", + "check1 = (\n", + " # The center point of the feature is within 0.4 degrees latitude and 0.4 degrees longitude\n", + " (\n", + " (target_latlong.latitude - 0.4)\n", + " <= locality_latlong.latitude\n", + " <= (target_latlong.latitude + 0.4)\n", + " ),\n", + " (\n", + " (target_latlong.longitude - 0.4)\n", + " <= locality_latlong.longitude\n", + " <= (target_latlong.longitude + 0.4)\n", + " ),\n", + " # The bounding box of the feature is less than 0.8 degrees high and wide.\n", + " abs(locality_latlong.bounding_box[0] - locality_latlong.bounding_box[1]) < 0.8,\n", + " abs(locality_latlong.bounding_box[2] - locality_latlong.bounding_box[3]) < 0.8,\n", + ")\n", + "\n", + "\n", + "check2 = (\n", + " # The center point of the feature is within 0.4 degrees latitude and 0.4 degrees longitude\n", + " (\n", + " (target_latlong.latitude - 8)\n", + " <= locality_latlong.latitude\n", + " <= (target_latlong.latitude + 8)\n", + " ),\n", + " (\n", + " (target_latlong.longitude - 8)\n", + " <= locality_latlong.longitude\n", + " <= (target_latlong.longitude + 8)\n", + " ),\n", + " # The bounding box of the feature is less than 0.8 degrees high and wide.\n", + " abs(locality_latlong.bounding_box[0] - locality_latlong.bounding_box[1]) < 16,\n", + " abs(locality_latlong.bounding_box[2] - locality_latlong.bounding_box[3]) < 16,\n", + ")\n", + "\n", + "print(check1)\n", + "print(check2)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "G227+XF St Lucia, St Lucia, Queensland, Australia\n" + ] + } + ], + "source": [ + "from pluscodes import encode\n", + "\n", + "target_plus_code = encode(\n", + " lat=target_latlong.latitude, lon=target_latlong.longitude, code_length=10\n", + ")\n", + "portion_plus_code = \"\"\n", + "\n", + "if check1:\n", + " portion_plus_code = target_plus_code[4:]\n", + " 
print(portion_plus_code, target_locality)\n", + "\n", + "elif check2:\n", + " portion_plus_code = target_plus_code[2:]\n", + " print(portion_plus_code, target_locality)\n", + "\n", + "else:\n", + " print(\n", + " \"info: could not determine a suitable geographical feature to use as locality for shortening.\"\n", " )\n", - ")" + " print(plus_code)" ] }, { diff --git a/surplus/__init__.py b/surplus/__init__.py index 6b26542..18ae3e2 100644 --- a/surplus/__init__.py +++ b/surplus/__init__.py @@ -48,6 +48,7 @@ from .surplus import ( SHAREABLE_TEXT_LINE_4_KEYS, SHAREABLE_TEXT_LINE_5_KEYS, SHAREABLE_TEXT_LINE_6_KEYS, + SHAREABLE_TEXT_LOCALITY, SHAREABLE_TEXT_NAMES, VERSION, VERSION_SUFFIX, diff --git a/surplus/surplus.py b/surplus/surplus.py index 4d57c62..eff63e4 100644 --- a/surplus/surplus.py +++ b/surplus/surplus.py @@ -136,6 +136,13 @@ SHAREABLE_TEXT_NAMES: Final[tuple[str, ...]] = ( + SHAREABLE_TEXT_LINE_2_KEYS + ("house_name", "road") ) +SHAREABLE_TEXT_LOCALITY: dict[str, tuple[str, ...]] = { + "default": ("city_district", "district", "city", *SHAREABLE_TEXT_LINE_6_KEYS), + "SG": ("country",), +} + +# adjusts geocoder zoom level when geocoding latlong into an address +LOCALITY_GEOCODER_LEVEL: int = 13 # exceptions @@ -169,6 +176,19 @@ class EmptyQueryError(SurplusException): # data structures +class TextGenerationEnum(Enum): + """ + (internal use) enum representing what type of text to generate for _generate_text() + + values + SHAREABLE_TEXT: str = "sharetext" + LOCAL_CODE: str = "localcode" + """ + + SHAREABLE_TEXT: str = "sharetext" + LOCALITY_TEXT: str = "locality_text" + + class ConversionResultTypeEnum(Enum): """ enum representing what the result type of conversion should be @@ -273,11 +293,16 @@ class Result(NamedTuple, Generic[ResultType]): class Latlong(NamedTuple): """ - typing.NamedTuple representing a latitude-longitude coordinate pair + typing.NamedTuple representing a latitude-longitude coordinate pair and any extra + information arguments latitude: 
float longitude: float + bounding_box: tuple[float, float, float, float] | None = None + a four-tuple representing a bounding box, (lat1, lat2, lon1, lon2) or None + the user does not need to enter this. this attribute is only used for + shortening plus codes, and will be supplied by the geocoding service. methods def __str__(self) -> str: ... @@ -285,6 +310,7 @@ class Latlong(NamedTuple): latitude: float longitude: float + bounding_box: tuple[float, float, float, float] | None = None def __str__(self) -> str: """ @@ -306,8 +332,11 @@ class SurplusGeocoderProtocol(Protocol): name string to location function. must take in a string and return a Latlong. - function can be functools.lru_cache()-wrapped if the geocoding service asks for - caching + **the function returned MUST supply a `bounding_box` attribute to the to-be-returned + [Latlong](#class-latlong).** the bounding box is used when surplus shortens Plus Codes. + + function can and should be at minimum functools.lru_cache()-wrapped if the geocoding + service asks for caching exceptions are handled by the caller """ @@ -346,8 +375,8 @@ class SurplusReverserProtocol(Protocol): 'raw': {...}, } - function can be functools.lru_cache()-wrapped if the geocoding service asks for - caching + function can and should be at minimum functools.lru_cache()-wrapped if the geocoding + service asks for caching exceptions are handled by the caller, see the playground notebook in repository root for sample output @@ -673,9 +702,24 @@ class SurplusDefaultGeocoding: f"No suitable location could be geolocated from '{place}'" ) + bounding_box: tuple[float, float, float, float] | None = location.raw.get( + "boundingbox", None + ) + + if location.raw.get("boundingbox", None) is not None: + _bounding_box = [float(c) for c in location.raw.get("boundingbox", [])] + if len(_bounding_box) == 4: + bounding_box = ( + _bounding_box[0], + _bounding_box[1], + _bounding_box[2], + _bounding_box[3], + ) + return Latlong( latitude=location.latitude, 
longitude=location.longitude, + bounding_box=bounding_box, ) def reverser(self, latlong: Latlong, level: int = 18) -> dict[str, Any]: @@ -906,7 +950,12 @@ def parse_query(behaviour: Behaviour) -> Result[Query]: split_query = behaviour.query if behaviour.debug: - print(f"debug: {split_query=}\ndebug: {original_query=}", file=behaviour.stderr) + print( + f"debug: parse_query: {split_query=}\n", + f"debug: parse_query: {original_query=}", + sep="", + file=behaviour.stderr, + ) # not a plus/local code, try to match for latlong or string query match split_query: @@ -921,28 +970,26 @@ def parse_query(behaviour: Behaviour) -> Result[Query]: else: # has comma, possibly a latlong coord comma_split_single: list[str] = single.split(",") - if len(comma_split_single) > 2: - return Result[Query]( - LatlongQuery(EMPTY_LATLONG), - error=LatlongParseError("unable to parse latlong coord"), - ) + if len(comma_split_single) == 2: + try: # try to type cast query + latitude = float(comma_split_single[0].strip(",")) + longitude = float(comma_split_single[-1].strip(",")) - try: # try to type cast query - latitude = float(comma_split_single[0].strip(",")) - longitude = float(comma_split_single[-1].strip(",")) + except ValueError: # not a latlong coord, fallback + return Result[Query](StringQuery(single)) - except ValueError: # not a latlong coord, fallback - return Result[Query](StringQuery(single)) - - else: # are floats, so is a latlong coord - return Result[Query]( - LatlongQuery( - Latlong( - latitude=latitude, - longitude=longitude, + else: # are floats, so is a latlong coord + return Result[Query]( + LatlongQuery( + Latlong( + latitude=latitude, + longitude=longitude, + ) ) ) - ) + + # not a latlong coord, fallback + return Result[Query](StringQuery(original_query)) case [left_single, right_single]: # possibly a: @@ -1065,9 +1112,27 @@ def _unique(l: Sequence[str]) -> list[str]: def _generate_text( - location: dict[str, Any], behaviour: Behaviour, debug: bool = False + location: 
dict[str, Any], + behaviour: Behaviour, + mode: TextGenerationEnum = TextGenerationEnum.SHAREABLE_TEXT, + debug: bool = False, ) -> str: - """(internal function) generate shareable text from location dict""" + """ + (internal function) generate shareable text from location dict + + arguments + location: dict[str, Any] + dictionary from geocoding reverser function + behaviour: Behaviour + surplus behaviour + mode: GenerationModeEnum = GenerationModeEnum.SHAREABLE_TEXT + generation mode, defaults to shareable text generation + debug: bool = False + behaviour-seperate debug flag because this function is called twice by + surplus in debug mode, one for debug and one for non-debug output + + returns str + """ def _generate_text_line( line_number: int, @@ -1083,6 +1148,8 @@ def _generate_text( line number to prefix with line_keys: Sequence[str] list of keys to .get() from location dict + seperator: str = ", " + seperator to join elements with filter: Callable[[str], list[bool]] = lambda e: True function that takes in a string and returns a list of bools, used to filter elements from line_keys. list will be passed to all(). if all @@ -1135,6 +1202,14 @@ def _generate_text( if key.lower().startswith("iso3166"): iso3166_2 = location.get(key, "") + split_iso3166_2 = [part.upper() for part in iso3166_2.split("-")] + + if debug: + print( + f"debug: _generate_text: {split_iso3166_2=}", + file=behaviour.stderr, + ) + # skeleton code to allow for changing keys based on iso3166-2 code st_line0_keys = SHAREABLE_TEXT_LINE_0_KEYS st_line1_keys = SHAREABLE_TEXT_LINE_1_KEYS @@ -1144,48 +1219,110 @@ def _generate_text( st_line5_keys = SHAREABLE_TEXT_LINE_5_KEYS st_line6_keys = SHAREABLE_TEXT_LINE_6_KEYS st_names = SHAREABLE_TEXT_NAMES + st_locality: tuple[str, ...] 
= () - match iso3166_2.split("-"): - case _: - pass + match split_iso3166_2: + case ["SG", *_]: # Singapore + if debug: + print( + "debug: _generate_text: " + f"using special key arrangements for '{iso3166_2}' (Singapore)", + file=behaviour.stderr, + ) + + st_locality = SHAREABLE_TEXT_LOCALITY["SG"] + + case _: # default + if debug: + print( + "debug: _generate_text: " + f"using default key arrangements for '{iso3166_2}'", + file=behaviour.stderr, + ) + + st_locality = SHAREABLE_TEXT_LOCALITY["default"] # start generating text - text: list[str] = [] + match mode: + case TextGenerationEnum.SHAREABLE_TEXT: + text: list[str] = [] - seen_names: list[str] = [ - detail - for detail in _unique( - [str(location.get(location_key, "")) for location_key in st_names] - ) - if detail != "" - ] + seen_names: list[str] = [ + detail + for detail in _unique( + [str(location.get(location_key, "")) for location_key in st_names] + ) + if detail != "" + ] - if debug: - print(f"debug: _generate_text: {seen_names=}", file=behaviour.stderr) + if debug: + print(f"debug: _generate_text: {seen_names=}", file=behaviour.stderr) - general_global_info: list[str] = [ - str(location.get(detail, "")) for detail in st_line6_keys - ] + general_global_info: list[str] = [ + str(location.get(detail, "")) for detail in st_line6_keys + ] - text.append(_generate_text_line(0, st_line0_keys)) - text.append(_generate_text_line(1, st_line1_keys)) - text.append(_generate_text_line(2, st_line2_keys)) - text.append(_generate_text_line(3, st_line3_keys, seperator=" ")) - text.append( - _generate_text_line( - 4, - st_line4_keys, - filter=lambda ak: [ - # everything here should be True if the element is to be kept - ak not in general_global_info, - not any(True if (ak in sn) else False for sn in seen_names), - ], - ) - ) - text.append(_generate_text_line(5, st_line5_keys)) - text.append(_generate_text_line(6, st_line6_keys)) + text.append( + _generate_text_line( + line_number=0, + line_keys=st_line0_keys, + ) + ) + 
text.append( + _generate_text_line( + line_number=1, + line_keys=st_line1_keys, + ) + ) + text.append( + _generate_text_line( + line_number=2, + line_keys=st_line2_keys, + ) + ) + text.append( + _generate_text_line( + line_number=3, + line_keys=st_line3_keys, + seperator=" ", + ) + ) + text.append( + _generate_text_line( + line_number=4, + line_keys=st_line4_keys, + filter=lambda ak: [ + # everything here should be True if the element is to be kept + ak not in general_global_info, + not any(True if (ak in sn) else False for sn in seen_names), + ], + ) + ) + text.append( + _generate_text_line( + line_number=5, + line_keys=st_line5_keys, + ) + ) + text.append( + _generate_text_line( + line_number=6, + line_keys=st_line6_keys, + ) + ) - return "".join(_unique(text)).rstrip() + return "".join(_unique(text)).rstrip() + + case TextGenerationEnum.LOCALITY_TEXT: + return _generate_text_line( + line_number=0, + line_keys=st_locality, + ) + + case _: + raise NotImplementedError( + f"unknown mode '{mode}' (expected a TextGenerationEnum)" + ) def surplus(query: Query | str, behaviour: Behaviour) -> Result[str]: @@ -1233,17 +1370,17 @@ def surplus(query: Query | str, behaviour: Behaviour) -> Result[str]: return Result[str]("", error=latlong_result.error) if behaviour.debug: - print(f"debug: cli: {latlong_result.get()=}", file=behaviour.stderr) + print(f"debug: {latlong_result.get()=}", file=behaviour.stderr) # reverse location and handle result try: - location: dict[str, Any] = behaviour.reverser(latlong_result.get()) + location = behaviour.reverser(latlong_result.get()) except Exception as exc: return Result[str]("", error=exc) if behaviour.debug: - print(f"debug: cli: {location=}", file=behaviour.stderr) + print(f"debug: {location=}", file=behaviour.stderr) # generate text if behaviour.debug: @@ -1275,7 +1412,7 @@ def surplus(query: Query | str, behaviour: Behaviour) -> Result[str]: return Result[str]("", error=latlong_query.error) if behaviour.debug: - print(f"debug: cli: 
{latlong_query.get()=}", file=behaviour.stderr) + print(f"debug: {latlong_query.get()=}", file=behaviour.stderr) # perform operation try: @@ -1293,19 +1430,6 @@ def surplus(query: Query | str, behaviour: Behaviour) -> Result[str]: if isinstance(query, LocalCodeQuery): return Result[str](str(query)) - latlong: Latlong = EMPTY_LATLONG - - # if its a plus code, convert to latlong first - if isinstance(query, PlusCodeQuery): - pluscode_latlong_result = PlusCodeQuery.to_lat_long_coord( - query, geocoder=behaviour.geocoder - ) - - if not pluscode_latlong_result: - return Result[str]("", error=pluscode_latlong_result.error) - - latlong = pluscode_latlong_result.get() - # get latlong and handle result latlong_result = query.to_lat_long_coord(geocoder=behaviour.geocoder) @@ -1313,21 +1437,121 @@ def surplus(query: Query | str, behaviour: Behaviour) -> Result[str]: return Result[str]("", error=latlong_result.error) if behaviour.debug: - print(f"debug: cli: {latlong_result.get()=}", file=behaviour.stderr) + print(f"debug: {latlong_result.get()=}", file=behaviour.stderr) - latlong = latlong_result.get() + query_latlong = latlong_result.get() - # perform operation - # TODO: https://github.com/markjoshwel/surplus/issues/18 - # https://github.com/google/open-location-code/wiki/Guidance-for-shortening-codes + # reverse location and handle result + try: + location = behaviour.reverser( + query_latlong, level=LOCALITY_GEOCODER_LEVEL + ) - return Result[str]( - text, - error=NotImplementedError( - "converting to Plus Code is not implemented yet" - ), + except Exception as exc: + return Result[str]("", error=exc) + + if behaviour.debug: + print(f"debug: {location=}", file=behaviour.stderr) + + # generate locality portion of local code + if behaviour.debug: + print( + _generate_text( + location=location, + behaviour=behaviour, + mode=TextGenerationEnum.LOCALITY_TEXT, + debug=behaviour.debug, + ).strip() + ) + + portion_locality: str = _generate_text( + location=location, + 
behaviour=behaviour, + mode=TextGenerationEnum.LOCALITY_TEXT, + ).strip() + + # reverse locality portion + try: + locality_latlong: Latlong = behaviour.geocoder(portion_locality) + + # check now if bounding_box is set and valid + assert locality_latlong.bounding_box is not None, ( + "(shortening) geocoder-returned latlong has .bounding_box=None" + f" - {locality_latlong.bounding_box}" + ) + + assert len(locality_latlong.bounding_box) == 4, ( + "(shortening) geocoder-returned latlong has len(.bounding_box) < 4" + f" - {locality_latlong.bounding_box}" + ) + + assert all([type(c) == float for c in locality_latlong.bounding_box]), ( + "(shortening) geocoder-returned latlong has non-float in .bounding_box" + f" - {locality_latlong.bounding_box}" + ) + + except Exception as exc: + return Result[str]("", error=exc) + + plus_code = _PlusCode_encode( + lat=query_latlong.latitude, + lon=query_latlong.longitude, ) + # https://github.com/google/open-location-code/wiki/Guidance-for-shortening-codes + check1 = ( + # The center point of the feature is within 0.4 degrees latitude and 0.4 + # degrees longitude + ( + (query_latlong.latitude - 0.4) + <= locality_latlong.latitude + <= (query_latlong.latitude + 0.4) + ), + ( + (query_latlong.longitude - 0.4) + <= locality_latlong.longitude + <= (query_latlong.longitude + 0.4) + ), + # The bounding box of the feature is less than 0.8 degrees high and wide. + abs(locality_latlong.bounding_box[0] - locality_latlong.bounding_box[1]) + < 0.8, + abs(locality_latlong.bounding_box[2] - locality_latlong.bounding_box[3]) + < 0.8, + ) + + check2 = ( + # The center point of the feature is within 0.4 degrees latitude and 0.4 + # degrees longitude" + ( + (query_latlong.latitude - 8) + <= locality_latlong.latitude + <= (query_latlong.latitude + 8) + ), + ( + (query_latlong.longitude - 8) + <= locality_latlong.longitude + <= (query_latlong.longitude + 8) + ), + # The bounding box of the feature is less than 0.8 degrees high and wide. 
+ abs(locality_latlong.bounding_box[0] - locality_latlong.bounding_box[1]) + < 16, + abs(locality_latlong.bounding_box[2] - locality_latlong.bounding_box[3]) + < 16, + ) + + if check1: + return Result[str](f"{plus_code[4:]} {portion_locality}") + + elif check2: + return Result[str](f"{plus_code[2:]} {portion_locality}") + + print( + "info: could not determine a suitable geographical feature to use as " + "locality for shortening. full plus code is returned.", + file=behaviour.stderr, + ) + return Result[str](plus_code) + case ConversionResultTypeEnum.LATLONG: # return the latlong if already given a latlong if isinstance(query, LatlongQuery): @@ -1340,7 +1564,7 @@ def surplus(query: Query | str, behaviour: Behaviour) -> Result[str]: return Result[str]("", error=latlong_result.error) if behaviour.debug: - print(f"debug: cli: {latlong_result.get()=}", file=behaviour.stderr) + print(f"debug: {latlong_result.get()=}", file=behaviour.stderr) # perform operation return Result[str](str(latlong_result.get())) From e47138c0d9bf28b62a2750da5f784e6fb511058c Mon Sep 17 00:00:00 2001 From: Mark Joshwel Date: Wed, 6 Sep 2023 17:44:04 +0000 Subject: [PATCH 7/7] docs: update api ref and output tech details - update debug output and add details for split_query and original_query - update wording on geocoding caching what to do - add Latlong.bounding_box attr - add SHAREABLE_TEXT_LOCALITY constant --- README.md | 66 +++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 50 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index 9806724..e1fde69 100644 --- a/README.md +++ b/README.md @@ -196,7 +196,7 @@ and do the following: before moving on. 2. include the erroneous query. - (_the Plus Code/local code/latlong coord/query string you passed into surplus_) + (_the Plus Code/local code/latlong coordinate/query string you passed into surplus_) 3. 
include output from the teminal with the [`--debug` flag](#command-line-usage) passed to the surplus CLI or with @@ -270,8 +270,8 @@ surplus version 2.1.0, debug mode (latest@future, Tue 05 Sep 2023 23:38:59 +0800 debug: parse_query: behaviour.query=['8QJF+RP', 'Singapore'] debug: _match_plus_code: portion_plus_code='8QJF+RP', portion_locality='Singapore' debug: cli: query=Result(value=LocalCodeQuery(code='8QJF+RP', locality='Singapore'), error=None) -debug: cli: latlong_result.get()=Latlong(latitude=1.3320625, longitude=103.7743125) -debug: cli: location={...} +debug: latlong_result.get()=Latlong(latitude=1.3320625, longitude=103.7743125) +debug: location={...} debug: _generate_text: seen_names=['Ngee Ann Polytechnic', 'Clementi Road'] debug: _generate_text_line: [True] -> True -------- 'Ngee Ann Polytechnic' debug: _generate_text_line: [True] -> True -------- '535' @@ -297,32 +297,43 @@ Northwest, Singapore variables -- **variable `behaviour.query`** +- **variables `behaviour.query`, `split_query` and `original_query`** - the original query string or a list of strings from space-splitting the original query + (_`split_query` and `original_query` are only shown if query is a latlong coordinate + or query string_) + + `behaviour.query` is the original query string or a list of strings from space-splitting the original query string passed to [`parse_query()`](#def-parse_query) for parsing + `split_query` is the original query string split by spaces + + `original_query` is a single non-split string + ```text - $ s+ 77Q4+7X Austin, Texas, USA - -------------------------- + $ s+ Temasek Polytechnic + ------------------- query - behaviour.query -> ['77Q4+7X', 'Austin', 'Texas', 'USA'] + behaviour.query -> ['Temasek', 'Polytechnic'] + split_query -> ['Temasek', 'Polytechnic'] + original_query -> 'Temasek Polytechnic' ``` ```text >>> surplus("77Q4+7X Austin, Texas, USA", surplus.Behaviour()) behaviour.query -> '77Q4+7X Austin, Texas, USA' + split_query -> ['77Q4+7X', 
'Austin,', 'Texas,', 'USA'] + original_query -> '77Q4+7X Austin, Texas, USA' ``` - **variables `portion_plus_code` and `portion_locality`** - (_only shown if the query is a local code, not shown on full-length plus codes, + (_only shown if the query is a local code, not shown on full-length Plus Codes, latlong coordinates or string queries_) - represents the plus code and locality portions of a - [shortened plus code](https://en.wikipedia.org/wiki/Open_Location_Code#Common_usage_and_shortening) + represents the Plus Code and locality portions of a + [shortened Plus Code](https://en.wikipedia.org/wiki/Open_Location_Code#Common_usage_and_shortening) (_referred to as a "local code" in the codebase_) respectively - **variable `query`** @@ -334,9 +345,9 @@ variables - **expression `latlong_result.get()=`** - (_only shown if the query is a plus code_) + (_only shown if the query is a Plus Code_) - the latitude longitude coordinates derived from the plus code + the latitude longitude coordinates derived from the Plus Code - **variable `location`** @@ -599,6 +610,21 @@ line breakdown of shareable text output, accompanied by their Nominatim keys: a tuple of strings containing Nominatim keys used in shareable text line 0-2 and special keys in line 3 +- `SHAREABLE_TEXT_LOCALITY: dict[str, tuple[str, ...]]` + + a dictionary of iso3166-2 country-portion strings with tuples of strings as their + values + + used when generating the locality portions of shortened Plus Codes/local codes + + ```python + { + "default": (...), + "SG": (...,), + ...
+ } + ``` + - `EMPTY_LATLONG: typing.Final[Latlong]` a constant for an empty latlong coordinate, with latitude and longitude set to 0.0 @@ -658,9 +684,12 @@ class for documentation and static type checking of surplus geocoder functions - **information on conforming functions** - function takes in a location name as a string, and returns a [Latlong](#class-latlong) + function takes in a location name as a string, and returns a [Latlong](#class-latlong). - function can and should be be + **function MUST supply a `bounding_box` attribute to the to-be-returned + [Latlong](#class-latlong).** the bounding box is used when surplus shortens Plus Codes. + + function can and should be at minimum [`functools.lru_cache()`-wrapped](https://docs.python.org/3/library/functools.html#functools.lru_cache) if the geocoding service asks for caching @@ -711,7 +740,7 @@ class for documentation and static type checking of surplus reverser functions } ``` - function can and should be + function can and should be at minimum [`functools.lru_cache()`-wrapped](https://docs.python.org/3/library/functools.html#functools.lru_cache) if the geocoding service asks for caching @@ -951,6 +980,11 @@ attributes - `latitude: float` - `longitude: float` +- `bounding_box: tuple[float, float, float, float] | None = None` + a four-tuple representing a bounding box, `(lat1, lat2, lon1, lon2)` or None. + + the user does not need to enter this. the attribute is only used when shortening Plus + Codes, and would be supplied by the geocoding service during shortening. methods