The tests fail under Windows, as can be seen here.
The reason is that, under Windows, the telephone symbol (☎, U+260E) is somehow not parsed properly.
When I tried to print it, I saw something like:
E UnicodeEncodeError: 'charmap' codec can't encode character '\u260e' in position 38: character maps to <undefined>
Unfortunately, I do not have access to a Windows machine to dig deeper in a reasonable manner.
Raw error:
================================== FAILURES ===================================
_________________________________ test_verena _________________________________
def test_verena():
v = Verena()
> res = v.get()
tests\verena\test_verena.py:6:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
C:\hostedtoolcache\windows\Python\3.9.13\x64\lib\site-packages\deutschland\verena\verena.py:21: in get
extract = VerenaExtractor(page).extract()
C:\hostedtoolcache\windows\Python\3.9.13\x64\lib\site-packages\deutschland\verena\verenaextractor.py:38: in extract
phone, fax, homepage, email, deadline = self.__extract_part4(aus_parts[3])
C:\hostedtoolcache\windows\Python\3.9.13\x64\lib\site-packages\deutschland\verena\verenaextractor.py:158: in __extract_part4
print(x)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <encodings.cp1252.IncrementalEncoder object at 0x000002AC31A54F10>
input = '\\r\\r\\n\\r\\r\\n \u260e 02381 973060\\r\\r\\n '
final = False
def encode(self, input, final=False):
> return codecs.charmap_encode(input,self.errors,encoding_table)[0]
E UnicodeEncodeError: 'charmap' codec can't encode character '\u260e' in position 38: character maps to <undefined>
C:\hostedtoolcache\windows\Python\3.9.13\x64\lib\encodings\cp1252.py:19: UnicodeEncodeError
___________________________ test_extractor_content ____________________________
def test_extractor_content():
with open("tests/verena/ausschreibung_test_input.html", "r") as f:
with open("tests/verena/ausschreibung_correct_result.json", "r") as correct:
content = "<html><body>" + f.read() + "</body></html>"
ve = VerenaExtractor(content)
res = ve.extract()
> assert len(res) == 1 and res[0] == json.loads(correct.read())
E AssertionError: assert (1 == 1 and {'comments': ...ulingen', ...} == {'comments': ...ulingen', ...}
E + where 1 = len([{'comments': 'Bemerkung zur Stelle: Testbemerkung', 'contact': {'fax': '0172 2222 2222', 'homepage': 'http://www.eine...line/': '17.09.2021', 'desc': 'Eine Schule\nSchule der Sekundarstufe II\ndes Landkreis Schuling\n9999 Schulingen', ...}])
E Omitting 11 identical items, use -vv to show
E Differing items:
E {'contact': {'fax': '0172 2222 2222', 'homepage': 'http://www.eine-schule.de/', 'mail': {'adress': '[email protected]...'mailto:[email protected]?subject=Stellenausschreibung in VERENA', 'subject': 'Stellenausschreibung in VERENA'}}} != {'contact': {'fax': '0172 2222 2222', 'homepage': 'http://www.eine-schule.de/', 'mail': {'adress': '[email protected]?subject=Stellenausschreibung in VERENA', 'subject': 'Stellenausschreibung in VERENA'}, 'phone': '0172 1111 1111'}}
E Full diff:
E {
E 'comments': 'Bemerkung zur Stelle: Testbemerkung',
E 'contact': {'fax': '0172 2222 2222',
E 'homepage': 'http://www.eine-schule.de/',
E 'mail': {'adress': '[email protected]',
E 'raw': 'mailto:[email protected]?subject=Stellenausschreibung '
E 'in VERENA',
E - 'subject': 'Stellenausschreibung in VERENA'},
E + 'subject': 'Stellenausschreibung in VERENA'}},
E ? +
E - 'phone': '0172 1111 1111'},
E 'deadline': '17.09.2021',
E 'desc': 'Eine Schule\n'
E 'Schule der Sekundarstufe II\n'
E 'des Landkreis Schuling\n'
E '9999 Schulingen',
E 'duration': '01.01.2021 - 01.01.2022',
E 'geolocation': {'coord_system': 'epsg:25832',
E 'coordinates': [1111111,
E 1111111],
E 'post_adress': 'Eine Straße 1\n'
E '99999 Schulingen'},
E 'hours_per_week': '13,5',
E 'replacement_job_title': 'Lehrkraft',
E 'replacement_job_type': 'Vertretung',
E 'replacement_job_type_raw': 'Vertretung für',
E 'school_id': '99999',
E 'subjects': ['Fach 1',
E 'Fach 2'],
E })
tests\verena\test_verenaextractor.py:12: AssertionError
============================== warnings summary ===============================
help wanted