Skip to content

Commit

Permalink
Fix conversion of headers fields in Apify <--> Scrapy request translation (#182)
Browse files Browse the repository at this point in the history
  • Loading branch information
vdusek committed Feb 1, 2024
1 parent d88b21a commit 268adda
Show file tree
Hide file tree
Showing 4 changed files with 30 additions and 5 deletions.
4 changes: 3 additions & 1 deletion CHANGELOG.md
Expand Up @@ -2,7 +2,9 @@

## [1.5.5](../../releases/tag/v1.5.5) - Unreleased

...
### Fixed

- Fix conversion of `headers` fields in Apify <--> Scrapy request translation

## [1.5.4](../../releases/tag/v1.5.4) - 2024-01-24

Expand Down
16 changes: 14 additions & 2 deletions src/apify/scrapy/requests.py
Expand Up @@ -5,6 +5,7 @@

try:
from scrapy import Request, Spider
from scrapy.http.headers import Headers
from scrapy.utils.request import request_from_dict
except ImportError as exc:
raise ImportError(
Expand Down Expand Up @@ -37,10 +38,16 @@ def to_apify_request(scrapy_request: Request, spider: Spider) -> dict:
apify_request = {
'url': scrapy_request.url,
'method': scrapy_request.method,
'headers': scrapy_request.headers,
'userData': scrapy_request.meta.get('userData', {}),
}

if isinstance(scrapy_request.headers, Headers):
apify_request['headers'] = dict(scrapy_request.headers.to_unicode_dict())
else:
Actor.log.warning(
f'scrapy_request.headers is not an instance of the scrapy.http.headers.Headers class, scrapy_request.headers = {scrapy_request.headers}',
)

# Add 'id' to the apify_request
if scrapy_request.meta.get('apify_request_id'):
apify_request['id'] = scrapy_request.meta['apify_request_id']
Expand Down Expand Up @@ -129,7 +136,12 @@ def to_scrapy_request(apify_request: dict, spider: Spider) -> Request:

# Add optional 'headers' field
if 'headers' in apify_request:
scrapy_request.headers = apify_request['headers']
if isinstance(apify_request['headers'], dict):
scrapy_request.headers = Headers(apify_request['headers'])
else:
Actor.log.warning(
f'apify_request[headers] is not an instance of the dict class, apify_request[headers] = {apify_request["headers"]}',
)

# Add optional 'userData' field
if 'userData' in apify_request:
Expand Down
10 changes: 10 additions & 0 deletions tests/unit/scrapy/requests/test_to_apify_request.py
Expand Up @@ -2,6 +2,7 @@

import pytest
from scrapy import Request, Spider
from scrapy.http.headers import Headers

from apify.scrapy.requests import to_apify_request

Expand All @@ -28,6 +29,15 @@ def test__to_apify_request__simple(spider: Spider) -> None:
assert isinstance(user_data.get('scrapy_request'), str)


def test__to_apify_request__headers(spider: Spider) -> None:
    """Verify the headers of the Apify request built from a Scrapy request.

    The Scrapy request is created with a `Headers` instance, and the
    resulting Apify request's 'headers' entry must equal the dict built
    from that instance's `to_unicode_dict()` output.
    """
    headers = Headers({'Authorization': 'Bearer access_token'})
    request = Request(url='https://example.com', headers=headers)

    converted = to_apify_request(request, spider)
    expected_headers = dict(headers.to_unicode_dict())

    assert converted['headers'] == expected_headers


def test__to_apify_request__without_id_and_unique_key(spider: Spider) -> None:
scrapy_request = Request(
url='https://example.com',
Expand Down
5 changes: 3 additions & 2 deletions tests/unit/scrapy/requests/test_to_scrapy_request.py
Expand Up @@ -4,6 +4,7 @@

import pytest
from scrapy import Request, Spider
from scrapy.http.headers import Headers

from apify.scrapy.requests import to_scrapy_request

Expand Down Expand Up @@ -54,7 +55,7 @@ def test__to_scrapy_request__without_reconstruction_with_optional_fields(spider:
assert apify_request['method'] == scrapy_request.method
assert apify_request['id'] == scrapy_request.meta.get('apify_request_id')
assert apify_request['uniqueKey'] == scrapy_request.meta.get('apify_request_unique_key')
assert apify_request['headers'] == scrapy_request.headers
assert Headers(apify_request['headers']) == scrapy_request.headers
assert apify_request['userData'] == scrapy_request.meta.get('userData')


Expand Down Expand Up @@ -101,7 +102,7 @@ def test__to_scrapy_request__with_reconstruction_with_optional_fields(spider: Sp
assert apify_request['method'] == scrapy_request.method
assert apify_request['id'] == scrapy_request.meta.get('apify_request_id')
assert apify_request['uniqueKey'] == scrapy_request.meta.get('apify_request_unique_key')
assert apify_request['headers'] == scrapy_request.headers
assert Headers(apify_request['headers']) == scrapy_request.headers
assert apify_request['userData'] == scrapy_request.meta.get('userData')


Expand Down

0 comments on commit 268adda

Please sign in to comment.