# Captured from the GitHub Actions web UI:
# workflow file for run "scrape #81" of the "scrape" workflow.

---
# Runs the Scrapy spider `czap` and commits the refreshed items.json back to
# the repository.
name: scrape

# Triggered by every push to main and by a daily schedule (04:00; GitHub
# evaluates cron expressions in UTC).
on:
  push:
    branches:
      - main
  schedule:
    - cron: "0 4 * * *"

jobs:
  scrape:
    runs-on: ubuntu-latest
    # The last step pushes a commit, so the job token needs write access to
    # the repository contents.
    permissions:
      contents: write
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      # Poetry is installed first because setup-python's `cache: poetry`
      # option needs the `poetry` executable to be available.
      - name: Install Poetry
        run: pipx install poetry

      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          python-version-file: pyproject.toml
          cache: poetry

      - name: Install Python dependencies
        run: poetry install

      # For some reason the 'overwrite' option for the Scrapy feed exporters
      # doesn't work reliably for me ¯\_(ツ)_/¯
      # `-f` keeps the step green when items.json does not exist yet.
      - name: Clear data file
        run: rm -f items.json

      # GitHub caches are immutable: saving under a key that already exists
      # does not update it. A per-run primary key lets every run store a fresh
      # copy of .cache, while the `diskcache` prefix in restore-keys restores
      # the most recent one (including caches saved under the former fixed
      # `diskcache` key).
      - name: Restore cache
        id: diskcache-restore
        uses: actions/cache/restore@v4
        with:
          path: .cache
          key: diskcache-${{ github.run_id }}-${{ github.run_attempt }}
          restore-keys: |
            diskcache

      - name: Scrape
        run: poetry run scrapy crawl czap

      # Saved under the primary key computed by the restore step above.
      - name: Save cache
        id: diskcache-save
        uses: actions/cache/save@v4
        with:
          path: .cache
          key: ${{ steps.diskcache-restore.outputs.cache-primary-key }}

      # Fail the job if the crawl produced a missing or empty items.json, so
      # that nothing gets committed in that case.
      - name: Check that data got exported
        run: "[ -s items.json ] || exit 1"

      - name: Save to Git
        uses: EndBug/add-and-commit@v9
        with:
          add: items.json
          author_name: "scraper"
          author_email: "scraper@honzajavorek.cz"
          message: "update items 📥"