From a7daec032e8dfadabf767341299c928ac51633c9 Mon Sep 17 00:00:00 2001 From: Zsolt Ero Date: Fri, 30 Aug 2024 02:16:40 +0200 Subject: [PATCH] refactor, auto_clean --- README.md | 34 +++++----- config/certs/.gitignore | 1 - init-server.py | 14 ---- modules/http_host/cron.d/ofm_http_host | 2 +- modules/http_host/http_host.py | 37 ++++++++--- modules/http_host/http_host_lib/btrfs.py | 18 ++++- modules/http_host/http_host_lib/nginx.py | 19 ++++-- modules/http_host/http_host_lib/sync.py | 73 ++++++++++++++++++--- modules/http_host/http_host_lib/versions.py | 19 +----- ssh_lib/tasks.py | 10 +-- 10 files changed, 145 insertions(+), 82 deletions(-) delete mode 100644 config/certs/.gitignore diff --git a/README.md b/README.md index e9de024..cece06e 100644 --- a/README.md +++ b/README.md @@ -72,22 +72,25 @@ This sets up everything on a clean Ubuntu server. You run it locally and it sets #### HTTP host - modules/http_host -Inside `http_host`, all work is done by `host_manager.py`. +Inside `http_host`, all work is done by `http_host.py`. It does the following: -- checks the most up-to-date files in the public buckets -- downloads/extracts them locally, if needed -- mounts the downloaded Btrfs images in `/mnt/ofm` -- creates the correct TileJSON file -- creates the correct nginx config -- reloads nginx + - Downloading btrfs images -You can run `./host_manager.py --help` to see which options are available. Some commands can be run locally, including on non-linux machines. + - Downloading assets + + - Mounting downloaded btrfs images + + - Fetching version files + + - Running the sync cron task (called every minute with http-host-autoupdate) + +You can run `./http_host.py --help` to see which options are available. 
#### tile generation - modules/tile_gen -_note: Tile generation is 100% optional, as we are providing the processed full planet files for public download._ +_note: Tile generation is 100% optional, as we are providing the processed full planet btrfs files for public download._ The `tile_gen` script downloads a full planet OSM extract and runs it through Planetiler. @@ -115,15 +118,14 @@ You can directly download the processed full planet runs on the following URL pa https://planet.openfreemap.com/20240607_232801_pt/tiles.btrfs.gz // 86 GB -Replace the `20240607_232801_pt` part with any newer run, from the [index file](https://planet.openfreemap.com/index.txt). +Replace the `20240607_232801_pt` part with any newer run, from the [index file](https://planet.openfreemap.com/files.txt). ### Public buckets -There are three public buckets: +There are two public buckets: -- https://assets.openfreemap.com - contains fonts, sprites, styles, versions. index: [dirs](https://assets.openfreemap.com/dirs.txt), [files](https://assets.openfreemap.com/index.txt) -- https://planet.openfreemap.com - full planet runs. index: [dirs](https://planet.openfreemap.com/dirs.txt), [files](https://planet.openfreemap.com/index.txt) -- https://monaco.openfreemap.com - identical runs to the full planet, but only for Monaco area. Very tiny, ideal for development. index: [dirs](https://monaco.openfreemap.com/dirs.txt), [files](https://monaco.openfreemap.com/index.txt) +- https://assets.openfreemap.com - contains fonts, sprites, styles, versions. index: [dirs](https://assets.openfreemap.com/dirs.txt), [files](https://assets.openfreemap.com/files.txt) +- https://btrfs.openfreemap.com - full planet runs. index: [dirs](https://btrfs.openfreemap.com/dirs.txt), [files](https://btrfs.openfreemap.com/files.txt) ### What about PMTiles? @@ -139,13 +141,13 @@ Contributors welcome! Smaller tasks: -- Cloudflare worker for indexing the public buckets, instead of generating index.txt files. 
+- Cloudflare worker for indexing the public buckets, instead of generating index files. - Some of the POI icons are missing in the styles. Bigger tasks: - Split the styles to building blocks. For example, there should be a POI block, a label block, a road-style related block. -- Implement automatic updates for tile gen, uploading, testing and setting versions. +- Implement automatic updates for tile gen, uploading, testing and setting versions. (work in progress as of August 2024) Tasks outside the scope of this project: diff --git a/config/certs/.gitignore b/config/certs/.gitignore deleted file mode 100644 index 72e8ffc..0000000 --- a/config/certs/.gitignore +++ /dev/null @@ -1 +0,0 @@ -* diff --git a/init-server.py b/init-server.py index 2e0c67a..0b0edc2 100755 --- a/init-server.py +++ b/init-server.py @@ -135,20 +135,6 @@ def debug(hostname, user, port): upload_http_host_files(c) # run_http_host_sync(c) - # upload_http_host_config(c) - # upload_http_host_files(c) - # sudo_cmd(c, f'{VENV_BIN}/python -u /data/ofm/http_host/bin/host_manager.py nginx-sync') - - # put(c, SCRIPTS_DIR / 'tile_gen' / 'upload_manager.py', f'{TILE_GEN_BIN}') - # put_dir(c, SCRIPTS_DIR / 'loadbalancer', '/data/ofm/loadbalancer') - # put_dir( - # c, - # SCRIPTS_DIR / 'loadbalancer' / 'loadbalancer_lib', - # '/data/ofm/loadbalancer/loadbalancer_lib', - # ) - - # prepare_tile_gen(c) - if __name__ == '__main__': cli() diff --git a/modules/http_host/cron.d/ofm_http_host b/modules/http_host/cron.d/ofm_http_host index fc5afa1..ec0f9ac 100644 --- a/modules/http_host/cron.d/ofm_http_host +++ b/modules/http_host/cron.d/ofm_http_host @@ -1,4 +1,4 @@ # every minute sync, locking so that only one process can run at a time -* * * * * ofm /usr/bin/flock -n /tmp/hostmanager.lockfile -c 'sudo /data/ofm/venv/bin/python -u /data/ofm/http_host/bin/host_manager.py sync >> /data/ofm/http_host/logs/host_manager_sync.log 2>&1' +* * * * * ofm /usr/bin/flock -n /tmp/http_host.lockfile -c 'sudo /data/ofm/venv/bin/python 
-u /data/ofm/http_host/bin/http_host.py sync >> /data/ofm/http_host/logs/http_host_sync.log 2>&1' diff --git a/modules/http_host/http_host.py b/modules/http_host/http_host.py index ff7b361..5cee87b 100755 --- a/modules/http_host/http_host.py +++ b/modules/http_host/http_host.py @@ -9,8 +9,9 @@ from http_host_lib.btrfs import ( get_versions_for_area, ) from http_host_lib.mount import auto_mount_unmount -from http_host_lib.sync import full_sync -from http_host_lib.versions import sync_version_files +from http_host_lib.nginx import write_nginx_config +from http_host_lib.sync import auto_clean_btrfs, full_sync +from http_host_lib.versions import fetch_version_files @click.group() @@ -19,8 +20,8 @@ def cli(): Manages OpenFreeMap HTTP hosts, including:\n - Downloading btrfs images\n - Downloading assets\n - - Mounting directories\n - - Getting the deployed versions of tilesets\n + - Mounting downloaded btrfs images\n + - Fetches version files\n - Running the sync cron task (called every minute with http-host-autoupdate) """ @@ -59,14 +60,32 @@ def mount(): auto_mount_unmount() -@cli.command(name='sync-version-files') -def sync_version_files_(): +@cli.command(name='fetch-versions') +def fetch_version_files_(): """ - Syncs the version files from remote to local. - Remove versions are specified by https://assets.openfreemap.com/versions/deployed_{area}.txt + Fetches the version files from remote to local. 
+ Remote versions are specified by https://assets.openfreemap.com/versions/deployed_{area}.txt """ - sync_version_files() + fetch_version_files() + + +@cli.command() +def auto_clean(): + """ + Cleans the old btrfs images + """ + + auto_clean_btrfs() + + +@cli.command() +def nginx_config(): + """ + Writes the nginx config files and reloads nginx + """ + + write_nginx_config() @cli.command() diff --git a/modules/http_host/http_host_lib/btrfs.py b/modules/http_host/http_host_lib/btrfs.py index b713534..4fedb72 100644 --- a/modules/http_host/http_host_lib/btrfs.py +++ b/modules/http_host/http_host_lib/btrfs.py @@ -18,8 +18,18 @@ def download_area_version(area: str, version: str) -> bool: versions = get_versions_for_area(area) + # latest version if version == 'latest': selected_version = versions[-1] + + # deployed version + elif version == 'deployed': + try: + selected_version = (config.deployed_versions_dir / f'{area}.txt').read_text().strip() + except Exception: + return False + + # specific version else: if version not in versions: available_versions_str = '\n'.join(versions) @@ -44,7 +54,7 @@ def download_and_extract_btrfs(area: str, version: str) -> bool: returns True if download successful, False if skipped """ - print(f'downloading and extracting btrfs for: {area} {version}') + print(f'downloading btrfs: {area} {version}') version_dir = config.runs_dir / area / version btrfs_file = version_dir / 'tiles.btrfs' @@ -62,11 +72,13 @@ def download_and_extract_btrfs(area: str, version: str) -> bool: disk_free = shutil.disk_usage(temp_dir).free file_size = get_remote_file_size(url) if not file_size: - raise ValueError('Cannot get remote file size') + print(f'Cannot get remote file size for {url}') + return False needed_space = file_size * 3 if disk_free < needed_space: - raise ValueError(f'Not enough disk space. Needed: {needed_space}, free space: {disk_free}') + print(f'Not enough disk space. 
Needed: {needed_space}, free space: {disk_free}') + return False target_file = temp_dir / 'tiles.btrfs.gz' download_file_aria2(url, target_file) diff --git a/modules/http_host/http_host_lib/nginx.py b/modules/http_host/http_host_lib/nginx.py index e4c8cb0..7014a25 100644 --- a/modules/http_host/http_host_lib/nginx.py +++ b/modules/http_host/http_host_lib/nginx.py @@ -200,16 +200,27 @@ def create_version_location( def create_latest_locations(*, local: str, domain: str) -> str: location_str = '' - local_version_files = config.ofm_config_dir.glob('tileset_version_*.txt') + local_version_files = config.deployed_versions_dir.glob('*.txt') + for file in local_version_files: - area = file.stem.split('_')[-1] + area = file.stem with open(file) as fp: version = fp.read().strip() - print(f' setting latest version for {area}: {version}') + print(f' linking latest version for {area}: {version}') + + # checking runs dir run_dir = config.runs_dir / area / version tilejson_path = run_dir / f'tilejson-{local}.json' - assert tilejson_path.is_file() + if not tilejson_path.is_file(): + print(f' error with latest: {tilejson_path} does not exist') + continue + + # checking mnt dir + mnt_file = Path(f'/mnt/ofm/{area}-{version}/metadata.json') + if not mnt_file.is_file(): + print(f' error with latest: {mnt_file} does not exist') + continue location_str += f""" location = /{area} {{ # no trailing slash diff --git a/modules/http_host/http_host_lib/sync.py b/modules/http_host/http_host_lib/sync.py index 02d5f8c..4122207 100644 --- a/modules/http_host/http_host_lib/sync.py +++ b/modules/http_host/http_host_lib/sync.py @@ -1,3 +1,4 @@ +import shutil from datetime import datetime, timezone from http_host_lib.assets import download_assets @@ -6,7 +7,7 @@ from http_host_lib.config import config from http_host_lib.mount import auto_mount_unmount from http_host_lib.nginx import write_nginx_config from http_host_lib.utils import assert_linux, assert_sudo -from http_host_lib.versions import 
sync_version_files +from http_host_lib.versions import fetch_version_files def full_sync(force=False): @@ -22,18 +23,70 @@ def full_sync(force=False): assert_linux() assert_sudo() - download_done = False - download_done += download_area_version(area='monaco', version='latest') + # start - if not config.host_config.get('skip_planet'): - download_done += download_area_version(area='planet', version='latest') - - if download_done or force: - auto_mount_unmount() + versions_changed = fetch_version_files() download_assets() - versions_changed = sync_version_files() + btrfs_downloaded = False - if download_done or versions_changed or force: + # download latest and deployed monaco + btrfs_downloaded += download_area_version(area='monaco', version='latest') + btrfs_downloaded += download_area_version(area='monaco', version='deployed') + + # download latest and deployed planet + if not config.host_config.get('skip_planet'): + btrfs_downloaded += download_area_version(area='planet', version='latest') + btrfs_downloaded += download_area_version(area='planet', version='deployed') + + if btrfs_downloaded or versions_changed or force: + auto_clean_btrfs() + auto_mount_unmount() write_nginx_config() + + +def auto_clean_btrfs(): + """ + Clean old btrfs runs + + For each area we keep max two versions: + 1. The newest one available locally + 2. The one currently deployed, specified in /data/ofm/config/deployed_versions + 3. 
If there is no deployed version, then we include the second newest one + """ + + for area in config.areas: + area_dir = config.runs_dir / area + if not area_dir.is_dir(): + continue + + local_versions = sorted([i.name for i in area_dir.iterdir()]) + + versions_to_keep = set() + + # add newest version + if local_versions: + versions_to_keep.add(local_versions[-1]) + + # add deployed version + try: + deployed_version_file = config.deployed_versions_dir / f'{area}.txt' + deployed_version = deployed_version_file.read_text().strip() + if (config.runs_dir / area / deployed_version).exists(): + versions_to_keep.add(deployed_version) + except Exception: + pass + + # if still only one version, we include the second newest one + if len(versions_to_keep) == 1 and len(local_versions) >= 2: + versions_to_keep.add(local_versions[-2]) + + print(f' keeping versions for {area}: {sorted(versions_to_keep)}') + + versions_to_remove = set(local_versions).difference(versions_to_keep) + + for version in versions_to_remove: + print(f' removing version for {area}: {version}') + version_dir = config.runs_dir / area / version + shutil.rmtree(version_dir) diff --git a/modules/http_host/http_host_lib/versions.py b/modules/http_host/http_host_lib/versions.py index 01905de..f03263f 100644 --- a/modules/http_host/http_host_lib/versions.py +++ b/modules/http_host/http_host_lib/versions.py @@ -1,16 +1,13 @@ -import sys -from pathlib import Path - import requests from http_host_lib.config import config from http_host_lib.utils import assert_linux, assert_sudo -def sync_version_files() -> bool: +def fetch_version_files() -> bool: """ Syncs the version files from remote to local. 
- Remove versions are specified by https://assets.openfreemap.com/versions/deployed_{area}.txt + Remote versions are specified by https://assets.openfreemap.com/versions/deployed_{area}.txt """ print('Syncing local version files') @@ -18,9 +15,6 @@ def sync_version_files() -> bool: assert_linux() assert_sudo() - if not config.mnt_dir.exists(): - sys.exit(' mount needs to be run first') - need_nginx_sync = False for area in config.areas: @@ -37,15 +31,8 @@ def sync_version_files() -> bool: except Exception: local_version_old = None - mnt_file = Path(f'/mnt/ofm/{area}-{remote_version}/metadata.json') - if not mnt_file.exists(): - print(' local version does not exist') - if local_version_old is not None: - local_version_file.unlink() - need_nginx_sync = True - continue - if remote_version != local_version_old: + config.deployed_versions_dir.mkdir(exist_ok=True, parents=True) local_version_file.write_text(remote_version) need_nginx_sync = True diff --git a/ssh_lib/tasks.py b/ssh_lib/tasks.py index 939ab32..c774d55 100644 --- a/ssh_lib/tasks.py +++ b/ssh_lib/tasks.py @@ -127,14 +127,13 @@ def prepare_http_host(c): c.sudo('chown nginx:nginx /data/ofm/http_host/logs_nginx') upload_http_host_files(c) - upload_certificates(c) c.sudo(f'{VENV_BIN}/pip install -e {HTTP_HOST_BIN} --use-pep517') def run_http_host_sync(c): - print('Running host_manager.py sync --force') - sudo_cmd(c, f'{VENV_BIN}/python -u {HTTP_HOST_BIN}/host_manager.py sync --force') + print('Running http_host.py sync --force') + sudo_cmd(c, f'{VENV_BIN}/python -u {HTTP_HOST_BIN}/http_host.py sync --force') def upload_http_host_files(c): @@ -155,11 +154,6 @@ def upload_http_host_files(c): c.sudo('chown -R ofm:ofm /data/ofm/http_host') -def upload_certificates(c): - put_dir(c, CONFIG_DIR / 'certs', '/data/nginx/certs', file_permissions=400) - c.sudo('chown -R nginx:nginx /data/nginx') - - def install_benchmark(c): """ Read docs/quick_notes/http_benchmark.md