refactor, auto_clean

This commit is contained in:
Zsolt Ero
2024-08-30 02:16:40 +02:00
parent d753c8738a
commit a7daec032e
10 changed files with 145 additions and 82 deletions

View File

@@ -72,22 +72,25 @@ This sets up everything on a clean Ubuntu server. You run it locally and it sets
#### HTTP host - modules/http_host
Inside `http_host`, all work is done by `host_manager.py`.
Inside `http_host`, all work is done by `http_host.py`.
It does the following:
- checks the most up-to-date files in the public buckets
- downloads/extracts them locally, if needed
- mounts the downloaded Btrfs images in `/mnt/ofm`
- creates the correct TileJSON file
- creates the correct nginx config
- reloads nginx
- Downloading btrfs images
You can run `./host_manager.py --help` to see which options are available. Some commands can be run locally, including on non-linux machines.
- Downloading assets
- Mounting downloaded btrfs images
- Fetching version files
- Running the sync cron task (called every minute with http-host-autoupdate)
You can run `./http_host.py --help` to see which options are available.
#### tile generation - modules/tile_gen
_note: Tile generation is 100% optional, as we are providing the processed full planet files for public download._
_note: Tile generation is 100% optional, as we are providing the processed full planet btrfs files for public download._
The `tile_gen` script downloads a full planet OSM extract and runs it through Planetiler.
@@ -115,15 +118,14 @@ You can directly download the processed full planet runs on the following URL pa
https://planet.openfreemap.com/20240607_232801_pt/tiles.btrfs.gz // 86 GB
Replace the `20240607_232801_pt` part with any newer run, from the [index file](https://planet.openfreemap.com/index.txt).
Replace the `20240607_232801_pt` part with any newer run, from the [index file](https://planet.openfreemap.com/files.txt).
### Public buckets
There are three public buckets:
There are two public buckets:
- https://assets.openfreemap.com - contains fonts, sprites, styles, versions. index: [dirs](https://assets.openfreemap.com/dirs.txt), [files](https://assets.openfreemap.com/index.txt)
- https://planet.openfreemap.com - full planet runs. index: [dirs](https://planet.openfreemap.com/dirs.txt), [files](https://planet.openfreemap.com/index.txt)
- https://monaco.openfreemap.com - identical runs to the full planet, but only for Monaco area. Very tiny, ideal for development. index: [dirs](https://monaco.openfreemap.com/dirs.txt), [files](https://monaco.openfreemap.com/index.txt)
- https://assets.openfreemap.com - contains fonts, sprites, styles, versions. index: [dirs](https://assets.openfreemap.com/dirs.txt), [files](https://assets.openfreemap.com/files.txt)
- https://btrfs.openfreemap.com - full planet runs. index: [dirs](https://btrfs.openfreemap.com/dirs.txt), [files](https://btrfs.openfreemap.com/files.txt)
### What about PMTiles?
@@ -139,13 +141,13 @@ Contributors welcome!
Smaller tasks:
- Cloudflare worker for indexing the public buckets, instead of generating index.txt files.
- Cloudflare worker for indexing the public buckets, instead of generating index files.
- Some of the POI icons are missing in the styles.
Bigger tasks:
- Split the styles to building blocks. For example, there should be a POI block, a label block, a road-style related block.
- Implement automatic updates for tile gen, uploading, testing and setting versions.
- Implement automatic updates for tile gen, uploading, testing and setting versions. (work-in-progress as of August 2024)
Tasks outside the scope of this project:

View File

@@ -1 +0,0 @@
*

View File

@@ -135,20 +135,6 @@ def debug(hostname, user, port):
upload_http_host_files(c)
# run_http_host_sync(c)
# upload_http_host_config(c)
# upload_http_host_files(c)
# sudo_cmd(c, f'{VENV_BIN}/python -u /data/ofm/http_host/bin/host_manager.py nginx-sync')
# put(c, SCRIPTS_DIR / 'tile_gen' / 'upload_manager.py', f'{TILE_GEN_BIN}')
# put_dir(c, SCRIPTS_DIR / 'loadbalancer', '/data/ofm/loadbalancer')
# put_dir(
# c,
# SCRIPTS_DIR / 'loadbalancer' / 'loadbalancer_lib',
# '/data/ofm/loadbalancer/loadbalancer_lib',
# )
# prepare_tile_gen(c)
if __name__ == '__main__':
cli()

View File

@@ -1,4 +1,4 @@
# every minute sync, locking so that only one process can run at a time
* * * * * ofm /usr/bin/flock -n /tmp/hostmanager.lockfile -c 'sudo /data/ofm/venv/bin/python -u /data/ofm/http_host/bin/host_manager.py sync >> /data/ofm/http_host/logs/host_manager_sync.log 2>&1'
* * * * * ofm /usr/bin/flock -n /tmp/http_host.lockfile -c 'sudo /data/ofm/venv/bin/python -u /data/ofm/http_host/bin/http_host.py sync >> /data/ofm/http_host/logs/http_host_sync.log 2>&1'

View File

@@ -9,8 +9,9 @@ from http_host_lib.btrfs import (
get_versions_for_area,
)
from http_host_lib.mount import auto_mount_unmount
from http_host_lib.sync import full_sync
from http_host_lib.versions import sync_version_files
from http_host_lib.nginx import write_nginx_config
from http_host_lib.sync import auto_clean_btrfs, full_sync
from http_host_lib.versions import fetch_version_files
@click.group()
@@ -19,8 +20,8 @@ def cli():
Manages OpenFreeMap HTTP hosts, including:\n
- Downloading btrfs images\n
- Downloading assets\n
- Mounting directories\n
- Getting the deployed versions of tilesets\n
- Mounting downloaded btrfs images\n
- Fetches version files\n
- Running the sync cron task (called every minute with http-host-autoupdate)
"""
@@ -59,14 +60,32 @@ def mount():
auto_mount_unmount()
@cli.command(name='sync-version-files')
def sync_version_files_():
@cli.command(name='fetch-versions')
def fetch_version_files_():
"""
Syncs the version files from remote to local.
Remove versions are specified by https://assets.openfreemap.com/versions/deployed_{area}.txt
Fetches the version files from remote to local.
Remote versions are specified by https://assets.openfreemap.com/versions/deployed_{area}.txt
"""
sync_version_files()
fetch_version_files()
@cli.command(name='auto-clean')
def auto_clean():
    """
    Cleans the old btrfs images
    """
    # Thin CLI entry point: the actual cleanup is done by
    # auto_clean_btrfs, imported from http_host_lib.sync.
    auto_clean_btrfs()
@cli.command(name='nginx-config')
def nginx_config():
    """
    Writes the nginx config files and reloads nginx
    """
    # Thin CLI entry point: delegates to write_nginx_config,
    # imported from http_host_lib.nginx.
    write_nginx_config()
@cli.command()

View File

@@ -18,8 +18,18 @@ def download_area_version(area: str, version: str) -> bool:
versions = get_versions_for_area(area)
# latest version
if version == 'latest':
selected_version = versions[-1]
# deployed version
elif version == 'deployed':
try:
selected_version = (config.deployed_versions_dir / f'{area}.txt').read_text().strip()
except Exception:
return False
# specific version
else:
if version not in versions:
available_versions_str = '\n'.join(versions)
@@ -44,7 +54,7 @@ def download_and_extract_btrfs(area: str, version: str) -> bool:
returns True if download successful, False if skipped
"""
print(f'downloading and extracting btrfs for: {area} {version}')
print(f'downloading btrfs: {area} {version}')
version_dir = config.runs_dir / area / version
btrfs_file = version_dir / 'tiles.btrfs'
@@ -62,11 +72,13 @@ def download_and_extract_btrfs(area: str, version: str) -> bool:
disk_free = shutil.disk_usage(temp_dir).free
file_size = get_remote_file_size(url)
if not file_size:
raise ValueError('Cannot get remote file size')
print(f'Cannot get remote file size for {url}')
return False
needed_space = file_size * 3
if disk_free < needed_space:
raise ValueError(f'Not enough disk space. Needed: {needed_space}, free space: {disk_free}')
print(f'Not enough disk space. Needed: {needed_space}, free space: {disk_free}')
return False
target_file = temp_dir / 'tiles.btrfs.gz'
download_file_aria2(url, target_file)

View File

@@ -200,16 +200,27 @@ def create_version_location(
def create_latest_locations(*, local: str, domain: str) -> str:
location_str = ''
local_version_files = config.ofm_config_dir.glob('tileset_version_*.txt')
local_version_files = config.deployed_versions_dir.glob('*.txt')
for file in local_version_files:
area = file.stem.split('_')[-1]
area = file.stem
with open(file) as fp:
version = fp.read().strip()
print(f' setting latest version for {area}: {version}')
print(f' linking latest version for {area}: {version}')
# checking runs dir
run_dir = config.runs_dir / area / version
tilejson_path = run_dir / f'tilejson-{local}.json'
assert tilejson_path.is_file()
if not tilejson_path.is_file():
print(f' error with latest: {tilejson_path} does not exist')
continue
# checking mnt dir
mnt_file = Path(f'/mnt/ofm/{area}-{version}/metadata.json')
if not mnt_file.is_file():
print(f' error with latest: {mnt_file} does not exist')
continue
location_str += f"""
location = /{area} {{ # no trailing slash

View File

@@ -1,3 +1,4 @@
import shutil
from datetime import datetime, timezone
from http_host_lib.assets import download_assets
@@ -6,7 +7,7 @@ from http_host_lib.config import config
from http_host_lib.mount import auto_mount_unmount
from http_host_lib.nginx import write_nginx_config
from http_host_lib.utils import assert_linux, assert_sudo
from http_host_lib.versions import sync_version_files
from http_host_lib.versions import fetch_version_files
def full_sync(force=False):
@@ -22,18 +23,70 @@ def full_sync(force=False):
assert_linux()
assert_sudo()
download_done = False
download_done += download_area_version(area='monaco', version='latest')
# start
if not config.host_config.get('skip_planet'):
download_done += download_area_version(area='planet', version='latest')
if download_done or force:
auto_mount_unmount()
versions_changed = fetch_version_files()
download_assets()
versions_changed = sync_version_files()
btrfs_downloaded = False
if download_done or versions_changed or force:
# download latest and deployed monaco
btrfs_downloaded += download_area_version(area='monaco', version='latest')
btrfs_downloaded += download_area_version(area='monaco', version='deployed')
# download latest and deployed planet
if not config.host_config.get('skip_planet'):
btrfs_downloaded += download_area_version(area='planet', version='latest')
btrfs_downloaded += download_area_version(area='planet', version='deployed')
if btrfs_downloaded or versions_changed or force:
auto_clean_btrfs()
auto_mount_unmount()
write_nginx_config()
def auto_clean_btrfs():
    """
    Clean old btrfs runs.

    For each area in config.areas, at most two version directories are kept
    under config.runs_dir / area:

    1. The newest one available locally.
    2. The one currently deployed, as named in
       config.deployed_versions_dir / '{area}.txt', when that version
       exists locally.
    3. If only one version is selected by the rules above, the second
       newest local version is kept as well.

    Every other version directory is deleted with shutil.rmtree.
    Entries in the area directory that are not directories are ignored.
    Areas without a runs directory are skipped.
    """

    for area in config.areas:
        area_dir = config.runs_dir / area
        if not area_dir.is_dir():
            continue

        # Only directories count as versions. A stray file would otherwise
        # be able to sort as the "newest" version and would make
        # shutil.rmtree fail when selected for removal.
        local_versions = sorted(
            entry.name for entry in area_dir.iterdir() if entry.is_dir()
        )
        versions_to_keep = set()

        # add newest version
        if local_versions:
            versions_to_keep.add(local_versions[-1])

        # add deployed version; a missing or unreadable file simply means
        # there is no deployed version for this area
        deployed_version_file = config.deployed_versions_dir / f'{area}.txt'
        try:
            deployed_version = deployed_version_file.read_text().strip()
        except (OSError, UnicodeDecodeError):
            deployed_version = None

        # Membership test instead of a path-exists check: an empty file
        # yields '', which would resolve to the area directory itself and
        # wrongly count as a kept version.
        if deployed_version in local_versions:
            versions_to_keep.add(deployed_version)

        # if still only one version, we include the second newest one
        if len(versions_to_keep) == 1 and len(local_versions) >= 2:
            versions_to_keep.add(local_versions[-2])

        print(f' keeping versions for {area}: {sorted(versions_to_keep)}')

        # sorted so that removal order (and the log output) is deterministic
        for version in sorted(set(local_versions) - versions_to_keep):
            print(f' removing version for {area}: {version}')
            shutil.rmtree(area_dir / version)

View File

@@ -1,16 +1,13 @@
import sys
from pathlib import Path
import requests
from http_host_lib.config import config
from http_host_lib.utils import assert_linux, assert_sudo
def sync_version_files() -> bool:
def fetch_version_files() -> bool:
"""
Syncs the version files from remote to local.
Remove versions are specified by https://assets.openfreemap.com/versions/deployed_{area}.txt
Remote versions are specified by https://assets.openfreemap.com/versions/deployed_{area}.txt
"""
print('Syncing local version files')
@@ -18,9 +15,6 @@ def sync_version_files() -> bool:
assert_linux()
assert_sudo()
if not config.mnt_dir.exists():
sys.exit(' mount needs to be run first')
need_nginx_sync = False
for area in config.areas:
@@ -37,15 +31,8 @@ def sync_version_files() -> bool:
except Exception:
local_version_old = None
mnt_file = Path(f'/mnt/ofm/{area}-{remote_version}/metadata.json')
if not mnt_file.exists():
print(' local version does not exist')
if local_version_old is not None:
local_version_file.unlink()
need_nginx_sync = True
continue
if remote_version != local_version_old:
config.deployed_versions_dir.mkdir(exist_ok=True, parents=True)
local_version_file.write_text(remote_version)
need_nginx_sync = True

View File

@@ -127,14 +127,13 @@ def prepare_http_host(c):
c.sudo('chown nginx:nginx /data/ofm/http_host/logs_nginx')
upload_http_host_files(c)
upload_certificates(c)
c.sudo(f'{VENV_BIN}/pip install -e {HTTP_HOST_BIN} --use-pep517')
def run_http_host_sync(c):
print('Running host_manager.py sync --force')
sudo_cmd(c, f'{VENV_BIN}/python -u {HTTP_HOST_BIN}/host_manager.py sync --force')
print('Running http_host.py sync --force')
sudo_cmd(c, f'{VENV_BIN}/python -u {HTTP_HOST_BIN}/http_host.py sync --force')
def upload_http_host_files(c):
@@ -155,11 +154,6 @@ def upload_http_host_files(c):
c.sudo('chown -R ofm:ofm /data/ofm/http_host')
def upload_certificates(c):
put_dir(c, CONFIG_DIR / 'certs', '/data/nginx/certs', file_permissions=400)
c.sudo('chown -R nginx:nginx /data/nginx')
def install_benchmark(c):
"""
Read docs/quick_notes/http_benchmark.md