diff --git a/scripts/setversion/setup.py b/scripts/set_version/setup.py similarity index 100% rename from scripts/setversion/setup.py rename to scripts/set_version/setup.py diff --git a/scripts/setversion/setversion.py b/scripts/set_version/setversion.py similarity index 100% rename from scripts/setversion/setversion.py rename to scripts/set_version/setversion.py diff --git a/scripts/setversion/setversion_lib/__init__.py b/scripts/set_version/setversion_lib/__init__.py similarity index 100% rename from scripts/setversion/setversion_lib/__init__.py rename to scripts/set_version/setversion_lib/__init__.py diff --git a/scripts/tile_gen/tile_gen.py b/scripts/tile_gen/tile_gen.py index 567f9ef..d833c63 100755 --- a/scripts/tile_gen/tile_gen.py +++ b/scripts/tile_gen/tile_gen.py @@ -1,14 +1,9 @@ #!/usr/bin/env python3 -import json -import subprocess -from pathlib import Path - import click -from tile_gen_lib.config import config -from tile_gen_lib.extract import make_btrfs +from tile_gen_lib.btrfs import make_btrfs from tile_gen_lib.planetiler import run_planetiler -from tile_gen_lib.upload import make_indexes, upload_rclone +from tile_gen_lib.rclone import make_indexes_for_bucket, upload_area @click.group() @@ -20,60 +15,37 @@ def cli(): @cli.command() @click.argument('area', required=True) -def make_tiles(area): +@click.option('--upload', is_flag=True, help='Upload after generation is complete') +def make_tiles(area, upload): """ - Generate tiles for a given area + Generate tiles for a given area, optionally upload it to the btrfs bucket """ run_folder = run_planetiler(area) make_btrfs(run_folder) - # make_btrfs(Path('/data/ofm/tile_gen/runs/monaco/20240826_230406_pt')) + + if upload: + upload_area(area) + + +@cli.command(name='upload-area') +@click.argument('area', required=True) +def upload_area_(area): + """ + Upload all runs from a given area to the btrfs bucket + """ + + upload_area(area) @cli.command() -def upload_runs(): +def make_indexes(): """ - Upload all runs present in system + Make indexes for all buckets """ - print('running upload_runs') - - for area in config.areas: - if not (config.runs_dir / area).exists(): - continue - - p = subprocess.run( - [ - 'rclone', - 'lsjson', - '--dirs-only', - '--fast-list', - f'remote:ofm-{area}', - ], - text=True, - capture_output=True, - check=True, - env=dict(RCLONE_CONFIG='/data/ofm/config/rclone.conf'), - ) - rclone_json = json.loads(p.stdout) - runs_remote = {p['Path'] for p in rclone_json} - runs_local = {p.name for p in (config.runs_dir / area).iterdir()} - - runs_to_upload = runs_local - runs_remote - for run in runs_to_upload: - print(f'uploading {area} {run}') - upload_rclone(area, run) - - make_indexes() - - -@cli.command() -def index(): - """ - Run index on Cloudflare buckets - """ - - make_indexes() + for bucket in ['ofm-btrfs']: + make_indexes_for_bucket(bucket) if __name__ == '__main__': diff --git a/scripts/tile_gen/tile_gen_lib/extract.py b/scripts/tile_gen/tile_gen_lib/btrfs.py similarity index 94% rename from scripts/tile_gen/tile_gen_lib/extract.py rename to scripts/tile_gen/tile_gen_lib/btrfs.py index fbb0e43..d13252e 100644 --- a/scripts/tile_gen/tile_gen_lib/extract.py +++ b/scripts/tile_gen/tile_gen_lib/btrfs.py @@ -1,7 +1,6 @@ import os import shutil import subprocess -import sys from pathlib import Path from tile_gen_lib.config import config @@ -14,16 +13,7 @@ IMAGE_SIZE = '200G' def make_btrfs(run_folder: Path): os.chdir(run_folder) - # cleanup - for mount in ['mnt_rw', 'mnt_rw2']: - subprocess.run(['sudo', 'umount', mount], capture_output=True) - - for pattern in ['mnt_rw*', 'tmp_*', '*.btrfs', '*.gz', '*.log', '*.txt', 'logs', 'osm_date']: - for item in Path().glob(pattern): - if item.is_dir(): - shutil.rmtree(item) - else: - item.unlink() + cleanup_folder(run_folder) # make an empty file that's definitely bigger then the current OSM output for image in ['image.btrfs', 'image2.btrfs']: @@ -134,3 +124,17 @@ def make_btrfs(run_folder: Path): shutil.move(file, 'logs') print('extract_btrfs.py DONE') + + +def cleanup_folder(run_folder: Path): + print(f'cleaning up {run_folder}') + + for mount in ['mnt_rw', 'mnt_rw2']: + subprocess.run(['sudo', 'umount', run_folder / mount], capture_output=True) + + for pattern in ['mnt_rw*', 'tmp_*', '*.btrfs', '*.gz', '*.log', '*.txt', 'logs', 'osm_date']: + for item in run_folder.glob(pattern): + if item.is_dir(): + shutil.rmtree(item) + else: + item.unlink() diff --git a/scripts/tile_gen/tile_gen_lib/config.py b/scripts/tile_gen/tile_gen_lib/config.py index feb8cc7..09b03e4 100644 --- a/scripts/tile_gen/tile_gen_lib/config.py +++ b/scripts/tile_gen/tile_gen_lib/config.py @@ -12,6 +12,8 @@ class Configuration: runs_dir = tile_gen_dir / 'runs' + rclone_config = Path('/data/ofm/config/rclone.conf') + areas = ['planet', 'monaco'] diff --git a/scripts/tile_gen/tile_gen_lib/planetiler.py b/scripts/tile_gen/tile_gen_lib/planetiler.py index 26ae9dc..633f6d3 100644 --- a/scripts/tile_gen/tile_gen_lib/planetiler.py +++ b/scripts/tile_gen/tile_gen_lib/planetiler.py @@ -5,6 +5,7 @@ from datetime import datetime, timezone from pathlib import Path from tile_gen_lib.config import config +from tile_gen_lib.btrfs import cleanup_folder def run_planetiler(area: str) -> Path: @@ -12,14 +13,23 @@ def run_planetiler(area: str) -> Path: date = datetime.now(tz=timezone.utc).strftime('%Y%m%d_%H%M%S') - # delete all previous runs for the given area - shutil.rmtree(config.runs_dir / area, ignore_errors=True) + area_dir = config.runs_dir / area - run_folder = config.runs_dir / area / f'{date}_pt' + # delete all previous runs for the given area + for subdir in area_dir.iterdir(): + cleanup_folder(subdir) + + print('running rmtree') + shutil.rmtree(area_dir, ignore_errors=True) + print('rmtree done') + + run_folder = area_dir / f'{date}_pt' run_folder.mkdir(parents=True, exist_ok=True) os.chdir(run_folder) + # link to discussion about why exactly 30 GB + # https://github.com/onthegomap/planetiler/discussions/690#discussioncomment-7756397 java_memory_gb = 30 if area == 'planet' else 1 command = [ @@ -39,7 +49,9 @@ def run_planetiler(area: str) -> Path: ] if area == 'planet': - command += '--bounds=planet' + command.append('--bounds=planet') + + print(command) out_path = run_folder / 'planetiler.out' err_path = run_folder / 'planetiler.err' diff --git a/scripts/tile_gen/tile_gen_lib/rclone.py b/scripts/tile_gen/tile_gen_lib/rclone.py new file mode 100644 index 0000000..bfb8ddf --- /dev/null +++ b/scripts/tile_gen/tile_gen_lib/rclone.py @@ -0,0 +1,123 @@ +import subprocess +import sys + +from tile_gen_lib.config import config + + +def upload_area(area): + """ + Uploads an area, making sure there is exactly one run present + """ + + print(f'Uploading area: {area}') + + assert area in config.areas + + area_dir = config.runs_dir / area + if not area_dir.exists(): + return + + runs = list(area_dir.iterdir()) + if len(runs) != 1: + print('Error: Make sure there is only one run in the given area') + sys.exit(1) + + run = runs[0].name + + upload_area_run(area, run) + make_indexes_for_bucket('ofm-btrfs') + + +def upload_area_run(area, run): + print(f'Uploading {area} {run} to btrfs bucket') + + run_dir = config.runs_dir / area / run + assert run_dir.is_dir() + + subprocess.run( + [ + 'rclone', + 'sync', + '--verbose=1', + '--transfers=8', + '--multi-thread-streams=8', + '--fast-list', + '--stats-file-name-length=0', + '--stats-one-line', + '--log-file', + run_dir / 'logs' / 'rclone.log', + '--exclude', + 'logs/**', + run_dir, + f'remote:ofm-btrfs/areas/{area}/{run}', + ], + env=dict(RCLONE_CONFIG=config.rclone_config), + check=True, + ) + + +def make_indexes_for_bucket(bucket): + print(f'Making indexes for bucket: {bucket}') + + # files + p = subprocess.run( + [ + 'rclone', + 'lsf', + '--recursive', + '--files-only', + '--fast-list', + '--exclude', + 'dirs.txt', + '--exclude', + 'files.txt', + f'remote:{bucket}', + ], + env=dict(RCLONE_CONFIG=config.rclone_config), + check=True, + capture_output=True, + text=True, + ) + index_str = p.stdout + + # upload to files.txt + subprocess.run( + [ + 'rclone', + 'rcat', + f'remote:{bucket}/files.txt', + ], + env=dict(RCLONE_CONFIG=config.rclone_config), + check=True, + input=index_str.encode(), + ) + + # directories + p = subprocess.run( + [ + 'rclone', + 'lsf', + '--recursive', + '--dirs-only', + '--dir-slash=false', + '--fast-list', + f'remote:{bucket}', + ], + env=dict(RCLONE_CONFIG=config.rclone_config), + check=True, + capture_output=True, + text=True, + ) + index_str = p.stdout + + # upload to dirs.txt + subprocess.run( + [ + 'rclone', + 'rcat', + f'remote:{bucket}/dirs.txt', + ], + env=dict(RCLONE_CONFIG=config.rclone_config), + check=True, + input=index_str.encode(), + ) diff --git a/scripts/tile_gen/tile_gen_lib/upload.py b/scripts/tile_gen/tile_gen_lib/upload.py deleted file mode 100644 index 1200a6c..0000000 --- a/scripts/tile_gen/tile_gen_lib/upload.py +++ /dev/null @@ -1,93 +0,0 @@ -import subprocess - -from tile_gen_lib.config import config - - -def upload_rclone(area, run): - subprocess.run( - [ - 'rclone', - 'sync', - '--transfers=8', - '--multi-thread-streams=8', - '--fast-list', - '-v', - '--stats-file-name-length', - '0', - '--stats-one-line', - '--log-file', - config.runs_dir / area / run / 'logs' / 'rclone.log', - '--exclude', - 'logs/**', - config.runs_dir / area / run, - f'remote:ofm-{area}/{run}', - ], - env=dict(RCLONE_CONFIG='/data/ofm/config/rclone.conf'), - check=True, - ) - - -def make_indexes(): - for area in config.areas: - print(f'creating index {area}') - - # files - p = subprocess.run( - [ - 'rclone', - 'lsf', - '-R', - '--files-only', - '--fast-list', - '--exclude', - 'dirs.txt', - '--exclude', - 'index.txt', - f'remote:ofm-{area}', - ], - env=dict(RCLONE_CONFIG='/data/ofm/config/rclone.conf'), - check=True, - capture_output=True, - text=True, - ) - index_str = p.stdout - - subprocess.run( - [ - 'rclone', - 'rcat', - f'remote:ofm-{area}/index.txt', - ], - env=dict(RCLONE_CONFIG='/data/ofm/config/rclone.conf'), - check=True, - input=index_str.encode(), - ) - - # directories - p = subprocess.run( - [ - 'rclone', - 'lsf', - '-R', - '--dirs-only', - '--dir-slash=false', - '--fast-list', - f'remote:ofm-{area}', - ], - env=dict(RCLONE_CONFIG='/data/ofm/config/rclone.conf'), - check=True, - capture_output=True, - text=True, - ) - index_str = p.stdout - - subprocess.run( - [ - 'rclone', - 'rcat', - f'remote:ofm-{area}/dirs.txt', - ], - env=dict(RCLONE_CONFIG='/data/ofm/config/rclone.conf'), - check=True, - input=index_str.encode(), - )