uploading and indexes

This commit is contained in:
Zsolt Ero
2024-08-29 00:20:23 +02:00
parent b746263cea
commit 43c9f31f03
9 changed files with 178 additions and 158 deletions

View File

@@ -1,14 +1,9 @@
#!/usr/bin/env python3
import json
import subprocess
from pathlib import Path
import click
from tile_gen_lib.config import config
from tile_gen_lib.extract import make_btrfs
from tile_gen_lib.btrfs import make_btrfs
from tile_gen_lib.planetiler import run_planetiler
from tile_gen_lib.upload import make_indexes, upload_rclone
from tile_gen_lib.rclone import make_indexes_for_bucket, upload_area
@click.group()
@@ -20,60 +15,37 @@ def cli():
@cli.command()
@click.argument('area', required=True)
def make_tiles(area):
@click.option('--upload', is_flag=True, help='Upload after generation is complete')
def make_tiles(area, upload):
"""
Generate tiles for a given area
Generate tiles for a given area, optionally upload it to the btrfs bucket
"""
run_folder = run_planetiler(area)
make_btrfs(run_folder)
# make_btrfs(Path('/data/ofm/tile_gen/runs/monaco/20240826_230406_pt'))
if upload:
upload_area(area)
@cli.command(name='upload-area')
@click.argument('area', required=True)
def upload_area_(area):
"""
Upload all runs from a given area to the btrfs bucket
"""
upload_area(area)
@cli.command()
def upload_runs():
def make_indexes():
"""
Upload all runs present in system
Make indexes for all buckets
"""
print('running upload_runs')
for area in config.areas:
if not (config.runs_dir / area).exists():
continue
p = subprocess.run(
[
'rclone',
'lsjson',
'--dirs-only',
'--fast-list',
f'remote:ofm-{area}',
],
text=True,
capture_output=True,
check=True,
env=dict(RCLONE_CONFIG='/data/ofm/config/rclone.conf'),
)
rclone_json = json.loads(p.stdout)
runs_remote = {p['Path'] for p in rclone_json}
runs_local = {p.name for p in (config.runs_dir / area).iterdir()}
runs_to_upload = runs_local - runs_remote
for run in runs_to_upload:
print(f'uploading {area} {run}')
upload_rclone(area, run)
make_indexes()
@cli.command()
def index():
"""
Run index on Cloudflare buckets
"""
make_indexes()
for bucket in ['ofm-btrfs']:
make_indexes_for_bucket(bucket)
if __name__ == '__main__':

View File

@@ -1,7 +1,6 @@
import os
import shutil
import subprocess
import sys
from pathlib import Path
from tile_gen_lib.config import config
@@ -14,16 +13,7 @@ IMAGE_SIZE = '200G'
def make_btrfs(run_folder: Path):
os.chdir(run_folder)
# cleanup
for mount in ['mnt_rw', 'mnt_rw2']:
subprocess.run(['sudo', 'umount', mount], capture_output=True)
for pattern in ['mnt_rw*', 'tmp_*', '*.btrfs', '*.gz', '*.log', '*.txt', 'logs', 'osm_date']:
for item in Path().glob(pattern):
if item.is_dir():
shutil.rmtree(item)
else:
item.unlink()
cleanup_folder(run_folder)
# make an empty file that's definitely bigger than the current OSM output
for image in ['image.btrfs', 'image2.btrfs']:
@@ -134,3 +124,17 @@ def make_btrfs(run_folder: Path):
shutil.move(file, 'logs')
print('extract_btrfs.py DONE')
def cleanup_folder(run_folder: Path):
    """
    Remove leftover mounts, images, archives and logs from a run folder.

    The two known mount points are unmounted first; umount failures are
    ignored (the command runs without `check=True`, e.g. nothing was mounted).
    Afterwards every entry of `run_folder` matching one of the known
    leftover patterns is deleted: directories recursively, anything else
    via unlink.
    """
    print(f'cleaning up {run_folder}')

    # best-effort unmount, output is captured and the return code is not checked
    for mount_name in ('mnt_rw', 'mnt_rw2'):
        umount_cmd = ['sudo', 'umount', run_folder / mount_name]
        subprocess.run(umount_cmd, capture_output=True)

    leftover_patterns = (
        'mnt_rw*',
        'tmp_*',
        '*.btrfs',
        '*.gz',
        '*.log',
        '*.txt',
        'logs',
        'osm_date',
    )
    for pattern in leftover_patterns:
        for entry in run_folder.glob(pattern):
            if not entry.is_dir():
                entry.unlink()
                continue
            shutil.rmtree(entry)

View File

@@ -12,6 +12,8 @@ class Configuration:
runs_dir = tile_gen_dir / 'runs'
rclone_config = Path('/data/ofm/config/rclone.conf')
areas = ['planet', 'monaco']

View File

@@ -5,6 +5,7 @@ from datetime import datetime, timezone
from pathlib import Path
from tile_gen_lib.config import config
from tile_gen_lib.btrfs import cleanup_folder
def run_planetiler(area: str) -> Path:
@@ -12,14 +13,23 @@ def run_planetiler(area: str) -> Path:
date = datetime.now(tz=timezone.utc).strftime('%Y%m%d_%H%M%S')
# delete all previous runs for the given area
shutil.rmtree(config.runs_dir / area, ignore_errors=True)
area_dir = config.runs_dir / area
run_folder = config.runs_dir / area / f'{date}_pt'
# delete all previous runs for the given area
for subdir in area_dir.iterdir():
cleanup_folder(subdir)
print('running rmtree')
shutil.rmtree(area_dir, ignore_errors=True)
print('rmtree done')
run_folder = area_dir / f'{date}_pt'
run_folder.mkdir(parents=True, exist_ok=True)
os.chdir(run_folder)
# link to discussion about why exactly 30 GB
# https://github.com/onthegomap/planetiler/discussions/690#discussioncomment-7756397
java_memory_gb = 30 if area == 'planet' else 1
command = [
@@ -39,7 +49,9 @@ def run_planetiler(area: str) -> Path:
]
if area == 'planet':
command += '--bounds=planet'
command.append('--bounds=planet')
print(command)
out_path = run_folder / 'planetiler.out'
err_path = run_folder / 'planetiler.err'

View File

@@ -0,0 +1,123 @@
import subprocess
import sys
from tile_gen_lib.config import config
def upload_area(area):
    """
    Upload the single run of an area, then rebuild the 'ofm-btrfs' indexes.

    The area must be listed in `config.areas` and its directory under
    `config.runs_dir` must contain exactly one entry (the run to upload).

    - unknown area: prints an error and exits with status 1
    - area directory missing: prints a notice and returns without uploading
    - zero or more than one run present: prints an error and exits with status 1
    """
    print(f'Uploading area: {area}')

    # explicit validation instead of `assert`: asserts are stripped when
    # Python runs with -O, which would let an unknown area through
    if area not in config.areas:
        print(f'Error: Unknown area: {area}')
        sys.exit(1)

    area_dir = config.runs_dir / area
    if not area_dir.exists():
        # nothing has been generated for this area yet; say so instead of
        # returning silently
        print(f'No runs directory for area {area}, nothing to upload')
        return

    runs = list(area_dir.iterdir())
    if len(runs) != 1:
        print('Error: Make sure there is only one run in the given area')
        sys.exit(1)

    upload_area_run(area, runs[0].name)
    make_indexes_for_bucket('ofm-btrfs')
def upload_area_run(area, run):
    """
    Sync one run folder of an area to `remote:ofm-btrfs/areas/<area>/<run>`.

    The run folder is `config.runs_dir / area / run` and must be an existing
    directory. rclone writes its log to `logs/rclone.log` inside the run
    folder; the `logs` directory itself is excluded from the sync.
    A non-zero rclone exit raises `subprocess.CalledProcessError`.
    """
    print(f'Uploading {area} {run} to btrfs bucket')

    run_dir = config.runs_dir / area / run
    assert run_dir.is_dir()

    log_file = run_dir / 'logs' / 'rclone.log'
    destination = f'remote:ofm-btrfs/areas/{area}/{run}'

    rclone_cmd = ['rclone', 'sync']
    rclone_cmd += [
        '--verbose=1',
        '--transfers=8',
        '--multi-thread-streams=8',
        '--fast-list',
        '--stats-file-name-length=0',
        '--stats-one-line',
    ]
    rclone_cmd += ['--log-file', log_file]
    rclone_cmd += ['--exclude', 'logs/**']
    rclone_cmd += [run_dir, destination]

    # the child process gets an environment containing only RCLONE_CONFIG
    subprocess.run(
        rclone_cmd,
        env={'RCLONE_CONFIG': config.rclone_config},
        check=True,
    )
def make_indexes_for_bucket(bucket):
    """
    Regenerate `files.txt` and `dirs.txt` at the root of a remote bucket.

    `files.txt` receives the recursive file listing of `remote:<bucket>`
    (leaving out `dirs.txt` and `files.txt`), `dirs.txt` receives the
    recursive directory listing without trailing slashes. Both listings are
    produced with `rclone lsf` and uploaded with `rclone rcat`.
    Any failing rclone call raises `subprocess.CalledProcessError`.
    """
    print(f'Making indexes for bucket: {bucket}')

    remote_root = f'remote:{bucket}'
    # the child processes get an environment containing only RCLONE_CONFIG
    rclone_env = {'RCLONE_CONFIG': config.rclone_config}

    def list_remote(*lsf_flags):
        # recursive `rclone lsf` on the bucket root, returns stdout as text
        listing = subprocess.run(
            ['rclone', 'lsf', '--recursive', *lsf_flags, remote_root],
            env=rclone_env,
            check=True,
            capture_output=True,
            text=True,
        )
        return listing.stdout

    def write_remote(file_name, content):
        # stream the content into a file at the bucket root via stdin
        subprocess.run(
            ['rclone', 'rcat', f'{remote_root}/{file_name}'],
            env=rclone_env,
            check=True,
            input=content.encode(),
        )

    # files
    files_index = list_remote(
        '--files-only',
        '--fast-list',
        '--exclude',
        'dirs.txt',
        '--exclude',
        'files.txt',
    )
    write_remote('files.txt', files_index)

    # directories
    dirs_index = list_remote(
        '--dirs-only',
        '--dir-slash=false',
        '--fast-list',
    )
    write_remote('dirs.txt', dirs_index)

View File

@@ -1,93 +0,0 @@
import subprocess
from tile_gen_lib.config import config
def upload_rclone(area, run):
    """
    Sync the run folder `config.runs_dir / area / run` to `remote:ofm-<area>/<run>`.

    rclone logs to `logs/rclone.log` inside the run folder and the `logs`
    directory is excluded from the sync. A non-zero rclone exit raises
    `subprocess.CalledProcessError`.
    """
    run_dir = config.runs_dir / area / run
    log_file = run_dir / 'logs' / 'rclone.log'
    destination = f'remote:ofm-{area}/{run}'

    rclone_cmd = ['rclone', 'sync']
    rclone_cmd += ['--transfers=8', '--multi-thread-streams=8', '--fast-list', '-v']
    rclone_cmd += ['--stats-file-name-length', '0', '--stats-one-line']
    rclone_cmd += ['--log-file', log_file]
    rclone_cmd += ['--exclude', 'logs/**']
    rclone_cmd += [run_dir, destination]

    # the child process gets an environment containing only RCLONE_CONFIG
    subprocess.run(
        rclone_cmd,
        env={'RCLONE_CONFIG': '/data/ofm/config/rclone.conf'},
        check=True,
    )
def make_indexes():
    """
    Regenerate `index.txt` and `dirs.txt` in the `ofm-<area>` bucket of every configured area.

    For each area in `config.areas`, the recursive file listing (leaving out
    `dirs.txt` and `index.txt`) is uploaded as `index.txt`, then the
    recursive directory listing without trailing slashes is uploaded as
    `dirs.txt`. Any failing rclone call raises `subprocess.CalledProcessError`.
    """
    # the child processes get an environment containing only RCLONE_CONFIG
    rclone_env = {'RCLONE_CONFIG': '/data/ofm/config/rclone.conf'}

    # (extra `rclone lsf` flags, name of the index file to write), in upload order
    index_specs = (
        (
            ['--files-only', '--fast-list', '--exclude', 'dirs.txt', '--exclude', 'index.txt'],
            'index.txt',
        ),
        (
            ['--dirs-only', '--dir-slash=false', '--fast-list'],
            'dirs.txt',
        ),
    )

    for area in config.areas:
        print(f'creating index {area}')
        bucket_root = f'remote:ofm-{area}'

        for lsf_flags, index_name in index_specs:
            listing = subprocess.run(
                ['rclone', 'lsf', '-R', *lsf_flags, bucket_root],
                env=rclone_env,
                check=True,
                capture_output=True,
                text=True,
            )
            # stream the listing into the index file via stdin
            subprocess.run(
                ['rclone', 'rcat', f'{bucket_root}/{index_name}'],
                env=rclone_env,
                check=True,
                input=listing.stdout.encode(),
            )