diff --git a/.gitignore b/.gitignore index b1fc780..2bb8697 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,10 @@ *.pyc *.egg-info +*.sqlite +*.mbtiles +*.pbf + .env .DS_Store diff --git a/docs/extract.md b/docs/extract.md new file mode 100644 index 0000000..2c2be90 --- /dev/null +++ b/docs/extract.md @@ -0,0 +1,14 @@ +### native mapbox/mbutil + +Filesystem 1K-blocks Used Available Use% Mounted on +/dev/loop0 1,474,386,100 1,119,622,516 354,763,584 76% /data/ofm/runs/planet_20231208_091355/mnt + +Filesystem Inodes IUsed IFree IUse% Mounted on +/dev/loop0 393,216,000 269,252,174 123,963,826 69% /data/ofm/runs/planet_20231208_091355/mnt + + + +### extract dedupl + +39,570,683 dedupl files + diff --git a/scripts/mbtiles_extractor/ext/metadata.json b/scripts/mbtiles_extractor/ext/metadata.json new file mode 100644 index 0000000..1738581 --- /dev/null +++ b/scripts/mbtiles_extractor/ext/metadata.json @@ -0,0 +1,20 @@ +{ + "format": "pbf", + "center": "19.50731,47.15992,6", + "bounds": "16.11262,45.73218,22.90201,48.58766", + "json": "{\"vector_layers\":[{\"id\":\"aerodrome_label\",\"fields\":{\"name_int\":\"String\",\"name:sk\":\"String\",\"name:nonlatin\":\"String\",\"name:sr\":\"String\",\"name:sq\":\"String\",\"name:sv\":\"String\",\"name:ko\":\"String\",\"name_de\":\"String\",\"name:cs\":\"String\",\"ele\":\"Number\",\"name:latin\":\"String\",\"name:ar\":\"String\",\"name:ja\":\"String\",\"name:az\":\"String\",\"name:ro\":\"String\",\"name:nl\":\"String\",\"name:fi\":\"String\",\"name:ru\":\"String\",\"iata\":\"String\",\"name:bg\":\"String\",\"name\":\"String\",\"name:fr\":\"String\",\"name:hy\":\"String\",\"name:uk\":\"String\",\"name:id\":\"String\",\"name:mk\":\"String\",\"name:it\":\"String\",\"name:en\":\"String\",\"name:eo\":\"String\",\"name:et\":\"String\",\"name:eu\":\"String\",\"class\":\"String\",\"name:es\":\"String\",\"name:zh\":\"String\",\"name:cy\":\"String\",\"name:pl\":\"String\",\"name:da\":\"String\",\"name:he\":\"String\",\"name:tr\":\"String\",\"name:pt\":\"String\",\"name:hi\":\"String\",\"name:de\":\"String\",\"name:lt\":\"String\",\"ele_ft\":\"Number\",\"icao\":\"String\",\"name:lv\":\"String\",\"name:hr\":\"String\",\"name:hu\":\"String\",\"name_en\":\"String\"},\"minzoom\":8,\"maxzoom\":14},{\"id\":\"aeroway\",\"fields\":{\"ref\":\"String\",\"class\":\"String\"},\"minzoom\":10,\"maxzoom\":14},{\"id\":\"boundary\",\"fields\":{\"adm0_r\":\"String\",\"disputed\":\"Number\",\"admin_level\":\"Number\",\"maritime\":\"Number\",\"adm0_l\":\"String\"},\"minzoom\":0,\"maxzoom\":14},{\"id\":\"building\",\"fields\":{\"colour\":\"String\",\"render_height\":\"Number\",\"render_min_height\":\"Number\",\"hide_3d\":\"Boolean\"},\"minzoom\":13,\"maxzoom\":14},{\"id\":\"housenumber\",\"fields\":{\"housenumber\":\"String\"},\"minzoom\":14,\"maxzoom\":14},{\"id\":\"landcover\",\"fields\":{\"subclass\":\"String\",\"class\":\"String\",\"_numpoints\":\"Number\"},\"minzoom\":7,\"maxzoom\":14},{\"id\":\"landuse\",\"fields\":{\"class\":\"String\"},\"minzoom\":4,\"maxzoom\":14},{\"id\":\"mountain_peak\",\"fields\":{\"name:oc\":\"String\",\"name_int\":\"String\",\"name:sk\":\"String\",\"name:ka\":\"String\",\"name:sl\":\"String\",\"name:ga\":\"String\",\"name:sr\":\"String\",\"name:kk\":\"String\",\"name:ca\":\"String\",\"name:sq\":\"String\",\"name:sv\":\"String\",\"name:ko\":\"String\",\"name_de\":\"String\",\"rank\":\"Number\",\"name:cs\":\"String\",\"name:ta\":\"String\",\"name:latin\":\"String\",\"ele\":\"Number\",\"name:ar\":\"String\",\"name:ja\":\"String\",\"name:az\":\"String\",\"name:ro\":\"String\",\"name:nl\":\"String\",\"name:fi\":\"String\",\"name:be\":\"String\",\"name:ru\":\"String\",\"name:bg\":\"String\",\"name\":\"String\",\"name:fr\":\"String\",\"name:br\":\"String\",\"name:hy\":\"String\",\"name:uk\":\"String\",\"name:id\":\"String\",\"name:ml\":\"String\",\"name:mk\":\"String\",\"name:el\":\"String\",\"name:it\":\"String\",\"name:en\":\"String\",\"name:is\":\"String\",\"name:eo\":\"String\",\"name:et\":\"String\",\"name:eu\":\"String\",\"class\":\"String\",\"name:es\":\"String\",\"name:zh\":\"String\",\"name:la\":\"String\",\"name:pl\":\"String\",\"name:da\":\"String\",\"name:he\":\"String\",\"name:tr\":\"String\",\"name:pt\":\"String\",\"name:hi\":\"String\",\"name:de\":\"String\",\"name:lt\":\"String\",\"ele_ft\":\"Number\",\"name:hr\":\"String\",\"name:lv\":\"String\",\"name:hu\":\"String\",\"name_en\":\"String\"},\"minzoom\":7,\"maxzoom\":14},{\"id\":\"park\",\"fields\":{\"name_int\":\"String\",\"name:bs\":\"String\",\"name:sk\":\"String\",\"name:ka\":\"String\",\"name:nonlatin\":\"String\",\"name:sl\":\"String\",\"name:sr\":\"String\",\"name:kk\":\"String\",\"name:ca\":\"String\",\"name:sv\":\"String\",\"name:ko\":\"String\",\"name_de\":\"String\",\"name:cs\":\"String\",\"name:latin\":\"String\",\"name:ar\":\"String\",\"name:ja\":\"String\",\"name:az\":\"String\",\"name:ro\":\"String\",\"name:nl\":\"String\",\"name:be\":\"String\",\"name:fi\":\"String\",\"name:ru\":\"String\",\"name:bg\":\"String\",\"name\":\"String\",\"name:fr\":\"String\",\"name:hy\":\"String\",\"name:uk\":\"String\",\"name:id\":\"String\",\"name:ml\":\"String\",\"name:mk\":\"String\",\"name:el\":\"String\",\"name:mt\":\"String\",\"name:sr-Latn\":\"String\",\"name:it\":\"String\",\"name:en\":\"String\",\"name:eo\":\"String\",\"name:et\":\"String\",\"name:eu\":\"String\",\"class\":\"String\",\"name:zh\":\"String\",\"name:es\":\"String\",\"name:la\":\"String\",\"name:pl\":\"String\",\"name:da\":\"String\",\"name:he\":\"String\",\"name:tr\":\"String\",\"name:pt\":\"String\",\"name:hi\":\"String\",\"name:de\":\"String\",\"name:lt\":\"String\",\"name:lv\":\"String\",\"name:hr\":\"String\",\"name:hu\":\"String\",\"name_en\":\"String\"},\"minzoom\":4,\"maxzoom\":14},{\"id\":\"place\",\"fields\":{\"name:fy\":\"String\",\"name:oc\":\"String\",\"name_int\":\"String\",\"name:bs\":\"String\",\"capital\":\"Number\",\"name:sk\":\"String\",\"name:nonlatin\":\"String\",\"name:ka\":\"String\",\"name:sl\":\"String\",\"name:ga\":\"String\",\"name:sr\":\"String\",\"name:kk\":\"String\",\"name:gd\":\"String\",\"name:sq\":\"String\",\"name:ca\":\"String\",\"name:kn\":\"String\",\"name:sv\":\"String\",\"name:ko\":\"String\",\"name_de\":\"String\",\"name:co\":\"String\",\"rank\":\"Number\",\"name:ku\":\"String\",\"name:cs\":\"String\",\"name:ta\":\"String\",\"name:latin\":\"String\",\"name:ar\":\"String\",\"name:ja\":\"String\",\"name:rm\":\"String\",\"name:az\":\"String\",\"name:ro\":\"String\",\"name:nl\":\"String\",\"name:be\":\"String\",\"name:fi\":\"String\",\"name:ru\":\"String\",\"name:no\":\"String\",\"name:bg\":\"String\",\"name\":\"String\",\"name:fr\":\"String\",\"name:br\":\"String\",\"name:hy\":\"String\",\"name:uk\":\"String\",\"name:id\":\"String\",\"name:ml\":\"String\",\"name:mk\":\"String\",\"name:mt\":\"String\",\"name:el\":\"String\",\"name:sr-Latn\":\"String\",\"name:it\":\"String\",\"name:am\":\"String\",\"name:en\":\"String\",\"name:is\":\"String\",\"name:eo\":\"String\",\"name:et\":\"String\",\"name:eu\":\"String\",\"class\":\"String\",\"iso_a2\":\"String\",\"name:es\":\"String\",\"name:zh\":\"String\",\"name:th\":\"String\",\"name:te\":\"String\",\"name:la\":\"String\",\"name:lb\":\"String\",\"name:cy\":\"String\",\"name:pl\":\"String\",\"name:da\":\"String\",\"name:he\":\"String\",\"name:tr\":\"String\",\"name:pt\":\"String\",\"name:hi\":\"String\",\"name:de\":\"String\",\"name:lt\":\"String\",\"name:lv\":\"String\",\"name:hr\":\"String\",\"name:hu\":\"String\",\"name_en\":\"String\"},\"minzoom\":2,\"maxzoom\":14},{\"id\":\"poi\",\"fields\":{\"name:fy\":\"String\",\"name:oc\":\"String\",\"name_int\":\"String\",\"name:bs\":\"String\",\"name:sk\":\"String\",\"name:ka\":\"String\",\"name:nonlatin\":\"String\",\"name:sl\":\"String\",\"name:ga\":\"String\",\"name:sr\":\"String\",\"name:kk\":\"String\",\"name:gd\":\"String\",\"name:sq\":\"String\",\"name:ca\":\"String\",\"name:kn\":\"String\",\"name:sv\":\"String\",\"name:ko\":\"String\",\"name_de\":\"String\",\"name:co\":\"String\",\"name:ku\":\"String\",\"name:cs\":\"String\",\"name:ta\":\"String\",\"name:latin\":\"String\",\"name:ar\":\"String\",\"name:ja\":\"String\",\"level\":\"Number\",\"name:rm\":\"String\",\"name:az\":\"String\",\"name:ro\":\"String\",\"name:nl\":\"String\",\"name:be\":\"String\",\"name:fi\":\"String\",\"name:ru\":\"String\",\"name:no\":\"String\",\"name:bg\":\"String\",\"name\":\"String\",\"indoor\":\"Number\",\"name:fr\":\"String\",\"name:br\":\"String\",\"name:hy\":\"String\",\"name:uk\":\"String\",\"name:id\":\"String\",\"name:ml\":\"String\",\"layer\":\"Number\",\"name:mk\":\"String\",\"name:mt\":\"String\",\"name:el\":\"String\",\"name:sr-Latn\":\"String\",\"name:it\":\"String\",\"name:am\":\"String\",\"name:en\":\"String\",\"name:is\":\"String\",\"name:eo\":\"String\",\"name:et\":\"String\",\"name:eu\":\"String\",\"class\":\"String\",\"name:zh\":\"String\",\"name:es\":\"String\",\"name:th\":\"String\",\"name:te\":\"String\",\"name:la\":\"String\",\"name:lb\":\"String\",\"name:cy\":\"String\",\"name:pl\":\"String\",\"name:he\":\"String\",\"name:da\":\"String\",\"name:tr\":\"String\",\"name:pt\":\"String\",\"name:hi\":\"String\",\"name:de\":\"String\",\"name:lt\":\"String\",\"subclass\":\"String\",\"name:lv\":\"String\",\"name:hr\":\"String\",\"name:hu\":\"String\",\"name_en\":\"String\"},\"minzoom\":12,\"maxzoom\":14},{\"id\":\"transportation\",\"fields\":{\"access\":\"String\",\"brunnel\":\"String\",\"bicycle\":\"String\",\"surface\":\"String\",\"level\":\"Number\",\"ramp\":\"Number\",\"mtb_scale\":\"String\",\"toll\":\"Number\",\"layer\":\"Number\",\"oneway\":\"Number\",\"horse\":\"String\",\"service\":\"String\",\"subclass\":\"String\",\"indoor\":\"Number\",\"class\":\"String\",\"foot\":\"String\"},\"minzoom\":4,\"maxzoom\":14},{\"id\":\"transportation_name\",\"fields\":{\"name_int\":\"String\",\"level\":\"Number\",\"name:nonlatin\":\"String\",\"route_5\":\"String\",\"route_4\":\"String\",\"route_3\":\"String\",\"route_2\":\"String\",\"route_1\":\"String\",\"layer\":\"Number\",\"network\":\"String\",\"ref\":\"String\",\"route_8\":\"String\",\"route_7\":\"String\",\"route_6\":\"String\",\"name_de\":\"String\",\"subclass\":\"String\",\"ref_length\":\"Number\",\"name\":\"String\",\"indoor\":\"Number\",\"class\":\"String\",\"name_en\":\"String\",\"name:latin\":\"String\"},\"minzoom\":6,\"maxzoom\":14},{\"id\":\"water\",\"fields\":{\"intermittent\":\"Number\",\"id\":\"Number\",\"class\":\"String\"},\"minzoom\":0,\"maxzoom\":14},{\"id\":\"water_name\",\"fields\":{\"name:oc\":\"String\",\"name_int\":\"String\",\"name:bs\":\"String\",\"name:sk\":\"String\",\"name:nonlatin\":\"String\",\"name:ka\":\"String\",\"intermittent\":\"Number\",\"name:sl\":\"String\",\"name:ga\":\"String\",\"name:sr\":\"String\",\"name:kk\":\"String\",\"name:ca\":\"String\",\"name:sq\":\"String\",\"name:sv\":\"String\",\"name:ko\":\"String\",\"name_de\":\"String\",\"name:cs\":\"String\",\"name:latin\":\"String\",\"name:ar\":\"String\",\"name:ja\":\"String\",\"name:az\":\"String\",\"name:ro\":\"String\",\"name:nl\":\"String\",\"name:fi\":\"String\",\"name:be\":\"String\",\"name:ru\":\"String\",\"name:bg\":\"String\",\"name\":\"String\",\"name:fr\":\"String\",\"name:br\":\"String\",\"name:hy\":\"String\",\"name:uk\":\"String\",\"name:id\":\"String\",\"name:mk\":\"String\",\"name:el\":\"String\",\"name:sr-Latn\":\"String\",\"name:it\":\"String\",\"name:en\":\"String\",\"name:is\":\"String\",\"name:eo\":\"String\",\"name:et\":\"String\",\"name:eu\":\"String\",\"class\":\"String\",\"name:zh\":\"String\",\"name:es\":\"String\",\"name:th\":\"String\",\"name:la\":\"String\",\"name:cy\":\"String\",\"name:pl\":\"String\",\"name:he\":\"String\",\"name:da\":\"String\",\"name:tr\":\"String\",\"name:pt\":\"String\",\"name:hi\":\"String\",\"name:de\":\"String\",\"name:lt\":\"String\",\"name:lv\":\"String\",\"name:hr\":\"String\",\"name:hu\":\"String\",\"name_en\":\"String\"},\"minzoom\":9,\"maxzoom\":14},{\"id\":\"waterway\",\"fields\":{\"name_int\":\"String\",\"name:bs\":\"String\",\"name:sk\":\"String\",\"name:ka\":\"String\",\"name:nonlatin\":\"String\",\"intermittent\":\"Number\",\"name:sl\":\"String\",\"name:ga\":\"String\",\"name:sr\":\"String\",\"name:ca\":\"String\",\"name:sq\":\"String\",\"name:kn\":\"String\",\"name:sv\":\"String\",\"name:ko\":\"String\",\"name_de\":\"String\",\"name:co\":\"String\",\"name:ku\":\"String\",\"name:cs\":\"String\",\"name:ta\":\"String\",\"name:latin\":\"String\",\"name:ar\":\"String\",\"name:ja\":\"String\",\"name:ro\":\"String\",\"name:nl\":\"String\",\"name:fi\":\"String\",\"name:ru\":\"String\",\"name:be\":\"String\",\"name:no\":\"String\",\"name:bg\":\"String\",\"name\":\"String\",\"name:fr\":\"String\",\"name:br\":\"String\",\"name:hy\":\"String\",\"brunnel\":\"String\",\"name:uk\":\"String\",\"name:mk\":\"String\",\"name:el\":\"String\",\"name:mt\":\"String\",\"name:sr-Latn\":\"String\",\"name:it\":\"String\",\"name:en\":\"String\",\"name:is\":\"String\",\"name:eo\":\"String\",\"name:et\":\"String\",\"name:eu\":\"String\",\"class\":\"String\",\"name:es\":\"String\",\"name:zh\":\"String\",\"name:th\":\"String\",\"name:la\":\"String\",\"_relid\":\"Number\",\"name:cy\":\"String\",\"name:pl\":\"String\",\"name:da\":\"String\",\"name:he\":\"String\",\"name:tr\":\"String\",\"name:pt\":\"String\",\"name:de\":\"String\",\"name:hi\":\"String\",\"name:lt\":\"String\",\"name:hr\":\"String\",\"name:lv\":\"String\",\"name:hu\":\"String\",\"name_en\":\"String\"},\"minzoom\":3,\"maxzoom\":14}]}", + "name": "OpenMapTiles", + "description": "A tileset showcasing all layers in OpenMapTiles. https://openmaptiles.org", + "attribution": "© OpenMapTiles © OpenStreetMap contributors", + "version": "3.14.0", + "type": "baselayer", + "minzoom": "0", + "maxzoom": "14", + "compression": "gzip", + "planetiler:version": "0.7.0", + "planetiler:githash": "79ba9d925f4a3738a07cc6a6dc4a6ede6d0f5c4b", + "planetiler:buildtime": "2023-10-01T17:00:31.116Z", + "planetiler:osm:osmosisreplicationtime": "2023-12-03T21:20:52Z", + "planetiler:osm:osmosisreplicationseq": "3897", + "planetiler:osm:osmosisreplicationurl": "http://download.geofabrik.de/europe/hungary-updates" +} \ No newline at end of file diff --git a/scripts/mbtiles_extractor/extract.py b/scripts/mbtiles_extractor/extract.py new file mode 100755 index 0000000..21201e8 --- /dev/null +++ b/scripts/mbtiles_extractor/extract.py @@ -0,0 +1,98 @@ +#!/usr/bin/env python3 +import json +import shutil +import sqlite3 +import sys +from pathlib import Path + +import click + + +@click.command() +@click.argument( + 'mbtiles_path', + type=click.Path(exists=True, dir_okay=False, file_okay=True, path_type=Path), +) +@click.argument('dir_path', type=click.Path(dir_okay=True, file_okay=False, path_type=Path)) +def cli(mbtiles_path: Path, dir_path: Path): + """ + Extracts a mbtiles sqlite to a folder + Deduplicating identical tiles as hard-links + + used for reference: https://github.com/mapbox/mbutil + """ + + if dir_path.exists() and any(dir_path.iterdir()): + sys.exit('Dir not empty') + + dir_path.mkdir(exist_ok=True) + + conn = sqlite3.connect(mbtiles_path) + c = conn.cursor() + + write_metadata(c, dir_path=dir_path) + write_dedupl_files(c, dir_path=dir_path) + write_tile_file(c, dir_path=dir_path) + + # remove dedupl files at the end + shutil.rmtree(dir_path / 'dedupl') + + +def write_metadata(c, *, dir_path): + metadata = dict(c.execute('select name, value from metadata').fetchall()) + json.dump(metadata, open(dir_path / 'metadata.json', 'w'), indent=2) + + +def write_dedupl_files(c, *, dir_path): + # dedupl files + # write out the tiles_data files into a multi-level folder + total = c.execute('select count(*) from tiles_data').fetchone()[0] + + c.execute('select tile_data_id, tile_data from tiles_data') + for i, row in enumerate(c, start=1): + dedupl_id = row[0] + dedupl_path = dir_path / 'dedupl' / dedupl_helper_path(dedupl_id) + dedupl_path.parent.mkdir(parents=True, exist_ok=True) + with open(dedupl_path, 'wb') as fp: + fp.write(row[1]) + print(f'written dedupl file {i}/{total}: {dedupl_id}') + + +def write_tile_file(c, *, dir_path): + total = c.execute('select count(*) from tiles_shallow').fetchone()[0] + + c.execute('select zoom_level, tile_column, tile_row, tile_data_id from tiles_shallow') + for i, row in enumerate(c, start=1): + z = row[0] + x = row[1] + y = flip_y(z, row[2]) + dedupl_id = row[3] + + dedupl_path = dir_path / 'dedupl' / dedupl_helper_path(dedupl_id) + + tile_path = dir_path / 'tiles' / str(z) / str(x) / f'{y}.pbf' + tile_path.parent.mkdir(parents=True, exist_ok=True) + + # create the hard link + tile_path.hardlink_to(dedupl_path) + print(f'hard link created {i}/{total}: {tile_path}') + + +def dedupl_helper_path(dedupl_id: int) -> Path: + """ + Naming 200 million files such that each subdir has max 1000 children + """ + + str_num = f'{dedupl_id:09}' + l1 = str_num[:3] + l2 = str_num[3:6] + l3 = str_num[6:] + return Path(l1) / l2 / f'{l3}.pbf' + + +def flip_y(zoom, y): + return (2**zoom - 1) - y + + +if __name__ == '__main__': + cli() diff --git a/scripts/tile_gen/extract.sh b/scripts/tile_gen/extract.sh index 1ad4bd8..b174a10 100644 --- a/scripts/tile_gen/extract.sh +++ b/scripts/tile_gen/extract.sh @@ -27,8 +27,8 @@ rm -f image.ext4 # make a sparse file # make sure it's bigger then the current OSM output # less fragmentation with fallocate -fallocate -l 150G image.ext4 -#truncate -s 150G image.ext4 +fallocate -l 1500G image.ext4 +#truncate -s 1500G image.ext4 mke2fs -t ext4 -v \ @@ -42,10 +42,14 @@ mke2fs -t ext4 -v \ mkdir mnt sudo mount -v \ -t ext4 \ - -o nobarrier,noatime,data=writeback,commit=100 \ + -o nobarrier,noatime \ image.ext4 mnt -sudo /data/ofm/tile_gen/venv/bin/mb-util output.mbtiles mnt/extract +sudo chown ofm:ofm -R mnt + +../../tile_gen/venv/bin/python ../../tile_gen/extract.py output.mbtiles mnt/extract \ + > "extract_out.log" 2> "extract_err.log" + sudo umount mnt resize2fs -M image.ext4 diff --git a/scripts/tile_gen/planetiler_monaco.sh b/scripts/tile_gen/planetiler_monaco.sh index b6e4ca6..e4736c9 100644 --- a/scripts/tile_gen/planetiler_monaco.sh +++ b/scripts/tile_gen/planetiler_monaco.sh @@ -16,7 +16,7 @@ java -Xmx1g \ `# Store temporary node locations at fixed positions in a memory-mapped file` \ --nodemap-type=array --storage=mmap \ --force \ - > "output.log" 2> "err.log" + > "planetiler_out.log" 2> "planetiler_err.log" diff --git a/scripts/tile_gen/planetiler_planet.sh b/scripts/tile_gen/planetiler_planet.sh index 072d634..ff3fae6 100644 --- a/scripts/tile_gen/planetiler_planet.sh +++ b/scripts/tile_gen/planetiler_planet.sh @@ -16,7 +16,7 @@ java -Xmx30g \ `# Store temporary node locations at fixed positions in a memory-mapped file` \ --nodemap-type=array --storage=mmap \ --force \ - > "output.log" 2> "err.log" + > "planetiler_out.log" 2> "planetiler_err.log"