From 2d9e5f2f4fa8a7f7caee2e063f63fc0c8593ea33 Mon Sep 17 00:00:00 2001 From: Zsolt Ero Date: Thu, 21 Dec 2023 02:06:48 +0100 Subject: [PATCH] work --- docs/fs_stats/btrfs_not_used.txt | 7 +++++ docs/fs_stats/naive_mbutil_ext4.md | 2 ++ docs/fs_stats/readme.md | 2 +- init-server.py | 28 ++++------------- .../tile_gen/{ => _not_used}/extract_ext4.sh | 0 scripts/tile_gen/extract_btrfs.sh | 30 +++++-------------- scripts/tile_gen/gen_monaco.sh | 11 ------- scripts/tile_gen/gen_planet.sh | 11 ------- scripts/tile_gen/planetiler_monaco.sh | 16 +++++----- scripts/tile_gen/planetiler_planet.sh | 18 ++++++----- scripts/tile_gen/prepare-virtualenv.sh | 2 -- ssh_lib/planetiler.py | 2 +- 12 files changed, 45 insertions(+), 84 deletions(-) create mode 100644 docs/fs_stats/btrfs_not_used.txt rename scripts/tile_gen/{ => _not_used}/extract_ext4.sh (100%) delete mode 100644 scripts/tile_gen/gen_monaco.sh delete mode 100644 scripts/tile_gen/gen_planet.sh diff --git a/docs/fs_stats/btrfs_not_used.txt b/docs/fs_stats/btrfs_not_used.txt new file mode 100644 index 0000000..a2cc4f4 --- /dev/null +++ b/docs/fs_stats/btrfs_not_used.txt @@ -0,0 +1,7 @@ +# takes a lot of time, should only be used when debugging + +echo -e "\n\nbtrfs filesystem du -s" +sudo btrfs filesystem du -s mnt_rw + +echo -e "\n\ncompsize -x" +sudo compsize -x mnt_rw 2> /dev/null || true \ No newline at end of file diff --git a/docs/fs_stats/naive_mbutil_ext4.md b/docs/fs_stats/naive_mbutil_ext4.md index fe54ae1..64c71b0 100644 --- a/docs/fs_stats/naive_mbutil_ext4.md +++ b/docs/fs_stats/naive_mbutil_ext4.md @@ -1,5 +1,7 @@ ## native mapbox/mbutil +// pip install git+https://github.com/mapbox/mbutil.git@544c76e + ``` Filesystem 1K-blocks Used Available Use% Mounted on /dev/loop0 1,474,386,100 1,119,622,516 354,763,584 76% diff --git a/docs/fs_stats/readme.md b/docs/fs_stats/readme.md index 799cd04..516ffd7 100644 --- a/docs/fs_stats/readme.md +++ b/docs/fs_stats/readme.md @@ -1,5 +1,5 @@ # Comparing filesystem stats after extraction -Run: *planet_20231208* +Run: planet/20231208_091355_pt dedupl-fix.log contains the files which were created for the btrfs 64k limit workaround. diff --git a/init-server.py b/init-server.py index 7a1532f..38994cc 100755 --- a/init-server.py +++ b/init-server.py @@ -32,12 +32,13 @@ def prepare_tile_gen(c): install_planetiler(c) for file in [ - 'prepare-virtualenv.sh', - 'planetiler_planet.sh', + 'extract_btrfs.sh', 'planetiler_monaco.sh', - 'gen_planet.sh', - 'gen_monaco.sh', - 'extract.sh', + 'planetiler_planet.sh', + 'prepare-virtualenv.sh', + 'upload_cloudflare.sh', + 'extract_mbtiles/extract_mbtiles.py', + 'shrink_btrfs/shrink_btrfs.py', ]: put( c, @@ -57,23 +58,6 @@ def prepare_http_host(c): def debug_tmp(c): - put(c, scripts / 'tile_gen' / 'extract_btrfs.sh', TILE_GEN_BIN, permissions='755', owner='ofm') - put( - c, - scripts / 'extract_mbtiles' / 'extract_mbtiles.py', - TILE_GEN_BIN, - permissions='755', - owner='ofm', - ) - put( - c, - scripts / 'shrink_btrfs' / 'shrink_btrfs.py', - TILE_GEN_BIN, - permissions='755', - owner='ofm', - ) - - return c.sudo('rm -rf /data/ofm/logs') c.sudo('mkdir -p /data/ofm/logs') c.sudo('rm -f /data/nginx/logs/*') diff --git a/scripts/tile_gen/extract_ext4.sh b/scripts/tile_gen/_not_used/extract_ext4.sh similarity index 100% rename from scripts/tile_gen/extract_ext4.sh rename to scripts/tile_gen/_not_used/extract_ext4.sh diff --git a/scripts/tile_gen/extract_btrfs.sh b/scripts/tile_gen/extract_btrfs.sh index 7bf9658..748a863 100644 --- a/scripts/tile_gen/extract_btrfs.sh +++ b/scripts/tile_gen/extract_btrfs.sh @@ -1,11 +1,14 @@ #!/usr/bin/env bash set -e +export TILE_GEN_BIN=/data/ofm/tile_gen/bin +export VENV_PYTHON=$TILE_GEN_BIN/venv/python + sudo umount mnt_rw 2> /dev/null || true sudo umount mnt_rw2 2> /dev/null || true rm -rf mnt_rw* tmp_* -rm -f *.btrfs *.gz -rm -f *.log *.txt +rm -f "*.btrfs" "*.gz" +rm -f "*.log" "*.txt" # make an empty file that's definitely bigger then the current OSM output fallocate -l 200G image.btrfs @@ -37,7 +40,8 @@ sudo mount \ sudo chown ofm:ofm -R mnt_rw mnt_rw2 -../../tile_gen/venv/bin/python ../../tile_gen/extract_mbtiles.py output.mbtiles mnt_rw/extract \ +$VENV_PYTHON $TILE_GEN_BIN/extract_mbtiles/extract_mbtiles.py \ + tiles.mbtiles mnt_rw/extract \ > extract_out.log 2> extract_err.log grep fixed extract_out.log > dedupl_fixed.log || true @@ -55,41 +59,23 @@ rsync -avH \ { echo -e "df -h" sudo df -h mnt_rw - echo -e "\n\nbtrfs filesystem df" sudo btrfs filesystem df mnt_rw - echo -e "\n\nbtrfs filesystem show" sudo btrfs filesystem show mnt_rw - echo -e "\n\nbtrfs filesystem usage" sudo btrfs filesystem usage mnt_rw - -# takes a lot of time, should only be used when debugging -#echo -e "\n\nbtrfs filesystem du -s" -#sudo btrfs filesystem du -s mnt_rw -#echo -e "\n\ncompsize -x" -#sudo compsize -x mnt_rw 2> /dev/null || true } > stats1.txt { echo -e "df -h" sudo df -h mnt_rw2 - echo -e "\n\nbtrfs filesystem df" sudo btrfs filesystem df mnt_rw2 - echo -e "\n\nbtrfs filesystem show" sudo btrfs filesystem show mnt_rw2 - echo -e "\n\nbtrfs filesystem usage" sudo btrfs filesystem usage mnt_rw2 - -# takes a lot of time, should only be used when debugging -#echo -e "\n\nbtrfs filesystem du -s" -#sudo btrfs filesystem du -s mnt_rw2 -#echo -e "\n\ncompsize -x" -#sudo compsize -x mnt_rw2 2> /dev/null || true } > stats2.txt @@ -97,7 +83,7 @@ sudo umount mnt_rw sudo umount mnt_rw2 rm -r mnt_rw* -sudo ../../tile_gen/venv/bin/python ../../tile_gen/shrink_btrfs.py image2.btrfs \ +sudo $VENV_PYTHON $TILE_GEN_BIN/shrink_btrfs/shrink_btrfs.py image2.btrfs \ > shrink_out.log 2> shrink_err.log diff --git a/scripts/tile_gen/gen_monaco.sh b/scripts/tile_gen/gen_monaco.sh deleted file mode 100644 index 690b439..0000000 --- a/scripts/tile_gen/gen_monaco.sh +++ /dev/null @@ -1,11 +0,0 @@ -#!/usr/bin/env bash - -DATE=$(date +"%Y%m%d_%H%M%S") - -RUN_FOLDER="/data/ofm/runs/monaco_$DATE" - -mkdir -p "$RUN_FOLDER" -cd "$RUN_FOLDER" || exit - -bash /data/ofm/tile_gen/planetiler_monaco.sh "$DATE" - diff --git a/scripts/tile_gen/gen_planet.sh b/scripts/tile_gen/gen_planet.sh deleted file mode 100644 index 124a1da..0000000 --- a/scripts/tile_gen/gen_planet.sh +++ /dev/null @@ -1,11 +0,0 @@ -#!/usr/bin/env bash - -DATE=$(date +"%Y%m%d_%H%M%S") - -RUN_FOLDER="/data/ofm/runs/planet_$DATE" - -mkdir -p "$RUN_FOLDER" -cd "$RUN_FOLDER" || exit - -bash /data/ofm/tile_gen/planetiler_planet.sh "$DATE" - diff --git a/scripts/tile_gen/planetiler_monaco.sh b/scripts/tile_gen/planetiler_monaco.sh index e4736c9..9cd20a4 100644 --- a/scripts/tile_gen/planetiler_monaco.sh +++ b/scripts/tile_gen/planetiler_monaco.sh @@ -1,22 +1,24 @@ #!/usr/bin/env bash -# the Xmx value below the most important parameter here -# setting is less then 25g means there is too little memory -# setting it to too much means there is too much memory used +DATE=$(date +"%Y%m%d_%H%M%S") +TILE_GEN_BIN=/data/ofm/tile_gen/bin + +RUN_FOLDER="/data/ofm/tile_gen/runs/monaco/${DATE}_pt" + +mkdir -p "$RUN_FOLDER" +cd "$RUN_FOLDER" || exit java -Xmx1g \ - -jar /data/ofm/tile_gen/planetiler.jar \ + -jar $TILE_GEN_BIN/planetiler.jar \ `# Download the latest osm.pbf from s3://osm-pds bucket` \ --area=monaco --download \ `# Accelerate the download by fetching the 10 1GB chunks at a time in parallel` \ --download-threads=10 --download-chunk-size-mb=1000 \ `# Also download name translations from wikidata` \ --fetch-wikidata \ - --output=output.mbtiles \ + --output=tiles.mbtiles \ `# Store temporary node locations at fixed positions in a memory-mapped file` \ --nodemap-type=array --storage=mmap \ --force \ > "planetiler_out.log" 2> "planetiler_err.log" - - diff --git a/scripts/tile_gen/planetiler_planet.sh b/scripts/tile_gen/planetiler_planet.sh index ff3fae6..f9be3ad 100644 --- a/scripts/tile_gen/planetiler_planet.sh +++ b/scripts/tile_gen/planetiler_planet.sh @@ -1,22 +1,26 @@ #!/usr/bin/env bash -# the Xmx value below the most important parameter here -# setting is less then 25g means there is too little memory -# setting it to too much means there is too much memory used +DATE=$(date +"%Y%m%d_%H%M%S") +TILE_GEN_BIN=/data/ofm/tile_gen/bin +RUN_FOLDER="/data/ofm/tile_gen/runs/planet/${DATE}_pt" + +mkdir -p "$RUN_FOLDER" +cd "$RUN_FOLDER" || exit + +# the Xmx value below the most important parameter here +# 30 GB works well java -Xmx30g \ - -jar /data/ofm/tile_gen/planetiler.jar \ + -jar $TILE_GEN_BIN/planetiler.jar \ `# Download the latest planet.osm.pbf from s3://osm-pds bucket` \ --area=planet --bounds=planet --download \ `# Accelerate the download by fetching the 10 1GB chunks at a time in parallel` \ --download-threads=10 --download-chunk-size-mb=1000 \ `# Also download name translations from wikidata` \ --fetch-wikidata \ - --output=output.mbtiles \ + --output=tiles.mbtiles \ `# Store temporary node locations at fixed positions in a memory-mapped file` \ --nodemap-type=array --storage=mmap \ --force \ > "planetiler_out.log" 2> "planetiler_err.log" - - diff --git a/scripts/tile_gen/prepare-virtualenv.sh b/scripts/tile_gen/prepare-virtualenv.sh index 7ebc930..1c016ff 100755 --- a/scripts/tile_gen/prepare-virtualenv.sh +++ b/scripts/tile_gen/prepare-virtualenv.sh @@ -10,8 +10,6 @@ python3 -m venv venv venv/bin/pip -V venv/bin/pip install -U pip wheel setuptools -venv/bin/pip install git+https://github.com/mapbox/mbutil.git@544c76e - diff --git a/ssh_lib/planetiler.py b/ssh_lib/planetiler.py index f578ec9..cafaff3 100644 --- a/ssh_lib/planetiler.py +++ b/ssh_lib/planetiler.py @@ -3,7 +3,7 @@ from ssh_lib.utils import apt_get_install, apt_get_update PLANETILER_VERSION = '0.7.0' -TILE_GEN_BIN = '/data/ofm/tile_gen' +TILE_GEN_BIN = '/data/ofm/tile_gen/bin' PLANETILER_PATH = f'{TILE_GEN_BIN}/planetiler.jar'