From 818cf1e349a2078ba95143b30bdc27b7ece4d59c Mon Sep 17 00:00:00 2001 From: Zsolt Ero Date: Mon, 18 Dec 2023 03:06:16 +0100 Subject: [PATCH] work --- init-server.py | 1 + scripts/benchmark/wrk_custom_list.lua | 14 ++++++- .../extract_mbtiles}/dedupl-fix.log | 0 scripts/extract_mbtiles/extract_mbtiles.py | 42 +++++++++++++++---- scripts/http_host/nginx_site.conf | 4 +- scripts/shrink_btrfs/shrink_btrfs.py | 4 ++ scripts/tile_gen/extract_btrfs.sh | 40 ++++++++++++++---- ssh_lib/benchmark.py | 2 + 8 files changed, 85 insertions(+), 22 deletions(-) rename {docs/fs_stats => scripts/extract_mbtiles}/dedupl-fix.log (100%) diff --git a/init-server.py b/init-server.py index 711f243..aef95bc 100755 --- a/init-server.py +++ b/init-server.py @@ -59,6 +59,7 @@ def prepare_http_host(c): def debug_tmp(c): c.sudo('rm -rf /data/ofm/logs') c.sudo('mkdir -p /data/ofm/logs') + c.sudo('rm -f /data/nginx/logs/*') put(c, f'{config}/nginx/nginx.conf', '/etc/nginx/') put(c, f'{scripts}/http_host/nginx_site.conf', '/data/nginx/sites') c.sudo('nginx -t') diff --git a/scripts/benchmark/wrk_custom_list.lua b/scripts/benchmark/wrk_custom_list.lua index ea27af6..8869ffc 100644 --- a/scripts/benchmark/wrk_custom_list.lua +++ b/scripts/benchmark/wrk_custom_list.lua @@ -1,7 +1,7 @@ local counter = 1 local lines = {} local base_path = "/planet/20231208_091355/tiles/" -local file_path = "/data/ofm/benchmark/path_list_small.txt" +local file_path = "/data/ofm/benchmark/path_list_100k.txt" for line in io.lines(file_path) do table.insert(lines, base_path .. 
line) @@ -22,8 +22,18 @@ end request = function() -- Return the request object with the current URL path - local path = getNextUrl() + path = getNextUrl() local headers = {} headers["Host"] = "ofm" return wrk.format('GET', path, headers, nil) end + +response = function(status) + if status ~= 200 then + print("Non-200 response") + print("Status: ", status) + -- this only works in single threaded mode (-t1) + print("Request path: ", path) + end +end + diff --git a/docs/fs_stats/dedupl-fix.log b/scripts/extract_mbtiles/dedupl-fix.log similarity index 100% rename from docs/fs_stats/dedupl-fix.log rename to scripts/extract_mbtiles/dedupl-fix.log diff --git a/scripts/extract_mbtiles/extract_mbtiles.py b/scripts/extract_mbtiles/extract_mbtiles.py index e7d441d..05e0363 100755 --- a/scripts/extract_mbtiles/extract_mbtiles.py +++ b/scripts/extract_mbtiles/extract_mbtiles.py @@ -1,5 +1,6 @@ #!/usr/bin/env python3 import json +import os import shutil import sqlite3 import sys @@ -22,20 +23,25 @@ def cli(mbtiles_path: Path, dir_path: Path): used for reference: https://github.com/mapbox/mbutil """ - # if dir_path.exists() and any(dir_path.iterdir()): - # sys.exit('Dir not empty') + if dir_path.exists() and any(dir_path.iterdir()): + sys.exit('Dir not empty') dir_path.mkdir(exist_ok=True) conn = sqlite3.connect(mbtiles_path) c = conn.cursor() - # write_metadata(c, dir_path=dir_path) - # write_dedupl_files(c, dir_path=dir_path) + write_metadata(c, dir_path=dir_path) + write_dedupl_files(c, dir_path=dir_path) write_tile_files(c, dir_path=dir_path) # remove dedupl files at the end - # shutil.rmtree(dir_path / 'dedupl') + shutil.rmtree(dir_path / 'dedupl') + + # if it's a full planet run, + # make sure there are exactly the right number of files generated + if 'planet' in mbtiles_path.parent.name: + assert count_files(dir_path / 'tiles') == calculate_tiles_sum(14) def write_metadata(c, *, dir_path): @@ -92,13 +98,20 @@ def write_tile_files(c, *, dir_path): if e.errno == 31: 
bug_fix_dict.setdefault(dedupl_path, 0) bug_fix_dict[dedupl_path] += 1 - fixed_path = get_fixed_dedupl_name(bug_fix_dict, dedupl_path) - shutil.copyfile(dedupl_path, fixed_path) - print(f'Created fixed dedupl file: {fixed_path}') + dedupl_path_fixed = get_fixed_dedupl_name(bug_fix_dict, dedupl_path) + shutil.copyfile(dedupl_path, dedupl_path_fixed) + print(f'Created fixed dedupl file: {dedupl_path_fixed}') + tile_path.hardlink_to(dedupl_path_fixed) + print(f'hard link created {i}/{total} {i / total * 100:.1f}%: {tile_path}') else: raise - # last file: 14/16383/0.pbf + +def count_files(folder): + total = 0 + for root, dirs, files in os.walk(folder): + total += len(files) + return total def get_fixed_dedupl_name(bug_fix_dict, dedupl_path): @@ -124,5 +137,16 @@ def flip_y(zoom, y): return (2**zoom - 1) - y +def calculate_tiles(zoom_level): + return (2**zoom_level) ** 2 + + +def calculate_tiles_sum(zoom_level): + """ + Tiles up to zoom level (geometric series) + """ + return (4 ** (zoom_level + 1) - 1) // 3 + + if __name__ == '__main__': cli() diff --git a/scripts/http_host/nginx_site.conf b/scripts/http_host/nginx_site.conf index c52c297..bc96d4f 100644 --- a/scripts/http_host/nginx_site.conf +++ b/scripts/http_host/nginx_site.conf @@ -1,7 +1,7 @@ server { server_name ofm tiles.openfreemaps.org; # test with - # curl -H "Host: ofm" http://localhost/planet/20231208_091355/tiles/7/72/48.pbf + # curl -H "Host: ofm" http://localhost/planet/20231208_091355/tiles/11/637/1141.pbf #access_log /data/ofm/logs/nginx-access.log access_json; @@ -11,7 +11,7 @@ server { location /planet/20231208_091355 { gzip off; - alias /data/ofm/runs/planet_20231208_091355/mnt_ro/extract; + alias /data/ofm/runs/planet_20231208_091355/mnt_rw/extract; autoindex on; # Enables listing of directory } } diff --git a/scripts/shrink_btrfs/shrink_btrfs.py b/scripts/shrink_btrfs/shrink_btrfs.py index fa98005..74b3dd1 100755 --- a/scripts/shrink_btrfs/shrink_btrfs.py +++ b/scripts/shrink_btrfs/shrink_btrfs.py @@
-8,6 +8,10 @@ from pathlib import Path import click +# btrfs cannot shrink smaller than about 268 MB +SMALLEST_SIZE = 270_000_000 + + @click.command() @click.argument( 'btrfs_img', diff --git a/scripts/tile_gen/extract_btrfs.sh b/scripts/tile_gen/extract_btrfs.sh index 912f485..91321d6 100644 --- a/scripts/tile_gen/extract_btrfs.sh +++ b/scripts/tile_gen/extract_btrfs.sh @@ -1,12 +1,15 @@ #!/usr/bin/env bash +set -e sudo umount mnt_rw || true -rm -rf mnt_rw -rm -f image.btrfs - +sudo umount mnt_rw2 || true +rm -rf mnt_rw* +rm -f image*.btrfs +rm -f *.log # make an empty file that's definitely bigger then the current OSM output fallocate -l 200G image.btrfs +fallocate -l 200G image2.btrfs # metadata: single needed as default is now DUP @@ -14,25 +17,44 @@ mkfs.btrfs -v \ -m single \ image.btrfs +mkfs.btrfs -v \ + -m single \ + image2.btrfs + # https://btrfs.readthedocs.io/en/latest/btrfs-man5.html#mount-options # compression doesn't make sense, data is already gzip compressed -mkdir -p mnt_rw +mkdir -p mnt_rw mnt_rw2 + sudo mount -v \ -t btrfs \ -o noacl,nobarrier,noatime,max_inline=4096 \ image.btrfs mnt_rw +sudo mount -v \ + -t btrfs \ + -o noacl,nobarrier,noatime,max_inline=4096 \ + image2.btrfs mnt_rw2 -sudo chown ofm:ofm -R mnt_rw +sudo chown ofm:ofm -R mnt_rw mnt_rw2 ../../tile_gen/venv/bin/python ../../tile_gen/extract_mbtiles.py output.mbtiles mnt_rw/extract \ - > "extract_out.log" 2> "extract_err.log" + > extract_out.log 2> extract_err.log + +# we need to extract, delete and rsync onto a new partition +# otherwise the partition image stays big +rsync -aH mnt_rw/extract/ mnt_rw2/extract/ > rsync_out.log 2> rsync_err.log sudo umount mnt_rw +sudo umount mnt_rw2 +rm -r mnt_rw* -sudo ../../tile_gen/venv/bin/python ../../tile_gen/shrink_btrfs.py image.btrfs \ - > "shrink_out.log" 2> "shrink_err.log" +sudo ../../tile_gen/venv/bin/python ../../tile_gen/shrink_btrfs.py image2.btrfs \ + > shrink_out.log 2> shrink_err.log + + +#rm image.btrfs + +#mv image2.btrfs
done.btrfs # pigz -k image.btrfs --fast -# rsync -avH mnt_rw/extract/ mnt_rw2/extract/ > rsync_out.log 2> rsync_err.log \ No newline at end of file diff --git a/ssh_lib/benchmark.py b/ssh_lib/benchmark.py index 2bb024a..2a0b812 100644 --- a/ssh_lib/benchmark.py +++ b/ssh_lib/benchmark.py @@ -34,3 +34,5 @@ def benchmark(c): apt_get_install(c, 'wrk') c.sudo('mkdir -p /data/ofm/benchmark') put(c, f'{scripts}/benchmark/wrk_custom_list.lua', '/data/ofm/benchmark') + + # wrk -c10 -d10s -t1 -s /data/ofm/benchmark/wrk_custom_list.lua http://localhost