From e2c2f1fc7ecde34890e653b115694d7b7c9b27ad Mon Sep 17 00:00:00 2001 From: Zsolt Ero Date: Sun, 17 Dec 2023 18:53:29 +0100 Subject: [PATCH] work --- TODO.txt | 2 + docs/extract.md | 136 +++++++++++---------- ideas-filesystems.txt | 5 + scripts/benchmark/.gitignore | 1 - scripts/benchmark/command.txt | 4 + scripts/extract_mbtiles/extract_mbtiles.py | 48 ++++++-- scripts/tile_gen/extract_btrfs.sh | 5 +- scripts/tile_gen/extract_ext4.sh | 4 +- 8 files changed, 129 insertions(+), 76 deletions(-) create mode 100644 TODO.txt create mode 100644 ideas-filesystems.txt delete mode 100644 scripts/benchmark/.gitignore create mode 100644 scripts/benchmark/command.txt diff --git a/TODO.txt b/TODO.txt new file mode 100644 index 0000000..a2e5854 --- /dev/null +++ b/TODO.txt @@ -0,0 +1,2 @@ +fix 64k btrfs max link + diff --git a/docs/extract.md b/docs/extract.md index cb7c3bb..dcc90fb 100644 --- a/docs/extract.md +++ b/docs/extract.md @@ -1,4 +1,9 @@ -### native mapbox/mbutil +# Comparing filesystem stats after extraction + +Run: *planet_20231208* + + +## native mapbox/mbutil Filesystem 1K-blocks Used Available Use% Mounted on /dev/loop0 1,474,386,100 1,119,622,516 354,763,584 76% @@ -7,98 +12,103 @@ Filesystem Inodes IUsed IFree IUse% Mounted on /dev/loop0 393,216,000 269,252,174 123,963,826 69% - -### extract dedupl ext4 +## deduplicated on ext4 39,570,683 dedupl files -df -h mnt +df -h mnt_rw Filesystem Size Used Avail Use% Mounted on /dev/loop0 1.4T 187G 1.2T 14% -df mnt +df mnt_rw Filesystem 1K-blocks Used Available Use% Mounted on /dev/loop0 1474386100 195624664 1278761436 14% -df -i mnt +df -i mnt_rw Filesystem Inodes IUsed IFree IUse% Mounted on /dev/loop0 393216000 39614466 353601534 11% ---- after resize2fs ext4 -df -h mnt +## deduplicated on BTRFS + +### creation params + +``` +mkfs.btrfs -m single +mount -o noacl,nobarrier,noatime,max_inline=4096 +``` + +### df + +``` +df -h mnt_rw Filesystem Size Used Avail Use% Mounted on -/dev/loop0 189G 187G 2.4G 99% +/dev/loop3 
300G 139G 161G 47% +``` -df mnt +``` +df mnt_rw Filesystem 1K-blocks Used Available Use% Mounted on -/dev/loop0 198098376 195624664 2473712 99% +/dev/loop3 314572800 145030788 168339820 47% +``` -df -i mnt -Filesystem Inodes IUsed IFree IUse% Mounted on -/dev/loop0 52854784 39614466 13240318 75% +### btrfs - -### extract dedupl btrfs -note: this test uses compress-force=lzo, but it's actually uncompressible data since the PBF files are gzipped already - - -df -h mnt -Filesystem Size Used Avail Use% Mounted on -/dev/loop0 300G 97G 204G 33% - -df mnt -Filesystem 1K-blocks Used Available Use% Mounted on -/dev/loop0 314572800 100925972 213428604 33% - -btrfs filesystem df mnt +``` +btrfs filesystem df mnt_rw Data, single: total=48.01GiB, used=47.45GiB System, single: total=4.00MiB, used=16.00KiB -Metadata, single: total=49.01GiB, used=48.32GiB -GlobalReserve, single: total=496.86MiB, used=0.00B +Metadata, single: total=92.01GiB, used=90.37GiB +GlobalReserve, single: total=512.00MiB, used=0.00B +``` -btrfs filesystem du -s mnt + +``` +btrfs filesystem du -s mnt_rw Total Exclusive Set shared Filename - 47.45GiB 47.45GiB 0.00B mnt + 47.45GiB 47.45GiB 0.00B mnt_rw +``` +``` +btrfs filesystem show mnt_rw + Total devices 1 FS bytes used 137.81GiB + devid 1 size 300.00GiB used 140.02GiB path /dev/loop3 +``` - -sudo btrfs filesystem show mnt -Label: none uuid: ce7615d1-0ee5-460b-bdb0-7c4d214eecc4 - Total devices 1 FS bytes used 95.76GiB - devid 1 size 300.00GiB used 97.02GiB path /dev/loop0 - -sudo btrfs filesystem usage mnt +``` +btrfs filesystem usage mnt_rw Overall: - Device size: 300.00GiB - Device allocated: 97.02GiB - Device unallocated: 202.98GiB - Device missing: 0.00B - Used: 95.76GiB - Free (estimated): 203.54GiB (min: 203.54GiB) - Free (statfs, df): 203.54GiB - Data ratio: 1.00 - Metadata ratio: 1.00 - Global reserve: 501.22MiB (used: 0.00B) - Multiple profiles: no + Device size: 300.00GiB + Device allocated: 140.02GiB + Device unallocated: 159.98GiB + Device 
missing: 0.00B + Used: 137.81GiB + Free (estimated): 160.54GiB (min: 160.54GiB) + Free (statfs, df): 160.54GiB + Data ratio: 1.00 + Metadata ratio: 1.00 + Global reserve: 512.00MiB (used: 0.00B) + Multiple profiles: no Data,single: Size:48.01GiB, Used:47.45GiB (98.83%) - /dev/loop0 48.01GiB + /dev/loop3 48.01GiB -Metadata,single: Size:49.01GiB, Used:48.32GiB (98.60%) - /dev/loop0 49.01GiB +Metadata,single: Size:92.01GiB, Used:90.37GiB (98.22%) + /dev/loop3 92.01GiB System,single: Size:4.00MiB, Used:16.00KiB (0.39%) - /dev/loop0 4.00MiB + /dev/loop3 4.00MiB Unallocated: - /dev/loop0 202.98GiB + /dev/loop3 159.98GiB +``` + +### compsize - -compsize -x mnt -Processed 44249086 files, 3458702 regular extents (3800454 refs), 40448654 inline. -Type Perc Disk Usage Uncompressed Referenced -TOTAL 99% 74G 74G 80G -none 100% 74G 74G 80G -lzo 20% 4.0K 20K 20K - +``` +compsize -x mnt_rw +Processed 308790063 files, 3458682 regular extents (6917363 refs), 301872700 inline. +Type Perc Disk Usage Uncompressed Referenced +TOTAL 100% 118G 118G 165G +none 100% 118G 118G 165G +``` \ No newline at end of file diff --git a/ideas-filesystems.txt b/ideas-filesystems.txt new file mode 100644 index 0000000..7b9b3af --- /dev/null +++ b/ideas-filesystems.txt @@ -0,0 +1,5 @@ +ext4 + +Reduce the inode size: 256 -> -I 128 + +Reduce the inode ratio: The inode ratio is the number of bytes of disk space per inode. You can increase the inode ratio to reduce the number of inodes created. This can be done with the -i option when creating the filesystem. For example, -i 8192 will create one inode for every 8192 bytes of space.
\ No newline at end of file diff --git a/scripts/benchmark/.gitignore b/scripts/benchmark/.gitignore deleted file mode 100644 index 2211df6..0000000 --- a/scripts/benchmark/.gitignore +++ /dev/null @@ -1 +0,0 @@ -*.txt diff --git a/scripts/benchmark/command.txt b/scripts/benchmark/command.txt new file mode 100644 index 0000000..e218ffe --- /dev/null +++ b/scripts/benchmark/command.txt @@ -0,0 +1,4 @@ +wrk -c1000 -d10s -t1 -s /data/ofm/benchmark/wrk_custom_list.lua http://localhost + +# -t1 - needs to be single-threaded, otherwise the URLs would not be read in sequence + diff --git a/scripts/extract_mbtiles/extract_mbtiles.py b/scripts/extract_mbtiles/extract_mbtiles.py index 23a0612..e7d441d 100755 --- a/scripts/extract_mbtiles/extract_mbtiles.py +++ b/scripts/extract_mbtiles/extract_mbtiles.py @@ -22,20 +22,20 @@ def cli(mbtiles_path: Path, dir_path: Path): used for reference: https://github.com/mapbox/mbutil """ - if dir_path.exists() and any(dir_path.iterdir()): - sys.exit('Dir not empty') + # if dir_path.exists() and any(dir_path.iterdir()): + # sys.exit('Dir not empty') dir_path.mkdir(exist_ok=True) conn = sqlite3.connect(mbtiles_path) c = conn.cursor() - write_metadata(c, dir_path=dir_path) - write_dedupl_files(c, dir_path=dir_path) - write_tile_file(c, dir_path=dir_path) + # write_metadata(c, dir_path=dir_path) + # write_dedupl_files(c, dir_path=dir_path) + write_tile_files(c, dir_path=dir_path) # remove dedupl files at the end - shutil.rmtree(dir_path / 'dedupl') + # shutil.rmtree(dir_path / 'dedupl') def write_metadata(c, *, dir_path): @@ -49,6 +49,7 @@ def write_dedupl_files(c, *, dir_path): total = c.execute('select count(*) from tiles_data').fetchone()[0] c.execute('select tile_data_id, tile_data from tiles_data') + for i, row in enumerate(c, start=1): dedupl_id = row[0] dedupl_path = dir_path / 'dedupl' / dedupl_helper_path(dedupl_id) @@ -58,24 +59,53 @@ def write_dedupl_files(c, *, dir_path): print(f'written dedupl file {i}/{total}') -def 
write_tile_file(c, *, dir_path): +def write_tile_files(c, *, dir_path): total = c.execute('select count(*) from tiles_shallow').fetchone()[0] + bug_fix_dict = {} + c.execute('select zoom_level, tile_column, tile_row, tile_data_id from tiles_shallow') for i, row in enumerate(c, start=1): + if i < 4678400: + continue + z = row[0] x = row[1] y = flip_y(z, row[2]) dedupl_id = row[3] dedupl_path = dir_path / 'dedupl' / dedupl_helper_path(dedupl_id) + dedupl_path_fixed = get_fixed_dedupl_name(bug_fix_dict, dedupl_path) tile_path = dir_path / 'tiles' / str(z) / str(x) / f'{y}.pbf' tile_path.parent.mkdir(parents=True, exist_ok=True) + if tile_path.is_file(): + continue + # create the hard link - tile_path.hardlink_to(dedupl_path) - print(f'hard link created {i}/{total}: {tile_path}') + try: + tile_path.hardlink_to(dedupl_path_fixed) + print(f'hard link created {i}/{total} {i / total * 100:.1f}%: {tile_path}') + except OSError as e: + # fixing BTRFS's 64k max link limit + if e.errno == 31: + bug_fix_dict.setdefault(dedupl_path, 0) + bug_fix_dict[dedupl_path] += 1 + fixed_path = get_fixed_dedupl_name(bug_fix_dict, dedupl_path) + shutil.copyfile(dedupl_path, fixed_path) + print(f'Created fixed dedupl file: {fixed_path}') + else: + raise + + # last file: 14/16383/0.pbf + + +def get_fixed_dedupl_name(bug_fix_dict, dedupl_path): + if dedupl_path in bug_fix_dict: + return dedupl_path.with_name(f'{dedupl_path.name}-{bug_fix_dict[dedupl_path]}') + else: + return dedupl_path def dedupl_helper_path(dedupl_id: int) -> Path: diff --git a/scripts/tile_gen/extract_btrfs.sh b/scripts/tile_gen/extract_btrfs.sh index 7b2bdba..cfed299 100644 --- a/scripts/tile_gen/extract_btrfs.sh +++ b/scripts/tile_gen/extract_btrfs.sh @@ -25,12 +25,13 @@ sudo mount -v \ sudo chown ofm:ofm -R mnt_rw -../../tile_gen/venv/bin/python ../../tile_gen/extract.py output.mbtiles mnt_rw/extract \ +../../tile_gen/venv/bin/python ../../tile_gen/extract_mbtiles.py output.mbtiles mnt_rw/extract \ > "extract_out.log" 2> 
"extract_err.log" sudo umount mnt_rw -../../tile_gen/venv/bin/python ../../tile_gen/shrink_btrfs.py image.btrfs +sudo ../../tile_gen/venv/bin/python ../../tile_gen/shrink_btrfs.py image.btrfs \ + > "shrink_out.log" 2> "shrink_err.log" # pigz -k image.btrfs --fast diff --git a/scripts/tile_gen/extract_ext4.sh b/scripts/tile_gen/extract_ext4.sh index 1fc55fa..51bb348 100644 --- a/scripts/tile_gen/extract_ext4.sh +++ b/scripts/tile_gen/extract_ext4.sh @@ -11,6 +11,7 @@ # from /etc/mke2fs.conf # defaults: has_journal,extent,huge_file,flex_bg,metadata_csum,64bit,dir_nlink,extra_isize # disabling journalling, since it's a read-only fs, as well as other unused features +# extent is actually needed for tail packing small files # # -E extended-options # lazy_itable_init - inode table is fully initialized at the time of file system creation @@ -32,9 +33,10 @@ fallocate -l 300G image.ext4 mke2fs -t ext4 -v \ -m 0 \ -F \ - -O ^has_journal,^extent,^huge_file,^metadata_csum,^64bit,^extra_isize \ + -O ^has_journal,^huge_file,^metadata_csum,^64bit,^extra_isize \ -E lazy_itable_init=0,nodiscard \ -T small \ + -I 128 \ image.ext4 mkdir mnt