mirror of
https://github.com/hyperknot/openfreemap.git
synced 2026-05-21 14:02:15 +00:00
work
This commit is contained in:
134
docs/extract.md
134
docs/extract.md
@@ -1,4 +1,9 @@
|
||||
### native mapbox/mbutil
|
||||
# Comparing filesystem stats after extraction
|
||||
|
||||
Run: *planet_20231208*
|
||||
|
||||
|
||||
## native mapbox/mbutil
|
||||
|
||||
Filesystem 1K-blocks Used Available Use% Mounted on
|
||||
/dev/loop0 1,474,386,100 1,119,622,516 354,763,584 76%
|
||||
@@ -7,98 +12,103 @@ Filesystem Inodes IUsed IFree IUse% Mounted on
|
||||
/dev/loop0 393,216,000 269,252,174 123,963,826 69%
|
||||
|
||||
|
||||
|
||||
### extract dedupl ext4
|
||||
## deduplicated on ext4
|
||||
|
||||
39,570,683 dedupl files
|
||||
|
||||
df -h mnt
|
||||
df -h mnt_rw
|
||||
Filesystem Size Used Avail Use% Mounted on
|
||||
/dev/loop0 1.4T 187G 1.2T 14%
|
||||
|
||||
df mnt
|
||||
df mnt_rw
|
||||
Filesystem 1K-blocks Used Available Use% Mounted on
|
||||
/dev/loop0 1474386100 195624664 1278761436 14%
|
||||
|
||||
df -i mnt
|
||||
df -i mnt_rw
|
||||
Filesystem Inodes IUsed IFree IUse% Mounted on
|
||||
/dev/loop0 393216000 39614466 353601534 11%
|
||||
|
||||
--- after resize2fs ext4
|
||||
|
||||
df -h mnt
|
||||
## deduplicated on BTRFS
|
||||
|
||||
### creation params
|
||||
|
||||
```
|
||||
mkfs.btrfs -m single
|
||||
mount -o noacl,nobarrier,noatime,max_inline=4096
|
||||
```
|
||||
|
||||
### df
|
||||
|
||||
```
|
||||
df -h mnt_rw
|
||||
Filesystem Size Used Avail Use% Mounted on
|
||||
/dev/loop0 189G 187G 2.4G 99%
|
||||
/dev/loop3 300G 139G 161G 47%
|
||||
```
|
||||
|
||||
df mnt
|
||||
```
|
||||
df mnt_rw
|
||||
Filesystem 1K-blocks Used Available Use% Mounted on
|
||||
/dev/loop0 198098376 195624664 2473712 99%
|
||||
/dev/loop3 314572800 145030788 168339820 47%
|
||||
```
|
||||
|
||||
df -i mnt
|
||||
Filesystem Inodes IUsed IFree IUse% Mounted on
|
||||
/dev/loop0 52854784 39614466 13240318 75%
|
||||
### btrfs
|
||||
|
||||
|
||||
### extract dedupl btrfs
|
||||
note: this test uses compress-force=lzo, but the data is effectively incompressible, since the PBF files are already gzipped
|
||||
|
||||
|
||||
df -h mnt
|
||||
Filesystem Size Used Avail Use% Mounted on
|
||||
/dev/loop0 300G 97G 204G 33%
|
||||
|
||||
df mnt
|
||||
Filesystem 1K-blocks Used Available Use% Mounted on
|
||||
/dev/loop0 314572800 100925972 213428604 33%
|
||||
|
||||
btrfs filesystem df mnt
|
||||
```
|
||||
btrfs filesystem df mnt_rw
|
||||
Data, single: total=48.01GiB, used=47.45GiB
|
||||
System, single: total=4.00MiB, used=16.00KiB
|
||||
Metadata, single: total=49.01GiB, used=48.32GiB
|
||||
GlobalReserve, single: total=496.86MiB, used=0.00B
|
||||
Metadata, single: total=92.01GiB, used=90.37GiB
|
||||
GlobalReserve, single: total=512.00MiB, used=0.00B
|
||||
```
|
||||
|
||||
btrfs filesystem du -s mnt
|
||||
|
||||
```
|
||||
btrfs filesystem du -s mnt_rw
|
||||
Total Exclusive Set shared Filename
|
||||
47.45GiB 47.45GiB 0.00B mnt
|
||||
47.45GiB 47.45GiB 0.00B mnt_rw
|
||||
```
|
||||
```
|
||||
btrfs filesystem show mnt_rw
|
||||
Total devices 1 FS bytes used 137.81GiB
|
||||
devid 1 size 300.00GiB used 140.02GiB path /dev/loop3
|
||||
```
|
||||
|
||||
|
||||
sudo btrfs filesystem show mnt
|
||||
Label: none uuid: ce7615d1-0ee5-460b-bdb0-7c4d214eecc4
|
||||
Total devices 1 FS bytes used 95.76GiB
|
||||
devid 1 size 300.00GiB used 97.02GiB path /dev/loop0
|
||||
|
||||
sudo btrfs filesystem usage mnt
|
||||
```
|
||||
btrfs filesystem usage mnt_rw
|
||||
Overall:
|
||||
Device size: 300.00GiB
|
||||
Device allocated: 97.02GiB
|
||||
Device unallocated: 202.98GiB
|
||||
Device missing: 0.00B
|
||||
Used: 95.76GiB
|
||||
Free (estimated): 203.54GiB (min: 203.54GiB)
|
||||
Free (statfs, df): 203.54GiB
|
||||
Data ratio: 1.00
|
||||
Metadata ratio: 1.00
|
||||
Global reserve: 501.22MiB (used: 0.00B)
|
||||
Multiple profiles: no
|
||||
Device size: 300.00GiB
|
||||
Device allocated: 140.02GiB
|
||||
Device unallocated: 159.98GiB
|
||||
Device missing: 0.00B
|
||||
Used: 137.81GiB
|
||||
Free (estimated): 160.54GiB (min: 160.54GiB)
|
||||
Free (statfs, df): 160.54GiB
|
||||
Data ratio: 1.00
|
||||
Metadata ratio: 1.00
|
||||
Global reserve: 512.00MiB (used: 0.00B)
|
||||
Multiple profiles: no
|
||||
|
||||
Data,single: Size:48.01GiB, Used:47.45GiB (98.83%)
|
||||
/dev/loop0 48.01GiB
|
||||
/dev/loop3 48.01GiB
|
||||
|
||||
Metadata,single: Size:49.01GiB, Used:48.32GiB (98.60%)
|
||||
/dev/loop0 49.01GiB
|
||||
Metadata,single: Size:92.01GiB, Used:90.37GiB (98.22%)
|
||||
/dev/loop3 92.01GiB
|
||||
|
||||
System,single: Size:4.00MiB, Used:16.00KiB (0.39%)
|
||||
/dev/loop0 4.00MiB
|
||||
/dev/loop3 4.00MiB
|
||||
|
||||
Unallocated:
|
||||
/dev/loop0 202.98GiB
|
||||
/dev/loop3 159.98GiB
|
||||
```
|
||||
|
||||
### compsize
|
||||
|
||||
|
||||
|
||||
compsize -x mnt
|
||||
Processed 44249086 files, 3458702 regular extents (3800454 refs), 40448654 inline.
|
||||
```
|
||||
compsize -x mnt_rw
|
||||
Processed 308790063 files, 3458682 regular extents (6917363 refs), 301872700 inline.
|
||||
Type Perc Disk Usage Uncompressed Referenced
|
||||
TOTAL 99% 74G 74G 80G
|
||||
none 100% 74G 74G 80G
|
||||
lzo 20% 4.0K 20K 20K
|
||||
|
||||
TOTAL 100% 118G 118G 165G
|
||||
none 100% 118G 118G 165G
|
||||
```
|
||||
5
ideas-filesystems.txt
Normal file
5
ideas-filesystems.txt
Normal file
@@ -0,0 +1,5 @@
|
||||
ext4
|
||||
|
||||
Reduce the inode size: 256 -> -I 128
|
||||
|
||||
Increase the inode ratio: The inode ratio is the number of bytes of disk space per inode (bytes-per-inode). You can increase the inode ratio to reduce the number of inodes created. This can be done with the -i option when creating the filesystem. For example, -i 8192 will create one inode for every 8192 bytes of space on the disk.
|
||||
1
scripts/benchmark/.gitignore
vendored
1
scripts/benchmark/.gitignore
vendored
@@ -1 +0,0 @@
|
||||
*.txt
|
||||
4
scripts/benchmark/command.txt
Normal file
4
scripts/benchmark/command.txt
Normal file
@@ -0,0 +1,4 @@
|
||||
wrk -c1000 -d10s -t1 -s /data/ofm/benchmark/wrk_custom_list.lua http://localhost
|
||||
|
||||
# -t1 - needs to be single-threaded, otherwise the URLs would not be read in sequence
|
||||
|
||||
@@ -22,20 +22,20 @@ def cli(mbtiles_path: Path, dir_path: Path):
|
||||
used for reference: https://github.com/mapbox/mbutil
|
||||
"""
|
||||
|
||||
if dir_path.exists() and any(dir_path.iterdir()):
|
||||
sys.exit('Dir not empty')
|
||||
# if dir_path.exists() and any(dir_path.iterdir()):
|
||||
# sys.exit('Dir not empty')
|
||||
|
||||
dir_path.mkdir(exist_ok=True)
|
||||
|
||||
conn = sqlite3.connect(mbtiles_path)
|
||||
c = conn.cursor()
|
||||
|
||||
write_metadata(c, dir_path=dir_path)
|
||||
write_dedupl_files(c, dir_path=dir_path)
|
||||
write_tile_file(c, dir_path=dir_path)
|
||||
# write_metadata(c, dir_path=dir_path)
|
||||
# write_dedupl_files(c, dir_path=dir_path)
|
||||
write_tile_files(c, dir_path=dir_path)
|
||||
|
||||
# remove dedupl files at the end
|
||||
shutil.rmtree(dir_path / 'dedupl')
|
||||
# shutil.rmtree(dir_path / 'dedupl')
|
||||
|
||||
|
||||
def write_metadata(c, *, dir_path):
|
||||
@@ -49,6 +49,7 @@ def write_dedupl_files(c, *, dir_path):
|
||||
total = c.execute('select count(*) from tiles_data').fetchone()[0]
|
||||
|
||||
c.execute('select tile_data_id, tile_data from tiles_data')
|
||||
|
||||
for i, row in enumerate(c, start=1):
|
||||
dedupl_id = row[0]
|
||||
dedupl_path = dir_path / 'dedupl' / dedupl_helper_path(dedupl_id)
|
||||
@@ -58,24 +59,53 @@ def write_dedupl_files(c, *, dir_path):
|
||||
print(f'written dedupl file {i}/{total}')
|
||||
|
||||
|
||||
def write_tile_file(c, *, dir_path):
|
||||
def write_tile_files(c, *, dir_path):
|
||||
total = c.execute('select count(*) from tiles_shallow').fetchone()[0]
|
||||
|
||||
bug_fix_dict = {}
|
||||
|
||||
c.execute('select zoom_level, tile_column, tile_row, tile_data_id from tiles_shallow')
|
||||
for i, row in enumerate(c, start=1):
|
||||
if i < 4678400:
|
||||
continue
|
||||
|
||||
z = row[0]
|
||||
x = row[1]
|
||||
y = flip_y(z, row[2])
|
||||
dedupl_id = row[3]
|
||||
|
||||
dedupl_path = dir_path / 'dedupl' / dedupl_helper_path(dedupl_id)
|
||||
dedupl_path_fixed = get_fixed_dedupl_name(bug_fix_dict, dedupl_path)
|
||||
|
||||
tile_path = dir_path / 'tiles' / str(z) / str(x) / f'{y}.pbf'
|
||||
tile_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
if tile_path.is_file():
|
||||
continue
|
||||
|
||||
# create the hard link
|
||||
tile_path.hardlink_to(dedupl_path)
|
||||
print(f'hard link created {i}/{total}: {tile_path}')
|
||||
try:
|
||||
tile_path.hardlink_to(dedupl_path_fixed)
|
||||
print(f'hard link created {i}/{total} {i / total * 100:.1f}%: {tile_path}')
|
||||
except OSError as e:
|
||||
# fixing BTRFS's 64k max link limit
|
||||
if e.errno == 31:
|
||||
bug_fix_dict.setdefault(dedupl_path, 0)
|
||||
bug_fix_dict[dedupl_path] += 1
|
||||
fixed_path = get_fixed_dedupl_name(bug_fix_dict, dedupl_path)
|
||||
shutil.copyfile(dedupl_path, fixed_path)
|
||||
print(f'Created fixed dedupl file: {fixed_path}')
|
||||
else:
|
||||
raise
|
||||
|
||||
# last file: 14/16383/0.pbf
|
||||
|
||||
|
||||
def get_fixed_dedupl_name(bug_fix_dict, dedupl_path):
|
||||
if dedupl_path in bug_fix_dict:
|
||||
return dedupl_path.with_name(f'{dedupl_path.name}-{bug_fix_dict[dedupl_path]}')
|
||||
else:
|
||||
return dedupl_path
|
||||
|
||||
|
||||
def dedupl_helper_path(dedupl_id: int) -> Path:
|
||||
|
||||
@@ -25,12 +25,13 @@ sudo mount -v \
|
||||
|
||||
sudo chown ofm:ofm -R mnt_rw
|
||||
|
||||
../../tile_gen/venv/bin/python ../../tile_gen/extract.py output.mbtiles mnt_rw/extract \
|
||||
../../tile_gen/venv/bin/python ../../tile_gen/extract_mbtiles.py output.mbtiles mnt_rw/extract \
|
||||
> "extract_out.log" 2> "extract_err.log"
|
||||
|
||||
sudo umount mnt_rw
|
||||
|
||||
../../tile_gen/venv/bin/python ../../tile_gen/shrink_btrfs.py image.btrfs
|
||||
sudo ../../tile_gen/venv/bin/python ../../tile_gen/shrink_btrfs.py image.btrfs \
|
||||
> "shrink_out.log" 2> "shrink_err.log"
|
||||
|
||||
# pigz -k image.btrfs --fast
|
||||
|
||||
|
||||
@@ -11,6 +11,7 @@
|
||||
# from /etc/mke2fs.conf
|
||||
# defaults: has_journal,extent,huge_file,flex_bg,metadata_csum,64bit,dir_nlink,extra_isize
|
||||
# disabling journalling, since it's a read-only fs, as well as other unused features
|
||||
# extent is actually needed for tail packing small files
|
||||
#
|
||||
# -E extended-options
|
||||
# lazy_itable_init - inode table is fully initialized at the time of file system creation
|
||||
@@ -32,9 +33,10 @@ fallocate -l 300G image.ext4
|
||||
mke2fs -t ext4 -v \
|
||||
-m 0 \
|
||||
-F \
|
||||
-O ^has_journal,^extent,^huge_file,^metadata_csum,^64bit,^extra_isize \
|
||||
-O ^has_journal,^huge_file,^metadata_csum,^64bit,^extra_isize \
|
||||
-E lazy_itable_init=0,nodiscard \
|
||||
-T small \
|
||||
-I 128 \
|
||||
image.ext4
|
||||
|
||||
mkdir mnt
|
||||
|
||||
Reference in New Issue
Block a user